{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 61000, "global_step": 247180, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 8.091269520187717e-05, "grad_norm": 12.379002571105957, "learning_rate": 4.045634760093859e-09, "loss": 0.757, "step": 10 }, { "epoch": 0.00016182539040375434, "grad_norm": 6.766618251800537, "learning_rate": 8.091269520187718e-09, "loss": 0.6757, "step": 20 }, { "epoch": 0.00024273808560563152, "grad_norm": 8.904035568237305, "learning_rate": 1.2136904280281577e-08, "loss": 0.7167, "step": 30 }, { "epoch": 0.0003236507808075087, "grad_norm": 6.01339864730835, "learning_rate": 1.6182539040375435e-08, "loss": 0.7246, "step": 40 }, { "epoch": 0.00040456347600938586, "grad_norm": 8.555109024047852, "learning_rate": 2.0228173800469296e-08, "loss": 0.7254, "step": 50 }, { "epoch": 0.00048547617121126303, "grad_norm": 12.658461570739746, "learning_rate": 2.4273808560563153e-08, "loss": 0.7426, "step": 60 }, { "epoch": 0.0005663888664131403, "grad_norm": 9.224655151367188, "learning_rate": 2.8319443320657014e-08, "loss": 0.7289, "step": 70 }, { "epoch": 0.0006473015616150174, "grad_norm": 11.249184608459473, "learning_rate": 3.236507808075087e-08, "loss": 0.7167, "step": 80 }, { "epoch": 0.0007282142568168946, "grad_norm": 11.428268432617188, "learning_rate": 3.641071284084473e-08, "loss": 0.7272, "step": 90 }, { "epoch": 0.0008091269520187717, "grad_norm": 8.701935768127441, "learning_rate": 4.045634760093859e-08, "loss": 0.714, "step": 100 }, { "epoch": 0.000890039647220649, "grad_norm": 9.000862121582031, "learning_rate": 4.4501982361032446e-08, "loss": 0.7236, "step": 110 }, { "epoch": 0.0009709523424225261, "grad_norm": 9.238840103149414, "learning_rate": 4.8547617121126306e-08, "loss": 0.6626, "step": 120 }, { "epoch": 0.0010518650376244033, "grad_norm": 7.1845855712890625, "learning_rate": 5.2593251881220174e-08, "loss": 0.7119, "step": 130 }, { "epoch": 0.0011327777328262805, "grad_norm": 9.151616096496582, "learning_rate": 5.663888664131403e-08, "loss": 0.691, "step": 140 }, { "epoch": 0.0012136904280281575, "grad_norm": 8.636963844299316, "learning_rate": 6.068452140140788e-08, "loss": 0.7035, "step": 150 }, { "epoch": 0.0012946031232300348, "grad_norm": 13.511943817138672, "learning_rate": 6.473015616150174e-08, "loss": 0.6775, "step": 160 }, { "epoch": 0.001375515818431912, "grad_norm": 8.694615364074707, "learning_rate": 6.87757909215956e-08, "loss": 0.6397, "step": 170 }, { "epoch": 0.0014564285136337892, "grad_norm": 8.372581481933594, "learning_rate": 7.282142568168946e-08, "loss": 0.6441, "step": 180 }, { "epoch": 0.0015373412088356662, "grad_norm": 8.748292922973633, "learning_rate": 7.686706044178332e-08, "loss": 0.6136, "step": 190 }, { "epoch": 0.0016182539040375434, "grad_norm": 6.486173629760742, "learning_rate": 8.091269520187718e-08, "loss": 0.6242, "step": 200 }, { "epoch": 0.0016991665992394207, "grad_norm": 5.64271879196167, "learning_rate": 8.495832996197104e-08, "loss": 0.6183, "step": 210 }, { "epoch": 0.001780079294441298, "grad_norm": 6.769535064697266, "learning_rate": 8.900396472206489e-08, "loss": 0.5846, "step": 220 }, { "epoch": 0.001860991989643175, "grad_norm": 7.145188331604004, "learning_rate": 9.304959948215877e-08, "loss": 0.5563, "step": 230 }, { "epoch": 0.0019419046848450521, "grad_norm": 5.675797462463379, "learning_rate": 9.709523424225261e-08, "loss": 0.5659, "step": 240 }, { "epoch": 0.0020228173800469294, "grad_norm": 5.847397327423096, "learning_rate": 1.0114086900234647e-07, "loss": 0.5424, "step": 250 }, { "epoch": 0.0021037300752488066, "grad_norm": 5.381962776184082, "learning_rate": 1.0518650376244035e-07, "loss": 0.5453, "step": 260 }, { "epoch": 0.002184642770450684, "grad_norm": 5.614905834197998, "learning_rate": 1.092321385225342e-07, "loss": 0.4841, "step": 270 }, { "epoch": 0.002265555465652561, "grad_norm": 5.928528308868408, "learning_rate": 1.1327777328262805e-07, "loss": 0.5116, "step": 280 }, { "epoch": 0.0023464681608544383, "grad_norm": 6.536757946014404, "learning_rate": 1.173234080427219e-07, "loss": 0.4909, "step": 290 }, { "epoch": 0.002427380856056315, "grad_norm": 4.865281105041504, "learning_rate": 1.2136904280281576e-07, "loss": 0.4196, "step": 300 }, { "epoch": 0.0025082935512581923, "grad_norm": 5.311683177947998, "learning_rate": 1.2541467756290965e-07, "loss": 0.4117, "step": 310 }, { "epoch": 0.0025892062464600695, "grad_norm": 3.6997504234313965, "learning_rate": 1.2946031232300348e-07, "loss": 0.4483, "step": 320 }, { "epoch": 0.0026701189416619467, "grad_norm": 5.498950958251953, "learning_rate": 1.3350594708309734e-07, "loss": 0.4065, "step": 330 }, { "epoch": 0.002751031636863824, "grad_norm": 4.913830280303955, "learning_rate": 1.375515818431912e-07, "loss": 0.3833, "step": 340 }, { "epoch": 0.002831944332065701, "grad_norm": 3.7510697841644287, "learning_rate": 1.4159721660328507e-07, "loss": 0.3677, "step": 350 }, { "epoch": 0.0029128570272675784, "grad_norm": 4.827446937561035, "learning_rate": 1.4564285136337893e-07, "loss": 0.35, "step": 360 }, { "epoch": 0.0029937697224694556, "grad_norm": 3.3381733894348145, "learning_rate": 1.4968848612347279e-07, "loss": 0.3837, "step": 370 }, { "epoch": 0.0030746824176713324, "grad_norm": 3.4584944248199463, "learning_rate": 1.5373412088356665e-07, "loss": 0.3692, "step": 380 }, { "epoch": 0.0031555951128732097, "grad_norm": 4.044813632965088, "learning_rate": 1.577797556436605e-07, "loss": 0.3554, "step": 390 }, { "epoch": 0.003236507808075087, "grad_norm": 3.2526895999908447, "learning_rate": 1.6182539040375437e-07, "loss": 0.3335, "step": 400 }, { "epoch": 0.003317420503276964, "grad_norm": 3.80802059173584, "learning_rate": 1.658710251638482e-07, "loss": 0.2995, "step": 410 }, { "epoch": 0.0033983331984788413, "grad_norm": 3.4544379711151123, "learning_rate": 1.699166599239421e-07, "loss": 0.303, "step": 420 }, { "epoch": 0.0034792458936807186, "grad_norm": 4.834680080413818, "learning_rate": 1.7396229468403595e-07, "loss": 0.301, "step": 430 }, { "epoch": 0.003560158588882596, "grad_norm": 4.430881500244141, "learning_rate": 1.7800792944412978e-07, "loss": 0.277, "step": 440 }, { "epoch": 0.003641071284084473, "grad_norm": 4.520712852478027, "learning_rate": 1.8205356420422367e-07, "loss": 0.2813, "step": 450 }, { "epoch": 0.00372198397928635, "grad_norm": 3.840907096862793, "learning_rate": 1.8609919896431753e-07, "loss": 0.3122, "step": 460 }, { "epoch": 0.003802896674488227, "grad_norm": 4.620613098144531, "learning_rate": 1.9014483372441137e-07, "loss": 0.3129, "step": 470 }, { "epoch": 0.0038838093696901043, "grad_norm": 3.4103245735168457, "learning_rate": 1.9419046848450523e-07, "loss": 0.271, "step": 480 }, { "epoch": 0.003964722064891982, "grad_norm": 2.994216203689575, "learning_rate": 1.982361032445991e-07, "loss": 0.246, "step": 490 }, { "epoch": 0.004045634760093859, "grad_norm": 3.427161693572998, "learning_rate": 2.0228173800469295e-07, "loss": 0.2549, "step": 500 }, { "epoch": 0.0041265474552957355, "grad_norm": 4.34993839263916, "learning_rate": 2.063273727647868e-07, "loss": 0.2478, "step": 510 }, { "epoch": 0.004207460150497613, "grad_norm": 3.5475993156433105, "learning_rate": 2.103730075248807e-07, "loss": 0.2173, "step": 520 }, { "epoch": 0.00428837284569949, "grad_norm": 3.3531546592712402, "learning_rate": 2.1441864228497453e-07, "loss": 0.2799, "step": 530 }, { "epoch": 0.004369285540901368, "grad_norm": 5.000548839569092, "learning_rate": 2.184642770450684e-07, "loss": 0.2994, "step": 540 }, { "epoch": 0.004450198236103244, "grad_norm": 3.4936842918395996, "learning_rate": 2.2250991180516228e-07, "loss": 0.2922, "step": 550 }, { "epoch": 0.004531110931305122, "grad_norm": 3.308675527572632, "learning_rate": 2.265555465652561e-07, "loss": 0.2611, "step": 560 }, { "epoch": 0.004612023626506999, "grad_norm": 4.257160663604736, "learning_rate": 2.3060118132534997e-07, "loss": 0.2256, "step": 570 }, { "epoch": 0.0046929363217088765, "grad_norm": 2.7568166255950928, "learning_rate": 2.346468160854438e-07, "loss": 0.2675, "step": 580 }, { "epoch": 0.004773849016910753, "grad_norm": 3.8126919269561768, "learning_rate": 2.386924508455377e-07, "loss": 0.238, "step": 590 }, { "epoch": 0.00485476171211263, "grad_norm": 3.6163361072540283, "learning_rate": 2.427380856056315e-07, "loss": 0.1819, "step": 600 }, { "epoch": 0.004935674407314508, "grad_norm": 7.711766242980957, "learning_rate": 2.467837203657254e-07, "loss": 0.2621, "step": 610 }, { "epoch": 0.005016587102516385, "grad_norm": 3.08115291595459, "learning_rate": 2.508293551258193e-07, "loss": 0.2399, "step": 620 }, { "epoch": 0.005097499797718262, "grad_norm": 3.376234769821167, "learning_rate": 2.5487498988591313e-07, "loss": 0.2172, "step": 630 }, { "epoch": 0.005178412492920139, "grad_norm": 4.1586689949035645, "learning_rate": 2.5892062464600697e-07, "loss": 0.2224, "step": 640 }, { "epoch": 0.005259325188122017, "grad_norm": 3.4253158569335938, "learning_rate": 2.6296625940610085e-07, "loss": 0.2253, "step": 650 }, { "epoch": 0.0053402378833238935, "grad_norm": 6.396710395812988, "learning_rate": 2.670118941661947e-07, "loss": 0.1957, "step": 660 }, { "epoch": 0.00542115057852577, "grad_norm": 5.417088508605957, "learning_rate": 2.710575289262886e-07, "loss": 0.2124, "step": 670 }, { "epoch": 0.005502063273727648, "grad_norm": 4.594139099121094, "learning_rate": 2.751031636863824e-07, "loss": 0.1996, "step": 680 }, { "epoch": 0.005582975968929525, "grad_norm": 4.534917831420898, "learning_rate": 2.791487984464763e-07, "loss": 0.2853, "step": 690 }, { "epoch": 0.005663888664131402, "grad_norm": 3.375166654586792, "learning_rate": 2.8319443320657013e-07, "loss": 0.2274, "step": 700 }, { "epoch": 0.005744801359333279, "grad_norm": 3.8811707496643066, "learning_rate": 2.8724006796666396e-07, "loss": 0.2118, "step": 710 }, { "epoch": 0.005825714054535157, "grad_norm": 3.3508832454681396, "learning_rate": 2.9128570272675785e-07, "loss": 0.2416, "step": 720 }, { "epoch": 0.005906626749737034, "grad_norm": 3.72011137008667, "learning_rate": 2.9533133748685174e-07, "loss": 0.2322, "step": 730 }, { "epoch": 0.005987539444938911, "grad_norm": 2.743861675262451, "learning_rate": 2.9937697224694557e-07, "loss": 0.1997, "step": 740 }, { "epoch": 0.006068452140140788, "grad_norm": 2.5576601028442383, "learning_rate": 3.0342260700703946e-07, "loss": 0.1816, "step": 750 }, { "epoch": 0.006149364835342665, "grad_norm": 3.724195718765259, "learning_rate": 3.074682417671333e-07, "loss": 0.2006, "step": 760 }, { "epoch": 0.0062302775305445425, "grad_norm": 4.11584997177124, "learning_rate": 3.1151387652722713e-07, "loss": 0.2258, "step": 770 }, { "epoch": 0.006311190225746419, "grad_norm": 3.0799951553344727, "learning_rate": 3.15559511287321e-07, "loss": 0.1988, "step": 780 }, { "epoch": 0.006392102920948297, "grad_norm": 3.3659675121307373, "learning_rate": 3.1960514604741485e-07, "loss": 0.1856, "step": 790 }, { "epoch": 0.006473015616150174, "grad_norm": 3.713207483291626, "learning_rate": 3.2365078080750874e-07, "loss": 0.2152, "step": 800 }, { "epoch": 0.0065539283113520514, "grad_norm": 3.3741424083709717, "learning_rate": 3.276964155676026e-07, "loss": 0.2085, "step": 810 }, { "epoch": 0.006634841006553928, "grad_norm": 4.645244121551514, "learning_rate": 3.317420503276964e-07, "loss": 0.2256, "step": 820 }, { "epoch": 0.006715753701755806, "grad_norm": 4.63256311416626, "learning_rate": 3.357876850877903e-07, "loss": 0.2189, "step": 830 }, { "epoch": 0.006796666396957683, "grad_norm": 3.523310661315918, "learning_rate": 3.398333198478842e-07, "loss": 0.2038, "step": 840 }, { "epoch": 0.0068775790921595595, "grad_norm": 3.1734118461608887, "learning_rate": 3.43878954607978e-07, "loss": 0.1842, "step": 850 }, { "epoch": 0.006958491787361437, "grad_norm": 3.5565481185913086, "learning_rate": 3.479245893680719e-07, "loss": 0.1562, "step": 860 }, { "epoch": 0.007039404482563314, "grad_norm": 3.624450445175171, "learning_rate": 3.519702241281658e-07, "loss": 0.1919, "step": 870 }, { "epoch": 0.007120317177765192, "grad_norm": 3.10187029838562, "learning_rate": 3.5601585888825957e-07, "loss": 0.2034, "step": 880 }, { "epoch": 0.007201229872967068, "grad_norm": 3.1238534450531006, "learning_rate": 3.6006149364835345e-07, "loss": 0.1869, "step": 890 }, { "epoch": 0.007282142568168946, "grad_norm": 3.7281148433685303, "learning_rate": 3.6410712840844734e-07, "loss": 0.2045, "step": 900 }, { "epoch": 0.007363055263370823, "grad_norm": 3.60780930519104, "learning_rate": 3.681527631685412e-07, "loss": 0.1521, "step": 910 }, { "epoch": 0.0074439679585727, "grad_norm": 4.0806474685668945, "learning_rate": 3.7219839792863506e-07, "loss": 0.2149, "step": 920 }, { "epoch": 0.007524880653774577, "grad_norm": 3.4462502002716064, "learning_rate": 3.7624403268872884e-07, "loss": 0.2092, "step": 930 }, { "epoch": 0.007605793348976454, "grad_norm": 3.1142451763153076, "learning_rate": 3.8028966744882273e-07, "loss": 0.197, "step": 940 }, { "epoch": 0.007686706044178332, "grad_norm": 3.337491750717163, "learning_rate": 3.843353022089166e-07, "loss": 0.2323, "step": 950 }, { "epoch": 0.0077676187393802085, "grad_norm": 3.4845011234283447, "learning_rate": 3.8838093696901045e-07, "loss": 0.1947, "step": 960 }, { "epoch": 0.007848531434582086, "grad_norm": 3.152672529220581, "learning_rate": 3.9242657172910434e-07, "loss": 0.1654, "step": 970 }, { "epoch": 0.007929444129783964, "grad_norm": 3.5038528442382812, "learning_rate": 3.964722064891982e-07, "loss": 0.2074, "step": 980 }, { "epoch": 0.00801035682498584, "grad_norm": 3.0682966709136963, "learning_rate": 4.00517841249292e-07, "loss": 0.1971, "step": 990 }, { "epoch": 0.008091269520187717, "grad_norm": 3.296968460083008, "learning_rate": 4.045634760093859e-07, "loss": 0.1931, "step": 1000 }, { "epoch": 0.008172182215389595, "grad_norm": 2.605437755584717, "learning_rate": 4.086091107694798e-07, "loss": 0.1731, "step": 1010 }, { "epoch": 0.008253094910591471, "grad_norm": 3.4666991233825684, "learning_rate": 4.126547455295736e-07, "loss": 0.175, "step": 1020 }, { "epoch": 0.008334007605793349, "grad_norm": 3.323270082473755, "learning_rate": 4.167003802896675e-07, "loss": 0.1357, "step": 1030 }, { "epoch": 0.008414920300995226, "grad_norm": 2.8505561351776123, "learning_rate": 4.207460150497614e-07, "loss": 0.2003, "step": 1040 }, { "epoch": 0.008495832996197104, "grad_norm": 3.800140380859375, "learning_rate": 4.2479164980985517e-07, "loss": 0.1791, "step": 1050 }, { "epoch": 0.00857674569139898, "grad_norm": 3.335207939147949, "learning_rate": 4.2883728456994906e-07, "loss": 0.1998, "step": 1060 }, { "epoch": 0.008657658386600858, "grad_norm": 3.441448211669922, "learning_rate": 4.3288291933004294e-07, "loss": 0.1637, "step": 1070 }, { "epoch": 0.008738571081802735, "grad_norm": 4.486982822418213, "learning_rate": 4.369285540901368e-07, "loss": 0.2044, "step": 1080 }, { "epoch": 0.008819483777004611, "grad_norm": 5.554523944854736, "learning_rate": 4.4097418885023066e-07, "loss": 0.2179, "step": 1090 }, { "epoch": 0.008900396472206489, "grad_norm": 5.776538848876953, "learning_rate": 4.4501982361032455e-07, "loss": 0.189, "step": 1100 }, { "epoch": 0.008981309167408367, "grad_norm": 2.9057066440582275, "learning_rate": 4.4906545837041833e-07, "loss": 0.183, "step": 1110 }, { "epoch": 0.009062221862610244, "grad_norm": 3.646470546722412, "learning_rate": 4.531110931305122e-07, "loss": 0.1817, "step": 1120 }, { "epoch": 0.00914313455781212, "grad_norm": 3.412902593612671, "learning_rate": 4.5715672789060605e-07, "loss": 0.158, "step": 1130 }, { "epoch": 0.009224047253013998, "grad_norm": 2.814008951187134, "learning_rate": 4.6120236265069994e-07, "loss": 0.1681, "step": 1140 }, { "epoch": 0.009304959948215875, "grad_norm": 3.7407374382019043, "learning_rate": 4.6524799741079383e-07, "loss": 0.16, "step": 1150 }, { "epoch": 0.009385872643417753, "grad_norm": 3.114804983139038, "learning_rate": 4.692936321708876e-07, "loss": 0.1604, "step": 1160 }, { "epoch": 0.009466785338619629, "grad_norm": 3.6801137924194336, "learning_rate": 4.733392669309815e-07, "loss": 0.1782, "step": 1170 }, { "epoch": 0.009547698033821507, "grad_norm": 4.836055755615234, "learning_rate": 4.773849016910754e-07, "loss": 0.1337, "step": 1180 }, { "epoch": 0.009628610729023384, "grad_norm": 5.025834560394287, "learning_rate": 4.814305364511692e-07, "loss": 0.1631, "step": 1190 }, { "epoch": 0.00970952342422526, "grad_norm": 2.7603020668029785, "learning_rate": 4.85476171211263e-07, "loss": 0.1723, "step": 1200 }, { "epoch": 0.009790436119427138, "grad_norm": 4.246847152709961, "learning_rate": 4.89521805971357e-07, "loss": 0.1951, "step": 1210 }, { "epoch": 0.009871348814629016, "grad_norm": 3.764586925506592, "learning_rate": 4.935674407314508e-07, "loss": 0.1687, "step": 1220 }, { "epoch": 0.009952261509830893, "grad_norm": 3.2553017139434814, "learning_rate": 4.976130754915447e-07, "loss": 0.1526, "step": 1230 }, { "epoch": 0.01003317420503277, "grad_norm": 3.6567211151123047, "learning_rate": 5.016587102516386e-07, "loss": 0.1725, "step": 1240 }, { "epoch": 0.010114086900234647, "grad_norm": 3.5167927742004395, "learning_rate": 5.057043450117323e-07, "loss": 0.1447, "step": 1250 }, { "epoch": 0.010194999595436524, "grad_norm": 2.8025596141815186, "learning_rate": 5.097499797718263e-07, "loss": 0.1561, "step": 1260 }, { "epoch": 0.0102759122906384, "grad_norm": 3.1487576961517334, "learning_rate": 5.137956145319201e-07, "loss": 0.1222, "step": 1270 }, { "epoch": 0.010356824985840278, "grad_norm": 4.184289455413818, "learning_rate": 5.178412492920139e-07, "loss": 0.1445, "step": 1280 }, { "epoch": 0.010437737681042156, "grad_norm": 2.830324172973633, "learning_rate": 5.218868840521079e-07, "loss": 0.1512, "step": 1290 }, { "epoch": 0.010518650376244033, "grad_norm": 2.8302245140075684, "learning_rate": 5.259325188122017e-07, "loss": 0.1792, "step": 1300 }, { "epoch": 0.01059956307144591, "grad_norm": 2.426011323928833, "learning_rate": 5.299781535722955e-07, "loss": 0.1529, "step": 1310 }, { "epoch": 0.010680475766647787, "grad_norm": 3.1110141277313232, "learning_rate": 5.340237883323894e-07, "loss": 0.1683, "step": 1320 }, { "epoch": 0.010761388461849665, "grad_norm": 2.8938634395599365, "learning_rate": 5.380694230924832e-07, "loss": 0.1498, "step": 1330 }, { "epoch": 0.01084230115705154, "grad_norm": 3.588777542114258, "learning_rate": 5.421150578525772e-07, "loss": 0.1204, "step": 1340 }, { "epoch": 0.010923213852253418, "grad_norm": 3.7952706813812256, "learning_rate": 5.46160692612671e-07, "loss": 0.1989, "step": 1350 }, { "epoch": 0.011004126547455296, "grad_norm": 2.509969472885132, "learning_rate": 5.502063273727648e-07, "loss": 0.1595, "step": 1360 }, { "epoch": 0.011085039242657174, "grad_norm": 3.1478559970855713, "learning_rate": 5.542519621328587e-07, "loss": 0.1328, "step": 1370 }, { "epoch": 0.01116595193785905, "grad_norm": 2.8996856212615967, "learning_rate": 5.582975968929526e-07, "loss": 0.143, "step": 1380 }, { "epoch": 0.011246864633060927, "grad_norm": 2.786529779434204, "learning_rate": 5.623432316530464e-07, "loss": 0.1298, "step": 1390 }, { "epoch": 0.011327777328262805, "grad_norm": 2.2554385662078857, "learning_rate": 5.663888664131403e-07, "loss": 0.1474, "step": 1400 }, { "epoch": 0.011408690023464682, "grad_norm": 3.5068140029907227, "learning_rate": 5.704345011732342e-07, "loss": 0.1606, "step": 1410 }, { "epoch": 0.011489602718666558, "grad_norm": 3.553576946258545, "learning_rate": 5.744801359333279e-07, "loss": 0.149, "step": 1420 }, { "epoch": 0.011570515413868436, "grad_norm": 3.5269813537597656, "learning_rate": 5.785257706934219e-07, "loss": 0.1226, "step": 1430 }, { "epoch": 0.011651428109070314, "grad_norm": 2.6978275775909424, "learning_rate": 5.825714054535157e-07, "loss": 0.1623, "step": 1440 }, { "epoch": 0.01173234080427219, "grad_norm": 3.1413493156433105, "learning_rate": 5.866170402136095e-07, "loss": 0.1234, "step": 1450 }, { "epoch": 0.011813253499474067, "grad_norm": 3.00105357170105, "learning_rate": 5.906626749737035e-07, "loss": 0.1503, "step": 1460 }, { "epoch": 0.011894166194675945, "grad_norm": 2.491643190383911, "learning_rate": 5.947083097337973e-07, "loss": 0.12, "step": 1470 }, { "epoch": 0.011975078889877823, "grad_norm": 4.106692314147949, "learning_rate": 5.987539444938911e-07, "loss": 0.1335, "step": 1480 }, { "epoch": 0.012055991585079699, "grad_norm": 2.352979898452759, "learning_rate": 6.02799579253985e-07, "loss": 0.1405, "step": 1490 }, { "epoch": 0.012136904280281576, "grad_norm": 2.6845829486846924, "learning_rate": 6.068452140140789e-07, "loss": 0.1362, "step": 1500 }, { "epoch": 0.012217816975483454, "grad_norm": 3.6045360565185547, "learning_rate": 6.108908487741728e-07, "loss": 0.1485, "step": 1510 }, { "epoch": 0.01229872967068533, "grad_norm": 3.1807703971862793, "learning_rate": 6.149364835342666e-07, "loss": 0.1726, "step": 1520 }, { "epoch": 0.012379642365887207, "grad_norm": 2.5376954078674316, "learning_rate": 6.189821182943604e-07, "loss": 0.1384, "step": 1530 }, { "epoch": 0.012460555061089085, "grad_norm": 4.064521312713623, "learning_rate": 6.230277530544543e-07, "loss": 0.1443, "step": 1540 }, { "epoch": 0.012541467756290963, "grad_norm": 3.8524208068847656, "learning_rate": 6.270733878145481e-07, "loss": 0.1421, "step": 1550 }, { "epoch": 0.012622380451492839, "grad_norm": 2.717435836791992, "learning_rate": 6.31119022574642e-07, "loss": 0.133, "step": 1560 }, { "epoch": 0.012703293146694716, "grad_norm": 3.170163154602051, "learning_rate": 6.351646573347359e-07, "loss": 0.1153, "step": 1570 }, { "epoch": 0.012784205841896594, "grad_norm": 3.1952850818634033, "learning_rate": 6.392102920948297e-07, "loss": 0.1415, "step": 1580 }, { "epoch": 0.01286511853709847, "grad_norm": 2.5966908931732178, "learning_rate": 6.432559268549236e-07, "loss": 0.1249, "step": 1590 }, { "epoch": 0.012946031232300348, "grad_norm": 4.755983352661133, "learning_rate": 6.473015616150175e-07, "loss": 0.1643, "step": 1600 }, { "epoch": 0.013026943927502225, "grad_norm": 2.4486217498779297, "learning_rate": 6.513471963751113e-07, "loss": 0.1246, "step": 1610 }, { "epoch": 0.013107856622704103, "grad_norm": 3.7277944087982178, "learning_rate": 6.553928311352052e-07, "loss": 0.1454, "step": 1620 }, { "epoch": 0.013188769317905979, "grad_norm": 2.131896495819092, "learning_rate": 6.594384658952991e-07, "loss": 0.1077, "step": 1630 }, { "epoch": 0.013269682013107856, "grad_norm": 3.4123847484588623, "learning_rate": 6.634841006553928e-07, "loss": 0.1843, "step": 1640 }, { "epoch": 0.013350594708309734, "grad_norm": 2.9605822563171387, "learning_rate": 6.675297354154867e-07, "loss": 0.1339, "step": 1650 }, { "epoch": 0.013431507403511612, "grad_norm": 3.3927063941955566, "learning_rate": 6.715753701755806e-07, "loss": 0.1285, "step": 1660 }, { "epoch": 0.013512420098713488, "grad_norm": 3.828677177429199, "learning_rate": 6.756210049356744e-07, "loss": 0.1404, "step": 1670 }, { "epoch": 0.013593332793915365, "grad_norm": 2.22856068611145, "learning_rate": 6.796666396957684e-07, "loss": 0.119, "step": 1680 }, { "epoch": 0.013674245489117243, "grad_norm": 3.2398173809051514, "learning_rate": 6.837122744558622e-07, "loss": 0.1314, "step": 1690 }, { "epoch": 0.013755158184319119, "grad_norm": 2.6846957206726074, "learning_rate": 6.87757909215956e-07, "loss": 0.1447, "step": 1700 }, { "epoch": 0.013836070879520997, "grad_norm": 2.0524067878723145, "learning_rate": 6.9180354397605e-07, "loss": 0.1343, "step": 1710 }, { "epoch": 0.013916983574722874, "grad_norm": 3.2000553607940674, "learning_rate": 6.958491787361438e-07, "loss": 0.1168, "step": 1720 }, { "epoch": 0.013997896269924752, "grad_norm": 2.5470049381256104, "learning_rate": 6.998948134962376e-07, "loss": 0.1484, "step": 1730 }, { "epoch": 0.014078808965126628, "grad_norm": 2.9575867652893066, "learning_rate": 7.039404482563316e-07, "loss": 0.149, "step": 1740 }, { "epoch": 0.014159721660328506, "grad_norm": 4.677399158477783, "learning_rate": 7.079860830164253e-07, "loss": 0.1304, "step": 1750 }, { "epoch": 0.014240634355530383, "grad_norm": 2.5765581130981445, "learning_rate": 7.120317177765191e-07, "loss": 0.1055, "step": 1760 }, { "epoch": 0.014321547050732259, "grad_norm": 2.180269241333008, "learning_rate": 7.160773525366131e-07, "loss": 0.1098, "step": 1770 }, { "epoch": 0.014402459745934137, "grad_norm": 3.904073715209961, "learning_rate": 7.201229872967069e-07, "loss": 0.1695, "step": 1780 }, { "epoch": 0.014483372441136014, "grad_norm": 3.1508078575134277, "learning_rate": 7.241686220568007e-07, "loss": 0.1321, "step": 1790 }, { "epoch": 0.014564285136337892, "grad_norm": 1.769440770149231, "learning_rate": 7.282142568168947e-07, "loss": 0.1264, "step": 1800 }, { "epoch": 0.014645197831539768, "grad_norm": 2.213376522064209, "learning_rate": 7.322598915769885e-07, "loss": 0.1616, "step": 1810 }, { "epoch": 0.014726110526741646, "grad_norm": 3.2754929065704346, "learning_rate": 7.363055263370823e-07, "loss": 0.1271, "step": 1820 }, { "epoch": 0.014807023221943523, "grad_norm": 3.466562271118164, "learning_rate": 7.403511610971762e-07, "loss": 0.1386, "step": 1830 }, { "epoch": 0.0148879359171454, "grad_norm": 2.7725071907043457, "learning_rate": 7.443967958572701e-07, "loss": 0.1369, "step": 1840 }, { "epoch": 0.014968848612347277, "grad_norm": 2.686899423599243, "learning_rate": 7.48442430617364e-07, "loss": 0.1503, "step": 1850 }, { "epoch": 0.015049761307549155, "grad_norm": 3.385806083679199, "learning_rate": 7.524880653774577e-07, "loss": 0.136, "step": 1860 }, { "epoch": 0.015130674002751032, "grad_norm": 3.4498255252838135, "learning_rate": 7.565337001375516e-07, "loss": 0.1056, "step": 1870 }, { "epoch": 0.015211586697952908, "grad_norm": 2.3533167839050293, "learning_rate": 7.605793348976455e-07, "loss": 0.1248, "step": 1880 }, { "epoch": 0.015292499393154786, "grad_norm": 1.8921456336975098, "learning_rate": 7.646249696577393e-07, "loss": 0.1336, "step": 1890 }, { "epoch": 0.015373412088356663, "grad_norm": 3.635103464126587, "learning_rate": 7.686706044178332e-07, "loss": 0.1357, "step": 1900 }, { "epoch": 0.015454324783558541, "grad_norm": 2.5344769954681396, "learning_rate": 7.727162391779271e-07, "loss": 0.1283, "step": 1910 }, { "epoch": 0.015535237478760417, "grad_norm": 2.280944347381592, "learning_rate": 7.767618739380209e-07, "loss": 0.1057, "step": 1920 }, { "epoch": 0.015616150173962295, "grad_norm": 2.7406625747680664, "learning_rate": 7.808075086981148e-07, "loss": 0.1103, "step": 1930 }, { "epoch": 0.015697062869164172, "grad_norm": 3.1310410499572754, "learning_rate": 7.848531434582087e-07, "loss": 0.1448, "step": 1940 }, { "epoch": 0.01577797556436605, "grad_norm": 3.0073962211608887, "learning_rate": 7.888987782183025e-07, "loss": 0.1266, "step": 1950 }, { "epoch": 0.015858888259567928, "grad_norm": 3.850083827972412, "learning_rate": 7.929444129783965e-07, "loss": 0.1156, "step": 1960 }, { "epoch": 0.015939800954769802, "grad_norm": 3.233969211578369, "learning_rate": 7.969900477384902e-07, "loss": 0.1514, "step": 1970 }, { "epoch": 0.01602071364997168, "grad_norm": 2.266785144805908, "learning_rate": 8.01035682498584e-07, "loss": 0.1713, "step": 1980 }, { "epoch": 0.016101626345173557, "grad_norm": 2.606696128845215, "learning_rate": 8.05081317258678e-07, "loss": 0.1369, "step": 1990 }, { "epoch": 0.016182539040375435, "grad_norm": 2.4673171043395996, "learning_rate": 8.091269520187718e-07, "loss": 0.1174, "step": 2000 }, { "epoch": 0.016263451735577313, "grad_norm": 2.688140630722046, "learning_rate": 8.131725867788656e-07, "loss": 0.1234, "step": 2010 }, { "epoch": 0.01634436443077919, "grad_norm": 3.261702537536621, "learning_rate": 8.172182215389596e-07, "loss": 0.1131, "step": 2020 }, { "epoch": 0.016425277125981068, "grad_norm": 2.8621068000793457, "learning_rate": 8.212638562990534e-07, "loss": 0.1024, "step": 2030 }, { "epoch": 0.016506189821182942, "grad_norm": 3.0533087253570557, "learning_rate": 8.253094910591472e-07, "loss": 0.1043, "step": 2040 }, { "epoch": 0.01658710251638482, "grad_norm": 3.079784393310547, "learning_rate": 8.293551258192412e-07, "loss": 0.1342, "step": 2050 }, { "epoch": 0.016668015211586697, "grad_norm": 2.996828317642212, "learning_rate": 8.33400760579335e-07, "loss": 0.119, "step": 2060 }, { "epoch": 0.016748927906788575, "grad_norm": 4.019902229309082, "learning_rate": 8.374463953394288e-07, "loss": 0.1203, "step": 2070 }, { "epoch": 0.016829840601990453, "grad_norm": 2.354902982711792, "learning_rate": 8.414920300995228e-07, "loss": 0.1073, "step": 2080 }, { "epoch": 0.01691075329719233, "grad_norm": 3.4016852378845215, "learning_rate": 8.455376648596165e-07, "loss": 0.1361, "step": 2090 }, { "epoch": 0.016991665992394208, "grad_norm": 2.32135009765625, "learning_rate": 8.495832996197103e-07, "loss": 0.129, "step": 2100 }, { "epoch": 0.017072578687596082, "grad_norm": 2.689134120941162, "learning_rate": 8.536289343798043e-07, "loss": 0.1307, "step": 2110 }, { "epoch": 0.01715349138279796, "grad_norm": 3.391117572784424, "learning_rate": 8.576745691398981e-07, "loss": 0.1062, "step": 2120 }, { "epoch": 0.017234404077999838, "grad_norm": 2.7830846309661865, "learning_rate": 8.617202038999919e-07, "loss": 0.1054, "step": 2130 }, { "epoch": 0.017315316773201715, "grad_norm": 2.8256471157073975, "learning_rate": 8.657658386600859e-07, "loss": 0.1267, "step": 2140 }, { "epoch": 0.017396229468403593, "grad_norm": 3.9184858798980713, "learning_rate": 8.698114734201797e-07, "loss": 0.103, "step": 2150 }, { "epoch": 0.01747714216360547, "grad_norm": 4.560610771179199, "learning_rate": 8.738571081802736e-07, "loss": 0.1012, "step": 2160 }, { "epoch": 0.017558054858807348, "grad_norm": 3.25357985496521, "learning_rate": 8.779027429403675e-07, "loss": 0.1282, "step": 2170 }, { "epoch": 0.017638967554009222, "grad_norm": 2.898176908493042, "learning_rate": 8.819483777004613e-07, "loss": 0.1504, "step": 2180 }, { "epoch": 0.0177198802492111, "grad_norm": 2.0445284843444824, "learning_rate": 8.859940124605551e-07, "loss": 0.1236, "step": 2190 }, { "epoch": 0.017800792944412978, "grad_norm": 2.5784928798675537, "learning_rate": 8.900396472206491e-07, "loss": 0.1183, "step": 2200 }, { "epoch": 0.017881705639614855, "grad_norm": 2.6275644302368164, "learning_rate": 8.940852819807428e-07, "loss": 0.1279, "step": 2210 }, { "epoch": 0.017962618334816733, "grad_norm": 2.6717700958251953, "learning_rate": 8.981309167408367e-07, "loss": 0.1106, "step": 2220 }, { "epoch": 0.01804353103001861, "grad_norm": 3.222085952758789, "learning_rate": 9.021765515009305e-07, "loss": 0.125, "step": 2230 }, { "epoch": 0.01812444372522049, "grad_norm": 2.642137050628662, "learning_rate": 9.062221862610244e-07, "loss": 0.1142, "step": 2240 }, { "epoch": 0.018205356420422366, "grad_norm": 2.7111222743988037, "learning_rate": 9.102678210211183e-07, "loss": 0.1222, "step": 2250 }, { "epoch": 0.01828626911562424, "grad_norm": 2.38632869720459, "learning_rate": 9.143134557812121e-07, "loss": 0.1035, "step": 2260 }, { "epoch": 0.018367181810826118, "grad_norm": 2.5600736141204834, "learning_rate": 9.18359090541306e-07, "loss": 0.0901, "step": 2270 }, { "epoch": 0.018448094506027995, "grad_norm": 3.530273914337158, "learning_rate": 9.224047253013999e-07, "loss": 0.112, "step": 2280 }, { "epoch": 0.018529007201229873, "grad_norm": 3.2236361503601074, "learning_rate": 9.264503600614937e-07, "loss": 0.1115, "step": 2290 }, { "epoch": 0.01860991989643175, "grad_norm": 3.621511220932007, "learning_rate": 9.304959948215877e-07, "loss": 0.1156, "step": 2300 }, { "epoch": 0.01869083259163363, "grad_norm": 1.3391962051391602, "learning_rate": 9.345416295816814e-07, "loss": 0.1313, "step": 2310 }, { "epoch": 0.018771745286835506, "grad_norm": 2.5039713382720947, "learning_rate": 9.385872643417752e-07, "loss": 0.1316, "step": 2320 }, { "epoch": 0.01885265798203738, "grad_norm": 2.763693332672119, "learning_rate": 9.426328991018692e-07, "loss": 0.107, "step": 2330 }, { "epoch": 0.018933570677239258, "grad_norm": 2.3031013011932373, "learning_rate": 9.46678533861963e-07, "loss": 0.1097, "step": 2340 }, { "epoch": 0.019014483372441136, "grad_norm": 3.47409987449646, "learning_rate": 9.507241686220568e-07, "loss": 0.1051, "step": 2350 }, { "epoch": 0.019095396067643013, "grad_norm": 2.092069387435913, "learning_rate": 9.547698033821508e-07, "loss": 0.1206, "step": 2360 }, { "epoch": 0.01917630876284489, "grad_norm": 3.4080891609191895, "learning_rate": 9.588154381422445e-07, "loss": 0.1213, "step": 2370 }, { "epoch": 0.01925722145804677, "grad_norm": 2.631908655166626, "learning_rate": 9.628610729023384e-07, "loss": 0.1114, "step": 2380 }, { "epoch": 0.019338134153248646, "grad_norm": 2.4644205570220947, "learning_rate": 9.669067076624324e-07, "loss": 0.1271, "step": 2390 }, { "epoch": 0.01941904684845052, "grad_norm": 1.9869613647460938, "learning_rate": 9.70952342422526e-07, "loss": 0.1468, "step": 2400 }, { "epoch": 0.019499959543652398, "grad_norm": 4.259750843048096, "learning_rate": 9.7499797718262e-07, "loss": 0.1036, "step": 2410 }, { "epoch": 0.019580872238854276, "grad_norm": 3.0218169689178467, "learning_rate": 9.79043611942714e-07, "loss": 0.1128, "step": 2420 }, { "epoch": 0.019661784934056153, "grad_norm": 2.741788625717163, "learning_rate": 9.830892467028077e-07, "loss": 0.1102, "step": 2430 }, { "epoch": 0.01974269762925803, "grad_norm": 2.1182994842529297, "learning_rate": 9.871348814629016e-07, "loss": 0.1429, "step": 2440 }, { "epoch": 0.01982361032445991, "grad_norm": 3.241915464401245, "learning_rate": 9.911805162229956e-07, "loss": 0.1413, "step": 2450 }, { "epoch": 0.019904523019661786, "grad_norm": 1.9867362976074219, "learning_rate": 9.952261509830893e-07, "loss": 0.1022, "step": 2460 }, { "epoch": 0.01998543571486366, "grad_norm": 1.5651336908340454, "learning_rate": 9.99271785743183e-07, "loss": 0.1261, "step": 2470 }, { "epoch": 0.02006634841006554, "grad_norm": 2.087550401687622, "learning_rate": 1.0033174205032772e-06, "loss": 0.1151, "step": 2480 }, { "epoch": 0.020147261105267416, "grad_norm": 2.3227386474609375, "learning_rate": 1.007363055263371e-06, "loss": 0.1188, "step": 2490 }, { "epoch": 0.020228173800469294, "grad_norm": 2.7237865924835205, "learning_rate": 1.0114086900234647e-06, "loss": 0.0924, "step": 2500 }, { "epoch": 0.02030908649567117, "grad_norm": 2.5800235271453857, "learning_rate": 1.0154543247835586e-06, "loss": 0.1051, "step": 2510 }, { "epoch": 0.02038999919087305, "grad_norm": 2.6274607181549072, "learning_rate": 1.0194999595436525e-06, "loss": 0.095, "step": 2520 }, { "epoch": 0.020470911886074927, "grad_norm": 4.072608470916748, "learning_rate": 1.0235455943037463e-06, "loss": 0.1219, "step": 2530 }, { "epoch": 0.0205518245812768, "grad_norm": 2.42045521736145, "learning_rate": 1.0275912290638402e-06, "loss": 0.1342, "step": 2540 }, { "epoch": 0.02063273727647868, "grad_norm": 2.7406668663024902, "learning_rate": 1.0316368638239341e-06, "loss": 0.0706, "step": 2550 }, { "epoch": 0.020713649971680556, "grad_norm": 3.373342990875244, "learning_rate": 1.0356824985840279e-06, "loss": 0.1417, "step": 2560 }, { "epoch": 0.020794562666882434, "grad_norm": 2.4951424598693848, "learning_rate": 1.0397281333441218e-06, "loss": 0.1019, "step": 2570 }, { "epoch": 0.02087547536208431, "grad_norm": 3.4431533813476562, "learning_rate": 1.0437737681042157e-06, "loss": 0.1164, "step": 2580 }, { "epoch": 0.02095638805728619, "grad_norm": 1.47208833694458, "learning_rate": 1.0478194028643095e-06, "loss": 0.0891, "step": 2590 }, { "epoch": 0.021037300752488067, "grad_norm": 1.9703980684280396, "learning_rate": 1.0518650376244034e-06, "loss": 0.1321, "step": 2600 }, { "epoch": 0.02111821344768994, "grad_norm": 2.2634358406066895, "learning_rate": 1.0559106723844971e-06, "loss": 0.1347, "step": 2610 }, { "epoch": 0.02119912614289182, "grad_norm": 1.9372203350067139, "learning_rate": 1.059956307144591e-06, "loss": 0.1359, "step": 2620 }, { "epoch": 0.021280038838093696, "grad_norm": 2.6169662475585938, "learning_rate": 1.0640019419046848e-06, "loss": 0.1128, "step": 2630 }, { "epoch": 0.021360951533295574, "grad_norm": 3.2565925121307373, "learning_rate": 1.0680475766647788e-06, "loss": 0.1075, "step": 2640 }, { "epoch": 0.02144186422849745, "grad_norm": 3.0818445682525635, "learning_rate": 1.0720932114248727e-06, "loss": 0.0899, "step": 2650 }, { "epoch": 0.02152277692369933, "grad_norm": 2.728285551071167, "learning_rate": 1.0761388461849664e-06, "loss": 0.0932, "step": 2660 }, { "epoch": 0.021603689618901207, "grad_norm": 1.6560415029525757, "learning_rate": 1.0801844809450604e-06, "loss": 0.1222, "step": 2670 }, { "epoch": 0.02168460231410308, "grad_norm": 3.493453025817871, "learning_rate": 1.0842301157051543e-06, "loss": 0.1332, "step": 2680 }, { "epoch": 0.02176551500930496, "grad_norm": 3.281716823577881, "learning_rate": 1.088275750465248e-06, "loss": 0.1058, "step": 2690 }, { "epoch": 0.021846427704506836, "grad_norm": 2.234534502029419, "learning_rate": 1.092321385225342e-06, "loss": 0.1138, "step": 2700 }, { "epoch": 0.021927340399708714, "grad_norm": 2.5262930393218994, "learning_rate": 1.0963670199854357e-06, "loss": 0.0678, "step": 2710 }, { "epoch": 0.02200825309491059, "grad_norm": 3.2079269886016846, "learning_rate": 1.1004126547455296e-06, "loss": 0.117, "step": 2720 }, { "epoch": 0.02208916579011247, "grad_norm": 2.2148001194000244, "learning_rate": 1.1044582895056236e-06, "loss": 0.1195, "step": 2730 }, { "epoch": 0.022170078485314347, "grad_norm": 3.7867674827575684, "learning_rate": 1.1085039242657173e-06, "loss": 0.0814, "step": 2740 }, { "epoch": 0.022250991180516225, "grad_norm": 1.6040045022964478, "learning_rate": 1.1125495590258112e-06, "loss": 0.1035, "step": 2750 }, { "epoch": 0.0223319038757181, "grad_norm": 3.51934814453125, "learning_rate": 1.1165951937859052e-06, "loss": 0.0878, "step": 2760 }, { "epoch": 0.022412816570919977, "grad_norm": 2.493701696395874, "learning_rate": 1.120640828545999e-06, "loss": 0.1209, "step": 2770 }, { "epoch": 0.022493729266121854, "grad_norm": 2.607140302658081, "learning_rate": 1.1246864633060929e-06, "loss": 0.1217, "step": 2780 }, { "epoch": 0.022574641961323732, "grad_norm": 2.821559429168701, "learning_rate": 1.1287320980661868e-06, "loss": 0.0973, "step": 2790 }, { "epoch": 0.02265555465652561, "grad_norm": 2.6885173320770264, "learning_rate": 1.1327777328262805e-06, "loss": 0.1171, "step": 2800 }, { "epoch": 0.022736467351727487, "grad_norm": 1.9558929204940796, "learning_rate": 1.1368233675863743e-06, "loss": 0.1216, "step": 2810 }, { "epoch": 0.022817380046929365, "grad_norm": 2.351844549179077, "learning_rate": 1.1408690023464684e-06, "loss": 0.103, "step": 2820 }, { "epoch": 0.02289829274213124, "grad_norm": 2.9221320152282715, "learning_rate": 1.1449146371065621e-06, "loss": 0.1348, "step": 2830 }, { "epoch": 0.022979205437333117, "grad_norm": 2.9279119968414307, "learning_rate": 1.1489602718666559e-06, "loss": 0.0924, "step": 2840 }, { "epoch": 0.023060118132534994, "grad_norm": 2.6771202087402344, "learning_rate": 1.1530059066267498e-06, "loss": 0.0973, "step": 2850 }, { "epoch": 0.023141030827736872, "grad_norm": 2.938502788543701, "learning_rate": 1.1570515413868437e-06, "loss": 0.1013, "step": 2860 }, { "epoch": 0.02322194352293875, "grad_norm": 3.604884624481201, "learning_rate": 1.1610971761469375e-06, "loss": 0.1104, "step": 2870 }, { "epoch": 0.023302856218140627, "grad_norm": 3.327443838119507, "learning_rate": 1.1651428109070314e-06, "loss": 0.0981, "step": 2880 }, { "epoch": 0.023383768913342505, "grad_norm": 3.643439292907715, "learning_rate": 1.1691884456671253e-06, "loss": 0.082, "step": 2890 }, { "epoch": 0.02346468160854438, "grad_norm": 3.359574556350708, "learning_rate": 1.173234080427219e-06, "loss": 0.1185, "step": 2900 }, { "epoch": 0.023545594303746257, "grad_norm": 3.151677131652832, "learning_rate": 1.177279715187313e-06, "loss": 0.097, "step": 2910 }, { "epoch": 0.023626506998948135, "grad_norm": 2.507763385772705, "learning_rate": 1.181325349947407e-06, "loss": 0.1092, "step": 2920 }, { "epoch": 0.023707419694150012, "grad_norm": 2.290861129760742, "learning_rate": 1.1853709847075007e-06, "loss": 0.1287, "step": 2930 }, { "epoch": 0.02378833238935189, "grad_norm": 1.920516848564148, "learning_rate": 1.1894166194675946e-06, "loss": 0.0914, "step": 2940 }, { "epoch": 0.023869245084553768, "grad_norm": 2.6895906925201416, "learning_rate": 1.1934622542276884e-06, "loss": 0.0922, "step": 2950 }, { "epoch": 0.023950157779755645, "grad_norm": 3.5835037231445312, "learning_rate": 1.1975078889877823e-06, "loss": 0.0946, "step": 2960 }, { "epoch": 0.02403107047495752, "grad_norm": 2.3637020587921143, "learning_rate": 1.2015535237478762e-06, "loss": 0.1235, "step": 2970 }, { "epoch": 0.024111983170159397, "grad_norm": 1.9515297412872314, "learning_rate": 1.20559915850797e-06, "loss": 0.1121, "step": 2980 }, { "epoch": 0.024192895865361275, "grad_norm": 2.6730241775512695, "learning_rate": 1.209644793268064e-06, "loss": 0.1362, "step": 2990 }, { "epoch": 0.024273808560563152, "grad_norm": 2.58339786529541, "learning_rate": 1.2136904280281578e-06, "loss": 0.1137, "step": 3000 }, { "epoch": 0.02435472125576503, "grad_norm": 2.0539205074310303, "learning_rate": 1.2177360627882516e-06, "loss": 0.1146, "step": 3010 }, { "epoch": 0.024435633950966908, "grad_norm": 2.028289318084717, "learning_rate": 1.2217816975483455e-06, "loss": 0.1002, "step": 3020 }, { "epoch": 0.024516546646168785, "grad_norm": 1.5057803392410278, "learning_rate": 1.2258273323084392e-06, "loss": 0.0942, "step": 3030 }, { "epoch": 0.02459745934137066, "grad_norm": 1.64219331741333, "learning_rate": 1.2298729670685332e-06, "loss": 0.0766, "step": 3040 }, { "epoch": 0.024678372036572537, "grad_norm": 2.0195512771606445, "learning_rate": 1.233918601828627e-06, "loss": 0.1157, "step": 3050 }, { "epoch": 0.024759284731774415, "grad_norm": 1.990330696105957, "learning_rate": 1.2379642365887208e-06, "loss": 0.117, "step": 3060 }, { "epoch": 0.024840197426976292, "grad_norm": 4.218479156494141, "learning_rate": 1.2420098713488148e-06, "loss": 0.0938, "step": 3070 }, { "epoch": 0.02492111012217817, "grad_norm": 3.167558431625366, "learning_rate": 1.2460555061089085e-06, "loss": 0.1047, "step": 3080 }, { "epoch": 0.025002022817380048, "grad_norm": 2.4616050720214844, "learning_rate": 1.2501011408690025e-06, "loss": 0.0899, "step": 3090 }, { "epoch": 0.025082935512581925, "grad_norm": 2.0545642375946045, "learning_rate": 1.2541467756290962e-06, "loss": 0.1304, "step": 3100 }, { "epoch": 0.0251638482077838, "grad_norm": 2.684370756149292, "learning_rate": 1.2581924103891901e-06, "loss": 0.1194, "step": 3110 }, { "epoch": 0.025244760902985677, "grad_norm": 2.201157569885254, "learning_rate": 1.262238045149284e-06, "loss": 0.1218, "step": 3120 }, { "epoch": 0.025325673598187555, "grad_norm": 3.4129505157470703, "learning_rate": 1.2662836799093778e-06, "loss": 0.1016, "step": 3130 }, { "epoch": 0.025406586293389433, "grad_norm": 2.717299222946167, "learning_rate": 1.2703293146694717e-06, "loss": 0.118, "step": 3140 }, { "epoch": 0.02548749898859131, "grad_norm": 2.415850877761841, "learning_rate": 1.2743749494295657e-06, "loss": 0.1281, "step": 3150 }, { "epoch": 0.025568411683793188, "grad_norm": 2.0534651279449463, "learning_rate": 1.2784205841896594e-06, "loss": 0.0963, "step": 3160 }, { "epoch": 0.025649324378995066, "grad_norm": 2.466344118118286, "learning_rate": 1.2824662189497533e-06, "loss": 0.1323, "step": 3170 }, { "epoch": 0.02573023707419694, "grad_norm": 3.089982271194458, "learning_rate": 1.2865118537098473e-06, "loss": 0.1323, "step": 3180 }, { "epoch": 0.025811149769398817, "grad_norm": 2.6558570861816406, "learning_rate": 1.290557488469941e-06, "loss": 0.1002, "step": 3190 }, { "epoch": 0.025892062464600695, "grad_norm": 2.84468936920166, "learning_rate": 1.294603123230035e-06, "loss": 0.0976, "step": 3200 }, { "epoch": 0.025972975159802573, "grad_norm": 2.1415138244628906, "learning_rate": 1.2986487579901289e-06, "loss": 0.1125, "step": 3210 }, { "epoch": 0.02605388785500445, "grad_norm": 2.173358201980591, "learning_rate": 1.3026943927502226e-06, "loss": 0.122, "step": 3220 }, { "epoch": 0.026134800550206328, "grad_norm": 3.3644137382507324, "learning_rate": 1.3067400275103166e-06, "loss": 0.0941, "step": 3230 }, { "epoch": 0.026215713245408206, "grad_norm": 2.3094332218170166, "learning_rate": 1.3107856622704105e-06, "loss": 0.1004, "step": 3240 }, { "epoch": 0.026296625940610083, "grad_norm": 2.858825206756592, "learning_rate": 1.314831297030504e-06, "loss": 0.1261, "step": 3250 }, { "epoch": 0.026377538635811958, "grad_norm": 2.2050886154174805, "learning_rate": 1.3188769317905982e-06, "loss": 0.0987, "step": 3260 }, { "epoch": 0.026458451331013835, "grad_norm": 3.3504152297973633, "learning_rate": 1.322922566550692e-06, "loss": 0.1021, "step": 3270 }, { "epoch": 0.026539364026215713, "grad_norm": 3.107907772064209, "learning_rate": 1.3269682013107856e-06, "loss": 0.1101, "step": 3280 }, { "epoch": 0.02662027672141759, "grad_norm": 1.9401071071624756, "learning_rate": 1.3310138360708796e-06, "loss": 0.0931, "step": 3290 }, { "epoch": 0.026701189416619468, "grad_norm": 2.4990897178649902, "learning_rate": 1.3350594708309735e-06, "loss": 0.1048, "step": 3300 }, { "epoch": 0.026782102111821346, "grad_norm": 1.8261767625808716, "learning_rate": 1.3391051055910672e-06, "loss": 0.0817, "step": 3310 }, { "epoch": 0.026863014807023224, "grad_norm": 2.661374807357788, "learning_rate": 1.3431507403511612e-06, "loss": 0.0917, "step": 3320 }, { "epoch": 0.026943927502225098, "grad_norm": 2.4484410285949707, "learning_rate": 1.347196375111255e-06, "loss": 0.0719, "step": 3330 }, { "epoch": 0.027024840197426975, "grad_norm": 2.1319966316223145, "learning_rate": 1.3512420098713488e-06, "loss": 0.0914, "step": 3340 }, { "epoch": 0.027105752892628853, "grad_norm": 1.5471011400222778, "learning_rate": 1.3552876446314428e-06, "loss": 0.1051, "step": 3350 }, { "epoch": 0.02718666558783073, "grad_norm": 2.442431926727295, "learning_rate": 1.3593332793915367e-06, "loss": 0.1233, "step": 3360 }, { "epoch": 0.02726757828303261, "grad_norm": 1.9464102983474731, "learning_rate": 1.3633789141516304e-06, "loss": 0.1093, "step": 3370 }, { "epoch": 0.027348490978234486, "grad_norm": 2.925173044204712, "learning_rate": 1.3674245489117244e-06, "loss": 0.1374, "step": 3380 }, { "epoch": 0.027429403673436364, "grad_norm": 3.059738874435425, "learning_rate": 1.3714701836718183e-06, "loss": 0.0963, "step": 3390 }, { "epoch": 0.027510316368638238, "grad_norm": 2.613312005996704, "learning_rate": 1.375515818431912e-06, "loss": 0.0768, "step": 3400 }, { "epoch": 0.027591229063840116, "grad_norm": 1.4428672790527344, "learning_rate": 1.379561453192006e-06, "loss": 0.0597, "step": 3410 }, { "epoch": 0.027672141759041993, "grad_norm": 1.5350897312164307, "learning_rate": 1.3836070879521e-06, "loss": 0.0714, "step": 3420 }, { "epoch": 0.02775305445424387, "grad_norm": 1.5685943365097046, "learning_rate": 1.3876527227121937e-06, "loss": 0.0969, "step": 3430 }, { "epoch": 0.02783396714944575, "grad_norm": 2.002136707305908, "learning_rate": 1.3916983574722876e-06, "loss": 0.0911, "step": 3440 }, { "epoch": 0.027914879844647626, "grad_norm": 2.340088367462158, "learning_rate": 1.3957439922323815e-06, "loss": 0.1026, "step": 3450 }, { "epoch": 0.027995792539849504, "grad_norm": 1.780013918876648, "learning_rate": 1.3997896269924753e-06, "loss": 0.0948, "step": 3460 }, { "epoch": 0.028076705235051378, "grad_norm": 1.5367995500564575, "learning_rate": 1.4038352617525692e-06, "loss": 0.0913, "step": 3470 }, { "epoch": 0.028157617930253256, "grad_norm": 1.775583267211914, "learning_rate": 1.4078808965126631e-06, "loss": 0.1113, "step": 3480 }, { "epoch": 0.028238530625455133, "grad_norm": 2.723604679107666, "learning_rate": 1.4119265312727567e-06, "loss": 0.0923, "step": 3490 }, { "epoch": 0.02831944332065701, "grad_norm": 2.005330801010132, "learning_rate": 1.4159721660328506e-06, "loss": 0.1104, "step": 3500 }, { "epoch": 0.02840035601585889, "grad_norm": 1.9476207494735718, "learning_rate": 1.4200178007929448e-06, "loss": 0.086, "step": 3510 }, { "epoch": 0.028481268711060766, "grad_norm": 2.1467254161834717, "learning_rate": 1.4240634355530383e-06, "loss": 0.1156, "step": 3520 }, { "epoch": 0.028562181406262644, "grad_norm": 2.4180707931518555, "learning_rate": 1.4281090703131322e-06, "loss": 0.1072, "step": 3530 }, { "epoch": 0.028643094101464518, "grad_norm": 1.8243309259414673, "learning_rate": 1.4321547050732261e-06, "loss": 0.1133, "step": 3540 }, { "epoch": 0.028724006796666396, "grad_norm": 2.221869707107544, "learning_rate": 1.4362003398333199e-06, "loss": 0.0834, "step": 3550 }, { "epoch": 0.028804919491868274, "grad_norm": 3.149139165878296, "learning_rate": 1.4402459745934138e-06, "loss": 0.1231, "step": 3560 }, { "epoch": 0.02888583218707015, "grad_norm": 2.4985530376434326, "learning_rate": 1.4442916093535078e-06, "loss": 0.1115, "step": 3570 }, { "epoch": 0.02896674488227203, "grad_norm": 1.7628146409988403, "learning_rate": 1.4483372441136015e-06, "loss": 0.0867, "step": 3580 }, { "epoch": 0.029047657577473907, "grad_norm": 2.2109556198120117, "learning_rate": 1.4523828788736954e-06, "loss": 0.0785, "step": 3590 }, { "epoch": 0.029128570272675784, "grad_norm": 2.8331551551818848, "learning_rate": 1.4564285136337894e-06, "loss": 0.0927, "step": 3600 }, { "epoch": 0.02920948296787766, "grad_norm": 2.205512285232544, "learning_rate": 1.460474148393883e-06, "loss": 0.0883, "step": 3610 }, { "epoch": 0.029290395663079536, "grad_norm": 2.8296239376068115, "learning_rate": 1.464519783153977e-06, "loss": 0.0869, "step": 3620 }, { "epoch": 0.029371308358281414, "grad_norm": 2.2754456996917725, "learning_rate": 1.4685654179140708e-06, "loss": 0.1002, "step": 3630 }, { "epoch": 0.02945222105348329, "grad_norm": 2.0829622745513916, "learning_rate": 1.4726110526741647e-06, "loss": 0.0884, "step": 3640 }, { "epoch": 0.02953313374868517, "grad_norm": 2.4689652919769287, "learning_rate": 1.4766566874342586e-06, "loss": 0.1036, "step": 3650 }, { "epoch": 0.029614046443887047, "grad_norm": 2.543543815612793, "learning_rate": 1.4807023221943524e-06, "loss": 0.1086, "step": 3660 }, { "epoch": 0.029694959139088924, "grad_norm": 2.166330337524414, "learning_rate": 1.4847479569544463e-06, "loss": 0.0907, "step": 3670 }, { "epoch": 0.0297758718342908, "grad_norm": 2.7840383052825928, "learning_rate": 1.4887935917145402e-06, "loss": 0.0793, "step": 3680 }, { "epoch": 0.029856784529492676, "grad_norm": 2.573249101638794, "learning_rate": 1.4928392264746338e-06, "loss": 0.1005, "step": 3690 }, { "epoch": 0.029937697224694554, "grad_norm": 1.9684895277023315, "learning_rate": 1.496884861234728e-06, "loss": 0.0825, "step": 3700 }, { "epoch": 0.03001860991989643, "grad_norm": 1.707645297050476, "learning_rate": 1.5009304959948219e-06, "loss": 0.1187, "step": 3710 }, { "epoch": 0.03009952261509831, "grad_norm": 2.4064958095550537, "learning_rate": 1.5049761307549154e-06, "loss": 0.089, "step": 3720 }, { "epoch": 0.030180435310300187, "grad_norm": 3.170875310897827, "learning_rate": 1.5090217655150093e-06, "loss": 0.106, "step": 3730 }, { "epoch": 0.030261348005502064, "grad_norm": 1.556012749671936, "learning_rate": 1.5130674002751033e-06, "loss": 0.082, "step": 3740 }, { "epoch": 0.030342260700703942, "grad_norm": 1.9481234550476074, "learning_rate": 1.517113035035197e-06, "loss": 0.0772, "step": 3750 }, { "epoch": 0.030423173395905816, "grad_norm": 2.334333896636963, "learning_rate": 1.521158669795291e-06, "loss": 0.1073, "step": 3760 }, { "epoch": 0.030504086091107694, "grad_norm": 2.180539131164551, "learning_rate": 1.5252043045553849e-06, "loss": 0.0887, "step": 3770 }, { "epoch": 0.03058499878630957, "grad_norm": 1.4400229454040527, "learning_rate": 1.5292499393154786e-06, "loss": 0.1287, "step": 3780 }, { "epoch": 0.03066591148151145, "grad_norm": 1.3089052438735962, "learning_rate": 1.5332955740755725e-06, "loss": 0.0956, "step": 3790 }, { "epoch": 0.030746824176713327, "grad_norm": 2.201781749725342, "learning_rate": 1.5373412088356665e-06, "loss": 0.0987, "step": 3800 }, { "epoch": 0.030827736871915205, "grad_norm": 2.40985107421875, "learning_rate": 1.5413868435957602e-06, "loss": 0.0873, "step": 3810 }, { "epoch": 0.030908649567117082, "grad_norm": 2.4087111949920654, "learning_rate": 1.5454324783558541e-06, "loss": 0.0941, "step": 3820 }, { "epoch": 0.030989562262318956, "grad_norm": 1.6027379035949707, "learning_rate": 1.549478113115948e-06, "loss": 0.0878, "step": 3830 }, { "epoch": 0.031070474957520834, "grad_norm": 1.5681816339492798, "learning_rate": 1.5535237478760418e-06, "loss": 0.0955, "step": 3840 }, { "epoch": 0.031151387652722712, "grad_norm": 2.1689372062683105, "learning_rate": 1.5575693826361357e-06, "loss": 0.1019, "step": 3850 }, { "epoch": 0.03123230034792459, "grad_norm": 2.9758102893829346, "learning_rate": 1.5616150173962297e-06, "loss": 0.0766, "step": 3860 }, { "epoch": 0.03131321304312647, "grad_norm": 2.3904402256011963, "learning_rate": 1.5656606521563234e-06, "loss": 0.0838, "step": 3870 }, { "epoch": 0.031394125738328345, "grad_norm": 2.284153699874878, "learning_rate": 1.5697062869164174e-06, "loss": 0.0921, "step": 3880 }, { "epoch": 0.03147503843353022, "grad_norm": 2.1854376792907715, "learning_rate": 1.5737519216765113e-06, "loss": 0.1102, "step": 3890 }, { "epoch": 0.0315559511287321, "grad_norm": 3.6501994132995605, "learning_rate": 1.577797556436605e-06, "loss": 0.1066, "step": 3900 }, { "epoch": 0.03163686382393398, "grad_norm": 2.6116342544555664, "learning_rate": 1.581843191196699e-06, "loss": 0.0876, "step": 3910 }, { "epoch": 0.031717776519135855, "grad_norm": 1.9641289710998535, "learning_rate": 1.585888825956793e-06, "loss": 0.0986, "step": 3920 }, { "epoch": 0.031798689214337726, "grad_norm": 1.9068633317947388, "learning_rate": 1.5899344607168864e-06, "loss": 0.0734, "step": 3930 }, { "epoch": 0.031879601909539604, "grad_norm": 2.822996139526367, "learning_rate": 1.5939800954769804e-06, "loss": 0.1061, "step": 3940 }, { "epoch": 0.03196051460474148, "grad_norm": 2.37636399269104, "learning_rate": 1.5980257302370745e-06, "loss": 0.0998, "step": 3950 }, { "epoch": 0.03204142729994336, "grad_norm": 2.8380579948425293, "learning_rate": 1.602071364997168e-06, "loss": 0.0707, "step": 3960 }, { "epoch": 0.03212233999514524, "grad_norm": 1.7976980209350586, "learning_rate": 1.606116999757262e-06, "loss": 0.1143, "step": 3970 }, { "epoch": 0.032203252690347114, "grad_norm": 3.567847967147827, "learning_rate": 1.610162634517356e-06, "loss": 0.1039, "step": 3980 }, { "epoch": 0.03228416538554899, "grad_norm": 2.2964231967926025, "learning_rate": 1.6142082692774496e-06, "loss": 0.1124, "step": 3990 }, { "epoch": 0.03236507808075087, "grad_norm": 3.089376211166382, "learning_rate": 1.6182539040375436e-06, "loss": 0.119, "step": 4000 }, { "epoch": 0.03244599077595275, "grad_norm": 2.7086360454559326, "learning_rate": 1.6222995387976375e-06, "loss": 0.0939, "step": 4010 }, { "epoch": 0.032526903471154625, "grad_norm": 1.8448307514190674, "learning_rate": 1.6263451735577312e-06, "loss": 0.0904, "step": 4020 }, { "epoch": 0.0326078161663565, "grad_norm": 1.5715686082839966, "learning_rate": 1.6303908083178252e-06, "loss": 0.075, "step": 4030 }, { "epoch": 0.03268872886155838, "grad_norm": 2.9664101600646973, "learning_rate": 1.6344364430779191e-06, "loss": 0.0967, "step": 4040 }, { "epoch": 0.03276964155676026, "grad_norm": 2.6410775184631348, "learning_rate": 1.6384820778380128e-06, "loss": 0.0896, "step": 4050 }, { "epoch": 0.032850554251962136, "grad_norm": 1.8643900156021118, "learning_rate": 1.6425277125981068e-06, "loss": 0.0953, "step": 4060 }, { "epoch": 0.03293146694716401, "grad_norm": 2.611412763595581, "learning_rate": 1.6465733473582007e-06, "loss": 0.0814, "step": 4070 }, { "epoch": 0.033012379642365884, "grad_norm": 2.078660488128662, "learning_rate": 1.6506189821182945e-06, "loss": 0.0934, "step": 4080 }, { "epoch": 0.03309329233756776, "grad_norm": 1.5390934944152832, "learning_rate": 1.6546646168783884e-06, "loss": 0.0841, "step": 4090 }, { "epoch": 0.03317420503276964, "grad_norm": 2.1987223625183105, "learning_rate": 1.6587102516384823e-06, "loss": 0.095, "step": 4100 }, { "epoch": 0.03325511772797152, "grad_norm": 1.9249314069747925, "learning_rate": 1.662755886398576e-06, "loss": 0.1203, "step": 4110 }, { "epoch": 0.033336030423173395, "grad_norm": 2.143361806869507, "learning_rate": 1.66680152115867e-06, "loss": 0.0992, "step": 4120 }, { "epoch": 0.03341694311837527, "grad_norm": 1.343994379043579, "learning_rate": 1.670847155918764e-06, "loss": 0.0777, "step": 4130 }, { "epoch": 0.03349785581357715, "grad_norm": 2.65529203414917, "learning_rate": 1.6748927906788577e-06, "loss": 0.0979, "step": 4140 }, { "epoch": 0.03357876850877903, "grad_norm": 3.209259271621704, "learning_rate": 1.6789384254389516e-06, "loss": 0.1084, "step": 4150 }, { "epoch": 0.033659681203980905, "grad_norm": 2.1840240955352783, "learning_rate": 1.6829840601990456e-06, "loss": 0.0726, "step": 4160 }, { "epoch": 0.03374059389918278, "grad_norm": 1.7153671979904175, "learning_rate": 1.687029694959139e-06, "loss": 0.1013, "step": 4170 }, { "epoch": 0.03382150659438466, "grad_norm": 2.5526821613311768, "learning_rate": 1.691075329719233e-06, "loss": 0.0951, "step": 4180 }, { "epoch": 0.03390241928958654, "grad_norm": 3.7276298999786377, "learning_rate": 1.6951209644793272e-06, "loss": 0.0887, "step": 4190 }, { "epoch": 0.033983331984788416, "grad_norm": 2.6721341609954834, "learning_rate": 1.6991665992394207e-06, "loss": 0.0897, "step": 4200 }, { "epoch": 0.034064244679990294, "grad_norm": 2.9820477962493896, "learning_rate": 1.7032122339995146e-06, "loss": 0.094, "step": 4210 }, { "epoch": 0.034145157375192164, "grad_norm": 1.5900006294250488, "learning_rate": 1.7072578687596086e-06, "loss": 0.0784, "step": 4220 }, { "epoch": 0.03422607007039404, "grad_norm": 2.7198643684387207, "learning_rate": 1.7113035035197023e-06, "loss": 0.1028, "step": 4230 }, { "epoch": 0.03430698276559592, "grad_norm": 2.2055790424346924, "learning_rate": 1.7153491382797962e-06, "loss": 0.1039, "step": 4240 }, { "epoch": 0.0343878954607978, "grad_norm": 1.8763900995254517, "learning_rate": 1.7193947730398902e-06, "loss": 0.1155, "step": 4250 }, { "epoch": 0.034468808155999675, "grad_norm": 1.893835186958313, "learning_rate": 1.7234404077999839e-06, "loss": 0.1045, "step": 4260 }, { "epoch": 0.03454972085120155, "grad_norm": 1.6340516805648804, "learning_rate": 1.7274860425600778e-06, "loss": 0.0851, "step": 4270 }, { "epoch": 0.03463063354640343, "grad_norm": 1.8460265398025513, "learning_rate": 1.7315316773201718e-06, "loss": 0.0875, "step": 4280 }, { "epoch": 0.03471154624160531, "grad_norm": 2.0805251598358154, "learning_rate": 1.7355773120802655e-06, "loss": 0.0801, "step": 4290 }, { "epoch": 0.034792458936807186, "grad_norm": 1.2786165475845337, "learning_rate": 1.7396229468403594e-06, "loss": 0.0827, "step": 4300 }, { "epoch": 0.03487337163200906, "grad_norm": 2.439070224761963, "learning_rate": 1.7436685816004534e-06, "loss": 0.0892, "step": 4310 }, { "epoch": 0.03495428432721094, "grad_norm": 2.3607852458953857, "learning_rate": 1.7477142163605471e-06, "loss": 0.0971, "step": 4320 }, { "epoch": 0.03503519702241282, "grad_norm": 1.4721710681915283, "learning_rate": 1.751759851120641e-06, "loss": 0.0651, "step": 4330 }, { "epoch": 0.035116109717614696, "grad_norm": 2.26054310798645, "learning_rate": 1.755805485880735e-06, "loss": 0.1073, "step": 4340 }, { "epoch": 0.035197022412816574, "grad_norm": 1.944057822227478, "learning_rate": 1.7598511206408287e-06, "loss": 0.1171, "step": 4350 }, { "epoch": 0.035277935108018445, "grad_norm": 1.3343956470489502, "learning_rate": 1.7638967554009227e-06, "loss": 0.0993, "step": 4360 }, { "epoch": 0.03535884780322032, "grad_norm": 2.835983991622925, "learning_rate": 1.7679423901610166e-06, "loss": 0.09, "step": 4370 }, { "epoch": 0.0354397604984222, "grad_norm": 1.2436025142669678, "learning_rate": 1.7719880249211101e-06, "loss": 0.0896, "step": 4380 }, { "epoch": 0.03552067319362408, "grad_norm": 1.873132586479187, "learning_rate": 1.7760336596812043e-06, "loss": 0.0702, "step": 4390 }, { "epoch": 0.035601585888825955, "grad_norm": 2.867314577102661, "learning_rate": 1.7800792944412982e-06, "loss": 0.104, "step": 4400 }, { "epoch": 0.03568249858402783, "grad_norm": 1.9751499891281128, "learning_rate": 1.7841249292013917e-06, "loss": 0.1067, "step": 4410 }, { "epoch": 0.03576341127922971, "grad_norm": 2.4397547245025635, "learning_rate": 1.7881705639614857e-06, "loss": 0.0957, "step": 4420 }, { "epoch": 0.03584432397443159, "grad_norm": 2.264174461364746, "learning_rate": 1.7922161987215794e-06, "loss": 0.0881, "step": 4430 }, { "epoch": 0.035925236669633466, "grad_norm": 1.8422114849090576, "learning_rate": 1.7962618334816733e-06, "loss": 0.0872, "step": 4440 }, { "epoch": 0.036006149364835344, "grad_norm": 1.3862531185150146, "learning_rate": 1.8003074682417673e-06, "loss": 0.0846, "step": 4450 }, { "epoch": 0.03608706206003722, "grad_norm": 3.6037471294403076, "learning_rate": 1.804353103001861e-06, "loss": 0.0792, "step": 4460 }, { "epoch": 0.0361679747552391, "grad_norm": 1.714288353919983, "learning_rate": 1.808398737761955e-06, "loss": 0.0843, "step": 4470 }, { "epoch": 0.03624888745044098, "grad_norm": 1.9737452268600464, "learning_rate": 1.8124443725220489e-06, "loss": 0.0972, "step": 4480 }, { "epoch": 0.036329800145642854, "grad_norm": 1.1709117889404297, "learning_rate": 1.8164900072821426e-06, "loss": 0.0899, "step": 4490 }, { "epoch": 0.03641071284084473, "grad_norm": 1.7360717058181763, "learning_rate": 1.8205356420422365e-06, "loss": 0.0881, "step": 4500 }, { "epoch": 0.0364916255360466, "grad_norm": 2.1651084423065186, "learning_rate": 1.8245812768023305e-06, "loss": 0.0883, "step": 4510 }, { "epoch": 0.03657253823124848, "grad_norm": 2.229262113571167, "learning_rate": 1.8286269115624242e-06, "loss": 0.1011, "step": 4520 }, { "epoch": 0.03665345092645036, "grad_norm": 1.9758538007736206, "learning_rate": 1.8326725463225182e-06, "loss": 0.0961, "step": 4530 }, { "epoch": 0.036734363621652236, "grad_norm": 2.0817179679870605, "learning_rate": 1.836718181082612e-06, "loss": 0.0985, "step": 4540 }, { "epoch": 0.03681527631685411, "grad_norm": 2.517457962036133, "learning_rate": 1.8407638158427058e-06, "loss": 0.0661, "step": 4550 }, { "epoch": 0.03689618901205599, "grad_norm": 1.5627325773239136, "learning_rate": 1.8448094506027998e-06, "loss": 0.0849, "step": 4560 }, { "epoch": 0.03697710170725787, "grad_norm": 1.9463926553726196, "learning_rate": 1.8488550853628937e-06, "loss": 0.0922, "step": 4570 }, { "epoch": 0.037058014402459746, "grad_norm": 3.564232349395752, "learning_rate": 1.8529007201229874e-06, "loss": 0.0908, "step": 4580 }, { "epoch": 0.037138927097661624, "grad_norm": 2.601978063583374, "learning_rate": 1.8569463548830814e-06, "loss": 0.0864, "step": 4590 }, { "epoch": 0.0372198397928635, "grad_norm": 7.5315070152282715, "learning_rate": 1.8609919896431753e-06, "loss": 0.0845, "step": 4600 }, { "epoch": 0.03730075248806538, "grad_norm": 1.932097315788269, "learning_rate": 1.8650376244032688e-06, "loss": 0.0767, "step": 4610 }, { "epoch": 0.03738166518326726, "grad_norm": 1.7416924238204956, "learning_rate": 1.8690832591633628e-06, "loss": 0.081, "step": 4620 }, { "epoch": 0.037462577878469135, "grad_norm": 2.581759452819824, "learning_rate": 1.873128893923457e-06, "loss": 0.0641, "step": 4630 }, { "epoch": 0.03754349057367101, "grad_norm": 2.2743217945098877, "learning_rate": 1.8771745286835504e-06, "loss": 0.1064, "step": 4640 }, { "epoch": 0.03762440326887288, "grad_norm": 2.665893077850342, "learning_rate": 1.8812201634436444e-06, "loss": 0.0879, "step": 4650 }, { "epoch": 0.03770531596407476, "grad_norm": 1.5558580160140991, "learning_rate": 1.8852657982037383e-06, "loss": 0.0918, "step": 4660 }, { "epoch": 0.03778622865927664, "grad_norm": 1.7111514806747437, "learning_rate": 1.889311432963832e-06, "loss": 0.0999, "step": 4670 }, { "epoch": 0.037867141354478516, "grad_norm": 2.210649013519287, "learning_rate": 1.893357067723926e-06, "loss": 0.0955, "step": 4680 }, { "epoch": 0.037948054049680394, "grad_norm": 2.582871913909912, "learning_rate": 1.89740270248402e-06, "loss": 0.0996, "step": 4690 }, { "epoch": 0.03802896674488227, "grad_norm": 1.026922345161438, "learning_rate": 1.9014483372441137e-06, "loss": 0.0714, "step": 4700 }, { "epoch": 0.03810987944008415, "grad_norm": 3.6100451946258545, "learning_rate": 1.9054939720042076e-06, "loss": 0.0875, "step": 4710 }, { "epoch": 0.03819079213528603, "grad_norm": 1.962062120437622, "learning_rate": 1.9095396067643015e-06, "loss": 0.0903, "step": 4720 }, { "epoch": 0.038271704830487904, "grad_norm": 1.3070719242095947, "learning_rate": 1.913585241524395e-06, "loss": 0.0901, "step": 4730 }, { "epoch": 0.03835261752568978, "grad_norm": 1.3596700429916382, "learning_rate": 1.917630876284489e-06, "loss": 0.0776, "step": 4740 }, { "epoch": 0.03843353022089166, "grad_norm": 1.893821120262146, "learning_rate": 1.921676511044583e-06, "loss": 0.1096, "step": 4750 }, { "epoch": 0.03851444291609354, "grad_norm": 2.6807332038879395, "learning_rate": 1.925722145804677e-06, "loss": 0.121, "step": 4760 }, { "epoch": 0.038595355611295415, "grad_norm": 2.099287986755371, "learning_rate": 1.929767780564771e-06, "loss": 0.0916, "step": 4770 }, { "epoch": 0.03867626830649729, "grad_norm": 1.6524566411972046, "learning_rate": 1.9338134153248647e-06, "loss": 0.0976, "step": 4780 }, { "epoch": 0.03875718100169916, "grad_norm": 2.1935033798217773, "learning_rate": 1.9378590500849583e-06, "loss": 0.0904, "step": 4790 }, { "epoch": 0.03883809369690104, "grad_norm": 2.518470525741577, "learning_rate": 1.941904684845052e-06, "loss": 0.1012, "step": 4800 }, { "epoch": 0.03891900639210292, "grad_norm": 1.3983052968978882, "learning_rate": 1.945950319605146e-06, "loss": 0.0869, "step": 4810 }, { "epoch": 0.038999919087304796, "grad_norm": 1.6885274648666382, "learning_rate": 1.94999595436524e-06, "loss": 0.0776, "step": 4820 }, { "epoch": 0.039080831782506674, "grad_norm": 2.4496963024139404, "learning_rate": 1.954041589125334e-06, "loss": 0.0802, "step": 4830 }, { "epoch": 0.03916174447770855, "grad_norm": 1.736063838005066, "learning_rate": 1.958087223885428e-06, "loss": 0.0894, "step": 4840 }, { "epoch": 0.03924265717291043, "grad_norm": 1.8685420751571655, "learning_rate": 1.9621328586455215e-06, "loss": 0.0998, "step": 4850 }, { "epoch": 0.03932356986811231, "grad_norm": 1.3981101512908936, "learning_rate": 1.9661784934056154e-06, "loss": 0.0692, "step": 4860 }, { "epoch": 0.039404482563314185, "grad_norm": 1.5764601230621338, "learning_rate": 1.9702241281657094e-06, "loss": 0.0948, "step": 4870 }, { "epoch": 0.03948539525851606, "grad_norm": 2.2534000873565674, "learning_rate": 1.9742697629258033e-06, "loss": 0.0827, "step": 4880 }, { "epoch": 0.03956630795371794, "grad_norm": 1.7004269361495972, "learning_rate": 1.9783153976858972e-06, "loss": 0.0802, "step": 4890 }, { "epoch": 0.03964722064891982, "grad_norm": 1.7765733003616333, "learning_rate": 1.982361032445991e-06, "loss": 0.0646, "step": 4900 }, { "epoch": 0.039728133344121695, "grad_norm": 1.7036375999450684, "learning_rate": 1.9864066672060847e-06, "loss": 0.0885, "step": 4910 }, { "epoch": 0.03980904603932357, "grad_norm": 1.6193289756774902, "learning_rate": 1.9904523019661786e-06, "loss": 0.0906, "step": 4920 }, { "epoch": 0.039889958734525444, "grad_norm": 2.0269458293914795, "learning_rate": 1.9944979367262726e-06, "loss": 0.0554, "step": 4930 }, { "epoch": 0.03997087142972732, "grad_norm": 1.8189992904663086, "learning_rate": 1.998543571486366e-06, "loss": 0.0866, "step": 4940 }, { "epoch": 0.0400517841249292, "grad_norm": 1.349178433418274, "learning_rate": 2.0025892062464605e-06, "loss": 0.0791, "step": 4950 }, { "epoch": 0.04013269682013108, "grad_norm": 2.4950034618377686, "learning_rate": 2.0066348410065544e-06, "loss": 0.0828, "step": 4960 }, { "epoch": 0.040213609515332954, "grad_norm": 1.8050974607467651, "learning_rate": 2.010680475766648e-06, "loss": 0.0847, "step": 4970 }, { "epoch": 0.04029452221053483, "grad_norm": 1.9281790256500244, "learning_rate": 2.014726110526742e-06, "loss": 0.0954, "step": 4980 }, { "epoch": 0.04037543490573671, "grad_norm": 2.454810619354248, "learning_rate": 2.018771745286836e-06, "loss": 0.1035, "step": 4990 }, { "epoch": 0.04045634760093859, "grad_norm": 2.8772618770599365, "learning_rate": 2.0228173800469293e-06, "loss": 0.0978, "step": 5000 }, { "epoch": 0.040537260296140465, "grad_norm": 1.8899805545806885, "learning_rate": 2.0268630148070232e-06, "loss": 0.0798, "step": 5010 }, { "epoch": 0.04061817299134234, "grad_norm": 2.6849558353424072, "learning_rate": 2.030908649567117e-06, "loss": 0.1008, "step": 5020 }, { "epoch": 0.04069908568654422, "grad_norm": 2.370584011077881, "learning_rate": 2.034954284327211e-06, "loss": 0.0801, "step": 5030 }, { "epoch": 0.0407799983817461, "grad_norm": 2.945876121520996, "learning_rate": 2.038999919087305e-06, "loss": 0.091, "step": 5040 }, { "epoch": 0.040860911076947976, "grad_norm": 1.9871400594711304, "learning_rate": 2.043045553847399e-06, "loss": 0.0736, "step": 5050 }, { "epoch": 0.04094182377214985, "grad_norm": 1.546486735343933, "learning_rate": 2.0470911886074925e-06, "loss": 0.078, "step": 5060 }, { "epoch": 0.04102273646735173, "grad_norm": 1.686281442642212, "learning_rate": 2.0511368233675865e-06, "loss": 0.083, "step": 5070 }, { "epoch": 0.0411036491625536, "grad_norm": 1.9640003442764282, "learning_rate": 2.0551824581276804e-06, "loss": 0.0918, "step": 5080 }, { "epoch": 0.04118456185775548, "grad_norm": 5.0208330154418945, "learning_rate": 2.0592280928877743e-06, "loss": 0.1068, "step": 5090 }, { "epoch": 0.04126547455295736, "grad_norm": 1.9928936958312988, "learning_rate": 2.0632737276478683e-06, "loss": 0.0975, "step": 5100 }, { "epoch": 0.041346387248159235, "grad_norm": 1.2125608921051025, "learning_rate": 2.0673193624079622e-06, "loss": 0.0876, "step": 5110 }, { "epoch": 0.04142729994336111, "grad_norm": 2.4064817428588867, "learning_rate": 2.0713649971680557e-06, "loss": 0.0887, "step": 5120 }, { "epoch": 0.04150821263856299, "grad_norm": 1.5048969984054565, "learning_rate": 2.0754106319281497e-06, "loss": 0.0736, "step": 5130 }, { "epoch": 0.04158912533376487, "grad_norm": 2.8116302490234375, "learning_rate": 2.0794562666882436e-06, "loss": 0.0975, "step": 5140 }, { "epoch": 0.041670038028966745, "grad_norm": 1.3595644235610962, "learning_rate": 2.0835019014483376e-06, "loss": 0.0756, "step": 5150 }, { "epoch": 0.04175095072416862, "grad_norm": 1.721056580543518, "learning_rate": 2.0875475362084315e-06, "loss": 0.0657, "step": 5160 }, { "epoch": 0.0418318634193705, "grad_norm": 2.142266273498535, "learning_rate": 2.0915931709685254e-06, "loss": 0.0951, "step": 5170 }, { "epoch": 0.04191277611457238, "grad_norm": 1.7527512311935425, "learning_rate": 2.095638805728619e-06, "loss": 0.0973, "step": 5180 }, { "epoch": 0.041993688809774256, "grad_norm": 1.6032062768936157, "learning_rate": 2.099684440488713e-06, "loss": 0.0921, "step": 5190 }, { "epoch": 0.042074601504976133, "grad_norm": 1.589940071105957, "learning_rate": 2.103730075248807e-06, "loss": 0.1016, "step": 5200 }, { "epoch": 0.04215551420017801, "grad_norm": 2.023250102996826, "learning_rate": 2.1077757100089004e-06, "loss": 0.0689, "step": 5210 }, { "epoch": 0.04223642689537988, "grad_norm": 2.2335851192474365, "learning_rate": 2.1118213447689943e-06, "loss": 0.0908, "step": 5220 }, { "epoch": 0.04231733959058176, "grad_norm": 1.980453610420227, "learning_rate": 2.1158669795290882e-06, "loss": 0.0783, "step": 5230 }, { "epoch": 0.04239825228578364, "grad_norm": 1.8922892808914185, "learning_rate": 2.119912614289182e-06, "loss": 0.087, "step": 5240 }, { "epoch": 0.042479164980985515, "grad_norm": 2.238924264907837, "learning_rate": 2.123958249049276e-06, "loss": 0.0893, "step": 5250 }, { "epoch": 0.04256007767618739, "grad_norm": 2.1587541103363037, "learning_rate": 2.1280038838093696e-06, "loss": 0.0996, "step": 5260 }, { "epoch": 0.04264099037138927, "grad_norm": 2.5724098682403564, "learning_rate": 2.1320495185694636e-06, "loss": 0.0995, "step": 5270 }, { "epoch": 0.04272190306659115, "grad_norm": 1.6299608945846558, "learning_rate": 2.1360951533295575e-06, "loss": 0.1221, "step": 5280 }, { "epoch": 0.042802815761793025, "grad_norm": 1.2288849353790283, "learning_rate": 2.1401407880896514e-06, "loss": 0.0659, "step": 5290 }, { "epoch": 0.0428837284569949, "grad_norm": 1.4219882488250732, "learning_rate": 2.1441864228497454e-06, "loss": 0.0808, "step": 5300 }, { "epoch": 0.04296464115219678, "grad_norm": 2.3605310916900635, "learning_rate": 2.1482320576098393e-06, "loss": 0.0973, "step": 5310 }, { "epoch": 0.04304555384739866, "grad_norm": 2.4384772777557373, "learning_rate": 2.152277692369933e-06, "loss": 0.0942, "step": 5320 }, { "epoch": 0.043126466542600536, "grad_norm": 1.6062020063400269, "learning_rate": 2.1563233271300268e-06, "loss": 0.0775, "step": 5330 }, { "epoch": 0.043207379237802414, "grad_norm": 1.94057297706604, "learning_rate": 2.1603689618901207e-06, "loss": 0.073, "step": 5340 }, { "epoch": 0.04328829193300429, "grad_norm": 2.1561975479125977, "learning_rate": 2.1644145966502147e-06, "loss": 0.0967, "step": 5350 }, { "epoch": 0.04336920462820616, "grad_norm": 2.209407091140747, "learning_rate": 2.1684602314103086e-06, "loss": 0.0929, "step": 5360 }, { "epoch": 0.04345011732340804, "grad_norm": 2.353501558303833, "learning_rate": 2.1725058661704025e-06, "loss": 0.0791, "step": 5370 }, { "epoch": 0.04353103001860992, "grad_norm": 1.910529613494873, "learning_rate": 2.176551500930496e-06, "loss": 0.1019, "step": 5380 }, { "epoch": 0.043611942713811795, "grad_norm": 1.7262619733810425, "learning_rate": 2.18059713569059e-06, "loss": 0.0904, "step": 5390 }, { "epoch": 0.04369285540901367, "grad_norm": 2.59426212310791, "learning_rate": 2.184642770450684e-06, "loss": 0.1291, "step": 5400 }, { "epoch": 0.04377376810421555, "grad_norm": 1.2860559225082397, "learning_rate": 2.1886884052107775e-06, "loss": 0.0835, "step": 5410 }, { "epoch": 0.04385468079941743, "grad_norm": 1.656887173652649, "learning_rate": 2.1927340399708714e-06, "loss": 0.076, "step": 5420 }, { "epoch": 0.043935593494619306, "grad_norm": 1.7857797145843506, "learning_rate": 2.1967796747309653e-06, "loss": 0.0898, "step": 5430 }, { "epoch": 0.04401650618982118, "grad_norm": 1.8772821426391602, "learning_rate": 2.2008253094910593e-06, "loss": 0.0796, "step": 5440 }, { "epoch": 0.04409741888502306, "grad_norm": 1.6047663688659668, "learning_rate": 2.2048709442511532e-06, "loss": 0.0656, "step": 5450 }, { "epoch": 0.04417833158022494, "grad_norm": 1.8985008001327515, "learning_rate": 2.208916579011247e-06, "loss": 0.0774, "step": 5460 }, { "epoch": 0.044259244275426816, "grad_norm": 2.1074986457824707, "learning_rate": 2.2129622137713407e-06, "loss": 0.0814, "step": 5470 }, { "epoch": 0.044340156970628694, "grad_norm": 1.5919344425201416, "learning_rate": 2.2170078485314346e-06, "loss": 0.0759, "step": 5480 }, { "epoch": 0.04442106966583057, "grad_norm": 1.1153978109359741, "learning_rate": 2.2210534832915286e-06, "loss": 0.0907, "step": 5490 }, { "epoch": 0.04450198236103245, "grad_norm": 1.6785190105438232, "learning_rate": 2.2250991180516225e-06, "loss": 0.0772, "step": 5500 }, { "epoch": 0.04458289505623432, "grad_norm": 2.7185962200164795, "learning_rate": 2.2291447528117164e-06, "loss": 0.0699, "step": 5510 }, { "epoch": 0.0446638077514362, "grad_norm": 2.3474621772766113, "learning_rate": 2.2331903875718104e-06, "loss": 0.0773, "step": 5520 }, { "epoch": 0.044744720446638075, "grad_norm": 2.1826164722442627, "learning_rate": 2.237236022331904e-06, "loss": 0.0781, "step": 5530 }, { "epoch": 0.04482563314183995, "grad_norm": 2.0049595832824707, "learning_rate": 2.241281657091998e-06, "loss": 0.0687, "step": 5540 }, { "epoch": 0.04490654583704183, "grad_norm": 1.1843128204345703, "learning_rate": 2.2453272918520918e-06, "loss": 0.0608, "step": 5550 }, { "epoch": 0.04498745853224371, "grad_norm": 1.8248350620269775, "learning_rate": 2.2493729266121857e-06, "loss": 0.0789, "step": 5560 }, { "epoch": 0.045068371227445586, "grad_norm": 2.2481143474578857, "learning_rate": 2.2534185613722796e-06, "loss": 0.0855, "step": 5570 }, { "epoch": 0.045149283922647464, "grad_norm": 2.152229070663452, "learning_rate": 2.2574641961323736e-06, "loss": 0.074, "step": 5580 }, { "epoch": 0.04523019661784934, "grad_norm": 1.6323626041412354, "learning_rate": 2.261509830892467e-06, "loss": 0.0831, "step": 5590 }, { "epoch": 0.04531110931305122, "grad_norm": 1.665816068649292, "learning_rate": 2.265555465652561e-06, "loss": 0.0531, "step": 5600 }, { "epoch": 0.0453920220082531, "grad_norm": 1.6380051374435425, "learning_rate": 2.269601100412655e-06, "loss": 0.0785, "step": 5610 }, { "epoch": 0.045472934703454974, "grad_norm": 2.640465259552002, "learning_rate": 2.2736467351727485e-06, "loss": 0.076, "step": 5620 }, { "epoch": 0.04555384739865685, "grad_norm": 1.6051607131958008, "learning_rate": 2.2776923699328424e-06, "loss": 0.0716, "step": 5630 }, { "epoch": 0.04563476009385873, "grad_norm": 1.4628361463546753, "learning_rate": 2.281738004692937e-06, "loss": 0.0728, "step": 5640 }, { "epoch": 0.0457156727890606, "grad_norm": 1.9768860340118408, "learning_rate": 2.2857836394530303e-06, "loss": 0.1011, "step": 5650 }, { "epoch": 0.04579658548426248, "grad_norm": 1.6538546085357666, "learning_rate": 2.2898292742131243e-06, "loss": 0.0545, "step": 5660 }, { "epoch": 0.045877498179464356, "grad_norm": 1.5856231451034546, "learning_rate": 2.293874908973218e-06, "loss": 0.0657, "step": 5670 }, { "epoch": 0.04595841087466623, "grad_norm": 2.0238709449768066, "learning_rate": 2.2979205437333117e-06, "loss": 0.079, "step": 5680 }, { "epoch": 0.04603932356986811, "grad_norm": 1.2451986074447632, "learning_rate": 2.3019661784934057e-06, "loss": 0.0904, "step": 5690 }, { "epoch": 0.04612023626506999, "grad_norm": 1.7582995891571045, "learning_rate": 2.3060118132534996e-06, "loss": 0.0756, "step": 5700 }, { "epoch": 0.046201148960271866, "grad_norm": 1.5853369235992432, "learning_rate": 2.3100574480135935e-06, "loss": 0.0683, "step": 5710 }, { "epoch": 0.046282061655473744, "grad_norm": 1.7031865119934082, "learning_rate": 2.3141030827736875e-06, "loss": 0.0824, "step": 5720 }, { "epoch": 0.04636297435067562, "grad_norm": 1.8604408502578735, "learning_rate": 2.3181487175337814e-06, "loss": 0.0649, "step": 5730 }, { "epoch": 0.0464438870458775, "grad_norm": 1.8702560663223267, "learning_rate": 2.322194352293875e-06, "loss": 0.0709, "step": 5740 }, { "epoch": 0.04652479974107938, "grad_norm": 1.6928176879882812, "learning_rate": 2.326239987053969e-06, "loss": 0.078, "step": 5750 }, { "epoch": 0.046605712436281255, "grad_norm": 2.1044065952301025, "learning_rate": 2.330285621814063e-06, "loss": 0.0689, "step": 5760 }, { "epoch": 0.04668662513148313, "grad_norm": 2.0098636150360107, "learning_rate": 2.3343312565741568e-06, "loss": 0.0817, "step": 5770 }, { "epoch": 0.04676753782668501, "grad_norm": 1.9344679117202759, "learning_rate": 2.3383768913342507e-06, "loss": 0.072, "step": 5780 }, { "epoch": 0.04684845052188688, "grad_norm": 1.319514513015747, "learning_rate": 2.3424225260943446e-06, "loss": 0.0713, "step": 5790 }, { "epoch": 0.04692936321708876, "grad_norm": 2.238028049468994, "learning_rate": 2.346468160854438e-06, "loss": 0.083, "step": 5800 }, { "epoch": 0.047010275912290636, "grad_norm": 3.900758981704712, "learning_rate": 2.350513795614532e-06, "loss": 0.0799, "step": 5810 }, { "epoch": 0.047091188607492514, "grad_norm": 1.089406132698059, "learning_rate": 2.354559430374626e-06, "loss": 0.0833, "step": 5820 }, { "epoch": 0.04717210130269439, "grad_norm": 1.757535457611084, "learning_rate": 2.35860506513472e-06, "loss": 0.0916, "step": 5830 }, { "epoch": 0.04725301399789627, "grad_norm": 1.639850378036499, "learning_rate": 2.362650699894814e-06, "loss": 0.0584, "step": 5840 }, { "epoch": 0.04733392669309815, "grad_norm": 2.106066942214966, "learning_rate": 2.366696334654908e-06, "loss": 0.0808, "step": 5850 }, { "epoch": 0.047414839388300024, "grad_norm": 1.0338685512542725, "learning_rate": 2.3707419694150014e-06, "loss": 0.0559, "step": 5860 }, { "epoch": 0.0474957520835019, "grad_norm": 1.063887357711792, "learning_rate": 2.3747876041750953e-06, "loss": 0.0911, "step": 5870 }, { "epoch": 0.04757666477870378, "grad_norm": 2.176053762435913, "learning_rate": 2.3788332389351892e-06, "loss": 0.0883, "step": 5880 }, { "epoch": 0.04765757747390566, "grad_norm": 1.9439582824707031, "learning_rate": 2.3828788736952828e-06, "loss": 0.0537, "step": 5890 }, { "epoch": 0.047738490169107535, "grad_norm": 2.5395991802215576, "learning_rate": 2.3869245084553767e-06, "loss": 0.0891, "step": 5900 }, { "epoch": 0.04781940286430941, "grad_norm": 1.5466617345809937, "learning_rate": 2.3909701432154706e-06, "loss": 0.0896, "step": 5910 }, { "epoch": 0.04790031555951129, "grad_norm": 2.1909823417663574, "learning_rate": 2.3950157779755646e-06, "loss": 0.0712, "step": 5920 }, { "epoch": 0.04798122825471316, "grad_norm": 2.686227798461914, "learning_rate": 2.3990614127356585e-06, "loss": 0.0826, "step": 5930 }, { "epoch": 0.04806214094991504, "grad_norm": 1.9588127136230469, "learning_rate": 2.4031070474957525e-06, "loss": 0.0831, "step": 5940 }, { "epoch": 0.048143053645116916, "grad_norm": 2.028891086578369, "learning_rate": 2.407152682255846e-06, "loss": 0.0977, "step": 5950 }, { "epoch": 0.048223966340318794, "grad_norm": 2.031863212585449, "learning_rate": 2.41119831701594e-06, "loss": 0.1234, "step": 5960 }, { "epoch": 0.04830487903552067, "grad_norm": 1.5765665769577026, "learning_rate": 2.415243951776034e-06, "loss": 0.0742, "step": 5970 }, { "epoch": 0.04838579173072255, "grad_norm": 2.2418789863586426, "learning_rate": 2.419289586536128e-06, "loss": 0.0768, "step": 5980 }, { "epoch": 0.04846670442592443, "grad_norm": 0.9214484095573425, "learning_rate": 2.4233352212962217e-06, "loss": 0.0686, "step": 5990 }, { "epoch": 0.048547617121126305, "grad_norm": 1.7381823062896729, "learning_rate": 2.4273808560563157e-06, "loss": 0.1047, "step": 6000 }, { "epoch": 0.04862852981632818, "grad_norm": 2.145622968673706, "learning_rate": 2.431426490816409e-06, "loss": 0.0809, "step": 6010 }, { "epoch": 0.04870944251153006, "grad_norm": 1.2086924314498901, "learning_rate": 2.435472125576503e-06, "loss": 0.0629, "step": 6020 }, { "epoch": 0.04879035520673194, "grad_norm": 2.8748180866241455, "learning_rate": 2.439517760336597e-06, "loss": 0.0863, "step": 6030 }, { "epoch": 0.048871267901933815, "grad_norm": 1.6467472314834595, "learning_rate": 2.443563395096691e-06, "loss": 0.0909, "step": 6040 }, { "epoch": 0.04895218059713569, "grad_norm": 2.1082205772399902, "learning_rate": 2.447609029856785e-06, "loss": 0.0564, "step": 6050 }, { "epoch": 0.04903309329233757, "grad_norm": 1.3916493654251099, "learning_rate": 2.4516546646168785e-06, "loss": 0.112, "step": 6060 }, { "epoch": 0.04911400598753945, "grad_norm": 2.918328046798706, "learning_rate": 2.4557002993769724e-06, "loss": 0.0879, "step": 6070 }, { "epoch": 0.04919491868274132, "grad_norm": 1.600243091583252, "learning_rate": 2.4597459341370663e-06, "loss": 0.0745, "step": 6080 }, { "epoch": 0.0492758313779432, "grad_norm": 2.128371477127075, "learning_rate": 2.46379156889716e-06, "loss": 0.0951, "step": 6090 }, { "epoch": 0.049356744073145074, "grad_norm": 1.4596885442733765, "learning_rate": 2.467837203657254e-06, "loss": 0.0809, "step": 6100 }, { "epoch": 0.04943765676834695, "grad_norm": 1.351402759552002, "learning_rate": 2.4718828384173477e-06, "loss": 0.0834, "step": 6110 }, { "epoch": 0.04951856946354883, "grad_norm": 2.15824031829834, "learning_rate": 2.4759284731774417e-06, "loss": 0.0993, "step": 6120 }, { "epoch": 0.04959948215875071, "grad_norm": 2.5114047527313232, "learning_rate": 2.4799741079375356e-06, "loss": 0.0833, "step": 6130 }, { "epoch": 0.049680394853952585, "grad_norm": 2.5415451526641846, "learning_rate": 2.4840197426976296e-06, "loss": 0.0642, "step": 6140 }, { "epoch": 0.04976130754915446, "grad_norm": 2.266960382461548, "learning_rate": 2.488065377457723e-06, "loss": 0.069, "step": 6150 }, { "epoch": 0.04984222024435634, "grad_norm": 1.9045170545578003, "learning_rate": 2.492111012217817e-06, "loss": 0.081, "step": 6160 }, { "epoch": 0.04992313293955822, "grad_norm": 2.518308401107788, "learning_rate": 2.496156646977911e-06, "loss": 0.0615, "step": 6170 }, { "epoch": 0.050004045634760096, "grad_norm": 1.7742820978164673, "learning_rate": 2.500202281738005e-06, "loss": 0.0748, "step": 6180 }, { "epoch": 0.05008495832996197, "grad_norm": 2.0198326110839844, "learning_rate": 2.504247916498099e-06, "loss": 0.0751, "step": 6190 }, { "epoch": 0.05016587102516385, "grad_norm": 1.2895112037658691, "learning_rate": 2.5082935512581924e-06, "loss": 0.0857, "step": 6200 }, { "epoch": 0.05024678372036573, "grad_norm": 1.6007320880889893, "learning_rate": 2.5123391860182867e-06, "loss": 0.0844, "step": 6210 }, { "epoch": 0.0503276964155676, "grad_norm": 1.8487398624420166, "learning_rate": 2.5163848207783802e-06, "loss": 0.0535, "step": 6220 }, { "epoch": 0.05040860911076948, "grad_norm": 1.9775465726852417, "learning_rate": 2.520430455538474e-06, "loss": 0.0593, "step": 6230 }, { "epoch": 0.050489521805971355, "grad_norm": 2.0875136852264404, "learning_rate": 2.524476090298568e-06, "loss": 0.0813, "step": 6240 }, { "epoch": 0.05057043450117323, "grad_norm": 2.3256711959838867, "learning_rate": 2.528521725058662e-06, "loss": 0.088, "step": 6250 }, { "epoch": 0.05065134719637511, "grad_norm": 1.5717004537582397, "learning_rate": 2.5325673598187556e-06, "loss": 0.0564, "step": 6260 }, { "epoch": 0.05073225989157699, "grad_norm": 1.3022099733352661, "learning_rate": 2.53661299457885e-06, "loss": 0.0823, "step": 6270 }, { "epoch": 0.050813172586778865, "grad_norm": 0.8657129406929016, "learning_rate": 2.5406586293389435e-06, "loss": 0.0743, "step": 6280 }, { "epoch": 0.05089408528198074, "grad_norm": 1.3313161134719849, "learning_rate": 2.544704264099037e-06, "loss": 0.0785, "step": 6290 }, { "epoch": 0.05097499797718262, "grad_norm": 1.3104350566864014, "learning_rate": 2.5487498988591313e-06, "loss": 0.0796, "step": 6300 }, { "epoch": 0.0510559106723845, "grad_norm": 1.4155601263046265, "learning_rate": 2.552795533619225e-06, "loss": 0.0835, "step": 6310 }, { "epoch": 0.051136823367586376, "grad_norm": 1.2331156730651855, "learning_rate": 2.556841168379319e-06, "loss": 0.0649, "step": 6320 }, { "epoch": 0.051217736062788254, "grad_norm": 1.8967458009719849, "learning_rate": 2.560886803139413e-06, "loss": 0.0959, "step": 6330 }, { "epoch": 0.05129864875799013, "grad_norm": 1.3366036415100098, "learning_rate": 2.5649324378995067e-06, "loss": 0.0894, "step": 6340 }, { "epoch": 0.05137956145319201, "grad_norm": 0.7626665830612183, "learning_rate": 2.5689780726596e-06, "loss": 0.0764, "step": 6350 }, { "epoch": 0.05146047414839388, "grad_norm": 1.3011972904205322, "learning_rate": 2.5730237074196945e-06, "loss": 0.0683, "step": 6360 }, { "epoch": 0.05154138684359576, "grad_norm": 1.5565357208251953, "learning_rate": 2.577069342179788e-06, "loss": 0.082, "step": 6370 }, { "epoch": 0.051622299538797635, "grad_norm": 2.0953962802886963, "learning_rate": 2.581114976939882e-06, "loss": 0.0663, "step": 6380 }, { "epoch": 0.05170321223399951, "grad_norm": 1.652972936630249, "learning_rate": 2.585160611699976e-06, "loss": 0.0922, "step": 6390 }, { "epoch": 0.05178412492920139, "grad_norm": 1.224468469619751, "learning_rate": 2.58920624646007e-06, "loss": 0.0716, "step": 6400 }, { "epoch": 0.05186503762440327, "grad_norm": 2.106627941131592, "learning_rate": 2.5932518812201634e-06, "loss": 0.0772, "step": 6410 }, { "epoch": 0.051945950319605146, "grad_norm": 1.8650851249694824, "learning_rate": 2.5972975159802578e-06, "loss": 0.0923, "step": 6420 }, { "epoch": 0.05202686301480702, "grad_norm": 1.8184767961502075, "learning_rate": 2.6013431507403513e-06, "loss": 0.073, "step": 6430 }, { "epoch": 0.0521077757100089, "grad_norm": 1.31785249710083, "learning_rate": 2.6053887855004452e-06, "loss": 0.0709, "step": 6440 }, { "epoch": 0.05218868840521078, "grad_norm": 1.8772975206375122, "learning_rate": 2.609434420260539e-06, "loss": 0.0711, "step": 6450 }, { "epoch": 0.052269601100412656, "grad_norm": 1.5335112810134888, "learning_rate": 2.613480055020633e-06, "loss": 0.07, "step": 6460 }, { "epoch": 0.052350513795614534, "grad_norm": 1.319510817527771, "learning_rate": 2.6175256897807266e-06, "loss": 0.0606, "step": 6470 }, { "epoch": 0.05243142649081641, "grad_norm": 1.5085227489471436, "learning_rate": 2.621571324540821e-06, "loss": 0.0852, "step": 6480 }, { "epoch": 0.05251233918601829, "grad_norm": 2.1260054111480713, "learning_rate": 2.6256169593009145e-06, "loss": 0.0643, "step": 6490 }, { "epoch": 0.05259325188122017, "grad_norm": 2.147106409072876, "learning_rate": 2.629662594061008e-06, "loss": 0.1005, "step": 6500 }, { "epoch": 0.05267416457642204, "grad_norm": 1.5466110706329346, "learning_rate": 2.6337082288211024e-06, "loss": 0.0911, "step": 6510 }, { "epoch": 0.052755077271623915, "grad_norm": 2.053074598312378, "learning_rate": 2.6377538635811963e-06, "loss": 0.0653, "step": 6520 }, { "epoch": 0.05283598996682579, "grad_norm": 1.9098234176635742, "learning_rate": 2.64179949834129e-06, "loss": 0.0852, "step": 6530 }, { "epoch": 0.05291690266202767, "grad_norm": 2.7261364459991455, "learning_rate": 2.645845133101384e-06, "loss": 0.0562, "step": 6540 }, { "epoch": 0.05299781535722955, "grad_norm": 1.661271333694458, "learning_rate": 2.6498907678614777e-06, "loss": 0.0751, "step": 6550 }, { "epoch": 0.053078728052431426, "grad_norm": 1.0633739233016968, "learning_rate": 2.6539364026215712e-06, "loss": 0.061, "step": 6560 }, { "epoch": 0.053159640747633304, "grad_norm": 1.384228229522705, "learning_rate": 2.6579820373816656e-06, "loss": 0.0929, "step": 6570 }, { "epoch": 0.05324055344283518, "grad_norm": 2.434288740158081, "learning_rate": 2.662027672141759e-06, "loss": 0.0653, "step": 6580 }, { "epoch": 0.05332146613803706, "grad_norm": 0.4937869608402252, "learning_rate": 2.666073306901853e-06, "loss": 0.0744, "step": 6590 }, { "epoch": 0.053402378833238937, "grad_norm": 1.2541364431381226, "learning_rate": 2.670118941661947e-06, "loss": 0.0915, "step": 6600 }, { "epoch": 0.053483291528440814, "grad_norm": 1.6768956184387207, "learning_rate": 2.674164576422041e-06, "loss": 0.0828, "step": 6610 }, { "epoch": 0.05356420422364269, "grad_norm": 2.422426700592041, "learning_rate": 2.6782102111821344e-06, "loss": 0.0716, "step": 6620 }, { "epoch": 0.05364511691884457, "grad_norm": 1.0737656354904175, "learning_rate": 2.682255845942229e-06, "loss": 0.0837, "step": 6630 }, { "epoch": 0.05372602961404645, "grad_norm": 1.7206401824951172, "learning_rate": 2.6863014807023223e-06, "loss": 0.0796, "step": 6640 }, { "epoch": 0.05380694230924832, "grad_norm": 2.094972848892212, "learning_rate": 2.6903471154624163e-06, "loss": 0.0576, "step": 6650 }, { "epoch": 0.053887855004450196, "grad_norm": 1.9968302249908447, "learning_rate": 2.69439275022251e-06, "loss": 0.0619, "step": 6660 }, { "epoch": 0.05396876769965207, "grad_norm": 1.5522429943084717, "learning_rate": 2.698438384982604e-06, "loss": 0.0786, "step": 6670 }, { "epoch": 0.05404968039485395, "grad_norm": 1.8141212463378906, "learning_rate": 2.7024840197426977e-06, "loss": 0.075, "step": 6680 }, { "epoch": 0.05413059309005583, "grad_norm": 1.887544870376587, "learning_rate": 2.706529654502792e-06, "loss": 0.0541, "step": 6690 }, { "epoch": 0.054211505785257706, "grad_norm": 1.3188971281051636, "learning_rate": 2.7105752892628855e-06, "loss": 0.0697, "step": 6700 }, { "epoch": 0.054292418480459584, "grad_norm": 1.3949788808822632, "learning_rate": 2.7146209240229795e-06, "loss": 0.0901, "step": 6710 }, { "epoch": 0.05437333117566146, "grad_norm": 1.4812257289886475, "learning_rate": 2.7186665587830734e-06, "loss": 0.088, "step": 6720 }, { "epoch": 0.05445424387086334, "grad_norm": 0.7962333559989929, "learning_rate": 2.7227121935431674e-06, "loss": 0.0696, "step": 6730 }, { "epoch": 0.05453515656606522, "grad_norm": 1.2905975580215454, "learning_rate": 2.726757828303261e-06, "loss": 0.0835, "step": 6740 }, { "epoch": 0.054616069261267094, "grad_norm": 1.7589014768600464, "learning_rate": 2.7308034630633552e-06, "loss": 0.0608, "step": 6750 }, { "epoch": 0.05469698195646897, "grad_norm": 3.0233702659606934, "learning_rate": 2.7348490978234488e-06, "loss": 0.122, "step": 6760 }, { "epoch": 0.05477789465167085, "grad_norm": 1.101932406425476, "learning_rate": 2.7388947325835423e-06, "loss": 0.0892, "step": 6770 }, { "epoch": 0.05485880734687273, "grad_norm": 1.633610486984253, "learning_rate": 2.7429403673436366e-06, "loss": 0.0776, "step": 6780 }, { "epoch": 0.0549397200420746, "grad_norm": 1.5618000030517578, "learning_rate": 2.74698600210373e-06, "loss": 0.0697, "step": 6790 }, { "epoch": 0.055020632737276476, "grad_norm": 0.5742787718772888, "learning_rate": 2.751031636863824e-06, "loss": 0.055, "step": 6800 }, { "epoch": 0.055101545432478354, "grad_norm": 1.87576162815094, "learning_rate": 2.7550772716239185e-06, "loss": 0.0758, "step": 6810 }, { "epoch": 0.05518245812768023, "grad_norm": 1.5190373659133911, "learning_rate": 2.759122906384012e-06, "loss": 0.066, "step": 6820 }, { "epoch": 0.05526337082288211, "grad_norm": 1.9208102226257324, "learning_rate": 2.7631685411441055e-06, "loss": 0.0687, "step": 6830 }, { "epoch": 0.055344283518083986, "grad_norm": 1.1988950967788696, "learning_rate": 2.7672141759042e-06, "loss": 0.0559, "step": 6840 }, { "epoch": 0.055425196213285864, "grad_norm": 1.2211685180664062, "learning_rate": 2.7712598106642934e-06, "loss": 0.0858, "step": 6850 }, { "epoch": 0.05550610890848774, "grad_norm": 1.6618263721466064, "learning_rate": 2.7753054454243873e-06, "loss": 0.0623, "step": 6860 }, { "epoch": 0.05558702160368962, "grad_norm": 1.1580228805541992, "learning_rate": 2.7793510801844813e-06, "loss": 0.077, "step": 6870 }, { "epoch": 0.0556679342988915, "grad_norm": 1.6479123830795288, "learning_rate": 2.783396714944575e-06, "loss": 0.064, "step": 6880 }, { "epoch": 0.055748846994093375, "grad_norm": 1.3938993215560913, "learning_rate": 2.7874423497046687e-06, "loss": 0.0804, "step": 6890 }, { "epoch": 0.05582975968929525, "grad_norm": 1.8458549976348877, "learning_rate": 2.791487984464763e-06, "loss": 0.0805, "step": 6900 }, { "epoch": 0.05591067238449713, "grad_norm": 1.14557683467865, "learning_rate": 2.7955336192248566e-06, "loss": 0.0646, "step": 6910 }, { "epoch": 0.05599158507969901, "grad_norm": 1.515055537223816, "learning_rate": 2.7995792539849505e-06, "loss": 0.0792, "step": 6920 }, { "epoch": 0.05607249777490088, "grad_norm": 1.389580249786377, "learning_rate": 2.8036248887450445e-06, "loss": 0.0723, "step": 6930 }, { "epoch": 0.056153410470102756, "grad_norm": 1.5816255807876587, "learning_rate": 2.8076705235051384e-06, "loss": 0.084, "step": 6940 }, { "epoch": 0.056234323165304634, "grad_norm": 1.805830478668213, "learning_rate": 2.811716158265232e-06, "loss": 0.0642, "step": 6950 }, { "epoch": 0.05631523586050651, "grad_norm": 2.1237354278564453, "learning_rate": 2.8157617930253263e-06, "loss": 0.0677, "step": 6960 }, { "epoch": 0.05639614855570839, "grad_norm": 1.287699580192566, "learning_rate": 2.81980742778542e-06, "loss": 0.0695, "step": 6970 }, { "epoch": 0.05647706125091027, "grad_norm": 0.7646516561508179, "learning_rate": 2.8238530625455133e-06, "loss": 0.1026, "step": 6980 }, { "epoch": 0.056557973946112144, "grad_norm": 1.468332052230835, "learning_rate": 2.8278986973056077e-06, "loss": 0.0815, "step": 6990 }, { "epoch": 0.05663888664131402, "grad_norm": 1.214944839477539, "learning_rate": 2.831944332065701e-06, "loss": 0.0735, "step": 7000 }, { "epoch": 0.0567197993365159, "grad_norm": 2.056596517562866, "learning_rate": 2.835989966825795e-06, "loss": 0.0777, "step": 7010 }, { "epoch": 0.05680071203171778, "grad_norm": 1.63990318775177, "learning_rate": 2.8400356015858895e-06, "loss": 0.0732, "step": 7020 }, { "epoch": 0.056881624726919655, "grad_norm": 1.6985172033309937, "learning_rate": 2.844081236345983e-06, "loss": 0.0727, "step": 7030 }, { "epoch": 0.05696253742212153, "grad_norm": 1.3261772394180298, "learning_rate": 2.8481268711060765e-06, "loss": 0.0819, "step": 7040 }, { "epoch": 0.05704345011732341, "grad_norm": 1.5816633701324463, "learning_rate": 2.852172505866171e-06, "loss": 0.0662, "step": 7050 }, { "epoch": 0.05712436281252529, "grad_norm": 1.6664072275161743, "learning_rate": 2.8562181406262644e-06, "loss": 0.083, "step": 7060 }, { "epoch": 0.057205275507727166, "grad_norm": 1.1443909406661987, "learning_rate": 2.8602637753863584e-06, "loss": 0.0976, "step": 7070 }, { "epoch": 0.057286188202929036, "grad_norm": 1.7200042009353638, "learning_rate": 2.8643094101464523e-06, "loss": 0.084, "step": 7080 }, { "epoch": 0.057367100898130914, "grad_norm": 1.9063706398010254, "learning_rate": 2.8683550449065462e-06, "loss": 0.0816, "step": 7090 }, { "epoch": 0.05744801359333279, "grad_norm": 1.3849279880523682, "learning_rate": 2.8724006796666398e-06, "loss": 0.0881, "step": 7100 }, { "epoch": 0.05752892628853467, "grad_norm": 1.4877018928527832, "learning_rate": 2.876446314426734e-06, "loss": 0.0757, "step": 7110 }, { "epoch": 0.05760983898373655, "grad_norm": 1.5855036973953247, "learning_rate": 2.8804919491868276e-06, "loss": 0.0639, "step": 7120 }, { "epoch": 0.057690751678938425, "grad_norm": 2.3853633403778076, "learning_rate": 2.8845375839469216e-06, "loss": 0.1056, "step": 7130 }, { "epoch": 0.0577716643741403, "grad_norm": 1.91507887840271, "learning_rate": 2.8885832187070155e-06, "loss": 0.0812, "step": 7140 }, { "epoch": 0.05785257706934218, "grad_norm": 1.9371412992477417, "learning_rate": 2.8926288534671095e-06, "loss": 0.0898, "step": 7150 }, { "epoch": 0.05793348976454406, "grad_norm": 2.842005729675293, "learning_rate": 2.896674488227203e-06, "loss": 0.0719, "step": 7160 }, { "epoch": 0.058014402459745935, "grad_norm": 0.9570710062980652, "learning_rate": 2.9007201229872973e-06, "loss": 0.0572, "step": 7170 }, { "epoch": 0.05809531515494781, "grad_norm": 1.5243322849273682, "learning_rate": 2.904765757747391e-06, "loss": 0.0519, "step": 7180 }, { "epoch": 0.05817622785014969, "grad_norm": 1.5047048330307007, "learning_rate": 2.9088113925074844e-06, "loss": 0.0682, "step": 7190 }, { "epoch": 0.05825714054535157, "grad_norm": 2.070338726043701, "learning_rate": 2.9128570272675787e-06, "loss": 0.1019, "step": 7200 }, { "epoch": 0.058338053240553446, "grad_norm": 2.220769166946411, "learning_rate": 2.9169026620276727e-06, "loss": 0.0913, "step": 7210 }, { "epoch": 0.05841896593575532, "grad_norm": 2.3951416015625, "learning_rate": 2.920948296787766e-06, "loss": 0.0681, "step": 7220 }, { "epoch": 0.058499878630957194, "grad_norm": 1.8020451068878174, "learning_rate": 2.9249939315478597e-06, "loss": 0.0613, "step": 7230 }, { "epoch": 0.05858079132615907, "grad_norm": 1.375316858291626, "learning_rate": 2.929039566307954e-06, "loss": 0.0798, "step": 7240 }, { "epoch": 0.05866170402136095, "grad_norm": 1.434489369392395, "learning_rate": 2.9330852010680476e-06, "loss": 0.0645, "step": 7250 }, { "epoch": 0.05874261671656283, "grad_norm": 1.016425371170044, "learning_rate": 2.9371308358281415e-06, "loss": 0.0735, "step": 7260 }, { "epoch": 0.058823529411764705, "grad_norm": 2.0587422847747803, "learning_rate": 2.9411764705882355e-06, "loss": 0.0643, "step": 7270 }, { "epoch": 0.05890444210696658, "grad_norm": 1.6542421579360962, "learning_rate": 2.9452221053483294e-06, "loss": 0.0891, "step": 7280 }, { "epoch": 0.05898535480216846, "grad_norm": 1.1508080959320068, "learning_rate": 2.949267740108423e-06, "loss": 0.1094, "step": 7290 }, { "epoch": 0.05906626749737034, "grad_norm": 1.4398283958435059, "learning_rate": 2.9533133748685173e-06, "loss": 0.0884, "step": 7300 }, { "epoch": 0.059147180192572216, "grad_norm": 2.4014689922332764, "learning_rate": 2.957359009628611e-06, "loss": 0.0678, "step": 7310 }, { "epoch": 0.05922809288777409, "grad_norm": 1.1609185934066772, "learning_rate": 2.9614046443887047e-06, "loss": 0.0768, "step": 7320 }, { "epoch": 0.05930900558297597, "grad_norm": 1.6733548641204834, "learning_rate": 2.9654502791487987e-06, "loss": 0.0756, "step": 7330 }, { "epoch": 0.05938991827817785, "grad_norm": 1.4795444011688232, "learning_rate": 2.9694959139088926e-06, "loss": 0.0798, "step": 7340 }, { "epoch": 0.059470830973379726, "grad_norm": 2.1716396808624268, "learning_rate": 2.973541548668986e-06, "loss": 0.0642, "step": 7350 }, { "epoch": 0.0595517436685816, "grad_norm": 1.0814168453216553, "learning_rate": 2.9775871834290805e-06, "loss": 0.0645, "step": 7360 }, { "epoch": 0.059632656363783475, "grad_norm": 2.6835038661956787, "learning_rate": 2.981632818189174e-06, "loss": 0.0851, "step": 7370 }, { "epoch": 0.05971356905898535, "grad_norm": 1.3090304136276245, "learning_rate": 2.9856784529492675e-06, "loss": 0.0833, "step": 7380 }, { "epoch": 0.05979448175418723, "grad_norm": 2.1190247535705566, "learning_rate": 2.989724087709362e-06, "loss": 0.0818, "step": 7390 }, { "epoch": 0.05987539444938911, "grad_norm": 3.2471587657928467, "learning_rate": 2.993769722469456e-06, "loss": 0.0898, "step": 7400 }, { "epoch": 0.059956307144590985, "grad_norm": 2.249889612197876, "learning_rate": 2.9978153572295493e-06, "loss": 0.0816, "step": 7410 }, { "epoch": 0.06003721983979286, "grad_norm": 1.7754931449890137, "learning_rate": 3.0018609919896437e-06, "loss": 0.0743, "step": 7420 }, { "epoch": 0.06011813253499474, "grad_norm": 1.8923991918563843, "learning_rate": 3.0059066267497372e-06, "loss": 0.0589, "step": 7430 }, { "epoch": 0.06019904523019662, "grad_norm": 2.090233325958252, "learning_rate": 3.0099522615098307e-06, "loss": 0.0759, "step": 7440 }, { "epoch": 0.060279957925398496, "grad_norm": 1.7957127094268799, "learning_rate": 3.013997896269925e-06, "loss": 0.062, "step": 7450 }, { "epoch": 0.060360870620600374, "grad_norm": 1.137239694595337, "learning_rate": 3.0180435310300186e-06, "loss": 0.0848, "step": 7460 }, { "epoch": 0.06044178331580225, "grad_norm": 1.5551917552947998, "learning_rate": 3.0220891657901126e-06, "loss": 0.0737, "step": 7470 }, { "epoch": 0.06052269601100413, "grad_norm": 1.3482359647750854, "learning_rate": 3.0261348005502065e-06, "loss": 0.0672, "step": 7480 }, { "epoch": 0.06060360870620601, "grad_norm": 1.6083009243011475, "learning_rate": 3.0301804353103004e-06, "loss": 0.0885, "step": 7490 }, { "epoch": 0.060684521401407884, "grad_norm": 0.4061047434806824, "learning_rate": 3.034226070070394e-06, "loss": 0.076, "step": 7500 }, { "epoch": 0.060765434096609755, "grad_norm": 1.2746232748031616, "learning_rate": 3.0382717048304883e-06, "loss": 0.0646, "step": 7510 }, { "epoch": 0.06084634679181163, "grad_norm": 1.6720476150512695, "learning_rate": 3.042317339590582e-06, "loss": 0.0868, "step": 7520 }, { "epoch": 0.06092725948701351, "grad_norm": 1.6138468980789185, "learning_rate": 3.0463629743506758e-06, "loss": 0.0677, "step": 7530 }, { "epoch": 0.06100817218221539, "grad_norm": 1.8931105136871338, "learning_rate": 3.0504086091107697e-06, "loss": 0.1077, "step": 7540 }, { "epoch": 0.061089084877417266, "grad_norm": 2.206317186355591, "learning_rate": 3.0544542438708637e-06, "loss": 0.0722, "step": 7550 }, { "epoch": 0.06116999757261914, "grad_norm": 1.1887191534042358, "learning_rate": 3.058499878630957e-06, "loss": 0.0932, "step": 7560 }, { "epoch": 0.06125091026782102, "grad_norm": 2.4578492641448975, "learning_rate": 3.0625455133910515e-06, "loss": 0.0522, "step": 7570 }, { "epoch": 0.0613318229630229, "grad_norm": 0.835616946220398, "learning_rate": 3.066591148151145e-06, "loss": 0.0575, "step": 7580 }, { "epoch": 0.061412735658224776, "grad_norm": 1.9980968236923218, "learning_rate": 3.070636782911239e-06, "loss": 0.0745, "step": 7590 }, { "epoch": 0.061493648353426654, "grad_norm": 1.1226232051849365, "learning_rate": 3.074682417671333e-06, "loss": 0.0581, "step": 7600 }, { "epoch": 0.06157456104862853, "grad_norm": 1.6026309728622437, "learning_rate": 3.078728052431427e-06, "loss": 0.0641, "step": 7610 }, { "epoch": 0.06165547374383041, "grad_norm": 1.911146879196167, "learning_rate": 3.0827736871915204e-06, "loss": 0.082, "step": 7620 }, { "epoch": 0.06173638643903229, "grad_norm": 1.6235730648040771, "learning_rate": 3.0868193219516148e-06, "loss": 0.087, "step": 7630 }, { "epoch": 0.061817299134234165, "grad_norm": 1.2142730951309204, "learning_rate": 3.0908649567117083e-06, "loss": 0.0674, "step": 7640 }, { "epoch": 0.061898211829436035, "grad_norm": 0.7371937036514282, "learning_rate": 3.094910591471802e-06, "loss": 0.0788, "step": 7650 }, { "epoch": 0.06197912452463791, "grad_norm": 2.0774545669555664, "learning_rate": 3.098956226231896e-06, "loss": 0.0744, "step": 7660 }, { "epoch": 0.06206003721983979, "grad_norm": 1.4690132141113281, "learning_rate": 3.1030018609919897e-06, "loss": 0.0673, "step": 7670 }, { "epoch": 0.06214094991504167, "grad_norm": 1.1372771263122559, "learning_rate": 3.1070474957520836e-06, "loss": 0.0692, "step": 7680 }, { "epoch": 0.062221862610243546, "grad_norm": 1.8690402507781982, "learning_rate": 3.111093130512178e-06, "loss": 0.0651, "step": 7690 }, { "epoch": 0.062302775305445424, "grad_norm": 1.3322442770004272, "learning_rate": 3.1151387652722715e-06, "loss": 0.0524, "step": 7700 }, { "epoch": 0.0623836880006473, "grad_norm": 1.110657811164856, "learning_rate": 3.119184400032365e-06, "loss": 0.0701, "step": 7710 }, { "epoch": 0.06246460069584918, "grad_norm": 1.6420366764068604, "learning_rate": 3.1232300347924594e-06, "loss": 0.0738, "step": 7720 }, { "epoch": 0.06254551339105105, "grad_norm": 1.3318116664886475, "learning_rate": 3.127275669552553e-06, "loss": 0.0696, "step": 7730 }, { "epoch": 0.06262642608625293, "grad_norm": 1.7117449045181274, "learning_rate": 3.131321304312647e-06, "loss": 0.0695, "step": 7740 }, { "epoch": 0.0627073387814548, "grad_norm": 2.659878969192505, "learning_rate": 3.1353669390727408e-06, "loss": 0.1053, "step": 7750 }, { "epoch": 0.06278825147665669, "grad_norm": 1.8101129531860352, "learning_rate": 3.1394125738328347e-06, "loss": 0.0837, "step": 7760 }, { "epoch": 0.06286916417185856, "grad_norm": 1.4311121702194214, "learning_rate": 3.1434582085929282e-06, "loss": 0.0665, "step": 7770 }, { "epoch": 0.06295007686706044, "grad_norm": 1.630977988243103, "learning_rate": 3.1475038433530226e-06, "loss": 0.0705, "step": 7780 }, { "epoch": 0.06303098956226232, "grad_norm": 1.2187050580978394, "learning_rate": 3.151549478113116e-06, "loss": 0.0709, "step": 7790 }, { "epoch": 0.0631119022574642, "grad_norm": 2.419186592102051, "learning_rate": 3.15559511287321e-06, "loss": 0.1027, "step": 7800 }, { "epoch": 0.06319281495266607, "grad_norm": 1.0062575340270996, "learning_rate": 3.159640747633304e-06, "loss": 0.0841, "step": 7810 }, { "epoch": 0.06327372764786796, "grad_norm": 2.1198017597198486, "learning_rate": 3.163686382393398e-06, "loss": 0.0594, "step": 7820 }, { "epoch": 0.06335464034306983, "grad_norm": 1.2096731662750244, "learning_rate": 3.1677320171534914e-06, "loss": 0.0675, "step": 7830 }, { "epoch": 0.06343555303827171, "grad_norm": 1.2512304782867432, "learning_rate": 3.171777651913586e-06, "loss": 0.0636, "step": 7840 }, { "epoch": 0.06351646573347358, "grad_norm": 1.7627720832824707, "learning_rate": 3.1758232866736793e-06, "loss": 0.0762, "step": 7850 }, { "epoch": 0.06359737842867545, "grad_norm": 1.7883925437927246, "learning_rate": 3.179868921433773e-06, "loss": 0.0823, "step": 7860 }, { "epoch": 0.06367829112387734, "grad_norm": 1.7250488996505737, "learning_rate": 3.183914556193867e-06, "loss": 0.0818, "step": 7870 }, { "epoch": 0.06375920381907921, "grad_norm": 2.3296661376953125, "learning_rate": 3.1879601909539607e-06, "loss": 0.0665, "step": 7880 }, { "epoch": 0.06384011651428109, "grad_norm": 1.81304132938385, "learning_rate": 3.1920058257140547e-06, "loss": 0.0866, "step": 7890 }, { "epoch": 0.06392102920948296, "grad_norm": 1.7944762706756592, "learning_rate": 3.196051460474149e-06, "loss": 0.093, "step": 7900 }, { "epoch": 0.06400194190468485, "grad_norm": 1.783581256866455, "learning_rate": 3.2000970952342425e-06, "loss": 0.0947, "step": 7910 }, { "epoch": 0.06408285459988672, "grad_norm": 0.8311818838119507, "learning_rate": 3.204142729994336e-06, "loss": 0.0914, "step": 7920 }, { "epoch": 0.0641637672950886, "grad_norm": 0.6540054678916931, "learning_rate": 3.2081883647544304e-06, "loss": 0.0717, "step": 7930 }, { "epoch": 0.06424467999029047, "grad_norm": 2.3822243213653564, "learning_rate": 3.212233999514524e-06, "loss": 0.0525, "step": 7940 }, { "epoch": 0.06432559268549236, "grad_norm": 1.2979589700698853, "learning_rate": 3.216279634274618e-06, "loss": 0.0642, "step": 7950 }, { "epoch": 0.06440650538069423, "grad_norm": 0.9231691956520081, "learning_rate": 3.220325269034712e-06, "loss": 0.0749, "step": 7960 }, { "epoch": 0.06448741807589611, "grad_norm": 1.0957375764846802, "learning_rate": 3.2243709037948057e-06, "loss": 0.066, "step": 7970 }, { "epoch": 0.06456833077109798, "grad_norm": 2.0658740997314453, "learning_rate": 3.2284165385548993e-06, "loss": 0.0801, "step": 7980 }, { "epoch": 0.06464924346629987, "grad_norm": 1.5835529565811157, "learning_rate": 3.2324621733149936e-06, "loss": 0.0698, "step": 7990 }, { "epoch": 0.06473015616150174, "grad_norm": 1.2505898475646973, "learning_rate": 3.236507808075087e-06, "loss": 0.0699, "step": 8000 }, { "epoch": 0.06481106885670361, "grad_norm": 2.2304975986480713, "learning_rate": 3.240553442835181e-06, "loss": 0.0806, "step": 8010 }, { "epoch": 0.0648919815519055, "grad_norm": 1.9247874021530151, "learning_rate": 3.244599077595275e-06, "loss": 0.0696, "step": 8020 }, { "epoch": 0.06497289424710737, "grad_norm": 1.0821748971939087, "learning_rate": 3.248644712355369e-06, "loss": 0.0736, "step": 8030 }, { "epoch": 0.06505380694230925, "grad_norm": 1.6276108026504517, "learning_rate": 3.2526903471154625e-06, "loss": 0.0877, "step": 8040 }, { "epoch": 0.06513471963751112, "grad_norm": 1.969282865524292, "learning_rate": 3.256735981875557e-06, "loss": 0.072, "step": 8050 }, { "epoch": 0.065215632332713, "grad_norm": 1.5022081136703491, "learning_rate": 3.2607816166356504e-06, "loss": 0.081, "step": 8060 }, { "epoch": 0.06529654502791488, "grad_norm": 1.4494491815567017, "learning_rate": 3.264827251395744e-06, "loss": 0.067, "step": 8070 }, { "epoch": 0.06537745772311676, "grad_norm": 1.7449439764022827, "learning_rate": 3.2688728861558382e-06, "loss": 0.0675, "step": 8080 }, { "epoch": 0.06545837041831863, "grad_norm": 2.032803773880005, "learning_rate": 3.272918520915932e-06, "loss": 0.0624, "step": 8090 }, { "epoch": 0.06553928311352052, "grad_norm": 1.5204404592514038, "learning_rate": 3.2769641556760257e-06, "loss": 0.0867, "step": 8100 }, { "epoch": 0.06562019580872239, "grad_norm": 2.047757863998413, "learning_rate": 3.28100979043612e-06, "loss": 0.0652, "step": 8110 }, { "epoch": 0.06570110850392427, "grad_norm": 1.371053695678711, "learning_rate": 3.2850554251962136e-06, "loss": 0.0568, "step": 8120 }, { "epoch": 0.06578202119912614, "grad_norm": 1.2375426292419434, "learning_rate": 3.289101059956307e-06, "loss": 0.0683, "step": 8130 }, { "epoch": 0.06586293389432803, "grad_norm": 0.9838273525238037, "learning_rate": 3.2931466947164015e-06, "loss": 0.0794, "step": 8140 }, { "epoch": 0.0659438465895299, "grad_norm": 1.418907642364502, "learning_rate": 3.297192329476495e-06, "loss": 0.0662, "step": 8150 }, { "epoch": 0.06602475928473177, "grad_norm": 1.4000850915908813, "learning_rate": 3.301237964236589e-06, "loss": 0.0674, "step": 8160 }, { "epoch": 0.06610567197993365, "grad_norm": 1.032943606376648, "learning_rate": 3.305283598996683e-06, "loss": 0.0631, "step": 8170 }, { "epoch": 0.06618658467513552, "grad_norm": 1.505902886390686, "learning_rate": 3.309329233756777e-06, "loss": 0.0721, "step": 8180 }, { "epoch": 0.06626749737033741, "grad_norm": 1.6510461568832397, "learning_rate": 3.3133748685168703e-06, "loss": 0.072, "step": 8190 }, { "epoch": 0.06634841006553928, "grad_norm": 1.4119733572006226, "learning_rate": 3.3174205032769647e-06, "loss": 0.0956, "step": 8200 }, { "epoch": 0.06642932276074116, "grad_norm": 1.3690910339355469, "learning_rate": 3.321466138037058e-06, "loss": 0.0671, "step": 8210 }, { "epoch": 0.06651023545594303, "grad_norm": 0.8352145552635193, "learning_rate": 3.325511772797152e-06, "loss": 0.0782, "step": 8220 }, { "epoch": 0.06659114815114492, "grad_norm": 1.069956660270691, "learning_rate": 3.329557407557246e-06, "loss": 0.0747, "step": 8230 }, { "epoch": 0.06667206084634679, "grad_norm": 1.1736551523208618, "learning_rate": 3.33360304231734e-06, "loss": 0.0694, "step": 8240 }, { "epoch": 0.06675297354154867, "grad_norm": 1.6922098398208618, "learning_rate": 3.3376486770774335e-06, "loss": 0.0674, "step": 8250 }, { "epoch": 0.06683388623675054, "grad_norm": 2.040254831314087, "learning_rate": 3.341694311837528e-06, "loss": 0.0581, "step": 8260 }, { "epoch": 0.06691479893195243, "grad_norm": 1.4485327005386353, "learning_rate": 3.3457399465976214e-06, "loss": 0.0748, "step": 8270 }, { "epoch": 0.0669957116271543, "grad_norm": 1.7088342905044556, "learning_rate": 3.3497855813577153e-06, "loss": 0.0741, "step": 8280 }, { "epoch": 0.06707662432235617, "grad_norm": 2.0632197856903076, "learning_rate": 3.3538312161178093e-06, "loss": 0.0802, "step": 8290 }, { "epoch": 0.06715753701755806, "grad_norm": 0.9086840748786926, "learning_rate": 3.3578768508779032e-06, "loss": 0.0789, "step": 8300 }, { "epoch": 0.06723844971275993, "grad_norm": 1.1648118495941162, "learning_rate": 3.3619224856379967e-06, "loss": 0.0735, "step": 8310 }, { "epoch": 0.06731936240796181, "grad_norm": 1.5164157152175903, "learning_rate": 3.365968120398091e-06, "loss": 0.0624, "step": 8320 }, { "epoch": 0.06740027510316368, "grad_norm": 1.6764589548110962, "learning_rate": 3.3700137551581846e-06, "loss": 0.0734, "step": 8330 }, { "epoch": 0.06748118779836557, "grad_norm": 1.4223599433898926, "learning_rate": 3.374059389918278e-06, "loss": 0.0626, "step": 8340 }, { "epoch": 0.06756210049356744, "grad_norm": 1.8249672651290894, "learning_rate": 3.3781050246783725e-06, "loss": 0.0649, "step": 8350 }, { "epoch": 0.06764301318876932, "grad_norm": 1.1940593719482422, "learning_rate": 3.382150659438466e-06, "loss": 0.06, "step": 8360 }, { "epoch": 0.06772392588397119, "grad_norm": 1.2903954982757568, "learning_rate": 3.38619629419856e-06, "loss": 0.0629, "step": 8370 }, { "epoch": 0.06780483857917308, "grad_norm": 1.6639394760131836, "learning_rate": 3.3902419289586543e-06, "loss": 0.0696, "step": 8380 }, { "epoch": 0.06788575127437495, "grad_norm": 1.7114551067352295, "learning_rate": 3.394287563718748e-06, "loss": 0.0777, "step": 8390 }, { "epoch": 0.06796666396957683, "grad_norm": 1.688876986503601, "learning_rate": 3.3983331984788414e-06, "loss": 0.0626, "step": 8400 }, { "epoch": 0.0680475766647787, "grad_norm": 1.9317920207977295, "learning_rate": 3.4023788332389357e-06, "loss": 0.0455, "step": 8410 }, { "epoch": 0.06812848935998059, "grad_norm": 2.4713335037231445, "learning_rate": 3.4064244679990292e-06, "loss": 0.0673, "step": 8420 }, { "epoch": 0.06820940205518246, "grad_norm": 2.0177061557769775, "learning_rate": 3.410470102759123e-06, "loss": 0.0878, "step": 8430 }, { "epoch": 0.06829031475038433, "grad_norm": 1.8558878898620605, "learning_rate": 3.414515737519217e-06, "loss": 0.0744, "step": 8440 }, { "epoch": 0.06837122744558621, "grad_norm": 0.9240636825561523, "learning_rate": 3.418561372279311e-06, "loss": 0.0639, "step": 8450 }, { "epoch": 0.06845214014078808, "grad_norm": 1.8149998188018799, "learning_rate": 3.4226070070394046e-06, "loss": 0.0427, "step": 8460 }, { "epoch": 0.06853305283598997, "grad_norm": 1.5105791091918945, "learning_rate": 3.426652641799499e-06, "loss": 0.063, "step": 8470 }, { "epoch": 0.06861396553119184, "grad_norm": 2.1153695583343506, "learning_rate": 3.4306982765595925e-06, "loss": 0.0825, "step": 8480 }, { "epoch": 0.06869487822639372, "grad_norm": 1.3344963788986206, "learning_rate": 3.4347439113196864e-06, "loss": 0.0643, "step": 8490 }, { "epoch": 0.0687757909215956, "grad_norm": 1.3398418426513672, "learning_rate": 3.4387895460797803e-06, "loss": 0.0854, "step": 8500 }, { "epoch": 0.06885670361679748, "grad_norm": 1.6496556997299194, "learning_rate": 3.4428351808398743e-06, "loss": 0.0763, "step": 8510 }, { "epoch": 0.06893761631199935, "grad_norm": 1.930051326751709, "learning_rate": 3.4468808155999678e-06, "loss": 0.0948, "step": 8520 }, { "epoch": 0.06901852900720123, "grad_norm": 1.7098206281661987, "learning_rate": 3.450926450360062e-06, "loss": 0.0639, "step": 8530 }, { "epoch": 0.0690994417024031, "grad_norm": 1.5728386640548706, "learning_rate": 3.4549720851201557e-06, "loss": 0.0895, "step": 8540 }, { "epoch": 0.06918035439760499, "grad_norm": 1.8594374656677246, "learning_rate": 3.459017719880249e-06, "loss": 0.0668, "step": 8550 }, { "epoch": 0.06926126709280686, "grad_norm": 0.9099370837211609, "learning_rate": 3.4630633546403435e-06, "loss": 0.0519, "step": 8560 }, { "epoch": 0.06934217978800875, "grad_norm": 1.3641165494918823, "learning_rate": 3.4671089894004375e-06, "loss": 0.0634, "step": 8570 }, { "epoch": 0.06942309248321062, "grad_norm": 1.0556641817092896, "learning_rate": 3.471154624160531e-06, "loss": 0.079, "step": 8580 }, { "epoch": 0.06950400517841249, "grad_norm": 2.2348711490631104, "learning_rate": 3.4752002589206254e-06, "loss": 0.0613, "step": 8590 }, { "epoch": 0.06958491787361437, "grad_norm": 2.141763925552368, "learning_rate": 3.479245893680719e-06, "loss": 0.0842, "step": 8600 }, { "epoch": 0.06966583056881624, "grad_norm": 1.1108667850494385, "learning_rate": 3.4832915284408124e-06, "loss": 0.0664, "step": 8610 }, { "epoch": 0.06974674326401813, "grad_norm": 1.18134343624115, "learning_rate": 3.4873371632009068e-06, "loss": 0.0816, "step": 8620 }, { "epoch": 0.06982765595922, "grad_norm": 1.1788785457611084, "learning_rate": 3.4913827979610003e-06, "loss": 0.0863, "step": 8630 }, { "epoch": 0.06990856865442188, "grad_norm": 1.2738920450210571, "learning_rate": 3.4954284327210942e-06, "loss": 0.0702, "step": 8640 }, { "epoch": 0.06998948134962375, "grad_norm": 1.126133680343628, "learning_rate": 3.499474067481188e-06, "loss": 0.0671, "step": 8650 }, { "epoch": 0.07007039404482564, "grad_norm": 0.9866386651992798, "learning_rate": 3.503519702241282e-06, "loss": 0.1103, "step": 8660 }, { "epoch": 0.07015130674002751, "grad_norm": 1.6493604183197021, "learning_rate": 3.5075653370013756e-06, "loss": 0.0866, "step": 8670 }, { "epoch": 0.07023221943522939, "grad_norm": 1.8559761047363281, "learning_rate": 3.51161097176147e-06, "loss": 0.0684, "step": 8680 }, { "epoch": 0.07031313213043126, "grad_norm": 1.4936630725860596, "learning_rate": 3.5156566065215635e-06, "loss": 0.077, "step": 8690 }, { "epoch": 0.07039404482563315, "grad_norm": 1.2676417827606201, "learning_rate": 3.5197022412816574e-06, "loss": 0.0672, "step": 8700 }, { "epoch": 0.07047495752083502, "grad_norm": 1.4024313688278198, "learning_rate": 3.5237478760417514e-06, "loss": 0.0598, "step": 8710 }, { "epoch": 0.07055587021603689, "grad_norm": 1.0576956272125244, "learning_rate": 3.5277935108018453e-06, "loss": 0.0802, "step": 8720 }, { "epoch": 0.07063678291123877, "grad_norm": 1.3864449262619019, "learning_rate": 3.531839145561939e-06, "loss": 0.0715, "step": 8730 }, { "epoch": 0.07071769560644064, "grad_norm": 1.274749994277954, "learning_rate": 3.535884780322033e-06, "loss": 0.0688, "step": 8740 }, { "epoch": 0.07079860830164253, "grad_norm": 1.461421012878418, "learning_rate": 3.5399304150821267e-06, "loss": 0.0622, "step": 8750 }, { "epoch": 0.0708795209968444, "grad_norm": 1.5121508836746216, "learning_rate": 3.5439760498422202e-06, "loss": 0.0783, "step": 8760 }, { "epoch": 0.07096043369204628, "grad_norm": 1.4315416812896729, "learning_rate": 3.5480216846023146e-06, "loss": 0.06, "step": 8770 }, { "epoch": 0.07104134638724816, "grad_norm": 2.0745110511779785, "learning_rate": 3.5520673193624085e-06, "loss": 0.083, "step": 8780 }, { "epoch": 0.07112225908245004, "grad_norm": 0.796010434627533, "learning_rate": 3.556112954122502e-06, "loss": 0.0735, "step": 8790 }, { "epoch": 0.07120317177765191, "grad_norm": 1.271601676940918, "learning_rate": 3.5601585888825964e-06, "loss": 0.0776, "step": 8800 }, { "epoch": 0.0712840844728538, "grad_norm": 1.5835487842559814, "learning_rate": 3.56420422364269e-06, "loss": 0.0904, "step": 8810 }, { "epoch": 0.07136499716805567, "grad_norm": 1.474135398864746, "learning_rate": 3.5682498584027834e-06, "loss": 0.0839, "step": 8820 }, { "epoch": 0.07144590986325755, "grad_norm": 1.5216957330703735, "learning_rate": 3.5722954931628774e-06, "loss": 0.0779, "step": 8830 }, { "epoch": 0.07152682255845942, "grad_norm": 1.4989694356918335, "learning_rate": 3.5763411279229713e-06, "loss": 0.063, "step": 8840 }, { "epoch": 0.0716077352536613, "grad_norm": 1.717637300491333, "learning_rate": 3.5803867626830653e-06, "loss": 0.0562, "step": 8850 }, { "epoch": 0.07168864794886318, "grad_norm": 2.2786219120025635, "learning_rate": 3.5844323974431588e-06, "loss": 0.0814, "step": 8860 }, { "epoch": 0.07176956064406505, "grad_norm": 1.65048348903656, "learning_rate": 3.588478032203253e-06, "loss": 0.0588, "step": 8870 }, { "epoch": 0.07185047333926693, "grad_norm": 1.9541914463043213, "learning_rate": 3.5925236669633467e-06, "loss": 0.0695, "step": 8880 }, { "epoch": 0.0719313860344688, "grad_norm": 1.2613444328308105, "learning_rate": 3.5965693017234406e-06, "loss": 0.0616, "step": 8890 }, { "epoch": 0.07201229872967069, "grad_norm": 1.214187741279602, "learning_rate": 3.6006149364835345e-06, "loss": 0.0775, "step": 8900 }, { "epoch": 0.07209321142487256, "grad_norm": 1.6483850479125977, "learning_rate": 3.6046605712436285e-06, "loss": 0.0698, "step": 8910 }, { "epoch": 0.07217412412007444, "grad_norm": 1.0347459316253662, "learning_rate": 3.608706206003722e-06, "loss": 0.0571, "step": 8920 }, { "epoch": 0.07225503681527631, "grad_norm": 2.202852725982666, "learning_rate": 3.6127518407638164e-06, "loss": 0.0845, "step": 8930 }, { "epoch": 0.0723359495104782, "grad_norm": 1.7404965162277222, "learning_rate": 3.61679747552391e-06, "loss": 0.0806, "step": 8940 }, { "epoch": 0.07241686220568007, "grad_norm": 1.3250675201416016, "learning_rate": 3.6208431102840034e-06, "loss": 0.045, "step": 8950 }, { "epoch": 0.07249777490088195, "grad_norm": 1.1592152118682861, "learning_rate": 3.6248887450440978e-06, "loss": 0.0575, "step": 8960 }, { "epoch": 0.07257868759608382, "grad_norm": 1.4487580060958862, "learning_rate": 3.6289343798041917e-06, "loss": 0.088, "step": 8970 }, { "epoch": 0.07265960029128571, "grad_norm": 1.216860055923462, "learning_rate": 3.6329800145642852e-06, "loss": 0.0732, "step": 8980 }, { "epoch": 0.07274051298648758, "grad_norm": 1.3487235307693481, "learning_rate": 3.6370256493243796e-06, "loss": 0.0588, "step": 8990 }, { "epoch": 0.07282142568168946, "grad_norm": 1.3965388536453247, "learning_rate": 3.641071284084473e-06, "loss": 0.0738, "step": 9000 }, { "epoch": 0.07290233837689133, "grad_norm": 1.7366269826889038, "learning_rate": 3.6451169188445666e-06, "loss": 0.0748, "step": 9010 }, { "epoch": 0.0729832510720932, "grad_norm": 1.4826035499572754, "learning_rate": 3.649162553604661e-06, "loss": 0.0742, "step": 9020 }, { "epoch": 0.07306416376729509, "grad_norm": 1.8000450134277344, "learning_rate": 3.6532081883647545e-06, "loss": 0.0605, "step": 9030 }, { "epoch": 0.07314507646249696, "grad_norm": 0.6600704789161682, "learning_rate": 3.6572538231248484e-06, "loss": 0.0611, "step": 9040 }, { "epoch": 0.07322598915769885, "grad_norm": 1.0148192644119263, "learning_rate": 3.6612994578849424e-06, "loss": 0.0847, "step": 9050 }, { "epoch": 0.07330690185290072, "grad_norm": 1.3741183280944824, "learning_rate": 3.6653450926450363e-06, "loss": 0.0888, "step": 9060 }, { "epoch": 0.0733878145481026, "grad_norm": 2.051671028137207, "learning_rate": 3.66939072740513e-06, "loss": 0.0584, "step": 9070 }, { "epoch": 0.07346872724330447, "grad_norm": 1.169702172279358, "learning_rate": 3.673436362165224e-06, "loss": 0.0814, "step": 9080 }, { "epoch": 0.07354963993850636, "grad_norm": 1.037855863571167, "learning_rate": 3.6774819969253177e-06, "loss": 0.0688, "step": 9090 }, { "epoch": 0.07363055263370823, "grad_norm": 0.7618744373321533, "learning_rate": 3.6815276316854116e-06, "loss": 0.0454, "step": 9100 }, { "epoch": 0.07371146532891011, "grad_norm": 1.8877677917480469, "learning_rate": 3.6855732664455056e-06, "loss": 0.0794, "step": 9110 }, { "epoch": 0.07379237802411198, "grad_norm": 1.3045645952224731, "learning_rate": 3.6896189012055995e-06, "loss": 0.0369, "step": 9120 }, { "epoch": 0.07387329071931387, "grad_norm": 1.329563856124878, "learning_rate": 3.693664535965693e-06, "loss": 0.082, "step": 9130 }, { "epoch": 0.07395420341451574, "grad_norm": 1.3308671712875366, "learning_rate": 3.6977101707257874e-06, "loss": 0.0596, "step": 9140 }, { "epoch": 0.07403511610971761, "grad_norm": 0.946377158164978, "learning_rate": 3.701755805485881e-06, "loss": 0.0467, "step": 9150 }, { "epoch": 0.07411602880491949, "grad_norm": 1.5094714164733887, "learning_rate": 3.705801440245975e-06, "loss": 0.0617, "step": 9160 }, { "epoch": 0.07419694150012136, "grad_norm": 1.7017838954925537, "learning_rate": 3.709847075006069e-06, "loss": 0.0689, "step": 9170 }, { "epoch": 0.07427785419532325, "grad_norm": 1.8462891578674316, "learning_rate": 3.7138927097661627e-06, "loss": 0.0829, "step": 9180 }, { "epoch": 0.07435876689052512, "grad_norm": 1.3410199880599976, "learning_rate": 3.7179383445262563e-06, "loss": 0.0846, "step": 9190 }, { "epoch": 0.074439679585727, "grad_norm": 1.765282392501831, "learning_rate": 3.7219839792863506e-06, "loss": 0.0873, "step": 9200 }, { "epoch": 0.07452059228092887, "grad_norm": 1.526738166809082, "learning_rate": 3.726029614046444e-06, "loss": 0.0739, "step": 9210 }, { "epoch": 0.07460150497613076, "grad_norm": 1.4071239233016968, "learning_rate": 3.7300752488065377e-06, "loss": 0.0711, "step": 9220 }, { "epoch": 0.07468241767133263, "grad_norm": 3.191854953765869, "learning_rate": 3.734120883566632e-06, "loss": 0.0652, "step": 9230 }, { "epoch": 0.07476333036653451, "grad_norm": 1.4601702690124512, "learning_rate": 3.7381665183267255e-06, "loss": 0.0614, "step": 9240 }, { "epoch": 0.07484424306173638, "grad_norm": 1.2909799814224243, "learning_rate": 3.7422121530868195e-06, "loss": 0.0923, "step": 9250 }, { "epoch": 0.07492515575693827, "grad_norm": 1.7589178085327148, "learning_rate": 3.746257787846914e-06, "loss": 0.0451, "step": 9260 }, { "epoch": 0.07500606845214014, "grad_norm": 2.3708198070526123, "learning_rate": 3.7503034226070074e-06, "loss": 0.0832, "step": 9270 }, { "epoch": 0.07508698114734202, "grad_norm": 1.9902535676956177, "learning_rate": 3.754349057367101e-06, "loss": 0.0657, "step": 9280 }, { "epoch": 0.0751678938425439, "grad_norm": 1.3832206726074219, "learning_rate": 3.7583946921271952e-06, "loss": 0.0816, "step": 9290 }, { "epoch": 0.07524880653774577, "grad_norm": 1.0463765859603882, "learning_rate": 3.7624403268872887e-06, "loss": 0.0505, "step": 9300 }, { "epoch": 0.07532971923294765, "grad_norm": 0.9547312259674072, "learning_rate": 3.7664859616473827e-06, "loss": 0.0858, "step": 9310 }, { "epoch": 0.07541063192814952, "grad_norm": 1.2685680389404297, "learning_rate": 3.7705315964074766e-06, "loss": 0.0684, "step": 9320 }, { "epoch": 0.0754915446233514, "grad_norm": 1.9537209272384644, "learning_rate": 3.7745772311675706e-06, "loss": 0.0664, "step": 9330 }, { "epoch": 0.07557245731855328, "grad_norm": 1.1992720365524292, "learning_rate": 3.778622865927664e-06, "loss": 0.0542, "step": 9340 }, { "epoch": 0.07565337001375516, "grad_norm": 1.0105282068252563, "learning_rate": 3.7826685006877584e-06, "loss": 0.0655, "step": 9350 }, { "epoch": 0.07573428270895703, "grad_norm": 1.5447996854782104, "learning_rate": 3.786714135447852e-06, "loss": 0.0518, "step": 9360 }, { "epoch": 0.07581519540415892, "grad_norm": 1.561934471130371, "learning_rate": 3.790759770207946e-06, "loss": 0.0765, "step": 9370 }, { "epoch": 0.07589610809936079, "grad_norm": 1.3251172304153442, "learning_rate": 3.79480540496804e-06, "loss": 0.0685, "step": 9380 }, { "epoch": 0.07597702079456267, "grad_norm": 1.4267700910568237, "learning_rate": 3.7988510397281338e-06, "loss": 0.0813, "step": 9390 }, { "epoch": 0.07605793348976454, "grad_norm": 1.1293842792510986, "learning_rate": 3.8028966744882273e-06, "loss": 0.083, "step": 9400 }, { "epoch": 0.07613884618496643, "grad_norm": 1.3470066785812378, "learning_rate": 3.8069423092483217e-06, "loss": 0.0519, "step": 9410 }, { "epoch": 0.0762197588801683, "grad_norm": 0.572001039981842, "learning_rate": 3.810987944008415e-06, "loss": 0.0522, "step": 9420 }, { "epoch": 0.07630067157537017, "grad_norm": 2.186087131500244, "learning_rate": 3.815033578768509e-06, "loss": 0.052, "step": 9430 }, { "epoch": 0.07638158427057205, "grad_norm": 1.509737253189087, "learning_rate": 3.819079213528603e-06, "loss": 0.0606, "step": 9440 }, { "epoch": 0.07646249696577392, "grad_norm": 1.6033849716186523, "learning_rate": 3.823124848288697e-06, "loss": 0.0571, "step": 9450 }, { "epoch": 0.07654340966097581, "grad_norm": 0.9659163355827332, "learning_rate": 3.82717048304879e-06, "loss": 0.0901, "step": 9460 }, { "epoch": 0.07662432235617768, "grad_norm": 0.8776724338531494, "learning_rate": 3.831216117808885e-06, "loss": 0.0388, "step": 9470 }, { "epoch": 0.07670523505137956, "grad_norm": 1.8847815990447998, "learning_rate": 3.835261752568978e-06, "loss": 0.0733, "step": 9480 }, { "epoch": 0.07678614774658143, "grad_norm": 1.6647385358810425, "learning_rate": 3.839307387329072e-06, "loss": 0.0592, "step": 9490 }, { "epoch": 0.07686706044178332, "grad_norm": 1.7379951477050781, "learning_rate": 3.843353022089166e-06, "loss": 0.0688, "step": 9500 }, { "epoch": 0.07694797313698519, "grad_norm": 1.1133286952972412, "learning_rate": 3.84739865684926e-06, "loss": 0.0615, "step": 9510 }, { "epoch": 0.07702888583218707, "grad_norm": 0.9045236706733704, "learning_rate": 3.851444291609354e-06, "loss": 0.0549, "step": 9520 }, { "epoch": 0.07710979852738895, "grad_norm": 1.149172306060791, "learning_rate": 3.855489926369448e-06, "loss": 0.0548, "step": 9530 }, { "epoch": 0.07719071122259083, "grad_norm": 1.100716471672058, "learning_rate": 3.859535561129542e-06, "loss": 0.0717, "step": 9540 }, { "epoch": 0.0772716239177927, "grad_norm": 1.14410400390625, "learning_rate": 3.8635811958896356e-06, "loss": 0.0715, "step": 9550 }, { "epoch": 0.07735253661299459, "grad_norm": 1.3925143480300903, "learning_rate": 3.8676268306497295e-06, "loss": 0.0719, "step": 9560 }, { "epoch": 0.07743344930819646, "grad_norm": 0.9582210183143616, "learning_rate": 3.8716724654098234e-06, "loss": 0.0689, "step": 9570 }, { "epoch": 0.07751436200339833, "grad_norm": 1.3763693571090698, "learning_rate": 3.8757181001699165e-06, "loss": 0.0728, "step": 9580 }, { "epoch": 0.07759527469860021, "grad_norm": 2.0275015830993652, "learning_rate": 3.879763734930011e-06, "loss": 0.0658, "step": 9590 }, { "epoch": 0.07767618739380208, "grad_norm": 1.4800351858139038, "learning_rate": 3.883809369690104e-06, "loss": 0.0543, "step": 9600 }, { "epoch": 0.07775710008900397, "grad_norm": 1.330930471420288, "learning_rate": 3.887855004450198e-06, "loss": 0.0585, "step": 9610 }, { "epoch": 0.07783801278420584, "grad_norm": 1.2575589418411255, "learning_rate": 3.891900639210292e-06, "loss": 0.0556, "step": 9620 }, { "epoch": 0.07791892547940772, "grad_norm": 1.6855298280715942, "learning_rate": 3.895946273970386e-06, "loss": 0.0597, "step": 9630 }, { "epoch": 0.07799983817460959, "grad_norm": 1.1208267211914062, "learning_rate": 3.89999190873048e-06, "loss": 0.0467, "step": 9640 }, { "epoch": 0.07808075086981148, "grad_norm": 1.0028406381607056, "learning_rate": 3.904037543490574e-06, "loss": 0.0747, "step": 9650 }, { "epoch": 0.07816166356501335, "grad_norm": 1.7908211946487427, "learning_rate": 3.908083178250668e-06, "loss": 0.0583, "step": 9660 }, { "epoch": 0.07824257626021523, "grad_norm": 1.4064630270004272, "learning_rate": 3.912128813010761e-06, "loss": 0.0594, "step": 9670 }, { "epoch": 0.0783234889554171, "grad_norm": 0.8017114400863647, "learning_rate": 3.916174447770856e-06, "loss": 0.0589, "step": 9680 }, { "epoch": 0.07840440165061899, "grad_norm": 0.676328182220459, "learning_rate": 3.920220082530949e-06, "loss": 0.0625, "step": 9690 }, { "epoch": 0.07848531434582086, "grad_norm": 1.8198946714401245, "learning_rate": 3.924265717291043e-06, "loss": 0.0711, "step": 9700 }, { "epoch": 0.07856622704102274, "grad_norm": 0.9824803471565247, "learning_rate": 3.928311352051138e-06, "loss": 0.0607, "step": 9710 }, { "epoch": 0.07864713973622461, "grad_norm": 1.024584412574768, "learning_rate": 3.932356986811231e-06, "loss": 0.0709, "step": 9720 }, { "epoch": 0.07872805243142648, "grad_norm": 1.216973900794983, "learning_rate": 3.936402621571325e-06, "loss": 0.0803, "step": 9730 }, { "epoch": 0.07880896512662837, "grad_norm": 1.1551872491836548, "learning_rate": 3.940448256331419e-06, "loss": 0.0761, "step": 9740 }, { "epoch": 0.07888987782183024, "grad_norm": 1.4305994510650635, "learning_rate": 3.944493891091513e-06, "loss": 0.0792, "step": 9750 }, { "epoch": 0.07897079051703212, "grad_norm": 1.3744990825653076, "learning_rate": 3.948539525851607e-06, "loss": 0.0806, "step": 9760 }, { "epoch": 0.079051703212234, "grad_norm": 1.1050372123718262, "learning_rate": 3.9525851606117005e-06, "loss": 0.0514, "step": 9770 }, { "epoch": 0.07913261590743588, "grad_norm": 2.0720157623291016, "learning_rate": 3.9566307953717945e-06, "loss": 0.0914, "step": 9780 }, { "epoch": 0.07921352860263775, "grad_norm": 1.667354702949524, "learning_rate": 3.9606764301318876e-06, "loss": 0.0588, "step": 9790 }, { "epoch": 0.07929444129783964, "grad_norm": 0.9548352360725403, "learning_rate": 3.964722064891982e-06, "loss": 0.066, "step": 9800 }, { "epoch": 0.0793753539930415, "grad_norm": 0.6282824277877808, "learning_rate": 3.9687676996520754e-06, "loss": 0.0601, "step": 9810 }, { "epoch": 0.07945626668824339, "grad_norm": 1.2516272068023682, "learning_rate": 3.972813334412169e-06, "loss": 0.0665, "step": 9820 }, { "epoch": 0.07953717938344526, "grad_norm": 1.0770076513290405, "learning_rate": 3.976858969172263e-06, "loss": 0.0826, "step": 9830 }, { "epoch": 0.07961809207864715, "grad_norm": 1.4057490825653076, "learning_rate": 3.980904603932357e-06, "loss": 0.0655, "step": 9840 }, { "epoch": 0.07969900477384902, "grad_norm": 0.9066917896270752, "learning_rate": 3.984950238692451e-06, "loss": 0.0548, "step": 9850 }, { "epoch": 0.07977991746905089, "grad_norm": 1.1360538005828857, "learning_rate": 3.988995873452545e-06, "loss": 0.0723, "step": 9860 }, { "epoch": 0.07986083016425277, "grad_norm": 1.739890456199646, "learning_rate": 3.993041508212639e-06, "loss": 0.0654, "step": 9870 }, { "epoch": 0.07994174285945464, "grad_norm": 1.6184861660003662, "learning_rate": 3.997087142972732e-06, "loss": 0.0853, "step": 9880 }, { "epoch": 0.08002265555465653, "grad_norm": 1.267410397529602, "learning_rate": 4.001132777732827e-06, "loss": 0.0736, "step": 9890 }, { "epoch": 0.0801035682498584, "grad_norm": 1.016761302947998, "learning_rate": 4.005178412492921e-06, "loss": 0.0906, "step": 9900 }, { "epoch": 0.08018448094506028, "grad_norm": 0.9323352575302124, "learning_rate": 4.009224047253014e-06, "loss": 0.0528, "step": 9910 }, { "epoch": 0.08026539364026215, "grad_norm": 1.0107556581497192, "learning_rate": 4.013269682013109e-06, "loss": 0.0427, "step": 9920 }, { "epoch": 0.08034630633546404, "grad_norm": 0.7629503011703491, "learning_rate": 4.017315316773202e-06, "loss": 0.0672, "step": 9930 }, { "epoch": 0.08042721903066591, "grad_norm": 1.0755189657211304, "learning_rate": 4.021360951533296e-06, "loss": 0.0436, "step": 9940 }, { "epoch": 0.0805081317258678, "grad_norm": 1.4091880321502686, "learning_rate": 4.02540658629339e-06, "loss": 0.0572, "step": 9950 }, { "epoch": 0.08058904442106966, "grad_norm": 1.3048810958862305, "learning_rate": 4.029452221053484e-06, "loss": 0.0748, "step": 9960 }, { "epoch": 0.08066995711627155, "grad_norm": 1.5315134525299072, "learning_rate": 4.033497855813578e-06, "loss": 0.0684, "step": 9970 }, { "epoch": 0.08075086981147342, "grad_norm": 1.6610256433486938, "learning_rate": 4.037543490573672e-06, "loss": 0.0545, "step": 9980 }, { "epoch": 0.0808317825066753, "grad_norm": 1.8054087162017822, "learning_rate": 4.0415891253337655e-06, "loss": 0.0817, "step": 9990 }, { "epoch": 0.08091269520187717, "grad_norm": 1.4543932676315308, "learning_rate": 4.045634760093859e-06, "loss": 0.0635, "step": 10000 }, { "epoch": 0.08099360789707905, "grad_norm": 1.4921393394470215, "learning_rate": 4.049680394853953e-06, "loss": 0.0706, "step": 10010 }, { "epoch": 0.08107452059228093, "grad_norm": 1.6504409313201904, "learning_rate": 4.0537260296140465e-06, "loss": 0.0796, "step": 10020 }, { "epoch": 0.0811554332874828, "grad_norm": 1.413742184638977, "learning_rate": 4.0577716643741404e-06, "loss": 0.0757, "step": 10030 }, { "epoch": 0.08123634598268469, "grad_norm": 1.214687466621399, "learning_rate": 4.061817299134234e-06, "loss": 0.0645, "step": 10040 }, { "epoch": 0.08131725867788656, "grad_norm": 0.9954489469528198, "learning_rate": 4.065862933894328e-06, "loss": 0.0714, "step": 10050 }, { "epoch": 0.08139817137308844, "grad_norm": 1.3402061462402344, "learning_rate": 4.069908568654422e-06, "loss": 0.0578, "step": 10060 }, { "epoch": 0.08147908406829031, "grad_norm": 1.695314884185791, "learning_rate": 4.073954203414516e-06, "loss": 0.0978, "step": 10070 }, { "epoch": 0.0815599967634922, "grad_norm": 1.2190765142440796, "learning_rate": 4.07799983817461e-06, "loss": 0.0599, "step": 10080 }, { "epoch": 0.08164090945869407, "grad_norm": 1.4077256917953491, "learning_rate": 4.082045472934704e-06, "loss": 0.0613, "step": 10090 }, { "epoch": 0.08172182215389595, "grad_norm": 1.379207968711853, "learning_rate": 4.086091107694798e-06, "loss": 0.0723, "step": 10100 }, { "epoch": 0.08180273484909782, "grad_norm": 1.7054932117462158, "learning_rate": 4.090136742454892e-06, "loss": 0.0648, "step": 10110 }, { "epoch": 0.0818836475442997, "grad_norm": 0.9840673804283142, "learning_rate": 4.094182377214985e-06, "loss": 0.0726, "step": 10120 }, { "epoch": 0.08196456023950158, "grad_norm": 0.9831422567367554, "learning_rate": 4.09822801197508e-06, "loss": 0.0865, "step": 10130 }, { "epoch": 0.08204547293470346, "grad_norm": 1.0750291347503662, "learning_rate": 4.102273646735173e-06, "loss": 0.0676, "step": 10140 }, { "epoch": 0.08212638562990533, "grad_norm": 1.8019814491271973, "learning_rate": 4.106319281495267e-06, "loss": 0.07, "step": 10150 }, { "epoch": 0.0822072983251072, "grad_norm": 1.0518972873687744, "learning_rate": 4.110364916255361e-06, "loss": 0.0873, "step": 10160 }, { "epoch": 0.08228821102030909, "grad_norm": 1.0478688478469849, "learning_rate": 4.114410551015455e-06, "loss": 0.0547, "step": 10170 }, { "epoch": 0.08236912371551096, "grad_norm": 1.348955750465393, "learning_rate": 4.118456185775549e-06, "loss": 0.0712, "step": 10180 }, { "epoch": 0.08245003641071284, "grad_norm": 1.3341090679168701, "learning_rate": 4.122501820535643e-06, "loss": 0.0759, "step": 10190 }, { "epoch": 0.08253094910591471, "grad_norm": 0.9327678084373474, "learning_rate": 4.1265474552957366e-06, "loss": 0.0826, "step": 10200 }, { "epoch": 0.0826118618011166, "grad_norm": 1.0353797674179077, "learning_rate": 4.13059309005583e-06, "loss": 0.0718, "step": 10210 }, { "epoch": 0.08269277449631847, "grad_norm": 2.377943992614746, "learning_rate": 4.1346387248159244e-06, "loss": 0.0631, "step": 10220 }, { "epoch": 0.08277368719152035, "grad_norm": 0.847554624080658, "learning_rate": 4.1386843595760175e-06, "loss": 0.0607, "step": 10230 }, { "epoch": 0.08285459988672222, "grad_norm": 1.4127753973007202, "learning_rate": 4.1427299943361115e-06, "loss": 0.0589, "step": 10240 }, { "epoch": 0.08293551258192411, "grad_norm": 0.9523839354515076, "learning_rate": 4.146775629096205e-06, "loss": 0.0682, "step": 10250 }, { "epoch": 0.08301642527712598, "grad_norm": 1.0772660970687866, "learning_rate": 4.150821263856299e-06, "loss": 0.068, "step": 10260 }, { "epoch": 0.08309733797232786, "grad_norm": 1.3979684114456177, "learning_rate": 4.154866898616393e-06, "loss": 0.0603, "step": 10270 }, { "epoch": 0.08317825066752974, "grad_norm": 1.46531081199646, "learning_rate": 4.158912533376487e-06, "loss": 0.0884, "step": 10280 }, { "epoch": 0.0832591633627316, "grad_norm": 1.6886268854141235, "learning_rate": 4.162958168136581e-06, "loss": 0.0874, "step": 10290 }, { "epoch": 0.08334007605793349, "grad_norm": 1.1268459558486938, "learning_rate": 4.167003802896675e-06, "loss": 0.0619, "step": 10300 }, { "epoch": 0.08342098875313536, "grad_norm": 2.1854023933410645, "learning_rate": 4.171049437656769e-06, "loss": 0.0611, "step": 10310 }, { "epoch": 0.08350190144833725, "grad_norm": 1.7797802686691284, "learning_rate": 4.175095072416863e-06, "loss": 0.0716, "step": 10320 }, { "epoch": 0.08358281414353912, "grad_norm": 1.718961477279663, "learning_rate": 4.179140707176956e-06, "loss": 0.0555, "step": 10330 }, { "epoch": 0.083663726838741, "grad_norm": 1.3028556108474731, "learning_rate": 4.183186341937051e-06, "loss": 0.077, "step": 10340 }, { "epoch": 0.08374463953394287, "grad_norm": 1.1438099145889282, "learning_rate": 4.187231976697144e-06, "loss": 0.064, "step": 10350 }, { "epoch": 0.08382555222914476, "grad_norm": 1.3230032920837402, "learning_rate": 4.191277611457238e-06, "loss": 0.0872, "step": 10360 }, { "epoch": 0.08390646492434663, "grad_norm": 1.729299783706665, "learning_rate": 4.195323246217332e-06, "loss": 0.053, "step": 10370 }, { "epoch": 0.08398737761954851, "grad_norm": 1.456915020942688, "learning_rate": 4.199368880977426e-06, "loss": 0.0566, "step": 10380 }, { "epoch": 0.08406829031475038, "grad_norm": 1.031765103340149, "learning_rate": 4.20341451573752e-06, "loss": 0.0639, "step": 10390 }, { "epoch": 0.08414920300995227, "grad_norm": 2.1603970527648926, "learning_rate": 4.207460150497614e-06, "loss": 0.1011, "step": 10400 }, { "epoch": 0.08423011570515414, "grad_norm": 0.3808901309967041, "learning_rate": 4.211505785257708e-06, "loss": 0.0696, "step": 10410 }, { "epoch": 0.08431102840035602, "grad_norm": 1.260579228401184, "learning_rate": 4.215551420017801e-06, "loss": 0.0708, "step": 10420 }, { "epoch": 0.08439194109555789, "grad_norm": 0.39143654704093933, "learning_rate": 4.2195970547778955e-06, "loss": 0.0381, "step": 10430 }, { "epoch": 0.08447285379075976, "grad_norm": 1.2562158107757568, "learning_rate": 4.223642689537989e-06, "loss": 0.0521, "step": 10440 }, { "epoch": 0.08455376648596165, "grad_norm": 1.367056965827942, "learning_rate": 4.2276883242980825e-06, "loss": 0.0671, "step": 10450 }, { "epoch": 0.08463467918116352, "grad_norm": 1.1512527465820312, "learning_rate": 4.2317339590581765e-06, "loss": 0.0671, "step": 10460 }, { "epoch": 0.0847155918763654, "grad_norm": 0.9875103831291199, "learning_rate": 4.23577959381827e-06, "loss": 0.0731, "step": 10470 }, { "epoch": 0.08479650457156727, "grad_norm": 1.3345723152160645, "learning_rate": 4.239825228578364e-06, "loss": 0.0578, "step": 10480 }, { "epoch": 0.08487741726676916, "grad_norm": 0.6689889430999756, "learning_rate": 4.243870863338458e-06, "loss": 0.0603, "step": 10490 }, { "epoch": 0.08495832996197103, "grad_norm": 1.0003052949905396, "learning_rate": 4.247916498098552e-06, "loss": 0.0636, "step": 10500 }, { "epoch": 0.08503924265717291, "grad_norm": 1.180757761001587, "learning_rate": 4.251962132858646e-06, "loss": 0.0714, "step": 10510 }, { "epoch": 0.08512015535237479, "grad_norm": 1.008794903755188, "learning_rate": 4.256007767618739e-06, "loss": 0.0611, "step": 10520 }, { "epoch": 0.08520106804757667, "grad_norm": 1.3462868928909302, "learning_rate": 4.260053402378834e-06, "loss": 0.062, "step": 10530 }, { "epoch": 0.08528198074277854, "grad_norm": 1.1255054473876953, "learning_rate": 4.264099037138927e-06, "loss": 0.0712, "step": 10540 }, { "epoch": 0.08536289343798042, "grad_norm": 0.4516887664794922, "learning_rate": 4.268144671899021e-06, "loss": 0.0806, "step": 10550 }, { "epoch": 0.0854438061331823, "grad_norm": 1.2898173332214355, "learning_rate": 4.272190306659115e-06, "loss": 0.0578, "step": 10560 }, { "epoch": 0.08552471882838418, "grad_norm": 1.039771318435669, "learning_rate": 4.276235941419209e-06, "loss": 0.0543, "step": 10570 }, { "epoch": 0.08560563152358605, "grad_norm": 1.4008649587631226, "learning_rate": 4.280281576179303e-06, "loss": 0.0669, "step": 10580 }, { "epoch": 0.08568654421878792, "grad_norm": 1.668981909751892, "learning_rate": 4.284327210939397e-06, "loss": 0.0615, "step": 10590 }, { "epoch": 0.0857674569139898, "grad_norm": 1.2072771787643433, "learning_rate": 4.288372845699491e-06, "loss": 0.0558, "step": 10600 }, { "epoch": 0.08584836960919168, "grad_norm": 1.0281366109848022, "learning_rate": 4.292418480459584e-06, "loss": 0.0652, "step": 10610 }, { "epoch": 0.08592928230439356, "grad_norm": 0.8505672216415405, "learning_rate": 4.296464115219679e-06, "loss": 0.0734, "step": 10620 }, { "epoch": 0.08601019499959543, "grad_norm": 1.5303720235824585, "learning_rate": 4.300509749979772e-06, "loss": 0.0887, "step": 10630 }, { "epoch": 0.08609110769479732, "grad_norm": 0.4728931188583374, "learning_rate": 4.304555384739866e-06, "loss": 0.042, "step": 10640 }, { "epoch": 0.08617202038999919, "grad_norm": 1.8442142009735107, "learning_rate": 4.30860101949996e-06, "loss": 0.0684, "step": 10650 }, { "epoch": 0.08625293308520107, "grad_norm": 0.876367449760437, "learning_rate": 4.3126466542600536e-06, "loss": 0.0843, "step": 10660 }, { "epoch": 0.08633384578040294, "grad_norm": 1.243927240371704, "learning_rate": 4.3166922890201475e-06, "loss": 0.0505, "step": 10670 }, { "epoch": 0.08641475847560483, "grad_norm": 1.1639082431793213, "learning_rate": 4.3207379237802414e-06, "loss": 0.051, "step": 10680 }, { "epoch": 0.0864956711708067, "grad_norm": 1.261679768562317, "learning_rate": 4.324783558540335e-06, "loss": 0.0518, "step": 10690 }, { "epoch": 0.08657658386600858, "grad_norm": 0.9853487014770508, "learning_rate": 4.328829193300429e-06, "loss": 0.0654, "step": 10700 }, { "epoch": 0.08665749656121045, "grad_norm": 1.4298527240753174, "learning_rate": 4.332874828060523e-06, "loss": 0.0785, "step": 10710 }, { "epoch": 0.08673840925641232, "grad_norm": 1.199097752571106, "learning_rate": 4.336920462820617e-06, "loss": 0.0825, "step": 10720 }, { "epoch": 0.08681932195161421, "grad_norm": 1.4010285139083862, "learning_rate": 4.34096609758071e-06, "loss": 0.0753, "step": 10730 }, { "epoch": 0.08690023464681608, "grad_norm": 2.215395450592041, "learning_rate": 4.345011732340805e-06, "loss": 0.0542, "step": 10740 }, { "epoch": 0.08698114734201796, "grad_norm": 1.1265246868133545, "learning_rate": 4.349057367100898e-06, "loss": 0.0678, "step": 10750 }, { "epoch": 0.08706206003721983, "grad_norm": 1.240291953086853, "learning_rate": 4.353103001860992e-06, "loss": 0.0479, "step": 10760 }, { "epoch": 0.08714297273242172, "grad_norm": 1.2194030284881592, "learning_rate": 4.357148636621086e-06, "loss": 0.0622, "step": 10770 }, { "epoch": 0.08722388542762359, "grad_norm": 1.7228498458862305, "learning_rate": 4.36119427138118e-06, "loss": 0.0681, "step": 10780 }, { "epoch": 0.08730479812282547, "grad_norm": 1.1947240829467773, "learning_rate": 4.365239906141274e-06, "loss": 0.0607, "step": 10790 }, { "epoch": 0.08738571081802735, "grad_norm": 1.0224722623825073, "learning_rate": 4.369285540901368e-06, "loss": 0.0533, "step": 10800 }, { "epoch": 0.08746662351322923, "grad_norm": 1.42864990234375, "learning_rate": 4.373331175661462e-06, "loss": 0.0714, "step": 10810 }, { "epoch": 0.0875475362084311, "grad_norm": 0.6462215781211853, "learning_rate": 4.377376810421555e-06, "loss": 0.0584, "step": 10820 }, { "epoch": 0.08762844890363299, "grad_norm": 2.0795676708221436, "learning_rate": 4.38142244518165e-06, "loss": 0.0686, "step": 10830 }, { "epoch": 0.08770936159883486, "grad_norm": 2.0389177799224854, "learning_rate": 4.385468079941743e-06, "loss": 0.0717, "step": 10840 }, { "epoch": 0.08779027429403674, "grad_norm": 0.46824517846107483, "learning_rate": 4.389513714701837e-06, "loss": 0.0607, "step": 10850 }, { "epoch": 0.08787118698923861, "grad_norm": 1.3579741716384888, "learning_rate": 4.393559349461931e-06, "loss": 0.0611, "step": 10860 }, { "epoch": 0.08795209968444048, "grad_norm": 0.9097315073013306, "learning_rate": 4.397604984222025e-06, "loss": 0.0643, "step": 10870 }, { "epoch": 0.08803301237964237, "grad_norm": 0.7516142129898071, "learning_rate": 4.4016506189821186e-06, "loss": 0.0482, "step": 10880 }, { "epoch": 0.08811392507484424, "grad_norm": 1.8776867389678955, "learning_rate": 4.4056962537422125e-06, "loss": 0.0741, "step": 10890 }, { "epoch": 0.08819483777004612, "grad_norm": 1.4092669486999512, "learning_rate": 4.4097418885023064e-06, "loss": 0.0614, "step": 10900 }, { "epoch": 0.08827575046524799, "grad_norm": 1.0549896955490112, "learning_rate": 4.4137875232624e-06, "loss": 0.0728, "step": 10910 }, { "epoch": 0.08835666316044988, "grad_norm": 1.452660322189331, "learning_rate": 4.417833158022494e-06, "loss": 0.0792, "step": 10920 }, { "epoch": 0.08843757585565175, "grad_norm": 1.378825306892395, "learning_rate": 4.421878792782588e-06, "loss": 0.0624, "step": 10930 }, { "epoch": 0.08851848855085363, "grad_norm": 1.067986011505127, "learning_rate": 4.425924427542681e-06, "loss": 0.0587, "step": 10940 }, { "epoch": 0.0885994012460555, "grad_norm": 1.2169142961502075, "learning_rate": 4.429970062302776e-06, "loss": 0.0723, "step": 10950 }, { "epoch": 0.08868031394125739, "grad_norm": 2.3021390438079834, "learning_rate": 4.434015697062869e-06, "loss": 0.0797, "step": 10960 }, { "epoch": 0.08876122663645926, "grad_norm": 1.2914379835128784, "learning_rate": 4.438061331822963e-06, "loss": 0.0707, "step": 10970 }, { "epoch": 0.08884213933166114, "grad_norm": 1.257838487625122, "learning_rate": 4.442106966583057e-06, "loss": 0.0697, "step": 10980 }, { "epoch": 0.08892305202686301, "grad_norm": 0.6343650817871094, "learning_rate": 4.446152601343151e-06, "loss": 0.0788, "step": 10990 }, { "epoch": 0.0890039647220649, "grad_norm": 1.4704792499542236, "learning_rate": 4.450198236103245e-06, "loss": 0.0684, "step": 11000 }, { "epoch": 0.08908487741726677, "grad_norm": 1.0577776432037354, "learning_rate": 4.454243870863339e-06, "loss": 0.0765, "step": 11010 }, { "epoch": 0.08916579011246864, "grad_norm": 1.3053992986679077, "learning_rate": 4.458289505623433e-06, "loss": 0.0694, "step": 11020 }, { "epoch": 0.08924670280767052, "grad_norm": 1.421334147453308, "learning_rate": 4.462335140383526e-06, "loss": 0.0666, "step": 11030 }, { "epoch": 0.0893276155028724, "grad_norm": 0.9072417616844177, "learning_rate": 4.466380775143621e-06, "loss": 0.0832, "step": 11040 }, { "epoch": 0.08940852819807428, "grad_norm": 1.0297913551330566, "learning_rate": 4.470426409903714e-06, "loss": 0.0494, "step": 11050 }, { "epoch": 0.08948944089327615, "grad_norm": 1.3561558723449707, "learning_rate": 4.474472044663808e-06, "loss": 0.0828, "step": 11060 }, { "epoch": 0.08957035358847804, "grad_norm": 0.6849731802940369, "learning_rate": 4.4785176794239026e-06, "loss": 0.066, "step": 11070 }, { "epoch": 0.0896512662836799, "grad_norm": 0.4843485355377197, "learning_rate": 4.482563314183996e-06, "loss": 0.0469, "step": 11080 }, { "epoch": 0.08973217897888179, "grad_norm": 1.7114654779434204, "learning_rate": 4.48660894894409e-06, "loss": 0.0797, "step": 11090 }, { "epoch": 0.08981309167408366, "grad_norm": 1.3212196826934814, "learning_rate": 4.4906545837041835e-06, "loss": 0.0711, "step": 11100 }, { "epoch": 0.08989400436928555, "grad_norm": 1.5997068881988525, "learning_rate": 4.4947002184642775e-06, "loss": 0.0944, "step": 11110 }, { "epoch": 0.08997491706448742, "grad_norm": 1.7329068183898926, "learning_rate": 4.498745853224371e-06, "loss": 0.0827, "step": 11120 }, { "epoch": 0.0900558297596893, "grad_norm": 2.030345916748047, "learning_rate": 4.502791487984465e-06, "loss": 0.0684, "step": 11130 }, { "epoch": 0.09013674245489117, "grad_norm": 2.114751100540161, "learning_rate": 4.506837122744559e-06, "loss": 0.0694, "step": 11140 }, { "epoch": 0.09021765515009304, "grad_norm": 0.945920467376709, "learning_rate": 4.510882757504652e-06, "loss": 0.0545, "step": 11150 }, { "epoch": 0.09029856784529493, "grad_norm": 2.363100051879883, "learning_rate": 4.514928392264747e-06, "loss": 0.0926, "step": 11160 }, { "epoch": 0.0903794805404968, "grad_norm": 1.6320239305496216, "learning_rate": 4.51897402702484e-06, "loss": 0.079, "step": 11170 }, { "epoch": 0.09046039323569868, "grad_norm": 1.5166327953338623, "learning_rate": 4.523019661784934e-06, "loss": 0.0429, "step": 11180 }, { "epoch": 0.09054130593090055, "grad_norm": 1.2238662242889404, "learning_rate": 4.527065296545028e-06, "loss": 0.0519, "step": 11190 }, { "epoch": 0.09062221862610244, "grad_norm": 1.267106056213379, "learning_rate": 4.531110931305122e-06, "loss": 0.0663, "step": 11200 }, { "epoch": 0.09070313132130431, "grad_norm": 1.0591689348220825, "learning_rate": 4.535156566065216e-06, "loss": 0.0499, "step": 11210 }, { "epoch": 0.0907840440165062, "grad_norm": 1.5230894088745117, "learning_rate": 4.53920220082531e-06, "loss": 0.0634, "step": 11220 }, { "epoch": 0.09086495671170806, "grad_norm": 1.1598690748214722, "learning_rate": 4.543247835585404e-06, "loss": 0.0608, "step": 11230 }, { "epoch": 0.09094586940690995, "grad_norm": 1.7426795959472656, "learning_rate": 4.547293470345497e-06, "loss": 0.0621, "step": 11240 }, { "epoch": 0.09102678210211182, "grad_norm": 0.9064708948135376, "learning_rate": 4.551339105105592e-06, "loss": 0.0799, "step": 11250 }, { "epoch": 0.0911076947973137, "grad_norm": 1.3799716234207153, "learning_rate": 4.555384739865685e-06, "loss": 0.0708, "step": 11260 }, { "epoch": 0.09118860749251557, "grad_norm": 0.4270719289779663, "learning_rate": 4.559430374625779e-06, "loss": 0.0658, "step": 11270 }, { "epoch": 0.09126952018771746, "grad_norm": 1.5309942960739136, "learning_rate": 4.563476009385874e-06, "loss": 0.0481, "step": 11280 }, { "epoch": 0.09135043288291933, "grad_norm": 1.1863760948181152, "learning_rate": 4.567521644145967e-06, "loss": 0.0925, "step": 11290 }, { "epoch": 0.0914313455781212, "grad_norm": 1.5839474201202393, "learning_rate": 4.571567278906061e-06, "loss": 0.0504, "step": 11300 }, { "epoch": 0.09151225827332309, "grad_norm": 0.9899383783340454, "learning_rate": 4.575612913666155e-06, "loss": 0.066, "step": 11310 }, { "epoch": 0.09159317096852496, "grad_norm": 1.2668511867523193, "learning_rate": 4.5796585484262485e-06, "loss": 0.0669, "step": 11320 }, { "epoch": 0.09167408366372684, "grad_norm": 1.2573169469833374, "learning_rate": 4.5837041831863425e-06, "loss": 0.0722, "step": 11330 }, { "epoch": 0.09175499635892871, "grad_norm": 0.8920037746429443, "learning_rate": 4.587749817946436e-06, "loss": 0.0625, "step": 11340 }, { "epoch": 0.0918359090541306, "grad_norm": 1.4447352886199951, "learning_rate": 4.59179545270653e-06, "loss": 0.0646, "step": 11350 }, { "epoch": 0.09191682174933247, "grad_norm": 1.4188584089279175, "learning_rate": 4.5958410874666234e-06, "loss": 0.0673, "step": 11360 }, { "epoch": 0.09199773444453435, "grad_norm": 0.8413045406341553, "learning_rate": 4.599886722226718e-06, "loss": 0.0761, "step": 11370 }, { "epoch": 0.09207864713973622, "grad_norm": 0.8671655654907227, "learning_rate": 4.603932356986811e-06, "loss": 0.0643, "step": 11380 }, { "epoch": 0.0921595598349381, "grad_norm": 1.4753857851028442, "learning_rate": 4.607977991746905e-06, "loss": 0.0491, "step": 11390 }, { "epoch": 0.09224047253013998, "grad_norm": 1.1663141250610352, "learning_rate": 4.612023626506999e-06, "loss": 0.0906, "step": 11400 }, { "epoch": 0.09232138522534186, "grad_norm": 0.9797104001045227, "learning_rate": 4.616069261267093e-06, "loss": 0.0617, "step": 11410 }, { "epoch": 0.09240229792054373, "grad_norm": 1.4605423212051392, "learning_rate": 4.620114896027187e-06, "loss": 0.051, "step": 11420 }, { "epoch": 0.09248321061574562, "grad_norm": 1.5335661172866821, "learning_rate": 4.624160530787281e-06, "loss": 0.072, "step": 11430 }, { "epoch": 0.09256412331094749, "grad_norm": 0.561458170413971, "learning_rate": 4.628206165547375e-06, "loss": 0.0598, "step": 11440 }, { "epoch": 0.09264503600614936, "grad_norm": 1.6718230247497559, "learning_rate": 4.632251800307468e-06, "loss": 0.0645, "step": 11450 }, { "epoch": 0.09272594870135124, "grad_norm": 0.5536269545555115, "learning_rate": 4.636297435067563e-06, "loss": 0.0613, "step": 11460 }, { "epoch": 0.09280686139655311, "grad_norm": 1.120682716369629, "learning_rate": 4.640343069827657e-06, "loss": 0.0628, "step": 11470 }, { "epoch": 0.092887774091755, "grad_norm": 1.2930076122283936, "learning_rate": 4.64438870458775e-06, "loss": 0.0815, "step": 11480 }, { "epoch": 0.09296868678695687, "grad_norm": 1.6572866439819336, "learning_rate": 4.648434339347845e-06, "loss": 0.0818, "step": 11490 }, { "epoch": 0.09304959948215875, "grad_norm": 3.3894381523132324, "learning_rate": 4.652479974107938e-06, "loss": 0.0842, "step": 11500 }, { "epoch": 0.09313051217736062, "grad_norm": 0.9231201410293579, "learning_rate": 4.656525608868032e-06, "loss": 0.0696, "step": 11510 }, { "epoch": 0.09321142487256251, "grad_norm": 1.614188551902771, "learning_rate": 4.660571243628126e-06, "loss": 0.0544, "step": 11520 }, { "epoch": 0.09329233756776438, "grad_norm": 0.7944612503051758, "learning_rate": 4.6646168783882196e-06, "loss": 0.0688, "step": 11530 }, { "epoch": 0.09337325026296626, "grad_norm": 1.5576725006103516, "learning_rate": 4.6686625131483135e-06, "loss": 0.0611, "step": 11540 }, { "epoch": 0.09345416295816814, "grad_norm": 1.4582616090774536, "learning_rate": 4.6727081479084074e-06, "loss": 0.0624, "step": 11550 }, { "epoch": 0.09353507565337002, "grad_norm": 0.914644181728363, "learning_rate": 4.676753782668501e-06, "loss": 0.0729, "step": 11560 }, { "epoch": 0.09361598834857189, "grad_norm": 1.4639463424682617, "learning_rate": 4.6807994174285945e-06, "loss": 0.0759, "step": 11570 }, { "epoch": 0.09369690104377376, "grad_norm": 0.8900485634803772, "learning_rate": 4.684845052188689e-06, "loss": 0.0574, "step": 11580 }, { "epoch": 0.09377781373897565, "grad_norm": 0.9711294174194336, "learning_rate": 4.688890686948782e-06, "loss": 0.0603, "step": 11590 }, { "epoch": 0.09385872643417752, "grad_norm": 0.9631531834602356, "learning_rate": 4.692936321708876e-06, "loss": 0.0492, "step": 11600 }, { "epoch": 0.0939396391293794, "grad_norm": 1.357488751411438, "learning_rate": 4.69698195646897e-06, "loss": 0.0617, "step": 11610 }, { "epoch": 0.09402055182458127, "grad_norm": 1.5567964315414429, "learning_rate": 4.701027591229064e-06, "loss": 0.0651, "step": 11620 }, { "epoch": 0.09410146451978316, "grad_norm": 2.096747875213623, "learning_rate": 4.705073225989158e-06, "loss": 0.0627, "step": 11630 }, { "epoch": 0.09418237721498503, "grad_norm": 2.0935170650482178, "learning_rate": 4.709118860749252e-06, "loss": 0.0651, "step": 11640 }, { "epoch": 0.09426328991018691, "grad_norm": 1.0582659244537354, "learning_rate": 4.713164495509346e-06, "loss": 0.0682, "step": 11650 }, { "epoch": 0.09434420260538878, "grad_norm": 1.2018144130706787, "learning_rate": 4.71721013026944e-06, "loss": 0.0859, "step": 11660 }, { "epoch": 0.09442511530059067, "grad_norm": 1.4523143768310547, "learning_rate": 4.721255765029534e-06, "loss": 0.0553, "step": 11670 }, { "epoch": 0.09450602799579254, "grad_norm": 1.0642204284667969, "learning_rate": 4.725301399789628e-06, "loss": 0.062, "step": 11680 }, { "epoch": 0.09458694069099442, "grad_norm": 1.0429372787475586, "learning_rate": 4.729347034549721e-06, "loss": 0.0559, "step": 11690 }, { "epoch": 0.0946678533861963, "grad_norm": 1.9302843809127808, "learning_rate": 4.733392669309816e-06, "loss": 0.0536, "step": 11700 }, { "epoch": 0.09474876608139818, "grad_norm": 0.9294273853302002, "learning_rate": 4.737438304069909e-06, "loss": 0.0574, "step": 11710 }, { "epoch": 0.09482967877660005, "grad_norm": 1.5151745080947876, "learning_rate": 4.741483938830003e-06, "loss": 0.0596, "step": 11720 }, { "epoch": 0.09491059147180192, "grad_norm": 1.5071226358413696, "learning_rate": 4.745529573590097e-06, "loss": 0.0718, "step": 11730 }, { "epoch": 0.0949915041670038, "grad_norm": 1.8523445129394531, "learning_rate": 4.749575208350191e-06, "loss": 0.0709, "step": 11740 }, { "epoch": 0.09507241686220567, "grad_norm": 1.6593924760818481, "learning_rate": 4.7536208431102845e-06, "loss": 0.0706, "step": 11750 }, { "epoch": 0.09515332955740756, "grad_norm": 0.9868592023849487, "learning_rate": 4.7576664778703785e-06, "loss": 0.0573, "step": 11760 }, { "epoch": 0.09523424225260943, "grad_norm": 0.9631950855255127, "learning_rate": 4.7617121126304724e-06, "loss": 0.0644, "step": 11770 }, { "epoch": 0.09531515494781131, "grad_norm": 1.153191089630127, "learning_rate": 4.7657577473905655e-06, "loss": 0.0708, "step": 11780 }, { "epoch": 0.09539606764301319, "grad_norm": 0.7024262547492981, "learning_rate": 4.76980338215066e-06, "loss": 0.0553, "step": 11790 }, { "epoch": 0.09547698033821507, "grad_norm": 0.9723532199859619, "learning_rate": 4.773849016910753e-06, "loss": 0.0603, "step": 11800 }, { "epoch": 0.09555789303341694, "grad_norm": 2.085850238800049, "learning_rate": 4.777894651670847e-06, "loss": 0.0637, "step": 11810 }, { "epoch": 0.09563880572861883, "grad_norm": 1.4768232107162476, "learning_rate": 4.781940286430941e-06, "loss": 0.072, "step": 11820 }, { "epoch": 0.0957197184238207, "grad_norm": 1.1609755754470825, "learning_rate": 4.785985921191035e-06, "loss": 0.0652, "step": 11830 }, { "epoch": 0.09580063111902258, "grad_norm": 1.1372736692428589, "learning_rate": 4.790031555951129e-06, "loss": 0.0805, "step": 11840 }, { "epoch": 0.09588154381422445, "grad_norm": 0.8837674856185913, "learning_rate": 4.794077190711223e-06, "loss": 0.0642, "step": 11850 }, { "epoch": 0.09596245650942632, "grad_norm": 1.131810188293457, "learning_rate": 4.798122825471317e-06, "loss": 0.0517, "step": 11860 }, { "epoch": 0.0960433692046282, "grad_norm": 1.3304450511932373, "learning_rate": 4.802168460231411e-06, "loss": 0.0618, "step": 11870 }, { "epoch": 0.09612428189983008, "grad_norm": 1.5709763765335083, "learning_rate": 4.806214094991505e-06, "loss": 0.0676, "step": 11880 }, { "epoch": 0.09620519459503196, "grad_norm": 1.7762935161590576, "learning_rate": 4.810259729751599e-06, "loss": 0.0632, "step": 11890 }, { "epoch": 0.09628610729023383, "grad_norm": 1.3394898176193237, "learning_rate": 4.814305364511692e-06, "loss": 0.0555, "step": 11900 }, { "epoch": 0.09636701998543572, "grad_norm": 1.743291974067688, "learning_rate": 4.818350999271787e-06, "loss": 0.0877, "step": 11910 }, { "epoch": 0.09644793268063759, "grad_norm": 0.582655668258667, "learning_rate": 4.82239663403188e-06, "loss": 0.045, "step": 11920 }, { "epoch": 0.09652884537583947, "grad_norm": 1.2448029518127441, "learning_rate": 4.826442268791974e-06, "loss": 0.0843, "step": 11930 }, { "epoch": 0.09660975807104134, "grad_norm": 1.7612618207931519, "learning_rate": 4.830487903552068e-06, "loss": 0.0586, "step": 11940 }, { "epoch": 0.09669067076624323, "grad_norm": 0.6957379579544067, "learning_rate": 4.834533538312162e-06, "loss": 0.0714, "step": 11950 }, { "epoch": 0.0967715834614451, "grad_norm": 1.5321520566940308, "learning_rate": 4.838579173072256e-06, "loss": 0.0725, "step": 11960 }, { "epoch": 0.09685249615664698, "grad_norm": 0.9540291428565979, "learning_rate": 4.8426248078323495e-06, "loss": 0.0713, "step": 11970 }, { "epoch": 0.09693340885184885, "grad_norm": 1.7551214694976807, "learning_rate": 4.8466704425924435e-06, "loss": 0.0627, "step": 11980 }, { "epoch": 0.09701432154705074, "grad_norm": 0.8711128234863281, "learning_rate": 4.8507160773525366e-06, "loss": 0.0694, "step": 11990 }, { "epoch": 0.09709523424225261, "grad_norm": 1.0974174737930298, "learning_rate": 4.854761712112631e-06, "loss": 0.0598, "step": 12000 }, { "epoch": 0.09717614693745448, "grad_norm": 1.3440111875534058, "learning_rate": 4.8588073468727244e-06, "loss": 0.059, "step": 12010 }, { "epoch": 0.09725705963265636, "grad_norm": 1.2916542291641235, "learning_rate": 4.862852981632818e-06, "loss": 0.0723, "step": 12020 }, { "epoch": 0.09733797232785824, "grad_norm": 0.9922534227371216, "learning_rate": 4.866898616392912e-06, "loss": 0.0679, "step": 12030 }, { "epoch": 0.09741888502306012, "grad_norm": 1.1600613594055176, "learning_rate": 4.870944251153006e-06, "loss": 0.0593, "step": 12040 }, { "epoch": 0.09749979771826199, "grad_norm": 1.0876133441925049, "learning_rate": 4.8749898859131e-06, "loss": 0.0722, "step": 12050 }, { "epoch": 0.09758071041346388, "grad_norm": 0.808556079864502, "learning_rate": 4.879035520673194e-06, "loss": 0.0442, "step": 12060 }, { "epoch": 0.09766162310866575, "grad_norm": 1.2520575523376465, "learning_rate": 4.883081155433288e-06, "loss": 0.0717, "step": 12070 }, { "epoch": 0.09774253580386763, "grad_norm": 1.3081327676773071, "learning_rate": 4.887126790193382e-06, "loss": 0.0853, "step": 12080 }, { "epoch": 0.0978234484990695, "grad_norm": 1.274829387664795, "learning_rate": 4.891172424953475e-06, "loss": 0.063, "step": 12090 }, { "epoch": 0.09790436119427139, "grad_norm": 0.9922951459884644, "learning_rate": 4.89521805971357e-06, "loss": 0.0644, "step": 12100 }, { "epoch": 0.09798527388947326, "grad_norm": 1.7442156076431274, "learning_rate": 4.899263694473663e-06, "loss": 0.0766, "step": 12110 }, { "epoch": 0.09806618658467514, "grad_norm": 0.7362623810768127, "learning_rate": 4.903309329233757e-06, "loss": 0.0437, "step": 12120 }, { "epoch": 0.09814709927987701, "grad_norm": 0.5354350805282593, "learning_rate": 4.907354963993851e-06, "loss": 0.0589, "step": 12130 }, { "epoch": 0.0982280119750789, "grad_norm": 1.0607134103775024, "learning_rate": 4.911400598753945e-06, "loss": 0.0567, "step": 12140 }, { "epoch": 0.09830892467028077, "grad_norm": 2.086897373199463, "learning_rate": 4.915446233514039e-06, "loss": 0.0617, "step": 12150 }, { "epoch": 0.09838983736548264, "grad_norm": 0.8532183170318604, "learning_rate": 4.919491868274133e-06, "loss": 0.0596, "step": 12160 }, { "epoch": 0.09847075006068452, "grad_norm": 1.257121205329895, "learning_rate": 4.923537503034227e-06, "loss": 0.0593, "step": 12170 }, { "epoch": 0.0985516627558864, "grad_norm": 0.6029738187789917, "learning_rate": 4.92758313779432e-06, "loss": 0.0392, "step": 12180 }, { "epoch": 0.09863257545108828, "grad_norm": 1.4668599367141724, "learning_rate": 4.9316287725544145e-06, "loss": 0.0573, "step": 12190 }, { "epoch": 0.09871348814629015, "grad_norm": 0.46124398708343506, "learning_rate": 4.935674407314508e-06, "loss": 0.0375, "step": 12200 }, { "epoch": 0.09879440084149203, "grad_norm": 0.9299999475479126, "learning_rate": 4.9397200420746016e-06, "loss": 0.0875, "step": 12210 }, { "epoch": 0.0988753135366939, "grad_norm": 1.392033576965332, "learning_rate": 4.9437656768346955e-06, "loss": 0.0675, "step": 12220 }, { "epoch": 0.09895622623189579, "grad_norm": 1.4260891675949097, "learning_rate": 4.9478113115947894e-06, "loss": 0.0482, "step": 12230 }, { "epoch": 0.09903713892709766, "grad_norm": 0.6516099572181702, "learning_rate": 4.951856946354883e-06, "loss": 0.0575, "step": 12240 }, { "epoch": 0.09911805162229954, "grad_norm": 1.328467845916748, "learning_rate": 4.955902581114977e-06, "loss": 0.0526, "step": 12250 }, { "epoch": 0.09919896431750141, "grad_norm": 0.7678608894348145, "learning_rate": 4.959948215875071e-06, "loss": 0.0613, "step": 12260 }, { "epoch": 0.0992798770127033, "grad_norm": 1.6072466373443604, "learning_rate": 4.963993850635165e-06, "loss": 0.0561, "step": 12270 }, { "epoch": 0.09936078970790517, "grad_norm": 1.036116361618042, "learning_rate": 4.968039485395259e-06, "loss": 0.0741, "step": 12280 }, { "epoch": 0.09944170240310704, "grad_norm": 0.9007984399795532, "learning_rate": 4.972085120155353e-06, "loss": 0.0755, "step": 12290 }, { "epoch": 0.09952261509830893, "grad_norm": 0.9675401449203491, "learning_rate": 4.976130754915446e-06, "loss": 0.0649, "step": 12300 }, { "epoch": 0.0996035277935108, "grad_norm": 1.028504490852356, "learning_rate": 4.980176389675541e-06, "loss": 0.0411, "step": 12310 }, { "epoch": 0.09968444048871268, "grad_norm": 1.5721641778945923, "learning_rate": 4.984222024435634e-06, "loss": 0.0605, "step": 12320 }, { "epoch": 0.09976535318391455, "grad_norm": 1.358616590499878, "learning_rate": 4.988267659195728e-06, "loss": 0.0588, "step": 12330 }, { "epoch": 0.09984626587911644, "grad_norm": 1.748096227645874, "learning_rate": 4.992313293955822e-06, "loss": 0.0604, "step": 12340 }, { "epoch": 0.0999271785743183, "grad_norm": 0.03421665355563164, "learning_rate": 4.996358928715916e-06, "loss": 0.0583, "step": 12350 }, { "epoch": 0.10000809126952019, "grad_norm": 1.0534734725952148, "learning_rate": 5.00040456347601e-06, "loss": 0.0613, "step": 12360 }, { "epoch": 0.10008900396472206, "grad_norm": 1.2270689010620117, "learning_rate": 5.004450198236104e-06, "loss": 0.0599, "step": 12370 }, { "epoch": 0.10016991665992395, "grad_norm": 0.871955156326294, "learning_rate": 5.008495832996198e-06, "loss": 0.0515, "step": 12380 }, { "epoch": 0.10025082935512582, "grad_norm": 1.515899896621704, "learning_rate": 5.012541467756291e-06, "loss": 0.0691, "step": 12390 }, { "epoch": 0.1003317420503277, "grad_norm": 1.260300874710083, "learning_rate": 5.016587102516385e-06, "loss": 0.0582, "step": 12400 }, { "epoch": 0.10041265474552957, "grad_norm": 1.3323891162872314, "learning_rate": 5.0206327372764795e-06, "loss": 0.0699, "step": 12410 }, { "epoch": 0.10049356744073146, "grad_norm": 0.968757152557373, "learning_rate": 5.0246783720365734e-06, "loss": 0.0599, "step": 12420 }, { "epoch": 0.10057448013593333, "grad_norm": 1.1687124967575073, "learning_rate": 5.0287240067966665e-06, "loss": 0.0481, "step": 12430 }, { "epoch": 0.1006553928311352, "grad_norm": 1.3716181516647339, "learning_rate": 5.0327696415567605e-06, "loss": 0.0555, "step": 12440 }, { "epoch": 0.10073630552633708, "grad_norm": 1.198803186416626, "learning_rate": 5.036815276316854e-06, "loss": 0.0732, "step": 12450 }, { "epoch": 0.10081721822153895, "grad_norm": 1.0069454908370972, "learning_rate": 5.040860911076948e-06, "loss": 0.0694, "step": 12460 }, { "epoch": 0.10089813091674084, "grad_norm": 0.8730552196502686, "learning_rate": 5.044906545837043e-06, "loss": 0.0884, "step": 12470 }, { "epoch": 0.10097904361194271, "grad_norm": 1.066559076309204, "learning_rate": 5.048952180597136e-06, "loss": 0.0643, "step": 12480 }, { "epoch": 0.1010599563071446, "grad_norm": 0.9040141105651855, "learning_rate": 5.05299781535723e-06, "loss": 0.0613, "step": 12490 }, { "epoch": 0.10114086900234646, "grad_norm": 1.1844602823257446, "learning_rate": 5.057043450117324e-06, "loss": 0.0519, "step": 12500 }, { "epoch": 0.10122178169754835, "grad_norm": 1.6095198392868042, "learning_rate": 5.061089084877417e-06, "loss": 0.04, "step": 12510 }, { "epoch": 0.10130269439275022, "grad_norm": 1.1133488416671753, "learning_rate": 5.065134719637511e-06, "loss": 0.072, "step": 12520 }, { "epoch": 0.1013836070879521, "grad_norm": 1.3081660270690918, "learning_rate": 5.069180354397606e-06, "loss": 0.0615, "step": 12530 }, { "epoch": 0.10146451978315398, "grad_norm": 1.3932185173034668, "learning_rate": 5.0732259891577e-06, "loss": 0.0443, "step": 12540 }, { "epoch": 0.10154543247835586, "grad_norm": 0.37217947840690613, "learning_rate": 5.077271623917793e-06, "loss": 0.0596, "step": 12550 }, { "epoch": 0.10162634517355773, "grad_norm": 0.6804710626602173, "learning_rate": 5.081317258677887e-06, "loss": 0.0659, "step": 12560 }, { "epoch": 0.10170725786875962, "grad_norm": 0.8080295920372009, "learning_rate": 5.085362893437981e-06, "loss": 0.0506, "step": 12570 }, { "epoch": 0.10178817056396149, "grad_norm": 1.4820888042449951, "learning_rate": 5.089408528198074e-06, "loss": 0.0658, "step": 12580 }, { "epoch": 0.10186908325916336, "grad_norm": 1.5065925121307373, "learning_rate": 5.093454162958169e-06, "loss": 0.0821, "step": 12590 }, { "epoch": 0.10194999595436524, "grad_norm": 2.3323066234588623, "learning_rate": 5.097499797718263e-06, "loss": 0.0854, "step": 12600 }, { "epoch": 0.10203090864956711, "grad_norm": 1.210044264793396, "learning_rate": 5.101545432478357e-06, "loss": 0.0519, "step": 12610 }, { "epoch": 0.102111821344769, "grad_norm": 1.2813639640808105, "learning_rate": 5.10559106723845e-06, "loss": 0.066, "step": 12620 }, { "epoch": 0.10219273403997087, "grad_norm": 1.0251976251602173, "learning_rate": 5.109636701998544e-06, "loss": 0.0487, "step": 12630 }, { "epoch": 0.10227364673517275, "grad_norm": 1.030982255935669, "learning_rate": 5.113682336758638e-06, "loss": 0.0589, "step": 12640 }, { "epoch": 0.10235455943037462, "grad_norm": 1.0939056873321533, "learning_rate": 5.117727971518732e-06, "loss": 0.0607, "step": 12650 }, { "epoch": 0.10243547212557651, "grad_norm": 1.2744594812393188, "learning_rate": 5.121773606278826e-06, "loss": 0.0413, "step": 12660 }, { "epoch": 0.10251638482077838, "grad_norm": 0.9503705501556396, "learning_rate": 5.125819241038919e-06, "loss": 0.0726, "step": 12670 }, { "epoch": 0.10259729751598026, "grad_norm": 1.4935510158538818, "learning_rate": 5.129864875799013e-06, "loss": 0.0736, "step": 12680 }, { "epoch": 0.10267821021118213, "grad_norm": 1.564500331878662, "learning_rate": 5.133910510559107e-06, "loss": 0.0688, "step": 12690 }, { "epoch": 0.10275912290638402, "grad_norm": 0.8377265930175781, "learning_rate": 5.1379561453192e-06, "loss": 0.0665, "step": 12700 }, { "epoch": 0.10284003560158589, "grad_norm": 1.2158650159835815, "learning_rate": 5.142001780079295e-06, "loss": 0.0687, "step": 12710 }, { "epoch": 0.10292094829678776, "grad_norm": 1.18136465549469, "learning_rate": 5.146047414839389e-06, "loss": 0.0632, "step": 12720 }, { "epoch": 0.10300186099198964, "grad_norm": 1.3966213464736938, "learning_rate": 5.150093049599483e-06, "loss": 0.0703, "step": 12730 }, { "epoch": 0.10308277368719151, "grad_norm": 0.6308597326278687, "learning_rate": 5.154138684359576e-06, "loss": 0.0509, "step": 12740 }, { "epoch": 0.1031636863823934, "grad_norm": 1.3494811058044434, "learning_rate": 5.15818431911967e-06, "loss": 0.058, "step": 12750 }, { "epoch": 0.10324459907759527, "grad_norm": 0.745650053024292, "learning_rate": 5.162229953879764e-06, "loss": 0.0744, "step": 12760 }, { "epoch": 0.10332551177279715, "grad_norm": 0.7656611800193787, "learning_rate": 5.166275588639859e-06, "loss": 0.0415, "step": 12770 }, { "epoch": 0.10340642446799903, "grad_norm": 0.7929429411888123, "learning_rate": 5.170321223399952e-06, "loss": 0.0643, "step": 12780 }, { "epoch": 0.10348733716320091, "grad_norm": 1.0983474254608154, "learning_rate": 5.174366858160046e-06, "loss": 0.0597, "step": 12790 }, { "epoch": 0.10356824985840278, "grad_norm": 0.7629553079605103, "learning_rate": 5.17841249292014e-06, "loss": 0.0724, "step": 12800 }, { "epoch": 0.10364916255360467, "grad_norm": 1.7415131330490112, "learning_rate": 5.182458127680233e-06, "loss": 0.0603, "step": 12810 }, { "epoch": 0.10373007524880654, "grad_norm": 1.2609097957611084, "learning_rate": 5.186503762440327e-06, "loss": 0.0448, "step": 12820 }, { "epoch": 0.10381098794400842, "grad_norm": 1.1711244583129883, "learning_rate": 5.190549397200422e-06, "loss": 0.07, "step": 12830 }, { "epoch": 0.10389190063921029, "grad_norm": 0.8465361595153809, "learning_rate": 5.1945950319605155e-06, "loss": 0.0704, "step": 12840 }, { "epoch": 0.10397281333441218, "grad_norm": 0.8891329765319824, "learning_rate": 5.1986406667206095e-06, "loss": 0.0575, "step": 12850 }, { "epoch": 0.10405372602961405, "grad_norm": 0.660988450050354, "learning_rate": 5.2026863014807026e-06, "loss": 0.0644, "step": 12860 }, { "epoch": 0.10413463872481592, "grad_norm": 1.4334825277328491, "learning_rate": 5.2067319362407965e-06, "loss": 0.0722, "step": 12870 }, { "epoch": 0.1042155514200178, "grad_norm": 1.1883537769317627, "learning_rate": 5.2107775710008904e-06, "loss": 0.059, "step": 12880 }, { "epoch": 0.10429646411521967, "grad_norm": 1.6294960975646973, "learning_rate": 5.2148232057609835e-06, "loss": 0.0758, "step": 12890 }, { "epoch": 0.10437737681042156, "grad_norm": 1.3309515714645386, "learning_rate": 5.218868840521078e-06, "loss": 0.0801, "step": 12900 }, { "epoch": 0.10445828950562343, "grad_norm": 1.7019233703613281, "learning_rate": 5.222914475281172e-06, "loss": 0.0539, "step": 12910 }, { "epoch": 0.10453920220082531, "grad_norm": 1.3624056577682495, "learning_rate": 5.226960110041266e-06, "loss": 0.0898, "step": 12920 }, { "epoch": 0.10462011489602718, "grad_norm": 1.1995081901550293, "learning_rate": 5.231005744801359e-06, "loss": 0.0738, "step": 12930 }, { "epoch": 0.10470102759122907, "grad_norm": 0.9264097809791565, "learning_rate": 5.235051379561453e-06, "loss": 0.043, "step": 12940 }, { "epoch": 0.10478194028643094, "grad_norm": 1.180635929107666, "learning_rate": 5.239097014321547e-06, "loss": 0.0879, "step": 12950 }, { "epoch": 0.10486285298163282, "grad_norm": 1.5573371648788452, "learning_rate": 5.243142649081642e-06, "loss": 0.0707, "step": 12960 }, { "epoch": 0.1049437656768347, "grad_norm": 1.0343180894851685, "learning_rate": 5.247188283841735e-06, "loss": 0.0525, "step": 12970 }, { "epoch": 0.10502467837203658, "grad_norm": 1.5998327732086182, "learning_rate": 5.251233918601829e-06, "loss": 0.0519, "step": 12980 }, { "epoch": 0.10510559106723845, "grad_norm": 1.1157772541046143, "learning_rate": 5.255279553361923e-06, "loss": 0.0778, "step": 12990 }, { "epoch": 0.10518650376244033, "grad_norm": 1.3793705701828003, "learning_rate": 5.259325188122016e-06, "loss": 0.0625, "step": 13000 }, { "epoch": 0.1052674164576422, "grad_norm": 1.116626262664795, "learning_rate": 5.26337082288211e-06, "loss": 0.0539, "step": 13010 }, { "epoch": 0.10534832915284408, "grad_norm": 2.0103116035461426, "learning_rate": 5.267416457642205e-06, "loss": 0.052, "step": 13020 }, { "epoch": 0.10542924184804596, "grad_norm": 0.9818688631057739, "learning_rate": 5.271462092402299e-06, "loss": 0.0698, "step": 13030 }, { "epoch": 0.10551015454324783, "grad_norm": 1.3238171339035034, "learning_rate": 5.275507727162393e-06, "loss": 0.0593, "step": 13040 }, { "epoch": 0.10559106723844972, "grad_norm": 1.3729000091552734, "learning_rate": 5.279553361922486e-06, "loss": 0.073, "step": 13050 }, { "epoch": 0.10567197993365159, "grad_norm": 2.27829647064209, "learning_rate": 5.28359899668258e-06, "loss": 0.0654, "step": 13060 }, { "epoch": 0.10575289262885347, "grad_norm": 1.739066243171692, "learning_rate": 5.287644631442674e-06, "loss": 0.0635, "step": 13070 }, { "epoch": 0.10583380532405534, "grad_norm": 1.4408363103866577, "learning_rate": 5.291690266202768e-06, "loss": 0.067, "step": 13080 }, { "epoch": 0.10591471801925723, "grad_norm": 1.0222948789596558, "learning_rate": 5.2957359009628615e-06, "loss": 0.0651, "step": 13090 }, { "epoch": 0.1059956307144591, "grad_norm": 0.8881967067718506, "learning_rate": 5.2997815357229554e-06, "loss": 0.0617, "step": 13100 }, { "epoch": 0.10607654340966098, "grad_norm": 1.045540452003479, "learning_rate": 5.303827170483049e-06, "loss": 0.0606, "step": 13110 }, { "epoch": 0.10615745610486285, "grad_norm": 1.279841661453247, "learning_rate": 5.3078728052431425e-06, "loss": 0.0934, "step": 13120 }, { "epoch": 0.10623836880006474, "grad_norm": 1.1870110034942627, "learning_rate": 5.311918440003236e-06, "loss": 0.066, "step": 13130 }, { "epoch": 0.10631928149526661, "grad_norm": 1.340924859046936, "learning_rate": 5.315964074763331e-06, "loss": 0.0578, "step": 13140 }, { "epoch": 0.10640019419046848, "grad_norm": 0.9932453036308289, "learning_rate": 5.320009709523425e-06, "loss": 0.0725, "step": 13150 }, { "epoch": 0.10648110688567036, "grad_norm": 1.4785438776016235, "learning_rate": 5.324055344283518e-06, "loss": 0.0698, "step": 13160 }, { "epoch": 0.10656201958087223, "grad_norm": 1.0079983472824097, "learning_rate": 5.328100979043612e-06, "loss": 0.0596, "step": 13170 }, { "epoch": 0.10664293227607412, "grad_norm": 1.2375413179397583, "learning_rate": 5.332146613803706e-06, "loss": 0.0724, "step": 13180 }, { "epoch": 0.10672384497127599, "grad_norm": 0.4988793730735779, "learning_rate": 5.336192248563799e-06, "loss": 0.0701, "step": 13190 }, { "epoch": 0.10680475766647787, "grad_norm": 1.2364261150360107, "learning_rate": 5.340237883323894e-06, "loss": 0.062, "step": 13200 }, { "epoch": 0.10688567036167974, "grad_norm": 1.27632474899292, "learning_rate": 5.344283518083988e-06, "loss": 0.0633, "step": 13210 }, { "epoch": 0.10696658305688163, "grad_norm": 1.5958670377731323, "learning_rate": 5.348329152844082e-06, "loss": 0.0667, "step": 13220 }, { "epoch": 0.1070474957520835, "grad_norm": 0.9785521626472473, "learning_rate": 5.352374787604176e-06, "loss": 0.0521, "step": 13230 }, { "epoch": 0.10712840844728538, "grad_norm": 1.4820690155029297, "learning_rate": 5.356420422364269e-06, "loss": 0.0525, "step": 13240 }, { "epoch": 0.10720932114248725, "grad_norm": 1.1415843963623047, "learning_rate": 5.360466057124363e-06, "loss": 0.0498, "step": 13250 }, { "epoch": 0.10729023383768914, "grad_norm": 0.9726760387420654, "learning_rate": 5.364511691884458e-06, "loss": 0.0658, "step": 13260 }, { "epoch": 0.10737114653289101, "grad_norm": 1.0584518909454346, "learning_rate": 5.3685573266445516e-06, "loss": 0.0677, "step": 13270 }, { "epoch": 0.1074520592280929, "grad_norm": 1.4776231050491333, "learning_rate": 5.372602961404645e-06, "loss": 0.0689, "step": 13280 }, { "epoch": 0.10753297192329477, "grad_norm": 0.25351715087890625, "learning_rate": 5.376648596164739e-06, "loss": 0.0569, "step": 13290 }, { "epoch": 0.10761388461849664, "grad_norm": 1.660498857498169, "learning_rate": 5.3806942309248325e-06, "loss": 0.0544, "step": 13300 }, { "epoch": 0.10769479731369852, "grad_norm": 0.8619779348373413, "learning_rate": 5.384739865684926e-06, "loss": 0.0793, "step": 13310 }, { "epoch": 0.10777571000890039, "grad_norm": 0.8391332030296326, "learning_rate": 5.38878550044502e-06, "loss": 0.0697, "step": 13320 }, { "epoch": 0.10785662270410228, "grad_norm": 1.0506361722946167, "learning_rate": 5.392831135205114e-06, "loss": 0.0553, "step": 13330 }, { "epoch": 0.10793753539930415, "grad_norm": 0.9536338448524475, "learning_rate": 5.396876769965208e-06, "loss": 0.0556, "step": 13340 }, { "epoch": 0.10801844809450603, "grad_norm": 1.0888901948928833, "learning_rate": 5.400922404725301e-06, "loss": 0.0668, "step": 13350 }, { "epoch": 0.1080993607897079, "grad_norm": 2.2199490070343018, "learning_rate": 5.404968039485395e-06, "loss": 0.0553, "step": 13360 }, { "epoch": 0.10818027348490979, "grad_norm": 0.8546271920204163, "learning_rate": 5.409013674245489e-06, "loss": 0.0736, "step": 13370 }, { "epoch": 0.10826118618011166, "grad_norm": 1.1477290391921997, "learning_rate": 5.413059309005584e-06, "loss": 0.065, "step": 13380 }, { "epoch": 0.10834209887531354, "grad_norm": 1.198947548866272, "learning_rate": 5.417104943765677e-06, "loss": 0.0548, "step": 13390 }, { "epoch": 0.10842301157051541, "grad_norm": 1.6635019779205322, "learning_rate": 5.421150578525771e-06, "loss": 0.082, "step": 13400 }, { "epoch": 0.1085039242657173, "grad_norm": 0.9126958250999451, "learning_rate": 5.425196213285865e-06, "loss": 0.0468, "step": 13410 }, { "epoch": 0.10858483696091917, "grad_norm": 0.8989891409873962, "learning_rate": 5.429241848045959e-06, "loss": 0.0612, "step": 13420 }, { "epoch": 0.10866574965612105, "grad_norm": 1.158130407333374, "learning_rate": 5.433287482806052e-06, "loss": 0.0559, "step": 13430 }, { "epoch": 0.10874666235132292, "grad_norm": 1.0702807903289795, "learning_rate": 5.437333117566147e-06, "loss": 0.0537, "step": 13440 }, { "epoch": 0.1088275750465248, "grad_norm": 0.65296471118927, "learning_rate": 5.441378752326241e-06, "loss": 0.0795, "step": 13450 }, { "epoch": 0.10890848774172668, "grad_norm": 1.0357050895690918, "learning_rate": 5.445424387086335e-06, "loss": 0.0699, "step": 13460 }, { "epoch": 0.10898940043692855, "grad_norm": 1.220276951789856, "learning_rate": 5.449470021846428e-06, "loss": 0.0462, "step": 13470 }, { "epoch": 0.10907031313213043, "grad_norm": 1.0781971216201782, "learning_rate": 5.453515656606522e-06, "loss": 0.0658, "step": 13480 }, { "epoch": 0.1091512258273323, "grad_norm": 1.0765290260314941, "learning_rate": 5.457561291366616e-06, "loss": 0.0462, "step": 13490 }, { "epoch": 0.10923213852253419, "grad_norm": 0.4161517024040222, "learning_rate": 5.4616069261267105e-06, "loss": 0.0296, "step": 13500 }, { "epoch": 0.10931305121773606, "grad_norm": 1.1264711618423462, "learning_rate": 5.465652560886804e-06, "loss": 0.0608, "step": 13510 }, { "epoch": 0.10939396391293794, "grad_norm": 1.5768091678619385, "learning_rate": 5.4696981956468975e-06, "loss": 0.0679, "step": 13520 }, { "epoch": 0.10947487660813982, "grad_norm": 1.3610446453094482, "learning_rate": 5.4737438304069915e-06, "loss": 0.0642, "step": 13530 }, { "epoch": 0.1095557893033417, "grad_norm": 0.6825269460678101, "learning_rate": 5.4777894651670846e-06, "loss": 0.0525, "step": 13540 }, { "epoch": 0.10963670199854357, "grad_norm": 1.1547527313232422, "learning_rate": 5.4818350999271785e-06, "loss": 0.0887, "step": 13550 }, { "epoch": 0.10971761469374545, "grad_norm": 0.9919534921646118, "learning_rate": 5.485880734687273e-06, "loss": 0.0623, "step": 13560 }, { "epoch": 0.10979852738894733, "grad_norm": 1.2539321184158325, "learning_rate": 5.489926369447367e-06, "loss": 0.0791, "step": 13570 }, { "epoch": 0.1098794400841492, "grad_norm": 1.0098869800567627, "learning_rate": 5.49397200420746e-06, "loss": 0.0558, "step": 13580 }, { "epoch": 0.10996035277935108, "grad_norm": 1.452967643737793, "learning_rate": 5.498017638967554e-06, "loss": 0.0608, "step": 13590 }, { "epoch": 0.11004126547455295, "grad_norm": 1.0765100717544556, "learning_rate": 5.502063273727648e-06, "loss": 0.0524, "step": 13600 }, { "epoch": 0.11012217816975484, "grad_norm": 1.068119764328003, "learning_rate": 5.506108908487742e-06, "loss": 0.0581, "step": 13610 }, { "epoch": 0.11020309086495671, "grad_norm": 1.4961169958114624, "learning_rate": 5.510154543247837e-06, "loss": 0.0693, "step": 13620 }, { "epoch": 0.11028400356015859, "grad_norm": 1.0725325345993042, "learning_rate": 5.51420017800793e-06, "loss": 0.0558, "step": 13630 }, { "epoch": 0.11036491625536046, "grad_norm": 0.5845354199409485, "learning_rate": 5.518245812768024e-06, "loss": 0.0413, "step": 13640 }, { "epoch": 0.11044582895056235, "grad_norm": 1.6044694185256958, "learning_rate": 5.522291447528118e-06, "loss": 0.08, "step": 13650 }, { "epoch": 0.11052674164576422, "grad_norm": 0.8371835350990295, "learning_rate": 5.526337082288211e-06, "loss": 0.0621, "step": 13660 }, { "epoch": 0.1106076543409661, "grad_norm": 0.8124852776527405, "learning_rate": 5.530382717048305e-06, "loss": 0.0701, "step": 13670 }, { "epoch": 0.11068856703616797, "grad_norm": 1.5605369806289673, "learning_rate": 5.5344283518084e-06, "loss": 0.0636, "step": 13680 }, { "epoch": 0.11076947973136986, "grad_norm": 1.5045701265335083, "learning_rate": 5.538473986568494e-06, "loss": 0.0599, "step": 13690 }, { "epoch": 0.11085039242657173, "grad_norm": 0.8375858664512634, "learning_rate": 5.542519621328587e-06, "loss": 0.0561, "step": 13700 }, { "epoch": 0.11093130512177361, "grad_norm": 1.6543395519256592, "learning_rate": 5.546565256088681e-06, "loss": 0.0637, "step": 13710 }, { "epoch": 0.11101221781697548, "grad_norm": 1.068491816520691, "learning_rate": 5.550610890848775e-06, "loss": 0.0805, "step": 13720 }, { "epoch": 0.11109313051217735, "grad_norm": 1.6838572025299072, "learning_rate": 5.554656525608868e-06, "loss": 0.0734, "step": 13730 }, { "epoch": 0.11117404320737924, "grad_norm": 1.4104807376861572, "learning_rate": 5.5587021603689625e-06, "loss": 0.0575, "step": 13740 }, { "epoch": 0.11125495590258111, "grad_norm": 1.1671591997146606, "learning_rate": 5.5627477951290564e-06, "loss": 0.0538, "step": 13750 }, { "epoch": 0.111335868597783, "grad_norm": 0.9848739504814148, "learning_rate": 5.56679342988915e-06, "loss": 0.0743, "step": 13760 }, { "epoch": 0.11141678129298486, "grad_norm": 1.324408769607544, "learning_rate": 5.5708390646492435e-06, "loss": 0.0453, "step": 13770 }, { "epoch": 0.11149769398818675, "grad_norm": 1.4400995969772339, "learning_rate": 5.574884699409337e-06, "loss": 0.0802, "step": 13780 }, { "epoch": 0.11157860668338862, "grad_norm": 0.8186243176460266, "learning_rate": 5.578930334169431e-06, "loss": 0.0548, "step": 13790 }, { "epoch": 0.1116595193785905, "grad_norm": 1.2345470190048218, "learning_rate": 5.582975968929526e-06, "loss": 0.0567, "step": 13800 }, { "epoch": 0.11174043207379238, "grad_norm": 0.8888283967971802, "learning_rate": 5.587021603689619e-06, "loss": 0.0746, "step": 13810 }, { "epoch": 0.11182134476899426, "grad_norm": 0.6178440451622009, "learning_rate": 5.591067238449713e-06, "loss": 0.0448, "step": 13820 }, { "epoch": 0.11190225746419613, "grad_norm": 1.0793460607528687, "learning_rate": 5.595112873209807e-06, "loss": 0.054, "step": 13830 }, { "epoch": 0.11198317015939802, "grad_norm": 1.4325538873672485, "learning_rate": 5.599158507969901e-06, "loss": 0.0551, "step": 13840 }, { "epoch": 0.11206408285459989, "grad_norm": 0.9874424338340759, "learning_rate": 5.603204142729994e-06, "loss": 0.075, "step": 13850 }, { "epoch": 0.11214499554980176, "grad_norm": 0.8082350492477417, "learning_rate": 5.607249777490089e-06, "loss": 0.0691, "step": 13860 }, { "epoch": 0.11222590824500364, "grad_norm": 1.0663845539093018, "learning_rate": 5.611295412250183e-06, "loss": 0.0412, "step": 13870 }, { "epoch": 0.11230682094020551, "grad_norm": 0.5208383202552795, "learning_rate": 5.615341047010277e-06, "loss": 0.0652, "step": 13880 }, { "epoch": 0.1123877336354074, "grad_norm": 0.6453835368156433, "learning_rate": 5.61938668177037e-06, "loss": 0.044, "step": 13890 }, { "epoch": 0.11246864633060927, "grad_norm": 1.4331735372543335, "learning_rate": 5.623432316530464e-06, "loss": 0.0502, "step": 13900 }, { "epoch": 0.11254955902581115, "grad_norm": 0.677666187286377, "learning_rate": 5.627477951290558e-06, "loss": 0.0562, "step": 13910 }, { "epoch": 0.11263047172101302, "grad_norm": 0.9384909868240356, "learning_rate": 5.6315235860506526e-06, "loss": 0.0597, "step": 13920 }, { "epoch": 0.11271138441621491, "grad_norm": 1.8916223049163818, "learning_rate": 5.635569220810746e-06, "loss": 0.0396, "step": 13930 }, { "epoch": 0.11279229711141678, "grad_norm": 1.092901349067688, "learning_rate": 5.63961485557084e-06, "loss": 0.0388, "step": 13940 }, { "epoch": 0.11287320980661866, "grad_norm": 1.457837462425232, "learning_rate": 5.6436604903309335e-06, "loss": 0.0583, "step": 13950 }, { "epoch": 0.11295412250182053, "grad_norm": 0.9042636752128601, "learning_rate": 5.647706125091027e-06, "loss": 0.0749, "step": 13960 }, { "epoch": 0.11303503519702242, "grad_norm": 0.9823919534683228, "learning_rate": 5.651751759851121e-06, "loss": 0.0598, "step": 13970 }, { "epoch": 0.11311594789222429, "grad_norm": 1.1733964681625366, "learning_rate": 5.655797394611215e-06, "loss": 0.0677, "step": 13980 }, { "epoch": 0.11319686058742617, "grad_norm": 1.926571011543274, "learning_rate": 5.659843029371309e-06, "loss": 0.0603, "step": 13990 }, { "epoch": 0.11327777328262804, "grad_norm": 0.8796321153640747, "learning_rate": 5.663888664131402e-06, "loss": 0.0406, "step": 14000 }, { "epoch": 0.11335868597782991, "grad_norm": 0.9397196769714355, "learning_rate": 5.667934298891496e-06, "loss": 0.0543, "step": 14010 }, { "epoch": 0.1134395986730318, "grad_norm": 0.9261781573295593, "learning_rate": 5.67197993365159e-06, "loss": 0.0699, "step": 14020 }, { "epoch": 0.11352051136823367, "grad_norm": 0.9568076729774475, "learning_rate": 5.676025568411684e-06, "loss": 0.0849, "step": 14030 }, { "epoch": 0.11360142406343555, "grad_norm": 1.2190546989440918, "learning_rate": 5.680071203171779e-06, "loss": 0.083, "step": 14040 }, { "epoch": 0.11368233675863743, "grad_norm": 1.644532322883606, "learning_rate": 5.684116837931872e-06, "loss": 0.0728, "step": 14050 }, { "epoch": 0.11376324945383931, "grad_norm": 0.8547542691230774, "learning_rate": 5.688162472691966e-06, "loss": 0.0738, "step": 14060 }, { "epoch": 0.11384416214904118, "grad_norm": 0.6396374702453613, "learning_rate": 5.69220810745206e-06, "loss": 0.0511, "step": 14070 }, { "epoch": 0.11392507484424307, "grad_norm": 0.8732357025146484, "learning_rate": 5.696253742212153e-06, "loss": 0.0474, "step": 14080 }, { "epoch": 0.11400598753944494, "grad_norm": 1.8734419345855713, "learning_rate": 5.700299376972247e-06, "loss": 0.0503, "step": 14090 }, { "epoch": 0.11408690023464682, "grad_norm": 0.7715119123458862, "learning_rate": 5.704345011732342e-06, "loss": 0.0377, "step": 14100 }, { "epoch": 0.11416781292984869, "grad_norm": 0.7901647090911865, "learning_rate": 5.708390646492436e-06, "loss": 0.0705, "step": 14110 }, { "epoch": 0.11424872562505058, "grad_norm": 0.8790513277053833, "learning_rate": 5.712436281252529e-06, "loss": 0.063, "step": 14120 }, { "epoch": 0.11432963832025245, "grad_norm": 1.180189847946167, "learning_rate": 5.716481916012623e-06, "loss": 0.0562, "step": 14130 }, { "epoch": 0.11441055101545433, "grad_norm": 1.0493568181991577, "learning_rate": 5.720527550772717e-06, "loss": 0.051, "step": 14140 }, { "epoch": 0.1144914637106562, "grad_norm": 0.5986998677253723, "learning_rate": 5.72457318553281e-06, "loss": 0.0525, "step": 14150 }, { "epoch": 0.11457237640585807, "grad_norm": 1.072369933128357, "learning_rate": 5.728618820292905e-06, "loss": 0.0897, "step": 14160 }, { "epoch": 0.11465328910105996, "grad_norm": 1.0943361520767212, "learning_rate": 5.7326644550529985e-06, "loss": 0.0462, "step": 14170 }, { "epoch": 0.11473420179626183, "grad_norm": 0.3198572099208832, "learning_rate": 5.7367100898130925e-06, "loss": 0.056, "step": 14180 }, { "epoch": 0.11481511449146371, "grad_norm": 1.4641352891921997, "learning_rate": 5.7407557245731856e-06, "loss": 0.0611, "step": 14190 }, { "epoch": 0.11489602718666558, "grad_norm": 1.2658426761627197, "learning_rate": 5.7448013593332795e-06, "loss": 0.0612, "step": 14200 }, { "epoch": 0.11497693988186747, "grad_norm": 1.419825792312622, "learning_rate": 5.7488469940933734e-06, "loss": 0.0673, "step": 14210 }, { "epoch": 0.11505785257706934, "grad_norm": 0.7249202132225037, "learning_rate": 5.752892628853468e-06, "loss": 0.0725, "step": 14220 }, { "epoch": 0.11513876527227122, "grad_norm": 1.0421576499938965, "learning_rate": 5.756938263613562e-06, "loss": 0.0613, "step": 14230 }, { "epoch": 0.1152196779674731, "grad_norm": 0.9013673663139343, "learning_rate": 5.760983898373655e-06, "loss": 0.0758, "step": 14240 }, { "epoch": 0.11530059066267498, "grad_norm": 1.1650018692016602, "learning_rate": 5.765029533133749e-06, "loss": 0.0625, "step": 14250 }, { "epoch": 0.11538150335787685, "grad_norm": 1.3621090650558472, "learning_rate": 5.769075167893843e-06, "loss": 0.0629, "step": 14260 }, { "epoch": 0.11546241605307873, "grad_norm": 0.6500400304794312, "learning_rate": 5.773120802653936e-06, "loss": 0.0615, "step": 14270 }, { "epoch": 0.1155433287482806, "grad_norm": 0.7127833366394043, "learning_rate": 5.777166437414031e-06, "loss": 0.0759, "step": 14280 }, { "epoch": 0.11562424144348248, "grad_norm": 0.7809874415397644, "learning_rate": 5.781212072174125e-06, "loss": 0.0421, "step": 14290 }, { "epoch": 0.11570515413868436, "grad_norm": 0.9388716220855713, "learning_rate": 5.785257706934219e-06, "loss": 0.0719, "step": 14300 }, { "epoch": 0.11578606683388623, "grad_norm": 0.8442487716674805, "learning_rate": 5.789303341694312e-06, "loss": 0.0604, "step": 14310 }, { "epoch": 0.11586697952908812, "grad_norm": 0.8376803398132324, "learning_rate": 5.793348976454406e-06, "loss": 0.0481, "step": 14320 }, { "epoch": 0.11594789222428999, "grad_norm": 1.5362825393676758, "learning_rate": 5.7973946112145e-06, "loss": 0.0482, "step": 14330 }, { "epoch": 0.11602880491949187, "grad_norm": 0.7965448498725891, "learning_rate": 5.801440245974595e-06, "loss": 0.0472, "step": 14340 }, { "epoch": 0.11610971761469374, "grad_norm": 0.8083991408348083, "learning_rate": 5.805485880734688e-06, "loss": 0.042, "step": 14350 }, { "epoch": 0.11619063030989563, "grad_norm": 1.3327617645263672, "learning_rate": 5.809531515494782e-06, "loss": 0.0603, "step": 14360 }, { "epoch": 0.1162715430050975, "grad_norm": 1.2966313362121582, "learning_rate": 5.813577150254876e-06, "loss": 0.0549, "step": 14370 }, { "epoch": 0.11635245570029938, "grad_norm": 1.0710428953170776, "learning_rate": 5.817622785014969e-06, "loss": 0.0466, "step": 14380 }, { "epoch": 0.11643336839550125, "grad_norm": 1.30098557472229, "learning_rate": 5.821668419775063e-06, "loss": 0.0661, "step": 14390 }, { "epoch": 0.11651428109070314, "grad_norm": 0.758461058139801, "learning_rate": 5.8257140545351575e-06, "loss": 0.0599, "step": 14400 }, { "epoch": 0.11659519378590501, "grad_norm": 1.1683670282363892, "learning_rate": 5.829759689295251e-06, "loss": 0.0592, "step": 14410 }, { "epoch": 0.11667610648110689, "grad_norm": 1.2680234909057617, "learning_rate": 5.833805324055345e-06, "loss": 0.0623, "step": 14420 }, { "epoch": 0.11675701917630876, "grad_norm": 1.1644158363342285, "learning_rate": 5.8378509588154384e-06, "loss": 0.0591, "step": 14430 }, { "epoch": 0.11683793187151063, "grad_norm": 1.8504767417907715, "learning_rate": 5.841896593575532e-06, "loss": 0.0563, "step": 14440 }, { "epoch": 0.11691884456671252, "grad_norm": 0.8593546152114868, "learning_rate": 5.845942228335626e-06, "loss": 0.0432, "step": 14450 }, { "epoch": 0.11699975726191439, "grad_norm": 1.1645773649215698, "learning_rate": 5.849987863095719e-06, "loss": 0.0541, "step": 14460 }, { "epoch": 0.11708066995711627, "grad_norm": 1.083162784576416, "learning_rate": 5.854033497855814e-06, "loss": 0.0574, "step": 14470 }, { "epoch": 0.11716158265231814, "grad_norm": 1.107238531112671, "learning_rate": 5.858079132615908e-06, "loss": 0.0401, "step": 14480 }, { "epoch": 0.11724249534752003, "grad_norm": 0.6436665654182434, "learning_rate": 5.862124767376002e-06, "loss": 0.0826, "step": 14490 }, { "epoch": 0.1173234080427219, "grad_norm": 0.9708002805709839, "learning_rate": 5.866170402136095e-06, "loss": 0.0498, "step": 14500 }, { "epoch": 0.11740432073792378, "grad_norm": 1.5270239114761353, "learning_rate": 5.870216036896189e-06, "loss": 0.0429, "step": 14510 }, { "epoch": 0.11748523343312565, "grad_norm": 1.054024577140808, "learning_rate": 5.874261671656283e-06, "loss": 0.0512, "step": 14520 }, { "epoch": 0.11756614612832754, "grad_norm": 0.7580834031105042, "learning_rate": 5.878307306416378e-06, "loss": 0.0557, "step": 14530 }, { "epoch": 0.11764705882352941, "grad_norm": 0.8079416751861572, "learning_rate": 5.882352941176471e-06, "loss": 0.0757, "step": 14540 }, { "epoch": 0.1177279715187313, "grad_norm": 0.9142869710922241, "learning_rate": 5.886398575936565e-06, "loss": 0.0569, "step": 14550 }, { "epoch": 0.11780888421393317, "grad_norm": 0.6707258224487305, "learning_rate": 5.890444210696659e-06, "loss": 0.0434, "step": 14560 }, { "epoch": 0.11788979690913505, "grad_norm": 0.6951971054077148, "learning_rate": 5.894489845456752e-06, "loss": 0.0597, "step": 14570 }, { "epoch": 0.11797070960433692, "grad_norm": 1.30574369430542, "learning_rate": 5.898535480216846e-06, "loss": 0.0869, "step": 14580 }, { "epoch": 0.11805162229953879, "grad_norm": 0.8446406722068787, "learning_rate": 5.902581114976941e-06, "loss": 0.0572, "step": 14590 }, { "epoch": 0.11813253499474068, "grad_norm": 1.0198171138763428, "learning_rate": 5.9066267497370346e-06, "loss": 0.067, "step": 14600 }, { "epoch": 0.11821344768994255, "grad_norm": 1.0341126918792725, "learning_rate": 5.9106723844971285e-06, "loss": 0.0732, "step": 14610 }, { "epoch": 0.11829436038514443, "grad_norm": 0.8205816745758057, "learning_rate": 5.914718019257222e-06, "loss": 0.0613, "step": 14620 }, { "epoch": 0.1183752730803463, "grad_norm": 0.6872541308403015, "learning_rate": 5.9187636540173155e-06, "loss": 0.0534, "step": 14630 }, { "epoch": 0.11845618577554819, "grad_norm": 1.581216812133789, "learning_rate": 5.9228092887774095e-06, "loss": 0.0639, "step": 14640 }, { "epoch": 0.11853709847075006, "grad_norm": 0.7258896827697754, "learning_rate": 5.926854923537504e-06, "loss": 0.0554, "step": 14650 }, { "epoch": 0.11861801116595194, "grad_norm": 0.9078906774520874, "learning_rate": 5.930900558297597e-06, "loss": 0.0493, "step": 14660 }, { "epoch": 0.11869892386115381, "grad_norm": 0.8923003077507019, "learning_rate": 5.934946193057691e-06, "loss": 0.0652, "step": 14670 }, { "epoch": 0.1187798365563557, "grad_norm": 1.179883599281311, "learning_rate": 5.938991827817785e-06, "loss": 0.0608, "step": 14680 }, { "epoch": 0.11886074925155757, "grad_norm": 1.5530070066452026, "learning_rate": 5.943037462577878e-06, "loss": 0.0599, "step": 14690 }, { "epoch": 0.11894166194675945, "grad_norm": 1.3868718147277832, "learning_rate": 5.947083097337972e-06, "loss": 0.0752, "step": 14700 }, { "epoch": 0.11902257464196132, "grad_norm": 1.2615760564804077, "learning_rate": 5.951128732098067e-06, "loss": 0.0616, "step": 14710 }, { "epoch": 0.1191034873371632, "grad_norm": 0.8829371929168701, "learning_rate": 5.955174366858161e-06, "loss": 0.0493, "step": 14720 }, { "epoch": 0.11918440003236508, "grad_norm": 1.1986807584762573, "learning_rate": 5.959220001618254e-06, "loss": 0.0549, "step": 14730 }, { "epoch": 0.11926531272756695, "grad_norm": 1.0919300317764282, "learning_rate": 5.963265636378348e-06, "loss": 0.063, "step": 14740 }, { "epoch": 0.11934622542276883, "grad_norm": 0.6693935990333557, "learning_rate": 5.967311271138442e-06, "loss": 0.0661, "step": 14750 }, { "epoch": 0.1194271381179707, "grad_norm": 1.8922066688537598, "learning_rate": 5.971356905898535e-06, "loss": 0.0569, "step": 14760 }, { "epoch": 0.11950805081317259, "grad_norm": 1.7437118291854858, "learning_rate": 5.97540254065863e-06, "loss": 0.0578, "step": 14770 }, { "epoch": 0.11958896350837446, "grad_norm": 1.2612338066101074, "learning_rate": 5.979448175418724e-06, "loss": 0.0427, "step": 14780 }, { "epoch": 0.11966987620357634, "grad_norm": 1.2202563285827637, "learning_rate": 5.983493810178818e-06, "loss": 0.0973, "step": 14790 }, { "epoch": 0.11975078889877822, "grad_norm": 0.7425749897956848, "learning_rate": 5.987539444938912e-06, "loss": 0.0674, "step": 14800 }, { "epoch": 0.1198317015939801, "grad_norm": 1.329538106918335, "learning_rate": 5.991585079699005e-06, "loss": 0.0508, "step": 14810 }, { "epoch": 0.11991261428918197, "grad_norm": 0.7232909202575684, "learning_rate": 5.995630714459099e-06, "loss": 0.0448, "step": 14820 }, { "epoch": 0.11999352698438386, "grad_norm": 0.7270302176475525, "learning_rate": 5.9996763492191935e-06, "loss": 0.0525, "step": 14830 }, { "epoch": 0.12007443967958573, "grad_norm": 1.199450135231018, "learning_rate": 6.0037219839792874e-06, "loss": 0.0534, "step": 14840 }, { "epoch": 0.12015535237478761, "grad_norm": 1.0578022003173828, "learning_rate": 6.0077676187393805e-06, "loss": 0.0527, "step": 14850 }, { "epoch": 0.12023626506998948, "grad_norm": 0.6200411319732666, "learning_rate": 6.0118132534994745e-06, "loss": 0.059, "step": 14860 }, { "epoch": 0.12031717776519135, "grad_norm": 0.8118218183517456, "learning_rate": 6.015858888259568e-06, "loss": 0.0372, "step": 14870 }, { "epoch": 0.12039809046039324, "grad_norm": 1.1720918416976929, "learning_rate": 6.0199045230196615e-06, "loss": 0.0809, "step": 14880 }, { "epoch": 0.12047900315559511, "grad_norm": 1.2725861072540283, "learning_rate": 6.023950157779756e-06, "loss": 0.0591, "step": 14890 }, { "epoch": 0.12055991585079699, "grad_norm": 0.8642858862876892, "learning_rate": 6.02799579253985e-06, "loss": 0.0542, "step": 14900 }, { "epoch": 0.12064082854599886, "grad_norm": 0.6287131905555725, "learning_rate": 6.032041427299944e-06, "loss": 0.0511, "step": 14910 }, { "epoch": 0.12072174124120075, "grad_norm": 1.262939453125, "learning_rate": 6.036087062060037e-06, "loss": 0.0509, "step": 14920 }, { "epoch": 0.12080265393640262, "grad_norm": 0.9784843325614929, "learning_rate": 6.040132696820131e-06, "loss": 0.0498, "step": 14930 }, { "epoch": 0.1208835666316045, "grad_norm": 0.8720395565032959, "learning_rate": 6.044178331580225e-06, "loss": 0.0781, "step": 14940 }, { "epoch": 0.12096447932680637, "grad_norm": 0.7985841035842896, "learning_rate": 6.04822396634032e-06, "loss": 0.0654, "step": 14950 }, { "epoch": 0.12104539202200826, "grad_norm": 0.8112380504608154, "learning_rate": 6.052269601100413e-06, "loss": 0.0719, "step": 14960 }, { "epoch": 0.12112630471721013, "grad_norm": 0.9378396272659302, "learning_rate": 6.056315235860507e-06, "loss": 0.0676, "step": 14970 }, { "epoch": 0.12120721741241201, "grad_norm": 1.2723857164382935, "learning_rate": 6.060360870620601e-06, "loss": 0.0533, "step": 14980 }, { "epoch": 0.12128813010761388, "grad_norm": 1.826432466506958, "learning_rate": 6.064406505380695e-06, "loss": 0.07, "step": 14990 }, { "epoch": 0.12136904280281577, "grad_norm": 1.1633827686309814, "learning_rate": 6.068452140140788e-06, "loss": 0.0667, "step": 15000 }, { "epoch": 0.12144995549801764, "grad_norm": 1.0640571117401123, "learning_rate": 6.072497774900883e-06, "loss": 0.0411, "step": 15010 }, { "epoch": 0.12153086819321951, "grad_norm": 1.5066193342208862, "learning_rate": 6.076543409660977e-06, "loss": 0.0598, "step": 15020 }, { "epoch": 0.1216117808884214, "grad_norm": 0.6485815048217773, "learning_rate": 6.080589044421071e-06, "loss": 0.0433, "step": 15030 }, { "epoch": 0.12169269358362327, "grad_norm": 0.8281228542327881, "learning_rate": 6.084634679181164e-06, "loss": 0.0556, "step": 15040 }, { "epoch": 0.12177360627882515, "grad_norm": 1.5656414031982422, "learning_rate": 6.088680313941258e-06, "loss": 0.057, "step": 15050 }, { "epoch": 0.12185451897402702, "grad_norm": 1.2055895328521729, "learning_rate": 6.0927259487013516e-06, "loss": 0.0581, "step": 15060 }, { "epoch": 0.1219354316692289, "grad_norm": 0.9929128289222717, "learning_rate": 6.096771583461446e-06, "loss": 0.052, "step": 15070 }, { "epoch": 0.12201634436443078, "grad_norm": 1.0730109214782715, "learning_rate": 6.1008172182215394e-06, "loss": 0.0584, "step": 15080 }, { "epoch": 0.12209725705963266, "grad_norm": 1.0592422485351562, "learning_rate": 6.104862852981633e-06, "loss": 0.0828, "step": 15090 }, { "epoch": 0.12217816975483453, "grad_norm": 0.5716356635093689, "learning_rate": 6.108908487741727e-06, "loss": 0.0523, "step": 15100 }, { "epoch": 0.12225908245003642, "grad_norm": 0.8643407225608826, "learning_rate": 6.11295412250182e-06, "loss": 0.0625, "step": 15110 }, { "epoch": 0.12233999514523829, "grad_norm": 1.16661536693573, "learning_rate": 6.116999757261914e-06, "loss": 0.0639, "step": 15120 }, { "epoch": 0.12242090784044017, "grad_norm": 0.8004400730133057, "learning_rate": 6.121045392022009e-06, "loss": 0.0578, "step": 15130 }, { "epoch": 0.12250182053564204, "grad_norm": 1.196425199508667, "learning_rate": 6.125091026782103e-06, "loss": 0.0583, "step": 15140 }, { "epoch": 0.12258273323084391, "grad_norm": 0.7544881701469421, "learning_rate": 6.129136661542196e-06, "loss": 0.0427, "step": 15150 }, { "epoch": 0.1226636459260458, "grad_norm": 0.6743447780609131, "learning_rate": 6.13318229630229e-06, "loss": 0.0588, "step": 15160 }, { "epoch": 0.12274455862124767, "grad_norm": 0.5252828001976013, "learning_rate": 6.137227931062384e-06, "loss": 0.0609, "step": 15170 }, { "epoch": 0.12282547131644955, "grad_norm": 0.8900952935218811, "learning_rate": 6.141273565822478e-06, "loss": 0.0476, "step": 15180 }, { "epoch": 0.12290638401165142, "grad_norm": 1.2575265169143677, "learning_rate": 6.145319200582573e-06, "loss": 0.0559, "step": 15190 }, { "epoch": 0.12298729670685331, "grad_norm": 1.0012643337249756, "learning_rate": 6.149364835342666e-06, "loss": 0.0593, "step": 15200 }, { "epoch": 0.12306820940205518, "grad_norm": 1.12649405002594, "learning_rate": 6.15341047010276e-06, "loss": 0.0429, "step": 15210 }, { "epoch": 0.12314912209725706, "grad_norm": 2.0429847240448, "learning_rate": 6.157456104862854e-06, "loss": 0.0547, "step": 15220 }, { "epoch": 0.12323003479245893, "grad_norm": 0.9709304571151733, "learning_rate": 6.161501739622947e-06, "loss": 0.0717, "step": 15230 }, { "epoch": 0.12331094748766082, "grad_norm": 1.1906518936157227, "learning_rate": 6.165547374383041e-06, "loss": 0.0596, "step": 15240 }, { "epoch": 0.12339186018286269, "grad_norm": 0.7716512084007263, "learning_rate": 6.1695930091431356e-06, "loss": 0.0524, "step": 15250 }, { "epoch": 0.12347277287806457, "grad_norm": 1.2839066982269287, "learning_rate": 6.1736386439032295e-06, "loss": 0.0559, "step": 15260 }, { "epoch": 0.12355368557326644, "grad_norm": 1.7039631605148315, "learning_rate": 6.177684278663323e-06, "loss": 0.0482, "step": 15270 }, { "epoch": 0.12363459826846833, "grad_norm": 1.1514054536819458, "learning_rate": 6.1817299134234165e-06, "loss": 0.0498, "step": 15280 }, { "epoch": 0.1237155109636702, "grad_norm": 1.2531613111495972, "learning_rate": 6.1857755481835105e-06, "loss": 0.0761, "step": 15290 }, { "epoch": 0.12379642365887207, "grad_norm": 1.238742709159851, "learning_rate": 6.189821182943604e-06, "loss": 0.0705, "step": 15300 }, { "epoch": 0.12387733635407396, "grad_norm": 1.4731452465057373, "learning_rate": 6.193866817703698e-06, "loss": 0.0425, "step": 15310 }, { "epoch": 0.12395824904927583, "grad_norm": 0.7915396094322205, "learning_rate": 6.197912452463792e-06, "loss": 0.0434, "step": 15320 }, { "epoch": 0.12403916174447771, "grad_norm": 1.4193353652954102, "learning_rate": 6.201958087223886e-06, "loss": 0.0853, "step": 15330 }, { "epoch": 0.12412007443967958, "grad_norm": 0.8721662759780884, "learning_rate": 6.206003721983979e-06, "loss": 0.0754, "step": 15340 }, { "epoch": 0.12420098713488147, "grad_norm": 0.7300727367401123, "learning_rate": 6.210049356744073e-06, "loss": 0.0508, "step": 15350 }, { "epoch": 0.12428189983008334, "grad_norm": 0.7937477827072144, "learning_rate": 6.214094991504167e-06, "loss": 0.0522, "step": 15360 }, { "epoch": 0.12436281252528522, "grad_norm": 0.9795616269111633, "learning_rate": 6.218140626264262e-06, "loss": 0.0621, "step": 15370 }, { "epoch": 0.12444372522048709, "grad_norm": 1.624343991279602, "learning_rate": 6.222186261024356e-06, "loss": 0.0701, "step": 15380 }, { "epoch": 0.12452463791568898, "grad_norm": 0.9346029162406921, "learning_rate": 6.226231895784449e-06, "loss": 0.0363, "step": 15390 }, { "epoch": 0.12460555061089085, "grad_norm": 1.3519254922866821, "learning_rate": 6.230277530544543e-06, "loss": 0.0448, "step": 15400 }, { "epoch": 0.12468646330609273, "grad_norm": 0.7472984194755554, "learning_rate": 6.234323165304637e-06, "loss": 0.0589, "step": 15410 }, { "epoch": 0.1247673760012946, "grad_norm": 1.4025825262069702, "learning_rate": 6.23836880006473e-06, "loss": 0.0619, "step": 15420 }, { "epoch": 0.12484828869649649, "grad_norm": 0.6811575293540955, "learning_rate": 6.242414434824825e-06, "loss": 0.0491, "step": 15430 }, { "epoch": 0.12492920139169836, "grad_norm": 0.9807518720626831, "learning_rate": 6.246460069584919e-06, "loss": 0.0759, "step": 15440 }, { "epoch": 0.12501011408690024, "grad_norm": 0.7225323915481567, "learning_rate": 6.250505704345013e-06, "loss": 0.0536, "step": 15450 }, { "epoch": 0.1250910267821021, "grad_norm": 1.4015865325927734, "learning_rate": 6.254551339105106e-06, "loss": 0.0768, "step": 15460 }, { "epoch": 0.12517193947730398, "grad_norm": 1.1886258125305176, "learning_rate": 6.2585969738652e-06, "loss": 0.0792, "step": 15470 }, { "epoch": 0.12525285217250587, "grad_norm": 0.7836548686027527, "learning_rate": 6.262642608625294e-06, "loss": 0.0444, "step": 15480 }, { "epoch": 0.12533376486770775, "grad_norm": 1.4361342191696167, "learning_rate": 6.2666882433853884e-06, "loss": 0.059, "step": 15490 }, { "epoch": 0.1254146775629096, "grad_norm": 0.8043684959411621, "learning_rate": 6.2707338781454815e-06, "loss": 0.0798, "step": 15500 }, { "epoch": 0.1254955902581115, "grad_norm": 1.0666940212249756, "learning_rate": 6.2747795129055755e-06, "loss": 0.0468, "step": 15510 }, { "epoch": 0.12557650295331338, "grad_norm": 1.173579454421997, "learning_rate": 6.278825147665669e-06, "loss": 0.0727, "step": 15520 }, { "epoch": 0.12565741564851526, "grad_norm": 1.0083199739456177, "learning_rate": 6.2828707824257625e-06, "loss": 0.0672, "step": 15530 }, { "epoch": 0.12573832834371712, "grad_norm": 1.2173486948013306, "learning_rate": 6.2869164171858564e-06, "loss": 0.0762, "step": 15540 }, { "epoch": 0.125819241038919, "grad_norm": 0.8074051737785339, "learning_rate": 6.290962051945951e-06, "loss": 0.0631, "step": 15550 }, { "epoch": 0.1259001537341209, "grad_norm": 1.0956586599349976, "learning_rate": 6.295007686706045e-06, "loss": 0.0577, "step": 15560 }, { "epoch": 0.12598106642932277, "grad_norm": 1.1747660636901855, "learning_rate": 6.299053321466138e-06, "loss": 0.0486, "step": 15570 }, { "epoch": 0.12606197912452463, "grad_norm": 0.6617406010627747, "learning_rate": 6.303098956226232e-06, "loss": 0.0473, "step": 15580 }, { "epoch": 0.12614289181972652, "grad_norm": 0.897654116153717, "learning_rate": 6.307144590986326e-06, "loss": 0.0644, "step": 15590 }, { "epoch": 0.1262238045149284, "grad_norm": 0.8206512331962585, "learning_rate": 6.31119022574642e-06, "loss": 0.052, "step": 15600 }, { "epoch": 0.12630471721013026, "grad_norm": 1.4060546159744263, "learning_rate": 6.315235860506515e-06, "loss": 0.0757, "step": 15610 }, { "epoch": 0.12638562990533214, "grad_norm": 0.5852802395820618, "learning_rate": 6.319281495266608e-06, "loss": 0.0607, "step": 15620 }, { "epoch": 0.12646654260053403, "grad_norm": 0.8008260726928711, "learning_rate": 6.323327130026702e-06, "loss": 0.0527, "step": 15630 }, { "epoch": 0.1265474552957359, "grad_norm": 0.8124776482582092, "learning_rate": 6.327372764786796e-06, "loss": 0.0615, "step": 15640 }, { "epoch": 0.12662836799093777, "grad_norm": 1.4875860214233398, "learning_rate": 6.331418399546889e-06, "loss": 0.0521, "step": 15650 }, { "epoch": 0.12670928068613965, "grad_norm": 0.25122106075286865, "learning_rate": 6.335464034306983e-06, "loss": 0.054, "step": 15660 }, { "epoch": 0.12679019338134154, "grad_norm": 1.188482642173767, "learning_rate": 6.339509669067078e-06, "loss": 0.0803, "step": 15670 }, { "epoch": 0.12687110607654342, "grad_norm": 0.695786714553833, "learning_rate": 6.343555303827172e-06, "loss": 0.058, "step": 15680 }, { "epoch": 0.12695201877174528, "grad_norm": 0.9078125953674316, "learning_rate": 6.347600938587265e-06, "loss": 0.0584, "step": 15690 }, { "epoch": 0.12703293146694716, "grad_norm": 0.8410818576812744, "learning_rate": 6.351646573347359e-06, "loss": 0.05, "step": 15700 }, { "epoch": 0.12711384416214905, "grad_norm": 1.2156504392623901, "learning_rate": 6.3556922081074526e-06, "loss": 0.0654, "step": 15710 }, { "epoch": 0.1271947568573509, "grad_norm": 0.8975445032119751, "learning_rate": 6.359737842867546e-06, "loss": 0.0755, "step": 15720 }, { "epoch": 0.1272756695525528, "grad_norm": 1.3132548332214355, "learning_rate": 6.3637834776276405e-06, "loss": 0.0726, "step": 15730 }, { "epoch": 0.12735658224775467, "grad_norm": 0.8763004541397095, "learning_rate": 6.367829112387734e-06, "loss": 0.0459, "step": 15740 }, { "epoch": 0.12743749494295656, "grad_norm": 2.5096707344055176, "learning_rate": 6.371874747147828e-06, "loss": 0.0421, "step": 15750 }, { "epoch": 0.12751840763815842, "grad_norm": 1.3912246227264404, "learning_rate": 6.3759203819079214e-06, "loss": 0.0579, "step": 15760 }, { "epoch": 0.1275993203333603, "grad_norm": 0.7889121770858765, "learning_rate": 6.379966016668015e-06, "loss": 0.0564, "step": 15770 }, { "epoch": 0.12768023302856218, "grad_norm": 0.9911024570465088, "learning_rate": 6.384011651428109e-06, "loss": 0.0547, "step": 15780 }, { "epoch": 0.12776114572376407, "grad_norm": 0.7792393565177917, "learning_rate": 6.388057286188204e-06, "loss": 0.0601, "step": 15790 }, { "epoch": 0.12784205841896593, "grad_norm": 0.8453497290611267, "learning_rate": 6.392102920948298e-06, "loss": 0.0793, "step": 15800 }, { "epoch": 0.1279229711141678, "grad_norm": 0.3472346067428589, "learning_rate": 6.396148555708391e-06, "loss": 0.0389, "step": 15810 }, { "epoch": 0.1280038838093697, "grad_norm": 0.8494481444358826, "learning_rate": 6.400194190468485e-06, "loss": 0.0646, "step": 15820 }, { "epoch": 0.12808479650457158, "grad_norm": 0.872310221195221, "learning_rate": 6.404239825228579e-06, "loss": 0.0675, "step": 15830 }, { "epoch": 0.12816570919977344, "grad_norm": 1.7311848402023315, "learning_rate": 6.408285459988672e-06, "loss": 0.0724, "step": 15840 }, { "epoch": 0.12824662189497532, "grad_norm": 1.395011067390442, "learning_rate": 6.412331094748767e-06, "loss": 0.0529, "step": 15850 }, { "epoch": 0.1283275345901772, "grad_norm": 1.2498925924301147, "learning_rate": 6.416376729508861e-06, "loss": 0.0465, "step": 15860 }, { "epoch": 0.12840844728537906, "grad_norm": 0.8693771362304688, "learning_rate": 6.420422364268955e-06, "loss": 0.0655, "step": 15870 }, { "epoch": 0.12848935998058095, "grad_norm": 1.5157991647720337, "learning_rate": 6.424467999029048e-06, "loss": 0.0776, "step": 15880 }, { "epoch": 0.12857027267578283, "grad_norm": 1.004002571105957, "learning_rate": 6.428513633789142e-06, "loss": 0.0607, "step": 15890 }, { "epoch": 0.12865118537098472, "grad_norm": 0.6027103066444397, "learning_rate": 6.432559268549236e-06, "loss": 0.0795, "step": 15900 }, { "epoch": 0.12873209806618657, "grad_norm": 0.7288621068000793, "learning_rate": 6.4366049033093305e-06, "loss": 0.0537, "step": 15910 }, { "epoch": 0.12881301076138846, "grad_norm": 0.6703323125839233, "learning_rate": 6.440650538069424e-06, "loss": 0.048, "step": 15920 }, { "epoch": 0.12889392345659034, "grad_norm": 1.0045946836471558, "learning_rate": 6.4446961728295176e-06, "loss": 0.0439, "step": 15930 }, { "epoch": 0.12897483615179223, "grad_norm": 0.8947062492370605, "learning_rate": 6.4487418075896115e-06, "loss": 0.0617, "step": 15940 }, { "epoch": 0.12905574884699408, "grad_norm": 0.5109455585479736, "learning_rate": 6.452787442349705e-06, "loss": 0.0505, "step": 15950 }, { "epoch": 0.12913666154219597, "grad_norm": 1.2855873107910156, "learning_rate": 6.4568330771097985e-06, "loss": 0.048, "step": 15960 }, { "epoch": 0.12921757423739785, "grad_norm": 1.7557374238967896, "learning_rate": 6.460878711869893e-06, "loss": 0.0768, "step": 15970 }, { "epoch": 0.12929848693259974, "grad_norm": 1.220337152481079, "learning_rate": 6.464924346629987e-06, "loss": 0.0613, "step": 15980 }, { "epoch": 0.1293793996278016, "grad_norm": 0.7160137295722961, "learning_rate": 6.468969981390081e-06, "loss": 0.0666, "step": 15990 }, { "epoch": 0.12946031232300348, "grad_norm": 0.781281590461731, "learning_rate": 6.473015616150174e-06, "loss": 0.0646, "step": 16000 }, { "epoch": 0.12954122501820536, "grad_norm": 0.3218052089214325, "learning_rate": 6.477061250910268e-06, "loss": 0.0375, "step": 16010 }, { "epoch": 0.12962213771340722, "grad_norm": 0.7854814529418945, "learning_rate": 6.481106885670362e-06, "loss": 0.0353, "step": 16020 }, { "epoch": 0.1297030504086091, "grad_norm": 0.6190142631530762, "learning_rate": 6.485152520430457e-06, "loss": 0.055, "step": 16030 }, { "epoch": 0.129783963103811, "grad_norm": 0.9133961796760559, "learning_rate": 6.48919815519055e-06, "loss": 0.0538, "step": 16040 }, { "epoch": 0.12986487579901287, "grad_norm": 1.140571117401123, "learning_rate": 6.493243789950644e-06, "loss": 0.0624, "step": 16050 }, { "epoch": 0.12994578849421473, "grad_norm": 0.9927918910980225, "learning_rate": 6.497289424710738e-06, "loss": 0.0564, "step": 16060 }, { "epoch": 0.13002670118941662, "grad_norm": 0.8473881483078003, "learning_rate": 6.501335059470831e-06, "loss": 0.0673, "step": 16070 }, { "epoch": 0.1301076138846185, "grad_norm": 1.645367980003357, "learning_rate": 6.505380694230925e-06, "loss": 0.053, "step": 16080 }, { "epoch": 0.13018852657982039, "grad_norm": 1.0265132188796997, "learning_rate": 6.509426328991019e-06, "loss": 0.0616, "step": 16090 }, { "epoch": 0.13026943927502224, "grad_norm": 0.9995094537734985, "learning_rate": 6.513471963751114e-06, "loss": 0.0525, "step": 16100 }, { "epoch": 0.13035035197022413, "grad_norm": 1.3704053163528442, "learning_rate": 6.517517598511207e-06, "loss": 0.0713, "step": 16110 }, { "epoch": 0.130431264665426, "grad_norm": 0.7929096817970276, "learning_rate": 6.521563233271301e-06, "loss": 0.0401, "step": 16120 }, { "epoch": 0.1305121773606279, "grad_norm": 1.2193940877914429, "learning_rate": 6.525608868031395e-06, "loss": 0.0512, "step": 16130 }, { "epoch": 0.13059309005582975, "grad_norm": 1.076338291168213, "learning_rate": 6.529654502791488e-06, "loss": 0.0434, "step": 16140 }, { "epoch": 0.13067400275103164, "grad_norm": 1.717667579650879, "learning_rate": 6.533700137551582e-06, "loss": 0.0615, "step": 16150 }, { "epoch": 0.13075491544623352, "grad_norm": 1.0893346071243286, "learning_rate": 6.5377457723116765e-06, "loss": 0.0675, "step": 16160 }, { "epoch": 0.13083582814143538, "grad_norm": 0.48232129216194153, "learning_rate": 6.54179140707177e-06, "loss": 0.0556, "step": 16170 }, { "epoch": 0.13091674083663726, "grad_norm": 0.3293824791908264, "learning_rate": 6.545837041831864e-06, "loss": 0.0499, "step": 16180 }, { "epoch": 0.13099765353183915, "grad_norm": 1.071606159210205, "learning_rate": 6.5498826765919575e-06, "loss": 0.067, "step": 16190 }, { "epoch": 0.13107856622704103, "grad_norm": 0.9021790623664856, "learning_rate": 6.553928311352051e-06, "loss": 0.0688, "step": 16200 }, { "epoch": 0.1311594789222429, "grad_norm": 0.8827465772628784, "learning_rate": 6.557973946112145e-06, "loss": 0.0775, "step": 16210 }, { "epoch": 0.13124039161744477, "grad_norm": 0.5882213115692139, "learning_rate": 6.56201958087224e-06, "loss": 0.0497, "step": 16220 }, { "epoch": 0.13132130431264666, "grad_norm": 0.9995368123054504, "learning_rate": 6.566065215632333e-06, "loss": 0.0573, "step": 16230 }, { "epoch": 0.13140221700784854, "grad_norm": 0.7791953086853027, "learning_rate": 6.570110850392427e-06, "loss": 0.0599, "step": 16240 }, { "epoch": 0.1314831297030504, "grad_norm": 1.2242428064346313, "learning_rate": 6.574156485152521e-06, "loss": 0.0654, "step": 16250 }, { "epoch": 0.13156404239825228, "grad_norm": 1.0701653957366943, "learning_rate": 6.578202119912614e-06, "loss": 0.0614, "step": 16260 }, { "epoch": 0.13164495509345417, "grad_norm": 0.9139896631240845, "learning_rate": 6.582247754672708e-06, "loss": 0.0478, "step": 16270 }, { "epoch": 0.13172586778865605, "grad_norm": 0.7993553876876831, "learning_rate": 6.586293389432803e-06, "loss": 0.0461, "step": 16280 }, { "epoch": 0.1318067804838579, "grad_norm": 1.1121395826339722, "learning_rate": 6.590339024192897e-06, "loss": 0.0539, "step": 16290 }, { "epoch": 0.1318876931790598, "grad_norm": 1.162501573562622, "learning_rate": 6.59438465895299e-06, "loss": 0.0731, "step": 16300 }, { "epoch": 0.13196860587426168, "grad_norm": 0.8255689740180969, "learning_rate": 6.598430293713084e-06, "loss": 0.0341, "step": 16310 }, { "epoch": 0.13204951856946354, "grad_norm": 0.6887086033821106, "learning_rate": 6.602475928473178e-06, "loss": 0.0478, "step": 16320 }, { "epoch": 0.13213043126466542, "grad_norm": 0.8969037532806396, "learning_rate": 6.606521563233271e-06, "loss": 0.046, "step": 16330 }, { "epoch": 0.1322113439598673, "grad_norm": 1.6335035562515259, "learning_rate": 6.610567197993366e-06, "loss": 0.0831, "step": 16340 }, { "epoch": 0.1322922566550692, "grad_norm": 1.0037479400634766, "learning_rate": 6.61461283275346e-06, "loss": 0.0528, "step": 16350 }, { "epoch": 0.13237316935027105, "grad_norm": 0.822056233882904, "learning_rate": 6.618658467513554e-06, "loss": 0.0654, "step": 16360 }, { "epoch": 0.13245408204547293, "grad_norm": 1.248984694480896, "learning_rate": 6.6227041022736475e-06, "loss": 0.0629, "step": 16370 }, { "epoch": 0.13253499474067482, "grad_norm": 0.9390102624893188, "learning_rate": 6.626749737033741e-06, "loss": 0.0489, "step": 16380 }, { "epoch": 0.1326159074358767, "grad_norm": 1.0585161447525024, "learning_rate": 6.6307953717938346e-06, "loss": 0.0501, "step": 16390 }, { "epoch": 0.13269682013107856, "grad_norm": 1.4652882814407349, "learning_rate": 6.634841006553929e-06, "loss": 0.0546, "step": 16400 }, { "epoch": 0.13277773282628044, "grad_norm": 1.1216222047805786, "learning_rate": 6.638886641314023e-06, "loss": 0.0703, "step": 16410 }, { "epoch": 0.13285864552148233, "grad_norm": 1.0251096487045288, "learning_rate": 6.642932276074116e-06, "loss": 0.0464, "step": 16420 }, { "epoch": 0.1329395582166842, "grad_norm": 1.1637146472930908, "learning_rate": 6.64697791083421e-06, "loss": 0.0689, "step": 16430 }, { "epoch": 0.13302047091188607, "grad_norm": 0.6650951504707336, "learning_rate": 6.651023545594304e-06, "loss": 0.0667, "step": 16440 }, { "epoch": 0.13310138360708795, "grad_norm": 0.8399026989936829, "learning_rate": 6.655069180354397e-06, "loss": 0.0598, "step": 16450 }, { "epoch": 0.13318229630228984, "grad_norm": 0.1955503672361374, "learning_rate": 6.659114815114492e-06, "loss": 0.0348, "step": 16460 }, { "epoch": 0.1332632089974917, "grad_norm": 0.8807395100593567, "learning_rate": 6.663160449874586e-06, "loss": 0.0621, "step": 16470 }, { "epoch": 0.13334412169269358, "grad_norm": 0.9245742559432983, "learning_rate": 6.66720608463468e-06, "loss": 0.0681, "step": 16480 }, { "epoch": 0.13342503438789546, "grad_norm": 0.5323507785797119, "learning_rate": 6.671251719394773e-06, "loss": 0.051, "step": 16490 }, { "epoch": 0.13350594708309735, "grad_norm": 0.7762954235076904, "learning_rate": 6.675297354154867e-06, "loss": 0.055, "step": 16500 }, { "epoch": 0.1335868597782992, "grad_norm": 0.7338122129440308, "learning_rate": 6.679342988914961e-06, "loss": 0.0669, "step": 16510 }, { "epoch": 0.1336677724735011, "grad_norm": 0.856303870677948, "learning_rate": 6.683388623675056e-06, "loss": 0.0539, "step": 16520 }, { "epoch": 0.13374868516870297, "grad_norm": 1.0723323822021484, "learning_rate": 6.687434258435149e-06, "loss": 0.0844, "step": 16530 }, { "epoch": 0.13382959786390486, "grad_norm": 0.6653358340263367, "learning_rate": 6.691479893195243e-06, "loss": 0.0554, "step": 16540 }, { "epoch": 0.13391051055910672, "grad_norm": 0.2613326609134674, "learning_rate": 6.695525527955337e-06, "loss": 0.0492, "step": 16550 }, { "epoch": 0.1339914232543086, "grad_norm": 1.1502052545547485, "learning_rate": 6.699571162715431e-06, "loss": 0.0371, "step": 16560 }, { "epoch": 0.13407233594951048, "grad_norm": 0.8143825531005859, "learning_rate": 6.703616797475524e-06, "loss": 0.0695, "step": 16570 }, { "epoch": 0.13415324864471234, "grad_norm": 1.325374722480774, "learning_rate": 6.7076624322356186e-06, "loss": 0.0587, "step": 16580 }, { "epoch": 0.13423416133991423, "grad_norm": 0.8602579236030579, "learning_rate": 6.7117080669957125e-06, "loss": 0.0854, "step": 16590 }, { "epoch": 0.1343150740351161, "grad_norm": 1.7025203704833984, "learning_rate": 6.7157537017558065e-06, "loss": 0.0614, "step": 16600 }, { "epoch": 0.134395986730318, "grad_norm": 1.0234794616699219, "learning_rate": 6.7197993365158995e-06, "loss": 0.0598, "step": 16610 }, { "epoch": 0.13447689942551985, "grad_norm": 1.116432547569275, "learning_rate": 6.7238449712759935e-06, "loss": 0.0471, "step": 16620 }, { "epoch": 0.13455781212072174, "grad_norm": 0.4329617917537689, "learning_rate": 6.7278906060360874e-06, "loss": 0.0528, "step": 16630 }, { "epoch": 0.13463872481592362, "grad_norm": 1.2775835990905762, "learning_rate": 6.731936240796182e-06, "loss": 0.0541, "step": 16640 }, { "epoch": 0.1347196375111255, "grad_norm": 0.8205927014350891, "learning_rate": 6.735981875556275e-06, "loss": 0.0446, "step": 16650 }, { "epoch": 0.13480055020632736, "grad_norm": 0.8593440055847168, "learning_rate": 6.740027510316369e-06, "loss": 0.0492, "step": 16660 }, { "epoch": 0.13488146290152925, "grad_norm": 0.845952570438385, "learning_rate": 6.744073145076463e-06, "loss": 0.042, "step": 16670 }, { "epoch": 0.13496237559673113, "grad_norm": 1.1355856657028198, "learning_rate": 6.748118779836556e-06, "loss": 0.0535, "step": 16680 }, { "epoch": 0.13504328829193302, "grad_norm": 1.0031050443649292, "learning_rate": 6.75216441459665e-06, "loss": 0.0485, "step": 16690 }, { "epoch": 0.13512420098713487, "grad_norm": 0.8586022257804871, "learning_rate": 6.756210049356745e-06, "loss": 0.0596, "step": 16700 }, { "epoch": 0.13520511368233676, "grad_norm": 1.0027917623519897, "learning_rate": 6.760255684116839e-06, "loss": 0.0841, "step": 16710 }, { "epoch": 0.13528602637753864, "grad_norm": 0.8739833235740662, "learning_rate": 6.764301318876932e-06, "loss": 0.0472, "step": 16720 }, { "epoch": 0.1353669390727405, "grad_norm": 0.7577219605445862, "learning_rate": 6.768346953637026e-06, "loss": 0.0601, "step": 16730 }, { "epoch": 0.13544785176794238, "grad_norm": 1.0417877435684204, "learning_rate": 6.77239258839712e-06, "loss": 0.0569, "step": 16740 }, { "epoch": 0.13552876446314427, "grad_norm": 1.3983221054077148, "learning_rate": 6.776438223157214e-06, "loss": 0.0829, "step": 16750 }, { "epoch": 0.13560967715834615, "grad_norm": 0.8778470158576965, "learning_rate": 6.780483857917309e-06, "loss": 0.0353, "step": 16760 }, { "epoch": 0.135690589853548, "grad_norm": 1.0657953023910522, "learning_rate": 6.784529492677402e-06, "loss": 0.0749, "step": 16770 }, { "epoch": 0.1357715025487499, "grad_norm": 1.002474308013916, "learning_rate": 6.788575127437496e-06, "loss": 0.0674, "step": 16780 }, { "epoch": 0.13585241524395178, "grad_norm": 0.6341093182563782, "learning_rate": 6.79262076219759e-06, "loss": 0.0497, "step": 16790 }, { "epoch": 0.13593332793915366, "grad_norm": 0.9873220324516296, "learning_rate": 6.796666396957683e-06, "loss": 0.0518, "step": 16800 }, { "epoch": 0.13601424063435552, "grad_norm": 0.7729663252830505, "learning_rate": 6.800712031717777e-06, "loss": 0.061, "step": 16810 }, { "epoch": 0.1360951533295574, "grad_norm": 1.0023891925811768, "learning_rate": 6.8047576664778714e-06, "loss": 0.065, "step": 16820 }, { "epoch": 0.1361760660247593, "grad_norm": 1.1075341701507568, "learning_rate": 6.808803301237965e-06, "loss": 0.0635, "step": 16830 }, { "epoch": 0.13625697871996117, "grad_norm": 0.730959415435791, "learning_rate": 6.8128489359980585e-06, "loss": 0.0701, "step": 16840 }, { "epoch": 0.13633789141516303, "grad_norm": 1.3844223022460938, "learning_rate": 6.816894570758152e-06, "loss": 0.0577, "step": 16850 }, { "epoch": 0.13641880411036492, "grad_norm": 0.9719905257225037, "learning_rate": 6.820940205518246e-06, "loss": 0.0507, "step": 16860 }, { "epoch": 0.1364997168055668, "grad_norm": 1.1421982049942017, "learning_rate": 6.8249858402783394e-06, "loss": 0.0497, "step": 16870 }, { "epoch": 0.13658062950076866, "grad_norm": 0.6677561402320862, "learning_rate": 6.829031475038434e-06, "loss": 0.0881, "step": 16880 }, { "epoch": 0.13666154219597054, "grad_norm": 0.7711272239685059, "learning_rate": 6.833077109798528e-06, "loss": 0.0702, "step": 16890 }, { "epoch": 0.13674245489117243, "grad_norm": 0.8003450036048889, "learning_rate": 6.837122744558622e-06, "loss": 0.0457, "step": 16900 }, { "epoch": 0.1368233675863743, "grad_norm": 1.1587938070297241, "learning_rate": 6.841168379318715e-06, "loss": 0.0551, "step": 16910 }, { "epoch": 0.13690428028157617, "grad_norm": 0.5464590787887573, "learning_rate": 6.845214014078809e-06, "loss": 0.0508, "step": 16920 }, { "epoch": 0.13698519297677805, "grad_norm": 0.7289870381355286, "learning_rate": 6.849259648838903e-06, "loss": 0.0482, "step": 16930 }, { "epoch": 0.13706610567197994, "grad_norm": 2.2639012336730957, "learning_rate": 6.853305283598998e-06, "loss": 0.0593, "step": 16940 }, { "epoch": 0.13714701836718182, "grad_norm": 0.9582295417785645, "learning_rate": 6.857350918359092e-06, "loss": 0.0821, "step": 16950 }, { "epoch": 0.13722793106238368, "grad_norm": 0.675664484500885, "learning_rate": 6.861396553119185e-06, "loss": 0.0411, "step": 16960 }, { "epoch": 0.13730884375758556, "grad_norm": 0.7873378992080688, "learning_rate": 6.865442187879279e-06, "loss": 0.0661, "step": 16970 }, { "epoch": 0.13738975645278745, "grad_norm": 0.8803954124450684, "learning_rate": 6.869487822639373e-06, "loss": 0.0413, "step": 16980 }, { "epoch": 0.13747066914798933, "grad_norm": 0.5731702446937561, "learning_rate": 6.873533457399466e-06, "loss": 0.0444, "step": 16990 }, { "epoch": 0.1375515818431912, "grad_norm": 1.0579766035079956, "learning_rate": 6.877579092159561e-06, "loss": 0.0388, "step": 17000 }, { "epoch": 0.13763249453839307, "grad_norm": 1.2126750946044922, "learning_rate": 6.881624726919655e-06, "loss": 0.0689, "step": 17010 }, { "epoch": 0.13771340723359496, "grad_norm": 0.8297341465950012, "learning_rate": 6.8856703616797485e-06, "loss": 0.0396, "step": 17020 }, { "epoch": 0.13779431992879682, "grad_norm": 0.8409543633460999, "learning_rate": 6.889715996439842e-06, "loss": 0.0551, "step": 17030 }, { "epoch": 0.1378752326239987, "grad_norm": 0.9563897848129272, "learning_rate": 6.8937616311999356e-06, "loss": 0.0478, "step": 17040 }, { "epoch": 0.13795614531920058, "grad_norm": 1.0222151279449463, "learning_rate": 6.8978072659600295e-06, "loss": 0.0726, "step": 17050 }, { "epoch": 0.13803705801440247, "grad_norm": 1.074458360671997, "learning_rate": 6.901852900720124e-06, "loss": 0.0357, "step": 17060 }, { "epoch": 0.13811797070960433, "grad_norm": 0.8117614984512329, "learning_rate": 6.905898535480217e-06, "loss": 0.0417, "step": 17070 }, { "epoch": 0.1381988834048062, "grad_norm": 1.1914869546890259, "learning_rate": 6.909944170240311e-06, "loss": 0.0589, "step": 17080 }, { "epoch": 0.1382797961000081, "grad_norm": 1.0405019521713257, "learning_rate": 6.913989805000405e-06, "loss": 0.04, "step": 17090 }, { "epoch": 0.13836070879520998, "grad_norm": 0.5640339851379395, "learning_rate": 6.918035439760498e-06, "loss": 0.0445, "step": 17100 }, { "epoch": 0.13844162149041184, "grad_norm": 0.6429269909858704, "learning_rate": 6.922081074520592e-06, "loss": 0.0612, "step": 17110 }, { "epoch": 0.13852253418561372, "grad_norm": 0.9544475674629211, "learning_rate": 6.926126709280687e-06, "loss": 0.0822, "step": 17120 }, { "epoch": 0.1386034468808156, "grad_norm": 1.4174575805664062, "learning_rate": 6.930172344040781e-06, "loss": 0.0555, "step": 17130 }, { "epoch": 0.1386843595760175, "grad_norm": 0.6546519994735718, "learning_rate": 6.934217978800875e-06, "loss": 0.0571, "step": 17140 }, { "epoch": 0.13876527227121935, "grad_norm": 0.5856276750564575, "learning_rate": 6.938263613560968e-06, "loss": 0.0854, "step": 17150 }, { "epoch": 0.13884618496642123, "grad_norm": 0.4318764805793762, "learning_rate": 6.942309248321062e-06, "loss": 0.0446, "step": 17160 }, { "epoch": 0.13892709766162312, "grad_norm": 0.7356941103935242, "learning_rate": 6.946354883081156e-06, "loss": 0.0403, "step": 17170 }, { "epoch": 0.13900801035682497, "grad_norm": 1.3371028900146484, "learning_rate": 6.950400517841251e-06, "loss": 0.0616, "step": 17180 }, { "epoch": 0.13908892305202686, "grad_norm": 0.9894187450408936, "learning_rate": 6.954446152601344e-06, "loss": 0.0546, "step": 17190 }, { "epoch": 0.13916983574722874, "grad_norm": 0.2655934691429138, "learning_rate": 6.958491787361438e-06, "loss": 0.0386, "step": 17200 }, { "epoch": 0.13925074844243063, "grad_norm": 0.7082529067993164, "learning_rate": 6.962537422121532e-06, "loss": 0.0496, "step": 17210 }, { "epoch": 0.13933166113763248, "grad_norm": 0.8044230341911316, "learning_rate": 6.966583056881625e-06, "loss": 0.0689, "step": 17220 }, { "epoch": 0.13941257383283437, "grad_norm": 1.4169237613677979, "learning_rate": 6.970628691641719e-06, "loss": 0.06, "step": 17230 }, { "epoch": 0.13949348652803625, "grad_norm": 0.5128964185714722, "learning_rate": 6.9746743264018135e-06, "loss": 0.0409, "step": 17240 }, { "epoch": 0.13957439922323814, "grad_norm": 1.6207765340805054, "learning_rate": 6.9787199611619075e-06, "loss": 0.0589, "step": 17250 }, { "epoch": 0.13965531191844, "grad_norm": 0.1643364280462265, "learning_rate": 6.9827655959220006e-06, "loss": 0.0677, "step": 17260 }, { "epoch": 0.13973622461364188, "grad_norm": 0.7719764113426208, "learning_rate": 6.9868112306820945e-06, "loss": 0.0512, "step": 17270 }, { "epoch": 0.13981713730884376, "grad_norm": 1.396575927734375, "learning_rate": 6.9908568654421884e-06, "loss": 0.066, "step": 17280 }, { "epoch": 0.13989805000404562, "grad_norm": 1.5386825799942017, "learning_rate": 6.9949025002022815e-06, "loss": 0.0666, "step": 17290 }, { "epoch": 0.1399789626992475, "grad_norm": 1.3868408203125, "learning_rate": 6.998948134962376e-06, "loss": 0.0858, "step": 17300 }, { "epoch": 0.1400598753944494, "grad_norm": 0.7874878644943237, "learning_rate": 7.00299376972247e-06, "loss": 0.0449, "step": 17310 }, { "epoch": 0.14014078808965127, "grad_norm": 0.8942269086837769, "learning_rate": 7.007039404482564e-06, "loss": 0.0672, "step": 17320 }, { "epoch": 0.14022170078485313, "grad_norm": 1.2211182117462158, "learning_rate": 7.011085039242657e-06, "loss": 0.0551, "step": 17330 }, { "epoch": 0.14030261348005502, "grad_norm": 1.0889217853546143, "learning_rate": 7.015130674002751e-06, "loss": 0.0536, "step": 17340 }, { "epoch": 0.1403835261752569, "grad_norm": 1.6173068284988403, "learning_rate": 7.019176308762845e-06, "loss": 0.071, "step": 17350 }, { "epoch": 0.14046443887045879, "grad_norm": 0.6056824922561646, "learning_rate": 7.02322194352294e-06, "loss": 0.0491, "step": 17360 }, { "epoch": 0.14054535156566064, "grad_norm": 1.1184000968933105, "learning_rate": 7.027267578283034e-06, "loss": 0.0542, "step": 17370 }, { "epoch": 0.14062626426086253, "grad_norm": 0.8670839667320251, "learning_rate": 7.031313213043127e-06, "loss": 0.0432, "step": 17380 }, { "epoch": 0.1407071769560644, "grad_norm": 0.44559356570243835, "learning_rate": 7.035358847803221e-06, "loss": 0.0448, "step": 17390 }, { "epoch": 0.1407880896512663, "grad_norm": 0.8262737393379211, "learning_rate": 7.039404482563315e-06, "loss": 0.0513, "step": 17400 }, { "epoch": 0.14086900234646815, "grad_norm": 1.245792031288147, "learning_rate": 7.043450117323408e-06, "loss": 0.0594, "step": 17410 }, { "epoch": 0.14094991504167004, "grad_norm": 1.0227082967758179, "learning_rate": 7.047495752083503e-06, "loss": 0.0376, "step": 17420 }, { "epoch": 0.14103082773687192, "grad_norm": 1.5641276836395264, "learning_rate": 7.051541386843597e-06, "loss": 0.0689, "step": 17430 }, { "epoch": 0.14111174043207378, "grad_norm": 1.086252212524414, "learning_rate": 7.055587021603691e-06, "loss": 0.0465, "step": 17440 }, { "epoch": 0.14119265312727566, "grad_norm": 0.3815096318721771, "learning_rate": 7.059632656363784e-06, "loss": 0.0512, "step": 17450 }, { "epoch": 0.14127356582247755, "grad_norm": 0.8156102299690247, "learning_rate": 7.063678291123878e-06, "loss": 0.0634, "step": 17460 }, { "epoch": 0.14135447851767943, "grad_norm": 0.9629905819892883, "learning_rate": 7.067723925883972e-06, "loss": 0.0533, "step": 17470 }, { "epoch": 0.1414353912128813, "grad_norm": 1.353765845298767, "learning_rate": 7.071769560644066e-06, "loss": 0.041, "step": 17480 }, { "epoch": 0.14151630390808317, "grad_norm": 1.153268575668335, "learning_rate": 7.0758151954041595e-06, "loss": 0.056, "step": 17490 }, { "epoch": 0.14159721660328506, "grad_norm": 1.1145527362823486, "learning_rate": 7.079860830164253e-06, "loss": 0.0677, "step": 17500 }, { "epoch": 0.14167812929848694, "grad_norm": 0.8350751399993896, "learning_rate": 7.083906464924347e-06, "loss": 0.0534, "step": 17510 }, { "epoch": 0.1417590419936888, "grad_norm": 0.9552974104881287, "learning_rate": 7.0879520996844405e-06, "loss": 0.0353, "step": 17520 }, { "epoch": 0.14183995468889068, "grad_norm": 1.0748090744018555, "learning_rate": 7.091997734444534e-06, "loss": 0.0664, "step": 17530 }, { "epoch": 0.14192086738409257, "grad_norm": 1.2993168830871582, "learning_rate": 7.096043369204629e-06, "loss": 0.0546, "step": 17540 }, { "epoch": 0.14200178007929445, "grad_norm": 0.5527724623680115, "learning_rate": 7.100089003964723e-06, "loss": 0.0484, "step": 17550 }, { "epoch": 0.1420826927744963, "grad_norm": 1.1975303888320923, "learning_rate": 7.104134638724817e-06, "loss": 0.0454, "step": 17560 }, { "epoch": 0.1421636054696982, "grad_norm": 0.9300798773765564, "learning_rate": 7.10818027348491e-06, "loss": 0.0661, "step": 17570 }, { "epoch": 0.14224451816490008, "grad_norm": 1.1889182329177856, "learning_rate": 7.112225908245004e-06, "loss": 0.0485, "step": 17580 }, { "epoch": 0.14232543086010194, "grad_norm": 0.9592097401618958, "learning_rate": 7.116271543005098e-06, "loss": 0.0504, "step": 17590 }, { "epoch": 0.14240634355530382, "grad_norm": 1.1357982158660889, "learning_rate": 7.120317177765193e-06, "loss": 0.0498, "step": 17600 }, { "epoch": 0.1424872562505057, "grad_norm": 1.2075681686401367, "learning_rate": 7.124362812525286e-06, "loss": 0.0517, "step": 17610 }, { "epoch": 0.1425681689457076, "grad_norm": 0.9293810129165649, "learning_rate": 7.12840844728538e-06, "loss": 0.0338, "step": 17620 }, { "epoch": 0.14264908164090945, "grad_norm": 1.0710184574127197, "learning_rate": 7.132454082045474e-06, "loss": 0.0704, "step": 17630 }, { "epoch": 0.14272999433611133, "grad_norm": 0.7941362857818604, "learning_rate": 7.136499716805567e-06, "loss": 0.0682, "step": 17640 }, { "epoch": 0.14281090703131322, "grad_norm": 0.7872747778892517, "learning_rate": 7.140545351565661e-06, "loss": 0.0493, "step": 17650 }, { "epoch": 0.1428918197265151, "grad_norm": 1.1006180047988892, "learning_rate": 7.144590986325755e-06, "loss": 0.0421, "step": 17660 }, { "epoch": 0.14297273242171696, "grad_norm": 0.6270741820335388, "learning_rate": 7.1486366210858496e-06, "loss": 0.0532, "step": 17670 }, { "epoch": 0.14305364511691884, "grad_norm": 1.3031984567642212, "learning_rate": 7.152682255845943e-06, "loss": 0.0427, "step": 17680 }, { "epoch": 0.14313455781212073, "grad_norm": 0.9336056709289551, "learning_rate": 7.156727890606037e-06, "loss": 0.068, "step": 17690 }, { "epoch": 0.1432154705073226, "grad_norm": 1.0190401077270508, "learning_rate": 7.1607735253661305e-06, "loss": 0.0609, "step": 17700 }, { "epoch": 0.14329638320252447, "grad_norm": 0.7813653349876404, "learning_rate": 7.164819160126224e-06, "loss": 0.0547, "step": 17710 }, { "epoch": 0.14337729589772635, "grad_norm": 0.9737709760665894, "learning_rate": 7.1688647948863176e-06, "loss": 0.0466, "step": 17720 }, { "epoch": 0.14345820859292824, "grad_norm": 0.63303542137146, "learning_rate": 7.172910429646412e-06, "loss": 0.0436, "step": 17730 }, { "epoch": 0.1435391212881301, "grad_norm": 0.9229593276977539, "learning_rate": 7.176956064406506e-06, "loss": 0.0633, "step": 17740 }, { "epoch": 0.14362003398333198, "grad_norm": 1.0264673233032227, "learning_rate": 7.1810016991666e-06, "loss": 0.0622, "step": 17750 }, { "epoch": 0.14370094667853386, "grad_norm": 0.653853714466095, "learning_rate": 7.185047333926693e-06, "loss": 0.0531, "step": 17760 }, { "epoch": 0.14378185937373575, "grad_norm": 0.9550175070762634, "learning_rate": 7.189092968686787e-06, "loss": 0.0537, "step": 17770 }, { "epoch": 0.1438627720689376, "grad_norm": 1.2318891286849976, "learning_rate": 7.193138603446881e-06, "loss": 0.0637, "step": 17780 }, { "epoch": 0.1439436847641395, "grad_norm": 0.8861283659934998, "learning_rate": 7.197184238206976e-06, "loss": 0.0691, "step": 17790 }, { "epoch": 0.14402459745934137, "grad_norm": 0.6487277150154114, "learning_rate": 7.201229872967069e-06, "loss": 0.0466, "step": 17800 }, { "epoch": 0.14410551015454326, "grad_norm": 1.0735821723937988, "learning_rate": 7.205275507727163e-06, "loss": 0.0828, "step": 17810 }, { "epoch": 0.14418642284974512, "grad_norm": 1.2890338897705078, "learning_rate": 7.209321142487257e-06, "loss": 0.0442, "step": 17820 }, { "epoch": 0.144267335544947, "grad_norm": 0.5752227306365967, "learning_rate": 7.21336677724735e-06, "loss": 0.0667, "step": 17830 }, { "epoch": 0.14434824824014889, "grad_norm": 0.6599384546279907, "learning_rate": 7.217412412007444e-06, "loss": 0.0429, "step": 17840 }, { "epoch": 0.14442916093535077, "grad_norm": 0.7163469195365906, "learning_rate": 7.221458046767539e-06, "loss": 0.0368, "step": 17850 }, { "epoch": 0.14451007363055263, "grad_norm": 1.1779427528381348, "learning_rate": 7.225503681527633e-06, "loss": 0.0594, "step": 17860 }, { "epoch": 0.1445909863257545, "grad_norm": 0.8469677567481995, "learning_rate": 7.229549316287726e-06, "loss": 0.0634, "step": 17870 }, { "epoch": 0.1446718990209564, "grad_norm": 0.9881393313407898, "learning_rate": 7.23359495104782e-06, "loss": 0.0567, "step": 17880 }, { "epoch": 0.14475281171615825, "grad_norm": 0.7752466201782227, "learning_rate": 7.237640585807914e-06, "loss": 0.0476, "step": 17890 }, { "epoch": 0.14483372441136014, "grad_norm": 1.0041738748550415, "learning_rate": 7.241686220568007e-06, "loss": 0.0477, "step": 17900 }, { "epoch": 0.14491463710656202, "grad_norm": 1.398561954498291, "learning_rate": 7.2457318553281016e-06, "loss": 0.0605, "step": 17910 }, { "epoch": 0.1449955498017639, "grad_norm": 0.7845554947853088, "learning_rate": 7.2497774900881955e-06, "loss": 0.0509, "step": 17920 }, { "epoch": 0.14507646249696576, "grad_norm": 0.7734313607215881, "learning_rate": 7.2538231248482895e-06, "loss": 0.0513, "step": 17930 }, { "epoch": 0.14515737519216765, "grad_norm": 1.0476723909378052, "learning_rate": 7.257868759608383e-06, "loss": 0.0648, "step": 17940 }, { "epoch": 0.14523828788736953, "grad_norm": 0.6849809885025024, "learning_rate": 7.2619143943684765e-06, "loss": 0.0523, "step": 17950 }, { "epoch": 0.14531920058257142, "grad_norm": 4.113121509552002, "learning_rate": 7.2659600291285704e-06, "loss": 0.082, "step": 17960 }, { "epoch": 0.14540011327777327, "grad_norm": 0.7093430757522583, "learning_rate": 7.270005663888665e-06, "loss": 0.0476, "step": 17970 }, { "epoch": 0.14548102597297516, "grad_norm": 1.406106948852539, "learning_rate": 7.274051298648759e-06, "loss": 0.0659, "step": 17980 }, { "epoch": 0.14556193866817704, "grad_norm": 1.574483036994934, "learning_rate": 7.278096933408852e-06, "loss": 0.0566, "step": 17990 }, { "epoch": 0.14564285136337893, "grad_norm": 1.055230975151062, "learning_rate": 7.282142568168946e-06, "loss": 0.064, "step": 18000 }, { "epoch": 0.14572376405858078, "grad_norm": 0.618689775466919, "learning_rate": 7.28618820292904e-06, "loss": 0.0669, "step": 18010 }, { "epoch": 0.14580467675378267, "grad_norm": 0.6950468420982361, "learning_rate": 7.290233837689133e-06, "loss": 0.0933, "step": 18020 }, { "epoch": 0.14588558944898455, "grad_norm": 0.779051661491394, "learning_rate": 7.294279472449228e-06, "loss": 0.0625, "step": 18030 }, { "epoch": 0.1459665021441864, "grad_norm": 0.6584148406982422, "learning_rate": 7.298325107209322e-06, "loss": 0.0514, "step": 18040 }, { "epoch": 0.1460474148393883, "grad_norm": 0.8389225602149963, "learning_rate": 7.302370741969416e-06, "loss": 0.0474, "step": 18050 }, { "epoch": 0.14612832753459018, "grad_norm": 1.3347315788269043, "learning_rate": 7.306416376729509e-06, "loss": 0.0549, "step": 18060 }, { "epoch": 0.14620924022979206, "grad_norm": 0.9655293226242065, "learning_rate": 7.310462011489603e-06, "loss": 0.0681, "step": 18070 }, { "epoch": 0.14629015292499392, "grad_norm": 0.6765417456626892, "learning_rate": 7.314507646249697e-06, "loss": 0.0525, "step": 18080 }, { "epoch": 0.1463710656201958, "grad_norm": 0.7878867983818054, "learning_rate": 7.318553281009792e-06, "loss": 0.0756, "step": 18090 }, { "epoch": 0.1464519783153977, "grad_norm": 0.6548960208892822, "learning_rate": 7.322598915769885e-06, "loss": 0.0611, "step": 18100 }, { "epoch": 0.14653289101059958, "grad_norm": 1.016261100769043, "learning_rate": 7.326644550529979e-06, "loss": 0.0845, "step": 18110 }, { "epoch": 0.14661380370580143, "grad_norm": 1.0584893226623535, "learning_rate": 7.330690185290073e-06, "loss": 0.0368, "step": 18120 }, { "epoch": 0.14669471640100332, "grad_norm": 1.4243073463439941, "learning_rate": 7.3347358200501666e-06, "loss": 0.0816, "step": 18130 }, { "epoch": 0.1467756290962052, "grad_norm": 1.0075185298919678, "learning_rate": 7.33878145481026e-06, "loss": 0.0554, "step": 18140 }, { "epoch": 0.14685654179140706, "grad_norm": 1.0389758348464966, "learning_rate": 7.3428270895703544e-06, "loss": 0.0605, "step": 18150 }, { "epoch": 0.14693745448660894, "grad_norm": 1.0654767751693726, "learning_rate": 7.346872724330448e-06, "loss": 0.0575, "step": 18160 }, { "epoch": 0.14701836718181083, "grad_norm": 0.8007611632347107, "learning_rate": 7.350918359090542e-06, "loss": 0.0511, "step": 18170 }, { "epoch": 0.1470992798770127, "grad_norm": 0.7343431115150452, "learning_rate": 7.354963993850635e-06, "loss": 0.0527, "step": 18180 }, { "epoch": 0.14718019257221457, "grad_norm": 1.487007737159729, "learning_rate": 7.359009628610729e-06, "loss": 0.0624, "step": 18190 }, { "epoch": 0.14726110526741645, "grad_norm": 2.3071937561035156, "learning_rate": 7.363055263370823e-06, "loss": 0.0785, "step": 18200 }, { "epoch": 0.14734201796261834, "grad_norm": 1.5161027908325195, "learning_rate": 7.367100898130918e-06, "loss": 0.053, "step": 18210 }, { "epoch": 0.14742293065782022, "grad_norm": 0.864922821521759, "learning_rate": 7.371146532891011e-06, "loss": 0.0528, "step": 18220 }, { "epoch": 0.14750384335302208, "grad_norm": 0.8678741455078125, "learning_rate": 7.375192167651105e-06, "loss": 0.0412, "step": 18230 }, { "epoch": 0.14758475604822396, "grad_norm": 1.0313102006912231, "learning_rate": 7.379237802411199e-06, "loss": 0.0552, "step": 18240 }, { "epoch": 0.14766566874342585, "grad_norm": 1.293936848640442, "learning_rate": 7.383283437171292e-06, "loss": 0.0733, "step": 18250 }, { "epoch": 0.14774658143862773, "grad_norm": 0.9282755851745605, "learning_rate": 7.387329071931386e-06, "loss": 0.0708, "step": 18260 }, { "epoch": 0.1478274941338296, "grad_norm": 1.7116758823394775, "learning_rate": 7.391374706691481e-06, "loss": 0.0589, "step": 18270 }, { "epoch": 0.14790840682903147, "grad_norm": 0.7263040542602539, "learning_rate": 7.395420341451575e-06, "loss": 0.0312, "step": 18280 }, { "epoch": 0.14798931952423336, "grad_norm": 0.4341198205947876, "learning_rate": 7.399465976211668e-06, "loss": 0.0508, "step": 18290 }, { "epoch": 0.14807023221943522, "grad_norm": 0.8334675431251526, "learning_rate": 7.403511610971762e-06, "loss": 0.0523, "step": 18300 }, { "epoch": 0.1481511449146371, "grad_norm": 2.224053382873535, "learning_rate": 7.407557245731856e-06, "loss": 0.0644, "step": 18310 }, { "epoch": 0.14823205760983899, "grad_norm": 0.9462864398956299, "learning_rate": 7.41160288049195e-06, "loss": 0.0806, "step": 18320 }, { "epoch": 0.14831297030504087, "grad_norm": 0.5749278664588928, "learning_rate": 7.4156485152520445e-06, "loss": 0.0475, "step": 18330 }, { "epoch": 0.14839388300024273, "grad_norm": 0.9254096150398254, "learning_rate": 7.419694150012138e-06, "loss": 0.0283, "step": 18340 }, { "epoch": 0.1484747956954446, "grad_norm": 0.5528712272644043, "learning_rate": 7.4237397847722315e-06, "loss": 0.0529, "step": 18350 }, { "epoch": 0.1485557083906465, "grad_norm": 0.930838942527771, "learning_rate": 7.4277854195323255e-06, "loss": 0.0649, "step": 18360 }, { "epoch": 0.14863662108584838, "grad_norm": 0.7411123514175415, "learning_rate": 7.4318310542924186e-06, "loss": 0.0609, "step": 18370 }, { "epoch": 0.14871753378105024, "grad_norm": 1.3360364437103271, "learning_rate": 7.4358766890525125e-06, "loss": 0.0574, "step": 18380 }, { "epoch": 0.14879844647625212, "grad_norm": 1.079719066619873, "learning_rate": 7.439922323812607e-06, "loss": 0.053, "step": 18390 }, { "epoch": 0.148879359171454, "grad_norm": 1.0961933135986328, "learning_rate": 7.443967958572701e-06, "loss": 0.0599, "step": 18400 }, { "epoch": 0.1489602718666559, "grad_norm": 1.0887130498886108, "learning_rate": 7.448013593332794e-06, "loss": 0.0373, "step": 18410 }, { "epoch": 0.14904118456185775, "grad_norm": 0.6860816478729248, "learning_rate": 7.452059228092888e-06, "loss": 0.052, "step": 18420 }, { "epoch": 0.14912209725705963, "grad_norm": 0.7720785140991211, "learning_rate": 7.456104862852982e-06, "loss": 0.0403, "step": 18430 }, { "epoch": 0.14920300995226152, "grad_norm": 0.8638312220573425, "learning_rate": 7.460150497613075e-06, "loss": 0.0481, "step": 18440 }, { "epoch": 0.14928392264746337, "grad_norm": 0.8113582134246826, "learning_rate": 7.46419613237317e-06, "loss": 0.0567, "step": 18450 }, { "epoch": 0.14936483534266526, "grad_norm": 0.6734049320220947, "learning_rate": 7.468241767133264e-06, "loss": 0.0684, "step": 18460 }, { "epoch": 0.14944574803786714, "grad_norm": 0.3971197307109833, "learning_rate": 7.472287401893358e-06, "loss": 0.066, "step": 18470 }, { "epoch": 0.14952666073306903, "grad_norm": 1.3547097444534302, "learning_rate": 7.476333036653451e-06, "loss": 0.0601, "step": 18480 }, { "epoch": 0.14960757342827088, "grad_norm": 0.9374409317970276, "learning_rate": 7.480378671413545e-06, "loss": 0.0626, "step": 18490 }, { "epoch": 0.14968848612347277, "grad_norm": 1.0560564994812012, "learning_rate": 7.484424306173639e-06, "loss": 0.0574, "step": 18500 }, { "epoch": 0.14976939881867465, "grad_norm": 1.069386601448059, "learning_rate": 7.488469940933734e-06, "loss": 0.0845, "step": 18510 }, { "epoch": 0.14985031151387654, "grad_norm": 0.7418326139450073, "learning_rate": 7.492515575693828e-06, "loss": 0.0555, "step": 18520 }, { "epoch": 0.1499312242090784, "grad_norm": 1.1265696287155151, "learning_rate": 7.496561210453921e-06, "loss": 0.0769, "step": 18530 }, { "epoch": 0.15001213690428028, "grad_norm": 1.2892684936523438, "learning_rate": 7.500606845214015e-06, "loss": 0.0598, "step": 18540 }, { "epoch": 0.15009304959948216, "grad_norm": 0.7090634107589722, "learning_rate": 7.504652479974109e-06, "loss": 0.0586, "step": 18550 }, { "epoch": 0.15017396229468405, "grad_norm": 1.2660460472106934, "learning_rate": 7.508698114734202e-06, "loss": 0.0524, "step": 18560 }, { "epoch": 0.1502548749898859, "grad_norm": 1.099012017250061, "learning_rate": 7.5127437494942965e-06, "loss": 0.0518, "step": 18570 }, { "epoch": 0.1503357876850878, "grad_norm": 0.9335222840309143, "learning_rate": 7.5167893842543905e-06, "loss": 0.0762, "step": 18580 }, { "epoch": 0.15041670038028968, "grad_norm": 0.9837499856948853, "learning_rate": 7.520835019014484e-06, "loss": 0.068, "step": 18590 }, { "epoch": 0.15049761307549153, "grad_norm": 0.8587425947189331, "learning_rate": 7.5248806537745775e-06, "loss": 0.053, "step": 18600 }, { "epoch": 0.15057852577069342, "grad_norm": 0.6239402890205383, "learning_rate": 7.5289262885346714e-06, "loss": 0.0595, "step": 18610 }, { "epoch": 0.1506594384658953, "grad_norm": 0.4246198832988739, "learning_rate": 7.532971923294765e-06, "loss": 0.0432, "step": 18620 }, { "epoch": 0.15074035116109719, "grad_norm": 0.8975555300712585, "learning_rate": 7.53701755805486e-06, "loss": 0.0572, "step": 18630 }, { "epoch": 0.15082126385629904, "grad_norm": 0.9667057394981384, "learning_rate": 7.541063192814953e-06, "loss": 0.0588, "step": 18640 }, { "epoch": 0.15090217655150093, "grad_norm": 0.6927478313446045, "learning_rate": 7.545108827575047e-06, "loss": 0.0625, "step": 18650 }, { "epoch": 0.1509830892467028, "grad_norm": 1.171057105064392, "learning_rate": 7.549154462335141e-06, "loss": 0.0703, "step": 18660 }, { "epoch": 0.1510640019419047, "grad_norm": 0.6620337963104248, "learning_rate": 7.553200097095234e-06, "loss": 0.0604, "step": 18670 }, { "epoch": 0.15114491463710655, "grad_norm": 0.7258151173591614, "learning_rate": 7.557245731855328e-06, "loss": 0.0603, "step": 18680 }, { "epoch": 0.15122582733230844, "grad_norm": 0.741958498954773, "learning_rate": 7.561291366615423e-06, "loss": 0.0513, "step": 18690 }, { "epoch": 0.15130674002751032, "grad_norm": 1.1686211824417114, "learning_rate": 7.565337001375517e-06, "loss": 0.068, "step": 18700 }, { "epoch": 0.1513876527227122, "grad_norm": 1.0446182489395142, "learning_rate": 7.569382636135611e-06, "loss": 0.0673, "step": 18710 }, { "epoch": 0.15146856541791406, "grad_norm": 1.2566475868225098, "learning_rate": 7.573428270895704e-06, "loss": 0.0364, "step": 18720 }, { "epoch": 0.15154947811311595, "grad_norm": 0.7461131811141968, "learning_rate": 7.577473905655798e-06, "loss": 0.0527, "step": 18730 }, { "epoch": 0.15163039080831783, "grad_norm": 0.8661378026008606, "learning_rate": 7.581519540415892e-06, "loss": 0.0507, "step": 18740 }, { "epoch": 0.1517113035035197, "grad_norm": 1.309601902961731, "learning_rate": 7.585565175175987e-06, "loss": 0.0731, "step": 18750 }, { "epoch": 0.15179221619872157, "grad_norm": 0.7249888181686401, "learning_rate": 7.58961080993608e-06, "loss": 0.0584, "step": 18760 }, { "epoch": 0.15187312889392346, "grad_norm": 0.9962270855903625, "learning_rate": 7.593656444696174e-06, "loss": 0.0621, "step": 18770 }, { "epoch": 0.15195404158912534, "grad_norm": 1.3530433177947998, "learning_rate": 7.5977020794562676e-06, "loss": 0.073, "step": 18780 }, { "epoch": 0.1520349542843272, "grad_norm": 0.9981517791748047, "learning_rate": 7.601747714216361e-06, "loss": 0.0769, "step": 18790 }, { "epoch": 0.15211586697952909, "grad_norm": 0.8394637107849121, "learning_rate": 7.605793348976455e-06, "loss": 0.0716, "step": 18800 }, { "epoch": 0.15219677967473097, "grad_norm": 1.2607823610305786, "learning_rate": 7.609838983736549e-06, "loss": 0.0565, "step": 18810 }, { "epoch": 0.15227769236993285, "grad_norm": 0.6442524790763855, "learning_rate": 7.613884618496643e-06, "loss": 0.0422, "step": 18820 }, { "epoch": 0.1523586050651347, "grad_norm": 0.7222189903259277, "learning_rate": 7.617930253256736e-06, "loss": 0.0379, "step": 18830 }, { "epoch": 0.1524395177603366, "grad_norm": 0.7243174314498901, "learning_rate": 7.62197588801683e-06, "loss": 0.0593, "step": 18840 }, { "epoch": 0.15252043045553848, "grad_norm": 0.5853238105773926, "learning_rate": 7.626021522776924e-06, "loss": 0.0422, "step": 18850 }, { "epoch": 0.15260134315074034, "grad_norm": 0.8410107493400574, "learning_rate": 7.630067157537018e-06, "loss": 0.06, "step": 18860 }, { "epoch": 0.15268225584594222, "grad_norm": 0.7901229858398438, "learning_rate": 7.634112792297112e-06, "loss": 0.0617, "step": 18870 }, { "epoch": 0.1527631685411441, "grad_norm": 0.6463631391525269, "learning_rate": 7.638158427057206e-06, "loss": 0.0497, "step": 18880 }, { "epoch": 0.152844081236346, "grad_norm": 0.6876876354217529, "learning_rate": 7.6422040618173e-06, "loss": 0.0555, "step": 18890 }, { "epoch": 0.15292499393154785, "grad_norm": 1.3202329874038696, "learning_rate": 7.646249696577394e-06, "loss": 0.0476, "step": 18900 }, { "epoch": 0.15300590662674973, "grad_norm": 0.6739966869354248, "learning_rate": 7.650295331337488e-06, "loss": 0.0341, "step": 18910 }, { "epoch": 0.15308681932195162, "grad_norm": 1.0310312509536743, "learning_rate": 7.65434096609758e-06, "loss": 0.0458, "step": 18920 }, { "epoch": 0.1531677320171535, "grad_norm": 0.745438277721405, "learning_rate": 7.658386600857676e-06, "loss": 0.0501, "step": 18930 }, { "epoch": 0.15324864471235536, "grad_norm": 2.0329642295837402, "learning_rate": 7.66243223561777e-06, "loss": 0.0498, "step": 18940 }, { "epoch": 0.15332955740755724, "grad_norm": 0.6757729649543762, "learning_rate": 7.666477870377864e-06, "loss": 0.0518, "step": 18950 }, { "epoch": 0.15341047010275913, "grad_norm": 0.711021900177002, "learning_rate": 7.670523505137956e-06, "loss": 0.0409, "step": 18960 }, { "epoch": 0.153491382797961, "grad_norm": 0.7734776735305786, "learning_rate": 7.67456913989805e-06, "loss": 0.0683, "step": 18970 }, { "epoch": 0.15357229549316287, "grad_norm": 0.8619532585144043, "learning_rate": 7.678614774658144e-06, "loss": 0.0552, "step": 18980 }, { "epoch": 0.15365320818836475, "grad_norm": 1.0582391023635864, "learning_rate": 7.68266040941824e-06, "loss": 0.0613, "step": 18990 }, { "epoch": 0.15373412088356664, "grad_norm": 1.527062177658081, "learning_rate": 7.686706044178332e-06, "loss": 0.0538, "step": 19000 }, { "epoch": 0.1538150335787685, "grad_norm": 0.8615618944168091, "learning_rate": 7.690751678938426e-06, "loss": 0.0585, "step": 19010 }, { "epoch": 0.15389594627397038, "grad_norm": 1.073807716369629, "learning_rate": 7.69479731369852e-06, "loss": 0.0565, "step": 19020 }, { "epoch": 0.15397685896917226, "grad_norm": 0.42266881465911865, "learning_rate": 7.698842948458614e-06, "loss": 0.0589, "step": 19030 }, { "epoch": 0.15405777166437415, "grad_norm": 1.678389549255371, "learning_rate": 7.702888583218707e-06, "loss": 0.0688, "step": 19040 }, { "epoch": 0.154138684359576, "grad_norm": 1.459477186203003, "learning_rate": 7.706934217978801e-06, "loss": 0.0601, "step": 19050 }, { "epoch": 0.1542195970547779, "grad_norm": 0.600021481513977, "learning_rate": 7.710979852738895e-06, "loss": 0.0667, "step": 19060 }, { "epoch": 0.15430050974997978, "grad_norm": 0.8210203647613525, "learning_rate": 7.71502548749899e-06, "loss": 0.0626, "step": 19070 }, { "epoch": 0.15438142244518166, "grad_norm": 1.179092288017273, "learning_rate": 7.719071122259083e-06, "loss": 0.0668, "step": 19080 }, { "epoch": 0.15446233514038352, "grad_norm": 0.5256611108779907, "learning_rate": 7.723116757019177e-06, "loss": 0.0585, "step": 19090 }, { "epoch": 0.1545432478355854, "grad_norm": 0.649084746837616, "learning_rate": 7.727162391779271e-06, "loss": 0.0599, "step": 19100 }, { "epoch": 0.15462416053078729, "grad_norm": 0.561471700668335, "learning_rate": 7.731208026539365e-06, "loss": 0.0696, "step": 19110 }, { "epoch": 0.15470507322598917, "grad_norm": 1.0317000150680542, "learning_rate": 7.735253661299459e-06, "loss": 0.0671, "step": 19120 }, { "epoch": 0.15478598592119103, "grad_norm": 0.0486573800444603, "learning_rate": 7.739299296059553e-06, "loss": 0.0676, "step": 19130 }, { "epoch": 0.1548668986163929, "grad_norm": 0.9610732793807983, "learning_rate": 7.743344930819647e-06, "loss": 0.0502, "step": 19140 }, { "epoch": 0.1549478113115948, "grad_norm": 0.635736346244812, "learning_rate": 7.747390565579739e-06, "loss": 0.0533, "step": 19150 }, { "epoch": 0.15502872400679665, "grad_norm": 0.7696754932403564, "learning_rate": 7.751436200339833e-06, "loss": 0.0534, "step": 19160 }, { "epoch": 0.15510963670199854, "grad_norm": 0.683045506477356, "learning_rate": 7.755481835099929e-06, "loss": 0.052, "step": 19170 }, { "epoch": 0.15519054939720042, "grad_norm": 0.633103609085083, "learning_rate": 7.759527469860023e-06, "loss": 0.0523, "step": 19180 }, { "epoch": 0.1552714620924023, "grad_norm": 0.580108106136322, "learning_rate": 7.763573104620115e-06, "loss": 0.0399, "step": 19190 }, { "epoch": 0.15535237478760416, "grad_norm": 1.1432044506072998, "learning_rate": 7.767618739380209e-06, "loss": 0.0574, "step": 19200 }, { "epoch": 0.15543328748280605, "grad_norm": 1.1459957361221313, "learning_rate": 7.771664374140303e-06, "loss": 0.0695, "step": 19210 }, { "epoch": 0.15551420017800793, "grad_norm": 1.1074413061141968, "learning_rate": 7.775710008900397e-06, "loss": 0.0631, "step": 19220 }, { "epoch": 0.15559511287320982, "grad_norm": 0.6109682321548462, "learning_rate": 7.779755643660492e-06, "loss": 0.0362, "step": 19230 }, { "epoch": 0.15567602556841167, "grad_norm": 1.0646240711212158, "learning_rate": 7.783801278420585e-06, "loss": 0.0645, "step": 19240 }, { "epoch": 0.15575693826361356, "grad_norm": 1.004236102104187, "learning_rate": 7.787846913180679e-06, "loss": 0.0703, "step": 19250 }, { "epoch": 0.15583785095881544, "grad_norm": 0.6982329487800598, "learning_rate": 7.791892547940772e-06, "loss": 0.0587, "step": 19260 }, { "epoch": 0.15591876365401733, "grad_norm": 4.028690814971924, "learning_rate": 7.795938182700866e-06, "loss": 0.057, "step": 19270 }, { "epoch": 0.15599967634921919, "grad_norm": 0.7098841667175293, "learning_rate": 7.79998381746096e-06, "loss": 0.0684, "step": 19280 }, { "epoch": 0.15608058904442107, "grad_norm": 1.0963374376296997, "learning_rate": 7.804029452221054e-06, "loss": 0.0575, "step": 19290 }, { "epoch": 0.15616150173962295, "grad_norm": 0.7792829275131226, "learning_rate": 7.808075086981148e-06, "loss": 0.0501, "step": 19300 }, { "epoch": 0.1562424144348248, "grad_norm": 0.7459583878517151, "learning_rate": 7.812120721741242e-06, "loss": 0.0601, "step": 19310 }, { "epoch": 0.1563233271300267, "grad_norm": 0.6942937970161438, "learning_rate": 7.816166356501336e-06, "loss": 0.0432, "step": 19320 }, { "epoch": 0.15640423982522858, "grad_norm": 0.92978435754776, "learning_rate": 7.82021199126143e-06, "loss": 0.0642, "step": 19330 }, { "epoch": 0.15648515252043046, "grad_norm": 0.9079196453094482, "learning_rate": 7.824257626021522e-06, "loss": 0.0427, "step": 19340 }, { "epoch": 0.15656606521563232, "grad_norm": 1.197559118270874, "learning_rate": 7.828303260781616e-06, "loss": 0.0581, "step": 19350 }, { "epoch": 0.1566469779108342, "grad_norm": 1.068740725517273, "learning_rate": 7.832348895541712e-06, "loss": 0.0434, "step": 19360 }, { "epoch": 0.1567278906060361, "grad_norm": 1.2774406671524048, "learning_rate": 7.836394530301806e-06, "loss": 0.0576, "step": 19370 }, { "epoch": 0.15680880330123798, "grad_norm": 0.8889063000679016, "learning_rate": 7.840440165061898e-06, "loss": 0.061, "step": 19380 }, { "epoch": 0.15688971599643983, "grad_norm": 1.5564032793045044, "learning_rate": 7.844485799821992e-06, "loss": 0.097, "step": 19390 }, { "epoch": 0.15697062869164172, "grad_norm": 0.6527207493782043, "learning_rate": 7.848531434582086e-06, "loss": 0.0608, "step": 19400 }, { "epoch": 0.1570515413868436, "grad_norm": 0.9090126752853394, "learning_rate": 7.85257706934218e-06, "loss": 0.0503, "step": 19410 }, { "epoch": 0.1571324540820455, "grad_norm": 0.7569395899772644, "learning_rate": 7.856622704102275e-06, "loss": 0.0562, "step": 19420 }, { "epoch": 0.15721336677724734, "grad_norm": 0.5342950820922852, "learning_rate": 7.860668338862368e-06, "loss": 0.0586, "step": 19430 }, { "epoch": 0.15729427947244923, "grad_norm": 0.5084393620491028, "learning_rate": 7.864713973622462e-06, "loss": 0.0697, "step": 19440 }, { "epoch": 0.1573751921676511, "grad_norm": 1.4391462802886963, "learning_rate": 7.868759608382556e-06, "loss": 0.0634, "step": 19450 }, { "epoch": 0.15745610486285297, "grad_norm": 0.7479986548423767, "learning_rate": 7.87280524314265e-06, "loss": 0.0608, "step": 19460 }, { "epoch": 0.15753701755805485, "grad_norm": 1.670440673828125, "learning_rate": 7.876850877902743e-06, "loss": 0.065, "step": 19470 }, { "epoch": 0.15761793025325674, "grad_norm": 1.0237035751342773, "learning_rate": 7.880896512662837e-06, "loss": 0.0673, "step": 19480 }, { "epoch": 0.15769884294845862, "grad_norm": 0.9304565787315369, "learning_rate": 7.884942147422931e-06, "loss": 0.0579, "step": 19490 }, { "epoch": 0.15777975564366048, "grad_norm": 0.8666211366653442, "learning_rate": 7.888987782183025e-06, "loss": 0.0486, "step": 19500 }, { "epoch": 0.15786066833886236, "grad_norm": 1.1551811695098877, "learning_rate": 7.89303341694312e-06, "loss": 0.0523, "step": 19510 }, { "epoch": 0.15794158103406425, "grad_norm": 0.916835606098175, "learning_rate": 7.897079051703213e-06, "loss": 0.0594, "step": 19520 }, { "epoch": 0.15802249372926613, "grad_norm": 1.1492398977279663, "learning_rate": 7.901124686463305e-06, "loss": 0.0549, "step": 19530 }, { "epoch": 0.158103406424468, "grad_norm": 1.1224662065505981, "learning_rate": 7.905170321223401e-06, "loss": 0.0683, "step": 19540 }, { "epoch": 0.15818431911966988, "grad_norm": 0.25538259744644165, "learning_rate": 7.909215955983495e-06, "loss": 0.0421, "step": 19550 }, { "epoch": 0.15826523181487176, "grad_norm": 1.0971286296844482, "learning_rate": 7.913261590743589e-06, "loss": 0.0445, "step": 19560 }, { "epoch": 0.15834614451007364, "grad_norm": 0.8374471664428711, "learning_rate": 7.917307225503681e-06, "loss": 0.0607, "step": 19570 }, { "epoch": 0.1584270572052755, "grad_norm": 0.6353275179862976, "learning_rate": 7.921352860263775e-06, "loss": 0.0502, "step": 19580 }, { "epoch": 0.15850796990047739, "grad_norm": 1.2059736251831055, "learning_rate": 7.925398495023869e-06, "loss": 0.0529, "step": 19590 }, { "epoch": 0.15858888259567927, "grad_norm": 1.1500990390777588, "learning_rate": 7.929444129783965e-06, "loss": 0.0503, "step": 19600 }, { "epoch": 0.15866979529088113, "grad_norm": 1.8411530256271362, "learning_rate": 7.933489764544059e-06, "loss": 0.0553, "step": 19610 }, { "epoch": 0.158750707986083, "grad_norm": 0.5482963919639587, "learning_rate": 7.937535399304151e-06, "loss": 0.054, "step": 19620 }, { "epoch": 0.1588316206812849, "grad_norm": 0.5977728366851807, "learning_rate": 7.941581034064245e-06, "loss": 0.0484, "step": 19630 }, { "epoch": 0.15891253337648678, "grad_norm": 0.24489037692546844, "learning_rate": 7.945626668824339e-06, "loss": 0.0419, "step": 19640 }, { "epoch": 0.15899344607168864, "grad_norm": 1.3052699565887451, "learning_rate": 7.949672303584433e-06, "loss": 0.0582, "step": 19650 }, { "epoch": 0.15907435876689052, "grad_norm": 0.5517367124557495, "learning_rate": 7.953717938344527e-06, "loss": 0.0599, "step": 19660 }, { "epoch": 0.1591552714620924, "grad_norm": 0.9298095107078552, "learning_rate": 7.95776357310462e-06, "loss": 0.0854, "step": 19670 }, { "epoch": 0.1592361841572943, "grad_norm": 1.0750069618225098, "learning_rate": 7.961809207864715e-06, "loss": 0.0487, "step": 19680 }, { "epoch": 0.15931709685249615, "grad_norm": 0.6077127456665039, "learning_rate": 7.965854842624808e-06, "loss": 0.0592, "step": 19690 }, { "epoch": 0.15939800954769803, "grad_norm": 0.9393805861473083, "learning_rate": 7.969900477384902e-06, "loss": 0.0637, "step": 19700 }, { "epoch": 0.15947892224289992, "grad_norm": 1.1072959899902344, "learning_rate": 7.973946112144996e-06, "loss": 0.0474, "step": 19710 }, { "epoch": 0.15955983493810177, "grad_norm": 1.1490392684936523, "learning_rate": 7.97799174690509e-06, "loss": 0.057, "step": 19720 }, { "epoch": 0.15964074763330366, "grad_norm": 0.4497911334037781, "learning_rate": 7.982037381665184e-06, "loss": 0.07, "step": 19730 }, { "epoch": 0.15972166032850554, "grad_norm": 1.9896090030670166, "learning_rate": 7.986083016425278e-06, "loss": 0.034, "step": 19740 }, { "epoch": 0.15980257302370743, "grad_norm": 1.7235091924667358, "learning_rate": 7.990128651185372e-06, "loss": 0.0678, "step": 19750 }, { "epoch": 0.15988348571890929, "grad_norm": 0.8013109564781189, "learning_rate": 7.994174285945464e-06, "loss": 0.0583, "step": 19760 }, { "epoch": 0.15996439841411117, "grad_norm": 0.7377381324768066, "learning_rate": 7.998219920705558e-06, "loss": 0.0598, "step": 19770 }, { "epoch": 0.16004531110931305, "grad_norm": 1.4761674404144287, "learning_rate": 8.002265555465654e-06, "loss": 0.0562, "step": 19780 }, { "epoch": 0.16012622380451494, "grad_norm": 0.8014826774597168, "learning_rate": 8.006311190225748e-06, "loss": 0.0437, "step": 19790 }, { "epoch": 0.1602071364997168, "grad_norm": 0.9319077730178833, "learning_rate": 8.010356824985842e-06, "loss": 0.0732, "step": 19800 }, { "epoch": 0.16028804919491868, "grad_norm": 0.9196180105209351, "learning_rate": 8.014402459745934e-06, "loss": 0.0829, "step": 19810 }, { "epoch": 0.16036896189012056, "grad_norm": 1.0535808801651, "learning_rate": 8.018448094506028e-06, "loss": 0.0424, "step": 19820 }, { "epoch": 0.16044987458532245, "grad_norm": 0.8462996482849121, "learning_rate": 8.022493729266122e-06, "loss": 0.0679, "step": 19830 }, { "epoch": 0.1605307872805243, "grad_norm": 1.0265235900878906, "learning_rate": 8.026539364026218e-06, "loss": 0.0643, "step": 19840 }, { "epoch": 0.1606116999757262, "grad_norm": 0.773690938949585, "learning_rate": 8.03058499878631e-06, "loss": 0.062, "step": 19850 }, { "epoch": 0.16069261267092808, "grad_norm": 0.8928182721138, "learning_rate": 8.034630633546404e-06, "loss": 0.0639, "step": 19860 }, { "epoch": 0.16077352536612993, "grad_norm": 0.6016058325767517, "learning_rate": 8.038676268306498e-06, "loss": 0.0604, "step": 19870 }, { "epoch": 0.16085443806133182, "grad_norm": 0.8465270400047302, "learning_rate": 8.042721903066592e-06, "loss": 0.0485, "step": 19880 }, { "epoch": 0.1609353507565337, "grad_norm": 0.6712111234664917, "learning_rate": 8.046767537826686e-06, "loss": 0.0622, "step": 19890 }, { "epoch": 0.1610162634517356, "grad_norm": 1.1336520910263062, "learning_rate": 8.05081317258678e-06, "loss": 0.0786, "step": 19900 }, { "epoch": 0.16109717614693744, "grad_norm": 0.9201558232307434, "learning_rate": 8.054858807346873e-06, "loss": 0.0429, "step": 19910 }, { "epoch": 0.16117808884213933, "grad_norm": 0.676886796951294, "learning_rate": 8.058904442106967e-06, "loss": 0.0439, "step": 19920 }, { "epoch": 0.1612590015373412, "grad_norm": 0.8849956393241882, "learning_rate": 8.062950076867061e-06, "loss": 0.0515, "step": 19930 }, { "epoch": 0.1613399142325431, "grad_norm": 0.6373624801635742, "learning_rate": 8.066995711627155e-06, "loss": 0.06, "step": 19940 }, { "epoch": 0.16142082692774495, "grad_norm": 0.7011350393295288, "learning_rate": 8.071041346387248e-06, "loss": 0.0673, "step": 19950 }, { "epoch": 0.16150173962294684, "grad_norm": 0.9786041378974915, "learning_rate": 8.075086981147343e-06, "loss": 0.0585, "step": 19960 }, { "epoch": 0.16158265231814872, "grad_norm": 0.7295734286308289, "learning_rate": 8.079132615907437e-06, "loss": 0.0636, "step": 19970 }, { "epoch": 0.1616635650133506, "grad_norm": 1.2340110540390015, "learning_rate": 8.083178250667531e-06, "loss": 0.0629, "step": 19980 }, { "epoch": 0.16174447770855246, "grad_norm": 0.9395706057548523, "learning_rate": 8.087223885427625e-06, "loss": 0.0502, "step": 19990 }, { "epoch": 0.16182539040375435, "grad_norm": 0.6075953245162964, "learning_rate": 8.091269520187717e-06, "loss": 0.0581, "step": 20000 }, { "epoch": 0.16190630309895623, "grad_norm": 0.7650282979011536, "learning_rate": 8.095315154947811e-06, "loss": 0.052, "step": 20010 }, { "epoch": 0.1619872157941581, "grad_norm": 0.8829438090324402, "learning_rate": 8.099360789707907e-06, "loss": 0.0578, "step": 20020 }, { "epoch": 0.16206812848935997, "grad_norm": 0.8356672525405884, "learning_rate": 8.103406424468e-06, "loss": 0.0546, "step": 20030 }, { "epoch": 0.16214904118456186, "grad_norm": 1.264543890953064, "learning_rate": 8.107452059228093e-06, "loss": 0.0613, "step": 20040 }, { "epoch": 0.16222995387976374, "grad_norm": 0.9599885940551758, "learning_rate": 8.111497693988187e-06, "loss": 0.0548, "step": 20050 }, { "epoch": 0.1623108665749656, "grad_norm": 0.6562296748161316, "learning_rate": 8.115543328748281e-06, "loss": 0.0655, "step": 20060 }, { "epoch": 0.16239177927016749, "grad_norm": 0.6710636615753174, "learning_rate": 8.119588963508375e-06, "loss": 0.0559, "step": 20070 }, { "epoch": 0.16247269196536937, "grad_norm": 0.7705518007278442, "learning_rate": 8.123634598268469e-06, "loss": 0.0477, "step": 20080 }, { "epoch": 0.16255360466057125, "grad_norm": 0.6150139570236206, "learning_rate": 8.127680233028563e-06, "loss": 0.0496, "step": 20090 }, { "epoch": 0.1626345173557731, "grad_norm": 0.8717988729476929, "learning_rate": 8.131725867788657e-06, "loss": 0.0526, "step": 20100 }, { "epoch": 0.162715430050975, "grad_norm": 1.1219497919082642, "learning_rate": 8.13577150254875e-06, "loss": 0.0498, "step": 20110 }, { "epoch": 0.16279634274617688, "grad_norm": 1.163527488708496, "learning_rate": 8.139817137308845e-06, "loss": 0.0909, "step": 20120 }, { "epoch": 0.16287725544137877, "grad_norm": 0.9680331945419312, "learning_rate": 8.143862772068938e-06, "loss": 0.0604, "step": 20130 }, { "epoch": 0.16295816813658062, "grad_norm": 0.9658838510513306, "learning_rate": 8.147908406829032e-06, "loss": 0.0665, "step": 20140 }, { "epoch": 0.1630390808317825, "grad_norm": 0.920851469039917, "learning_rate": 8.151954041589126e-06, "loss": 0.0633, "step": 20150 }, { "epoch": 0.1631199935269844, "grad_norm": 0.8658110499382019, "learning_rate": 8.15599967634922e-06, "loss": 0.0494, "step": 20160 }, { "epoch": 0.16320090622218625, "grad_norm": 0.6297053098678589, "learning_rate": 8.160045311109314e-06, "loss": 0.0571, "step": 20170 }, { "epoch": 0.16328181891738813, "grad_norm": 1.1791285276412964, "learning_rate": 8.164090945869408e-06, "loss": 0.0483, "step": 20180 }, { "epoch": 0.16336273161259002, "grad_norm": 1.4502755403518677, "learning_rate": 8.1681365806295e-06, "loss": 0.0606, "step": 20190 }, { "epoch": 0.1634436443077919, "grad_norm": 0.659234881401062, "learning_rate": 8.172182215389596e-06, "loss": 0.0382, "step": 20200 }, { "epoch": 0.16352455700299376, "grad_norm": 0.8241320848464966, "learning_rate": 8.17622785014969e-06, "loss": 0.0525, "step": 20210 }, { "epoch": 0.16360546969819564, "grad_norm": 1.220990777015686, "learning_rate": 8.180273484909784e-06, "loss": 0.072, "step": 20220 }, { "epoch": 0.16368638239339753, "grad_norm": 0.8687805533409119, "learning_rate": 8.184319119669876e-06, "loss": 0.0479, "step": 20230 }, { "epoch": 0.1637672950885994, "grad_norm": 1.7033771276474, "learning_rate": 8.18836475442997e-06, "loss": 0.0736, "step": 20240 }, { "epoch": 0.16384820778380127, "grad_norm": 0.7567147016525269, "learning_rate": 8.192410389190064e-06, "loss": 0.0523, "step": 20250 }, { "epoch": 0.16392912047900315, "grad_norm": 0.43349480628967285, "learning_rate": 8.19645602395016e-06, "loss": 0.0553, "step": 20260 }, { "epoch": 0.16401003317420504, "grad_norm": 0.6464446187019348, "learning_rate": 8.200501658710252e-06, "loss": 0.0373, "step": 20270 }, { "epoch": 0.16409094586940692, "grad_norm": 0.949047327041626, "learning_rate": 8.204547293470346e-06, "loss": 0.067, "step": 20280 }, { "epoch": 0.16417185856460878, "grad_norm": 1.556719183921814, "learning_rate": 8.20859292823044e-06, "loss": 0.038, "step": 20290 }, { "epoch": 0.16425277125981066, "grad_norm": 0.62428879737854, "learning_rate": 8.212638562990534e-06, "loss": 0.0649, "step": 20300 }, { "epoch": 0.16433368395501255, "grad_norm": 1.127492904663086, "learning_rate": 8.216684197750628e-06, "loss": 0.0691, "step": 20310 }, { "epoch": 0.1644145966502144, "grad_norm": 0.6934379935264587, "learning_rate": 8.220729832510722e-06, "loss": 0.0358, "step": 20320 }, { "epoch": 0.1644955093454163, "grad_norm": 1.0548794269561768, "learning_rate": 8.224775467270816e-06, "loss": 0.0539, "step": 20330 }, { "epoch": 0.16457642204061818, "grad_norm": 0.5936501622200012, "learning_rate": 8.22882110203091e-06, "loss": 0.0539, "step": 20340 }, { "epoch": 0.16465733473582006, "grad_norm": 1.1102160215377808, "learning_rate": 8.232866736791003e-06, "loss": 0.0684, "step": 20350 }, { "epoch": 0.16473824743102192, "grad_norm": 0.8299624919891357, "learning_rate": 8.236912371551097e-06, "loss": 0.0452, "step": 20360 }, { "epoch": 0.1648191601262238, "grad_norm": 0.6449360847473145, "learning_rate": 8.240958006311191e-06, "loss": 0.0563, "step": 20370 }, { "epoch": 0.16490007282142569, "grad_norm": 0.6281391382217407, "learning_rate": 8.245003641071285e-06, "loss": 0.0438, "step": 20380 }, { "epoch": 0.16498098551662757, "grad_norm": 0.7907322645187378, "learning_rate": 8.24904927583138e-06, "loss": 0.0397, "step": 20390 }, { "epoch": 0.16506189821182943, "grad_norm": 0.5747731924057007, "learning_rate": 8.253094910591473e-06, "loss": 0.0615, "step": 20400 }, { "epoch": 0.1651428109070313, "grad_norm": 0.8236907720565796, "learning_rate": 8.257140545351567e-06, "loss": 0.0433, "step": 20410 }, { "epoch": 0.1652237236022332, "grad_norm": 0.35188165307044983, "learning_rate": 8.26118618011166e-06, "loss": 0.0321, "step": 20420 }, { "epoch": 0.16530463629743508, "grad_norm": 0.936817467212677, "learning_rate": 8.265231814871753e-06, "loss": 0.0479, "step": 20430 }, { "epoch": 0.16538554899263694, "grad_norm": 0.8479475378990173, "learning_rate": 8.269277449631849e-06, "loss": 0.0646, "step": 20440 }, { "epoch": 0.16546646168783882, "grad_norm": 0.7845199704170227, "learning_rate": 8.273323084391943e-06, "loss": 0.0479, "step": 20450 }, { "epoch": 0.1655473743830407, "grad_norm": 0.4159863591194153, "learning_rate": 8.277368719152035e-06, "loss": 0.037, "step": 20460 }, { "epoch": 0.16562828707824256, "grad_norm": 1.438148856163025, "learning_rate": 8.281414353912129e-06, "loss": 0.0694, "step": 20470 }, { "epoch": 0.16570919977344445, "grad_norm": 0.770673394203186, "learning_rate": 8.285459988672223e-06, "loss": 0.0549, "step": 20480 }, { "epoch": 0.16579011246864633, "grad_norm": 1.0347689390182495, "learning_rate": 8.289505623432317e-06, "loss": 0.0794, "step": 20490 }, { "epoch": 0.16587102516384822, "grad_norm": 0.6809335350990295, "learning_rate": 8.29355125819241e-06, "loss": 0.0424, "step": 20500 }, { "epoch": 0.16595193785905007, "grad_norm": 0.6736905574798584, "learning_rate": 8.297596892952505e-06, "loss": 0.0838, "step": 20510 }, { "epoch": 0.16603285055425196, "grad_norm": 0.6150520443916321, "learning_rate": 8.301642527712599e-06, "loss": 0.0575, "step": 20520 }, { "epoch": 0.16611376324945384, "grad_norm": 0.7576379179954529, "learning_rate": 8.305688162472693e-06, "loss": 0.0648, "step": 20530 }, { "epoch": 0.16619467594465573, "grad_norm": 0.9655968546867371, "learning_rate": 8.309733797232787e-06, "loss": 0.0599, "step": 20540 }, { "epoch": 0.16627558863985759, "grad_norm": 0.9560700058937073, "learning_rate": 8.31377943199288e-06, "loss": 0.0778, "step": 20550 }, { "epoch": 0.16635650133505947, "grad_norm": 0.6492120027542114, "learning_rate": 8.317825066752974e-06, "loss": 0.0492, "step": 20560 }, { "epoch": 0.16643741403026135, "grad_norm": 1.1603496074676514, "learning_rate": 8.321870701513068e-06, "loss": 0.0672, "step": 20570 }, { "epoch": 0.1665183267254632, "grad_norm": 0.6700226664543152, "learning_rate": 8.325916336273162e-06, "loss": 0.0289, "step": 20580 }, { "epoch": 0.1665992394206651, "grad_norm": 1.0642365217208862, "learning_rate": 8.329961971033256e-06, "loss": 0.0661, "step": 20590 }, { "epoch": 0.16668015211586698, "grad_norm": 0.4606260359287262, "learning_rate": 8.33400760579335e-06, "loss": 0.053, "step": 20600 }, { "epoch": 0.16676106481106887, "grad_norm": 0.8108723759651184, "learning_rate": 8.338053240553442e-06, "loss": 0.043, "step": 20610 }, { "epoch": 0.16684197750627072, "grad_norm": 1.1730448007583618, "learning_rate": 8.342098875313538e-06, "loss": 0.0685, "step": 20620 }, { "epoch": 0.1669228902014726, "grad_norm": 0.536375105381012, "learning_rate": 8.346144510073632e-06, "loss": 0.0542, "step": 20630 }, { "epoch": 0.1670038028966745, "grad_norm": 0.4937063753604889, "learning_rate": 8.350190144833726e-06, "loss": 0.0443, "step": 20640 }, { "epoch": 0.16708471559187638, "grad_norm": 1.4101903438568115, "learning_rate": 8.354235779593818e-06, "loss": 0.044, "step": 20650 }, { "epoch": 0.16716562828707823, "grad_norm": 0.34743639826774597, "learning_rate": 8.358281414353912e-06, "loss": 0.0514, "step": 20660 }, { "epoch": 0.16724654098228012, "grad_norm": 0.2541548013687134, "learning_rate": 8.362327049114006e-06, "loss": 0.0423, "step": 20670 }, { "epoch": 0.167327453677482, "grad_norm": 0.8076031804084778, "learning_rate": 8.366372683874102e-06, "loss": 0.0577, "step": 20680 }, { "epoch": 0.1674083663726839, "grad_norm": 0.8759945034980774, "learning_rate": 8.370418318634194e-06, "loss": 0.0551, "step": 20690 }, { "epoch": 0.16748927906788574, "grad_norm": 0.362179696559906, "learning_rate": 8.374463953394288e-06, "loss": 0.0451, "step": 20700 }, { "epoch": 0.16757019176308763, "grad_norm": 1.210174798965454, "learning_rate": 8.378509588154382e-06, "loss": 0.0478, "step": 20710 }, { "epoch": 0.1676511044582895, "grad_norm": 0.5415124893188477, "learning_rate": 8.382555222914476e-06, "loss": 0.0606, "step": 20720 }, { "epoch": 0.16773201715349137, "grad_norm": 1.5242518186569214, "learning_rate": 8.38660085767457e-06, "loss": 0.0525, "step": 20730 }, { "epoch": 0.16781292984869325, "grad_norm": 0.9336087107658386, "learning_rate": 8.390646492434664e-06, "loss": 0.0587, "step": 20740 }, { "epoch": 0.16789384254389514, "grad_norm": 0.9916934370994568, "learning_rate": 8.394692127194758e-06, "loss": 0.0781, "step": 20750 }, { "epoch": 0.16797475523909702, "grad_norm": 1.2701892852783203, "learning_rate": 8.398737761954852e-06, "loss": 0.0639, "step": 20760 }, { "epoch": 0.16805566793429888, "grad_norm": 0.3656611144542694, "learning_rate": 8.402783396714946e-06, "loss": 0.0539, "step": 20770 }, { "epoch": 0.16813658062950076, "grad_norm": 0.5818819403648376, "learning_rate": 8.40682903147504e-06, "loss": 0.0568, "step": 20780 }, { "epoch": 0.16821749332470265, "grad_norm": 0.9395386576652527, "learning_rate": 8.410874666235133e-06, "loss": 0.0436, "step": 20790 }, { "epoch": 0.16829840601990453, "grad_norm": 0.8000349998474121, "learning_rate": 8.414920300995227e-06, "loss": 0.0611, "step": 20800 }, { "epoch": 0.1683793187151064, "grad_norm": 0.8235951662063599, "learning_rate": 8.418965935755321e-06, "loss": 0.0461, "step": 20810 }, { "epoch": 0.16846023141030828, "grad_norm": 0.5683212876319885, "learning_rate": 8.423011570515415e-06, "loss": 0.0597, "step": 20820 }, { "epoch": 0.16854114410551016, "grad_norm": 1.8377703428268433, "learning_rate": 8.42705720527551e-06, "loss": 0.0455, "step": 20830 }, { "epoch": 0.16862205680071204, "grad_norm": 0.7255060076713562, "learning_rate": 8.431102840035601e-06, "loss": 0.046, "step": 20840 }, { "epoch": 0.1687029694959139, "grad_norm": 0.3003844618797302, "learning_rate": 8.435148474795695e-06, "loss": 0.0438, "step": 20850 }, { "epoch": 0.16878388219111579, "grad_norm": 0.6441047191619873, "learning_rate": 8.439194109555791e-06, "loss": 0.0691, "step": 20860 }, { "epoch": 0.16886479488631767, "grad_norm": 0.9597476720809937, "learning_rate": 8.443239744315885e-06, "loss": 0.0571, "step": 20870 }, { "epoch": 0.16894570758151953, "grad_norm": 0.8296467661857605, "learning_rate": 8.447285379075977e-06, "loss": 0.0683, "step": 20880 }, { "epoch": 0.1690266202767214, "grad_norm": 1.1363019943237305, "learning_rate": 8.451331013836071e-06, "loss": 0.0576, "step": 20890 }, { "epoch": 0.1691075329719233, "grad_norm": 1.0583949089050293, "learning_rate": 8.455376648596165e-06, "loss": 0.0393, "step": 20900 }, { "epoch": 0.16918844566712518, "grad_norm": 0.7752929329872131, "learning_rate": 8.459422283356259e-06, "loss": 0.0508, "step": 20910 }, { "epoch": 0.16926935836232704, "grad_norm": 0.6317596435546875, "learning_rate": 8.463467918116353e-06, "loss": 0.0537, "step": 20920 }, { "epoch": 0.16935027105752892, "grad_norm": 1.2129961252212524, "learning_rate": 8.467513552876447e-06, "loss": 0.0533, "step": 20930 }, { "epoch": 0.1694311837527308, "grad_norm": 0.8468016386032104, "learning_rate": 8.47155918763654e-06, "loss": 0.0619, "step": 20940 }, { "epoch": 0.1695120964479327, "grad_norm": 0.7795577645301819, "learning_rate": 8.475604822396635e-06, "loss": 0.0552, "step": 20950 }, { "epoch": 0.16959300914313455, "grad_norm": 0.9477965831756592, "learning_rate": 8.479650457156729e-06, "loss": 0.0485, "step": 20960 }, { "epoch": 0.16967392183833643, "grad_norm": 0.5267478823661804, "learning_rate": 8.483696091916823e-06, "loss": 0.0488, "step": 20970 }, { "epoch": 0.16975483453353832, "grad_norm": 0.9229347109794617, "learning_rate": 8.487741726676917e-06, "loss": 0.0682, "step": 20980 }, { "epoch": 0.1698357472287402, "grad_norm": 0.5379460453987122, "learning_rate": 8.49178736143701e-06, "loss": 0.0581, "step": 20990 }, { "epoch": 0.16991665992394206, "grad_norm": 0.9565072059631348, "learning_rate": 8.495832996197104e-06, "loss": 0.0401, "step": 21000 }, { "epoch": 0.16999757261914394, "grad_norm": 1.1708498001098633, "learning_rate": 8.499878630957198e-06, "loss": 0.0545, "step": 21010 }, { "epoch": 0.17007848531434583, "grad_norm": 1.0633909702301025, "learning_rate": 8.503924265717292e-06, "loss": 0.0529, "step": 21020 }, { "epoch": 0.17015939800954769, "grad_norm": 0.4807872474193573, "learning_rate": 8.507969900477385e-06, "loss": 0.0336, "step": 21030 }, { "epoch": 0.17024031070474957, "grad_norm": 0.737000048160553, "learning_rate": 8.512015535237479e-06, "loss": 0.05, "step": 21040 }, { "epoch": 0.17032122339995145, "grad_norm": 1.1836826801300049, "learning_rate": 8.516061169997574e-06, "loss": 0.0457, "step": 21050 }, { "epoch": 0.17040213609515334, "grad_norm": 0.9524039030075073, "learning_rate": 8.520106804757668e-06, "loss": 0.0582, "step": 21060 }, { "epoch": 0.1704830487903552, "grad_norm": 0.61459881067276, "learning_rate": 8.52415243951776e-06, "loss": 0.0436, "step": 21070 }, { "epoch": 0.17056396148555708, "grad_norm": 1.1256312131881714, "learning_rate": 8.528198074277854e-06, "loss": 0.0549, "step": 21080 }, { "epoch": 0.17064487418075897, "grad_norm": 0.12583178281784058, "learning_rate": 8.532243709037948e-06, "loss": 0.0599, "step": 21090 }, { "epoch": 0.17072578687596085, "grad_norm": 1.4517320394515991, "learning_rate": 8.536289343798042e-06, "loss": 0.0614, "step": 21100 }, { "epoch": 0.1708066995711627, "grad_norm": 0.32901012897491455, "learning_rate": 8.540334978558136e-06, "loss": 0.0482, "step": 21110 }, { "epoch": 0.1708876122663646, "grad_norm": 1.169837474822998, "learning_rate": 8.54438061331823e-06, "loss": 0.0651, "step": 21120 }, { "epoch": 0.17096852496156648, "grad_norm": 0.6329976916313171, "learning_rate": 8.548426248078324e-06, "loss": 0.0541, "step": 21130 }, { "epoch": 0.17104943765676836, "grad_norm": 0.8479434251785278, "learning_rate": 8.552471882838418e-06, "loss": 0.0524, "step": 21140 }, { "epoch": 0.17113035035197022, "grad_norm": 0.6718777418136597, "learning_rate": 8.556517517598512e-06, "loss": 0.0529, "step": 21150 }, { "epoch": 0.1712112630471721, "grad_norm": 0.7085447311401367, "learning_rate": 8.560563152358606e-06, "loss": 0.0488, "step": 21160 }, { "epoch": 0.171292175742374, "grad_norm": 1.0072979927062988, "learning_rate": 8.5646087871187e-06, "loss": 0.0539, "step": 21170 }, { "epoch": 0.17137308843757584, "grad_norm": 1.065184235572815, "learning_rate": 8.568654421878794e-06, "loss": 0.0435, "step": 21180 }, { "epoch": 0.17145400113277773, "grad_norm": 1.1240841150283813, "learning_rate": 8.572700056638888e-06, "loss": 0.0523, "step": 21190 }, { "epoch": 0.1715349138279796, "grad_norm": 0.82747483253479, "learning_rate": 8.576745691398982e-06, "loss": 0.0513, "step": 21200 }, { "epoch": 0.1716158265231815, "grad_norm": 0.9751425385475159, "learning_rate": 8.580791326159075e-06, "loss": 0.0869, "step": 21210 }, { "epoch": 0.17169673921838335, "grad_norm": 0.6233755350112915, "learning_rate": 8.584836960919168e-06, "loss": 0.0421, "step": 21220 }, { "epoch": 0.17177765191358524, "grad_norm": 1.0922540426254272, "learning_rate": 8.588882595679263e-06, "loss": 0.0783, "step": 21230 }, { "epoch": 0.17185856460878712, "grad_norm": 0.7915715575218201, "learning_rate": 8.592928230439357e-06, "loss": 0.0611, "step": 21240 }, { "epoch": 0.171939477303989, "grad_norm": 0.8987292647361755, "learning_rate": 8.596973865199451e-06, "loss": 0.0602, "step": 21250 }, { "epoch": 0.17202038999919086, "grad_norm": 0.5677723288536072, "learning_rate": 8.601019499959543e-06, "loss": 0.0889, "step": 21260 }, { "epoch": 0.17210130269439275, "grad_norm": 0.9623451828956604, "learning_rate": 8.605065134719637e-06, "loss": 0.0421, "step": 21270 }, { "epoch": 0.17218221538959463, "grad_norm": 0.9208790063858032, "learning_rate": 8.609110769479731e-06, "loss": 0.0567, "step": 21280 }, { "epoch": 0.1722631280847965, "grad_norm": 0.8940656781196594, "learning_rate": 8.613156404239827e-06, "loss": 0.0362, "step": 21290 }, { "epoch": 0.17234404077999838, "grad_norm": 0.652341902256012, "learning_rate": 8.61720203899992e-06, "loss": 0.0679, "step": 21300 }, { "epoch": 0.17242495347520026, "grad_norm": 0.8416263461112976, "learning_rate": 8.621247673760013e-06, "loss": 0.0612, "step": 21310 }, { "epoch": 0.17250586617040214, "grad_norm": 0.7015694975852966, "learning_rate": 8.625293308520107e-06, "loss": 0.0489, "step": 21320 }, { "epoch": 0.172586778865604, "grad_norm": 0.8051696419715881, "learning_rate": 8.629338943280201e-06, "loss": 0.0616, "step": 21330 }, { "epoch": 0.17266769156080589, "grad_norm": 0.8404372930526733, "learning_rate": 8.633384578040295e-06, "loss": 0.0392, "step": 21340 }, { "epoch": 0.17274860425600777, "grad_norm": 0.6876725554466248, "learning_rate": 8.637430212800389e-06, "loss": 0.0808, "step": 21350 }, { "epoch": 0.17282951695120966, "grad_norm": 0.3590717315673828, "learning_rate": 8.641475847560483e-06, "loss": 0.0368, "step": 21360 }, { "epoch": 0.1729104296464115, "grad_norm": 0.8409148454666138, "learning_rate": 8.645521482320577e-06, "loss": 0.0412, "step": 21370 }, { "epoch": 0.1729913423416134, "grad_norm": 1.3070988655090332, "learning_rate": 8.64956711708067e-06, "loss": 0.0443, "step": 21380 }, { "epoch": 0.17307225503681528, "grad_norm": 0.6406136155128479, "learning_rate": 8.653612751840765e-06, "loss": 0.0478, "step": 21390 }, { "epoch": 0.17315316773201717, "grad_norm": 0.9345735311508179, "learning_rate": 8.657658386600859e-06, "loss": 0.0497, "step": 21400 }, { "epoch": 0.17323408042721902, "grad_norm": 0.5276831984519958, "learning_rate": 8.661704021360953e-06, "loss": 0.0552, "step": 21410 }, { "epoch": 0.1733149931224209, "grad_norm": 1.4383671283721924, "learning_rate": 8.665749656121047e-06, "loss": 0.0756, "step": 21420 }, { "epoch": 0.1733959058176228, "grad_norm": 0.9489129781723022, "learning_rate": 8.66979529088114e-06, "loss": 0.0487, "step": 21430 }, { "epoch": 0.17347681851282465, "grad_norm": 1.2131052017211914, "learning_rate": 8.673840925641234e-06, "loss": 0.0653, "step": 21440 }, { "epoch": 0.17355773120802653, "grad_norm": 0.7046981453895569, "learning_rate": 8.677886560401327e-06, "loss": 0.0722, "step": 21450 }, { "epoch": 0.17363864390322842, "grad_norm": 0.7868642807006836, "learning_rate": 8.68193219516142e-06, "loss": 0.0456, "step": 21460 }, { "epoch": 0.1737195565984303, "grad_norm": 0.7846167087554932, "learning_rate": 8.685977829921516e-06, "loss": 0.0547, "step": 21470 }, { "epoch": 0.17380046929363216, "grad_norm": 0.8933252692222595, "learning_rate": 8.69002346468161e-06, "loss": 0.0541, "step": 21480 }, { "epoch": 0.17388138198883404, "grad_norm": 1.1716773509979248, "learning_rate": 8.694069099441702e-06, "loss": 0.0479, "step": 21490 }, { "epoch": 0.17396229468403593, "grad_norm": 1.3231199979782104, "learning_rate": 8.698114734201796e-06, "loss": 0.0751, "step": 21500 }, { "epoch": 0.1740432073792378, "grad_norm": 2.894442558288574, "learning_rate": 8.70216036896189e-06, "loss": 0.0608, "step": 21510 }, { "epoch": 0.17412412007443967, "grad_norm": 1.4249680042266846, "learning_rate": 8.706206003721984e-06, "loss": 0.0694, "step": 21520 }, { "epoch": 0.17420503276964155, "grad_norm": 1.40652596950531, "learning_rate": 8.710251638482078e-06, "loss": 0.0598, "step": 21530 }, { "epoch": 0.17428594546484344, "grad_norm": 0.9152050614356995, "learning_rate": 8.714297273242172e-06, "loss": 0.0452, "step": 21540 }, { "epoch": 0.17436685816004532, "grad_norm": 0.6582836508750916, "learning_rate": 8.718342908002266e-06, "loss": 0.07, "step": 21550 }, { "epoch": 0.17444777085524718, "grad_norm": 0.4769841730594635, "learning_rate": 8.72238854276236e-06, "loss": 0.0375, "step": 21560 }, { "epoch": 0.17452868355044907, "grad_norm": 0.6669135689735413, "learning_rate": 8.726434177522454e-06, "loss": 0.0423, "step": 21570 }, { "epoch": 0.17460959624565095, "grad_norm": 0.45039868354797363, "learning_rate": 8.730479812282548e-06, "loss": 0.0544, "step": 21580 }, { "epoch": 0.1746905089408528, "grad_norm": 0.9606529474258423, "learning_rate": 8.734525447042642e-06, "loss": 0.061, "step": 21590 }, { "epoch": 0.1747714216360547, "grad_norm": 0.766740620136261, "learning_rate": 8.738571081802736e-06, "loss": 0.0486, "step": 21600 }, { "epoch": 0.17485233433125658, "grad_norm": 0.9670313596725464, "learning_rate": 8.74261671656283e-06, "loss": 0.0658, "step": 21610 }, { "epoch": 0.17493324702645846, "grad_norm": 0.5189354419708252, "learning_rate": 8.746662351322924e-06, "loss": 0.0373, "step": 21620 }, { "epoch": 0.17501415972166032, "grad_norm": 1.2554373741149902, "learning_rate": 8.750707986083018e-06, "loss": 0.0467, "step": 21630 }, { "epoch": 0.1750950724168622, "grad_norm": 0.6378191709518433, "learning_rate": 8.75475362084311e-06, "loss": 0.0512, "step": 21640 }, { "epoch": 0.1751759851120641, "grad_norm": 0.9227825999259949, "learning_rate": 8.758799255603205e-06, "loss": 0.0584, "step": 21650 }, { "epoch": 0.17525689780726597, "grad_norm": 0.8859764933586121, "learning_rate": 8.7628448903633e-06, "loss": 0.0724, "step": 21660 }, { "epoch": 0.17533781050246783, "grad_norm": 0.6635335683822632, "learning_rate": 8.766890525123393e-06, "loss": 0.0516, "step": 21670 }, { "epoch": 0.1754187231976697, "grad_norm": 0.8639729022979736, "learning_rate": 8.770936159883486e-06, "loss": 0.0525, "step": 21680 }, { "epoch": 0.1754996358928716, "grad_norm": 1.2058969736099243, "learning_rate": 8.77498179464358e-06, "loss": 0.0464, "step": 21690 }, { "epoch": 0.17558054858807348, "grad_norm": 0.604248583316803, "learning_rate": 8.779027429403673e-06, "loss": 0.0458, "step": 21700 }, { "epoch": 0.17566146128327534, "grad_norm": 0.8424373269081116, "learning_rate": 8.783073064163769e-06, "loss": 0.0664, "step": 21710 }, { "epoch": 0.17574237397847722, "grad_norm": 0.5488427877426147, "learning_rate": 8.787118698923861e-06, "loss": 0.0386, "step": 21720 }, { "epoch": 0.1758232866736791, "grad_norm": 0.7389363050460815, "learning_rate": 8.791164333683955e-06, "loss": 0.0398, "step": 21730 }, { "epoch": 0.17590419936888096, "grad_norm": 1.6904224157333374, "learning_rate": 8.79520996844405e-06, "loss": 0.0549, "step": 21740 }, { "epoch": 0.17598511206408285, "grad_norm": 0.890625, "learning_rate": 8.799255603204143e-06, "loss": 0.0393, "step": 21750 }, { "epoch": 0.17606602475928473, "grad_norm": 0.5782867670059204, "learning_rate": 8.803301237964237e-06, "loss": 0.0659, "step": 21760 }, { "epoch": 0.17614693745448662, "grad_norm": 0.6789834499359131, "learning_rate": 8.807346872724331e-06, "loss": 0.0533, "step": 21770 }, { "epoch": 0.17622785014968848, "grad_norm": 0.9652461409568787, "learning_rate": 8.811392507484425e-06, "loss": 0.0671, "step": 21780 }, { "epoch": 0.17630876284489036, "grad_norm": 0.8323616981506348, "learning_rate": 8.815438142244519e-06, "loss": 0.0666, "step": 21790 }, { "epoch": 0.17638967554009224, "grad_norm": 0.9870997071266174, "learning_rate": 8.819483777004613e-06, "loss": 0.05, "step": 21800 }, { "epoch": 0.17647058823529413, "grad_norm": 0.7919730544090271, "learning_rate": 8.823529411764707e-06, "loss": 0.0474, "step": 21810 }, { "epoch": 0.17655150093049599, "grad_norm": 1.2865402698516846, "learning_rate": 8.8275750465248e-06, "loss": 0.0438, "step": 21820 }, { "epoch": 0.17663241362569787, "grad_norm": 0.5695391893386841, "learning_rate": 8.831620681284895e-06, "loss": 0.0527, "step": 21830 }, { "epoch": 0.17671332632089976, "grad_norm": 0.3772773742675781, "learning_rate": 8.835666316044989e-06, "loss": 0.0372, "step": 21840 }, { "epoch": 0.17679423901610164, "grad_norm": 0.9750086665153503, "learning_rate": 8.839711950805083e-06, "loss": 0.0445, "step": 21850 }, { "epoch": 0.1768751517113035, "grad_norm": 0.7294058799743652, "learning_rate": 8.843757585565177e-06, "loss": 0.0798, "step": 21860 }, { "epoch": 0.17695606440650538, "grad_norm": 1.1373305320739746, "learning_rate": 8.847803220325269e-06, "loss": 0.0586, "step": 21870 }, { "epoch": 0.17703697710170727, "grad_norm": 1.138804316520691, "learning_rate": 8.851848855085363e-06, "loss": 0.0756, "step": 21880 }, { "epoch": 0.17711788979690912, "grad_norm": 0.4378642141819, "learning_rate": 8.855894489845458e-06, "loss": 0.0554, "step": 21890 }, { "epoch": 0.177198802492111, "grad_norm": 0.7165071368217468, "learning_rate": 8.859940124605552e-06, "loss": 0.0492, "step": 21900 }, { "epoch": 0.1772797151873129, "grad_norm": 0.5248031616210938, "learning_rate": 8.863985759365645e-06, "loss": 0.0386, "step": 21910 }, { "epoch": 0.17736062788251478, "grad_norm": 1.0484633445739746, "learning_rate": 8.868031394125738e-06, "loss": 0.066, "step": 21920 }, { "epoch": 0.17744154057771663, "grad_norm": 0.4757148027420044, "learning_rate": 8.872077028885832e-06, "loss": 0.0451, "step": 21930 }, { "epoch": 0.17752245327291852, "grad_norm": 0.7355186939239502, "learning_rate": 8.876122663645926e-06, "loss": 0.0475, "step": 21940 }, { "epoch": 0.1776033659681204, "grad_norm": 0.5245320200920105, "learning_rate": 8.880168298406022e-06, "loss": 0.0287, "step": 21950 }, { "epoch": 0.1776842786633223, "grad_norm": 0.7959167957305908, "learning_rate": 8.884213933166114e-06, "loss": 0.0548, "step": 21960 }, { "epoch": 0.17776519135852414, "grad_norm": 0.44327107071876526, "learning_rate": 8.888259567926208e-06, "loss": 0.0476, "step": 21970 }, { "epoch": 0.17784610405372603, "grad_norm": 1.1930372714996338, "learning_rate": 8.892305202686302e-06, "loss": 0.0649, "step": 21980 }, { "epoch": 0.1779270167489279, "grad_norm": 0.8333078026771545, "learning_rate": 8.896350837446396e-06, "loss": 0.0457, "step": 21990 }, { "epoch": 0.1780079294441298, "grad_norm": 0.9276374578475952, "learning_rate": 8.90039647220649e-06, "loss": 0.0745, "step": 22000 }, { "epoch": 0.17808884213933165, "grad_norm": 0.7908908128738403, "learning_rate": 8.904442106966584e-06, "loss": 0.0429, "step": 22010 }, { "epoch": 0.17816975483453354, "grad_norm": 1.629086971282959, "learning_rate": 8.908487741726678e-06, "loss": 0.0687, "step": 22020 }, { "epoch": 0.17825066752973542, "grad_norm": 0.4020514190196991, "learning_rate": 8.912533376486772e-06, "loss": 0.0444, "step": 22030 }, { "epoch": 0.17833158022493728, "grad_norm": 0.6562597155570984, "learning_rate": 8.916579011246866e-06, "loss": 0.0718, "step": 22040 }, { "epoch": 0.17841249292013917, "grad_norm": 0.8533996343612671, "learning_rate": 8.92062464600696e-06, "loss": 0.0831, "step": 22050 }, { "epoch": 0.17849340561534105, "grad_norm": 0.9319466352462769, "learning_rate": 8.924670280767052e-06, "loss": 0.0518, "step": 22060 }, { "epoch": 0.17857431831054293, "grad_norm": 1.1015825271606445, "learning_rate": 8.928715915527148e-06, "loss": 0.0662, "step": 22070 }, { "epoch": 0.1786552310057448, "grad_norm": 0.941718578338623, "learning_rate": 8.932761550287241e-06, "loss": 0.0818, "step": 22080 }, { "epoch": 0.17873614370094668, "grad_norm": 1.362794280052185, "learning_rate": 8.936807185047335e-06, "loss": 0.0536, "step": 22090 }, { "epoch": 0.17881705639614856, "grad_norm": 0.3811943233013153, "learning_rate": 8.940852819807428e-06, "loss": 0.0393, "step": 22100 }, { "epoch": 0.17889796909135045, "grad_norm": 0.7887089848518372, "learning_rate": 8.944898454567522e-06, "loss": 0.0524, "step": 22110 }, { "epoch": 0.1789788817865523, "grad_norm": 0.9647254943847656, "learning_rate": 8.948944089327616e-06, "loss": 0.0475, "step": 22120 }, { "epoch": 0.1790597944817542, "grad_norm": 0.8271838426589966, "learning_rate": 8.952989724087711e-06, "loss": 0.0521, "step": 22130 }, { "epoch": 0.17914070717695607, "grad_norm": 0.8489834666252136, "learning_rate": 8.957035358847805e-06, "loss": 0.0533, "step": 22140 }, { "epoch": 0.17922161987215793, "grad_norm": 0.3950437009334564, "learning_rate": 8.961080993607897e-06, "loss": 0.0488, "step": 22150 }, { "epoch": 0.1793025325673598, "grad_norm": 0.29261401295661926, "learning_rate": 8.965126628367991e-06, "loss": 0.0506, "step": 22160 }, { "epoch": 0.1793834452625617, "grad_norm": 1.3196607828140259, "learning_rate": 8.969172263128085e-06, "loss": 0.0524, "step": 22170 }, { "epoch": 0.17946435795776358, "grad_norm": 0.5320600271224976, "learning_rate": 8.97321789788818e-06, "loss": 0.0476, "step": 22180 }, { "epoch": 0.17954527065296544, "grad_norm": 1.0347760915756226, "learning_rate": 8.977263532648273e-06, "loss": 0.0594, "step": 22190 }, { "epoch": 0.17962618334816732, "grad_norm": 0.8855200409889221, "learning_rate": 8.981309167408367e-06, "loss": 0.0392, "step": 22200 }, { "epoch": 0.1797070960433692, "grad_norm": 0.9005618095397949, "learning_rate": 8.985354802168461e-06, "loss": 0.0366, "step": 22210 }, { "epoch": 0.1797880087385711, "grad_norm": 0.7872035503387451, "learning_rate": 8.989400436928555e-06, "loss": 0.0509, "step": 22220 }, { "epoch": 0.17986892143377295, "grad_norm": 1.0318766832351685, "learning_rate": 8.993446071688649e-06, "loss": 0.0729, "step": 22230 }, { "epoch": 0.17994983412897483, "grad_norm": 0.6076838374137878, "learning_rate": 8.997491706448743e-06, "loss": 0.0556, "step": 22240 }, { "epoch": 0.18003074682417672, "grad_norm": 1.0957292318344116, "learning_rate": 9.001537341208837e-06, "loss": 0.0779, "step": 22250 }, { "epoch": 0.1801116595193786, "grad_norm": 1.2276476621627808, "learning_rate": 9.00558297596893e-06, "loss": 0.0501, "step": 22260 }, { "epoch": 0.18019257221458046, "grad_norm": 0.6887305974960327, "learning_rate": 9.009628610729025e-06, "loss": 0.042, "step": 22270 }, { "epoch": 0.18027348490978234, "grad_norm": 0.9892587065696716, "learning_rate": 9.013674245489119e-06, "loss": 0.0477, "step": 22280 }, { "epoch": 0.18035439760498423, "grad_norm": 8.900334358215332, "learning_rate": 9.01771988024921e-06, "loss": 0.0697, "step": 22290 }, { "epoch": 0.18043531030018609, "grad_norm": 0.6208449006080627, "learning_rate": 9.021765515009305e-06, "loss": 0.0371, "step": 22300 }, { "epoch": 0.18051622299538797, "grad_norm": 0.7272336483001709, "learning_rate": 9.0258111497694e-06, "loss": 0.0581, "step": 22310 }, { "epoch": 0.18059713569058986, "grad_norm": 0.5565505623817444, "learning_rate": 9.029856784529494e-06, "loss": 0.0432, "step": 22320 }, { "epoch": 0.18067804838579174, "grad_norm": 1.7184956073760986, "learning_rate": 9.033902419289587e-06, "loss": 0.0665, "step": 22330 }, { "epoch": 0.1807589610809936, "grad_norm": 0.48571905493736267, "learning_rate": 9.03794805404968e-06, "loss": 0.0697, "step": 22340 }, { "epoch": 0.18083987377619548, "grad_norm": 0.9721336364746094, "learning_rate": 9.041993688809774e-06, "loss": 0.0522, "step": 22350 }, { "epoch": 0.18092078647139737, "grad_norm": 0.7555946707725525, "learning_rate": 9.046039323569868e-06, "loss": 0.0721, "step": 22360 }, { "epoch": 0.18100169916659925, "grad_norm": 1.2317441701889038, "learning_rate": 9.050084958329964e-06, "loss": 0.0853, "step": 22370 }, { "epoch": 0.1810826118618011, "grad_norm": 1.322463035583496, "learning_rate": 9.054130593090056e-06, "loss": 0.0499, "step": 22380 }, { "epoch": 0.181163524557003, "grad_norm": 0.7874332070350647, "learning_rate": 9.05817622785015e-06, "loss": 0.0539, "step": 22390 }, { "epoch": 0.18124443725220488, "grad_norm": 0.9336652159690857, "learning_rate": 9.062221862610244e-06, "loss": 0.0431, "step": 22400 }, { "epoch": 0.18132534994740676, "grad_norm": 0.7501932382583618, "learning_rate": 9.066267497370338e-06, "loss": 0.0367, "step": 22410 }, { "epoch": 0.18140626264260862, "grad_norm": 1.0255221128463745, "learning_rate": 9.070313132130432e-06, "loss": 0.0669, "step": 22420 }, { "epoch": 0.1814871753378105, "grad_norm": 0.5623070597648621, "learning_rate": 9.074358766890526e-06, "loss": 0.0576, "step": 22430 }, { "epoch": 0.1815680880330124, "grad_norm": 0.39057037234306335, "learning_rate": 9.07840440165062e-06, "loss": 0.0451, "step": 22440 }, { "epoch": 0.18164900072821424, "grad_norm": 1.3174453973770142, "learning_rate": 9.082450036410714e-06, "loss": 0.0831, "step": 22450 }, { "epoch": 0.18172991342341613, "grad_norm": 1.3599460124969482, "learning_rate": 9.086495671170808e-06, "loss": 0.0402, "step": 22460 }, { "epoch": 0.181810826118618, "grad_norm": 0.9541048407554626, "learning_rate": 9.090541305930902e-06, "loss": 0.0595, "step": 22470 }, { "epoch": 0.1818917388138199, "grad_norm": 0.8442445397377014, "learning_rate": 9.094586940690994e-06, "loss": 0.0644, "step": 22480 }, { "epoch": 0.18197265150902175, "grad_norm": 1.1906235218048096, "learning_rate": 9.098632575451088e-06, "loss": 0.0565, "step": 22490 }, { "epoch": 0.18205356420422364, "grad_norm": 0.4903331696987152, "learning_rate": 9.102678210211184e-06, "loss": 0.0312, "step": 22500 }, { "epoch": 0.18213447689942552, "grad_norm": 1.0831812620162964, "learning_rate": 9.106723844971278e-06, "loss": 0.0692, "step": 22510 }, { "epoch": 0.1822153895946274, "grad_norm": 1.0394039154052734, "learning_rate": 9.11076947973137e-06, "loss": 0.0492, "step": 22520 }, { "epoch": 0.18229630228982927, "grad_norm": 0.8681750297546387, "learning_rate": 9.114815114491464e-06, "loss": 0.0488, "step": 22530 }, { "epoch": 0.18237721498503115, "grad_norm": 0.369144082069397, "learning_rate": 9.118860749251558e-06, "loss": 0.0452, "step": 22540 }, { "epoch": 0.18245812768023303, "grad_norm": 0.9269756078720093, "learning_rate": 9.122906384011652e-06, "loss": 0.0644, "step": 22550 }, { "epoch": 0.18253904037543492, "grad_norm": 1.0092307329177856, "learning_rate": 9.126952018771747e-06, "loss": 0.0703, "step": 22560 }, { "epoch": 0.18261995307063678, "grad_norm": 1.0042588710784912, "learning_rate": 9.13099765353184e-06, "loss": 0.0528, "step": 22570 }, { "epoch": 0.18270086576583866, "grad_norm": 0.6624580025672913, "learning_rate": 9.135043288291933e-06, "loss": 0.0952, "step": 22580 }, { "epoch": 0.18278177846104054, "grad_norm": 1.2052112817764282, "learning_rate": 9.139088923052027e-06, "loss": 0.062, "step": 22590 }, { "epoch": 0.1828626911562424, "grad_norm": 0.7385228872299194, "learning_rate": 9.143134557812121e-06, "loss": 0.0517, "step": 22600 }, { "epoch": 0.1829436038514443, "grad_norm": 1.2467817068099976, "learning_rate": 9.147180192572215e-06, "loss": 0.0594, "step": 22610 }, { "epoch": 0.18302451654664617, "grad_norm": 0.8414062857627869, "learning_rate": 9.15122582733231e-06, "loss": 0.0517, "step": 22620 }, { "epoch": 0.18310542924184806, "grad_norm": 1.2870138883590698, "learning_rate": 9.155271462092403e-06, "loss": 0.0585, "step": 22630 }, { "epoch": 0.1831863419370499, "grad_norm": 0.8764962553977966, "learning_rate": 9.159317096852497e-06, "loss": 0.0459, "step": 22640 }, { "epoch": 0.1832672546322518, "grad_norm": 0.20987536013126373, "learning_rate": 9.163362731612591e-06, "loss": 0.0417, "step": 22650 }, { "epoch": 0.18334816732745368, "grad_norm": 1.2034531831741333, "learning_rate": 9.167408366372685e-06, "loss": 0.0604, "step": 22660 }, { "epoch": 0.18342908002265557, "grad_norm": 2.574601411819458, "learning_rate": 9.171454001132777e-06, "loss": 0.0661, "step": 22670 }, { "epoch": 0.18350999271785742, "grad_norm": 0.5881232023239136, "learning_rate": 9.175499635892873e-06, "loss": 0.068, "step": 22680 }, { "epoch": 0.1835909054130593, "grad_norm": 0.8718109130859375, "learning_rate": 9.179545270652967e-06, "loss": 0.0902, "step": 22690 }, { "epoch": 0.1836718181082612, "grad_norm": 0.7330299615859985, "learning_rate": 9.18359090541306e-06, "loss": 0.0523, "step": 22700 }, { "epoch": 0.18375273080346308, "grad_norm": 0.9671722054481506, "learning_rate": 9.187636540173153e-06, "loss": 0.0531, "step": 22710 }, { "epoch": 0.18383364349866493, "grad_norm": 0.6235611438751221, "learning_rate": 9.191682174933247e-06, "loss": 0.0495, "step": 22720 }, { "epoch": 0.18391455619386682, "grad_norm": 0.2822299599647522, "learning_rate": 9.19572780969334e-06, "loss": 0.0667, "step": 22730 }, { "epoch": 0.1839954688890687, "grad_norm": 0.9571437239646912, "learning_rate": 9.199773444453436e-06, "loss": 0.0612, "step": 22740 }, { "epoch": 0.18407638158427056, "grad_norm": 0.566222608089447, "learning_rate": 9.20381907921353e-06, "loss": 0.0463, "step": 22750 }, { "epoch": 0.18415729427947244, "grad_norm": 0.48524120450019836, "learning_rate": 9.207864713973623e-06, "loss": 0.0696, "step": 22760 }, { "epoch": 0.18423820697467433, "grad_norm": 0.4192524254322052, "learning_rate": 9.211910348733717e-06, "loss": 0.06, "step": 22770 }, { "epoch": 0.1843191196698762, "grad_norm": 0.8341526389122009, "learning_rate": 9.21595598349381e-06, "loss": 0.0592, "step": 22780 }, { "epoch": 0.18440003236507807, "grad_norm": 1.0438131093978882, "learning_rate": 9.220001618253904e-06, "loss": 0.0489, "step": 22790 }, { "epoch": 0.18448094506027995, "grad_norm": 0.40686526894569397, "learning_rate": 9.224047253013998e-06, "loss": 0.0369, "step": 22800 }, { "epoch": 0.18456185775548184, "grad_norm": 1.2348124980926514, "learning_rate": 9.228092887774092e-06, "loss": 0.0458, "step": 22810 }, { "epoch": 0.18464277045068372, "grad_norm": 1.106433391571045, "learning_rate": 9.232138522534186e-06, "loss": 0.0493, "step": 22820 }, { "epoch": 0.18472368314588558, "grad_norm": 1.9110965728759766, "learning_rate": 9.23618415729428e-06, "loss": 0.0526, "step": 22830 }, { "epoch": 0.18480459584108747, "grad_norm": 0.5324411988258362, "learning_rate": 9.240229792054374e-06, "loss": 0.0428, "step": 22840 }, { "epoch": 0.18488550853628935, "grad_norm": 0.3708842694759369, "learning_rate": 9.244275426814468e-06, "loss": 0.0533, "step": 22850 }, { "epoch": 0.18496642123149123, "grad_norm": 0.8822302222251892, "learning_rate": 9.248321061574562e-06, "loss": 0.0595, "step": 22860 }, { "epoch": 0.1850473339266931, "grad_norm": 1.0906771421432495, "learning_rate": 9.252366696334656e-06, "loss": 0.0783, "step": 22870 }, { "epoch": 0.18512824662189498, "grad_norm": 0.6330471634864807, "learning_rate": 9.25641233109475e-06, "loss": 0.0591, "step": 22880 }, { "epoch": 0.18520915931709686, "grad_norm": 0.9194020628929138, "learning_rate": 9.260457965854844e-06, "loss": 0.0546, "step": 22890 }, { "epoch": 0.18529007201229872, "grad_norm": 1.094058871269226, "learning_rate": 9.264503600614936e-06, "loss": 0.0629, "step": 22900 }, { "epoch": 0.1853709847075006, "grad_norm": 0.4897007346153259, "learning_rate": 9.26854923537503e-06, "loss": 0.0542, "step": 22910 }, { "epoch": 0.1854518974027025, "grad_norm": 1.6974047422409058, "learning_rate": 9.272594870135126e-06, "loss": 0.0584, "step": 22920 }, { "epoch": 0.18553281009790437, "grad_norm": 1.1468456983566284, "learning_rate": 9.27664050489522e-06, "loss": 0.0427, "step": 22930 }, { "epoch": 0.18561372279310623, "grad_norm": 0.5123955011367798, "learning_rate": 9.280686139655314e-06, "loss": 0.0559, "step": 22940 }, { "epoch": 0.1856946354883081, "grad_norm": 0.5171351432800293, "learning_rate": 9.284731774415406e-06, "loss": 0.048, "step": 22950 }, { "epoch": 0.18577554818351, "grad_norm": 1.3119075298309326, "learning_rate": 9.2887774091755e-06, "loss": 0.062, "step": 22960 }, { "epoch": 0.18585646087871188, "grad_norm": 0.5398312211036682, "learning_rate": 9.292823043935594e-06, "loss": 0.0823, "step": 22970 }, { "epoch": 0.18593737357391374, "grad_norm": 1.120223045349121, "learning_rate": 9.29686867869569e-06, "loss": 0.0589, "step": 22980 }, { "epoch": 0.18601828626911562, "grad_norm": 0.6688364148139954, "learning_rate": 9.300914313455782e-06, "loss": 0.0446, "step": 22990 }, { "epoch": 0.1860991989643175, "grad_norm": 0.675934910774231, "learning_rate": 9.304959948215875e-06, "loss": 0.038, "step": 23000 }, { "epoch": 0.18618011165951937, "grad_norm": 0.6109613180160522, "learning_rate": 9.30900558297597e-06, "loss": 0.0569, "step": 23010 }, { "epoch": 0.18626102435472125, "grad_norm": 0.6575759649276733, "learning_rate": 9.313051217736063e-06, "loss": 0.0567, "step": 23020 }, { "epoch": 0.18634193704992313, "grad_norm": 0.5953474640846252, "learning_rate": 9.317096852496157e-06, "loss": 0.0561, "step": 23030 }, { "epoch": 0.18642284974512502, "grad_norm": 1.0580328702926636, "learning_rate": 9.321142487256251e-06, "loss": 0.0601, "step": 23040 }, { "epoch": 0.18650376244032688, "grad_norm": 0.6128376126289368, "learning_rate": 9.325188122016345e-06, "loss": 0.0381, "step": 23050 }, { "epoch": 0.18658467513552876, "grad_norm": 0.5763182044029236, "learning_rate": 9.329233756776439e-06, "loss": 0.0426, "step": 23060 }, { "epoch": 0.18666558783073064, "grad_norm": 0.8795872926712036, "learning_rate": 9.333279391536533e-06, "loss": 0.0667, "step": 23070 }, { "epoch": 0.18674650052593253, "grad_norm": 0.42084237933158875, "learning_rate": 9.337325026296627e-06, "loss": 0.0482, "step": 23080 }, { "epoch": 0.1868274132211344, "grad_norm": 1.0605312585830688, "learning_rate": 9.34137066105672e-06, "loss": 0.0541, "step": 23090 }, { "epoch": 0.18690832591633627, "grad_norm": 1.4629027843475342, "learning_rate": 9.345416295816815e-06, "loss": 0.0656, "step": 23100 }, { "epoch": 0.18698923861153816, "grad_norm": 0.8962796926498413, "learning_rate": 9.349461930576909e-06, "loss": 0.0517, "step": 23110 }, { "epoch": 0.18707015130674004, "grad_norm": 0.6096726655960083, "learning_rate": 9.353507565337003e-06, "loss": 0.0393, "step": 23120 }, { "epoch": 0.1871510640019419, "grad_norm": 0.9594897627830505, "learning_rate": 9.357553200097097e-06, "loss": 0.0726, "step": 23130 }, { "epoch": 0.18723197669714378, "grad_norm": 1.6097816228866577, "learning_rate": 9.361598834857189e-06, "loss": 0.0727, "step": 23140 }, { "epoch": 0.18731288939234567, "grad_norm": 0.8781548142433167, "learning_rate": 9.365644469617283e-06, "loss": 0.0439, "step": 23150 }, { "epoch": 0.18739380208754752, "grad_norm": 0.8310880661010742, "learning_rate": 9.369690104377379e-06, "loss": 0.0616, "step": 23160 }, { "epoch": 0.1874747147827494, "grad_norm": 0.7300166487693787, "learning_rate": 9.373735739137472e-06, "loss": 0.0535, "step": 23170 }, { "epoch": 0.1875556274779513, "grad_norm": 0.7896425127983093, "learning_rate": 9.377781373897565e-06, "loss": 0.0787, "step": 23180 }, { "epoch": 0.18763654017315318, "grad_norm": 1.1351287364959717, "learning_rate": 9.381827008657659e-06, "loss": 0.0691, "step": 23190 }, { "epoch": 0.18771745286835503, "grad_norm": 0.4852314293384552, "learning_rate": 9.385872643417753e-06, "loss": 0.0502, "step": 23200 }, { "epoch": 0.18779836556355692, "grad_norm": 0.582512378692627, "learning_rate": 9.389918278177847e-06, "loss": 0.0613, "step": 23210 }, { "epoch": 0.1878792782587588, "grad_norm": 0.8130285739898682, "learning_rate": 9.39396391293794e-06, "loss": 0.0517, "step": 23220 }, { "epoch": 0.1879601909539607, "grad_norm": 0.8553426861763, "learning_rate": 9.398009547698034e-06, "loss": 0.0458, "step": 23230 }, { "epoch": 0.18804110364916254, "grad_norm": 1.5957914590835571, "learning_rate": 9.402055182458128e-06, "loss": 0.0629, "step": 23240 }, { "epoch": 0.18812201634436443, "grad_norm": 1.72809636592865, "learning_rate": 9.406100817218222e-06, "loss": 0.0491, "step": 23250 }, { "epoch": 0.1882029290395663, "grad_norm": 0.42171141505241394, "learning_rate": 9.410146451978316e-06, "loss": 0.0367, "step": 23260 }, { "epoch": 0.1882838417347682, "grad_norm": 0.45960330963134766, "learning_rate": 9.41419208673841e-06, "loss": 0.0523, "step": 23270 }, { "epoch": 0.18836475442997005, "grad_norm": 0.4928479790687561, "learning_rate": 9.418237721498504e-06, "loss": 0.0642, "step": 23280 }, { "epoch": 0.18844566712517194, "grad_norm": 0.926129937171936, "learning_rate": 9.422283356258598e-06, "loss": 0.043, "step": 23290 }, { "epoch": 0.18852657982037382, "grad_norm": 1.0205669403076172, "learning_rate": 9.426328991018692e-06, "loss": 0.0519, "step": 23300 }, { "epoch": 0.18860749251557568, "grad_norm": 1.0876624584197998, "learning_rate": 9.430374625778786e-06, "loss": 0.0524, "step": 23310 }, { "epoch": 0.18868840521077757, "grad_norm": 0.8289431929588318, "learning_rate": 9.43442026053888e-06, "loss": 0.0557, "step": 23320 }, { "epoch": 0.18876931790597945, "grad_norm": 1.0592907667160034, "learning_rate": 9.438465895298972e-06, "loss": 0.0414, "step": 23330 }, { "epoch": 0.18885023060118133, "grad_norm": 0.6380603909492493, "learning_rate": 9.442511530059068e-06, "loss": 0.0529, "step": 23340 }, { "epoch": 0.1889311432963832, "grad_norm": 0.5346880555152893, "learning_rate": 9.446557164819162e-06, "loss": 0.0511, "step": 23350 }, { "epoch": 0.18901205599158508, "grad_norm": 0.5449624061584473, "learning_rate": 9.450602799579256e-06, "loss": 0.0495, "step": 23360 }, { "epoch": 0.18909296868678696, "grad_norm": 0.4861913025379181, "learning_rate": 9.454648434339348e-06, "loss": 0.06, "step": 23370 }, { "epoch": 0.18917388138198885, "grad_norm": 0.8188908100128174, "learning_rate": 9.458694069099442e-06, "loss": 0.0586, "step": 23380 }, { "epoch": 0.1892547940771907, "grad_norm": 1.125915765762329, "learning_rate": 9.462739703859536e-06, "loss": 0.0661, "step": 23390 }, { "epoch": 0.1893357067723926, "grad_norm": 1.4218333959579468, "learning_rate": 9.466785338619631e-06, "loss": 0.0781, "step": 23400 }, { "epoch": 0.18941661946759447, "grad_norm": 0.8018788695335388, "learning_rate": 9.470830973379724e-06, "loss": 0.0403, "step": 23410 }, { "epoch": 0.18949753216279636, "grad_norm": 0.7173539996147156, "learning_rate": 9.474876608139818e-06, "loss": 0.0521, "step": 23420 }, { "epoch": 0.1895784448579982, "grad_norm": 1.3677623271942139, "learning_rate": 9.478922242899912e-06, "loss": 0.0396, "step": 23430 }, { "epoch": 0.1896593575532001, "grad_norm": 1.227473258972168, "learning_rate": 9.482967877660005e-06, "loss": 0.0502, "step": 23440 }, { "epoch": 0.18974027024840198, "grad_norm": 0.5829499959945679, "learning_rate": 9.4870135124201e-06, "loss": 0.0424, "step": 23450 }, { "epoch": 0.18982118294360384, "grad_norm": 1.734204888343811, "learning_rate": 9.491059147180193e-06, "loss": 0.0527, "step": 23460 }, { "epoch": 0.18990209563880572, "grad_norm": 0.6951296925544739, "learning_rate": 9.495104781940287e-06, "loss": 0.0494, "step": 23470 }, { "epoch": 0.1899830083340076, "grad_norm": 0.9414178133010864, "learning_rate": 9.499150416700381e-06, "loss": 0.0591, "step": 23480 }, { "epoch": 0.1900639210292095, "grad_norm": 0.4835696220397949, "learning_rate": 9.503196051460475e-06, "loss": 0.0575, "step": 23490 }, { "epoch": 0.19014483372441135, "grad_norm": 0.8114397525787354, "learning_rate": 9.507241686220569e-06, "loss": 0.0546, "step": 23500 }, { "epoch": 0.19022574641961323, "grad_norm": 0.7965264916419983, "learning_rate": 9.511287320980663e-06, "loss": 0.0506, "step": 23510 }, { "epoch": 0.19030665911481512, "grad_norm": 0.8657309412956238, "learning_rate": 9.515332955740757e-06, "loss": 0.0492, "step": 23520 }, { "epoch": 0.190387571810017, "grad_norm": 0.5823547840118408, "learning_rate": 9.519378590500851e-06, "loss": 0.0746, "step": 23530 }, { "epoch": 0.19046848450521886, "grad_norm": 0.6336300373077393, "learning_rate": 9.523424225260945e-06, "loss": 0.0354, "step": 23540 }, { "epoch": 0.19054939720042074, "grad_norm": 0.5488144159317017, "learning_rate": 9.527469860021039e-06, "loss": 0.0393, "step": 23550 }, { "epoch": 0.19063030989562263, "grad_norm": 1.1488903760910034, "learning_rate": 9.531515494781131e-06, "loss": 0.0484, "step": 23560 }, { "epoch": 0.19071122259082451, "grad_norm": 1.1615945100784302, "learning_rate": 9.535561129541225e-06, "loss": 0.0581, "step": 23570 }, { "epoch": 0.19079213528602637, "grad_norm": 1.0783326625823975, "learning_rate": 9.53960676430132e-06, "loss": 0.0606, "step": 23580 }, { "epoch": 0.19087304798122826, "grad_norm": 0.7946123480796814, "learning_rate": 9.543652399061415e-06, "loss": 0.0458, "step": 23590 }, { "epoch": 0.19095396067643014, "grad_norm": 0.8570497035980225, "learning_rate": 9.547698033821507e-06, "loss": 0.0373, "step": 23600 }, { "epoch": 0.191034873371632, "grad_norm": 0.5415481925010681, "learning_rate": 9.5517436685816e-06, "loss": 0.0566, "step": 23610 }, { "epoch": 0.19111578606683388, "grad_norm": 0.7116758227348328, "learning_rate": 9.555789303341695e-06, "loss": 0.0531, "step": 23620 }, { "epoch": 0.19119669876203577, "grad_norm": 0.8861972093582153, "learning_rate": 9.559834938101789e-06, "loss": 0.0722, "step": 23630 }, { "epoch": 0.19127761145723765, "grad_norm": 0.773812472820282, "learning_rate": 9.563880572861883e-06, "loss": 0.064, "step": 23640 }, { "epoch": 0.1913585241524395, "grad_norm": 0.966128408908844, "learning_rate": 9.567926207621977e-06, "loss": 0.0512, "step": 23650 }, { "epoch": 0.1914394368476414, "grad_norm": 0.86457359790802, "learning_rate": 9.57197184238207e-06, "loss": 0.0547, "step": 23660 }, { "epoch": 0.19152034954284328, "grad_norm": 0.767979621887207, "learning_rate": 9.576017477142164e-06, "loss": 0.0537, "step": 23670 }, { "epoch": 0.19160126223804516, "grad_norm": 1.2436890602111816, "learning_rate": 9.580063111902258e-06, "loss": 0.0619, "step": 23680 }, { "epoch": 0.19168217493324702, "grad_norm": 0.8465378880500793, "learning_rate": 9.584108746662352e-06, "loss": 0.0607, "step": 23690 }, { "epoch": 0.1917630876284489, "grad_norm": 0.9319906830787659, "learning_rate": 9.588154381422446e-06, "loss": 0.044, "step": 23700 }, { "epoch": 0.1918440003236508, "grad_norm": 0.6060811877250671, "learning_rate": 9.59220001618254e-06, "loss": 0.0281, "step": 23710 }, { "epoch": 0.19192491301885264, "grad_norm": 0.7327439188957214, "learning_rate": 9.596245650942634e-06, "loss": 0.0551, "step": 23720 }, { "epoch": 0.19200582571405453, "grad_norm": 0.6350522637367249, "learning_rate": 9.600291285702728e-06, "loss": 0.0464, "step": 23730 }, { "epoch": 0.1920867384092564, "grad_norm": 1.1567164659500122, "learning_rate": 9.604336920462822e-06, "loss": 0.0464, "step": 23740 }, { "epoch": 0.1921676511044583, "grad_norm": 0.720273494720459, "learning_rate": 9.608382555222914e-06, "loss": 0.0391, "step": 23750 }, { "epoch": 0.19224856379966015, "grad_norm": 0.9665845036506653, "learning_rate": 9.61242818998301e-06, "loss": 0.0473, "step": 23760 }, { "epoch": 0.19232947649486204, "grad_norm": 1.2468125820159912, "learning_rate": 9.616473824743104e-06, "loss": 0.069, "step": 23770 }, { "epoch": 0.19241038919006392, "grad_norm": 0.6767359972000122, "learning_rate": 9.620519459503198e-06, "loss": 0.0361, "step": 23780 }, { "epoch": 0.1924913018852658, "grad_norm": 0.8376327157020569, "learning_rate": 9.62456509426329e-06, "loss": 0.0648, "step": 23790 }, { "epoch": 0.19257221458046767, "grad_norm": 0.7919451594352722, "learning_rate": 9.628610729023384e-06, "loss": 0.0726, "step": 23800 }, { "epoch": 0.19265312727566955, "grad_norm": 0.753880500793457, "learning_rate": 9.632656363783478e-06, "loss": 0.0529, "step": 23810 }, { "epoch": 0.19273403997087143, "grad_norm": 0.7754253149032593, "learning_rate": 9.636701998543573e-06, "loss": 0.0562, "step": 23820 }, { "epoch": 0.19281495266607332, "grad_norm": 0.6510062217712402, "learning_rate": 9.640747633303666e-06, "loss": 0.0628, "step": 23830 }, { "epoch": 0.19289586536127518, "grad_norm": 0.7099917531013489, "learning_rate": 9.64479326806376e-06, "loss": 0.0491, "step": 23840 }, { "epoch": 0.19297677805647706, "grad_norm": 1.1044739484786987, "learning_rate": 9.648838902823854e-06, "loss": 0.0588, "step": 23850 }, { "epoch": 0.19305769075167895, "grad_norm": 1.4175090789794922, "learning_rate": 9.652884537583948e-06, "loss": 0.0484, "step": 23860 }, { "epoch": 0.1931386034468808, "grad_norm": 0.802536129951477, "learning_rate": 9.656930172344041e-06, "loss": 0.0856, "step": 23870 }, { "epoch": 0.1932195161420827, "grad_norm": 1.0213435888290405, "learning_rate": 9.660975807104135e-06, "loss": 0.051, "step": 23880 }, { "epoch": 0.19330042883728457, "grad_norm": 0.8251345157623291, "learning_rate": 9.66502144186423e-06, "loss": 0.041, "step": 23890 }, { "epoch": 0.19338134153248646, "grad_norm": 0.9372314214706421, "learning_rate": 9.669067076624323e-06, "loss": 0.0531, "step": 23900 }, { "epoch": 0.1934622542276883, "grad_norm": 0.5851106643676758, "learning_rate": 9.673112711384417e-06, "loss": 0.0411, "step": 23910 }, { "epoch": 0.1935431669228902, "grad_norm": 0.5079564452171326, "learning_rate": 9.677158346144511e-06, "loss": 0.0563, "step": 23920 }, { "epoch": 0.19362407961809208, "grad_norm": 0.6509225368499756, "learning_rate": 9.681203980904605e-06, "loss": 0.0369, "step": 23930 }, { "epoch": 0.19370499231329397, "grad_norm": 1.2821921110153198, "learning_rate": 9.685249615664699e-06, "loss": 0.0583, "step": 23940 }, { "epoch": 0.19378590500849582, "grad_norm": 0.6598593592643738, "learning_rate": 9.689295250424793e-06, "loss": 0.0647, "step": 23950 }, { "epoch": 0.1938668177036977, "grad_norm": 0.5240229964256287, "learning_rate": 9.693340885184887e-06, "loss": 0.0663, "step": 23960 }, { "epoch": 0.1939477303988996, "grad_norm": 0.8400824069976807, "learning_rate": 9.697386519944981e-06, "loss": 0.055, "step": 23970 }, { "epoch": 0.19402864309410148, "grad_norm": 1.0788673162460327, "learning_rate": 9.701432154705073e-06, "loss": 0.0499, "step": 23980 }, { "epoch": 0.19410955578930333, "grad_norm": 0.5871536731719971, "learning_rate": 9.705477789465167e-06, "loss": 0.0707, "step": 23990 }, { "epoch": 0.19419046848450522, "grad_norm": 0.9691022634506226, "learning_rate": 9.709523424225263e-06, "loss": 0.0372, "step": 24000 }, { "epoch": 0.1942713811797071, "grad_norm": 0.6436703205108643, "learning_rate": 9.713569058985357e-06, "loss": 0.0552, "step": 24010 }, { "epoch": 0.19435229387490896, "grad_norm": 1.2098288536071777, "learning_rate": 9.717614693745449e-06, "loss": 0.0748, "step": 24020 }, { "epoch": 0.19443320657011084, "grad_norm": 0.489270955324173, "learning_rate": 9.721660328505543e-06, "loss": 0.0516, "step": 24030 }, { "epoch": 0.19451411926531273, "grad_norm": 0.7645586133003235, "learning_rate": 9.725705963265637e-06, "loss": 0.0696, "step": 24040 }, { "epoch": 0.19459503196051461, "grad_norm": 1.0491337776184082, "learning_rate": 9.72975159802573e-06, "loss": 0.0673, "step": 24050 }, { "epoch": 0.19467594465571647, "grad_norm": 1.0299185514450073, "learning_rate": 9.733797232785825e-06, "loss": 0.0554, "step": 24060 }, { "epoch": 0.19475685735091836, "grad_norm": 1.0176535844802856, "learning_rate": 9.737842867545919e-06, "loss": 0.0404, "step": 24070 }, { "epoch": 0.19483777004612024, "grad_norm": 0.9012635350227356, "learning_rate": 9.741888502306013e-06, "loss": 0.0631, "step": 24080 }, { "epoch": 0.19491868274132212, "grad_norm": 1.8882421255111694, "learning_rate": 9.745934137066106e-06, "loss": 0.0533, "step": 24090 }, { "epoch": 0.19499959543652398, "grad_norm": 0.9477003812789917, "learning_rate": 9.7499797718262e-06, "loss": 0.0518, "step": 24100 }, { "epoch": 0.19508050813172587, "grad_norm": 1.0877329111099243, "learning_rate": 9.754025406586294e-06, "loss": 0.051, "step": 24110 }, { "epoch": 0.19516142082692775, "grad_norm": 1.2532362937927246, "learning_rate": 9.758071041346388e-06, "loss": 0.0533, "step": 24120 }, { "epoch": 0.19524233352212964, "grad_norm": 0.9150111675262451, "learning_rate": 9.762116676106482e-06, "loss": 0.0508, "step": 24130 }, { "epoch": 0.1953232462173315, "grad_norm": 0.7678043246269226, "learning_rate": 9.766162310866576e-06, "loss": 0.0496, "step": 24140 }, { "epoch": 0.19540415891253338, "grad_norm": 0.8130306601524353, "learning_rate": 9.77020794562667e-06, "loss": 0.0612, "step": 24150 }, { "epoch": 0.19548507160773526, "grad_norm": 0.7004916071891785, "learning_rate": 9.774253580386764e-06, "loss": 0.0574, "step": 24160 }, { "epoch": 0.19556598430293712, "grad_norm": 0.9202296137809753, "learning_rate": 9.778299215146856e-06, "loss": 0.0659, "step": 24170 }, { "epoch": 0.195646896998139, "grad_norm": 0.9321312308311462, "learning_rate": 9.78234484990695e-06, "loss": 0.061, "step": 24180 }, { "epoch": 0.1957278096933409, "grad_norm": 0.6969049572944641, "learning_rate": 9.786390484667046e-06, "loss": 0.045, "step": 24190 }, { "epoch": 0.19580872238854277, "grad_norm": 0.6669274568557739, "learning_rate": 9.79043611942714e-06, "loss": 0.0598, "step": 24200 }, { "epoch": 0.19588963508374463, "grad_norm": 0.8425612449645996, "learning_rate": 9.794481754187232e-06, "loss": 0.0775, "step": 24210 }, { "epoch": 0.1959705477789465, "grad_norm": 0.40195947885513306, "learning_rate": 9.798527388947326e-06, "loss": 0.0477, "step": 24220 }, { "epoch": 0.1960514604741484, "grad_norm": 0.7917523980140686, "learning_rate": 9.80257302370742e-06, "loss": 0.041, "step": 24230 }, { "epoch": 0.19613237316935028, "grad_norm": 0.6590937376022339, "learning_rate": 9.806618658467514e-06, "loss": 0.0437, "step": 24240 }, { "epoch": 0.19621328586455214, "grad_norm": 0.8688060641288757, "learning_rate": 9.810664293227608e-06, "loss": 0.0471, "step": 24250 }, { "epoch": 0.19629419855975402, "grad_norm": 0.6629999279975891, "learning_rate": 9.814709927987702e-06, "loss": 0.0365, "step": 24260 }, { "epoch": 0.1963751112549559, "grad_norm": 0.6004605293273926, "learning_rate": 9.818755562747796e-06, "loss": 0.0358, "step": 24270 }, { "epoch": 0.1964560239501578, "grad_norm": 0.8567540645599365, "learning_rate": 9.82280119750789e-06, "loss": 0.0652, "step": 24280 }, { "epoch": 0.19653693664535965, "grad_norm": 1.0347909927368164, "learning_rate": 9.826846832267984e-06, "loss": 0.0552, "step": 24290 }, { "epoch": 0.19661784934056153, "grad_norm": 1.313833475112915, "learning_rate": 9.830892467028078e-06, "loss": 0.043, "step": 24300 }, { "epoch": 0.19669876203576342, "grad_norm": 0.8515578508377075, "learning_rate": 9.834938101788171e-06, "loss": 0.0441, "step": 24310 }, { "epoch": 0.19677967473096528, "grad_norm": 1.030753493309021, "learning_rate": 9.838983736548265e-06, "loss": 0.0629, "step": 24320 }, { "epoch": 0.19686058742616716, "grad_norm": 0.7986847162246704, "learning_rate": 9.84302937130836e-06, "loss": 0.0633, "step": 24330 }, { "epoch": 0.19694150012136905, "grad_norm": 0.9598775506019592, "learning_rate": 9.847075006068453e-06, "loss": 0.0506, "step": 24340 }, { "epoch": 0.19702241281657093, "grad_norm": 0.783082902431488, "learning_rate": 9.851120640828547e-06, "loss": 0.0545, "step": 24350 }, { "epoch": 0.1971033255117728, "grad_norm": 0.6146795749664307, "learning_rate": 9.85516627558864e-06, "loss": 0.0524, "step": 24360 }, { "epoch": 0.19718423820697467, "grad_norm": 1.0150561332702637, "learning_rate": 9.859211910348735e-06, "loss": 0.0485, "step": 24370 }, { "epoch": 0.19726515090217656, "grad_norm": 1.076302170753479, "learning_rate": 9.863257545108829e-06, "loss": 0.0498, "step": 24380 }, { "epoch": 0.19734606359737844, "grad_norm": 0.7388834953308105, "learning_rate": 9.867303179868923e-06, "loss": 0.0375, "step": 24390 }, { "epoch": 0.1974269762925803, "grad_norm": 0.5551727414131165, "learning_rate": 9.871348814629015e-06, "loss": 0.0477, "step": 24400 }, { "epoch": 0.19750788898778218, "grad_norm": 0.49849098920822144, "learning_rate": 9.87539444938911e-06, "loss": 0.0568, "step": 24410 }, { "epoch": 0.19758880168298407, "grad_norm": 0.6192982792854309, "learning_rate": 9.879440084149203e-06, "loss": 0.047, "step": 24420 }, { "epoch": 0.19766971437818595, "grad_norm": 1.144687533378601, "learning_rate": 9.883485718909299e-06, "loss": 0.0516, "step": 24430 }, { "epoch": 0.1977506270733878, "grad_norm": 0.7167502045631409, "learning_rate": 9.887531353669391e-06, "loss": 0.0451, "step": 24440 }, { "epoch": 0.1978315397685897, "grad_norm": 0.6433992385864258, "learning_rate": 9.891576988429485e-06, "loss": 0.0427, "step": 24450 }, { "epoch": 0.19791245246379158, "grad_norm": 0.8159838318824768, "learning_rate": 9.895622623189579e-06, "loss": 0.0538, "step": 24460 }, { "epoch": 0.19799336515899343, "grad_norm": 0.6686902046203613, "learning_rate": 9.899668257949673e-06, "loss": 0.0517, "step": 24470 }, { "epoch": 0.19807427785419532, "grad_norm": 0.9930425882339478, "learning_rate": 9.903713892709767e-06, "loss": 0.0521, "step": 24480 }, { "epoch": 0.1981551905493972, "grad_norm": 0.9121484160423279, "learning_rate": 9.90775952746986e-06, "loss": 0.0644, "step": 24490 }, { "epoch": 0.1982361032445991, "grad_norm": 0.9968801736831665, "learning_rate": 9.911805162229955e-06, "loss": 0.0741, "step": 24500 }, { "epoch": 0.19831701593980094, "grad_norm": 0.3834097683429718, "learning_rate": 9.915850796990049e-06, "loss": 0.0551, "step": 24510 }, { "epoch": 0.19839792863500283, "grad_norm": 0.8576724529266357, "learning_rate": 9.919896431750143e-06, "loss": 0.05, "step": 24520 }, { "epoch": 0.1984788413302047, "grad_norm": 0.982367217540741, "learning_rate": 9.923942066510236e-06, "loss": 0.0337, "step": 24530 }, { "epoch": 0.1985597540254066, "grad_norm": 1.4433729648590088, "learning_rate": 9.92798770127033e-06, "loss": 0.0628, "step": 24540 }, { "epoch": 0.19864066672060846, "grad_norm": 0.7920266389846802, "learning_rate": 9.932033336030424e-06, "loss": 0.0417, "step": 24550 }, { "epoch": 0.19872157941581034, "grad_norm": 0.6225623488426208, "learning_rate": 9.936078970790518e-06, "loss": 0.0422, "step": 24560 }, { "epoch": 0.19880249211101222, "grad_norm": 0.47580477595329285, "learning_rate": 9.940124605550612e-06, "loss": 0.04, "step": 24570 }, { "epoch": 0.19888340480621408, "grad_norm": 0.8203852772712708, "learning_rate": 9.944170240310706e-06, "loss": 0.0381, "step": 24580 }, { "epoch": 0.19896431750141597, "grad_norm": 1.3889402151107788, "learning_rate": 9.948215875070798e-06, "loss": 0.0652, "step": 24590 }, { "epoch": 0.19904523019661785, "grad_norm": 0.5586104393005371, "learning_rate": 9.952261509830892e-06, "loss": 0.0429, "step": 24600 }, { "epoch": 0.19912614289181974, "grad_norm": 1.0530354976654053, "learning_rate": 9.956307144590988e-06, "loss": 0.0527, "step": 24610 }, { "epoch": 0.1992070555870216, "grad_norm": 0.35326069593429565, "learning_rate": 9.960352779351082e-06, "loss": 0.0532, "step": 24620 }, { "epoch": 0.19928796828222348, "grad_norm": 1.2003133296966553, "learning_rate": 9.964398414111174e-06, "loss": 0.0765, "step": 24630 }, { "epoch": 0.19936888097742536, "grad_norm": 1.0012316703796387, "learning_rate": 9.968444048871268e-06, "loss": 0.0465, "step": 24640 }, { "epoch": 0.19944979367262725, "grad_norm": 1.158588171005249, "learning_rate": 9.972489683631362e-06, "loss": 0.0577, "step": 24650 }, { "epoch": 0.1995307063678291, "grad_norm": 0.7221186757087708, "learning_rate": 9.976535318391456e-06, "loss": 0.0621, "step": 24660 }, { "epoch": 0.199611619063031, "grad_norm": 1.0627856254577637, "learning_rate": 9.98058095315155e-06, "loss": 0.0511, "step": 24670 }, { "epoch": 0.19969253175823287, "grad_norm": 0.5758214592933655, "learning_rate": 9.984626587911644e-06, "loss": 0.0574, "step": 24680 }, { "epoch": 0.19977344445343476, "grad_norm": 0.5300362706184387, "learning_rate": 9.988672222671738e-06, "loss": 0.0448, "step": 24690 }, { "epoch": 0.1998543571486366, "grad_norm": 0.6240301728248596, "learning_rate": 9.992717857431832e-06, "loss": 0.0434, "step": 24700 }, { "epoch": 0.1999352698438385, "grad_norm": 0.39386144280433655, "learning_rate": 9.996763492191926e-06, "loss": 0.056, "step": 24710 }, { "epoch": 0.20001618253904038, "grad_norm": 1.0561481714248657, "learning_rate": 9.999999998005711e-06, "loss": 0.0501, "step": 24720 }, { "epoch": 0.20009709523424224, "grad_norm": 0.6030371189117432, "learning_rate": 9.999999928205602e-06, "loss": 0.0455, "step": 24730 }, { "epoch": 0.20017800792944412, "grad_norm": 0.4652203619480133, "learning_rate": 9.999999758691046e-06, "loss": 0.0481, "step": 24740 }, { "epoch": 0.200258920624646, "grad_norm": 1.0758291482925415, "learning_rate": 9.999999489462054e-06, "loss": 0.0744, "step": 24750 }, { "epoch": 0.2003398333198479, "grad_norm": 0.9265159368515015, "learning_rate": 9.999999120518624e-06, "loss": 0.0629, "step": 24760 }, { "epoch": 0.20042074601504975, "grad_norm": 0.8788710236549377, "learning_rate": 9.99999865186077e-06, "loss": 0.0548, "step": 24770 }, { "epoch": 0.20050165871025163, "grad_norm": 1.1121011972427368, "learning_rate": 9.999998083488496e-06, "loss": 0.0481, "step": 24780 }, { "epoch": 0.20058257140545352, "grad_norm": 0.6196052432060242, "learning_rate": 9.999997415401816e-06, "loss": 0.0492, "step": 24790 }, { "epoch": 0.2006634841006554, "grad_norm": 1.1016018390655518, "learning_rate": 9.999996647600746e-06, "loss": 0.0511, "step": 24800 }, { "epoch": 0.20074439679585726, "grad_norm": 0.8989203572273254, "learning_rate": 9.999995780085295e-06, "loss": 0.0464, "step": 24810 }, { "epoch": 0.20082530949105915, "grad_norm": 1.146436095237732, "learning_rate": 9.999994812855484e-06, "loss": 0.0909, "step": 24820 }, { "epoch": 0.20090622218626103, "grad_norm": 0.4391869604587555, "learning_rate": 9.999993745911333e-06, "loss": 0.0466, "step": 24830 }, { "epoch": 0.20098713488146291, "grad_norm": 0.5715792179107666, "learning_rate": 9.999992579252862e-06, "loss": 0.0451, "step": 24840 }, { "epoch": 0.20106804757666477, "grad_norm": 0.6653719544410706, "learning_rate": 9.999991312880094e-06, "loss": 0.0369, "step": 24850 }, { "epoch": 0.20114896027186666, "grad_norm": 0.5161858797073364, "learning_rate": 9.999989946793056e-06, "loss": 0.0453, "step": 24860 }, { "epoch": 0.20122987296706854, "grad_norm": 0.9614495635032654, "learning_rate": 9.999988480991772e-06, "loss": 0.045, "step": 24870 }, { "epoch": 0.2013107856622704, "grad_norm": 1.0305525064468384, "learning_rate": 9.999986915476275e-06, "loss": 0.0386, "step": 24880 }, { "epoch": 0.20139169835747228, "grad_norm": 1.1059755086898804, "learning_rate": 9.999985250246594e-06, "loss": 0.0527, "step": 24890 }, { "epoch": 0.20147261105267417, "grad_norm": 0.471221387386322, "learning_rate": 9.999983485302761e-06, "loss": 0.0506, "step": 24900 }, { "epoch": 0.20155352374787605, "grad_norm": 0.7755429744720459, "learning_rate": 9.999981620644814e-06, "loss": 0.058, "step": 24910 }, { "epoch": 0.2016344364430779, "grad_norm": 0.6933714747428894, "learning_rate": 9.99997965627279e-06, "loss": 0.0443, "step": 24920 }, { "epoch": 0.2017153491382798, "grad_norm": 0.7292611598968506, "learning_rate": 9.999977592186725e-06, "loss": 0.0593, "step": 24930 }, { "epoch": 0.20179626183348168, "grad_norm": 0.8170293569564819, "learning_rate": 9.999975428386663e-06, "loss": 0.0713, "step": 24940 }, { "epoch": 0.20187717452868356, "grad_norm": 0.6828546524047852, "learning_rate": 9.999973164872648e-06, "loss": 0.0741, "step": 24950 }, { "epoch": 0.20195808722388542, "grad_norm": 0.5650349259376526, "learning_rate": 9.999970801644723e-06, "loss": 0.0585, "step": 24960 }, { "epoch": 0.2020389999190873, "grad_norm": 0.7500426769256592, "learning_rate": 9.999968338702934e-06, "loss": 0.0477, "step": 24970 }, { "epoch": 0.2021199126142892, "grad_norm": 1.172774076461792, "learning_rate": 9.999965776047334e-06, "loss": 0.0692, "step": 24980 }, { "epoch": 0.20220082530949107, "grad_norm": 1.8829333782196045, "learning_rate": 9.999963113677972e-06, "loss": 0.0407, "step": 24990 }, { "epoch": 0.20228173800469293, "grad_norm": 0.5173922181129456, "learning_rate": 9.999960351594899e-06, "loss": 0.0483, "step": 25000 }, { "epoch": 0.2023626506998948, "grad_norm": 0.7245732545852661, "learning_rate": 9.999957489798175e-06, "loss": 0.0446, "step": 25010 }, { "epoch": 0.2024435633950967, "grad_norm": 0.6973897814750671, "learning_rate": 9.999954528287852e-06, "loss": 0.0442, "step": 25020 }, { "epoch": 0.20252447609029856, "grad_norm": 1.4417510032653809, "learning_rate": 9.999951467063993e-06, "loss": 0.0551, "step": 25030 }, { "epoch": 0.20260538878550044, "grad_norm": 0.6985209584236145, "learning_rate": 9.999948306126657e-06, "loss": 0.0445, "step": 25040 }, { "epoch": 0.20268630148070232, "grad_norm": 1.0372413396835327, "learning_rate": 9.999945045475907e-06, "loss": 0.0531, "step": 25050 }, { "epoch": 0.2027672141759042, "grad_norm": 0.8739668726921082, "learning_rate": 9.999941685111808e-06, "loss": 0.0393, "step": 25060 }, { "epoch": 0.20284812687110607, "grad_norm": 0.8386970162391663, "learning_rate": 9.999938225034427e-06, "loss": 0.0483, "step": 25070 }, { "epoch": 0.20292903956630795, "grad_norm": 0.4036492109298706, "learning_rate": 9.999934665243834e-06, "loss": 0.0607, "step": 25080 }, { "epoch": 0.20300995226150984, "grad_norm": 0.9768446087837219, "learning_rate": 9.9999310057401e-06, "loss": 0.0513, "step": 25090 }, { "epoch": 0.20309086495671172, "grad_norm": 0.38205310702323914, "learning_rate": 9.999927246523296e-06, "loss": 0.0475, "step": 25100 }, { "epoch": 0.20317177765191358, "grad_norm": 0.667491614818573, "learning_rate": 9.9999233875935e-06, "loss": 0.0463, "step": 25110 }, { "epoch": 0.20325269034711546, "grad_norm": 1.1854382753372192, "learning_rate": 9.999919428950786e-06, "loss": 0.0755, "step": 25120 }, { "epoch": 0.20333360304231735, "grad_norm": 0.5379131436347961, "learning_rate": 9.999915370595236e-06, "loss": 0.0553, "step": 25130 }, { "epoch": 0.20341451573751923, "grad_norm": 0.6504284143447876, "learning_rate": 9.999911212526928e-06, "loss": 0.0614, "step": 25140 }, { "epoch": 0.2034954284327211, "grad_norm": 0.08265161514282227, "learning_rate": 9.999906954745947e-06, "loss": 0.0747, "step": 25150 }, { "epoch": 0.20357634112792297, "grad_norm": 0.8588355183601379, "learning_rate": 9.999902597252375e-06, "loss": 0.0601, "step": 25160 }, { "epoch": 0.20365725382312486, "grad_norm": 0.7871836423873901, "learning_rate": 9.999898140046303e-06, "loss": 0.0608, "step": 25170 }, { "epoch": 0.2037381665183267, "grad_norm": 0.6303517818450928, "learning_rate": 9.999893583127818e-06, "loss": 0.0591, "step": 25180 }, { "epoch": 0.2038190792135286, "grad_norm": 1.2418625354766846, "learning_rate": 9.99988892649701e-06, "loss": 0.0458, "step": 25190 }, { "epoch": 0.20389999190873048, "grad_norm": 0.49101513624191284, "learning_rate": 9.999884170153973e-06, "loss": 0.0535, "step": 25200 }, { "epoch": 0.20398090460393237, "grad_norm": 0.5032645463943481, "learning_rate": 9.9998793140988e-06, "loss": 0.0608, "step": 25210 }, { "epoch": 0.20406181729913422, "grad_norm": 0.9617996215820312, "learning_rate": 9.99987435833159e-06, "loss": 0.0667, "step": 25220 }, { "epoch": 0.2041427299943361, "grad_norm": 1.6339298486709595, "learning_rate": 9.999869302852442e-06, "loss": 0.0748, "step": 25230 }, { "epoch": 0.204223642689538, "grad_norm": 0.7099546194076538, "learning_rate": 9.999864147661455e-06, "loss": 0.0472, "step": 25240 }, { "epoch": 0.20430455538473988, "grad_norm": 0.7281557321548462, "learning_rate": 9.999858892758734e-06, "loss": 0.0331, "step": 25250 }, { "epoch": 0.20438546807994173, "grad_norm": 0.9772371649742126, "learning_rate": 9.999853538144382e-06, "loss": 0.0591, "step": 25260 }, { "epoch": 0.20446638077514362, "grad_norm": 0.9414851069450378, "learning_rate": 9.999848083818504e-06, "loss": 0.0592, "step": 25270 }, { "epoch": 0.2045472934703455, "grad_norm": 0.2837948501110077, "learning_rate": 9.999842529781214e-06, "loss": 0.0486, "step": 25280 }, { "epoch": 0.20462820616554736, "grad_norm": 0.9200848937034607, "learning_rate": 9.99983687603262e-06, "loss": 0.0381, "step": 25290 }, { "epoch": 0.20470911886074925, "grad_norm": 0.6258994340896606, "learning_rate": 9.999831122572834e-06, "loss": 0.0467, "step": 25300 }, { "epoch": 0.20479003155595113, "grad_norm": 0.577039897441864, "learning_rate": 9.99982526940197e-06, "loss": 0.0546, "step": 25310 }, { "epoch": 0.20487094425115301, "grad_norm": 0.6456189751625061, "learning_rate": 9.999819316520147e-06, "loss": 0.0508, "step": 25320 }, { "epoch": 0.20495185694635487, "grad_norm": 0.4857771098613739, "learning_rate": 9.999813263927483e-06, "loss": 0.0516, "step": 25330 }, { "epoch": 0.20503276964155676, "grad_norm": 0.3948371112346649, "learning_rate": 9.999807111624099e-06, "loss": 0.0535, "step": 25340 }, { "epoch": 0.20511368233675864, "grad_norm": 1.0923864841461182, "learning_rate": 9.999800859610116e-06, "loss": 0.0645, "step": 25350 }, { "epoch": 0.20519459503196052, "grad_norm": 0.36364418268203735, "learning_rate": 9.99979450788566e-06, "loss": 0.0484, "step": 25360 }, { "epoch": 0.20527550772716238, "grad_norm": 0.6606471538543701, "learning_rate": 9.999788056450859e-06, "loss": 0.0662, "step": 25370 }, { "epoch": 0.20535642042236427, "grad_norm": 0.7105591893196106, "learning_rate": 9.99978150530584e-06, "loss": 0.0576, "step": 25380 }, { "epoch": 0.20543733311756615, "grad_norm": 0.5904553532600403, "learning_rate": 9.999774854450734e-06, "loss": 0.0547, "step": 25390 }, { "epoch": 0.20551824581276804, "grad_norm": 0.2876422107219696, "learning_rate": 9.999768103885672e-06, "loss": 0.0507, "step": 25400 }, { "epoch": 0.2055991585079699, "grad_norm": 1.0543009042739868, "learning_rate": 9.99976125361079e-06, "loss": 0.074, "step": 25410 }, { "epoch": 0.20568007120317178, "grad_norm": 1.5326664447784424, "learning_rate": 9.999754303626227e-06, "loss": 0.0766, "step": 25420 }, { "epoch": 0.20576098389837366, "grad_norm": 0.7645102143287659, "learning_rate": 9.999747253932118e-06, "loss": 0.0401, "step": 25430 }, { "epoch": 0.20584189659357552, "grad_norm": 0.6151669025421143, "learning_rate": 9.999740104528605e-06, "loss": 0.0521, "step": 25440 }, { "epoch": 0.2059228092887774, "grad_norm": 0.6811022162437439, "learning_rate": 9.99973285541583e-06, "loss": 0.056, "step": 25450 }, { "epoch": 0.2060037219839793, "grad_norm": 0.5325237512588501, "learning_rate": 9.99972550659394e-06, "loss": 0.049, "step": 25460 }, { "epoch": 0.20608463467918117, "grad_norm": 0.6296750903129578, "learning_rate": 9.999718058063076e-06, "loss": 0.0573, "step": 25470 }, { "epoch": 0.20616554737438303, "grad_norm": 0.7655307650566101, "learning_rate": 9.999710509823394e-06, "loss": 0.0569, "step": 25480 }, { "epoch": 0.2062464600695849, "grad_norm": 0.6165095567703247, "learning_rate": 9.999702861875039e-06, "loss": 0.0386, "step": 25490 }, { "epoch": 0.2063273727647868, "grad_norm": 0.8804382681846619, "learning_rate": 9.999695114218166e-06, "loss": 0.0643, "step": 25500 }, { "epoch": 0.20640828545998868, "grad_norm": 0.48412570357322693, "learning_rate": 9.999687266852929e-06, "loss": 0.0569, "step": 25510 }, { "epoch": 0.20648919815519054, "grad_norm": 0.6289312839508057, "learning_rate": 9.999679319779482e-06, "loss": 0.0381, "step": 25520 }, { "epoch": 0.20657011085039242, "grad_norm": 0.89143967628479, "learning_rate": 9.999671272997988e-06, "loss": 0.0415, "step": 25530 }, { "epoch": 0.2066510235455943, "grad_norm": 0.18625210225582123, "learning_rate": 9.999663126508605e-06, "loss": 0.0545, "step": 25540 }, { "epoch": 0.2067319362407962, "grad_norm": 1.026392936706543, "learning_rate": 9.999654880311495e-06, "loss": 0.0388, "step": 25550 }, { "epoch": 0.20681284893599805, "grad_norm": 1.070264458656311, "learning_rate": 9.999646534406824e-06, "loss": 0.0627, "step": 25560 }, { "epoch": 0.20689376163119994, "grad_norm": 0.9087778925895691, "learning_rate": 9.999638088794756e-06, "loss": 0.0643, "step": 25570 }, { "epoch": 0.20697467432640182, "grad_norm": 0.45006126165390015, "learning_rate": 9.999629543475463e-06, "loss": 0.0693, "step": 25580 }, { "epoch": 0.20705558702160368, "grad_norm": 0.6545336842536926, "learning_rate": 9.999620898449112e-06, "loss": 0.036, "step": 25590 }, { "epoch": 0.20713649971680556, "grad_norm": 1.043925404548645, "learning_rate": 9.999612153715878e-06, "loss": 0.055, "step": 25600 }, { "epoch": 0.20721741241200745, "grad_norm": 0.9716728925704956, "learning_rate": 9.999603309275934e-06, "loss": 0.0479, "step": 25610 }, { "epoch": 0.20729832510720933, "grad_norm": 0.8827943205833435, "learning_rate": 9.999594365129454e-06, "loss": 0.0839, "step": 25620 }, { "epoch": 0.2073792378024112, "grad_norm": 0.4956972301006317, "learning_rate": 9.999585321276623e-06, "loss": 0.0405, "step": 25630 }, { "epoch": 0.20746015049761307, "grad_norm": 1.1195358037948608, "learning_rate": 9.999576177717615e-06, "loss": 0.0637, "step": 25640 }, { "epoch": 0.20754106319281496, "grad_norm": 1.0707077980041504, "learning_rate": 9.999566934452616e-06, "loss": 0.0537, "step": 25650 }, { "epoch": 0.20762197588801684, "grad_norm": 1.3474094867706299, "learning_rate": 9.999557591481808e-06, "loss": 0.0563, "step": 25660 }, { "epoch": 0.2077028885832187, "grad_norm": 1.1899123191833496, "learning_rate": 9.999548148805379e-06, "loss": 0.061, "step": 25670 }, { "epoch": 0.20778380127842058, "grad_norm": 0.5783812999725342, "learning_rate": 9.999538606423515e-06, "loss": 0.0477, "step": 25680 }, { "epoch": 0.20786471397362247, "grad_norm": 0.3340785503387451, "learning_rate": 9.99952896433641e-06, "loss": 0.044, "step": 25690 }, { "epoch": 0.20794562666882435, "grad_norm": 0.8972747325897217, "learning_rate": 9.999519222544254e-06, "loss": 0.0574, "step": 25700 }, { "epoch": 0.2080265393640262, "grad_norm": 0.5747721791267395, "learning_rate": 9.99950938104724e-06, "loss": 0.0428, "step": 25710 }, { "epoch": 0.2081074520592281, "grad_norm": 1.708993911743164, "learning_rate": 9.999499439845568e-06, "loss": 0.061, "step": 25720 }, { "epoch": 0.20818836475442998, "grad_norm": 0.6436125040054321, "learning_rate": 9.999489398939433e-06, "loss": 0.0445, "step": 25730 }, { "epoch": 0.20826927744963183, "grad_norm": 0.9008821249008179, "learning_rate": 9.999479258329037e-06, "loss": 0.0505, "step": 25740 }, { "epoch": 0.20835019014483372, "grad_norm": 0.36786580085754395, "learning_rate": 9.99946901801458e-06, "loss": 0.0361, "step": 25750 }, { "epoch": 0.2084311028400356, "grad_norm": 0.6386369466781616, "learning_rate": 9.99945867799627e-06, "loss": 0.0494, "step": 25760 }, { "epoch": 0.2085120155352375, "grad_norm": 0.7861381769180298, "learning_rate": 9.99944823827431e-06, "loss": 0.0516, "step": 25770 }, { "epoch": 0.20859292823043935, "grad_norm": 0.48369988799095154, "learning_rate": 9.999437698848908e-06, "loss": 0.056, "step": 25780 }, { "epoch": 0.20867384092564123, "grad_norm": 0.5900572538375854, "learning_rate": 9.999427059720277e-06, "loss": 0.0602, "step": 25790 }, { "epoch": 0.20875475362084311, "grad_norm": 1.0053870677947998, "learning_rate": 9.999416320888627e-06, "loss": 0.0414, "step": 25800 }, { "epoch": 0.208835666316045, "grad_norm": 0.9759474396705627, "learning_rate": 9.999405482354172e-06, "loss": 0.0525, "step": 25810 }, { "epoch": 0.20891657901124686, "grad_norm": 0.68910151720047, "learning_rate": 9.99939454411713e-06, "loss": 0.0438, "step": 25820 }, { "epoch": 0.20899749170644874, "grad_norm": 1.6254682540893555, "learning_rate": 9.999383506177717e-06, "loss": 0.0536, "step": 25830 }, { "epoch": 0.20907840440165062, "grad_norm": 0.4434138238430023, "learning_rate": 9.999372368536154e-06, "loss": 0.038, "step": 25840 }, { "epoch": 0.2091593170968525, "grad_norm": 0.5178634524345398, "learning_rate": 9.999361131192664e-06, "loss": 0.0447, "step": 25850 }, { "epoch": 0.20924022979205437, "grad_norm": 0.5044360160827637, "learning_rate": 9.999349794147471e-06, "loss": 0.0275, "step": 25860 }, { "epoch": 0.20932114248725625, "grad_norm": 0.7433123588562012, "learning_rate": 9.9993383574008e-06, "loss": 0.0391, "step": 25870 }, { "epoch": 0.20940205518245814, "grad_norm": 1.0239659547805786, "learning_rate": 9.99932682095288e-06, "loss": 0.0635, "step": 25880 }, { "epoch": 0.20948296787766, "grad_norm": 1.6095774173736572, "learning_rate": 9.999315184803938e-06, "loss": 0.0439, "step": 25890 }, { "epoch": 0.20956388057286188, "grad_norm": 0.8219012022018433, "learning_rate": 9.999303448954212e-06, "loss": 0.0514, "step": 25900 }, { "epoch": 0.20964479326806376, "grad_norm": 0.23236112296581268, "learning_rate": 9.99929161340393e-06, "loss": 0.037, "step": 25910 }, { "epoch": 0.20972570596326565, "grad_norm": 0.6663405895233154, "learning_rate": 9.999279678153331e-06, "loss": 0.0439, "step": 25920 }, { "epoch": 0.2098066186584675, "grad_norm": 0.74033522605896, "learning_rate": 9.999267643202655e-06, "loss": 0.0429, "step": 25930 }, { "epoch": 0.2098875313536694, "grad_norm": 0.8389123678207397, "learning_rate": 9.999255508552139e-06, "loss": 0.0355, "step": 25940 }, { "epoch": 0.20996844404887127, "grad_norm": 0.7361322045326233, "learning_rate": 9.999243274202024e-06, "loss": 0.0476, "step": 25950 }, { "epoch": 0.21004935674407316, "grad_norm": 1.0132511854171753, "learning_rate": 9.999230940152557e-06, "loss": 0.0434, "step": 25960 }, { "epoch": 0.210130269439275, "grad_norm": 2.2140188217163086, "learning_rate": 9.999218506403982e-06, "loss": 0.0475, "step": 25970 }, { "epoch": 0.2102111821344769, "grad_norm": 0.8217719793319702, "learning_rate": 9.99920597295655e-06, "loss": 0.0503, "step": 25980 }, { "epoch": 0.21029209482967878, "grad_norm": 0.639840304851532, "learning_rate": 9.999193339810508e-06, "loss": 0.0473, "step": 25990 }, { "epoch": 0.21037300752488067, "grad_norm": 0.6001439690589905, "learning_rate": 9.999180606966106e-06, "loss": 0.0499, "step": 26000 }, { "epoch": 0.21045392022008252, "grad_norm": 1.2684807777404785, "learning_rate": 9.999167774423603e-06, "loss": 0.0562, "step": 26010 }, { "epoch": 0.2105348329152844, "grad_norm": 0.8215343356132507, "learning_rate": 9.999154842183252e-06, "loss": 0.0489, "step": 26020 }, { "epoch": 0.2106157456104863, "grad_norm": 0.6202082633972168, "learning_rate": 9.999141810245311e-06, "loss": 0.0491, "step": 26030 }, { "epoch": 0.21069665830568815, "grad_norm": 0.6513288021087646, "learning_rate": 9.99912867861004e-06, "loss": 0.0419, "step": 26040 }, { "epoch": 0.21077757100089003, "grad_norm": 0.7184540033340454, "learning_rate": 9.999115447277703e-06, "loss": 0.052, "step": 26050 }, { "epoch": 0.21085848369609192, "grad_norm": 1.0959514379501343, "learning_rate": 9.99910211624856e-06, "loss": 0.0662, "step": 26060 }, { "epoch": 0.2109393963912938, "grad_norm": 0.5166334509849548, "learning_rate": 9.99908868552288e-06, "loss": 0.0367, "step": 26070 }, { "epoch": 0.21102030908649566, "grad_norm": 0.6792179942131042, "learning_rate": 9.999075155100929e-06, "loss": 0.0587, "step": 26080 }, { "epoch": 0.21110122178169755, "grad_norm": 0.5148615837097168, "learning_rate": 9.999061524982976e-06, "loss": 0.0376, "step": 26090 }, { "epoch": 0.21118213447689943, "grad_norm": 0.4952963590621948, "learning_rate": 9.999047795169297e-06, "loss": 0.0464, "step": 26100 }, { "epoch": 0.21126304717210131, "grad_norm": 0.5761737823486328, "learning_rate": 9.999033965660161e-06, "loss": 0.0568, "step": 26110 }, { "epoch": 0.21134395986730317, "grad_norm": 0.41957080364227295, "learning_rate": 9.999020036455847e-06, "loss": 0.0607, "step": 26120 }, { "epoch": 0.21142487256250506, "grad_norm": 0.3252444565296173, "learning_rate": 9.999006007556633e-06, "loss": 0.0486, "step": 26130 }, { "epoch": 0.21150578525770694, "grad_norm": 0.5502849221229553, "learning_rate": 9.998991878962797e-06, "loss": 0.0418, "step": 26140 }, { "epoch": 0.2115866979529088, "grad_norm": 0.594590425491333, "learning_rate": 9.998977650674619e-06, "loss": 0.0553, "step": 26150 }, { "epoch": 0.21166761064811068, "grad_norm": 0.5165215730667114, "learning_rate": 9.998963322692388e-06, "loss": 0.0511, "step": 26160 }, { "epoch": 0.21174852334331257, "grad_norm": 0.5935002565383911, "learning_rate": 9.998948895016384e-06, "loss": 0.0577, "step": 26170 }, { "epoch": 0.21182943603851445, "grad_norm": 0.6701154708862305, "learning_rate": 9.9989343676469e-06, "loss": 0.0547, "step": 26180 }, { "epoch": 0.2119103487337163, "grad_norm": 0.7835132479667664, "learning_rate": 9.998919740584223e-06, "loss": 0.0469, "step": 26190 }, { "epoch": 0.2119912614289182, "grad_norm": 0.799875795841217, "learning_rate": 9.998905013828643e-06, "loss": 0.0377, "step": 26200 }, { "epoch": 0.21207217412412008, "grad_norm": 0.98179030418396, "learning_rate": 9.998890187380459e-06, "loss": 0.0517, "step": 26210 }, { "epoch": 0.21215308681932196, "grad_norm": 0.6637845635414124, "learning_rate": 9.99887526123996e-06, "loss": 0.0435, "step": 26220 }, { "epoch": 0.21223399951452382, "grad_norm": 1.3379932641983032, "learning_rate": 9.998860235407448e-06, "loss": 0.0556, "step": 26230 }, { "epoch": 0.2123149122097257, "grad_norm": 1.3901691436767578, "learning_rate": 9.998845109883222e-06, "loss": 0.0457, "step": 26240 }, { "epoch": 0.2123958249049276, "grad_norm": 0.43164917826652527, "learning_rate": 9.998829884667584e-06, "loss": 0.0495, "step": 26250 }, { "epoch": 0.21247673760012947, "grad_norm": 0.5026207566261292, "learning_rate": 9.998814559760837e-06, "loss": 0.0568, "step": 26260 }, { "epoch": 0.21255765029533133, "grad_norm": 0.8418431282043457, "learning_rate": 9.998799135163286e-06, "loss": 0.0616, "step": 26270 }, { "epoch": 0.21263856299053321, "grad_norm": 0.7260940670967102, "learning_rate": 9.998783610875239e-06, "loss": 0.0645, "step": 26280 }, { "epoch": 0.2127194756857351, "grad_norm": 0.6980267763137817, "learning_rate": 9.998767986897006e-06, "loss": 0.0517, "step": 26290 }, { "epoch": 0.21280038838093696, "grad_norm": 0.7994034886360168, "learning_rate": 9.998752263228899e-06, "loss": 0.0446, "step": 26300 }, { "epoch": 0.21288130107613884, "grad_norm": 0.5693676471710205, "learning_rate": 9.998736439871228e-06, "loss": 0.0589, "step": 26310 }, { "epoch": 0.21296221377134072, "grad_norm": 1.279175877571106, "learning_rate": 9.998720516824315e-06, "loss": 0.0413, "step": 26320 }, { "epoch": 0.2130431264665426, "grad_norm": 1.1649245023727417, "learning_rate": 9.998704494088473e-06, "loss": 0.0408, "step": 26330 }, { "epoch": 0.21312403916174447, "grad_norm": 0.831240177154541, "learning_rate": 9.998688371664022e-06, "loss": 0.0445, "step": 26340 }, { "epoch": 0.21320495185694635, "grad_norm": 0.7721487879753113, "learning_rate": 9.998672149551285e-06, "loss": 0.0587, "step": 26350 }, { "epoch": 0.21328586455214824, "grad_norm": 0.7227905988693237, "learning_rate": 9.998655827750583e-06, "loss": 0.0526, "step": 26360 }, { "epoch": 0.21336677724735012, "grad_norm": 0.7150794863700867, "learning_rate": 9.998639406262244e-06, "loss": 0.0402, "step": 26370 }, { "epoch": 0.21344768994255198, "grad_norm": 0.48261335492134094, "learning_rate": 9.998622885086595e-06, "loss": 0.0509, "step": 26380 }, { "epoch": 0.21352860263775386, "grad_norm": 0.7623143196105957, "learning_rate": 9.998606264223965e-06, "loss": 0.0461, "step": 26390 }, { "epoch": 0.21360951533295575, "grad_norm": 0.9424956440925598, "learning_rate": 9.998589543674686e-06, "loss": 0.0555, "step": 26400 }, { "epoch": 0.21369042802815763, "grad_norm": 1.0741853713989258, "learning_rate": 9.998572723439091e-06, "loss": 0.0634, "step": 26410 }, { "epoch": 0.2137713407233595, "grad_norm": 1.1817822456359863, "learning_rate": 9.998555803517515e-06, "loss": 0.0366, "step": 26420 }, { "epoch": 0.21385225341856137, "grad_norm": 0.43373459577560425, "learning_rate": 9.998538783910297e-06, "loss": 0.0458, "step": 26430 }, { "epoch": 0.21393316611376326, "grad_norm": 0.5797932744026184, "learning_rate": 9.998521664617773e-06, "loss": 0.052, "step": 26440 }, { "epoch": 0.2140140788089651, "grad_norm": 0.3569229543209076, "learning_rate": 9.998504445640288e-06, "loss": 0.048, "step": 26450 }, { "epoch": 0.214094991504167, "grad_norm": 0.5730752348899841, "learning_rate": 9.998487126978184e-06, "loss": 0.0414, "step": 26460 }, { "epoch": 0.21417590419936888, "grad_norm": 0.8730857372283936, "learning_rate": 9.998469708631807e-06, "loss": 0.0513, "step": 26470 }, { "epoch": 0.21425681689457077, "grad_norm": 0.9140138626098633, "learning_rate": 9.998452190601505e-06, "loss": 0.052, "step": 26480 }, { "epoch": 0.21433772958977262, "grad_norm": 0.9994937777519226, "learning_rate": 9.998434572887624e-06, "loss": 0.0822, "step": 26490 }, { "epoch": 0.2144186422849745, "grad_norm": 0.3803093731403351, "learning_rate": 9.998416855490518e-06, "loss": 0.0615, "step": 26500 }, { "epoch": 0.2144995549801764, "grad_norm": 0.8335522413253784, "learning_rate": 9.998399038410542e-06, "loss": 0.0447, "step": 26510 }, { "epoch": 0.21458046767537828, "grad_norm": 1.0172957181930542, "learning_rate": 9.998381121648047e-06, "loss": 0.0577, "step": 26520 }, { "epoch": 0.21466138037058013, "grad_norm": 0.23959724605083466, "learning_rate": 9.998363105203392e-06, "loss": 0.0353, "step": 26530 }, { "epoch": 0.21474229306578202, "grad_norm": 0.6620857119560242, "learning_rate": 9.99834498907694e-06, "loss": 0.0539, "step": 26540 }, { "epoch": 0.2148232057609839, "grad_norm": 0.8353042006492615, "learning_rate": 9.998326773269047e-06, "loss": 0.0489, "step": 26550 }, { "epoch": 0.2149041184561858, "grad_norm": 0.5368790030479431, "learning_rate": 9.998308457780079e-06, "loss": 0.0415, "step": 26560 }, { "epoch": 0.21498503115138765, "grad_norm": 0.5887132883071899, "learning_rate": 9.9982900426104e-06, "loss": 0.0383, "step": 26570 }, { "epoch": 0.21506594384658953, "grad_norm": 0.6154734492301941, "learning_rate": 9.998271527760379e-06, "loss": 0.0656, "step": 26580 }, { "epoch": 0.21514685654179141, "grad_norm": 1.0740667581558228, "learning_rate": 9.998252913230384e-06, "loss": 0.0471, "step": 26590 }, { "epoch": 0.21522776923699327, "grad_norm": 0.6191862225532532, "learning_rate": 9.998234199020786e-06, "loss": 0.036, "step": 26600 }, { "epoch": 0.21530868193219516, "grad_norm": 0.9419409036636353, "learning_rate": 9.998215385131959e-06, "loss": 0.0732, "step": 26610 }, { "epoch": 0.21538959462739704, "grad_norm": 0.3241831362247467, "learning_rate": 9.998196471564276e-06, "loss": 0.0447, "step": 26620 }, { "epoch": 0.21547050732259893, "grad_norm": 0.6101221442222595, "learning_rate": 9.998177458318119e-06, "loss": 0.0499, "step": 26630 }, { "epoch": 0.21555142001780078, "grad_norm": 0.6509813666343689, "learning_rate": 9.998158345393863e-06, "loss": 0.0585, "step": 26640 }, { "epoch": 0.21563233271300267, "grad_norm": 1.046401023864746, "learning_rate": 9.99813913279189e-06, "loss": 0.044, "step": 26650 }, { "epoch": 0.21571324540820455, "grad_norm": 0.4748647212982178, "learning_rate": 9.998119820512583e-06, "loss": 0.0375, "step": 26660 }, { "epoch": 0.21579415810340644, "grad_norm": 1.0569652318954468, "learning_rate": 9.998100408556329e-06, "loss": 0.0503, "step": 26670 }, { "epoch": 0.2158750707986083, "grad_norm": 1.1234537363052368, "learning_rate": 9.998080896923513e-06, "loss": 0.0555, "step": 26680 }, { "epoch": 0.21595598349381018, "grad_norm": 0.4191996455192566, "learning_rate": 9.998061285614526e-06, "loss": 0.0398, "step": 26690 }, { "epoch": 0.21603689618901206, "grad_norm": 0.8851500153541565, "learning_rate": 9.998041574629757e-06, "loss": 0.0499, "step": 26700 }, { "epoch": 0.21611780888421395, "grad_norm": 0.5559619665145874, "learning_rate": 9.9980217639696e-06, "loss": 0.0513, "step": 26710 }, { "epoch": 0.2161987215794158, "grad_norm": 0.6570428013801575, "learning_rate": 9.998001853634452e-06, "loss": 0.0547, "step": 26720 }, { "epoch": 0.2162796342746177, "grad_norm": 0.6064631342887878, "learning_rate": 9.997981843624706e-06, "loss": 0.0488, "step": 26730 }, { "epoch": 0.21636054696981957, "grad_norm": 0.4098280668258667, "learning_rate": 9.997961733940764e-06, "loss": 0.0455, "step": 26740 }, { "epoch": 0.21644145966502143, "grad_norm": 0.7134605050086975, "learning_rate": 9.997941524583025e-06, "loss": 0.0538, "step": 26750 }, { "epoch": 0.21652237236022331, "grad_norm": 0.6312874555587769, "learning_rate": 9.997921215551896e-06, "loss": 0.0611, "step": 26760 }, { "epoch": 0.2166032850554252, "grad_norm": 0.3727577328681946, "learning_rate": 9.997900806847778e-06, "loss": 0.047, "step": 26770 }, { "epoch": 0.21668419775062708, "grad_norm": 1.011581540107727, "learning_rate": 9.997880298471082e-06, "loss": 0.0506, "step": 26780 }, { "epoch": 0.21676511044582894, "grad_norm": 0.4141201078891754, "learning_rate": 9.997859690422211e-06, "loss": 0.0386, "step": 26790 }, { "epoch": 0.21684602314103082, "grad_norm": 1.2603535652160645, "learning_rate": 9.997838982701579e-06, "loss": 0.0515, "step": 26800 }, { "epoch": 0.2169269358362327, "grad_norm": 0.7518587708473206, "learning_rate": 9.997818175309603e-06, "loss": 0.0743, "step": 26810 }, { "epoch": 0.2170078485314346, "grad_norm": 0.725255012512207, "learning_rate": 9.997797268246692e-06, "loss": 0.046, "step": 26820 }, { "epoch": 0.21708876122663645, "grad_norm": 0.9290253520011902, "learning_rate": 9.997776261513266e-06, "loss": 0.061, "step": 26830 }, { "epoch": 0.21716967392183834, "grad_norm": 0.6483675241470337, "learning_rate": 9.997755155109744e-06, "loss": 0.0419, "step": 26840 }, { "epoch": 0.21725058661704022, "grad_norm": 1.0857396125793457, "learning_rate": 9.997733949036544e-06, "loss": 0.0636, "step": 26850 }, { "epoch": 0.2173314993122421, "grad_norm": 0.7428203821182251, "learning_rate": 9.997712643294093e-06, "loss": 0.05, "step": 26860 }, { "epoch": 0.21741241200744396, "grad_norm": 0.5624147057533264, "learning_rate": 9.997691237882811e-06, "loss": 0.0427, "step": 26870 }, { "epoch": 0.21749332470264585, "grad_norm": 0.7985323667526245, "learning_rate": 9.997669732803132e-06, "loss": 0.0615, "step": 26880 }, { "epoch": 0.21757423739784773, "grad_norm": 1.0263458490371704, "learning_rate": 9.997648128055478e-06, "loss": 0.0707, "step": 26890 }, { "epoch": 0.2176551500930496, "grad_norm": 0.4188177287578583, "learning_rate": 9.997626423640283e-06, "loss": 0.0401, "step": 26900 }, { "epoch": 0.21773606278825147, "grad_norm": 0.6039109230041504, "learning_rate": 9.997604619557982e-06, "loss": 0.0555, "step": 26910 }, { "epoch": 0.21781697548345336, "grad_norm": 0.7974914312362671, "learning_rate": 9.997582715809004e-06, "loss": 0.0659, "step": 26920 }, { "epoch": 0.21789788817865524, "grad_norm": 0.5804322361946106, "learning_rate": 9.99756071239379e-06, "loss": 0.06, "step": 26930 }, { "epoch": 0.2179788008738571, "grad_norm": 0.3328329622745514, "learning_rate": 9.997538609312777e-06, "loss": 0.0379, "step": 26940 }, { "epoch": 0.21805971356905898, "grad_norm": 0.6319897174835205, "learning_rate": 9.997516406566408e-06, "loss": 0.037, "step": 26950 }, { "epoch": 0.21814062626426087, "grad_norm": 1.2366671562194824, "learning_rate": 9.997494104155126e-06, "loss": 0.0606, "step": 26960 }, { "epoch": 0.21822153895946275, "grad_norm": 0.4905921518802643, "learning_rate": 9.997471702079372e-06, "loss": 0.0515, "step": 26970 }, { "epoch": 0.2183024516546646, "grad_norm": 0.9877200126647949, "learning_rate": 9.997449200339595e-06, "loss": 0.0589, "step": 26980 }, { "epoch": 0.2183833643498665, "grad_norm": 0.9855848550796509, "learning_rate": 9.997426598936244e-06, "loss": 0.0487, "step": 26990 }, { "epoch": 0.21846427704506838, "grad_norm": 0.5182004570960999, "learning_rate": 9.997403897869771e-06, "loss": 0.0399, "step": 27000 }, { "epoch": 0.21854518974027023, "grad_norm": 0.8655921816825867, "learning_rate": 9.997381097140627e-06, "loss": 0.0604, "step": 27010 }, { "epoch": 0.21862610243547212, "grad_norm": 0.7703977227210999, "learning_rate": 9.997358196749266e-06, "loss": 0.0553, "step": 27020 }, { "epoch": 0.218707015130674, "grad_norm": 0.7017664313316345, "learning_rate": 9.997335196696145e-06, "loss": 0.0375, "step": 27030 }, { "epoch": 0.2187879278258759, "grad_norm": 0.43634921312332153, "learning_rate": 9.997312096981725e-06, "loss": 0.0502, "step": 27040 }, { "epoch": 0.21886884052107775, "grad_norm": 0.6185643076896667, "learning_rate": 9.997288897606463e-06, "loss": 0.0514, "step": 27050 }, { "epoch": 0.21894975321627963, "grad_norm": 0.47016969323158264, "learning_rate": 9.997265598570824e-06, "loss": 0.0494, "step": 27060 }, { "epoch": 0.21903066591148151, "grad_norm": 0.968341588973999, "learning_rate": 9.997242199875274e-06, "loss": 0.0482, "step": 27070 }, { "epoch": 0.2191115786066834, "grad_norm": 1.2599934339523315, "learning_rate": 9.997218701520277e-06, "loss": 0.084, "step": 27080 }, { "epoch": 0.21919249130188526, "grad_norm": 1.082319736480713, "learning_rate": 9.997195103506302e-06, "loss": 0.0595, "step": 27090 }, { "epoch": 0.21927340399708714, "grad_norm": 0.8104313015937805, "learning_rate": 9.997171405833821e-06, "loss": 0.0438, "step": 27100 }, { "epoch": 0.21935431669228903, "grad_norm": 0.5455868244171143, "learning_rate": 9.997147608503306e-06, "loss": 0.0556, "step": 27110 }, { "epoch": 0.2194352293874909, "grad_norm": 0.5376573204994202, "learning_rate": 9.99712371151523e-06, "loss": 0.0608, "step": 27120 }, { "epoch": 0.21951614208269277, "grad_norm": 0.9516055583953857, "learning_rate": 9.997099714870073e-06, "loss": 0.0571, "step": 27130 }, { "epoch": 0.21959705477789465, "grad_norm": 0.6156712770462036, "learning_rate": 9.99707561856831e-06, "loss": 0.0657, "step": 27140 }, { "epoch": 0.21967796747309654, "grad_norm": 0.5445385575294495, "learning_rate": 9.997051422610425e-06, "loss": 0.0488, "step": 27150 }, { "epoch": 0.2197588801682984, "grad_norm": 0.7059304118156433, "learning_rate": 9.997027126996896e-06, "loss": 0.0516, "step": 27160 }, { "epoch": 0.21983979286350028, "grad_norm": 0.5531249642372131, "learning_rate": 9.997002731728212e-06, "loss": 0.058, "step": 27170 }, { "epoch": 0.21992070555870216, "grad_norm": 0.6156094074249268, "learning_rate": 9.996978236804858e-06, "loss": 0.0257, "step": 27180 }, { "epoch": 0.22000161825390405, "grad_norm": 0.8562238812446594, "learning_rate": 9.99695364222732e-06, "loss": 0.0633, "step": 27190 }, { "epoch": 0.2200825309491059, "grad_norm": 0.6628778576850891, "learning_rate": 9.996928947996092e-06, "loss": 0.0345, "step": 27200 }, { "epoch": 0.2201634436443078, "grad_norm": 0.654209315776825, "learning_rate": 9.996904154111666e-06, "loss": 0.0393, "step": 27210 }, { "epoch": 0.22024435633950967, "grad_norm": 0.21148242056369781, "learning_rate": 9.996879260574535e-06, "loss": 0.0572, "step": 27220 }, { "epoch": 0.22032526903471156, "grad_norm": 0.8532141447067261, "learning_rate": 9.996854267385196e-06, "loss": 0.0474, "step": 27230 }, { "epoch": 0.22040618172991341, "grad_norm": 0.9057795405387878, "learning_rate": 9.996829174544147e-06, "loss": 0.0533, "step": 27240 }, { "epoch": 0.2204870944251153, "grad_norm": 1.4191248416900635, "learning_rate": 9.996803982051888e-06, "loss": 0.0496, "step": 27250 }, { "epoch": 0.22056800712031718, "grad_norm": 0.6909671425819397, "learning_rate": 9.996778689908924e-06, "loss": 0.0418, "step": 27260 }, { "epoch": 0.22064891981551907, "grad_norm": 0.48403483629226685, "learning_rate": 9.996753298115757e-06, "loss": 0.051, "step": 27270 }, { "epoch": 0.22072983251072092, "grad_norm": 1.1735509634017944, "learning_rate": 9.996727806672894e-06, "loss": 0.0524, "step": 27280 }, { "epoch": 0.2208107452059228, "grad_norm": 0.4806864559650421, "learning_rate": 9.996702215580843e-06, "loss": 0.0635, "step": 27290 }, { "epoch": 0.2208916579011247, "grad_norm": 0.8466700315475464, "learning_rate": 9.996676524840115e-06, "loss": 0.0494, "step": 27300 }, { "epoch": 0.22097257059632655, "grad_norm": 0.7730889916419983, "learning_rate": 9.996650734451224e-06, "loss": 0.045, "step": 27310 }, { "epoch": 0.22105348329152844, "grad_norm": 0.43538838624954224, "learning_rate": 9.99662484441468e-06, "loss": 0.0436, "step": 27320 }, { "epoch": 0.22113439598673032, "grad_norm": 0.7646813988685608, "learning_rate": 9.996598854731003e-06, "loss": 0.0591, "step": 27330 }, { "epoch": 0.2212153086819322, "grad_norm": 0.3776231110095978, "learning_rate": 9.99657276540071e-06, "loss": 0.0486, "step": 27340 }, { "epoch": 0.22129622137713406, "grad_norm": 0.9434586763381958, "learning_rate": 9.996546576424322e-06, "loss": 0.0601, "step": 27350 }, { "epoch": 0.22137713407233595, "grad_norm": 0.7906712293624878, "learning_rate": 9.996520287802359e-06, "loss": 0.0562, "step": 27360 }, { "epoch": 0.22145804676753783, "grad_norm": 0.5759912729263306, "learning_rate": 9.996493899535348e-06, "loss": 0.0712, "step": 27370 }, { "epoch": 0.22153895946273972, "grad_norm": 0.6924657225608826, "learning_rate": 9.996467411623814e-06, "loss": 0.051, "step": 27380 }, { "epoch": 0.22161987215794157, "grad_norm": 0.6775951981544495, "learning_rate": 9.996440824068286e-06, "loss": 0.0466, "step": 27390 }, { "epoch": 0.22170078485314346, "grad_norm": 0.6378546953201294, "learning_rate": 9.996414136869294e-06, "loss": 0.0446, "step": 27400 }, { "epoch": 0.22178169754834534, "grad_norm": 0.6318911910057068, "learning_rate": 9.99638735002737e-06, "loss": 0.0406, "step": 27410 }, { "epoch": 0.22186261024354723, "grad_norm": 0.5469042658805847, "learning_rate": 9.996360463543046e-06, "loss": 0.0596, "step": 27420 }, { "epoch": 0.22194352293874908, "grad_norm": 1.4511069059371948, "learning_rate": 9.996333477416863e-06, "loss": 0.0693, "step": 27430 }, { "epoch": 0.22202443563395097, "grad_norm": 1.0000547170639038, "learning_rate": 9.996306391649355e-06, "loss": 0.0346, "step": 27440 }, { "epoch": 0.22210534832915285, "grad_norm": 0.4429369270801544, "learning_rate": 9.996279206241064e-06, "loss": 0.0444, "step": 27450 }, { "epoch": 0.2221862610243547, "grad_norm": 0.47846680879592896, "learning_rate": 9.996251921192532e-06, "loss": 0.0412, "step": 27460 }, { "epoch": 0.2222671737195566, "grad_norm": 0.38557112216949463, "learning_rate": 9.996224536504304e-06, "loss": 0.0444, "step": 27470 }, { "epoch": 0.22234808641475848, "grad_norm": 0.41974177956581116, "learning_rate": 9.996197052176924e-06, "loss": 0.042, "step": 27480 }, { "epoch": 0.22242899910996036, "grad_norm": 0.7625684142112732, "learning_rate": 9.99616946821094e-06, "loss": 0.038, "step": 27490 }, { "epoch": 0.22250991180516222, "grad_norm": 0.282598614692688, "learning_rate": 9.996141784606905e-06, "loss": 0.0335, "step": 27500 }, { "epoch": 0.2225908245003641, "grad_norm": 0.5180754065513611, "learning_rate": 9.99611400136537e-06, "loss": 0.0632, "step": 27510 }, { "epoch": 0.222671737195566, "grad_norm": 0.9037352800369263, "learning_rate": 9.996086118486888e-06, "loss": 0.0503, "step": 27520 }, { "epoch": 0.22275264989076787, "grad_norm": 1.0166574716567993, "learning_rate": 9.996058135972016e-06, "loss": 0.0395, "step": 27530 }, { "epoch": 0.22283356258596973, "grad_norm": 0.3420000970363617, "learning_rate": 9.99603005382131e-06, "loss": 0.0421, "step": 27540 }, { "epoch": 0.22291447528117161, "grad_norm": 1.1410858631134033, "learning_rate": 9.996001872035332e-06, "loss": 0.0558, "step": 27550 }, { "epoch": 0.2229953879763735, "grad_norm": 0.7428043484687805, "learning_rate": 9.995973590614644e-06, "loss": 0.05, "step": 27560 }, { "epoch": 0.22307630067157538, "grad_norm": 0.5484246015548706, "learning_rate": 9.99594520955981e-06, "loss": 0.0505, "step": 27570 }, { "epoch": 0.22315721336677724, "grad_norm": 0.9603343605995178, "learning_rate": 9.995916728871396e-06, "loss": 0.0412, "step": 27580 }, { "epoch": 0.22323812606197913, "grad_norm": 0.8353529572486877, "learning_rate": 9.995888148549968e-06, "loss": 0.0597, "step": 27590 }, { "epoch": 0.223319038757181, "grad_norm": 0.6352590918540955, "learning_rate": 9.9958594685961e-06, "loss": 0.0566, "step": 27600 }, { "epoch": 0.22339995145238287, "grad_norm": 0.9845993518829346, "learning_rate": 9.995830689010359e-06, "loss": 0.0452, "step": 27610 }, { "epoch": 0.22348086414758475, "grad_norm": 0.3560985326766968, "learning_rate": 9.995801809793324e-06, "loss": 0.052, "step": 27620 }, { "epoch": 0.22356177684278664, "grad_norm": 0.5301969647407532, "learning_rate": 9.995772830945567e-06, "loss": 0.0576, "step": 27630 }, { "epoch": 0.22364268953798852, "grad_norm": 0.7116531729698181, "learning_rate": 9.995743752467669e-06, "loss": 0.0605, "step": 27640 }, { "epoch": 0.22372360223319038, "grad_norm": 0.5091031193733215, "learning_rate": 9.995714574360207e-06, "loss": 0.0354, "step": 27650 }, { "epoch": 0.22380451492839226, "grad_norm": 0.49530693888664246, "learning_rate": 9.995685296623764e-06, "loss": 0.0292, "step": 27660 }, { "epoch": 0.22388542762359415, "grad_norm": 0.5939663052558899, "learning_rate": 9.995655919258927e-06, "loss": 0.0382, "step": 27670 }, { "epoch": 0.22396634031879603, "grad_norm": 0.9181421399116516, "learning_rate": 9.995626442266275e-06, "loss": 0.0463, "step": 27680 }, { "epoch": 0.2240472530139979, "grad_norm": 0.9526675939559937, "learning_rate": 9.995596865646402e-06, "loss": 0.0593, "step": 27690 }, { "epoch": 0.22412816570919977, "grad_norm": 0.8459448218345642, "learning_rate": 9.995567189399895e-06, "loss": 0.0484, "step": 27700 }, { "epoch": 0.22420907840440166, "grad_norm": 0.4767102003097534, "learning_rate": 9.995537413527347e-06, "loss": 0.0473, "step": 27710 }, { "epoch": 0.22428999109960351, "grad_norm": 0.8866463899612427, "learning_rate": 9.995507538029352e-06, "loss": 0.0431, "step": 27720 }, { "epoch": 0.2243709037948054, "grad_norm": 0.583391010761261, "learning_rate": 9.995477562906504e-06, "loss": 0.0736, "step": 27730 }, { "epoch": 0.22445181649000728, "grad_norm": 0.9187588691711426, "learning_rate": 9.995447488159402e-06, "loss": 0.0474, "step": 27740 }, { "epoch": 0.22453272918520917, "grad_norm": 0.34709712862968445, "learning_rate": 9.995417313788645e-06, "loss": 0.0481, "step": 27750 }, { "epoch": 0.22461364188041102, "grad_norm": 0.387755811214447, "learning_rate": 9.995387039794836e-06, "loss": 0.0508, "step": 27760 }, { "epoch": 0.2246945545756129, "grad_norm": 0.9814860224723816, "learning_rate": 9.99535666617858e-06, "loss": 0.0434, "step": 27770 }, { "epoch": 0.2247754672708148, "grad_norm": 0.239751935005188, "learning_rate": 9.99532619294048e-06, "loss": 0.0495, "step": 27780 }, { "epoch": 0.22485637996601668, "grad_norm": 0.5519996285438538, "learning_rate": 9.995295620081144e-06, "loss": 0.0614, "step": 27790 }, { "epoch": 0.22493729266121854, "grad_norm": 0.6167026162147522, "learning_rate": 9.995264947601181e-06, "loss": 0.0471, "step": 27800 }, { "epoch": 0.22501820535642042, "grad_norm": 0.33031976222991943, "learning_rate": 9.995234175501207e-06, "loss": 0.0476, "step": 27810 }, { "epoch": 0.2250991180516223, "grad_norm": 0.5233999490737915, "learning_rate": 9.995203303781831e-06, "loss": 0.05, "step": 27820 }, { "epoch": 0.2251800307468242, "grad_norm": 0.7326544523239136, "learning_rate": 9.995172332443671e-06, "loss": 0.0694, "step": 27830 }, { "epoch": 0.22526094344202605, "grad_norm": 0.17420358955860138, "learning_rate": 9.995141261487344e-06, "loss": 0.0392, "step": 27840 }, { "epoch": 0.22534185613722793, "grad_norm": 0.8860479593276978, "learning_rate": 9.99511009091347e-06, "loss": 0.0389, "step": 27850 }, { "epoch": 0.22542276883242982, "grad_norm": 1.1738330125808716, "learning_rate": 9.99507882072267e-06, "loss": 0.0552, "step": 27860 }, { "epoch": 0.22550368152763167, "grad_norm": 0.5301632881164551, "learning_rate": 9.995047450915569e-06, "loss": 0.0544, "step": 27870 }, { "epoch": 0.22558459422283356, "grad_norm": 0.397916704416275, "learning_rate": 9.995015981492791e-06, "loss": 0.0387, "step": 27880 }, { "epoch": 0.22566550691803544, "grad_norm": 1.121252179145813, "learning_rate": 9.994984412454963e-06, "loss": 0.0668, "step": 27890 }, { "epoch": 0.22574641961323733, "grad_norm": 0.5270466804504395, "learning_rate": 9.994952743802716e-06, "loss": 0.0404, "step": 27900 }, { "epoch": 0.22582733230843918, "grad_norm": 0.5579569339752197, "learning_rate": 9.994920975536684e-06, "loss": 0.072, "step": 27910 }, { "epoch": 0.22590824500364107, "grad_norm": 0.8079070448875427, "learning_rate": 9.994889107657494e-06, "loss": 0.0363, "step": 27920 }, { "epoch": 0.22598915769884295, "grad_norm": 0.9115596413612366, "learning_rate": 9.994857140165788e-06, "loss": 0.0386, "step": 27930 }, { "epoch": 0.22607007039404484, "grad_norm": 0.7241853475570679, "learning_rate": 9.9948250730622e-06, "loss": 0.042, "step": 27940 }, { "epoch": 0.2261509830892467, "grad_norm": 0.8778935670852661, "learning_rate": 9.99479290634737e-06, "loss": 0.0545, "step": 27950 }, { "epoch": 0.22623189578444858, "grad_norm": 0.5119723677635193, "learning_rate": 9.994760640021944e-06, "loss": 0.0653, "step": 27960 }, { "epoch": 0.22631280847965046, "grad_norm": 0.7831239700317383, "learning_rate": 9.994728274086556e-06, "loss": 0.0558, "step": 27970 }, { "epoch": 0.22639372117485235, "grad_norm": 0.7967007160186768, "learning_rate": 9.99469580854186e-06, "loss": 0.0548, "step": 27980 }, { "epoch": 0.2264746338700542, "grad_norm": 0.6019611954689026, "learning_rate": 9.9946632433885e-06, "loss": 0.0332, "step": 27990 }, { "epoch": 0.2265555465652561, "grad_norm": 0.3791316747665405, "learning_rate": 9.994630578627125e-06, "loss": 0.0491, "step": 28000 }, { "epoch": 0.22663645926045797, "grad_norm": 0.6453375816345215, "learning_rate": 9.994597814258387e-06, "loss": 0.0305, "step": 28010 }, { "epoch": 0.22671737195565983, "grad_norm": 0.35548385977745056, "learning_rate": 9.99456495028294e-06, "loss": 0.0367, "step": 28020 }, { "epoch": 0.22679828465086171, "grad_norm": 0.8573088049888611, "learning_rate": 9.994531986701439e-06, "loss": 0.0679, "step": 28030 }, { "epoch": 0.2268791973460636, "grad_norm": 0.7918416261672974, "learning_rate": 9.994498923514542e-06, "loss": 0.0444, "step": 28040 }, { "epoch": 0.22696011004126548, "grad_norm": 0.45214805006980896, "learning_rate": 9.994465760722907e-06, "loss": 0.0341, "step": 28050 }, { "epoch": 0.22704102273646734, "grad_norm": 0.7756332159042358, "learning_rate": 9.994432498327197e-06, "loss": 0.0507, "step": 28060 }, { "epoch": 0.22712193543166923, "grad_norm": 0.5825783610343933, "learning_rate": 9.994399136328075e-06, "loss": 0.0475, "step": 28070 }, { "epoch": 0.2272028481268711, "grad_norm": 0.6582468748092651, "learning_rate": 9.994365674726205e-06, "loss": 0.09, "step": 28080 }, { "epoch": 0.227283760822073, "grad_norm": 0.7925385236740112, "learning_rate": 9.994332113522255e-06, "loss": 0.0509, "step": 28090 }, { "epoch": 0.22736467351727485, "grad_norm": 1.571832537651062, "learning_rate": 9.994298452716896e-06, "loss": 0.0425, "step": 28100 }, { "epoch": 0.22744558621247674, "grad_norm": 0.734066903591156, "learning_rate": 9.994264692310795e-06, "loss": 0.0292, "step": 28110 }, { "epoch": 0.22752649890767862, "grad_norm": 0.18875375390052795, "learning_rate": 9.99423083230463e-06, "loss": 0.0322, "step": 28120 }, { "epoch": 0.2276074116028805, "grad_norm": 0.7814585566520691, "learning_rate": 9.994196872699075e-06, "loss": 0.0561, "step": 28130 }, { "epoch": 0.22768832429808236, "grad_norm": 0.5172538161277771, "learning_rate": 9.994162813494806e-06, "loss": 0.0411, "step": 28140 }, { "epoch": 0.22776923699328425, "grad_norm": 0.9246927499771118, "learning_rate": 9.994128654692502e-06, "loss": 0.0599, "step": 28150 }, { "epoch": 0.22785014968848613, "grad_norm": 0.7097482681274414, "learning_rate": 9.994094396292845e-06, "loss": 0.0406, "step": 28160 }, { "epoch": 0.227931062383688, "grad_norm": 0.7901378273963928, "learning_rate": 9.994060038296518e-06, "loss": 0.0373, "step": 28170 }, { "epoch": 0.22801197507888987, "grad_norm": 0.8230190277099609, "learning_rate": 9.994025580704208e-06, "loss": 0.053, "step": 28180 }, { "epoch": 0.22809288777409176, "grad_norm": 1.410305142402649, "learning_rate": 9.9939910235166e-06, "loss": 0.0364, "step": 28190 }, { "epoch": 0.22817380046929364, "grad_norm": 0.274354487657547, "learning_rate": 9.993956366734382e-06, "loss": 0.0713, "step": 28200 }, { "epoch": 0.2282547131644955, "grad_norm": 0.6950992941856384, "learning_rate": 9.99392161035825e-06, "loss": 0.0436, "step": 28210 }, { "epoch": 0.22833562585969738, "grad_norm": 0.5924570560455322, "learning_rate": 9.993886754388892e-06, "loss": 0.0489, "step": 28220 }, { "epoch": 0.22841653855489927, "grad_norm": 0.8746893405914307, "learning_rate": 9.993851798827008e-06, "loss": 0.0387, "step": 28230 }, { "epoch": 0.22849745125010115, "grad_norm": 0.4794062077999115, "learning_rate": 9.99381674367329e-06, "loss": 0.0421, "step": 28240 }, { "epoch": 0.228578363945303, "grad_norm": 0.7886708974838257, "learning_rate": 9.99378158892844e-06, "loss": 0.0595, "step": 28250 }, { "epoch": 0.2286592766405049, "grad_norm": 0.8996415138244629, "learning_rate": 9.993746334593157e-06, "loss": 0.0503, "step": 28260 }, { "epoch": 0.22874018933570678, "grad_norm": 0.6555849313735962, "learning_rate": 9.993710980668147e-06, "loss": 0.0486, "step": 28270 }, { "epoch": 0.22882110203090866, "grad_norm": 0.5430260300636292, "learning_rate": 9.993675527154115e-06, "loss": 0.0617, "step": 28280 }, { "epoch": 0.22890201472611052, "grad_norm": 1.2592158317565918, "learning_rate": 9.993639974051765e-06, "loss": 0.0517, "step": 28290 }, { "epoch": 0.2289829274213124, "grad_norm": 1.0217103958129883, "learning_rate": 9.993604321361808e-06, "loss": 0.0625, "step": 28300 }, { "epoch": 0.2290638401165143, "grad_norm": 0.8071820735931396, "learning_rate": 9.993568569084955e-06, "loss": 0.0447, "step": 28310 }, { "epoch": 0.22914475281171615, "grad_norm": 1.1508278846740723, "learning_rate": 9.993532717221919e-06, "loss": 0.0412, "step": 28320 }, { "epoch": 0.22922566550691803, "grad_norm": 0.8528046607971191, "learning_rate": 9.993496765773413e-06, "loss": 0.0419, "step": 28330 }, { "epoch": 0.22930657820211992, "grad_norm": 1.106095314025879, "learning_rate": 9.993460714740159e-06, "loss": 0.0442, "step": 28340 }, { "epoch": 0.2293874908973218, "grad_norm": 0.9263485074043274, "learning_rate": 9.993424564122871e-06, "loss": 0.048, "step": 28350 }, { "epoch": 0.22946840359252366, "grad_norm": 0.7508741617202759, "learning_rate": 9.993388313922273e-06, "loss": 0.0476, "step": 28360 }, { "epoch": 0.22954931628772554, "grad_norm": 0.5177948474884033, "learning_rate": 9.993351964139083e-06, "loss": 0.0362, "step": 28370 }, { "epoch": 0.22963022898292743, "grad_norm": 0.6386201977729797, "learning_rate": 9.993315514774032e-06, "loss": 0.0549, "step": 28380 }, { "epoch": 0.2297111416781293, "grad_norm": 0.39547494053840637, "learning_rate": 9.993278965827844e-06, "loss": 0.0642, "step": 28390 }, { "epoch": 0.22979205437333117, "grad_norm": 0.34730610251426697, "learning_rate": 9.993242317301249e-06, "loss": 0.0432, "step": 28400 }, { "epoch": 0.22987296706853305, "grad_norm": 0.44788485765457153, "learning_rate": 9.993205569194976e-06, "loss": 0.0608, "step": 28410 }, { "epoch": 0.22995387976373494, "grad_norm": 0.5558481216430664, "learning_rate": 9.993168721509761e-06, "loss": 0.0459, "step": 28420 }, { "epoch": 0.23003479245893682, "grad_norm": 0.8905524611473083, "learning_rate": 9.993131774246334e-06, "loss": 0.0408, "step": 28430 }, { "epoch": 0.23011570515413868, "grad_norm": 0.5183817744255066, "learning_rate": 9.993094727405437e-06, "loss": 0.0359, "step": 28440 }, { "epoch": 0.23019661784934056, "grad_norm": 0.10135272890329361, "learning_rate": 9.993057580987806e-06, "loss": 0.0478, "step": 28450 }, { "epoch": 0.23027753054454245, "grad_norm": 0.9052026867866516, "learning_rate": 9.993020334994182e-06, "loss": 0.0531, "step": 28460 }, { "epoch": 0.2303584432397443, "grad_norm": 0.39198997616767883, "learning_rate": 9.992982989425308e-06, "loss": 0.034, "step": 28470 }, { "epoch": 0.2304393559349462, "grad_norm": 1.2212185859680176, "learning_rate": 9.99294554428193e-06, "loss": 0.0597, "step": 28480 }, { "epoch": 0.23052026863014807, "grad_norm": 0.907056987285614, "learning_rate": 9.992907999564793e-06, "loss": 0.0563, "step": 28490 }, { "epoch": 0.23060118132534996, "grad_norm": 0.49956968426704407, "learning_rate": 9.992870355274646e-06, "loss": 0.0459, "step": 28500 }, { "epoch": 0.23068209402055181, "grad_norm": 0.7187815308570862, "learning_rate": 9.992832611412242e-06, "loss": 0.0548, "step": 28510 }, { "epoch": 0.2307630067157537, "grad_norm": 0.2484293431043625, "learning_rate": 9.99279476797833e-06, "loss": 0.0464, "step": 28520 }, { "epoch": 0.23084391941095558, "grad_norm": 1.325368046760559, "learning_rate": 9.992756824973666e-06, "loss": 0.0524, "step": 28530 }, { "epoch": 0.23092483210615747, "grad_norm": 0.72239089012146, "learning_rate": 9.992718782399008e-06, "loss": 0.0481, "step": 28540 }, { "epoch": 0.23100574480135933, "grad_norm": 0.49972036480903625, "learning_rate": 9.992680640255115e-06, "loss": 0.0456, "step": 28550 }, { "epoch": 0.2310866574965612, "grad_norm": 0.7490707039833069, "learning_rate": 9.992642398542747e-06, "loss": 0.051, "step": 28560 }, { "epoch": 0.2311675701917631, "grad_norm": 0.583438515663147, "learning_rate": 9.992604057262665e-06, "loss": 0.0546, "step": 28570 }, { "epoch": 0.23124848288696495, "grad_norm": 0.296191930770874, "learning_rate": 9.992565616415634e-06, "loss": 0.061, "step": 28580 }, { "epoch": 0.23132939558216684, "grad_norm": 0.8729941248893738, "learning_rate": 9.992527076002423e-06, "loss": 0.0627, "step": 28590 }, { "epoch": 0.23141030827736872, "grad_norm": 0.3578857481479645, "learning_rate": 9.9924884360238e-06, "loss": 0.0441, "step": 28600 }, { "epoch": 0.2314912209725706, "grad_norm": 0.8439871668815613, "learning_rate": 9.992449696480534e-06, "loss": 0.0468, "step": 28610 }, { "epoch": 0.23157213366777246, "grad_norm": 0.7373469471931458, "learning_rate": 9.992410857373399e-06, "loss": 0.0517, "step": 28620 }, { "epoch": 0.23165304636297435, "grad_norm": 0.5861049890518188, "learning_rate": 9.992371918703168e-06, "loss": 0.0302, "step": 28630 }, { "epoch": 0.23173395905817623, "grad_norm": 0.19426682591438293, "learning_rate": 9.992332880470618e-06, "loss": 0.0497, "step": 28640 }, { "epoch": 0.23181487175337812, "grad_norm": 0.6242757439613342, "learning_rate": 9.992293742676528e-06, "loss": 0.0467, "step": 28650 }, { "epoch": 0.23189578444857997, "grad_norm": 0.7248522639274597, "learning_rate": 9.992254505321679e-06, "loss": 0.0444, "step": 28660 }, { "epoch": 0.23197669714378186, "grad_norm": 1.1148533821105957, "learning_rate": 9.992215168406853e-06, "loss": 0.0451, "step": 28670 }, { "epoch": 0.23205760983898374, "grad_norm": 0.6047974824905396, "learning_rate": 9.992175731932834e-06, "loss": 0.0364, "step": 28680 }, { "epoch": 0.23213852253418563, "grad_norm": 0.5013038516044617, "learning_rate": 9.99213619590041e-06, "loss": 0.0391, "step": 28690 }, { "epoch": 0.23221943522938748, "grad_norm": 0.5014274716377258, "learning_rate": 9.992096560310366e-06, "loss": 0.028, "step": 28700 }, { "epoch": 0.23230034792458937, "grad_norm": 0.5918117761611938, "learning_rate": 9.992056825163499e-06, "loss": 0.0404, "step": 28710 }, { "epoch": 0.23238126061979125, "grad_norm": 0.7085261940956116, "learning_rate": 9.992016990460595e-06, "loss": 0.0598, "step": 28720 }, { "epoch": 0.2324621733149931, "grad_norm": 0.28461161255836487, "learning_rate": 9.991977056202451e-06, "loss": 0.0465, "step": 28730 }, { "epoch": 0.232543086010195, "grad_norm": 0.46346062421798706, "learning_rate": 9.991937022389862e-06, "loss": 0.0345, "step": 28740 }, { "epoch": 0.23262399870539688, "grad_norm": 0.6734001040458679, "learning_rate": 9.991896889023629e-06, "loss": 0.0562, "step": 28750 }, { "epoch": 0.23270491140059876, "grad_norm": 0.9001803994178772, "learning_rate": 9.991856656104551e-06, "loss": 0.0566, "step": 28760 }, { "epoch": 0.23278582409580062, "grad_norm": 0.5273090600967407, "learning_rate": 9.991816323633429e-06, "loss": 0.0627, "step": 28770 }, { "epoch": 0.2328667367910025, "grad_norm": 0.8149353861808777, "learning_rate": 9.991775891611068e-06, "loss": 0.0489, "step": 28780 }, { "epoch": 0.2329476494862044, "grad_norm": 0.6343575716018677, "learning_rate": 9.991735360038276e-06, "loss": 0.0534, "step": 28790 }, { "epoch": 0.23302856218140627, "grad_norm": 0.39279475808143616, "learning_rate": 9.991694728915861e-06, "loss": 0.0535, "step": 28800 }, { "epoch": 0.23310947487660813, "grad_norm": 0.7003416419029236, "learning_rate": 9.991653998244633e-06, "loss": 0.0457, "step": 28810 }, { "epoch": 0.23319038757181001, "grad_norm": 0.843544065952301, "learning_rate": 9.991613168025402e-06, "loss": 0.0565, "step": 28820 }, { "epoch": 0.2332713002670119, "grad_norm": 0.7057050466537476, "learning_rate": 9.991572238258987e-06, "loss": 0.0468, "step": 28830 }, { "epoch": 0.23335221296221378, "grad_norm": 0.42901191115379333, "learning_rate": 9.991531208946198e-06, "loss": 0.036, "step": 28840 }, { "epoch": 0.23343312565741564, "grad_norm": 0.8897179961204529, "learning_rate": 9.991490080087858e-06, "loss": 0.0332, "step": 28850 }, { "epoch": 0.23351403835261753, "grad_norm": 0.5313597917556763, "learning_rate": 9.991448851684786e-06, "loss": 0.068, "step": 28860 }, { "epoch": 0.2335949510478194, "grad_norm": 0.7418704628944397, "learning_rate": 9.991407523737805e-06, "loss": 0.0372, "step": 28870 }, { "epoch": 0.23367586374302127, "grad_norm": 1.245527744293213, "learning_rate": 9.991366096247736e-06, "loss": 0.0633, "step": 28880 }, { "epoch": 0.23375677643822315, "grad_norm": 1.439119815826416, "learning_rate": 9.991324569215408e-06, "loss": 0.0754, "step": 28890 }, { "epoch": 0.23383768913342504, "grad_norm": 0.2034890204668045, "learning_rate": 9.99128294264165e-06, "loss": 0.0385, "step": 28900 }, { "epoch": 0.23391860182862692, "grad_norm": 0.4221772849559784, "learning_rate": 9.99124121652729e-06, "loss": 0.0341, "step": 28910 }, { "epoch": 0.23399951452382878, "grad_norm": 0.7848542928695679, "learning_rate": 9.991199390873161e-06, "loss": 0.0535, "step": 28920 }, { "epoch": 0.23408042721903066, "grad_norm": 0.4031465947628021, "learning_rate": 9.991157465680097e-06, "loss": 0.0433, "step": 28930 }, { "epoch": 0.23416133991423255, "grad_norm": 0.4429577887058258, "learning_rate": 9.991115440948934e-06, "loss": 0.0358, "step": 28940 }, { "epoch": 0.23424225260943443, "grad_norm": 1.0451496839523315, "learning_rate": 9.99107331668051e-06, "loss": 0.0465, "step": 28950 }, { "epoch": 0.2343231653046363, "grad_norm": 0.47574958205223083, "learning_rate": 9.991031092875666e-06, "loss": 0.0359, "step": 28960 }, { "epoch": 0.23440407799983817, "grad_norm": 0.8951578140258789, "learning_rate": 9.990988769535243e-06, "loss": 0.046, "step": 28970 }, { "epoch": 0.23448499069504006, "grad_norm": 0.7426846623420715, "learning_rate": 9.990946346660086e-06, "loss": 0.0457, "step": 28980 }, { "epoch": 0.23456590339024194, "grad_norm": 0.7307661771774292, "learning_rate": 9.99090382425104e-06, "loss": 0.052, "step": 28990 }, { "epoch": 0.2346468160854438, "grad_norm": 0.566835880279541, "learning_rate": 9.990861202308955e-06, "loss": 0.0695, "step": 29000 }, { "epoch": 0.23472772878064568, "grad_norm": 0.24164234101772308, "learning_rate": 9.990818480834677e-06, "loss": 0.0537, "step": 29010 }, { "epoch": 0.23480864147584757, "grad_norm": 0.7484205365180969, "learning_rate": 9.990775659829061e-06, "loss": 0.0386, "step": 29020 }, { "epoch": 0.23488955417104943, "grad_norm": 0.46139004826545715, "learning_rate": 9.990732739292962e-06, "loss": 0.0641, "step": 29030 }, { "epoch": 0.2349704668662513, "grad_norm": 0.9725413918495178, "learning_rate": 9.990689719227233e-06, "loss": 0.0565, "step": 29040 }, { "epoch": 0.2350513795614532, "grad_norm": 0.45514628291130066, "learning_rate": 9.990646599632734e-06, "loss": 0.0359, "step": 29050 }, { "epoch": 0.23513229225665508, "grad_norm": 0.7460216283798218, "learning_rate": 9.990603380510324e-06, "loss": 0.0426, "step": 29060 }, { "epoch": 0.23521320495185694, "grad_norm": 0.7800915241241455, "learning_rate": 9.990560061860865e-06, "loss": 0.065, "step": 29070 }, { "epoch": 0.23529411764705882, "grad_norm": 0.8616963624954224, "learning_rate": 9.990516643685222e-06, "loss": 0.0478, "step": 29080 }, { "epoch": 0.2353750303422607, "grad_norm": 0.3435470461845398, "learning_rate": 9.990473125984258e-06, "loss": 0.0445, "step": 29090 }, { "epoch": 0.2354559430374626, "grad_norm": 0.6107579469680786, "learning_rate": 9.990429508758845e-06, "loss": 0.0654, "step": 29100 }, { "epoch": 0.23553685573266445, "grad_norm": 0.9417957663536072, "learning_rate": 9.990385792009849e-06, "loss": 0.0506, "step": 29110 }, { "epoch": 0.23561776842786633, "grad_norm": 0.35243508219718933, "learning_rate": 9.990341975738145e-06, "loss": 0.035, "step": 29120 }, { "epoch": 0.23569868112306822, "grad_norm": 0.48071667551994324, "learning_rate": 9.990298059944606e-06, "loss": 0.0444, "step": 29130 }, { "epoch": 0.2357795938182701, "grad_norm": 0.8147192001342773, "learning_rate": 9.990254044630106e-06, "loss": 0.0389, "step": 29140 }, { "epoch": 0.23586050651347196, "grad_norm": 0.8675050735473633, "learning_rate": 9.990209929795523e-06, "loss": 0.0501, "step": 29150 }, { "epoch": 0.23594141920867384, "grad_norm": 0.7273251414299011, "learning_rate": 9.99016571544174e-06, "loss": 0.0471, "step": 29160 }, { "epoch": 0.23602233190387573, "grad_norm": 0.793488085269928, "learning_rate": 9.990121401569635e-06, "loss": 0.0397, "step": 29170 }, { "epoch": 0.23610324459907758, "grad_norm": 1.038804054260254, "learning_rate": 9.990076988180092e-06, "loss": 0.0511, "step": 29180 }, { "epoch": 0.23618415729427947, "grad_norm": 0.5811271071434021, "learning_rate": 9.990032475274e-06, "loss": 0.0428, "step": 29190 }, { "epoch": 0.23626506998948135, "grad_norm": 0.8472088575363159, "learning_rate": 9.989987862852243e-06, "loss": 0.0394, "step": 29200 }, { "epoch": 0.23634598268468324, "grad_norm": 0.9889887571334839, "learning_rate": 9.989943150915714e-06, "loss": 0.0638, "step": 29210 }, { "epoch": 0.2364268953798851, "grad_norm": 0.5147126913070679, "learning_rate": 9.989898339465303e-06, "loss": 0.0568, "step": 29220 }, { "epoch": 0.23650780807508698, "grad_norm": 0.6289961338043213, "learning_rate": 9.989853428501903e-06, "loss": 0.0484, "step": 29230 }, { "epoch": 0.23658872077028886, "grad_norm": 0.5906900763511658, "learning_rate": 9.989808418026412e-06, "loss": 0.0345, "step": 29240 }, { "epoch": 0.23666963346549075, "grad_norm": 0.7672380208969116, "learning_rate": 9.989763308039726e-06, "loss": 0.0372, "step": 29250 }, { "epoch": 0.2367505461606926, "grad_norm": 0.6709568500518799, "learning_rate": 9.989718098542742e-06, "loss": 0.0578, "step": 29260 }, { "epoch": 0.2368314588558945, "grad_norm": 0.6006021499633789, "learning_rate": 9.989672789536366e-06, "loss": 0.0402, "step": 29270 }, { "epoch": 0.23691237155109637, "grad_norm": 0.647047758102417, "learning_rate": 9.9896273810215e-06, "loss": 0.0465, "step": 29280 }, { "epoch": 0.23699328424629826, "grad_norm": 0.3252098262310028, "learning_rate": 9.98958187299905e-06, "loss": 0.0343, "step": 29290 }, { "epoch": 0.23707419694150011, "grad_norm": 0.5136896967887878, "learning_rate": 9.98953626546992e-06, "loss": 0.0437, "step": 29300 }, { "epoch": 0.237155109636702, "grad_norm": 0.7609876394271851, "learning_rate": 9.989490558435026e-06, "loss": 0.0547, "step": 29310 }, { "epoch": 0.23723602233190388, "grad_norm": 0.33058005571365356, "learning_rate": 9.989444751895276e-06, "loss": 0.0337, "step": 29320 }, { "epoch": 0.23731693502710574, "grad_norm": 0.4019455909729004, "learning_rate": 9.989398845851581e-06, "loss": 0.0306, "step": 29330 }, { "epoch": 0.23739784772230763, "grad_norm": 0.7882896065711975, "learning_rate": 9.989352840304863e-06, "loss": 0.0462, "step": 29340 }, { "epoch": 0.2374787604175095, "grad_norm": 0.6384595036506653, "learning_rate": 9.989306735256032e-06, "loss": 0.0689, "step": 29350 }, { "epoch": 0.2375596731127114, "grad_norm": 0.6003329157829285, "learning_rate": 9.989260530706011e-06, "loss": 0.0354, "step": 29360 }, { "epoch": 0.23764058580791325, "grad_norm": 1.7712244987487793, "learning_rate": 9.989214226655723e-06, "loss": 0.0469, "step": 29370 }, { "epoch": 0.23772149850311514, "grad_norm": 0.922595202922821, "learning_rate": 9.98916782310609e-06, "loss": 0.052, "step": 29380 }, { "epoch": 0.23780241119831702, "grad_norm": 0.7201486229896545, "learning_rate": 9.989121320058038e-06, "loss": 0.0462, "step": 29390 }, { "epoch": 0.2378833238935189, "grad_norm": 1.0086877346038818, "learning_rate": 9.989074717512492e-06, "loss": 0.0632, "step": 29400 }, { "epoch": 0.23796423658872076, "grad_norm": 0.7134227156639099, "learning_rate": 9.989028015470385e-06, "loss": 0.0481, "step": 29410 }, { "epoch": 0.23804514928392265, "grad_norm": 1.2586510181427002, "learning_rate": 9.988981213932645e-06, "loss": 0.0605, "step": 29420 }, { "epoch": 0.23812606197912453, "grad_norm": 0.6143580079078674, "learning_rate": 9.988934312900206e-06, "loss": 0.0426, "step": 29430 }, { "epoch": 0.2382069746743264, "grad_norm": 0.9365250468254089, "learning_rate": 9.988887312374007e-06, "loss": 0.0493, "step": 29440 }, { "epoch": 0.23828788736952827, "grad_norm": 0.5788771510124207, "learning_rate": 9.988840212354983e-06, "loss": 0.0442, "step": 29450 }, { "epoch": 0.23836880006473016, "grad_norm": 0.5208428502082825, "learning_rate": 9.98879301284407e-06, "loss": 0.0449, "step": 29460 }, { "epoch": 0.23844971275993204, "grad_norm": 0.9793639183044434, "learning_rate": 9.988745713842213e-06, "loss": 0.0381, "step": 29470 }, { "epoch": 0.2385306254551339, "grad_norm": 0.4925127327442169, "learning_rate": 9.988698315350355e-06, "loss": 0.0403, "step": 29480 }, { "epoch": 0.23861153815033578, "grad_norm": 0.5187420845031738, "learning_rate": 9.988650817369441e-06, "loss": 0.061, "step": 29490 }, { "epoch": 0.23869245084553767, "grad_norm": 1.1847262382507324, "learning_rate": 9.988603219900417e-06, "loss": 0.0797, "step": 29500 }, { "epoch": 0.23877336354073955, "grad_norm": 0.8604630827903748, "learning_rate": 9.988555522944233e-06, "loss": 0.0339, "step": 29510 }, { "epoch": 0.2388542762359414, "grad_norm": 0.23070372641086578, "learning_rate": 9.988507726501841e-06, "loss": 0.0394, "step": 29520 }, { "epoch": 0.2389351889311433, "grad_norm": 0.4223661422729492, "learning_rate": 9.988459830574195e-06, "loss": 0.0337, "step": 29530 }, { "epoch": 0.23901610162634518, "grad_norm": 0.5640909075737, "learning_rate": 9.988411835162246e-06, "loss": 0.0408, "step": 29540 }, { "epoch": 0.23909701432154706, "grad_norm": 0.6429324746131897, "learning_rate": 9.988363740266956e-06, "loss": 0.0574, "step": 29550 }, { "epoch": 0.23917792701674892, "grad_norm": 0.7380816340446472, "learning_rate": 9.988315545889282e-06, "loss": 0.0664, "step": 29560 }, { "epoch": 0.2392588397119508, "grad_norm": 0.6354820728302002, "learning_rate": 9.988267252030183e-06, "loss": 0.044, "step": 29570 }, { "epoch": 0.2393397524071527, "grad_norm": 0.6728026866912842, "learning_rate": 9.988218858690627e-06, "loss": 0.0464, "step": 29580 }, { "epoch": 0.23942066510235455, "grad_norm": 1.1826292276382446, "learning_rate": 9.988170365871575e-06, "loss": 0.0295, "step": 29590 }, { "epoch": 0.23950157779755643, "grad_norm": 1.1140040159225464, "learning_rate": 9.988121773573997e-06, "loss": 0.067, "step": 29600 }, { "epoch": 0.23958249049275832, "grad_norm": 0.9435023069381714, "learning_rate": 9.988073081798859e-06, "loss": 0.042, "step": 29610 }, { "epoch": 0.2396634031879602, "grad_norm": 0.558020830154419, "learning_rate": 9.988024290547135e-06, "loss": 0.0407, "step": 29620 }, { "epoch": 0.23974431588316206, "grad_norm": 0.4935450553894043, "learning_rate": 9.987975399819797e-06, "loss": 0.0465, "step": 29630 }, { "epoch": 0.23982522857836394, "grad_norm": 0.8274707198143005, "learning_rate": 9.987926409617817e-06, "loss": 0.0652, "step": 29640 }, { "epoch": 0.23990614127356583, "grad_norm": 0.4275240898132324, "learning_rate": 9.987877319942176e-06, "loss": 0.0417, "step": 29650 }, { "epoch": 0.2399870539687677, "grad_norm": 0.8227292895317078, "learning_rate": 9.987828130793853e-06, "loss": 0.0472, "step": 29660 }, { "epoch": 0.24006796666396957, "grad_norm": 0.32927241921424866, "learning_rate": 9.987778842173827e-06, "loss": 0.0374, "step": 29670 }, { "epoch": 0.24014887935917145, "grad_norm": 0.47610533237457275, "learning_rate": 9.98772945408308e-06, "loss": 0.0405, "step": 29680 }, { "epoch": 0.24022979205437334, "grad_norm": 1.3268916606903076, "learning_rate": 9.987679966522603e-06, "loss": 0.0494, "step": 29690 }, { "epoch": 0.24031070474957522, "grad_norm": 0.15495182573795319, "learning_rate": 9.987630379493376e-06, "loss": 0.0349, "step": 29700 }, { "epoch": 0.24039161744477708, "grad_norm": 1.0709943771362305, "learning_rate": 9.98758069299639e-06, "loss": 0.0377, "step": 29710 }, { "epoch": 0.24047253013997896, "grad_norm": 0.4059484601020813, "learning_rate": 9.987530907032635e-06, "loss": 0.0451, "step": 29720 }, { "epoch": 0.24055344283518085, "grad_norm": 0.5580406785011292, "learning_rate": 9.987481021603106e-06, "loss": 0.033, "step": 29730 }, { "epoch": 0.2406343555303827, "grad_norm": 0.7831939458847046, "learning_rate": 9.987431036708799e-06, "loss": 0.0469, "step": 29740 }, { "epoch": 0.2407152682255846, "grad_norm": 0.5229403972625732, "learning_rate": 9.987380952350709e-06, "loss": 0.0363, "step": 29750 }, { "epoch": 0.24079618092078647, "grad_norm": 1.6554807424545288, "learning_rate": 9.987330768529833e-06, "loss": 0.0482, "step": 29760 }, { "epoch": 0.24087709361598836, "grad_norm": 0.5992709994316101, "learning_rate": 9.987280485247173e-06, "loss": 0.0567, "step": 29770 }, { "epoch": 0.24095800631119021, "grad_norm": 1.124861717224121, "learning_rate": 9.987230102503733e-06, "loss": 0.0424, "step": 29780 }, { "epoch": 0.2410389190063921, "grad_norm": 0.9176813960075378, "learning_rate": 9.987179620300517e-06, "loss": 0.0466, "step": 29790 }, { "epoch": 0.24111983170159398, "grad_norm": 0.6115798354148865, "learning_rate": 9.987129038638533e-06, "loss": 0.061, "step": 29800 }, { "epoch": 0.24120074439679587, "grad_norm": 0.822918176651001, "learning_rate": 9.98707835751879e-06, "loss": 0.0438, "step": 29810 }, { "epoch": 0.24128165709199773, "grad_norm": 1.037783145904541, "learning_rate": 9.987027576942294e-06, "loss": 0.0488, "step": 29820 }, { "epoch": 0.2413625697871996, "grad_norm": 0.9516106247901917, "learning_rate": 9.986976696910063e-06, "loss": 0.0367, "step": 29830 }, { "epoch": 0.2414434824824015, "grad_norm": 0.5080488920211792, "learning_rate": 9.986925717423107e-06, "loss": 0.0464, "step": 29840 }, { "epoch": 0.24152439517760338, "grad_norm": 1.4322868585586548, "learning_rate": 9.98687463848245e-06, "loss": 0.0503, "step": 29850 }, { "epoch": 0.24160530787280524, "grad_norm": 0.7581678032875061, "learning_rate": 9.986823460089102e-06, "loss": 0.0456, "step": 29860 }, { "epoch": 0.24168622056800712, "grad_norm": 0.9126974940299988, "learning_rate": 9.98677218224409e-06, "loss": 0.0337, "step": 29870 }, { "epoch": 0.241767133263209, "grad_norm": 0.9372013807296753, "learning_rate": 9.986720804948434e-06, "loss": 0.0483, "step": 29880 }, { "epoch": 0.24184804595841086, "grad_norm": 0.6795972585678101, "learning_rate": 9.986669328203158e-06, "loss": 0.0619, "step": 29890 }, { "epoch": 0.24192895865361275, "grad_norm": 1.578875184059143, "learning_rate": 9.98661775200929e-06, "loss": 0.0651, "step": 29900 }, { "epoch": 0.24200987134881463, "grad_norm": 0.7936837673187256, "learning_rate": 9.98656607636786e-06, "loss": 0.0424, "step": 29910 }, { "epoch": 0.24209078404401652, "grad_norm": 0.49532023072242737, "learning_rate": 9.986514301279894e-06, "loss": 0.06, "step": 29920 }, { "epoch": 0.24217169673921837, "grad_norm": 0.7810518145561218, "learning_rate": 9.986462426746428e-06, "loss": 0.0621, "step": 29930 }, { "epoch": 0.24225260943442026, "grad_norm": 0.5543779134750366, "learning_rate": 9.986410452768496e-06, "loss": 0.0403, "step": 29940 }, { "epoch": 0.24233352212962214, "grad_norm": 0.7506580352783203, "learning_rate": 9.986358379347134e-06, "loss": 0.0528, "step": 29950 }, { "epoch": 0.24241443482482403, "grad_norm": 0.788432776927948, "learning_rate": 9.986306206483382e-06, "loss": 0.0453, "step": 29960 }, { "epoch": 0.24249534752002588, "grad_norm": 1.0952277183532715, "learning_rate": 9.986253934178278e-06, "loss": 0.0377, "step": 29970 }, { "epoch": 0.24257626021522777, "grad_norm": 0.6123279333114624, "learning_rate": 9.986201562432867e-06, "loss": 0.0459, "step": 29980 }, { "epoch": 0.24265717291042965, "grad_norm": 0.5394969582557678, "learning_rate": 9.986149091248191e-06, "loss": 0.0363, "step": 29990 }, { "epoch": 0.24273808560563154, "grad_norm": 0.5917291641235352, "learning_rate": 9.9860965206253e-06, "loss": 0.0489, "step": 30000 }, { "epoch": 0.2428189983008334, "grad_norm": 1.0908552408218384, "learning_rate": 9.986043850565237e-06, "loss": 0.0465, "step": 30010 }, { "epoch": 0.24289991099603528, "grad_norm": 0.3942812979221344, "learning_rate": 9.985991081069055e-06, "loss": 0.0376, "step": 30020 }, { "epoch": 0.24298082369123716, "grad_norm": 0.9041557312011719, "learning_rate": 9.985938212137809e-06, "loss": 0.0439, "step": 30030 }, { "epoch": 0.24306173638643902, "grad_norm": 0.7070273160934448, "learning_rate": 9.985885243772551e-06, "loss": 0.0461, "step": 30040 }, { "epoch": 0.2431426490816409, "grad_norm": 0.7257552146911621, "learning_rate": 9.985832175974337e-06, "loss": 0.0411, "step": 30050 }, { "epoch": 0.2432235617768428, "grad_norm": 0.6260961294174194, "learning_rate": 9.985779008744225e-06, "loss": 0.0458, "step": 30060 }, { "epoch": 0.24330447447204467, "grad_norm": 0.6840417385101318, "learning_rate": 9.985725742083276e-06, "loss": 0.0725, "step": 30070 }, { "epoch": 0.24338538716724653, "grad_norm": 0.7670689225196838, "learning_rate": 9.985672375992555e-06, "loss": 0.0352, "step": 30080 }, { "epoch": 0.24346629986244842, "grad_norm": 0.7989543676376343, "learning_rate": 9.98561891047312e-06, "loss": 0.0512, "step": 30090 }, { "epoch": 0.2435472125576503, "grad_norm": 1.0931001901626587, "learning_rate": 9.985565345526044e-06, "loss": 0.0568, "step": 30100 }, { "epoch": 0.24362812525285218, "grad_norm": 0.6926978230476379, "learning_rate": 9.985511681152391e-06, "loss": 0.0392, "step": 30110 }, { "epoch": 0.24370903794805404, "grad_norm": 0.9206599593162537, "learning_rate": 9.985457917353231e-06, "loss": 0.0707, "step": 30120 }, { "epoch": 0.24378995064325593, "grad_norm": 1.17426598072052, "learning_rate": 9.98540405412964e-06, "loss": 0.0486, "step": 30130 }, { "epoch": 0.2438708633384578, "grad_norm": 0.7460669279098511, "learning_rate": 9.98535009148269e-06, "loss": 0.0565, "step": 30140 }, { "epoch": 0.24395177603365967, "grad_norm": 0.7306713461875916, "learning_rate": 9.985296029413456e-06, "loss": 0.0435, "step": 30150 }, { "epoch": 0.24403268872886155, "grad_norm": 1.4091678857803345, "learning_rate": 9.985241867923017e-06, "loss": 0.0658, "step": 30160 }, { "epoch": 0.24411360142406344, "grad_norm": 0.6088196635246277, "learning_rate": 9.985187607012452e-06, "loss": 0.0447, "step": 30170 }, { "epoch": 0.24419451411926532, "grad_norm": 0.9201290011405945, "learning_rate": 9.985133246682846e-06, "loss": 0.0389, "step": 30180 }, { "epoch": 0.24427542681446718, "grad_norm": 0.6155406832695007, "learning_rate": 9.98507878693528e-06, "loss": 0.044, "step": 30190 }, { "epoch": 0.24435633950966906, "grad_norm": 0.392476886510849, "learning_rate": 9.985024227770843e-06, "loss": 0.0474, "step": 30200 }, { "epoch": 0.24443725220487095, "grad_norm": 0.6747857928276062, "learning_rate": 9.984969569190622e-06, "loss": 0.0556, "step": 30210 }, { "epoch": 0.24451816490007283, "grad_norm": 0.5181813836097717, "learning_rate": 9.984914811195705e-06, "loss": 0.033, "step": 30220 }, { "epoch": 0.2445990775952747, "grad_norm": 0.2681480348110199, "learning_rate": 9.984859953787187e-06, "loss": 0.0412, "step": 30230 }, { "epoch": 0.24467999029047657, "grad_norm": 0.4778628945350647, "learning_rate": 9.98480499696616e-06, "loss": 0.0433, "step": 30240 }, { "epoch": 0.24476090298567846, "grad_norm": 0.7692768573760986, "learning_rate": 9.984749940733723e-06, "loss": 0.0533, "step": 30250 }, { "epoch": 0.24484181568088034, "grad_norm": 1.009661078453064, "learning_rate": 9.984694785090967e-06, "loss": 0.046, "step": 30260 }, { "epoch": 0.2449227283760822, "grad_norm": 0.6201493740081787, "learning_rate": 9.984639530039001e-06, "loss": 0.0307, "step": 30270 }, { "epoch": 0.24500364107128408, "grad_norm": 0.528377115726471, "learning_rate": 9.98458417557892e-06, "loss": 0.0496, "step": 30280 }, { "epoch": 0.24508455376648597, "grad_norm": 0.46314746141433716, "learning_rate": 9.984528721711833e-06, "loss": 0.0468, "step": 30290 }, { "epoch": 0.24516546646168783, "grad_norm": 0.6498243808746338, "learning_rate": 9.984473168438843e-06, "loss": 0.0519, "step": 30300 }, { "epoch": 0.2452463791568897, "grad_norm": 0.7494022846221924, "learning_rate": 9.984417515761055e-06, "loss": 0.0383, "step": 30310 }, { "epoch": 0.2453272918520916, "grad_norm": 0.8995001316070557, "learning_rate": 9.984361763679585e-06, "loss": 0.038, "step": 30320 }, { "epoch": 0.24540820454729348, "grad_norm": 0.7455715537071228, "learning_rate": 9.984305912195541e-06, "loss": 0.042, "step": 30330 }, { "epoch": 0.24548911724249534, "grad_norm": 0.6492567658424377, "learning_rate": 9.984249961310039e-06, "loss": 0.0498, "step": 30340 }, { "epoch": 0.24557002993769722, "grad_norm": 0.217304527759552, "learning_rate": 9.984193911024193e-06, "loss": 0.0489, "step": 30350 }, { "epoch": 0.2456509426328991, "grad_norm": 1.1787198781967163, "learning_rate": 9.984137761339123e-06, "loss": 0.0436, "step": 30360 }, { "epoch": 0.245731855328101, "grad_norm": 0.573737621307373, "learning_rate": 9.984081512255946e-06, "loss": 0.0478, "step": 30370 }, { "epoch": 0.24581276802330285, "grad_norm": 0.6631760001182556, "learning_rate": 9.984025163775785e-06, "loss": 0.0516, "step": 30380 }, { "epoch": 0.24589368071850473, "grad_norm": 0.49423858523368835, "learning_rate": 9.983968715899764e-06, "loss": 0.0436, "step": 30390 }, { "epoch": 0.24597459341370662, "grad_norm": 0.33303454518318176, "learning_rate": 9.983912168629009e-06, "loss": 0.0439, "step": 30400 }, { "epoch": 0.2460555061089085, "grad_norm": 0.5267417430877686, "learning_rate": 9.983855521964648e-06, "loss": 0.0613, "step": 30410 }, { "epoch": 0.24613641880411036, "grad_norm": 0.5566934943199158, "learning_rate": 9.98379877590781e-06, "loss": 0.057, "step": 30420 }, { "epoch": 0.24621733149931224, "grad_norm": 0.564208984375, "learning_rate": 9.983741930459624e-06, "loss": 0.0484, "step": 30430 }, { "epoch": 0.24629824419451413, "grad_norm": 0.8098536133766174, "learning_rate": 9.983684985621227e-06, "loss": 0.0442, "step": 30440 }, { "epoch": 0.24637915688971598, "grad_norm": 0.6221235394477844, "learning_rate": 9.983627941393757e-06, "loss": 0.0539, "step": 30450 }, { "epoch": 0.24646006958491787, "grad_norm": 0.8394474387168884, "learning_rate": 9.983570797778346e-06, "loss": 0.0432, "step": 30460 }, { "epoch": 0.24654098228011975, "grad_norm": 0.656785249710083, "learning_rate": 9.983513554776137e-06, "loss": 0.0336, "step": 30470 }, { "epoch": 0.24662189497532164, "grad_norm": 0.5221214294433594, "learning_rate": 9.98345621238827e-06, "loss": 0.0646, "step": 30480 }, { "epoch": 0.2467028076705235, "grad_norm": 0.42219337821006775, "learning_rate": 9.98339877061589e-06, "loss": 0.0451, "step": 30490 }, { "epoch": 0.24678372036572538, "grad_norm": 0.5704799294471741, "learning_rate": 9.983341229460143e-06, "loss": 0.0551, "step": 30500 }, { "epoch": 0.24686463306092726, "grad_norm": 0.6893219947814941, "learning_rate": 9.983283588922174e-06, "loss": 0.0383, "step": 30510 }, { "epoch": 0.24694554575612915, "grad_norm": 0.6984744668006897, "learning_rate": 9.983225849003135e-06, "loss": 0.0502, "step": 30520 }, { "epoch": 0.247026458451331, "grad_norm": 0.8706519603729248, "learning_rate": 9.983168009704176e-06, "loss": 0.0502, "step": 30530 }, { "epoch": 0.2471073711465329, "grad_norm": 0.650513768196106, "learning_rate": 9.98311007102645e-06, "loss": 0.0482, "step": 30540 }, { "epoch": 0.24718828384173477, "grad_norm": 0.2983083724975586, "learning_rate": 9.983052032971115e-06, "loss": 0.0326, "step": 30550 }, { "epoch": 0.24726919653693666, "grad_norm": 0.580037534236908, "learning_rate": 9.982993895539327e-06, "loss": 0.0519, "step": 30560 }, { "epoch": 0.24735010923213852, "grad_norm": 0.4494313895702362, "learning_rate": 9.982935658732244e-06, "loss": 0.0683, "step": 30570 }, { "epoch": 0.2474310219273404, "grad_norm": 0.6323563456535339, "learning_rate": 9.98287732255103e-06, "loss": 0.0584, "step": 30580 }, { "epoch": 0.24751193462254228, "grad_norm": 0.804916262626648, "learning_rate": 9.982818886996846e-06, "loss": 0.0529, "step": 30590 }, { "epoch": 0.24759284731774414, "grad_norm": 0.4624304473400116, "learning_rate": 9.98276035207086e-06, "loss": 0.0349, "step": 30600 }, { "epoch": 0.24767376001294603, "grad_norm": 0.7795426845550537, "learning_rate": 9.982701717774235e-06, "loss": 0.0473, "step": 30610 }, { "epoch": 0.2477546727081479, "grad_norm": 0.5464739203453064, "learning_rate": 9.982642984108147e-06, "loss": 0.0539, "step": 30620 }, { "epoch": 0.2478355854033498, "grad_norm": 0.3156367838382721, "learning_rate": 9.98258415107376e-06, "loss": 0.0422, "step": 30630 }, { "epoch": 0.24791649809855165, "grad_norm": 0.7729805111885071, "learning_rate": 9.982525218672253e-06, "loss": 0.0587, "step": 30640 }, { "epoch": 0.24799741079375354, "grad_norm": 1.038344144821167, "learning_rate": 9.982466186904798e-06, "loss": 0.035, "step": 30650 }, { "epoch": 0.24807832348895542, "grad_norm": 0.6524780988693237, "learning_rate": 9.982407055772573e-06, "loss": 0.0476, "step": 30660 }, { "epoch": 0.2481592361841573, "grad_norm": 0.8870249390602112, "learning_rate": 9.98234782527676e-06, "loss": 0.0455, "step": 30670 }, { "epoch": 0.24824014887935916, "grad_norm": 2.020773410797119, "learning_rate": 9.982288495418535e-06, "loss": 0.0467, "step": 30680 }, { "epoch": 0.24832106157456105, "grad_norm": 0.8593246936798096, "learning_rate": 9.982229066199086e-06, "loss": 0.0509, "step": 30690 }, { "epoch": 0.24840197426976293, "grad_norm": 0.37230515480041504, "learning_rate": 9.982169537619595e-06, "loss": 0.0442, "step": 30700 }, { "epoch": 0.24848288696496482, "grad_norm": 0.4203949570655823, "learning_rate": 9.98210990968125e-06, "loss": 0.052, "step": 30710 }, { "epoch": 0.24856379966016667, "grad_norm": 0.7626328468322754, "learning_rate": 9.982050182385244e-06, "loss": 0.0444, "step": 30720 }, { "epoch": 0.24864471235536856, "grad_norm": 0.16956426203250885, "learning_rate": 9.981990355732762e-06, "loss": 0.0303, "step": 30730 }, { "epoch": 0.24872562505057044, "grad_norm": 0.9628337025642395, "learning_rate": 9.981930429725e-06, "loss": 0.0345, "step": 30740 }, { "epoch": 0.2488065377457723, "grad_norm": 0.7527374029159546, "learning_rate": 9.981870404363153e-06, "loss": 0.0292, "step": 30750 }, { "epoch": 0.24888745044097418, "grad_norm": 0.6861822605133057, "learning_rate": 9.981810279648416e-06, "loss": 0.0418, "step": 30760 }, { "epoch": 0.24896836313617607, "grad_norm": 0.5091506242752075, "learning_rate": 9.981750055581992e-06, "loss": 0.0502, "step": 30770 }, { "epoch": 0.24904927583137795, "grad_norm": 0.512028157711029, "learning_rate": 9.981689732165081e-06, "loss": 0.045, "step": 30780 }, { "epoch": 0.2491301885265798, "grad_norm": 0.6222819685935974, "learning_rate": 9.981629309398883e-06, "loss": 0.0659, "step": 30790 }, { "epoch": 0.2492111012217817, "grad_norm": 0.4765752851963043, "learning_rate": 9.981568787284607e-06, "loss": 0.0605, "step": 30800 }, { "epoch": 0.24929201391698358, "grad_norm": 0.44857603311538696, "learning_rate": 9.981508165823457e-06, "loss": 0.0417, "step": 30810 }, { "epoch": 0.24937292661218546, "grad_norm": 0.432617723941803, "learning_rate": 9.981447445016642e-06, "loss": 0.0584, "step": 30820 }, { "epoch": 0.24945383930738732, "grad_norm": 0.5400980710983276, "learning_rate": 9.981386624865378e-06, "loss": 0.054, "step": 30830 }, { "epoch": 0.2495347520025892, "grad_norm": 0.2527734935283661, "learning_rate": 9.98132570537087e-06, "loss": 0.0427, "step": 30840 }, { "epoch": 0.2496156646977911, "grad_norm": 0.5608066916465759, "learning_rate": 9.981264686534338e-06, "loss": 0.0481, "step": 30850 }, { "epoch": 0.24969657739299297, "grad_norm": 0.49568572640419006, "learning_rate": 9.981203568356999e-06, "loss": 0.0415, "step": 30860 }, { "epoch": 0.24977749008819483, "grad_norm": 0.9563845992088318, "learning_rate": 9.98114235084007e-06, "loss": 0.0415, "step": 30870 }, { "epoch": 0.24985840278339672, "grad_norm": 0.7638653516769409, "learning_rate": 9.981081033984769e-06, "loss": 0.0397, "step": 30880 }, { "epoch": 0.2499393154785986, "grad_norm": 0.5184863209724426, "learning_rate": 9.981019617792327e-06, "loss": 0.0325, "step": 30890 }, { "epoch": 0.2500202281738005, "grad_norm": 1.267439603805542, "learning_rate": 9.980958102263963e-06, "loss": 0.081, "step": 30900 }, { "epoch": 0.25010114086900237, "grad_norm": 0.5236422419548035, "learning_rate": 9.980896487400903e-06, "loss": 0.0435, "step": 30910 }, { "epoch": 0.2501820535642042, "grad_norm": 1.5383251905441284, "learning_rate": 9.980834773204379e-06, "loss": 0.0501, "step": 30920 }, { "epoch": 0.2502629662594061, "grad_norm": 0.6721208095550537, "learning_rate": 9.98077295967562e-06, "loss": 0.0366, "step": 30930 }, { "epoch": 0.25034387895460797, "grad_norm": 0.9176182746887207, "learning_rate": 9.980711046815861e-06, "loss": 0.0404, "step": 30940 }, { "epoch": 0.25042479164980985, "grad_norm": 1.0984543561935425, "learning_rate": 9.980649034626332e-06, "loss": 0.053, "step": 30950 }, { "epoch": 0.25050570434501174, "grad_norm": 0.34824901819229126, "learning_rate": 9.980586923108275e-06, "loss": 0.0592, "step": 30960 }, { "epoch": 0.2505866170402136, "grad_norm": 0.9048367142677307, "learning_rate": 9.980524712262925e-06, "loss": 0.0474, "step": 30970 }, { "epoch": 0.2506675297354155, "grad_norm": 0.8079733848571777, "learning_rate": 9.980462402091524e-06, "loss": 0.0378, "step": 30980 }, { "epoch": 0.2507484424306174, "grad_norm": 1.5268352031707764, "learning_rate": 9.980399992595316e-06, "loss": 0.0501, "step": 30990 }, { "epoch": 0.2508293551258192, "grad_norm": 0.45894619822502136, "learning_rate": 9.980337483775544e-06, "loss": 0.0491, "step": 31000 }, { "epoch": 0.2509102678210211, "grad_norm": 0.6282681822776794, "learning_rate": 9.980274875633454e-06, "loss": 0.0559, "step": 31010 }, { "epoch": 0.250991180516223, "grad_norm": 0.9802755117416382, "learning_rate": 9.980212168170295e-06, "loss": 0.0587, "step": 31020 }, { "epoch": 0.2510720932114249, "grad_norm": 1.033940076828003, "learning_rate": 9.980149361387319e-06, "loss": 0.0602, "step": 31030 }, { "epoch": 0.25115300590662676, "grad_norm": 0.7295991778373718, "learning_rate": 9.980086455285777e-06, "loss": 0.0575, "step": 31040 }, { "epoch": 0.25123391860182864, "grad_norm": 0.4253544211387634, "learning_rate": 9.980023449866924e-06, "loss": 0.0726, "step": 31050 }, { "epoch": 0.2513148312970305, "grad_norm": 0.5926665663719177, "learning_rate": 9.979960345132016e-06, "loss": 0.0425, "step": 31060 }, { "epoch": 0.25139574399223236, "grad_norm": 0.5415006279945374, "learning_rate": 9.979897141082313e-06, "loss": 0.0342, "step": 31070 }, { "epoch": 0.25147665668743424, "grad_norm": 0.47520285844802856, "learning_rate": 9.979833837719075e-06, "loss": 0.0467, "step": 31080 }, { "epoch": 0.2515575693826361, "grad_norm": 0.8710054159164429, "learning_rate": 9.979770435043564e-06, "loss": 0.0433, "step": 31090 }, { "epoch": 0.251638482077838, "grad_norm": 0.5121340155601501, "learning_rate": 9.979706933057043e-06, "loss": 0.0372, "step": 31100 }, { "epoch": 0.2517193947730399, "grad_norm": 0.7212818264961243, "learning_rate": 9.97964333176078e-06, "loss": 0.0418, "step": 31110 }, { "epoch": 0.2518003074682418, "grad_norm": 0.7770971059799194, "learning_rate": 9.979579631156044e-06, "loss": 0.0553, "step": 31120 }, { "epoch": 0.25188122016344366, "grad_norm": 0.7547061443328857, "learning_rate": 9.979515831244105e-06, "loss": 0.0571, "step": 31130 }, { "epoch": 0.25196213285864555, "grad_norm": 0.613696813583374, "learning_rate": 9.979451932026235e-06, "loss": 0.0301, "step": 31140 }, { "epoch": 0.2520430455538474, "grad_norm": 0.6592080593109131, "learning_rate": 9.979387933503707e-06, "loss": 0.0411, "step": 31150 }, { "epoch": 0.25212395824904926, "grad_norm": 0.47868475317955017, "learning_rate": 9.9793238356778e-06, "loss": 0.0353, "step": 31160 }, { "epoch": 0.25220487094425115, "grad_norm": 0.7527089715003967, "learning_rate": 9.979259638549791e-06, "loss": 0.0523, "step": 31170 }, { "epoch": 0.25228578363945303, "grad_norm": 0.8296588063240051, "learning_rate": 9.97919534212096e-06, "loss": 0.0438, "step": 31180 }, { "epoch": 0.2523666963346549, "grad_norm": 0.8290988206863403, "learning_rate": 9.979130946392588e-06, "loss": 0.0498, "step": 31190 }, { "epoch": 0.2524476090298568, "grad_norm": 0.468197226524353, "learning_rate": 9.979066451365963e-06, "loss": 0.0451, "step": 31200 }, { "epoch": 0.2525285217250587, "grad_norm": 0.5354122519493103, "learning_rate": 9.979001857042366e-06, "loss": 0.0347, "step": 31210 }, { "epoch": 0.2526094344202605, "grad_norm": 0.8589081764221191, "learning_rate": 9.978937163423091e-06, "loss": 0.0694, "step": 31220 }, { "epoch": 0.2526903471154624, "grad_norm": 0.8918763399124146, "learning_rate": 9.978872370509423e-06, "loss": 0.0526, "step": 31230 }, { "epoch": 0.2527712598106643, "grad_norm": 0.34822043776512146, "learning_rate": 9.978807478302657e-06, "loss": 0.0439, "step": 31240 }, { "epoch": 0.25285217250586617, "grad_norm": 0.7416645288467407, "learning_rate": 9.978742486804087e-06, "loss": 0.0595, "step": 31250 }, { "epoch": 0.25293308520106805, "grad_norm": 1.0384457111358643, "learning_rate": 9.978677396015008e-06, "loss": 0.0525, "step": 31260 }, { "epoch": 0.25301399789626994, "grad_norm": 0.3019510805606842, "learning_rate": 9.97861220593672e-06, "loss": 0.0315, "step": 31270 }, { "epoch": 0.2530949105914718, "grad_norm": 0.5331364274024963, "learning_rate": 9.978546916570522e-06, "loss": 0.0496, "step": 31280 }, { "epoch": 0.2531758232866737, "grad_norm": 0.4607771337032318, "learning_rate": 9.978481527917717e-06, "loss": 0.0356, "step": 31290 }, { "epoch": 0.25325673598187554, "grad_norm": 0.3757680654525757, "learning_rate": 9.978416039979604e-06, "loss": 0.0303, "step": 31300 }, { "epoch": 0.2533376486770774, "grad_norm": 0.28651586174964905, "learning_rate": 9.978350452757495e-06, "loss": 0.057, "step": 31310 }, { "epoch": 0.2534185613722793, "grad_norm": 0.47787410020828247, "learning_rate": 9.978284766252698e-06, "loss": 0.0374, "step": 31320 }, { "epoch": 0.2534994740674812, "grad_norm": 0.4341886341571808, "learning_rate": 9.978218980466518e-06, "loss": 0.0575, "step": 31330 }, { "epoch": 0.2535803867626831, "grad_norm": 0.4037805497646332, "learning_rate": 9.97815309540027e-06, "loss": 0.0499, "step": 31340 }, { "epoch": 0.25366129945788496, "grad_norm": 0.41353434324264526, "learning_rate": 9.97808711105527e-06, "loss": 0.034, "step": 31350 }, { "epoch": 0.25374221215308684, "grad_norm": 0.20910069346427917, "learning_rate": 9.978021027432831e-06, "loss": 0.0405, "step": 31360 }, { "epoch": 0.2538231248482887, "grad_norm": 0.28541821241378784, "learning_rate": 9.97795484453427e-06, "loss": 0.0543, "step": 31370 }, { "epoch": 0.25390403754349056, "grad_norm": 0.3834909498691559, "learning_rate": 9.977888562360912e-06, "loss": 0.0471, "step": 31380 }, { "epoch": 0.25398495023869244, "grad_norm": 0.7492914795875549, "learning_rate": 9.977822180914072e-06, "loss": 0.0588, "step": 31390 }, { "epoch": 0.2540658629338943, "grad_norm": 1.0246280431747437, "learning_rate": 9.97775570019508e-06, "loss": 0.0668, "step": 31400 }, { "epoch": 0.2541467756290962, "grad_norm": 0.9352970123291016, "learning_rate": 9.977689120205257e-06, "loss": 0.0503, "step": 31410 }, { "epoch": 0.2542276883242981, "grad_norm": 0.8400120139122009, "learning_rate": 9.977622440945933e-06, "loss": 0.0465, "step": 31420 }, { "epoch": 0.2543086010195, "grad_norm": 0.4929358959197998, "learning_rate": 9.97755566241844e-06, "loss": 0.0492, "step": 31430 }, { "epoch": 0.2543895137147018, "grad_norm": 0.4565292000770569, "learning_rate": 9.977488784624105e-06, "loss": 0.066, "step": 31440 }, { "epoch": 0.2544704264099037, "grad_norm": 0.373544842004776, "learning_rate": 9.977421807564264e-06, "loss": 0.0422, "step": 31450 }, { "epoch": 0.2545513391051056, "grad_norm": 0.6827806234359741, "learning_rate": 9.977354731240255e-06, "loss": 0.0531, "step": 31460 }, { "epoch": 0.25463225180030746, "grad_norm": 0.9935402274131775, "learning_rate": 9.97728755565341e-06, "loss": 0.0549, "step": 31470 }, { "epoch": 0.25471316449550935, "grad_norm": 0.4473038911819458, "learning_rate": 9.977220280805076e-06, "loss": 0.039, "step": 31480 }, { "epoch": 0.25479407719071123, "grad_norm": 0.5516922473907471, "learning_rate": 9.977152906696588e-06, "loss": 0.0534, "step": 31490 }, { "epoch": 0.2548749898859131, "grad_norm": 0.9405595064163208, "learning_rate": 9.977085433329295e-06, "loss": 0.0546, "step": 31500 }, { "epoch": 0.254955902581115, "grad_norm": 1.0517361164093018, "learning_rate": 9.97701786070454e-06, "loss": 0.0358, "step": 31510 }, { "epoch": 0.25503681527631683, "grad_norm": 1.277321219444275, "learning_rate": 9.976950188823667e-06, "loss": 0.0554, "step": 31520 }, { "epoch": 0.2551177279715187, "grad_norm": 0.3563804626464844, "learning_rate": 9.976882417688032e-06, "loss": 0.0457, "step": 31530 }, { "epoch": 0.2551986406667206, "grad_norm": 0.9305568933486938, "learning_rate": 9.976814547298984e-06, "loss": 0.0475, "step": 31540 }, { "epoch": 0.2552795533619225, "grad_norm": 0.3782440721988678, "learning_rate": 9.976746577657876e-06, "loss": 0.0633, "step": 31550 }, { "epoch": 0.25536046605712437, "grad_norm": 0.5054601430892944, "learning_rate": 9.976678508766064e-06, "loss": 0.0499, "step": 31560 }, { "epoch": 0.25544137875232625, "grad_norm": 0.5562798380851746, "learning_rate": 9.976610340624905e-06, "loss": 0.0596, "step": 31570 }, { "epoch": 0.25552229144752814, "grad_norm": 0.43989482522010803, "learning_rate": 9.97654207323576e-06, "loss": 0.0324, "step": 31580 }, { "epoch": 0.25560320414272997, "grad_norm": 0.6129705309867859, "learning_rate": 9.976473706599988e-06, "loss": 0.0485, "step": 31590 }, { "epoch": 0.25568411683793185, "grad_norm": 0.21201050281524658, "learning_rate": 9.976405240718954e-06, "loss": 0.0536, "step": 31600 }, { "epoch": 0.25576502953313374, "grad_norm": 0.763905942440033, "learning_rate": 9.976336675594024e-06, "loss": 0.048, "step": 31610 }, { "epoch": 0.2558459422283356, "grad_norm": 0.7911651730537415, "learning_rate": 9.976268011226565e-06, "loss": 0.047, "step": 31620 }, { "epoch": 0.2559268549235375, "grad_norm": 0.4405684173107147, "learning_rate": 9.976199247617944e-06, "loss": 0.038, "step": 31630 }, { "epoch": 0.2560077676187394, "grad_norm": 0.44814983010292053, "learning_rate": 9.976130384769534e-06, "loss": 0.0651, "step": 31640 }, { "epoch": 0.2560886803139413, "grad_norm": 0.3663938343524933, "learning_rate": 9.97606142268271e-06, "loss": 0.0466, "step": 31650 }, { "epoch": 0.25616959300914316, "grad_norm": 0.639868974685669, "learning_rate": 9.975992361358847e-06, "loss": 0.0565, "step": 31660 }, { "epoch": 0.256250505704345, "grad_norm": 0.2785325050354004, "learning_rate": 9.975923200799318e-06, "loss": 0.0534, "step": 31670 }, { "epoch": 0.2563314183995469, "grad_norm": 0.7853860259056091, "learning_rate": 9.975853941005507e-06, "loss": 0.0461, "step": 31680 }, { "epoch": 0.25641233109474876, "grad_norm": 0.5518622398376465, "learning_rate": 9.975784581978794e-06, "loss": 0.0544, "step": 31690 }, { "epoch": 0.25649324378995064, "grad_norm": 0.12857580184936523, "learning_rate": 9.97571512372056e-06, "loss": 0.0467, "step": 31700 }, { "epoch": 0.2565741564851525, "grad_norm": 1.0415834188461304, "learning_rate": 9.975645566232196e-06, "loss": 0.0458, "step": 31710 }, { "epoch": 0.2566550691803544, "grad_norm": 0.5898895263671875, "learning_rate": 9.975575909515081e-06, "loss": 0.0531, "step": 31720 }, { "epoch": 0.2567359818755563, "grad_norm": 0.4934357702732086, "learning_rate": 9.97550615357061e-06, "loss": 0.0626, "step": 31730 }, { "epoch": 0.2568168945707581, "grad_norm": 0.6529455780982971, "learning_rate": 9.975436298400173e-06, "loss": 0.0648, "step": 31740 }, { "epoch": 0.25689780726596, "grad_norm": 0.7169414758682251, "learning_rate": 9.975366344005162e-06, "loss": 0.0514, "step": 31750 }, { "epoch": 0.2569787199611619, "grad_norm": 0.43625912070274353, "learning_rate": 9.975296290386974e-06, "loss": 0.0406, "step": 31760 }, { "epoch": 0.2570596326563638, "grad_norm": 0.7406403422355652, "learning_rate": 9.975226137547002e-06, "loss": 0.0497, "step": 31770 }, { "epoch": 0.25714054535156566, "grad_norm": 0.632644534111023, "learning_rate": 9.97515588548665e-06, "loss": 0.0398, "step": 31780 }, { "epoch": 0.25722145804676755, "grad_norm": 0.502733588218689, "learning_rate": 9.975085534207318e-06, "loss": 0.0365, "step": 31790 }, { "epoch": 0.25730237074196943, "grad_norm": 0.36549732089042664, "learning_rate": 9.975015083710406e-06, "loss": 0.0472, "step": 31800 }, { "epoch": 0.2573832834371713, "grad_norm": 0.9673776030540466, "learning_rate": 9.97494453399732e-06, "loss": 0.0504, "step": 31810 }, { "epoch": 0.25746419613237315, "grad_norm": 0.23921039700508118, "learning_rate": 9.97487388506947e-06, "loss": 0.0321, "step": 31820 }, { "epoch": 0.25754510882757503, "grad_norm": 0.35213810205459595, "learning_rate": 9.974803136928261e-06, "loss": 0.0562, "step": 31830 }, { "epoch": 0.2576260215227769, "grad_norm": 0.865433931350708, "learning_rate": 9.974732289575108e-06, "loss": 0.0397, "step": 31840 }, { "epoch": 0.2577069342179788, "grad_norm": 0.5742883086204529, "learning_rate": 9.974661343011421e-06, "loss": 0.0563, "step": 31850 }, { "epoch": 0.2577878469131807, "grad_norm": 0.5618996620178223, "learning_rate": 9.974590297238613e-06, "loss": 0.0318, "step": 31860 }, { "epoch": 0.25786875960838257, "grad_norm": 0.8397247195243835, "learning_rate": 9.974519152258107e-06, "loss": 0.0457, "step": 31870 }, { "epoch": 0.25794967230358445, "grad_norm": 0.42158403992652893, "learning_rate": 9.974447908071315e-06, "loss": 0.0317, "step": 31880 }, { "epoch": 0.2580305849987863, "grad_norm": 0.8431351780891418, "learning_rate": 9.974376564679662e-06, "loss": 0.0506, "step": 31890 }, { "epoch": 0.25811149769398817, "grad_norm": 0.1796330213546753, "learning_rate": 9.97430512208457e-06, "loss": 0.0458, "step": 31900 }, { "epoch": 0.25819241038919005, "grad_norm": 1.0248301029205322, "learning_rate": 9.974233580287465e-06, "loss": 0.0529, "step": 31910 }, { "epoch": 0.25827332308439194, "grad_norm": 0.6611414551734924, "learning_rate": 9.97416193928977e-06, "loss": 0.0482, "step": 31920 }, { "epoch": 0.2583542357795938, "grad_norm": 0.4607120454311371, "learning_rate": 9.974090199092917e-06, "loss": 0.0374, "step": 31930 }, { "epoch": 0.2584351484747957, "grad_norm": 1.0497993230819702, "learning_rate": 9.974018359698337e-06, "loss": 0.0504, "step": 31940 }, { "epoch": 0.2585160611699976, "grad_norm": 0.3189172148704529, "learning_rate": 9.973946421107458e-06, "loss": 0.042, "step": 31950 }, { "epoch": 0.2585969738651995, "grad_norm": 0.824892520904541, "learning_rate": 9.973874383321721e-06, "loss": 0.0696, "step": 31960 }, { "epoch": 0.2586778865604013, "grad_norm": 1.2547610998153687, "learning_rate": 9.97380224634256e-06, "loss": 0.0672, "step": 31970 }, { "epoch": 0.2587587992556032, "grad_norm": 0.5068519711494446, "learning_rate": 9.973730010171412e-06, "loss": 0.0733, "step": 31980 }, { "epoch": 0.2588397119508051, "grad_norm": 0.6180327534675598, "learning_rate": 9.973657674809719e-06, "loss": 0.0464, "step": 31990 }, { "epoch": 0.25892062464600696, "grad_norm": 0.47571176290512085, "learning_rate": 9.973585240258925e-06, "loss": 0.033, "step": 32000 }, { "epoch": 0.25900153734120884, "grad_norm": 0.5079420208930969, "learning_rate": 9.973512706520471e-06, "loss": 0.0585, "step": 32010 }, { "epoch": 0.2590824500364107, "grad_norm": 0.5108247995376587, "learning_rate": 9.973440073595806e-06, "loss": 0.0562, "step": 32020 }, { "epoch": 0.2591633627316126, "grad_norm": 0.9227749109268188, "learning_rate": 9.973367341486379e-06, "loss": 0.062, "step": 32030 }, { "epoch": 0.25924427542681444, "grad_norm": 0.9290594458580017, "learning_rate": 9.97329451019364e-06, "loss": 0.0582, "step": 32040 }, { "epoch": 0.2593251881220163, "grad_norm": 0.45285218954086304, "learning_rate": 9.97322157971904e-06, "loss": 0.0652, "step": 32050 }, { "epoch": 0.2594061008172182, "grad_norm": 1.123703956604004, "learning_rate": 9.973148550064035e-06, "loss": 0.0675, "step": 32060 }, { "epoch": 0.2594870135124201, "grad_norm": 0.5443967580795288, "learning_rate": 9.97307542123008e-06, "loss": 0.0367, "step": 32070 }, { "epoch": 0.259567926207622, "grad_norm": 1.0988813638687134, "learning_rate": 9.973002193218637e-06, "loss": 0.0791, "step": 32080 }, { "epoch": 0.25964883890282386, "grad_norm": 0.7402377724647522, "learning_rate": 9.97292886603116e-06, "loss": 0.0473, "step": 32090 }, { "epoch": 0.25972975159802575, "grad_norm": 1.419141173362732, "learning_rate": 9.972855439669118e-06, "loss": 0.0482, "step": 32100 }, { "epoch": 0.25981066429322763, "grad_norm": 0.3859022855758667, "learning_rate": 9.97278191413397e-06, "loss": 0.0307, "step": 32110 }, { "epoch": 0.25989157698842946, "grad_norm": 0.8470381498336792, "learning_rate": 9.972708289427188e-06, "loss": 0.0475, "step": 32120 }, { "epoch": 0.25997248968363135, "grad_norm": 0.9809413552284241, "learning_rate": 9.972634565550234e-06, "loss": 0.0382, "step": 32130 }, { "epoch": 0.26005340237883323, "grad_norm": 0.7023314833641052, "learning_rate": 9.972560742504583e-06, "loss": 0.0436, "step": 32140 }, { "epoch": 0.2601343150740351, "grad_norm": 0.3961482644081116, "learning_rate": 9.972486820291705e-06, "loss": 0.0481, "step": 32150 }, { "epoch": 0.260215227769237, "grad_norm": 0.8408985137939453, "learning_rate": 9.972412798913076e-06, "loss": 0.0497, "step": 32160 }, { "epoch": 0.2602961404644389, "grad_norm": 0.4437829852104187, "learning_rate": 9.972338678370168e-06, "loss": 0.0409, "step": 32170 }, { "epoch": 0.26037705315964077, "grad_norm": 0.6881566643714905, "learning_rate": 9.972264458664464e-06, "loss": 0.0537, "step": 32180 }, { "epoch": 0.2604579658548426, "grad_norm": 0.7676419019699097, "learning_rate": 9.972190139797443e-06, "loss": 0.0467, "step": 32190 }, { "epoch": 0.2605388785500445, "grad_norm": 0.8206568360328674, "learning_rate": 9.972115721770586e-06, "loss": 0.0508, "step": 32200 }, { "epoch": 0.26061979124524637, "grad_norm": 0.43775317072868347, "learning_rate": 9.972041204585376e-06, "loss": 0.0383, "step": 32210 }, { "epoch": 0.26070070394044825, "grad_norm": 0.5300054550170898, "learning_rate": 9.971966588243303e-06, "loss": 0.0468, "step": 32220 }, { "epoch": 0.26078161663565014, "grad_norm": 0.6129964590072632, "learning_rate": 9.971891872745851e-06, "loss": 0.0599, "step": 32230 }, { "epoch": 0.260862529330852, "grad_norm": 0.326893150806427, "learning_rate": 9.971817058094512e-06, "loss": 0.0423, "step": 32240 }, { "epoch": 0.2609434420260539, "grad_norm": 1.0016778707504272, "learning_rate": 9.971742144290778e-06, "loss": 0.0512, "step": 32250 }, { "epoch": 0.2610243547212558, "grad_norm": 0.46339547634124756, "learning_rate": 9.971667131336143e-06, "loss": 0.0473, "step": 32260 }, { "epoch": 0.2611052674164576, "grad_norm": 0.5734221339225769, "learning_rate": 9.971592019232101e-06, "loss": 0.0463, "step": 32270 }, { "epoch": 0.2611861801116595, "grad_norm": 0.32661014795303345, "learning_rate": 9.971516807980154e-06, "loss": 0.0487, "step": 32280 }, { "epoch": 0.2612670928068614, "grad_norm": 0.6816713213920593, "learning_rate": 9.971441497581798e-06, "loss": 0.0431, "step": 32290 }, { "epoch": 0.2613480055020633, "grad_norm": 0.4787384867668152, "learning_rate": 9.971366088038537e-06, "loss": 0.0479, "step": 32300 }, { "epoch": 0.26142891819726516, "grad_norm": 0.6678417325019836, "learning_rate": 9.971290579351876e-06, "loss": 0.039, "step": 32310 }, { "epoch": 0.26150983089246704, "grad_norm": 0.8135554790496826, "learning_rate": 9.971214971523318e-06, "loss": 0.0537, "step": 32320 }, { "epoch": 0.26159074358766893, "grad_norm": 0.6821385622024536, "learning_rate": 9.97113926455437e-06, "loss": 0.0399, "step": 32330 }, { "epoch": 0.26167165628287076, "grad_norm": 0.42594337463378906, "learning_rate": 9.971063458446546e-06, "loss": 0.0335, "step": 32340 }, { "epoch": 0.26175256897807264, "grad_norm": 0.5261428356170654, "learning_rate": 9.970987553201357e-06, "loss": 0.0424, "step": 32350 }, { "epoch": 0.2618334816732745, "grad_norm": 0.7056213021278381, "learning_rate": 9.970911548820314e-06, "loss": 0.0514, "step": 32360 }, { "epoch": 0.2619143943684764, "grad_norm": 0.32325175404548645, "learning_rate": 9.970835445304934e-06, "loss": 0.0487, "step": 32370 }, { "epoch": 0.2619953070636783, "grad_norm": 0.45887482166290283, "learning_rate": 9.970759242656734e-06, "loss": 0.0385, "step": 32380 }, { "epoch": 0.2620762197588802, "grad_norm": 0.592944324016571, "learning_rate": 9.970682940877237e-06, "loss": 0.0469, "step": 32390 }, { "epoch": 0.26215713245408206, "grad_norm": 1.0509213209152222, "learning_rate": 9.97060653996796e-06, "loss": 0.0365, "step": 32400 }, { "epoch": 0.26223804514928395, "grad_norm": 0.6049657464027405, "learning_rate": 9.97053003993043e-06, "loss": 0.069, "step": 32410 }, { "epoch": 0.2623189578444858, "grad_norm": 0.26740315556526184, "learning_rate": 9.970453440766173e-06, "loss": 0.0473, "step": 32420 }, { "epoch": 0.26239987053968766, "grad_norm": 0.9635586142539978, "learning_rate": 9.970376742476713e-06, "loss": 0.0532, "step": 32430 }, { "epoch": 0.26248078323488955, "grad_norm": 0.25551337003707886, "learning_rate": 9.970299945063583e-06, "loss": 0.0432, "step": 32440 }, { "epoch": 0.26256169593009143, "grad_norm": 0.4323926270008087, "learning_rate": 9.970223048528314e-06, "loss": 0.0404, "step": 32450 }, { "epoch": 0.2626426086252933, "grad_norm": 0.4646337628364563, "learning_rate": 9.970146052872437e-06, "loss": 0.0508, "step": 32460 }, { "epoch": 0.2627235213204952, "grad_norm": 0.6242446303367615, "learning_rate": 9.97006895809749e-06, "loss": 0.081, "step": 32470 }, { "epoch": 0.2628044340156971, "grad_norm": 0.44868046045303345, "learning_rate": 9.96999176420501e-06, "loss": 0.0451, "step": 32480 }, { "epoch": 0.2628853467108989, "grad_norm": 0.7244195938110352, "learning_rate": 9.969914471196538e-06, "loss": 0.05, "step": 32490 }, { "epoch": 0.2629662594061008, "grad_norm": 0.9370179176330566, "learning_rate": 9.969837079073612e-06, "loss": 0.0517, "step": 32500 }, { "epoch": 0.2630471721013027, "grad_norm": 0.48920807242393494, "learning_rate": 9.969759587837777e-06, "loss": 0.0409, "step": 32510 }, { "epoch": 0.26312808479650457, "grad_norm": 0.9552827477455139, "learning_rate": 9.96968199749058e-06, "loss": 0.0468, "step": 32520 }, { "epoch": 0.26320899749170645, "grad_norm": 0.7240702509880066, "learning_rate": 9.969604308033566e-06, "loss": 0.0449, "step": 32530 }, { "epoch": 0.26328991018690834, "grad_norm": 0.756791889667511, "learning_rate": 9.969526519468287e-06, "loss": 0.0605, "step": 32540 }, { "epoch": 0.2633708228821102, "grad_norm": 0.6521751284599304, "learning_rate": 9.96944863179629e-06, "loss": 0.0697, "step": 32550 }, { "epoch": 0.2634517355773121, "grad_norm": 0.807586669921875, "learning_rate": 9.969370645019133e-06, "loss": 0.0456, "step": 32560 }, { "epoch": 0.26353264827251394, "grad_norm": 0.8484849333763123, "learning_rate": 9.969292559138369e-06, "loss": 0.0574, "step": 32570 }, { "epoch": 0.2636135609677158, "grad_norm": 0.4141877293586731, "learning_rate": 9.969214374155555e-06, "loss": 0.0443, "step": 32580 }, { "epoch": 0.2636944736629177, "grad_norm": 0.8731061816215515, "learning_rate": 9.969136090072251e-06, "loss": 0.0435, "step": 32590 }, { "epoch": 0.2637753863581196, "grad_norm": 0.7330067157745361, "learning_rate": 9.969057706890018e-06, "loss": 0.054, "step": 32600 }, { "epoch": 0.2638562990533215, "grad_norm": 0.7887769937515259, "learning_rate": 9.96897922461042e-06, "loss": 0.0396, "step": 32610 }, { "epoch": 0.26393721174852336, "grad_norm": 0.5456385612487793, "learning_rate": 9.96890064323502e-06, "loss": 0.0421, "step": 32620 }, { "epoch": 0.26401812444372524, "grad_norm": 0.93587327003479, "learning_rate": 9.968821962765387e-06, "loss": 0.0524, "step": 32630 }, { "epoch": 0.2640990371389271, "grad_norm": 0.4616703689098358, "learning_rate": 9.968743183203092e-06, "loss": 0.0326, "step": 32640 }, { "epoch": 0.26417994983412896, "grad_norm": 0.5565558671951294, "learning_rate": 9.9686643045497e-06, "loss": 0.0584, "step": 32650 }, { "epoch": 0.26426086252933084, "grad_norm": 0.44513872265815735, "learning_rate": 9.968585326806789e-06, "loss": 0.0464, "step": 32660 }, { "epoch": 0.2643417752245327, "grad_norm": 0.4258263111114502, "learning_rate": 9.968506249975934e-06, "loss": 0.0524, "step": 32670 }, { "epoch": 0.2644226879197346, "grad_norm": 0.29081976413726807, "learning_rate": 9.968427074058708e-06, "loss": 0.0196, "step": 32680 }, { "epoch": 0.2645036006149365, "grad_norm": 0.41434288024902344, "learning_rate": 9.968347799056696e-06, "loss": 0.0408, "step": 32690 }, { "epoch": 0.2645845133101384, "grad_norm": 0.7206262350082397, "learning_rate": 9.968268424971475e-06, "loss": 0.0329, "step": 32700 }, { "epoch": 0.26466542600534027, "grad_norm": 0.4589415490627289, "learning_rate": 9.968188951804627e-06, "loss": 0.0546, "step": 32710 }, { "epoch": 0.2647463387005421, "grad_norm": 0.5526103377342224, "learning_rate": 9.96810937955774e-06, "loss": 0.0397, "step": 32720 }, { "epoch": 0.264827251395744, "grad_norm": 1.1861906051635742, "learning_rate": 9.9680297082324e-06, "loss": 0.041, "step": 32730 }, { "epoch": 0.26490816409094586, "grad_norm": 0.32987937331199646, "learning_rate": 9.967949937830194e-06, "loss": 0.0293, "step": 32740 }, { "epoch": 0.26498907678614775, "grad_norm": 1.3889808654785156, "learning_rate": 9.967870068352716e-06, "loss": 0.0476, "step": 32750 }, { "epoch": 0.26506998948134963, "grad_norm": 0.5037294626235962, "learning_rate": 9.967790099801556e-06, "loss": 0.0426, "step": 32760 }, { "epoch": 0.2651509021765515, "grad_norm": 0.7758489847183228, "learning_rate": 9.967710032178311e-06, "loss": 0.0416, "step": 32770 }, { "epoch": 0.2652318148717534, "grad_norm": 0.3939485549926758, "learning_rate": 9.967629865484574e-06, "loss": 0.0456, "step": 32780 }, { "epoch": 0.26531272756695523, "grad_norm": 0.49163445830345154, "learning_rate": 9.967549599721949e-06, "loss": 0.0303, "step": 32790 }, { "epoch": 0.2653936402621571, "grad_norm": 0.3476395010948181, "learning_rate": 9.967469234892034e-06, "loss": 0.0463, "step": 32800 }, { "epoch": 0.265474552957359, "grad_norm": 0.7072016596794128, "learning_rate": 9.967388770996432e-06, "loss": 0.0333, "step": 32810 }, { "epoch": 0.2655554656525609, "grad_norm": 0.26276570558547974, "learning_rate": 9.967308208036747e-06, "loss": 0.0308, "step": 32820 }, { "epoch": 0.26563637834776277, "grad_norm": 0.7322192788124084, "learning_rate": 9.967227546014586e-06, "loss": 0.0765, "step": 32830 }, { "epoch": 0.26571729104296465, "grad_norm": 0.9077566266059875, "learning_rate": 9.967146784931558e-06, "loss": 0.0532, "step": 32840 }, { "epoch": 0.26579820373816654, "grad_norm": 0.6698997616767883, "learning_rate": 9.967065924789275e-06, "loss": 0.0463, "step": 32850 }, { "epoch": 0.2658791164333684, "grad_norm": 0.7688770890235901, "learning_rate": 9.966984965589346e-06, "loss": 0.0406, "step": 32860 }, { "epoch": 0.26596002912857025, "grad_norm": 0.5874840021133423, "learning_rate": 9.966903907333388e-06, "loss": 0.0558, "step": 32870 }, { "epoch": 0.26604094182377214, "grad_norm": 0.9871959090232849, "learning_rate": 9.966822750023017e-06, "loss": 0.0538, "step": 32880 }, { "epoch": 0.266121854518974, "grad_norm": 0.7675160765647888, "learning_rate": 9.966741493659853e-06, "loss": 0.0419, "step": 32890 }, { "epoch": 0.2662027672141759, "grad_norm": 0.5092610120773315, "learning_rate": 9.966660138245514e-06, "loss": 0.0422, "step": 32900 }, { "epoch": 0.2662836799093778, "grad_norm": 0.5303878784179688, "learning_rate": 9.966578683781625e-06, "loss": 0.0418, "step": 32910 }, { "epoch": 0.2663645926045797, "grad_norm": 1.4741445779800415, "learning_rate": 9.966497130269809e-06, "loss": 0.0528, "step": 32920 }, { "epoch": 0.26644550529978156, "grad_norm": 1.0296299457550049, "learning_rate": 9.96641547771169e-06, "loss": 0.0539, "step": 32930 }, { "epoch": 0.2665264179949834, "grad_norm": 0.9333137273788452, "learning_rate": 9.9663337261089e-06, "loss": 0.0512, "step": 32940 }, { "epoch": 0.2666073306901853, "grad_norm": 0.35607603192329407, "learning_rate": 9.966251875463068e-06, "loss": 0.0385, "step": 32950 }, { "epoch": 0.26668824338538716, "grad_norm": 0.5952485799789429, "learning_rate": 9.966169925775828e-06, "loss": 0.0409, "step": 32960 }, { "epoch": 0.26676915608058904, "grad_norm": 0.8508825302124023, "learning_rate": 9.966087877048812e-06, "loss": 0.0387, "step": 32970 }, { "epoch": 0.2668500687757909, "grad_norm": 1.0142126083374023, "learning_rate": 9.966005729283658e-06, "loss": 0.0434, "step": 32980 }, { "epoch": 0.2669309814709928, "grad_norm": 0.7683525085449219, "learning_rate": 9.965923482482003e-06, "loss": 0.0504, "step": 32990 }, { "epoch": 0.2670118941661947, "grad_norm": 0.7623611688613892, "learning_rate": 9.965841136645487e-06, "loss": 0.043, "step": 33000 }, { "epoch": 0.2670928068613965, "grad_norm": 0.27261781692504883, "learning_rate": 9.965758691775754e-06, "loss": 0.0446, "step": 33010 }, { "epoch": 0.2671737195565984, "grad_norm": 0.5669440627098083, "learning_rate": 9.965676147874446e-06, "loss": 0.037, "step": 33020 }, { "epoch": 0.2672546322518003, "grad_norm": 0.5623132586479187, "learning_rate": 9.96559350494321e-06, "loss": 0.0526, "step": 33030 }, { "epoch": 0.2673355449470022, "grad_norm": 0.83443683385849, "learning_rate": 9.965510762983695e-06, "loss": 0.0543, "step": 33040 }, { "epoch": 0.26741645764220406, "grad_norm": 0.6354754567146301, "learning_rate": 9.96542792199755e-06, "loss": 0.0454, "step": 33050 }, { "epoch": 0.26749737033740595, "grad_norm": 0.508927583694458, "learning_rate": 9.965344981986429e-06, "loss": 0.0419, "step": 33060 }, { "epoch": 0.26757828303260783, "grad_norm": 0.9532585740089417, "learning_rate": 9.965261942951982e-06, "loss": 0.05, "step": 33070 }, { "epoch": 0.2676591957278097, "grad_norm": 0.8886579871177673, "learning_rate": 9.965178804895869e-06, "loss": 0.064, "step": 33080 }, { "epoch": 0.26774010842301155, "grad_norm": 0.8939550518989563, "learning_rate": 9.96509556781975e-06, "loss": 0.0621, "step": 33090 }, { "epoch": 0.26782102111821343, "grad_norm": 0.8067049384117126, "learning_rate": 9.965012231725277e-06, "loss": 0.0556, "step": 33100 }, { "epoch": 0.2679019338134153, "grad_norm": 0.764986515045166, "learning_rate": 9.964928796614119e-06, "loss": 0.043, "step": 33110 }, { "epoch": 0.2679828465086172, "grad_norm": 0.6541759967803955, "learning_rate": 9.964845262487936e-06, "loss": 0.0602, "step": 33120 }, { "epoch": 0.2680637592038191, "grad_norm": 0.6034036874771118, "learning_rate": 9.964761629348397e-06, "loss": 0.0378, "step": 33130 }, { "epoch": 0.26814467189902097, "grad_norm": 0.983698844909668, "learning_rate": 9.964677897197168e-06, "loss": 0.0422, "step": 33140 }, { "epoch": 0.26822558459422285, "grad_norm": 0.5286413431167603, "learning_rate": 9.964594066035918e-06, "loss": 0.0275, "step": 33150 }, { "epoch": 0.2683064972894247, "grad_norm": 0.9168142676353455, "learning_rate": 9.964510135866323e-06, "loss": 0.033, "step": 33160 }, { "epoch": 0.26838740998462657, "grad_norm": 0.6832315921783447, "learning_rate": 9.964426106690051e-06, "loss": 0.0399, "step": 33170 }, { "epoch": 0.26846832267982845, "grad_norm": 0.2523171007633209, "learning_rate": 9.964341978508782e-06, "loss": 0.0317, "step": 33180 }, { "epoch": 0.26854923537503034, "grad_norm": 0.5153500437736511, "learning_rate": 9.964257751324192e-06, "loss": 0.0404, "step": 33190 }, { "epoch": 0.2686301480702322, "grad_norm": 0.316135972738266, "learning_rate": 9.964173425137962e-06, "loss": 0.0343, "step": 33200 }, { "epoch": 0.2687110607654341, "grad_norm": 0.6116840839385986, "learning_rate": 9.964088999951772e-06, "loss": 0.0472, "step": 33210 }, { "epoch": 0.268791973460636, "grad_norm": 0.5875454545021057, "learning_rate": 9.964004475767306e-06, "loss": 0.0633, "step": 33220 }, { "epoch": 0.2688728861558379, "grad_norm": 1.0309962034225464, "learning_rate": 9.96391985258625e-06, "loss": 0.0601, "step": 33230 }, { "epoch": 0.2689537988510397, "grad_norm": 0.8358822464942932, "learning_rate": 9.963835130410294e-06, "loss": 0.0406, "step": 33240 }, { "epoch": 0.2690347115462416, "grad_norm": 1.4580330848693848, "learning_rate": 9.963750309241123e-06, "loss": 0.0588, "step": 33250 }, { "epoch": 0.2691156242414435, "grad_norm": 0.9012377858161926, "learning_rate": 9.963665389080432e-06, "loss": 0.0477, "step": 33260 }, { "epoch": 0.26919653693664536, "grad_norm": 0.6182253360748291, "learning_rate": 9.963580369929914e-06, "loss": 0.0295, "step": 33270 }, { "epoch": 0.26927744963184724, "grad_norm": 0.5715149641036987, "learning_rate": 9.963495251791264e-06, "loss": 0.0455, "step": 33280 }, { "epoch": 0.26935836232704913, "grad_norm": 0.6351043581962585, "learning_rate": 9.963410034666181e-06, "loss": 0.0374, "step": 33290 }, { "epoch": 0.269439275022251, "grad_norm": 0.647541344165802, "learning_rate": 9.963324718556361e-06, "loss": 0.0525, "step": 33300 }, { "epoch": 0.26952018771745284, "grad_norm": 0.39890578389167786, "learning_rate": 9.963239303463507e-06, "loss": 0.0475, "step": 33310 }, { "epoch": 0.2696011004126547, "grad_norm": 0.5390143990516663, "learning_rate": 9.963153789389326e-06, "loss": 0.0555, "step": 33320 }, { "epoch": 0.2696820131078566, "grad_norm": 0.7979610562324524, "learning_rate": 9.963068176335518e-06, "loss": 0.0561, "step": 33330 }, { "epoch": 0.2697629258030585, "grad_norm": 0.7012606263160706, "learning_rate": 9.962982464303792e-06, "loss": 0.0328, "step": 33340 }, { "epoch": 0.2698438384982604, "grad_norm": 0.30528509616851807, "learning_rate": 9.96289665329586e-06, "loss": 0.0353, "step": 33350 }, { "epoch": 0.26992475119346226, "grad_norm": 0.8669029474258423, "learning_rate": 9.962810743313432e-06, "loss": 0.0467, "step": 33360 }, { "epoch": 0.27000566388866415, "grad_norm": 0.8615660667419434, "learning_rate": 9.962724734358218e-06, "loss": 0.0636, "step": 33370 }, { "epoch": 0.27008657658386603, "grad_norm": 0.7796733379364014, "learning_rate": 9.962638626431938e-06, "loss": 0.0474, "step": 33380 }, { "epoch": 0.27016748927906786, "grad_norm": 0.6800703406333923, "learning_rate": 9.962552419536305e-06, "loss": 0.0318, "step": 33390 }, { "epoch": 0.27024840197426975, "grad_norm": 0.9323595762252808, "learning_rate": 9.962466113673043e-06, "loss": 0.0468, "step": 33400 }, { "epoch": 0.27032931466947163, "grad_norm": 1.2734850645065308, "learning_rate": 9.96237970884387e-06, "loss": 0.0464, "step": 33410 }, { "epoch": 0.2704102273646735, "grad_norm": 1.2985854148864746, "learning_rate": 9.962293205050509e-06, "loss": 0.061, "step": 33420 }, { "epoch": 0.2704911400598754, "grad_norm": 0.5455251336097717, "learning_rate": 9.962206602294685e-06, "loss": 0.044, "step": 33430 }, { "epoch": 0.2705720527550773, "grad_norm": 0.5570245981216431, "learning_rate": 9.962119900578126e-06, "loss": 0.0473, "step": 33440 }, { "epoch": 0.27065296545027917, "grad_norm": 0.5869678854942322, "learning_rate": 9.962033099902563e-06, "loss": 0.0363, "step": 33450 }, { "epoch": 0.270733878145481, "grad_norm": 0.8597975373268127, "learning_rate": 9.961946200269725e-06, "loss": 0.0527, "step": 33460 }, { "epoch": 0.2708147908406829, "grad_norm": 0.3194691836833954, "learning_rate": 9.961859201681345e-06, "loss": 0.0487, "step": 33470 }, { "epoch": 0.27089570353588477, "grad_norm": 0.8286179304122925, "learning_rate": 9.961772104139157e-06, "loss": 0.0298, "step": 33480 }, { "epoch": 0.27097661623108665, "grad_norm": 0.23972965776920319, "learning_rate": 9.9616849076449e-06, "loss": 0.0385, "step": 33490 }, { "epoch": 0.27105752892628854, "grad_norm": 0.5908406376838684, "learning_rate": 9.961597612200311e-06, "loss": 0.0451, "step": 33500 }, { "epoch": 0.2711384416214904, "grad_norm": 0.4990825653076172, "learning_rate": 9.961510217807133e-06, "loss": 0.0482, "step": 33510 }, { "epoch": 0.2712193543166923, "grad_norm": 0.4596593677997589, "learning_rate": 9.96142272446711e-06, "loss": 0.0425, "step": 33520 }, { "epoch": 0.2713002670118942, "grad_norm": 0.3393253684043884, "learning_rate": 9.961335132181981e-06, "loss": 0.0388, "step": 33530 }, { "epoch": 0.271381179707096, "grad_norm": 1.3241106271743774, "learning_rate": 9.961247440953498e-06, "loss": 0.0581, "step": 33540 }, { "epoch": 0.2714620924022979, "grad_norm": 0.4307621121406555, "learning_rate": 9.96115965078341e-06, "loss": 0.0383, "step": 33550 }, { "epoch": 0.2715430050974998, "grad_norm": 0.6783545613288879, "learning_rate": 9.961071761673464e-06, "loss": 0.045, "step": 33560 }, { "epoch": 0.2716239177927017, "grad_norm": 1.2759090662002563, "learning_rate": 9.960983773625416e-06, "loss": 0.0493, "step": 33570 }, { "epoch": 0.27170483048790356, "grad_norm": 0.590523898601532, "learning_rate": 9.960895686641019e-06, "loss": 0.0407, "step": 33580 }, { "epoch": 0.27178574318310544, "grad_norm": 0.5473130941390991, "learning_rate": 9.960807500722031e-06, "loss": 0.0386, "step": 33590 }, { "epoch": 0.27186665587830733, "grad_norm": 0.5007550120353699, "learning_rate": 9.960719215870212e-06, "loss": 0.0439, "step": 33600 }, { "epoch": 0.27194756857350916, "grad_norm": 0.8051178455352783, "learning_rate": 9.960630832087318e-06, "loss": 0.0471, "step": 33610 }, { "epoch": 0.27202848126871104, "grad_norm": 0.7085632681846619, "learning_rate": 9.960542349375114e-06, "loss": 0.0567, "step": 33620 }, { "epoch": 0.2721093939639129, "grad_norm": 0.3402642011642456, "learning_rate": 9.960453767735368e-06, "loss": 0.0465, "step": 33630 }, { "epoch": 0.2721903066591148, "grad_norm": 0.9134573936462402, "learning_rate": 9.96036508716984e-06, "loss": 0.0578, "step": 33640 }, { "epoch": 0.2722712193543167, "grad_norm": 0.6622021198272705, "learning_rate": 9.960276307680306e-06, "loss": 0.0422, "step": 33650 }, { "epoch": 0.2723521320495186, "grad_norm": 0.5048582553863525, "learning_rate": 9.960187429268531e-06, "loss": 0.055, "step": 33660 }, { "epoch": 0.27243304474472047, "grad_norm": 0.632560670375824, "learning_rate": 9.960098451936288e-06, "loss": 0.0588, "step": 33670 }, { "epoch": 0.27251395743992235, "grad_norm": 0.547921895980835, "learning_rate": 9.960009375685354e-06, "loss": 0.0339, "step": 33680 }, { "epoch": 0.2725948701351242, "grad_norm": 0.8843469023704529, "learning_rate": 9.959920200517504e-06, "loss": 0.0574, "step": 33690 }, { "epoch": 0.27267578283032606, "grad_norm": 0.714104175567627, "learning_rate": 9.959830926434517e-06, "loss": 0.0776, "step": 33700 }, { "epoch": 0.27275669552552795, "grad_norm": 0.9765660762786865, "learning_rate": 9.959741553438171e-06, "loss": 0.0346, "step": 33710 }, { "epoch": 0.27283760822072983, "grad_norm": 0.6450172662734985, "learning_rate": 9.959652081530251e-06, "loss": 0.0686, "step": 33720 }, { "epoch": 0.2729185209159317, "grad_norm": 0.5329681038856506, "learning_rate": 9.95956251071254e-06, "loss": 0.0457, "step": 33730 }, { "epoch": 0.2729994336111336, "grad_norm": 0.5128107666969299, "learning_rate": 9.959472840986826e-06, "loss": 0.051, "step": 33740 }, { "epoch": 0.2730803463063355, "grad_norm": 0.3550022840499878, "learning_rate": 9.959383072354896e-06, "loss": 0.0309, "step": 33750 }, { "epoch": 0.2731612590015373, "grad_norm": 0.40920689702033997, "learning_rate": 9.95929320481854e-06, "loss": 0.0372, "step": 33760 }, { "epoch": 0.2732421716967392, "grad_norm": 0.6793919205665588, "learning_rate": 9.95920323837955e-06, "loss": 0.0474, "step": 33770 }, { "epoch": 0.2733230843919411, "grad_norm": 0.6085530519485474, "learning_rate": 9.95911317303972e-06, "loss": 0.0508, "step": 33780 }, { "epoch": 0.27340399708714297, "grad_norm": 1.0484927892684937, "learning_rate": 9.95902300880085e-06, "loss": 0.0382, "step": 33790 }, { "epoch": 0.27348490978234485, "grad_norm": 0.6051323413848877, "learning_rate": 9.958932745664733e-06, "loss": 0.0669, "step": 33800 }, { "epoch": 0.27356582247754674, "grad_norm": 0.6502716541290283, "learning_rate": 9.958842383633172e-06, "loss": 0.0285, "step": 33810 }, { "epoch": 0.2736467351727486, "grad_norm": 0.5296608209609985, "learning_rate": 9.958751922707966e-06, "loss": 0.0439, "step": 33820 }, { "epoch": 0.2737276478679505, "grad_norm": 0.6843606233596802, "learning_rate": 9.958661362890924e-06, "loss": 0.0541, "step": 33830 }, { "epoch": 0.27380856056315234, "grad_norm": 1.1188243627548218, "learning_rate": 9.958570704183847e-06, "loss": 0.0494, "step": 33840 }, { "epoch": 0.2738894732583542, "grad_norm": 0.5173065662384033, "learning_rate": 9.958479946588547e-06, "loss": 0.0631, "step": 33850 }, { "epoch": 0.2739703859535561, "grad_norm": 0.6531108021736145, "learning_rate": 9.958389090106832e-06, "loss": 0.0507, "step": 33860 }, { "epoch": 0.274051298648758, "grad_norm": 0.5771774649620056, "learning_rate": 9.958298134740515e-06, "loss": 0.0464, "step": 33870 }, { "epoch": 0.2741322113439599, "grad_norm": 0.8401802182197571, "learning_rate": 9.958207080491406e-06, "loss": 0.051, "step": 33880 }, { "epoch": 0.27421312403916176, "grad_norm": 0.43524447083473206, "learning_rate": 9.958115927361327e-06, "loss": 0.035, "step": 33890 }, { "epoch": 0.27429403673436364, "grad_norm": 0.49099501967430115, "learning_rate": 9.958024675352092e-06, "loss": 0.0529, "step": 33900 }, { "epoch": 0.2743749494295655, "grad_norm": 1.180955171585083, "learning_rate": 9.957933324465522e-06, "loss": 0.0495, "step": 33910 }, { "epoch": 0.27445586212476736, "grad_norm": 0.3083014190196991, "learning_rate": 9.957841874703437e-06, "loss": 0.0341, "step": 33920 }, { "epoch": 0.27453677481996924, "grad_norm": 0.5276231169700623, "learning_rate": 9.957750326067664e-06, "loss": 0.048, "step": 33930 }, { "epoch": 0.2746176875151711, "grad_norm": 0.956828773021698, "learning_rate": 9.957658678560027e-06, "loss": 0.0414, "step": 33940 }, { "epoch": 0.274698600210373, "grad_norm": 0.4388667047023773, "learning_rate": 9.957566932182354e-06, "loss": 0.0398, "step": 33950 }, { "epoch": 0.2747795129055749, "grad_norm": 0.8758415579795837, "learning_rate": 9.957475086936472e-06, "loss": 0.0404, "step": 33960 }, { "epoch": 0.2748604256007768, "grad_norm": 0.548684298992157, "learning_rate": 9.957383142824218e-06, "loss": 0.045, "step": 33970 }, { "epoch": 0.27494133829597867, "grad_norm": 0.8523415923118591, "learning_rate": 9.957291099847421e-06, "loss": 0.0567, "step": 33980 }, { "epoch": 0.2750222509911805, "grad_norm": 0.6465195417404175, "learning_rate": 9.957198958007919e-06, "loss": 0.0424, "step": 33990 }, { "epoch": 0.2751031636863824, "grad_norm": 0.6422009468078613, "learning_rate": 9.957106717307549e-06, "loss": 0.0445, "step": 34000 }, { "epoch": 0.27518407638158426, "grad_norm": 0.7316571474075317, "learning_rate": 9.957014377748149e-06, "loss": 0.0435, "step": 34010 }, { "epoch": 0.27526498907678615, "grad_norm": 0.6713027358055115, "learning_rate": 9.956921939331565e-06, "loss": 0.0479, "step": 34020 }, { "epoch": 0.27534590177198803, "grad_norm": 0.5453844666481018, "learning_rate": 9.956829402059635e-06, "loss": 0.0362, "step": 34030 }, { "epoch": 0.2754268144671899, "grad_norm": 0.7184612154960632, "learning_rate": 9.956736765934207e-06, "loss": 0.0485, "step": 34040 }, { "epoch": 0.2755077271623918, "grad_norm": 0.7893450260162354, "learning_rate": 9.956644030957129e-06, "loss": 0.058, "step": 34050 }, { "epoch": 0.27558863985759363, "grad_norm": 0.7885065078735352, "learning_rate": 9.956551197130249e-06, "loss": 0.0373, "step": 34060 }, { "epoch": 0.2756695525527955, "grad_norm": 0.4336790442466736, "learning_rate": 9.95645826445542e-06, "loss": 0.0252, "step": 34070 }, { "epoch": 0.2757504652479974, "grad_norm": 0.5495743751525879, "learning_rate": 9.956365232934494e-06, "loss": 0.038, "step": 34080 }, { "epoch": 0.2758313779431993, "grad_norm": 0.6515175104141235, "learning_rate": 9.956272102569327e-06, "loss": 0.0456, "step": 34090 }, { "epoch": 0.27591229063840117, "grad_norm": 0.885901927947998, "learning_rate": 9.956178873361777e-06, "loss": 0.0476, "step": 34100 }, { "epoch": 0.27599320333360305, "grad_norm": 0.2752375900745392, "learning_rate": 9.956085545313701e-06, "loss": 0.0508, "step": 34110 }, { "epoch": 0.27607411602880494, "grad_norm": 0.7984657287597656, "learning_rate": 9.955992118426962e-06, "loss": 0.0378, "step": 34120 }, { "epoch": 0.2761550287240068, "grad_norm": 0.5414879322052002, "learning_rate": 9.955898592703421e-06, "loss": 0.0632, "step": 34130 }, { "epoch": 0.27623594141920865, "grad_norm": 0.7209152579307556, "learning_rate": 9.955804968144948e-06, "loss": 0.0557, "step": 34140 }, { "epoch": 0.27631685411441054, "grad_norm": 0.6956205368041992, "learning_rate": 9.955711244753406e-06, "loss": 0.0545, "step": 34150 }, { "epoch": 0.2763977668096124, "grad_norm": 0.2584274411201477, "learning_rate": 9.955617422530663e-06, "loss": 0.0549, "step": 34160 }, { "epoch": 0.2764786795048143, "grad_norm": 0.8937658071517944, "learning_rate": 9.955523501478595e-06, "loss": 0.0475, "step": 34170 }, { "epoch": 0.2765595922000162, "grad_norm": 0.4829820692539215, "learning_rate": 9.955429481599072e-06, "loss": 0.0508, "step": 34180 }, { "epoch": 0.2766405048952181, "grad_norm": 0.5273256301879883, "learning_rate": 9.955335362893968e-06, "loss": 0.043, "step": 34190 }, { "epoch": 0.27672141759041996, "grad_norm": 0.6495336294174194, "learning_rate": 9.955241145365161e-06, "loss": 0.0387, "step": 34200 }, { "epoch": 0.2768023302856218, "grad_norm": 0.5382211804389954, "learning_rate": 9.955146829014532e-06, "loss": 0.0348, "step": 34210 }, { "epoch": 0.2768832429808237, "grad_norm": 0.7149454355239868, "learning_rate": 9.955052413843961e-06, "loss": 0.0439, "step": 34220 }, { "epoch": 0.27696415567602556, "grad_norm": 0.46892887353897095, "learning_rate": 9.954957899855329e-06, "loss": 0.0309, "step": 34230 }, { "epoch": 0.27704506837122744, "grad_norm": 0.40025466680526733, "learning_rate": 9.954863287050522e-06, "loss": 0.0512, "step": 34240 }, { "epoch": 0.2771259810664293, "grad_norm": 0.6964324712753296, "learning_rate": 9.954768575431425e-06, "loss": 0.0546, "step": 34250 }, { "epoch": 0.2772068937616312, "grad_norm": 0.23885659873485565, "learning_rate": 9.954673764999932e-06, "loss": 0.0281, "step": 34260 }, { "epoch": 0.2772878064568331, "grad_norm": 0.5822404623031616, "learning_rate": 9.95457885575793e-06, "loss": 0.0362, "step": 34270 }, { "epoch": 0.277368719152035, "grad_norm": 0.18733173608779907, "learning_rate": 9.95448384770731e-06, "loss": 0.0578, "step": 34280 }, { "epoch": 0.2774496318472368, "grad_norm": 0.7719162702560425, "learning_rate": 9.954388740849972e-06, "loss": 0.0588, "step": 34290 }, { "epoch": 0.2775305445424387, "grad_norm": 0.3205936551094055, "learning_rate": 9.954293535187808e-06, "loss": 0.0311, "step": 34300 }, { "epoch": 0.2776114572376406, "grad_norm": 0.5878570675849915, "learning_rate": 9.954198230722718e-06, "loss": 0.0526, "step": 34310 }, { "epoch": 0.27769236993284246, "grad_norm": 0.28550639748573303, "learning_rate": 9.954102827456604e-06, "loss": 0.0442, "step": 34320 }, { "epoch": 0.27777328262804435, "grad_norm": 0.5779843926429749, "learning_rate": 9.954007325391368e-06, "loss": 0.0494, "step": 34330 }, { "epoch": 0.27785419532324623, "grad_norm": 0.7318888306617737, "learning_rate": 9.953911724528913e-06, "loss": 0.0499, "step": 34340 }, { "epoch": 0.2779351080184481, "grad_norm": 0.6383810639381409, "learning_rate": 9.953816024871147e-06, "loss": 0.0409, "step": 34350 }, { "epoch": 0.27801602071364995, "grad_norm": 0.8902926445007324, "learning_rate": 9.953720226419981e-06, "loss": 0.0469, "step": 34360 }, { "epoch": 0.27809693340885183, "grad_norm": 0.42559775710105896, "learning_rate": 9.95362432917732e-06, "loss": 0.0332, "step": 34370 }, { "epoch": 0.2781778461040537, "grad_norm": 0.4830814599990845, "learning_rate": 9.953528333145079e-06, "loss": 0.04, "step": 34380 }, { "epoch": 0.2782587587992556, "grad_norm": 0.572964608669281, "learning_rate": 9.953432238325174e-06, "loss": 0.0579, "step": 34390 }, { "epoch": 0.2783396714944575, "grad_norm": 0.6526927351951599, "learning_rate": 9.953336044719521e-06, "loss": 0.0575, "step": 34400 }, { "epoch": 0.27842058418965937, "grad_norm": 0.6327121257781982, "learning_rate": 9.953239752330035e-06, "loss": 0.0542, "step": 34410 }, { "epoch": 0.27850149688486125, "grad_norm": 0.40788400173187256, "learning_rate": 9.953143361158642e-06, "loss": 0.0306, "step": 34420 }, { "epoch": 0.27858240958006314, "grad_norm": 0.9265632033348083, "learning_rate": 9.95304687120726e-06, "loss": 0.0572, "step": 34430 }, { "epoch": 0.27866332227526497, "grad_norm": 0.6192615032196045, "learning_rate": 9.952950282477813e-06, "loss": 0.0482, "step": 34440 }, { "epoch": 0.27874423497046685, "grad_norm": 0.2770644426345825, "learning_rate": 9.952853594972231e-06, "loss": 0.0348, "step": 34450 }, { "epoch": 0.27882514766566874, "grad_norm": 0.8075755834579468, "learning_rate": 9.952756808692438e-06, "loss": 0.0572, "step": 34460 }, { "epoch": 0.2789060603608706, "grad_norm": 0.8280466198921204, "learning_rate": 9.952659923640367e-06, "loss": 0.0801, "step": 34470 }, { "epoch": 0.2789869730560725, "grad_norm": 0.23648816347122192, "learning_rate": 9.95256293981795e-06, "loss": 0.0412, "step": 34480 }, { "epoch": 0.2790678857512744, "grad_norm": 0.3312396705150604, "learning_rate": 9.95246585722712e-06, "loss": 0.0415, "step": 34490 }, { "epoch": 0.2791487984464763, "grad_norm": 0.8866857290267944, "learning_rate": 9.952368675869814e-06, "loss": 0.0568, "step": 34500 }, { "epoch": 0.2792297111416781, "grad_norm": 0.38213977217674255, "learning_rate": 9.952271395747969e-06, "loss": 0.0488, "step": 34510 }, { "epoch": 0.27931062383688, "grad_norm": 0.5196926593780518, "learning_rate": 9.952174016863525e-06, "loss": 0.0285, "step": 34520 }, { "epoch": 0.2793915365320819, "grad_norm": 0.901751697063446, "learning_rate": 9.952076539218427e-06, "loss": 0.046, "step": 34530 }, { "epoch": 0.27947244922728376, "grad_norm": 0.9736380577087402, "learning_rate": 9.951978962814615e-06, "loss": 0.0623, "step": 34540 }, { "epoch": 0.27955336192248564, "grad_norm": 0.7488484382629395, "learning_rate": 9.951881287654037e-06, "loss": 0.0634, "step": 34550 }, { "epoch": 0.27963427461768753, "grad_norm": 0.44473230838775635, "learning_rate": 9.95178351373864e-06, "loss": 0.0374, "step": 34560 }, { "epoch": 0.2797151873128894, "grad_norm": 0.5147312879562378, "learning_rate": 9.951685641070375e-06, "loss": 0.0322, "step": 34570 }, { "epoch": 0.27979610000809124, "grad_norm": 0.8659231066703796, "learning_rate": 9.951587669651194e-06, "loss": 0.041, "step": 34580 }, { "epoch": 0.2798770127032931, "grad_norm": 0.7169462442398071, "learning_rate": 9.95148959948305e-06, "loss": 0.0446, "step": 34590 }, { "epoch": 0.279957925398495, "grad_norm": 0.9390867352485657, "learning_rate": 9.951391430567899e-06, "loss": 0.0519, "step": 34600 }, { "epoch": 0.2800388380936969, "grad_norm": 0.7830169796943665, "learning_rate": 9.951293162907699e-06, "loss": 0.0434, "step": 34610 }, { "epoch": 0.2801197507888988, "grad_norm": 0.8985644578933716, "learning_rate": 9.951194796504409e-06, "loss": 0.0471, "step": 34620 }, { "epoch": 0.28020066348410066, "grad_norm": 1.1476974487304688, "learning_rate": 9.951096331359991e-06, "loss": 0.0507, "step": 34630 }, { "epoch": 0.28028157617930255, "grad_norm": 0.8122144937515259, "learning_rate": 9.950997767476408e-06, "loss": 0.0595, "step": 34640 }, { "epoch": 0.28036248887450443, "grad_norm": 0.45063093304634094, "learning_rate": 9.950899104855629e-06, "loss": 0.0486, "step": 34650 }, { "epoch": 0.28044340156970626, "grad_norm": 0.4636550545692444, "learning_rate": 9.950800343499615e-06, "loss": 0.0446, "step": 34660 }, { "epoch": 0.28052431426490815, "grad_norm": 0.39713427424430847, "learning_rate": 9.950701483410345e-06, "loss": 0.046, "step": 34670 }, { "epoch": 0.28060522696011003, "grad_norm": 0.709828794002533, "learning_rate": 9.950602524589783e-06, "loss": 0.0489, "step": 34680 }, { "epoch": 0.2806861396553119, "grad_norm": 0.328794002532959, "learning_rate": 9.950503467039904e-06, "loss": 0.0471, "step": 34690 }, { "epoch": 0.2807670523505138, "grad_norm": 0.5437069535255432, "learning_rate": 9.950404310762685e-06, "loss": 0.06, "step": 34700 }, { "epoch": 0.2808479650457157, "grad_norm": 1.0749889612197876, "learning_rate": 9.950305055760103e-06, "loss": 0.0532, "step": 34710 }, { "epoch": 0.28092887774091757, "grad_norm": 0.8906980752944946, "learning_rate": 9.950205702034138e-06, "loss": 0.0354, "step": 34720 }, { "epoch": 0.2810097904361194, "grad_norm": 0.6448034048080444, "learning_rate": 9.950106249586768e-06, "loss": 0.0311, "step": 34730 }, { "epoch": 0.2810907031313213, "grad_norm": 0.4915757179260254, "learning_rate": 9.950006698419981e-06, "loss": 0.0439, "step": 34740 }, { "epoch": 0.28117161582652317, "grad_norm": 0.5093185901641846, "learning_rate": 9.94990704853576e-06, "loss": 0.04, "step": 34750 }, { "epoch": 0.28125252852172505, "grad_norm": 0.8641079068183899, "learning_rate": 9.949807299936091e-06, "loss": 0.0409, "step": 34760 }, { "epoch": 0.28133344121692694, "grad_norm": 1.0719561576843262, "learning_rate": 9.949707452622968e-06, "loss": 0.0618, "step": 34770 }, { "epoch": 0.2814143539121288, "grad_norm": 0.6426004767417908, "learning_rate": 9.949607506598377e-06, "loss": 0.0418, "step": 34780 }, { "epoch": 0.2814952666073307, "grad_norm": 0.7951350212097168, "learning_rate": 9.949507461864316e-06, "loss": 0.0579, "step": 34790 }, { "epoch": 0.2815761793025326, "grad_norm": 0.43192023038864136, "learning_rate": 9.949407318422774e-06, "loss": 0.0458, "step": 34800 }, { "epoch": 0.2816570919977344, "grad_norm": 0.4225710928440094, "learning_rate": 9.949307076275755e-06, "loss": 0.0449, "step": 34810 }, { "epoch": 0.2817380046929363, "grad_norm": 1.082705020904541, "learning_rate": 9.949206735425251e-06, "loss": 0.0482, "step": 34820 }, { "epoch": 0.2818189173881382, "grad_norm": 0.5070005059242249, "learning_rate": 9.94910629587327e-06, "loss": 0.0325, "step": 34830 }, { "epoch": 0.2818998300833401, "grad_norm": 0.8123472332954407, "learning_rate": 9.94900575762181e-06, "loss": 0.0505, "step": 34840 }, { "epoch": 0.28198074277854196, "grad_norm": 1.0506306886672974, "learning_rate": 9.94890512067288e-06, "loss": 0.0396, "step": 34850 }, { "epoch": 0.28206165547374384, "grad_norm": 0.6562406420707703, "learning_rate": 9.948804385028484e-06, "loss": 0.055, "step": 34860 }, { "epoch": 0.28214256816894573, "grad_norm": 0.8759064078330994, "learning_rate": 9.948703550690632e-06, "loss": 0.0387, "step": 34870 }, { "epoch": 0.28222348086414756, "grad_norm": 0.880370557308197, "learning_rate": 9.948602617661333e-06, "loss": 0.0405, "step": 34880 }, { "epoch": 0.28230439355934944, "grad_norm": 0.8534006476402283, "learning_rate": 9.948501585942604e-06, "loss": 0.044, "step": 34890 }, { "epoch": 0.2823853062545513, "grad_norm": 0.4520668685436249, "learning_rate": 9.948400455536458e-06, "loss": 0.0483, "step": 34900 }, { "epoch": 0.2824662189497532, "grad_norm": 0.5326128005981445, "learning_rate": 9.94829922644491e-06, "loss": 0.0438, "step": 34910 }, { "epoch": 0.2825471316449551, "grad_norm": 0.6795939207077026, "learning_rate": 9.94819789866998e-06, "loss": 0.044, "step": 34920 }, { "epoch": 0.282628044340157, "grad_norm": 0.9384762048721313, "learning_rate": 9.948096472213692e-06, "loss": 0.0505, "step": 34930 }, { "epoch": 0.28270895703535887, "grad_norm": 0.3196338713169098, "learning_rate": 9.947994947078062e-06, "loss": 0.0565, "step": 34940 }, { "epoch": 0.28278986973056075, "grad_norm": 0.32452163100242615, "learning_rate": 9.947893323265121e-06, "loss": 0.0461, "step": 34950 }, { "epoch": 0.2828707824257626, "grad_norm": 1.3101962804794312, "learning_rate": 9.947791600776892e-06, "loss": 0.0312, "step": 34960 }, { "epoch": 0.28295169512096446, "grad_norm": 0.5443291664123535, "learning_rate": 9.947689779615404e-06, "loss": 0.0603, "step": 34970 }, { "epoch": 0.28303260781616635, "grad_norm": 0.8862490057945251, "learning_rate": 9.947587859782691e-06, "loss": 0.0563, "step": 34980 }, { "epoch": 0.28311352051136823, "grad_norm": 1.1020711660385132, "learning_rate": 9.94748584128078e-06, "loss": 0.0421, "step": 34990 }, { "epoch": 0.2831944332065701, "grad_norm": 2.0871198177337646, "learning_rate": 9.94738372411171e-06, "loss": 0.0451, "step": 35000 }, { "epoch": 0.283275345901772, "grad_norm": 0.6959064602851868, "learning_rate": 9.947281508277516e-06, "loss": 0.043, "step": 35010 }, { "epoch": 0.2833562585969739, "grad_norm": 0.7457898855209351, "learning_rate": 9.947179193780235e-06, "loss": 0.0439, "step": 35020 }, { "epoch": 0.2834371712921757, "grad_norm": 1.0025826692581177, "learning_rate": 9.947076780621911e-06, "loss": 0.0374, "step": 35030 }, { "epoch": 0.2835180839873776, "grad_norm": 0.351576030254364, "learning_rate": 9.946974268804583e-06, "loss": 0.0389, "step": 35040 }, { "epoch": 0.2835989966825795, "grad_norm": 0.18129917979240417, "learning_rate": 9.946871658330297e-06, "loss": 0.0484, "step": 35050 }, { "epoch": 0.28367990937778137, "grad_norm": 0.6310262680053711, "learning_rate": 9.9467689492011e-06, "loss": 0.0273, "step": 35060 }, { "epoch": 0.28376082207298325, "grad_norm": 0.4127587080001831, "learning_rate": 9.94666614141904e-06, "loss": 0.0491, "step": 35070 }, { "epoch": 0.28384173476818514, "grad_norm": 0.47536835074424744, "learning_rate": 9.946563234986163e-06, "loss": 0.0471, "step": 35080 }, { "epoch": 0.283922647463387, "grad_norm": 0.2842257618904114, "learning_rate": 9.94646022990453e-06, "loss": 0.0366, "step": 35090 }, { "epoch": 0.2840035601585889, "grad_norm": 0.40195176005363464, "learning_rate": 9.946357126176185e-06, "loss": 0.0504, "step": 35100 }, { "epoch": 0.28408447285379074, "grad_norm": 0.4511937201023102, "learning_rate": 9.946253923803194e-06, "loss": 0.0577, "step": 35110 }, { "epoch": 0.2841653855489926, "grad_norm": 1.13835608959198, "learning_rate": 9.946150622787606e-06, "loss": 0.0652, "step": 35120 }, { "epoch": 0.2842462982441945, "grad_norm": 0.6439389586448669, "learning_rate": 9.94604722313149e-06, "loss": 0.0478, "step": 35130 }, { "epoch": 0.2843272109393964, "grad_norm": 0.5720061659812927, "learning_rate": 9.945943724836901e-06, "loss": 0.0261, "step": 35140 }, { "epoch": 0.2844081236345983, "grad_norm": 1.0243767499923706, "learning_rate": 9.945840127905905e-06, "loss": 0.059, "step": 35150 }, { "epoch": 0.28448903632980016, "grad_norm": 0.7157610058784485, "learning_rate": 9.945736432340572e-06, "loss": 0.0528, "step": 35160 }, { "epoch": 0.28456994902500204, "grad_norm": 0.4044739603996277, "learning_rate": 9.945632638142965e-06, "loss": 0.044, "step": 35170 }, { "epoch": 0.2846508617202039, "grad_norm": 0.32025760412216187, "learning_rate": 9.945528745315156e-06, "loss": 0.0426, "step": 35180 }, { "epoch": 0.28473177441540576, "grad_norm": 0.7980149984359741, "learning_rate": 9.945424753859217e-06, "loss": 0.0323, "step": 35190 }, { "epoch": 0.28481268711060764, "grad_norm": 0.8500398397445679, "learning_rate": 9.945320663777221e-06, "loss": 0.0655, "step": 35200 }, { "epoch": 0.2848935998058095, "grad_norm": 0.1924341320991516, "learning_rate": 9.945216475071244e-06, "loss": 0.0337, "step": 35210 }, { "epoch": 0.2849745125010114, "grad_norm": 1.4575860500335693, "learning_rate": 9.945112187743364e-06, "loss": 0.0652, "step": 35220 }, { "epoch": 0.2850554251962133, "grad_norm": 0.24126987159252167, "learning_rate": 9.945007801795662e-06, "loss": 0.0432, "step": 35230 }, { "epoch": 0.2851363378914152, "grad_norm": 0.36343520879745483, "learning_rate": 9.944903317230217e-06, "loss": 0.0591, "step": 35240 }, { "epoch": 0.28521725058661707, "grad_norm": 0.5847533345222473, "learning_rate": 9.944798734049116e-06, "loss": 0.0353, "step": 35250 }, { "epoch": 0.2852981632818189, "grad_norm": 0.6068160533905029, "learning_rate": 9.944694052254443e-06, "loss": 0.0376, "step": 35260 }, { "epoch": 0.2853790759770208, "grad_norm": 0.6735630631446838, "learning_rate": 9.944589271848287e-06, "loss": 0.0335, "step": 35270 }, { "epoch": 0.28545998867222266, "grad_norm": 0.38478127121925354, "learning_rate": 9.944484392832735e-06, "loss": 0.0461, "step": 35280 }, { "epoch": 0.28554090136742455, "grad_norm": 0.7646549940109253, "learning_rate": 9.944379415209879e-06, "loss": 0.0401, "step": 35290 }, { "epoch": 0.28562181406262643, "grad_norm": 0.2371666133403778, "learning_rate": 9.944274338981816e-06, "loss": 0.0441, "step": 35300 }, { "epoch": 0.2857027267578283, "grad_norm": 0.8736867308616638, "learning_rate": 9.944169164150637e-06, "loss": 0.0712, "step": 35310 }, { "epoch": 0.2857836394530302, "grad_norm": 0.27461767196655273, "learning_rate": 9.944063890718443e-06, "loss": 0.0537, "step": 35320 }, { "epoch": 0.28586455214823203, "grad_norm": 0.6345472931861877, "learning_rate": 9.943958518687332e-06, "loss": 0.0418, "step": 35330 }, { "epoch": 0.2859454648434339, "grad_norm": 0.32028287649154663, "learning_rate": 9.943853048059406e-06, "loss": 0.037, "step": 35340 }, { "epoch": 0.2860263775386358, "grad_norm": 0.6157971024513245, "learning_rate": 9.943747478836768e-06, "loss": 0.0333, "step": 35350 }, { "epoch": 0.2861072902338377, "grad_norm": 0.4802490770816803, "learning_rate": 9.94364181102152e-06, "loss": 0.0385, "step": 35360 }, { "epoch": 0.28618820292903957, "grad_norm": 0.7259635329246521, "learning_rate": 9.943536044615776e-06, "loss": 0.0387, "step": 35370 }, { "epoch": 0.28626911562424145, "grad_norm": 0.5147840976715088, "learning_rate": 9.94343017962164e-06, "loss": 0.0662, "step": 35380 }, { "epoch": 0.28635002831944334, "grad_norm": 0.7961235046386719, "learning_rate": 9.943324216041228e-06, "loss": 0.0394, "step": 35390 }, { "epoch": 0.2864309410146452, "grad_norm": 0.5837525129318237, "learning_rate": 9.943218153876648e-06, "loss": 0.0561, "step": 35400 }, { "epoch": 0.28651185370984705, "grad_norm": 0.465991348028183, "learning_rate": 9.943111993130017e-06, "loss": 0.0477, "step": 35410 }, { "epoch": 0.28659276640504894, "grad_norm": 0.6272336840629578, "learning_rate": 9.943005733803453e-06, "loss": 0.0334, "step": 35420 }, { "epoch": 0.2866736791002508, "grad_norm": 0.5912268161773682, "learning_rate": 9.942899375899075e-06, "loss": 0.048, "step": 35430 }, { "epoch": 0.2867545917954527, "grad_norm": 0.6992387175559998, "learning_rate": 9.942792919419004e-06, "loss": 0.0575, "step": 35440 }, { "epoch": 0.2868355044906546, "grad_norm": 0.6134114265441895, "learning_rate": 9.942686364365363e-06, "loss": 0.0391, "step": 35450 }, { "epoch": 0.2869164171858565, "grad_norm": 0.5513981580734253, "learning_rate": 9.942579710740279e-06, "loss": 0.046, "step": 35460 }, { "epoch": 0.28699732988105836, "grad_norm": 0.6429114937782288, "learning_rate": 9.942472958545875e-06, "loss": 0.0555, "step": 35470 }, { "epoch": 0.2870782425762602, "grad_norm": 0.7792860865592957, "learning_rate": 9.94236610778428e-06, "loss": 0.0473, "step": 35480 }, { "epoch": 0.2871591552714621, "grad_norm": 0.6147590279579163, "learning_rate": 9.94225915845763e-06, "loss": 0.0344, "step": 35490 }, { "epoch": 0.28724006796666396, "grad_norm": 0.6813704371452332, "learning_rate": 9.942152110568054e-06, "loss": 0.0339, "step": 35500 }, { "epoch": 0.28732098066186584, "grad_norm": 0.38218680024147034, "learning_rate": 9.942044964117686e-06, "loss": 0.0416, "step": 35510 }, { "epoch": 0.28740189335706773, "grad_norm": 0.6627833843231201, "learning_rate": 9.941937719108665e-06, "loss": 0.0554, "step": 35520 }, { "epoch": 0.2874828060522696, "grad_norm": 0.45818644762039185, "learning_rate": 9.941830375543129e-06, "loss": 0.0435, "step": 35530 }, { "epoch": 0.2875637187474715, "grad_norm": 0.2948266267776489, "learning_rate": 9.941722933423218e-06, "loss": 0.0336, "step": 35540 }, { "epoch": 0.2876446314426734, "grad_norm": 0.7996973991394043, "learning_rate": 9.941615392751076e-06, "loss": 0.0636, "step": 35550 }, { "epoch": 0.2877255441378752, "grad_norm": 0.6671268939971924, "learning_rate": 9.941507753528848e-06, "loss": 0.0436, "step": 35560 }, { "epoch": 0.2878064568330771, "grad_norm": 1.0221776962280273, "learning_rate": 9.94140001575868e-06, "loss": 0.0387, "step": 35570 }, { "epoch": 0.287887369528279, "grad_norm": 0.6905931234359741, "learning_rate": 9.94129217944272e-06, "loss": 0.0526, "step": 35580 }, { "epoch": 0.28796828222348086, "grad_norm": 1.11894690990448, "learning_rate": 9.94118424458312e-06, "loss": 0.071, "step": 35590 }, { "epoch": 0.28804919491868275, "grad_norm": 0.5445373058319092, "learning_rate": 9.94107621118203e-06, "loss": 0.0386, "step": 35600 }, { "epoch": 0.28813010761388463, "grad_norm": 0.5002903342247009, "learning_rate": 9.940968079241607e-06, "loss": 0.0371, "step": 35610 }, { "epoch": 0.2882110203090865, "grad_norm": 0.6676822900772095, "learning_rate": 9.940859848764006e-06, "loss": 0.0595, "step": 35620 }, { "epoch": 0.28829193300428835, "grad_norm": 0.4716779291629791, "learning_rate": 9.940751519751386e-06, "loss": 0.0544, "step": 35630 }, { "epoch": 0.28837284569949023, "grad_norm": 0.836391806602478, "learning_rate": 9.940643092205906e-06, "loss": 0.032, "step": 35640 }, { "epoch": 0.2884537583946921, "grad_norm": 0.7182209491729736, "learning_rate": 9.940534566129731e-06, "loss": 0.0425, "step": 35650 }, { "epoch": 0.288534671089894, "grad_norm": 0.9079069495201111, "learning_rate": 9.940425941525025e-06, "loss": 0.037, "step": 35660 }, { "epoch": 0.2886155837850959, "grad_norm": 0.4951775074005127, "learning_rate": 9.94031721839395e-06, "loss": 0.0602, "step": 35670 }, { "epoch": 0.28869649648029777, "grad_norm": 0.2476067990064621, "learning_rate": 9.940208396738683e-06, "loss": 0.0345, "step": 35680 }, { "epoch": 0.28877740917549966, "grad_norm": 0.6417489647865295, "learning_rate": 9.940099476561384e-06, "loss": 0.0428, "step": 35690 }, { "epoch": 0.28885832187070154, "grad_norm": 0.7298353314399719, "learning_rate": 9.939990457864233e-06, "loss": 0.0516, "step": 35700 }, { "epoch": 0.28893923456590337, "grad_norm": 0.6408496499061584, "learning_rate": 9.939881340649398e-06, "loss": 0.0464, "step": 35710 }, { "epoch": 0.28902014726110525, "grad_norm": 0.6611146330833435, "learning_rate": 9.93977212491906e-06, "loss": 0.0391, "step": 35720 }, { "epoch": 0.28910105995630714, "grad_norm": 1.214778184890747, "learning_rate": 9.939662810675396e-06, "loss": 0.0223, "step": 35730 }, { "epoch": 0.289181972651509, "grad_norm": 0.353753000497818, "learning_rate": 9.939553397920585e-06, "loss": 0.0239, "step": 35740 }, { "epoch": 0.2892628853467109, "grad_norm": 0.9006156921386719, "learning_rate": 9.939443886656807e-06, "loss": 0.0565, "step": 35750 }, { "epoch": 0.2893437980419128, "grad_norm": 0.1928948163986206, "learning_rate": 9.939334276886252e-06, "loss": 0.0332, "step": 35760 }, { "epoch": 0.2894247107371147, "grad_norm": 0.6572046875953674, "learning_rate": 9.9392245686111e-06, "loss": 0.0299, "step": 35770 }, { "epoch": 0.2895056234323165, "grad_norm": 0.250529944896698, "learning_rate": 9.939114761833541e-06, "loss": 0.0412, "step": 35780 }, { "epoch": 0.2895865361275184, "grad_norm": 0.2919073700904846, "learning_rate": 9.939004856555766e-06, "loss": 0.0406, "step": 35790 }, { "epoch": 0.2896674488227203, "grad_norm": 0.38826295733451843, "learning_rate": 9.938894852779965e-06, "loss": 0.0497, "step": 35800 }, { "epoch": 0.28974836151792216, "grad_norm": 0.6336848735809326, "learning_rate": 9.938784750508334e-06, "loss": 0.0403, "step": 35810 }, { "epoch": 0.28982927421312404, "grad_norm": 0.4532375931739807, "learning_rate": 9.938674549743066e-06, "loss": 0.0389, "step": 35820 }, { "epoch": 0.28991018690832593, "grad_norm": 0.5411675572395325, "learning_rate": 9.93856425048636e-06, "loss": 0.0574, "step": 35830 }, { "epoch": 0.2899910996035278, "grad_norm": 0.8518543839454651, "learning_rate": 9.938453852740417e-06, "loss": 0.0345, "step": 35840 }, { "epoch": 0.2900720122987297, "grad_norm": 0.569425106048584, "learning_rate": 9.938343356507437e-06, "loss": 0.03, "step": 35850 }, { "epoch": 0.2901529249939315, "grad_norm": 0.6588754653930664, "learning_rate": 9.938232761789623e-06, "loss": 0.0309, "step": 35860 }, { "epoch": 0.2902338376891334, "grad_norm": 1.059718370437622, "learning_rate": 9.938122068589181e-06, "loss": 0.0354, "step": 35870 }, { "epoch": 0.2903147503843353, "grad_norm": 0.8588891625404358, "learning_rate": 9.938011276908322e-06, "loss": 0.046, "step": 35880 }, { "epoch": 0.2903956630795372, "grad_norm": 0.46453166007995605, "learning_rate": 9.937900386749252e-06, "loss": 0.0644, "step": 35890 }, { "epoch": 0.29047657577473907, "grad_norm": 0.2835204601287842, "learning_rate": 9.93778939811418e-06, "loss": 0.0438, "step": 35900 }, { "epoch": 0.29055748846994095, "grad_norm": 0.6830979585647583, "learning_rate": 9.937678311005327e-06, "loss": 0.0314, "step": 35910 }, { "epoch": 0.29063840116514283, "grad_norm": 0.7236546277999878, "learning_rate": 9.937567125424903e-06, "loss": 0.044, "step": 35920 }, { "epoch": 0.29071931386034466, "grad_norm": 0.43927261233329773, "learning_rate": 9.937455841375126e-06, "loss": 0.055, "step": 35930 }, { "epoch": 0.29080022655554655, "grad_norm": 0.5589374303817749, "learning_rate": 9.937344458858213e-06, "loss": 0.0311, "step": 35940 }, { "epoch": 0.29088113925074843, "grad_norm": 0.7758035659790039, "learning_rate": 9.937232977876391e-06, "loss": 0.0458, "step": 35950 }, { "epoch": 0.2909620519459503, "grad_norm": 0.7416560053825378, "learning_rate": 9.93712139843188e-06, "loss": 0.0527, "step": 35960 }, { "epoch": 0.2910429646411522, "grad_norm": 0.49536147713661194, "learning_rate": 9.937009720526904e-06, "loss": 0.0459, "step": 35970 }, { "epoch": 0.2911238773363541, "grad_norm": 1.028632402420044, "learning_rate": 9.936897944163692e-06, "loss": 0.0587, "step": 35980 }, { "epoch": 0.29120479003155597, "grad_norm": 0.8200334906578064, "learning_rate": 9.936786069344473e-06, "loss": 0.0492, "step": 35990 }, { "epoch": 0.29128570272675786, "grad_norm": 0.48610594868659973, "learning_rate": 9.93667409607148e-06, "loss": 0.0547, "step": 36000 }, { "epoch": 0.2913666154219597, "grad_norm": 0.609442949295044, "learning_rate": 9.93656202434694e-06, "loss": 0.0595, "step": 36010 }, { "epoch": 0.29144752811716157, "grad_norm": 0.3973000645637512, "learning_rate": 9.936449854173094e-06, "loss": 0.0365, "step": 36020 }, { "epoch": 0.29152844081236345, "grad_norm": 0.5330668091773987, "learning_rate": 9.936337585552177e-06, "loss": 0.0339, "step": 36030 }, { "epoch": 0.29160935350756534, "grad_norm": 0.5858084559440613, "learning_rate": 9.936225218486428e-06, "loss": 0.0482, "step": 36040 }, { "epoch": 0.2916902662027672, "grad_norm": 0.45429202914237976, "learning_rate": 9.936112752978088e-06, "loss": 0.0447, "step": 36050 }, { "epoch": 0.2917711788979691, "grad_norm": 0.7730396389961243, "learning_rate": 9.9360001890294e-06, "loss": 0.0293, "step": 36060 }, { "epoch": 0.291852091593171, "grad_norm": 0.6773461699485779, "learning_rate": 9.935887526642607e-06, "loss": 0.0474, "step": 36070 }, { "epoch": 0.2919330042883728, "grad_norm": 0.6582056283950806, "learning_rate": 9.935774765819958e-06, "loss": 0.0452, "step": 36080 }, { "epoch": 0.2920139169835747, "grad_norm": 0.6711367964744568, "learning_rate": 9.935661906563702e-06, "loss": 0.0481, "step": 36090 }, { "epoch": 0.2920948296787766, "grad_norm": 0.5714929699897766, "learning_rate": 9.935548948876089e-06, "loss": 0.0412, "step": 36100 }, { "epoch": 0.2921757423739785, "grad_norm": 0.6549409627914429, "learning_rate": 9.935435892759371e-06, "loss": 0.0311, "step": 36110 }, { "epoch": 0.29225665506918036, "grad_norm": 0.7924679517745972, "learning_rate": 9.935322738215802e-06, "loss": 0.0448, "step": 36120 }, { "epoch": 0.29233756776438224, "grad_norm": 0.69939124584198, "learning_rate": 9.935209485247641e-06, "loss": 0.05, "step": 36130 }, { "epoch": 0.29241848045958413, "grad_norm": 0.46495184302330017, "learning_rate": 9.935096133857147e-06, "loss": 0.0459, "step": 36140 }, { "epoch": 0.29249939315478596, "grad_norm": 1.1463390588760376, "learning_rate": 9.934982684046578e-06, "loss": 0.0449, "step": 36150 }, { "epoch": 0.29258030584998784, "grad_norm": 0.7163764238357544, "learning_rate": 9.934869135818196e-06, "loss": 0.0589, "step": 36160 }, { "epoch": 0.2926612185451897, "grad_norm": 0.6926640272140503, "learning_rate": 9.934755489174269e-06, "loss": 0.0489, "step": 36170 }, { "epoch": 0.2927421312403916, "grad_norm": 0.34646591544151306, "learning_rate": 9.934641744117062e-06, "loss": 0.07, "step": 36180 }, { "epoch": 0.2928230439355935, "grad_norm": 0.674712061882019, "learning_rate": 9.93452790064884e-06, "loss": 0.0518, "step": 36190 }, { "epoch": 0.2929039566307954, "grad_norm": 0.6935110688209534, "learning_rate": 9.934413958771878e-06, "loss": 0.0355, "step": 36200 }, { "epoch": 0.29298486932599727, "grad_norm": 0.5883094072341919, "learning_rate": 9.934299918488448e-06, "loss": 0.0706, "step": 36210 }, { "epoch": 0.29306578202119915, "grad_norm": 0.7767115831375122, "learning_rate": 9.934185779800821e-06, "loss": 0.0531, "step": 36220 }, { "epoch": 0.293146694716401, "grad_norm": 0.07425655424594879, "learning_rate": 9.934071542711276e-06, "loss": 0.0356, "step": 36230 }, { "epoch": 0.29322760741160286, "grad_norm": 0.5394445061683655, "learning_rate": 9.93395720722209e-06, "loss": 0.0343, "step": 36240 }, { "epoch": 0.29330852010680475, "grad_norm": 0.7146538496017456, "learning_rate": 9.933842773335544e-06, "loss": 0.0321, "step": 36250 }, { "epoch": 0.29338943280200663, "grad_norm": 0.884430468082428, "learning_rate": 9.93372824105392e-06, "loss": 0.0719, "step": 36260 }, { "epoch": 0.2934703454972085, "grad_norm": 0.5207576751708984, "learning_rate": 9.9336136103795e-06, "loss": 0.026, "step": 36270 }, { "epoch": 0.2935512581924104, "grad_norm": 0.6024983525276184, "learning_rate": 9.933498881314573e-06, "loss": 0.05, "step": 36280 }, { "epoch": 0.2936321708876123, "grad_norm": 0.6007747650146484, "learning_rate": 9.933384053861426e-06, "loss": 0.0424, "step": 36290 }, { "epoch": 0.2937130835828141, "grad_norm": 0.6164447069168091, "learning_rate": 9.933269128022349e-06, "loss": 0.0353, "step": 36300 }, { "epoch": 0.293793996278016, "grad_norm": 0.6113278865814209, "learning_rate": 9.933154103799633e-06, "loss": 0.0467, "step": 36310 }, { "epoch": 0.2938749089732179, "grad_norm": 0.9699889421463013, "learning_rate": 9.933038981195573e-06, "loss": 0.0349, "step": 36320 }, { "epoch": 0.29395582166841977, "grad_norm": 0.3339667320251465, "learning_rate": 9.932923760212466e-06, "loss": 0.0263, "step": 36330 }, { "epoch": 0.29403673436362165, "grad_norm": 0.9275769591331482, "learning_rate": 9.932808440852608e-06, "loss": 0.0433, "step": 36340 }, { "epoch": 0.29411764705882354, "grad_norm": 0.6142210364341736, "learning_rate": 9.932693023118299e-06, "loss": 0.0595, "step": 36350 }, { "epoch": 0.2941985597540254, "grad_norm": 0.29193878173828125, "learning_rate": 9.932577507011841e-06, "loss": 0.0355, "step": 36360 }, { "epoch": 0.2942794724492273, "grad_norm": 0.6507354378700256, "learning_rate": 9.932461892535537e-06, "loss": 0.0337, "step": 36370 }, { "epoch": 0.29436038514442914, "grad_norm": 0.944089412689209, "learning_rate": 9.932346179691694e-06, "loss": 0.0402, "step": 36380 }, { "epoch": 0.294441297839631, "grad_norm": 0.841498851776123, "learning_rate": 9.932230368482619e-06, "loss": 0.0415, "step": 36390 }, { "epoch": 0.2945222105348329, "grad_norm": 0.7477786540985107, "learning_rate": 9.932114458910622e-06, "loss": 0.0582, "step": 36400 }, { "epoch": 0.2946031232300348, "grad_norm": 0.5873681306838989, "learning_rate": 9.931998450978014e-06, "loss": 0.0329, "step": 36410 }, { "epoch": 0.2946840359252367, "grad_norm": 0.9688295722007751, "learning_rate": 9.93188234468711e-06, "loss": 0.0695, "step": 36420 }, { "epoch": 0.29476494862043856, "grad_norm": 0.533054769039154, "learning_rate": 9.931766140040222e-06, "loss": 0.0323, "step": 36430 }, { "epoch": 0.29484586131564045, "grad_norm": 0.8699128031730652, "learning_rate": 9.93164983703967e-06, "loss": 0.05, "step": 36440 }, { "epoch": 0.2949267740108423, "grad_norm": 0.642436683177948, "learning_rate": 9.931533435687775e-06, "loss": 0.039, "step": 36450 }, { "epoch": 0.29500768670604416, "grad_norm": 0.34075355529785156, "learning_rate": 9.931416935986855e-06, "loss": 0.0475, "step": 36460 }, { "epoch": 0.29508859940124604, "grad_norm": 0.5190231800079346, "learning_rate": 9.931300337939237e-06, "loss": 0.0455, "step": 36470 }, { "epoch": 0.29516951209644793, "grad_norm": 1.0752938985824585, "learning_rate": 9.931183641547241e-06, "loss": 0.0467, "step": 36480 }, { "epoch": 0.2952504247916498, "grad_norm": 0.8067091107368469, "learning_rate": 9.9310668468132e-06, "loss": 0.0553, "step": 36490 }, { "epoch": 0.2953313374868517, "grad_norm": 0.97560054063797, "learning_rate": 9.93094995373944e-06, "loss": 0.0596, "step": 36500 }, { "epoch": 0.2954122501820536, "grad_norm": 0.42076683044433594, "learning_rate": 9.930832962328291e-06, "loss": 0.0468, "step": 36510 }, { "epoch": 0.29549316287725547, "grad_norm": 0.7608758211135864, "learning_rate": 9.93071587258209e-06, "loss": 0.0382, "step": 36520 }, { "epoch": 0.2955740755724573, "grad_norm": 0.9557967782020569, "learning_rate": 9.930598684503172e-06, "loss": 0.0412, "step": 36530 }, { "epoch": 0.2956549882676592, "grad_norm": 0.5299069881439209, "learning_rate": 9.93048139809387e-06, "loss": 0.0442, "step": 36540 }, { "epoch": 0.29573590096286106, "grad_norm": 0.9032467603683472, "learning_rate": 9.930364013356524e-06, "loss": 0.0403, "step": 36550 }, { "epoch": 0.29581681365806295, "grad_norm": 0.5886718034744263, "learning_rate": 9.930246530293479e-06, "loss": 0.0402, "step": 36560 }, { "epoch": 0.29589772635326483, "grad_norm": 0.4330650269985199, "learning_rate": 9.930128948907072e-06, "loss": 0.0409, "step": 36570 }, { "epoch": 0.2959786390484667, "grad_norm": 0.8454062342643738, "learning_rate": 9.930011269199652e-06, "loss": 0.0423, "step": 36580 }, { "epoch": 0.2960595517436686, "grad_norm": 0.8622735142707825, "learning_rate": 9.929893491173566e-06, "loss": 0.0402, "step": 36590 }, { "epoch": 0.29614046443887043, "grad_norm": 0.5418763756752014, "learning_rate": 9.929775614831163e-06, "loss": 0.0381, "step": 36600 }, { "epoch": 0.2962213771340723, "grad_norm": 0.5827068090438843, "learning_rate": 9.92965764017479e-06, "loss": 0.0517, "step": 36610 }, { "epoch": 0.2963022898292742, "grad_norm": 0.7095098495483398, "learning_rate": 9.929539567206804e-06, "loss": 0.0552, "step": 36620 }, { "epoch": 0.2963832025244761, "grad_norm": 0.6064661741256714, "learning_rate": 9.929421395929557e-06, "loss": 0.0399, "step": 36630 }, { "epoch": 0.29646411521967797, "grad_norm": 0.5053990483283997, "learning_rate": 9.929303126345408e-06, "loss": 0.0384, "step": 36640 }, { "epoch": 0.29654502791487986, "grad_norm": 0.5969393849372864, "learning_rate": 9.929184758456714e-06, "loss": 0.0378, "step": 36650 }, { "epoch": 0.29662594061008174, "grad_norm": 0.7900319695472717, "learning_rate": 9.929066292265835e-06, "loss": 0.0486, "step": 36660 }, { "epoch": 0.2967068533052836, "grad_norm": 0.14704333245754242, "learning_rate": 9.928947727775134e-06, "loss": 0.0337, "step": 36670 }, { "epoch": 0.29678776600048545, "grad_norm": 0.4205383062362671, "learning_rate": 9.928829064986978e-06, "loss": 0.0304, "step": 36680 }, { "epoch": 0.29686867869568734, "grad_norm": 0.4387606978416443, "learning_rate": 9.92871030390373e-06, "loss": 0.044, "step": 36690 }, { "epoch": 0.2969495913908892, "grad_norm": 0.6270425319671631, "learning_rate": 9.928591444527762e-06, "loss": 0.0275, "step": 36700 }, { "epoch": 0.2970305040860911, "grad_norm": 0.8208450675010681, "learning_rate": 9.92847248686144e-06, "loss": 0.0598, "step": 36710 }, { "epoch": 0.297111416781293, "grad_norm": 0.586637556552887, "learning_rate": 9.928353430907139e-06, "loss": 0.0375, "step": 36720 }, { "epoch": 0.2971923294764949, "grad_norm": 0.5307767391204834, "learning_rate": 9.928234276667234e-06, "loss": 0.059, "step": 36730 }, { "epoch": 0.29727324217169676, "grad_norm": 0.6261017918586731, "learning_rate": 9.9281150241441e-06, "loss": 0.0366, "step": 36740 }, { "epoch": 0.2973541548668986, "grad_norm": 0.5044910907745361, "learning_rate": 9.927995673340115e-06, "loss": 0.0415, "step": 36750 }, { "epoch": 0.2974350675621005, "grad_norm": 0.8165953755378723, "learning_rate": 9.927876224257661e-06, "loss": 0.0407, "step": 36760 }, { "epoch": 0.29751598025730236, "grad_norm": 0.32836413383483887, "learning_rate": 9.927756676899119e-06, "loss": 0.0454, "step": 36770 }, { "epoch": 0.29759689295250424, "grad_norm": 0.5595213174819946, "learning_rate": 9.927637031266872e-06, "loss": 0.0481, "step": 36780 }, { "epoch": 0.29767780564770613, "grad_norm": 0.8140352964401245, "learning_rate": 9.927517287363306e-06, "loss": 0.046, "step": 36790 }, { "epoch": 0.297758718342908, "grad_norm": 0.054985616356134415, "learning_rate": 9.927397445190811e-06, "loss": 0.0469, "step": 36800 }, { "epoch": 0.2978396310381099, "grad_norm": 0.6142961382865906, "learning_rate": 9.927277504751778e-06, "loss": 0.047, "step": 36810 }, { "epoch": 0.2979205437333118, "grad_norm": 0.702594518661499, "learning_rate": 9.927157466048596e-06, "loss": 0.037, "step": 36820 }, { "epoch": 0.2980014564285136, "grad_norm": 0.5063331723213196, "learning_rate": 9.92703732908366e-06, "loss": 0.0496, "step": 36830 }, { "epoch": 0.2980823691237155, "grad_norm": 0.39692422747612, "learning_rate": 9.926917093859365e-06, "loss": 0.038, "step": 36840 }, { "epoch": 0.2981632818189174, "grad_norm": 0.4958197772502899, "learning_rate": 9.926796760378111e-06, "loss": 0.0548, "step": 36850 }, { "epoch": 0.29824419451411927, "grad_norm": 0.32427069544792175, "learning_rate": 9.926676328642296e-06, "loss": 0.0437, "step": 36860 }, { "epoch": 0.29832510720932115, "grad_norm": 0.5451346635818481, "learning_rate": 9.926555798654321e-06, "loss": 0.0678, "step": 36870 }, { "epoch": 0.29840601990452303, "grad_norm": 0.5512063503265381, "learning_rate": 9.926435170416593e-06, "loss": 0.0457, "step": 36880 }, { "epoch": 0.2984869325997249, "grad_norm": 0.40515586733818054, "learning_rate": 9.926314443931514e-06, "loss": 0.0386, "step": 36890 }, { "epoch": 0.29856784529492675, "grad_norm": 0.5441513657569885, "learning_rate": 9.926193619201495e-06, "loss": 0.0592, "step": 36900 }, { "epoch": 0.29864875799012863, "grad_norm": 0.677615761756897, "learning_rate": 9.926072696228943e-06, "loss": 0.049, "step": 36910 }, { "epoch": 0.2987296706853305, "grad_norm": 0.6719714403152466, "learning_rate": 9.92595167501627e-06, "loss": 0.0584, "step": 36920 }, { "epoch": 0.2988105833805324, "grad_norm": 0.43103229999542236, "learning_rate": 9.925830555565892e-06, "loss": 0.0321, "step": 36930 }, { "epoch": 0.2988914960757343, "grad_norm": 0.3287673890590668, "learning_rate": 9.92570933788022e-06, "loss": 0.0546, "step": 36940 }, { "epoch": 0.29897240877093617, "grad_norm": 0.646548867225647, "learning_rate": 9.925588021961676e-06, "loss": 0.0344, "step": 36950 }, { "epoch": 0.29905332146613806, "grad_norm": 0.41267073154449463, "learning_rate": 9.925466607812678e-06, "loss": 0.0437, "step": 36960 }, { "epoch": 0.29913423416133994, "grad_norm": 0.9445767998695374, "learning_rate": 9.925345095435644e-06, "loss": 0.0491, "step": 36970 }, { "epoch": 0.29921514685654177, "grad_norm": 0.35032305121421814, "learning_rate": 9.925223484833e-06, "loss": 0.0602, "step": 36980 }, { "epoch": 0.29929605955174365, "grad_norm": 0.9642422795295715, "learning_rate": 9.925101776007172e-06, "loss": 0.0556, "step": 36990 }, { "epoch": 0.29937697224694554, "grad_norm": 0.47270435094833374, "learning_rate": 9.924979968960587e-06, "loss": 0.0483, "step": 37000 }, { "epoch": 0.2994578849421474, "grad_norm": 0.8163424134254456, "learning_rate": 9.924858063695672e-06, "loss": 0.0417, "step": 37010 }, { "epoch": 0.2995387976373493, "grad_norm": 0.7810652852058411, "learning_rate": 9.924736060214861e-06, "loss": 0.0509, "step": 37020 }, { "epoch": 0.2996197103325512, "grad_norm": 0.8253675699234009, "learning_rate": 9.924613958520586e-06, "loss": 0.0333, "step": 37030 }, { "epoch": 0.2997006230277531, "grad_norm": 0.6676558256149292, "learning_rate": 9.924491758615283e-06, "loss": 0.0413, "step": 37040 }, { "epoch": 0.2997815357229549, "grad_norm": 0.5716888308525085, "learning_rate": 9.924369460501386e-06, "loss": 0.0498, "step": 37050 }, { "epoch": 0.2998624484181568, "grad_norm": 0.134004145860672, "learning_rate": 9.924247064181336e-06, "loss": 0.0381, "step": 37060 }, { "epoch": 0.2999433611133587, "grad_norm": 0.2651268541812897, "learning_rate": 9.924124569657575e-06, "loss": 0.0376, "step": 37070 }, { "epoch": 0.30002427380856056, "grad_norm": 0.8845474720001221, "learning_rate": 9.924001976932544e-06, "loss": 0.0653, "step": 37080 }, { "epoch": 0.30010518650376244, "grad_norm": 0.2815548777580261, "learning_rate": 9.923879286008688e-06, "loss": 0.0375, "step": 37090 }, { "epoch": 0.30018609919896433, "grad_norm": 0.2921842038631439, "learning_rate": 9.923756496888456e-06, "loss": 0.0487, "step": 37100 }, { "epoch": 0.3002670118941662, "grad_norm": 0.43540701270103455, "learning_rate": 9.923633609574294e-06, "loss": 0.0452, "step": 37110 }, { "epoch": 0.3003479245893681, "grad_norm": 0.6098528504371643, "learning_rate": 9.923510624068656e-06, "loss": 0.0548, "step": 37120 }, { "epoch": 0.3004288372845699, "grad_norm": 0.2961197793483734, "learning_rate": 9.92338754037399e-06, "loss": 0.0457, "step": 37130 }, { "epoch": 0.3005097499797718, "grad_norm": 1.0075796842575073, "learning_rate": 9.923264358492753e-06, "loss": 0.0415, "step": 37140 }, { "epoch": 0.3005906626749737, "grad_norm": 0.8345069289207458, "learning_rate": 9.923141078427403e-06, "loss": 0.048, "step": 37150 }, { "epoch": 0.3006715753701756, "grad_norm": 0.37523508071899414, "learning_rate": 9.923017700180397e-06, "loss": 0.0257, "step": 37160 }, { "epoch": 0.30075248806537747, "grad_norm": 0.5167249441146851, "learning_rate": 9.922894223754197e-06, "loss": 0.0399, "step": 37170 }, { "epoch": 0.30083340076057935, "grad_norm": 0.5995023846626282, "learning_rate": 9.922770649151264e-06, "loss": 0.0445, "step": 37180 }, { "epoch": 0.30091431345578123, "grad_norm": 0.6241310238838196, "learning_rate": 9.922646976374061e-06, "loss": 0.0281, "step": 37190 }, { "epoch": 0.30099522615098306, "grad_norm": 0.7599341869354248, "learning_rate": 9.92252320542506e-06, "loss": 0.0471, "step": 37200 }, { "epoch": 0.30107613884618495, "grad_norm": 0.7978308796882629, "learning_rate": 9.922399336306721e-06, "loss": 0.0454, "step": 37210 }, { "epoch": 0.30115705154138683, "grad_norm": 0.6133334040641785, "learning_rate": 9.922275369021521e-06, "loss": 0.0558, "step": 37220 }, { "epoch": 0.3012379642365887, "grad_norm": 0.730751097202301, "learning_rate": 9.922151303571932e-06, "loss": 0.0348, "step": 37230 }, { "epoch": 0.3013188769317906, "grad_norm": 0.44179439544677734, "learning_rate": 9.922027139960424e-06, "loss": 0.0584, "step": 37240 }, { "epoch": 0.3013997896269925, "grad_norm": 1.544996976852417, "learning_rate": 9.921902878189475e-06, "loss": 0.0399, "step": 37250 }, { "epoch": 0.30148070232219437, "grad_norm": 0.5212435126304626, "learning_rate": 9.921778518261566e-06, "loss": 0.0409, "step": 37260 }, { "epoch": 0.30156161501739626, "grad_norm": 0.7423763871192932, "learning_rate": 9.921654060179173e-06, "loss": 0.0374, "step": 37270 }, { "epoch": 0.3016425277125981, "grad_norm": 0.6239858865737915, "learning_rate": 9.921529503944782e-06, "loss": 0.064, "step": 37280 }, { "epoch": 0.30172344040779997, "grad_norm": 0.6680070757865906, "learning_rate": 9.921404849560872e-06, "loss": 0.0385, "step": 37290 }, { "epoch": 0.30180435310300185, "grad_norm": 0.5998671054840088, "learning_rate": 9.921280097029934e-06, "loss": 0.0546, "step": 37300 }, { "epoch": 0.30188526579820374, "grad_norm": 0.597098171710968, "learning_rate": 9.921155246354453e-06, "loss": 0.055, "step": 37310 }, { "epoch": 0.3019661784934056, "grad_norm": 0.464300274848938, "learning_rate": 9.92103029753692e-06, "loss": 0.0392, "step": 37320 }, { "epoch": 0.3020470911886075, "grad_norm": 0.5378941893577576, "learning_rate": 9.920905250579826e-06, "loss": 0.0654, "step": 37330 }, { "epoch": 0.3021280038838094, "grad_norm": 0.8177592754364014, "learning_rate": 9.920780105485665e-06, "loss": 0.0433, "step": 37340 }, { "epoch": 0.3022089165790112, "grad_norm": 0.7968350052833557, "learning_rate": 9.920654862256934e-06, "loss": 0.0395, "step": 37350 }, { "epoch": 0.3022898292742131, "grad_norm": 0.6261546611785889, "learning_rate": 9.920529520896127e-06, "loss": 0.0315, "step": 37360 }, { "epoch": 0.302370741969415, "grad_norm": 0.4665789008140564, "learning_rate": 9.92040408140575e-06, "loss": 0.0614, "step": 37370 }, { "epoch": 0.3024516546646169, "grad_norm": 1.089599609375, "learning_rate": 9.9202785437883e-06, "loss": 0.0487, "step": 37380 }, { "epoch": 0.30253256735981876, "grad_norm": 1.189408302307129, "learning_rate": 9.92015290804628e-06, "loss": 0.0494, "step": 37390 }, { "epoch": 0.30261348005502064, "grad_norm": 0.49669307470321655, "learning_rate": 9.920027174182199e-06, "loss": 0.049, "step": 37400 }, { "epoch": 0.30269439275022253, "grad_norm": 0.7398015260696411, "learning_rate": 9.919901342198562e-06, "loss": 0.0316, "step": 37410 }, { "epoch": 0.3027753054454244, "grad_norm": 0.8727648258209229, "learning_rate": 9.919775412097878e-06, "loss": 0.0501, "step": 37420 }, { "epoch": 0.30285621814062624, "grad_norm": 0.3396146595478058, "learning_rate": 9.919649383882662e-06, "loss": 0.0416, "step": 37430 }, { "epoch": 0.30293713083582813, "grad_norm": 0.5498019456863403, "learning_rate": 9.919523257555423e-06, "loss": 0.0364, "step": 37440 }, { "epoch": 0.30301804353103, "grad_norm": 0.24412912130355835, "learning_rate": 9.919397033118679e-06, "loss": 0.0405, "step": 37450 }, { "epoch": 0.3030989562262319, "grad_norm": 0.9034860134124756, "learning_rate": 9.919270710574945e-06, "loss": 0.0469, "step": 37460 }, { "epoch": 0.3031798689214338, "grad_norm": 0.9248724579811096, "learning_rate": 9.919144289926743e-06, "loss": 0.045, "step": 37470 }, { "epoch": 0.30326078161663567, "grad_norm": 0.35591456294059753, "learning_rate": 9.919017771176594e-06, "loss": 0.045, "step": 37480 }, { "epoch": 0.30334169431183755, "grad_norm": 0.7014693021774292, "learning_rate": 9.918891154327017e-06, "loss": 0.0481, "step": 37490 }, { "epoch": 0.3034226070070394, "grad_norm": 0.86307293176651, "learning_rate": 9.918764439380542e-06, "loss": 0.048, "step": 37500 }, { "epoch": 0.30350351970224126, "grad_norm": 0.3451574146747589, "learning_rate": 9.918637626339695e-06, "loss": 0.0489, "step": 37510 }, { "epoch": 0.30358443239744315, "grad_norm": 0.4913332164287567, "learning_rate": 9.918510715207003e-06, "loss": 0.0386, "step": 37520 }, { "epoch": 0.30366534509264503, "grad_norm": 0.42668840289115906, "learning_rate": 9.918383705984999e-06, "loss": 0.0359, "step": 37530 }, { "epoch": 0.3037462577878469, "grad_norm": 0.5714001059532166, "learning_rate": 9.918256598676215e-06, "loss": 0.0517, "step": 37540 }, { "epoch": 0.3038271704830488, "grad_norm": 0.8429690599441528, "learning_rate": 9.918129393283184e-06, "loss": 0.0414, "step": 37550 }, { "epoch": 0.3039080831782507, "grad_norm": 0.4923784136772156, "learning_rate": 9.918002089808448e-06, "loss": 0.0308, "step": 37560 }, { "epoch": 0.30398899587345257, "grad_norm": 0.47016528248786926, "learning_rate": 9.917874688254542e-06, "loss": 0.036, "step": 37570 }, { "epoch": 0.3040699085686544, "grad_norm": 1.0034348964691162, "learning_rate": 9.917747188624005e-06, "loss": 0.0574, "step": 37580 }, { "epoch": 0.3041508212638563, "grad_norm": 0.4974652826786041, "learning_rate": 9.917619590919385e-06, "loss": 0.0273, "step": 37590 }, { "epoch": 0.30423173395905817, "grad_norm": 0.9055899977684021, "learning_rate": 9.917491895143223e-06, "loss": 0.0465, "step": 37600 }, { "epoch": 0.30431264665426006, "grad_norm": 0.3428325951099396, "learning_rate": 9.917364101298065e-06, "loss": 0.035, "step": 37610 }, { "epoch": 0.30439355934946194, "grad_norm": 0.6720362901687622, "learning_rate": 9.917236209386464e-06, "loss": 0.0351, "step": 37620 }, { "epoch": 0.3044744720446638, "grad_norm": 0.9684658646583557, "learning_rate": 9.917108219410967e-06, "loss": 0.061, "step": 37630 }, { "epoch": 0.3045553847398657, "grad_norm": 0.47096866369247437, "learning_rate": 9.916980131374124e-06, "loss": 0.0443, "step": 37640 }, { "epoch": 0.30463629743506754, "grad_norm": 0.32882100343704224, "learning_rate": 9.916851945278494e-06, "loss": 0.0575, "step": 37650 }, { "epoch": 0.3047172101302694, "grad_norm": 0.42990100383758545, "learning_rate": 9.916723661126633e-06, "loss": 0.0429, "step": 37660 }, { "epoch": 0.3047981228254713, "grad_norm": 0.8355609178543091, "learning_rate": 9.916595278921098e-06, "loss": 0.0445, "step": 37670 }, { "epoch": 0.3048790355206732, "grad_norm": 0.3448275029659271, "learning_rate": 9.916466798664448e-06, "loss": 0.0608, "step": 37680 }, { "epoch": 0.3049599482158751, "grad_norm": 0.9264644980430603, "learning_rate": 9.916338220359249e-06, "loss": 0.0417, "step": 37690 }, { "epoch": 0.30504086091107696, "grad_norm": 0.5341200828552246, "learning_rate": 9.916209544008062e-06, "loss": 0.0423, "step": 37700 }, { "epoch": 0.30512177360627885, "grad_norm": 0.3051183521747589, "learning_rate": 9.916080769613453e-06, "loss": 0.0377, "step": 37710 }, { "epoch": 0.3052026863014807, "grad_norm": 0.8539044260978699, "learning_rate": 9.915951897177993e-06, "loss": 0.0424, "step": 37720 }, { "epoch": 0.30528359899668256, "grad_norm": 0.6466666460037231, "learning_rate": 9.91582292670425e-06, "loss": 0.0456, "step": 37730 }, { "epoch": 0.30536451169188444, "grad_norm": 0.5366626381874084, "learning_rate": 9.915693858194797e-06, "loss": 0.0383, "step": 37740 }, { "epoch": 0.30544542438708633, "grad_norm": 0.4355996549129486, "learning_rate": 9.915564691652206e-06, "loss": 0.046, "step": 37750 }, { "epoch": 0.3055263370822882, "grad_norm": 1.0590591430664062, "learning_rate": 9.915435427079056e-06, "loss": 0.0377, "step": 37760 }, { "epoch": 0.3056072497774901, "grad_norm": 0.18877847492694855, "learning_rate": 9.915306064477923e-06, "loss": 0.0312, "step": 37770 }, { "epoch": 0.305688162472692, "grad_norm": 0.2807941138744354, "learning_rate": 9.915176603851386e-06, "loss": 0.0572, "step": 37780 }, { "epoch": 0.30576907516789387, "grad_norm": 0.5933060050010681, "learning_rate": 9.91504704520203e-06, "loss": 0.0461, "step": 37790 }, { "epoch": 0.3058499878630957, "grad_norm": 0.9993179440498352, "learning_rate": 9.914917388532434e-06, "loss": 0.0465, "step": 37800 }, { "epoch": 0.3059309005582976, "grad_norm": 0.5981796979904175, "learning_rate": 9.914787633845188e-06, "loss": 0.0503, "step": 37810 }, { "epoch": 0.30601181325349947, "grad_norm": 0.42559826374053955, "learning_rate": 9.91465778114288e-06, "loss": 0.0327, "step": 37820 }, { "epoch": 0.30609272594870135, "grad_norm": 0.5355978608131409, "learning_rate": 9.914527830428096e-06, "loss": 0.0395, "step": 37830 }, { "epoch": 0.30617363864390323, "grad_norm": 0.4431266188621521, "learning_rate": 9.91439778170343e-06, "loss": 0.0417, "step": 37840 }, { "epoch": 0.3062545513391051, "grad_norm": 0.3008437156677246, "learning_rate": 9.914267634971475e-06, "loss": 0.0398, "step": 37850 }, { "epoch": 0.306335464034307, "grad_norm": 0.7592195868492126, "learning_rate": 9.914137390234826e-06, "loss": 0.0485, "step": 37860 }, { "epoch": 0.30641637672950883, "grad_norm": 0.4062443673610687, "learning_rate": 9.914007047496081e-06, "loss": 0.0389, "step": 37870 }, { "epoch": 0.3064972894247107, "grad_norm": 0.6389790773391724, "learning_rate": 9.91387660675784e-06, "loss": 0.0437, "step": 37880 }, { "epoch": 0.3065782021199126, "grad_norm": 0.643639087677002, "learning_rate": 9.913746068022702e-06, "loss": 0.0615, "step": 37890 }, { "epoch": 0.3066591148151145, "grad_norm": 0.4620574414730072, "learning_rate": 9.913615431293275e-06, "loss": 0.0516, "step": 37900 }, { "epoch": 0.30674002751031637, "grad_norm": 0.4791950285434723, "learning_rate": 9.91348469657216e-06, "loss": 0.0416, "step": 37910 }, { "epoch": 0.30682094020551826, "grad_norm": 0.21271908283233643, "learning_rate": 9.913353863861964e-06, "loss": 0.0286, "step": 37920 }, { "epoch": 0.30690185290072014, "grad_norm": 1.2838218212127686, "learning_rate": 9.913222933165299e-06, "loss": 0.0478, "step": 37930 }, { "epoch": 0.306982765595922, "grad_norm": 0.43212011456489563, "learning_rate": 9.913091904484775e-06, "loss": 0.0536, "step": 37940 }, { "epoch": 0.30706367829112385, "grad_norm": 0.5229455232620239, "learning_rate": 9.912960777823005e-06, "loss": 0.0315, "step": 37950 }, { "epoch": 0.30714459098632574, "grad_norm": 0.2736147940158844, "learning_rate": 9.912829553182604e-06, "loss": 0.0344, "step": 37960 }, { "epoch": 0.3072255036815276, "grad_norm": 0.6025230288505554, "learning_rate": 9.912698230566189e-06, "loss": 0.0538, "step": 37970 }, { "epoch": 0.3073064163767295, "grad_norm": 0.3649638593196869, "learning_rate": 9.912566809976378e-06, "loss": 0.0468, "step": 37980 }, { "epoch": 0.3073873290719314, "grad_norm": 0.72455894947052, "learning_rate": 9.912435291415794e-06, "loss": 0.0453, "step": 37990 }, { "epoch": 0.3074682417671333, "grad_norm": 0.5060855150222778, "learning_rate": 9.912303674887058e-06, "loss": 0.0505, "step": 38000 }, { "epoch": 0.30754915446233516, "grad_norm": 0.40827199816703796, "learning_rate": 9.912171960392797e-06, "loss": 0.0545, "step": 38010 }, { "epoch": 0.307630067157537, "grad_norm": 0.3590870201587677, "learning_rate": 9.912040147935636e-06, "loss": 0.0414, "step": 38020 }, { "epoch": 0.3077109798527389, "grad_norm": 0.8970696926116943, "learning_rate": 9.911908237518202e-06, "loss": 0.0497, "step": 38030 }, { "epoch": 0.30779189254794076, "grad_norm": 0.49391964077949524, "learning_rate": 9.911776229143129e-06, "loss": 0.0473, "step": 38040 }, { "epoch": 0.30787280524314264, "grad_norm": 0.6617782115936279, "learning_rate": 9.911644122813049e-06, "loss": 0.0419, "step": 38050 }, { "epoch": 0.30795371793834453, "grad_norm": 0.3396761119365692, "learning_rate": 9.911511918530593e-06, "loss": 0.0467, "step": 38060 }, { "epoch": 0.3080346306335464, "grad_norm": 0.5228695869445801, "learning_rate": 9.911379616298405e-06, "loss": 0.048, "step": 38070 }, { "epoch": 0.3081155433287483, "grad_norm": 0.7309395670890808, "learning_rate": 9.911247216119116e-06, "loss": 0.0478, "step": 38080 }, { "epoch": 0.3081964560239502, "grad_norm": 0.6927696466445923, "learning_rate": 9.911114717995371e-06, "loss": 0.0494, "step": 38090 }, { "epoch": 0.308277368719152, "grad_norm": 0.5900598764419556, "learning_rate": 9.910982121929808e-06, "loss": 0.0394, "step": 38100 }, { "epoch": 0.3083582814143539, "grad_norm": 0.5040236115455627, "learning_rate": 9.910849427925076e-06, "loss": 0.0504, "step": 38110 }, { "epoch": 0.3084391941095558, "grad_norm": 0.4603015184402466, "learning_rate": 9.91071663598382e-06, "loss": 0.0317, "step": 38120 }, { "epoch": 0.30852010680475767, "grad_norm": 0.5121473670005798, "learning_rate": 9.910583746108687e-06, "loss": 0.0426, "step": 38130 }, { "epoch": 0.30860101949995955, "grad_norm": 0.3605692386627197, "learning_rate": 9.910450758302329e-06, "loss": 0.037, "step": 38140 }, { "epoch": 0.30868193219516143, "grad_norm": 0.6665425896644592, "learning_rate": 9.910317672567395e-06, "loss": 0.0421, "step": 38150 }, { "epoch": 0.3087628448903633, "grad_norm": 0.7344208359718323, "learning_rate": 9.910184488906543e-06, "loss": 0.0449, "step": 38160 }, { "epoch": 0.30884375758556515, "grad_norm": 0.7597429752349854, "learning_rate": 9.910051207322425e-06, "loss": 0.0665, "step": 38170 }, { "epoch": 0.30892467028076703, "grad_norm": 0.44419172406196594, "learning_rate": 9.909917827817703e-06, "loss": 0.0473, "step": 38180 }, { "epoch": 0.3090055829759689, "grad_norm": 0.601816713809967, "learning_rate": 9.909784350395036e-06, "loss": 0.0287, "step": 38190 }, { "epoch": 0.3090864956711708, "grad_norm": 0.5576767325401306, "learning_rate": 9.909650775057082e-06, "loss": 0.036, "step": 38200 }, { "epoch": 0.3091674083663727, "grad_norm": 0.6084870100021362, "learning_rate": 9.90951710180651e-06, "loss": 0.0419, "step": 38210 }, { "epoch": 0.30924832106157457, "grad_norm": 0.5991477370262146, "learning_rate": 9.909383330645984e-06, "loss": 0.047, "step": 38220 }, { "epoch": 0.30932923375677646, "grad_norm": 0.9331143498420715, "learning_rate": 9.909249461578172e-06, "loss": 0.0452, "step": 38230 }, { "epoch": 0.30941014645197834, "grad_norm": 0.6872377395629883, "learning_rate": 9.909115494605742e-06, "loss": 0.0409, "step": 38240 }, { "epoch": 0.30949105914718017, "grad_norm": 0.6286384463310242, "learning_rate": 9.908981429731369e-06, "loss": 0.04, "step": 38250 }, { "epoch": 0.30957197184238205, "grad_norm": 0.3842623829841614, "learning_rate": 9.908847266957723e-06, "loss": 0.0381, "step": 38260 }, { "epoch": 0.30965288453758394, "grad_norm": 0.6033273339271545, "learning_rate": 9.908713006287481e-06, "loss": 0.0466, "step": 38270 }, { "epoch": 0.3097337972327858, "grad_norm": 0.5780929327011108, "learning_rate": 9.908578647723323e-06, "loss": 0.0382, "step": 38280 }, { "epoch": 0.3098147099279877, "grad_norm": 0.5665083527565002, "learning_rate": 9.908444191267926e-06, "loss": 0.0313, "step": 38290 }, { "epoch": 0.3098956226231896, "grad_norm": 0.6199010014533997, "learning_rate": 9.90830963692397e-06, "loss": 0.0445, "step": 38300 }, { "epoch": 0.3099765353183915, "grad_norm": 0.4746222198009491, "learning_rate": 9.908174984694141e-06, "loss": 0.0289, "step": 38310 }, { "epoch": 0.3100574480135933, "grad_norm": 0.5516718029975891, "learning_rate": 9.908040234581123e-06, "loss": 0.0563, "step": 38320 }, { "epoch": 0.3101383607087952, "grad_norm": 0.9563983082771301, "learning_rate": 9.907905386587605e-06, "loss": 0.0601, "step": 38330 }, { "epoch": 0.3102192734039971, "grad_norm": 0.3975597620010376, "learning_rate": 9.907770440716274e-06, "loss": 0.0412, "step": 38340 }, { "epoch": 0.31030018609919896, "grad_norm": 0.4559800326824188, "learning_rate": 9.907635396969823e-06, "loss": 0.028, "step": 38350 }, { "epoch": 0.31038109879440084, "grad_norm": 0.7817906141281128, "learning_rate": 9.907500255350943e-06, "loss": 0.0505, "step": 38360 }, { "epoch": 0.31046201148960273, "grad_norm": 0.4652835428714752, "learning_rate": 9.907365015862334e-06, "loss": 0.048, "step": 38370 }, { "epoch": 0.3105429241848046, "grad_norm": 0.4103715717792511, "learning_rate": 9.907229678506688e-06, "loss": 0.042, "step": 38380 }, { "epoch": 0.3106238368800065, "grad_norm": 0.2847437560558319, "learning_rate": 9.907094243286703e-06, "loss": 0.033, "step": 38390 }, { "epoch": 0.3107047495752083, "grad_norm": 0.5509154796600342, "learning_rate": 9.906958710205085e-06, "loss": 0.0309, "step": 38400 }, { "epoch": 0.3107856622704102, "grad_norm": 0.7994277477264404, "learning_rate": 9.906823079264536e-06, "loss": 0.0336, "step": 38410 }, { "epoch": 0.3108665749656121, "grad_norm": 0.694584846496582, "learning_rate": 9.906687350467757e-06, "loss": 0.0402, "step": 38420 }, { "epoch": 0.310947487660814, "grad_norm": 0.2429555356502533, "learning_rate": 9.906551523817459e-06, "loss": 0.0582, "step": 38430 }, { "epoch": 0.31102840035601587, "grad_norm": 0.6701483130455017, "learning_rate": 9.906415599316347e-06, "loss": 0.0516, "step": 38440 }, { "epoch": 0.31110931305121775, "grad_norm": 1.3937578201293945, "learning_rate": 9.906279576967135e-06, "loss": 0.0454, "step": 38450 }, { "epoch": 0.31119022574641964, "grad_norm": 2.0212624073028564, "learning_rate": 9.906143456772536e-06, "loss": 0.0552, "step": 38460 }, { "epoch": 0.31127113844162146, "grad_norm": 0.7775483727455139, "learning_rate": 9.906007238735261e-06, "loss": 0.0359, "step": 38470 }, { "epoch": 0.31135205113682335, "grad_norm": 0.4782106876373291, "learning_rate": 9.90587092285803e-06, "loss": 0.0617, "step": 38480 }, { "epoch": 0.31143296383202523, "grad_norm": 0.5153420567512512, "learning_rate": 9.905734509143558e-06, "loss": 0.0338, "step": 38490 }, { "epoch": 0.3115138765272271, "grad_norm": 0.5559794306755066, "learning_rate": 9.905597997594569e-06, "loss": 0.069, "step": 38500 }, { "epoch": 0.311594789222429, "grad_norm": 0.1831573247909546, "learning_rate": 9.905461388213786e-06, "loss": 0.0546, "step": 38510 }, { "epoch": 0.3116757019176309, "grad_norm": 0.254120796918869, "learning_rate": 9.90532468100393e-06, "loss": 0.0341, "step": 38520 }, { "epoch": 0.31175661461283277, "grad_norm": 0.6516970992088318, "learning_rate": 9.905187875967727e-06, "loss": 0.0471, "step": 38530 }, { "epoch": 0.31183752730803466, "grad_norm": 0.6375731825828552, "learning_rate": 9.905050973107909e-06, "loss": 0.0461, "step": 38540 }, { "epoch": 0.3119184400032365, "grad_norm": 0.3005201518535614, "learning_rate": 9.904913972427203e-06, "loss": 0.042, "step": 38550 }, { "epoch": 0.31199935269843837, "grad_norm": 0.4618741571903229, "learning_rate": 9.904776873928345e-06, "loss": 0.0532, "step": 38560 }, { "epoch": 0.31208026539364025, "grad_norm": 0.7141546010971069, "learning_rate": 9.904639677614066e-06, "loss": 0.0523, "step": 38570 }, { "epoch": 0.31216117808884214, "grad_norm": 0.6243036985397339, "learning_rate": 9.904502383487101e-06, "loss": 0.031, "step": 38580 }, { "epoch": 0.312242090784044, "grad_norm": 0.5749370455741882, "learning_rate": 9.904364991550191e-06, "loss": 0.0268, "step": 38590 }, { "epoch": 0.3123230034792459, "grad_norm": 0.44633057713508606, "learning_rate": 9.904227501806075e-06, "loss": 0.0342, "step": 38600 }, { "epoch": 0.3124039161744478, "grad_norm": 0.5774006843566895, "learning_rate": 9.904089914257496e-06, "loss": 0.0527, "step": 38610 }, { "epoch": 0.3124848288696496, "grad_norm": 0.18293724954128265, "learning_rate": 9.903952228907194e-06, "loss": 0.0416, "step": 38620 }, { "epoch": 0.3125657415648515, "grad_norm": 0.5624895095825195, "learning_rate": 9.903814445757917e-06, "loss": 0.0551, "step": 38630 }, { "epoch": 0.3126466542600534, "grad_norm": 0.8815599083900452, "learning_rate": 9.903676564812417e-06, "loss": 0.0468, "step": 38640 }, { "epoch": 0.3127275669552553, "grad_norm": 0.5919637084007263, "learning_rate": 9.903538586073438e-06, "loss": 0.0367, "step": 38650 }, { "epoch": 0.31280847965045716, "grad_norm": 0.7782110571861267, "learning_rate": 9.903400509543732e-06, "loss": 0.045, "step": 38660 }, { "epoch": 0.31288939234565905, "grad_norm": 1.1445097923278809, "learning_rate": 9.903262335226057e-06, "loss": 0.0562, "step": 38670 }, { "epoch": 0.31297030504086093, "grad_norm": 0.8704662919044495, "learning_rate": 9.903124063123164e-06, "loss": 0.0576, "step": 38680 }, { "epoch": 0.3130512177360628, "grad_norm": 0.8216575980186462, "learning_rate": 9.902985693237814e-06, "loss": 0.0383, "step": 38690 }, { "epoch": 0.31313213043126464, "grad_norm": 0.4330049455165863, "learning_rate": 9.902847225572764e-06, "loss": 0.0322, "step": 38700 }, { "epoch": 0.31321304312646653, "grad_norm": 0.6314824819564819, "learning_rate": 9.902708660130778e-06, "loss": 0.0523, "step": 38710 }, { "epoch": 0.3132939558216684, "grad_norm": 0.37622401118278503, "learning_rate": 9.902569996914617e-06, "loss": 0.04, "step": 38720 }, { "epoch": 0.3133748685168703, "grad_norm": 0.7991551160812378, "learning_rate": 9.902431235927048e-06, "loss": 0.036, "step": 38730 }, { "epoch": 0.3134557812120722, "grad_norm": 0.3854965269565582, "learning_rate": 9.902292377170835e-06, "loss": 0.0563, "step": 38740 }, { "epoch": 0.31353669390727407, "grad_norm": 0.3419082760810852, "learning_rate": 9.902153420648752e-06, "loss": 0.0324, "step": 38750 }, { "epoch": 0.31361760660247595, "grad_norm": 0.8418822884559631, "learning_rate": 9.902014366363569e-06, "loss": 0.0558, "step": 38760 }, { "epoch": 0.3136985192976778, "grad_norm": 0.9351010918617249, "learning_rate": 9.901875214318056e-06, "loss": 0.0474, "step": 38770 }, { "epoch": 0.31377943199287966, "grad_norm": 0.3616797626018524, "learning_rate": 9.901735964514988e-06, "loss": 0.0451, "step": 38780 }, { "epoch": 0.31386034468808155, "grad_norm": 0.45745351910591125, "learning_rate": 9.901596616957148e-06, "loss": 0.0444, "step": 38790 }, { "epoch": 0.31394125738328343, "grad_norm": 0.48653531074523926, "learning_rate": 9.90145717164731e-06, "loss": 0.0422, "step": 38800 }, { "epoch": 0.3140221700784853, "grad_norm": 0.31048092246055603, "learning_rate": 9.901317628588255e-06, "loss": 0.0344, "step": 38810 }, { "epoch": 0.3141030827736872, "grad_norm": 0.45126792788505554, "learning_rate": 9.901177987782769e-06, "loss": 0.0401, "step": 38820 }, { "epoch": 0.3141839954688891, "grad_norm": 0.551836371421814, "learning_rate": 9.901038249233634e-06, "loss": 0.0469, "step": 38830 }, { "epoch": 0.314264908164091, "grad_norm": 0.6516358256340027, "learning_rate": 9.900898412943639e-06, "loss": 0.0413, "step": 38840 }, { "epoch": 0.3143458208592928, "grad_norm": 1.3171261548995972, "learning_rate": 9.90075847891557e-06, "loss": 0.0606, "step": 38850 }, { "epoch": 0.3144267335544947, "grad_norm": 0.31672051548957825, "learning_rate": 9.900618447152217e-06, "loss": 0.0293, "step": 38860 }, { "epoch": 0.31450764624969657, "grad_norm": 0.4186655580997467, "learning_rate": 9.900478317656377e-06, "loss": 0.0376, "step": 38870 }, { "epoch": 0.31458855894489846, "grad_norm": 0.8246281147003174, "learning_rate": 9.900338090430843e-06, "loss": 0.0393, "step": 38880 }, { "epoch": 0.31466947164010034, "grad_norm": 0.2549271881580353, "learning_rate": 9.90019776547841e-06, "loss": 0.0535, "step": 38890 }, { "epoch": 0.3147503843353022, "grad_norm": 0.9462697505950928, "learning_rate": 9.900057342801877e-06, "loss": 0.0538, "step": 38900 }, { "epoch": 0.3148312970305041, "grad_norm": 0.7540861964225769, "learning_rate": 9.899916822404045e-06, "loss": 0.0382, "step": 38910 }, { "epoch": 0.31491220972570594, "grad_norm": 0.4565577805042267, "learning_rate": 9.899776204287717e-06, "loss": 0.0392, "step": 38920 }, { "epoch": 0.3149931224209078, "grad_norm": 0.6213564276695251, "learning_rate": 9.899635488455696e-06, "loss": 0.0459, "step": 38930 }, { "epoch": 0.3150740351161097, "grad_norm": 0.7722400426864624, "learning_rate": 9.899494674910788e-06, "loss": 0.0343, "step": 38940 }, { "epoch": 0.3151549478113116, "grad_norm": 0.5028978586196899, "learning_rate": 9.899353763655803e-06, "loss": 0.0504, "step": 38950 }, { "epoch": 0.3152358605065135, "grad_norm": 0.24274426698684692, "learning_rate": 9.89921275469355e-06, "loss": 0.0228, "step": 38960 }, { "epoch": 0.31531677320171536, "grad_norm": 0.5598925948143005, "learning_rate": 9.899071648026841e-06, "loss": 0.0384, "step": 38970 }, { "epoch": 0.31539768589691725, "grad_norm": 0.476571261882782, "learning_rate": 9.89893044365849e-06, "loss": 0.0375, "step": 38980 }, { "epoch": 0.31547859859211913, "grad_norm": 0.4979991316795349, "learning_rate": 9.898789141591314e-06, "loss": 0.0317, "step": 38990 }, { "epoch": 0.31555951128732096, "grad_norm": 0.7786171436309814, "learning_rate": 9.89864774182813e-06, "loss": 0.0383, "step": 39000 }, { "epoch": 0.31564042398252284, "grad_norm": 0.7317286133766174, "learning_rate": 9.89850624437176e-06, "loss": 0.0508, "step": 39010 }, { "epoch": 0.31572133667772473, "grad_norm": 0.9893482327461243, "learning_rate": 9.898364649225021e-06, "loss": 0.0601, "step": 39020 }, { "epoch": 0.3158022493729266, "grad_norm": 0.4540404677391052, "learning_rate": 9.898222956390742e-06, "loss": 0.0361, "step": 39030 }, { "epoch": 0.3158831620681285, "grad_norm": 0.986650288105011, "learning_rate": 9.898081165871747e-06, "loss": 0.0523, "step": 39040 }, { "epoch": 0.3159640747633304, "grad_norm": 0.6731560826301575, "learning_rate": 9.897939277670863e-06, "loss": 0.0403, "step": 39050 }, { "epoch": 0.31604498745853227, "grad_norm": 0.5727750062942505, "learning_rate": 9.89779729179092e-06, "loss": 0.0394, "step": 39060 }, { "epoch": 0.3161259001537341, "grad_norm": 0.304558664560318, "learning_rate": 9.89765520823475e-06, "loss": 0.0261, "step": 39070 }, { "epoch": 0.316206812848936, "grad_norm": 0.5833306908607483, "learning_rate": 9.897513027005186e-06, "loss": 0.0312, "step": 39080 }, { "epoch": 0.31628772554413787, "grad_norm": 0.4927996098995209, "learning_rate": 9.897370748105063e-06, "loss": 0.0342, "step": 39090 }, { "epoch": 0.31636863823933975, "grad_norm": 0.7439596056938171, "learning_rate": 9.89722837153722e-06, "loss": 0.0708, "step": 39100 }, { "epoch": 0.31644955093454163, "grad_norm": 0.6305651664733887, "learning_rate": 9.897085897304494e-06, "loss": 0.0332, "step": 39110 }, { "epoch": 0.3165304636297435, "grad_norm": 0.6492354869842529, "learning_rate": 9.89694332540973e-06, "loss": 0.0524, "step": 39120 }, { "epoch": 0.3166113763249454, "grad_norm": 0.44672131538391113, "learning_rate": 9.89680065585577e-06, "loss": 0.0404, "step": 39130 }, { "epoch": 0.3166922890201473, "grad_norm": 0.7580567002296448, "learning_rate": 9.896657888645457e-06, "loss": 0.0343, "step": 39140 }, { "epoch": 0.3167732017153491, "grad_norm": 0.5937213897705078, "learning_rate": 9.896515023781641e-06, "loss": 0.0536, "step": 39150 }, { "epoch": 0.316854114410551, "grad_norm": 1.0438679456710815, "learning_rate": 9.89637206126717e-06, "loss": 0.0503, "step": 39160 }, { "epoch": 0.3169350271057529, "grad_norm": 0.5459036231040955, "learning_rate": 9.896229001104893e-06, "loss": 0.04, "step": 39170 }, { "epoch": 0.31701593980095477, "grad_norm": 4.011713981628418, "learning_rate": 9.896085843297665e-06, "loss": 0.0318, "step": 39180 }, { "epoch": 0.31709685249615666, "grad_norm": 0.7366316914558411, "learning_rate": 9.895942587848342e-06, "loss": 0.0392, "step": 39190 }, { "epoch": 0.31717776519135854, "grad_norm": 0.42743971943855286, "learning_rate": 9.895799234759779e-06, "loss": 0.03, "step": 39200 }, { "epoch": 0.3172586778865604, "grad_norm": 0.3031768500804901, "learning_rate": 9.895655784034837e-06, "loss": 0.0624, "step": 39210 }, { "epoch": 0.31733959058176225, "grad_norm": 0.6527929306030273, "learning_rate": 9.895512235676375e-06, "loss": 0.0455, "step": 39220 }, { "epoch": 0.31742050327696414, "grad_norm": 0.4207584261894226, "learning_rate": 9.895368589687258e-06, "loss": 0.0317, "step": 39230 }, { "epoch": 0.317501415972166, "grad_norm": 0.6492999196052551, "learning_rate": 9.895224846070346e-06, "loss": 0.0305, "step": 39240 }, { "epoch": 0.3175823286673679, "grad_norm": 0.40254834294319153, "learning_rate": 9.89508100482851e-06, "loss": 0.0438, "step": 39250 }, { "epoch": 0.3176632413625698, "grad_norm": 0.594630241394043, "learning_rate": 9.894937065964619e-06, "loss": 0.0483, "step": 39260 }, { "epoch": 0.3177441540577717, "grad_norm": 0.7099326848983765, "learning_rate": 9.89479302948154e-06, "loss": 0.04, "step": 39270 }, { "epoch": 0.31782506675297356, "grad_norm": 0.578923225402832, "learning_rate": 9.894648895382147e-06, "loss": 0.0304, "step": 39280 }, { "epoch": 0.31790597944817545, "grad_norm": 0.4275219440460205, "learning_rate": 9.894504663669316e-06, "loss": 0.053, "step": 39290 }, { "epoch": 0.3179868921433773, "grad_norm": 0.5638468265533447, "learning_rate": 9.894360334345922e-06, "loss": 0.0361, "step": 39300 }, { "epoch": 0.31806780483857916, "grad_norm": 0.5323454141616821, "learning_rate": 9.894215907414843e-06, "loss": 0.0362, "step": 39310 }, { "epoch": 0.31814871753378104, "grad_norm": 1.9591212272644043, "learning_rate": 9.89407138287896e-06, "loss": 0.0529, "step": 39320 }, { "epoch": 0.31822963022898293, "grad_norm": 0.17551766335964203, "learning_rate": 9.893926760741155e-06, "loss": 0.0366, "step": 39330 }, { "epoch": 0.3183105429241848, "grad_norm": 0.763383686542511, "learning_rate": 9.893782041004314e-06, "loss": 0.0341, "step": 39340 }, { "epoch": 0.3183914556193867, "grad_norm": 0.5770947337150574, "learning_rate": 9.893637223671319e-06, "loss": 0.0511, "step": 39350 }, { "epoch": 0.3184723683145886, "grad_norm": 0.5624952912330627, "learning_rate": 9.893492308745064e-06, "loss": 0.0492, "step": 39360 }, { "epoch": 0.3185532810097904, "grad_norm": 0.7563048601150513, "learning_rate": 9.893347296228432e-06, "loss": 0.0428, "step": 39370 }, { "epoch": 0.3186341937049923, "grad_norm": 0.4151618182659149, "learning_rate": 9.89320218612432e-06, "loss": 0.0551, "step": 39380 }, { "epoch": 0.3187151064001942, "grad_norm": 0.7364467978477478, "learning_rate": 9.89305697843562e-06, "loss": 0.0393, "step": 39390 }, { "epoch": 0.31879601909539607, "grad_norm": 0.7882161140441895, "learning_rate": 9.892911673165228e-06, "loss": 0.0557, "step": 39400 }, { "epoch": 0.31887693179059795, "grad_norm": 0.49785116314888, "learning_rate": 9.892766270316043e-06, "loss": 0.0346, "step": 39410 }, { "epoch": 0.31895784448579984, "grad_norm": 1.1544221639633179, "learning_rate": 9.892620769890962e-06, "loss": 0.0813, "step": 39420 }, { "epoch": 0.3190387571810017, "grad_norm": 0.30497339367866516, "learning_rate": 9.892475171892891e-06, "loss": 0.031, "step": 39430 }, { "epoch": 0.31911966987620355, "grad_norm": 0.44163066148757935, "learning_rate": 9.892329476324729e-06, "loss": 0.0405, "step": 39440 }, { "epoch": 0.31920058257140543, "grad_norm": 0.7188141942024231, "learning_rate": 9.892183683189384e-06, "loss": 0.0408, "step": 39450 }, { "epoch": 0.3192814952666073, "grad_norm": 0.6791709065437317, "learning_rate": 9.892037792489766e-06, "loss": 0.0357, "step": 39460 }, { "epoch": 0.3193624079618092, "grad_norm": 0.5250696539878845, "learning_rate": 9.891891804228779e-06, "loss": 0.0316, "step": 39470 }, { "epoch": 0.3194433206570111, "grad_norm": 0.4399184286594391, "learning_rate": 9.891745718409338e-06, "loss": 0.0377, "step": 39480 }, { "epoch": 0.31952423335221297, "grad_norm": 0.6674270033836365, "learning_rate": 9.891599535034356e-06, "loss": 0.0422, "step": 39490 }, { "epoch": 0.31960514604741486, "grad_norm": 0.5341448783874512, "learning_rate": 9.891453254106747e-06, "loss": 0.0775, "step": 39500 }, { "epoch": 0.31968605874261674, "grad_norm": 0.6818796992301941, "learning_rate": 9.89130687562943e-06, "loss": 0.0342, "step": 39510 }, { "epoch": 0.31976697143781857, "grad_norm": 1.2836663722991943, "learning_rate": 9.891160399605323e-06, "loss": 0.0583, "step": 39520 }, { "epoch": 0.31984788413302045, "grad_norm": 0.7129372954368591, "learning_rate": 9.891013826037348e-06, "loss": 0.0569, "step": 39530 }, { "epoch": 0.31992879682822234, "grad_norm": 0.35761594772338867, "learning_rate": 9.890867154928427e-06, "loss": 0.0416, "step": 39540 }, { "epoch": 0.3200097095234242, "grad_norm": 0.45497244596481323, "learning_rate": 9.890720386281484e-06, "loss": 0.0327, "step": 39550 }, { "epoch": 0.3200906222186261, "grad_norm": 0.36987683176994324, "learning_rate": 9.89057352009945e-06, "loss": 0.0474, "step": 39560 }, { "epoch": 0.320171534913828, "grad_norm": 0.6847350597381592, "learning_rate": 9.890426556385252e-06, "loss": 0.0474, "step": 39570 }, { "epoch": 0.3202524476090299, "grad_norm": 0.7821822762489319, "learning_rate": 9.890279495141819e-06, "loss": 0.0472, "step": 39580 }, { "epoch": 0.3203333603042317, "grad_norm": 0.5953559279441833, "learning_rate": 9.890132336372087e-06, "loss": 0.0437, "step": 39590 }, { "epoch": 0.3204142729994336, "grad_norm": 0.5846818685531616, "learning_rate": 9.889985080078987e-06, "loss": 0.0288, "step": 39600 }, { "epoch": 0.3204951856946355, "grad_norm": 0.5890159010887146, "learning_rate": 9.889837726265459e-06, "loss": 0.0349, "step": 39610 }, { "epoch": 0.32057609838983736, "grad_norm": 0.2557406425476074, "learning_rate": 9.88969027493444e-06, "loss": 0.0663, "step": 39620 }, { "epoch": 0.32065701108503925, "grad_norm": 0.4771048426628113, "learning_rate": 9.889542726088873e-06, "loss": 0.0415, "step": 39630 }, { "epoch": 0.32073792378024113, "grad_norm": 0.8688284158706665, "learning_rate": 9.889395079731696e-06, "loss": 0.0331, "step": 39640 }, { "epoch": 0.320818836475443, "grad_norm": 0.9540833830833435, "learning_rate": 9.889247335865857e-06, "loss": 0.0722, "step": 39650 }, { "epoch": 0.3208997491706449, "grad_norm": 0.6332712173461914, "learning_rate": 9.889099494494301e-06, "loss": 0.0484, "step": 39660 }, { "epoch": 0.32098066186584673, "grad_norm": 0.5505287647247314, "learning_rate": 9.888951555619978e-06, "loss": 0.0653, "step": 39670 }, { "epoch": 0.3210615745610486, "grad_norm": 0.18512769043445587, "learning_rate": 9.888803519245836e-06, "loss": 0.0376, "step": 39680 }, { "epoch": 0.3211424872562505, "grad_norm": 0.7980678081512451, "learning_rate": 9.88865538537483e-06, "loss": 0.0427, "step": 39690 }, { "epoch": 0.3212233999514524, "grad_norm": 0.5308131575584412, "learning_rate": 9.888507154009912e-06, "loss": 0.0518, "step": 39700 }, { "epoch": 0.32130431264665427, "grad_norm": 0.486857146024704, "learning_rate": 9.888358825154039e-06, "loss": 0.0532, "step": 39710 }, { "epoch": 0.32138522534185615, "grad_norm": 0.5744452476501465, "learning_rate": 9.888210398810168e-06, "loss": 0.0248, "step": 39720 }, { "epoch": 0.32146613803705804, "grad_norm": 0.6289704442024231, "learning_rate": 9.888061874981261e-06, "loss": 0.0312, "step": 39730 }, { "epoch": 0.32154705073225986, "grad_norm": 0.7966827154159546, "learning_rate": 9.887913253670279e-06, "loss": 0.0499, "step": 39740 }, { "epoch": 0.32162796342746175, "grad_norm": 1.1657346487045288, "learning_rate": 9.887764534880187e-06, "loss": 0.0321, "step": 39750 }, { "epoch": 0.32170887612266363, "grad_norm": 0.2832331359386444, "learning_rate": 9.88761571861395e-06, "loss": 0.0274, "step": 39760 }, { "epoch": 0.3217897888178655, "grad_norm": 1.000180721282959, "learning_rate": 9.887466804874532e-06, "loss": 0.0388, "step": 39770 }, { "epoch": 0.3218707015130674, "grad_norm": 0.45814430713653564, "learning_rate": 9.88731779366491e-06, "loss": 0.039, "step": 39780 }, { "epoch": 0.3219516142082693, "grad_norm": 0.8153276443481445, "learning_rate": 9.887168684988052e-06, "loss": 0.0433, "step": 39790 }, { "epoch": 0.3220325269034712, "grad_norm": 0.6757727861404419, "learning_rate": 9.887019478846931e-06, "loss": 0.05, "step": 39800 }, { "epoch": 0.32211343959867306, "grad_norm": 1.709517002105713, "learning_rate": 9.886870175244523e-06, "loss": 0.0359, "step": 39810 }, { "epoch": 0.3221943522938749, "grad_norm": 0.35925766825675964, "learning_rate": 9.886720774183807e-06, "loss": 0.0442, "step": 39820 }, { "epoch": 0.32227526498907677, "grad_norm": 0.7002459168434143, "learning_rate": 9.88657127566776e-06, "loss": 0.0356, "step": 39830 }, { "epoch": 0.32235617768427866, "grad_norm": 0.5824680924415588, "learning_rate": 9.886421679699367e-06, "loss": 0.0397, "step": 39840 }, { "epoch": 0.32243709037948054, "grad_norm": 0.38833358883857727, "learning_rate": 9.886271986281607e-06, "loss": 0.0432, "step": 39850 }, { "epoch": 0.3225180030746824, "grad_norm": 0.7080113887786865, "learning_rate": 9.88612219541747e-06, "loss": 0.0416, "step": 39860 }, { "epoch": 0.3225989157698843, "grad_norm": 0.7499400973320007, "learning_rate": 9.88597230710994e-06, "loss": 0.0495, "step": 39870 }, { "epoch": 0.3226798284650862, "grad_norm": 0.6967626214027405, "learning_rate": 9.885822321362006e-06, "loss": 0.0585, "step": 39880 }, { "epoch": 0.322760741160288, "grad_norm": 0.5968953371047974, "learning_rate": 9.885672238176662e-06, "loss": 0.0375, "step": 39890 }, { "epoch": 0.3228416538554899, "grad_norm": 0.3996811509132385, "learning_rate": 9.885522057556897e-06, "loss": 0.0561, "step": 39900 }, { "epoch": 0.3229225665506918, "grad_norm": 0.8201739192008972, "learning_rate": 9.885371779505711e-06, "loss": 0.0495, "step": 39910 }, { "epoch": 0.3230034792458937, "grad_norm": 0.6519485712051392, "learning_rate": 9.885221404026097e-06, "loss": 0.0463, "step": 39920 }, { "epoch": 0.32308439194109556, "grad_norm": 0.2757030725479126, "learning_rate": 9.885070931121056e-06, "loss": 0.0349, "step": 39930 }, { "epoch": 0.32316530463629745, "grad_norm": 0.2566738426685333, "learning_rate": 9.884920360793586e-06, "loss": 0.0348, "step": 39940 }, { "epoch": 0.32324621733149933, "grad_norm": 0.4999472498893738, "learning_rate": 9.884769693046693e-06, "loss": 0.0532, "step": 39950 }, { "epoch": 0.3233271300267012, "grad_norm": 0.847533106803894, "learning_rate": 9.884618927883383e-06, "loss": 0.0365, "step": 39960 }, { "epoch": 0.32340804272190304, "grad_norm": 0.6879391074180603, "learning_rate": 9.884468065306658e-06, "loss": 0.0366, "step": 39970 }, { "epoch": 0.32348895541710493, "grad_norm": 0.44323843717575073, "learning_rate": 9.88431710531953e-06, "loss": 0.0375, "step": 39980 }, { "epoch": 0.3235698681123068, "grad_norm": 0.18369731307029724, "learning_rate": 9.884166047925009e-06, "loss": 0.0313, "step": 39990 }, { "epoch": 0.3236507808075087, "grad_norm": 0.7050772905349731, "learning_rate": 9.884014893126104e-06, "loss": 0.0391, "step": 40000 }, { "epoch": 0.3237316935027106, "grad_norm": 0.783042311668396, "learning_rate": 9.883863640925836e-06, "loss": 0.0366, "step": 40010 }, { "epoch": 0.32381260619791247, "grad_norm": 0.26283887028694153, "learning_rate": 9.883712291327217e-06, "loss": 0.0567, "step": 40020 }, { "epoch": 0.32389351889311435, "grad_norm": 0.43976765871047974, "learning_rate": 9.883560844333264e-06, "loss": 0.0672, "step": 40030 }, { "epoch": 0.3239744315883162, "grad_norm": 0.6505388021469116, "learning_rate": 9.883409299947002e-06, "loss": 0.038, "step": 40040 }, { "epoch": 0.32405534428351807, "grad_norm": 0.20244209468364716, "learning_rate": 9.88325765817145e-06, "loss": 0.0482, "step": 40050 }, { "epoch": 0.32413625697871995, "grad_norm": 0.36092907190322876, "learning_rate": 9.883105919009635e-06, "loss": 0.0472, "step": 40060 }, { "epoch": 0.32421716967392183, "grad_norm": 0.15501679480075836, "learning_rate": 9.882954082464579e-06, "loss": 0.0288, "step": 40070 }, { "epoch": 0.3242980823691237, "grad_norm": 0.7328652739524841, "learning_rate": 9.882802148539313e-06, "loss": 0.0357, "step": 40080 }, { "epoch": 0.3243789950643256, "grad_norm": 0.5973265767097473, "learning_rate": 9.882650117236866e-06, "loss": 0.043, "step": 40090 }, { "epoch": 0.3244599077595275, "grad_norm": 0.8955159783363342, "learning_rate": 9.88249798856027e-06, "loss": 0.0409, "step": 40100 }, { "epoch": 0.3245408204547294, "grad_norm": 0.4070000946521759, "learning_rate": 9.882345762512559e-06, "loss": 0.059, "step": 40110 }, { "epoch": 0.3246217331499312, "grad_norm": 0.7732154726982117, "learning_rate": 9.88219343909677e-06, "loss": 0.0613, "step": 40120 }, { "epoch": 0.3247026458451331, "grad_norm": 0.4397345781326294, "learning_rate": 9.882041018315937e-06, "loss": 0.0457, "step": 40130 }, { "epoch": 0.32478355854033497, "grad_norm": 0.4756471514701843, "learning_rate": 9.881888500173104e-06, "loss": 0.0454, "step": 40140 }, { "epoch": 0.32486447123553686, "grad_norm": 0.5101844072341919, "learning_rate": 9.881735884671313e-06, "loss": 0.0375, "step": 40150 }, { "epoch": 0.32494538393073874, "grad_norm": 0.4583725929260254, "learning_rate": 9.881583171813602e-06, "loss": 0.0254, "step": 40160 }, { "epoch": 0.3250262966259406, "grad_norm": 0.367404580116272, "learning_rate": 9.881430361603023e-06, "loss": 0.0372, "step": 40170 }, { "epoch": 0.3251072093211425, "grad_norm": 0.692272424697876, "learning_rate": 9.88127745404262e-06, "loss": 0.042, "step": 40180 }, { "epoch": 0.32518812201634434, "grad_norm": 0.6216253638267517, "learning_rate": 9.881124449135442e-06, "loss": 0.0383, "step": 40190 }, { "epoch": 0.3252690347115462, "grad_norm": 0.5283320546150208, "learning_rate": 9.88097134688454e-06, "loss": 0.062, "step": 40200 }, { "epoch": 0.3253499474067481, "grad_norm": 0.3955236077308655, "learning_rate": 9.880818147292973e-06, "loss": 0.0369, "step": 40210 }, { "epoch": 0.32543086010195, "grad_norm": 0.2844219207763672, "learning_rate": 9.880664850363787e-06, "loss": 0.0454, "step": 40220 }, { "epoch": 0.3255117727971519, "grad_norm": 0.603661835193634, "learning_rate": 9.880511456100047e-06, "loss": 0.0641, "step": 40230 }, { "epoch": 0.32559268549235376, "grad_norm": 0.9855119585990906, "learning_rate": 9.88035796450481e-06, "loss": 0.046, "step": 40240 }, { "epoch": 0.32567359818755565, "grad_norm": 0.5010437965393066, "learning_rate": 9.880204375581133e-06, "loss": 0.0415, "step": 40250 }, { "epoch": 0.32575451088275753, "grad_norm": 0.7150997519493103, "learning_rate": 9.880050689332084e-06, "loss": 0.0338, "step": 40260 }, { "epoch": 0.32583542357795936, "grad_norm": 0.9570357203483582, "learning_rate": 9.879896905760726e-06, "loss": 0.0415, "step": 40270 }, { "epoch": 0.32591633627316124, "grad_norm": 0.5628629326820374, "learning_rate": 9.879743024870128e-06, "loss": 0.0365, "step": 40280 }, { "epoch": 0.32599724896836313, "grad_norm": 0.6501688957214355, "learning_rate": 9.879589046663356e-06, "loss": 0.0461, "step": 40290 }, { "epoch": 0.326078161663565, "grad_norm": 0.6387935876846313, "learning_rate": 9.87943497114348e-06, "loss": 0.0456, "step": 40300 }, { "epoch": 0.3261590743587669, "grad_norm": 0.6885872483253479, "learning_rate": 9.879280798313576e-06, "loss": 0.0261, "step": 40310 }, { "epoch": 0.3262399870539688, "grad_norm": 0.7389882802963257, "learning_rate": 9.879126528176716e-06, "loss": 0.0271, "step": 40320 }, { "epoch": 0.32632089974917067, "grad_norm": 1.659501552581787, "learning_rate": 9.87897216073598e-06, "loss": 0.0362, "step": 40330 }, { "epoch": 0.3264018124443725, "grad_norm": 0.5764020085334778, "learning_rate": 9.878817695994443e-06, "loss": 0.0264, "step": 40340 }, { "epoch": 0.3264827251395744, "grad_norm": 0.753527045249939, "learning_rate": 9.878663133955185e-06, "loss": 0.0442, "step": 40350 }, { "epoch": 0.32656363783477627, "grad_norm": 0.4880586564540863, "learning_rate": 9.878508474621292e-06, "loss": 0.0394, "step": 40360 }, { "epoch": 0.32664455052997815, "grad_norm": 0.23414430022239685, "learning_rate": 9.878353717995846e-06, "loss": 0.0292, "step": 40370 }, { "epoch": 0.32672546322518004, "grad_norm": 1.1924008131027222, "learning_rate": 9.878198864081934e-06, "loss": 0.0541, "step": 40380 }, { "epoch": 0.3268063759203819, "grad_norm": 0.6703648567199707, "learning_rate": 9.878043912882644e-06, "loss": 0.0522, "step": 40390 }, { "epoch": 0.3268872886155838, "grad_norm": 0.1626456379890442, "learning_rate": 9.877888864401066e-06, "loss": 0.0357, "step": 40400 }, { "epoch": 0.3269682013107857, "grad_norm": 0.8747290968894958, "learning_rate": 9.877733718640292e-06, "loss": 0.0269, "step": 40410 }, { "epoch": 0.3270491140059875, "grad_norm": 0.4880545437335968, "learning_rate": 9.877578475603416e-06, "loss": 0.043, "step": 40420 }, { "epoch": 0.3271300267011894, "grad_norm": 0.3753245770931244, "learning_rate": 9.877423135293536e-06, "loss": 0.0725, "step": 40430 }, { "epoch": 0.3272109393963913, "grad_norm": 0.6499832272529602, "learning_rate": 9.877267697713747e-06, "loss": 0.0461, "step": 40440 }, { "epoch": 0.32729185209159317, "grad_norm": 0.7666500210762024, "learning_rate": 9.877112162867148e-06, "loss": 0.0451, "step": 40450 }, { "epoch": 0.32737276478679506, "grad_norm": 1.3744006156921387, "learning_rate": 9.876956530756847e-06, "loss": 0.0584, "step": 40460 }, { "epoch": 0.32745367748199694, "grad_norm": 0.8362887501716614, "learning_rate": 9.876800801385942e-06, "loss": 0.0597, "step": 40470 }, { "epoch": 0.3275345901771988, "grad_norm": 0.48208215832710266, "learning_rate": 9.876644974757538e-06, "loss": 0.0328, "step": 40480 }, { "epoch": 0.32761550287240065, "grad_norm": 0.9656327962875366, "learning_rate": 9.87648905087475e-06, "loss": 0.0412, "step": 40490 }, { "epoch": 0.32769641556760254, "grad_norm": 0.518141508102417, "learning_rate": 9.87633302974068e-06, "loss": 0.0534, "step": 40500 }, { "epoch": 0.3277773282628044, "grad_norm": 0.43534204363822937, "learning_rate": 9.876176911358442e-06, "loss": 0.0342, "step": 40510 }, { "epoch": 0.3278582409580063, "grad_norm": 0.6598918437957764, "learning_rate": 9.876020695731148e-06, "loss": 0.0299, "step": 40520 }, { "epoch": 0.3279391536532082, "grad_norm": 0.6398226022720337, "learning_rate": 9.875864382861917e-06, "loss": 0.0467, "step": 40530 }, { "epoch": 0.3280200663484101, "grad_norm": 0.765744149684906, "learning_rate": 9.875707972753863e-06, "loss": 0.0487, "step": 40540 }, { "epoch": 0.32810097904361196, "grad_norm": 0.7337196469306946, "learning_rate": 9.87555146541011e-06, "loss": 0.0413, "step": 40550 }, { "epoch": 0.32818189173881385, "grad_norm": 0.6115636229515076, "learning_rate": 9.875394860833771e-06, "loss": 0.044, "step": 40560 }, { "epoch": 0.3282628044340157, "grad_norm": 0.6322861313819885, "learning_rate": 9.875238159027977e-06, "loss": 0.0347, "step": 40570 }, { "epoch": 0.32834371712921756, "grad_norm": 0.5502486824989319, "learning_rate": 9.875081359995848e-06, "loss": 0.0441, "step": 40580 }, { "epoch": 0.32842462982441945, "grad_norm": 0.3521536886692047, "learning_rate": 9.874924463740515e-06, "loss": 0.0566, "step": 40590 }, { "epoch": 0.32850554251962133, "grad_norm": 0.8883639574050903, "learning_rate": 9.874767470265103e-06, "loss": 0.0507, "step": 40600 }, { "epoch": 0.3285864552148232, "grad_norm": 0.3177475035190582, "learning_rate": 9.874610379572746e-06, "loss": 0.0415, "step": 40610 }, { "epoch": 0.3286673679100251, "grad_norm": 0.45708227157592773, "learning_rate": 9.874453191666578e-06, "loss": 0.0401, "step": 40620 }, { "epoch": 0.328748280605227, "grad_norm": 0.9280343651771545, "learning_rate": 9.874295906549728e-06, "loss": 0.0688, "step": 40630 }, { "epoch": 0.3288291933004288, "grad_norm": 0.8759307265281677, "learning_rate": 9.874138524225338e-06, "loss": 0.0604, "step": 40640 }, { "epoch": 0.3289101059956307, "grad_norm": 0.9260842800140381, "learning_rate": 9.873981044696543e-06, "loss": 0.0357, "step": 40650 }, { "epoch": 0.3289910186908326, "grad_norm": 0.5114107131958008, "learning_rate": 9.873823467966487e-06, "loss": 0.0495, "step": 40660 }, { "epoch": 0.32907193138603447, "grad_norm": 0.8594604730606079, "learning_rate": 9.873665794038312e-06, "loss": 0.0434, "step": 40670 }, { "epoch": 0.32915284408123635, "grad_norm": 0.7189157009124756, "learning_rate": 9.87350802291516e-06, "loss": 0.0335, "step": 40680 }, { "epoch": 0.32923375677643824, "grad_norm": 0.3869743347167969, "learning_rate": 9.87335015460018e-06, "loss": 0.0492, "step": 40690 }, { "epoch": 0.3293146694716401, "grad_norm": 0.43746697902679443, "learning_rate": 9.87319218909652e-06, "loss": 0.0398, "step": 40700 }, { "epoch": 0.329395582166842, "grad_norm": 1.1178925037384033, "learning_rate": 9.873034126407326e-06, "loss": 0.0492, "step": 40710 }, { "epoch": 0.32947649486204383, "grad_norm": 0.30074092745780945, "learning_rate": 9.872875966535757e-06, "loss": 0.0364, "step": 40720 }, { "epoch": 0.3295574075572457, "grad_norm": 0.184925377368927, "learning_rate": 9.872717709484962e-06, "loss": 0.0319, "step": 40730 }, { "epoch": 0.3296383202524476, "grad_norm": 0.08169623464345932, "learning_rate": 9.8725593552581e-06, "loss": 0.0295, "step": 40740 }, { "epoch": 0.3297192329476495, "grad_norm": 0.9010621309280396, "learning_rate": 9.872400903858328e-06, "loss": 0.0449, "step": 40750 }, { "epoch": 0.32980014564285137, "grad_norm": 0.7486125230789185, "learning_rate": 9.872242355288807e-06, "loss": 0.025, "step": 40760 }, { "epoch": 0.32988105833805326, "grad_norm": 0.6413047313690186, "learning_rate": 9.872083709552697e-06, "loss": 0.0424, "step": 40770 }, { "epoch": 0.32996197103325514, "grad_norm": 0.5807832479476929, "learning_rate": 9.871924966653163e-06, "loss": 0.0405, "step": 40780 }, { "epoch": 0.33004288372845697, "grad_norm": 0.6638889312744141, "learning_rate": 9.87176612659337e-06, "loss": 0.0409, "step": 40790 }, { "epoch": 0.33012379642365886, "grad_norm": 0.8848902583122253, "learning_rate": 9.871607189376487e-06, "loss": 0.0447, "step": 40800 }, { "epoch": 0.33020470911886074, "grad_norm": 0.7632887959480286, "learning_rate": 9.871448155005682e-06, "loss": 0.0426, "step": 40810 }, { "epoch": 0.3302856218140626, "grad_norm": 0.7435930371284485, "learning_rate": 9.871289023484129e-06, "loss": 0.0543, "step": 40820 }, { "epoch": 0.3303665345092645, "grad_norm": 0.29295217990875244, "learning_rate": 9.871129794815e-06, "loss": 0.0332, "step": 40830 }, { "epoch": 0.3304474472044664, "grad_norm": 1.0903531312942505, "learning_rate": 9.87097046900147e-06, "loss": 0.0515, "step": 40840 }, { "epoch": 0.3305283598996683, "grad_norm": 0.8970947861671448, "learning_rate": 9.870811046046717e-06, "loss": 0.0524, "step": 40850 }, { "epoch": 0.33060927259487016, "grad_norm": 0.6621421575546265, "learning_rate": 9.870651525953922e-06, "loss": 0.0457, "step": 40860 }, { "epoch": 0.330690185290072, "grad_norm": 1.0388965606689453, "learning_rate": 9.870491908726263e-06, "loss": 0.0359, "step": 40870 }, { "epoch": 0.3307710979852739, "grad_norm": 0.5507674813270569, "learning_rate": 9.870332194366926e-06, "loss": 0.0364, "step": 40880 }, { "epoch": 0.33085201068047576, "grad_norm": 0.6802447438240051, "learning_rate": 9.870172382879095e-06, "loss": 0.0453, "step": 40890 }, { "epoch": 0.33093292337567765, "grad_norm": 0.7639626860618591, "learning_rate": 9.870012474265956e-06, "loss": 0.0512, "step": 40900 }, { "epoch": 0.33101383607087953, "grad_norm": 0.6452837586402893, "learning_rate": 9.8698524685307e-06, "loss": 0.0405, "step": 40910 }, { "epoch": 0.3310947487660814, "grad_norm": 0.5973842144012451, "learning_rate": 9.869692365676518e-06, "loss": 0.0545, "step": 40920 }, { "epoch": 0.3311756614612833, "grad_norm": 0.6230573654174805, "learning_rate": 9.8695321657066e-06, "loss": 0.0492, "step": 40930 }, { "epoch": 0.33125657415648513, "grad_norm": 1.2112345695495605, "learning_rate": 9.869371868624147e-06, "loss": 0.0382, "step": 40940 }, { "epoch": 0.331337486851687, "grad_norm": 0.769437849521637, "learning_rate": 9.869211474432348e-06, "loss": 0.0299, "step": 40950 }, { "epoch": 0.3314183995468889, "grad_norm": 0.9488968253135681, "learning_rate": 9.869050983134408e-06, "loss": 0.0685, "step": 40960 }, { "epoch": 0.3314993122420908, "grad_norm": 0.33015891909599304, "learning_rate": 9.868890394733524e-06, "loss": 0.0307, "step": 40970 }, { "epoch": 0.33158022493729267, "grad_norm": 0.47258785367012024, "learning_rate": 9.868729709232898e-06, "loss": 0.051, "step": 40980 }, { "epoch": 0.33166113763249455, "grad_norm": 1.1490740776062012, "learning_rate": 9.868568926635738e-06, "loss": 0.0311, "step": 40990 }, { "epoch": 0.33174205032769644, "grad_norm": 0.5535264015197754, "learning_rate": 9.86840804694525e-06, "loss": 0.0655, "step": 41000 }, { "epoch": 0.33182296302289827, "grad_norm": 0.48547789454460144, "learning_rate": 9.86824707016464e-06, "loss": 0.0304, "step": 41010 }, { "epoch": 0.33190387571810015, "grad_norm": 0.31716567277908325, "learning_rate": 9.868085996297119e-06, "loss": 0.0464, "step": 41020 }, { "epoch": 0.33198478841330203, "grad_norm": 0.6076067686080933, "learning_rate": 9.8679248253459e-06, "loss": 0.0506, "step": 41030 }, { "epoch": 0.3320657011085039, "grad_norm": 0.3810777962207794, "learning_rate": 9.867763557314197e-06, "loss": 0.0456, "step": 41040 }, { "epoch": 0.3321466138037058, "grad_norm": 0.42485669255256653, "learning_rate": 9.867602192205227e-06, "loss": 0.037, "step": 41050 }, { "epoch": 0.3322275264989077, "grad_norm": 0.5218676328659058, "learning_rate": 9.867440730022206e-06, "loss": 0.0467, "step": 41060 }, { "epoch": 0.3323084391941096, "grad_norm": 0.47217753529548645, "learning_rate": 9.867279170768356e-06, "loss": 0.027, "step": 41070 }, { "epoch": 0.33238935188931146, "grad_norm": 1.818761944770813, "learning_rate": 9.867117514446898e-06, "loss": 0.0459, "step": 41080 }, { "epoch": 0.3324702645845133, "grad_norm": 0.6603654026985168, "learning_rate": 9.866955761061057e-06, "loss": 0.0486, "step": 41090 }, { "epoch": 0.33255117727971517, "grad_norm": 0.6987442374229431, "learning_rate": 9.866793910614057e-06, "loss": 0.0529, "step": 41100 }, { "epoch": 0.33263208997491706, "grad_norm": 0.4927309453487396, "learning_rate": 9.866631963109127e-06, "loss": 0.0413, "step": 41110 }, { "epoch": 0.33271300267011894, "grad_norm": 0.6848508715629578, "learning_rate": 9.866469918549496e-06, "loss": 0.0378, "step": 41120 }, { "epoch": 0.3327939153653208, "grad_norm": 0.4474072754383087, "learning_rate": 9.866307776938396e-06, "loss": 0.0519, "step": 41130 }, { "epoch": 0.3328748280605227, "grad_norm": 0.531976044178009, "learning_rate": 9.866145538279062e-06, "loss": 0.0521, "step": 41140 }, { "epoch": 0.3329557407557246, "grad_norm": 0.5291075706481934, "learning_rate": 9.865983202574728e-06, "loss": 0.0444, "step": 41150 }, { "epoch": 0.3330366534509264, "grad_norm": 0.8955236077308655, "learning_rate": 9.86582076982863e-06, "loss": 0.0539, "step": 41160 }, { "epoch": 0.3331175661461283, "grad_norm": 0.6135576963424683, "learning_rate": 9.865658240044009e-06, "loss": 0.0371, "step": 41170 }, { "epoch": 0.3331984788413302, "grad_norm": 0.3525543212890625, "learning_rate": 9.865495613224107e-06, "loss": 0.0599, "step": 41180 }, { "epoch": 0.3332793915365321, "grad_norm": 0.647148847579956, "learning_rate": 9.865332889372166e-06, "loss": 0.035, "step": 41190 }, { "epoch": 0.33336030423173396, "grad_norm": 1.0536319017410278, "learning_rate": 9.865170068491433e-06, "loss": 0.0541, "step": 41200 }, { "epoch": 0.33344121692693585, "grad_norm": 0.6622977256774902, "learning_rate": 9.865007150585152e-06, "loss": 0.0352, "step": 41210 }, { "epoch": 0.33352212962213773, "grad_norm": 0.49685171246528625, "learning_rate": 9.864844135656575e-06, "loss": 0.0438, "step": 41220 }, { "epoch": 0.3336030423173396, "grad_norm": 0.1563342809677124, "learning_rate": 9.86468102370895e-06, "loss": 0.0383, "step": 41230 }, { "epoch": 0.33368395501254144, "grad_norm": 0.33351612091064453, "learning_rate": 9.864517814745535e-06, "loss": 0.0359, "step": 41240 }, { "epoch": 0.33376486770774333, "grad_norm": 0.6246165037155151, "learning_rate": 9.86435450876958e-06, "loss": 0.028, "step": 41250 }, { "epoch": 0.3338457804029452, "grad_norm": 0.48628124594688416, "learning_rate": 9.864191105784344e-06, "loss": 0.0544, "step": 41260 }, { "epoch": 0.3339266930981471, "grad_norm": 0.7069867849349976, "learning_rate": 9.864027605793085e-06, "loss": 0.0476, "step": 41270 }, { "epoch": 0.334007605793349, "grad_norm": 0.552903413772583, "learning_rate": 9.863864008799062e-06, "loss": 0.0399, "step": 41280 }, { "epoch": 0.33408851848855087, "grad_norm": 0.6042910814285278, "learning_rate": 9.863700314805541e-06, "loss": 0.0497, "step": 41290 }, { "epoch": 0.33416943118375275, "grad_norm": 0.6169891953468323, "learning_rate": 9.863536523815784e-06, "loss": 0.0474, "step": 41300 }, { "epoch": 0.3342503438789546, "grad_norm": 0.7042946219444275, "learning_rate": 9.863372635833059e-06, "loss": 0.0453, "step": 41310 }, { "epoch": 0.33433125657415647, "grad_norm": 0.7566058039665222, "learning_rate": 9.863208650860634e-06, "loss": 0.0409, "step": 41320 }, { "epoch": 0.33441216926935835, "grad_norm": 0.45953863859176636, "learning_rate": 9.86304456890178e-06, "loss": 0.04, "step": 41330 }, { "epoch": 0.33449308196456023, "grad_norm": 0.4886035919189453, "learning_rate": 9.862880389959768e-06, "loss": 0.0366, "step": 41340 }, { "epoch": 0.3345739946597621, "grad_norm": 0.8067612648010254, "learning_rate": 9.862716114037872e-06, "loss": 0.0452, "step": 41350 }, { "epoch": 0.334654907354964, "grad_norm": 0.7798858284950256, "learning_rate": 9.862551741139369e-06, "loss": 0.0555, "step": 41360 }, { "epoch": 0.3347358200501659, "grad_norm": 0.1755373775959015, "learning_rate": 9.862387271267538e-06, "loss": 0.0473, "step": 41370 }, { "epoch": 0.3348167327453678, "grad_norm": 0.4780804514884949, "learning_rate": 9.862222704425656e-06, "loss": 0.0375, "step": 41380 }, { "epoch": 0.3348976454405696, "grad_norm": 0.3753353953361511, "learning_rate": 9.862058040617007e-06, "loss": 0.032, "step": 41390 }, { "epoch": 0.3349785581357715, "grad_norm": 0.43552160263061523, "learning_rate": 9.861893279844873e-06, "loss": 0.036, "step": 41400 }, { "epoch": 0.33505947083097337, "grad_norm": 0.4930942952632904, "learning_rate": 9.861728422112546e-06, "loss": 0.0555, "step": 41410 }, { "epoch": 0.33514038352617526, "grad_norm": 0.46433311700820923, "learning_rate": 9.861563467423305e-06, "loss": 0.0229, "step": 41420 }, { "epoch": 0.33522129622137714, "grad_norm": 0.607568621635437, "learning_rate": 9.861398415780447e-06, "loss": 0.0399, "step": 41430 }, { "epoch": 0.335302208916579, "grad_norm": 0.44109445810317993, "learning_rate": 9.861233267187258e-06, "loss": 0.0344, "step": 41440 }, { "epoch": 0.3353831216117809, "grad_norm": 0.5203218460083008, "learning_rate": 9.861068021647035e-06, "loss": 0.043, "step": 41450 }, { "epoch": 0.33546403430698274, "grad_norm": 0.5044179558753967, "learning_rate": 9.860902679163073e-06, "loss": 0.0356, "step": 41460 }, { "epoch": 0.3355449470021846, "grad_norm": 0.31492650508880615, "learning_rate": 9.860737239738667e-06, "loss": 0.0353, "step": 41470 }, { "epoch": 0.3356258596973865, "grad_norm": 0.524564802646637, "learning_rate": 9.86057170337712e-06, "loss": 0.0374, "step": 41480 }, { "epoch": 0.3357067723925884, "grad_norm": 0.3426591455936432, "learning_rate": 9.860406070081732e-06, "loss": 0.0324, "step": 41490 }, { "epoch": 0.3357876850877903, "grad_norm": 0.7220721244812012, "learning_rate": 9.860240339855804e-06, "loss": 0.0492, "step": 41500 }, { "epoch": 0.33586859778299216, "grad_norm": 0.7250580787658691, "learning_rate": 9.860074512702644e-06, "loss": 0.0414, "step": 41510 }, { "epoch": 0.33594951047819405, "grad_norm": 0.3881251811981201, "learning_rate": 9.859908588625557e-06, "loss": 0.0373, "step": 41520 }, { "epoch": 0.33603042317339593, "grad_norm": 0.7550155520439148, "learning_rate": 9.859742567627855e-06, "loss": 0.046, "step": 41530 }, { "epoch": 0.33611133586859776, "grad_norm": 0.4887814521789551, "learning_rate": 9.859576449712845e-06, "loss": 0.05, "step": 41540 }, { "epoch": 0.33619224856379964, "grad_norm": 0.5434011220932007, "learning_rate": 9.859410234883842e-06, "loss": 0.0381, "step": 41550 }, { "epoch": 0.33627316125900153, "grad_norm": 0.2973977327346802, "learning_rate": 9.85924392314416e-06, "loss": 0.0372, "step": 41560 }, { "epoch": 0.3363540739542034, "grad_norm": 0.2324269711971283, "learning_rate": 9.859077514497117e-06, "loss": 0.0387, "step": 41570 }, { "epoch": 0.3364349866494053, "grad_norm": 0.5275130271911621, "learning_rate": 9.85891100894603e-06, "loss": 0.0399, "step": 41580 }, { "epoch": 0.3365158993446072, "grad_norm": 0.2628774344921112, "learning_rate": 9.858744406494223e-06, "loss": 0.0466, "step": 41590 }, { "epoch": 0.33659681203980907, "grad_norm": 0.5973769426345825, "learning_rate": 9.858577707145013e-06, "loss": 0.0296, "step": 41600 }, { "epoch": 0.3366777247350109, "grad_norm": 0.5173704624176025, "learning_rate": 9.85841091090173e-06, "loss": 0.0387, "step": 41610 }, { "epoch": 0.3367586374302128, "grad_norm": 0.6398801803588867, "learning_rate": 9.858244017767697e-06, "loss": 0.0525, "step": 41620 }, { "epoch": 0.33683955012541467, "grad_norm": 0.5640169978141785, "learning_rate": 9.858077027746243e-06, "loss": 0.0378, "step": 41630 }, { "epoch": 0.33692046282061655, "grad_norm": 0.643841564655304, "learning_rate": 9.857909940840698e-06, "loss": 0.041, "step": 41640 }, { "epoch": 0.33700137551581844, "grad_norm": 0.7330644130706787, "learning_rate": 9.857742757054396e-06, "loss": 0.0413, "step": 41650 }, { "epoch": 0.3370822882110203, "grad_norm": 0.6921889185905457, "learning_rate": 9.857575476390668e-06, "loss": 0.0639, "step": 41660 }, { "epoch": 0.3371632009062222, "grad_norm": 0.4464300870895386, "learning_rate": 9.857408098852855e-06, "loss": 0.0312, "step": 41670 }, { "epoch": 0.3372441136014241, "grad_norm": 0.1189119890332222, "learning_rate": 9.857240624444288e-06, "loss": 0.0489, "step": 41680 }, { "epoch": 0.3373250262966259, "grad_norm": 0.35755276679992676, "learning_rate": 9.857073053168314e-06, "loss": 0.0466, "step": 41690 }, { "epoch": 0.3374059389918278, "grad_norm": 0.5859290957450867, "learning_rate": 9.85690538502827e-06, "loss": 0.057, "step": 41700 }, { "epoch": 0.3374868516870297, "grad_norm": 0.5988011360168457, "learning_rate": 9.856737620027504e-06, "loss": 0.0298, "step": 41710 }, { "epoch": 0.33756776438223157, "grad_norm": 0.6258929967880249, "learning_rate": 9.856569758169357e-06, "loss": 0.0433, "step": 41720 }, { "epoch": 0.33764867707743346, "grad_norm": 0.28770288825035095, "learning_rate": 9.856401799457179e-06, "loss": 0.0255, "step": 41730 }, { "epoch": 0.33772958977263534, "grad_norm": 0.8397381901741028, "learning_rate": 9.856233743894318e-06, "loss": 0.0355, "step": 41740 }, { "epoch": 0.3378105024678372, "grad_norm": 0.9103057384490967, "learning_rate": 9.85606559148413e-06, "loss": 0.0393, "step": 41750 }, { "epoch": 0.33789141516303905, "grad_norm": 0.6796306371688843, "learning_rate": 9.855897342229963e-06, "loss": 0.032, "step": 41760 }, { "epoch": 0.33797232785824094, "grad_norm": 0.5867382287979126, "learning_rate": 9.855728996135177e-06, "loss": 0.0391, "step": 41770 }, { "epoch": 0.3380532405534428, "grad_norm": 0.6837970614433289, "learning_rate": 9.855560553203125e-06, "loss": 0.0497, "step": 41780 }, { "epoch": 0.3381341532486447, "grad_norm": 0.45810848474502563, "learning_rate": 9.85539201343717e-06, "loss": 0.043, "step": 41790 }, { "epoch": 0.3382150659438466, "grad_norm": 0.47272375226020813, "learning_rate": 9.85522337684067e-06, "loss": 0.0387, "step": 41800 }, { "epoch": 0.3382959786390485, "grad_norm": 0.8854498863220215, "learning_rate": 9.85505464341699e-06, "loss": 0.0659, "step": 41810 }, { "epoch": 0.33837689133425036, "grad_norm": 0.5238831639289856, "learning_rate": 9.854885813169495e-06, "loss": 0.051, "step": 41820 }, { "epoch": 0.33845780402945225, "grad_norm": 0.2684612274169922, "learning_rate": 9.85471688610155e-06, "loss": 0.0428, "step": 41830 }, { "epoch": 0.3385387167246541, "grad_norm": 1.2209813594818115, "learning_rate": 9.854547862216528e-06, "loss": 0.0384, "step": 41840 }, { "epoch": 0.33861962941985596, "grad_norm": 0.31391027569770813, "learning_rate": 9.854378741517795e-06, "loss": 0.0214, "step": 41850 }, { "epoch": 0.33870054211505785, "grad_norm": 0.7119649052619934, "learning_rate": 9.85420952400873e-06, "loss": 0.034, "step": 41860 }, { "epoch": 0.33878145481025973, "grad_norm": 0.48848801851272583, "learning_rate": 9.8540402096927e-06, "loss": 0.0509, "step": 41870 }, { "epoch": 0.3388623675054616, "grad_norm": 0.2960766851902008, "learning_rate": 9.853870798573085e-06, "loss": 0.0418, "step": 41880 }, { "epoch": 0.3389432802006635, "grad_norm": 0.408867210149765, "learning_rate": 9.853701290653265e-06, "loss": 0.0521, "step": 41890 }, { "epoch": 0.3390241928958654, "grad_norm": 0.5588067173957825, "learning_rate": 9.85353168593662e-06, "loss": 0.0405, "step": 41900 }, { "epoch": 0.3391051055910672, "grad_norm": 0.9170119166374207, "learning_rate": 9.85336198442653e-06, "loss": 0.0499, "step": 41910 }, { "epoch": 0.3391860182862691, "grad_norm": 0.6032609939575195, "learning_rate": 9.853192186126383e-06, "loss": 0.054, "step": 41920 }, { "epoch": 0.339266930981471, "grad_norm": 0.649692952632904, "learning_rate": 9.853022291039562e-06, "loss": 0.0454, "step": 41930 }, { "epoch": 0.33934784367667287, "grad_norm": 0.5382974147796631, "learning_rate": 9.852852299169459e-06, "loss": 0.0553, "step": 41940 }, { "epoch": 0.33942875637187475, "grad_norm": 0.2322617471218109, "learning_rate": 9.85268221051946e-06, "loss": 0.0673, "step": 41950 }, { "epoch": 0.33950966906707664, "grad_norm": 0.6537007093429565, "learning_rate": 9.852512025092958e-06, "loss": 0.0431, "step": 41960 }, { "epoch": 0.3395905817622785, "grad_norm": 0.5263242721557617, "learning_rate": 9.85234174289335e-06, "loss": 0.0256, "step": 41970 }, { "epoch": 0.3396714944574804, "grad_norm": 0.38939034938812256, "learning_rate": 9.852171363924027e-06, "loss": 0.0412, "step": 41980 }, { "epoch": 0.33975240715268223, "grad_norm": 0.4377976953983307, "learning_rate": 9.85200088818839e-06, "loss": 0.0321, "step": 41990 }, { "epoch": 0.3398333198478841, "grad_norm": 0.3979913592338562, "learning_rate": 9.851830315689842e-06, "loss": 0.0466, "step": 42000 }, { "epoch": 0.339914232543086, "grad_norm": 1.0043050050735474, "learning_rate": 9.851659646431776e-06, "loss": 0.0574, "step": 42010 }, { "epoch": 0.3399951452382879, "grad_norm": 0.27554887533187866, "learning_rate": 9.851488880417603e-06, "loss": 0.0315, "step": 42020 }, { "epoch": 0.3400760579334898, "grad_norm": 0.7808628678321838, "learning_rate": 9.851318017650726e-06, "loss": 0.0346, "step": 42030 }, { "epoch": 0.34015697062869166, "grad_norm": 0.43125027418136597, "learning_rate": 9.851147058134554e-06, "loss": 0.0346, "step": 42040 }, { "epoch": 0.34023788332389354, "grad_norm": 1.01069176197052, "learning_rate": 9.850976001872494e-06, "loss": 0.0473, "step": 42050 }, { "epoch": 0.34031879601909537, "grad_norm": 0.726144552230835, "learning_rate": 9.850804848867958e-06, "loss": 0.0369, "step": 42060 }, { "epoch": 0.34039970871429726, "grad_norm": 0.6522103548049927, "learning_rate": 9.85063359912436e-06, "loss": 0.034, "step": 42070 }, { "epoch": 0.34048062140949914, "grad_norm": 0.4607544243335724, "learning_rate": 9.850462252645116e-06, "loss": 0.0478, "step": 42080 }, { "epoch": 0.340561534104701, "grad_norm": 0.6666350364685059, "learning_rate": 9.85029080943364e-06, "loss": 0.0346, "step": 42090 }, { "epoch": 0.3406424467999029, "grad_norm": 0.6325969696044922, "learning_rate": 9.850119269493355e-06, "loss": 0.0327, "step": 42100 }, { "epoch": 0.3407233594951048, "grad_norm": 0.5069460868835449, "learning_rate": 9.84994763282768e-06, "loss": 0.0502, "step": 42110 }, { "epoch": 0.3408042721903067, "grad_norm": 0.34872573614120483, "learning_rate": 9.849775899440038e-06, "loss": 0.0412, "step": 42120 }, { "epoch": 0.34088518488550856, "grad_norm": 0.7165973782539368, "learning_rate": 9.849604069333854e-06, "loss": 0.0282, "step": 42130 }, { "epoch": 0.3409660975807104, "grad_norm": 0.6070092916488647, "learning_rate": 9.849432142512555e-06, "loss": 0.0299, "step": 42140 }, { "epoch": 0.3410470102759123, "grad_norm": 0.9670928120613098, "learning_rate": 9.849260118979568e-06, "loss": 0.0649, "step": 42150 }, { "epoch": 0.34112792297111416, "grad_norm": 0.4118913412094116, "learning_rate": 9.849087998738328e-06, "loss": 0.0416, "step": 42160 }, { "epoch": 0.34120883566631605, "grad_norm": 0.37993931770324707, "learning_rate": 9.848915781792263e-06, "loss": 0.0453, "step": 42170 }, { "epoch": 0.34128974836151793, "grad_norm": 1.3224436044692993, "learning_rate": 9.84874346814481e-06, "loss": 0.0378, "step": 42180 }, { "epoch": 0.3413706610567198, "grad_norm": 0.71023029088974, "learning_rate": 9.848571057799404e-06, "loss": 0.0521, "step": 42190 }, { "epoch": 0.3414515737519217, "grad_norm": 1.050389051437378, "learning_rate": 9.848398550759484e-06, "loss": 0.0376, "step": 42200 }, { "epoch": 0.34153248644712353, "grad_norm": 0.6103419065475464, "learning_rate": 9.848225947028492e-06, "loss": 0.0423, "step": 42210 }, { "epoch": 0.3416133991423254, "grad_norm": 0.4695224463939667, "learning_rate": 9.848053246609866e-06, "loss": 0.0357, "step": 42220 }, { "epoch": 0.3416943118375273, "grad_norm": 0.447702020406723, "learning_rate": 9.847880449507054e-06, "loss": 0.0554, "step": 42230 }, { "epoch": 0.3417752245327292, "grad_norm": 0.1511971801519394, "learning_rate": 9.8477075557235e-06, "loss": 0.0393, "step": 42240 }, { "epoch": 0.34185613722793107, "grad_norm": 0.7422323226928711, "learning_rate": 9.847534565262653e-06, "loss": 0.0627, "step": 42250 }, { "epoch": 0.34193704992313295, "grad_norm": 0.5296444296836853, "learning_rate": 9.847361478127964e-06, "loss": 0.0394, "step": 42260 }, { "epoch": 0.34201796261833484, "grad_norm": 0.2342878133058548, "learning_rate": 9.847188294322882e-06, "loss": 0.0352, "step": 42270 }, { "epoch": 0.3420988753135367, "grad_norm": 0.5585190653800964, "learning_rate": 9.847015013850862e-06, "loss": 0.0558, "step": 42280 }, { "epoch": 0.34217978800873855, "grad_norm": 0.3491385281085968, "learning_rate": 9.846841636715362e-06, "loss": 0.0217, "step": 42290 }, { "epoch": 0.34226070070394043, "grad_norm": 0.6079928278923035, "learning_rate": 9.846668162919838e-06, "loss": 0.0462, "step": 42300 }, { "epoch": 0.3423416133991423, "grad_norm": 0.21673405170440674, "learning_rate": 9.846494592467747e-06, "loss": 0.033, "step": 42310 }, { "epoch": 0.3424225260943442, "grad_norm": 0.9894393086433411, "learning_rate": 9.846320925362553e-06, "loss": 0.0414, "step": 42320 }, { "epoch": 0.3425034387895461, "grad_norm": 0.27496787905693054, "learning_rate": 9.846147161607719e-06, "loss": 0.0257, "step": 42330 }, { "epoch": 0.342584351484748, "grad_norm": 0.7466420531272888, "learning_rate": 9.845973301206713e-06, "loss": 0.0365, "step": 42340 }, { "epoch": 0.34266526417994986, "grad_norm": 0.2543114125728607, "learning_rate": 9.845799344162999e-06, "loss": 0.0357, "step": 42350 }, { "epoch": 0.3427461768751517, "grad_norm": 0.32990774512290955, "learning_rate": 9.845625290480045e-06, "loss": 0.0598, "step": 42360 }, { "epoch": 0.34282708957035357, "grad_norm": 0.7589825987815857, "learning_rate": 9.845451140161327e-06, "loss": 0.0285, "step": 42370 }, { "epoch": 0.34290800226555546, "grad_norm": 0.6384523510932922, "learning_rate": 9.845276893210312e-06, "loss": 0.0357, "step": 42380 }, { "epoch": 0.34298891496075734, "grad_norm": 0.5920447707176208, "learning_rate": 9.84510254963048e-06, "loss": 0.0483, "step": 42390 }, { "epoch": 0.3430698276559592, "grad_norm": 0.3518002927303314, "learning_rate": 9.844928109425306e-06, "loss": 0.0301, "step": 42400 }, { "epoch": 0.3431507403511611, "grad_norm": 0.6184329986572266, "learning_rate": 9.84475357259827e-06, "loss": 0.0378, "step": 42410 }, { "epoch": 0.343231653046363, "grad_norm": 0.5905975699424744, "learning_rate": 9.84457893915285e-06, "loss": 0.0336, "step": 42420 }, { "epoch": 0.3433125657415649, "grad_norm": 0.6062129139900208, "learning_rate": 9.84440420909253e-06, "loss": 0.0527, "step": 42430 }, { "epoch": 0.3433934784367667, "grad_norm": 0.584540605545044, "learning_rate": 9.844229382420796e-06, "loss": 0.0338, "step": 42440 }, { "epoch": 0.3434743911319686, "grad_norm": 0.7050473093986511, "learning_rate": 9.844054459141133e-06, "loss": 0.0441, "step": 42450 }, { "epoch": 0.3435553038271705, "grad_norm": 0.4524550139904022, "learning_rate": 9.84387943925703e-06, "loss": 0.036, "step": 42460 }, { "epoch": 0.34363621652237236, "grad_norm": 0.5820666551589966, "learning_rate": 9.84370432277198e-06, "loss": 0.0366, "step": 42470 }, { "epoch": 0.34371712921757425, "grad_norm": 0.45737582445144653, "learning_rate": 9.84352910968947e-06, "loss": 0.049, "step": 42480 }, { "epoch": 0.34379804191277613, "grad_norm": 0.7728270292282104, "learning_rate": 9.843353800012997e-06, "loss": 0.0494, "step": 42490 }, { "epoch": 0.343878954607978, "grad_norm": 0.5271169543266296, "learning_rate": 9.84317839374606e-06, "loss": 0.0488, "step": 42500 }, { "epoch": 0.34395986730317984, "grad_norm": 0.587958037853241, "learning_rate": 9.843002890892153e-06, "loss": 0.0293, "step": 42510 }, { "epoch": 0.34404077999838173, "grad_norm": 0.483997106552124, "learning_rate": 9.842827291454776e-06, "loss": 0.0298, "step": 42520 }, { "epoch": 0.3441216926935836, "grad_norm": 0.7147597670555115, "learning_rate": 9.842651595437434e-06, "loss": 0.0404, "step": 42530 }, { "epoch": 0.3442026053887855, "grad_norm": 0.4297192096710205, "learning_rate": 9.842475802843627e-06, "loss": 0.0359, "step": 42540 }, { "epoch": 0.3442835180839874, "grad_norm": 0.9190924763679504, "learning_rate": 9.842299913676865e-06, "loss": 0.038, "step": 42550 }, { "epoch": 0.34436443077918927, "grad_norm": 0.6276183128356934, "learning_rate": 9.842123927940655e-06, "loss": 0.0557, "step": 42560 }, { "epoch": 0.34444534347439115, "grad_norm": 0.8917182683944702, "learning_rate": 9.841947845638502e-06, "loss": 0.0456, "step": 42570 }, { "epoch": 0.344526256169593, "grad_norm": 0.41615304350852966, "learning_rate": 9.841771666773923e-06, "loss": 0.0657, "step": 42580 }, { "epoch": 0.34460716886479487, "grad_norm": 0.4856719970703125, "learning_rate": 9.841595391350428e-06, "loss": 0.0567, "step": 42590 }, { "epoch": 0.34468808155999675, "grad_norm": 0.7543234825134277, "learning_rate": 9.841419019371536e-06, "loss": 0.0501, "step": 42600 }, { "epoch": 0.34476899425519864, "grad_norm": 0.37567785382270813, "learning_rate": 9.841242550840762e-06, "loss": 0.038, "step": 42610 }, { "epoch": 0.3448499069504005, "grad_norm": 0.13993442058563232, "learning_rate": 9.841065985761627e-06, "loss": 0.0408, "step": 42620 }, { "epoch": 0.3449308196456024, "grad_norm": 0.4733227491378784, "learning_rate": 9.840889324137648e-06, "loss": 0.0472, "step": 42630 }, { "epoch": 0.3450117323408043, "grad_norm": 0.40230792760849, "learning_rate": 9.840712565972352e-06, "loss": 0.0389, "step": 42640 }, { "epoch": 0.3450926450360062, "grad_norm": 0.8219269514083862, "learning_rate": 9.840535711269264e-06, "loss": 0.0434, "step": 42650 }, { "epoch": 0.345173557731208, "grad_norm": 0.5444205403327942, "learning_rate": 9.840358760031908e-06, "loss": 0.0517, "step": 42660 }, { "epoch": 0.3452544704264099, "grad_norm": 0.6595284342765808, "learning_rate": 9.840181712263816e-06, "loss": 0.047, "step": 42670 }, { "epoch": 0.34533538312161177, "grad_norm": 0.8119261860847473, "learning_rate": 9.840004567968519e-06, "loss": 0.0301, "step": 42680 }, { "epoch": 0.34541629581681366, "grad_norm": 0.5622915625572205, "learning_rate": 9.839827327149548e-06, "loss": 0.0677, "step": 42690 }, { "epoch": 0.34549720851201554, "grad_norm": 0.26876115798950195, "learning_rate": 9.839649989810437e-06, "loss": 0.0353, "step": 42700 }, { "epoch": 0.3455781212072174, "grad_norm": 0.490407794713974, "learning_rate": 9.839472555954725e-06, "loss": 0.0389, "step": 42710 }, { "epoch": 0.3456590339024193, "grad_norm": 0.4607655107975006, "learning_rate": 9.839295025585949e-06, "loss": 0.0329, "step": 42720 }, { "epoch": 0.34573994659762114, "grad_norm": 0.6149912476539612, "learning_rate": 9.839117398707651e-06, "loss": 0.0449, "step": 42730 }, { "epoch": 0.345820859292823, "grad_norm": 0.7077575922012329, "learning_rate": 9.838939675323372e-06, "loss": 0.0469, "step": 42740 }, { "epoch": 0.3459017719880249, "grad_norm": 0.7131140232086182, "learning_rate": 9.838761855436656e-06, "loss": 0.0286, "step": 42750 }, { "epoch": 0.3459826846832268, "grad_norm": 0.32168230414390564, "learning_rate": 9.83858393905105e-06, "loss": 0.0294, "step": 42760 }, { "epoch": 0.3460635973784287, "grad_norm": 0.521207869052887, "learning_rate": 9.838405926170101e-06, "loss": 0.0272, "step": 42770 }, { "epoch": 0.34614451007363056, "grad_norm": 0.5872771143913269, "learning_rate": 9.838227816797361e-06, "loss": 0.0545, "step": 42780 }, { "epoch": 0.34622542276883245, "grad_norm": 0.4645479917526245, "learning_rate": 9.838049610936383e-06, "loss": 0.0456, "step": 42790 }, { "epoch": 0.34630633546403433, "grad_norm": 0.5735189914703369, "learning_rate": 9.837871308590717e-06, "loss": 0.0537, "step": 42800 }, { "epoch": 0.34638724815923616, "grad_norm": 0.8212530612945557, "learning_rate": 9.837692909763922e-06, "loss": 0.0465, "step": 42810 }, { "epoch": 0.34646816085443805, "grad_norm": 0.7683247923851013, "learning_rate": 9.837514414459552e-06, "loss": 0.0327, "step": 42820 }, { "epoch": 0.34654907354963993, "grad_norm": 0.19265298545360565, "learning_rate": 9.837335822681172e-06, "loss": 0.0265, "step": 42830 }, { "epoch": 0.3466299862448418, "grad_norm": 0.41121816635131836, "learning_rate": 9.837157134432342e-06, "loss": 0.0309, "step": 42840 }, { "epoch": 0.3467108989400437, "grad_norm": 0.29005566239356995, "learning_rate": 9.836978349716621e-06, "loss": 0.0325, "step": 42850 }, { "epoch": 0.3467918116352456, "grad_norm": 0.553957998752594, "learning_rate": 9.836799468537582e-06, "loss": 0.0371, "step": 42860 }, { "epoch": 0.34687272433044747, "grad_norm": 0.4372183680534363, "learning_rate": 9.836620490898788e-06, "loss": 0.0386, "step": 42870 }, { "epoch": 0.3469536370256493, "grad_norm": 0.1524963527917862, "learning_rate": 9.836441416803805e-06, "loss": 0.0297, "step": 42880 }, { "epoch": 0.3470345497208512, "grad_norm": 0.49302154779434204, "learning_rate": 9.836262246256212e-06, "loss": 0.0371, "step": 42890 }, { "epoch": 0.34711546241605307, "grad_norm": 0.44310322403907776, "learning_rate": 9.836082979259577e-06, "loss": 0.0315, "step": 42900 }, { "epoch": 0.34719637511125495, "grad_norm": 1.1082065105438232, "learning_rate": 9.835903615817478e-06, "loss": 0.0359, "step": 42910 }, { "epoch": 0.34727728780645684, "grad_norm": 0.6912885308265686, "learning_rate": 9.835724155933489e-06, "loss": 0.0437, "step": 42920 }, { "epoch": 0.3473582005016587, "grad_norm": 0.7610980868339539, "learning_rate": 9.835544599611189e-06, "loss": 0.0477, "step": 42930 }, { "epoch": 0.3474391131968606, "grad_norm": 0.6106807589530945, "learning_rate": 9.835364946854163e-06, "loss": 0.043, "step": 42940 }, { "epoch": 0.3475200258920625, "grad_norm": 0.618465006351471, "learning_rate": 9.83518519766599e-06, "loss": 0.0412, "step": 42950 }, { "epoch": 0.3476009385872643, "grad_norm": 0.6708666086196899, "learning_rate": 9.835005352050256e-06, "loss": 0.0403, "step": 42960 }, { "epoch": 0.3476818512824662, "grad_norm": 0.8384682536125183, "learning_rate": 9.834825410010546e-06, "loss": 0.0494, "step": 42970 }, { "epoch": 0.3477627639776681, "grad_norm": 0.8127650618553162, "learning_rate": 9.83464537155045e-06, "loss": 0.0403, "step": 42980 }, { "epoch": 0.34784367667287, "grad_norm": 0.06894348561763763, "learning_rate": 9.83446523667356e-06, "loss": 0.0514, "step": 42990 }, { "epoch": 0.34792458936807186, "grad_norm": 0.6339892745018005, "learning_rate": 9.834285005383466e-06, "loss": 0.0354, "step": 43000 }, { "epoch": 0.34800550206327374, "grad_norm": 0.5165516138076782, "learning_rate": 9.834104677683763e-06, "loss": 0.0443, "step": 43010 }, { "epoch": 0.3480864147584756, "grad_norm": 0.5816275477409363, "learning_rate": 9.833924253578049e-06, "loss": 0.0451, "step": 43020 }, { "epoch": 0.34816732745367746, "grad_norm": 0.8482062220573425, "learning_rate": 9.833743733069919e-06, "loss": 0.0457, "step": 43030 }, { "epoch": 0.34824824014887934, "grad_norm": 1.0511101484298706, "learning_rate": 9.833563116162975e-06, "loss": 0.0367, "step": 43040 }, { "epoch": 0.3483291528440812, "grad_norm": 0.4555600583553314, "learning_rate": 9.83338240286082e-06, "loss": 0.0499, "step": 43050 }, { "epoch": 0.3484100655392831, "grad_norm": 0.6355628371238708, "learning_rate": 9.833201593167055e-06, "loss": 0.0335, "step": 43060 }, { "epoch": 0.348490978234485, "grad_norm": 0.5368601083755493, "learning_rate": 9.833020687085288e-06, "loss": 0.0275, "step": 43070 }, { "epoch": 0.3485718909296869, "grad_norm": 0.48649531602859497, "learning_rate": 9.832839684619127e-06, "loss": 0.0356, "step": 43080 }, { "epoch": 0.34865280362488876, "grad_norm": 0.24281258881092072, "learning_rate": 9.83265858577218e-06, "loss": 0.0355, "step": 43090 }, { "epoch": 0.34873371632009065, "grad_norm": 0.5787615776062012, "learning_rate": 9.83247739054806e-06, "loss": 0.044, "step": 43100 }, { "epoch": 0.3488146290152925, "grad_norm": 1.3026026487350464, "learning_rate": 9.832296098950382e-06, "loss": 0.061, "step": 43110 }, { "epoch": 0.34889554171049436, "grad_norm": 1.0110872983932495, "learning_rate": 9.832114710982758e-06, "loss": 0.0584, "step": 43120 }, { "epoch": 0.34897645440569625, "grad_norm": 0.5906724333763123, "learning_rate": 9.831933226648807e-06, "loss": 0.0425, "step": 43130 }, { "epoch": 0.34905736710089813, "grad_norm": 0.45685675740242004, "learning_rate": 9.83175164595215e-06, "loss": 0.0512, "step": 43140 }, { "epoch": 0.3491382797961, "grad_norm": 0.2666657269001007, "learning_rate": 9.831569968896403e-06, "loss": 0.0341, "step": 43150 }, { "epoch": 0.3492191924913019, "grad_norm": 0.5247259736061096, "learning_rate": 9.831388195485197e-06, "loss": 0.0365, "step": 43160 }, { "epoch": 0.3493001051865038, "grad_norm": 0.38508185744285583, "learning_rate": 9.83120632572215e-06, "loss": 0.0309, "step": 43170 }, { "epoch": 0.3493810178817056, "grad_norm": 0.5267761945724487, "learning_rate": 9.831024359610894e-06, "loss": 0.027, "step": 43180 }, { "epoch": 0.3494619305769075, "grad_norm": 0.7085078954696655, "learning_rate": 9.830842297155056e-06, "loss": 0.0391, "step": 43190 }, { "epoch": 0.3495428432721094, "grad_norm": 0.6348860859870911, "learning_rate": 9.830660138358266e-06, "loss": 0.0279, "step": 43200 }, { "epoch": 0.34962375596731127, "grad_norm": 0.5852175951004028, "learning_rate": 9.830477883224158e-06, "loss": 0.0358, "step": 43210 }, { "epoch": 0.34970466866251315, "grad_norm": 0.6503375172615051, "learning_rate": 9.830295531756365e-06, "loss": 0.0445, "step": 43220 }, { "epoch": 0.34978558135771504, "grad_norm": 0.7150776982307434, "learning_rate": 9.830113083958526e-06, "loss": 0.041, "step": 43230 }, { "epoch": 0.3498664940529169, "grad_norm": 0.7934958338737488, "learning_rate": 9.829930539834278e-06, "loss": 0.0521, "step": 43240 }, { "epoch": 0.3499474067481188, "grad_norm": 0.7846459746360779, "learning_rate": 9.829747899387261e-06, "loss": 0.037, "step": 43250 }, { "epoch": 0.35002831944332063, "grad_norm": 0.4486561119556427, "learning_rate": 9.82956516262112e-06, "loss": 0.0407, "step": 43260 }, { "epoch": 0.3501092321385225, "grad_norm": 0.7693971395492554, "learning_rate": 9.829382329539496e-06, "loss": 0.0442, "step": 43270 }, { "epoch": 0.3501901448337244, "grad_norm": 0.42526623606681824, "learning_rate": 9.829199400146038e-06, "loss": 0.0342, "step": 43280 }, { "epoch": 0.3502710575289263, "grad_norm": 0.5208532810211182, "learning_rate": 9.829016374444391e-06, "loss": 0.0453, "step": 43290 }, { "epoch": 0.3503519702241282, "grad_norm": 0.5809473991394043, "learning_rate": 9.828833252438207e-06, "loss": 0.0527, "step": 43300 }, { "epoch": 0.35043288291933006, "grad_norm": 0.6922796368598938, "learning_rate": 9.82865003413114e-06, "loss": 0.0388, "step": 43310 }, { "epoch": 0.35051379561453194, "grad_norm": 0.8714355826377869, "learning_rate": 9.828466719526841e-06, "loss": 0.0404, "step": 43320 }, { "epoch": 0.35059470830973377, "grad_norm": 0.207795187830925, "learning_rate": 9.828283308628966e-06, "loss": 0.0378, "step": 43330 }, { "epoch": 0.35067562100493566, "grad_norm": 0.49729591608047485, "learning_rate": 9.828099801441172e-06, "loss": 0.0571, "step": 43340 }, { "epoch": 0.35075653370013754, "grad_norm": 0.5055277347564697, "learning_rate": 9.827916197967124e-06, "loss": 0.052, "step": 43350 }, { "epoch": 0.3508374463953394, "grad_norm": 0.5770956873893738, "learning_rate": 9.827732498210477e-06, "loss": 0.0433, "step": 43360 }, { "epoch": 0.3509183590905413, "grad_norm": 0.5137915015220642, "learning_rate": 9.827548702174897e-06, "loss": 0.0316, "step": 43370 }, { "epoch": 0.3509992717857432, "grad_norm": 0.5252555012702942, "learning_rate": 9.82736480986405e-06, "loss": 0.036, "step": 43380 }, { "epoch": 0.3510801844809451, "grad_norm": 1.0346300601959229, "learning_rate": 9.827180821281604e-06, "loss": 0.0456, "step": 43390 }, { "epoch": 0.35116109717614696, "grad_norm": 0.3694866895675659, "learning_rate": 9.826996736431225e-06, "loss": 0.041, "step": 43400 }, { "epoch": 0.3512420098713488, "grad_norm": 0.5435991883277893, "learning_rate": 9.826812555316587e-06, "loss": 0.0507, "step": 43410 }, { "epoch": 0.3513229225665507, "grad_norm": 0.9269537925720215, "learning_rate": 9.826628277941363e-06, "loss": 0.026, "step": 43420 }, { "epoch": 0.35140383526175256, "grad_norm": 0.8975319862365723, "learning_rate": 9.826443904309228e-06, "loss": 0.0309, "step": 43430 }, { "epoch": 0.35148474795695445, "grad_norm": 0.6740427017211914, "learning_rate": 9.826259434423856e-06, "loss": 0.042, "step": 43440 }, { "epoch": 0.35156566065215633, "grad_norm": 1.222548484802246, "learning_rate": 9.82607486828893e-06, "loss": 0.0416, "step": 43450 }, { "epoch": 0.3516465733473582, "grad_norm": 0.5414016842842102, "learning_rate": 9.82589020590813e-06, "loss": 0.0357, "step": 43460 }, { "epoch": 0.3517274860425601, "grad_norm": 0.3478740453720093, "learning_rate": 9.825705447285136e-06, "loss": 0.0753, "step": 43470 }, { "epoch": 0.35180839873776193, "grad_norm": 0.3850070834159851, "learning_rate": 9.825520592423636e-06, "loss": 0.0346, "step": 43480 }, { "epoch": 0.3518893114329638, "grad_norm": 0.8804875612258911, "learning_rate": 9.825335641327314e-06, "loss": 0.0484, "step": 43490 }, { "epoch": 0.3519702241281657, "grad_norm": 0.5054288506507874, "learning_rate": 9.825150593999859e-06, "loss": 0.0373, "step": 43500 }, { "epoch": 0.3520511368233676, "grad_norm": 0.2987825572490692, "learning_rate": 9.824965450444961e-06, "loss": 0.0398, "step": 43510 }, { "epoch": 0.35213204951856947, "grad_norm": 0.5797520279884338, "learning_rate": 9.824780210666316e-06, "loss": 0.0488, "step": 43520 }, { "epoch": 0.35221296221377135, "grad_norm": 0.44523635506629944, "learning_rate": 9.824594874667612e-06, "loss": 0.0367, "step": 43530 }, { "epoch": 0.35229387490897324, "grad_norm": 0.803234875202179, "learning_rate": 9.82440944245255e-06, "loss": 0.0324, "step": 43540 }, { "epoch": 0.3523747876041751, "grad_norm": 2.172307252883911, "learning_rate": 9.824223914024827e-06, "loss": 0.0309, "step": 43550 }, { "epoch": 0.35245570029937695, "grad_norm": 0.440303236246109, "learning_rate": 9.82403828938814e-06, "loss": 0.0322, "step": 43560 }, { "epoch": 0.35253661299457884, "grad_norm": 0.829168975353241, "learning_rate": 9.823852568546196e-06, "loss": 0.0457, "step": 43570 }, { "epoch": 0.3526175256897807, "grad_norm": 0.9881272315979004, "learning_rate": 9.823666751502697e-06, "loss": 0.0432, "step": 43580 }, { "epoch": 0.3526984383849826, "grad_norm": 0.38629546761512756, "learning_rate": 9.823480838261344e-06, "loss": 0.0344, "step": 43590 }, { "epoch": 0.3527793510801845, "grad_norm": 0.41554561257362366, "learning_rate": 9.823294828825852e-06, "loss": 0.0497, "step": 43600 }, { "epoch": 0.3528602637753864, "grad_norm": 0.8106483817100525, "learning_rate": 9.823108723199925e-06, "loss": 0.0437, "step": 43610 }, { "epoch": 0.35294117647058826, "grad_norm": 0.2787083387374878, "learning_rate": 9.822922521387277e-06, "loss": 0.059, "step": 43620 }, { "epoch": 0.3530220891657901, "grad_norm": 0.30792784690856934, "learning_rate": 9.82273622339162e-06, "loss": 0.0345, "step": 43630 }, { "epoch": 0.35310300186099197, "grad_norm": 0.4489777386188507, "learning_rate": 9.822549829216672e-06, "loss": 0.0444, "step": 43640 }, { "epoch": 0.35318391455619386, "grad_norm": 0.48534664511680603, "learning_rate": 9.822363338866147e-06, "loss": 0.037, "step": 43650 }, { "epoch": 0.35326482725139574, "grad_norm": 0.6017678380012512, "learning_rate": 9.822176752343768e-06, "loss": 0.0366, "step": 43660 }, { "epoch": 0.3533457399465976, "grad_norm": 0.8945099115371704, "learning_rate": 9.821990069653252e-06, "loss": 0.0433, "step": 43670 }, { "epoch": 0.3534266526417995, "grad_norm": 0.3894028067588806, "learning_rate": 9.821803290798323e-06, "loss": 0.0497, "step": 43680 }, { "epoch": 0.3535075653370014, "grad_norm": 0.41859349608421326, "learning_rate": 9.821616415782708e-06, "loss": 0.0432, "step": 43690 }, { "epoch": 0.3535884780322033, "grad_norm": 0.37914979457855225, "learning_rate": 9.821429444610132e-06, "loss": 0.04, "step": 43700 }, { "epoch": 0.3536693907274051, "grad_norm": 0.6013879179954529, "learning_rate": 9.821242377284323e-06, "loss": 0.0372, "step": 43710 }, { "epoch": 0.353750303422607, "grad_norm": 0.7518709301948547, "learning_rate": 9.821055213809015e-06, "loss": 0.0368, "step": 43720 }, { "epoch": 0.3538312161178089, "grad_norm": 0.9405959844589233, "learning_rate": 9.820867954187939e-06, "loss": 0.0477, "step": 43730 }, { "epoch": 0.35391212881301076, "grad_norm": 0.6778365969657898, "learning_rate": 9.820680598424826e-06, "loss": 0.0369, "step": 43740 }, { "epoch": 0.35399304150821265, "grad_norm": 0.6508309841156006, "learning_rate": 9.820493146523418e-06, "loss": 0.0403, "step": 43750 }, { "epoch": 0.35407395420341453, "grad_norm": 1.1814619302749634, "learning_rate": 9.82030559848745e-06, "loss": 0.0503, "step": 43760 }, { "epoch": 0.3541548668986164, "grad_norm": 0.37344637513160706, "learning_rate": 9.820117954320662e-06, "loss": 0.0365, "step": 43770 }, { "epoch": 0.35423577959381825, "grad_norm": 0.7624437212944031, "learning_rate": 9.819930214026799e-06, "loss": 0.0485, "step": 43780 }, { "epoch": 0.35431669228902013, "grad_norm": 1.0671796798706055, "learning_rate": 9.8197423776096e-06, "loss": 0.0586, "step": 43790 }, { "epoch": 0.354397604984222, "grad_norm": 0.9292616248130798, "learning_rate": 9.819554445072816e-06, "loss": 0.0359, "step": 43800 }, { "epoch": 0.3544785176794239, "grad_norm": 0.5390177369117737, "learning_rate": 9.819366416420192e-06, "loss": 0.0461, "step": 43810 }, { "epoch": 0.3545594303746258, "grad_norm": 0.5351483225822449, "learning_rate": 9.819178291655482e-06, "loss": 0.0402, "step": 43820 }, { "epoch": 0.35464034306982767, "grad_norm": 0.5205219984054565, "learning_rate": 9.818990070782433e-06, "loss": 0.0571, "step": 43830 }, { "epoch": 0.35472125576502955, "grad_norm": 0.30150943994522095, "learning_rate": 9.8188017538048e-06, "loss": 0.0354, "step": 43840 }, { "epoch": 0.35480216846023144, "grad_norm": 0.1501692533493042, "learning_rate": 9.81861334072634e-06, "loss": 0.0374, "step": 43850 }, { "epoch": 0.35488308115543327, "grad_norm": 0.5588148236274719, "learning_rate": 9.818424831550808e-06, "loss": 0.0284, "step": 43860 }, { "epoch": 0.35496399385063515, "grad_norm": 0.48111996054649353, "learning_rate": 9.818236226281966e-06, "loss": 0.0518, "step": 43870 }, { "epoch": 0.35504490654583704, "grad_norm": 0.2904268503189087, "learning_rate": 9.818047524923573e-06, "loss": 0.0486, "step": 43880 }, { "epoch": 0.3551258192410389, "grad_norm": 0.2876165509223938, "learning_rate": 9.817858727479395e-06, "loss": 0.0362, "step": 43890 }, { "epoch": 0.3552067319362408, "grad_norm": 0.9422343373298645, "learning_rate": 9.817669833953194e-06, "loss": 0.0409, "step": 43900 }, { "epoch": 0.3552876446314427, "grad_norm": 0.5904098749160767, "learning_rate": 9.817480844348741e-06, "loss": 0.0358, "step": 43910 }, { "epoch": 0.3553685573266446, "grad_norm": 0.5049382448196411, "learning_rate": 9.8172917586698e-06, "loss": 0.0271, "step": 43920 }, { "epoch": 0.3554494700218464, "grad_norm": 0.6242696046829224, "learning_rate": 9.817102576920146e-06, "loss": 0.0358, "step": 43930 }, { "epoch": 0.3555303827170483, "grad_norm": 0.7697364687919617, "learning_rate": 9.81691329910355e-06, "loss": 0.0384, "step": 43940 }, { "epoch": 0.3556112954122502, "grad_norm": 0.5555935502052307, "learning_rate": 9.816723925223788e-06, "loss": 0.0367, "step": 43950 }, { "epoch": 0.35569220810745206, "grad_norm": 0.5519618988037109, "learning_rate": 9.816534455284637e-06, "loss": 0.0385, "step": 43960 }, { "epoch": 0.35577312080265394, "grad_norm": 0.6407132744789124, "learning_rate": 9.816344889289872e-06, "loss": 0.0552, "step": 43970 }, { "epoch": 0.3558540334978558, "grad_norm": 0.6308128237724304, "learning_rate": 9.816155227243278e-06, "loss": 0.0408, "step": 43980 }, { "epoch": 0.3559349461930577, "grad_norm": 0.5593422651290894, "learning_rate": 9.815965469148634e-06, "loss": 0.045, "step": 43990 }, { "epoch": 0.3560158588882596, "grad_norm": 0.6609705686569214, "learning_rate": 9.815775615009727e-06, "loss": 0.0508, "step": 44000 }, { "epoch": 0.3560967715834614, "grad_norm": 0.8130130767822266, "learning_rate": 9.81558566483034e-06, "loss": 0.0428, "step": 44010 }, { "epoch": 0.3561776842786633, "grad_norm": 0.36982864141464233, "learning_rate": 9.815395618614262e-06, "loss": 0.0491, "step": 44020 }, { "epoch": 0.3562585969738652, "grad_norm": 0.42128974199295044, "learning_rate": 9.815205476365288e-06, "loss": 0.0388, "step": 44030 }, { "epoch": 0.3563395096690671, "grad_norm": 0.3330642879009247, "learning_rate": 9.815015238087203e-06, "loss": 0.0483, "step": 44040 }, { "epoch": 0.35642042236426896, "grad_norm": 0.53367680311203, "learning_rate": 9.814824903783806e-06, "loss": 0.0609, "step": 44050 }, { "epoch": 0.35650133505947085, "grad_norm": 0.4387195408344269, "learning_rate": 9.814634473458889e-06, "loss": 0.0399, "step": 44060 }, { "epoch": 0.35658224775467273, "grad_norm": 0.49239131808280945, "learning_rate": 9.814443947116253e-06, "loss": 0.0304, "step": 44070 }, { "epoch": 0.35666316044987456, "grad_norm": 0.2914224863052368, "learning_rate": 9.814253324759694e-06, "loss": 0.0526, "step": 44080 }, { "epoch": 0.35674407314507645, "grad_norm": 0.27576637268066406, "learning_rate": 9.81406260639302e-06, "loss": 0.0389, "step": 44090 }, { "epoch": 0.35682498584027833, "grad_norm": 0.31023702025413513, "learning_rate": 9.813871792020026e-06, "loss": 0.0506, "step": 44100 }, { "epoch": 0.3569058985354802, "grad_norm": 0.5809418559074402, "learning_rate": 9.813680881644525e-06, "loss": 0.0345, "step": 44110 }, { "epoch": 0.3569868112306821, "grad_norm": 0.2529957592487335, "learning_rate": 9.813489875270319e-06, "loss": 0.033, "step": 44120 }, { "epoch": 0.357067723925884, "grad_norm": 0.5150800347328186, "learning_rate": 9.81329877290122e-06, "loss": 0.0341, "step": 44130 }, { "epoch": 0.35714863662108587, "grad_norm": 1.1417255401611328, "learning_rate": 9.813107574541037e-06, "loss": 0.0647, "step": 44140 }, { "epoch": 0.3572295493162877, "grad_norm": 1.099076509475708, "learning_rate": 9.812916280193585e-06, "loss": 0.0428, "step": 44150 }, { "epoch": 0.3573104620114896, "grad_norm": 0.26423346996307373, "learning_rate": 9.81272488986268e-06, "loss": 0.0342, "step": 44160 }, { "epoch": 0.35739137470669147, "grad_norm": 0.480620801448822, "learning_rate": 9.812533403552134e-06, "loss": 0.0371, "step": 44170 }, { "epoch": 0.35747228740189335, "grad_norm": 0.46205201745033264, "learning_rate": 9.812341821265771e-06, "loss": 0.0548, "step": 44180 }, { "epoch": 0.35755320009709524, "grad_norm": 0.534390926361084, "learning_rate": 9.81215014300741e-06, "loss": 0.0391, "step": 44190 }, { "epoch": 0.3576341127922971, "grad_norm": 1.3928042650222778, "learning_rate": 9.811958368780873e-06, "loss": 0.0522, "step": 44200 }, { "epoch": 0.357715025487499, "grad_norm": 0.577026903629303, "learning_rate": 9.811766498589985e-06, "loss": 0.0473, "step": 44210 }, { "epoch": 0.3577959381827009, "grad_norm": 0.9558101296424866, "learning_rate": 9.81157453243857e-06, "loss": 0.0622, "step": 44220 }, { "epoch": 0.3578768508779027, "grad_norm": 0.34089672565460205, "learning_rate": 9.811382470330461e-06, "loss": 0.0281, "step": 44230 }, { "epoch": 0.3579577635731046, "grad_norm": 0.4940060079097748, "learning_rate": 9.811190312269485e-06, "loss": 0.0326, "step": 44240 }, { "epoch": 0.3580386762683065, "grad_norm": 0.5773476958274841, "learning_rate": 9.810998058259476e-06, "loss": 0.0346, "step": 44250 }, { "epoch": 0.3581195889635084, "grad_norm": 0.35404330492019653, "learning_rate": 9.810805708304268e-06, "loss": 0.035, "step": 44260 }, { "epoch": 0.35820050165871026, "grad_norm": 0.421905517578125, "learning_rate": 9.810613262407693e-06, "loss": 0.0334, "step": 44270 }, { "epoch": 0.35828141435391214, "grad_norm": 0.29125866293907166, "learning_rate": 9.810420720573595e-06, "loss": 0.0257, "step": 44280 }, { "epoch": 0.358362327049114, "grad_norm": 0.704045295715332, "learning_rate": 9.81022808280581e-06, "loss": 0.0608, "step": 44290 }, { "epoch": 0.35844323974431586, "grad_norm": 0.5268750190734863, "learning_rate": 9.81003534910818e-06, "loss": 0.0635, "step": 44300 }, { "epoch": 0.35852415243951774, "grad_norm": 0.6513175368309021, "learning_rate": 9.80984251948455e-06, "loss": 0.0516, "step": 44310 }, { "epoch": 0.3586050651347196, "grad_norm": 0.7058141827583313, "learning_rate": 9.809649593938765e-06, "loss": 0.046, "step": 44320 }, { "epoch": 0.3586859778299215, "grad_norm": 0.30910229682922363, "learning_rate": 9.809456572474672e-06, "loss": 0.0295, "step": 44330 }, { "epoch": 0.3587668905251234, "grad_norm": 0.557197093963623, "learning_rate": 9.809263455096122e-06, "loss": 0.0483, "step": 44340 }, { "epoch": 0.3588478032203253, "grad_norm": 0.46977880597114563, "learning_rate": 9.809070241806965e-06, "loss": 0.0506, "step": 44350 }, { "epoch": 0.35892871591552716, "grad_norm": 0.23528867959976196, "learning_rate": 9.808876932611054e-06, "loss": 0.0436, "step": 44360 }, { "epoch": 0.35900962861072905, "grad_norm": 0.38044705986976624, "learning_rate": 9.808683527512243e-06, "loss": 0.0306, "step": 44370 }, { "epoch": 0.3590905413059309, "grad_norm": 0.6654831171035767, "learning_rate": 9.808490026514393e-06, "loss": 0.0426, "step": 44380 }, { "epoch": 0.35917145400113276, "grad_norm": 0.7625883221626282, "learning_rate": 9.80829642962136e-06, "loss": 0.0382, "step": 44390 }, { "epoch": 0.35925236669633465, "grad_norm": 0.636053204536438, "learning_rate": 9.808102736837006e-06, "loss": 0.0406, "step": 44400 }, { "epoch": 0.35933327939153653, "grad_norm": 0.43833231925964355, "learning_rate": 9.807908948165193e-06, "loss": 0.0308, "step": 44410 }, { "epoch": 0.3594141920867384, "grad_norm": 0.7493987679481506, "learning_rate": 9.807715063609784e-06, "loss": 0.0591, "step": 44420 }, { "epoch": 0.3594951047819403, "grad_norm": 0.8131035566329956, "learning_rate": 9.807521083174649e-06, "loss": 0.0354, "step": 44430 }, { "epoch": 0.3595760174771422, "grad_norm": 0.2782813012599945, "learning_rate": 9.807327006863656e-06, "loss": 0.0372, "step": 44440 }, { "epoch": 0.359656930172344, "grad_norm": 0.528793454170227, "learning_rate": 9.807132834680673e-06, "loss": 0.0315, "step": 44450 }, { "epoch": 0.3597378428675459, "grad_norm": 0.2811039984226227, "learning_rate": 9.806938566629576e-06, "loss": 0.0381, "step": 44460 }, { "epoch": 0.3598187555627478, "grad_norm": 0.6460373997688293, "learning_rate": 9.806744202714235e-06, "loss": 0.0326, "step": 44470 }, { "epoch": 0.35989966825794967, "grad_norm": 0.06854267418384552, "learning_rate": 9.80654974293853e-06, "loss": 0.0523, "step": 44480 }, { "epoch": 0.35998058095315155, "grad_norm": 1.406319499015808, "learning_rate": 9.806355187306335e-06, "loss": 0.0404, "step": 44490 }, { "epoch": 0.36006149364835344, "grad_norm": 0.5030149221420288, "learning_rate": 9.806160535821535e-06, "loss": 0.057, "step": 44500 }, { "epoch": 0.3601424063435553, "grad_norm": 0.3331298232078552, "learning_rate": 9.805965788488008e-06, "loss": 0.0309, "step": 44510 }, { "epoch": 0.3602233190387572, "grad_norm": 0.5512061715126038, "learning_rate": 9.80577094530964e-06, "loss": 0.0365, "step": 44520 }, { "epoch": 0.36030423173395904, "grad_norm": 0.7127638459205627, "learning_rate": 9.805576006290316e-06, "loss": 0.0535, "step": 44530 }, { "epoch": 0.3603851444291609, "grad_norm": 0.5839712619781494, "learning_rate": 9.805380971433923e-06, "loss": 0.0426, "step": 44540 }, { "epoch": 0.3604660571243628, "grad_norm": 0.6748013496398926, "learning_rate": 9.80518584074435e-06, "loss": 0.0435, "step": 44550 }, { "epoch": 0.3605469698195647, "grad_norm": 0.4775528907775879, "learning_rate": 9.804990614225492e-06, "loss": 0.039, "step": 44560 }, { "epoch": 0.3606278825147666, "grad_norm": 0.5295855402946472, "learning_rate": 9.80479529188124e-06, "loss": 0.0429, "step": 44570 }, { "epoch": 0.36070879520996846, "grad_norm": 0.5295893549919128, "learning_rate": 9.804599873715488e-06, "loss": 0.0428, "step": 44580 }, { "epoch": 0.36078970790517034, "grad_norm": 0.08269200474023819, "learning_rate": 9.804404359732133e-06, "loss": 0.0528, "step": 44590 }, { "epoch": 0.36087062060037217, "grad_norm": 1.0413563251495361, "learning_rate": 9.804208749935077e-06, "loss": 0.0491, "step": 44600 }, { "epoch": 0.36095153329557406, "grad_norm": 0.4046511650085449, "learning_rate": 9.804013044328223e-06, "loss": 0.0503, "step": 44610 }, { "epoch": 0.36103244599077594, "grad_norm": 0.8633009791374207, "learning_rate": 9.803817242915465e-06, "loss": 0.0518, "step": 44620 }, { "epoch": 0.3611133586859778, "grad_norm": 0.4871145784854889, "learning_rate": 9.803621345700717e-06, "loss": 0.0492, "step": 44630 }, { "epoch": 0.3611942713811797, "grad_norm": 0.8522970676422119, "learning_rate": 9.80342535268788e-06, "loss": 0.0385, "step": 44640 }, { "epoch": 0.3612751840763816, "grad_norm": 0.35782110691070557, "learning_rate": 9.803229263880867e-06, "loss": 0.0419, "step": 44650 }, { "epoch": 0.3613560967715835, "grad_norm": 0.5916242003440857, "learning_rate": 9.803033079283584e-06, "loss": 0.0362, "step": 44660 }, { "epoch": 0.36143700946678536, "grad_norm": 0.6278185248374939, "learning_rate": 9.802836798899948e-06, "loss": 0.0525, "step": 44670 }, { "epoch": 0.3615179221619872, "grad_norm": 0.5175013542175293, "learning_rate": 9.80264042273387e-06, "loss": 0.0516, "step": 44680 }, { "epoch": 0.3615988348571891, "grad_norm": 0.5748089551925659, "learning_rate": 9.80244395078927e-06, "loss": 0.052, "step": 44690 }, { "epoch": 0.36167974755239096, "grad_norm": 0.7767932415008545, "learning_rate": 9.80224738307006e-06, "loss": 0.0496, "step": 44700 }, { "epoch": 0.36176066024759285, "grad_norm": 0.20772749185562134, "learning_rate": 9.802050719580164e-06, "loss": 0.0642, "step": 44710 }, { "epoch": 0.36184157294279473, "grad_norm": 0.5345839858055115, "learning_rate": 9.801853960323507e-06, "loss": 0.0421, "step": 44720 }, { "epoch": 0.3619224856379966, "grad_norm": 0.26694706082344055, "learning_rate": 9.80165710530401e-06, "loss": 0.0374, "step": 44730 }, { "epoch": 0.3620033983331985, "grad_norm": 0.5427584052085876, "learning_rate": 9.801460154525598e-06, "loss": 0.039, "step": 44740 }, { "epoch": 0.36208431102840033, "grad_norm": 0.5995351672172546, "learning_rate": 9.801263107992199e-06, "loss": 0.0461, "step": 44750 }, { "epoch": 0.3621652237236022, "grad_norm": 1.5329228639602661, "learning_rate": 9.801065965707741e-06, "loss": 0.046, "step": 44760 }, { "epoch": 0.3622461364188041, "grad_norm": 0.496112585067749, "learning_rate": 9.800868727676161e-06, "loss": 0.0416, "step": 44770 }, { "epoch": 0.362327049114006, "grad_norm": 0.7682268619537354, "learning_rate": 9.80067139390139e-06, "loss": 0.0604, "step": 44780 }, { "epoch": 0.36240796180920787, "grad_norm": 0.861554741859436, "learning_rate": 9.80047396438736e-06, "loss": 0.0302, "step": 44790 }, { "epoch": 0.36248887450440975, "grad_norm": 0.5250504612922668, "learning_rate": 9.800276439138012e-06, "loss": 0.0295, "step": 44800 }, { "epoch": 0.36256978719961164, "grad_norm": 0.8049880862236023, "learning_rate": 9.800078818157283e-06, "loss": 0.0439, "step": 44810 }, { "epoch": 0.3626506998948135, "grad_norm": 1.0300581455230713, "learning_rate": 9.799881101449118e-06, "loss": 0.0437, "step": 44820 }, { "epoch": 0.36273161259001535, "grad_norm": 0.6438888311386108, "learning_rate": 9.799683289017456e-06, "loss": 0.0396, "step": 44830 }, { "epoch": 0.36281252528521724, "grad_norm": 0.6045061945915222, "learning_rate": 9.799485380866243e-06, "loss": 0.0486, "step": 44840 }, { "epoch": 0.3628934379804191, "grad_norm": 0.5977910161018372, "learning_rate": 9.799287376999427e-06, "loss": 0.0452, "step": 44850 }, { "epoch": 0.362974350675621, "grad_norm": 0.823468804359436, "learning_rate": 9.799089277420955e-06, "loss": 0.0308, "step": 44860 }, { "epoch": 0.3630552633708229, "grad_norm": 0.6252397298812866, "learning_rate": 9.79889108213478e-06, "loss": 0.0329, "step": 44870 }, { "epoch": 0.3631361760660248, "grad_norm": 0.38202640414237976, "learning_rate": 9.798692791144852e-06, "loss": 0.0294, "step": 44880 }, { "epoch": 0.36321708876122666, "grad_norm": 0.20242242515087128, "learning_rate": 9.798494404455128e-06, "loss": 0.0345, "step": 44890 }, { "epoch": 0.3632980014564285, "grad_norm": 0.28162604570388794, "learning_rate": 9.798295922069564e-06, "loss": 0.0275, "step": 44900 }, { "epoch": 0.36337891415163037, "grad_norm": 0.6775141954421997, "learning_rate": 9.798097343992115e-06, "loss": 0.039, "step": 44910 }, { "epoch": 0.36345982684683226, "grad_norm": 0.8123382329940796, "learning_rate": 9.797898670226745e-06, "loss": 0.0442, "step": 44920 }, { "epoch": 0.36354073954203414, "grad_norm": 0.4140804708003998, "learning_rate": 9.797699900777415e-06, "loss": 0.0464, "step": 44930 }, { "epoch": 0.363621652237236, "grad_norm": 0.43385419249534607, "learning_rate": 9.797501035648088e-06, "loss": 0.0385, "step": 44940 }, { "epoch": 0.3637025649324379, "grad_norm": 1.0787274837493896, "learning_rate": 9.797302074842732e-06, "loss": 0.0402, "step": 44950 }, { "epoch": 0.3637834776276398, "grad_norm": 0.8029159903526306, "learning_rate": 9.797103018365312e-06, "loss": 0.053, "step": 44960 }, { "epoch": 0.3638643903228417, "grad_norm": 0.5454792380332947, "learning_rate": 9.7969038662198e-06, "loss": 0.0585, "step": 44970 }, { "epoch": 0.3639453030180435, "grad_norm": 0.4285906255245209, "learning_rate": 9.796704618410169e-06, "loss": 0.0653, "step": 44980 }, { "epoch": 0.3640262157132454, "grad_norm": 0.6637856960296631, "learning_rate": 9.796505274940388e-06, "loss": 0.051, "step": 44990 }, { "epoch": 0.3641071284084473, "grad_norm": 0.3273371160030365, "learning_rate": 9.796305835814436e-06, "loss": 0.0369, "step": 45000 }, { "epoch": 0.36418804110364916, "grad_norm": 0.6967399716377258, "learning_rate": 9.796106301036291e-06, "loss": 0.0591, "step": 45010 }, { "epoch": 0.36426895379885105, "grad_norm": 0.8536589741706848, "learning_rate": 9.79590667060993e-06, "loss": 0.0373, "step": 45020 }, { "epoch": 0.36434986649405293, "grad_norm": 0.2696180045604706, "learning_rate": 9.795706944539335e-06, "loss": 0.0369, "step": 45030 }, { "epoch": 0.3644307791892548, "grad_norm": 1.0765597820281982, "learning_rate": 9.795507122828489e-06, "loss": 0.0291, "step": 45040 }, { "epoch": 0.36451169188445665, "grad_norm": 0.6925554871559143, "learning_rate": 9.795307205481377e-06, "loss": 0.0449, "step": 45050 }, { "epoch": 0.36459260457965853, "grad_norm": 0.4858732223510742, "learning_rate": 9.795107192501988e-06, "loss": 0.034, "step": 45060 }, { "epoch": 0.3646735172748604, "grad_norm": 0.5693264007568359, "learning_rate": 9.794907083894306e-06, "loss": 0.0369, "step": 45070 }, { "epoch": 0.3647544299700623, "grad_norm": 0.8061147928237915, "learning_rate": 9.794706879662326e-06, "loss": 0.0448, "step": 45080 }, { "epoch": 0.3648353426652642, "grad_norm": 0.16596423089504242, "learning_rate": 9.79450657981004e-06, "loss": 0.0415, "step": 45090 }, { "epoch": 0.36491625536046607, "grad_norm": 0.6187705397605896, "learning_rate": 9.79430618434144e-06, "loss": 0.048, "step": 45100 }, { "epoch": 0.36499716805566795, "grad_norm": 0.7970336079597473, "learning_rate": 9.794105693260526e-06, "loss": 0.0465, "step": 45110 }, { "epoch": 0.36507808075086984, "grad_norm": 0.24379076063632965, "learning_rate": 9.793905106571295e-06, "loss": 0.0412, "step": 45120 }, { "epoch": 0.36515899344607167, "grad_norm": 0.5096185207366943, "learning_rate": 9.793704424277748e-06, "loss": 0.0505, "step": 45130 }, { "epoch": 0.36523990614127355, "grad_norm": 0.6298678517341614, "learning_rate": 9.793503646383883e-06, "loss": 0.0373, "step": 45140 }, { "epoch": 0.36532081883647544, "grad_norm": 0.37702280282974243, "learning_rate": 9.793302772893709e-06, "loss": 0.0391, "step": 45150 }, { "epoch": 0.3654017315316773, "grad_norm": 0.4003276228904724, "learning_rate": 9.793101803811232e-06, "loss": 0.0349, "step": 45160 }, { "epoch": 0.3654826442268792, "grad_norm": 0.385903924703598, "learning_rate": 9.792900739140456e-06, "loss": 0.0434, "step": 45170 }, { "epoch": 0.3655635569220811, "grad_norm": 0.5985631942749023, "learning_rate": 9.792699578885394e-06, "loss": 0.0324, "step": 45180 }, { "epoch": 0.365644469617283, "grad_norm": 0.7995576858520508, "learning_rate": 9.792498323050056e-06, "loss": 0.0469, "step": 45190 }, { "epoch": 0.3657253823124848, "grad_norm": 0.9701787233352661, "learning_rate": 9.792296971638457e-06, "loss": 0.0572, "step": 45200 }, { "epoch": 0.3658062950076867, "grad_norm": 0.37122878432273865, "learning_rate": 9.792095524654612e-06, "loss": 0.046, "step": 45210 }, { "epoch": 0.3658872077028886, "grad_norm": 0.8103926777839661, "learning_rate": 9.791893982102537e-06, "loss": 0.0476, "step": 45220 }, { "epoch": 0.36596812039809046, "grad_norm": 0.502572774887085, "learning_rate": 9.791692343986253e-06, "loss": 0.0319, "step": 45230 }, { "epoch": 0.36604903309329234, "grad_norm": 0.6115860939025879, "learning_rate": 9.791490610309782e-06, "loss": 0.0444, "step": 45240 }, { "epoch": 0.3661299457884942, "grad_norm": 0.44423237442970276, "learning_rate": 9.791288781077146e-06, "loss": 0.0397, "step": 45250 }, { "epoch": 0.3662108584836961, "grad_norm": 1.265331506729126, "learning_rate": 9.79108685629237e-06, "loss": 0.0631, "step": 45260 }, { "epoch": 0.366291771178898, "grad_norm": 0.46133801341056824, "learning_rate": 9.79088483595948e-06, "loss": 0.0362, "step": 45270 }, { "epoch": 0.3663726838740998, "grad_norm": 0.5857868790626526, "learning_rate": 9.790682720082507e-06, "loss": 0.038, "step": 45280 }, { "epoch": 0.3664535965693017, "grad_norm": 0.6125845313072205, "learning_rate": 9.790480508665481e-06, "loss": 0.0608, "step": 45290 }, { "epoch": 0.3665345092645036, "grad_norm": 0.16548402607440948, "learning_rate": 9.790278201712433e-06, "loss": 0.0277, "step": 45300 }, { "epoch": 0.3666154219597055, "grad_norm": 0.7196136713027954, "learning_rate": 9.7900757992274e-06, "loss": 0.0412, "step": 45310 }, { "epoch": 0.36669633465490736, "grad_norm": 0.7632731199264526, "learning_rate": 9.789873301214417e-06, "loss": 0.0438, "step": 45320 }, { "epoch": 0.36677724735010925, "grad_norm": 0.40027573704719543, "learning_rate": 9.789670707677523e-06, "loss": 0.0344, "step": 45330 }, { "epoch": 0.36685816004531113, "grad_norm": 0.8165467977523804, "learning_rate": 9.789468018620758e-06, "loss": 0.0266, "step": 45340 }, { "epoch": 0.36693907274051296, "grad_norm": 0.4766736924648285, "learning_rate": 9.789265234048165e-06, "loss": 0.0301, "step": 45350 }, { "epoch": 0.36701998543571485, "grad_norm": 0.4814531207084656, "learning_rate": 9.789062353963788e-06, "loss": 0.0474, "step": 45360 }, { "epoch": 0.36710089813091673, "grad_norm": 0.4584771394729614, "learning_rate": 9.78885937837167e-06, "loss": 0.0413, "step": 45370 }, { "epoch": 0.3671818108261186, "grad_norm": 0.4194774925708771, "learning_rate": 9.788656307275865e-06, "loss": 0.0262, "step": 45380 }, { "epoch": 0.3672627235213205, "grad_norm": 0.5686079263687134, "learning_rate": 9.788453140680417e-06, "loss": 0.0474, "step": 45390 }, { "epoch": 0.3673436362165224, "grad_norm": 0.6791514754295349, "learning_rate": 9.78824987858938e-06, "loss": 0.06, "step": 45400 }, { "epoch": 0.36742454891172427, "grad_norm": 0.4112973213195801, "learning_rate": 9.788046521006809e-06, "loss": 0.0311, "step": 45410 }, { "epoch": 0.36750546160692615, "grad_norm": 0.5159603953361511, "learning_rate": 9.787843067936756e-06, "loss": 0.0276, "step": 45420 }, { "epoch": 0.367586374302128, "grad_norm": 0.32338425517082214, "learning_rate": 9.787639519383282e-06, "loss": 0.0371, "step": 45430 }, { "epoch": 0.36766728699732987, "grad_norm": 0.8396700024604797, "learning_rate": 9.787435875350446e-06, "loss": 0.0346, "step": 45440 }, { "epoch": 0.36774819969253175, "grad_norm": 0.6269276738166809, "learning_rate": 9.787232135842307e-06, "loss": 0.0311, "step": 45450 }, { "epoch": 0.36782911238773364, "grad_norm": 0.42922571301460266, "learning_rate": 9.787028300862928e-06, "loss": 0.0398, "step": 45460 }, { "epoch": 0.3679100250829355, "grad_norm": 0.8466911911964417, "learning_rate": 9.786824370416376e-06, "loss": 0.0446, "step": 45470 }, { "epoch": 0.3679909377781374, "grad_norm": 0.25023260712623596, "learning_rate": 9.78662034450672e-06, "loss": 0.0502, "step": 45480 }, { "epoch": 0.3680718504733393, "grad_norm": 0.9642614126205444, "learning_rate": 9.786416223138023e-06, "loss": 0.0501, "step": 45490 }, { "epoch": 0.3681527631685411, "grad_norm": 0.42075684666633606, "learning_rate": 9.78621200631436e-06, "loss": 0.023, "step": 45500 }, { "epoch": 0.368233675863743, "grad_norm": 0.6697104573249817, "learning_rate": 9.786007694039803e-06, "loss": 0.0443, "step": 45510 }, { "epoch": 0.3683145885589449, "grad_norm": 0.6827733516693115, "learning_rate": 9.785803286318427e-06, "loss": 0.0378, "step": 45520 }, { "epoch": 0.3683955012541468, "grad_norm": 0.4879629611968994, "learning_rate": 9.785598783154306e-06, "loss": 0.0421, "step": 45530 }, { "epoch": 0.36847641394934866, "grad_norm": 0.46122536063194275, "learning_rate": 9.785394184551521e-06, "loss": 0.0252, "step": 45540 }, { "epoch": 0.36855732664455054, "grad_norm": 0.4968284070491791, "learning_rate": 9.785189490514151e-06, "loss": 0.0535, "step": 45550 }, { "epoch": 0.3686382393397524, "grad_norm": 0.36464396119117737, "learning_rate": 9.78498470104628e-06, "loss": 0.0458, "step": 45560 }, { "epoch": 0.3687191520349543, "grad_norm": 0.48594242334365845, "learning_rate": 9.78477981615199e-06, "loss": 0.0387, "step": 45570 }, { "epoch": 0.36880006473015614, "grad_norm": 0.40005654096603394, "learning_rate": 9.784574835835368e-06, "loss": 0.0368, "step": 45580 }, { "epoch": 0.368880977425358, "grad_norm": 0.8402001857757568, "learning_rate": 9.7843697601005e-06, "loss": 0.0459, "step": 45590 }, { "epoch": 0.3689618901205599, "grad_norm": 0.7807493805885315, "learning_rate": 9.78416458895148e-06, "loss": 0.0301, "step": 45600 }, { "epoch": 0.3690428028157618, "grad_norm": 1.1548089981079102, "learning_rate": 9.783959322392394e-06, "loss": 0.0441, "step": 45610 }, { "epoch": 0.3691237155109637, "grad_norm": 0.2676165997982025, "learning_rate": 9.783753960427341e-06, "loss": 0.0427, "step": 45620 }, { "epoch": 0.36920462820616556, "grad_norm": 0.6959612369537354, "learning_rate": 9.783548503060413e-06, "loss": 0.0469, "step": 45630 }, { "epoch": 0.36928554090136745, "grad_norm": 0.43064427375793457, "learning_rate": 9.78334295029571e-06, "loss": 0.0487, "step": 45640 }, { "epoch": 0.3693664535965693, "grad_norm": 0.34822574257850647, "learning_rate": 9.783137302137327e-06, "loss": 0.03, "step": 45650 }, { "epoch": 0.36944736629177116, "grad_norm": 0.36986422538757324, "learning_rate": 9.782931558589371e-06, "loss": 0.049, "step": 45660 }, { "epoch": 0.36952827898697305, "grad_norm": 0.6388073563575745, "learning_rate": 9.782725719655942e-06, "loss": 0.0332, "step": 45670 }, { "epoch": 0.36960919168217493, "grad_norm": 0.5819413065910339, "learning_rate": 9.782519785341144e-06, "loss": 0.0449, "step": 45680 }, { "epoch": 0.3696901043773768, "grad_norm": 0.6216509342193604, "learning_rate": 9.782313755649087e-06, "loss": 0.038, "step": 45690 }, { "epoch": 0.3697710170725787, "grad_norm": 0.7584586143493652, "learning_rate": 9.782107630583876e-06, "loss": 0.0437, "step": 45700 }, { "epoch": 0.3698519297677806, "grad_norm": 0.7192513942718506, "learning_rate": 9.781901410149624e-06, "loss": 0.0544, "step": 45710 }, { "epoch": 0.36993284246298247, "grad_norm": 0.4819181263446808, "learning_rate": 9.781695094350446e-06, "loss": 0.039, "step": 45720 }, { "epoch": 0.3700137551581843, "grad_norm": 0.4430712163448334, "learning_rate": 9.781488683190451e-06, "loss": 0.0436, "step": 45730 }, { "epoch": 0.3700946678533862, "grad_norm": 0.3904288411140442, "learning_rate": 9.78128217667376e-06, "loss": 0.0219, "step": 45740 }, { "epoch": 0.37017558054858807, "grad_norm": 0.9255416393280029, "learning_rate": 9.781075574804488e-06, "loss": 0.0361, "step": 45750 }, { "epoch": 0.37025649324378995, "grad_norm": 0.7262287139892578, "learning_rate": 9.78086887758676e-06, "loss": 0.049, "step": 45760 }, { "epoch": 0.37033740593899184, "grad_norm": 0.6637946367263794, "learning_rate": 9.780662085024692e-06, "loss": 0.0404, "step": 45770 }, { "epoch": 0.3704183186341937, "grad_norm": 0.5406394600868225, "learning_rate": 9.780455197122413e-06, "loss": 0.0513, "step": 45780 }, { "epoch": 0.3704992313293956, "grad_norm": 0.28842100501060486, "learning_rate": 9.780248213884045e-06, "loss": 0.0272, "step": 45790 }, { "epoch": 0.37058014402459744, "grad_norm": 0.8782640099525452, "learning_rate": 9.78004113531372e-06, "loss": 0.0484, "step": 45800 }, { "epoch": 0.3706610567197993, "grad_norm": 0.5304315090179443, "learning_rate": 9.779833961415566e-06, "loss": 0.0332, "step": 45810 }, { "epoch": 0.3707419694150012, "grad_norm": 0.9103389978408813, "learning_rate": 9.779626692193713e-06, "loss": 0.0371, "step": 45820 }, { "epoch": 0.3708228821102031, "grad_norm": 0.5368330478668213, "learning_rate": 9.779419327652297e-06, "loss": 0.038, "step": 45830 }, { "epoch": 0.370903794805405, "grad_norm": 0.4549373388290405, "learning_rate": 9.77921186779545e-06, "loss": 0.034, "step": 45840 }, { "epoch": 0.37098470750060686, "grad_norm": 0.3263961374759674, "learning_rate": 9.779004312627315e-06, "loss": 0.0481, "step": 45850 }, { "epoch": 0.37106562019580874, "grad_norm": 0.8169830441474915, "learning_rate": 9.778796662152026e-06, "loss": 0.0379, "step": 45860 }, { "epoch": 0.37114653289101057, "grad_norm": 0.7707326412200928, "learning_rate": 9.778588916373726e-06, "loss": 0.032, "step": 45870 }, { "epoch": 0.37122744558621246, "grad_norm": 0.5134626626968384, "learning_rate": 9.778381075296558e-06, "loss": 0.0493, "step": 45880 }, { "epoch": 0.37130835828141434, "grad_norm": 0.023759786039590836, "learning_rate": 9.778173138924667e-06, "loss": 0.0274, "step": 45890 }, { "epoch": 0.3713892709766162, "grad_norm": 0.9096776247024536, "learning_rate": 9.7779651072622e-06, "loss": 0.0409, "step": 45900 }, { "epoch": 0.3714701836718181, "grad_norm": 0.43757250905036926, "learning_rate": 9.777756980313306e-06, "loss": 0.0523, "step": 45910 }, { "epoch": 0.37155109636702, "grad_norm": 0.10910248011350632, "learning_rate": 9.777548758082137e-06, "loss": 0.0367, "step": 45920 }, { "epoch": 0.3716320090622219, "grad_norm": 0.6050748229026794, "learning_rate": 9.77734044057284e-06, "loss": 0.0521, "step": 45930 }, { "epoch": 0.37171292175742376, "grad_norm": 0.5213910937309265, "learning_rate": 9.777132027789578e-06, "loss": 0.0331, "step": 45940 }, { "epoch": 0.3717938344526256, "grad_norm": 0.5236078500747681, "learning_rate": 9.776923519736498e-06, "loss": 0.0427, "step": 45950 }, { "epoch": 0.3718747471478275, "grad_norm": 0.4113394320011139, "learning_rate": 9.776714916417767e-06, "loss": 0.0484, "step": 45960 }, { "epoch": 0.37195565984302936, "grad_norm": 0.2011641561985016, "learning_rate": 9.776506217837538e-06, "loss": 0.0412, "step": 45970 }, { "epoch": 0.37203657253823125, "grad_norm": 0.4521995782852173, "learning_rate": 9.776297423999977e-06, "loss": 0.0404, "step": 45980 }, { "epoch": 0.37211748523343313, "grad_norm": 0.33085906505584717, "learning_rate": 9.776088534909248e-06, "loss": 0.0375, "step": 45990 }, { "epoch": 0.372198397928635, "grad_norm": 0.5525457859039307, "learning_rate": 9.775879550569517e-06, "loss": 0.042, "step": 46000 }, { "epoch": 0.3722793106238369, "grad_norm": 0.271992027759552, "learning_rate": 9.775670470984949e-06, "loss": 0.0561, "step": 46010 }, { "epoch": 0.37236022331903873, "grad_norm": 0.6453105211257935, "learning_rate": 9.775461296159713e-06, "loss": 0.058, "step": 46020 }, { "epoch": 0.3724411360142406, "grad_norm": 0.1506223827600479, "learning_rate": 9.775252026097986e-06, "loss": 0.0278, "step": 46030 }, { "epoch": 0.3725220487094425, "grad_norm": 0.8523877263069153, "learning_rate": 9.775042660803937e-06, "loss": 0.0599, "step": 46040 }, { "epoch": 0.3726029614046444, "grad_norm": 0.6374862790107727, "learning_rate": 9.774833200281744e-06, "loss": 0.0461, "step": 46050 }, { "epoch": 0.37268387409984627, "grad_norm": 0.7502871751785278, "learning_rate": 9.774623644535582e-06, "loss": 0.0485, "step": 46060 }, { "epoch": 0.37276478679504815, "grad_norm": 1.3204562664031982, "learning_rate": 9.774413993569632e-06, "loss": 0.0276, "step": 46070 }, { "epoch": 0.37284569949025004, "grad_norm": 0.47525957226753235, "learning_rate": 9.774204247388073e-06, "loss": 0.0433, "step": 46080 }, { "epoch": 0.3729266121854519, "grad_norm": 0.5013774633407593, "learning_rate": 9.773994405995089e-06, "loss": 0.0285, "step": 46090 }, { "epoch": 0.37300752488065375, "grad_norm": 0.5286208987236023, "learning_rate": 9.773784469394867e-06, "loss": 0.031, "step": 46100 }, { "epoch": 0.37308843757585564, "grad_norm": 0.47329849004745483, "learning_rate": 9.77357443759159e-06, "loss": 0.0517, "step": 46110 }, { "epoch": 0.3731693502710575, "grad_norm": 0.5933850407600403, "learning_rate": 9.773364310589448e-06, "loss": 0.0326, "step": 46120 }, { "epoch": 0.3732502629662594, "grad_norm": 0.5112966895103455, "learning_rate": 9.773154088392632e-06, "loss": 0.0457, "step": 46130 }, { "epoch": 0.3733311756614613, "grad_norm": 0.2681165337562561, "learning_rate": 9.772943771005333e-06, "loss": 0.0574, "step": 46140 }, { "epoch": 0.3734120883566632, "grad_norm": 0.1301582008600235, "learning_rate": 9.77273335843175e-06, "loss": 0.0282, "step": 46150 }, { "epoch": 0.37349300105186506, "grad_norm": 0.20302724838256836, "learning_rate": 9.772522850676073e-06, "loss": 0.0395, "step": 46160 }, { "epoch": 0.3735739137470669, "grad_norm": 0.6131197810173035, "learning_rate": 9.772312247742506e-06, "loss": 0.0498, "step": 46170 }, { "epoch": 0.3736548264422688, "grad_norm": 0.6874091625213623, "learning_rate": 9.772101549635243e-06, "loss": 0.0404, "step": 46180 }, { "epoch": 0.37373573913747066, "grad_norm": 0.3439827859401703, "learning_rate": 9.77189075635849e-06, "loss": 0.0716, "step": 46190 }, { "epoch": 0.37381665183267254, "grad_norm": 0.5163580179214478, "learning_rate": 9.77167986791645e-06, "loss": 0.0495, "step": 46200 }, { "epoch": 0.3738975645278744, "grad_norm": 0.44389107823371887, "learning_rate": 9.771468884313329e-06, "loss": 0.0297, "step": 46210 }, { "epoch": 0.3739784772230763, "grad_norm": 0.3000948429107666, "learning_rate": 9.771257805553334e-06, "loss": 0.028, "step": 46220 }, { "epoch": 0.3740593899182782, "grad_norm": 0.6172895431518555, "learning_rate": 9.771046631640672e-06, "loss": 0.0367, "step": 46230 }, { "epoch": 0.3741403026134801, "grad_norm": 0.665098249912262, "learning_rate": 9.77083536257956e-06, "loss": 0.0522, "step": 46240 }, { "epoch": 0.3742212153086819, "grad_norm": 0.32917237281799316, "learning_rate": 9.770623998374207e-06, "loss": 0.0386, "step": 46250 }, { "epoch": 0.3743021280038838, "grad_norm": 0.4499078392982483, "learning_rate": 9.77041253902883e-06, "loss": 0.053, "step": 46260 }, { "epoch": 0.3743830406990857, "grad_norm": 0.31150174140930176, "learning_rate": 9.770200984547647e-06, "loss": 0.0254, "step": 46270 }, { "epoch": 0.37446395339428756, "grad_norm": 0.4246581792831421, "learning_rate": 9.769989334934872e-06, "loss": 0.0405, "step": 46280 }, { "epoch": 0.37454486608948945, "grad_norm": 0.31729230284690857, "learning_rate": 9.769777590194734e-06, "loss": 0.0402, "step": 46290 }, { "epoch": 0.37462577878469133, "grad_norm": 0.47524720430374146, "learning_rate": 9.76956575033145e-06, "loss": 0.0352, "step": 46300 }, { "epoch": 0.3747066914798932, "grad_norm": 0.3148794174194336, "learning_rate": 9.769353815349244e-06, "loss": 0.037, "step": 46310 }, { "epoch": 0.37478760417509505, "grad_norm": 0.2983231842517853, "learning_rate": 9.769141785252348e-06, "loss": 0.0393, "step": 46320 }, { "epoch": 0.37486851687029693, "grad_norm": 0.5828624367713928, "learning_rate": 9.768929660044984e-06, "loss": 0.0537, "step": 46330 }, { "epoch": 0.3749494295654988, "grad_norm": 0.41103214025497437, "learning_rate": 9.768717439731388e-06, "loss": 0.0424, "step": 46340 }, { "epoch": 0.3750303422607007, "grad_norm": 0.459905743598938, "learning_rate": 9.768505124315788e-06, "loss": 0.0298, "step": 46350 }, { "epoch": 0.3751112549559026, "grad_norm": 0.5839685201644897, "learning_rate": 9.768292713802423e-06, "loss": 0.0487, "step": 46360 }, { "epoch": 0.37519216765110447, "grad_norm": 0.8673042058944702, "learning_rate": 9.768080208195522e-06, "loss": 0.0401, "step": 46370 }, { "epoch": 0.37527308034630635, "grad_norm": 0.7206057906150818, "learning_rate": 9.767867607499331e-06, "loss": 0.0385, "step": 46380 }, { "epoch": 0.37535399304150824, "grad_norm": 0.47149255871772766, "learning_rate": 9.767654911718082e-06, "loss": 0.043, "step": 46390 }, { "epoch": 0.37543490573671007, "grad_norm": 0.7419002056121826, "learning_rate": 9.767442120856025e-06, "loss": 0.0464, "step": 46400 }, { "epoch": 0.37551581843191195, "grad_norm": 0.3830771744251251, "learning_rate": 9.767229234917396e-06, "loss": 0.0322, "step": 46410 }, { "epoch": 0.37559673112711384, "grad_norm": 0.6479586958885193, "learning_rate": 9.767016253906446e-06, "loss": 0.0413, "step": 46420 }, { "epoch": 0.3756776438223157, "grad_norm": 0.2613699734210968, "learning_rate": 9.76680317782742e-06, "loss": 0.0458, "step": 46430 }, { "epoch": 0.3757585565175176, "grad_norm": 0.3714908957481384, "learning_rate": 9.766590006684567e-06, "loss": 0.0216, "step": 46440 }, { "epoch": 0.3758394692127195, "grad_norm": 0.6336504220962524, "learning_rate": 9.76637674048214e-06, "loss": 0.063, "step": 46450 }, { "epoch": 0.3759203819079214, "grad_norm": 0.28962740302085876, "learning_rate": 9.766163379224391e-06, "loss": 0.0292, "step": 46460 }, { "epoch": 0.3760012946031232, "grad_norm": 0.24242419004440308, "learning_rate": 9.765949922915574e-06, "loss": 0.037, "step": 46470 }, { "epoch": 0.3760822072983251, "grad_norm": 0.3121667206287384, "learning_rate": 9.76573637155995e-06, "loss": 0.0393, "step": 46480 }, { "epoch": 0.376163119993527, "grad_norm": 0.6026563048362732, "learning_rate": 9.765522725161772e-06, "loss": 0.0283, "step": 46490 }, { "epoch": 0.37624403268872886, "grad_norm": 0.7201324701309204, "learning_rate": 9.765308983725306e-06, "loss": 0.0348, "step": 46500 }, { "epoch": 0.37632494538393074, "grad_norm": 0.8379624485969543, "learning_rate": 9.76509514725481e-06, "loss": 0.0561, "step": 46510 }, { "epoch": 0.3764058580791326, "grad_norm": 0.9278371930122375, "learning_rate": 9.764881215754552e-06, "loss": 0.0518, "step": 46520 }, { "epoch": 0.3764867707743345, "grad_norm": 0.6586000919342041, "learning_rate": 9.764667189228798e-06, "loss": 0.0447, "step": 46530 }, { "epoch": 0.3765676834695364, "grad_norm": 0.2170431762933731, "learning_rate": 9.764453067681814e-06, "loss": 0.0442, "step": 46540 }, { "epoch": 0.3766485961647382, "grad_norm": 0.4013994038105011, "learning_rate": 9.764238851117873e-06, "loss": 0.0416, "step": 46550 }, { "epoch": 0.3767295088599401, "grad_norm": 0.7295129895210266, "learning_rate": 9.764024539541247e-06, "loss": 0.0418, "step": 46560 }, { "epoch": 0.376810421555142, "grad_norm": 0.4513787627220154, "learning_rate": 9.763810132956206e-06, "loss": 0.041, "step": 46570 }, { "epoch": 0.3768913342503439, "grad_norm": 0.29418036341667175, "learning_rate": 9.763595631367031e-06, "loss": 0.0342, "step": 46580 }, { "epoch": 0.37697224694554576, "grad_norm": 0.11367271095514297, "learning_rate": 9.763381034777997e-06, "loss": 0.0376, "step": 46590 }, { "epoch": 0.37705315964074765, "grad_norm": 0.5795333385467529, "learning_rate": 9.763166343193383e-06, "loss": 0.0442, "step": 46600 }, { "epoch": 0.37713407233594953, "grad_norm": 0.5127386450767517, "learning_rate": 9.762951556617473e-06, "loss": 0.0382, "step": 46610 }, { "epoch": 0.37721498503115136, "grad_norm": 0.6001391410827637, "learning_rate": 9.762736675054548e-06, "loss": 0.0384, "step": 46620 }, { "epoch": 0.37729589772635325, "grad_norm": 0.425992876291275, "learning_rate": 9.762521698508896e-06, "loss": 0.0459, "step": 46630 }, { "epoch": 0.37737681042155513, "grad_norm": 0.7576671242713928, "learning_rate": 9.762306626984802e-06, "loss": 0.0605, "step": 46640 }, { "epoch": 0.377457723116757, "grad_norm": 0.8419294953346252, "learning_rate": 9.762091460486556e-06, "loss": 0.0495, "step": 46650 }, { "epoch": 0.3775386358119589, "grad_norm": 0.6640127301216125, "learning_rate": 9.76187619901845e-06, "loss": 0.0316, "step": 46660 }, { "epoch": 0.3776195485071608, "grad_norm": 0.7699955105781555, "learning_rate": 9.761660842584775e-06, "loss": 0.0492, "step": 46670 }, { "epoch": 0.37770046120236267, "grad_norm": 0.75567227602005, "learning_rate": 9.761445391189828e-06, "loss": 0.0405, "step": 46680 }, { "epoch": 0.37778137389756455, "grad_norm": 0.39990177750587463, "learning_rate": 9.761229844837902e-06, "loss": 0.0484, "step": 46690 }, { "epoch": 0.3778622865927664, "grad_norm": 0.45478031039237976, "learning_rate": 9.761014203533301e-06, "loss": 0.0425, "step": 46700 }, { "epoch": 0.37794319928796827, "grad_norm": 0.8170031905174255, "learning_rate": 9.76079846728032e-06, "loss": 0.0457, "step": 46710 }, { "epoch": 0.37802411198317015, "grad_norm": 0.8626950979232788, "learning_rate": 9.760582636083266e-06, "loss": 0.0376, "step": 46720 }, { "epoch": 0.37810502467837204, "grad_norm": 0.23068341612815857, "learning_rate": 9.760366709946442e-06, "loss": 0.0435, "step": 46730 }, { "epoch": 0.3781859373735739, "grad_norm": 0.6149535179138184, "learning_rate": 9.760150688874152e-06, "loss": 0.0609, "step": 46740 }, { "epoch": 0.3782668500687758, "grad_norm": 0.6354812383651733, "learning_rate": 9.759934572870706e-06, "loss": 0.0427, "step": 46750 }, { "epoch": 0.3783477627639777, "grad_norm": 0.5295206904411316, "learning_rate": 9.759718361940414e-06, "loss": 0.0409, "step": 46760 }, { "epoch": 0.3784286754591795, "grad_norm": 0.4080492854118347, "learning_rate": 9.759502056087587e-06, "loss": 0.0374, "step": 46770 }, { "epoch": 0.3785095881543814, "grad_norm": 0.4646158814430237, "learning_rate": 9.759285655316539e-06, "loss": 0.0286, "step": 46780 }, { "epoch": 0.3785905008495833, "grad_norm": 0.7564911246299744, "learning_rate": 9.759069159631587e-06, "loss": 0.0311, "step": 46790 }, { "epoch": 0.3786714135447852, "grad_norm": 0.04134867712855339, "learning_rate": 9.758852569037047e-06, "loss": 0.039, "step": 46800 }, { "epoch": 0.37875232623998706, "grad_norm": 0.6729379892349243, "learning_rate": 9.758635883537239e-06, "loss": 0.0395, "step": 46810 }, { "epoch": 0.37883323893518894, "grad_norm": 0.8627752065658569, "learning_rate": 9.758419103136483e-06, "loss": 0.0483, "step": 46820 }, { "epoch": 0.3789141516303908, "grad_norm": 0.6731133460998535, "learning_rate": 9.758202227839106e-06, "loss": 0.0499, "step": 46830 }, { "epoch": 0.3789950643255927, "grad_norm": 0.564102292060852, "learning_rate": 9.757985257649428e-06, "loss": 0.0385, "step": 46840 }, { "epoch": 0.37907597702079454, "grad_norm": 0.512322723865509, "learning_rate": 9.75776819257178e-06, "loss": 0.0405, "step": 46850 }, { "epoch": 0.3791568897159964, "grad_norm": 0.3005869686603546, "learning_rate": 9.757551032610488e-06, "loss": 0.0243, "step": 46860 }, { "epoch": 0.3792378024111983, "grad_norm": 0.3462994396686554, "learning_rate": 9.757333777769887e-06, "loss": 0.0548, "step": 46870 }, { "epoch": 0.3793187151064002, "grad_norm": 0.4821435511112213, "learning_rate": 9.757116428054306e-06, "loss": 0.0363, "step": 46880 }, { "epoch": 0.3793996278016021, "grad_norm": 0.32115283608436584, "learning_rate": 9.756898983468079e-06, "loss": 0.0549, "step": 46890 }, { "epoch": 0.37948054049680396, "grad_norm": 0.3597911298274994, "learning_rate": 9.756681444015544e-06, "loss": 0.0396, "step": 46900 }, { "epoch": 0.37956145319200585, "grad_norm": 0.662098228931427, "learning_rate": 9.75646380970104e-06, "loss": 0.0287, "step": 46910 }, { "epoch": 0.3796423658872077, "grad_norm": 0.6304022073745728, "learning_rate": 9.756246080528907e-06, "loss": 0.042, "step": 46920 }, { "epoch": 0.37972327858240956, "grad_norm": 0.7310692667961121, "learning_rate": 9.756028256503487e-06, "loss": 0.0415, "step": 46930 }, { "epoch": 0.37980419127761145, "grad_norm": 0.5966075658798218, "learning_rate": 9.755810337629123e-06, "loss": 0.0511, "step": 46940 }, { "epoch": 0.37988510397281333, "grad_norm": 0.4106391668319702, "learning_rate": 9.755592323910163e-06, "loss": 0.0335, "step": 46950 }, { "epoch": 0.3799660166680152, "grad_norm": 0.2897142469882965, "learning_rate": 9.755374215350953e-06, "loss": 0.0376, "step": 46960 }, { "epoch": 0.3800469293632171, "grad_norm": 0.5878005623817444, "learning_rate": 9.755156011955844e-06, "loss": 0.0387, "step": 46970 }, { "epoch": 0.380127842058419, "grad_norm": 0.511223554611206, "learning_rate": 9.754937713729186e-06, "loss": 0.0268, "step": 46980 }, { "epoch": 0.38020875475362087, "grad_norm": 0.6954636573791504, "learning_rate": 9.754719320675334e-06, "loss": 0.0404, "step": 46990 }, { "epoch": 0.3802896674488227, "grad_norm": 0.38339322805404663, "learning_rate": 9.754500832798641e-06, "loss": 0.0369, "step": 47000 }, { "epoch": 0.3803705801440246, "grad_norm": 0.5646531581878662, "learning_rate": 9.754282250103469e-06, "loss": 0.0445, "step": 47010 }, { "epoch": 0.38045149283922647, "grad_norm": 0.7235516905784607, "learning_rate": 9.754063572594173e-06, "loss": 0.0444, "step": 47020 }, { "epoch": 0.38053240553442835, "grad_norm": 0.5409421324729919, "learning_rate": 9.753844800275114e-06, "loss": 0.026, "step": 47030 }, { "epoch": 0.38061331822963024, "grad_norm": 0.9422171115875244, "learning_rate": 9.753625933150656e-06, "loss": 0.0332, "step": 47040 }, { "epoch": 0.3806942309248321, "grad_norm": 0.8006743788719177, "learning_rate": 9.753406971225167e-06, "loss": 0.0279, "step": 47050 }, { "epoch": 0.380775143620034, "grad_norm": 0.5132269263267517, "learning_rate": 9.753187914503009e-06, "loss": 0.0336, "step": 47060 }, { "epoch": 0.38085605631523584, "grad_norm": 0.5590963363647461, "learning_rate": 9.752968762988553e-06, "loss": 0.0393, "step": 47070 }, { "epoch": 0.3809369690104377, "grad_norm": 0.6559487581253052, "learning_rate": 9.752749516686169e-06, "loss": 0.0369, "step": 47080 }, { "epoch": 0.3810178817056396, "grad_norm": 0.4270935356616974, "learning_rate": 9.752530175600229e-06, "loss": 0.0261, "step": 47090 }, { "epoch": 0.3810987944008415, "grad_norm": 0.9736343026161194, "learning_rate": 9.752310739735108e-06, "loss": 0.0363, "step": 47100 }, { "epoch": 0.3811797070960434, "grad_norm": 0.2175111323595047, "learning_rate": 9.752091209095182e-06, "loss": 0.051, "step": 47110 }, { "epoch": 0.38126061979124526, "grad_norm": 0.5900910496711731, "learning_rate": 9.751871583684828e-06, "loss": 0.0369, "step": 47120 }, { "epoch": 0.38134153248644714, "grad_norm": 0.23768427968025208, "learning_rate": 9.751651863508427e-06, "loss": 0.0308, "step": 47130 }, { "epoch": 0.38142244518164903, "grad_norm": 0.34297627210617065, "learning_rate": 9.751432048570362e-06, "loss": 0.0418, "step": 47140 }, { "epoch": 0.38150335787685086, "grad_norm": 0.511040449142456, "learning_rate": 9.751212138875014e-06, "loss": 0.0373, "step": 47150 }, { "epoch": 0.38158427057205274, "grad_norm": 0.612532377243042, "learning_rate": 9.750992134426772e-06, "loss": 0.0537, "step": 47160 }, { "epoch": 0.3816651832672546, "grad_norm": 0.3226439356803894, "learning_rate": 9.750772035230021e-06, "loss": 0.0459, "step": 47170 }, { "epoch": 0.3817460959624565, "grad_norm": 0.7277985215187073, "learning_rate": 9.750551841289151e-06, "loss": 0.0434, "step": 47180 }, { "epoch": 0.3818270086576584, "grad_norm": 0.4065394401550293, "learning_rate": 9.750331552608552e-06, "loss": 0.0444, "step": 47190 }, { "epoch": 0.3819079213528603, "grad_norm": 0.547980785369873, "learning_rate": 9.75011116919262e-06, "loss": 0.0468, "step": 47200 }, { "epoch": 0.38198883404806216, "grad_norm": 0.2999350130558014, "learning_rate": 9.749890691045749e-06, "loss": 0.0344, "step": 47210 }, { "epoch": 0.382069746743264, "grad_norm": 0.5103453397750854, "learning_rate": 9.749670118172333e-06, "loss": 0.0492, "step": 47220 }, { "epoch": 0.3821506594384659, "grad_norm": 0.710077702999115, "learning_rate": 9.749449450576778e-06, "loss": 0.0582, "step": 47230 }, { "epoch": 0.38223157213366776, "grad_norm": 0.5831964612007141, "learning_rate": 9.749228688263477e-06, "loss": 0.0407, "step": 47240 }, { "epoch": 0.38231248482886965, "grad_norm": 0.44781479239463806, "learning_rate": 9.749007831236837e-06, "loss": 0.0496, "step": 47250 }, { "epoch": 0.38239339752407153, "grad_norm": 0.4486709535121918, "learning_rate": 9.748786879501259e-06, "loss": 0.0408, "step": 47260 }, { "epoch": 0.3824743102192734, "grad_norm": 0.626071035861969, "learning_rate": 9.748565833061155e-06, "loss": 0.0302, "step": 47270 }, { "epoch": 0.3825552229144753, "grad_norm": 1.4710986614227295, "learning_rate": 9.74834469192093e-06, "loss": 0.0486, "step": 47280 }, { "epoch": 0.3826361356096772, "grad_norm": 0.2071094661951065, "learning_rate": 9.748123456084992e-06, "loss": 0.0454, "step": 47290 }, { "epoch": 0.382717048304879, "grad_norm": 1.5914306640625, "learning_rate": 9.747902125557758e-06, "loss": 0.0501, "step": 47300 }, { "epoch": 0.3827979610000809, "grad_norm": 0.36626172065734863, "learning_rate": 9.747680700343638e-06, "loss": 0.0289, "step": 47310 }, { "epoch": 0.3828788736952828, "grad_norm": 1.0394335985183716, "learning_rate": 9.747459180447048e-06, "loss": 0.0323, "step": 47320 }, { "epoch": 0.38295978639048467, "grad_norm": 0.4625608026981354, "learning_rate": 9.747237565872409e-06, "loss": 0.0438, "step": 47330 }, { "epoch": 0.38304069908568655, "grad_norm": 0.8751316666603088, "learning_rate": 9.747015856624138e-06, "loss": 0.0424, "step": 47340 }, { "epoch": 0.38312161178088844, "grad_norm": 0.8627206087112427, "learning_rate": 9.746794052706655e-06, "loss": 0.0684, "step": 47350 }, { "epoch": 0.3832025244760903, "grad_norm": 0.7601197957992554, "learning_rate": 9.74657215412439e-06, "loss": 0.0264, "step": 47360 }, { "epoch": 0.38328343717129215, "grad_norm": 0.10487248748540878, "learning_rate": 9.74635016088176e-06, "loss": 0.0483, "step": 47370 }, { "epoch": 0.38336434986649404, "grad_norm": 1.2440379858016968, "learning_rate": 9.7461280729832e-06, "loss": 0.0683, "step": 47380 }, { "epoch": 0.3834452625616959, "grad_norm": 0.6658764481544495, "learning_rate": 9.745905890433131e-06, "loss": 0.0468, "step": 47390 }, { "epoch": 0.3835261752568978, "grad_norm": 0.6114987730979919, "learning_rate": 9.74568361323599e-06, "loss": 0.0402, "step": 47400 }, { "epoch": 0.3836070879520997, "grad_norm": 0.2210923284292221, "learning_rate": 9.745461241396206e-06, "loss": 0.0409, "step": 47410 }, { "epoch": 0.3836880006473016, "grad_norm": 0.290328711271286, "learning_rate": 9.745238774918217e-06, "loss": 0.0423, "step": 47420 }, { "epoch": 0.38376891334250346, "grad_norm": 0.791792631149292, "learning_rate": 9.74501621380646e-06, "loss": 0.0703, "step": 47430 }, { "epoch": 0.3838498260377053, "grad_norm": 0.34014856815338135, "learning_rate": 9.74479355806537e-06, "loss": 0.0438, "step": 47440 }, { "epoch": 0.3839307387329072, "grad_norm": 0.4167328476905823, "learning_rate": 9.74457080769939e-06, "loss": 0.0524, "step": 47450 }, { "epoch": 0.38401165142810906, "grad_norm": 0.4292319715023041, "learning_rate": 9.74434796271296e-06, "loss": 0.0461, "step": 47460 }, { "epoch": 0.38409256412331094, "grad_norm": 0.43614643812179565, "learning_rate": 9.744125023110527e-06, "loss": 0.0364, "step": 47470 }, { "epoch": 0.3841734768185128, "grad_norm": 0.6911159753799438, "learning_rate": 9.743901988896535e-06, "loss": 0.0474, "step": 47480 }, { "epoch": 0.3842543895137147, "grad_norm": 0.6363247632980347, "learning_rate": 9.743678860075431e-06, "loss": 0.0339, "step": 47490 }, { "epoch": 0.3843353022089166, "grad_norm": 0.4014727473258972, "learning_rate": 9.74345563665167e-06, "loss": 0.0342, "step": 47500 }, { "epoch": 0.3844162149041185, "grad_norm": 0.6174903512001038, "learning_rate": 9.743232318629698e-06, "loss": 0.0424, "step": 47510 }, { "epoch": 0.3844971275993203, "grad_norm": 0.8814364671707153, "learning_rate": 9.74300890601397e-06, "loss": 0.038, "step": 47520 }, { "epoch": 0.3845780402945222, "grad_norm": 0.7148314714431763, "learning_rate": 9.742785398808944e-06, "loss": 0.0232, "step": 47530 }, { "epoch": 0.3846589529897241, "grad_norm": 0.5420193076133728, "learning_rate": 9.742561797019076e-06, "loss": 0.0321, "step": 47540 }, { "epoch": 0.38473986568492596, "grad_norm": 0.47737351059913635, "learning_rate": 9.742338100648824e-06, "loss": 0.0423, "step": 47550 }, { "epoch": 0.38482077838012785, "grad_norm": 0.43671301007270813, "learning_rate": 9.742114309702652e-06, "loss": 0.0253, "step": 47560 }, { "epoch": 0.38490169107532973, "grad_norm": 0.4600822329521179, "learning_rate": 9.741890424185018e-06, "loss": 0.046, "step": 47570 }, { "epoch": 0.3849826037705316, "grad_norm": 0.8562972545623779, "learning_rate": 9.741666444100393e-06, "loss": 0.0258, "step": 47580 }, { "epoch": 0.38506351646573345, "grad_norm": 0.5220734477043152, "learning_rate": 9.74144236945324e-06, "loss": 0.0275, "step": 47590 }, { "epoch": 0.38514442916093533, "grad_norm": 0.5177832841873169, "learning_rate": 9.741218200248028e-06, "loss": 0.0446, "step": 47600 }, { "epoch": 0.3852253418561372, "grad_norm": 0.37767523527145386, "learning_rate": 9.740993936489228e-06, "loss": 0.0329, "step": 47610 }, { "epoch": 0.3853062545513391, "grad_norm": 0.2981983721256256, "learning_rate": 9.740769578181312e-06, "loss": 0.0385, "step": 47620 }, { "epoch": 0.385387167246541, "grad_norm": 0.5907979011535645, "learning_rate": 9.740545125328756e-06, "loss": 0.0326, "step": 47630 }, { "epoch": 0.38546807994174287, "grad_norm": 0.8509476184844971, "learning_rate": 9.740320577936035e-06, "loss": 0.0369, "step": 47640 }, { "epoch": 0.38554899263694475, "grad_norm": 0.5103214979171753, "learning_rate": 9.740095936007627e-06, "loss": 0.0311, "step": 47650 }, { "epoch": 0.38562990533214664, "grad_norm": 0.37992429733276367, "learning_rate": 9.739871199548013e-06, "loss": 0.0306, "step": 47660 }, { "epoch": 0.38571081802734847, "grad_norm": 1.2596896886825562, "learning_rate": 9.739646368561676e-06, "loss": 0.0545, "step": 47670 }, { "epoch": 0.38579173072255035, "grad_norm": 0.32346951961517334, "learning_rate": 9.739421443053095e-06, "loss": 0.0346, "step": 47680 }, { "epoch": 0.38587264341775224, "grad_norm": 0.670630693435669, "learning_rate": 9.73919642302676e-06, "loss": 0.0518, "step": 47690 }, { "epoch": 0.3859535561129541, "grad_norm": 0.9466882944107056, "learning_rate": 9.738971308487157e-06, "loss": 0.0475, "step": 47700 }, { "epoch": 0.386034468808156, "grad_norm": 0.6183547377586365, "learning_rate": 9.738746099438774e-06, "loss": 0.0451, "step": 47710 }, { "epoch": 0.3861153815033579, "grad_norm": 0.3638216257095337, "learning_rate": 9.738520795886106e-06, "loss": 0.0292, "step": 47720 }, { "epoch": 0.3861962941985598, "grad_norm": 0.3381003141403198, "learning_rate": 9.738295397833644e-06, "loss": 0.0511, "step": 47730 }, { "epoch": 0.3862772068937616, "grad_norm": 0.27810242772102356, "learning_rate": 9.738069905285882e-06, "loss": 0.026, "step": 47740 }, { "epoch": 0.3863581195889635, "grad_norm": 0.6883874535560608, "learning_rate": 9.737844318247321e-06, "loss": 0.0667, "step": 47750 }, { "epoch": 0.3864390322841654, "grad_norm": 0.5411242246627808, "learning_rate": 9.737618636722455e-06, "loss": 0.0316, "step": 47760 }, { "epoch": 0.38651994497936726, "grad_norm": 0.3617135286331177, "learning_rate": 9.737392860715786e-06, "loss": 0.024, "step": 47770 }, { "epoch": 0.38660085767456914, "grad_norm": 0.45003339648246765, "learning_rate": 9.737166990231818e-06, "loss": 0.05, "step": 47780 }, { "epoch": 0.386681770369771, "grad_norm": 0.4467886686325073, "learning_rate": 9.736941025275057e-06, "loss": 0.044, "step": 47790 }, { "epoch": 0.3867626830649729, "grad_norm": 0.42771050333976746, "learning_rate": 9.736714965850005e-06, "loss": 0.0297, "step": 47800 }, { "epoch": 0.3868435957601748, "grad_norm": 0.561642050743103, "learning_rate": 9.736488811961175e-06, "loss": 0.0348, "step": 47810 }, { "epoch": 0.3869245084553766, "grad_norm": 0.516230583190918, "learning_rate": 9.736262563613073e-06, "loss": 0.0445, "step": 47820 }, { "epoch": 0.3870054211505785, "grad_norm": 0.3972926735877991, "learning_rate": 9.736036220810215e-06, "loss": 0.0253, "step": 47830 }, { "epoch": 0.3870863338457804, "grad_norm": 0.7367977499961853, "learning_rate": 9.73580978355711e-06, "loss": 0.0318, "step": 47840 }, { "epoch": 0.3871672465409823, "grad_norm": 0.5332672595977783, "learning_rate": 9.735583251858278e-06, "loss": 0.0295, "step": 47850 }, { "epoch": 0.38724815923618416, "grad_norm": 0.5073809027671814, "learning_rate": 9.735356625718235e-06, "loss": 0.0497, "step": 47860 }, { "epoch": 0.38732907193138605, "grad_norm": 0.4585047662258148, "learning_rate": 9.7351299051415e-06, "loss": 0.0244, "step": 47870 }, { "epoch": 0.38740998462658793, "grad_norm": 0.42354443669319153, "learning_rate": 9.734903090132597e-06, "loss": 0.0472, "step": 47880 }, { "epoch": 0.38749089732178976, "grad_norm": 0.5798667669296265, "learning_rate": 9.734676180696047e-06, "loss": 0.0325, "step": 47890 }, { "epoch": 0.38757181001699165, "grad_norm": 0.7026355266571045, "learning_rate": 9.734449176836376e-06, "loss": 0.0348, "step": 47900 }, { "epoch": 0.38765272271219353, "grad_norm": 0.5428379774093628, "learning_rate": 9.73422207855811e-06, "loss": 0.0429, "step": 47910 }, { "epoch": 0.3877336354073954, "grad_norm": 0.4241684377193451, "learning_rate": 9.733994885865781e-06, "loss": 0.0373, "step": 47920 }, { "epoch": 0.3878145481025973, "grad_norm": 0.482317715883255, "learning_rate": 9.733767598763917e-06, "loss": 0.0644, "step": 47930 }, { "epoch": 0.3878954607977992, "grad_norm": 0.9778103828430176, "learning_rate": 9.733540217257051e-06, "loss": 0.0395, "step": 47940 }, { "epoch": 0.38797637349300107, "grad_norm": 0.8095561265945435, "learning_rate": 9.733312741349719e-06, "loss": 0.0412, "step": 47950 }, { "epoch": 0.38805728618820295, "grad_norm": 0.48018792271614075, "learning_rate": 9.733085171046456e-06, "loss": 0.0546, "step": 47960 }, { "epoch": 0.3881381988834048, "grad_norm": 0.45418205857276917, "learning_rate": 9.732857506351804e-06, "loss": 0.0526, "step": 47970 }, { "epoch": 0.38821911157860667, "grad_norm": 0.6056337356567383, "learning_rate": 9.732629747270296e-06, "loss": 0.0363, "step": 47980 }, { "epoch": 0.38830002427380855, "grad_norm": 0.6248874068260193, "learning_rate": 9.732401893806482e-06, "loss": 0.0377, "step": 47990 }, { "epoch": 0.38838093696901044, "grad_norm": 0.582534670829773, "learning_rate": 9.732173945964902e-06, "loss": 0.0334, "step": 48000 }, { "epoch": 0.3884618496642123, "grad_norm": 0.7501192092895508, "learning_rate": 9.731945903750101e-06, "loss": 0.0301, "step": 48010 }, { "epoch": 0.3885427623594142, "grad_norm": 1.2369638681411743, "learning_rate": 9.731717767166633e-06, "loss": 0.0488, "step": 48020 }, { "epoch": 0.3886236750546161, "grad_norm": 0.7403286695480347, "learning_rate": 9.731489536219039e-06, "loss": 0.0398, "step": 48030 }, { "epoch": 0.3887045877498179, "grad_norm": 0.24360868334770203, "learning_rate": 9.731261210911876e-06, "loss": 0.0484, "step": 48040 }, { "epoch": 0.3887855004450198, "grad_norm": 0.37851113080978394, "learning_rate": 9.731032791249697e-06, "loss": 0.0423, "step": 48050 }, { "epoch": 0.3888664131402217, "grad_norm": 0.33603665232658386, "learning_rate": 9.730804277237056e-06, "loss": 0.032, "step": 48060 }, { "epoch": 0.3889473258354236, "grad_norm": 0.679074764251709, "learning_rate": 9.730575668878511e-06, "loss": 0.036, "step": 48070 }, { "epoch": 0.38902823853062546, "grad_norm": 0.5625168085098267, "learning_rate": 9.73034696617862e-06, "loss": 0.028, "step": 48080 }, { "epoch": 0.38910915122582734, "grad_norm": 0.7918634414672852, "learning_rate": 9.730118169141947e-06, "loss": 0.0482, "step": 48090 }, { "epoch": 0.38919006392102923, "grad_norm": 0.5372234582901001, "learning_rate": 9.729889277773052e-06, "loss": 0.0388, "step": 48100 }, { "epoch": 0.3892709766162311, "grad_norm": 0.547116756439209, "learning_rate": 9.729660292076501e-06, "loss": 0.0454, "step": 48110 }, { "epoch": 0.38935188931143294, "grad_norm": 0.4647389352321625, "learning_rate": 9.729431212056858e-06, "loss": 0.0306, "step": 48120 }, { "epoch": 0.3894328020066348, "grad_norm": 0.7818781733512878, "learning_rate": 9.729202037718696e-06, "loss": 0.0392, "step": 48130 }, { "epoch": 0.3895137147018367, "grad_norm": 0.5347240567207336, "learning_rate": 9.728972769066583e-06, "loss": 0.0384, "step": 48140 }, { "epoch": 0.3895946273970386, "grad_norm": 0.7241188883781433, "learning_rate": 9.728743406105091e-06, "loss": 0.0399, "step": 48150 }, { "epoch": 0.3896755400922405, "grad_norm": 0.8550787568092346, "learning_rate": 9.728513948838793e-06, "loss": 0.0294, "step": 48160 }, { "epoch": 0.38975645278744236, "grad_norm": 0.7776123285293579, "learning_rate": 9.72828439727227e-06, "loss": 0.0385, "step": 48170 }, { "epoch": 0.38983736548264425, "grad_norm": 0.6398157477378845, "learning_rate": 9.728054751410093e-06, "loss": 0.0475, "step": 48180 }, { "epoch": 0.3899182781778461, "grad_norm": 0.626410186290741, "learning_rate": 9.727825011256848e-06, "loss": 0.0386, "step": 48190 }, { "epoch": 0.38999919087304796, "grad_norm": 0.9487807154655457, "learning_rate": 9.727595176817112e-06, "loss": 0.0342, "step": 48200 }, { "epoch": 0.39008010356824985, "grad_norm": 0.24390634894371033, "learning_rate": 9.727365248095473e-06, "loss": 0.0362, "step": 48210 }, { "epoch": 0.39016101626345173, "grad_norm": 0.23508159816265106, "learning_rate": 9.727135225096513e-06, "loss": 0.0425, "step": 48220 }, { "epoch": 0.3902419289586536, "grad_norm": 0.5012610554695129, "learning_rate": 9.72690510782482e-06, "loss": 0.0415, "step": 48230 }, { "epoch": 0.3903228416538555, "grad_norm": 0.5400153398513794, "learning_rate": 9.726674896284984e-06, "loss": 0.0335, "step": 48240 }, { "epoch": 0.3904037543490574, "grad_norm": 0.8489029407501221, "learning_rate": 9.726444590481595e-06, "loss": 0.0503, "step": 48250 }, { "epoch": 0.39048466704425927, "grad_norm": 0.5431334376335144, "learning_rate": 9.726214190419248e-06, "loss": 0.0454, "step": 48260 }, { "epoch": 0.3905655797394611, "grad_norm": 0.9147680997848511, "learning_rate": 9.725983696102535e-06, "loss": 0.0439, "step": 48270 }, { "epoch": 0.390646492434663, "grad_norm": 0.6114646196365356, "learning_rate": 9.725753107536053e-06, "loss": 0.0396, "step": 48280 }, { "epoch": 0.39072740512986487, "grad_norm": 0.43162932991981506, "learning_rate": 9.725522424724404e-06, "loss": 0.0418, "step": 48290 }, { "epoch": 0.39080831782506675, "grad_norm": 0.6264533996582031, "learning_rate": 9.725291647672186e-06, "loss": 0.0515, "step": 48300 }, { "epoch": 0.39088923052026864, "grad_norm": 0.9423971772193909, "learning_rate": 9.725060776384001e-06, "loss": 0.0479, "step": 48310 }, { "epoch": 0.3909701432154705, "grad_norm": 0.29237237572669983, "learning_rate": 9.724829810864454e-06, "loss": 0.0458, "step": 48320 }, { "epoch": 0.3910510559106724, "grad_norm": 0.42550450563430786, "learning_rate": 9.724598751118151e-06, "loss": 0.0421, "step": 48330 }, { "epoch": 0.39113196860587424, "grad_norm": 0.7676635980606079, "learning_rate": 9.7243675971497e-06, "loss": 0.0439, "step": 48340 }, { "epoch": 0.3912128813010761, "grad_norm": 0.4099090099334717, "learning_rate": 9.72413634896371e-06, "loss": 0.0438, "step": 48350 }, { "epoch": 0.391293793996278, "grad_norm": 0.9590673446655273, "learning_rate": 9.723905006564796e-06, "loss": 0.0489, "step": 48360 }, { "epoch": 0.3913747066914799, "grad_norm": 0.5521366000175476, "learning_rate": 9.723673569957566e-06, "loss": 0.025, "step": 48370 }, { "epoch": 0.3914556193866818, "grad_norm": 0.1884339600801468, "learning_rate": 9.723442039146641e-06, "loss": 0.0406, "step": 48380 }, { "epoch": 0.39153653208188366, "grad_norm": 0.19927869737148285, "learning_rate": 9.723210414136637e-06, "loss": 0.0519, "step": 48390 }, { "epoch": 0.39161744477708554, "grad_norm": 0.9910508990287781, "learning_rate": 9.72297869493217e-06, "loss": 0.0361, "step": 48400 }, { "epoch": 0.39169835747228743, "grad_norm": 0.4130144417285919, "learning_rate": 9.722746881537864e-06, "loss": 0.0266, "step": 48410 }, { "epoch": 0.39177927016748926, "grad_norm": 0.7712112665176392, "learning_rate": 9.722514973958343e-06, "loss": 0.0454, "step": 48420 }, { "epoch": 0.39186018286269114, "grad_norm": 0.5446542501449585, "learning_rate": 9.722282972198229e-06, "loss": 0.0362, "step": 48430 }, { "epoch": 0.391941095557893, "grad_norm": 0.3795224726200104, "learning_rate": 9.722050876262153e-06, "loss": 0.0378, "step": 48440 }, { "epoch": 0.3920220082530949, "grad_norm": 0.5963651537895203, "learning_rate": 9.721818686154738e-06, "loss": 0.049, "step": 48450 }, { "epoch": 0.3921029209482968, "grad_norm": 0.6108731031417847, "learning_rate": 9.721586401880618e-06, "loss": 0.0424, "step": 48460 }, { "epoch": 0.3921838336434987, "grad_norm": 0.9787188768386841, "learning_rate": 9.721354023444426e-06, "loss": 0.0398, "step": 48470 }, { "epoch": 0.39226474633870057, "grad_norm": 1.5819611549377441, "learning_rate": 9.721121550850794e-06, "loss": 0.0393, "step": 48480 }, { "epoch": 0.3923456590339024, "grad_norm": 0.30362528562545776, "learning_rate": 9.720888984104361e-06, "loss": 0.0584, "step": 48490 }, { "epoch": 0.3924265717291043, "grad_norm": 0.389544278383255, "learning_rate": 9.720656323209763e-06, "loss": 0.0445, "step": 48500 }, { "epoch": 0.39250748442430616, "grad_norm": 0.6947004795074463, "learning_rate": 9.72042356817164e-06, "loss": 0.0587, "step": 48510 }, { "epoch": 0.39258839711950805, "grad_norm": 0.13397316634655, "learning_rate": 9.720190718994634e-06, "loss": 0.0294, "step": 48520 }, { "epoch": 0.39266930981470993, "grad_norm": 1.0400723218917847, "learning_rate": 9.719957775683389e-06, "loss": 0.0507, "step": 48530 }, { "epoch": 0.3927502225099118, "grad_norm": 1.021359920501709, "learning_rate": 9.71972473824255e-06, "loss": 0.0392, "step": 48540 }, { "epoch": 0.3928311352051137, "grad_norm": 0.5467314720153809, "learning_rate": 9.719491606676765e-06, "loss": 0.0663, "step": 48550 }, { "epoch": 0.3929120479003156, "grad_norm": 0.5585345029830933, "learning_rate": 9.719258380990683e-06, "loss": 0.0305, "step": 48560 }, { "epoch": 0.3929929605955174, "grad_norm": 0.3963778614997864, "learning_rate": 9.719025061188955e-06, "loss": 0.0387, "step": 48570 }, { "epoch": 0.3930738732907193, "grad_norm": 0.5761810541152954, "learning_rate": 9.718791647276237e-06, "loss": 0.0322, "step": 48580 }, { "epoch": 0.3931547859859212, "grad_norm": 0.29839032888412476, "learning_rate": 9.718558139257179e-06, "loss": 0.0343, "step": 48590 }, { "epoch": 0.39323569868112307, "grad_norm": 0.34603986144065857, "learning_rate": 9.71832453713644e-06, "loss": 0.0276, "step": 48600 }, { "epoch": 0.39331661137632495, "grad_norm": 0.7962116003036499, "learning_rate": 9.718090840918681e-06, "loss": 0.0463, "step": 48610 }, { "epoch": 0.39339752407152684, "grad_norm": 0.8787714838981628, "learning_rate": 9.71785705060856e-06, "loss": 0.0396, "step": 48620 }, { "epoch": 0.3934784367667287, "grad_norm": 0.46248215436935425, "learning_rate": 9.717623166210739e-06, "loss": 0.0382, "step": 48630 }, { "epoch": 0.39355934946193055, "grad_norm": 0.6480649709701538, "learning_rate": 9.717389187729885e-06, "loss": 0.0294, "step": 48640 }, { "epoch": 0.39364026215713244, "grad_norm": 0.511698305606842, "learning_rate": 9.71715511517066e-06, "loss": 0.0414, "step": 48650 }, { "epoch": 0.3937211748523343, "grad_norm": 0.6573342680931091, "learning_rate": 9.716920948537737e-06, "loss": 0.0547, "step": 48660 }, { "epoch": 0.3938020875475362, "grad_norm": 0.5147284269332886, "learning_rate": 9.716686687835781e-06, "loss": 0.028, "step": 48670 }, { "epoch": 0.3938830002427381, "grad_norm": 0.7274599671363831, "learning_rate": 9.716452333069468e-06, "loss": 0.0462, "step": 48680 }, { "epoch": 0.39396391293794, "grad_norm": 0.474578857421875, "learning_rate": 9.716217884243469e-06, "loss": 0.0325, "step": 48690 }, { "epoch": 0.39404482563314186, "grad_norm": 0.8642107844352722, "learning_rate": 9.715983341362463e-06, "loss": 0.0506, "step": 48700 }, { "epoch": 0.39412573832834374, "grad_norm": 0.6200789213180542, "learning_rate": 9.715748704431122e-06, "loss": 0.0367, "step": 48710 }, { "epoch": 0.3942066510235456, "grad_norm": 0.37280064821243286, "learning_rate": 9.715513973454132e-06, "loss": 0.0337, "step": 48720 }, { "epoch": 0.39428756371874746, "grad_norm": 0.5708590149879456, "learning_rate": 9.715279148436167e-06, "loss": 0.0421, "step": 48730 }, { "epoch": 0.39436847641394934, "grad_norm": 0.5135555863380432, "learning_rate": 9.715044229381916e-06, "loss": 0.0335, "step": 48740 }, { "epoch": 0.3944493891091512, "grad_norm": 0.8146741986274719, "learning_rate": 9.714809216296062e-06, "loss": 0.0426, "step": 48750 }, { "epoch": 0.3945303018043531, "grad_norm": 0.6338770389556885, "learning_rate": 9.71457410918329e-06, "loss": 0.0252, "step": 48760 }, { "epoch": 0.394611214499555, "grad_norm": 0.4087561070919037, "learning_rate": 9.714338908048291e-06, "loss": 0.0498, "step": 48770 }, { "epoch": 0.3946921271947569, "grad_norm": 0.47548505663871765, "learning_rate": 9.714103612895755e-06, "loss": 0.0694, "step": 48780 }, { "epoch": 0.3947730398899587, "grad_norm": 0.48746538162231445, "learning_rate": 9.713868223730374e-06, "loss": 0.0428, "step": 48790 }, { "epoch": 0.3948539525851606, "grad_norm": 0.5500463843345642, "learning_rate": 9.713632740556844e-06, "loss": 0.0295, "step": 48800 }, { "epoch": 0.3949348652803625, "grad_norm": 0.4681827127933502, "learning_rate": 9.713397163379858e-06, "loss": 0.0362, "step": 48810 }, { "epoch": 0.39501577797556436, "grad_norm": 0.4055086672306061, "learning_rate": 9.713161492204117e-06, "loss": 0.0262, "step": 48820 }, { "epoch": 0.39509669067076625, "grad_norm": 1.0265663862228394, "learning_rate": 9.712925727034321e-06, "loss": 0.0439, "step": 48830 }, { "epoch": 0.39517760336596813, "grad_norm": 0.5679488182067871, "learning_rate": 9.71268986787517e-06, "loss": 0.0349, "step": 48840 }, { "epoch": 0.39525851606117, "grad_norm": 0.3622559607028961, "learning_rate": 9.712453914731368e-06, "loss": 0.0375, "step": 48850 }, { "epoch": 0.3953394287563719, "grad_norm": 0.6143046617507935, "learning_rate": 9.712217867607621e-06, "loss": 0.0465, "step": 48860 }, { "epoch": 0.39542034145157373, "grad_norm": 0.6770302653312683, "learning_rate": 9.711981726508637e-06, "loss": 0.0274, "step": 48870 }, { "epoch": 0.3955012541467756, "grad_norm": 0.2018502801656723, "learning_rate": 9.711745491439124e-06, "loss": 0.0354, "step": 48880 }, { "epoch": 0.3955821668419775, "grad_norm": 0.5802775621414185, "learning_rate": 9.711509162403797e-06, "loss": 0.0384, "step": 48890 }, { "epoch": 0.3956630795371794, "grad_norm": 0.43552690744400024, "learning_rate": 9.711272739407362e-06, "loss": 0.0394, "step": 48900 }, { "epoch": 0.39574399223238127, "grad_norm": 0.6216170191764832, "learning_rate": 9.711036222454543e-06, "loss": 0.0326, "step": 48910 }, { "epoch": 0.39582490492758315, "grad_norm": 0.4193810224533081, "learning_rate": 9.710799611550049e-06, "loss": 0.0542, "step": 48920 }, { "epoch": 0.39590581762278504, "grad_norm": 0.007969016209244728, "learning_rate": 9.710562906698601e-06, "loss": 0.0429, "step": 48930 }, { "epoch": 0.39598673031798687, "grad_norm": 0.48300421237945557, "learning_rate": 9.710326107904922e-06, "loss": 0.0481, "step": 48940 }, { "epoch": 0.39606764301318875, "grad_norm": 0.37068963050842285, "learning_rate": 9.71008921517373e-06, "loss": 0.0528, "step": 48950 }, { "epoch": 0.39614855570839064, "grad_norm": 0.541578471660614, "learning_rate": 9.709852228509755e-06, "loss": 0.0512, "step": 48960 }, { "epoch": 0.3962294684035925, "grad_norm": 0.545926034450531, "learning_rate": 9.70961514791772e-06, "loss": 0.0365, "step": 48970 }, { "epoch": 0.3963103810987944, "grad_norm": 0.8095763325691223, "learning_rate": 9.709377973402353e-06, "loss": 0.0442, "step": 48980 }, { "epoch": 0.3963912937939963, "grad_norm": 0.8887137770652771, "learning_rate": 9.709140704968382e-06, "loss": 0.0284, "step": 48990 }, { "epoch": 0.3964722064891982, "grad_norm": 0.2398955076932907, "learning_rate": 9.708903342620544e-06, "loss": 0.0277, "step": 49000 }, { "epoch": 0.3965531191844, "grad_norm": 0.6085145473480225, "learning_rate": 9.708665886363569e-06, "loss": 0.0429, "step": 49010 }, { "epoch": 0.3966340318796019, "grad_norm": 0.6013840436935425, "learning_rate": 9.708428336202191e-06, "loss": 0.042, "step": 49020 }, { "epoch": 0.3967149445748038, "grad_norm": 0.5446595549583435, "learning_rate": 9.708190692141152e-06, "loss": 0.0302, "step": 49030 }, { "epoch": 0.39679585727000566, "grad_norm": 0.34380409121513367, "learning_rate": 9.70795295418519e-06, "loss": 0.0425, "step": 49040 }, { "epoch": 0.39687676996520754, "grad_norm": 0.4204323887825012, "learning_rate": 9.707715122339043e-06, "loss": 0.0371, "step": 49050 }, { "epoch": 0.3969576826604094, "grad_norm": 0.37670212984085083, "learning_rate": 9.707477196607457e-06, "loss": 0.0578, "step": 49060 }, { "epoch": 0.3970385953556113, "grad_norm": 0.31818336248397827, "learning_rate": 9.707239176995177e-06, "loss": 0.0282, "step": 49070 }, { "epoch": 0.3971195080508132, "grad_norm": 0.4266763925552368, "learning_rate": 9.707001063506949e-06, "loss": 0.0555, "step": 49080 }, { "epoch": 0.397200420746015, "grad_norm": 0.5747929811477661, "learning_rate": 9.706762856147523e-06, "loss": 0.0506, "step": 49090 }, { "epoch": 0.3972813334412169, "grad_norm": 0.3112289309501648, "learning_rate": 9.706524554921646e-06, "loss": 0.0468, "step": 49100 }, { "epoch": 0.3973622461364188, "grad_norm": 0.5390130877494812, "learning_rate": 9.706286159834073e-06, "loss": 0.0318, "step": 49110 }, { "epoch": 0.3974431588316207, "grad_norm": 0.5734144449234009, "learning_rate": 9.706047670889558e-06, "loss": 0.0412, "step": 49120 }, { "epoch": 0.39752407152682256, "grad_norm": 0.5598648190498352, "learning_rate": 9.705809088092857e-06, "loss": 0.028, "step": 49130 }, { "epoch": 0.39760498422202445, "grad_norm": 0.6641300320625305, "learning_rate": 9.705570411448728e-06, "loss": 0.0391, "step": 49140 }, { "epoch": 0.39768589691722633, "grad_norm": 1.0562028884887695, "learning_rate": 9.705331640961932e-06, "loss": 0.0437, "step": 49150 }, { "epoch": 0.39776680961242816, "grad_norm": 0.8395108580589294, "learning_rate": 9.705092776637231e-06, "loss": 0.0503, "step": 49160 }, { "epoch": 0.39784772230763005, "grad_norm": 0.7847106456756592, "learning_rate": 9.704853818479384e-06, "loss": 0.0325, "step": 49170 }, { "epoch": 0.39792863500283193, "grad_norm": 0.5872581601142883, "learning_rate": 9.704614766493162e-06, "loss": 0.0434, "step": 49180 }, { "epoch": 0.3980095476980338, "grad_norm": 0.37175288796424866, "learning_rate": 9.704375620683331e-06, "loss": 0.0518, "step": 49190 }, { "epoch": 0.3980904603932357, "grad_norm": 0.4490358829498291, "learning_rate": 9.704136381054659e-06, "loss": 0.0325, "step": 49200 }, { "epoch": 0.3981713730884376, "grad_norm": 0.492434561252594, "learning_rate": 9.703897047611917e-06, "loss": 0.0452, "step": 49210 }, { "epoch": 0.39825228578363947, "grad_norm": 0.39666858315467834, "learning_rate": 9.703657620359879e-06, "loss": 0.0326, "step": 49220 }, { "epoch": 0.39833319847884135, "grad_norm": 0.2746184170246124, "learning_rate": 9.70341809930332e-06, "loss": 0.0361, "step": 49230 }, { "epoch": 0.3984141111740432, "grad_norm": 0.6387309432029724, "learning_rate": 9.703178484447017e-06, "loss": 0.0509, "step": 49240 }, { "epoch": 0.39849502386924507, "grad_norm": 0.4553670585155487, "learning_rate": 9.702938775795747e-06, "loss": 0.0353, "step": 49250 }, { "epoch": 0.39857593656444695, "grad_norm": 0.6924221515655518, "learning_rate": 9.702698973354292e-06, "loss": 0.0355, "step": 49260 }, { "epoch": 0.39865684925964884, "grad_norm": 0.4236659109592438, "learning_rate": 9.702459077127432e-06, "loss": 0.048, "step": 49270 }, { "epoch": 0.3987377619548507, "grad_norm": 0.3306962847709656, "learning_rate": 9.702219087119955e-06, "loss": 0.0377, "step": 49280 }, { "epoch": 0.3988186746500526, "grad_norm": 0.557625949382782, "learning_rate": 9.701979003336646e-06, "loss": 0.0504, "step": 49290 }, { "epoch": 0.3988995873452545, "grad_norm": 0.420340895652771, "learning_rate": 9.70173882578229e-06, "loss": 0.0543, "step": 49300 }, { "epoch": 0.3989805000404563, "grad_norm": 0.4664018452167511, "learning_rate": 9.701498554461679e-06, "loss": 0.0516, "step": 49310 }, { "epoch": 0.3990614127356582, "grad_norm": 0.35493841767311096, "learning_rate": 9.701258189379606e-06, "loss": 0.0457, "step": 49320 }, { "epoch": 0.3991423254308601, "grad_norm": 0.6063479781150818, "learning_rate": 9.701017730540862e-06, "loss": 0.0365, "step": 49330 }, { "epoch": 0.399223238126062, "grad_norm": 0.7927623391151428, "learning_rate": 9.700777177950244e-06, "loss": 0.0427, "step": 49340 }, { "epoch": 0.39930415082126386, "grad_norm": 0.5806024670600891, "learning_rate": 9.70053653161255e-06, "loss": 0.0367, "step": 49350 }, { "epoch": 0.39938506351646574, "grad_norm": 0.6438177824020386, "learning_rate": 9.700295791532578e-06, "loss": 0.0293, "step": 49360 }, { "epoch": 0.39946597621166763, "grad_norm": 0.38820329308509827, "learning_rate": 9.700054957715128e-06, "loss": 0.0227, "step": 49370 }, { "epoch": 0.3995468889068695, "grad_norm": 0.469935804605484, "learning_rate": 9.699814030165005e-06, "loss": 0.0523, "step": 49380 }, { "epoch": 0.39962780160207134, "grad_norm": 0.8068033456802368, "learning_rate": 9.699573008887013e-06, "loss": 0.037, "step": 49390 }, { "epoch": 0.3997087142972732, "grad_norm": 0.8099637627601624, "learning_rate": 9.699331893885957e-06, "loss": 0.0404, "step": 49400 }, { "epoch": 0.3997896269924751, "grad_norm": 0.3888148069381714, "learning_rate": 9.69909068516665e-06, "loss": 0.024, "step": 49410 }, { "epoch": 0.399870539687677, "grad_norm": 0.5155788064002991, "learning_rate": 9.698849382733897e-06, "loss": 0.0405, "step": 49420 }, { "epoch": 0.3999514523828789, "grad_norm": 0.5842757821083069, "learning_rate": 9.698607986592513e-06, "loss": 0.0678, "step": 49430 }, { "epoch": 0.40003236507808076, "grad_norm": 0.5522404909133911, "learning_rate": 9.698366496747314e-06, "loss": 0.0336, "step": 49440 }, { "epoch": 0.40011327777328265, "grad_norm": 0.4134119153022766, "learning_rate": 9.69812491320311e-06, "loss": 0.0231, "step": 49450 }, { "epoch": 0.4001941904684845, "grad_norm": 0.33913248777389526, "learning_rate": 9.697883235964727e-06, "loss": 0.0543, "step": 49460 }, { "epoch": 0.40027510316368636, "grad_norm": 0.7825788855552673, "learning_rate": 9.697641465036977e-06, "loss": 0.0617, "step": 49470 }, { "epoch": 0.40035601585888825, "grad_norm": 0.4429165720939636, "learning_rate": 9.697399600424689e-06, "loss": 0.044, "step": 49480 }, { "epoch": 0.40043692855409013, "grad_norm": 0.4102056324481964, "learning_rate": 9.697157642132679e-06, "loss": 0.0421, "step": 49490 }, { "epoch": 0.400517841249292, "grad_norm": 0.5710306763648987, "learning_rate": 9.696915590165777e-06, "loss": 0.0458, "step": 49500 }, { "epoch": 0.4005987539444939, "grad_norm": 0.5368221998214722, "learning_rate": 9.69667344452881e-06, "loss": 0.0511, "step": 49510 }, { "epoch": 0.4006796666396958, "grad_norm": 0.6071436405181885, "learning_rate": 9.696431205226605e-06, "loss": 0.046, "step": 49520 }, { "epoch": 0.40076057933489767, "grad_norm": 0.6723918318748474, "learning_rate": 9.696188872263996e-06, "loss": 0.0382, "step": 49530 }, { "epoch": 0.4008414920300995, "grad_norm": 0.4595617353916168, "learning_rate": 9.695946445645813e-06, "loss": 0.0433, "step": 49540 }, { "epoch": 0.4009224047253014, "grad_norm": 0.9835454225540161, "learning_rate": 9.69570392537689e-06, "loss": 0.0551, "step": 49550 }, { "epoch": 0.40100331742050327, "grad_norm": 0.909686803817749, "learning_rate": 9.695461311462069e-06, "loss": 0.0588, "step": 49560 }, { "epoch": 0.40108423011570515, "grad_norm": 0.5442835092544556, "learning_rate": 9.695218603906183e-06, "loss": 0.0433, "step": 49570 }, { "epoch": 0.40116514281090704, "grad_norm": 0.49755364656448364, "learning_rate": 9.694975802714074e-06, "loss": 0.0392, "step": 49580 }, { "epoch": 0.4012460555061089, "grad_norm": 0.6706479787826538, "learning_rate": 9.694732907890584e-06, "loss": 0.0292, "step": 49590 }, { "epoch": 0.4013269682013108, "grad_norm": 0.557333767414093, "learning_rate": 9.694489919440558e-06, "loss": 0.0479, "step": 49600 }, { "epoch": 0.40140788089651264, "grad_norm": 0.7322293519973755, "learning_rate": 9.694246837368839e-06, "loss": 0.0432, "step": 49610 }, { "epoch": 0.4014887935917145, "grad_norm": 0.6947246193885803, "learning_rate": 9.694003661680279e-06, "loss": 0.0417, "step": 49620 }, { "epoch": 0.4015697062869164, "grad_norm": 0.47499024868011475, "learning_rate": 9.693760392379723e-06, "loss": 0.0499, "step": 49630 }, { "epoch": 0.4016506189821183, "grad_norm": 0.535579264163971, "learning_rate": 9.693517029472026e-06, "loss": 0.0397, "step": 49640 }, { "epoch": 0.4017315316773202, "grad_norm": 0.7510861754417419, "learning_rate": 9.69327357296204e-06, "loss": 0.0486, "step": 49650 }, { "epoch": 0.40181244437252206, "grad_norm": 0.34408727288246155, "learning_rate": 9.693030022854623e-06, "loss": 0.0532, "step": 49660 }, { "epoch": 0.40189335706772394, "grad_norm": 0.7160088419914246, "learning_rate": 9.692786379154629e-06, "loss": 0.0453, "step": 49670 }, { "epoch": 0.40197426976292583, "grad_norm": 0.35143086314201355, "learning_rate": 9.692542641866916e-06, "loss": 0.0288, "step": 49680 }, { "epoch": 0.40205518245812766, "grad_norm": 0.5634053945541382, "learning_rate": 9.692298810996345e-06, "loss": 0.0381, "step": 49690 }, { "epoch": 0.40213609515332954, "grad_norm": 0.46560239791870117, "learning_rate": 9.692054886547784e-06, "loss": 0.0307, "step": 49700 }, { "epoch": 0.4022170078485314, "grad_norm": 0.6002450585365295, "learning_rate": 9.69181086852609e-06, "loss": 0.0425, "step": 49710 }, { "epoch": 0.4022979205437333, "grad_norm": 0.8223695158958435, "learning_rate": 9.691566756936136e-06, "loss": 0.0259, "step": 49720 }, { "epoch": 0.4023788332389352, "grad_norm": 0.2735797166824341, "learning_rate": 9.691322551782785e-06, "loss": 0.0404, "step": 49730 }, { "epoch": 0.4024597459341371, "grad_norm": 0.71369469165802, "learning_rate": 9.691078253070911e-06, "loss": 0.0687, "step": 49740 }, { "epoch": 0.40254065862933897, "grad_norm": 0.6391559839248657, "learning_rate": 9.690833860805384e-06, "loss": 0.0435, "step": 49750 }, { "epoch": 0.4026215713245408, "grad_norm": 0.31082186102867126, "learning_rate": 9.69058937499108e-06, "loss": 0.0438, "step": 49760 }, { "epoch": 0.4027024840197427, "grad_norm": 0.5356441140174866, "learning_rate": 9.690344795632872e-06, "loss": 0.0523, "step": 49770 }, { "epoch": 0.40278339671494456, "grad_norm": 0.7215673327445984, "learning_rate": 9.690100122735637e-06, "loss": 0.0332, "step": 49780 }, { "epoch": 0.40286430941014645, "grad_norm": 0.6372016072273254, "learning_rate": 9.689855356304259e-06, "loss": 0.0415, "step": 49790 }, { "epoch": 0.40294522210534833, "grad_norm": 0.6291685700416565, "learning_rate": 9.689610496343614e-06, "loss": 0.038, "step": 49800 }, { "epoch": 0.4030261348005502, "grad_norm": 0.333822101354599, "learning_rate": 9.68936554285859e-06, "loss": 0.0481, "step": 49810 }, { "epoch": 0.4031070474957521, "grad_norm": 0.794300377368927, "learning_rate": 9.68912049585407e-06, "loss": 0.0549, "step": 49820 }, { "epoch": 0.403187960190954, "grad_norm": 1.0367580652236938, "learning_rate": 9.688875355334939e-06, "loss": 0.0512, "step": 49830 }, { "epoch": 0.4032688728861558, "grad_norm": 0.6779212355613708, "learning_rate": 9.688630121306088e-06, "loss": 0.0484, "step": 49840 }, { "epoch": 0.4033497855813577, "grad_norm": 0.5840775966644287, "learning_rate": 9.68838479377241e-06, "loss": 0.0414, "step": 49850 }, { "epoch": 0.4034306982765596, "grad_norm": 0.27441877126693726, "learning_rate": 9.688139372738791e-06, "loss": 0.0307, "step": 49860 }, { "epoch": 0.40351161097176147, "grad_norm": 0.5875170230865479, "learning_rate": 9.687893858210133e-06, "loss": 0.0325, "step": 49870 }, { "epoch": 0.40359252366696335, "grad_norm": 0.46896153688430786, "learning_rate": 9.687648250191327e-06, "loss": 0.0348, "step": 49880 }, { "epoch": 0.40367343636216524, "grad_norm": 0.3750581741333008, "learning_rate": 9.687402548687272e-06, "loss": 0.0334, "step": 49890 }, { "epoch": 0.4037543490573671, "grad_norm": 0.6840469837188721, "learning_rate": 9.68715675370287e-06, "loss": 0.0289, "step": 49900 }, { "epoch": 0.40383526175256895, "grad_norm": 0.48585325479507446, "learning_rate": 9.686910865243022e-06, "loss": 0.0411, "step": 49910 }, { "epoch": 0.40391617444777084, "grad_norm": 0.14554132521152496, "learning_rate": 9.686664883312629e-06, "loss": 0.0261, "step": 49920 }, { "epoch": 0.4039970871429727, "grad_norm": 0.1578896939754486, "learning_rate": 9.6864188079166e-06, "loss": 0.0313, "step": 49930 }, { "epoch": 0.4040779998381746, "grad_norm": 0.5428899526596069, "learning_rate": 9.686172639059842e-06, "loss": 0.0255, "step": 49940 }, { "epoch": 0.4041589125333765, "grad_norm": 0.2828347682952881, "learning_rate": 9.685926376747264e-06, "loss": 0.0378, "step": 49950 }, { "epoch": 0.4042398252285784, "grad_norm": 0.4063836932182312, "learning_rate": 9.685680020983776e-06, "loss": 0.0367, "step": 49960 }, { "epoch": 0.40432073792378026, "grad_norm": 0.19172579050064087, "learning_rate": 9.685433571774293e-06, "loss": 0.0424, "step": 49970 }, { "epoch": 0.40440165061898214, "grad_norm": 0.5057078003883362, "learning_rate": 9.685187029123726e-06, "loss": 0.0306, "step": 49980 }, { "epoch": 0.404482563314184, "grad_norm": 0.3975830376148224, "learning_rate": 9.684940393036996e-06, "loss": 0.0323, "step": 49990 }, { "epoch": 0.40456347600938586, "grad_norm": 0.6434322595596313, "learning_rate": 9.68469366351902e-06, "loss": 0.0213, "step": 50000 }, { "epoch": 0.40464438870458774, "grad_norm": 0.2536502480506897, "learning_rate": 9.68444684057472e-06, "loss": 0.0434, "step": 50010 }, { "epoch": 0.4047253013997896, "grad_norm": 0.5620536804199219, "learning_rate": 9.684199924209016e-06, "loss": 0.0331, "step": 50020 }, { "epoch": 0.4048062140949915, "grad_norm": 0.545499324798584, "learning_rate": 9.683952914426833e-06, "loss": 0.0298, "step": 50030 }, { "epoch": 0.4048871267901934, "grad_norm": 0.35004088282585144, "learning_rate": 9.6837058112331e-06, "loss": 0.0546, "step": 50040 }, { "epoch": 0.4049680394853953, "grad_norm": 0.7804901599884033, "learning_rate": 9.68345861463274e-06, "loss": 0.0467, "step": 50050 }, { "epoch": 0.4050489521805971, "grad_norm": 1.0270644426345825, "learning_rate": 9.683211324630685e-06, "loss": 0.0478, "step": 50060 }, { "epoch": 0.405129864875799, "grad_norm": 1.0720953941345215, "learning_rate": 9.682963941231867e-06, "loss": 0.0439, "step": 50070 }, { "epoch": 0.4052107775710009, "grad_norm": 0.6258587837219238, "learning_rate": 9.68271646444122e-06, "loss": 0.0323, "step": 50080 }, { "epoch": 0.40529169026620276, "grad_norm": 0.7587287425994873, "learning_rate": 9.682468894263679e-06, "loss": 0.0433, "step": 50090 }, { "epoch": 0.40537260296140465, "grad_norm": 0.6970593929290771, "learning_rate": 9.68222123070418e-06, "loss": 0.0351, "step": 50100 }, { "epoch": 0.40545351565660653, "grad_norm": 0.5827977061271667, "learning_rate": 9.681973473767663e-06, "loss": 0.0464, "step": 50110 }, { "epoch": 0.4055344283518084, "grad_norm": 0.7569541931152344, "learning_rate": 9.681725623459071e-06, "loss": 0.0374, "step": 50120 }, { "epoch": 0.4056153410470103, "grad_norm": 0.3217051029205322, "learning_rate": 9.681477679783343e-06, "loss": 0.0368, "step": 50130 }, { "epoch": 0.40569625374221213, "grad_norm": 0.6961851119995117, "learning_rate": 9.681229642745427e-06, "loss": 0.0399, "step": 50140 }, { "epoch": 0.405777166437414, "grad_norm": 0.2311190515756607, "learning_rate": 9.680981512350268e-06, "loss": 0.0342, "step": 50150 }, { "epoch": 0.4058580791326159, "grad_norm": 0.7042049765586853, "learning_rate": 9.680733288602813e-06, "loss": 0.0334, "step": 50160 }, { "epoch": 0.4059389918278178, "grad_norm": 0.6719930171966553, "learning_rate": 9.680484971508016e-06, "loss": 0.0437, "step": 50170 }, { "epoch": 0.40601990452301967, "grad_norm": 0.4915924370288849, "learning_rate": 9.680236561070827e-06, "loss": 0.0525, "step": 50180 }, { "epoch": 0.40610081721822155, "grad_norm": 0.5656357407569885, "learning_rate": 9.6799880572962e-06, "loss": 0.0423, "step": 50190 }, { "epoch": 0.40618172991342344, "grad_norm": 0.7213016748428345, "learning_rate": 9.67973946018909e-06, "loss": 0.0369, "step": 50200 }, { "epoch": 0.40626264260862527, "grad_norm": 0.8467987775802612, "learning_rate": 9.679490769754457e-06, "loss": 0.0355, "step": 50210 }, { "epoch": 0.40634355530382715, "grad_norm": 0.6896623969078064, "learning_rate": 9.679241985997262e-06, "loss": 0.0427, "step": 50220 }, { "epoch": 0.40642446799902904, "grad_norm": 0.5397550463676453, "learning_rate": 9.67899310892246e-06, "loss": 0.0436, "step": 50230 }, { "epoch": 0.4065053806942309, "grad_norm": 0.4679825007915497, "learning_rate": 9.678744138535018e-06, "loss": 0.0371, "step": 50240 }, { "epoch": 0.4065862933894328, "grad_norm": 0.3748362064361572, "learning_rate": 9.678495074839905e-06, "loss": 0.0475, "step": 50250 }, { "epoch": 0.4066672060846347, "grad_norm": 0.36530059576034546, "learning_rate": 9.678245917842082e-06, "loss": 0.051, "step": 50260 }, { "epoch": 0.4067481187798366, "grad_norm": 0.2755151689052582, "learning_rate": 9.67799666754652e-06, "loss": 0.035, "step": 50270 }, { "epoch": 0.40682903147503846, "grad_norm": 0.3368529677391052, "learning_rate": 9.67774732395819e-06, "loss": 0.0488, "step": 50280 }, { "epoch": 0.4069099441702403, "grad_norm": 0.5037494897842407, "learning_rate": 9.677497887082066e-06, "loss": 0.0301, "step": 50290 }, { "epoch": 0.4069908568654422, "grad_norm": 0.7849031090736389, "learning_rate": 9.677248356923122e-06, "loss": 0.0439, "step": 50300 }, { "epoch": 0.40707176956064406, "grad_norm": 0.27761879563331604, "learning_rate": 9.676998733486331e-06, "loss": 0.0325, "step": 50310 }, { "epoch": 0.40715268225584594, "grad_norm": 0.2627839744091034, "learning_rate": 9.676749016776676e-06, "loss": 0.043, "step": 50320 }, { "epoch": 0.40723359495104783, "grad_norm": 0.36378511786460876, "learning_rate": 9.676499206799135e-06, "loss": 0.0381, "step": 50330 }, { "epoch": 0.4073145076462497, "grad_norm": 0.4082108736038208, "learning_rate": 9.67624930355869e-06, "loss": 0.0484, "step": 50340 }, { "epoch": 0.4073954203414516, "grad_norm": 0.16566132009029388, "learning_rate": 9.675999307060322e-06, "loss": 0.0437, "step": 50350 }, { "epoch": 0.4074763330366534, "grad_norm": 0.4921083152294159, "learning_rate": 9.675749217309022e-06, "loss": 0.0516, "step": 50360 }, { "epoch": 0.4075572457318553, "grad_norm": 0.3297388255596161, "learning_rate": 9.675499034309774e-06, "loss": 0.0602, "step": 50370 }, { "epoch": 0.4076381584270572, "grad_norm": 1.663617730140686, "learning_rate": 9.675248758067569e-06, "loss": 0.0341, "step": 50380 }, { "epoch": 0.4077190711222591, "grad_norm": 0.39446261525154114, "learning_rate": 9.674998388587396e-06, "loss": 0.041, "step": 50390 }, { "epoch": 0.40779998381746096, "grad_norm": 0.5047823190689087, "learning_rate": 9.67474792587425e-06, "loss": 0.043, "step": 50400 }, { "epoch": 0.40788089651266285, "grad_norm": 0.9399073123931885, "learning_rate": 9.674497369933124e-06, "loss": 0.0414, "step": 50410 }, { "epoch": 0.40796180920786473, "grad_norm": 0.4599452018737793, "learning_rate": 9.674246720769018e-06, "loss": 0.0292, "step": 50420 }, { "epoch": 0.4080427219030666, "grad_norm": 0.34534332156181335, "learning_rate": 9.67399597838693e-06, "loss": 0.0483, "step": 50430 }, { "epoch": 0.40812363459826845, "grad_norm": 3.0177223682403564, "learning_rate": 9.673745142791857e-06, "loss": 0.03, "step": 50440 }, { "epoch": 0.40820454729347033, "grad_norm": 0.5268557071685791, "learning_rate": 9.673494213988805e-06, "loss": 0.0531, "step": 50450 }, { "epoch": 0.4082854599886722, "grad_norm": 0.978097677230835, "learning_rate": 9.673243191982777e-06, "loss": 0.0434, "step": 50460 }, { "epoch": 0.4083663726838741, "grad_norm": 0.4403857886791229, "learning_rate": 9.67299207677878e-06, "loss": 0.0495, "step": 50470 }, { "epoch": 0.408447285379076, "grad_norm": 0.1886274218559265, "learning_rate": 9.67274086838182e-06, "loss": 0.0313, "step": 50480 }, { "epoch": 0.40852819807427787, "grad_norm": 0.20552001893520355, "learning_rate": 9.672489566796907e-06, "loss": 0.0368, "step": 50490 }, { "epoch": 0.40860911076947976, "grad_norm": 0.5141738057136536, "learning_rate": 9.672238172029055e-06, "loss": 0.0532, "step": 50500 }, { "epoch": 0.4086900234646816, "grad_norm": 0.2715071737766266, "learning_rate": 9.671986684083276e-06, "loss": 0.0469, "step": 50510 }, { "epoch": 0.40877093615988347, "grad_norm": 0.3583204746246338, "learning_rate": 9.671735102964588e-06, "loss": 0.034, "step": 50520 }, { "epoch": 0.40885184885508535, "grad_norm": 0.6707888245582581, "learning_rate": 9.671483428678002e-06, "loss": 0.0397, "step": 50530 }, { "epoch": 0.40893276155028724, "grad_norm": 0.3366640508174896, "learning_rate": 9.671231661228543e-06, "loss": 0.0394, "step": 50540 }, { "epoch": 0.4090136742454891, "grad_norm": 0.4602776765823364, "learning_rate": 9.670979800621229e-06, "loss": 0.0312, "step": 50550 }, { "epoch": 0.409094586940691, "grad_norm": 0.4362890422344208, "learning_rate": 9.670727846861085e-06, "loss": 0.037, "step": 50560 }, { "epoch": 0.4091754996358929, "grad_norm": 0.8704394102096558, "learning_rate": 9.670475799953135e-06, "loss": 0.0309, "step": 50570 }, { "epoch": 0.4092564123310947, "grad_norm": 0.4072916805744171, "learning_rate": 9.670223659902402e-06, "loss": 0.0316, "step": 50580 }, { "epoch": 0.4093373250262966, "grad_norm": 0.7390848994255066, "learning_rate": 9.66997142671392e-06, "loss": 0.0645, "step": 50590 }, { "epoch": 0.4094182377214985, "grad_norm": 0.21600036323070526, "learning_rate": 9.669719100392715e-06, "loss": 0.0216, "step": 50600 }, { "epoch": 0.4094991504167004, "grad_norm": 0.45253095030784607, "learning_rate": 9.669466680943821e-06, "loss": 0.0375, "step": 50610 }, { "epoch": 0.40958006311190226, "grad_norm": 0.8120885491371155, "learning_rate": 9.669214168372271e-06, "loss": 0.0509, "step": 50620 }, { "epoch": 0.40966097580710414, "grad_norm": 0.7767317891120911, "learning_rate": 9.668961562683104e-06, "loss": 0.0349, "step": 50630 }, { "epoch": 0.40974188850230603, "grad_norm": 0.32229307293891907, "learning_rate": 9.668708863881354e-06, "loss": 0.0234, "step": 50640 }, { "epoch": 0.4098228011975079, "grad_norm": 0.6272839903831482, "learning_rate": 9.668456071972063e-06, "loss": 0.0419, "step": 50650 }, { "epoch": 0.40990371389270974, "grad_norm": 0.5353101491928101, "learning_rate": 9.668203186960268e-06, "loss": 0.0462, "step": 50660 }, { "epoch": 0.4099846265879116, "grad_norm": 0.7985766530036926, "learning_rate": 9.667950208851018e-06, "loss": 0.037, "step": 50670 }, { "epoch": 0.4100655392831135, "grad_norm": 0.5319591164588928, "learning_rate": 9.667697137649355e-06, "loss": 0.036, "step": 50680 }, { "epoch": 0.4101464519783154, "grad_norm": 0.8888391852378845, "learning_rate": 9.667443973360326e-06, "loss": 0.0378, "step": 50690 }, { "epoch": 0.4102273646735173, "grad_norm": 0.4589611887931824, "learning_rate": 9.66719071598898e-06, "loss": 0.0342, "step": 50700 }, { "epoch": 0.41030827736871917, "grad_norm": 0.5075792074203491, "learning_rate": 9.66693736554037e-06, "loss": 0.029, "step": 50710 }, { "epoch": 0.41038919006392105, "grad_norm": 0.7707006931304932, "learning_rate": 9.666683922019546e-06, "loss": 0.0246, "step": 50720 }, { "epoch": 0.4104701027591229, "grad_norm": 0.4953781068325043, "learning_rate": 9.666430385431561e-06, "loss": 0.0301, "step": 50730 }, { "epoch": 0.41055101545432476, "grad_norm": 0.6444827914237976, "learning_rate": 9.666176755781476e-06, "loss": 0.033, "step": 50740 }, { "epoch": 0.41063192814952665, "grad_norm": 0.5180009603500366, "learning_rate": 9.665923033074344e-06, "loss": 0.0282, "step": 50750 }, { "epoch": 0.41071284084472853, "grad_norm": 0.7299830317497253, "learning_rate": 9.66566921731523e-06, "loss": 0.0339, "step": 50760 }, { "epoch": 0.4107937535399304, "grad_norm": 0.5738345384597778, "learning_rate": 9.665415308509191e-06, "loss": 0.0462, "step": 50770 }, { "epoch": 0.4108746662351323, "grad_norm": 0.48230013251304626, "learning_rate": 9.665161306661294e-06, "loss": 0.0457, "step": 50780 }, { "epoch": 0.4109555789303342, "grad_norm": 0.5239271521568298, "learning_rate": 9.664907211776603e-06, "loss": 0.0363, "step": 50790 }, { "epoch": 0.41103649162553607, "grad_norm": 0.2469908893108368, "learning_rate": 9.664653023860185e-06, "loss": 0.0255, "step": 50800 }, { "epoch": 0.4111174043207379, "grad_norm": 0.5187985301017761, "learning_rate": 9.664398742917109e-06, "loss": 0.0274, "step": 50810 }, { "epoch": 0.4111983170159398, "grad_norm": 0.6221203207969666, "learning_rate": 9.664144368952449e-06, "loss": 0.0285, "step": 50820 }, { "epoch": 0.41127922971114167, "grad_norm": 0.6023231744766235, "learning_rate": 9.663889901971274e-06, "loss": 0.0478, "step": 50830 }, { "epoch": 0.41136014240634355, "grad_norm": 0.8606925010681152, "learning_rate": 9.663635341978664e-06, "loss": 0.0329, "step": 50840 }, { "epoch": 0.41144105510154544, "grad_norm": 0.8206969499588013, "learning_rate": 9.663380688979691e-06, "loss": 0.0504, "step": 50850 }, { "epoch": 0.4115219677967473, "grad_norm": 0.518027663230896, "learning_rate": 9.663125942979435e-06, "loss": 0.0586, "step": 50860 }, { "epoch": 0.4116028804919492, "grad_norm": 0.5495258569717407, "learning_rate": 9.662871103982975e-06, "loss": 0.0453, "step": 50870 }, { "epoch": 0.41168379318715104, "grad_norm": 0.28496500849723816, "learning_rate": 9.662616171995397e-06, "loss": 0.0287, "step": 50880 }, { "epoch": 0.4117647058823529, "grad_norm": 0.4440740942955017, "learning_rate": 9.66236114702178e-06, "loss": 0.0318, "step": 50890 }, { "epoch": 0.4118456185775548, "grad_norm": 0.49820199608802795, "learning_rate": 9.662106029067213e-06, "loss": 0.0207, "step": 50900 }, { "epoch": 0.4119265312727567, "grad_norm": 0.7838210463523865, "learning_rate": 9.661850818136784e-06, "loss": 0.0513, "step": 50910 }, { "epoch": 0.4120074439679586, "grad_norm": 0.5933210849761963, "learning_rate": 9.661595514235583e-06, "loss": 0.0269, "step": 50920 }, { "epoch": 0.41208835666316046, "grad_norm": 0.45412999391555786, "learning_rate": 9.6613401173687e-06, "loss": 0.037, "step": 50930 }, { "epoch": 0.41216926935836234, "grad_norm": 0.3409956693649292, "learning_rate": 9.661084627541228e-06, "loss": 0.032, "step": 50940 }, { "epoch": 0.41225018205356423, "grad_norm": 0.6827198266983032, "learning_rate": 9.660829044758265e-06, "loss": 0.0403, "step": 50950 }, { "epoch": 0.41233109474876606, "grad_norm": 0.5093628168106079, "learning_rate": 9.660573369024904e-06, "loss": 0.0354, "step": 50960 }, { "epoch": 0.41241200744396794, "grad_norm": 0.3511163592338562, "learning_rate": 9.660317600346248e-06, "loss": 0.0358, "step": 50970 }, { "epoch": 0.4124929201391698, "grad_norm": 0.23081178963184357, "learning_rate": 9.660061738727394e-06, "loss": 0.035, "step": 50980 }, { "epoch": 0.4125738328343717, "grad_norm": 0.4053352475166321, "learning_rate": 9.659805784173449e-06, "loss": 0.0368, "step": 50990 }, { "epoch": 0.4126547455295736, "grad_norm": 0.40756210684776306, "learning_rate": 9.659549736689512e-06, "loss": 0.0354, "step": 51000 }, { "epoch": 0.4127356582247755, "grad_norm": 0.40916699171066284, "learning_rate": 9.659293596280693e-06, "loss": 0.0506, "step": 51010 }, { "epoch": 0.41281657091997737, "grad_norm": 0.7384265065193176, "learning_rate": 9.6590373629521e-06, "loss": 0.0365, "step": 51020 }, { "epoch": 0.4128974836151792, "grad_norm": 1.029682993888855, "learning_rate": 9.658781036708842e-06, "loss": 0.0538, "step": 51030 }, { "epoch": 0.4129783963103811, "grad_norm": 0.5855434536933899, "learning_rate": 9.658524617556033e-06, "loss": 0.0404, "step": 51040 }, { "epoch": 0.41305930900558296, "grad_norm": 0.5672685503959656, "learning_rate": 9.658268105498783e-06, "loss": 0.0332, "step": 51050 }, { "epoch": 0.41314022170078485, "grad_norm": 0.6501293182373047, "learning_rate": 9.65801150054221e-06, "loss": 0.0283, "step": 51060 }, { "epoch": 0.41322113439598673, "grad_norm": 0.4079943895339966, "learning_rate": 9.657754802691433e-06, "loss": 0.03, "step": 51070 }, { "epoch": 0.4133020470911886, "grad_norm": 0.5934545397758484, "learning_rate": 9.657498011951567e-06, "loss": 0.0253, "step": 51080 }, { "epoch": 0.4133829597863905, "grad_norm": 0.9217981696128845, "learning_rate": 9.657241128327736e-06, "loss": 0.0331, "step": 51090 }, { "epoch": 0.4134638724815924, "grad_norm": 0.4307168424129486, "learning_rate": 9.656984151825064e-06, "loss": 0.0325, "step": 51100 }, { "epoch": 0.4135447851767942, "grad_norm": 0.6758867502212524, "learning_rate": 9.656727082448673e-06, "loss": 0.0267, "step": 51110 }, { "epoch": 0.4136256978719961, "grad_norm": 0.5831505656242371, "learning_rate": 9.656469920203692e-06, "loss": 0.0349, "step": 51120 }, { "epoch": 0.413706610567198, "grad_norm": 0.29251500964164734, "learning_rate": 9.656212665095248e-06, "loss": 0.0257, "step": 51130 }, { "epoch": 0.41378752326239987, "grad_norm": 0.43564969301223755, "learning_rate": 9.655955317128472e-06, "loss": 0.0239, "step": 51140 }, { "epoch": 0.41386843595760175, "grad_norm": 0.6458479762077332, "learning_rate": 9.655697876308498e-06, "loss": 0.0433, "step": 51150 }, { "epoch": 0.41394934865280364, "grad_norm": 0.31335312128067017, "learning_rate": 9.655440342640457e-06, "loss": 0.0385, "step": 51160 }, { "epoch": 0.4140302613480055, "grad_norm": 0.1931343376636505, "learning_rate": 9.655182716129487e-06, "loss": 0.0374, "step": 51170 }, { "epoch": 0.41411117404320735, "grad_norm": 1.390900731086731, "learning_rate": 9.654924996780724e-06, "loss": 0.0511, "step": 51180 }, { "epoch": 0.41419208673840924, "grad_norm": 0.3485090136528015, "learning_rate": 9.65466718459931e-06, "loss": 0.0313, "step": 51190 }, { "epoch": 0.4142729994336111, "grad_norm": 0.8455440402030945, "learning_rate": 9.654409279590385e-06, "loss": 0.055, "step": 51200 }, { "epoch": 0.414353912128813, "grad_norm": 0.11015159636735916, "learning_rate": 9.654151281759094e-06, "loss": 0.0403, "step": 51210 }, { "epoch": 0.4144348248240149, "grad_norm": 0.5799300074577332, "learning_rate": 9.653893191110579e-06, "loss": 0.0243, "step": 51220 }, { "epoch": 0.4145157375192168, "grad_norm": 0.3027959167957306, "learning_rate": 9.653635007649991e-06, "loss": 0.0333, "step": 51230 }, { "epoch": 0.41459665021441866, "grad_norm": 0.49908891320228577, "learning_rate": 9.653376731382476e-06, "loss": 0.0508, "step": 51240 }, { "epoch": 0.41467756290962055, "grad_norm": 0.4494040012359619, "learning_rate": 9.653118362313186e-06, "loss": 0.0397, "step": 51250 }, { "epoch": 0.4147584756048224, "grad_norm": 0.13989904522895813, "learning_rate": 9.652859900447273e-06, "loss": 0.0353, "step": 51260 }, { "epoch": 0.41483938830002426, "grad_norm": 0.8754511475563049, "learning_rate": 9.652601345789894e-06, "loss": 0.0372, "step": 51270 }, { "epoch": 0.41492030099522614, "grad_norm": 0.5002226829528809, "learning_rate": 9.6523426983462e-06, "loss": 0.0355, "step": 51280 }, { "epoch": 0.41500121369042803, "grad_norm": 0.8633406162261963, "learning_rate": 9.652083958121356e-06, "loss": 0.0309, "step": 51290 }, { "epoch": 0.4150821263856299, "grad_norm": 0.42494794726371765, "learning_rate": 9.651825125120515e-06, "loss": 0.0302, "step": 51300 }, { "epoch": 0.4151630390808318, "grad_norm": 0.6619911789894104, "learning_rate": 9.651566199348845e-06, "loss": 0.0396, "step": 51310 }, { "epoch": 0.4152439517760337, "grad_norm": 0.13391034305095673, "learning_rate": 9.651307180811506e-06, "loss": 0.0289, "step": 51320 }, { "epoch": 0.4153248644712355, "grad_norm": 0.9950083494186401, "learning_rate": 9.651048069513664e-06, "loss": 0.0435, "step": 51330 }, { "epoch": 0.4154057771664374, "grad_norm": 0.7179462909698486, "learning_rate": 9.650788865460487e-06, "loss": 0.0424, "step": 51340 }, { "epoch": 0.4154866898616393, "grad_norm": 0.3784732520580292, "learning_rate": 9.650529568657146e-06, "loss": 0.0395, "step": 51350 }, { "epoch": 0.41556760255684116, "grad_norm": 0.042549654841423035, "learning_rate": 9.650270179108809e-06, "loss": 0.0204, "step": 51360 }, { "epoch": 0.41564851525204305, "grad_norm": 1.2427399158477783, "learning_rate": 9.650010696820651e-06, "loss": 0.0328, "step": 51370 }, { "epoch": 0.41572942794724493, "grad_norm": 0.6199473738670349, "learning_rate": 9.649751121797845e-06, "loss": 0.0363, "step": 51380 }, { "epoch": 0.4158103406424468, "grad_norm": 0.2780560255050659, "learning_rate": 9.64949145404557e-06, "loss": 0.04, "step": 51390 }, { "epoch": 0.4158912533376487, "grad_norm": 0.7637486457824707, "learning_rate": 9.649231693569004e-06, "loss": 0.0577, "step": 51400 }, { "epoch": 0.41597216603285053, "grad_norm": 0.8827641606330872, "learning_rate": 9.648971840373326e-06, "loss": 0.0511, "step": 51410 }, { "epoch": 0.4160530787280524, "grad_norm": 0.471377432346344, "learning_rate": 9.64871189446372e-06, "loss": 0.0425, "step": 51420 }, { "epoch": 0.4161339914232543, "grad_norm": 0.43915361166000366, "learning_rate": 9.648451855845368e-06, "loss": 0.0371, "step": 51430 }, { "epoch": 0.4162149041184562, "grad_norm": 0.3709143102169037, "learning_rate": 9.648191724523457e-06, "loss": 0.0346, "step": 51440 }, { "epoch": 0.41629581681365807, "grad_norm": 0.5174628496170044, "learning_rate": 9.647931500503176e-06, "loss": 0.0379, "step": 51450 }, { "epoch": 0.41637672950885996, "grad_norm": 1.3147673606872559, "learning_rate": 9.647671183789714e-06, "loss": 0.0466, "step": 51460 }, { "epoch": 0.41645764220406184, "grad_norm": 0.49368947744369507, "learning_rate": 9.64741077438826e-06, "loss": 0.0501, "step": 51470 }, { "epoch": 0.41653855489926367, "grad_norm": 0.6793168783187866, "learning_rate": 9.64715027230401e-06, "loss": 0.0635, "step": 51480 }, { "epoch": 0.41661946759446555, "grad_norm": 0.6619742512702942, "learning_rate": 9.646889677542157e-06, "loss": 0.0384, "step": 51490 }, { "epoch": 0.41670038028966744, "grad_norm": 0.42347463965415955, "learning_rate": 9.646628990107903e-06, "loss": 0.0463, "step": 51500 }, { "epoch": 0.4167812929848693, "grad_norm": 0.47727882862091064, "learning_rate": 9.646368210006441e-06, "loss": 0.0474, "step": 51510 }, { "epoch": 0.4168622056800712, "grad_norm": 0.9295085668563843, "learning_rate": 9.646107337242973e-06, "loss": 0.0311, "step": 51520 }, { "epoch": 0.4169431183752731, "grad_norm": 0.26078078150749207, "learning_rate": 9.645846371822703e-06, "loss": 0.0408, "step": 51530 }, { "epoch": 0.417024031070475, "grad_norm": 0.21034859120845795, "learning_rate": 9.645585313750836e-06, "loss": 0.0416, "step": 51540 }, { "epoch": 0.41710494376567686, "grad_norm": 0.5669743418693542, "learning_rate": 9.645324163032576e-06, "loss": 0.0451, "step": 51550 }, { "epoch": 0.4171858564608787, "grad_norm": 0.40030938386917114, "learning_rate": 9.645062919673132e-06, "loss": 0.0328, "step": 51560 }, { "epoch": 0.4172667691560806, "grad_norm": 0.5904355645179749, "learning_rate": 9.644801583677716e-06, "loss": 0.0316, "step": 51570 }, { "epoch": 0.41734768185128246, "grad_norm": 0.3631243407726288, "learning_rate": 9.644540155051538e-06, "loss": 0.0221, "step": 51580 }, { "epoch": 0.41742859454648434, "grad_norm": 0.5704444646835327, "learning_rate": 9.644278633799811e-06, "loss": 0.0324, "step": 51590 }, { "epoch": 0.41750950724168623, "grad_norm": 1.0861371755599976, "learning_rate": 9.644017019927749e-06, "loss": 0.054, "step": 51600 }, { "epoch": 0.4175904199368881, "grad_norm": 0.3602461814880371, "learning_rate": 9.643755313440576e-06, "loss": 0.0413, "step": 51610 }, { "epoch": 0.41767133263209, "grad_norm": 0.4164379835128784, "learning_rate": 9.643493514343502e-06, "loss": 0.0401, "step": 51620 }, { "epoch": 0.4177522453272918, "grad_norm": 0.40985265374183655, "learning_rate": 9.643231622641755e-06, "loss": 0.0254, "step": 51630 }, { "epoch": 0.4178331580224937, "grad_norm": 0.7478747367858887, "learning_rate": 9.642969638340556e-06, "loss": 0.0312, "step": 51640 }, { "epoch": 0.4179140707176956, "grad_norm": 0.5547136664390564, "learning_rate": 9.642707561445127e-06, "loss": 0.0595, "step": 51650 }, { "epoch": 0.4179949834128975, "grad_norm": 0.33290895819664, "learning_rate": 9.642445391960699e-06, "loss": 0.0289, "step": 51660 }, { "epoch": 0.41807589610809937, "grad_norm": 0.6480844616889954, "learning_rate": 9.6421831298925e-06, "loss": 0.041, "step": 51670 }, { "epoch": 0.41815680880330125, "grad_norm": 1.0020509958267212, "learning_rate": 9.641920775245754e-06, "loss": 0.0341, "step": 51680 }, { "epoch": 0.41823772149850313, "grad_norm": 0.4600142538547516, "learning_rate": 9.6416583280257e-06, "loss": 0.0572, "step": 51690 }, { "epoch": 0.418318634193705, "grad_norm": 0.41714566946029663, "learning_rate": 9.641395788237572e-06, "loss": 0.0348, "step": 51700 }, { "epoch": 0.41839954688890685, "grad_norm": 0.29653605818748474, "learning_rate": 9.641133155886599e-06, "loss": 0.0262, "step": 51710 }, { "epoch": 0.41848045958410873, "grad_norm": 0.6850010752677917, "learning_rate": 9.640870430978027e-06, "loss": 0.0452, "step": 51720 }, { "epoch": 0.4185613722793106, "grad_norm": 0.3334300220012665, "learning_rate": 9.640607613517088e-06, "loss": 0.0327, "step": 51730 }, { "epoch": 0.4186422849745125, "grad_norm": 0.5280805230140686, "learning_rate": 9.64034470350903e-06, "loss": 0.0563, "step": 51740 }, { "epoch": 0.4187231976697144, "grad_norm": 0.6099703907966614, "learning_rate": 9.640081700959092e-06, "loss": 0.0331, "step": 51750 }, { "epoch": 0.41880411036491627, "grad_norm": 0.3828495442867279, "learning_rate": 9.63981860587252e-06, "loss": 0.0413, "step": 51760 }, { "epoch": 0.41888502306011816, "grad_norm": 0.2413911074399948, "learning_rate": 9.63955541825456e-06, "loss": 0.042, "step": 51770 }, { "epoch": 0.41896593575532, "grad_norm": 0.42375481128692627, "learning_rate": 9.639292138110463e-06, "loss": 0.0464, "step": 51780 }, { "epoch": 0.41904684845052187, "grad_norm": 0.3587343096733093, "learning_rate": 9.639028765445477e-06, "loss": 0.0285, "step": 51790 }, { "epoch": 0.41912776114572375, "grad_norm": 0.7512868642807007, "learning_rate": 9.638765300264858e-06, "loss": 0.0691, "step": 51800 }, { "epoch": 0.41920867384092564, "grad_norm": 0.4861751198768616, "learning_rate": 9.638501742573856e-06, "loss": 0.0342, "step": 51810 }, { "epoch": 0.4192895865361275, "grad_norm": 0.8156200051307678, "learning_rate": 9.63823809237773e-06, "loss": 0.0589, "step": 51820 }, { "epoch": 0.4193704992313294, "grad_norm": 0.5015444159507751, "learning_rate": 9.637974349681735e-06, "loss": 0.0396, "step": 51830 }, { "epoch": 0.4194514119265313, "grad_norm": 0.4602389931678772, "learning_rate": 9.637710514491136e-06, "loss": 0.049, "step": 51840 }, { "epoch": 0.4195323246217332, "grad_norm": 0.394654244184494, "learning_rate": 9.63744658681119e-06, "loss": 0.0361, "step": 51850 }, { "epoch": 0.419613237316935, "grad_norm": 0.5707297921180725, "learning_rate": 9.637182566647161e-06, "loss": 0.0408, "step": 51860 }, { "epoch": 0.4196941500121369, "grad_norm": 1.3070250749588013, "learning_rate": 9.636918454004317e-06, "loss": 0.0262, "step": 51870 }, { "epoch": 0.4197750627073388, "grad_norm": 0.8208373785018921, "learning_rate": 9.636654248887922e-06, "loss": 0.037, "step": 51880 }, { "epoch": 0.41985597540254066, "grad_norm": 0.5106880068778992, "learning_rate": 9.636389951303247e-06, "loss": 0.0522, "step": 51890 }, { "epoch": 0.41993688809774254, "grad_norm": 0.9597216844558716, "learning_rate": 9.636125561255562e-06, "loss": 0.0459, "step": 51900 }, { "epoch": 0.42001780079294443, "grad_norm": 0.5663578510284424, "learning_rate": 9.635861078750139e-06, "loss": 0.0379, "step": 51910 }, { "epoch": 0.4200987134881463, "grad_norm": 0.6142101287841797, "learning_rate": 9.635596503792255e-06, "loss": 0.0447, "step": 51920 }, { "epoch": 0.42017962618334814, "grad_norm": 0.5890533328056335, "learning_rate": 9.635331836387185e-06, "loss": 0.0332, "step": 51930 }, { "epoch": 0.42026053887855, "grad_norm": 0.44066479802131653, "learning_rate": 9.635067076540206e-06, "loss": 0.0302, "step": 51940 }, { "epoch": 0.4203414515737519, "grad_norm": 0.4151017963886261, "learning_rate": 9.634802224256601e-06, "loss": 0.0449, "step": 51950 }, { "epoch": 0.4204223642689538, "grad_norm": 0.4433518946170807, "learning_rate": 9.634537279541649e-06, "loss": 0.039, "step": 51960 }, { "epoch": 0.4205032769641557, "grad_norm": 0.6445884704589844, "learning_rate": 9.634272242400635e-06, "loss": 0.0326, "step": 51970 }, { "epoch": 0.42058418965935757, "grad_norm": 0.44701430201530457, "learning_rate": 9.634007112838843e-06, "loss": 0.0182, "step": 51980 }, { "epoch": 0.42066510235455945, "grad_norm": 0.448861300945282, "learning_rate": 9.633741890861564e-06, "loss": 0.0423, "step": 51990 }, { "epoch": 0.42074601504976133, "grad_norm": 0.44614362716674805, "learning_rate": 9.633476576474084e-06, "loss": 0.0524, "step": 52000 }, { "epoch": 0.42082692774496316, "grad_norm": 0.2500440180301666, "learning_rate": 9.633211169681697e-06, "loss": 0.0363, "step": 52010 }, { "epoch": 0.42090784044016505, "grad_norm": 0.481440007686615, "learning_rate": 9.632945670489692e-06, "loss": 0.0359, "step": 52020 }, { "epoch": 0.42098875313536693, "grad_norm": 0.6288467049598694, "learning_rate": 9.632680078903367e-06, "loss": 0.0521, "step": 52030 }, { "epoch": 0.4210696658305688, "grad_norm": 0.3554166257381439, "learning_rate": 9.63241439492802e-06, "loss": 0.0341, "step": 52040 }, { "epoch": 0.4211505785257707, "grad_norm": 0.6928120255470276, "learning_rate": 9.632148618568946e-06, "loss": 0.0519, "step": 52050 }, { "epoch": 0.4212314912209726, "grad_norm": 0.591785192489624, "learning_rate": 9.631882749831447e-06, "loss": 0.0421, "step": 52060 }, { "epoch": 0.42131240391617447, "grad_norm": 0.4558383524417877, "learning_rate": 9.631616788720824e-06, "loss": 0.0471, "step": 52070 }, { "epoch": 0.4213933166113763, "grad_norm": 0.6231642961502075, "learning_rate": 9.631350735242381e-06, "loss": 0.0421, "step": 52080 }, { "epoch": 0.4214742293065782, "grad_norm": 0.3205586075782776, "learning_rate": 9.631084589401427e-06, "loss": 0.0515, "step": 52090 }, { "epoch": 0.42155514200178007, "grad_norm": 0.4416353404521942, "learning_rate": 9.630818351203265e-06, "loss": 0.0386, "step": 52100 }, { "epoch": 0.42163605469698195, "grad_norm": 0.5033214092254639, "learning_rate": 9.63055202065321e-06, "loss": 0.0502, "step": 52110 }, { "epoch": 0.42171696739218384, "grad_norm": 0.7584477066993713, "learning_rate": 9.630285597756569e-06, "loss": 0.0332, "step": 52120 }, { "epoch": 0.4217978800873857, "grad_norm": 0.5649579167366028, "learning_rate": 9.630019082518657e-06, "loss": 0.0342, "step": 52130 }, { "epoch": 0.4218787927825876, "grad_norm": 0.07454866170883179, "learning_rate": 9.62975247494479e-06, "loss": 0.0378, "step": 52140 }, { "epoch": 0.4219597054777895, "grad_norm": 0.7647656798362732, "learning_rate": 9.62948577504028e-06, "loss": 0.0463, "step": 52150 }, { "epoch": 0.4220406181729913, "grad_norm": 0.751665472984314, "learning_rate": 9.629218982810454e-06, "loss": 0.0366, "step": 52160 }, { "epoch": 0.4221215308681932, "grad_norm": 0.20128551125526428, "learning_rate": 9.628952098260624e-06, "loss": 0.0268, "step": 52170 }, { "epoch": 0.4222024435633951, "grad_norm": 0.5990898013114929, "learning_rate": 9.62868512139612e-06, "loss": 0.0276, "step": 52180 }, { "epoch": 0.422283356258597, "grad_norm": 0.5122118592262268, "learning_rate": 9.62841805222226e-06, "loss": 0.0411, "step": 52190 }, { "epoch": 0.42236426895379886, "grad_norm": 0.36566105484962463, "learning_rate": 9.628150890744374e-06, "loss": 0.0258, "step": 52200 }, { "epoch": 0.42244518164900074, "grad_norm": 0.19843293726444244, "learning_rate": 9.62788363696779e-06, "loss": 0.0333, "step": 52210 }, { "epoch": 0.42252609434420263, "grad_norm": 0.3917381465435028, "learning_rate": 9.627616290897836e-06, "loss": 0.0461, "step": 52220 }, { "epoch": 0.42260700703940446, "grad_norm": 0.5985634326934814, "learning_rate": 9.627348852539844e-06, "loss": 0.0341, "step": 52230 }, { "epoch": 0.42268791973460634, "grad_norm": 0.411506712436676, "learning_rate": 9.627081321899148e-06, "loss": 0.0397, "step": 52240 }, { "epoch": 0.42276883242980823, "grad_norm": 0.6353432536125183, "learning_rate": 9.626813698981085e-06, "loss": 0.0442, "step": 52250 }, { "epoch": 0.4228497451250101, "grad_norm": 0.4239703118801117, "learning_rate": 9.626545983790989e-06, "loss": 0.0398, "step": 52260 }, { "epoch": 0.422930657820212, "grad_norm": 0.5043815970420837, "learning_rate": 9.6262781763342e-06, "loss": 0.0335, "step": 52270 }, { "epoch": 0.4230115705154139, "grad_norm": 0.3525349199771881, "learning_rate": 9.626010276616061e-06, "loss": 0.0302, "step": 52280 }, { "epoch": 0.42309248321061577, "grad_norm": 0.3916569650173187, "learning_rate": 9.62574228464191e-06, "loss": 0.0391, "step": 52290 }, { "epoch": 0.4231733959058176, "grad_norm": 0.6118993759155273, "learning_rate": 9.625474200417096e-06, "loss": 0.0308, "step": 52300 }, { "epoch": 0.4232543086010195, "grad_norm": 1.5451515913009644, "learning_rate": 9.625206023946965e-06, "loss": 0.0455, "step": 52310 }, { "epoch": 0.42333522129622136, "grad_norm": 0.6113423109054565, "learning_rate": 9.624937755236862e-06, "loss": 0.0379, "step": 52320 }, { "epoch": 0.42341613399142325, "grad_norm": 0.39044511318206787, "learning_rate": 9.624669394292141e-06, "loss": 0.0306, "step": 52330 }, { "epoch": 0.42349704668662513, "grad_norm": 0.6326608657836914, "learning_rate": 9.62440094111815e-06, "loss": 0.0364, "step": 52340 }, { "epoch": 0.423577959381827, "grad_norm": 0.6007282733917236, "learning_rate": 9.624132395720246e-06, "loss": 0.0412, "step": 52350 }, { "epoch": 0.4236588720770289, "grad_norm": 0.4720674455165863, "learning_rate": 9.623863758103785e-06, "loss": 0.0295, "step": 52360 }, { "epoch": 0.4237397847722308, "grad_norm": 1.1204861402511597, "learning_rate": 9.623595028274121e-06, "loss": 0.0466, "step": 52370 }, { "epoch": 0.4238206974674326, "grad_norm": 0.7571696043014526, "learning_rate": 9.623326206236615e-06, "loss": 0.0372, "step": 52380 }, { "epoch": 0.4239016101626345, "grad_norm": 0.7412050366401672, "learning_rate": 9.62305729199663e-06, "loss": 0.0276, "step": 52390 }, { "epoch": 0.4239825228578364, "grad_norm": 0.7164157629013062, "learning_rate": 9.622788285559524e-06, "loss": 0.0458, "step": 52400 }, { "epoch": 0.42406343555303827, "grad_norm": 0.3766878545284271, "learning_rate": 9.622519186930666e-06, "loss": 0.0289, "step": 52410 }, { "epoch": 0.42414434824824016, "grad_norm": 0.37648382782936096, "learning_rate": 9.622249996115421e-06, "loss": 0.0329, "step": 52420 }, { "epoch": 0.42422526094344204, "grad_norm": 0.6042487621307373, "learning_rate": 9.621980713119158e-06, "loss": 0.0275, "step": 52430 }, { "epoch": 0.4243061736386439, "grad_norm": 0.7910662889480591, "learning_rate": 9.621711337947248e-06, "loss": 0.0477, "step": 52440 }, { "epoch": 0.42438708633384575, "grad_norm": 0.6535765528678894, "learning_rate": 9.621441870605062e-06, "loss": 0.0256, "step": 52450 }, { "epoch": 0.42446799902904764, "grad_norm": 0.1547524333000183, "learning_rate": 9.621172311097973e-06, "loss": 0.0328, "step": 52460 }, { "epoch": 0.4245489117242495, "grad_norm": 0.19687162339687347, "learning_rate": 9.62090265943136e-06, "loss": 0.0481, "step": 52470 }, { "epoch": 0.4246298244194514, "grad_norm": 0.3851984143257141, "learning_rate": 9.620632915610596e-06, "loss": 0.0276, "step": 52480 }, { "epoch": 0.4247107371146533, "grad_norm": 0.614494800567627, "learning_rate": 9.620363079641066e-06, "loss": 0.0355, "step": 52490 }, { "epoch": 0.4247916498098552, "grad_norm": 0.3440603017807007, "learning_rate": 9.620093151528147e-06, "loss": 0.0317, "step": 52500 }, { "epoch": 0.42487256250505706, "grad_norm": 0.621063768863678, "learning_rate": 9.619823131277222e-06, "loss": 0.0492, "step": 52510 }, { "epoch": 0.42495347520025895, "grad_norm": 0.4401733875274658, "learning_rate": 9.61955301889368e-06, "loss": 0.0607, "step": 52520 }, { "epoch": 0.4250343878954608, "grad_norm": 0.38364332914352417, "learning_rate": 9.619282814382904e-06, "loss": 0.0195, "step": 52530 }, { "epoch": 0.42511530059066266, "grad_norm": 0.39748522639274597, "learning_rate": 9.619012517750285e-06, "loss": 0.0527, "step": 52540 }, { "epoch": 0.42519621328586454, "grad_norm": 0.5096878409385681, "learning_rate": 9.618742129001212e-06, "loss": 0.0433, "step": 52550 }, { "epoch": 0.42527712598106643, "grad_norm": 0.7566577196121216, "learning_rate": 9.618471648141077e-06, "loss": 0.0386, "step": 52560 }, { "epoch": 0.4253580386762683, "grad_norm": 0.5616651773452759, "learning_rate": 9.618201075175275e-06, "loss": 0.0378, "step": 52570 }, { "epoch": 0.4254389513714702, "grad_norm": 0.009273865260183811, "learning_rate": 9.617930410109204e-06, "loss": 0.0337, "step": 52580 }, { "epoch": 0.4255198640666721, "grad_norm": 1.2443568706512451, "learning_rate": 9.617659652948258e-06, "loss": 0.0381, "step": 52590 }, { "epoch": 0.4256007767618739, "grad_norm": 0.6287880539894104, "learning_rate": 9.61738880369784e-06, "loss": 0.0343, "step": 52600 }, { "epoch": 0.4256816894570758, "grad_norm": 0.6160397529602051, "learning_rate": 9.617117862363349e-06, "loss": 0.0394, "step": 52610 }, { "epoch": 0.4257626021522777, "grad_norm": 1.008752703666687, "learning_rate": 9.616846828950188e-06, "loss": 0.0361, "step": 52620 }, { "epoch": 0.42584351484747957, "grad_norm": 0.5954540967941284, "learning_rate": 9.616575703463767e-06, "loss": 0.0542, "step": 52630 }, { "epoch": 0.42592442754268145, "grad_norm": 0.6836615204811096, "learning_rate": 9.616304485909487e-06, "loss": 0.0522, "step": 52640 }, { "epoch": 0.42600534023788333, "grad_norm": 0.3240463435649872, "learning_rate": 9.61603317629276e-06, "loss": 0.0458, "step": 52650 }, { "epoch": 0.4260862529330852, "grad_norm": 1.6834481954574585, "learning_rate": 9.615761774618996e-06, "loss": 0.0421, "step": 52660 }, { "epoch": 0.4261671656282871, "grad_norm": 0.5279836058616638, "learning_rate": 9.615490280893609e-06, "loss": 0.0305, "step": 52670 }, { "epoch": 0.42624807832348893, "grad_norm": 0.7926731109619141, "learning_rate": 9.615218695122011e-06, "loss": 0.0455, "step": 52680 }, { "epoch": 0.4263289910186908, "grad_norm": 0.3866579830646515, "learning_rate": 9.61494701730962e-06, "loss": 0.0407, "step": 52690 }, { "epoch": 0.4264099037138927, "grad_norm": 0.3866398334503174, "learning_rate": 9.614675247461852e-06, "loss": 0.0465, "step": 52700 }, { "epoch": 0.4264908164090946, "grad_norm": 0.5763958096504211, "learning_rate": 9.61440338558413e-06, "loss": 0.0384, "step": 52710 }, { "epoch": 0.42657172910429647, "grad_norm": 1.1741751432418823, "learning_rate": 9.614131431681873e-06, "loss": 0.064, "step": 52720 }, { "epoch": 0.42665264179949836, "grad_norm": 0.5632505416870117, "learning_rate": 9.613859385760504e-06, "loss": 0.0345, "step": 52730 }, { "epoch": 0.42673355449470024, "grad_norm": 0.6939172148704529, "learning_rate": 9.61358724782545e-06, "loss": 0.0385, "step": 52740 }, { "epoch": 0.42681446718990207, "grad_norm": 0.7002564668655396, "learning_rate": 9.61331501788214e-06, "loss": 0.034, "step": 52750 }, { "epoch": 0.42689537988510395, "grad_norm": 0.3964809775352478, "learning_rate": 9.613042695936e-06, "loss": 0.0546, "step": 52760 }, { "epoch": 0.42697629258030584, "grad_norm": 0.5719561576843262, "learning_rate": 9.612770281992464e-06, "loss": 0.0353, "step": 52770 }, { "epoch": 0.4270572052755077, "grad_norm": 0.3108600974082947, "learning_rate": 9.612497776056962e-06, "loss": 0.0435, "step": 52780 }, { "epoch": 0.4271381179707096, "grad_norm": 0.2980293333530426, "learning_rate": 9.612225178134928e-06, "loss": 0.0287, "step": 52790 }, { "epoch": 0.4272190306659115, "grad_norm": 0.6743441224098206, "learning_rate": 9.611952488231799e-06, "loss": 0.041, "step": 52800 }, { "epoch": 0.4272999433611134, "grad_norm": 0.4581572115421295, "learning_rate": 9.611679706353015e-06, "loss": 0.0451, "step": 52810 }, { "epoch": 0.42738085605631526, "grad_norm": 0.6720189452171326, "learning_rate": 9.611406832504015e-06, "loss": 0.0311, "step": 52820 }, { "epoch": 0.4274617687515171, "grad_norm": 0.4409275949001312, "learning_rate": 9.611133866690241e-06, "loss": 0.0392, "step": 52830 }, { "epoch": 0.427542681446719, "grad_norm": 0.4528072476387024, "learning_rate": 9.610860808917136e-06, "loss": 0.0311, "step": 52840 }, { "epoch": 0.42762359414192086, "grad_norm": 0.5269172787666321, "learning_rate": 9.610587659190147e-06, "loss": 0.0462, "step": 52850 }, { "epoch": 0.42770450683712274, "grad_norm": 0.6409966349601746, "learning_rate": 9.610314417514718e-06, "loss": 0.0205, "step": 52860 }, { "epoch": 0.42778541953232463, "grad_norm": 0.3693067133426666, "learning_rate": 9.610041083896304e-06, "loss": 0.0473, "step": 52870 }, { "epoch": 0.4278663322275265, "grad_norm": 0.6707576513290405, "learning_rate": 9.609767658340351e-06, "loss": 0.0347, "step": 52880 }, { "epoch": 0.4279472449227284, "grad_norm": 0.6342024207115173, "learning_rate": 9.609494140852315e-06, "loss": 0.0351, "step": 52890 }, { "epoch": 0.4280281576179302, "grad_norm": 0.5376623272895813, "learning_rate": 9.609220531437649e-06, "loss": 0.0395, "step": 52900 }, { "epoch": 0.4281090703131321, "grad_norm": 0.5274183750152588, "learning_rate": 9.608946830101809e-06, "loss": 0.0425, "step": 52910 }, { "epoch": 0.428189983008334, "grad_norm": 0.6284576654434204, "learning_rate": 9.608673036850256e-06, "loss": 0.0379, "step": 52920 }, { "epoch": 0.4282708957035359, "grad_norm": 0.5511953234672546, "learning_rate": 9.608399151688446e-06, "loss": 0.0293, "step": 52930 }, { "epoch": 0.42835180839873777, "grad_norm": 0.5151835083961487, "learning_rate": 9.608125174621845e-06, "loss": 0.0263, "step": 52940 }, { "epoch": 0.42843272109393965, "grad_norm": 0.5839335918426514, "learning_rate": 9.607851105655916e-06, "loss": 0.0498, "step": 52950 }, { "epoch": 0.42851363378914153, "grad_norm": 0.844362735748291, "learning_rate": 9.607576944796125e-06, "loss": 0.0547, "step": 52960 }, { "epoch": 0.4285945464843434, "grad_norm": 0.41616562008857727, "learning_rate": 9.607302692047936e-06, "loss": 0.0586, "step": 52970 }, { "epoch": 0.42867545917954525, "grad_norm": 0.48298248648643494, "learning_rate": 9.607028347416824e-06, "loss": 0.0248, "step": 52980 }, { "epoch": 0.42875637187474713, "grad_norm": 0.42589718103408813, "learning_rate": 9.606753910908258e-06, "loss": 0.0262, "step": 52990 }, { "epoch": 0.428837284569949, "grad_norm": 0.7798243761062622, "learning_rate": 9.606479382527709e-06, "loss": 0.0319, "step": 53000 }, { "epoch": 0.4289181972651509, "grad_norm": 0.6281217336654663, "learning_rate": 9.606204762280654e-06, "loss": 0.0341, "step": 53010 }, { "epoch": 0.4289991099603528, "grad_norm": 0.3491560220718384, "learning_rate": 9.60593005017257e-06, "loss": 0.045, "step": 53020 }, { "epoch": 0.42908002265555467, "grad_norm": 0.22120709717273712, "learning_rate": 9.605655246208932e-06, "loss": 0.0394, "step": 53030 }, { "epoch": 0.42916093535075656, "grad_norm": 0.1340678632259369, "learning_rate": 9.605380350395224e-06, "loss": 0.0401, "step": 53040 }, { "epoch": 0.4292418480459584, "grad_norm": 0.03598502278327942, "learning_rate": 9.60510536273693e-06, "loss": 0.0383, "step": 53050 }, { "epoch": 0.42932276074116027, "grad_norm": 0.6892362833023071, "learning_rate": 9.604830283239527e-06, "loss": 0.0579, "step": 53060 }, { "epoch": 0.42940367343636215, "grad_norm": 0.47604069113731384, "learning_rate": 9.604555111908509e-06, "loss": 0.0407, "step": 53070 }, { "epoch": 0.42948458613156404, "grad_norm": 0.24750036001205444, "learning_rate": 9.604279848749358e-06, "loss": 0.0551, "step": 53080 }, { "epoch": 0.4295654988267659, "grad_norm": 0.5041743516921997, "learning_rate": 9.604004493767566e-06, "loss": 0.0352, "step": 53090 }, { "epoch": 0.4296464115219678, "grad_norm": 0.7956405282020569, "learning_rate": 9.603729046968624e-06, "loss": 0.0431, "step": 53100 }, { "epoch": 0.4297273242171697, "grad_norm": 0.6427332162857056, "learning_rate": 9.603453508358025e-06, "loss": 0.0303, "step": 53110 }, { "epoch": 0.4298082369123716, "grad_norm": 0.4110029935836792, "learning_rate": 9.603177877941263e-06, "loss": 0.0295, "step": 53120 }, { "epoch": 0.4298891496075734, "grad_norm": 0.6733518838882446, "learning_rate": 9.602902155723837e-06, "loss": 0.0429, "step": 53130 }, { "epoch": 0.4299700623027753, "grad_norm": 0.33735013008117676, "learning_rate": 9.602626341711246e-06, "loss": 0.0389, "step": 53140 }, { "epoch": 0.4300509749979772, "grad_norm": 0.843718945980072, "learning_rate": 9.602350435908987e-06, "loss": 0.0579, "step": 53150 }, { "epoch": 0.43013188769317906, "grad_norm": 0.6853150129318237, "learning_rate": 9.602074438322567e-06, "loss": 0.0456, "step": 53160 }, { "epoch": 0.43021280038838094, "grad_norm": 0.9305759072303772, "learning_rate": 9.601798348957486e-06, "loss": 0.0346, "step": 53170 }, { "epoch": 0.43029371308358283, "grad_norm": 0.5633160471916199, "learning_rate": 9.601522167819251e-06, "loss": 0.0442, "step": 53180 }, { "epoch": 0.4303746257787847, "grad_norm": 0.515375018119812, "learning_rate": 9.601245894913372e-06, "loss": 0.0387, "step": 53190 }, { "epoch": 0.43045553847398654, "grad_norm": 0.25418969988822937, "learning_rate": 9.600969530245356e-06, "loss": 0.0514, "step": 53200 }, { "epoch": 0.4305364511691884, "grad_norm": 0.4476027190685272, "learning_rate": 9.600693073820716e-06, "loss": 0.0236, "step": 53210 }, { "epoch": 0.4306173638643903, "grad_norm": 1.5942654609680176, "learning_rate": 9.600416525644965e-06, "loss": 0.0556, "step": 53220 }, { "epoch": 0.4306982765595922, "grad_norm": 0.20600354671478271, "learning_rate": 9.60013988572362e-06, "loss": 0.0316, "step": 53230 }, { "epoch": 0.4307791892547941, "grad_norm": 0.5659076571464539, "learning_rate": 9.599863154062194e-06, "loss": 0.0537, "step": 53240 }, { "epoch": 0.43086010194999597, "grad_norm": 0.5802762508392334, "learning_rate": 9.599586330666211e-06, "loss": 0.0286, "step": 53250 }, { "epoch": 0.43094101464519785, "grad_norm": 1.3441873788833618, "learning_rate": 9.599309415541187e-06, "loss": 0.0303, "step": 53260 }, { "epoch": 0.43102192734039974, "grad_norm": 0.7795674800872803, "learning_rate": 9.599032408692645e-06, "loss": 0.0482, "step": 53270 }, { "epoch": 0.43110284003560156, "grad_norm": 0.7606667876243591, "learning_rate": 9.598755310126113e-06, "loss": 0.0315, "step": 53280 }, { "epoch": 0.43118375273080345, "grad_norm": 0.26854315400123596, "learning_rate": 9.598478119847115e-06, "loss": 0.0306, "step": 53290 }, { "epoch": 0.43126466542600533, "grad_norm": 0.7273654937744141, "learning_rate": 9.598200837861175e-06, "loss": 0.0379, "step": 53300 }, { "epoch": 0.4313455781212072, "grad_norm": 0.8039589524269104, "learning_rate": 9.597923464173831e-06, "loss": 0.0349, "step": 53310 }, { "epoch": 0.4314264908164091, "grad_norm": 0.494589626789093, "learning_rate": 9.597645998790608e-06, "loss": 0.04, "step": 53320 }, { "epoch": 0.431507403511611, "grad_norm": 0.5564785599708557, "learning_rate": 9.597368441717041e-06, "loss": 0.0463, "step": 53330 }, { "epoch": 0.43158831620681287, "grad_norm": 0.4168432652950287, "learning_rate": 9.597090792958668e-06, "loss": 0.0504, "step": 53340 }, { "epoch": 0.4316692289020147, "grad_norm": 0.6428647637367249, "learning_rate": 9.596813052521021e-06, "loss": 0.0369, "step": 53350 }, { "epoch": 0.4317501415972166, "grad_norm": 0.4052816331386566, "learning_rate": 9.596535220409644e-06, "loss": 0.0487, "step": 53360 }, { "epoch": 0.43183105429241847, "grad_norm": 0.32535868883132935, "learning_rate": 9.596257296630076e-06, "loss": 0.022, "step": 53370 }, { "epoch": 0.43191196698762035, "grad_norm": 0.04664122313261032, "learning_rate": 9.595979281187859e-06, "loss": 0.0433, "step": 53380 }, { "epoch": 0.43199287968282224, "grad_norm": 0.3165411651134491, "learning_rate": 9.595701174088538e-06, "loss": 0.0167, "step": 53390 }, { "epoch": 0.4320737923780241, "grad_norm": 0.07474270462989807, "learning_rate": 9.595422975337657e-06, "loss": 0.0381, "step": 53400 }, { "epoch": 0.432154705073226, "grad_norm": 0.4317723214626312, "learning_rate": 9.59514468494077e-06, "loss": 0.0389, "step": 53410 }, { "epoch": 0.4322356177684279, "grad_norm": 0.8864158987998962, "learning_rate": 9.59486630290342e-06, "loss": 0.0414, "step": 53420 }, { "epoch": 0.4323165304636297, "grad_norm": 0.6410529613494873, "learning_rate": 9.594587829231161e-06, "loss": 0.0393, "step": 53430 }, { "epoch": 0.4323974431588316, "grad_norm": 0.45277056097984314, "learning_rate": 9.59430926392955e-06, "loss": 0.0573, "step": 53440 }, { "epoch": 0.4324783558540335, "grad_norm": 0.5846431255340576, "learning_rate": 9.59403060700414e-06, "loss": 0.0382, "step": 53450 }, { "epoch": 0.4325592685492354, "grad_norm": 0.6524820923805237, "learning_rate": 9.593751858460484e-06, "loss": 0.0298, "step": 53460 }, { "epoch": 0.43264018124443726, "grad_norm": 0.43391960859298706, "learning_rate": 9.59347301830415e-06, "loss": 0.0363, "step": 53470 }, { "epoch": 0.43272109393963915, "grad_norm": 0.3195260763168335, "learning_rate": 9.59319408654069e-06, "loss": 0.0569, "step": 53480 }, { "epoch": 0.43280200663484103, "grad_norm": 0.4052294194698334, "learning_rate": 9.592915063175673e-06, "loss": 0.0411, "step": 53490 }, { "epoch": 0.43288291933004286, "grad_norm": 0.4088459610939026, "learning_rate": 9.59263594821466e-06, "loss": 0.0281, "step": 53500 }, { "epoch": 0.43296383202524474, "grad_norm": 0.9957549571990967, "learning_rate": 9.592356741663219e-06, "loss": 0.043, "step": 53510 }, { "epoch": 0.43304474472044663, "grad_norm": 0.5428991317749023, "learning_rate": 9.592077443526917e-06, "loss": 0.0301, "step": 53520 }, { "epoch": 0.4331256574156485, "grad_norm": 0.0689559206366539, "learning_rate": 9.591798053811325e-06, "loss": 0.0332, "step": 53530 }, { "epoch": 0.4332065701108504, "grad_norm": 0.36763396859169006, "learning_rate": 9.591518572522016e-06, "loss": 0.0337, "step": 53540 }, { "epoch": 0.4332874828060523, "grad_norm": 0.4817643463611603, "learning_rate": 9.59123899966456e-06, "loss": 0.0186, "step": 53550 }, { "epoch": 0.43336839550125417, "grad_norm": 0.30618879199028015, "learning_rate": 9.590959335244536e-06, "loss": 0.0235, "step": 53560 }, { "epoch": 0.43344930819645605, "grad_norm": 0.3961458206176758, "learning_rate": 9.590679579267518e-06, "loss": 0.0311, "step": 53570 }, { "epoch": 0.4335302208916579, "grad_norm": 0.6740639209747314, "learning_rate": 9.590399731739091e-06, "loss": 0.0474, "step": 53580 }, { "epoch": 0.43361113358685976, "grad_norm": 0.7976632118225098, "learning_rate": 9.59011979266483e-06, "loss": 0.0491, "step": 53590 }, { "epoch": 0.43369204628206165, "grad_norm": 0.37030062079429626, "learning_rate": 9.589839762050319e-06, "loss": 0.022, "step": 53600 }, { "epoch": 0.43377295897726353, "grad_norm": 0.49720609188079834, "learning_rate": 9.589559639901145e-06, "loss": 0.0329, "step": 53610 }, { "epoch": 0.4338538716724654, "grad_norm": 0.4649217426776886, "learning_rate": 9.58927942622289e-06, "loss": 0.0642, "step": 53620 }, { "epoch": 0.4339347843676673, "grad_norm": 0.4005207121372223, "learning_rate": 9.588999121021148e-06, "loss": 0.0375, "step": 53630 }, { "epoch": 0.4340156970628692, "grad_norm": 0.5793645977973938, "learning_rate": 9.588718724301506e-06, "loss": 0.0516, "step": 53640 }, { "epoch": 0.434096609758071, "grad_norm": 0.5048785209655762, "learning_rate": 9.588438236069556e-06, "loss": 0.0276, "step": 53650 }, { "epoch": 0.4341775224532729, "grad_norm": 0.5429808497428894, "learning_rate": 9.588157656330894e-06, "loss": 0.0284, "step": 53660 }, { "epoch": 0.4342584351484748, "grad_norm": 0.8675262928009033, "learning_rate": 9.587876985091111e-06, "loss": 0.065, "step": 53670 }, { "epoch": 0.43433934784367667, "grad_norm": 0.32234662771224976, "learning_rate": 9.587596222355807e-06, "loss": 0.0292, "step": 53680 }, { "epoch": 0.43442026053887856, "grad_norm": 0.3968566060066223, "learning_rate": 9.587315368130582e-06, "loss": 0.0298, "step": 53690 }, { "epoch": 0.43450117323408044, "grad_norm": 0.34051597118377686, "learning_rate": 9.587034422421036e-06, "loss": 0.0358, "step": 53700 }, { "epoch": 0.4345820859292823, "grad_norm": 0.6115520596504211, "learning_rate": 9.586753385232772e-06, "loss": 0.0336, "step": 53710 }, { "epoch": 0.4346629986244842, "grad_norm": 0.46642300486564636, "learning_rate": 9.586472256571395e-06, "loss": 0.0371, "step": 53720 }, { "epoch": 0.43474391131968604, "grad_norm": 0.42411571741104126, "learning_rate": 9.586191036442512e-06, "loss": 0.0426, "step": 53730 }, { "epoch": 0.4348248240148879, "grad_norm": 0.6221517324447632, "learning_rate": 9.58590972485173e-06, "loss": 0.0474, "step": 53740 }, { "epoch": 0.4349057367100898, "grad_norm": 0.4278726577758789, "learning_rate": 9.58562832180466e-06, "loss": 0.0317, "step": 53750 }, { "epoch": 0.4349866494052917, "grad_norm": 0.3503663241863251, "learning_rate": 9.585346827306912e-06, "loss": 0.0375, "step": 53760 }, { "epoch": 0.4350675621004936, "grad_norm": 0.8436333537101746, "learning_rate": 9.585065241364105e-06, "loss": 0.0507, "step": 53770 }, { "epoch": 0.43514847479569546, "grad_norm": 0.5132561922073364, "learning_rate": 9.584783563981848e-06, "loss": 0.0232, "step": 53780 }, { "epoch": 0.43522938749089735, "grad_norm": 0.6100702881813049, "learning_rate": 9.584501795165764e-06, "loss": 0.0349, "step": 53790 }, { "epoch": 0.4353103001860992, "grad_norm": 0.17772415280342102, "learning_rate": 9.58421993492147e-06, "loss": 0.0339, "step": 53800 }, { "epoch": 0.43539121288130106, "grad_norm": 0.8468949794769287, "learning_rate": 9.583937983254586e-06, "loss": 0.0337, "step": 53810 }, { "epoch": 0.43547212557650294, "grad_norm": 0.5534231662750244, "learning_rate": 9.583655940170735e-06, "loss": 0.0357, "step": 53820 }, { "epoch": 0.43555303827170483, "grad_norm": 0.38114991784095764, "learning_rate": 9.583373805675544e-06, "loss": 0.0493, "step": 53830 }, { "epoch": 0.4356339509669067, "grad_norm": 0.2538498342037201, "learning_rate": 9.583091579774639e-06, "loss": 0.0317, "step": 53840 }, { "epoch": 0.4357148636621086, "grad_norm": 0.837054431438446, "learning_rate": 9.582809262473647e-06, "loss": 0.042, "step": 53850 }, { "epoch": 0.4357957763573105, "grad_norm": 0.61671382188797, "learning_rate": 9.5825268537782e-06, "loss": 0.0494, "step": 53860 }, { "epoch": 0.4358766890525123, "grad_norm": 0.4508931636810303, "learning_rate": 9.582244353693928e-06, "loss": 0.0476, "step": 53870 }, { "epoch": 0.4359576017477142, "grad_norm": 0.47251081466674805, "learning_rate": 9.581961762226466e-06, "loss": 0.052, "step": 53880 }, { "epoch": 0.4360385144429161, "grad_norm": 0.7438511252403259, "learning_rate": 9.58167907938145e-06, "loss": 0.058, "step": 53890 }, { "epoch": 0.43611942713811797, "grad_norm": 0.41711536049842834, "learning_rate": 9.581396305164517e-06, "loss": 0.028, "step": 53900 }, { "epoch": 0.43620033983331985, "grad_norm": 0.824084997177124, "learning_rate": 9.581113439581307e-06, "loss": 0.0442, "step": 53910 }, { "epoch": 0.43628125252852173, "grad_norm": 0.6567240357398987, "learning_rate": 9.58083048263746e-06, "loss": 0.0351, "step": 53920 }, { "epoch": 0.4363621652237236, "grad_norm": 0.5493795275688171, "learning_rate": 9.58054743433862e-06, "loss": 0.0342, "step": 53930 }, { "epoch": 0.4364430779189255, "grad_norm": 0.6275510787963867, "learning_rate": 9.580264294690432e-06, "loss": 0.0352, "step": 53940 }, { "epoch": 0.43652399061412733, "grad_norm": 0.6364173889160156, "learning_rate": 9.57998106369854e-06, "loss": 0.0379, "step": 53950 }, { "epoch": 0.4366049033093292, "grad_norm": 0.7735372185707092, "learning_rate": 9.579697741368595e-06, "loss": 0.0379, "step": 53960 }, { "epoch": 0.4366858160045311, "grad_norm": 0.4216782748699188, "learning_rate": 9.579414327706246e-06, "loss": 0.0226, "step": 53970 }, { "epoch": 0.436766728699733, "grad_norm": 0.26525381207466125, "learning_rate": 9.579130822717149e-06, "loss": 0.0308, "step": 53980 }, { "epoch": 0.43684764139493487, "grad_norm": 0.5048344135284424, "learning_rate": 9.578847226406952e-06, "loss": 0.0319, "step": 53990 }, { "epoch": 0.43692855409013676, "grad_norm": 0.29879507422447205, "learning_rate": 9.578563538781314e-06, "loss": 0.0409, "step": 54000 }, { "epoch": 0.43700946678533864, "grad_norm": 0.35475000739097595, "learning_rate": 9.578279759845891e-06, "loss": 0.0382, "step": 54010 }, { "epoch": 0.43709037948054047, "grad_norm": 0.42668095231056213, "learning_rate": 9.577995889606346e-06, "loss": 0.0289, "step": 54020 }, { "epoch": 0.43717129217574235, "grad_norm": 0.6972759962081909, "learning_rate": 9.577711928068337e-06, "loss": 0.0248, "step": 54030 }, { "epoch": 0.43725220487094424, "grad_norm": 0.8379490375518799, "learning_rate": 9.577427875237526e-06, "loss": 0.0278, "step": 54040 }, { "epoch": 0.4373331175661461, "grad_norm": 0.3557530343532562, "learning_rate": 9.57714373111958e-06, "loss": 0.0264, "step": 54050 }, { "epoch": 0.437414030261348, "grad_norm": 0.46860480308532715, "learning_rate": 9.576859495720165e-06, "loss": 0.0293, "step": 54060 }, { "epoch": 0.4374949429565499, "grad_norm": 0.47300827503204346, "learning_rate": 9.576575169044951e-06, "loss": 0.053, "step": 54070 }, { "epoch": 0.4375758556517518, "grad_norm": 0.4267885088920593, "learning_rate": 9.576290751099604e-06, "loss": 0.0531, "step": 54080 }, { "epoch": 0.43765676834695366, "grad_norm": 0.32557061314582825, "learning_rate": 9.576006241889802e-06, "loss": 0.043, "step": 54090 }, { "epoch": 0.4377376810421555, "grad_norm": 0.5282275676727295, "learning_rate": 9.575721641421213e-06, "loss": 0.0208, "step": 54100 }, { "epoch": 0.4378185937373574, "grad_norm": 0.6229133605957031, "learning_rate": 9.575436949699517e-06, "loss": 0.0398, "step": 54110 }, { "epoch": 0.43789950643255926, "grad_norm": 0.4761057496070862, "learning_rate": 9.57515216673039e-06, "loss": 0.0414, "step": 54120 }, { "epoch": 0.43798041912776114, "grad_norm": 0.5777246356010437, "learning_rate": 9.574867292519512e-06, "loss": 0.0509, "step": 54130 }, { "epoch": 0.43806133182296303, "grad_norm": 0.8042216300964355, "learning_rate": 9.574582327072564e-06, "loss": 0.0421, "step": 54140 }, { "epoch": 0.4381422445181649, "grad_norm": 0.33310699462890625, "learning_rate": 9.574297270395227e-06, "loss": 0.0323, "step": 54150 }, { "epoch": 0.4382231572133668, "grad_norm": 0.9316485524177551, "learning_rate": 9.57401212249319e-06, "loss": 0.0391, "step": 54160 }, { "epoch": 0.4383040699085686, "grad_norm": 0.5397835969924927, "learning_rate": 9.573726883372137e-06, "loss": 0.0369, "step": 54170 }, { "epoch": 0.4383849826037705, "grad_norm": 0.5620198845863342, "learning_rate": 9.573441553037756e-06, "loss": 0.0347, "step": 54180 }, { "epoch": 0.4384658952989724, "grad_norm": 0.30869829654693604, "learning_rate": 9.57315613149574e-06, "loss": 0.0232, "step": 54190 }, { "epoch": 0.4385468079941743, "grad_norm": 0.7795354127883911, "learning_rate": 9.572870618751777e-06, "loss": 0.0408, "step": 54200 }, { "epoch": 0.43862772068937617, "grad_norm": 0.4811800718307495, "learning_rate": 9.572585014811565e-06, "loss": 0.04, "step": 54210 }, { "epoch": 0.43870863338457805, "grad_norm": 1.015390396118164, "learning_rate": 9.572299319680796e-06, "loss": 0.0363, "step": 54220 }, { "epoch": 0.43878954607977994, "grad_norm": 0.7686986327171326, "learning_rate": 9.572013533365173e-06, "loss": 0.0332, "step": 54230 }, { "epoch": 0.4388704587749818, "grad_norm": 0.5996899604797363, "learning_rate": 9.571727655870388e-06, "loss": 0.0387, "step": 54240 }, { "epoch": 0.43895137147018365, "grad_norm": 0.3805910646915436, "learning_rate": 9.571441687202148e-06, "loss": 0.0315, "step": 54250 }, { "epoch": 0.43903228416538553, "grad_norm": 0.36842265725135803, "learning_rate": 9.571155627366156e-06, "loss": 0.0284, "step": 54260 }, { "epoch": 0.4391131968605874, "grad_norm": 0.6766000390052795, "learning_rate": 9.570869476368113e-06, "loss": 0.0299, "step": 54270 }, { "epoch": 0.4391941095557893, "grad_norm": 1.3914092779159546, "learning_rate": 9.570583234213729e-06, "loss": 0.0514, "step": 54280 }, { "epoch": 0.4392750222509912, "grad_norm": 0.5219635963439941, "learning_rate": 9.57029690090871e-06, "loss": 0.0264, "step": 54290 }, { "epoch": 0.43935593494619307, "grad_norm": 0.5980641841888428, "learning_rate": 9.57001047645877e-06, "loss": 0.0379, "step": 54300 }, { "epoch": 0.43943684764139496, "grad_norm": 0.4369828999042511, "learning_rate": 9.569723960869615e-06, "loss": 0.0302, "step": 54310 }, { "epoch": 0.4395177603365968, "grad_norm": 0.6383000016212463, "learning_rate": 9.569437354146964e-06, "loss": 0.0443, "step": 54320 }, { "epoch": 0.43959867303179867, "grad_norm": 1.1940032243728638, "learning_rate": 9.569150656296533e-06, "loss": 0.0399, "step": 54330 }, { "epoch": 0.43967958572700055, "grad_norm": 0.6373441815376282, "learning_rate": 9.568863867324038e-06, "loss": 0.039, "step": 54340 }, { "epoch": 0.43976049842220244, "grad_norm": 0.5496336221694946, "learning_rate": 9.568576987235197e-06, "loss": 0.0262, "step": 54350 }, { "epoch": 0.4398414111174043, "grad_norm": 0.6566774845123291, "learning_rate": 9.568290016035735e-06, "loss": 0.0321, "step": 54360 }, { "epoch": 0.4399223238126062, "grad_norm": 0.11163493990898132, "learning_rate": 9.56800295373137e-06, "loss": 0.0235, "step": 54370 }, { "epoch": 0.4400032365078081, "grad_norm": 0.6219611167907715, "learning_rate": 9.567715800327831e-06, "loss": 0.0427, "step": 54380 }, { "epoch": 0.44008414920301, "grad_norm": 0.6409608125686646, "learning_rate": 9.567428555830844e-06, "loss": 0.0299, "step": 54390 }, { "epoch": 0.4401650618982118, "grad_norm": 0.367477685213089, "learning_rate": 9.567141220246136e-06, "loss": 0.0461, "step": 54400 }, { "epoch": 0.4402459745934137, "grad_norm": 0.6481806039810181, "learning_rate": 9.566853793579437e-06, "loss": 0.0297, "step": 54410 }, { "epoch": 0.4403268872886156, "grad_norm": 0.6791430711746216, "learning_rate": 9.566566275836482e-06, "loss": 0.0519, "step": 54420 }, { "epoch": 0.44040779998381746, "grad_norm": 0.7573065757751465, "learning_rate": 9.566278667023001e-06, "loss": 0.0277, "step": 54430 }, { "epoch": 0.44048871267901935, "grad_norm": 0.3944924473762512, "learning_rate": 9.565990967144734e-06, "loss": 0.0358, "step": 54440 }, { "epoch": 0.44056962537422123, "grad_norm": 0.7802988886833191, "learning_rate": 9.565703176207414e-06, "loss": 0.0353, "step": 54450 }, { "epoch": 0.4406505380694231, "grad_norm": 0.4760359227657318, "learning_rate": 9.565415294216785e-06, "loss": 0.032, "step": 54460 }, { "epoch": 0.44073145076462494, "grad_norm": 0.8036624789237976, "learning_rate": 9.565127321178585e-06, "loss": 0.0473, "step": 54470 }, { "epoch": 0.44081236345982683, "grad_norm": 0.4734816253185272, "learning_rate": 9.564839257098557e-06, "loss": 0.0283, "step": 54480 }, { "epoch": 0.4408932761550287, "grad_norm": 0.4286256730556488, "learning_rate": 9.56455110198245e-06, "loss": 0.0252, "step": 54490 }, { "epoch": 0.4409741888502306, "grad_norm": 0.33561837673187256, "learning_rate": 9.564262855836003e-06, "loss": 0.0467, "step": 54500 }, { "epoch": 0.4410551015454325, "grad_norm": 0.7855519652366638, "learning_rate": 9.563974518664971e-06, "loss": 0.0486, "step": 54510 }, { "epoch": 0.44113601424063437, "grad_norm": 0.5380473136901855, "learning_rate": 9.563686090475104e-06, "loss": 0.0443, "step": 54520 }, { "epoch": 0.44121692693583625, "grad_norm": 0.3882197439670563, "learning_rate": 9.563397571272151e-06, "loss": 0.0576, "step": 54530 }, { "epoch": 0.44129783963103814, "grad_norm": 0.21512027084827423, "learning_rate": 9.563108961061866e-06, "loss": 0.034, "step": 54540 }, { "epoch": 0.44137875232623996, "grad_norm": 0.4546412527561188, "learning_rate": 9.562820259850007e-06, "loss": 0.0367, "step": 54550 }, { "epoch": 0.44145966502144185, "grad_norm": 0.93392413854599, "learning_rate": 9.56253146764233e-06, "loss": 0.0589, "step": 54560 }, { "epoch": 0.44154057771664373, "grad_norm": 0.5853399634361267, "learning_rate": 9.562242584444595e-06, "loss": 0.0484, "step": 54570 }, { "epoch": 0.4416214904118456, "grad_norm": 0.470967561006546, "learning_rate": 9.561953610262563e-06, "loss": 0.0366, "step": 54580 }, { "epoch": 0.4417024031070475, "grad_norm": 0.41607606410980225, "learning_rate": 9.561664545101997e-06, "loss": 0.0376, "step": 54590 }, { "epoch": 0.4417833158022494, "grad_norm": 0.9258387088775635, "learning_rate": 9.56137538896866e-06, "loss": 0.0577, "step": 54600 }, { "epoch": 0.4418642284974513, "grad_norm": 0.8550107479095459, "learning_rate": 9.561086141868324e-06, "loss": 0.05, "step": 54610 }, { "epoch": 0.4419451411926531, "grad_norm": 0.6402038335800171, "learning_rate": 9.560796803806753e-06, "loss": 0.0484, "step": 54620 }, { "epoch": 0.442026053887855, "grad_norm": 0.6292486786842346, "learning_rate": 9.560507374789716e-06, "loss": 0.0399, "step": 54630 }, { "epoch": 0.44210696658305687, "grad_norm": 0.48315539956092834, "learning_rate": 9.56021785482299e-06, "loss": 0.0362, "step": 54640 }, { "epoch": 0.44218787927825876, "grad_norm": 0.571845531463623, "learning_rate": 9.559928243912342e-06, "loss": 0.0348, "step": 54650 }, { "epoch": 0.44226879197346064, "grad_norm": 0.5229213237762451, "learning_rate": 9.559638542063554e-06, "loss": 0.0328, "step": 54660 }, { "epoch": 0.4423497046686625, "grad_norm": 0.3877195715904236, "learning_rate": 9.5593487492824e-06, "loss": 0.0411, "step": 54670 }, { "epoch": 0.4424306173638644, "grad_norm": 0.7174997925758362, "learning_rate": 9.559058865574663e-06, "loss": 0.0519, "step": 54680 }, { "epoch": 0.4425115300590663, "grad_norm": 0.5102052092552185, "learning_rate": 9.558768890946117e-06, "loss": 0.0442, "step": 54690 }, { "epoch": 0.4425924427542681, "grad_norm": 0.5844485759735107, "learning_rate": 9.558478825402553e-06, "loss": 0.0407, "step": 54700 }, { "epoch": 0.44267335544947, "grad_norm": 0.44079071283340454, "learning_rate": 9.55818866894975e-06, "loss": 0.051, "step": 54710 }, { "epoch": 0.4427542681446719, "grad_norm": 1.0240814685821533, "learning_rate": 9.557898421593499e-06, "loss": 0.0477, "step": 54720 }, { "epoch": 0.4428351808398738, "grad_norm": 0.4398215413093567, "learning_rate": 9.557608083339585e-06, "loss": 0.0419, "step": 54730 }, { "epoch": 0.44291609353507566, "grad_norm": 0.34699857234954834, "learning_rate": 9.5573176541938e-06, "loss": 0.0292, "step": 54740 }, { "epoch": 0.44299700623027755, "grad_norm": 0.6363783478736877, "learning_rate": 9.557027134161935e-06, "loss": 0.0267, "step": 54750 }, { "epoch": 0.44307791892547943, "grad_norm": 1.0969709157943726, "learning_rate": 9.556736523249783e-06, "loss": 0.0291, "step": 54760 }, { "epoch": 0.44315883162068126, "grad_norm": 0.6616048812866211, "learning_rate": 9.556445821463142e-06, "loss": 0.0328, "step": 54770 }, { "epoch": 0.44323974431588314, "grad_norm": 0.46659305691719055, "learning_rate": 9.556155028807808e-06, "loss": 0.0416, "step": 54780 }, { "epoch": 0.44332065701108503, "grad_norm": 0.4893002510070801, "learning_rate": 9.555864145289579e-06, "loss": 0.0432, "step": 54790 }, { "epoch": 0.4434015697062869, "grad_norm": 0.3838825225830078, "learning_rate": 9.555573170914259e-06, "loss": 0.039, "step": 54800 }, { "epoch": 0.4434824824014888, "grad_norm": 1.906470537185669, "learning_rate": 9.55528210568765e-06, "loss": 0.044, "step": 54810 }, { "epoch": 0.4435633950966907, "grad_norm": 0.6035389304161072, "learning_rate": 9.554990949615554e-06, "loss": 0.0437, "step": 54820 }, { "epoch": 0.44364430779189257, "grad_norm": 0.48504531383514404, "learning_rate": 9.554699702703782e-06, "loss": 0.0351, "step": 54830 }, { "epoch": 0.44372522048709445, "grad_norm": 0.9199281930923462, "learning_rate": 9.554408364958138e-06, "loss": 0.0401, "step": 54840 }, { "epoch": 0.4438061331822963, "grad_norm": 0.3475988805294037, "learning_rate": 9.554116936384435e-06, "loss": 0.0274, "step": 54850 }, { "epoch": 0.44388704587749817, "grad_norm": 0.6461284160614014, "learning_rate": 9.553825416988482e-06, "loss": 0.0425, "step": 54860 }, { "epoch": 0.44396795857270005, "grad_norm": 0.899052083492279, "learning_rate": 9.553533806776097e-06, "loss": 0.0568, "step": 54870 }, { "epoch": 0.44404887126790193, "grad_norm": 0.3682514429092407, "learning_rate": 9.55324210575309e-06, "loss": 0.0416, "step": 54880 }, { "epoch": 0.4441297839631038, "grad_norm": 0.9565730094909668, "learning_rate": 9.552950313925284e-06, "loss": 0.0448, "step": 54890 }, { "epoch": 0.4442106966583057, "grad_norm": 0.711907684803009, "learning_rate": 9.552658431298497e-06, "loss": 0.0331, "step": 54900 }, { "epoch": 0.4442916093535076, "grad_norm": 0.7858178615570068, "learning_rate": 9.552366457878546e-06, "loss": 0.0493, "step": 54910 }, { "epoch": 0.4443725220487094, "grad_norm": 0.49379870295524597, "learning_rate": 9.552074393671258e-06, "loss": 0.0387, "step": 54920 }, { "epoch": 0.4444534347439113, "grad_norm": 0.7397903800010681, "learning_rate": 9.551782238682454e-06, "loss": 0.0455, "step": 54930 }, { "epoch": 0.4445343474391132, "grad_norm": 0.24243046343326569, "learning_rate": 9.551489992917965e-06, "loss": 0.0433, "step": 54940 }, { "epoch": 0.44461526013431507, "grad_norm": 0.5842453837394714, "learning_rate": 9.551197656383615e-06, "loss": 0.0487, "step": 54950 }, { "epoch": 0.44469617282951696, "grad_norm": 0.5135366320610046, "learning_rate": 9.550905229085237e-06, "loss": 0.0254, "step": 54960 }, { "epoch": 0.44477708552471884, "grad_norm": 0.60625159740448, "learning_rate": 9.550612711028662e-06, "loss": 0.0354, "step": 54970 }, { "epoch": 0.4448579982199207, "grad_norm": 0.4255484938621521, "learning_rate": 9.550320102219721e-06, "loss": 0.0424, "step": 54980 }, { "epoch": 0.4449389109151226, "grad_norm": 0.3589634895324707, "learning_rate": 9.550027402664253e-06, "loss": 0.0392, "step": 54990 }, { "epoch": 0.44501982361032444, "grad_norm": 0.6660439968109131, "learning_rate": 9.549734612368093e-06, "loss": 0.0321, "step": 55000 }, { "epoch": 0.4451007363055263, "grad_norm": 0.3715132176876068, "learning_rate": 9.549441731337084e-06, "loss": 0.0315, "step": 55010 }, { "epoch": 0.4451816490007282, "grad_norm": 0.8195950388908386, "learning_rate": 9.54914875957706e-06, "loss": 0.0446, "step": 55020 }, { "epoch": 0.4452625616959301, "grad_norm": 0.5376238822937012, "learning_rate": 9.548855697093871e-06, "loss": 0.0173, "step": 55030 }, { "epoch": 0.445343474391132, "grad_norm": 0.7050572037696838, "learning_rate": 9.548562543893355e-06, "loss": 0.0316, "step": 55040 }, { "epoch": 0.44542438708633386, "grad_norm": 0.9338953495025635, "learning_rate": 9.548269299981365e-06, "loss": 0.0317, "step": 55050 }, { "epoch": 0.44550529978153575, "grad_norm": 0.5752997398376465, "learning_rate": 9.547975965363744e-06, "loss": 0.0423, "step": 55060 }, { "epoch": 0.4455862124767376, "grad_norm": 0.6025274395942688, "learning_rate": 9.547682540046343e-06, "loss": 0.0436, "step": 55070 }, { "epoch": 0.44566712517193946, "grad_norm": 0.3567725718021393, "learning_rate": 9.547389024035015e-06, "loss": 0.0385, "step": 55080 }, { "epoch": 0.44574803786714134, "grad_norm": 0.25500252842903137, "learning_rate": 9.547095417335612e-06, "loss": 0.0507, "step": 55090 }, { "epoch": 0.44582895056234323, "grad_norm": 0.3840812146663666, "learning_rate": 9.546801719953993e-06, "loss": 0.0357, "step": 55100 }, { "epoch": 0.4459098632575451, "grad_norm": 0.580904483795166, "learning_rate": 9.54650793189601e-06, "loss": 0.0362, "step": 55110 }, { "epoch": 0.445990775952747, "grad_norm": 0.9278039932250977, "learning_rate": 9.546214053167525e-06, "loss": 0.0425, "step": 55120 }, { "epoch": 0.4460716886479489, "grad_norm": 0.14986656606197357, "learning_rate": 9.545920083774397e-06, "loss": 0.0276, "step": 55130 }, { "epoch": 0.44615260134315077, "grad_norm": 0.5850235223770142, "learning_rate": 9.545626023722491e-06, "loss": 0.0453, "step": 55140 }, { "epoch": 0.4462335140383526, "grad_norm": 0.7211706042289734, "learning_rate": 9.54533187301767e-06, "loss": 0.0334, "step": 55150 }, { "epoch": 0.4463144267335545, "grad_norm": 0.4130996763706207, "learning_rate": 9.545037631665802e-06, "loss": 0.0441, "step": 55160 }, { "epoch": 0.44639533942875637, "grad_norm": 0.5459216833114624, "learning_rate": 9.54474329967275e-06, "loss": 0.029, "step": 55170 }, { "epoch": 0.44647625212395825, "grad_norm": 0.5450314283370972, "learning_rate": 9.54444887704439e-06, "loss": 0.0293, "step": 55180 }, { "epoch": 0.44655716481916014, "grad_norm": 0.5703475475311279, "learning_rate": 9.54415436378659e-06, "loss": 0.0454, "step": 55190 }, { "epoch": 0.446638077514362, "grad_norm": 0.7146772742271423, "learning_rate": 9.543859759905224e-06, "loss": 0.0466, "step": 55200 }, { "epoch": 0.4467189902095639, "grad_norm": 0.587644636631012, "learning_rate": 9.543565065406167e-06, "loss": 0.0308, "step": 55210 }, { "epoch": 0.44679990290476573, "grad_norm": 0.02197556011378765, "learning_rate": 9.543270280295298e-06, "loss": 0.0336, "step": 55220 }, { "epoch": 0.4468808155999676, "grad_norm": 0.6146575808525085, "learning_rate": 9.542975404578494e-06, "loss": 0.049, "step": 55230 }, { "epoch": 0.4469617282951695, "grad_norm": 0.43803146481513977, "learning_rate": 9.542680438261636e-06, "loss": 0.0406, "step": 55240 }, { "epoch": 0.4470426409903714, "grad_norm": 0.4764203131198883, "learning_rate": 9.542385381350607e-06, "loss": 0.0374, "step": 55250 }, { "epoch": 0.44712355368557327, "grad_norm": 0.6827635169029236, "learning_rate": 9.542090233851291e-06, "loss": 0.0339, "step": 55260 }, { "epoch": 0.44720446638077516, "grad_norm": 0.30233922600746155, "learning_rate": 9.541794995769576e-06, "loss": 0.0407, "step": 55270 }, { "epoch": 0.44728537907597704, "grad_norm": 0.797092854976654, "learning_rate": 9.541499667111344e-06, "loss": 0.0466, "step": 55280 }, { "epoch": 0.4473662917711789, "grad_norm": 0.8159007430076599, "learning_rate": 9.541204247882492e-06, "loss": 0.0322, "step": 55290 }, { "epoch": 0.44744720446638075, "grad_norm": 0.6563331484794617, "learning_rate": 9.540908738088907e-06, "loss": 0.0458, "step": 55300 }, { "epoch": 0.44752811716158264, "grad_norm": 0.5056416392326355, "learning_rate": 9.540613137736484e-06, "loss": 0.0237, "step": 55310 }, { "epoch": 0.4476090298567845, "grad_norm": 0.3003407418727875, "learning_rate": 9.540317446831115e-06, "loss": 0.0505, "step": 55320 }, { "epoch": 0.4476899425519864, "grad_norm": 0.9021913409233093, "learning_rate": 9.540021665378703e-06, "loss": 0.0452, "step": 55330 }, { "epoch": 0.4477708552471883, "grad_norm": 0.4358007609844208, "learning_rate": 9.539725793385141e-06, "loss": 0.0439, "step": 55340 }, { "epoch": 0.4478517679423902, "grad_norm": 0.3876339793205261, "learning_rate": 9.539429830856332e-06, "loss": 0.0229, "step": 55350 }, { "epoch": 0.44793268063759206, "grad_norm": 1.813125729560852, "learning_rate": 9.53913377779818e-06, "loss": 0.0481, "step": 55360 }, { "epoch": 0.4480135933327939, "grad_norm": 1.0930163860321045, "learning_rate": 9.538837634216587e-06, "loss": 0.0383, "step": 55370 }, { "epoch": 0.4480945060279958, "grad_norm": 0.8285006880760193, "learning_rate": 9.538541400117459e-06, "loss": 0.0401, "step": 55380 }, { "epoch": 0.44817541872319766, "grad_norm": 0.5457071661949158, "learning_rate": 9.538245075506702e-06, "loss": 0.0459, "step": 55390 }, { "epoch": 0.44825633141839955, "grad_norm": 0.41385719180107117, "learning_rate": 9.537948660390229e-06, "loss": 0.0314, "step": 55400 }, { "epoch": 0.44833724411360143, "grad_norm": 0.5184952020645142, "learning_rate": 9.53765215477395e-06, "loss": 0.0294, "step": 55410 }, { "epoch": 0.4484181568088033, "grad_norm": 0.6930440068244934, "learning_rate": 9.537355558663779e-06, "loss": 0.047, "step": 55420 }, { "epoch": 0.4484990695040052, "grad_norm": 0.2798696756362915, "learning_rate": 9.53705887206563e-06, "loss": 0.0267, "step": 55430 }, { "epoch": 0.44857998219920703, "grad_norm": 0.2631250023841858, "learning_rate": 9.536762094985418e-06, "loss": 0.044, "step": 55440 }, { "epoch": 0.4486608948944089, "grad_norm": 0.24926812946796417, "learning_rate": 9.536465227429066e-06, "loss": 0.0231, "step": 55450 }, { "epoch": 0.4487418075896108, "grad_norm": 0.33326104283332825, "learning_rate": 9.53616826940249e-06, "loss": 0.0258, "step": 55460 }, { "epoch": 0.4488227202848127, "grad_norm": 0.588117778301239, "learning_rate": 9.535871220911617e-06, "loss": 0.0321, "step": 55470 }, { "epoch": 0.44890363298001457, "grad_norm": 0.8072208166122437, "learning_rate": 9.535574081962365e-06, "loss": 0.0547, "step": 55480 }, { "epoch": 0.44898454567521645, "grad_norm": 0.45741093158721924, "learning_rate": 9.535276852560665e-06, "loss": 0.0381, "step": 55490 }, { "epoch": 0.44906545837041834, "grad_norm": 0.5924890637397766, "learning_rate": 9.534979532712441e-06, "loss": 0.0451, "step": 55500 }, { "epoch": 0.4491463710656202, "grad_norm": 0.5940719246864319, "learning_rate": 9.534682122423626e-06, "loss": 0.0333, "step": 55510 }, { "epoch": 0.44922728376082205, "grad_norm": 0.4880402088165283, "learning_rate": 9.534384621700148e-06, "loss": 0.0446, "step": 55520 }, { "epoch": 0.44930819645602393, "grad_norm": 0.581767201423645, "learning_rate": 9.53408703054794e-06, "loss": 0.0428, "step": 55530 }, { "epoch": 0.4493891091512258, "grad_norm": 0.6279578804969788, "learning_rate": 9.53378934897294e-06, "loss": 0.039, "step": 55540 }, { "epoch": 0.4494700218464277, "grad_norm": 0.4297356605529785, "learning_rate": 9.533491576981083e-06, "loss": 0.0222, "step": 55550 }, { "epoch": 0.4495509345416296, "grad_norm": 0.6762896776199341, "learning_rate": 9.533193714578305e-06, "loss": 0.0362, "step": 55560 }, { "epoch": 0.4496318472368315, "grad_norm": 0.49135729670524597, "learning_rate": 9.53289576177055e-06, "loss": 0.0388, "step": 55570 }, { "epoch": 0.44971275993203336, "grad_norm": 0.3675951659679413, "learning_rate": 9.532597718563758e-06, "loss": 0.0351, "step": 55580 }, { "epoch": 0.4497936726272352, "grad_norm": 0.38931283354759216, "learning_rate": 9.532299584963873e-06, "loss": 0.0176, "step": 55590 }, { "epoch": 0.44987458532243707, "grad_norm": 0.4874047040939331, "learning_rate": 9.532001360976842e-06, "loss": 0.0304, "step": 55600 }, { "epoch": 0.44995549801763896, "grad_norm": 0.7325412631034851, "learning_rate": 9.531703046608608e-06, "loss": 0.034, "step": 55610 }, { "epoch": 0.45003641071284084, "grad_norm": 1.0487827062606812, "learning_rate": 9.531404641865127e-06, "loss": 0.0542, "step": 55620 }, { "epoch": 0.4501173234080427, "grad_norm": 0.620599627494812, "learning_rate": 9.531106146752345e-06, "loss": 0.0276, "step": 55630 }, { "epoch": 0.4501982361032446, "grad_norm": 0.2671526074409485, "learning_rate": 9.530807561276218e-06, "loss": 0.0356, "step": 55640 }, { "epoch": 0.4502791487984465, "grad_norm": 0.5260496139526367, "learning_rate": 9.530508885442698e-06, "loss": 0.0277, "step": 55650 }, { "epoch": 0.4503600614936484, "grad_norm": 0.2996594309806824, "learning_rate": 9.530210119257743e-06, "loss": 0.0307, "step": 55660 }, { "epoch": 0.4504409741888502, "grad_norm": 0.6362716555595398, "learning_rate": 9.52991126272731e-06, "loss": 0.0464, "step": 55670 }, { "epoch": 0.4505218868840521, "grad_norm": 0.39767396450042725, "learning_rate": 9.52961231585736e-06, "loss": 0.0466, "step": 55680 }, { "epoch": 0.450602799579254, "grad_norm": 0.47891607880592346, "learning_rate": 9.529313278653856e-06, "loss": 0.0378, "step": 55690 }, { "epoch": 0.45068371227445586, "grad_norm": 0.8205682039260864, "learning_rate": 9.52901415112276e-06, "loss": 0.0554, "step": 55700 }, { "epoch": 0.45076462496965775, "grad_norm": 0.733655571937561, "learning_rate": 9.528714933270038e-06, "loss": 0.0409, "step": 55710 }, { "epoch": 0.45084553766485963, "grad_norm": 0.8651804327964783, "learning_rate": 9.528415625101656e-06, "loss": 0.0346, "step": 55720 }, { "epoch": 0.4509264503600615, "grad_norm": 0.5245921015739441, "learning_rate": 9.528116226623585e-06, "loss": 0.0375, "step": 55730 }, { "epoch": 0.45100736305526334, "grad_norm": 1.3964000940322876, "learning_rate": 9.527816737841797e-06, "loss": 0.0621, "step": 55740 }, { "epoch": 0.45108827575046523, "grad_norm": 0.2136194258928299, "learning_rate": 9.52751715876226e-06, "loss": 0.0221, "step": 55750 }, { "epoch": 0.4511691884456671, "grad_norm": 0.796172022819519, "learning_rate": 9.527217489390953e-06, "loss": 0.0398, "step": 55760 }, { "epoch": 0.451250101140869, "grad_norm": 0.36344829201698303, "learning_rate": 9.52691772973385e-06, "loss": 0.051, "step": 55770 }, { "epoch": 0.4513310138360709, "grad_norm": 0.8294516205787659, "learning_rate": 9.52661787979693e-06, "loss": 0.0498, "step": 55780 }, { "epoch": 0.45141192653127277, "grad_norm": 0.1646050065755844, "learning_rate": 9.526317939586171e-06, "loss": 0.048, "step": 55790 }, { "epoch": 0.45149283922647465, "grad_norm": 0.27954283356666565, "learning_rate": 9.526017909107558e-06, "loss": 0.0215, "step": 55800 }, { "epoch": 0.45157375192167654, "grad_norm": 0.5968044400215149, "learning_rate": 9.525717788367073e-06, "loss": 0.0386, "step": 55810 }, { "epoch": 0.45165466461687837, "grad_norm": 0.37179630994796753, "learning_rate": 9.5254175773707e-06, "loss": 0.0432, "step": 55820 }, { "epoch": 0.45173557731208025, "grad_norm": 0.6800525188446045, "learning_rate": 9.525117276124426e-06, "loss": 0.0333, "step": 55830 }, { "epoch": 0.45181649000728213, "grad_norm": 0.3810155987739563, "learning_rate": 9.524816884634242e-06, "loss": 0.0546, "step": 55840 }, { "epoch": 0.451897402702484, "grad_norm": 0.5650097727775574, "learning_rate": 9.524516402906136e-06, "loss": 0.03, "step": 55850 }, { "epoch": 0.4519783153976859, "grad_norm": 0.20252425968647003, "learning_rate": 9.524215830946105e-06, "loss": 0.022, "step": 55860 }, { "epoch": 0.4520592280928878, "grad_norm": 0.6889216899871826, "learning_rate": 9.523915168760139e-06, "loss": 0.0466, "step": 55870 }, { "epoch": 0.4521401407880897, "grad_norm": 0.1420392543077469, "learning_rate": 9.523614416354234e-06, "loss": 0.0322, "step": 55880 }, { "epoch": 0.4522210534832915, "grad_norm": 0.4317114055156708, "learning_rate": 9.52331357373439e-06, "loss": 0.0493, "step": 55890 }, { "epoch": 0.4523019661784934, "grad_norm": 0.3329886794090271, "learning_rate": 9.523012640906607e-06, "loss": 0.026, "step": 55900 }, { "epoch": 0.45238287887369527, "grad_norm": 0.6253472566604614, "learning_rate": 9.522711617876884e-06, "loss": 0.0277, "step": 55910 }, { "epoch": 0.45246379156889716, "grad_norm": 0.5998090505599976, "learning_rate": 9.522410504651227e-06, "loss": 0.0281, "step": 55920 }, { "epoch": 0.45254470426409904, "grad_norm": 0.6109752655029297, "learning_rate": 9.522109301235637e-06, "loss": 0.0398, "step": 55930 }, { "epoch": 0.4526256169593009, "grad_norm": 0.3244090974330902, "learning_rate": 9.521808007636127e-06, "loss": 0.0418, "step": 55940 }, { "epoch": 0.4527065296545028, "grad_norm": 0.8011754751205444, "learning_rate": 9.5215066238587e-06, "loss": 0.0369, "step": 55950 }, { "epoch": 0.4527874423497047, "grad_norm": 0.04574224725365639, "learning_rate": 9.52120514990937e-06, "loss": 0.0301, "step": 55960 }, { "epoch": 0.4528683550449065, "grad_norm": 0.29718586802482605, "learning_rate": 9.520903585794148e-06, "loss": 0.0366, "step": 55970 }, { "epoch": 0.4529492677401084, "grad_norm": 0.7153754234313965, "learning_rate": 9.520601931519047e-06, "loss": 0.0446, "step": 55980 }, { "epoch": 0.4530301804353103, "grad_norm": 0.7061936855316162, "learning_rate": 9.520300187090084e-06, "loss": 0.0425, "step": 55990 }, { "epoch": 0.4531110931305122, "grad_norm": 0.3027753233909607, "learning_rate": 9.519998352513278e-06, "loss": 0.0258, "step": 56000 }, { "epoch": 0.45319200582571406, "grad_norm": 0.22348061203956604, "learning_rate": 9.519696427794644e-06, "loss": 0.0187, "step": 56010 }, { "epoch": 0.45327291852091595, "grad_norm": 0.36843058466911316, "learning_rate": 9.51939441294021e-06, "loss": 0.0353, "step": 56020 }, { "epoch": 0.45335383121611783, "grad_norm": 0.3261911869049072, "learning_rate": 9.519092307955992e-06, "loss": 0.0449, "step": 56030 }, { "epoch": 0.45343474391131966, "grad_norm": 0.34631451964378357, "learning_rate": 9.51879011284802e-06, "loss": 0.0444, "step": 56040 }, { "epoch": 0.45351565660652154, "grad_norm": 0.7879818677902222, "learning_rate": 9.518487827622318e-06, "loss": 0.0428, "step": 56050 }, { "epoch": 0.45359656930172343, "grad_norm": 0.32666173577308655, "learning_rate": 9.518185452284916e-06, "loss": 0.0339, "step": 56060 }, { "epoch": 0.4536774819969253, "grad_norm": 0.44930359721183777, "learning_rate": 9.517882986841844e-06, "loss": 0.0368, "step": 56070 }, { "epoch": 0.4537583946921272, "grad_norm": 0.5746687054634094, "learning_rate": 9.517580431299131e-06, "loss": 0.0393, "step": 56080 }, { "epoch": 0.4538393073873291, "grad_norm": 0.2230643481016159, "learning_rate": 9.517277785662815e-06, "loss": 0.0301, "step": 56090 }, { "epoch": 0.45392022008253097, "grad_norm": 0.4910232424736023, "learning_rate": 9.51697504993893e-06, "loss": 0.0372, "step": 56100 }, { "epoch": 0.45400113277773285, "grad_norm": 0.5187093019485474, "learning_rate": 9.516672224133513e-06, "loss": 0.023, "step": 56110 }, { "epoch": 0.4540820454729347, "grad_norm": 0.22496764361858368, "learning_rate": 9.516369308252604e-06, "loss": 0.0383, "step": 56120 }, { "epoch": 0.45416295816813657, "grad_norm": 0.3627062439918518, "learning_rate": 9.516066302302244e-06, "loss": 0.0298, "step": 56130 }, { "epoch": 0.45424387086333845, "grad_norm": 0.6771105527877808, "learning_rate": 9.515763206288476e-06, "loss": 0.0533, "step": 56140 }, { "epoch": 0.45432478355854033, "grad_norm": 0.4283849596977234, "learning_rate": 9.515460020217343e-06, "loss": 0.0334, "step": 56150 }, { "epoch": 0.4544056962537422, "grad_norm": 1.2483500242233276, "learning_rate": 9.515156744094891e-06, "loss": 0.0483, "step": 56160 }, { "epoch": 0.4544866089489441, "grad_norm": 0.48396867513656616, "learning_rate": 9.51485337792717e-06, "loss": 0.0201, "step": 56170 }, { "epoch": 0.454567521644146, "grad_norm": 0.4627784788608551, "learning_rate": 9.514549921720232e-06, "loss": 0.0357, "step": 56180 }, { "epoch": 0.4546484343393478, "grad_norm": 0.4304256737232208, "learning_rate": 9.514246375480126e-06, "loss": 0.0303, "step": 56190 }, { "epoch": 0.4547293470345497, "grad_norm": 0.5495833158493042, "learning_rate": 9.513942739212904e-06, "loss": 0.0358, "step": 56200 }, { "epoch": 0.4548102597297516, "grad_norm": 0.5211432576179504, "learning_rate": 9.513639012924625e-06, "loss": 0.0374, "step": 56210 }, { "epoch": 0.45489117242495347, "grad_norm": 0.49287185072898865, "learning_rate": 9.513335196621345e-06, "loss": 0.0475, "step": 56220 }, { "epoch": 0.45497208512015536, "grad_norm": 0.7885295152664185, "learning_rate": 9.513031290309121e-06, "loss": 0.0521, "step": 56230 }, { "epoch": 0.45505299781535724, "grad_norm": 0.42084017395973206, "learning_rate": 9.512727293994018e-06, "loss": 0.0519, "step": 56240 }, { "epoch": 0.4551339105105591, "grad_norm": 0.6621198654174805, "learning_rate": 9.512423207682093e-06, "loss": 0.0349, "step": 56250 }, { "epoch": 0.455214823205761, "grad_norm": 0.6407983899116516, "learning_rate": 9.512119031379415e-06, "loss": 0.0501, "step": 56260 }, { "epoch": 0.45529573590096284, "grad_norm": 0.6585644483566284, "learning_rate": 9.511814765092046e-06, "loss": 0.0383, "step": 56270 }, { "epoch": 0.4553766485961647, "grad_norm": 0.27096548676490784, "learning_rate": 9.511510408826059e-06, "loss": 0.0324, "step": 56280 }, { "epoch": 0.4554575612913666, "grad_norm": 0.5956127643585205, "learning_rate": 9.51120596258752e-06, "loss": 0.0259, "step": 56290 }, { "epoch": 0.4555384739865685, "grad_norm": 0.6614484190940857, "learning_rate": 9.510901426382501e-06, "loss": 0.0457, "step": 56300 }, { "epoch": 0.4556193866817704, "grad_norm": 0.05322945490479469, "learning_rate": 9.510596800217078e-06, "loss": 0.0207, "step": 56310 }, { "epoch": 0.45570029937697226, "grad_norm": 0.5065823197364807, "learning_rate": 9.510292084097323e-06, "loss": 0.0417, "step": 56320 }, { "epoch": 0.45578121207217415, "grad_norm": 0.564607560634613, "learning_rate": 9.509987278029313e-06, "loss": 0.0283, "step": 56330 }, { "epoch": 0.455862124767376, "grad_norm": 1.1159054040908813, "learning_rate": 9.509682382019129e-06, "loss": 0.0426, "step": 56340 }, { "epoch": 0.45594303746257786, "grad_norm": 0.9865360260009766, "learning_rate": 9.509377396072851e-06, "loss": 0.0463, "step": 56350 }, { "epoch": 0.45602395015777974, "grad_norm": 0.21157313883304596, "learning_rate": 9.509072320196559e-06, "loss": 0.0273, "step": 56360 }, { "epoch": 0.45610486285298163, "grad_norm": 0.2873225808143616, "learning_rate": 9.50876715439634e-06, "loss": 0.0279, "step": 56370 }, { "epoch": 0.4561857755481835, "grad_norm": 0.5251839756965637, "learning_rate": 9.508461898678278e-06, "loss": 0.0255, "step": 56380 }, { "epoch": 0.4562666882433854, "grad_norm": 0.32776156067848206, "learning_rate": 9.508156553048461e-06, "loss": 0.049, "step": 56390 }, { "epoch": 0.4563476009385873, "grad_norm": 0.22647085785865784, "learning_rate": 9.507851117512979e-06, "loss": 0.0389, "step": 56400 }, { "epoch": 0.45642851363378917, "grad_norm": 0.1979873925447464, "learning_rate": 9.507545592077922e-06, "loss": 0.0372, "step": 56410 }, { "epoch": 0.456509426328991, "grad_norm": 0.669107973575592, "learning_rate": 9.507239976749385e-06, "loss": 0.0371, "step": 56420 }, { "epoch": 0.4565903390241929, "grad_norm": 0.4497936964035034, "learning_rate": 9.50693427153346e-06, "loss": 0.0233, "step": 56430 }, { "epoch": 0.45667125171939477, "grad_norm": 1.1245853900909424, "learning_rate": 9.506628476436248e-06, "loss": 0.0476, "step": 56440 }, { "epoch": 0.45675216441459665, "grad_norm": 0.3334072530269623, "learning_rate": 9.506322591463843e-06, "loss": 0.0333, "step": 56450 }, { "epoch": 0.45683307710979854, "grad_norm": 0.7106519937515259, "learning_rate": 9.50601661662235e-06, "loss": 0.0335, "step": 56460 }, { "epoch": 0.4569139898050004, "grad_norm": 0.8450183868408203, "learning_rate": 9.505710551917866e-06, "loss": 0.0464, "step": 56470 }, { "epoch": 0.4569949025002023, "grad_norm": 0.6373493075370789, "learning_rate": 9.505404397356496e-06, "loss": 0.0273, "step": 56480 }, { "epoch": 0.45707581519540413, "grad_norm": 0.8354277014732361, "learning_rate": 9.50509815294435e-06, "loss": 0.0267, "step": 56490 }, { "epoch": 0.457156727890606, "grad_norm": 0.45514583587646484, "learning_rate": 9.504791818687528e-06, "loss": 0.04, "step": 56500 }, { "epoch": 0.4572376405858079, "grad_norm": 0.5416438579559326, "learning_rate": 9.504485394592146e-06, "loss": 0.042, "step": 56510 }, { "epoch": 0.4573185532810098, "grad_norm": 0.9291830658912659, "learning_rate": 9.504178880664312e-06, "loss": 0.0286, "step": 56520 }, { "epoch": 0.45739946597621167, "grad_norm": 0.4908997714519501, "learning_rate": 9.503872276910138e-06, "loss": 0.0469, "step": 56530 }, { "epoch": 0.45748037867141356, "grad_norm": 0.3956649601459503, "learning_rate": 9.50356558333574e-06, "loss": 0.0344, "step": 56540 }, { "epoch": 0.45756129136661544, "grad_norm": 0.40818071365356445, "learning_rate": 9.503258799947236e-06, "loss": 0.0601, "step": 56550 }, { "epoch": 0.4576422040618173, "grad_norm": 0.4460901618003845, "learning_rate": 9.502951926750738e-06, "loss": 0.0504, "step": 56560 }, { "epoch": 0.45772311675701916, "grad_norm": 0.18656232953071594, "learning_rate": 9.502644963752373e-06, "loss": 0.0285, "step": 56570 }, { "epoch": 0.45780402945222104, "grad_norm": 0.21198615431785583, "learning_rate": 9.502337910958256e-06, "loss": 0.0439, "step": 56580 }, { "epoch": 0.4578849421474229, "grad_norm": 0.770915687084198, "learning_rate": 9.502030768374519e-06, "loss": 0.0401, "step": 56590 }, { "epoch": 0.4579658548426248, "grad_norm": 0.41445496678352356, "learning_rate": 9.50172353600728e-06, "loss": 0.0456, "step": 56600 }, { "epoch": 0.4580467675378267, "grad_norm": 0.42628049850463867, "learning_rate": 9.501416213862669e-06, "loss": 0.0407, "step": 56610 }, { "epoch": 0.4581276802330286, "grad_norm": 0.8280230760574341, "learning_rate": 9.501108801946815e-06, "loss": 0.0387, "step": 56620 }, { "epoch": 0.45820859292823046, "grad_norm": 0.5245445370674133, "learning_rate": 9.500801300265848e-06, "loss": 0.0419, "step": 56630 }, { "epoch": 0.4582895056234323, "grad_norm": 0.46483108401298523, "learning_rate": 9.500493708825899e-06, "loss": 0.044, "step": 56640 }, { "epoch": 0.4583704183186342, "grad_norm": 0.5444225072860718, "learning_rate": 9.500186027633104e-06, "loss": 0.0532, "step": 56650 }, { "epoch": 0.45845133101383606, "grad_norm": 0.7050545811653137, "learning_rate": 9.499878256693602e-06, "loss": 0.0335, "step": 56660 }, { "epoch": 0.45853224370903795, "grad_norm": 0.4163082242012024, "learning_rate": 9.499570396013527e-06, "loss": 0.0328, "step": 56670 }, { "epoch": 0.45861315640423983, "grad_norm": 0.5439493656158447, "learning_rate": 9.499262445599017e-06, "loss": 0.0521, "step": 56680 }, { "epoch": 0.4586940690994417, "grad_norm": 0.6495444774627686, "learning_rate": 9.498954405456218e-06, "loss": 0.0368, "step": 56690 }, { "epoch": 0.4587749817946436, "grad_norm": 0.43584346771240234, "learning_rate": 9.49864627559127e-06, "loss": 0.031, "step": 56700 }, { "epoch": 0.4588558944898455, "grad_norm": 0.4968317747116089, "learning_rate": 9.49833805601032e-06, "loss": 0.038, "step": 56710 }, { "epoch": 0.4589368071850473, "grad_norm": 0.41666609048843384, "learning_rate": 9.498029746719513e-06, "loss": 0.0373, "step": 56720 }, { "epoch": 0.4590177198802492, "grad_norm": 1.0394734144210815, "learning_rate": 9.497721347725e-06, "loss": 0.0306, "step": 56730 }, { "epoch": 0.4590986325754511, "grad_norm": 0.9523953795433044, "learning_rate": 9.497412859032928e-06, "loss": 0.0437, "step": 56740 }, { "epoch": 0.45917954527065297, "grad_norm": 0.3485025465488434, "learning_rate": 9.497104280649453e-06, "loss": 0.0387, "step": 56750 }, { "epoch": 0.45926045796585485, "grad_norm": 0.4942527413368225, "learning_rate": 9.496795612580724e-06, "loss": 0.0449, "step": 56760 }, { "epoch": 0.45934137066105674, "grad_norm": 0.7210976481437683, "learning_rate": 9.496486854832902e-06, "loss": 0.0355, "step": 56770 }, { "epoch": 0.4594222833562586, "grad_norm": 0.4210220277309418, "learning_rate": 9.496178007412142e-06, "loss": 0.028, "step": 56780 }, { "epoch": 0.45950319605146045, "grad_norm": 0.5001344680786133, "learning_rate": 9.495869070324604e-06, "loss": 0.0354, "step": 56790 }, { "epoch": 0.45958410874666233, "grad_norm": 0.4022134244441986, "learning_rate": 9.495560043576447e-06, "loss": 0.0424, "step": 56800 }, { "epoch": 0.4596650214418642, "grad_norm": 0.04656987264752388, "learning_rate": 9.495250927173837e-06, "loss": 0.0349, "step": 56810 }, { "epoch": 0.4597459341370661, "grad_norm": 0.3421633541584015, "learning_rate": 9.494941721122936e-06, "loss": 0.0337, "step": 56820 }, { "epoch": 0.459826846832268, "grad_norm": 0.38848966360092163, "learning_rate": 9.494632425429912e-06, "loss": 0.0316, "step": 56830 }, { "epoch": 0.4599077595274699, "grad_norm": 0.4050036668777466, "learning_rate": 9.494323040100933e-06, "loss": 0.0533, "step": 56840 }, { "epoch": 0.45998867222267176, "grad_norm": 0.8357028365135193, "learning_rate": 9.494013565142168e-06, "loss": 0.0446, "step": 56850 }, { "epoch": 0.46006958491787364, "grad_norm": 1.0310863256454468, "learning_rate": 9.493704000559792e-06, "loss": 0.0452, "step": 56860 }, { "epoch": 0.46015049761307547, "grad_norm": 0.24362793564796448, "learning_rate": 9.493394346359975e-06, "loss": 0.0302, "step": 56870 }, { "epoch": 0.46023141030827736, "grad_norm": 0.5935588479042053, "learning_rate": 9.493084602548891e-06, "loss": 0.0353, "step": 56880 }, { "epoch": 0.46031232300347924, "grad_norm": 0.39005565643310547, "learning_rate": 9.492774769132724e-06, "loss": 0.031, "step": 56890 }, { "epoch": 0.4603932356986811, "grad_norm": 0.8172906041145325, "learning_rate": 9.492464846117648e-06, "loss": 0.0386, "step": 56900 }, { "epoch": 0.460474148393883, "grad_norm": 0.3171192407608032, "learning_rate": 9.492154833509844e-06, "loss": 0.0405, "step": 56910 }, { "epoch": 0.4605550610890849, "grad_norm": 0.4830070734024048, "learning_rate": 9.491844731315494e-06, "loss": 0.0272, "step": 56920 }, { "epoch": 0.4606359737842868, "grad_norm": 0.6035889983177185, "learning_rate": 9.491534539540785e-06, "loss": 0.0243, "step": 56930 }, { "epoch": 0.4607168864794886, "grad_norm": 0.2895008623600006, "learning_rate": 9.4912242581919e-06, "loss": 0.0311, "step": 56940 }, { "epoch": 0.4607977991746905, "grad_norm": 0.5430229902267456, "learning_rate": 9.49091388727503e-06, "loss": 0.049, "step": 56950 }, { "epoch": 0.4608787118698924, "grad_norm": 0.5446537733078003, "learning_rate": 9.490603426796365e-06, "loss": 0.0377, "step": 56960 }, { "epoch": 0.46095962456509426, "grad_norm": 0.3604663908481598, "learning_rate": 9.490292876762092e-06, "loss": 0.0468, "step": 56970 }, { "epoch": 0.46104053726029615, "grad_norm": 0.46433225274086, "learning_rate": 9.489982237178408e-06, "loss": 0.0309, "step": 56980 }, { "epoch": 0.46112144995549803, "grad_norm": 0.7654917240142822, "learning_rate": 9.489671508051508e-06, "loss": 0.0436, "step": 56990 }, { "epoch": 0.4612023626506999, "grad_norm": 0.30845320224761963, "learning_rate": 9.489360689387586e-06, "loss": 0.0351, "step": 57000 }, { "epoch": 0.46128327534590174, "grad_norm": 0.5152519941329956, "learning_rate": 9.489049781192843e-06, "loss": 0.0474, "step": 57010 }, { "epoch": 0.46136418804110363, "grad_norm": 0.6149747967720032, "learning_rate": 9.48873878347348e-06, "loss": 0.0342, "step": 57020 }, { "epoch": 0.4614451007363055, "grad_norm": 0.30484551191329956, "learning_rate": 9.488427696235698e-06, "loss": 0.0337, "step": 57030 }, { "epoch": 0.4615260134315074, "grad_norm": 0.5327027440071106, "learning_rate": 9.4881165194857e-06, "loss": 0.0415, "step": 57040 }, { "epoch": 0.4616069261267093, "grad_norm": 0.6553550362586975, "learning_rate": 9.487805253229695e-06, "loss": 0.03, "step": 57050 }, { "epoch": 0.46168783882191117, "grad_norm": 0.3786582052707672, "learning_rate": 9.487493897473886e-06, "loss": 0.0427, "step": 57060 }, { "epoch": 0.46176875151711305, "grad_norm": 0.45726919174194336, "learning_rate": 9.487182452224486e-06, "loss": 0.0413, "step": 57070 }, { "epoch": 0.46184966421231494, "grad_norm": 0.3383854329586029, "learning_rate": 9.486870917487703e-06, "loss": 0.0345, "step": 57080 }, { "epoch": 0.46193057690751677, "grad_norm": 0.80070561170578, "learning_rate": 9.486559293269753e-06, "loss": 0.0296, "step": 57090 }, { "epoch": 0.46201148960271865, "grad_norm": 0.7801676988601685, "learning_rate": 9.48624757957685e-06, "loss": 0.0283, "step": 57100 }, { "epoch": 0.46209240229792053, "grad_norm": 0.7910252809524536, "learning_rate": 9.48593577641521e-06, "loss": 0.035, "step": 57110 }, { "epoch": 0.4621733149931224, "grad_norm": 0.33542683720588684, "learning_rate": 9.48562388379105e-06, "loss": 0.0342, "step": 57120 }, { "epoch": 0.4622542276883243, "grad_norm": 0.439656525850296, "learning_rate": 9.485311901710592e-06, "loss": 0.0387, "step": 57130 }, { "epoch": 0.4623351403835262, "grad_norm": 0.45757052302360535, "learning_rate": 9.484999830180057e-06, "loss": 0.0298, "step": 57140 }, { "epoch": 0.4624160530787281, "grad_norm": 0.2869158089160919, "learning_rate": 9.484687669205668e-06, "loss": 0.0395, "step": 57150 }, { "epoch": 0.4624969657739299, "grad_norm": 1.0146325826644897, "learning_rate": 9.484375418793652e-06, "loss": 0.0302, "step": 57160 }, { "epoch": 0.4625778784691318, "grad_norm": 0.7524294257164001, "learning_rate": 9.484063078950233e-06, "loss": 0.0391, "step": 57170 }, { "epoch": 0.46265879116433367, "grad_norm": 0.5350856781005859, "learning_rate": 9.483750649681645e-06, "loss": 0.0373, "step": 57180 }, { "epoch": 0.46273970385953556, "grad_norm": 1.073093056678772, "learning_rate": 9.483438130994117e-06, "loss": 0.0419, "step": 57190 }, { "epoch": 0.46282061655473744, "grad_norm": 0.5683488845825195, "learning_rate": 9.483125522893876e-06, "loss": 0.0389, "step": 57200 }, { "epoch": 0.4629015292499393, "grad_norm": 0.11690964549779892, "learning_rate": 9.482812825387164e-06, "loss": 0.0311, "step": 57210 }, { "epoch": 0.4629824419451412, "grad_norm": 0.6621468663215637, "learning_rate": 9.482500038480213e-06, "loss": 0.0269, "step": 57220 }, { "epoch": 0.4630633546403431, "grad_norm": 0.2567393183708191, "learning_rate": 9.482187162179262e-06, "loss": 0.0288, "step": 57230 }, { "epoch": 0.4631442673355449, "grad_norm": 0.4743919372558594, "learning_rate": 9.48187419649055e-06, "loss": 0.0319, "step": 57240 }, { "epoch": 0.4632251800307468, "grad_norm": 0.441903680562973, "learning_rate": 9.481561141420318e-06, "loss": 0.0399, "step": 57250 }, { "epoch": 0.4633060927259487, "grad_norm": 0.5596303343772888, "learning_rate": 9.481247996974811e-06, "loss": 0.0373, "step": 57260 }, { "epoch": 0.4633870054211506, "grad_norm": 0.7165595293045044, "learning_rate": 9.480934763160272e-06, "loss": 0.0288, "step": 57270 }, { "epoch": 0.46346791811635246, "grad_norm": 0.6069701313972473, "learning_rate": 9.480621439982951e-06, "loss": 0.0373, "step": 57280 }, { "epoch": 0.46354883081155435, "grad_norm": 0.5756797790527344, "learning_rate": 9.480308027449092e-06, "loss": 0.0317, "step": 57290 }, { "epoch": 0.46362974350675623, "grad_norm": 0.4303632080554962, "learning_rate": 9.47999452556495e-06, "loss": 0.0281, "step": 57300 }, { "epoch": 0.46371065620195806, "grad_norm": 0.5446992516517639, "learning_rate": 9.479680934336772e-06, "loss": 0.0559, "step": 57310 }, { "epoch": 0.46379156889715994, "grad_norm": 0.5218984484672546, "learning_rate": 9.479367253770818e-06, "loss": 0.032, "step": 57320 }, { "epoch": 0.46387248159236183, "grad_norm": 0.4029993414878845, "learning_rate": 9.479053483873338e-06, "loss": 0.028, "step": 57330 }, { "epoch": 0.4639533942875637, "grad_norm": 0.5868873000144958, "learning_rate": 9.478739624650593e-06, "loss": 0.029, "step": 57340 }, { "epoch": 0.4640343069827656, "grad_norm": 0.4301644265651703, "learning_rate": 9.478425676108841e-06, "loss": 0.0364, "step": 57350 }, { "epoch": 0.4641152196779675, "grad_norm": 0.3096626102924347, "learning_rate": 9.478111638254344e-06, "loss": 0.0351, "step": 57360 }, { "epoch": 0.46419613237316937, "grad_norm": 0.544918954372406, "learning_rate": 9.477797511093363e-06, "loss": 0.0285, "step": 57370 }, { "epoch": 0.46427704506837125, "grad_norm": 0.5133827924728394, "learning_rate": 9.477483294632164e-06, "loss": 0.0353, "step": 57380 }, { "epoch": 0.4643579577635731, "grad_norm": 0.4080905318260193, "learning_rate": 9.477168988877014e-06, "loss": 0.041, "step": 57390 }, { "epoch": 0.46443887045877497, "grad_norm": 0.4729120433330536, "learning_rate": 9.476854593834179e-06, "loss": 0.0509, "step": 57400 }, { "epoch": 0.46451978315397685, "grad_norm": 0.36473289132118225, "learning_rate": 9.476540109509931e-06, "loss": 0.0242, "step": 57410 }, { "epoch": 0.46460069584917874, "grad_norm": 0.3060567080974579, "learning_rate": 9.47622553591054e-06, "loss": 0.0449, "step": 57420 }, { "epoch": 0.4646816085443806, "grad_norm": 0.2878747582435608, "learning_rate": 9.475910873042282e-06, "loss": 0.0426, "step": 57430 }, { "epoch": 0.4647625212395825, "grad_norm": 0.30950453877449036, "learning_rate": 9.47559612091143e-06, "loss": 0.0321, "step": 57440 }, { "epoch": 0.4648434339347844, "grad_norm": 0.4170245826244354, "learning_rate": 9.47528127952426e-06, "loss": 0.027, "step": 57450 }, { "epoch": 0.4649243466299862, "grad_norm": 0.3027057945728302, "learning_rate": 9.474966348887055e-06, "loss": 0.0331, "step": 57460 }, { "epoch": 0.4650052593251881, "grad_norm": 0.5474145412445068, "learning_rate": 9.474651329006093e-06, "loss": 0.0412, "step": 57470 }, { "epoch": 0.46508617202039, "grad_norm": 0.5580229163169861, "learning_rate": 9.474336219887654e-06, "loss": 0.039, "step": 57480 }, { "epoch": 0.46516708471559187, "grad_norm": 0.46086445450782776, "learning_rate": 9.474021021538029e-06, "loss": 0.054, "step": 57490 }, { "epoch": 0.46524799741079376, "grad_norm": 0.2661653161048889, "learning_rate": 9.473705733963496e-06, "loss": 0.0343, "step": 57500 }, { "epoch": 0.46532891010599564, "grad_norm": 0.47410672903060913, "learning_rate": 9.473390357170349e-06, "loss": 0.0442, "step": 57510 }, { "epoch": 0.4654098228011975, "grad_norm": 0.8367788195610046, "learning_rate": 9.473074891164875e-06, "loss": 0.0464, "step": 57520 }, { "epoch": 0.4654907354963994, "grad_norm": 0.6340507864952087, "learning_rate": 9.472759335953363e-06, "loss": 0.0432, "step": 57530 }, { "epoch": 0.46557164819160124, "grad_norm": 0.5155782103538513, "learning_rate": 9.47244369154211e-06, "loss": 0.0184, "step": 57540 }, { "epoch": 0.4656525608868031, "grad_norm": 0.2532764971256256, "learning_rate": 9.47212795793741e-06, "loss": 0.0308, "step": 57550 }, { "epoch": 0.465733473582005, "grad_norm": 0.5807511210441589, "learning_rate": 9.471812135145558e-06, "loss": 0.036, "step": 57560 }, { "epoch": 0.4658143862772069, "grad_norm": 0.3153507709503174, "learning_rate": 9.471496223172854e-06, "loss": 0.0364, "step": 57570 }, { "epoch": 0.4658952989724088, "grad_norm": 0.38557085394859314, "learning_rate": 9.471180222025598e-06, "loss": 0.0532, "step": 57580 }, { "epoch": 0.46597621166761066, "grad_norm": 0.5723865628242493, "learning_rate": 9.47086413171009e-06, "loss": 0.0292, "step": 57590 }, { "epoch": 0.46605712436281255, "grad_norm": 0.679690420627594, "learning_rate": 9.470547952232636e-06, "loss": 0.0368, "step": 57600 }, { "epoch": 0.4661380370580144, "grad_norm": 0.7091580629348755, "learning_rate": 9.47023168359954e-06, "loss": 0.0501, "step": 57610 }, { "epoch": 0.46621894975321626, "grad_norm": 0.08005514740943909, "learning_rate": 9.469915325817113e-06, "loss": 0.0389, "step": 57620 }, { "epoch": 0.46629986244841815, "grad_norm": 0.6400285363197327, "learning_rate": 9.46959887889166e-06, "loss": 0.0491, "step": 57630 }, { "epoch": 0.46638077514362003, "grad_norm": 0.7125436663627625, "learning_rate": 9.46928234282949e-06, "loss": 0.0372, "step": 57640 }, { "epoch": 0.4664616878388219, "grad_norm": 0.35611629486083984, "learning_rate": 9.468965717636923e-06, "loss": 0.0341, "step": 57650 }, { "epoch": 0.4665426005340238, "grad_norm": 1.046805739402771, "learning_rate": 9.468649003320268e-06, "loss": 0.0368, "step": 57660 }, { "epoch": 0.4666235132292257, "grad_norm": 0.4786095917224884, "learning_rate": 9.468332199885842e-06, "loss": 0.0348, "step": 57670 }, { "epoch": 0.46670442592442757, "grad_norm": 0.4137709438800812, "learning_rate": 9.468015307339961e-06, "loss": 0.043, "step": 57680 }, { "epoch": 0.4667853386196294, "grad_norm": 0.3634738028049469, "learning_rate": 9.46769832568895e-06, "loss": 0.0331, "step": 57690 }, { "epoch": 0.4668662513148313, "grad_norm": 0.7163322567939758, "learning_rate": 9.467381254939128e-06, "loss": 0.0335, "step": 57700 }, { "epoch": 0.46694716401003317, "grad_norm": 0.24111416935920715, "learning_rate": 9.467064095096817e-06, "loss": 0.0401, "step": 57710 }, { "epoch": 0.46702807670523505, "grad_norm": 0.43231552839279175, "learning_rate": 9.466746846168341e-06, "loss": 0.0309, "step": 57720 }, { "epoch": 0.46710898940043694, "grad_norm": 0.4262543320655823, "learning_rate": 9.466429508160031e-06, "loss": 0.0418, "step": 57730 }, { "epoch": 0.4671899020956388, "grad_norm": 0.49228495359420776, "learning_rate": 9.466112081078214e-06, "loss": 0.03, "step": 57740 }, { "epoch": 0.4672708147908407, "grad_norm": 0.5738756060600281, "learning_rate": 9.465794564929217e-06, "loss": 0.0347, "step": 57750 }, { "epoch": 0.46735172748604253, "grad_norm": 0.5439956188201904, "learning_rate": 9.465476959719379e-06, "loss": 0.0339, "step": 57760 }, { "epoch": 0.4674326401812444, "grad_norm": 0.5937835574150085, "learning_rate": 9.465159265455028e-06, "loss": 0.063, "step": 57770 }, { "epoch": 0.4675135528764463, "grad_norm": 0.6628826260566711, "learning_rate": 9.464841482142502e-06, "loss": 0.038, "step": 57780 }, { "epoch": 0.4675944655716482, "grad_norm": 0.26939764618873596, "learning_rate": 9.464523609788138e-06, "loss": 0.036, "step": 57790 }, { "epoch": 0.4676753782668501, "grad_norm": 0.4456506669521332, "learning_rate": 9.464205648398277e-06, "loss": 0.0346, "step": 57800 }, { "epoch": 0.46775629096205196, "grad_norm": 0.7360861897468567, "learning_rate": 9.463887597979257e-06, "loss": 0.0265, "step": 57810 }, { "epoch": 0.46783720365725384, "grad_norm": 0.4544088840484619, "learning_rate": 9.463569458537422e-06, "loss": 0.0322, "step": 57820 }, { "epoch": 0.4679181163524557, "grad_norm": 0.45015645027160645, "learning_rate": 9.46325123007912e-06, "loss": 0.0389, "step": 57830 }, { "epoch": 0.46799902904765756, "grad_norm": 0.27550673484802246, "learning_rate": 9.462932912610693e-06, "loss": 0.0246, "step": 57840 }, { "epoch": 0.46807994174285944, "grad_norm": 0.3803560435771942, "learning_rate": 9.46261450613849e-06, "loss": 0.04, "step": 57850 }, { "epoch": 0.4681608544380613, "grad_norm": 0.43797147274017334, "learning_rate": 9.462296010668863e-06, "loss": 0.0445, "step": 57860 }, { "epoch": 0.4682417671332632, "grad_norm": 0.36325275897979736, "learning_rate": 9.461977426208162e-06, "loss": 0.0216, "step": 57870 }, { "epoch": 0.4683226798284651, "grad_norm": 0.6660861968994141, "learning_rate": 9.461658752762742e-06, "loss": 0.0294, "step": 57880 }, { "epoch": 0.468403592523667, "grad_norm": 0.17640964686870575, "learning_rate": 9.461339990338955e-06, "loss": 0.0312, "step": 57890 }, { "epoch": 0.46848450521886886, "grad_norm": 0.7513374090194702, "learning_rate": 9.46102113894316e-06, "loss": 0.0347, "step": 57900 }, { "epoch": 0.4685654179140707, "grad_norm": 0.46235591173171997, "learning_rate": 9.460702198581717e-06, "loss": 0.0424, "step": 57910 }, { "epoch": 0.4686463306092726, "grad_norm": 0.5078660249710083, "learning_rate": 9.460383169260986e-06, "loss": 0.0348, "step": 57920 }, { "epoch": 0.46872724330447446, "grad_norm": 0.6130375266075134, "learning_rate": 9.460064050987329e-06, "loss": 0.0321, "step": 57930 }, { "epoch": 0.46880815599967635, "grad_norm": 0.4205833375453949, "learning_rate": 9.459744843767109e-06, "loss": 0.0371, "step": 57940 }, { "epoch": 0.46888906869487823, "grad_norm": 0.1916763335466385, "learning_rate": 9.459425547606695e-06, "loss": 0.0501, "step": 57950 }, { "epoch": 0.4689699813900801, "grad_norm": 0.5029444694519043, "learning_rate": 9.45910616251245e-06, "loss": 0.0376, "step": 57960 }, { "epoch": 0.469050894085282, "grad_norm": 0.6269605755805969, "learning_rate": 9.458786688490748e-06, "loss": 0.0262, "step": 57970 }, { "epoch": 0.4691318067804839, "grad_norm": 0.4359505772590637, "learning_rate": 9.45846712554796e-06, "loss": 0.0343, "step": 57980 }, { "epoch": 0.4692127194756857, "grad_norm": 0.3436647057533264, "learning_rate": 9.458147473690454e-06, "loss": 0.0389, "step": 57990 }, { "epoch": 0.4692936321708876, "grad_norm": 0.535405158996582, "learning_rate": 9.45782773292461e-06, "loss": 0.0298, "step": 58000 }, { "epoch": 0.4693745448660895, "grad_norm": 0.5590020418167114, "learning_rate": 9.457507903256804e-06, "loss": 0.0518, "step": 58010 }, { "epoch": 0.46945545756129137, "grad_norm": 0.2716555893421173, "learning_rate": 9.45718798469341e-06, "loss": 0.0236, "step": 58020 }, { "epoch": 0.46953637025649325, "grad_norm": 0.3771016001701355, "learning_rate": 9.456867977240814e-06, "loss": 0.0309, "step": 58030 }, { "epoch": 0.46961728295169514, "grad_norm": 0.575278639793396, "learning_rate": 9.456547880905394e-06, "loss": 0.0517, "step": 58040 }, { "epoch": 0.469698195646897, "grad_norm": 0.47949111461639404, "learning_rate": 9.456227695693533e-06, "loss": 0.0347, "step": 58050 }, { "epoch": 0.46977910834209885, "grad_norm": 0.5302661657333374, "learning_rate": 9.45590742161162e-06, "loss": 0.0445, "step": 58060 }, { "epoch": 0.46986002103730073, "grad_norm": 0.3249877989292145, "learning_rate": 9.45558705866604e-06, "loss": 0.0369, "step": 58070 }, { "epoch": 0.4699409337325026, "grad_norm": 0.18513086438179016, "learning_rate": 9.455266606863183e-06, "loss": 0.0245, "step": 58080 }, { "epoch": 0.4700218464277045, "grad_norm": 0.6480690240859985, "learning_rate": 9.454946066209437e-06, "loss": 0.0414, "step": 58090 }, { "epoch": 0.4701027591229064, "grad_norm": 0.39576810598373413, "learning_rate": 9.454625436711198e-06, "loss": 0.0288, "step": 58100 }, { "epoch": 0.4701836718181083, "grad_norm": 0.4635423719882965, "learning_rate": 9.454304718374858e-06, "loss": 0.0423, "step": 58110 }, { "epoch": 0.47026458451331016, "grad_norm": 0.1921568065881729, "learning_rate": 9.453983911206816e-06, "loss": 0.0388, "step": 58120 }, { "epoch": 0.47034549720851204, "grad_norm": 0.7011599540710449, "learning_rate": 9.453663015213465e-06, "loss": 0.038, "step": 58130 }, { "epoch": 0.47042640990371387, "grad_norm": 1.1694728136062622, "learning_rate": 9.45334203040121e-06, "loss": 0.0338, "step": 58140 }, { "epoch": 0.47050732259891576, "grad_norm": 0.6719799041748047, "learning_rate": 9.453020956776446e-06, "loss": 0.0315, "step": 58150 }, { "epoch": 0.47058823529411764, "grad_norm": 0.11952268332242966, "learning_rate": 9.452699794345583e-06, "loss": 0.0239, "step": 58160 }, { "epoch": 0.4706691479893195, "grad_norm": 0.7799442410469055, "learning_rate": 9.452378543115021e-06, "loss": 0.0377, "step": 58170 }, { "epoch": 0.4707500606845214, "grad_norm": 0.40075555443763733, "learning_rate": 9.452057203091168e-06, "loss": 0.0375, "step": 58180 }, { "epoch": 0.4708309733797233, "grad_norm": 0.26861345767974854, "learning_rate": 9.451735774280434e-06, "loss": 0.0412, "step": 58190 }, { "epoch": 0.4709118860749252, "grad_norm": 0.6297581791877747, "learning_rate": 9.451414256689227e-06, "loss": 0.0426, "step": 58200 }, { "epoch": 0.470992798770127, "grad_norm": 1.0297921895980835, "learning_rate": 9.451092650323962e-06, "loss": 0.0263, "step": 58210 }, { "epoch": 0.4710737114653289, "grad_norm": 0.4089415371417999, "learning_rate": 9.450770955191049e-06, "loss": 0.0423, "step": 58220 }, { "epoch": 0.4711546241605308, "grad_norm": 0.536167562007904, "learning_rate": 9.450449171296905e-06, "loss": 0.0356, "step": 58230 }, { "epoch": 0.47123553685573266, "grad_norm": 0.4252195656299591, "learning_rate": 9.450127298647949e-06, "loss": 0.0455, "step": 58240 }, { "epoch": 0.47131644955093455, "grad_norm": 0.6452528238296509, "learning_rate": 9.449805337250597e-06, "loss": 0.0296, "step": 58250 }, { "epoch": 0.47139736224613643, "grad_norm": 0.5192789435386658, "learning_rate": 9.449483287111272e-06, "loss": 0.0418, "step": 58260 }, { "epoch": 0.4714782749413383, "grad_norm": 0.6425773501396179, "learning_rate": 9.449161148236396e-06, "loss": 0.0484, "step": 58270 }, { "epoch": 0.4715591876365402, "grad_norm": 0.38880959153175354, "learning_rate": 9.448838920632395e-06, "loss": 0.0405, "step": 58280 }, { "epoch": 0.47164010033174203, "grad_norm": 0.43693849444389343, "learning_rate": 9.448516604305693e-06, "loss": 0.0238, "step": 58290 }, { "epoch": 0.4717210130269439, "grad_norm": 0.6878888010978699, "learning_rate": 9.448194199262718e-06, "loss": 0.0372, "step": 58300 }, { "epoch": 0.4718019257221458, "grad_norm": 0.42430800199508667, "learning_rate": 9.4478717055099e-06, "loss": 0.0316, "step": 58310 }, { "epoch": 0.4718828384173477, "grad_norm": 0.49629050493240356, "learning_rate": 9.447549123053672e-06, "loss": 0.0341, "step": 58320 }, { "epoch": 0.47196375111254957, "grad_norm": 0.3730780780315399, "learning_rate": 9.447226451900464e-06, "loss": 0.0715, "step": 58330 }, { "epoch": 0.47204466380775145, "grad_norm": 0.4124114215373993, "learning_rate": 9.446903692056715e-06, "loss": 0.0313, "step": 58340 }, { "epoch": 0.47212557650295334, "grad_norm": 0.3047392666339874, "learning_rate": 9.446580843528859e-06, "loss": 0.0339, "step": 58350 }, { "epoch": 0.47220648919815517, "grad_norm": 0.38415253162384033, "learning_rate": 9.446257906323335e-06, "loss": 0.041, "step": 58360 }, { "epoch": 0.47228740189335705, "grad_norm": 0.6188899874687195, "learning_rate": 9.445934880446583e-06, "loss": 0.0269, "step": 58370 }, { "epoch": 0.47236831458855894, "grad_norm": 0.522246241569519, "learning_rate": 9.445611765905045e-06, "loss": 0.0399, "step": 58380 }, { "epoch": 0.4724492272837608, "grad_norm": 0.42762166261672974, "learning_rate": 9.445288562705167e-06, "loss": 0.0423, "step": 58390 }, { "epoch": 0.4725301399789627, "grad_norm": 0.8584988117218018, "learning_rate": 9.444965270853392e-06, "loss": 0.0426, "step": 58400 }, { "epoch": 0.4726110526741646, "grad_norm": 0.3827521800994873, "learning_rate": 9.44464189035617e-06, "loss": 0.0401, "step": 58410 }, { "epoch": 0.4726919653693665, "grad_norm": 0.38884034752845764, "learning_rate": 9.444318421219947e-06, "loss": 0.0301, "step": 58420 }, { "epoch": 0.47277287806456836, "grad_norm": 0.40412938594818115, "learning_rate": 9.443994863451177e-06, "loss": 0.0413, "step": 58430 }, { "epoch": 0.4728537907597702, "grad_norm": 0.6720535755157471, "learning_rate": 9.443671217056309e-06, "loss": 0.0336, "step": 58440 }, { "epoch": 0.47293470345497207, "grad_norm": 0.4661959111690521, "learning_rate": 9.4433474820418e-06, "loss": 0.0427, "step": 58450 }, { "epoch": 0.47301561615017396, "grad_norm": 0.17180828750133514, "learning_rate": 9.443023658414106e-06, "loss": 0.0416, "step": 58460 }, { "epoch": 0.47309652884537584, "grad_norm": 0.5891310572624207, "learning_rate": 9.442699746179686e-06, "loss": 0.0372, "step": 58470 }, { "epoch": 0.4731774415405777, "grad_norm": 0.1813744455575943, "learning_rate": 9.442375745344998e-06, "loss": 0.0303, "step": 58480 }, { "epoch": 0.4732583542357796, "grad_norm": 0.40830573439598083, "learning_rate": 9.442051655916503e-06, "loss": 0.0359, "step": 58490 }, { "epoch": 0.4733392669309815, "grad_norm": 0.6943525075912476, "learning_rate": 9.441727477900666e-06, "loss": 0.029, "step": 58500 }, { "epoch": 0.4734201796261833, "grad_norm": 0.277732789516449, "learning_rate": 9.441403211303952e-06, "loss": 0.0316, "step": 58510 }, { "epoch": 0.4735010923213852, "grad_norm": 0.7992910742759705, "learning_rate": 9.441078856132825e-06, "loss": 0.0521, "step": 58520 }, { "epoch": 0.4735820050165871, "grad_norm": 0.4935644865036011, "learning_rate": 9.440754412393759e-06, "loss": 0.0322, "step": 58530 }, { "epoch": 0.473662917711789, "grad_norm": 0.607377827167511, "learning_rate": 9.440429880093219e-06, "loss": 0.0456, "step": 58540 }, { "epoch": 0.47374383040699086, "grad_norm": 0.5502829551696777, "learning_rate": 9.44010525923768e-06, "loss": 0.0345, "step": 58550 }, { "epoch": 0.47382474310219275, "grad_norm": 0.4329335689544678, "learning_rate": 9.439780549833615e-06, "loss": 0.037, "step": 58560 }, { "epoch": 0.47390565579739463, "grad_norm": 0.8872194886207581, "learning_rate": 9.439455751887498e-06, "loss": 0.0314, "step": 58570 }, { "epoch": 0.4739865684925965, "grad_norm": 0.33798810839653015, "learning_rate": 9.43913086540581e-06, "loss": 0.0314, "step": 58580 }, { "epoch": 0.47406748118779835, "grad_norm": 0.6040682792663574, "learning_rate": 9.43880589039503e-06, "loss": 0.0505, "step": 58590 }, { "epoch": 0.47414839388300023, "grad_norm": 0.4687472879886627, "learning_rate": 9.438480826861633e-06, "loss": 0.028, "step": 58600 }, { "epoch": 0.4742293065782021, "grad_norm": 0.3239770531654358, "learning_rate": 9.438155674812109e-06, "loss": 0.0354, "step": 58610 }, { "epoch": 0.474310219273404, "grad_norm": 0.4297836124897003, "learning_rate": 9.43783043425294e-06, "loss": 0.0291, "step": 58620 }, { "epoch": 0.4743911319686059, "grad_norm": 0.5850147604942322, "learning_rate": 9.437505105190609e-06, "loss": 0.0297, "step": 58630 }, { "epoch": 0.47447204466380777, "grad_norm": 0.3724794387817383, "learning_rate": 9.437179687631608e-06, "loss": 0.0268, "step": 58640 }, { "epoch": 0.47455295735900965, "grad_norm": 0.3290077745914459, "learning_rate": 9.436854181582424e-06, "loss": 0.0372, "step": 58650 }, { "epoch": 0.4746338700542115, "grad_norm": 0.30867713689804077, "learning_rate": 9.436528587049552e-06, "loss": 0.04, "step": 58660 }, { "epoch": 0.47471478274941337, "grad_norm": 0.49604907631874084, "learning_rate": 9.436202904039483e-06, "loss": 0.0315, "step": 58670 }, { "epoch": 0.47479569544461525, "grad_norm": 0.373936265707016, "learning_rate": 9.43587713255871e-06, "loss": 0.0207, "step": 58680 }, { "epoch": 0.47487660813981714, "grad_norm": 0.4214334189891815, "learning_rate": 9.435551272613735e-06, "loss": 0.0514, "step": 58690 }, { "epoch": 0.474957520835019, "grad_norm": 0.35865968465805054, "learning_rate": 9.435225324211053e-06, "loss": 0.0409, "step": 58700 }, { "epoch": 0.4750384335302209, "grad_norm": 0.43954962491989136, "learning_rate": 9.434899287357165e-06, "loss": 0.0272, "step": 58710 }, { "epoch": 0.4751193462254228, "grad_norm": 0.3676735758781433, "learning_rate": 9.434573162058574e-06, "loss": 0.0326, "step": 58720 }, { "epoch": 0.4752002589206246, "grad_norm": 0.22255133092403412, "learning_rate": 9.434246948321782e-06, "loss": 0.0225, "step": 58730 }, { "epoch": 0.4752811716158265, "grad_norm": 0.37988927960395813, "learning_rate": 9.433920646153294e-06, "loss": 0.0583, "step": 58740 }, { "epoch": 0.4753620843110284, "grad_norm": 0.4540627598762512, "learning_rate": 9.433594255559621e-06, "loss": 0.0288, "step": 58750 }, { "epoch": 0.4754429970062303, "grad_norm": 0.4030510187149048, "learning_rate": 9.43326777654727e-06, "loss": 0.0614, "step": 58760 }, { "epoch": 0.47552390970143216, "grad_norm": 0.5270794630050659, "learning_rate": 9.432941209122753e-06, "loss": 0.0355, "step": 58770 }, { "epoch": 0.47560482239663404, "grad_norm": 0.4790700674057007, "learning_rate": 9.432614553292582e-06, "loss": 0.0244, "step": 58780 }, { "epoch": 0.4756857350918359, "grad_norm": 0.3494177758693695, "learning_rate": 9.43228780906327e-06, "loss": 0.05, "step": 58790 }, { "epoch": 0.4757666477870378, "grad_norm": 0.6768039464950562, "learning_rate": 9.431960976441336e-06, "loss": 0.0343, "step": 58800 }, { "epoch": 0.47584756048223964, "grad_norm": 0.15779873728752136, "learning_rate": 9.431634055433296e-06, "loss": 0.0361, "step": 58810 }, { "epoch": 0.4759284731774415, "grad_norm": 0.46557262539863586, "learning_rate": 9.43130704604567e-06, "loss": 0.0374, "step": 58820 }, { "epoch": 0.4760093858726434, "grad_norm": 0.33786994218826294, "learning_rate": 9.430979948284981e-06, "loss": 0.0352, "step": 58830 }, { "epoch": 0.4760902985678453, "grad_norm": 0.32685261964797974, "learning_rate": 9.43065276215775e-06, "loss": 0.0173, "step": 58840 }, { "epoch": 0.4761712112630472, "grad_norm": 0.4581605792045593, "learning_rate": 9.430325487670504e-06, "loss": 0.0287, "step": 58850 }, { "epoch": 0.47625212395824906, "grad_norm": 0.4371033012866974, "learning_rate": 9.429998124829769e-06, "loss": 0.0192, "step": 58860 }, { "epoch": 0.47633303665345095, "grad_norm": 0.7753878235816956, "learning_rate": 9.429670673642072e-06, "loss": 0.0498, "step": 58870 }, { "epoch": 0.4764139493486528, "grad_norm": 0.47464749217033386, "learning_rate": 9.429343134113946e-06, "loss": 0.0269, "step": 58880 }, { "epoch": 0.47649486204385466, "grad_norm": 0.8654724359512329, "learning_rate": 9.429015506251921e-06, "loss": 0.041, "step": 58890 }, { "epoch": 0.47657577473905655, "grad_norm": 0.5601073503494263, "learning_rate": 9.428687790062534e-06, "loss": 0.0468, "step": 58900 }, { "epoch": 0.47665668743425843, "grad_norm": 0.359638512134552, "learning_rate": 9.428359985552318e-06, "loss": 0.0396, "step": 58910 }, { "epoch": 0.4767376001294603, "grad_norm": 0.5008466839790344, "learning_rate": 9.42803209272781e-06, "loss": 0.0309, "step": 58920 }, { "epoch": 0.4768185128246622, "grad_norm": 0.5339484214782715, "learning_rate": 9.427704111595548e-06, "loss": 0.0262, "step": 58930 }, { "epoch": 0.4768994255198641, "grad_norm": 0.23964498937129974, "learning_rate": 9.427376042162078e-06, "loss": 0.039, "step": 58940 }, { "epoch": 0.47698033821506597, "grad_norm": 0.4555225968360901, "learning_rate": 9.427047884433938e-06, "loss": 0.0469, "step": 58950 }, { "epoch": 0.4770612509102678, "grad_norm": 0.552869975566864, "learning_rate": 9.426719638417675e-06, "loss": 0.0261, "step": 58960 }, { "epoch": 0.4771421636054697, "grad_norm": 0.4426959156990051, "learning_rate": 9.426391304119831e-06, "loss": 0.045, "step": 58970 }, { "epoch": 0.47722307630067157, "grad_norm": 0.7774174809455872, "learning_rate": 9.426062881546959e-06, "loss": 0.049, "step": 58980 }, { "epoch": 0.47730398899587345, "grad_norm": 0.44149646162986755, "learning_rate": 9.425734370705606e-06, "loss": 0.0334, "step": 58990 }, { "epoch": 0.47738490169107534, "grad_norm": 0.5894386768341064, "learning_rate": 9.425405771602325e-06, "loss": 0.0231, "step": 59000 }, { "epoch": 0.4774658143862772, "grad_norm": 0.30928343534469604, "learning_rate": 9.425077084243669e-06, "loss": 0.0438, "step": 59010 }, { "epoch": 0.4775467270814791, "grad_norm": 0.3642633855342865, "learning_rate": 9.424748308636191e-06, "loss": 0.0381, "step": 59020 }, { "epoch": 0.47762763977668093, "grad_norm": 0.24673578143119812, "learning_rate": 9.42441944478645e-06, "loss": 0.0381, "step": 59030 }, { "epoch": 0.4777085524718828, "grad_norm": 0.6746476888656616, "learning_rate": 9.424090492701001e-06, "loss": 0.0349, "step": 59040 }, { "epoch": 0.4777894651670847, "grad_norm": 0.5813412666320801, "learning_rate": 9.42376145238641e-06, "loss": 0.0234, "step": 59050 }, { "epoch": 0.4778703778622866, "grad_norm": 0.9551352858543396, "learning_rate": 9.423432323849233e-06, "loss": 0.0514, "step": 59060 }, { "epoch": 0.4779512905574885, "grad_norm": 0.29871729016304016, "learning_rate": 9.423103107096038e-06, "loss": 0.0202, "step": 59070 }, { "epoch": 0.47803220325269036, "grad_norm": 0.6399058699607849, "learning_rate": 9.422773802133391e-06, "loss": 0.0302, "step": 59080 }, { "epoch": 0.47811311594789224, "grad_norm": 0.4667963981628418, "learning_rate": 9.422444408967855e-06, "loss": 0.027, "step": 59090 }, { "epoch": 0.4781940286430941, "grad_norm": 0.8306041955947876, "learning_rate": 9.422114927606002e-06, "loss": 0.0349, "step": 59100 }, { "epoch": 0.47827494133829596, "grad_norm": 0.44530344009399414, "learning_rate": 9.421785358054402e-06, "loss": 0.0425, "step": 59110 }, { "epoch": 0.47835585403349784, "grad_norm": 0.4489332139492035, "learning_rate": 9.421455700319629e-06, "loss": 0.0346, "step": 59120 }, { "epoch": 0.4784367667286997, "grad_norm": 0.40899768471717834, "learning_rate": 9.421125954408255e-06, "loss": 0.0382, "step": 59130 }, { "epoch": 0.4785176794239016, "grad_norm": 0.28292593359947205, "learning_rate": 9.420796120326856e-06, "loss": 0.0388, "step": 59140 }, { "epoch": 0.4785985921191035, "grad_norm": 0.6348605751991272, "learning_rate": 9.420466198082015e-06, "loss": 0.0359, "step": 59150 }, { "epoch": 0.4786795048143054, "grad_norm": 0.23777039349079132, "learning_rate": 9.420136187680304e-06, "loss": 0.0425, "step": 59160 }, { "epoch": 0.47876041750950726, "grad_norm": 0.34164339303970337, "learning_rate": 9.419806089128309e-06, "loss": 0.0224, "step": 59170 }, { "epoch": 0.4788413302047091, "grad_norm": 0.3379477560520172, "learning_rate": 9.419475902432614e-06, "loss": 0.0244, "step": 59180 }, { "epoch": 0.478922242899911, "grad_norm": 0.4617325961589813, "learning_rate": 9.4191456275998e-06, "loss": 0.0401, "step": 59190 }, { "epoch": 0.47900315559511286, "grad_norm": 0.4953335225582123, "learning_rate": 9.418815264636455e-06, "loss": 0.0172, "step": 59200 }, { "epoch": 0.47908406829031475, "grad_norm": 0.3368057608604431, "learning_rate": 9.418484813549172e-06, "loss": 0.0314, "step": 59210 }, { "epoch": 0.47916498098551663, "grad_norm": 0.2260817289352417, "learning_rate": 9.418154274344534e-06, "loss": 0.0333, "step": 59220 }, { "epoch": 0.4792458936807185, "grad_norm": 0.43902483582496643, "learning_rate": 9.417823647029137e-06, "loss": 0.0448, "step": 59230 }, { "epoch": 0.4793268063759204, "grad_norm": 0.48625481128692627, "learning_rate": 9.417492931609574e-06, "loss": 0.0301, "step": 59240 }, { "epoch": 0.4794077190711223, "grad_norm": 0.4207685589790344, "learning_rate": 9.417162128092438e-06, "loss": 0.0369, "step": 59250 }, { "epoch": 0.4794886317663241, "grad_norm": 0.5553756952285767, "learning_rate": 9.41683123648433e-06, "loss": 0.0617, "step": 59260 }, { "epoch": 0.479569544461526, "grad_norm": 0.08674298226833344, "learning_rate": 9.416500256791847e-06, "loss": 0.0332, "step": 59270 }, { "epoch": 0.4796504571567279, "grad_norm": 0.4217061400413513, "learning_rate": 9.41616918902159e-06, "loss": 0.0442, "step": 59280 }, { "epoch": 0.47973136985192977, "grad_norm": 0.5072044730186462, "learning_rate": 9.415838033180164e-06, "loss": 0.0306, "step": 59290 }, { "epoch": 0.47981228254713165, "grad_norm": 0.42056670784950256, "learning_rate": 9.415506789274168e-06, "loss": 0.0306, "step": 59300 }, { "epoch": 0.47989319524233354, "grad_norm": 0.3050372004508972, "learning_rate": 9.415175457310211e-06, "loss": 0.0507, "step": 59310 }, { "epoch": 0.4799741079375354, "grad_norm": 0.23747628927230835, "learning_rate": 9.414844037294902e-06, "loss": 0.0294, "step": 59320 }, { "epoch": 0.48005502063273725, "grad_norm": 0.3323342800140381, "learning_rate": 9.414512529234848e-06, "loss": 0.0403, "step": 59330 }, { "epoch": 0.48013593332793914, "grad_norm": 0.5589886903762817, "learning_rate": 9.41418093313666e-06, "loss": 0.0282, "step": 59340 }, { "epoch": 0.480216846023141, "grad_norm": 0.574981153011322, "learning_rate": 9.413849249006951e-06, "loss": 0.0451, "step": 59350 }, { "epoch": 0.4802977587183429, "grad_norm": 0.7887188196182251, "learning_rate": 9.413517476852338e-06, "loss": 0.0518, "step": 59360 }, { "epoch": 0.4803786714135448, "grad_norm": 0.6080708503723145, "learning_rate": 9.413185616679438e-06, "loss": 0.0492, "step": 59370 }, { "epoch": 0.4804595841087467, "grad_norm": 0.48433956503868103, "learning_rate": 9.412853668494866e-06, "loss": 0.0389, "step": 59380 }, { "epoch": 0.48054049680394856, "grad_norm": 0.7485136389732361, "learning_rate": 9.412521632305244e-06, "loss": 0.0362, "step": 59390 }, { "epoch": 0.48062140949915044, "grad_norm": 0.52790766954422, "learning_rate": 9.412189508117194e-06, "loss": 0.0467, "step": 59400 }, { "epoch": 0.48070232219435227, "grad_norm": 0.46982327103614807, "learning_rate": 9.411857295937336e-06, "loss": 0.0368, "step": 59410 }, { "epoch": 0.48078323488955416, "grad_norm": 0.279509574174881, "learning_rate": 9.4115249957723e-06, "loss": 0.045, "step": 59420 }, { "epoch": 0.48086414758475604, "grad_norm": 0.2013218104839325, "learning_rate": 9.411192607628712e-06, "loss": 0.0612, "step": 59430 }, { "epoch": 0.4809450602799579, "grad_norm": 0.3303880989551544, "learning_rate": 9.410860131513197e-06, "loss": 0.0458, "step": 59440 }, { "epoch": 0.4810259729751598, "grad_norm": 0.3475942611694336, "learning_rate": 9.410527567432392e-06, "loss": 0.0348, "step": 59450 }, { "epoch": 0.4811068856703617, "grad_norm": 0.3363294005393982, "learning_rate": 9.410194915392923e-06, "loss": 0.0198, "step": 59460 }, { "epoch": 0.4811877983655636, "grad_norm": 0.6649615168571472, "learning_rate": 9.409862175401428e-06, "loss": 0.0456, "step": 59470 }, { "epoch": 0.4812687110607654, "grad_norm": 0.48645785450935364, "learning_rate": 9.409529347464541e-06, "loss": 0.0295, "step": 59480 }, { "epoch": 0.4813496237559673, "grad_norm": 0.498401403427124, "learning_rate": 9.409196431588901e-06, "loss": 0.0263, "step": 59490 }, { "epoch": 0.4814305364511692, "grad_norm": 0.8403133153915405, "learning_rate": 9.408863427781145e-06, "loss": 0.0309, "step": 59500 }, { "epoch": 0.48151144914637106, "grad_norm": 0.5086564421653748, "learning_rate": 9.408530336047916e-06, "loss": 0.049, "step": 59510 }, { "epoch": 0.48159236184157295, "grad_norm": 0.5107433199882507, "learning_rate": 9.408197156395858e-06, "loss": 0.0272, "step": 59520 }, { "epoch": 0.48167327453677483, "grad_norm": 0.6598107814788818, "learning_rate": 9.407863888831611e-06, "loss": 0.029, "step": 59530 }, { "epoch": 0.4817541872319767, "grad_norm": 0.5570625066757202, "learning_rate": 9.407530533361827e-06, "loss": 0.0423, "step": 59540 }, { "epoch": 0.4818350999271786, "grad_norm": 0.5837586522102356, "learning_rate": 9.407197089993148e-06, "loss": 0.0447, "step": 59550 }, { "epoch": 0.48191601262238043, "grad_norm": 0.7787873148918152, "learning_rate": 9.40686355873223e-06, "loss": 0.0428, "step": 59560 }, { "epoch": 0.4819969253175823, "grad_norm": 0.5407005548477173, "learning_rate": 9.40652993958572e-06, "loss": 0.0237, "step": 59570 }, { "epoch": 0.4820778380127842, "grad_norm": 0.599122166633606, "learning_rate": 9.406196232560272e-06, "loss": 0.037, "step": 59580 }, { "epoch": 0.4821587507079861, "grad_norm": 0.35266172885894775, "learning_rate": 9.405862437662546e-06, "loss": 0.0424, "step": 59590 }, { "epoch": 0.48223966340318797, "grad_norm": 0.05786996707320213, "learning_rate": 9.40552855489919e-06, "loss": 0.0318, "step": 59600 }, { "epoch": 0.48232057609838985, "grad_norm": 0.672513484954834, "learning_rate": 9.40519458427687e-06, "loss": 0.0332, "step": 59610 }, { "epoch": 0.48240148879359174, "grad_norm": 0.3793911039829254, "learning_rate": 9.404860525802244e-06, "loss": 0.053, "step": 59620 }, { "epoch": 0.48248240148879357, "grad_norm": 0.49243083596229553, "learning_rate": 9.404526379481972e-06, "loss": 0.0415, "step": 59630 }, { "epoch": 0.48256331418399545, "grad_norm": 0.4176320731639862, "learning_rate": 9.40419214532272e-06, "loss": 0.04, "step": 59640 }, { "epoch": 0.48264422687919734, "grad_norm": 0.46531054377555847, "learning_rate": 9.403857823331157e-06, "loss": 0.0234, "step": 59650 }, { "epoch": 0.4827251395743992, "grad_norm": 0.38230210542678833, "learning_rate": 9.403523413513945e-06, "loss": 0.0423, "step": 59660 }, { "epoch": 0.4828060522696011, "grad_norm": 0.3334353566169739, "learning_rate": 9.403188915877753e-06, "loss": 0.0521, "step": 59670 }, { "epoch": 0.482886964964803, "grad_norm": 0.7970246076583862, "learning_rate": 9.402854330429254e-06, "loss": 0.0404, "step": 59680 }, { "epoch": 0.4829678776600049, "grad_norm": 0.4468485414981842, "learning_rate": 9.40251965717512e-06, "loss": 0.0221, "step": 59690 }, { "epoch": 0.48304879035520676, "grad_norm": 0.40464702248573303, "learning_rate": 9.402184896122026e-06, "loss": 0.0394, "step": 59700 }, { "epoch": 0.4831297030504086, "grad_norm": 0.1726762354373932, "learning_rate": 9.401850047276648e-06, "loss": 0.032, "step": 59710 }, { "epoch": 0.4832106157456105, "grad_norm": 0.555151641368866, "learning_rate": 9.401515110645665e-06, "loss": 0.0374, "step": 59720 }, { "epoch": 0.48329152844081236, "grad_norm": 0.9686716198921204, "learning_rate": 9.401180086235753e-06, "loss": 0.042, "step": 59730 }, { "epoch": 0.48337244113601424, "grad_norm": 0.25137341022491455, "learning_rate": 9.400844974053595e-06, "loss": 0.0298, "step": 59740 }, { "epoch": 0.4834533538312161, "grad_norm": 0.6357835531234741, "learning_rate": 9.400509774105878e-06, "loss": 0.0341, "step": 59750 }, { "epoch": 0.483534266526418, "grad_norm": 1.3517756462097168, "learning_rate": 9.400174486399279e-06, "loss": 0.0353, "step": 59760 }, { "epoch": 0.4836151792216199, "grad_norm": 0.9260281920433044, "learning_rate": 9.399839110940492e-06, "loss": 0.0408, "step": 59770 }, { "epoch": 0.4836960919168217, "grad_norm": 0.37380388379096985, "learning_rate": 9.399503647736202e-06, "loss": 0.0217, "step": 59780 }, { "epoch": 0.4837770046120236, "grad_norm": 0.4750296473503113, "learning_rate": 9.399168096793098e-06, "loss": 0.03, "step": 59790 }, { "epoch": 0.4838579173072255, "grad_norm": 0.397597998380661, "learning_rate": 9.398832458117874e-06, "loss": 0.037, "step": 59800 }, { "epoch": 0.4839388300024274, "grad_norm": 0.717880368232727, "learning_rate": 9.398496731717223e-06, "loss": 0.0297, "step": 59810 }, { "epoch": 0.48401974269762926, "grad_norm": 0.4427758455276489, "learning_rate": 9.398160917597842e-06, "loss": 0.0421, "step": 59820 }, { "epoch": 0.48410065539283115, "grad_norm": 0.3771912157535553, "learning_rate": 9.397825015766424e-06, "loss": 0.0417, "step": 59830 }, { "epoch": 0.48418156808803303, "grad_norm": 0.2631987929344177, "learning_rate": 9.397489026229672e-06, "loss": 0.0344, "step": 59840 }, { "epoch": 0.4842624807832349, "grad_norm": 0.9069282412528992, "learning_rate": 9.397152948994283e-06, "loss": 0.0342, "step": 59850 }, { "epoch": 0.48434339347843675, "grad_norm": 0.3554232120513916, "learning_rate": 9.396816784066964e-06, "loss": 0.0416, "step": 59860 }, { "epoch": 0.48442430617363863, "grad_norm": 0.2894960045814514, "learning_rate": 9.396480531454415e-06, "loss": 0.0267, "step": 59870 }, { "epoch": 0.4845052188688405, "grad_norm": 0.4007628560066223, "learning_rate": 9.396144191163344e-06, "loss": 0.0433, "step": 59880 }, { "epoch": 0.4845861315640424, "grad_norm": 0.7481912970542908, "learning_rate": 9.395807763200455e-06, "loss": 0.0417, "step": 59890 }, { "epoch": 0.4846670442592443, "grad_norm": 0.7466403245925903, "learning_rate": 9.395471247572463e-06, "loss": 0.0297, "step": 59900 }, { "epoch": 0.48474795695444617, "grad_norm": 0.6954705119132996, "learning_rate": 9.395134644286074e-06, "loss": 0.0388, "step": 59910 }, { "epoch": 0.48482886964964805, "grad_norm": 0.36693835258483887, "learning_rate": 9.394797953348005e-06, "loss": 0.0397, "step": 59920 }, { "epoch": 0.4849097823448499, "grad_norm": 0.44614505767822266, "learning_rate": 9.394461174764969e-06, "loss": 0.0289, "step": 59930 }, { "epoch": 0.48499069504005177, "grad_norm": 0.39564597606658936, "learning_rate": 9.39412430854368e-06, "loss": 0.0409, "step": 59940 }, { "epoch": 0.48507160773525365, "grad_norm": 0.4749371409416199, "learning_rate": 9.393787354690858e-06, "loss": 0.0294, "step": 59950 }, { "epoch": 0.48515252043045554, "grad_norm": 1.0389158725738525, "learning_rate": 9.393450313213223e-06, "loss": 0.0416, "step": 59960 }, { "epoch": 0.4852334331256574, "grad_norm": 0.3254346251487732, "learning_rate": 9.393113184117499e-06, "loss": 0.0337, "step": 59970 }, { "epoch": 0.4853143458208593, "grad_norm": 0.3737722337245941, "learning_rate": 9.392775967410404e-06, "loss": 0.0259, "step": 59980 }, { "epoch": 0.4853952585160612, "grad_norm": 0.2018241286277771, "learning_rate": 9.392438663098668e-06, "loss": 0.0294, "step": 59990 }, { "epoch": 0.4854761712112631, "grad_norm": 0.12403656542301178, "learning_rate": 9.392101271189015e-06, "loss": 0.0403, "step": 60000 }, { "epoch": 0.4855570839064649, "grad_norm": 0.5287642478942871, "learning_rate": 9.391763791688173e-06, "loss": 0.0309, "step": 60010 }, { "epoch": 0.4856379966016668, "grad_norm": 0.6392723321914673, "learning_rate": 9.391426224602876e-06, "loss": 0.0371, "step": 60020 }, { "epoch": 0.4857189092968687, "grad_norm": 0.46417078375816345, "learning_rate": 9.391088569939852e-06, "loss": 0.0344, "step": 60030 }, { "epoch": 0.48579982199207056, "grad_norm": 0.39002397656440735, "learning_rate": 9.390750827705837e-06, "loss": 0.058, "step": 60040 }, { "epoch": 0.48588073468727244, "grad_norm": 0.2942560613155365, "learning_rate": 9.390412997907566e-06, "loss": 0.0258, "step": 60050 }, { "epoch": 0.4859616473824743, "grad_norm": 0.5584202408790588, "learning_rate": 9.390075080551775e-06, "loss": 0.0446, "step": 60060 }, { "epoch": 0.4860425600776762, "grad_norm": 0.5784862041473389, "learning_rate": 9.389737075645206e-06, "loss": 0.035, "step": 60070 }, { "epoch": 0.48612347277287804, "grad_norm": 0.480308473110199, "learning_rate": 9.389398983194598e-06, "loss": 0.047, "step": 60080 }, { "epoch": 0.4862043854680799, "grad_norm": 0.2841971516609192, "learning_rate": 9.389060803206694e-06, "loss": 0.032, "step": 60090 }, { "epoch": 0.4862852981632818, "grad_norm": 0.05915212631225586, "learning_rate": 9.388722535688236e-06, "loss": 0.0365, "step": 60100 }, { "epoch": 0.4863662108584837, "grad_norm": 0.4174940288066864, "learning_rate": 9.388384180645972e-06, "loss": 0.0322, "step": 60110 }, { "epoch": 0.4864471235536856, "grad_norm": 0.6139050126075745, "learning_rate": 9.388045738086653e-06, "loss": 0.0318, "step": 60120 }, { "epoch": 0.48652803624888746, "grad_norm": 0.19264881312847137, "learning_rate": 9.387707208017023e-06, "loss": 0.0291, "step": 60130 }, { "epoch": 0.48660894894408935, "grad_norm": 1.504334807395935, "learning_rate": 9.387368590443837e-06, "loss": 0.0239, "step": 60140 }, { "epoch": 0.48668986163929123, "grad_norm": 0.7172138690948486, "learning_rate": 9.387029885373846e-06, "loss": 0.0363, "step": 60150 }, { "epoch": 0.48677077433449306, "grad_norm": 0.6581264138221741, "learning_rate": 9.386691092813804e-06, "loss": 0.0281, "step": 60160 }, { "epoch": 0.48685168702969495, "grad_norm": 0.5624382495880127, "learning_rate": 9.38635221277047e-06, "loss": 0.0445, "step": 60170 }, { "epoch": 0.48693259972489683, "grad_norm": 0.6414440274238586, "learning_rate": 9.3860132452506e-06, "loss": 0.0365, "step": 60180 }, { "epoch": 0.4870135124200987, "grad_norm": 0.6577167510986328, "learning_rate": 9.385674190260957e-06, "loss": 0.0313, "step": 60190 }, { "epoch": 0.4870944251153006, "grad_norm": 0.42203691601753235, "learning_rate": 9.3853350478083e-06, "loss": 0.0551, "step": 60200 }, { "epoch": 0.4871753378105025, "grad_norm": 0.3586624264717102, "learning_rate": 9.384995817899393e-06, "loss": 0.0352, "step": 60210 }, { "epoch": 0.48725625050570437, "grad_norm": 0.07857605069875717, "learning_rate": 9.384656500541003e-06, "loss": 0.0387, "step": 60220 }, { "epoch": 0.4873371632009062, "grad_norm": 0.4560227394104004, "learning_rate": 9.384317095739894e-06, "loss": 0.02, "step": 60230 }, { "epoch": 0.4874180758961081, "grad_norm": 0.6078750491142273, "learning_rate": 9.383977603502836e-06, "loss": 0.0508, "step": 60240 }, { "epoch": 0.48749898859130997, "grad_norm": 0.9260947704315186, "learning_rate": 9.383638023836602e-06, "loss": 0.0435, "step": 60250 }, { "epoch": 0.48757990128651185, "grad_norm": 0.18659153580665588, "learning_rate": 9.38329835674796e-06, "loss": 0.0365, "step": 60260 }, { "epoch": 0.48766081398171374, "grad_norm": 1.1488713026046753, "learning_rate": 9.382958602243686e-06, "loss": 0.0359, "step": 60270 }, { "epoch": 0.4877417266769156, "grad_norm": 0.6544560194015503, "learning_rate": 9.382618760330555e-06, "loss": 0.0221, "step": 60280 }, { "epoch": 0.4878226393721175, "grad_norm": 0.3652254641056061, "learning_rate": 9.382278831015346e-06, "loss": 0.0271, "step": 60290 }, { "epoch": 0.48790355206731933, "grad_norm": 0.5155887007713318, "learning_rate": 9.381938814304836e-06, "loss": 0.0237, "step": 60300 }, { "epoch": 0.4879844647625212, "grad_norm": 0.5722274780273438, "learning_rate": 9.381598710205808e-06, "loss": 0.0408, "step": 60310 }, { "epoch": 0.4880653774577231, "grad_norm": 0.4918036460876465, "learning_rate": 9.381258518725043e-06, "loss": 0.0236, "step": 60320 }, { "epoch": 0.488146290152925, "grad_norm": 0.37830638885498047, "learning_rate": 9.380918239869327e-06, "loss": 0.0305, "step": 60330 }, { "epoch": 0.4882272028481269, "grad_norm": 0.7753929495811462, "learning_rate": 9.380577873645445e-06, "loss": 0.0645, "step": 60340 }, { "epoch": 0.48830811554332876, "grad_norm": 0.29970115423202515, "learning_rate": 9.380237420060186e-06, "loss": 0.0191, "step": 60350 }, { "epoch": 0.48838902823853064, "grad_norm": 0.4846806526184082, "learning_rate": 9.379896879120336e-06, "loss": 0.0293, "step": 60360 }, { "epoch": 0.4884699409337325, "grad_norm": 0.340868204832077, "learning_rate": 9.37955625083269e-06, "loss": 0.036, "step": 60370 }, { "epoch": 0.48855085362893436, "grad_norm": 0.6064403653144836, "learning_rate": 9.379215535204042e-06, "loss": 0.0295, "step": 60380 }, { "epoch": 0.48863176632413624, "grad_norm": 0.37047651410102844, "learning_rate": 9.378874732241184e-06, "loss": 0.0495, "step": 60390 }, { "epoch": 0.4887126790193381, "grad_norm": 0.5741109848022461, "learning_rate": 9.378533841950913e-06, "loss": 0.0449, "step": 60400 }, { "epoch": 0.48879359171454, "grad_norm": 0.44215062260627747, "learning_rate": 9.378192864340028e-06, "loss": 0.0475, "step": 60410 }, { "epoch": 0.4888745044097419, "grad_norm": 0.4160781800746918, "learning_rate": 9.377851799415331e-06, "loss": 0.0285, "step": 60420 }, { "epoch": 0.4889554171049438, "grad_norm": 0.41322141885757446, "learning_rate": 9.377510647183621e-06, "loss": 0.0328, "step": 60430 }, { "epoch": 0.48903632980014566, "grad_norm": 0.5848141312599182, "learning_rate": 9.377169407651702e-06, "loss": 0.0488, "step": 60440 }, { "epoch": 0.4891172424953475, "grad_norm": 0.3123053312301636, "learning_rate": 9.376828080826379e-06, "loss": 0.0384, "step": 60450 }, { "epoch": 0.4891981551905494, "grad_norm": 0.6516555547714233, "learning_rate": 9.376486666714462e-06, "loss": 0.0351, "step": 60460 }, { "epoch": 0.48927906788575126, "grad_norm": 1.1721558570861816, "learning_rate": 9.376145165322757e-06, "loss": 0.0404, "step": 60470 }, { "epoch": 0.48935998058095315, "grad_norm": 0.9489204287528992, "learning_rate": 9.375803576658073e-06, "loss": 0.0493, "step": 60480 }, { "epoch": 0.48944089327615503, "grad_norm": 0.6012154817581177, "learning_rate": 9.375461900727227e-06, "loss": 0.0345, "step": 60490 }, { "epoch": 0.4895218059713569, "grad_norm": 0.6279401779174805, "learning_rate": 9.375120137537028e-06, "loss": 0.0416, "step": 60500 }, { "epoch": 0.4896027186665588, "grad_norm": 0.4105626344680786, "learning_rate": 9.374778287094296e-06, "loss": 0.035, "step": 60510 }, { "epoch": 0.4896836313617607, "grad_norm": 0.3636764883995056, "learning_rate": 9.374436349405847e-06, "loss": 0.0372, "step": 60520 }, { "epoch": 0.4897645440569625, "grad_norm": 0.27490678429603577, "learning_rate": 9.374094324478497e-06, "loss": 0.0359, "step": 60530 }, { "epoch": 0.4898454567521644, "grad_norm": 0.6207768321037292, "learning_rate": 9.373752212319073e-06, "loss": 0.0505, "step": 60540 }, { "epoch": 0.4899263694473663, "grad_norm": 1.2184702157974243, "learning_rate": 9.373410012934393e-06, "loss": 0.0461, "step": 60550 }, { "epoch": 0.49000728214256817, "grad_norm": 0.541807234287262, "learning_rate": 9.373067726331282e-06, "loss": 0.0435, "step": 60560 }, { "epoch": 0.49008819483777005, "grad_norm": 0.4910661578178406, "learning_rate": 9.37272535251657e-06, "loss": 0.0325, "step": 60570 }, { "epoch": 0.49016910753297194, "grad_norm": 0.5223675966262817, "learning_rate": 9.37238289149708e-06, "loss": 0.0328, "step": 60580 }, { "epoch": 0.4902500202281738, "grad_norm": 0.31556436419487, "learning_rate": 9.372040343279645e-06, "loss": 0.0196, "step": 60590 }, { "epoch": 0.49033093292337565, "grad_norm": 3.8824961185455322, "learning_rate": 9.371697707871093e-06, "loss": 0.0395, "step": 60600 }, { "epoch": 0.49041184561857754, "grad_norm": 0.5354754328727722, "learning_rate": 9.37135498527826e-06, "loss": 0.0311, "step": 60610 }, { "epoch": 0.4904927583137794, "grad_norm": 1.0576117038726807, "learning_rate": 9.371012175507981e-06, "loss": 0.0441, "step": 60620 }, { "epoch": 0.4905736710089813, "grad_norm": 0.7377346754074097, "learning_rate": 9.370669278567093e-06, "loss": 0.0539, "step": 60630 }, { "epoch": 0.4906545837041832, "grad_norm": 0.30814045667648315, "learning_rate": 9.37032629446243e-06, "loss": 0.0464, "step": 60640 }, { "epoch": 0.4907354963993851, "grad_norm": 0.7412229776382446, "learning_rate": 9.369983223200837e-06, "loss": 0.031, "step": 60650 }, { "epoch": 0.49081640909458696, "grad_norm": 0.6383092999458313, "learning_rate": 9.369640064789156e-06, "loss": 0.0338, "step": 60660 }, { "epoch": 0.49089732178978884, "grad_norm": 0.25742587447166443, "learning_rate": 9.369296819234226e-06, "loss": 0.0276, "step": 60670 }, { "epoch": 0.49097823448499067, "grad_norm": 0.1248420849442482, "learning_rate": 9.368953486542895e-06, "loss": 0.0246, "step": 60680 }, { "epoch": 0.49105914718019256, "grad_norm": 0.7930432558059692, "learning_rate": 9.368610066722011e-06, "loss": 0.0482, "step": 60690 }, { "epoch": 0.49114005987539444, "grad_norm": 0.40337422490119934, "learning_rate": 9.36826655977842e-06, "loss": 0.0436, "step": 60700 }, { "epoch": 0.4912209725705963, "grad_norm": 0.5439652800559998, "learning_rate": 9.367922965718976e-06, "loss": 0.0335, "step": 60710 }, { "epoch": 0.4913018852657982, "grad_norm": 0.7167364358901978, "learning_rate": 9.36757928455053e-06, "loss": 0.032, "step": 60720 }, { "epoch": 0.4913827979610001, "grad_norm": 0.33733341097831726, "learning_rate": 9.367235516279934e-06, "loss": 0.0355, "step": 60730 }, { "epoch": 0.491463710656202, "grad_norm": 0.37038153409957886, "learning_rate": 9.366891660914046e-06, "loss": 0.0432, "step": 60740 }, { "epoch": 0.4915446233514038, "grad_norm": 0.6470616459846497, "learning_rate": 9.366547718459723e-06, "loss": 0.0337, "step": 60750 }, { "epoch": 0.4916255360466057, "grad_norm": 0.40041613578796387, "learning_rate": 9.366203688923824e-06, "loss": 0.0225, "step": 60760 }, { "epoch": 0.4917064487418076, "grad_norm": 0.6030164957046509, "learning_rate": 9.365859572313209e-06, "loss": 0.0379, "step": 60770 }, { "epoch": 0.49178736143700946, "grad_norm": 0.23577247560024261, "learning_rate": 9.365515368634742e-06, "loss": 0.0311, "step": 60780 }, { "epoch": 0.49186827413221135, "grad_norm": 0.6368563771247864, "learning_rate": 9.365171077895288e-06, "loss": 0.0455, "step": 60790 }, { "epoch": 0.49194918682741323, "grad_norm": 0.37787455320358276, "learning_rate": 9.36482670010171e-06, "loss": 0.0384, "step": 60800 }, { "epoch": 0.4920300995226151, "grad_norm": 0.5499204397201538, "learning_rate": 9.36448223526088e-06, "loss": 0.0495, "step": 60810 }, { "epoch": 0.492111012217817, "grad_norm": 0.694862961769104, "learning_rate": 9.364137683379665e-06, "loss": 0.0209, "step": 60820 }, { "epoch": 0.49219192491301883, "grad_norm": 0.5564811825752258, "learning_rate": 9.363793044464938e-06, "loss": 0.0342, "step": 60830 }, { "epoch": 0.4922728376082207, "grad_norm": 0.1776570826768875, "learning_rate": 9.363448318523569e-06, "loss": 0.0291, "step": 60840 }, { "epoch": 0.4923537503034226, "grad_norm": 0.46129485964775085, "learning_rate": 9.363103505562436e-06, "loss": 0.0202, "step": 60850 }, { "epoch": 0.4924346629986245, "grad_norm": 0.45839378237724304, "learning_rate": 9.362758605588413e-06, "loss": 0.0408, "step": 60860 }, { "epoch": 0.49251557569382637, "grad_norm": 0.27515536546707153, "learning_rate": 9.362413618608381e-06, "loss": 0.0333, "step": 60870 }, { "epoch": 0.49259648838902825, "grad_norm": 0.16143624484539032, "learning_rate": 9.362068544629221e-06, "loss": 0.0279, "step": 60880 }, { "epoch": 0.49267740108423014, "grad_norm": 0.6803503632545471, "learning_rate": 9.36172338365781e-06, "loss": 0.0498, "step": 60890 }, { "epoch": 0.49275831377943197, "grad_norm": 0.4881034791469574, "learning_rate": 9.361378135701034e-06, "loss": 0.033, "step": 60900 }, { "epoch": 0.49283922647463385, "grad_norm": 1.3656748533248901, "learning_rate": 9.36103280076578e-06, "loss": 0.035, "step": 60910 }, { "epoch": 0.49292013916983574, "grad_norm": 0.36609017848968506, "learning_rate": 9.360687378858933e-06, "loss": 0.0433, "step": 60920 }, { "epoch": 0.4930010518650376, "grad_norm": 0.3432357907295227, "learning_rate": 9.360341869987382e-06, "loss": 0.033, "step": 60930 }, { "epoch": 0.4930819645602395, "grad_norm": 0.8745783567428589, "learning_rate": 9.359996274158017e-06, "loss": 0.0351, "step": 60940 }, { "epoch": 0.4931628772554414, "grad_norm": 0.42832672595977783, "learning_rate": 9.359650591377729e-06, "loss": 0.0307, "step": 60950 }, { "epoch": 0.4932437899506433, "grad_norm": 0.5065904259681702, "learning_rate": 9.359304821653417e-06, "loss": 0.0259, "step": 60960 }, { "epoch": 0.49332470264584516, "grad_norm": 0.28279465436935425, "learning_rate": 9.358958964991972e-06, "loss": 0.0282, "step": 60970 }, { "epoch": 0.493405615341047, "grad_norm": 0.25570806860923767, "learning_rate": 9.358613021400292e-06, "loss": 0.0346, "step": 60980 }, { "epoch": 0.4934865280362489, "grad_norm": 0.7237457036972046, "learning_rate": 9.358266990885275e-06, "loss": 0.0615, "step": 60990 }, { "epoch": 0.49356744073145076, "grad_norm": 0.7472292184829712, "learning_rate": 9.357920873453825e-06, "loss": 0.0522, "step": 61000 }, { "epoch": 0.49356744073145076, "eval_loss": 0.03190610930323601, "eval_runtime": 3.8291, "eval_samples_per_second": 52.232, "eval_steps_per_second": 26.116, "step": 61000 }, { "epoch": 0.49364835342665264, "grad_norm": 0.6911836862564087, "learning_rate": 9.357574669112844e-06, "loss": 0.0312, "step": 61010 }, { "epoch": 0.4937292661218545, "grad_norm": 0.4587019383907318, "learning_rate": 9.357228377869235e-06, "loss": 0.0403, "step": 61020 }, { "epoch": 0.4938101788170564, "grad_norm": 0.5778710842132568, "learning_rate": 9.356881999729906e-06, "loss": 0.0482, "step": 61030 }, { "epoch": 0.4938910915122583, "grad_norm": 0.3539869487285614, "learning_rate": 9.35653553470176e-06, "loss": 0.0364, "step": 61040 }, { "epoch": 0.4939720042074601, "grad_norm": 0.2856447398662567, "learning_rate": 9.356188982791713e-06, "loss": 0.0321, "step": 61050 }, { "epoch": 0.494052916902662, "grad_norm": 0.48661479353904724, "learning_rate": 9.355842344006671e-06, "loss": 0.0199, "step": 61060 }, { "epoch": 0.4941338295978639, "grad_norm": 0.8456966280937195, "learning_rate": 9.35549561835355e-06, "loss": 0.0437, "step": 61070 }, { "epoch": 0.4942147422930658, "grad_norm": 0.6993282437324524, "learning_rate": 9.355148805839264e-06, "loss": 0.0374, "step": 61080 }, { "epoch": 0.49429565498826766, "grad_norm": 0.24102024734020233, "learning_rate": 9.354801906470728e-06, "loss": 0.0365, "step": 61090 }, { "epoch": 0.49437656768346955, "grad_norm": 0.5865594744682312, "learning_rate": 9.354454920254864e-06, "loss": 0.0551, "step": 61100 }, { "epoch": 0.49445748037867143, "grad_norm": 0.5712073445320129, "learning_rate": 9.354107847198586e-06, "loss": 0.0407, "step": 61110 }, { "epoch": 0.4945383930738733, "grad_norm": 0.4332229793071747, "learning_rate": 9.353760687308821e-06, "loss": 0.0433, "step": 61120 }, { "epoch": 0.49461930576907515, "grad_norm": 0.40996813774108887, "learning_rate": 9.35341344059249e-06, "loss": 0.0246, "step": 61130 }, { "epoch": 0.49470021846427703, "grad_norm": 0.686277449131012, "learning_rate": 9.353066107056517e-06, "loss": 0.0326, "step": 61140 }, { "epoch": 0.4947811311594789, "grad_norm": 0.14400821924209595, "learning_rate": 9.352718686707832e-06, "loss": 0.0209, "step": 61150 }, { "epoch": 0.4948620438546808, "grad_norm": 0.469669908285141, "learning_rate": 9.352371179553361e-06, "loss": 0.0187, "step": 61160 }, { "epoch": 0.4949429565498827, "grad_norm": 0.18956021964550018, "learning_rate": 9.352023585600034e-06, "loss": 0.0286, "step": 61170 }, { "epoch": 0.49502386924508457, "grad_norm": 0.5987217426300049, "learning_rate": 9.351675904854786e-06, "loss": 0.0364, "step": 61180 }, { "epoch": 0.49510478194028645, "grad_norm": 0.6601713299751282, "learning_rate": 9.351328137324547e-06, "loss": 0.0487, "step": 61190 }, { "epoch": 0.4951856946354883, "grad_norm": 0.3035885989665985, "learning_rate": 9.350980283016255e-06, "loss": 0.0322, "step": 61200 }, { "epoch": 0.49526660733069017, "grad_norm": 0.563961923122406, "learning_rate": 9.350632341936847e-06, "loss": 0.0272, "step": 61210 }, { "epoch": 0.49534752002589205, "grad_norm": 0.4497414529323578, "learning_rate": 9.350284314093263e-06, "loss": 0.0399, "step": 61220 }, { "epoch": 0.49542843272109394, "grad_norm": 0.7409636974334717, "learning_rate": 9.34993619949244e-06, "loss": 0.0393, "step": 61230 }, { "epoch": 0.4955093454162958, "grad_norm": 0.5401376485824585, "learning_rate": 9.349587998141323e-06, "loss": 0.0405, "step": 61240 }, { "epoch": 0.4955902581114977, "grad_norm": 0.2437451332807541, "learning_rate": 9.349239710046857e-06, "loss": 0.0266, "step": 61250 }, { "epoch": 0.4956711708066996, "grad_norm": 0.184317946434021, "learning_rate": 9.348891335215986e-06, "loss": 0.024, "step": 61260 }, { "epoch": 0.4957520835019015, "grad_norm": 0.5846848487854004, "learning_rate": 9.348542873655657e-06, "loss": 0.0367, "step": 61270 }, { "epoch": 0.4958329961971033, "grad_norm": 0.640709638595581, "learning_rate": 9.348194325372823e-06, "loss": 0.0624, "step": 61280 }, { "epoch": 0.4959139088923052, "grad_norm": 0.3209027647972107, "learning_rate": 9.347845690374431e-06, "loss": 0.0279, "step": 61290 }, { "epoch": 0.4959948215875071, "grad_norm": 0.9739699363708496, "learning_rate": 9.347496968667435e-06, "loss": 0.0423, "step": 61300 }, { "epoch": 0.49607573428270896, "grad_norm": 0.9970154166221619, "learning_rate": 9.347148160258791e-06, "loss": 0.0463, "step": 61310 }, { "epoch": 0.49615664697791084, "grad_norm": 0.5346516966819763, "learning_rate": 9.346799265155454e-06, "loss": 0.0307, "step": 61320 }, { "epoch": 0.4962375596731127, "grad_norm": 0.40820059180259705, "learning_rate": 9.34645028336438e-06, "loss": 0.0289, "step": 61330 }, { "epoch": 0.4963184723683146, "grad_norm": 0.8459079265594482, "learning_rate": 9.346101214892533e-06, "loss": 0.0433, "step": 61340 }, { "epoch": 0.49639938506351644, "grad_norm": 0.9592260718345642, "learning_rate": 9.345752059746873e-06, "loss": 0.0383, "step": 61350 }, { "epoch": 0.4964802977587183, "grad_norm": 0.7606490850448608, "learning_rate": 9.345402817934361e-06, "loss": 0.0333, "step": 61360 }, { "epoch": 0.4965612104539202, "grad_norm": 0.4144953489303589, "learning_rate": 9.345053489461964e-06, "loss": 0.0341, "step": 61370 }, { "epoch": 0.4966421231491221, "grad_norm": 0.20931771397590637, "learning_rate": 9.344704074336648e-06, "loss": 0.0279, "step": 61380 }, { "epoch": 0.496723035844324, "grad_norm": 0.32592710852622986, "learning_rate": 9.344354572565382e-06, "loss": 0.0477, "step": 61390 }, { "epoch": 0.49680394853952586, "grad_norm": 0.29842230677604675, "learning_rate": 9.344004984155135e-06, "loss": 0.0278, "step": 61400 }, { "epoch": 0.49688486123472775, "grad_norm": 0.4795716106891632, "learning_rate": 9.343655309112879e-06, "loss": 0.0395, "step": 61410 }, { "epoch": 0.49696577392992963, "grad_norm": 0.45369046926498413, "learning_rate": 9.343305547445587e-06, "loss": 0.0486, "step": 61420 }, { "epoch": 0.49704668662513146, "grad_norm": 0.8822722434997559, "learning_rate": 9.342955699160235e-06, "loss": 0.0374, "step": 61430 }, { "epoch": 0.49712759932033335, "grad_norm": 0.7812719345092773, "learning_rate": 9.3426057642638e-06, "loss": 0.0368, "step": 61440 }, { "epoch": 0.49720851201553523, "grad_norm": 0.3759858310222626, "learning_rate": 9.342255742763261e-06, "loss": 0.0347, "step": 61450 }, { "epoch": 0.4972894247107371, "grad_norm": 0.40019047260284424, "learning_rate": 9.341905634665599e-06, "loss": 0.0404, "step": 61460 }, { "epoch": 0.497370337405939, "grad_norm": 0.5217596292495728, "learning_rate": 9.341555439977794e-06, "loss": 0.0408, "step": 61470 }, { "epoch": 0.4974512501011409, "grad_norm": 0.34750163555145264, "learning_rate": 9.341205158706832e-06, "loss": 0.0301, "step": 61480 }, { "epoch": 0.49753216279634277, "grad_norm": 0.47178220748901367, "learning_rate": 9.340854790859697e-06, "loss": 0.0381, "step": 61490 }, { "epoch": 0.4976130754915446, "grad_norm": 0.44897618889808655, "learning_rate": 9.340504336443378e-06, "loss": 0.0363, "step": 61500 }, { "epoch": 0.4976939881867465, "grad_norm": 1.1826834678649902, "learning_rate": 9.340153795464862e-06, "loss": 0.0376, "step": 61510 }, { "epoch": 0.49777490088194837, "grad_norm": 0.4014122486114502, "learning_rate": 9.339803167931143e-06, "loss": 0.0275, "step": 61520 }, { "epoch": 0.49785581357715025, "grad_norm": 0.46869343519210815, "learning_rate": 9.33945245384921e-06, "loss": 0.0395, "step": 61530 }, { "epoch": 0.49793672627235214, "grad_norm": 0.47080662846565247, "learning_rate": 9.339101653226059e-06, "loss": 0.0355, "step": 61540 }, { "epoch": 0.498017638967554, "grad_norm": 0.5143594145774841, "learning_rate": 9.338750766068686e-06, "loss": 0.0345, "step": 61550 }, { "epoch": 0.4980985516627559, "grad_norm": 0.491496205329895, "learning_rate": 9.338399792384088e-06, "loss": 0.0351, "step": 61560 }, { "epoch": 0.4981794643579578, "grad_norm": 0.23478202521800995, "learning_rate": 9.338048732179266e-06, "loss": 0.0339, "step": 61570 }, { "epoch": 0.4982603770531596, "grad_norm": 0.3969305455684662, "learning_rate": 9.337697585461218e-06, "loss": 0.0344, "step": 61580 }, { "epoch": 0.4983412897483615, "grad_norm": 0.14441920816898346, "learning_rate": 9.33734635223695e-06, "loss": 0.0387, "step": 61590 }, { "epoch": 0.4984222024435634, "grad_norm": 0.35042625665664673, "learning_rate": 9.336995032513467e-06, "loss": 0.0274, "step": 61600 }, { "epoch": 0.4985031151387653, "grad_norm": 0.7119636535644531, "learning_rate": 9.336643626297773e-06, "loss": 0.0446, "step": 61610 }, { "epoch": 0.49858402783396716, "grad_norm": 0.4156343638896942, "learning_rate": 9.336292133596876e-06, "loss": 0.0331, "step": 61620 }, { "epoch": 0.49866494052916904, "grad_norm": 0.5145835876464844, "learning_rate": 9.335940554417788e-06, "loss": 0.028, "step": 61630 }, { "epoch": 0.4987458532243709, "grad_norm": 0.525537371635437, "learning_rate": 9.335588888767519e-06, "loss": 0.0274, "step": 61640 }, { "epoch": 0.49882676591957276, "grad_norm": 0.3494231402873993, "learning_rate": 9.335237136653082e-06, "loss": 0.0436, "step": 61650 }, { "epoch": 0.49890767861477464, "grad_norm": 0.7311667799949646, "learning_rate": 9.334885298081492e-06, "loss": 0.0243, "step": 61660 }, { "epoch": 0.4989885913099765, "grad_norm": 0.2560855448246002, "learning_rate": 9.334533373059767e-06, "loss": 0.0229, "step": 61670 }, { "epoch": 0.4990695040051784, "grad_norm": 0.33441799879074097, "learning_rate": 9.334181361594925e-06, "loss": 0.0421, "step": 61680 }, { "epoch": 0.4991504167003803, "grad_norm": 0.4586701989173889, "learning_rate": 9.333829263693984e-06, "loss": 0.0554, "step": 61690 }, { "epoch": 0.4992313293955822, "grad_norm": 0.7071080207824707, "learning_rate": 9.333477079363968e-06, "loss": 0.0577, "step": 61700 }, { "epoch": 0.49931224209078406, "grad_norm": 0.4091528356075287, "learning_rate": 9.3331248086119e-06, "loss": 0.0362, "step": 61710 }, { "epoch": 0.49939315478598595, "grad_norm": 0.6687198281288147, "learning_rate": 9.332772451444807e-06, "loss": 0.0333, "step": 61720 }, { "epoch": 0.4994740674811878, "grad_norm": 0.30694571137428284, "learning_rate": 9.33242000786971e-06, "loss": 0.0248, "step": 61730 }, { "epoch": 0.49955498017638966, "grad_norm": 0.10635431110858917, "learning_rate": 9.332067477893647e-06, "loss": 0.0274, "step": 61740 }, { "epoch": 0.49963589287159155, "grad_norm": 0.6254014372825623, "learning_rate": 9.331714861523642e-06, "loss": 0.0388, "step": 61750 }, { "epoch": 0.49971680556679343, "grad_norm": 0.5026586055755615, "learning_rate": 9.331362158766727e-06, "loss": 0.0362, "step": 61760 }, { "epoch": 0.4997977182619953, "grad_norm": 0.4544832110404968, "learning_rate": 9.331009369629939e-06, "loss": 0.0338, "step": 61770 }, { "epoch": 0.4998786309571972, "grad_norm": 0.694412887096405, "learning_rate": 9.330656494120312e-06, "loss": 0.0467, "step": 61780 }, { "epoch": 0.4999595436523991, "grad_norm": 0.3631599247455597, "learning_rate": 9.330303532244885e-06, "loss": 0.0211, "step": 61790 }, { "epoch": 0.500040456347601, "grad_norm": 0.6345263123512268, "learning_rate": 9.329950484010693e-06, "loss": 0.0322, "step": 61800 }, { "epoch": 0.5001213690428028, "grad_norm": 0.23024483025074005, "learning_rate": 9.329597349424782e-06, "loss": 0.027, "step": 61810 }, { "epoch": 0.5002022817380047, "grad_norm": 0.1523992419242859, "learning_rate": 9.32924412849419e-06, "loss": 0.0512, "step": 61820 }, { "epoch": 0.5002831944332066, "grad_norm": 0.2594209313392639, "learning_rate": 9.328890821225965e-06, "loss": 0.0411, "step": 61830 }, { "epoch": 0.5003641071284084, "grad_norm": 0.7780789136886597, "learning_rate": 9.328537427627152e-06, "loss": 0.0371, "step": 61840 }, { "epoch": 0.5004450198236103, "grad_norm": 0.2787827253341675, "learning_rate": 9.328183947704795e-06, "loss": 0.0406, "step": 61850 }, { "epoch": 0.5005259325188122, "grad_norm": 0.37352609634399414, "learning_rate": 9.327830381465948e-06, "loss": 0.0387, "step": 61860 }, { "epoch": 0.5006068452140141, "grad_norm": 0.2927468717098236, "learning_rate": 9.327476728917661e-06, "loss": 0.0266, "step": 61870 }, { "epoch": 0.5006877579092159, "grad_norm": 0.5580402612686157, "learning_rate": 9.327122990066986e-06, "loss": 0.0345, "step": 61880 }, { "epoch": 0.5007686706044179, "grad_norm": 0.7808892130851746, "learning_rate": 9.326769164920976e-06, "loss": 0.0344, "step": 61890 }, { "epoch": 0.5008495832996197, "grad_norm": 0.4530179798603058, "learning_rate": 9.326415253486692e-06, "loss": 0.0328, "step": 61900 }, { "epoch": 0.5009304959948215, "grad_norm": 0.5589422583580017, "learning_rate": 9.326061255771189e-06, "loss": 0.0331, "step": 61910 }, { "epoch": 0.5010114086900235, "grad_norm": 0.2254106104373932, "learning_rate": 9.325707171781526e-06, "loss": 0.0206, "step": 61920 }, { "epoch": 0.5010923213852253, "grad_norm": 0.3310290277004242, "learning_rate": 9.325353001524766e-06, "loss": 0.0328, "step": 61930 }, { "epoch": 0.5011732340804272, "grad_norm": 1.0017231702804565, "learning_rate": 9.32499874500797e-06, "loss": 0.0602, "step": 61940 }, { "epoch": 0.5012541467756291, "grad_norm": 0.5618095397949219, "learning_rate": 9.324644402238206e-06, "loss": 0.0578, "step": 61950 }, { "epoch": 0.501335059470831, "grad_norm": 0.43595167994499207, "learning_rate": 9.324289973222538e-06, "loss": 0.0259, "step": 61960 }, { "epoch": 0.5014159721660328, "grad_norm": 0.365735799074173, "learning_rate": 9.323935457968038e-06, "loss": 0.0353, "step": 61970 }, { "epoch": 0.5014968848612348, "grad_norm": 0.5942129492759705, "learning_rate": 9.323580856481771e-06, "loss": 0.0266, "step": 61980 }, { "epoch": 0.5015777975564366, "grad_norm": 0.5933055877685547, "learning_rate": 9.323226168770812e-06, "loss": 0.0433, "step": 61990 }, { "epoch": 0.5016587102516384, "grad_norm": 0.3727148175239563, "learning_rate": 9.322871394842234e-06, "loss": 0.0382, "step": 62000 }, { "epoch": 0.5017396229468404, "grad_norm": 0.38152989745140076, "learning_rate": 9.322516534703112e-06, "loss": 0.0268, "step": 62010 }, { "epoch": 0.5018205356420422, "grad_norm": 0.9792701601982117, "learning_rate": 9.322161588360524e-06, "loss": 0.0296, "step": 62020 }, { "epoch": 0.5019014483372441, "grad_norm": 0.4402788281440735, "learning_rate": 9.321806555821547e-06, "loss": 0.0501, "step": 62030 }, { "epoch": 0.501982361032446, "grad_norm": 0.5151629447937012, "learning_rate": 9.321451437093261e-06, "loss": 0.0359, "step": 62040 }, { "epoch": 0.5020632737276479, "grad_norm": 0.44269758462905884, "learning_rate": 9.32109623218275e-06, "loss": 0.0327, "step": 62050 }, { "epoch": 0.5021441864228497, "grad_norm": 0.8069226741790771, "learning_rate": 9.320740941097094e-06, "loss": 0.0463, "step": 62060 }, { "epoch": 0.5022250991180516, "grad_norm": 0.5144715905189514, "learning_rate": 9.320385563843386e-06, "loss": 0.03, "step": 62070 }, { "epoch": 0.5023060118132535, "grad_norm": 0.5263491272926331, "learning_rate": 9.320030100428705e-06, "loss": 0.0412, "step": 62080 }, { "epoch": 0.5023869245084553, "grad_norm": 0.43212786316871643, "learning_rate": 9.319674550860146e-06, "loss": 0.036, "step": 62090 }, { "epoch": 0.5024678372036573, "grad_norm": 0.2979234755039215, "learning_rate": 9.319318915144798e-06, "loss": 0.0333, "step": 62100 }, { "epoch": 0.5025487498988591, "grad_norm": 0.3823970556259155, "learning_rate": 9.31896319328975e-06, "loss": 0.0292, "step": 62110 }, { "epoch": 0.502629662594061, "grad_norm": 0.5948434472084045, "learning_rate": 9.3186073853021e-06, "loss": 0.0434, "step": 62120 }, { "epoch": 0.5027105752892629, "grad_norm": 0.3227573037147522, "learning_rate": 9.318251491188944e-06, "loss": 0.0332, "step": 62130 }, { "epoch": 0.5027914879844647, "grad_norm": 0.42348238825798035, "learning_rate": 9.317895510957376e-06, "loss": 0.0289, "step": 62140 }, { "epoch": 0.5028724006796667, "grad_norm": 0.8383760452270508, "learning_rate": 9.3175394446145e-06, "loss": 0.0452, "step": 62150 }, { "epoch": 0.5029533133748685, "grad_norm": 0.8364839553833008, "learning_rate": 9.317183292167411e-06, "loss": 0.0513, "step": 62160 }, { "epoch": 0.5030342260700704, "grad_norm": 0.5934694409370422, "learning_rate": 9.316827053623217e-06, "loss": 0.0394, "step": 62170 }, { "epoch": 0.5031151387652723, "grad_norm": 1.0545750856399536, "learning_rate": 9.31647072898902e-06, "loss": 0.0355, "step": 62180 }, { "epoch": 0.5031960514604742, "grad_norm": 0.0034879683516919613, "learning_rate": 9.316114318271929e-06, "loss": 0.0209, "step": 62190 }, { "epoch": 0.503276964155676, "grad_norm": 0.7022631764411926, "learning_rate": 9.315757821479048e-06, "loss": 0.0381, "step": 62200 }, { "epoch": 0.5033578768508779, "grad_norm": 0.22845982015132904, "learning_rate": 9.315401238617489e-06, "loss": 0.0221, "step": 62210 }, { "epoch": 0.5034387895460798, "grad_norm": 0.19938777387142181, "learning_rate": 9.315044569694362e-06, "loss": 0.0307, "step": 62220 }, { "epoch": 0.5035197022412816, "grad_norm": 0.9574864506721497, "learning_rate": 9.31468781471678e-06, "loss": 0.0316, "step": 62230 }, { "epoch": 0.5036006149364836, "grad_norm": 0.30575406551361084, "learning_rate": 9.31433097369186e-06, "loss": 0.0254, "step": 62240 }, { "epoch": 0.5036815276316854, "grad_norm": 0.6050796508789062, "learning_rate": 9.313974046626716e-06, "loss": 0.0354, "step": 62250 }, { "epoch": 0.5037624403268873, "grad_norm": 0.5744377970695496, "learning_rate": 9.313617033528464e-06, "loss": 0.0335, "step": 62260 }, { "epoch": 0.5038433530220892, "grad_norm": 0.5319119095802307, "learning_rate": 9.31325993440423e-06, "loss": 0.0319, "step": 62270 }, { "epoch": 0.5039242657172911, "grad_norm": 0.6254756450653076, "learning_rate": 9.312902749261131e-06, "loss": 0.0368, "step": 62280 }, { "epoch": 0.5040051784124929, "grad_norm": 0.42137765884399414, "learning_rate": 9.312545478106293e-06, "loss": 0.0452, "step": 62290 }, { "epoch": 0.5040860911076948, "grad_norm": 0.588146448135376, "learning_rate": 9.312188120946838e-06, "loss": 0.0498, "step": 62300 }, { "epoch": 0.5041670038028967, "grad_norm": 0.9193846583366394, "learning_rate": 9.311830677789894e-06, "loss": 0.0333, "step": 62310 }, { "epoch": 0.5042479164980985, "grad_norm": 0.03220789134502411, "learning_rate": 9.311473148642592e-06, "loss": 0.0312, "step": 62320 }, { "epoch": 0.5043288291933005, "grad_norm": 0.5829018354415894, "learning_rate": 9.311115533512058e-06, "loss": 0.0209, "step": 62330 }, { "epoch": 0.5044097418885023, "grad_norm": 0.4653589725494385, "learning_rate": 9.310757832405427e-06, "loss": 0.0365, "step": 62340 }, { "epoch": 0.5044906545837042, "grad_norm": 0.7888204455375671, "learning_rate": 9.31040004532983e-06, "loss": 0.0368, "step": 62350 }, { "epoch": 0.5045715672789061, "grad_norm": 0.371047705411911, "learning_rate": 9.310042172292406e-06, "loss": 0.0341, "step": 62360 }, { "epoch": 0.5046524799741079, "grad_norm": 0.7579852938652039, "learning_rate": 9.309684213300289e-06, "loss": 0.0424, "step": 62370 }, { "epoch": 0.5047333926693098, "grad_norm": 0.8745270371437073, "learning_rate": 9.309326168360618e-06, "loss": 0.0478, "step": 62380 }, { "epoch": 0.5048143053645117, "grad_norm": 2.064241886138916, "learning_rate": 9.308968037480533e-06, "loss": 0.0334, "step": 62390 }, { "epoch": 0.5048952180597136, "grad_norm": 0.4873258173465729, "learning_rate": 9.308609820667179e-06, "loss": 0.0418, "step": 62400 }, { "epoch": 0.5049761307549154, "grad_norm": 0.673670768737793, "learning_rate": 9.308251517927697e-06, "loss": 0.0281, "step": 62410 }, { "epoch": 0.5050570434501174, "grad_norm": 0.44527268409729004, "learning_rate": 9.307893129269234e-06, "loss": 0.03, "step": 62420 }, { "epoch": 0.5051379561453192, "grad_norm": 0.7067123651504517, "learning_rate": 9.307534654698936e-06, "loss": 0.0331, "step": 62430 }, { "epoch": 0.505218868840521, "grad_norm": 0.699459969997406, "learning_rate": 9.307176094223954e-06, "loss": 0.0539, "step": 62440 }, { "epoch": 0.505299781535723, "grad_norm": 0.3788266181945801, "learning_rate": 9.306817447851437e-06, "loss": 0.0355, "step": 62450 }, { "epoch": 0.5053806942309248, "grad_norm": 0.5304071307182312, "learning_rate": 9.306458715588538e-06, "loss": 0.0341, "step": 62460 }, { "epoch": 0.5054616069261267, "grad_norm": 0.2678913474082947, "learning_rate": 9.306099897442412e-06, "loss": 0.0285, "step": 62470 }, { "epoch": 0.5055425196213286, "grad_norm": 0.7886331081390381, "learning_rate": 9.305740993420214e-06, "loss": 0.0529, "step": 62480 }, { "epoch": 0.5056234323165305, "grad_norm": 0.22146117687225342, "learning_rate": 9.305382003529102e-06, "loss": 0.0476, "step": 62490 }, { "epoch": 0.5057043450117323, "grad_norm": 0.25668707489967346, "learning_rate": 9.305022927776233e-06, "loss": 0.0263, "step": 62500 }, { "epoch": 0.5057852577069342, "grad_norm": 0.2671422064304352, "learning_rate": 9.304663766168771e-06, "loss": 0.0261, "step": 62510 }, { "epoch": 0.5058661704021361, "grad_norm": 0.4890683591365814, "learning_rate": 9.304304518713879e-06, "loss": 0.0282, "step": 62520 }, { "epoch": 0.5059470830973379, "grad_norm": 2.1076738834381104, "learning_rate": 9.30394518541872e-06, "loss": 0.049, "step": 62530 }, { "epoch": 0.5060279957925399, "grad_norm": 0.7692623734474182, "learning_rate": 9.303585766290459e-06, "loss": 0.0375, "step": 62540 }, { "epoch": 0.5061089084877417, "grad_norm": 0.458116352558136, "learning_rate": 9.303226261336267e-06, "loss": 0.0385, "step": 62550 }, { "epoch": 0.5061898211829436, "grad_norm": 0.39053794741630554, "learning_rate": 9.30286667056331e-06, "loss": 0.0323, "step": 62560 }, { "epoch": 0.5062707338781455, "grad_norm": 0.6235725283622742, "learning_rate": 9.302506993978762e-06, "loss": 0.0259, "step": 62570 }, { "epoch": 0.5063516465733474, "grad_norm": 0.4185638427734375, "learning_rate": 9.302147231589793e-06, "loss": 0.0393, "step": 62580 }, { "epoch": 0.5064325592685492, "grad_norm": 0.8484615087509155, "learning_rate": 9.301787383403583e-06, "loss": 0.0378, "step": 62590 }, { "epoch": 0.5065134719637511, "grad_norm": 0.2419900894165039, "learning_rate": 9.301427449427302e-06, "loss": 0.0467, "step": 62600 }, { "epoch": 0.506594384658953, "grad_norm": 0.5345557332038879, "learning_rate": 9.301067429668133e-06, "loss": 0.0293, "step": 62610 }, { "epoch": 0.5066752973541548, "grad_norm": 0.3381475806236267, "learning_rate": 9.300707324133254e-06, "loss": 0.0267, "step": 62620 }, { "epoch": 0.5067562100493568, "grad_norm": 0.8210611939430237, "learning_rate": 9.300347132829845e-06, "loss": 0.0318, "step": 62630 }, { "epoch": 0.5068371227445586, "grad_norm": 0.5059399008750916, "learning_rate": 9.299986855765094e-06, "loss": 0.0363, "step": 62640 }, { "epoch": 0.5069180354397606, "grad_norm": 0.2662336826324463, "learning_rate": 9.29962649294618e-06, "loss": 0.0305, "step": 62650 }, { "epoch": 0.5069989481349624, "grad_norm": 0.40566298365592957, "learning_rate": 9.299266044380294e-06, "loss": 0.0522, "step": 62660 }, { "epoch": 0.5070798608301642, "grad_norm": 0.5730223059654236, "learning_rate": 9.298905510074623e-06, "loss": 0.0421, "step": 62670 }, { "epoch": 0.5071607735253661, "grad_norm": 0.48707014322280884, "learning_rate": 9.298544890036355e-06, "loss": 0.0268, "step": 62680 }, { "epoch": 0.507241686220568, "grad_norm": 0.8162450194358826, "learning_rate": 9.298184184272685e-06, "loss": 0.0278, "step": 62690 }, { "epoch": 0.5073225989157699, "grad_norm": 0.6709979772567749, "learning_rate": 9.297823392790806e-06, "loss": 0.044, "step": 62700 }, { "epoch": 0.5074035116109717, "grad_norm": 0.3012359142303467, "learning_rate": 9.297462515597913e-06, "loss": 0.0477, "step": 62710 }, { "epoch": 0.5074844243061737, "grad_norm": 0.3202413022518158, "learning_rate": 9.297101552701201e-06, "loss": 0.025, "step": 62720 }, { "epoch": 0.5075653370013755, "grad_norm": 0.6808415651321411, "learning_rate": 9.29674050410787e-06, "loss": 0.0425, "step": 62730 }, { "epoch": 0.5076462496965773, "grad_norm": 0.3462528586387634, "learning_rate": 9.29637936982512e-06, "loss": 0.0425, "step": 62740 }, { "epoch": 0.5077271623917793, "grad_norm": 0.034481775015592575, "learning_rate": 9.296018149860157e-06, "loss": 0.0265, "step": 62750 }, { "epoch": 0.5078080750869811, "grad_norm": 0.7149117588996887, "learning_rate": 9.295656844220177e-06, "loss": 0.0537, "step": 62760 }, { "epoch": 0.507888987782183, "grad_norm": 1.140319585800171, "learning_rate": 9.295295452912393e-06, "loss": 0.0415, "step": 62770 }, { "epoch": 0.5079699004773849, "grad_norm": 0.5550438165664673, "learning_rate": 9.294933975944008e-06, "loss": 0.0342, "step": 62780 }, { "epoch": 0.5080508131725868, "grad_norm": 0.4688005745410919, "learning_rate": 9.294572413322231e-06, "loss": 0.0278, "step": 62790 }, { "epoch": 0.5081317258677887, "grad_norm": 0.5900342464447021, "learning_rate": 9.294210765054275e-06, "loss": 0.0505, "step": 62800 }, { "epoch": 0.5082126385629905, "grad_norm": 0.47821882367134094, "learning_rate": 9.293849031147348e-06, "loss": 0.0281, "step": 62810 }, { "epoch": 0.5082935512581924, "grad_norm": 0.5341925621032715, "learning_rate": 9.293487211608669e-06, "loss": 0.057, "step": 62820 }, { "epoch": 0.5083744639533943, "grad_norm": 0.8801983594894409, "learning_rate": 9.293125306445452e-06, "loss": 0.0286, "step": 62830 }, { "epoch": 0.5084553766485962, "grad_norm": 0.30168616771698, "learning_rate": 9.292763315664913e-06, "loss": 0.0427, "step": 62840 }, { "epoch": 0.508536289343798, "grad_norm": 0.40803050994873047, "learning_rate": 9.292401239274272e-06, "loss": 0.0297, "step": 62850 }, { "epoch": 0.508617202039, "grad_norm": 0.6900316476821899, "learning_rate": 9.29203907728075e-06, "loss": 0.0522, "step": 62860 }, { "epoch": 0.5086981147342018, "grad_norm": 0.6497174501419067, "learning_rate": 9.29167682969157e-06, "loss": 0.0372, "step": 62870 }, { "epoch": 0.5087790274294036, "grad_norm": 0.6406852006912231, "learning_rate": 9.291314496513955e-06, "loss": 0.0338, "step": 62880 }, { "epoch": 0.5088599401246056, "grad_norm": 0.38959628343582153, "learning_rate": 9.290952077755131e-06, "loss": 0.0384, "step": 62890 }, { "epoch": 0.5089408528198074, "grad_norm": 0.4316805601119995, "learning_rate": 9.290589573422329e-06, "loss": 0.0256, "step": 62900 }, { "epoch": 0.5090217655150093, "grad_norm": 0.3466937243938446, "learning_rate": 9.290226983522773e-06, "loss": 0.0326, "step": 62910 }, { "epoch": 0.5091026782102112, "grad_norm": 0.5056929588317871, "learning_rate": 9.289864308063698e-06, "loss": 0.047, "step": 62920 }, { "epoch": 0.5091835909054131, "grad_norm": 0.49670788645744324, "learning_rate": 9.289501547052337e-06, "loss": 0.0308, "step": 62930 }, { "epoch": 0.5092645036006149, "grad_norm": 0.5301100015640259, "learning_rate": 9.289138700495922e-06, "loss": 0.0255, "step": 62940 }, { "epoch": 0.5093454162958169, "grad_norm": 0.34391462802886963, "learning_rate": 9.28877576840169e-06, "loss": 0.0273, "step": 62950 }, { "epoch": 0.5094263289910187, "grad_norm": 0.606878936290741, "learning_rate": 9.288412750776879e-06, "loss": 0.0531, "step": 62960 }, { "epoch": 0.5095072416862205, "grad_norm": 0.3620288074016571, "learning_rate": 9.288049647628729e-06, "loss": 0.0301, "step": 62970 }, { "epoch": 0.5095881543814225, "grad_norm": 0.4387578070163727, "learning_rate": 9.287686458964482e-06, "loss": 0.0389, "step": 62980 }, { "epoch": 0.5096690670766243, "grad_norm": 0.45386314392089844, "learning_rate": 9.28732318479138e-06, "loss": 0.0303, "step": 62990 }, { "epoch": 0.5097499797718262, "grad_norm": 0.3464723229408264, "learning_rate": 9.28695982511667e-06, "loss": 0.0411, "step": 63000 }, { "epoch": 0.5098308924670281, "grad_norm": 0.35556814074516296, "learning_rate": 9.286596379947595e-06, "loss": 0.0299, "step": 63010 }, { "epoch": 0.50991180516223, "grad_norm": 0.46575143933296204, "learning_rate": 9.286232849291403e-06, "loss": 0.0326, "step": 63020 }, { "epoch": 0.5099927178574318, "grad_norm": 0.3079066276550293, "learning_rate": 9.285869233155347e-06, "loss": 0.029, "step": 63030 }, { "epoch": 0.5100736305526337, "grad_norm": 0.4946531355381012, "learning_rate": 9.285505531546676e-06, "loss": 0.0476, "step": 63040 }, { "epoch": 0.5101545432478356, "grad_norm": 0.5804668068885803, "learning_rate": 9.285141744472645e-06, "loss": 0.0383, "step": 63050 }, { "epoch": 0.5102354559430374, "grad_norm": 0.9095960259437561, "learning_rate": 9.284777871940508e-06, "loss": 0.0378, "step": 63060 }, { "epoch": 0.5103163686382394, "grad_norm": 0.5995880961418152, "learning_rate": 9.284413913957523e-06, "loss": 0.0383, "step": 63070 }, { "epoch": 0.5103972813334412, "grad_norm": 0.1907748281955719, "learning_rate": 9.284049870530948e-06, "loss": 0.0405, "step": 63080 }, { "epoch": 0.5104781940286431, "grad_norm": 0.5847969651222229, "learning_rate": 9.28368574166804e-06, "loss": 0.0443, "step": 63090 }, { "epoch": 0.510559106723845, "grad_norm": 0.6178001165390015, "learning_rate": 9.283321527376063e-06, "loss": 0.0357, "step": 63100 }, { "epoch": 0.5106400194190468, "grad_norm": 0.41899043321609497, "learning_rate": 9.282957227662281e-06, "loss": 0.0517, "step": 63110 }, { "epoch": 0.5107209321142487, "grad_norm": 0.40589776635169983, "learning_rate": 9.28259284253396e-06, "loss": 0.0344, "step": 63120 }, { "epoch": 0.5108018448094506, "grad_norm": 0.3824792206287384, "learning_rate": 9.282228371998365e-06, "loss": 0.0274, "step": 63130 }, { "epoch": 0.5108827575046525, "grad_norm": 0.118703693151474, "learning_rate": 9.281863816062767e-06, "loss": 0.0404, "step": 63140 }, { "epoch": 0.5109636701998543, "grad_norm": 0.5341722369194031, "learning_rate": 9.281499174734432e-06, "loss": 0.0297, "step": 63150 }, { "epoch": 0.5110445828950563, "grad_norm": 0.5492850542068481, "learning_rate": 9.281134448020635e-06, "loss": 0.0401, "step": 63160 }, { "epoch": 0.5111254955902581, "grad_norm": 0.4937072694301605, "learning_rate": 9.28076963592865e-06, "loss": 0.0327, "step": 63170 }, { "epoch": 0.5112064082854599, "grad_norm": 0.3677164614200592, "learning_rate": 9.280404738465752e-06, "loss": 0.0282, "step": 63180 }, { "epoch": 0.5112873209806619, "grad_norm": 0.17023815214633942, "learning_rate": 9.280039755639218e-06, "loss": 0.0405, "step": 63190 }, { "epoch": 0.5113682336758637, "grad_norm": 0.3398616909980774, "learning_rate": 9.279674687456327e-06, "loss": 0.0388, "step": 63200 }, { "epoch": 0.5114491463710656, "grad_norm": 0.35939642786979675, "learning_rate": 9.279309533924356e-06, "loss": 0.0286, "step": 63210 }, { "epoch": 0.5115300590662675, "grad_norm": 0.6130382418632507, "learning_rate": 9.278944295050594e-06, "loss": 0.0272, "step": 63220 }, { "epoch": 0.5116109717614694, "grad_norm": 0.36179399490356445, "learning_rate": 9.27857897084232e-06, "loss": 0.0333, "step": 63230 }, { "epoch": 0.5116918844566712, "grad_norm": 0.41274192929267883, "learning_rate": 9.27821356130682e-06, "loss": 0.0625, "step": 63240 }, { "epoch": 0.5117727971518732, "grad_norm": 0.4897558093070984, "learning_rate": 9.277848066451384e-06, "loss": 0.0359, "step": 63250 }, { "epoch": 0.511853709847075, "grad_norm": 0.7054951190948486, "learning_rate": 9.277482486283299e-06, "loss": 0.0336, "step": 63260 }, { "epoch": 0.5119346225422768, "grad_norm": 0.9835597276687622, "learning_rate": 9.277116820809854e-06, "loss": 0.0434, "step": 63270 }, { "epoch": 0.5120155352374788, "grad_norm": 0.2876507341861725, "learning_rate": 9.276751070038344e-06, "loss": 0.0302, "step": 63280 }, { "epoch": 0.5120964479326806, "grad_norm": 0.4387615919113159, "learning_rate": 9.276385233976064e-06, "loss": 0.04, "step": 63290 }, { "epoch": 0.5121773606278825, "grad_norm": 0.7476040720939636, "learning_rate": 9.276019312630307e-06, "loss": 0.0458, "step": 63300 }, { "epoch": 0.5122582733230844, "grad_norm": 0.2937179505825043, "learning_rate": 9.275653306008374e-06, "loss": 0.0263, "step": 63310 }, { "epoch": 0.5123391860182863, "grad_norm": 1.0423905849456787, "learning_rate": 9.27528721411756e-06, "loss": 0.0297, "step": 63320 }, { "epoch": 0.5124200987134881, "grad_norm": 0.22272543609142303, "learning_rate": 9.274921036965167e-06, "loss": 0.0458, "step": 63330 }, { "epoch": 0.51250101140869, "grad_norm": 0.33836328983306885, "learning_rate": 9.274554774558501e-06, "loss": 0.0216, "step": 63340 }, { "epoch": 0.5125819241038919, "grad_norm": 0.31508904695510864, "learning_rate": 9.274188426904864e-06, "loss": 0.0324, "step": 63350 }, { "epoch": 0.5126628367990937, "grad_norm": 0.5421606302261353, "learning_rate": 9.273821994011561e-06, "loss": 0.0325, "step": 63360 }, { "epoch": 0.5127437494942957, "grad_norm": 0.2731892764568329, "learning_rate": 9.273455475885902e-06, "loss": 0.0264, "step": 63370 }, { "epoch": 0.5128246621894975, "grad_norm": 0.4580976366996765, "learning_rate": 9.273088872535195e-06, "loss": 0.0514, "step": 63380 }, { "epoch": 0.5129055748846995, "grad_norm": 0.45316120982170105, "learning_rate": 9.272722183966749e-06, "loss": 0.043, "step": 63390 }, { "epoch": 0.5129864875799013, "grad_norm": 0.552532970905304, "learning_rate": 9.272355410187881e-06, "loss": 0.0396, "step": 63400 }, { "epoch": 0.5130674002751031, "grad_norm": 0.4685087203979492, "learning_rate": 9.271988551205904e-06, "loss": 0.0416, "step": 63410 }, { "epoch": 0.513148312970305, "grad_norm": 0.2466273456811905, "learning_rate": 9.271621607028132e-06, "loss": 0.0346, "step": 63420 }, { "epoch": 0.5132292256655069, "grad_norm": 0.4501200318336487, "learning_rate": 9.271254577661887e-06, "loss": 0.05, "step": 63430 }, { "epoch": 0.5133101383607088, "grad_norm": 0.4540790319442749, "learning_rate": 9.270887463114487e-06, "loss": 0.0429, "step": 63440 }, { "epoch": 0.5133910510559107, "grad_norm": 0.2830051779747009, "learning_rate": 9.27052026339325e-06, "loss": 0.0481, "step": 63450 }, { "epoch": 0.5134719637511126, "grad_norm": 0.4136173129081726, "learning_rate": 9.270152978505505e-06, "loss": 0.0291, "step": 63460 }, { "epoch": 0.5135528764463144, "grad_norm": 0.2787303328514099, "learning_rate": 9.26978560845857e-06, "loss": 0.0291, "step": 63470 }, { "epoch": 0.5136337891415163, "grad_norm": 0.532702624797821, "learning_rate": 9.269418153259777e-06, "loss": 0.0369, "step": 63480 }, { "epoch": 0.5137147018367182, "grad_norm": 0.5180354714393616, "learning_rate": 9.269050612916453e-06, "loss": 0.0435, "step": 63490 }, { "epoch": 0.51379561453192, "grad_norm": 0.4612210988998413, "learning_rate": 9.268682987435925e-06, "loss": 0.0329, "step": 63500 }, { "epoch": 0.513876527227122, "grad_norm": 0.3194364011287689, "learning_rate": 9.268315276825527e-06, "loss": 0.0335, "step": 63510 }, { "epoch": 0.5139574399223238, "grad_norm": 0.21758951246738434, "learning_rate": 9.267947481092591e-06, "loss": 0.0255, "step": 63520 }, { "epoch": 0.5140383526175257, "grad_norm": 0.5660161972045898, "learning_rate": 9.267579600244453e-06, "loss": 0.0281, "step": 63530 }, { "epoch": 0.5141192653127276, "grad_norm": 0.4678327441215515, "learning_rate": 9.26721163428845e-06, "loss": 0.0473, "step": 63540 }, { "epoch": 0.5142001780079295, "grad_norm": 0.4932693839073181, "learning_rate": 9.266843583231919e-06, "loss": 0.0377, "step": 63550 }, { "epoch": 0.5142810907031313, "grad_norm": 0.23944202065467834, "learning_rate": 9.266475447082201e-06, "loss": 0.0321, "step": 63560 }, { "epoch": 0.5143620033983332, "grad_norm": 0.35507822036743164, "learning_rate": 9.266107225846636e-06, "loss": 0.0444, "step": 63570 }, { "epoch": 0.5144429160935351, "grad_norm": 0.23334752023220062, "learning_rate": 9.26573891953257e-06, "loss": 0.0378, "step": 63580 }, { "epoch": 0.5145238287887369, "grad_norm": 0.3527379631996155, "learning_rate": 9.265370528147346e-06, "loss": 0.0331, "step": 63590 }, { "epoch": 0.5146047414839389, "grad_norm": 0.25854572653770447, "learning_rate": 9.265002051698311e-06, "loss": 0.0209, "step": 63600 }, { "epoch": 0.5146856541791407, "grad_norm": 0.4147724509239197, "learning_rate": 9.264633490192813e-06, "loss": 0.0352, "step": 63610 }, { "epoch": 0.5147665668743426, "grad_norm": 0.4952628016471863, "learning_rate": 9.264264843638204e-06, "loss": 0.0348, "step": 63620 }, { "epoch": 0.5148474795695445, "grad_norm": 0.4121823310852051, "learning_rate": 9.263896112041834e-06, "loss": 0.0292, "step": 63630 }, { "epoch": 0.5149283922647463, "grad_norm": 0.4142109155654907, "learning_rate": 9.26352729541106e-06, "loss": 0.0247, "step": 63640 }, { "epoch": 0.5150093049599482, "grad_norm": 0.4246845543384552, "learning_rate": 9.263158393753233e-06, "loss": 0.0349, "step": 63650 }, { "epoch": 0.5150902176551501, "grad_norm": 0.5890137553215027, "learning_rate": 9.262789407075712e-06, "loss": 0.0445, "step": 63660 }, { "epoch": 0.515171130350352, "grad_norm": 0.4594725966453552, "learning_rate": 9.262420335385856e-06, "loss": 0.0282, "step": 63670 }, { "epoch": 0.5152520430455538, "grad_norm": 0.3940883278846741, "learning_rate": 9.262051178691024e-06, "loss": 0.0217, "step": 63680 }, { "epoch": 0.5153329557407558, "grad_norm": 0.6667659282684326, "learning_rate": 9.26168193699858e-06, "loss": 0.0412, "step": 63690 }, { "epoch": 0.5154138684359576, "grad_norm": 1.0047746896743774, "learning_rate": 9.261312610315885e-06, "loss": 0.0318, "step": 63700 }, { "epoch": 0.5154947811311594, "grad_norm": 0.8567147254943848, "learning_rate": 9.260943198650306e-06, "loss": 0.0305, "step": 63710 }, { "epoch": 0.5155756938263614, "grad_norm": 0.390826016664505, "learning_rate": 9.260573702009211e-06, "loss": 0.0459, "step": 63720 }, { "epoch": 0.5156566065215632, "grad_norm": 0.608199417591095, "learning_rate": 9.260204120399968e-06, "loss": 0.0387, "step": 63730 }, { "epoch": 0.5157375192167651, "grad_norm": 0.24082842469215393, "learning_rate": 9.259834453829948e-06, "loss": 0.035, "step": 63740 }, { "epoch": 0.515818431911967, "grad_norm": 0.48069775104522705, "learning_rate": 9.259464702306523e-06, "loss": 0.0363, "step": 63750 }, { "epoch": 0.5158993446071689, "grad_norm": 0.41894590854644775, "learning_rate": 9.259094865837065e-06, "loss": 0.0235, "step": 63760 }, { "epoch": 0.5159802573023707, "grad_norm": 0.33429333567619324, "learning_rate": 9.25872494442895e-06, "loss": 0.042, "step": 63770 }, { "epoch": 0.5160611699975726, "grad_norm": 0.38156095147132874, "learning_rate": 9.25835493808956e-06, "loss": 0.0348, "step": 63780 }, { "epoch": 0.5161420826927745, "grad_norm": 0.7240048050880432, "learning_rate": 9.25798484682627e-06, "loss": 0.0336, "step": 63790 }, { "epoch": 0.5162229953879763, "grad_norm": 0.5516490936279297, "learning_rate": 9.257614670646458e-06, "loss": 0.0418, "step": 63800 }, { "epoch": 0.5163039080831783, "grad_norm": 0.7610754370689392, "learning_rate": 9.257244409557513e-06, "loss": 0.0234, "step": 63810 }, { "epoch": 0.5163848207783801, "grad_norm": 0.3472352623939514, "learning_rate": 9.256874063566814e-06, "loss": 0.0377, "step": 63820 }, { "epoch": 0.516465733473582, "grad_norm": 0.7710808515548706, "learning_rate": 9.25650363268175e-06, "loss": 0.0437, "step": 63830 }, { "epoch": 0.5165466461687839, "grad_norm": 0.31611356139183044, "learning_rate": 9.256133116909705e-06, "loss": 0.0279, "step": 63840 }, { "epoch": 0.5166275588639858, "grad_norm": 0.3721317946910858, "learning_rate": 9.255762516258072e-06, "loss": 0.0293, "step": 63850 }, { "epoch": 0.5167084715591876, "grad_norm": 0.051112934947013855, "learning_rate": 9.255391830734238e-06, "loss": 0.028, "step": 63860 }, { "epoch": 0.5167893842543895, "grad_norm": 0.6818485260009766, "learning_rate": 9.2550210603456e-06, "loss": 0.0281, "step": 63870 }, { "epoch": 0.5168702969495914, "grad_norm": 0.6122468113899231, "learning_rate": 9.254650205099546e-06, "loss": 0.0374, "step": 63880 }, { "epoch": 0.5169512096447932, "grad_norm": 0.32736727595329285, "learning_rate": 9.254279265003478e-06, "loss": 0.0307, "step": 63890 }, { "epoch": 0.5170321223399952, "grad_norm": 0.5919323563575745, "learning_rate": 9.253908240064792e-06, "loss": 0.0364, "step": 63900 }, { "epoch": 0.517113035035197, "grad_norm": 0.6276642084121704, "learning_rate": 9.253537130290886e-06, "loss": 0.0433, "step": 63910 }, { "epoch": 0.517193947730399, "grad_norm": 0.1991894245147705, "learning_rate": 9.25316593568916e-06, "loss": 0.0415, "step": 63920 }, { "epoch": 0.5172748604256008, "grad_norm": 0.43064653873443604, "learning_rate": 9.252794656267019e-06, "loss": 0.0295, "step": 63930 }, { "epoch": 0.5173557731208026, "grad_norm": 0.4250980019569397, "learning_rate": 9.252423292031869e-06, "loss": 0.0389, "step": 63940 }, { "epoch": 0.5174366858160045, "grad_norm": 0.7217625975608826, "learning_rate": 9.25205184299111e-06, "loss": 0.0438, "step": 63950 }, { "epoch": 0.5175175985112064, "grad_norm": 0.6386745572090149, "learning_rate": 9.251680309152155e-06, "loss": 0.0326, "step": 63960 }, { "epoch": 0.5175985112064083, "grad_norm": 0.49427396059036255, "learning_rate": 9.251308690522413e-06, "loss": 0.0351, "step": 63970 }, { "epoch": 0.5176794239016101, "grad_norm": 0.3869282603263855, "learning_rate": 9.250936987109293e-06, "loss": 0.0318, "step": 63980 }, { "epoch": 0.5177603365968121, "grad_norm": 0.5126467943191528, "learning_rate": 9.250565198920209e-06, "loss": 0.0264, "step": 63990 }, { "epoch": 0.5178412492920139, "grad_norm": 0.4262933135032654, "learning_rate": 9.250193325962575e-06, "loss": 0.0369, "step": 64000 }, { "epoch": 0.5179221619872157, "grad_norm": 0.6806911826133728, "learning_rate": 9.24982136824381e-06, "loss": 0.0428, "step": 64010 }, { "epoch": 0.5180030746824177, "grad_norm": 0.4457624554634094, "learning_rate": 9.249449325771327e-06, "loss": 0.0426, "step": 64020 }, { "epoch": 0.5180839873776195, "grad_norm": 0.2575710415840149, "learning_rate": 9.249077198552549e-06, "loss": 0.038, "step": 64030 }, { "epoch": 0.5181649000728215, "grad_norm": 0.4603672921657562, "learning_rate": 9.248704986594896e-06, "loss": 0.045, "step": 64040 }, { "epoch": 0.5182458127680233, "grad_norm": 0.5065626502037048, "learning_rate": 9.248332689905793e-06, "loss": 0.0343, "step": 64050 }, { "epoch": 0.5183267254632252, "grad_norm": 0.5338259339332581, "learning_rate": 9.247960308492661e-06, "loss": 0.0275, "step": 64060 }, { "epoch": 0.518407638158427, "grad_norm": 0.32485538721084595, "learning_rate": 9.247587842362928e-06, "loss": 0.0298, "step": 64070 }, { "epoch": 0.5184885508536289, "grad_norm": 0.36399123072624207, "learning_rate": 9.247215291524025e-06, "loss": 0.0265, "step": 64080 }, { "epoch": 0.5185694635488308, "grad_norm": 0.9236512184143066, "learning_rate": 9.24684265598338e-06, "loss": 0.0434, "step": 64090 }, { "epoch": 0.5186503762440327, "grad_norm": 0.915440559387207, "learning_rate": 9.24646993574842e-06, "loss": 0.0414, "step": 64100 }, { "epoch": 0.5187312889392346, "grad_norm": 0.8481403589248657, "learning_rate": 9.246097130826585e-06, "loss": 0.0383, "step": 64110 }, { "epoch": 0.5188122016344364, "grad_norm": 0.12632279098033905, "learning_rate": 9.245724241225304e-06, "loss": 0.0342, "step": 64120 }, { "epoch": 0.5188931143296384, "grad_norm": 0.5466464161872864, "learning_rate": 9.245351266952019e-06, "loss": 0.0392, "step": 64130 }, { "epoch": 0.5189740270248402, "grad_norm": 0.7711486220359802, "learning_rate": 9.244978208014163e-06, "loss": 0.0434, "step": 64140 }, { "epoch": 0.5190549397200421, "grad_norm": 0.3681171238422394, "learning_rate": 9.24460506441918e-06, "loss": 0.0411, "step": 64150 }, { "epoch": 0.519135852415244, "grad_norm": 0.39226043224334717, "learning_rate": 9.244231836174508e-06, "loss": 0.0343, "step": 64160 }, { "epoch": 0.5192167651104458, "grad_norm": 1.0693778991699219, "learning_rate": 9.243858523287593e-06, "loss": 0.0541, "step": 64170 }, { "epoch": 0.5192976778056477, "grad_norm": 0.3807600140571594, "learning_rate": 9.24348512576588e-06, "loss": 0.0305, "step": 64180 }, { "epoch": 0.5193785905008496, "grad_norm": 0.3537711203098297, "learning_rate": 9.243111643616814e-06, "loss": 0.025, "step": 64190 }, { "epoch": 0.5194595031960515, "grad_norm": 0.3851113021373749, "learning_rate": 9.242738076847841e-06, "loss": 0.0274, "step": 64200 }, { "epoch": 0.5195404158912533, "grad_norm": 0.7239761352539062, "learning_rate": 9.242364425466418e-06, "loss": 0.0413, "step": 64210 }, { "epoch": 0.5196213285864553, "grad_norm": 0.4356207847595215, "learning_rate": 9.24199068947999e-06, "loss": 0.0359, "step": 64220 }, { "epoch": 0.5197022412816571, "grad_norm": 0.23867268860340118, "learning_rate": 9.241616868896013e-06, "loss": 0.0415, "step": 64230 }, { "epoch": 0.5197831539768589, "grad_norm": 0.6352299451828003, "learning_rate": 9.241242963721943e-06, "loss": 0.0335, "step": 64240 }, { "epoch": 0.5198640666720609, "grad_norm": 1.1263641119003296, "learning_rate": 9.240868973965236e-06, "loss": 0.0287, "step": 64250 }, { "epoch": 0.5199449793672627, "grad_norm": 0.6243805885314941, "learning_rate": 9.240494899633349e-06, "loss": 0.0404, "step": 64260 }, { "epoch": 0.5200258920624646, "grad_norm": 0.16010989248752594, "learning_rate": 9.240120740733744e-06, "loss": 0.0239, "step": 64270 }, { "epoch": 0.5201068047576665, "grad_norm": 0.5262587070465088, "learning_rate": 9.239746497273883e-06, "loss": 0.033, "step": 64280 }, { "epoch": 0.5201877174528684, "grad_norm": 0.5340539813041687, "learning_rate": 9.239372169261227e-06, "loss": 0.0425, "step": 64290 }, { "epoch": 0.5202686301480702, "grad_norm": 0.2741093337535858, "learning_rate": 9.238997756703242e-06, "loss": 0.042, "step": 64300 }, { "epoch": 0.5203495428432721, "grad_norm": 0.02874811738729477, "learning_rate": 9.238623259607397e-06, "loss": 0.0321, "step": 64310 }, { "epoch": 0.520430455538474, "grad_norm": 0.7000007033348083, "learning_rate": 9.238248677981162e-06, "loss": 0.0436, "step": 64320 }, { "epoch": 0.5205113682336758, "grad_norm": 0.43494030833244324, "learning_rate": 9.237874011832e-06, "loss": 0.0237, "step": 64330 }, { "epoch": 0.5205922809288778, "grad_norm": 1.0511350631713867, "learning_rate": 9.237499261167388e-06, "loss": 0.0411, "step": 64340 }, { "epoch": 0.5206731936240796, "grad_norm": 0.5204293131828308, "learning_rate": 9.237124425994801e-06, "loss": 0.0422, "step": 64350 }, { "epoch": 0.5207541063192815, "grad_norm": 0.6055399775505066, "learning_rate": 9.236749506321711e-06, "loss": 0.0278, "step": 64360 }, { "epoch": 0.5208350190144834, "grad_norm": 0.5719600319862366, "learning_rate": 9.236374502155598e-06, "loss": 0.0386, "step": 64370 }, { "epoch": 0.5209159317096852, "grad_norm": 0.3759981691837311, "learning_rate": 9.235999413503936e-06, "loss": 0.026, "step": 64380 }, { "epoch": 0.5209968444048871, "grad_norm": 0.5228953957557678, "learning_rate": 9.23562424037421e-06, "loss": 0.0449, "step": 64390 }, { "epoch": 0.521077757100089, "grad_norm": 0.3458087742328644, "learning_rate": 9.235248982773901e-06, "loss": 0.0307, "step": 64400 }, { "epoch": 0.5211586697952909, "grad_norm": 0.6441197991371155, "learning_rate": 9.234873640710492e-06, "loss": 0.0331, "step": 64410 }, { "epoch": 0.5212395824904927, "grad_norm": 0.5700247287750244, "learning_rate": 9.234498214191467e-06, "loss": 0.028, "step": 64420 }, { "epoch": 0.5213204951856947, "grad_norm": 0.9389527440071106, "learning_rate": 9.234122703224316e-06, "loss": 0.0442, "step": 64430 }, { "epoch": 0.5214014078808965, "grad_norm": 0.674392580986023, "learning_rate": 9.233747107816525e-06, "loss": 0.0544, "step": 64440 }, { "epoch": 0.5214823205760983, "grad_norm": 0.3869815170764923, "learning_rate": 9.233371427975586e-06, "loss": 0.0563, "step": 64450 }, { "epoch": 0.5215632332713003, "grad_norm": 0.7234091758728027, "learning_rate": 9.23299566370899e-06, "loss": 0.0169, "step": 64460 }, { "epoch": 0.5216441459665021, "grad_norm": 0.545708179473877, "learning_rate": 9.232619815024234e-06, "loss": 0.0305, "step": 64470 }, { "epoch": 0.521725058661704, "grad_norm": 0.8455477356910706, "learning_rate": 9.232243881928811e-06, "loss": 0.0287, "step": 64480 }, { "epoch": 0.5218059713569059, "grad_norm": 0.49834954738616943, "learning_rate": 9.231867864430216e-06, "loss": 0.0375, "step": 64490 }, { "epoch": 0.5218868840521078, "grad_norm": 0.5375959277153015, "learning_rate": 9.231491762535954e-06, "loss": 0.0747, "step": 64500 }, { "epoch": 0.5219677967473096, "grad_norm": 0.7217475175857544, "learning_rate": 9.23111557625352e-06, "loss": 0.0448, "step": 64510 }, { "epoch": 0.5220487094425116, "grad_norm": 0.6960623860359192, "learning_rate": 9.230739305590417e-06, "loss": 0.0451, "step": 64520 }, { "epoch": 0.5221296221377134, "grad_norm": 0.3676433861255646, "learning_rate": 9.23036295055415e-06, "loss": 0.0419, "step": 64530 }, { "epoch": 0.5222105348329152, "grad_norm": 0.5528730154037476, "learning_rate": 9.229986511152227e-06, "loss": 0.0302, "step": 64540 }, { "epoch": 0.5222914475281172, "grad_norm": 0.12374204397201538, "learning_rate": 9.229609987392151e-06, "loss": 0.0325, "step": 64550 }, { "epoch": 0.522372360223319, "grad_norm": 0.5515012145042419, "learning_rate": 9.229233379281433e-06, "loss": 0.0377, "step": 64560 }, { "epoch": 0.522453272918521, "grad_norm": 0.24937528371810913, "learning_rate": 9.228856686827586e-06, "loss": 0.0405, "step": 64570 }, { "epoch": 0.5225341856137228, "grad_norm": 0.4666562080383301, "learning_rate": 9.228479910038116e-06, "loss": 0.03, "step": 64580 }, { "epoch": 0.5226150983089247, "grad_norm": 0.6992354989051819, "learning_rate": 9.228103048920544e-06, "loss": 0.0465, "step": 64590 }, { "epoch": 0.5226960110041265, "grad_norm": 0.9123501181602478, "learning_rate": 9.227726103482382e-06, "loss": 0.041, "step": 64600 }, { "epoch": 0.5227769236993284, "grad_norm": 0.6608279347419739, "learning_rate": 9.227349073731146e-06, "loss": 0.0503, "step": 64610 }, { "epoch": 0.5228578363945303, "grad_norm": 0.7237277626991272, "learning_rate": 9.22697195967436e-06, "loss": 0.0514, "step": 64620 }, { "epoch": 0.5229387490897321, "grad_norm": 0.7633506059646606, "learning_rate": 9.22659476131954e-06, "loss": 0.0293, "step": 64630 }, { "epoch": 0.5230196617849341, "grad_norm": 1.200989007949829, "learning_rate": 9.22621747867421e-06, "loss": 0.0279, "step": 64640 }, { "epoch": 0.5231005744801359, "grad_norm": 0.6570940017700195, "learning_rate": 9.225840111745894e-06, "loss": 0.0296, "step": 64650 }, { "epoch": 0.5231814871753379, "grad_norm": 0.47564154863357544, "learning_rate": 9.22546266054212e-06, "loss": 0.0336, "step": 64660 }, { "epoch": 0.5232623998705397, "grad_norm": 0.6314670443534851, "learning_rate": 9.225085125070411e-06, "loss": 0.0258, "step": 64670 }, { "epoch": 0.5233433125657415, "grad_norm": 0.20441186428070068, "learning_rate": 9.224707505338302e-06, "loss": 0.0444, "step": 64680 }, { "epoch": 0.5234242252609435, "grad_norm": 0.2541061043739319, "learning_rate": 9.224329801353318e-06, "loss": 0.0362, "step": 64690 }, { "epoch": 0.5235051379561453, "grad_norm": 0.004604958463460207, "learning_rate": 9.223952013122996e-06, "loss": 0.0198, "step": 64700 }, { "epoch": 0.5235860506513472, "grad_norm": 0.036427728831768036, "learning_rate": 9.223574140654867e-06, "loss": 0.0283, "step": 64710 }, { "epoch": 0.523666963346549, "grad_norm": 0.4113859236240387, "learning_rate": 9.223196183956468e-06, "loss": 0.0489, "step": 64720 }, { "epoch": 0.523747876041751, "grad_norm": 0.3546857535839081, "learning_rate": 9.222818143035337e-06, "loss": 0.0341, "step": 64730 }, { "epoch": 0.5238287887369528, "grad_norm": 0.5335444211959839, "learning_rate": 9.222440017899012e-06, "loss": 0.0407, "step": 64740 }, { "epoch": 0.5239097014321547, "grad_norm": 0.49053388833999634, "learning_rate": 9.222061808555037e-06, "loss": 0.042, "step": 64750 }, { "epoch": 0.5239906141273566, "grad_norm": 0.5504056811332703, "learning_rate": 9.22168351501095e-06, "loss": 0.0331, "step": 64760 }, { "epoch": 0.5240715268225584, "grad_norm": 0.39355283975601196, "learning_rate": 9.2213051372743e-06, "loss": 0.0446, "step": 64770 }, { "epoch": 0.5241524395177604, "grad_norm": 0.2657284438610077, "learning_rate": 9.220926675352628e-06, "loss": 0.034, "step": 64780 }, { "epoch": 0.5242333522129622, "grad_norm": 0.4905385971069336, "learning_rate": 9.220548129253486e-06, "loss": 0.0389, "step": 64790 }, { "epoch": 0.5243142649081641, "grad_norm": 0.8758964538574219, "learning_rate": 9.220169498984421e-06, "loss": 0.0337, "step": 64800 }, { "epoch": 0.524395177603366, "grad_norm": 0.3564758598804474, "learning_rate": 9.219790784552984e-06, "loss": 0.0273, "step": 64810 }, { "epoch": 0.5244760902985679, "grad_norm": 0.5485759377479553, "learning_rate": 9.21941198596673e-06, "loss": 0.0284, "step": 64820 }, { "epoch": 0.5245570029937697, "grad_norm": 0.8347311615943909, "learning_rate": 9.21903310323321e-06, "loss": 0.0269, "step": 64830 }, { "epoch": 0.5246379156889716, "grad_norm": 0.34416571259498596, "learning_rate": 9.218654136359981e-06, "loss": 0.0324, "step": 64840 }, { "epoch": 0.5247188283841735, "grad_norm": 0.48253077268600464, "learning_rate": 9.218275085354603e-06, "loss": 0.0384, "step": 64850 }, { "epoch": 0.5247997410793753, "grad_norm": 0.8462941646575928, "learning_rate": 9.217895950224633e-06, "loss": 0.0292, "step": 64860 }, { "epoch": 0.5248806537745773, "grad_norm": 0.7260482907295227, "learning_rate": 9.21751673097763e-06, "loss": 0.0379, "step": 64870 }, { "epoch": 0.5249615664697791, "grad_norm": 0.6742706298828125, "learning_rate": 9.217137427621163e-06, "loss": 0.0323, "step": 64880 }, { "epoch": 0.525042479164981, "grad_norm": 0.2633516490459442, "learning_rate": 9.216758040162792e-06, "loss": 0.0338, "step": 64890 }, { "epoch": 0.5251233918601829, "grad_norm": 1.0727711915969849, "learning_rate": 9.216378568610082e-06, "loss": 0.0248, "step": 64900 }, { "epoch": 0.5252043045553847, "grad_norm": 0.5471234917640686, "learning_rate": 9.215999012970604e-06, "loss": 0.0348, "step": 64910 }, { "epoch": 0.5252852172505866, "grad_norm": 0.3721957206726074, "learning_rate": 9.215619373251926e-06, "loss": 0.0314, "step": 64920 }, { "epoch": 0.5253661299457885, "grad_norm": 0.8478249311447144, "learning_rate": 9.21523964946162e-06, "loss": 0.0271, "step": 64930 }, { "epoch": 0.5254470426409904, "grad_norm": 0.5732775926589966, "learning_rate": 9.214859841607257e-06, "loss": 0.0441, "step": 64940 }, { "epoch": 0.5255279553361922, "grad_norm": 0.410632461309433, "learning_rate": 9.214479949696414e-06, "loss": 0.0209, "step": 64950 }, { "epoch": 0.5256088680313942, "grad_norm": 0.24670886993408203, "learning_rate": 9.214099973736663e-06, "loss": 0.0282, "step": 64960 }, { "epoch": 0.525689780726596, "grad_norm": 0.4849179685115814, "learning_rate": 9.213719913735586e-06, "loss": 0.0255, "step": 64970 }, { "epoch": 0.5257706934217978, "grad_norm": 0.5214430689811707, "learning_rate": 9.21333976970076e-06, "loss": 0.0298, "step": 64980 }, { "epoch": 0.5258516061169998, "grad_norm": 0.5190179944038391, "learning_rate": 9.212959541639768e-06, "loss": 0.0351, "step": 64990 }, { "epoch": 0.5259325188122016, "grad_norm": 1.1479277610778809, "learning_rate": 9.21257922956019e-06, "loss": 0.0289, "step": 65000 }, { "epoch": 0.5260134315074035, "grad_norm": 0.535960853099823, "learning_rate": 9.212198833469615e-06, "loss": 0.0557, "step": 65010 }, { "epoch": 0.5260943442026054, "grad_norm": 0.32235249876976013, "learning_rate": 9.211818353375625e-06, "loss": 0.0311, "step": 65020 }, { "epoch": 0.5261752568978073, "grad_norm": 0.24619771540164948, "learning_rate": 9.21143778928581e-06, "loss": 0.0446, "step": 65030 }, { "epoch": 0.5262561695930091, "grad_norm": 0.4183743894100189, "learning_rate": 9.211057141207758e-06, "loss": 0.0352, "step": 65040 }, { "epoch": 0.526337082288211, "grad_norm": 0.5011370182037354, "learning_rate": 9.210676409149063e-06, "loss": 0.038, "step": 65050 }, { "epoch": 0.5264179949834129, "grad_norm": 0.4669400453567505, "learning_rate": 9.210295593117315e-06, "loss": 0.0401, "step": 65060 }, { "epoch": 0.5264989076786147, "grad_norm": 0.3571552038192749, "learning_rate": 9.20991469312011e-06, "loss": 0.038, "step": 65070 }, { "epoch": 0.5265798203738167, "grad_norm": 0.5070969462394714, "learning_rate": 9.209533709165044e-06, "loss": 0.0374, "step": 65080 }, { "epoch": 0.5266607330690185, "grad_norm": 0.4766428768634796, "learning_rate": 9.209152641259715e-06, "loss": 0.041, "step": 65090 }, { "epoch": 0.5267416457642204, "grad_norm": 0.32084921002388, "learning_rate": 9.208771489411722e-06, "loss": 0.0338, "step": 65100 }, { "epoch": 0.5268225584594223, "grad_norm": 0.4779093563556671, "learning_rate": 9.208390253628667e-06, "loss": 0.0394, "step": 65110 }, { "epoch": 0.5269034711546242, "grad_norm": 0.17460928857326508, "learning_rate": 9.208008933918154e-06, "loss": 0.0264, "step": 65120 }, { "epoch": 0.526984383849826, "grad_norm": 0.28774574398994446, "learning_rate": 9.207627530287784e-06, "loss": 0.0358, "step": 65130 }, { "epoch": 0.5270652965450279, "grad_norm": 0.3920704126358032, "learning_rate": 9.207246042745167e-06, "loss": 0.0269, "step": 65140 }, { "epoch": 0.5271462092402298, "grad_norm": 0.46852853894233704, "learning_rate": 9.206864471297909e-06, "loss": 0.0436, "step": 65150 }, { "epoch": 0.5272271219354316, "grad_norm": 0.2527843117713928, "learning_rate": 9.206482815953621e-06, "loss": 0.0353, "step": 65160 }, { "epoch": 0.5273080346306336, "grad_norm": 1.3924503326416016, "learning_rate": 9.206101076719912e-06, "loss": 0.0496, "step": 65170 }, { "epoch": 0.5273889473258354, "grad_norm": 0.6833755373954773, "learning_rate": 9.205719253604398e-06, "loss": 0.0454, "step": 65180 }, { "epoch": 0.5274698600210374, "grad_norm": 0.4655592739582062, "learning_rate": 9.20533734661469e-06, "loss": 0.0366, "step": 65190 }, { "epoch": 0.5275507727162392, "grad_norm": 0.4462772011756897, "learning_rate": 9.20495535575841e-06, "loss": 0.0339, "step": 65200 }, { "epoch": 0.527631685411441, "grad_norm": 0.3315804898738861, "learning_rate": 9.20457328104317e-06, "loss": 0.0413, "step": 65210 }, { "epoch": 0.527712598106643, "grad_norm": 0.9007467031478882, "learning_rate": 9.204191122476593e-06, "loss": 0.0452, "step": 65220 }, { "epoch": 0.5277935108018448, "grad_norm": 1.0378376245498657, "learning_rate": 9.203808880066299e-06, "loss": 0.0425, "step": 65230 }, { "epoch": 0.5278744234970467, "grad_norm": 0.1399003118276596, "learning_rate": 9.203426553819913e-06, "loss": 0.0487, "step": 65240 }, { "epoch": 0.5279553361922485, "grad_norm": 0.6624507904052734, "learning_rate": 9.203044143745057e-06, "loss": 0.0317, "step": 65250 }, { "epoch": 0.5280362488874505, "grad_norm": 0.776495099067688, "learning_rate": 9.202661649849358e-06, "loss": 0.0385, "step": 65260 }, { "epoch": 0.5281171615826523, "grad_norm": 0.3699140250682831, "learning_rate": 9.202279072140445e-06, "loss": 0.0348, "step": 65270 }, { "epoch": 0.5281980742778541, "grad_norm": 0.5121055245399475, "learning_rate": 9.201896410625948e-06, "loss": 0.0277, "step": 65280 }, { "epoch": 0.5282789869730561, "grad_norm": 0.6227521300315857, "learning_rate": 9.201513665313498e-06, "loss": 0.0336, "step": 65290 }, { "epoch": 0.5283598996682579, "grad_norm": 0.49749135971069336, "learning_rate": 9.201130836210725e-06, "loss": 0.0234, "step": 65300 }, { "epoch": 0.5284408123634599, "grad_norm": 4.35449743270874, "learning_rate": 9.200747923325268e-06, "loss": 0.0304, "step": 65310 }, { "epoch": 0.5285217250586617, "grad_norm": 0.6104267835617065, "learning_rate": 9.200364926664762e-06, "loss": 0.0347, "step": 65320 }, { "epoch": 0.5286026377538636, "grad_norm": 0.3298393785953522, "learning_rate": 9.199981846236846e-06, "loss": 0.0265, "step": 65330 }, { "epoch": 0.5286835504490655, "grad_norm": 0.44290420413017273, "learning_rate": 9.199598682049158e-06, "loss": 0.0444, "step": 65340 }, { "epoch": 0.5287644631442673, "grad_norm": 0.6689687967300415, "learning_rate": 9.199215434109338e-06, "loss": 0.041, "step": 65350 }, { "epoch": 0.5288453758394692, "grad_norm": 0.5766149759292603, "learning_rate": 9.198832102425035e-06, "loss": 0.0425, "step": 65360 }, { "epoch": 0.528926288534671, "grad_norm": 0.3135486841201782, "learning_rate": 9.198448687003886e-06, "loss": 0.0333, "step": 65370 }, { "epoch": 0.529007201229873, "grad_norm": 0.6362804770469666, "learning_rate": 9.198065187853544e-06, "loss": 0.039, "step": 65380 }, { "epoch": 0.5290881139250748, "grad_norm": 0.6139477491378784, "learning_rate": 9.197681604981654e-06, "loss": 0.0225, "step": 65390 }, { "epoch": 0.5291690266202768, "grad_norm": 1.160611629486084, "learning_rate": 9.197297938395865e-06, "loss": 0.0297, "step": 65400 }, { "epoch": 0.5292499393154786, "grad_norm": 0.5473204255104065, "learning_rate": 9.196914188103829e-06, "loss": 0.036, "step": 65410 }, { "epoch": 0.5293308520106805, "grad_norm": 0.4886363446712494, "learning_rate": 9.1965303541132e-06, "loss": 0.0235, "step": 65420 }, { "epoch": 0.5294117647058824, "grad_norm": 0.49736958742141724, "learning_rate": 9.196146436431635e-06, "loss": 0.0366, "step": 65430 }, { "epoch": 0.5294926774010842, "grad_norm": 0.4843723773956299, "learning_rate": 9.195762435066785e-06, "loss": 0.0457, "step": 65440 }, { "epoch": 0.5295735900962861, "grad_norm": 0.5293253660202026, "learning_rate": 9.195378350026311e-06, "loss": 0.0415, "step": 65450 }, { "epoch": 0.529654502791488, "grad_norm": 0.5321829319000244, "learning_rate": 9.194994181317873e-06, "loss": 0.0429, "step": 65460 }, { "epoch": 0.5297354154866899, "grad_norm": 0.5567139387130737, "learning_rate": 9.194609928949132e-06, "loss": 0.0266, "step": 65470 }, { "epoch": 0.5298163281818917, "grad_norm": 0.3230293393135071, "learning_rate": 9.194225592927753e-06, "loss": 0.041, "step": 65480 }, { "epoch": 0.5298972408770937, "grad_norm": 0.3822772204875946, "learning_rate": 9.193841173261396e-06, "loss": 0.0517, "step": 65490 }, { "epoch": 0.5299781535722955, "grad_norm": 0.9635123610496521, "learning_rate": 9.193456669957732e-06, "loss": 0.0353, "step": 65500 }, { "epoch": 0.5300590662674973, "grad_norm": 0.23692166805267334, "learning_rate": 9.193072083024428e-06, "loss": 0.0292, "step": 65510 }, { "epoch": 0.5301399789626993, "grad_norm": 0.22414696216583252, "learning_rate": 9.192687412469152e-06, "loss": 0.058, "step": 65520 }, { "epoch": 0.5302208916579011, "grad_norm": 0.5200417637825012, "learning_rate": 9.192302658299577e-06, "loss": 0.0301, "step": 65530 }, { "epoch": 0.530301804353103, "grad_norm": 0.2988351881504059, "learning_rate": 9.191917820523376e-06, "loss": 0.022, "step": 65540 }, { "epoch": 0.5303827170483049, "grad_norm": 1.686163306236267, "learning_rate": 9.191532899148224e-06, "loss": 0.0375, "step": 65550 }, { "epoch": 0.5304636297435068, "grad_norm": 0.629723310470581, "learning_rate": 9.191147894181797e-06, "loss": 0.0471, "step": 65560 }, { "epoch": 0.5305445424387086, "grad_norm": 0.5507803559303284, "learning_rate": 9.190762805631772e-06, "loss": 0.0386, "step": 65570 }, { "epoch": 0.5306254551339105, "grad_norm": 0.9463150501251221, "learning_rate": 9.190377633505831e-06, "loss": 0.0363, "step": 65580 }, { "epoch": 0.5307063678291124, "grad_norm": 0.17149056494235992, "learning_rate": 9.189992377811655e-06, "loss": 0.0332, "step": 65590 }, { "epoch": 0.5307872805243142, "grad_norm": 0.6751666069030762, "learning_rate": 9.189607038556925e-06, "loss": 0.0332, "step": 65600 }, { "epoch": 0.5308681932195162, "grad_norm": 0.5792956948280334, "learning_rate": 9.189221615749328e-06, "loss": 0.0396, "step": 65610 }, { "epoch": 0.530949105914718, "grad_norm": 0.4891863763332367, "learning_rate": 9.18883610939655e-06, "loss": 0.033, "step": 65620 }, { "epoch": 0.5310300186099199, "grad_norm": 0.5841913223266602, "learning_rate": 9.188450519506277e-06, "loss": 0.041, "step": 65630 }, { "epoch": 0.5311109313051218, "grad_norm": 0.6486959457397461, "learning_rate": 9.188064846086202e-06, "loss": 0.0491, "step": 65640 }, { "epoch": 0.5311918440003236, "grad_norm": 0.5219584703445435, "learning_rate": 9.187679089144017e-06, "loss": 0.0405, "step": 65650 }, { "epoch": 0.5312727566955255, "grad_norm": 0.5213175415992737, "learning_rate": 9.187293248687411e-06, "loss": 0.0328, "step": 65660 }, { "epoch": 0.5313536693907274, "grad_norm": 0.2720926105976105, "learning_rate": 9.186907324724081e-06, "loss": 0.0277, "step": 65670 }, { "epoch": 0.5314345820859293, "grad_norm": 0.4379120171070099, "learning_rate": 9.186521317261723e-06, "loss": 0.0361, "step": 65680 }, { "epoch": 0.5315154947811311, "grad_norm": 0.9222554564476013, "learning_rate": 9.186135226308038e-06, "loss": 0.0387, "step": 65690 }, { "epoch": 0.5315964074763331, "grad_norm": 0.6220611333847046, "learning_rate": 9.18574905187072e-06, "loss": 0.0432, "step": 65700 }, { "epoch": 0.5316773201715349, "grad_norm": 0.5583272576332092, "learning_rate": 9.185362793957474e-06, "loss": 0.04, "step": 65710 }, { "epoch": 0.5317582328667368, "grad_norm": 0.4023865759372711, "learning_rate": 9.184976452576004e-06, "loss": 0.0387, "step": 65720 }, { "epoch": 0.5318391455619387, "grad_norm": 0.6025776863098145, "learning_rate": 9.184590027734013e-06, "loss": 0.047, "step": 65730 }, { "epoch": 0.5319200582571405, "grad_norm": 0.8915601372718811, "learning_rate": 9.184203519439208e-06, "loss": 0.0315, "step": 65740 }, { "epoch": 0.5320009709523424, "grad_norm": 0.6112809181213379, "learning_rate": 9.183816927699299e-06, "loss": 0.0333, "step": 65750 }, { "epoch": 0.5320818836475443, "grad_norm": 0.4936208426952362, "learning_rate": 9.183430252521992e-06, "loss": 0.0304, "step": 65760 }, { "epoch": 0.5321627963427462, "grad_norm": 1.0494232177734375, "learning_rate": 9.183043493914999e-06, "loss": 0.0315, "step": 65770 }, { "epoch": 0.532243709037948, "grad_norm": 1.3288395404815674, "learning_rate": 9.182656651886037e-06, "loss": 0.0324, "step": 65780 }, { "epoch": 0.53232462173315, "grad_norm": 1.4847819805145264, "learning_rate": 9.182269726442817e-06, "loss": 0.0407, "step": 65790 }, { "epoch": 0.5324055344283518, "grad_norm": 0.5746082067489624, "learning_rate": 9.181882717593054e-06, "loss": 0.047, "step": 65800 }, { "epoch": 0.5324864471235536, "grad_norm": 0.4308507740497589, "learning_rate": 9.18149562534447e-06, "loss": 0.0341, "step": 65810 }, { "epoch": 0.5325673598187556, "grad_norm": 0.2517021596431732, "learning_rate": 9.181108449704785e-06, "loss": 0.0308, "step": 65820 }, { "epoch": 0.5326482725139574, "grad_norm": 0.4241960048675537, "learning_rate": 9.180721190681718e-06, "loss": 0.033, "step": 65830 }, { "epoch": 0.5327291852091594, "grad_norm": 0.6308040022850037, "learning_rate": 9.180333848282991e-06, "loss": 0.0315, "step": 65840 }, { "epoch": 0.5328100979043612, "grad_norm": 0.3194485008716583, "learning_rate": 9.179946422516332e-06, "loss": 0.025, "step": 65850 }, { "epoch": 0.5328910105995631, "grad_norm": 0.39057525992393494, "learning_rate": 9.179558913389467e-06, "loss": 0.0264, "step": 65860 }, { "epoch": 0.532971923294765, "grad_norm": 0.6220933198928833, "learning_rate": 9.17917132091012e-06, "loss": 0.038, "step": 65870 }, { "epoch": 0.5330528359899668, "grad_norm": 0.23935149610042572, "learning_rate": 9.178783645086026e-06, "loss": 0.0346, "step": 65880 }, { "epoch": 0.5331337486851687, "grad_norm": 0.4811650216579437, "learning_rate": 9.178395885924914e-06, "loss": 0.0324, "step": 65890 }, { "epoch": 0.5332146613803705, "grad_norm": 0.42413803935050964, "learning_rate": 9.178008043434516e-06, "loss": 0.0264, "step": 65900 }, { "epoch": 0.5332955740755725, "grad_norm": 0.3624875247478485, "learning_rate": 9.177620117622568e-06, "loss": 0.0444, "step": 65910 }, { "epoch": 0.5333764867707743, "grad_norm": 0.8278326988220215, "learning_rate": 9.177232108496806e-06, "loss": 0.0301, "step": 65920 }, { "epoch": 0.5334573994659763, "grad_norm": 0.46254459023475647, "learning_rate": 9.176844016064968e-06, "loss": 0.0319, "step": 65930 }, { "epoch": 0.5335383121611781, "grad_norm": 0.4301152229309082, "learning_rate": 9.176455840334795e-06, "loss": 0.0281, "step": 65940 }, { "epoch": 0.5336192248563799, "grad_norm": 0.36108487844467163, "learning_rate": 9.176067581314026e-06, "loss": 0.0308, "step": 65950 }, { "epoch": 0.5337001375515819, "grad_norm": 0.5537393093109131, "learning_rate": 9.175679239010404e-06, "loss": 0.0338, "step": 65960 }, { "epoch": 0.5337810502467837, "grad_norm": 0.5838145017623901, "learning_rate": 9.175290813431676e-06, "loss": 0.0333, "step": 65970 }, { "epoch": 0.5338619629419856, "grad_norm": 0.24661053717136383, "learning_rate": 9.174902304585589e-06, "loss": 0.0481, "step": 65980 }, { "epoch": 0.5339428756371875, "grad_norm": 0.6748096942901611, "learning_rate": 9.174513712479887e-06, "loss": 0.0423, "step": 65990 }, { "epoch": 0.5340237883323894, "grad_norm": 0.2514978051185608, "learning_rate": 9.174125037122321e-06, "loss": 0.0271, "step": 66000 }, { "epoch": 0.5341047010275912, "grad_norm": 0.4618684947490692, "learning_rate": 9.173736278520647e-06, "loss": 0.0287, "step": 66010 }, { "epoch": 0.534185613722793, "grad_norm": 0.6681525707244873, "learning_rate": 9.173347436682609e-06, "loss": 0.0232, "step": 66020 }, { "epoch": 0.534266526417995, "grad_norm": 0.3065597116947174, "learning_rate": 9.172958511615971e-06, "loss": 0.0359, "step": 66030 }, { "epoch": 0.5343474391131968, "grad_norm": 0.5848621129989624, "learning_rate": 9.172569503328483e-06, "loss": 0.0234, "step": 66040 }, { "epoch": 0.5344283518083988, "grad_norm": 0.3424667418003082, "learning_rate": 9.172180411827904e-06, "loss": 0.0378, "step": 66050 }, { "epoch": 0.5345092645036006, "grad_norm": 0.3534364700317383, "learning_rate": 9.171791237121996e-06, "loss": 0.0225, "step": 66060 }, { "epoch": 0.5345901771988025, "grad_norm": 0.29051199555397034, "learning_rate": 9.171401979218518e-06, "loss": 0.0342, "step": 66070 }, { "epoch": 0.5346710898940044, "grad_norm": 0.22517411410808563, "learning_rate": 9.171012638125234e-06, "loss": 0.0356, "step": 66080 }, { "epoch": 0.5347520025892063, "grad_norm": 0.7338076233863831, "learning_rate": 9.17062321384991e-06, "loss": 0.0163, "step": 66090 }, { "epoch": 0.5348329152844081, "grad_norm": 0.8910619020462036, "learning_rate": 9.170233706400309e-06, "loss": 0.0424, "step": 66100 }, { "epoch": 0.53491382797961, "grad_norm": 0.7037370800971985, "learning_rate": 9.169844115784198e-06, "loss": 0.0389, "step": 66110 }, { "epoch": 0.5349947406748119, "grad_norm": 0.6980148553848267, "learning_rate": 9.169454442009353e-06, "loss": 0.0204, "step": 66120 }, { "epoch": 0.5350756533700137, "grad_norm": 0.2883698642253876, "learning_rate": 9.16906468508354e-06, "loss": 0.044, "step": 66130 }, { "epoch": 0.5351565660652157, "grad_norm": 0.7344366908073425, "learning_rate": 9.168674845014534e-06, "loss": 0.0335, "step": 66140 }, { "epoch": 0.5352374787604175, "grad_norm": 0.39454758167266846, "learning_rate": 9.168284921810106e-06, "loss": 0.028, "step": 66150 }, { "epoch": 0.5353183914556194, "grad_norm": 0.4536597728729248, "learning_rate": 9.167894915478037e-06, "loss": 0.0302, "step": 66160 }, { "epoch": 0.5353993041508213, "grad_norm": 0.36729562282562256, "learning_rate": 9.167504826026104e-06, "loss": 0.0236, "step": 66170 }, { "epoch": 0.5354802168460231, "grad_norm": 0.6025574803352356, "learning_rate": 9.167114653462082e-06, "loss": 0.0372, "step": 66180 }, { "epoch": 0.535561129541225, "grad_norm": 0.6140444874763489, "learning_rate": 9.166724397793759e-06, "loss": 0.0435, "step": 66190 }, { "epoch": 0.5356420422364269, "grad_norm": 0.3389762043952942, "learning_rate": 9.16633405902891e-06, "loss": 0.029, "step": 66200 }, { "epoch": 0.5357229549316288, "grad_norm": 0.22416731715202332, "learning_rate": 9.165943637175328e-06, "loss": 0.0485, "step": 66210 }, { "epoch": 0.5358038676268306, "grad_norm": 0.7625681757926941, "learning_rate": 9.165553132240792e-06, "loss": 0.0384, "step": 66220 }, { "epoch": 0.5358847803220326, "grad_norm": 0.9213724136352539, "learning_rate": 9.165162544233093e-06, "loss": 0.0275, "step": 66230 }, { "epoch": 0.5359656930172344, "grad_norm": 0.427197128534317, "learning_rate": 9.16477187316002e-06, "loss": 0.0389, "step": 66240 }, { "epoch": 0.5360466057124362, "grad_norm": 0.7512705326080322, "learning_rate": 9.164381119029365e-06, "loss": 0.0221, "step": 66250 }, { "epoch": 0.5361275184076382, "grad_norm": 0.25884607434272766, "learning_rate": 9.16399028184892e-06, "loss": 0.0228, "step": 66260 }, { "epoch": 0.53620843110284, "grad_norm": 0.5345240831375122, "learning_rate": 9.163599361626479e-06, "loss": 0.0296, "step": 66270 }, { "epoch": 0.5362893437980419, "grad_norm": 0.5932343602180481, "learning_rate": 9.163208358369839e-06, "loss": 0.0386, "step": 66280 }, { "epoch": 0.5363702564932438, "grad_norm": 0.7491304874420166, "learning_rate": 9.162817272086796e-06, "loss": 0.0385, "step": 66290 }, { "epoch": 0.5364511691884457, "grad_norm": 0.28211939334869385, "learning_rate": 9.162426102785151e-06, "loss": 0.0426, "step": 66300 }, { "epoch": 0.5365320818836475, "grad_norm": 0.6028282642364502, "learning_rate": 9.162034850472705e-06, "loss": 0.038, "step": 66310 }, { "epoch": 0.5366129945788494, "grad_norm": 0.3162485361099243, "learning_rate": 9.161643515157261e-06, "loss": 0.0497, "step": 66320 }, { "epoch": 0.5366939072740513, "grad_norm": 0.3434392213821411, "learning_rate": 9.161252096846622e-06, "loss": 0.0418, "step": 66330 }, { "epoch": 0.5367748199692531, "grad_norm": 0.5949680805206299, "learning_rate": 9.160860595548595e-06, "loss": 0.0365, "step": 66340 }, { "epoch": 0.5368557326644551, "grad_norm": 0.5143424272537231, "learning_rate": 9.160469011270986e-06, "loss": 0.0483, "step": 66350 }, { "epoch": 0.5369366453596569, "grad_norm": 0.4381442666053772, "learning_rate": 9.16007734402161e-06, "loss": 0.0324, "step": 66360 }, { "epoch": 0.5370175580548588, "grad_norm": 0.5167180299758911, "learning_rate": 9.159685593808269e-06, "loss": 0.0268, "step": 66370 }, { "epoch": 0.5370984707500607, "grad_norm": 0.7274981141090393, "learning_rate": 9.159293760638781e-06, "loss": 0.0279, "step": 66380 }, { "epoch": 0.5371793834452626, "grad_norm": 1.0450375080108643, "learning_rate": 9.158901844520962e-06, "loss": 0.0391, "step": 66390 }, { "epoch": 0.5372602961404644, "grad_norm": 0.6186323761940002, "learning_rate": 9.158509845462623e-06, "loss": 0.0531, "step": 66400 }, { "epoch": 0.5373412088356663, "grad_norm": 0.3383715748786926, "learning_rate": 9.158117763471586e-06, "loss": 0.0183, "step": 66410 }, { "epoch": 0.5374221215308682, "grad_norm": 0.14365065097808838, "learning_rate": 9.157725598555667e-06, "loss": 0.0241, "step": 66420 }, { "epoch": 0.53750303422607, "grad_norm": 0.597239077091217, "learning_rate": 9.157333350722687e-06, "loss": 0.048, "step": 66430 }, { "epoch": 0.537583946921272, "grad_norm": 0.0929512083530426, "learning_rate": 9.156941019980475e-06, "loss": 0.0387, "step": 66440 }, { "epoch": 0.5376648596164738, "grad_norm": 0.34100982546806335, "learning_rate": 9.156548606336845e-06, "loss": 0.0439, "step": 66450 }, { "epoch": 0.5377457723116758, "grad_norm": 0.1290767639875412, "learning_rate": 9.15615610979963e-06, "loss": 0.0376, "step": 66460 }, { "epoch": 0.5378266850068776, "grad_norm": 0.6431620121002197, "learning_rate": 9.155763530376656e-06, "loss": 0.0336, "step": 66470 }, { "epoch": 0.5379075977020794, "grad_norm": 1.174468994140625, "learning_rate": 9.15537086807575e-06, "loss": 0.0402, "step": 66480 }, { "epoch": 0.5379885103972813, "grad_norm": 0.33369699120521545, "learning_rate": 9.154978122904746e-06, "loss": 0.0197, "step": 66490 }, { "epoch": 0.5380694230924832, "grad_norm": 1.0676215887069702, "learning_rate": 9.154585294871472e-06, "loss": 0.0493, "step": 66500 }, { "epoch": 0.5381503357876851, "grad_norm": 0.6355984210968018, "learning_rate": 9.154192383983768e-06, "loss": 0.0355, "step": 66510 }, { "epoch": 0.538231248482887, "grad_norm": 0.32066503167152405, "learning_rate": 9.153799390249466e-06, "loss": 0.0325, "step": 66520 }, { "epoch": 0.5383121611780889, "grad_norm": 0.32078802585601807, "learning_rate": 9.153406313676405e-06, "loss": 0.0316, "step": 66530 }, { "epoch": 0.5383930738732907, "grad_norm": 0.6446868181228638, "learning_rate": 9.15301315427242e-06, "loss": 0.0373, "step": 66540 }, { "epoch": 0.5384739865684925, "grad_norm": 0.7715731263160706, "learning_rate": 9.152619912045358e-06, "loss": 0.0424, "step": 66550 }, { "epoch": 0.5385548992636945, "grad_norm": 0.37417957186698914, "learning_rate": 9.15222658700306e-06, "loss": 0.0277, "step": 66560 }, { "epoch": 0.5386358119588963, "grad_norm": 0.3410152196884155, "learning_rate": 9.151833179153365e-06, "loss": 0.0289, "step": 66570 }, { "epoch": 0.5387167246540983, "grad_norm": 0.679611086845398, "learning_rate": 9.151439688504123e-06, "loss": 0.0245, "step": 66580 }, { "epoch": 0.5387976373493001, "grad_norm": 0.41287100315093994, "learning_rate": 9.15104611506318e-06, "loss": 0.0343, "step": 66590 }, { "epoch": 0.538878550044502, "grad_norm": 1.1571253538131714, "learning_rate": 9.150652458838387e-06, "loss": 0.0309, "step": 66600 }, { "epoch": 0.5389594627397039, "grad_norm": 0.585947573184967, "learning_rate": 9.150258719837592e-06, "loss": 0.0385, "step": 66610 }, { "epoch": 0.5390403754349057, "grad_norm": 0.5390203595161438, "learning_rate": 9.14986489806865e-06, "loss": 0.0555, "step": 66620 }, { "epoch": 0.5391212881301076, "grad_norm": 0.08528990298509598, "learning_rate": 9.14947099353941e-06, "loss": 0.0221, "step": 66630 }, { "epoch": 0.5392022008253095, "grad_norm": 0.6830422282218933, "learning_rate": 9.149077006257734e-06, "loss": 0.0484, "step": 66640 }, { "epoch": 0.5392831135205114, "grad_norm": 0.4171191453933716, "learning_rate": 9.148682936231476e-06, "loss": 0.0258, "step": 66650 }, { "epoch": 0.5393640262157132, "grad_norm": 0.6781580448150635, "learning_rate": 9.148288783468495e-06, "loss": 0.0363, "step": 66660 }, { "epoch": 0.5394449389109152, "grad_norm": 0.46388643980026245, "learning_rate": 9.147894547976652e-06, "loss": 0.0318, "step": 66670 }, { "epoch": 0.539525851606117, "grad_norm": 0.3867722451686859, "learning_rate": 9.147500229763808e-06, "loss": 0.0185, "step": 66680 }, { "epoch": 0.5396067643013189, "grad_norm": 0.4937193691730499, "learning_rate": 9.147105828837827e-06, "loss": 0.0327, "step": 66690 }, { "epoch": 0.5396876769965208, "grad_norm": 0.7103104591369629, "learning_rate": 9.146711345206575e-06, "loss": 0.0537, "step": 66700 }, { "epoch": 0.5397685896917226, "grad_norm": 0.49242275953292847, "learning_rate": 9.146316778877921e-06, "loss": 0.0458, "step": 66710 }, { "epoch": 0.5398495023869245, "grad_norm": 0.44105812907218933, "learning_rate": 9.145922129859731e-06, "loss": 0.0325, "step": 66720 }, { "epoch": 0.5399304150821264, "grad_norm": 0.36211955547332764, "learning_rate": 9.145527398159876e-06, "loss": 0.0298, "step": 66730 }, { "epoch": 0.5400113277773283, "grad_norm": 0.3374095857143402, "learning_rate": 9.14513258378623e-06, "loss": 0.0255, "step": 66740 }, { "epoch": 0.5400922404725301, "grad_norm": 0.6346596479415894, "learning_rate": 9.144737686746665e-06, "loss": 0.0465, "step": 66750 }, { "epoch": 0.5401731531677321, "grad_norm": 0.32377955317497253, "learning_rate": 9.144342707049057e-06, "loss": 0.0243, "step": 66760 }, { "epoch": 0.5402540658629339, "grad_norm": 0.48318779468536377, "learning_rate": 9.143947644701282e-06, "loss": 0.0202, "step": 66770 }, { "epoch": 0.5403349785581357, "grad_norm": 0.5236507654190063, "learning_rate": 9.14355249971122e-06, "loss": 0.0222, "step": 66780 }, { "epoch": 0.5404158912533377, "grad_norm": 0.3127056956291199, "learning_rate": 9.14315727208675e-06, "loss": 0.0258, "step": 66790 }, { "epoch": 0.5404968039485395, "grad_norm": 0.3870270848274231, "learning_rate": 9.142761961835756e-06, "loss": 0.0532, "step": 66800 }, { "epoch": 0.5405777166437414, "grad_norm": 0.5133875608444214, "learning_rate": 9.14236656896612e-06, "loss": 0.0465, "step": 66810 }, { "epoch": 0.5406586293389433, "grad_norm": 0.4597620368003845, "learning_rate": 9.14197109348573e-06, "loss": 0.0307, "step": 66820 }, { "epoch": 0.5407395420341452, "grad_norm": 0.216817706823349, "learning_rate": 9.141575535402466e-06, "loss": 0.029, "step": 66830 }, { "epoch": 0.540820454729347, "grad_norm": 0.6108351945877075, "learning_rate": 9.141179894724223e-06, "loss": 0.0217, "step": 66840 }, { "epoch": 0.5409013674245489, "grad_norm": 0.7574178576469421, "learning_rate": 9.14078417145889e-06, "loss": 0.0369, "step": 66850 }, { "epoch": 0.5409822801197508, "grad_norm": 0.5320644974708557, "learning_rate": 9.140388365614359e-06, "loss": 0.0355, "step": 66860 }, { "epoch": 0.5410631928149526, "grad_norm": 0.40941545367240906, "learning_rate": 9.139992477198522e-06, "loss": 0.0276, "step": 66870 }, { "epoch": 0.5411441055101546, "grad_norm": 0.5349811911582947, "learning_rate": 9.139596506219275e-06, "loss": 0.0215, "step": 66880 }, { "epoch": 0.5412250182053564, "grad_norm": 0.3089902102947235, "learning_rate": 9.139200452684513e-06, "loss": 0.0391, "step": 66890 }, { "epoch": 0.5413059309005583, "grad_norm": 0.5749519467353821, "learning_rate": 9.13880431660214e-06, "loss": 0.0304, "step": 66900 }, { "epoch": 0.5413868435957602, "grad_norm": 0.751663327217102, "learning_rate": 9.138408097980049e-06, "loss": 0.0323, "step": 66910 }, { "epoch": 0.541467756290962, "grad_norm": 0.41758519411087036, "learning_rate": 9.138011796826146e-06, "loss": 0.0519, "step": 66920 }, { "epoch": 0.5415486689861639, "grad_norm": 0.7219429016113281, "learning_rate": 9.137615413148334e-06, "loss": 0.0586, "step": 66930 }, { "epoch": 0.5416295816813658, "grad_norm": 0.3331977427005768, "learning_rate": 9.137218946954517e-06, "loss": 0.0291, "step": 66940 }, { "epoch": 0.5417104943765677, "grad_norm": 0.44518840312957764, "learning_rate": 9.136822398252603e-06, "loss": 0.039, "step": 66950 }, { "epoch": 0.5417914070717695, "grad_norm": 0.4642280042171478, "learning_rate": 9.136425767050499e-06, "loss": 0.0256, "step": 66960 }, { "epoch": 0.5418723197669715, "grad_norm": 0.4784908890724182, "learning_rate": 9.136029053356114e-06, "loss": 0.0477, "step": 66970 }, { "epoch": 0.5419532324621733, "grad_norm": 0.4639012813568115, "learning_rate": 9.135632257177363e-06, "loss": 0.0351, "step": 66980 }, { "epoch": 0.5420341451573752, "grad_norm": 0.21774174273014069, "learning_rate": 9.135235378522157e-06, "loss": 0.0469, "step": 66990 }, { "epoch": 0.5421150578525771, "grad_norm": 0.8886056542396545, "learning_rate": 9.13483841739841e-06, "loss": 0.0485, "step": 67000 }, { "epoch": 0.5421959705477789, "grad_norm": 0.7203881144523621, "learning_rate": 9.134441373814043e-06, "loss": 0.0284, "step": 67010 }, { "epoch": 0.5422768832429808, "grad_norm": 0.610262930393219, "learning_rate": 9.134044247776967e-06, "loss": 0.0346, "step": 67020 }, { "epoch": 0.5423577959381827, "grad_norm": 0.45141690969467163, "learning_rate": 9.133647039295109e-06, "loss": 0.0396, "step": 67030 }, { "epoch": 0.5424387086333846, "grad_norm": 0.2152472734451294, "learning_rate": 9.133249748376386e-06, "loss": 0.0347, "step": 67040 }, { "epoch": 0.5425196213285864, "grad_norm": 0.5073646306991577, "learning_rate": 9.132852375028722e-06, "loss": 0.0378, "step": 67050 }, { "epoch": 0.5426005340237884, "grad_norm": 0.6453728675842285, "learning_rate": 9.132454919260042e-06, "loss": 0.0639, "step": 67060 }, { "epoch": 0.5426814467189902, "grad_norm": 0.3927623927593231, "learning_rate": 9.132057381078272e-06, "loss": 0.037, "step": 67070 }, { "epoch": 0.542762359414192, "grad_norm": 0.27655601501464844, "learning_rate": 9.131659760491343e-06, "loss": 0.0304, "step": 67080 }, { "epoch": 0.542843272109394, "grad_norm": 0.36368727684020996, "learning_rate": 9.131262057507182e-06, "loss": 0.0562, "step": 67090 }, { "epoch": 0.5429241848045958, "grad_norm": 0.41202670335769653, "learning_rate": 9.13086427213372e-06, "loss": 0.0313, "step": 67100 }, { "epoch": 0.5430050974997978, "grad_norm": 0.59066241979599, "learning_rate": 9.130466404378891e-06, "loss": 0.0324, "step": 67110 }, { "epoch": 0.5430860101949996, "grad_norm": 0.3666548728942871, "learning_rate": 9.130068454250629e-06, "loss": 0.0343, "step": 67120 }, { "epoch": 0.5431669228902015, "grad_norm": 0.5713330507278442, "learning_rate": 9.12967042175687e-06, "loss": 0.0234, "step": 67130 }, { "epoch": 0.5432478355854033, "grad_norm": 0.2924724519252777, "learning_rate": 9.129272306905554e-06, "loss": 0.0373, "step": 67140 }, { "epoch": 0.5433287482806052, "grad_norm": 0.5651755928993225, "learning_rate": 9.128874109704618e-06, "loss": 0.0353, "step": 67150 }, { "epoch": 0.5434096609758071, "grad_norm": 0.7331447601318359, "learning_rate": 9.128475830162005e-06, "loss": 0.0365, "step": 67160 }, { "epoch": 0.543490573671009, "grad_norm": 0.3147222101688385, "learning_rate": 9.128077468285657e-06, "loss": 0.0285, "step": 67170 }, { "epoch": 0.5435714863662109, "grad_norm": 0.5486667156219482, "learning_rate": 9.127679024083518e-06, "loss": 0.0253, "step": 67180 }, { "epoch": 0.5436523990614127, "grad_norm": 0.5360991954803467, "learning_rate": 9.127280497563535e-06, "loss": 0.056, "step": 67190 }, { "epoch": 0.5437333117566147, "grad_norm": 0.4132169485092163, "learning_rate": 9.126881888733657e-06, "loss": 0.04, "step": 67200 }, { "epoch": 0.5438142244518165, "grad_norm": 0.607385516166687, "learning_rate": 9.12648319760183e-06, "loss": 0.0467, "step": 67210 }, { "epoch": 0.5438951371470183, "grad_norm": 0.11566343158483505, "learning_rate": 9.126084424176008e-06, "loss": 0.0414, "step": 67220 }, { "epoch": 0.5439760498422203, "grad_norm": 0.7634284496307373, "learning_rate": 9.125685568464142e-06, "loss": 0.0289, "step": 67230 }, { "epoch": 0.5440569625374221, "grad_norm": 0.6555607914924622, "learning_rate": 9.125286630474189e-06, "loss": 0.0427, "step": 67240 }, { "epoch": 0.544137875232624, "grad_norm": 0.5135276317596436, "learning_rate": 9.124887610214101e-06, "loss": 0.0231, "step": 67250 }, { "epoch": 0.5442187879278259, "grad_norm": 0.2627089023590088, "learning_rate": 9.124488507691837e-06, "loss": 0.0335, "step": 67260 }, { "epoch": 0.5442997006230278, "grad_norm": 1.9110238552093506, "learning_rate": 9.12408932291536e-06, "loss": 0.0369, "step": 67270 }, { "epoch": 0.5443806133182296, "grad_norm": 0.9015327095985413, "learning_rate": 9.123690055892624e-06, "loss": 0.0488, "step": 67280 }, { "epoch": 0.5444615260134316, "grad_norm": 0.17263129353523254, "learning_rate": 9.123290706631597e-06, "loss": 0.0279, "step": 67290 }, { "epoch": 0.5445424387086334, "grad_norm": 0.6137683987617493, "learning_rate": 9.122891275140244e-06, "loss": 0.026, "step": 67300 }, { "epoch": 0.5446233514038352, "grad_norm": 0.1983359009027481, "learning_rate": 9.122491761426526e-06, "loss": 0.0289, "step": 67310 }, { "epoch": 0.5447042640990372, "grad_norm": 0.38401055335998535, "learning_rate": 9.122092165498413e-06, "loss": 0.0361, "step": 67320 }, { "epoch": 0.544785176794239, "grad_norm": 0.47710639238357544, "learning_rate": 9.121692487363873e-06, "loss": 0.027, "step": 67330 }, { "epoch": 0.5448660894894409, "grad_norm": 0.29722440242767334, "learning_rate": 9.12129272703088e-06, "loss": 0.0308, "step": 67340 }, { "epoch": 0.5449470021846428, "grad_norm": 0.4721442759037018, "learning_rate": 9.1208928845074e-06, "loss": 0.0299, "step": 67350 }, { "epoch": 0.5450279148798447, "grad_norm": 0.5374523997306824, "learning_rate": 9.120492959801415e-06, "loss": 0.0357, "step": 67360 }, { "epoch": 0.5451088275750465, "grad_norm": 0.23509041965007782, "learning_rate": 9.120092952920894e-06, "loss": 0.0419, "step": 67370 }, { "epoch": 0.5451897402702484, "grad_norm": 0.4075503349304199, "learning_rate": 9.119692863873819e-06, "loss": 0.0297, "step": 67380 }, { "epoch": 0.5452706529654503, "grad_norm": 0.8079149723052979, "learning_rate": 9.119292692668166e-06, "loss": 0.0328, "step": 67390 }, { "epoch": 0.5453515656606521, "grad_norm": 0.4613029658794403, "learning_rate": 9.118892439311917e-06, "loss": 0.0312, "step": 67400 }, { "epoch": 0.5454324783558541, "grad_norm": 0.7280117273330688, "learning_rate": 9.118492103813053e-06, "loss": 0.0415, "step": 67410 }, { "epoch": 0.5455133910510559, "grad_norm": 0.30517667531967163, "learning_rate": 9.118091686179558e-06, "loss": 0.0441, "step": 67420 }, { "epoch": 0.5455943037462578, "grad_norm": 0.3526233434677124, "learning_rate": 9.117691186419418e-06, "loss": 0.0486, "step": 67430 }, { "epoch": 0.5456752164414597, "grad_norm": 0.29825183749198914, "learning_rate": 9.11729060454062e-06, "loss": 0.0292, "step": 67440 }, { "epoch": 0.5457561291366615, "grad_norm": 2.1538772583007812, "learning_rate": 9.116889940551155e-06, "loss": 0.0419, "step": 67450 }, { "epoch": 0.5458370418318634, "grad_norm": 0.3795783519744873, "learning_rate": 9.11648919445901e-06, "loss": 0.0343, "step": 67460 }, { "epoch": 0.5459179545270653, "grad_norm": 0.8457138538360596, "learning_rate": 9.116088366272177e-06, "loss": 0.0357, "step": 67470 }, { "epoch": 0.5459988672222672, "grad_norm": 0.12591829895973206, "learning_rate": 9.115687455998653e-06, "loss": 0.0241, "step": 67480 }, { "epoch": 0.546079779917469, "grad_norm": 0.5485312938690186, "learning_rate": 9.11528646364643e-06, "loss": 0.0385, "step": 67490 }, { "epoch": 0.546160692612671, "grad_norm": 0.24397599697113037, "learning_rate": 9.114885389223507e-06, "loss": 0.0221, "step": 67500 }, { "epoch": 0.5462416053078728, "grad_norm": 0.3262820541858673, "learning_rate": 9.114484232737881e-06, "loss": 0.0405, "step": 67510 }, { "epoch": 0.5463225180030746, "grad_norm": 0.202842116355896, "learning_rate": 9.114082994197553e-06, "loss": 0.0378, "step": 67520 }, { "epoch": 0.5464034306982766, "grad_norm": 0.42849671840667725, "learning_rate": 9.113681673610526e-06, "loss": 0.0281, "step": 67530 }, { "epoch": 0.5464843433934784, "grad_norm": 0.6635915040969849, "learning_rate": 9.113280270984801e-06, "loss": 0.0223, "step": 67540 }, { "epoch": 0.5465652560886803, "grad_norm": 0.32708272337913513, "learning_rate": 9.112878786328387e-06, "loss": 0.0517, "step": 67550 }, { "epoch": 0.5466461687838822, "grad_norm": 0.5013366937637329, "learning_rate": 9.112477219649286e-06, "loss": 0.0375, "step": 67560 }, { "epoch": 0.5467270814790841, "grad_norm": 0.4741608202457428, "learning_rate": 9.11207557095551e-06, "loss": 0.0297, "step": 67570 }, { "epoch": 0.5468079941742859, "grad_norm": 0.39891794323921204, "learning_rate": 9.111673840255067e-06, "loss": 0.0344, "step": 67580 }, { "epoch": 0.5468889068694878, "grad_norm": 0.6768478155136108, "learning_rate": 9.11127202755597e-06, "loss": 0.0258, "step": 67590 }, { "epoch": 0.5469698195646897, "grad_norm": 0.6974939703941345, "learning_rate": 9.11087013286623e-06, "loss": 0.0277, "step": 67600 }, { "epoch": 0.5470507322598915, "grad_norm": 0.4079199433326721, "learning_rate": 9.110468156193864e-06, "loss": 0.0226, "step": 67610 }, { "epoch": 0.5471316449550935, "grad_norm": 0.21607829630374908, "learning_rate": 9.11006609754689e-06, "loss": 0.0219, "step": 67620 }, { "epoch": 0.5472125576502953, "grad_norm": 0.28231221437454224, "learning_rate": 9.109663956933323e-06, "loss": 0.0306, "step": 67630 }, { "epoch": 0.5472934703454972, "grad_norm": 0.43953806161880493, "learning_rate": 9.109261734361185e-06, "loss": 0.0352, "step": 67640 }, { "epoch": 0.5473743830406991, "grad_norm": 1.0136327743530273, "learning_rate": 9.108859429838495e-06, "loss": 0.0324, "step": 67650 }, { "epoch": 0.547455295735901, "grad_norm": 0.8410176634788513, "learning_rate": 9.10845704337328e-06, "loss": 0.0437, "step": 67660 }, { "epoch": 0.5475362084311028, "grad_norm": 0.651059627532959, "learning_rate": 9.108054574973562e-06, "loss": 0.0324, "step": 67670 }, { "epoch": 0.5476171211263047, "grad_norm": 0.4356220066547394, "learning_rate": 9.107652024647367e-06, "loss": 0.0338, "step": 67680 }, { "epoch": 0.5476980338215066, "grad_norm": 0.15331727266311646, "learning_rate": 9.107249392402726e-06, "loss": 0.0396, "step": 67690 }, { "epoch": 0.5477789465167084, "grad_norm": 0.27842995524406433, "learning_rate": 9.106846678247665e-06, "loss": 0.026, "step": 67700 }, { "epoch": 0.5478598592119104, "grad_norm": 0.3706917464733124, "learning_rate": 9.106443882190216e-06, "loss": 0.0525, "step": 67710 }, { "epoch": 0.5479407719071122, "grad_norm": 0.5614177584648132, "learning_rate": 9.106041004238415e-06, "loss": 0.032, "step": 67720 }, { "epoch": 0.5480216846023142, "grad_norm": 0.5485674142837524, "learning_rate": 9.105638044400293e-06, "loss": 0.0521, "step": 67730 }, { "epoch": 0.548102597297516, "grad_norm": 0.397299587726593, "learning_rate": 9.10523500268389e-06, "loss": 0.0361, "step": 67740 }, { "epoch": 0.5481835099927178, "grad_norm": 0.6256259083747864, "learning_rate": 9.104831879097238e-06, "loss": 0.0382, "step": 67750 }, { "epoch": 0.5482644226879198, "grad_norm": 0.3614811301231384, "learning_rate": 9.104428673648381e-06, "loss": 0.0432, "step": 67760 }, { "epoch": 0.5483453353831216, "grad_norm": 0.7834634184837341, "learning_rate": 9.10402538634536e-06, "loss": 0.0387, "step": 67770 }, { "epoch": 0.5484262480783235, "grad_norm": 0.36963874101638794, "learning_rate": 9.103622017196216e-06, "loss": 0.0382, "step": 67780 }, { "epoch": 0.5485071607735253, "grad_norm": 0.9929757714271545, "learning_rate": 9.103218566208996e-06, "loss": 0.0611, "step": 67790 }, { "epoch": 0.5485880734687273, "grad_norm": 0.8558392524719238, "learning_rate": 9.102815033391741e-06, "loss": 0.0334, "step": 67800 }, { "epoch": 0.5486689861639291, "grad_norm": 0.41132596135139465, "learning_rate": 9.102411418752504e-06, "loss": 0.0259, "step": 67810 }, { "epoch": 0.548749898859131, "grad_norm": 0.5823701024055481, "learning_rate": 9.10200772229933e-06, "loss": 0.0374, "step": 67820 }, { "epoch": 0.5488308115543329, "grad_norm": 0.5441784262657166, "learning_rate": 9.10160394404027e-06, "loss": 0.0358, "step": 67830 }, { "epoch": 0.5489117242495347, "grad_norm": 0.3415418267250061, "learning_rate": 9.101200083983381e-06, "loss": 0.0305, "step": 67840 }, { "epoch": 0.5489926369447367, "grad_norm": 0.3870795667171478, "learning_rate": 9.100796142136714e-06, "loss": 0.0399, "step": 67850 }, { "epoch": 0.5490735496399385, "grad_norm": 0.8663846850395203, "learning_rate": 9.100392118508325e-06, "loss": 0.0263, "step": 67860 }, { "epoch": 0.5491544623351404, "grad_norm": 0.5717524886131287, "learning_rate": 9.099988013106271e-06, "loss": 0.0369, "step": 67870 }, { "epoch": 0.5492353750303423, "grad_norm": 0.16134002804756165, "learning_rate": 9.099583825938613e-06, "loss": 0.0267, "step": 67880 }, { "epoch": 0.5493162877255441, "grad_norm": 0.5694897174835205, "learning_rate": 9.09917955701341e-06, "loss": 0.031, "step": 67890 }, { "epoch": 0.549397200420746, "grad_norm": 0.8670839071273804, "learning_rate": 9.098775206338723e-06, "loss": 0.0257, "step": 67900 }, { "epoch": 0.5494781131159479, "grad_norm": 0.2889086604118347, "learning_rate": 9.098370773922619e-06, "loss": 0.044, "step": 67910 }, { "epoch": 0.5495590258111498, "grad_norm": 0.2547105550765991, "learning_rate": 9.097966259773159e-06, "loss": 0.0341, "step": 67920 }, { "epoch": 0.5496399385063516, "grad_norm": 0.3374498188495636, "learning_rate": 9.097561663898416e-06, "loss": 0.0287, "step": 67930 }, { "epoch": 0.5497208512015536, "grad_norm": 0.5501031875610352, "learning_rate": 9.097156986306454e-06, "loss": 0.0428, "step": 67940 }, { "epoch": 0.5498017638967554, "grad_norm": 0.5970540642738342, "learning_rate": 9.096752227005348e-06, "loss": 0.0322, "step": 67950 }, { "epoch": 0.5498826765919573, "grad_norm": 0.6925133466720581, "learning_rate": 9.096347386003165e-06, "loss": 0.0405, "step": 67960 }, { "epoch": 0.5499635892871592, "grad_norm": 0.2993180453777313, "learning_rate": 9.095942463307983e-06, "loss": 0.0279, "step": 67970 }, { "epoch": 0.550044501982361, "grad_norm": 0.3070315420627594, "learning_rate": 9.095537458927874e-06, "loss": 0.0469, "step": 67980 }, { "epoch": 0.5501254146775629, "grad_norm": 0.5948125123977661, "learning_rate": 9.095132372870916e-06, "loss": 0.0525, "step": 67990 }, { "epoch": 0.5502063273727648, "grad_norm": 0.4713818430900574, "learning_rate": 9.09472720514519e-06, "loss": 0.0282, "step": 68000 }, { "epoch": 0.5502872400679667, "grad_norm": 0.4117797017097473, "learning_rate": 9.094321955758773e-06, "loss": 0.0393, "step": 68010 }, { "epoch": 0.5503681527631685, "grad_norm": 0.7032948732376099, "learning_rate": 9.093916624719748e-06, "loss": 0.0409, "step": 68020 }, { "epoch": 0.5504490654583705, "grad_norm": 0.504386842250824, "learning_rate": 9.0935112120362e-06, "loss": 0.0388, "step": 68030 }, { "epoch": 0.5505299781535723, "grad_norm": 0.20345576107501984, "learning_rate": 9.09310571771621e-06, "loss": 0.0257, "step": 68040 }, { "epoch": 0.5506108908487741, "grad_norm": 0.5064550638198853, "learning_rate": 9.092700141767869e-06, "loss": 0.0343, "step": 68050 }, { "epoch": 0.5506918035439761, "grad_norm": 0.2335605025291443, "learning_rate": 9.092294484199264e-06, "loss": 0.0247, "step": 68060 }, { "epoch": 0.5507727162391779, "grad_norm": 0.49494004249572754, "learning_rate": 9.091888745018485e-06, "loss": 0.0252, "step": 68070 }, { "epoch": 0.5508536289343798, "grad_norm": 0.631428062915802, "learning_rate": 9.091482924233624e-06, "loss": 0.0348, "step": 68080 }, { "epoch": 0.5509345416295817, "grad_norm": 0.6831995844841003, "learning_rate": 9.091077021852771e-06, "loss": 0.037, "step": 68090 }, { "epoch": 0.5510154543247836, "grad_norm": 0.601091206073761, "learning_rate": 9.090671037884024e-06, "loss": 0.0342, "step": 68100 }, { "epoch": 0.5510963670199854, "grad_norm": 0.8431522846221924, "learning_rate": 9.090264972335481e-06, "loss": 0.0388, "step": 68110 }, { "epoch": 0.5511772797151873, "grad_norm": 0.3849159777164459, "learning_rate": 9.089858825215236e-06, "loss": 0.0382, "step": 68120 }, { "epoch": 0.5512581924103892, "grad_norm": 0.5241828560829163, "learning_rate": 9.089452596531392e-06, "loss": 0.0363, "step": 68130 }, { "epoch": 0.551339105105591, "grad_norm": 0.46157127618789673, "learning_rate": 9.089046286292049e-06, "loss": 0.0403, "step": 68140 }, { "epoch": 0.551420017800793, "grad_norm": 0.29627764225006104, "learning_rate": 9.088639894505308e-06, "loss": 0.0309, "step": 68150 }, { "epoch": 0.5515009304959948, "grad_norm": 0.39561963081359863, "learning_rate": 9.08823342117928e-06, "loss": 0.0312, "step": 68160 }, { "epoch": 0.5515818431911967, "grad_norm": 0.4185087978839874, "learning_rate": 9.087826866322065e-06, "loss": 0.042, "step": 68170 }, { "epoch": 0.5516627558863986, "grad_norm": 0.4639281928539276, "learning_rate": 9.087420229941773e-06, "loss": 0.0378, "step": 68180 }, { "epoch": 0.5517436685816004, "grad_norm": 0.8052830100059509, "learning_rate": 9.087013512046513e-06, "loss": 0.045, "step": 68190 }, { "epoch": 0.5518245812768023, "grad_norm": 0.8973074555397034, "learning_rate": 9.086606712644396e-06, "loss": 0.0419, "step": 68200 }, { "epoch": 0.5519054939720042, "grad_norm": 0.3397654592990875, "learning_rate": 9.086199831743537e-06, "loss": 0.0347, "step": 68210 }, { "epoch": 0.5519864066672061, "grad_norm": 0.6276511549949646, "learning_rate": 9.085792869352045e-06, "loss": 0.0499, "step": 68220 }, { "epoch": 0.5520673193624079, "grad_norm": 0.4084128141403198, "learning_rate": 9.085385825478044e-06, "loss": 0.0291, "step": 68230 }, { "epoch": 0.5521482320576099, "grad_norm": 0.7224814295768738, "learning_rate": 9.084978700129647e-06, "loss": 0.0331, "step": 68240 }, { "epoch": 0.5522291447528117, "grad_norm": 0.34200477600097656, "learning_rate": 9.08457149331497e-06, "loss": 0.0562, "step": 68250 }, { "epoch": 0.5523100574480136, "grad_norm": 0.49796608090400696, "learning_rate": 9.084164205042141e-06, "loss": 0.0341, "step": 68260 }, { "epoch": 0.5523909701432155, "grad_norm": 0.9118832945823669, "learning_rate": 9.083756835319278e-06, "loss": 0.0516, "step": 68270 }, { "epoch": 0.5524718828384173, "grad_norm": 0.4858223795890808, "learning_rate": 9.083349384154506e-06, "loss": 0.0344, "step": 68280 }, { "epoch": 0.5525527955336192, "grad_norm": 0.6686352491378784, "learning_rate": 9.08294185155595e-06, "loss": 0.0353, "step": 68290 }, { "epoch": 0.5526337082288211, "grad_norm": 0.3060348331928253, "learning_rate": 9.08253423753174e-06, "loss": 0.0419, "step": 68300 }, { "epoch": 0.552714620924023, "grad_norm": 0.3218047320842743, "learning_rate": 9.082126542090002e-06, "loss": 0.0328, "step": 68310 }, { "epoch": 0.5527955336192248, "grad_norm": 0.6599363088607788, "learning_rate": 9.08171876523887e-06, "loss": 0.0394, "step": 68320 }, { "epoch": 0.5528764463144268, "grad_norm": 0.7454625964164734, "learning_rate": 9.08131090698647e-06, "loss": 0.0386, "step": 68330 }, { "epoch": 0.5529573590096286, "grad_norm": 0.30565738677978516, "learning_rate": 9.080902967340943e-06, "loss": 0.0581, "step": 68340 }, { "epoch": 0.5530382717048304, "grad_norm": 0.3551323413848877, "learning_rate": 9.08049494631042e-06, "loss": 0.0322, "step": 68350 }, { "epoch": 0.5531191844000324, "grad_norm": 0.4692799150943756, "learning_rate": 9.080086843903042e-06, "loss": 0.0363, "step": 68360 }, { "epoch": 0.5532000970952342, "grad_norm": 0.5698545575141907, "learning_rate": 9.079678660126944e-06, "loss": 0.023, "step": 68370 }, { "epoch": 0.5532810097904362, "grad_norm": 0.5620313286781311, "learning_rate": 9.079270394990267e-06, "loss": 0.0324, "step": 68380 }, { "epoch": 0.553361922485638, "grad_norm": 0.5696518421173096, "learning_rate": 9.078862048501156e-06, "loss": 0.0541, "step": 68390 }, { "epoch": 0.5534428351808399, "grad_norm": 0.7294344305992126, "learning_rate": 9.078453620667749e-06, "loss": 0.0411, "step": 68400 }, { "epoch": 0.5535237478760417, "grad_norm": 0.49124300479888916, "learning_rate": 9.078045111498196e-06, "loss": 0.0386, "step": 68410 }, { "epoch": 0.5536046605712436, "grad_norm": 0.49372345209121704, "learning_rate": 9.077636521000643e-06, "loss": 0.0347, "step": 68420 }, { "epoch": 0.5536855732664455, "grad_norm": 0.6778461933135986, "learning_rate": 9.077227849183236e-06, "loss": 0.0359, "step": 68430 }, { "epoch": 0.5537664859616473, "grad_norm": 0.12202990800142288, "learning_rate": 9.076819096054129e-06, "loss": 0.0262, "step": 68440 }, { "epoch": 0.5538473986568493, "grad_norm": 0.517997145652771, "learning_rate": 9.076410261621471e-06, "loss": 0.0267, "step": 68450 }, { "epoch": 0.5539283113520511, "grad_norm": 0.4417012929916382, "learning_rate": 9.076001345893414e-06, "loss": 0.0385, "step": 68460 }, { "epoch": 0.5540092240472531, "grad_norm": 0.23340387642383575, "learning_rate": 9.075592348878119e-06, "loss": 0.0383, "step": 68470 }, { "epoch": 0.5540901367424549, "grad_norm": 0.40329253673553467, "learning_rate": 9.075183270583736e-06, "loss": 0.0327, "step": 68480 }, { "epoch": 0.5541710494376567, "grad_norm": 0.8403027653694153, "learning_rate": 9.074774111018427e-06, "loss": 0.032, "step": 68490 }, { "epoch": 0.5542519621328587, "grad_norm": 0.4696289002895355, "learning_rate": 9.07436487019035e-06, "loss": 0.029, "step": 68500 }, { "epoch": 0.5543328748280605, "grad_norm": 0.742924153804779, "learning_rate": 9.073955548107666e-06, "loss": 0.0251, "step": 68510 }, { "epoch": 0.5544137875232624, "grad_norm": 0.4640920162200928, "learning_rate": 9.07354614477854e-06, "loss": 0.0326, "step": 68520 }, { "epoch": 0.5544947002184643, "grad_norm": 0.9077101945877075, "learning_rate": 9.073136660211135e-06, "loss": 0.0382, "step": 68530 }, { "epoch": 0.5545756129136662, "grad_norm": 0.5046470761299133, "learning_rate": 9.072727094413619e-06, "loss": 0.0368, "step": 68540 }, { "epoch": 0.554656525608868, "grad_norm": 0.4942171275615692, "learning_rate": 9.072317447394159e-06, "loss": 0.0196, "step": 68550 }, { "epoch": 0.55473743830407, "grad_norm": 0.3012109696865082, "learning_rate": 9.071907719160924e-06, "loss": 0.0325, "step": 68560 }, { "epoch": 0.5548183509992718, "grad_norm": 0.5357542037963867, "learning_rate": 9.071497909722087e-06, "loss": 0.0327, "step": 68570 }, { "epoch": 0.5548992636944736, "grad_norm": 0.2840805649757385, "learning_rate": 9.071088019085818e-06, "loss": 0.0271, "step": 68580 }, { "epoch": 0.5549801763896756, "grad_norm": 0.5888369679450989, "learning_rate": 9.070678047260294e-06, "loss": 0.0378, "step": 68590 }, { "epoch": 0.5550610890848774, "grad_norm": 0.7059540748596191, "learning_rate": 9.07026799425369e-06, "loss": 0.0357, "step": 68600 }, { "epoch": 0.5551420017800793, "grad_norm": 0.3742124140262604, "learning_rate": 9.069857860074183e-06, "loss": 0.0379, "step": 68610 }, { "epoch": 0.5552229144752812, "grad_norm": 0.39799460768699646, "learning_rate": 9.069447644729951e-06, "loss": 0.0181, "step": 68620 }, { "epoch": 0.5553038271704831, "grad_norm": 0.321973979473114, "learning_rate": 9.069037348229179e-06, "loss": 0.0386, "step": 68630 }, { "epoch": 0.5553847398656849, "grad_norm": 0.6431016325950623, "learning_rate": 9.068626970580047e-06, "loss": 0.0336, "step": 68640 }, { "epoch": 0.5554656525608868, "grad_norm": 0.12922216951847076, "learning_rate": 9.06821651179074e-06, "loss": 0.0172, "step": 68650 }, { "epoch": 0.5555465652560887, "grad_norm": 0.504917323589325, "learning_rate": 9.06780597186944e-06, "loss": 0.0343, "step": 68660 }, { "epoch": 0.5556274779512905, "grad_norm": 0.48456329107284546, "learning_rate": 9.06739535082434e-06, "loss": 0.0262, "step": 68670 }, { "epoch": 0.5557083906464925, "grad_norm": 0.13385041058063507, "learning_rate": 9.066984648663627e-06, "loss": 0.0362, "step": 68680 }, { "epoch": 0.5557893033416943, "grad_norm": 0.8760583996772766, "learning_rate": 9.06657386539549e-06, "loss": 0.0298, "step": 68690 }, { "epoch": 0.5558702160368962, "grad_norm": 0.09387803077697754, "learning_rate": 9.066163001028122e-06, "loss": 0.0465, "step": 68700 }, { "epoch": 0.5559511287320981, "grad_norm": 0.6177266836166382, "learning_rate": 9.065752055569716e-06, "loss": 0.0491, "step": 68710 }, { "epoch": 0.5560320414272999, "grad_norm": 0.5666190385818481, "learning_rate": 9.065341029028471e-06, "loss": 0.0402, "step": 68720 }, { "epoch": 0.5561129541225018, "grad_norm": 0.5259706974029541, "learning_rate": 9.06492992141258e-06, "loss": 0.0321, "step": 68730 }, { "epoch": 0.5561938668177037, "grad_norm": 0.46716126799583435, "learning_rate": 9.064518732730242e-06, "loss": 0.0311, "step": 68740 }, { "epoch": 0.5562747795129056, "grad_norm": 0.3451445996761322, "learning_rate": 9.06410746298966e-06, "loss": 0.0311, "step": 68750 }, { "epoch": 0.5563556922081074, "grad_norm": 0.4776672422885895, "learning_rate": 9.063696112199035e-06, "loss": 0.0463, "step": 68760 }, { "epoch": 0.5564366049033094, "grad_norm": 0.5395891666412354, "learning_rate": 9.063284680366567e-06, "loss": 0.0288, "step": 68770 }, { "epoch": 0.5565175175985112, "grad_norm": 1.2266775369644165, "learning_rate": 9.062873167500466e-06, "loss": 0.0453, "step": 68780 }, { "epoch": 0.556598430293713, "grad_norm": 0.4493676424026489, "learning_rate": 9.062461573608937e-06, "loss": 0.0379, "step": 68790 }, { "epoch": 0.556679342988915, "grad_norm": 1.211166501045227, "learning_rate": 9.062049898700188e-06, "loss": 0.0487, "step": 68800 }, { "epoch": 0.5567602556841168, "grad_norm": 0.4360429346561432, "learning_rate": 9.06163814278243e-06, "loss": 0.0354, "step": 68810 }, { "epoch": 0.5568411683793187, "grad_norm": 0.5142750144004822, "learning_rate": 9.061226305863873e-06, "loss": 0.0321, "step": 68820 }, { "epoch": 0.5569220810745206, "grad_norm": 0.3873843848705292, "learning_rate": 9.06081438795273e-06, "loss": 0.0344, "step": 68830 }, { "epoch": 0.5570029937697225, "grad_norm": 0.3828169107437134, "learning_rate": 9.060402389057216e-06, "loss": 0.033, "step": 68840 }, { "epoch": 0.5570839064649243, "grad_norm": 0.39404356479644775, "learning_rate": 9.05999030918555e-06, "loss": 0.0384, "step": 68850 }, { "epoch": 0.5571648191601263, "grad_norm": 0.22831767797470093, "learning_rate": 9.059578148345949e-06, "loss": 0.0322, "step": 68860 }, { "epoch": 0.5572457318553281, "grad_norm": 0.08694548904895782, "learning_rate": 9.05916590654663e-06, "loss": 0.042, "step": 68870 }, { "epoch": 0.5573266445505299, "grad_norm": 0.0906902328133583, "learning_rate": 9.058753583795817e-06, "loss": 0.0248, "step": 68880 }, { "epoch": 0.5574075572457319, "grad_norm": 0.29823338985443115, "learning_rate": 9.058341180101732e-06, "loss": 0.0314, "step": 68890 }, { "epoch": 0.5574884699409337, "grad_norm": 0.8502461910247803, "learning_rate": 9.0579286954726e-06, "loss": 0.0252, "step": 68900 }, { "epoch": 0.5575693826361356, "grad_norm": 0.40726813673973083, "learning_rate": 9.057516129916648e-06, "loss": 0.0254, "step": 68910 }, { "epoch": 0.5576502953313375, "grad_norm": 0.5296417474746704, "learning_rate": 9.057103483442101e-06, "loss": 0.0374, "step": 68920 }, { "epoch": 0.5577312080265394, "grad_norm": 0.39651501178741455, "learning_rate": 9.05669075605719e-06, "loss": 0.0662, "step": 68930 }, { "epoch": 0.5578121207217412, "grad_norm": 0.4756004214286804, "learning_rate": 9.056277947770148e-06, "loss": 0.021, "step": 68940 }, { "epoch": 0.5578930334169431, "grad_norm": 0.6865118145942688, "learning_rate": 9.055865058589205e-06, "loss": 0.0467, "step": 68950 }, { "epoch": 0.557973946112145, "grad_norm": 0.6716071367263794, "learning_rate": 9.055452088522592e-06, "loss": 0.0409, "step": 68960 }, { "epoch": 0.5580548588073468, "grad_norm": 0.40723469853401184, "learning_rate": 9.055039037578553e-06, "loss": 0.0391, "step": 68970 }, { "epoch": 0.5581357715025488, "grad_norm": 0.4544210433959961, "learning_rate": 9.054625905765317e-06, "loss": 0.0386, "step": 68980 }, { "epoch": 0.5582166841977506, "grad_norm": 0.2631799578666687, "learning_rate": 9.054212693091131e-06, "loss": 0.0383, "step": 68990 }, { "epoch": 0.5582975968929526, "grad_norm": 0.4904896020889282, "learning_rate": 9.053799399564229e-06, "loss": 0.027, "step": 69000 }, { "epoch": 0.5583785095881544, "grad_norm": 0.8290961980819702, "learning_rate": 9.053386025192856e-06, "loss": 0.0468, "step": 69010 }, { "epoch": 0.5584594222833562, "grad_norm": 0.8343337774276733, "learning_rate": 9.052972569985256e-06, "loss": 0.055, "step": 69020 }, { "epoch": 0.5585403349785582, "grad_norm": 0.5214290618896484, "learning_rate": 9.052559033949677e-06, "loss": 0.0216, "step": 69030 }, { "epoch": 0.55862124767376, "grad_norm": 0.4048716425895691, "learning_rate": 9.05214541709436e-06, "loss": 0.042, "step": 69040 }, { "epoch": 0.5587021603689619, "grad_norm": 0.4559285044670105, "learning_rate": 9.051731719427559e-06, "loss": 0.031, "step": 69050 }, { "epoch": 0.5587830730641637, "grad_norm": 0.5540633201599121, "learning_rate": 9.051317940957521e-06, "loss": 0.0394, "step": 69060 }, { "epoch": 0.5588639857593657, "grad_norm": 0.3675471544265747, "learning_rate": 9.0509040816925e-06, "loss": 0.0235, "step": 69070 }, { "epoch": 0.5589448984545675, "grad_norm": 0.5145134329795837, "learning_rate": 9.05049014164075e-06, "loss": 0.0257, "step": 69080 }, { "epoch": 0.5590258111497693, "grad_norm": 0.9530137777328491, "learning_rate": 9.050076120810523e-06, "loss": 0.0474, "step": 69090 }, { "epoch": 0.5591067238449713, "grad_norm": 0.18810473382472992, "learning_rate": 9.049662019210079e-06, "loss": 0.0251, "step": 69100 }, { "epoch": 0.5591876365401731, "grad_norm": 0.6177288889884949, "learning_rate": 9.049247836847676e-06, "loss": 0.0187, "step": 69110 }, { "epoch": 0.5592685492353751, "grad_norm": 0.2712673544883728, "learning_rate": 9.048833573731572e-06, "loss": 0.0343, "step": 69120 }, { "epoch": 0.5593494619305769, "grad_norm": 0.7074629664421082, "learning_rate": 9.04841922987003e-06, "loss": 0.0333, "step": 69130 }, { "epoch": 0.5594303746257788, "grad_norm": 0.4261668026447296, "learning_rate": 9.048004805271314e-06, "loss": 0.0342, "step": 69140 }, { "epoch": 0.5595112873209807, "grad_norm": 0.03319547325372696, "learning_rate": 9.047590299943688e-06, "loss": 0.0366, "step": 69150 }, { "epoch": 0.5595922000161825, "grad_norm": 0.6532831788063049, "learning_rate": 9.047175713895418e-06, "loss": 0.0354, "step": 69160 }, { "epoch": 0.5596731127113844, "grad_norm": 0.3830554783344269, "learning_rate": 9.04676104713477e-06, "loss": 0.0312, "step": 69170 }, { "epoch": 0.5597540254065863, "grad_norm": 0.47555190324783325, "learning_rate": 9.046346299670019e-06, "loss": 0.0359, "step": 69180 }, { "epoch": 0.5598349381017882, "grad_norm": 0.22533860802650452, "learning_rate": 9.045931471509432e-06, "loss": 0.0456, "step": 69190 }, { "epoch": 0.55991585079699, "grad_norm": 0.3087051808834076, "learning_rate": 9.045516562661283e-06, "loss": 0.0247, "step": 69200 }, { "epoch": 0.559996763492192, "grad_norm": 0.4457847476005554, "learning_rate": 9.045101573133848e-06, "loss": 0.0335, "step": 69210 }, { "epoch": 0.5600776761873938, "grad_norm": 0.16854992508888245, "learning_rate": 9.044686502935401e-06, "loss": 0.045, "step": 69220 }, { "epoch": 0.5601585888825957, "grad_norm": 0.4586280882358551, "learning_rate": 9.044271352074218e-06, "loss": 0.0221, "step": 69230 }, { "epoch": 0.5602395015777976, "grad_norm": 0.5113740563392639, "learning_rate": 9.043856120558583e-06, "loss": 0.0592, "step": 69240 }, { "epoch": 0.5603204142729994, "grad_norm": 0.6222679615020752, "learning_rate": 9.043440808396774e-06, "loss": 0.0313, "step": 69250 }, { "epoch": 0.5604013269682013, "grad_norm": 0.14710569381713867, "learning_rate": 9.043025415597076e-06, "loss": 0.0215, "step": 69260 }, { "epoch": 0.5604822396634032, "grad_norm": 0.41159549355506897, "learning_rate": 9.04260994216777e-06, "loss": 0.0414, "step": 69270 }, { "epoch": 0.5605631523586051, "grad_norm": 0.31612110137939453, "learning_rate": 9.042194388117142e-06, "loss": 0.0307, "step": 69280 }, { "epoch": 0.5606440650538069, "grad_norm": 1.0207897424697876, "learning_rate": 9.041778753453483e-06, "loss": 0.0366, "step": 69290 }, { "epoch": 0.5607249777490089, "grad_norm": 0.6105018854141235, "learning_rate": 9.041363038185077e-06, "loss": 0.05, "step": 69300 }, { "epoch": 0.5608058904442107, "grad_norm": 0.3462982475757599, "learning_rate": 9.040947242320218e-06, "loss": 0.021, "step": 69310 }, { "epoch": 0.5608868031394125, "grad_norm": 0.6129575371742249, "learning_rate": 9.040531365867195e-06, "loss": 0.0418, "step": 69320 }, { "epoch": 0.5609677158346145, "grad_norm": 0.5750060081481934, "learning_rate": 9.040115408834307e-06, "loss": 0.047, "step": 69330 }, { "epoch": 0.5610486285298163, "grad_norm": 0.6205376982688904, "learning_rate": 9.039699371229844e-06, "loss": 0.0448, "step": 69340 }, { "epoch": 0.5611295412250182, "grad_norm": 0.7996906042098999, "learning_rate": 9.039283253062106e-06, "loss": 0.0449, "step": 69350 }, { "epoch": 0.5612104539202201, "grad_norm": 0.8801194429397583, "learning_rate": 9.03886705433939e-06, "loss": 0.0421, "step": 69360 }, { "epoch": 0.561291366615422, "grad_norm": 0.3019647002220154, "learning_rate": 9.038450775069999e-06, "loss": 0.0405, "step": 69370 }, { "epoch": 0.5613722793106238, "grad_norm": 0.3068690001964569, "learning_rate": 9.038034415262231e-06, "loss": 0.0183, "step": 69380 }, { "epoch": 0.5614531920058257, "grad_norm": 0.21295851469039917, "learning_rate": 9.037617974924391e-06, "loss": 0.0232, "step": 69390 }, { "epoch": 0.5615341047010276, "grad_norm": 0.8009584546089172, "learning_rate": 9.037201454064786e-06, "loss": 0.0219, "step": 69400 }, { "epoch": 0.5616150173962294, "grad_norm": 0.5113164186477661, "learning_rate": 9.03678485269172e-06, "loss": 0.0289, "step": 69410 }, { "epoch": 0.5616959300914314, "grad_norm": 0.532901406288147, "learning_rate": 9.036368170813502e-06, "loss": 0.0237, "step": 69420 }, { "epoch": 0.5617768427866332, "grad_norm": 0.4746515452861786, "learning_rate": 9.035951408438442e-06, "loss": 0.0409, "step": 69430 }, { "epoch": 0.5618577554818351, "grad_norm": 0.48418128490448, "learning_rate": 9.035534565574853e-06, "loss": 0.023, "step": 69440 }, { "epoch": 0.561938668177037, "grad_norm": 0.16794708371162415, "learning_rate": 9.035117642231044e-06, "loss": 0.0189, "step": 69450 }, { "epoch": 0.5620195808722388, "grad_norm": 0.7054886817932129, "learning_rate": 9.034700638415334e-06, "loss": 0.0448, "step": 69460 }, { "epoch": 0.5621004935674407, "grad_norm": 0.23601728677749634, "learning_rate": 9.034283554136036e-06, "loss": 0.0241, "step": 69470 }, { "epoch": 0.5621814062626426, "grad_norm": 0.2500949501991272, "learning_rate": 9.03386638940147e-06, "loss": 0.0282, "step": 69480 }, { "epoch": 0.5622623189578445, "grad_norm": 0.8960813879966736, "learning_rate": 9.033449144219954e-06, "loss": 0.0464, "step": 69490 }, { "epoch": 0.5623432316530463, "grad_norm": 0.5636031031608582, "learning_rate": 9.033031818599811e-06, "loss": 0.0386, "step": 69500 }, { "epoch": 0.5624241443482483, "grad_norm": 0.3780769109725952, "learning_rate": 9.032614412549363e-06, "loss": 0.036, "step": 69510 }, { "epoch": 0.5625050570434501, "grad_norm": 0.6129667162895203, "learning_rate": 9.032196926076932e-06, "loss": 0.0202, "step": 69520 }, { "epoch": 0.562585969738652, "grad_norm": 0.36695918440818787, "learning_rate": 9.031779359190847e-06, "loss": 0.0302, "step": 69530 }, { "epoch": 0.5626668824338539, "grad_norm": 0.6106850504875183, "learning_rate": 9.031361711899433e-06, "loss": 0.0398, "step": 69540 }, { "epoch": 0.5627477951290557, "grad_norm": 0.7521336674690247, "learning_rate": 9.03094398421102e-06, "loss": 0.0252, "step": 69550 }, { "epoch": 0.5628287078242576, "grad_norm": 0.8679654002189636, "learning_rate": 9.03052617613394e-06, "loss": 0.057, "step": 69560 }, { "epoch": 0.5629096205194595, "grad_norm": 0.5673300623893738, "learning_rate": 9.030108287676523e-06, "loss": 0.041, "step": 69570 }, { "epoch": 0.5629905332146614, "grad_norm": 0.6248674392700195, "learning_rate": 9.029690318847107e-06, "loss": 0.0458, "step": 69580 }, { "epoch": 0.5630714459098632, "grad_norm": 0.5559688806533813, "learning_rate": 9.029272269654021e-06, "loss": 0.0336, "step": 69590 }, { "epoch": 0.5631523586050652, "grad_norm": 0.3992649018764496, "learning_rate": 9.028854140105607e-06, "loss": 0.0315, "step": 69600 }, { "epoch": 0.563233271300267, "grad_norm": 0.313182532787323, "learning_rate": 9.028435930210203e-06, "loss": 0.0209, "step": 69610 }, { "epoch": 0.5633141839954688, "grad_norm": 0.35425442457199097, "learning_rate": 9.02801763997615e-06, "loss": 0.0286, "step": 69620 }, { "epoch": 0.5633950966906708, "grad_norm": 0.5321882963180542, "learning_rate": 9.027599269411787e-06, "loss": 0.0252, "step": 69630 }, { "epoch": 0.5634760093858726, "grad_norm": 0.23495151102542877, "learning_rate": 9.027180818525458e-06, "loss": 0.0334, "step": 69640 }, { "epoch": 0.5635569220810746, "grad_norm": 0.39124223589897156, "learning_rate": 9.026762287325513e-06, "loss": 0.027, "step": 69650 }, { "epoch": 0.5636378347762764, "grad_norm": 0.4603254795074463, "learning_rate": 9.026343675820292e-06, "loss": 0.0387, "step": 69660 }, { "epoch": 0.5637187474714783, "grad_norm": 0.5280678868293762, "learning_rate": 9.025924984018148e-06, "loss": 0.0365, "step": 69670 }, { "epoch": 0.5637996601666801, "grad_norm": 0.34530165791511536, "learning_rate": 9.025506211927428e-06, "loss": 0.0375, "step": 69680 }, { "epoch": 0.563880572861882, "grad_norm": 0.3695819079875946, "learning_rate": 9.025087359556486e-06, "loss": 0.0563, "step": 69690 }, { "epoch": 0.5639614855570839, "grad_norm": 0.4240885376930237, "learning_rate": 9.024668426913671e-06, "loss": 0.0388, "step": 69700 }, { "epoch": 0.5640423982522857, "grad_norm": 0.7462897896766663, "learning_rate": 9.024249414007344e-06, "loss": 0.0402, "step": 69710 }, { "epoch": 0.5641233109474877, "grad_norm": 0.40089893341064453, "learning_rate": 9.023830320845857e-06, "loss": 0.04, "step": 69720 }, { "epoch": 0.5642042236426895, "grad_norm": 0.384389728307724, "learning_rate": 9.02341114743757e-06, "loss": 0.0337, "step": 69730 }, { "epoch": 0.5642851363378915, "grad_norm": 0.6888806819915771, "learning_rate": 9.02299189379084e-06, "loss": 0.0318, "step": 69740 }, { "epoch": 0.5643660490330933, "grad_norm": 0.3334187865257263, "learning_rate": 9.02257255991403e-06, "loss": 0.0373, "step": 69750 }, { "epoch": 0.5644469617282951, "grad_norm": 0.3241056799888611, "learning_rate": 9.022153145815502e-06, "loss": 0.0384, "step": 69760 }, { "epoch": 0.5645278744234971, "grad_norm": 0.5547672510147095, "learning_rate": 9.021733651503622e-06, "loss": 0.0428, "step": 69770 }, { "epoch": 0.5646087871186989, "grad_norm": 0.5393441915512085, "learning_rate": 9.021314076986753e-06, "loss": 0.0387, "step": 69780 }, { "epoch": 0.5646896998139008, "grad_norm": 0.4012198746204376, "learning_rate": 9.020894422273265e-06, "loss": 0.0312, "step": 69790 }, { "epoch": 0.5647706125091027, "grad_norm": 0.5486866235733032, "learning_rate": 9.020474687371526e-06, "loss": 0.0339, "step": 69800 }, { "epoch": 0.5648515252043046, "grad_norm": 0.43759840726852417, "learning_rate": 9.020054872289907e-06, "loss": 0.0272, "step": 69810 }, { "epoch": 0.5649324378995064, "grad_norm": 0.7555981278419495, "learning_rate": 9.01963497703678e-06, "loss": 0.0317, "step": 69820 }, { "epoch": 0.5650133505947084, "grad_norm": 0.544090211391449, "learning_rate": 9.019215001620519e-06, "loss": 0.0383, "step": 69830 }, { "epoch": 0.5650942632899102, "grad_norm": 0.40607205033302307, "learning_rate": 9.0187949460495e-06, "loss": 0.0483, "step": 69840 }, { "epoch": 0.565175175985112, "grad_norm": 0.31461167335510254, "learning_rate": 9.0183748103321e-06, "loss": 0.0271, "step": 69850 }, { "epoch": 0.565256088680314, "grad_norm": 0.3247027099132538, "learning_rate": 9.0179545944767e-06, "loss": 0.0406, "step": 69860 }, { "epoch": 0.5653370013755158, "grad_norm": 0.44310858845710754, "learning_rate": 9.017534298491674e-06, "loss": 0.0326, "step": 69870 }, { "epoch": 0.5654179140707177, "grad_norm": 0.680596649646759, "learning_rate": 9.01711392238541e-06, "loss": 0.035, "step": 69880 }, { "epoch": 0.5654988267659196, "grad_norm": 0.5913251638412476, "learning_rate": 9.016693466166287e-06, "loss": 0.0362, "step": 69890 }, { "epoch": 0.5655797394611215, "grad_norm": 0.15152984857559204, "learning_rate": 9.016272929842694e-06, "loss": 0.0582, "step": 69900 }, { "epoch": 0.5656606521563233, "grad_norm": 1.4259202480316162, "learning_rate": 9.015852313423017e-06, "loss": 0.0419, "step": 69910 }, { "epoch": 0.5657415648515252, "grad_norm": 0.2349715232849121, "learning_rate": 9.015431616915642e-06, "loss": 0.0295, "step": 69920 }, { "epoch": 0.5658224775467271, "grad_norm": 0.2094050794839859, "learning_rate": 9.015010840328961e-06, "loss": 0.0365, "step": 69930 }, { "epoch": 0.5659033902419289, "grad_norm": 0.5449327826499939, "learning_rate": 9.014589983671364e-06, "loss": 0.0324, "step": 69940 }, { "epoch": 0.5659843029371309, "grad_norm": 0.6847491264343262, "learning_rate": 9.014169046951247e-06, "loss": 0.0303, "step": 69950 }, { "epoch": 0.5660652156323327, "grad_norm": 1.6240540742874146, "learning_rate": 9.013748030177002e-06, "loss": 0.0497, "step": 69960 }, { "epoch": 0.5661461283275346, "grad_norm": 0.3822993040084839, "learning_rate": 9.013326933357024e-06, "loss": 0.0244, "step": 69970 }, { "epoch": 0.5662270410227365, "grad_norm": 0.4701804220676422, "learning_rate": 9.012905756499715e-06, "loss": 0.0205, "step": 69980 }, { "epoch": 0.5663079537179383, "grad_norm": 0.8385473489761353, "learning_rate": 9.012484499613472e-06, "loss": 0.035, "step": 69990 }, { "epoch": 0.5663888664131402, "grad_norm": 0.38713282346725464, "learning_rate": 9.012063162706694e-06, "loss": 0.0366, "step": 70000 }, { "epoch": 0.5664697791083421, "grad_norm": 0.4094661772251129, "learning_rate": 9.01164174578779e-06, "loss": 0.0293, "step": 70010 }, { "epoch": 0.566550691803544, "grad_norm": 0.29765617847442627, "learning_rate": 9.011220248865157e-06, "loss": 0.0278, "step": 70020 }, { "epoch": 0.5666316044987458, "grad_norm": 0.5398207306861877, "learning_rate": 9.010798671947205e-06, "loss": 0.0345, "step": 70030 }, { "epoch": 0.5667125171939478, "grad_norm": 0.2238604575395584, "learning_rate": 9.010377015042343e-06, "loss": 0.0257, "step": 70040 }, { "epoch": 0.5667934298891496, "grad_norm": 0.5041121244430542, "learning_rate": 9.009955278158974e-06, "loss": 0.037, "step": 70050 }, { "epoch": 0.5668743425843514, "grad_norm": 0.24736791849136353, "learning_rate": 9.009533461305515e-06, "loss": 0.0359, "step": 70060 }, { "epoch": 0.5669552552795534, "grad_norm": 0.3522040545940399, "learning_rate": 9.009111564490374e-06, "loss": 0.0291, "step": 70070 }, { "epoch": 0.5670361679747552, "grad_norm": 0.2310403436422348, "learning_rate": 9.008689587721968e-06, "loss": 0.0211, "step": 70080 }, { "epoch": 0.5671170806699571, "grad_norm": 0.5375770330429077, "learning_rate": 9.00826753100871e-06, "loss": 0.049, "step": 70090 }, { "epoch": 0.567197993365159, "grad_norm": 0.670727550983429, "learning_rate": 9.007845394359018e-06, "loss": 0.0273, "step": 70100 }, { "epoch": 0.5672789060603609, "grad_norm": 0.3730001449584961, "learning_rate": 9.00742317778131e-06, "loss": 0.0387, "step": 70110 }, { "epoch": 0.5673598187555627, "grad_norm": 0.6576844453811646, "learning_rate": 9.007000881284007e-06, "loss": 0.0276, "step": 70120 }, { "epoch": 0.5674407314507647, "grad_norm": 0.3591878414154053, "learning_rate": 9.006578504875532e-06, "loss": 0.0255, "step": 70130 }, { "epoch": 0.5675216441459665, "grad_norm": 0.5188863277435303, "learning_rate": 9.006156048564305e-06, "loss": 0.0328, "step": 70140 }, { "epoch": 0.5676025568411683, "grad_norm": 0.20280595123767853, "learning_rate": 9.005733512358754e-06, "loss": 0.032, "step": 70150 }, { "epoch": 0.5676834695363703, "grad_norm": 0.40907755494117737, "learning_rate": 9.005310896267306e-06, "loss": 0.0334, "step": 70160 }, { "epoch": 0.5677643822315721, "grad_norm": 0.6969184279441833, "learning_rate": 9.004888200298385e-06, "loss": 0.0304, "step": 70170 }, { "epoch": 0.567845294926774, "grad_norm": 0.7395583987236023, "learning_rate": 9.004465424460425e-06, "loss": 0.037, "step": 70180 }, { "epoch": 0.5679262076219759, "grad_norm": 0.5651132464408875, "learning_rate": 9.004042568761855e-06, "loss": 0.0475, "step": 70190 }, { "epoch": 0.5680071203171778, "grad_norm": 0.5224201083183289, "learning_rate": 9.00361963321111e-06, "loss": 0.0328, "step": 70200 }, { "epoch": 0.5680880330123796, "grad_norm": 0.3920711278915405, "learning_rate": 9.003196617816622e-06, "loss": 0.0252, "step": 70210 }, { "epoch": 0.5681689457075815, "grad_norm": 0.21517403423786163, "learning_rate": 9.00277352258683e-06, "loss": 0.0281, "step": 70220 }, { "epoch": 0.5682498584027834, "grad_norm": 0.32617154717445374, "learning_rate": 9.002350347530171e-06, "loss": 0.0352, "step": 70230 }, { "epoch": 0.5683307710979852, "grad_norm": 0.5557016134262085, "learning_rate": 9.001927092655082e-06, "loss": 0.029, "step": 70240 }, { "epoch": 0.5684116837931872, "grad_norm": 0.15017181634902954, "learning_rate": 9.001503757970008e-06, "loss": 0.0415, "step": 70250 }, { "epoch": 0.568492596488389, "grad_norm": 0.8186126947402954, "learning_rate": 9.001080343483386e-06, "loss": 0.0297, "step": 70260 }, { "epoch": 0.568573509183591, "grad_norm": 0.379121333360672, "learning_rate": 9.000656849203664e-06, "loss": 0.0281, "step": 70270 }, { "epoch": 0.5686544218787928, "grad_norm": 0.4208093285560608, "learning_rate": 9.000233275139287e-06, "loss": 0.0436, "step": 70280 }, { "epoch": 0.5687353345739946, "grad_norm": 0.9904469847679138, "learning_rate": 8.999809621298703e-06, "loss": 0.047, "step": 70290 }, { "epoch": 0.5688162472691966, "grad_norm": 0.3344931900501251, "learning_rate": 8.999385887690359e-06, "loss": 0.0455, "step": 70300 }, { "epoch": 0.5688971599643984, "grad_norm": 0.34637659788131714, "learning_rate": 8.998962074322709e-06, "loss": 0.0334, "step": 70310 }, { "epoch": 0.5689780726596003, "grad_norm": 0.2579786777496338, "learning_rate": 8.998538181204199e-06, "loss": 0.0308, "step": 70320 }, { "epoch": 0.5690589853548021, "grad_norm": 0.5896949768066406, "learning_rate": 8.998114208343289e-06, "loss": 0.0214, "step": 70330 }, { "epoch": 0.5691398980500041, "grad_norm": 0.7418456673622131, "learning_rate": 8.99769015574843e-06, "loss": 0.0326, "step": 70340 }, { "epoch": 0.5692208107452059, "grad_norm": 1.6193888187408447, "learning_rate": 8.99726602342808e-06, "loss": 0.0345, "step": 70350 }, { "epoch": 0.5693017234404077, "grad_norm": 0.2618558406829834, "learning_rate": 8.9968418113907e-06, "loss": 0.0309, "step": 70360 }, { "epoch": 0.5693826361356097, "grad_norm": 0.4184684753417969, "learning_rate": 8.996417519644748e-06, "loss": 0.0377, "step": 70370 }, { "epoch": 0.5694635488308115, "grad_norm": 0.3782954812049866, "learning_rate": 8.995993148198681e-06, "loss": 0.031, "step": 70380 }, { "epoch": 0.5695444615260135, "grad_norm": 0.34306833148002625, "learning_rate": 8.99556869706097e-06, "loss": 0.0248, "step": 70390 }, { "epoch": 0.5696253742212153, "grad_norm": 0.4011462926864624, "learning_rate": 8.995144166240077e-06, "loss": 0.0332, "step": 70400 }, { "epoch": 0.5697062869164172, "grad_norm": 0.2980535626411438, "learning_rate": 8.994719555744465e-06, "loss": 0.0262, "step": 70410 }, { "epoch": 0.569787199611619, "grad_norm": 0.265532910823822, "learning_rate": 8.994294865582606e-06, "loss": 0.0419, "step": 70420 }, { "epoch": 0.569868112306821, "grad_norm": 0.30057021975517273, "learning_rate": 8.993870095762968e-06, "loss": 0.0222, "step": 70430 }, { "epoch": 0.5699490250020228, "grad_norm": 0.3138914704322815, "learning_rate": 8.993445246294024e-06, "loss": 0.0463, "step": 70440 }, { "epoch": 0.5700299376972247, "grad_norm": 0.4560301899909973, "learning_rate": 8.993020317184245e-06, "loss": 0.0446, "step": 70450 }, { "epoch": 0.5701108503924266, "grad_norm": 0.37783369421958923, "learning_rate": 8.992595308442102e-06, "loss": 0.0395, "step": 70460 }, { "epoch": 0.5701917630876284, "grad_norm": 0.5985270142555237, "learning_rate": 8.992170220076078e-06, "loss": 0.037, "step": 70470 }, { "epoch": 0.5702726757828304, "grad_norm": 0.4698670506477356, "learning_rate": 8.991745052094644e-06, "loss": 0.0337, "step": 70480 }, { "epoch": 0.5703535884780322, "grad_norm": 0.6489819884300232, "learning_rate": 8.991319804506284e-06, "loss": 0.0354, "step": 70490 }, { "epoch": 0.5704345011732341, "grad_norm": 0.5272700786590576, "learning_rate": 8.990894477319475e-06, "loss": 0.0429, "step": 70500 }, { "epoch": 0.570515413868436, "grad_norm": 0.5978372693061829, "learning_rate": 8.990469070542703e-06, "loss": 0.0295, "step": 70510 }, { "epoch": 0.5705963265636378, "grad_norm": 0.3127335011959076, "learning_rate": 8.990043584184447e-06, "loss": 0.0362, "step": 70520 }, { "epoch": 0.5706772392588397, "grad_norm": 0.4159495532512665, "learning_rate": 8.989618018253196e-06, "loss": 0.0454, "step": 70530 }, { "epoch": 0.5707581519540416, "grad_norm": 0.4804358184337616, "learning_rate": 8.989192372757438e-06, "loss": 0.0366, "step": 70540 }, { "epoch": 0.5708390646492435, "grad_norm": 0.33651968836784363, "learning_rate": 8.988766647705657e-06, "loss": 0.0349, "step": 70550 }, { "epoch": 0.5709199773444453, "grad_norm": 0.41303592920303345, "learning_rate": 8.988340843106348e-06, "loss": 0.0335, "step": 70560 }, { "epoch": 0.5710008900396473, "grad_norm": 0.39537981152534485, "learning_rate": 8.987914958968e-06, "loss": 0.0313, "step": 70570 }, { "epoch": 0.5710818027348491, "grad_norm": 0.8053838610649109, "learning_rate": 8.987488995299106e-06, "loss": 0.0394, "step": 70580 }, { "epoch": 0.5711627154300509, "grad_norm": 0.34144115447998047, "learning_rate": 8.987062952108164e-06, "loss": 0.0247, "step": 70590 }, { "epoch": 0.5712436281252529, "grad_norm": 0.5128651857376099, "learning_rate": 8.986636829403666e-06, "loss": 0.0565, "step": 70600 }, { "epoch": 0.5713245408204547, "grad_norm": 0.35997986793518066, "learning_rate": 8.986210627194114e-06, "loss": 0.0284, "step": 70610 }, { "epoch": 0.5714054535156566, "grad_norm": 0.39913198351860046, "learning_rate": 8.985784345488006e-06, "loss": 0.0309, "step": 70620 }, { "epoch": 0.5714863662108585, "grad_norm": 0.1927962601184845, "learning_rate": 8.985357984293843e-06, "loss": 0.0185, "step": 70630 }, { "epoch": 0.5715672789060604, "grad_norm": 0.2792506217956543, "learning_rate": 8.984931543620129e-06, "loss": 0.0318, "step": 70640 }, { "epoch": 0.5716481916012622, "grad_norm": 0.4095447361469269, "learning_rate": 8.984505023475368e-06, "loss": 0.0298, "step": 70650 }, { "epoch": 0.5717291042964641, "grad_norm": 0.24174953997135162, "learning_rate": 8.984078423868066e-06, "loss": 0.0239, "step": 70660 }, { "epoch": 0.571810016991666, "grad_norm": 0.7433624267578125, "learning_rate": 8.98365174480673e-06, "loss": 0.0453, "step": 70670 }, { "epoch": 0.5718909296868678, "grad_norm": 0.31479066610336304, "learning_rate": 8.983224986299871e-06, "loss": 0.0395, "step": 70680 }, { "epoch": 0.5719718423820698, "grad_norm": 0.2427709549665451, "learning_rate": 8.982798148355997e-06, "loss": 0.0398, "step": 70690 }, { "epoch": 0.5720527550772716, "grad_norm": 0.28587910532951355, "learning_rate": 8.982371230983624e-06, "loss": 0.0406, "step": 70700 }, { "epoch": 0.5721336677724735, "grad_norm": 0.7649248242378235, "learning_rate": 8.981944234191262e-06, "loss": 0.0519, "step": 70710 }, { "epoch": 0.5722145804676754, "grad_norm": 0.6527491211891174, "learning_rate": 8.98151715798743e-06, "loss": 0.0383, "step": 70720 }, { "epoch": 0.5722954931628772, "grad_norm": 0.2665603458881378, "learning_rate": 8.981090002380644e-06, "loss": 0.0207, "step": 70730 }, { "epoch": 0.5723764058580791, "grad_norm": 0.6159942746162415, "learning_rate": 8.98066276737942e-06, "loss": 0.0291, "step": 70740 }, { "epoch": 0.572457318553281, "grad_norm": 0.6699192523956299, "learning_rate": 8.980235452992283e-06, "loss": 0.0332, "step": 70750 }, { "epoch": 0.5725382312484829, "grad_norm": 0.545727014541626, "learning_rate": 8.97980805922775e-06, "loss": 0.0268, "step": 70760 }, { "epoch": 0.5726191439436847, "grad_norm": 0.4576661288738251, "learning_rate": 8.979380586094349e-06, "loss": 0.0253, "step": 70770 }, { "epoch": 0.5727000566388867, "grad_norm": 0.5051097869873047, "learning_rate": 8.978953033600604e-06, "loss": 0.0467, "step": 70780 }, { "epoch": 0.5727809693340885, "grad_norm": 0.5707709789276123, "learning_rate": 8.978525401755039e-06, "loss": 0.0347, "step": 70790 }, { "epoch": 0.5728618820292904, "grad_norm": 0.15650659799575806, "learning_rate": 8.978097690566185e-06, "loss": 0.0313, "step": 70800 }, { "epoch": 0.5729427947244923, "grad_norm": 0.4947241246700287, "learning_rate": 8.977669900042569e-06, "loss": 0.0347, "step": 70810 }, { "epoch": 0.5730237074196941, "grad_norm": 0.558074951171875, "learning_rate": 8.977242030192726e-06, "loss": 0.043, "step": 70820 }, { "epoch": 0.573104620114896, "grad_norm": 0.6817562580108643, "learning_rate": 8.976814081025185e-06, "loss": 0.0324, "step": 70830 }, { "epoch": 0.5731855328100979, "grad_norm": 0.8096848726272583, "learning_rate": 8.976386052548483e-06, "loss": 0.0209, "step": 70840 }, { "epoch": 0.5732664455052998, "grad_norm": 0.6244428753852844, "learning_rate": 8.975957944771157e-06, "loss": 0.0262, "step": 70850 }, { "epoch": 0.5733473582005016, "grad_norm": 0.4584190547466278, "learning_rate": 8.975529757701742e-06, "loss": 0.0361, "step": 70860 }, { "epoch": 0.5734282708957036, "grad_norm": 0.3359871208667755, "learning_rate": 8.97510149134878e-06, "loss": 0.0399, "step": 70870 }, { "epoch": 0.5735091835909054, "grad_norm": 0.6187435388565063, "learning_rate": 8.97467314572081e-06, "loss": 0.0484, "step": 70880 }, { "epoch": 0.5735900962861072, "grad_norm": 0.9978267550468445, "learning_rate": 8.974244720826375e-06, "loss": 0.0358, "step": 70890 }, { "epoch": 0.5736710089813092, "grad_norm": 0.5548186898231506, "learning_rate": 8.973816216674019e-06, "loss": 0.0288, "step": 70900 }, { "epoch": 0.573751921676511, "grad_norm": 0.2714572846889496, "learning_rate": 8.973387633272288e-06, "loss": 0.0385, "step": 70910 }, { "epoch": 0.573832834371713, "grad_norm": 0.6350733637809753, "learning_rate": 8.972958970629729e-06, "loss": 0.0356, "step": 70920 }, { "epoch": 0.5739137470669148, "grad_norm": 0.2625766396522522, "learning_rate": 8.97253022875489e-06, "loss": 0.0324, "step": 70930 }, { "epoch": 0.5739946597621167, "grad_norm": 0.4626918435096741, "learning_rate": 8.972101407656322e-06, "loss": 0.0363, "step": 70940 }, { "epoch": 0.5740755724573186, "grad_norm": 0.9822744131088257, "learning_rate": 8.971672507342577e-06, "loss": 0.0357, "step": 70950 }, { "epoch": 0.5741564851525204, "grad_norm": 0.6018009185791016, "learning_rate": 8.971243527822209e-06, "loss": 0.0363, "step": 70960 }, { "epoch": 0.5742373978477223, "grad_norm": 0.252216637134552, "learning_rate": 8.970814469103772e-06, "loss": 0.0266, "step": 70970 }, { "epoch": 0.5743183105429241, "grad_norm": 0.677791953086853, "learning_rate": 8.970385331195824e-06, "loss": 0.043, "step": 70980 }, { "epoch": 0.5743992232381261, "grad_norm": 0.40156352519989014, "learning_rate": 8.96995611410692e-06, "loss": 0.0463, "step": 70990 }, { "epoch": 0.5744801359333279, "grad_norm": 0.17057137191295624, "learning_rate": 8.969526817845624e-06, "loss": 0.0376, "step": 71000 }, { "epoch": 0.5745610486285299, "grad_norm": 0.7262240052223206, "learning_rate": 8.969097442420496e-06, "loss": 0.0368, "step": 71010 }, { "epoch": 0.5746419613237317, "grad_norm": 0.3950985074043274, "learning_rate": 8.968667987840099e-06, "loss": 0.0397, "step": 71020 }, { "epoch": 0.5747228740189335, "grad_norm": 0.5282391309738159, "learning_rate": 8.968238454112995e-06, "loss": 0.051, "step": 71030 }, { "epoch": 0.5748037867141355, "grad_norm": 0.5619708299636841, "learning_rate": 8.967808841247754e-06, "loss": 0.0304, "step": 71040 }, { "epoch": 0.5748846994093373, "grad_norm": 0.31939077377319336, "learning_rate": 8.967379149252942e-06, "loss": 0.0346, "step": 71050 }, { "epoch": 0.5749656121045392, "grad_norm": 0.8578327894210815, "learning_rate": 8.966949378137129e-06, "loss": 0.0277, "step": 71060 }, { "epoch": 0.575046524799741, "grad_norm": 0.2137274444103241, "learning_rate": 8.966519527908883e-06, "loss": 0.0304, "step": 71070 }, { "epoch": 0.575127437494943, "grad_norm": 0.25526246428489685, "learning_rate": 8.96608959857678e-06, "loss": 0.0585, "step": 71080 }, { "epoch": 0.5752083501901448, "grad_norm": 0.19581308960914612, "learning_rate": 8.965659590149393e-06, "loss": 0.0371, "step": 71090 }, { "epoch": 0.5752892628853468, "grad_norm": 1.0460618734359741, "learning_rate": 8.965229502635298e-06, "loss": 0.0461, "step": 71100 }, { "epoch": 0.5753701755805486, "grad_norm": 0.31539127230644226, "learning_rate": 8.964799336043071e-06, "loss": 0.0449, "step": 71110 }, { "epoch": 0.5754510882757504, "grad_norm": 0.09577136486768723, "learning_rate": 8.96436909038129e-06, "loss": 0.0279, "step": 71120 }, { "epoch": 0.5755320009709524, "grad_norm": 0.6811903119087219, "learning_rate": 8.963938765658537e-06, "loss": 0.0429, "step": 71130 }, { "epoch": 0.5756129136661542, "grad_norm": 0.39808639883995056, "learning_rate": 8.963508361883395e-06, "loss": 0.0432, "step": 71140 }, { "epoch": 0.5756938263613561, "grad_norm": 0.5407028198242188, "learning_rate": 8.963077879064446e-06, "loss": 0.042, "step": 71150 }, { "epoch": 0.575774739056558, "grad_norm": 0.26535603404045105, "learning_rate": 8.962647317210274e-06, "loss": 0.0339, "step": 71160 }, { "epoch": 0.5758556517517599, "grad_norm": 0.5803279876708984, "learning_rate": 8.962216676329466e-06, "loss": 0.0397, "step": 71170 }, { "epoch": 0.5759365644469617, "grad_norm": 0.6214284896850586, "learning_rate": 8.961785956430614e-06, "loss": 0.0251, "step": 71180 }, { "epoch": 0.5760174771421636, "grad_norm": 0.42645999789237976, "learning_rate": 8.961355157522303e-06, "loss": 0.0333, "step": 71190 }, { "epoch": 0.5760983898373655, "grad_norm": 0.6468285322189331, "learning_rate": 8.960924279613126e-06, "loss": 0.0319, "step": 71200 }, { "epoch": 0.5761793025325673, "grad_norm": 0.2791281044483185, "learning_rate": 8.960493322711676e-06, "loss": 0.0272, "step": 71210 }, { "epoch": 0.5762602152277693, "grad_norm": 0.7853306531906128, "learning_rate": 8.960062286826549e-06, "loss": 0.0351, "step": 71220 }, { "epoch": 0.5763411279229711, "grad_norm": 0.2935238182544708, "learning_rate": 8.95963117196634e-06, "loss": 0.0368, "step": 71230 }, { "epoch": 0.576422040618173, "grad_norm": 0.3443438410758972, "learning_rate": 8.959199978139644e-06, "loss": 0.0477, "step": 71240 }, { "epoch": 0.5765029533133749, "grad_norm": 0.4890977740287781, "learning_rate": 8.958768705355063e-06, "loss": 0.0323, "step": 71250 }, { "epoch": 0.5765838660085767, "grad_norm": 0.6653618812561035, "learning_rate": 8.958337353621198e-06, "loss": 0.0247, "step": 71260 }, { "epoch": 0.5766647787037786, "grad_norm": 0.3974754512310028, "learning_rate": 8.957905922946652e-06, "loss": 0.0242, "step": 71270 }, { "epoch": 0.5767456913989805, "grad_norm": 0.5104724764823914, "learning_rate": 8.957474413340027e-06, "loss": 0.0414, "step": 71280 }, { "epoch": 0.5768266040941824, "grad_norm": 0.48076310753822327, "learning_rate": 8.957042824809929e-06, "loss": 0.0365, "step": 71290 }, { "epoch": 0.5769075167893842, "grad_norm": 1.1907262802124023, "learning_rate": 8.956611157364968e-06, "loss": 0.0342, "step": 71300 }, { "epoch": 0.5769884294845862, "grad_norm": 1.0667798519134521, "learning_rate": 8.956179411013747e-06, "loss": 0.034, "step": 71310 }, { "epoch": 0.577069342179788, "grad_norm": 0.34349969029426575, "learning_rate": 8.95574758576488e-06, "loss": 0.0282, "step": 71320 }, { "epoch": 0.5771502548749898, "grad_norm": 0.23959720134735107, "learning_rate": 8.95531568162698e-06, "loss": 0.0381, "step": 71330 }, { "epoch": 0.5772311675701918, "grad_norm": 0.5369144678115845, "learning_rate": 8.954883698608658e-06, "loss": 0.0405, "step": 71340 }, { "epoch": 0.5773120802653936, "grad_norm": 0.2234627604484558, "learning_rate": 8.95445163671853e-06, "loss": 0.042, "step": 71350 }, { "epoch": 0.5773929929605955, "grad_norm": 0.5018844604492188, "learning_rate": 8.954019495965212e-06, "loss": 0.0325, "step": 71360 }, { "epoch": 0.5774739056557974, "grad_norm": 0.5006888508796692, "learning_rate": 8.953587276357323e-06, "loss": 0.0531, "step": 71370 }, { "epoch": 0.5775548183509993, "grad_norm": 0.6442738175392151, "learning_rate": 8.953154977903481e-06, "loss": 0.029, "step": 71380 }, { "epoch": 0.5776357310462011, "grad_norm": 0.17341014742851257, "learning_rate": 8.95272260061231e-06, "loss": 0.0281, "step": 71390 }, { "epoch": 0.5777166437414031, "grad_norm": 0.48041656613349915, "learning_rate": 8.95229014449243e-06, "loss": 0.0411, "step": 71400 }, { "epoch": 0.5777975564366049, "grad_norm": 0.6548085808753967, "learning_rate": 8.951857609552468e-06, "loss": 0.0262, "step": 71410 }, { "epoch": 0.5778784691318067, "grad_norm": 0.3971729874610901, "learning_rate": 8.951424995801046e-06, "loss": 0.0413, "step": 71420 }, { "epoch": 0.5779593818270087, "grad_norm": 0.44597968459129333, "learning_rate": 8.950992303246797e-06, "loss": 0.0294, "step": 71430 }, { "epoch": 0.5780402945222105, "grad_norm": 0.684089720249176, "learning_rate": 8.950559531898346e-06, "loss": 0.057, "step": 71440 }, { "epoch": 0.5781212072174124, "grad_norm": 0.2816818952560425, "learning_rate": 8.950126681764326e-06, "loss": 0.0251, "step": 71450 }, { "epoch": 0.5782021199126143, "grad_norm": 0.5896627306938171, "learning_rate": 8.949693752853369e-06, "loss": 0.0233, "step": 71460 }, { "epoch": 0.5782830326078162, "grad_norm": 0.6137971878051758, "learning_rate": 8.949260745174106e-06, "loss": 0.053, "step": 71470 }, { "epoch": 0.578363945303018, "grad_norm": 0.6023452877998352, "learning_rate": 8.948827658735177e-06, "loss": 0.0278, "step": 71480 }, { "epoch": 0.5784448579982199, "grad_norm": 0.3356514275074005, "learning_rate": 8.948394493545216e-06, "loss": 0.0466, "step": 71490 }, { "epoch": 0.5785257706934218, "grad_norm": 0.32850977778434753, "learning_rate": 8.947961249612862e-06, "loss": 0.0356, "step": 71500 }, { "epoch": 0.5786066833886236, "grad_norm": 0.4437183439731598, "learning_rate": 8.947527926946756e-06, "loss": 0.0357, "step": 71510 }, { "epoch": 0.5786875960838256, "grad_norm": 0.5053644776344299, "learning_rate": 8.947094525555539e-06, "loss": 0.0311, "step": 71520 }, { "epoch": 0.5787685087790274, "grad_norm": 0.2548253834247589, "learning_rate": 8.946661045447853e-06, "loss": 0.0353, "step": 71530 }, { "epoch": 0.5788494214742294, "grad_norm": 0.29718196392059326, "learning_rate": 8.946227486632346e-06, "loss": 0.0261, "step": 71540 }, { "epoch": 0.5789303341694312, "grad_norm": 0.3877670168876648, "learning_rate": 8.94579384911766e-06, "loss": 0.0317, "step": 71550 }, { "epoch": 0.579011246864633, "grad_norm": 0.7734344005584717, "learning_rate": 8.945360132912449e-06, "loss": 0.0463, "step": 71560 }, { "epoch": 0.579092159559835, "grad_norm": 0.7451068758964539, "learning_rate": 8.944926338025358e-06, "loss": 0.0424, "step": 71570 }, { "epoch": 0.5791730722550368, "grad_norm": 0.6057541966438293, "learning_rate": 8.944492464465038e-06, "loss": 0.0302, "step": 71580 }, { "epoch": 0.5792539849502387, "grad_norm": 0.3318382203578949, "learning_rate": 8.944058512240144e-06, "loss": 0.03, "step": 71590 }, { "epoch": 0.5793348976454405, "grad_norm": 0.608159601688385, "learning_rate": 8.94362448135933e-06, "loss": 0.0345, "step": 71600 }, { "epoch": 0.5794158103406425, "grad_norm": 0.5152860283851624, "learning_rate": 8.94319037183125e-06, "loss": 0.0287, "step": 71610 }, { "epoch": 0.5794967230358443, "grad_norm": 0.4326241910457611, "learning_rate": 8.942756183664563e-06, "loss": 0.0245, "step": 71620 }, { "epoch": 0.5795776357310461, "grad_norm": 0.5157181620597839, "learning_rate": 8.942321916867927e-06, "loss": 0.0333, "step": 71630 }, { "epoch": 0.5796585484262481, "grad_norm": 0.6102772951126099, "learning_rate": 8.941887571450003e-06, "loss": 0.0397, "step": 71640 }, { "epoch": 0.5797394611214499, "grad_norm": 0.6002829670906067, "learning_rate": 8.941453147419453e-06, "loss": 0.0414, "step": 71650 }, { "epoch": 0.5798203738166519, "grad_norm": 0.3732829988002777, "learning_rate": 8.94101864478494e-06, "loss": 0.0428, "step": 71660 }, { "epoch": 0.5799012865118537, "grad_norm": 0.1633705496788025, "learning_rate": 8.940584063555132e-06, "loss": 0.0403, "step": 71670 }, { "epoch": 0.5799821992070556, "grad_norm": 1.314406156539917, "learning_rate": 8.940149403738691e-06, "loss": 0.0462, "step": 71680 }, { "epoch": 0.5800631119022575, "grad_norm": 0.3043152987957001, "learning_rate": 8.93971466534429e-06, "loss": 0.0256, "step": 71690 }, { "epoch": 0.5801440245974594, "grad_norm": 0.8480917811393738, "learning_rate": 8.939279848380597e-06, "loss": 0.0304, "step": 71700 }, { "epoch": 0.5802249372926612, "grad_norm": 0.16922806203365326, "learning_rate": 8.938844952856285e-06, "loss": 0.054, "step": 71710 }, { "epoch": 0.580305849987863, "grad_norm": 0.6067970991134644, "learning_rate": 8.938409978780024e-06, "loss": 0.0215, "step": 71720 }, { "epoch": 0.580386762683065, "grad_norm": 0.21622654795646667, "learning_rate": 8.93797492616049e-06, "loss": 0.0324, "step": 71730 }, { "epoch": 0.5804676753782668, "grad_norm": 0.27770334482192993, "learning_rate": 8.93753979500636e-06, "loss": 0.018, "step": 71740 }, { "epoch": 0.5805485880734688, "grad_norm": 0.6166285872459412, "learning_rate": 8.937104585326313e-06, "loss": 0.0364, "step": 71750 }, { "epoch": 0.5806295007686706, "grad_norm": 0.6567559838294983, "learning_rate": 8.936669297129025e-06, "loss": 0.0461, "step": 71760 }, { "epoch": 0.5807104134638725, "grad_norm": 0.8131378889083862, "learning_rate": 8.936233930423178e-06, "loss": 0.042, "step": 71770 }, { "epoch": 0.5807913261590744, "grad_norm": 0.6347534656524658, "learning_rate": 8.935798485217455e-06, "loss": 0.0413, "step": 71780 }, { "epoch": 0.5808722388542762, "grad_norm": 0.2069743573665619, "learning_rate": 8.935362961520541e-06, "loss": 0.0257, "step": 71790 }, { "epoch": 0.5809531515494781, "grad_norm": 0.9410139322280884, "learning_rate": 8.934927359341121e-06, "loss": 0.0272, "step": 71800 }, { "epoch": 0.58103406424468, "grad_norm": 0.33495911955833435, "learning_rate": 8.934491678687879e-06, "loss": 0.0236, "step": 71810 }, { "epoch": 0.5811149769398819, "grad_norm": 0.5357265472412109, "learning_rate": 8.934055919569512e-06, "loss": 0.0289, "step": 71820 }, { "epoch": 0.5811958896350837, "grad_norm": 0.2338634580373764, "learning_rate": 8.9336200819947e-06, "loss": 0.0256, "step": 71830 }, { "epoch": 0.5812768023302857, "grad_norm": 0.2666660249233246, "learning_rate": 8.933184165972143e-06, "loss": 0.0257, "step": 71840 }, { "epoch": 0.5813577150254875, "grad_norm": 0.6101253032684326, "learning_rate": 8.932748171510527e-06, "loss": 0.0315, "step": 71850 }, { "epoch": 0.5814386277206893, "grad_norm": 0.5405039191246033, "learning_rate": 8.932312098618554e-06, "loss": 0.0478, "step": 71860 }, { "epoch": 0.5815195404158913, "grad_norm": 0.42864376306533813, "learning_rate": 8.931875947304918e-06, "loss": 0.0288, "step": 71870 }, { "epoch": 0.5816004531110931, "grad_norm": 0.5507829189300537, "learning_rate": 8.931439717578316e-06, "loss": 0.031, "step": 71880 }, { "epoch": 0.581681365806295, "grad_norm": 0.48950910568237305, "learning_rate": 8.931003409447447e-06, "loss": 0.0392, "step": 71890 }, { "epoch": 0.5817622785014969, "grad_norm": 0.3229822516441345, "learning_rate": 8.930567022921017e-06, "loss": 0.0401, "step": 71900 }, { "epoch": 0.5818431911966988, "grad_norm": 0.9510934948921204, "learning_rate": 8.930130558007723e-06, "loss": 0.0378, "step": 71910 }, { "epoch": 0.5819241038919006, "grad_norm": 0.2788742482662201, "learning_rate": 8.929694014716273e-06, "loss": 0.0367, "step": 71920 }, { "epoch": 0.5820050165871025, "grad_norm": 0.6942417025566101, "learning_rate": 8.92925739305537e-06, "loss": 0.037, "step": 71930 }, { "epoch": 0.5820859292823044, "grad_norm": 0.3815430700778961, "learning_rate": 8.928820693033726e-06, "loss": 0.0416, "step": 71940 }, { "epoch": 0.5821668419775062, "grad_norm": 0.6043409109115601, "learning_rate": 8.928383914660045e-06, "loss": 0.0348, "step": 71950 }, { "epoch": 0.5822477546727082, "grad_norm": 0.5825616121292114, "learning_rate": 8.927947057943042e-06, "loss": 0.0418, "step": 71960 }, { "epoch": 0.58232866736791, "grad_norm": 0.6501279473304749, "learning_rate": 8.927510122891426e-06, "loss": 0.0343, "step": 71970 }, { "epoch": 0.5824095800631119, "grad_norm": 0.452373743057251, "learning_rate": 8.927073109513914e-06, "loss": 0.0197, "step": 71980 }, { "epoch": 0.5824904927583138, "grad_norm": 0.4059199094772339, "learning_rate": 8.926636017819217e-06, "loss": 0.0306, "step": 71990 }, { "epoch": 0.5825714054535157, "grad_norm": 0.5742725133895874, "learning_rate": 8.926198847816055e-06, "loss": 0.029, "step": 72000 }, { "epoch": 0.5826523181487175, "grad_norm": 0.41895875334739685, "learning_rate": 8.925761599513144e-06, "loss": 0.0579, "step": 72010 }, { "epoch": 0.5827332308439194, "grad_norm": 0.3860982656478882, "learning_rate": 8.925324272919208e-06, "loss": 0.0351, "step": 72020 }, { "epoch": 0.5828141435391213, "grad_norm": 0.4044061005115509, "learning_rate": 8.924886868042964e-06, "loss": 0.0265, "step": 72030 }, { "epoch": 0.5828950562343231, "grad_norm": 0.08368602395057678, "learning_rate": 8.924449384893138e-06, "loss": 0.023, "step": 72040 }, { "epoch": 0.5829759689295251, "grad_norm": 0.2614562213420868, "learning_rate": 8.924011823478453e-06, "loss": 0.0246, "step": 72050 }, { "epoch": 0.5830568816247269, "grad_norm": 0.48861396312713623, "learning_rate": 8.923574183807638e-06, "loss": 0.0387, "step": 72060 }, { "epoch": 0.5831377943199288, "grad_norm": 0.31304946541786194, "learning_rate": 8.923136465889417e-06, "loss": 0.0373, "step": 72070 }, { "epoch": 0.5832187070151307, "grad_norm": 0.5520718097686768, "learning_rate": 8.922698669732524e-06, "loss": 0.0262, "step": 72080 }, { "epoch": 0.5832996197103325, "grad_norm": 0.5953716039657593, "learning_rate": 8.922260795345685e-06, "loss": 0.0325, "step": 72090 }, { "epoch": 0.5833805324055344, "grad_norm": 1.045190453529358, "learning_rate": 8.921822842737635e-06, "loss": 0.0355, "step": 72100 }, { "epoch": 0.5834614451007363, "grad_norm": 0.36600279808044434, "learning_rate": 8.921384811917108e-06, "loss": 0.0399, "step": 72110 }, { "epoch": 0.5835423577959382, "grad_norm": 0.3033647835254669, "learning_rate": 8.92094670289284e-06, "loss": 0.0278, "step": 72120 }, { "epoch": 0.58362327049114, "grad_norm": 1.0364502668380737, "learning_rate": 8.920508515673566e-06, "loss": 0.0351, "step": 72130 }, { "epoch": 0.583704183186342, "grad_norm": 0.1338512897491455, "learning_rate": 8.920070250268026e-06, "loss": 0.0423, "step": 72140 }, { "epoch": 0.5837850958815438, "grad_norm": 1.4577016830444336, "learning_rate": 8.919631906684963e-06, "loss": 0.0462, "step": 72150 }, { "epoch": 0.5838660085767456, "grad_norm": 0.921578049659729, "learning_rate": 8.919193484933113e-06, "loss": 0.0463, "step": 72160 }, { "epoch": 0.5839469212719476, "grad_norm": 0.6868454217910767, "learning_rate": 8.918754985021226e-06, "loss": 0.044, "step": 72170 }, { "epoch": 0.5840278339671494, "grad_norm": 0.6060300469398499, "learning_rate": 8.918316406958043e-06, "loss": 0.0287, "step": 72180 }, { "epoch": 0.5841087466623514, "grad_norm": 0.17497359216213226, "learning_rate": 8.91787775075231e-06, "loss": 0.0267, "step": 72190 }, { "epoch": 0.5841896593575532, "grad_norm": 0.4664156138896942, "learning_rate": 8.917439016412777e-06, "loss": 0.0415, "step": 72200 }, { "epoch": 0.5842705720527551, "grad_norm": 0.34625154733657837, "learning_rate": 8.917000203948191e-06, "loss": 0.0243, "step": 72210 }, { "epoch": 0.584351484747957, "grad_norm": 0.3044631779193878, "learning_rate": 8.916561313367308e-06, "loss": 0.0356, "step": 72220 }, { "epoch": 0.5844323974431588, "grad_norm": 0.677832841873169, "learning_rate": 8.916122344678876e-06, "loss": 0.0371, "step": 72230 }, { "epoch": 0.5845133101383607, "grad_norm": 0.47489023208618164, "learning_rate": 8.915683297891652e-06, "loss": 0.0289, "step": 72240 }, { "epoch": 0.5845942228335625, "grad_norm": 0.5513684749603271, "learning_rate": 8.91524417301439e-06, "loss": 0.0315, "step": 72250 }, { "epoch": 0.5846751355287645, "grad_norm": 0.12608499825000763, "learning_rate": 8.91480497005585e-06, "loss": 0.0428, "step": 72260 }, { "epoch": 0.5847560482239663, "grad_norm": 0.5479744076728821, "learning_rate": 8.914365689024789e-06, "loss": 0.0323, "step": 72270 }, { "epoch": 0.5848369609191683, "grad_norm": 0.6289670467376709, "learning_rate": 8.913926329929967e-06, "loss": 0.0255, "step": 72280 }, { "epoch": 0.5849178736143701, "grad_norm": 0.4496842622756958, "learning_rate": 8.913486892780148e-06, "loss": 0.032, "step": 72290 }, { "epoch": 0.5849987863095719, "grad_norm": 0.14730972051620483, "learning_rate": 8.913047377584094e-06, "loss": 0.0347, "step": 72300 }, { "epoch": 0.5850796990047739, "grad_norm": 0.9946826696395874, "learning_rate": 8.91260778435057e-06, "loss": 0.0463, "step": 72310 }, { "epoch": 0.5851606116999757, "grad_norm": 0.4852276146411896, "learning_rate": 8.912168113088345e-06, "loss": 0.0358, "step": 72320 }, { "epoch": 0.5852415243951776, "grad_norm": 0.5633342266082764, "learning_rate": 8.911728363806185e-06, "loss": 0.0313, "step": 72330 }, { "epoch": 0.5853224370903795, "grad_norm": 0.42111167311668396, "learning_rate": 8.91128853651286e-06, "loss": 0.0249, "step": 72340 }, { "epoch": 0.5854033497855814, "grad_norm": 0.4965047240257263, "learning_rate": 8.910848631217146e-06, "loss": 0.0346, "step": 72350 }, { "epoch": 0.5854842624807832, "grad_norm": 0.9131572246551514, "learning_rate": 8.910408647927808e-06, "loss": 0.0441, "step": 72360 }, { "epoch": 0.5855651751759852, "grad_norm": 0.1980452537536621, "learning_rate": 8.909968586653629e-06, "loss": 0.0258, "step": 72370 }, { "epoch": 0.585646087871187, "grad_norm": 0.3171001076698303, "learning_rate": 8.909528447403377e-06, "loss": 0.0342, "step": 72380 }, { "epoch": 0.5857270005663888, "grad_norm": 0.4427776336669922, "learning_rate": 8.909088230185835e-06, "loss": 0.0232, "step": 72390 }, { "epoch": 0.5858079132615908, "grad_norm": 0.21134579181671143, "learning_rate": 8.90864793500978e-06, "loss": 0.0325, "step": 72400 }, { "epoch": 0.5858888259567926, "grad_norm": 0.28509393334388733, "learning_rate": 8.908207561883994e-06, "loss": 0.0389, "step": 72410 }, { "epoch": 0.5859697386519945, "grad_norm": 0.6765751242637634, "learning_rate": 8.907767110817259e-06, "loss": 0.0342, "step": 72420 }, { "epoch": 0.5860506513471964, "grad_norm": 0.2575191855430603, "learning_rate": 8.90732658181836e-06, "loss": 0.0262, "step": 72430 }, { "epoch": 0.5861315640423983, "grad_norm": 0.4345104396343231, "learning_rate": 8.906885974896077e-06, "loss": 0.0308, "step": 72440 }, { "epoch": 0.5862124767376001, "grad_norm": 0.8479846715927124, "learning_rate": 8.906445290059204e-06, "loss": 0.0311, "step": 72450 }, { "epoch": 0.586293389432802, "grad_norm": 0.4463498890399933, "learning_rate": 8.906004527316528e-06, "loss": 0.0368, "step": 72460 }, { "epoch": 0.5863743021280039, "grad_norm": 0.8042336702346802, "learning_rate": 8.905563686676834e-06, "loss": 0.039, "step": 72470 }, { "epoch": 0.5864552148232057, "grad_norm": 0.7158495783805847, "learning_rate": 8.905122768148919e-06, "loss": 0.0365, "step": 72480 }, { "epoch": 0.5865361275184077, "grad_norm": 0.6813024878501892, "learning_rate": 8.904681771741575e-06, "loss": 0.0343, "step": 72490 }, { "epoch": 0.5866170402136095, "grad_norm": 0.604593813419342, "learning_rate": 8.904240697463595e-06, "loss": 0.034, "step": 72500 }, { "epoch": 0.5866979529088114, "grad_norm": 0.3909602165222168, "learning_rate": 8.903799545323777e-06, "loss": 0.0345, "step": 72510 }, { "epoch": 0.5867788656040133, "grad_norm": 0.8971669673919678, "learning_rate": 8.90335831533092e-06, "loss": 0.0455, "step": 72520 }, { "epoch": 0.5868597782992151, "grad_norm": 0.49645453691482544, "learning_rate": 8.90291700749382e-06, "loss": 0.0414, "step": 72530 }, { "epoch": 0.586940690994417, "grad_norm": 0.4637131989002228, "learning_rate": 8.902475621821281e-06, "loss": 0.0478, "step": 72540 }, { "epoch": 0.5870216036896189, "grad_norm": 0.3031693398952484, "learning_rate": 8.902034158322101e-06, "loss": 0.0229, "step": 72550 }, { "epoch": 0.5871025163848208, "grad_norm": 0.5475961565971375, "learning_rate": 8.90159261700509e-06, "loss": 0.026, "step": 72560 }, { "epoch": 0.5871834290800226, "grad_norm": 0.48295173048973083, "learning_rate": 8.901150997879051e-06, "loss": 0.0335, "step": 72570 }, { "epoch": 0.5872643417752246, "grad_norm": 0.3881100118160248, "learning_rate": 8.90070930095279e-06, "loss": 0.0235, "step": 72580 }, { "epoch": 0.5873452544704264, "grad_norm": 0.23740863800048828, "learning_rate": 8.900267526235117e-06, "loss": 0.0267, "step": 72590 }, { "epoch": 0.5874261671656282, "grad_norm": 0.4550231695175171, "learning_rate": 8.899825673734844e-06, "loss": 0.0366, "step": 72600 }, { "epoch": 0.5875070798608302, "grad_norm": 0.8968557119369507, "learning_rate": 8.899383743460779e-06, "loss": 0.0459, "step": 72610 }, { "epoch": 0.587587992556032, "grad_norm": 0.3488626480102539, "learning_rate": 8.898941735421736e-06, "loss": 0.0261, "step": 72620 }, { "epoch": 0.5876689052512339, "grad_norm": 0.6225866079330444, "learning_rate": 8.898499649626534e-06, "loss": 0.0375, "step": 72630 }, { "epoch": 0.5877498179464358, "grad_norm": 0.2683587372303009, "learning_rate": 8.898057486083985e-06, "loss": 0.031, "step": 72640 }, { "epoch": 0.5878307306416377, "grad_norm": 0.3937390148639679, "learning_rate": 8.897615244802907e-06, "loss": 0.0498, "step": 72650 }, { "epoch": 0.5879116433368395, "grad_norm": 0.5245234966278076, "learning_rate": 8.897172925792122e-06, "loss": 0.028, "step": 72660 }, { "epoch": 0.5879925560320415, "grad_norm": 0.22698122262954712, "learning_rate": 8.896730529060453e-06, "loss": 0.0283, "step": 72670 }, { "epoch": 0.5880734687272433, "grad_norm": 0.4018455743789673, "learning_rate": 8.896288054616717e-06, "loss": 0.0302, "step": 72680 }, { "epoch": 0.5881543814224451, "grad_norm": 0.5675001740455627, "learning_rate": 8.895845502469741e-06, "loss": 0.0456, "step": 72690 }, { "epoch": 0.5882352941176471, "grad_norm": 0.49575528502464294, "learning_rate": 8.895402872628352e-06, "loss": 0.0252, "step": 72700 }, { "epoch": 0.5883162068128489, "grad_norm": 0.047654230147600174, "learning_rate": 8.894960165101378e-06, "loss": 0.0283, "step": 72710 }, { "epoch": 0.5883971195080508, "grad_norm": 0.38815662264823914, "learning_rate": 8.894517379897644e-06, "loss": 0.0349, "step": 72720 }, { "epoch": 0.5884780322032527, "grad_norm": 0.3207102417945862, "learning_rate": 8.894074517025983e-06, "loss": 0.0357, "step": 72730 }, { "epoch": 0.5885589448984546, "grad_norm": 0.3510048985481262, "learning_rate": 8.893631576495227e-06, "loss": 0.0243, "step": 72740 }, { "epoch": 0.5886398575936564, "grad_norm": 0.47346168756484985, "learning_rate": 8.893188558314207e-06, "loss": 0.0357, "step": 72750 }, { "epoch": 0.5887207702888583, "grad_norm": 0.5074281692504883, "learning_rate": 8.892745462491763e-06, "loss": 0.0192, "step": 72760 }, { "epoch": 0.5888016829840602, "grad_norm": 0.3348846733570099, "learning_rate": 8.892302289036727e-06, "loss": 0.0449, "step": 72770 }, { "epoch": 0.588882595679262, "grad_norm": 0.2265467494726181, "learning_rate": 8.89185903795794e-06, "loss": 0.0321, "step": 72780 }, { "epoch": 0.588963508374464, "grad_norm": 0.34914711117744446, "learning_rate": 8.89141570926424e-06, "loss": 0.024, "step": 72790 }, { "epoch": 0.5890444210696658, "grad_norm": 0.3911106288433075, "learning_rate": 8.890972302964468e-06, "loss": 0.0303, "step": 72800 }, { "epoch": 0.5891253337648678, "grad_norm": 0.43224722146987915, "learning_rate": 8.890528819067467e-06, "loss": 0.0202, "step": 72810 }, { "epoch": 0.5892062464600696, "grad_norm": 0.5525075197219849, "learning_rate": 8.890085257582084e-06, "loss": 0.0289, "step": 72820 }, { "epoch": 0.5892871591552714, "grad_norm": 0.23952633142471313, "learning_rate": 8.889641618517162e-06, "loss": 0.0245, "step": 72830 }, { "epoch": 0.5893680718504734, "grad_norm": 0.49092474579811096, "learning_rate": 8.88919790188155e-06, "loss": 0.0482, "step": 72840 }, { "epoch": 0.5894489845456752, "grad_norm": 0.367283433675766, "learning_rate": 8.888754107684096e-06, "loss": 0.023, "step": 72850 }, { "epoch": 0.5895298972408771, "grad_norm": 0.43659141659736633, "learning_rate": 8.88831023593365e-06, "loss": 0.0376, "step": 72860 }, { "epoch": 0.589610809936079, "grad_norm": 0.427834153175354, "learning_rate": 8.887866286639065e-06, "loss": 0.0256, "step": 72870 }, { "epoch": 0.5896917226312809, "grad_norm": 0.24376839399337769, "learning_rate": 8.887422259809195e-06, "loss": 0.0419, "step": 72880 }, { "epoch": 0.5897726353264827, "grad_norm": 0.6044664978981018, "learning_rate": 8.886978155452895e-06, "loss": 0.0429, "step": 72890 }, { "epoch": 0.5898535480216845, "grad_norm": 0.5108396410942078, "learning_rate": 8.886533973579021e-06, "loss": 0.0421, "step": 72900 }, { "epoch": 0.5899344607168865, "grad_norm": 0.44586628675460815, "learning_rate": 8.886089714196434e-06, "loss": 0.0376, "step": 72910 }, { "epoch": 0.5900153734120883, "grad_norm": 0.5254507660865784, "learning_rate": 8.885645377313989e-06, "loss": 0.0461, "step": 72920 }, { "epoch": 0.5900962861072903, "grad_norm": 1.0122274160385132, "learning_rate": 8.88520096294055e-06, "loss": 0.0564, "step": 72930 }, { "epoch": 0.5901771988024921, "grad_norm": 0.7403278350830078, "learning_rate": 8.88475647108498e-06, "loss": 0.0263, "step": 72940 }, { "epoch": 0.590258111497694, "grad_norm": 0.22719629108905792, "learning_rate": 8.884311901756143e-06, "loss": 0.0228, "step": 72950 }, { "epoch": 0.5903390241928959, "grad_norm": 0.3910883963108063, "learning_rate": 8.883867254962905e-06, "loss": 0.0446, "step": 72960 }, { "epoch": 0.5904199368880978, "grad_norm": 0.5184861421585083, "learning_rate": 8.883422530714137e-06, "loss": 0.0406, "step": 72970 }, { "epoch": 0.5905008495832996, "grad_norm": 0.21451738476753235, "learning_rate": 8.882977729018701e-06, "loss": 0.04, "step": 72980 }, { "epoch": 0.5905817622785015, "grad_norm": 0.6519029140472412, "learning_rate": 8.882532849885474e-06, "loss": 0.0371, "step": 72990 }, { "epoch": 0.5906626749737034, "grad_norm": 0.47765791416168213, "learning_rate": 8.882087893323325e-06, "loss": 0.0314, "step": 73000 }, { "epoch": 0.5907435876689052, "grad_norm": 0.465217649936676, "learning_rate": 8.881642859341127e-06, "loss": 0.0396, "step": 73010 }, { "epoch": 0.5908245003641072, "grad_norm": 0.4803469181060791, "learning_rate": 8.881197747947759e-06, "loss": 0.0404, "step": 73020 }, { "epoch": 0.590905413059309, "grad_norm": 0.4184723198413849, "learning_rate": 8.880752559152093e-06, "loss": 0.0297, "step": 73030 }, { "epoch": 0.5909863257545109, "grad_norm": 0.6104243397712708, "learning_rate": 8.880307292963012e-06, "loss": 0.0321, "step": 73040 }, { "epoch": 0.5910672384497128, "grad_norm": 0.4420126974582672, "learning_rate": 8.879861949389392e-06, "loss": 0.0361, "step": 73050 }, { "epoch": 0.5911481511449146, "grad_norm": 0.4483605921268463, "learning_rate": 8.879416528440117e-06, "loss": 0.0318, "step": 73060 }, { "epoch": 0.5912290638401165, "grad_norm": 0.2809479236602783, "learning_rate": 8.878971030124071e-06, "loss": 0.0305, "step": 73070 }, { "epoch": 0.5913099765353184, "grad_norm": 0.9453034400939941, "learning_rate": 8.878525454450134e-06, "loss": 0.029, "step": 73080 }, { "epoch": 0.5913908892305203, "grad_norm": 0.4451940655708313, "learning_rate": 8.878079801427196e-06, "loss": 0.0324, "step": 73090 }, { "epoch": 0.5914718019257221, "grad_norm": 0.4969845712184906, "learning_rate": 8.877634071064142e-06, "loss": 0.015, "step": 73100 }, { "epoch": 0.5915527146209241, "grad_norm": 0.7378588318824768, "learning_rate": 8.877188263369864e-06, "loss": 0.0378, "step": 73110 }, { "epoch": 0.5916336273161259, "grad_norm": 0.5903221368789673, "learning_rate": 8.87674237835325e-06, "loss": 0.0326, "step": 73120 }, { "epoch": 0.5917145400113277, "grad_norm": 0.15557849407196045, "learning_rate": 8.876296416023194e-06, "loss": 0.0309, "step": 73130 }, { "epoch": 0.5917954527065297, "grad_norm": 0.3990640342235565, "learning_rate": 8.875850376388591e-06, "loss": 0.0341, "step": 73140 }, { "epoch": 0.5918763654017315, "grad_norm": 0.6608148813247681, "learning_rate": 8.875404259458332e-06, "loss": 0.0403, "step": 73150 }, { "epoch": 0.5919572780969334, "grad_norm": 0.2185867875814438, "learning_rate": 8.874958065241318e-06, "loss": 0.0242, "step": 73160 }, { "epoch": 0.5920381907921353, "grad_norm": 0.41925740242004395, "learning_rate": 8.874511793746445e-06, "loss": 0.0188, "step": 73170 }, { "epoch": 0.5921191034873372, "grad_norm": 0.445778489112854, "learning_rate": 8.874065444982614e-06, "loss": 0.0392, "step": 73180 }, { "epoch": 0.592200016182539, "grad_norm": 0.3219965100288391, "learning_rate": 8.873619018958726e-06, "loss": 0.0323, "step": 73190 }, { "epoch": 0.5922809288777409, "grad_norm": 0.4910028576850891, "learning_rate": 8.873172515683684e-06, "loss": 0.0333, "step": 73200 }, { "epoch": 0.5923618415729428, "grad_norm": 0.5174260139465332, "learning_rate": 8.872725935166392e-06, "loss": 0.0307, "step": 73210 }, { "epoch": 0.5924427542681446, "grad_norm": 0.2879965305328369, "learning_rate": 8.87227927741576e-06, "loss": 0.0215, "step": 73220 }, { "epoch": 0.5925236669633466, "grad_norm": 0.9573395252227783, "learning_rate": 8.871832542440688e-06, "loss": 0.0299, "step": 73230 }, { "epoch": 0.5926045796585484, "grad_norm": 0.48674848675727844, "learning_rate": 8.871385730250092e-06, "loss": 0.0304, "step": 73240 }, { "epoch": 0.5926854923537503, "grad_norm": 0.5345462560653687, "learning_rate": 8.87093884085288e-06, "loss": 0.0424, "step": 73250 }, { "epoch": 0.5927664050489522, "grad_norm": 0.4873715937137604, "learning_rate": 8.870491874257966e-06, "loss": 0.0289, "step": 73260 }, { "epoch": 0.5928473177441541, "grad_norm": 0.23491615056991577, "learning_rate": 8.870044830474263e-06, "loss": 0.0279, "step": 73270 }, { "epoch": 0.5929282304393559, "grad_norm": 0.6786132454872131, "learning_rate": 8.869597709510684e-06, "loss": 0.0358, "step": 73280 }, { "epoch": 0.5930091431345578, "grad_norm": 0.6412105560302734, "learning_rate": 8.86915051137615e-06, "loss": 0.0331, "step": 73290 }, { "epoch": 0.5930900558297597, "grad_norm": 0.8844486474990845, "learning_rate": 8.868703236079575e-06, "loss": 0.0165, "step": 73300 }, { "epoch": 0.5931709685249615, "grad_norm": 0.6452272534370422, "learning_rate": 8.868255883629881e-06, "loss": 0.0266, "step": 73310 }, { "epoch": 0.5932518812201635, "grad_norm": 0.4474169611930847, "learning_rate": 8.867808454035991e-06, "loss": 0.0383, "step": 73320 }, { "epoch": 0.5933327939153653, "grad_norm": 0.3488408029079437, "learning_rate": 8.867360947306827e-06, "loss": 0.0378, "step": 73330 }, { "epoch": 0.5934137066105672, "grad_norm": 0.24479980766773224, "learning_rate": 8.866913363451313e-06, "loss": 0.0201, "step": 73340 }, { "epoch": 0.5934946193057691, "grad_norm": 0.6742529273033142, "learning_rate": 8.866465702478375e-06, "loss": 0.0332, "step": 73350 }, { "epoch": 0.5935755320009709, "grad_norm": 0.470599889755249, "learning_rate": 8.86601796439694e-06, "loss": 0.0507, "step": 73360 }, { "epoch": 0.5936564446961728, "grad_norm": 0.4561566114425659, "learning_rate": 8.86557014921594e-06, "loss": 0.0426, "step": 73370 }, { "epoch": 0.5937373573913747, "grad_norm": 0.4666058421134949, "learning_rate": 8.865122256944303e-06, "loss": 0.0376, "step": 73380 }, { "epoch": 0.5938182700865766, "grad_norm": 0.4193333685398102, "learning_rate": 8.864674287590963e-06, "loss": 0.0255, "step": 73390 }, { "epoch": 0.5938991827817784, "grad_norm": 0.32404226064682007, "learning_rate": 8.86422624116485e-06, "loss": 0.026, "step": 73400 }, { "epoch": 0.5939800954769804, "grad_norm": 0.4629174768924713, "learning_rate": 8.863778117674906e-06, "loss": 0.0402, "step": 73410 }, { "epoch": 0.5940610081721822, "grad_norm": 0.42043229937553406, "learning_rate": 8.863329917130063e-06, "loss": 0.0252, "step": 73420 }, { "epoch": 0.594141920867384, "grad_norm": 0.4724835157394409, "learning_rate": 8.86288163953926e-06, "loss": 0.0309, "step": 73430 }, { "epoch": 0.594222833562586, "grad_norm": 0.5151911973953247, "learning_rate": 8.86243328491144e-06, "loss": 0.0369, "step": 73440 }, { "epoch": 0.5943037462577878, "grad_norm": 0.49226561188697815, "learning_rate": 8.86198485325554e-06, "loss": 0.0274, "step": 73450 }, { "epoch": 0.5943846589529898, "grad_norm": 0.5429776310920715, "learning_rate": 8.861536344580504e-06, "loss": 0.0334, "step": 73460 }, { "epoch": 0.5944655716481916, "grad_norm": 0.7401767373085022, "learning_rate": 8.86108775889528e-06, "loss": 0.0357, "step": 73470 }, { "epoch": 0.5945464843433935, "grad_norm": 0.5989571213722229, "learning_rate": 8.86063909620881e-06, "loss": 0.0271, "step": 73480 }, { "epoch": 0.5946273970385954, "grad_norm": 0.3578615188598633, "learning_rate": 8.860190356530045e-06, "loss": 0.0668, "step": 73490 }, { "epoch": 0.5947083097337972, "grad_norm": 0.2958149015903473, "learning_rate": 8.859741539867933e-06, "loss": 0.0526, "step": 73500 }, { "epoch": 0.5947892224289991, "grad_norm": 0.5709277987480164, "learning_rate": 8.859292646231422e-06, "loss": 0.0296, "step": 73510 }, { "epoch": 0.594870135124201, "grad_norm": 0.36340370774269104, "learning_rate": 8.858843675629469e-06, "loss": 0.042, "step": 73520 }, { "epoch": 0.5949510478194029, "grad_norm": 0.5121744275093079, "learning_rate": 8.858394628071024e-06, "loss": 0.0212, "step": 73530 }, { "epoch": 0.5950319605146047, "grad_norm": 1.1113288402557373, "learning_rate": 8.857945503565041e-06, "loss": 0.0351, "step": 73540 }, { "epoch": 0.5951128732098067, "grad_norm": 0.40510493516921997, "learning_rate": 8.857496302120483e-06, "loss": 0.0412, "step": 73550 }, { "epoch": 0.5951937859050085, "grad_norm": 0.5060807466506958, "learning_rate": 8.857047023746303e-06, "loss": 0.03, "step": 73560 }, { "epoch": 0.5952746986002104, "grad_norm": 0.6459881663322449, "learning_rate": 8.856597668451464e-06, "loss": 0.0283, "step": 73570 }, { "epoch": 0.5953556112954123, "grad_norm": 0.3399710953235626, "learning_rate": 8.856148236244925e-06, "loss": 0.0345, "step": 73580 }, { "epoch": 0.5954365239906141, "grad_norm": 0.33762943744659424, "learning_rate": 8.85569872713565e-06, "loss": 0.0417, "step": 73590 }, { "epoch": 0.595517436685816, "grad_norm": 0.592617928981781, "learning_rate": 8.855249141132606e-06, "loss": 0.0343, "step": 73600 }, { "epoch": 0.5955983493810179, "grad_norm": 0.5102863907814026, "learning_rate": 8.854799478244754e-06, "loss": 0.0326, "step": 73610 }, { "epoch": 0.5956792620762198, "grad_norm": 0.23607191443443298, "learning_rate": 8.854349738481065e-06, "loss": 0.0263, "step": 73620 }, { "epoch": 0.5957601747714216, "grad_norm": 0.40909698605537415, "learning_rate": 8.853899921850508e-06, "loss": 0.0303, "step": 73630 }, { "epoch": 0.5958410874666236, "grad_norm": 0.10390342026948929, "learning_rate": 8.853450028362052e-06, "loss": 0.0227, "step": 73640 }, { "epoch": 0.5959220001618254, "grad_norm": 0.8624221682548523, "learning_rate": 8.853000058024672e-06, "loss": 0.0454, "step": 73650 }, { "epoch": 0.5960029128570272, "grad_norm": 0.8482910394668579, "learning_rate": 8.852550010847338e-06, "loss": 0.0451, "step": 73660 }, { "epoch": 0.5960838255522292, "grad_norm": 0.3851168751716614, "learning_rate": 8.852099886839027e-06, "loss": 0.0454, "step": 73670 }, { "epoch": 0.596164738247431, "grad_norm": 0.4136519432067871, "learning_rate": 8.851649686008717e-06, "loss": 0.0368, "step": 73680 }, { "epoch": 0.5962456509426329, "grad_norm": 0.4493284225463867, "learning_rate": 8.851199408365385e-06, "loss": 0.036, "step": 73690 }, { "epoch": 0.5963265636378348, "grad_norm": 0.7613388299942017, "learning_rate": 8.850749053918012e-06, "loss": 0.0375, "step": 73700 }, { "epoch": 0.5964074763330367, "grad_norm": 0.8291508555412292, "learning_rate": 8.850298622675577e-06, "loss": 0.0189, "step": 73710 }, { "epoch": 0.5964883890282385, "grad_norm": 0.17813219130039215, "learning_rate": 8.849848114647064e-06, "loss": 0.0359, "step": 73720 }, { "epoch": 0.5965693017234404, "grad_norm": 0.5094019174575806, "learning_rate": 8.849397529841459e-06, "loss": 0.0342, "step": 73730 }, { "epoch": 0.5966502144186423, "grad_norm": 0.5831093788146973, "learning_rate": 8.848946868267745e-06, "loss": 0.0284, "step": 73740 }, { "epoch": 0.5967311271138441, "grad_norm": 0.32796338200569153, "learning_rate": 8.848496129934913e-06, "loss": 0.046, "step": 73750 }, { "epoch": 0.5968120398090461, "grad_norm": 0.45669665932655334, "learning_rate": 8.848045314851949e-06, "loss": 0.0183, "step": 73760 }, { "epoch": 0.5968929525042479, "grad_norm": 0.6727808713912964, "learning_rate": 8.847594423027846e-06, "loss": 0.0254, "step": 73770 }, { "epoch": 0.5969738651994498, "grad_norm": 0.30699577927589417, "learning_rate": 8.847143454471594e-06, "loss": 0.0249, "step": 73780 }, { "epoch": 0.5970547778946517, "grad_norm": 0.5946462750434875, "learning_rate": 8.846692409192189e-06, "loss": 0.0361, "step": 73790 }, { "epoch": 0.5971356905898535, "grad_norm": 0.5820539593696594, "learning_rate": 8.846241287198622e-06, "loss": 0.0284, "step": 73800 }, { "epoch": 0.5972166032850554, "grad_norm": 0.40741246938705444, "learning_rate": 8.845790088499893e-06, "loss": 0.0341, "step": 73810 }, { "epoch": 0.5972975159802573, "grad_norm": 0.7380988597869873, "learning_rate": 8.845338813105e-06, "loss": 0.0286, "step": 73820 }, { "epoch": 0.5973784286754592, "grad_norm": 0.791618287563324, "learning_rate": 8.844887461022942e-06, "loss": 0.0246, "step": 73830 }, { "epoch": 0.597459341370661, "grad_norm": 0.4515383243560791, "learning_rate": 8.84443603226272e-06, "loss": 0.0268, "step": 73840 }, { "epoch": 0.597540254065863, "grad_norm": 0.8062347173690796, "learning_rate": 8.84398452683334e-06, "loss": 0.0324, "step": 73850 }, { "epoch": 0.5976211667610648, "grad_norm": 0.34011900424957275, "learning_rate": 8.843532944743802e-06, "loss": 0.0221, "step": 73860 }, { "epoch": 0.5977020794562666, "grad_norm": 0.5586862564086914, "learning_rate": 8.843081286003112e-06, "loss": 0.0367, "step": 73870 }, { "epoch": 0.5977829921514686, "grad_norm": 0.30432212352752686, "learning_rate": 8.84262955062028e-06, "loss": 0.032, "step": 73880 }, { "epoch": 0.5978639048466704, "grad_norm": 0.4743715226650238, "learning_rate": 8.842177738604314e-06, "loss": 0.021, "step": 73890 }, { "epoch": 0.5979448175418723, "grad_norm": 0.5716708302497864, "learning_rate": 8.841725849964224e-06, "loss": 0.0365, "step": 73900 }, { "epoch": 0.5980257302370742, "grad_norm": 0.5418659448623657, "learning_rate": 8.841273884709023e-06, "loss": 0.027, "step": 73910 }, { "epoch": 0.5981066429322761, "grad_norm": 0.4061063230037689, "learning_rate": 8.840821842847723e-06, "loss": 0.0323, "step": 73920 }, { "epoch": 0.5981875556274779, "grad_norm": 0.6848796606063843, "learning_rate": 8.840369724389339e-06, "loss": 0.0303, "step": 73930 }, { "epoch": 0.5982684683226799, "grad_norm": 0.3712800145149231, "learning_rate": 8.839917529342888e-06, "loss": 0.0133, "step": 73940 }, { "epoch": 0.5983493810178817, "grad_norm": 0.22902093827724457, "learning_rate": 8.839465257717389e-06, "loss": 0.0264, "step": 73950 }, { "epoch": 0.5984302937130835, "grad_norm": 0.2975393533706665, "learning_rate": 8.83901290952186e-06, "loss": 0.0288, "step": 73960 }, { "epoch": 0.5985112064082855, "grad_norm": 0.35260146856307983, "learning_rate": 8.838560484765325e-06, "loss": 0.0286, "step": 73970 }, { "epoch": 0.5985921191034873, "grad_norm": 0.6659121513366699, "learning_rate": 8.838107983456802e-06, "loss": 0.0383, "step": 73980 }, { "epoch": 0.5986730317986892, "grad_norm": 0.42810654640197754, "learning_rate": 8.83765540560532e-06, "loss": 0.0384, "step": 73990 }, { "epoch": 0.5987539444938911, "grad_norm": 0.2788624167442322, "learning_rate": 8.837202751219904e-06, "loss": 0.0396, "step": 74000 }, { "epoch": 0.598834857189093, "grad_norm": 0.8469810485839844, "learning_rate": 8.836750020309576e-06, "loss": 0.0327, "step": 74010 }, { "epoch": 0.5989157698842948, "grad_norm": 0.5922976732254028, "learning_rate": 8.836297212883372e-06, "loss": 0.0518, "step": 74020 }, { "epoch": 0.5989966825794967, "grad_norm": 0.38793569803237915, "learning_rate": 8.835844328950317e-06, "loss": 0.0344, "step": 74030 }, { "epoch": 0.5990775952746986, "grad_norm": 0.5560140013694763, "learning_rate": 8.835391368519447e-06, "loss": 0.0241, "step": 74040 }, { "epoch": 0.5991585079699004, "grad_norm": 0.4312377870082855, "learning_rate": 8.83493833159979e-06, "loss": 0.0306, "step": 74050 }, { "epoch": 0.5992394206651024, "grad_norm": 0.657893717288971, "learning_rate": 8.834485218200385e-06, "loss": 0.0262, "step": 74060 }, { "epoch": 0.5993203333603042, "grad_norm": 0.6608888506889343, "learning_rate": 8.834032028330267e-06, "loss": 0.0414, "step": 74070 }, { "epoch": 0.5994012460555062, "grad_norm": 0.1301286369562149, "learning_rate": 8.833578761998474e-06, "loss": 0.0302, "step": 74080 }, { "epoch": 0.599482158750708, "grad_norm": 0.21895797550678253, "learning_rate": 8.833125419214046e-06, "loss": 0.0357, "step": 74090 }, { "epoch": 0.5995630714459098, "grad_norm": 0.2571084201335907, "learning_rate": 8.832671999986025e-06, "loss": 0.0228, "step": 74100 }, { "epoch": 0.5996439841411118, "grad_norm": 0.687070369720459, "learning_rate": 8.83221850432345e-06, "loss": 0.041, "step": 74110 }, { "epoch": 0.5997248968363136, "grad_norm": 0.5872506499290466, "learning_rate": 8.831764932235367e-06, "loss": 0.0304, "step": 74120 }, { "epoch": 0.5998058095315155, "grad_norm": 0.4111884832382202, "learning_rate": 8.831311283730824e-06, "loss": 0.0305, "step": 74130 }, { "epoch": 0.5998867222267174, "grad_norm": 0.12806543707847595, "learning_rate": 8.830857558818863e-06, "loss": 0.0196, "step": 74140 }, { "epoch": 0.5999676349219193, "grad_norm": 0.6912241578102112, "learning_rate": 8.830403757508537e-06, "loss": 0.0348, "step": 74150 }, { "epoch": 0.6000485476171211, "grad_norm": 0.3583749532699585, "learning_rate": 8.829949879808895e-06, "loss": 0.0326, "step": 74160 }, { "epoch": 0.600129460312323, "grad_norm": 0.11253246665000916, "learning_rate": 8.829495925728986e-06, "loss": 0.0309, "step": 74170 }, { "epoch": 0.6002103730075249, "grad_norm": 0.4544405937194824, "learning_rate": 8.829041895277865e-06, "loss": 0.0288, "step": 74180 }, { "epoch": 0.6002912857027267, "grad_norm": 0.45179957151412964, "learning_rate": 8.828587788464588e-06, "loss": 0.047, "step": 74190 }, { "epoch": 0.6003721983979287, "grad_norm": 0.3839946389198303, "learning_rate": 8.82813360529821e-06, "loss": 0.0258, "step": 74200 }, { "epoch": 0.6004531110931305, "grad_norm": 0.3080393970012665, "learning_rate": 8.82767934578779e-06, "loss": 0.0244, "step": 74210 }, { "epoch": 0.6005340237883324, "grad_norm": 0.7415814995765686, "learning_rate": 8.827225009942385e-06, "loss": 0.0345, "step": 74220 }, { "epoch": 0.6006149364835343, "grad_norm": 0.6786739230155945, "learning_rate": 8.826770597771057e-06, "loss": 0.0464, "step": 74230 }, { "epoch": 0.6006958491787362, "grad_norm": 0.8622195720672607, "learning_rate": 8.826316109282867e-06, "loss": 0.0308, "step": 74240 }, { "epoch": 0.600776761873938, "grad_norm": 1.0916446447372437, "learning_rate": 8.825861544486881e-06, "loss": 0.0355, "step": 74250 }, { "epoch": 0.6008576745691399, "grad_norm": 0.47840040922164917, "learning_rate": 8.825406903392165e-06, "loss": 0.0427, "step": 74260 }, { "epoch": 0.6009385872643418, "grad_norm": 0.363277405500412, "learning_rate": 8.824952186007781e-06, "loss": 0.0218, "step": 74270 }, { "epoch": 0.6010194999595436, "grad_norm": 0.42790353298187256, "learning_rate": 8.824497392342802e-06, "loss": 0.0293, "step": 74280 }, { "epoch": 0.6011004126547456, "grad_norm": 0.4343988001346588, "learning_rate": 8.824042522406295e-06, "loss": 0.0223, "step": 74290 }, { "epoch": 0.6011813253499474, "grad_norm": 0.7527517676353455, "learning_rate": 8.823587576207333e-06, "loss": 0.0487, "step": 74300 }, { "epoch": 0.6012622380451493, "grad_norm": 0.6684747338294983, "learning_rate": 8.82313255375499e-06, "loss": 0.0347, "step": 74310 }, { "epoch": 0.6013431507403512, "grad_norm": 0.46118295192718506, "learning_rate": 8.822677455058339e-06, "loss": 0.0306, "step": 74320 }, { "epoch": 0.601424063435553, "grad_norm": 0.678159773349762, "learning_rate": 8.822222280126456e-06, "loss": 0.0544, "step": 74330 }, { "epoch": 0.6015049761307549, "grad_norm": 0.29016149044036865, "learning_rate": 8.821767028968417e-06, "loss": 0.0271, "step": 74340 }, { "epoch": 0.6015858888259568, "grad_norm": 0.507445752620697, "learning_rate": 8.821311701593306e-06, "loss": 0.026, "step": 74350 }, { "epoch": 0.6016668015211587, "grad_norm": 0.23359118402004242, "learning_rate": 8.8208562980102e-06, "loss": 0.03, "step": 74360 }, { "epoch": 0.6017477142163605, "grad_norm": 0.47185230255126953, "learning_rate": 8.820400818228178e-06, "loss": 0.0222, "step": 74370 }, { "epoch": 0.6018286269115625, "grad_norm": 0.25311705470085144, "learning_rate": 8.819945262256328e-06, "loss": 0.0289, "step": 74380 }, { "epoch": 0.6019095396067643, "grad_norm": 0.7777245044708252, "learning_rate": 8.819489630103734e-06, "loss": 0.0357, "step": 74390 }, { "epoch": 0.6019904523019661, "grad_norm": 0.31496307253837585, "learning_rate": 8.819033921779485e-06, "loss": 0.0373, "step": 74400 }, { "epoch": 0.6020713649971681, "grad_norm": 0.7128584980964661, "learning_rate": 8.818578137292665e-06, "loss": 0.0416, "step": 74410 }, { "epoch": 0.6021522776923699, "grad_norm": 0.3202883303165436, "learning_rate": 8.818122276652365e-06, "loss": 0.0236, "step": 74420 }, { "epoch": 0.6022331903875718, "grad_norm": 0.8666483163833618, "learning_rate": 8.817666339867678e-06, "loss": 0.0417, "step": 74430 }, { "epoch": 0.6023141030827737, "grad_norm": 0.5236549973487854, "learning_rate": 8.817210326947694e-06, "loss": 0.0289, "step": 74440 }, { "epoch": 0.6023950157779756, "grad_norm": 0.38739171624183655, "learning_rate": 8.81675423790151e-06, "loss": 0.0412, "step": 74450 }, { "epoch": 0.6024759284731774, "grad_norm": 0.34817853569984436, "learning_rate": 8.81629807273822e-06, "loss": 0.0291, "step": 74460 }, { "epoch": 0.6025568411683793, "grad_norm": 0.3858487010002136, "learning_rate": 8.81584183146692e-06, "loss": 0.0346, "step": 74470 }, { "epoch": 0.6026377538635812, "grad_norm": 0.6494426131248474, "learning_rate": 8.815385514096711e-06, "loss": 0.0325, "step": 74480 }, { "epoch": 0.602718666558783, "grad_norm": 0.5584120750427246, "learning_rate": 8.814929120636692e-06, "loss": 0.0282, "step": 74490 }, { "epoch": 0.602799579253985, "grad_norm": 0.43784135580062866, "learning_rate": 8.814472651095966e-06, "loss": 0.0321, "step": 74500 }, { "epoch": 0.6028804919491868, "grad_norm": 0.7431588768959045, "learning_rate": 8.814016105483636e-06, "loss": 0.0293, "step": 74510 }, { "epoch": 0.6029614046443887, "grad_norm": 0.36748823523521423, "learning_rate": 8.813559483808807e-06, "loss": 0.0289, "step": 74520 }, { "epoch": 0.6030423173395906, "grad_norm": 0.5744738578796387, "learning_rate": 8.813102786080583e-06, "loss": 0.0362, "step": 74530 }, { "epoch": 0.6031232300347925, "grad_norm": 1.0579307079315186, "learning_rate": 8.812646012308075e-06, "loss": 0.0369, "step": 74540 }, { "epoch": 0.6032041427299943, "grad_norm": 0.3172593116760254, "learning_rate": 8.812189162500391e-06, "loss": 0.0356, "step": 74550 }, { "epoch": 0.6032850554251962, "grad_norm": 0.17711159586906433, "learning_rate": 8.811732236666642e-06, "loss": 0.0469, "step": 74560 }, { "epoch": 0.6033659681203981, "grad_norm": 0.39067888259887695, "learning_rate": 8.81127523481594e-06, "loss": 0.0322, "step": 74570 }, { "epoch": 0.6034468808155999, "grad_norm": 0.5330362319946289, "learning_rate": 8.810818156957399e-06, "loss": 0.0313, "step": 74580 }, { "epoch": 0.6035277935108019, "grad_norm": 0.4499585032463074, "learning_rate": 8.810361003100137e-06, "loss": 0.0341, "step": 74590 }, { "epoch": 0.6036087062060037, "grad_norm": 0.9147841930389404, "learning_rate": 8.809903773253267e-06, "loss": 0.0414, "step": 74600 }, { "epoch": 0.6036896189012056, "grad_norm": 0.21455085277557373, "learning_rate": 8.809446467425911e-06, "loss": 0.0253, "step": 74610 }, { "epoch": 0.6037705315964075, "grad_norm": 0.28066152334213257, "learning_rate": 8.808989085627186e-06, "loss": 0.0384, "step": 74620 }, { "epoch": 0.6038514442916093, "grad_norm": 0.6869887709617615, "learning_rate": 8.808531627866215e-06, "loss": 0.0248, "step": 74630 }, { "epoch": 0.6039323569868112, "grad_norm": 0.6907151341438293, "learning_rate": 8.808074094152118e-06, "loss": 0.0552, "step": 74640 }, { "epoch": 0.6040132696820131, "grad_norm": 0.2425779104232788, "learning_rate": 8.807616484494027e-06, "loss": 0.0409, "step": 74650 }, { "epoch": 0.604094182377215, "grad_norm": 0.2653990685939789, "learning_rate": 8.807158798901062e-06, "loss": 0.0381, "step": 74660 }, { "epoch": 0.6041750950724168, "grad_norm": 0.5169953107833862, "learning_rate": 8.806701037382352e-06, "loss": 0.0339, "step": 74670 }, { "epoch": 0.6042560077676188, "grad_norm": 0.5605782270431519, "learning_rate": 8.806243199947025e-06, "loss": 0.038, "step": 74680 }, { "epoch": 0.6043369204628206, "grad_norm": 0.5584602355957031, "learning_rate": 8.805785286604213e-06, "loss": 0.0338, "step": 74690 }, { "epoch": 0.6044178331580224, "grad_norm": 1.5251712799072266, "learning_rate": 8.805327297363048e-06, "loss": 0.0389, "step": 74700 }, { "epoch": 0.6044987458532244, "grad_norm": 0.5030268430709839, "learning_rate": 8.804869232232664e-06, "loss": 0.035, "step": 74710 }, { "epoch": 0.6045796585484262, "grad_norm": 0.5929257273674011, "learning_rate": 8.804411091222196e-06, "loss": 0.0313, "step": 74720 }, { "epoch": 0.6046605712436282, "grad_norm": 0.8774378895759583, "learning_rate": 8.80395287434078e-06, "loss": 0.0358, "step": 74730 }, { "epoch": 0.60474148393883, "grad_norm": 0.7724900841712952, "learning_rate": 8.803494581597553e-06, "loss": 0.0392, "step": 74740 }, { "epoch": 0.6048223966340319, "grad_norm": 0.7365142703056335, "learning_rate": 8.80303621300166e-06, "loss": 0.0315, "step": 74750 }, { "epoch": 0.6049033093292338, "grad_norm": 0.530730128288269, "learning_rate": 8.802577768562234e-06, "loss": 0.0444, "step": 74760 }, { "epoch": 0.6049842220244356, "grad_norm": 0.2405347228050232, "learning_rate": 8.802119248288424e-06, "loss": 0.0502, "step": 74770 }, { "epoch": 0.6050651347196375, "grad_norm": 0.5193366408348083, "learning_rate": 8.80166065218937e-06, "loss": 0.0262, "step": 74780 }, { "epoch": 0.6051460474148393, "grad_norm": 0.8418189883232117, "learning_rate": 8.801201980274222e-06, "loss": 0.0382, "step": 74790 }, { "epoch": 0.6052269601100413, "grad_norm": 0.545591413974762, "learning_rate": 8.800743232552124e-06, "loss": 0.0274, "step": 74800 }, { "epoch": 0.6053078728052431, "grad_norm": 0.602297842502594, "learning_rate": 8.800284409032228e-06, "loss": 0.0282, "step": 74810 }, { "epoch": 0.6053887855004451, "grad_norm": 0.5197008848190308, "learning_rate": 8.79982550972368e-06, "loss": 0.0244, "step": 74820 }, { "epoch": 0.6054696981956469, "grad_norm": 0.27995026111602783, "learning_rate": 8.799366534635635e-06, "loss": 0.0207, "step": 74830 }, { "epoch": 0.6055506108908488, "grad_norm": 0.6482939720153809, "learning_rate": 8.798907483777246e-06, "loss": 0.0346, "step": 74840 }, { "epoch": 0.6056315235860507, "grad_norm": 0.5231324434280396, "learning_rate": 8.798448357157668e-06, "loss": 0.0318, "step": 74850 }, { "epoch": 0.6057124362812525, "grad_norm": 0.20724472403526306, "learning_rate": 8.797989154786053e-06, "loss": 0.0379, "step": 74860 }, { "epoch": 0.6057933489764544, "grad_norm": 0.15336297452449799, "learning_rate": 8.797529876671565e-06, "loss": 0.0311, "step": 74870 }, { "epoch": 0.6058742616716563, "grad_norm": 0.2556571364402771, "learning_rate": 8.797070522823359e-06, "loss": 0.0349, "step": 74880 }, { "epoch": 0.6059551743668582, "grad_norm": 0.6336976289749146, "learning_rate": 8.796611093250597e-06, "loss": 0.031, "step": 74890 }, { "epoch": 0.60603608706206, "grad_norm": 0.48521873354911804, "learning_rate": 8.796151587962443e-06, "loss": 0.0319, "step": 74900 }, { "epoch": 0.606116999757262, "grad_norm": 0.3256385326385498, "learning_rate": 8.79569200696806e-06, "loss": 0.0266, "step": 74910 }, { "epoch": 0.6061979124524638, "grad_norm": 0.37762007117271423, "learning_rate": 8.795232350276612e-06, "loss": 0.0471, "step": 74920 }, { "epoch": 0.6062788251476656, "grad_norm": 0.374946266412735, "learning_rate": 8.794772617897266e-06, "loss": 0.0295, "step": 74930 }, { "epoch": 0.6063597378428676, "grad_norm": 0.5327500700950623, "learning_rate": 8.794312809839195e-06, "loss": 0.0396, "step": 74940 }, { "epoch": 0.6064406505380694, "grad_norm": 0.3903409242630005, "learning_rate": 8.793852926111561e-06, "loss": 0.0324, "step": 74950 }, { "epoch": 0.6065215632332713, "grad_norm": 0.5466518402099609, "learning_rate": 8.793392966723542e-06, "loss": 0.0423, "step": 74960 }, { "epoch": 0.6066024759284732, "grad_norm": 0.15973278880119324, "learning_rate": 8.792932931684306e-06, "loss": 0.0268, "step": 74970 }, { "epoch": 0.6066833886236751, "grad_norm": 0.21676543354988098, "learning_rate": 8.792472821003031e-06, "loss": 0.0419, "step": 74980 }, { "epoch": 0.6067643013188769, "grad_norm": 0.5242107510566711, "learning_rate": 8.792012634688893e-06, "loss": 0.0578, "step": 74990 }, { "epoch": 0.6068452140140788, "grad_norm": 0.6160109639167786, "learning_rate": 8.79155237275107e-06, "loss": 0.0477, "step": 75000 }, { "epoch": 0.6069261267092807, "grad_norm": 0.5854552984237671, "learning_rate": 8.791092035198735e-06, "loss": 0.0317, "step": 75010 }, { "epoch": 0.6070070394044825, "grad_norm": 0.39326944947242737, "learning_rate": 8.790631622041074e-06, "loss": 0.0458, "step": 75020 }, { "epoch": 0.6070879520996845, "grad_norm": 0.6456433534622192, "learning_rate": 8.790171133287268e-06, "loss": 0.0208, "step": 75030 }, { "epoch": 0.6071688647948863, "grad_norm": 0.4467392563819885, "learning_rate": 8.789710568946502e-06, "loss": 0.0351, "step": 75040 }, { "epoch": 0.6072497774900882, "grad_norm": 0.15510378777980804, "learning_rate": 8.789249929027957e-06, "loss": 0.0279, "step": 75050 }, { "epoch": 0.6073306901852901, "grad_norm": 0.09087786078453064, "learning_rate": 8.788789213540822e-06, "loss": 0.0219, "step": 75060 }, { "epoch": 0.6074116028804919, "grad_norm": 0.5000585317611694, "learning_rate": 8.788328422494286e-06, "loss": 0.0376, "step": 75070 }, { "epoch": 0.6074925155756938, "grad_norm": 0.4057238698005676, "learning_rate": 8.787867555897534e-06, "loss": 0.023, "step": 75080 }, { "epoch": 0.6075734282708957, "grad_norm": 0.42339298129081726, "learning_rate": 8.787406613759763e-06, "loss": 0.0312, "step": 75090 }, { "epoch": 0.6076543409660976, "grad_norm": 0.41320133209228516, "learning_rate": 8.786945596090162e-06, "loss": 0.0295, "step": 75100 }, { "epoch": 0.6077352536612994, "grad_norm": 0.5559495091438293, "learning_rate": 8.786484502897926e-06, "loss": 0.0381, "step": 75110 }, { "epoch": 0.6078161663565014, "grad_norm": 0.6238070726394653, "learning_rate": 8.786023334192248e-06, "loss": 0.043, "step": 75120 }, { "epoch": 0.6078970790517032, "grad_norm": 0.7341499924659729, "learning_rate": 8.78556208998233e-06, "loss": 0.0257, "step": 75130 }, { "epoch": 0.6079779917469051, "grad_norm": 0.4885410666465759, "learning_rate": 8.785100770277366e-06, "loss": 0.0312, "step": 75140 }, { "epoch": 0.608058904442107, "grad_norm": 0.4920055866241455, "learning_rate": 8.784639375086559e-06, "loss": 0.0323, "step": 75150 }, { "epoch": 0.6081398171373088, "grad_norm": 0.42501404881477356, "learning_rate": 8.78417790441911e-06, "loss": 0.0306, "step": 75160 }, { "epoch": 0.6082207298325107, "grad_norm": 0.688848078250885, "learning_rate": 8.78371635828422e-06, "loss": 0.0239, "step": 75170 }, { "epoch": 0.6083016425277126, "grad_norm": 0.357143759727478, "learning_rate": 8.783254736691095e-06, "loss": 0.0304, "step": 75180 }, { "epoch": 0.6083825552229145, "grad_norm": 0.38276875019073486, "learning_rate": 8.78279303964894e-06, "loss": 0.0398, "step": 75190 }, { "epoch": 0.6084634679181163, "grad_norm": 0.29482510685920715, "learning_rate": 8.782331267166965e-06, "loss": 0.0291, "step": 75200 }, { "epoch": 0.6085443806133183, "grad_norm": 0.3227452039718628, "learning_rate": 8.781869419254378e-06, "loss": 0.0477, "step": 75210 }, { "epoch": 0.6086252933085201, "grad_norm": 0.3164414167404175, "learning_rate": 8.78140749592039e-06, "loss": 0.0274, "step": 75220 }, { "epoch": 0.6087062060037219, "grad_norm": 0.4791095554828644, "learning_rate": 8.780945497174212e-06, "loss": 0.0257, "step": 75230 }, { "epoch": 0.6087871186989239, "grad_norm": 0.447712779045105, "learning_rate": 8.780483423025058e-06, "loss": 0.0335, "step": 75240 }, { "epoch": 0.6088680313941257, "grad_norm": 0.29428690671920776, "learning_rate": 8.780021273482142e-06, "loss": 0.0288, "step": 75250 }, { "epoch": 0.6089489440893276, "grad_norm": 0.30601224303245544, "learning_rate": 8.779559048554682e-06, "loss": 0.0414, "step": 75260 }, { "epoch": 0.6090298567845295, "grad_norm": 0.4093102514743805, "learning_rate": 8.779096748251897e-06, "loss": 0.0305, "step": 75270 }, { "epoch": 0.6091107694797314, "grad_norm": 0.622642457485199, "learning_rate": 8.778634372583004e-06, "loss": 0.0327, "step": 75280 }, { "epoch": 0.6091916821749332, "grad_norm": 0.752469003200531, "learning_rate": 8.778171921557226e-06, "loss": 0.034, "step": 75290 }, { "epoch": 0.6092725948701351, "grad_norm": 0.3581831753253937, "learning_rate": 8.777709395183784e-06, "loss": 0.0386, "step": 75300 }, { "epoch": 0.609353507565337, "grad_norm": 0.8589357137680054, "learning_rate": 8.777246793471905e-06, "loss": 0.025, "step": 75310 }, { "epoch": 0.6094344202605388, "grad_norm": 0.5793095827102661, "learning_rate": 8.776784116430813e-06, "loss": 0.0418, "step": 75320 }, { "epoch": 0.6095153329557408, "grad_norm": 0.43616747856140137, "learning_rate": 8.776321364069733e-06, "loss": 0.034, "step": 75330 }, { "epoch": 0.6095962456509426, "grad_norm": 0.3863573670387268, "learning_rate": 8.775858536397897e-06, "loss": 0.0358, "step": 75340 }, { "epoch": 0.6096771583461446, "grad_norm": 0.3770347535610199, "learning_rate": 8.775395633424533e-06, "loss": 0.0303, "step": 75350 }, { "epoch": 0.6097580710413464, "grad_norm": 0.5348185300827026, "learning_rate": 8.774932655158874e-06, "loss": 0.0274, "step": 75360 }, { "epoch": 0.6098389837365482, "grad_norm": 0.3995336592197418, "learning_rate": 8.774469601610152e-06, "loss": 0.0444, "step": 75370 }, { "epoch": 0.6099198964317502, "grad_norm": 0.2367500513792038, "learning_rate": 8.774006472787602e-06, "loss": 0.0243, "step": 75380 }, { "epoch": 0.610000809126952, "grad_norm": 0.3909887671470642, "learning_rate": 8.77354326870046e-06, "loss": 0.0241, "step": 75390 }, { "epoch": 0.6100817218221539, "grad_norm": 0.4770659804344177, "learning_rate": 8.773079989357964e-06, "loss": 0.0375, "step": 75400 }, { "epoch": 0.6101626345173558, "grad_norm": 0.7031015157699585, "learning_rate": 8.772616634769354e-06, "loss": 0.0368, "step": 75410 }, { "epoch": 0.6102435472125577, "grad_norm": 0.5442114472389221, "learning_rate": 8.772153204943867e-06, "loss": 0.0273, "step": 75420 }, { "epoch": 0.6103244599077595, "grad_norm": 0.5247424840927124, "learning_rate": 8.77168969989075e-06, "loss": 0.042, "step": 75430 }, { "epoch": 0.6104053726029613, "grad_norm": 0.19532005488872528, "learning_rate": 8.771226119619242e-06, "loss": 0.0213, "step": 75440 }, { "epoch": 0.6104862852981633, "grad_norm": 0.32002294063568115, "learning_rate": 8.770762464138593e-06, "loss": 0.0283, "step": 75450 }, { "epoch": 0.6105671979933651, "grad_norm": 0.6328136324882507, "learning_rate": 8.770298733458048e-06, "loss": 0.0301, "step": 75460 }, { "epoch": 0.6106481106885671, "grad_norm": 0.5279607176780701, "learning_rate": 8.769834927586851e-06, "loss": 0.0366, "step": 75470 }, { "epoch": 0.6107290233837689, "grad_norm": 0.08370838314294815, "learning_rate": 8.769371046534256e-06, "loss": 0.0199, "step": 75480 }, { "epoch": 0.6108099360789708, "grad_norm": 0.7208768725395203, "learning_rate": 8.768907090309514e-06, "loss": 0.0288, "step": 75490 }, { "epoch": 0.6108908487741727, "grad_norm": 0.426213800907135, "learning_rate": 8.768443058921875e-06, "loss": 0.0344, "step": 75500 }, { "epoch": 0.6109717614693746, "grad_norm": 0.36663371324539185, "learning_rate": 8.767978952380596e-06, "loss": 0.0185, "step": 75510 }, { "epoch": 0.6110526741645764, "grad_norm": 0.34377771615982056, "learning_rate": 8.76751477069493e-06, "loss": 0.0205, "step": 75520 }, { "epoch": 0.6111335868597783, "grad_norm": 0.36181509494781494, "learning_rate": 8.767050513874136e-06, "loss": 0.0299, "step": 75530 }, { "epoch": 0.6112144995549802, "grad_norm": 0.436089426279068, "learning_rate": 8.766586181927473e-06, "loss": 0.0332, "step": 75540 }, { "epoch": 0.611295412250182, "grad_norm": 0.5748477578163147, "learning_rate": 8.7661217748642e-06, "loss": 0.0341, "step": 75550 }, { "epoch": 0.611376324945384, "grad_norm": 0.4321015179157257, "learning_rate": 8.765657292693579e-06, "loss": 0.0368, "step": 75560 }, { "epoch": 0.6114572376405858, "grad_norm": 0.5301212072372437, "learning_rate": 8.765192735424873e-06, "loss": 0.0282, "step": 75570 }, { "epoch": 0.6115381503357877, "grad_norm": 0.36788713932037354, "learning_rate": 8.764728103067344e-06, "loss": 0.0249, "step": 75580 }, { "epoch": 0.6116190630309896, "grad_norm": 0.6584792733192444, "learning_rate": 8.764263395630264e-06, "loss": 0.0424, "step": 75590 }, { "epoch": 0.6116999757261914, "grad_norm": 0.33564499020576477, "learning_rate": 8.763798613122897e-06, "loss": 0.0269, "step": 75600 }, { "epoch": 0.6117808884213933, "grad_norm": 0.5881227254867554, "learning_rate": 8.76333375555451e-06, "loss": 0.0381, "step": 75610 }, { "epoch": 0.6118618011165952, "grad_norm": 0.32046863436698914, "learning_rate": 8.762868822934377e-06, "loss": 0.0276, "step": 75620 }, { "epoch": 0.6119427138117971, "grad_norm": 0.5812408924102783, "learning_rate": 8.762403815271769e-06, "loss": 0.0345, "step": 75630 }, { "epoch": 0.6120236265069989, "grad_norm": 0.7183589935302734, "learning_rate": 8.76193873257596e-06, "loss": 0.0415, "step": 75640 }, { "epoch": 0.6121045392022009, "grad_norm": 0.3624580204486847, "learning_rate": 8.761473574856225e-06, "loss": 0.0434, "step": 75650 }, { "epoch": 0.6121854518974027, "grad_norm": 0.21806074678897858, "learning_rate": 8.76100834212184e-06, "loss": 0.0326, "step": 75660 }, { "epoch": 0.6122663645926045, "grad_norm": 0.7018710970878601, "learning_rate": 8.760543034382085e-06, "loss": 0.0265, "step": 75670 }, { "epoch": 0.6123472772878065, "grad_norm": 0.4592311382293701, "learning_rate": 8.760077651646236e-06, "loss": 0.0369, "step": 75680 }, { "epoch": 0.6124281899830083, "grad_norm": 0.6921276450157166, "learning_rate": 8.759612193923576e-06, "loss": 0.0316, "step": 75690 }, { "epoch": 0.6125091026782102, "grad_norm": 0.2703380584716797, "learning_rate": 8.75914666122339e-06, "loss": 0.0291, "step": 75700 }, { "epoch": 0.6125900153734121, "grad_norm": 0.2234855443239212, "learning_rate": 8.758681053554958e-06, "loss": 0.0208, "step": 75710 }, { "epoch": 0.612670928068614, "grad_norm": 0.543735682964325, "learning_rate": 8.758215370927567e-06, "loss": 0.0331, "step": 75720 }, { "epoch": 0.6127518407638158, "grad_norm": 0.32912692427635193, "learning_rate": 8.757749613350506e-06, "loss": 0.03, "step": 75730 }, { "epoch": 0.6128327534590177, "grad_norm": 0.2673557996749878, "learning_rate": 8.75728378083306e-06, "loss": 0.0469, "step": 75740 }, { "epoch": 0.6129136661542196, "grad_norm": 0.23539745807647705, "learning_rate": 8.756817873384522e-06, "loss": 0.0278, "step": 75750 }, { "epoch": 0.6129945788494214, "grad_norm": 0.3714584708213806, "learning_rate": 8.756351891014183e-06, "loss": 0.033, "step": 75760 }, { "epoch": 0.6130754915446234, "grad_norm": 0.35596755146980286, "learning_rate": 8.755885833731334e-06, "loss": 0.026, "step": 75770 }, { "epoch": 0.6131564042398252, "grad_norm": 0.4703212380409241, "learning_rate": 8.755419701545272e-06, "loss": 0.0277, "step": 75780 }, { "epoch": 0.6132373169350271, "grad_norm": 0.4510568082332611, "learning_rate": 8.754953494465293e-06, "loss": 0.0329, "step": 75790 }, { "epoch": 0.613318229630229, "grad_norm": 0.5335086584091187, "learning_rate": 8.754487212500693e-06, "loss": 0.0269, "step": 75800 }, { "epoch": 0.6133991423254309, "grad_norm": 0.26418784260749817, "learning_rate": 8.754020855660772e-06, "loss": 0.03, "step": 75810 }, { "epoch": 0.6134800550206327, "grad_norm": 0.35677850246429443, "learning_rate": 8.753554423954828e-06, "loss": 0.0322, "step": 75820 }, { "epoch": 0.6135609677158346, "grad_norm": 0.384400874376297, "learning_rate": 8.753087917392166e-06, "loss": 0.0425, "step": 75830 }, { "epoch": 0.6136418804110365, "grad_norm": 0.5081902146339417, "learning_rate": 8.75262133598209e-06, "loss": 0.0553, "step": 75840 }, { "epoch": 0.6137227931062383, "grad_norm": 0.6520413160324097, "learning_rate": 8.752154679733903e-06, "loss": 0.0263, "step": 75850 }, { "epoch": 0.6138037058014403, "grad_norm": 0.27164021134376526, "learning_rate": 8.751687948656912e-06, "loss": 0.0418, "step": 75860 }, { "epoch": 0.6138846184966421, "grad_norm": 0.5200039744377136, "learning_rate": 8.751221142760425e-06, "loss": 0.0377, "step": 75870 }, { "epoch": 0.613965531191844, "grad_norm": 0.4552915096282959, "learning_rate": 8.75075426205375e-06, "loss": 0.0223, "step": 75880 }, { "epoch": 0.6140464438870459, "grad_norm": 0.5366277098655701, "learning_rate": 8.750287306546202e-06, "loss": 0.0388, "step": 75890 }, { "epoch": 0.6141273565822477, "grad_norm": 0.6201713681221008, "learning_rate": 8.74982027624709e-06, "loss": 0.0368, "step": 75900 }, { "epoch": 0.6142082692774496, "grad_norm": 0.5673887729644775, "learning_rate": 8.749353171165729e-06, "loss": 0.0385, "step": 75910 }, { "epoch": 0.6142891819726515, "grad_norm": 0.5836567878723145, "learning_rate": 8.748885991311433e-06, "loss": 0.0382, "step": 75920 }, { "epoch": 0.6143700946678534, "grad_norm": 0.19811660051345825, "learning_rate": 8.748418736693521e-06, "loss": 0.0196, "step": 75930 }, { "epoch": 0.6144510073630552, "grad_norm": 0.4052066504955292, "learning_rate": 8.74795140732131e-06, "loss": 0.0265, "step": 75940 }, { "epoch": 0.6145319200582572, "grad_norm": 0.37223193049430847, "learning_rate": 8.74748400320412e-06, "loss": 0.0408, "step": 75950 }, { "epoch": 0.614612832753459, "grad_norm": 0.2805771827697754, "learning_rate": 8.747016524351275e-06, "loss": 0.0192, "step": 75960 }, { "epoch": 0.6146937454486608, "grad_norm": 0.9560149908065796, "learning_rate": 8.746548970772094e-06, "loss": 0.0388, "step": 75970 }, { "epoch": 0.6147746581438628, "grad_norm": 0.519953191280365, "learning_rate": 8.746081342475904e-06, "loss": 0.0428, "step": 75980 }, { "epoch": 0.6148555708390646, "grad_norm": 0.7473917603492737, "learning_rate": 8.74561363947203e-06, "loss": 0.0311, "step": 75990 }, { "epoch": 0.6149364835342666, "grad_norm": 0.5126083493232727, "learning_rate": 8.745145861769798e-06, "loss": 0.0343, "step": 76000 }, { "epoch": 0.6150173962294684, "grad_norm": 0.632811427116394, "learning_rate": 8.74467800937854e-06, "loss": 0.0485, "step": 76010 }, { "epoch": 0.6150983089246703, "grad_norm": 0.28644418716430664, "learning_rate": 8.744210082307583e-06, "loss": 0.0324, "step": 76020 }, { "epoch": 0.6151792216198722, "grad_norm": 0.5669119358062744, "learning_rate": 8.74374208056626e-06, "loss": 0.0357, "step": 76030 }, { "epoch": 0.615260134315074, "grad_norm": 0.31039366126060486, "learning_rate": 8.743274004163906e-06, "loss": 0.0226, "step": 76040 }, { "epoch": 0.6153410470102759, "grad_norm": 0.5141971111297607, "learning_rate": 8.742805853109852e-06, "loss": 0.0348, "step": 76050 }, { "epoch": 0.6154219597054778, "grad_norm": 0.5079476237297058, "learning_rate": 8.74233762741344e-06, "loss": 0.0299, "step": 76060 }, { "epoch": 0.6155028724006797, "grad_norm": 0.43163639307022095, "learning_rate": 8.741869327084002e-06, "loss": 0.0261, "step": 76070 }, { "epoch": 0.6155837850958815, "grad_norm": 0.3295508921146393, "learning_rate": 8.74140095213088e-06, "loss": 0.0351, "step": 76080 }, { "epoch": 0.6156646977910835, "grad_norm": 0.5199300050735474, "learning_rate": 8.740932502563415e-06, "loss": 0.0462, "step": 76090 }, { "epoch": 0.6157456104862853, "grad_norm": 0.45663848519325256, "learning_rate": 8.74046397839095e-06, "loss": 0.0441, "step": 76100 }, { "epoch": 0.6158265231814872, "grad_norm": 0.3826812207698822, "learning_rate": 8.739995379622825e-06, "loss": 0.0255, "step": 76110 }, { "epoch": 0.6159074358766891, "grad_norm": 1.704313039779663, "learning_rate": 8.739526706268388e-06, "loss": 0.0317, "step": 76120 }, { "epoch": 0.6159883485718909, "grad_norm": 0.34599265456199646, "learning_rate": 8.739057958336988e-06, "loss": 0.0293, "step": 76130 }, { "epoch": 0.6160692612670928, "grad_norm": 0.42508381605148315, "learning_rate": 8.738589135837967e-06, "loss": 0.0309, "step": 76140 }, { "epoch": 0.6161501739622947, "grad_norm": 0.4049144685268402, "learning_rate": 8.73812023878068e-06, "loss": 0.0259, "step": 76150 }, { "epoch": 0.6162310866574966, "grad_norm": 0.44113999605178833, "learning_rate": 8.737651267174477e-06, "loss": 0.0438, "step": 76160 }, { "epoch": 0.6163119993526984, "grad_norm": 0.24477632343769073, "learning_rate": 8.73718222102871e-06, "loss": 0.0275, "step": 76170 }, { "epoch": 0.6163929120479004, "grad_norm": 0.6793733835220337, "learning_rate": 8.736713100352731e-06, "loss": 0.0293, "step": 76180 }, { "epoch": 0.6164738247431022, "grad_norm": 0.7216084003448486, "learning_rate": 8.7362439051559e-06, "loss": 0.0238, "step": 76190 }, { "epoch": 0.616554737438304, "grad_norm": 0.819011390209198, "learning_rate": 8.73577463544757e-06, "loss": 0.0554, "step": 76200 }, { "epoch": 0.616635650133506, "grad_norm": 0.2653445601463318, "learning_rate": 8.735305291237103e-06, "loss": 0.037, "step": 76210 }, { "epoch": 0.6167165628287078, "grad_norm": 0.4422503411769867, "learning_rate": 8.734835872533857e-06, "loss": 0.0305, "step": 76220 }, { "epoch": 0.6167974755239097, "grad_norm": 0.7093201279640198, "learning_rate": 8.734366379347193e-06, "loss": 0.0228, "step": 76230 }, { "epoch": 0.6168783882191116, "grad_norm": 0.6821469664573669, "learning_rate": 8.733896811686478e-06, "loss": 0.0308, "step": 76240 }, { "epoch": 0.6169593009143135, "grad_norm": 0.47450271248817444, "learning_rate": 8.73342716956107e-06, "loss": 0.0313, "step": 76250 }, { "epoch": 0.6170402136095153, "grad_norm": 0.42811349034309387, "learning_rate": 8.732957452980342e-06, "loss": 0.0374, "step": 76260 }, { "epoch": 0.6171211263047172, "grad_norm": 0.6675203442573547, "learning_rate": 8.732487661953656e-06, "loss": 0.0355, "step": 76270 }, { "epoch": 0.6172020389999191, "grad_norm": 0.1492379605770111, "learning_rate": 8.732017796490385e-06, "loss": 0.0213, "step": 76280 }, { "epoch": 0.6172829516951209, "grad_norm": 0.8014906048774719, "learning_rate": 8.731547856599896e-06, "loss": 0.0327, "step": 76290 }, { "epoch": 0.6173638643903229, "grad_norm": 0.3566283583641052, "learning_rate": 8.731077842291564e-06, "loss": 0.0268, "step": 76300 }, { "epoch": 0.6174447770855247, "grad_norm": 0.31879445910453796, "learning_rate": 8.73060775357476e-06, "loss": 0.039, "step": 76310 }, { "epoch": 0.6175256897807266, "grad_norm": 0.6650588512420654, "learning_rate": 8.73013759045886e-06, "loss": 0.0421, "step": 76320 }, { "epoch": 0.6176066024759285, "grad_norm": 0.2415781170129776, "learning_rate": 8.729667352953242e-06, "loss": 0.032, "step": 76330 }, { "epoch": 0.6176875151711303, "grad_norm": 0.1527082771062851, "learning_rate": 8.72919704106728e-06, "loss": 0.0248, "step": 76340 }, { "epoch": 0.6177684278663322, "grad_norm": 0.5513184666633606, "learning_rate": 8.728726654810358e-06, "loss": 0.0368, "step": 76350 }, { "epoch": 0.6178493405615341, "grad_norm": 0.3626808226108551, "learning_rate": 8.728256194191853e-06, "loss": 0.026, "step": 76360 }, { "epoch": 0.617930253256736, "grad_norm": 0.2240624576807022, "learning_rate": 8.72778565922115e-06, "loss": 0.0268, "step": 76370 }, { "epoch": 0.6180111659519378, "grad_norm": 0.5518186092376709, "learning_rate": 8.72731504990763e-06, "loss": 0.0238, "step": 76380 }, { "epoch": 0.6180920786471398, "grad_norm": 0.4134674668312073, "learning_rate": 8.726844366260681e-06, "loss": 0.0416, "step": 76390 }, { "epoch": 0.6181729913423416, "grad_norm": 0.47988471388816833, "learning_rate": 8.726373608289689e-06, "loss": 0.027, "step": 76400 }, { "epoch": 0.6182539040375435, "grad_norm": 0.2727927565574646, "learning_rate": 8.725902776004043e-06, "loss": 0.0233, "step": 76410 }, { "epoch": 0.6183348167327454, "grad_norm": 0.02444435842335224, "learning_rate": 8.72543186941313e-06, "loss": 0.0338, "step": 76420 }, { "epoch": 0.6184157294279472, "grad_norm": 0.06596655398607254, "learning_rate": 8.724960888526344e-06, "loss": 0.028, "step": 76430 }, { "epoch": 0.6184966421231491, "grad_norm": 0.2657911479473114, "learning_rate": 8.724489833353078e-06, "loss": 0.0249, "step": 76440 }, { "epoch": 0.618577554818351, "grad_norm": 0.465196430683136, "learning_rate": 8.724018703902723e-06, "loss": 0.036, "step": 76450 }, { "epoch": 0.6186584675135529, "grad_norm": 0.26827919483184814, "learning_rate": 8.723547500184679e-06, "loss": 0.0338, "step": 76460 }, { "epoch": 0.6187393802087547, "grad_norm": 0.7330530881881714, "learning_rate": 8.723076222208339e-06, "loss": 0.0423, "step": 76470 }, { "epoch": 0.6188202929039567, "grad_norm": 0.9170742034912109, "learning_rate": 8.722604869983104e-06, "loss": 0.0511, "step": 76480 }, { "epoch": 0.6189012055991585, "grad_norm": 0.49706143140792847, "learning_rate": 8.722133443518373e-06, "loss": 0.0265, "step": 76490 }, { "epoch": 0.6189821182943603, "grad_norm": 0.4514911472797394, "learning_rate": 8.721661942823549e-06, "loss": 0.0144, "step": 76500 }, { "epoch": 0.6190630309895623, "grad_norm": 0.711724042892456, "learning_rate": 8.721190367908035e-06, "loss": 0.0341, "step": 76510 }, { "epoch": 0.6191439436847641, "grad_norm": 0.22012358903884888, "learning_rate": 8.720718718781234e-06, "loss": 0.0203, "step": 76520 }, { "epoch": 0.619224856379966, "grad_norm": 0.2898769676685333, "learning_rate": 8.720246995452553e-06, "loss": 0.0261, "step": 76530 }, { "epoch": 0.6193057690751679, "grad_norm": 0.30871185660362244, "learning_rate": 8.719775197931399e-06, "loss": 0.0231, "step": 76540 }, { "epoch": 0.6193866817703698, "grad_norm": 0.7242293357849121, "learning_rate": 8.719303326227182e-06, "loss": 0.0301, "step": 76550 }, { "epoch": 0.6194675944655716, "grad_norm": 0.9085996150970459, "learning_rate": 8.718831380349313e-06, "loss": 0.0457, "step": 76560 }, { "epoch": 0.6195485071607735, "grad_norm": 0.4566248059272766, "learning_rate": 8.7183593603072e-06, "loss": 0.0344, "step": 76570 }, { "epoch": 0.6196294198559754, "grad_norm": 0.3836517632007599, "learning_rate": 8.717887266110262e-06, "loss": 0.0433, "step": 76580 }, { "epoch": 0.6197103325511772, "grad_norm": 0.4768873155117035, "learning_rate": 8.71741509776791e-06, "loss": 0.0292, "step": 76590 }, { "epoch": 0.6197912452463792, "grad_norm": 0.5759198069572449, "learning_rate": 8.716942855289563e-06, "loss": 0.0413, "step": 76600 }, { "epoch": 0.619872157941581, "grad_norm": 0.6466689109802246, "learning_rate": 8.716470538684636e-06, "loss": 0.0422, "step": 76610 }, { "epoch": 0.619953070636783, "grad_norm": 0.6855313777923584, "learning_rate": 8.715998147962552e-06, "loss": 0.0278, "step": 76620 }, { "epoch": 0.6200339833319848, "grad_norm": 0.11503002792596817, "learning_rate": 8.715525683132729e-06, "loss": 0.0267, "step": 76630 }, { "epoch": 0.6201148960271866, "grad_norm": 0.35308414697647095, "learning_rate": 8.715053144204588e-06, "loss": 0.031, "step": 76640 }, { "epoch": 0.6201958087223886, "grad_norm": 0.31953179836273193, "learning_rate": 8.714580531187558e-06, "loss": 0.0239, "step": 76650 }, { "epoch": 0.6202767214175904, "grad_norm": 0.18290424346923828, "learning_rate": 8.714107844091057e-06, "loss": 0.0293, "step": 76660 }, { "epoch": 0.6203576341127923, "grad_norm": 0.7415744066238403, "learning_rate": 8.71363508292452e-06, "loss": 0.0392, "step": 76670 }, { "epoch": 0.6204385468079942, "grad_norm": 0.6755667328834534, "learning_rate": 8.713162247697368e-06, "loss": 0.0332, "step": 76680 }, { "epoch": 0.6205194595031961, "grad_norm": 0.6604674458503723, "learning_rate": 8.712689338419036e-06, "loss": 0.0314, "step": 76690 }, { "epoch": 0.6206003721983979, "grad_norm": 0.3661492168903351, "learning_rate": 8.712216355098952e-06, "loss": 0.0309, "step": 76700 }, { "epoch": 0.6206812848935999, "grad_norm": 0.47513672709465027, "learning_rate": 8.711743297746551e-06, "loss": 0.028, "step": 76710 }, { "epoch": 0.6207621975888017, "grad_norm": 0.6459288597106934, "learning_rate": 8.711270166371264e-06, "loss": 0.0281, "step": 76720 }, { "epoch": 0.6208431102840035, "grad_norm": 0.4875930845737457, "learning_rate": 8.71079696098253e-06, "loss": 0.0255, "step": 76730 }, { "epoch": 0.6209240229792055, "grad_norm": 0.7199351191520691, "learning_rate": 8.710323681589782e-06, "loss": 0.0593, "step": 76740 }, { "epoch": 0.6210049356744073, "grad_norm": 0.6533271074295044, "learning_rate": 8.709850328202463e-06, "loss": 0.0182, "step": 76750 }, { "epoch": 0.6210858483696092, "grad_norm": 0.5060913562774658, "learning_rate": 8.709376900830011e-06, "loss": 0.0372, "step": 76760 }, { "epoch": 0.6211667610648111, "grad_norm": 0.5205804109573364, "learning_rate": 8.708903399481867e-06, "loss": 0.0346, "step": 76770 }, { "epoch": 0.621247673760013, "grad_norm": 0.532162070274353, "learning_rate": 8.708429824167475e-06, "loss": 0.025, "step": 76780 }, { "epoch": 0.6213285864552148, "grad_norm": 0.6757438778877258, "learning_rate": 8.707956174896278e-06, "loss": 0.0334, "step": 76790 }, { "epoch": 0.6214094991504167, "grad_norm": 0.34495487809181213, "learning_rate": 8.707482451677724e-06, "loss": 0.0445, "step": 76800 }, { "epoch": 0.6214904118456186, "grad_norm": 0.3694148659706116, "learning_rate": 8.707008654521259e-06, "loss": 0.0358, "step": 76810 }, { "epoch": 0.6215713245408204, "grad_norm": 0.3731498420238495, "learning_rate": 8.706534783436331e-06, "loss": 0.0455, "step": 76820 }, { "epoch": 0.6216522372360224, "grad_norm": 0.46685877442359924, "learning_rate": 8.706060838432393e-06, "loss": 0.0295, "step": 76830 }, { "epoch": 0.6217331499312242, "grad_norm": 0.34104835987091064, "learning_rate": 8.705586819518896e-06, "loss": 0.0313, "step": 76840 }, { "epoch": 0.6218140626264261, "grad_norm": 0.9513780474662781, "learning_rate": 8.705112726705292e-06, "loss": 0.0422, "step": 76850 }, { "epoch": 0.621894975321628, "grad_norm": 0.18784794211387634, "learning_rate": 8.704638560001036e-06, "loss": 0.0424, "step": 76860 }, { "epoch": 0.6219758880168298, "grad_norm": 0.3006187677383423, "learning_rate": 8.704164319415586e-06, "loss": 0.0329, "step": 76870 }, { "epoch": 0.6220568007120317, "grad_norm": 0.3388977646827698, "learning_rate": 8.703690004958396e-06, "loss": 0.0332, "step": 76880 }, { "epoch": 0.6221377134072336, "grad_norm": 0.3428168296813965, "learning_rate": 8.70321561663893e-06, "loss": 0.0273, "step": 76890 }, { "epoch": 0.6222186261024355, "grad_norm": 0.6588442921638489, "learning_rate": 8.702741154466645e-06, "loss": 0.0365, "step": 76900 }, { "epoch": 0.6222995387976373, "grad_norm": 0.9107343554496765, "learning_rate": 8.702266618451006e-06, "loss": 0.0438, "step": 76910 }, { "epoch": 0.6223804514928393, "grad_norm": 0.446359246969223, "learning_rate": 8.701792008601474e-06, "loss": 0.0358, "step": 76920 }, { "epoch": 0.6224613641880411, "grad_norm": 0.7222742438316345, "learning_rate": 8.701317324927516e-06, "loss": 0.0464, "step": 76930 }, { "epoch": 0.6225422768832429, "grad_norm": 0.21858513355255127, "learning_rate": 8.700842567438596e-06, "loss": 0.0263, "step": 76940 }, { "epoch": 0.6226231895784449, "grad_norm": 0.5578830242156982, "learning_rate": 8.700367736144187e-06, "loss": 0.0234, "step": 76950 }, { "epoch": 0.6227041022736467, "grad_norm": 0.3519759774208069, "learning_rate": 8.699892831053753e-06, "loss": 0.0331, "step": 76960 }, { "epoch": 0.6227850149688486, "grad_norm": 0.6952111124992371, "learning_rate": 8.699417852176768e-06, "loss": 0.0456, "step": 76970 }, { "epoch": 0.6228659276640505, "grad_norm": 0.5874060988426208, "learning_rate": 8.698942799522702e-06, "loss": 0.0322, "step": 76980 }, { "epoch": 0.6229468403592524, "grad_norm": 0.4593711495399475, "learning_rate": 8.698467673101033e-06, "loss": 0.0255, "step": 76990 }, { "epoch": 0.6230277530544542, "grad_norm": 0.49363622069358826, "learning_rate": 8.697992472921232e-06, "loss": 0.0328, "step": 77000 }, { "epoch": 0.6231086657496562, "grad_norm": 0.8029701113700867, "learning_rate": 8.69751719899278e-06, "loss": 0.0401, "step": 77010 }, { "epoch": 0.623189578444858, "grad_norm": 0.3819712996482849, "learning_rate": 8.697041851325155e-06, "loss": 0.0283, "step": 77020 }, { "epoch": 0.6232704911400598, "grad_norm": 0.38354387879371643, "learning_rate": 8.696566429927831e-06, "loss": 0.0344, "step": 77030 }, { "epoch": 0.6233514038352618, "grad_norm": 0.5326396226882935, "learning_rate": 8.696090934810295e-06, "loss": 0.0371, "step": 77040 }, { "epoch": 0.6234323165304636, "grad_norm": 0.46133559942245483, "learning_rate": 8.695615365982028e-06, "loss": 0.0303, "step": 77050 }, { "epoch": 0.6235132292256655, "grad_norm": 0.6219164729118347, "learning_rate": 8.695139723452515e-06, "loss": 0.0377, "step": 77060 }, { "epoch": 0.6235941419208674, "grad_norm": 0.2441001832485199, "learning_rate": 8.694664007231239e-06, "loss": 0.0431, "step": 77070 }, { "epoch": 0.6236750546160693, "grad_norm": 0.29602864384651184, "learning_rate": 8.69418821732769e-06, "loss": 0.0326, "step": 77080 }, { "epoch": 0.6237559673112711, "grad_norm": 0.3448629379272461, "learning_rate": 8.693712353751356e-06, "loss": 0.0349, "step": 77090 }, { "epoch": 0.623836880006473, "grad_norm": 0.37603461742401123, "learning_rate": 8.693236416511728e-06, "loss": 0.0275, "step": 77100 }, { "epoch": 0.6239177927016749, "grad_norm": 0.627993106842041, "learning_rate": 8.692760405618296e-06, "loss": 0.0376, "step": 77110 }, { "epoch": 0.6239987053968767, "grad_norm": 0.5207095742225647, "learning_rate": 8.692284321080554e-06, "loss": 0.0398, "step": 77120 }, { "epoch": 0.6240796180920787, "grad_norm": 0.7541787624359131, "learning_rate": 8.691808162907992e-06, "loss": 0.0427, "step": 77130 }, { "epoch": 0.6241605307872805, "grad_norm": 0.6814317107200623, "learning_rate": 8.691331931110114e-06, "loss": 0.0341, "step": 77140 }, { "epoch": 0.6242414434824824, "grad_norm": 0.34939178824424744, "learning_rate": 8.690855625696413e-06, "loss": 0.0494, "step": 77150 }, { "epoch": 0.6243223561776843, "grad_norm": 0.5151062607765198, "learning_rate": 8.690379246676387e-06, "loss": 0.037, "step": 77160 }, { "epoch": 0.6244032688728861, "grad_norm": 0.48335403203964233, "learning_rate": 8.689902794059539e-06, "loss": 0.0294, "step": 77170 }, { "epoch": 0.624484181568088, "grad_norm": 0.6537654399871826, "learning_rate": 8.689426267855368e-06, "loss": 0.0373, "step": 77180 }, { "epoch": 0.6245650942632899, "grad_norm": 0.5363086462020874, "learning_rate": 8.688949668073381e-06, "loss": 0.036, "step": 77190 }, { "epoch": 0.6246460069584918, "grad_norm": 0.43216797709465027, "learning_rate": 8.688472994723079e-06, "loss": 0.0297, "step": 77200 }, { "epoch": 0.6247269196536936, "grad_norm": 0.6746358871459961, "learning_rate": 8.68799624781397e-06, "loss": 0.0263, "step": 77210 }, { "epoch": 0.6248078323488956, "grad_norm": 0.4518350660800934, "learning_rate": 8.687519427355562e-06, "loss": 0.0359, "step": 77220 }, { "epoch": 0.6248887450440974, "grad_norm": 0.9977927207946777, "learning_rate": 8.687042533357363e-06, "loss": 0.0461, "step": 77230 }, { "epoch": 0.6249696577392992, "grad_norm": 0.9053942561149597, "learning_rate": 8.686565565828883e-06, "loss": 0.039, "step": 77240 }, { "epoch": 0.6250505704345012, "grad_norm": 0.31675785779953003, "learning_rate": 8.686088524779639e-06, "loss": 0.0343, "step": 77250 }, { "epoch": 0.625131483129703, "grad_norm": 0.3109164834022522, "learning_rate": 8.68561141021914e-06, "loss": 0.0424, "step": 77260 }, { "epoch": 0.625212395824905, "grad_norm": 0.6400219798088074, "learning_rate": 8.685134222156898e-06, "loss": 0.053, "step": 77270 }, { "epoch": 0.6252933085201068, "grad_norm": 0.2844025492668152, "learning_rate": 8.684656960602438e-06, "loss": 0.0302, "step": 77280 }, { "epoch": 0.6253742212153087, "grad_norm": 0.5271815657615662, "learning_rate": 8.684179625565273e-06, "loss": 0.0256, "step": 77290 }, { "epoch": 0.6254551339105106, "grad_norm": 0.2984853982925415, "learning_rate": 8.683702217054924e-06, "loss": 0.0218, "step": 77300 }, { "epoch": 0.6255360466057124, "grad_norm": 0.9962635040283203, "learning_rate": 8.683224735080908e-06, "loss": 0.0269, "step": 77310 }, { "epoch": 0.6256169593009143, "grad_norm": 0.4056943655014038, "learning_rate": 8.68274717965275e-06, "loss": 0.0542, "step": 77320 }, { "epoch": 0.6256978719961162, "grad_norm": 0.39181387424468994, "learning_rate": 8.682269550779977e-06, "loss": 0.0341, "step": 77330 }, { "epoch": 0.6257787846913181, "grad_norm": 0.4079470932483673, "learning_rate": 8.681791848472111e-06, "loss": 0.0184, "step": 77340 }, { "epoch": 0.6258596973865199, "grad_norm": 0.70682692527771, "learning_rate": 8.681314072738678e-06, "loss": 0.0351, "step": 77350 }, { "epoch": 0.6259406100817219, "grad_norm": 0.45510566234588623, "learning_rate": 8.680836223589209e-06, "loss": 0.0357, "step": 77360 }, { "epoch": 0.6260215227769237, "grad_norm": 0.6537817716598511, "learning_rate": 8.68035830103323e-06, "loss": 0.0558, "step": 77370 }, { "epoch": 0.6261024354721256, "grad_norm": 0.4433923661708832, "learning_rate": 8.679880305080275e-06, "loss": 0.0331, "step": 77380 }, { "epoch": 0.6261833481673275, "grad_norm": 0.7445433139801025, "learning_rate": 8.679402235739875e-06, "loss": 0.029, "step": 77390 }, { "epoch": 0.6262642608625293, "grad_norm": 0.5800995230674744, "learning_rate": 8.678924093021566e-06, "loss": 0.0259, "step": 77400 }, { "epoch": 0.6263451735577312, "grad_norm": 0.3929281532764435, "learning_rate": 8.678445876934885e-06, "loss": 0.0449, "step": 77410 }, { "epoch": 0.6264260862529331, "grad_norm": 0.43085137009620667, "learning_rate": 8.677967587489362e-06, "loss": 0.0405, "step": 77420 }, { "epoch": 0.626506998948135, "grad_norm": 0.37075212597846985, "learning_rate": 8.677489224694544e-06, "loss": 0.032, "step": 77430 }, { "epoch": 0.6265879116433368, "grad_norm": 0.3405747711658478, "learning_rate": 8.677010788559965e-06, "loss": 0.0268, "step": 77440 }, { "epoch": 0.6266688243385388, "grad_norm": 0.7082487344741821, "learning_rate": 8.676532279095167e-06, "loss": 0.0331, "step": 77450 }, { "epoch": 0.6267497370337406, "grad_norm": 0.33235836029052734, "learning_rate": 8.676053696309697e-06, "loss": 0.0344, "step": 77460 }, { "epoch": 0.6268306497289424, "grad_norm": 0.4038303792476654, "learning_rate": 8.675575040213093e-06, "loss": 0.0364, "step": 77470 }, { "epoch": 0.6269115624241444, "grad_norm": 0.4783150255680084, "learning_rate": 8.675096310814908e-06, "loss": 0.0444, "step": 77480 }, { "epoch": 0.6269924751193462, "grad_norm": 0.58461993932724, "learning_rate": 8.674617508124684e-06, "loss": 0.029, "step": 77490 }, { "epoch": 0.6270733878145481, "grad_norm": 0.4750741124153137, "learning_rate": 8.674138632151972e-06, "loss": 0.0469, "step": 77500 }, { "epoch": 0.62715430050975, "grad_norm": 0.5122414231300354, "learning_rate": 8.67365968290632e-06, "loss": 0.0326, "step": 77510 }, { "epoch": 0.6272352132049519, "grad_norm": 0.43746882677078247, "learning_rate": 8.673180660397283e-06, "loss": 0.0308, "step": 77520 }, { "epoch": 0.6273161259001537, "grad_norm": 0.37997493147850037, "learning_rate": 8.67270156463441e-06, "loss": 0.0344, "step": 77530 }, { "epoch": 0.6273970385953556, "grad_norm": 0.7325646281242371, "learning_rate": 8.672222395627258e-06, "loss": 0.048, "step": 77540 }, { "epoch": 0.6274779512905575, "grad_norm": 0.3604030907154083, "learning_rate": 8.671743153385383e-06, "loss": 0.0279, "step": 77550 }, { "epoch": 0.6275588639857593, "grad_norm": 0.6197133660316467, "learning_rate": 8.671263837918342e-06, "loss": 0.0386, "step": 77560 }, { "epoch": 0.6276397766809613, "grad_norm": 0.1947624534368515, "learning_rate": 8.670784449235697e-06, "loss": 0.0299, "step": 77570 }, { "epoch": 0.6277206893761631, "grad_norm": 0.046104636043310165, "learning_rate": 8.670304987347e-06, "loss": 0.0229, "step": 77580 }, { "epoch": 0.627801602071365, "grad_norm": 1.0568897724151611, "learning_rate": 8.669825452261824e-06, "loss": 0.0465, "step": 77590 }, { "epoch": 0.6278825147665669, "grad_norm": 0.3770301342010498, "learning_rate": 8.669345843989722e-06, "loss": 0.0262, "step": 77600 }, { "epoch": 0.6279634274617687, "grad_norm": 0.26672929525375366, "learning_rate": 8.668866162540267e-06, "loss": 0.0246, "step": 77610 }, { "epoch": 0.6280443401569706, "grad_norm": 0.4376743733882904, "learning_rate": 8.66838640792302e-06, "loss": 0.0304, "step": 77620 }, { "epoch": 0.6281252528521725, "grad_norm": 0.42568719387054443, "learning_rate": 8.667906580147552e-06, "loss": 0.0311, "step": 77630 }, { "epoch": 0.6282061655473744, "grad_norm": 0.2664681077003479, "learning_rate": 8.667426679223429e-06, "loss": 0.0294, "step": 77640 }, { "epoch": 0.6282870782425762, "grad_norm": 0.34294188022613525, "learning_rate": 8.666946705160223e-06, "loss": 0.0304, "step": 77650 }, { "epoch": 0.6283679909377782, "grad_norm": 0.4351192116737366, "learning_rate": 8.666466657967507e-06, "loss": 0.0376, "step": 77660 }, { "epoch": 0.62844890363298, "grad_norm": 0.31053975224494934, "learning_rate": 8.665986537654854e-06, "loss": 0.0294, "step": 77670 }, { "epoch": 0.628529816328182, "grad_norm": 0.7123754620552063, "learning_rate": 8.665506344231838e-06, "loss": 0.0363, "step": 77680 }, { "epoch": 0.6286107290233838, "grad_norm": 0.41174694895744324, "learning_rate": 8.665026077708036e-06, "loss": 0.0347, "step": 77690 }, { "epoch": 0.6286916417185856, "grad_norm": 0.5197669863700867, "learning_rate": 8.664545738093026e-06, "loss": 0.0297, "step": 77700 }, { "epoch": 0.6287725544137875, "grad_norm": 0.45255574584007263, "learning_rate": 8.664065325396389e-06, "loss": 0.0389, "step": 77710 }, { "epoch": 0.6288534671089894, "grad_norm": 0.5796974897384644, "learning_rate": 8.663584839627703e-06, "loss": 0.0575, "step": 77720 }, { "epoch": 0.6289343798041913, "grad_norm": 0.5032745599746704, "learning_rate": 8.663104280796553e-06, "loss": 0.0388, "step": 77730 }, { "epoch": 0.6290152924993931, "grad_norm": 0.3209403455257416, "learning_rate": 8.662623648912519e-06, "loss": 0.022, "step": 77740 }, { "epoch": 0.6290962051945951, "grad_norm": 0.6826911568641663, "learning_rate": 8.66214294398519e-06, "loss": 0.0444, "step": 77750 }, { "epoch": 0.6291771178897969, "grad_norm": 0.42506250739097595, "learning_rate": 8.661662166024151e-06, "loss": 0.0338, "step": 77760 }, { "epoch": 0.6292580305849987, "grad_norm": 0.2167743593454361, "learning_rate": 8.66118131503899e-06, "loss": 0.0355, "step": 77770 }, { "epoch": 0.6293389432802007, "grad_norm": 0.35736802220344543, "learning_rate": 8.660700391039296e-06, "loss": 0.0308, "step": 77780 }, { "epoch": 0.6294198559754025, "grad_norm": 0.3554040491580963, "learning_rate": 8.660219394034661e-06, "loss": 0.0265, "step": 77790 }, { "epoch": 0.6295007686706044, "grad_norm": 0.36887192726135254, "learning_rate": 8.659738324034678e-06, "loss": 0.0235, "step": 77800 }, { "epoch": 0.6295816813658063, "grad_norm": 0.6388894319534302, "learning_rate": 8.659257181048941e-06, "loss": 0.0298, "step": 77810 }, { "epoch": 0.6296625940610082, "grad_norm": 0.44096359610557556, "learning_rate": 8.658775965087044e-06, "loss": 0.0288, "step": 77820 }, { "epoch": 0.62974350675621, "grad_norm": 0.4741917848587036, "learning_rate": 8.658294676158584e-06, "loss": 0.0434, "step": 77830 }, { "epoch": 0.6298244194514119, "grad_norm": 0.6528553366661072, "learning_rate": 8.657813314273158e-06, "loss": 0.0288, "step": 77840 }, { "epoch": 0.6299053321466138, "grad_norm": 0.29719308018684387, "learning_rate": 8.65733187944037e-06, "loss": 0.0209, "step": 77850 }, { "epoch": 0.6299862448418156, "grad_norm": 0.7744720578193665, "learning_rate": 8.656850371669816e-06, "loss": 0.0353, "step": 77860 }, { "epoch": 0.6300671575370176, "grad_norm": 0.2630877196788788, "learning_rate": 8.656368790971104e-06, "loss": 0.0526, "step": 77870 }, { "epoch": 0.6301480702322194, "grad_norm": 0.40561094880104065, "learning_rate": 8.655887137353833e-06, "loss": 0.0246, "step": 77880 }, { "epoch": 0.6302289829274214, "grad_norm": 0.4234524369239807, "learning_rate": 8.655405410827612e-06, "loss": 0.0281, "step": 77890 }, { "epoch": 0.6303098956226232, "grad_norm": 0.4266544580459595, "learning_rate": 8.654923611402048e-06, "loss": 0.0379, "step": 77900 }, { "epoch": 0.630390808317825, "grad_norm": 0.7965680360794067, "learning_rate": 8.654441739086748e-06, "loss": 0.0333, "step": 77910 }, { "epoch": 0.630471721013027, "grad_norm": 0.5697128176689148, "learning_rate": 8.653959793891322e-06, "loss": 0.0497, "step": 77920 }, { "epoch": 0.6305526337082288, "grad_norm": 0.5756696462631226, "learning_rate": 8.653477775825383e-06, "loss": 0.0391, "step": 77930 }, { "epoch": 0.6306335464034307, "grad_norm": 0.33445611596107483, "learning_rate": 8.65299568489854e-06, "loss": 0.0279, "step": 77940 }, { "epoch": 0.6307144590986326, "grad_norm": 0.31506362557411194, "learning_rate": 8.652513521120412e-06, "loss": 0.0299, "step": 77950 }, { "epoch": 0.6307953717938345, "grad_norm": 0.4990081489086151, "learning_rate": 8.652031284500612e-06, "loss": 0.0349, "step": 77960 }, { "epoch": 0.6308762844890363, "grad_norm": 0.797081708908081, "learning_rate": 8.651548975048758e-06, "loss": 0.0433, "step": 77970 }, { "epoch": 0.6309571971842383, "grad_norm": 0.9015002846717834, "learning_rate": 8.65106659277447e-06, "loss": 0.0328, "step": 77980 }, { "epoch": 0.6310381098794401, "grad_norm": 0.1092902272939682, "learning_rate": 8.650584137687363e-06, "loss": 0.0194, "step": 77990 }, { "epoch": 0.6311190225746419, "grad_norm": 0.700748085975647, "learning_rate": 8.650101609797064e-06, "loss": 0.0361, "step": 78000 }, { "epoch": 0.6311999352698439, "grad_norm": 0.3634603023529053, "learning_rate": 8.649619009113193e-06, "loss": 0.037, "step": 78010 }, { "epoch": 0.6312808479650457, "grad_norm": 0.576300859451294, "learning_rate": 8.649136335645377e-06, "loss": 0.0232, "step": 78020 }, { "epoch": 0.6313617606602476, "grad_norm": 0.3082001805305481, "learning_rate": 8.64865358940324e-06, "loss": 0.0348, "step": 78030 }, { "epoch": 0.6314426733554495, "grad_norm": 0.4498642385005951, "learning_rate": 8.648170770396409e-06, "loss": 0.0433, "step": 78040 }, { "epoch": 0.6315235860506514, "grad_norm": 0.26038622856140137, "learning_rate": 8.647687878634515e-06, "loss": 0.0365, "step": 78050 }, { "epoch": 0.6316044987458532, "grad_norm": 0.5092695355415344, "learning_rate": 8.647204914127186e-06, "loss": 0.0409, "step": 78060 }, { "epoch": 0.6316854114410551, "grad_norm": 0.28264597058296204, "learning_rate": 8.646721876884056e-06, "loss": 0.0204, "step": 78070 }, { "epoch": 0.631766324136257, "grad_norm": 0.5082540512084961, "learning_rate": 8.646238766914755e-06, "loss": 0.0283, "step": 78080 }, { "epoch": 0.6318472368314588, "grad_norm": 0.1964726746082306, "learning_rate": 8.64575558422892e-06, "loss": 0.0252, "step": 78090 }, { "epoch": 0.6319281495266608, "grad_norm": 0.5022940635681152, "learning_rate": 8.645272328836186e-06, "loss": 0.0168, "step": 78100 }, { "epoch": 0.6320090622218626, "grad_norm": 0.16457723081111908, "learning_rate": 8.644789000746193e-06, "loss": 0.0313, "step": 78110 }, { "epoch": 0.6320899749170645, "grad_norm": 0.3999754786491394, "learning_rate": 8.644305599968575e-06, "loss": 0.03, "step": 78120 }, { "epoch": 0.6321708876122664, "grad_norm": 0.2954549789428711, "learning_rate": 8.643822126512975e-06, "loss": 0.0373, "step": 78130 }, { "epoch": 0.6322518003074682, "grad_norm": 0.18395443260669708, "learning_rate": 8.643338580389039e-06, "loss": 0.048, "step": 78140 }, { "epoch": 0.6323327130026701, "grad_norm": 0.5914468765258789, "learning_rate": 8.642854961606402e-06, "loss": 0.039, "step": 78150 }, { "epoch": 0.632413625697872, "grad_norm": 0.25328579545021057, "learning_rate": 8.642371270174716e-06, "loss": 0.0274, "step": 78160 }, { "epoch": 0.6324945383930739, "grad_norm": 0.49120521545410156, "learning_rate": 8.641887506103623e-06, "loss": 0.0398, "step": 78170 }, { "epoch": 0.6325754510882757, "grad_norm": 0.8903247714042664, "learning_rate": 8.641403669402772e-06, "loss": 0.0233, "step": 78180 }, { "epoch": 0.6326563637834777, "grad_norm": 0.7841693758964539, "learning_rate": 8.640919760081814e-06, "loss": 0.0286, "step": 78190 }, { "epoch": 0.6327372764786795, "grad_norm": 0.7281192541122437, "learning_rate": 8.640435778150395e-06, "loss": 0.0391, "step": 78200 }, { "epoch": 0.6328181891738813, "grad_norm": 0.28823500871658325, "learning_rate": 8.639951723618171e-06, "loss": 0.0315, "step": 78210 }, { "epoch": 0.6328991018690833, "grad_norm": 0.3088664710521698, "learning_rate": 8.639467596494795e-06, "loss": 0.0312, "step": 78220 }, { "epoch": 0.6329800145642851, "grad_norm": 0.5256364941596985, "learning_rate": 8.638983396789921e-06, "loss": 0.0292, "step": 78230 }, { "epoch": 0.633060927259487, "grad_norm": 0.4341902434825897, "learning_rate": 8.638499124513204e-06, "loss": 0.0316, "step": 78240 }, { "epoch": 0.6331418399546889, "grad_norm": 0.9927657842636108, "learning_rate": 8.638014779674305e-06, "loss": 0.0476, "step": 78250 }, { "epoch": 0.6332227526498908, "grad_norm": 0.5162118673324585, "learning_rate": 8.63753036228288e-06, "loss": 0.0233, "step": 78260 }, { "epoch": 0.6333036653450926, "grad_norm": 0.6707289218902588, "learning_rate": 8.63704587234859e-06, "loss": 0.035, "step": 78270 }, { "epoch": 0.6333845780402946, "grad_norm": 0.14750394225120544, "learning_rate": 8.636561309881101e-06, "loss": 0.0254, "step": 78280 }, { "epoch": 0.6334654907354964, "grad_norm": 0.397159606218338, "learning_rate": 8.636076674890073e-06, "loss": 0.036, "step": 78290 }, { "epoch": 0.6335464034306982, "grad_norm": 0.31045010685920715, "learning_rate": 8.635591967385171e-06, "loss": 0.0255, "step": 78300 }, { "epoch": 0.6336273161259002, "grad_norm": 0.3806486427783966, "learning_rate": 8.635107187376062e-06, "loss": 0.0343, "step": 78310 }, { "epoch": 0.633708228821102, "grad_norm": 0.19782289862632751, "learning_rate": 8.634622334872415e-06, "loss": 0.0213, "step": 78320 }, { "epoch": 0.6337891415163039, "grad_norm": 0.2991754114627838, "learning_rate": 8.634137409883899e-06, "loss": 0.0365, "step": 78330 }, { "epoch": 0.6338700542115058, "grad_norm": 0.36780354380607605, "learning_rate": 8.633652412420183e-06, "loss": 0.0409, "step": 78340 }, { "epoch": 0.6339509669067077, "grad_norm": 0.28307846188545227, "learning_rate": 8.63316734249094e-06, "loss": 0.0239, "step": 78350 }, { "epoch": 0.6340318796019095, "grad_norm": 0.21208246052265167, "learning_rate": 8.632682200105847e-06, "loss": 0.0226, "step": 78360 }, { "epoch": 0.6341127922971114, "grad_norm": 0.6086899042129517, "learning_rate": 8.632196985274575e-06, "loss": 0.0352, "step": 78370 }, { "epoch": 0.6341937049923133, "grad_norm": 0.13862405717372894, "learning_rate": 8.631711698006801e-06, "loss": 0.0321, "step": 78380 }, { "epoch": 0.6342746176875151, "grad_norm": 0.3048621118068695, "learning_rate": 8.631226338312207e-06, "loss": 0.0561, "step": 78390 }, { "epoch": 0.6343555303827171, "grad_norm": 0.3075498938560486, "learning_rate": 8.630740906200467e-06, "loss": 0.035, "step": 78400 }, { "epoch": 0.6344364430779189, "grad_norm": 0.2724490463733673, "learning_rate": 8.630255401681265e-06, "loss": 0.0268, "step": 78410 }, { "epoch": 0.6345173557731209, "grad_norm": 0.5150726437568665, "learning_rate": 8.629769824764282e-06, "loss": 0.0345, "step": 78420 }, { "epoch": 0.6345982684683227, "grad_norm": 0.7007102966308594, "learning_rate": 8.629284175459206e-06, "loss": 0.027, "step": 78430 }, { "epoch": 0.6346791811635245, "grad_norm": 0.5683470368385315, "learning_rate": 8.628798453775715e-06, "loss": 0.0266, "step": 78440 }, { "epoch": 0.6347600938587264, "grad_norm": 0.6619150638580322, "learning_rate": 8.628312659723501e-06, "loss": 0.0471, "step": 78450 }, { "epoch": 0.6348410065539283, "grad_norm": 0.22332634031772614, "learning_rate": 8.627826793312251e-06, "loss": 0.0409, "step": 78460 }, { "epoch": 0.6349219192491302, "grad_norm": 0.5405343174934387, "learning_rate": 8.627340854551653e-06, "loss": 0.0307, "step": 78470 }, { "epoch": 0.635002831944332, "grad_norm": 0.4256061315536499, "learning_rate": 8.626854843451402e-06, "loss": 0.0271, "step": 78480 }, { "epoch": 0.635083744639534, "grad_norm": 0.801800549030304, "learning_rate": 8.626368760021185e-06, "loss": 0.0535, "step": 78490 }, { "epoch": 0.6351646573347358, "grad_norm": 0.729205310344696, "learning_rate": 8.625882604270701e-06, "loss": 0.0303, "step": 78500 }, { "epoch": 0.6352455700299376, "grad_norm": 0.48090070486068726, "learning_rate": 8.62539637620964e-06, "loss": 0.0284, "step": 78510 }, { "epoch": 0.6353264827251396, "grad_norm": 0.4208275079727173, "learning_rate": 8.624910075847706e-06, "loss": 0.0328, "step": 78520 }, { "epoch": 0.6354073954203414, "grad_norm": 0.7674330472946167, "learning_rate": 8.62442370319459e-06, "loss": 0.0415, "step": 78530 }, { "epoch": 0.6354883081155434, "grad_norm": 0.5569911003112793, "learning_rate": 8.623937258259995e-06, "loss": 0.0258, "step": 78540 }, { "epoch": 0.6355692208107452, "grad_norm": 0.5246487259864807, "learning_rate": 8.623450741053623e-06, "loss": 0.0246, "step": 78550 }, { "epoch": 0.6356501335059471, "grad_norm": 0.33882808685302734, "learning_rate": 8.622964151585174e-06, "loss": 0.0231, "step": 78560 }, { "epoch": 0.635731046201149, "grad_norm": 0.17558088898658752, "learning_rate": 8.622477489864356e-06, "loss": 0.0332, "step": 78570 }, { "epoch": 0.6358119588963509, "grad_norm": 0.6140545606613159, "learning_rate": 8.621990755900868e-06, "loss": 0.0232, "step": 78580 }, { "epoch": 0.6358928715915527, "grad_norm": 0.4601858854293823, "learning_rate": 8.621503949704423e-06, "loss": 0.0435, "step": 78590 }, { "epoch": 0.6359737842867546, "grad_norm": 0.5658360719680786, "learning_rate": 8.621017071284727e-06, "loss": 0.0355, "step": 78600 }, { "epoch": 0.6360546969819565, "grad_norm": 0.4169239401817322, "learning_rate": 8.62053012065149e-06, "loss": 0.0395, "step": 78610 }, { "epoch": 0.6361356096771583, "grad_norm": 0.6745619773864746, "learning_rate": 8.620043097814423e-06, "loss": 0.0272, "step": 78620 }, { "epoch": 0.6362165223723603, "grad_norm": 0.5230273008346558, "learning_rate": 8.619556002783238e-06, "loss": 0.032, "step": 78630 }, { "epoch": 0.6362974350675621, "grad_norm": 0.7308292984962463, "learning_rate": 8.61906883556765e-06, "loss": 0.0437, "step": 78640 }, { "epoch": 0.636378347762764, "grad_norm": 0.4176749587059021, "learning_rate": 8.618581596177374e-06, "loss": 0.0304, "step": 78650 }, { "epoch": 0.6364592604579659, "grad_norm": 0.07443863898515701, "learning_rate": 8.618094284622127e-06, "loss": 0.023, "step": 78660 }, { "epoch": 0.6365401731531677, "grad_norm": 0.27214860916137695, "learning_rate": 8.617606900911629e-06, "loss": 0.0487, "step": 78670 }, { "epoch": 0.6366210858483696, "grad_norm": 0.4845885932445526, "learning_rate": 8.617119445055598e-06, "loss": 0.0327, "step": 78680 }, { "epoch": 0.6367019985435715, "grad_norm": 0.28796499967575073, "learning_rate": 8.616631917063756e-06, "loss": 0.017, "step": 78690 }, { "epoch": 0.6367829112387734, "grad_norm": 0.1932620406150818, "learning_rate": 8.616144316945825e-06, "loss": 0.0213, "step": 78700 }, { "epoch": 0.6368638239339752, "grad_norm": 0.6065545678138733, "learning_rate": 8.615656644711527e-06, "loss": 0.0354, "step": 78710 }, { "epoch": 0.6369447366291772, "grad_norm": 0.5347492098808289, "learning_rate": 8.615168900370594e-06, "loss": 0.0451, "step": 78720 }, { "epoch": 0.637025649324379, "grad_norm": 0.1688688099384308, "learning_rate": 8.614681083932748e-06, "loss": 0.0394, "step": 78730 }, { "epoch": 0.6371065620195808, "grad_norm": 0.27487799525260925, "learning_rate": 8.614193195407718e-06, "loss": 0.0193, "step": 78740 }, { "epoch": 0.6371874747147828, "grad_norm": 0.5727819800376892, "learning_rate": 8.613705234805236e-06, "loss": 0.0317, "step": 78750 }, { "epoch": 0.6372683874099846, "grad_norm": 0.1781288981437683, "learning_rate": 8.613217202135032e-06, "loss": 0.0356, "step": 78760 }, { "epoch": 0.6373493001051865, "grad_norm": 0.3291776776313782, "learning_rate": 8.612729097406838e-06, "loss": 0.0307, "step": 78770 }, { "epoch": 0.6374302128003884, "grad_norm": 0.35593533515930176, "learning_rate": 8.612240920630388e-06, "loss": 0.0325, "step": 78780 }, { "epoch": 0.6375111254955903, "grad_norm": 0.41335782408714294, "learning_rate": 8.61175267181542e-06, "loss": 0.0193, "step": 78790 }, { "epoch": 0.6375920381907921, "grad_norm": 0.575263500213623, "learning_rate": 8.61126435097167e-06, "loss": 0.0297, "step": 78800 }, { "epoch": 0.637672950885994, "grad_norm": 0.9171118140220642, "learning_rate": 8.610775958108874e-06, "loss": 0.0355, "step": 78810 }, { "epoch": 0.6377538635811959, "grad_norm": 0.6483862996101379, "learning_rate": 8.610287493236776e-06, "loss": 0.0415, "step": 78820 }, { "epoch": 0.6378347762763977, "grad_norm": 0.10767409205436707, "learning_rate": 8.609798956365117e-06, "loss": 0.0382, "step": 78830 }, { "epoch": 0.6379156889715997, "grad_norm": 0.4518371522426605, "learning_rate": 8.609310347503635e-06, "loss": 0.0301, "step": 78840 }, { "epoch": 0.6379966016668015, "grad_norm": 0.2354556769132614, "learning_rate": 8.60882166666208e-06, "loss": 0.0302, "step": 78850 }, { "epoch": 0.6380775143620034, "grad_norm": 0.1500234305858612, "learning_rate": 8.608332913850195e-06, "loss": 0.0374, "step": 78860 }, { "epoch": 0.6381584270572053, "grad_norm": 0.46864983439445496, "learning_rate": 8.607844089077728e-06, "loss": 0.039, "step": 78870 }, { "epoch": 0.6382393397524071, "grad_norm": 0.469942182302475, "learning_rate": 8.607355192354425e-06, "loss": 0.0299, "step": 78880 }, { "epoch": 0.638320252447609, "grad_norm": 0.45114991068840027, "learning_rate": 8.60686622369004e-06, "loss": 0.0219, "step": 78890 }, { "epoch": 0.6384011651428109, "grad_norm": 0.49571746587753296, "learning_rate": 8.606377183094323e-06, "loss": 0.0394, "step": 78900 }, { "epoch": 0.6384820778380128, "grad_norm": 0.11729725450277328, "learning_rate": 8.605888070577027e-06, "loss": 0.0265, "step": 78910 }, { "epoch": 0.6385629905332146, "grad_norm": 0.8306472897529602, "learning_rate": 8.605398886147904e-06, "loss": 0.0477, "step": 78920 }, { "epoch": 0.6386439032284166, "grad_norm": 0.5636104345321655, "learning_rate": 8.60490962981671e-06, "loss": 0.0321, "step": 78930 }, { "epoch": 0.6387248159236184, "grad_norm": 0.19193944334983826, "learning_rate": 8.604420301593208e-06, "loss": 0.0303, "step": 78940 }, { "epoch": 0.6388057286188203, "grad_norm": 0.2971046268939972, "learning_rate": 8.60393090148715e-06, "loss": 0.0374, "step": 78950 }, { "epoch": 0.6388866413140222, "grad_norm": 0.4765699803829193, "learning_rate": 8.603441429508298e-06, "loss": 0.0283, "step": 78960 }, { "epoch": 0.638967554009224, "grad_norm": 0.27882909774780273, "learning_rate": 8.602951885666415e-06, "loss": 0.0285, "step": 78970 }, { "epoch": 0.6390484667044259, "grad_norm": 0.21712276339530945, "learning_rate": 8.602462269971263e-06, "loss": 0.0308, "step": 78980 }, { "epoch": 0.6391293793996278, "grad_norm": 0.5127276182174683, "learning_rate": 8.601972582432606e-06, "loss": 0.0456, "step": 78990 }, { "epoch": 0.6392102920948297, "grad_norm": 0.359187513589859, "learning_rate": 8.601482823060207e-06, "loss": 0.0303, "step": 79000 }, { "epoch": 0.6392912047900315, "grad_norm": 0.9200606346130371, "learning_rate": 8.600992991863839e-06, "loss": 0.0463, "step": 79010 }, { "epoch": 0.6393721174852335, "grad_norm": 0.26781585812568665, "learning_rate": 8.60050308885327e-06, "loss": 0.041, "step": 79020 }, { "epoch": 0.6394530301804353, "grad_norm": 0.31141728162765503, "learning_rate": 8.600013114038264e-06, "loss": 0.0352, "step": 79030 }, { "epoch": 0.6395339428756371, "grad_norm": 0.44889113306999207, "learning_rate": 8.599523067428597e-06, "loss": 0.0232, "step": 79040 }, { "epoch": 0.6396148555708391, "grad_norm": 0.30038994550704956, "learning_rate": 8.599032949034043e-06, "loss": 0.0527, "step": 79050 }, { "epoch": 0.6396957682660409, "grad_norm": 0.1746971309185028, "learning_rate": 8.598542758864374e-06, "loss": 0.023, "step": 79060 }, { "epoch": 0.6397766809612428, "grad_norm": 0.3301646113395691, "learning_rate": 8.598052496929365e-06, "loss": 0.0185, "step": 79070 }, { "epoch": 0.6398575936564447, "grad_norm": 0.3468954265117645, "learning_rate": 8.597562163238797e-06, "loss": 0.0336, "step": 79080 }, { "epoch": 0.6399385063516466, "grad_norm": 0.4554609954357147, "learning_rate": 8.597071757802446e-06, "loss": 0.0364, "step": 79090 }, { "epoch": 0.6400194190468484, "grad_norm": 0.367114394903183, "learning_rate": 8.596581280630092e-06, "loss": 0.0341, "step": 79100 }, { "epoch": 0.6401003317420503, "grad_norm": 0.5889852046966553, "learning_rate": 8.596090731731517e-06, "loss": 0.0238, "step": 79110 }, { "epoch": 0.6401812444372522, "grad_norm": 0.592829167842865, "learning_rate": 8.595600111116504e-06, "loss": 0.0357, "step": 79120 }, { "epoch": 0.640262157132454, "grad_norm": 0.10921939462423325, "learning_rate": 8.595109418794837e-06, "loss": 0.0283, "step": 79130 }, { "epoch": 0.640343069827656, "grad_norm": 0.6297845244407654, "learning_rate": 8.594618654776304e-06, "loss": 0.034, "step": 79140 }, { "epoch": 0.6404239825228578, "grad_norm": 0.1444544792175293, "learning_rate": 8.594127819070689e-06, "loss": 0.0499, "step": 79150 }, { "epoch": 0.6405048952180598, "grad_norm": 0.503669261932373, "learning_rate": 8.593636911687782e-06, "loss": 0.0235, "step": 79160 }, { "epoch": 0.6405858079132616, "grad_norm": 0.32223066687583923, "learning_rate": 8.593145932637374e-06, "loss": 0.0266, "step": 79170 }, { "epoch": 0.6406667206084634, "grad_norm": 0.4739306569099426, "learning_rate": 8.592654881929257e-06, "loss": 0.0199, "step": 79180 }, { "epoch": 0.6407476333036654, "grad_norm": 0.4224819540977478, "learning_rate": 8.59216375957322e-06, "loss": 0.0462, "step": 79190 }, { "epoch": 0.6408285459988672, "grad_norm": 0.2137202024459839, "learning_rate": 8.591672565579064e-06, "loss": 0.0292, "step": 79200 }, { "epoch": 0.6409094586940691, "grad_norm": 0.3028116822242737, "learning_rate": 8.591181299956577e-06, "loss": 0.038, "step": 79210 }, { "epoch": 0.640990371389271, "grad_norm": 0.42384445667266846, "learning_rate": 8.590689962715563e-06, "loss": 0.0406, "step": 79220 }, { "epoch": 0.6410712840844729, "grad_norm": 0.18041807413101196, "learning_rate": 8.590198553865816e-06, "loss": 0.0382, "step": 79230 }, { "epoch": 0.6411521967796747, "grad_norm": 0.16875702142715454, "learning_rate": 8.589707073417139e-06, "loss": 0.0319, "step": 79240 }, { "epoch": 0.6412331094748767, "grad_norm": 0.6234966516494751, "learning_rate": 8.589215521379333e-06, "loss": 0.03, "step": 79250 }, { "epoch": 0.6413140221700785, "grad_norm": 0.6674972772598267, "learning_rate": 8.588723897762202e-06, "loss": 0.0382, "step": 79260 }, { "epoch": 0.6413949348652803, "grad_norm": 0.16361528635025024, "learning_rate": 8.588232202575545e-06, "loss": 0.0424, "step": 79270 }, { "epoch": 0.6414758475604823, "grad_norm": 0.2392803281545639, "learning_rate": 8.587740435829175e-06, "loss": 0.032, "step": 79280 }, { "epoch": 0.6415567602556841, "grad_norm": 0.2032027691602707, "learning_rate": 8.587248597532895e-06, "loss": 0.0232, "step": 79290 }, { "epoch": 0.641637672950886, "grad_norm": 0.33067744970321655, "learning_rate": 8.586756687696516e-06, "loss": 0.0263, "step": 79300 }, { "epoch": 0.6417185856460879, "grad_norm": 0.4673348665237427, "learning_rate": 8.586264706329846e-06, "loss": 0.0378, "step": 79310 }, { "epoch": 0.6417994983412898, "grad_norm": 0.2847888469696045, "learning_rate": 8.585772653442697e-06, "loss": 0.024, "step": 79320 }, { "epoch": 0.6418804110364916, "grad_norm": 0.5935420989990234, "learning_rate": 8.585280529044882e-06, "loss": 0.0402, "step": 79330 }, { "epoch": 0.6419613237316935, "grad_norm": 3.052041530609131, "learning_rate": 8.584788333146217e-06, "loss": 0.0384, "step": 79340 }, { "epoch": 0.6420422364268954, "grad_norm": 0.6480610966682434, "learning_rate": 8.584296065756515e-06, "loss": 0.043, "step": 79350 }, { "epoch": 0.6421231491220972, "grad_norm": 0.20240095257759094, "learning_rate": 8.583803726885595e-06, "loss": 0.0192, "step": 79360 }, { "epoch": 0.6422040618172992, "grad_norm": 0.27864810824394226, "learning_rate": 8.583311316543276e-06, "loss": 0.0232, "step": 79370 }, { "epoch": 0.642284974512501, "grad_norm": 0.47003230452537537, "learning_rate": 8.582818834739378e-06, "loss": 0.0295, "step": 79380 }, { "epoch": 0.6423658872077029, "grad_norm": 0.3101419508457184, "learning_rate": 8.582326281483721e-06, "loss": 0.0263, "step": 79390 }, { "epoch": 0.6424467999029048, "grad_norm": 0.4359442889690399, "learning_rate": 8.581833656786129e-06, "loss": 0.0389, "step": 79400 }, { "epoch": 0.6425277125981066, "grad_norm": 0.5024726986885071, "learning_rate": 8.581340960656428e-06, "loss": 0.0324, "step": 79410 }, { "epoch": 0.6426086252933085, "grad_norm": 0.5666069388389587, "learning_rate": 8.58084819310444e-06, "loss": 0.0246, "step": 79420 }, { "epoch": 0.6426895379885104, "grad_norm": 0.41587233543395996, "learning_rate": 8.580355354139995e-06, "loss": 0.0373, "step": 79430 }, { "epoch": 0.6427704506837123, "grad_norm": 0.4827776551246643, "learning_rate": 8.579862443772921e-06, "loss": 0.0203, "step": 79440 }, { "epoch": 0.6428513633789141, "grad_norm": 0.6600625514984131, "learning_rate": 8.579369462013048e-06, "loss": 0.0416, "step": 79450 }, { "epoch": 0.6429322760741161, "grad_norm": 0.4100836515426636, "learning_rate": 8.578876408870208e-06, "loss": 0.0252, "step": 79460 }, { "epoch": 0.6430131887693179, "grad_norm": 0.7656719088554382, "learning_rate": 8.578383284354233e-06, "loss": 0.0383, "step": 79470 }, { "epoch": 0.6430941014645197, "grad_norm": 0.583668053150177, "learning_rate": 8.577890088474957e-06, "loss": 0.0357, "step": 79480 }, { "epoch": 0.6431750141597217, "grad_norm": 0.5126065015792847, "learning_rate": 8.577396821242216e-06, "loss": 0.022, "step": 79490 }, { "epoch": 0.6432559268549235, "grad_norm": 0.3728383183479309, "learning_rate": 8.57690348266585e-06, "loss": 0.0256, "step": 79500 }, { "epoch": 0.6433368395501254, "grad_norm": 0.5450962781906128, "learning_rate": 8.576410072755694e-06, "loss": 0.0261, "step": 79510 }, { "epoch": 0.6434177522453273, "grad_norm": 0.4596884846687317, "learning_rate": 8.575916591521589e-06, "loss": 0.0349, "step": 79520 }, { "epoch": 0.6434986649405292, "grad_norm": 0.44653910398483276, "learning_rate": 8.575423038973376e-06, "loss": 0.0577, "step": 79530 }, { "epoch": 0.643579577635731, "grad_norm": 0.4361072778701782, "learning_rate": 8.574929415120899e-06, "loss": 0.0332, "step": 79540 }, { "epoch": 0.643660490330933, "grad_norm": 0.39789852499961853, "learning_rate": 8.574435719974003e-06, "loss": 0.0372, "step": 79550 }, { "epoch": 0.6437414030261348, "grad_norm": 0.3924521803855896, "learning_rate": 8.573941953542532e-06, "loss": 0.0409, "step": 79560 }, { "epoch": 0.6438223157213366, "grad_norm": 0.40811821818351746, "learning_rate": 8.573448115836334e-06, "loss": 0.0278, "step": 79570 }, { "epoch": 0.6439032284165386, "grad_norm": 0.3683128356933594, "learning_rate": 8.572954206865254e-06, "loss": 0.0305, "step": 79580 }, { "epoch": 0.6439841411117404, "grad_norm": 0.465946763753891, "learning_rate": 8.572460226639149e-06, "loss": 0.0227, "step": 79590 }, { "epoch": 0.6440650538069423, "grad_norm": 0.3502216041088104, "learning_rate": 8.571966175167866e-06, "loss": 0.0364, "step": 79600 }, { "epoch": 0.6441459665021442, "grad_norm": 0.3798857629299164, "learning_rate": 8.571472052461256e-06, "loss": 0.0407, "step": 79610 }, { "epoch": 0.6442268791973461, "grad_norm": 0.33482012152671814, "learning_rate": 8.570977858529177e-06, "loss": 0.0262, "step": 79620 }, { "epoch": 0.6443077918925479, "grad_norm": 0.5856789946556091, "learning_rate": 8.570483593381485e-06, "loss": 0.0465, "step": 79630 }, { "epoch": 0.6443887045877498, "grad_norm": 0.501015305519104, "learning_rate": 8.569989257028032e-06, "loss": 0.036, "step": 79640 }, { "epoch": 0.6444696172829517, "grad_norm": 0.3566949665546417, "learning_rate": 8.569494849478684e-06, "loss": 0.0374, "step": 79650 }, { "epoch": 0.6445505299781535, "grad_norm": 0.3423183262348175, "learning_rate": 8.569000370743294e-06, "loss": 0.0194, "step": 79660 }, { "epoch": 0.6446314426733555, "grad_norm": 0.22922329604625702, "learning_rate": 8.568505820831726e-06, "loss": 0.0405, "step": 79670 }, { "epoch": 0.6447123553685573, "grad_norm": 0.46579164266586304, "learning_rate": 8.568011199753844e-06, "loss": 0.0312, "step": 79680 }, { "epoch": 0.6447932680637593, "grad_norm": 0.1922893226146698, "learning_rate": 8.56751650751951e-06, "loss": 0.0526, "step": 79690 }, { "epoch": 0.6448741807589611, "grad_norm": 0.5874592065811157, "learning_rate": 8.56702174413859e-06, "loss": 0.0336, "step": 79700 }, { "epoch": 0.6449550934541629, "grad_norm": 0.5479010939598083, "learning_rate": 8.566526909620953e-06, "loss": 0.0343, "step": 79710 }, { "epoch": 0.6450360061493648, "grad_norm": 1.1449358463287354, "learning_rate": 8.566032003976467e-06, "loss": 0.0418, "step": 79720 }, { "epoch": 0.6451169188445667, "grad_norm": 0.5892936587333679, "learning_rate": 8.565537027215e-06, "loss": 0.032, "step": 79730 }, { "epoch": 0.6451978315397686, "grad_norm": 0.8879648447036743, "learning_rate": 8.565041979346423e-06, "loss": 0.0417, "step": 79740 }, { "epoch": 0.6452787442349704, "grad_norm": 0.2891539931297302, "learning_rate": 8.564546860380611e-06, "loss": 0.0304, "step": 79750 }, { "epoch": 0.6453596569301724, "grad_norm": 0.9459667801856995, "learning_rate": 8.564051670327437e-06, "loss": 0.0444, "step": 79760 }, { "epoch": 0.6454405696253742, "grad_norm": 0.4891586899757385, "learning_rate": 8.563556409196775e-06, "loss": 0.0323, "step": 79770 }, { "epoch": 0.645521482320576, "grad_norm": 0.4065670073032379, "learning_rate": 8.563061076998506e-06, "loss": 0.04, "step": 79780 }, { "epoch": 0.645602395015778, "grad_norm": 0.3290044963359833, "learning_rate": 8.562565673742504e-06, "loss": 0.0247, "step": 79790 }, { "epoch": 0.6456833077109798, "grad_norm": 1.0461413860321045, "learning_rate": 8.562070199438651e-06, "loss": 0.044, "step": 79800 }, { "epoch": 0.6457642204061818, "grad_norm": 0.3235228359699249, "learning_rate": 8.561574654096828e-06, "loss": 0.0296, "step": 79810 }, { "epoch": 0.6458451331013836, "grad_norm": 0.505561113357544, "learning_rate": 8.561079037726916e-06, "loss": 0.0322, "step": 79820 }, { "epoch": 0.6459260457965855, "grad_norm": 0.7039824724197388, "learning_rate": 8.560583350338803e-06, "loss": 0.0421, "step": 79830 }, { "epoch": 0.6460069584917874, "grad_norm": 0.5399846434593201, "learning_rate": 8.560087591942369e-06, "loss": 0.0415, "step": 79840 }, { "epoch": 0.6460878711869893, "grad_norm": 0.39675819873809814, "learning_rate": 8.559591762547505e-06, "loss": 0.0164, "step": 79850 }, { "epoch": 0.6461687838821911, "grad_norm": 0.30391955375671387, "learning_rate": 8.559095862164097e-06, "loss": 0.025, "step": 79860 }, { "epoch": 0.646249696577393, "grad_norm": 0.7457437515258789, "learning_rate": 8.558599890802037e-06, "loss": 0.0368, "step": 79870 }, { "epoch": 0.6463306092725949, "grad_norm": 0.7351543307304382, "learning_rate": 8.558103848471213e-06, "loss": 0.0231, "step": 79880 }, { "epoch": 0.6464115219677967, "grad_norm": 0.5507937073707581, "learning_rate": 8.557607735181522e-06, "loss": 0.0343, "step": 79890 }, { "epoch": 0.6464924346629987, "grad_norm": 0.5303556323051453, "learning_rate": 8.557111550942853e-06, "loss": 0.032, "step": 79900 }, { "epoch": 0.6465733473582005, "grad_norm": 0.6117253303527832, "learning_rate": 8.556615295765104e-06, "loss": 0.0293, "step": 79910 }, { "epoch": 0.6466542600534024, "grad_norm": 0.21797752380371094, "learning_rate": 8.556118969658172e-06, "loss": 0.0286, "step": 79920 }, { "epoch": 0.6467351727486043, "grad_norm": 0.5525928139686584, "learning_rate": 8.555622572631952e-06, "loss": 0.0223, "step": 79930 }, { "epoch": 0.6468160854438061, "grad_norm": 0.44843098521232605, "learning_rate": 8.555126104696349e-06, "loss": 0.0281, "step": 79940 }, { "epoch": 0.646896998139008, "grad_norm": 0.3171132802963257, "learning_rate": 8.554629565861259e-06, "loss": 0.0453, "step": 79950 }, { "epoch": 0.6469779108342099, "grad_norm": 0.14456145465373993, "learning_rate": 8.554132956136587e-06, "loss": 0.0375, "step": 79960 }, { "epoch": 0.6470588235294118, "grad_norm": 0.48491841554641724, "learning_rate": 8.553636275532236e-06, "loss": 0.0299, "step": 79970 }, { "epoch": 0.6471397362246136, "grad_norm": 0.5141539573669434, "learning_rate": 8.553139524058113e-06, "loss": 0.0284, "step": 79980 }, { "epoch": 0.6472206489198156, "grad_norm": 0.48180198669433594, "learning_rate": 8.552642701724123e-06, "loss": 0.026, "step": 79990 }, { "epoch": 0.6473015616150174, "grad_norm": 0.5018654465675354, "learning_rate": 8.552145808540172e-06, "loss": 0.0359, "step": 80000 }, { "epoch": 0.6473824743102192, "grad_norm": 0.25218909978866577, "learning_rate": 8.551648844516174e-06, "loss": 0.0266, "step": 80010 }, { "epoch": 0.6474633870054212, "grad_norm": 0.4255475699901581, "learning_rate": 8.551151809662038e-06, "loss": 0.0304, "step": 80020 }, { "epoch": 0.647544299700623, "grad_norm": 0.7452197670936584, "learning_rate": 8.550654703987675e-06, "loss": 0.0299, "step": 80030 }, { "epoch": 0.6476252123958249, "grad_norm": 0.13668857514858246, "learning_rate": 8.550157527503e-06, "loss": 0.0512, "step": 80040 }, { "epoch": 0.6477061250910268, "grad_norm": 0.24707980453968048, "learning_rate": 8.549660280217926e-06, "loss": 0.0326, "step": 80050 }, { "epoch": 0.6477870377862287, "grad_norm": 0.24433152377605438, "learning_rate": 8.549162962142374e-06, "loss": 0.0453, "step": 80060 }, { "epoch": 0.6478679504814305, "grad_norm": 0.400149405002594, "learning_rate": 8.548665573286259e-06, "loss": 0.0296, "step": 80070 }, { "epoch": 0.6479488631766324, "grad_norm": 0.8983726501464844, "learning_rate": 8.548168113659499e-06, "loss": 0.0303, "step": 80080 }, { "epoch": 0.6480297758718343, "grad_norm": 0.5233731865882874, "learning_rate": 8.547670583272016e-06, "loss": 0.0303, "step": 80090 }, { "epoch": 0.6481106885670361, "grad_norm": 0.5688269734382629, "learning_rate": 8.547172982133735e-06, "loss": 0.0268, "step": 80100 }, { "epoch": 0.6481916012622381, "grad_norm": 0.11002232879400253, "learning_rate": 8.546675310254576e-06, "loss": 0.0245, "step": 80110 }, { "epoch": 0.6482725139574399, "grad_norm": 0.3965858221054077, "learning_rate": 8.546177567644466e-06, "loss": 0.0275, "step": 80120 }, { "epoch": 0.6483534266526418, "grad_norm": 0.6850705146789551, "learning_rate": 8.54567975431333e-06, "loss": 0.0395, "step": 80130 }, { "epoch": 0.6484343393478437, "grad_norm": 0.3627259135246277, "learning_rate": 8.545181870271097e-06, "loss": 0.0318, "step": 80140 }, { "epoch": 0.6485152520430456, "grad_norm": 0.24891500174999237, "learning_rate": 8.544683915527695e-06, "loss": 0.031, "step": 80150 }, { "epoch": 0.6485961647382474, "grad_norm": 0.3973850607872009, "learning_rate": 8.544185890093057e-06, "loss": 0.0316, "step": 80160 }, { "epoch": 0.6486770774334493, "grad_norm": 0.4066599905490875, "learning_rate": 8.543687793977113e-06, "loss": 0.0284, "step": 80170 }, { "epoch": 0.6487579901286512, "grad_norm": 0.2812541723251343, "learning_rate": 8.543189627189798e-06, "loss": 0.0295, "step": 80180 }, { "epoch": 0.648838902823853, "grad_norm": 0.206658273935318, "learning_rate": 8.542691389741044e-06, "loss": 0.0174, "step": 80190 }, { "epoch": 0.648919815519055, "grad_norm": 0.4195144474506378, "learning_rate": 8.54219308164079e-06, "loss": 0.0375, "step": 80200 }, { "epoch": 0.6490007282142568, "grad_norm": 0.4829642176628113, "learning_rate": 8.541694702898972e-06, "loss": 0.0242, "step": 80210 }, { "epoch": 0.6490816409094587, "grad_norm": 0.4218536615371704, "learning_rate": 8.541196253525531e-06, "loss": 0.0359, "step": 80220 }, { "epoch": 0.6491625536046606, "grad_norm": 0.28414052724838257, "learning_rate": 8.540697733530405e-06, "loss": 0.0341, "step": 80230 }, { "epoch": 0.6492434662998624, "grad_norm": 0.20549297332763672, "learning_rate": 8.540199142923539e-06, "loss": 0.0138, "step": 80240 }, { "epoch": 0.6493243789950643, "grad_norm": 0.45300015807151794, "learning_rate": 8.539700481714875e-06, "loss": 0.034, "step": 80250 }, { "epoch": 0.6494052916902662, "grad_norm": 0.3989890217781067, "learning_rate": 8.539201749914357e-06, "loss": 0.0405, "step": 80260 }, { "epoch": 0.6494862043854681, "grad_norm": 0.3642706274986267, "learning_rate": 8.538702947531931e-06, "loss": 0.0301, "step": 80270 }, { "epoch": 0.6495671170806699, "grad_norm": 0.053092602640390396, "learning_rate": 8.538204074577548e-06, "loss": 0.0267, "step": 80280 }, { "epoch": 0.6496480297758719, "grad_norm": 0.4056971073150635, "learning_rate": 8.537705131061152e-06, "loss": 0.0333, "step": 80290 }, { "epoch": 0.6497289424710737, "grad_norm": 0.750539243221283, "learning_rate": 8.537206116992694e-06, "loss": 0.0492, "step": 80300 }, { "epoch": 0.6498098551662755, "grad_norm": 0.4898138642311096, "learning_rate": 8.53670703238213e-06, "loss": 0.0385, "step": 80310 }, { "epoch": 0.6498907678614775, "grad_norm": 0.21381951868534088, "learning_rate": 8.536207877239408e-06, "loss": 0.0272, "step": 80320 }, { "epoch": 0.6499716805566793, "grad_norm": 0.26371094584465027, "learning_rate": 8.535708651574488e-06, "loss": 0.0358, "step": 80330 }, { "epoch": 0.6500525932518812, "grad_norm": 0.21395471692085266, "learning_rate": 8.535209355397322e-06, "loss": 0.0294, "step": 80340 }, { "epoch": 0.6501335059470831, "grad_norm": 0.40098926424980164, "learning_rate": 8.534709988717869e-06, "loss": 0.0274, "step": 80350 }, { "epoch": 0.650214418642285, "grad_norm": 0.7642195224761963, "learning_rate": 8.534210551546087e-06, "loss": 0.0432, "step": 80360 }, { "epoch": 0.6502953313374868, "grad_norm": 0.6249070167541504, "learning_rate": 8.533711043891937e-06, "loss": 0.0393, "step": 80370 }, { "epoch": 0.6503762440326887, "grad_norm": 0.23553520441055298, "learning_rate": 8.53321146576538e-06, "loss": 0.0289, "step": 80380 }, { "epoch": 0.6504571567278906, "grad_norm": 0.21110261976718903, "learning_rate": 8.532711817176378e-06, "loss": 0.0387, "step": 80390 }, { "epoch": 0.6505380694230924, "grad_norm": 0.39935290813446045, "learning_rate": 8.532212098134898e-06, "loss": 0.0377, "step": 80400 }, { "epoch": 0.6506189821182944, "grad_norm": 0.2359573245048523, "learning_rate": 8.531712308650904e-06, "loss": 0.0303, "step": 80410 }, { "epoch": 0.6506998948134962, "grad_norm": 0.3704480230808258, "learning_rate": 8.531212448734364e-06, "loss": 0.0301, "step": 80420 }, { "epoch": 0.6507808075086982, "grad_norm": 0.35665592551231384, "learning_rate": 8.530712518395245e-06, "loss": 0.0303, "step": 80430 }, { "epoch": 0.6508617202039, "grad_norm": 0.6068020462989807, "learning_rate": 8.530212517643521e-06, "loss": 0.0247, "step": 80440 }, { "epoch": 0.6509426328991018, "grad_norm": 0.1746719479560852, "learning_rate": 8.52971244648916e-06, "loss": 0.0335, "step": 80450 }, { "epoch": 0.6510235455943038, "grad_norm": 0.7338096499443054, "learning_rate": 8.529212304942136e-06, "loss": 0.0427, "step": 80460 }, { "epoch": 0.6511044582895056, "grad_norm": 0.5715106129646301, "learning_rate": 8.528712093012423e-06, "loss": 0.0416, "step": 80470 }, { "epoch": 0.6511853709847075, "grad_norm": 0.2426966279745102, "learning_rate": 8.528211810709997e-06, "loss": 0.034, "step": 80480 }, { "epoch": 0.6512662836799094, "grad_norm": 0.6448335647583008, "learning_rate": 8.527711458044835e-06, "loss": 0.0356, "step": 80490 }, { "epoch": 0.6513471963751113, "grad_norm": 0.3496079444885254, "learning_rate": 8.527211035026912e-06, "loss": 0.0404, "step": 80500 }, { "epoch": 0.6514281090703131, "grad_norm": 0.6410053968429565, "learning_rate": 8.526710541666214e-06, "loss": 0.0377, "step": 80510 }, { "epoch": 0.6515090217655151, "grad_norm": 0.9209406971931458, "learning_rate": 8.52620997797272e-06, "loss": 0.0498, "step": 80520 }, { "epoch": 0.6515899344607169, "grad_norm": 0.3839142322540283, "learning_rate": 8.525709343956412e-06, "loss": 0.0297, "step": 80530 }, { "epoch": 0.6516708471559187, "grad_norm": 0.45267948508262634, "learning_rate": 8.525208639627273e-06, "loss": 0.0259, "step": 80540 }, { "epoch": 0.6517517598511207, "grad_norm": 0.5475278496742249, "learning_rate": 8.524707864995291e-06, "loss": 0.0495, "step": 80550 }, { "epoch": 0.6518326725463225, "grad_norm": 0.3165239095687866, "learning_rate": 8.52420702007045e-06, "loss": 0.0394, "step": 80560 }, { "epoch": 0.6519135852415244, "grad_norm": 0.37391045689582825, "learning_rate": 8.52370610486274e-06, "loss": 0.0247, "step": 80570 }, { "epoch": 0.6519944979367263, "grad_norm": 0.5473633408546448, "learning_rate": 8.523205119382154e-06, "loss": 0.023, "step": 80580 }, { "epoch": 0.6520754106319282, "grad_norm": 0.4906395673751831, "learning_rate": 8.522704063638676e-06, "loss": 0.0224, "step": 80590 }, { "epoch": 0.65215632332713, "grad_norm": 0.3991476893424988, "learning_rate": 8.522202937642303e-06, "loss": 0.0553, "step": 80600 }, { "epoch": 0.6522372360223319, "grad_norm": 0.8711416721343994, "learning_rate": 8.521701741403029e-06, "loss": 0.0401, "step": 80610 }, { "epoch": 0.6523181487175338, "grad_norm": 0.5189927816390991, "learning_rate": 8.521200474930849e-06, "loss": 0.0345, "step": 80620 }, { "epoch": 0.6523990614127356, "grad_norm": 0.49268049001693726, "learning_rate": 8.520699138235757e-06, "loss": 0.0349, "step": 80630 }, { "epoch": 0.6524799741079376, "grad_norm": 0.4296402335166931, "learning_rate": 8.520197731327756e-06, "loss": 0.0427, "step": 80640 }, { "epoch": 0.6525608868031394, "grad_norm": 0.35359862446784973, "learning_rate": 8.519696254216839e-06, "loss": 0.0367, "step": 80650 }, { "epoch": 0.6526417994983413, "grad_norm": 0.40771058201789856, "learning_rate": 8.519194706913013e-06, "loss": 0.0362, "step": 80660 }, { "epoch": 0.6527227121935432, "grad_norm": 0.24969753623008728, "learning_rate": 8.518693089426277e-06, "loss": 0.0298, "step": 80670 }, { "epoch": 0.652803624888745, "grad_norm": 0.37831953167915344, "learning_rate": 8.518191401766637e-06, "loss": 0.0258, "step": 80680 }, { "epoch": 0.6528845375839469, "grad_norm": 0.7184659242630005, "learning_rate": 8.517689643944096e-06, "loss": 0.0345, "step": 80690 }, { "epoch": 0.6529654502791488, "grad_norm": 0.436903715133667, "learning_rate": 8.517187815968661e-06, "loss": 0.037, "step": 80700 }, { "epoch": 0.6530463629743507, "grad_norm": 0.4164438247680664, "learning_rate": 8.51668591785034e-06, "loss": 0.0378, "step": 80710 }, { "epoch": 0.6531272756695525, "grad_norm": 0.4275282025337219, "learning_rate": 8.516183949599142e-06, "loss": 0.0293, "step": 80720 }, { "epoch": 0.6532081883647545, "grad_norm": 0.22342588007450104, "learning_rate": 8.51568191122508e-06, "loss": 0.0348, "step": 80730 }, { "epoch": 0.6532891010599563, "grad_norm": 0.7192795276641846, "learning_rate": 8.515179802738163e-06, "loss": 0.0218, "step": 80740 }, { "epoch": 0.6533700137551581, "grad_norm": 0.3392564356327057, "learning_rate": 8.514677624148405e-06, "loss": 0.0507, "step": 80750 }, { "epoch": 0.6534509264503601, "grad_norm": 0.29926764965057373, "learning_rate": 8.514175375465823e-06, "loss": 0.0402, "step": 80760 }, { "epoch": 0.6535318391455619, "grad_norm": 0.500873863697052, "learning_rate": 8.51367305670043e-06, "loss": 0.0278, "step": 80770 }, { "epoch": 0.6536127518407638, "grad_norm": 0.303142249584198, "learning_rate": 8.513170667862248e-06, "loss": 0.0325, "step": 80780 }, { "epoch": 0.6536936645359657, "grad_norm": 0.4797840118408203, "learning_rate": 8.512668208961291e-06, "loss": 0.0313, "step": 80790 }, { "epoch": 0.6537745772311676, "grad_norm": 0.3404419720172882, "learning_rate": 8.512165680007583e-06, "loss": 0.0367, "step": 80800 }, { "epoch": 0.6538554899263694, "grad_norm": 0.41060036420822144, "learning_rate": 8.511663081011147e-06, "loss": 0.0431, "step": 80810 }, { "epoch": 0.6539364026215714, "grad_norm": 0.4281683564186096, "learning_rate": 8.511160411982002e-06, "loss": 0.0274, "step": 80820 }, { "epoch": 0.6540173153167732, "grad_norm": 0.414068341255188, "learning_rate": 8.510657672930175e-06, "loss": 0.0331, "step": 80830 }, { "epoch": 0.654098228011975, "grad_norm": 0.4669211506843567, "learning_rate": 8.510154863865694e-06, "loss": 0.0336, "step": 80840 }, { "epoch": 0.654179140707177, "grad_norm": 0.5490188598632812, "learning_rate": 8.509651984798584e-06, "loss": 0.0337, "step": 80850 }, { "epoch": 0.6542600534023788, "grad_norm": 0.7032625079154968, "learning_rate": 8.509149035738873e-06, "loss": 0.0465, "step": 80860 }, { "epoch": 0.6543409660975807, "grad_norm": 0.12100069224834442, "learning_rate": 8.508646016696594e-06, "loss": 0.0315, "step": 80870 }, { "epoch": 0.6544218787927826, "grad_norm": 0.4037363827228546, "learning_rate": 8.508142927681775e-06, "loss": 0.0222, "step": 80880 }, { "epoch": 0.6545027914879845, "grad_norm": 0.1819508671760559, "learning_rate": 8.507639768704454e-06, "loss": 0.0299, "step": 80890 }, { "epoch": 0.6545837041831863, "grad_norm": 0.15408174693584442, "learning_rate": 8.507136539774663e-06, "loss": 0.0268, "step": 80900 }, { "epoch": 0.6546646168783882, "grad_norm": 0.5263171195983887, "learning_rate": 8.506633240902436e-06, "loss": 0.0401, "step": 80910 }, { "epoch": 0.6547455295735901, "grad_norm": 0.5144533514976501, "learning_rate": 8.506129872097812e-06, "loss": 0.039, "step": 80920 }, { "epoch": 0.6548264422687919, "grad_norm": 0.51710045337677, "learning_rate": 8.505626433370831e-06, "loss": 0.027, "step": 80930 }, { "epoch": 0.6549073549639939, "grad_norm": 0.39870837330818176, "learning_rate": 8.505122924731528e-06, "loss": 0.0234, "step": 80940 }, { "epoch": 0.6549882676591957, "grad_norm": 0.848842978477478, "learning_rate": 8.50461934618995e-06, "loss": 0.0272, "step": 80950 }, { "epoch": 0.6550691803543977, "grad_norm": 0.15608884394168854, "learning_rate": 8.50411569775614e-06, "loss": 0.0167, "step": 80960 }, { "epoch": 0.6551500930495995, "grad_norm": 1.2062466144561768, "learning_rate": 8.503611979440136e-06, "loss": 0.0201, "step": 80970 }, { "epoch": 0.6552310057448013, "grad_norm": 0.667343020439148, "learning_rate": 8.50310819125199e-06, "loss": 0.0321, "step": 80980 }, { "epoch": 0.6553119184400032, "grad_norm": 0.28658440709114075, "learning_rate": 8.502604333201745e-06, "loss": 0.0179, "step": 80990 }, { "epoch": 0.6553928311352051, "grad_norm": 0.7508982419967651, "learning_rate": 8.502100405299452e-06, "loss": 0.0304, "step": 81000 }, { "epoch": 0.655473743830407, "grad_norm": 0.7714853286743164, "learning_rate": 8.50159640755516e-06, "loss": 0.0345, "step": 81010 }, { "epoch": 0.6555546565256088, "grad_norm": 0.6118175387382507, "learning_rate": 8.501092339978919e-06, "loss": 0.0491, "step": 81020 }, { "epoch": 0.6556355692208108, "grad_norm": 0.268907368183136, "learning_rate": 8.500588202580782e-06, "loss": 0.0314, "step": 81030 }, { "epoch": 0.6557164819160126, "grad_norm": 0.4864732027053833, "learning_rate": 8.500083995370804e-06, "loss": 0.0224, "step": 81040 }, { "epoch": 0.6557973946112144, "grad_norm": 0.3672344982624054, "learning_rate": 8.49957971835904e-06, "loss": 0.0347, "step": 81050 }, { "epoch": 0.6558783073064164, "grad_norm": 0.22952218353748322, "learning_rate": 8.499075371555547e-06, "loss": 0.0261, "step": 81060 }, { "epoch": 0.6559592200016182, "grad_norm": 0.5506094694137573, "learning_rate": 8.49857095497038e-06, "loss": 0.0244, "step": 81070 }, { "epoch": 0.6560401326968202, "grad_norm": 0.7205854654312134, "learning_rate": 8.498066468613603e-06, "loss": 0.0465, "step": 81080 }, { "epoch": 0.656121045392022, "grad_norm": 0.24876844882965088, "learning_rate": 8.497561912495275e-06, "loss": 0.034, "step": 81090 }, { "epoch": 0.6562019580872239, "grad_norm": 0.16701048612594604, "learning_rate": 8.497057286625459e-06, "loss": 0.0248, "step": 81100 }, { "epoch": 0.6562828707824258, "grad_norm": 0.9364831447601318, "learning_rate": 8.496552591014218e-06, "loss": 0.0259, "step": 81110 }, { "epoch": 0.6563637834776277, "grad_norm": 0.4345265030860901, "learning_rate": 8.496047825671616e-06, "loss": 0.0351, "step": 81120 }, { "epoch": 0.6564446961728295, "grad_norm": 0.5893335938453674, "learning_rate": 8.49554299060772e-06, "loss": 0.054, "step": 81130 }, { "epoch": 0.6565256088680314, "grad_norm": 0.4175552427768707, "learning_rate": 8.4950380858326e-06, "loss": 0.0179, "step": 81140 }, { "epoch": 0.6566065215632333, "grad_norm": 0.4294746518135071, "learning_rate": 8.494533111356323e-06, "loss": 0.0349, "step": 81150 }, { "epoch": 0.6566874342584351, "grad_norm": 0.42974838614463806, "learning_rate": 8.49402806718896e-06, "loss": 0.0411, "step": 81160 }, { "epoch": 0.6567683469536371, "grad_norm": 0.39030721783638, "learning_rate": 8.493522953340584e-06, "loss": 0.0482, "step": 81170 }, { "epoch": 0.6568492596488389, "grad_norm": 0.55772465467453, "learning_rate": 8.493017769821267e-06, "loss": 0.0352, "step": 81180 }, { "epoch": 0.6569301723440408, "grad_norm": 0.6872563362121582, "learning_rate": 8.492512516641087e-06, "loss": 0.0337, "step": 81190 }, { "epoch": 0.6570110850392427, "grad_norm": 0.31500953435897827, "learning_rate": 8.492007193810117e-06, "loss": 0.0349, "step": 81200 }, { "epoch": 0.6570919977344445, "grad_norm": 0.339311808347702, "learning_rate": 8.491501801338433e-06, "loss": 0.0355, "step": 81210 }, { "epoch": 0.6571729104296464, "grad_norm": 0.3340699076652527, "learning_rate": 8.490996339236119e-06, "loss": 0.0274, "step": 81220 }, { "epoch": 0.6572538231248483, "grad_norm": 0.7438010573387146, "learning_rate": 8.490490807513252e-06, "loss": 0.0251, "step": 81230 }, { "epoch": 0.6573347358200502, "grad_norm": 0.41886985301971436, "learning_rate": 8.489985206179916e-06, "loss": 0.0355, "step": 81240 }, { "epoch": 0.657415648515252, "grad_norm": 0.177117258310318, "learning_rate": 8.48947953524619e-06, "loss": 0.0425, "step": 81250 }, { "epoch": 0.657496561210454, "grad_norm": 0.5986855626106262, "learning_rate": 8.488973794722163e-06, "loss": 0.0291, "step": 81260 }, { "epoch": 0.6575774739056558, "grad_norm": 0.5714424848556519, "learning_rate": 8.48846798461792e-06, "loss": 0.015, "step": 81270 }, { "epoch": 0.6576583866008576, "grad_norm": 0.4199448823928833, "learning_rate": 8.487962104943545e-06, "loss": 0.0332, "step": 81280 }, { "epoch": 0.6577392992960596, "grad_norm": 0.42204052209854126, "learning_rate": 8.48745615570913e-06, "loss": 0.0285, "step": 81290 }, { "epoch": 0.6578202119912614, "grad_norm": 0.7718873023986816, "learning_rate": 8.486950136924767e-06, "loss": 0.0248, "step": 81300 }, { "epoch": 0.6579011246864633, "grad_norm": 0.20145173370838165, "learning_rate": 8.486444048600542e-06, "loss": 0.0223, "step": 81310 }, { "epoch": 0.6579820373816652, "grad_norm": 0.3583377003669739, "learning_rate": 8.485937890746551e-06, "loss": 0.0241, "step": 81320 }, { "epoch": 0.6580629500768671, "grad_norm": 0.5279862880706787, "learning_rate": 8.485431663372889e-06, "loss": 0.0295, "step": 81330 }, { "epoch": 0.6581438627720689, "grad_norm": 0.302343487739563, "learning_rate": 8.48492536648965e-06, "loss": 0.039, "step": 81340 }, { "epoch": 0.6582247754672708, "grad_norm": 0.5194101929664612, "learning_rate": 8.484419000106931e-06, "loss": 0.0317, "step": 81350 }, { "epoch": 0.6583056881624727, "grad_norm": 0.14836706221103668, "learning_rate": 8.483912564234833e-06, "loss": 0.0445, "step": 81360 }, { "epoch": 0.6583866008576745, "grad_norm": 0.2191523313522339, "learning_rate": 8.483406058883453e-06, "loss": 0.0275, "step": 81370 }, { "epoch": 0.6584675135528765, "grad_norm": 0.38076332211494446, "learning_rate": 8.48289948406289e-06, "loss": 0.0428, "step": 81380 }, { "epoch": 0.6585484262480783, "grad_norm": 0.5154664516448975, "learning_rate": 8.482392839783253e-06, "loss": 0.031, "step": 81390 }, { "epoch": 0.6586293389432802, "grad_norm": 0.3388066291809082, "learning_rate": 8.481886126054641e-06, "loss": 0.0193, "step": 81400 }, { "epoch": 0.6587102516384821, "grad_norm": 0.6722097992897034, "learning_rate": 8.48137934288716e-06, "loss": 0.0599, "step": 81410 }, { "epoch": 0.658791164333684, "grad_norm": 0.38224533200263977, "learning_rate": 8.480872490290919e-06, "loss": 0.0594, "step": 81420 }, { "epoch": 0.6588720770288858, "grad_norm": 0.3288184702396393, "learning_rate": 8.480365568276024e-06, "loss": 0.0355, "step": 81430 }, { "epoch": 0.6589529897240877, "grad_norm": 0.3170925974845886, "learning_rate": 8.479858576852587e-06, "loss": 0.0379, "step": 81440 }, { "epoch": 0.6590339024192896, "grad_norm": 0.47493883967399597, "learning_rate": 8.479351516030715e-06, "loss": 0.038, "step": 81450 }, { "epoch": 0.6591148151144914, "grad_norm": 0.22015975415706635, "learning_rate": 8.478844385820522e-06, "loss": 0.0245, "step": 81460 }, { "epoch": 0.6591957278096934, "grad_norm": 0.4455212950706482, "learning_rate": 8.478337186232122e-06, "loss": 0.0342, "step": 81470 }, { "epoch": 0.6592766405048952, "grad_norm": 0.455255389213562, "learning_rate": 8.47782991727563e-06, "loss": 0.0215, "step": 81480 }, { "epoch": 0.6593575532000971, "grad_norm": 0.6410696506500244, "learning_rate": 8.477322578961163e-06, "loss": 0.0427, "step": 81490 }, { "epoch": 0.659438465895299, "grad_norm": 0.2855600118637085, "learning_rate": 8.476815171298838e-06, "loss": 0.0377, "step": 81500 }, { "epoch": 0.6595193785905008, "grad_norm": 0.5037857890129089, "learning_rate": 8.476307694298775e-06, "loss": 0.0285, "step": 81510 }, { "epoch": 0.6596002912857027, "grad_norm": 0.6710188984870911, "learning_rate": 8.475800147971092e-06, "loss": 0.0459, "step": 81520 }, { "epoch": 0.6596812039809046, "grad_norm": 0.420121967792511, "learning_rate": 8.475292532325914e-06, "loss": 0.0312, "step": 81530 }, { "epoch": 0.6597621166761065, "grad_norm": 0.44956809282302856, "learning_rate": 8.474784847373363e-06, "loss": 0.0278, "step": 81540 }, { "epoch": 0.6598430293713083, "grad_norm": 0.3170028626918793, "learning_rate": 8.474277093123564e-06, "loss": 0.0236, "step": 81550 }, { "epoch": 0.6599239420665103, "grad_norm": 0.27014532685279846, "learning_rate": 8.473769269586642e-06, "loss": 0.0288, "step": 81560 }, { "epoch": 0.6600048547617121, "grad_norm": 0.13045789301395416, "learning_rate": 8.473261376772726e-06, "loss": 0.0299, "step": 81570 }, { "epoch": 0.6600857674569139, "grad_norm": 0.35015586018562317, "learning_rate": 8.472753414691945e-06, "loss": 0.0342, "step": 81580 }, { "epoch": 0.6601666801521159, "grad_norm": 0.33854326605796814, "learning_rate": 8.472245383354426e-06, "loss": 0.0305, "step": 81590 }, { "epoch": 0.6602475928473177, "grad_norm": 0.33936095237731934, "learning_rate": 8.471737282770305e-06, "loss": 0.0566, "step": 81600 }, { "epoch": 0.6603285055425197, "grad_norm": 0.40755581855773926, "learning_rate": 8.471229112949713e-06, "loss": 0.0387, "step": 81610 }, { "epoch": 0.6604094182377215, "grad_norm": 0.5086438655853271, "learning_rate": 8.470720873902784e-06, "loss": 0.0432, "step": 81620 }, { "epoch": 0.6604903309329234, "grad_norm": 0.4443773627281189, "learning_rate": 8.470212565639654e-06, "loss": 0.0343, "step": 81630 }, { "epoch": 0.6605712436281252, "grad_norm": 0.48946523666381836, "learning_rate": 8.469704188170462e-06, "loss": 0.0199, "step": 81640 }, { "epoch": 0.6606521563233271, "grad_norm": 0.4777382016181946, "learning_rate": 8.469195741505342e-06, "loss": 0.0306, "step": 81650 }, { "epoch": 0.660733069018529, "grad_norm": 0.4541963040828705, "learning_rate": 8.46868722565444e-06, "loss": 0.0363, "step": 81660 }, { "epoch": 0.6608139817137308, "grad_norm": 0.15679116547107697, "learning_rate": 8.46817864062789e-06, "loss": 0.0222, "step": 81670 }, { "epoch": 0.6608948944089328, "grad_norm": 0.5748046636581421, "learning_rate": 8.467669986435843e-06, "loss": 0.0312, "step": 81680 }, { "epoch": 0.6609758071041346, "grad_norm": 0.6060642600059509, "learning_rate": 8.467161263088434e-06, "loss": 0.0343, "step": 81690 }, { "epoch": 0.6610567197993366, "grad_norm": 0.1419256031513214, "learning_rate": 8.466652470595817e-06, "loss": 0.041, "step": 81700 }, { "epoch": 0.6611376324945384, "grad_norm": 0.46611830592155457, "learning_rate": 8.466143608968133e-06, "loss": 0.0392, "step": 81710 }, { "epoch": 0.6612185451897403, "grad_norm": 0.5037734508514404, "learning_rate": 8.465634678215532e-06, "loss": 0.0329, "step": 81720 }, { "epoch": 0.6612994578849422, "grad_norm": 0.37879544496536255, "learning_rate": 8.465125678348165e-06, "loss": 0.0422, "step": 81730 }, { "epoch": 0.661380370580144, "grad_norm": 0.2046823501586914, "learning_rate": 8.464616609376182e-06, "loss": 0.0298, "step": 81740 }, { "epoch": 0.6614612832753459, "grad_norm": 0.35664084553718567, "learning_rate": 8.464107471309733e-06, "loss": 0.0177, "step": 81750 }, { "epoch": 0.6615421959705478, "grad_norm": 0.584857702255249, "learning_rate": 8.463598264158974e-06, "loss": 0.0363, "step": 81760 }, { "epoch": 0.6616231086657497, "grad_norm": 0.3006435036659241, "learning_rate": 8.46308898793406e-06, "loss": 0.0267, "step": 81770 }, { "epoch": 0.6617040213609515, "grad_norm": 0.64012211561203, "learning_rate": 8.462579642645147e-06, "loss": 0.0274, "step": 81780 }, { "epoch": 0.6617849340561535, "grad_norm": 0.18685424327850342, "learning_rate": 8.462070228302392e-06, "loss": 0.0235, "step": 81790 }, { "epoch": 0.6618658467513553, "grad_norm": 1.2952189445495605, "learning_rate": 8.461560744915957e-06, "loss": 0.0274, "step": 81800 }, { "epoch": 0.6619467594465571, "grad_norm": 0.5735771656036377, "learning_rate": 8.461051192496e-06, "loss": 0.0306, "step": 81810 }, { "epoch": 0.6620276721417591, "grad_norm": 0.37327998876571655, "learning_rate": 8.460541571052683e-06, "loss": 0.0395, "step": 81820 }, { "epoch": 0.6621085848369609, "grad_norm": 0.4815393090248108, "learning_rate": 8.460031880596169e-06, "loss": 0.0429, "step": 81830 }, { "epoch": 0.6621894975321628, "grad_norm": 0.648811399936676, "learning_rate": 8.459522121136624e-06, "loss": 0.0448, "step": 81840 }, { "epoch": 0.6622704102273647, "grad_norm": 0.9680910110473633, "learning_rate": 8.459012292684215e-06, "loss": 0.0516, "step": 81850 }, { "epoch": 0.6623513229225666, "grad_norm": 0.20799976587295532, "learning_rate": 8.458502395249106e-06, "loss": 0.0202, "step": 81860 }, { "epoch": 0.6624322356177684, "grad_norm": 0.4599689245223999, "learning_rate": 8.45799242884147e-06, "loss": 0.0282, "step": 81870 }, { "epoch": 0.6625131483129703, "grad_norm": 0.6289199590682983, "learning_rate": 8.457482393471474e-06, "loss": 0.0273, "step": 81880 }, { "epoch": 0.6625940610081722, "grad_norm": 0.6317697167396545, "learning_rate": 8.456972289149292e-06, "loss": 0.0337, "step": 81890 }, { "epoch": 0.662674973703374, "grad_norm": 0.6326215863227844, "learning_rate": 8.456462115885094e-06, "loss": 0.0317, "step": 81900 }, { "epoch": 0.662755886398576, "grad_norm": 0.5219197273254395, "learning_rate": 8.455951873689056e-06, "loss": 0.034, "step": 81910 }, { "epoch": 0.6628367990937778, "grad_norm": 0.5228623151779175, "learning_rate": 8.455441562571356e-06, "loss": 0.0406, "step": 81920 }, { "epoch": 0.6629177117889797, "grad_norm": 0.44486871361732483, "learning_rate": 8.454931182542167e-06, "loss": 0.0249, "step": 81930 }, { "epoch": 0.6629986244841816, "grad_norm": 0.6494713425636292, "learning_rate": 8.45442073361167e-06, "loss": 0.0442, "step": 81940 }, { "epoch": 0.6630795371793834, "grad_norm": 0.7672041654586792, "learning_rate": 8.453910215790044e-06, "loss": 0.0422, "step": 81950 }, { "epoch": 0.6631604498745853, "grad_norm": 0.5856590867042542, "learning_rate": 8.453399629087468e-06, "loss": 0.0407, "step": 81960 }, { "epoch": 0.6632413625697872, "grad_norm": 0.5993611216545105, "learning_rate": 8.45288897351413e-06, "loss": 0.0224, "step": 81970 }, { "epoch": 0.6633222752649891, "grad_norm": 0.5239715576171875, "learning_rate": 8.452378249080208e-06, "loss": 0.0363, "step": 81980 }, { "epoch": 0.6634031879601909, "grad_norm": 0.4674836993217468, "learning_rate": 8.451867455795892e-06, "loss": 0.0309, "step": 81990 }, { "epoch": 0.6634841006553929, "grad_norm": 0.7542492747306824, "learning_rate": 8.451356593671367e-06, "loss": 0.0312, "step": 82000 }, { "epoch": 0.6635650133505947, "grad_norm": 0.9578858017921448, "learning_rate": 8.45084566271682e-06, "loss": 0.0316, "step": 82010 }, { "epoch": 0.6636459260457965, "grad_norm": 0.20289991796016693, "learning_rate": 8.450334662942443e-06, "loss": 0.0312, "step": 82020 }, { "epoch": 0.6637268387409985, "grad_norm": 0.321742445230484, "learning_rate": 8.449823594358424e-06, "loss": 0.0217, "step": 82030 }, { "epoch": 0.6638077514362003, "grad_norm": 0.16732041537761688, "learning_rate": 8.449312456974954e-06, "loss": 0.0299, "step": 82040 }, { "epoch": 0.6638886641314022, "grad_norm": 0.483506977558136, "learning_rate": 8.448801250802233e-06, "loss": 0.0312, "step": 82050 }, { "epoch": 0.6639695768266041, "grad_norm": 0.2518678307533264, "learning_rate": 8.44828997585045e-06, "loss": 0.0448, "step": 82060 }, { "epoch": 0.664050489521806, "grad_norm": 0.3833533823490143, "learning_rate": 8.447778632129801e-06, "loss": 0.0296, "step": 82070 }, { "epoch": 0.6641314022170078, "grad_norm": 0.4806936979293823, "learning_rate": 8.447267219650488e-06, "loss": 0.0223, "step": 82080 }, { "epoch": 0.6642123149122098, "grad_norm": 0.05410395562648773, "learning_rate": 8.446755738422707e-06, "loss": 0.0265, "step": 82090 }, { "epoch": 0.6642932276074116, "grad_norm": 0.24544042348861694, "learning_rate": 8.446244188456661e-06, "loss": 0.028, "step": 82100 }, { "epoch": 0.6643741403026134, "grad_norm": 0.44679516553878784, "learning_rate": 8.445732569762548e-06, "loss": 0.0261, "step": 82110 }, { "epoch": 0.6644550529978154, "grad_norm": 0.8032395243644714, "learning_rate": 8.445220882350574e-06, "loss": 0.0357, "step": 82120 }, { "epoch": 0.6645359656930172, "grad_norm": 0.5076544880867004, "learning_rate": 8.444709126230942e-06, "loss": 0.0349, "step": 82130 }, { "epoch": 0.6646168783882191, "grad_norm": 0.5989405512809753, "learning_rate": 8.444197301413858e-06, "loss": 0.0393, "step": 82140 }, { "epoch": 0.664697791083421, "grad_norm": 0.48654451966285706, "learning_rate": 8.443685407909532e-06, "loss": 0.0299, "step": 82150 }, { "epoch": 0.6647787037786229, "grad_norm": 0.6661628484725952, "learning_rate": 8.44317344572817e-06, "loss": 0.036, "step": 82160 }, { "epoch": 0.6648596164738247, "grad_norm": 0.5229478478431702, "learning_rate": 8.442661414879983e-06, "loss": 0.0302, "step": 82170 }, { "epoch": 0.6649405291690266, "grad_norm": 0.6297802329063416, "learning_rate": 8.442149315375178e-06, "loss": 0.0254, "step": 82180 }, { "epoch": 0.6650214418642285, "grad_norm": 0.25740647315979004, "learning_rate": 8.441637147223976e-06, "loss": 0.0308, "step": 82190 }, { "epoch": 0.6651023545594303, "grad_norm": 0.7877777218818665, "learning_rate": 8.441124910436584e-06, "loss": 0.0288, "step": 82200 }, { "epoch": 0.6651832672546323, "grad_norm": 0.2959911525249481, "learning_rate": 8.440612605023221e-06, "loss": 0.0374, "step": 82210 }, { "epoch": 0.6652641799498341, "grad_norm": 0.4042451083660126, "learning_rate": 8.440100230994103e-06, "loss": 0.0223, "step": 82220 }, { "epoch": 0.665345092645036, "grad_norm": 0.5625925064086914, "learning_rate": 8.439587788359448e-06, "loss": 0.0357, "step": 82230 }, { "epoch": 0.6654260053402379, "grad_norm": 0.5063510537147522, "learning_rate": 8.439075277129479e-06, "loss": 0.0363, "step": 82240 }, { "epoch": 0.6655069180354397, "grad_norm": 0.4922638535499573, "learning_rate": 8.43856269731441e-06, "loss": 0.0359, "step": 82250 }, { "epoch": 0.6655878307306416, "grad_norm": 0.1671164184808731, "learning_rate": 8.438050048924468e-06, "loss": 0.0237, "step": 82260 }, { "epoch": 0.6656687434258435, "grad_norm": 0.40523019433021545, "learning_rate": 8.437537331969877e-06, "loss": 0.0275, "step": 82270 }, { "epoch": 0.6657496561210454, "grad_norm": 0.30478376150131226, "learning_rate": 8.437024546460861e-06, "loss": 0.0419, "step": 82280 }, { "epoch": 0.6658305688162472, "grad_norm": 1.1445461511611938, "learning_rate": 8.436511692407645e-06, "loss": 0.031, "step": 82290 }, { "epoch": 0.6659114815114492, "grad_norm": 0.6112980842590332, "learning_rate": 8.435998769820458e-06, "loss": 0.0385, "step": 82300 }, { "epoch": 0.665992394206651, "grad_norm": 0.5275552868843079, "learning_rate": 8.43548577870953e-06, "loss": 0.0254, "step": 82310 }, { "epoch": 0.6660733069018528, "grad_norm": 0.6114996075630188, "learning_rate": 8.43497271908509e-06, "loss": 0.027, "step": 82320 }, { "epoch": 0.6661542195970548, "grad_norm": 0.4504844546318054, "learning_rate": 8.43445959095737e-06, "loss": 0.0267, "step": 82330 }, { "epoch": 0.6662351322922566, "grad_norm": 0.5358521938323975, "learning_rate": 8.433946394336607e-06, "loss": 0.0261, "step": 82340 }, { "epoch": 0.6663160449874586, "grad_norm": 0.4832763969898224, "learning_rate": 8.433433129233031e-06, "loss": 0.0317, "step": 82350 }, { "epoch": 0.6663969576826604, "grad_norm": 0.4731893241405487, "learning_rate": 8.432919795656878e-06, "loss": 0.0354, "step": 82360 }, { "epoch": 0.6664778703778623, "grad_norm": 0.470580518245697, "learning_rate": 8.432406393618388e-06, "loss": 0.0297, "step": 82370 }, { "epoch": 0.6665587830730642, "grad_norm": 0.055284325033426285, "learning_rate": 8.4318929231278e-06, "loss": 0.0204, "step": 82380 }, { "epoch": 0.6666396957682661, "grad_norm": 0.26045480370521545, "learning_rate": 8.43137938419535e-06, "loss": 0.0262, "step": 82390 }, { "epoch": 0.6667206084634679, "grad_norm": 0.36494216322898865, "learning_rate": 8.430865776831284e-06, "loss": 0.0263, "step": 82400 }, { "epoch": 0.6668015211586698, "grad_norm": 0.23847408592700958, "learning_rate": 8.430352101045842e-06, "loss": 0.0273, "step": 82410 }, { "epoch": 0.6668824338538717, "grad_norm": 0.6768130660057068, "learning_rate": 8.429838356849269e-06, "loss": 0.0385, "step": 82420 }, { "epoch": 0.6669633465490735, "grad_norm": 0.5226919054985046, "learning_rate": 8.429324544251811e-06, "loss": 0.0235, "step": 82430 }, { "epoch": 0.6670442592442755, "grad_norm": 0.30288344621658325, "learning_rate": 8.428810663263715e-06, "loss": 0.0226, "step": 82440 }, { "epoch": 0.6671251719394773, "grad_norm": 0.4659363627433777, "learning_rate": 8.42829671389523e-06, "loss": 0.021, "step": 82450 }, { "epoch": 0.6672060846346792, "grad_norm": 0.4306211471557617, "learning_rate": 8.427782696156603e-06, "loss": 0.0216, "step": 82460 }, { "epoch": 0.6672869973298811, "grad_norm": 0.6009572148323059, "learning_rate": 8.427268610058086e-06, "loss": 0.0391, "step": 82470 }, { "epoch": 0.6673679100250829, "grad_norm": 0.4080910086631775, "learning_rate": 8.426754455609933e-06, "loss": 0.0213, "step": 82480 }, { "epoch": 0.6674488227202848, "grad_norm": 0.5676477551460266, "learning_rate": 8.426240232822396e-06, "loss": 0.0251, "step": 82490 }, { "epoch": 0.6675297354154867, "grad_norm": 0.5063444375991821, "learning_rate": 8.425725941705731e-06, "loss": 0.0234, "step": 82500 }, { "epoch": 0.6676106481106886, "grad_norm": 0.25380364060401917, "learning_rate": 8.425211582270194e-06, "loss": 0.0306, "step": 82510 }, { "epoch": 0.6676915608058904, "grad_norm": 0.4462369978427887, "learning_rate": 8.424697154526043e-06, "loss": 0.0318, "step": 82520 }, { "epoch": 0.6677724735010924, "grad_norm": 0.35167136788368225, "learning_rate": 8.424182658483537e-06, "loss": 0.0183, "step": 82530 }, { "epoch": 0.6678533861962942, "grad_norm": 0.48335176706314087, "learning_rate": 8.423668094152938e-06, "loss": 0.0336, "step": 82540 }, { "epoch": 0.667934298891496, "grad_norm": 0.5577211976051331, "learning_rate": 8.423153461544505e-06, "loss": 0.0354, "step": 82550 }, { "epoch": 0.668015211586698, "grad_norm": 0.46391963958740234, "learning_rate": 8.422638760668503e-06, "loss": 0.029, "step": 82560 }, { "epoch": 0.6680961242818998, "grad_norm": 0.35798028111457825, "learning_rate": 8.422123991535196e-06, "loss": 0.0269, "step": 82570 }, { "epoch": 0.6681770369771017, "grad_norm": 0.4251904785633087, "learning_rate": 8.42160915415485e-06, "loss": 0.0236, "step": 82580 }, { "epoch": 0.6682579496723036, "grad_norm": 0.44802775979042053, "learning_rate": 8.421094248537734e-06, "loss": 0.041, "step": 82590 }, { "epoch": 0.6683388623675055, "grad_norm": 0.11294785141944885, "learning_rate": 8.420579274694116e-06, "loss": 0.0358, "step": 82600 }, { "epoch": 0.6684197750627073, "grad_norm": 0.4449980854988098, "learning_rate": 8.420064232634262e-06, "loss": 0.0269, "step": 82610 }, { "epoch": 0.6685006877579092, "grad_norm": 0.6131948232650757, "learning_rate": 8.419549122368451e-06, "loss": 0.0385, "step": 82620 }, { "epoch": 0.6685816004531111, "grad_norm": 0.34327009320259094, "learning_rate": 8.41903394390695e-06, "loss": 0.0264, "step": 82630 }, { "epoch": 0.6686625131483129, "grad_norm": 0.419418066740036, "learning_rate": 8.418518697260036e-06, "loss": 0.0296, "step": 82640 }, { "epoch": 0.6687434258435149, "grad_norm": 0.4197434186935425, "learning_rate": 8.418003382437982e-06, "loss": 0.0373, "step": 82650 }, { "epoch": 0.6688243385387167, "grad_norm": 0.2989445924758911, "learning_rate": 8.417487999451068e-06, "loss": 0.0243, "step": 82660 }, { "epoch": 0.6689052512339186, "grad_norm": 0.08881225436925888, "learning_rate": 8.416972548309568e-06, "loss": 0.0226, "step": 82670 }, { "epoch": 0.6689861639291205, "grad_norm": 0.3675621449947357, "learning_rate": 8.416457029023766e-06, "loss": 0.0428, "step": 82680 }, { "epoch": 0.6690670766243224, "grad_norm": 0.48388099670410156, "learning_rate": 8.41594144160394e-06, "loss": 0.0347, "step": 82690 }, { "epoch": 0.6691479893195242, "grad_norm": 0.5343604683876038, "learning_rate": 8.415425786060374e-06, "loss": 0.0332, "step": 82700 }, { "epoch": 0.6692289020147261, "grad_norm": 0.23664428293704987, "learning_rate": 8.41491006240335e-06, "loss": 0.032, "step": 82710 }, { "epoch": 0.669309814709928, "grad_norm": 0.3188827931880951, "learning_rate": 8.414394270643156e-06, "loss": 0.0225, "step": 82720 }, { "epoch": 0.6693907274051298, "grad_norm": 0.5246599316596985, "learning_rate": 8.413878410790075e-06, "loss": 0.0283, "step": 82730 }, { "epoch": 0.6694716401003318, "grad_norm": 0.4881606101989746, "learning_rate": 8.413362482854395e-06, "loss": 0.0528, "step": 82740 }, { "epoch": 0.6695525527955336, "grad_norm": 0.5346255898475647, "learning_rate": 8.412846486846409e-06, "loss": 0.0241, "step": 82750 }, { "epoch": 0.6696334654907355, "grad_norm": 0.8000044822692871, "learning_rate": 8.412330422776402e-06, "loss": 0.0531, "step": 82760 }, { "epoch": 0.6697143781859374, "grad_norm": 0.49099448323249817, "learning_rate": 8.41181429065467e-06, "loss": 0.0355, "step": 82770 }, { "epoch": 0.6697952908811392, "grad_norm": 0.6063492298126221, "learning_rate": 8.411298090491504e-06, "loss": 0.0334, "step": 82780 }, { "epoch": 0.6698762035763411, "grad_norm": 0.29013365507125854, "learning_rate": 8.410781822297199e-06, "loss": 0.0429, "step": 82790 }, { "epoch": 0.669957116271543, "grad_norm": 0.3787589967250824, "learning_rate": 8.410265486082051e-06, "loss": 0.0394, "step": 82800 }, { "epoch": 0.6700380289667449, "grad_norm": 0.7315958738327026, "learning_rate": 8.409749081856357e-06, "loss": 0.0263, "step": 82810 }, { "epoch": 0.6701189416619467, "grad_norm": 0.413426011800766, "learning_rate": 8.409232609630419e-06, "loss": 0.0247, "step": 82820 }, { "epoch": 0.6701998543571487, "grad_norm": 0.6544117331504822, "learning_rate": 8.40871606941453e-06, "loss": 0.0397, "step": 82830 }, { "epoch": 0.6702807670523505, "grad_norm": 1.1014927625656128, "learning_rate": 8.408199461218995e-06, "loss": 0.0432, "step": 82840 }, { "epoch": 0.6703616797475523, "grad_norm": 0.21308064460754395, "learning_rate": 8.40768278505412e-06, "loss": 0.0204, "step": 82850 }, { "epoch": 0.6704425924427543, "grad_norm": 0.9172341227531433, "learning_rate": 8.407166040930204e-06, "loss": 0.0431, "step": 82860 }, { "epoch": 0.6705235051379561, "grad_norm": 0.30998560786247253, "learning_rate": 8.406649228857554e-06, "loss": 0.0387, "step": 82870 }, { "epoch": 0.670604417833158, "grad_norm": 0.2033311277627945, "learning_rate": 8.406132348846475e-06, "loss": 0.0255, "step": 82880 }, { "epoch": 0.6706853305283599, "grad_norm": 0.19744981825351715, "learning_rate": 8.405615400907281e-06, "loss": 0.0307, "step": 82890 }, { "epoch": 0.6707662432235618, "grad_norm": 0.533369243144989, "learning_rate": 8.405098385050275e-06, "loss": 0.0203, "step": 82900 }, { "epoch": 0.6708471559187636, "grad_norm": 0.7428085803985596, "learning_rate": 8.404581301285771e-06, "loss": 0.038, "step": 82910 }, { "epoch": 0.6709280686139655, "grad_norm": 0.6248858571052551, "learning_rate": 8.40406414962408e-06, "loss": 0.0295, "step": 82920 }, { "epoch": 0.6710089813091674, "grad_norm": 0.31425508856773376, "learning_rate": 8.403546930075516e-06, "loss": 0.0351, "step": 82930 }, { "epoch": 0.6710898940043692, "grad_norm": 0.4603425860404968, "learning_rate": 8.403029642650394e-06, "loss": 0.0283, "step": 82940 }, { "epoch": 0.6711708066995712, "grad_norm": 0.38331863284111023, "learning_rate": 8.402512287359028e-06, "loss": 0.0342, "step": 82950 }, { "epoch": 0.671251719394773, "grad_norm": 0.573320746421814, "learning_rate": 8.401994864211738e-06, "loss": 0.0252, "step": 82960 }, { "epoch": 0.671332632089975, "grad_norm": 0.3027283549308777, "learning_rate": 8.401477373218844e-06, "loss": 0.0224, "step": 82970 }, { "epoch": 0.6714135447851768, "grad_norm": 0.7374649047851562, "learning_rate": 8.400959814390663e-06, "loss": 0.0346, "step": 82980 }, { "epoch": 0.6714944574803787, "grad_norm": 0.5186294913291931, "learning_rate": 8.400442187737519e-06, "loss": 0.0218, "step": 82990 }, { "epoch": 0.6715753701755806, "grad_norm": 0.39969080686569214, "learning_rate": 8.399924493269735e-06, "loss": 0.0437, "step": 83000 }, { "epoch": 0.6716562828707824, "grad_norm": 0.634730339050293, "learning_rate": 8.399406730997634e-06, "loss": 0.0394, "step": 83010 }, { "epoch": 0.6717371955659843, "grad_norm": 0.18795637786388397, "learning_rate": 8.398888900931542e-06, "loss": 0.028, "step": 83020 }, { "epoch": 0.6718181082611862, "grad_norm": 0.4925704896450043, "learning_rate": 8.398371003081787e-06, "loss": 0.0276, "step": 83030 }, { "epoch": 0.6718990209563881, "grad_norm": 0.5025413036346436, "learning_rate": 8.397853037458695e-06, "loss": 0.0321, "step": 83040 }, { "epoch": 0.6719799336515899, "grad_norm": 0.7633575201034546, "learning_rate": 8.397335004072598e-06, "loss": 0.0343, "step": 83050 }, { "epoch": 0.6720608463467919, "grad_norm": 0.4290315508842468, "learning_rate": 8.396816902933826e-06, "loss": 0.034, "step": 83060 }, { "epoch": 0.6721417590419937, "grad_norm": 0.45112547278404236, "learning_rate": 8.396298734052712e-06, "loss": 0.0349, "step": 83070 }, { "epoch": 0.6722226717371955, "grad_norm": 0.4362965226173401, "learning_rate": 8.395780497439592e-06, "loss": 0.0255, "step": 83080 }, { "epoch": 0.6723035844323975, "grad_norm": 0.43697506189346313, "learning_rate": 8.395262193104796e-06, "loss": 0.0306, "step": 83090 }, { "epoch": 0.6723844971275993, "grad_norm": 0.18210309743881226, "learning_rate": 8.394743821058664e-06, "loss": 0.0323, "step": 83100 }, { "epoch": 0.6724654098228012, "grad_norm": 0.3040054440498352, "learning_rate": 8.394225381311534e-06, "loss": 0.0205, "step": 83110 }, { "epoch": 0.6725463225180031, "grad_norm": 0.46250301599502563, "learning_rate": 8.393706873873743e-06, "loss": 0.0545, "step": 83120 }, { "epoch": 0.672627235213205, "grad_norm": 0.3233344852924347, "learning_rate": 8.393188298755634e-06, "loss": 0.0342, "step": 83130 }, { "epoch": 0.6727081479084068, "grad_norm": 0.32148507237434387, "learning_rate": 8.392669655967546e-06, "loss": 0.0224, "step": 83140 }, { "epoch": 0.6727890606036087, "grad_norm": 0.42606624960899353, "learning_rate": 8.392150945519825e-06, "loss": 0.0227, "step": 83150 }, { "epoch": 0.6728699732988106, "grad_norm": 0.30737069249153137, "learning_rate": 8.391632167422817e-06, "loss": 0.0291, "step": 83160 }, { "epoch": 0.6729508859940124, "grad_norm": 0.6416829824447632, "learning_rate": 8.391113321686863e-06, "loss": 0.0314, "step": 83170 }, { "epoch": 0.6730317986892144, "grad_norm": 0.5269066691398621, "learning_rate": 8.390594408322313e-06, "loss": 0.0293, "step": 83180 }, { "epoch": 0.6731127113844162, "grad_norm": 0.4387992024421692, "learning_rate": 8.390075427339514e-06, "loss": 0.0404, "step": 83190 }, { "epoch": 0.6731936240796181, "grad_norm": 0.42474696040153503, "learning_rate": 8.38955637874882e-06, "loss": 0.0185, "step": 83200 }, { "epoch": 0.67327453677482, "grad_norm": 0.5926992893218994, "learning_rate": 8.389037262560578e-06, "loss": 0.0329, "step": 83210 }, { "epoch": 0.6733554494700218, "grad_norm": 0.28915297985076904, "learning_rate": 8.388518078785144e-06, "loss": 0.0229, "step": 83220 }, { "epoch": 0.6734363621652237, "grad_norm": 0.37764981389045715, "learning_rate": 8.387998827432867e-06, "loss": 0.0333, "step": 83230 }, { "epoch": 0.6735172748604256, "grad_norm": 0.38477787375450134, "learning_rate": 8.387479508514109e-06, "loss": 0.0216, "step": 83240 }, { "epoch": 0.6735981875556275, "grad_norm": 1.1215533018112183, "learning_rate": 8.386960122039223e-06, "loss": 0.0504, "step": 83250 }, { "epoch": 0.6736791002508293, "grad_norm": 0.41450461745262146, "learning_rate": 8.386440668018566e-06, "loss": 0.0296, "step": 83260 }, { "epoch": 0.6737600129460313, "grad_norm": 0.5169064998626709, "learning_rate": 8.3859211464625e-06, "loss": 0.0242, "step": 83270 }, { "epoch": 0.6738409256412331, "grad_norm": 0.8821696639060974, "learning_rate": 8.385401557381385e-06, "loss": 0.0432, "step": 83280 }, { "epoch": 0.673921838336435, "grad_norm": 0.34791016578674316, "learning_rate": 8.384881900785582e-06, "loss": 0.0459, "step": 83290 }, { "epoch": 0.6740027510316369, "grad_norm": 0.2759133279323578, "learning_rate": 8.384362176685455e-06, "loss": 0.0557, "step": 83300 }, { "epoch": 0.6740836637268387, "grad_norm": 0.3115116059780121, "learning_rate": 8.38384238509137e-06, "loss": 0.0193, "step": 83310 }, { "epoch": 0.6741645764220406, "grad_norm": 0.4366922974586487, "learning_rate": 8.383322526013693e-06, "loss": 0.0282, "step": 83320 }, { "epoch": 0.6742454891172425, "grad_norm": 0.81833815574646, "learning_rate": 8.382802599462789e-06, "loss": 0.0346, "step": 83330 }, { "epoch": 0.6743264018124444, "grad_norm": 0.4110862910747528, "learning_rate": 8.38228260544903e-06, "loss": 0.0411, "step": 83340 }, { "epoch": 0.6744073145076462, "grad_norm": 0.4688875377178192, "learning_rate": 8.381762543982785e-06, "loss": 0.0224, "step": 83350 }, { "epoch": 0.6744882272028482, "grad_norm": 0.48256003856658936, "learning_rate": 8.381242415074425e-06, "loss": 0.0309, "step": 83360 }, { "epoch": 0.67456913989805, "grad_norm": 0.3124481737613678, "learning_rate": 8.38072221873432e-06, "loss": 0.0359, "step": 83370 }, { "epoch": 0.6746500525932518, "grad_norm": 0.4685015082359314, "learning_rate": 8.38020195497285e-06, "loss": 0.0306, "step": 83380 }, { "epoch": 0.6747309652884538, "grad_norm": 0.5878177881240845, "learning_rate": 8.379681623800389e-06, "loss": 0.0426, "step": 83390 }, { "epoch": 0.6748118779836556, "grad_norm": 0.38913610577583313, "learning_rate": 8.379161225227312e-06, "loss": 0.0353, "step": 83400 }, { "epoch": 0.6748927906788575, "grad_norm": 0.533766508102417, "learning_rate": 8.378640759263999e-06, "loss": 0.0286, "step": 83410 }, { "epoch": 0.6749737033740594, "grad_norm": 0.425632506608963, "learning_rate": 8.378120225920826e-06, "loss": 0.0403, "step": 83420 }, { "epoch": 0.6750546160692613, "grad_norm": 0.5903396010398865, "learning_rate": 8.377599625208178e-06, "loss": 0.0477, "step": 83430 }, { "epoch": 0.6751355287644631, "grad_norm": 0.3749857544898987, "learning_rate": 8.377078957136437e-06, "loss": 0.0296, "step": 83440 }, { "epoch": 0.675216441459665, "grad_norm": 0.7771029472351074, "learning_rate": 8.376558221715984e-06, "loss": 0.0282, "step": 83450 }, { "epoch": 0.6752973541548669, "grad_norm": 0.3998693823814392, "learning_rate": 8.376037418957206e-06, "loss": 0.0305, "step": 83460 }, { "epoch": 0.6753782668500687, "grad_norm": 0.6114205718040466, "learning_rate": 8.375516548870489e-06, "loss": 0.0311, "step": 83470 }, { "epoch": 0.6754591795452707, "grad_norm": 0.7876748442649841, "learning_rate": 8.37499561146622e-06, "loss": 0.0291, "step": 83480 }, { "epoch": 0.6755400922404725, "grad_norm": 0.2015649676322937, "learning_rate": 8.374474606754789e-06, "loss": 0.0254, "step": 83490 }, { "epoch": 0.6756210049356745, "grad_norm": 0.3116620182991028, "learning_rate": 8.373953534746585e-06, "loss": 0.0384, "step": 83500 }, { "epoch": 0.6757019176308763, "grad_norm": 0.46363168954849243, "learning_rate": 8.373432395452001e-06, "loss": 0.0408, "step": 83510 }, { "epoch": 0.6757828303260781, "grad_norm": 0.6930999159812927, "learning_rate": 8.37291118888143e-06, "loss": 0.0371, "step": 83520 }, { "epoch": 0.67586374302128, "grad_norm": 0.2568860948085785, "learning_rate": 8.372389915045264e-06, "loss": 0.0345, "step": 83530 }, { "epoch": 0.6759446557164819, "grad_norm": 0.4825589060783386, "learning_rate": 8.371868573953904e-06, "loss": 0.0317, "step": 83540 }, { "epoch": 0.6760255684116838, "grad_norm": 0.5737583041191101, "learning_rate": 8.37134716561774e-06, "loss": 0.0232, "step": 83550 }, { "epoch": 0.6761064811068856, "grad_norm": 0.6386357545852661, "learning_rate": 8.370825690047176e-06, "loss": 0.0288, "step": 83560 }, { "epoch": 0.6761873938020876, "grad_norm": 0.9949259757995605, "learning_rate": 8.37030414725261e-06, "loss": 0.0478, "step": 83570 }, { "epoch": 0.6762683064972894, "grad_norm": 0.2759796977043152, "learning_rate": 8.369782537244441e-06, "loss": 0.0349, "step": 83580 }, { "epoch": 0.6763492191924912, "grad_norm": 0.6143860816955566, "learning_rate": 8.369260860033075e-06, "loss": 0.0343, "step": 83590 }, { "epoch": 0.6764301318876932, "grad_norm": 0.7258111238479614, "learning_rate": 8.368739115628915e-06, "loss": 0.0488, "step": 83600 }, { "epoch": 0.676511044582895, "grad_norm": 0.5733588337898254, "learning_rate": 8.368217304042363e-06, "loss": 0.0412, "step": 83610 }, { "epoch": 0.676591957278097, "grad_norm": 0.20310768485069275, "learning_rate": 8.367695425283829e-06, "loss": 0.0274, "step": 83620 }, { "epoch": 0.6766728699732988, "grad_norm": 0.5658326148986816, "learning_rate": 8.367173479363717e-06, "loss": 0.0314, "step": 83630 }, { "epoch": 0.6767537826685007, "grad_norm": 0.42704007029533386, "learning_rate": 8.366651466292439e-06, "loss": 0.0289, "step": 83640 }, { "epoch": 0.6768346953637026, "grad_norm": 0.28060322999954224, "learning_rate": 8.366129386080406e-06, "loss": 0.0228, "step": 83650 }, { "epoch": 0.6769156080589045, "grad_norm": 0.4852750599384308, "learning_rate": 8.365607238738028e-06, "loss": 0.0414, "step": 83660 }, { "epoch": 0.6769965207541063, "grad_norm": 0.4370644688606262, "learning_rate": 8.365085024275718e-06, "loss": 0.023, "step": 83670 }, { "epoch": 0.6770774334493082, "grad_norm": 0.42326244711875916, "learning_rate": 8.364562742703893e-06, "loss": 0.0417, "step": 83680 }, { "epoch": 0.6771583461445101, "grad_norm": 0.47133082151412964, "learning_rate": 8.364040394032965e-06, "loss": 0.0275, "step": 83690 }, { "epoch": 0.6772392588397119, "grad_norm": 0.3410399556159973, "learning_rate": 8.363517978273354e-06, "loss": 0.0285, "step": 83700 }, { "epoch": 0.6773201715349139, "grad_norm": 0.2905324101448059, "learning_rate": 8.362995495435479e-06, "loss": 0.0324, "step": 83710 }, { "epoch": 0.6774010842301157, "grad_norm": 0.3962644338607788, "learning_rate": 8.362472945529755e-06, "loss": 0.0325, "step": 83720 }, { "epoch": 0.6774819969253176, "grad_norm": 0.2622925043106079, "learning_rate": 8.36195032856661e-06, "loss": 0.0334, "step": 83730 }, { "epoch": 0.6775629096205195, "grad_norm": 0.5766133069992065, "learning_rate": 8.361427644556462e-06, "loss": 0.0312, "step": 83740 }, { "epoch": 0.6776438223157213, "grad_norm": 0.7976135015487671, "learning_rate": 8.360904893509736e-06, "loss": 0.0325, "step": 83750 }, { "epoch": 0.6777247350109232, "grad_norm": 0.7412273287773132, "learning_rate": 8.360382075436857e-06, "loss": 0.0332, "step": 83760 }, { "epoch": 0.6778056477061251, "grad_norm": 0.5151524543762207, "learning_rate": 8.359859190348251e-06, "loss": 0.0303, "step": 83770 }, { "epoch": 0.677886560401327, "grad_norm": 0.3988264203071594, "learning_rate": 8.359336238254347e-06, "loss": 0.0231, "step": 83780 }, { "epoch": 0.6779674730965288, "grad_norm": 0.23170103132724762, "learning_rate": 8.358813219165573e-06, "loss": 0.0172, "step": 83790 }, { "epoch": 0.6780483857917308, "grad_norm": 0.30276748538017273, "learning_rate": 8.358290133092361e-06, "loss": 0.0298, "step": 83800 }, { "epoch": 0.6781292984869326, "grad_norm": 0.23609517514705658, "learning_rate": 8.357766980045142e-06, "loss": 0.0367, "step": 83810 }, { "epoch": 0.6782102111821344, "grad_norm": 0.4047112762928009, "learning_rate": 8.357243760034349e-06, "loss": 0.0278, "step": 83820 }, { "epoch": 0.6782911238773364, "grad_norm": 0.5340275168418884, "learning_rate": 8.356720473070416e-06, "loss": 0.0386, "step": 83830 }, { "epoch": 0.6783720365725382, "grad_norm": 0.41052377223968506, "learning_rate": 8.35619711916378e-06, "loss": 0.0348, "step": 83840 }, { "epoch": 0.6784529492677401, "grad_norm": 0.4006603956222534, "learning_rate": 8.355673698324878e-06, "loss": 0.0191, "step": 83850 }, { "epoch": 0.678533861962942, "grad_norm": 0.6043909192085266, "learning_rate": 8.355150210564146e-06, "loss": 0.0291, "step": 83860 }, { "epoch": 0.6786147746581439, "grad_norm": 0.17597278952598572, "learning_rate": 8.354626655892028e-06, "loss": 0.027, "step": 83870 }, { "epoch": 0.6786956873533457, "grad_norm": 1.2252095937728882, "learning_rate": 8.354103034318963e-06, "loss": 0.034, "step": 83880 }, { "epoch": 0.6787766000485476, "grad_norm": 0.6770248413085938, "learning_rate": 8.353579345855396e-06, "loss": 0.0298, "step": 83890 }, { "epoch": 0.6788575127437495, "grad_norm": 0.6978321671485901, "learning_rate": 8.353055590511765e-06, "loss": 0.0409, "step": 83900 }, { "epoch": 0.6789384254389513, "grad_norm": 1.3378334045410156, "learning_rate": 8.35253176829852e-06, "loss": 0.0289, "step": 83910 }, { "epoch": 0.6790193381341533, "grad_norm": 0.383077472448349, "learning_rate": 8.352007879226108e-06, "loss": 0.0257, "step": 83920 }, { "epoch": 0.6791002508293551, "grad_norm": 0.5686435699462891, "learning_rate": 8.351483923304976e-06, "loss": 0.0271, "step": 83930 }, { "epoch": 0.679181163524557, "grad_norm": 0.5812236666679382, "learning_rate": 8.35095990054557e-06, "loss": 0.0403, "step": 83940 }, { "epoch": 0.6792620762197589, "grad_norm": 0.5502166152000427, "learning_rate": 8.350435810958342e-06, "loss": 0.028, "step": 83950 }, { "epoch": 0.6793429889149608, "grad_norm": 0.5362971425056458, "learning_rate": 8.349911654553746e-06, "loss": 0.0426, "step": 83960 }, { "epoch": 0.6794239016101626, "grad_norm": 0.5119895935058594, "learning_rate": 8.349387431342236e-06, "loss": 0.034, "step": 83970 }, { "epoch": 0.6795048143053645, "grad_norm": 0.4741727411746979, "learning_rate": 8.348863141334262e-06, "loss": 0.0138, "step": 83980 }, { "epoch": 0.6795857270005664, "grad_norm": 0.604144275188446, "learning_rate": 8.348338784540284e-06, "loss": 0.0511, "step": 83990 }, { "epoch": 0.6796666396957682, "grad_norm": 0.37517088651657104, "learning_rate": 8.347814360970756e-06, "loss": 0.0366, "step": 84000 }, { "epoch": 0.6797475523909702, "grad_norm": 0.9561023712158203, "learning_rate": 8.347289870636139e-06, "loss": 0.0284, "step": 84010 }, { "epoch": 0.679828465086172, "grad_norm": 0.6032944917678833, "learning_rate": 8.346765313546891e-06, "loss": 0.0353, "step": 84020 }, { "epoch": 0.679909377781374, "grad_norm": 0.41852834820747375, "learning_rate": 8.346240689713477e-06, "loss": 0.0313, "step": 84030 }, { "epoch": 0.6799902904765758, "grad_norm": 0.7049863338470459, "learning_rate": 8.345715999146354e-06, "loss": 0.0282, "step": 84040 }, { "epoch": 0.6800712031717776, "grad_norm": 0.4305834472179413, "learning_rate": 8.34519124185599e-06, "loss": 0.0187, "step": 84050 }, { "epoch": 0.6801521158669795, "grad_norm": 0.24406738579273224, "learning_rate": 8.344666417852849e-06, "loss": 0.0305, "step": 84060 }, { "epoch": 0.6802330285621814, "grad_norm": 0.32648617029190063, "learning_rate": 8.344141527147396e-06, "loss": 0.0207, "step": 84070 }, { "epoch": 0.6803139412573833, "grad_norm": 0.48893317580223083, "learning_rate": 8.3436165697501e-06, "loss": 0.0366, "step": 84080 }, { "epoch": 0.6803948539525851, "grad_norm": 0.17512215673923492, "learning_rate": 8.343091545671432e-06, "loss": 0.0257, "step": 84090 }, { "epoch": 0.6804757666477871, "grad_norm": 0.3850305378437042, "learning_rate": 8.342566454921857e-06, "loss": 0.0254, "step": 84100 }, { "epoch": 0.6805566793429889, "grad_norm": 0.2880414128303528, "learning_rate": 8.342041297511854e-06, "loss": 0.0386, "step": 84110 }, { "epoch": 0.6806375920381907, "grad_norm": 0.4192744493484497, "learning_rate": 8.34151607345189e-06, "loss": 0.0376, "step": 84120 }, { "epoch": 0.6807185047333927, "grad_norm": 0.3653036952018738, "learning_rate": 8.340990782752445e-06, "loss": 0.0335, "step": 84130 }, { "epoch": 0.6807994174285945, "grad_norm": 0.36465418338775635, "learning_rate": 8.34046542542399e-06, "loss": 0.031, "step": 84140 }, { "epoch": 0.6808803301237965, "grad_norm": 0.40338680148124695, "learning_rate": 8.339940001477004e-06, "loss": 0.0409, "step": 84150 }, { "epoch": 0.6809612428189983, "grad_norm": 0.3930581510066986, "learning_rate": 8.339414510921968e-06, "loss": 0.0403, "step": 84160 }, { "epoch": 0.6810421555142002, "grad_norm": 0.40295952558517456, "learning_rate": 8.338888953769357e-06, "loss": 0.0244, "step": 84170 }, { "epoch": 0.681123068209402, "grad_norm": 0.25985187292099, "learning_rate": 8.338363330029656e-06, "loss": 0.0355, "step": 84180 }, { "epoch": 0.6812039809046039, "grad_norm": 0.571614146232605, "learning_rate": 8.337837639713344e-06, "loss": 0.0366, "step": 84190 }, { "epoch": 0.6812848935998058, "grad_norm": 0.4150213897228241, "learning_rate": 8.33731188283091e-06, "loss": 0.0271, "step": 84200 }, { "epoch": 0.6813658062950076, "grad_norm": 0.09275609254837036, "learning_rate": 8.336786059392832e-06, "loss": 0.0347, "step": 84210 }, { "epoch": 0.6814467189902096, "grad_norm": 0.3362515866756439, "learning_rate": 8.336260169409602e-06, "loss": 0.0417, "step": 84220 }, { "epoch": 0.6815276316854114, "grad_norm": 0.42614689469337463, "learning_rate": 8.335734212891706e-06, "loss": 0.0218, "step": 84230 }, { "epoch": 0.6816085443806134, "grad_norm": 0.34601300954818726, "learning_rate": 8.335208189849634e-06, "loss": 0.0502, "step": 84240 }, { "epoch": 0.6816894570758152, "grad_norm": 0.5763827562332153, "learning_rate": 8.334682100293875e-06, "loss": 0.0311, "step": 84250 }, { "epoch": 0.6817703697710171, "grad_norm": 1.2158282995224, "learning_rate": 8.33415594423492e-06, "loss": 0.0254, "step": 84260 }, { "epoch": 0.681851282466219, "grad_norm": 0.41005879640579224, "learning_rate": 8.333629721683266e-06, "loss": 0.0231, "step": 84270 }, { "epoch": 0.6819321951614208, "grad_norm": 0.32231050729751587, "learning_rate": 8.333103432649405e-06, "loss": 0.0204, "step": 84280 }, { "epoch": 0.6820131078566227, "grad_norm": 0.4549086391925812, "learning_rate": 8.33257707714383e-06, "loss": 0.0405, "step": 84290 }, { "epoch": 0.6820940205518246, "grad_norm": 0.45461827516555786, "learning_rate": 8.332050655177041e-06, "loss": 0.0338, "step": 84300 }, { "epoch": 0.6821749332470265, "grad_norm": 0.2519436180591583, "learning_rate": 8.331524166759537e-06, "loss": 0.018, "step": 84310 }, { "epoch": 0.6822558459422283, "grad_norm": 0.5668514966964722, "learning_rate": 8.330997611901816e-06, "loss": 0.0391, "step": 84320 }, { "epoch": 0.6823367586374303, "grad_norm": 0.7319188714027405, "learning_rate": 8.33047099061438e-06, "loss": 0.0364, "step": 84330 }, { "epoch": 0.6824176713326321, "grad_norm": 0.34299251437187195, "learning_rate": 8.32994430290773e-06, "loss": 0.0379, "step": 84340 }, { "epoch": 0.6824985840278339, "grad_norm": 0.45919468998908997, "learning_rate": 8.329417548792374e-06, "loss": 0.0215, "step": 84350 }, { "epoch": 0.6825794967230359, "grad_norm": 0.3849453330039978, "learning_rate": 8.328890728278811e-06, "loss": 0.0365, "step": 84360 }, { "epoch": 0.6826604094182377, "grad_norm": 0.4335198402404785, "learning_rate": 8.328363841377549e-06, "loss": 0.0443, "step": 84370 }, { "epoch": 0.6827413221134396, "grad_norm": 0.3241945207118988, "learning_rate": 8.327836888099099e-06, "loss": 0.0235, "step": 84380 }, { "epoch": 0.6828222348086415, "grad_norm": 0.4604441523551941, "learning_rate": 8.327309868453967e-06, "loss": 0.0353, "step": 84390 }, { "epoch": 0.6829031475038434, "grad_norm": 0.6016365885734558, "learning_rate": 8.326782782452664e-06, "loss": 0.0413, "step": 84400 }, { "epoch": 0.6829840601990452, "grad_norm": 0.19862960278987885, "learning_rate": 8.3262556301057e-06, "loss": 0.0362, "step": 84410 }, { "epoch": 0.6830649728942471, "grad_norm": 0.47519204020500183, "learning_rate": 8.325728411423591e-06, "loss": 0.0511, "step": 84420 }, { "epoch": 0.683145885589449, "grad_norm": 0.3557933270931244, "learning_rate": 8.325201126416849e-06, "loss": 0.0225, "step": 84430 }, { "epoch": 0.6832267982846508, "grad_norm": 0.935413122177124, "learning_rate": 8.324673775095992e-06, "loss": 0.0491, "step": 84440 }, { "epoch": 0.6833077109798528, "grad_norm": 0.2756664752960205, "learning_rate": 8.324146357471534e-06, "loss": 0.0387, "step": 84450 }, { "epoch": 0.6833886236750546, "grad_norm": 0.4517178535461426, "learning_rate": 8.323618873553995e-06, "loss": 0.0313, "step": 84460 }, { "epoch": 0.6834695363702565, "grad_norm": 0.04706384614109993, "learning_rate": 8.323091323353893e-06, "loss": 0.0235, "step": 84470 }, { "epoch": 0.6835504490654584, "grad_norm": 0.6912187933921814, "learning_rate": 8.32256370688175e-06, "loss": 0.0303, "step": 84480 }, { "epoch": 0.6836313617606602, "grad_norm": 0.3115847110748291, "learning_rate": 8.322036024148089e-06, "loss": 0.0239, "step": 84490 }, { "epoch": 0.6837122744558621, "grad_norm": 0.2674226760864258, "learning_rate": 8.321508275163432e-06, "loss": 0.0434, "step": 84500 }, { "epoch": 0.683793187151064, "grad_norm": 0.32738107442855835, "learning_rate": 8.320980459938307e-06, "loss": 0.0342, "step": 84510 }, { "epoch": 0.6838740998462659, "grad_norm": 0.3614213466644287, "learning_rate": 8.320452578483233e-06, "loss": 0.0302, "step": 84520 }, { "epoch": 0.6839550125414677, "grad_norm": 0.5063504576683044, "learning_rate": 8.319924630808745e-06, "loss": 0.0388, "step": 84530 }, { "epoch": 0.6840359252366697, "grad_norm": 0.361702024936676, "learning_rate": 8.319396616925367e-06, "loss": 0.0179, "step": 84540 }, { "epoch": 0.6841168379318715, "grad_norm": 0.6912369132041931, "learning_rate": 8.318868536843634e-06, "loss": 0.0324, "step": 84550 }, { "epoch": 0.6841977506270734, "grad_norm": 0.2554706037044525, "learning_rate": 8.318340390574072e-06, "loss": 0.03, "step": 84560 }, { "epoch": 0.6842786633222753, "grad_norm": 0.3825637102127075, "learning_rate": 8.317812178127219e-06, "loss": 0.019, "step": 84570 }, { "epoch": 0.6843595760174771, "grad_norm": 0.6039577722549438, "learning_rate": 8.317283899513604e-06, "loss": 0.0284, "step": 84580 }, { "epoch": 0.684440488712679, "grad_norm": 0.7513209581375122, "learning_rate": 8.316755554743764e-06, "loss": 0.0375, "step": 84590 }, { "epoch": 0.6845214014078809, "grad_norm": 0.4375590980052948, "learning_rate": 8.316227143828236e-06, "loss": 0.0298, "step": 84600 }, { "epoch": 0.6846023141030828, "grad_norm": 0.2156282365322113, "learning_rate": 8.31569866677756e-06, "loss": 0.0304, "step": 84610 }, { "epoch": 0.6846832267982846, "grad_norm": 0.4383416473865509, "learning_rate": 8.315170123602276e-06, "loss": 0.0293, "step": 84620 }, { "epoch": 0.6847641394934866, "grad_norm": 0.36163002252578735, "learning_rate": 8.31464151431292e-06, "loss": 0.0203, "step": 84630 }, { "epoch": 0.6848450521886884, "grad_norm": 0.5696699619293213, "learning_rate": 8.314112838920037e-06, "loss": 0.0349, "step": 84640 }, { "epoch": 0.6849259648838902, "grad_norm": 0.2919198274612427, "learning_rate": 8.31358409743417e-06, "loss": 0.0361, "step": 84650 }, { "epoch": 0.6850068775790922, "grad_norm": 0.28449296951293945, "learning_rate": 8.313055289865864e-06, "loss": 0.0204, "step": 84660 }, { "epoch": 0.685087790274294, "grad_norm": 0.9926720261573792, "learning_rate": 8.312526416225663e-06, "loss": 0.0272, "step": 84670 }, { "epoch": 0.685168702969496, "grad_norm": 0.27441802620887756, "learning_rate": 8.311997476524117e-06, "loss": 0.02, "step": 84680 }, { "epoch": 0.6852496156646978, "grad_norm": 0.46722331643104553, "learning_rate": 8.31146847077177e-06, "loss": 0.0356, "step": 84690 }, { "epoch": 0.6853305283598997, "grad_norm": 0.5082824230194092, "learning_rate": 8.310939398979178e-06, "loss": 0.0239, "step": 84700 }, { "epoch": 0.6854114410551015, "grad_norm": 0.9945722818374634, "learning_rate": 8.31041026115689e-06, "loss": 0.037, "step": 84710 }, { "epoch": 0.6854923537503034, "grad_norm": 0.8390814065933228, "learning_rate": 8.309881057315458e-06, "loss": 0.0318, "step": 84720 }, { "epoch": 0.6855732664455053, "grad_norm": 0.5552567839622498, "learning_rate": 8.309351787465435e-06, "loss": 0.0517, "step": 84730 }, { "epoch": 0.6856541791407071, "grad_norm": 0.4323924779891968, "learning_rate": 8.308822451617377e-06, "loss": 0.0304, "step": 84740 }, { "epoch": 0.6857350918359091, "grad_norm": 0.8856104612350464, "learning_rate": 8.30829304978184e-06, "loss": 0.0368, "step": 84750 }, { "epoch": 0.6858160045311109, "grad_norm": 0.2773057520389557, "learning_rate": 8.307763581969381e-06, "loss": 0.0284, "step": 84760 }, { "epoch": 0.6858969172263129, "grad_norm": 0.4687620997428894, "learning_rate": 8.307234048190563e-06, "loss": 0.0275, "step": 84770 }, { "epoch": 0.6859778299215147, "grad_norm": 1.1273653507232666, "learning_rate": 8.306704448455943e-06, "loss": 0.0257, "step": 84780 }, { "epoch": 0.6860587426167165, "grad_norm": 0.28418654203414917, "learning_rate": 8.306174782776083e-06, "loss": 0.0382, "step": 84790 }, { "epoch": 0.6861396553119185, "grad_norm": 0.34517231583595276, "learning_rate": 8.305645051161546e-06, "loss": 0.0285, "step": 84800 }, { "epoch": 0.6862205680071203, "grad_norm": 0.4369896650314331, "learning_rate": 8.305115253622898e-06, "loss": 0.0304, "step": 84810 }, { "epoch": 0.6863014807023222, "grad_norm": 0.43001314997673035, "learning_rate": 8.304585390170705e-06, "loss": 0.0344, "step": 84820 }, { "epoch": 0.686382393397524, "grad_norm": 0.35474643111228943, "learning_rate": 8.30405546081553e-06, "loss": 0.0574, "step": 84830 }, { "epoch": 0.686463306092726, "grad_norm": 0.6371811032295227, "learning_rate": 8.303525465567945e-06, "loss": 0.0274, "step": 84840 }, { "epoch": 0.6865442187879278, "grad_norm": 0.5716104507446289, "learning_rate": 8.302995404438518e-06, "loss": 0.0342, "step": 84850 }, { "epoch": 0.6866251314831298, "grad_norm": 0.4639832377433777, "learning_rate": 8.302465277437822e-06, "loss": 0.035, "step": 84860 }, { "epoch": 0.6867060441783316, "grad_norm": 0.5307438969612122, "learning_rate": 8.301935084576426e-06, "loss": 0.0251, "step": 84870 }, { "epoch": 0.6867869568735334, "grad_norm": 0.4564480185508728, "learning_rate": 8.301404825864907e-06, "loss": 0.033, "step": 84880 }, { "epoch": 0.6868678695687354, "grad_norm": 0.5713566541671753, "learning_rate": 8.300874501313839e-06, "loss": 0.0323, "step": 84890 }, { "epoch": 0.6869487822639372, "grad_norm": 0.38941946625709534, "learning_rate": 8.300344110933797e-06, "loss": 0.0344, "step": 84900 }, { "epoch": 0.6870296949591391, "grad_norm": 0.8486618399620056, "learning_rate": 8.299813654735359e-06, "loss": 0.0502, "step": 84910 }, { "epoch": 0.687110607654341, "grad_norm": 0.08783701062202454, "learning_rate": 8.299283132729103e-06, "loss": 0.0273, "step": 84920 }, { "epoch": 0.6871915203495429, "grad_norm": 0.239186629652977, "learning_rate": 8.29875254492561e-06, "loss": 0.0216, "step": 84930 }, { "epoch": 0.6872724330447447, "grad_norm": 0.7064507007598877, "learning_rate": 8.298221891335461e-06, "loss": 0.0281, "step": 84940 }, { "epoch": 0.6873533457399466, "grad_norm": 0.6553625464439392, "learning_rate": 8.297691171969239e-06, "loss": 0.0318, "step": 84950 }, { "epoch": 0.6874342584351485, "grad_norm": 0.6846970319747925, "learning_rate": 8.297160386837531e-06, "loss": 0.0312, "step": 84960 }, { "epoch": 0.6875151711303503, "grad_norm": 0.5437291860580444, "learning_rate": 8.296629535950918e-06, "loss": 0.0336, "step": 84970 }, { "epoch": 0.6875960838255523, "grad_norm": 0.2925550639629364, "learning_rate": 8.296098619319989e-06, "loss": 0.0349, "step": 84980 }, { "epoch": 0.6876769965207541, "grad_norm": 0.10093463957309723, "learning_rate": 8.295567636955331e-06, "loss": 0.0376, "step": 84990 }, { "epoch": 0.687757909215956, "grad_norm": 0.6662626266479492, "learning_rate": 8.295036588867533e-06, "loss": 0.0485, "step": 85000 }, { "epoch": 0.6878388219111579, "grad_norm": 0.18678905069828033, "learning_rate": 8.294505475067187e-06, "loss": 0.0221, "step": 85010 }, { "epoch": 0.6879197346063597, "grad_norm": 0.34364086389541626, "learning_rate": 8.293974295564884e-06, "loss": 0.0345, "step": 85020 }, { "epoch": 0.6880006473015616, "grad_norm": 0.26397618651390076, "learning_rate": 8.29344305037122e-06, "loss": 0.0201, "step": 85030 }, { "epoch": 0.6880815599967635, "grad_norm": 0.2820509374141693, "learning_rate": 8.292911739496783e-06, "loss": 0.0431, "step": 85040 }, { "epoch": 0.6881624726919654, "grad_norm": 0.3660152554512024, "learning_rate": 8.292380362952175e-06, "loss": 0.0284, "step": 85050 }, { "epoch": 0.6882433853871672, "grad_norm": 0.4213293194770813, "learning_rate": 8.29184892074799e-06, "loss": 0.0265, "step": 85060 }, { "epoch": 0.6883242980823692, "grad_norm": 0.38506677746772766, "learning_rate": 8.29131741289483e-06, "loss": 0.027, "step": 85070 }, { "epoch": 0.688405210777571, "grad_norm": 0.24219094216823578, "learning_rate": 8.290785839403292e-06, "loss": 0.0244, "step": 85080 }, { "epoch": 0.6884861234727728, "grad_norm": 0.3456324636936188, "learning_rate": 8.290254200283978e-06, "loss": 0.0359, "step": 85090 }, { "epoch": 0.6885670361679748, "grad_norm": 0.13912253081798553, "learning_rate": 8.289722495547488e-06, "loss": 0.0372, "step": 85100 }, { "epoch": 0.6886479488631766, "grad_norm": 0.37337997555732727, "learning_rate": 8.289190725204428e-06, "loss": 0.027, "step": 85110 }, { "epoch": 0.6887288615583785, "grad_norm": 0.55557781457901, "learning_rate": 8.288658889265404e-06, "loss": 0.028, "step": 85120 }, { "epoch": 0.6888097742535804, "grad_norm": 0.5079485177993774, "learning_rate": 8.288126987741021e-06, "loss": 0.0223, "step": 85130 }, { "epoch": 0.6888906869487823, "grad_norm": 0.3921903371810913, "learning_rate": 8.287595020641886e-06, "loss": 0.0289, "step": 85140 }, { "epoch": 0.6889715996439841, "grad_norm": 0.1566552221775055, "learning_rate": 8.28706298797861e-06, "loss": 0.0309, "step": 85150 }, { "epoch": 0.689052512339186, "grad_norm": 0.2722131311893463, "learning_rate": 8.2865308897618e-06, "loss": 0.0254, "step": 85160 }, { "epoch": 0.6891334250343879, "grad_norm": 0.6154417395591736, "learning_rate": 8.28599872600207e-06, "loss": 0.0245, "step": 85170 }, { "epoch": 0.6892143377295897, "grad_norm": 0.638473629951477, "learning_rate": 8.285466496710035e-06, "loss": 0.0495, "step": 85180 }, { "epoch": 0.6892952504247917, "grad_norm": 0.5266616940498352, "learning_rate": 8.284934201896304e-06, "loss": 0.0335, "step": 85190 }, { "epoch": 0.6893761631199935, "grad_norm": 0.4983271658420563, "learning_rate": 8.284401841571497e-06, "loss": 0.0358, "step": 85200 }, { "epoch": 0.6894570758151954, "grad_norm": 0.49751052260398865, "learning_rate": 8.283869415746226e-06, "loss": 0.0308, "step": 85210 }, { "epoch": 0.6895379885103973, "grad_norm": 0.4648268222808838, "learning_rate": 8.283336924431116e-06, "loss": 0.0381, "step": 85220 }, { "epoch": 0.6896189012055992, "grad_norm": 0.6589237451553345, "learning_rate": 8.282804367636779e-06, "loss": 0.0151, "step": 85230 }, { "epoch": 0.689699813900801, "grad_norm": 0.500647783279419, "learning_rate": 8.282271745373841e-06, "loss": 0.0249, "step": 85240 }, { "epoch": 0.6897807265960029, "grad_norm": 0.3539651930332184, "learning_rate": 8.281739057652922e-06, "loss": 0.0291, "step": 85250 }, { "epoch": 0.6898616392912048, "grad_norm": 0.4287804067134857, "learning_rate": 8.281206304484644e-06, "loss": 0.0428, "step": 85260 }, { "epoch": 0.6899425519864066, "grad_norm": 0.40492019057273865, "learning_rate": 8.280673485879635e-06, "loss": 0.0352, "step": 85270 }, { "epoch": 0.6900234646816086, "grad_norm": 0.41177284717559814, "learning_rate": 8.28014060184852e-06, "loss": 0.027, "step": 85280 }, { "epoch": 0.6901043773768104, "grad_norm": 0.5537838339805603, "learning_rate": 8.279607652401924e-06, "loss": 0.0259, "step": 85290 }, { "epoch": 0.6901852900720123, "grad_norm": 0.28197526931762695, "learning_rate": 8.279074637550477e-06, "loss": 0.0305, "step": 85300 }, { "epoch": 0.6902662027672142, "grad_norm": 0.4310326874256134, "learning_rate": 8.278541557304809e-06, "loss": 0.0283, "step": 85310 }, { "epoch": 0.690347115462416, "grad_norm": 0.4543233811855316, "learning_rate": 8.27800841167555e-06, "loss": 0.0227, "step": 85320 }, { "epoch": 0.690428028157618, "grad_norm": 0.6346852779388428, "learning_rate": 8.277475200673335e-06, "loss": 0.03, "step": 85330 }, { "epoch": 0.6905089408528198, "grad_norm": 0.3952999413013458, "learning_rate": 8.276941924308794e-06, "loss": 0.0235, "step": 85340 }, { "epoch": 0.6905898535480217, "grad_norm": 0.3857957720756531, "learning_rate": 8.276408582592566e-06, "loss": 0.0346, "step": 85350 }, { "epoch": 0.6906707662432235, "grad_norm": 0.5329471230506897, "learning_rate": 8.275875175535286e-06, "loss": 0.0303, "step": 85360 }, { "epoch": 0.6907516789384255, "grad_norm": 0.15912289917469025, "learning_rate": 8.27534170314759e-06, "loss": 0.0337, "step": 85370 }, { "epoch": 0.6908325916336273, "grad_norm": 0.5515905618667603, "learning_rate": 8.27480816544012e-06, "loss": 0.0408, "step": 85380 }, { "epoch": 0.6909135043288291, "grad_norm": 0.38074591755867004, "learning_rate": 8.274274562423513e-06, "loss": 0.0309, "step": 85390 }, { "epoch": 0.6909944170240311, "grad_norm": 0.8744349479675293, "learning_rate": 8.273740894108412e-06, "loss": 0.0276, "step": 85400 }, { "epoch": 0.6910753297192329, "grad_norm": 1.4871736764907837, "learning_rate": 8.27320716050546e-06, "loss": 0.029, "step": 85410 }, { "epoch": 0.6911562424144349, "grad_norm": 0.7372016310691833, "learning_rate": 8.272673361625302e-06, "loss": 0.0494, "step": 85420 }, { "epoch": 0.6912371551096367, "grad_norm": 0.3484043776988983, "learning_rate": 8.272139497478582e-06, "loss": 0.016, "step": 85430 }, { "epoch": 0.6913180678048386, "grad_norm": 0.4061658978462219, "learning_rate": 8.271605568075949e-06, "loss": 0.0391, "step": 85440 }, { "epoch": 0.6913989805000404, "grad_norm": 0.3141274154186249, "learning_rate": 8.27107157342805e-06, "loss": 0.0288, "step": 85450 }, { "epoch": 0.6914798931952423, "grad_norm": 0.9308121800422668, "learning_rate": 8.270537513545531e-06, "loss": 0.0468, "step": 85460 }, { "epoch": 0.6915608058904442, "grad_norm": 0.35902974009513855, "learning_rate": 8.270003388439047e-06, "loss": 0.0309, "step": 85470 }, { "epoch": 0.691641718585646, "grad_norm": 0.32711026072502136, "learning_rate": 8.26946919811925e-06, "loss": 0.0428, "step": 85480 }, { "epoch": 0.691722631280848, "grad_norm": 0.08416464924812317, "learning_rate": 8.268934942596791e-06, "loss": 0.0213, "step": 85490 }, { "epoch": 0.6918035439760498, "grad_norm": 0.9080102443695068, "learning_rate": 8.268400621882324e-06, "loss": 0.0303, "step": 85500 }, { "epoch": 0.6918844566712518, "grad_norm": 0.6344558596611023, "learning_rate": 8.267866235986511e-06, "loss": 0.0363, "step": 85510 }, { "epoch": 0.6919653693664536, "grad_norm": 0.4966751039028168, "learning_rate": 8.26733178492e-06, "loss": 0.0362, "step": 85520 }, { "epoch": 0.6920462820616555, "grad_norm": 0.4728739261627197, "learning_rate": 8.266797268693456e-06, "loss": 0.0443, "step": 85530 }, { "epoch": 0.6921271947568574, "grad_norm": 0.6016910672187805, "learning_rate": 8.266262687317536e-06, "loss": 0.0394, "step": 85540 }, { "epoch": 0.6922081074520592, "grad_norm": 0.3346116840839386, "learning_rate": 8.265728040802905e-06, "loss": 0.0223, "step": 85550 }, { "epoch": 0.6922890201472611, "grad_norm": 0.3195447325706482, "learning_rate": 8.26519332916022e-06, "loss": 0.0292, "step": 85560 }, { "epoch": 0.692369932842463, "grad_norm": 0.8010608553886414, "learning_rate": 8.264658552400149e-06, "loss": 0.0405, "step": 85570 }, { "epoch": 0.6924508455376649, "grad_norm": 0.4801572561264038, "learning_rate": 8.264123710533353e-06, "loss": 0.0325, "step": 85580 }, { "epoch": 0.6925317582328667, "grad_norm": 0.5726161599159241, "learning_rate": 8.263588803570501e-06, "loss": 0.0482, "step": 85590 }, { "epoch": 0.6926126709280687, "grad_norm": 0.5856726169586182, "learning_rate": 8.263053831522262e-06, "loss": 0.0373, "step": 85600 }, { "epoch": 0.6926935836232705, "grad_norm": 0.25583064556121826, "learning_rate": 8.262518794399301e-06, "loss": 0.0322, "step": 85610 }, { "epoch": 0.6927744963184723, "grad_norm": 0.7423975467681885, "learning_rate": 8.26198369221229e-06, "loss": 0.0363, "step": 85620 }, { "epoch": 0.6928554090136743, "grad_norm": 0.5341752171516418, "learning_rate": 8.261448524971901e-06, "loss": 0.0313, "step": 85630 }, { "epoch": 0.6929363217088761, "grad_norm": 0.3698716461658478, "learning_rate": 8.260913292688806e-06, "loss": 0.0294, "step": 85640 }, { "epoch": 0.693017234404078, "grad_norm": 0.1947011649608612, "learning_rate": 8.26037799537368e-06, "loss": 0.0359, "step": 85650 }, { "epoch": 0.6930981470992799, "grad_norm": 0.6030468344688416, "learning_rate": 8.259842633037198e-06, "loss": 0.0338, "step": 85660 }, { "epoch": 0.6931790597944818, "grad_norm": 0.2884758412837982, "learning_rate": 8.259307205690037e-06, "loss": 0.0307, "step": 85670 }, { "epoch": 0.6932599724896836, "grad_norm": 0.739086925983429, "learning_rate": 8.258771713342874e-06, "loss": 0.0305, "step": 85680 }, { "epoch": 0.6933408851848855, "grad_norm": 0.3275351822376251, "learning_rate": 8.258236156006388e-06, "loss": 0.0288, "step": 85690 }, { "epoch": 0.6934217978800874, "grad_norm": 0.29519152641296387, "learning_rate": 8.257700533691263e-06, "loss": 0.037, "step": 85700 }, { "epoch": 0.6935027105752892, "grad_norm": 0.3837410509586334, "learning_rate": 8.257164846408175e-06, "loss": 0.0453, "step": 85710 }, { "epoch": 0.6935836232704912, "grad_norm": 0.6357271671295166, "learning_rate": 8.256629094167811e-06, "loss": 0.0543, "step": 85720 }, { "epoch": 0.693664535965693, "grad_norm": 0.5156044960021973, "learning_rate": 8.256093276980856e-06, "loss": 0.0318, "step": 85730 }, { "epoch": 0.6937454486608949, "grad_norm": 0.7663256525993347, "learning_rate": 8.255557394857993e-06, "loss": 0.0413, "step": 85740 }, { "epoch": 0.6938263613560968, "grad_norm": 0.23336221277713776, "learning_rate": 8.255021447809911e-06, "loss": 0.0324, "step": 85750 }, { "epoch": 0.6939072740512986, "grad_norm": 0.7605285048484802, "learning_rate": 8.254485435847297e-06, "loss": 0.0392, "step": 85760 }, { "epoch": 0.6939881867465005, "grad_norm": 0.40160050988197327, "learning_rate": 8.253949358980843e-06, "loss": 0.0434, "step": 85770 }, { "epoch": 0.6940690994417024, "grad_norm": 0.47666144371032715, "learning_rate": 8.253413217221237e-06, "loss": 0.0359, "step": 85780 }, { "epoch": 0.6941500121369043, "grad_norm": 0.9326159358024597, "learning_rate": 8.252877010579176e-06, "loss": 0.024, "step": 85790 }, { "epoch": 0.6942309248321061, "grad_norm": 0.47213149070739746, "learning_rate": 8.252340739065347e-06, "loss": 0.0316, "step": 85800 }, { "epoch": 0.6943118375273081, "grad_norm": 0.42491352558135986, "learning_rate": 8.251804402690448e-06, "loss": 0.0316, "step": 85810 }, { "epoch": 0.6943927502225099, "grad_norm": 0.45095163583755493, "learning_rate": 8.251268001465176e-06, "loss": 0.0385, "step": 85820 }, { "epoch": 0.6944736629177118, "grad_norm": 0.5627744793891907, "learning_rate": 8.250731535400228e-06, "loss": 0.0311, "step": 85830 }, { "epoch": 0.6945545756129137, "grad_norm": 0.07552635669708252, "learning_rate": 8.250195004506302e-06, "loss": 0.0379, "step": 85840 }, { "epoch": 0.6946354883081155, "grad_norm": 0.14608027040958405, "learning_rate": 8.249658408794097e-06, "loss": 0.0242, "step": 85850 }, { "epoch": 0.6947164010033174, "grad_norm": 0.3296145498752594, "learning_rate": 8.249121748274316e-06, "loss": 0.0366, "step": 85860 }, { "epoch": 0.6947973136985193, "grad_norm": 0.012523991987109184, "learning_rate": 8.24858502295766e-06, "loss": 0.0249, "step": 85870 }, { "epoch": 0.6948782263937212, "grad_norm": 0.33521729707717896, "learning_rate": 8.248048232854835e-06, "loss": 0.0258, "step": 85880 }, { "epoch": 0.694959139088923, "grad_norm": 0.26123857498168945, "learning_rate": 8.247511377976545e-06, "loss": 0.0387, "step": 85890 }, { "epoch": 0.695040051784125, "grad_norm": 0.32000380754470825, "learning_rate": 8.246974458333497e-06, "loss": 0.0298, "step": 85900 }, { "epoch": 0.6951209644793268, "grad_norm": 0.8449794054031372, "learning_rate": 8.246437473936395e-06, "loss": 0.0377, "step": 85910 }, { "epoch": 0.6952018771745286, "grad_norm": 0.5655612945556641, "learning_rate": 8.245900424795954e-06, "loss": 0.0496, "step": 85920 }, { "epoch": 0.6952827898697306, "grad_norm": 0.41581568121910095, "learning_rate": 8.24536331092288e-06, "loss": 0.0306, "step": 85930 }, { "epoch": 0.6953637025649324, "grad_norm": 0.7017926573753357, "learning_rate": 8.244826132327887e-06, "loss": 0.0181, "step": 85940 }, { "epoch": 0.6954446152601343, "grad_norm": 0.3067704439163208, "learning_rate": 8.244288889021687e-06, "loss": 0.0438, "step": 85950 }, { "epoch": 0.6955255279553362, "grad_norm": 0.29596588015556335, "learning_rate": 8.243751581014993e-06, "loss": 0.027, "step": 85960 }, { "epoch": 0.6956064406505381, "grad_norm": 0.34776344895362854, "learning_rate": 8.243214208318522e-06, "loss": 0.0247, "step": 85970 }, { "epoch": 0.69568735334574, "grad_norm": 0.2832941710948944, "learning_rate": 8.242676770942991e-06, "loss": 0.0424, "step": 85980 }, { "epoch": 0.6957682660409418, "grad_norm": 0.6114984750747681, "learning_rate": 8.242139268899117e-06, "loss": 0.0356, "step": 85990 }, { "epoch": 0.6958491787361437, "grad_norm": 0.406933069229126, "learning_rate": 8.241601702197618e-06, "loss": 0.0281, "step": 86000 }, { "epoch": 0.6959300914313455, "grad_norm": 0.6538891196250916, "learning_rate": 8.24106407084922e-06, "loss": 0.0356, "step": 86010 }, { "epoch": 0.6960110041265475, "grad_norm": 0.16889818012714386, "learning_rate": 8.240526374864639e-06, "loss": 0.0245, "step": 86020 }, { "epoch": 0.6960919168217493, "grad_norm": 0.38456159830093384, "learning_rate": 8.239988614254601e-06, "loss": 0.0241, "step": 86030 }, { "epoch": 0.6961728295169513, "grad_norm": 0.3077017068862915, "learning_rate": 8.23945078902983e-06, "loss": 0.0269, "step": 86040 }, { "epoch": 0.6962537422121531, "grad_norm": 0.6084603667259216, "learning_rate": 8.238912899201052e-06, "loss": 0.0321, "step": 86050 }, { "epoch": 0.6963346549073549, "grad_norm": 0.30584603548049927, "learning_rate": 8.238374944778994e-06, "loss": 0.0217, "step": 86060 }, { "epoch": 0.6964155676025569, "grad_norm": 0.34445077180862427, "learning_rate": 8.237836925774383e-06, "loss": 0.0279, "step": 86070 }, { "epoch": 0.6964964802977587, "grad_norm": 0.6365620493888855, "learning_rate": 8.237298842197949e-06, "loss": 0.0317, "step": 86080 }, { "epoch": 0.6965773929929606, "grad_norm": 0.3403204083442688, "learning_rate": 8.236760694060426e-06, "loss": 0.0265, "step": 86090 }, { "epoch": 0.6966583056881624, "grad_norm": 0.2539059519767761, "learning_rate": 8.236222481372545e-06, "loss": 0.0304, "step": 86100 }, { "epoch": 0.6967392183833644, "grad_norm": 0.20320065319538116, "learning_rate": 8.235684204145036e-06, "loss": 0.0282, "step": 86110 }, { "epoch": 0.6968201310785662, "grad_norm": 0.38074105978012085, "learning_rate": 8.235145862388638e-06, "loss": 0.024, "step": 86120 }, { "epoch": 0.6969010437737682, "grad_norm": 0.6743201017379761, "learning_rate": 8.234607456114084e-06, "loss": 0.0282, "step": 86130 }, { "epoch": 0.69698195646897, "grad_norm": 0.21069684624671936, "learning_rate": 8.234068985332115e-06, "loss": 0.0204, "step": 86140 }, { "epoch": 0.6970628691641718, "grad_norm": 0.23497314751148224, "learning_rate": 8.233530450053466e-06, "loss": 0.0319, "step": 86150 }, { "epoch": 0.6971437818593738, "grad_norm": 0.422484815120697, "learning_rate": 8.232991850288879e-06, "loss": 0.0272, "step": 86160 }, { "epoch": 0.6972246945545756, "grad_norm": 0.500820517539978, "learning_rate": 8.232453186049094e-06, "loss": 0.0278, "step": 86170 }, { "epoch": 0.6973056072497775, "grad_norm": 0.6290634274482727, "learning_rate": 8.231914457344854e-06, "loss": 0.0399, "step": 86180 }, { "epoch": 0.6973865199449794, "grad_norm": 0.6626874208450317, "learning_rate": 8.231375664186905e-06, "loss": 0.0374, "step": 86190 }, { "epoch": 0.6974674326401813, "grad_norm": 0.35390549898147583, "learning_rate": 8.230836806585988e-06, "loss": 0.0504, "step": 86200 }, { "epoch": 0.6975483453353831, "grad_norm": 0.37706923484802246, "learning_rate": 8.230297884552853e-06, "loss": 0.0345, "step": 86210 }, { "epoch": 0.697629258030585, "grad_norm": 0.5207346677780151, "learning_rate": 8.229758898098245e-06, "loss": 0.0369, "step": 86220 }, { "epoch": 0.6977101707257869, "grad_norm": 0.47993987798690796, "learning_rate": 8.229219847232914e-06, "loss": 0.022, "step": 86230 }, { "epoch": 0.6977910834209887, "grad_norm": 0.7706183195114136, "learning_rate": 8.22868073196761e-06, "loss": 0.059, "step": 86240 }, { "epoch": 0.6978719961161907, "grad_norm": 1.1995571851730347, "learning_rate": 8.228141552313087e-06, "loss": 0.0401, "step": 86250 }, { "epoch": 0.6979529088113925, "grad_norm": 0.7931298017501831, "learning_rate": 8.227602308280094e-06, "loss": 0.0308, "step": 86260 }, { "epoch": 0.6980338215065944, "grad_norm": 0.2844279408454895, "learning_rate": 8.227062999879388e-06, "loss": 0.0266, "step": 86270 }, { "epoch": 0.6981147342017963, "grad_norm": 0.8175466656684875, "learning_rate": 8.226523627121723e-06, "loss": 0.0299, "step": 86280 }, { "epoch": 0.6981956468969981, "grad_norm": 0.5766993165016174, "learning_rate": 8.225984190017854e-06, "loss": 0.0263, "step": 86290 }, { "epoch": 0.6982765595922, "grad_norm": 0.2974323630332947, "learning_rate": 8.22544468857854e-06, "loss": 0.0279, "step": 86300 }, { "epoch": 0.6983574722874019, "grad_norm": 0.42241939902305603, "learning_rate": 8.224905122814545e-06, "loss": 0.0212, "step": 86310 }, { "epoch": 0.6984383849826038, "grad_norm": 0.3375481069087982, "learning_rate": 8.224365492736622e-06, "loss": 0.0221, "step": 86320 }, { "epoch": 0.6985192976778056, "grad_norm": 0.2262619435787201, "learning_rate": 8.223825798355538e-06, "loss": 0.022, "step": 86330 }, { "epoch": 0.6986002103730076, "grad_norm": 0.35019734501838684, "learning_rate": 8.223286039682052e-06, "loss": 0.0345, "step": 86340 }, { "epoch": 0.6986811230682094, "grad_norm": 0.6228545308113098, "learning_rate": 8.222746216726933e-06, "loss": 0.0351, "step": 86350 }, { "epoch": 0.6987620357634112, "grad_norm": 0.5669817924499512, "learning_rate": 8.222206329500943e-06, "loss": 0.0327, "step": 86360 }, { "epoch": 0.6988429484586132, "grad_norm": 0.1741398572921753, "learning_rate": 8.22166637801485e-06, "loss": 0.0483, "step": 86370 }, { "epoch": 0.698923861153815, "grad_norm": 0.637244701385498, "learning_rate": 8.221126362279423e-06, "loss": 0.038, "step": 86380 }, { "epoch": 0.6990047738490169, "grad_norm": 0.5104164481163025, "learning_rate": 8.220586282305431e-06, "loss": 0.0299, "step": 86390 }, { "epoch": 0.6990856865442188, "grad_norm": 0.4742448627948761, "learning_rate": 8.220046138103643e-06, "loss": 0.0295, "step": 86400 }, { "epoch": 0.6991665992394207, "grad_norm": 0.3122400641441345, "learning_rate": 8.219505929684835e-06, "loss": 0.0287, "step": 86410 }, { "epoch": 0.6992475119346225, "grad_norm": 0.4026375710964203, "learning_rate": 8.218965657059776e-06, "loss": 0.0236, "step": 86420 }, { "epoch": 0.6993284246298245, "grad_norm": 0.22522883117198944, "learning_rate": 8.218425320239243e-06, "loss": 0.0265, "step": 86430 }, { "epoch": 0.6994093373250263, "grad_norm": 0.5786034464836121, "learning_rate": 8.217884919234011e-06, "loss": 0.0281, "step": 86440 }, { "epoch": 0.6994902500202281, "grad_norm": 0.3268335461616516, "learning_rate": 8.217344454054858e-06, "loss": 0.0275, "step": 86450 }, { "epoch": 0.6995711627154301, "grad_norm": 0.26485586166381836, "learning_rate": 8.216803924712562e-06, "loss": 0.0268, "step": 86460 }, { "epoch": 0.6996520754106319, "grad_norm": 0.3240514397621155, "learning_rate": 8.216263331217904e-06, "loss": 0.0291, "step": 86470 }, { "epoch": 0.6997329881058338, "grad_norm": 0.7473695874214172, "learning_rate": 8.21572267358166e-06, "loss": 0.0326, "step": 86480 }, { "epoch": 0.6998139008010357, "grad_norm": 0.32423460483551025, "learning_rate": 8.21518195181462e-06, "loss": 0.0388, "step": 86490 }, { "epoch": 0.6998948134962376, "grad_norm": 0.45283886790275574, "learning_rate": 8.214641165927562e-06, "loss": 0.0299, "step": 86500 }, { "epoch": 0.6999757261914394, "grad_norm": 0.4237019717693329, "learning_rate": 8.214100315931273e-06, "loss": 0.0281, "step": 86510 }, { "epoch": 0.7000566388866413, "grad_norm": 0.3708030879497528, "learning_rate": 8.213559401836537e-06, "loss": 0.0303, "step": 86520 }, { "epoch": 0.7001375515818432, "grad_norm": 0.47296032309532166, "learning_rate": 8.213018423654144e-06, "loss": 0.041, "step": 86530 }, { "epoch": 0.700218464277045, "grad_norm": 0.4965052306652069, "learning_rate": 8.21247738139488e-06, "loss": 0.0284, "step": 86540 }, { "epoch": 0.700299376972247, "grad_norm": 0.40745311975479126, "learning_rate": 8.211936275069539e-06, "loss": 0.0322, "step": 86550 }, { "epoch": 0.7003802896674488, "grad_norm": 0.5006166100502014, "learning_rate": 8.211395104688907e-06, "loss": 0.0328, "step": 86560 }, { "epoch": 0.7004612023626507, "grad_norm": 0.21765515208244324, "learning_rate": 8.21085387026378e-06, "loss": 0.044, "step": 86570 }, { "epoch": 0.7005421150578526, "grad_norm": 0.44079655408859253, "learning_rate": 8.21031257180495e-06, "loss": 0.0373, "step": 86580 }, { "epoch": 0.7006230277530544, "grad_norm": 0.6777271628379822, "learning_rate": 8.209771209323217e-06, "loss": 0.0395, "step": 86590 }, { "epoch": 0.7007039404482563, "grad_norm": 0.2626529932022095, "learning_rate": 8.20922978282937e-06, "loss": 0.0307, "step": 86600 }, { "epoch": 0.7007848531434582, "grad_norm": 0.6217843294143677, "learning_rate": 8.20868829233421e-06, "loss": 0.0237, "step": 86610 }, { "epoch": 0.7008657658386601, "grad_norm": 0.5021932125091553, "learning_rate": 8.208146737848536e-06, "loss": 0.051, "step": 86620 }, { "epoch": 0.7009466785338619, "grad_norm": 0.5358559489250183, "learning_rate": 8.20760511938315e-06, "loss": 0.03, "step": 86630 }, { "epoch": 0.7010275912290639, "grad_norm": 0.3162699341773987, "learning_rate": 8.20706343694885e-06, "loss": 0.0227, "step": 86640 }, { "epoch": 0.7011085039242657, "grad_norm": 0.3292078673839569, "learning_rate": 8.20652169055644e-06, "loss": 0.0456, "step": 86650 }, { "epoch": 0.7011894166194675, "grad_norm": 0.46832993626594543, "learning_rate": 8.205979880216723e-06, "loss": 0.0335, "step": 86660 }, { "epoch": 0.7012703293146695, "grad_norm": 0.4960334897041321, "learning_rate": 8.205438005940508e-06, "loss": 0.0262, "step": 86670 }, { "epoch": 0.7013512420098713, "grad_norm": 0.39590704441070557, "learning_rate": 8.204896067738598e-06, "loss": 0.0283, "step": 86680 }, { "epoch": 0.7014321547050733, "grad_norm": 0.45968863368034363, "learning_rate": 8.2043540656218e-06, "loss": 0.0364, "step": 86690 }, { "epoch": 0.7015130674002751, "grad_norm": 0.5850843191146851, "learning_rate": 8.203811999600928e-06, "loss": 0.0377, "step": 86700 }, { "epoch": 0.701593980095477, "grad_norm": 0.43797290325164795, "learning_rate": 8.203269869686788e-06, "loss": 0.0284, "step": 86710 }, { "epoch": 0.7016748927906789, "grad_norm": 0.5141178965568542, "learning_rate": 8.202727675890192e-06, "loss": 0.0288, "step": 86720 }, { "epoch": 0.7017558054858807, "grad_norm": 0.3686404526233673, "learning_rate": 8.202185418221953e-06, "loss": 0.0267, "step": 86730 }, { "epoch": 0.7018367181810826, "grad_norm": 0.3068883717060089, "learning_rate": 8.201643096692889e-06, "loss": 0.0257, "step": 86740 }, { "epoch": 0.7019176308762844, "grad_norm": 0.36550378799438477, "learning_rate": 8.20110071131381e-06, "loss": 0.034, "step": 86750 }, { "epoch": 0.7019985435714864, "grad_norm": 0.5341681838035583, "learning_rate": 8.200558262095535e-06, "loss": 0.049, "step": 86760 }, { "epoch": 0.7020794562666882, "grad_norm": 0.3697172701358795, "learning_rate": 8.200015749048883e-06, "loss": 0.0478, "step": 86770 }, { "epoch": 0.7021603689618902, "grad_norm": 0.33236339688301086, "learning_rate": 8.199473172184672e-06, "loss": 0.0329, "step": 86780 }, { "epoch": 0.702241281657092, "grad_norm": 0.3896322548389435, "learning_rate": 8.198930531513724e-06, "loss": 0.0257, "step": 86790 }, { "epoch": 0.7023221943522939, "grad_norm": 0.42681699991226196, "learning_rate": 8.198387827046858e-06, "loss": 0.0501, "step": 86800 }, { "epoch": 0.7024031070474958, "grad_norm": 0.7955186367034912, "learning_rate": 8.1978450587949e-06, "loss": 0.0232, "step": 86810 }, { "epoch": 0.7024840197426976, "grad_norm": 0.3109634518623352, "learning_rate": 8.197302226768673e-06, "loss": 0.0304, "step": 86820 }, { "epoch": 0.7025649324378995, "grad_norm": 0.17540165781974792, "learning_rate": 8.196759330979002e-06, "loss": 0.0328, "step": 86830 }, { "epoch": 0.7026458451331014, "grad_norm": 0.472280353307724, "learning_rate": 8.196216371436717e-06, "loss": 0.0347, "step": 86840 }, { "epoch": 0.7027267578283033, "grad_norm": 0.3844889998435974, "learning_rate": 8.195673348152642e-06, "loss": 0.0255, "step": 86850 }, { "epoch": 0.7028076705235051, "grad_norm": 0.22312864661216736, "learning_rate": 8.195130261137609e-06, "loss": 0.0191, "step": 86860 }, { "epoch": 0.7028885832187071, "grad_norm": 0.42379164695739746, "learning_rate": 8.19458711040245e-06, "loss": 0.0391, "step": 86870 }, { "epoch": 0.7029694959139089, "grad_norm": 0.6819059252738953, "learning_rate": 8.194043895957993e-06, "loss": 0.0359, "step": 86880 }, { "epoch": 0.7030504086091107, "grad_norm": 0.47345784306526184, "learning_rate": 8.193500617815073e-06, "loss": 0.0228, "step": 86890 }, { "epoch": 0.7031313213043127, "grad_norm": 0.44528716802597046, "learning_rate": 8.192957275984527e-06, "loss": 0.0277, "step": 86900 }, { "epoch": 0.7032122339995145, "grad_norm": 0.792113184928894, "learning_rate": 8.192413870477186e-06, "loss": 0.0357, "step": 86910 }, { "epoch": 0.7032931466947164, "grad_norm": 0.436385840177536, "learning_rate": 8.191870401303893e-06, "loss": 0.0317, "step": 86920 }, { "epoch": 0.7033740593899183, "grad_norm": 0.5976213812828064, "learning_rate": 8.191326868475481e-06, "loss": 0.0451, "step": 86930 }, { "epoch": 0.7034549720851202, "grad_norm": 0.49615994095802307, "learning_rate": 8.190783272002794e-06, "loss": 0.032, "step": 86940 }, { "epoch": 0.703535884780322, "grad_norm": 0.10783701390028, "learning_rate": 8.190239611896669e-06, "loss": 0.0194, "step": 86950 }, { "epoch": 0.7036167974755239, "grad_norm": 0.62788325548172, "learning_rate": 8.189695888167949e-06, "loss": 0.0265, "step": 86960 }, { "epoch": 0.7036977101707258, "grad_norm": 0.44101080298423767, "learning_rate": 8.18915210082748e-06, "loss": 0.0348, "step": 86970 }, { "epoch": 0.7037786228659276, "grad_norm": 0.6642136573791504, "learning_rate": 8.188608249886104e-06, "loss": 0.0266, "step": 86980 }, { "epoch": 0.7038595355611296, "grad_norm": 0.43157508969306946, "learning_rate": 8.188064335354669e-06, "loss": 0.029, "step": 86990 }, { "epoch": 0.7039404482563314, "grad_norm": 0.5914866328239441, "learning_rate": 8.187520357244018e-06, "loss": 0.0398, "step": 87000 }, { "epoch": 0.7040213609515333, "grad_norm": 0.5517289042472839, "learning_rate": 8.186976315565007e-06, "loss": 0.0291, "step": 87010 }, { "epoch": 0.7041022736467352, "grad_norm": 0.327352374792099, "learning_rate": 8.186432210328478e-06, "loss": 0.0207, "step": 87020 }, { "epoch": 0.704183186341937, "grad_norm": 0.2993875741958618, "learning_rate": 8.185888041545287e-06, "loss": 0.0478, "step": 87030 }, { "epoch": 0.7042640990371389, "grad_norm": 0.47687220573425293, "learning_rate": 8.185343809226284e-06, "loss": 0.0336, "step": 87040 }, { "epoch": 0.7043450117323408, "grad_norm": 0.5568689107894897, "learning_rate": 8.184799513382325e-06, "loss": 0.0464, "step": 87050 }, { "epoch": 0.7044259244275427, "grad_norm": 0.5849931240081787, "learning_rate": 8.18425515402426e-06, "loss": 0.0492, "step": 87060 }, { "epoch": 0.7045068371227445, "grad_norm": 0.14629271626472473, "learning_rate": 8.18371073116295e-06, "loss": 0.0246, "step": 87070 }, { "epoch": 0.7045877498179465, "grad_norm": 0.35488224029541016, "learning_rate": 8.18316624480925e-06, "loss": 0.0356, "step": 87080 }, { "epoch": 0.7046686625131483, "grad_norm": 0.7159964442253113, "learning_rate": 8.182621694974022e-06, "loss": 0.0389, "step": 87090 }, { "epoch": 0.7047495752083502, "grad_norm": 0.505026638507843, "learning_rate": 8.182077081668121e-06, "loss": 0.0383, "step": 87100 }, { "epoch": 0.7048304879035521, "grad_norm": 0.7360796928405762, "learning_rate": 8.18153240490241e-06, "loss": 0.0288, "step": 87110 }, { "epoch": 0.7049114005987539, "grad_norm": 0.29127538204193115, "learning_rate": 8.180987664687751e-06, "loss": 0.0319, "step": 87120 }, { "epoch": 0.7049923132939558, "grad_norm": 0.3911222815513611, "learning_rate": 8.180442861035009e-06, "loss": 0.0253, "step": 87130 }, { "epoch": 0.7050732259891577, "grad_norm": 0.4674360752105713, "learning_rate": 8.17989799395505e-06, "loss": 0.0258, "step": 87140 }, { "epoch": 0.7051541386843596, "grad_norm": 0.34875473380088806, "learning_rate": 8.179353063458736e-06, "loss": 0.0331, "step": 87150 }, { "epoch": 0.7052350513795614, "grad_norm": 0.5208537578582764, "learning_rate": 8.178808069556938e-06, "loss": 0.0319, "step": 87160 }, { "epoch": 0.7053159640747634, "grad_norm": 0.3527800738811493, "learning_rate": 8.178263012260526e-06, "loss": 0.0287, "step": 87170 }, { "epoch": 0.7053968767699652, "grad_norm": 0.6714617609977722, "learning_rate": 8.177717891580364e-06, "loss": 0.0204, "step": 87180 }, { "epoch": 0.705477789465167, "grad_norm": 0.33539602160453796, "learning_rate": 8.17717270752733e-06, "loss": 0.0404, "step": 87190 }, { "epoch": 0.705558702160369, "grad_norm": 0.38847285509109497, "learning_rate": 8.176627460112293e-06, "loss": 0.0277, "step": 87200 }, { "epoch": 0.7056396148555708, "grad_norm": 0.3374107778072357, "learning_rate": 8.176082149346127e-06, "loss": 0.0317, "step": 87210 }, { "epoch": 0.7057205275507727, "grad_norm": 0.7994316220283508, "learning_rate": 8.175536775239707e-06, "loss": 0.0362, "step": 87220 }, { "epoch": 0.7058014402459746, "grad_norm": 0.40908700227737427, "learning_rate": 8.17499133780391e-06, "loss": 0.0402, "step": 87230 }, { "epoch": 0.7058823529411765, "grad_norm": 0.3420335352420807, "learning_rate": 8.174445837049614e-06, "loss": 0.0399, "step": 87240 }, { "epoch": 0.7059632656363783, "grad_norm": 0.46214431524276733, "learning_rate": 8.173900272987699e-06, "loss": 0.0295, "step": 87250 }, { "epoch": 0.7060441783315802, "grad_norm": 0.7730722427368164, "learning_rate": 8.17335464562904e-06, "loss": 0.0453, "step": 87260 }, { "epoch": 0.7061250910267821, "grad_norm": 0.41415759921073914, "learning_rate": 8.172808954984524e-06, "loss": 0.0355, "step": 87270 }, { "epoch": 0.7062060037219839, "grad_norm": 0.36636391282081604, "learning_rate": 8.172263201065032e-06, "loss": 0.0336, "step": 87280 }, { "epoch": 0.7062869164171859, "grad_norm": 0.5962444543838501, "learning_rate": 8.171717383881445e-06, "loss": 0.0389, "step": 87290 }, { "epoch": 0.7063678291123877, "grad_norm": 0.5098491311073303, "learning_rate": 8.171171503444654e-06, "loss": 0.0433, "step": 87300 }, { "epoch": 0.7064487418075897, "grad_norm": 1.0910899639129639, "learning_rate": 8.17062555976554e-06, "loss": 0.041, "step": 87310 }, { "epoch": 0.7065296545027915, "grad_norm": 0.34184879064559937, "learning_rate": 8.170079552854992e-06, "loss": 0.0217, "step": 87320 }, { "epoch": 0.7066105671979933, "grad_norm": 0.38059574365615845, "learning_rate": 8.169533482723901e-06, "loss": 0.0312, "step": 87330 }, { "epoch": 0.7066914798931953, "grad_norm": 0.13066843152046204, "learning_rate": 8.168987349383154e-06, "loss": 0.0399, "step": 87340 }, { "epoch": 0.7067723925883971, "grad_norm": 0.588018000125885, "learning_rate": 8.168441152843646e-06, "loss": 0.0388, "step": 87350 }, { "epoch": 0.706853305283599, "grad_norm": 0.35387516021728516, "learning_rate": 8.167894893116268e-06, "loss": 0.039, "step": 87360 }, { "epoch": 0.7069342179788008, "grad_norm": 0.7084707617759705, "learning_rate": 8.167348570211914e-06, "loss": 0.0471, "step": 87370 }, { "epoch": 0.7070151306740028, "grad_norm": 0.44961315393447876, "learning_rate": 8.16680218414148e-06, "loss": 0.0222, "step": 87380 }, { "epoch": 0.7070960433692046, "grad_norm": 0.9980339407920837, "learning_rate": 8.16625573491586e-06, "loss": 0.0325, "step": 87390 }, { "epoch": 0.7071769560644066, "grad_norm": 0.40532925724983215, "learning_rate": 8.165709222545955e-06, "loss": 0.0308, "step": 87400 }, { "epoch": 0.7072578687596084, "grad_norm": 0.2616140842437744, "learning_rate": 8.165162647042663e-06, "loss": 0.0339, "step": 87410 }, { "epoch": 0.7073387814548102, "grad_norm": 0.4272383749485016, "learning_rate": 8.164616008416884e-06, "loss": 0.0221, "step": 87420 }, { "epoch": 0.7074196941500122, "grad_norm": 0.08735886216163635, "learning_rate": 8.164069306679518e-06, "loss": 0.0275, "step": 87430 }, { "epoch": 0.707500606845214, "grad_norm": 0.3593493700027466, "learning_rate": 8.163522541841472e-06, "loss": 0.0455, "step": 87440 }, { "epoch": 0.7075815195404159, "grad_norm": 0.6009312272071838, "learning_rate": 8.162975713913644e-06, "loss": 0.0318, "step": 87450 }, { "epoch": 0.7076624322356178, "grad_norm": 0.4583539664745331, "learning_rate": 8.162428822906943e-06, "loss": 0.0238, "step": 87460 }, { "epoch": 0.7077433449308197, "grad_norm": 0.1618037223815918, "learning_rate": 8.161881868832277e-06, "loss": 0.0328, "step": 87470 }, { "epoch": 0.7078242576260215, "grad_norm": 0.4012346565723419, "learning_rate": 8.161334851700552e-06, "loss": 0.027, "step": 87480 }, { "epoch": 0.7079051703212234, "grad_norm": 0.3916727900505066, "learning_rate": 8.160787771522678e-06, "loss": 0.0309, "step": 87490 }, { "epoch": 0.7079860830164253, "grad_norm": 0.5289092659950256, "learning_rate": 8.160240628309562e-06, "loss": 0.0272, "step": 87500 }, { "epoch": 0.7080669957116271, "grad_norm": 0.40412601828575134, "learning_rate": 8.15969342207212e-06, "loss": 0.036, "step": 87510 }, { "epoch": 0.7081479084068291, "grad_norm": 0.2816391885280609, "learning_rate": 8.159146152821263e-06, "loss": 0.027, "step": 87520 }, { "epoch": 0.7082288211020309, "grad_norm": 0.44455885887145996, "learning_rate": 8.158598820567904e-06, "loss": 0.0133, "step": 87530 }, { "epoch": 0.7083097337972328, "grad_norm": 0.4395332336425781, "learning_rate": 8.158051425322961e-06, "loss": 0.0254, "step": 87540 }, { "epoch": 0.7083906464924347, "grad_norm": 0.35342398285865784, "learning_rate": 8.157503967097351e-06, "loss": 0.043, "step": 87550 }, { "epoch": 0.7084715591876365, "grad_norm": 0.47402217984199524, "learning_rate": 8.156956445901987e-06, "loss": 0.0216, "step": 87560 }, { "epoch": 0.7085524718828384, "grad_norm": 0.41458800435066223, "learning_rate": 8.156408861747794e-06, "loss": 0.0418, "step": 87570 }, { "epoch": 0.7086333845780403, "grad_norm": 0.2888938784599304, "learning_rate": 8.155861214645687e-06, "loss": 0.0257, "step": 87580 }, { "epoch": 0.7087142972732422, "grad_norm": 0.5804877877235413, "learning_rate": 8.155313504606592e-06, "loss": 0.0339, "step": 87590 }, { "epoch": 0.708795209968444, "grad_norm": 0.5525797605514526, "learning_rate": 8.15476573164143e-06, "loss": 0.0313, "step": 87600 }, { "epoch": 0.708876122663646, "grad_norm": 0.3400651812553406, "learning_rate": 8.154217895761125e-06, "loss": 0.0235, "step": 87610 }, { "epoch": 0.7089570353588478, "grad_norm": 0.4779443144798279, "learning_rate": 8.153669996976603e-06, "loss": 0.0414, "step": 87620 }, { "epoch": 0.7090379480540496, "grad_norm": 0.3706055283546448, "learning_rate": 8.153122035298792e-06, "loss": 0.0404, "step": 87630 }, { "epoch": 0.7091188607492516, "grad_norm": 0.5595781803131104, "learning_rate": 8.152574010738617e-06, "loss": 0.0398, "step": 87640 }, { "epoch": 0.7091997734444534, "grad_norm": 0.27999576926231384, "learning_rate": 8.15202592330701e-06, "loss": 0.0339, "step": 87650 }, { "epoch": 0.7092806861396553, "grad_norm": 0.4608922302722931, "learning_rate": 8.151477773014897e-06, "loss": 0.0292, "step": 87660 }, { "epoch": 0.7093615988348572, "grad_norm": 0.21303312480449677, "learning_rate": 8.150929559873218e-06, "loss": 0.0296, "step": 87670 }, { "epoch": 0.7094425115300591, "grad_norm": 0.576484739780426, "learning_rate": 8.150381283892898e-06, "loss": 0.032, "step": 87680 }, { "epoch": 0.7095234242252609, "grad_norm": 0.46990683674812317, "learning_rate": 8.149832945084872e-06, "loss": 0.0282, "step": 87690 }, { "epoch": 0.7096043369204629, "grad_norm": 0.7030476927757263, "learning_rate": 8.14928454346008e-06, "loss": 0.0289, "step": 87700 }, { "epoch": 0.7096852496156647, "grad_norm": 0.2083064615726471, "learning_rate": 8.148736079029457e-06, "loss": 0.02, "step": 87710 }, { "epoch": 0.7097661623108665, "grad_norm": 0.31083378195762634, "learning_rate": 8.148187551803939e-06, "loss": 0.0361, "step": 87720 }, { "epoch": 0.7098470750060685, "grad_norm": 0.463068425655365, "learning_rate": 8.147638961794467e-06, "loss": 0.0294, "step": 87730 }, { "epoch": 0.7099279877012703, "grad_norm": 0.47115451097488403, "learning_rate": 8.14709030901198e-06, "loss": 0.0377, "step": 87740 }, { "epoch": 0.7100089003964722, "grad_norm": 0.38452041149139404, "learning_rate": 8.14654159346742e-06, "loss": 0.0336, "step": 87750 }, { "epoch": 0.7100898130916741, "grad_norm": 0.43147754669189453, "learning_rate": 8.145992815171733e-06, "loss": 0.0372, "step": 87760 }, { "epoch": 0.710170725786876, "grad_norm": 0.382885217666626, "learning_rate": 8.145443974135858e-06, "loss": 0.0322, "step": 87770 }, { "epoch": 0.7102516384820778, "grad_norm": 0.5676488876342773, "learning_rate": 8.144895070370744e-06, "loss": 0.0264, "step": 87780 }, { "epoch": 0.7103325511772797, "grad_norm": 0.16910651326179504, "learning_rate": 8.14434610388734e-06, "loss": 0.0484, "step": 87790 }, { "epoch": 0.7104134638724816, "grad_norm": 0.24732515215873718, "learning_rate": 8.143797074696586e-06, "loss": 0.0405, "step": 87800 }, { "epoch": 0.7104943765676834, "grad_norm": 0.3865797519683838, "learning_rate": 8.143247982809439e-06, "loss": 0.0288, "step": 87810 }, { "epoch": 0.7105752892628854, "grad_norm": 0.6121488213539124, "learning_rate": 8.142698828236846e-06, "loss": 0.0323, "step": 87820 }, { "epoch": 0.7106562019580872, "grad_norm": 0.1476585865020752, "learning_rate": 8.14214961098976e-06, "loss": 0.0411, "step": 87830 }, { "epoch": 0.7107371146532891, "grad_norm": 0.34932824969291687, "learning_rate": 8.141600331079133e-06, "loss": 0.0243, "step": 87840 }, { "epoch": 0.710818027348491, "grad_norm": 0.272455096244812, "learning_rate": 8.141050988515919e-06, "loss": 0.0405, "step": 87850 }, { "epoch": 0.7108989400436928, "grad_norm": 0.6759887933731079, "learning_rate": 8.140501583311076e-06, "loss": 0.028, "step": 87860 }, { "epoch": 0.7109798527388947, "grad_norm": 0.24424384534358978, "learning_rate": 8.139952115475558e-06, "loss": 0.0227, "step": 87870 }, { "epoch": 0.7110607654340966, "grad_norm": 0.6308697462081909, "learning_rate": 8.139402585020324e-06, "loss": 0.0474, "step": 87880 }, { "epoch": 0.7111416781292985, "grad_norm": 0.40526148676872253, "learning_rate": 8.138852991956332e-06, "loss": 0.0645, "step": 87890 }, { "epoch": 0.7112225908245003, "grad_norm": 0.3921765387058258, "learning_rate": 8.138303336294545e-06, "loss": 0.0284, "step": 87900 }, { "epoch": 0.7113035035197023, "grad_norm": 0.3890771269798279, "learning_rate": 8.13775361804592e-06, "loss": 0.0343, "step": 87910 }, { "epoch": 0.7113844162149041, "grad_norm": 0.6654787659645081, "learning_rate": 8.137203837221426e-06, "loss": 0.0312, "step": 87920 }, { "epoch": 0.7114653289101059, "grad_norm": 0.4840508699417114, "learning_rate": 8.136653993832025e-06, "loss": 0.0284, "step": 87930 }, { "epoch": 0.7115462416053079, "grad_norm": 0.3882564902305603, "learning_rate": 8.13610408788868e-06, "loss": 0.0436, "step": 87940 }, { "epoch": 0.7116271543005097, "grad_norm": 0.25807833671569824, "learning_rate": 8.13555411940236e-06, "loss": 0.0272, "step": 87950 }, { "epoch": 0.7117080669957117, "grad_norm": 0.6676579713821411, "learning_rate": 8.135004088384033e-06, "loss": 0.0338, "step": 87960 }, { "epoch": 0.7117889796909135, "grad_norm": 0.6971102356910706, "learning_rate": 8.134453994844667e-06, "loss": 0.041, "step": 87970 }, { "epoch": 0.7118698923861154, "grad_norm": 0.4955061674118042, "learning_rate": 8.133903838795233e-06, "loss": 0.0335, "step": 87980 }, { "epoch": 0.7119508050813173, "grad_norm": 0.20213434100151062, "learning_rate": 8.133353620246705e-06, "loss": 0.027, "step": 87990 }, { "epoch": 0.7120317177765192, "grad_norm": 0.22197088599205017, "learning_rate": 8.132803339210052e-06, "loss": 0.0279, "step": 88000 }, { "epoch": 0.712112630471721, "grad_norm": 0.5976048111915588, "learning_rate": 8.13225299569625e-06, "loss": 0.044, "step": 88010 }, { "epoch": 0.7121935431669228, "grad_norm": 0.4609759449958801, "learning_rate": 8.131702589716275e-06, "loss": 0.034, "step": 88020 }, { "epoch": 0.7122744558621248, "grad_norm": 0.898137092590332, "learning_rate": 8.131152121281103e-06, "loss": 0.0291, "step": 88030 }, { "epoch": 0.7123553685573266, "grad_norm": 0.18146400153636932, "learning_rate": 8.130601590401713e-06, "loss": 0.0214, "step": 88040 }, { "epoch": 0.7124362812525286, "grad_norm": 0.40209317207336426, "learning_rate": 8.130050997089081e-06, "loss": 0.0235, "step": 88050 }, { "epoch": 0.7125171939477304, "grad_norm": 0.4446929395198822, "learning_rate": 8.129500341354192e-06, "loss": 0.0275, "step": 88060 }, { "epoch": 0.7125981066429323, "grad_norm": 0.5263818502426147, "learning_rate": 8.128949623208025e-06, "loss": 0.0348, "step": 88070 }, { "epoch": 0.7126790193381342, "grad_norm": 0.5182607173919678, "learning_rate": 8.128398842661562e-06, "loss": 0.0461, "step": 88080 }, { "epoch": 0.712759932033336, "grad_norm": 0.27325260639190674, "learning_rate": 8.12784799972579e-06, "loss": 0.0319, "step": 88090 }, { "epoch": 0.7128408447285379, "grad_norm": 0.3262835144996643, "learning_rate": 8.127297094411691e-06, "loss": 0.0275, "step": 88100 }, { "epoch": 0.7129217574237398, "grad_norm": 0.7108272910118103, "learning_rate": 8.126746126730255e-06, "loss": 0.0375, "step": 88110 }, { "epoch": 0.7130026701189417, "grad_norm": 0.5844945311546326, "learning_rate": 8.126195096692467e-06, "loss": 0.0573, "step": 88120 }, { "epoch": 0.7130835828141435, "grad_norm": 0.7255006432533264, "learning_rate": 8.125644004309318e-06, "loss": 0.0335, "step": 88130 }, { "epoch": 0.7131644955093455, "grad_norm": 0.571882963180542, "learning_rate": 8.125092849591801e-06, "loss": 0.0212, "step": 88140 }, { "epoch": 0.7132454082045473, "grad_norm": 0.5733510255813599, "learning_rate": 8.1245416325509e-06, "loss": 0.025, "step": 88150 }, { "epoch": 0.7133263208997491, "grad_norm": 1.008581519126892, "learning_rate": 8.123990353197614e-06, "loss": 0.0263, "step": 88160 }, { "epoch": 0.7134072335949511, "grad_norm": 0.23485161364078522, "learning_rate": 8.123439011542936e-06, "loss": 0.0206, "step": 88170 }, { "epoch": 0.7134881462901529, "grad_norm": 0.45146358013153076, "learning_rate": 8.12288760759786e-06, "loss": 0.0264, "step": 88180 }, { "epoch": 0.7135690589853548, "grad_norm": 0.3279685974121094, "learning_rate": 8.122336141373384e-06, "loss": 0.0502, "step": 88190 }, { "epoch": 0.7136499716805567, "grad_norm": 0.3411312401294708, "learning_rate": 8.121784612880503e-06, "loss": 0.0378, "step": 88200 }, { "epoch": 0.7137308843757586, "grad_norm": 0.5663458108901978, "learning_rate": 8.121233022130221e-06, "loss": 0.03, "step": 88210 }, { "epoch": 0.7138117970709604, "grad_norm": 0.3097221255302429, "learning_rate": 8.120681369133534e-06, "loss": 0.033, "step": 88220 }, { "epoch": 0.7138927097661623, "grad_norm": 0.7518528699874878, "learning_rate": 8.120129653901447e-06, "loss": 0.0262, "step": 88230 }, { "epoch": 0.7139736224613642, "grad_norm": 0.3133125603199005, "learning_rate": 8.11957787644496e-06, "loss": 0.0362, "step": 88240 }, { "epoch": 0.714054535156566, "grad_norm": 0.20058247447013855, "learning_rate": 8.119026036775078e-06, "loss": 0.0375, "step": 88250 }, { "epoch": 0.714135447851768, "grad_norm": 0.36305564641952515, "learning_rate": 8.118474134902805e-06, "loss": 0.0342, "step": 88260 }, { "epoch": 0.7142163605469698, "grad_norm": 0.6938139796257019, "learning_rate": 8.11792217083915e-06, "loss": 0.0253, "step": 88270 }, { "epoch": 0.7142972732421717, "grad_norm": 0.5985135436058044, "learning_rate": 8.11737014459512e-06, "loss": 0.0514, "step": 88280 }, { "epoch": 0.7143781859373736, "grad_norm": 0.4671245515346527, "learning_rate": 8.116818056181723e-06, "loss": 0.025, "step": 88290 }, { "epoch": 0.7144590986325754, "grad_norm": 0.33723053336143494, "learning_rate": 8.11626590560997e-06, "loss": 0.0314, "step": 88300 }, { "epoch": 0.7145400113277773, "grad_norm": 0.7331520318984985, "learning_rate": 8.11571369289087e-06, "loss": 0.0398, "step": 88310 }, { "epoch": 0.7146209240229792, "grad_norm": 0.6405826210975647, "learning_rate": 8.115161418035442e-06, "loss": 0.0402, "step": 88320 }, { "epoch": 0.7147018367181811, "grad_norm": 0.2619631290435791, "learning_rate": 8.114609081054692e-06, "loss": 0.0379, "step": 88330 }, { "epoch": 0.7147827494133829, "grad_norm": 0.6011856198310852, "learning_rate": 8.114056681959642e-06, "loss": 0.0319, "step": 88340 }, { "epoch": 0.7148636621085849, "grad_norm": 0.3631376624107361, "learning_rate": 8.113504220761303e-06, "loss": 0.0282, "step": 88350 }, { "epoch": 0.7149445748037867, "grad_norm": 0.2872519791126251, "learning_rate": 8.112951697470698e-06, "loss": 0.0325, "step": 88360 }, { "epoch": 0.7150254874989886, "grad_norm": 0.3537043035030365, "learning_rate": 8.112399112098842e-06, "loss": 0.021, "step": 88370 }, { "epoch": 0.7151064001941905, "grad_norm": 0.43612560629844666, "learning_rate": 8.111846464656755e-06, "loss": 0.0349, "step": 88380 }, { "epoch": 0.7151873128893923, "grad_norm": 0.5206050872802734, "learning_rate": 8.11129375515546e-06, "loss": 0.0384, "step": 88390 }, { "epoch": 0.7152682255845942, "grad_norm": 0.4137950539588928, "learning_rate": 8.11074098360598e-06, "loss": 0.0229, "step": 88400 }, { "epoch": 0.7153491382797961, "grad_norm": 0.30851778388023376, "learning_rate": 8.110188150019338e-06, "loss": 0.0307, "step": 88410 }, { "epoch": 0.715430050974998, "grad_norm": 0.795272171497345, "learning_rate": 8.10963525440656e-06, "loss": 0.0466, "step": 88420 }, { "epoch": 0.7155109636701998, "grad_norm": 0.3708254396915436, "learning_rate": 8.10908229677867e-06, "loss": 0.03, "step": 88430 }, { "epoch": 0.7155918763654018, "grad_norm": 0.36155709624290466, "learning_rate": 8.1085292771467e-06, "loss": 0.0237, "step": 88440 }, { "epoch": 0.7156727890606036, "grad_norm": 0.44302859902381897, "learning_rate": 8.107976195521673e-06, "loss": 0.021, "step": 88450 }, { "epoch": 0.7157537017558054, "grad_norm": 0.7440815567970276, "learning_rate": 8.107423051914623e-06, "loss": 0.0358, "step": 88460 }, { "epoch": 0.7158346144510074, "grad_norm": 0.5593496561050415, "learning_rate": 8.10686984633658e-06, "loss": 0.0358, "step": 88470 }, { "epoch": 0.7159155271462092, "grad_norm": 0.7670901417732239, "learning_rate": 8.106316578798576e-06, "loss": 0.0298, "step": 88480 }, { "epoch": 0.7159964398414111, "grad_norm": 0.638826847076416, "learning_rate": 8.10576324931165e-06, "loss": 0.0434, "step": 88490 }, { "epoch": 0.716077352536613, "grad_norm": 0.7707511782646179, "learning_rate": 8.105209857886827e-06, "loss": 0.0352, "step": 88500 }, { "epoch": 0.7161582652318149, "grad_norm": 0.18123655021190643, "learning_rate": 8.104656404535152e-06, "loss": 0.0392, "step": 88510 }, { "epoch": 0.7162391779270167, "grad_norm": 0.4349595606327057, "learning_rate": 8.104102889267658e-06, "loss": 0.03, "step": 88520 }, { "epoch": 0.7163200906222186, "grad_norm": 0.35723984241485596, "learning_rate": 8.103549312095385e-06, "loss": 0.0396, "step": 88530 }, { "epoch": 0.7164010033174205, "grad_norm": 0.6147787570953369, "learning_rate": 8.102995673029373e-06, "loss": 0.0267, "step": 88540 }, { "epoch": 0.7164819160126223, "grad_norm": 0.5417022705078125, "learning_rate": 8.102441972080663e-06, "loss": 0.0424, "step": 88550 }, { "epoch": 0.7165628287078243, "grad_norm": 0.3915901482105255, "learning_rate": 8.101888209260298e-06, "loss": 0.0251, "step": 88560 }, { "epoch": 0.7166437414030261, "grad_norm": 0.39305049180984497, "learning_rate": 8.10133438457932e-06, "loss": 0.0461, "step": 88570 }, { "epoch": 0.716724654098228, "grad_norm": 1.172351360321045, "learning_rate": 8.100780498048775e-06, "loss": 0.03, "step": 88580 }, { "epoch": 0.7168055667934299, "grad_norm": 0.0986880287528038, "learning_rate": 8.100226549679709e-06, "loss": 0.0472, "step": 88590 }, { "epoch": 0.7168864794886317, "grad_norm": 0.3760603964328766, "learning_rate": 8.09967253948317e-06, "loss": 0.0223, "step": 88600 }, { "epoch": 0.7169673921838337, "grad_norm": 0.1594771444797516, "learning_rate": 8.099118467470205e-06, "loss": 0.0204, "step": 88610 }, { "epoch": 0.7170483048790355, "grad_norm": 0.6054666042327881, "learning_rate": 8.098564333651865e-06, "loss": 0.0419, "step": 88620 }, { "epoch": 0.7171292175742374, "grad_norm": 0.3117702007293701, "learning_rate": 8.098010138039203e-06, "loss": 0.0356, "step": 88630 }, { "epoch": 0.7172101302694392, "grad_norm": 0.3938460350036621, "learning_rate": 8.097455880643266e-06, "loss": 0.0341, "step": 88640 }, { "epoch": 0.7172910429646412, "grad_norm": 0.36981201171875, "learning_rate": 8.096901561475113e-06, "loss": 0.0235, "step": 88650 }, { "epoch": 0.717371955659843, "grad_norm": 0.37000709772109985, "learning_rate": 8.096347180545792e-06, "loss": 0.0271, "step": 88660 }, { "epoch": 0.717452868355045, "grad_norm": 0.5485566258430481, "learning_rate": 8.095792737866366e-06, "loss": 0.026, "step": 88670 }, { "epoch": 0.7175337810502468, "grad_norm": 0.1198958158493042, "learning_rate": 8.09523823344789e-06, "loss": 0.0308, "step": 88680 }, { "epoch": 0.7176146937454486, "grad_norm": 0.30173224210739136, "learning_rate": 8.094683667301422e-06, "loss": 0.0237, "step": 88690 }, { "epoch": 0.7176956064406506, "grad_norm": 0.4587364196777344, "learning_rate": 8.094129039438019e-06, "loss": 0.0321, "step": 88700 }, { "epoch": 0.7177765191358524, "grad_norm": 0.5378226637840271, "learning_rate": 8.093574349868747e-06, "loss": 0.0374, "step": 88710 }, { "epoch": 0.7178574318310543, "grad_norm": 0.5441059470176697, "learning_rate": 8.093019598604663e-06, "loss": 0.0301, "step": 88720 }, { "epoch": 0.7179383445262562, "grad_norm": 0.7669762969017029, "learning_rate": 8.092464785656836e-06, "loss": 0.0314, "step": 88730 }, { "epoch": 0.7180192572214581, "grad_norm": 0.5904459357261658, "learning_rate": 8.091909911036323e-06, "loss": 0.0262, "step": 88740 }, { "epoch": 0.7181001699166599, "grad_norm": 0.19511482119560242, "learning_rate": 8.091354974754197e-06, "loss": 0.0288, "step": 88750 }, { "epoch": 0.7181810826118618, "grad_norm": 0.3626211881637573, "learning_rate": 8.090799976821522e-06, "loss": 0.0293, "step": 88760 }, { "epoch": 0.7182619953070637, "grad_norm": 0.7717394232749939, "learning_rate": 8.090244917249366e-06, "loss": 0.0507, "step": 88770 }, { "epoch": 0.7183429080022655, "grad_norm": 0.40340378880500793, "learning_rate": 8.089689796048799e-06, "loss": 0.0289, "step": 88780 }, { "epoch": 0.7184238206974675, "grad_norm": 0.44807931780815125, "learning_rate": 8.089134613230891e-06, "loss": 0.0307, "step": 88790 }, { "epoch": 0.7185047333926693, "grad_norm": 0.7676618099212646, "learning_rate": 8.088579368806716e-06, "loss": 0.0222, "step": 88800 }, { "epoch": 0.7185856460878712, "grad_norm": 0.5138195157051086, "learning_rate": 8.088024062787344e-06, "loss": 0.0386, "step": 88810 }, { "epoch": 0.7186665587830731, "grad_norm": 0.3166801333427429, "learning_rate": 8.087468695183852e-06, "loss": 0.0326, "step": 88820 }, { "epoch": 0.7187474714782749, "grad_norm": 0.8128508925437927, "learning_rate": 8.086913266007315e-06, "loss": 0.0297, "step": 88830 }, { "epoch": 0.7188283841734768, "grad_norm": 0.40760061144828796, "learning_rate": 8.08635777526881e-06, "loss": 0.0303, "step": 88840 }, { "epoch": 0.7189092968686787, "grad_norm": 0.6693220138549805, "learning_rate": 8.085802222979413e-06, "loss": 0.0376, "step": 88850 }, { "epoch": 0.7189902095638806, "grad_norm": 0.4565075635910034, "learning_rate": 8.085246609150208e-06, "loss": 0.0244, "step": 88860 }, { "epoch": 0.7190711222590824, "grad_norm": 0.39295026659965515, "learning_rate": 8.08469093379227e-06, "loss": 0.0221, "step": 88870 }, { "epoch": 0.7191520349542844, "grad_norm": 0.48869937658309937, "learning_rate": 8.084135196916685e-06, "loss": 0.029, "step": 88880 }, { "epoch": 0.7192329476494862, "grad_norm": 0.5609086155891418, "learning_rate": 8.083579398534534e-06, "loss": 0.0381, "step": 88890 }, { "epoch": 0.719313860344688, "grad_norm": 0.5692017674446106, "learning_rate": 8.083023538656903e-06, "loss": 0.0422, "step": 88900 }, { "epoch": 0.71939477303989, "grad_norm": 0.449503630399704, "learning_rate": 8.082467617294873e-06, "loss": 0.0422, "step": 88910 }, { "epoch": 0.7194756857350918, "grad_norm": 0.20481322705745697, "learning_rate": 8.081911634459534e-06, "loss": 0.0454, "step": 88920 }, { "epoch": 0.7195565984302937, "grad_norm": 0.06141030788421631, "learning_rate": 8.081355590161975e-06, "loss": 0.0192, "step": 88930 }, { "epoch": 0.7196375111254956, "grad_norm": 0.4593997001647949, "learning_rate": 8.080799484413283e-06, "loss": 0.0261, "step": 88940 }, { "epoch": 0.7197184238206975, "grad_norm": 0.43555277585983276, "learning_rate": 8.080243317224552e-06, "loss": 0.0257, "step": 88950 }, { "epoch": 0.7197993365158993, "grad_norm": 0.48321834206581116, "learning_rate": 8.079687088606867e-06, "loss": 0.0207, "step": 88960 }, { "epoch": 0.7198802492111013, "grad_norm": 0.31667768955230713, "learning_rate": 8.079130798571327e-06, "loss": 0.0227, "step": 88970 }, { "epoch": 0.7199611619063031, "grad_norm": 0.20896032452583313, "learning_rate": 8.078574447129023e-06, "loss": 0.0341, "step": 88980 }, { "epoch": 0.7200420746015049, "grad_norm": 0.43622854351997375, "learning_rate": 8.07801803429105e-06, "loss": 0.0358, "step": 88990 }, { "epoch": 0.7201229872967069, "grad_norm": 0.5560238361358643, "learning_rate": 8.077461560068505e-06, "loss": 0.0418, "step": 89000 }, { "epoch": 0.7202038999919087, "grad_norm": 0.19639694690704346, "learning_rate": 8.076905024472487e-06, "loss": 0.0248, "step": 89010 }, { "epoch": 0.7202848126871106, "grad_norm": 1.0754789113998413, "learning_rate": 8.076348427514094e-06, "loss": 0.0301, "step": 89020 }, { "epoch": 0.7203657253823125, "grad_norm": 0.2705840766429901, "learning_rate": 8.075791769204426e-06, "loss": 0.0405, "step": 89030 }, { "epoch": 0.7204466380775144, "grad_norm": 0.12635649740695953, "learning_rate": 8.075235049554584e-06, "loss": 0.0396, "step": 89040 }, { "epoch": 0.7205275507727162, "grad_norm": 0.08140513300895691, "learning_rate": 8.074678268575671e-06, "loss": 0.0286, "step": 89050 }, { "epoch": 0.7206084634679181, "grad_norm": 0.3571104109287262, "learning_rate": 8.07412142627879e-06, "loss": 0.0277, "step": 89060 }, { "epoch": 0.72068937616312, "grad_norm": 0.6127689480781555, "learning_rate": 8.07356452267505e-06, "loss": 0.0361, "step": 89070 }, { "epoch": 0.7207702888583218, "grad_norm": 0.23614735901355743, "learning_rate": 8.073007557775553e-06, "loss": 0.0301, "step": 89080 }, { "epoch": 0.7208512015535238, "grad_norm": 0.7256695628166199, "learning_rate": 8.072450531591406e-06, "loss": 0.0462, "step": 89090 }, { "epoch": 0.7209321142487256, "grad_norm": 0.6121082305908203, "learning_rate": 8.07189344413372e-06, "loss": 0.0293, "step": 89100 }, { "epoch": 0.7210130269439275, "grad_norm": 0.36515918374061584, "learning_rate": 8.071336295413605e-06, "loss": 0.032, "step": 89110 }, { "epoch": 0.7210939396391294, "grad_norm": 0.7978013157844543, "learning_rate": 8.070779085442172e-06, "loss": 0.0247, "step": 89120 }, { "epoch": 0.7211748523343312, "grad_norm": 0.7564932107925415, "learning_rate": 8.07022181423053e-06, "loss": 0.032, "step": 89130 }, { "epoch": 0.7212557650295331, "grad_norm": 0.6221368908882141, "learning_rate": 8.0696644817898e-06, "loss": 0.0304, "step": 89140 }, { "epoch": 0.721336677724735, "grad_norm": 0.47490769624710083, "learning_rate": 8.069107088131088e-06, "loss": 0.0341, "step": 89150 }, { "epoch": 0.7214175904199369, "grad_norm": 0.7110568881034851, "learning_rate": 8.068549633265514e-06, "loss": 0.0347, "step": 89160 }, { "epoch": 0.7214985031151387, "grad_norm": 0.21222946047782898, "learning_rate": 8.067992117204199e-06, "loss": 0.0227, "step": 89170 }, { "epoch": 0.7215794158103407, "grad_norm": 0.4429085850715637, "learning_rate": 8.067434539958255e-06, "loss": 0.0311, "step": 89180 }, { "epoch": 0.7216603285055425, "grad_norm": 0.40265953540802, "learning_rate": 8.066876901538808e-06, "loss": 0.0299, "step": 89190 }, { "epoch": 0.7217412412007443, "grad_norm": 0.17130544781684875, "learning_rate": 8.066319201956973e-06, "loss": 0.0325, "step": 89200 }, { "epoch": 0.7218221538959463, "grad_norm": 0.42628753185272217, "learning_rate": 8.065761441223877e-06, "loss": 0.0229, "step": 89210 }, { "epoch": 0.7219030665911481, "grad_norm": 0.8006729483604431, "learning_rate": 8.065203619350639e-06, "loss": 0.0295, "step": 89220 }, { "epoch": 0.72198397928635, "grad_norm": 0.21447816491127014, "learning_rate": 8.064645736348386e-06, "loss": 0.0384, "step": 89230 }, { "epoch": 0.7220648919815519, "grad_norm": 0.4626493752002716, "learning_rate": 8.064087792228244e-06, "loss": 0.0305, "step": 89240 }, { "epoch": 0.7221458046767538, "grad_norm": 0.38401687145233154, "learning_rate": 8.063529787001341e-06, "loss": 0.0391, "step": 89250 }, { "epoch": 0.7222267173719557, "grad_norm": 0.2709329128265381, "learning_rate": 8.0629717206788e-06, "loss": 0.0364, "step": 89260 }, { "epoch": 0.7223076300671576, "grad_norm": 0.6015486717224121, "learning_rate": 8.06241359327176e-06, "loss": 0.0431, "step": 89270 }, { "epoch": 0.7223885427623594, "grad_norm": 0.17163752019405365, "learning_rate": 8.061855404791342e-06, "loss": 0.0259, "step": 89280 }, { "epoch": 0.7224694554575612, "grad_norm": 0.4427795708179474, "learning_rate": 8.061297155248681e-06, "loss": 0.0329, "step": 89290 }, { "epoch": 0.7225503681527632, "grad_norm": 0.2810063362121582, "learning_rate": 8.060738844654912e-06, "loss": 0.0316, "step": 89300 }, { "epoch": 0.722631280847965, "grad_norm": 0.3871922194957733, "learning_rate": 8.060180473021168e-06, "loss": 0.0245, "step": 89310 }, { "epoch": 0.722712193543167, "grad_norm": 0.29924628138542175, "learning_rate": 8.059622040358587e-06, "loss": 0.0257, "step": 89320 }, { "epoch": 0.7227931062383688, "grad_norm": 0.4663331210613251, "learning_rate": 8.059063546678302e-06, "loss": 0.0358, "step": 89330 }, { "epoch": 0.7228740189335707, "grad_norm": 0.4247235357761383, "learning_rate": 8.058504991991452e-06, "loss": 0.0371, "step": 89340 }, { "epoch": 0.7229549316287726, "grad_norm": 0.3888554871082306, "learning_rate": 8.057946376309176e-06, "loss": 0.0277, "step": 89350 }, { "epoch": 0.7230358443239744, "grad_norm": 0.4494995176792145, "learning_rate": 8.057387699642617e-06, "loss": 0.0361, "step": 89360 }, { "epoch": 0.7231167570191763, "grad_norm": 0.42794132232666016, "learning_rate": 8.056828962002913e-06, "loss": 0.0346, "step": 89370 }, { "epoch": 0.7231976697143782, "grad_norm": 0.5673097968101501, "learning_rate": 8.056270163401211e-06, "loss": 0.0306, "step": 89380 }, { "epoch": 0.7232785824095801, "grad_norm": 0.6154161691665649, "learning_rate": 8.055711303848651e-06, "loss": 0.0347, "step": 89390 }, { "epoch": 0.7233594951047819, "grad_norm": 0.7486985921859741, "learning_rate": 8.05515238335638e-06, "loss": 0.03, "step": 89400 }, { "epoch": 0.7234404077999839, "grad_norm": 0.23471365869045258, "learning_rate": 8.054593401935546e-06, "loss": 0.0187, "step": 89410 }, { "epoch": 0.7235213204951857, "grad_norm": 0.3150480091571808, "learning_rate": 8.054034359597293e-06, "loss": 0.0215, "step": 89420 }, { "epoch": 0.7236022331903875, "grad_norm": 0.2921207845211029, "learning_rate": 8.053475256352773e-06, "loss": 0.0284, "step": 89430 }, { "epoch": 0.7236831458855895, "grad_norm": 1.3924219608306885, "learning_rate": 8.052916092213137e-06, "loss": 0.0362, "step": 89440 }, { "epoch": 0.7237640585807913, "grad_norm": 0.3224826455116272, "learning_rate": 8.052356867189532e-06, "loss": 0.0244, "step": 89450 }, { "epoch": 0.7238449712759932, "grad_norm": 0.4958941638469696, "learning_rate": 8.051797581293113e-06, "loss": 0.0195, "step": 89460 }, { "epoch": 0.7239258839711951, "grad_norm": 0.5799919366836548, "learning_rate": 8.051238234535035e-06, "loss": 0.0313, "step": 89470 }, { "epoch": 0.724006796666397, "grad_norm": 0.5104055404663086, "learning_rate": 8.05067882692645e-06, "loss": 0.0281, "step": 89480 }, { "epoch": 0.7240877093615988, "grad_norm": 0.4296344518661499, "learning_rate": 8.050119358478519e-06, "loss": 0.0241, "step": 89490 }, { "epoch": 0.7241686220568007, "grad_norm": 0.30227282643318176, "learning_rate": 8.049559829202395e-06, "loss": 0.0295, "step": 89500 }, { "epoch": 0.7242495347520026, "grad_norm": 0.40026143193244934, "learning_rate": 8.049000239109238e-06, "loss": 0.0237, "step": 89510 }, { "epoch": 0.7243304474472044, "grad_norm": 0.40774405002593994, "learning_rate": 8.04844058821021e-06, "loss": 0.0133, "step": 89520 }, { "epoch": 0.7244113601424064, "grad_norm": 0.45250236988067627, "learning_rate": 8.047880876516467e-06, "loss": 0.0388, "step": 89530 }, { "epoch": 0.7244922728376082, "grad_norm": 0.33324921131134033, "learning_rate": 8.047321104039174e-06, "loss": 0.0281, "step": 89540 }, { "epoch": 0.7245731855328101, "grad_norm": 0.41163504123687744, "learning_rate": 8.046761270789497e-06, "loss": 0.0319, "step": 89550 }, { "epoch": 0.724654098228012, "grad_norm": 0.40248891711235046, "learning_rate": 8.046201376778597e-06, "loss": 0.031, "step": 89560 }, { "epoch": 0.7247350109232139, "grad_norm": 0.6924625039100647, "learning_rate": 8.045641422017642e-06, "loss": 0.0325, "step": 89570 }, { "epoch": 0.7248159236184157, "grad_norm": 0.24236401915550232, "learning_rate": 8.045081406517798e-06, "loss": 0.0315, "step": 89580 }, { "epoch": 0.7248968363136176, "grad_norm": 0.24334900081157684, "learning_rate": 8.044521330290235e-06, "loss": 0.0296, "step": 89590 }, { "epoch": 0.7249777490088195, "grad_norm": 0.7763205766677856, "learning_rate": 8.043961193346122e-06, "loss": 0.0304, "step": 89600 }, { "epoch": 0.7250586617040213, "grad_norm": 0.4267013669013977, "learning_rate": 8.043400995696626e-06, "loss": 0.0147, "step": 89610 }, { "epoch": 0.7251395743992233, "grad_norm": 0.6874452233314514, "learning_rate": 8.042840737352924e-06, "loss": 0.0356, "step": 89620 }, { "epoch": 0.7252204870944251, "grad_norm": 0.6727222800254822, "learning_rate": 8.042280418326187e-06, "loss": 0.0457, "step": 89630 }, { "epoch": 0.725301399789627, "grad_norm": 0.42903387546539307, "learning_rate": 8.04172003862759e-06, "loss": 0.0334, "step": 89640 }, { "epoch": 0.7253823124848289, "grad_norm": 0.6036857962608337, "learning_rate": 8.041159598268307e-06, "loss": 0.0506, "step": 89650 }, { "epoch": 0.7254632251800307, "grad_norm": 0.9520987868309021, "learning_rate": 8.040599097259516e-06, "loss": 0.0409, "step": 89660 }, { "epoch": 0.7255441378752326, "grad_norm": 0.30935218930244446, "learning_rate": 8.040038535612397e-06, "loss": 0.0304, "step": 89670 }, { "epoch": 0.7256250505704345, "grad_norm": 0.7353184819221497, "learning_rate": 8.039477913338126e-06, "loss": 0.0415, "step": 89680 }, { "epoch": 0.7257059632656364, "grad_norm": 0.7565400004386902, "learning_rate": 8.038917230447885e-06, "loss": 0.0296, "step": 89690 }, { "epoch": 0.7257868759608382, "grad_norm": 0.30338314175605774, "learning_rate": 8.038356486952854e-06, "loss": 0.027, "step": 89700 }, { "epoch": 0.7258677886560402, "grad_norm": 0.4314158856868744, "learning_rate": 8.037795682864217e-06, "loss": 0.0299, "step": 89710 }, { "epoch": 0.725948701351242, "grad_norm": 0.683164119720459, "learning_rate": 8.03723481819316e-06, "loss": 0.0328, "step": 89720 }, { "epoch": 0.7260296140464438, "grad_norm": 0.593303918838501, "learning_rate": 8.036673892950866e-06, "loss": 0.0279, "step": 89730 }, { "epoch": 0.7261105267416458, "grad_norm": 0.46147218346595764, "learning_rate": 8.03611290714852e-06, "loss": 0.0321, "step": 89740 }, { "epoch": 0.7261914394368476, "grad_norm": 0.43837979435920715, "learning_rate": 8.035551860797313e-06, "loss": 0.0417, "step": 89750 }, { "epoch": 0.7262723521320495, "grad_norm": 0.41349613666534424, "learning_rate": 8.034990753908433e-06, "loss": 0.0199, "step": 89760 }, { "epoch": 0.7263532648272514, "grad_norm": 0.35739386081695557, "learning_rate": 8.034429586493068e-06, "loss": 0.0283, "step": 89770 }, { "epoch": 0.7264341775224533, "grad_norm": 0.37302687764167786, "learning_rate": 8.033868358562413e-06, "loss": 0.0316, "step": 89780 }, { "epoch": 0.7265150902176551, "grad_norm": 0.7724433541297913, "learning_rate": 8.033307070127659e-06, "loss": 0.0251, "step": 89790 }, { "epoch": 0.726596002912857, "grad_norm": 0.5303049087524414, "learning_rate": 8.032745721199996e-06, "loss": 0.0403, "step": 89800 }, { "epoch": 0.7266769156080589, "grad_norm": 0.5611236691474915, "learning_rate": 8.032184311790623e-06, "loss": 0.0344, "step": 89810 }, { "epoch": 0.7267578283032607, "grad_norm": 0.3132034242153168, "learning_rate": 8.031622841910735e-06, "loss": 0.028, "step": 89820 }, { "epoch": 0.7268387409984627, "grad_norm": 0.5376157760620117, "learning_rate": 8.03106131157153e-06, "loss": 0.032, "step": 89830 }, { "epoch": 0.7269196536936645, "grad_norm": 1.4934799671173096, "learning_rate": 8.030499720784207e-06, "loss": 0.0303, "step": 89840 }, { "epoch": 0.7270005663888665, "grad_norm": 0.6615698933601379, "learning_rate": 8.029938069559963e-06, "loss": 0.0406, "step": 89850 }, { "epoch": 0.7270814790840683, "grad_norm": 0.6668550372123718, "learning_rate": 8.029376357910002e-06, "loss": 0.0274, "step": 89860 }, { "epoch": 0.7271623917792702, "grad_norm": 0.6223609447479248, "learning_rate": 8.028814585845524e-06, "loss": 0.0302, "step": 89870 }, { "epoch": 0.727243304474472, "grad_norm": 0.5559958815574646, "learning_rate": 8.028252753377733e-06, "loss": 0.0478, "step": 89880 }, { "epoch": 0.7273242171696739, "grad_norm": 0.3396524488925934, "learning_rate": 8.027690860517834e-06, "loss": 0.0429, "step": 89890 }, { "epoch": 0.7274051298648758, "grad_norm": 0.48219501972198486, "learning_rate": 8.027128907277034e-06, "loss": 0.0247, "step": 89900 }, { "epoch": 0.7274860425600777, "grad_norm": 0.29837918281555176, "learning_rate": 8.026566893666537e-06, "loss": 0.0309, "step": 89910 }, { "epoch": 0.7275669552552796, "grad_norm": 0.7071927189826965, "learning_rate": 8.026004819697553e-06, "loss": 0.0554, "step": 89920 }, { "epoch": 0.7276478679504814, "grad_norm": 0.3807436525821686, "learning_rate": 8.025442685381291e-06, "loss": 0.0173, "step": 89930 }, { "epoch": 0.7277287806456834, "grad_norm": 0.5016666054725647, "learning_rate": 8.024880490728962e-06, "loss": 0.0248, "step": 89940 }, { "epoch": 0.7278096933408852, "grad_norm": 0.29805371165275574, "learning_rate": 8.024318235751776e-06, "loss": 0.0156, "step": 89950 }, { "epoch": 0.727890606036087, "grad_norm": 0.37049809098243713, "learning_rate": 8.023755920460949e-06, "loss": 0.0442, "step": 89960 }, { "epoch": 0.727971518731289, "grad_norm": 0.2712879776954651, "learning_rate": 8.023193544867693e-06, "loss": 0.0281, "step": 89970 }, { "epoch": 0.7280524314264908, "grad_norm": 0.6886930465698242, "learning_rate": 8.022631108983223e-06, "loss": 0.039, "step": 89980 }, { "epoch": 0.7281333441216927, "grad_norm": 0.49360716342926025, "learning_rate": 8.022068612818759e-06, "loss": 0.0352, "step": 89990 }, { "epoch": 0.7282142568168946, "grad_norm": 0.4895668625831604, "learning_rate": 8.021506056385514e-06, "loss": 0.0326, "step": 90000 }, { "epoch": 0.7282951695120965, "grad_norm": 0.6184448003768921, "learning_rate": 8.02094343969471e-06, "loss": 0.0298, "step": 90010 }, { "epoch": 0.7283760822072983, "grad_norm": 0.0681801363825798, "learning_rate": 8.020380762757567e-06, "loss": 0.0229, "step": 90020 }, { "epoch": 0.7284569949025002, "grad_norm": 0.4201706349849701, "learning_rate": 8.019818025585306e-06, "loss": 0.0464, "step": 90030 }, { "epoch": 0.7285379075977021, "grad_norm": 0.6780823469161987, "learning_rate": 8.01925522818915e-06, "loss": 0.0226, "step": 90040 }, { "epoch": 0.7286188202929039, "grad_norm": 0.7524375915527344, "learning_rate": 8.018692370580321e-06, "loss": 0.0225, "step": 90050 }, { "epoch": 0.7286997329881059, "grad_norm": 0.16296736896038055, "learning_rate": 8.018129452770047e-06, "loss": 0.0354, "step": 90060 }, { "epoch": 0.7287806456833077, "grad_norm": 0.29225093126296997, "learning_rate": 8.017566474769551e-06, "loss": 0.041, "step": 90070 }, { "epoch": 0.7288615583785096, "grad_norm": 0.26066854596138, "learning_rate": 8.017003436590064e-06, "loss": 0.0339, "step": 90080 }, { "epoch": 0.7289424710737115, "grad_norm": 0.4101165235042572, "learning_rate": 8.016440338242812e-06, "loss": 0.026, "step": 90090 }, { "epoch": 0.7290233837689133, "grad_norm": 0.7825868725776672, "learning_rate": 8.015877179739024e-06, "loss": 0.0473, "step": 90100 }, { "epoch": 0.7291042964641152, "grad_norm": 0.3390709459781647, "learning_rate": 8.015313961089935e-06, "loss": 0.0282, "step": 90110 }, { "epoch": 0.7291852091593171, "grad_norm": 0.32725995779037476, "learning_rate": 8.01475068230677e-06, "loss": 0.0266, "step": 90120 }, { "epoch": 0.729266121854519, "grad_norm": 0.6372880935668945, "learning_rate": 8.014187343400772e-06, "loss": 0.0479, "step": 90130 }, { "epoch": 0.7293470345497208, "grad_norm": 0.2679083049297333, "learning_rate": 8.01362394438317e-06, "loss": 0.0289, "step": 90140 }, { "epoch": 0.7294279472449228, "grad_norm": 0.3245930075645447, "learning_rate": 8.013060485265198e-06, "loss": 0.0229, "step": 90150 }, { "epoch": 0.7295088599401246, "grad_norm": 0.5862622857093811, "learning_rate": 8.012496966058097e-06, "loss": 0.0417, "step": 90160 }, { "epoch": 0.7295897726353264, "grad_norm": 0.22776059806346893, "learning_rate": 8.011933386773105e-06, "loss": 0.025, "step": 90170 }, { "epoch": 0.7296706853305284, "grad_norm": 0.3557566702365875, "learning_rate": 8.011369747421459e-06, "loss": 0.0469, "step": 90180 }, { "epoch": 0.7297515980257302, "grad_norm": 0.2845439016819, "learning_rate": 8.010806048014399e-06, "loss": 0.0291, "step": 90190 }, { "epoch": 0.7298325107209321, "grad_norm": 0.2526451051235199, "learning_rate": 8.01024228856317e-06, "loss": 0.0284, "step": 90200 }, { "epoch": 0.729913423416134, "grad_norm": 1.0239588022232056, "learning_rate": 8.009678469079012e-06, "loss": 0.0332, "step": 90210 }, { "epoch": 0.7299943361113359, "grad_norm": 0.3198476731777191, "learning_rate": 8.009114589573172e-06, "loss": 0.0355, "step": 90220 }, { "epoch": 0.7300752488065377, "grad_norm": 0.5141332745552063, "learning_rate": 8.008550650056894e-06, "loss": 0.0342, "step": 90230 }, { "epoch": 0.7301561615017397, "grad_norm": 0.6123313903808594, "learning_rate": 8.007986650541424e-06, "loss": 0.0423, "step": 90240 }, { "epoch": 0.7302370741969415, "grad_norm": 0.3959779739379883, "learning_rate": 8.007422591038012e-06, "loss": 0.0335, "step": 90250 }, { "epoch": 0.7303179868921433, "grad_norm": 0.4942438006401062, "learning_rate": 8.006858471557904e-06, "loss": 0.0407, "step": 90260 }, { "epoch": 0.7303988995873453, "grad_norm": 0.6126704216003418, "learning_rate": 8.006294292112353e-06, "loss": 0.0274, "step": 90270 }, { "epoch": 0.7304798122825471, "grad_norm": 0.013618728145956993, "learning_rate": 8.005730052712606e-06, "loss": 0.0348, "step": 90280 }, { "epoch": 0.730560724977749, "grad_norm": 0.20648683607578278, "learning_rate": 8.005165753369921e-06, "loss": 0.0262, "step": 90290 }, { "epoch": 0.7306416376729509, "grad_norm": 0.419795960187912, "learning_rate": 8.004601394095549e-06, "loss": 0.0243, "step": 90300 }, { "epoch": 0.7307225503681528, "grad_norm": 0.4956446588039398, "learning_rate": 8.004036974900745e-06, "loss": 0.0405, "step": 90310 }, { "epoch": 0.7308034630633546, "grad_norm": 0.2366478443145752, "learning_rate": 8.003472495796764e-06, "loss": 0.0413, "step": 90320 }, { "epoch": 0.7308843757585565, "grad_norm": 0.16769787669181824, "learning_rate": 8.002907956794865e-06, "loss": 0.0275, "step": 90330 }, { "epoch": 0.7309652884537584, "grad_norm": 0.34211769700050354, "learning_rate": 8.002343357906307e-06, "loss": 0.0299, "step": 90340 }, { "epoch": 0.7310462011489602, "grad_norm": 0.3097141981124878, "learning_rate": 8.001778699142348e-06, "loss": 0.0358, "step": 90350 }, { "epoch": 0.7311271138441622, "grad_norm": 0.6195356845855713, "learning_rate": 8.001213980514252e-06, "loss": 0.0273, "step": 90360 }, { "epoch": 0.731208026539364, "grad_norm": 0.4124857783317566, "learning_rate": 8.000649202033277e-06, "loss": 0.0311, "step": 90370 }, { "epoch": 0.731288939234566, "grad_norm": 0.27683204412460327, "learning_rate": 8.00008436371069e-06, "loss": 0.0221, "step": 90380 }, { "epoch": 0.7313698519297678, "grad_norm": 0.5883921384811401, "learning_rate": 7.999519465557752e-06, "loss": 0.0522, "step": 90390 }, { "epoch": 0.7314507646249696, "grad_norm": 0.524368405342102, "learning_rate": 7.99895450758573e-06, "loss": 0.0349, "step": 90400 }, { "epoch": 0.7315316773201715, "grad_norm": 0.41683948040008545, "learning_rate": 7.998389489805893e-06, "loss": 0.0234, "step": 90410 }, { "epoch": 0.7316125900153734, "grad_norm": 0.514336884021759, "learning_rate": 7.997824412229508e-06, "loss": 0.0253, "step": 90420 }, { "epoch": 0.7316935027105753, "grad_norm": 0.2840730547904968, "learning_rate": 7.997259274867843e-06, "loss": 0.0327, "step": 90430 }, { "epoch": 0.7317744154057771, "grad_norm": 0.7596780061721802, "learning_rate": 7.99669407773217e-06, "loss": 0.0415, "step": 90440 }, { "epoch": 0.7318553281009791, "grad_norm": 0.1823085993528366, "learning_rate": 7.99612882083376e-06, "loss": 0.0181, "step": 90450 }, { "epoch": 0.7319362407961809, "grad_norm": 0.384081095457077, "learning_rate": 7.995563504183884e-06, "loss": 0.0319, "step": 90460 }, { "epoch": 0.7320171534913827, "grad_norm": 0.3201015293598175, "learning_rate": 7.994998127793817e-06, "loss": 0.0383, "step": 90470 }, { "epoch": 0.7320980661865847, "grad_norm": 0.3346967399120331, "learning_rate": 7.994432691674838e-06, "loss": 0.0289, "step": 90480 }, { "epoch": 0.7321789788817865, "grad_norm": 0.6205720901489258, "learning_rate": 7.993867195838222e-06, "loss": 0.0361, "step": 90490 }, { "epoch": 0.7322598915769885, "grad_norm": 0.32194483280181885, "learning_rate": 7.99330164029524e-06, "loss": 0.0323, "step": 90500 }, { "epoch": 0.7323408042721903, "grad_norm": 0.8657544255256653, "learning_rate": 7.99273602505718e-06, "loss": 0.0228, "step": 90510 }, { "epoch": 0.7324217169673922, "grad_norm": 0.49974894523620605, "learning_rate": 7.992170350135315e-06, "loss": 0.0292, "step": 90520 }, { "epoch": 0.732502629662594, "grad_norm": 0.43650197982788086, "learning_rate": 7.99160461554093e-06, "loss": 0.0292, "step": 90530 }, { "epoch": 0.732583542357796, "grad_norm": 0.38210099935531616, "learning_rate": 7.991038821285308e-06, "loss": 0.0369, "step": 90540 }, { "epoch": 0.7326644550529978, "grad_norm": 0.7218140363693237, "learning_rate": 7.99047296737973e-06, "loss": 0.0269, "step": 90550 }, { "epoch": 0.7327453677481996, "grad_norm": 0.54080730676651, "learning_rate": 7.989907053835482e-06, "loss": 0.0233, "step": 90560 }, { "epoch": 0.7328262804434016, "grad_norm": 0.6720545887947083, "learning_rate": 7.989341080663851e-06, "loss": 0.0175, "step": 90570 }, { "epoch": 0.7329071931386034, "grad_norm": 0.27285927534103394, "learning_rate": 7.98877504787612e-06, "loss": 0.0186, "step": 90580 }, { "epoch": 0.7329881058338054, "grad_norm": 0.3719969689846039, "learning_rate": 7.988208955483583e-06, "loss": 0.0243, "step": 90590 }, { "epoch": 0.7330690185290072, "grad_norm": 0.593300998210907, "learning_rate": 7.987642803497525e-06, "loss": 0.0517, "step": 90600 }, { "epoch": 0.7331499312242091, "grad_norm": 0.3938773274421692, "learning_rate": 7.987076591929239e-06, "loss": 0.0237, "step": 90610 }, { "epoch": 0.733230843919411, "grad_norm": 1.0191463232040405, "learning_rate": 7.986510320790016e-06, "loss": 0.032, "step": 90620 }, { "epoch": 0.7333117566146128, "grad_norm": 0.4801679849624634, "learning_rate": 7.985943990091149e-06, "loss": 0.0298, "step": 90630 }, { "epoch": 0.7333926693098147, "grad_norm": 0.368414968252182, "learning_rate": 7.985377599843936e-06, "loss": 0.025, "step": 90640 }, { "epoch": 0.7334735820050166, "grad_norm": 0.41905319690704346, "learning_rate": 7.984811150059666e-06, "loss": 0.0328, "step": 90650 }, { "epoch": 0.7335544947002185, "grad_norm": 0.5411176085472107, "learning_rate": 7.98424464074964e-06, "loss": 0.0338, "step": 90660 }, { "epoch": 0.7336354073954203, "grad_norm": 0.5368327498435974, "learning_rate": 7.983678071925153e-06, "loss": 0.0248, "step": 90670 }, { "epoch": 0.7337163200906223, "grad_norm": 0.44421377778053284, "learning_rate": 7.983111443597507e-06, "loss": 0.0313, "step": 90680 }, { "epoch": 0.7337972327858241, "grad_norm": 0.5378197431564331, "learning_rate": 7.982544755778e-06, "loss": 0.0238, "step": 90690 }, { "epoch": 0.7338781454810259, "grad_norm": 0.24404168128967285, "learning_rate": 7.981978008477936e-06, "loss": 0.0219, "step": 90700 }, { "epoch": 0.7339590581762279, "grad_norm": 0.4704445004463196, "learning_rate": 7.981411201708616e-06, "loss": 0.0418, "step": 90710 }, { "epoch": 0.7340399708714297, "grad_norm": 0.4532472789287567, "learning_rate": 7.980844335481341e-06, "loss": 0.0187, "step": 90720 }, { "epoch": 0.7341208835666316, "grad_norm": 0.3859661817550659, "learning_rate": 7.980277409807422e-06, "loss": 0.0395, "step": 90730 }, { "epoch": 0.7342017962618335, "grad_norm": 0.3664877414703369, "learning_rate": 7.979710424698159e-06, "loss": 0.0385, "step": 90740 }, { "epoch": 0.7342827089570354, "grad_norm": 0.13034185767173767, "learning_rate": 7.979143380164864e-06, "loss": 0.0207, "step": 90750 }, { "epoch": 0.7343636216522372, "grad_norm": 0.496548056602478, "learning_rate": 7.978576276218841e-06, "loss": 0.0281, "step": 90760 }, { "epoch": 0.7344445343474391, "grad_norm": 0.6680840849876404, "learning_rate": 7.978009112871404e-06, "loss": 0.0398, "step": 90770 }, { "epoch": 0.734525447042641, "grad_norm": 0.2994028627872467, "learning_rate": 7.977441890133861e-06, "loss": 0.0276, "step": 90780 }, { "epoch": 0.7346063597378428, "grad_norm": 0.5137021541595459, "learning_rate": 7.976874608017528e-06, "loss": 0.0231, "step": 90790 }, { "epoch": 0.7346872724330448, "grad_norm": 0.42467305064201355, "learning_rate": 7.976307266533712e-06, "loss": 0.0259, "step": 90800 }, { "epoch": 0.7347681851282466, "grad_norm": 0.24905458092689514, "learning_rate": 7.975739865693732e-06, "loss": 0.0252, "step": 90810 }, { "epoch": 0.7348490978234485, "grad_norm": 0.24354544281959534, "learning_rate": 7.975172405508903e-06, "loss": 0.0234, "step": 90820 }, { "epoch": 0.7349300105186504, "grad_norm": 0.8951566219329834, "learning_rate": 7.974604885990541e-06, "loss": 0.0398, "step": 90830 }, { "epoch": 0.7350109232138523, "grad_norm": 0.4981396496295929, "learning_rate": 7.974037307149964e-06, "loss": 0.0335, "step": 90840 }, { "epoch": 0.7350918359090541, "grad_norm": 0.2857950031757355, "learning_rate": 7.973469668998491e-06, "loss": 0.0349, "step": 90850 }, { "epoch": 0.735172748604256, "grad_norm": 0.5572939515113831, "learning_rate": 7.972901971547445e-06, "loss": 0.0213, "step": 90860 }, { "epoch": 0.7352536612994579, "grad_norm": 0.37110447883605957, "learning_rate": 7.972334214808143e-06, "loss": 0.0148, "step": 90870 }, { "epoch": 0.7353345739946597, "grad_norm": 0.24027451872825623, "learning_rate": 7.97176639879191e-06, "loss": 0.0219, "step": 90880 }, { "epoch": 0.7354154866898617, "grad_norm": 0.34356066584587097, "learning_rate": 7.97119852351007e-06, "loss": 0.0227, "step": 90890 }, { "epoch": 0.7354963993850635, "grad_norm": 0.4449380934238434, "learning_rate": 7.970630588973948e-06, "loss": 0.0377, "step": 90900 }, { "epoch": 0.7355773120802654, "grad_norm": 0.4162171185016632, "learning_rate": 7.970062595194871e-06, "loss": 0.0291, "step": 90910 }, { "epoch": 0.7356582247754673, "grad_norm": 0.20382460951805115, "learning_rate": 7.969494542184166e-06, "loss": 0.0134, "step": 90920 }, { "epoch": 0.7357391374706691, "grad_norm": 0.6172084212303162, "learning_rate": 7.968926429953159e-06, "loss": 0.0294, "step": 90930 }, { "epoch": 0.735820050165871, "grad_norm": 0.3812059462070465, "learning_rate": 7.968358258513183e-06, "loss": 0.0352, "step": 90940 }, { "epoch": 0.7359009628610729, "grad_norm": 0.7063958644866943, "learning_rate": 7.967790027875567e-06, "loss": 0.0261, "step": 90950 }, { "epoch": 0.7359818755562748, "grad_norm": 0.4600926637649536, "learning_rate": 7.967221738051647e-06, "loss": 0.0312, "step": 90960 }, { "epoch": 0.7360627882514766, "grad_norm": 0.35476601123809814, "learning_rate": 7.966653389052752e-06, "loss": 0.0262, "step": 90970 }, { "epoch": 0.7361437009466786, "grad_norm": 0.6191954016685486, "learning_rate": 7.966084980890219e-06, "loss": 0.0211, "step": 90980 }, { "epoch": 0.7362246136418804, "grad_norm": 0.8131190538406372, "learning_rate": 7.96551651357538e-06, "loss": 0.024, "step": 90990 }, { "epoch": 0.7363055263370822, "grad_norm": 0.4551897943019867, "learning_rate": 7.964947987119578e-06, "loss": 0.0371, "step": 91000 }, { "epoch": 0.7363864390322842, "grad_norm": 0.46103790402412415, "learning_rate": 7.964379401534145e-06, "loss": 0.0191, "step": 91010 }, { "epoch": 0.736467351727486, "grad_norm": 0.824715256690979, "learning_rate": 7.963810756830424e-06, "loss": 0.0406, "step": 91020 }, { "epoch": 0.736548264422688, "grad_norm": 0.43839916586875916, "learning_rate": 7.963242053019755e-06, "loss": 0.0353, "step": 91030 }, { "epoch": 0.7366291771178898, "grad_norm": 0.31786635518074036, "learning_rate": 7.962673290113477e-06, "loss": 0.0209, "step": 91040 }, { "epoch": 0.7367100898130917, "grad_norm": 0.5783445835113525, "learning_rate": 7.962104468122937e-06, "loss": 0.038, "step": 91050 }, { "epoch": 0.7367910025082935, "grad_norm": 0.38258594274520874, "learning_rate": 7.961535587059475e-06, "loss": 0.0395, "step": 91060 }, { "epoch": 0.7368719152034954, "grad_norm": 0.9828662872314453, "learning_rate": 7.960966646934438e-06, "loss": 0.0451, "step": 91070 }, { "epoch": 0.7369528278986973, "grad_norm": 0.5135558247566223, "learning_rate": 7.960397647759172e-06, "loss": 0.0251, "step": 91080 }, { "epoch": 0.7370337405938991, "grad_norm": 0.34167003631591797, "learning_rate": 7.959828589545027e-06, "loss": 0.0297, "step": 91090 }, { "epoch": 0.7371146532891011, "grad_norm": 0.4929523169994354, "learning_rate": 7.959259472303347e-06, "loss": 0.0278, "step": 91100 }, { "epoch": 0.7371955659843029, "grad_norm": 0.7246925234794617, "learning_rate": 7.958690296045484e-06, "loss": 0.0238, "step": 91110 }, { "epoch": 0.7372764786795049, "grad_norm": 0.1837574541568756, "learning_rate": 7.95812106078279e-06, "loss": 0.0179, "step": 91120 }, { "epoch": 0.7373573913747067, "grad_norm": 0.3933265209197998, "learning_rate": 7.957551766526615e-06, "loss": 0.0365, "step": 91130 }, { "epoch": 0.7374383040699086, "grad_norm": 0.8123660683631897, "learning_rate": 7.956982413288316e-06, "loss": 0.0413, "step": 91140 }, { "epoch": 0.7375192167651105, "grad_norm": 0.8968499898910522, "learning_rate": 7.956413001079243e-06, "loss": 0.0288, "step": 91150 }, { "epoch": 0.7376001294603123, "grad_norm": 0.7287920117378235, "learning_rate": 7.955843529910754e-06, "loss": 0.0262, "step": 91160 }, { "epoch": 0.7376810421555142, "grad_norm": 0.2714822292327881, "learning_rate": 7.955273999794207e-06, "loss": 0.0166, "step": 91170 }, { "epoch": 0.737761954850716, "grad_norm": 0.5217963457107544, "learning_rate": 7.954704410740958e-06, "loss": 0.0254, "step": 91180 }, { "epoch": 0.737842867545918, "grad_norm": 0.7941997647285461, "learning_rate": 7.954134762762368e-06, "loss": 0.0346, "step": 91190 }, { "epoch": 0.7379237802411198, "grad_norm": 0.4921451210975647, "learning_rate": 7.953565055869796e-06, "loss": 0.0344, "step": 91200 }, { "epoch": 0.7380046929363218, "grad_norm": 0.3679955005645752, "learning_rate": 7.952995290074606e-06, "loss": 0.0292, "step": 91210 }, { "epoch": 0.7380856056315236, "grad_norm": 0.106656014919281, "learning_rate": 7.952425465388157e-06, "loss": 0.0255, "step": 91220 }, { "epoch": 0.7381665183267254, "grad_norm": 0.42989569902420044, "learning_rate": 7.951855581821817e-06, "loss": 0.0204, "step": 91230 }, { "epoch": 0.7382474310219274, "grad_norm": 0.6697288155555725, "learning_rate": 7.951285639386947e-06, "loss": 0.0217, "step": 91240 }, { "epoch": 0.7383283437171292, "grad_norm": 0.6093329191207886, "learning_rate": 7.950715638094916e-06, "loss": 0.0237, "step": 91250 }, { "epoch": 0.7384092564123311, "grad_norm": 0.41077107191085815, "learning_rate": 7.95014557795709e-06, "loss": 0.0342, "step": 91260 }, { "epoch": 0.738490169107533, "grad_norm": 0.4538777768611908, "learning_rate": 7.949575458984839e-06, "loss": 0.023, "step": 91270 }, { "epoch": 0.7385710818027349, "grad_norm": 1.0831767320632935, "learning_rate": 7.949005281189533e-06, "loss": 0.0378, "step": 91280 }, { "epoch": 0.7386519944979367, "grad_norm": 0.21373862028121948, "learning_rate": 7.948435044582541e-06, "loss": 0.0163, "step": 91290 }, { "epoch": 0.7387329071931386, "grad_norm": 0.3848502039909363, "learning_rate": 7.947864749175238e-06, "loss": 0.0442, "step": 91300 }, { "epoch": 0.7388138198883405, "grad_norm": 0.6038743257522583, "learning_rate": 7.947294394978995e-06, "loss": 0.0308, "step": 91310 }, { "epoch": 0.7388947325835423, "grad_norm": 0.48372066020965576, "learning_rate": 7.946723982005189e-06, "loss": 0.032, "step": 91320 }, { "epoch": 0.7389756452787443, "grad_norm": 0.2747231125831604, "learning_rate": 7.94615351026519e-06, "loss": 0.0212, "step": 91330 }, { "epoch": 0.7390565579739461, "grad_norm": 0.8031275272369385, "learning_rate": 7.945582979770382e-06, "loss": 0.0281, "step": 91340 }, { "epoch": 0.739137470669148, "grad_norm": 0.37336257100105286, "learning_rate": 7.945012390532139e-06, "loss": 0.0398, "step": 91350 }, { "epoch": 0.7392183833643499, "grad_norm": 0.5974957346916199, "learning_rate": 7.944441742561842e-06, "loss": 0.0319, "step": 91360 }, { "epoch": 0.7392992960595517, "grad_norm": 0.7927942872047424, "learning_rate": 7.943871035870869e-06, "loss": 0.0379, "step": 91370 }, { "epoch": 0.7393802087547536, "grad_norm": 0.22567902505397797, "learning_rate": 7.943300270470604e-06, "loss": 0.0203, "step": 91380 }, { "epoch": 0.7394611214499555, "grad_norm": 0.13835281133651733, "learning_rate": 7.942729446372428e-06, "loss": 0.0228, "step": 91390 }, { "epoch": 0.7395420341451574, "grad_norm": 0.4643990993499756, "learning_rate": 7.942158563587725e-06, "loss": 0.0264, "step": 91400 }, { "epoch": 0.7396229468403592, "grad_norm": 0.4282611608505249, "learning_rate": 7.941587622127881e-06, "loss": 0.0206, "step": 91410 }, { "epoch": 0.7397038595355612, "grad_norm": 0.33730348944664, "learning_rate": 7.941016622004282e-06, "loss": 0.034, "step": 91420 }, { "epoch": 0.739784772230763, "grad_norm": 0.38454434275627136, "learning_rate": 7.940445563228316e-06, "loss": 0.0299, "step": 91430 }, { "epoch": 0.7398656849259649, "grad_norm": 0.5163854360580444, "learning_rate": 7.93987444581137e-06, "loss": 0.0339, "step": 91440 }, { "epoch": 0.7399465976211668, "grad_norm": 0.7085573077201843, "learning_rate": 7.939303269764835e-06, "loss": 0.0425, "step": 91450 }, { "epoch": 0.7400275103163686, "grad_norm": 0.3811028003692627, "learning_rate": 7.938732035100103e-06, "loss": 0.0251, "step": 91460 }, { "epoch": 0.7401084230115705, "grad_norm": 0.34833741188049316, "learning_rate": 7.938160741828562e-06, "loss": 0.0234, "step": 91470 }, { "epoch": 0.7401893357067724, "grad_norm": 0.5260832905769348, "learning_rate": 7.937589389961609e-06, "loss": 0.0271, "step": 91480 }, { "epoch": 0.7402702484019743, "grad_norm": 0.2709416151046753, "learning_rate": 7.937017979510637e-06, "loss": 0.0238, "step": 91490 }, { "epoch": 0.7403511610971761, "grad_norm": 0.35016539692878723, "learning_rate": 7.936446510487042e-06, "loss": 0.0391, "step": 91500 }, { "epoch": 0.7404320737923781, "grad_norm": 0.7481938600540161, "learning_rate": 7.935874982902218e-06, "loss": 0.0348, "step": 91510 }, { "epoch": 0.7405129864875799, "grad_norm": 0.4536646902561188, "learning_rate": 7.935303396767568e-06, "loss": 0.0389, "step": 91520 }, { "epoch": 0.7405938991827817, "grad_norm": 0.5333285331726074, "learning_rate": 7.934731752094486e-06, "loss": 0.0256, "step": 91530 }, { "epoch": 0.7406748118779837, "grad_norm": 0.3892883360385895, "learning_rate": 7.934160048894377e-06, "loss": 0.0241, "step": 91540 }, { "epoch": 0.7407557245731855, "grad_norm": 0.28291773796081543, "learning_rate": 7.93358828717864e-06, "loss": 0.0347, "step": 91550 }, { "epoch": 0.7408366372683874, "grad_norm": 0.37605342268943787, "learning_rate": 7.933016466958678e-06, "loss": 0.0345, "step": 91560 }, { "epoch": 0.7409175499635893, "grad_norm": 0.7282413840293884, "learning_rate": 7.932444588245894e-06, "loss": 0.0329, "step": 91570 }, { "epoch": 0.7409984626587912, "grad_norm": 0.3643757998943329, "learning_rate": 7.931872651051691e-06, "loss": 0.04, "step": 91580 }, { "epoch": 0.741079375353993, "grad_norm": 0.5133333802223206, "learning_rate": 7.93130065538748e-06, "loss": 0.0243, "step": 91590 }, { "epoch": 0.7411602880491949, "grad_norm": 0.32304707169532776, "learning_rate": 7.930728601264666e-06, "loss": 0.026, "step": 91600 }, { "epoch": 0.7412412007443968, "grad_norm": 0.5321739912033081, "learning_rate": 7.930156488694656e-06, "loss": 0.0309, "step": 91610 }, { "epoch": 0.7413221134395986, "grad_norm": 0.3451090455055237, "learning_rate": 7.92958431768886e-06, "loss": 0.0244, "step": 91620 }, { "epoch": 0.7414030261348006, "grad_norm": 0.2612493336200714, "learning_rate": 7.929012088258691e-06, "loss": 0.0289, "step": 91630 }, { "epoch": 0.7414839388300024, "grad_norm": 0.3048202693462372, "learning_rate": 7.928439800415557e-06, "loss": 0.0509, "step": 91640 }, { "epoch": 0.7415648515252043, "grad_norm": 0.29626527428627014, "learning_rate": 7.927867454170876e-06, "loss": 0.0329, "step": 91650 }, { "epoch": 0.7416457642204062, "grad_norm": 0.30346059799194336, "learning_rate": 7.92729504953606e-06, "loss": 0.0206, "step": 91660 }, { "epoch": 0.741726676915608, "grad_norm": 0.43243542313575745, "learning_rate": 7.926722586522521e-06, "loss": 0.0178, "step": 91670 }, { "epoch": 0.74180758961081, "grad_norm": 1.2041538953781128, "learning_rate": 7.92615006514168e-06, "loss": 0.0401, "step": 91680 }, { "epoch": 0.7418885023060118, "grad_norm": 0.44806987047195435, "learning_rate": 7.925577485404954e-06, "loss": 0.0449, "step": 91690 }, { "epoch": 0.7419694150012137, "grad_norm": 0.799566388130188, "learning_rate": 7.925004847323761e-06, "loss": 0.0414, "step": 91700 }, { "epoch": 0.7420503276964155, "grad_norm": 0.2667235732078552, "learning_rate": 7.924432150909522e-06, "loss": 0.0416, "step": 91710 }, { "epoch": 0.7421312403916175, "grad_norm": 0.6260762214660645, "learning_rate": 7.923859396173655e-06, "loss": 0.0257, "step": 91720 }, { "epoch": 0.7422121530868193, "grad_norm": 0.5014429688453674, "learning_rate": 7.923286583127587e-06, "loss": 0.0387, "step": 91730 }, { "epoch": 0.7422930657820211, "grad_norm": 0.2883797585964203, "learning_rate": 7.92271371178274e-06, "loss": 0.0237, "step": 91740 }, { "epoch": 0.7423739784772231, "grad_norm": 0.2884066700935364, "learning_rate": 7.922140782150536e-06, "loss": 0.0274, "step": 91750 }, { "epoch": 0.7424548911724249, "grad_norm": 0.4937885105609894, "learning_rate": 7.921567794242403e-06, "loss": 0.0352, "step": 91760 }, { "epoch": 0.7425358038676269, "grad_norm": 0.4470606744289398, "learning_rate": 7.920994748069771e-06, "loss": 0.0407, "step": 91770 }, { "epoch": 0.7426167165628287, "grad_norm": 0.43635037541389465, "learning_rate": 7.920421643644062e-06, "loss": 0.0395, "step": 91780 }, { "epoch": 0.7426976292580306, "grad_norm": 0.34688350558280945, "learning_rate": 7.91984848097671e-06, "loss": 0.0502, "step": 91790 }, { "epoch": 0.7427785419532325, "grad_norm": 0.4054473340511322, "learning_rate": 7.919275260079144e-06, "loss": 0.0274, "step": 91800 }, { "epoch": 0.7428594546484344, "grad_norm": 0.10592250525951385, "learning_rate": 7.918701980962797e-06, "loss": 0.0246, "step": 91810 }, { "epoch": 0.7429403673436362, "grad_norm": 0.3699023425579071, "learning_rate": 7.918128643639098e-06, "loss": 0.0468, "step": 91820 }, { "epoch": 0.743021280038838, "grad_norm": 1.475545883178711, "learning_rate": 7.917555248119486e-06, "loss": 0.0264, "step": 91830 }, { "epoch": 0.74310219273404, "grad_norm": 0.3253563642501831, "learning_rate": 7.916981794415393e-06, "loss": 0.0286, "step": 91840 }, { "epoch": 0.7431831054292418, "grad_norm": 0.15807604789733887, "learning_rate": 7.916408282538254e-06, "loss": 0.0265, "step": 91850 }, { "epoch": 0.7432640181244438, "grad_norm": 0.7393542528152466, "learning_rate": 7.915834712499513e-06, "loss": 0.0377, "step": 91860 }, { "epoch": 0.7433449308196456, "grad_norm": 0.5291221737861633, "learning_rate": 7.915261084310602e-06, "loss": 0.0376, "step": 91870 }, { "epoch": 0.7434258435148475, "grad_norm": 0.5347362160682678, "learning_rate": 7.914687397982962e-06, "loss": 0.0362, "step": 91880 }, { "epoch": 0.7435067562100494, "grad_norm": 0.6271910071372986, "learning_rate": 7.914113653528037e-06, "loss": 0.0415, "step": 91890 }, { "epoch": 0.7435876689052512, "grad_norm": 0.32217392325401306, "learning_rate": 7.913539850957266e-06, "loss": 0.0203, "step": 91900 }, { "epoch": 0.7436685816004531, "grad_norm": 0.6073713302612305, "learning_rate": 7.912965990282093e-06, "loss": 0.0274, "step": 91910 }, { "epoch": 0.743749494295655, "grad_norm": 0.27500566840171814, "learning_rate": 7.912392071513964e-06, "loss": 0.0235, "step": 91920 }, { "epoch": 0.7438304069908569, "grad_norm": 0.13893327116966248, "learning_rate": 7.911818094664321e-06, "loss": 0.0238, "step": 91930 }, { "epoch": 0.7439113196860587, "grad_norm": 0.5034595727920532, "learning_rate": 7.911244059744615e-06, "loss": 0.0262, "step": 91940 }, { "epoch": 0.7439922323812607, "grad_norm": 0.5153493881225586, "learning_rate": 7.910669966766293e-06, "loss": 0.0273, "step": 91950 }, { "epoch": 0.7440731450764625, "grad_norm": 0.2318083792924881, "learning_rate": 7.910095815740804e-06, "loss": 0.0236, "step": 91960 }, { "epoch": 0.7441540577716643, "grad_norm": 0.7735081911087036, "learning_rate": 7.909521606679595e-06, "loss": 0.0454, "step": 91970 }, { "epoch": 0.7442349704668663, "grad_norm": 0.8843902945518494, "learning_rate": 7.908947339594121e-06, "loss": 0.0385, "step": 91980 }, { "epoch": 0.7443158831620681, "grad_norm": 0.24905307590961456, "learning_rate": 7.908373014495834e-06, "loss": 0.047, "step": 91990 }, { "epoch": 0.74439679585727, "grad_norm": 0.33518728613853455, "learning_rate": 7.907798631396186e-06, "loss": 0.0303, "step": 92000 }, { "epoch": 0.7444777085524719, "grad_norm": 0.17874488234519958, "learning_rate": 7.907224190306633e-06, "loss": 0.0376, "step": 92010 }, { "epoch": 0.7445586212476738, "grad_norm": 0.2422860711812973, "learning_rate": 7.90664969123863e-06, "loss": 0.0332, "step": 92020 }, { "epoch": 0.7446395339428756, "grad_norm": 0.34031835198402405, "learning_rate": 7.906075134203636e-06, "loss": 0.0218, "step": 92030 }, { "epoch": 0.7447204466380775, "grad_norm": 0.5875203609466553, "learning_rate": 7.90550051921311e-06, "loss": 0.0279, "step": 92040 }, { "epoch": 0.7448013593332794, "grad_norm": 0.5391024947166443, "learning_rate": 7.904925846278508e-06, "loss": 0.0372, "step": 92050 }, { "epoch": 0.7448822720284812, "grad_norm": 0.21812677383422852, "learning_rate": 7.904351115411293e-06, "loss": 0.0314, "step": 92060 }, { "epoch": 0.7449631847236832, "grad_norm": 0.6050980687141418, "learning_rate": 7.903776326622926e-06, "loss": 0.0268, "step": 92070 }, { "epoch": 0.745044097418885, "grad_norm": 0.7938356399536133, "learning_rate": 7.903201479924872e-06, "loss": 0.0415, "step": 92080 }, { "epoch": 0.7451250101140869, "grad_norm": 0.3568893074989319, "learning_rate": 7.902626575328592e-06, "loss": 0.0285, "step": 92090 }, { "epoch": 0.7452059228092888, "grad_norm": 0.4154864549636841, "learning_rate": 7.902051612845552e-06, "loss": 0.0238, "step": 92100 }, { "epoch": 0.7452868355044907, "grad_norm": 0.4592413604259491, "learning_rate": 7.90147659248722e-06, "loss": 0.0362, "step": 92110 }, { "epoch": 0.7453677481996925, "grad_norm": 0.8255317211151123, "learning_rate": 7.900901514265063e-06, "loss": 0.0404, "step": 92120 }, { "epoch": 0.7454486608948944, "grad_norm": 0.3236956298351288, "learning_rate": 7.900326378190549e-06, "loss": 0.0258, "step": 92130 }, { "epoch": 0.7455295735900963, "grad_norm": 0.10234349220991135, "learning_rate": 7.899751184275148e-06, "loss": 0.025, "step": 92140 }, { "epoch": 0.7456104862852981, "grad_norm": 0.6972524523735046, "learning_rate": 7.899175932530332e-06, "loss": 0.0395, "step": 92150 }, { "epoch": 0.7456913989805001, "grad_norm": 0.5331525206565857, "learning_rate": 7.898600622967572e-06, "loss": 0.0367, "step": 92160 }, { "epoch": 0.7457723116757019, "grad_norm": 0.5548012852668762, "learning_rate": 7.898025255598341e-06, "loss": 0.0329, "step": 92170 }, { "epoch": 0.7458532243709038, "grad_norm": 0.6016337275505066, "learning_rate": 7.897449830434116e-06, "loss": 0.0302, "step": 92180 }, { "epoch": 0.7459341370661057, "grad_norm": 0.49750909209251404, "learning_rate": 7.896874347486369e-06, "loss": 0.025, "step": 92190 }, { "epoch": 0.7460150497613075, "grad_norm": 0.46887484192848206, "learning_rate": 7.89629880676658e-06, "loss": 0.0453, "step": 92200 }, { "epoch": 0.7460959624565094, "grad_norm": 0.10389130562543869, "learning_rate": 7.895723208286226e-06, "loss": 0.0239, "step": 92210 }, { "epoch": 0.7461768751517113, "grad_norm": 0.3910377323627472, "learning_rate": 7.895147552056785e-06, "loss": 0.0188, "step": 92220 }, { "epoch": 0.7462577878469132, "grad_norm": 0.17228831350803375, "learning_rate": 7.894571838089738e-06, "loss": 0.034, "step": 92230 }, { "epoch": 0.746338700542115, "grad_norm": 0.36710473895072937, "learning_rate": 7.893996066396566e-06, "loss": 0.0211, "step": 92240 }, { "epoch": 0.746419613237317, "grad_norm": 0.37170329689979553, "learning_rate": 7.893420236988754e-06, "loss": 0.0413, "step": 92250 }, { "epoch": 0.7465005259325188, "grad_norm": 0.35188376903533936, "learning_rate": 7.892844349877781e-06, "loss": 0.0434, "step": 92260 }, { "epoch": 0.7465814386277206, "grad_norm": 0.5579473376274109, "learning_rate": 7.892268405075136e-06, "loss": 0.0392, "step": 92270 }, { "epoch": 0.7466623513229226, "grad_norm": 0.5346609950065613, "learning_rate": 7.891692402592303e-06, "loss": 0.0235, "step": 92280 }, { "epoch": 0.7467432640181244, "grad_norm": 0.15215934813022614, "learning_rate": 7.89111634244077e-06, "loss": 0.0289, "step": 92290 }, { "epoch": 0.7468241767133263, "grad_norm": 0.5100821852684021, "learning_rate": 7.890540224632024e-06, "loss": 0.0217, "step": 92300 }, { "epoch": 0.7469050894085282, "grad_norm": 0.3082330822944641, "learning_rate": 7.889964049177556e-06, "loss": 0.0456, "step": 92310 }, { "epoch": 0.7469860021037301, "grad_norm": 0.4881996512413025, "learning_rate": 7.889387816088856e-06, "loss": 0.0265, "step": 92320 }, { "epoch": 0.747066914798932, "grad_norm": 0.46501386165618896, "learning_rate": 7.888811525377417e-06, "loss": 0.0341, "step": 92330 }, { "epoch": 0.7471478274941338, "grad_norm": 0.5164555311203003, "learning_rate": 7.888235177054729e-06, "loss": 0.0428, "step": 92340 }, { "epoch": 0.7472287401893357, "grad_norm": 0.4134518802165985, "learning_rate": 7.887658771132289e-06, "loss": 0.0428, "step": 92350 }, { "epoch": 0.7473096528845375, "grad_norm": 0.5724326968193054, "learning_rate": 7.887082307621589e-06, "loss": 0.0349, "step": 92360 }, { "epoch": 0.7473905655797395, "grad_norm": 0.6549954414367676, "learning_rate": 7.886505786534129e-06, "loss": 0.0478, "step": 92370 }, { "epoch": 0.7474714782749413, "grad_norm": 0.48874905705451965, "learning_rate": 7.885929207881404e-06, "loss": 0.0279, "step": 92380 }, { "epoch": 0.7475523909701433, "grad_norm": 0.3380171060562134, "learning_rate": 7.885352571674914e-06, "loss": 0.0238, "step": 92390 }, { "epoch": 0.7476333036653451, "grad_norm": 0.3920460343360901, "learning_rate": 7.88477587792616e-06, "loss": 0.0349, "step": 92400 }, { "epoch": 0.747714216360547, "grad_norm": 0.3455785810947418, "learning_rate": 7.884199126646637e-06, "loss": 0.0232, "step": 92410 }, { "epoch": 0.7477951290557489, "grad_norm": 0.41378891468048096, "learning_rate": 7.883622317847855e-06, "loss": 0.0387, "step": 92420 }, { "epoch": 0.7478760417509507, "grad_norm": 0.323882520198822, "learning_rate": 7.883045451541311e-06, "loss": 0.0379, "step": 92430 }, { "epoch": 0.7479569544461526, "grad_norm": 0.5825592875480652, "learning_rate": 7.882468527738513e-06, "loss": 0.0228, "step": 92440 }, { "epoch": 0.7480378671413545, "grad_norm": 0.4628365933895111, "learning_rate": 7.881891546450963e-06, "loss": 0.0252, "step": 92450 }, { "epoch": 0.7481187798365564, "grad_norm": 0.5300740003585815, "learning_rate": 7.881314507690175e-06, "loss": 0.0411, "step": 92460 }, { "epoch": 0.7481996925317582, "grad_norm": 0.3788521885871887, "learning_rate": 7.880737411467647e-06, "loss": 0.0229, "step": 92470 }, { "epoch": 0.7482806052269602, "grad_norm": 0.4645195007324219, "learning_rate": 7.880160257794896e-06, "loss": 0.0301, "step": 92480 }, { "epoch": 0.748361517922162, "grad_norm": 0.3592296242713928, "learning_rate": 7.879583046683428e-06, "loss": 0.0333, "step": 92490 }, { "epoch": 0.7484424306173638, "grad_norm": 0.37305721640586853, "learning_rate": 7.879005778144754e-06, "loss": 0.0396, "step": 92500 }, { "epoch": 0.7485233433125658, "grad_norm": 0.29776814579963684, "learning_rate": 7.87842845219039e-06, "loss": 0.0252, "step": 92510 }, { "epoch": 0.7486042560077676, "grad_norm": 0.38710150122642517, "learning_rate": 7.877851068831844e-06, "loss": 0.0199, "step": 92520 }, { "epoch": 0.7486851687029695, "grad_norm": 0.32272088527679443, "learning_rate": 7.877273628080635e-06, "loss": 0.0323, "step": 92530 }, { "epoch": 0.7487660813981714, "grad_norm": 0.6522539854049683, "learning_rate": 7.87669612994828e-06, "loss": 0.0517, "step": 92540 }, { "epoch": 0.7488469940933733, "grad_norm": 0.5800647139549255, "learning_rate": 7.87611857444629e-06, "loss": 0.0336, "step": 92550 }, { "epoch": 0.7489279067885751, "grad_norm": 0.5526552200317383, "learning_rate": 7.87554096158619e-06, "loss": 0.027, "step": 92560 }, { "epoch": 0.749008819483777, "grad_norm": 0.4276685416698456, "learning_rate": 7.874963291379495e-06, "loss": 0.0249, "step": 92570 }, { "epoch": 0.7490897321789789, "grad_norm": 0.24111464619636536, "learning_rate": 7.874385563837725e-06, "loss": 0.0237, "step": 92580 }, { "epoch": 0.7491706448741807, "grad_norm": 0.8364747166633606, "learning_rate": 7.873807778972405e-06, "loss": 0.0258, "step": 92590 }, { "epoch": 0.7492515575693827, "grad_norm": 0.4003399908542633, "learning_rate": 7.873229936795055e-06, "loss": 0.027, "step": 92600 }, { "epoch": 0.7493324702645845, "grad_norm": 0.41992872953414917, "learning_rate": 7.872652037317199e-06, "loss": 0.0359, "step": 92610 }, { "epoch": 0.7494133829597864, "grad_norm": 0.3699232339859009, "learning_rate": 7.872074080550364e-06, "loss": 0.0399, "step": 92620 }, { "epoch": 0.7494942956549883, "grad_norm": 0.389552503824234, "learning_rate": 7.871496066506074e-06, "loss": 0.0287, "step": 92630 }, { "epoch": 0.7495752083501901, "grad_norm": 0.19267520308494568, "learning_rate": 7.870917995195857e-06, "loss": 0.0285, "step": 92640 }, { "epoch": 0.749656121045392, "grad_norm": 0.49551665782928467, "learning_rate": 7.87033986663124e-06, "loss": 0.0243, "step": 92650 }, { "epoch": 0.7497370337405939, "grad_norm": 1.0373740196228027, "learning_rate": 7.869761680823755e-06, "loss": 0.0214, "step": 92660 }, { "epoch": 0.7498179464357958, "grad_norm": 0.6344701647758484, "learning_rate": 7.869183437784933e-06, "loss": 0.0357, "step": 92670 }, { "epoch": 0.7498988591309976, "grad_norm": 0.288742333650589, "learning_rate": 7.868605137526302e-06, "loss": 0.0199, "step": 92680 }, { "epoch": 0.7499797718261996, "grad_norm": 0.003362262388691306, "learning_rate": 7.868026780059398e-06, "loss": 0.0238, "step": 92690 }, { "epoch": 0.7500606845214014, "grad_norm": 0.3553830087184906, "learning_rate": 7.867448365395756e-06, "loss": 0.0399, "step": 92700 }, { "epoch": 0.7501415972166033, "grad_norm": 0.7106720209121704, "learning_rate": 7.866869893546909e-06, "loss": 0.0441, "step": 92710 }, { "epoch": 0.7502225099118052, "grad_norm": 0.4354603886604309, "learning_rate": 7.866291364524391e-06, "loss": 0.0321, "step": 92720 }, { "epoch": 0.750303422607007, "grad_norm": 0.8268329501152039, "learning_rate": 7.865712778339749e-06, "loss": 0.0319, "step": 92730 }, { "epoch": 0.7503843353022089, "grad_norm": 0.3145536482334137, "learning_rate": 7.86513413500451e-06, "loss": 0.0304, "step": 92740 }, { "epoch": 0.7504652479974108, "grad_norm": 0.3126208484172821, "learning_rate": 7.864555434530224e-06, "loss": 0.0383, "step": 92750 }, { "epoch": 0.7505461606926127, "grad_norm": 0.2704756557941437, "learning_rate": 7.863976676928424e-06, "loss": 0.0303, "step": 92760 }, { "epoch": 0.7506270733878145, "grad_norm": 0.6209275722503662, "learning_rate": 7.863397862210656e-06, "loss": 0.043, "step": 92770 }, { "epoch": 0.7507079860830165, "grad_norm": 0.23522403836250305, "learning_rate": 7.862818990388462e-06, "loss": 0.0461, "step": 92780 }, { "epoch": 0.7507888987782183, "grad_norm": 0.309424489736557, "learning_rate": 7.862240061473387e-06, "loss": 0.027, "step": 92790 }, { "epoch": 0.7508698114734201, "grad_norm": 0.5740000009536743, "learning_rate": 7.861661075476976e-06, "loss": 0.0362, "step": 92800 }, { "epoch": 0.7509507241686221, "grad_norm": 0.45362523198127747, "learning_rate": 7.861082032410778e-06, "loss": 0.0297, "step": 92810 }, { "epoch": 0.7510316368638239, "grad_norm": 0.44341376423835754, "learning_rate": 7.860502932286338e-06, "loss": 0.0242, "step": 92820 }, { "epoch": 0.7511125495590258, "grad_norm": 0.5274885892868042, "learning_rate": 7.859923775115205e-06, "loss": 0.0278, "step": 92830 }, { "epoch": 0.7511934622542277, "grad_norm": 0.43667304515838623, "learning_rate": 7.859344560908931e-06, "loss": 0.0226, "step": 92840 }, { "epoch": 0.7512743749494296, "grad_norm": 0.23122543096542358, "learning_rate": 7.858765289679065e-06, "loss": 0.0335, "step": 92850 }, { "epoch": 0.7513552876446314, "grad_norm": 0.5495975017547607, "learning_rate": 7.858185961437162e-06, "loss": 0.0277, "step": 92860 }, { "epoch": 0.7514362003398333, "grad_norm": 0.3820083439350128, "learning_rate": 7.857606576194771e-06, "loss": 0.0363, "step": 92870 }, { "epoch": 0.7515171130350352, "grad_norm": 0.49947965145111084, "learning_rate": 7.857027133963453e-06, "loss": 0.0337, "step": 92880 }, { "epoch": 0.751598025730237, "grad_norm": 0.5762577056884766, "learning_rate": 7.856447634754758e-06, "loss": 0.0508, "step": 92890 }, { "epoch": 0.751678938425439, "grad_norm": 0.41026192903518677, "learning_rate": 7.855868078580247e-06, "loss": 0.0277, "step": 92900 }, { "epoch": 0.7517598511206408, "grad_norm": 0.7421883940696716, "learning_rate": 7.855288465451474e-06, "loss": 0.0307, "step": 92910 }, { "epoch": 0.7518407638158427, "grad_norm": 0.7531557679176331, "learning_rate": 7.854708795380002e-06, "loss": 0.0274, "step": 92920 }, { "epoch": 0.7519216765110446, "grad_norm": 0.5661449432373047, "learning_rate": 7.854129068377388e-06, "loss": 0.0369, "step": 92930 }, { "epoch": 0.7520025892062464, "grad_norm": 0.3510076403617859, "learning_rate": 7.853549284455195e-06, "loss": 0.0412, "step": 92940 }, { "epoch": 0.7520835019014483, "grad_norm": 0.3135124146938324, "learning_rate": 7.852969443624988e-06, "loss": 0.0281, "step": 92950 }, { "epoch": 0.7521644145966502, "grad_norm": 0.3305915892124176, "learning_rate": 7.852389545898328e-06, "loss": 0.0358, "step": 92960 }, { "epoch": 0.7522453272918521, "grad_norm": 0.5511149168014526, "learning_rate": 7.851809591286778e-06, "loss": 0.0327, "step": 92970 }, { "epoch": 0.752326239987054, "grad_norm": 0.8291013240814209, "learning_rate": 7.851229579801907e-06, "loss": 0.0329, "step": 92980 }, { "epoch": 0.7524071526822559, "grad_norm": 0.10395484417676926, "learning_rate": 7.850649511455281e-06, "loss": 0.0371, "step": 92990 }, { "epoch": 0.7524880653774577, "grad_norm": 0.3749809265136719, "learning_rate": 7.85006938625847e-06, "loss": 0.0395, "step": 93000 }, { "epoch": 0.7525689780726597, "grad_norm": 0.6036255955696106, "learning_rate": 7.84948920422304e-06, "loss": 0.0487, "step": 93010 }, { "epoch": 0.7526498907678615, "grad_norm": 0.2441556751728058, "learning_rate": 7.848908965360564e-06, "loss": 0.0294, "step": 93020 }, { "epoch": 0.7527308034630633, "grad_norm": 0.3419229984283447, "learning_rate": 7.848328669682614e-06, "loss": 0.0213, "step": 93030 }, { "epoch": 0.7528117161582653, "grad_norm": 0.3196330666542053, "learning_rate": 7.847748317200761e-06, "loss": 0.0296, "step": 93040 }, { "epoch": 0.7528926288534671, "grad_norm": 0.22572395205497742, "learning_rate": 7.84716790792658e-06, "loss": 0.0339, "step": 93050 }, { "epoch": 0.752973541548669, "grad_norm": 0.42215606570243835, "learning_rate": 7.846587441871645e-06, "loss": 0.0345, "step": 93060 }, { "epoch": 0.7530544542438709, "grad_norm": 0.5275368690490723, "learning_rate": 7.846006919047533e-06, "loss": 0.026, "step": 93070 }, { "epoch": 0.7531353669390728, "grad_norm": 0.2729471027851105, "learning_rate": 7.845426339465822e-06, "loss": 0.0354, "step": 93080 }, { "epoch": 0.7532162796342746, "grad_norm": 0.8134744167327881, "learning_rate": 7.844845703138089e-06, "loss": 0.0325, "step": 93090 }, { "epoch": 0.7532971923294765, "grad_norm": 0.31110939383506775, "learning_rate": 7.844265010075914e-06, "loss": 0.0357, "step": 93100 }, { "epoch": 0.7533781050246784, "grad_norm": 0.6485303044319153, "learning_rate": 7.843684260290879e-06, "loss": 0.0245, "step": 93110 }, { "epoch": 0.7534590177198802, "grad_norm": 0.20051823556423187, "learning_rate": 7.843103453794564e-06, "loss": 0.0338, "step": 93120 }, { "epoch": 0.7535399304150822, "grad_norm": 0.14172402024269104, "learning_rate": 7.84252259059855e-06, "loss": 0.0242, "step": 93130 }, { "epoch": 0.753620843110284, "grad_norm": 0.6936216950416565, "learning_rate": 7.841941670714427e-06, "loss": 0.0251, "step": 93140 }, { "epoch": 0.7537017558054859, "grad_norm": 0.4376938045024872, "learning_rate": 7.841360694153776e-06, "loss": 0.0427, "step": 93150 }, { "epoch": 0.7537826685006878, "grad_norm": 0.14209142327308655, "learning_rate": 7.840779660928186e-06, "loss": 0.023, "step": 93160 }, { "epoch": 0.7538635811958896, "grad_norm": 0.5083085298538208, "learning_rate": 7.840198571049241e-06, "loss": 0.0232, "step": 93170 }, { "epoch": 0.7539444938910915, "grad_norm": 0.47626423835754395, "learning_rate": 7.839617424528532e-06, "loss": 0.0238, "step": 93180 }, { "epoch": 0.7540254065862934, "grad_norm": 1.1953569650650024, "learning_rate": 7.839036221377649e-06, "loss": 0.0284, "step": 93190 }, { "epoch": 0.7541063192814953, "grad_norm": 0.6286625862121582, "learning_rate": 7.838454961608179e-06, "loss": 0.027, "step": 93200 }, { "epoch": 0.7541872319766971, "grad_norm": 0.20846626162528992, "learning_rate": 7.83787364523172e-06, "loss": 0.0293, "step": 93210 }, { "epoch": 0.7542681446718991, "grad_norm": 0.3663382828235626, "learning_rate": 7.837292272259862e-06, "loss": 0.0302, "step": 93220 }, { "epoch": 0.7543490573671009, "grad_norm": 0.39895352721214294, "learning_rate": 7.836710842704199e-06, "loss": 0.0208, "step": 93230 }, { "epoch": 0.7544299700623027, "grad_norm": 0.2635306119918823, "learning_rate": 7.836129356576326e-06, "loss": 0.0386, "step": 93240 }, { "epoch": 0.7545108827575047, "grad_norm": 0.32442212104797363, "learning_rate": 7.83554781388784e-06, "loss": 0.0353, "step": 93250 }, { "epoch": 0.7545917954527065, "grad_norm": 0.3744436204433441, "learning_rate": 7.83496621465034e-06, "loss": 0.0205, "step": 93260 }, { "epoch": 0.7546727081479084, "grad_norm": 0.4891349673271179, "learning_rate": 7.834384558875424e-06, "loss": 0.0357, "step": 93270 }, { "epoch": 0.7547536208431103, "grad_norm": 0.2963310480117798, "learning_rate": 7.83380284657469e-06, "loss": 0.0156, "step": 93280 }, { "epoch": 0.7548345335383122, "grad_norm": 0.8654397130012512, "learning_rate": 7.833221077759743e-06, "loss": 0.0416, "step": 93290 }, { "epoch": 0.754915446233514, "grad_norm": 0.6248089075088501, "learning_rate": 7.832639252442182e-06, "loss": 0.0377, "step": 93300 }, { "epoch": 0.7549963589287159, "grad_norm": 0.18553318083286285, "learning_rate": 7.83205737063361e-06, "loss": 0.022, "step": 93310 }, { "epoch": 0.7550772716239178, "grad_norm": 0.500454843044281, "learning_rate": 7.831475432345633e-06, "loss": 0.0442, "step": 93320 }, { "epoch": 0.7551581843191196, "grad_norm": 0.6819913983345032, "learning_rate": 7.830893437589856e-06, "loss": 0.0237, "step": 93330 }, { "epoch": 0.7552390970143216, "grad_norm": 0.4694121479988098, "learning_rate": 7.830311386377886e-06, "loss": 0.0392, "step": 93340 }, { "epoch": 0.7553200097095234, "grad_norm": 0.32334664463996887, "learning_rate": 7.82972927872133e-06, "loss": 0.0206, "step": 93350 }, { "epoch": 0.7554009224047253, "grad_norm": 0.4478219449520111, "learning_rate": 7.829147114631797e-06, "loss": 0.0251, "step": 93360 }, { "epoch": 0.7554818350999272, "grad_norm": 0.20429600775241852, "learning_rate": 7.828564894120898e-06, "loss": 0.0371, "step": 93370 }, { "epoch": 0.7555627477951291, "grad_norm": 0.2410871535539627, "learning_rate": 7.827982617200245e-06, "loss": 0.0237, "step": 93380 }, { "epoch": 0.7556436604903309, "grad_norm": 0.4540003538131714, "learning_rate": 7.827400283881446e-06, "loss": 0.0263, "step": 93390 }, { "epoch": 0.7557245731855328, "grad_norm": 0.6652147173881531, "learning_rate": 7.826817894176119e-06, "loss": 0.0238, "step": 93400 }, { "epoch": 0.7558054858807347, "grad_norm": 0.18814001977443695, "learning_rate": 7.826235448095876e-06, "loss": 0.0304, "step": 93410 }, { "epoch": 0.7558863985759365, "grad_norm": 0.8324776291847229, "learning_rate": 7.825652945652333e-06, "loss": 0.0305, "step": 93420 }, { "epoch": 0.7559673112711385, "grad_norm": 0.6552678942680359, "learning_rate": 7.82507038685711e-06, "loss": 0.03, "step": 93430 }, { "epoch": 0.7560482239663403, "grad_norm": 0.7605533599853516, "learning_rate": 7.82448777172182e-06, "loss": 0.0378, "step": 93440 }, { "epoch": 0.7561291366615422, "grad_norm": 0.5990238189697266, "learning_rate": 7.823905100258084e-06, "loss": 0.0322, "step": 93450 }, { "epoch": 0.7562100493567441, "grad_norm": 0.015505148097872734, "learning_rate": 7.823322372477523e-06, "loss": 0.0174, "step": 93460 }, { "epoch": 0.7562909620519459, "grad_norm": 0.611411988735199, "learning_rate": 7.822739588391758e-06, "loss": 0.0333, "step": 93470 }, { "epoch": 0.7563718747471478, "grad_norm": 0.37085020542144775, "learning_rate": 7.822156748012408e-06, "loss": 0.0245, "step": 93480 }, { "epoch": 0.7564527874423497, "grad_norm": 0.2583100199699402, "learning_rate": 7.821573851351104e-06, "loss": 0.0219, "step": 93490 }, { "epoch": 0.7565337001375516, "grad_norm": 0.37865662574768066, "learning_rate": 7.820990898419463e-06, "loss": 0.0315, "step": 93500 }, { "epoch": 0.7566146128327534, "grad_norm": 0.3676931858062744, "learning_rate": 7.820407889229113e-06, "loss": 0.0217, "step": 93510 }, { "epoch": 0.7566955255279554, "grad_norm": 0.4003971815109253, "learning_rate": 7.819824823791684e-06, "loss": 0.0356, "step": 93520 }, { "epoch": 0.7567764382231572, "grad_norm": 0.3980981111526489, "learning_rate": 7.819241702118802e-06, "loss": 0.0314, "step": 93530 }, { "epoch": 0.756857350918359, "grad_norm": 0.44106224179267883, "learning_rate": 7.818658524222096e-06, "loss": 0.0265, "step": 93540 }, { "epoch": 0.756938263613561, "grad_norm": 0.09788820147514343, "learning_rate": 7.818075290113195e-06, "loss": 0.0435, "step": 93550 }, { "epoch": 0.7570191763087628, "grad_norm": 0.36408519744873047, "learning_rate": 7.817491999803731e-06, "loss": 0.0263, "step": 93560 }, { "epoch": 0.7571000890039647, "grad_norm": 0.23000551760196686, "learning_rate": 7.816908653305338e-06, "loss": 0.0327, "step": 93570 }, { "epoch": 0.7571810016991666, "grad_norm": 0.29235970973968506, "learning_rate": 7.816325250629648e-06, "loss": 0.0289, "step": 93580 }, { "epoch": 0.7572619143943685, "grad_norm": 0.26860925555229187, "learning_rate": 7.815741791788299e-06, "loss": 0.0255, "step": 93590 }, { "epoch": 0.7573428270895703, "grad_norm": 0.4004316031932831, "learning_rate": 7.815158276792923e-06, "loss": 0.0483, "step": 93600 }, { "epoch": 0.7574237397847722, "grad_norm": 0.3414144814014435, "learning_rate": 7.814574705655156e-06, "loss": 0.022, "step": 93610 }, { "epoch": 0.7575046524799741, "grad_norm": 0.399799108505249, "learning_rate": 7.81399107838664e-06, "loss": 0.0262, "step": 93620 }, { "epoch": 0.757585565175176, "grad_norm": 0.32803869247436523, "learning_rate": 7.813407394999013e-06, "loss": 0.0393, "step": 93630 }, { "epoch": 0.7576664778703779, "grad_norm": 0.8049758672714233, "learning_rate": 7.812823655503914e-06, "loss": 0.0619, "step": 93640 }, { "epoch": 0.7577473905655797, "grad_norm": 0.34799429774284363, "learning_rate": 7.812239859912986e-06, "loss": 0.0254, "step": 93650 }, { "epoch": 0.7578283032607817, "grad_norm": 0.22655817866325378, "learning_rate": 7.81165600823787e-06, "loss": 0.029, "step": 93660 }, { "epoch": 0.7579092159559835, "grad_norm": 0.5873779654502869, "learning_rate": 7.81107210049021e-06, "loss": 0.0278, "step": 93670 }, { "epoch": 0.7579901286511854, "grad_norm": 0.7627500891685486, "learning_rate": 7.810488136681654e-06, "loss": 0.0352, "step": 93680 }, { "epoch": 0.7580710413463873, "grad_norm": 0.6726066470146179, "learning_rate": 7.809904116823843e-06, "loss": 0.0504, "step": 93690 }, { "epoch": 0.7581519540415891, "grad_norm": 0.431361585855484, "learning_rate": 7.809320040928428e-06, "loss": 0.0242, "step": 93700 }, { "epoch": 0.758232866736791, "grad_norm": 0.8015344738960266, "learning_rate": 7.808735909007054e-06, "loss": 0.0329, "step": 93710 }, { "epoch": 0.7583137794319929, "grad_norm": 0.3867458999156952, "learning_rate": 7.808151721071372e-06, "loss": 0.0215, "step": 93720 }, { "epoch": 0.7583946921271948, "grad_norm": 0.49516940116882324, "learning_rate": 7.807567477133035e-06, "loss": 0.0266, "step": 93730 }, { "epoch": 0.7584756048223966, "grad_norm": 0.5414018630981445, "learning_rate": 7.806983177203689e-06, "loss": 0.0285, "step": 93740 }, { "epoch": 0.7585565175175986, "grad_norm": 0.569398045539856, "learning_rate": 7.806398821294989e-06, "loss": 0.0276, "step": 93750 }, { "epoch": 0.7586374302128004, "grad_norm": 0.523925244808197, "learning_rate": 7.80581440941859e-06, "loss": 0.0202, "step": 93760 }, { "epoch": 0.7587183429080022, "grad_norm": 0.08612771332263947, "learning_rate": 7.805229941586146e-06, "loss": 0.0361, "step": 93770 }, { "epoch": 0.7587992556032042, "grad_norm": 0.3217978775501251, "learning_rate": 7.804645417809313e-06, "loss": 0.0351, "step": 93780 }, { "epoch": 0.758880168298406, "grad_norm": 0.45819252729415894, "learning_rate": 7.804060838099747e-06, "loss": 0.0338, "step": 93790 }, { "epoch": 0.7589610809936079, "grad_norm": 0.6011613011360168, "learning_rate": 7.80347620246911e-06, "loss": 0.0217, "step": 93800 }, { "epoch": 0.7590419936888098, "grad_norm": 0.29159897565841675, "learning_rate": 7.802891510929056e-06, "loss": 0.0442, "step": 93810 }, { "epoch": 0.7591229063840117, "grad_norm": 0.4644641876220703, "learning_rate": 7.802306763491248e-06, "loss": 0.031, "step": 93820 }, { "epoch": 0.7592038190792135, "grad_norm": 0.3206532299518585, "learning_rate": 7.801721960167348e-06, "loss": 0.0226, "step": 93830 }, { "epoch": 0.7592847317744154, "grad_norm": 0.08426452428102493, "learning_rate": 7.801137100969018e-06, "loss": 0.0309, "step": 93840 }, { "epoch": 0.7593656444696173, "grad_norm": 0.24718745052814484, "learning_rate": 7.800552185907921e-06, "loss": 0.0333, "step": 93850 }, { "epoch": 0.7594465571648191, "grad_norm": 0.31950807571411133, "learning_rate": 7.799967214995725e-06, "loss": 0.0457, "step": 93860 }, { "epoch": 0.7595274698600211, "grad_norm": 0.33934688568115234, "learning_rate": 7.799382188244093e-06, "loss": 0.0245, "step": 93870 }, { "epoch": 0.7596083825552229, "grad_norm": 0.48636749386787415, "learning_rate": 7.798797105664693e-06, "loss": 0.0279, "step": 93880 }, { "epoch": 0.7596892952504248, "grad_norm": 0.511917769908905, "learning_rate": 7.798211967269194e-06, "loss": 0.0289, "step": 93890 }, { "epoch": 0.7597702079456267, "grad_norm": 0.5507132411003113, "learning_rate": 7.797626773069264e-06, "loss": 0.0317, "step": 93900 }, { "epoch": 0.7598511206408285, "grad_norm": 0.7079277634620667, "learning_rate": 7.797041523076573e-06, "loss": 0.0429, "step": 93910 }, { "epoch": 0.7599320333360304, "grad_norm": 0.2875235676765442, "learning_rate": 7.796456217302794e-06, "loss": 0.0283, "step": 93920 }, { "epoch": 0.7600129460312323, "grad_norm": 0.3079979121685028, "learning_rate": 7.7958708557596e-06, "loss": 0.0266, "step": 93930 }, { "epoch": 0.7600938587264342, "grad_norm": 0.5158406496047974, "learning_rate": 7.795285438458664e-06, "loss": 0.0184, "step": 93940 }, { "epoch": 0.760174771421636, "grad_norm": 0.4779556691646576, "learning_rate": 7.794699965411662e-06, "loss": 0.0276, "step": 93950 }, { "epoch": 0.760255684116838, "grad_norm": 0.453632116317749, "learning_rate": 7.794114436630267e-06, "loss": 0.0327, "step": 93960 }, { "epoch": 0.7603365968120398, "grad_norm": 0.2346058487892151, "learning_rate": 7.79352885212616e-06, "loss": 0.0222, "step": 93970 }, { "epoch": 0.7604175095072417, "grad_norm": 0.23352545499801636, "learning_rate": 7.792943211911017e-06, "loss": 0.0237, "step": 93980 }, { "epoch": 0.7604984222024436, "grad_norm": 0.16982153058052063, "learning_rate": 7.792357515996518e-06, "loss": 0.022, "step": 93990 }, { "epoch": 0.7605793348976454, "grad_norm": 0.36448970437049866, "learning_rate": 7.791771764394342e-06, "loss": 0.0373, "step": 94000 }, { "epoch": 0.7606602475928473, "grad_norm": 0.23006543517112732, "learning_rate": 7.791185957116171e-06, "loss": 0.0243, "step": 94010 }, { "epoch": 0.7607411602880492, "grad_norm": 0.15062861144542694, "learning_rate": 7.790600094173692e-06, "loss": 0.027, "step": 94020 }, { "epoch": 0.7608220729832511, "grad_norm": 0.39384663105010986, "learning_rate": 7.790014175578585e-06, "loss": 0.0488, "step": 94030 }, { "epoch": 0.7609029856784529, "grad_norm": 0.4519962668418884, "learning_rate": 7.789428201342532e-06, "loss": 0.0258, "step": 94040 }, { "epoch": 0.7609838983736549, "grad_norm": 0.6326972246170044, "learning_rate": 7.788842171477225e-06, "loss": 0.0335, "step": 94050 }, { "epoch": 0.7610648110688567, "grad_norm": 0.4297942519187927, "learning_rate": 7.788256085994347e-06, "loss": 0.0223, "step": 94060 }, { "epoch": 0.7611457237640585, "grad_norm": 0.6114616990089417, "learning_rate": 7.787669944905587e-06, "loss": 0.0271, "step": 94070 }, { "epoch": 0.7612266364592605, "grad_norm": 0.22641314566135406, "learning_rate": 7.787083748222636e-06, "loss": 0.0402, "step": 94080 }, { "epoch": 0.7613075491544623, "grad_norm": 0.38771089911460876, "learning_rate": 7.786497495957183e-06, "loss": 0.0217, "step": 94090 }, { "epoch": 0.7613884618496642, "grad_norm": 0.3361910283565521, "learning_rate": 7.785911188120921e-06, "loss": 0.023, "step": 94100 }, { "epoch": 0.7614693745448661, "grad_norm": 0.4374314546585083, "learning_rate": 7.785324824725542e-06, "loss": 0.0367, "step": 94110 }, { "epoch": 0.761550287240068, "grad_norm": 0.3790811002254486, "learning_rate": 7.784738405782739e-06, "loss": 0.0279, "step": 94120 }, { "epoch": 0.7616311999352698, "grad_norm": 0.2896954119205475, "learning_rate": 7.784151931304206e-06, "loss": 0.0232, "step": 94130 }, { "epoch": 0.7617121126304717, "grad_norm": 0.4546407163143158, "learning_rate": 7.783565401301642e-06, "loss": 0.0427, "step": 94140 }, { "epoch": 0.7617930253256736, "grad_norm": 0.5752601623535156, "learning_rate": 7.782978815786742e-06, "loss": 0.0575, "step": 94150 }, { "epoch": 0.7618739380208754, "grad_norm": 0.20880582928657532, "learning_rate": 7.782392174771203e-06, "loss": 0.0132, "step": 94160 }, { "epoch": 0.7619548507160774, "grad_norm": 0.14918500185012817, "learning_rate": 7.781805478266728e-06, "loss": 0.0237, "step": 94170 }, { "epoch": 0.7620357634112792, "grad_norm": 0.45520392060279846, "learning_rate": 7.781218726285014e-06, "loss": 0.0356, "step": 94180 }, { "epoch": 0.7621166761064812, "grad_norm": 0.24958164989948273, "learning_rate": 7.780631918837766e-06, "loss": 0.033, "step": 94190 }, { "epoch": 0.762197588801683, "grad_norm": 0.3161921799182892, "learning_rate": 7.780045055936682e-06, "loss": 0.0314, "step": 94200 }, { "epoch": 0.7622785014968848, "grad_norm": 0.075401671230793, "learning_rate": 7.77945813759347e-06, "loss": 0.0177, "step": 94210 }, { "epoch": 0.7623594141920867, "grad_norm": 0.21845626831054688, "learning_rate": 7.778871163819833e-06, "loss": 0.0299, "step": 94220 }, { "epoch": 0.7624403268872886, "grad_norm": 0.432256817817688, "learning_rate": 7.778284134627477e-06, "loss": 0.0246, "step": 94230 }, { "epoch": 0.7625212395824905, "grad_norm": 0.2976030707359314, "learning_rate": 7.777697050028107e-06, "loss": 0.0376, "step": 94240 }, { "epoch": 0.7626021522776923, "grad_norm": 0.48105549812316895, "learning_rate": 7.777109910033439e-06, "loss": 0.0223, "step": 94250 }, { "epoch": 0.7626830649728943, "grad_norm": 0.0019864484202116728, "learning_rate": 7.77652271465517e-06, "loss": 0.0315, "step": 94260 }, { "epoch": 0.7627639776680961, "grad_norm": 0.45144370198249817, "learning_rate": 7.77593546390502e-06, "loss": 0.0254, "step": 94270 }, { "epoch": 0.7628448903632981, "grad_norm": 1.2124601602554321, "learning_rate": 7.775348157794697e-06, "loss": 0.0295, "step": 94280 }, { "epoch": 0.7629258030584999, "grad_norm": 0.595622181892395, "learning_rate": 7.774760796335914e-06, "loss": 0.0276, "step": 94290 }, { "epoch": 0.7630067157537017, "grad_norm": 0.16079340875148773, "learning_rate": 7.774173379540387e-06, "loss": 0.0327, "step": 94300 }, { "epoch": 0.7630876284489037, "grad_norm": 0.45189499855041504, "learning_rate": 7.773585907419825e-06, "loss": 0.034, "step": 94310 }, { "epoch": 0.7631685411441055, "grad_norm": 0.5559259057044983, "learning_rate": 7.772998379985949e-06, "loss": 0.0319, "step": 94320 }, { "epoch": 0.7632494538393074, "grad_norm": 0.6791030764579773, "learning_rate": 7.772410797250474e-06, "loss": 0.0338, "step": 94330 }, { "epoch": 0.7633303665345093, "grad_norm": 0.25194406509399414, "learning_rate": 7.771823159225118e-06, "loss": 0.053, "step": 94340 }, { "epoch": 0.7634112792297112, "grad_norm": 0.37118878960609436, "learning_rate": 7.771235465921601e-06, "loss": 0.0266, "step": 94350 }, { "epoch": 0.763492191924913, "grad_norm": 0.3089371621608734, "learning_rate": 7.770647717351644e-06, "loss": 0.0231, "step": 94360 }, { "epoch": 0.7635731046201149, "grad_norm": 0.42702969908714294, "learning_rate": 7.770059913526966e-06, "loss": 0.0302, "step": 94370 }, { "epoch": 0.7636540173153168, "grad_norm": 0.5420588254928589, "learning_rate": 7.769472054459293e-06, "loss": 0.0438, "step": 94380 }, { "epoch": 0.7637349300105186, "grad_norm": 0.4982607364654541, "learning_rate": 7.768884140160345e-06, "loss": 0.0207, "step": 94390 }, { "epoch": 0.7638158427057206, "grad_norm": 0.447094589471817, "learning_rate": 7.768296170641847e-06, "loss": 0.028, "step": 94400 }, { "epoch": 0.7638967554009224, "grad_norm": 0.1963626891374588, "learning_rate": 7.767708145915527e-06, "loss": 0.0233, "step": 94410 }, { "epoch": 0.7639776680961243, "grad_norm": 0.32571178674697876, "learning_rate": 7.767120065993111e-06, "loss": 0.0268, "step": 94420 }, { "epoch": 0.7640585807913262, "grad_norm": 0.12595748901367188, "learning_rate": 7.766531930886328e-06, "loss": 0.027, "step": 94430 }, { "epoch": 0.764139493486528, "grad_norm": 0.44981831312179565, "learning_rate": 7.765943740606905e-06, "loss": 0.0467, "step": 94440 }, { "epoch": 0.7642204061817299, "grad_norm": 0.29773059487342834, "learning_rate": 7.765355495166573e-06, "loss": 0.0276, "step": 94450 }, { "epoch": 0.7643013188769318, "grad_norm": 0.17032407224178314, "learning_rate": 7.764767194577063e-06, "loss": 0.0243, "step": 94460 }, { "epoch": 0.7643822315721337, "grad_norm": 0.5947263836860657, "learning_rate": 7.76417883885011e-06, "loss": 0.0304, "step": 94470 }, { "epoch": 0.7644631442673355, "grad_norm": 0.11103862524032593, "learning_rate": 7.763590427997445e-06, "loss": 0.0276, "step": 94480 }, { "epoch": 0.7645440569625375, "grad_norm": 0.2923130393028259, "learning_rate": 7.763001962030802e-06, "loss": 0.0192, "step": 94490 }, { "epoch": 0.7646249696577393, "grad_norm": 0.2405153512954712, "learning_rate": 7.762413440961918e-06, "loss": 0.0468, "step": 94500 }, { "epoch": 0.7647058823529411, "grad_norm": 0.19813284277915955, "learning_rate": 7.76182486480253e-06, "loss": 0.0264, "step": 94510 }, { "epoch": 0.7647867950481431, "grad_norm": 0.257440447807312, "learning_rate": 7.761236233564375e-06, "loss": 0.0258, "step": 94520 }, { "epoch": 0.7648677077433449, "grad_norm": 0.21191827952861786, "learning_rate": 7.760647547259192e-06, "loss": 0.0409, "step": 94530 }, { "epoch": 0.7649486204385468, "grad_norm": 0.1386551707983017, "learning_rate": 7.760058805898724e-06, "loss": 0.0317, "step": 94540 }, { "epoch": 0.7650295331337487, "grad_norm": 0.35563457012176514, "learning_rate": 7.759470009494708e-06, "loss": 0.0315, "step": 94550 }, { "epoch": 0.7651104458289506, "grad_norm": 0.2776668071746826, "learning_rate": 7.75888115805889e-06, "loss": 0.0294, "step": 94560 }, { "epoch": 0.7651913585241524, "grad_norm": 0.34016066789627075, "learning_rate": 7.75829225160301e-06, "loss": 0.0316, "step": 94570 }, { "epoch": 0.7652722712193544, "grad_norm": 0.3788491189479828, "learning_rate": 7.757703290138814e-06, "loss": 0.0287, "step": 94580 }, { "epoch": 0.7653531839145562, "grad_norm": 0.46147093176841736, "learning_rate": 7.757114273678048e-06, "loss": 0.034, "step": 94590 }, { "epoch": 0.765434096609758, "grad_norm": 0.4055263102054596, "learning_rate": 7.75652520223246e-06, "loss": 0.0362, "step": 94600 }, { "epoch": 0.76551500930496, "grad_norm": 0.33078402280807495, "learning_rate": 7.755936075813792e-06, "loss": 0.0293, "step": 94610 }, { "epoch": 0.7655959220001618, "grad_norm": 0.1874849945306778, "learning_rate": 7.755346894433801e-06, "loss": 0.0171, "step": 94620 }, { "epoch": 0.7656768346953637, "grad_norm": 0.6344069838523865, "learning_rate": 7.75475765810423e-06, "loss": 0.0346, "step": 94630 }, { "epoch": 0.7657577473905656, "grad_norm": 0.5763311982154846, "learning_rate": 7.754168366836836e-06, "loss": 0.0358, "step": 94640 }, { "epoch": 0.7658386600857675, "grad_norm": 0.5396562218666077, "learning_rate": 7.753579020643368e-06, "loss": 0.0399, "step": 94650 }, { "epoch": 0.7659195727809693, "grad_norm": 0.48546624183654785, "learning_rate": 7.752989619535576e-06, "loss": 0.0265, "step": 94660 }, { "epoch": 0.7660004854761712, "grad_norm": 0.25016099214553833, "learning_rate": 7.75240016352522e-06, "loss": 0.0289, "step": 94670 }, { "epoch": 0.7660813981713731, "grad_norm": 0.21520191431045532, "learning_rate": 7.751810652624054e-06, "loss": 0.0544, "step": 94680 }, { "epoch": 0.7661623108665749, "grad_norm": 0.18336741626262665, "learning_rate": 7.751221086843832e-06, "loss": 0.0339, "step": 94690 }, { "epoch": 0.7662432235617769, "grad_norm": 0.5997385382652283, "learning_rate": 7.750631466196316e-06, "loss": 0.0324, "step": 94700 }, { "epoch": 0.7663241362569787, "grad_norm": 0.6165066361427307, "learning_rate": 7.750041790693261e-06, "loss": 0.0368, "step": 94710 }, { "epoch": 0.7664050489521806, "grad_norm": 0.7078263759613037, "learning_rate": 7.749452060346427e-06, "loss": 0.0311, "step": 94720 }, { "epoch": 0.7664859616473825, "grad_norm": 0.21582455933094025, "learning_rate": 7.74886227516758e-06, "loss": 0.0288, "step": 94730 }, { "epoch": 0.7665668743425843, "grad_norm": 0.34444549679756165, "learning_rate": 7.748272435168474e-06, "loss": 0.0238, "step": 94740 }, { "epoch": 0.7666477870377862, "grad_norm": 0.3420242369174957, "learning_rate": 7.747682540360877e-06, "loss": 0.0339, "step": 94750 }, { "epoch": 0.7667286997329881, "grad_norm": 0.6200224161148071, "learning_rate": 7.747092590756552e-06, "loss": 0.0265, "step": 94760 }, { "epoch": 0.76680961242819, "grad_norm": 0.4642274081707001, "learning_rate": 7.746502586367265e-06, "loss": 0.0282, "step": 94770 }, { "epoch": 0.7668905251233918, "grad_norm": 0.5495225787162781, "learning_rate": 7.745912527204783e-06, "loss": 0.0235, "step": 94780 }, { "epoch": 0.7669714378185938, "grad_norm": 0.3917505741119385, "learning_rate": 7.745322413280872e-06, "loss": 0.0235, "step": 94790 }, { "epoch": 0.7670523505137956, "grad_norm": 0.5942815542221069, "learning_rate": 7.7447322446073e-06, "loss": 0.0225, "step": 94800 }, { "epoch": 0.7671332632089974, "grad_norm": 0.184475377202034, "learning_rate": 7.74414202119584e-06, "loss": 0.0191, "step": 94810 }, { "epoch": 0.7672141759041994, "grad_norm": 0.17994551360607147, "learning_rate": 7.743551743058258e-06, "loss": 0.032, "step": 94820 }, { "epoch": 0.7672950885994012, "grad_norm": 0.6173608899116516, "learning_rate": 7.74296141020633e-06, "loss": 0.032, "step": 94830 }, { "epoch": 0.7673760012946031, "grad_norm": 0.3857788145542145, "learning_rate": 7.742371022651827e-06, "loss": 0.028, "step": 94840 }, { "epoch": 0.767456913989805, "grad_norm": 0.787086546421051, "learning_rate": 7.741780580406525e-06, "loss": 0.0279, "step": 94850 }, { "epoch": 0.7675378266850069, "grad_norm": 0.5309308171272278, "learning_rate": 7.741190083482194e-06, "loss": 0.0228, "step": 94860 }, { "epoch": 0.7676187393802087, "grad_norm": 0.29127174615859985, "learning_rate": 7.740599531890616e-06, "loss": 0.0208, "step": 94870 }, { "epoch": 0.7676996520754106, "grad_norm": 0.2243785709142685, "learning_rate": 7.740008925643565e-06, "loss": 0.0239, "step": 94880 }, { "epoch": 0.7677805647706125, "grad_norm": 0.29342690110206604, "learning_rate": 7.739418264752821e-06, "loss": 0.0233, "step": 94890 }, { "epoch": 0.7678614774658143, "grad_norm": 0.3720916509628296, "learning_rate": 7.738827549230161e-06, "loss": 0.024, "step": 94900 }, { "epoch": 0.7679423901610163, "grad_norm": 0.4621100127696991, "learning_rate": 7.738236779087371e-06, "loss": 0.0394, "step": 94910 }, { "epoch": 0.7680233028562181, "grad_norm": 0.2625690698623657, "learning_rate": 7.737645954336226e-06, "loss": 0.0286, "step": 94920 }, { "epoch": 0.76810421555142, "grad_norm": 0.4066927134990692, "learning_rate": 7.737055074988512e-06, "loss": 0.0302, "step": 94930 }, { "epoch": 0.7681851282466219, "grad_norm": 0.49005523324012756, "learning_rate": 7.736464141056017e-06, "loss": 0.0371, "step": 94940 }, { "epoch": 0.7682660409418238, "grad_norm": 0.29704999923706055, "learning_rate": 7.735873152550516e-06, "loss": 0.0229, "step": 94950 }, { "epoch": 0.7683469536370257, "grad_norm": 1.3284051418304443, "learning_rate": 7.735282109483804e-06, "loss": 0.0332, "step": 94960 }, { "epoch": 0.7684278663322275, "grad_norm": 0.43995535373687744, "learning_rate": 7.734691011867666e-06, "loss": 0.0365, "step": 94970 }, { "epoch": 0.7685087790274294, "grad_norm": 0.3626496493816376, "learning_rate": 7.734099859713886e-06, "loss": 0.0401, "step": 94980 }, { "epoch": 0.7685896917226313, "grad_norm": 0.4586767554283142, "learning_rate": 7.733508653034257e-06, "loss": 0.0323, "step": 94990 }, { "epoch": 0.7686706044178332, "grad_norm": 0.35855433344841003, "learning_rate": 7.732917391840569e-06, "loss": 0.0209, "step": 95000 }, { "epoch": 0.768751517113035, "grad_norm": 0.32136642932891846, "learning_rate": 7.732326076144611e-06, "loss": 0.0259, "step": 95010 }, { "epoch": 0.768832429808237, "grad_norm": 0.40977180004119873, "learning_rate": 7.73173470595818e-06, "loss": 0.0318, "step": 95020 }, { "epoch": 0.7689133425034388, "grad_norm": 0.1922648549079895, "learning_rate": 7.731143281293068e-06, "loss": 0.0421, "step": 95030 }, { "epoch": 0.7689942551986406, "grad_norm": 0.5075197815895081, "learning_rate": 7.730551802161066e-06, "loss": 0.0223, "step": 95040 }, { "epoch": 0.7690751678938426, "grad_norm": 0.24998469650745392, "learning_rate": 7.729960268573976e-06, "loss": 0.0312, "step": 95050 }, { "epoch": 0.7691560805890444, "grad_norm": 0.4938136041164398, "learning_rate": 7.72936868054359e-06, "loss": 0.0219, "step": 95060 }, { "epoch": 0.7692369932842463, "grad_norm": 0.6553148627281189, "learning_rate": 7.728777038081707e-06, "loss": 0.037, "step": 95070 }, { "epoch": 0.7693179059794482, "grad_norm": 0.9741763472557068, "learning_rate": 7.728185341200127e-06, "loss": 0.0365, "step": 95080 }, { "epoch": 0.7693988186746501, "grad_norm": 0.36064139008522034, "learning_rate": 7.727593589910651e-06, "loss": 0.0228, "step": 95090 }, { "epoch": 0.7694797313698519, "grad_norm": 0.5610073208808899, "learning_rate": 7.727001784225077e-06, "loss": 0.0276, "step": 95100 }, { "epoch": 0.7695606440650538, "grad_norm": 0.17898090183734894, "learning_rate": 7.726409924155211e-06, "loss": 0.0329, "step": 95110 }, { "epoch": 0.7696415567602557, "grad_norm": 0.21031956374645233, "learning_rate": 7.725818009712853e-06, "loss": 0.0232, "step": 95120 }, { "epoch": 0.7697224694554575, "grad_norm": 0.7962043881416321, "learning_rate": 7.725226040909812e-06, "loss": 0.0434, "step": 95130 }, { "epoch": 0.7698033821506595, "grad_norm": 0.34886687994003296, "learning_rate": 7.72463401775789e-06, "loss": 0.0141, "step": 95140 }, { "epoch": 0.7698842948458613, "grad_norm": 0.18722642958164215, "learning_rate": 7.724041940268892e-06, "loss": 0.0315, "step": 95150 }, { "epoch": 0.7699652075410632, "grad_norm": 0.39466536045074463, "learning_rate": 7.72344980845463e-06, "loss": 0.0368, "step": 95160 }, { "epoch": 0.7700461202362651, "grad_norm": 0.28913235664367676, "learning_rate": 7.722857622326912e-06, "loss": 0.0227, "step": 95170 }, { "epoch": 0.7701270329314669, "grad_norm": 0.6109815835952759, "learning_rate": 7.722265381897545e-06, "loss": 0.0436, "step": 95180 }, { "epoch": 0.7702079456266688, "grad_norm": 0.2555027902126312, "learning_rate": 7.721673087178346e-06, "loss": 0.0289, "step": 95190 }, { "epoch": 0.7702888583218707, "grad_norm": 0.4675672650337219, "learning_rate": 7.721080738181118e-06, "loss": 0.0271, "step": 95200 }, { "epoch": 0.7703697710170726, "grad_norm": 0.3473266661167145, "learning_rate": 7.720488334917682e-06, "loss": 0.0303, "step": 95210 }, { "epoch": 0.7704506837122744, "grad_norm": 0.22194457054138184, "learning_rate": 7.719895877399848e-06, "loss": 0.0374, "step": 95220 }, { "epoch": 0.7705315964074764, "grad_norm": 0.2467505931854248, "learning_rate": 7.719303365639433e-06, "loss": 0.0176, "step": 95230 }, { "epoch": 0.7706125091026782, "grad_norm": 0.2713378369808197, "learning_rate": 7.718710799648252e-06, "loss": 0.0288, "step": 95240 }, { "epoch": 0.7706934217978801, "grad_norm": 0.2271967977285385, "learning_rate": 7.718118179438125e-06, "loss": 0.035, "step": 95250 }, { "epoch": 0.770774334493082, "grad_norm": 0.5113222002983093, "learning_rate": 7.717525505020869e-06, "loss": 0.0279, "step": 95260 }, { "epoch": 0.7708552471882838, "grad_norm": 0.2792215943336487, "learning_rate": 7.716932776408304e-06, "loss": 0.0297, "step": 95270 }, { "epoch": 0.7709361598834857, "grad_norm": 0.48672086000442505, "learning_rate": 7.716339993612249e-06, "loss": 0.0409, "step": 95280 }, { "epoch": 0.7710170725786876, "grad_norm": 0.5261810421943665, "learning_rate": 7.715747156644529e-06, "loss": 0.0231, "step": 95290 }, { "epoch": 0.7710979852738895, "grad_norm": 0.37874266505241394, "learning_rate": 7.715154265516966e-06, "loss": 0.0244, "step": 95300 }, { "epoch": 0.7711788979690913, "grad_norm": 0.38011977076530457, "learning_rate": 7.71456132024138e-06, "loss": 0.0415, "step": 95310 }, { "epoch": 0.7712598106642933, "grad_norm": 0.5362400412559509, "learning_rate": 7.7139683208296e-06, "loss": 0.0353, "step": 95320 }, { "epoch": 0.7713407233594951, "grad_norm": 0.3166440427303314, "learning_rate": 7.713375267293454e-06, "loss": 0.0373, "step": 95330 }, { "epoch": 0.7714216360546969, "grad_norm": 0.4317716360092163, "learning_rate": 7.712782159644764e-06, "loss": 0.0322, "step": 95340 }, { "epoch": 0.7715025487498989, "grad_norm": 0.46428239345550537, "learning_rate": 7.71218899789536e-06, "loss": 0.0228, "step": 95350 }, { "epoch": 0.7715834614451007, "grad_norm": 0.7508305311203003, "learning_rate": 7.711595782057074e-06, "loss": 0.0331, "step": 95360 }, { "epoch": 0.7716643741403026, "grad_norm": 0.5392919778823853, "learning_rate": 7.711002512141734e-06, "loss": 0.049, "step": 95370 }, { "epoch": 0.7717452868355045, "grad_norm": 0.3863745331764221, "learning_rate": 7.710409188161172e-06, "loss": 0.0265, "step": 95380 }, { "epoch": 0.7718261995307064, "grad_norm": 0.14726005494594574, "learning_rate": 7.709815810127222e-06, "loss": 0.0252, "step": 95390 }, { "epoch": 0.7719071122259082, "grad_norm": 0.3278507590293884, "learning_rate": 7.709222378051715e-06, "loss": 0.0274, "step": 95400 }, { "epoch": 0.7719880249211101, "grad_norm": 0.383906751871109, "learning_rate": 7.708628891946487e-06, "loss": 0.0267, "step": 95410 }, { "epoch": 0.772068937616312, "grad_norm": 0.5647707581520081, "learning_rate": 7.708035351823373e-06, "loss": 0.0337, "step": 95420 }, { "epoch": 0.7721498503115138, "grad_norm": 0.5176582336425781, "learning_rate": 7.707441757694213e-06, "loss": 0.0312, "step": 95430 }, { "epoch": 0.7722307630067158, "grad_norm": 0.6722943782806396, "learning_rate": 7.706848109570842e-06, "loss": 0.0467, "step": 95440 }, { "epoch": 0.7723116757019176, "grad_norm": 0.5394346714019775, "learning_rate": 7.7062544074651e-06, "loss": 0.0252, "step": 95450 }, { "epoch": 0.7723925883971196, "grad_norm": 0.32750269770622253, "learning_rate": 7.705660651388827e-06, "loss": 0.0254, "step": 95460 }, { "epoch": 0.7724735010923214, "grad_norm": 0.5376010537147522, "learning_rate": 7.705066841353863e-06, "loss": 0.0345, "step": 95470 }, { "epoch": 0.7725544137875232, "grad_norm": 0.38268423080444336, "learning_rate": 7.704472977372052e-06, "loss": 0.0261, "step": 95480 }, { "epoch": 0.7726353264827251, "grad_norm": 0.2701847553253174, "learning_rate": 7.703879059455237e-06, "loss": 0.03, "step": 95490 }, { "epoch": 0.772716239177927, "grad_norm": 0.2979629933834076, "learning_rate": 7.703285087615262e-06, "loss": 0.0258, "step": 95500 }, { "epoch": 0.7727971518731289, "grad_norm": 0.4196030795574188, "learning_rate": 7.702691061863973e-06, "loss": 0.0324, "step": 95510 }, { "epoch": 0.7728780645683307, "grad_norm": 0.49940845370292664, "learning_rate": 7.702096982213218e-06, "loss": 0.029, "step": 95520 }, { "epoch": 0.7729589772635327, "grad_norm": 0.5977974534034729, "learning_rate": 7.70150284867484e-06, "loss": 0.0318, "step": 95530 }, { "epoch": 0.7730398899587345, "grad_norm": 0.47655948996543884, "learning_rate": 7.700908661260693e-06, "loss": 0.03, "step": 95540 }, { "epoch": 0.7731208026539365, "grad_norm": 0.2275741547346115, "learning_rate": 7.700314419982625e-06, "loss": 0.0366, "step": 95550 }, { "epoch": 0.7732017153491383, "grad_norm": 0.16783054172992706, "learning_rate": 7.699720124852485e-06, "loss": 0.0283, "step": 95560 }, { "epoch": 0.7732826280443401, "grad_norm": 0.34324583411216736, "learning_rate": 7.699125775882128e-06, "loss": 0.0313, "step": 95570 }, { "epoch": 0.773363540739542, "grad_norm": 0.4750610888004303, "learning_rate": 7.698531373083404e-06, "loss": 0.028, "step": 95580 }, { "epoch": 0.7734444534347439, "grad_norm": 0.38990461826324463, "learning_rate": 7.69793691646817e-06, "loss": 0.0276, "step": 95590 }, { "epoch": 0.7735253661299458, "grad_norm": 0.27286970615386963, "learning_rate": 7.697342406048279e-06, "loss": 0.0197, "step": 95600 }, { "epoch": 0.7736062788251477, "grad_norm": 0.8986141085624695, "learning_rate": 7.696747841835588e-06, "loss": 0.039, "step": 95610 }, { "epoch": 0.7736871915203496, "grad_norm": 0.8219382166862488, "learning_rate": 7.696153223841954e-06, "loss": 0.0303, "step": 95620 }, { "epoch": 0.7737681042155514, "grad_norm": 0.2629111111164093, "learning_rate": 7.695558552079236e-06, "loss": 0.0307, "step": 95630 }, { "epoch": 0.7738490169107533, "grad_norm": 0.3797072470188141, "learning_rate": 7.694963826559294e-06, "loss": 0.0453, "step": 95640 }, { "epoch": 0.7739299296059552, "grad_norm": 0.48224806785583496, "learning_rate": 7.694369047293989e-06, "loss": 0.0225, "step": 95650 }, { "epoch": 0.774010842301157, "grad_norm": 0.6037679314613342, "learning_rate": 7.693774214295178e-06, "loss": 0.0299, "step": 95660 }, { "epoch": 0.774091754996359, "grad_norm": 0.49041032791137695, "learning_rate": 7.69317932757473e-06, "loss": 0.0294, "step": 95670 }, { "epoch": 0.7741726676915608, "grad_norm": 0.8911870121955872, "learning_rate": 7.692584387144504e-06, "loss": 0.0316, "step": 95680 }, { "epoch": 0.7742535803867627, "grad_norm": 0.29444077610969543, "learning_rate": 7.691989393016367e-06, "loss": 0.0256, "step": 95690 }, { "epoch": 0.7743344930819646, "grad_norm": 0.015412314794957638, "learning_rate": 7.691394345202185e-06, "loss": 0.0222, "step": 95700 }, { "epoch": 0.7744154057771664, "grad_norm": 0.5348226428031921, "learning_rate": 7.690799243713825e-06, "loss": 0.0366, "step": 95710 }, { "epoch": 0.7744963184723683, "grad_norm": 0.6508319973945618, "learning_rate": 7.690204088563153e-06, "loss": 0.0296, "step": 95720 }, { "epoch": 0.7745772311675702, "grad_norm": 0.3607538640499115, "learning_rate": 7.689608879762043e-06, "loss": 0.0266, "step": 95730 }, { "epoch": 0.7746581438627721, "grad_norm": 0.2288551777601242, "learning_rate": 7.689013617322358e-06, "loss": 0.0312, "step": 95740 }, { "epoch": 0.7747390565579739, "grad_norm": 0.4108046889305115, "learning_rate": 7.688418301255975e-06, "loss": 0.0353, "step": 95750 }, { "epoch": 0.7748199692531759, "grad_norm": 0.2717609107494354, "learning_rate": 7.687822931574767e-06, "loss": 0.0179, "step": 95760 }, { "epoch": 0.7749008819483777, "grad_norm": 0.3295740485191345, "learning_rate": 7.687227508290601e-06, "loss": 0.0261, "step": 95770 }, { "epoch": 0.7749817946435795, "grad_norm": 0.8649511337280273, "learning_rate": 7.686632031415357e-06, "loss": 0.0444, "step": 95780 }, { "epoch": 0.7750627073387815, "grad_norm": 0.17867642641067505, "learning_rate": 7.68603650096091e-06, "loss": 0.0303, "step": 95790 }, { "epoch": 0.7751436200339833, "grad_norm": 0.7184661626815796, "learning_rate": 7.685440916939132e-06, "loss": 0.037, "step": 95800 }, { "epoch": 0.7752245327291852, "grad_norm": 0.33838915824890137, "learning_rate": 7.684845279361908e-06, "loss": 0.0218, "step": 95810 }, { "epoch": 0.7753054454243871, "grad_norm": 0.3755147457122803, "learning_rate": 7.68424958824111e-06, "loss": 0.0196, "step": 95820 }, { "epoch": 0.775386358119589, "grad_norm": 0.3983956575393677, "learning_rate": 7.68365384358862e-06, "loss": 0.025, "step": 95830 }, { "epoch": 0.7754672708147908, "grad_norm": 0.29770591855049133, "learning_rate": 7.683058045416322e-06, "loss": 0.0164, "step": 95840 }, { "epoch": 0.7755481835099928, "grad_norm": 0.5224114656448364, "learning_rate": 7.682462193736095e-06, "loss": 0.0232, "step": 95850 }, { "epoch": 0.7756290962051946, "grad_norm": 0.5089141726493835, "learning_rate": 7.681866288559822e-06, "loss": 0.0238, "step": 95860 }, { "epoch": 0.7757100089003964, "grad_norm": 0.5216702222824097, "learning_rate": 7.681270329899389e-06, "loss": 0.0302, "step": 95870 }, { "epoch": 0.7757909215955984, "grad_norm": 0.19275474548339844, "learning_rate": 7.680674317766678e-06, "loss": 0.0248, "step": 95880 }, { "epoch": 0.7758718342908002, "grad_norm": 0.32151535153388977, "learning_rate": 7.680078252173578e-06, "loss": 0.0256, "step": 95890 }, { "epoch": 0.7759527469860021, "grad_norm": 0.43624719977378845, "learning_rate": 7.679482133131972e-06, "loss": 0.0281, "step": 95900 }, { "epoch": 0.776033659681204, "grad_norm": 0.19308678805828094, "learning_rate": 7.678885960653755e-06, "loss": 0.0345, "step": 95910 }, { "epoch": 0.7761145723764059, "grad_norm": 0.16304640471935272, "learning_rate": 7.678289734750813e-06, "loss": 0.0315, "step": 95920 }, { "epoch": 0.7761954850716077, "grad_norm": 0.15885375440120697, "learning_rate": 7.677693455435033e-06, "loss": 0.0244, "step": 95930 }, { "epoch": 0.7762763977668096, "grad_norm": 0.2344946563243866, "learning_rate": 7.677097122718313e-06, "loss": 0.0257, "step": 95940 }, { "epoch": 0.7763573104620115, "grad_norm": 0.14418652653694153, "learning_rate": 7.676500736612541e-06, "loss": 0.0207, "step": 95950 }, { "epoch": 0.7764382231572133, "grad_norm": 0.5313233733177185, "learning_rate": 7.675904297129613e-06, "loss": 0.0236, "step": 95960 }, { "epoch": 0.7765191358524153, "grad_norm": 0.33388569951057434, "learning_rate": 7.675307804281424e-06, "loss": 0.0286, "step": 95970 }, { "epoch": 0.7766000485476171, "grad_norm": 0.2789154052734375, "learning_rate": 7.674711258079866e-06, "loss": 0.0236, "step": 95980 }, { "epoch": 0.776680961242819, "grad_norm": 0.2142738550901413, "learning_rate": 7.67411465853684e-06, "loss": 0.0287, "step": 95990 }, { "epoch": 0.7767618739380209, "grad_norm": 0.31969359517097473, "learning_rate": 7.673518005664243e-06, "loss": 0.023, "step": 96000 }, { "epoch": 0.7768427866332227, "grad_norm": 0.48783355951309204, "learning_rate": 7.672921299473972e-06, "loss": 0.0204, "step": 96010 }, { "epoch": 0.7769236993284246, "grad_norm": 0.36180931329727173, "learning_rate": 7.672324539977929e-06, "loss": 0.0341, "step": 96020 }, { "epoch": 0.7770046120236265, "grad_norm": 0.34842145442962646, "learning_rate": 7.671727727188016e-06, "loss": 0.024, "step": 96030 }, { "epoch": 0.7770855247188284, "grad_norm": 0.5296858549118042, "learning_rate": 7.671130861116131e-06, "loss": 0.0428, "step": 96040 }, { "epoch": 0.7771664374140302, "grad_norm": 0.3535684049129486, "learning_rate": 7.670533941774181e-06, "loss": 0.0346, "step": 96050 }, { "epoch": 0.7772473501092322, "grad_norm": 0.19407744705677032, "learning_rate": 7.669936969174071e-06, "loss": 0.0239, "step": 96060 }, { "epoch": 0.777328262804434, "grad_norm": 0.3924194574356079, "learning_rate": 7.669339943327703e-06, "loss": 0.0355, "step": 96070 }, { "epoch": 0.7774091754996358, "grad_norm": 0.7632352709770203, "learning_rate": 7.668742864246986e-06, "loss": 0.049, "step": 96080 }, { "epoch": 0.7774900881948378, "grad_norm": 0.682266116142273, "learning_rate": 7.668145731943826e-06, "loss": 0.0349, "step": 96090 }, { "epoch": 0.7775710008900396, "grad_norm": 1.2925715446472168, "learning_rate": 7.667548546430133e-06, "loss": 0.0301, "step": 96100 }, { "epoch": 0.7776519135852415, "grad_norm": 1.089646339416504, "learning_rate": 7.666951307717814e-06, "loss": 0.0275, "step": 96110 }, { "epoch": 0.7777328262804434, "grad_norm": 0.5716546177864075, "learning_rate": 7.666354015818782e-06, "loss": 0.0262, "step": 96120 }, { "epoch": 0.7778137389756453, "grad_norm": 0.42376792430877686, "learning_rate": 7.665756670744947e-06, "loss": 0.0396, "step": 96130 }, { "epoch": 0.7778946516708471, "grad_norm": 0.5276116728782654, "learning_rate": 7.665159272508224e-06, "loss": 0.0391, "step": 96140 }, { "epoch": 0.7779755643660491, "grad_norm": 0.38286471366882324, "learning_rate": 7.664561821120527e-06, "loss": 0.0352, "step": 96150 }, { "epoch": 0.7780564770612509, "grad_norm": 0.554766833782196, "learning_rate": 7.663964316593768e-06, "loss": 0.0246, "step": 96160 }, { "epoch": 0.7781373897564527, "grad_norm": 0.3336547017097473, "learning_rate": 7.663366758939863e-06, "loss": 0.044, "step": 96170 }, { "epoch": 0.7782183024516547, "grad_norm": 0.584938108921051, "learning_rate": 7.662769148170734e-06, "loss": 0.0444, "step": 96180 }, { "epoch": 0.7782992151468565, "grad_norm": 0.5393444299697876, "learning_rate": 7.662171484298296e-06, "loss": 0.0268, "step": 96190 }, { "epoch": 0.7783801278420585, "grad_norm": 0.3334304690361023, "learning_rate": 7.661573767334464e-06, "loss": 0.0298, "step": 96200 }, { "epoch": 0.7784610405372603, "grad_norm": 0.16346418857574463, "learning_rate": 7.660975997291164e-06, "loss": 0.0232, "step": 96210 }, { "epoch": 0.7785419532324622, "grad_norm": 0.48257768154144287, "learning_rate": 7.660378174180317e-06, "loss": 0.0302, "step": 96220 }, { "epoch": 0.778622865927664, "grad_norm": 0.3241996765136719, "learning_rate": 7.65978029801384e-06, "loss": 0.0303, "step": 96230 }, { "epoch": 0.7787037786228659, "grad_norm": 0.37903615832328796, "learning_rate": 7.659182368803663e-06, "loss": 0.0322, "step": 96240 }, { "epoch": 0.7787846913180678, "grad_norm": 0.472109317779541, "learning_rate": 7.658584386561708e-06, "loss": 0.0336, "step": 96250 }, { "epoch": 0.7788656040132697, "grad_norm": 0.4557073712348938, "learning_rate": 7.657986351299898e-06, "loss": 0.0323, "step": 96260 }, { "epoch": 0.7789465167084716, "grad_norm": 0.414230614900589, "learning_rate": 7.65738826303016e-06, "loss": 0.0222, "step": 96270 }, { "epoch": 0.7790274294036734, "grad_norm": 0.10265963524580002, "learning_rate": 7.656790121764425e-06, "loss": 0.0324, "step": 96280 }, { "epoch": 0.7791083420988754, "grad_norm": 0.5989196300506592, "learning_rate": 7.65619192751462e-06, "loss": 0.0268, "step": 96290 }, { "epoch": 0.7791892547940772, "grad_norm": 0.7089035511016846, "learning_rate": 7.655593680292674e-06, "loss": 0.0278, "step": 96300 }, { "epoch": 0.779270167489279, "grad_norm": 0.5584667921066284, "learning_rate": 7.65499538011052e-06, "loss": 0.027, "step": 96310 }, { "epoch": 0.779351080184481, "grad_norm": 0.794725239276886, "learning_rate": 7.654397026980086e-06, "loss": 0.0284, "step": 96320 }, { "epoch": 0.7794319928796828, "grad_norm": 0.3003629446029663, "learning_rate": 7.653798620913308e-06, "loss": 0.0235, "step": 96330 }, { "epoch": 0.7795129055748847, "grad_norm": 0.4331167936325073, "learning_rate": 7.653200161922118e-06, "loss": 0.027, "step": 96340 }, { "epoch": 0.7795938182700866, "grad_norm": 0.32364317774772644, "learning_rate": 7.652601650018452e-06, "loss": 0.0208, "step": 96350 }, { "epoch": 0.7796747309652885, "grad_norm": 0.32064568996429443, "learning_rate": 7.652003085214246e-06, "loss": 0.0256, "step": 96360 }, { "epoch": 0.7797556436604903, "grad_norm": 0.29132798314094543, "learning_rate": 7.651404467521436e-06, "loss": 0.0359, "step": 96370 }, { "epoch": 0.7798365563556922, "grad_norm": 0.3448621928691864, "learning_rate": 7.650805796951965e-06, "loss": 0.0225, "step": 96380 }, { "epoch": 0.7799174690508941, "grad_norm": 0.7342237234115601, "learning_rate": 7.650207073517766e-06, "loss": 0.0262, "step": 96390 }, { "epoch": 0.7799983817460959, "grad_norm": 0.6189472079277039, "learning_rate": 7.649608297230782e-06, "loss": 0.0273, "step": 96400 }, { "epoch": 0.7800792944412979, "grad_norm": 0.22944089770317078, "learning_rate": 7.649009468102955e-06, "loss": 0.0275, "step": 96410 }, { "epoch": 0.7801602071364997, "grad_norm": 0.2656267285346985, "learning_rate": 7.648410586146224e-06, "loss": 0.0336, "step": 96420 }, { "epoch": 0.7802411198317016, "grad_norm": 0.1946837455034256, "learning_rate": 7.647811651372536e-06, "loss": 0.0277, "step": 96430 }, { "epoch": 0.7803220325269035, "grad_norm": 0.32696855068206787, "learning_rate": 7.647212663793835e-06, "loss": 0.0419, "step": 96440 }, { "epoch": 0.7804029452221053, "grad_norm": 0.42066773772239685, "learning_rate": 7.646613623422066e-06, "loss": 0.0224, "step": 96450 }, { "epoch": 0.7804838579173072, "grad_norm": 0.6041978001594543, "learning_rate": 7.646014530269175e-06, "loss": 0.0203, "step": 96460 }, { "epoch": 0.7805647706125091, "grad_norm": 0.8253864049911499, "learning_rate": 7.64541538434711e-06, "loss": 0.0406, "step": 96470 }, { "epoch": 0.780645683307711, "grad_norm": 0.2652896046638489, "learning_rate": 7.64481618566782e-06, "loss": 0.0281, "step": 96480 }, { "epoch": 0.7807265960029128, "grad_norm": 0.5766546726226807, "learning_rate": 7.644216934243255e-06, "loss": 0.0377, "step": 96490 }, { "epoch": 0.7808075086981148, "grad_norm": 0.3743259608745575, "learning_rate": 7.643617630085365e-06, "loss": 0.0427, "step": 96500 }, { "epoch": 0.7808884213933166, "grad_norm": 0.2559773027896881, "learning_rate": 7.643018273206103e-06, "loss": 0.0238, "step": 96510 }, { "epoch": 0.7809693340885185, "grad_norm": 0.40791764855384827, "learning_rate": 7.642418863617423e-06, "loss": 0.0239, "step": 96520 }, { "epoch": 0.7810502467837204, "grad_norm": 0.32220327854156494, "learning_rate": 7.641819401331274e-06, "loss": 0.035, "step": 96530 }, { "epoch": 0.7811311594789222, "grad_norm": 0.4135892689228058, "learning_rate": 7.641219886359614e-06, "loss": 0.0324, "step": 96540 }, { "epoch": 0.7812120721741241, "grad_norm": 0.2834659814834595, "learning_rate": 7.6406203187144e-06, "loss": 0.0294, "step": 96550 }, { "epoch": 0.781292984869326, "grad_norm": 0.565503716468811, "learning_rate": 7.64002069840759e-06, "loss": 0.0424, "step": 96560 }, { "epoch": 0.7813738975645279, "grad_norm": 0.4805358946323395, "learning_rate": 7.639421025451139e-06, "loss": 0.0391, "step": 96570 }, { "epoch": 0.7814548102597297, "grad_norm": 0.48310092091560364, "learning_rate": 7.638821299857008e-06, "loss": 0.027, "step": 96580 }, { "epoch": 0.7815357229549317, "grad_norm": 0.4059913158416748, "learning_rate": 7.638221521637156e-06, "loss": 0.0293, "step": 96590 }, { "epoch": 0.7816166356501335, "grad_norm": 0.4926702082157135, "learning_rate": 7.637621690803548e-06, "loss": 0.0494, "step": 96600 }, { "epoch": 0.7816975483453353, "grad_norm": 0.34354472160339355, "learning_rate": 7.637021807368142e-06, "loss": 0.0309, "step": 96610 }, { "epoch": 0.7817784610405373, "grad_norm": 0.2518066167831421, "learning_rate": 7.636421871342902e-06, "loss": 0.0207, "step": 96620 }, { "epoch": 0.7818593737357391, "grad_norm": 0.5674166679382324, "learning_rate": 7.635821882739794e-06, "loss": 0.0393, "step": 96630 }, { "epoch": 0.781940286430941, "grad_norm": 0.2958603501319885, "learning_rate": 7.635221841570784e-06, "loss": 0.0352, "step": 96640 }, { "epoch": 0.7820211991261429, "grad_norm": 0.4503747522830963, "learning_rate": 7.634621747847838e-06, "loss": 0.02, "step": 96650 }, { "epoch": 0.7821021118213448, "grad_norm": 0.47874096035957336, "learning_rate": 7.634021601582922e-06, "loss": 0.0341, "step": 96660 }, { "epoch": 0.7821830245165466, "grad_norm": 0.18860933184623718, "learning_rate": 7.633421402788007e-06, "loss": 0.0225, "step": 96670 }, { "epoch": 0.7822639372117485, "grad_norm": 0.5367279052734375, "learning_rate": 7.63282115147506e-06, "loss": 0.0321, "step": 96680 }, { "epoch": 0.7823448499069504, "grad_norm": 0.7020971775054932, "learning_rate": 7.632220847656053e-06, "loss": 0.0406, "step": 96690 }, { "epoch": 0.7824257626021522, "grad_norm": 0.4592730700969696, "learning_rate": 7.63162049134296e-06, "loss": 0.0282, "step": 96700 }, { "epoch": 0.7825066752973542, "grad_norm": 0.2024424970149994, "learning_rate": 7.631020082547748e-06, "loss": 0.0301, "step": 96710 }, { "epoch": 0.782587587992556, "grad_norm": 0.31478631496429443, "learning_rate": 7.6304196212824e-06, "loss": 0.0289, "step": 96720 }, { "epoch": 0.782668500687758, "grad_norm": 0.5142228007316589, "learning_rate": 7.629819107558883e-06, "loss": 0.0255, "step": 96730 }, { "epoch": 0.7827494133829598, "grad_norm": 0.16459232568740845, "learning_rate": 7.629218541389175e-06, "loss": 0.0389, "step": 96740 }, { "epoch": 0.7828303260781616, "grad_norm": 0.09451860189437866, "learning_rate": 7.628617922785255e-06, "loss": 0.0325, "step": 96750 }, { "epoch": 0.7829112387733635, "grad_norm": 0.22898495197296143, "learning_rate": 7.628017251759099e-06, "loss": 0.0178, "step": 96760 }, { "epoch": 0.7829921514685654, "grad_norm": 0.3159814178943634, "learning_rate": 7.627416528322687e-06, "loss": 0.0334, "step": 96770 }, { "epoch": 0.7830730641637673, "grad_norm": 0.14920227229595184, "learning_rate": 7.626815752488e-06, "loss": 0.03, "step": 96780 }, { "epoch": 0.7831539768589691, "grad_norm": 0.27500787377357483, "learning_rate": 7.626214924267018e-06, "loss": 0.0322, "step": 96790 }, { "epoch": 0.7832348895541711, "grad_norm": 0.7266537547111511, "learning_rate": 7.625614043671723e-06, "loss": 0.0355, "step": 96800 }, { "epoch": 0.7833158022493729, "grad_norm": 0.3071112334728241, "learning_rate": 7.625013110714099e-06, "loss": 0.0327, "step": 96810 }, { "epoch": 0.7833967149445749, "grad_norm": 0.2642538547515869, "learning_rate": 7.6244121254061295e-06, "loss": 0.0255, "step": 96820 }, { "epoch": 0.7834776276397767, "grad_norm": 0.5445703864097595, "learning_rate": 7.623811087759801e-06, "loss": 0.0248, "step": 96830 }, { "epoch": 0.7835585403349785, "grad_norm": 0.45368489623069763, "learning_rate": 7.6232099977871e-06, "loss": 0.0312, "step": 96840 }, { "epoch": 0.7836394530301805, "grad_norm": 0.5006291270256042, "learning_rate": 7.622608855500013e-06, "loss": 0.0383, "step": 96850 }, { "epoch": 0.7837203657253823, "grad_norm": 0.5657437443733215, "learning_rate": 7.622007660910529e-06, "loss": 0.0266, "step": 96860 }, { "epoch": 0.7838012784205842, "grad_norm": 0.29188552498817444, "learning_rate": 7.621406414030639e-06, "loss": 0.0325, "step": 96870 }, { "epoch": 0.783882191115786, "grad_norm": 0.7870804667472839, "learning_rate": 7.620805114872329e-06, "loss": 0.036, "step": 96880 }, { "epoch": 0.783963103810988, "grad_norm": 0.4624690115451813, "learning_rate": 7.6202037634475965e-06, "loss": 0.0277, "step": 96890 }, { "epoch": 0.7840440165061898, "grad_norm": 0.1696598380804062, "learning_rate": 7.61960235976843e-06, "loss": 0.0269, "step": 96900 }, { "epoch": 0.7841249292013917, "grad_norm": 0.4126397669315338, "learning_rate": 7.619000903846825e-06, "loss": 0.0445, "step": 96910 }, { "epoch": 0.7842058418965936, "grad_norm": 0.35871386528015137, "learning_rate": 7.618399395694777e-06, "loss": 0.0308, "step": 96920 }, { "epoch": 0.7842867545917954, "grad_norm": 0.46959489583969116, "learning_rate": 7.617797835324279e-06, "loss": 0.0204, "step": 96930 }, { "epoch": 0.7843676672869974, "grad_norm": 0.45958492159843445, "learning_rate": 7.617196222747329e-06, "loss": 0.0248, "step": 96940 }, { "epoch": 0.7844485799821992, "grad_norm": 0.5590961575508118, "learning_rate": 7.616594557975928e-06, "loss": 0.0252, "step": 96950 }, { "epoch": 0.7845294926774011, "grad_norm": 0.48709622025489807, "learning_rate": 7.6159928410220705e-06, "loss": 0.0321, "step": 96960 }, { "epoch": 0.784610405372603, "grad_norm": 0.3260935842990875, "learning_rate": 7.6153910718977595e-06, "loss": 0.0176, "step": 96970 }, { "epoch": 0.7846913180678048, "grad_norm": 0.38909780979156494, "learning_rate": 7.6147892506149935e-06, "loss": 0.0284, "step": 96980 }, { "epoch": 0.7847722307630067, "grad_norm": 0.4683968424797058, "learning_rate": 7.614187377185779e-06, "loss": 0.0298, "step": 96990 }, { "epoch": 0.7848531434582086, "grad_norm": 0.4362739026546478, "learning_rate": 7.6135854516221145e-06, "loss": 0.0165, "step": 97000 }, { "epoch": 0.7849340561534105, "grad_norm": 0.3005095422267914, "learning_rate": 7.612983473936004e-06, "loss": 0.0196, "step": 97010 }, { "epoch": 0.7850149688486123, "grad_norm": 0.18753984570503235, "learning_rate": 7.612381444139455e-06, "loss": 0.0219, "step": 97020 }, { "epoch": 0.7850958815438143, "grad_norm": 0.4972284734249115, "learning_rate": 7.611779362244475e-06, "loss": 0.0219, "step": 97030 }, { "epoch": 0.7851767942390161, "grad_norm": 0.30579498410224915, "learning_rate": 7.611177228263067e-06, "loss": 0.0199, "step": 97040 }, { "epoch": 0.7852577069342179, "grad_norm": 0.3713184893131256, "learning_rate": 7.610575042207242e-06, "loss": 0.025, "step": 97050 }, { "epoch": 0.7853386196294199, "grad_norm": 0.4073663055896759, "learning_rate": 7.60997280408901e-06, "loss": 0.0389, "step": 97060 }, { "epoch": 0.7854195323246217, "grad_norm": 0.3638683557510376, "learning_rate": 7.60937051392038e-06, "loss": 0.0264, "step": 97070 }, { "epoch": 0.7855004450198236, "grad_norm": 0.42149946093559265, "learning_rate": 7.608768171713363e-06, "loss": 0.0266, "step": 97080 }, { "epoch": 0.7855813577150255, "grad_norm": 0.38590461015701294, "learning_rate": 7.608165777479972e-06, "loss": 0.0339, "step": 97090 }, { "epoch": 0.7856622704102274, "grad_norm": 0.6365649700164795, "learning_rate": 7.607563331232224e-06, "loss": 0.0384, "step": 97100 }, { "epoch": 0.7857431831054292, "grad_norm": 0.43211063742637634, "learning_rate": 7.606960832982127e-06, "loss": 0.0278, "step": 97110 }, { "epoch": 0.7858240958006312, "grad_norm": 0.3722386360168457, "learning_rate": 7.606358282741699e-06, "loss": 0.0346, "step": 97120 }, { "epoch": 0.785905008495833, "grad_norm": 0.3941211998462677, "learning_rate": 7.605755680522959e-06, "loss": 0.0243, "step": 97130 }, { "epoch": 0.7859859211910348, "grad_norm": 0.34421399235725403, "learning_rate": 7.605153026337924e-06, "loss": 0.0435, "step": 97140 }, { "epoch": 0.7860668338862368, "grad_norm": 0.28597763180732727, "learning_rate": 7.604550320198609e-06, "loss": 0.0236, "step": 97150 }, { "epoch": 0.7861477465814386, "grad_norm": 0.4097239673137665, "learning_rate": 7.603947562117038e-06, "loss": 0.0314, "step": 97160 }, { "epoch": 0.7862286592766405, "grad_norm": 0.21022415161132812, "learning_rate": 7.60334475210523e-06, "loss": 0.0188, "step": 97170 }, { "epoch": 0.7863095719718424, "grad_norm": 0.5833075642585754, "learning_rate": 7.6027418901752056e-06, "loss": 0.0309, "step": 97180 }, { "epoch": 0.7863904846670443, "grad_norm": 0.2475709617137909, "learning_rate": 7.60213897633899e-06, "loss": 0.0238, "step": 97190 }, { "epoch": 0.7864713973622461, "grad_norm": 0.24729865789413452, "learning_rate": 7.601536010608604e-06, "loss": 0.0298, "step": 97200 }, { "epoch": 0.786552310057448, "grad_norm": 0.19882072508335114, "learning_rate": 7.600932992996077e-06, "loss": 0.0255, "step": 97210 }, { "epoch": 0.7866332227526499, "grad_norm": 0.2714971601963043, "learning_rate": 7.600329923513432e-06, "loss": 0.0311, "step": 97220 }, { "epoch": 0.7867141354478517, "grad_norm": 0.20091192424297333, "learning_rate": 7.599726802172693e-06, "loss": 0.0356, "step": 97230 }, { "epoch": 0.7867950481430537, "grad_norm": 0.3416233956813812, "learning_rate": 7.599123628985894e-06, "loss": 0.0284, "step": 97240 }, { "epoch": 0.7868759608382555, "grad_norm": 0.4544890820980072, "learning_rate": 7.598520403965062e-06, "loss": 0.0211, "step": 97250 }, { "epoch": 0.7869568735334574, "grad_norm": 0.3340282440185547, "learning_rate": 7.597917127122225e-06, "loss": 0.0229, "step": 97260 }, { "epoch": 0.7870377862286593, "grad_norm": 0.44778305292129517, "learning_rate": 7.597313798469414e-06, "loss": 0.0296, "step": 97270 }, { "epoch": 0.7871186989238611, "grad_norm": 0.3934847116470337, "learning_rate": 7.5967104180186635e-06, "loss": 0.0226, "step": 97280 }, { "epoch": 0.787199611619063, "grad_norm": 0.42127725481987, "learning_rate": 7.5961069857820065e-06, "loss": 0.0343, "step": 97290 }, { "epoch": 0.7872805243142649, "grad_norm": 0.2638853192329407, "learning_rate": 7.595503501771476e-06, "loss": 0.0163, "step": 97300 }, { "epoch": 0.7873614370094668, "grad_norm": 0.7929278612136841, "learning_rate": 7.594899965999107e-06, "loss": 0.0341, "step": 97310 }, { "epoch": 0.7874423497046686, "grad_norm": 0.4397006928920746, "learning_rate": 7.594296378476936e-06, "loss": 0.0341, "step": 97320 }, { "epoch": 0.7875232623998706, "grad_norm": 0.3542598783969879, "learning_rate": 7.593692739217002e-06, "loss": 0.0263, "step": 97330 }, { "epoch": 0.7876041750950724, "grad_norm": 0.2984279990196228, "learning_rate": 7.59308904823134e-06, "loss": 0.0277, "step": 97340 }, { "epoch": 0.7876850877902742, "grad_norm": 0.3627564609050751, "learning_rate": 7.592485305531993e-06, "loss": 0.0319, "step": 97350 }, { "epoch": 0.7877660004854762, "grad_norm": 0.3274610638618469, "learning_rate": 7.591881511130998e-06, "loss": 0.0263, "step": 97360 }, { "epoch": 0.787846913180678, "grad_norm": 0.22411760687828064, "learning_rate": 7.591277665040399e-06, "loss": 0.0297, "step": 97370 }, { "epoch": 0.78792782587588, "grad_norm": 0.5399887561798096, "learning_rate": 7.590673767272237e-06, "loss": 0.034, "step": 97380 }, { "epoch": 0.7880087385710818, "grad_norm": 0.09249281138181686, "learning_rate": 7.590069817838555e-06, "loss": 0.016, "step": 97390 }, { "epoch": 0.7880896512662837, "grad_norm": 0.5811478495597839, "learning_rate": 7.5894658167514e-06, "loss": 0.034, "step": 97400 }, { "epoch": 0.7881705639614855, "grad_norm": 0.33924564719200134, "learning_rate": 7.588861764022815e-06, "loss": 0.0431, "step": 97410 }, { "epoch": 0.7882514766566875, "grad_norm": 0.5051611661911011, "learning_rate": 7.588257659664846e-06, "loss": 0.026, "step": 97420 }, { "epoch": 0.7883323893518893, "grad_norm": 0.35960128903388977, "learning_rate": 7.587653503689545e-06, "loss": 0.0171, "step": 97430 }, { "epoch": 0.7884133020470911, "grad_norm": 0.49205371737480164, "learning_rate": 7.587049296108956e-06, "loss": 0.0312, "step": 97440 }, { "epoch": 0.7884942147422931, "grad_norm": 0.2814949154853821, "learning_rate": 7.5864450369351305e-06, "loss": 0.024, "step": 97450 }, { "epoch": 0.7885751274374949, "grad_norm": 0.8120269775390625, "learning_rate": 7.585840726180118e-06, "loss": 0.0345, "step": 97460 }, { "epoch": 0.7886560401326969, "grad_norm": 0.8335976600646973, "learning_rate": 7.585236363855972e-06, "loss": 0.0324, "step": 97470 }, { "epoch": 0.7887369528278987, "grad_norm": 0.1749255210161209, "learning_rate": 7.584631949974746e-06, "loss": 0.0325, "step": 97480 }, { "epoch": 0.7888178655231006, "grad_norm": 0.3110736608505249, "learning_rate": 7.584027484548491e-06, "loss": 0.0171, "step": 97490 }, { "epoch": 0.7888987782183025, "grad_norm": 0.4237041175365448, "learning_rate": 7.5834229675892614e-06, "loss": 0.0147, "step": 97500 }, { "epoch": 0.7889796909135043, "grad_norm": 0.3549971878528595, "learning_rate": 7.5828183991091165e-06, "loss": 0.0431, "step": 97510 }, { "epoch": 0.7890606036087062, "grad_norm": 0.6466003060340881, "learning_rate": 7.582213779120111e-06, "loss": 0.0255, "step": 97520 }, { "epoch": 0.789141516303908, "grad_norm": 0.6479360461235046, "learning_rate": 7.581609107634301e-06, "loss": 0.0406, "step": 97530 }, { "epoch": 0.78922242899911, "grad_norm": 0.34109169244766235, "learning_rate": 7.581004384663749e-06, "loss": 0.0282, "step": 97540 }, { "epoch": 0.7893033416943118, "grad_norm": 0.40274158120155334, "learning_rate": 7.5803996102205135e-06, "loss": 0.0201, "step": 97550 }, { "epoch": 0.7893842543895138, "grad_norm": 0.30267924070358276, "learning_rate": 7.579794784316654e-06, "loss": 0.0354, "step": 97560 }, { "epoch": 0.7894651670847156, "grad_norm": 0.5845749974250793, "learning_rate": 7.579189906964235e-06, "loss": 0.0287, "step": 97570 }, { "epoch": 0.7895460797799174, "grad_norm": 0.40665891766548157, "learning_rate": 7.578584978175318e-06, "loss": 0.0307, "step": 97580 }, { "epoch": 0.7896269924751194, "grad_norm": 0.39564841985702515, "learning_rate": 7.577979997961967e-06, "loss": 0.03, "step": 97590 }, { "epoch": 0.7897079051703212, "grad_norm": 0.9296160936355591, "learning_rate": 7.577374966336248e-06, "loss": 0.0384, "step": 97600 }, { "epoch": 0.7897888178655231, "grad_norm": 0.2853566110134125, "learning_rate": 7.576769883310224e-06, "loss": 0.0212, "step": 97610 }, { "epoch": 0.789869730560725, "grad_norm": 0.3521776795387268, "learning_rate": 7.576164748895968e-06, "loss": 0.0227, "step": 97620 }, { "epoch": 0.7899506432559269, "grad_norm": 0.3503364622592926, "learning_rate": 7.575559563105541e-06, "loss": 0.0432, "step": 97630 }, { "epoch": 0.7900315559511287, "grad_norm": 0.6421043872833252, "learning_rate": 7.574954325951019e-06, "loss": 0.0296, "step": 97640 }, { "epoch": 0.7901124686463306, "grad_norm": 0.3023739755153656, "learning_rate": 7.574349037444467e-06, "loss": 0.0221, "step": 97650 }, { "epoch": 0.7901933813415325, "grad_norm": 0.7240008115768433, "learning_rate": 7.573743697597959e-06, "loss": 0.0362, "step": 97660 }, { "epoch": 0.7902742940367343, "grad_norm": 0.647533655166626, "learning_rate": 7.573138306423567e-06, "loss": 0.03, "step": 97670 }, { "epoch": 0.7903552067319363, "grad_norm": 0.34176144003868103, "learning_rate": 7.572532863933362e-06, "loss": 0.0253, "step": 97680 }, { "epoch": 0.7904361194271381, "grad_norm": 0.7413017749786377, "learning_rate": 7.57192737013942e-06, "loss": 0.0172, "step": 97690 }, { "epoch": 0.79051703212234, "grad_norm": 0.35853856801986694, "learning_rate": 7.571321825053817e-06, "loss": 0.0394, "step": 97700 }, { "epoch": 0.7905979448175419, "grad_norm": 0.8389538526535034, "learning_rate": 7.570716228688627e-06, "loss": 0.0548, "step": 97710 }, { "epoch": 0.7906788575127438, "grad_norm": 0.6401534080505371, "learning_rate": 7.57011058105593e-06, "loss": 0.0311, "step": 97720 }, { "epoch": 0.7907597702079456, "grad_norm": 0.16538645327091217, "learning_rate": 7.569504882167804e-06, "loss": 0.0257, "step": 97730 }, { "epoch": 0.7908406829031475, "grad_norm": 0.3625234365463257, "learning_rate": 7.568899132036327e-06, "loss": 0.0216, "step": 97740 }, { "epoch": 0.7909215955983494, "grad_norm": 0.2971099317073822, "learning_rate": 7.568293330673579e-06, "loss": 0.0232, "step": 97750 }, { "epoch": 0.7910025082935512, "grad_norm": 0.1926705241203308, "learning_rate": 7.567687478091643e-06, "loss": 0.0316, "step": 97760 }, { "epoch": 0.7910834209887532, "grad_norm": 0.24918320775032043, "learning_rate": 7.567081574302602e-06, "loss": 0.0329, "step": 97770 }, { "epoch": 0.791164333683955, "grad_norm": 0.5153757333755493, "learning_rate": 7.566475619318537e-06, "loss": 0.0391, "step": 97780 }, { "epoch": 0.7912452463791569, "grad_norm": 0.3890524208545685, "learning_rate": 7.5658696131515355e-06, "loss": 0.0243, "step": 97790 }, { "epoch": 0.7913261590743588, "grad_norm": 0.3201110363006592, "learning_rate": 7.565263555813679e-06, "loss": 0.0263, "step": 97800 }, { "epoch": 0.7914070717695606, "grad_norm": 0.5349504947662354, "learning_rate": 7.564657447317059e-06, "loss": 0.0146, "step": 97810 }, { "epoch": 0.7914879844647625, "grad_norm": 0.6570361256599426, "learning_rate": 7.564051287673757e-06, "loss": 0.0374, "step": 97820 }, { "epoch": 0.7915688971599644, "grad_norm": 0.6961104869842529, "learning_rate": 7.563445076895868e-06, "loss": 0.0422, "step": 97830 }, { "epoch": 0.7916498098551663, "grad_norm": 0.7210562229156494, "learning_rate": 7.5628388149954765e-06, "loss": 0.0381, "step": 97840 }, { "epoch": 0.7917307225503681, "grad_norm": 0.14033924043178558, "learning_rate": 7.562232501984677e-06, "loss": 0.0192, "step": 97850 }, { "epoch": 0.7918116352455701, "grad_norm": 0.15915918350219727, "learning_rate": 7.561626137875559e-06, "loss": 0.0297, "step": 97860 }, { "epoch": 0.7918925479407719, "grad_norm": 0.8089086413383484, "learning_rate": 7.561019722680216e-06, "loss": 0.0458, "step": 97870 }, { "epoch": 0.7919734606359737, "grad_norm": 0.4914657771587372, "learning_rate": 7.56041325641074e-06, "loss": 0.029, "step": 97880 }, { "epoch": 0.7920543733311757, "grad_norm": 0.5612240433692932, "learning_rate": 7.559806739079227e-06, "loss": 0.024, "step": 97890 }, { "epoch": 0.7921352860263775, "grad_norm": 0.4634070098400116, "learning_rate": 7.559200170697772e-06, "loss": 0.0409, "step": 97900 }, { "epoch": 0.7922161987215794, "grad_norm": 0.9966459274291992, "learning_rate": 7.558593551278474e-06, "loss": 0.0646, "step": 97910 }, { "epoch": 0.7922971114167813, "grad_norm": 0.46044549345970154, "learning_rate": 7.557986880833429e-06, "loss": 0.0286, "step": 97920 }, { "epoch": 0.7923780241119832, "grad_norm": 0.5910462141036987, "learning_rate": 7.5573801593747345e-06, "loss": 0.0322, "step": 97930 }, { "epoch": 0.792458936807185, "grad_norm": 0.5365301370620728, "learning_rate": 7.556773386914491e-06, "loss": 0.0313, "step": 97940 }, { "epoch": 0.7925398495023869, "grad_norm": 0.47357413172721863, "learning_rate": 7.556166563464801e-06, "loss": 0.043, "step": 97950 }, { "epoch": 0.7926207621975888, "grad_norm": 0.5573176741600037, "learning_rate": 7.555559689037766e-06, "loss": 0.0278, "step": 97960 }, { "epoch": 0.7927016748927906, "grad_norm": 0.6126319169998169, "learning_rate": 7.554952763645488e-06, "loss": 0.0325, "step": 97970 }, { "epoch": 0.7927825875879926, "grad_norm": 0.30591684579849243, "learning_rate": 7.554345787300068e-06, "loss": 0.0364, "step": 97980 }, { "epoch": 0.7928635002831944, "grad_norm": 0.16234859824180603, "learning_rate": 7.553738760013618e-06, "loss": 0.0185, "step": 97990 }, { "epoch": 0.7929444129783964, "grad_norm": 0.3655562102794647, "learning_rate": 7.553131681798237e-06, "loss": 0.0403, "step": 98000 }, { "epoch": 0.7930253256735982, "grad_norm": 0.46433359384536743, "learning_rate": 7.552524552666034e-06, "loss": 0.0466, "step": 98010 }, { "epoch": 0.7931062383688, "grad_norm": 0.7855039238929749, "learning_rate": 7.551917372629118e-06, "loss": 0.027, "step": 98020 }, { "epoch": 0.793187151064002, "grad_norm": 0.32330650091171265, "learning_rate": 7.5513101416995995e-06, "loss": 0.0269, "step": 98030 }, { "epoch": 0.7932680637592038, "grad_norm": 0.40374988317489624, "learning_rate": 7.5507028598895835e-06, "loss": 0.0309, "step": 98040 }, { "epoch": 0.7933489764544057, "grad_norm": 0.33051732182502747, "learning_rate": 7.550095527211185e-06, "loss": 0.038, "step": 98050 }, { "epoch": 0.7934298891496075, "grad_norm": 0.4791913628578186, "learning_rate": 7.549488143676514e-06, "loss": 0.0383, "step": 98060 }, { "epoch": 0.7935108018448095, "grad_norm": 0.2462696135044098, "learning_rate": 7.5488807092976856e-06, "loss": 0.022, "step": 98070 }, { "epoch": 0.7935917145400113, "grad_norm": 0.6542195081710815, "learning_rate": 7.54827322408681e-06, "loss": 0.0276, "step": 98080 }, { "epoch": 0.7936726272352133, "grad_norm": 0.6315035223960876, "learning_rate": 7.5476656880560065e-06, "loss": 0.0411, "step": 98090 }, { "epoch": 0.7937535399304151, "grad_norm": 0.4864581227302551, "learning_rate": 7.547058101217389e-06, "loss": 0.0306, "step": 98100 }, { "epoch": 0.7938344526256169, "grad_norm": 0.3710721433162689, "learning_rate": 7.546450463583075e-06, "loss": 0.0368, "step": 98110 }, { "epoch": 0.7939153653208189, "grad_norm": 0.9186298847198486, "learning_rate": 7.54584277516518e-06, "loss": 0.0269, "step": 98120 }, { "epoch": 0.7939962780160207, "grad_norm": 0.21726353466510773, "learning_rate": 7.545235035975829e-06, "loss": 0.0279, "step": 98130 }, { "epoch": 0.7940771907112226, "grad_norm": 0.527026891708374, "learning_rate": 7.544627246027136e-06, "loss": 0.0241, "step": 98140 }, { "epoch": 0.7941581034064245, "grad_norm": 0.4851240813732147, "learning_rate": 7.544019405331225e-06, "loss": 0.0279, "step": 98150 }, { "epoch": 0.7942390161016264, "grad_norm": 0.49719953536987305, "learning_rate": 7.543411513900218e-06, "loss": 0.0335, "step": 98160 }, { "epoch": 0.7943199287968282, "grad_norm": 0.4687789976596832, "learning_rate": 7.542803571746238e-06, "loss": 0.0242, "step": 98170 }, { "epoch": 0.79440084149203, "grad_norm": 0.4837445616722107, "learning_rate": 7.542195578881408e-06, "loss": 0.0414, "step": 98180 }, { "epoch": 0.794481754187232, "grad_norm": 0.44776567816734314, "learning_rate": 7.5415875353178535e-06, "loss": 0.0214, "step": 98190 }, { "epoch": 0.7945626668824338, "grad_norm": 0.5147233605384827, "learning_rate": 7.540979441067701e-06, "loss": 0.0183, "step": 98200 }, { "epoch": 0.7946435795776358, "grad_norm": 0.37140941619873047, "learning_rate": 7.540371296143081e-06, "loss": 0.0243, "step": 98210 }, { "epoch": 0.7947244922728376, "grad_norm": 0.2710319459438324, "learning_rate": 7.539763100556116e-06, "loss": 0.0262, "step": 98220 }, { "epoch": 0.7948054049680395, "grad_norm": 1.0278228521347046, "learning_rate": 7.539154854318936e-06, "loss": 0.0333, "step": 98230 }, { "epoch": 0.7948863176632414, "grad_norm": 0.29684701561927795, "learning_rate": 7.538546557443676e-06, "loss": 0.0286, "step": 98240 }, { "epoch": 0.7949672303584432, "grad_norm": 0.7307835817337036, "learning_rate": 7.537938209942462e-06, "loss": 0.0264, "step": 98250 }, { "epoch": 0.7950481430536451, "grad_norm": 0.3014301359653473, "learning_rate": 7.53732981182743e-06, "loss": 0.0203, "step": 98260 }, { "epoch": 0.795129055748847, "grad_norm": 0.34599801898002625, "learning_rate": 7.53672136311071e-06, "loss": 0.0245, "step": 98270 }, { "epoch": 0.7952099684440489, "grad_norm": 0.10447043925523758, "learning_rate": 7.536112863804437e-06, "loss": 0.0288, "step": 98280 }, { "epoch": 0.7952908811392507, "grad_norm": 0.3178931772708893, "learning_rate": 7.5355043139207485e-06, "loss": 0.0234, "step": 98290 }, { "epoch": 0.7953717938344527, "grad_norm": 0.274135559797287, "learning_rate": 7.534895713471779e-06, "loss": 0.0398, "step": 98300 }, { "epoch": 0.7954527065296545, "grad_norm": 0.5078250169754028, "learning_rate": 7.534287062469664e-06, "loss": 0.0412, "step": 98310 }, { "epoch": 0.7955336192248563, "grad_norm": 0.2445373386144638, "learning_rate": 7.5336783609265454e-06, "loss": 0.0266, "step": 98320 }, { "epoch": 0.7956145319200583, "grad_norm": 0.32447391748428345, "learning_rate": 7.533069608854562e-06, "loss": 0.0251, "step": 98330 }, { "epoch": 0.7956954446152601, "grad_norm": 0.6804289221763611, "learning_rate": 7.5324608062658496e-06, "loss": 0.0351, "step": 98340 }, { "epoch": 0.795776357310462, "grad_norm": 0.8036599159240723, "learning_rate": 7.531851953172555e-06, "loss": 0.0453, "step": 98350 }, { "epoch": 0.7958572700056639, "grad_norm": 0.4527592360973358, "learning_rate": 7.531243049586817e-06, "loss": 0.036, "step": 98360 }, { "epoch": 0.7959381827008658, "grad_norm": 0.21807347238063812, "learning_rate": 7.530634095520781e-06, "loss": 0.0475, "step": 98370 }, { "epoch": 0.7960190953960676, "grad_norm": 0.27130571007728577, "learning_rate": 7.53002509098659e-06, "loss": 0.0225, "step": 98380 }, { "epoch": 0.7961000080912696, "grad_norm": 0.12581110000610352, "learning_rate": 7.529416035996389e-06, "loss": 0.0222, "step": 98390 }, { "epoch": 0.7961809207864714, "grad_norm": 0.1804698258638382, "learning_rate": 7.528806930562327e-06, "loss": 0.0275, "step": 98400 }, { "epoch": 0.7962618334816732, "grad_norm": 0.683952271938324, "learning_rate": 7.528197774696547e-06, "loss": 0.0211, "step": 98410 }, { "epoch": 0.7963427461768752, "grad_norm": 0.3067120611667633, "learning_rate": 7.5275885684111995e-06, "loss": 0.0428, "step": 98420 }, { "epoch": 0.796423658872077, "grad_norm": 0.3792475461959839, "learning_rate": 7.526979311718436e-06, "loss": 0.0334, "step": 98430 }, { "epoch": 0.7965045715672789, "grad_norm": 0.42325660586357117, "learning_rate": 7.526370004630403e-06, "loss": 0.0228, "step": 98440 }, { "epoch": 0.7965854842624808, "grad_norm": 0.2923519015312195, "learning_rate": 7.525760647159255e-06, "loss": 0.0332, "step": 98450 }, { "epoch": 0.7966663969576827, "grad_norm": 0.7252114415168762, "learning_rate": 7.525151239317143e-06, "loss": 0.0371, "step": 98460 }, { "epoch": 0.7967473096528845, "grad_norm": 0.7398513555526733, "learning_rate": 7.524541781116219e-06, "loss": 0.0267, "step": 98470 }, { "epoch": 0.7968282223480864, "grad_norm": 0.48695820569992065, "learning_rate": 7.523932272568641e-06, "loss": 0.0375, "step": 98480 }, { "epoch": 0.7969091350432883, "grad_norm": 0.34889382123947144, "learning_rate": 7.52332271368656e-06, "loss": 0.0234, "step": 98490 }, { "epoch": 0.7969900477384901, "grad_norm": 0.2783871591091156, "learning_rate": 7.522713104482135e-06, "loss": 0.023, "step": 98500 }, { "epoch": 0.7970709604336921, "grad_norm": 0.37904947996139526, "learning_rate": 7.522103444967523e-06, "loss": 0.02, "step": 98510 }, { "epoch": 0.7971518731288939, "grad_norm": 0.4237194061279297, "learning_rate": 7.521493735154881e-06, "loss": 0.0263, "step": 98520 }, { "epoch": 0.7972327858240958, "grad_norm": 0.581639289855957, "learning_rate": 7.520883975056371e-06, "loss": 0.0308, "step": 98530 }, { "epoch": 0.7973136985192977, "grad_norm": 0.3589133620262146, "learning_rate": 7.520274164684152e-06, "loss": 0.0179, "step": 98540 }, { "epoch": 0.7973946112144995, "grad_norm": 0.303506463766098, "learning_rate": 7.519664304050384e-06, "loss": 0.0297, "step": 98550 }, { "epoch": 0.7974755239097014, "grad_norm": 0.49245911836624146, "learning_rate": 7.519054393167233e-06, "loss": 0.0327, "step": 98560 }, { "epoch": 0.7975564366049033, "grad_norm": 0.6411172747612, "learning_rate": 7.518444432046859e-06, "loss": 0.0264, "step": 98570 }, { "epoch": 0.7976373493001052, "grad_norm": 0.719662606716156, "learning_rate": 7.5178344207014265e-06, "loss": 0.0305, "step": 98580 }, { "epoch": 0.797718261995307, "grad_norm": 1.218762755393982, "learning_rate": 7.5172243591431026e-06, "loss": 0.0208, "step": 98590 }, { "epoch": 0.797799174690509, "grad_norm": 0.5817612409591675, "learning_rate": 7.516614247384053e-06, "loss": 0.0401, "step": 98600 }, { "epoch": 0.7978800873857108, "grad_norm": 0.5591105818748474, "learning_rate": 7.516004085436445e-06, "loss": 0.0334, "step": 98610 }, { "epoch": 0.7979610000809126, "grad_norm": 0.21203258633613586, "learning_rate": 7.515393873312448e-06, "loss": 0.0304, "step": 98620 }, { "epoch": 0.7980419127761146, "grad_norm": 0.486214816570282, "learning_rate": 7.514783611024228e-06, "loss": 0.0233, "step": 98630 }, { "epoch": 0.7981228254713164, "grad_norm": 0.22882919013500214, "learning_rate": 7.51417329858396e-06, "loss": 0.0301, "step": 98640 }, { "epoch": 0.7982037381665184, "grad_norm": 0.5917877554893494, "learning_rate": 7.513562936003812e-06, "loss": 0.0251, "step": 98650 }, { "epoch": 0.7982846508617202, "grad_norm": 0.23157216608524323, "learning_rate": 7.512952523295958e-06, "loss": 0.0221, "step": 98660 }, { "epoch": 0.7983655635569221, "grad_norm": 0.5119881629943848, "learning_rate": 7.5123420604725704e-06, "loss": 0.0393, "step": 98670 }, { "epoch": 0.798446476252124, "grad_norm": 0.24874985218048096, "learning_rate": 7.511731547545826e-06, "loss": 0.0198, "step": 98680 }, { "epoch": 0.7985273889473259, "grad_norm": 0.3732568919658661, "learning_rate": 7.511120984527898e-06, "loss": 0.0213, "step": 98690 }, { "epoch": 0.7986083016425277, "grad_norm": 0.2498372495174408, "learning_rate": 7.510510371430961e-06, "loss": 0.0302, "step": 98700 }, { "epoch": 0.7986892143377295, "grad_norm": 0.6321455240249634, "learning_rate": 7.509899708267197e-06, "loss": 0.0282, "step": 98710 }, { "epoch": 0.7987701270329315, "grad_norm": 0.2741088569164276, "learning_rate": 7.50928899504878e-06, "loss": 0.0354, "step": 98720 }, { "epoch": 0.7988510397281333, "grad_norm": 0.34914374351501465, "learning_rate": 7.508678231787894e-06, "loss": 0.0361, "step": 98730 }, { "epoch": 0.7989319524233353, "grad_norm": 0.7078036069869995, "learning_rate": 7.508067418496714e-06, "loss": 0.0301, "step": 98740 }, { "epoch": 0.7990128651185371, "grad_norm": 0.8482773303985596, "learning_rate": 7.5074565551874244e-06, "loss": 0.037, "step": 98750 }, { "epoch": 0.799093777813739, "grad_norm": 0.7281376719474792, "learning_rate": 7.506845641872209e-06, "loss": 0.0342, "step": 98760 }, { "epoch": 0.7991746905089409, "grad_norm": 0.3666304051876068, "learning_rate": 7.506234678563248e-06, "loss": 0.0302, "step": 98770 }, { "epoch": 0.7992556032041427, "grad_norm": 0.25128173828125, "learning_rate": 7.505623665272728e-06, "loss": 0.0285, "step": 98780 }, { "epoch": 0.7993365158993446, "grad_norm": 0.23049162328243256, "learning_rate": 7.505012602012833e-06, "loss": 0.0353, "step": 98790 }, { "epoch": 0.7994174285945465, "grad_norm": 0.20715439319610596, "learning_rate": 7.504401488795749e-06, "loss": 0.0249, "step": 98800 }, { "epoch": 0.7994983412897484, "grad_norm": 0.26397255063056946, "learning_rate": 7.503790325633667e-06, "loss": 0.0325, "step": 98810 }, { "epoch": 0.7995792539849502, "grad_norm": 0.3049089014530182, "learning_rate": 7.50317911253877e-06, "loss": 0.0264, "step": 98820 }, { "epoch": 0.7996601666801522, "grad_norm": 0.3068076968193054, "learning_rate": 7.502567849523253e-06, "loss": 0.0243, "step": 98830 }, { "epoch": 0.799741079375354, "grad_norm": 0.5146015286445618, "learning_rate": 7.501956536599301e-06, "loss": 0.0327, "step": 98840 }, { "epoch": 0.7998219920705558, "grad_norm": 0.1512479931116104, "learning_rate": 7.5013451737791085e-06, "loss": 0.0111, "step": 98850 }, { "epoch": 0.7999029047657578, "grad_norm": 0.4902937412261963, "learning_rate": 7.5007337610748675e-06, "loss": 0.0376, "step": 98860 }, { "epoch": 0.7999838174609596, "grad_norm": 0.3929767310619354, "learning_rate": 7.50012229849877e-06, "loss": 0.0276, "step": 98870 }, { "epoch": 0.8000647301561615, "grad_norm": 0.24074746668338776, "learning_rate": 7.4995107860630115e-06, "loss": 0.0446, "step": 98880 }, { "epoch": 0.8001456428513634, "grad_norm": 0.37822505831718445, "learning_rate": 7.4988992237797876e-06, "loss": 0.0215, "step": 98890 }, { "epoch": 0.8002265555465653, "grad_norm": 1.986096739768982, "learning_rate": 7.498287611661292e-06, "loss": 0.033, "step": 98900 }, { "epoch": 0.8003074682417671, "grad_norm": 0.09893307834863663, "learning_rate": 7.497675949719727e-06, "loss": 0.0282, "step": 98910 }, { "epoch": 0.800388380936969, "grad_norm": 0.46046167612075806, "learning_rate": 7.4970642379672874e-06, "loss": 0.032, "step": 98920 }, { "epoch": 0.8004692936321709, "grad_norm": 0.6170758605003357, "learning_rate": 7.496452476416172e-06, "loss": 0.0221, "step": 98930 }, { "epoch": 0.8005502063273727, "grad_norm": 0.276753306388855, "learning_rate": 7.495840665078583e-06, "loss": 0.0331, "step": 98940 }, { "epoch": 0.8006311190225747, "grad_norm": 0.43533894419670105, "learning_rate": 7.495228803966721e-06, "loss": 0.0475, "step": 98950 }, { "epoch": 0.8007120317177765, "grad_norm": 0.5430919528007507, "learning_rate": 7.494616893092788e-06, "loss": 0.0352, "step": 98960 }, { "epoch": 0.8007929444129784, "grad_norm": 0.32452261447906494, "learning_rate": 7.494004932468987e-06, "loss": 0.0279, "step": 98970 }, { "epoch": 0.8008738571081803, "grad_norm": 0.41023796796798706, "learning_rate": 7.493392922107522e-06, "loss": 0.0292, "step": 98980 }, { "epoch": 0.8009547698033822, "grad_norm": 0.22838498651981354, "learning_rate": 7.4927808620206014e-06, "loss": 0.0232, "step": 98990 }, { "epoch": 0.801035682498584, "grad_norm": 0.3492870330810547, "learning_rate": 7.492168752220428e-06, "loss": 0.0276, "step": 99000 }, { "epoch": 0.8011165951937859, "grad_norm": 0.3799298107624054, "learning_rate": 7.491556592719208e-06, "loss": 0.0239, "step": 99010 }, { "epoch": 0.8011975078889878, "grad_norm": 0.26895084977149963, "learning_rate": 7.490944383529154e-06, "loss": 0.0256, "step": 99020 }, { "epoch": 0.8012784205841896, "grad_norm": 0.3707965314388275, "learning_rate": 7.490332124662473e-06, "loss": 0.0224, "step": 99030 }, { "epoch": 0.8013593332793916, "grad_norm": 0.8729506731033325, "learning_rate": 7.4897198161313735e-06, "loss": 0.0439, "step": 99040 }, { "epoch": 0.8014402459745934, "grad_norm": 0.41953417658805847, "learning_rate": 7.489107457948068e-06, "loss": 0.0293, "step": 99050 }, { "epoch": 0.8015211586697953, "grad_norm": 0.3875294327735901, "learning_rate": 7.488495050124771e-06, "loss": 0.0383, "step": 99060 }, { "epoch": 0.8016020713649972, "grad_norm": 0.45828595757484436, "learning_rate": 7.487882592673693e-06, "loss": 0.0502, "step": 99070 }, { "epoch": 0.801682984060199, "grad_norm": 0.3509196937084198, "learning_rate": 7.487270085607048e-06, "loss": 0.0277, "step": 99080 }, { "epoch": 0.8017638967554009, "grad_norm": 0.5433751940727234, "learning_rate": 7.486657528937051e-06, "loss": 0.0349, "step": 99090 }, { "epoch": 0.8018448094506028, "grad_norm": 0.43187960982322693, "learning_rate": 7.486044922675922e-06, "loss": 0.0224, "step": 99100 }, { "epoch": 0.8019257221458047, "grad_norm": 0.35797053575515747, "learning_rate": 7.4854322668358744e-06, "loss": 0.0306, "step": 99110 }, { "epoch": 0.8020066348410065, "grad_norm": 0.5697306394577026, "learning_rate": 7.484819561429126e-06, "loss": 0.0413, "step": 99120 }, { "epoch": 0.8020875475362085, "grad_norm": 0.5960054993629456, "learning_rate": 7.484206806467898e-06, "loss": 0.0292, "step": 99130 }, { "epoch": 0.8021684602314103, "grad_norm": 0.612224817276001, "learning_rate": 7.483594001964412e-06, "loss": 0.0231, "step": 99140 }, { "epoch": 0.8022493729266121, "grad_norm": 0.6109776496887207, "learning_rate": 7.482981147930882e-06, "loss": 0.0385, "step": 99150 }, { "epoch": 0.8023302856218141, "grad_norm": 0.8751876950263977, "learning_rate": 7.482368244379538e-06, "loss": 0.0279, "step": 99160 }, { "epoch": 0.8024111983170159, "grad_norm": 0.515859842300415, "learning_rate": 7.481755291322599e-06, "loss": 0.0203, "step": 99170 }, { "epoch": 0.8024921110122178, "grad_norm": 0.8430860042572021, "learning_rate": 7.481142288772289e-06, "loss": 0.0435, "step": 99180 }, { "epoch": 0.8025730237074197, "grad_norm": 0.3646980822086334, "learning_rate": 7.480529236740836e-06, "loss": 0.026, "step": 99190 }, { "epoch": 0.8026539364026216, "grad_norm": 0.4347916543483734, "learning_rate": 7.4799161352404616e-06, "loss": 0.0302, "step": 99200 }, { "epoch": 0.8027348490978234, "grad_norm": 0.26759034395217896, "learning_rate": 7.479302984283397e-06, "loss": 0.0297, "step": 99210 }, { "epoch": 0.8028157617930253, "grad_norm": 0.3099389374256134, "learning_rate": 7.47868978388187e-06, "loss": 0.0179, "step": 99220 }, { "epoch": 0.8028966744882272, "grad_norm": 0.672907829284668, "learning_rate": 7.478076534048104e-06, "loss": 0.042, "step": 99230 }, { "epoch": 0.802977587183429, "grad_norm": 0.4494985044002533, "learning_rate": 7.477463234794335e-06, "loss": 0.0271, "step": 99240 }, { "epoch": 0.803058499878631, "grad_norm": 0.2940426170825958, "learning_rate": 7.476849886132792e-06, "loss": 0.0308, "step": 99250 }, { "epoch": 0.8031394125738328, "grad_norm": 0.3208427429199219, "learning_rate": 7.476236488075707e-06, "loss": 0.0344, "step": 99260 }, { "epoch": 0.8032203252690348, "grad_norm": 0.32531943917274475, "learning_rate": 7.475623040635313e-06, "loss": 0.0244, "step": 99270 }, { "epoch": 0.8033012379642366, "grad_norm": 0.34851086139678955, "learning_rate": 7.475009543823844e-06, "loss": 0.0262, "step": 99280 }, { "epoch": 0.8033821506594385, "grad_norm": 0.23353585600852966, "learning_rate": 7.474395997653533e-06, "loss": 0.0249, "step": 99290 }, { "epoch": 0.8034630633546403, "grad_norm": 0.6216422319412231, "learning_rate": 7.473782402136618e-06, "loss": 0.0375, "step": 99300 }, { "epoch": 0.8035439760498422, "grad_norm": 0.9948941469192505, "learning_rate": 7.473168757285335e-06, "loss": 0.0388, "step": 99310 }, { "epoch": 0.8036248887450441, "grad_norm": 0.30376890301704407, "learning_rate": 7.472555063111924e-06, "loss": 0.0312, "step": 99320 }, { "epoch": 0.803705801440246, "grad_norm": 0.31059587001800537, "learning_rate": 7.471941319628621e-06, "loss": 0.0297, "step": 99330 }, { "epoch": 0.8037867141354479, "grad_norm": 0.6144154071807861, "learning_rate": 7.4713275268476674e-06, "loss": 0.0318, "step": 99340 }, { "epoch": 0.8038676268306497, "grad_norm": 0.46363088488578796, "learning_rate": 7.470713684781302e-06, "loss": 0.0289, "step": 99350 }, { "epoch": 0.8039485395258517, "grad_norm": 0.5118658542633057, "learning_rate": 7.4700997934417694e-06, "loss": 0.0226, "step": 99360 }, { "epoch": 0.8040294522210535, "grad_norm": 0.0887637734413147, "learning_rate": 7.4694858528413105e-06, "loss": 0.0164, "step": 99370 }, { "epoch": 0.8041103649162553, "grad_norm": 0.5492674112319946, "learning_rate": 7.468871862992171e-06, "loss": 0.0327, "step": 99380 }, { "epoch": 0.8041912776114573, "grad_norm": 0.6282122135162354, "learning_rate": 7.468257823906591e-06, "loss": 0.0351, "step": 99390 }, { "epoch": 0.8042721903066591, "grad_norm": 0.07057781517505646, "learning_rate": 7.467643735596823e-06, "loss": 0.0238, "step": 99400 }, { "epoch": 0.804353103001861, "grad_norm": 0.2909427285194397, "learning_rate": 7.467029598075109e-06, "loss": 0.0285, "step": 99410 }, { "epoch": 0.8044340156970629, "grad_norm": 0.6220176219940186, "learning_rate": 7.466415411353696e-06, "loss": 0.0346, "step": 99420 }, { "epoch": 0.8045149283922648, "grad_norm": 0.4715431332588196, "learning_rate": 7.465801175444836e-06, "loss": 0.0299, "step": 99430 }, { "epoch": 0.8045958410874666, "grad_norm": 0.6254429817199707, "learning_rate": 7.465186890360777e-06, "loss": 0.0376, "step": 99440 }, { "epoch": 0.8046767537826685, "grad_norm": 0.4141729772090912, "learning_rate": 7.46457255611377e-06, "loss": 0.0313, "step": 99450 }, { "epoch": 0.8047576664778704, "grad_norm": 0.5346055626869202, "learning_rate": 7.463958172716066e-06, "loss": 0.0228, "step": 99460 }, { "epoch": 0.8048385791730722, "grad_norm": 0.4774377644062042, "learning_rate": 7.463343740179918e-06, "loss": 0.0161, "step": 99470 }, { "epoch": 0.8049194918682742, "grad_norm": 0.23787665367126465, "learning_rate": 7.462729258517579e-06, "loss": 0.0334, "step": 99480 }, { "epoch": 0.805000404563476, "grad_norm": 0.5534453988075256, "learning_rate": 7.462114727741305e-06, "loss": 0.0302, "step": 99490 }, { "epoch": 0.8050813172586779, "grad_norm": 0.6369438171386719, "learning_rate": 7.461500147863349e-06, "loss": 0.0447, "step": 99500 }, { "epoch": 0.8051622299538798, "grad_norm": 0.2135586142539978, "learning_rate": 7.4608855188959705e-06, "loss": 0.0193, "step": 99510 }, { "epoch": 0.8052431426490816, "grad_norm": 0.36267417669296265, "learning_rate": 7.460270840851424e-06, "loss": 0.015, "step": 99520 }, { "epoch": 0.8053240553442835, "grad_norm": 0.5888487696647644, "learning_rate": 7.45965611374197e-06, "loss": 0.0269, "step": 99530 }, { "epoch": 0.8054049680394854, "grad_norm": 0.4645904302597046, "learning_rate": 7.459041337579867e-06, "loss": 0.0322, "step": 99540 }, { "epoch": 0.8054858807346873, "grad_norm": 0.3705228269100189, "learning_rate": 7.458426512377377e-06, "loss": 0.041, "step": 99550 }, { "epoch": 0.8055667934298891, "grad_norm": 0.66335129737854, "learning_rate": 7.45781163814676e-06, "loss": 0.0475, "step": 99560 }, { "epoch": 0.8056477061250911, "grad_norm": 0.5503975749015808, "learning_rate": 7.457196714900279e-06, "loss": 0.0331, "step": 99570 }, { "epoch": 0.8057286188202929, "grad_norm": 0.2848638892173767, "learning_rate": 7.456581742650195e-06, "loss": 0.0217, "step": 99580 }, { "epoch": 0.8058095315154947, "grad_norm": 0.22245602309703827, "learning_rate": 7.455966721408776e-06, "loss": 0.0327, "step": 99590 }, { "epoch": 0.8058904442106967, "grad_norm": 0.5475102663040161, "learning_rate": 7.455351651188285e-06, "loss": 0.0261, "step": 99600 }, { "epoch": 0.8059713569058985, "grad_norm": 0.04635236784815788, "learning_rate": 7.454736532000987e-06, "loss": 0.0235, "step": 99610 }, { "epoch": 0.8060522696011004, "grad_norm": 0.2255283147096634, "learning_rate": 7.454121363859154e-06, "loss": 0.0273, "step": 99620 }, { "epoch": 0.8061331822963023, "grad_norm": 0.38625872135162354, "learning_rate": 7.4535061467750495e-06, "loss": 0.0217, "step": 99630 }, { "epoch": 0.8062140949915042, "grad_norm": 0.5866373181343079, "learning_rate": 7.452890880760945e-06, "loss": 0.0234, "step": 99640 }, { "epoch": 0.806295007686706, "grad_norm": 0.44784387946128845, "learning_rate": 7.452275565829109e-06, "loss": 0.0306, "step": 99650 }, { "epoch": 0.806375920381908, "grad_norm": 0.4070758819580078, "learning_rate": 7.451660201991815e-06, "loss": 0.0281, "step": 99660 }, { "epoch": 0.8064568330771098, "grad_norm": 0.5683966279029846, "learning_rate": 7.451044789261334e-06, "loss": 0.0341, "step": 99670 }, { "epoch": 0.8065377457723116, "grad_norm": 0.12173178791999817, "learning_rate": 7.4504293276499394e-06, "loss": 0.0204, "step": 99680 }, { "epoch": 0.8066186584675136, "grad_norm": 0.8133991360664368, "learning_rate": 7.4498138171699045e-06, "loss": 0.035, "step": 99690 }, { "epoch": 0.8066995711627154, "grad_norm": 0.4980933666229248, "learning_rate": 7.449198257833505e-06, "loss": 0.0255, "step": 99700 }, { "epoch": 0.8067804838579173, "grad_norm": 0.2474455088376999, "learning_rate": 7.448582649653015e-06, "loss": 0.0232, "step": 99710 }, { "epoch": 0.8068613965531192, "grad_norm": 0.30800893902778625, "learning_rate": 7.4479669926407146e-06, "loss": 0.0279, "step": 99720 }, { "epoch": 0.8069423092483211, "grad_norm": 0.1064971387386322, "learning_rate": 7.4473512868088815e-06, "loss": 0.0472, "step": 99730 }, { "epoch": 0.8070232219435229, "grad_norm": 0.48489251732826233, "learning_rate": 7.446735532169792e-06, "loss": 0.0426, "step": 99740 }, { "epoch": 0.8071041346387248, "grad_norm": 0.5122143030166626, "learning_rate": 7.446119728735728e-06, "loss": 0.0285, "step": 99750 }, { "epoch": 0.8071850473339267, "grad_norm": 0.7232096791267395, "learning_rate": 7.4455038765189705e-06, "loss": 0.0204, "step": 99760 }, { "epoch": 0.8072659600291285, "grad_norm": 0.4571722149848938, "learning_rate": 7.4448879755318e-06, "loss": 0.0348, "step": 99770 }, { "epoch": 0.8073468727243305, "grad_norm": 0.40134745836257935, "learning_rate": 7.444272025786501e-06, "loss": 0.0323, "step": 99780 }, { "epoch": 0.8074277854195323, "grad_norm": 0.2687808573246002, "learning_rate": 7.443656027295356e-06, "loss": 0.0192, "step": 99790 }, { "epoch": 0.8075086981147342, "grad_norm": 0.42127594351768494, "learning_rate": 7.443039980070652e-06, "loss": 0.0297, "step": 99800 }, { "epoch": 0.8075896108099361, "grad_norm": 0.7062822580337524, "learning_rate": 7.442423884124671e-06, "loss": 0.0305, "step": 99810 }, { "epoch": 0.8076705235051379, "grad_norm": 0.22722561657428741, "learning_rate": 7.441807739469702e-06, "loss": 0.0334, "step": 99820 }, { "epoch": 0.8077514362003398, "grad_norm": 0.3083276152610779, "learning_rate": 7.441191546118033e-06, "loss": 0.0248, "step": 99830 }, { "epoch": 0.8078323488955417, "grad_norm": 0.32017141580581665, "learning_rate": 7.440575304081953e-06, "loss": 0.0468, "step": 99840 }, { "epoch": 0.8079132615907436, "grad_norm": 0.23021799325942993, "learning_rate": 7.43995901337375e-06, "loss": 0.022, "step": 99850 }, { "epoch": 0.8079941742859454, "grad_norm": 0.36527684330940247, "learning_rate": 7.4393426740057154e-06, "loss": 0.0358, "step": 99860 }, { "epoch": 0.8080750869811474, "grad_norm": 0.12759853899478912, "learning_rate": 7.438726285990141e-06, "loss": 0.023, "step": 99870 }, { "epoch": 0.8081559996763492, "grad_norm": 0.8678298592567444, "learning_rate": 7.438109849339319e-06, "loss": 0.0395, "step": 99880 }, { "epoch": 0.808236912371551, "grad_norm": 0.759488582611084, "learning_rate": 7.437493364065543e-06, "loss": 0.0348, "step": 99890 }, { "epoch": 0.808317825066753, "grad_norm": 0.6569472551345825, "learning_rate": 7.436876830181108e-06, "loss": 0.0284, "step": 99900 }, { "epoch": 0.8083987377619548, "grad_norm": 0.011566641740500927, "learning_rate": 7.436260247698308e-06, "loss": 0.0269, "step": 99910 }, { "epoch": 0.8084796504571568, "grad_norm": 0.4958949089050293, "learning_rate": 7.435643616629443e-06, "loss": 0.0257, "step": 99920 }, { "epoch": 0.8085605631523586, "grad_norm": 0.14265340566635132, "learning_rate": 7.435026936986806e-06, "loss": 0.0322, "step": 99930 }, { "epoch": 0.8086414758475605, "grad_norm": 0.4317944347858429, "learning_rate": 7.434410208782698e-06, "loss": 0.0302, "step": 99940 }, { "epoch": 0.8087223885427623, "grad_norm": 0.37248584628105164, "learning_rate": 7.433793432029417e-06, "loss": 0.0428, "step": 99950 }, { "epoch": 0.8088033012379643, "grad_norm": 0.345824658870697, "learning_rate": 7.433176606739265e-06, "loss": 0.0303, "step": 99960 }, { "epoch": 0.8088842139331661, "grad_norm": 0.40510374307632446, "learning_rate": 7.432559732924542e-06, "loss": 0.0231, "step": 99970 }, { "epoch": 0.808965126628368, "grad_norm": 0.7126820087432861, "learning_rate": 7.431942810597549e-06, "loss": 0.0409, "step": 99980 }, { "epoch": 0.8090460393235699, "grad_norm": 0.8253034949302673, "learning_rate": 7.431325839770593e-06, "loss": 0.0313, "step": 99990 }, { "epoch": 0.8091269520187717, "grad_norm": 0.44505760073661804, "learning_rate": 7.430708820455975e-06, "loss": 0.0432, "step": 100000 }, { "epoch": 0.8092078647139737, "grad_norm": 0.26098665595054626, "learning_rate": 7.430091752666e-06, "loss": 0.017, "step": 100010 }, { "epoch": 0.8092887774091755, "grad_norm": 0.38514986634254456, "learning_rate": 7.429474636412976e-06, "loss": 0.0276, "step": 100020 }, { "epoch": 0.8093696901043774, "grad_norm": 0.628227174282074, "learning_rate": 7.42885747170921e-06, "loss": 0.0367, "step": 100030 }, { "epoch": 0.8094506027995793, "grad_norm": 0.5201029777526855, "learning_rate": 7.428240258567008e-06, "loss": 0.0279, "step": 100040 }, { "epoch": 0.8095315154947811, "grad_norm": 0.29165124893188477, "learning_rate": 7.4276229969986805e-06, "loss": 0.02, "step": 100050 }, { "epoch": 0.809612428189983, "grad_norm": 0.19748768210411072, "learning_rate": 7.427005687016538e-06, "loss": 0.0283, "step": 100060 }, { "epoch": 0.8096933408851849, "grad_norm": 0.23263508081436157, "learning_rate": 7.42638832863289e-06, "loss": 0.0267, "step": 100070 }, { "epoch": 0.8097742535803868, "grad_norm": 0.1632208675146103, "learning_rate": 7.425770921860049e-06, "loss": 0.0316, "step": 100080 }, { "epoch": 0.8098551662755886, "grad_norm": 0.43805989623069763, "learning_rate": 7.425153466710328e-06, "loss": 0.0349, "step": 100090 }, { "epoch": 0.8099360789707906, "grad_norm": 0.20277462899684906, "learning_rate": 7.424535963196042e-06, "loss": 0.0163, "step": 100100 }, { "epoch": 0.8100169916659924, "grad_norm": 0.30014491081237793, "learning_rate": 7.423918411329504e-06, "loss": 0.022, "step": 100110 }, { "epoch": 0.8100979043611942, "grad_norm": 0.5852169394493103, "learning_rate": 7.423300811123028e-06, "loss": 0.0415, "step": 100120 }, { "epoch": 0.8101788170563962, "grad_norm": 0.23934577405452728, "learning_rate": 7.422683162588936e-06, "loss": 0.0455, "step": 100130 }, { "epoch": 0.810259729751598, "grad_norm": 0.7344825863838196, "learning_rate": 7.422065465739543e-06, "loss": 0.0349, "step": 100140 }, { "epoch": 0.8103406424467999, "grad_norm": 0.3236447274684906, "learning_rate": 7.421447720587166e-06, "loss": 0.0341, "step": 100150 }, { "epoch": 0.8104215551420018, "grad_norm": 0.4318086504936218, "learning_rate": 7.420829927144128e-06, "loss": 0.0308, "step": 100160 }, { "epoch": 0.8105024678372037, "grad_norm": 0.311743825674057, "learning_rate": 7.420212085422746e-06, "loss": 0.0384, "step": 100170 }, { "epoch": 0.8105833805324055, "grad_norm": 0.09483704715967178, "learning_rate": 7.419594195435343e-06, "loss": 0.0213, "step": 100180 }, { "epoch": 0.8106642932276074, "grad_norm": 0.22503307461738586, "learning_rate": 7.4189762571942435e-06, "loss": 0.0164, "step": 100190 }, { "epoch": 0.8107452059228093, "grad_norm": 0.28177136182785034, "learning_rate": 7.418358270711767e-06, "loss": 0.0245, "step": 100200 }, { "epoch": 0.8108261186180111, "grad_norm": 0.6681495308876038, "learning_rate": 7.417740236000242e-06, "loss": 0.0362, "step": 100210 }, { "epoch": 0.8109070313132131, "grad_norm": 0.524234414100647, "learning_rate": 7.417122153071992e-06, "loss": 0.0373, "step": 100220 }, { "epoch": 0.8109879440084149, "grad_norm": 0.3424968719482422, "learning_rate": 7.416504021939341e-06, "loss": 0.0274, "step": 100230 }, { "epoch": 0.8110688567036168, "grad_norm": 0.3431905210018158, "learning_rate": 7.415885842614622e-06, "loss": 0.0316, "step": 100240 }, { "epoch": 0.8111497693988187, "grad_norm": 0.17897635698318481, "learning_rate": 7.415267615110157e-06, "loss": 0.0231, "step": 100250 }, { "epoch": 0.8112306820940206, "grad_norm": 0.23531097173690796, "learning_rate": 7.414649339438282e-06, "loss": 0.0301, "step": 100260 }, { "epoch": 0.8113115947892224, "grad_norm": 0.38714033365249634, "learning_rate": 7.414031015611321e-06, "loss": 0.0333, "step": 100270 }, { "epoch": 0.8113925074844243, "grad_norm": 0.34760403633117676, "learning_rate": 7.413412643641607e-06, "loss": 0.0281, "step": 100280 }, { "epoch": 0.8114734201796262, "grad_norm": 0.4473830759525299, "learning_rate": 7.4127942235414754e-06, "loss": 0.0342, "step": 100290 }, { "epoch": 0.811554332874828, "grad_norm": 0.40173739194869995, "learning_rate": 7.412175755323254e-06, "loss": 0.0217, "step": 100300 }, { "epoch": 0.81163524557003, "grad_norm": 0.5717974305152893, "learning_rate": 7.411557238999281e-06, "loss": 0.0363, "step": 100310 }, { "epoch": 0.8117161582652318, "grad_norm": 0.45299994945526123, "learning_rate": 7.41093867458189e-06, "loss": 0.0327, "step": 100320 }, { "epoch": 0.8117970709604337, "grad_norm": 0.27875858545303345, "learning_rate": 7.410320062083416e-06, "loss": 0.0189, "step": 100330 }, { "epoch": 0.8118779836556356, "grad_norm": 0.2958775758743286, "learning_rate": 7.409701401516196e-06, "loss": 0.0251, "step": 100340 }, { "epoch": 0.8119588963508374, "grad_norm": 0.358877956867218, "learning_rate": 7.40908269289257e-06, "loss": 0.035, "step": 100350 }, { "epoch": 0.8120398090460393, "grad_norm": 0.48573046922683716, "learning_rate": 7.408463936224875e-06, "loss": 0.0319, "step": 100360 }, { "epoch": 0.8121207217412412, "grad_norm": 0.22757093608379364, "learning_rate": 7.407845131525452e-06, "loss": 0.0329, "step": 100370 }, { "epoch": 0.8122016344364431, "grad_norm": 0.4465605914592743, "learning_rate": 7.40722627880664e-06, "loss": 0.0238, "step": 100380 }, { "epoch": 0.8122825471316449, "grad_norm": 0.34183189272880554, "learning_rate": 7.406607378080781e-06, "loss": 0.0262, "step": 100390 }, { "epoch": 0.8123634598268469, "grad_norm": 0.3579387366771698, "learning_rate": 7.405988429360221e-06, "loss": 0.0276, "step": 100400 }, { "epoch": 0.8124443725220487, "grad_norm": 0.6256296038627625, "learning_rate": 7.4053694326573e-06, "loss": 0.0381, "step": 100410 }, { "epoch": 0.8125252852172505, "grad_norm": 0.5499087572097778, "learning_rate": 7.404750387984363e-06, "loss": 0.0317, "step": 100420 }, { "epoch": 0.8126061979124525, "grad_norm": 0.5260722041130066, "learning_rate": 7.404131295353756e-06, "loss": 0.0383, "step": 100430 }, { "epoch": 0.8126871106076543, "grad_norm": 0.5748369097709656, "learning_rate": 7.403512154777826e-06, "loss": 0.023, "step": 100440 }, { "epoch": 0.8127680233028562, "grad_norm": 0.4027036130428314, "learning_rate": 7.402892966268921e-06, "loss": 0.0335, "step": 100450 }, { "epoch": 0.8128489359980581, "grad_norm": 0.4145587384700775, "learning_rate": 7.402273729839388e-06, "loss": 0.0241, "step": 100460 }, { "epoch": 0.81292984869326, "grad_norm": 0.46314457058906555, "learning_rate": 7.401654445501577e-06, "loss": 0.0389, "step": 100470 }, { "epoch": 0.8130107613884618, "grad_norm": 0.6651785373687744, "learning_rate": 7.401035113267838e-06, "loss": 0.0332, "step": 100480 }, { "epoch": 0.8130916740836637, "grad_norm": 0.6574079394340515, "learning_rate": 7.4004157331505225e-06, "loss": 0.0302, "step": 100490 }, { "epoch": 0.8131725867788656, "grad_norm": 0.4169841408729553, "learning_rate": 7.399796305161982e-06, "loss": 0.0232, "step": 100500 }, { "epoch": 0.8132534994740674, "grad_norm": 0.2623451352119446, "learning_rate": 7.399176829314572e-06, "loss": 0.0232, "step": 100510 }, { "epoch": 0.8133344121692694, "grad_norm": 0.7300513386726379, "learning_rate": 7.3985573056206424e-06, "loss": 0.0324, "step": 100520 }, { "epoch": 0.8134153248644712, "grad_norm": 0.8054159283638, "learning_rate": 7.3979377340925526e-06, "loss": 0.0285, "step": 100530 }, { "epoch": 0.8134962375596732, "grad_norm": 0.3251931667327881, "learning_rate": 7.397318114742658e-06, "loss": 0.0236, "step": 100540 }, { "epoch": 0.813577150254875, "grad_norm": 0.3776753544807434, "learning_rate": 7.3966984475833124e-06, "loss": 0.0282, "step": 100550 }, { "epoch": 0.8136580629500769, "grad_norm": 0.25698065757751465, "learning_rate": 7.396078732626877e-06, "loss": 0.0309, "step": 100560 }, { "epoch": 0.8137389756452788, "grad_norm": 0.5530312061309814, "learning_rate": 7.39545896988571e-06, "loss": 0.0303, "step": 100570 }, { "epoch": 0.8138198883404806, "grad_norm": 0.3232353925704956, "learning_rate": 7.39483915937217e-06, "loss": 0.0263, "step": 100580 }, { "epoch": 0.8139008010356825, "grad_norm": 0.5801548957824707, "learning_rate": 7.39421930109862e-06, "loss": 0.04, "step": 100590 }, { "epoch": 0.8139817137308843, "grad_norm": 0.21491597592830658, "learning_rate": 7.39359939507742e-06, "loss": 0.0226, "step": 100600 }, { "epoch": 0.8140626264260863, "grad_norm": 0.47025489807128906, "learning_rate": 7.392979441320931e-06, "loss": 0.0267, "step": 100610 }, { "epoch": 0.8141435391212881, "grad_norm": 0.4557698965072632, "learning_rate": 7.392359439841522e-06, "loss": 0.022, "step": 100620 }, { "epoch": 0.8142244518164901, "grad_norm": 0.42256784439086914, "learning_rate": 7.391739390651552e-06, "loss": 0.0651, "step": 100630 }, { "epoch": 0.8143053645116919, "grad_norm": 0.6526263356208801, "learning_rate": 7.39111929376339e-06, "loss": 0.0325, "step": 100640 }, { "epoch": 0.8143862772068937, "grad_norm": 0.39832526445388794, "learning_rate": 7.390499149189403e-06, "loss": 0.0316, "step": 100650 }, { "epoch": 0.8144671899020957, "grad_norm": 0.5542557239532471, "learning_rate": 7.3898789569419545e-06, "loss": 0.0319, "step": 100660 }, { "epoch": 0.8145481025972975, "grad_norm": 0.4172927439212799, "learning_rate": 7.389258717033418e-06, "loss": 0.0191, "step": 100670 }, { "epoch": 0.8146290152924994, "grad_norm": 0.3376327157020569, "learning_rate": 7.388638429476158e-06, "loss": 0.0293, "step": 100680 }, { "epoch": 0.8147099279877013, "grad_norm": 0.3896227478981018, "learning_rate": 7.388018094282548e-06, "loss": 0.0386, "step": 100690 }, { "epoch": 0.8147908406829032, "grad_norm": 0.3417482078075409, "learning_rate": 7.38739771146496e-06, "loss": 0.0404, "step": 100700 }, { "epoch": 0.814871753378105, "grad_norm": 0.32117965817451477, "learning_rate": 7.386777281035762e-06, "loss": 0.0265, "step": 100710 }, { "epoch": 0.8149526660733069, "grad_norm": 0.7569177746772766, "learning_rate": 7.386156803007332e-06, "loss": 0.0259, "step": 100720 }, { "epoch": 0.8150335787685088, "grad_norm": 0.3396790623664856, "learning_rate": 7.385536277392042e-06, "loss": 0.0295, "step": 100730 }, { "epoch": 0.8151144914637106, "grad_norm": 0.16452361643314362, "learning_rate": 7.384915704202264e-06, "loss": 0.0272, "step": 100740 }, { "epoch": 0.8151954041589126, "grad_norm": 0.3293360769748688, "learning_rate": 7.38429508345038e-06, "loss": 0.0377, "step": 100750 }, { "epoch": 0.8152763168541144, "grad_norm": 0.19314485788345337, "learning_rate": 7.383674415148763e-06, "loss": 0.0372, "step": 100760 }, { "epoch": 0.8153572295493163, "grad_norm": 0.5825406312942505, "learning_rate": 7.3830536993097925e-06, "loss": 0.0299, "step": 100770 }, { "epoch": 0.8154381422445182, "grad_norm": 0.5146988034248352, "learning_rate": 7.3824329359458465e-06, "loss": 0.0338, "step": 100780 }, { "epoch": 0.81551905493972, "grad_norm": 0.572759211063385, "learning_rate": 7.381812125069304e-06, "loss": 0.0382, "step": 100790 }, { "epoch": 0.8155999676349219, "grad_norm": 0.19536568224430084, "learning_rate": 7.38119126669255e-06, "loss": 0.0342, "step": 100800 }, { "epoch": 0.8156808803301238, "grad_norm": 0.4910832643508911, "learning_rate": 7.380570360827961e-06, "loss": 0.0228, "step": 100810 }, { "epoch": 0.8157617930253257, "grad_norm": 0.27311810851097107, "learning_rate": 7.379949407487921e-06, "loss": 0.0276, "step": 100820 }, { "epoch": 0.8158427057205275, "grad_norm": 0.5372477173805237, "learning_rate": 7.379328406684816e-06, "loss": 0.0273, "step": 100830 }, { "epoch": 0.8159236184157295, "grad_norm": 0.3801361918449402, "learning_rate": 7.378707358431028e-06, "loss": 0.0282, "step": 100840 }, { "epoch": 0.8160045311109313, "grad_norm": 0.27385225892066956, "learning_rate": 7.378086262738944e-06, "loss": 0.0149, "step": 100850 }, { "epoch": 0.8160854438061332, "grad_norm": 0.28671565651893616, "learning_rate": 7.377465119620949e-06, "loss": 0.0259, "step": 100860 }, { "epoch": 0.8161663565013351, "grad_norm": 0.5530810356140137, "learning_rate": 7.376843929089431e-06, "loss": 0.0245, "step": 100870 }, { "epoch": 0.8162472691965369, "grad_norm": 0.38659343123435974, "learning_rate": 7.37622269115678e-06, "loss": 0.0313, "step": 100880 }, { "epoch": 0.8163281818917388, "grad_norm": 0.636850118637085, "learning_rate": 7.375601405835383e-06, "loss": 0.0176, "step": 100890 }, { "epoch": 0.8164090945869407, "grad_norm": 0.5979388356208801, "learning_rate": 7.37498007313763e-06, "loss": 0.0385, "step": 100900 }, { "epoch": 0.8164900072821426, "grad_norm": 0.4226751923561096, "learning_rate": 7.3743586930759145e-06, "loss": 0.0171, "step": 100910 }, { "epoch": 0.8165709199773444, "grad_norm": 0.4343307912349701, "learning_rate": 7.373737265662628e-06, "loss": 0.0321, "step": 100920 }, { "epoch": 0.8166518326725464, "grad_norm": 0.35736194252967834, "learning_rate": 7.373115790910161e-06, "loss": 0.0256, "step": 100930 }, { "epoch": 0.8167327453677482, "grad_norm": 0.33065855503082275, "learning_rate": 7.372494268830912e-06, "loss": 0.0342, "step": 100940 }, { "epoch": 0.81681365806295, "grad_norm": 0.7852398753166199, "learning_rate": 7.371872699437272e-06, "loss": 0.0384, "step": 100950 }, { "epoch": 0.816894570758152, "grad_norm": 0.48036709427833557, "learning_rate": 7.371251082741639e-06, "loss": 0.0281, "step": 100960 }, { "epoch": 0.8169754834533538, "grad_norm": 0.6500069499015808, "learning_rate": 7.370629418756407e-06, "loss": 0.0324, "step": 100970 }, { "epoch": 0.8170563961485557, "grad_norm": 0.39428865909576416, "learning_rate": 7.370007707493977e-06, "loss": 0.0288, "step": 100980 }, { "epoch": 0.8171373088437576, "grad_norm": 0.36163192987442017, "learning_rate": 7.369385948966747e-06, "loss": 0.0278, "step": 100990 }, { "epoch": 0.8172182215389595, "grad_norm": 0.4347050189971924, "learning_rate": 7.368764143187117e-06, "loss": 0.0353, "step": 101000 }, { "epoch": 0.8172991342341613, "grad_norm": 0.253909170627594, "learning_rate": 7.368142290167485e-06, "loss": 0.0223, "step": 101010 }, { "epoch": 0.8173800469293632, "grad_norm": 0.4808991253376007, "learning_rate": 7.367520389920255e-06, "loss": 0.0367, "step": 101020 }, { "epoch": 0.8174609596245651, "grad_norm": 0.34398913383483887, "learning_rate": 7.36689844245783e-06, "loss": 0.0359, "step": 101030 }, { "epoch": 0.8175418723197669, "grad_norm": 0.8477786779403687, "learning_rate": 7.366276447792611e-06, "loss": 0.0394, "step": 101040 }, { "epoch": 0.8176227850149689, "grad_norm": 0.18865130841732025, "learning_rate": 7.365654405937004e-06, "loss": 0.0238, "step": 101050 }, { "epoch": 0.8177036977101707, "grad_norm": 0.5995416045188904, "learning_rate": 7.3650323169034145e-06, "loss": 0.0247, "step": 101060 }, { "epoch": 0.8177846104053726, "grad_norm": 0.29210710525512695, "learning_rate": 7.364410180704248e-06, "loss": 0.0277, "step": 101070 }, { "epoch": 0.8178655231005745, "grad_norm": 0.49886223673820496, "learning_rate": 7.363787997351912e-06, "loss": 0.0394, "step": 101080 }, { "epoch": 0.8179464357957763, "grad_norm": 0.5436668992042542, "learning_rate": 7.363165766858813e-06, "loss": 0.024, "step": 101090 }, { "epoch": 0.8180273484909782, "grad_norm": 0.5744194388389587, "learning_rate": 7.362543489237365e-06, "loss": 0.0413, "step": 101100 }, { "epoch": 0.8181082611861801, "grad_norm": 0.4624500870704651, "learning_rate": 7.361921164499972e-06, "loss": 0.034, "step": 101110 }, { "epoch": 0.818189173881382, "grad_norm": 0.7010859251022339, "learning_rate": 7.3612987926590475e-06, "loss": 0.0391, "step": 101120 }, { "epoch": 0.8182700865765838, "grad_norm": 0.26180610060691833, "learning_rate": 7.360676373727005e-06, "loss": 0.0212, "step": 101130 }, { "epoch": 0.8183509992717858, "grad_norm": 0.2823032736778259, "learning_rate": 7.360053907716256e-06, "loss": 0.0274, "step": 101140 }, { "epoch": 0.8184319119669876, "grad_norm": 0.779371440410614, "learning_rate": 7.359431394639211e-06, "loss": 0.0353, "step": 101150 }, { "epoch": 0.8185128246621894, "grad_norm": 0.48088881373405457, "learning_rate": 7.358808834508291e-06, "loss": 0.0193, "step": 101160 }, { "epoch": 0.8185937373573914, "grad_norm": 0.5854122042655945, "learning_rate": 7.358186227335908e-06, "loss": 0.0219, "step": 101170 }, { "epoch": 0.8186746500525932, "grad_norm": 0.6788217425346375, "learning_rate": 7.357563573134479e-06, "loss": 0.0247, "step": 101180 }, { "epoch": 0.8187555627477952, "grad_norm": 0.585076093673706, "learning_rate": 7.3569408719164206e-06, "loss": 0.0286, "step": 101190 }, { "epoch": 0.818836475442997, "grad_norm": 0.5441122055053711, "learning_rate": 7.356318123694152e-06, "loss": 0.0209, "step": 101200 }, { "epoch": 0.8189173881381989, "grad_norm": 0.9419203996658325, "learning_rate": 7.355695328480095e-06, "loss": 0.0273, "step": 101210 }, { "epoch": 0.8189983008334007, "grad_norm": 0.3585087060928345, "learning_rate": 7.355072486286666e-06, "loss": 0.0139, "step": 101220 }, { "epoch": 0.8190792135286027, "grad_norm": 0.299625039100647, "learning_rate": 7.354449597126287e-06, "loss": 0.0234, "step": 101230 }, { "epoch": 0.8191601262238045, "grad_norm": 0.24321289360523224, "learning_rate": 7.353826661011383e-06, "loss": 0.0254, "step": 101240 }, { "epoch": 0.8192410389190063, "grad_norm": 0.49576324224472046, "learning_rate": 7.353203677954376e-06, "loss": 0.0241, "step": 101250 }, { "epoch": 0.8193219516142083, "grad_norm": 0.49964389204978943, "learning_rate": 7.352580647967689e-06, "loss": 0.029, "step": 101260 }, { "epoch": 0.8194028643094101, "grad_norm": 0.39539408683776855, "learning_rate": 7.351957571063746e-06, "loss": 0.0317, "step": 101270 }, { "epoch": 0.8194837770046121, "grad_norm": 0.5222379565238953, "learning_rate": 7.351334447254977e-06, "loss": 0.0385, "step": 101280 }, { "epoch": 0.8195646896998139, "grad_norm": 0.23089581727981567, "learning_rate": 7.350711276553803e-06, "loss": 0.0267, "step": 101290 }, { "epoch": 0.8196456023950158, "grad_norm": 0.7251933217048645, "learning_rate": 7.350088058972658e-06, "loss": 0.0258, "step": 101300 }, { "epoch": 0.8197265150902177, "grad_norm": 0.4464932978153229, "learning_rate": 7.349464794523966e-06, "loss": 0.0311, "step": 101310 }, { "epoch": 0.8198074277854195, "grad_norm": 0.4360834062099457, "learning_rate": 7.3488414832201605e-06, "loss": 0.0264, "step": 101320 }, { "epoch": 0.8198883404806214, "grad_norm": 0.4096660017967224, "learning_rate": 7.348218125073669e-06, "loss": 0.0234, "step": 101330 }, { "epoch": 0.8199692531758233, "grad_norm": 0.5319542288780212, "learning_rate": 7.3475947200969235e-06, "loss": 0.0371, "step": 101340 }, { "epoch": 0.8200501658710252, "grad_norm": 0.6355952620506287, "learning_rate": 7.346971268302358e-06, "loss": 0.0288, "step": 101350 }, { "epoch": 0.820131078566227, "grad_norm": 0.5766019225120544, "learning_rate": 7.3463477697024066e-06, "loss": 0.0397, "step": 101360 }, { "epoch": 0.820211991261429, "grad_norm": 0.22844885289669037, "learning_rate": 7.3457242243095e-06, "loss": 0.0402, "step": 101370 }, { "epoch": 0.8202929039566308, "grad_norm": 0.5756012797355652, "learning_rate": 7.345100632136078e-06, "loss": 0.0342, "step": 101380 }, { "epoch": 0.8203738166518326, "grad_norm": 0.41547271609306335, "learning_rate": 7.344476993194572e-06, "loss": 0.0223, "step": 101390 }, { "epoch": 0.8204547293470346, "grad_norm": 0.12921157479286194, "learning_rate": 7.343853307497426e-06, "loss": 0.0244, "step": 101400 }, { "epoch": 0.8205356420422364, "grad_norm": 0.34042635560035706, "learning_rate": 7.343229575057071e-06, "loss": 0.0167, "step": 101410 }, { "epoch": 0.8206165547374383, "grad_norm": 0.3738366663455963, "learning_rate": 7.342605795885949e-06, "loss": 0.0209, "step": 101420 }, { "epoch": 0.8206974674326402, "grad_norm": 0.5392149686813354, "learning_rate": 7.341981969996499e-06, "loss": 0.0197, "step": 101430 }, { "epoch": 0.8207783801278421, "grad_norm": 0.4889218509197235, "learning_rate": 7.341358097401165e-06, "loss": 0.0293, "step": 101440 }, { "epoch": 0.8208592928230439, "grad_norm": 0.7095639109611511, "learning_rate": 7.340734178112385e-06, "loss": 0.026, "step": 101450 }, { "epoch": 0.8209402055182458, "grad_norm": 0.6303330063819885, "learning_rate": 7.3401102121426035e-06, "loss": 0.0288, "step": 101460 }, { "epoch": 0.8210211182134477, "grad_norm": 0.8007014989852905, "learning_rate": 7.339486199504264e-06, "loss": 0.0246, "step": 101470 }, { "epoch": 0.8211020309086495, "grad_norm": 1.3449853658676147, "learning_rate": 7.338862140209812e-06, "loss": 0.0325, "step": 101480 }, { "epoch": 0.8211829436038515, "grad_norm": 0.24467724561691284, "learning_rate": 7.338238034271692e-06, "loss": 0.0292, "step": 101490 }, { "epoch": 0.8212638562990533, "grad_norm": 0.4215541481971741, "learning_rate": 7.337613881702349e-06, "loss": 0.0332, "step": 101500 }, { "epoch": 0.8213447689942552, "grad_norm": 0.39191555976867676, "learning_rate": 7.3369896825142336e-06, "loss": 0.0328, "step": 101510 }, { "epoch": 0.8214256816894571, "grad_norm": 0.33189812302589417, "learning_rate": 7.336365436719793e-06, "loss": 0.031, "step": 101520 }, { "epoch": 0.821506594384659, "grad_norm": 0.5337298512458801, "learning_rate": 7.335741144331474e-06, "loss": 0.0266, "step": 101530 }, { "epoch": 0.8215875070798608, "grad_norm": 0.13552241027355194, "learning_rate": 7.335116805361731e-06, "loss": 0.0304, "step": 101540 }, { "epoch": 0.8216684197750627, "grad_norm": 0.6006972193717957, "learning_rate": 7.3344924198230115e-06, "loss": 0.025, "step": 101550 }, { "epoch": 0.8217493324702646, "grad_norm": 0.6101964116096497, "learning_rate": 7.333867987727769e-06, "loss": 0.0258, "step": 101560 }, { "epoch": 0.8218302451654664, "grad_norm": 0.3708588182926178, "learning_rate": 7.333243509088458e-06, "loss": 0.0305, "step": 101570 }, { "epoch": 0.8219111578606684, "grad_norm": 0.43960100412368774, "learning_rate": 7.33261898391753e-06, "loss": 0.034, "step": 101580 }, { "epoch": 0.8219920705558702, "grad_norm": 0.29257479310035706, "learning_rate": 7.331994412227441e-06, "loss": 0.0128, "step": 101590 }, { "epoch": 0.8220729832510721, "grad_norm": 0.2673463821411133, "learning_rate": 7.331369794030645e-06, "loss": 0.0211, "step": 101600 }, { "epoch": 0.822153895946274, "grad_norm": 0.33421581983566284, "learning_rate": 7.330745129339602e-06, "loss": 0.0237, "step": 101610 }, { "epoch": 0.8222348086414758, "grad_norm": 0.5228977799415588, "learning_rate": 7.330120418166769e-06, "loss": 0.0198, "step": 101620 }, { "epoch": 0.8223157213366777, "grad_norm": 0.33915647864341736, "learning_rate": 7.3294956605246e-06, "loss": 0.0253, "step": 101630 }, { "epoch": 0.8223966340318796, "grad_norm": 0.3813247084617615, "learning_rate": 7.32887085642556e-06, "loss": 0.0306, "step": 101640 }, { "epoch": 0.8224775467270815, "grad_norm": 0.2109178900718689, "learning_rate": 7.328246005882107e-06, "loss": 0.0471, "step": 101650 }, { "epoch": 0.8225584594222833, "grad_norm": 0.14153987169265747, "learning_rate": 7.327621108906702e-06, "loss": 0.0352, "step": 101660 }, { "epoch": 0.8226393721174853, "grad_norm": 0.45636987686157227, "learning_rate": 7.3269961655118085e-06, "loss": 0.0305, "step": 101670 }, { "epoch": 0.8227202848126871, "grad_norm": 0.2959943115711212, "learning_rate": 7.326371175709887e-06, "loss": 0.0266, "step": 101680 }, { "epoch": 0.8228011975078889, "grad_norm": 0.672064483165741, "learning_rate": 7.325746139513407e-06, "loss": 0.0239, "step": 101690 }, { "epoch": 0.8228821102030909, "grad_norm": 0.5309780836105347, "learning_rate": 7.325121056934828e-06, "loss": 0.0323, "step": 101700 }, { "epoch": 0.8229630228982927, "grad_norm": 0.45593422651290894, "learning_rate": 7.3244959279866164e-06, "loss": 0.024, "step": 101710 }, { "epoch": 0.8230439355934946, "grad_norm": 0.5053868889808655, "learning_rate": 7.323870752681243e-06, "loss": 0.0232, "step": 101720 }, { "epoch": 0.8231248482886965, "grad_norm": 0.4411580264568329, "learning_rate": 7.323245531031175e-06, "loss": 0.0248, "step": 101730 }, { "epoch": 0.8232057609838984, "grad_norm": 0.09401050955057144, "learning_rate": 7.322620263048877e-06, "loss": 0.0285, "step": 101740 }, { "epoch": 0.8232866736791002, "grad_norm": 0.27214673161506653, "learning_rate": 7.321994948746821e-06, "loss": 0.0322, "step": 101750 }, { "epoch": 0.8233675863743021, "grad_norm": 0.19873201847076416, "learning_rate": 7.321369588137478e-06, "loss": 0.0228, "step": 101760 }, { "epoch": 0.823448499069504, "grad_norm": 0.47014713287353516, "learning_rate": 7.3207441812333204e-06, "loss": 0.0265, "step": 101770 }, { "epoch": 0.8235294117647058, "grad_norm": 0.5028486847877502, "learning_rate": 7.320118728046818e-06, "loss": 0.0325, "step": 101780 }, { "epoch": 0.8236103244599078, "grad_norm": 0.39115896821022034, "learning_rate": 7.319493228590445e-06, "loss": 0.0402, "step": 101790 }, { "epoch": 0.8236912371551096, "grad_norm": 0.08648835122585297, "learning_rate": 7.318867682876679e-06, "loss": 0.0391, "step": 101800 }, { "epoch": 0.8237721498503116, "grad_norm": 0.5344505906105042, "learning_rate": 7.318242090917991e-06, "loss": 0.0252, "step": 101810 }, { "epoch": 0.8238530625455134, "grad_norm": 0.25346559286117554, "learning_rate": 7.317616452726857e-06, "loss": 0.0462, "step": 101820 }, { "epoch": 0.8239339752407153, "grad_norm": 0.635592520236969, "learning_rate": 7.316990768315757e-06, "loss": 0.0293, "step": 101830 }, { "epoch": 0.8240148879359172, "grad_norm": 0.6138765811920166, "learning_rate": 7.316365037697168e-06, "loss": 0.0345, "step": 101840 }, { "epoch": 0.824095800631119, "grad_norm": 0.6423075199127197, "learning_rate": 7.315739260883568e-06, "loss": 0.0242, "step": 101850 }, { "epoch": 0.8241767133263209, "grad_norm": 0.2715951204299927, "learning_rate": 7.3151134378874356e-06, "loss": 0.0184, "step": 101860 }, { "epoch": 0.8242576260215227, "grad_norm": 0.5008171796798706, "learning_rate": 7.314487568721254e-06, "loss": 0.0368, "step": 101870 }, { "epoch": 0.8243385387167247, "grad_norm": 0.397335946559906, "learning_rate": 7.313861653397505e-06, "loss": 0.0333, "step": 101880 }, { "epoch": 0.8244194514119265, "grad_norm": 0.30391207337379456, "learning_rate": 7.31323569192867e-06, "loss": 0.0263, "step": 101890 }, { "epoch": 0.8245003641071285, "grad_norm": 0.6207795739173889, "learning_rate": 7.312609684327231e-06, "loss": 0.04, "step": 101900 }, { "epoch": 0.8245812768023303, "grad_norm": 0.490743488073349, "learning_rate": 7.311983630605675e-06, "loss": 0.0195, "step": 101910 }, { "epoch": 0.8246621894975321, "grad_norm": 0.40723803639411926, "learning_rate": 7.311357530776487e-06, "loss": 0.0309, "step": 101920 }, { "epoch": 0.8247431021927341, "grad_norm": 0.46416354179382324, "learning_rate": 7.310731384852151e-06, "loss": 0.0269, "step": 101930 }, { "epoch": 0.8248240148879359, "grad_norm": 0.37348684668540955, "learning_rate": 7.310105192845156e-06, "loss": 0.0311, "step": 101940 }, { "epoch": 0.8249049275831378, "grad_norm": 0.33063238859176636, "learning_rate": 7.309478954767991e-06, "loss": 0.0273, "step": 101950 }, { "epoch": 0.8249858402783397, "grad_norm": 0.2678394019603729, "learning_rate": 7.308852670633143e-06, "loss": 0.0453, "step": 101960 }, { "epoch": 0.8250667529735416, "grad_norm": 0.49018147587776184, "learning_rate": 7.3082263404531025e-06, "loss": 0.0311, "step": 101970 }, { "epoch": 0.8251476656687434, "grad_norm": 0.6178140044212341, "learning_rate": 7.3075999642403604e-06, "loss": 0.0267, "step": 101980 }, { "epoch": 0.8252285783639453, "grad_norm": 0.2260906994342804, "learning_rate": 7.3069735420074095e-06, "loss": 0.0414, "step": 101990 }, { "epoch": 0.8253094910591472, "grad_norm": 0.4233773946762085, "learning_rate": 7.306347073766742e-06, "loss": 0.0316, "step": 102000 }, { "epoch": 0.825390403754349, "grad_norm": 0.29520922899246216, "learning_rate": 7.305720559530849e-06, "loss": 0.0206, "step": 102010 }, { "epoch": 0.825471316449551, "grad_norm": 0.25521785020828247, "learning_rate": 7.305093999312228e-06, "loss": 0.0405, "step": 102020 }, { "epoch": 0.8255522291447528, "grad_norm": 0.5705458521842957, "learning_rate": 7.3044673931233745e-06, "loss": 0.0456, "step": 102030 }, { "epoch": 0.8256331418399547, "grad_norm": 0.6035584211349487, "learning_rate": 7.303840740976782e-06, "loss": 0.0461, "step": 102040 }, { "epoch": 0.8257140545351566, "grad_norm": 0.3432346284389496, "learning_rate": 7.303214042884951e-06, "loss": 0.0237, "step": 102050 }, { "epoch": 0.8257949672303584, "grad_norm": 0.6646531224250793, "learning_rate": 7.302587298860378e-06, "loss": 0.0369, "step": 102060 }, { "epoch": 0.8258758799255603, "grad_norm": 0.45567867159843445, "learning_rate": 7.301960508915561e-06, "loss": 0.0321, "step": 102070 }, { "epoch": 0.8259567926207622, "grad_norm": 0.16991175711154938, "learning_rate": 7.301333673063003e-06, "loss": 0.0125, "step": 102080 }, { "epoch": 0.8260377053159641, "grad_norm": 0.36417892575263977, "learning_rate": 7.3007067913152004e-06, "loss": 0.0144, "step": 102090 }, { "epoch": 0.8261186180111659, "grad_norm": 0.43258848786354065, "learning_rate": 7.300079863684661e-06, "loss": 0.0282, "step": 102100 }, { "epoch": 0.8261995307063679, "grad_norm": 0.16201747953891754, "learning_rate": 7.299452890183883e-06, "loss": 0.0282, "step": 102110 }, { "epoch": 0.8262804434015697, "grad_norm": 0.4346805512905121, "learning_rate": 7.298825870825371e-06, "loss": 0.0237, "step": 102120 }, { "epoch": 0.8263613560967716, "grad_norm": 0.1293545812368393, "learning_rate": 7.298198805621631e-06, "loss": 0.0216, "step": 102130 }, { "epoch": 0.8264422687919735, "grad_norm": 0.5054435729980469, "learning_rate": 7.297571694585168e-06, "loss": 0.0371, "step": 102140 }, { "epoch": 0.8265231814871753, "grad_norm": 0.35452860593795776, "learning_rate": 7.296944537728488e-06, "loss": 0.0315, "step": 102150 }, { "epoch": 0.8266040941823772, "grad_norm": 0.2687671184539795, "learning_rate": 7.296317335064097e-06, "loss": 0.0235, "step": 102160 }, { "epoch": 0.8266850068775791, "grad_norm": 0.6448177695274353, "learning_rate": 7.295690086604505e-06, "loss": 0.0199, "step": 102170 }, { "epoch": 0.826765919572781, "grad_norm": 0.40834033489227295, "learning_rate": 7.29506279236222e-06, "loss": 0.0292, "step": 102180 }, { "epoch": 0.8268468322679828, "grad_norm": 0.425103098154068, "learning_rate": 7.294435452349754e-06, "loss": 0.0241, "step": 102190 }, { "epoch": 0.8269277449631848, "grad_norm": 0.0949978157877922, "learning_rate": 7.293808066579615e-06, "loss": 0.0187, "step": 102200 }, { "epoch": 0.8270086576583866, "grad_norm": 0.6441097855567932, "learning_rate": 7.293180635064319e-06, "loss": 0.032, "step": 102210 }, { "epoch": 0.8270895703535884, "grad_norm": 0.6865141987800598, "learning_rate": 7.292553157816374e-06, "loss": 0.0385, "step": 102220 }, { "epoch": 0.8271704830487904, "grad_norm": 0.5858628749847412, "learning_rate": 7.291925634848297e-06, "loss": 0.0361, "step": 102230 }, { "epoch": 0.8272513957439922, "grad_norm": 0.3045150935649872, "learning_rate": 7.291298066172601e-06, "loss": 0.035, "step": 102240 }, { "epoch": 0.8273323084391941, "grad_norm": 0.5258945822715759, "learning_rate": 7.290670451801803e-06, "loss": 0.0197, "step": 102250 }, { "epoch": 0.827413221134396, "grad_norm": 0.3607216477394104, "learning_rate": 7.290042791748418e-06, "loss": 0.0276, "step": 102260 }, { "epoch": 0.8274941338295979, "grad_norm": 0.2295159101486206, "learning_rate": 7.289415086024965e-06, "loss": 0.0275, "step": 102270 }, { "epoch": 0.8275750465247997, "grad_norm": 0.2872673571109772, "learning_rate": 7.288787334643961e-06, "loss": 0.0256, "step": 102280 }, { "epoch": 0.8276559592200016, "grad_norm": 0.15302728116512299, "learning_rate": 7.288159537617926e-06, "loss": 0.0264, "step": 102290 }, { "epoch": 0.8277368719152035, "grad_norm": 0.3576658368110657, "learning_rate": 7.287531694959379e-06, "loss": 0.0268, "step": 102300 }, { "epoch": 0.8278177846104053, "grad_norm": 0.426100492477417, "learning_rate": 7.286903806680841e-06, "loss": 0.0246, "step": 102310 }, { "epoch": 0.8278986973056073, "grad_norm": 0.5501701235771179, "learning_rate": 7.286275872794834e-06, "loss": 0.0246, "step": 102320 }, { "epoch": 0.8279796100008091, "grad_norm": 0.23796917498111725, "learning_rate": 7.285647893313882e-06, "loss": 0.0347, "step": 102330 }, { "epoch": 0.828060522696011, "grad_norm": 0.2812215983867645, "learning_rate": 7.285019868250508e-06, "loss": 0.0261, "step": 102340 }, { "epoch": 0.8281414353912129, "grad_norm": 0.2637401223182678, "learning_rate": 7.284391797617236e-06, "loss": 0.0184, "step": 102350 }, { "epoch": 0.8282223480864147, "grad_norm": 0.3980659544467926, "learning_rate": 7.283763681426594e-06, "loss": 0.0219, "step": 102360 }, { "epoch": 0.8283032607816166, "grad_norm": 0.45251452922821045, "learning_rate": 7.283135519691105e-06, "loss": 0.037, "step": 102370 }, { "epoch": 0.8283841734768185, "grad_norm": 0.23281803727149963, "learning_rate": 7.282507312423299e-06, "loss": 0.0236, "step": 102380 }, { "epoch": 0.8284650861720204, "grad_norm": 0.3594660460948944, "learning_rate": 7.2818790596357025e-06, "loss": 0.034, "step": 102390 }, { "epoch": 0.8285459988672222, "grad_norm": 0.3748892843723297, "learning_rate": 7.281250761340846e-06, "loss": 0.0329, "step": 102400 }, { "epoch": 0.8286269115624242, "grad_norm": 0.06298518925905228, "learning_rate": 7.280622417551259e-06, "loss": 0.0271, "step": 102410 }, { "epoch": 0.828707824257626, "grad_norm": 0.3952678143978119, "learning_rate": 7.27999402827947e-06, "loss": 0.0302, "step": 102420 }, { "epoch": 0.828788736952828, "grad_norm": 0.5112590193748474, "learning_rate": 7.279365593538016e-06, "loss": 0.0298, "step": 102430 }, { "epoch": 0.8288696496480298, "grad_norm": 0.46090829372406006, "learning_rate": 7.278737113339427e-06, "loss": 0.0207, "step": 102440 }, { "epoch": 0.8289505623432316, "grad_norm": 0.2875474989414215, "learning_rate": 7.278108587696236e-06, "loss": 0.0256, "step": 102450 }, { "epoch": 0.8290314750384336, "grad_norm": 0.7041022181510925, "learning_rate": 7.277480016620979e-06, "loss": 0.0265, "step": 102460 }, { "epoch": 0.8291123877336354, "grad_norm": 0.31594356894493103, "learning_rate": 7.2768514001261914e-06, "loss": 0.0237, "step": 102470 }, { "epoch": 0.8291933004288373, "grad_norm": 0.4343200623989105, "learning_rate": 7.276222738224409e-06, "loss": 0.0349, "step": 102480 }, { "epoch": 0.8292742131240392, "grad_norm": 0.8109174966812134, "learning_rate": 7.27559403092817e-06, "loss": 0.0321, "step": 102490 }, { "epoch": 0.8293551258192411, "grad_norm": 0.3758082687854767, "learning_rate": 7.2749652782500105e-06, "loss": 0.0283, "step": 102500 }, { "epoch": 0.8294360385144429, "grad_norm": 0.4527086913585663, "learning_rate": 7.274336480202473e-06, "loss": 0.0336, "step": 102510 }, { "epoch": 0.8295169512096447, "grad_norm": 0.4475845992565155, "learning_rate": 7.2737076367980944e-06, "loss": 0.0224, "step": 102520 }, { "epoch": 0.8295978639048467, "grad_norm": 0.061121005564928055, "learning_rate": 7.273078748049417e-06, "loss": 0.0277, "step": 102530 }, { "epoch": 0.8296787766000485, "grad_norm": 0.5087100863456726, "learning_rate": 7.272449813968982e-06, "loss": 0.0414, "step": 102540 }, { "epoch": 0.8297596892952505, "grad_norm": 0.41087228059768677, "learning_rate": 7.271820834569335e-06, "loss": 0.0269, "step": 102550 }, { "epoch": 0.8298406019904523, "grad_norm": 0.35816076397895813, "learning_rate": 7.271191809863017e-06, "loss": 0.0254, "step": 102560 }, { "epoch": 0.8299215146856542, "grad_norm": 0.40696629881858826, "learning_rate": 7.2705627398625726e-06, "loss": 0.0225, "step": 102570 }, { "epoch": 0.8300024273808561, "grad_norm": 0.48690417408943176, "learning_rate": 7.2699336245805475e-06, "loss": 0.0281, "step": 102580 }, { "epoch": 0.8300833400760579, "grad_norm": 0.5747388601303101, "learning_rate": 7.269304464029487e-06, "loss": 0.0398, "step": 102590 }, { "epoch": 0.8301642527712598, "grad_norm": 0.7362308502197266, "learning_rate": 7.2686752582219415e-06, "loss": 0.0322, "step": 102600 }, { "epoch": 0.8302451654664617, "grad_norm": 0.7851080894470215, "learning_rate": 7.268046007170457e-06, "loss": 0.0498, "step": 102610 }, { "epoch": 0.8303260781616636, "grad_norm": 0.40821120142936707, "learning_rate": 7.267416710887584e-06, "loss": 0.025, "step": 102620 }, { "epoch": 0.8304069908568654, "grad_norm": 0.3403991758823395, "learning_rate": 7.266787369385869e-06, "loss": 0.0256, "step": 102630 }, { "epoch": 0.8304879035520674, "grad_norm": 0.3817172348499298, "learning_rate": 7.266157982677866e-06, "loss": 0.0343, "step": 102640 }, { "epoch": 0.8305688162472692, "grad_norm": 0.4142898917198181, "learning_rate": 7.2655285507761276e-06, "loss": 0.0236, "step": 102650 }, { "epoch": 0.830649728942471, "grad_norm": 0.34191688895225525, "learning_rate": 7.264899073693203e-06, "loss": 0.0312, "step": 102660 }, { "epoch": 0.830730641637673, "grad_norm": 0.4100680351257324, "learning_rate": 7.264269551441649e-06, "loss": 0.0235, "step": 102670 }, { "epoch": 0.8308115543328748, "grad_norm": 0.6066515445709229, "learning_rate": 7.26363998403402e-06, "loss": 0.0271, "step": 102680 }, { "epoch": 0.8308924670280767, "grad_norm": 0.5118092894554138, "learning_rate": 7.263010371482868e-06, "loss": 0.0411, "step": 102690 }, { "epoch": 0.8309733797232786, "grad_norm": 0.34094133973121643, "learning_rate": 7.262380713800754e-06, "loss": 0.0265, "step": 102700 }, { "epoch": 0.8310542924184805, "grad_norm": 0.48161840438842773, "learning_rate": 7.2617510110002295e-06, "loss": 0.0289, "step": 102710 }, { "epoch": 0.8311352051136823, "grad_norm": 0.23486262559890747, "learning_rate": 7.261121263093858e-06, "loss": 0.0182, "step": 102720 }, { "epoch": 0.8312161178088842, "grad_norm": 0.2225063592195511, "learning_rate": 7.260491470094198e-06, "loss": 0.0165, "step": 102730 }, { "epoch": 0.8312970305040861, "grad_norm": 0.4076996147632599, "learning_rate": 7.259861632013805e-06, "loss": 0.0293, "step": 102740 }, { "epoch": 0.8313779431992879, "grad_norm": 0.3270263373851776, "learning_rate": 7.259231748865243e-06, "loss": 0.0232, "step": 102750 }, { "epoch": 0.8314588558944899, "grad_norm": 0.5163840651512146, "learning_rate": 7.258601820661073e-06, "loss": 0.0299, "step": 102760 }, { "epoch": 0.8315397685896917, "grad_norm": 0.7149772644042969, "learning_rate": 7.2579718474138584e-06, "loss": 0.0274, "step": 102770 }, { "epoch": 0.8316206812848936, "grad_norm": 0.41274213790893555, "learning_rate": 7.257341829136162e-06, "loss": 0.026, "step": 102780 }, { "epoch": 0.8317015939800955, "grad_norm": 0.6034438014030457, "learning_rate": 7.256711765840547e-06, "loss": 0.0264, "step": 102790 }, { "epoch": 0.8317825066752974, "grad_norm": 0.5625047087669373, "learning_rate": 7.256081657539583e-06, "loss": 0.0244, "step": 102800 }, { "epoch": 0.8318634193704992, "grad_norm": 0.4704059660434723, "learning_rate": 7.2554515042458315e-06, "loss": 0.0341, "step": 102810 }, { "epoch": 0.8319443320657011, "grad_norm": 0.144851952791214, "learning_rate": 7.254821305971861e-06, "loss": 0.0247, "step": 102820 }, { "epoch": 0.832025244760903, "grad_norm": 0.34522008895874023, "learning_rate": 7.254191062730239e-06, "loss": 0.0282, "step": 102830 }, { "epoch": 0.8321061574561048, "grad_norm": 0.5491629838943481, "learning_rate": 7.253560774533536e-06, "loss": 0.0357, "step": 102840 }, { "epoch": 0.8321870701513068, "grad_norm": 0.3695802390575409, "learning_rate": 7.252930441394321e-06, "loss": 0.0195, "step": 102850 }, { "epoch": 0.8322679828465086, "grad_norm": 0.43511223793029785, "learning_rate": 7.252300063325164e-06, "loss": 0.0459, "step": 102860 }, { "epoch": 0.8323488955417105, "grad_norm": 0.37665367126464844, "learning_rate": 7.251669640338637e-06, "loss": 0.0326, "step": 102870 }, { "epoch": 0.8324298082369124, "grad_norm": 0.3613027036190033, "learning_rate": 7.251039172447314e-06, "loss": 0.0287, "step": 102880 }, { "epoch": 0.8325107209321142, "grad_norm": 0.005095220170915127, "learning_rate": 7.250408659663766e-06, "loss": 0.0222, "step": 102890 }, { "epoch": 0.8325916336273161, "grad_norm": 0.18495747447013855, "learning_rate": 7.249778102000567e-06, "loss": 0.0353, "step": 102900 }, { "epoch": 0.832672546322518, "grad_norm": 0.5681814551353455, "learning_rate": 7.249147499470295e-06, "loss": 0.0236, "step": 102910 }, { "epoch": 0.8327534590177199, "grad_norm": 0.6170727014541626, "learning_rate": 7.248516852085523e-06, "loss": 0.027, "step": 102920 }, { "epoch": 0.8328343717129217, "grad_norm": 0.33763614296913147, "learning_rate": 7.247886159858828e-06, "loss": 0.0313, "step": 102930 }, { "epoch": 0.8329152844081237, "grad_norm": 0.3097108006477356, "learning_rate": 7.24725542280279e-06, "loss": 0.0171, "step": 102940 }, { "epoch": 0.8329961971033255, "grad_norm": 0.29451173543930054, "learning_rate": 7.246624640929989e-06, "loss": 0.0231, "step": 102950 }, { "epoch": 0.8330771097985273, "grad_norm": 0.5784592628479004, "learning_rate": 7.245993814253001e-06, "loss": 0.0398, "step": 102960 }, { "epoch": 0.8331580224937293, "grad_norm": 0.5370703339576721, "learning_rate": 7.2453629427844065e-06, "loss": 0.0211, "step": 102970 }, { "epoch": 0.8332389351889311, "grad_norm": 0.6494331955909729, "learning_rate": 7.2447320265367895e-06, "loss": 0.0399, "step": 102980 }, { "epoch": 0.833319847884133, "grad_norm": 0.5431585311889648, "learning_rate": 7.244101065522731e-06, "loss": 0.023, "step": 102990 }, { "epoch": 0.8334007605793349, "grad_norm": 0.27127784490585327, "learning_rate": 7.243470059754815e-06, "loss": 0.0163, "step": 103000 }, { "epoch": 0.8334816732745368, "grad_norm": 0.0622824989259243, "learning_rate": 7.2428390092456225e-06, "loss": 0.0226, "step": 103010 }, { "epoch": 0.8335625859697386, "grad_norm": 0.4353417456150055, "learning_rate": 7.2422079140077415e-06, "loss": 0.0344, "step": 103020 }, { "epoch": 0.8336434986649405, "grad_norm": 0.5562686920166016, "learning_rate": 7.24157677405376e-06, "loss": 0.0313, "step": 103030 }, { "epoch": 0.8337244113601424, "grad_norm": 0.2355138510465622, "learning_rate": 7.240945589396259e-06, "loss": 0.0353, "step": 103040 }, { "epoch": 0.8338053240553442, "grad_norm": 0.4776296317577362, "learning_rate": 7.24031436004783e-06, "loss": 0.0352, "step": 103050 }, { "epoch": 0.8338862367505462, "grad_norm": 0.21920444071292877, "learning_rate": 7.239683086021061e-06, "loss": 0.0322, "step": 103060 }, { "epoch": 0.833967149445748, "grad_norm": 0.2207183986902237, "learning_rate": 7.23905176732854e-06, "loss": 0.044, "step": 103070 }, { "epoch": 0.83404806214095, "grad_norm": 0.7691861987113953, "learning_rate": 7.2384204039828585e-06, "loss": 0.029, "step": 103080 }, { "epoch": 0.8341289748361518, "grad_norm": 0.2615881860256195, "learning_rate": 7.2377889959966075e-06, "loss": 0.0261, "step": 103090 }, { "epoch": 0.8342098875313537, "grad_norm": 0.2985248863697052, "learning_rate": 7.23715754338238e-06, "loss": 0.0202, "step": 103100 }, { "epoch": 0.8342908002265556, "grad_norm": 0.4305630922317505, "learning_rate": 7.236526046152769e-06, "loss": 0.0327, "step": 103110 }, { "epoch": 0.8343717129217574, "grad_norm": 0.3719443082809448, "learning_rate": 7.235894504320366e-06, "loss": 0.0234, "step": 103120 }, { "epoch": 0.8344526256169593, "grad_norm": 0.3881160318851471, "learning_rate": 7.235262917897768e-06, "loss": 0.0279, "step": 103130 }, { "epoch": 0.8345335383121611, "grad_norm": 0.3840380012989044, "learning_rate": 7.234631286897569e-06, "loss": 0.0302, "step": 103140 }, { "epoch": 0.8346144510073631, "grad_norm": 0.5306450128555298, "learning_rate": 7.2339996113323665e-06, "loss": 0.0392, "step": 103150 }, { "epoch": 0.8346953637025649, "grad_norm": 0.425971657037735, "learning_rate": 7.233367891214759e-06, "loss": 0.0282, "step": 103160 }, { "epoch": 0.8347762763977669, "grad_norm": 0.18897181749343872, "learning_rate": 7.232736126557343e-06, "loss": 0.0211, "step": 103170 }, { "epoch": 0.8348571890929687, "grad_norm": 0.4386764466762543, "learning_rate": 7.232104317372719e-06, "loss": 0.0233, "step": 103180 }, { "epoch": 0.8349381017881705, "grad_norm": 0.11611813306808472, "learning_rate": 7.231472463673486e-06, "loss": 0.0228, "step": 103190 }, { "epoch": 0.8350190144833725, "grad_norm": 0.5827283263206482, "learning_rate": 7.2308405654722445e-06, "loss": 0.0451, "step": 103200 }, { "epoch": 0.8350999271785743, "grad_norm": 0.3993639647960663, "learning_rate": 7.2302086227816005e-06, "loss": 0.0367, "step": 103210 }, { "epoch": 0.8351808398737762, "grad_norm": 0.5115835070610046, "learning_rate": 7.229576635614152e-06, "loss": 0.028, "step": 103220 }, { "epoch": 0.835261752568978, "grad_norm": 0.09886657446622849, "learning_rate": 7.228944603982503e-06, "loss": 0.0244, "step": 103230 }, { "epoch": 0.83534266526418, "grad_norm": 0.49152642488479614, "learning_rate": 7.228312527899262e-06, "loss": 0.0196, "step": 103240 }, { "epoch": 0.8354235779593818, "grad_norm": 0.7866784334182739, "learning_rate": 7.227680407377029e-06, "loss": 0.0398, "step": 103250 }, { "epoch": 0.8355044906545837, "grad_norm": 0.31705334782600403, "learning_rate": 7.227048242428415e-06, "loss": 0.0286, "step": 103260 }, { "epoch": 0.8355854033497856, "grad_norm": 0.3514941334724426, "learning_rate": 7.226416033066026e-06, "loss": 0.0225, "step": 103270 }, { "epoch": 0.8356663160449874, "grad_norm": 0.2481289803981781, "learning_rate": 7.225783779302468e-06, "loss": 0.0468, "step": 103280 }, { "epoch": 0.8357472287401894, "grad_norm": 1.4855905771255493, "learning_rate": 7.225151481150352e-06, "loss": 0.0326, "step": 103290 }, { "epoch": 0.8358281414353912, "grad_norm": 0.7024805545806885, "learning_rate": 7.224519138622287e-06, "loss": 0.0289, "step": 103300 }, { "epoch": 0.8359090541305931, "grad_norm": 0.277018278837204, "learning_rate": 7.223886751730882e-06, "loss": 0.0289, "step": 103310 }, { "epoch": 0.835989966825795, "grad_norm": 0.5919057726860046, "learning_rate": 7.223254320488754e-06, "loss": 0.0323, "step": 103320 }, { "epoch": 0.8360708795209968, "grad_norm": 0.5254311561584473, "learning_rate": 7.22262184490851e-06, "loss": 0.0297, "step": 103330 }, { "epoch": 0.8361517922161987, "grad_norm": 0.3647581934928894, "learning_rate": 7.221989325002765e-06, "loss": 0.0193, "step": 103340 }, { "epoch": 0.8362327049114006, "grad_norm": 0.14475098252296448, "learning_rate": 7.221356760784135e-06, "loss": 0.0231, "step": 103350 }, { "epoch": 0.8363136176066025, "grad_norm": 0.25824570655822754, "learning_rate": 7.220724152265234e-06, "loss": 0.033, "step": 103360 }, { "epoch": 0.8363945303018043, "grad_norm": 0.40783458948135376, "learning_rate": 7.220091499458678e-06, "loss": 0.0326, "step": 103370 }, { "epoch": 0.8364754429970063, "grad_norm": 0.6965222954750061, "learning_rate": 7.2194588023770825e-06, "loss": 0.023, "step": 103380 }, { "epoch": 0.8365563556922081, "grad_norm": 0.6087554097175598, "learning_rate": 7.218826061033069e-06, "loss": 0.0292, "step": 103390 }, { "epoch": 0.83663726838741, "grad_norm": 0.6855730414390564, "learning_rate": 7.218193275439251e-06, "loss": 0.0303, "step": 103400 }, { "epoch": 0.8367181810826119, "grad_norm": 0.4519483745098114, "learning_rate": 7.217560445608252e-06, "loss": 0.0225, "step": 103410 }, { "epoch": 0.8367990937778137, "grad_norm": 0.5110702514648438, "learning_rate": 7.216927571552692e-06, "loss": 0.0196, "step": 103420 }, { "epoch": 0.8368800064730156, "grad_norm": 0.12050134688615799, "learning_rate": 7.216294653285193e-06, "loss": 0.0231, "step": 103430 }, { "epoch": 0.8369609191682175, "grad_norm": 0.21609380841255188, "learning_rate": 7.215661690818373e-06, "loss": 0.0196, "step": 103440 }, { "epoch": 0.8370418318634194, "grad_norm": 0.28784871101379395, "learning_rate": 7.2150286841648595e-06, "loss": 0.0382, "step": 103450 }, { "epoch": 0.8371227445586212, "grad_norm": 0.3309037983417511, "learning_rate": 7.2143956333372754e-06, "loss": 0.0408, "step": 103460 }, { "epoch": 0.8372036572538232, "grad_norm": 0.7597655057907104, "learning_rate": 7.213762538348245e-06, "loss": 0.0393, "step": 103470 }, { "epoch": 0.837284569949025, "grad_norm": 0.4309041500091553, "learning_rate": 7.2131293992103946e-06, "loss": 0.0251, "step": 103480 }, { "epoch": 0.8373654826442268, "grad_norm": 0.38594767451286316, "learning_rate": 7.2124962159363495e-06, "loss": 0.0224, "step": 103490 }, { "epoch": 0.8374463953394288, "grad_norm": 0.46595755219459534, "learning_rate": 7.211862988538738e-06, "loss": 0.0346, "step": 103500 }, { "epoch": 0.8375273080346306, "grad_norm": 0.3160749673843384, "learning_rate": 7.211229717030191e-06, "loss": 0.026, "step": 103510 }, { "epoch": 0.8376082207298325, "grad_norm": 0.7458192110061646, "learning_rate": 7.210596401423331e-06, "loss": 0.037, "step": 103520 }, { "epoch": 0.8376891334250344, "grad_norm": 0.3940221965312958, "learning_rate": 7.209963041730797e-06, "loss": 0.0172, "step": 103530 }, { "epoch": 0.8377700461202363, "grad_norm": 0.5117478370666504, "learning_rate": 7.209329637965212e-06, "loss": 0.0306, "step": 103540 }, { "epoch": 0.8378509588154381, "grad_norm": 0.3266237676143646, "learning_rate": 7.208696190139214e-06, "loss": 0.0237, "step": 103550 }, { "epoch": 0.83793187151064, "grad_norm": 0.44739529490470886, "learning_rate": 7.208062698265433e-06, "loss": 0.026, "step": 103560 }, { "epoch": 0.8380127842058419, "grad_norm": 0.2982240915298462, "learning_rate": 7.207429162356502e-06, "loss": 0.0306, "step": 103570 }, { "epoch": 0.8380936969010437, "grad_norm": 0.2875094711780548, "learning_rate": 7.206795582425057e-06, "loss": 0.033, "step": 103580 }, { "epoch": 0.8381746095962457, "grad_norm": 0.21457426249980927, "learning_rate": 7.206161958483733e-06, "loss": 0.0158, "step": 103590 }, { "epoch": 0.8382555222914475, "grad_norm": 0.03736110031604767, "learning_rate": 7.205528290545164e-06, "loss": 0.0266, "step": 103600 }, { "epoch": 0.8383364349866494, "grad_norm": 0.6633205413818359, "learning_rate": 7.204894578621991e-06, "loss": 0.0265, "step": 103610 }, { "epoch": 0.8384173476818513, "grad_norm": 0.690517246723175, "learning_rate": 7.204260822726852e-06, "loss": 0.043, "step": 103620 }, { "epoch": 0.8384982603770531, "grad_norm": 0.4082900285720825, "learning_rate": 7.203627022872381e-06, "loss": 0.025, "step": 103630 }, { "epoch": 0.838579173072255, "grad_norm": 0.14384368062019348, "learning_rate": 7.202993179071224e-06, "loss": 0.0292, "step": 103640 }, { "epoch": 0.8386600857674569, "grad_norm": 0.24312514066696167, "learning_rate": 7.202359291336018e-06, "loss": 0.0257, "step": 103650 }, { "epoch": 0.8387409984626588, "grad_norm": 0.5606517195701599, "learning_rate": 7.2017253596794045e-06, "loss": 0.0333, "step": 103660 }, { "epoch": 0.8388219111578606, "grad_norm": 0.5461474061012268, "learning_rate": 7.201091384114027e-06, "loss": 0.0268, "step": 103670 }, { "epoch": 0.8389028238530626, "grad_norm": 0.8520286679267883, "learning_rate": 7.200457364652529e-06, "loss": 0.0343, "step": 103680 }, { "epoch": 0.8389837365482644, "grad_norm": 0.32672497630119324, "learning_rate": 7.199823301307555e-06, "loss": 0.0232, "step": 103690 }, { "epoch": 0.8390646492434664, "grad_norm": 0.37758195400238037, "learning_rate": 7.199189194091748e-06, "loss": 0.0356, "step": 103700 }, { "epoch": 0.8391455619386682, "grad_norm": 0.41511932015419006, "learning_rate": 7.198555043017754e-06, "loss": 0.0424, "step": 103710 }, { "epoch": 0.83922647463387, "grad_norm": 0.5925471782684326, "learning_rate": 7.197920848098223e-06, "loss": 0.0297, "step": 103720 }, { "epoch": 0.839307387329072, "grad_norm": 0.4675324559211731, "learning_rate": 7.197286609345802e-06, "loss": 0.0285, "step": 103730 }, { "epoch": 0.8393883000242738, "grad_norm": 0.3672501742839813, "learning_rate": 7.1966523267731345e-06, "loss": 0.0254, "step": 103740 }, { "epoch": 0.8394692127194757, "grad_norm": 0.3360888361930847, "learning_rate": 7.196018000392877e-06, "loss": 0.0335, "step": 103750 }, { "epoch": 0.8395501254146776, "grad_norm": 0.6458571553230286, "learning_rate": 7.195383630217675e-06, "loss": 0.0362, "step": 103760 }, { "epoch": 0.8396310381098795, "grad_norm": 0.4845335781574249, "learning_rate": 7.19474921626018e-06, "loss": 0.0393, "step": 103770 }, { "epoch": 0.8397119508050813, "grad_norm": 0.44762128591537476, "learning_rate": 7.194114758533047e-06, "loss": 0.031, "step": 103780 }, { "epoch": 0.8397928635002831, "grad_norm": 0.5815743803977966, "learning_rate": 7.193480257048926e-06, "loss": 0.0252, "step": 103790 }, { "epoch": 0.8398737761954851, "grad_norm": 0.3284963369369507, "learning_rate": 7.1928457118204745e-06, "loss": 0.0323, "step": 103800 }, { "epoch": 0.8399546888906869, "grad_norm": 0.9237771034240723, "learning_rate": 7.192211122860342e-06, "loss": 0.0292, "step": 103810 }, { "epoch": 0.8400356015858889, "grad_norm": 0.6148058176040649, "learning_rate": 7.191576490181187e-06, "loss": 0.0392, "step": 103820 }, { "epoch": 0.8401165142810907, "grad_norm": 0.21718423068523407, "learning_rate": 7.1909418137956655e-06, "loss": 0.0281, "step": 103830 }, { "epoch": 0.8401974269762926, "grad_norm": 0.25467467308044434, "learning_rate": 7.1903070937164355e-06, "loss": 0.0308, "step": 103840 }, { "epoch": 0.8402783396714945, "grad_norm": 0.32779356837272644, "learning_rate": 7.189672329956154e-06, "loss": 0.0295, "step": 103850 }, { "epoch": 0.8403592523666963, "grad_norm": 0.8756517171859741, "learning_rate": 7.18903752252748e-06, "loss": 0.0261, "step": 103860 }, { "epoch": 0.8404401650618982, "grad_norm": 0.27640390396118164, "learning_rate": 7.188402671443076e-06, "loss": 0.0328, "step": 103870 }, { "epoch": 0.8405210777571, "grad_norm": 0.6686864495277405, "learning_rate": 7.187767776715597e-06, "loss": 0.0341, "step": 103880 }, { "epoch": 0.840601990452302, "grad_norm": 0.2901129424571991, "learning_rate": 7.187132838357711e-06, "loss": 0.0309, "step": 103890 }, { "epoch": 0.8406829031475038, "grad_norm": 0.4021689295768738, "learning_rate": 7.186497856382077e-06, "loss": 0.0203, "step": 103900 }, { "epoch": 0.8407638158427058, "grad_norm": 0.3318554162979126, "learning_rate": 7.18586283080136e-06, "loss": 0.0473, "step": 103910 }, { "epoch": 0.8408447285379076, "grad_norm": 0.28716418147087097, "learning_rate": 7.185227761628223e-06, "loss": 0.0248, "step": 103920 }, { "epoch": 0.8409256412331094, "grad_norm": 0.2970519959926605, "learning_rate": 7.18459264887533e-06, "loss": 0.0208, "step": 103930 }, { "epoch": 0.8410065539283114, "grad_norm": 0.4218970537185669, "learning_rate": 7.183957492555349e-06, "loss": 0.0302, "step": 103940 }, { "epoch": 0.8410874666235132, "grad_norm": 0.05684303492307663, "learning_rate": 7.183322292680949e-06, "loss": 0.0267, "step": 103950 }, { "epoch": 0.8411683793187151, "grad_norm": 0.3681650161743164, "learning_rate": 7.182687049264792e-06, "loss": 0.0242, "step": 103960 }, { "epoch": 0.841249292013917, "grad_norm": 0.3017815053462982, "learning_rate": 7.18205176231955e-06, "loss": 0.0227, "step": 103970 }, { "epoch": 0.8413302047091189, "grad_norm": 0.3202369511127472, "learning_rate": 7.181416431857894e-06, "loss": 0.0233, "step": 103980 }, { "epoch": 0.8414111174043207, "grad_norm": 0.42866724729537964, "learning_rate": 7.180781057892491e-06, "loss": 0.0342, "step": 103990 }, { "epoch": 0.8414920300995227, "grad_norm": 0.3355036973953247, "learning_rate": 7.180145640436014e-06, "loss": 0.0235, "step": 104000 }, { "epoch": 0.8415729427947245, "grad_norm": 0.8330175280570984, "learning_rate": 7.179510179501134e-06, "loss": 0.0224, "step": 104010 }, { "epoch": 0.8416538554899263, "grad_norm": 0.36046677827835083, "learning_rate": 7.178874675100525e-06, "loss": 0.022, "step": 104020 }, { "epoch": 0.8417347681851283, "grad_norm": 0.9743009209632874, "learning_rate": 7.178239127246862e-06, "loss": 0.022, "step": 104030 }, { "epoch": 0.8418156808803301, "grad_norm": 0.3937666118144989, "learning_rate": 7.177603535952815e-06, "loss": 0.0239, "step": 104040 }, { "epoch": 0.841896593575532, "grad_norm": 0.6917479634284973, "learning_rate": 7.176967901231066e-06, "loss": 0.0363, "step": 104050 }, { "epoch": 0.8419775062707339, "grad_norm": 0.2741652727127075, "learning_rate": 7.176332223094286e-06, "loss": 0.0116, "step": 104060 }, { "epoch": 0.8420584189659358, "grad_norm": 0.050997667014598846, "learning_rate": 7.1756965015551535e-06, "loss": 0.0215, "step": 104070 }, { "epoch": 0.8421393316611376, "grad_norm": 0.36562368273735046, "learning_rate": 7.1750607366263495e-06, "loss": 0.032, "step": 104080 }, { "epoch": 0.8422202443563395, "grad_norm": 0.5476484894752502, "learning_rate": 7.1744249283205495e-06, "loss": 0.0302, "step": 104090 }, { "epoch": 0.8423011570515414, "grad_norm": 0.5218830108642578, "learning_rate": 7.1737890766504345e-06, "loss": 0.0285, "step": 104100 }, { "epoch": 0.8423820697467432, "grad_norm": 0.2758115530014038, "learning_rate": 7.173153181628687e-06, "loss": 0.0191, "step": 104110 }, { "epoch": 0.8424629824419452, "grad_norm": 0.1726311892271042, "learning_rate": 7.172517243267985e-06, "loss": 0.0195, "step": 104120 }, { "epoch": 0.842543895137147, "grad_norm": 0.25420457124710083, "learning_rate": 7.171881261581014e-06, "loss": 0.025, "step": 104130 }, { "epoch": 0.8426248078323489, "grad_norm": 0.2313343584537506, "learning_rate": 7.171245236580457e-06, "loss": 0.0302, "step": 104140 }, { "epoch": 0.8427057205275508, "grad_norm": 0.0378522127866745, "learning_rate": 7.170609168278997e-06, "loss": 0.0167, "step": 104150 }, { "epoch": 0.8427866332227526, "grad_norm": 0.3783091604709625, "learning_rate": 7.169973056689319e-06, "loss": 0.038, "step": 104160 }, { "epoch": 0.8428675459179545, "grad_norm": 0.16869617998600006, "learning_rate": 7.169336901824109e-06, "loss": 0.0193, "step": 104170 }, { "epoch": 0.8429484586131564, "grad_norm": 0.04671052098274231, "learning_rate": 7.1687007036960554e-06, "loss": 0.0255, "step": 104180 }, { "epoch": 0.8430293713083583, "grad_norm": 0.481869637966156, "learning_rate": 7.168064462317844e-06, "loss": 0.0242, "step": 104190 }, { "epoch": 0.8431102840035601, "grad_norm": 0.8114178776741028, "learning_rate": 7.167428177702163e-06, "loss": 0.0462, "step": 104200 }, { "epoch": 0.8431911966987621, "grad_norm": 0.5765791535377502, "learning_rate": 7.166791849861704e-06, "loss": 0.0276, "step": 104210 }, { "epoch": 0.8432721093939639, "grad_norm": 0.38292595744132996, "learning_rate": 7.166155478809156e-06, "loss": 0.0262, "step": 104220 }, { "epoch": 0.8433530220891657, "grad_norm": 0.4369131028652191, "learning_rate": 7.165519064557208e-06, "loss": 0.0622, "step": 104230 }, { "epoch": 0.8434339347843677, "grad_norm": 0.44456371665000916, "learning_rate": 7.164882607118555e-06, "loss": 0.0164, "step": 104240 }, { "epoch": 0.8435148474795695, "grad_norm": 0.6600943207740784, "learning_rate": 7.16424610650589e-06, "loss": 0.0318, "step": 104250 }, { "epoch": 0.8435957601747714, "grad_norm": 0.6833974719047546, "learning_rate": 7.1636095627319035e-06, "loss": 0.0308, "step": 104260 }, { "epoch": 0.8436766728699733, "grad_norm": 0.45250827074050903, "learning_rate": 7.162972975809292e-06, "loss": 0.0318, "step": 104270 }, { "epoch": 0.8437575855651752, "grad_norm": 0.6106036305427551, "learning_rate": 7.1623363457507524e-06, "loss": 0.0273, "step": 104280 }, { "epoch": 0.843838498260377, "grad_norm": 0.19909100234508514, "learning_rate": 7.161699672568978e-06, "loss": 0.0226, "step": 104290 }, { "epoch": 0.843919410955579, "grad_norm": 0.29290592670440674, "learning_rate": 7.161062956276668e-06, "loss": 0.0301, "step": 104300 }, { "epoch": 0.8440003236507808, "grad_norm": 0.08872581273317337, "learning_rate": 7.160426196886517e-06, "loss": 0.0238, "step": 104310 }, { "epoch": 0.8440812363459826, "grad_norm": 0.1924077719449997, "learning_rate": 7.159789394411229e-06, "loss": 0.0319, "step": 104320 }, { "epoch": 0.8441621490411846, "grad_norm": 0.3248840272426605, "learning_rate": 7.1591525488634995e-06, "loss": 0.0316, "step": 104330 }, { "epoch": 0.8442430617363864, "grad_norm": 0.4326971471309662, "learning_rate": 7.158515660256032e-06, "loss": 0.0239, "step": 104340 }, { "epoch": 0.8443239744315884, "grad_norm": 0.5247673392295837, "learning_rate": 7.157878728601526e-06, "loss": 0.0239, "step": 104350 }, { "epoch": 0.8444048871267902, "grad_norm": 0.4488750398159027, "learning_rate": 7.157241753912684e-06, "loss": 0.0149, "step": 104360 }, { "epoch": 0.8444857998219921, "grad_norm": 0.5122777223587036, "learning_rate": 7.156604736202209e-06, "loss": 0.033, "step": 104370 }, { "epoch": 0.844566712517194, "grad_norm": 0.1290086954832077, "learning_rate": 7.155967675482807e-06, "loss": 0.0176, "step": 104380 }, { "epoch": 0.8446476252123958, "grad_norm": 0.24215713143348694, "learning_rate": 7.15533057176718e-06, "loss": 0.0208, "step": 104390 }, { "epoch": 0.8447285379075977, "grad_norm": 0.43379390239715576, "learning_rate": 7.154693425068033e-06, "loss": 0.0361, "step": 104400 }, { "epoch": 0.8448094506027995, "grad_norm": 0.46316051483154297, "learning_rate": 7.154056235398077e-06, "loss": 0.0286, "step": 104410 }, { "epoch": 0.8448903632980015, "grad_norm": 0.04038279131054878, "learning_rate": 7.153419002770016e-06, "loss": 0.0328, "step": 104420 }, { "epoch": 0.8449712759932033, "grad_norm": 0.26627880334854126, "learning_rate": 7.15278172719656e-06, "loss": 0.0264, "step": 104430 }, { "epoch": 0.8450521886884053, "grad_norm": 0.31085067987442017, "learning_rate": 7.152144408690415e-06, "loss": 0.0251, "step": 104440 }, { "epoch": 0.8451331013836071, "grad_norm": 0.6239633560180664, "learning_rate": 7.1515070472642945e-06, "loss": 0.0229, "step": 104450 }, { "epoch": 0.8452140140788089, "grad_norm": 0.24022330343723297, "learning_rate": 7.150869642930908e-06, "loss": 0.0364, "step": 104460 }, { "epoch": 0.8452949267740109, "grad_norm": 0.31915703415870667, "learning_rate": 7.150232195702966e-06, "loss": 0.0348, "step": 104470 }, { "epoch": 0.8453758394692127, "grad_norm": 0.5999157428741455, "learning_rate": 7.149594705593184e-06, "loss": 0.0213, "step": 104480 }, { "epoch": 0.8454567521644146, "grad_norm": 0.1379186064004898, "learning_rate": 7.148957172614272e-06, "loss": 0.0304, "step": 104490 }, { "epoch": 0.8455376648596165, "grad_norm": 0.4550628364086151, "learning_rate": 7.148319596778948e-06, "loss": 0.025, "step": 104500 }, { "epoch": 0.8456185775548184, "grad_norm": 0.3481631577014923, "learning_rate": 7.147681978099923e-06, "loss": 0.0324, "step": 104510 }, { "epoch": 0.8456994902500202, "grad_norm": 0.4237340986728668, "learning_rate": 7.147044316589915e-06, "loss": 0.0225, "step": 104520 }, { "epoch": 0.845780402945222, "grad_norm": 0.2730658948421478, "learning_rate": 7.146406612261641e-06, "loss": 0.0327, "step": 104530 }, { "epoch": 0.845861315640424, "grad_norm": 0.6977445483207703, "learning_rate": 7.145768865127819e-06, "loss": 0.0429, "step": 104540 }, { "epoch": 0.8459422283356258, "grad_norm": 0.2582169473171234, "learning_rate": 7.145131075201166e-06, "loss": 0.0207, "step": 104550 }, { "epoch": 0.8460231410308278, "grad_norm": 0.48579204082489014, "learning_rate": 7.144493242494404e-06, "loss": 0.0245, "step": 104560 }, { "epoch": 0.8461040537260296, "grad_norm": 0.4425709843635559, "learning_rate": 7.143855367020249e-06, "loss": 0.0334, "step": 104570 }, { "epoch": 0.8461849664212315, "grad_norm": 0.3230472505092621, "learning_rate": 7.143217448791426e-06, "loss": 0.0257, "step": 104580 }, { "epoch": 0.8462658791164334, "grad_norm": 0.3893071115016937, "learning_rate": 7.142579487820655e-06, "loss": 0.033, "step": 104590 }, { "epoch": 0.8463467918116352, "grad_norm": 0.47743940353393555, "learning_rate": 7.141941484120659e-06, "loss": 0.0235, "step": 104600 }, { "epoch": 0.8464277045068371, "grad_norm": 0.6436387896537781, "learning_rate": 7.141303437704163e-06, "loss": 0.0317, "step": 104610 }, { "epoch": 0.846508617202039, "grad_norm": 0.3319253623485565, "learning_rate": 7.140665348583891e-06, "loss": 0.0323, "step": 104620 }, { "epoch": 0.8465895298972409, "grad_norm": 0.3953939974308014, "learning_rate": 7.140027216772566e-06, "loss": 0.0312, "step": 104630 }, { "epoch": 0.8466704425924427, "grad_norm": 0.5474250316619873, "learning_rate": 7.139389042282917e-06, "loss": 0.041, "step": 104640 }, { "epoch": 0.8467513552876447, "grad_norm": 0.9160459041595459, "learning_rate": 7.138750825127669e-06, "loss": 0.0206, "step": 104650 }, { "epoch": 0.8468322679828465, "grad_norm": 0.8928424119949341, "learning_rate": 7.1381125653195525e-06, "loss": 0.0343, "step": 104660 }, { "epoch": 0.8469131806780484, "grad_norm": 0.40195232629776, "learning_rate": 7.1374742628712935e-06, "loss": 0.0259, "step": 104670 }, { "epoch": 0.8469940933732503, "grad_norm": 0.12607739865779877, "learning_rate": 7.136835917795623e-06, "loss": 0.0285, "step": 104680 }, { "epoch": 0.8470750060684521, "grad_norm": 0.3743208348751068, "learning_rate": 7.136197530105272e-06, "loss": 0.0348, "step": 104690 }, { "epoch": 0.847155918763654, "grad_norm": 0.28191545605659485, "learning_rate": 7.135559099812969e-06, "loss": 0.0335, "step": 104700 }, { "epoch": 0.8472368314588559, "grad_norm": 0.5658888816833496, "learning_rate": 7.13492062693145e-06, "loss": 0.0335, "step": 104710 }, { "epoch": 0.8473177441540578, "grad_norm": 0.4761948585510254, "learning_rate": 7.134282111473445e-06, "loss": 0.0342, "step": 104720 }, { "epoch": 0.8473986568492596, "grad_norm": 0.37053829431533813, "learning_rate": 7.13364355345169e-06, "loss": 0.0246, "step": 104730 }, { "epoch": 0.8474795695444616, "grad_norm": 0.3143292963504791, "learning_rate": 7.133004952878918e-06, "loss": 0.0198, "step": 104740 }, { "epoch": 0.8475604822396634, "grad_norm": 0.295418381690979, "learning_rate": 7.132366309767865e-06, "loss": 0.021, "step": 104750 }, { "epoch": 0.8476413949348652, "grad_norm": 0.5424026250839233, "learning_rate": 7.131727624131268e-06, "loss": 0.029, "step": 104760 }, { "epoch": 0.8477223076300672, "grad_norm": 0.5096818804740906, "learning_rate": 7.131088895981864e-06, "loss": 0.0243, "step": 104770 }, { "epoch": 0.847803220325269, "grad_norm": 0.3022329807281494, "learning_rate": 7.13045012533239e-06, "loss": 0.0187, "step": 104780 }, { "epoch": 0.8478841330204709, "grad_norm": 0.45406562089920044, "learning_rate": 7.1298113121955844e-06, "loss": 0.0423, "step": 104790 }, { "epoch": 0.8479650457156728, "grad_norm": 0.29465407133102417, "learning_rate": 7.129172456584192e-06, "loss": 0.0279, "step": 104800 }, { "epoch": 0.8480459584108747, "grad_norm": 0.6494532227516174, "learning_rate": 7.1285335585109485e-06, "loss": 0.0306, "step": 104810 }, { "epoch": 0.8481268711060765, "grad_norm": 0.5382136702537537, "learning_rate": 7.127894617988595e-06, "loss": 0.027, "step": 104820 }, { "epoch": 0.8482077838012784, "grad_norm": 0.4905571937561035, "learning_rate": 7.127255635029877e-06, "loss": 0.0309, "step": 104830 }, { "epoch": 0.8482886964964803, "grad_norm": 0.3619508147239685, "learning_rate": 7.126616609647537e-06, "loss": 0.0334, "step": 104840 }, { "epoch": 0.8483696091916821, "grad_norm": 0.24848727881908417, "learning_rate": 7.125977541854317e-06, "loss": 0.0304, "step": 104850 }, { "epoch": 0.8484505218868841, "grad_norm": 0.2496364712715149, "learning_rate": 7.125338431662964e-06, "loss": 0.0221, "step": 104860 }, { "epoch": 0.8485314345820859, "grad_norm": 0.7115689516067505, "learning_rate": 7.124699279086222e-06, "loss": 0.0189, "step": 104870 }, { "epoch": 0.8486123472772878, "grad_norm": 0.3877069354057312, "learning_rate": 7.124060084136839e-06, "loss": 0.0263, "step": 104880 }, { "epoch": 0.8486932599724897, "grad_norm": 0.19053395092487335, "learning_rate": 7.12342084682756e-06, "loss": 0.02, "step": 104890 }, { "epoch": 0.8487741726676915, "grad_norm": 0.276120662689209, "learning_rate": 7.122781567171135e-06, "loss": 0.0201, "step": 104900 }, { "epoch": 0.8488550853628934, "grad_norm": 0.19643959403038025, "learning_rate": 7.122142245180317e-06, "loss": 0.0321, "step": 104910 }, { "epoch": 0.8489359980580953, "grad_norm": 0.6080031991004944, "learning_rate": 7.1215028808678486e-06, "loss": 0.0386, "step": 104920 }, { "epoch": 0.8490169107532972, "grad_norm": 0.46855276823043823, "learning_rate": 7.120863474246483e-06, "loss": 0.0308, "step": 104930 }, { "epoch": 0.849097823448499, "grad_norm": 0.45077088475227356, "learning_rate": 7.120224025328974e-06, "loss": 0.0217, "step": 104940 }, { "epoch": 0.849178736143701, "grad_norm": 0.33253803849220276, "learning_rate": 7.1195845341280735e-06, "loss": 0.0201, "step": 104950 }, { "epoch": 0.8492596488389028, "grad_norm": 0.3225858211517334, "learning_rate": 7.118945000656534e-06, "loss": 0.0339, "step": 104960 }, { "epoch": 0.8493405615341048, "grad_norm": 0.3252366781234741, "learning_rate": 7.11830542492711e-06, "loss": 0.0379, "step": 104970 }, { "epoch": 0.8494214742293066, "grad_norm": 0.12754406034946442, "learning_rate": 7.117665806952555e-06, "loss": 0.0165, "step": 104980 }, { "epoch": 0.8495023869245084, "grad_norm": 0.593739926815033, "learning_rate": 7.1170261467456266e-06, "loss": 0.0318, "step": 104990 }, { "epoch": 0.8495832996197104, "grad_norm": 0.3686714768409729, "learning_rate": 7.116386444319081e-06, "loss": 0.0356, "step": 105000 }, { "epoch": 0.8496642123149122, "grad_norm": 0.3843996226787567, "learning_rate": 7.115746699685675e-06, "loss": 0.0223, "step": 105010 }, { "epoch": 0.8497451250101141, "grad_norm": 0.6153486371040344, "learning_rate": 7.115106912858169e-06, "loss": 0.0348, "step": 105020 }, { "epoch": 0.849826037705316, "grad_norm": 0.2288748323917389, "learning_rate": 7.114467083849321e-06, "loss": 0.0159, "step": 105030 }, { "epoch": 0.8499069504005179, "grad_norm": 0.6004863381385803, "learning_rate": 7.113827212671889e-06, "loss": 0.0293, "step": 105040 }, { "epoch": 0.8499878630957197, "grad_norm": 0.21464888751506805, "learning_rate": 7.113187299338637e-06, "loss": 0.0227, "step": 105050 }, { "epoch": 0.8500687757909215, "grad_norm": 0.489016056060791, "learning_rate": 7.112547343862325e-06, "loss": 0.0165, "step": 105060 }, { "epoch": 0.8501496884861235, "grad_norm": 0.49083393812179565, "learning_rate": 7.111907346255716e-06, "loss": 0.0271, "step": 105070 }, { "epoch": 0.8502306011813253, "grad_norm": 0.5125178694725037, "learning_rate": 7.111267306531573e-06, "loss": 0.0281, "step": 105080 }, { "epoch": 0.8503115138765273, "grad_norm": 0.567499577999115, "learning_rate": 7.110627224702661e-06, "loss": 0.0315, "step": 105090 }, { "epoch": 0.8503924265717291, "grad_norm": 0.28240856528282166, "learning_rate": 7.109987100781745e-06, "loss": 0.0306, "step": 105100 }, { "epoch": 0.850473339266931, "grad_norm": 0.13759027421474457, "learning_rate": 7.109346934781591e-06, "loss": 0.0282, "step": 105110 }, { "epoch": 0.8505542519621329, "grad_norm": 0.20332270860671997, "learning_rate": 7.108706726714965e-06, "loss": 0.0166, "step": 105120 }, { "epoch": 0.8506351646573347, "grad_norm": 0.3037869930267334, "learning_rate": 7.108066476594635e-06, "loss": 0.0228, "step": 105130 }, { "epoch": 0.8507160773525366, "grad_norm": 0.7170480489730835, "learning_rate": 7.107426184433369e-06, "loss": 0.0358, "step": 105140 }, { "epoch": 0.8507969900477385, "grad_norm": 0.28996244072914124, "learning_rate": 7.106785850243938e-06, "loss": 0.045, "step": 105150 }, { "epoch": 0.8508779027429404, "grad_norm": 0.2738091051578522, "learning_rate": 7.10614547403911e-06, "loss": 0.0298, "step": 105160 }, { "epoch": 0.8509588154381422, "grad_norm": 0.498699426651001, "learning_rate": 7.105505055831656e-06, "loss": 0.0337, "step": 105170 }, { "epoch": 0.8510397281333442, "grad_norm": 0.623417317867279, "learning_rate": 7.10486459563435e-06, "loss": 0.035, "step": 105180 }, { "epoch": 0.851120640828546, "grad_norm": 0.5388669371604919, "learning_rate": 7.104224093459962e-06, "loss": 0.0348, "step": 105190 }, { "epoch": 0.8512015535237478, "grad_norm": 0.2809653580188751, "learning_rate": 7.1035835493212665e-06, "loss": 0.0477, "step": 105200 }, { "epoch": 0.8512824662189498, "grad_norm": 0.40231314301490784, "learning_rate": 7.102942963231041e-06, "loss": 0.0205, "step": 105210 }, { "epoch": 0.8513633789141516, "grad_norm": 0.3851624131202698, "learning_rate": 7.102302335202055e-06, "loss": 0.0291, "step": 105220 }, { "epoch": 0.8514442916093535, "grad_norm": 0.37016943097114563, "learning_rate": 7.101661665247086e-06, "loss": 0.0286, "step": 105230 }, { "epoch": 0.8515252043045554, "grad_norm": 0.947695791721344, "learning_rate": 7.101020953378915e-06, "loss": 0.0366, "step": 105240 }, { "epoch": 0.8516061169997573, "grad_norm": 0.333060085773468, "learning_rate": 7.100380199610314e-06, "loss": 0.0345, "step": 105250 }, { "epoch": 0.8516870296949591, "grad_norm": 0.3871934711933136, "learning_rate": 7.099739403954065e-06, "loss": 0.0243, "step": 105260 }, { "epoch": 0.8517679423901611, "grad_norm": 0.29833391308784485, "learning_rate": 7.099098566422946e-06, "loss": 0.0206, "step": 105270 }, { "epoch": 0.8518488550853629, "grad_norm": 0.40378764271736145, "learning_rate": 7.098457687029739e-06, "loss": 0.0384, "step": 105280 }, { "epoch": 0.8519297677805647, "grad_norm": 0.35912781953811646, "learning_rate": 7.097816765787221e-06, "loss": 0.0338, "step": 105290 }, { "epoch": 0.8520106804757667, "grad_norm": 0.48561662435531616, "learning_rate": 7.0971758027081785e-06, "loss": 0.0302, "step": 105300 }, { "epoch": 0.8520915931709685, "grad_norm": 0.3514406383037567, "learning_rate": 7.09653479780539e-06, "loss": 0.0181, "step": 105310 }, { "epoch": 0.8521725058661704, "grad_norm": 0.2749694287776947, "learning_rate": 7.095893751091642e-06, "loss": 0.0253, "step": 105320 }, { "epoch": 0.8522534185613723, "grad_norm": 0.18390491604804993, "learning_rate": 7.095252662579717e-06, "loss": 0.0349, "step": 105330 }, { "epoch": 0.8523343312565742, "grad_norm": 0.30576014518737793, "learning_rate": 7.094611532282401e-06, "loss": 0.0288, "step": 105340 }, { "epoch": 0.852415243951776, "grad_norm": 0.4370502531528473, "learning_rate": 7.093970360212481e-06, "loss": 0.0207, "step": 105350 }, { "epoch": 0.8524961566469779, "grad_norm": 0.306063175201416, "learning_rate": 7.093329146382743e-06, "loss": 0.0254, "step": 105360 }, { "epoch": 0.8525770693421798, "grad_norm": 0.35033315420150757, "learning_rate": 7.092687890805974e-06, "loss": 0.0235, "step": 105370 }, { "epoch": 0.8526579820373816, "grad_norm": 0.4100586175918579, "learning_rate": 7.092046593494961e-06, "loss": 0.0271, "step": 105380 }, { "epoch": 0.8527388947325836, "grad_norm": 0.2816217243671417, "learning_rate": 7.091405254462498e-06, "loss": 0.0374, "step": 105390 }, { "epoch": 0.8528198074277854, "grad_norm": 0.570559024810791, "learning_rate": 7.090763873721371e-06, "loss": 0.0223, "step": 105400 }, { "epoch": 0.8529007201229873, "grad_norm": 0.41370463371276855, "learning_rate": 7.090122451284371e-06, "loss": 0.022, "step": 105410 }, { "epoch": 0.8529816328181892, "grad_norm": 0.5425238609313965, "learning_rate": 7.0894809871642924e-06, "loss": 0.0408, "step": 105420 }, { "epoch": 0.853062545513391, "grad_norm": 0.6593540906906128, "learning_rate": 7.088839481373928e-06, "loss": 0.0272, "step": 105430 }, { "epoch": 0.8531434582085929, "grad_norm": 0.222042515873909, "learning_rate": 7.088197933926067e-06, "loss": 0.0208, "step": 105440 }, { "epoch": 0.8532243709037948, "grad_norm": 0.33017659187316895, "learning_rate": 7.08755634483351e-06, "loss": 0.0383, "step": 105450 }, { "epoch": 0.8533052835989967, "grad_norm": 0.6219000220298767, "learning_rate": 7.086914714109046e-06, "loss": 0.0285, "step": 105460 }, { "epoch": 0.8533861962941985, "grad_norm": 0.3525998890399933, "learning_rate": 7.086273041765475e-06, "loss": 0.025, "step": 105470 }, { "epoch": 0.8534671089894005, "grad_norm": 0.4446958005428314, "learning_rate": 7.085631327815592e-06, "loss": 0.0261, "step": 105480 }, { "epoch": 0.8535480216846023, "grad_norm": 0.41530001163482666, "learning_rate": 7.084989572272195e-06, "loss": 0.0483, "step": 105490 }, { "epoch": 0.8536289343798041, "grad_norm": 0.41715767979621887, "learning_rate": 7.084347775148084e-06, "loss": 0.0252, "step": 105500 }, { "epoch": 0.8537098470750061, "grad_norm": 0.1675492376089096, "learning_rate": 7.083705936456054e-06, "loss": 0.0269, "step": 105510 }, { "epoch": 0.8537907597702079, "grad_norm": 0.6033148765563965, "learning_rate": 7.0830640562089106e-06, "loss": 0.0407, "step": 105520 }, { "epoch": 0.8538716724654098, "grad_norm": 0.4652425944805145, "learning_rate": 7.082422134419451e-06, "loss": 0.0165, "step": 105530 }, { "epoch": 0.8539525851606117, "grad_norm": 0.14436230063438416, "learning_rate": 7.081780171100478e-06, "loss": 0.0254, "step": 105540 }, { "epoch": 0.8540334978558136, "grad_norm": 0.3772483170032501, "learning_rate": 7.081138166264795e-06, "loss": 0.019, "step": 105550 }, { "epoch": 0.8541144105510154, "grad_norm": 0.42571598291397095, "learning_rate": 7.080496119925204e-06, "loss": 0.0332, "step": 105560 }, { "epoch": 0.8541953232462174, "grad_norm": 0.454790323972702, "learning_rate": 7.079854032094511e-06, "loss": 0.0189, "step": 105570 }, { "epoch": 0.8542762359414192, "grad_norm": 0.32643407583236694, "learning_rate": 7.079211902785519e-06, "loss": 0.0339, "step": 105580 }, { "epoch": 0.854357148636621, "grad_norm": 0.5871717929840088, "learning_rate": 7.078569732011036e-06, "loss": 0.028, "step": 105590 }, { "epoch": 0.854438061331823, "grad_norm": 0.4456590414047241, "learning_rate": 7.077927519783866e-06, "loss": 0.0306, "step": 105600 }, { "epoch": 0.8545189740270248, "grad_norm": 0.23040971159934998, "learning_rate": 7.07728526611682e-06, "loss": 0.0245, "step": 105610 }, { "epoch": 0.8545998867222268, "grad_norm": 0.34545794129371643, "learning_rate": 7.076642971022706e-06, "loss": 0.026, "step": 105620 }, { "epoch": 0.8546807994174286, "grad_norm": 0.20685455203056335, "learning_rate": 7.076000634514329e-06, "loss": 0.0322, "step": 105630 }, { "epoch": 0.8547617121126305, "grad_norm": 0.6994714736938477, "learning_rate": 7.075358256604503e-06, "loss": 0.0217, "step": 105640 }, { "epoch": 0.8548426248078324, "grad_norm": 0.28357234597206116, "learning_rate": 7.074715837306039e-06, "loss": 0.0281, "step": 105650 }, { "epoch": 0.8549235375030342, "grad_norm": 0.4715360105037689, "learning_rate": 7.074073376631746e-06, "loss": 0.0242, "step": 105660 }, { "epoch": 0.8550044501982361, "grad_norm": 0.2256048023700714, "learning_rate": 7.073430874594438e-06, "loss": 0.0293, "step": 105670 }, { "epoch": 0.855085362893438, "grad_norm": 0.11423247307538986, "learning_rate": 7.072788331206929e-06, "loss": 0.043, "step": 105680 }, { "epoch": 0.8551662755886399, "grad_norm": 0.2528741657733917, "learning_rate": 7.072145746482033e-06, "loss": 0.0302, "step": 105690 }, { "epoch": 0.8552471882838417, "grad_norm": 0.23971430957317352, "learning_rate": 7.071503120432565e-06, "loss": 0.0208, "step": 105700 }, { "epoch": 0.8553281009790437, "grad_norm": 0.5126708149909973, "learning_rate": 7.070860453071339e-06, "loss": 0.0187, "step": 105710 }, { "epoch": 0.8554090136742455, "grad_norm": 0.5056090950965881, "learning_rate": 7.070217744411174e-06, "loss": 0.0472, "step": 105720 }, { "epoch": 0.8554899263694473, "grad_norm": 0.47540217638015747, "learning_rate": 7.069574994464887e-06, "loss": 0.0264, "step": 105730 }, { "epoch": 0.8555708390646493, "grad_norm": 0.2640886902809143, "learning_rate": 7.068932203245293e-06, "loss": 0.0228, "step": 105740 }, { "epoch": 0.8556517517598511, "grad_norm": 0.427785724401474, "learning_rate": 7.068289370765217e-06, "loss": 0.0317, "step": 105750 }, { "epoch": 0.855732664455053, "grad_norm": 0.6301827430725098, "learning_rate": 7.067646497037475e-06, "loss": 0.0383, "step": 105760 }, { "epoch": 0.8558135771502549, "grad_norm": 0.21384064853191376, "learning_rate": 7.067003582074889e-06, "loss": 0.0312, "step": 105770 }, { "epoch": 0.8558944898454568, "grad_norm": 0.6108934879302979, "learning_rate": 7.066360625890279e-06, "loss": 0.0246, "step": 105780 }, { "epoch": 0.8559754025406586, "grad_norm": 0.27430829405784607, "learning_rate": 7.065717628496468e-06, "loss": 0.0401, "step": 105790 }, { "epoch": 0.8560563152358605, "grad_norm": 0.32599061727523804, "learning_rate": 7.0650745899062824e-06, "loss": 0.0222, "step": 105800 }, { "epoch": 0.8561372279310624, "grad_norm": 0.4433104991912842, "learning_rate": 7.064431510132542e-06, "loss": 0.0246, "step": 105810 }, { "epoch": 0.8562181406262642, "grad_norm": 0.17812564969062805, "learning_rate": 7.063788389188073e-06, "loss": 0.0392, "step": 105820 }, { "epoch": 0.8562990533214662, "grad_norm": 0.5554496645927429, "learning_rate": 7.063145227085701e-06, "loss": 0.0253, "step": 105830 }, { "epoch": 0.856379966016668, "grad_norm": 0.27938076853752136, "learning_rate": 7.062502023838256e-06, "loss": 0.0291, "step": 105840 }, { "epoch": 0.8564608787118699, "grad_norm": 0.8107367753982544, "learning_rate": 7.061858779458559e-06, "loss": 0.0405, "step": 105850 }, { "epoch": 0.8565417914070718, "grad_norm": 0.4703519940376282, "learning_rate": 7.061215493959443e-06, "loss": 0.024, "step": 105860 }, { "epoch": 0.8566227041022737, "grad_norm": 0.3030337989330292, "learning_rate": 7.0605721673537355e-06, "loss": 0.0124, "step": 105870 }, { "epoch": 0.8567036167974755, "grad_norm": 0.34863951802253723, "learning_rate": 7.059928799654265e-06, "loss": 0.0312, "step": 105880 }, { "epoch": 0.8567845294926774, "grad_norm": 0.1429407149553299, "learning_rate": 7.0592853908738645e-06, "loss": 0.0234, "step": 105890 }, { "epoch": 0.8568654421878793, "grad_norm": 0.4937424957752228, "learning_rate": 7.058641941025364e-06, "loss": 0.0192, "step": 105900 }, { "epoch": 0.8569463548830811, "grad_norm": 0.26020386815071106, "learning_rate": 7.057998450121596e-06, "loss": 0.0212, "step": 105910 }, { "epoch": 0.8570272675782831, "grad_norm": 0.23805391788482666, "learning_rate": 7.057354918175394e-06, "loss": 0.0173, "step": 105920 }, { "epoch": 0.8571081802734849, "grad_norm": 0.3163476884365082, "learning_rate": 7.0567113451995894e-06, "loss": 0.0199, "step": 105930 }, { "epoch": 0.8571890929686868, "grad_norm": 0.18505944311618805, "learning_rate": 7.05606773120702e-06, "loss": 0.0367, "step": 105940 }, { "epoch": 0.8572700056638887, "grad_norm": 0.571570634841919, "learning_rate": 7.055424076210521e-06, "loss": 0.0386, "step": 105950 }, { "epoch": 0.8573509183590905, "grad_norm": 0.6009892225265503, "learning_rate": 7.054780380222927e-06, "loss": 0.0275, "step": 105960 }, { "epoch": 0.8574318310542924, "grad_norm": 0.3442853093147278, "learning_rate": 7.054136643257077e-06, "loss": 0.0464, "step": 105970 }, { "epoch": 0.8575127437494943, "grad_norm": 0.5876498222351074, "learning_rate": 7.053492865325808e-06, "loss": 0.036, "step": 105980 }, { "epoch": 0.8575936564446962, "grad_norm": 0.6164939999580383, "learning_rate": 7.052849046441959e-06, "loss": 0.0297, "step": 105990 }, { "epoch": 0.857674569139898, "grad_norm": 0.4523082375526428, "learning_rate": 7.05220518661837e-06, "loss": 0.0352, "step": 106000 }, { "epoch": 0.8577554818351, "grad_norm": 0.48807814717292786, "learning_rate": 7.051561285867878e-06, "loss": 0.031, "step": 106010 }, { "epoch": 0.8578363945303018, "grad_norm": 0.8343235850334167, "learning_rate": 7.050917344203331e-06, "loss": 0.0394, "step": 106020 }, { "epoch": 0.8579173072255036, "grad_norm": 0.29125601053237915, "learning_rate": 7.050273361637565e-06, "loss": 0.0348, "step": 106030 }, { "epoch": 0.8579982199207056, "grad_norm": 0.3435223698616028, "learning_rate": 7.0496293381834256e-06, "loss": 0.0333, "step": 106040 }, { "epoch": 0.8580791326159074, "grad_norm": 0.609167218208313, "learning_rate": 7.048985273853756e-06, "loss": 0.0243, "step": 106050 }, { "epoch": 0.8581600453111093, "grad_norm": 0.2077392190694809, "learning_rate": 7.048341168661401e-06, "loss": 0.0174, "step": 106060 }, { "epoch": 0.8582409580063112, "grad_norm": 0.2107972949743271, "learning_rate": 7.0476970226192055e-06, "loss": 0.031, "step": 106070 }, { "epoch": 0.8583218707015131, "grad_norm": 0.2117399424314499, "learning_rate": 7.047052835740016e-06, "loss": 0.0377, "step": 106080 }, { "epoch": 0.8584027833967149, "grad_norm": 0.10777079313993454, "learning_rate": 7.046408608036679e-06, "loss": 0.0203, "step": 106090 }, { "epoch": 0.8584836960919168, "grad_norm": 0.1924654245376587, "learning_rate": 7.045764339522042e-06, "loss": 0.033, "step": 106100 }, { "epoch": 0.8585646087871187, "grad_norm": 0.23804451525211334, "learning_rate": 7.045120030208954e-06, "loss": 0.0289, "step": 106110 }, { "epoch": 0.8586455214823205, "grad_norm": 0.5374487042427063, "learning_rate": 7.044475680110264e-06, "loss": 0.0241, "step": 106120 }, { "epoch": 0.8587264341775225, "grad_norm": 0.17612016201019287, "learning_rate": 7.043831289238824e-06, "loss": 0.0247, "step": 106130 }, { "epoch": 0.8588073468727243, "grad_norm": 0.21821478009223938, "learning_rate": 7.043186857607483e-06, "loss": 0.0344, "step": 106140 }, { "epoch": 0.8588882595679262, "grad_norm": 0.2037995159626007, "learning_rate": 7.042542385229094e-06, "loss": 0.0206, "step": 106150 }, { "epoch": 0.8589691722631281, "grad_norm": 0.37479814887046814, "learning_rate": 7.041897872116509e-06, "loss": 0.0367, "step": 106160 }, { "epoch": 0.8590500849583299, "grad_norm": 0.47425389289855957, "learning_rate": 7.041253318282581e-06, "loss": 0.0282, "step": 106170 }, { "epoch": 0.8591309976535318, "grad_norm": 0.2710539698600769, "learning_rate": 7.040608723740166e-06, "loss": 0.0205, "step": 106180 }, { "epoch": 0.8592119103487337, "grad_norm": 0.2172677218914032, "learning_rate": 7.039964088502117e-06, "loss": 0.0381, "step": 106190 }, { "epoch": 0.8592928230439356, "grad_norm": 0.2945094108581543, "learning_rate": 7.03931941258129e-06, "loss": 0.0196, "step": 106200 }, { "epoch": 0.8593737357391374, "grad_norm": 0.45241913199424744, "learning_rate": 7.038674695990545e-06, "loss": 0.0266, "step": 106210 }, { "epoch": 0.8594546484343394, "grad_norm": 0.5871242880821228, "learning_rate": 7.0380299387427346e-06, "loss": 0.0384, "step": 106220 }, { "epoch": 0.8595355611295412, "grad_norm": 0.19405579566955566, "learning_rate": 7.03738514085072e-06, "loss": 0.0283, "step": 106230 }, { "epoch": 0.8596164738247432, "grad_norm": 0.353866308927536, "learning_rate": 7.03674030232736e-06, "loss": 0.0326, "step": 106240 }, { "epoch": 0.859697386519945, "grad_norm": 0.4477740526199341, "learning_rate": 7.036095423185516e-06, "loss": 0.0241, "step": 106250 }, { "epoch": 0.8597782992151468, "grad_norm": 0.4314667284488678, "learning_rate": 7.035450503438046e-06, "loss": 0.032, "step": 106260 }, { "epoch": 0.8598592119103488, "grad_norm": 0.7062766551971436, "learning_rate": 7.034805543097812e-06, "loss": 0.045, "step": 106270 }, { "epoch": 0.8599401246055506, "grad_norm": 0.4982915222644806, "learning_rate": 7.0341605421776784e-06, "loss": 0.0324, "step": 106280 }, { "epoch": 0.8600210373007525, "grad_norm": 0.4952411949634552, "learning_rate": 7.033515500690507e-06, "loss": 0.0404, "step": 106290 }, { "epoch": 0.8601019499959544, "grad_norm": 0.05119473859667778, "learning_rate": 7.03287041864916e-06, "loss": 0.0409, "step": 106300 }, { "epoch": 0.8601828626911563, "grad_norm": 0.44141870737075806, "learning_rate": 7.032225296066505e-06, "loss": 0.0229, "step": 106310 }, { "epoch": 0.8602637753863581, "grad_norm": 0.2591254413127899, "learning_rate": 7.0315801329554075e-06, "loss": 0.0314, "step": 106320 }, { "epoch": 0.86034468808156, "grad_norm": 0.4109679162502289, "learning_rate": 7.0309349293287325e-06, "loss": 0.0272, "step": 106330 }, { "epoch": 0.8604256007767619, "grad_norm": 0.2829309403896332, "learning_rate": 7.0302896851993475e-06, "loss": 0.0259, "step": 106340 }, { "epoch": 0.8605065134719637, "grad_norm": 0.5636774301528931, "learning_rate": 7.029644400580122e-06, "loss": 0.031, "step": 106350 }, { "epoch": 0.8605874261671657, "grad_norm": 0.011308745481073856, "learning_rate": 7.0289990754839236e-06, "loss": 0.0234, "step": 106360 }, { "epoch": 0.8606683388623675, "grad_norm": 0.3905390799045563, "learning_rate": 7.028353709923621e-06, "loss": 0.0369, "step": 106370 }, { "epoch": 0.8607492515575694, "grad_norm": 0.2312418520450592, "learning_rate": 7.0277083039120866e-06, "loss": 0.042, "step": 106380 }, { "epoch": 0.8608301642527713, "grad_norm": 0.41875430941581726, "learning_rate": 7.027062857462191e-06, "loss": 0.0386, "step": 106390 }, { "epoch": 0.8609110769479731, "grad_norm": 0.8011084794998169, "learning_rate": 7.026417370586804e-06, "loss": 0.0181, "step": 106400 }, { "epoch": 0.860991989643175, "grad_norm": 0.44145867228507996, "learning_rate": 7.025771843298802e-06, "loss": 0.0379, "step": 106410 }, { "epoch": 0.8610729023383769, "grad_norm": 0.433102548122406, "learning_rate": 7.025126275611058e-06, "loss": 0.0195, "step": 106420 }, { "epoch": 0.8611538150335788, "grad_norm": 0.4309477210044861, "learning_rate": 7.024480667536444e-06, "loss": 0.0347, "step": 106430 }, { "epoch": 0.8612347277287806, "grad_norm": 0.580436110496521, "learning_rate": 7.023835019087838e-06, "loss": 0.0375, "step": 106440 }, { "epoch": 0.8613156404239826, "grad_norm": 0.45580238103866577, "learning_rate": 7.023189330278114e-06, "loss": 0.0321, "step": 106450 }, { "epoch": 0.8613965531191844, "grad_norm": 0.24952426552772522, "learning_rate": 7.022543601120151e-06, "loss": 0.0164, "step": 106460 }, { "epoch": 0.8614774658143862, "grad_norm": 0.6816796660423279, "learning_rate": 7.021897831626825e-06, "loss": 0.0327, "step": 106470 }, { "epoch": 0.8615583785095882, "grad_norm": 0.31850379705429077, "learning_rate": 7.021252021811016e-06, "loss": 0.0236, "step": 106480 }, { "epoch": 0.86163929120479, "grad_norm": 0.332552433013916, "learning_rate": 7.020606171685601e-06, "loss": 0.0427, "step": 106490 }, { "epoch": 0.8617202038999919, "grad_norm": 0.7104529738426208, "learning_rate": 7.019960281263464e-06, "loss": 0.049, "step": 106500 }, { "epoch": 0.8618011165951938, "grad_norm": 0.3850005269050598, "learning_rate": 7.019314350557482e-06, "loss": 0.035, "step": 106510 }, { "epoch": 0.8618820292903957, "grad_norm": 0.6853266358375549, "learning_rate": 7.018668379580538e-06, "loss": 0.0225, "step": 106520 }, { "epoch": 0.8619629419855975, "grad_norm": 0.18178202211856842, "learning_rate": 7.018022368345515e-06, "loss": 0.0208, "step": 106530 }, { "epoch": 0.8620438546807995, "grad_norm": 0.9466009736061096, "learning_rate": 7.017376316865297e-06, "loss": 0.0389, "step": 106540 }, { "epoch": 0.8621247673760013, "grad_norm": 0.7816338539123535, "learning_rate": 7.016730225152766e-06, "loss": 0.0341, "step": 106550 }, { "epoch": 0.8622056800712031, "grad_norm": 0.3867073655128479, "learning_rate": 7.016084093220808e-06, "loss": 0.0425, "step": 106560 }, { "epoch": 0.8622865927664051, "grad_norm": 0.4124191105365753, "learning_rate": 7.01543792108231e-06, "loss": 0.0221, "step": 106570 }, { "epoch": 0.8623675054616069, "grad_norm": 0.6312037110328674, "learning_rate": 7.014791708750157e-06, "loss": 0.0273, "step": 106580 }, { "epoch": 0.8624484181568088, "grad_norm": 0.5975012183189392, "learning_rate": 7.014145456237237e-06, "loss": 0.0271, "step": 106590 }, { "epoch": 0.8625293308520107, "grad_norm": 0.581611692905426, "learning_rate": 7.013499163556437e-06, "loss": 0.0334, "step": 106600 }, { "epoch": 0.8626102435472126, "grad_norm": 0.16588884592056274, "learning_rate": 7.012852830720648e-06, "loss": 0.0267, "step": 106610 }, { "epoch": 0.8626911562424144, "grad_norm": 0.798154890537262, "learning_rate": 7.0122064577427585e-06, "loss": 0.0293, "step": 106620 }, { "epoch": 0.8627720689376163, "grad_norm": 0.785077691078186, "learning_rate": 7.011560044635658e-06, "loss": 0.0319, "step": 106630 }, { "epoch": 0.8628529816328182, "grad_norm": 0.6681966781616211, "learning_rate": 7.01091359141224e-06, "loss": 0.0236, "step": 106640 }, { "epoch": 0.86293389432802, "grad_norm": 0.6151028275489807, "learning_rate": 7.010267098085395e-06, "loss": 0.0369, "step": 106650 }, { "epoch": 0.863014807023222, "grad_norm": 0.24904844164848328, "learning_rate": 7.009620564668017e-06, "loss": 0.0306, "step": 106660 }, { "epoch": 0.8630957197184238, "grad_norm": 0.23450049757957458, "learning_rate": 7.008973991173e-06, "loss": 0.0251, "step": 106670 }, { "epoch": 0.8631766324136257, "grad_norm": 0.503986656665802, "learning_rate": 7.008327377613236e-06, "loss": 0.0365, "step": 106680 }, { "epoch": 0.8632575451088276, "grad_norm": 0.35339629650115967, "learning_rate": 7.007680724001624e-06, "loss": 0.0239, "step": 106690 }, { "epoch": 0.8633384578040294, "grad_norm": 0.6156579256057739, "learning_rate": 7.007034030351059e-06, "loss": 0.0266, "step": 106700 }, { "epoch": 0.8634193704992313, "grad_norm": 0.20123472809791565, "learning_rate": 7.006387296674435e-06, "loss": 0.019, "step": 106710 }, { "epoch": 0.8635002831944332, "grad_norm": 0.5525861382484436, "learning_rate": 7.005740522984653e-06, "loss": 0.0236, "step": 106720 }, { "epoch": 0.8635811958896351, "grad_norm": 0.5604747533798218, "learning_rate": 7.0050937092946125e-06, "loss": 0.0288, "step": 106730 }, { "epoch": 0.8636621085848369, "grad_norm": 0.5363519787788391, "learning_rate": 7.004446855617207e-06, "loss": 0.0466, "step": 106740 }, { "epoch": 0.8637430212800389, "grad_norm": 0.6373652219772339, "learning_rate": 7.003799961965343e-06, "loss": 0.0476, "step": 106750 }, { "epoch": 0.8638239339752407, "grad_norm": 0.7112053036689758, "learning_rate": 7.003153028351919e-06, "loss": 0.0357, "step": 106760 }, { "epoch": 0.8639048466704425, "grad_norm": 0.331570565700531, "learning_rate": 7.0025060547898375e-06, "loss": 0.0194, "step": 106770 }, { "epoch": 0.8639857593656445, "grad_norm": 0.606758177280426, "learning_rate": 7.001859041291998e-06, "loss": 0.0302, "step": 106780 }, { "epoch": 0.8640666720608463, "grad_norm": 0.5345818996429443, "learning_rate": 7.001211987871306e-06, "loss": 0.0263, "step": 106790 }, { "epoch": 0.8641475847560482, "grad_norm": 0.46534693241119385, "learning_rate": 7.000564894540668e-06, "loss": 0.0377, "step": 106800 }, { "epoch": 0.8642284974512501, "grad_norm": 0.461473673582077, "learning_rate": 6.9999177613129855e-06, "loss": 0.024, "step": 106810 }, { "epoch": 0.864309410146452, "grad_norm": 0.24793246388435364, "learning_rate": 6.999270588201165e-06, "loss": 0.0313, "step": 106820 }, { "epoch": 0.8643903228416538, "grad_norm": 0.48187386989593506, "learning_rate": 6.998623375218112e-06, "loss": 0.0385, "step": 106830 }, { "epoch": 0.8644712355368558, "grad_norm": 0.2262558788061142, "learning_rate": 6.997976122376738e-06, "loss": 0.0486, "step": 106840 }, { "epoch": 0.8645521482320576, "grad_norm": 0.24592439830303192, "learning_rate": 6.997328829689946e-06, "loss": 0.018, "step": 106850 }, { "epoch": 0.8646330609272594, "grad_norm": 0.30231374502182007, "learning_rate": 6.996681497170648e-06, "loss": 0.0273, "step": 106860 }, { "epoch": 0.8647139736224614, "grad_norm": 0.40680238604545593, "learning_rate": 6.996034124831752e-06, "loss": 0.0179, "step": 106870 }, { "epoch": 0.8647948863176632, "grad_norm": 0.14443811774253845, "learning_rate": 6.99538671268617e-06, "loss": 0.0192, "step": 106880 }, { "epoch": 0.8648757990128652, "grad_norm": 0.7294256091117859, "learning_rate": 6.994739260746811e-06, "loss": 0.0456, "step": 106890 }, { "epoch": 0.864956711708067, "grad_norm": 0.41112858057022095, "learning_rate": 6.99409176902659e-06, "loss": 0.0243, "step": 106900 }, { "epoch": 0.8650376244032689, "grad_norm": 0.4816953241825104, "learning_rate": 6.993444237538418e-06, "loss": 0.0311, "step": 106910 }, { "epoch": 0.8651185370984708, "grad_norm": 0.2687222361564636, "learning_rate": 6.992796666295209e-06, "loss": 0.0325, "step": 106920 }, { "epoch": 0.8651994497936726, "grad_norm": 0.39815592765808105, "learning_rate": 6.992149055309877e-06, "loss": 0.0255, "step": 106930 }, { "epoch": 0.8652803624888745, "grad_norm": 0.34314170479774475, "learning_rate": 6.9915014045953374e-06, "loss": 0.0405, "step": 106940 }, { "epoch": 0.8653612751840764, "grad_norm": 0.4897509813308716, "learning_rate": 6.990853714164507e-06, "loss": 0.0307, "step": 106950 }, { "epoch": 0.8654421878792783, "grad_norm": 0.47613388299942017, "learning_rate": 6.990205984030303e-06, "loss": 0.0348, "step": 106960 }, { "epoch": 0.8655231005744801, "grad_norm": 0.2639673352241516, "learning_rate": 6.989558214205642e-06, "loss": 0.0282, "step": 106970 }, { "epoch": 0.8656040132696821, "grad_norm": 0.29585355520248413, "learning_rate": 6.9889104047034415e-06, "loss": 0.0215, "step": 106980 }, { "epoch": 0.8656849259648839, "grad_norm": 0.3593114912509918, "learning_rate": 6.988262555536623e-06, "loss": 0.0205, "step": 106990 }, { "epoch": 0.8657658386600857, "grad_norm": 0.47759443521499634, "learning_rate": 6.987614666718105e-06, "loss": 0.0423, "step": 107000 }, { "epoch": 0.8658467513552877, "grad_norm": 0.35735100507736206, "learning_rate": 6.986966738260806e-06, "loss": 0.0503, "step": 107010 }, { "epoch": 0.8659276640504895, "grad_norm": 0.5349607467651367, "learning_rate": 6.986318770177654e-06, "loss": 0.0342, "step": 107020 }, { "epoch": 0.8660085767456914, "grad_norm": 0.39491406083106995, "learning_rate": 6.985670762481566e-06, "loss": 0.0282, "step": 107030 }, { "epoch": 0.8660894894408933, "grad_norm": 0.9852083325386047, "learning_rate": 6.985022715185466e-06, "loss": 0.0527, "step": 107040 }, { "epoch": 0.8661704021360952, "grad_norm": 0.36466267704963684, "learning_rate": 6.9843746283022775e-06, "loss": 0.0155, "step": 107050 }, { "epoch": 0.866251314831297, "grad_norm": 0.2689850926399231, "learning_rate": 6.983726501844927e-06, "loss": 0.0119, "step": 107060 }, { "epoch": 0.8663322275264989, "grad_norm": 0.3521324098110199, "learning_rate": 6.983078335826341e-06, "loss": 0.0238, "step": 107070 }, { "epoch": 0.8664131402217008, "grad_norm": 0.1904733031988144, "learning_rate": 6.982430130259442e-06, "loss": 0.021, "step": 107080 }, { "epoch": 0.8664940529169026, "grad_norm": 0.25162965059280396, "learning_rate": 6.98178188515716e-06, "loss": 0.0344, "step": 107090 }, { "epoch": 0.8665749656121046, "grad_norm": 0.416850209236145, "learning_rate": 6.981133600532423e-06, "loss": 0.0244, "step": 107100 }, { "epoch": 0.8666558783073064, "grad_norm": 0.4552445411682129, "learning_rate": 6.980485276398157e-06, "loss": 0.019, "step": 107110 }, { "epoch": 0.8667367910025083, "grad_norm": 0.32360294461250305, "learning_rate": 6.979836912767293e-06, "loss": 0.0332, "step": 107120 }, { "epoch": 0.8668177036977102, "grad_norm": 0.23877689242362976, "learning_rate": 6.979188509652763e-06, "loss": 0.0246, "step": 107130 }, { "epoch": 0.8668986163929121, "grad_norm": 0.3611743152141571, "learning_rate": 6.978540067067495e-06, "loss": 0.0331, "step": 107140 }, { "epoch": 0.8669795290881139, "grad_norm": 0.38679951429367065, "learning_rate": 6.977891585024423e-06, "loss": 0.0345, "step": 107150 }, { "epoch": 0.8670604417833158, "grad_norm": 0.759224534034729, "learning_rate": 6.977243063536478e-06, "loss": 0.0288, "step": 107160 }, { "epoch": 0.8671413544785177, "grad_norm": 0.5954559445381165, "learning_rate": 6.976594502616596e-06, "loss": 0.0304, "step": 107170 }, { "epoch": 0.8672222671737195, "grad_norm": 0.45493197441101074, "learning_rate": 6.975945902277708e-06, "loss": 0.0222, "step": 107180 }, { "epoch": 0.8673031798689215, "grad_norm": 0.2849156856536865, "learning_rate": 6.975297262532751e-06, "loss": 0.027, "step": 107190 }, { "epoch": 0.8673840925641233, "grad_norm": 0.3193250596523285, "learning_rate": 6.974648583394659e-06, "loss": 0.0164, "step": 107200 }, { "epoch": 0.8674650052593252, "grad_norm": 0.2708066701889038, "learning_rate": 6.973999864876371e-06, "loss": 0.0269, "step": 107210 }, { "epoch": 0.8675459179545271, "grad_norm": 0.41521263122558594, "learning_rate": 6.973351106990822e-06, "loss": 0.0244, "step": 107220 }, { "epoch": 0.8676268306497289, "grad_norm": 0.7827758193016052, "learning_rate": 6.9727023097509514e-06, "loss": 0.0477, "step": 107230 }, { "epoch": 0.8677077433449308, "grad_norm": 0.5628129839897156, "learning_rate": 6.972053473169699e-06, "loss": 0.0342, "step": 107240 }, { "epoch": 0.8677886560401327, "grad_norm": 0.5651184320449829, "learning_rate": 6.971404597260002e-06, "loss": 0.027, "step": 107250 }, { "epoch": 0.8678695687353346, "grad_norm": 0.49383363127708435, "learning_rate": 6.970755682034802e-06, "loss": 0.0382, "step": 107260 }, { "epoch": 0.8679504814305364, "grad_norm": 0.6888267993927002, "learning_rate": 6.9701067275070415e-06, "loss": 0.0312, "step": 107270 }, { "epoch": 0.8680313941257384, "grad_norm": 0.4649532735347748, "learning_rate": 6.969457733689661e-06, "loss": 0.0297, "step": 107280 }, { "epoch": 0.8681123068209402, "grad_norm": 0.8586151003837585, "learning_rate": 6.968808700595603e-06, "loss": 0.0184, "step": 107290 }, { "epoch": 0.868193219516142, "grad_norm": 0.4966121017932892, "learning_rate": 6.968159628237811e-06, "loss": 0.0311, "step": 107300 }, { "epoch": 0.868274132211344, "grad_norm": 0.465628445148468, "learning_rate": 6.967510516629232e-06, "loss": 0.034, "step": 107310 }, { "epoch": 0.8683550449065458, "grad_norm": 0.6102150678634644, "learning_rate": 6.96686136578281e-06, "loss": 0.022, "step": 107320 }, { "epoch": 0.8684359576017477, "grad_norm": 0.16512921452522278, "learning_rate": 6.9662121757114875e-06, "loss": 0.0174, "step": 107330 }, { "epoch": 0.8685168702969496, "grad_norm": 0.4458147883415222, "learning_rate": 6.965562946428216e-06, "loss": 0.0166, "step": 107340 }, { "epoch": 0.8685977829921515, "grad_norm": 0.32651078701019287, "learning_rate": 6.9649136779459405e-06, "loss": 0.0304, "step": 107350 }, { "epoch": 0.8686786956873533, "grad_norm": 0.42860472202301025, "learning_rate": 6.9642643702776105e-06, "loss": 0.0245, "step": 107360 }, { "epoch": 0.8687596083825552, "grad_norm": 0.5477842092514038, "learning_rate": 6.963615023436173e-06, "loss": 0.0182, "step": 107370 }, { "epoch": 0.8688405210777571, "grad_norm": 0.2623554468154907, "learning_rate": 6.96296563743458e-06, "loss": 0.0247, "step": 107380 }, { "epoch": 0.8689214337729589, "grad_norm": 0.44772547483444214, "learning_rate": 6.962316212285782e-06, "loss": 0.0337, "step": 107390 }, { "epoch": 0.8690023464681609, "grad_norm": 0.6823416948318481, "learning_rate": 6.961666748002729e-06, "loss": 0.0368, "step": 107400 }, { "epoch": 0.8690832591633627, "grad_norm": 0.4730526804924011, "learning_rate": 6.961017244598374e-06, "loss": 0.0259, "step": 107410 }, { "epoch": 0.8691641718585646, "grad_norm": 0.4679967761039734, "learning_rate": 6.96036770208567e-06, "loss": 0.0307, "step": 107420 }, { "epoch": 0.8692450845537665, "grad_norm": 0.6877357363700867, "learning_rate": 6.959718120477571e-06, "loss": 0.0246, "step": 107430 }, { "epoch": 0.8693259972489684, "grad_norm": 0.40024831891059875, "learning_rate": 6.95906849978703e-06, "loss": 0.0217, "step": 107440 }, { "epoch": 0.8694069099441702, "grad_norm": 0.42380276322364807, "learning_rate": 6.958418840027005e-06, "loss": 0.0258, "step": 107450 }, { "epoch": 0.8694878226393721, "grad_norm": 0.2198006957769394, "learning_rate": 6.957769141210451e-06, "loss": 0.0314, "step": 107460 }, { "epoch": 0.869568735334574, "grad_norm": 0.6223219037055969, "learning_rate": 6.957119403350323e-06, "loss": 0.0226, "step": 107470 }, { "epoch": 0.8696496480297758, "grad_norm": 0.3981439173221588, "learning_rate": 6.956469626459581e-06, "loss": 0.0325, "step": 107480 }, { "epoch": 0.8697305607249778, "grad_norm": 0.287276029586792, "learning_rate": 6.955819810551181e-06, "loss": 0.0231, "step": 107490 }, { "epoch": 0.8698114734201796, "grad_norm": 0.3008795976638794, "learning_rate": 6.955169955638087e-06, "loss": 0.0233, "step": 107500 }, { "epoch": 0.8698923861153816, "grad_norm": 0.20436899363994598, "learning_rate": 6.954520061733254e-06, "loss": 0.0275, "step": 107510 }, { "epoch": 0.8699732988105834, "grad_norm": 0.48355117440223694, "learning_rate": 6.953870128849643e-06, "loss": 0.0336, "step": 107520 }, { "epoch": 0.8700542115057852, "grad_norm": 0.27572357654571533, "learning_rate": 6.953220157000219e-06, "loss": 0.0304, "step": 107530 }, { "epoch": 0.8701351242009872, "grad_norm": 0.2748813033103943, "learning_rate": 6.952570146197943e-06, "loss": 0.0284, "step": 107540 }, { "epoch": 0.870216036896189, "grad_norm": 0.26527780294418335, "learning_rate": 6.951920096455776e-06, "loss": 0.0151, "step": 107550 }, { "epoch": 0.8702969495913909, "grad_norm": 0.6849920749664307, "learning_rate": 6.9512700077866835e-06, "loss": 0.0319, "step": 107560 }, { "epoch": 0.8703778622865928, "grad_norm": 0.2536602318286896, "learning_rate": 6.950619880203631e-06, "loss": 0.027, "step": 107570 }, { "epoch": 0.8704587749817947, "grad_norm": 0.2426244616508484, "learning_rate": 6.949969713719582e-06, "loss": 0.036, "step": 107580 }, { "epoch": 0.8705396876769965, "grad_norm": 0.3586392402648926, "learning_rate": 6.949319508347505e-06, "loss": 0.0324, "step": 107590 }, { "epoch": 0.8706206003721983, "grad_norm": 0.4134311378002167, "learning_rate": 6.948669264100363e-06, "loss": 0.0284, "step": 107600 }, { "epoch": 0.8707015130674003, "grad_norm": 0.20191903412342072, "learning_rate": 6.948018980991129e-06, "loss": 0.0303, "step": 107610 }, { "epoch": 0.8707824257626021, "grad_norm": 0.4359758794307709, "learning_rate": 6.947368659032768e-06, "loss": 0.0244, "step": 107620 }, { "epoch": 0.8708633384578041, "grad_norm": 0.4553579092025757, "learning_rate": 6.946718298238248e-06, "loss": 0.0285, "step": 107630 }, { "epoch": 0.8709442511530059, "grad_norm": 0.523535430431366, "learning_rate": 6.9460678986205445e-06, "loss": 0.0228, "step": 107640 }, { "epoch": 0.8710251638482078, "grad_norm": 0.407543808221817, "learning_rate": 6.945417460192625e-06, "loss": 0.0312, "step": 107650 }, { "epoch": 0.8711060765434097, "grad_norm": 0.6825631856918335, "learning_rate": 6.944766982967459e-06, "loss": 0.0293, "step": 107660 }, { "epoch": 0.8711869892386115, "grad_norm": 0.4081723093986511, "learning_rate": 6.944116466958022e-06, "loss": 0.0461, "step": 107670 }, { "epoch": 0.8712679019338134, "grad_norm": 0.5159308910369873, "learning_rate": 6.943465912177287e-06, "loss": 0.0261, "step": 107680 }, { "epoch": 0.8713488146290153, "grad_norm": 0.7979797720909119, "learning_rate": 6.942815318638226e-06, "loss": 0.0654, "step": 107690 }, { "epoch": 0.8714297273242172, "grad_norm": 0.535019040107727, "learning_rate": 6.942164686353816e-06, "loss": 0.0355, "step": 107700 }, { "epoch": 0.871510640019419, "grad_norm": 0.7555779814720154, "learning_rate": 6.941514015337029e-06, "loss": 0.0345, "step": 107710 }, { "epoch": 0.871591552714621, "grad_norm": 0.35347387194633484, "learning_rate": 6.940863305600847e-06, "loss": 0.0217, "step": 107720 }, { "epoch": 0.8716724654098228, "grad_norm": 0.18487368524074554, "learning_rate": 6.940212557158243e-06, "loss": 0.0241, "step": 107730 }, { "epoch": 0.8717533781050246, "grad_norm": 0.3308773338794708, "learning_rate": 6.939561770022194e-06, "loss": 0.0201, "step": 107740 }, { "epoch": 0.8718342908002266, "grad_norm": 0.6051469445228577, "learning_rate": 6.938910944205681e-06, "loss": 0.0255, "step": 107750 }, { "epoch": 0.8719152034954284, "grad_norm": 0.21525321900844574, "learning_rate": 6.938260079721684e-06, "loss": 0.0338, "step": 107760 }, { "epoch": 0.8719961161906303, "grad_norm": 0.4818894565105438, "learning_rate": 6.9376091765831785e-06, "loss": 0.0345, "step": 107770 }, { "epoch": 0.8720770288858322, "grad_norm": 0.4357854127883911, "learning_rate": 6.9369582348031504e-06, "loss": 0.0259, "step": 107780 }, { "epoch": 0.8721579415810341, "grad_norm": 0.6973806619644165, "learning_rate": 6.936307254394579e-06, "loss": 0.0204, "step": 107790 }, { "epoch": 0.8722388542762359, "grad_norm": 0.47052067518234253, "learning_rate": 6.935656235370447e-06, "loss": 0.0356, "step": 107800 }, { "epoch": 0.8723197669714379, "grad_norm": 0.230106920003891, "learning_rate": 6.935005177743739e-06, "loss": 0.024, "step": 107810 }, { "epoch": 0.8724006796666397, "grad_norm": 0.4217115044593811, "learning_rate": 6.9343540815274345e-06, "loss": 0.0225, "step": 107820 }, { "epoch": 0.8724815923618415, "grad_norm": 0.38768136501312256, "learning_rate": 6.933702946734524e-06, "loss": 0.0432, "step": 107830 }, { "epoch": 0.8725625050570435, "grad_norm": 0.2847502529621124, "learning_rate": 6.933051773377991e-06, "loss": 0.0326, "step": 107840 }, { "epoch": 0.8726434177522453, "grad_norm": 0.3450745940208435, "learning_rate": 6.932400561470819e-06, "loss": 0.0256, "step": 107850 }, { "epoch": 0.8727243304474472, "grad_norm": 0.5123427510261536, "learning_rate": 6.9317493110259996e-06, "loss": 0.0367, "step": 107860 }, { "epoch": 0.8728052431426491, "grad_norm": 0.17767122387886047, "learning_rate": 6.931098022056517e-06, "loss": 0.0274, "step": 107870 }, { "epoch": 0.872886155837851, "grad_norm": 0.4345822036266327, "learning_rate": 6.9304466945753615e-06, "loss": 0.0306, "step": 107880 }, { "epoch": 0.8729670685330528, "grad_norm": 0.3882080912590027, "learning_rate": 6.929795328595521e-06, "loss": 0.0236, "step": 107890 }, { "epoch": 0.8730479812282547, "grad_norm": 0.6164035797119141, "learning_rate": 6.9291439241299865e-06, "loss": 0.0293, "step": 107900 }, { "epoch": 0.8731288939234566, "grad_norm": 0.873369038105011, "learning_rate": 6.928492481191751e-06, "loss": 0.0438, "step": 107910 }, { "epoch": 0.8732098066186584, "grad_norm": 0.3452698588371277, "learning_rate": 6.927840999793803e-06, "loss": 0.0271, "step": 107920 }, { "epoch": 0.8732907193138604, "grad_norm": 0.36629346013069153, "learning_rate": 6.927189479949136e-06, "loss": 0.0472, "step": 107930 }, { "epoch": 0.8733716320090622, "grad_norm": 0.5914292931556702, "learning_rate": 6.926537921670744e-06, "loss": 0.0283, "step": 107940 }, { "epoch": 0.8734525447042641, "grad_norm": 0.39599698781967163, "learning_rate": 6.925886324971619e-06, "loss": 0.0426, "step": 107950 }, { "epoch": 0.873533457399466, "grad_norm": 0.20727959275245667, "learning_rate": 6.925234689864758e-06, "loss": 0.0438, "step": 107960 }, { "epoch": 0.8736143700946678, "grad_norm": 0.3869984447956085, "learning_rate": 6.924583016363155e-06, "loss": 0.0253, "step": 107970 }, { "epoch": 0.8736952827898697, "grad_norm": 0.40005597472190857, "learning_rate": 6.923931304479808e-06, "loss": 0.0256, "step": 107980 }, { "epoch": 0.8737761954850716, "grad_norm": 0.2879880964756012, "learning_rate": 6.923279554227711e-06, "loss": 0.0259, "step": 107990 }, { "epoch": 0.8738571081802735, "grad_norm": 0.3151858150959015, "learning_rate": 6.922627765619865e-06, "loss": 0.023, "step": 108000 }, { "epoch": 0.8739380208754753, "grad_norm": 0.270730584859848, "learning_rate": 6.921975938669265e-06, "loss": 0.0277, "step": 108010 }, { "epoch": 0.8740189335706773, "grad_norm": 0.3412320613861084, "learning_rate": 6.921324073388914e-06, "loss": 0.0323, "step": 108020 }, { "epoch": 0.8740998462658791, "grad_norm": 0.40360164642333984, "learning_rate": 6.92067216979181e-06, "loss": 0.035, "step": 108030 }, { "epoch": 0.8741807589610809, "grad_norm": 0.46644285321235657, "learning_rate": 6.920020227890955e-06, "loss": 0.0337, "step": 108040 }, { "epoch": 0.8742616716562829, "grad_norm": 0.744538426399231, "learning_rate": 6.919368247699348e-06, "loss": 0.0289, "step": 108050 }, { "epoch": 0.8743425843514847, "grad_norm": 0.3835870623588562, "learning_rate": 6.918716229229995e-06, "loss": 0.024, "step": 108060 }, { "epoch": 0.8744234970466866, "grad_norm": 0.5065471529960632, "learning_rate": 6.918064172495897e-06, "loss": 0.0344, "step": 108070 }, { "epoch": 0.8745044097418885, "grad_norm": 0.38941067457199097, "learning_rate": 6.917412077510058e-06, "loss": 0.0377, "step": 108080 }, { "epoch": 0.8745853224370904, "grad_norm": 0.7274932861328125, "learning_rate": 6.916759944285484e-06, "loss": 0.0283, "step": 108090 }, { "epoch": 0.8746662351322922, "grad_norm": 0.4497902989387512, "learning_rate": 6.916107772835179e-06, "loss": 0.0312, "step": 108100 }, { "epoch": 0.8747471478274942, "grad_norm": 0.9568036198616028, "learning_rate": 6.915455563172148e-06, "loss": 0.0244, "step": 108110 }, { "epoch": 0.874828060522696, "grad_norm": 0.3662303388118744, "learning_rate": 6.9148033153094e-06, "loss": 0.036, "step": 108120 }, { "epoch": 0.8749089732178978, "grad_norm": 0.40815192461013794, "learning_rate": 6.914151029259943e-06, "loss": 0.0253, "step": 108130 }, { "epoch": 0.8749898859130998, "grad_norm": 0.40560251474380493, "learning_rate": 6.913498705036786e-06, "loss": 0.024, "step": 108140 }, { "epoch": 0.8750707986083016, "grad_norm": 0.5060930252075195, "learning_rate": 6.912846342652934e-06, "loss": 0.0317, "step": 108150 }, { "epoch": 0.8751517113035036, "grad_norm": 0.47688204050064087, "learning_rate": 6.912193942121401e-06, "loss": 0.033, "step": 108160 }, { "epoch": 0.8752326239987054, "grad_norm": 0.4268210530281067, "learning_rate": 6.911541503455197e-06, "loss": 0.0301, "step": 108170 }, { "epoch": 0.8753135366939073, "grad_norm": 0.5869781374931335, "learning_rate": 6.910889026667333e-06, "loss": 0.027, "step": 108180 }, { "epoch": 0.8753944493891092, "grad_norm": 0.42878416180610657, "learning_rate": 6.910236511770822e-06, "loss": 0.0369, "step": 108190 }, { "epoch": 0.875475362084311, "grad_norm": 0.5461891889572144, "learning_rate": 6.909583958778676e-06, "loss": 0.0241, "step": 108200 }, { "epoch": 0.8755562747795129, "grad_norm": 0.33846938610076904, "learning_rate": 6.90893136770391e-06, "loss": 0.0189, "step": 108210 }, { "epoch": 0.8756371874747148, "grad_norm": 0.11408092081546783, "learning_rate": 6.908278738559535e-06, "loss": 0.0229, "step": 108220 }, { "epoch": 0.8757181001699167, "grad_norm": 0.30127599835395813, "learning_rate": 6.907626071358572e-06, "loss": 0.0332, "step": 108230 }, { "epoch": 0.8757990128651185, "grad_norm": 0.4155624210834503, "learning_rate": 6.906973366114034e-06, "loss": 0.0287, "step": 108240 }, { "epoch": 0.8758799255603205, "grad_norm": 0.26548099517822266, "learning_rate": 6.906320622838937e-06, "loss": 0.0271, "step": 108250 }, { "epoch": 0.8759608382555223, "grad_norm": 0.40940287709236145, "learning_rate": 6.905667841546299e-06, "loss": 0.0425, "step": 108260 }, { "epoch": 0.8760417509507241, "grad_norm": 0.6132738590240479, "learning_rate": 6.905015022249141e-06, "loss": 0.0295, "step": 108270 }, { "epoch": 0.8761226636459261, "grad_norm": 0.30337274074554443, "learning_rate": 6.9043621649604785e-06, "loss": 0.0191, "step": 108280 }, { "epoch": 0.8762035763411279, "grad_norm": 0.3963151276111603, "learning_rate": 6.903709269693334e-06, "loss": 0.0254, "step": 108290 }, { "epoch": 0.8762844890363298, "grad_norm": 0.4914872646331787, "learning_rate": 6.903056336460725e-06, "loss": 0.0368, "step": 108300 }, { "epoch": 0.8763654017315317, "grad_norm": 0.4995487928390503, "learning_rate": 6.902403365275676e-06, "loss": 0.0337, "step": 108310 }, { "epoch": 0.8764463144267336, "grad_norm": 0.5851635932922363, "learning_rate": 6.90175035615121e-06, "loss": 0.0294, "step": 108320 }, { "epoch": 0.8765272271219354, "grad_norm": 0.37712034583091736, "learning_rate": 6.901097309100346e-06, "loss": 0.0288, "step": 108330 }, { "epoch": 0.8766081398171373, "grad_norm": 0.7483405470848083, "learning_rate": 6.900444224136109e-06, "loss": 0.0397, "step": 108340 }, { "epoch": 0.8766890525123392, "grad_norm": 0.4024650752544403, "learning_rate": 6.899791101271524e-06, "loss": 0.0281, "step": 108350 }, { "epoch": 0.876769965207541, "grad_norm": 0.33382776379585266, "learning_rate": 6.899137940519618e-06, "loss": 0.0138, "step": 108360 }, { "epoch": 0.876850877902743, "grad_norm": 0.44583839178085327, "learning_rate": 6.8984847418934145e-06, "loss": 0.0321, "step": 108370 }, { "epoch": 0.8769317905979448, "grad_norm": 0.1309622824192047, "learning_rate": 6.8978315054059385e-06, "loss": 0.0158, "step": 108380 }, { "epoch": 0.8770127032931467, "grad_norm": 0.13968227803707123, "learning_rate": 6.897178231070222e-06, "loss": 0.032, "step": 108390 }, { "epoch": 0.8770936159883486, "grad_norm": 0.8345388174057007, "learning_rate": 6.89652491889929e-06, "loss": 0.0308, "step": 108400 }, { "epoch": 0.8771745286835505, "grad_norm": 0.31845977902412415, "learning_rate": 6.895871568906172e-06, "loss": 0.0268, "step": 108410 }, { "epoch": 0.8772554413787523, "grad_norm": 0.527818500995636, "learning_rate": 6.895218181103897e-06, "loss": 0.0362, "step": 108420 }, { "epoch": 0.8773363540739542, "grad_norm": 1.2170826196670532, "learning_rate": 6.894564755505499e-06, "loss": 0.0368, "step": 108430 }, { "epoch": 0.8774172667691561, "grad_norm": 0.14763151109218597, "learning_rate": 6.893911292124005e-06, "loss": 0.0251, "step": 108440 }, { "epoch": 0.8774981794643579, "grad_norm": 0.3836027979850769, "learning_rate": 6.893257790972448e-06, "loss": 0.023, "step": 108450 }, { "epoch": 0.8775790921595599, "grad_norm": 0.2609007656574249, "learning_rate": 6.892604252063862e-06, "loss": 0.0225, "step": 108460 }, { "epoch": 0.8776600048547617, "grad_norm": 0.3958355784416199, "learning_rate": 6.8919506754112795e-06, "loss": 0.0334, "step": 108470 }, { "epoch": 0.8777409175499636, "grad_norm": 1.009084939956665, "learning_rate": 6.891297061027735e-06, "loss": 0.0325, "step": 108480 }, { "epoch": 0.8778218302451655, "grad_norm": 0.37556082010269165, "learning_rate": 6.890643408926262e-06, "loss": 0.0252, "step": 108490 }, { "epoch": 0.8779027429403673, "grad_norm": 0.5686347484588623, "learning_rate": 6.889989719119901e-06, "loss": 0.0304, "step": 108500 }, { "epoch": 0.8779836556355692, "grad_norm": 0.372006356716156, "learning_rate": 6.889335991621682e-06, "loss": 0.0328, "step": 108510 }, { "epoch": 0.8780645683307711, "grad_norm": 0.35801783204078674, "learning_rate": 6.888682226444645e-06, "loss": 0.0222, "step": 108520 }, { "epoch": 0.878145481025973, "grad_norm": 0.32023462653160095, "learning_rate": 6.888028423601829e-06, "loss": 0.0284, "step": 108530 }, { "epoch": 0.8782263937211748, "grad_norm": 0.4821911156177521, "learning_rate": 6.887374583106273e-06, "loss": 0.0172, "step": 108540 }, { "epoch": 0.8783073064163768, "grad_norm": 0.28590911626815796, "learning_rate": 6.8867207049710125e-06, "loss": 0.0292, "step": 108550 }, { "epoch": 0.8783882191115786, "grad_norm": 0.7068740129470825, "learning_rate": 6.886066789209094e-06, "loss": 0.0297, "step": 108560 }, { "epoch": 0.8784691318067804, "grad_norm": 0.4446936845779419, "learning_rate": 6.885412835833553e-06, "loss": 0.0385, "step": 108570 }, { "epoch": 0.8785500445019824, "grad_norm": 0.3531185984611511, "learning_rate": 6.884758844857434e-06, "loss": 0.0316, "step": 108580 }, { "epoch": 0.8786309571971842, "grad_norm": 0.5568084716796875, "learning_rate": 6.8841048162937785e-06, "loss": 0.0328, "step": 108590 }, { "epoch": 0.8787118698923861, "grad_norm": 0.3788543939590454, "learning_rate": 6.883450750155629e-06, "loss": 0.0233, "step": 108600 }, { "epoch": 0.878792782587588, "grad_norm": 0.26062139868736267, "learning_rate": 6.882796646456033e-06, "loss": 0.0282, "step": 108610 }, { "epoch": 0.8788736952827899, "grad_norm": 0.24887530505657196, "learning_rate": 6.8821425052080305e-06, "loss": 0.0197, "step": 108620 }, { "epoch": 0.8789546079779917, "grad_norm": 0.6745718121528625, "learning_rate": 6.881488326424668e-06, "loss": 0.025, "step": 108630 }, { "epoch": 0.8790355206731936, "grad_norm": 0.6431907415390015, "learning_rate": 6.880834110118994e-06, "loss": 0.0396, "step": 108640 }, { "epoch": 0.8791164333683955, "grad_norm": 0.3010806143283844, "learning_rate": 6.8801798563040564e-06, "loss": 0.0336, "step": 108650 }, { "epoch": 0.8791973460635973, "grad_norm": 0.09285004436969757, "learning_rate": 6.879525564992898e-06, "loss": 0.0207, "step": 108660 }, { "epoch": 0.8792782587587993, "grad_norm": 0.369475394487381, "learning_rate": 6.878871236198572e-06, "loss": 0.0307, "step": 108670 }, { "epoch": 0.8793591714540011, "grad_norm": 0.3953878581523895, "learning_rate": 6.878216869934125e-06, "loss": 0.0339, "step": 108680 }, { "epoch": 0.879440084149203, "grad_norm": 0.4235200583934784, "learning_rate": 6.877562466212607e-06, "loss": 0.0245, "step": 108690 }, { "epoch": 0.8795209968444049, "grad_norm": 0.422379732131958, "learning_rate": 6.876908025047071e-06, "loss": 0.0182, "step": 108700 }, { "epoch": 0.8796019095396068, "grad_norm": 0.20588159561157227, "learning_rate": 6.876253546450564e-06, "loss": 0.0263, "step": 108710 }, { "epoch": 0.8796828222348086, "grad_norm": 0.41198641061782837, "learning_rate": 6.875599030436144e-06, "loss": 0.023, "step": 108720 }, { "epoch": 0.8797637349300105, "grad_norm": 0.7294766902923584, "learning_rate": 6.87494447701686e-06, "loss": 0.0237, "step": 108730 }, { "epoch": 0.8798446476252124, "grad_norm": 0.2673121988773346, "learning_rate": 6.874289886205764e-06, "loss": 0.0266, "step": 108740 }, { "epoch": 0.8799255603204142, "grad_norm": 0.2929225265979767, "learning_rate": 6.873635258015916e-06, "loss": 0.0324, "step": 108750 }, { "epoch": 0.8800064730156162, "grad_norm": 0.5325430631637573, "learning_rate": 6.872980592460367e-06, "loss": 0.0339, "step": 108760 }, { "epoch": 0.880087385710818, "grad_norm": 0.8493109941482544, "learning_rate": 6.872325889552174e-06, "loss": 0.0383, "step": 108770 }, { "epoch": 0.88016829840602, "grad_norm": 0.47837433218955994, "learning_rate": 6.871671149304394e-06, "loss": 0.0309, "step": 108780 }, { "epoch": 0.8802492111012218, "grad_norm": 0.24281403422355652, "learning_rate": 6.871016371730084e-06, "loss": 0.0354, "step": 108790 }, { "epoch": 0.8803301237964236, "grad_norm": 0.6254341006278992, "learning_rate": 6.870361556842304e-06, "loss": 0.0246, "step": 108800 }, { "epoch": 0.8804110364916256, "grad_norm": 0.5231487154960632, "learning_rate": 6.8697067046541096e-06, "loss": 0.0262, "step": 108810 }, { "epoch": 0.8804919491868274, "grad_norm": 0.397786021232605, "learning_rate": 6.869051815178562e-06, "loss": 0.0249, "step": 108820 }, { "epoch": 0.8805728618820293, "grad_norm": 0.4550574719905853, "learning_rate": 6.868396888428722e-06, "loss": 0.0301, "step": 108830 }, { "epoch": 0.8806537745772312, "grad_norm": 0.4251735806465149, "learning_rate": 6.867741924417651e-06, "loss": 0.0206, "step": 108840 }, { "epoch": 0.8807346872724331, "grad_norm": 0.874992311000824, "learning_rate": 6.8670869231584095e-06, "loss": 0.0305, "step": 108850 }, { "epoch": 0.8808155999676349, "grad_norm": 0.5813215374946594, "learning_rate": 6.866431884664062e-06, "loss": 0.0272, "step": 108860 }, { "epoch": 0.8808965126628368, "grad_norm": 0.3625577390193939, "learning_rate": 6.86577680894767e-06, "loss": 0.0207, "step": 108870 }, { "epoch": 0.8809774253580387, "grad_norm": 0.14823617041110992, "learning_rate": 6.865121696022298e-06, "loss": 0.0252, "step": 108880 }, { "epoch": 0.8810583380532405, "grad_norm": 0.605435311794281, "learning_rate": 6.864466545901012e-06, "loss": 0.0236, "step": 108890 }, { "epoch": 0.8811392507484425, "grad_norm": 0.705984354019165, "learning_rate": 6.8638113585968755e-06, "loss": 0.0279, "step": 108900 }, { "epoch": 0.8812201634436443, "grad_norm": 0.024185795336961746, "learning_rate": 6.863156134122958e-06, "loss": 0.0258, "step": 108910 }, { "epoch": 0.8813010761388462, "grad_norm": 0.3928101360797882, "learning_rate": 6.862500872492324e-06, "loss": 0.0122, "step": 108920 }, { "epoch": 0.8813819888340481, "grad_norm": 0.21237005293369293, "learning_rate": 6.861845573718041e-06, "loss": 0.0243, "step": 108930 }, { "epoch": 0.8814629015292499, "grad_norm": 0.05196994170546532, "learning_rate": 6.861190237813179e-06, "loss": 0.0197, "step": 108940 }, { "epoch": 0.8815438142244518, "grad_norm": 0.5239133238792419, "learning_rate": 6.860534864790808e-06, "loss": 0.0228, "step": 108950 }, { "epoch": 0.8816247269196537, "grad_norm": 0.4698008894920349, "learning_rate": 6.8598794546639954e-06, "loss": 0.0204, "step": 108960 }, { "epoch": 0.8817056396148556, "grad_norm": 0.32168495655059814, "learning_rate": 6.859224007445815e-06, "loss": 0.0392, "step": 108970 }, { "epoch": 0.8817865523100574, "grad_norm": 0.2938535511493683, "learning_rate": 6.858568523149336e-06, "loss": 0.0188, "step": 108980 }, { "epoch": 0.8818674650052594, "grad_norm": 0.39845260977745056, "learning_rate": 6.857913001787631e-06, "loss": 0.0241, "step": 108990 }, { "epoch": 0.8819483777004612, "grad_norm": 0.731486976146698, "learning_rate": 6.857257443373774e-06, "loss": 0.0393, "step": 109000 }, { "epoch": 0.8820292903956631, "grad_norm": 0.4871065318584442, "learning_rate": 6.856601847920838e-06, "loss": 0.0196, "step": 109010 }, { "epoch": 0.882110203090865, "grad_norm": 0.5155003070831299, "learning_rate": 6.855946215441897e-06, "loss": 0.017, "step": 109020 }, { "epoch": 0.8821911157860668, "grad_norm": 0.29496580362319946, "learning_rate": 6.855290545950026e-06, "loss": 0.0262, "step": 109030 }, { "epoch": 0.8822720284812687, "grad_norm": 1.089357852935791, "learning_rate": 6.854634839458304e-06, "loss": 0.0326, "step": 109040 }, { "epoch": 0.8823529411764706, "grad_norm": 0.23334504663944244, "learning_rate": 6.8539790959798045e-06, "loss": 0.0244, "step": 109050 }, { "epoch": 0.8824338538716725, "grad_norm": 0.5758644938468933, "learning_rate": 6.853323315527606e-06, "loss": 0.0315, "step": 109060 }, { "epoch": 0.8825147665668743, "grad_norm": 0.384792298078537, "learning_rate": 6.852667498114787e-06, "loss": 0.0301, "step": 109070 }, { "epoch": 0.8825956792620763, "grad_norm": 0.6400187015533447, "learning_rate": 6.852011643754425e-06, "loss": 0.0375, "step": 109080 }, { "epoch": 0.8826765919572781, "grad_norm": 0.2957768738269806, "learning_rate": 6.851355752459601e-06, "loss": 0.0199, "step": 109090 }, { "epoch": 0.8827575046524799, "grad_norm": 0.8460476398468018, "learning_rate": 6.850699824243395e-06, "loss": 0.0274, "step": 109100 }, { "epoch": 0.8828384173476819, "grad_norm": 1.0103875398635864, "learning_rate": 6.8500438591188874e-06, "loss": 0.0359, "step": 109110 }, { "epoch": 0.8829193300428837, "grad_norm": 0.522125780582428, "learning_rate": 6.84938785709916e-06, "loss": 0.0203, "step": 109120 }, { "epoch": 0.8830002427380856, "grad_norm": 0.06644323468208313, "learning_rate": 6.848731818197299e-06, "loss": 0.0263, "step": 109130 }, { "epoch": 0.8830811554332875, "grad_norm": 0.6142889261245728, "learning_rate": 6.848075742426381e-06, "loss": 0.0298, "step": 109140 }, { "epoch": 0.8831620681284894, "grad_norm": 0.5608254671096802, "learning_rate": 6.847419629799496e-06, "loss": 0.0235, "step": 109150 }, { "epoch": 0.8832429808236912, "grad_norm": 0.26554587483406067, "learning_rate": 6.846763480329725e-06, "loss": 0.0325, "step": 109160 }, { "epoch": 0.8833238935188931, "grad_norm": 0.16796742379665375, "learning_rate": 6.846107294030157e-06, "loss": 0.0455, "step": 109170 }, { "epoch": 0.883404806214095, "grad_norm": 0.3796449601650238, "learning_rate": 6.845451070913875e-06, "loss": 0.0264, "step": 109180 }, { "epoch": 0.8834857189092968, "grad_norm": 0.3332307040691376, "learning_rate": 6.844794810993969e-06, "loss": 0.0195, "step": 109190 }, { "epoch": 0.8835666316044988, "grad_norm": 0.5715904235839844, "learning_rate": 6.844138514283522e-06, "loss": 0.0381, "step": 109200 }, { "epoch": 0.8836475442997006, "grad_norm": 0.9640285968780518, "learning_rate": 6.843482180795629e-06, "loss": 0.0329, "step": 109210 }, { "epoch": 0.8837284569949025, "grad_norm": 0.07860874384641647, "learning_rate": 6.842825810543372e-06, "loss": 0.0243, "step": 109220 }, { "epoch": 0.8838093696901044, "grad_norm": 0.3843705952167511, "learning_rate": 6.842169403539847e-06, "loss": 0.046, "step": 109230 }, { "epoch": 0.8838902823853062, "grad_norm": 0.5468346476554871, "learning_rate": 6.841512959798144e-06, "loss": 0.0462, "step": 109240 }, { "epoch": 0.8839711950805081, "grad_norm": 0.39502501487731934, "learning_rate": 6.8408564793313485e-06, "loss": 0.0311, "step": 109250 }, { "epoch": 0.88405210777571, "grad_norm": 0.4396345913410187, "learning_rate": 6.840199962152558e-06, "loss": 0.0312, "step": 109260 }, { "epoch": 0.8841330204709119, "grad_norm": 0.36223843693733215, "learning_rate": 6.839543408274866e-06, "loss": 0.0287, "step": 109270 }, { "epoch": 0.8842139331661137, "grad_norm": 0.24105769395828247, "learning_rate": 6.838886817711362e-06, "loss": 0.0299, "step": 109280 }, { "epoch": 0.8842948458613157, "grad_norm": 0.3834106922149658, "learning_rate": 6.838230190475144e-06, "loss": 0.0185, "step": 109290 }, { "epoch": 0.8843757585565175, "grad_norm": 0.43379244208335876, "learning_rate": 6.837573526579304e-06, "loss": 0.0202, "step": 109300 }, { "epoch": 0.8844566712517193, "grad_norm": 0.42572665214538574, "learning_rate": 6.83691682603694e-06, "loss": 0.0286, "step": 109310 }, { "epoch": 0.8845375839469213, "grad_norm": 0.42331570386886597, "learning_rate": 6.836260088861149e-06, "loss": 0.0265, "step": 109320 }, { "epoch": 0.8846184966421231, "grad_norm": 0.7967879772186279, "learning_rate": 6.835603315065025e-06, "loss": 0.0188, "step": 109330 }, { "epoch": 0.884699409337325, "grad_norm": 0.43818849325180054, "learning_rate": 6.834946504661669e-06, "loss": 0.0373, "step": 109340 }, { "epoch": 0.8847803220325269, "grad_norm": 0.8892890214920044, "learning_rate": 6.834289657664179e-06, "loss": 0.0276, "step": 109350 }, { "epoch": 0.8848612347277288, "grad_norm": 0.21130113303661346, "learning_rate": 6.833632774085654e-06, "loss": 0.0226, "step": 109360 }, { "epoch": 0.8849421474229306, "grad_norm": 0.1945694535970688, "learning_rate": 6.832975853939194e-06, "loss": 0.0326, "step": 109370 }, { "epoch": 0.8850230601181326, "grad_norm": 0.41764092445373535, "learning_rate": 6.832318897237902e-06, "loss": 0.0325, "step": 109380 }, { "epoch": 0.8851039728133344, "grad_norm": 0.22586573660373688, "learning_rate": 6.831661903994875e-06, "loss": 0.0227, "step": 109390 }, { "epoch": 0.8851848855085362, "grad_norm": 0.36145734786987305, "learning_rate": 6.8310048742232194e-06, "loss": 0.0202, "step": 109400 }, { "epoch": 0.8852657982037382, "grad_norm": 0.3673683702945709, "learning_rate": 6.8303478079360366e-06, "loss": 0.0214, "step": 109410 }, { "epoch": 0.88534671089894, "grad_norm": 0.4298092722892761, "learning_rate": 6.829690705146431e-06, "loss": 0.0439, "step": 109420 }, { "epoch": 0.885427623594142, "grad_norm": 0.4541352093219757, "learning_rate": 6.829033565867508e-06, "loss": 0.0415, "step": 109430 }, { "epoch": 0.8855085362893438, "grad_norm": 0.4491717517375946, "learning_rate": 6.82837639011237e-06, "loss": 0.0283, "step": 109440 }, { "epoch": 0.8855894489845457, "grad_norm": 0.5445480346679688, "learning_rate": 6.827719177894126e-06, "loss": 0.0307, "step": 109450 }, { "epoch": 0.8856703616797476, "grad_norm": 0.26724734902381897, "learning_rate": 6.827061929225881e-06, "loss": 0.0288, "step": 109460 }, { "epoch": 0.8857512743749494, "grad_norm": 0.26831725239753723, "learning_rate": 6.826404644120744e-06, "loss": 0.0245, "step": 109470 }, { "epoch": 0.8858321870701513, "grad_norm": 0.6099120378494263, "learning_rate": 6.82574732259182e-06, "loss": 0.0358, "step": 109480 }, { "epoch": 0.8859130997653532, "grad_norm": 0.36607542634010315, "learning_rate": 6.825089964652221e-06, "loss": 0.0345, "step": 109490 }, { "epoch": 0.8859940124605551, "grad_norm": 0.7659576535224915, "learning_rate": 6.824432570315056e-06, "loss": 0.0335, "step": 109500 }, { "epoch": 0.8860749251557569, "grad_norm": 0.6832265853881836, "learning_rate": 6.823775139593435e-06, "loss": 0.0281, "step": 109510 }, { "epoch": 0.8861558378509589, "grad_norm": 0.5159032940864563, "learning_rate": 6.823117672500467e-06, "loss": 0.0242, "step": 109520 }, { "epoch": 0.8862367505461607, "grad_norm": 0.269695520401001, "learning_rate": 6.822460169049268e-06, "loss": 0.0305, "step": 109530 }, { "epoch": 0.8863176632413625, "grad_norm": 0.1752975583076477, "learning_rate": 6.821802629252949e-06, "loss": 0.0198, "step": 109540 }, { "epoch": 0.8863985759365645, "grad_norm": 0.5893359780311584, "learning_rate": 6.82114505312462e-06, "loss": 0.0292, "step": 109550 }, { "epoch": 0.8864794886317663, "grad_norm": 0.630506157875061, "learning_rate": 6.820487440677399e-06, "loss": 0.0356, "step": 109560 }, { "epoch": 0.8865604013269682, "grad_norm": 0.2868235111236572, "learning_rate": 6.8198297919244e-06, "loss": 0.0224, "step": 109570 }, { "epoch": 0.8866413140221701, "grad_norm": 0.3099327087402344, "learning_rate": 6.819172106878735e-06, "loss": 0.0262, "step": 109580 }, { "epoch": 0.886722226717372, "grad_norm": 0.466600239276886, "learning_rate": 6.818514385553525e-06, "loss": 0.0543, "step": 109590 }, { "epoch": 0.8868031394125738, "grad_norm": 0.5665205717086792, "learning_rate": 6.817856627961883e-06, "loss": 0.0237, "step": 109600 }, { "epoch": 0.8868840521077757, "grad_norm": 0.26337483525276184, "learning_rate": 6.817198834116931e-06, "loss": 0.0204, "step": 109610 }, { "epoch": 0.8869649648029776, "grad_norm": 0.5040386915206909, "learning_rate": 6.816541004031782e-06, "loss": 0.0461, "step": 109620 }, { "epoch": 0.8870458774981794, "grad_norm": 0.8164501190185547, "learning_rate": 6.815883137719557e-06, "loss": 0.0472, "step": 109630 }, { "epoch": 0.8871267901933814, "grad_norm": 0.6218354105949402, "learning_rate": 6.815225235193379e-06, "loss": 0.0326, "step": 109640 }, { "epoch": 0.8872077028885832, "grad_norm": 0.267098993062973, "learning_rate": 6.814567296466365e-06, "loss": 0.0367, "step": 109650 }, { "epoch": 0.8872886155837851, "grad_norm": 0.608502984046936, "learning_rate": 6.8139093215516365e-06, "loss": 0.0338, "step": 109660 }, { "epoch": 0.887369528278987, "grad_norm": 0.6117669939994812, "learning_rate": 6.813251310462315e-06, "loss": 0.0315, "step": 109670 }, { "epoch": 0.8874504409741889, "grad_norm": 0.5896560549736023, "learning_rate": 6.812593263211526e-06, "loss": 0.0181, "step": 109680 }, { "epoch": 0.8875313536693907, "grad_norm": 0.22324810922145844, "learning_rate": 6.811935179812389e-06, "loss": 0.0172, "step": 109690 }, { "epoch": 0.8876122663645926, "grad_norm": 0.47974705696105957, "learning_rate": 6.811277060278031e-06, "loss": 0.0307, "step": 109700 }, { "epoch": 0.8876931790597945, "grad_norm": 0.47980374097824097, "learning_rate": 6.810618904621575e-06, "loss": 0.029, "step": 109710 }, { "epoch": 0.8877740917549963, "grad_norm": 1.4745030403137207, "learning_rate": 6.8099607128561485e-06, "loss": 0.0372, "step": 109720 }, { "epoch": 0.8878550044501983, "grad_norm": 0.5256962180137634, "learning_rate": 6.809302484994876e-06, "loss": 0.0334, "step": 109730 }, { "epoch": 0.8879359171454001, "grad_norm": 0.2677527666091919, "learning_rate": 6.808644221050884e-06, "loss": 0.0277, "step": 109740 }, { "epoch": 0.888016829840602, "grad_norm": 0.4544890522956848, "learning_rate": 6.807985921037303e-06, "loss": 0.0264, "step": 109750 }, { "epoch": 0.8880977425358039, "grad_norm": 0.47828635573387146, "learning_rate": 6.807327584967259e-06, "loss": 0.0358, "step": 109760 }, { "epoch": 0.8881786552310057, "grad_norm": 0.369201123714447, "learning_rate": 6.806669212853882e-06, "loss": 0.0274, "step": 109770 }, { "epoch": 0.8882595679262076, "grad_norm": 0.7977684140205383, "learning_rate": 6.806010804710301e-06, "loss": 0.023, "step": 109780 }, { "epoch": 0.8883404806214095, "grad_norm": 0.8594682216644287, "learning_rate": 6.805352360549648e-06, "loss": 0.0454, "step": 109790 }, { "epoch": 0.8884213933166114, "grad_norm": 0.3808635175228119, "learning_rate": 6.804693880385053e-06, "loss": 0.0272, "step": 109800 }, { "epoch": 0.8885023060118132, "grad_norm": 0.5529844164848328, "learning_rate": 6.804035364229649e-06, "loss": 0.0264, "step": 109810 }, { "epoch": 0.8885832187070152, "grad_norm": 0.51358962059021, "learning_rate": 6.803376812096566e-06, "loss": 0.0206, "step": 109820 }, { "epoch": 0.888664131402217, "grad_norm": 0.44742390513420105, "learning_rate": 6.8027182239989405e-06, "loss": 0.021, "step": 109830 }, { "epoch": 0.8887450440974188, "grad_norm": 0.337482213973999, "learning_rate": 6.802059599949907e-06, "loss": 0.0197, "step": 109840 }, { "epoch": 0.8888259567926208, "grad_norm": 0.3611651062965393, "learning_rate": 6.8014009399625995e-06, "loss": 0.0344, "step": 109850 }, { "epoch": 0.8889068694878226, "grad_norm": 0.19960756599903107, "learning_rate": 6.8007422440501514e-06, "loss": 0.0179, "step": 109860 }, { "epoch": 0.8889877821830245, "grad_norm": 0.8195915222167969, "learning_rate": 6.800083512225701e-06, "loss": 0.0416, "step": 109870 }, { "epoch": 0.8890686948782264, "grad_norm": 0.4290008544921875, "learning_rate": 6.799424744502385e-06, "loss": 0.0274, "step": 109880 }, { "epoch": 0.8891496075734283, "grad_norm": 0.6170951128005981, "learning_rate": 6.7987659408933425e-06, "loss": 0.0252, "step": 109890 }, { "epoch": 0.8892305202686301, "grad_norm": 0.35536396503448486, "learning_rate": 6.798107101411709e-06, "loss": 0.0226, "step": 109900 }, { "epoch": 0.889311432963832, "grad_norm": 0.38226640224456787, "learning_rate": 6.797448226070628e-06, "loss": 0.027, "step": 109910 }, { "epoch": 0.8893923456590339, "grad_norm": 0.3588625192642212, "learning_rate": 6.796789314883234e-06, "loss": 0.0375, "step": 109920 }, { "epoch": 0.8894732583542357, "grad_norm": 0.6400310397148132, "learning_rate": 6.7961303678626715e-06, "loss": 0.0241, "step": 109930 }, { "epoch": 0.8895541710494377, "grad_norm": 0.4188773036003113, "learning_rate": 6.79547138502208e-06, "loss": 0.0197, "step": 109940 }, { "epoch": 0.8896350837446395, "grad_norm": 0.6628064513206482, "learning_rate": 6.794812366374603e-06, "loss": 0.0521, "step": 109950 }, { "epoch": 0.8897159964398415, "grad_norm": 0.576845109462738, "learning_rate": 6.794153311933381e-06, "loss": 0.0253, "step": 109960 }, { "epoch": 0.8897969091350433, "grad_norm": 0.3677317202091217, "learning_rate": 6.7934942217115606e-06, "loss": 0.0331, "step": 109970 }, { "epoch": 0.8898778218302452, "grad_norm": 0.5830894708633423, "learning_rate": 6.792835095722283e-06, "loss": 0.0296, "step": 109980 }, { "epoch": 0.889958734525447, "grad_norm": 0.7340328693389893, "learning_rate": 6.7921759339786955e-06, "loss": 0.0346, "step": 109990 }, { "epoch": 0.8900396472206489, "grad_norm": 0.5207382440567017, "learning_rate": 6.791516736493942e-06, "loss": 0.0297, "step": 110000 }, { "epoch": 0.8901205599158508, "grad_norm": 0.2663717567920685, "learning_rate": 6.790857503281168e-06, "loss": 0.0269, "step": 110010 }, { "epoch": 0.8902014726110526, "grad_norm": 0.2395128458738327, "learning_rate": 6.790198234353522e-06, "loss": 0.0363, "step": 110020 }, { "epoch": 0.8902823853062546, "grad_norm": 0.6955199241638184, "learning_rate": 6.789538929724152e-06, "loss": 0.0322, "step": 110030 }, { "epoch": 0.8903632980014564, "grad_norm": 0.5845304727554321, "learning_rate": 6.7888795894062055e-06, "loss": 0.0175, "step": 110040 }, { "epoch": 0.8904442106966584, "grad_norm": 0.34876689314842224, "learning_rate": 6.788220213412833e-06, "loss": 0.023, "step": 110050 }, { "epoch": 0.8905251233918602, "grad_norm": 0.48356521129608154, "learning_rate": 6.787560801757183e-06, "loss": 0.0331, "step": 110060 }, { "epoch": 0.890606036087062, "grad_norm": 0.2661408483982086, "learning_rate": 6.786901354452407e-06, "loss": 0.0222, "step": 110070 }, { "epoch": 0.890686948782264, "grad_norm": 1.2678931951522827, "learning_rate": 6.786241871511655e-06, "loss": 0.0381, "step": 110080 }, { "epoch": 0.8907678614774658, "grad_norm": 0.9957953691482544, "learning_rate": 6.78558235294808e-06, "loss": 0.0247, "step": 110090 }, { "epoch": 0.8908487741726677, "grad_norm": 0.37142446637153625, "learning_rate": 6.784922798774834e-06, "loss": 0.0216, "step": 110100 }, { "epoch": 0.8909296868678696, "grad_norm": 0.25577327609062195, "learning_rate": 6.784263209005073e-06, "loss": 0.028, "step": 110110 }, { "epoch": 0.8910105995630715, "grad_norm": 0.7353119850158691, "learning_rate": 6.7836035836519466e-06, "loss": 0.0326, "step": 110120 }, { "epoch": 0.8910915122582733, "grad_norm": 0.23273806273937225, "learning_rate": 6.782943922728613e-06, "loss": 0.0317, "step": 110130 }, { "epoch": 0.8911724249534752, "grad_norm": 0.6990270018577576, "learning_rate": 6.782284226248227e-06, "loss": 0.0284, "step": 110140 }, { "epoch": 0.8912533376486771, "grad_norm": 0.5795183181762695, "learning_rate": 6.781624494223944e-06, "loss": 0.0356, "step": 110150 }, { "epoch": 0.8913342503438789, "grad_norm": 0.352154016494751, "learning_rate": 6.780964726668922e-06, "loss": 0.0211, "step": 110160 }, { "epoch": 0.8914151630390809, "grad_norm": 0.3855026364326477, "learning_rate": 6.780304923596319e-06, "loss": 0.0208, "step": 110170 }, { "epoch": 0.8914960757342827, "grad_norm": 0.6339566707611084, "learning_rate": 6.779645085019293e-06, "loss": 0.0375, "step": 110180 }, { "epoch": 0.8915769884294846, "grad_norm": 0.36046263575553894, "learning_rate": 6.778985210951001e-06, "loss": 0.0268, "step": 110190 }, { "epoch": 0.8916579011246865, "grad_norm": 0.5145716071128845, "learning_rate": 6.778325301404606e-06, "loss": 0.026, "step": 110200 }, { "epoch": 0.8917388138198883, "grad_norm": 0.28004878759384155, "learning_rate": 6.777665356393267e-06, "loss": 0.0174, "step": 110210 }, { "epoch": 0.8918197265150902, "grad_norm": 0.264247864484787, "learning_rate": 6.777005375930143e-06, "loss": 0.0174, "step": 110220 }, { "epoch": 0.8919006392102921, "grad_norm": 0.48938828706741333, "learning_rate": 6.7763453600284e-06, "loss": 0.0333, "step": 110230 }, { "epoch": 0.891981551905494, "grad_norm": 0.41243430972099304, "learning_rate": 6.775685308701201e-06, "loss": 0.0232, "step": 110240 }, { "epoch": 0.8920624646006958, "grad_norm": 0.8878365159034729, "learning_rate": 6.775025221961704e-06, "loss": 0.0245, "step": 110250 }, { "epoch": 0.8921433772958978, "grad_norm": 0.5873983502388, "learning_rate": 6.774365099823077e-06, "loss": 0.0284, "step": 110260 }, { "epoch": 0.8922242899910996, "grad_norm": 0.2527182698249817, "learning_rate": 6.773704942298485e-06, "loss": 0.0478, "step": 110270 }, { "epoch": 0.8923052026863015, "grad_norm": 0.3044189214706421, "learning_rate": 6.773044749401091e-06, "loss": 0.0237, "step": 110280 }, { "epoch": 0.8923861153815034, "grad_norm": 0.4357004761695862, "learning_rate": 6.772384521144063e-06, "loss": 0.0254, "step": 110290 }, { "epoch": 0.8924670280767052, "grad_norm": 0.20253829658031464, "learning_rate": 6.771724257540567e-06, "loss": 0.019, "step": 110300 }, { "epoch": 0.8925479407719071, "grad_norm": 0.5767363905906677, "learning_rate": 6.771063958603773e-06, "loss": 0.0283, "step": 110310 }, { "epoch": 0.892628853467109, "grad_norm": 0.6044718623161316, "learning_rate": 6.770403624346845e-06, "loss": 0.0243, "step": 110320 }, { "epoch": 0.8927097661623109, "grad_norm": 0.32210755348205566, "learning_rate": 6.769743254782954e-06, "loss": 0.0291, "step": 110330 }, { "epoch": 0.8927906788575127, "grad_norm": 0.3877619504928589, "learning_rate": 6.769082849925272e-06, "loss": 0.0309, "step": 110340 }, { "epoch": 0.8928715915527147, "grad_norm": 0.27182692289352417, "learning_rate": 6.768422409786965e-06, "loss": 0.025, "step": 110350 }, { "epoch": 0.8929525042479165, "grad_norm": 0.15277335047721863, "learning_rate": 6.767761934381207e-06, "loss": 0.0166, "step": 110360 }, { "epoch": 0.8930334169431183, "grad_norm": 0.16697148978710175, "learning_rate": 6.76710142372117e-06, "loss": 0.03, "step": 110370 }, { "epoch": 0.8931143296383203, "grad_norm": 0.2950744330883026, "learning_rate": 6.766440877820025e-06, "loss": 0.0422, "step": 110380 }, { "epoch": 0.8931952423335221, "grad_norm": 0.42525285482406616, "learning_rate": 6.765780296690947e-06, "loss": 0.0269, "step": 110390 }, { "epoch": 0.893276155028724, "grad_norm": 0.48572999238967896, "learning_rate": 6.765119680347107e-06, "loss": 0.0239, "step": 110400 }, { "epoch": 0.8933570677239259, "grad_norm": 0.22885146737098694, "learning_rate": 6.764459028801682e-06, "loss": 0.0263, "step": 110410 }, { "epoch": 0.8934379804191278, "grad_norm": 0.2915177345275879, "learning_rate": 6.763798342067847e-06, "loss": 0.0274, "step": 110420 }, { "epoch": 0.8935188931143296, "grad_norm": 0.5554261207580566, "learning_rate": 6.763137620158777e-06, "loss": 0.0348, "step": 110430 }, { "epoch": 0.8935998058095315, "grad_norm": 0.7099806070327759, "learning_rate": 6.762476863087649e-06, "loss": 0.0367, "step": 110440 }, { "epoch": 0.8936807185047334, "grad_norm": 0.16127943992614746, "learning_rate": 6.7618160708676415e-06, "loss": 0.0246, "step": 110450 }, { "epoch": 0.8937616311999352, "grad_norm": 0.12220175564289093, "learning_rate": 6.761155243511931e-06, "loss": 0.0224, "step": 110460 }, { "epoch": 0.8938425438951372, "grad_norm": 0.33541613817214966, "learning_rate": 6.760494381033697e-06, "loss": 0.0228, "step": 110470 }, { "epoch": 0.893923456590339, "grad_norm": 0.30687302350997925, "learning_rate": 6.759833483446121e-06, "loss": 0.0236, "step": 110480 }, { "epoch": 0.894004369285541, "grad_norm": 0.42026156187057495, "learning_rate": 6.75917255076238e-06, "loss": 0.0272, "step": 110490 }, { "epoch": 0.8940852819807428, "grad_norm": 0.35677000880241394, "learning_rate": 6.758511582995657e-06, "loss": 0.0167, "step": 110500 }, { "epoch": 0.8941661946759446, "grad_norm": 0.44081220030784607, "learning_rate": 6.757850580159132e-06, "loss": 0.0304, "step": 110510 }, { "epoch": 0.8942471073711465, "grad_norm": 0.6465012431144714, "learning_rate": 6.757189542265987e-06, "loss": 0.0337, "step": 110520 }, { "epoch": 0.8943280200663484, "grad_norm": 0.1605795919895172, "learning_rate": 6.756528469329409e-06, "loss": 0.027, "step": 110530 }, { "epoch": 0.8944089327615503, "grad_norm": 0.2457607239484787, "learning_rate": 6.755867361362578e-06, "loss": 0.0439, "step": 110540 }, { "epoch": 0.8944898454567521, "grad_norm": 0.5593549609184265, "learning_rate": 6.755206218378678e-06, "loss": 0.0202, "step": 110550 }, { "epoch": 0.8945707581519541, "grad_norm": 0.2591516077518463, "learning_rate": 6.754545040390897e-06, "loss": 0.0305, "step": 110560 }, { "epoch": 0.8946516708471559, "grad_norm": 0.18946924805641174, "learning_rate": 6.7538838274124175e-06, "loss": 0.0255, "step": 110570 }, { "epoch": 0.8947325835423579, "grad_norm": 0.5875333547592163, "learning_rate": 6.753222579456429e-06, "loss": 0.0231, "step": 110580 }, { "epoch": 0.8948134962375597, "grad_norm": 0.4229816198348999, "learning_rate": 6.7525612965361165e-06, "loss": 0.0319, "step": 110590 }, { "epoch": 0.8948944089327615, "grad_norm": 0.2794201374053955, "learning_rate": 6.751899978664668e-06, "loss": 0.0354, "step": 110600 }, { "epoch": 0.8949753216279634, "grad_norm": 0.5651185512542725, "learning_rate": 6.751238625855275e-06, "loss": 0.0208, "step": 110610 }, { "epoch": 0.8950562343231653, "grad_norm": 0.5190436840057373, "learning_rate": 6.750577238121123e-06, "loss": 0.0352, "step": 110620 }, { "epoch": 0.8951371470183672, "grad_norm": 0.35787588357925415, "learning_rate": 6.749915815475404e-06, "loss": 0.0348, "step": 110630 }, { "epoch": 0.895218059713569, "grad_norm": 0.5495992302894592, "learning_rate": 6.749254357931309e-06, "loss": 0.0249, "step": 110640 }, { "epoch": 0.895298972408771, "grad_norm": 0.495612233877182, "learning_rate": 6.748592865502029e-06, "loss": 0.0342, "step": 110650 }, { "epoch": 0.8953798851039728, "grad_norm": 0.2851148843765259, "learning_rate": 6.747931338200754e-06, "loss": 0.0202, "step": 110660 }, { "epoch": 0.8954607977991746, "grad_norm": 0.37923017144203186, "learning_rate": 6.747269776040679e-06, "loss": 0.0284, "step": 110670 }, { "epoch": 0.8955417104943766, "grad_norm": 0.402288556098938, "learning_rate": 6.746608179034998e-06, "loss": 0.0269, "step": 110680 }, { "epoch": 0.8956226231895784, "grad_norm": 0.5788600444793701, "learning_rate": 6.7459465471969034e-06, "loss": 0.0331, "step": 110690 }, { "epoch": 0.8957035358847804, "grad_norm": 0.4425297677516937, "learning_rate": 6.745284880539591e-06, "loss": 0.0233, "step": 110700 }, { "epoch": 0.8957844485799822, "grad_norm": 2.0314879417419434, "learning_rate": 6.744623179076255e-06, "loss": 0.0375, "step": 110710 }, { "epoch": 0.8958653612751841, "grad_norm": 0.26618722081184387, "learning_rate": 6.7439614428200954e-06, "loss": 0.0251, "step": 110720 }, { "epoch": 0.895946273970386, "grad_norm": 0.3449837267398834, "learning_rate": 6.743299671784305e-06, "loss": 0.0335, "step": 110730 }, { "epoch": 0.8960271866655878, "grad_norm": 0.21067073941230774, "learning_rate": 6.742637865982081e-06, "loss": 0.015, "step": 110740 }, { "epoch": 0.8961080993607897, "grad_norm": 0.28784283995628357, "learning_rate": 6.7419760254266265e-06, "loss": 0.0355, "step": 110750 }, { "epoch": 0.8961890120559916, "grad_norm": 0.3779001832008362, "learning_rate": 6.741314150131136e-06, "loss": 0.0281, "step": 110760 }, { "epoch": 0.8962699247511935, "grad_norm": 0.7440901398658752, "learning_rate": 6.740652240108812e-06, "loss": 0.0244, "step": 110770 }, { "epoch": 0.8963508374463953, "grad_norm": 0.31527459621429443, "learning_rate": 6.739990295372854e-06, "loss": 0.0214, "step": 110780 }, { "epoch": 0.8964317501415973, "grad_norm": 0.21073350310325623, "learning_rate": 6.7393283159364605e-06, "loss": 0.0193, "step": 110790 }, { "epoch": 0.8965126628367991, "grad_norm": 0.6976863145828247, "learning_rate": 6.738666301812837e-06, "loss": 0.0265, "step": 110800 }, { "epoch": 0.8965935755320009, "grad_norm": 0.22158409655094147, "learning_rate": 6.738004253015185e-06, "loss": 0.0401, "step": 110810 }, { "epoch": 0.8966744882272029, "grad_norm": 0.48828428983688354, "learning_rate": 6.737342169556706e-06, "loss": 0.0295, "step": 110820 }, { "epoch": 0.8967554009224047, "grad_norm": 0.44197481870651245, "learning_rate": 6.736680051450607e-06, "loss": 0.021, "step": 110830 }, { "epoch": 0.8968363136176066, "grad_norm": 0.5122292637825012, "learning_rate": 6.73601789871009e-06, "loss": 0.0245, "step": 110840 }, { "epoch": 0.8969172263128085, "grad_norm": 0.5951984524726868, "learning_rate": 6.73535571134836e-06, "loss": 0.0277, "step": 110850 }, { "epoch": 0.8969981390080104, "grad_norm": 0.2618372142314911, "learning_rate": 6.734693489378625e-06, "loss": 0.0234, "step": 110860 }, { "epoch": 0.8970790517032122, "grad_norm": 0.22079694271087646, "learning_rate": 6.73403123281409e-06, "loss": 0.0241, "step": 110870 }, { "epoch": 0.8971599643984141, "grad_norm": 0.8621811270713806, "learning_rate": 6.733368941667962e-06, "loss": 0.0409, "step": 110880 }, { "epoch": 0.897240877093616, "grad_norm": 0.45588624477386475, "learning_rate": 6.7327066159534515e-06, "loss": 0.0194, "step": 110890 }, { "epoch": 0.8973217897888178, "grad_norm": 0.41832029819488525, "learning_rate": 6.7320442556837625e-06, "loss": 0.0289, "step": 110900 }, { "epoch": 0.8974027024840198, "grad_norm": 0.21770308911800385, "learning_rate": 6.731381860872111e-06, "loss": 0.0267, "step": 110910 }, { "epoch": 0.8974836151792216, "grad_norm": 0.3809710741043091, "learning_rate": 6.7307194315317026e-06, "loss": 0.0204, "step": 110920 }, { "epoch": 0.8975645278744235, "grad_norm": 0.5261046886444092, "learning_rate": 6.730056967675746e-06, "loss": 0.0376, "step": 110930 }, { "epoch": 0.8976454405696254, "grad_norm": 0.371573269367218, "learning_rate": 6.729394469317458e-06, "loss": 0.0371, "step": 110940 }, { "epoch": 0.8977263532648273, "grad_norm": 0.1515306979417801, "learning_rate": 6.728731936470048e-06, "loss": 0.0218, "step": 110950 }, { "epoch": 0.8978072659600291, "grad_norm": 0.4531714916229248, "learning_rate": 6.728069369146728e-06, "loss": 0.0235, "step": 110960 }, { "epoch": 0.897888178655231, "grad_norm": 0.13862921297550201, "learning_rate": 6.727406767360714e-06, "loss": 0.0287, "step": 110970 }, { "epoch": 0.8979690913504329, "grad_norm": 0.10928642004728317, "learning_rate": 6.726744131125218e-06, "loss": 0.0276, "step": 110980 }, { "epoch": 0.8980500040456347, "grad_norm": 0.3061254024505615, "learning_rate": 6.726081460453456e-06, "loss": 0.0232, "step": 110990 }, { "epoch": 0.8981309167408367, "grad_norm": 0.3554747700691223, "learning_rate": 6.725418755358643e-06, "loss": 0.0364, "step": 111000 }, { "epoch": 0.8982118294360385, "grad_norm": 0.2654728889465332, "learning_rate": 6.724756015853994e-06, "loss": 0.0278, "step": 111010 }, { "epoch": 0.8982927421312404, "grad_norm": 0.3250589072704315, "learning_rate": 6.724093241952729e-06, "loss": 0.0211, "step": 111020 }, { "epoch": 0.8983736548264423, "grad_norm": 0.3772439658641815, "learning_rate": 6.723430433668063e-06, "loss": 0.0213, "step": 111030 }, { "epoch": 0.8984545675216441, "grad_norm": 0.4289124310016632, "learning_rate": 6.722767591013216e-06, "loss": 0.0303, "step": 111040 }, { "epoch": 0.898535480216846, "grad_norm": 0.4142113924026489, "learning_rate": 6.7221047140014064e-06, "loss": 0.0214, "step": 111050 }, { "epoch": 0.8986163929120479, "grad_norm": 0.03636688366532326, "learning_rate": 6.721441802645854e-06, "loss": 0.0457, "step": 111060 }, { "epoch": 0.8986973056072498, "grad_norm": 0.438888281583786, "learning_rate": 6.720778856959779e-06, "loss": 0.0242, "step": 111070 }, { "epoch": 0.8987782183024516, "grad_norm": 0.30928149819374084, "learning_rate": 6.720115876956401e-06, "loss": 0.0327, "step": 111080 }, { "epoch": 0.8988591309976536, "grad_norm": 0.39506036043167114, "learning_rate": 6.719452862648945e-06, "loss": 0.0302, "step": 111090 }, { "epoch": 0.8989400436928554, "grad_norm": 0.35849934816360474, "learning_rate": 6.718789814050631e-06, "loss": 0.0232, "step": 111100 }, { "epoch": 0.8990209563880572, "grad_norm": 0.3349664509296417, "learning_rate": 6.718126731174681e-06, "loss": 0.0201, "step": 111110 }, { "epoch": 0.8991018690832592, "grad_norm": 0.4268949627876282, "learning_rate": 6.717463614034323e-06, "loss": 0.0242, "step": 111120 }, { "epoch": 0.899182781778461, "grad_norm": 0.32393139600753784, "learning_rate": 6.7168004626427786e-06, "loss": 0.0266, "step": 111130 }, { "epoch": 0.899263694473663, "grad_norm": 0.39382368326187134, "learning_rate": 6.716137277013272e-06, "loss": 0.0268, "step": 111140 }, { "epoch": 0.8993446071688648, "grad_norm": 0.5895078182220459, "learning_rate": 6.71547405715903e-06, "loss": 0.0293, "step": 111150 }, { "epoch": 0.8994255198640667, "grad_norm": 0.7189726829528809, "learning_rate": 6.714810803093281e-06, "loss": 0.0238, "step": 111160 }, { "epoch": 0.8995064325592685, "grad_norm": 0.29203739762306213, "learning_rate": 6.71414751482925e-06, "loss": 0.0258, "step": 111170 }, { "epoch": 0.8995873452544704, "grad_norm": 0.5982320308685303, "learning_rate": 6.713484192380167e-06, "loss": 0.0353, "step": 111180 }, { "epoch": 0.8996682579496723, "grad_norm": 0.26907506585121155, "learning_rate": 6.712820835759257e-06, "loss": 0.0151, "step": 111190 }, { "epoch": 0.8997491706448741, "grad_norm": 0.3428424596786499, "learning_rate": 6.712157444979752e-06, "loss": 0.0256, "step": 111200 }, { "epoch": 0.8998300833400761, "grad_norm": 0.2391018569469452, "learning_rate": 6.711494020054882e-06, "loss": 0.0309, "step": 111210 }, { "epoch": 0.8999109960352779, "grad_norm": 0.3257918953895569, "learning_rate": 6.710830560997874e-06, "loss": 0.0189, "step": 111220 }, { "epoch": 0.8999919087304799, "grad_norm": 0.18640151619911194, "learning_rate": 6.710167067821966e-06, "loss": 0.0297, "step": 111230 }, { "epoch": 0.9000728214256817, "grad_norm": 0.3103052079677582, "learning_rate": 6.7095035405403854e-06, "loss": 0.0262, "step": 111240 }, { "epoch": 0.9001537341208836, "grad_norm": 0.5694136619567871, "learning_rate": 6.7088399791663635e-06, "loss": 0.0415, "step": 111250 }, { "epoch": 0.9002346468160854, "grad_norm": 0.3854541778564453, "learning_rate": 6.7081763837131365e-06, "loss": 0.0267, "step": 111260 }, { "epoch": 0.9003155595112873, "grad_norm": 0.3576522767543793, "learning_rate": 6.707512754193939e-06, "loss": 0.0162, "step": 111270 }, { "epoch": 0.9003964722064892, "grad_norm": 0.6462686657905579, "learning_rate": 6.706849090622003e-06, "loss": 0.0459, "step": 111280 }, { "epoch": 0.900477384901691, "grad_norm": 0.4728459417819977, "learning_rate": 6.706185393010566e-06, "loss": 0.0343, "step": 111290 }, { "epoch": 0.900558297596893, "grad_norm": 0.5152249336242676, "learning_rate": 6.705521661372862e-06, "loss": 0.0359, "step": 111300 }, { "epoch": 0.9006392102920948, "grad_norm": 0.31530627608299255, "learning_rate": 6.704857895722131e-06, "loss": 0.029, "step": 111310 }, { "epoch": 0.9007201229872968, "grad_norm": 0.47741591930389404, "learning_rate": 6.704194096071608e-06, "loss": 0.0326, "step": 111320 }, { "epoch": 0.9008010356824986, "grad_norm": 0.39703142642974854, "learning_rate": 6.70353026243453e-06, "loss": 0.0193, "step": 111330 }, { "epoch": 0.9008819483777004, "grad_norm": 0.3125641942024231, "learning_rate": 6.702866394824138e-06, "loss": 0.0222, "step": 111340 }, { "epoch": 0.9009628610729024, "grad_norm": 0.35189101099967957, "learning_rate": 6.702202493253671e-06, "loss": 0.0279, "step": 111350 }, { "epoch": 0.9010437737681042, "grad_norm": 0.4804939925670624, "learning_rate": 6.701538557736368e-06, "loss": 0.0326, "step": 111360 }, { "epoch": 0.9011246864633061, "grad_norm": 0.4289320409297943, "learning_rate": 6.700874588285471e-06, "loss": 0.0275, "step": 111370 }, { "epoch": 0.901205599158508, "grad_norm": 0.6872775554656982, "learning_rate": 6.7002105849142205e-06, "loss": 0.0411, "step": 111380 }, { "epoch": 0.9012865118537099, "grad_norm": 0.041655875742435455, "learning_rate": 6.69954654763586e-06, "loss": 0.0296, "step": 111390 }, { "epoch": 0.9013674245489117, "grad_norm": 0.9834123849868774, "learning_rate": 6.698882476463632e-06, "loss": 0.0376, "step": 111400 }, { "epoch": 0.9014483372441136, "grad_norm": 0.6554432511329651, "learning_rate": 6.698218371410778e-06, "loss": 0.033, "step": 111410 }, { "epoch": 0.9015292499393155, "grad_norm": 0.44804802536964417, "learning_rate": 6.697554232490544e-06, "loss": 0.0337, "step": 111420 }, { "epoch": 0.9016101626345173, "grad_norm": 0.49779289960861206, "learning_rate": 6.696890059716177e-06, "loss": 0.0394, "step": 111430 }, { "epoch": 0.9016910753297193, "grad_norm": 0.31979888677597046, "learning_rate": 6.696225853100917e-06, "loss": 0.0238, "step": 111440 }, { "epoch": 0.9017719880249211, "grad_norm": 0.1548190861940384, "learning_rate": 6.695561612658015e-06, "loss": 0.0242, "step": 111450 }, { "epoch": 0.901852900720123, "grad_norm": 0.6887672543525696, "learning_rate": 6.694897338400715e-06, "loss": 0.0388, "step": 111460 }, { "epoch": 0.9019338134153249, "grad_norm": 0.3560086488723755, "learning_rate": 6.694233030342266e-06, "loss": 0.0347, "step": 111470 }, { "epoch": 0.9020147261105267, "grad_norm": 0.4310336410999298, "learning_rate": 6.693568688495916e-06, "loss": 0.0236, "step": 111480 }, { "epoch": 0.9020956388057286, "grad_norm": 0.13104084134101868, "learning_rate": 6.6929043128749125e-06, "loss": 0.0271, "step": 111490 }, { "epoch": 0.9021765515009305, "grad_norm": 0.28256356716156006, "learning_rate": 6.692239903492509e-06, "loss": 0.0186, "step": 111500 }, { "epoch": 0.9022574641961324, "grad_norm": 0.25920796394348145, "learning_rate": 6.691575460361952e-06, "loss": 0.0253, "step": 111510 }, { "epoch": 0.9023383768913342, "grad_norm": 0.22871802747249603, "learning_rate": 6.690910983496492e-06, "loss": 0.0189, "step": 111520 }, { "epoch": 0.9024192895865362, "grad_norm": 0.30243486166000366, "learning_rate": 6.690246472909383e-06, "loss": 0.0384, "step": 111530 }, { "epoch": 0.902500202281738, "grad_norm": 0.21875064074993134, "learning_rate": 6.689581928613879e-06, "loss": 0.0258, "step": 111540 }, { "epoch": 0.9025811149769399, "grad_norm": 0.6852708458900452, "learning_rate": 6.688917350623227e-06, "loss": 0.0354, "step": 111550 }, { "epoch": 0.9026620276721418, "grad_norm": 0.5836325287818909, "learning_rate": 6.688252738950685e-06, "loss": 0.019, "step": 111560 }, { "epoch": 0.9027429403673436, "grad_norm": 0.23253504931926727, "learning_rate": 6.687588093609506e-06, "loss": 0.0186, "step": 111570 }, { "epoch": 0.9028238530625455, "grad_norm": 0.2037055939435959, "learning_rate": 6.686923414612944e-06, "loss": 0.0351, "step": 111580 }, { "epoch": 0.9029047657577474, "grad_norm": 0.46375399827957153, "learning_rate": 6.686258701974259e-06, "loss": 0.0278, "step": 111590 }, { "epoch": 0.9029856784529493, "grad_norm": 0.46178922057151794, "learning_rate": 6.685593955706701e-06, "loss": 0.038, "step": 111600 }, { "epoch": 0.9030665911481511, "grad_norm": 0.4762795567512512, "learning_rate": 6.684929175823531e-06, "loss": 0.0444, "step": 111610 }, { "epoch": 0.9031475038433531, "grad_norm": 0.43070435523986816, "learning_rate": 6.684264362338007e-06, "loss": 0.0257, "step": 111620 }, { "epoch": 0.9032284165385549, "grad_norm": 0.3858734965324402, "learning_rate": 6.683599515263382e-06, "loss": 0.0269, "step": 111630 }, { "epoch": 0.9033093292337567, "grad_norm": 0.5428004860877991, "learning_rate": 6.682934634612923e-06, "loss": 0.0271, "step": 111640 }, { "epoch": 0.9033902419289587, "grad_norm": 0.40295130014419556, "learning_rate": 6.682269720399886e-06, "loss": 0.0247, "step": 111650 }, { "epoch": 0.9034711546241605, "grad_norm": 0.48096147179603577, "learning_rate": 6.681604772637527e-06, "loss": 0.0247, "step": 111660 }, { "epoch": 0.9035520673193624, "grad_norm": 0.01569674164056778, "learning_rate": 6.680939791339113e-06, "loss": 0.0375, "step": 111670 }, { "epoch": 0.9036329800145643, "grad_norm": 0.4275330603122711, "learning_rate": 6.680274776517903e-06, "loss": 0.0215, "step": 111680 }, { "epoch": 0.9037138927097662, "grad_norm": 0.45127078890800476, "learning_rate": 6.67960972818716e-06, "loss": 0.0286, "step": 111690 }, { "epoch": 0.903794805404968, "grad_norm": 0.5969920754432678, "learning_rate": 6.678944646360147e-06, "loss": 0.0332, "step": 111700 }, { "epoch": 0.9038757181001699, "grad_norm": 0.7563439011573792, "learning_rate": 6.678279531050126e-06, "loss": 0.0426, "step": 111710 }, { "epoch": 0.9039566307953718, "grad_norm": 0.43221771717071533, "learning_rate": 6.677614382270365e-06, "loss": 0.0219, "step": 111720 }, { "epoch": 0.9040375434905736, "grad_norm": 0.16440647840499878, "learning_rate": 6.676949200034126e-06, "loss": 0.0329, "step": 111730 }, { "epoch": 0.9041184561857756, "grad_norm": 0.5101078152656555, "learning_rate": 6.676283984354674e-06, "loss": 0.0302, "step": 111740 }, { "epoch": 0.9041993688809774, "grad_norm": 0.2749769687652588, "learning_rate": 6.675618735245279e-06, "loss": 0.0274, "step": 111750 }, { "epoch": 0.9042802815761793, "grad_norm": 0.32002201676368713, "learning_rate": 6.6749534527192036e-06, "loss": 0.0349, "step": 111760 }, { "epoch": 0.9043611942713812, "grad_norm": 0.5569091439247131, "learning_rate": 6.674288136789719e-06, "loss": 0.0251, "step": 111770 }, { "epoch": 0.904442106966583, "grad_norm": 0.31502050161361694, "learning_rate": 6.673622787470092e-06, "loss": 0.0336, "step": 111780 }, { "epoch": 0.9045230196617849, "grad_norm": 0.06069531291723251, "learning_rate": 6.672957404773591e-06, "loss": 0.0258, "step": 111790 }, { "epoch": 0.9046039323569868, "grad_norm": 0.3525902330875397, "learning_rate": 6.672291988713487e-06, "loss": 0.02, "step": 111800 }, { "epoch": 0.9046848450521887, "grad_norm": 0.3752771317958832, "learning_rate": 6.6716265393030485e-06, "loss": 0.0348, "step": 111810 }, { "epoch": 0.9047657577473905, "grad_norm": 0.29513460397720337, "learning_rate": 6.670961056555548e-06, "loss": 0.0215, "step": 111820 }, { "epoch": 0.9048466704425925, "grad_norm": 0.531382143497467, "learning_rate": 6.670295540484259e-06, "loss": 0.0272, "step": 111830 }, { "epoch": 0.9049275831377943, "grad_norm": 0.2917748987674713, "learning_rate": 6.669629991102448e-06, "loss": 0.0263, "step": 111840 }, { "epoch": 0.9050084958329963, "grad_norm": 0.3062901198863983, "learning_rate": 6.668964408423394e-06, "loss": 0.0344, "step": 111850 }, { "epoch": 0.9050894085281981, "grad_norm": 0.40253904461860657, "learning_rate": 6.668298792460368e-06, "loss": 0.0178, "step": 111860 }, { "epoch": 0.9051703212233999, "grad_norm": 0.28223490715026855, "learning_rate": 6.667633143226644e-06, "loss": 0.0202, "step": 111870 }, { "epoch": 0.9052512339186018, "grad_norm": 0.5090281963348389, "learning_rate": 6.666967460735498e-06, "loss": 0.0205, "step": 111880 }, { "epoch": 0.9053321466138037, "grad_norm": 0.07844725996255875, "learning_rate": 6.666301745000204e-06, "loss": 0.0191, "step": 111890 }, { "epoch": 0.9054130593090056, "grad_norm": 0.41362106800079346, "learning_rate": 6.66563599603404e-06, "loss": 0.0331, "step": 111900 }, { "epoch": 0.9054939720042074, "grad_norm": 0.22504013776779175, "learning_rate": 6.664970213850284e-06, "loss": 0.0187, "step": 111910 }, { "epoch": 0.9055748846994094, "grad_norm": 0.41619202494621277, "learning_rate": 6.664304398462211e-06, "loss": 0.0227, "step": 111920 }, { "epoch": 0.9056557973946112, "grad_norm": 0.2738371789455414, "learning_rate": 6.663638549883098e-06, "loss": 0.0336, "step": 111930 }, { "epoch": 0.905736710089813, "grad_norm": 0.3318054974079132, "learning_rate": 6.662972668126228e-06, "loss": 0.0339, "step": 111940 }, { "epoch": 0.905817622785015, "grad_norm": 0.5977798700332642, "learning_rate": 6.6623067532048795e-06, "loss": 0.0243, "step": 111950 }, { "epoch": 0.9058985354802168, "grad_norm": 0.24227072298526764, "learning_rate": 6.6616408051323315e-06, "loss": 0.0271, "step": 111960 }, { "epoch": 0.9059794481754188, "grad_norm": 0.2346383035182953, "learning_rate": 6.660974823921865e-06, "loss": 0.0189, "step": 111970 }, { "epoch": 0.9060603608706206, "grad_norm": 0.5532374382019043, "learning_rate": 6.660308809586763e-06, "loss": 0.0217, "step": 111980 }, { "epoch": 0.9061412735658225, "grad_norm": 0.47914057970046997, "learning_rate": 6.6596427621403056e-06, "loss": 0.0251, "step": 111990 }, { "epoch": 0.9062221862610244, "grad_norm": 0.665562093257904, "learning_rate": 6.658976681595779e-06, "loss": 0.0272, "step": 112000 }, { "epoch": 0.9063030989562262, "grad_norm": 0.6463813781738281, "learning_rate": 6.658310567966461e-06, "loss": 0.0268, "step": 112010 }, { "epoch": 0.9063840116514281, "grad_norm": 0.29153746366500854, "learning_rate": 6.6576444212656445e-06, "loss": 0.0406, "step": 112020 }, { "epoch": 0.90646492434663, "grad_norm": 0.42447829246520996, "learning_rate": 6.656978241506606e-06, "loss": 0.0374, "step": 112030 }, { "epoch": 0.9065458370418319, "grad_norm": 0.6043476462364197, "learning_rate": 6.656312028702636e-06, "loss": 0.0301, "step": 112040 }, { "epoch": 0.9066267497370337, "grad_norm": 0.22930024564266205, "learning_rate": 6.655645782867018e-06, "loss": 0.026, "step": 112050 }, { "epoch": 0.9067076624322357, "grad_norm": 0.79155433177948, "learning_rate": 6.654979504013042e-06, "loss": 0.0282, "step": 112060 }, { "epoch": 0.9067885751274375, "grad_norm": 0.5018786787986755, "learning_rate": 6.654313192153992e-06, "loss": 0.0229, "step": 112070 }, { "epoch": 0.9068694878226393, "grad_norm": 0.5308438539505005, "learning_rate": 6.653646847303158e-06, "loss": 0.0288, "step": 112080 }, { "epoch": 0.9069504005178413, "grad_norm": 0.2233431190252304, "learning_rate": 6.652980469473828e-06, "loss": 0.02, "step": 112090 }, { "epoch": 0.9070313132130431, "grad_norm": 0.5705039501190186, "learning_rate": 6.652314058679295e-06, "loss": 0.0394, "step": 112100 }, { "epoch": 0.907112225908245, "grad_norm": 0.6514375805854797, "learning_rate": 6.6516476149328434e-06, "loss": 0.0233, "step": 112110 }, { "epoch": 0.9071931386034469, "grad_norm": 0.45816275477409363, "learning_rate": 6.650981138247768e-06, "loss": 0.0367, "step": 112120 }, { "epoch": 0.9072740512986488, "grad_norm": 0.2181634157896042, "learning_rate": 6.65031462863736e-06, "loss": 0.025, "step": 112130 }, { "epoch": 0.9073549639938506, "grad_norm": 0.2445763200521469, "learning_rate": 6.6496480861149085e-06, "loss": 0.0254, "step": 112140 }, { "epoch": 0.9074358766890526, "grad_norm": 0.5687393546104431, "learning_rate": 6.648981510693709e-06, "loss": 0.0267, "step": 112150 }, { "epoch": 0.9075167893842544, "grad_norm": 0.38878270983695984, "learning_rate": 6.648314902387056e-06, "loss": 0.013, "step": 112160 }, { "epoch": 0.9075977020794562, "grad_norm": 0.3253770172595978, "learning_rate": 6.6476482612082415e-06, "loss": 0.0202, "step": 112170 }, { "epoch": 0.9076786147746582, "grad_norm": 0.5142486095428467, "learning_rate": 6.64698158717056e-06, "loss": 0.0308, "step": 112180 }, { "epoch": 0.90775952746986, "grad_norm": 0.5861392617225647, "learning_rate": 6.646314880287308e-06, "loss": 0.0363, "step": 112190 }, { "epoch": 0.9078404401650619, "grad_norm": 0.24531196057796478, "learning_rate": 6.645648140571781e-06, "loss": 0.0215, "step": 112200 }, { "epoch": 0.9079213528602638, "grad_norm": 0.4172305762767792, "learning_rate": 6.644981368037276e-06, "loss": 0.0292, "step": 112210 }, { "epoch": 0.9080022655554657, "grad_norm": 0.4263603091239929, "learning_rate": 6.6443145626970905e-06, "loss": 0.0344, "step": 112220 }, { "epoch": 0.9080831782506675, "grad_norm": 0.28748196363449097, "learning_rate": 6.643647724564522e-06, "loss": 0.0254, "step": 112230 }, { "epoch": 0.9081640909458694, "grad_norm": 0.28201979398727417, "learning_rate": 6.6429808536528715e-06, "loss": 0.0289, "step": 112240 }, { "epoch": 0.9082450036410713, "grad_norm": 0.5073429346084595, "learning_rate": 6.642313949975434e-06, "loss": 0.0257, "step": 112250 }, { "epoch": 0.9083259163362731, "grad_norm": 0.49725526571273804, "learning_rate": 6.641647013545512e-06, "loss": 0.0208, "step": 112260 }, { "epoch": 0.9084068290314751, "grad_norm": 0.49837514758110046, "learning_rate": 6.6409800443764075e-06, "loss": 0.0234, "step": 112270 }, { "epoch": 0.9084877417266769, "grad_norm": 0.2989226281642914, "learning_rate": 6.64031304248142e-06, "loss": 0.0306, "step": 112280 }, { "epoch": 0.9085686544218788, "grad_norm": 0.29928112030029297, "learning_rate": 6.639646007873851e-06, "loss": 0.0359, "step": 112290 }, { "epoch": 0.9086495671170807, "grad_norm": 0.7438235878944397, "learning_rate": 6.638978940567003e-06, "loss": 0.0447, "step": 112300 }, { "epoch": 0.9087304798122825, "grad_norm": 0.2143852859735489, "learning_rate": 6.638311840574182e-06, "loss": 0.0252, "step": 112310 }, { "epoch": 0.9088113925074844, "grad_norm": 0.429082453250885, "learning_rate": 6.63764470790869e-06, "loss": 0.036, "step": 112320 }, { "epoch": 0.9088923052026863, "grad_norm": 0.47117286920547485, "learning_rate": 6.636977542583829e-06, "loss": 0.028, "step": 112330 }, { "epoch": 0.9089732178978882, "grad_norm": 0.5915630459785461, "learning_rate": 6.6363103446129085e-06, "loss": 0.0303, "step": 112340 }, { "epoch": 0.90905413059309, "grad_norm": 0.1972111165523529, "learning_rate": 6.6356431140092335e-06, "loss": 0.0301, "step": 112350 }, { "epoch": 0.909135043288292, "grad_norm": 0.5157634019851685, "learning_rate": 6.634975850786107e-06, "loss": 0.0293, "step": 112360 }, { "epoch": 0.9092159559834938, "grad_norm": 0.5148714184761047, "learning_rate": 6.634308554956841e-06, "loss": 0.0337, "step": 112370 }, { "epoch": 0.9092968686786956, "grad_norm": 0.3993450701236725, "learning_rate": 6.633641226534739e-06, "loss": 0.0267, "step": 112380 }, { "epoch": 0.9093777813738976, "grad_norm": 0.22152598202228546, "learning_rate": 6.6329738655331135e-06, "loss": 0.0175, "step": 112390 }, { "epoch": 0.9094586940690994, "grad_norm": 0.5722699761390686, "learning_rate": 6.6323064719652706e-06, "loss": 0.0384, "step": 112400 }, { "epoch": 0.9095396067643013, "grad_norm": 0.43371909856796265, "learning_rate": 6.63163904584452e-06, "loss": 0.0247, "step": 112410 }, { "epoch": 0.9096205194595032, "grad_norm": 0.5771256685256958, "learning_rate": 6.630971587184175e-06, "loss": 0.0264, "step": 112420 }, { "epoch": 0.9097014321547051, "grad_norm": 0.6031247973442078, "learning_rate": 6.6303040959975465e-06, "loss": 0.0231, "step": 112430 }, { "epoch": 0.9097823448499069, "grad_norm": 3.253696918487549, "learning_rate": 6.6296365722979406e-06, "loss": 0.034, "step": 112440 }, { "epoch": 0.9098632575451088, "grad_norm": 0.16171416640281677, "learning_rate": 6.628969016098675e-06, "loss": 0.0242, "step": 112450 }, { "epoch": 0.9099441702403107, "grad_norm": 0.2147640734910965, "learning_rate": 6.628301427413062e-06, "loss": 0.0247, "step": 112460 }, { "epoch": 0.9100250829355125, "grad_norm": 0.3179178833961487, "learning_rate": 6.627633806254415e-06, "loss": 0.0234, "step": 112470 }, { "epoch": 0.9101059956307145, "grad_norm": 0.16573533415794373, "learning_rate": 6.626966152636047e-06, "loss": 0.0315, "step": 112480 }, { "epoch": 0.9101869083259163, "grad_norm": 0.3980347216129303, "learning_rate": 6.626298466571273e-06, "loss": 0.0262, "step": 112490 }, { "epoch": 0.9102678210211183, "grad_norm": 0.3871801793575287, "learning_rate": 6.6256307480734105e-06, "loss": 0.0159, "step": 112500 }, { "epoch": 0.9103487337163201, "grad_norm": 0.3514006435871124, "learning_rate": 6.624962997155774e-06, "loss": 0.0226, "step": 112510 }, { "epoch": 0.910429646411522, "grad_norm": 0.33363354206085205, "learning_rate": 6.624295213831681e-06, "loss": 0.0356, "step": 112520 }, { "epoch": 0.9105105591067238, "grad_norm": 0.5419085025787354, "learning_rate": 6.623627398114449e-06, "loss": 0.0401, "step": 112530 }, { "epoch": 0.9105914718019257, "grad_norm": 0.30871057510375977, "learning_rate": 6.622959550017397e-06, "loss": 0.0238, "step": 112540 }, { "epoch": 0.9106723844971276, "grad_norm": 0.3966231346130371, "learning_rate": 6.622291669553842e-06, "loss": 0.0276, "step": 112550 }, { "epoch": 0.9107532971923294, "grad_norm": 0.3202579617500305, "learning_rate": 6.621623756737105e-06, "loss": 0.0229, "step": 112560 }, { "epoch": 0.9108342098875314, "grad_norm": 0.4695504605770111, "learning_rate": 6.620955811580505e-06, "loss": 0.0196, "step": 112570 }, { "epoch": 0.9109151225827332, "grad_norm": 0.36470359563827515, "learning_rate": 6.620287834097363e-06, "loss": 0.0346, "step": 112580 }, { "epoch": 0.9109960352779352, "grad_norm": 0.007327517960220575, "learning_rate": 6.6196198243010025e-06, "loss": 0.0236, "step": 112590 }, { "epoch": 0.911076947973137, "grad_norm": 0.08587835729122162, "learning_rate": 6.618951782204741e-06, "loss": 0.0345, "step": 112600 }, { "epoch": 0.9111578606683388, "grad_norm": 0.7255931496620178, "learning_rate": 6.618283707821907e-06, "loss": 0.0294, "step": 112610 }, { "epoch": 0.9112387733635408, "grad_norm": 0.2857055366039276, "learning_rate": 6.617615601165819e-06, "loss": 0.0118, "step": 112620 }, { "epoch": 0.9113196860587426, "grad_norm": 0.330380380153656, "learning_rate": 6.616947462249803e-06, "loss": 0.0201, "step": 112630 }, { "epoch": 0.9114005987539445, "grad_norm": 0.04007745161652565, "learning_rate": 6.6162792910871834e-06, "loss": 0.0293, "step": 112640 }, { "epoch": 0.9114815114491464, "grad_norm": 0.31403934955596924, "learning_rate": 6.615611087691285e-06, "loss": 0.0205, "step": 112650 }, { "epoch": 0.9115624241443483, "grad_norm": 0.7103270292282104, "learning_rate": 6.614942852075436e-06, "loss": 0.0268, "step": 112660 }, { "epoch": 0.9116433368395501, "grad_norm": 0.38687554001808167, "learning_rate": 6.61427458425296e-06, "loss": 0.0279, "step": 112670 }, { "epoch": 0.911724249534752, "grad_norm": 0.5158726572990417, "learning_rate": 6.613606284237185e-06, "loss": 0.0305, "step": 112680 }, { "epoch": 0.9118051622299539, "grad_norm": 0.418606698513031, "learning_rate": 6.61293795204144e-06, "loss": 0.0189, "step": 112690 }, { "epoch": 0.9118860749251557, "grad_norm": 0.42116355895996094, "learning_rate": 6.612269587679051e-06, "loss": 0.0189, "step": 112700 }, { "epoch": 0.9119669876203577, "grad_norm": 0.48825737833976746, "learning_rate": 6.61160119116335e-06, "loss": 0.0169, "step": 112710 }, { "epoch": 0.9120479003155595, "grad_norm": 0.6605806350708008, "learning_rate": 6.610932762507665e-06, "loss": 0.0316, "step": 112720 }, { "epoch": 0.9121288130107614, "grad_norm": 0.6490705013275146, "learning_rate": 6.610264301725328e-06, "loss": 0.0364, "step": 112730 }, { "epoch": 0.9122097257059633, "grad_norm": 0.7370728850364685, "learning_rate": 6.6095958088296665e-06, "loss": 0.026, "step": 112740 }, { "epoch": 0.9122906384011651, "grad_norm": 0.20231026411056519, "learning_rate": 6.608927283834016e-06, "loss": 0.0282, "step": 112750 }, { "epoch": 0.912371551096367, "grad_norm": 0.45483988523483276, "learning_rate": 6.608258726751707e-06, "loss": 0.0295, "step": 112760 }, { "epoch": 0.9124524637915689, "grad_norm": 1.1905022859573364, "learning_rate": 6.607590137596074e-06, "loss": 0.0328, "step": 112770 }, { "epoch": 0.9125333764867708, "grad_norm": 0.11734412610530853, "learning_rate": 6.606921516380449e-06, "loss": 0.0172, "step": 112780 }, { "epoch": 0.9126142891819726, "grad_norm": 0.25805559754371643, "learning_rate": 6.6062528631181666e-06, "loss": 0.0219, "step": 112790 }, { "epoch": 0.9126952018771746, "grad_norm": 0.7034748196601868, "learning_rate": 6.605584177822562e-06, "loss": 0.0358, "step": 112800 }, { "epoch": 0.9127761145723764, "grad_norm": 0.42926299571990967, "learning_rate": 6.604915460506971e-06, "loss": 0.0198, "step": 112810 }, { "epoch": 0.9128570272675783, "grad_norm": 0.4661414325237274, "learning_rate": 6.604246711184728e-06, "loss": 0.0429, "step": 112820 }, { "epoch": 0.9129379399627802, "grad_norm": 0.19457407295703888, "learning_rate": 6.6035779298691715e-06, "loss": 0.0229, "step": 112830 }, { "epoch": 0.913018852657982, "grad_norm": 0.3257066309452057, "learning_rate": 6.602909116573638e-06, "loss": 0.0311, "step": 112840 }, { "epoch": 0.9130997653531839, "grad_norm": 0.16204600036144257, "learning_rate": 6.602240271311466e-06, "loss": 0.0196, "step": 112850 }, { "epoch": 0.9131806780483858, "grad_norm": 0.3627863824367523, "learning_rate": 6.601571394095996e-06, "loss": 0.0271, "step": 112860 }, { "epoch": 0.9132615907435877, "grad_norm": 0.5002480149269104, "learning_rate": 6.600902484940565e-06, "loss": 0.0294, "step": 112870 }, { "epoch": 0.9133425034387895, "grad_norm": 0.340115487575531, "learning_rate": 6.600233543858512e-06, "loss": 0.0225, "step": 112880 }, { "epoch": 0.9134234161339915, "grad_norm": 0.1878766268491745, "learning_rate": 6.59956457086318e-06, "loss": 0.0202, "step": 112890 }, { "epoch": 0.9135043288291933, "grad_norm": 0.1836623102426529, "learning_rate": 6.598895565967911e-06, "loss": 0.0213, "step": 112900 }, { "epoch": 0.9135852415243951, "grad_norm": 0.35437917709350586, "learning_rate": 6.598226529186044e-06, "loss": 0.0265, "step": 112910 }, { "epoch": 0.9136661542195971, "grad_norm": 0.41350287199020386, "learning_rate": 6.597557460530922e-06, "loss": 0.0164, "step": 112920 }, { "epoch": 0.9137470669147989, "grad_norm": 0.00046124609070830047, "learning_rate": 6.596888360015891e-06, "loss": 0.0221, "step": 112930 }, { "epoch": 0.9138279796100008, "grad_norm": 0.3217988908290863, "learning_rate": 6.5962192276542915e-06, "loss": 0.0225, "step": 112940 }, { "epoch": 0.9139088923052027, "grad_norm": 0.6244301795959473, "learning_rate": 6.5955500634594694e-06, "loss": 0.0313, "step": 112950 }, { "epoch": 0.9139898050004046, "grad_norm": 0.44242992997169495, "learning_rate": 6.5948808674447705e-06, "loss": 0.0313, "step": 112960 }, { "epoch": 0.9140707176956064, "grad_norm": 0.40878286957740784, "learning_rate": 6.594211639623539e-06, "loss": 0.032, "step": 112970 }, { "epoch": 0.9141516303908083, "grad_norm": 0.4208299219608307, "learning_rate": 6.593542380009121e-06, "loss": 0.0269, "step": 112980 }, { "epoch": 0.9142325430860102, "grad_norm": 0.5089192390441895, "learning_rate": 6.592873088614866e-06, "loss": 0.0344, "step": 112990 }, { "epoch": 0.914313455781212, "grad_norm": 0.23976707458496094, "learning_rate": 6.592203765454119e-06, "loss": 0.0254, "step": 113000 }, { "epoch": 0.914394368476414, "grad_norm": 3.4470467567443848, "learning_rate": 6.591534410540229e-06, "loss": 0.0272, "step": 113010 }, { "epoch": 0.9144752811716158, "grad_norm": 0.6719285845756531, "learning_rate": 6.590865023886547e-06, "loss": 0.0333, "step": 113020 }, { "epoch": 0.9145561938668177, "grad_norm": 0.4564239978790283, "learning_rate": 6.590195605506417e-06, "loss": 0.031, "step": 113030 }, { "epoch": 0.9146371065620196, "grad_norm": 0.662395715713501, "learning_rate": 6.589526155413195e-06, "loss": 0.0311, "step": 113040 }, { "epoch": 0.9147180192572214, "grad_norm": 0.4505552649497986, "learning_rate": 6.58885667362023e-06, "loss": 0.0307, "step": 113050 }, { "epoch": 0.9147989319524233, "grad_norm": 0.23395414650440216, "learning_rate": 6.588187160140872e-06, "loss": 0.0191, "step": 113060 }, { "epoch": 0.9148798446476252, "grad_norm": 0.16764868795871735, "learning_rate": 6.5875176149884745e-06, "loss": 0.0191, "step": 113070 }, { "epoch": 0.9149607573428271, "grad_norm": 0.4606640934944153, "learning_rate": 6.586848038176389e-06, "loss": 0.0343, "step": 113080 }, { "epoch": 0.9150416700380289, "grad_norm": 0.3608930706977844, "learning_rate": 6.58617842971797e-06, "loss": 0.0253, "step": 113090 }, { "epoch": 0.9151225827332309, "grad_norm": 0.010484063066542149, "learning_rate": 6.58550878962657e-06, "loss": 0.0198, "step": 113100 }, { "epoch": 0.9152034954284327, "grad_norm": 0.34465816617012024, "learning_rate": 6.584839117915544e-06, "loss": 0.0345, "step": 113110 }, { "epoch": 0.9152844081236347, "grad_norm": 0.3899461627006531, "learning_rate": 6.584169414598247e-06, "loss": 0.0259, "step": 113120 }, { "epoch": 0.9153653208188365, "grad_norm": 0.39003419876098633, "learning_rate": 6.583499679688038e-06, "loss": 0.0317, "step": 113130 }, { "epoch": 0.9154462335140383, "grad_norm": 0.32434719800949097, "learning_rate": 6.582829913198269e-06, "loss": 0.03, "step": 113140 }, { "epoch": 0.9155271462092403, "grad_norm": 0.2986994683742523, "learning_rate": 6.582160115142299e-06, "loss": 0.0278, "step": 113150 }, { "epoch": 0.9156080589044421, "grad_norm": 0.7051041126251221, "learning_rate": 6.581490285533485e-06, "loss": 0.04, "step": 113160 }, { "epoch": 0.915688971599644, "grad_norm": 0.25753530859947205, "learning_rate": 6.580820424385188e-06, "loss": 0.023, "step": 113170 }, { "epoch": 0.9157698842948458, "grad_norm": 0.23225897550582886, "learning_rate": 6.580150531710763e-06, "loss": 0.0296, "step": 113180 }, { "epoch": 0.9158507969900478, "grad_norm": 0.3053447902202606, "learning_rate": 6.579480607523572e-06, "loss": 0.0278, "step": 113190 }, { "epoch": 0.9159317096852496, "grad_norm": 0.5291491150856018, "learning_rate": 6.5788106518369755e-06, "loss": 0.0366, "step": 113200 }, { "epoch": 0.9160126223804514, "grad_norm": 0.6965476870536804, "learning_rate": 6.578140664664332e-06, "loss": 0.0234, "step": 113210 }, { "epoch": 0.9160935350756534, "grad_norm": 0.31171441078186035, "learning_rate": 6.5774706460190055e-06, "loss": 0.0219, "step": 113220 }, { "epoch": 0.9161744477708552, "grad_norm": 0.48086854815483093, "learning_rate": 6.576800595914357e-06, "loss": 0.0194, "step": 113230 }, { "epoch": 0.9162553604660572, "grad_norm": 0.7792114615440369, "learning_rate": 6.57613051436375e-06, "loss": 0.0274, "step": 113240 }, { "epoch": 0.916336273161259, "grad_norm": 0.2552523612976074, "learning_rate": 6.5754604013805454e-06, "loss": 0.0235, "step": 113250 }, { "epoch": 0.9164171858564609, "grad_norm": 0.8506461381912231, "learning_rate": 6.574790256978111e-06, "loss": 0.0204, "step": 113260 }, { "epoch": 0.9164980985516628, "grad_norm": 0.49577459692955017, "learning_rate": 6.5741200811698095e-06, "loss": 0.0203, "step": 113270 }, { "epoch": 0.9165790112468646, "grad_norm": 0.24515540897846222, "learning_rate": 6.573449873969004e-06, "loss": 0.0236, "step": 113280 }, { "epoch": 0.9166599239420665, "grad_norm": 0.42167767882347107, "learning_rate": 6.572779635389065e-06, "loss": 0.0289, "step": 113290 }, { "epoch": 0.9167408366372684, "grad_norm": 0.055381596088409424, "learning_rate": 6.572109365443354e-06, "loss": 0.0253, "step": 113300 }, { "epoch": 0.9168217493324703, "grad_norm": 0.34017300605773926, "learning_rate": 6.571439064145243e-06, "loss": 0.0261, "step": 113310 }, { "epoch": 0.9169026620276721, "grad_norm": 0.5921871662139893, "learning_rate": 6.570768731508096e-06, "loss": 0.0322, "step": 113320 }, { "epoch": 0.9169835747228741, "grad_norm": 0.42991364002227783, "learning_rate": 6.570098367545282e-06, "loss": 0.0418, "step": 113330 }, { "epoch": 0.9170644874180759, "grad_norm": 0.7310345768928528, "learning_rate": 6.569427972270172e-06, "loss": 0.0468, "step": 113340 }, { "epoch": 0.9171454001132777, "grad_norm": 0.4099172055721283, "learning_rate": 6.568757545696135e-06, "loss": 0.0205, "step": 113350 }, { "epoch": 0.9172263128084797, "grad_norm": 0.21624146401882172, "learning_rate": 6.568087087836539e-06, "loss": 0.0227, "step": 113360 }, { "epoch": 0.9173072255036815, "grad_norm": 0.6818528771400452, "learning_rate": 6.567416598704756e-06, "loss": 0.0219, "step": 113370 }, { "epoch": 0.9173881381988834, "grad_norm": 0.3730120062828064, "learning_rate": 6.566746078314159e-06, "loss": 0.0225, "step": 113380 }, { "epoch": 0.9174690508940853, "grad_norm": 0.6974877119064331, "learning_rate": 6.566075526678119e-06, "loss": 0.0266, "step": 113390 }, { "epoch": 0.9175499635892872, "grad_norm": 0.2645949721336365, "learning_rate": 6.565404943810009e-06, "loss": 0.0217, "step": 113400 }, { "epoch": 0.917630876284489, "grad_norm": 0.3976088762283325, "learning_rate": 6.5647343297232005e-06, "loss": 0.0213, "step": 113410 }, { "epoch": 0.917711788979691, "grad_norm": 0.7997286319732666, "learning_rate": 6.5640636844310715e-06, "loss": 0.0287, "step": 113420 }, { "epoch": 0.9177927016748928, "grad_norm": 0.4355662763118744, "learning_rate": 6.5633930079469924e-06, "loss": 0.0198, "step": 113430 }, { "epoch": 0.9178736143700946, "grad_norm": 0.749404788017273, "learning_rate": 6.5627223002843395e-06, "loss": 0.0397, "step": 113440 }, { "epoch": 0.9179545270652966, "grad_norm": 0.8634995222091675, "learning_rate": 6.562051561456491e-06, "loss": 0.0298, "step": 113450 }, { "epoch": 0.9180354397604984, "grad_norm": 0.07609857618808746, "learning_rate": 6.561380791476821e-06, "loss": 0.0286, "step": 113460 }, { "epoch": 0.9181163524557003, "grad_norm": 0.5364691615104675, "learning_rate": 6.560709990358708e-06, "loss": 0.0303, "step": 113470 }, { "epoch": 0.9181972651509022, "grad_norm": 0.46165141463279724, "learning_rate": 6.560039158115528e-06, "loss": 0.0309, "step": 113480 }, { "epoch": 0.9182781778461041, "grad_norm": 0.4635840356349945, "learning_rate": 6.559368294760662e-06, "loss": 0.0256, "step": 113490 }, { "epoch": 0.9183590905413059, "grad_norm": 0.493770569562912, "learning_rate": 6.558697400307487e-06, "loss": 0.0387, "step": 113500 }, { "epoch": 0.9184400032365078, "grad_norm": 0.314618855714798, "learning_rate": 6.558026474769382e-06, "loss": 0.0134, "step": 113510 }, { "epoch": 0.9185209159317097, "grad_norm": 0.5272510647773743, "learning_rate": 6.5573555181597274e-06, "loss": 0.0317, "step": 113520 }, { "epoch": 0.9186018286269115, "grad_norm": 0.6752405166625977, "learning_rate": 6.556684530491906e-06, "loss": 0.0183, "step": 113530 }, { "epoch": 0.9186827413221135, "grad_norm": 0.6522656083106995, "learning_rate": 6.556013511779299e-06, "loss": 0.0298, "step": 113540 }, { "epoch": 0.9187636540173153, "grad_norm": 0.4604284167289734, "learning_rate": 6.5553424620352855e-06, "loss": 0.0257, "step": 113550 }, { "epoch": 0.9188445667125172, "grad_norm": 0.21756090223789215, "learning_rate": 6.55467138127325e-06, "loss": 0.0227, "step": 113560 }, { "epoch": 0.9189254794077191, "grad_norm": 0.5579298734664917, "learning_rate": 6.554000269506578e-06, "loss": 0.0264, "step": 113570 }, { "epoch": 0.9190063921029209, "grad_norm": 0.4444356858730316, "learning_rate": 6.553329126748649e-06, "loss": 0.0221, "step": 113580 }, { "epoch": 0.9190873047981228, "grad_norm": 0.3635611832141876, "learning_rate": 6.5526579530128506e-06, "loss": 0.0264, "step": 113590 }, { "epoch": 0.9191682174933247, "grad_norm": 0.43079623579978943, "learning_rate": 6.551986748312567e-06, "loss": 0.0322, "step": 113600 }, { "epoch": 0.9192491301885266, "grad_norm": 0.3410833179950714, "learning_rate": 6.551315512661185e-06, "loss": 0.0267, "step": 113610 }, { "epoch": 0.9193300428837284, "grad_norm": 0.6306021213531494, "learning_rate": 6.550644246072089e-06, "loss": 0.0288, "step": 113620 }, { "epoch": 0.9194109555789304, "grad_norm": 0.39849284291267395, "learning_rate": 6.549972948558666e-06, "loss": 0.021, "step": 113630 }, { "epoch": 0.9194918682741322, "grad_norm": 0.25498977303504944, "learning_rate": 6.549301620134306e-06, "loss": 0.039, "step": 113640 }, { "epoch": 0.919572780969334, "grad_norm": 0.5196565389633179, "learning_rate": 6.5486302608123964e-06, "loss": 0.0344, "step": 113650 }, { "epoch": 0.919653693664536, "grad_norm": 0.34876134991645813, "learning_rate": 6.5479588706063256e-06, "loss": 0.0253, "step": 113660 }, { "epoch": 0.9197346063597378, "grad_norm": 0.19881555438041687, "learning_rate": 6.5472874495294825e-06, "loss": 0.0366, "step": 113670 }, { "epoch": 0.9198155190549397, "grad_norm": 0.5562601685523987, "learning_rate": 6.546615997595257e-06, "loss": 0.0219, "step": 113680 }, { "epoch": 0.9198964317501416, "grad_norm": 0.36838552355766296, "learning_rate": 6.545944514817042e-06, "loss": 0.0344, "step": 113690 }, { "epoch": 0.9199773444453435, "grad_norm": 0.41426706314086914, "learning_rate": 6.545273001208226e-06, "loss": 0.021, "step": 113700 }, { "epoch": 0.9200582571405453, "grad_norm": 0.3551033139228821, "learning_rate": 6.544601456782202e-06, "loss": 0.021, "step": 113710 }, { "epoch": 0.9201391698357473, "grad_norm": 0.25473371148109436, "learning_rate": 6.543929881552364e-06, "loss": 0.0205, "step": 113720 }, { "epoch": 0.9202200825309491, "grad_norm": 0.22148747742176056, "learning_rate": 6.543258275532104e-06, "loss": 0.0443, "step": 113730 }, { "epoch": 0.9203009952261509, "grad_norm": 0.5110005140304565, "learning_rate": 6.542586638734814e-06, "loss": 0.0278, "step": 113740 }, { "epoch": 0.9203819079213529, "grad_norm": 0.1960798054933548, "learning_rate": 6.54191497117389e-06, "loss": 0.0217, "step": 113750 }, { "epoch": 0.9204628206165547, "grad_norm": 0.35281819105148315, "learning_rate": 6.54124327286273e-06, "loss": 0.0288, "step": 113760 }, { "epoch": 0.9205437333117567, "grad_norm": 0.38158366084098816, "learning_rate": 6.540571543814724e-06, "loss": 0.0306, "step": 113770 }, { "epoch": 0.9206246460069585, "grad_norm": 0.14072772860527039, "learning_rate": 6.539899784043271e-06, "loss": 0.0303, "step": 113780 }, { "epoch": 0.9207055587021604, "grad_norm": 0.3922027051448822, "learning_rate": 6.5392279935617686e-06, "loss": 0.0201, "step": 113790 }, { "epoch": 0.9207864713973622, "grad_norm": 0.22562432289123535, "learning_rate": 6.538556172383613e-06, "loss": 0.0289, "step": 113800 }, { "epoch": 0.9208673840925641, "grad_norm": 0.5598754286766052, "learning_rate": 6.537884320522203e-06, "loss": 0.029, "step": 113810 }, { "epoch": 0.920948296787766, "grad_norm": 0.40446797013282776, "learning_rate": 6.537212437990935e-06, "loss": 0.0186, "step": 113820 }, { "epoch": 0.9210292094829678, "grad_norm": 0.37431496381759644, "learning_rate": 6.536540524803212e-06, "loss": 0.0173, "step": 113830 }, { "epoch": 0.9211101221781698, "grad_norm": 0.4494782090187073, "learning_rate": 6.535868580972431e-06, "loss": 0.0364, "step": 113840 }, { "epoch": 0.9211910348733716, "grad_norm": 0.35987499356269836, "learning_rate": 6.5351966065119945e-06, "loss": 0.035, "step": 113850 }, { "epoch": 0.9212719475685736, "grad_norm": 0.36146003007888794, "learning_rate": 6.534524601435302e-06, "loss": 0.0236, "step": 113860 }, { "epoch": 0.9213528602637754, "grad_norm": 0.38188445568084717, "learning_rate": 6.533852565755756e-06, "loss": 0.0175, "step": 113870 }, { "epoch": 0.9214337729589772, "grad_norm": 0.42164236307144165, "learning_rate": 6.5331804994867586e-06, "loss": 0.0285, "step": 113880 }, { "epoch": 0.9215146856541792, "grad_norm": 0.3606167435646057, "learning_rate": 6.532508402641713e-06, "loss": 0.0248, "step": 113890 }, { "epoch": 0.921595598349381, "grad_norm": 0.3186030089855194, "learning_rate": 6.531836275234023e-06, "loss": 0.0267, "step": 113900 }, { "epoch": 0.9216765110445829, "grad_norm": 0.5893025994300842, "learning_rate": 6.531164117277091e-06, "loss": 0.0209, "step": 113910 }, { "epoch": 0.9217574237397848, "grad_norm": 0.027033481746912003, "learning_rate": 6.530491928784323e-06, "loss": 0.0281, "step": 113920 }, { "epoch": 0.9218383364349867, "grad_norm": 0.39247533679008484, "learning_rate": 6.529819709769125e-06, "loss": 0.0361, "step": 113930 }, { "epoch": 0.9219192491301885, "grad_norm": 0.4037855267524719, "learning_rate": 6.529147460244905e-06, "loss": 0.0179, "step": 113940 }, { "epoch": 0.9220001618253904, "grad_norm": 0.5062001943588257, "learning_rate": 6.528475180225063e-06, "loss": 0.0296, "step": 113950 }, { "epoch": 0.9220810745205923, "grad_norm": 0.3744424879550934, "learning_rate": 6.527802869723013e-06, "loss": 0.0327, "step": 113960 }, { "epoch": 0.9221619872157941, "grad_norm": 0.3365252912044525, "learning_rate": 6.527130528752159e-06, "loss": 0.0304, "step": 113970 }, { "epoch": 0.9222428999109961, "grad_norm": 0.6505987048149109, "learning_rate": 6.526458157325911e-06, "loss": 0.0234, "step": 113980 }, { "epoch": 0.9223238126061979, "grad_norm": 0.3631609380245209, "learning_rate": 6.525785755457678e-06, "loss": 0.0325, "step": 113990 }, { "epoch": 0.9224047253013998, "grad_norm": 0.33021876215934753, "learning_rate": 6.525113323160867e-06, "loss": 0.0183, "step": 114000 }, { "epoch": 0.9224856379966017, "grad_norm": 0.6177461743354797, "learning_rate": 6.524440860448893e-06, "loss": 0.0266, "step": 114010 }, { "epoch": 0.9225665506918035, "grad_norm": 0.8145748376846313, "learning_rate": 6.5237683673351636e-06, "loss": 0.0252, "step": 114020 }, { "epoch": 0.9226474633870054, "grad_norm": 0.2154068648815155, "learning_rate": 6.52309584383309e-06, "loss": 0.0243, "step": 114030 }, { "epoch": 0.9227283760822073, "grad_norm": 0.7550851106643677, "learning_rate": 6.522423289956086e-06, "loss": 0.0281, "step": 114040 }, { "epoch": 0.9228092887774092, "grad_norm": 0.2897433340549469, "learning_rate": 6.521750705717563e-06, "loss": 0.0402, "step": 114050 }, { "epoch": 0.922890201472611, "grad_norm": 0.33157220482826233, "learning_rate": 6.521078091130935e-06, "loss": 0.0325, "step": 114060 }, { "epoch": 0.922971114167813, "grad_norm": 0.33299708366394043, "learning_rate": 6.520405446209615e-06, "loss": 0.0185, "step": 114070 }, { "epoch": 0.9230520268630148, "grad_norm": 0.4314892292022705, "learning_rate": 6.51973277096702e-06, "loss": 0.0254, "step": 114080 }, { "epoch": 0.9231329395582167, "grad_norm": 0.525558590888977, "learning_rate": 6.519060065416562e-06, "loss": 0.0275, "step": 114090 }, { "epoch": 0.9232138522534186, "grad_norm": 0.41410189867019653, "learning_rate": 6.518387329571657e-06, "loss": 0.022, "step": 114100 }, { "epoch": 0.9232947649486204, "grad_norm": 0.5309212803840637, "learning_rate": 6.517714563445723e-06, "loss": 0.0323, "step": 114110 }, { "epoch": 0.9233756776438223, "grad_norm": 0.5245781540870667, "learning_rate": 6.5170417670521756e-06, "loss": 0.0378, "step": 114120 }, { "epoch": 0.9234565903390242, "grad_norm": 0.2549930810928345, "learning_rate": 6.516368940404434e-06, "loss": 0.0259, "step": 114130 }, { "epoch": 0.9235375030342261, "grad_norm": 0.5202728509902954, "learning_rate": 6.515696083515913e-06, "loss": 0.0281, "step": 114140 }, { "epoch": 0.9236184157294279, "grad_norm": 0.3592221140861511, "learning_rate": 6.515023196400035e-06, "loss": 0.0491, "step": 114150 }, { "epoch": 0.9236993284246299, "grad_norm": 0.24683018028736115, "learning_rate": 6.514350279070217e-06, "loss": 0.0178, "step": 114160 }, { "epoch": 0.9237802411198317, "grad_norm": 0.9751449227333069, "learning_rate": 6.51367733153988e-06, "loss": 0.0355, "step": 114170 }, { "epoch": 0.9238611538150335, "grad_norm": 0.19003336131572723, "learning_rate": 6.5130043538224454e-06, "loss": 0.0176, "step": 114180 }, { "epoch": 0.9239420665102355, "grad_norm": 0.5798590183258057, "learning_rate": 6.51233134593133e-06, "loss": 0.0288, "step": 114190 }, { "epoch": 0.9240229792054373, "grad_norm": 0.6862103343009949, "learning_rate": 6.5116583078799625e-06, "loss": 0.0394, "step": 114200 }, { "epoch": 0.9241038919006392, "grad_norm": 0.4652461111545563, "learning_rate": 6.5109852396817585e-06, "loss": 0.0322, "step": 114210 }, { "epoch": 0.9241848045958411, "grad_norm": 0.4135796129703522, "learning_rate": 6.510312141350143e-06, "loss": 0.0261, "step": 114220 }, { "epoch": 0.924265717291043, "grad_norm": 0.3681754469871521, "learning_rate": 6.509639012898542e-06, "loss": 0.0282, "step": 114230 }, { "epoch": 0.9243466299862448, "grad_norm": 0.2557118237018585, "learning_rate": 6.508965854340379e-06, "loss": 0.0251, "step": 114240 }, { "epoch": 0.9244275426814467, "grad_norm": 0.4287669062614441, "learning_rate": 6.508292665689074e-06, "loss": 0.0336, "step": 114250 }, { "epoch": 0.9245084553766486, "grad_norm": 0.2216467559337616, "learning_rate": 6.507619446958059e-06, "loss": 0.0227, "step": 114260 }, { "epoch": 0.9245893680718504, "grad_norm": 0.31470537185668945, "learning_rate": 6.506946198160755e-06, "loss": 0.0276, "step": 114270 }, { "epoch": 0.9246702807670524, "grad_norm": 0.9755870699882507, "learning_rate": 6.506272919310592e-06, "loss": 0.0423, "step": 114280 }, { "epoch": 0.9247511934622542, "grad_norm": 0.6295934915542603, "learning_rate": 6.505599610420995e-06, "loss": 0.0353, "step": 114290 }, { "epoch": 0.9248321061574561, "grad_norm": 0.3314964473247528, "learning_rate": 6.504926271505391e-06, "loss": 0.0434, "step": 114300 }, { "epoch": 0.924913018852658, "grad_norm": 0.6590321660041809, "learning_rate": 6.504252902577212e-06, "loss": 0.0209, "step": 114310 }, { "epoch": 0.9249939315478598, "grad_norm": 0.39576414227485657, "learning_rate": 6.503579503649882e-06, "loss": 0.0441, "step": 114320 }, { "epoch": 0.9250748442430617, "grad_norm": 0.23947137594223022, "learning_rate": 6.502906074736835e-06, "loss": 0.0288, "step": 114330 }, { "epoch": 0.9251557569382636, "grad_norm": 0.4686635434627533, "learning_rate": 6.502232615851497e-06, "loss": 0.02, "step": 114340 }, { "epoch": 0.9252366696334655, "grad_norm": 0.2210606187582016, "learning_rate": 6.501559127007303e-06, "loss": 0.0304, "step": 114350 }, { "epoch": 0.9253175823286673, "grad_norm": 0.27293720841407776, "learning_rate": 6.50088560821768e-06, "loss": 0.0262, "step": 114360 }, { "epoch": 0.9253984950238693, "grad_norm": 0.6829143166542053, "learning_rate": 6.500212059496064e-06, "loss": 0.0337, "step": 114370 }, { "epoch": 0.9254794077190711, "grad_norm": 0.14523547887802124, "learning_rate": 6.4995384808558845e-06, "loss": 0.0275, "step": 114380 }, { "epoch": 0.925560320414273, "grad_norm": 0.04767376929521561, "learning_rate": 6.498864872310577e-06, "loss": 0.0219, "step": 114390 }, { "epoch": 0.9256412331094749, "grad_norm": 0.4918143153190613, "learning_rate": 6.498191233873572e-06, "loss": 0.017, "step": 114400 }, { "epoch": 0.9257221458046767, "grad_norm": 0.485821396112442, "learning_rate": 6.497517565558306e-06, "loss": 0.0223, "step": 114410 }, { "epoch": 0.9258030584998787, "grad_norm": 0.5561384558677673, "learning_rate": 6.496843867378214e-06, "loss": 0.041, "step": 114420 }, { "epoch": 0.9258839711950805, "grad_norm": 0.45255714654922485, "learning_rate": 6.496170139346731e-06, "loss": 0.0434, "step": 114430 }, { "epoch": 0.9259648838902824, "grad_norm": 0.7996748089790344, "learning_rate": 6.495496381477293e-06, "loss": 0.0279, "step": 114440 }, { "epoch": 0.9260457965854842, "grad_norm": 0.4138067960739136, "learning_rate": 6.494822593783336e-06, "loss": 0.0233, "step": 114450 }, { "epoch": 0.9261267092806862, "grad_norm": 0.4011629521846771, "learning_rate": 6.4941487762782996e-06, "loss": 0.0209, "step": 114460 }, { "epoch": 0.926207621975888, "grad_norm": 0.5689182877540588, "learning_rate": 6.4934749289756204e-06, "loss": 0.0224, "step": 114470 }, { "epoch": 0.9262885346710898, "grad_norm": 0.3023223280906677, "learning_rate": 6.492801051888735e-06, "loss": 0.0276, "step": 114480 }, { "epoch": 0.9263694473662918, "grad_norm": 0.5848714113235474, "learning_rate": 6.492127145031085e-06, "loss": 0.0265, "step": 114490 }, { "epoch": 0.9264503600614936, "grad_norm": 0.2929220199584961, "learning_rate": 6.49145320841611e-06, "loss": 0.0258, "step": 114500 }, { "epoch": 0.9265312727566956, "grad_norm": 0.9220371842384338, "learning_rate": 6.490779242057248e-06, "loss": 0.0383, "step": 114510 }, { "epoch": 0.9266121854518974, "grad_norm": 0.41654250025749207, "learning_rate": 6.49010524596794e-06, "loss": 0.0249, "step": 114520 }, { "epoch": 0.9266930981470993, "grad_norm": 0.20950807631015778, "learning_rate": 6.48943122016163e-06, "loss": 0.0256, "step": 114530 }, { "epoch": 0.9267740108423012, "grad_norm": 0.25626853108406067, "learning_rate": 6.48875716465176e-06, "loss": 0.0348, "step": 114540 }, { "epoch": 0.926854923537503, "grad_norm": 0.15880341827869415, "learning_rate": 6.488083079451769e-06, "loss": 0.0211, "step": 114550 }, { "epoch": 0.9269358362327049, "grad_norm": 0.18352991342544556, "learning_rate": 6.487408964575104e-06, "loss": 0.0236, "step": 114560 }, { "epoch": 0.9270167489279068, "grad_norm": 0.38020986318588257, "learning_rate": 6.486734820035207e-06, "loss": 0.0245, "step": 114570 }, { "epoch": 0.9270976616231087, "grad_norm": 0.29714205861091614, "learning_rate": 6.486060645845523e-06, "loss": 0.0164, "step": 114580 }, { "epoch": 0.9271785743183105, "grad_norm": 0.3494773805141449, "learning_rate": 6.485386442019496e-06, "loss": 0.0318, "step": 114590 }, { "epoch": 0.9272594870135125, "grad_norm": 0.36955392360687256, "learning_rate": 6.484712208570571e-06, "loss": 0.0415, "step": 114600 }, { "epoch": 0.9273403997087143, "grad_norm": 1.1008822917938232, "learning_rate": 6.4840379455121974e-06, "loss": 0.0488, "step": 114610 }, { "epoch": 0.9274213124039161, "grad_norm": 0.5637485384941101, "learning_rate": 6.483363652857819e-06, "loss": 0.0289, "step": 114620 }, { "epoch": 0.9275022250991181, "grad_norm": 0.6196892857551575, "learning_rate": 6.4826893306208834e-06, "loss": 0.0237, "step": 114630 }, { "epoch": 0.9275831377943199, "grad_norm": 0.40471118688583374, "learning_rate": 6.4820149788148405e-06, "loss": 0.0334, "step": 114640 }, { "epoch": 0.9276640504895218, "grad_norm": 0.26439958810806274, "learning_rate": 6.481340597453136e-06, "loss": 0.0187, "step": 114650 }, { "epoch": 0.9277449631847237, "grad_norm": 0.2608315050601959, "learning_rate": 6.4806661865492225e-06, "loss": 0.0256, "step": 114660 }, { "epoch": 0.9278258758799256, "grad_norm": 0.6784178614616394, "learning_rate": 6.479991746116546e-06, "loss": 0.0292, "step": 114670 }, { "epoch": 0.9279067885751274, "grad_norm": 0.39313656091690063, "learning_rate": 6.47931727616856e-06, "loss": 0.0136, "step": 114680 }, { "epoch": 0.9279877012703294, "grad_norm": 0.31560418009757996, "learning_rate": 6.478642776718713e-06, "loss": 0.0234, "step": 114690 }, { "epoch": 0.9280686139655312, "grad_norm": 0.39605438709259033, "learning_rate": 6.477968247780457e-06, "loss": 0.015, "step": 114700 }, { "epoch": 0.928149526660733, "grad_norm": 0.18652674555778503, "learning_rate": 6.477293689367246e-06, "loss": 0.0149, "step": 114710 }, { "epoch": 0.928230439355935, "grad_norm": 0.547528862953186, "learning_rate": 6.47661910149253e-06, "loss": 0.0352, "step": 114720 }, { "epoch": 0.9283113520511368, "grad_norm": 0.342536598443985, "learning_rate": 6.4759444841697625e-06, "loss": 0.021, "step": 114730 }, { "epoch": 0.9283922647463387, "grad_norm": 0.08714987337589264, "learning_rate": 6.475269837412399e-06, "loss": 0.0239, "step": 114740 }, { "epoch": 0.9284731774415406, "grad_norm": 0.1445925384759903, "learning_rate": 6.474595161233893e-06, "loss": 0.0264, "step": 114750 }, { "epoch": 0.9285540901367425, "grad_norm": 0.6644790768623352, "learning_rate": 6.4739204556477e-06, "loss": 0.0427, "step": 114760 }, { "epoch": 0.9286350028319443, "grad_norm": 0.2284337729215622, "learning_rate": 6.473245720667274e-06, "loss": 0.0276, "step": 114770 }, { "epoch": 0.9287159155271462, "grad_norm": 0.28702473640441895, "learning_rate": 6.472570956306073e-06, "loss": 0.0238, "step": 114780 }, { "epoch": 0.9287968282223481, "grad_norm": 0.35360610485076904, "learning_rate": 6.471896162577554e-06, "loss": 0.0327, "step": 114790 }, { "epoch": 0.9288777409175499, "grad_norm": 0.3750341236591339, "learning_rate": 6.471221339495172e-06, "loss": 0.0336, "step": 114800 }, { "epoch": 0.9289586536127519, "grad_norm": 0.5061252117156982, "learning_rate": 6.4705464870723865e-06, "loss": 0.0322, "step": 114810 }, { "epoch": 0.9290395663079537, "grad_norm": 0.5670151114463806, "learning_rate": 6.469871605322655e-06, "loss": 0.0313, "step": 114820 }, { "epoch": 0.9291204790031556, "grad_norm": 0.515342652797699, "learning_rate": 6.4691966942594385e-06, "loss": 0.0286, "step": 114830 }, { "epoch": 0.9292013916983575, "grad_norm": 0.4670683443546295, "learning_rate": 6.468521753896195e-06, "loss": 0.0399, "step": 114840 }, { "epoch": 0.9292823043935593, "grad_norm": 0.5548128485679626, "learning_rate": 6.467846784246385e-06, "loss": 0.039, "step": 114850 }, { "epoch": 0.9293632170887612, "grad_norm": 0.45319947600364685, "learning_rate": 6.46717178532347e-06, "loss": 0.023, "step": 114860 }, { "epoch": 0.9294441297839631, "grad_norm": 0.48875510692596436, "learning_rate": 6.466496757140912e-06, "loss": 0.0255, "step": 114870 }, { "epoch": 0.929525042479165, "grad_norm": 0.29642102122306824, "learning_rate": 6.465821699712172e-06, "loss": 0.0263, "step": 114880 }, { "epoch": 0.9296059551743668, "grad_norm": 0.419932097196579, "learning_rate": 6.465146613050712e-06, "loss": 0.0454, "step": 114890 }, { "epoch": 0.9296868678695688, "grad_norm": 0.7362931370735168, "learning_rate": 6.464471497169995e-06, "loss": 0.0376, "step": 114900 }, { "epoch": 0.9297677805647706, "grad_norm": 0.6506897807121277, "learning_rate": 6.463796352083487e-06, "loss": 0.03, "step": 114910 }, { "epoch": 0.9298486932599724, "grad_norm": 0.2979544997215271, "learning_rate": 6.46312117780465e-06, "loss": 0.0257, "step": 114920 }, { "epoch": 0.9299296059551744, "grad_norm": 0.298645555973053, "learning_rate": 6.46244597434695e-06, "loss": 0.0238, "step": 114930 }, { "epoch": 0.9300105186503762, "grad_norm": 0.37658071517944336, "learning_rate": 6.461770741723854e-06, "loss": 0.0306, "step": 114940 }, { "epoch": 0.9300914313455781, "grad_norm": 0.254512220621109, "learning_rate": 6.461095479948824e-06, "loss": 0.0242, "step": 114950 }, { "epoch": 0.93017234404078, "grad_norm": 0.32710644602775574, "learning_rate": 6.46042018903533e-06, "loss": 0.0187, "step": 114960 }, { "epoch": 0.9302532567359819, "grad_norm": 0.030345717445015907, "learning_rate": 6.45974486899684e-06, "loss": 0.0139, "step": 114970 }, { "epoch": 0.9303341694311837, "grad_norm": 0.38812190294265747, "learning_rate": 6.459069519846819e-06, "loss": 0.0175, "step": 114980 }, { "epoch": 0.9304150821263857, "grad_norm": 0.6736276745796204, "learning_rate": 6.458394141598736e-06, "loss": 0.0337, "step": 114990 }, { "epoch": 0.9304959948215875, "grad_norm": 0.3066943883895874, "learning_rate": 6.457718734266062e-06, "loss": 0.0123, "step": 115000 }, { "epoch": 0.9305769075167893, "grad_norm": 0.18943163752555847, "learning_rate": 6.457043297862264e-06, "loss": 0.0278, "step": 115010 }, { "epoch": 0.9306578202119913, "grad_norm": 0.5815531611442566, "learning_rate": 6.456367832400815e-06, "loss": 0.0337, "step": 115020 }, { "epoch": 0.9307387329071931, "grad_norm": 0.4652842879295349, "learning_rate": 6.455692337895182e-06, "loss": 0.0328, "step": 115030 }, { "epoch": 0.930819645602395, "grad_norm": 0.4397602379322052, "learning_rate": 6.45501681435884e-06, "loss": 0.0221, "step": 115040 }, { "epoch": 0.9309005582975969, "grad_norm": 0.15681234002113342, "learning_rate": 6.454341261805259e-06, "loss": 0.0404, "step": 115050 }, { "epoch": 0.9309814709927988, "grad_norm": 0.18190641701221466, "learning_rate": 6.453665680247911e-06, "loss": 0.0219, "step": 115060 }, { "epoch": 0.9310623836880006, "grad_norm": 0.28289395570755005, "learning_rate": 6.452990069700271e-06, "loss": 0.0202, "step": 115070 }, { "epoch": 0.9311432963832025, "grad_norm": 0.5005539655685425, "learning_rate": 6.452314430175811e-06, "loss": 0.0327, "step": 115080 }, { "epoch": 0.9312242090784044, "grad_norm": 0.677975058555603, "learning_rate": 6.451638761688006e-06, "loss": 0.0271, "step": 115090 }, { "epoch": 0.9313051217736062, "grad_norm": 0.5498334169387817, "learning_rate": 6.45096306425033e-06, "loss": 0.032, "step": 115100 }, { "epoch": 0.9313860344688082, "grad_norm": 0.5489168763160706, "learning_rate": 6.450287337876257e-06, "loss": 0.0396, "step": 115110 }, { "epoch": 0.93146694716401, "grad_norm": 0.2851606011390686, "learning_rate": 6.449611582579267e-06, "loss": 0.0209, "step": 115120 }, { "epoch": 0.931547859859212, "grad_norm": 0.9424200654029846, "learning_rate": 6.448935798372835e-06, "loss": 0.035, "step": 115130 }, { "epoch": 0.9316287725544138, "grad_norm": 0.4115005135536194, "learning_rate": 6.448259985270435e-06, "loss": 0.0239, "step": 115140 }, { "epoch": 0.9317096852496156, "grad_norm": 0.06839628517627716, "learning_rate": 6.447584143285548e-06, "loss": 0.021, "step": 115150 }, { "epoch": 0.9317905979448176, "grad_norm": 0.6011661887168884, "learning_rate": 6.446908272431653e-06, "loss": 0.0399, "step": 115160 }, { "epoch": 0.9318715106400194, "grad_norm": 0.5129987597465515, "learning_rate": 6.4462323727222255e-06, "loss": 0.0456, "step": 115170 }, { "epoch": 0.9319524233352213, "grad_norm": 0.31246015429496765, "learning_rate": 6.445556444170746e-06, "loss": 0.0261, "step": 115180 }, { "epoch": 0.9320333360304232, "grad_norm": 0.5269485712051392, "learning_rate": 6.444880486790695e-06, "loss": 0.0339, "step": 115190 }, { "epoch": 0.9321142487256251, "grad_norm": 0.2455223947763443, "learning_rate": 6.444204500595553e-06, "loss": 0.0299, "step": 115200 }, { "epoch": 0.9321951614208269, "grad_norm": 0.6320953965187073, "learning_rate": 6.4435284855988024e-06, "loss": 0.0287, "step": 115210 }, { "epoch": 0.9322760741160288, "grad_norm": 0.472856342792511, "learning_rate": 6.442852441813922e-06, "loss": 0.031, "step": 115220 }, { "epoch": 0.9323569868112307, "grad_norm": 0.651383638381958, "learning_rate": 6.442176369254396e-06, "loss": 0.0362, "step": 115230 }, { "epoch": 0.9324378995064325, "grad_norm": 0.6525214910507202, "learning_rate": 6.441500267933708e-06, "loss": 0.0373, "step": 115240 }, { "epoch": 0.9325188122016345, "grad_norm": 0.6199836134910583, "learning_rate": 6.440824137865338e-06, "loss": 0.0255, "step": 115250 }, { "epoch": 0.9325997248968363, "grad_norm": 0.08670413494110107, "learning_rate": 6.4401479790627745e-06, "loss": 0.025, "step": 115260 }, { "epoch": 0.9326806375920382, "grad_norm": 0.4216073155403137, "learning_rate": 6.439471791539499e-06, "loss": 0.015, "step": 115270 }, { "epoch": 0.9327615502872401, "grad_norm": 0.5171701908111572, "learning_rate": 6.4387955753089974e-06, "loss": 0.0371, "step": 115280 }, { "epoch": 0.932842462982442, "grad_norm": 0.27773016691207886, "learning_rate": 6.438119330384755e-06, "loss": 0.0283, "step": 115290 }, { "epoch": 0.9329233756776438, "grad_norm": 0.28005003929138184, "learning_rate": 6.4374430567802595e-06, "loss": 0.0155, "step": 115300 }, { "epoch": 0.9330042883728457, "grad_norm": 0.42906466126441956, "learning_rate": 6.436766754508998e-06, "loss": 0.0226, "step": 115310 }, { "epoch": 0.9330852010680476, "grad_norm": 0.13582246005535126, "learning_rate": 6.436090423584454e-06, "loss": 0.0178, "step": 115320 }, { "epoch": 0.9331661137632494, "grad_norm": 0.41609206795692444, "learning_rate": 6.435414064020119e-06, "loss": 0.0177, "step": 115330 }, { "epoch": 0.9332470264584514, "grad_norm": 0.18711179494857788, "learning_rate": 6.434737675829481e-06, "loss": 0.0319, "step": 115340 }, { "epoch": 0.9333279391536532, "grad_norm": 0.6204723119735718, "learning_rate": 6.4340612590260294e-06, "loss": 0.0291, "step": 115350 }, { "epoch": 0.9334088518488551, "grad_norm": 0.40620192885398865, "learning_rate": 6.433384813623254e-06, "loss": 0.0234, "step": 115360 }, { "epoch": 0.933489764544057, "grad_norm": 0.3482615649700165, "learning_rate": 6.432708339634644e-06, "loss": 0.0214, "step": 115370 }, { "epoch": 0.9335706772392588, "grad_norm": 0.6024184226989746, "learning_rate": 6.432031837073689e-06, "loss": 0.0334, "step": 115380 }, { "epoch": 0.9336515899344607, "grad_norm": 0.3674362897872925, "learning_rate": 6.4313553059538835e-06, "loss": 0.0236, "step": 115390 }, { "epoch": 0.9337325026296626, "grad_norm": 1.1436524391174316, "learning_rate": 6.430678746288719e-06, "loss": 0.0307, "step": 115400 }, { "epoch": 0.9338134153248645, "grad_norm": 0.20603208243846893, "learning_rate": 6.430002158091686e-06, "loss": 0.0267, "step": 115410 }, { "epoch": 0.9338943280200663, "grad_norm": 0.46481233835220337, "learning_rate": 6.42932554137628e-06, "loss": 0.026, "step": 115420 }, { "epoch": 0.9339752407152683, "grad_norm": 0.4265654683113098, "learning_rate": 6.428648896155992e-06, "loss": 0.0263, "step": 115430 }, { "epoch": 0.9340561534104701, "grad_norm": 0.3342074155807495, "learning_rate": 6.427972222444318e-06, "loss": 0.0221, "step": 115440 }, { "epoch": 0.9341370661056719, "grad_norm": 0.2940434217453003, "learning_rate": 6.427295520254754e-06, "loss": 0.0218, "step": 115450 }, { "epoch": 0.9342179788008739, "grad_norm": 0.21020127832889557, "learning_rate": 6.426618789600793e-06, "loss": 0.0265, "step": 115460 }, { "epoch": 0.9342988914960757, "grad_norm": 0.42742758989334106, "learning_rate": 6.425942030495932e-06, "loss": 0.0222, "step": 115470 }, { "epoch": 0.9343798041912776, "grad_norm": 0.47803959250450134, "learning_rate": 6.425265242953668e-06, "loss": 0.0331, "step": 115480 }, { "epoch": 0.9344607168864795, "grad_norm": 0.25252240896224976, "learning_rate": 6.424588426987497e-06, "loss": 0.0199, "step": 115490 }, { "epoch": 0.9345416295816814, "grad_norm": 0.4258207082748413, "learning_rate": 6.423911582610918e-06, "loss": 0.0284, "step": 115500 }, { "epoch": 0.9346225422768832, "grad_norm": 4.157339096069336, "learning_rate": 6.423234709837427e-06, "loss": 0.0342, "step": 115510 }, { "epoch": 0.9347034549720851, "grad_norm": 0.2666739523410797, "learning_rate": 6.422557808680525e-06, "loss": 0.023, "step": 115520 }, { "epoch": 0.934784367667287, "grad_norm": 0.20137107372283936, "learning_rate": 6.421880879153712e-06, "loss": 0.0328, "step": 115530 }, { "epoch": 0.9348652803624888, "grad_norm": 0.5253541469573975, "learning_rate": 6.421203921270486e-06, "loss": 0.0342, "step": 115540 }, { "epoch": 0.9349461930576908, "grad_norm": 0.5778594017028809, "learning_rate": 6.420526935044347e-06, "loss": 0.0247, "step": 115550 }, { "epoch": 0.9350271057528926, "grad_norm": 0.7596262693405151, "learning_rate": 6.419849920488796e-06, "loss": 0.0383, "step": 115560 }, { "epoch": 0.9351080184480945, "grad_norm": 0.42038413882255554, "learning_rate": 6.419172877617339e-06, "loss": 0.0181, "step": 115570 }, { "epoch": 0.9351889311432964, "grad_norm": 0.3668648898601532, "learning_rate": 6.418495806443473e-06, "loss": 0.0287, "step": 115580 }, { "epoch": 0.9352698438384982, "grad_norm": 0.47717422246932983, "learning_rate": 6.417818706980703e-06, "loss": 0.0316, "step": 115590 }, { "epoch": 0.9353507565337001, "grad_norm": 0.13618001341819763, "learning_rate": 6.417141579242532e-06, "loss": 0.0348, "step": 115600 }, { "epoch": 0.935431669228902, "grad_norm": 0.37016916275024414, "learning_rate": 6.416464423242465e-06, "loss": 0.0291, "step": 115610 }, { "epoch": 0.9355125819241039, "grad_norm": 0.4197920262813568, "learning_rate": 6.415787238994004e-06, "loss": 0.02, "step": 115620 }, { "epoch": 0.9355934946193057, "grad_norm": 0.3175155818462372, "learning_rate": 6.415110026510656e-06, "loss": 0.0273, "step": 115630 }, { "epoch": 0.9356744073145077, "grad_norm": 1.0033715963363647, "learning_rate": 6.414432785805926e-06, "loss": 0.0268, "step": 115640 }, { "epoch": 0.9357553200097095, "grad_norm": 0.6062372922897339, "learning_rate": 6.413755516893319e-06, "loss": 0.0228, "step": 115650 }, { "epoch": 0.9358362327049115, "grad_norm": 0.35480988025665283, "learning_rate": 6.413078219786344e-06, "loss": 0.0353, "step": 115660 }, { "epoch": 0.9359171454001133, "grad_norm": 0.24765652418136597, "learning_rate": 6.4124008944985076e-06, "loss": 0.021, "step": 115670 }, { "epoch": 0.9359980580953151, "grad_norm": 0.3241812586784363, "learning_rate": 6.411723541043316e-06, "loss": 0.0254, "step": 115680 }, { "epoch": 0.936078970790517, "grad_norm": 0.25536054372787476, "learning_rate": 6.411046159434278e-06, "loss": 0.0184, "step": 115690 }, { "epoch": 0.9361598834857189, "grad_norm": 0.39324378967285156, "learning_rate": 6.4103687496849054e-06, "loss": 0.0344, "step": 115700 }, { "epoch": 0.9362407961809208, "grad_norm": 0.6074846386909485, "learning_rate": 6.409691311808703e-06, "loss": 0.0339, "step": 115710 }, { "epoch": 0.9363217088761226, "grad_norm": 0.763431966304779, "learning_rate": 6.409013845819185e-06, "loss": 0.0435, "step": 115720 }, { "epoch": 0.9364026215713246, "grad_norm": 0.4202411472797394, "learning_rate": 6.40833635172986e-06, "loss": 0.0386, "step": 115730 }, { "epoch": 0.9364835342665264, "grad_norm": 0.8650287985801697, "learning_rate": 6.40765882955424e-06, "loss": 0.0299, "step": 115740 }, { "epoch": 0.9365644469617282, "grad_norm": 0.15541592240333557, "learning_rate": 6.406981279305835e-06, "loss": 0.0176, "step": 115750 }, { "epoch": 0.9366453596569302, "grad_norm": 0.2713872790336609, "learning_rate": 6.40630370099816e-06, "loss": 0.0253, "step": 115760 }, { "epoch": 0.936726272352132, "grad_norm": 0.33862701058387756, "learning_rate": 6.405626094644727e-06, "loss": 0.0257, "step": 115770 }, { "epoch": 0.936807185047334, "grad_norm": 0.6949165463447571, "learning_rate": 6.404948460259048e-06, "loss": 0.0275, "step": 115780 }, { "epoch": 0.9368880977425358, "grad_norm": 0.5528290271759033, "learning_rate": 6.404270797854638e-06, "loss": 0.0349, "step": 115790 }, { "epoch": 0.9369690104377377, "grad_norm": 0.33702531456947327, "learning_rate": 6.403593107445012e-06, "loss": 0.0323, "step": 115800 }, { "epoch": 0.9370499231329396, "grad_norm": 0.7140340805053711, "learning_rate": 6.402915389043683e-06, "loss": 0.0473, "step": 115810 }, { "epoch": 0.9371308358281414, "grad_norm": 0.6210941076278687, "learning_rate": 6.402237642664169e-06, "loss": 0.0248, "step": 115820 }, { "epoch": 0.9372117485233433, "grad_norm": 0.5220000147819519, "learning_rate": 6.4015598683199865e-06, "loss": 0.0305, "step": 115830 }, { "epoch": 0.9372926612185452, "grad_norm": 0.3191349506378174, "learning_rate": 6.4008820660246494e-06, "loss": 0.0177, "step": 115840 }, { "epoch": 0.9373735739137471, "grad_norm": 0.35545673966407776, "learning_rate": 6.400204235791678e-06, "loss": 0.0321, "step": 115850 }, { "epoch": 0.9374544866089489, "grad_norm": 0.18002404272556305, "learning_rate": 6.399526377634589e-06, "loss": 0.0185, "step": 115860 }, { "epoch": 0.9375353993041509, "grad_norm": 0.3287126123905182, "learning_rate": 6.3988484915669025e-06, "loss": 0.0184, "step": 115870 }, { "epoch": 0.9376163119993527, "grad_norm": 0.3920912742614746, "learning_rate": 6.398170577602134e-06, "loss": 0.0337, "step": 115880 }, { "epoch": 0.9376972246945545, "grad_norm": 0.38663434982299805, "learning_rate": 6.397492635753806e-06, "loss": 0.0246, "step": 115890 }, { "epoch": 0.9377781373897565, "grad_norm": 0.4323361814022064, "learning_rate": 6.396814666035437e-06, "loss": 0.0245, "step": 115900 }, { "epoch": 0.9378590500849583, "grad_norm": 0.3289196193218231, "learning_rate": 6.396136668460549e-06, "loss": 0.0331, "step": 115910 }, { "epoch": 0.9379399627801602, "grad_norm": 0.4376518726348877, "learning_rate": 6.39545864304266e-06, "loss": 0.0236, "step": 115920 }, { "epoch": 0.9380208754753621, "grad_norm": 0.5737155675888062, "learning_rate": 6.394780589795296e-06, "loss": 0.0317, "step": 115930 }, { "epoch": 0.938101788170564, "grad_norm": 0.35684913396835327, "learning_rate": 6.394102508731979e-06, "loss": 0.0281, "step": 115940 }, { "epoch": 0.9381827008657658, "grad_norm": 0.45387589931488037, "learning_rate": 6.393424399866228e-06, "loss": 0.0306, "step": 115950 }, { "epoch": 0.9382636135609678, "grad_norm": 0.44450101256370544, "learning_rate": 6.3927462632115715e-06, "loss": 0.0284, "step": 115960 }, { "epoch": 0.9383445262561696, "grad_norm": 0.18744318187236786, "learning_rate": 6.39206809878153e-06, "loss": 0.0167, "step": 115970 }, { "epoch": 0.9384254389513714, "grad_norm": 0.3185156285762787, "learning_rate": 6.391389906589629e-06, "loss": 0.02, "step": 115980 }, { "epoch": 0.9385063516465734, "grad_norm": 0.16204746067523956, "learning_rate": 6.390711686649394e-06, "loss": 0.0237, "step": 115990 }, { "epoch": 0.9385872643417752, "grad_norm": 0.7443621158599854, "learning_rate": 6.390033438974348e-06, "loss": 0.0251, "step": 116000 }, { "epoch": 0.9386681770369771, "grad_norm": 0.31285780668258667, "learning_rate": 6.389355163578024e-06, "loss": 0.0224, "step": 116010 }, { "epoch": 0.938749089732179, "grad_norm": 0.42701345682144165, "learning_rate": 6.388676860473941e-06, "loss": 0.0226, "step": 116020 }, { "epoch": 0.9388300024273809, "grad_norm": 0.5227283835411072, "learning_rate": 6.38799852967563e-06, "loss": 0.0314, "step": 116030 }, { "epoch": 0.9389109151225827, "grad_norm": 0.8014439344406128, "learning_rate": 6.387320171196618e-06, "loss": 0.0343, "step": 116040 }, { "epoch": 0.9389918278177846, "grad_norm": 0.2540060579776764, "learning_rate": 6.386641785050437e-06, "loss": 0.0275, "step": 116050 }, { "epoch": 0.9390727405129865, "grad_norm": 0.5516749024391174, "learning_rate": 6.38596337125061e-06, "loss": 0.0279, "step": 116060 }, { "epoch": 0.9391536532081883, "grad_norm": 0.3809971809387207, "learning_rate": 6.38528492981067e-06, "loss": 0.0372, "step": 116070 }, { "epoch": 0.9392345659033903, "grad_norm": 0.31977683305740356, "learning_rate": 6.384606460744147e-06, "loss": 0.0288, "step": 116080 }, { "epoch": 0.9393154785985921, "grad_norm": 0.35041624307632446, "learning_rate": 6.383927964064571e-06, "loss": 0.0217, "step": 116090 }, { "epoch": 0.939396391293794, "grad_norm": 0.3377957344055176, "learning_rate": 6.3832494397854736e-06, "loss": 0.0385, "step": 116100 }, { "epoch": 0.9394773039889959, "grad_norm": 0.7924112677574158, "learning_rate": 6.382570887920385e-06, "loss": 0.0268, "step": 116110 }, { "epoch": 0.9395582166841977, "grad_norm": 0.7094995975494385, "learning_rate": 6.38189230848284e-06, "loss": 0.0272, "step": 116120 }, { "epoch": 0.9396391293793996, "grad_norm": 0.2386271208524704, "learning_rate": 6.381213701486372e-06, "loss": 0.0366, "step": 116130 }, { "epoch": 0.9397200420746015, "grad_norm": 0.33958134055137634, "learning_rate": 6.380535066944509e-06, "loss": 0.0152, "step": 116140 }, { "epoch": 0.9398009547698034, "grad_norm": 0.21302106976509094, "learning_rate": 6.37985640487079e-06, "loss": 0.0227, "step": 116150 }, { "epoch": 0.9398818674650052, "grad_norm": 0.771236777305603, "learning_rate": 6.379177715278748e-06, "loss": 0.028, "step": 116160 }, { "epoch": 0.9399627801602072, "grad_norm": 0.39960142970085144, "learning_rate": 6.378498998181919e-06, "loss": 0.0198, "step": 116170 }, { "epoch": 0.940043692855409, "grad_norm": 0.41687506437301636, "learning_rate": 6.377820253593837e-06, "loss": 0.0215, "step": 116180 }, { "epoch": 0.9401246055506108, "grad_norm": 0.2111392468214035, "learning_rate": 6.377141481528037e-06, "loss": 0.0242, "step": 116190 }, { "epoch": 0.9402055182458128, "grad_norm": 0.4646829664707184, "learning_rate": 6.376462681998061e-06, "loss": 0.0266, "step": 116200 }, { "epoch": 0.9402864309410146, "grad_norm": 0.29561832547187805, "learning_rate": 6.375783855017441e-06, "loss": 0.0273, "step": 116210 }, { "epoch": 0.9403673436362165, "grad_norm": 0.0826735720038414, "learning_rate": 6.375105000599715e-06, "loss": 0.0261, "step": 116220 }, { "epoch": 0.9404482563314184, "grad_norm": 0.4944678544998169, "learning_rate": 6.3744261187584236e-06, "loss": 0.0327, "step": 116230 }, { "epoch": 0.9405291690266203, "grad_norm": 0.22936144471168518, "learning_rate": 6.373747209507106e-06, "loss": 0.0378, "step": 116240 }, { "epoch": 0.9406100817218221, "grad_norm": 0.5187225341796875, "learning_rate": 6.373068272859298e-06, "loss": 0.0197, "step": 116250 }, { "epoch": 0.9406909944170241, "grad_norm": 0.11800865828990936, "learning_rate": 6.372389308828544e-06, "loss": 0.0194, "step": 116260 }, { "epoch": 0.9407719071122259, "grad_norm": 0.4113437235355377, "learning_rate": 6.371710317428382e-06, "loss": 0.029, "step": 116270 }, { "epoch": 0.9408528198074277, "grad_norm": 0.5396967530250549, "learning_rate": 6.371031298672355e-06, "loss": 0.0383, "step": 116280 }, { "epoch": 0.9409337325026297, "grad_norm": 0.42214614152908325, "learning_rate": 6.370352252574001e-06, "loss": 0.028, "step": 116290 }, { "epoch": 0.9410146451978315, "grad_norm": 0.8384644985198975, "learning_rate": 6.369673179146863e-06, "loss": 0.0363, "step": 116300 }, { "epoch": 0.9410955578930335, "grad_norm": 0.5178996324539185, "learning_rate": 6.368994078404489e-06, "loss": 0.0217, "step": 116310 }, { "epoch": 0.9411764705882353, "grad_norm": 0.5985778570175171, "learning_rate": 6.368314950360416e-06, "loss": 0.0171, "step": 116320 }, { "epoch": 0.9412573832834372, "grad_norm": 0.20005634427070618, "learning_rate": 6.367635795028189e-06, "loss": 0.0234, "step": 116330 }, { "epoch": 0.941338295978639, "grad_norm": 0.42821401357650757, "learning_rate": 6.366956612421353e-06, "loss": 0.0303, "step": 116340 }, { "epoch": 0.9414192086738409, "grad_norm": 0.16579313576221466, "learning_rate": 6.366277402553456e-06, "loss": 0.0437, "step": 116350 }, { "epoch": 0.9415001213690428, "grad_norm": 0.3329183757305145, "learning_rate": 6.3655981654380385e-06, "loss": 0.0343, "step": 116360 }, { "epoch": 0.9415810340642446, "grad_norm": 0.46079158782958984, "learning_rate": 6.3649189010886484e-06, "loss": 0.0189, "step": 116370 }, { "epoch": 0.9416619467594466, "grad_norm": 0.15339583158493042, "learning_rate": 6.364239609518833e-06, "loss": 0.0217, "step": 116380 }, { "epoch": 0.9417428594546484, "grad_norm": 0.4655012786388397, "learning_rate": 6.363560290742139e-06, "loss": 0.0383, "step": 116390 }, { "epoch": 0.9418237721498504, "grad_norm": 0.2614424526691437, "learning_rate": 6.362880944772113e-06, "loss": 0.0257, "step": 116400 }, { "epoch": 0.9419046848450522, "grad_norm": 0.3880716562271118, "learning_rate": 6.362201571622303e-06, "loss": 0.0202, "step": 116410 }, { "epoch": 0.941985597540254, "grad_norm": 0.6899620890617371, "learning_rate": 6.36152217130626e-06, "loss": 0.0283, "step": 116420 }, { "epoch": 0.942066510235456, "grad_norm": 0.3503442108631134, "learning_rate": 6.360842743837531e-06, "loss": 0.0377, "step": 116430 }, { "epoch": 0.9421474229306578, "grad_norm": 0.16279304027557373, "learning_rate": 6.360163289229665e-06, "loss": 0.0253, "step": 116440 }, { "epoch": 0.9422283356258597, "grad_norm": 0.580896258354187, "learning_rate": 6.359483807496214e-06, "loss": 0.0304, "step": 116450 }, { "epoch": 0.9423092483210616, "grad_norm": 0.8427955508232117, "learning_rate": 6.358804298650729e-06, "loss": 0.0302, "step": 116460 }, { "epoch": 0.9423901610162635, "grad_norm": 0.2610500156879425, "learning_rate": 6.358124762706762e-06, "loss": 0.0119, "step": 116470 }, { "epoch": 0.9424710737114653, "grad_norm": 0.20934689044952393, "learning_rate": 6.357445199677863e-06, "loss": 0.0343, "step": 116480 }, { "epoch": 0.9425519864066672, "grad_norm": 0.21541571617126465, "learning_rate": 6.356765609577584e-06, "loss": 0.0235, "step": 116490 }, { "epoch": 0.9426328991018691, "grad_norm": 0.4171440601348877, "learning_rate": 6.35608599241948e-06, "loss": 0.0402, "step": 116500 }, { "epoch": 0.9427138117970709, "grad_norm": 0.2750205099582672, "learning_rate": 6.355406348217105e-06, "loss": 0.0185, "step": 116510 }, { "epoch": 0.9427947244922729, "grad_norm": 0.7606230974197388, "learning_rate": 6.3547266769840085e-06, "loss": 0.0129, "step": 116520 }, { "epoch": 0.9428756371874747, "grad_norm": 0.29436177015304565, "learning_rate": 6.354046978733752e-06, "loss": 0.0205, "step": 116530 }, { "epoch": 0.9429565498826766, "grad_norm": 0.4559485614299774, "learning_rate": 6.353367253479885e-06, "loss": 0.0334, "step": 116540 }, { "epoch": 0.9430374625778785, "grad_norm": 0.1911260187625885, "learning_rate": 6.352687501235966e-06, "loss": 0.0333, "step": 116550 }, { "epoch": 0.9431183752730804, "grad_norm": 0.7186287641525269, "learning_rate": 6.35200772201555e-06, "loss": 0.0366, "step": 116560 }, { "epoch": 0.9431992879682822, "grad_norm": 0.33562278747558594, "learning_rate": 6.351327915832194e-06, "loss": 0.0329, "step": 116570 }, { "epoch": 0.9432802006634841, "grad_norm": 0.3862907886505127, "learning_rate": 6.350648082699456e-06, "loss": 0.0241, "step": 116580 }, { "epoch": 0.943361113358686, "grad_norm": 0.21135683357715607, "learning_rate": 6.349968222630893e-06, "loss": 0.027, "step": 116590 }, { "epoch": 0.9434420260538878, "grad_norm": 0.2376904934644699, "learning_rate": 6.349288335640065e-06, "loss": 0.025, "step": 116600 }, { "epoch": 0.9435229387490898, "grad_norm": 0.5539640784263611, "learning_rate": 6.3486084217405275e-06, "loss": 0.0322, "step": 116610 }, { "epoch": 0.9436038514442916, "grad_norm": 0.47748231887817383, "learning_rate": 6.347928480945843e-06, "loss": 0.0235, "step": 116620 }, { "epoch": 0.9436847641394935, "grad_norm": 0.5411038994789124, "learning_rate": 6.347248513269571e-06, "loss": 0.0254, "step": 116630 }, { "epoch": 0.9437656768346954, "grad_norm": 0.7618997693061829, "learning_rate": 6.34656851872527e-06, "loss": 0.0346, "step": 116640 }, { "epoch": 0.9438465895298972, "grad_norm": 0.05995837226510048, "learning_rate": 6.345888497326505e-06, "loss": 0.0246, "step": 116650 }, { "epoch": 0.9439275022250991, "grad_norm": 0.34265226125717163, "learning_rate": 6.345208449086833e-06, "loss": 0.0151, "step": 116660 }, { "epoch": 0.944008414920301, "grad_norm": 0.5981622934341431, "learning_rate": 6.3445283740198195e-06, "loss": 0.0361, "step": 116670 }, { "epoch": 0.9440893276155029, "grad_norm": 0.1428573727607727, "learning_rate": 6.343848272139026e-06, "loss": 0.0231, "step": 116680 }, { "epoch": 0.9441702403107047, "grad_norm": 0.2987513840198517, "learning_rate": 6.343168143458016e-06, "loss": 0.0155, "step": 116690 }, { "epoch": 0.9442511530059067, "grad_norm": 0.49440309405326843, "learning_rate": 6.342487987990352e-06, "loss": 0.034, "step": 116700 }, { "epoch": 0.9443320657011085, "grad_norm": 0.8815200328826904, "learning_rate": 6.341807805749599e-06, "loss": 0.038, "step": 116710 }, { "epoch": 0.9444129783963103, "grad_norm": 0.6235834956169128, "learning_rate": 6.341127596749323e-06, "loss": 0.0204, "step": 116720 }, { "epoch": 0.9444938910915123, "grad_norm": 0.27748987078666687, "learning_rate": 6.340447361003088e-06, "loss": 0.023, "step": 116730 }, { "epoch": 0.9445748037867141, "grad_norm": 0.2377813160419464, "learning_rate": 6.3397670985244605e-06, "loss": 0.0255, "step": 116740 }, { "epoch": 0.944655716481916, "grad_norm": 0.3058912456035614, "learning_rate": 6.339086809327006e-06, "loss": 0.0298, "step": 116750 }, { "epoch": 0.9447366291771179, "grad_norm": 0.664172351360321, "learning_rate": 6.338406493424293e-06, "loss": 0.0333, "step": 116760 }, { "epoch": 0.9448175418723198, "grad_norm": 0.5362569093704224, "learning_rate": 6.337726150829887e-06, "loss": 0.0268, "step": 116770 }, { "epoch": 0.9448984545675216, "grad_norm": 0.3781282305717468, "learning_rate": 6.3370457815573574e-06, "loss": 0.0266, "step": 116780 }, { "epoch": 0.9449793672627235, "grad_norm": 0.3198787271976471, "learning_rate": 6.336365385620272e-06, "loss": 0.0217, "step": 116790 }, { "epoch": 0.9450602799579254, "grad_norm": 0.5187235474586487, "learning_rate": 6.335684963032201e-06, "loss": 0.0251, "step": 116800 }, { "epoch": 0.9451411926531272, "grad_norm": 0.45870429277420044, "learning_rate": 6.3350045138067126e-06, "loss": 0.0274, "step": 116810 }, { "epoch": 0.9452221053483292, "grad_norm": 0.2716699242591858, "learning_rate": 6.334324037957375e-06, "loss": 0.0224, "step": 116820 }, { "epoch": 0.945303018043531, "grad_norm": 0.44599077105522156, "learning_rate": 6.333643535497766e-06, "loss": 0.0301, "step": 116830 }, { "epoch": 0.945383930738733, "grad_norm": 0.36891382932662964, "learning_rate": 6.332963006441449e-06, "loss": 0.0268, "step": 116840 }, { "epoch": 0.9454648434339348, "grad_norm": 0.40122994780540466, "learning_rate": 6.332282450802e-06, "loss": 0.0392, "step": 116850 }, { "epoch": 0.9455457561291367, "grad_norm": 0.5100402235984802, "learning_rate": 6.33160186859299e-06, "loss": 0.0261, "step": 116860 }, { "epoch": 0.9456266688243385, "grad_norm": 0.5232436656951904, "learning_rate": 6.330921259827992e-06, "loss": 0.0184, "step": 116870 }, { "epoch": 0.9457075815195404, "grad_norm": 0.44888800382614136, "learning_rate": 6.330240624520578e-06, "loss": 0.027, "step": 116880 }, { "epoch": 0.9457884942147423, "grad_norm": 0.5689104199409485, "learning_rate": 6.329559962684325e-06, "loss": 0.0162, "step": 116890 }, { "epoch": 0.9458694069099441, "grad_norm": 0.3635327219963074, "learning_rate": 6.328879274332804e-06, "loss": 0.018, "step": 116900 }, { "epoch": 0.9459503196051461, "grad_norm": 0.17599192261695862, "learning_rate": 6.328198559479591e-06, "loss": 0.0277, "step": 116910 }, { "epoch": 0.9460312323003479, "grad_norm": 0.16611480712890625, "learning_rate": 6.327517818138262e-06, "loss": 0.025, "step": 116920 }, { "epoch": 0.9461121449955499, "grad_norm": 0.31238359212875366, "learning_rate": 6.326837050322393e-06, "loss": 0.0261, "step": 116930 }, { "epoch": 0.9461930576907517, "grad_norm": 0.21859781444072723, "learning_rate": 6.326156256045561e-06, "loss": 0.028, "step": 116940 }, { "epoch": 0.9462739703859535, "grad_norm": 0.4472612738609314, "learning_rate": 6.3254754353213395e-06, "loss": 0.0228, "step": 116950 }, { "epoch": 0.9463548830811555, "grad_norm": 0.4294273853302002, "learning_rate": 6.324794588163311e-06, "loss": 0.0292, "step": 116960 }, { "epoch": 0.9464357957763573, "grad_norm": 0.5388429164886475, "learning_rate": 6.3241137145850504e-06, "loss": 0.028, "step": 116970 }, { "epoch": 0.9465167084715592, "grad_norm": 0.3535838723182678, "learning_rate": 6.3234328146001365e-06, "loss": 0.0261, "step": 116980 }, { "epoch": 0.946597621166761, "grad_norm": 0.2282751202583313, "learning_rate": 6.32275188822215e-06, "loss": 0.0304, "step": 116990 }, { "epoch": 0.946678533861963, "grad_norm": 0.2222185581922531, "learning_rate": 6.322070935464668e-06, "loss": 0.0247, "step": 117000 }, { "epoch": 0.9467594465571648, "grad_norm": 0.3703763782978058, "learning_rate": 6.321389956341275e-06, "loss": 0.0152, "step": 117010 }, { "epoch": 0.9468403592523666, "grad_norm": 0.31622403860092163, "learning_rate": 6.320708950865547e-06, "loss": 0.0266, "step": 117020 }, { "epoch": 0.9469212719475686, "grad_norm": 0.3422453999519348, "learning_rate": 6.320027919051067e-06, "loss": 0.0302, "step": 117030 }, { "epoch": 0.9470021846427704, "grad_norm": 0.03939281031489372, "learning_rate": 6.3193468609114176e-06, "loss": 0.0195, "step": 117040 }, { "epoch": 0.9470830973379724, "grad_norm": 0.3673878014087677, "learning_rate": 6.318665776460181e-06, "loss": 0.0119, "step": 117050 }, { "epoch": 0.9471640100331742, "grad_norm": 0.6316158175468445, "learning_rate": 6.317984665710937e-06, "loss": 0.0217, "step": 117060 }, { "epoch": 0.9472449227283761, "grad_norm": 0.23979490995407104, "learning_rate": 6.317303528677273e-06, "loss": 0.0158, "step": 117070 }, { "epoch": 0.947325835423578, "grad_norm": 0.6066983342170715, "learning_rate": 6.316622365372771e-06, "loss": 0.0284, "step": 117080 }, { "epoch": 0.9474067481187798, "grad_norm": 0.4382939338684082, "learning_rate": 6.3159411758110145e-06, "loss": 0.0238, "step": 117090 }, { "epoch": 0.9474876608139817, "grad_norm": 0.29276928305625916, "learning_rate": 6.3152599600055895e-06, "loss": 0.0262, "step": 117100 }, { "epoch": 0.9475685735091836, "grad_norm": 0.3397556245326996, "learning_rate": 6.314578717970081e-06, "loss": 0.0242, "step": 117110 }, { "epoch": 0.9476494862043855, "grad_norm": 0.33002516627311707, "learning_rate": 6.3138974497180775e-06, "loss": 0.0217, "step": 117120 }, { "epoch": 0.9477303988995873, "grad_norm": 0.2267095297574997, "learning_rate": 6.313216155263161e-06, "loss": 0.0294, "step": 117130 }, { "epoch": 0.9478113115947893, "grad_norm": 0.8154088258743286, "learning_rate": 6.312534834618919e-06, "loss": 0.0285, "step": 117140 }, { "epoch": 0.9478922242899911, "grad_norm": 0.4052101969718933, "learning_rate": 6.3118534877989435e-06, "loss": 0.0274, "step": 117150 }, { "epoch": 0.947973136985193, "grad_norm": 0.2079489529132843, "learning_rate": 6.311172114816819e-06, "loss": 0.0194, "step": 117160 }, { "epoch": 0.9480540496803949, "grad_norm": 0.2725338339805603, "learning_rate": 6.310490715686135e-06, "loss": 0.0347, "step": 117170 }, { "epoch": 0.9481349623755967, "grad_norm": 0.6104832887649536, "learning_rate": 6.309809290420479e-06, "loss": 0.0364, "step": 117180 }, { "epoch": 0.9482158750707986, "grad_norm": 0.7056928277015686, "learning_rate": 6.309127839033442e-06, "loss": 0.0254, "step": 117190 }, { "epoch": 0.9482967877660005, "grad_norm": 0.3975503146648407, "learning_rate": 6.308446361538616e-06, "loss": 0.0369, "step": 117200 }, { "epoch": 0.9483777004612024, "grad_norm": 0.011987670324742794, "learning_rate": 6.307764857949587e-06, "loss": 0.023, "step": 117210 }, { "epoch": 0.9484586131564042, "grad_norm": 0.16693158447742462, "learning_rate": 6.307083328279949e-06, "loss": 0.0149, "step": 117220 }, { "epoch": 0.9485395258516062, "grad_norm": 0.439510703086853, "learning_rate": 6.306401772543295e-06, "loss": 0.0245, "step": 117230 }, { "epoch": 0.948620438546808, "grad_norm": 0.17613452672958374, "learning_rate": 6.305720190753215e-06, "loss": 0.028, "step": 117240 }, { "epoch": 0.9487013512420098, "grad_norm": 0.5306090116500854, "learning_rate": 6.305038582923302e-06, "loss": 0.0375, "step": 117250 }, { "epoch": 0.9487822639372118, "grad_norm": 0.3304996192455292, "learning_rate": 6.304356949067148e-06, "loss": 0.0347, "step": 117260 }, { "epoch": 0.9488631766324136, "grad_norm": 0.1646309494972229, "learning_rate": 6.303675289198351e-06, "loss": 0.0233, "step": 117270 }, { "epoch": 0.9489440893276155, "grad_norm": 0.8759145140647888, "learning_rate": 6.302993603330501e-06, "loss": 0.0357, "step": 117280 }, { "epoch": 0.9490250020228174, "grad_norm": 0.5164772272109985, "learning_rate": 6.302311891477195e-06, "loss": 0.02, "step": 117290 }, { "epoch": 0.9491059147180193, "grad_norm": 0.1551777720451355, "learning_rate": 6.301630153652026e-06, "loss": 0.0287, "step": 117300 }, { "epoch": 0.9491868274132211, "grad_norm": 0.41632887721061707, "learning_rate": 6.300948389868594e-06, "loss": 0.0279, "step": 117310 }, { "epoch": 0.949267740108423, "grad_norm": 0.27749350666999817, "learning_rate": 6.300266600140492e-06, "loss": 0.0248, "step": 117320 }, { "epoch": 0.9493486528036249, "grad_norm": 0.7537426948547363, "learning_rate": 6.299584784481316e-06, "loss": 0.0225, "step": 117330 }, { "epoch": 0.9494295654988267, "grad_norm": 0.29526475071907043, "learning_rate": 6.298902942904666e-06, "loss": 0.0268, "step": 117340 }, { "epoch": 0.9495104781940287, "grad_norm": 0.2019142508506775, "learning_rate": 6.29822107542414e-06, "loss": 0.0356, "step": 117350 }, { "epoch": 0.9495913908892305, "grad_norm": 0.37262779474258423, "learning_rate": 6.297539182053334e-06, "loss": 0.0231, "step": 117360 }, { "epoch": 0.9496723035844324, "grad_norm": 0.3911735415458679, "learning_rate": 6.2968572628058485e-06, "loss": 0.0366, "step": 117370 }, { "epoch": 0.9497532162796343, "grad_norm": 0.5250468254089355, "learning_rate": 6.296175317695284e-06, "loss": 0.0309, "step": 117380 }, { "epoch": 0.9498341289748361, "grad_norm": 0.4711020588874817, "learning_rate": 6.295493346735237e-06, "loss": 0.0244, "step": 117390 }, { "epoch": 0.949915041670038, "grad_norm": 0.1329796016216278, "learning_rate": 6.29481134993931e-06, "loss": 0.0187, "step": 117400 }, { "epoch": 0.9499959543652399, "grad_norm": 0.1712218075990677, "learning_rate": 6.294129327321105e-06, "loss": 0.029, "step": 117410 }, { "epoch": 0.9500768670604418, "grad_norm": 0.5663705468177795, "learning_rate": 6.293447278894223e-06, "loss": 0.0363, "step": 117420 }, { "epoch": 0.9501577797556436, "grad_norm": 0.3647831380367279, "learning_rate": 6.292765204672267e-06, "loss": 0.027, "step": 117430 }, { "epoch": 0.9502386924508456, "grad_norm": 0.19580566883087158, "learning_rate": 6.292083104668835e-06, "loss": 0.0269, "step": 117440 }, { "epoch": 0.9503196051460474, "grad_norm": 0.3940146267414093, "learning_rate": 6.291400978897535e-06, "loss": 0.0247, "step": 117450 }, { "epoch": 0.9504005178412492, "grad_norm": 0.3311965763568878, "learning_rate": 6.290718827371967e-06, "loss": 0.0449, "step": 117460 }, { "epoch": 0.9504814305364512, "grad_norm": 0.4425223469734192, "learning_rate": 6.29003665010574e-06, "loss": 0.0233, "step": 117470 }, { "epoch": 0.950562343231653, "grad_norm": 0.3162563741207123, "learning_rate": 6.289354447112454e-06, "loss": 0.0298, "step": 117480 }, { "epoch": 0.950643255926855, "grad_norm": 0.4946383535861969, "learning_rate": 6.288672218405715e-06, "loss": 0.0322, "step": 117490 }, { "epoch": 0.9507241686220568, "grad_norm": 0.4941067397594452, "learning_rate": 6.287989963999129e-06, "loss": 0.015, "step": 117500 }, { "epoch": 0.9508050813172587, "grad_norm": 0.29860928654670715, "learning_rate": 6.287307683906303e-06, "loss": 0.0284, "step": 117510 }, { "epoch": 0.9508859940124605, "grad_norm": 0.007451456505805254, "learning_rate": 6.28662537814084e-06, "loss": 0.0378, "step": 117520 }, { "epoch": 0.9509669067076625, "grad_norm": 0.23228943347930908, "learning_rate": 6.285943046716354e-06, "loss": 0.0265, "step": 117530 }, { "epoch": 0.9510478194028643, "grad_norm": 0.349889874458313, "learning_rate": 6.285260689646446e-06, "loss": 0.0185, "step": 117540 }, { "epoch": 0.9511287320980661, "grad_norm": 0.35390952229499817, "learning_rate": 6.284578306944729e-06, "loss": 0.0346, "step": 117550 }, { "epoch": 0.9512096447932681, "grad_norm": 0.3046332597732544, "learning_rate": 6.283895898624808e-06, "loss": 0.0239, "step": 117560 }, { "epoch": 0.9512905574884699, "grad_norm": 0.2899073660373688, "learning_rate": 6.283213464700294e-06, "loss": 0.0351, "step": 117570 }, { "epoch": 0.9513714701836719, "grad_norm": 0.6897015571594238, "learning_rate": 6.2825310051847965e-06, "loss": 0.0286, "step": 117580 }, { "epoch": 0.9514523828788737, "grad_norm": 0.20594391226768494, "learning_rate": 6.281848520091926e-06, "loss": 0.0217, "step": 117590 }, { "epoch": 0.9515332955740756, "grad_norm": 0.6063412427902222, "learning_rate": 6.2811660094352936e-06, "loss": 0.0421, "step": 117600 }, { "epoch": 0.9516142082692775, "grad_norm": 0.37597936391830444, "learning_rate": 6.280483473228508e-06, "loss": 0.0249, "step": 117610 }, { "epoch": 0.9516951209644793, "grad_norm": 0.48926809430122375, "learning_rate": 6.279800911485183e-06, "loss": 0.0282, "step": 117620 }, { "epoch": 0.9517760336596812, "grad_norm": 0.399604469537735, "learning_rate": 6.2791183242189315e-06, "loss": 0.0283, "step": 117630 }, { "epoch": 0.951856946354883, "grad_norm": 0.3217346966266632, "learning_rate": 6.278435711443366e-06, "loss": 0.0161, "step": 117640 }, { "epoch": 0.951937859050085, "grad_norm": 0.44164326786994934, "learning_rate": 6.277753073172097e-06, "loss": 0.0244, "step": 117650 }, { "epoch": 0.9520187717452868, "grad_norm": 0.47094741463661194, "learning_rate": 6.277070409418742e-06, "loss": 0.0199, "step": 117660 }, { "epoch": 0.9520996844404888, "grad_norm": 0.33416828513145447, "learning_rate": 6.276387720196912e-06, "loss": 0.0198, "step": 117670 }, { "epoch": 0.9521805971356906, "grad_norm": 0.30915480852127075, "learning_rate": 6.275705005520225e-06, "loss": 0.0316, "step": 117680 }, { "epoch": 0.9522615098308924, "grad_norm": 0.23781031370162964, "learning_rate": 6.275022265402296e-06, "loss": 0.0211, "step": 117690 }, { "epoch": 0.9523424225260944, "grad_norm": 0.37026724219322205, "learning_rate": 6.274339499856738e-06, "loss": 0.0154, "step": 117700 }, { "epoch": 0.9524233352212962, "grad_norm": 0.3627431392669678, "learning_rate": 6.273656708897168e-06, "loss": 0.0307, "step": 117710 }, { "epoch": 0.9525042479164981, "grad_norm": 0.2300843447446823, "learning_rate": 6.272973892537205e-06, "loss": 0.0158, "step": 117720 }, { "epoch": 0.9525851606117, "grad_norm": 0.2667086720466614, "learning_rate": 6.272291050790464e-06, "loss": 0.0236, "step": 117730 }, { "epoch": 0.9526660733069019, "grad_norm": 0.2649281919002533, "learning_rate": 6.2716081836705654e-06, "loss": 0.0183, "step": 117740 }, { "epoch": 0.9527469860021037, "grad_norm": 0.2120559811592102, "learning_rate": 6.270925291191125e-06, "loss": 0.0167, "step": 117750 }, { "epoch": 0.9528278986973056, "grad_norm": 0.5430396795272827, "learning_rate": 6.270242373365763e-06, "loss": 0.0365, "step": 117760 }, { "epoch": 0.9529088113925075, "grad_norm": 0.418876975774765, "learning_rate": 6.2695594302080996e-06, "loss": 0.0281, "step": 117770 }, { "epoch": 0.9529897240877093, "grad_norm": 0.353371262550354, "learning_rate": 6.268876461731753e-06, "loss": 0.0253, "step": 117780 }, { "epoch": 0.9530706367829113, "grad_norm": 0.10934654623270035, "learning_rate": 6.2681934679503435e-06, "loss": 0.0251, "step": 117790 }, { "epoch": 0.9531515494781131, "grad_norm": 0.5516221523284912, "learning_rate": 6.267510448877493e-06, "loss": 0.0362, "step": 117800 }, { "epoch": 0.953232462173315, "grad_norm": 0.6495227217674255, "learning_rate": 6.26682740452682e-06, "loss": 0.0277, "step": 117810 }, { "epoch": 0.9533133748685169, "grad_norm": 0.5262221097946167, "learning_rate": 6.266144334911952e-06, "loss": 0.0286, "step": 117820 }, { "epoch": 0.9533942875637188, "grad_norm": 0.3184516429901123, "learning_rate": 6.2654612400465084e-06, "loss": 0.0419, "step": 117830 }, { "epoch": 0.9534752002589206, "grad_norm": 0.28449463844299316, "learning_rate": 6.26477811994411e-06, "loss": 0.0193, "step": 117840 }, { "epoch": 0.9535561129541225, "grad_norm": 0.5312556624412537, "learning_rate": 6.264094974618382e-06, "loss": 0.0391, "step": 117850 }, { "epoch": 0.9536370256493244, "grad_norm": 0.3760516941547394, "learning_rate": 6.263411804082949e-06, "loss": 0.0344, "step": 117860 }, { "epoch": 0.9537179383445262, "grad_norm": 0.5482272505760193, "learning_rate": 6.262728608351436e-06, "loss": 0.0376, "step": 117870 }, { "epoch": 0.9537988510397282, "grad_norm": 0.4461342990398407, "learning_rate": 6.262045387437465e-06, "loss": 0.0201, "step": 117880 }, { "epoch": 0.95387976373493, "grad_norm": 0.8421902060508728, "learning_rate": 6.261362141354663e-06, "loss": 0.0358, "step": 117890 }, { "epoch": 0.9539606764301319, "grad_norm": 0.6056849360466003, "learning_rate": 6.2606788701166575e-06, "loss": 0.0345, "step": 117900 }, { "epoch": 0.9540415891253338, "grad_norm": 0.6019222736358643, "learning_rate": 6.259995573737072e-06, "loss": 0.0296, "step": 117910 }, { "epoch": 0.9541225018205356, "grad_norm": 0.6248283982276917, "learning_rate": 6.259312252229534e-06, "loss": 0.0261, "step": 117920 }, { "epoch": 0.9542034145157375, "grad_norm": 0.6806381344795227, "learning_rate": 6.258628905607673e-06, "loss": 0.042, "step": 117930 }, { "epoch": 0.9542843272109394, "grad_norm": 0.45618709921836853, "learning_rate": 6.257945533885117e-06, "loss": 0.0216, "step": 117940 }, { "epoch": 0.9543652399061413, "grad_norm": 0.4487035274505615, "learning_rate": 6.25726213707549e-06, "loss": 0.0258, "step": 117950 }, { "epoch": 0.9544461526013431, "grad_norm": 0.548548698425293, "learning_rate": 6.256578715192425e-06, "loss": 0.0304, "step": 117960 }, { "epoch": 0.9545270652965451, "grad_norm": 0.3279223144054413, "learning_rate": 6.25589526824955e-06, "loss": 0.028, "step": 117970 }, { "epoch": 0.9546079779917469, "grad_norm": 0.38779497146606445, "learning_rate": 6.255211796260496e-06, "loss": 0.017, "step": 117980 }, { "epoch": 0.9546888906869487, "grad_norm": 0.2575972080230713, "learning_rate": 6.254528299238892e-06, "loss": 0.0342, "step": 117990 }, { "epoch": 0.9547698033821507, "grad_norm": 0.18923655152320862, "learning_rate": 6.253844777198369e-06, "loss": 0.0192, "step": 118000 }, { "epoch": 0.9548507160773525, "grad_norm": 0.47328895330429077, "learning_rate": 6.25316123015256e-06, "loss": 0.0341, "step": 118010 }, { "epoch": 0.9549316287725544, "grad_norm": 0.15154702961444855, "learning_rate": 6.252477658115096e-06, "loss": 0.0201, "step": 118020 }, { "epoch": 0.9550125414677563, "grad_norm": 0.32920172810554504, "learning_rate": 6.251794061099607e-06, "loss": 0.0187, "step": 118030 }, { "epoch": 0.9550934541629582, "grad_norm": 0.48677945137023926, "learning_rate": 6.251110439119729e-06, "loss": 0.021, "step": 118040 }, { "epoch": 0.95517436685816, "grad_norm": 0.795786440372467, "learning_rate": 6.250426792189097e-06, "loss": 0.0351, "step": 118050 }, { "epoch": 0.9552552795533619, "grad_norm": 0.18458564579486847, "learning_rate": 6.24974312032134e-06, "loss": 0.0253, "step": 118060 }, { "epoch": 0.9553361922485638, "grad_norm": 0.2224704921245575, "learning_rate": 6.249059423530096e-06, "loss": 0.0353, "step": 118070 }, { "epoch": 0.9554171049437656, "grad_norm": 0.3197583854198456, "learning_rate": 6.248375701828998e-06, "loss": 0.0317, "step": 118080 }, { "epoch": 0.9554980176389676, "grad_norm": 0.520449161529541, "learning_rate": 6.247691955231681e-06, "loss": 0.0343, "step": 118090 }, { "epoch": 0.9555789303341694, "grad_norm": 0.42263463139533997, "learning_rate": 6.2470081837517835e-06, "loss": 0.0225, "step": 118100 }, { "epoch": 0.9556598430293713, "grad_norm": 0.3235742151737213, "learning_rate": 6.24632438740294e-06, "loss": 0.0291, "step": 118110 }, { "epoch": 0.9557407557245732, "grad_norm": 0.5116958618164062, "learning_rate": 6.245640566198788e-06, "loss": 0.0286, "step": 118120 }, { "epoch": 0.9558216684197751, "grad_norm": 0.24878086149692535, "learning_rate": 6.244956720152965e-06, "loss": 0.0477, "step": 118130 }, { "epoch": 0.955902581114977, "grad_norm": 0.27364763617515564, "learning_rate": 6.244272849279106e-06, "loss": 0.0229, "step": 118140 }, { "epoch": 0.9559834938101788, "grad_norm": 0.6377224922180176, "learning_rate": 6.243588953590854e-06, "loss": 0.0278, "step": 118150 }, { "epoch": 0.9560644065053807, "grad_norm": 0.48923492431640625, "learning_rate": 6.242905033101846e-06, "loss": 0.0325, "step": 118160 }, { "epoch": 0.9561453192005825, "grad_norm": 0.17737507820129395, "learning_rate": 6.242221087825719e-06, "loss": 0.0292, "step": 118170 }, { "epoch": 0.9562262318957845, "grad_norm": 0.15933138132095337, "learning_rate": 6.241537117776116e-06, "loss": 0.0164, "step": 118180 }, { "epoch": 0.9563071445909863, "grad_norm": 0.3791901469230652, "learning_rate": 6.240853122966676e-06, "loss": 0.0269, "step": 118190 }, { "epoch": 0.9563880572861883, "grad_norm": 0.6554873585700989, "learning_rate": 6.24016910341104e-06, "loss": 0.0269, "step": 118200 }, { "epoch": 0.9564689699813901, "grad_norm": 0.39403653144836426, "learning_rate": 6.23948505912285e-06, "loss": 0.024, "step": 118210 }, { "epoch": 0.9565498826765919, "grad_norm": 0.2544141113758087, "learning_rate": 6.238800990115745e-06, "loss": 0.0225, "step": 118220 }, { "epoch": 0.9566307953717939, "grad_norm": 0.6495897769927979, "learning_rate": 6.238116896403372e-06, "loss": 0.0174, "step": 118230 }, { "epoch": 0.9567117080669957, "grad_norm": 0.5494350790977478, "learning_rate": 6.237432777999371e-06, "loss": 0.0199, "step": 118240 }, { "epoch": 0.9567926207621976, "grad_norm": 0.7488820552825928, "learning_rate": 6.236748634917384e-06, "loss": 0.0325, "step": 118250 }, { "epoch": 0.9568735334573994, "grad_norm": 0.3314894735813141, "learning_rate": 6.236064467171058e-06, "loss": 0.025, "step": 118260 }, { "epoch": 0.9569544461526014, "grad_norm": 0.8704761266708374, "learning_rate": 6.235380274774035e-06, "loss": 0.0277, "step": 118270 }, { "epoch": 0.9570353588478032, "grad_norm": 0.6011255383491516, "learning_rate": 6.23469605773996e-06, "loss": 0.0159, "step": 118280 }, { "epoch": 0.957116271543005, "grad_norm": 0.35300159454345703, "learning_rate": 6.2340118160824794e-06, "loss": 0.0367, "step": 118290 }, { "epoch": 0.957197184238207, "grad_norm": 0.45904889702796936, "learning_rate": 6.233327549815237e-06, "loss": 0.0326, "step": 118300 }, { "epoch": 0.9572780969334088, "grad_norm": 0.33038660883903503, "learning_rate": 6.232643258951882e-06, "loss": 0.046, "step": 118310 }, { "epoch": 0.9573590096286108, "grad_norm": 0.24187321960926056, "learning_rate": 6.231958943506059e-06, "loss": 0.0193, "step": 118320 }, { "epoch": 0.9574399223238126, "grad_norm": 0.474481999874115, "learning_rate": 6.231274603491414e-06, "loss": 0.0226, "step": 118330 }, { "epoch": 0.9575208350190145, "grad_norm": 0.3285272717475891, "learning_rate": 6.230590238921598e-06, "loss": 0.0245, "step": 118340 }, { "epoch": 0.9576017477142164, "grad_norm": 0.36680930852890015, "learning_rate": 6.229905849810258e-06, "loss": 0.0268, "step": 118350 }, { "epoch": 0.9576826604094182, "grad_norm": 0.4093407392501831, "learning_rate": 6.229221436171041e-06, "loss": 0.0387, "step": 118360 }, { "epoch": 0.9577635731046201, "grad_norm": 0.17301784455776215, "learning_rate": 6.228536998017598e-06, "loss": 0.0171, "step": 118370 }, { "epoch": 0.957844485799822, "grad_norm": 0.1804971545934677, "learning_rate": 6.2278525353635775e-06, "loss": 0.0262, "step": 118380 }, { "epoch": 0.9579253984950239, "grad_norm": 0.46082910895347595, "learning_rate": 6.227168048222631e-06, "loss": 0.0338, "step": 118390 }, { "epoch": 0.9580063111902257, "grad_norm": 0.07935558259487152, "learning_rate": 6.2264835366084085e-06, "loss": 0.0238, "step": 118400 }, { "epoch": 0.9580872238854277, "grad_norm": 0.23625944554805756, "learning_rate": 6.22579900053456e-06, "loss": 0.0284, "step": 118410 }, { "epoch": 0.9581681365806295, "grad_norm": 0.27703243494033813, "learning_rate": 6.2251144400147415e-06, "loss": 0.0325, "step": 118420 }, { "epoch": 0.9582490492758314, "grad_norm": 0.8257225155830383, "learning_rate": 6.2244298550626e-06, "loss": 0.0249, "step": 118430 }, { "epoch": 0.9583299619710333, "grad_norm": 0.35159292817115784, "learning_rate": 6.223745245691787e-06, "loss": 0.0286, "step": 118440 }, { "epoch": 0.9584108746662351, "grad_norm": 0.8149724006652832, "learning_rate": 6.223060611915961e-06, "loss": 0.0505, "step": 118450 }, { "epoch": 0.958491787361437, "grad_norm": 0.551555871963501, "learning_rate": 6.222375953748774e-06, "loss": 0.0432, "step": 118460 }, { "epoch": 0.9585727000566389, "grad_norm": 0.5684025287628174, "learning_rate": 6.221691271203878e-06, "loss": 0.0409, "step": 118470 }, { "epoch": 0.9586536127518408, "grad_norm": 0.45745593309402466, "learning_rate": 6.221006564294928e-06, "loss": 0.0295, "step": 118480 }, { "epoch": 0.9587345254470426, "grad_norm": 0.20518353581428528, "learning_rate": 6.220321833035581e-06, "loss": 0.0134, "step": 118490 }, { "epoch": 0.9588154381422446, "grad_norm": 0.3237500786781311, "learning_rate": 6.219637077439491e-06, "loss": 0.0223, "step": 118500 }, { "epoch": 0.9588963508374464, "grad_norm": 0.5092571377754211, "learning_rate": 6.218952297520314e-06, "loss": 0.0225, "step": 118510 }, { "epoch": 0.9589772635326482, "grad_norm": 0.40672099590301514, "learning_rate": 6.218267493291705e-06, "loss": 0.0295, "step": 118520 }, { "epoch": 0.9590581762278502, "grad_norm": 0.41642072796821594, "learning_rate": 6.2175826647673265e-06, "loss": 0.0367, "step": 118530 }, { "epoch": 0.959139088923052, "grad_norm": 0.647084653377533, "learning_rate": 6.216897811960829e-06, "loss": 0.0255, "step": 118540 }, { "epoch": 0.9592200016182539, "grad_norm": 0.5309892892837524, "learning_rate": 6.216212934885876e-06, "loss": 0.0235, "step": 118550 }, { "epoch": 0.9593009143134558, "grad_norm": 0.18435437977313995, "learning_rate": 6.2155280335561216e-06, "loss": 0.0264, "step": 118560 }, { "epoch": 0.9593818270086577, "grad_norm": 0.5270761847496033, "learning_rate": 6.214843107985228e-06, "loss": 0.0195, "step": 118570 }, { "epoch": 0.9594627397038595, "grad_norm": 0.26366209983825684, "learning_rate": 6.214158158186853e-06, "loss": 0.0253, "step": 118580 }, { "epoch": 0.9595436523990614, "grad_norm": 0.7449432611465454, "learning_rate": 6.2134731841746565e-06, "loss": 0.0229, "step": 118590 }, { "epoch": 0.9596245650942633, "grad_norm": 0.5624193549156189, "learning_rate": 6.212788185962299e-06, "loss": 0.021, "step": 118600 }, { "epoch": 0.9597054777894651, "grad_norm": 0.21654658019542694, "learning_rate": 6.212103163563441e-06, "loss": 0.0285, "step": 118610 }, { "epoch": 0.9597863904846671, "grad_norm": 0.3021886348724365, "learning_rate": 6.2114181169917456e-06, "loss": 0.0221, "step": 118620 }, { "epoch": 0.9598673031798689, "grad_norm": 0.37623992562294006, "learning_rate": 6.21073304626087e-06, "loss": 0.0235, "step": 118630 }, { "epoch": 0.9599482158750708, "grad_norm": 0.5642278790473938, "learning_rate": 6.210047951384485e-06, "loss": 0.0346, "step": 118640 }, { "epoch": 0.9600291285702727, "grad_norm": 0.37948673963546753, "learning_rate": 6.2093628323762445e-06, "loss": 0.0294, "step": 118650 }, { "epoch": 0.9601100412654745, "grad_norm": 0.7237253189086914, "learning_rate": 6.208677689249816e-06, "loss": 0.0355, "step": 118660 }, { "epoch": 0.9601909539606764, "grad_norm": 0.18255667388439178, "learning_rate": 6.207992522018863e-06, "loss": 0.0247, "step": 118670 }, { "epoch": 0.9602718666558783, "grad_norm": 0.27011504769325256, "learning_rate": 6.207307330697049e-06, "loss": 0.0275, "step": 118680 }, { "epoch": 0.9603527793510802, "grad_norm": 0.5719330906867981, "learning_rate": 6.2066221152980386e-06, "loss": 0.0234, "step": 118690 }, { "epoch": 0.960433692046282, "grad_norm": 0.2281394749879837, "learning_rate": 6.205936875835499e-06, "loss": 0.0276, "step": 118700 }, { "epoch": 0.960514604741484, "grad_norm": 0.40699511766433716, "learning_rate": 6.205251612323093e-06, "loss": 0.0248, "step": 118710 }, { "epoch": 0.9605955174366858, "grad_norm": 0.6199064254760742, "learning_rate": 6.204566324774488e-06, "loss": 0.0381, "step": 118720 }, { "epoch": 0.9606764301318877, "grad_norm": 0.36825990676879883, "learning_rate": 6.20388101320335e-06, "loss": 0.0329, "step": 118730 }, { "epoch": 0.9607573428270896, "grad_norm": 0.3644351363182068, "learning_rate": 6.203195677623348e-06, "loss": 0.0349, "step": 118740 }, { "epoch": 0.9608382555222914, "grad_norm": 0.689295768737793, "learning_rate": 6.202510318048149e-06, "loss": 0.0183, "step": 118750 }, { "epoch": 0.9609191682174933, "grad_norm": 0.4556211829185486, "learning_rate": 6.2018249344914185e-06, "loss": 0.019, "step": 118760 }, { "epoch": 0.9610000809126952, "grad_norm": 0.3604438602924347, "learning_rate": 6.201139526966827e-06, "loss": 0.0327, "step": 118770 }, { "epoch": 0.9610809936078971, "grad_norm": 0.2132377177476883, "learning_rate": 6.2004540954880445e-06, "loss": 0.0231, "step": 118780 }, { "epoch": 0.961161906303099, "grad_norm": 0.1478135883808136, "learning_rate": 6.1997686400687385e-06, "loss": 0.0258, "step": 118790 }, { "epoch": 0.9612428189983009, "grad_norm": 0.2931446433067322, "learning_rate": 6.19908316072258e-06, "loss": 0.0243, "step": 118800 }, { "epoch": 0.9613237316935027, "grad_norm": 0.7983535528182983, "learning_rate": 6.198397657463239e-06, "loss": 0.0189, "step": 118810 }, { "epoch": 0.9614046443887045, "grad_norm": 0.3731174170970917, "learning_rate": 6.197712130304386e-06, "loss": 0.0179, "step": 118820 }, { "epoch": 0.9614855570839065, "grad_norm": 0.3164411783218384, "learning_rate": 6.197026579259695e-06, "loss": 0.0189, "step": 118830 }, { "epoch": 0.9615664697791083, "grad_norm": 0.2870576083660126, "learning_rate": 6.196341004342834e-06, "loss": 0.0209, "step": 118840 }, { "epoch": 0.9616473824743103, "grad_norm": 0.3730475902557373, "learning_rate": 6.195655405567479e-06, "loss": 0.0359, "step": 118850 }, { "epoch": 0.9617282951695121, "grad_norm": 0.3534623980522156, "learning_rate": 6.1949697829473e-06, "loss": 0.0264, "step": 118860 }, { "epoch": 0.961809207864714, "grad_norm": 0.39657580852508545, "learning_rate": 6.194284136495972e-06, "loss": 0.0175, "step": 118870 }, { "epoch": 0.9618901205599159, "grad_norm": 0.5818430185317993, "learning_rate": 6.193598466227168e-06, "loss": 0.0424, "step": 118880 }, { "epoch": 0.9619710332551177, "grad_norm": 0.22589021921157837, "learning_rate": 6.192912772154561e-06, "loss": 0.0209, "step": 118890 }, { "epoch": 0.9620519459503196, "grad_norm": 0.4209907650947571, "learning_rate": 6.192227054291828e-06, "loss": 0.0191, "step": 118900 }, { "epoch": 0.9621328586455214, "grad_norm": 0.38764312863349915, "learning_rate": 6.191541312652645e-06, "loss": 0.0197, "step": 118910 }, { "epoch": 0.9622137713407234, "grad_norm": 0.1321772336959839, "learning_rate": 6.190855547250683e-06, "loss": 0.0189, "step": 118920 }, { "epoch": 0.9622946840359252, "grad_norm": 0.7977522611618042, "learning_rate": 6.190169758099622e-06, "loss": 0.0332, "step": 118930 }, { "epoch": 0.9623755967311272, "grad_norm": 0.42637377977371216, "learning_rate": 6.189483945213139e-06, "loss": 0.0252, "step": 118940 }, { "epoch": 0.962456509426329, "grad_norm": 0.46613839268684387, "learning_rate": 6.188798108604909e-06, "loss": 0.014, "step": 118950 }, { "epoch": 0.9625374221215308, "grad_norm": 0.33784255385398865, "learning_rate": 6.18811224828861e-06, "loss": 0.0279, "step": 118960 }, { "epoch": 0.9626183348167328, "grad_norm": 0.4052280783653259, "learning_rate": 6.187426364277922e-06, "loss": 0.0238, "step": 118970 }, { "epoch": 0.9626992475119346, "grad_norm": 0.8035749793052673, "learning_rate": 6.186740456586521e-06, "loss": 0.0372, "step": 118980 }, { "epoch": 0.9627801602071365, "grad_norm": 0.2006974071264267, "learning_rate": 6.1860545252280865e-06, "loss": 0.0247, "step": 118990 }, { "epoch": 0.9628610729023384, "grad_norm": 0.6262415051460266, "learning_rate": 6.185368570216298e-06, "loss": 0.0305, "step": 119000 }, { "epoch": 0.9629419855975403, "grad_norm": 0.22522981464862823, "learning_rate": 6.184682591564837e-06, "loss": 0.0206, "step": 119010 }, { "epoch": 0.9630228982927421, "grad_norm": 0.30170518159866333, "learning_rate": 6.183996589287382e-06, "loss": 0.0254, "step": 119020 }, { "epoch": 0.963103810987944, "grad_norm": 0.6210441589355469, "learning_rate": 6.183310563397614e-06, "loss": 0.0193, "step": 119030 }, { "epoch": 0.9631847236831459, "grad_norm": 0.13131773471832275, "learning_rate": 6.1826245139092155e-06, "loss": 0.0178, "step": 119040 }, { "epoch": 0.9632656363783477, "grad_norm": 0.41896045207977295, "learning_rate": 6.18193844083587e-06, "loss": 0.0183, "step": 119050 }, { "epoch": 0.9633465490735497, "grad_norm": 0.5363998413085938, "learning_rate": 6.181252344191253e-06, "loss": 0.0278, "step": 119060 }, { "epoch": 0.9634274617687515, "grad_norm": 0.487313836812973, "learning_rate": 6.1805662239890555e-06, "loss": 0.0266, "step": 119070 }, { "epoch": 0.9635083744639534, "grad_norm": 0.4723667502403259, "learning_rate": 6.179880080242955e-06, "loss": 0.0322, "step": 119080 }, { "epoch": 0.9635892871591553, "grad_norm": 0.5138161182403564, "learning_rate": 6.1791939129666365e-06, "loss": 0.0342, "step": 119090 }, { "epoch": 0.9636701998543572, "grad_norm": 1.153330683708191, "learning_rate": 6.178507722173787e-06, "loss": 0.0424, "step": 119100 }, { "epoch": 0.963751112549559, "grad_norm": 0.5625882148742676, "learning_rate": 6.177821507878087e-06, "loss": 0.036, "step": 119110 }, { "epoch": 0.9638320252447609, "grad_norm": 0.11833149939775467, "learning_rate": 6.177135270093225e-06, "loss": 0.0306, "step": 119120 }, { "epoch": 0.9639129379399628, "grad_norm": 0.5958217978477478, "learning_rate": 6.176449008832885e-06, "loss": 0.0227, "step": 119130 }, { "epoch": 0.9639938506351646, "grad_norm": 0.27929869294166565, "learning_rate": 6.175762724110751e-06, "loss": 0.0249, "step": 119140 }, { "epoch": 0.9640747633303666, "grad_norm": 0.3783056139945984, "learning_rate": 6.175076415940513e-06, "loss": 0.0258, "step": 119150 }, { "epoch": 0.9641556760255684, "grad_norm": 0.2839469015598297, "learning_rate": 6.1743900843358585e-06, "loss": 0.0158, "step": 119160 }, { "epoch": 0.9642365887207703, "grad_norm": 0.37945038080215454, "learning_rate": 6.17370372931047e-06, "loss": 0.0227, "step": 119170 }, { "epoch": 0.9643175014159722, "grad_norm": 0.3084070682525635, "learning_rate": 6.17301735087804e-06, "loss": 0.0264, "step": 119180 }, { "epoch": 0.964398414111174, "grad_norm": 0.42378973960876465, "learning_rate": 6.172330949052254e-06, "loss": 0.0274, "step": 119190 }, { "epoch": 0.9644793268063759, "grad_norm": 0.15628281235694885, "learning_rate": 6.1716445238468036e-06, "loss": 0.0175, "step": 119200 }, { "epoch": 0.9645602395015778, "grad_norm": 0.21624726057052612, "learning_rate": 6.170958075275376e-06, "loss": 0.0203, "step": 119210 }, { "epoch": 0.9646411521967797, "grad_norm": 0.2928691804409027, "learning_rate": 6.170271603351661e-06, "loss": 0.0337, "step": 119220 }, { "epoch": 0.9647220648919815, "grad_norm": 0.6359908580780029, "learning_rate": 6.169585108089351e-06, "loss": 0.0263, "step": 119230 }, { "epoch": 0.9648029775871835, "grad_norm": 0.675714373588562, "learning_rate": 6.168898589502135e-06, "loss": 0.0176, "step": 119240 }, { "epoch": 0.9648838902823853, "grad_norm": 0.5177713632583618, "learning_rate": 6.168212047603702e-06, "loss": 0.0305, "step": 119250 }, { "epoch": 0.9649648029775871, "grad_norm": 0.44594866037368774, "learning_rate": 6.167525482407748e-06, "loss": 0.0334, "step": 119260 }, { "epoch": 0.9650457156727891, "grad_norm": 0.38271820545196533, "learning_rate": 6.166838893927962e-06, "loss": 0.0248, "step": 119270 }, { "epoch": 0.9651266283679909, "grad_norm": 0.43979158997535706, "learning_rate": 6.1661522821780385e-06, "loss": 0.0291, "step": 119280 }, { "epoch": 0.9652075410631928, "grad_norm": 0.254900187253952, "learning_rate": 6.165465647171669e-06, "loss": 0.0275, "step": 119290 }, { "epoch": 0.9652884537583947, "grad_norm": 0.3332623243331909, "learning_rate": 6.164778988922546e-06, "loss": 0.0236, "step": 119300 }, { "epoch": 0.9653693664535966, "grad_norm": 0.4468452036380768, "learning_rate": 6.164092307444368e-06, "loss": 0.0211, "step": 119310 }, { "epoch": 0.9654502791487984, "grad_norm": 0.5587648749351501, "learning_rate": 6.163405602750825e-06, "loss": 0.0226, "step": 119320 }, { "epoch": 0.9655311918440003, "grad_norm": 0.4710986316204071, "learning_rate": 6.162718874855613e-06, "loss": 0.0303, "step": 119330 }, { "epoch": 0.9656121045392022, "grad_norm": 0.15064191818237305, "learning_rate": 6.162032123772428e-06, "loss": 0.0228, "step": 119340 }, { "epoch": 0.965693017234404, "grad_norm": 0.41541799902915955, "learning_rate": 6.161345349514965e-06, "loss": 0.0272, "step": 119350 }, { "epoch": 0.965773929929606, "grad_norm": 0.14919918775558472, "learning_rate": 6.160658552096922e-06, "loss": 0.0326, "step": 119360 }, { "epoch": 0.9658548426248078, "grad_norm": 0.3728121221065521, "learning_rate": 6.159971731531994e-06, "loss": 0.034, "step": 119370 }, { "epoch": 0.9659357553200097, "grad_norm": 0.25969716906547546, "learning_rate": 6.159284887833878e-06, "loss": 0.0373, "step": 119380 }, { "epoch": 0.9660166680152116, "grad_norm": 0.8704550862312317, "learning_rate": 6.158598021016273e-06, "loss": 0.0406, "step": 119390 }, { "epoch": 0.9660975807104135, "grad_norm": 0.258836030960083, "learning_rate": 6.157911131092875e-06, "loss": 0.0206, "step": 119400 }, { "epoch": 0.9661784934056153, "grad_norm": 0.2928456664085388, "learning_rate": 6.157224218077385e-06, "loss": 0.022, "step": 119410 }, { "epoch": 0.9662594061008172, "grad_norm": 0.34816548228263855, "learning_rate": 6.156537281983501e-06, "loss": 0.0208, "step": 119420 }, { "epoch": 0.9663403187960191, "grad_norm": 0.40229323506355286, "learning_rate": 6.1558503228249235e-06, "loss": 0.0249, "step": 119430 }, { "epoch": 0.966421231491221, "grad_norm": 0.33902695775032043, "learning_rate": 6.155163340615349e-06, "loss": 0.0219, "step": 119440 }, { "epoch": 0.9665021441864229, "grad_norm": 0.6476215124130249, "learning_rate": 6.154476335368481e-06, "loss": 0.0274, "step": 119450 }, { "epoch": 0.9665830568816247, "grad_norm": 0.22715292870998383, "learning_rate": 6.15378930709802e-06, "loss": 0.0173, "step": 119460 }, { "epoch": 0.9666639695768267, "grad_norm": 0.2851291298866272, "learning_rate": 6.153102255817668e-06, "loss": 0.0287, "step": 119470 }, { "epoch": 0.9667448822720285, "grad_norm": 0.42695915699005127, "learning_rate": 6.152415181541125e-06, "loss": 0.0378, "step": 119480 }, { "epoch": 0.9668257949672303, "grad_norm": 0.4635707437992096, "learning_rate": 6.151728084282095e-06, "loss": 0.0333, "step": 119490 }, { "epoch": 0.9669067076624323, "grad_norm": 0.6047472357749939, "learning_rate": 6.151040964054279e-06, "loss": 0.0343, "step": 119500 }, { "epoch": 0.9669876203576341, "grad_norm": 0.31920763850212097, "learning_rate": 6.150353820871381e-06, "loss": 0.0274, "step": 119510 }, { "epoch": 0.967068533052836, "grad_norm": 0.36487051844596863, "learning_rate": 6.149666654747103e-06, "loss": 0.0206, "step": 119520 }, { "epoch": 0.9671494457480379, "grad_norm": 0.2828507721424103, "learning_rate": 6.148979465695153e-06, "loss": 0.0193, "step": 119530 }, { "epoch": 0.9672303584432398, "grad_norm": 0.24372835457324982, "learning_rate": 6.148292253729231e-06, "loss": 0.0225, "step": 119540 }, { "epoch": 0.9673112711384416, "grad_norm": 0.20841151475906372, "learning_rate": 6.1476050188630465e-06, "loss": 0.0209, "step": 119550 }, { "epoch": 0.9673921838336434, "grad_norm": 0.6322450041770935, "learning_rate": 6.146917761110301e-06, "loss": 0.0371, "step": 119560 }, { "epoch": 0.9674730965288454, "grad_norm": 0.17144565284252167, "learning_rate": 6.146230480484703e-06, "loss": 0.0272, "step": 119570 }, { "epoch": 0.9675540092240472, "grad_norm": 0.3579259514808655, "learning_rate": 6.145543176999958e-06, "loss": 0.0332, "step": 119580 }, { "epoch": 0.9676349219192492, "grad_norm": 0.3573393225669861, "learning_rate": 6.144855850669771e-06, "loss": 0.0223, "step": 119590 }, { "epoch": 0.967715834614451, "grad_norm": 0.6600172519683838, "learning_rate": 6.1441685015078525e-06, "loss": 0.0241, "step": 119600 }, { "epoch": 0.9677967473096529, "grad_norm": 0.3535291850566864, "learning_rate": 6.143481129527908e-06, "loss": 0.0281, "step": 119610 }, { "epoch": 0.9678776600048548, "grad_norm": 0.31000688672065735, "learning_rate": 6.142793734743646e-06, "loss": 0.013, "step": 119620 }, { "epoch": 0.9679585727000566, "grad_norm": 0.7044485807418823, "learning_rate": 6.1421063171687755e-06, "loss": 0.0313, "step": 119630 }, { "epoch": 0.9680394853952585, "grad_norm": 0.4073227345943451, "learning_rate": 6.141418876817007e-06, "loss": 0.032, "step": 119640 }, { "epoch": 0.9681203980904604, "grad_norm": 0.32897821068763733, "learning_rate": 6.140731413702047e-06, "loss": 0.0355, "step": 119650 }, { "epoch": 0.9682013107856623, "grad_norm": 0.26469558477401733, "learning_rate": 6.1400439278376075e-06, "loss": 0.0279, "step": 119660 }, { "epoch": 0.9682822234808641, "grad_norm": 0.6341250538825989, "learning_rate": 6.1393564192374e-06, "loss": 0.0299, "step": 119670 }, { "epoch": 0.9683631361760661, "grad_norm": 0.1430354118347168, "learning_rate": 6.138668887915133e-06, "loss": 0.0198, "step": 119680 }, { "epoch": 0.9684440488712679, "grad_norm": 0.6593372821807861, "learning_rate": 6.137981333884519e-06, "loss": 0.029, "step": 119690 }, { "epoch": 0.9685249615664698, "grad_norm": 0.3636895418167114, "learning_rate": 6.137293757159269e-06, "loss": 0.0233, "step": 119700 }, { "epoch": 0.9686058742616717, "grad_norm": 0.22880342602729797, "learning_rate": 6.136606157753097e-06, "loss": 0.015, "step": 119710 }, { "epoch": 0.9686867869568735, "grad_norm": 0.18657740950584412, "learning_rate": 6.135918535679714e-06, "loss": 0.026, "step": 119720 }, { "epoch": 0.9687676996520754, "grad_norm": 0.3948960602283478, "learning_rate": 6.1352308909528335e-06, "loss": 0.028, "step": 119730 }, { "epoch": 0.9688486123472773, "grad_norm": 0.3382587432861328, "learning_rate": 6.13454322358617e-06, "loss": 0.0232, "step": 119740 }, { "epoch": 0.9689295250424792, "grad_norm": 0.41509053111076355, "learning_rate": 6.133855533593438e-06, "loss": 0.0288, "step": 119750 }, { "epoch": 0.969010437737681, "grad_norm": 0.35625192523002625, "learning_rate": 6.133167820988349e-06, "loss": 0.0347, "step": 119760 }, { "epoch": 0.969091350432883, "grad_norm": 0.28988566994667053, "learning_rate": 6.132480085784621e-06, "loss": 0.0169, "step": 119770 }, { "epoch": 0.9691722631280848, "grad_norm": 0.2757348120212555, "learning_rate": 6.131792327995969e-06, "loss": 0.0335, "step": 119780 }, { "epoch": 0.9692531758232866, "grad_norm": 0.41372939944267273, "learning_rate": 6.131104547636108e-06, "loss": 0.0294, "step": 119790 }, { "epoch": 0.9693340885184886, "grad_norm": 0.24888281524181366, "learning_rate": 6.1304167447187544e-06, "loss": 0.0167, "step": 119800 }, { "epoch": 0.9694150012136904, "grad_norm": 0.32472360134124756, "learning_rate": 6.1297289192576246e-06, "loss": 0.0172, "step": 119810 }, { "epoch": 0.9694959139088923, "grad_norm": 1.1030914783477783, "learning_rate": 6.129041071266437e-06, "loss": 0.0312, "step": 119820 }, { "epoch": 0.9695768266040942, "grad_norm": 0.6708600521087646, "learning_rate": 6.128353200758911e-06, "loss": 0.0362, "step": 119830 }, { "epoch": 0.9696577392992961, "grad_norm": 0.32679277658462524, "learning_rate": 6.127665307748759e-06, "loss": 0.0216, "step": 119840 }, { "epoch": 0.9697386519944979, "grad_norm": 0.4116196632385254, "learning_rate": 6.1269773922497046e-06, "loss": 0.0215, "step": 119850 }, { "epoch": 0.9698195646896998, "grad_norm": 0.444263756275177, "learning_rate": 6.1262894542754655e-06, "loss": 0.0203, "step": 119860 }, { "epoch": 0.9699004773849017, "grad_norm": 0.32357996702194214, "learning_rate": 6.125601493839761e-06, "loss": 0.0319, "step": 119870 }, { "epoch": 0.9699813900801035, "grad_norm": 0.3431931138038635, "learning_rate": 6.124913510956311e-06, "loss": 0.049, "step": 119880 }, { "epoch": 0.9700623027753055, "grad_norm": 0.4600096344947815, "learning_rate": 6.124225505638834e-06, "loss": 0.0323, "step": 119890 }, { "epoch": 0.9701432154705073, "grad_norm": 0.2791806757450104, "learning_rate": 6.123537477901054e-06, "loss": 0.0327, "step": 119900 }, { "epoch": 0.9702241281657092, "grad_norm": 0.11464440822601318, "learning_rate": 6.122849427756692e-06, "loss": 0.0402, "step": 119910 }, { "epoch": 0.9703050408609111, "grad_norm": 0.5209159851074219, "learning_rate": 6.122161355219467e-06, "loss": 0.0237, "step": 119920 }, { "epoch": 0.9703859535561129, "grad_norm": 0.5859810709953308, "learning_rate": 6.121473260303104e-06, "loss": 0.0223, "step": 119930 }, { "epoch": 0.9704668662513148, "grad_norm": 0.155231773853302, "learning_rate": 6.120785143021324e-06, "loss": 0.0336, "step": 119940 }, { "epoch": 0.9705477789465167, "grad_norm": 0.4316483736038208, "learning_rate": 6.120097003387849e-06, "loss": 0.0297, "step": 119950 }, { "epoch": 0.9706286916417186, "grad_norm": 0.3880285620689392, "learning_rate": 6.119408841416405e-06, "loss": 0.0301, "step": 119960 }, { "epoch": 0.9707096043369204, "grad_norm": 0.22097797691822052, "learning_rate": 6.118720657120714e-06, "loss": 0.0237, "step": 119970 }, { "epoch": 0.9707905170321224, "grad_norm": 0.48571059107780457, "learning_rate": 6.1180324505145025e-06, "loss": 0.0183, "step": 119980 }, { "epoch": 0.9708714297273242, "grad_norm": 0.41025662422180176, "learning_rate": 6.117344221611494e-06, "loss": 0.0315, "step": 119990 }, { "epoch": 0.9709523424225261, "grad_norm": 0.5725871324539185, "learning_rate": 6.1166559704254125e-06, "loss": 0.0198, "step": 120000 }, { "epoch": 0.971033255117728, "grad_norm": 0.29659202694892883, "learning_rate": 6.115967696969987e-06, "loss": 0.0269, "step": 120010 }, { "epoch": 0.9711141678129298, "grad_norm": 0.5175048112869263, "learning_rate": 6.115279401258941e-06, "loss": 0.0192, "step": 120020 }, { "epoch": 0.9711950805081317, "grad_norm": 0.5996086001396179, "learning_rate": 6.114591083306001e-06, "loss": 0.0292, "step": 120030 }, { "epoch": 0.9712759932033336, "grad_norm": 0.17728787660598755, "learning_rate": 6.1139027431248945e-06, "loss": 0.0214, "step": 120040 }, { "epoch": 0.9713569058985355, "grad_norm": 0.4554349482059479, "learning_rate": 6.113214380729351e-06, "loss": 0.0215, "step": 120050 }, { "epoch": 0.9714378185937373, "grad_norm": 0.3213081359863281, "learning_rate": 6.112525996133095e-06, "loss": 0.0223, "step": 120060 }, { "epoch": 0.9715187312889393, "grad_norm": 0.8077605366706848, "learning_rate": 6.111837589349858e-06, "loss": 0.0341, "step": 120070 }, { "epoch": 0.9715996439841411, "grad_norm": 0.4461977183818817, "learning_rate": 6.1111491603933675e-06, "loss": 0.022, "step": 120080 }, { "epoch": 0.9716805566793429, "grad_norm": 0.29759708046913147, "learning_rate": 6.1104607092773515e-06, "loss": 0.0254, "step": 120090 }, { "epoch": 0.9717614693745449, "grad_norm": 0.3530043661594391, "learning_rate": 6.109772236015542e-06, "loss": 0.0239, "step": 120100 }, { "epoch": 0.9718423820697467, "grad_norm": 0.31743288040161133, "learning_rate": 6.109083740621667e-06, "loss": 0.0278, "step": 120110 }, { "epoch": 0.9719232947649487, "grad_norm": 0.26298871636390686, "learning_rate": 6.10839522310946e-06, "loss": 0.0252, "step": 120120 }, { "epoch": 0.9720042074601505, "grad_norm": 0.6478415131568909, "learning_rate": 6.107706683492649e-06, "loss": 0.0208, "step": 120130 }, { "epoch": 0.9720851201553524, "grad_norm": 0.4439146816730499, "learning_rate": 6.107018121784966e-06, "loss": 0.0392, "step": 120140 }, { "epoch": 0.9721660328505543, "grad_norm": 0.28625863790512085, "learning_rate": 6.1063295380001445e-06, "loss": 0.0256, "step": 120150 }, { "epoch": 0.9722469455457561, "grad_norm": 0.5044764876365662, "learning_rate": 6.105640932151917e-06, "loss": 0.0442, "step": 120160 }, { "epoch": 0.972327858240958, "grad_norm": 0.08623325079679489, "learning_rate": 6.104952304254012e-06, "loss": 0.0296, "step": 120170 }, { "epoch": 0.9724087709361598, "grad_norm": 0.23783090710639954, "learning_rate": 6.10426365432017e-06, "loss": 0.0272, "step": 120180 }, { "epoch": 0.9724896836313618, "grad_norm": 0.28816214203834534, "learning_rate": 6.103574982364118e-06, "loss": 0.0265, "step": 120190 }, { "epoch": 0.9725705963265636, "grad_norm": 0.31634122133255005, "learning_rate": 6.102886288399594e-06, "loss": 0.0119, "step": 120200 }, { "epoch": 0.9726515090217656, "grad_norm": 0.40316373109817505, "learning_rate": 6.102197572440331e-06, "loss": 0.0212, "step": 120210 }, { "epoch": 0.9727324217169674, "grad_norm": 0.3896730840206146, "learning_rate": 6.101508834500062e-06, "loss": 0.0378, "step": 120220 }, { "epoch": 0.9728133344121692, "grad_norm": 0.24238501489162445, "learning_rate": 6.100820074592527e-06, "loss": 0.0349, "step": 120230 }, { "epoch": 0.9728942471073712, "grad_norm": 0.572842001914978, "learning_rate": 6.100131292731459e-06, "loss": 0.0316, "step": 120240 }, { "epoch": 0.972975159802573, "grad_norm": 0.42619240283966064, "learning_rate": 6.099442488930595e-06, "loss": 0.0336, "step": 120250 }, { "epoch": 0.9730560724977749, "grad_norm": 0.4203534722328186, "learning_rate": 6.098753663203672e-06, "loss": 0.0212, "step": 120260 }, { "epoch": 0.9731369851929768, "grad_norm": 0.4835767447948456, "learning_rate": 6.098064815564426e-06, "loss": 0.0305, "step": 120270 }, { "epoch": 0.9732178978881787, "grad_norm": 0.3772258758544922, "learning_rate": 6.097375946026596e-06, "loss": 0.0183, "step": 120280 }, { "epoch": 0.9732988105833805, "grad_norm": 0.1377778947353363, "learning_rate": 6.096687054603918e-06, "loss": 0.04, "step": 120290 }, { "epoch": 0.9733797232785825, "grad_norm": 0.4365558922290802, "learning_rate": 6.095998141310133e-06, "loss": 0.0311, "step": 120300 }, { "epoch": 0.9734606359737843, "grad_norm": 0.32058510184288025, "learning_rate": 6.095309206158979e-06, "loss": 0.0225, "step": 120310 }, { "epoch": 0.9735415486689861, "grad_norm": 0.44441837072372437, "learning_rate": 6.094620249164196e-06, "loss": 0.0302, "step": 120320 }, { "epoch": 0.9736224613641881, "grad_norm": 0.2247929573059082, "learning_rate": 6.09393127033952e-06, "loss": 0.0198, "step": 120330 }, { "epoch": 0.9737033740593899, "grad_norm": 0.259310245513916, "learning_rate": 6.0932422696986974e-06, "loss": 0.0231, "step": 120340 }, { "epoch": 0.9737842867545918, "grad_norm": 0.18617939949035645, "learning_rate": 6.092553247255464e-06, "loss": 0.0211, "step": 120350 }, { "epoch": 0.9738651994497937, "grad_norm": 0.4240022301673889, "learning_rate": 6.091864203023563e-06, "loss": 0.017, "step": 120360 }, { "epoch": 0.9739461121449956, "grad_norm": 0.3611692488193512, "learning_rate": 6.091175137016736e-06, "loss": 0.0216, "step": 120370 }, { "epoch": 0.9740270248401974, "grad_norm": 0.4129657745361328, "learning_rate": 6.090486049248724e-06, "loss": 0.0281, "step": 120380 }, { "epoch": 0.9741079375353993, "grad_norm": 0.4319581687450409, "learning_rate": 6.08979693973327e-06, "loss": 0.0262, "step": 120390 }, { "epoch": 0.9741888502306012, "grad_norm": 0.4934118092060089, "learning_rate": 6.089107808484116e-06, "loss": 0.0317, "step": 120400 }, { "epoch": 0.974269762925803, "grad_norm": 0.19900664687156677, "learning_rate": 6.088418655515006e-06, "loss": 0.027, "step": 120410 }, { "epoch": 0.974350675621005, "grad_norm": 0.6175819039344788, "learning_rate": 6.087729480839686e-06, "loss": 0.0202, "step": 120420 }, { "epoch": 0.9744315883162068, "grad_norm": 0.7761678695678711, "learning_rate": 6.087040284471896e-06, "loss": 0.0266, "step": 120430 }, { "epoch": 0.9745125010114087, "grad_norm": 0.26917698979377747, "learning_rate": 6.086351066425381e-06, "loss": 0.0326, "step": 120440 }, { "epoch": 0.9745934137066106, "grad_norm": 0.25738322734832764, "learning_rate": 6.085661826713889e-06, "loss": 0.0262, "step": 120450 }, { "epoch": 0.9746743264018124, "grad_norm": 0.40368083119392395, "learning_rate": 6.0849725653511634e-06, "loss": 0.0219, "step": 120460 }, { "epoch": 0.9747552390970143, "grad_norm": 0.4930611848831177, "learning_rate": 6.08428328235095e-06, "loss": 0.0279, "step": 120470 }, { "epoch": 0.9748361517922162, "grad_norm": 0.357011079788208, "learning_rate": 6.083593977726997e-06, "loss": 0.0228, "step": 120480 }, { "epoch": 0.9749170644874181, "grad_norm": 0.3408024311065674, "learning_rate": 6.082904651493049e-06, "loss": 0.0302, "step": 120490 }, { "epoch": 0.9749979771826199, "grad_norm": 0.3945739269256592, "learning_rate": 6.0822153036628525e-06, "loss": 0.0387, "step": 120500 }, { "epoch": 0.9750788898778219, "grad_norm": 0.3535522222518921, "learning_rate": 6.081525934250159e-06, "loss": 0.0171, "step": 120510 }, { "epoch": 0.9751598025730237, "grad_norm": 0.33039402961730957, "learning_rate": 6.0808365432687114e-06, "loss": 0.0272, "step": 120520 }, { "epoch": 0.9752407152682255, "grad_norm": 0.5779383182525635, "learning_rate": 6.080147130732263e-06, "loss": 0.0441, "step": 120530 }, { "epoch": 0.9753216279634275, "grad_norm": 0.5365325212478638, "learning_rate": 6.079457696654558e-06, "loss": 0.0236, "step": 120540 }, { "epoch": 0.9754025406586293, "grad_norm": 0.3350776731967926, "learning_rate": 6.078768241049349e-06, "loss": 0.0149, "step": 120550 }, { "epoch": 0.9754834533538312, "grad_norm": 0.4212894141674042, "learning_rate": 6.078078763930385e-06, "loss": 0.0393, "step": 120560 }, { "epoch": 0.9755643660490331, "grad_norm": 0.4869990646839142, "learning_rate": 6.077389265311417e-06, "loss": 0.0263, "step": 120570 }, { "epoch": 0.975645278744235, "grad_norm": 0.21541427075862885, "learning_rate": 6.076699745206194e-06, "loss": 0.033, "step": 120580 }, { "epoch": 0.9757261914394368, "grad_norm": 0.6723233461380005, "learning_rate": 6.076010203628466e-06, "loss": 0.0287, "step": 120590 }, { "epoch": 0.9758071041346387, "grad_norm": 0.15202046930789948, "learning_rate": 6.075320640591988e-06, "loss": 0.0239, "step": 120600 }, { "epoch": 0.9758880168298406, "grad_norm": 0.5619229674339294, "learning_rate": 6.074631056110509e-06, "loss": 0.0222, "step": 120610 }, { "epoch": 0.9759689295250424, "grad_norm": 0.35062217712402344, "learning_rate": 6.073941450197782e-06, "loss": 0.0231, "step": 120620 }, { "epoch": 0.9760498422202444, "grad_norm": 0.6140514016151428, "learning_rate": 6.073251822867561e-06, "loss": 0.0377, "step": 120630 }, { "epoch": 0.9761307549154462, "grad_norm": 0.30737268924713135, "learning_rate": 6.072562174133599e-06, "loss": 0.0273, "step": 120640 }, { "epoch": 0.9762116676106481, "grad_norm": 0.2208976000547409, "learning_rate": 6.071872504009645e-06, "loss": 0.0346, "step": 120650 }, { "epoch": 0.97629258030585, "grad_norm": 0.2952912151813507, "learning_rate": 6.071182812509459e-06, "loss": 0.027, "step": 120660 }, { "epoch": 0.9763734930010519, "grad_norm": 0.6239324808120728, "learning_rate": 6.070493099646794e-06, "loss": 0.0296, "step": 120670 }, { "epoch": 0.9764544056962537, "grad_norm": 0.6670848727226257, "learning_rate": 6.069803365435403e-06, "loss": 0.0272, "step": 120680 }, { "epoch": 0.9765353183914556, "grad_norm": 0.4863775074481964, "learning_rate": 6.069113609889042e-06, "loss": 0.0294, "step": 120690 }, { "epoch": 0.9766162310866575, "grad_norm": 0.5781436562538147, "learning_rate": 6.068423833021469e-06, "loss": 0.0451, "step": 120700 }, { "epoch": 0.9766971437818593, "grad_norm": 0.37349677085876465, "learning_rate": 6.067734034846435e-06, "loss": 0.0263, "step": 120710 }, { "epoch": 0.9767780564770613, "grad_norm": 0.9103744029998779, "learning_rate": 6.067044215377702e-06, "loss": 0.0335, "step": 120720 }, { "epoch": 0.9768589691722631, "grad_norm": 0.38707396388053894, "learning_rate": 6.066354374629024e-06, "loss": 0.0275, "step": 120730 }, { "epoch": 0.976939881867465, "grad_norm": 0.8713588118553162, "learning_rate": 6.065664512614159e-06, "loss": 0.0302, "step": 120740 }, { "epoch": 0.9770207945626669, "grad_norm": 0.23901692032814026, "learning_rate": 6.064974629346867e-06, "loss": 0.0324, "step": 120750 }, { "epoch": 0.9771017072578687, "grad_norm": 0.4052366018295288, "learning_rate": 6.064284724840902e-06, "loss": 0.0316, "step": 120760 }, { "epoch": 0.9771826199530707, "grad_norm": 0.3421311676502228, "learning_rate": 6.0635947991100255e-06, "loss": 0.0217, "step": 120770 }, { "epoch": 0.9772635326482725, "grad_norm": 0.20576882362365723, "learning_rate": 6.0629048521679965e-06, "loss": 0.0292, "step": 120780 }, { "epoch": 0.9773444453434744, "grad_norm": 0.5910294651985168, "learning_rate": 6.062214884028574e-06, "loss": 0.0434, "step": 120790 }, { "epoch": 0.9774253580386763, "grad_norm": 0.4221534729003906, "learning_rate": 6.0615248947055185e-06, "loss": 0.0273, "step": 120800 }, { "epoch": 0.9775062707338782, "grad_norm": 0.3398286700248718, "learning_rate": 6.060834884212588e-06, "loss": 0.0297, "step": 120810 }, { "epoch": 0.97758718342908, "grad_norm": 0.4086586833000183, "learning_rate": 6.060144852563547e-06, "loss": 0.0274, "step": 120820 }, { "epoch": 0.9776680961242818, "grad_norm": 0.2992108166217804, "learning_rate": 6.0594547997721555e-06, "loss": 0.0127, "step": 120830 }, { "epoch": 0.9777490088194838, "grad_norm": 0.9019862413406372, "learning_rate": 6.058764725852173e-06, "loss": 0.0448, "step": 120840 }, { "epoch": 0.9778299215146856, "grad_norm": 0.47753870487213135, "learning_rate": 6.058074630817363e-06, "loss": 0.0204, "step": 120850 }, { "epoch": 0.9779108342098876, "grad_norm": 0.11681803315877914, "learning_rate": 6.05738451468149e-06, "loss": 0.0265, "step": 120860 }, { "epoch": 0.9779917469050894, "grad_norm": 0.46948131918907166, "learning_rate": 6.0566943774583145e-06, "loss": 0.0294, "step": 120870 }, { "epoch": 0.9780726596002913, "grad_norm": 0.15222415328025818, "learning_rate": 6.0560042191616e-06, "loss": 0.024, "step": 120880 }, { "epoch": 0.9781535722954932, "grad_norm": 0.6069643497467041, "learning_rate": 6.05531403980511e-06, "loss": 0.0397, "step": 120890 }, { "epoch": 0.978234484990695, "grad_norm": 0.2772913873195648, "learning_rate": 6.054623839402611e-06, "loss": 0.0299, "step": 120900 }, { "epoch": 0.9783153976858969, "grad_norm": 0.2709035873413086, "learning_rate": 6.053933617967866e-06, "loss": 0.019, "step": 120910 }, { "epoch": 0.9783963103810988, "grad_norm": 0.3505125939846039, "learning_rate": 6.053243375514638e-06, "loss": 0.0231, "step": 120920 }, { "epoch": 0.9784772230763007, "grad_norm": 0.6038029789924622, "learning_rate": 6.052553112056695e-06, "loss": 0.0277, "step": 120930 }, { "epoch": 0.9785581357715025, "grad_norm": 0.36768460273742676, "learning_rate": 6.051862827607804e-06, "loss": 0.0249, "step": 120940 }, { "epoch": 0.9786390484667045, "grad_norm": 0.45332619547843933, "learning_rate": 6.051172522181727e-06, "loss": 0.0355, "step": 120950 }, { "epoch": 0.9787199611619063, "grad_norm": 0.868343710899353, "learning_rate": 6.050482195792235e-06, "loss": 0.0261, "step": 120960 }, { "epoch": 0.9788008738571082, "grad_norm": 0.34964513778686523, "learning_rate": 6.0497918484530925e-06, "loss": 0.0296, "step": 120970 }, { "epoch": 0.9788817865523101, "grad_norm": 0.5678244233131409, "learning_rate": 6.049101480178068e-06, "loss": 0.0286, "step": 120980 }, { "epoch": 0.9789626992475119, "grad_norm": 0.19675004482269287, "learning_rate": 6.048411090980929e-06, "loss": 0.0267, "step": 120990 }, { "epoch": 0.9790436119427138, "grad_norm": 0.23480580747127533, "learning_rate": 6.047720680875443e-06, "loss": 0.0196, "step": 121000 }, { "epoch": 0.9791245246379157, "grad_norm": 0.29228875041007996, "learning_rate": 6.047030249875383e-06, "loss": 0.0201, "step": 121010 }, { "epoch": 0.9792054373331176, "grad_norm": 0.11650720238685608, "learning_rate": 6.046339797994513e-06, "loss": 0.0268, "step": 121020 }, { "epoch": 0.9792863500283194, "grad_norm": 0.3645036518573761, "learning_rate": 6.045649325246603e-06, "loss": 0.0259, "step": 121030 }, { "epoch": 0.9793672627235214, "grad_norm": 0.3578357994556427, "learning_rate": 6.044958831645426e-06, "loss": 0.0283, "step": 121040 }, { "epoch": 0.9794481754187232, "grad_norm": 0.25193944573402405, "learning_rate": 6.044268317204753e-06, "loss": 0.0198, "step": 121050 }, { "epoch": 0.979529088113925, "grad_norm": 0.433132141828537, "learning_rate": 6.043577781938351e-06, "loss": 0.0204, "step": 121060 }, { "epoch": 0.979610000809127, "grad_norm": 0.18731775879859924, "learning_rate": 6.042887225859993e-06, "loss": 0.014, "step": 121070 }, { "epoch": 0.9796909135043288, "grad_norm": 0.7482723593711853, "learning_rate": 6.042196648983453e-06, "loss": 0.0261, "step": 121080 }, { "epoch": 0.9797718261995307, "grad_norm": 0.42631351947784424, "learning_rate": 6.041506051322499e-06, "loss": 0.0211, "step": 121090 }, { "epoch": 0.9798527388947326, "grad_norm": 0.5067154169082642, "learning_rate": 6.040815432890907e-06, "loss": 0.0313, "step": 121100 }, { "epoch": 0.9799336515899345, "grad_norm": 0.41677340865135193, "learning_rate": 6.0401247937024466e-06, "loss": 0.0319, "step": 121110 }, { "epoch": 0.9800145642851363, "grad_norm": 0.563428521156311, "learning_rate": 6.039434133770895e-06, "loss": 0.0248, "step": 121120 }, { "epoch": 0.9800954769803382, "grad_norm": 0.5519830584526062, "learning_rate": 6.038743453110023e-06, "loss": 0.0288, "step": 121130 }, { "epoch": 0.9801763896755401, "grad_norm": 0.4294428825378418, "learning_rate": 6.038052751733604e-06, "loss": 0.0213, "step": 121140 }, { "epoch": 0.9802573023707419, "grad_norm": 0.48253288865089417, "learning_rate": 6.037362029655417e-06, "loss": 0.0348, "step": 121150 }, { "epoch": 0.9803382150659439, "grad_norm": 0.20538140833377838, "learning_rate": 6.036671286889233e-06, "loss": 0.0188, "step": 121160 }, { "epoch": 0.9804191277611457, "grad_norm": 0.4709146320819855, "learning_rate": 6.035980523448829e-06, "loss": 0.0264, "step": 121170 }, { "epoch": 0.9805000404563476, "grad_norm": 0.1912272423505783, "learning_rate": 6.03528973934798e-06, "loss": 0.0239, "step": 121180 }, { "epoch": 0.9805809531515495, "grad_norm": 0.4919400215148926, "learning_rate": 6.034598934600464e-06, "loss": 0.024, "step": 121190 }, { "epoch": 0.9806618658467513, "grad_norm": 0.07973724603652954, "learning_rate": 6.033908109220055e-06, "loss": 0.0288, "step": 121200 }, { "epoch": 0.9807427785419532, "grad_norm": 0.3049072325229645, "learning_rate": 6.033217263220532e-06, "loss": 0.0226, "step": 121210 }, { "epoch": 0.9808236912371551, "grad_norm": 0.4272693991661072, "learning_rate": 6.032526396615671e-06, "loss": 0.0201, "step": 121220 }, { "epoch": 0.980904603932357, "grad_norm": 0.27533698081970215, "learning_rate": 6.031835509419254e-06, "loss": 0.023, "step": 121230 }, { "epoch": 0.9809855166275588, "grad_norm": 0.435669481754303, "learning_rate": 6.031144601645054e-06, "loss": 0.0319, "step": 121240 }, { "epoch": 0.9810664293227608, "grad_norm": 0.34031155705451965, "learning_rate": 6.03045367330685e-06, "loss": 0.0219, "step": 121250 }, { "epoch": 0.9811473420179626, "grad_norm": 0.7338642477989197, "learning_rate": 6.029762724418424e-06, "loss": 0.0426, "step": 121260 }, { "epoch": 0.9812282547131645, "grad_norm": 0.4414200484752655, "learning_rate": 6.0290717549935555e-06, "loss": 0.0214, "step": 121270 }, { "epoch": 0.9813091674083664, "grad_norm": 0.3055782616138458, "learning_rate": 6.028380765046022e-06, "loss": 0.0321, "step": 121280 }, { "epoch": 0.9813900801035682, "grad_norm": 0.22748692333698273, "learning_rate": 6.027689754589606e-06, "loss": 0.0267, "step": 121290 }, { "epoch": 0.9814709927987701, "grad_norm": 0.19305461645126343, "learning_rate": 6.026998723638087e-06, "loss": 0.0276, "step": 121300 }, { "epoch": 0.981551905493972, "grad_norm": 0.6248748302459717, "learning_rate": 6.026307672205247e-06, "loss": 0.0262, "step": 121310 }, { "epoch": 0.9816328181891739, "grad_norm": 0.20460335910320282, "learning_rate": 6.025616600304866e-06, "loss": 0.0254, "step": 121320 }, { "epoch": 0.9817137308843757, "grad_norm": 0.14115966856479645, "learning_rate": 6.0249255079507276e-06, "loss": 0.0186, "step": 121330 }, { "epoch": 0.9817946435795777, "grad_norm": 0.47022944688796997, "learning_rate": 6.024234395156614e-06, "loss": 0.0275, "step": 121340 }, { "epoch": 0.9818755562747795, "grad_norm": 0.3527242839336395, "learning_rate": 6.023543261936308e-06, "loss": 0.0332, "step": 121350 }, { "epoch": 0.9819564689699813, "grad_norm": 0.4625534415245056, "learning_rate": 6.022852108303592e-06, "loss": 0.0259, "step": 121360 }, { "epoch": 0.9820373816651833, "grad_norm": 0.2162131369113922, "learning_rate": 6.02216093427225e-06, "loss": 0.0338, "step": 121370 }, { "epoch": 0.9821182943603851, "grad_norm": 0.1410539448261261, "learning_rate": 6.021469739856066e-06, "loss": 0.0244, "step": 121380 }, { "epoch": 0.982199207055587, "grad_norm": 0.4872356951236725, "learning_rate": 6.020778525068825e-06, "loss": 0.0334, "step": 121390 }, { "epoch": 0.9822801197507889, "grad_norm": 0.1783919334411621, "learning_rate": 6.020087289924311e-06, "loss": 0.041, "step": 121400 }, { "epoch": 0.9823610324459908, "grad_norm": 0.3109161853790283, "learning_rate": 6.019396034436308e-06, "loss": 0.0267, "step": 121410 }, { "epoch": 0.9824419451411927, "grad_norm": 0.5636948347091675, "learning_rate": 6.018704758618605e-06, "loss": 0.0343, "step": 121420 }, { "epoch": 0.9825228578363945, "grad_norm": 0.4019421637058258, "learning_rate": 6.018013462484986e-06, "loss": 0.0288, "step": 121430 }, { "epoch": 0.9826037705315964, "grad_norm": 0.4266161620616913, "learning_rate": 6.017322146049237e-06, "loss": 0.0386, "step": 121440 }, { "epoch": 0.9826846832267983, "grad_norm": 0.743483304977417, "learning_rate": 6.016630809325146e-06, "loss": 0.0308, "step": 121450 }, { "epoch": 0.9827655959220002, "grad_norm": 0.03348236158490181, "learning_rate": 6.015939452326499e-06, "loss": 0.0148, "step": 121460 }, { "epoch": 0.982846508617202, "grad_norm": 0.5137221217155457, "learning_rate": 6.015248075067085e-06, "loss": 0.0356, "step": 121470 }, { "epoch": 0.982927421312404, "grad_norm": 0.6779260039329529, "learning_rate": 6.014556677560691e-06, "loss": 0.0519, "step": 121480 }, { "epoch": 0.9830083340076058, "grad_norm": 0.8360458612442017, "learning_rate": 6.0138652598211065e-06, "loss": 0.0311, "step": 121490 }, { "epoch": 0.9830892467028076, "grad_norm": 0.6999005079269409, "learning_rate": 6.0131738218621195e-06, "loss": 0.0256, "step": 121500 }, { "epoch": 0.9831701593980096, "grad_norm": 0.29288944602012634, "learning_rate": 6.012482363697519e-06, "loss": 0.0267, "step": 121510 }, { "epoch": 0.9832510720932114, "grad_norm": 0.49644574522972107, "learning_rate": 6.0117908853410956e-06, "loss": 0.0276, "step": 121520 }, { "epoch": 0.9833319847884133, "grad_norm": 0.18150126934051514, "learning_rate": 6.011099386806639e-06, "loss": 0.0246, "step": 121530 }, { "epoch": 0.9834128974836152, "grad_norm": 0.5123111605644226, "learning_rate": 6.010407868107939e-06, "loss": 0.0317, "step": 121540 }, { "epoch": 0.9834938101788171, "grad_norm": 0.2015339881181717, "learning_rate": 6.009716329258788e-06, "loss": 0.0188, "step": 121550 }, { "epoch": 0.9835747228740189, "grad_norm": 0.2835295796394348, "learning_rate": 6.009024770272976e-06, "loss": 0.0336, "step": 121560 }, { "epoch": 0.9836556355692209, "grad_norm": 0.3054146468639374, "learning_rate": 6.008333191164295e-06, "loss": 0.0274, "step": 121570 }, { "epoch": 0.9837365482644227, "grad_norm": 0.7251073122024536, "learning_rate": 6.007641591946538e-06, "loss": 0.0367, "step": 121580 }, { "epoch": 0.9838174609596245, "grad_norm": 0.26696741580963135, "learning_rate": 6.006949972633496e-06, "loss": 0.0296, "step": 121590 }, { "epoch": 0.9838983736548265, "grad_norm": 0.4350050687789917, "learning_rate": 6.006258333238963e-06, "loss": 0.0269, "step": 121600 }, { "epoch": 0.9839792863500283, "grad_norm": 0.5670273303985596, "learning_rate": 6.005566673776731e-06, "loss": 0.0296, "step": 121610 }, { "epoch": 0.9840601990452302, "grad_norm": 0.44132938981056213, "learning_rate": 6.004874994260594e-06, "loss": 0.0324, "step": 121620 }, { "epoch": 0.9841411117404321, "grad_norm": 0.45822617411613464, "learning_rate": 6.004183294704349e-06, "loss": 0.0357, "step": 121630 }, { "epoch": 0.984222024435634, "grad_norm": 0.3151506781578064, "learning_rate": 6.003491575121787e-06, "loss": 0.0494, "step": 121640 }, { "epoch": 0.9843029371308358, "grad_norm": 0.29261699318885803, "learning_rate": 6.002799835526704e-06, "loss": 0.03, "step": 121650 }, { "epoch": 0.9843838498260377, "grad_norm": 0.5700427889823914, "learning_rate": 6.002108075932894e-06, "loss": 0.0274, "step": 121660 }, { "epoch": 0.9844647625212396, "grad_norm": 0.19839143753051758, "learning_rate": 6.001416296354156e-06, "loss": 0.0165, "step": 121670 }, { "epoch": 0.9845456752164414, "grad_norm": 0.3555707335472107, "learning_rate": 6.000724496804283e-06, "loss": 0.03, "step": 121680 }, { "epoch": 0.9846265879116434, "grad_norm": 0.37257084250450134, "learning_rate": 6.000032677297073e-06, "loss": 0.0257, "step": 121690 }, { "epoch": 0.9847075006068452, "grad_norm": 0.30304887890815735, "learning_rate": 5.9993408378463216e-06, "loss": 0.0306, "step": 121700 }, { "epoch": 0.9847884133020471, "grad_norm": 0.47869259119033813, "learning_rate": 5.9986489784658275e-06, "loss": 0.0253, "step": 121710 }, { "epoch": 0.984869325997249, "grad_norm": 0.5138556957244873, "learning_rate": 5.997957099169388e-06, "loss": 0.0474, "step": 121720 }, { "epoch": 0.9849502386924508, "grad_norm": 0.22788161039352417, "learning_rate": 5.9972651999707995e-06, "loss": 0.0209, "step": 121730 }, { "epoch": 0.9850311513876527, "grad_norm": 0.9499151110649109, "learning_rate": 5.996573280883863e-06, "loss": 0.0367, "step": 121740 }, { "epoch": 0.9851120640828546, "grad_norm": 0.5218007564544678, "learning_rate": 5.995881341922377e-06, "loss": 0.0313, "step": 121750 }, { "epoch": 0.9851929767780565, "grad_norm": 0.014122528955340385, "learning_rate": 5.995189383100139e-06, "loss": 0.0312, "step": 121760 }, { "epoch": 0.9852738894732583, "grad_norm": 1.025101900100708, "learning_rate": 5.99449740443095e-06, "loss": 0.0405, "step": 121770 }, { "epoch": 0.9853548021684603, "grad_norm": 0.3513593375682831, "learning_rate": 5.99380540592861e-06, "loss": 0.0135, "step": 121780 }, { "epoch": 0.9854357148636621, "grad_norm": 0.49550527334213257, "learning_rate": 5.993113387606919e-06, "loss": 0.0238, "step": 121790 }, { "epoch": 0.9855166275588639, "grad_norm": 0.07915312796831131, "learning_rate": 5.992421349479678e-06, "loss": 0.0327, "step": 121800 }, { "epoch": 0.9855975402540659, "grad_norm": 0.3506109416484833, "learning_rate": 5.991729291560687e-06, "loss": 0.0357, "step": 121810 }, { "epoch": 0.9856784529492677, "grad_norm": 0.6374923586845398, "learning_rate": 5.991037213863752e-06, "loss": 0.0316, "step": 121820 }, { "epoch": 0.9857593656444696, "grad_norm": 0.266776442527771, "learning_rate": 5.990345116402669e-06, "loss": 0.0209, "step": 121830 }, { "epoch": 0.9858402783396715, "grad_norm": 0.6387210488319397, "learning_rate": 5.989652999191245e-06, "loss": 0.0225, "step": 121840 }, { "epoch": 0.9859211910348734, "grad_norm": 0.38381147384643555, "learning_rate": 5.988960862243279e-06, "loss": 0.036, "step": 121850 }, { "epoch": 0.9860021037300752, "grad_norm": 0.2939307391643524, "learning_rate": 5.988268705572578e-06, "loss": 0.0139, "step": 121860 }, { "epoch": 0.9860830164252772, "grad_norm": 0.23116150498390198, "learning_rate": 5.987576529192945e-06, "loss": 0.0283, "step": 121870 }, { "epoch": 0.986163929120479, "grad_norm": 0.549940288066864, "learning_rate": 5.986884333118182e-06, "loss": 0.0618, "step": 121880 }, { "epoch": 0.9862448418156808, "grad_norm": 0.20665879547595978, "learning_rate": 5.986192117362093e-06, "loss": 0.0192, "step": 121890 }, { "epoch": 0.9863257545108828, "grad_norm": 0.6377382278442383, "learning_rate": 5.985499881938487e-06, "loss": 0.0362, "step": 121900 }, { "epoch": 0.9864066672060846, "grad_norm": 0.4527864158153534, "learning_rate": 5.984807626861165e-06, "loss": 0.0258, "step": 121910 }, { "epoch": 0.9864875799012865, "grad_norm": 0.32119596004486084, "learning_rate": 5.9841153521439336e-06, "loss": 0.0364, "step": 121920 }, { "epoch": 0.9865684925964884, "grad_norm": 0.49446937441825867, "learning_rate": 5.983423057800599e-06, "loss": 0.0327, "step": 121930 }, { "epoch": 0.9866494052916903, "grad_norm": 0.24890607595443726, "learning_rate": 5.982730743844969e-06, "loss": 0.0281, "step": 121940 }, { "epoch": 0.9867303179868921, "grad_norm": 0.3390759825706482, "learning_rate": 5.982038410290847e-06, "loss": 0.0295, "step": 121950 }, { "epoch": 0.986811230682094, "grad_norm": 0.6124514937400818, "learning_rate": 5.981346057152042e-06, "loss": 0.0284, "step": 121960 }, { "epoch": 0.9868921433772959, "grad_norm": 0.49721580743789673, "learning_rate": 5.980653684442363e-06, "loss": 0.0281, "step": 121970 }, { "epoch": 0.9869730560724977, "grad_norm": 0.6339683532714844, "learning_rate": 5.979961292175616e-06, "loss": 0.0258, "step": 121980 }, { "epoch": 0.9870539687676997, "grad_norm": 0.48528486490249634, "learning_rate": 5.979268880365609e-06, "loss": 0.0381, "step": 121990 }, { "epoch": 0.9871348814629015, "grad_norm": 0.32962456345558167, "learning_rate": 5.978576449026152e-06, "loss": 0.0214, "step": 122000 }, { "epoch": 0.9871348814629015, "eval_loss": 0.02508345991373062, "eval_runtime": 3.832, "eval_samples_per_second": 52.192, "eval_steps_per_second": 26.096, "step": 122000 }, { "epoch": 0.9872157941581035, "grad_norm": 0.3098955452442169, "learning_rate": 5.9778839981710555e-06, "loss": 0.0277, "step": 122010 }, { "epoch": 0.9872967068533053, "grad_norm": 0.3148452937602997, "learning_rate": 5.977191527814125e-06, "loss": 0.021, "step": 122020 }, { "epoch": 0.9873776195485071, "grad_norm": 0.23598870635032654, "learning_rate": 5.976499037969173e-06, "loss": 0.0242, "step": 122030 }, { "epoch": 0.987458532243709, "grad_norm": 0.4022393226623535, "learning_rate": 5.975806528650009e-06, "loss": 0.0191, "step": 122040 }, { "epoch": 0.9875394449389109, "grad_norm": 0.2798384428024292, "learning_rate": 5.975113999870445e-06, "loss": 0.0179, "step": 122050 }, { "epoch": 0.9876203576341128, "grad_norm": 0.2667788565158844, "learning_rate": 5.974421451644289e-06, "loss": 0.037, "step": 122060 }, { "epoch": 0.9877012703293147, "grad_norm": 0.3643583655357361, "learning_rate": 5.973728883985355e-06, "loss": 0.0242, "step": 122070 }, { "epoch": 0.9877821830245166, "grad_norm": 0.3510759770870209, "learning_rate": 5.973036296907454e-06, "loss": 0.0277, "step": 122080 }, { "epoch": 0.9878630957197184, "grad_norm": 0.4352872371673584, "learning_rate": 5.9723436904243985e-06, "loss": 0.0318, "step": 122090 }, { "epoch": 0.9879440084149202, "grad_norm": 0.7276754379272461, "learning_rate": 5.97165106455e-06, "loss": 0.024, "step": 122100 }, { "epoch": 0.9880249211101222, "grad_norm": 0.24429450929164886, "learning_rate": 5.970958419298072e-06, "loss": 0.0286, "step": 122110 }, { "epoch": 0.988105833805324, "grad_norm": 0.39251843094825745, "learning_rate": 5.97026575468243e-06, "loss": 0.0152, "step": 122120 }, { "epoch": 0.988186746500526, "grad_norm": 0.31664562225341797, "learning_rate": 5.969573070716885e-06, "loss": 0.0209, "step": 122130 }, { "epoch": 0.9882676591957278, "grad_norm": 0.5922419428825378, "learning_rate": 5.9688803674152505e-06, "loss": 0.0209, "step": 122140 }, { "epoch": 0.9883485718909297, "grad_norm": 0.27573123574256897, "learning_rate": 5.9681876447913434e-06, "loss": 0.0255, "step": 122150 }, { "epoch": 0.9884294845861316, "grad_norm": 0.1877116560935974, "learning_rate": 5.967494902858979e-06, "loss": 0.0242, "step": 122160 }, { "epoch": 0.9885103972813334, "grad_norm": 0.2899636924266815, "learning_rate": 5.96680214163197e-06, "loss": 0.0332, "step": 122170 }, { "epoch": 0.9885913099765353, "grad_norm": 0.6408886313438416, "learning_rate": 5.966109361124134e-06, "loss": 0.02, "step": 122180 }, { "epoch": 0.9886722226717372, "grad_norm": 0.5700345635414124, "learning_rate": 5.965416561349285e-06, "loss": 0.0339, "step": 122190 }, { "epoch": 0.9887531353669391, "grad_norm": 0.23486967384815216, "learning_rate": 5.964723742321242e-06, "loss": 0.0284, "step": 122200 }, { "epoch": 0.9888340480621409, "grad_norm": 1.209540605545044, "learning_rate": 5.96403090405382e-06, "loss": 0.0289, "step": 122210 }, { "epoch": 0.9889149607573429, "grad_norm": 0.4905691146850586, "learning_rate": 5.963338046560836e-06, "loss": 0.0234, "step": 122220 }, { "epoch": 0.9889958734525447, "grad_norm": 0.34758415818214417, "learning_rate": 5.962645169856111e-06, "loss": 0.0305, "step": 122230 }, { "epoch": 0.9890767861477466, "grad_norm": 0.04109979420900345, "learning_rate": 5.961952273953458e-06, "loss": 0.0137, "step": 122240 }, { "epoch": 0.9891576988429485, "grad_norm": 0.2418995052576065, "learning_rate": 5.961259358866698e-06, "loss": 0.0248, "step": 122250 }, { "epoch": 0.9892386115381503, "grad_norm": 0.2672691345214844, "learning_rate": 5.96056642460965e-06, "loss": 0.0315, "step": 122260 }, { "epoch": 0.9893195242333522, "grad_norm": 0.2446783035993576, "learning_rate": 5.959873471196132e-06, "loss": 0.0293, "step": 122270 }, { "epoch": 0.9894004369285541, "grad_norm": 0.3492155969142914, "learning_rate": 5.959180498639964e-06, "loss": 0.0401, "step": 122280 }, { "epoch": 0.989481349623756, "grad_norm": 0.29036998748779297, "learning_rate": 5.958487506954966e-06, "loss": 0.0396, "step": 122290 }, { "epoch": 0.9895622623189578, "grad_norm": 0.3380427956581116, "learning_rate": 5.957794496154958e-06, "loss": 0.031, "step": 122300 }, { "epoch": 0.9896431750141598, "grad_norm": 0.8149664998054504, "learning_rate": 5.957101466253762e-06, "loss": 0.0354, "step": 122310 }, { "epoch": 0.9897240877093616, "grad_norm": 0.322503924369812, "learning_rate": 5.956408417265196e-06, "loss": 0.0298, "step": 122320 }, { "epoch": 0.9898050004045634, "grad_norm": 0.4229881465435028, "learning_rate": 5.955715349203084e-06, "loss": 0.0232, "step": 122330 }, { "epoch": 0.9898859130997654, "grad_norm": 0.22253760695457458, "learning_rate": 5.955022262081247e-06, "loss": 0.0205, "step": 122340 }, { "epoch": 0.9899668257949672, "grad_norm": 0.5297670960426331, "learning_rate": 5.954329155913506e-06, "loss": 0.0321, "step": 122350 }, { "epoch": 0.9900477384901691, "grad_norm": 0.3364377021789551, "learning_rate": 5.953636030713687e-06, "loss": 0.0341, "step": 122360 }, { "epoch": 0.990128651185371, "grad_norm": 0.4694083631038666, "learning_rate": 5.9529428864956084e-06, "loss": 0.0402, "step": 122370 }, { "epoch": 0.9902095638805729, "grad_norm": 0.22709402441978455, "learning_rate": 5.9522497232730966e-06, "loss": 0.0142, "step": 122380 }, { "epoch": 0.9902904765757747, "grad_norm": 0.2692870795726776, "learning_rate": 5.9515565410599756e-06, "loss": 0.019, "step": 122390 }, { "epoch": 0.9903713892709766, "grad_norm": 0.5971506834030151, "learning_rate": 5.9508633398700665e-06, "loss": 0.0217, "step": 122400 }, { "epoch": 0.9904523019661785, "grad_norm": 0.37893545627593994, "learning_rate": 5.950170119717195e-06, "loss": 0.0415, "step": 122410 }, { "epoch": 0.9905332146613803, "grad_norm": 0.4017837345600128, "learning_rate": 5.9494768806151885e-06, "loss": 0.0261, "step": 122420 }, { "epoch": 0.9906141273565823, "grad_norm": 0.4552858769893646, "learning_rate": 5.948783622577867e-06, "loss": 0.0289, "step": 122430 }, { "epoch": 0.9906950400517841, "grad_norm": 0.25636592507362366, "learning_rate": 5.948090345619061e-06, "loss": 0.0355, "step": 122440 }, { "epoch": 0.990775952746986, "grad_norm": 0.337464302778244, "learning_rate": 5.9473970497525945e-06, "loss": 0.0306, "step": 122450 }, { "epoch": 0.9908568654421879, "grad_norm": 0.24359586834907532, "learning_rate": 5.946703734992294e-06, "loss": 0.0296, "step": 122460 }, { "epoch": 0.9909377781373897, "grad_norm": 0.36870211362838745, "learning_rate": 5.946010401351986e-06, "loss": 0.0225, "step": 122470 }, { "epoch": 0.9910186908325916, "grad_norm": 0.5234124064445496, "learning_rate": 5.945317048845498e-06, "loss": 0.0182, "step": 122480 }, { "epoch": 0.9910996035277935, "grad_norm": 0.25033023953437805, "learning_rate": 5.944623677486656e-06, "loss": 0.0282, "step": 122490 }, { "epoch": 0.9911805162229954, "grad_norm": 0.4692956805229187, "learning_rate": 5.94393028728929e-06, "loss": 0.0362, "step": 122500 }, { "epoch": 0.9912614289181972, "grad_norm": 0.4362461566925049, "learning_rate": 5.943236878267228e-06, "loss": 0.0174, "step": 122510 }, { "epoch": 0.9913423416133992, "grad_norm": 0.21766436100006104, "learning_rate": 5.942543450434295e-06, "loss": 0.0307, "step": 122520 }, { "epoch": 0.991423254308601, "grad_norm": 0.3664482533931732, "learning_rate": 5.941850003804326e-06, "loss": 0.0295, "step": 122530 }, { "epoch": 0.991504167003803, "grad_norm": 0.3295988440513611, "learning_rate": 5.941156538391144e-06, "loss": 0.035, "step": 122540 }, { "epoch": 0.9915850796990048, "grad_norm": 0.5709457993507385, "learning_rate": 5.940463054208583e-06, "loss": 0.0324, "step": 122550 }, { "epoch": 0.9916659923942066, "grad_norm": 0.5454807281494141, "learning_rate": 5.939769551270471e-06, "loss": 0.0222, "step": 122560 }, { "epoch": 0.9917469050894085, "grad_norm": 0.3730475604534149, "learning_rate": 5.93907602959064e-06, "loss": 0.0207, "step": 122570 }, { "epoch": 0.9918278177846104, "grad_norm": 0.4616522789001465, "learning_rate": 5.93838248918292e-06, "loss": 0.0216, "step": 122580 }, { "epoch": 0.9919087304798123, "grad_norm": 0.3658333420753479, "learning_rate": 5.937688930061143e-06, "loss": 0.0314, "step": 122590 }, { "epoch": 0.9919896431750141, "grad_norm": 0.48099052906036377, "learning_rate": 5.936995352239139e-06, "loss": 0.0344, "step": 122600 }, { "epoch": 0.9920705558702161, "grad_norm": 0.48528867959976196, "learning_rate": 5.936301755730741e-06, "loss": 0.0335, "step": 122610 }, { "epoch": 0.9921514685654179, "grad_norm": 0.49834075570106506, "learning_rate": 5.93560814054978e-06, "loss": 0.0222, "step": 122620 }, { "epoch": 0.9922323812606197, "grad_norm": 0.5112107992172241, "learning_rate": 5.934914506710091e-06, "loss": 0.0222, "step": 122630 }, { "epoch": 0.9923132939558217, "grad_norm": 0.5874253511428833, "learning_rate": 5.934220854225507e-06, "loss": 0.0375, "step": 122640 }, { "epoch": 0.9923942066510235, "grad_norm": 0.27851250767707825, "learning_rate": 5.933527183109858e-06, "loss": 0.0215, "step": 122650 }, { "epoch": 0.9924751193462255, "grad_norm": 0.5428466200828552, "learning_rate": 5.93283349337698e-06, "loss": 0.0209, "step": 122660 }, { "epoch": 0.9925560320414273, "grad_norm": 0.44077834486961365, "learning_rate": 5.932139785040709e-06, "loss": 0.0275, "step": 122670 }, { "epoch": 0.9926369447366292, "grad_norm": 0.48466163873672485, "learning_rate": 5.931446058114878e-06, "loss": 0.0284, "step": 122680 }, { "epoch": 0.992717857431831, "grad_norm": 0.32835283875465393, "learning_rate": 5.93075231261332e-06, "loss": 0.0263, "step": 122690 }, { "epoch": 0.9927987701270329, "grad_norm": 0.25135406851768494, "learning_rate": 5.930058548549873e-06, "loss": 0.0292, "step": 122700 }, { "epoch": 0.9928796828222348, "grad_norm": 0.24609370529651642, "learning_rate": 5.929364765938372e-06, "loss": 0.0247, "step": 122710 }, { "epoch": 0.9929605955174367, "grad_norm": 0.44083210825920105, "learning_rate": 5.928670964792653e-06, "loss": 0.0294, "step": 122720 }, { "epoch": 0.9930415082126386, "grad_norm": 0.4787077009677887, "learning_rate": 5.92797714512655e-06, "loss": 0.0268, "step": 122730 }, { "epoch": 0.9931224209078404, "grad_norm": 0.5838300585746765, "learning_rate": 5.927283306953904e-06, "loss": 0.0312, "step": 122740 }, { "epoch": 0.9932033336030424, "grad_norm": 0.3312002122402191, "learning_rate": 5.92658945028855e-06, "loss": 0.0157, "step": 122750 }, { "epoch": 0.9932842462982442, "grad_norm": 0.3453129529953003, "learning_rate": 5.9258955751443234e-06, "loss": 0.0309, "step": 122760 }, { "epoch": 0.993365158993446, "grad_norm": 0.4030348062515259, "learning_rate": 5.925201681535067e-06, "loss": 0.0234, "step": 122770 }, { "epoch": 0.993446071688648, "grad_norm": 0.40514135360717773, "learning_rate": 5.924507769474614e-06, "loss": 0.0378, "step": 122780 }, { "epoch": 0.9935269843838498, "grad_norm": 0.31632283329963684, "learning_rate": 5.923813838976806e-06, "loss": 0.0288, "step": 122790 }, { "epoch": 0.9936078970790517, "grad_norm": 0.35282963514328003, "learning_rate": 5.923119890055482e-06, "loss": 0.0186, "step": 122800 }, { "epoch": 0.9936888097742536, "grad_norm": 0.6647527813911438, "learning_rate": 5.922425922724479e-06, "loss": 0.0352, "step": 122810 }, { "epoch": 0.9937697224694555, "grad_norm": 0.4016115367412567, "learning_rate": 5.9217319369976415e-06, "loss": 0.0237, "step": 122820 }, { "epoch": 0.9938506351646573, "grad_norm": 0.3787563741207123, "learning_rate": 5.921037932888803e-06, "loss": 0.0156, "step": 122830 }, { "epoch": 0.9939315478598593, "grad_norm": 0.38760408759117126, "learning_rate": 5.920343910411808e-06, "loss": 0.0257, "step": 122840 }, { "epoch": 0.9940124605550611, "grad_norm": 0.12240800261497498, "learning_rate": 5.9196498695804975e-06, "loss": 0.0283, "step": 122850 }, { "epoch": 0.9940933732502629, "grad_norm": 0.251912921667099, "learning_rate": 5.918955810408713e-06, "loss": 0.0327, "step": 122860 }, { "epoch": 0.9941742859454649, "grad_norm": 0.4070022702217102, "learning_rate": 5.918261732910292e-06, "loss": 0.0156, "step": 122870 }, { "epoch": 0.9942551986406667, "grad_norm": 0.4645324945449829, "learning_rate": 5.917567637099081e-06, "loss": 0.0246, "step": 122880 }, { "epoch": 0.9943361113358686, "grad_norm": 0.3017638921737671, "learning_rate": 5.91687352298892e-06, "loss": 0.0353, "step": 122890 }, { "epoch": 0.9944170240310705, "grad_norm": 0.1674114465713501, "learning_rate": 5.9161793905936526e-06, "loss": 0.0282, "step": 122900 }, { "epoch": 0.9944979367262724, "grad_norm": 0.43861302733421326, "learning_rate": 5.9154852399271215e-06, "loss": 0.021, "step": 122910 }, { "epoch": 0.9945788494214742, "grad_norm": 0.3356949985027313, "learning_rate": 5.914791071003168e-06, "loss": 0.0208, "step": 122920 }, { "epoch": 0.9946597621166761, "grad_norm": 0.15214984118938446, "learning_rate": 5.914096883835639e-06, "loss": 0.0276, "step": 122930 }, { "epoch": 0.994740674811878, "grad_norm": 0.3504437208175659, "learning_rate": 5.913402678438379e-06, "loss": 0.0243, "step": 122940 }, { "epoch": 0.9948215875070798, "grad_norm": 0.2266930639743805, "learning_rate": 5.912708454825229e-06, "loss": 0.0216, "step": 122950 }, { "epoch": 0.9949025002022818, "grad_norm": 0.2519701421260834, "learning_rate": 5.912014213010037e-06, "loss": 0.0259, "step": 122960 }, { "epoch": 0.9949834128974836, "grad_norm": 0.375564306974411, "learning_rate": 5.911319953006645e-06, "loss": 0.0244, "step": 122970 }, { "epoch": 0.9950643255926855, "grad_norm": 0.3060637414455414, "learning_rate": 5.910625674828903e-06, "loss": 0.0214, "step": 122980 }, { "epoch": 0.9951452382878874, "grad_norm": 0.5260807275772095, "learning_rate": 5.909931378490653e-06, "loss": 0.0254, "step": 122990 }, { "epoch": 0.9952261509830892, "grad_norm": 0.20086291432380676, "learning_rate": 5.909237064005742e-06, "loss": 0.0219, "step": 123000 }, { "epoch": 0.9953070636782911, "grad_norm": 0.3543337285518646, "learning_rate": 5.9085427313880185e-06, "loss": 0.0167, "step": 123010 }, { "epoch": 0.995387976373493, "grad_norm": 0.4546210765838623, "learning_rate": 5.907848380651328e-06, "loss": 0.0266, "step": 123020 }, { "epoch": 0.9954688890686949, "grad_norm": 0.3607065677642822, "learning_rate": 5.907154011809516e-06, "loss": 0.0258, "step": 123030 }, { "epoch": 0.9955498017638967, "grad_norm": 0.11063513904809952, "learning_rate": 5.906459624876434e-06, "loss": 0.0303, "step": 123040 }, { "epoch": 0.9956307144590987, "grad_norm": 0.4235687553882599, "learning_rate": 5.90576521986593e-06, "loss": 0.0285, "step": 123050 }, { "epoch": 0.9957116271543005, "grad_norm": 0.4310131072998047, "learning_rate": 5.905070796791848e-06, "loss": 0.0261, "step": 123060 }, { "epoch": 0.9957925398495023, "grad_norm": 0.6756704449653625, "learning_rate": 5.904376355668041e-06, "loss": 0.0289, "step": 123070 }, { "epoch": 0.9958734525447043, "grad_norm": 0.36603817343711853, "learning_rate": 5.9036818965083564e-06, "loss": 0.0289, "step": 123080 }, { "epoch": 0.9959543652399061, "grad_norm": 0.33042705059051514, "learning_rate": 5.902987419326644e-06, "loss": 0.0249, "step": 123090 }, { "epoch": 0.996035277935108, "grad_norm": 0.3892121911048889, "learning_rate": 5.9022929241367556e-06, "loss": 0.0128, "step": 123100 }, { "epoch": 0.9961161906303099, "grad_norm": 0.3734445571899414, "learning_rate": 5.901598410952537e-06, "loss": 0.0289, "step": 123110 }, { "epoch": 0.9961971033255118, "grad_norm": 0.5830621123313904, "learning_rate": 5.900903879787844e-06, "loss": 0.0234, "step": 123120 }, { "epoch": 0.9962780160207136, "grad_norm": 0.5718761682510376, "learning_rate": 5.900209330656523e-06, "loss": 0.0267, "step": 123130 }, { "epoch": 0.9963589287159156, "grad_norm": 0.6315025091171265, "learning_rate": 5.899514763572428e-06, "loss": 0.0343, "step": 123140 }, { "epoch": 0.9964398414111174, "grad_norm": 0.21610255539417267, "learning_rate": 5.8988201785494104e-06, "loss": 0.0293, "step": 123150 }, { "epoch": 0.9965207541063192, "grad_norm": 0.27118152379989624, "learning_rate": 5.898125575601321e-06, "loss": 0.0209, "step": 123160 }, { "epoch": 0.9966016668015212, "grad_norm": 0.5924343466758728, "learning_rate": 5.8974309547420135e-06, "loss": 0.0227, "step": 123170 }, { "epoch": 0.996682579496723, "grad_norm": 0.29515811800956726, "learning_rate": 5.896736315985341e-06, "loss": 0.0301, "step": 123180 }, { "epoch": 0.996763492191925, "grad_norm": 0.4772202670574188, "learning_rate": 5.8960416593451545e-06, "loss": 0.0292, "step": 123190 }, { "epoch": 0.9968444048871268, "grad_norm": 0.6137338876724243, "learning_rate": 5.8953469848353095e-06, "loss": 0.0335, "step": 123200 }, { "epoch": 0.9969253175823287, "grad_norm": 0.47640788555145264, "learning_rate": 5.894652292469659e-06, "loss": 0.0306, "step": 123210 }, { "epoch": 0.9970062302775305, "grad_norm": 0.4509849548339844, "learning_rate": 5.893957582262055e-06, "loss": 0.0296, "step": 123220 }, { "epoch": 0.9970871429727324, "grad_norm": 0.4353901743888855, "learning_rate": 5.893262854226358e-06, "loss": 0.0241, "step": 123230 }, { "epoch": 0.9971680556679343, "grad_norm": 0.06951820850372314, "learning_rate": 5.892568108376417e-06, "loss": 0.0233, "step": 123240 }, { "epoch": 0.9972489683631361, "grad_norm": 0.26195940375328064, "learning_rate": 5.891873344726089e-06, "loss": 0.0287, "step": 123250 }, { "epoch": 0.9973298810583381, "grad_norm": 0.12481486052274704, "learning_rate": 5.891178563289231e-06, "loss": 0.0144, "step": 123260 }, { "epoch": 0.9974107937535399, "grad_norm": 0.45711684226989746, "learning_rate": 5.890483764079697e-06, "loss": 0.0309, "step": 123270 }, { "epoch": 0.9974917064487419, "grad_norm": 0.32735249400138855, "learning_rate": 5.889788947111346e-06, "loss": 0.0364, "step": 123280 }, { "epoch": 0.9975726191439437, "grad_norm": 0.4169866144657135, "learning_rate": 5.889094112398032e-06, "loss": 0.024, "step": 123290 }, { "epoch": 0.9976535318391455, "grad_norm": 0.21339377760887146, "learning_rate": 5.888399259953612e-06, "loss": 0.0363, "step": 123300 }, { "epoch": 0.9977344445343475, "grad_norm": 0.3308395445346832, "learning_rate": 5.8877043897919454e-06, "loss": 0.0281, "step": 123310 }, { "epoch": 0.9978153572295493, "grad_norm": 0.18071144819259644, "learning_rate": 5.887009501926888e-06, "loss": 0.0359, "step": 123320 }, { "epoch": 0.9978962699247512, "grad_norm": 0.38973742723464966, "learning_rate": 5.886314596372298e-06, "loss": 0.0269, "step": 123330 }, { "epoch": 0.997977182619953, "grad_norm": 0.23085229098796844, "learning_rate": 5.885619673142037e-06, "loss": 0.0267, "step": 123340 }, { "epoch": 0.998058095315155, "grad_norm": 0.4923273026943207, "learning_rate": 5.884924732249958e-06, "loss": 0.0231, "step": 123350 }, { "epoch": 0.9981390080103568, "grad_norm": 0.14425380527973175, "learning_rate": 5.884229773709925e-06, "loss": 0.0213, "step": 123360 }, { "epoch": 0.9982199207055586, "grad_norm": 0.30802565813064575, "learning_rate": 5.883534797535796e-06, "loss": 0.0299, "step": 123370 }, { "epoch": 0.9983008334007606, "grad_norm": 0.7069463729858398, "learning_rate": 5.88283980374143e-06, "loss": 0.0274, "step": 123380 }, { "epoch": 0.9983817460959624, "grad_norm": 0.45255640149116516, "learning_rate": 5.882144792340689e-06, "loss": 0.027, "step": 123390 }, { "epoch": 0.9984626587911644, "grad_norm": 0.6152719259262085, "learning_rate": 5.881449763347432e-06, "loss": 0.0432, "step": 123400 }, { "epoch": 0.9985435714863662, "grad_norm": 0.19896867871284485, "learning_rate": 5.88075471677552e-06, "loss": 0.026, "step": 123410 }, { "epoch": 0.9986244841815681, "grad_norm": 0.23637863993644714, "learning_rate": 5.880059652638814e-06, "loss": 0.0237, "step": 123420 }, { "epoch": 0.99870539687677, "grad_norm": 0.5169551372528076, "learning_rate": 5.879364570951176e-06, "loss": 0.0323, "step": 123430 }, { "epoch": 0.9987863095719719, "grad_norm": 0.28268712759017944, "learning_rate": 5.878669471726469e-06, "loss": 0.0259, "step": 123440 }, { "epoch": 0.9988672222671737, "grad_norm": 0.3642747402191162, "learning_rate": 5.877974354978555e-06, "loss": 0.0326, "step": 123450 }, { "epoch": 0.9989481349623756, "grad_norm": 0.37034761905670166, "learning_rate": 5.877279220721294e-06, "loss": 0.0325, "step": 123460 }, { "epoch": 0.9990290476575775, "grad_norm": 0.44517782330513, "learning_rate": 5.8765840689685525e-06, "loss": 0.0224, "step": 123470 }, { "epoch": 0.9991099603527793, "grad_norm": 0.5139932632446289, "learning_rate": 5.8758888997341925e-06, "loss": 0.031, "step": 123480 }, { "epoch": 0.9991908730479813, "grad_norm": 0.4497836232185364, "learning_rate": 5.875193713032077e-06, "loss": 0.0272, "step": 123490 }, { "epoch": 0.9992717857431831, "grad_norm": 0.21207985281944275, "learning_rate": 5.87449850887607e-06, "loss": 0.0242, "step": 123500 }, { "epoch": 0.999352698438385, "grad_norm": 0.41833728551864624, "learning_rate": 5.873803287280037e-06, "loss": 0.0215, "step": 123510 }, { "epoch": 0.9994336111335869, "grad_norm": 0.6690316796302795, "learning_rate": 5.87310804825784e-06, "loss": 0.0204, "step": 123520 }, { "epoch": 0.9995145238287887, "grad_norm": 0.4759089946746826, "learning_rate": 5.87241279182335e-06, "loss": 0.0185, "step": 123530 }, { "epoch": 0.9995954365239906, "grad_norm": 0.1591448038816452, "learning_rate": 5.871717517990425e-06, "loss": 0.0275, "step": 123540 }, { "epoch": 0.9996763492191925, "grad_norm": 0.8218047618865967, "learning_rate": 5.871022226772936e-06, "loss": 0.0183, "step": 123550 }, { "epoch": 0.9997572619143944, "grad_norm": 0.5728346705436707, "learning_rate": 5.870326918184749e-06, "loss": 0.0251, "step": 123560 }, { "epoch": 0.9998381746095962, "grad_norm": 0.38796740770339966, "learning_rate": 5.869631592239727e-06, "loss": 0.0248, "step": 123570 }, { "epoch": 0.9999190873047982, "grad_norm": 0.21463407576084137, "learning_rate": 5.86893624895174e-06, "loss": 0.0272, "step": 123580 }, { "epoch": 1.0, "grad_norm": 0.5683429837226868, "learning_rate": 5.8682408883346535e-06, "loss": 0.0368, "step": 123590 }, { "epoch": 1.000080912695202, "grad_norm": 0.23890115320682526, "learning_rate": 5.867545510402335e-06, "loss": 0.0236, "step": 123600 }, { "epoch": 1.0001618253904037, "grad_norm": 0.2899535298347473, "learning_rate": 5.866850115168652e-06, "loss": 0.0214, "step": 123610 }, { "epoch": 1.0002427380856056, "grad_norm": 0.6363174915313721, "learning_rate": 5.866154702647473e-06, "loss": 0.0205, "step": 123620 }, { "epoch": 1.0003236507808075, "grad_norm": 0.3802925646305084, "learning_rate": 5.86545927285267e-06, "loss": 0.0369, "step": 123630 }, { "epoch": 1.0004045634760095, "grad_norm": 0.5950984358787537, "learning_rate": 5.864763825798108e-06, "loss": 0.033, "step": 123640 }, { "epoch": 1.0004854761712112, "grad_norm": 0.24176381528377533, "learning_rate": 5.864068361497655e-06, "loss": 0.0233, "step": 123650 }, { "epoch": 1.0005663888664131, "grad_norm": 0.6391503810882568, "learning_rate": 5.863372879965183e-06, "loss": 0.0216, "step": 123660 }, { "epoch": 1.000647301561615, "grad_norm": 0.23489508032798767, "learning_rate": 5.862677381214563e-06, "loss": 0.0237, "step": 123670 }, { "epoch": 1.0007282142568168, "grad_norm": 0.32513943314552307, "learning_rate": 5.861981865259664e-06, "loss": 0.0258, "step": 123680 }, { "epoch": 1.0008091269520187, "grad_norm": 0.19162768125534058, "learning_rate": 5.8612863321143555e-06, "loss": 0.0253, "step": 123690 }, { "epoch": 1.0008900396472207, "grad_norm": 0.21072758734226227, "learning_rate": 5.8605907817925075e-06, "loss": 0.0215, "step": 123700 }, { "epoch": 1.0009709523424226, "grad_norm": 0.2959703803062439, "learning_rate": 5.8598952143079975e-06, "loss": 0.0408, "step": 123710 }, { "epoch": 1.0010518650376243, "grad_norm": 0.3930806517601013, "learning_rate": 5.85919962967469e-06, "loss": 0.0236, "step": 123720 }, { "epoch": 1.0011327777328263, "grad_norm": 0.483081191778183, "learning_rate": 5.85850402790646e-06, "loss": 0.0234, "step": 123730 }, { "epoch": 1.0012136904280282, "grad_norm": 0.36171457171440125, "learning_rate": 5.857808409017179e-06, "loss": 0.026, "step": 123740 }, { "epoch": 1.00129460312323, "grad_norm": 0.20904384553432465, "learning_rate": 5.8571127730207225e-06, "loss": 0.0216, "step": 123750 }, { "epoch": 1.0013755158184319, "grad_norm": 1.2507317066192627, "learning_rate": 5.856417119930959e-06, "loss": 0.0287, "step": 123760 }, { "epoch": 1.0014564285136338, "grad_norm": 0.2857131063938141, "learning_rate": 5.8557214497617646e-06, "loss": 0.0168, "step": 123770 }, { "epoch": 1.0015373412088358, "grad_norm": 0.17968697845935822, "learning_rate": 5.855025762527013e-06, "loss": 0.018, "step": 123780 }, { "epoch": 1.0016182539040375, "grad_norm": 0.20991048216819763, "learning_rate": 5.854330058240577e-06, "loss": 0.0191, "step": 123790 }, { "epoch": 1.0016991665992394, "grad_norm": 0.17681120336055756, "learning_rate": 5.8536343369163315e-06, "loss": 0.0215, "step": 123800 }, { "epoch": 1.0017800792944414, "grad_norm": 0.19662344455718994, "learning_rate": 5.852938598568151e-06, "loss": 0.0211, "step": 123810 }, { "epoch": 1.001860991989643, "grad_norm": 0.7114370465278625, "learning_rate": 5.852242843209912e-06, "loss": 0.0244, "step": 123820 }, { "epoch": 1.001941904684845, "grad_norm": 0.38866469264030457, "learning_rate": 5.851547070855488e-06, "loss": 0.023, "step": 123830 }, { "epoch": 1.002022817380047, "grad_norm": 0.36947736144065857, "learning_rate": 5.850851281518753e-06, "loss": 0.0194, "step": 123840 }, { "epoch": 1.0021037300752489, "grad_norm": 0.6565784811973572, "learning_rate": 5.850155475213588e-06, "loss": 0.0175, "step": 123850 }, { "epoch": 1.0021846427704506, "grad_norm": 0.6697403192520142, "learning_rate": 5.849459651953868e-06, "loss": 0.0249, "step": 123860 }, { "epoch": 1.0022655554656525, "grad_norm": 0.20685873925685883, "learning_rate": 5.848763811753466e-06, "loss": 0.0267, "step": 123870 }, { "epoch": 1.0023464681608545, "grad_norm": 0.13795624673366547, "learning_rate": 5.848067954626263e-06, "loss": 0.0219, "step": 123880 }, { "epoch": 1.0024273808560562, "grad_norm": 0.42073848843574524, "learning_rate": 5.847372080586135e-06, "loss": 0.0199, "step": 123890 }, { "epoch": 1.0025082935512581, "grad_norm": 0.37300366163253784, "learning_rate": 5.846676189646959e-06, "loss": 0.0273, "step": 123900 }, { "epoch": 1.00258920624646, "grad_norm": 0.28832879662513733, "learning_rate": 5.845980281822614e-06, "loss": 0.0296, "step": 123910 }, { "epoch": 1.002670118941662, "grad_norm": 0.4504339098930359, "learning_rate": 5.845284357126977e-06, "loss": 0.0138, "step": 123920 }, { "epoch": 1.0027510316368637, "grad_norm": 0.6220274567604065, "learning_rate": 5.844588415573931e-06, "loss": 0.0321, "step": 123930 }, { "epoch": 1.0028319443320657, "grad_norm": 0.40660741925239563, "learning_rate": 5.84389245717735e-06, "loss": 0.0161, "step": 123940 }, { "epoch": 1.0029128570272676, "grad_norm": 0.7495659589767456, "learning_rate": 5.843196481951115e-06, "loss": 0.0203, "step": 123950 }, { "epoch": 1.0029937697224696, "grad_norm": 0.2013615369796753, "learning_rate": 5.842500489909108e-06, "loss": 0.0173, "step": 123960 }, { "epoch": 1.0030746824176713, "grad_norm": 0.5889478325843811, "learning_rate": 5.841804481065206e-06, "loss": 0.0192, "step": 123970 }, { "epoch": 1.0031555951128732, "grad_norm": 0.44364023208618164, "learning_rate": 5.841108455433291e-06, "loss": 0.0305, "step": 123980 }, { "epoch": 1.0032365078080752, "grad_norm": 0.2900795638561249, "learning_rate": 5.840412413027243e-06, "loss": 0.022, "step": 123990 }, { "epoch": 1.0033174205032769, "grad_norm": 0.3393096923828125, "learning_rate": 5.8397163538609434e-06, "loss": 0.0301, "step": 124000 }, { "epoch": 1.0033983331984788, "grad_norm": 0.5821153521537781, "learning_rate": 5.839020277948276e-06, "loss": 0.035, "step": 124010 }, { "epoch": 1.0034792458936808, "grad_norm": 0.6216897368431091, "learning_rate": 5.838324185303119e-06, "loss": 0.0188, "step": 124020 }, { "epoch": 1.0035601585888827, "grad_norm": 0.5246712565422058, "learning_rate": 5.837628075939355e-06, "loss": 0.0189, "step": 124030 }, { "epoch": 1.0036410712840844, "grad_norm": 0.5008507966995239, "learning_rate": 5.836931949870869e-06, "loss": 0.0285, "step": 124040 }, { "epoch": 1.0037219839792864, "grad_norm": 0.13963648676872253, "learning_rate": 5.836235807111543e-06, "loss": 0.0267, "step": 124050 }, { "epoch": 1.0038028966744883, "grad_norm": 0.49757587909698486, "learning_rate": 5.835539647675257e-06, "loss": 0.0203, "step": 124060 }, { "epoch": 1.00388380936969, "grad_norm": 0.4772986173629761, "learning_rate": 5.834843471575897e-06, "loss": 0.0293, "step": 124070 }, { "epoch": 1.003964722064892, "grad_norm": 0.07850389927625656, "learning_rate": 5.834147278827347e-06, "loss": 0.0166, "step": 124080 }, { "epoch": 1.004045634760094, "grad_norm": 0.7044297456741333, "learning_rate": 5.83345106944349e-06, "loss": 0.025, "step": 124090 }, { "epoch": 1.0041265474552958, "grad_norm": 0.31011781096458435, "learning_rate": 5.832754843438211e-06, "loss": 0.0264, "step": 124100 }, { "epoch": 1.0042074601504976, "grad_norm": 0.3366524875164032, "learning_rate": 5.832058600825394e-06, "loss": 0.0181, "step": 124110 }, { "epoch": 1.0042883728456995, "grad_norm": 0.22804103791713715, "learning_rate": 5.831362341618927e-06, "loss": 0.0244, "step": 124120 }, { "epoch": 1.0043692855409014, "grad_norm": 0.7225281596183777, "learning_rate": 5.830666065832691e-06, "loss": 0.0215, "step": 124130 }, { "epoch": 1.0044501982361032, "grad_norm": 0.4200109839439392, "learning_rate": 5.829969773480574e-06, "loss": 0.0219, "step": 124140 }, { "epoch": 1.004531110931305, "grad_norm": 1.4113954305648804, "learning_rate": 5.829273464576462e-06, "loss": 0.0382, "step": 124150 }, { "epoch": 1.004612023626507, "grad_norm": 0.0987396165728569, "learning_rate": 5.828577139134243e-06, "loss": 0.0134, "step": 124160 }, { "epoch": 1.004692936321709, "grad_norm": 0.460494726896286, "learning_rate": 5.827880797167801e-06, "loss": 0.025, "step": 124170 }, { "epoch": 1.0047738490169107, "grad_norm": 0.47291186451911926, "learning_rate": 5.827184438691025e-06, "loss": 0.0213, "step": 124180 }, { "epoch": 1.0048547617121126, "grad_norm": 0.3216601610183716, "learning_rate": 5.826488063717802e-06, "loss": 0.0244, "step": 124190 }, { "epoch": 1.0049356744073146, "grad_norm": 0.38058897852897644, "learning_rate": 5.825791672262017e-06, "loss": 0.0284, "step": 124200 }, { "epoch": 1.0050165871025163, "grad_norm": 0.1945067197084427, "learning_rate": 5.825095264337562e-06, "loss": 0.0214, "step": 124210 }, { "epoch": 1.0050974997977182, "grad_norm": 0.405167818069458, "learning_rate": 5.824398839958323e-06, "loss": 0.0309, "step": 124220 }, { "epoch": 1.0051784124929202, "grad_norm": 0.2788666784763336, "learning_rate": 5.8237023991381915e-06, "loss": 0.0196, "step": 124230 }, { "epoch": 1.005259325188122, "grad_norm": 0.25135233998298645, "learning_rate": 5.823005941891053e-06, "loss": 0.0299, "step": 124240 }, { "epoch": 1.0053402378833238, "grad_norm": 0.31600749492645264, "learning_rate": 5.822309468230798e-06, "loss": 0.0208, "step": 124250 }, { "epoch": 1.0054211505785258, "grad_norm": 0.22735171020030975, "learning_rate": 5.821612978171317e-06, "loss": 0.0178, "step": 124260 }, { "epoch": 1.0055020632737277, "grad_norm": 0.2859821915626526, "learning_rate": 5.8209164717265e-06, "loss": 0.017, "step": 124270 }, { "epoch": 1.0055829759689294, "grad_norm": 0.19325213134288788, "learning_rate": 5.820219948910237e-06, "loss": 0.0149, "step": 124280 }, { "epoch": 1.0056638886641314, "grad_norm": 0.4662918150424957, "learning_rate": 5.819523409736419e-06, "loss": 0.0231, "step": 124290 }, { "epoch": 1.0057448013593333, "grad_norm": 0.3832801878452301, "learning_rate": 5.818826854218936e-06, "loss": 0.012, "step": 124300 }, { "epoch": 1.0058257140545352, "grad_norm": 1.2558295726776123, "learning_rate": 5.818130282371681e-06, "loss": 0.0262, "step": 124310 }, { "epoch": 1.005906626749737, "grad_norm": 0.1911199986934662, "learning_rate": 5.817433694208544e-06, "loss": 0.0255, "step": 124320 }, { "epoch": 1.005987539444939, "grad_norm": 0.33607447147369385, "learning_rate": 5.816737089743417e-06, "loss": 0.0293, "step": 124330 }, { "epoch": 1.0060684521401408, "grad_norm": 0.28642892837524414, "learning_rate": 5.816040468990195e-06, "loss": 0.0173, "step": 124340 }, { "epoch": 1.0061493648353426, "grad_norm": 0.7222099900245667, "learning_rate": 5.815343831962765e-06, "loss": 0.033, "step": 124350 }, { "epoch": 1.0062302775305445, "grad_norm": 0.3956417739391327, "learning_rate": 5.814647178675027e-06, "loss": 0.0328, "step": 124360 }, { "epoch": 1.0063111902257464, "grad_norm": 0.3972913324832916, "learning_rate": 5.813950509140869e-06, "loss": 0.0204, "step": 124370 }, { "epoch": 1.0063921029209484, "grad_norm": 0.5272371172904968, "learning_rate": 5.813253823374188e-06, "loss": 0.0221, "step": 124380 }, { "epoch": 1.00647301561615, "grad_norm": 0.4467296898365021, "learning_rate": 5.812557121388874e-06, "loss": 0.0267, "step": 124390 }, { "epoch": 1.006553928311352, "grad_norm": 0.9180585741996765, "learning_rate": 5.8118604031988256e-06, "loss": 0.0234, "step": 124400 }, { "epoch": 1.006634841006554, "grad_norm": 0.24558229744434357, "learning_rate": 5.811163668817935e-06, "loss": 0.0158, "step": 124410 }, { "epoch": 1.0067157537017557, "grad_norm": 0.18716758489608765, "learning_rate": 5.810466918260097e-06, "loss": 0.0213, "step": 124420 }, { "epoch": 1.0067966663969576, "grad_norm": 0.6104241609573364, "learning_rate": 5.809770151539207e-06, "loss": 0.0264, "step": 124430 }, { "epoch": 1.0068775790921596, "grad_norm": 0.18168114125728607, "learning_rate": 5.809073368669161e-06, "loss": 0.0292, "step": 124440 }, { "epoch": 1.0069584917873615, "grad_norm": 1.8906443119049072, "learning_rate": 5.808376569663856e-06, "loss": 0.042, "step": 124450 }, { "epoch": 1.0070394044825632, "grad_norm": 0.14163920283317566, "learning_rate": 5.807679754537185e-06, "loss": 0.0186, "step": 124460 }, { "epoch": 1.0071203171777652, "grad_norm": 0.34517958760261536, "learning_rate": 5.806982923303047e-06, "loss": 0.0258, "step": 124470 }, { "epoch": 1.0072012298729671, "grad_norm": 0.43204525113105774, "learning_rate": 5.806286075975339e-06, "loss": 0.0273, "step": 124480 }, { "epoch": 1.0072821425681688, "grad_norm": 0.4326138198375702, "learning_rate": 5.805589212567957e-06, "loss": 0.0287, "step": 124490 }, { "epoch": 1.0073630552633708, "grad_norm": 0.4590374827384949, "learning_rate": 5.804892333094798e-06, "loss": 0.0222, "step": 124500 }, { "epoch": 1.0074439679585727, "grad_norm": 0.4120861887931824, "learning_rate": 5.8041954375697615e-06, "loss": 0.0223, "step": 124510 }, { "epoch": 1.0075248806537747, "grad_norm": 0.5126749277114868, "learning_rate": 5.803498526006744e-06, "loss": 0.0264, "step": 124520 }, { "epoch": 1.0076057933489764, "grad_norm": 0.6078987717628479, "learning_rate": 5.802801598419646e-06, "loss": 0.029, "step": 124530 }, { "epoch": 1.0076867060441783, "grad_norm": 0.8786176443099976, "learning_rate": 5.802104654822364e-06, "loss": 0.0284, "step": 124540 }, { "epoch": 1.0077676187393803, "grad_norm": 0.7428706288337708, "learning_rate": 5.801407695228798e-06, "loss": 0.017, "step": 124550 }, { "epoch": 1.0078485314345822, "grad_norm": 0.7432427406311035, "learning_rate": 5.800710719652847e-06, "loss": 0.0261, "step": 124560 }, { "epoch": 1.007929444129784, "grad_norm": 0.3121102452278137, "learning_rate": 5.800013728108411e-06, "loss": 0.0223, "step": 124570 }, { "epoch": 1.0080103568249859, "grad_norm": 0.20839107036590576, "learning_rate": 5.799316720609391e-06, "loss": 0.0356, "step": 124580 }, { "epoch": 1.0080912695201878, "grad_norm": 0.4061105251312256, "learning_rate": 5.7986196971696866e-06, "loss": 0.0225, "step": 124590 }, { "epoch": 1.0081721822153895, "grad_norm": 0.41345271468162537, "learning_rate": 5.797922657803198e-06, "loss": 0.016, "step": 124600 }, { "epoch": 1.0082530949105915, "grad_norm": 0.10412538051605225, "learning_rate": 5.797225602523826e-06, "loss": 0.0218, "step": 124610 }, { "epoch": 1.0083340076057934, "grad_norm": 0.3108693063259125, "learning_rate": 5.7965285313454714e-06, "loss": 0.0169, "step": 124620 }, { "epoch": 1.0084149203009953, "grad_norm": 0.19047921895980835, "learning_rate": 5.795831444282037e-06, "loss": 0.017, "step": 124630 }, { "epoch": 1.008495832996197, "grad_norm": 0.5741358399391174, "learning_rate": 5.795134341347427e-06, "loss": 0.0314, "step": 124640 }, { "epoch": 1.008576745691399, "grad_norm": 0.3487216830253601, "learning_rate": 5.794437222555539e-06, "loss": 0.0193, "step": 124650 }, { "epoch": 1.008657658386601, "grad_norm": 0.07979540526866913, "learning_rate": 5.793740087920279e-06, "loss": 0.0191, "step": 124660 }, { "epoch": 1.0087385710818026, "grad_norm": 0.360227108001709, "learning_rate": 5.7930429374555475e-06, "loss": 0.0226, "step": 124670 }, { "epoch": 1.0088194837770046, "grad_norm": 0.5460313558578491, "learning_rate": 5.792345771175249e-06, "loss": 0.0264, "step": 124680 }, { "epoch": 1.0089003964722065, "grad_norm": 0.16243211925029755, "learning_rate": 5.791648589093287e-06, "loss": 0.0191, "step": 124690 }, { "epoch": 1.0089813091674085, "grad_norm": 0.5347035527229309, "learning_rate": 5.790951391223565e-06, "loss": 0.0312, "step": 124700 }, { "epoch": 1.0090622218626102, "grad_norm": 0.4833073914051056, "learning_rate": 5.790254177579988e-06, "loss": 0.0208, "step": 124710 }, { "epoch": 1.0091431345578121, "grad_norm": 0.44938433170318604, "learning_rate": 5.78955694817646e-06, "loss": 0.0236, "step": 124720 }, { "epoch": 1.009224047253014, "grad_norm": 0.41708528995513916, "learning_rate": 5.788859703026884e-06, "loss": 0.0242, "step": 124730 }, { "epoch": 1.0093049599482158, "grad_norm": 0.32518818974494934, "learning_rate": 5.788162442145167e-06, "loss": 0.0187, "step": 124740 }, { "epoch": 1.0093858726434177, "grad_norm": 0.4709189534187317, "learning_rate": 5.7874651655452154e-06, "loss": 0.0161, "step": 124750 }, { "epoch": 1.0094667853386197, "grad_norm": 0.3521060645580292, "learning_rate": 5.786767873240932e-06, "loss": 0.0239, "step": 124760 }, { "epoch": 1.0095476980338216, "grad_norm": 0.4564734697341919, "learning_rate": 5.786070565246224e-06, "loss": 0.0169, "step": 124770 }, { "epoch": 1.0096286107290233, "grad_norm": 0.17313174903392792, "learning_rate": 5.7853732415749985e-06, "loss": 0.0156, "step": 124780 }, { "epoch": 1.0097095234242253, "grad_norm": 0.41927045583724976, "learning_rate": 5.784675902241163e-06, "loss": 0.0246, "step": 124790 }, { "epoch": 1.0097904361194272, "grad_norm": 1.0704091787338257, "learning_rate": 5.783978547258622e-06, "loss": 0.0331, "step": 124800 }, { "epoch": 1.009871348814629, "grad_norm": 0.4895000457763672, "learning_rate": 5.783281176641282e-06, "loss": 0.0279, "step": 124810 }, { "epoch": 1.0099522615098309, "grad_norm": 0.7366101741790771, "learning_rate": 5.782583790403056e-06, "loss": 0.0374, "step": 124820 }, { "epoch": 1.0100331742050328, "grad_norm": 0.532626211643219, "learning_rate": 5.781886388557847e-06, "loss": 0.0304, "step": 124830 }, { "epoch": 1.0101140869002347, "grad_norm": 0.5010527968406677, "learning_rate": 5.7811889711195635e-06, "loss": 0.0212, "step": 124840 }, { "epoch": 1.0101949995954365, "grad_norm": 0.43591824173927307, "learning_rate": 5.780491538102116e-06, "loss": 0.016, "step": 124850 }, { "epoch": 1.0102759122906384, "grad_norm": 0.443141371011734, "learning_rate": 5.7797940895194135e-06, "loss": 0.0205, "step": 124860 }, { "epoch": 1.0103568249858403, "grad_norm": 0.0440601110458374, "learning_rate": 5.779096625385363e-06, "loss": 0.014, "step": 124870 }, { "epoch": 1.010437737681042, "grad_norm": 0.4500998854637146, "learning_rate": 5.778399145713875e-06, "loss": 0.0239, "step": 124880 }, { "epoch": 1.010518650376244, "grad_norm": 0.4733697474002838, "learning_rate": 5.77770165051886e-06, "loss": 0.032, "step": 124890 }, { "epoch": 1.010599563071446, "grad_norm": 0.27844056487083435, "learning_rate": 5.777004139814226e-06, "loss": 0.021, "step": 124900 }, { "epoch": 1.0106804757666479, "grad_norm": 0.4194970726966858, "learning_rate": 5.7763066136138865e-06, "loss": 0.016, "step": 124910 }, { "epoch": 1.0107613884618496, "grad_norm": 0.5268365740776062, "learning_rate": 5.77560907193175e-06, "loss": 0.0217, "step": 124920 }, { "epoch": 1.0108423011570515, "grad_norm": 0.35161203145980835, "learning_rate": 5.774911514781728e-06, "loss": 0.02, "step": 124930 }, { "epoch": 1.0109232138522535, "grad_norm": 0.41683852672576904, "learning_rate": 5.774213942177731e-06, "loss": 0.0199, "step": 124940 }, { "epoch": 1.0110041265474552, "grad_norm": 0.38465046882629395, "learning_rate": 5.77351635413367e-06, "loss": 0.0219, "step": 124950 }, { "epoch": 1.0110850392426571, "grad_norm": 0.6887980699539185, "learning_rate": 5.77281875066346e-06, "loss": 0.0205, "step": 124960 }, { "epoch": 1.011165951937859, "grad_norm": 0.37973448634147644, "learning_rate": 5.7721211317810125e-06, "loss": 0.0259, "step": 124970 }, { "epoch": 1.011246864633061, "grad_norm": 0.25649377703666687, "learning_rate": 5.7714234975002385e-06, "loss": 0.0146, "step": 124980 }, { "epoch": 1.0113277773282627, "grad_norm": 0.28004854917526245, "learning_rate": 5.77072584783505e-06, "loss": 0.0196, "step": 124990 }, { "epoch": 1.0114086900234647, "grad_norm": 0.49409690499305725, "learning_rate": 5.770028182799363e-06, "loss": 0.0406, "step": 125000 }, { "epoch": 1.0114896027186666, "grad_norm": 0.7373704314231873, "learning_rate": 5.769330502407089e-06, "loss": 0.0126, "step": 125010 }, { "epoch": 1.0115705154138683, "grad_norm": 0.40555763244628906, "learning_rate": 5.7686328066721434e-06, "loss": 0.0167, "step": 125020 }, { "epoch": 1.0116514281090703, "grad_norm": 0.4596891701221466, "learning_rate": 5.767935095608437e-06, "loss": 0.0235, "step": 125030 }, { "epoch": 1.0117323408042722, "grad_norm": 0.33039844036102295, "learning_rate": 5.767237369229886e-06, "loss": 0.0223, "step": 125040 }, { "epoch": 1.0118132534994742, "grad_norm": 0.15201041102409363, "learning_rate": 5.766539627550408e-06, "loss": 0.0146, "step": 125050 }, { "epoch": 1.0118941661946759, "grad_norm": 0.4426884651184082, "learning_rate": 5.7658418705839136e-06, "loss": 0.0221, "step": 125060 }, { "epoch": 1.0119750788898778, "grad_norm": 0.3811897039413452, "learning_rate": 5.7651440983443195e-06, "loss": 0.0184, "step": 125070 }, { "epoch": 1.0120559915850798, "grad_norm": 0.1395297348499298, "learning_rate": 5.764446310845543e-06, "loss": 0.0147, "step": 125080 }, { "epoch": 1.0121369042802815, "grad_norm": 0.47159984707832336, "learning_rate": 5.763748508101499e-06, "loss": 0.0255, "step": 125090 }, { "epoch": 1.0122178169754834, "grad_norm": 0.5587863922119141, "learning_rate": 5.763050690126102e-06, "loss": 0.0313, "step": 125100 }, { "epoch": 1.0122987296706853, "grad_norm": 0.3122391402721405, "learning_rate": 5.762352856933269e-06, "loss": 0.0131, "step": 125110 }, { "epoch": 1.0123796423658873, "grad_norm": 0.4991612732410431, "learning_rate": 5.76165500853692e-06, "loss": 0.0268, "step": 125120 }, { "epoch": 1.012460555061089, "grad_norm": 0.3590589463710785, "learning_rate": 5.76095714495097e-06, "loss": 0.0224, "step": 125130 }, { "epoch": 1.012541467756291, "grad_norm": 0.38795578479766846, "learning_rate": 5.760259266189333e-06, "loss": 0.0276, "step": 125140 }, { "epoch": 1.0126223804514929, "grad_norm": 0.4840691387653351, "learning_rate": 5.759561372265932e-06, "loss": 0.0234, "step": 125150 }, { "epoch": 1.0127032931466946, "grad_norm": 0.2819635570049286, "learning_rate": 5.758863463194683e-06, "loss": 0.0226, "step": 125160 }, { "epoch": 1.0127842058418965, "grad_norm": 0.23601272702217102, "learning_rate": 5.758165538989504e-06, "loss": 0.0202, "step": 125170 }, { "epoch": 1.0128651185370985, "grad_norm": 0.053037211298942566, "learning_rate": 5.757467599664314e-06, "loss": 0.0157, "step": 125180 }, { "epoch": 1.0129460312323004, "grad_norm": 0.520594596862793, "learning_rate": 5.756769645233031e-06, "loss": 0.0224, "step": 125190 }, { "epoch": 1.0130269439275021, "grad_norm": 0.6811202764511108, "learning_rate": 5.756071675709577e-06, "loss": 0.0266, "step": 125200 }, { "epoch": 1.013107856622704, "grad_norm": 0.5544137954711914, "learning_rate": 5.755373691107868e-06, "loss": 0.0199, "step": 125210 }, { "epoch": 1.013188769317906, "grad_norm": 0.30617016553878784, "learning_rate": 5.754675691441824e-06, "loss": 0.0146, "step": 125220 }, { "epoch": 1.013269682013108, "grad_norm": 0.36876213550567627, "learning_rate": 5.75397767672537e-06, "loss": 0.0262, "step": 125230 }, { "epoch": 1.0133505947083097, "grad_norm": 0.5849815011024475, "learning_rate": 5.753279646972421e-06, "loss": 0.0162, "step": 125240 }, { "epoch": 1.0134315074035116, "grad_norm": 0.41903921961784363, "learning_rate": 5.752581602196899e-06, "loss": 0.0243, "step": 125250 }, { "epoch": 1.0135124200987136, "grad_norm": 0.5690382122993469, "learning_rate": 5.7518835424127276e-06, "loss": 0.0255, "step": 125260 }, { "epoch": 1.0135933327939153, "grad_norm": 0.22747251391410828, "learning_rate": 5.751185467633824e-06, "loss": 0.0278, "step": 125270 }, { "epoch": 1.0136742454891172, "grad_norm": 0.7903956770896912, "learning_rate": 5.750487377874114e-06, "loss": 0.0386, "step": 125280 }, { "epoch": 1.0137551581843192, "grad_norm": 0.46472617983818054, "learning_rate": 5.749789273147517e-06, "loss": 0.0238, "step": 125290 }, { "epoch": 1.013836070879521, "grad_norm": 0.3893413245677948, "learning_rate": 5.749091153467956e-06, "loss": 0.0338, "step": 125300 }, { "epoch": 1.0139169835747228, "grad_norm": 0.2758513391017914, "learning_rate": 5.748393018849353e-06, "loss": 0.0199, "step": 125310 }, { "epoch": 1.0139978962699248, "grad_norm": 0.5556018352508545, "learning_rate": 5.7476948693056315e-06, "loss": 0.0159, "step": 125320 }, { "epoch": 1.0140788089651267, "grad_norm": 0.06496544927358627, "learning_rate": 5.7469967048507126e-06, "loss": 0.0149, "step": 125330 }, { "epoch": 1.0141597216603284, "grad_norm": 0.34653881192207336, "learning_rate": 5.746298525498525e-06, "loss": 0.0174, "step": 125340 }, { "epoch": 1.0142406343555304, "grad_norm": 0.31808942556381226, "learning_rate": 5.745600331262985e-06, "loss": 0.0186, "step": 125350 }, { "epoch": 1.0143215470507323, "grad_norm": 0.2972455322742462, "learning_rate": 5.744902122158022e-06, "loss": 0.0226, "step": 125360 }, { "epoch": 1.0144024597459342, "grad_norm": 0.39616236090660095, "learning_rate": 5.744203898197559e-06, "loss": 0.016, "step": 125370 }, { "epoch": 1.014483372441136, "grad_norm": 0.4017733037471771, "learning_rate": 5.743505659395521e-06, "loss": 0.0135, "step": 125380 }, { "epoch": 1.014564285136338, "grad_norm": 0.2815978229045868, "learning_rate": 5.742807405765831e-06, "loss": 0.0148, "step": 125390 }, { "epoch": 1.0146451978315398, "grad_norm": 0.1273786574602127, "learning_rate": 5.7421091373224155e-06, "loss": 0.0201, "step": 125400 }, { "epoch": 1.0147261105267416, "grad_norm": 0.2642878293991089, "learning_rate": 5.7414108540792005e-06, "loss": 0.0252, "step": 125410 }, { "epoch": 1.0148070232219435, "grad_norm": 0.216535285115242, "learning_rate": 5.740712556050111e-06, "loss": 0.0342, "step": 125420 }, { "epoch": 1.0148879359171454, "grad_norm": 0.11475139856338501, "learning_rate": 5.740014243249073e-06, "loss": 0.025, "step": 125430 }, { "epoch": 1.0149688486123474, "grad_norm": 0.4688553512096405, "learning_rate": 5.739315915690012e-06, "loss": 0.0215, "step": 125440 }, { "epoch": 1.015049761307549, "grad_norm": 0.13710355758666992, "learning_rate": 5.738617573386859e-06, "loss": 0.0211, "step": 125450 }, { "epoch": 1.015130674002751, "grad_norm": 0.18894587457180023, "learning_rate": 5.737919216353535e-06, "loss": 0.019, "step": 125460 }, { "epoch": 1.015211586697953, "grad_norm": 0.7368744611740112, "learning_rate": 5.7372208446039715e-06, "loss": 0.0256, "step": 125470 }, { "epoch": 1.0152924993931547, "grad_norm": 0.2034449428319931, "learning_rate": 5.736522458152094e-06, "loss": 0.0202, "step": 125480 }, { "epoch": 1.0153734120883566, "grad_norm": 0.15460579097270966, "learning_rate": 5.735824057011831e-06, "loss": 0.0198, "step": 125490 }, { "epoch": 1.0154543247835586, "grad_norm": 0.5138454437255859, "learning_rate": 5.73512564119711e-06, "loss": 0.0229, "step": 125500 }, { "epoch": 1.0155352374787605, "grad_norm": 0.5378125309944153, "learning_rate": 5.73442721072186e-06, "loss": 0.021, "step": 125510 }, { "epoch": 1.0156161501739622, "grad_norm": 0.3002491593360901, "learning_rate": 5.733728765600012e-06, "loss": 0.0236, "step": 125520 }, { "epoch": 1.0156970628691642, "grad_norm": 0.23950673639774323, "learning_rate": 5.733030305845491e-06, "loss": 0.0204, "step": 125530 }, { "epoch": 1.015777975564366, "grad_norm": 0.5479670166969299, "learning_rate": 5.732331831472228e-06, "loss": 0.0365, "step": 125540 }, { "epoch": 1.0158588882595678, "grad_norm": 0.3769558370113373, "learning_rate": 5.731633342494152e-06, "loss": 0.0257, "step": 125550 }, { "epoch": 1.0159398009547698, "grad_norm": 0.12850359082221985, "learning_rate": 5.7309348389251955e-06, "loss": 0.0279, "step": 125560 }, { "epoch": 1.0160207136499717, "grad_norm": 0.4447375535964966, "learning_rate": 5.730236320779284e-06, "loss": 0.0206, "step": 125570 }, { "epoch": 1.0161016263451736, "grad_norm": 0.32504814863204956, "learning_rate": 5.729537788070353e-06, "loss": 0.0235, "step": 125580 }, { "epoch": 1.0161825390403754, "grad_norm": 0.41804611682891846, "learning_rate": 5.72883924081233e-06, "loss": 0.0388, "step": 125590 }, { "epoch": 1.0162634517355773, "grad_norm": 0.2758578658103943, "learning_rate": 5.728140679019147e-06, "loss": 0.0313, "step": 125600 }, { "epoch": 1.0163443644307792, "grad_norm": 0.3752889633178711, "learning_rate": 5.727442102704736e-06, "loss": 0.0179, "step": 125610 }, { "epoch": 1.016425277125981, "grad_norm": 0.45547232031822205, "learning_rate": 5.7267435118830265e-06, "loss": 0.0154, "step": 125620 }, { "epoch": 1.016506189821183, "grad_norm": 0.47974660992622375, "learning_rate": 5.7260449065679525e-06, "loss": 0.0257, "step": 125630 }, { "epoch": 1.0165871025163848, "grad_norm": 1.17630136013031, "learning_rate": 5.725346286773447e-06, "loss": 0.041, "step": 125640 }, { "epoch": 1.0166680152115868, "grad_norm": 0.3654598295688629, "learning_rate": 5.72464765251344e-06, "loss": 0.027, "step": 125650 }, { "epoch": 1.0167489279067885, "grad_norm": 0.9043847918510437, "learning_rate": 5.723949003801865e-06, "loss": 0.0251, "step": 125660 }, { "epoch": 1.0168298406019904, "grad_norm": 0.433480829000473, "learning_rate": 5.723250340652656e-06, "loss": 0.0159, "step": 125670 }, { "epoch": 1.0169107532971924, "grad_norm": 0.3855384290218353, "learning_rate": 5.722551663079745e-06, "loss": 0.0186, "step": 125680 }, { "epoch": 1.016991665992394, "grad_norm": 0.1598065197467804, "learning_rate": 5.721852971097067e-06, "loss": 0.0237, "step": 125690 }, { "epoch": 1.017072578687596, "grad_norm": 0.3256476819515228, "learning_rate": 5.721154264718555e-06, "loss": 0.021, "step": 125700 }, { "epoch": 1.017153491382798, "grad_norm": 0.3413519263267517, "learning_rate": 5.720455543958146e-06, "loss": 0.019, "step": 125710 }, { "epoch": 1.017234404078, "grad_norm": 0.3758890926837921, "learning_rate": 5.7197568088297694e-06, "loss": 0.0125, "step": 125720 }, { "epoch": 1.0173153167732016, "grad_norm": 0.7601802349090576, "learning_rate": 5.719058059347362e-06, "loss": 0.0251, "step": 125730 }, { "epoch": 1.0173962294684036, "grad_norm": 0.6338085532188416, "learning_rate": 5.718359295524863e-06, "loss": 0.0305, "step": 125740 }, { "epoch": 1.0174771421636055, "grad_norm": 0.2618781328201294, "learning_rate": 5.717660517376202e-06, "loss": 0.0236, "step": 125750 }, { "epoch": 1.0175580548588075, "grad_norm": 0.2646407186985016, "learning_rate": 5.7169617249153175e-06, "loss": 0.0217, "step": 125760 }, { "epoch": 1.0176389675540092, "grad_norm": 0.4290194511413574, "learning_rate": 5.716262918156145e-06, "loss": 0.0239, "step": 125770 }, { "epoch": 1.0177198802492111, "grad_norm": 0.34128108620643616, "learning_rate": 5.71556409711262e-06, "loss": 0.0259, "step": 125780 }, { "epoch": 1.017800792944413, "grad_norm": 0.24765613675117493, "learning_rate": 5.714865261798681e-06, "loss": 0.0233, "step": 125790 }, { "epoch": 1.0178817056396148, "grad_norm": 0.7447476983070374, "learning_rate": 5.714166412228263e-06, "loss": 0.0358, "step": 125800 }, { "epoch": 1.0179626183348167, "grad_norm": 0.2308376282453537, "learning_rate": 5.713467548415302e-06, "loss": 0.0217, "step": 125810 }, { "epoch": 1.0180435310300187, "grad_norm": 0.392770379781723, "learning_rate": 5.7127686703737396e-06, "loss": 0.0232, "step": 125820 }, { "epoch": 1.0181244437252206, "grad_norm": 0.5417890548706055, "learning_rate": 5.712069778117511e-06, "loss": 0.0214, "step": 125830 }, { "epoch": 1.0182053564204223, "grad_norm": 0.14812210202217102, "learning_rate": 5.711370871660551e-06, "loss": 0.0367, "step": 125840 }, { "epoch": 1.0182862691156243, "grad_norm": 0.7303215861320496, "learning_rate": 5.7106719510168016e-06, "loss": 0.0306, "step": 125850 }, { "epoch": 1.0183671818108262, "grad_norm": 0.3613852262496948, "learning_rate": 5.709973016200203e-06, "loss": 0.0227, "step": 125860 }, { "epoch": 1.018448094506028, "grad_norm": 0.3157370686531067, "learning_rate": 5.709274067224688e-06, "loss": 0.0108, "step": 125870 }, { "epoch": 1.0185290072012299, "grad_norm": 0.3227003216743469, "learning_rate": 5.7085751041042005e-06, "loss": 0.0173, "step": 125880 }, { "epoch": 1.0186099198964318, "grad_norm": 0.4843076169490814, "learning_rate": 5.7078761268526784e-06, "loss": 0.0328, "step": 125890 }, { "epoch": 1.0186908325916337, "grad_norm": 0.42888158559799194, "learning_rate": 5.707177135484062e-06, "loss": 0.0261, "step": 125900 }, { "epoch": 1.0187717452868355, "grad_norm": 0.16387921571731567, "learning_rate": 5.706478130012289e-06, "loss": 0.0186, "step": 125910 }, { "epoch": 1.0188526579820374, "grad_norm": 0.20140403509140015, "learning_rate": 5.705779110451302e-06, "loss": 0.0211, "step": 125920 }, { "epoch": 1.0189335706772393, "grad_norm": 0.4784558117389679, "learning_rate": 5.705080076815041e-06, "loss": 0.0216, "step": 125930 }, { "epoch": 1.019014483372441, "grad_norm": 0.5978667736053467, "learning_rate": 5.7043810291174475e-06, "loss": 0.0301, "step": 125940 }, { "epoch": 1.019095396067643, "grad_norm": 0.3770587146282196, "learning_rate": 5.703681967372459e-06, "loss": 0.0123, "step": 125950 }, { "epoch": 1.019176308762845, "grad_norm": 0.5742083787918091, "learning_rate": 5.70298289159402e-06, "loss": 0.0248, "step": 125960 }, { "epoch": 1.0192572214580469, "grad_norm": 0.7708909511566162, "learning_rate": 5.702283801796072e-06, "loss": 0.0175, "step": 125970 }, { "epoch": 1.0193381341532486, "grad_norm": 0.5315244793891907, "learning_rate": 5.7015846979925545e-06, "loss": 0.0155, "step": 125980 }, { "epoch": 1.0194190468484505, "grad_norm": 0.6135061383247375, "learning_rate": 5.700885580197413e-06, "loss": 0.0194, "step": 125990 }, { "epoch": 1.0194999595436525, "grad_norm": 0.5347714424133301, "learning_rate": 5.700186448424588e-06, "loss": 0.0293, "step": 126000 }, { "epoch": 1.0195808722388542, "grad_norm": 0.3891798257827759, "learning_rate": 5.699487302688023e-06, "loss": 0.0222, "step": 126010 }, { "epoch": 1.0196617849340561, "grad_norm": 0.7080230116844177, "learning_rate": 5.698788143001659e-06, "loss": 0.0257, "step": 126020 }, { "epoch": 1.019742697629258, "grad_norm": 0.31210848689079285, "learning_rate": 5.698088969379441e-06, "loss": 0.0279, "step": 126030 }, { "epoch": 1.01982361032446, "grad_norm": 0.4465043842792511, "learning_rate": 5.697389781835313e-06, "loss": 0.0286, "step": 126040 }, { "epoch": 1.0199045230196617, "grad_norm": 0.5178950428962708, "learning_rate": 5.696690580383217e-06, "loss": 0.016, "step": 126050 }, { "epoch": 1.0199854357148637, "grad_norm": 0.5855041146278381, "learning_rate": 5.695991365037098e-06, "loss": 0.0336, "step": 126060 }, { "epoch": 1.0200663484100656, "grad_norm": 0.4597233235836029, "learning_rate": 5.695292135810901e-06, "loss": 0.0276, "step": 126070 }, { "epoch": 1.0201472611052673, "grad_norm": 0.2519826292991638, "learning_rate": 5.694592892718571e-06, "loss": 0.0275, "step": 126080 }, { "epoch": 1.0202281738004693, "grad_norm": 0.48857221007347107, "learning_rate": 5.6938936357740514e-06, "loss": 0.0162, "step": 126090 }, { "epoch": 1.0203090864956712, "grad_norm": 0.20701974630355835, "learning_rate": 5.693194364991288e-06, "loss": 0.0197, "step": 126100 }, { "epoch": 1.0203899991908731, "grad_norm": 0.4088670611381531, "learning_rate": 5.692495080384226e-06, "loss": 0.0292, "step": 126110 }, { "epoch": 1.0204709118860749, "grad_norm": 0.3954131305217743, "learning_rate": 5.691795781966814e-06, "loss": 0.028, "step": 126120 }, { "epoch": 1.0205518245812768, "grad_norm": 0.2916911840438843, "learning_rate": 5.691096469752994e-06, "loss": 0.0196, "step": 126130 }, { "epoch": 1.0206327372764787, "grad_norm": 0.4906357526779175, "learning_rate": 5.690397143756712e-06, "loss": 0.0298, "step": 126140 }, { "epoch": 1.0207136499716805, "grad_norm": 0.40026384592056274, "learning_rate": 5.6896978039919185e-06, "loss": 0.0159, "step": 126150 }, { "epoch": 1.0207945626668824, "grad_norm": 0.3238833248615265, "learning_rate": 5.688998450472557e-06, "loss": 0.0213, "step": 126160 }, { "epoch": 1.0208754753620843, "grad_norm": 0.42873436212539673, "learning_rate": 5.688299083212577e-06, "loss": 0.0147, "step": 126170 }, { "epoch": 1.0209563880572863, "grad_norm": 0.34580865502357483, "learning_rate": 5.687599702225925e-06, "loss": 0.0287, "step": 126180 }, { "epoch": 1.021037300752488, "grad_norm": 0.5589075684547424, "learning_rate": 5.686900307526548e-06, "loss": 0.0322, "step": 126190 }, { "epoch": 1.02111821344769, "grad_norm": 0.5408740043640137, "learning_rate": 5.686200899128396e-06, "loss": 0.0276, "step": 126200 }, { "epoch": 1.0211991261428919, "grad_norm": 0.42472654581069946, "learning_rate": 5.685501477045414e-06, "loss": 0.019, "step": 126210 }, { "epoch": 1.0212800388380936, "grad_norm": 0.3318297863006592, "learning_rate": 5.684802041291551e-06, "loss": 0.0199, "step": 126220 }, { "epoch": 1.0213609515332955, "grad_norm": 0.21793024241924286, "learning_rate": 5.68410259188076e-06, "loss": 0.0215, "step": 126230 }, { "epoch": 1.0214418642284975, "grad_norm": 0.22959136962890625, "learning_rate": 5.6834031288269845e-06, "loss": 0.0176, "step": 126240 }, { "epoch": 1.0215227769236994, "grad_norm": 0.4488028585910797, "learning_rate": 5.682703652144177e-06, "loss": 0.0222, "step": 126250 }, { "epoch": 1.0216036896189011, "grad_norm": 0.37405622005462646, "learning_rate": 5.682004161846288e-06, "loss": 0.0234, "step": 126260 }, { "epoch": 1.021684602314103, "grad_norm": 0.5254485607147217, "learning_rate": 5.6813046579472645e-06, "loss": 0.0347, "step": 126270 }, { "epoch": 1.021765515009305, "grad_norm": 0.2952100336551666, "learning_rate": 5.680605140461058e-06, "loss": 0.0277, "step": 126280 }, { "epoch": 1.0218464277045067, "grad_norm": 0.6931747794151306, "learning_rate": 5.679905609401618e-06, "loss": 0.0194, "step": 126290 }, { "epoch": 1.0219273403997087, "grad_norm": 0.27513420581817627, "learning_rate": 5.679206064782897e-06, "loss": 0.0264, "step": 126300 }, { "epoch": 1.0220082530949106, "grad_norm": 0.32796958088874817, "learning_rate": 5.678506506618845e-06, "loss": 0.0304, "step": 126310 }, { "epoch": 1.0220891657901126, "grad_norm": 0.5069066882133484, "learning_rate": 5.6778069349234125e-06, "loss": 0.0176, "step": 126320 }, { "epoch": 1.0221700784853143, "grad_norm": 0.5408557653427124, "learning_rate": 5.677107349710551e-06, "loss": 0.0179, "step": 126330 }, { "epoch": 1.0222509911805162, "grad_norm": 0.3089045286178589, "learning_rate": 5.676407750994215e-06, "loss": 0.0155, "step": 126340 }, { "epoch": 1.0223319038757182, "grad_norm": 0.4280298948287964, "learning_rate": 5.675708138788352e-06, "loss": 0.0286, "step": 126350 }, { "epoch": 1.0224128165709199, "grad_norm": 0.2123483270406723, "learning_rate": 5.675008513106917e-06, "loss": 0.0166, "step": 126360 }, { "epoch": 1.0224937292661218, "grad_norm": 0.059504423290491104, "learning_rate": 5.674308873963864e-06, "loss": 0.0181, "step": 126370 }, { "epoch": 1.0225746419613237, "grad_norm": 0.440035879611969, "learning_rate": 5.673609221373143e-06, "loss": 0.0226, "step": 126380 }, { "epoch": 1.0226555546565257, "grad_norm": 0.3547200858592987, "learning_rate": 5.672909555348708e-06, "loss": 0.0214, "step": 126390 }, { "epoch": 1.0227364673517274, "grad_norm": 0.06595311313867569, "learning_rate": 5.672209875904513e-06, "loss": 0.0325, "step": 126400 }, { "epoch": 1.0228173800469293, "grad_norm": 0.1993991881608963, "learning_rate": 5.671510183054511e-06, "loss": 0.0225, "step": 126410 }, { "epoch": 1.0228982927421313, "grad_norm": 0.394782155752182, "learning_rate": 5.670810476812655e-06, "loss": 0.024, "step": 126420 }, { "epoch": 1.0229792054373332, "grad_norm": 0.3049663305282593, "learning_rate": 5.6701107571929e-06, "loss": 0.0337, "step": 126430 }, { "epoch": 1.023060118132535, "grad_norm": 0.4268330931663513, "learning_rate": 5.6694110242092015e-06, "loss": 0.036, "step": 126440 }, { "epoch": 1.0231410308277369, "grad_norm": 0.8339303135871887, "learning_rate": 5.668711277875515e-06, "loss": 0.0218, "step": 126450 }, { "epoch": 1.0232219435229388, "grad_norm": 0.39042603969573975, "learning_rate": 5.66801151820579e-06, "loss": 0.0104, "step": 126460 }, { "epoch": 1.0233028562181405, "grad_norm": 0.423035591840744, "learning_rate": 5.667311745213988e-06, "loss": 0.0364, "step": 126470 }, { "epoch": 1.0233837689133425, "grad_norm": 0.44415125250816345, "learning_rate": 5.6666119589140625e-06, "loss": 0.0168, "step": 126480 }, { "epoch": 1.0234646816085444, "grad_norm": 0.2710362672805786, "learning_rate": 5.6659121593199675e-06, "loss": 0.02, "step": 126490 }, { "epoch": 1.0235455943037464, "grad_norm": 0.3862279951572418, "learning_rate": 5.665212346445661e-06, "loss": 0.016, "step": 126500 }, { "epoch": 1.023626506998948, "grad_norm": 0.30594491958618164, "learning_rate": 5.6645125203050964e-06, "loss": 0.0195, "step": 126510 }, { "epoch": 1.02370741969415, "grad_norm": 0.42390939593315125, "learning_rate": 5.663812680912236e-06, "loss": 0.019, "step": 126520 }, { "epoch": 1.023788332389352, "grad_norm": 0.714435875415802, "learning_rate": 5.663112828281031e-06, "loss": 0.0352, "step": 126530 }, { "epoch": 1.0238692450845537, "grad_norm": 0.47112923860549927, "learning_rate": 5.66241296242544e-06, "loss": 0.0276, "step": 126540 }, { "epoch": 1.0239501577797556, "grad_norm": 0.3157631456851959, "learning_rate": 5.661713083359421e-06, "loss": 0.0144, "step": 126550 }, { "epoch": 1.0240310704749576, "grad_norm": 0.27018535137176514, "learning_rate": 5.661013191096934e-06, "loss": 0.012, "step": 126560 }, { "epoch": 1.0241119831701595, "grad_norm": 0.4398082196712494, "learning_rate": 5.660313285651931e-06, "loss": 0.0193, "step": 126570 }, { "epoch": 1.0241928958653612, "grad_norm": 0.5167397856712341, "learning_rate": 5.6596133670383756e-06, "loss": 0.0304, "step": 126580 }, { "epoch": 1.0242738085605632, "grad_norm": 0.5914731621742249, "learning_rate": 5.6589134352702245e-06, "loss": 0.0306, "step": 126590 }, { "epoch": 1.024354721255765, "grad_norm": 0.3826145827770233, "learning_rate": 5.658213490361435e-06, "loss": 0.0222, "step": 126600 }, { "epoch": 1.0244356339509668, "grad_norm": 0.36526402831077576, "learning_rate": 5.657513532325967e-06, "loss": 0.0334, "step": 126610 }, { "epoch": 1.0245165466461688, "grad_norm": 0.19400866329669952, "learning_rate": 5.656813561177779e-06, "loss": 0.0177, "step": 126620 }, { "epoch": 1.0245974593413707, "grad_norm": 0.5349475741386414, "learning_rate": 5.656113576930832e-06, "loss": 0.022, "step": 126630 }, { "epoch": 1.0246783720365726, "grad_norm": 0.8856109976768494, "learning_rate": 5.655413579599085e-06, "loss": 0.0266, "step": 126640 }, { "epoch": 1.0247592847317744, "grad_norm": 0.2729538083076477, "learning_rate": 5.654713569196497e-06, "loss": 0.0216, "step": 126650 }, { "epoch": 1.0248401974269763, "grad_norm": 0.39448341727256775, "learning_rate": 5.6540135457370295e-06, "loss": 0.0253, "step": 126660 }, { "epoch": 1.0249211101221782, "grad_norm": 0.3511151969432831, "learning_rate": 5.653313509234643e-06, "loss": 0.0216, "step": 126670 }, { "epoch": 1.02500202281738, "grad_norm": 0.30658191442489624, "learning_rate": 5.652613459703297e-06, "loss": 0.0223, "step": 126680 }, { "epoch": 1.025082935512582, "grad_norm": 0.44291216135025024, "learning_rate": 5.651913397156953e-06, "loss": 0.0264, "step": 126690 }, { "epoch": 1.0251638482077838, "grad_norm": 0.4550771117210388, "learning_rate": 5.651213321609572e-06, "loss": 0.0181, "step": 126700 }, { "epoch": 1.0252447609029858, "grad_norm": 0.48648083209991455, "learning_rate": 5.650513233075119e-06, "loss": 0.0222, "step": 126710 }, { "epoch": 1.0253256735981875, "grad_norm": 0.20680856704711914, "learning_rate": 5.649813131567551e-06, "loss": 0.0165, "step": 126720 }, { "epoch": 1.0254065862933894, "grad_norm": 0.26061275601387024, "learning_rate": 5.64911301710083e-06, "loss": 0.0178, "step": 126730 }, { "epoch": 1.0254874989885914, "grad_norm": 0.26334473490715027, "learning_rate": 5.648412889688922e-06, "loss": 0.0281, "step": 126740 }, { "epoch": 1.025568411683793, "grad_norm": 0.28912830352783203, "learning_rate": 5.64771274934579e-06, "loss": 0.0348, "step": 126750 }, { "epoch": 1.025649324378995, "grad_norm": 0.8381834030151367, "learning_rate": 5.647012596085392e-06, "loss": 0.0343, "step": 126760 }, { "epoch": 1.025730237074197, "grad_norm": 0.45607811212539673, "learning_rate": 5.6463124299216935e-06, "loss": 0.0315, "step": 126770 }, { "epoch": 1.025811149769399, "grad_norm": 0.6753295660018921, "learning_rate": 5.6456122508686585e-06, "loss": 0.0304, "step": 126780 }, { "epoch": 1.0258920624646006, "grad_norm": 0.28565290570259094, "learning_rate": 5.644912058940251e-06, "loss": 0.0276, "step": 126790 }, { "epoch": 1.0259729751598026, "grad_norm": 0.09958723187446594, "learning_rate": 5.644211854150433e-06, "loss": 0.0367, "step": 126800 }, { "epoch": 1.0260538878550045, "grad_norm": 0.09885304421186447, "learning_rate": 5.643511636513168e-06, "loss": 0.0123, "step": 126810 }, { "epoch": 1.0261348005502062, "grad_norm": 0.39722204208374023, "learning_rate": 5.642811406042426e-06, "loss": 0.0368, "step": 126820 }, { "epoch": 1.0262157132454082, "grad_norm": 0.22131524980068207, "learning_rate": 5.6421111627521655e-06, "loss": 0.0165, "step": 126830 }, { "epoch": 1.02629662594061, "grad_norm": 0.4125031530857086, "learning_rate": 5.641410906656352e-06, "loss": 0.017, "step": 126840 }, { "epoch": 1.026377538635812, "grad_norm": 0.46422967314720154, "learning_rate": 5.640710637768952e-06, "loss": 0.0149, "step": 126850 }, { "epoch": 1.0264584513310138, "grad_norm": 0.1595683991909027, "learning_rate": 5.640010356103934e-06, "loss": 0.0129, "step": 126860 }, { "epoch": 1.0265393640262157, "grad_norm": 0.43619176745414734, "learning_rate": 5.639310061675258e-06, "loss": 0.0305, "step": 126870 }, { "epoch": 1.0266202767214176, "grad_norm": 0.2634323239326477, "learning_rate": 5.638609754496893e-06, "loss": 0.0149, "step": 126880 }, { "epoch": 1.0267011894166194, "grad_norm": 0.7518396377563477, "learning_rate": 5.637909434582804e-06, "loss": 0.0168, "step": 126890 }, { "epoch": 1.0267821021118213, "grad_norm": 0.2857092320919037, "learning_rate": 5.63720910194696e-06, "loss": 0.022, "step": 126900 }, { "epoch": 1.0268630148070232, "grad_norm": 0.5353099703788757, "learning_rate": 5.636508756603324e-06, "loss": 0.03, "step": 126910 }, { "epoch": 1.0269439275022252, "grad_norm": 0.7812924385070801, "learning_rate": 5.635808398565864e-06, "loss": 0.0235, "step": 126920 }, { "epoch": 1.027024840197427, "grad_norm": 0.40029817819595337, "learning_rate": 5.63510802784855e-06, "loss": 0.0212, "step": 126930 }, { "epoch": 1.0271057528926288, "grad_norm": 0.40933382511138916, "learning_rate": 5.634407644465346e-06, "loss": 0.0276, "step": 126940 }, { "epoch": 1.0271866655878308, "grad_norm": 0.39207640290260315, "learning_rate": 5.63370724843022e-06, "loss": 0.0117, "step": 126950 }, { "epoch": 1.0272675782830325, "grad_norm": 0.39963504672050476, "learning_rate": 5.633006839757142e-06, "loss": 0.0189, "step": 126960 }, { "epoch": 1.0273484909782344, "grad_norm": 0.43522095680236816, "learning_rate": 5.632306418460078e-06, "loss": 0.0179, "step": 126970 }, { "epoch": 1.0274294036734364, "grad_norm": 0.5154375433921814, "learning_rate": 5.631605984552998e-06, "loss": 0.0236, "step": 126980 }, { "epoch": 1.0275103163686383, "grad_norm": 0.3809194564819336, "learning_rate": 5.63090553804987e-06, "loss": 0.0146, "step": 126990 }, { "epoch": 1.02759122906384, "grad_norm": 0.8607330918312073, "learning_rate": 5.630205078964662e-06, "loss": 0.0357, "step": 127000 }, { "epoch": 1.027672141759042, "grad_norm": 0.2557770609855652, "learning_rate": 5.629504607311346e-06, "loss": 0.0164, "step": 127010 }, { "epoch": 1.027753054454244, "grad_norm": 0.3615961968898773, "learning_rate": 5.628804123103888e-06, "loss": 0.0265, "step": 127020 }, { "epoch": 1.0278339671494456, "grad_norm": 0.27459481358528137, "learning_rate": 5.6281036263562584e-06, "loss": 0.0127, "step": 127030 }, { "epoch": 1.0279148798446476, "grad_norm": 0.25662562251091003, "learning_rate": 5.627403117082431e-06, "loss": 0.0244, "step": 127040 }, { "epoch": 1.0279957925398495, "grad_norm": 0.5800948739051819, "learning_rate": 5.626702595296369e-06, "loss": 0.0246, "step": 127050 }, { "epoch": 1.0280767052350515, "grad_norm": 0.6844745874404907, "learning_rate": 5.6260020610120505e-06, "loss": 0.0253, "step": 127060 }, { "epoch": 1.0281576179302532, "grad_norm": 0.49455463886260986, "learning_rate": 5.625301514243439e-06, "loss": 0.0221, "step": 127070 }, { "epoch": 1.0282385306254551, "grad_norm": 0.391938179731369, "learning_rate": 5.624600955004512e-06, "loss": 0.0186, "step": 127080 }, { "epoch": 1.028319443320657, "grad_norm": 0.6778194904327393, "learning_rate": 5.623900383309236e-06, "loss": 0.0363, "step": 127090 }, { "epoch": 1.028400356015859, "grad_norm": 0.46492546796798706, "learning_rate": 5.623199799171585e-06, "loss": 0.0223, "step": 127100 }, { "epoch": 1.0284812687110607, "grad_norm": 0.23389704525470734, "learning_rate": 5.622499202605529e-06, "loss": 0.0167, "step": 127110 }, { "epoch": 1.0285621814062627, "grad_norm": 0.42684754729270935, "learning_rate": 5.6217985936250396e-06, "loss": 0.019, "step": 127120 }, { "epoch": 1.0286430941014646, "grad_norm": 0.16973961889743805, "learning_rate": 5.621097972244091e-06, "loss": 0.0235, "step": 127130 }, { "epoch": 1.0287240067966663, "grad_norm": 0.09173687547445297, "learning_rate": 5.620397338476654e-06, "loss": 0.0168, "step": 127140 }, { "epoch": 1.0288049194918683, "grad_norm": 0.5533711314201355, "learning_rate": 5.6196966923367016e-06, "loss": 0.0252, "step": 127150 }, { "epoch": 1.0288858321870702, "grad_norm": 0.4125807285308838, "learning_rate": 5.618996033838208e-06, "loss": 0.0247, "step": 127160 }, { "epoch": 1.0289667448822721, "grad_norm": 0.09924108535051346, "learning_rate": 5.618295362995144e-06, "loss": 0.0172, "step": 127170 }, { "epoch": 1.0290476575774739, "grad_norm": 0.7223089337348938, "learning_rate": 5.617594679821486e-06, "loss": 0.0321, "step": 127180 }, { "epoch": 1.0291285702726758, "grad_norm": 0.5276917219161987, "learning_rate": 5.616893984331204e-06, "loss": 0.0216, "step": 127190 }, { "epoch": 1.0292094829678777, "grad_norm": 0.4968339502811432, "learning_rate": 5.616193276538275e-06, "loss": 0.0192, "step": 127200 }, { "epoch": 1.0292903956630794, "grad_norm": 0.5108069181442261, "learning_rate": 5.615492556456672e-06, "loss": 0.0217, "step": 127210 }, { "epoch": 1.0293713083582814, "grad_norm": 0.18660223484039307, "learning_rate": 5.6147918241003675e-06, "loss": 0.0268, "step": 127220 }, { "epoch": 1.0294522210534833, "grad_norm": 0.3851677179336548, "learning_rate": 5.61409107948334e-06, "loss": 0.0234, "step": 127230 }, { "epoch": 1.0295331337486853, "grad_norm": 0.25230684876441956, "learning_rate": 5.613390322619561e-06, "loss": 0.0214, "step": 127240 }, { "epoch": 1.029614046443887, "grad_norm": 0.44399362802505493, "learning_rate": 5.6126895535230074e-06, "loss": 0.0264, "step": 127250 }, { "epoch": 1.029694959139089, "grad_norm": 0.5256907939910889, "learning_rate": 5.6119887722076545e-06, "loss": 0.0221, "step": 127260 }, { "epoch": 1.0297758718342909, "grad_norm": 0.7669011354446411, "learning_rate": 5.6112879786874775e-06, "loss": 0.0217, "step": 127270 }, { "epoch": 1.0298567845294926, "grad_norm": 0.3327856957912445, "learning_rate": 5.6105871729764526e-06, "loss": 0.021, "step": 127280 }, { "epoch": 1.0299376972246945, "grad_norm": 0.4333939850330353, "learning_rate": 5.609886355088555e-06, "loss": 0.0137, "step": 127290 }, { "epoch": 1.0300186099198965, "grad_norm": 0.5556111931800842, "learning_rate": 5.609185525037761e-06, "loss": 0.0354, "step": 127300 }, { "epoch": 1.0300995226150984, "grad_norm": 0.26269182562828064, "learning_rate": 5.608484682838049e-06, "loss": 0.0238, "step": 127310 }, { "epoch": 1.0301804353103001, "grad_norm": 0.2855207026004791, "learning_rate": 5.607783828503393e-06, "loss": 0.023, "step": 127320 }, { "epoch": 1.030261348005502, "grad_norm": 0.3857981860637665, "learning_rate": 5.6070829620477725e-06, "loss": 0.022, "step": 127330 }, { "epoch": 1.030342260700704, "grad_norm": 0.6337608098983765, "learning_rate": 5.606382083485166e-06, "loss": 0.032, "step": 127340 }, { "epoch": 1.0304231733959057, "grad_norm": 0.37454190850257874, "learning_rate": 5.605681192829545e-06, "loss": 0.0284, "step": 127350 }, { "epoch": 1.0305040860911077, "grad_norm": 0.18621575832366943, "learning_rate": 5.604980290094894e-06, "loss": 0.0216, "step": 127360 }, { "epoch": 1.0305849987863096, "grad_norm": 0.20865795016288757, "learning_rate": 5.604279375295188e-06, "loss": 0.0172, "step": 127370 }, { "epoch": 1.0306659114815115, "grad_norm": 0.4241603910923004, "learning_rate": 5.603578448444404e-06, "loss": 0.016, "step": 127380 }, { "epoch": 1.0307468241767133, "grad_norm": 0.42350494861602783, "learning_rate": 5.602877509556523e-06, "loss": 0.0201, "step": 127390 }, { "epoch": 1.0308277368719152, "grad_norm": 0.5186290144920349, "learning_rate": 5.602176558645523e-06, "loss": 0.0321, "step": 127400 }, { "epoch": 1.0309086495671171, "grad_norm": 0.16458016633987427, "learning_rate": 5.601475595725383e-06, "loss": 0.0134, "step": 127410 }, { "epoch": 1.0309895622623189, "grad_norm": 0.30631253123283386, "learning_rate": 5.6007746208100825e-06, "loss": 0.0247, "step": 127420 }, { "epoch": 1.0310704749575208, "grad_norm": 0.43147432804107666, "learning_rate": 5.600073633913597e-06, "loss": 0.0303, "step": 127430 }, { "epoch": 1.0311513876527227, "grad_norm": 0.6050078272819519, "learning_rate": 5.599372635049913e-06, "loss": 0.0126, "step": 127440 }, { "epoch": 1.0312323003479247, "grad_norm": 0.4174087941646576, "learning_rate": 5.598671624233008e-06, "loss": 0.0259, "step": 127450 }, { "epoch": 1.0313132130431264, "grad_norm": 0.34653937816619873, "learning_rate": 5.597970601476858e-06, "loss": 0.018, "step": 127460 }, { "epoch": 1.0313941257383283, "grad_norm": 0.7870433926582336, "learning_rate": 5.597269566795449e-06, "loss": 0.0262, "step": 127470 }, { "epoch": 1.0314750384335303, "grad_norm": 0.5027792453765869, "learning_rate": 5.5965685202027594e-06, "loss": 0.023, "step": 127480 }, { "epoch": 1.031555951128732, "grad_norm": 0.821760892868042, "learning_rate": 5.5958674617127694e-06, "loss": 0.0261, "step": 127490 }, { "epoch": 1.031636863823934, "grad_norm": 0.5118758082389832, "learning_rate": 5.595166391339462e-06, "loss": 0.0395, "step": 127500 }, { "epoch": 1.0317177765191359, "grad_norm": 0.15082988142967224, "learning_rate": 5.594465309096815e-06, "loss": 0.0167, "step": 127510 }, { "epoch": 1.0317986892143378, "grad_norm": 0.23130738735198975, "learning_rate": 5.593764214998815e-06, "loss": 0.0282, "step": 127520 }, { "epoch": 1.0318796019095395, "grad_norm": 0.6939193606376648, "learning_rate": 5.59306310905944e-06, "loss": 0.0274, "step": 127530 }, { "epoch": 1.0319605146047415, "grad_norm": 0.3597336411476135, "learning_rate": 5.592361991292672e-06, "loss": 0.0267, "step": 127540 }, { "epoch": 1.0320414272999434, "grad_norm": 0.2786080539226532, "learning_rate": 5.591660861712496e-06, "loss": 0.0191, "step": 127550 }, { "epoch": 1.0321223399951451, "grad_norm": 0.035516273230314255, "learning_rate": 5.590959720332893e-06, "loss": 0.0329, "step": 127560 }, { "epoch": 1.032203252690347, "grad_norm": 0.25992223620414734, "learning_rate": 5.590258567167845e-06, "loss": 0.0226, "step": 127570 }, { "epoch": 1.032284165385549, "grad_norm": 0.26626312732696533, "learning_rate": 5.589557402231338e-06, "loss": 0.0216, "step": 127580 }, { "epoch": 1.032365078080751, "grad_norm": 0.3686232268810272, "learning_rate": 5.5888562255373525e-06, "loss": 0.0256, "step": 127590 }, { "epoch": 1.0324459907759527, "grad_norm": 0.597822904586792, "learning_rate": 5.588155037099872e-06, "loss": 0.0322, "step": 127600 }, { "epoch": 1.0325269034711546, "grad_norm": 0.359066903591156, "learning_rate": 5.5874538369328815e-06, "loss": 0.0234, "step": 127610 }, { "epoch": 1.0326078161663566, "grad_norm": 0.39239925146102905, "learning_rate": 5.586752625050364e-06, "loss": 0.0413, "step": 127620 }, { "epoch": 1.0326887288615585, "grad_norm": 0.30461984872817993, "learning_rate": 5.586051401466305e-06, "loss": 0.0141, "step": 127630 }, { "epoch": 1.0327696415567602, "grad_norm": 0.6668668389320374, "learning_rate": 5.585350166194688e-06, "loss": 0.0287, "step": 127640 }, { "epoch": 1.0328505542519621, "grad_norm": 0.08777613937854767, "learning_rate": 5.584648919249496e-06, "loss": 0.0209, "step": 127650 }, { "epoch": 1.032931466947164, "grad_norm": 0.6250537037849426, "learning_rate": 5.583947660644716e-06, "loss": 0.0185, "step": 127660 }, { "epoch": 1.0330123796423658, "grad_norm": 0.4083855450153351, "learning_rate": 5.583246390394334e-06, "loss": 0.0312, "step": 127670 }, { "epoch": 1.0330932923375677, "grad_norm": 0.3091481029987335, "learning_rate": 5.582545108512333e-06, "loss": 0.0259, "step": 127680 }, { "epoch": 1.0331742050327697, "grad_norm": 0.23014789819717407, "learning_rate": 5.5818438150126994e-06, "loss": 0.021, "step": 127690 }, { "epoch": 1.0332551177279714, "grad_norm": 0.19504064321517944, "learning_rate": 5.581142509909418e-06, "loss": 0.019, "step": 127700 }, { "epoch": 1.0333360304231733, "grad_norm": 0.26112011075019836, "learning_rate": 5.580441193216479e-06, "loss": 0.0296, "step": 127710 }, { "epoch": 1.0334169431183753, "grad_norm": 0.5853290557861328, "learning_rate": 5.579739864947865e-06, "loss": 0.0285, "step": 127720 }, { "epoch": 1.0334978558135772, "grad_norm": 0.3799084424972534, "learning_rate": 5.5790385251175616e-06, "loss": 0.0265, "step": 127730 }, { "epoch": 1.033578768508779, "grad_norm": 0.1643984466791153, "learning_rate": 5.5783371737395585e-06, "loss": 0.019, "step": 127740 }, { "epoch": 1.0336596812039809, "grad_norm": 0.526897132396698, "learning_rate": 5.577635810827842e-06, "loss": 0.0277, "step": 127750 }, { "epoch": 1.0337405938991828, "grad_norm": 0.26407161355018616, "learning_rate": 5.576934436396399e-06, "loss": 0.0126, "step": 127760 }, { "epoch": 1.0338215065943848, "grad_norm": 0.3237599730491638, "learning_rate": 5.576233050459214e-06, "loss": 0.0303, "step": 127770 }, { "epoch": 1.0339024192895865, "grad_norm": 0.09927834570407867, "learning_rate": 5.5755316530302795e-06, "loss": 0.0214, "step": 127780 }, { "epoch": 1.0339833319847884, "grad_norm": 0.22958491742610931, "learning_rate": 5.574830244123582e-06, "loss": 0.0178, "step": 127790 }, { "epoch": 1.0340642446799904, "grad_norm": 0.22024846076965332, "learning_rate": 5.574128823753107e-06, "loss": 0.0171, "step": 127800 }, { "epoch": 1.034145157375192, "grad_norm": 0.39633557200431824, "learning_rate": 5.573427391932844e-06, "loss": 0.0231, "step": 127810 }, { "epoch": 1.034226070070394, "grad_norm": 0.31387025117874146, "learning_rate": 5.572725948676784e-06, "loss": 0.0228, "step": 127820 }, { "epoch": 1.034306982765596, "grad_norm": 0.25797730684280396, "learning_rate": 5.5720244939989136e-06, "loss": 0.0264, "step": 127830 }, { "epoch": 1.034387895460798, "grad_norm": 0.21789878606796265, "learning_rate": 5.571323027913221e-06, "loss": 0.0252, "step": 127840 }, { "epoch": 1.0344688081559996, "grad_norm": 0.518001914024353, "learning_rate": 5.570621550433698e-06, "loss": 0.0225, "step": 127850 }, { "epoch": 1.0345497208512016, "grad_norm": 0.6177282929420471, "learning_rate": 5.569920061574333e-06, "loss": 0.0293, "step": 127860 }, { "epoch": 1.0346306335464035, "grad_norm": 0.4406214952468872, "learning_rate": 5.569218561349115e-06, "loss": 0.0227, "step": 127870 }, { "epoch": 1.0347115462416052, "grad_norm": 0.36556458473205566, "learning_rate": 5.5685170497720346e-06, "loss": 0.0396, "step": 127880 }, { "epoch": 1.0347924589368072, "grad_norm": 0.37248989939689636, "learning_rate": 5.567815526857081e-06, "loss": 0.0124, "step": 127890 }, { "epoch": 1.034873371632009, "grad_norm": 0.6416247487068176, "learning_rate": 5.567113992618247e-06, "loss": 0.02, "step": 127900 }, { "epoch": 1.034954284327211, "grad_norm": 0.2227400839328766, "learning_rate": 5.56641244706952e-06, "loss": 0.015, "step": 127910 }, { "epoch": 1.0350351970224128, "grad_norm": 0.701080322265625, "learning_rate": 5.5657108902248934e-06, "loss": 0.0253, "step": 127920 }, { "epoch": 1.0351161097176147, "grad_norm": 0.3703119456768036, "learning_rate": 5.565009322098358e-06, "loss": 0.0259, "step": 127930 }, { "epoch": 1.0351970224128166, "grad_norm": 0.1274973601102829, "learning_rate": 5.564307742703904e-06, "loss": 0.0203, "step": 127940 }, { "epoch": 1.0352779351080184, "grad_norm": 0.611073911190033, "learning_rate": 5.563606152055522e-06, "loss": 0.0195, "step": 127950 }, { "epoch": 1.0353588478032203, "grad_norm": 0.28845518827438354, "learning_rate": 5.562904550167206e-06, "loss": 0.0248, "step": 127960 }, { "epoch": 1.0354397604984222, "grad_norm": 0.6563576459884644, "learning_rate": 5.562202937052947e-06, "loss": 0.0216, "step": 127970 }, { "epoch": 1.0355206731936242, "grad_norm": 0.42325279116630554, "learning_rate": 5.561501312726738e-06, "loss": 0.0183, "step": 127980 }, { "epoch": 1.035601585888826, "grad_norm": 0.38040992617607117, "learning_rate": 5.560799677202571e-06, "loss": 0.0249, "step": 127990 }, { "epoch": 1.0356824985840278, "grad_norm": 0.3792056739330292, "learning_rate": 5.560098030494437e-06, "loss": 0.027, "step": 128000 }, { "epoch": 1.0357634112792298, "grad_norm": 0.2409868985414505, "learning_rate": 5.559396372616331e-06, "loss": 0.019, "step": 128010 }, { "epoch": 1.0358443239744315, "grad_norm": 0.4422881603240967, "learning_rate": 5.558694703582246e-06, "loss": 0.0182, "step": 128020 }, { "epoch": 1.0359252366696334, "grad_norm": 0.07807912677526474, "learning_rate": 5.557993023406173e-06, "loss": 0.0208, "step": 128030 }, { "epoch": 1.0360061493648354, "grad_norm": 0.376200407743454, "learning_rate": 5.557291332102109e-06, "loss": 0.0173, "step": 128040 }, { "epoch": 1.0360870620600373, "grad_norm": 0.23336787521839142, "learning_rate": 5.556589629684044e-06, "loss": 0.032, "step": 128050 }, { "epoch": 1.036167974755239, "grad_norm": 0.4718013107776642, "learning_rate": 5.555887916165974e-06, "loss": 0.029, "step": 128060 }, { "epoch": 1.036248887450441, "grad_norm": 0.24647922813892365, "learning_rate": 5.555186191561895e-06, "loss": 0.0124, "step": 128070 }, { "epoch": 1.036329800145643, "grad_norm": 0.19620469212532043, "learning_rate": 5.5544844558857965e-06, "loss": 0.0229, "step": 128080 }, { "epoch": 1.0364107128408446, "grad_norm": 0.35881587862968445, "learning_rate": 5.553782709151678e-06, "loss": 0.0202, "step": 128090 }, { "epoch": 1.0364916255360466, "grad_norm": 0.3616161048412323, "learning_rate": 5.5530809513735325e-06, "loss": 0.0282, "step": 128100 }, { "epoch": 1.0365725382312485, "grad_norm": 0.8248772621154785, "learning_rate": 5.552379182565355e-06, "loss": 0.0189, "step": 128110 }, { "epoch": 1.0366534509264504, "grad_norm": 0.72307950258255, "learning_rate": 5.55167740274114e-06, "loss": 0.0411, "step": 128120 }, { "epoch": 1.0367343636216522, "grad_norm": 0.5926255583763123, "learning_rate": 5.550975611914885e-06, "loss": 0.0248, "step": 128130 }, { "epoch": 1.036815276316854, "grad_norm": 0.5734752416610718, "learning_rate": 5.550273810100582e-06, "loss": 0.024, "step": 128140 }, { "epoch": 1.036896189012056, "grad_norm": 0.2734264135360718, "learning_rate": 5.549571997312234e-06, "loss": 0.0284, "step": 128150 }, { "epoch": 1.0369771017072578, "grad_norm": 0.6553695797920227, "learning_rate": 5.548870173563829e-06, "loss": 0.0278, "step": 128160 }, { "epoch": 1.0370580144024597, "grad_norm": 0.08413722366094589, "learning_rate": 5.548168338869368e-06, "loss": 0.0117, "step": 128170 }, { "epoch": 1.0371389270976616, "grad_norm": 0.214999720454216, "learning_rate": 5.547466493242847e-06, "loss": 0.013, "step": 128180 }, { "epoch": 1.0372198397928636, "grad_norm": 0.3248373568058014, "learning_rate": 5.546764636698263e-06, "loss": 0.0205, "step": 128190 }, { "epoch": 1.0373007524880653, "grad_norm": 0.16150431334972382, "learning_rate": 5.546062769249611e-06, "loss": 0.0129, "step": 128200 }, { "epoch": 1.0373816651832672, "grad_norm": 0.21612462401390076, "learning_rate": 5.545360890910892e-06, "loss": 0.0231, "step": 128210 }, { "epoch": 1.0374625778784692, "grad_norm": 0.4098620116710663, "learning_rate": 5.5446590016961e-06, "loss": 0.0337, "step": 128220 }, { "epoch": 1.037543490573671, "grad_norm": 0.6399174928665161, "learning_rate": 5.543957101619235e-06, "loss": 0.0243, "step": 128230 }, { "epoch": 1.0376244032688728, "grad_norm": 0.38470539450645447, "learning_rate": 5.543255190694292e-06, "loss": 0.0256, "step": 128240 }, { "epoch": 1.0377053159640748, "grad_norm": 0.37367311120033264, "learning_rate": 5.542553268935274e-06, "loss": 0.0156, "step": 128250 }, { "epoch": 1.0377862286592767, "grad_norm": 0.17549775540828705, "learning_rate": 5.541851336356175e-06, "loss": 0.0214, "step": 128260 }, { "epoch": 1.0378671413544784, "grad_norm": 0.2015535682439804, "learning_rate": 5.5411493929709935e-06, "loss": 0.0303, "step": 128270 }, { "epoch": 1.0379480540496804, "grad_norm": 1.0590708255767822, "learning_rate": 5.5404474387937316e-06, "loss": 0.022, "step": 128280 }, { "epoch": 1.0380289667448823, "grad_norm": 0.31086266040802, "learning_rate": 5.539745473838386e-06, "loss": 0.0264, "step": 128290 }, { "epoch": 1.0381098794400843, "grad_norm": 0.40263476967811584, "learning_rate": 5.539043498118956e-06, "loss": 0.024, "step": 128300 }, { "epoch": 1.038190792135286, "grad_norm": 0.35322120785713196, "learning_rate": 5.5383415116494425e-06, "loss": 0.0164, "step": 128310 }, { "epoch": 1.038271704830488, "grad_norm": 0.24441581964492798, "learning_rate": 5.537639514443842e-06, "loss": 0.0238, "step": 128320 }, { "epoch": 1.0383526175256899, "grad_norm": 0.004726337268948555, "learning_rate": 5.536937506516158e-06, "loss": 0.0133, "step": 128330 }, { "epoch": 1.0384335302208916, "grad_norm": 0.3918890953063965, "learning_rate": 5.53623548788039e-06, "loss": 0.0305, "step": 128340 }, { "epoch": 1.0385144429160935, "grad_norm": 0.6037814617156982, "learning_rate": 5.535533458550535e-06, "loss": 0.0254, "step": 128350 }, { "epoch": 1.0385953556112955, "grad_norm": 0.2568117678165436, "learning_rate": 5.534831418540596e-06, "loss": 0.029, "step": 128360 }, { "epoch": 1.0386762683064974, "grad_norm": 0.4502254128456116, "learning_rate": 5.5341293678645746e-06, "loss": 0.0233, "step": 128370 }, { "epoch": 1.0387571810016991, "grad_norm": 0.3884800672531128, "learning_rate": 5.53342730653647e-06, "loss": 0.0153, "step": 128380 }, { "epoch": 1.038838093696901, "grad_norm": 0.008183431811630726, "learning_rate": 5.532725234570284e-06, "loss": 0.0174, "step": 128390 }, { "epoch": 1.038919006392103, "grad_norm": 0.4373493194580078, "learning_rate": 5.532023151980018e-06, "loss": 0.0249, "step": 128400 }, { "epoch": 1.0389999190873047, "grad_norm": 0.3688172399997711, "learning_rate": 5.5313210587796725e-06, "loss": 0.0264, "step": 128410 }, { "epoch": 1.0390808317825067, "grad_norm": 0.5847728848457336, "learning_rate": 5.530618954983251e-06, "loss": 0.0304, "step": 128420 }, { "epoch": 1.0391617444777086, "grad_norm": 0.19316600263118744, "learning_rate": 5.529916840604755e-06, "loss": 0.0184, "step": 128430 }, { "epoch": 1.0392426571729105, "grad_norm": 0.12730279564857483, "learning_rate": 5.529214715658184e-06, "loss": 0.0183, "step": 128440 }, { "epoch": 1.0393235698681123, "grad_norm": 0.2665981352329254, "learning_rate": 5.528512580157546e-06, "loss": 0.0301, "step": 128450 }, { "epoch": 1.0394044825633142, "grad_norm": 0.5413110852241516, "learning_rate": 5.527810434116838e-06, "loss": 0.0342, "step": 128460 }, { "epoch": 1.0394853952585161, "grad_norm": 0.4868902266025543, "learning_rate": 5.527108277550066e-06, "loss": 0.0222, "step": 128470 }, { "epoch": 1.0395663079537178, "grad_norm": 0.3717004358768463, "learning_rate": 5.526406110471232e-06, "loss": 0.0257, "step": 128480 }, { "epoch": 1.0396472206489198, "grad_norm": 0.3136546313762665, "learning_rate": 5.5257039328943395e-06, "loss": 0.0298, "step": 128490 }, { "epoch": 1.0397281333441217, "grad_norm": 0.5563220381736755, "learning_rate": 5.525001744833392e-06, "loss": 0.023, "step": 128500 }, { "epoch": 1.0398090460393237, "grad_norm": 0.4918583333492279, "learning_rate": 5.524299546302392e-06, "loss": 0.0257, "step": 128510 }, { "epoch": 1.0398899587345254, "grad_norm": 0.41385552287101746, "learning_rate": 5.523597337315346e-06, "loss": 0.0188, "step": 128520 }, { "epoch": 1.0399708714297273, "grad_norm": 0.2912329435348511, "learning_rate": 5.5228951178862566e-06, "loss": 0.0216, "step": 128530 }, { "epoch": 1.0400517841249293, "grad_norm": 0.24107378721237183, "learning_rate": 5.522192888029127e-06, "loss": 0.0215, "step": 128540 }, { "epoch": 1.040132696820131, "grad_norm": 0.46601325273513794, "learning_rate": 5.521490647757962e-06, "loss": 0.0283, "step": 128550 }, { "epoch": 1.040213609515333, "grad_norm": 0.5920807123184204, "learning_rate": 5.520788397086769e-06, "loss": 0.0219, "step": 128560 }, { "epoch": 1.0402945222105349, "grad_norm": 0.3381712734699249, "learning_rate": 5.520086136029549e-06, "loss": 0.0296, "step": 128570 }, { "epoch": 1.0403754349057368, "grad_norm": 0.21164055168628693, "learning_rate": 5.519383864600309e-06, "loss": 0.028, "step": 128580 }, { "epoch": 1.0404563476009385, "grad_norm": 0.5795095562934875, "learning_rate": 5.518681582813055e-06, "loss": 0.0291, "step": 128590 }, { "epoch": 1.0405372602961405, "grad_norm": 0.45857617259025574, "learning_rate": 5.517979290681792e-06, "loss": 0.0233, "step": 128600 }, { "epoch": 1.0406181729913424, "grad_norm": 0.2642989158630371, "learning_rate": 5.517276988220525e-06, "loss": 0.026, "step": 128610 }, { "epoch": 1.0406990856865441, "grad_norm": 0.3872814178466797, "learning_rate": 5.5165746754432595e-06, "loss": 0.0239, "step": 128620 }, { "epoch": 1.040779998381746, "grad_norm": 0.4724734127521515, "learning_rate": 5.5158723523640044e-06, "loss": 0.0213, "step": 128630 }, { "epoch": 1.040860911076948, "grad_norm": 0.39657506346702576, "learning_rate": 5.515170018996765e-06, "loss": 0.0258, "step": 128640 }, { "epoch": 1.04094182377215, "grad_norm": 0.6799248456954956, "learning_rate": 5.514467675355544e-06, "loss": 0.0223, "step": 128650 }, { "epoch": 1.0410227364673517, "grad_norm": 0.3536997437477112, "learning_rate": 5.513765321454354e-06, "loss": 0.0141, "step": 128660 }, { "epoch": 1.0411036491625536, "grad_norm": 0.425754189491272, "learning_rate": 5.513062957307199e-06, "loss": 0.0228, "step": 128670 }, { "epoch": 1.0411845618577555, "grad_norm": 0.22347618639469147, "learning_rate": 5.512360582928085e-06, "loss": 0.0202, "step": 128680 }, { "epoch": 1.0412654745529573, "grad_norm": 0.05726855993270874, "learning_rate": 5.511658198331022e-06, "loss": 0.0225, "step": 128690 }, { "epoch": 1.0413463872481592, "grad_norm": 0.29953184723854065, "learning_rate": 5.510955803530016e-06, "loss": 0.0212, "step": 128700 }, { "epoch": 1.0414272999433611, "grad_norm": 0.5200151801109314, "learning_rate": 5.510253398539075e-06, "loss": 0.0217, "step": 128710 }, { "epoch": 1.041508212638563, "grad_norm": 0.43561694025993347, "learning_rate": 5.509550983372207e-06, "loss": 0.0296, "step": 128720 }, { "epoch": 1.0415891253337648, "grad_norm": 0.4076007008552551, "learning_rate": 5.5088485580434204e-06, "loss": 0.0273, "step": 128730 }, { "epoch": 1.0416700380289667, "grad_norm": 0.4337277114391327, "learning_rate": 5.508146122566724e-06, "loss": 0.0262, "step": 128740 }, { "epoch": 1.0417509507241687, "grad_norm": 0.4753462076187134, "learning_rate": 5.507443676956127e-06, "loss": 0.023, "step": 128750 }, { "epoch": 1.0418318634193704, "grad_norm": 0.5714084506034851, "learning_rate": 5.506741221225636e-06, "loss": 0.023, "step": 128760 }, { "epoch": 1.0419127761145723, "grad_norm": 0.3142508268356323, "learning_rate": 5.50603875538926e-06, "loss": 0.0191, "step": 128770 }, { "epoch": 1.0419936888097743, "grad_norm": 0.4306241273880005, "learning_rate": 5.505336279461011e-06, "loss": 0.0197, "step": 128780 }, { "epoch": 1.0420746015049762, "grad_norm": 0.33388540148735046, "learning_rate": 5.5046337934548955e-06, "loss": 0.0276, "step": 128790 }, { "epoch": 1.042155514200178, "grad_norm": 0.20618782937526703, "learning_rate": 5.503931297384926e-06, "loss": 0.0255, "step": 128800 }, { "epoch": 1.0422364268953799, "grad_norm": 0.31456258893013, "learning_rate": 5.503228791265107e-06, "loss": 0.0219, "step": 128810 }, { "epoch": 1.0423173395905818, "grad_norm": 0.3552430272102356, "learning_rate": 5.502526275109457e-06, "loss": 0.0144, "step": 128820 }, { "epoch": 1.0423982522857835, "grad_norm": 0.09787061810493469, "learning_rate": 5.501823748931978e-06, "loss": 0.026, "step": 128830 }, { "epoch": 1.0424791649809855, "grad_norm": 0.388189435005188, "learning_rate": 5.501121212746684e-06, "loss": 0.0129, "step": 128840 }, { "epoch": 1.0425600776761874, "grad_norm": 0.2346428632736206, "learning_rate": 5.500418666567586e-06, "loss": 0.0253, "step": 128850 }, { "epoch": 1.0426409903713894, "grad_norm": 0.28696855902671814, "learning_rate": 5.4997161104086945e-06, "loss": 0.0275, "step": 128860 }, { "epoch": 1.042721903066591, "grad_norm": 0.6562628149986267, "learning_rate": 5.49901354428402e-06, "loss": 0.0183, "step": 128870 }, { "epoch": 1.042802815761793, "grad_norm": 0.3664018511772156, "learning_rate": 5.498310968207574e-06, "loss": 0.0275, "step": 128880 }, { "epoch": 1.042883728456995, "grad_norm": 0.10441672056913376, "learning_rate": 5.497608382193366e-06, "loss": 0.0229, "step": 128890 }, { "epoch": 1.0429646411521967, "grad_norm": 0.27198919653892517, "learning_rate": 5.496905786255412e-06, "loss": 0.0184, "step": 128900 }, { "epoch": 1.0430455538473986, "grad_norm": 0.31535083055496216, "learning_rate": 5.49620318040772e-06, "loss": 0.0195, "step": 128910 }, { "epoch": 1.0431264665426006, "grad_norm": 0.2134285718202591, "learning_rate": 5.495500564664301e-06, "loss": 0.0189, "step": 128920 }, { "epoch": 1.0432073792378025, "grad_norm": 0.37738922238349915, "learning_rate": 5.4947979390391725e-06, "loss": 0.0209, "step": 128930 }, { "epoch": 1.0432882919330042, "grad_norm": 0.4471345543861389, "learning_rate": 5.494095303546343e-06, "loss": 0.0174, "step": 128940 }, { "epoch": 1.0433692046282061, "grad_norm": 0.32768478989601135, "learning_rate": 5.4933926581998244e-06, "loss": 0.016, "step": 128950 }, { "epoch": 1.043450117323408, "grad_norm": 0.3707192838191986, "learning_rate": 5.492690003013631e-06, "loss": 0.0179, "step": 128960 }, { "epoch": 1.04353103001861, "grad_norm": 0.2696634829044342, "learning_rate": 5.491987338001776e-06, "loss": 0.0335, "step": 128970 }, { "epoch": 1.0436119427138117, "grad_norm": 0.2978191077709198, "learning_rate": 5.491284663178273e-06, "loss": 0.0386, "step": 128980 }, { "epoch": 1.0436928554090137, "grad_norm": 0.34954604506492615, "learning_rate": 5.4905819785571334e-06, "loss": 0.031, "step": 128990 }, { "epoch": 1.0437737681042156, "grad_norm": 0.3883298635482788, "learning_rate": 5.489879284152373e-06, "loss": 0.0337, "step": 129000 }, { "epoch": 1.0438546807994173, "grad_norm": 0.2760920524597168, "learning_rate": 5.489176579978003e-06, "loss": 0.0241, "step": 129010 }, { "epoch": 1.0439355934946193, "grad_norm": 0.44534024596214294, "learning_rate": 5.488473866048039e-06, "loss": 0.0209, "step": 129020 }, { "epoch": 1.0440165061898212, "grad_norm": 0.42054781317710876, "learning_rate": 5.487771142376496e-06, "loss": 0.0301, "step": 129030 }, { "epoch": 1.0440974188850232, "grad_norm": 0.5998133420944214, "learning_rate": 5.487068408977387e-06, "loss": 0.0337, "step": 129040 }, { "epoch": 1.0441783315802249, "grad_norm": 0.8795748949050903, "learning_rate": 5.486365665864726e-06, "loss": 0.0207, "step": 129050 }, { "epoch": 1.0442592442754268, "grad_norm": 0.539923369884491, "learning_rate": 5.48566291305253e-06, "loss": 0.0333, "step": 129060 }, { "epoch": 1.0443401569706288, "grad_norm": 0.1274518519639969, "learning_rate": 5.484960150554811e-06, "loss": 0.022, "step": 129070 }, { "epoch": 1.0444210696658305, "grad_norm": 0.1870398372411728, "learning_rate": 5.484257378385587e-06, "loss": 0.0135, "step": 129080 }, { "epoch": 1.0445019823610324, "grad_norm": 0.39143988490104675, "learning_rate": 5.483554596558871e-06, "loss": 0.0156, "step": 129090 }, { "epoch": 1.0445828950562344, "grad_norm": 0.1423243135213852, "learning_rate": 5.482851805088679e-06, "loss": 0.0257, "step": 129100 }, { "epoch": 1.0446638077514363, "grad_norm": 0.6429786682128906, "learning_rate": 5.482149003989028e-06, "loss": 0.0316, "step": 129110 }, { "epoch": 1.044744720446638, "grad_norm": 0.4386562407016754, "learning_rate": 5.481446193273933e-06, "loss": 0.0229, "step": 129120 }, { "epoch": 1.04482563314184, "grad_norm": 0.618684709072113, "learning_rate": 5.480743372957409e-06, "loss": 0.015, "step": 129130 }, { "epoch": 1.044906545837042, "grad_norm": 0.4939152002334595, "learning_rate": 5.480040543053474e-06, "loss": 0.0208, "step": 129140 }, { "epoch": 1.0449874585322436, "grad_norm": 1.0460870265960693, "learning_rate": 5.479337703576145e-06, "loss": 0.0301, "step": 129150 }, { "epoch": 1.0450683712274456, "grad_norm": 0.35144031047821045, "learning_rate": 5.4786348545394354e-06, "loss": 0.02, "step": 129160 }, { "epoch": 1.0451492839226475, "grad_norm": 0.262809693813324, "learning_rate": 5.477931995957365e-06, "loss": 0.0288, "step": 129170 }, { "epoch": 1.0452301966178494, "grad_norm": 0.4875805377960205, "learning_rate": 5.477229127843949e-06, "loss": 0.0171, "step": 129180 }, { "epoch": 1.0453111093130512, "grad_norm": 0.675797164440155, "learning_rate": 5.476526250213207e-06, "loss": 0.0187, "step": 129190 }, { "epoch": 1.045392022008253, "grad_norm": 0.553245484828949, "learning_rate": 5.475823363079155e-06, "loss": 0.0291, "step": 129200 }, { "epoch": 1.045472934703455, "grad_norm": 0.5299426317214966, "learning_rate": 5.4751204664558086e-06, "loss": 0.0248, "step": 129210 }, { "epoch": 1.0455538473986568, "grad_norm": 0.29147452116012573, "learning_rate": 5.474417560357189e-06, "loss": 0.0208, "step": 129220 }, { "epoch": 1.0456347600938587, "grad_norm": 0.7253831028938293, "learning_rate": 5.473714644797313e-06, "loss": 0.0243, "step": 129230 }, { "epoch": 1.0457156727890606, "grad_norm": 0.1911836862564087, "learning_rate": 5.473011719790197e-06, "loss": 0.0275, "step": 129240 }, { "epoch": 1.0457965854842626, "grad_norm": 1.2548567056655884, "learning_rate": 5.472308785349861e-06, "loss": 0.0298, "step": 129250 }, { "epoch": 1.0458774981794643, "grad_norm": 0.7446393370628357, "learning_rate": 5.471605841490325e-06, "loss": 0.0264, "step": 129260 }, { "epoch": 1.0459584108746662, "grad_norm": 0.4942404627799988, "learning_rate": 5.470902888225605e-06, "loss": 0.0149, "step": 129270 }, { "epoch": 1.0460393235698682, "grad_norm": 0.415256142616272, "learning_rate": 5.470199925569721e-06, "loss": 0.0163, "step": 129280 }, { "epoch": 1.04612023626507, "grad_norm": 0.1631653904914856, "learning_rate": 5.469496953536693e-06, "loss": 0.0312, "step": 129290 }, { "epoch": 1.0462011489602718, "grad_norm": 0.9358953833580017, "learning_rate": 5.468793972140538e-06, "loss": 0.0371, "step": 129300 }, { "epoch": 1.0462820616554738, "grad_norm": 0.4459729492664337, "learning_rate": 5.468090981395278e-06, "loss": 0.0222, "step": 129310 }, { "epoch": 1.0463629743506757, "grad_norm": 0.5267007946968079, "learning_rate": 5.46738798131493e-06, "loss": 0.026, "step": 129320 }, { "epoch": 1.0464438870458774, "grad_norm": 0.24361000955104828, "learning_rate": 5.466684971913518e-06, "loss": 0.0255, "step": 129330 }, { "epoch": 1.0465247997410794, "grad_norm": 0.2292039841413498, "learning_rate": 5.465981953205058e-06, "loss": 0.0132, "step": 129340 }, { "epoch": 1.0466057124362813, "grad_norm": 0.20242424309253693, "learning_rate": 5.465278925203571e-06, "loss": 0.0314, "step": 129350 }, { "epoch": 1.046686625131483, "grad_norm": 0.36496230959892273, "learning_rate": 5.464575887923078e-06, "loss": 0.0312, "step": 129360 }, { "epoch": 1.046767537826685, "grad_norm": 0.45907023549079895, "learning_rate": 5.463872841377601e-06, "loss": 0.0279, "step": 129370 }, { "epoch": 1.046848450521887, "grad_norm": 0.2289179265499115, "learning_rate": 5.463169785581159e-06, "loss": 0.0167, "step": 129380 }, { "epoch": 1.0469293632170888, "grad_norm": 0.37932538986206055, "learning_rate": 5.462466720547773e-06, "loss": 0.0143, "step": 129390 }, { "epoch": 1.0470102759122906, "grad_norm": 0.4387490153312683, "learning_rate": 5.461763646291466e-06, "loss": 0.0183, "step": 129400 }, { "epoch": 1.0470911886074925, "grad_norm": 0.3502218723297119, "learning_rate": 5.461060562826257e-06, "loss": 0.016, "step": 129410 }, { "epoch": 1.0471721013026944, "grad_norm": 0.46413323283195496, "learning_rate": 5.460357470166168e-06, "loss": 0.0209, "step": 129420 }, { "epoch": 1.0472530139978962, "grad_norm": 0.08331740647554398, "learning_rate": 5.45965436832522e-06, "loss": 0.0223, "step": 129430 }, { "epoch": 1.047333926693098, "grad_norm": 0.3876549005508423, "learning_rate": 5.458951257317438e-06, "loss": 0.0204, "step": 129440 }, { "epoch": 1.0474148393883, "grad_norm": 0.44452354311943054, "learning_rate": 5.458248137156842e-06, "loss": 0.0244, "step": 129450 }, { "epoch": 1.047495752083502, "grad_norm": 0.05346718057990074, "learning_rate": 5.457545007857453e-06, "loss": 0.0105, "step": 129460 }, { "epoch": 1.0475766647787037, "grad_norm": 0.3436775207519531, "learning_rate": 5.456841869433296e-06, "loss": 0.0226, "step": 129470 }, { "epoch": 1.0476575774739056, "grad_norm": 1.081440806388855, "learning_rate": 5.456138721898392e-06, "loss": 0.0192, "step": 129480 }, { "epoch": 1.0477384901691076, "grad_norm": 0.3471055328845978, "learning_rate": 5.455435565266763e-06, "loss": 0.0205, "step": 129490 }, { "epoch": 1.0478194028643095, "grad_norm": 0.3599824607372284, "learning_rate": 5.454732399552435e-06, "loss": 0.0156, "step": 129500 }, { "epoch": 1.0479003155595112, "grad_norm": 0.12520356476306915, "learning_rate": 5.454029224769427e-06, "loss": 0.0146, "step": 129510 }, { "epoch": 1.0479812282547132, "grad_norm": 0.3455687463283539, "learning_rate": 5.453326040931767e-06, "loss": 0.0189, "step": 129520 }, { "epoch": 1.0480621409499151, "grad_norm": 0.30432602763175964, "learning_rate": 5.452622848053475e-06, "loss": 0.0263, "step": 129530 }, { "epoch": 1.0481430536451168, "grad_norm": 0.7219581007957458, "learning_rate": 5.451919646148574e-06, "loss": 0.0135, "step": 129540 }, { "epoch": 1.0482239663403188, "grad_norm": 0.2890873849391937, "learning_rate": 5.451216435231093e-06, "loss": 0.0231, "step": 129550 }, { "epoch": 1.0483048790355207, "grad_norm": 0.8402034044265747, "learning_rate": 5.450513215315052e-06, "loss": 0.028, "step": 129560 }, { "epoch": 1.0483857917307224, "grad_norm": 0.8269307613372803, "learning_rate": 5.449809986414475e-06, "loss": 0.0314, "step": 129570 }, { "epoch": 1.0484667044259244, "grad_norm": 0.4004015624523163, "learning_rate": 5.4491067485433864e-06, "loss": 0.0117, "step": 129580 }, { "epoch": 1.0485476171211263, "grad_norm": 0.693669855594635, "learning_rate": 5.448403501715814e-06, "loss": 0.0234, "step": 129590 }, { "epoch": 1.0486285298163283, "grad_norm": 0.3629318177700043, "learning_rate": 5.4477002459457795e-06, "loss": 0.0225, "step": 129600 }, { "epoch": 1.04870944251153, "grad_norm": 0.528456449508667, "learning_rate": 5.446996981247309e-06, "loss": 0.0273, "step": 129610 }, { "epoch": 1.048790355206732, "grad_norm": 0.268952339887619, "learning_rate": 5.446293707634427e-06, "loss": 0.0105, "step": 129620 }, { "epoch": 1.0488712679019339, "grad_norm": 0.3390282690525055, "learning_rate": 5.44559042512116e-06, "loss": 0.0289, "step": 129630 }, { "epoch": 1.0489521805971358, "grad_norm": 0.3879832923412323, "learning_rate": 5.444887133721532e-06, "loss": 0.0298, "step": 129640 }, { "epoch": 1.0490330932923375, "grad_norm": 0.6640790700912476, "learning_rate": 5.444183833449567e-06, "loss": 0.0313, "step": 129650 }, { "epoch": 1.0491140059875395, "grad_norm": 0.42675742506980896, "learning_rate": 5.443480524319296e-06, "loss": 0.0195, "step": 129660 }, { "epoch": 1.0491949186827414, "grad_norm": 0.5861726403236389, "learning_rate": 5.442777206344744e-06, "loss": 0.0206, "step": 129670 }, { "epoch": 1.0492758313779431, "grad_norm": 0.10510144382715225, "learning_rate": 5.442073879539932e-06, "loss": 0.0195, "step": 129680 }, { "epoch": 1.049356744073145, "grad_norm": 0.444724440574646, "learning_rate": 5.44137054391889e-06, "loss": 0.0254, "step": 129690 }, { "epoch": 1.049437656768347, "grad_norm": 0.42505601048469543, "learning_rate": 5.440667199495645e-06, "loss": 0.0219, "step": 129700 }, { "epoch": 1.049518569463549, "grad_norm": 0.3154621422290802, "learning_rate": 5.439963846284223e-06, "loss": 0.0265, "step": 129710 }, { "epoch": 1.0495994821587507, "grad_norm": 0.12740552425384521, "learning_rate": 5.439260484298652e-06, "loss": 0.0167, "step": 129720 }, { "epoch": 1.0496803948539526, "grad_norm": 0.18973688781261444, "learning_rate": 5.4385571135529555e-06, "loss": 0.0106, "step": 129730 }, { "epoch": 1.0497613075491545, "grad_norm": 0.4266907274723053, "learning_rate": 5.437853734061166e-06, "loss": 0.024, "step": 129740 }, { "epoch": 1.0498422202443563, "grad_norm": 0.3320683240890503, "learning_rate": 5.4371503458373065e-06, "loss": 0.0282, "step": 129750 }, { "epoch": 1.0499231329395582, "grad_norm": 0.3914063572883606, "learning_rate": 5.436446948895405e-06, "loss": 0.0328, "step": 129760 }, { "epoch": 1.0500040456347601, "grad_norm": 0.38733184337615967, "learning_rate": 5.435743543249492e-06, "loss": 0.0265, "step": 129770 }, { "epoch": 1.050084958329962, "grad_norm": 0.4846474826335907, "learning_rate": 5.435040128913593e-06, "loss": 0.0335, "step": 129780 }, { "epoch": 1.0501658710251638, "grad_norm": 0.44655853509902954, "learning_rate": 5.43433670590174e-06, "loss": 0.0326, "step": 129790 }, { "epoch": 1.0502467837203657, "grad_norm": 0.8535609245300293, "learning_rate": 5.433633274227955e-06, "loss": 0.0318, "step": 129800 }, { "epoch": 1.0503276964155677, "grad_norm": 0.08314832299947739, "learning_rate": 5.4329298339062696e-06, "loss": 0.0176, "step": 129810 }, { "epoch": 1.0504086091107694, "grad_norm": 0.34561899304389954, "learning_rate": 5.432226384950715e-06, "loss": 0.0235, "step": 129820 }, { "epoch": 1.0504895218059713, "grad_norm": 0.5495561361312866, "learning_rate": 5.431522927375316e-06, "loss": 0.0243, "step": 129830 }, { "epoch": 1.0505704345011733, "grad_norm": 0.3672122061252594, "learning_rate": 5.4308194611941035e-06, "loss": 0.0129, "step": 129840 }, { "epoch": 1.0506513471963752, "grad_norm": 0.259355366230011, "learning_rate": 5.430115986421108e-06, "loss": 0.0202, "step": 129850 }, { "epoch": 1.050732259891577, "grad_norm": 0.7050122618675232, "learning_rate": 5.429412503070355e-06, "loss": 0.026, "step": 129860 }, { "epoch": 1.0508131725867789, "grad_norm": 0.25371819734573364, "learning_rate": 5.428709011155877e-06, "loss": 0.0244, "step": 129870 }, { "epoch": 1.0508940852819808, "grad_norm": 0.6366099119186401, "learning_rate": 5.428005510691703e-06, "loss": 0.0435, "step": 129880 }, { "epoch": 1.0509749979771825, "grad_norm": 0.13725590705871582, "learning_rate": 5.427302001691862e-06, "loss": 0.016, "step": 129890 }, { "epoch": 1.0510559106723845, "grad_norm": 0.21958747506141663, "learning_rate": 5.426598484170386e-06, "loss": 0.026, "step": 129900 }, { "epoch": 1.0511368233675864, "grad_norm": 0.23898865282535553, "learning_rate": 5.425894958141303e-06, "loss": 0.0287, "step": 129910 }, { "epoch": 1.0512177360627883, "grad_norm": 0.276177316904068, "learning_rate": 5.425191423618643e-06, "loss": 0.0181, "step": 129920 }, { "epoch": 1.05129864875799, "grad_norm": 0.3772784471511841, "learning_rate": 5.424487880616439e-06, "loss": 0.0249, "step": 129930 }, { "epoch": 1.051379561453192, "grad_norm": 0.31078967452049255, "learning_rate": 5.42378432914872e-06, "loss": 0.0195, "step": 129940 }, { "epoch": 1.051460474148394, "grad_norm": 0.30711501836776733, "learning_rate": 5.423080769229517e-06, "loss": 0.0238, "step": 129950 }, { "epoch": 1.0515413868435957, "grad_norm": 0.4044956564903259, "learning_rate": 5.422377200872862e-06, "loss": 0.0221, "step": 129960 }, { "epoch": 1.0516222995387976, "grad_norm": 0.5075411796569824, "learning_rate": 5.421673624092785e-06, "loss": 0.02, "step": 129970 }, { "epoch": 1.0517032122339995, "grad_norm": 0.25811806321144104, "learning_rate": 5.420970038903317e-06, "loss": 0.0162, "step": 129980 }, { "epoch": 1.0517841249292015, "grad_norm": 0.3084053695201874, "learning_rate": 5.420266445318492e-06, "loss": 0.0267, "step": 129990 }, { "epoch": 1.0518650376244032, "grad_norm": 0.18036457896232605, "learning_rate": 5.419562843352338e-06, "loss": 0.0253, "step": 130000 }, { "epoch": 1.0519459503196051, "grad_norm": 0.34852996468544006, "learning_rate": 5.41885923301889e-06, "loss": 0.037, "step": 130010 }, { "epoch": 1.052026863014807, "grad_norm": 0.3112465739250183, "learning_rate": 5.4181556143321786e-06, "loss": 0.0201, "step": 130020 }, { "epoch": 1.0521077757100088, "grad_norm": 0.6012296080589294, "learning_rate": 5.417451987306236e-06, "loss": 0.0248, "step": 130030 }, { "epoch": 1.0521886884052107, "grad_norm": 0.5125448703765869, "learning_rate": 5.416748351955096e-06, "loss": 0.0201, "step": 130040 }, { "epoch": 1.0522696011004127, "grad_norm": 0.21448250114917755, "learning_rate": 5.416044708292788e-06, "loss": 0.0179, "step": 130050 }, { "epoch": 1.0523505137956146, "grad_norm": 0.15658776462078094, "learning_rate": 5.415341056333348e-06, "loss": 0.0177, "step": 130060 }, { "epoch": 1.0524314264908163, "grad_norm": 0.8250021934509277, "learning_rate": 5.414637396090806e-06, "loss": 0.0325, "step": 130070 }, { "epoch": 1.0525123391860183, "grad_norm": 0.3268207907676697, "learning_rate": 5.413933727579198e-06, "loss": 0.0307, "step": 130080 }, { "epoch": 1.0525932518812202, "grad_norm": 0.20562759041786194, "learning_rate": 5.413230050812555e-06, "loss": 0.0188, "step": 130090 }, { "epoch": 1.052674164576422, "grad_norm": 0.019452104344964027, "learning_rate": 5.412526365804911e-06, "loss": 0.0142, "step": 130100 }, { "epoch": 1.0527550772716239, "grad_norm": 0.7557426691055298, "learning_rate": 5.4118226725703e-06, "loss": 0.0234, "step": 130110 }, { "epoch": 1.0528359899668258, "grad_norm": 0.3489669859409332, "learning_rate": 5.4111189711227555e-06, "loss": 0.0257, "step": 130120 }, { "epoch": 1.0529169026620278, "grad_norm": 0.28444501757621765, "learning_rate": 5.41041526147631e-06, "loss": 0.0292, "step": 130130 }, { "epoch": 1.0529978153572295, "grad_norm": 0.3205603063106537, "learning_rate": 5.409711543644998e-06, "loss": 0.0201, "step": 130140 }, { "epoch": 1.0530787280524314, "grad_norm": 0.6072038412094116, "learning_rate": 5.409007817642857e-06, "loss": 0.0164, "step": 130150 }, { "epoch": 1.0531596407476334, "grad_norm": 0.20182177424430847, "learning_rate": 5.408304083483915e-06, "loss": 0.0108, "step": 130160 }, { "epoch": 1.0532405534428353, "grad_norm": 0.25927525758743286, "learning_rate": 5.407600341182212e-06, "loss": 0.0291, "step": 130170 }, { "epoch": 1.053321466138037, "grad_norm": 0.06444179266691208, "learning_rate": 5.40689659075178e-06, "loss": 0.0222, "step": 130180 }, { "epoch": 1.053402378833239, "grad_norm": 0.2637476325035095, "learning_rate": 5.406192832206655e-06, "loss": 0.0298, "step": 130190 }, { "epoch": 1.053483291528441, "grad_norm": 0.4113057255744934, "learning_rate": 5.405489065560872e-06, "loss": 0.0225, "step": 130200 }, { "epoch": 1.0535642042236426, "grad_norm": 0.606594979763031, "learning_rate": 5.404785290828464e-06, "loss": 0.0187, "step": 130210 }, { "epoch": 1.0536451169188445, "grad_norm": 0.38457661867141724, "learning_rate": 5.404081508023469e-06, "loss": 0.0239, "step": 130220 }, { "epoch": 1.0537260296140465, "grad_norm": 0.3451842665672302, "learning_rate": 5.403377717159921e-06, "loss": 0.0319, "step": 130230 }, { "epoch": 1.0538069423092482, "grad_norm": 0.35973018407821655, "learning_rate": 5.402673918251856e-06, "loss": 0.0216, "step": 130240 }, { "epoch": 1.0538878550044501, "grad_norm": 0.6266698837280273, "learning_rate": 5.40197011131331e-06, "loss": 0.0416, "step": 130250 }, { "epoch": 1.053968767699652, "grad_norm": 0.46683210134506226, "learning_rate": 5.4012662963583195e-06, "loss": 0.02, "step": 130260 }, { "epoch": 1.054049680394854, "grad_norm": 0.1889779269695282, "learning_rate": 5.400562473400919e-06, "loss": 0.021, "step": 130270 }, { "epoch": 1.0541305930900557, "grad_norm": 0.42134737968444824, "learning_rate": 5.399858642455145e-06, "loss": 0.0114, "step": 130280 }, { "epoch": 1.0542115057852577, "grad_norm": 0.2150544673204422, "learning_rate": 5.399154803535035e-06, "loss": 0.0259, "step": 130290 }, { "epoch": 1.0542924184804596, "grad_norm": 0.5541437268257141, "learning_rate": 5.3984509566546265e-06, "loss": 0.0258, "step": 130300 }, { "epoch": 1.0543733311756616, "grad_norm": 0.0008742374484427273, "learning_rate": 5.397747101827952e-06, "loss": 0.0232, "step": 130310 }, { "epoch": 1.0544542438708633, "grad_norm": 0.3777325451374054, "learning_rate": 5.397043239069053e-06, "loss": 0.0234, "step": 130320 }, { "epoch": 1.0545351565660652, "grad_norm": 0.6864734292030334, "learning_rate": 5.396339368391964e-06, "loss": 0.0237, "step": 130330 }, { "epoch": 1.0546160692612672, "grad_norm": 0.3013902008533478, "learning_rate": 5.395635489810726e-06, "loss": 0.0175, "step": 130340 }, { "epoch": 1.0546969819564689, "grad_norm": 0.455221563577652, "learning_rate": 5.39493160333937e-06, "loss": 0.0234, "step": 130350 }, { "epoch": 1.0547778946516708, "grad_norm": 0.1810154765844345, "learning_rate": 5.394227708991937e-06, "loss": 0.0243, "step": 130360 }, { "epoch": 1.0548588073468728, "grad_norm": 0.39231163263320923, "learning_rate": 5.393523806782466e-06, "loss": 0.0221, "step": 130370 }, { "epoch": 1.0549397200420747, "grad_norm": 0.7094037532806396, "learning_rate": 5.392819896724992e-06, "loss": 0.0207, "step": 130380 }, { "epoch": 1.0550206327372764, "grad_norm": 1.1107348203659058, "learning_rate": 5.392115978833556e-06, "loss": 0.0329, "step": 130390 }, { "epoch": 1.0551015454324784, "grad_norm": 0.27239736914634705, "learning_rate": 5.391412053122194e-06, "loss": 0.0129, "step": 130400 }, { "epoch": 1.0551824581276803, "grad_norm": 0.21763518452644348, "learning_rate": 5.390708119604945e-06, "loss": 0.0244, "step": 130410 }, { "epoch": 1.055263370822882, "grad_norm": 0.5924713611602783, "learning_rate": 5.390004178295848e-06, "loss": 0.0212, "step": 130420 }, { "epoch": 1.055344283518084, "grad_norm": 0.15989023447036743, "learning_rate": 5.389300229208939e-06, "loss": 0.026, "step": 130430 }, { "epoch": 1.055425196213286, "grad_norm": 0.29912519454956055, "learning_rate": 5.38859627235826e-06, "loss": 0.0181, "step": 130440 }, { "epoch": 1.0555061089084878, "grad_norm": 0.38280004262924194, "learning_rate": 5.38789230775785e-06, "loss": 0.0361, "step": 130450 }, { "epoch": 1.0555870216036896, "grad_norm": 0.30193325877189636, "learning_rate": 5.387188335421745e-06, "loss": 0.0252, "step": 130460 }, { "epoch": 1.0556679342988915, "grad_norm": 0.148726686835289, "learning_rate": 5.386484355363985e-06, "loss": 0.0248, "step": 130470 }, { "epoch": 1.0557488469940934, "grad_norm": 0.4320315718650818, "learning_rate": 5.385780367598613e-06, "loss": 0.0266, "step": 130480 }, { "epoch": 1.0558297596892952, "grad_norm": 0.14402444660663605, "learning_rate": 5.385076372139666e-06, "loss": 0.0232, "step": 130490 }, { "epoch": 1.055910672384497, "grad_norm": 0.5663232207298279, "learning_rate": 5.384372369001182e-06, "loss": 0.0166, "step": 130500 }, { "epoch": 1.055991585079699, "grad_norm": 0.519707202911377, "learning_rate": 5.383668358197202e-06, "loss": 0.0298, "step": 130510 }, { "epoch": 1.056072497774901, "grad_norm": 0.6365600824356079, "learning_rate": 5.38296433974177e-06, "loss": 0.0438, "step": 130520 }, { "epoch": 1.0561534104701027, "grad_norm": 1.36440110206604, "learning_rate": 5.38226031364892e-06, "loss": 0.0351, "step": 130530 }, { "epoch": 1.0562343231653046, "grad_norm": 0.5595368146896362, "learning_rate": 5.381556279932695e-06, "loss": 0.0137, "step": 130540 }, { "epoch": 1.0563152358605066, "grad_norm": 0.2483154535293579, "learning_rate": 5.380852238607136e-06, "loss": 0.0387, "step": 130550 }, { "epoch": 1.0563961485557083, "grad_norm": 0.5555522441864014, "learning_rate": 5.380148189686284e-06, "loss": 0.011, "step": 130560 }, { "epoch": 1.0564770612509102, "grad_norm": 0.24960270524024963, "learning_rate": 5.379444133184178e-06, "loss": 0.0225, "step": 130570 }, { "epoch": 1.0565579739461122, "grad_norm": 0.5353361964225769, "learning_rate": 5.37874006911486e-06, "loss": 0.0319, "step": 130580 }, { "epoch": 1.0566388866413141, "grad_norm": 0.9216743111610413, "learning_rate": 5.37803599749237e-06, "loss": 0.015, "step": 130590 }, { "epoch": 1.0567197993365158, "grad_norm": 0.6978642344474792, "learning_rate": 5.37733191833075e-06, "loss": 0.0325, "step": 130600 }, { "epoch": 1.0568007120317178, "grad_norm": 0.5432732105255127, "learning_rate": 5.376627831644043e-06, "loss": 0.0221, "step": 130610 }, { "epoch": 1.0568816247269197, "grad_norm": 1.4953233003616333, "learning_rate": 5.375923737446287e-06, "loss": 0.0343, "step": 130620 }, { "epoch": 1.0569625374221214, "grad_norm": 0.24440525472164154, "learning_rate": 5.375219635751528e-06, "loss": 0.0175, "step": 130630 }, { "epoch": 1.0570434501173234, "grad_norm": 0.30954933166503906, "learning_rate": 5.374515526573804e-06, "loss": 0.0231, "step": 130640 }, { "epoch": 1.0571243628125253, "grad_norm": 0.39423274993896484, "learning_rate": 5.373811409927159e-06, "loss": 0.023, "step": 130650 }, { "epoch": 1.0572052755077272, "grad_norm": 0.3710916340351105, "learning_rate": 5.373107285825635e-06, "loss": 0.02, "step": 130660 }, { "epoch": 1.057286188202929, "grad_norm": 0.5683009624481201, "learning_rate": 5.372403154283273e-06, "loss": 0.017, "step": 130670 }, { "epoch": 1.057367100898131, "grad_norm": 0.28308090567588806, "learning_rate": 5.371699015314117e-06, "loss": 0.0241, "step": 130680 }, { "epoch": 1.0574480135933328, "grad_norm": 0.5147309899330139, "learning_rate": 5.370994868932209e-06, "loss": 0.0177, "step": 130690 }, { "epoch": 1.0575289262885346, "grad_norm": 0.19271428883075714, "learning_rate": 5.3702907151515905e-06, "loss": 0.022, "step": 130700 }, { "epoch": 1.0576098389837365, "grad_norm": 0.42138734459877014, "learning_rate": 5.3695865539863055e-06, "loss": 0.0229, "step": 130710 }, { "epoch": 1.0576907516789384, "grad_norm": 0.24107645452022552, "learning_rate": 5.368882385450398e-06, "loss": 0.0246, "step": 130720 }, { "epoch": 1.0577716643741404, "grad_norm": 0.13605228066444397, "learning_rate": 5.368178209557908e-06, "loss": 0.0248, "step": 130730 }, { "epoch": 1.057852577069342, "grad_norm": 0.47412893176078796, "learning_rate": 5.367474026322884e-06, "loss": 0.0173, "step": 130740 }, { "epoch": 1.057933489764544, "grad_norm": 0.19669227302074432, "learning_rate": 5.366769835759364e-06, "loss": 0.0133, "step": 130750 }, { "epoch": 1.058014402459746, "grad_norm": 0.458914190530777, "learning_rate": 5.366065637881394e-06, "loss": 0.0255, "step": 130760 }, { "epoch": 1.0580953151549477, "grad_norm": 0.7033968567848206, "learning_rate": 5.365361432703018e-06, "loss": 0.016, "step": 130770 }, { "epoch": 1.0581762278501496, "grad_norm": 0.19115719199180603, "learning_rate": 5.3646572202382796e-06, "loss": 0.0187, "step": 130780 }, { "epoch": 1.0582571405453516, "grad_norm": 0.43254008889198303, "learning_rate": 5.363953000501223e-06, "loss": 0.0263, "step": 130790 }, { "epoch": 1.0583380532405535, "grad_norm": 0.368883341550827, "learning_rate": 5.3632487735058925e-06, "loss": 0.016, "step": 130800 }, { "epoch": 1.0584189659357552, "grad_norm": 0.41071397066116333, "learning_rate": 5.3625445392663315e-06, "loss": 0.0204, "step": 130810 }, { "epoch": 1.0584998786309572, "grad_norm": 0.24098344147205353, "learning_rate": 5.3618402977965845e-06, "loss": 0.0268, "step": 130820 }, { "epoch": 1.0585807913261591, "grad_norm": 0.7960377335548401, "learning_rate": 5.361136049110698e-06, "loss": 0.0308, "step": 130830 }, { "epoch": 1.058661704021361, "grad_norm": 0.818061113357544, "learning_rate": 5.360431793222714e-06, "loss": 0.0232, "step": 130840 }, { "epoch": 1.0587426167165628, "grad_norm": 0.2728002667427063, "learning_rate": 5.359727530146679e-06, "loss": 0.0196, "step": 130850 }, { "epoch": 1.0588235294117647, "grad_norm": 0.8671619296073914, "learning_rate": 5.359023259896638e-06, "loss": 0.0288, "step": 130860 }, { "epoch": 1.0589044421069667, "grad_norm": 0.3092726469039917, "learning_rate": 5.358318982486637e-06, "loss": 0.0284, "step": 130870 }, { "epoch": 1.0589853548021684, "grad_norm": 0.15951308608055115, "learning_rate": 5.3576146979307185e-06, "loss": 0.0194, "step": 130880 }, { "epoch": 1.0590662674973703, "grad_norm": 0.3349311649799347, "learning_rate": 5.356910406242931e-06, "loss": 0.0183, "step": 130890 }, { "epoch": 1.0591471801925723, "grad_norm": 0.5980486273765564, "learning_rate": 5.35620610743732e-06, "loss": 0.0188, "step": 130900 }, { "epoch": 1.0592280928877742, "grad_norm": 0.05519922077655792, "learning_rate": 5.355501801527928e-06, "loss": 0.0178, "step": 130910 }, { "epoch": 1.059309005582976, "grad_norm": 0.005039688665419817, "learning_rate": 5.354797488528804e-06, "loss": 0.0166, "step": 130920 }, { "epoch": 1.0593899182781779, "grad_norm": 0.25657209753990173, "learning_rate": 5.354093168453994e-06, "loss": 0.0213, "step": 130930 }, { "epoch": 1.0594708309733798, "grad_norm": 0.38471344113349915, "learning_rate": 5.353388841317542e-06, "loss": 0.0235, "step": 130940 }, { "epoch": 1.0595517436685815, "grad_norm": 0.4597354829311371, "learning_rate": 5.352684507133495e-06, "loss": 0.0351, "step": 130950 }, { "epoch": 1.0596326563637835, "grad_norm": 0.45191287994384766, "learning_rate": 5.351980165915901e-06, "loss": 0.0226, "step": 130960 }, { "epoch": 1.0597135690589854, "grad_norm": 0.35251617431640625, "learning_rate": 5.351275817678806e-06, "loss": 0.0187, "step": 130970 }, { "epoch": 1.0597944817541873, "grad_norm": 0.3458830714225769, "learning_rate": 5.350571462436256e-06, "loss": 0.0185, "step": 130980 }, { "epoch": 1.059875394449389, "grad_norm": 0.7499619722366333, "learning_rate": 5.349867100202299e-06, "loss": 0.0338, "step": 130990 }, { "epoch": 1.059956307144591, "grad_norm": 0.3749048411846161, "learning_rate": 5.349162730990981e-06, "loss": 0.0233, "step": 131000 }, { "epoch": 1.060037219839793, "grad_norm": 0.4177207052707672, "learning_rate": 5.348458354816349e-06, "loss": 0.0209, "step": 131010 }, { "epoch": 1.0601181325349947, "grad_norm": 0.5162248015403748, "learning_rate": 5.347753971692451e-06, "loss": 0.0265, "step": 131020 }, { "epoch": 1.0601990452301966, "grad_norm": 0.4665655791759491, "learning_rate": 5.3470495816333324e-06, "loss": 0.0205, "step": 131030 }, { "epoch": 1.0602799579253985, "grad_norm": 0.47578656673431396, "learning_rate": 5.346345184653046e-06, "loss": 0.0312, "step": 131040 }, { "epoch": 1.0603608706206005, "grad_norm": 0.1294291913509369, "learning_rate": 5.345640780765633e-06, "loss": 0.0221, "step": 131050 }, { "epoch": 1.0604417833158022, "grad_norm": 0.2543075680732727, "learning_rate": 5.3449363699851454e-06, "loss": 0.0307, "step": 131060 }, { "epoch": 1.0605226960110041, "grad_norm": 0.38750240206718445, "learning_rate": 5.34423195232563e-06, "loss": 0.0314, "step": 131070 }, { "epoch": 1.060603608706206, "grad_norm": 0.46930113434791565, "learning_rate": 5.343527527801136e-06, "loss": 0.0236, "step": 131080 }, { "epoch": 1.0606845214014078, "grad_norm": 0.48260819911956787, "learning_rate": 5.34282309642571e-06, "loss": 0.034, "step": 131090 }, { "epoch": 1.0607654340966097, "grad_norm": 0.3279814124107361, "learning_rate": 5.342118658213401e-06, "loss": 0.0152, "step": 131100 }, { "epoch": 1.0608463467918117, "grad_norm": 0.6566097736358643, "learning_rate": 5.341414213178257e-06, "loss": 0.025, "step": 131110 }, { "epoch": 1.0609272594870136, "grad_norm": 0.22149088978767395, "learning_rate": 5.340709761334329e-06, "loss": 0.0145, "step": 131120 }, { "epoch": 1.0610081721822153, "grad_norm": 0.3410159945487976, "learning_rate": 5.340005302695662e-06, "loss": 0.0127, "step": 131130 }, { "epoch": 1.0610890848774173, "grad_norm": 0.31113412976264954, "learning_rate": 5.3393008372763075e-06, "loss": 0.0248, "step": 131140 }, { "epoch": 1.0611699975726192, "grad_norm": 0.3915354907512665, "learning_rate": 5.338596365090316e-06, "loss": 0.0201, "step": 131150 }, { "epoch": 1.061250910267821, "grad_norm": 0.2099820077419281, "learning_rate": 5.337891886151733e-06, "loss": 0.0173, "step": 131160 }, { "epoch": 1.0613318229630229, "grad_norm": 0.5126613974571228, "learning_rate": 5.337187400474609e-06, "loss": 0.0202, "step": 131170 }, { "epoch": 1.0614127356582248, "grad_norm": 0.2655245065689087, "learning_rate": 5.336482908072996e-06, "loss": 0.0171, "step": 131180 }, { "epoch": 1.0614936483534267, "grad_norm": 0.289730429649353, "learning_rate": 5.335778408960941e-06, "loss": 0.0227, "step": 131190 }, { "epoch": 1.0615745610486285, "grad_norm": 0.5822217464447021, "learning_rate": 5.3350739031524946e-06, "loss": 0.0136, "step": 131200 }, { "epoch": 1.0616554737438304, "grad_norm": 0.3803147077560425, "learning_rate": 5.334369390661707e-06, "loss": 0.0191, "step": 131210 }, { "epoch": 1.0617363864390323, "grad_norm": 0.6166713237762451, "learning_rate": 5.333664871502626e-06, "loss": 0.025, "step": 131220 }, { "epoch": 1.061817299134234, "grad_norm": 0.5330175161361694, "learning_rate": 5.332960345689305e-06, "loss": 0.0219, "step": 131230 }, { "epoch": 1.061898211829436, "grad_norm": 0.5734526515007019, "learning_rate": 5.332255813235791e-06, "loss": 0.0201, "step": 131240 }, { "epoch": 1.061979124524638, "grad_norm": 0.5578537583351135, "learning_rate": 5.3315512741561395e-06, "loss": 0.0236, "step": 131250 }, { "epoch": 1.0620600372198399, "grad_norm": 0.3868412375450134, "learning_rate": 5.330846728464396e-06, "loss": 0.023, "step": 131260 }, { "epoch": 1.0621409499150416, "grad_norm": 0.5063632130622864, "learning_rate": 5.330142176174613e-06, "loss": 0.023, "step": 131270 }, { "epoch": 1.0622218626102435, "grad_norm": 0.42653611302375793, "learning_rate": 5.329437617300841e-06, "loss": 0.017, "step": 131280 }, { "epoch": 1.0623027753054455, "grad_norm": 0.9225382804870605, "learning_rate": 5.328733051857131e-06, "loss": 0.0195, "step": 131290 }, { "epoch": 1.0623836880006472, "grad_norm": 0.4787473976612091, "learning_rate": 5.3280284798575354e-06, "loss": 0.0319, "step": 131300 }, { "epoch": 1.0624646006958491, "grad_norm": 0.5661333203315735, "learning_rate": 5.327323901316103e-06, "loss": 0.0296, "step": 131310 }, { "epoch": 1.062545513391051, "grad_norm": 0.42679381370544434, "learning_rate": 5.326619316246886e-06, "loss": 0.0223, "step": 131320 }, { "epoch": 1.062626426086253, "grad_norm": 0.2714577615261078, "learning_rate": 5.325914724663937e-06, "loss": 0.0216, "step": 131330 }, { "epoch": 1.0627073387814547, "grad_norm": 0.08301389217376709, "learning_rate": 5.325210126581309e-06, "loss": 0.0176, "step": 131340 }, { "epoch": 1.0627882514766567, "grad_norm": 0.27343764901161194, "learning_rate": 5.3245055220130485e-06, "loss": 0.028, "step": 131350 }, { "epoch": 1.0628691641718586, "grad_norm": 0.5209699869155884, "learning_rate": 5.323800910973211e-06, "loss": 0.0317, "step": 131360 }, { "epoch": 1.0629500768670606, "grad_norm": 0.2894384562969208, "learning_rate": 5.323096293475849e-06, "loss": 0.0227, "step": 131370 }, { "epoch": 1.0630309895622623, "grad_norm": 0.35093703866004944, "learning_rate": 5.322391669535013e-06, "loss": 0.0341, "step": 131380 }, { "epoch": 1.0631119022574642, "grad_norm": 0.4713391065597534, "learning_rate": 5.321687039164756e-06, "loss": 0.0252, "step": 131390 }, { "epoch": 1.0631928149526662, "grad_norm": 0.09371276199817657, "learning_rate": 5.320982402379129e-06, "loss": 0.0176, "step": 131400 }, { "epoch": 1.0632737276478679, "grad_norm": 0.2857670187950134, "learning_rate": 5.3202777591921876e-06, "loss": 0.0211, "step": 131410 }, { "epoch": 1.0633546403430698, "grad_norm": 0.29826390743255615, "learning_rate": 5.319573109617981e-06, "loss": 0.0157, "step": 131420 }, { "epoch": 1.0634355530382718, "grad_norm": 0.3883703649044037, "learning_rate": 5.318868453670564e-06, "loss": 0.0207, "step": 131430 }, { "epoch": 1.0635164657334735, "grad_norm": 0.5252205729484558, "learning_rate": 5.3181637913639885e-06, "loss": 0.0276, "step": 131440 }, { "epoch": 1.0635973784286754, "grad_norm": 0.5527163147926331, "learning_rate": 5.317459122712308e-06, "loss": 0.0355, "step": 131450 }, { "epoch": 1.0636782911238774, "grad_norm": 0.4814528226852417, "learning_rate": 5.316754447729575e-06, "loss": 0.0211, "step": 131460 }, { "epoch": 1.0637592038190793, "grad_norm": 0.42510536313056946, "learning_rate": 5.316049766429844e-06, "loss": 0.0207, "step": 131470 }, { "epoch": 1.063840116514281, "grad_norm": 0.3954213559627533, "learning_rate": 5.315345078827167e-06, "loss": 0.0342, "step": 131480 }, { "epoch": 1.063921029209483, "grad_norm": 0.6808356642723083, "learning_rate": 5.3146403849355986e-06, "loss": 0.0256, "step": 131490 }, { "epoch": 1.064001941904685, "grad_norm": 0.6088076829910278, "learning_rate": 5.313935684769192e-06, "loss": 0.0267, "step": 131500 }, { "epoch": 1.0640828545998868, "grad_norm": 0.6148215532302856, "learning_rate": 5.313230978341999e-06, "loss": 0.03, "step": 131510 }, { "epoch": 1.0641637672950885, "grad_norm": 0.4787062108516693, "learning_rate": 5.312526265668079e-06, "loss": 0.0254, "step": 131520 }, { "epoch": 1.0642446799902905, "grad_norm": 0.09362316131591797, "learning_rate": 5.31182154676148e-06, "loss": 0.0172, "step": 131530 }, { "epoch": 1.0643255926854924, "grad_norm": 0.2947685420513153, "learning_rate": 5.311116821636257e-06, "loss": 0.0247, "step": 131540 }, { "epoch": 1.0644065053806941, "grad_norm": 0.21446874737739563, "learning_rate": 5.310412090306468e-06, "loss": 0.0262, "step": 131550 }, { "epoch": 1.064487418075896, "grad_norm": 0.2979060113430023, "learning_rate": 5.309707352786167e-06, "loss": 0.0196, "step": 131560 }, { "epoch": 1.064568330771098, "grad_norm": 0.3980163633823395, "learning_rate": 5.3090026090894034e-06, "loss": 0.0248, "step": 131570 }, { "epoch": 1.0646492434663, "grad_norm": 0.26597288250923157, "learning_rate": 5.308297859230236e-06, "loss": 0.0141, "step": 131580 }, { "epoch": 1.0647301561615017, "grad_norm": 0.28309306502342224, "learning_rate": 5.307593103222719e-06, "loss": 0.021, "step": 131590 }, { "epoch": 1.0648110688567036, "grad_norm": 0.44058957695961, "learning_rate": 5.306888341080907e-06, "loss": 0.0251, "step": 131600 }, { "epoch": 1.0648919815519056, "grad_norm": 0.5979407429695129, "learning_rate": 5.306183572818855e-06, "loss": 0.0266, "step": 131610 }, { "epoch": 1.0649728942471073, "grad_norm": 0.47851765155792236, "learning_rate": 5.305478798450617e-06, "loss": 0.0227, "step": 131620 }, { "epoch": 1.0650538069423092, "grad_norm": 0.40621140599250793, "learning_rate": 5.304774017990251e-06, "loss": 0.0127, "step": 131630 }, { "epoch": 1.0651347196375112, "grad_norm": 0.37554189562797546, "learning_rate": 5.3040692314518095e-06, "loss": 0.0217, "step": 131640 }, { "epoch": 1.065215632332713, "grad_norm": 0.5438486933708191, "learning_rate": 5.303364438849349e-06, "loss": 0.0221, "step": 131650 }, { "epoch": 1.0652965450279148, "grad_norm": 0.41564345359802246, "learning_rate": 5.302659640196925e-06, "loss": 0.0211, "step": 131660 }, { "epoch": 1.0653774577231168, "grad_norm": 0.156113401055336, "learning_rate": 5.301954835508593e-06, "loss": 0.0252, "step": 131670 }, { "epoch": 1.0654583704183187, "grad_norm": 0.21839413046836853, "learning_rate": 5.301250024798411e-06, "loss": 0.0233, "step": 131680 }, { "epoch": 1.0655392831135204, "grad_norm": 0.23703570663928986, "learning_rate": 5.300545208080433e-06, "loss": 0.0313, "step": 131690 }, { "epoch": 1.0656201958087224, "grad_norm": 0.20869438350200653, "learning_rate": 5.299840385368714e-06, "loss": 0.0173, "step": 131700 }, { "epoch": 1.0657011085039243, "grad_norm": 0.42447876930236816, "learning_rate": 5.2991355566773126e-06, "loss": 0.0187, "step": 131710 }, { "epoch": 1.0657820211991262, "grad_norm": 0.5814302563667297, "learning_rate": 5.298430722020283e-06, "loss": 0.0179, "step": 131720 }, { "epoch": 1.065862933894328, "grad_norm": 0.2768366038799286, "learning_rate": 5.297725881411683e-06, "loss": 0.0201, "step": 131730 }, { "epoch": 1.06594384658953, "grad_norm": 0.28936800360679626, "learning_rate": 5.29702103486557e-06, "loss": 0.0223, "step": 131740 }, { "epoch": 1.0660247592847318, "grad_norm": 0.33963292837142944, "learning_rate": 5.296316182395998e-06, "loss": 0.0218, "step": 131750 }, { "epoch": 1.0661056719799336, "grad_norm": 0.5416159629821777, "learning_rate": 5.295611324017026e-06, "loss": 0.0426, "step": 131760 }, { "epoch": 1.0661865846751355, "grad_norm": 0.151515930891037, "learning_rate": 5.29490645974271e-06, "loss": 0.0309, "step": 131770 }, { "epoch": 1.0662674973703374, "grad_norm": 0.04870852455496788, "learning_rate": 5.294201589587107e-06, "loss": 0.0186, "step": 131780 }, { "epoch": 1.0663484100655394, "grad_norm": 0.484406054019928, "learning_rate": 5.293496713564276e-06, "loss": 0.0293, "step": 131790 }, { "epoch": 1.066429322760741, "grad_norm": 0.20811371505260468, "learning_rate": 5.292791831688271e-06, "loss": 0.028, "step": 131800 }, { "epoch": 1.066510235455943, "grad_norm": 0.32397013902664185, "learning_rate": 5.292086943973151e-06, "loss": 0.0199, "step": 131810 }, { "epoch": 1.066591148151145, "grad_norm": 0.3875899612903595, "learning_rate": 5.291382050432974e-06, "loss": 0.0217, "step": 131820 }, { "epoch": 1.0666720608463467, "grad_norm": 0.7304558753967285, "learning_rate": 5.290677151081797e-06, "loss": 0.0365, "step": 131830 }, { "epoch": 1.0667529735415486, "grad_norm": 0.38017892837524414, "learning_rate": 5.289972245933677e-06, "loss": 0.0184, "step": 131840 }, { "epoch": 1.0668338862367506, "grad_norm": 0.019300589337944984, "learning_rate": 5.289267335002673e-06, "loss": 0.0233, "step": 131850 }, { "epoch": 1.0669147989319525, "grad_norm": 0.45346808433532715, "learning_rate": 5.288562418302844e-06, "loss": 0.0315, "step": 131860 }, { "epoch": 1.0669957116271542, "grad_norm": 0.21831254661083221, "learning_rate": 5.287857495848246e-06, "loss": 0.0189, "step": 131870 }, { "epoch": 1.0670766243223562, "grad_norm": 1.3254486322402954, "learning_rate": 5.287152567652937e-06, "loss": 0.0263, "step": 131880 }, { "epoch": 1.067157537017558, "grad_norm": 0.19379806518554688, "learning_rate": 5.286447633730979e-06, "loss": 0.0262, "step": 131890 }, { "epoch": 1.0672384497127598, "grad_norm": 0.2806471884250641, "learning_rate": 5.2857426940964255e-06, "loss": 0.0313, "step": 131900 }, { "epoch": 1.0673193624079618, "grad_norm": 0.3950709402561188, "learning_rate": 5.285037748763337e-06, "loss": 0.0327, "step": 131910 }, { "epoch": 1.0674002751031637, "grad_norm": 0.30138272047042847, "learning_rate": 5.2843327977457725e-06, "loss": 0.029, "step": 131920 }, { "epoch": 1.0674811877983656, "grad_norm": 0.28884583711624146, "learning_rate": 5.283627841057792e-06, "loss": 0.0162, "step": 131930 }, { "epoch": 1.0675621004935674, "grad_norm": 0.24931131303310394, "learning_rate": 5.282922878713451e-06, "loss": 0.0278, "step": 131940 }, { "epoch": 1.0676430131887693, "grad_norm": 0.3063344359397888, "learning_rate": 5.282217910726811e-06, "loss": 0.0201, "step": 131950 }, { "epoch": 1.0677239258839712, "grad_norm": 0.49020472168922424, "learning_rate": 5.281512937111932e-06, "loss": 0.024, "step": 131960 }, { "epoch": 1.067804838579173, "grad_norm": 0.7808274626731873, "learning_rate": 5.280807957882871e-06, "loss": 0.0303, "step": 131970 }, { "epoch": 1.067885751274375, "grad_norm": 0.47578489780426025, "learning_rate": 5.280102973053688e-06, "loss": 0.0234, "step": 131980 }, { "epoch": 1.0679666639695768, "grad_norm": 0.5719346404075623, "learning_rate": 5.279397982638442e-06, "loss": 0.0173, "step": 131990 }, { "epoch": 1.0680475766647788, "grad_norm": 0.21037061512470245, "learning_rate": 5.2786929866511936e-06, "loss": 0.0283, "step": 132000 }, { "epoch": 1.0681284893599805, "grad_norm": 0.2637035548686981, "learning_rate": 5.277987985106002e-06, "loss": 0.012, "step": 132010 }, { "epoch": 1.0682094020551824, "grad_norm": 0.6946185231208801, "learning_rate": 5.277282978016926e-06, "loss": 0.0221, "step": 132020 }, { "epoch": 1.0682903147503844, "grad_norm": 0.5438452959060669, "learning_rate": 5.276577965398027e-06, "loss": 0.0455, "step": 132030 }, { "epoch": 1.0683712274455863, "grad_norm": 0.7293345332145691, "learning_rate": 5.275872947263365e-06, "loss": 0.0222, "step": 132040 }, { "epoch": 1.068452140140788, "grad_norm": 0.7980112433433533, "learning_rate": 5.275167923626998e-06, "loss": 0.0252, "step": 132050 }, { "epoch": 1.06853305283599, "grad_norm": 1.1057912111282349, "learning_rate": 5.274462894502989e-06, "loss": 0.0461, "step": 132060 }, { "epoch": 1.068613965531192, "grad_norm": 0.4181237816810608, "learning_rate": 5.273757859905396e-06, "loss": 0.0257, "step": 132070 }, { "epoch": 1.0686948782263936, "grad_norm": 1.1968927383422852, "learning_rate": 5.273052819848282e-06, "loss": 0.0294, "step": 132080 }, { "epoch": 1.0687757909215956, "grad_norm": 0.17277191579341888, "learning_rate": 5.272347774345705e-06, "loss": 0.0231, "step": 132090 }, { "epoch": 1.0688567036167975, "grad_norm": 0.5396491289138794, "learning_rate": 5.271642723411726e-06, "loss": 0.0219, "step": 132100 }, { "epoch": 1.0689376163119992, "grad_norm": 0.07304779440164566, "learning_rate": 5.270937667060407e-06, "loss": 0.0257, "step": 132110 }, { "epoch": 1.0690185290072012, "grad_norm": 0.547731876373291, "learning_rate": 5.270232605305808e-06, "loss": 0.0179, "step": 132120 }, { "epoch": 1.0690994417024031, "grad_norm": 0.5347861647605896, "learning_rate": 5.269527538161989e-06, "loss": 0.0275, "step": 132130 }, { "epoch": 1.069180354397605, "grad_norm": 0.20312991738319397, "learning_rate": 5.268822465643014e-06, "loss": 0.0314, "step": 132140 }, { "epoch": 1.0692612670928068, "grad_norm": 0.3914337456226349, "learning_rate": 5.268117387762943e-06, "loss": 0.0125, "step": 132150 }, { "epoch": 1.0693421797880087, "grad_norm": 0.2065049558877945, "learning_rate": 5.267412304535835e-06, "loss": 0.0189, "step": 132160 }, { "epoch": 1.0694230924832107, "grad_norm": 0.08691208064556122, "learning_rate": 5.2667072159757535e-06, "loss": 0.0105, "step": 132170 }, { "epoch": 1.0695040051784126, "grad_norm": 1.0148979425430298, "learning_rate": 5.266002122096761e-06, "loss": 0.0195, "step": 132180 }, { "epoch": 1.0695849178736143, "grad_norm": 0.5227622985839844, "learning_rate": 5.265297022912917e-06, "loss": 0.0276, "step": 132190 }, { "epoch": 1.0696658305688163, "grad_norm": 0.6728842854499817, "learning_rate": 5.264591918438282e-06, "loss": 0.0121, "step": 132200 }, { "epoch": 1.0697467432640182, "grad_norm": 0.27574577927589417, "learning_rate": 5.263886808686921e-06, "loss": 0.0219, "step": 132210 }, { "epoch": 1.06982765595922, "grad_norm": 0.4768593907356262, "learning_rate": 5.263181693672896e-06, "loss": 0.027, "step": 132220 }, { "epoch": 1.0699085686544219, "grad_norm": 0.16815537214279175, "learning_rate": 5.262476573410267e-06, "loss": 0.0225, "step": 132230 }, { "epoch": 1.0699894813496238, "grad_norm": 0.2850438356399536, "learning_rate": 5.261771447913095e-06, "loss": 0.0252, "step": 132240 }, { "epoch": 1.0700703940448257, "grad_norm": 0.4722629189491272, "learning_rate": 5.261066317195446e-06, "loss": 0.028, "step": 132250 }, { "epoch": 1.0701513067400275, "grad_norm": 0.48528191447257996, "learning_rate": 5.260361181271382e-06, "loss": 0.0134, "step": 132260 }, { "epoch": 1.0702322194352294, "grad_norm": 0.43125849962234497, "learning_rate": 5.25965604015496e-06, "loss": 0.025, "step": 132270 }, { "epoch": 1.0703131321304313, "grad_norm": 0.6029598712921143, "learning_rate": 5.258950893860249e-06, "loss": 0.0231, "step": 132280 }, { "epoch": 1.070394044825633, "grad_norm": 0.12047819793224335, "learning_rate": 5.258245742401309e-06, "loss": 0.022, "step": 132290 }, { "epoch": 1.070474957520835, "grad_norm": 0.5691363215446472, "learning_rate": 5.257540585792202e-06, "loss": 0.03, "step": 132300 }, { "epoch": 1.070555870216037, "grad_norm": 0.35608768463134766, "learning_rate": 5.2568354240469925e-06, "loss": 0.0207, "step": 132310 }, { "epoch": 1.0706367829112389, "grad_norm": 0.34245434403419495, "learning_rate": 5.256130257179742e-06, "loss": 0.0286, "step": 132320 }, { "epoch": 1.0707176956064406, "grad_norm": 0.1854054033756256, "learning_rate": 5.255425085204514e-06, "loss": 0.0318, "step": 132330 }, { "epoch": 1.0707986083016425, "grad_norm": 0.6807618141174316, "learning_rate": 5.2547199081353725e-06, "loss": 0.0271, "step": 132340 }, { "epoch": 1.0708795209968445, "grad_norm": 0.37756964564323425, "learning_rate": 5.254014725986379e-06, "loss": 0.0234, "step": 132350 }, { "epoch": 1.0709604336920462, "grad_norm": 0.06283656507730484, "learning_rate": 5.253309538771599e-06, "loss": 0.0234, "step": 132360 }, { "epoch": 1.0710413463872481, "grad_norm": 0.3465169370174408, "learning_rate": 5.252604346505096e-06, "loss": 0.0371, "step": 132370 }, { "epoch": 1.07112225908245, "grad_norm": 0.7752432227134705, "learning_rate": 5.251899149200932e-06, "loss": 0.031, "step": 132380 }, { "epoch": 1.071203171777652, "grad_norm": 0.5101624727249146, "learning_rate": 5.251193946873171e-06, "loss": 0.0257, "step": 132390 }, { "epoch": 1.0712840844728537, "grad_norm": 0.4826684892177582, "learning_rate": 5.250488739535876e-06, "loss": 0.0274, "step": 132400 }, { "epoch": 1.0713649971680557, "grad_norm": 0.6566480994224548, "learning_rate": 5.249783527203114e-06, "loss": 0.021, "step": 132410 }, { "epoch": 1.0714459098632576, "grad_norm": 0.3569868505001068, "learning_rate": 5.249078309888945e-06, "loss": 0.0206, "step": 132420 }, { "epoch": 1.0715268225584593, "grad_norm": 0.8234373331069946, "learning_rate": 5.248373087607434e-06, "loss": 0.0332, "step": 132430 }, { "epoch": 1.0716077352536613, "grad_norm": 0.2397635132074356, "learning_rate": 5.247667860372649e-06, "loss": 0.0332, "step": 132440 }, { "epoch": 1.0716886479488632, "grad_norm": 0.13905096054077148, "learning_rate": 5.246962628198651e-06, "loss": 0.0148, "step": 132450 }, { "epoch": 1.0717695606440651, "grad_norm": 0.1530657559633255, "learning_rate": 5.246257391099503e-06, "loss": 0.028, "step": 132460 }, { "epoch": 1.0718504733392669, "grad_norm": 0.5743502974510193, "learning_rate": 5.245552149089272e-06, "loss": 0.0269, "step": 132470 }, { "epoch": 1.0719313860344688, "grad_norm": 0.5999422073364258, "learning_rate": 5.2448469021820226e-06, "loss": 0.0194, "step": 132480 }, { "epoch": 1.0720122987296707, "grad_norm": 0.4628141224384308, "learning_rate": 5.2441416503918175e-06, "loss": 0.0279, "step": 132490 }, { "epoch": 1.0720932114248725, "grad_norm": 0.5112066268920898, "learning_rate": 5.243436393732724e-06, "loss": 0.0219, "step": 132500 }, { "epoch": 1.0721741241200744, "grad_norm": 0.6471268534660339, "learning_rate": 5.242731132218803e-06, "loss": 0.0171, "step": 132510 }, { "epoch": 1.0722550368152763, "grad_norm": 0.22892539203166962, "learning_rate": 5.2420258658641245e-06, "loss": 0.0168, "step": 132520 }, { "epoch": 1.0723359495104783, "grad_norm": 0.5379655361175537, "learning_rate": 5.241320594682751e-06, "loss": 0.0364, "step": 132530 }, { "epoch": 1.07241686220568, "grad_norm": 0.3778282701969147, "learning_rate": 5.240615318688745e-06, "loss": 0.017, "step": 132540 }, { "epoch": 1.072497774900882, "grad_norm": 0.14887255430221558, "learning_rate": 5.239910037896176e-06, "loss": 0.0364, "step": 132550 }, { "epoch": 1.0725786875960839, "grad_norm": 0.40078097581863403, "learning_rate": 5.2392047523191095e-06, "loss": 0.0179, "step": 132560 }, { "epoch": 1.0726596002912858, "grad_norm": 0.0024698893539607525, "learning_rate": 5.2384994619716065e-06, "loss": 0.0253, "step": 132570 }, { "epoch": 1.0727405129864875, "grad_norm": 0.16426785290241241, "learning_rate": 5.237794166867736e-06, "loss": 0.0191, "step": 132580 }, { "epoch": 1.0728214256816895, "grad_norm": 0.3917185068130493, "learning_rate": 5.2370888670215635e-06, "loss": 0.0248, "step": 132590 }, { "epoch": 1.0729023383768914, "grad_norm": 0.2350698560476303, "learning_rate": 5.236383562447154e-06, "loss": 0.0264, "step": 132600 }, { "epoch": 1.0729832510720931, "grad_norm": 0.42692646384239197, "learning_rate": 5.235678253158573e-06, "loss": 0.0268, "step": 132610 }, { "epoch": 1.073064163767295, "grad_norm": 0.375516802072525, "learning_rate": 5.234972939169887e-06, "loss": 0.0157, "step": 132620 }, { "epoch": 1.073145076462497, "grad_norm": 0.4162215292453766, "learning_rate": 5.234267620495163e-06, "loss": 0.0336, "step": 132630 }, { "epoch": 1.0732259891576987, "grad_norm": 0.3154701292514801, "learning_rate": 5.233562297148464e-06, "loss": 0.0186, "step": 132640 }, { "epoch": 1.0733069018529007, "grad_norm": 6.886834144592285, "learning_rate": 5.232856969143858e-06, "loss": 0.0245, "step": 132650 }, { "epoch": 1.0733878145481026, "grad_norm": 0.4159584641456604, "learning_rate": 5.232151636495412e-06, "loss": 0.0277, "step": 132660 }, { "epoch": 1.0734687272433046, "grad_norm": 0.4230526387691498, "learning_rate": 5.231446299217191e-06, "loss": 0.0258, "step": 132670 }, { "epoch": 1.0735496399385063, "grad_norm": 0.3449136018753052, "learning_rate": 5.230740957323263e-06, "loss": 0.0207, "step": 132680 }, { "epoch": 1.0736305526337082, "grad_norm": 0.4687078893184662, "learning_rate": 5.230035610827694e-06, "loss": 0.0205, "step": 132690 }, { "epoch": 1.0737114653289102, "grad_norm": 0.7128381729125977, "learning_rate": 5.229330259744549e-06, "loss": 0.0347, "step": 132700 }, { "epoch": 1.073792378024112, "grad_norm": 0.004172858316451311, "learning_rate": 5.228624904087897e-06, "loss": 0.013, "step": 132710 }, { "epoch": 1.0738732907193138, "grad_norm": 0.36184483766555786, "learning_rate": 5.227919543871806e-06, "loss": 0.0223, "step": 132720 }, { "epoch": 1.0739542034145158, "grad_norm": 0.4813555181026459, "learning_rate": 5.227214179110337e-06, "loss": 0.0229, "step": 132730 }, { "epoch": 1.0740351161097177, "grad_norm": 0.026896962895989418, "learning_rate": 5.226508809817565e-06, "loss": 0.0165, "step": 132740 }, { "epoch": 1.0741160288049194, "grad_norm": 0.40996843576431274, "learning_rate": 5.225803436007549e-06, "loss": 0.0302, "step": 132750 }, { "epoch": 1.0741969415001213, "grad_norm": 0.24838030338287354, "learning_rate": 5.225098057694361e-06, "loss": 0.0188, "step": 132760 }, { "epoch": 1.0742778541953233, "grad_norm": 0.48743394017219543, "learning_rate": 5.2243926748920695e-06, "loss": 0.019, "step": 132770 }, { "epoch": 1.074358766890525, "grad_norm": 0.3935087323188782, "learning_rate": 5.223687287614738e-06, "loss": 0.0228, "step": 132780 }, { "epoch": 1.074439679585727, "grad_norm": 0.2959183156490326, "learning_rate": 5.222981895876437e-06, "loss": 0.0231, "step": 132790 }, { "epoch": 1.0745205922809289, "grad_norm": 0.3648086488246918, "learning_rate": 5.222276499691233e-06, "loss": 0.0288, "step": 132800 }, { "epoch": 1.0746015049761308, "grad_norm": 0.30027464032173157, "learning_rate": 5.221571099073192e-06, "loss": 0.0218, "step": 132810 }, { "epoch": 1.0746824176713325, "grad_norm": 0.45919913053512573, "learning_rate": 5.220865694036383e-06, "loss": 0.0254, "step": 132820 }, { "epoch": 1.0747633303665345, "grad_norm": 0.37525367736816406, "learning_rate": 5.220160284594875e-06, "loss": 0.0251, "step": 132830 }, { "epoch": 1.0748442430617364, "grad_norm": 0.6549801230430603, "learning_rate": 5.219454870762733e-06, "loss": 0.0365, "step": 132840 }, { "epoch": 1.0749251557569384, "grad_norm": 0.4292462468147278, "learning_rate": 5.218749452554031e-06, "loss": 0.023, "step": 132850 }, { "epoch": 1.07500606845214, "grad_norm": 0.5514318346977234, "learning_rate": 5.2180440299828295e-06, "loss": 0.0144, "step": 132860 }, { "epoch": 1.075086981147342, "grad_norm": 0.004324589855968952, "learning_rate": 5.217338603063201e-06, "loss": 0.0188, "step": 132870 }, { "epoch": 1.075167893842544, "grad_norm": 0.21399514377117157, "learning_rate": 5.216633171809213e-06, "loss": 0.0114, "step": 132880 }, { "epoch": 1.0752488065377457, "grad_norm": 0.962540864944458, "learning_rate": 5.215927736234935e-06, "loss": 0.0355, "step": 132890 }, { "epoch": 1.0753297192329476, "grad_norm": 0.3817758560180664, "learning_rate": 5.215222296354433e-06, "loss": 0.0305, "step": 132900 }, { "epoch": 1.0754106319281496, "grad_norm": 0.312174916267395, "learning_rate": 5.214516852181775e-06, "loss": 0.0102, "step": 132910 }, { "epoch": 1.0754915446233515, "grad_norm": 0.33779028058052063, "learning_rate": 5.2138114037310326e-06, "loss": 0.0257, "step": 132920 }, { "epoch": 1.0755724573185532, "grad_norm": 0.3071642816066742, "learning_rate": 5.213105951016275e-06, "loss": 0.0211, "step": 132930 }, { "epoch": 1.0756533700137552, "grad_norm": 0.17958477139472961, "learning_rate": 5.212400494051566e-06, "loss": 0.0235, "step": 132940 }, { "epoch": 1.075734282708957, "grad_norm": 0.6733971238136292, "learning_rate": 5.2116950328509795e-06, "loss": 0.0203, "step": 132950 }, { "epoch": 1.0758151954041588, "grad_norm": 0.40944766998291016, "learning_rate": 5.210989567428582e-06, "loss": 0.0155, "step": 132960 }, { "epoch": 1.0758961080993608, "grad_norm": 0.4736618101596832, "learning_rate": 5.210284097798444e-06, "loss": 0.0232, "step": 132970 }, { "epoch": 1.0759770207945627, "grad_norm": 0.14182239770889282, "learning_rate": 5.209578623974633e-06, "loss": 0.0353, "step": 132980 }, { "epoch": 1.0760579334897646, "grad_norm": 0.6635263562202454, "learning_rate": 5.208873145971217e-06, "loss": 0.0367, "step": 132990 }, { "epoch": 1.0761388461849664, "grad_norm": 0.17058032751083374, "learning_rate": 5.2081676638022694e-06, "loss": 0.0245, "step": 133000 }, { "epoch": 1.0762197588801683, "grad_norm": 0.005735449027270079, "learning_rate": 5.207462177481856e-06, "loss": 0.0282, "step": 133010 }, { "epoch": 1.0763006715753702, "grad_norm": 0.3615254759788513, "learning_rate": 5.206756687024048e-06, "loss": 0.0165, "step": 133020 }, { "epoch": 1.076381584270572, "grad_norm": 0.3314811885356903, "learning_rate": 5.2060511924429135e-06, "loss": 0.0206, "step": 133030 }, { "epoch": 1.076462496965774, "grad_norm": 0.3830474019050598, "learning_rate": 5.205345693752524e-06, "loss": 0.0271, "step": 133040 }, { "epoch": 1.0765434096609758, "grad_norm": 0.32123705744743347, "learning_rate": 5.204640190966946e-06, "loss": 0.0321, "step": 133050 }, { "epoch": 1.0766243223561778, "grad_norm": 0.3338528871536255, "learning_rate": 5.2039346841002534e-06, "loss": 0.0281, "step": 133060 }, { "epoch": 1.0767052350513795, "grad_norm": 0.5484827756881714, "learning_rate": 5.203229173166514e-06, "loss": 0.0209, "step": 133070 }, { "epoch": 1.0767861477465814, "grad_norm": 0.3468274474143982, "learning_rate": 5.202523658179798e-06, "loss": 0.0309, "step": 133080 }, { "epoch": 1.0768670604417834, "grad_norm": 0.7374740242958069, "learning_rate": 5.201818139154174e-06, "loss": 0.0424, "step": 133090 }, { "epoch": 1.076947973136985, "grad_norm": 0.6322664022445679, "learning_rate": 5.201112616103715e-06, "loss": 0.0272, "step": 133100 }, { "epoch": 1.077028885832187, "grad_norm": 0.5714854598045349, "learning_rate": 5.200407089042489e-06, "loss": 0.0352, "step": 133110 }, { "epoch": 1.077109798527389, "grad_norm": 0.5253944993019104, "learning_rate": 5.199701557984566e-06, "loss": 0.0216, "step": 133120 }, { "epoch": 1.077190711222591, "grad_norm": 0.30831944942474365, "learning_rate": 5.198996022944016e-06, "loss": 0.0152, "step": 133130 }, { "epoch": 1.0772716239177926, "grad_norm": 0.2852049767971039, "learning_rate": 5.198290483934911e-06, "loss": 0.0294, "step": 133140 }, { "epoch": 1.0773525366129946, "grad_norm": 0.2821691334247589, "learning_rate": 5.197584940971323e-06, "loss": 0.0183, "step": 133150 }, { "epoch": 1.0774334493081965, "grad_norm": 0.40846186876296997, "learning_rate": 5.196879394067318e-06, "loss": 0.0279, "step": 133160 }, { "epoch": 1.0775143620033982, "grad_norm": 0.32079747319221497, "learning_rate": 5.19617384323697e-06, "loss": 0.0261, "step": 133170 }, { "epoch": 1.0775952746986002, "grad_norm": 0.09894057363271713, "learning_rate": 5.1954682884943486e-06, "loss": 0.0212, "step": 133180 }, { "epoch": 1.077676187393802, "grad_norm": 0.4833110272884369, "learning_rate": 5.194762729853525e-06, "loss": 0.0213, "step": 133190 }, { "epoch": 1.077757100089004, "grad_norm": 0.3904714584350586, "learning_rate": 5.194057167328571e-06, "loss": 0.0205, "step": 133200 }, { "epoch": 1.0778380127842058, "grad_norm": 0.2747276723384857, "learning_rate": 5.193351600933554e-06, "loss": 0.0234, "step": 133210 }, { "epoch": 1.0779189254794077, "grad_norm": 0.485271692276001, "learning_rate": 5.19264603068255e-06, "loss": 0.0308, "step": 133220 }, { "epoch": 1.0779998381746096, "grad_norm": 0.46710342168807983, "learning_rate": 5.191940456589627e-06, "loss": 0.0304, "step": 133230 }, { "epoch": 1.0780807508698116, "grad_norm": 0.45788028836250305, "learning_rate": 5.1912348786688545e-06, "loss": 0.0127, "step": 133240 }, { "epoch": 1.0781616635650133, "grad_norm": 0.5218028426170349, "learning_rate": 5.190529296934309e-06, "loss": 0.0232, "step": 133250 }, { "epoch": 1.0782425762602152, "grad_norm": 0.6236563324928284, "learning_rate": 5.18982371140006e-06, "loss": 0.0206, "step": 133260 }, { "epoch": 1.0783234889554172, "grad_norm": 0.15211668610572815, "learning_rate": 5.189118122080174e-06, "loss": 0.0317, "step": 133270 }, { "epoch": 1.078404401650619, "grad_norm": 0.43251657485961914, "learning_rate": 5.188412528988728e-06, "loss": 0.0218, "step": 133280 }, { "epoch": 1.0784853143458208, "grad_norm": 0.2693665325641632, "learning_rate": 5.187706932139791e-06, "loss": 0.0244, "step": 133290 }, { "epoch": 1.0785662270410228, "grad_norm": 0.43573617935180664, "learning_rate": 5.187001331547437e-06, "loss": 0.0232, "step": 133300 }, { "epoch": 1.0786471397362245, "grad_norm": 0.3498889207839966, "learning_rate": 5.186295727225735e-06, "loss": 0.0247, "step": 133310 }, { "epoch": 1.0787280524314264, "grad_norm": 0.31879958510398865, "learning_rate": 5.185590119188758e-06, "loss": 0.0179, "step": 133320 }, { "epoch": 1.0788089651266284, "grad_norm": 0.2864210307598114, "learning_rate": 5.184884507450579e-06, "loss": 0.027, "step": 133330 }, { "epoch": 1.0788898778218303, "grad_norm": 0.32715076208114624, "learning_rate": 5.184178892025269e-06, "loss": 0.0161, "step": 133340 }, { "epoch": 1.078970790517032, "grad_norm": 0.3099779486656189, "learning_rate": 5.183473272926898e-06, "loss": 0.0271, "step": 133350 }, { "epoch": 1.079051703212234, "grad_norm": 0.4907994568347931, "learning_rate": 5.182767650169541e-06, "loss": 0.0133, "step": 133360 }, { "epoch": 1.079132615907436, "grad_norm": 0.409108430147171, "learning_rate": 5.18206202376727e-06, "loss": 0.0223, "step": 133370 }, { "epoch": 1.0792135286026379, "grad_norm": 0.28924426436424255, "learning_rate": 5.1813563937341545e-06, "loss": 0.026, "step": 133380 }, { "epoch": 1.0792944412978396, "grad_norm": 0.4954299032688141, "learning_rate": 5.180650760084269e-06, "loss": 0.0294, "step": 133390 }, { "epoch": 1.0793753539930415, "grad_norm": 0.6045107841491699, "learning_rate": 5.1799451228316866e-06, "loss": 0.0192, "step": 133400 }, { "epoch": 1.0794562666882435, "grad_norm": 0.5594207644462585, "learning_rate": 5.179239481990478e-06, "loss": 0.0204, "step": 133410 }, { "epoch": 1.0795371793834452, "grad_norm": 0.4764079451560974, "learning_rate": 5.178533837574716e-06, "loss": 0.0258, "step": 133420 }, { "epoch": 1.0796180920786471, "grad_norm": 0.008044597692787647, "learning_rate": 5.177828189598473e-06, "loss": 0.0081, "step": 133430 }, { "epoch": 1.079699004773849, "grad_norm": 0.6158592104911804, "learning_rate": 5.177122538075823e-06, "loss": 0.0291, "step": 133440 }, { "epoch": 1.0797799174690508, "grad_norm": 0.16201938688755035, "learning_rate": 5.176416883020838e-06, "loss": 0.0175, "step": 133450 }, { "epoch": 1.0798608301642527, "grad_norm": 0.5148629546165466, "learning_rate": 5.17571122444759e-06, "loss": 0.0184, "step": 133460 }, { "epoch": 1.0799417428594547, "grad_norm": 0.20358414947986603, "learning_rate": 5.1750055623701535e-06, "loss": 0.0198, "step": 133470 }, { "epoch": 1.0800226555546566, "grad_norm": 0.40070486068725586, "learning_rate": 5.1742998968026e-06, "loss": 0.032, "step": 133480 }, { "epoch": 1.0801035682498583, "grad_norm": 0.3468593955039978, "learning_rate": 5.173594227759004e-06, "loss": 0.0162, "step": 133490 }, { "epoch": 1.0801844809450603, "grad_norm": 0.3144914507865906, "learning_rate": 5.172888555253436e-06, "loss": 0.0223, "step": 133500 }, { "epoch": 1.0802653936402622, "grad_norm": 0.4426841139793396, "learning_rate": 5.17218287929997e-06, "loss": 0.0138, "step": 133510 }, { "epoch": 1.0803463063354641, "grad_norm": 0.3617018759250641, "learning_rate": 5.1714771999126834e-06, "loss": 0.0205, "step": 133520 }, { "epoch": 1.0804272190306659, "grad_norm": 0.7862845063209534, "learning_rate": 5.170771517105644e-06, "loss": 0.0165, "step": 133530 }, { "epoch": 1.0805081317258678, "grad_norm": 0.43628835678100586, "learning_rate": 5.170065830892927e-06, "loss": 0.0213, "step": 133540 }, { "epoch": 1.0805890444210697, "grad_norm": 0.528980016708374, "learning_rate": 5.1693601412886055e-06, "loss": 0.0263, "step": 133550 }, { "epoch": 1.0806699571162715, "grad_norm": 0.6515029072761536, "learning_rate": 5.168654448306756e-06, "loss": 0.0275, "step": 133560 }, { "epoch": 1.0807508698114734, "grad_norm": 0.5702374577522278, "learning_rate": 5.1679487519614455e-06, "loss": 0.0228, "step": 133570 }, { "epoch": 1.0808317825066753, "grad_norm": 0.1891990751028061, "learning_rate": 5.1672430522667546e-06, "loss": 0.0202, "step": 133580 }, { "epoch": 1.0809126952018773, "grad_norm": 0.25635313987731934, "learning_rate": 5.1665373492367545e-06, "loss": 0.0278, "step": 133590 }, { "epoch": 1.080993607897079, "grad_norm": 0.143098846077919, "learning_rate": 5.165831642885517e-06, "loss": 0.0333, "step": 133600 }, { "epoch": 1.081074520592281, "grad_norm": 0.3980741798877716, "learning_rate": 5.165125933227119e-06, "loss": 0.0335, "step": 133610 }, { "epoch": 1.0811554332874829, "grad_norm": 0.4153616428375244, "learning_rate": 5.16442022027563e-06, "loss": 0.0174, "step": 133620 }, { "epoch": 1.0812363459826846, "grad_norm": 0.4564460813999176, "learning_rate": 5.163714504045129e-06, "loss": 0.0268, "step": 133630 }, { "epoch": 1.0813172586778865, "grad_norm": 0.4316505193710327, "learning_rate": 5.163008784549689e-06, "loss": 0.0368, "step": 133640 }, { "epoch": 1.0813981713730885, "grad_norm": 0.6483705639839172, "learning_rate": 5.16230306180338e-06, "loss": 0.0265, "step": 133650 }, { "epoch": 1.0814790840682904, "grad_norm": 0.2482154816389084, "learning_rate": 5.161597335820281e-06, "loss": 0.0193, "step": 133660 }, { "epoch": 1.0815599967634921, "grad_norm": 0.19279327988624573, "learning_rate": 5.160891606614463e-06, "loss": 0.0181, "step": 133670 }, { "epoch": 1.081640909458694, "grad_norm": 0.4534429907798767, "learning_rate": 5.1601858742000035e-06, "loss": 0.0131, "step": 133680 }, { "epoch": 1.081721822153896, "grad_norm": 0.6688477993011475, "learning_rate": 5.159480138590973e-06, "loss": 0.0176, "step": 133690 }, { "epoch": 1.0818027348490977, "grad_norm": 0.3877945840358734, "learning_rate": 5.1587743998014495e-06, "loss": 0.0158, "step": 133700 }, { "epoch": 1.0818836475442997, "grad_norm": 0.36441540718078613, "learning_rate": 5.158068657845506e-06, "loss": 0.0249, "step": 133710 }, { "epoch": 1.0819645602395016, "grad_norm": 0.38261038064956665, "learning_rate": 5.157362912737215e-06, "loss": 0.0157, "step": 133720 }, { "epoch": 1.0820454729347035, "grad_norm": 0.26143863797187805, "learning_rate": 5.1566571644906525e-06, "loss": 0.0215, "step": 133730 }, { "epoch": 1.0821263856299053, "grad_norm": 0.17737430334091187, "learning_rate": 5.155951413119896e-06, "loss": 0.0158, "step": 133740 }, { "epoch": 1.0822072983251072, "grad_norm": 0.41603517532348633, "learning_rate": 5.155245658639016e-06, "loss": 0.0132, "step": 133750 }, { "epoch": 1.0822882110203091, "grad_norm": 0.4988296627998352, "learning_rate": 5.1545399010620885e-06, "loss": 0.0361, "step": 133760 }, { "epoch": 1.0823691237155109, "grad_norm": 0.4992333650588989, "learning_rate": 5.15383414040319e-06, "loss": 0.0145, "step": 133770 }, { "epoch": 1.0824500364107128, "grad_norm": 0.49605876207351685, "learning_rate": 5.153128376676395e-06, "loss": 0.0232, "step": 133780 }, { "epoch": 1.0825309491059147, "grad_norm": 0.3132605254650116, "learning_rate": 5.152422609895777e-06, "loss": 0.0292, "step": 133790 }, { "epoch": 1.0826118618011167, "grad_norm": 0.3401131331920624, "learning_rate": 5.1517168400754125e-06, "loss": 0.0402, "step": 133800 }, { "epoch": 1.0826927744963184, "grad_norm": 0.5403391718864441, "learning_rate": 5.151011067229375e-06, "loss": 0.0279, "step": 133810 }, { "epoch": 1.0827736871915203, "grad_norm": 0.3881329298019409, "learning_rate": 5.150305291371741e-06, "loss": 0.0177, "step": 133820 }, { "epoch": 1.0828545998867223, "grad_norm": 0.23889335989952087, "learning_rate": 5.149599512516586e-06, "loss": 0.0267, "step": 133830 }, { "epoch": 1.082935512581924, "grad_norm": 0.3216209411621094, "learning_rate": 5.148893730677982e-06, "loss": 0.0106, "step": 133840 }, { "epoch": 1.083016425277126, "grad_norm": 0.5220315456390381, "learning_rate": 5.14818794587001e-06, "loss": 0.0187, "step": 133850 }, { "epoch": 1.0830973379723279, "grad_norm": 0.3743610382080078, "learning_rate": 5.14748215810674e-06, "loss": 0.0248, "step": 133860 }, { "epoch": 1.0831782506675298, "grad_norm": 0.25514960289001465, "learning_rate": 5.146776367402251e-06, "loss": 0.0179, "step": 133870 }, { "epoch": 1.0832591633627315, "grad_norm": 0.3333488702774048, "learning_rate": 5.146070573770617e-06, "loss": 0.0224, "step": 133880 }, { "epoch": 1.0833400760579335, "grad_norm": 0.4317946135997772, "learning_rate": 5.145364777225913e-06, "loss": 0.021, "step": 133890 }, { "epoch": 1.0834209887531354, "grad_norm": 0.5601721405982971, "learning_rate": 5.144658977782216e-06, "loss": 0.0306, "step": 133900 }, { "epoch": 1.0835019014483374, "grad_norm": 0.6135077476501465, "learning_rate": 5.143953175453601e-06, "loss": 0.0316, "step": 133910 }, { "epoch": 1.083582814143539, "grad_norm": 0.2938636839389801, "learning_rate": 5.1432473702541444e-06, "loss": 0.0202, "step": 133920 }, { "epoch": 1.083663726838741, "grad_norm": 0.13779908418655396, "learning_rate": 5.14254156219792e-06, "loss": 0.0238, "step": 133930 }, { "epoch": 1.083744639533943, "grad_norm": 0.22347566485404968, "learning_rate": 5.141835751299005e-06, "loss": 0.0124, "step": 133940 }, { "epoch": 1.0838255522291447, "grad_norm": 0.6933437585830688, "learning_rate": 5.1411299375714765e-06, "loss": 0.0197, "step": 133950 }, { "epoch": 1.0839064649243466, "grad_norm": 0.33012741804122925, "learning_rate": 5.1404241210294095e-06, "loss": 0.0249, "step": 133960 }, { "epoch": 1.0839873776195486, "grad_norm": 0.42688432335853577, "learning_rate": 5.1397183016868805e-06, "loss": 0.0212, "step": 133970 }, { "epoch": 1.0840682903147503, "grad_norm": 0.22858120501041412, "learning_rate": 5.139012479557963e-06, "loss": 0.0181, "step": 133980 }, { "epoch": 1.0841492030099522, "grad_norm": 0.45824310183525085, "learning_rate": 5.138306654656736e-06, "loss": 0.0286, "step": 133990 }, { "epoch": 1.0842301157051542, "grad_norm": 0.37439796328544617, "learning_rate": 5.137600826997275e-06, "loss": 0.0257, "step": 134000 }, { "epoch": 1.084311028400356, "grad_norm": 0.560278058052063, "learning_rate": 5.136894996593656e-06, "loss": 0.0192, "step": 134010 }, { "epoch": 1.0843919410955578, "grad_norm": 0.7884436249732971, "learning_rate": 5.136189163459954e-06, "loss": 0.0298, "step": 134020 }, { "epoch": 1.0844728537907597, "grad_norm": 0.2910591661930084, "learning_rate": 5.135483327610248e-06, "loss": 0.0281, "step": 134030 }, { "epoch": 1.0845537664859617, "grad_norm": 0.6127641201019287, "learning_rate": 5.134777489058613e-06, "loss": 0.0271, "step": 134040 }, { "epoch": 1.0846346791811636, "grad_norm": 0.49253949522972107, "learning_rate": 5.134071647819125e-06, "loss": 0.0245, "step": 134050 }, { "epoch": 1.0847155918763653, "grad_norm": 0.2125508338212967, "learning_rate": 5.133365803905861e-06, "loss": 0.0226, "step": 134060 }, { "epoch": 1.0847965045715673, "grad_norm": 0.4336393475532532, "learning_rate": 5.132659957332897e-06, "loss": 0.0199, "step": 134070 }, { "epoch": 1.0848774172667692, "grad_norm": 0.5270925164222717, "learning_rate": 5.131954108114312e-06, "loss": 0.0166, "step": 134080 }, { "epoch": 1.084958329961971, "grad_norm": 0.5466964244842529, "learning_rate": 5.13124825626418e-06, "loss": 0.0265, "step": 134090 }, { "epoch": 1.0850392426571729, "grad_norm": 0.6119095683097839, "learning_rate": 5.130542401796579e-06, "loss": 0.0291, "step": 134100 }, { "epoch": 1.0851201553523748, "grad_norm": 0.3558647036552429, "learning_rate": 5.1298365447255845e-06, "loss": 0.0226, "step": 134110 }, { "epoch": 1.0852010680475768, "grad_norm": 0.5373870134353638, "learning_rate": 5.129130685065274e-06, "loss": 0.026, "step": 134120 }, { "epoch": 1.0852819807427785, "grad_norm": 0.3343772888183594, "learning_rate": 5.128424822829725e-06, "loss": 0.0199, "step": 134130 }, { "epoch": 1.0853628934379804, "grad_norm": 0.3216545581817627, "learning_rate": 5.127718958033014e-06, "loss": 0.0201, "step": 134140 }, { "epoch": 1.0854438061331824, "grad_norm": 0.2684314548969269, "learning_rate": 5.127013090689219e-06, "loss": 0.0167, "step": 134150 }, { "epoch": 1.085524718828384, "grad_norm": 0.31292617321014404, "learning_rate": 5.126307220812415e-06, "loss": 0.019, "step": 134160 }, { "epoch": 1.085605631523586, "grad_norm": 0.5297778844833374, "learning_rate": 5.12560134841668e-06, "loss": 0.0262, "step": 134170 }, { "epoch": 1.085686544218788, "grad_norm": 0.3724476397037506, "learning_rate": 5.124895473516092e-06, "loss": 0.0202, "step": 134180 }, { "epoch": 1.08576745691399, "grad_norm": 0.2433464080095291, "learning_rate": 5.124189596124726e-06, "loss": 0.019, "step": 134190 }, { "epoch": 1.0858483696091916, "grad_norm": 0.5597654581069946, "learning_rate": 5.123483716256663e-06, "loss": 0.0126, "step": 134200 }, { "epoch": 1.0859292823043936, "grad_norm": 0.7271255254745483, "learning_rate": 5.122777833925976e-06, "loss": 0.0289, "step": 134210 }, { "epoch": 1.0860101949995955, "grad_norm": 0.4282609224319458, "learning_rate": 5.122071949146746e-06, "loss": 0.0337, "step": 134220 }, { "epoch": 1.0860911076947972, "grad_norm": 0.30109795928001404, "learning_rate": 5.1213660619330485e-06, "loss": 0.0216, "step": 134230 }, { "epoch": 1.0861720203899992, "grad_norm": 0.46759283542633057, "learning_rate": 5.120660172298959e-06, "loss": 0.0201, "step": 134240 }, { "epoch": 1.086252933085201, "grad_norm": 0.4582553207874298, "learning_rate": 5.119954280258559e-06, "loss": 0.0149, "step": 134250 }, { "epoch": 1.086333845780403, "grad_norm": 0.3689066767692566, "learning_rate": 5.119248385825925e-06, "loss": 0.0216, "step": 134260 }, { "epoch": 1.0864147584756048, "grad_norm": 0.16893187165260315, "learning_rate": 5.11854248901513e-06, "loss": 0.024, "step": 134270 }, { "epoch": 1.0864956711708067, "grad_norm": 0.2789037823677063, "learning_rate": 5.117836589840258e-06, "loss": 0.0271, "step": 134280 }, { "epoch": 1.0865765838660086, "grad_norm": 0.25590792298316956, "learning_rate": 5.1171306883153834e-06, "loss": 0.0203, "step": 134290 }, { "epoch": 1.0866574965612104, "grad_norm": 0.14533108472824097, "learning_rate": 5.116424784454585e-06, "loss": 0.0271, "step": 134300 }, { "epoch": 1.0867384092564123, "grad_norm": 0.23816271126270294, "learning_rate": 5.115718878271939e-06, "loss": 0.0228, "step": 134310 }, { "epoch": 1.0868193219516142, "grad_norm": 0.18925663828849792, "learning_rate": 5.115012969781524e-06, "loss": 0.024, "step": 134320 }, { "epoch": 1.0869002346468162, "grad_norm": 0.29411691427230835, "learning_rate": 5.1143070589974206e-06, "loss": 0.0218, "step": 134330 }, { "epoch": 1.086981147342018, "grad_norm": 0.403364360332489, "learning_rate": 5.113601145933703e-06, "loss": 0.0176, "step": 134340 }, { "epoch": 1.0870620600372198, "grad_norm": 0.6209577322006226, "learning_rate": 5.112895230604449e-06, "loss": 0.0175, "step": 134350 }, { "epoch": 1.0871429727324218, "grad_norm": 0.9199937582015991, "learning_rate": 5.1121893130237386e-06, "loss": 0.0179, "step": 134360 }, { "epoch": 1.0872238854276235, "grad_norm": 0.23949620127677917, "learning_rate": 5.1114833932056514e-06, "loss": 0.0243, "step": 134370 }, { "epoch": 1.0873047981228254, "grad_norm": 0.21751530468463898, "learning_rate": 5.1107774711642596e-06, "loss": 0.0228, "step": 134380 }, { "epoch": 1.0873857108180274, "grad_norm": 0.49120649695396423, "learning_rate": 5.110071546913647e-06, "loss": 0.0253, "step": 134390 }, { "epoch": 1.0874666235132293, "grad_norm": 0.07338369637727737, "learning_rate": 5.109365620467891e-06, "loss": 0.0133, "step": 134400 }, { "epoch": 1.087547536208431, "grad_norm": 0.09808381646871567, "learning_rate": 5.108659691841067e-06, "loss": 0.0402, "step": 134410 }, { "epoch": 1.087628448903633, "grad_norm": 0.9181035757064819, "learning_rate": 5.1079537610472555e-06, "loss": 0.03, "step": 134420 }, { "epoch": 1.087709361598835, "grad_norm": 0.12338273972272873, "learning_rate": 5.107247828100532e-06, "loss": 0.0134, "step": 134430 }, { "epoch": 1.0877902742940369, "grad_norm": 0.5133151412010193, "learning_rate": 5.10654189301498e-06, "loss": 0.0191, "step": 134440 }, { "epoch": 1.0878711869892386, "grad_norm": 0.6388642191886902, "learning_rate": 5.105835955804673e-06, "loss": 0.0438, "step": 134450 }, { "epoch": 1.0879520996844405, "grad_norm": 0.4029775559902191, "learning_rate": 5.1051300164836915e-06, "loss": 0.0213, "step": 134460 }, { "epoch": 1.0880330123796425, "grad_norm": 0.33294668793678284, "learning_rate": 5.104424075066114e-06, "loss": 0.0232, "step": 134470 }, { "epoch": 1.0881139250748442, "grad_norm": 0.42498838901519775, "learning_rate": 5.1037181315660185e-06, "loss": 0.0233, "step": 134480 }, { "epoch": 1.088194837770046, "grad_norm": 0.43149086833000183, "learning_rate": 5.103012185997484e-06, "loss": 0.0305, "step": 134490 }, { "epoch": 1.088275750465248, "grad_norm": 0.4418594539165497, "learning_rate": 5.10230623837459e-06, "loss": 0.0224, "step": 134500 }, { "epoch": 1.0883566631604498, "grad_norm": 0.3852660655975342, "learning_rate": 5.101600288711413e-06, "loss": 0.0166, "step": 134510 }, { "epoch": 1.0884375758556517, "grad_norm": 0.38423919677734375, "learning_rate": 5.100894337022033e-06, "loss": 0.0231, "step": 134520 }, { "epoch": 1.0885184885508536, "grad_norm": 0.41169247031211853, "learning_rate": 5.100188383320529e-06, "loss": 0.024, "step": 134530 }, { "epoch": 1.0885994012460556, "grad_norm": 0.4619399905204773, "learning_rate": 5.099482427620978e-06, "loss": 0.0164, "step": 134540 }, { "epoch": 1.0886803139412573, "grad_norm": 0.32587113976478577, "learning_rate": 5.09877646993746e-06, "loss": 0.0395, "step": 134550 }, { "epoch": 1.0887612266364592, "grad_norm": 0.1892855167388916, "learning_rate": 5.098070510284055e-06, "loss": 0.0232, "step": 134560 }, { "epoch": 1.0888421393316612, "grad_norm": 0.26258280873298645, "learning_rate": 5.09736454867484e-06, "loss": 0.0133, "step": 134570 }, { "epoch": 1.0889230520268631, "grad_norm": 0.2676180601119995, "learning_rate": 5.0966585851238945e-06, "loss": 0.0275, "step": 134580 }, { "epoch": 1.0890039647220648, "grad_norm": 0.39544564485549927, "learning_rate": 5.095952619645298e-06, "loss": 0.018, "step": 134590 }, { "epoch": 1.0890848774172668, "grad_norm": 0.3858434557914734, "learning_rate": 5.095246652253128e-06, "loss": 0.0154, "step": 134600 }, { "epoch": 1.0891657901124687, "grad_norm": 0.670426607131958, "learning_rate": 5.094540682961466e-06, "loss": 0.0216, "step": 134610 }, { "epoch": 1.0892467028076704, "grad_norm": 0.5701856017112732, "learning_rate": 5.0938347117843886e-06, "loss": 0.0255, "step": 134620 }, { "epoch": 1.0893276155028724, "grad_norm": 0.234077587723732, "learning_rate": 5.093128738735977e-06, "loss": 0.0391, "step": 134630 }, { "epoch": 1.0894085281980743, "grad_norm": 0.41164177656173706, "learning_rate": 5.092422763830309e-06, "loss": 0.0307, "step": 134640 }, { "epoch": 1.089489440893276, "grad_norm": 0.6489603519439697, "learning_rate": 5.091716787081463e-06, "loss": 0.03, "step": 134650 }, { "epoch": 1.089570353588478, "grad_norm": 0.5689505934715271, "learning_rate": 5.091010808503521e-06, "loss": 0.0181, "step": 134660 }, { "epoch": 1.08965126628368, "grad_norm": 0.5785636305809021, "learning_rate": 5.090304828110559e-06, "loss": 0.0172, "step": 134670 }, { "epoch": 1.0897321789788819, "grad_norm": 0.49667656421661377, "learning_rate": 5.089598845916659e-06, "loss": 0.0264, "step": 134680 }, { "epoch": 1.0898130916740836, "grad_norm": 0.3929329514503479, "learning_rate": 5.088892861935899e-06, "loss": 0.0143, "step": 134690 }, { "epoch": 1.0898940043692855, "grad_norm": 0.3035770654678345, "learning_rate": 5.088186876182359e-06, "loss": 0.0343, "step": 134700 }, { "epoch": 1.0899749170644875, "grad_norm": 0.3348235785961151, "learning_rate": 5.087480888670117e-06, "loss": 0.0251, "step": 134710 }, { "epoch": 1.0900558297596894, "grad_norm": 0.6179656386375427, "learning_rate": 5.086774899413253e-06, "loss": 0.0299, "step": 134720 }, { "epoch": 1.0901367424548911, "grad_norm": 0.30707991123199463, "learning_rate": 5.086068908425846e-06, "loss": 0.0187, "step": 134730 }, { "epoch": 1.090217655150093, "grad_norm": 0.15622113645076752, "learning_rate": 5.0853629157219794e-06, "loss": 0.0239, "step": 134740 }, { "epoch": 1.090298567845295, "grad_norm": 0.3588186502456665, "learning_rate": 5.084656921315727e-06, "loss": 0.0123, "step": 134750 }, { "epoch": 1.0903794805404967, "grad_norm": 0.18678218126296997, "learning_rate": 5.083950925221172e-06, "loss": 0.0221, "step": 134760 }, { "epoch": 1.0904603932356987, "grad_norm": 0.15694144368171692, "learning_rate": 5.0832449274523934e-06, "loss": 0.0136, "step": 134770 }, { "epoch": 1.0905413059309006, "grad_norm": 0.22628502547740936, "learning_rate": 5.082538928023469e-06, "loss": 0.0266, "step": 134780 }, { "epoch": 1.0906222186261025, "grad_norm": 0.8258254528045654, "learning_rate": 5.0818329269484825e-06, "loss": 0.0166, "step": 134790 }, { "epoch": 1.0907031313213043, "grad_norm": 0.6112856864929199, "learning_rate": 5.081126924241509e-06, "loss": 0.0201, "step": 134800 }, { "epoch": 1.0907840440165062, "grad_norm": 0.2817021608352661, "learning_rate": 5.080420919916631e-06, "loss": 0.0373, "step": 134810 }, { "epoch": 1.0908649567117081, "grad_norm": 0.3813830316066742, "learning_rate": 5.079714913987928e-06, "loss": 0.019, "step": 134820 }, { "epoch": 1.0909458694069099, "grad_norm": 1.1186327934265137, "learning_rate": 5.079008906469478e-06, "loss": 0.0357, "step": 134830 }, { "epoch": 1.0910267821021118, "grad_norm": 0.20470494031906128, "learning_rate": 5.078302897375363e-06, "loss": 0.0357, "step": 134840 }, { "epoch": 1.0911076947973137, "grad_norm": 0.25541719794273376, "learning_rate": 5.0775968867196615e-06, "loss": 0.0279, "step": 134850 }, { "epoch": 1.0911886074925157, "grad_norm": 0.3229621350765228, "learning_rate": 5.076890874516454e-06, "loss": 0.0111, "step": 134860 }, { "epoch": 1.0912695201877174, "grad_norm": 0.37030526995658875, "learning_rate": 5.07618486077982e-06, "loss": 0.026, "step": 134870 }, { "epoch": 1.0913504328829193, "grad_norm": 0.2533875107765198, "learning_rate": 5.07547884552384e-06, "loss": 0.0254, "step": 134880 }, { "epoch": 1.0914313455781213, "grad_norm": 0.563403844833374, "learning_rate": 5.074772828762595e-06, "loss": 0.0191, "step": 134890 }, { "epoch": 1.091512258273323, "grad_norm": 0.3561936020851135, "learning_rate": 5.074066810510163e-06, "loss": 0.0242, "step": 134900 }, { "epoch": 1.091593170968525, "grad_norm": 0.16042280197143555, "learning_rate": 5.0733607907806246e-06, "loss": 0.0177, "step": 134910 }, { "epoch": 1.0916740836637269, "grad_norm": 0.7711054682731628, "learning_rate": 5.0726547695880605e-06, "loss": 0.0184, "step": 134920 }, { "epoch": 1.0917549963589288, "grad_norm": 0.2660571038722992, "learning_rate": 5.07194874694655e-06, "loss": 0.0177, "step": 134930 }, { "epoch": 1.0918359090541305, "grad_norm": 0.29404422640800476, "learning_rate": 5.071242722870172e-06, "loss": 0.022, "step": 134940 }, { "epoch": 1.0919168217493325, "grad_norm": 0.6758273839950562, "learning_rate": 5.070536697373011e-06, "loss": 0.0238, "step": 134950 }, { "epoch": 1.0919977344445344, "grad_norm": 0.34339639544487, "learning_rate": 5.069830670469145e-06, "loss": 0.0128, "step": 134960 }, { "epoch": 1.0920786471397361, "grad_norm": 0.4048081338405609, "learning_rate": 5.0691246421726504e-06, "loss": 0.023, "step": 134970 }, { "epoch": 1.092159559834938, "grad_norm": 0.46674203872680664, "learning_rate": 5.068418612497613e-06, "loss": 0.0188, "step": 134980 }, { "epoch": 1.09224047253014, "grad_norm": 0.3909727931022644, "learning_rate": 5.0677125814581106e-06, "loss": 0.0204, "step": 134990 }, { "epoch": 1.092321385225342, "grad_norm": 0.20596987009048462, "learning_rate": 5.067006549068223e-06, "loss": 0.0215, "step": 135000 }, { "epoch": 1.0924022979205437, "grad_norm": 0.3559597432613373, "learning_rate": 5.066300515342032e-06, "loss": 0.0211, "step": 135010 }, { "epoch": 1.0924832106157456, "grad_norm": 0.40102219581604004, "learning_rate": 5.065594480293616e-06, "loss": 0.0354, "step": 135020 }, { "epoch": 1.0925641233109475, "grad_norm": 0.17980879545211792, "learning_rate": 5.064888443937057e-06, "loss": 0.0253, "step": 135030 }, { "epoch": 1.0926450360061493, "grad_norm": 0.34965479373931885, "learning_rate": 5.064182406286437e-06, "loss": 0.0222, "step": 135040 }, { "epoch": 1.0927259487013512, "grad_norm": 0.6700506806373596, "learning_rate": 5.063476367355832e-06, "loss": 0.0287, "step": 135050 }, { "epoch": 1.0928068613965531, "grad_norm": 0.4275735914707184, "learning_rate": 5.062770327159326e-06, "loss": 0.0246, "step": 135060 }, { "epoch": 1.092887774091755, "grad_norm": 0.09703697264194489, "learning_rate": 5.062064285710998e-06, "loss": 0.0305, "step": 135070 }, { "epoch": 1.0929686867869568, "grad_norm": 0.6997761130332947, "learning_rate": 5.061358243024929e-06, "loss": 0.0378, "step": 135080 }, { "epoch": 1.0930495994821587, "grad_norm": 0.6047876477241516, "learning_rate": 5.0606521991151995e-06, "loss": 0.0161, "step": 135090 }, { "epoch": 1.0931305121773607, "grad_norm": 0.5620645880699158, "learning_rate": 5.059946153995889e-06, "loss": 0.0357, "step": 135100 }, { "epoch": 1.0932114248725626, "grad_norm": 0.41884931921958923, "learning_rate": 5.05924010768108e-06, "loss": 0.0167, "step": 135110 }, { "epoch": 1.0932923375677643, "grad_norm": 0.3479888439178467, "learning_rate": 5.058534060184852e-06, "loss": 0.0328, "step": 135120 }, { "epoch": 1.0933732502629663, "grad_norm": 0.4912208616733551, "learning_rate": 5.057828011521285e-06, "loss": 0.021, "step": 135130 }, { "epoch": 1.0934541629581682, "grad_norm": 0.20128001272678375, "learning_rate": 5.057121961704461e-06, "loss": 0.0165, "step": 135140 }, { "epoch": 1.09353507565337, "grad_norm": 0.46990808844566345, "learning_rate": 5.056415910748461e-06, "loss": 0.0222, "step": 135150 }, { "epoch": 1.0936159883485719, "grad_norm": 0.12360748648643494, "learning_rate": 5.0557098586673626e-06, "loss": 0.012, "step": 135160 }, { "epoch": 1.0936969010437738, "grad_norm": 0.41713011264801025, "learning_rate": 5.055003805475251e-06, "loss": 0.0311, "step": 135170 }, { "epoch": 1.0937778137389755, "grad_norm": 0.29524531960487366, "learning_rate": 5.0542977511862024e-06, "loss": 0.02, "step": 135180 }, { "epoch": 1.0938587264341775, "grad_norm": 0.48911404609680176, "learning_rate": 5.053591695814301e-06, "loss": 0.0286, "step": 135190 }, { "epoch": 1.0939396391293794, "grad_norm": 0.5437619090080261, "learning_rate": 5.052885639373627e-06, "loss": 0.0184, "step": 135200 }, { "epoch": 1.0940205518245814, "grad_norm": 0.5646159052848816, "learning_rate": 5.052179581878258e-06, "loss": 0.0149, "step": 135210 }, { "epoch": 1.094101464519783, "grad_norm": 0.33252352476119995, "learning_rate": 5.0514735233422805e-06, "loss": 0.0164, "step": 135220 }, { "epoch": 1.094182377214985, "grad_norm": 0.5963517427444458, "learning_rate": 5.050767463779771e-06, "loss": 0.0441, "step": 135230 }, { "epoch": 1.094263289910187, "grad_norm": 0.31949350237846375, "learning_rate": 5.050061403204811e-06, "loss": 0.018, "step": 135240 }, { "epoch": 1.094344202605389, "grad_norm": 0.3908734917640686, "learning_rate": 5.0493553416314825e-06, "loss": 0.0249, "step": 135250 }, { "epoch": 1.0944251153005906, "grad_norm": 0.2554077208042145, "learning_rate": 5.048649279073867e-06, "loss": 0.0168, "step": 135260 }, { "epoch": 1.0945060279957926, "grad_norm": 0.5440850257873535, "learning_rate": 5.047943215546042e-06, "loss": 0.0161, "step": 135270 }, { "epoch": 1.0945869406909945, "grad_norm": 0.24128282070159912, "learning_rate": 5.047237151062092e-06, "loss": 0.0198, "step": 135280 }, { "epoch": 1.0946678533861962, "grad_norm": 0.18877266347408295, "learning_rate": 5.046531085636097e-06, "loss": 0.0238, "step": 135290 }, { "epoch": 1.0947487660813982, "grad_norm": 0.2200695276260376, "learning_rate": 5.045825019282138e-06, "loss": 0.013, "step": 135300 }, { "epoch": 1.0948296787766, "grad_norm": 0.3720390200614929, "learning_rate": 5.045118952014295e-06, "loss": 0.0161, "step": 135310 }, { "epoch": 1.0949105914718018, "grad_norm": 0.3600325584411621, "learning_rate": 5.044412883846649e-06, "loss": 0.0106, "step": 135320 }, { "epoch": 1.0949915041670037, "grad_norm": 0.7473242878913879, "learning_rate": 5.0437068147932845e-06, "loss": 0.0332, "step": 135330 }, { "epoch": 1.0950724168622057, "grad_norm": 0.2864091098308563, "learning_rate": 5.043000744868278e-06, "loss": 0.0327, "step": 135340 }, { "epoch": 1.0951533295574076, "grad_norm": 0.09445306658744812, "learning_rate": 5.042294674085711e-06, "loss": 0.0254, "step": 135350 }, { "epoch": 1.0952342422526093, "grad_norm": 0.13754236698150635, "learning_rate": 5.041588602459668e-06, "loss": 0.0225, "step": 135360 }, { "epoch": 1.0953151549478113, "grad_norm": 0.3837260901927948, "learning_rate": 5.040882530004229e-06, "loss": 0.0382, "step": 135370 }, { "epoch": 1.0953960676430132, "grad_norm": 0.5678438544273376, "learning_rate": 5.040176456733472e-06, "loss": 0.0171, "step": 135380 }, { "epoch": 1.0954769803382152, "grad_norm": 0.2056659758090973, "learning_rate": 5.039470382661481e-06, "loss": 0.0189, "step": 135390 }, { "epoch": 1.0955578930334169, "grad_norm": 0.548591673374176, "learning_rate": 5.038764307802338e-06, "loss": 0.0175, "step": 135400 }, { "epoch": 1.0956388057286188, "grad_norm": 1.143403172492981, "learning_rate": 5.038058232170121e-06, "loss": 0.0246, "step": 135410 }, { "epoch": 1.0957197184238208, "grad_norm": 0.3868081271648407, "learning_rate": 5.037352155778913e-06, "loss": 0.0096, "step": 135420 }, { "epoch": 1.0958006311190225, "grad_norm": 0.2918008863925934, "learning_rate": 5.036646078642795e-06, "loss": 0.0181, "step": 135430 }, { "epoch": 1.0958815438142244, "grad_norm": 0.4053747057914734, "learning_rate": 5.03594000077585e-06, "loss": 0.0314, "step": 135440 }, { "epoch": 1.0959624565094264, "grad_norm": 0.36082541942596436, "learning_rate": 5.035233922192157e-06, "loss": 0.0219, "step": 135450 }, { "epoch": 1.0960433692046283, "grad_norm": 0.24598027765750885, "learning_rate": 5.0345278429057965e-06, "loss": 0.0234, "step": 135460 }, { "epoch": 1.09612428189983, "grad_norm": 0.527870237827301, "learning_rate": 5.033821762930852e-06, "loss": 0.0195, "step": 135470 }, { "epoch": 1.096205194595032, "grad_norm": 0.20545732975006104, "learning_rate": 5.033115682281403e-06, "loss": 0.0245, "step": 135480 }, { "epoch": 1.096286107290234, "grad_norm": 0.17237581312656403, "learning_rate": 5.032409600971533e-06, "loss": 0.0165, "step": 135490 }, { "epoch": 1.0963670199854356, "grad_norm": 0.24626736342906952, "learning_rate": 5.0317035190153205e-06, "loss": 0.0218, "step": 135500 }, { "epoch": 1.0964479326806376, "grad_norm": 0.9347394704818726, "learning_rate": 5.03099743642685e-06, "loss": 0.0363, "step": 135510 }, { "epoch": 1.0965288453758395, "grad_norm": 0.38123318552970886, "learning_rate": 5.030291353220199e-06, "loss": 0.0286, "step": 135520 }, { "epoch": 1.0966097580710414, "grad_norm": 0.3138059675693512, "learning_rate": 5.029585269409453e-06, "loss": 0.023, "step": 135530 }, { "epoch": 1.0966906707662432, "grad_norm": 0.2038440704345703, "learning_rate": 5.028879185008689e-06, "loss": 0.0165, "step": 135540 }, { "epoch": 1.096771583461445, "grad_norm": 0.08979606628417969, "learning_rate": 5.028173100031992e-06, "loss": 0.0212, "step": 135550 }, { "epoch": 1.096852496156647, "grad_norm": 0.15595559775829315, "learning_rate": 5.027467014493441e-06, "loss": 0.0254, "step": 135560 }, { "epoch": 1.0969334088518488, "grad_norm": 0.4594569802284241, "learning_rate": 5.026760928407119e-06, "loss": 0.0307, "step": 135570 }, { "epoch": 1.0970143215470507, "grad_norm": 1.0523707866668701, "learning_rate": 5.026054841787108e-06, "loss": 0.0263, "step": 135580 }, { "epoch": 1.0970952342422526, "grad_norm": 0.3162063956260681, "learning_rate": 5.025348754647488e-06, "loss": 0.016, "step": 135590 }, { "epoch": 1.0971761469374546, "grad_norm": 0.4484206438064575, "learning_rate": 5.0246426670023395e-06, "loss": 0.022, "step": 135600 }, { "epoch": 1.0972570596326563, "grad_norm": 0.3897746503353119, "learning_rate": 5.0239365788657445e-06, "loss": 0.0282, "step": 135610 }, { "epoch": 1.0973379723278582, "grad_norm": 0.2693675458431244, "learning_rate": 5.0232304902517855e-06, "loss": 0.0158, "step": 135620 }, { "epoch": 1.0974188850230602, "grad_norm": 0.5350448489189148, "learning_rate": 5.022524401174544e-06, "loss": 0.0181, "step": 135630 }, { "epoch": 1.097499797718262, "grad_norm": 0.3974560797214508, "learning_rate": 5.021818311648101e-06, "loss": 0.0209, "step": 135640 }, { "epoch": 1.0975807104134638, "grad_norm": 0.22929689288139343, "learning_rate": 5.021112221686537e-06, "loss": 0.0323, "step": 135650 }, { "epoch": 1.0976616231086658, "grad_norm": 0.4177696108818054, "learning_rate": 5.020406131303935e-06, "loss": 0.0205, "step": 135660 }, { "epoch": 1.0977425358038677, "grad_norm": 0.38916733860969543, "learning_rate": 5.019700040514375e-06, "loss": 0.0131, "step": 135670 }, { "epoch": 1.0978234484990694, "grad_norm": 0.2915493845939636, "learning_rate": 5.018993949331939e-06, "loss": 0.0299, "step": 135680 }, { "epoch": 1.0979043611942714, "grad_norm": 0.46637359261512756, "learning_rate": 5.01828785777071e-06, "loss": 0.0261, "step": 135690 }, { "epoch": 1.0979852738894733, "grad_norm": 0.38562411069869995, "learning_rate": 5.017581765844769e-06, "loss": 0.023, "step": 135700 }, { "epoch": 1.098066186584675, "grad_norm": 0.37458112835884094, "learning_rate": 5.016875673568196e-06, "loss": 0.0256, "step": 135710 }, { "epoch": 1.098147099279877, "grad_norm": 0.4364350736141205, "learning_rate": 5.0161695809550725e-06, "loss": 0.032, "step": 135720 }, { "epoch": 1.098228011975079, "grad_norm": 0.7059774398803711, "learning_rate": 5.015463488019479e-06, "loss": 0.0269, "step": 135730 }, { "epoch": 1.0983089246702809, "grad_norm": 0.47076717019081116, "learning_rate": 5.014757394775504e-06, "loss": 0.0229, "step": 135740 }, { "epoch": 1.0983898373654826, "grad_norm": 0.3689452111721039, "learning_rate": 5.0140513012372195e-06, "loss": 0.0162, "step": 135750 }, { "epoch": 1.0984707500606845, "grad_norm": 0.3621799349784851, "learning_rate": 5.013345207418713e-06, "loss": 0.0159, "step": 135760 }, { "epoch": 1.0985516627558864, "grad_norm": 0.371206134557724, "learning_rate": 5.012639113334065e-06, "loss": 0.0248, "step": 135770 }, { "epoch": 1.0986325754510884, "grad_norm": 1.144883155822754, "learning_rate": 5.011933018997356e-06, "loss": 0.0445, "step": 135780 }, { "epoch": 1.09871348814629, "grad_norm": 0.37731820344924927, "learning_rate": 5.011226924422669e-06, "loss": 0.0146, "step": 135790 }, { "epoch": 1.098794400841492, "grad_norm": 1.4987852573394775, "learning_rate": 5.0105208296240835e-06, "loss": 0.0354, "step": 135800 }, { "epoch": 1.098875313536694, "grad_norm": 0.15762513875961304, "learning_rate": 5.009814734615682e-06, "loss": 0.0273, "step": 135810 }, { "epoch": 1.0989562262318957, "grad_norm": 0.10255693644285202, "learning_rate": 5.009108639411548e-06, "loss": 0.0237, "step": 135820 }, { "epoch": 1.0990371389270976, "grad_norm": 0.3076164424419403, "learning_rate": 5.00840254402576e-06, "loss": 0.0174, "step": 135830 }, { "epoch": 1.0991180516222996, "grad_norm": 0.34838536381721497, "learning_rate": 5.0076964484724015e-06, "loss": 0.0308, "step": 135840 }, { "epoch": 1.0991989643175013, "grad_norm": 0.203557550907135, "learning_rate": 5.006990352765554e-06, "loss": 0.0209, "step": 135850 }, { "epoch": 1.0992798770127032, "grad_norm": 0.40360647439956665, "learning_rate": 5.006284256919297e-06, "loss": 0.0141, "step": 135860 }, { "epoch": 1.0993607897079052, "grad_norm": 0.47510045766830444, "learning_rate": 5.005578160947715e-06, "loss": 0.0207, "step": 135870 }, { "epoch": 1.0994417024031071, "grad_norm": 0.3591654598712921, "learning_rate": 5.004872064864889e-06, "loss": 0.0222, "step": 135880 }, { "epoch": 1.0995226150983088, "grad_norm": 0.3846880793571472, "learning_rate": 5.004165968684899e-06, "loss": 0.0086, "step": 135890 }, { "epoch": 1.0996035277935108, "grad_norm": 0.5954439043998718, "learning_rate": 5.003459872421829e-06, "loss": 0.0223, "step": 135900 }, { "epoch": 1.0996844404887127, "grad_norm": 0.4618792235851288, "learning_rate": 5.002753776089757e-06, "loss": 0.0257, "step": 135910 }, { "epoch": 1.0997653531839147, "grad_norm": 0.3011169731616974, "learning_rate": 5.002047679702768e-06, "loss": 0.0266, "step": 135920 }, { "epoch": 1.0998462658791164, "grad_norm": 0.3801358938217163, "learning_rate": 5.001341583274942e-06, "loss": 0.0261, "step": 135930 }, { "epoch": 1.0999271785743183, "grad_norm": 0.35009700059890747, "learning_rate": 5.00063548682036e-06, "loss": 0.02, "step": 135940 }, { "epoch": 1.1000080912695203, "grad_norm": 0.35086581110954285, "learning_rate": 4.999929390353106e-06, "loss": 0.0216, "step": 135950 }, { "epoch": 1.100089003964722, "grad_norm": 0.4748905599117279, "learning_rate": 4.9992232938872594e-06, "loss": 0.0229, "step": 135960 }, { "epoch": 1.100169916659924, "grad_norm": 0.8349231481552124, "learning_rate": 4.998517197436905e-06, "loss": 0.022, "step": 135970 }, { "epoch": 1.1002508293551259, "grad_norm": 0.35873502492904663, "learning_rate": 4.997811101016118e-06, "loss": 0.0189, "step": 135980 }, { "epoch": 1.1003317420503278, "grad_norm": 0.35624057054519653, "learning_rate": 4.997105004638986e-06, "loss": 0.0363, "step": 135990 }, { "epoch": 1.1004126547455295, "grad_norm": 0.25070422887802124, "learning_rate": 4.99639890831959e-06, "loss": 0.028, "step": 136000 }, { "epoch": 1.1004935674407315, "grad_norm": 0.1928921490907669, "learning_rate": 4.995692812072007e-06, "loss": 0.0082, "step": 136010 }, { "epoch": 1.1005744801359334, "grad_norm": 0.3140897750854492, "learning_rate": 4.994986715910323e-06, "loss": 0.0275, "step": 136020 }, { "epoch": 1.1006553928311351, "grad_norm": 0.36591392755508423, "learning_rate": 4.99428061984862e-06, "loss": 0.0168, "step": 136030 }, { "epoch": 1.100736305526337, "grad_norm": 0.3187583088874817, "learning_rate": 4.993574523900976e-06, "loss": 0.0252, "step": 136040 }, { "epoch": 1.100817218221539, "grad_norm": 0.19634084403514862, "learning_rate": 4.992868428081475e-06, "loss": 0.0222, "step": 136050 }, { "epoch": 1.100898130916741, "grad_norm": 0.5682613849639893, "learning_rate": 4.992162332404199e-06, "loss": 0.0294, "step": 136060 }, { "epoch": 1.1009790436119427, "grad_norm": 0.230634406208992, "learning_rate": 4.991456236883227e-06, "loss": 0.0228, "step": 136070 }, { "epoch": 1.1010599563071446, "grad_norm": 0.39620596170425415, "learning_rate": 4.990750141532644e-06, "loss": 0.0206, "step": 136080 }, { "epoch": 1.1011408690023465, "grad_norm": 0.2387705296278, "learning_rate": 4.9900440463665285e-06, "loss": 0.0247, "step": 136090 }, { "epoch": 1.1012217816975483, "grad_norm": 0.42063724994659424, "learning_rate": 4.989337951398965e-06, "loss": 0.031, "step": 136100 }, { "epoch": 1.1013026943927502, "grad_norm": 0.6381822228431702, "learning_rate": 4.988631856644033e-06, "loss": 0.018, "step": 136110 }, { "epoch": 1.1013836070879521, "grad_norm": 0.027479881420731544, "learning_rate": 4.987925762115814e-06, "loss": 0.026, "step": 136120 }, { "epoch": 1.101464519783154, "grad_norm": 0.4466626048088074, "learning_rate": 4.987219667828392e-06, "loss": 0.0115, "step": 136130 }, { "epoch": 1.1015454324783558, "grad_norm": 0.5158681869506836, "learning_rate": 4.986513573795844e-06, "loss": 0.0242, "step": 136140 }, { "epoch": 1.1016263451735577, "grad_norm": 0.426954448223114, "learning_rate": 4.985807480032256e-06, "loss": 0.0237, "step": 136150 }, { "epoch": 1.1017072578687597, "grad_norm": 0.11214461922645569, "learning_rate": 4.985101386551709e-06, "loss": 0.0296, "step": 136160 }, { "epoch": 1.1017881705639614, "grad_norm": 0.09130308032035828, "learning_rate": 4.9843952933682805e-06, "loss": 0.0216, "step": 136170 }, { "epoch": 1.1018690832591633, "grad_norm": 0.28963810205459595, "learning_rate": 4.983689200496057e-06, "loss": 0.0238, "step": 136180 }, { "epoch": 1.1019499959543653, "grad_norm": 0.36653608083724976, "learning_rate": 4.982983107949118e-06, "loss": 0.021, "step": 136190 }, { "epoch": 1.1020309086495672, "grad_norm": 0.16409745812416077, "learning_rate": 4.982277015741543e-06, "loss": 0.0181, "step": 136200 }, { "epoch": 1.102111821344769, "grad_norm": 0.40828627347946167, "learning_rate": 4.9815709238874165e-06, "loss": 0.0202, "step": 136210 }, { "epoch": 1.1021927340399709, "grad_norm": 0.2394128441810608, "learning_rate": 4.980864832400821e-06, "loss": 0.0138, "step": 136220 }, { "epoch": 1.1022736467351728, "grad_norm": 0.652962327003479, "learning_rate": 4.980158741295834e-06, "loss": 0.0242, "step": 136230 }, { "epoch": 1.1023545594303745, "grad_norm": 0.440387099981308, "learning_rate": 4.9794526505865385e-06, "loss": 0.0181, "step": 136240 }, { "epoch": 1.1024354721255765, "grad_norm": 0.4449760913848877, "learning_rate": 4.978746560287017e-06, "loss": 0.0116, "step": 136250 }, { "epoch": 1.1025163848207784, "grad_norm": 0.1489703506231308, "learning_rate": 4.978040470411353e-06, "loss": 0.0227, "step": 136260 }, { "epoch": 1.1025972975159803, "grad_norm": 0.547634482383728, "learning_rate": 4.9773343809736216e-06, "loss": 0.0208, "step": 136270 }, { "epoch": 1.102678210211182, "grad_norm": 0.25825634598731995, "learning_rate": 4.976628291987909e-06, "loss": 0.0305, "step": 136280 }, { "epoch": 1.102759122906384, "grad_norm": 0.23188015818595886, "learning_rate": 4.975922203468298e-06, "loss": 0.0204, "step": 136290 }, { "epoch": 1.102840035601586, "grad_norm": 0.5820443630218506, "learning_rate": 4.975216115428867e-06, "loss": 0.0319, "step": 136300 }, { "epoch": 1.1029209482967877, "grad_norm": 0.058969784528017044, "learning_rate": 4.974510027883696e-06, "loss": 0.0199, "step": 136310 }, { "epoch": 1.1030018609919896, "grad_norm": 0.2307106852531433, "learning_rate": 4.973803940846872e-06, "loss": 0.0231, "step": 136320 }, { "epoch": 1.1030827736871915, "grad_norm": 0.4712175130844116, "learning_rate": 4.973097854332471e-06, "loss": 0.0236, "step": 136330 }, { "epoch": 1.1031636863823935, "grad_norm": 0.44554582238197327, "learning_rate": 4.972391768354576e-06, "loss": 0.0354, "step": 136340 }, { "epoch": 1.1032445990775952, "grad_norm": 0.5002067685127258, "learning_rate": 4.971685682927272e-06, "loss": 0.0143, "step": 136350 }, { "epoch": 1.1033255117727971, "grad_norm": 0.6424174904823303, "learning_rate": 4.970979598064634e-06, "loss": 0.0273, "step": 136360 }, { "epoch": 1.103406424467999, "grad_norm": 0.19691471755504608, "learning_rate": 4.970273513780748e-06, "loss": 0.015, "step": 136370 }, { "epoch": 1.1034873371632008, "grad_norm": 0.34187445044517517, "learning_rate": 4.969567430089695e-06, "loss": 0.0243, "step": 136380 }, { "epoch": 1.1035682498584027, "grad_norm": 0.31789806485176086, "learning_rate": 4.968861347005553e-06, "loss": 0.0235, "step": 136390 }, { "epoch": 1.1036491625536047, "grad_norm": 0.19689863920211792, "learning_rate": 4.968155264542407e-06, "loss": 0.0184, "step": 136400 }, { "epoch": 1.1037300752488066, "grad_norm": 0.22722001373767853, "learning_rate": 4.9674491827143384e-06, "loss": 0.0202, "step": 136410 }, { "epoch": 1.1038109879440083, "grad_norm": 0.5423765182495117, "learning_rate": 4.9667431015354246e-06, "loss": 0.0338, "step": 136420 }, { "epoch": 1.1038919006392103, "grad_norm": 0.25731194019317627, "learning_rate": 4.966037021019749e-06, "loss": 0.0229, "step": 136430 }, { "epoch": 1.1039728133344122, "grad_norm": 0.45366257429122925, "learning_rate": 4.9653309411813955e-06, "loss": 0.0206, "step": 136440 }, { "epoch": 1.1040537260296142, "grad_norm": 0.9877212047576904, "learning_rate": 4.964624862034443e-06, "loss": 0.0258, "step": 136450 }, { "epoch": 1.1041346387248159, "grad_norm": 0.28850507736206055, "learning_rate": 4.963918783592972e-06, "loss": 0.0145, "step": 136460 }, { "epoch": 1.1042155514200178, "grad_norm": 0.775036096572876, "learning_rate": 4.963212705871065e-06, "loss": 0.0316, "step": 136470 }, { "epoch": 1.1042964641152198, "grad_norm": 0.411165326833725, "learning_rate": 4.962506628882804e-06, "loss": 0.0182, "step": 136480 }, { "epoch": 1.1043773768104215, "grad_norm": 0.479993611574173, "learning_rate": 4.961800552642268e-06, "loss": 0.0314, "step": 136490 }, { "epoch": 1.1044582895056234, "grad_norm": 0.44457998871803284, "learning_rate": 4.961094477163538e-06, "loss": 0.0178, "step": 136500 }, { "epoch": 1.1045392022008254, "grad_norm": 0.3553275465965271, "learning_rate": 4.9603884024606996e-06, "loss": 0.0321, "step": 136510 }, { "epoch": 1.104620114896027, "grad_norm": 0.38169437646865845, "learning_rate": 4.959682328547829e-06, "loss": 0.0289, "step": 136520 }, { "epoch": 1.104701027591229, "grad_norm": 0.14198897778987885, "learning_rate": 4.958976255439009e-06, "loss": 0.0104, "step": 136530 }, { "epoch": 1.104781940286431, "grad_norm": 0.2272050380706787, "learning_rate": 4.958270183148323e-06, "loss": 0.0246, "step": 136540 }, { "epoch": 1.104862852981633, "grad_norm": 0.23884832859039307, "learning_rate": 4.957564111689848e-06, "loss": 0.0225, "step": 136550 }, { "epoch": 1.1049437656768346, "grad_norm": 0.46389663219451904, "learning_rate": 4.9568580410776675e-06, "loss": 0.0166, "step": 136560 }, { "epoch": 1.1050246783720366, "grad_norm": 0.3250546455383301, "learning_rate": 4.956151971325864e-06, "loss": 0.0289, "step": 136570 }, { "epoch": 1.1051055910672385, "grad_norm": 0.1545400321483612, "learning_rate": 4.955445902448514e-06, "loss": 0.016, "step": 136580 }, { "epoch": 1.1051865037624404, "grad_norm": 0.07528392225503922, "learning_rate": 4.954739834459703e-06, "loss": 0.032, "step": 136590 }, { "epoch": 1.1052674164576421, "grad_norm": 0.3796873390674591, "learning_rate": 4.954033767373512e-06, "loss": 0.0236, "step": 136600 }, { "epoch": 1.105348329152844, "grad_norm": 0.29590046405792236, "learning_rate": 4.953327701204017e-06, "loss": 0.0166, "step": 136610 }, { "epoch": 1.105429241848046, "grad_norm": 0.20996084809303284, "learning_rate": 4.952621635965304e-06, "loss": 0.0182, "step": 136620 }, { "epoch": 1.1055101545432477, "grad_norm": 0.5313892364501953, "learning_rate": 4.951915571671453e-06, "loss": 0.0274, "step": 136630 }, { "epoch": 1.1055910672384497, "grad_norm": 0.13597191870212555, "learning_rate": 4.951209508336545e-06, "loss": 0.0243, "step": 136640 }, { "epoch": 1.1056719799336516, "grad_norm": 0.35163047909736633, "learning_rate": 4.950503445974659e-06, "loss": 0.0182, "step": 136650 }, { "epoch": 1.1057528926288536, "grad_norm": 0.26356765627861023, "learning_rate": 4.949797384599878e-06, "loss": 0.0191, "step": 136660 }, { "epoch": 1.1058338053240553, "grad_norm": 0.3705388903617859, "learning_rate": 4.949091324226283e-06, "loss": 0.0307, "step": 136670 }, { "epoch": 1.1059147180192572, "grad_norm": 0.30819588899612427, "learning_rate": 4.948385264867952e-06, "loss": 0.0104, "step": 136680 }, { "epoch": 1.1059956307144592, "grad_norm": 0.3544728755950928, "learning_rate": 4.947679206538968e-06, "loss": 0.0276, "step": 136690 }, { "epoch": 1.1060765434096609, "grad_norm": 0.2186550348997116, "learning_rate": 4.946973149253414e-06, "loss": 0.0285, "step": 136700 }, { "epoch": 1.1061574561048628, "grad_norm": 0.3983646035194397, "learning_rate": 4.946267093025367e-06, "loss": 0.0205, "step": 136710 }, { "epoch": 1.1062383688000648, "grad_norm": 0.2526622712612152, "learning_rate": 4.94556103786891e-06, "loss": 0.0287, "step": 136720 }, { "epoch": 1.1063192814952667, "grad_norm": 0.5274900794029236, "learning_rate": 4.944854983798123e-06, "loss": 0.0205, "step": 136730 }, { "epoch": 1.1064001941904684, "grad_norm": 0.3890821933746338, "learning_rate": 4.944148930827087e-06, "loss": 0.0271, "step": 136740 }, { "epoch": 1.1064811068856704, "grad_norm": 0.520277202129364, "learning_rate": 4.943442878969882e-06, "loss": 0.0238, "step": 136750 }, { "epoch": 1.1065620195808723, "grad_norm": 0.2710305154323578, "learning_rate": 4.9427368282405916e-06, "loss": 0.009, "step": 136760 }, { "epoch": 1.106642932276074, "grad_norm": 0.4065696895122528, "learning_rate": 4.942030778653291e-06, "loss": 0.0194, "step": 136770 }, { "epoch": 1.106723844971276, "grad_norm": 0.17359034717082977, "learning_rate": 4.941324730222066e-06, "loss": 0.0257, "step": 136780 }, { "epoch": 1.106804757666478, "grad_norm": 0.30801719427108765, "learning_rate": 4.940618682960994e-06, "loss": 0.0283, "step": 136790 }, { "epoch": 1.1068856703616798, "grad_norm": 0.5442469120025635, "learning_rate": 4.93991263688416e-06, "loss": 0.0259, "step": 136800 }, { "epoch": 1.1069665830568816, "grad_norm": 0.6278727650642395, "learning_rate": 4.939206592005639e-06, "loss": 0.0193, "step": 136810 }, { "epoch": 1.1070474957520835, "grad_norm": 0.17070771753787994, "learning_rate": 4.938500548339515e-06, "loss": 0.021, "step": 136820 }, { "epoch": 1.1071284084472854, "grad_norm": 0.08873093873262405, "learning_rate": 4.937794505899869e-06, "loss": 0.0165, "step": 136830 }, { "epoch": 1.1072093211424872, "grad_norm": 0.257352739572525, "learning_rate": 4.937088464700777e-06, "loss": 0.0256, "step": 136840 }, { "epoch": 1.107290233837689, "grad_norm": 0.19315043091773987, "learning_rate": 4.936382424756325e-06, "loss": 0.0158, "step": 136850 }, { "epoch": 1.107371146532891, "grad_norm": 0.7047288417816162, "learning_rate": 4.935676386080592e-06, "loss": 0.0172, "step": 136860 }, { "epoch": 1.107452059228093, "grad_norm": 0.46684643626213074, "learning_rate": 4.934970348687656e-06, "loss": 0.0212, "step": 136870 }, { "epoch": 1.1075329719232947, "grad_norm": 0.2676743268966675, "learning_rate": 4.9342643125916e-06, "loss": 0.0173, "step": 136880 }, { "epoch": 1.1076138846184966, "grad_norm": 0.4007599651813507, "learning_rate": 4.933558277806504e-06, "loss": 0.0349, "step": 136890 }, { "epoch": 1.1076947973136986, "grad_norm": 0.6493993997573853, "learning_rate": 4.9328522443464464e-06, "loss": 0.0243, "step": 136900 }, { "epoch": 1.1077757100089003, "grad_norm": 0.7074665427207947, "learning_rate": 4.932146212225508e-06, "loss": 0.0248, "step": 136910 }, { "epoch": 1.1078566227041022, "grad_norm": 0.30172500014305115, "learning_rate": 4.931440181457774e-06, "loss": 0.0192, "step": 136920 }, { "epoch": 1.1079375353993042, "grad_norm": 0.5195546746253967, "learning_rate": 4.930734152057318e-06, "loss": 0.0324, "step": 136930 }, { "epoch": 1.1080184480945061, "grad_norm": 0.24481956660747528, "learning_rate": 4.930028124038223e-06, "loss": 0.0179, "step": 136940 }, { "epoch": 1.1080993607897078, "grad_norm": 0.2880289554595947, "learning_rate": 4.929322097414571e-06, "loss": 0.0149, "step": 136950 }, { "epoch": 1.1081802734849098, "grad_norm": 0.1543477475643158, "learning_rate": 4.928616072200441e-06, "loss": 0.0216, "step": 136960 }, { "epoch": 1.1082611861801117, "grad_norm": 0.43698471784591675, "learning_rate": 4.92791004840991e-06, "loss": 0.0176, "step": 136970 }, { "epoch": 1.1083420988753137, "grad_norm": 1.0578263998031616, "learning_rate": 4.927204026057061e-06, "loss": 0.0278, "step": 136980 }, { "epoch": 1.1084230115705154, "grad_norm": 0.18097910284996033, "learning_rate": 4.9264980051559765e-06, "loss": 0.0231, "step": 136990 }, { "epoch": 1.1085039242657173, "grad_norm": 0.2725190818309784, "learning_rate": 4.925791985720734e-06, "loss": 0.0154, "step": 137000 }, { "epoch": 1.1085848369609193, "grad_norm": 0.4293343126773834, "learning_rate": 4.925085967765411e-06, "loss": 0.017, "step": 137010 }, { "epoch": 1.108665749656121, "grad_norm": 0.4064241945743561, "learning_rate": 4.924379951304094e-06, "loss": 0.0217, "step": 137020 }, { "epoch": 1.108746662351323, "grad_norm": 0.6260240077972412, "learning_rate": 4.923673936350857e-06, "loss": 0.0221, "step": 137030 }, { "epoch": 1.1088275750465248, "grad_norm": 0.6984662413597107, "learning_rate": 4.922967922919781e-06, "loss": 0.0264, "step": 137040 }, { "epoch": 1.1089084877417266, "grad_norm": 0.2961379885673523, "learning_rate": 4.92226191102495e-06, "loss": 0.017, "step": 137050 }, { "epoch": 1.1089894004369285, "grad_norm": 0.2333700805902481, "learning_rate": 4.921555900680438e-06, "loss": 0.0162, "step": 137060 }, { "epoch": 1.1090703131321304, "grad_norm": 0.5513296723365784, "learning_rate": 4.92084989190033e-06, "loss": 0.0255, "step": 137070 }, { "epoch": 1.1091512258273324, "grad_norm": 0.5656410455703735, "learning_rate": 4.920143884698705e-06, "loss": 0.0229, "step": 137080 }, { "epoch": 1.109232138522534, "grad_norm": 0.25188156962394714, "learning_rate": 4.919437879089639e-06, "loss": 0.0211, "step": 137090 }, { "epoch": 1.109313051217736, "grad_norm": 0.43498632311820984, "learning_rate": 4.9187318750872145e-06, "loss": 0.0279, "step": 137100 }, { "epoch": 1.109393963912938, "grad_norm": 0.3253936767578125, "learning_rate": 4.918025872705513e-06, "loss": 0.0239, "step": 137110 }, { "epoch": 1.10947487660814, "grad_norm": 0.33977803587913513, "learning_rate": 4.917319871958611e-06, "loss": 0.0241, "step": 137120 }, { "epoch": 1.1095557893033416, "grad_norm": 0.3645235300064087, "learning_rate": 4.91661387286059e-06, "loss": 0.0301, "step": 137130 }, { "epoch": 1.1096367019985436, "grad_norm": 0.19968309998512268, "learning_rate": 4.91590787542553e-06, "loss": 0.0156, "step": 137140 }, { "epoch": 1.1097176146937455, "grad_norm": 0.5881197452545166, "learning_rate": 4.915201879667509e-06, "loss": 0.0161, "step": 137150 }, { "epoch": 1.1097985273889472, "grad_norm": 0.6238812208175659, "learning_rate": 4.914495885600608e-06, "loss": 0.0257, "step": 137160 }, { "epoch": 1.1098794400841492, "grad_norm": 0.25492730736732483, "learning_rate": 4.913789893238905e-06, "loss": 0.0166, "step": 137170 }, { "epoch": 1.1099603527793511, "grad_norm": 0.2862280607223511, "learning_rate": 4.913083902596482e-06, "loss": 0.0363, "step": 137180 }, { "epoch": 1.1100412654745528, "grad_norm": 0.4357794523239136, "learning_rate": 4.912377913687417e-06, "loss": 0.0308, "step": 137190 }, { "epoch": 1.1101221781697548, "grad_norm": 0.5083141326904297, "learning_rate": 4.911671926525788e-06, "loss": 0.0158, "step": 137200 }, { "epoch": 1.1102030908649567, "grad_norm": 0.23462994396686554, "learning_rate": 4.910965941125678e-06, "loss": 0.0179, "step": 137210 }, { "epoch": 1.1102840035601587, "grad_norm": 0.3535262644290924, "learning_rate": 4.910259957501164e-06, "loss": 0.023, "step": 137220 }, { "epoch": 1.1103649162553604, "grad_norm": 0.15244324505329132, "learning_rate": 4.9095539756663235e-06, "loss": 0.0194, "step": 137230 }, { "epoch": 1.1104458289505623, "grad_norm": 0.3415049910545349, "learning_rate": 4.908847995635241e-06, "loss": 0.0169, "step": 137240 }, { "epoch": 1.1105267416457643, "grad_norm": 0.9195286631584167, "learning_rate": 4.90814201742199e-06, "loss": 0.0381, "step": 137250 }, { "epoch": 1.1106076543409662, "grad_norm": 0.31287696957588196, "learning_rate": 4.907436041040654e-06, "loss": 0.0227, "step": 137260 }, { "epoch": 1.110688567036168, "grad_norm": 0.6014354825019836, "learning_rate": 4.906730066505312e-06, "loss": 0.0245, "step": 137270 }, { "epoch": 1.1107694797313699, "grad_norm": 1.0050156116485596, "learning_rate": 4.90602409383004e-06, "loss": 0.0277, "step": 137280 }, { "epoch": 1.1108503924265718, "grad_norm": 0.3504403829574585, "learning_rate": 4.9053181230289195e-06, "loss": 0.019, "step": 137290 }, { "epoch": 1.1109313051217735, "grad_norm": 0.20885057747364044, "learning_rate": 4.90461215411603e-06, "loss": 0.0248, "step": 137300 }, { "epoch": 1.1110122178169755, "grad_norm": 0.03939872607588768, "learning_rate": 4.903906187105448e-06, "loss": 0.0161, "step": 137310 }, { "epoch": 1.1110931305121774, "grad_norm": 0.30762979388237, "learning_rate": 4.903200222011255e-06, "loss": 0.0246, "step": 137320 }, { "epoch": 1.1111740432073793, "grad_norm": 0.12455862015485764, "learning_rate": 4.902494258847529e-06, "loss": 0.0224, "step": 137330 }, { "epoch": 1.111254955902581, "grad_norm": 0.26661422848701477, "learning_rate": 4.901788297628351e-06, "loss": 0.0224, "step": 137340 }, { "epoch": 1.111335868597783, "grad_norm": 0.3196169435977936, "learning_rate": 4.901082338367795e-06, "loss": 0.0218, "step": 137350 }, { "epoch": 1.111416781292985, "grad_norm": 0.3390042781829834, "learning_rate": 4.900376381079945e-06, "loss": 0.022, "step": 137360 }, { "epoch": 1.1114976939881867, "grad_norm": 0.25991329550743103, "learning_rate": 4.899670425778879e-06, "loss": 0.0158, "step": 137370 }, { "epoch": 1.1115786066833886, "grad_norm": 0.2537256181240082, "learning_rate": 4.898964472478672e-06, "loss": 0.0139, "step": 137380 }, { "epoch": 1.1116595193785905, "grad_norm": 0.2580552101135254, "learning_rate": 4.898258521193405e-06, "loss": 0.0232, "step": 137390 }, { "epoch": 1.1117404320737925, "grad_norm": 0.3796073794364929, "learning_rate": 4.897552571937159e-06, "loss": 0.027, "step": 137400 }, { "epoch": 1.1118213447689942, "grad_norm": 0.3061491847038269, "learning_rate": 4.896846624724011e-06, "loss": 0.0262, "step": 137410 }, { "epoch": 1.1119022574641961, "grad_norm": 0.33918261528015137, "learning_rate": 4.896140679568037e-06, "loss": 0.0254, "step": 137420 }, { "epoch": 1.111983170159398, "grad_norm": 0.4030899405479431, "learning_rate": 4.89543473648332e-06, "loss": 0.0214, "step": 137430 }, { "epoch": 1.1120640828545998, "grad_norm": 0.6252205967903137, "learning_rate": 4.894728795483936e-06, "loss": 0.0221, "step": 137440 }, { "epoch": 1.1121449955498017, "grad_norm": 0.17036473751068115, "learning_rate": 4.894022856583962e-06, "loss": 0.0232, "step": 137450 }, { "epoch": 1.1122259082450037, "grad_norm": 0.3883069157600403, "learning_rate": 4.893316919797481e-06, "loss": 0.0266, "step": 137460 }, { "epoch": 1.1123068209402056, "grad_norm": 0.2448614090681076, "learning_rate": 4.892610985138567e-06, "loss": 0.0276, "step": 137470 }, { "epoch": 1.1123877336354073, "grad_norm": 0.22058705985546112, "learning_rate": 4.891905052621301e-06, "loss": 0.0141, "step": 137480 }, { "epoch": 1.1124686463306093, "grad_norm": 0.27510032057762146, "learning_rate": 4.891199122259762e-06, "loss": 0.0174, "step": 137490 }, { "epoch": 1.1125495590258112, "grad_norm": 0.22511780261993408, "learning_rate": 4.890493194068024e-06, "loss": 0.0145, "step": 137500 }, { "epoch": 1.112630471721013, "grad_norm": 0.24181927740573883, "learning_rate": 4.889787268060169e-06, "loss": 0.0186, "step": 137510 }, { "epoch": 1.1127113844162149, "grad_norm": 0.4261183738708496, "learning_rate": 4.889081344250274e-06, "loss": 0.0184, "step": 137520 }, { "epoch": 1.1127922971114168, "grad_norm": 0.7839192748069763, "learning_rate": 4.888375422652419e-06, "loss": 0.0178, "step": 137530 }, { "epoch": 1.1128732098066187, "grad_norm": 0.4796622693538666, "learning_rate": 4.887669503280679e-06, "loss": 0.0221, "step": 137540 }, { "epoch": 1.1129541225018205, "grad_norm": 0.42028823494911194, "learning_rate": 4.8869635861491335e-06, "loss": 0.0185, "step": 137550 }, { "epoch": 1.1130350351970224, "grad_norm": 0.5491944551467896, "learning_rate": 4.886257671271863e-06, "loss": 0.0264, "step": 137560 }, { "epoch": 1.1131159478922243, "grad_norm": 0.16421833634376526, "learning_rate": 4.8855517586629405e-06, "loss": 0.0173, "step": 137570 }, { "epoch": 1.113196860587426, "grad_norm": 0.4823038876056671, "learning_rate": 4.884845848336447e-06, "loss": 0.0259, "step": 137580 }, { "epoch": 1.113277773282628, "grad_norm": 0.22427083551883698, "learning_rate": 4.8841399403064624e-06, "loss": 0.0203, "step": 137590 }, { "epoch": 1.11335868597783, "grad_norm": 0.6355114579200745, "learning_rate": 4.883434034587059e-06, "loss": 0.0178, "step": 137600 }, { "epoch": 1.1134395986730319, "grad_norm": 0.49202823638916016, "learning_rate": 4.8827281311923175e-06, "loss": 0.0294, "step": 137610 }, { "epoch": 1.1135205113682336, "grad_norm": 0.24227501451969147, "learning_rate": 4.8820222301363185e-06, "loss": 0.0297, "step": 137620 }, { "epoch": 1.1136014240634355, "grad_norm": 0.2626812756061554, "learning_rate": 4.881316331433136e-06, "loss": 0.0163, "step": 137630 }, { "epoch": 1.1136823367586375, "grad_norm": 0.21189996600151062, "learning_rate": 4.880610435096847e-06, "loss": 0.025, "step": 137640 }, { "epoch": 1.1137632494538394, "grad_norm": 0.7670639753341675, "learning_rate": 4.879904541141534e-06, "loss": 0.0326, "step": 137650 }, { "epoch": 1.1138441621490411, "grad_norm": 0.005274935159832239, "learning_rate": 4.879198649581269e-06, "loss": 0.022, "step": 137660 }, { "epoch": 1.113925074844243, "grad_norm": 0.5555145740509033, "learning_rate": 4.878492760430133e-06, "loss": 0.0248, "step": 137670 }, { "epoch": 1.114005987539445, "grad_norm": 0.43790528178215027, "learning_rate": 4.8777868737022024e-06, "loss": 0.0179, "step": 137680 }, { "epoch": 1.1140869002346467, "grad_norm": 0.36563408374786377, "learning_rate": 4.877080989411554e-06, "loss": 0.0182, "step": 137690 }, { "epoch": 1.1141678129298487, "grad_norm": 0.22725653648376465, "learning_rate": 4.876375107572266e-06, "loss": 0.0233, "step": 137700 }, { "epoch": 1.1142487256250506, "grad_norm": 0.3713858723640442, "learning_rate": 4.8756692281984144e-06, "loss": 0.0227, "step": 137710 }, { "epoch": 1.1143296383202523, "grad_norm": 0.26615065336227417, "learning_rate": 4.874963351304079e-06, "loss": 0.0131, "step": 137720 }, { "epoch": 1.1144105510154543, "grad_norm": 0.5148043632507324, "learning_rate": 4.874257476903335e-06, "loss": 0.0198, "step": 137730 }, { "epoch": 1.1144914637106562, "grad_norm": 0.31094735860824585, "learning_rate": 4.87355160501026e-06, "loss": 0.0248, "step": 137740 }, { "epoch": 1.1145723764058582, "grad_norm": 0.30991488695144653, "learning_rate": 4.872845735638933e-06, "loss": 0.0177, "step": 137750 }, { "epoch": 1.1146532891010599, "grad_norm": 0.2562153935432434, "learning_rate": 4.872139868803427e-06, "loss": 0.0149, "step": 137760 }, { "epoch": 1.1147342017962618, "grad_norm": 0.5090672373771667, "learning_rate": 4.871434004517822e-06, "loss": 0.0279, "step": 137770 }, { "epoch": 1.1148151144914638, "grad_norm": 0.29348814487457275, "learning_rate": 4.870728142796197e-06, "loss": 0.0216, "step": 137780 }, { "epoch": 1.1148960271866657, "grad_norm": 0.4029272496700287, "learning_rate": 4.870022283652623e-06, "loss": 0.0269, "step": 137790 }, { "epoch": 1.1149769398818674, "grad_norm": 0.7857515215873718, "learning_rate": 4.86931642710118e-06, "loss": 0.0309, "step": 137800 }, { "epoch": 1.1150578525770694, "grad_norm": 0.5199955105781555, "learning_rate": 4.868610573155948e-06, "loss": 0.022, "step": 137810 }, { "epoch": 1.1151387652722713, "grad_norm": 0.11922214180231094, "learning_rate": 4.867904721830998e-06, "loss": 0.0191, "step": 137820 }, { "epoch": 1.115219677967473, "grad_norm": 0.196914941072464, "learning_rate": 4.8671988731404095e-06, "loss": 0.0196, "step": 137830 }, { "epoch": 1.115300590662675, "grad_norm": 0.47312915325164795, "learning_rate": 4.866493027098262e-06, "loss": 0.0263, "step": 137840 }, { "epoch": 1.115381503357877, "grad_norm": 0.12572656571865082, "learning_rate": 4.865787183718627e-06, "loss": 0.0166, "step": 137850 }, { "epoch": 1.1154624160530788, "grad_norm": 0.042613860219717026, "learning_rate": 4.865081343015583e-06, "loss": 0.0136, "step": 137860 }, { "epoch": 1.1155433287482805, "grad_norm": 0.18376556038856506, "learning_rate": 4.864375505003206e-06, "loss": 0.0193, "step": 137870 }, { "epoch": 1.1156242414434825, "grad_norm": 0.4766969382762909, "learning_rate": 4.863669669695576e-06, "loss": 0.0286, "step": 137880 }, { "epoch": 1.1157051541386844, "grad_norm": 0.5474727153778076, "learning_rate": 4.862963837106765e-06, "loss": 0.0186, "step": 137890 }, { "epoch": 1.1157860668338861, "grad_norm": 0.3473810851573944, "learning_rate": 4.86225800725085e-06, "loss": 0.0189, "step": 137900 }, { "epoch": 1.115866979529088, "grad_norm": 0.22161361575126648, "learning_rate": 4.861552180141911e-06, "loss": 0.0147, "step": 137910 }, { "epoch": 1.11594789222429, "grad_norm": 0.4409887492656708, "learning_rate": 4.86084635579402e-06, "loss": 0.0168, "step": 137920 }, { "epoch": 1.116028804919492, "grad_norm": 0.5798505544662476, "learning_rate": 4.860140534221254e-06, "loss": 0.0249, "step": 137930 }, { "epoch": 1.1161097176146937, "grad_norm": 0.15085077285766602, "learning_rate": 4.859434715437692e-06, "loss": 0.0165, "step": 137940 }, { "epoch": 1.1161906303098956, "grad_norm": 0.5342578887939453, "learning_rate": 4.858728899457404e-06, "loss": 0.0212, "step": 137950 }, { "epoch": 1.1162715430050976, "grad_norm": 0.44997015595436096, "learning_rate": 4.858023086294473e-06, "loss": 0.0307, "step": 137960 }, { "epoch": 1.1163524557002993, "grad_norm": 0.196675643324852, "learning_rate": 4.857317275962972e-06, "loss": 0.0121, "step": 137970 }, { "epoch": 1.1164333683955012, "grad_norm": 0.3547457754611969, "learning_rate": 4.856611468476974e-06, "loss": 0.0162, "step": 137980 }, { "epoch": 1.1165142810907032, "grad_norm": 0.27333834767341614, "learning_rate": 4.8559056638505594e-06, "loss": 0.0151, "step": 137990 }, { "epoch": 1.116595193785905, "grad_norm": 0.4463709592819214, "learning_rate": 4.855199862097804e-06, "loss": 0.036, "step": 138000 }, { "epoch": 1.1166761064811068, "grad_norm": 0.3911530375480652, "learning_rate": 4.854494063232778e-06, "loss": 0.0219, "step": 138010 }, { "epoch": 1.1167570191763088, "grad_norm": 0.21888180077075958, "learning_rate": 4.853788267269561e-06, "loss": 0.0164, "step": 138020 }, { "epoch": 1.1168379318715107, "grad_norm": 0.5124346017837524, "learning_rate": 4.853082474222232e-06, "loss": 0.0305, "step": 138030 }, { "epoch": 1.1169188445667124, "grad_norm": 0.37467753887176514, "learning_rate": 4.85237668410486e-06, "loss": 0.0176, "step": 138040 }, { "epoch": 1.1169997572619144, "grad_norm": 0.3967403769493103, "learning_rate": 4.851670896931523e-06, "loss": 0.0266, "step": 138050 }, { "epoch": 1.1170806699571163, "grad_norm": 0.7603587508201599, "learning_rate": 4.850965112716298e-06, "loss": 0.0268, "step": 138060 }, { "epoch": 1.1171615826523182, "grad_norm": 0.4998309314250946, "learning_rate": 4.85025933147326e-06, "loss": 0.017, "step": 138070 }, { "epoch": 1.11724249534752, "grad_norm": 0.6453018188476562, "learning_rate": 4.849553553216482e-06, "loss": 0.05, "step": 138080 }, { "epoch": 1.117323408042722, "grad_norm": 0.4092952013015747, "learning_rate": 4.84884777796004e-06, "loss": 0.0191, "step": 138090 }, { "epoch": 1.1174043207379238, "grad_norm": 0.3021036684513092, "learning_rate": 4.848142005718012e-06, "loss": 0.0221, "step": 138100 }, { "epoch": 1.1174852334331256, "grad_norm": 0.6008918881416321, "learning_rate": 4.847436236504471e-06, "loss": 0.0296, "step": 138110 }, { "epoch": 1.1175661461283275, "grad_norm": 0.4946078658103943, "learning_rate": 4.846730470333491e-06, "loss": 0.0325, "step": 138120 }, { "epoch": 1.1176470588235294, "grad_norm": 0.4621659219264984, "learning_rate": 4.846024707219149e-06, "loss": 0.0348, "step": 138130 }, { "epoch": 1.1177279715187314, "grad_norm": 0.21543525159358978, "learning_rate": 4.84531894717552e-06, "loss": 0.0198, "step": 138140 }, { "epoch": 1.117808884213933, "grad_norm": 0.23243990540504456, "learning_rate": 4.844613190216675e-06, "loss": 0.0215, "step": 138150 }, { "epoch": 1.117889796909135, "grad_norm": 0.2589965760707855, "learning_rate": 4.8439074363566955e-06, "loss": 0.01, "step": 138160 }, { "epoch": 1.117970709604337, "grad_norm": 0.19128833711147308, "learning_rate": 4.84320168560965e-06, "loss": 0.0227, "step": 138170 }, { "epoch": 1.1180516222995387, "grad_norm": 0.5006116628646851, "learning_rate": 4.842495937989617e-06, "loss": 0.0218, "step": 138180 }, { "epoch": 1.1181325349947406, "grad_norm": 0.2709532678127289, "learning_rate": 4.8417901935106705e-06, "loss": 0.0168, "step": 138190 }, { "epoch": 1.1182134476899426, "grad_norm": 0.4170434772968292, "learning_rate": 4.8410844521868835e-06, "loss": 0.0154, "step": 138200 }, { "epoch": 1.1182943603851445, "grad_norm": 0.34540116786956787, "learning_rate": 4.840378714032331e-06, "loss": 0.0299, "step": 138210 }, { "epoch": 1.1183752730803462, "grad_norm": 0.34252721071243286, "learning_rate": 4.8396729790610905e-06, "loss": 0.0153, "step": 138220 }, { "epoch": 1.1184561857755482, "grad_norm": 0.3862716853618622, "learning_rate": 4.838967247287233e-06, "loss": 0.0161, "step": 138230 }, { "epoch": 1.1185370984707501, "grad_norm": 0.5813004374504089, "learning_rate": 4.838261518724832e-06, "loss": 0.0213, "step": 138240 }, { "epoch": 1.1186180111659518, "grad_norm": 0.28343841433525085, "learning_rate": 4.837555793387965e-06, "loss": 0.0202, "step": 138250 }, { "epoch": 1.1186989238611538, "grad_norm": 1.0009610652923584, "learning_rate": 4.836850071290706e-06, "loss": 0.0306, "step": 138260 }, { "epoch": 1.1187798365563557, "grad_norm": 0.05141504481434822, "learning_rate": 4.836144352447126e-06, "loss": 0.0152, "step": 138270 }, { "epoch": 1.1188607492515577, "grad_norm": 0.7460417151451111, "learning_rate": 4.8354386368713e-06, "loss": 0.0328, "step": 138280 }, { "epoch": 1.1189416619467594, "grad_norm": 0.4589037001132965, "learning_rate": 4.834732924577306e-06, "loss": 0.0136, "step": 138290 }, { "epoch": 1.1190225746419613, "grad_norm": 0.6125006079673767, "learning_rate": 4.834027215579214e-06, "loss": 0.0217, "step": 138300 }, { "epoch": 1.1191034873371632, "grad_norm": 0.6365453600883484, "learning_rate": 4.833321509891097e-06, "loss": 0.0188, "step": 138310 }, { "epoch": 1.1191844000323652, "grad_norm": 0.6196280121803284, "learning_rate": 4.832615807527034e-06, "loss": 0.0298, "step": 138320 }, { "epoch": 1.119265312727567, "grad_norm": 0.4774401783943176, "learning_rate": 4.831910108501093e-06, "loss": 0.0249, "step": 138330 }, { "epoch": 1.1193462254227688, "grad_norm": 0.30809643864631653, "learning_rate": 4.8312044128273495e-06, "loss": 0.0215, "step": 138340 }, { "epoch": 1.1194271381179708, "grad_norm": 0.23247118294239044, "learning_rate": 4.830498720519879e-06, "loss": 0.0212, "step": 138350 }, { "epoch": 1.1195080508131725, "grad_norm": 0.10643882304430008, "learning_rate": 4.8297930315927525e-06, "loss": 0.0195, "step": 138360 }, { "epoch": 1.1195889635083744, "grad_norm": 0.521347165107727, "learning_rate": 4.829087346060046e-06, "loss": 0.0254, "step": 138370 }, { "epoch": 1.1196698762035764, "grad_norm": 0.34690797328948975, "learning_rate": 4.828381663935832e-06, "loss": 0.0323, "step": 138380 }, { "epoch": 1.119750788898778, "grad_norm": 0.7250932455062866, "learning_rate": 4.827675985234181e-06, "loss": 0.0199, "step": 138390 }, { "epoch": 1.11983170159398, "grad_norm": 0.4639681875705719, "learning_rate": 4.82697030996917e-06, "loss": 0.041, "step": 138400 }, { "epoch": 1.119912614289182, "grad_norm": 0.1932530552148819, "learning_rate": 4.826264638154872e-06, "loss": 0.0256, "step": 138410 }, { "epoch": 1.119993526984384, "grad_norm": 0.3121589124202728, "learning_rate": 4.825558969805357e-06, "loss": 0.0253, "step": 138420 }, { "epoch": 1.1200744396795856, "grad_norm": 0.16137340664863586, "learning_rate": 4.8248533049347e-06, "loss": 0.0175, "step": 138430 }, { "epoch": 1.1201553523747876, "grad_norm": 0.36229947209358215, "learning_rate": 4.824147643556975e-06, "loss": 0.0248, "step": 138440 }, { "epoch": 1.1202362650699895, "grad_norm": 0.17084546387195587, "learning_rate": 4.8234419856862555e-06, "loss": 0.0162, "step": 138450 }, { "epoch": 1.1203171777651915, "grad_norm": 0.2759135067462921, "learning_rate": 4.82273633133661e-06, "loss": 0.0277, "step": 138460 }, { "epoch": 1.1203980904603932, "grad_norm": 0.18287776410579681, "learning_rate": 4.822030680522116e-06, "loss": 0.0249, "step": 138470 }, { "epoch": 1.1204790031555951, "grad_norm": 0.5137223601341248, "learning_rate": 4.821325033256845e-06, "loss": 0.0204, "step": 138480 }, { "epoch": 1.120559915850797, "grad_norm": 0.28733396530151367, "learning_rate": 4.8206193895548676e-06, "loss": 0.0141, "step": 138490 }, { "epoch": 1.1206408285459988, "grad_norm": 0.40513837337493896, "learning_rate": 4.819913749430256e-06, "loss": 0.0258, "step": 138500 }, { "epoch": 1.1207217412412007, "grad_norm": 0.04808098077774048, "learning_rate": 4.8192081128970885e-06, "loss": 0.0184, "step": 138510 }, { "epoch": 1.1208026539364027, "grad_norm": 0.2878132462501526, "learning_rate": 4.818502479969432e-06, "loss": 0.0273, "step": 138520 }, { "epoch": 1.1208835666316046, "grad_norm": 0.17372438311576843, "learning_rate": 4.817796850661359e-06, "loss": 0.0187, "step": 138530 }, { "epoch": 1.1209644793268063, "grad_norm": 0.19898997247219086, "learning_rate": 4.817091224986945e-06, "loss": 0.0164, "step": 138540 }, { "epoch": 1.1210453920220083, "grad_norm": 0.5889077186584473, "learning_rate": 4.81638560296026e-06, "loss": 0.0204, "step": 138550 }, { "epoch": 1.1211263047172102, "grad_norm": 0.08163417875766754, "learning_rate": 4.815679984595376e-06, "loss": 0.0245, "step": 138560 }, { "epoch": 1.121207217412412, "grad_norm": 0.15501365065574646, "learning_rate": 4.814974369906366e-06, "loss": 0.0186, "step": 138570 }, { "epoch": 1.1212881301076139, "grad_norm": 0.04884669929742813, "learning_rate": 4.8142687589073e-06, "loss": 0.0169, "step": 138580 }, { "epoch": 1.1213690428028158, "grad_norm": 0.3732700049877167, "learning_rate": 4.813563151612253e-06, "loss": 0.0294, "step": 138590 }, { "epoch": 1.1214499554980177, "grad_norm": 0.2755415141582489, "learning_rate": 4.812857548035295e-06, "loss": 0.0336, "step": 138600 }, { "epoch": 1.1215308681932195, "grad_norm": 0.3182700574398041, "learning_rate": 4.8121519481904994e-06, "loss": 0.0205, "step": 138610 }, { "epoch": 1.1216117808884214, "grad_norm": 0.26576539874076843, "learning_rate": 4.811446352091935e-06, "loss": 0.0161, "step": 138620 }, { "epoch": 1.1216926935836233, "grad_norm": 0.5989582538604736, "learning_rate": 4.810740759753675e-06, "loss": 0.0474, "step": 138630 }, { "epoch": 1.121773606278825, "grad_norm": 0.29388248920440674, "learning_rate": 4.8100351711897925e-06, "loss": 0.0211, "step": 138640 }, { "epoch": 1.121854518974027, "grad_norm": 0.1590328812599182, "learning_rate": 4.809329586414356e-06, "loss": 0.0136, "step": 138650 }, { "epoch": 1.121935431669229, "grad_norm": 0.4132266938686371, "learning_rate": 4.8086240054414394e-06, "loss": 0.0223, "step": 138660 }, { "epoch": 1.1220163443644309, "grad_norm": 0.599843442440033, "learning_rate": 4.8079184282851135e-06, "loss": 0.0189, "step": 138670 }, { "epoch": 1.1220972570596326, "grad_norm": 0.675352931022644, "learning_rate": 4.807212854959447e-06, "loss": 0.0173, "step": 138680 }, { "epoch": 1.1221781697548345, "grad_norm": 0.5624329447746277, "learning_rate": 4.806507285478514e-06, "loss": 0.0423, "step": 138690 }, { "epoch": 1.1222590824500365, "grad_norm": 0.6607673764228821, "learning_rate": 4.805801719856385e-06, "loss": 0.0256, "step": 138700 }, { "epoch": 1.1223399951452382, "grad_norm": 0.2300182431936264, "learning_rate": 4.805096158107131e-06, "loss": 0.0163, "step": 138710 }, { "epoch": 1.1224209078404401, "grad_norm": 0.24461433291435242, "learning_rate": 4.804390600244821e-06, "loss": 0.0119, "step": 138720 }, { "epoch": 1.122501820535642, "grad_norm": 0.7402563691139221, "learning_rate": 4.80368504628353e-06, "loss": 0.0224, "step": 138730 }, { "epoch": 1.122582733230844, "grad_norm": 0.3196914792060852, "learning_rate": 4.802979496237326e-06, "loss": 0.0183, "step": 138740 }, { "epoch": 1.1226636459260457, "grad_norm": 0.8245180249214172, "learning_rate": 4.802273950120277e-06, "loss": 0.0344, "step": 138750 }, { "epoch": 1.1227445586212477, "grad_norm": 0.32793301343917847, "learning_rate": 4.80156840794646e-06, "loss": 0.018, "step": 138760 }, { "epoch": 1.1228254713164496, "grad_norm": 0.36852672696113586, "learning_rate": 4.80086286972994e-06, "loss": 0.0212, "step": 138770 }, { "epoch": 1.1229063840116513, "grad_norm": 0.3022100627422333, "learning_rate": 4.800157335484791e-06, "loss": 0.0215, "step": 138780 }, { "epoch": 1.1229872967068533, "grad_norm": 0.20419034361839294, "learning_rate": 4.799451805225081e-06, "loss": 0.0193, "step": 138790 }, { "epoch": 1.1230682094020552, "grad_norm": 0.1562427431344986, "learning_rate": 4.798746278964883e-06, "loss": 0.0136, "step": 138800 }, { "epoch": 1.1231491220972571, "grad_norm": 0.22133886814117432, "learning_rate": 4.798040756718265e-06, "loss": 0.0271, "step": 138810 }, { "epoch": 1.1232300347924589, "grad_norm": 0.28531378507614136, "learning_rate": 4.797335238499296e-06, "loss": 0.0204, "step": 138820 }, { "epoch": 1.1233109474876608, "grad_norm": 0.17036974430084229, "learning_rate": 4.796629724322051e-06, "loss": 0.02, "step": 138830 }, { "epoch": 1.1233918601828627, "grad_norm": 0.1962958127260208, "learning_rate": 4.795924214200594e-06, "loss": 0.0269, "step": 138840 }, { "epoch": 1.1234727728780647, "grad_norm": 0.512160062789917, "learning_rate": 4.795218708148999e-06, "loss": 0.0299, "step": 138850 }, { "epoch": 1.1235536855732664, "grad_norm": 0.006781912408769131, "learning_rate": 4.794513206181336e-06, "loss": 0.0159, "step": 138860 }, { "epoch": 1.1236345982684683, "grad_norm": 0.370394229888916, "learning_rate": 4.79380770831167e-06, "loss": 0.0267, "step": 138870 }, { "epoch": 1.1237155109636703, "grad_norm": 0.3018147945404053, "learning_rate": 4.793102214554075e-06, "loss": 0.0233, "step": 138880 }, { "epoch": 1.123796423658872, "grad_norm": 0.19917939603328705, "learning_rate": 4.792396724922622e-06, "loss": 0.0116, "step": 138890 }, { "epoch": 1.123877336354074, "grad_norm": 0.30360665917396545, "learning_rate": 4.7916912394313756e-06, "loss": 0.0187, "step": 138900 }, { "epoch": 1.1239582490492759, "grad_norm": 0.5018405318260193, "learning_rate": 4.790985758094407e-06, "loss": 0.0243, "step": 138910 }, { "epoch": 1.1240391617444776, "grad_norm": 0.31745022535324097, "learning_rate": 4.7902802809257885e-06, "loss": 0.0378, "step": 138920 }, { "epoch": 1.1241200744396795, "grad_norm": 0.2632172107696533, "learning_rate": 4.789574807939586e-06, "loss": 0.0258, "step": 138930 }, { "epoch": 1.1242009871348815, "grad_norm": 0.30519890785217285, "learning_rate": 4.788869339149869e-06, "loss": 0.0209, "step": 138940 }, { "epoch": 1.1242818998300834, "grad_norm": 0.31769028306007385, "learning_rate": 4.788163874570709e-06, "loss": 0.0297, "step": 138950 }, { "epoch": 1.1243628125252851, "grad_norm": 1.1874920129776, "learning_rate": 4.787458414216173e-06, "loss": 0.0234, "step": 138960 }, { "epoch": 1.124443725220487, "grad_norm": 0.40173009037971497, "learning_rate": 4.786752958100329e-06, "loss": 0.0177, "step": 138970 }, { "epoch": 1.124524637915689, "grad_norm": 0.2931114137172699, "learning_rate": 4.786047506237246e-06, "loss": 0.0245, "step": 138980 }, { "epoch": 1.124605550610891, "grad_norm": 0.5505731105804443, "learning_rate": 4.785342058640997e-06, "loss": 0.0213, "step": 138990 }, { "epoch": 1.1246864633060927, "grad_norm": 0.23658889532089233, "learning_rate": 4.784636615325646e-06, "loss": 0.0515, "step": 139000 }, { "epoch": 1.1247673760012946, "grad_norm": 0.5373610258102417, "learning_rate": 4.783931176305262e-06, "loss": 0.0231, "step": 139010 }, { "epoch": 1.1248482886964966, "grad_norm": 0.27491772174835205, "learning_rate": 4.783225741593917e-06, "loss": 0.0269, "step": 139020 }, { "epoch": 1.1249292013916983, "grad_norm": 0.482339471578598, "learning_rate": 4.782520311205676e-06, "loss": 0.0257, "step": 139030 }, { "epoch": 1.1250101140869002, "grad_norm": 0.43408894538879395, "learning_rate": 4.781814885154607e-06, "loss": 0.0297, "step": 139040 }, { "epoch": 1.1250910267821022, "grad_norm": 0.19874508678913116, "learning_rate": 4.781109463454781e-06, "loss": 0.0197, "step": 139050 }, { "epoch": 1.1251719394773039, "grad_norm": 0.19322462379932404, "learning_rate": 4.780404046120263e-06, "loss": 0.0523, "step": 139060 }, { "epoch": 1.1252528521725058, "grad_norm": 0.35235485434532166, "learning_rate": 4.779698633165124e-06, "loss": 0.0257, "step": 139070 }, { "epoch": 1.1253337648677078, "grad_norm": 0.4032801389694214, "learning_rate": 4.778993224603432e-06, "loss": 0.0224, "step": 139080 }, { "epoch": 1.1254146775629097, "grad_norm": 0.5497995018959045, "learning_rate": 4.778287820449251e-06, "loss": 0.0323, "step": 139090 }, { "epoch": 1.1254955902581114, "grad_norm": 0.4296051263809204, "learning_rate": 4.7775824207166525e-06, "loss": 0.0167, "step": 139100 }, { "epoch": 1.1255765029533134, "grad_norm": 0.41916295886039734, "learning_rate": 4.776877025419704e-06, "loss": 0.0112, "step": 139110 }, { "epoch": 1.1256574156485153, "grad_norm": 0.4879678189754486, "learning_rate": 4.776171634572469e-06, "loss": 0.0269, "step": 139120 }, { "epoch": 1.1257383283437172, "grad_norm": 0.21017758548259735, "learning_rate": 4.77546624818902e-06, "loss": 0.0099, "step": 139130 }, { "epoch": 1.125819241038919, "grad_norm": 0.4331344664096832, "learning_rate": 4.774760866283423e-06, "loss": 0.0253, "step": 139140 }, { "epoch": 1.125900153734121, "grad_norm": 0.5309089422225952, "learning_rate": 4.774055488869746e-06, "loss": 0.0149, "step": 139150 }, { "epoch": 1.1259810664293228, "grad_norm": 0.2809674143791199, "learning_rate": 4.7733501159620536e-06, "loss": 0.0198, "step": 139160 }, { "epoch": 1.1260619791245245, "grad_norm": 0.07602138817310333, "learning_rate": 4.772644747574415e-06, "loss": 0.0133, "step": 139170 }, { "epoch": 1.1261428918197265, "grad_norm": 0.2879362106323242, "learning_rate": 4.771939383720899e-06, "loss": 0.0176, "step": 139180 }, { "epoch": 1.1262238045149284, "grad_norm": 0.2487967163324356, "learning_rate": 4.771234024415568e-06, "loss": 0.0126, "step": 139190 }, { "epoch": 1.1263047172101301, "grad_norm": 0.9169918894767761, "learning_rate": 4.770528669672491e-06, "loss": 0.0375, "step": 139200 }, { "epoch": 1.126385629905332, "grad_norm": 0.3203243315219879, "learning_rate": 4.769823319505737e-06, "loss": 0.0202, "step": 139210 }, { "epoch": 1.126466542600534, "grad_norm": 0.42168986797332764, "learning_rate": 4.769117973929371e-06, "loss": 0.0211, "step": 139220 }, { "epoch": 1.126547455295736, "grad_norm": 0.05355274677276611, "learning_rate": 4.768412632957459e-06, "loss": 0.0146, "step": 139230 }, { "epoch": 1.1266283679909377, "grad_norm": 0.3676462769508362, "learning_rate": 4.767707296604069e-06, "loss": 0.0179, "step": 139240 }, { "epoch": 1.1267092806861396, "grad_norm": 0.3468533754348755, "learning_rate": 4.767001964883264e-06, "loss": 0.0215, "step": 139250 }, { "epoch": 1.1267901933813416, "grad_norm": 0.4665391445159912, "learning_rate": 4.7662966378091155e-06, "loss": 0.0264, "step": 139260 }, { "epoch": 1.1268711060765435, "grad_norm": 0.5120178461074829, "learning_rate": 4.765591315395688e-06, "loss": 0.0158, "step": 139270 }, { "epoch": 1.1269520187717452, "grad_norm": 0.5220310091972351, "learning_rate": 4.764885997657044e-06, "loss": 0.0184, "step": 139280 }, { "epoch": 1.1270329314669472, "grad_norm": 0.41431236267089844, "learning_rate": 4.764180684607254e-06, "loss": 0.023, "step": 139290 }, { "epoch": 1.127113844162149, "grad_norm": 0.5266072750091553, "learning_rate": 4.763475376260383e-06, "loss": 0.0275, "step": 139300 }, { "epoch": 1.1271947568573508, "grad_norm": 0.0013891708804294467, "learning_rate": 4.762770072630494e-06, "loss": 0.023, "step": 139310 }, { "epoch": 1.1272756695525528, "grad_norm": 0.11067147552967072, "learning_rate": 4.762064773731656e-06, "loss": 0.0212, "step": 139320 }, { "epoch": 1.1273565822477547, "grad_norm": 0.4505634605884552, "learning_rate": 4.761359479577935e-06, "loss": 0.0269, "step": 139330 }, { "epoch": 1.1274374949429566, "grad_norm": 0.33865994215011597, "learning_rate": 4.760654190183395e-06, "loss": 0.0264, "step": 139340 }, { "epoch": 1.1275184076381584, "grad_norm": 0.4333495795726776, "learning_rate": 4.759948905562101e-06, "loss": 0.0202, "step": 139350 }, { "epoch": 1.1275993203333603, "grad_norm": 0.26510268449783325, "learning_rate": 4.759243625728119e-06, "loss": 0.0172, "step": 139360 }, { "epoch": 1.1276802330285622, "grad_norm": 0.574471652507782, "learning_rate": 4.758538350695517e-06, "loss": 0.018, "step": 139370 }, { "epoch": 1.1277611457237642, "grad_norm": 0.14701692759990692, "learning_rate": 4.757833080478356e-06, "loss": 0.01, "step": 139380 }, { "epoch": 1.127842058418966, "grad_norm": 0.2918386161327362, "learning_rate": 4.7571278150907015e-06, "loss": 0.0279, "step": 139390 }, { "epoch": 1.1279229711141678, "grad_norm": 0.4681812524795532, "learning_rate": 4.756422554546624e-06, "loss": 0.0285, "step": 139400 }, { "epoch": 1.1280038838093698, "grad_norm": 0.3533725440502167, "learning_rate": 4.755717298860182e-06, "loss": 0.0183, "step": 139410 }, { "epoch": 1.1280847965045715, "grad_norm": 0.07405026257038116, "learning_rate": 4.755012048045442e-06, "loss": 0.0244, "step": 139420 }, { "epoch": 1.1281657091997734, "grad_norm": 0.24101349711418152, "learning_rate": 4.7543068021164715e-06, "loss": 0.0297, "step": 139430 }, { "epoch": 1.1282466218949754, "grad_norm": 0.5428290963172913, "learning_rate": 4.753601561087332e-06, "loss": 0.0168, "step": 139440 }, { "epoch": 1.128327534590177, "grad_norm": 0.043515902012586594, "learning_rate": 4.7528963249720885e-06, "loss": 0.0118, "step": 139450 }, { "epoch": 1.128408447285379, "grad_norm": 0.7338846325874329, "learning_rate": 4.752191093784808e-06, "loss": 0.0236, "step": 139460 }, { "epoch": 1.128489359980581, "grad_norm": 0.28472110629081726, "learning_rate": 4.751485867539551e-06, "loss": 0.0217, "step": 139470 }, { "epoch": 1.128570272675783, "grad_norm": 0.6281080842018127, "learning_rate": 4.750780646250385e-06, "loss": 0.02, "step": 139480 }, { "epoch": 1.1286511853709846, "grad_norm": 0.5058284997940063, "learning_rate": 4.750075429931374e-06, "loss": 0.0336, "step": 139490 }, { "epoch": 1.1287320980661866, "grad_norm": 0.46633243560791016, "learning_rate": 4.749370218596579e-06, "loss": 0.0326, "step": 139500 }, { "epoch": 1.1288130107613885, "grad_norm": 0.26844191551208496, "learning_rate": 4.748665012260067e-06, "loss": 0.0178, "step": 139510 }, { "epoch": 1.1288939234565905, "grad_norm": 0.3102841377258301, "learning_rate": 4.747959810935898e-06, "loss": 0.0194, "step": 139520 }, { "epoch": 1.1289748361517922, "grad_norm": 0.3093794286251068, "learning_rate": 4.747254614638142e-06, "loss": 0.0218, "step": 139530 }, { "epoch": 1.1290557488469941, "grad_norm": 0.481477826833725, "learning_rate": 4.746549423380857e-06, "loss": 0.0301, "step": 139540 }, { "epoch": 1.129136661542196, "grad_norm": 0.2718377113342285, "learning_rate": 4.74584423717811e-06, "loss": 0.0288, "step": 139550 }, { "epoch": 1.1292175742373978, "grad_norm": 0.1598450392484665, "learning_rate": 4.7451390560439635e-06, "loss": 0.028, "step": 139560 }, { "epoch": 1.1292984869325997, "grad_norm": 0.18528985977172852, "learning_rate": 4.744433879992478e-06, "loss": 0.0199, "step": 139570 }, { "epoch": 1.1293793996278017, "grad_norm": 0.5370901226997375, "learning_rate": 4.743728709037722e-06, "loss": 0.0225, "step": 139580 }, { "epoch": 1.1294603123230034, "grad_norm": 0.5317739248275757, "learning_rate": 4.743023543193755e-06, "loss": 0.0238, "step": 139590 }, { "epoch": 1.1295412250182053, "grad_norm": 0.21686483919620514, "learning_rate": 4.74231838247464e-06, "loss": 0.0148, "step": 139600 }, { "epoch": 1.1296221377134072, "grad_norm": 0.08780908584594727, "learning_rate": 4.741613226894441e-06, "loss": 0.0131, "step": 139610 }, { "epoch": 1.1297030504086092, "grad_norm": 0.39307090640068054, "learning_rate": 4.740908076467222e-06, "loss": 0.0165, "step": 139620 }, { "epoch": 1.129783963103811, "grad_norm": 0.4571455121040344, "learning_rate": 4.740202931207044e-06, "loss": 0.0222, "step": 139630 }, { "epoch": 1.1298648757990128, "grad_norm": 1.0945345163345337, "learning_rate": 4.739497791127969e-06, "loss": 0.0346, "step": 139640 }, { "epoch": 1.1299457884942148, "grad_norm": 0.42190098762512207, "learning_rate": 4.738792656244062e-06, "loss": 0.0202, "step": 139650 }, { "epoch": 1.1300267011894167, "grad_norm": 0.551145076751709, "learning_rate": 4.7380875265693835e-06, "loss": 0.0244, "step": 139660 }, { "epoch": 1.1301076138846184, "grad_norm": 0.39350414276123047, "learning_rate": 4.737382402117996e-06, "loss": 0.0233, "step": 139670 }, { "epoch": 1.1301885265798204, "grad_norm": 0.5154123902320862, "learning_rate": 4.736677282903964e-06, "loss": 0.0216, "step": 139680 }, { "epoch": 1.1302694392750223, "grad_norm": 0.49682730436325073, "learning_rate": 4.735972168941345e-06, "loss": 0.0151, "step": 139690 }, { "epoch": 1.130350351970224, "grad_norm": 0.33934763073921204, "learning_rate": 4.735267060244204e-06, "loss": 0.0283, "step": 139700 }, { "epoch": 1.130431264665426, "grad_norm": 0.42930886149406433, "learning_rate": 4.7345619568266025e-06, "loss": 0.0272, "step": 139710 }, { "epoch": 1.130512177360628, "grad_norm": 0.2848491072654724, "learning_rate": 4.733856858702604e-06, "loss": 0.0395, "step": 139720 }, { "epoch": 1.1305930900558296, "grad_norm": 0.378680557012558, "learning_rate": 4.7331517658862685e-06, "loss": 0.0198, "step": 139730 }, { "epoch": 1.1306740027510316, "grad_norm": 0.3267304301261902, "learning_rate": 4.732446678391656e-06, "loss": 0.0223, "step": 139740 }, { "epoch": 1.1307549154462335, "grad_norm": 0.4682653844356537, "learning_rate": 4.731741596232832e-06, "loss": 0.0185, "step": 139750 }, { "epoch": 1.1308358281414355, "grad_norm": 0.3048476576805115, "learning_rate": 4.731036519423854e-06, "loss": 0.0139, "step": 139760 }, { "epoch": 1.1309167408366372, "grad_norm": 0.6473799347877502, "learning_rate": 4.730331447978785e-06, "loss": 0.0177, "step": 139770 }, { "epoch": 1.1309976535318391, "grad_norm": 0.2747330367565155, "learning_rate": 4.729626381911687e-06, "loss": 0.0331, "step": 139780 }, { "epoch": 1.131078566227041, "grad_norm": 0.7755059003829956, "learning_rate": 4.728921321236618e-06, "loss": 0.0299, "step": 139790 }, { "epoch": 1.131159478922243, "grad_norm": 0.7378945350646973, "learning_rate": 4.728216265967641e-06, "loss": 0.0258, "step": 139800 }, { "epoch": 1.1312403916174447, "grad_norm": 0.5864738821983337, "learning_rate": 4.727511216118819e-06, "loss": 0.0371, "step": 139810 }, { "epoch": 1.1313213043126467, "grad_norm": 0.584737241268158, "learning_rate": 4.72680617170421e-06, "loss": 0.0139, "step": 139820 }, { "epoch": 1.1314022170078486, "grad_norm": 0.289532870054245, "learning_rate": 4.726101132737874e-06, "loss": 0.0275, "step": 139830 }, { "epoch": 1.1314831297030503, "grad_norm": 0.5086820721626282, "learning_rate": 4.725396099233874e-06, "loss": 0.0219, "step": 139840 }, { "epoch": 1.1315640423982523, "grad_norm": 0.7624716758728027, "learning_rate": 4.724691071206269e-06, "loss": 0.031, "step": 139850 }, { "epoch": 1.1316449550934542, "grad_norm": 0.36623480916023254, "learning_rate": 4.723986048669117e-06, "loss": 0.0265, "step": 139860 }, { "epoch": 1.1317258677886561, "grad_norm": 0.24151617288589478, "learning_rate": 4.723281031636482e-06, "loss": 0.0226, "step": 139870 }, { "epoch": 1.1318067804838579, "grad_norm": 0.3689807951450348, "learning_rate": 4.722576020122424e-06, "loss": 0.0211, "step": 139880 }, { "epoch": 1.1318876931790598, "grad_norm": 0.664573609828949, "learning_rate": 4.721871014141001e-06, "loss": 0.0145, "step": 139890 }, { "epoch": 1.1319686058742617, "grad_norm": 0.4429883658885956, "learning_rate": 4.721166013706272e-06, "loss": 0.0241, "step": 139900 }, { "epoch": 1.1320495185694635, "grad_norm": 0.6474564671516418, "learning_rate": 4.7204610188323e-06, "loss": 0.0234, "step": 139910 }, { "epoch": 1.1321304312646654, "grad_norm": 0.3159106969833374, "learning_rate": 4.719756029533143e-06, "loss": 0.0202, "step": 139920 }, { "epoch": 1.1322113439598673, "grad_norm": 0.6468414068222046, "learning_rate": 4.719051045822857e-06, "loss": 0.0162, "step": 139930 }, { "epoch": 1.1322922566550693, "grad_norm": 0.627924382686615, "learning_rate": 4.71834606771551e-06, "loss": 0.0243, "step": 139940 }, { "epoch": 1.132373169350271, "grad_norm": 0.31115713715553284, "learning_rate": 4.7176410952251516e-06, "loss": 0.022, "step": 139950 }, { "epoch": 1.132454082045473, "grad_norm": 0.22752588987350464, "learning_rate": 4.716936128365847e-06, "loss": 0.0205, "step": 139960 }, { "epoch": 1.1325349947406749, "grad_norm": 0.5064579248428345, "learning_rate": 4.716231167151656e-06, "loss": 0.0374, "step": 139970 }, { "epoch": 1.1326159074358766, "grad_norm": 0.37299343943595886, "learning_rate": 4.7155262115966325e-06, "loss": 0.0212, "step": 139980 }, { "epoch": 1.1326968201310785, "grad_norm": 0.14494729042053223, "learning_rate": 4.714821261714838e-06, "loss": 0.021, "step": 139990 }, { "epoch": 1.1327777328262805, "grad_norm": 0.35715335607528687, "learning_rate": 4.714116317520334e-06, "loss": 0.0261, "step": 140000 }, { "epoch": 1.1328586455214824, "grad_norm": 0.5233197808265686, "learning_rate": 4.713411379027174e-06, "loss": 0.0148, "step": 140010 }, { "epoch": 1.1329395582166841, "grad_norm": 0.3859809339046478, "learning_rate": 4.712706446249419e-06, "loss": 0.0235, "step": 140020 }, { "epoch": 1.133020470911886, "grad_norm": 0.46589794754981995, "learning_rate": 4.712001519201129e-06, "loss": 0.0173, "step": 140030 }, { "epoch": 1.133101383607088, "grad_norm": 0.29180675745010376, "learning_rate": 4.71129659789636e-06, "loss": 0.0132, "step": 140040 }, { "epoch": 1.13318229630229, "grad_norm": 0.4996418356895447, "learning_rate": 4.71059168234917e-06, "loss": 0.0282, "step": 140050 }, { "epoch": 1.1332632089974917, "grad_norm": 0.64225834608078, "learning_rate": 4.7098867725736195e-06, "loss": 0.019, "step": 140060 }, { "epoch": 1.1333441216926936, "grad_norm": 0.6757585406303406, "learning_rate": 4.709181868583765e-06, "loss": 0.0149, "step": 140070 }, { "epoch": 1.1334250343878955, "grad_norm": 0.6639934778213501, "learning_rate": 4.708476970393662e-06, "loss": 0.0193, "step": 140080 }, { "epoch": 1.1335059470830973, "grad_norm": 0.4894384443759918, "learning_rate": 4.7077720780173716e-06, "loss": 0.0204, "step": 140090 }, { "epoch": 1.1335868597782992, "grad_norm": 0.3961009681224823, "learning_rate": 4.707067191468952e-06, "loss": 0.0186, "step": 140100 }, { "epoch": 1.1336677724735011, "grad_norm": 0.5521336793899536, "learning_rate": 4.706362310762457e-06, "loss": 0.0321, "step": 140110 }, { "epoch": 1.1337486851687029, "grad_norm": 0.8132656812667847, "learning_rate": 4.705657435911945e-06, "loss": 0.0291, "step": 140120 }, { "epoch": 1.1338295978639048, "grad_norm": 0.4202566146850586, "learning_rate": 4.704952566931475e-06, "loss": 0.0246, "step": 140130 }, { "epoch": 1.1339105105591067, "grad_norm": 0.7331360578536987, "learning_rate": 4.704247703835104e-06, "loss": 0.04, "step": 140140 }, { "epoch": 1.1339914232543087, "grad_norm": 0.3928784132003784, "learning_rate": 4.703542846636886e-06, "loss": 0.0195, "step": 140150 }, { "epoch": 1.1340723359495104, "grad_norm": 0.08746183663606644, "learning_rate": 4.702837995350883e-06, "loss": 0.0155, "step": 140160 }, { "epoch": 1.1341532486447123, "grad_norm": 0.15219080448150635, "learning_rate": 4.702133149991146e-06, "loss": 0.0174, "step": 140170 }, { "epoch": 1.1342341613399143, "grad_norm": 0.33561214804649353, "learning_rate": 4.701428310571737e-06, "loss": 0.0209, "step": 140180 }, { "epoch": 1.1343150740351162, "grad_norm": 0.2499338835477829, "learning_rate": 4.700723477106709e-06, "loss": 0.0144, "step": 140190 }, { "epoch": 1.134395986730318, "grad_norm": 0.46898359060287476, "learning_rate": 4.700018649610119e-06, "loss": 0.0236, "step": 140200 }, { "epoch": 1.1344768994255199, "grad_norm": 0.584283173084259, "learning_rate": 4.6993138280960246e-06, "loss": 0.0228, "step": 140210 }, { "epoch": 1.1345578121207218, "grad_norm": 0.4302841126918793, "learning_rate": 4.698609012578481e-06, "loss": 0.0172, "step": 140220 }, { "epoch": 1.1346387248159235, "grad_norm": 0.20960776507854462, "learning_rate": 4.697904203071543e-06, "loss": 0.0205, "step": 140230 }, { "epoch": 1.1347196375111255, "grad_norm": 0.17843742668628693, "learning_rate": 4.697199399589269e-06, "loss": 0.0163, "step": 140240 }, { "epoch": 1.1348005502063274, "grad_norm": 0.3981761634349823, "learning_rate": 4.696494602145713e-06, "loss": 0.0194, "step": 140250 }, { "epoch": 1.1348814629015291, "grad_norm": 0.222922682762146, "learning_rate": 4.6957898107549335e-06, "loss": 0.0179, "step": 140260 }, { "epoch": 1.134962375596731, "grad_norm": 0.5080646276473999, "learning_rate": 4.695085025430982e-06, "loss": 0.0237, "step": 140270 }, { "epoch": 1.135043288291933, "grad_norm": 0.06013695150613785, "learning_rate": 4.694380246187916e-06, "loss": 0.0178, "step": 140280 }, { "epoch": 1.135124200987135, "grad_norm": 0.2555735111236572, "learning_rate": 4.693675473039792e-06, "loss": 0.025, "step": 140290 }, { "epoch": 1.1352051136823367, "grad_norm": 0.624360203742981, "learning_rate": 4.692970706000663e-06, "loss": 0.0255, "step": 140300 }, { "epoch": 1.1352860263775386, "grad_norm": 0.34659504890441895, "learning_rate": 4.692265945084584e-06, "loss": 0.0274, "step": 140310 }, { "epoch": 1.1353669390727406, "grad_norm": 0.5004399418830872, "learning_rate": 4.691561190305614e-06, "loss": 0.02, "step": 140320 }, { "epoch": 1.1354478517679425, "grad_norm": 0.43957415223121643, "learning_rate": 4.6908564416778035e-06, "loss": 0.02, "step": 140330 }, { "epoch": 1.1355287644631442, "grad_norm": 0.18252946436405182, "learning_rate": 4.690151699215208e-06, "loss": 0.0231, "step": 140340 }, { "epoch": 1.1356096771583462, "grad_norm": 0.3826797604560852, "learning_rate": 4.689446962931885e-06, "loss": 0.0231, "step": 140350 }, { "epoch": 1.135690589853548, "grad_norm": 0.5584749579429626, "learning_rate": 4.688742232841883e-06, "loss": 0.0221, "step": 140360 }, { "epoch": 1.1357715025487498, "grad_norm": 0.2510167956352234, "learning_rate": 4.688037508959263e-06, "loss": 0.0295, "step": 140370 }, { "epoch": 1.1358524152439518, "grad_norm": 0.44226863980293274, "learning_rate": 4.687332791298076e-06, "loss": 0.0258, "step": 140380 }, { "epoch": 1.1359333279391537, "grad_norm": 0.16744646430015564, "learning_rate": 4.686628079872375e-06, "loss": 0.0212, "step": 140390 }, { "epoch": 1.1360142406343554, "grad_norm": 0.4643787741661072, "learning_rate": 4.685923374696216e-06, "loss": 0.0199, "step": 140400 }, { "epoch": 1.1360951533295574, "grad_norm": 0.23430275917053223, "learning_rate": 4.685218675783652e-06, "loss": 0.0186, "step": 140410 }, { "epoch": 1.1361760660247593, "grad_norm": 0.2994559407234192, "learning_rate": 4.684513983148738e-06, "loss": 0.0156, "step": 140420 }, { "epoch": 1.1362569787199612, "grad_norm": 0.6055737733840942, "learning_rate": 4.683809296805526e-06, "loss": 0.0189, "step": 140430 }, { "epoch": 1.136337891415163, "grad_norm": 0.15208186209201813, "learning_rate": 4.683104616768069e-06, "loss": 0.0168, "step": 140440 }, { "epoch": 1.1364188041103649, "grad_norm": 0.2501024603843689, "learning_rate": 4.6823999430504245e-06, "loss": 0.0266, "step": 140450 }, { "epoch": 1.1364997168055668, "grad_norm": 0.36179327964782715, "learning_rate": 4.68169527566664e-06, "loss": 0.0146, "step": 140460 }, { "epoch": 1.1365806295007688, "grad_norm": 0.8146106004714966, "learning_rate": 4.680990614630772e-06, "loss": 0.0123, "step": 140470 }, { "epoch": 1.1366615421959705, "grad_norm": 0.26570504903793335, "learning_rate": 4.680285959956873e-06, "loss": 0.0244, "step": 140480 }, { "epoch": 1.1367424548911724, "grad_norm": 0.3584285080432892, "learning_rate": 4.679581311658995e-06, "loss": 0.0129, "step": 140490 }, { "epoch": 1.1368233675863744, "grad_norm": 0.3434198498725891, "learning_rate": 4.67887666975119e-06, "loss": 0.0227, "step": 140500 }, { "epoch": 1.136904280281576, "grad_norm": 0.5130296945571899, "learning_rate": 4.678172034247514e-06, "loss": 0.0223, "step": 140510 }, { "epoch": 1.136985192976778, "grad_norm": 0.2126816362142563, "learning_rate": 4.677467405162018e-06, "loss": 0.0164, "step": 140520 }, { "epoch": 1.13706610567198, "grad_norm": 0.1835155040025711, "learning_rate": 4.6767627825087514e-06, "loss": 0.0183, "step": 140530 }, { "epoch": 1.137147018367182, "grad_norm": 0.2143891453742981, "learning_rate": 4.676058166301771e-06, "loss": 0.021, "step": 140540 }, { "epoch": 1.1372279310623836, "grad_norm": 1.969329833984375, "learning_rate": 4.675353556555126e-06, "loss": 0.0188, "step": 140550 }, { "epoch": 1.1373088437575856, "grad_norm": 0.48416653275489807, "learning_rate": 4.674648953282867e-06, "loss": 0.0226, "step": 140560 }, { "epoch": 1.1373897564527875, "grad_norm": 0.5227890610694885, "learning_rate": 4.67394435649905e-06, "loss": 0.0211, "step": 140570 }, { "epoch": 1.1374706691479894, "grad_norm": 0.5076339840888977, "learning_rate": 4.673239766217722e-06, "loss": 0.0228, "step": 140580 }, { "epoch": 1.1375515818431912, "grad_norm": 0.17827144265174866, "learning_rate": 4.6725351824529395e-06, "loss": 0.0131, "step": 140590 }, { "epoch": 1.137632494538393, "grad_norm": 0.5028852820396423, "learning_rate": 4.6718306052187495e-06, "loss": 0.0179, "step": 140600 }, { "epoch": 1.137713407233595, "grad_norm": 0.22237764298915863, "learning_rate": 4.671126034529208e-06, "loss": 0.0125, "step": 140610 }, { "epoch": 1.1377943199287968, "grad_norm": 0.2720339298248291, "learning_rate": 4.670421470398362e-06, "loss": 0.0187, "step": 140620 }, { "epoch": 1.1378752326239987, "grad_norm": 0.4675000309944153, "learning_rate": 4.6697169128402625e-06, "loss": 0.0424, "step": 140630 }, { "epoch": 1.1379561453192006, "grad_norm": 0.2691347897052765, "learning_rate": 4.669012361868965e-06, "loss": 0.0166, "step": 140640 }, { "epoch": 1.1380370580144024, "grad_norm": 0.3228664994239807, "learning_rate": 4.668307817498514e-06, "loss": 0.0136, "step": 140650 }, { "epoch": 1.1381179707096043, "grad_norm": 0.4737350344657898, "learning_rate": 4.667603279742965e-06, "loss": 0.0207, "step": 140660 }, { "epoch": 1.1381988834048062, "grad_norm": 0.17390741407871246, "learning_rate": 4.666898748616368e-06, "loss": 0.0295, "step": 140670 }, { "epoch": 1.1382797961000082, "grad_norm": 0.3654743432998657, "learning_rate": 4.66619422413277e-06, "loss": 0.0273, "step": 140680 }, { "epoch": 1.13836070879521, "grad_norm": 0.4422350227832794, "learning_rate": 4.665489706306225e-06, "loss": 0.0313, "step": 140690 }, { "epoch": 1.1384416214904118, "grad_norm": 0.4993226230144501, "learning_rate": 4.6647851951507824e-06, "loss": 0.0263, "step": 140700 }, { "epoch": 1.1385225341856138, "grad_norm": 0.7773889899253845, "learning_rate": 4.6640806906804885e-06, "loss": 0.0211, "step": 140710 }, { "epoch": 1.1386034468808157, "grad_norm": 0.8190346956253052, "learning_rate": 4.663376192909397e-06, "loss": 0.033, "step": 140720 }, { "epoch": 1.1386843595760174, "grad_norm": 0.2771857976913452, "learning_rate": 4.662671701851559e-06, "loss": 0.0209, "step": 140730 }, { "epoch": 1.1387652722712194, "grad_norm": 0.40639105439186096, "learning_rate": 4.66196721752102e-06, "loss": 0.0276, "step": 140740 }, { "epoch": 1.1388461849664213, "grad_norm": 0.4398901164531708, "learning_rate": 4.66126273993183e-06, "loss": 0.0237, "step": 140750 }, { "epoch": 1.138927097661623, "grad_norm": 0.24917683005332947, "learning_rate": 4.660558269098042e-06, "loss": 0.0229, "step": 140760 }, { "epoch": 1.139008010356825, "grad_norm": 0.7616283297538757, "learning_rate": 4.659853805033701e-06, "loss": 0.0342, "step": 140770 }, { "epoch": 1.139088923052027, "grad_norm": 0.38708043098449707, "learning_rate": 4.6591493477528585e-06, "loss": 0.013, "step": 140780 }, { "epoch": 1.1391698357472286, "grad_norm": 0.2454669177532196, "learning_rate": 4.658444897269561e-06, "loss": 0.023, "step": 140790 }, { "epoch": 1.1392507484424306, "grad_norm": 0.4664502441883087, "learning_rate": 4.6577404535978625e-06, "loss": 0.0234, "step": 140800 }, { "epoch": 1.1393316611376325, "grad_norm": 0.45479878783226013, "learning_rate": 4.657036016751807e-06, "loss": 0.0266, "step": 140810 }, { "epoch": 1.1394125738328345, "grad_norm": 0.24664369225502014, "learning_rate": 4.656331586745441e-06, "loss": 0.0366, "step": 140820 }, { "epoch": 1.1394934865280362, "grad_norm": 0.32053592801094055, "learning_rate": 4.65562716359282e-06, "loss": 0.0181, "step": 140830 }, { "epoch": 1.139574399223238, "grad_norm": 0.48573559522628784, "learning_rate": 4.654922747307985e-06, "loss": 0.0317, "step": 140840 }, { "epoch": 1.13965531191844, "grad_norm": 0.35501962900161743, "learning_rate": 4.65421833790499e-06, "loss": 0.0279, "step": 140850 }, { "epoch": 1.139736224613642, "grad_norm": 0.3626803755760193, "learning_rate": 4.65351393539788e-06, "loss": 0.0311, "step": 140860 }, { "epoch": 1.1398171373088437, "grad_norm": 0.5676857829093933, "learning_rate": 4.652809539800701e-06, "loss": 0.0186, "step": 140870 }, { "epoch": 1.1398980500040456, "grad_norm": 0.47500988841056824, "learning_rate": 4.652105151127504e-06, "loss": 0.0222, "step": 140880 }, { "epoch": 1.1399789626992476, "grad_norm": 1.2184970378875732, "learning_rate": 4.651400769392337e-06, "loss": 0.0264, "step": 140890 }, { "epoch": 1.1400598753944493, "grad_norm": 0.2204662263393402, "learning_rate": 4.6506963946092425e-06, "loss": 0.0224, "step": 140900 }, { "epoch": 1.1401407880896512, "grad_norm": 0.391938716173172, "learning_rate": 4.6499920267922715e-06, "loss": 0.0314, "step": 140910 }, { "epoch": 1.1402217007848532, "grad_norm": 0.38635388016700745, "learning_rate": 4.649287665955473e-06, "loss": 0.0151, "step": 140920 }, { "epoch": 1.140302613480055, "grad_norm": 0.7444953918457031, "learning_rate": 4.64858331211289e-06, "loss": 0.0267, "step": 140930 }, { "epoch": 1.1403835261752568, "grad_norm": 0.7585209608078003, "learning_rate": 4.64787896527857e-06, "loss": 0.0345, "step": 140940 }, { "epoch": 1.1404644388704588, "grad_norm": 0.5593176484107971, "learning_rate": 4.647174625466563e-06, "loss": 0.0279, "step": 140950 }, { "epoch": 1.1405453515656607, "grad_norm": 0.8029960989952087, "learning_rate": 4.6464702926909135e-06, "loss": 0.0191, "step": 140960 }, { "epoch": 1.1406262642608624, "grad_norm": 0.5200011730194092, "learning_rate": 4.645765966965665e-06, "loss": 0.0117, "step": 140970 }, { "epoch": 1.1407071769560644, "grad_norm": 0.16321426630020142, "learning_rate": 4.645061648304866e-06, "loss": 0.0173, "step": 140980 }, { "epoch": 1.1407880896512663, "grad_norm": 0.48359420895576477, "learning_rate": 4.644357336722567e-06, "loss": 0.0201, "step": 140990 }, { "epoch": 1.1408690023464683, "grad_norm": 0.1772257536649704, "learning_rate": 4.643653032232809e-06, "loss": 0.0144, "step": 141000 }, { "epoch": 1.14094991504167, "grad_norm": 0.27224892377853394, "learning_rate": 4.642948734849636e-06, "loss": 0.0159, "step": 141010 }, { "epoch": 1.141030827736872, "grad_norm": 0.3554241359233856, "learning_rate": 4.6422444445871e-06, "loss": 0.0169, "step": 141020 }, { "epoch": 1.1411117404320739, "grad_norm": 0.3603227734565735, "learning_rate": 4.641540161459242e-06, "loss": 0.0155, "step": 141030 }, { "epoch": 1.1411926531272756, "grad_norm": 0.29783543944358826, "learning_rate": 4.640835885480109e-06, "loss": 0.0187, "step": 141040 }, { "epoch": 1.1412735658224775, "grad_norm": 0.05427664890885353, "learning_rate": 4.640131616663746e-06, "loss": 0.0105, "step": 141050 }, { "epoch": 1.1413544785176795, "grad_norm": 0.3589193820953369, "learning_rate": 4.6394273550241966e-06, "loss": 0.0156, "step": 141060 }, { "epoch": 1.1414353912128812, "grad_norm": 0.5301965475082397, "learning_rate": 4.6387231005755094e-06, "loss": 0.013, "step": 141070 }, { "epoch": 1.1415163039080831, "grad_norm": 0.22704999148845673, "learning_rate": 4.638018853331728e-06, "loss": 0.0235, "step": 141080 }, { "epoch": 1.141597216603285, "grad_norm": 0.3145395815372467, "learning_rate": 4.6373146133068945e-06, "loss": 0.0113, "step": 141090 }, { "epoch": 1.141678129298487, "grad_norm": 0.5835087299346924, "learning_rate": 4.636610380515055e-06, "loss": 0.0168, "step": 141100 }, { "epoch": 1.1417590419936887, "grad_norm": 0.22645826637744904, "learning_rate": 4.635906154970257e-06, "loss": 0.021, "step": 141110 }, { "epoch": 1.1418399546888907, "grad_norm": 0.6988983154296875, "learning_rate": 4.635201936686539e-06, "loss": 0.0274, "step": 141120 }, { "epoch": 1.1419208673840926, "grad_norm": 0.5201784372329712, "learning_rate": 4.634497725677949e-06, "loss": 0.0182, "step": 141130 }, { "epoch": 1.1420017800792945, "grad_norm": 0.5283613204956055, "learning_rate": 4.633793521958531e-06, "loss": 0.0275, "step": 141140 }, { "epoch": 1.1420826927744963, "grad_norm": 0.2617143988609314, "learning_rate": 4.63308932554233e-06, "loss": 0.038, "step": 141150 }, { "epoch": 1.1421636054696982, "grad_norm": 0.218925341963768, "learning_rate": 4.632385136443384e-06, "loss": 0.0139, "step": 141160 }, { "epoch": 1.1422445181649001, "grad_norm": 0.6328645348548889, "learning_rate": 4.631680954675743e-06, "loss": 0.0122, "step": 141170 }, { "epoch": 1.1423254308601019, "grad_norm": 0.42639124393463135, "learning_rate": 4.630976780253448e-06, "loss": 0.0156, "step": 141180 }, { "epoch": 1.1424063435553038, "grad_norm": 0.4037379026412964, "learning_rate": 4.630272613190539e-06, "loss": 0.0309, "step": 141190 }, { "epoch": 1.1424872562505057, "grad_norm": 0.31629860401153564, "learning_rate": 4.6295684535010635e-06, "loss": 0.0225, "step": 141200 }, { "epoch": 1.1425681689457077, "grad_norm": 0.2220735102891922, "learning_rate": 4.628864301199065e-06, "loss": 0.0288, "step": 141210 }, { "epoch": 1.1426490816409094, "grad_norm": 0.04990285262465477, "learning_rate": 4.628160156298584e-06, "loss": 0.0337, "step": 141220 }, { "epoch": 1.1427299943361113, "grad_norm": 0.38695964217185974, "learning_rate": 4.627456018813662e-06, "loss": 0.0185, "step": 141230 }, { "epoch": 1.1428109070313133, "grad_norm": 0.39937543869018555, "learning_rate": 4.6267518887583455e-06, "loss": 0.0219, "step": 141240 }, { "epoch": 1.1428918197265152, "grad_norm": 0.36067822575569153, "learning_rate": 4.626047766146674e-06, "loss": 0.0201, "step": 141250 }, { "epoch": 1.142972732421717, "grad_norm": 0.2291010320186615, "learning_rate": 4.625343650992689e-06, "loss": 0.0268, "step": 141260 }, { "epoch": 1.1430536451169189, "grad_norm": 0.4197462797164917, "learning_rate": 4.624639543310436e-06, "loss": 0.0227, "step": 141270 }, { "epoch": 1.1431345578121208, "grad_norm": 0.28436192870140076, "learning_rate": 4.623935443113952e-06, "loss": 0.0249, "step": 141280 }, { "epoch": 1.1432154705073225, "grad_norm": 0.2820405960083008, "learning_rate": 4.623231350417284e-06, "loss": 0.0236, "step": 141290 }, { "epoch": 1.1432963832025245, "grad_norm": 0.381253182888031, "learning_rate": 4.622527265234472e-06, "loss": 0.0136, "step": 141300 }, { "epoch": 1.1433772958977264, "grad_norm": 0.4866344928741455, "learning_rate": 4.621823187579554e-06, "loss": 0.0175, "step": 141310 }, { "epoch": 1.1434582085929281, "grad_norm": 0.18587462604045868, "learning_rate": 4.621119117466575e-06, "loss": 0.0285, "step": 141320 }, { "epoch": 1.14353912128813, "grad_norm": 0.29333436489105225, "learning_rate": 4.620415054909576e-06, "loss": 0.0196, "step": 141330 }, { "epoch": 1.143620033983332, "grad_norm": 0.5540621876716614, "learning_rate": 4.619710999922598e-06, "loss": 0.0208, "step": 141340 }, { "epoch": 1.143700946678534, "grad_norm": 0.13098782300949097, "learning_rate": 4.61900695251968e-06, "loss": 0.0219, "step": 141350 }, { "epoch": 1.1437818593737357, "grad_norm": 0.36030715703964233, "learning_rate": 4.618302912714865e-06, "loss": 0.0197, "step": 141360 }, { "epoch": 1.1438627720689376, "grad_norm": 0.18427658081054688, "learning_rate": 4.617598880522194e-06, "loss": 0.0162, "step": 141370 }, { "epoch": 1.1439436847641395, "grad_norm": 0.24552227556705475, "learning_rate": 4.616894855955703e-06, "loss": 0.0352, "step": 141380 }, { "epoch": 1.1440245974593415, "grad_norm": 0.709578275680542, "learning_rate": 4.6161908390294355e-06, "loss": 0.0233, "step": 141390 }, { "epoch": 1.1441055101545432, "grad_norm": 0.21656478941440582, "learning_rate": 4.615486829757434e-06, "loss": 0.017, "step": 141400 }, { "epoch": 1.1441864228497451, "grad_norm": 0.4819853603839874, "learning_rate": 4.614782828153734e-06, "loss": 0.0248, "step": 141410 }, { "epoch": 1.144267335544947, "grad_norm": 0.5449548363685608, "learning_rate": 4.614078834232377e-06, "loss": 0.0179, "step": 141420 }, { "epoch": 1.1443482482401488, "grad_norm": 0.546317458152771, "learning_rate": 4.613374848007406e-06, "loss": 0.0321, "step": 141430 }, { "epoch": 1.1444291609353507, "grad_norm": 0.2992466986179352, "learning_rate": 4.612670869492855e-06, "loss": 0.0126, "step": 141440 }, { "epoch": 1.1445100736305527, "grad_norm": 0.4507339596748352, "learning_rate": 4.611966898702765e-06, "loss": 0.0242, "step": 141450 }, { "epoch": 1.1445909863257544, "grad_norm": 0.32716038823127747, "learning_rate": 4.611262935651178e-06, "loss": 0.0102, "step": 141460 }, { "epoch": 1.1446718990209563, "grad_norm": 0.29506292939186096, "learning_rate": 4.61055898035213e-06, "loss": 0.0172, "step": 141470 }, { "epoch": 1.1447528117161583, "grad_norm": 0.5087459087371826, "learning_rate": 4.60985503281966e-06, "loss": 0.0197, "step": 141480 }, { "epoch": 1.1448337244113602, "grad_norm": 0.6313491463661194, "learning_rate": 4.609151093067811e-06, "loss": 0.0213, "step": 141490 }, { "epoch": 1.144914637106562, "grad_norm": 0.28319233655929565, "learning_rate": 4.608447161110615e-06, "loss": 0.0116, "step": 141500 }, { "epoch": 1.1449955498017639, "grad_norm": 0.007609292399138212, "learning_rate": 4.607743236962115e-06, "loss": 0.0134, "step": 141510 }, { "epoch": 1.1450764624969658, "grad_norm": 0.4159370958805084, "learning_rate": 4.607039320636347e-06, "loss": 0.0199, "step": 141520 }, { "epoch": 1.1451573751921678, "grad_norm": 0.46221691370010376, "learning_rate": 4.606335412147353e-06, "loss": 0.0215, "step": 141530 }, { "epoch": 1.1452382878873695, "grad_norm": 0.40418142080307007, "learning_rate": 4.605631511509165e-06, "loss": 0.0187, "step": 141540 }, { "epoch": 1.1453192005825714, "grad_norm": 0.19988439977169037, "learning_rate": 4.604927618735825e-06, "loss": 0.0159, "step": 141550 }, { "epoch": 1.1454001132777734, "grad_norm": 0.28602689504623413, "learning_rate": 4.604223733841372e-06, "loss": 0.0129, "step": 141560 }, { "epoch": 1.145481025972975, "grad_norm": 0.5444626212120056, "learning_rate": 4.603519856839837e-06, "loss": 0.024, "step": 141570 }, { "epoch": 1.145561938668177, "grad_norm": 0.5208100080490112, "learning_rate": 4.6028159877452635e-06, "loss": 0.0202, "step": 141580 }, { "epoch": 1.145642851363379, "grad_norm": 0.6596076488494873, "learning_rate": 4.6021121265716885e-06, "loss": 0.0149, "step": 141590 }, { "epoch": 1.1457237640585807, "grad_norm": 0.6489405035972595, "learning_rate": 4.6014082733331435e-06, "loss": 0.026, "step": 141600 }, { "epoch": 1.1458046767537826, "grad_norm": 0.38518065214157104, "learning_rate": 4.60070442804367e-06, "loss": 0.0188, "step": 141610 }, { "epoch": 1.1458855894489846, "grad_norm": 0.17661991715431213, "learning_rate": 4.600000590717306e-06, "loss": 0.0149, "step": 141620 }, { "epoch": 1.1459665021441865, "grad_norm": 0.430718332529068, "learning_rate": 4.599296761368085e-06, "loss": 0.0122, "step": 141630 }, { "epoch": 1.1460474148393882, "grad_norm": 0.6074655055999756, "learning_rate": 4.598592940010041e-06, "loss": 0.023, "step": 141640 }, { "epoch": 1.1461283275345902, "grad_norm": 0.9190225005149841, "learning_rate": 4.597889126657218e-06, "loss": 0.0353, "step": 141650 }, { "epoch": 1.146209240229792, "grad_norm": 0.6710594296455383, "learning_rate": 4.597185321323645e-06, "loss": 0.0236, "step": 141660 }, { "epoch": 1.146290152924994, "grad_norm": 0.28263869881629944, "learning_rate": 4.59648152402336e-06, "loss": 0.0103, "step": 141670 }, { "epoch": 1.1463710656201958, "grad_norm": 0.33055379986763, "learning_rate": 4.595777734770399e-06, "loss": 0.0297, "step": 141680 }, { "epoch": 1.1464519783153977, "grad_norm": 0.3341127336025238, "learning_rate": 4.5950739535788e-06, "loss": 0.0185, "step": 141690 }, { "epoch": 1.1465328910105996, "grad_norm": 0.22871336340904236, "learning_rate": 4.594370180462595e-06, "loss": 0.0328, "step": 141700 }, { "epoch": 1.1466138037058013, "grad_norm": 0.49116045236587524, "learning_rate": 4.593666415435819e-06, "loss": 0.0193, "step": 141710 }, { "epoch": 1.1466947164010033, "grad_norm": 0.1457352340221405, "learning_rate": 4.592962658512511e-06, "loss": 0.0191, "step": 141720 }, { "epoch": 1.1467756290962052, "grad_norm": 0.003476689336821437, "learning_rate": 4.5922589097067024e-06, "loss": 0.0294, "step": 141730 }, { "epoch": 1.146856541791407, "grad_norm": 0.4771413207054138, "learning_rate": 4.5915551690324275e-06, "loss": 0.0342, "step": 141740 }, { "epoch": 1.1469374544866089, "grad_norm": 0.14553047716617584, "learning_rate": 4.5908514365037245e-06, "loss": 0.0286, "step": 141750 }, { "epoch": 1.1470183671818108, "grad_norm": 0.48701199889183044, "learning_rate": 4.590147712134625e-06, "loss": 0.0302, "step": 141760 }, { "epoch": 1.1470992798770128, "grad_norm": 0.3897922933101654, "learning_rate": 4.589443995939165e-06, "loss": 0.0141, "step": 141770 }, { "epoch": 1.1471801925722145, "grad_norm": 0.38881945610046387, "learning_rate": 4.588740287931378e-06, "loss": 0.0194, "step": 141780 }, { "epoch": 1.1472611052674164, "grad_norm": 0.19483807682991028, "learning_rate": 4.588036588125295e-06, "loss": 0.02, "step": 141790 }, { "epoch": 1.1473420179626184, "grad_norm": 0.7437313795089722, "learning_rate": 4.587332896534955e-06, "loss": 0.022, "step": 141800 }, { "epoch": 1.1474229306578203, "grad_norm": 0.30835506319999695, "learning_rate": 4.586629213174389e-06, "loss": 0.017, "step": 141810 }, { "epoch": 1.147503843353022, "grad_norm": 0.1978970766067505, "learning_rate": 4.58592553805763e-06, "loss": 0.019, "step": 141820 }, { "epoch": 1.147584756048224, "grad_norm": 0.40772297978401184, "learning_rate": 4.585221871198711e-06, "loss": 0.0197, "step": 141830 }, { "epoch": 1.147665668743426, "grad_norm": 0.3028278052806854, "learning_rate": 4.584518212611669e-06, "loss": 0.0261, "step": 141840 }, { "epoch": 1.1477465814386276, "grad_norm": 0.22474607825279236, "learning_rate": 4.583814562310533e-06, "loss": 0.0098, "step": 141850 }, { "epoch": 1.1478274941338296, "grad_norm": 0.5119064450263977, "learning_rate": 4.583110920309336e-06, "loss": 0.0212, "step": 141860 }, { "epoch": 1.1479084068290315, "grad_norm": 0.4479379951953888, "learning_rate": 4.58240728662211e-06, "loss": 0.0179, "step": 141870 }, { "epoch": 1.1479893195242334, "grad_norm": 0.5006698966026306, "learning_rate": 4.581703661262893e-06, "loss": 0.015, "step": 141880 }, { "epoch": 1.1480702322194352, "grad_norm": 0.5620636940002441, "learning_rate": 4.581000044245711e-06, "loss": 0.018, "step": 141890 }, { "epoch": 1.148151144914637, "grad_norm": 0.5437542796134949, "learning_rate": 4.580296435584599e-06, "loss": 0.0166, "step": 141900 }, { "epoch": 1.148232057609839, "grad_norm": 0.34297823905944824, "learning_rate": 4.57959283529359e-06, "loss": 0.0166, "step": 141910 }, { "epoch": 1.148312970305041, "grad_norm": 0.37572401762008667, "learning_rate": 4.578889243386713e-06, "loss": 0.0268, "step": 141920 }, { "epoch": 1.1483938830002427, "grad_norm": 0.4592345356941223, "learning_rate": 4.578185659878001e-06, "loss": 0.0216, "step": 141930 }, { "epoch": 1.1484747956954446, "grad_norm": 0.7146378755569458, "learning_rate": 4.577482084781486e-06, "loss": 0.0306, "step": 141940 }, { "epoch": 1.1485557083906466, "grad_norm": 0.3151109516620636, "learning_rate": 4.576778518111198e-06, "loss": 0.021, "step": 141950 }, { "epoch": 1.1486366210858483, "grad_norm": 0.6366711258888245, "learning_rate": 4.57607495988117e-06, "loss": 0.0242, "step": 141960 }, { "epoch": 1.1487175337810502, "grad_norm": 0.09849072247743607, "learning_rate": 4.575371410105432e-06, "loss": 0.0163, "step": 141970 }, { "epoch": 1.1487984464762522, "grad_norm": 0.5355592966079712, "learning_rate": 4.574667868798014e-06, "loss": 0.0226, "step": 141980 }, { "epoch": 1.148879359171454, "grad_norm": 0.18168842792510986, "learning_rate": 4.5739643359729474e-06, "loss": 0.0238, "step": 141990 }, { "epoch": 1.1489602718666558, "grad_norm": 0.27915695309638977, "learning_rate": 4.5732608116442644e-06, "loss": 0.0432, "step": 142000 }, { "epoch": 1.1490411845618578, "grad_norm": 0.6547215580940247, "learning_rate": 4.572557295825991e-06, "loss": 0.0189, "step": 142010 }, { "epoch": 1.1491220972570597, "grad_norm": 0.4479122757911682, "learning_rate": 4.571853788532161e-06, "loss": 0.0305, "step": 142020 }, { "epoch": 1.1492030099522614, "grad_norm": 0.5327391624450684, "learning_rate": 4.5711502897768045e-06, "loss": 0.0154, "step": 142030 }, { "epoch": 1.1492839226474634, "grad_norm": 0.3414992392063141, "learning_rate": 4.570446799573949e-06, "loss": 0.0174, "step": 142040 }, { "epoch": 1.1493648353426653, "grad_norm": 0.5585977435112, "learning_rate": 4.569743317937625e-06, "loss": 0.0093, "step": 142050 }, { "epoch": 1.1494457480378673, "grad_norm": 0.6912830471992493, "learning_rate": 4.569039844881862e-06, "loss": 0.0227, "step": 142060 }, { "epoch": 1.149526660733069, "grad_norm": 0.44249945878982544, "learning_rate": 4.568336380420692e-06, "loss": 0.012, "step": 142070 }, { "epoch": 1.149607573428271, "grad_norm": 0.6929486989974976, "learning_rate": 4.5676329245681386e-06, "loss": 0.0301, "step": 142080 }, { "epoch": 1.1496884861234729, "grad_norm": 0.20133715867996216, "learning_rate": 4.566929477338234e-06, "loss": 0.0157, "step": 142090 }, { "epoch": 1.1497693988186746, "grad_norm": 0.2749386727809906, "learning_rate": 4.566226038745009e-06, "loss": 0.0172, "step": 142100 }, { "epoch": 1.1498503115138765, "grad_norm": 0.4373643398284912, "learning_rate": 4.565522608802489e-06, "loss": 0.0235, "step": 142110 }, { "epoch": 1.1499312242090785, "grad_norm": 0.3342132568359375, "learning_rate": 4.5648191875247015e-06, "loss": 0.0234, "step": 142120 }, { "epoch": 1.1500121369042802, "grad_norm": 0.18802182376384735, "learning_rate": 4.564115774925679e-06, "loss": 0.0153, "step": 142130 }, { "epoch": 1.150093049599482, "grad_norm": 0.5949185490608215, "learning_rate": 4.5634123710194465e-06, "loss": 0.0185, "step": 142140 }, { "epoch": 1.150173962294684, "grad_norm": 0.453843891620636, "learning_rate": 4.562708975820032e-06, "loss": 0.033, "step": 142150 }, { "epoch": 1.150254874989886, "grad_norm": 0.25640612840652466, "learning_rate": 4.562005589341465e-06, "loss": 0.0149, "step": 142160 }, { "epoch": 1.1503357876850877, "grad_norm": 0.32018548250198364, "learning_rate": 4.561302211597769e-06, "loss": 0.0185, "step": 142170 }, { "epoch": 1.1504167003802896, "grad_norm": 0.3219504952430725, "learning_rate": 4.560598842602977e-06, "loss": 0.0298, "step": 142180 }, { "epoch": 1.1504976130754916, "grad_norm": 0.10376822203397751, "learning_rate": 4.559895482371114e-06, "loss": 0.0181, "step": 142190 }, { "epoch": 1.1505785257706935, "grad_norm": 0.47533559799194336, "learning_rate": 4.559192130916204e-06, "loss": 0.0295, "step": 142200 }, { "epoch": 1.1506594384658952, "grad_norm": 0.6571474075317383, "learning_rate": 4.558488788252278e-06, "loss": 0.0244, "step": 142210 }, { "epoch": 1.1507403511610972, "grad_norm": 0.30829641222953796, "learning_rate": 4.557785454393359e-06, "loss": 0.0224, "step": 142220 }, { "epoch": 1.1508212638562991, "grad_norm": 0.1359967142343521, "learning_rate": 4.5570821293534785e-06, "loss": 0.0247, "step": 142230 }, { "epoch": 1.1509021765515008, "grad_norm": 0.21428847312927246, "learning_rate": 4.556378813146658e-06, "loss": 0.0177, "step": 142240 }, { "epoch": 1.1509830892467028, "grad_norm": 0.4945829212665558, "learning_rate": 4.5556755057869265e-06, "loss": 0.0167, "step": 142250 }, { "epoch": 1.1510640019419047, "grad_norm": 0.3108648955821991, "learning_rate": 4.55497220728831e-06, "loss": 0.0202, "step": 142260 }, { "epoch": 1.1511449146371064, "grad_norm": 0.8189323544502258, "learning_rate": 4.554268917664831e-06, "loss": 0.0262, "step": 142270 }, { "epoch": 1.1512258273323084, "grad_norm": 1.038671612739563, "learning_rate": 4.553565636930518e-06, "loss": 0.0251, "step": 142280 }, { "epoch": 1.1513067400275103, "grad_norm": 0.3681236207485199, "learning_rate": 4.5528623650993974e-06, "loss": 0.0253, "step": 142290 }, { "epoch": 1.1513876527227123, "grad_norm": 0.5175453424453735, "learning_rate": 4.552159102185491e-06, "loss": 0.019, "step": 142300 }, { "epoch": 1.151468565417914, "grad_norm": 0.43393123149871826, "learning_rate": 4.551455848202826e-06, "loss": 0.0234, "step": 142310 }, { "epoch": 1.151549478113116, "grad_norm": 0.3276901841163635, "learning_rate": 4.55075260316543e-06, "loss": 0.0242, "step": 142320 }, { "epoch": 1.1516303908083179, "grad_norm": 0.2144765555858612, "learning_rate": 4.550049367087322e-06, "loss": 0.0248, "step": 142330 }, { "epoch": 1.1517113035035198, "grad_norm": 0.42927277088165283, "learning_rate": 4.54934613998253e-06, "loss": 0.0295, "step": 142340 }, { "epoch": 1.1517922161987215, "grad_norm": 0.008416619151830673, "learning_rate": 4.548642921865079e-06, "loss": 0.0123, "step": 142350 }, { "epoch": 1.1518731288939235, "grad_norm": 0.3886892795562744, "learning_rate": 4.547939712748991e-06, "loss": 0.0206, "step": 142360 }, { "epoch": 1.1519540415891254, "grad_norm": 0.5060633420944214, "learning_rate": 4.547236512648291e-06, "loss": 0.021, "step": 142370 }, { "epoch": 1.1520349542843271, "grad_norm": 0.7101492285728455, "learning_rate": 4.546533321577004e-06, "loss": 0.0346, "step": 142380 }, { "epoch": 1.152115866979529, "grad_norm": 0.3107530176639557, "learning_rate": 4.545830139549151e-06, "loss": 0.0233, "step": 142390 }, { "epoch": 1.152196779674731, "grad_norm": 0.3384953737258911, "learning_rate": 4.545126966578759e-06, "loss": 0.0297, "step": 142400 }, { "epoch": 1.152277692369933, "grad_norm": 0.3222072124481201, "learning_rate": 4.544423802679847e-06, "loss": 0.0153, "step": 142410 }, { "epoch": 1.1523586050651347, "grad_norm": 0.2561980187892914, "learning_rate": 4.543720647866443e-06, "loss": 0.0188, "step": 142420 }, { "epoch": 1.1524395177603366, "grad_norm": 0.25407347083091736, "learning_rate": 4.543017502152566e-06, "loss": 0.0213, "step": 142430 }, { "epoch": 1.1525204304555385, "grad_norm": 0.45140576362609863, "learning_rate": 4.542314365552239e-06, "loss": 0.0169, "step": 142440 }, { "epoch": 1.1526013431507403, "grad_norm": 0.27830982208251953, "learning_rate": 4.541611238079489e-06, "loss": 0.0266, "step": 142450 }, { "epoch": 1.1526822558459422, "grad_norm": 0.3475201725959778, "learning_rate": 4.540908119748332e-06, "loss": 0.0255, "step": 142460 }, { "epoch": 1.1527631685411441, "grad_norm": 0.7071613669395447, "learning_rate": 4.540205010572795e-06, "loss": 0.0381, "step": 142470 }, { "epoch": 1.152844081236346, "grad_norm": 0.18197010457515717, "learning_rate": 4.5395019105668995e-06, "loss": 0.0183, "step": 142480 }, { "epoch": 1.1529249939315478, "grad_norm": 0.19182930886745453, "learning_rate": 4.5387988197446645e-06, "loss": 0.0285, "step": 142490 }, { "epoch": 1.1530059066267497, "grad_norm": 0.6618106961250305, "learning_rate": 4.538095738120112e-06, "loss": 0.0192, "step": 142500 }, { "epoch": 1.1530868193219517, "grad_norm": 0.250730037689209, "learning_rate": 4.537392665707269e-06, "loss": 0.0251, "step": 142510 }, { "epoch": 1.1531677320171534, "grad_norm": 0.2089024782180786, "learning_rate": 4.5366896025201505e-06, "loss": 0.0195, "step": 142520 }, { "epoch": 1.1532486447123553, "grad_norm": 0.37150105834007263, "learning_rate": 4.535986548572777e-06, "loss": 0.0119, "step": 142530 }, { "epoch": 1.1533295574075573, "grad_norm": 0.3211984932422638, "learning_rate": 4.535283503879177e-06, "loss": 0.0164, "step": 142540 }, { "epoch": 1.1534104701027592, "grad_norm": 0.2682522237300873, "learning_rate": 4.534580468453365e-06, "loss": 0.0179, "step": 142550 }, { "epoch": 1.153491382797961, "grad_norm": 0.4260481894016266, "learning_rate": 4.533877442309361e-06, "loss": 0.0266, "step": 142560 }, { "epoch": 1.1535722954931629, "grad_norm": 0.23018601536750793, "learning_rate": 4.53317442546119e-06, "loss": 0.0161, "step": 142570 }, { "epoch": 1.1536532081883648, "grad_norm": 0.5305248498916626, "learning_rate": 4.532471417922867e-06, "loss": 0.0249, "step": 142580 }, { "epoch": 1.1537341208835667, "grad_norm": 0.14344492554664612, "learning_rate": 4.531768419708416e-06, "loss": 0.0102, "step": 142590 }, { "epoch": 1.1538150335787685, "grad_norm": 0.18238824605941772, "learning_rate": 4.531065430831854e-06, "loss": 0.0287, "step": 142600 }, { "epoch": 1.1538959462739704, "grad_norm": 0.4353940784931183, "learning_rate": 4.530362451307205e-06, "loss": 0.0215, "step": 142610 }, { "epoch": 1.1539768589691723, "grad_norm": 0.5165084004402161, "learning_rate": 4.529659481148483e-06, "loss": 0.0264, "step": 142620 }, { "epoch": 1.154057771664374, "grad_norm": 0.5056084990501404, "learning_rate": 4.52895652036971e-06, "loss": 0.0185, "step": 142630 }, { "epoch": 1.154138684359576, "grad_norm": 0.18154528737068176, "learning_rate": 4.528253568984906e-06, "loss": 0.0252, "step": 142640 }, { "epoch": 1.154219597054778, "grad_norm": 0.49839991331100464, "learning_rate": 4.527550627008087e-06, "loss": 0.0221, "step": 142650 }, { "epoch": 1.1543005097499797, "grad_norm": 0.7922970056533813, "learning_rate": 4.5268476944532744e-06, "loss": 0.0299, "step": 142660 }, { "epoch": 1.1543814224451816, "grad_norm": 0.29378652572631836, "learning_rate": 4.526144771334487e-06, "loss": 0.0172, "step": 142670 }, { "epoch": 1.1544623351403835, "grad_norm": 0.3352317810058594, "learning_rate": 4.525441857665739e-06, "loss": 0.02, "step": 142680 }, { "epoch": 1.1545432478355855, "grad_norm": 0.42358461022377014, "learning_rate": 4.524738953461053e-06, "loss": 0.0163, "step": 142690 }, { "epoch": 1.1546241605307872, "grad_norm": 0.15238143503665924, "learning_rate": 4.524036058734446e-06, "loss": 0.0187, "step": 142700 }, { "epoch": 1.1547050732259891, "grad_norm": 0.3855268955230713, "learning_rate": 4.523333173499933e-06, "loss": 0.0202, "step": 142710 }, { "epoch": 1.154785985921191, "grad_norm": 0.3277043402194977, "learning_rate": 4.522630297771533e-06, "loss": 0.0232, "step": 142720 }, { "epoch": 1.154866898616393, "grad_norm": 0.1385010927915573, "learning_rate": 4.521927431563267e-06, "loss": 0.0159, "step": 142730 }, { "epoch": 1.1549478113115947, "grad_norm": 0.2596992552280426, "learning_rate": 4.521224574889148e-06, "loss": 0.0161, "step": 142740 }, { "epoch": 1.1550287240067967, "grad_norm": 0.10718319565057755, "learning_rate": 4.520521727763192e-06, "loss": 0.0201, "step": 142750 }, { "epoch": 1.1551096367019986, "grad_norm": 0.2186097800731659, "learning_rate": 4.5198188901994195e-06, "loss": 0.0165, "step": 142760 }, { "epoch": 1.1551905493972003, "grad_norm": 0.42866525053977966, "learning_rate": 4.519116062211847e-06, "loss": 0.033, "step": 142770 }, { "epoch": 1.1552714620924023, "grad_norm": 0.7254008650779724, "learning_rate": 4.518413243814487e-06, "loss": 0.0283, "step": 142780 }, { "epoch": 1.1553523747876042, "grad_norm": 0.2127239853143692, "learning_rate": 4.517710435021358e-06, "loss": 0.0221, "step": 142790 }, { "epoch": 1.155433287482806, "grad_norm": 0.20671343803405762, "learning_rate": 4.517007635846479e-06, "loss": 0.0204, "step": 142800 }, { "epoch": 1.1555142001780079, "grad_norm": 0.2768133878707886, "learning_rate": 4.5163048463038615e-06, "loss": 0.0137, "step": 142810 }, { "epoch": 1.1555951128732098, "grad_norm": 0.4770773649215698, "learning_rate": 4.515602066407521e-06, "loss": 0.0248, "step": 142820 }, { "epoch": 1.1556760255684118, "grad_norm": 0.4329066574573517, "learning_rate": 4.514899296171477e-06, "loss": 0.0253, "step": 142830 }, { "epoch": 1.1557569382636135, "grad_norm": 0.6243679523468018, "learning_rate": 4.514196535609743e-06, "loss": 0.0224, "step": 142840 }, { "epoch": 1.1558378509588154, "grad_norm": 0.3529009521007538, "learning_rate": 4.513493784736331e-06, "loss": 0.0171, "step": 142850 }, { "epoch": 1.1559187636540174, "grad_norm": 0.8132719397544861, "learning_rate": 4.512791043565261e-06, "loss": 0.0422, "step": 142860 }, { "epoch": 1.1559996763492193, "grad_norm": 0.3399217128753662, "learning_rate": 4.512088312110543e-06, "loss": 0.0251, "step": 142870 }, { "epoch": 1.156080589044421, "grad_norm": 0.20109473168849945, "learning_rate": 4.511385590386194e-06, "loss": 0.0121, "step": 142880 }, { "epoch": 1.156161501739623, "grad_norm": 0.4953770339488983, "learning_rate": 4.510682878406229e-06, "loss": 0.0202, "step": 142890 }, { "epoch": 1.156242414434825, "grad_norm": 0.0035887944977730513, "learning_rate": 4.50998017618466e-06, "loss": 0.0178, "step": 142900 }, { "epoch": 1.1563233271300266, "grad_norm": 0.5460926294326782, "learning_rate": 4.5092774837355015e-06, "loss": 0.0151, "step": 142910 }, { "epoch": 1.1564042398252286, "grad_norm": 0.8279779553413391, "learning_rate": 4.508574801072769e-06, "loss": 0.0197, "step": 142920 }, { "epoch": 1.1564851525204305, "grad_norm": 0.44982120394706726, "learning_rate": 4.507872128210474e-06, "loss": 0.021, "step": 142930 }, { "epoch": 1.1565660652156322, "grad_norm": 0.3591266870498657, "learning_rate": 4.50716946516263e-06, "loss": 0.0219, "step": 142940 }, { "epoch": 1.1566469779108342, "grad_norm": 0.4017445743083954, "learning_rate": 4.506466811943251e-06, "loss": 0.0203, "step": 142950 }, { "epoch": 1.156727890606036, "grad_norm": 0.2471233457326889, "learning_rate": 4.5057641685663505e-06, "loss": 0.0243, "step": 142960 }, { "epoch": 1.156808803301238, "grad_norm": 0.33894234895706177, "learning_rate": 4.505061535045938e-06, "loss": 0.022, "step": 142970 }, { "epoch": 1.1568897159964397, "grad_norm": 0.2009386420249939, "learning_rate": 4.504358911396029e-06, "loss": 0.022, "step": 142980 }, { "epoch": 1.1569706286916417, "grad_norm": 0.2921326458454132, "learning_rate": 4.503656297630637e-06, "loss": 0.0272, "step": 142990 }, { "epoch": 1.1570515413868436, "grad_norm": 0.49101027846336365, "learning_rate": 4.502953693763772e-06, "loss": 0.0156, "step": 143000 }, { "epoch": 1.1571324540820456, "grad_norm": 0.15787257254123688, "learning_rate": 4.502251099809444e-06, "loss": 0.0221, "step": 143010 }, { "epoch": 1.1572133667772473, "grad_norm": 0.2553579807281494, "learning_rate": 4.50154851578167e-06, "loss": 0.0272, "step": 143020 }, { "epoch": 1.1572942794724492, "grad_norm": 0.3204219341278076, "learning_rate": 4.500845941694457e-06, "loss": 0.0246, "step": 143030 }, { "epoch": 1.1573751921676512, "grad_norm": 0.4689527451992035, "learning_rate": 4.500143377561816e-06, "loss": 0.017, "step": 143040 }, { "epoch": 1.1574561048628529, "grad_norm": 0.20429545640945435, "learning_rate": 4.499440823397764e-06, "loss": 0.0112, "step": 143050 }, { "epoch": 1.1575370175580548, "grad_norm": 0.3951420485973358, "learning_rate": 4.4987382792163045e-06, "loss": 0.0238, "step": 143060 }, { "epoch": 1.1576179302532568, "grad_norm": 0.41326838731765747, "learning_rate": 4.498035745031454e-06, "loss": 0.021, "step": 143070 }, { "epoch": 1.1576988429484587, "grad_norm": 0.2658800780773163, "learning_rate": 4.497333220857221e-06, "loss": 0.0288, "step": 143080 }, { "epoch": 1.1577797556436604, "grad_norm": 0.2809365689754486, "learning_rate": 4.496630706707614e-06, "loss": 0.0213, "step": 143090 }, { "epoch": 1.1578606683388624, "grad_norm": 0.409513384103775, "learning_rate": 4.495928202596647e-06, "loss": 0.0158, "step": 143100 }, { "epoch": 1.1579415810340643, "grad_norm": 0.3398725092411041, "learning_rate": 4.495225708538327e-06, "loss": 0.026, "step": 143110 }, { "epoch": 1.1580224937292662, "grad_norm": 0.2872450053691864, "learning_rate": 4.4945232245466635e-06, "loss": 0.0298, "step": 143120 }, { "epoch": 1.158103406424468, "grad_norm": 0.4911848306655884, "learning_rate": 4.493820750635667e-06, "loss": 0.0187, "step": 143130 }, { "epoch": 1.15818431911967, "grad_norm": 0.31361839175224304, "learning_rate": 4.493118286819348e-06, "loss": 0.0211, "step": 143140 }, { "epoch": 1.1582652318148718, "grad_norm": 0.33108171820640564, "learning_rate": 4.4924158331117166e-06, "loss": 0.0224, "step": 143150 }, { "epoch": 1.1583461445100736, "grad_norm": 0.20953764021396637, "learning_rate": 4.4917133895267765e-06, "loss": 0.0193, "step": 143160 }, { "epoch": 1.1584270572052755, "grad_norm": 0.30243849754333496, "learning_rate": 4.491010956078542e-06, "loss": 0.0161, "step": 143170 }, { "epoch": 1.1585079699004774, "grad_norm": 0.6501978635787964, "learning_rate": 4.4903085327810195e-06, "loss": 0.0224, "step": 143180 }, { "epoch": 1.1585888825956792, "grad_norm": 0.45817437767982483, "learning_rate": 4.489606119648216e-06, "loss": 0.0253, "step": 143190 }, { "epoch": 1.158669795290881, "grad_norm": 0.2294049710035324, "learning_rate": 4.488903716694141e-06, "loss": 0.0262, "step": 143200 }, { "epoch": 1.158750707986083, "grad_norm": 0.3596555292606354, "learning_rate": 4.488201323932804e-06, "loss": 0.031, "step": 143210 }, { "epoch": 1.158831620681285, "grad_norm": 0.4169609844684601, "learning_rate": 4.48749894137821e-06, "loss": 0.0133, "step": 143220 }, { "epoch": 1.1589125333764867, "grad_norm": 0.14412125945091248, "learning_rate": 4.4867965690443664e-06, "loss": 0.0168, "step": 143230 }, { "epoch": 1.1589934460716886, "grad_norm": 0.36875978112220764, "learning_rate": 4.486094206945284e-06, "loss": 0.0211, "step": 143240 }, { "epoch": 1.1590743587668906, "grad_norm": 0.21202202141284943, "learning_rate": 4.485391855094968e-06, "loss": 0.0248, "step": 143250 }, { "epoch": 1.1591552714620925, "grad_norm": 0.5513287782669067, "learning_rate": 4.484689513507423e-06, "loss": 0.0144, "step": 143260 }, { "epoch": 1.1592361841572942, "grad_norm": 0.6156781315803528, "learning_rate": 4.48398718219666e-06, "loss": 0.0144, "step": 143270 }, { "epoch": 1.1593170968524962, "grad_norm": 0.4563104808330536, "learning_rate": 4.483284861176681e-06, "loss": 0.02, "step": 143280 }, { "epoch": 1.1593980095476981, "grad_norm": 0.21168912947177887, "learning_rate": 4.482582550461495e-06, "loss": 0.0197, "step": 143290 }, { "epoch": 1.1594789222428998, "grad_norm": 0.2595351040363312, "learning_rate": 4.48188025006511e-06, "loss": 0.0216, "step": 143300 }, { "epoch": 1.1595598349381018, "grad_norm": 0.22663424909114838, "learning_rate": 4.481177960001527e-06, "loss": 0.0183, "step": 143310 }, { "epoch": 1.1596407476333037, "grad_norm": 0.13560180366039276, "learning_rate": 4.480475680284755e-06, "loss": 0.0208, "step": 143320 }, { "epoch": 1.1597216603285054, "grad_norm": 0.29130056500434875, "learning_rate": 4.4797734109287985e-06, "loss": 0.0163, "step": 143330 }, { "epoch": 1.1598025730237074, "grad_norm": 0.3718106150627136, "learning_rate": 4.479071151947665e-06, "loss": 0.0159, "step": 143340 }, { "epoch": 1.1598834857189093, "grad_norm": 0.27702030539512634, "learning_rate": 4.4783689033553555e-06, "loss": 0.0163, "step": 143350 }, { "epoch": 1.1599643984141113, "grad_norm": 0.2243073284626007, "learning_rate": 4.477666665165878e-06, "loss": 0.0206, "step": 143360 }, { "epoch": 1.160045311109313, "grad_norm": 0.2945643663406372, "learning_rate": 4.476964437393237e-06, "loss": 0.0184, "step": 143370 }, { "epoch": 1.160126223804515, "grad_norm": 0.45519939064979553, "learning_rate": 4.476262220051434e-06, "loss": 0.0192, "step": 143380 }, { "epoch": 1.1602071364997169, "grad_norm": 0.5875052809715271, "learning_rate": 4.4755600131544764e-06, "loss": 0.0343, "step": 143390 }, { "epoch": 1.1602880491949188, "grad_norm": 0.6528890132904053, "learning_rate": 4.4748578167163675e-06, "loss": 0.0223, "step": 143400 }, { "epoch": 1.1603689618901205, "grad_norm": 0.4014417231082916, "learning_rate": 4.474155630751111e-06, "loss": 0.0228, "step": 143410 }, { "epoch": 1.1604498745853224, "grad_norm": 0.31524091958999634, "learning_rate": 4.473453455272709e-06, "loss": 0.0201, "step": 143420 }, { "epoch": 1.1605307872805244, "grad_norm": 0.19395224750041962, "learning_rate": 4.472751290295168e-06, "loss": 0.0295, "step": 143430 }, { "epoch": 1.160611699975726, "grad_norm": 0.5312215089797974, "learning_rate": 4.472049135832489e-06, "loss": 0.0229, "step": 143440 }, { "epoch": 1.160692612670928, "grad_norm": 0.6639629602432251, "learning_rate": 4.471346991898674e-06, "loss": 0.0196, "step": 143450 }, { "epoch": 1.16077352536613, "grad_norm": 0.3424975275993347, "learning_rate": 4.470644858507729e-06, "loss": 0.0195, "step": 143460 }, { "epoch": 1.1608544380613317, "grad_norm": 0.15876105427742004, "learning_rate": 4.469942735673654e-06, "loss": 0.0143, "step": 143470 }, { "epoch": 1.1609353507565336, "grad_norm": 0.27866658568382263, "learning_rate": 4.469240623410452e-06, "loss": 0.0188, "step": 143480 }, { "epoch": 1.1610162634517356, "grad_norm": 0.4168539345264435, "learning_rate": 4.468538521732124e-06, "loss": 0.0251, "step": 143490 }, { "epoch": 1.1610971761469375, "grad_norm": 0.46959608793258667, "learning_rate": 4.467836430652676e-06, "loss": 0.0286, "step": 143500 }, { "epoch": 1.1611780888421392, "grad_norm": 0.39797407388687134, "learning_rate": 4.467134350186106e-06, "loss": 0.0207, "step": 143510 }, { "epoch": 1.1612590015373412, "grad_norm": 0.4454457461833954, "learning_rate": 4.466432280346415e-06, "loss": 0.0238, "step": 143520 }, { "epoch": 1.1613399142325431, "grad_norm": 0.5522028803825378, "learning_rate": 4.465730221147609e-06, "loss": 0.0188, "step": 143530 }, { "epoch": 1.161420826927745, "grad_norm": 0.36062294244766235, "learning_rate": 4.465028172603682e-06, "loss": 0.0228, "step": 143540 }, { "epoch": 1.1615017396229468, "grad_norm": 0.28144824504852295, "learning_rate": 4.4643261347286405e-06, "loss": 0.0215, "step": 143550 }, { "epoch": 1.1615826523181487, "grad_norm": 0.2678001821041107, "learning_rate": 4.463624107536485e-06, "loss": 0.0183, "step": 143560 }, { "epoch": 1.1616635650133507, "grad_norm": 0.3503480553627014, "learning_rate": 4.4629220910412105e-06, "loss": 0.0223, "step": 143570 }, { "epoch": 1.1617444777085524, "grad_norm": 0.22491887211799622, "learning_rate": 4.462220085256824e-06, "loss": 0.0216, "step": 143580 }, { "epoch": 1.1618253904037543, "grad_norm": 0.5438963770866394, "learning_rate": 4.461518090197323e-06, "loss": 0.0124, "step": 143590 }, { "epoch": 1.1619063030989563, "grad_norm": 0.3661254644393921, "learning_rate": 4.4608161058767045e-06, "loss": 0.0213, "step": 143600 }, { "epoch": 1.161987215794158, "grad_norm": 0.3646778464317322, "learning_rate": 4.460114132308971e-06, "loss": 0.03, "step": 143610 }, { "epoch": 1.16206812848936, "grad_norm": 0.5672107338905334, "learning_rate": 4.459412169508123e-06, "loss": 0.0178, "step": 143620 }, { "epoch": 1.1621490411845619, "grad_norm": 0.4748257100582123, "learning_rate": 4.458710217488157e-06, "loss": 0.0207, "step": 143630 }, { "epoch": 1.1622299538797638, "grad_norm": 0.31313663721084595, "learning_rate": 4.458008276263072e-06, "loss": 0.0151, "step": 143640 }, { "epoch": 1.1623108665749655, "grad_norm": 0.33518892526626587, "learning_rate": 4.4573063458468704e-06, "loss": 0.0305, "step": 143650 }, { "epoch": 1.1623917792701675, "grad_norm": 0.0299280546605587, "learning_rate": 4.456604426253547e-06, "loss": 0.0269, "step": 143660 }, { "epoch": 1.1624726919653694, "grad_norm": 0.2366494983434677, "learning_rate": 4.4559025174971e-06, "loss": 0.0242, "step": 143670 }, { "epoch": 1.1625536046605713, "grad_norm": 0.09293326735496521, "learning_rate": 4.455200619591528e-06, "loss": 0.0104, "step": 143680 }, { "epoch": 1.162634517355773, "grad_norm": 0.2410755604505539, "learning_rate": 4.454498732550832e-06, "loss": 0.0123, "step": 143690 }, { "epoch": 1.162715430050975, "grad_norm": 0.32696375250816345, "learning_rate": 4.453796856389006e-06, "loss": 0.0294, "step": 143700 }, { "epoch": 1.162796342746177, "grad_norm": 0.42439842224121094, "learning_rate": 4.453094991120047e-06, "loss": 0.0109, "step": 143710 }, { "epoch": 1.1628772554413787, "grad_norm": 0.28927579522132874, "learning_rate": 4.452393136757957e-06, "loss": 0.019, "step": 143720 }, { "epoch": 1.1629581681365806, "grad_norm": 0.34977245330810547, "learning_rate": 4.451691293316729e-06, "loss": 0.0166, "step": 143730 }, { "epoch": 1.1630390808317825, "grad_norm": 0.3078145384788513, "learning_rate": 4.450989460810359e-06, "loss": 0.0136, "step": 143740 }, { "epoch": 1.1631199935269845, "grad_norm": 0.3270203173160553, "learning_rate": 4.450287639252847e-06, "loss": 0.0264, "step": 143750 }, { "epoch": 1.1632009062221862, "grad_norm": 0.3875695466995239, "learning_rate": 4.449585828658186e-06, "loss": 0.0237, "step": 143760 }, { "epoch": 1.1632818189173881, "grad_norm": 0.5294573903083801, "learning_rate": 4.448884029040374e-06, "loss": 0.0227, "step": 143770 }, { "epoch": 1.16336273161259, "grad_norm": 0.358507364988327, "learning_rate": 4.448182240413408e-06, "loss": 0.0185, "step": 143780 }, { "epoch": 1.163443644307792, "grad_norm": 0.3340647220611572, "learning_rate": 4.44748046279128e-06, "loss": 0.023, "step": 143790 }, { "epoch": 1.1635245570029937, "grad_norm": 0.6758440136909485, "learning_rate": 4.44677869618799e-06, "loss": 0.0218, "step": 143800 }, { "epoch": 1.1636054696981957, "grad_norm": 0.27716854214668274, "learning_rate": 4.4460769406175305e-06, "loss": 0.0164, "step": 143810 }, { "epoch": 1.1636863823933976, "grad_norm": 0.2755054831504822, "learning_rate": 4.445375196093895e-06, "loss": 0.0199, "step": 143820 }, { "epoch": 1.1637672950885993, "grad_norm": 0.5428021550178528, "learning_rate": 4.4446734626310815e-06, "loss": 0.0226, "step": 143830 }, { "epoch": 1.1638482077838013, "grad_norm": 0.5341155529022217, "learning_rate": 4.443971740243085e-06, "loss": 0.0301, "step": 143840 }, { "epoch": 1.1639291204790032, "grad_norm": 0.337163507938385, "learning_rate": 4.443270028943897e-06, "loss": 0.019, "step": 143850 }, { "epoch": 1.164010033174205, "grad_norm": 0.35648852586746216, "learning_rate": 4.442568328747512e-06, "loss": 0.0308, "step": 143860 }, { "epoch": 1.1640909458694069, "grad_norm": 0.40452903509140015, "learning_rate": 4.441866639667926e-06, "loss": 0.0229, "step": 143870 }, { "epoch": 1.1641718585646088, "grad_norm": 0.2876441478729248, "learning_rate": 4.441164961719133e-06, "loss": 0.0129, "step": 143880 }, { "epoch": 1.1642527712598107, "grad_norm": 0.2476121187210083, "learning_rate": 4.440463294915123e-06, "loss": 0.0253, "step": 143890 }, { "epoch": 1.1643336839550125, "grad_norm": 0.5817387104034424, "learning_rate": 4.439761639269891e-06, "loss": 0.0217, "step": 143900 }, { "epoch": 1.1644145966502144, "grad_norm": 0.16479408740997314, "learning_rate": 4.439059994797432e-06, "loss": 0.0196, "step": 143910 }, { "epoch": 1.1644955093454163, "grad_norm": 0.3140712082386017, "learning_rate": 4.438358361511736e-06, "loss": 0.0237, "step": 143920 }, { "epoch": 1.1645764220406183, "grad_norm": 0.3111860752105713, "learning_rate": 4.437656739426797e-06, "loss": 0.0219, "step": 143930 }, { "epoch": 1.16465733473582, "grad_norm": 0.5190033912658691, "learning_rate": 4.436955128556609e-06, "loss": 0.0269, "step": 143940 }, { "epoch": 1.164738247431022, "grad_norm": 0.38599836826324463, "learning_rate": 4.43625352891516e-06, "loss": 0.0223, "step": 143950 }, { "epoch": 1.1648191601262239, "grad_norm": 0.687382161617279, "learning_rate": 4.4355519405164445e-06, "loss": 0.0225, "step": 143960 }, { "epoch": 1.1649000728214256, "grad_norm": 0.4063835144042969, "learning_rate": 4.4348503633744565e-06, "loss": 0.0192, "step": 143970 }, { "epoch": 1.1649809855166275, "grad_norm": 1.288100004196167, "learning_rate": 4.434148797503182e-06, "loss": 0.0202, "step": 143980 }, { "epoch": 1.1650618982118295, "grad_norm": 0.7994094491004944, "learning_rate": 4.433447242916616e-06, "loss": 0.0261, "step": 143990 }, { "epoch": 1.1651428109070312, "grad_norm": 0.20946767926216125, "learning_rate": 4.43274569962875e-06, "loss": 0.0275, "step": 144000 }, { "epoch": 1.1652237236022331, "grad_norm": 0.4627535939216614, "learning_rate": 4.432044167653571e-06, "loss": 0.0275, "step": 144010 }, { "epoch": 1.165304636297435, "grad_norm": 0.3164171278476715, "learning_rate": 4.431342647005073e-06, "loss": 0.0146, "step": 144020 }, { "epoch": 1.165385548992637, "grad_norm": 0.424384206533432, "learning_rate": 4.430641137697246e-06, "loss": 0.0157, "step": 144030 }, { "epoch": 1.1654664616878387, "grad_norm": 0.3328774571418762, "learning_rate": 4.42993963974408e-06, "loss": 0.022, "step": 144040 }, { "epoch": 1.1655473743830407, "grad_norm": 0.03487110882997513, "learning_rate": 4.429238153159563e-06, "loss": 0.0213, "step": 144050 }, { "epoch": 1.1656282870782426, "grad_norm": 0.3995082676410675, "learning_rate": 4.428536677957687e-06, "loss": 0.0152, "step": 144060 }, { "epoch": 1.1657091997734446, "grad_norm": 0.293584942817688, "learning_rate": 4.427835214152442e-06, "loss": 0.0217, "step": 144070 }, { "epoch": 1.1657901124686463, "grad_norm": 0.16718561947345734, "learning_rate": 4.427133761757813e-06, "loss": 0.0172, "step": 144080 }, { "epoch": 1.1658710251638482, "grad_norm": 0.5250076651573181, "learning_rate": 4.4264323207877915e-06, "loss": 0.0215, "step": 144090 }, { "epoch": 1.1659519378590502, "grad_norm": 0.8295296430587769, "learning_rate": 4.42573089125637e-06, "loss": 0.0274, "step": 144100 }, { "epoch": 1.1660328505542519, "grad_norm": 0.21033427119255066, "learning_rate": 4.425029473177531e-06, "loss": 0.0142, "step": 144110 }, { "epoch": 1.1661137632494538, "grad_norm": 0.4332476854324341, "learning_rate": 4.424328066565265e-06, "loss": 0.0257, "step": 144120 }, { "epoch": 1.1661946759446558, "grad_norm": 0.39861994981765747, "learning_rate": 4.4236266714335625e-06, "loss": 0.0131, "step": 144130 }, { "epoch": 1.1662755886398575, "grad_norm": 0.4276360273361206, "learning_rate": 4.422925287796408e-06, "loss": 0.0403, "step": 144140 }, { "epoch": 1.1663565013350594, "grad_norm": 0.4322742223739624, "learning_rate": 4.422223915667789e-06, "loss": 0.024, "step": 144150 }, { "epoch": 1.1664374140302614, "grad_norm": 0.4857739508152008, "learning_rate": 4.4215225550616976e-06, "loss": 0.021, "step": 144160 }, { "epoch": 1.1665183267254633, "grad_norm": 0.38737815618515015, "learning_rate": 4.420821205992114e-06, "loss": 0.025, "step": 144170 }, { "epoch": 1.166599239420665, "grad_norm": 0.3881593942642212, "learning_rate": 4.4201198684730305e-06, "loss": 0.0134, "step": 144180 }, { "epoch": 1.166680152115867, "grad_norm": 0.4000212252140045, "learning_rate": 4.419418542518433e-06, "loss": 0.0344, "step": 144190 }, { "epoch": 1.166761064811069, "grad_norm": 0.578053891658783, "learning_rate": 4.418717228142306e-06, "loss": 0.0161, "step": 144200 }, { "epoch": 1.1668419775062708, "grad_norm": 0.2469630241394043, "learning_rate": 4.418015925358636e-06, "loss": 0.0197, "step": 144210 }, { "epoch": 1.1669228902014726, "grad_norm": 0.5261432528495789, "learning_rate": 4.41731463418141e-06, "loss": 0.0173, "step": 144220 }, { "epoch": 1.1670038028966745, "grad_norm": 0.4187134802341461, "learning_rate": 4.416613354624615e-06, "loss": 0.0231, "step": 144230 }, { "epoch": 1.1670847155918764, "grad_norm": 0.23443420231342316, "learning_rate": 4.415912086702233e-06, "loss": 0.0282, "step": 144240 }, { "epoch": 1.1671656282870781, "grad_norm": 0.7155407071113586, "learning_rate": 4.4152108304282524e-06, "loss": 0.0299, "step": 144250 }, { "epoch": 1.16724654098228, "grad_norm": 0.281485378742218, "learning_rate": 4.4145095858166585e-06, "loss": 0.0246, "step": 144260 }, { "epoch": 1.167327453677482, "grad_norm": 0.2128055840730667, "learning_rate": 4.413808352881433e-06, "loss": 0.018, "step": 144270 }, { "epoch": 1.167408366372684, "grad_norm": 0.3725266456604004, "learning_rate": 4.413107131636563e-06, "loss": 0.0262, "step": 144280 }, { "epoch": 1.1674892790678857, "grad_norm": 0.24980010092258453, "learning_rate": 4.412405922096034e-06, "loss": 0.0352, "step": 144290 }, { "epoch": 1.1675701917630876, "grad_norm": 0.6052035093307495, "learning_rate": 4.411704724273826e-06, "loss": 0.0253, "step": 144300 }, { "epoch": 1.1676511044582896, "grad_norm": 0.20952054858207703, "learning_rate": 4.411003538183924e-06, "loss": 0.0187, "step": 144310 }, { "epoch": 1.1677320171534913, "grad_norm": 0.3809339702129364, "learning_rate": 4.410302363840316e-06, "loss": 0.0258, "step": 144320 }, { "epoch": 1.1678129298486932, "grad_norm": 0.40303537249565125, "learning_rate": 4.409601201256982e-06, "loss": 0.0226, "step": 144330 }, { "epoch": 1.1678938425438952, "grad_norm": 0.7059223651885986, "learning_rate": 4.408900050447904e-06, "loss": 0.0252, "step": 144340 }, { "epoch": 1.167974755239097, "grad_norm": 0.14743821322917938, "learning_rate": 4.408198911427069e-06, "loss": 0.0281, "step": 144350 }, { "epoch": 1.1680556679342988, "grad_norm": 0.4447573721408844, "learning_rate": 4.407497784208457e-06, "loss": 0.022, "step": 144360 }, { "epoch": 1.1681365806295008, "grad_norm": 0.2634197175502777, "learning_rate": 4.4067966688060485e-06, "loss": 0.019, "step": 144370 }, { "epoch": 1.1682174933247027, "grad_norm": 0.5453130006790161, "learning_rate": 4.40609556523383e-06, "loss": 0.0279, "step": 144380 }, { "epoch": 1.1682984060199044, "grad_norm": 0.1696825474500656, "learning_rate": 4.405394473505781e-06, "loss": 0.0257, "step": 144390 }, { "epoch": 1.1683793187151064, "grad_norm": 0.5795655846595764, "learning_rate": 4.404693393635884e-06, "loss": 0.0233, "step": 144400 }, { "epoch": 1.1684602314103083, "grad_norm": 0.3051210045814514, "learning_rate": 4.4039923256381204e-06, "loss": 0.0152, "step": 144410 }, { "epoch": 1.1685411441055102, "grad_norm": 0.6191545128822327, "learning_rate": 4.403291269526473e-06, "loss": 0.0209, "step": 144420 }, { "epoch": 1.168622056800712, "grad_norm": 0.2329530268907547, "learning_rate": 4.4025902253149205e-06, "loss": 0.0163, "step": 144430 }, { "epoch": 1.168702969495914, "grad_norm": 0.504011332988739, "learning_rate": 4.401889193017444e-06, "loss": 0.0247, "step": 144440 }, { "epoch": 1.1687838821911158, "grad_norm": 0.10084433853626251, "learning_rate": 4.401188172648027e-06, "loss": 0.0153, "step": 144450 }, { "epoch": 1.1688647948863178, "grad_norm": 0.6786404252052307, "learning_rate": 4.400487164220645e-06, "loss": 0.0269, "step": 144460 }, { "epoch": 1.1689457075815195, "grad_norm": 0.18080899119377136, "learning_rate": 4.399786167749283e-06, "loss": 0.0217, "step": 144470 }, { "epoch": 1.1690266202767214, "grad_norm": 0.30899640917778015, "learning_rate": 4.399085183247919e-06, "loss": 0.0272, "step": 144480 }, { "epoch": 1.1691075329719234, "grad_norm": 0.7125238180160522, "learning_rate": 4.3983842107305306e-06, "loss": 0.0266, "step": 144490 }, { "epoch": 1.169188445667125, "grad_norm": 0.6968746185302734, "learning_rate": 4.397683250211099e-06, "loss": 0.028, "step": 144500 }, { "epoch": 1.169269358362327, "grad_norm": 0.45866525173187256, "learning_rate": 4.396982301703605e-06, "loss": 0.0332, "step": 144510 }, { "epoch": 1.169350271057529, "grad_norm": 0.5551543235778809, "learning_rate": 4.396281365222026e-06, "loss": 0.022, "step": 144520 }, { "epoch": 1.1694311837527307, "grad_norm": 0.19769984483718872, "learning_rate": 4.3955804407803385e-06, "loss": 0.0234, "step": 144530 }, { "epoch": 1.1695120964479326, "grad_norm": 0.17312273383140564, "learning_rate": 4.394879528392526e-06, "loss": 0.0124, "step": 144540 }, { "epoch": 1.1695930091431346, "grad_norm": 0.5101621747016907, "learning_rate": 4.394178628072563e-06, "loss": 0.0208, "step": 144550 }, { "epoch": 1.1696739218383365, "grad_norm": 0.14613692462444305, "learning_rate": 4.393477739834427e-06, "loss": 0.033, "step": 144560 }, { "epoch": 1.1697548345335382, "grad_norm": 0.7905586361885071, "learning_rate": 4.392776863692099e-06, "loss": 0.0275, "step": 144570 }, { "epoch": 1.1698357472287402, "grad_norm": 0.43758705258369446, "learning_rate": 4.392075999659552e-06, "loss": 0.0263, "step": 144580 }, { "epoch": 1.1699166599239421, "grad_norm": 0.520622193813324, "learning_rate": 4.391375147750767e-06, "loss": 0.0318, "step": 144590 }, { "epoch": 1.169997572619144, "grad_norm": 0.2670874297618866, "learning_rate": 4.390674307979719e-06, "loss": 0.0208, "step": 144600 }, { "epoch": 1.1700784853143458, "grad_norm": 0.5757308602333069, "learning_rate": 4.389973480360389e-06, "loss": 0.0225, "step": 144610 }, { "epoch": 1.1701593980095477, "grad_norm": 0.3622269332408905, "learning_rate": 4.389272664906747e-06, "loss": 0.0125, "step": 144620 }, { "epoch": 1.1702403107047497, "grad_norm": 0.29128679633140564, "learning_rate": 4.388571861632772e-06, "loss": 0.016, "step": 144630 }, { "epoch": 1.1703212233999514, "grad_norm": 0.2718517780303955, "learning_rate": 4.387871070552443e-06, "loss": 0.0316, "step": 144640 }, { "epoch": 1.1704021360951533, "grad_norm": 0.19826649129390717, "learning_rate": 4.387170291679731e-06, "loss": 0.0184, "step": 144650 }, { "epoch": 1.1704830487903553, "grad_norm": 0.2687571048736572, "learning_rate": 4.386469525028614e-06, "loss": 0.0173, "step": 144660 }, { "epoch": 1.170563961485557, "grad_norm": 0.33108824491500854, "learning_rate": 4.385768770613069e-06, "loss": 0.0102, "step": 144670 }, { "epoch": 1.170644874180759, "grad_norm": 0.49971309304237366, "learning_rate": 4.385068028447067e-06, "loss": 0.022, "step": 144680 }, { "epoch": 1.1707257868759608, "grad_norm": 0.2972196936607361, "learning_rate": 4.384367298544586e-06, "loss": 0.0228, "step": 144690 }, { "epoch": 1.1708066995711628, "grad_norm": 0.1402003914117813, "learning_rate": 4.383666580919601e-06, "loss": 0.0188, "step": 144700 }, { "epoch": 1.1708876122663645, "grad_norm": 0.36432117223739624, "learning_rate": 4.382965875586082e-06, "loss": 0.0202, "step": 144710 }, { "epoch": 1.1709685249615664, "grad_norm": 0.35388943552970886, "learning_rate": 4.382265182558006e-06, "loss": 0.0311, "step": 144720 }, { "epoch": 1.1710494376567684, "grad_norm": 0.4193668067455292, "learning_rate": 4.38156450184935e-06, "loss": 0.0189, "step": 144730 }, { "epoch": 1.1711303503519703, "grad_norm": 0.007784165907651186, "learning_rate": 4.380863833474083e-06, "loss": 0.0224, "step": 144740 }, { "epoch": 1.171211263047172, "grad_norm": 0.3820536434650421, "learning_rate": 4.380163177446178e-06, "loss": 0.0167, "step": 144750 }, { "epoch": 1.171292175742374, "grad_norm": 0.15114358067512512, "learning_rate": 4.379462533779611e-06, "loss": 0.0267, "step": 144760 }, { "epoch": 1.171373088437576, "grad_norm": 0.4956136643886566, "learning_rate": 4.3787619024883556e-06, "loss": 0.0252, "step": 144770 }, { "epoch": 1.1714540011327776, "grad_norm": 0.35043132305145264, "learning_rate": 4.37806128358638e-06, "loss": 0.0115, "step": 144780 }, { "epoch": 1.1715349138279796, "grad_norm": 0.494466632604599, "learning_rate": 4.37736067708766e-06, "loss": 0.0166, "step": 144790 }, { "epoch": 1.1716158265231815, "grad_norm": 0.2677847743034363, "learning_rate": 4.376660083006167e-06, "loss": 0.0172, "step": 144800 }, { "epoch": 1.1716967392183832, "grad_norm": 0.5844210982322693, "learning_rate": 4.375959501355873e-06, "loss": 0.0251, "step": 144810 }, { "epoch": 1.1717776519135852, "grad_norm": 0.3764208257198334, "learning_rate": 4.375258932150748e-06, "loss": 0.0234, "step": 144820 }, { "epoch": 1.1718585646087871, "grad_norm": 0.27622637152671814, "learning_rate": 4.374558375404767e-06, "loss": 0.0183, "step": 144830 }, { "epoch": 1.171939477303989, "grad_norm": 0.26504358649253845, "learning_rate": 4.373857831131897e-06, "loss": 0.0361, "step": 144840 }, { "epoch": 1.1720203899991908, "grad_norm": 0.44557076692581177, "learning_rate": 4.3731572993461105e-06, "loss": 0.0197, "step": 144850 }, { "epoch": 1.1721013026943927, "grad_norm": 0.3572101593017578, "learning_rate": 4.372456780061381e-06, "loss": 0.02, "step": 144860 }, { "epoch": 1.1721822153895947, "grad_norm": 0.4394705593585968, "learning_rate": 4.371756273291673e-06, "loss": 0.0297, "step": 144870 }, { "epoch": 1.1722631280847966, "grad_norm": 0.08830080926418304, "learning_rate": 4.371055779050961e-06, "loss": 0.0272, "step": 144880 }, { "epoch": 1.1723440407799983, "grad_norm": 0.3089495599269867, "learning_rate": 4.370355297353216e-06, "loss": 0.0127, "step": 144890 }, { "epoch": 1.1724249534752003, "grad_norm": 0.9544740319252014, "learning_rate": 4.369654828212402e-06, "loss": 0.0403, "step": 144900 }, { "epoch": 1.1725058661704022, "grad_norm": 0.3352424204349518, "learning_rate": 4.368954371642493e-06, "loss": 0.0208, "step": 144910 }, { "epoch": 1.172586778865604, "grad_norm": 1.2800952196121216, "learning_rate": 4.368253927657457e-06, "loss": 0.0218, "step": 144920 }, { "epoch": 1.1726676915608059, "grad_norm": 0.3170813322067261, "learning_rate": 4.3675534962712625e-06, "loss": 0.0167, "step": 144930 }, { "epoch": 1.1727486042560078, "grad_norm": 0.20022691786289215, "learning_rate": 4.366853077497877e-06, "loss": 0.0188, "step": 144940 }, { "epoch": 1.1728295169512097, "grad_norm": 0.2848338484764099, "learning_rate": 4.366152671351273e-06, "loss": 0.028, "step": 144950 }, { "epoch": 1.1729104296464115, "grad_norm": 0.47350409626960754, "learning_rate": 4.365452277845415e-06, "loss": 0.0231, "step": 144960 }, { "epoch": 1.1729913423416134, "grad_norm": 0.296924352645874, "learning_rate": 4.36475189699427e-06, "loss": 0.0164, "step": 144970 }, { "epoch": 1.1730722550368153, "grad_norm": 0.38454756140708923, "learning_rate": 4.364051528811809e-06, "loss": 0.016, "step": 144980 }, { "epoch": 1.1731531677320173, "grad_norm": 0.2160748541355133, "learning_rate": 4.363351173311997e-06, "loss": 0.0229, "step": 144990 }, { "epoch": 1.173234080427219, "grad_norm": 0.1335088461637497, "learning_rate": 4.362650830508803e-06, "loss": 0.0188, "step": 145000 }, { "epoch": 1.173314993122421, "grad_norm": 0.33078834414482117, "learning_rate": 4.361950500416191e-06, "loss": 0.0271, "step": 145010 }, { "epoch": 1.1733959058176229, "grad_norm": 0.4335356056690216, "learning_rate": 4.36125018304813e-06, "loss": 0.0233, "step": 145020 }, { "epoch": 1.1734768185128246, "grad_norm": 0.4679431915283203, "learning_rate": 4.360549878418587e-06, "loss": 0.0247, "step": 145030 }, { "epoch": 1.1735577312080265, "grad_norm": 0.28300076723098755, "learning_rate": 4.3598495865415245e-06, "loss": 0.0177, "step": 145040 }, { "epoch": 1.1736386439032285, "grad_norm": 0.33649149537086487, "learning_rate": 4.3591493074309125e-06, "loss": 0.0229, "step": 145050 }, { "epoch": 1.1737195565984302, "grad_norm": 0.7502655386924744, "learning_rate": 4.358449041100713e-06, "loss": 0.0204, "step": 145060 }, { "epoch": 1.1738004692936321, "grad_norm": 0.21209414303302765, "learning_rate": 4.357748787564894e-06, "loss": 0.0263, "step": 145070 }, { "epoch": 1.173881381988834, "grad_norm": 0.3844326138496399, "learning_rate": 4.3570485468374205e-06, "loss": 0.0204, "step": 145080 }, { "epoch": 1.173962294684036, "grad_norm": 0.3039173483848572, "learning_rate": 4.356348318932254e-06, "loss": 0.0129, "step": 145090 }, { "epoch": 1.1740432073792377, "grad_norm": 0.7659505009651184, "learning_rate": 4.355648103863363e-06, "loss": 0.0216, "step": 145100 }, { "epoch": 1.1741241200744397, "grad_norm": 0.4955388605594635, "learning_rate": 4.354947901644711e-06, "loss": 0.029, "step": 145110 }, { "epoch": 1.1742050327696416, "grad_norm": 0.7697127461433411, "learning_rate": 4.35424771229026e-06, "loss": 0.0111, "step": 145120 }, { "epoch": 1.1742859454648436, "grad_norm": 0.5669322609901428, "learning_rate": 4.353547535813974e-06, "loss": 0.0256, "step": 145130 }, { "epoch": 1.1743668581600453, "grad_norm": 0.454929381608963, "learning_rate": 4.352847372229819e-06, "loss": 0.0182, "step": 145140 }, { "epoch": 1.1744477708552472, "grad_norm": 0.4043557941913605, "learning_rate": 4.352147221551758e-06, "loss": 0.0162, "step": 145150 }, { "epoch": 1.1745286835504491, "grad_norm": 0.22520092129707336, "learning_rate": 4.351447083793751e-06, "loss": 0.023, "step": 145160 }, { "epoch": 1.1746095962456509, "grad_norm": 0.7454725503921509, "learning_rate": 4.350746958969765e-06, "loss": 0.0125, "step": 145170 }, { "epoch": 1.1746905089408528, "grad_norm": 0.2781158983707428, "learning_rate": 4.35004684709376e-06, "loss": 0.0109, "step": 145180 }, { "epoch": 1.1747714216360547, "grad_norm": 0.3056391179561615, "learning_rate": 4.349346748179696e-06, "loss": 0.023, "step": 145190 }, { "epoch": 1.1748523343312565, "grad_norm": 0.011341370642185211, "learning_rate": 4.3486466622415385e-06, "loss": 0.0191, "step": 145200 }, { "epoch": 1.1749332470264584, "grad_norm": 0.42508816719055176, "learning_rate": 4.34794658929325e-06, "loss": 0.0204, "step": 145210 }, { "epoch": 1.1750141597216603, "grad_norm": 0.3920746147632599, "learning_rate": 4.347246529348789e-06, "loss": 0.0194, "step": 145220 }, { "epoch": 1.1750950724168623, "grad_norm": 0.38076701760292053, "learning_rate": 4.346546482422117e-06, "loss": 0.0172, "step": 145230 }, { "epoch": 1.175175985112064, "grad_norm": 0.4990305006504059, "learning_rate": 4.345846448527199e-06, "loss": 0.0247, "step": 145240 }, { "epoch": 1.175256897807266, "grad_norm": 0.2253803014755249, "learning_rate": 4.345146427677991e-06, "loss": 0.0207, "step": 145250 }, { "epoch": 1.1753378105024679, "grad_norm": 0.495534211397171, "learning_rate": 4.344446419888453e-06, "loss": 0.0212, "step": 145260 }, { "epoch": 1.1754187231976698, "grad_norm": 0.4001275897026062, "learning_rate": 4.3437464251725505e-06, "loss": 0.0187, "step": 145270 }, { "epoch": 1.1754996358928715, "grad_norm": 0.4501922130584717, "learning_rate": 4.343046443544237e-06, "loss": 0.018, "step": 145280 }, { "epoch": 1.1755805485880735, "grad_norm": 0.3478977680206299, "learning_rate": 4.342346475017477e-06, "loss": 0.0169, "step": 145290 }, { "epoch": 1.1756614612832754, "grad_norm": 0.5052446722984314, "learning_rate": 4.3416465196062265e-06, "loss": 0.0184, "step": 145300 }, { "epoch": 1.1757423739784771, "grad_norm": 0.2721516489982605, "learning_rate": 4.340946577324449e-06, "loss": 0.0156, "step": 145310 }, { "epoch": 1.175823286673679, "grad_norm": 0.7845718264579773, "learning_rate": 4.3402466481861e-06, "loss": 0.0284, "step": 145320 }, { "epoch": 1.175904199368881, "grad_norm": 0.24019552767276764, "learning_rate": 4.339546732205137e-06, "loss": 0.021, "step": 145330 }, { "epoch": 1.1759851120640827, "grad_norm": 0.18901142477989197, "learning_rate": 4.338846829395523e-06, "loss": 0.0351, "step": 145340 }, { "epoch": 1.1760660247592847, "grad_norm": 0.13382771611213684, "learning_rate": 4.338146939771211e-06, "loss": 0.025, "step": 145350 }, { "epoch": 1.1761469374544866, "grad_norm": 0.451722651720047, "learning_rate": 4.3374470633461615e-06, "loss": 0.0271, "step": 145360 }, { "epoch": 1.1762278501496886, "grad_norm": 0.3892417252063751, "learning_rate": 4.336747200134333e-06, "loss": 0.0222, "step": 145370 }, { "epoch": 1.1763087628448903, "grad_norm": 0.7288975715637207, "learning_rate": 4.336047350149679e-06, "loss": 0.0192, "step": 145380 }, { "epoch": 1.1763896755400922, "grad_norm": 0.525332510471344, "learning_rate": 4.3353475134061605e-06, "loss": 0.0271, "step": 145390 }, { "epoch": 1.1764705882352942, "grad_norm": 0.4822816550731659, "learning_rate": 4.334647689917734e-06, "loss": 0.0229, "step": 145400 }, { "epoch": 1.176551500930496, "grad_norm": 0.5000054836273193, "learning_rate": 4.333947879698351e-06, "loss": 0.0272, "step": 145410 }, { "epoch": 1.1766324136256978, "grad_norm": 0.6955594420433044, "learning_rate": 4.333248082761973e-06, "loss": 0.0173, "step": 145420 }, { "epoch": 1.1767133263208998, "grad_norm": 0.36042511463165283, "learning_rate": 4.332548299122557e-06, "loss": 0.022, "step": 145430 }, { "epoch": 1.1767942390161017, "grad_norm": 0.4305042028427124, "learning_rate": 4.331848528794054e-06, "loss": 0.0128, "step": 145440 }, { "epoch": 1.1768751517113034, "grad_norm": 0.4260183870792389, "learning_rate": 4.33114877179042e-06, "loss": 0.0335, "step": 145450 }, { "epoch": 1.1769560644065054, "grad_norm": 0.4050617516040802, "learning_rate": 4.330449028125615e-06, "loss": 0.015, "step": 145460 }, { "epoch": 1.1770369771017073, "grad_norm": 0.6660955548286438, "learning_rate": 4.329749297813589e-06, "loss": 0.0325, "step": 145470 }, { "epoch": 1.177117889796909, "grad_norm": 0.20982050895690918, "learning_rate": 4.329049580868297e-06, "loss": 0.0204, "step": 145480 }, { "epoch": 1.177198802492111, "grad_norm": 0.4814833700656891, "learning_rate": 4.328349877303695e-06, "loss": 0.0178, "step": 145490 }, { "epoch": 1.177279715187313, "grad_norm": 0.631693422794342, "learning_rate": 4.327650187133739e-06, "loss": 0.017, "step": 145500 }, { "epoch": 1.1773606278825148, "grad_norm": 0.2599393129348755, "learning_rate": 4.32695051037238e-06, "loss": 0.0316, "step": 145510 }, { "epoch": 1.1774415405777165, "grad_norm": 0.5980231165885925, "learning_rate": 4.32625084703357e-06, "loss": 0.0157, "step": 145520 }, { "epoch": 1.1775224532729185, "grad_norm": 0.19133888185024261, "learning_rate": 4.325551197131268e-06, "loss": 0.0197, "step": 145530 }, { "epoch": 1.1776033659681204, "grad_norm": 0.1420045644044876, "learning_rate": 4.32485156067942e-06, "loss": 0.0335, "step": 145540 }, { "epoch": 1.1776842786633224, "grad_norm": 0.44561654329299927, "learning_rate": 4.324151937691984e-06, "loss": 0.0238, "step": 145550 }, { "epoch": 1.177765191358524, "grad_norm": 0.1572069376707077, "learning_rate": 4.323452328182912e-06, "loss": 0.0278, "step": 145560 }, { "epoch": 1.177846104053726, "grad_norm": 0.3247643709182739, "learning_rate": 4.322752732166153e-06, "loss": 0.014, "step": 145570 }, { "epoch": 1.177927016748928, "grad_norm": 0.4405912160873413, "learning_rate": 4.322053149655662e-06, "loss": 0.0347, "step": 145580 }, { "epoch": 1.1780079294441297, "grad_norm": 0.5437319278717041, "learning_rate": 4.3213535806653905e-06, "loss": 0.029, "step": 145590 }, { "epoch": 1.1780888421393316, "grad_norm": 0.37871497869491577, "learning_rate": 4.320654025209288e-06, "loss": 0.013, "step": 145600 }, { "epoch": 1.1781697548345336, "grad_norm": 0.030390093103051186, "learning_rate": 4.319954483301305e-06, "loss": 0.018, "step": 145610 }, { "epoch": 1.1782506675297355, "grad_norm": 0.36663907766342163, "learning_rate": 4.319254954955398e-06, "loss": 0.0151, "step": 145620 }, { "epoch": 1.1783315802249372, "grad_norm": 0.27806007862091064, "learning_rate": 4.318555440185513e-06, "loss": 0.0175, "step": 145630 }, { "epoch": 1.1784124929201392, "grad_norm": 0.4280206859111786, "learning_rate": 4.317855939005598e-06, "loss": 0.0182, "step": 145640 }, { "epoch": 1.178493405615341, "grad_norm": 0.14075613021850586, "learning_rate": 4.31715645142961e-06, "loss": 0.0155, "step": 145650 }, { "epoch": 1.178574318310543, "grad_norm": 0.4142850637435913, "learning_rate": 4.3164569774714935e-06, "loss": 0.0287, "step": 145660 }, { "epoch": 1.1786552310057448, "grad_norm": 0.2964300811290741, "learning_rate": 4.315757517145199e-06, "loss": 0.0238, "step": 145670 }, { "epoch": 1.1787361437009467, "grad_norm": 0.5212101340293884, "learning_rate": 4.315058070464675e-06, "loss": 0.0236, "step": 145680 }, { "epoch": 1.1788170563961486, "grad_norm": 0.36037951707839966, "learning_rate": 4.314358637443875e-06, "loss": 0.0176, "step": 145690 }, { "epoch": 1.1788979690913504, "grad_norm": 0.4188174903392792, "learning_rate": 4.313659218096743e-06, "loss": 0.0183, "step": 145700 }, { "epoch": 1.1789788817865523, "grad_norm": 0.232369065284729, "learning_rate": 4.312959812437229e-06, "loss": 0.0202, "step": 145710 }, { "epoch": 1.1790597944817542, "grad_norm": 1.0031907558441162, "learning_rate": 4.312260420479282e-06, "loss": 0.0325, "step": 145720 }, { "epoch": 1.179140707176956, "grad_norm": 0.3763003945350647, "learning_rate": 4.3115610422368485e-06, "loss": 0.013, "step": 145730 }, { "epoch": 1.179221619872158, "grad_norm": 0.5640885829925537, "learning_rate": 4.3108616777238755e-06, "loss": 0.0409, "step": 145740 }, { "epoch": 1.1793025325673598, "grad_norm": 0.49056702852249146, "learning_rate": 4.310162326954314e-06, "loss": 0.0231, "step": 145750 }, { "epoch": 1.1793834452625618, "grad_norm": 0.025454416871070862, "learning_rate": 4.309462989942105e-06, "loss": 0.0193, "step": 145760 }, { "epoch": 1.1794643579577635, "grad_norm": 0.19226773083209991, "learning_rate": 4.308763666701201e-06, "loss": 0.0183, "step": 145770 }, { "epoch": 1.1795452706529654, "grad_norm": 0.3133659362792969, "learning_rate": 4.308064357245548e-06, "loss": 0.0317, "step": 145780 }, { "epoch": 1.1796261833481674, "grad_norm": 0.31787335872650146, "learning_rate": 4.3073650615890875e-06, "loss": 0.0146, "step": 145790 }, { "epoch": 1.1797070960433693, "grad_norm": 0.35136422514915466, "learning_rate": 4.30666577974577e-06, "loss": 0.0285, "step": 145800 }, { "epoch": 1.179788008738571, "grad_norm": 0.7604760527610779, "learning_rate": 4.305966511729541e-06, "loss": 0.0234, "step": 145810 }, { "epoch": 1.179868921433773, "grad_norm": 0.37058553099632263, "learning_rate": 4.305267257554343e-06, "loss": 0.0356, "step": 145820 }, { "epoch": 1.179949834128975, "grad_norm": 0.31526169180870056, "learning_rate": 4.304568017234123e-06, "loss": 0.023, "step": 145830 }, { "epoch": 1.1800307468241766, "grad_norm": 0.5389429330825806, "learning_rate": 4.303868790782826e-06, "loss": 0.0157, "step": 145840 }, { "epoch": 1.1801116595193786, "grad_norm": 0.3132847547531128, "learning_rate": 4.303169578214398e-06, "loss": 0.0152, "step": 145850 }, { "epoch": 1.1801925722145805, "grad_norm": 0.34758344292640686, "learning_rate": 4.30247037954278e-06, "loss": 0.0154, "step": 145860 }, { "epoch": 1.1802734849097822, "grad_norm": 0.19062702357769012, "learning_rate": 4.301771194781918e-06, "loss": 0.0261, "step": 145870 }, { "epoch": 1.1803543976049842, "grad_norm": 0.5694665312767029, "learning_rate": 4.301072023945756e-06, "loss": 0.0187, "step": 145880 }, { "epoch": 1.1804353103001861, "grad_norm": 0.4981480836868286, "learning_rate": 4.300372867048236e-06, "loss": 0.0164, "step": 145890 }, { "epoch": 1.180516222995388, "grad_norm": 0.45316267013549805, "learning_rate": 4.299673724103302e-06, "loss": 0.0285, "step": 145900 }, { "epoch": 1.1805971356905898, "grad_norm": 0.295340895652771, "learning_rate": 4.298974595124899e-06, "loss": 0.0126, "step": 145910 }, { "epoch": 1.1806780483857917, "grad_norm": 0.4373501241207123, "learning_rate": 4.298275480126967e-06, "loss": 0.0129, "step": 145920 }, { "epoch": 1.1807589610809937, "grad_norm": 0.17011530697345734, "learning_rate": 4.2975763791234476e-06, "loss": 0.0155, "step": 145930 }, { "epoch": 1.1808398737761956, "grad_norm": 0.20934946835041046, "learning_rate": 4.296877292128287e-06, "loss": 0.019, "step": 145940 }, { "epoch": 1.1809207864713973, "grad_norm": 0.2772473394870758, "learning_rate": 4.296178219155423e-06, "loss": 0.0236, "step": 145950 }, { "epoch": 1.1810016991665993, "grad_norm": 0.4064703583717346, "learning_rate": 4.2954791602187975e-06, "loss": 0.0144, "step": 145960 }, { "epoch": 1.1810826118618012, "grad_norm": 0.14326368272304535, "learning_rate": 4.294780115332355e-06, "loss": 0.0082, "step": 145970 }, { "epoch": 1.181163524557003, "grad_norm": 0.3876648247241974, "learning_rate": 4.294081084510033e-06, "loss": 0.0208, "step": 145980 }, { "epoch": 1.1812444372522048, "grad_norm": 0.5866066217422485, "learning_rate": 4.293382067765774e-06, "loss": 0.0188, "step": 145990 }, { "epoch": 1.1813253499474068, "grad_norm": 0.4781380891799927, "learning_rate": 4.292683065113519e-06, "loss": 0.0193, "step": 146000 }, { "epoch": 1.1814062626426085, "grad_norm": 0.35192587971687317, "learning_rate": 4.291984076567205e-06, "loss": 0.02, "step": 146010 }, { "epoch": 1.1814871753378104, "grad_norm": 0.17832811176776886, "learning_rate": 4.291285102140775e-06, "loss": 0.0181, "step": 146020 }, { "epoch": 1.1815680880330124, "grad_norm": 0.309289813041687, "learning_rate": 4.290586141848166e-06, "loss": 0.0166, "step": 146030 }, { "epoch": 1.1816490007282143, "grad_norm": 0.496985524892807, "learning_rate": 4.289887195703321e-06, "loss": 0.0214, "step": 146040 }, { "epoch": 1.181729913423416, "grad_norm": 0.6752830147743225, "learning_rate": 4.289188263720175e-06, "loss": 0.0289, "step": 146050 }, { "epoch": 1.181810826118618, "grad_norm": 0.5672091245651245, "learning_rate": 4.288489345912669e-06, "loss": 0.0377, "step": 146060 }, { "epoch": 1.18189173881382, "grad_norm": 0.1834893375635147, "learning_rate": 4.287790442294743e-06, "loss": 0.016, "step": 146070 }, { "epoch": 1.1819726515090219, "grad_norm": 0.3463844656944275, "learning_rate": 4.28709155288033e-06, "loss": 0.0259, "step": 146080 }, { "epoch": 1.1820535642042236, "grad_norm": 0.3743935227394104, "learning_rate": 4.286392677683371e-06, "loss": 0.0144, "step": 146090 }, { "epoch": 1.1821344768994255, "grad_norm": 0.367336243391037, "learning_rate": 4.285693816717806e-06, "loss": 0.0181, "step": 146100 }, { "epoch": 1.1822153895946275, "grad_norm": 0.6581511497497559, "learning_rate": 4.284994969997569e-06, "loss": 0.0265, "step": 146110 }, { "epoch": 1.1822963022898292, "grad_norm": 0.49133285880088806, "learning_rate": 4.284296137536596e-06, "loss": 0.0263, "step": 146120 }, { "epoch": 1.1823772149850311, "grad_norm": 0.41525527834892273, "learning_rate": 4.2835973193488286e-06, "loss": 0.0179, "step": 146130 }, { "epoch": 1.182458127680233, "grad_norm": 0.3861214816570282, "learning_rate": 4.282898515448199e-06, "loss": 0.0207, "step": 146140 }, { "epoch": 1.182539040375435, "grad_norm": 0.40150460600852966, "learning_rate": 4.282199725848643e-06, "loss": 0.0265, "step": 146150 }, { "epoch": 1.1826199530706367, "grad_norm": 0.2195180505514145, "learning_rate": 4.281500950564101e-06, "loss": 0.018, "step": 146160 }, { "epoch": 1.1827008657658387, "grad_norm": 0.6219816207885742, "learning_rate": 4.280802189608503e-06, "loss": 0.0163, "step": 146170 }, { "epoch": 1.1827817784610406, "grad_norm": 0.31078651547431946, "learning_rate": 4.280103442995789e-06, "loss": 0.0236, "step": 146180 }, { "epoch": 1.1828626911562423, "grad_norm": 0.5343384742736816, "learning_rate": 4.279404710739892e-06, "loss": 0.0105, "step": 146190 }, { "epoch": 1.1829436038514443, "grad_norm": 0.3328782021999359, "learning_rate": 4.278705992854745e-06, "loss": 0.0202, "step": 146200 }, { "epoch": 1.1830245165466462, "grad_norm": 0.11999146640300751, "learning_rate": 4.278007289354286e-06, "loss": 0.0142, "step": 146210 }, { "epoch": 1.1831054292418481, "grad_norm": 0.5081042051315308, "learning_rate": 4.277308600252446e-06, "loss": 0.0154, "step": 146220 }, { "epoch": 1.1831863419370499, "grad_norm": 0.4171206057071686, "learning_rate": 4.276609925563162e-06, "loss": 0.0184, "step": 146230 }, { "epoch": 1.1832672546322518, "grad_norm": 0.4220568537712097, "learning_rate": 4.275911265300364e-06, "loss": 0.0251, "step": 146240 }, { "epoch": 1.1833481673274537, "grad_norm": 0.15659451484680176, "learning_rate": 4.275212619477989e-06, "loss": 0.0223, "step": 146250 }, { "epoch": 1.1834290800226555, "grad_norm": 0.24309854209423065, "learning_rate": 4.274513988109968e-06, "loss": 0.0176, "step": 146260 }, { "epoch": 1.1835099927178574, "grad_norm": 0.42656004428863525, "learning_rate": 4.273815371210232e-06, "loss": 0.0224, "step": 146270 }, { "epoch": 1.1835909054130593, "grad_norm": 0.21764443814754486, "learning_rate": 4.2731167687927174e-06, "loss": 0.0265, "step": 146280 }, { "epoch": 1.1836718181082613, "grad_norm": 0.5020245909690857, "learning_rate": 4.272418180871355e-06, "loss": 0.0143, "step": 146290 }, { "epoch": 1.183752730803463, "grad_norm": 0.3826567530632019, "learning_rate": 4.271719607460073e-06, "loss": 0.0252, "step": 146300 }, { "epoch": 1.183833643498665, "grad_norm": 0.29057836532592773, "learning_rate": 4.271021048572807e-06, "loss": 0.035, "step": 146310 }, { "epoch": 1.1839145561938669, "grad_norm": 0.26758718490600586, "learning_rate": 4.2703225042234895e-06, "loss": 0.0298, "step": 146320 }, { "epoch": 1.1839954688890688, "grad_norm": 0.14741776883602142, "learning_rate": 4.269623974426048e-06, "loss": 0.0365, "step": 146330 }, { "epoch": 1.1840763815842705, "grad_norm": 0.43945568799972534, "learning_rate": 4.268925459194413e-06, "loss": 0.0314, "step": 146340 }, { "epoch": 1.1841572942794725, "grad_norm": 0.32251840829849243, "learning_rate": 4.2682269585425185e-06, "loss": 0.0187, "step": 146350 }, { "epoch": 1.1842382069746744, "grad_norm": 0.3426666855812073, "learning_rate": 4.267528472484291e-06, "loss": 0.0207, "step": 146360 }, { "epoch": 1.1843191196698761, "grad_norm": 0.4659428894519806, "learning_rate": 4.266830001033661e-06, "loss": 0.022, "step": 146370 }, { "epoch": 1.184400032365078, "grad_norm": 0.3304874300956726, "learning_rate": 4.266131544204562e-06, "loss": 0.0112, "step": 146380 }, { "epoch": 1.18448094506028, "grad_norm": 0.4448952376842499, "learning_rate": 4.2654331020109164e-06, "loss": 0.0156, "step": 146390 }, { "epoch": 1.1845618577554817, "grad_norm": 0.761617124080658, "learning_rate": 4.264734674466659e-06, "loss": 0.019, "step": 146400 }, { "epoch": 1.1846427704506837, "grad_norm": 0.7373841404914856, "learning_rate": 4.264036261585714e-06, "loss": 0.0178, "step": 146410 }, { "epoch": 1.1847236831458856, "grad_norm": 0.4158814251422882, "learning_rate": 4.263337863382016e-06, "loss": 0.0149, "step": 146420 }, { "epoch": 1.1848045958410875, "grad_norm": 0.6546210050582886, "learning_rate": 4.262639479869487e-06, "loss": 0.0216, "step": 146430 }, { "epoch": 1.1848855085362893, "grad_norm": 0.5738276839256287, "learning_rate": 4.2619411110620566e-06, "loss": 0.021, "step": 146440 }, { "epoch": 1.1849664212314912, "grad_norm": 0.31428292393684387, "learning_rate": 4.261242756973654e-06, "loss": 0.0169, "step": 146450 }, { "epoch": 1.1850473339266931, "grad_norm": 0.42548835277557373, "learning_rate": 4.2605444176182025e-06, "loss": 0.0219, "step": 146460 }, { "epoch": 1.185128246621895, "grad_norm": 0.40627849102020264, "learning_rate": 4.259846093009634e-06, "loss": 0.0274, "step": 146470 }, { "epoch": 1.1852091593170968, "grad_norm": 0.19923381507396698, "learning_rate": 4.259147783161872e-06, "loss": 0.0136, "step": 146480 }, { "epoch": 1.1852900720122987, "grad_norm": 0.3121320605278015, "learning_rate": 4.2584494880888424e-06, "loss": 0.0218, "step": 146490 }, { "epoch": 1.1853709847075007, "grad_norm": 0.1436779946088791, "learning_rate": 4.257751207804474e-06, "loss": 0.0168, "step": 146500 }, { "epoch": 1.1854518974027024, "grad_norm": 0.32627272605895996, "learning_rate": 4.25705294232269e-06, "loss": 0.0184, "step": 146510 }, { "epoch": 1.1855328100979043, "grad_norm": 0.20735220611095428, "learning_rate": 4.256354691657416e-06, "loss": 0.013, "step": 146520 }, { "epoch": 1.1856137227931063, "grad_norm": 0.12012708187103271, "learning_rate": 4.255656455822577e-06, "loss": 0.0148, "step": 146530 }, { "epoch": 1.185694635488308, "grad_norm": 0.6679331660270691, "learning_rate": 4.254958234832101e-06, "loss": 0.0193, "step": 146540 }, { "epoch": 1.18577554818351, "grad_norm": 0.2892123758792877, "learning_rate": 4.254260028699909e-06, "loss": 0.0168, "step": 146550 }, { "epoch": 1.1858564608787119, "grad_norm": 0.6942198872566223, "learning_rate": 4.253561837439926e-06, "loss": 0.0359, "step": 146560 }, { "epoch": 1.1859373735739138, "grad_norm": 0.2696593403816223, "learning_rate": 4.252863661066077e-06, "loss": 0.0214, "step": 146570 }, { "epoch": 1.1860182862691155, "grad_norm": 0.31427231431007385, "learning_rate": 4.252165499592284e-06, "loss": 0.0242, "step": 146580 }, { "epoch": 1.1860991989643175, "grad_norm": 0.3091830313205719, "learning_rate": 4.251467353032473e-06, "loss": 0.0263, "step": 146590 }, { "epoch": 1.1861801116595194, "grad_norm": 0.10195443034172058, "learning_rate": 4.250769221400564e-06, "loss": 0.0177, "step": 146600 }, { "epoch": 1.1862610243547214, "grad_norm": 0.41144248843193054, "learning_rate": 4.250071104710482e-06, "loss": 0.0254, "step": 146610 }, { "epoch": 1.186341937049923, "grad_norm": 0.3465700149536133, "learning_rate": 4.249373002976149e-06, "loss": 0.0188, "step": 146620 }, { "epoch": 1.186422849745125, "grad_norm": 0.34041744470596313, "learning_rate": 4.248674916211485e-06, "loss": 0.0237, "step": 146630 }, { "epoch": 1.186503762440327, "grad_norm": 0.3413465917110443, "learning_rate": 4.2479768444304165e-06, "loss": 0.0195, "step": 146640 }, { "epoch": 1.1865846751355287, "grad_norm": 0.13995599746704102, "learning_rate": 4.24727878764686e-06, "loss": 0.0147, "step": 146650 }, { "epoch": 1.1866655878307306, "grad_norm": 0.40124234557151794, "learning_rate": 4.2465807458747395e-06, "loss": 0.0239, "step": 146660 }, { "epoch": 1.1867465005259326, "grad_norm": 0.026801694184541702, "learning_rate": 4.2458827191279765e-06, "loss": 0.017, "step": 146670 }, { "epoch": 1.1868274132211343, "grad_norm": 0.28206494450569153, "learning_rate": 4.2451847074204885e-06, "loss": 0.012, "step": 146680 }, { "epoch": 1.1869083259163362, "grad_norm": 0.17070885002613068, "learning_rate": 4.2444867107662e-06, "loss": 0.0377, "step": 146690 }, { "epoch": 1.1869892386115382, "grad_norm": 0.23242361843585968, "learning_rate": 4.243788729179029e-06, "loss": 0.0281, "step": 146700 }, { "epoch": 1.18707015130674, "grad_norm": 0.6287429332733154, "learning_rate": 4.243090762672894e-06, "loss": 0.0171, "step": 146710 }, { "epoch": 1.1871510640019418, "grad_norm": 0.22992590069770813, "learning_rate": 4.242392811261716e-06, "loss": 0.0196, "step": 146720 }, { "epoch": 1.1872319766971438, "grad_norm": 0.10978123545646667, "learning_rate": 4.241694874959416e-06, "loss": 0.0309, "step": 146730 }, { "epoch": 1.1873128893923457, "grad_norm": 0.2539745271205902, "learning_rate": 4.240996953779909e-06, "loss": 0.0251, "step": 146740 }, { "epoch": 1.1873938020875476, "grad_norm": 0.8482814431190491, "learning_rate": 4.2402990477371155e-06, "loss": 0.033, "step": 146750 }, { "epoch": 1.1874747147827494, "grad_norm": 0.30790361762046814, "learning_rate": 4.239601156844954e-06, "loss": 0.0279, "step": 146760 }, { "epoch": 1.1875556274779513, "grad_norm": 0.18685589730739594, "learning_rate": 4.238903281117343e-06, "loss": 0.0158, "step": 146770 }, { "epoch": 1.1876365401731532, "grad_norm": 0.3220423758029938, "learning_rate": 4.2382054205681974e-06, "loss": 0.0312, "step": 146780 }, { "epoch": 1.187717452868355, "grad_norm": 0.24713829159736633, "learning_rate": 4.237507575211437e-06, "loss": 0.0193, "step": 146790 }, { "epoch": 1.187798365563557, "grad_norm": 0.4085351526737213, "learning_rate": 4.23680974506098e-06, "loss": 0.0285, "step": 146800 }, { "epoch": 1.1878792782587588, "grad_norm": 0.4050062298774719, "learning_rate": 4.236111930130741e-06, "loss": 0.0211, "step": 146810 }, { "epoch": 1.1879601909539608, "grad_norm": 0.237600177526474, "learning_rate": 4.235414130434635e-06, "loss": 0.016, "step": 146820 }, { "epoch": 1.1880411036491625, "grad_norm": 0.13791503012180328, "learning_rate": 4.234716345986582e-06, "loss": 0.0285, "step": 146830 }, { "epoch": 1.1881220163443644, "grad_norm": 0.22620542347431183, "learning_rate": 4.234018576800496e-06, "loss": 0.0133, "step": 146840 }, { "epoch": 1.1882029290395664, "grad_norm": 0.34311598539352417, "learning_rate": 4.233320822890291e-06, "loss": 0.0202, "step": 146850 }, { "epoch": 1.1882838417347683, "grad_norm": 0.2980232834815979, "learning_rate": 4.232623084269885e-06, "loss": 0.0275, "step": 146860 }, { "epoch": 1.18836475442997, "grad_norm": 0.7946192026138306, "learning_rate": 4.2319253609531905e-06, "loss": 0.0424, "step": 146870 }, { "epoch": 1.188445667125172, "grad_norm": 0.30387789011001587, "learning_rate": 4.2312276529541235e-06, "loss": 0.0183, "step": 146880 }, { "epoch": 1.188526579820374, "grad_norm": 0.22701396048069, "learning_rate": 4.230529960286599e-06, "loss": 0.0212, "step": 146890 }, { "epoch": 1.1886074925155756, "grad_norm": 0.4234975278377533, "learning_rate": 4.229832282964529e-06, "loss": 0.0146, "step": 146900 }, { "epoch": 1.1886884052107776, "grad_norm": 0.3805668354034424, "learning_rate": 4.229134621001828e-06, "loss": 0.0208, "step": 146910 }, { "epoch": 1.1887693179059795, "grad_norm": 0.25474852323532104, "learning_rate": 4.2284369744124116e-06, "loss": 0.0204, "step": 146920 }, { "epoch": 1.1888502306011812, "grad_norm": 0.4979591965675354, "learning_rate": 4.227739343210188e-06, "loss": 0.0153, "step": 146930 }, { "epoch": 1.1889311432963832, "grad_norm": 0.46474939584732056, "learning_rate": 4.2270417274090735e-06, "loss": 0.0237, "step": 146940 }, { "epoch": 1.189012055991585, "grad_norm": 0.41223829984664917, "learning_rate": 4.22634412702298e-06, "loss": 0.0243, "step": 146950 }, { "epoch": 1.189092968686787, "grad_norm": 0.3253452181816101, "learning_rate": 4.225646542065822e-06, "loss": 0.0305, "step": 146960 }, { "epoch": 1.1891738813819888, "grad_norm": 0.4726167619228363, "learning_rate": 4.224948972551507e-06, "loss": 0.0208, "step": 146970 }, { "epoch": 1.1892547940771907, "grad_norm": 0.3729689121246338, "learning_rate": 4.224251418493949e-06, "loss": 0.0197, "step": 146980 }, { "epoch": 1.1893357067723926, "grad_norm": 0.32463914155960083, "learning_rate": 4.223553879907059e-06, "loss": 0.0313, "step": 146990 }, { "epoch": 1.1894166194675946, "grad_norm": 0.4291929602622986, "learning_rate": 4.222856356804747e-06, "loss": 0.0187, "step": 147000 }, { "epoch": 1.1894975321627963, "grad_norm": 0.3800695240497589, "learning_rate": 4.222158849200923e-06, "loss": 0.0327, "step": 147010 }, { "epoch": 1.1895784448579982, "grad_norm": 0.68376624584198, "learning_rate": 4.2214613571095015e-06, "loss": 0.0206, "step": 147020 }, { "epoch": 1.1896593575532002, "grad_norm": 1.057813286781311, "learning_rate": 4.220763880544389e-06, "loss": 0.0354, "step": 147030 }, { "epoch": 1.189740270248402, "grad_norm": 0.3756604790687561, "learning_rate": 4.220066419519494e-06, "loss": 0.0256, "step": 147040 }, { "epoch": 1.1898211829436038, "grad_norm": 0.45082518458366394, "learning_rate": 4.219368974048731e-06, "loss": 0.0199, "step": 147050 }, { "epoch": 1.1899020956388058, "grad_norm": 0.5223832726478577, "learning_rate": 4.218671544146003e-06, "loss": 0.0162, "step": 147060 }, { "epoch": 1.1899830083340075, "grad_norm": 0.34267863631248474, "learning_rate": 4.217974129825222e-06, "loss": 0.0245, "step": 147070 }, { "epoch": 1.1900639210292094, "grad_norm": 0.2665313184261322, "learning_rate": 4.217276731100298e-06, "loss": 0.017, "step": 147080 }, { "epoch": 1.1901448337244114, "grad_norm": 0.4541127383708954, "learning_rate": 4.216579347985135e-06, "loss": 0.0243, "step": 147090 }, { "epoch": 1.1902257464196133, "grad_norm": 0.20458397269248962, "learning_rate": 4.215881980493643e-06, "loss": 0.0207, "step": 147100 }, { "epoch": 1.190306659114815, "grad_norm": 0.5326263904571533, "learning_rate": 4.21518462863973e-06, "loss": 0.019, "step": 147110 }, { "epoch": 1.190387571810017, "grad_norm": 0.2816656231880188, "learning_rate": 4.214487292437304e-06, "loss": 0.0126, "step": 147120 }, { "epoch": 1.190468484505219, "grad_norm": 0.3508778512477875, "learning_rate": 4.2137899719002685e-06, "loss": 0.0131, "step": 147130 }, { "epoch": 1.1905493972004209, "grad_norm": 0.3208291530609131, "learning_rate": 4.213092667042534e-06, "loss": 0.025, "step": 147140 }, { "epoch": 1.1906303098956226, "grad_norm": 0.2599080502986908, "learning_rate": 4.212395377878006e-06, "loss": 0.0192, "step": 147150 }, { "epoch": 1.1907112225908245, "grad_norm": 0.28197425603866577, "learning_rate": 4.211698104420586e-06, "loss": 0.0157, "step": 147160 }, { "epoch": 1.1907921352860265, "grad_norm": 0.43633371591567993, "learning_rate": 4.211000846684186e-06, "loss": 0.0179, "step": 147170 }, { "epoch": 1.1908730479812282, "grad_norm": 0.14941851794719696, "learning_rate": 4.210303604682709e-06, "loss": 0.0226, "step": 147180 }, { "epoch": 1.1909539606764301, "grad_norm": 1.0660808086395264, "learning_rate": 4.209606378430057e-06, "loss": 0.019, "step": 147190 }, { "epoch": 1.191034873371632, "grad_norm": 0.7434876561164856, "learning_rate": 4.2089091679401375e-06, "loss": 0.0434, "step": 147200 }, { "epoch": 1.1911157860668338, "grad_norm": 0.20861086249351501, "learning_rate": 4.2082119732268565e-06, "loss": 0.0127, "step": 147210 }, { "epoch": 1.1911966987620357, "grad_norm": 0.05259429290890694, "learning_rate": 4.2075147943041145e-06, "loss": 0.0242, "step": 147220 }, { "epoch": 1.1912776114572377, "grad_norm": 0.5007588267326355, "learning_rate": 4.206817631185816e-06, "loss": 0.0111, "step": 147230 }, { "epoch": 1.1913585241524396, "grad_norm": 0.28503215312957764, "learning_rate": 4.206120483885868e-06, "loss": 0.0224, "step": 147240 }, { "epoch": 1.1914394368476413, "grad_norm": 0.49947211146354675, "learning_rate": 4.2054233524181704e-06, "loss": 0.021, "step": 147250 }, { "epoch": 1.1915203495428432, "grad_norm": 0.14607208967208862, "learning_rate": 4.204726236796625e-06, "loss": 0.0254, "step": 147260 }, { "epoch": 1.1916012622380452, "grad_norm": 0.2675633132457733, "learning_rate": 4.204029137035138e-06, "loss": 0.0148, "step": 147270 }, { "epoch": 1.1916821749332471, "grad_norm": 0.13749416172504425, "learning_rate": 4.2033320531476075e-06, "loss": 0.0215, "step": 147280 }, { "epoch": 1.1917630876284488, "grad_norm": 0.5934082269668579, "learning_rate": 4.202634985147939e-06, "loss": 0.0276, "step": 147290 }, { "epoch": 1.1918440003236508, "grad_norm": 0.32495996356010437, "learning_rate": 4.20193793305003e-06, "loss": 0.0234, "step": 147300 }, { "epoch": 1.1919249130188527, "grad_norm": 0.21781492233276367, "learning_rate": 4.201240896867787e-06, "loss": 0.0294, "step": 147310 }, { "epoch": 1.1920058257140544, "grad_norm": 0.6898128390312195, "learning_rate": 4.200543876615107e-06, "loss": 0.0348, "step": 147320 }, { "epoch": 1.1920867384092564, "grad_norm": 0.38004446029663086, "learning_rate": 4.1998468723058905e-06, "loss": 0.0306, "step": 147330 }, { "epoch": 1.1921676511044583, "grad_norm": 0.23632442951202393, "learning_rate": 4.199149883954041e-06, "loss": 0.0332, "step": 147340 }, { "epoch": 1.19224856379966, "grad_norm": 0.5368274450302124, "learning_rate": 4.1984529115734545e-06, "loss": 0.0217, "step": 147350 }, { "epoch": 1.192329476494862, "grad_norm": 0.3572251796722412, "learning_rate": 4.197755955178033e-06, "loss": 0.0385, "step": 147360 }, { "epoch": 1.192410389190064, "grad_norm": 0.5225732922554016, "learning_rate": 4.197059014781677e-06, "loss": 0.0215, "step": 147370 }, { "epoch": 1.1924913018852659, "grad_norm": 0.5812836289405823, "learning_rate": 4.196362090398282e-06, "loss": 0.022, "step": 147380 }, { "epoch": 1.1925722145804676, "grad_norm": 0.4660018980503082, "learning_rate": 4.195665182041749e-06, "loss": 0.0214, "step": 147390 }, { "epoch": 1.1926531272756695, "grad_norm": 0.20708836615085602, "learning_rate": 4.194968289725978e-06, "loss": 0.0231, "step": 147400 }, { "epoch": 1.1927340399708715, "grad_norm": 0.38142186403274536, "learning_rate": 4.194271413464861e-06, "loss": 0.0264, "step": 147410 }, { "epoch": 1.1928149526660734, "grad_norm": 0.3898427486419678, "learning_rate": 4.193574553272302e-06, "loss": 0.0417, "step": 147420 }, { "epoch": 1.1928958653612751, "grad_norm": 0.37778574228286743, "learning_rate": 4.192877709162196e-06, "loss": 0.013, "step": 147430 }, { "epoch": 1.192976778056477, "grad_norm": 0.49095383286476135, "learning_rate": 4.192180881148441e-06, "loss": 0.0261, "step": 147440 }, { "epoch": 1.193057690751679, "grad_norm": 0.3801662027835846, "learning_rate": 4.191484069244931e-06, "loss": 0.0163, "step": 147450 }, { "epoch": 1.1931386034468807, "grad_norm": 0.4062991440296173, "learning_rate": 4.190787273465565e-06, "loss": 0.025, "step": 147460 }, { "epoch": 1.1932195161420827, "grad_norm": 0.29732978343963623, "learning_rate": 4.19009049382424e-06, "loss": 0.0199, "step": 147470 }, { "epoch": 1.1933004288372846, "grad_norm": 0.3613604009151459, "learning_rate": 4.189393730334848e-06, "loss": 0.0196, "step": 147480 }, { "epoch": 1.1933813415324865, "grad_norm": 0.32936474680900574, "learning_rate": 4.188696983011287e-06, "loss": 0.0192, "step": 147490 }, { "epoch": 1.1934622542276883, "grad_norm": 0.609821617603302, "learning_rate": 4.188000251867454e-06, "loss": 0.0257, "step": 147500 }, { "epoch": 1.1935431669228902, "grad_norm": 0.3766328990459442, "learning_rate": 4.187303536917241e-06, "loss": 0.0135, "step": 147510 }, { "epoch": 1.1936240796180921, "grad_norm": 0.23195070028305054, "learning_rate": 4.186606838174542e-06, "loss": 0.0214, "step": 147520 }, { "epoch": 1.193704992313294, "grad_norm": 0.8404764533042908, "learning_rate": 4.185910155653254e-06, "loss": 0.0269, "step": 147530 }, { "epoch": 1.1937859050084958, "grad_norm": 0.4777480363845825, "learning_rate": 4.185213489367269e-06, "loss": 0.0149, "step": 147540 }, { "epoch": 1.1938668177036977, "grad_norm": 0.5544172525405884, "learning_rate": 4.184516839330479e-06, "loss": 0.0305, "step": 147550 }, { "epoch": 1.1939477303988997, "grad_norm": 0.1356923133134842, "learning_rate": 4.183820205556782e-06, "loss": 0.0159, "step": 147560 }, { "epoch": 1.1940286430941014, "grad_norm": 0.3698068857192993, "learning_rate": 4.183123588060066e-06, "loss": 0.0262, "step": 147570 }, { "epoch": 1.1941095557893033, "grad_norm": 0.5509588718414307, "learning_rate": 4.182426986854227e-06, "loss": 0.0213, "step": 147580 }, { "epoch": 1.1941904684845053, "grad_norm": 0.3072997033596039, "learning_rate": 4.181730401953156e-06, "loss": 0.0253, "step": 147590 }, { "epoch": 1.194271381179707, "grad_norm": 0.273244172334671, "learning_rate": 4.181033833370743e-06, "loss": 0.0267, "step": 147600 }, { "epoch": 1.194352293874909, "grad_norm": 0.48103660345077515, "learning_rate": 4.1803372811208805e-06, "loss": 0.032, "step": 147610 }, { "epoch": 1.1944332065701109, "grad_norm": 0.5460805296897888, "learning_rate": 4.179640745217463e-06, "loss": 0.0185, "step": 147620 }, { "epoch": 1.1945141192653128, "grad_norm": 0.48916372656822205, "learning_rate": 4.178944225674378e-06, "loss": 0.0234, "step": 147630 }, { "epoch": 1.1945950319605145, "grad_norm": 0.3066279888153076, "learning_rate": 4.178247722505515e-06, "loss": 0.014, "step": 147640 }, { "epoch": 1.1946759446557165, "grad_norm": 0.4468148648738861, "learning_rate": 4.1775512357247695e-06, "loss": 0.0277, "step": 147650 }, { "epoch": 1.1947568573509184, "grad_norm": 0.4238702654838562, "learning_rate": 4.176854765346028e-06, "loss": 0.0293, "step": 147660 }, { "epoch": 1.1948377700461204, "grad_norm": 0.4357626140117645, "learning_rate": 4.176158311383179e-06, "loss": 0.0187, "step": 147670 }, { "epoch": 1.194918682741322, "grad_norm": 0.2487359493970871, "learning_rate": 4.175461873850113e-06, "loss": 0.0151, "step": 147680 }, { "epoch": 1.194999595436524, "grad_norm": 0.46079888939857483, "learning_rate": 4.174765452760721e-06, "loss": 0.0149, "step": 147690 }, { "epoch": 1.195080508131726, "grad_norm": 0.4392865300178528, "learning_rate": 4.1740690481288896e-06, "loss": 0.0182, "step": 147700 }, { "epoch": 1.1951614208269277, "grad_norm": 0.39529919624328613, "learning_rate": 4.173372659968506e-06, "loss": 0.0323, "step": 147710 }, { "epoch": 1.1952423335221296, "grad_norm": 0.7104536890983582, "learning_rate": 4.172676288293462e-06, "loss": 0.0304, "step": 147720 }, { "epoch": 1.1953232462173315, "grad_norm": 0.470692902803421, "learning_rate": 4.171979933117641e-06, "loss": 0.0267, "step": 147730 }, { "epoch": 1.1954041589125333, "grad_norm": 0.20059943199157715, "learning_rate": 4.171283594454932e-06, "loss": 0.0182, "step": 147740 }, { "epoch": 1.1954850716077352, "grad_norm": 0.24328383803367615, "learning_rate": 4.170587272319224e-06, "loss": 0.0141, "step": 147750 }, { "epoch": 1.1955659843029371, "grad_norm": 0.19760552048683167, "learning_rate": 4.169890966724399e-06, "loss": 0.0186, "step": 147760 }, { "epoch": 1.195646896998139, "grad_norm": 0.3590201735496521, "learning_rate": 4.169194677684348e-06, "loss": 0.0213, "step": 147770 }, { "epoch": 1.1957278096933408, "grad_norm": 0.2847725749015808, "learning_rate": 4.168498405212957e-06, "loss": 0.029, "step": 147780 }, { "epoch": 1.1958087223885427, "grad_norm": 0.14338141679763794, "learning_rate": 4.167802149324107e-06, "loss": 0.0249, "step": 147790 }, { "epoch": 1.1958896350837447, "grad_norm": 0.10879145562648773, "learning_rate": 4.1671059100316874e-06, "loss": 0.0266, "step": 147800 }, { "epoch": 1.1959705477789466, "grad_norm": 0.39446863532066345, "learning_rate": 4.166409687349584e-06, "loss": 0.028, "step": 147810 }, { "epoch": 1.1960514604741483, "grad_norm": 0.3828850984573364, "learning_rate": 4.165713481291676e-06, "loss": 0.0273, "step": 147820 }, { "epoch": 1.1961323731693503, "grad_norm": 0.47722503542900085, "learning_rate": 4.165017291871852e-06, "loss": 0.021, "step": 147830 }, { "epoch": 1.1962132858645522, "grad_norm": 0.2492830902338028, "learning_rate": 4.164321119103996e-06, "loss": 0.0163, "step": 147840 }, { "epoch": 1.196294198559754, "grad_norm": 0.5138459801673889, "learning_rate": 4.163624963001993e-06, "loss": 0.0229, "step": 147850 }, { "epoch": 1.1963751112549559, "grad_norm": 0.5101761221885681, "learning_rate": 4.1629288235797225e-06, "loss": 0.0279, "step": 147860 }, { "epoch": 1.1964560239501578, "grad_norm": 0.1309267282485962, "learning_rate": 4.16223270085107e-06, "loss": 0.0211, "step": 147870 }, { "epoch": 1.1965369366453595, "grad_norm": 0.31829002499580383, "learning_rate": 4.161536594829919e-06, "loss": 0.0186, "step": 147880 }, { "epoch": 1.1966178493405615, "grad_norm": 0.20667822659015656, "learning_rate": 4.1608405055301485e-06, "loss": 0.0162, "step": 147890 }, { "epoch": 1.1966987620357634, "grad_norm": 0.2401922345161438, "learning_rate": 4.160144432965642e-06, "loss": 0.0328, "step": 147900 }, { "epoch": 1.1967796747309654, "grad_norm": 0.8282978534698486, "learning_rate": 4.159448377150284e-06, "loss": 0.0284, "step": 147910 }, { "epoch": 1.196860587426167, "grad_norm": 0.3373427391052246, "learning_rate": 4.158752338097953e-06, "loss": 0.0128, "step": 147920 }, { "epoch": 1.196941500121369, "grad_norm": 0.34982016682624817, "learning_rate": 4.158056315822529e-06, "loss": 0.0135, "step": 147930 }, { "epoch": 1.197022412816571, "grad_norm": 0.5769600868225098, "learning_rate": 4.157360310337897e-06, "loss": 0.0338, "step": 147940 }, { "epoch": 1.197103325511773, "grad_norm": 0.030688554048538208, "learning_rate": 4.156664321657934e-06, "loss": 0.0173, "step": 147950 }, { "epoch": 1.1971842382069746, "grad_norm": 0.5721964836120605, "learning_rate": 4.155968349796518e-06, "loss": 0.0208, "step": 147960 }, { "epoch": 1.1972651509021766, "grad_norm": 0.677538275718689, "learning_rate": 4.155272394767536e-06, "loss": 0.0192, "step": 147970 }, { "epoch": 1.1973460635973785, "grad_norm": 0.31158187985420227, "learning_rate": 4.154576456584859e-06, "loss": 0.0187, "step": 147980 }, { "epoch": 1.1974269762925802, "grad_norm": 0.37253957986831665, "learning_rate": 4.153880535262371e-06, "loss": 0.0132, "step": 147990 }, { "epoch": 1.1975078889877822, "grad_norm": 0.6593771576881409, "learning_rate": 4.153184630813951e-06, "loss": 0.026, "step": 148000 }, { "epoch": 1.197588801682984, "grad_norm": 0.46730324625968933, "learning_rate": 4.152488743253473e-06, "loss": 0.0273, "step": 148010 }, { "epoch": 1.197669714378186, "grad_norm": 0.42882323265075684, "learning_rate": 4.15179287259482e-06, "loss": 0.0212, "step": 148020 }, { "epoch": 1.1977506270733878, "grad_norm": 0.28815630078315735, "learning_rate": 4.151097018851866e-06, "loss": 0.0181, "step": 148030 }, { "epoch": 1.1978315397685897, "grad_norm": 0.5038126707077026, "learning_rate": 4.150401182038491e-06, "loss": 0.0322, "step": 148040 }, { "epoch": 1.1979124524637916, "grad_norm": 0.1119881197810173, "learning_rate": 4.14970536216857e-06, "loss": 0.0165, "step": 148050 }, { "epoch": 1.1979933651589934, "grad_norm": 0.4257355034351349, "learning_rate": 4.149009559255982e-06, "loss": 0.0254, "step": 148060 }, { "epoch": 1.1980742778541953, "grad_norm": 0.13545063138008118, "learning_rate": 4.148313773314602e-06, "loss": 0.0281, "step": 148070 }, { "epoch": 1.1981551905493972, "grad_norm": 0.31636884808540344, "learning_rate": 4.147618004358303e-06, "loss": 0.0247, "step": 148080 }, { "epoch": 1.1982361032445992, "grad_norm": 0.2734517455101013, "learning_rate": 4.146922252400966e-06, "loss": 0.0137, "step": 148090 }, { "epoch": 1.198317015939801, "grad_norm": 0.36220821738243103, "learning_rate": 4.146226517456463e-06, "loss": 0.0159, "step": 148100 }, { "epoch": 1.1983979286350028, "grad_norm": 0.2541028559207916, "learning_rate": 4.145530799538669e-06, "loss": 0.0192, "step": 148110 }, { "epoch": 1.1984788413302048, "grad_norm": 0.2838985025882721, "learning_rate": 4.144835098661459e-06, "loss": 0.0192, "step": 148120 }, { "epoch": 1.1985597540254065, "grad_norm": 0.3651401400566101, "learning_rate": 4.144139414838709e-06, "loss": 0.0293, "step": 148130 }, { "epoch": 1.1986406667206084, "grad_norm": 0.17906679213047028, "learning_rate": 4.1434437480842915e-06, "loss": 0.0306, "step": 148140 }, { "epoch": 1.1987215794158104, "grad_norm": 0.4632326364517212, "learning_rate": 4.14274809841208e-06, "loss": 0.0213, "step": 148150 }, { "epoch": 1.1988024921110123, "grad_norm": 0.2767048180103302, "learning_rate": 4.142052465835948e-06, "loss": 0.0322, "step": 148160 }, { "epoch": 1.198883404806214, "grad_norm": 0.27437013387680054, "learning_rate": 4.141356850369767e-06, "loss": 0.0214, "step": 148170 }, { "epoch": 1.198964317501416, "grad_norm": 0.5232949256896973, "learning_rate": 4.140661252027412e-06, "loss": 0.0327, "step": 148180 }, { "epoch": 1.199045230196618, "grad_norm": 0.27219873666763306, "learning_rate": 4.139965670822756e-06, "loss": 0.0225, "step": 148190 }, { "epoch": 1.1991261428918198, "grad_norm": 0.5002831220626831, "learning_rate": 4.139270106769666e-06, "loss": 0.0164, "step": 148200 }, { "epoch": 1.1992070555870216, "grad_norm": 0.5074521899223328, "learning_rate": 4.138574559882017e-06, "loss": 0.0227, "step": 148210 }, { "epoch": 1.1992879682822235, "grad_norm": 0.26432639360427856, "learning_rate": 4.13787903017368e-06, "loss": 0.0178, "step": 148220 }, { "epoch": 1.1993688809774254, "grad_norm": 0.3470766544342041, "learning_rate": 4.1371835176585285e-06, "loss": 0.0157, "step": 148230 }, { "epoch": 1.1994497936726272, "grad_norm": 0.3634422719478607, "learning_rate": 4.136488022350427e-06, "loss": 0.0219, "step": 148240 }, { "epoch": 1.199530706367829, "grad_norm": 0.6245244145393372, "learning_rate": 4.1357925442632506e-06, "loss": 0.0334, "step": 148250 }, { "epoch": 1.199611619063031, "grad_norm": 0.27805519104003906, "learning_rate": 4.135097083410868e-06, "loss": 0.0179, "step": 148260 }, { "epoch": 1.1996925317582328, "grad_norm": 0.5639743804931641, "learning_rate": 4.1344016398071465e-06, "loss": 0.0298, "step": 148270 }, { "epoch": 1.1997734444534347, "grad_norm": 0.597296416759491, "learning_rate": 4.133706213465957e-06, "loss": 0.0221, "step": 148280 }, { "epoch": 1.1998543571486366, "grad_norm": 0.3416621685028076, "learning_rate": 4.133010804401171e-06, "loss": 0.03, "step": 148290 }, { "epoch": 1.1999352698438386, "grad_norm": 0.3203287124633789, "learning_rate": 4.132315412626651e-06, "loss": 0.0201, "step": 148300 }, { "epoch": 1.2000161825390403, "grad_norm": 0.34684035181999207, "learning_rate": 4.131620038156268e-06, "loss": 0.0209, "step": 148310 }, { "epoch": 1.2000970952342422, "grad_norm": 0.4566330313682556, "learning_rate": 4.130924681003893e-06, "loss": 0.0239, "step": 148320 }, { "epoch": 1.2001780079294442, "grad_norm": 0.21951328217983246, "learning_rate": 4.13022934118339e-06, "loss": 0.0199, "step": 148330 }, { "epoch": 1.2002589206246461, "grad_norm": 0.5899771451950073, "learning_rate": 4.129534018708624e-06, "loss": 0.0301, "step": 148340 }, { "epoch": 1.2003398333198478, "grad_norm": 0.35844147205352783, "learning_rate": 4.128838713593467e-06, "loss": 0.0301, "step": 148350 }, { "epoch": 1.2004207460150498, "grad_norm": 0.37445083260536194, "learning_rate": 4.128143425851782e-06, "loss": 0.0259, "step": 148360 }, { "epoch": 1.2005016587102517, "grad_norm": 0.08502142131328583, "learning_rate": 4.127448155497435e-06, "loss": 0.0158, "step": 148370 }, { "epoch": 1.2005825714054534, "grad_norm": 0.3074614405632019, "learning_rate": 4.126752902544292e-06, "loss": 0.0222, "step": 148380 }, { "epoch": 1.2006634841006554, "grad_norm": 0.4584829807281494, "learning_rate": 4.1260576670062215e-06, "loss": 0.0239, "step": 148390 }, { "epoch": 1.2007443967958573, "grad_norm": 0.0489577017724514, "learning_rate": 4.125362448897085e-06, "loss": 0.0193, "step": 148400 }, { "epoch": 1.200825309491059, "grad_norm": 0.38861191272735596, "learning_rate": 4.124667248230746e-06, "loss": 0.031, "step": 148410 }, { "epoch": 1.200906222186261, "grad_norm": 0.5515737533569336, "learning_rate": 4.123972065021073e-06, "loss": 0.0125, "step": 148420 }, { "epoch": 1.200987134881463, "grad_norm": 0.5033882856369019, "learning_rate": 4.1232768992819274e-06, "loss": 0.0292, "step": 148430 }, { "epoch": 1.2010680475766649, "grad_norm": 0.42290711402893066, "learning_rate": 4.122581751027172e-06, "loss": 0.0245, "step": 148440 }, { "epoch": 1.2011489602718666, "grad_norm": 0.3872564136981964, "learning_rate": 4.121886620270673e-06, "loss": 0.0213, "step": 148450 }, { "epoch": 1.2012298729670685, "grad_norm": 0.107369065284729, "learning_rate": 4.12119150702629e-06, "loss": 0.0146, "step": 148460 }, { "epoch": 1.2013107856622705, "grad_norm": 0.2691655457019806, "learning_rate": 4.120496411307887e-06, "loss": 0.0213, "step": 148470 }, { "epoch": 1.2013916983574724, "grad_norm": 0.5304169654846191, "learning_rate": 4.119801333129329e-06, "loss": 0.0245, "step": 148480 }, { "epoch": 1.201472611052674, "grad_norm": 0.34990134835243225, "learning_rate": 4.1191062725044725e-06, "loss": 0.0159, "step": 148490 }, { "epoch": 1.201553523747876, "grad_norm": 0.5824005007743835, "learning_rate": 4.118411229447182e-06, "loss": 0.0271, "step": 148500 }, { "epoch": 1.201634436443078, "grad_norm": 0.7778406739234924, "learning_rate": 4.117716203971321e-06, "loss": 0.0285, "step": 148510 }, { "epoch": 1.2017153491382797, "grad_norm": 0.4622518718242645, "learning_rate": 4.117021196090744e-06, "loss": 0.0154, "step": 148520 }, { "epoch": 1.2017962618334816, "grad_norm": 0.6488736271858215, "learning_rate": 4.116326205819316e-06, "loss": 0.0196, "step": 148530 }, { "epoch": 1.2018771745286836, "grad_norm": 0.2873300611972809, "learning_rate": 4.115631233170898e-06, "loss": 0.0153, "step": 148540 }, { "epoch": 1.2019580872238853, "grad_norm": 0.42757830023765564, "learning_rate": 4.114936278159348e-06, "loss": 0.0146, "step": 148550 }, { "epoch": 1.2020389999190872, "grad_norm": 0.3549163043498993, "learning_rate": 4.114241340798524e-06, "loss": 0.0161, "step": 148560 }, { "epoch": 1.2021199126142892, "grad_norm": 0.6275404691696167, "learning_rate": 4.113546421102287e-06, "loss": 0.0224, "step": 148570 }, { "epoch": 1.2022008253094911, "grad_norm": 0.16685669124126434, "learning_rate": 4.1128515190844975e-06, "loss": 0.0313, "step": 148580 }, { "epoch": 1.2022817380046928, "grad_norm": 0.36502891778945923, "learning_rate": 4.112156634759008e-06, "loss": 0.0193, "step": 148590 }, { "epoch": 1.2023626506998948, "grad_norm": 0.37560468912124634, "learning_rate": 4.111461768139681e-06, "loss": 0.0283, "step": 148600 }, { "epoch": 1.2024435633950967, "grad_norm": 0.6275725960731506, "learning_rate": 4.110766919240376e-06, "loss": 0.0449, "step": 148610 }, { "epoch": 1.2025244760902987, "grad_norm": 0.3320259749889374, "learning_rate": 4.110072088074945e-06, "loss": 0.0148, "step": 148620 }, { "epoch": 1.2026053887855004, "grad_norm": 0.3445461392402649, "learning_rate": 4.109377274657247e-06, "loss": 0.0148, "step": 148630 }, { "epoch": 1.2026863014807023, "grad_norm": 0.3176647424697876, "learning_rate": 4.108682479001142e-06, "loss": 0.0215, "step": 148640 }, { "epoch": 1.2027672141759043, "grad_norm": 0.7032450437545776, "learning_rate": 4.107987701120481e-06, "loss": 0.0205, "step": 148650 }, { "epoch": 1.202848126871106, "grad_norm": 0.3103855550289154, "learning_rate": 4.107292941029122e-06, "loss": 0.0208, "step": 148660 }, { "epoch": 1.202929039566308, "grad_norm": 0.23146790266036987, "learning_rate": 4.106598198740922e-06, "loss": 0.0201, "step": 148670 }, { "epoch": 1.2030099522615099, "grad_norm": 0.5468520522117615, "learning_rate": 4.105903474269734e-06, "loss": 0.0163, "step": 148680 }, { "epoch": 1.2030908649567118, "grad_norm": 0.38600999116897583, "learning_rate": 4.1052087676294135e-06, "loss": 0.0182, "step": 148690 }, { "epoch": 1.2031717776519135, "grad_norm": 0.6224632263183594, "learning_rate": 4.1045140788338166e-06, "loss": 0.0146, "step": 148700 }, { "epoch": 1.2032526903471155, "grad_norm": 0.46487733721733093, "learning_rate": 4.103819407896794e-06, "loss": 0.0362, "step": 148710 }, { "epoch": 1.2033336030423174, "grad_norm": 0.531388521194458, "learning_rate": 4.103124754832201e-06, "loss": 0.0294, "step": 148720 }, { "epoch": 1.2034145157375193, "grad_norm": 0.23352107405662537, "learning_rate": 4.102430119653894e-06, "loss": 0.0265, "step": 148730 }, { "epoch": 1.203495428432721, "grad_norm": 0.26584914326667786, "learning_rate": 4.101735502375722e-06, "loss": 0.0145, "step": 148740 }, { "epoch": 1.203576341127923, "grad_norm": 0.21661986410617828, "learning_rate": 4.101040903011538e-06, "loss": 0.0175, "step": 148750 }, { "epoch": 1.203657253823125, "grad_norm": 0.467943012714386, "learning_rate": 4.1003463215751965e-06, "loss": 0.0256, "step": 148760 }, { "epoch": 1.2037381665183267, "grad_norm": 0.5691961050033569, "learning_rate": 4.09965175808055e-06, "loss": 0.0175, "step": 148770 }, { "epoch": 1.2038190792135286, "grad_norm": 0.41319555044174194, "learning_rate": 4.098957212541446e-06, "loss": 0.0218, "step": 148780 }, { "epoch": 1.2038999919087305, "grad_norm": 0.7458542585372925, "learning_rate": 4.098262684971737e-06, "loss": 0.0156, "step": 148790 }, { "epoch": 1.2039809046039323, "grad_norm": 0.4726783037185669, "learning_rate": 4.0975681753852795e-06, "loss": 0.0273, "step": 148800 }, { "epoch": 1.2040618172991342, "grad_norm": 0.37563854455947876, "learning_rate": 4.096873683795916e-06, "loss": 0.0176, "step": 148810 }, { "epoch": 1.2041427299943361, "grad_norm": 0.4395001232624054, "learning_rate": 4.0961792102175016e-06, "loss": 0.0218, "step": 148820 }, { "epoch": 1.204223642689538, "grad_norm": 0.19842183589935303, "learning_rate": 4.095484754663886e-06, "loss": 0.0202, "step": 148830 }, { "epoch": 1.2043045553847398, "grad_norm": 0.6633030772209167, "learning_rate": 4.0947903171489165e-06, "loss": 0.0194, "step": 148840 }, { "epoch": 1.2043854680799417, "grad_norm": 0.25816118717193604, "learning_rate": 4.094095897686442e-06, "loss": 0.0196, "step": 148850 }, { "epoch": 1.2044663807751437, "grad_norm": 0.02039387822151184, "learning_rate": 4.093401496290314e-06, "loss": 0.0296, "step": 148860 }, { "epoch": 1.2045472934703456, "grad_norm": 0.8031390905380249, "learning_rate": 4.092707112974377e-06, "loss": 0.0209, "step": 148870 }, { "epoch": 1.2046282061655473, "grad_norm": 0.3158145248889923, "learning_rate": 4.0920127477524835e-06, "loss": 0.023, "step": 148880 }, { "epoch": 1.2047091188607493, "grad_norm": 0.14873555302619934, "learning_rate": 4.091318400638479e-06, "loss": 0.0219, "step": 148890 }, { "epoch": 1.2047900315559512, "grad_norm": 0.14791151881217957, "learning_rate": 4.090624071646209e-06, "loss": 0.0209, "step": 148900 }, { "epoch": 1.204870944251153, "grad_norm": 0.4035734236240387, "learning_rate": 4.089929760789523e-06, "loss": 0.0292, "step": 148910 }, { "epoch": 1.2049518569463549, "grad_norm": 0.6693038940429688, "learning_rate": 4.089235468082266e-06, "loss": 0.0282, "step": 148920 }, { "epoch": 1.2050327696415568, "grad_norm": 0.34752628207206726, "learning_rate": 4.088541193538287e-06, "loss": 0.0157, "step": 148930 }, { "epoch": 1.2051136823367585, "grad_norm": 0.3495369255542755, "learning_rate": 4.087846937171426e-06, "loss": 0.0241, "step": 148940 }, { "epoch": 1.2051945950319605, "grad_norm": 0.22388166189193726, "learning_rate": 4.087152698995535e-06, "loss": 0.0142, "step": 148950 }, { "epoch": 1.2052755077271624, "grad_norm": 0.21897923946380615, "learning_rate": 4.086458479024457e-06, "loss": 0.0193, "step": 148960 }, { "epoch": 1.2053564204223643, "grad_norm": 0.020075757056474686, "learning_rate": 4.085764277272034e-06, "loss": 0.0121, "step": 148970 }, { "epoch": 1.205437333117566, "grad_norm": 0.17459645867347717, "learning_rate": 4.0850700937521135e-06, "loss": 0.0177, "step": 148980 }, { "epoch": 1.205518245812768, "grad_norm": 0.9513663649559021, "learning_rate": 4.084375928478539e-06, "loss": 0.0211, "step": 148990 }, { "epoch": 1.20559915850797, "grad_norm": 0.5584172606468201, "learning_rate": 4.083681781465152e-06, "loss": 0.0184, "step": 149000 }, { "epoch": 1.2056800712031719, "grad_norm": 0.43469759821891785, "learning_rate": 4.082987652725797e-06, "loss": 0.0276, "step": 149010 }, { "epoch": 1.2057609838983736, "grad_norm": 0.8008705377578735, "learning_rate": 4.08229354227432e-06, "loss": 0.0284, "step": 149020 }, { "epoch": 1.2058418965935755, "grad_norm": 0.5834517478942871, "learning_rate": 4.08159945012456e-06, "loss": 0.0188, "step": 149030 }, { "epoch": 1.2059228092887775, "grad_norm": 0.5111740231513977, "learning_rate": 4.080905376290358e-06, "loss": 0.0167, "step": 149040 }, { "epoch": 1.2060037219839792, "grad_norm": 0.3292037546634674, "learning_rate": 4.080211320785561e-06, "loss": 0.0147, "step": 149050 }, { "epoch": 1.2060846346791811, "grad_norm": 0.3904610574245453, "learning_rate": 4.079517283624006e-06, "loss": 0.0255, "step": 149060 }, { "epoch": 1.206165547374383, "grad_norm": 0.16527897119522095, "learning_rate": 4.078823264819535e-06, "loss": 0.0187, "step": 149070 }, { "epoch": 1.2062464600695848, "grad_norm": 0.3443278968334198, "learning_rate": 4.0781292643859915e-06, "loss": 0.0156, "step": 149080 }, { "epoch": 1.2063273727647867, "grad_norm": 0.35336315631866455, "learning_rate": 4.077435282337211e-06, "loss": 0.0182, "step": 149090 }, { "epoch": 1.2064082854599887, "grad_norm": 0.2830565273761749, "learning_rate": 4.076741318687037e-06, "loss": 0.0144, "step": 149100 }, { "epoch": 1.2064891981551906, "grad_norm": 0.3404325246810913, "learning_rate": 4.076047373449308e-06, "loss": 0.017, "step": 149110 }, { "epoch": 1.2065701108503923, "grad_norm": 0.4836497902870178, "learning_rate": 4.075353446637866e-06, "loss": 0.022, "step": 149120 }, { "epoch": 1.2066510235455943, "grad_norm": 0.6054404973983765, "learning_rate": 4.074659538266546e-06, "loss": 0.0284, "step": 149130 }, { "epoch": 1.2067319362407962, "grad_norm": 0.4943048357963562, "learning_rate": 4.073965648349186e-06, "loss": 0.023, "step": 149140 }, { "epoch": 1.2068128489359982, "grad_norm": 0.41250237822532654, "learning_rate": 4.073271776899629e-06, "loss": 0.0247, "step": 149150 }, { "epoch": 1.2068937616311999, "grad_norm": 0.550149142742157, "learning_rate": 4.0725779239317085e-06, "loss": 0.0248, "step": 149160 }, { "epoch": 1.2069746743264018, "grad_norm": 0.2923838794231415, "learning_rate": 4.071884089459264e-06, "loss": 0.0258, "step": 149170 }, { "epoch": 1.2070555870216038, "grad_norm": 0.5629143118858337, "learning_rate": 4.0711902734961325e-06, "loss": 0.0221, "step": 149180 }, { "epoch": 1.2071364997168055, "grad_norm": 0.0712839812040329, "learning_rate": 4.070496476056149e-06, "loss": 0.0313, "step": 149190 }, { "epoch": 1.2072174124120074, "grad_norm": 0.25738781690597534, "learning_rate": 4.0698026971531515e-06, "loss": 0.022, "step": 149200 }, { "epoch": 1.2072983251072094, "grad_norm": 0.225946843624115, "learning_rate": 4.0691089368009755e-06, "loss": 0.0194, "step": 149210 }, { "epoch": 1.207379237802411, "grad_norm": 0.33735334873199463, "learning_rate": 4.068415195013457e-06, "loss": 0.0171, "step": 149220 }, { "epoch": 1.207460150497613, "grad_norm": 0.5856367945671082, "learning_rate": 4.067721471804429e-06, "loss": 0.0192, "step": 149230 }, { "epoch": 1.207541063192815, "grad_norm": 0.23123042285442352, "learning_rate": 4.067027767187731e-06, "loss": 0.0285, "step": 149240 }, { "epoch": 1.207621975888017, "grad_norm": 0.3531647324562073, "learning_rate": 4.066334081177193e-06, "loss": 0.0247, "step": 149250 }, { "epoch": 1.2077028885832186, "grad_norm": 0.5063069462776184, "learning_rate": 4.06564041378665e-06, "loss": 0.0417, "step": 149260 }, { "epoch": 1.2077838012784206, "grad_norm": 0.0831083282828331, "learning_rate": 4.064946765029938e-06, "loss": 0.025, "step": 149270 }, { "epoch": 1.2078647139736225, "grad_norm": 0.32758796215057373, "learning_rate": 4.064253134920887e-06, "loss": 0.0214, "step": 149280 }, { "epoch": 1.2079456266688244, "grad_norm": 0.4068495035171509, "learning_rate": 4.063559523473333e-06, "loss": 0.0281, "step": 149290 }, { "epoch": 1.2080265393640262, "grad_norm": 0.3690086603164673, "learning_rate": 4.0628659307011065e-06, "loss": 0.0247, "step": 149300 }, { "epoch": 1.208107452059228, "grad_norm": 0.25651389360427856, "learning_rate": 4.062172356618043e-06, "loss": 0.038, "step": 149310 }, { "epoch": 1.20818836475443, "grad_norm": 0.15474745631217957, "learning_rate": 4.06147880123797e-06, "loss": 0.0243, "step": 149320 }, { "epoch": 1.2082692774496318, "grad_norm": 0.08659137785434723, "learning_rate": 4.06078526457472e-06, "loss": 0.0226, "step": 149330 }, { "epoch": 1.2083501901448337, "grad_norm": 0.3438456654548645, "learning_rate": 4.060091746642128e-06, "loss": 0.0223, "step": 149340 }, { "epoch": 1.2084311028400356, "grad_norm": 0.21829316020011902, "learning_rate": 4.0593982474540195e-06, "loss": 0.0213, "step": 149350 }, { "epoch": 1.2085120155352376, "grad_norm": 0.44461190700531006, "learning_rate": 4.058704767024228e-06, "loss": 0.02, "step": 149360 }, { "epoch": 1.2085929282304393, "grad_norm": 0.46238061785697937, "learning_rate": 4.058011305366584e-06, "loss": 0.0208, "step": 149370 }, { "epoch": 1.2086738409256412, "grad_norm": 0.5254388451576233, "learning_rate": 4.057317862494914e-06, "loss": 0.0222, "step": 149380 }, { "epoch": 1.2087547536208432, "grad_norm": 0.43734076619148254, "learning_rate": 4.05662443842305e-06, "loss": 0.0175, "step": 149390 }, { "epoch": 1.208835666316045, "grad_norm": 0.2997569441795349, "learning_rate": 4.0559310331648206e-06, "loss": 0.0166, "step": 149400 }, { "epoch": 1.2089165790112468, "grad_norm": 0.28429871797561646, "learning_rate": 4.0552376467340524e-06, "loss": 0.0307, "step": 149410 }, { "epoch": 1.2089974917064488, "grad_norm": 0.17908716201782227, "learning_rate": 4.054544279144574e-06, "loss": 0.0153, "step": 149420 }, { "epoch": 1.2090784044016507, "grad_norm": 0.1954672634601593, "learning_rate": 4.053850930410217e-06, "loss": 0.0137, "step": 149430 }, { "epoch": 1.2091593170968524, "grad_norm": 0.4123915731906891, "learning_rate": 4.053157600544804e-06, "loss": 0.0183, "step": 149440 }, { "epoch": 1.2092402297920544, "grad_norm": 0.2594252824783325, "learning_rate": 4.052464289562163e-06, "loss": 0.0178, "step": 149450 }, { "epoch": 1.2093211424872563, "grad_norm": 0.4618820250034332, "learning_rate": 4.051770997476124e-06, "loss": 0.0153, "step": 149460 }, { "epoch": 1.209402055182458, "grad_norm": 0.42382314801216125, "learning_rate": 4.05107772430051e-06, "loss": 0.0239, "step": 149470 }, { "epoch": 1.20948296787766, "grad_norm": 0.2589491009712219, "learning_rate": 4.050384470049145e-06, "loss": 0.0138, "step": 149480 }, { "epoch": 1.209563880572862, "grad_norm": 0.6236729025840759, "learning_rate": 4.049691234735857e-06, "loss": 0.019, "step": 149490 }, { "epoch": 1.2096447932680638, "grad_norm": 0.7069911956787109, "learning_rate": 4.048998018374474e-06, "loss": 0.0128, "step": 149500 }, { "epoch": 1.2097257059632656, "grad_norm": 0.405244916677475, "learning_rate": 4.048304820978818e-06, "loss": 0.0146, "step": 149510 }, { "epoch": 1.2098066186584675, "grad_norm": 0.31158944964408875, "learning_rate": 4.04761164256271e-06, "loss": 0.0199, "step": 149520 }, { "epoch": 1.2098875313536694, "grad_norm": 0.45483264327049255, "learning_rate": 4.04691848313998e-06, "loss": 0.0245, "step": 149530 }, { "epoch": 1.2099684440488714, "grad_norm": 0.5838292241096497, "learning_rate": 4.046225342724449e-06, "loss": 0.0147, "step": 149540 }, { "epoch": 1.210049356744073, "grad_norm": 0.37472325563430786, "learning_rate": 4.0455322213299375e-06, "loss": 0.0295, "step": 149550 }, { "epoch": 1.210130269439275, "grad_norm": 0.14393389225006104, "learning_rate": 4.044839118970273e-06, "loss": 0.024, "step": 149560 }, { "epoch": 1.210211182134477, "grad_norm": 0.35407698154449463, "learning_rate": 4.044146035659275e-06, "loss": 0.0153, "step": 149570 }, { "epoch": 1.2102920948296787, "grad_norm": 0.427718847990036, "learning_rate": 4.043452971410766e-06, "loss": 0.0303, "step": 149580 }, { "epoch": 1.2103730075248806, "grad_norm": 0.40449243783950806, "learning_rate": 4.0427599262385696e-06, "loss": 0.0171, "step": 149590 }, { "epoch": 1.2104539202200826, "grad_norm": 0.4745405614376068, "learning_rate": 4.042066900156504e-06, "loss": 0.0212, "step": 149600 }, { "epoch": 1.2105348329152843, "grad_norm": 0.44516175985336304, "learning_rate": 4.041373893178393e-06, "loss": 0.0153, "step": 149610 }, { "epoch": 1.2106157456104862, "grad_norm": 0.3253158628940582, "learning_rate": 4.040680905318056e-06, "loss": 0.0211, "step": 149620 }, { "epoch": 1.2106966583056882, "grad_norm": 0.4321900010108948, "learning_rate": 4.039987936589312e-06, "loss": 0.0288, "step": 149630 }, { "epoch": 1.2107775710008901, "grad_norm": 0.33505386114120483, "learning_rate": 4.039294987005982e-06, "loss": 0.0265, "step": 149640 }, { "epoch": 1.2108584836960918, "grad_norm": 0.23510834574699402, "learning_rate": 4.038602056581886e-06, "loss": 0.0139, "step": 149650 }, { "epoch": 1.2109393963912938, "grad_norm": 0.3384837806224823, "learning_rate": 4.037909145330843e-06, "loss": 0.0253, "step": 149660 }, { "epoch": 1.2110203090864957, "grad_norm": 0.2284383922815323, "learning_rate": 4.037216253266669e-06, "loss": 0.031, "step": 149670 }, { "epoch": 1.2111012217816977, "grad_norm": 0.5547754168510437, "learning_rate": 4.036523380403186e-06, "loss": 0.0141, "step": 149680 }, { "epoch": 1.2111821344768994, "grad_norm": 0.32501810789108276, "learning_rate": 4.035830526754211e-06, "loss": 0.0203, "step": 149690 }, { "epoch": 1.2112630471721013, "grad_norm": 0.7704153060913086, "learning_rate": 4.035137692333559e-06, "loss": 0.0268, "step": 149700 }, { "epoch": 1.2113439598673033, "grad_norm": 0.175121009349823, "learning_rate": 4.034444877155048e-06, "loss": 0.021, "step": 149710 }, { "epoch": 1.211424872562505, "grad_norm": 0.2783338129520416, "learning_rate": 4.033752081232498e-06, "loss": 0.0121, "step": 149720 }, { "epoch": 1.211505785257707, "grad_norm": 0.4965261220932007, "learning_rate": 4.033059304579721e-06, "loss": 0.0234, "step": 149730 }, { "epoch": 1.2115866979529089, "grad_norm": 0.6668272614479065, "learning_rate": 4.0323665472105356e-06, "loss": 0.0232, "step": 149740 }, { "epoch": 1.2116676106481106, "grad_norm": 0.2252085953950882, "learning_rate": 4.031673809138758e-06, "loss": 0.0248, "step": 149750 }, { "epoch": 1.2117485233433125, "grad_norm": 0.34354400634765625, "learning_rate": 4.030981090378199e-06, "loss": 0.0178, "step": 149760 }, { "epoch": 1.2118294360385145, "grad_norm": 0.3199363946914673, "learning_rate": 4.030288390942678e-06, "loss": 0.0293, "step": 149770 }, { "epoch": 1.2119103487337164, "grad_norm": 0.14557351171970367, "learning_rate": 4.02959571084601e-06, "loss": 0.0222, "step": 149780 }, { "epoch": 1.211991261428918, "grad_norm": 0.18974314630031586, "learning_rate": 4.0289030501020034e-06, "loss": 0.0167, "step": 149790 }, { "epoch": 1.21207217412412, "grad_norm": 0.13894835114479065, "learning_rate": 4.028210408724478e-06, "loss": 0.0242, "step": 149800 }, { "epoch": 1.212153086819322, "grad_norm": 0.43763303756713867, "learning_rate": 4.027517786727244e-06, "loss": 0.0227, "step": 149810 }, { "epoch": 1.212233999514524, "grad_norm": 0.18177556991577148, "learning_rate": 4.026825184124113e-06, "loss": 0.0115, "step": 149820 }, { "epoch": 1.2123149122097256, "grad_norm": 0.13257966935634613, "learning_rate": 4.0261326009288995e-06, "loss": 0.023, "step": 149830 }, { "epoch": 1.2123958249049276, "grad_norm": 0.4230838119983673, "learning_rate": 4.025440037155416e-06, "loss": 0.0158, "step": 149840 }, { "epoch": 1.2124767376001295, "grad_norm": 0.3371661603450775, "learning_rate": 4.024747492817475e-06, "loss": 0.0316, "step": 149850 }, { "epoch": 1.2125576502953312, "grad_norm": 0.3160911202430725, "learning_rate": 4.024054967928885e-06, "loss": 0.0134, "step": 149860 }, { "epoch": 1.2126385629905332, "grad_norm": 0.4259748160839081, "learning_rate": 4.0233624625034585e-06, "loss": 0.0237, "step": 149870 }, { "epoch": 1.2127194756857351, "grad_norm": 0.4691063463687897, "learning_rate": 4.022669976555007e-06, "loss": 0.017, "step": 149880 }, { "epoch": 1.2128003883809368, "grad_norm": 0.30750495195388794, "learning_rate": 4.021977510097338e-06, "loss": 0.0177, "step": 149890 }, { "epoch": 1.2128813010761388, "grad_norm": 0.24025648832321167, "learning_rate": 4.021285063144262e-06, "loss": 0.0311, "step": 149900 }, { "epoch": 1.2129622137713407, "grad_norm": 0.2766917943954468, "learning_rate": 4.020592635709592e-06, "loss": 0.0249, "step": 149910 }, { "epoch": 1.2130431264665427, "grad_norm": 0.6278117299079895, "learning_rate": 4.019900227807133e-06, "loss": 0.0307, "step": 149920 }, { "epoch": 1.2131240391617444, "grad_norm": 0.42662763595581055, "learning_rate": 4.019207839450694e-06, "loss": 0.0255, "step": 149930 }, { "epoch": 1.2132049518569463, "grad_norm": 0.13771983981132507, "learning_rate": 4.018515470654087e-06, "loss": 0.0271, "step": 149940 }, { "epoch": 1.2132858645521483, "grad_norm": 0.08254511654376984, "learning_rate": 4.017823121431115e-06, "loss": 0.0155, "step": 149950 }, { "epoch": 1.2133667772473502, "grad_norm": 0.27520012855529785, "learning_rate": 4.017130791795586e-06, "loss": 0.0201, "step": 149960 }, { "epoch": 1.213447689942552, "grad_norm": 0.5604733228683472, "learning_rate": 4.01643848176131e-06, "loss": 0.0206, "step": 149970 }, { "epoch": 1.2135286026377539, "grad_norm": 0.14049823582172394, "learning_rate": 4.015746191342091e-06, "loss": 0.0152, "step": 149980 }, { "epoch": 1.2136095153329558, "grad_norm": 0.2295580953359604, "learning_rate": 4.015053920551737e-06, "loss": 0.0273, "step": 149990 }, { "epoch": 1.2136904280281575, "grad_norm": 0.3060203790664673, "learning_rate": 4.014361669404056e-06, "loss": 0.0172, "step": 150000 }, { "epoch": 1.2137713407233595, "grad_norm": 0.1726115494966507, "learning_rate": 4.013669437912846e-06, "loss": 0.0313, "step": 150010 }, { "epoch": 1.2138522534185614, "grad_norm": 0.23589858412742615, "learning_rate": 4.012977226091919e-06, "loss": 0.0136, "step": 150020 }, { "epoch": 1.2139331661137633, "grad_norm": 0.1480366289615631, "learning_rate": 4.012285033955077e-06, "loss": 0.0173, "step": 150030 }, { "epoch": 1.214014078808965, "grad_norm": 0.30789634585380554, "learning_rate": 4.011592861516126e-06, "loss": 0.0118, "step": 150040 }, { "epoch": 1.214094991504167, "grad_norm": 0.18087148666381836, "learning_rate": 4.010900708788868e-06, "loss": 0.0146, "step": 150050 }, { "epoch": 1.214175904199369, "grad_norm": 0.3124147057533264, "learning_rate": 4.010208575787107e-06, "loss": 0.0151, "step": 150060 }, { "epoch": 1.2142568168945709, "grad_norm": 0.27121391892433167, "learning_rate": 4.009516462524647e-06, "loss": 0.0196, "step": 150070 }, { "epoch": 1.2143377295897726, "grad_norm": 0.3890949487686157, "learning_rate": 4.008824369015289e-06, "loss": 0.0175, "step": 150080 }, { "epoch": 1.2144186422849745, "grad_norm": 0.13274478912353516, "learning_rate": 4.008132295272838e-06, "loss": 0.0195, "step": 150090 }, { "epoch": 1.2144995549801765, "grad_norm": 0.49480921030044556, "learning_rate": 4.0074402413110954e-06, "loss": 0.0192, "step": 150100 }, { "epoch": 1.2145804676753782, "grad_norm": 0.7244926691055298, "learning_rate": 4.006748207143859e-06, "loss": 0.0363, "step": 150110 }, { "epoch": 1.2146613803705801, "grad_norm": 0.48545655608177185, "learning_rate": 4.006056192784933e-06, "loss": 0.0218, "step": 150120 }, { "epoch": 1.214742293065782, "grad_norm": 0.3733636438846588, "learning_rate": 4.00536419824812e-06, "loss": 0.0216, "step": 150130 }, { "epoch": 1.2148232057609838, "grad_norm": 0.7040488719940186, "learning_rate": 4.004672223547218e-06, "loss": 0.0242, "step": 150140 }, { "epoch": 1.2149041184561857, "grad_norm": 0.40384212136268616, "learning_rate": 4.003980268696025e-06, "loss": 0.0155, "step": 150150 }, { "epoch": 1.2149850311513877, "grad_norm": 0.24832400679588318, "learning_rate": 4.003288333708346e-06, "loss": 0.0314, "step": 150160 }, { "epoch": 1.2150659438465896, "grad_norm": 0.40383464097976685, "learning_rate": 4.002596418597975e-06, "loss": 0.0177, "step": 150170 }, { "epoch": 1.2151468565417913, "grad_norm": 0.5746826529502869, "learning_rate": 4.001904523378712e-06, "loss": 0.0155, "step": 150180 }, { "epoch": 1.2152277692369933, "grad_norm": 0.11052066087722778, "learning_rate": 4.001212648064357e-06, "loss": 0.0125, "step": 150190 }, { "epoch": 1.2153086819321952, "grad_norm": 0.186288520693779, "learning_rate": 4.000520792668708e-06, "loss": 0.0204, "step": 150200 }, { "epoch": 1.2153895946273972, "grad_norm": 0.41241076588630676, "learning_rate": 3.999828957205562e-06, "loss": 0.018, "step": 150210 }, { "epoch": 1.2154705073225989, "grad_norm": 0.07306230813264847, "learning_rate": 3.999137141688714e-06, "loss": 0.0192, "step": 150220 }, { "epoch": 1.2155514200178008, "grad_norm": 0.2739337682723999, "learning_rate": 3.998445346131966e-06, "loss": 0.0175, "step": 150230 }, { "epoch": 1.2156323327130028, "grad_norm": 0.6005316376686096, "learning_rate": 3.9977535705491094e-06, "loss": 0.0176, "step": 150240 }, { "epoch": 1.2157132454082045, "grad_norm": 0.49866238236427307, "learning_rate": 3.997061814953941e-06, "loss": 0.0255, "step": 150250 }, { "epoch": 1.2157941581034064, "grad_norm": 0.6046014428138733, "learning_rate": 3.996370079360259e-06, "loss": 0.016, "step": 150260 }, { "epoch": 1.2158750707986083, "grad_norm": 0.11002788692712784, "learning_rate": 3.9956783637818556e-06, "loss": 0.0247, "step": 150270 }, { "epoch": 1.21595598349381, "grad_norm": 0.4149967432022095, "learning_rate": 3.9949866682325285e-06, "loss": 0.012, "step": 150280 }, { "epoch": 1.216036896189012, "grad_norm": 0.2520742118358612, "learning_rate": 3.994294992726071e-06, "loss": 0.0152, "step": 150290 }, { "epoch": 1.216117808884214, "grad_norm": 0.5172634720802307, "learning_rate": 3.993603337276274e-06, "loss": 0.0211, "step": 150300 }, { "epoch": 1.2161987215794159, "grad_norm": 0.5913721323013306, "learning_rate": 3.992911701896935e-06, "loss": 0.0269, "step": 150310 }, { "epoch": 1.2162796342746176, "grad_norm": 0.5354458093643188, "learning_rate": 3.992220086601848e-06, "loss": 0.0167, "step": 150320 }, { "epoch": 1.2163605469698195, "grad_norm": 0.28820884227752686, "learning_rate": 3.991528491404802e-06, "loss": 0.0208, "step": 150330 }, { "epoch": 1.2164414596650215, "grad_norm": 0.49973592162132263, "learning_rate": 3.990836916319591e-06, "loss": 0.023, "step": 150340 }, { "epoch": 1.2165223723602234, "grad_norm": 0.4996718466281891, "learning_rate": 3.990145361360009e-06, "loss": 0.0251, "step": 150350 }, { "epoch": 1.2166032850554251, "grad_norm": 0.15489165484905243, "learning_rate": 3.989453826539845e-06, "loss": 0.0227, "step": 150360 }, { "epoch": 1.216684197750627, "grad_norm": 0.2928808331489563, "learning_rate": 3.98876231187289e-06, "loss": 0.0313, "step": 150370 }, { "epoch": 1.216765110445829, "grad_norm": 0.15890580415725708, "learning_rate": 3.988070817372936e-06, "loss": 0.033, "step": 150380 }, { "epoch": 1.2168460231410307, "grad_norm": 0.35363584756851196, "learning_rate": 3.987379343053775e-06, "loss": 0.0165, "step": 150390 }, { "epoch": 1.2169269358362327, "grad_norm": 0.2141926884651184, "learning_rate": 3.986687888929195e-06, "loss": 0.0232, "step": 150400 }, { "epoch": 1.2170078485314346, "grad_norm": 0.18100760877132416, "learning_rate": 3.985996455012983e-06, "loss": 0.0118, "step": 150410 }, { "epoch": 1.2170887612266363, "grad_norm": 0.2773754596710205, "learning_rate": 3.985305041318936e-06, "loss": 0.0234, "step": 150420 }, { "epoch": 1.2171696739218383, "grad_norm": 0.19587042927742004, "learning_rate": 3.9846136478608345e-06, "loss": 0.0223, "step": 150430 }, { "epoch": 1.2172505866170402, "grad_norm": 0.3284485936164856, "learning_rate": 3.98392227465247e-06, "loss": 0.015, "step": 150440 }, { "epoch": 1.2173314993122422, "grad_norm": 0.6557078957557678, "learning_rate": 3.983230921707632e-06, "loss": 0.0209, "step": 150450 }, { "epoch": 1.2174124120074439, "grad_norm": 0.5972317457199097, "learning_rate": 3.982539589040105e-06, "loss": 0.0197, "step": 150460 }, { "epoch": 1.2174933247026458, "grad_norm": 0.5606015920639038, "learning_rate": 3.98184827666368e-06, "loss": 0.0283, "step": 150470 }, { "epoch": 1.2175742373978478, "grad_norm": 0.31208929419517517, "learning_rate": 3.981156984592142e-06, "loss": 0.0197, "step": 150480 }, { "epoch": 1.2176551500930497, "grad_norm": 0.20360125601291656, "learning_rate": 3.9804657128392745e-06, "loss": 0.0156, "step": 150490 }, { "epoch": 1.2177360627882514, "grad_norm": 0.5540934801101685, "learning_rate": 3.979774461418868e-06, "loss": 0.0185, "step": 150500 }, { "epoch": 1.2178169754834534, "grad_norm": 0.4169832170009613, "learning_rate": 3.979083230344706e-06, "loss": 0.0408, "step": 150510 }, { "epoch": 1.2178978881786553, "grad_norm": 0.3578546643257141, "learning_rate": 3.978392019630573e-06, "loss": 0.017, "step": 150520 }, { "epoch": 1.217978800873857, "grad_norm": 0.28488704562187195, "learning_rate": 3.977700829290253e-06, "loss": 0.0196, "step": 150530 }, { "epoch": 1.218059713569059, "grad_norm": 0.7908775210380554, "learning_rate": 3.977009659337535e-06, "loss": 0.0209, "step": 150540 }, { "epoch": 1.218140626264261, "grad_norm": 0.0018985794158652425, "learning_rate": 3.976318509786199e-06, "loss": 0.0092, "step": 150550 }, { "epoch": 1.2182215389594628, "grad_norm": 0.26969385147094727, "learning_rate": 3.975627380650027e-06, "loss": 0.0187, "step": 150560 }, { "epoch": 1.2183024516546646, "grad_norm": 0.26303038001060486, "learning_rate": 3.974936271942806e-06, "loss": 0.0233, "step": 150570 }, { "epoch": 1.2183833643498665, "grad_norm": 0.14549529552459717, "learning_rate": 3.974245183678318e-06, "loss": 0.017, "step": 150580 }, { "epoch": 1.2184642770450684, "grad_norm": 0.08631398528814316, "learning_rate": 3.9735541158703425e-06, "loss": 0.0099, "step": 150590 }, { "epoch": 1.2185451897402702, "grad_norm": 0.19493067264556885, "learning_rate": 3.972863068532663e-06, "loss": 0.0244, "step": 150600 }, { "epoch": 1.218626102435472, "grad_norm": 0.0023138385731726885, "learning_rate": 3.972172041679063e-06, "loss": 0.0234, "step": 150610 }, { "epoch": 1.218707015130674, "grad_norm": 0.6653045415878296, "learning_rate": 3.971481035323322e-06, "loss": 0.0211, "step": 150620 }, { "epoch": 1.218787927825876, "grad_norm": 0.19374944269657135, "learning_rate": 3.970790049479217e-06, "loss": 0.0169, "step": 150630 }, { "epoch": 1.2188688405210777, "grad_norm": 0.4334172010421753, "learning_rate": 3.970099084160536e-06, "loss": 0.0273, "step": 150640 }, { "epoch": 1.2189497532162796, "grad_norm": 0.3641863465309143, "learning_rate": 3.969408139381053e-06, "loss": 0.0166, "step": 150650 }, { "epoch": 1.2190306659114816, "grad_norm": 0.31710344552993774, "learning_rate": 3.968717215154547e-06, "loss": 0.0202, "step": 150660 }, { "epoch": 1.2191115786066833, "grad_norm": 0.5072859525680542, "learning_rate": 3.968026311494802e-06, "loss": 0.025, "step": 150670 }, { "epoch": 1.2191924913018852, "grad_norm": 0.33346492052078247, "learning_rate": 3.967335428415591e-06, "loss": 0.0243, "step": 150680 }, { "epoch": 1.2192734039970872, "grad_norm": 0.6850444078445435, "learning_rate": 3.966644565930697e-06, "loss": 0.0154, "step": 150690 }, { "epoch": 1.219354316692289, "grad_norm": 0.39247626066207886, "learning_rate": 3.965953724053896e-06, "loss": 0.0178, "step": 150700 }, { "epoch": 1.2194352293874908, "grad_norm": 0.2295232117176056, "learning_rate": 3.965262902798962e-06, "loss": 0.0179, "step": 150710 }, { "epoch": 1.2195161420826928, "grad_norm": 0.2237158715724945, "learning_rate": 3.964572102179678e-06, "loss": 0.0207, "step": 150720 }, { "epoch": 1.2195970547778947, "grad_norm": 0.2825532853603363, "learning_rate": 3.963881322209814e-06, "loss": 0.0201, "step": 150730 }, { "epoch": 1.2196779674730966, "grad_norm": 0.1776670515537262, "learning_rate": 3.963190562903154e-06, "loss": 0.0342, "step": 150740 }, { "epoch": 1.2197588801682984, "grad_norm": 0.09419858455657959, "learning_rate": 3.962499824273465e-06, "loss": 0.0263, "step": 150750 }, { "epoch": 1.2198397928635003, "grad_norm": 0.23121525347232819, "learning_rate": 3.96180910633453e-06, "loss": 0.0104, "step": 150760 }, { "epoch": 1.2199207055587022, "grad_norm": 0.3121841847896576, "learning_rate": 3.961118409100121e-06, "loss": 0.0143, "step": 150770 }, { "epoch": 1.220001618253904, "grad_norm": 0.0803692638874054, "learning_rate": 3.96042773258401e-06, "loss": 0.0156, "step": 150780 }, { "epoch": 1.220082530949106, "grad_norm": 0.4087449312210083, "learning_rate": 3.959737076799974e-06, "loss": 0.017, "step": 150790 }, { "epoch": 1.2201634436443078, "grad_norm": 0.598293662071228, "learning_rate": 3.959046441761787e-06, "loss": 0.0251, "step": 150800 }, { "epoch": 1.2202443563395096, "grad_norm": 0.20259100198745728, "learning_rate": 3.95835582748322e-06, "loss": 0.0166, "step": 150810 }, { "epoch": 1.2203252690347115, "grad_norm": 0.2772304117679596, "learning_rate": 3.957665233978046e-06, "loss": 0.0237, "step": 150820 }, { "epoch": 1.2204061817299134, "grad_norm": 0.8506708741188049, "learning_rate": 3.956974661260041e-06, "loss": 0.0206, "step": 150830 }, { "epoch": 1.2204870944251154, "grad_norm": 0.5206201076507568, "learning_rate": 3.956284109342973e-06, "loss": 0.0241, "step": 150840 }, { "epoch": 1.220568007120317, "grad_norm": 0.5827845335006714, "learning_rate": 3.955593578240614e-06, "loss": 0.0194, "step": 150850 }, { "epoch": 1.220648919815519, "grad_norm": 0.44017812609672546, "learning_rate": 3.9549030679667384e-06, "loss": 0.0184, "step": 150860 }, { "epoch": 1.220729832510721, "grad_norm": 0.5751535296440125, "learning_rate": 3.954212578535113e-06, "loss": 0.0259, "step": 150870 }, { "epoch": 1.220810745205923, "grad_norm": 0.5799734592437744, "learning_rate": 3.953522109959511e-06, "loss": 0.021, "step": 150880 }, { "epoch": 1.2208916579011246, "grad_norm": 0.1955631971359253, "learning_rate": 3.952831662253703e-06, "loss": 0.0208, "step": 150890 }, { "epoch": 1.2209725705963266, "grad_norm": 0.24551981687545776, "learning_rate": 3.952141235431454e-06, "loss": 0.0165, "step": 150900 }, { "epoch": 1.2210534832915285, "grad_norm": 0.45053353905677795, "learning_rate": 3.951450829506537e-06, "loss": 0.0209, "step": 150910 }, { "epoch": 1.2211343959867302, "grad_norm": 0.23547901213169098, "learning_rate": 3.950760444492719e-06, "loss": 0.0305, "step": 150920 }, { "epoch": 1.2212153086819322, "grad_norm": 0.48753878474235535, "learning_rate": 3.950070080403769e-06, "loss": 0.0189, "step": 150930 }, { "epoch": 1.2212962213771341, "grad_norm": 0.34899041056632996, "learning_rate": 3.949379737253455e-06, "loss": 0.0164, "step": 150940 }, { "epoch": 1.2213771340723358, "grad_norm": 0.4148293733596802, "learning_rate": 3.948689415055544e-06, "loss": 0.0285, "step": 150950 }, { "epoch": 1.2214580467675378, "grad_norm": 0.3162224292755127, "learning_rate": 3.9479991138238046e-06, "loss": 0.0135, "step": 150960 }, { "epoch": 1.2215389594627397, "grad_norm": 1.8437130451202393, "learning_rate": 3.947308833572e-06, "loss": 0.0173, "step": 150970 }, { "epoch": 1.2216198721579417, "grad_norm": 0.5197650194168091, "learning_rate": 3.9466185743139e-06, "loss": 0.0165, "step": 150980 }, { "epoch": 1.2217007848531434, "grad_norm": 0.37036916613578796, "learning_rate": 3.945928336063269e-06, "loss": 0.0196, "step": 150990 }, { "epoch": 1.2217816975483453, "grad_norm": 0.481898695230484, "learning_rate": 3.94523811883387e-06, "loss": 0.0351, "step": 151000 }, { "epoch": 1.2218626102435473, "grad_norm": 0.5629045963287354, "learning_rate": 3.94454792263947e-06, "loss": 0.0266, "step": 151010 }, { "epoch": 1.2219435229387492, "grad_norm": 0.3437831997871399, "learning_rate": 3.943857747493836e-06, "loss": 0.0203, "step": 151020 }, { "epoch": 1.222024435633951, "grad_norm": 0.23325717449188232, "learning_rate": 3.943167593410728e-06, "loss": 0.0224, "step": 151030 }, { "epoch": 1.2221053483291529, "grad_norm": 0.44017094373703003, "learning_rate": 3.942477460403911e-06, "loss": 0.0173, "step": 151040 }, { "epoch": 1.2221862610243548, "grad_norm": 0.8214559555053711, "learning_rate": 3.94178734848715e-06, "loss": 0.0252, "step": 151050 }, { "epoch": 1.2222671737195565, "grad_norm": 0.3434585928916931, "learning_rate": 3.941097257674206e-06, "loss": 0.0198, "step": 151060 }, { "epoch": 1.2223480864147585, "grad_norm": 0.43471789360046387, "learning_rate": 3.94040718797884e-06, "loss": 0.0216, "step": 151070 }, { "epoch": 1.2224289991099604, "grad_norm": 0.4933444857597351, "learning_rate": 3.9397171394148185e-06, "loss": 0.0244, "step": 151080 }, { "epoch": 1.222509911805162, "grad_norm": 1.015724778175354, "learning_rate": 3.939027111995898e-06, "loss": 0.0211, "step": 151090 }, { "epoch": 1.222590824500364, "grad_norm": 0.28849583864212036, "learning_rate": 3.938337105735842e-06, "loss": 0.0181, "step": 151100 }, { "epoch": 1.222671737195566, "grad_norm": 0.48875924944877625, "learning_rate": 3.937647120648411e-06, "loss": 0.0288, "step": 151110 }, { "epoch": 1.222752649890768, "grad_norm": 0.5255448222160339, "learning_rate": 3.936957156747367e-06, "loss": 0.0227, "step": 151120 }, { "epoch": 1.2228335625859696, "grad_norm": 0.2974431812763214, "learning_rate": 3.936267214046469e-06, "loss": 0.0207, "step": 151130 }, { "epoch": 1.2229144752811716, "grad_norm": 0.4052809476852417, "learning_rate": 3.935577292559472e-06, "loss": 0.0203, "step": 151140 }, { "epoch": 1.2229953879763735, "grad_norm": 0.28484055399894714, "learning_rate": 3.934887392300143e-06, "loss": 0.0262, "step": 151150 }, { "epoch": 1.2230763006715755, "grad_norm": 0.39663824439048767, "learning_rate": 3.934197513282234e-06, "loss": 0.0195, "step": 151160 }, { "epoch": 1.2231572133667772, "grad_norm": 0.5186317563056946, "learning_rate": 3.9335076555195065e-06, "loss": 0.0194, "step": 151170 }, { "epoch": 1.2232381260619791, "grad_norm": 0.3426670730113983, "learning_rate": 3.932817819025718e-06, "loss": 0.0187, "step": 151180 }, { "epoch": 1.223319038757181, "grad_norm": 0.7722316384315491, "learning_rate": 3.932128003814623e-06, "loss": 0.0274, "step": 151190 }, { "epoch": 1.2233999514523828, "grad_norm": 0.5293872356414795, "learning_rate": 3.931438209899982e-06, "loss": 0.0196, "step": 151200 }, { "epoch": 1.2234808641475847, "grad_norm": 0.26059776544570923, "learning_rate": 3.9307484372955516e-06, "loss": 0.0296, "step": 151210 }, { "epoch": 1.2235617768427867, "grad_norm": 0.044409677386283875, "learning_rate": 3.930058686015083e-06, "loss": 0.0315, "step": 151220 }, { "epoch": 1.2236426895379886, "grad_norm": 0.24180839955806732, "learning_rate": 3.929368956072335e-06, "loss": 0.014, "step": 151230 }, { "epoch": 1.2237236022331903, "grad_norm": 0.5533067584037781, "learning_rate": 3.928679247481066e-06, "loss": 0.0323, "step": 151240 }, { "epoch": 1.2238045149283923, "grad_norm": 0.47295287251472473, "learning_rate": 3.927989560255027e-06, "loss": 0.0166, "step": 151250 }, { "epoch": 1.2238854276235942, "grad_norm": 0.5371041297912598, "learning_rate": 3.927299894407972e-06, "loss": 0.0321, "step": 151260 }, { "epoch": 1.2239663403187961, "grad_norm": 0.23228901624679565, "learning_rate": 3.926610249953658e-06, "loss": 0.0166, "step": 151270 }, { "epoch": 1.2240472530139979, "grad_norm": 0.4866563677787781, "learning_rate": 3.925920626905834e-06, "loss": 0.0251, "step": 151280 }, { "epoch": 1.2241281657091998, "grad_norm": 0.21233372390270233, "learning_rate": 3.925231025278257e-06, "loss": 0.0167, "step": 151290 }, { "epoch": 1.2242090784044017, "grad_norm": 0.5492976307868958, "learning_rate": 3.924541445084677e-06, "loss": 0.0253, "step": 151300 }, { "epoch": 1.2242899910996035, "grad_norm": 0.7704167366027832, "learning_rate": 3.92385188633885e-06, "loss": 0.0177, "step": 151310 }, { "epoch": 1.2243709037948054, "grad_norm": 0.4066074788570404, "learning_rate": 3.923162349054523e-06, "loss": 0.0329, "step": 151320 }, { "epoch": 1.2244518164900073, "grad_norm": 0.2517164349555969, "learning_rate": 3.92247283324545e-06, "loss": 0.0208, "step": 151330 }, { "epoch": 1.224532729185209, "grad_norm": 0.47917068004608154, "learning_rate": 3.921783338925383e-06, "loss": 0.0304, "step": 151340 }, { "epoch": 1.224613641880411, "grad_norm": 0.3800544738769531, "learning_rate": 3.9210938661080696e-06, "loss": 0.0182, "step": 151350 }, { "epoch": 1.224694554575613, "grad_norm": 0.5614285469055176, "learning_rate": 3.920404414807262e-06, "loss": 0.0176, "step": 151360 }, { "epoch": 1.2247754672708149, "grad_norm": 0.6563559174537659, "learning_rate": 3.91971498503671e-06, "loss": 0.0241, "step": 151370 }, { "epoch": 1.2248563799660166, "grad_norm": 0.2655175030231476, "learning_rate": 3.9190255768101604e-06, "loss": 0.0179, "step": 151380 }, { "epoch": 1.2249372926612185, "grad_norm": 0.46466097235679626, "learning_rate": 3.918336190141364e-06, "loss": 0.0308, "step": 151390 }, { "epoch": 1.2250182053564205, "grad_norm": 0.31612735986709595, "learning_rate": 3.917646825044071e-06, "loss": 0.0207, "step": 151400 }, { "epoch": 1.2250991180516224, "grad_norm": 0.3561636209487915, "learning_rate": 3.916957481532024e-06, "loss": 0.0364, "step": 151410 }, { "epoch": 1.2251800307468241, "grad_norm": 0.22970663011074066, "learning_rate": 3.916268159618974e-06, "loss": 0.0222, "step": 151420 }, { "epoch": 1.225260943442026, "grad_norm": 0.5507858395576477, "learning_rate": 3.91557885931867e-06, "loss": 0.0293, "step": 151430 }, { "epoch": 1.225341856137228, "grad_norm": 0.22858211398124695, "learning_rate": 3.914889580644854e-06, "loss": 0.0215, "step": 151440 }, { "epoch": 1.2254227688324297, "grad_norm": 0.26767048239707947, "learning_rate": 3.914200323611275e-06, "loss": 0.0252, "step": 151450 }, { "epoch": 1.2255036815276317, "grad_norm": 0.5438433885574341, "learning_rate": 3.913511088231679e-06, "loss": 0.0188, "step": 151460 }, { "epoch": 1.2255845942228336, "grad_norm": 0.40637820959091187, "learning_rate": 3.912821874519812e-06, "loss": 0.0184, "step": 151470 }, { "epoch": 1.2256655069180353, "grad_norm": 0.3445482552051544, "learning_rate": 3.912132682489415e-06, "loss": 0.0232, "step": 151480 }, { "epoch": 1.2257464196132373, "grad_norm": 0.3123988211154938, "learning_rate": 3.911443512154235e-06, "loss": 0.0238, "step": 151490 }, { "epoch": 1.2258273323084392, "grad_norm": 0.13373634219169617, "learning_rate": 3.910754363528018e-06, "loss": 0.0125, "step": 151500 }, { "epoch": 1.2259082450036412, "grad_norm": 0.25084760785102844, "learning_rate": 3.910065236624505e-06, "loss": 0.0205, "step": 151510 }, { "epoch": 1.2259891576988429, "grad_norm": 0.38474082946777344, "learning_rate": 3.9093761314574394e-06, "loss": 0.0177, "step": 151520 }, { "epoch": 1.2260700703940448, "grad_norm": 0.40786436200141907, "learning_rate": 3.908687048040566e-06, "loss": 0.0267, "step": 151530 }, { "epoch": 1.2261509830892467, "grad_norm": 0.20169205963611603, "learning_rate": 3.907997986387624e-06, "loss": 0.0108, "step": 151540 }, { "epoch": 1.2262318957844487, "grad_norm": 0.28287753462791443, "learning_rate": 3.907308946512357e-06, "loss": 0.0136, "step": 151550 }, { "epoch": 1.2263128084796504, "grad_norm": 0.33374837040901184, "learning_rate": 3.9066199284285075e-06, "loss": 0.019, "step": 151560 }, { "epoch": 1.2263937211748523, "grad_norm": 0.4564754068851471, "learning_rate": 3.905930932149813e-06, "loss": 0.0247, "step": 151570 }, { "epoch": 1.2264746338700543, "grad_norm": 0.29811128973960876, "learning_rate": 3.905241957690018e-06, "loss": 0.0171, "step": 151580 }, { "epoch": 1.226555546565256, "grad_norm": 0.46254751086235046, "learning_rate": 3.904553005062862e-06, "loss": 0.0183, "step": 151590 }, { "epoch": 1.226636459260458, "grad_norm": 0.5443646311759949, "learning_rate": 3.903864074282081e-06, "loss": 0.0238, "step": 151600 }, { "epoch": 1.2267173719556599, "grad_norm": 0.29433050751686096, "learning_rate": 3.903175165361419e-06, "loss": 0.0148, "step": 151610 }, { "epoch": 1.2267982846508616, "grad_norm": 0.4704108238220215, "learning_rate": 3.902486278314614e-06, "loss": 0.0164, "step": 151620 }, { "epoch": 1.2268791973460635, "grad_norm": 0.9206986427307129, "learning_rate": 3.9017974131554e-06, "loss": 0.029, "step": 151630 }, { "epoch": 1.2269601100412655, "grad_norm": 0.8642820119857788, "learning_rate": 3.901108569897519e-06, "loss": 0.0279, "step": 151640 }, { "epoch": 1.2270410227364674, "grad_norm": 0.42887914180755615, "learning_rate": 3.900419748554709e-06, "loss": 0.0177, "step": 151650 }, { "epoch": 1.2271219354316691, "grad_norm": 0.4574354887008667, "learning_rate": 3.899730949140706e-06, "loss": 0.0281, "step": 151660 }, { "epoch": 1.227202848126871, "grad_norm": 0.3899747133255005, "learning_rate": 3.899042171669245e-06, "loss": 0.0153, "step": 151670 }, { "epoch": 1.227283760822073, "grad_norm": 0.5453084111213684, "learning_rate": 3.8983534161540645e-06, "loss": 0.0181, "step": 151680 }, { "epoch": 1.227364673517275, "grad_norm": 0.590152382850647, "learning_rate": 3.8976646826089e-06, "loss": 0.0171, "step": 151690 }, { "epoch": 1.2274455862124767, "grad_norm": 0.33222347497940063, "learning_rate": 3.896975971047485e-06, "loss": 0.0177, "step": 151700 }, { "epoch": 1.2275264989076786, "grad_norm": 0.3739974796772003, "learning_rate": 3.896287281483555e-06, "loss": 0.0192, "step": 151710 }, { "epoch": 1.2276074116028806, "grad_norm": 0.23902949690818787, "learning_rate": 3.895598613930848e-06, "loss": 0.0187, "step": 151720 }, { "epoch": 1.2276883242980823, "grad_norm": 0.5325513482093811, "learning_rate": 3.894909968403093e-06, "loss": 0.0187, "step": 151730 }, { "epoch": 1.2277692369932842, "grad_norm": 0.4350929260253906, "learning_rate": 3.894221344914025e-06, "loss": 0.0222, "step": 151740 }, { "epoch": 1.2278501496884862, "grad_norm": 0.3434826731681824, "learning_rate": 3.893532743477381e-06, "loss": 0.0136, "step": 151750 }, { "epoch": 1.2279310623836879, "grad_norm": 0.16753849387168884, "learning_rate": 3.892844164106889e-06, "loss": 0.0166, "step": 151760 }, { "epoch": 1.2280119750788898, "grad_norm": 0.2794250249862671, "learning_rate": 3.892155606816281e-06, "loss": 0.0224, "step": 151770 }, { "epoch": 1.2280928877740918, "grad_norm": 0.3644822835922241, "learning_rate": 3.891467071619293e-06, "loss": 0.0248, "step": 151780 }, { "epoch": 1.2281738004692937, "grad_norm": 0.29661160707473755, "learning_rate": 3.890778558529653e-06, "loss": 0.0224, "step": 151790 }, { "epoch": 1.2282547131644954, "grad_norm": 0.12897205352783203, "learning_rate": 3.890090067561093e-06, "loss": 0.0147, "step": 151800 }, { "epoch": 1.2283356258596974, "grad_norm": 0.49752822518348694, "learning_rate": 3.889401598727345e-06, "loss": 0.0274, "step": 151810 }, { "epoch": 1.2284165385548993, "grad_norm": 0.10652884095907211, "learning_rate": 3.888713152042135e-06, "loss": 0.0107, "step": 151820 }, { "epoch": 1.2284974512501012, "grad_norm": 0.4682168960571289, "learning_rate": 3.888024727519196e-06, "loss": 0.0144, "step": 151830 }, { "epoch": 1.228578363945303, "grad_norm": 0.6306813955307007, "learning_rate": 3.887336325172256e-06, "loss": 0.0248, "step": 151840 }, { "epoch": 1.228659276640505, "grad_norm": 0.5691981315612793, "learning_rate": 3.886647945015047e-06, "loss": 0.0182, "step": 151850 }, { "epoch": 1.2287401893357068, "grad_norm": 0.42825964093208313, "learning_rate": 3.88595958706129e-06, "loss": 0.0188, "step": 151860 }, { "epoch": 1.2288211020309086, "grad_norm": 0.17838424444198608, "learning_rate": 3.88527125132472e-06, "loss": 0.0116, "step": 151870 }, { "epoch": 1.2289020147261105, "grad_norm": 0.4559338390827179, "learning_rate": 3.884582937819062e-06, "loss": 0.0196, "step": 151880 }, { "epoch": 1.2289829274213124, "grad_norm": 0.4166780114173889, "learning_rate": 3.883894646558041e-06, "loss": 0.0162, "step": 151890 }, { "epoch": 1.2290638401165144, "grad_norm": 0.3070064187049866, "learning_rate": 3.883206377555384e-06, "loss": 0.0288, "step": 151900 }, { "epoch": 1.229144752811716, "grad_norm": 0.4151087701320648, "learning_rate": 3.88251813082482e-06, "loss": 0.0265, "step": 151910 }, { "epoch": 1.229225665506918, "grad_norm": 0.40896785259246826, "learning_rate": 3.8818299063800735e-06, "loss": 0.0275, "step": 151920 }, { "epoch": 1.22930657820212, "grad_norm": 0.29541096091270447, "learning_rate": 3.881141704234867e-06, "loss": 0.0142, "step": 151930 }, { "epoch": 1.229387490897322, "grad_norm": 0.5166245102882385, "learning_rate": 3.880453524402929e-06, "loss": 0.0209, "step": 151940 }, { "epoch": 1.2294684035925236, "grad_norm": 0.20520561933517456, "learning_rate": 3.879765366897982e-06, "loss": 0.0137, "step": 151950 }, { "epoch": 1.2295493162877256, "grad_norm": 0.34057414531707764, "learning_rate": 3.879077231733747e-06, "loss": 0.0261, "step": 151960 }, { "epoch": 1.2296302289829275, "grad_norm": 0.38478773832321167, "learning_rate": 3.878389118923953e-06, "loss": 0.0255, "step": 151970 }, { "epoch": 1.2297111416781292, "grad_norm": 0.23768776655197144, "learning_rate": 3.877701028482319e-06, "loss": 0.0108, "step": 151980 }, { "epoch": 1.2297920543733312, "grad_norm": 0.08443249017000198, "learning_rate": 3.877012960422569e-06, "loss": 0.0099, "step": 151990 }, { "epoch": 1.229872967068533, "grad_norm": 0.3123520314693451, "learning_rate": 3.876324914758424e-06, "loss": 0.0314, "step": 152000 }, { "epoch": 1.2299538797637348, "grad_norm": 0.48641476035118103, "learning_rate": 3.875636891503608e-06, "loss": 0.0224, "step": 152010 }, { "epoch": 1.2300347924589368, "grad_norm": 0.29829537868499756, "learning_rate": 3.87494889067184e-06, "loss": 0.0424, "step": 152020 }, { "epoch": 1.2301157051541387, "grad_norm": 0.5637738108634949, "learning_rate": 3.87426091227684e-06, "loss": 0.0245, "step": 152030 }, { "epoch": 1.2301966178493406, "grad_norm": 0.21008604764938354, "learning_rate": 3.873572956332332e-06, "loss": 0.0192, "step": 152040 }, { "epoch": 1.2302775305445424, "grad_norm": 0.32685616612434387, "learning_rate": 3.872885022852031e-06, "loss": 0.0304, "step": 152050 }, { "epoch": 1.2303584432397443, "grad_norm": 0.1680598258972168, "learning_rate": 3.87219711184966e-06, "loss": 0.0219, "step": 152060 }, { "epoch": 1.2304393559349462, "grad_norm": 0.1399872750043869, "learning_rate": 3.871509223338938e-06, "loss": 0.0222, "step": 152070 }, { "epoch": 1.2305202686301482, "grad_norm": 0.3852701783180237, "learning_rate": 3.87082135733358e-06, "loss": 0.0164, "step": 152080 }, { "epoch": 1.23060118132535, "grad_norm": 0.48209625482559204, "learning_rate": 3.870133513847306e-06, "loss": 0.0354, "step": 152090 }, { "epoch": 1.2306820940205518, "grad_norm": 0.41720426082611084, "learning_rate": 3.8694456928938365e-06, "loss": 0.019, "step": 152100 }, { "epoch": 1.2307630067157538, "grad_norm": 0.38062265515327454, "learning_rate": 3.868757894486883e-06, "loss": 0.0198, "step": 152110 }, { "epoch": 1.2308439194109555, "grad_norm": 0.5577558279037476, "learning_rate": 3.868070118640165e-06, "loss": 0.0225, "step": 152120 }, { "epoch": 1.2309248321061574, "grad_norm": 0.7421906590461731, "learning_rate": 3.867382365367402e-06, "loss": 0.0151, "step": 152130 }, { "epoch": 1.2310057448013594, "grad_norm": 0.42736512422561646, "learning_rate": 3.866694634682305e-06, "loss": 0.0255, "step": 152140 }, { "epoch": 1.231086657496561, "grad_norm": 0.6660416722297668, "learning_rate": 3.8660069265985896e-06, "loss": 0.0258, "step": 152150 }, { "epoch": 1.231167570191763, "grad_norm": 0.4953797161579132, "learning_rate": 3.8653192411299746e-06, "loss": 0.0146, "step": 152160 }, { "epoch": 1.231248482886965, "grad_norm": 0.5320931673049927, "learning_rate": 3.864631578290171e-06, "loss": 0.0282, "step": 152170 }, { "epoch": 1.231329395582167, "grad_norm": 0.7412394285202026, "learning_rate": 3.863943938092892e-06, "loss": 0.0329, "step": 152180 }, { "epoch": 1.2314103082773686, "grad_norm": 0.6826586723327637, "learning_rate": 3.863256320551854e-06, "loss": 0.0229, "step": 152190 }, { "epoch": 1.2314912209725706, "grad_norm": 0.6704111099243164, "learning_rate": 3.86256872568077e-06, "loss": 0.0212, "step": 152200 }, { "epoch": 1.2315721336677725, "grad_norm": 0.27940237522125244, "learning_rate": 3.86188115349335e-06, "loss": 0.0241, "step": 152210 }, { "epoch": 1.2316530463629745, "grad_norm": 0.3386733829975128, "learning_rate": 3.861193604003307e-06, "loss": 0.0199, "step": 152220 }, { "epoch": 1.2317339590581762, "grad_norm": 0.3769756555557251, "learning_rate": 3.860506077224357e-06, "loss": 0.0142, "step": 152230 }, { "epoch": 1.2318148717533781, "grad_norm": 0.4169459939002991, "learning_rate": 3.8598185731702056e-06, "loss": 0.0204, "step": 152240 }, { "epoch": 1.23189578444858, "grad_norm": 0.45527172088623047, "learning_rate": 3.859131091854564e-06, "loss": 0.0265, "step": 152250 }, { "epoch": 1.2319766971437818, "grad_norm": 0.2267422080039978, "learning_rate": 3.858443633291148e-06, "loss": 0.0219, "step": 152260 }, { "epoch": 1.2320576098389837, "grad_norm": 0.3868425488471985, "learning_rate": 3.857756197493661e-06, "loss": 0.0246, "step": 152270 }, { "epoch": 1.2321385225341857, "grad_norm": 0.5407257080078125, "learning_rate": 3.857068784475815e-06, "loss": 0.0208, "step": 152280 }, { "epoch": 1.2322194352293874, "grad_norm": 0.378401517868042, "learning_rate": 3.8563813942513216e-06, "loss": 0.016, "step": 152290 }, { "epoch": 1.2323003479245893, "grad_norm": 0.32661375403404236, "learning_rate": 3.855694026833885e-06, "loss": 0.0176, "step": 152300 }, { "epoch": 1.2323812606197913, "grad_norm": 0.4296956956386566, "learning_rate": 3.8550066822372165e-06, "loss": 0.0213, "step": 152310 }, { "epoch": 1.2324621733149932, "grad_norm": 0.4145197868347168, "learning_rate": 3.854319360475022e-06, "loss": 0.0123, "step": 152320 }, { "epoch": 1.232543086010195, "grad_norm": 0.1897476464509964, "learning_rate": 3.853632061561009e-06, "loss": 0.0106, "step": 152330 }, { "epoch": 1.2326239987053969, "grad_norm": 0.49656590819358826, "learning_rate": 3.852944785508884e-06, "loss": 0.0232, "step": 152340 }, { "epoch": 1.2327049114005988, "grad_norm": 0.325061559677124, "learning_rate": 3.8522575323323555e-06, "loss": 0.017, "step": 152350 }, { "epoch": 1.2327858240958007, "grad_norm": 0.41242942214012146, "learning_rate": 3.851570302045126e-06, "loss": 0.0314, "step": 152360 }, { "epoch": 1.2328667367910024, "grad_norm": 0.3208851218223572, "learning_rate": 3.850883094660902e-06, "loss": 0.0342, "step": 152370 }, { "epoch": 1.2329476494862044, "grad_norm": 0.3933376669883728, "learning_rate": 3.850195910193388e-06, "loss": 0.0139, "step": 152380 }, { "epoch": 1.2330285621814063, "grad_norm": 0.3813555836677551, "learning_rate": 3.849508748656292e-06, "loss": 0.0229, "step": 152390 }, { "epoch": 1.233109474876608, "grad_norm": 0.18274806439876556, "learning_rate": 3.848821610063315e-06, "loss": 0.0153, "step": 152400 }, { "epoch": 1.23319038757181, "grad_norm": 0.39789247512817383, "learning_rate": 3.848134494428159e-06, "loss": 0.022, "step": 152410 }, { "epoch": 1.233271300267012, "grad_norm": 0.549730658531189, "learning_rate": 3.84744740176453e-06, "loss": 0.018, "step": 152420 }, { "epoch": 1.2333522129622139, "grad_norm": 0.4395909011363983, "learning_rate": 3.846760332086129e-06, "loss": 0.014, "step": 152430 }, { "epoch": 1.2334331256574156, "grad_norm": 0.3351185917854309, "learning_rate": 3.846073285406658e-06, "loss": 0.0242, "step": 152440 }, { "epoch": 1.2335140383526175, "grad_norm": 0.32121604681015015, "learning_rate": 3.845386261739821e-06, "loss": 0.0226, "step": 152450 }, { "epoch": 1.2335949510478195, "grad_norm": 0.2964201271533966, "learning_rate": 3.844699261099315e-06, "loss": 0.0173, "step": 152460 }, { "epoch": 1.2336758637430212, "grad_norm": 0.38201844692230225, "learning_rate": 3.8440122834988445e-06, "loss": 0.0157, "step": 152470 }, { "epoch": 1.2337567764382231, "grad_norm": 0.2607526481151581, "learning_rate": 3.8433253289521104e-06, "loss": 0.0162, "step": 152480 }, { "epoch": 1.233837689133425, "grad_norm": 0.4359076917171478, "learning_rate": 3.842638397472808e-06, "loss": 0.0246, "step": 152490 }, { "epoch": 1.233918601828627, "grad_norm": 0.30173078179359436, "learning_rate": 3.841951489074641e-06, "loss": 0.0223, "step": 152500 }, { "epoch": 1.2339995145238287, "grad_norm": 0.2983461618423462, "learning_rate": 3.841264603771308e-06, "loss": 0.0191, "step": 152510 }, { "epoch": 1.2340804272190307, "grad_norm": 0.5039222836494446, "learning_rate": 3.840577741576503e-06, "loss": 0.0229, "step": 152520 }, { "epoch": 1.2341613399142326, "grad_norm": 0.31259143352508545, "learning_rate": 3.8398909025039275e-06, "loss": 0.0254, "step": 152530 }, { "epoch": 1.2342422526094343, "grad_norm": 0.21325211226940155, "learning_rate": 3.839204086567282e-06, "loss": 0.02, "step": 152540 }, { "epoch": 1.2343231653046363, "grad_norm": 0.14611853659152985, "learning_rate": 3.838517293780259e-06, "loss": 0.0189, "step": 152550 }, { "epoch": 1.2344040779998382, "grad_norm": 0.47754353284835815, "learning_rate": 3.837830524156555e-06, "loss": 0.0166, "step": 152560 }, { "epoch": 1.2344849906950401, "grad_norm": 0.6206961274147034, "learning_rate": 3.8371437777098695e-06, "loss": 0.0295, "step": 152570 }, { "epoch": 1.2345659033902419, "grad_norm": 0.6434082388877869, "learning_rate": 3.836457054453897e-06, "loss": 0.0189, "step": 152580 }, { "epoch": 1.2346468160854438, "grad_norm": 0.5657694339752197, "learning_rate": 3.835770354402331e-06, "loss": 0.0185, "step": 152590 }, { "epoch": 1.2347277287806457, "grad_norm": 0.8132098913192749, "learning_rate": 3.835083677568866e-06, "loss": 0.0207, "step": 152600 }, { "epoch": 1.2348086414758477, "grad_norm": 0.2542913854122162, "learning_rate": 3.834397023967201e-06, "loss": 0.0142, "step": 152610 }, { "epoch": 1.2348895541710494, "grad_norm": 0.213356152176857, "learning_rate": 3.833710393611025e-06, "loss": 0.0236, "step": 152620 }, { "epoch": 1.2349704668662513, "grad_norm": 0.600788950920105, "learning_rate": 3.833023786514032e-06, "loss": 0.0224, "step": 152630 }, { "epoch": 1.2350513795614533, "grad_norm": 0.25565212965011597, "learning_rate": 3.832337202689918e-06, "loss": 0.02, "step": 152640 }, { "epoch": 1.235132292256655, "grad_norm": 0.18869240581989288, "learning_rate": 3.831650642152374e-06, "loss": 0.0282, "step": 152650 }, { "epoch": 1.235213204951857, "grad_norm": 0.25742945075035095, "learning_rate": 3.830964104915088e-06, "loss": 0.0209, "step": 152660 }, { "epoch": 1.2352941176470589, "grad_norm": 0.19672684371471405, "learning_rate": 3.8302775909917585e-06, "loss": 0.0143, "step": 152670 }, { "epoch": 1.2353750303422606, "grad_norm": 0.2966305613517761, "learning_rate": 3.829591100396071e-06, "loss": 0.0296, "step": 152680 }, { "epoch": 1.2354559430374625, "grad_norm": 0.3018496334552765, "learning_rate": 3.828904633141719e-06, "loss": 0.0131, "step": 152690 }, { "epoch": 1.2355368557326645, "grad_norm": 0.5109556317329407, "learning_rate": 3.828218189242393e-06, "loss": 0.0228, "step": 152700 }, { "epoch": 1.2356177684278664, "grad_norm": 0.440697580575943, "learning_rate": 3.827531768711779e-06, "loss": 0.0277, "step": 152710 }, { "epoch": 1.2356986811230681, "grad_norm": 0.05683078244328499, "learning_rate": 3.82684537156357e-06, "loss": 0.0279, "step": 152720 }, { "epoch": 1.23577959381827, "grad_norm": 0.4351658523082733, "learning_rate": 3.826158997811452e-06, "loss": 0.0164, "step": 152730 }, { "epoch": 1.235860506513472, "grad_norm": 0.21123985946178436, "learning_rate": 3.825472647469117e-06, "loss": 0.0333, "step": 152740 }, { "epoch": 1.235941419208674, "grad_norm": 0.21828658878803253, "learning_rate": 3.824786320550248e-06, "loss": 0.0305, "step": 152750 }, { "epoch": 1.2360223319038757, "grad_norm": 0.46205300092697144, "learning_rate": 3.824100017068537e-06, "loss": 0.0136, "step": 152760 }, { "epoch": 1.2361032445990776, "grad_norm": 0.03286824747920036, "learning_rate": 3.823413737037669e-06, "loss": 0.0331, "step": 152770 }, { "epoch": 1.2361841572942796, "grad_norm": 0.1816919893026352, "learning_rate": 3.822727480471328e-06, "loss": 0.0164, "step": 152780 }, { "epoch": 1.2362650699894813, "grad_norm": 0.27529817819595337, "learning_rate": 3.822041247383203e-06, "loss": 0.0184, "step": 152790 }, { "epoch": 1.2363459826846832, "grad_norm": 0.24527978897094727, "learning_rate": 3.82135503778698e-06, "loss": 0.0146, "step": 152800 }, { "epoch": 1.2364268953798851, "grad_norm": 0.08297176659107208, "learning_rate": 3.82066885169634e-06, "loss": 0.0178, "step": 152810 }, { "epoch": 1.2365078080750869, "grad_norm": 0.29998791217803955, "learning_rate": 3.8199826891249704e-06, "loss": 0.0191, "step": 152820 }, { "epoch": 1.2365887207702888, "grad_norm": 0.33412066102027893, "learning_rate": 3.819296550086558e-06, "loss": 0.0158, "step": 152830 }, { "epoch": 1.2366696334654907, "grad_norm": 0.23011980950832367, "learning_rate": 3.81861043459478e-06, "loss": 0.0158, "step": 152840 }, { "epoch": 1.2367505461606927, "grad_norm": 0.32127001881599426, "learning_rate": 3.817924342663324e-06, "loss": 0.0242, "step": 152850 }, { "epoch": 1.2368314588558944, "grad_norm": 0.1775527447462082, "learning_rate": 3.817238274305872e-06, "loss": 0.0159, "step": 152860 }, { "epoch": 1.2369123715510963, "grad_norm": 0.5077799558639526, "learning_rate": 3.816552229536104e-06, "loss": 0.0233, "step": 152870 }, { "epoch": 1.2369932842462983, "grad_norm": 0.3352898955345154, "learning_rate": 3.815866208367706e-06, "loss": 0.0274, "step": 152880 }, { "epoch": 1.2370741969415002, "grad_norm": 0.15460322797298431, "learning_rate": 3.815180210814356e-06, "loss": 0.0129, "step": 152890 }, { "epoch": 1.237155109636702, "grad_norm": 0.7043026685714722, "learning_rate": 3.8144942368897335e-06, "loss": 0.0295, "step": 152900 }, { "epoch": 1.2372360223319039, "grad_norm": 0.11883275210857391, "learning_rate": 3.813808286607522e-06, "loss": 0.0183, "step": 152910 }, { "epoch": 1.2373169350271058, "grad_norm": 0.33895930647850037, "learning_rate": 3.8131223599813993e-06, "loss": 0.0125, "step": 152920 }, { "epoch": 1.2373978477223075, "grad_norm": 0.4410916864871979, "learning_rate": 3.8124364570250477e-06, "loss": 0.0178, "step": 152930 }, { "epoch": 1.2374787604175095, "grad_norm": 0.4915918707847595, "learning_rate": 3.8117505777521414e-06, "loss": 0.0239, "step": 152940 }, { "epoch": 1.2375596731127114, "grad_norm": 0.19424493610858917, "learning_rate": 3.8110647221763624e-06, "loss": 0.0155, "step": 152950 }, { "epoch": 1.2376405858079131, "grad_norm": 0.28998515009880066, "learning_rate": 3.810378890311389e-06, "loss": 0.0216, "step": 152960 }, { "epoch": 1.237721498503115, "grad_norm": 0.23910106718540192, "learning_rate": 3.8096930821708942e-06, "loss": 0.0336, "step": 152970 }, { "epoch": 1.237802411198317, "grad_norm": 0.38845112919807434, "learning_rate": 3.80900729776856e-06, "loss": 0.0207, "step": 152980 }, { "epoch": 1.237883323893519, "grad_norm": 0.3644903302192688, "learning_rate": 3.808321537118062e-06, "loss": 0.021, "step": 152990 }, { "epoch": 1.2379642365887207, "grad_norm": 0.5310061573982239, "learning_rate": 3.8076358002330726e-06, "loss": 0.022, "step": 153000 }, { "epoch": 1.2380451492839226, "grad_norm": 0.27220281958580017, "learning_rate": 3.80695008712727e-06, "loss": 0.0174, "step": 153010 }, { "epoch": 1.2381260619791246, "grad_norm": 0.1704902946949005, "learning_rate": 3.806264397814332e-06, "loss": 0.0252, "step": 153020 }, { "epoch": 1.2382069746743265, "grad_norm": 0.30866870284080505, "learning_rate": 3.805578732307929e-06, "loss": 0.0189, "step": 153030 }, { "epoch": 1.2382878873695282, "grad_norm": 0.4179675281047821, "learning_rate": 3.8048930906217356e-06, "loss": 0.0176, "step": 153040 }, { "epoch": 1.2383688000647302, "grad_norm": 0.6298564076423645, "learning_rate": 3.804207472769429e-06, "loss": 0.0258, "step": 153050 }, { "epoch": 1.238449712759932, "grad_norm": 0.4966944754123688, "learning_rate": 3.803521878764678e-06, "loss": 0.0162, "step": 153060 }, { "epoch": 1.2385306254551338, "grad_norm": 0.283013254404068, "learning_rate": 3.8028363086211566e-06, "loss": 0.0138, "step": 153070 }, { "epoch": 1.2386115381503358, "grad_norm": 0.6124868392944336, "learning_rate": 3.80215076235254e-06, "loss": 0.0266, "step": 153080 }, { "epoch": 1.2386924508455377, "grad_norm": 0.16939429938793182, "learning_rate": 3.8014652399724956e-06, "loss": 0.0181, "step": 153090 }, { "epoch": 1.2387733635407396, "grad_norm": 0.24943484365940094, "learning_rate": 3.800779741494698e-06, "loss": 0.027, "step": 153100 }, { "epoch": 1.2388542762359414, "grad_norm": 0.22558486461639404, "learning_rate": 3.800094266932815e-06, "loss": 0.0235, "step": 153110 }, { "epoch": 1.2389351889311433, "grad_norm": 0.6163736581802368, "learning_rate": 3.7994088163005216e-06, "loss": 0.0218, "step": 153120 }, { "epoch": 1.2390161016263452, "grad_norm": 0.45647290349006653, "learning_rate": 3.7987233896114834e-06, "loss": 0.0253, "step": 153130 }, { "epoch": 1.2390970143215472, "grad_norm": 0.334800660610199, "learning_rate": 3.79803798687937e-06, "loss": 0.02, "step": 153140 }, { "epoch": 1.239177927016749, "grad_norm": 0.32573407888412476, "learning_rate": 3.7973526081178535e-06, "loss": 0.0179, "step": 153150 }, { "epoch": 1.2392588397119508, "grad_norm": 0.5885554552078247, "learning_rate": 3.796667253340599e-06, "loss": 0.0185, "step": 153160 }, { "epoch": 1.2393397524071528, "grad_norm": 0.46726104617118835, "learning_rate": 3.795981922561276e-06, "loss": 0.0108, "step": 153170 }, { "epoch": 1.2394206651023545, "grad_norm": 0.05664373189210892, "learning_rate": 3.7952966157935534e-06, "loss": 0.0177, "step": 153180 }, { "epoch": 1.2395015777975564, "grad_norm": 0.06603281944990158, "learning_rate": 3.7946113330510943e-06, "loss": 0.0238, "step": 153190 }, { "epoch": 1.2395824904927584, "grad_norm": 0.4033389389514923, "learning_rate": 3.7939260743475683e-06, "loss": 0.0093, "step": 153200 }, { "epoch": 1.23966340318796, "grad_norm": 0.2693396210670471, "learning_rate": 3.793240839696642e-06, "loss": 0.0141, "step": 153210 }, { "epoch": 1.239744315883162, "grad_norm": 0.33704879879951477, "learning_rate": 3.792555629111977e-06, "loss": 0.0303, "step": 153220 }, { "epoch": 1.239825228578364, "grad_norm": 0.17531752586364746, "learning_rate": 3.791870442607241e-06, "loss": 0.0196, "step": 153230 }, { "epoch": 1.239906141273566, "grad_norm": 0.3385940492153168, "learning_rate": 3.7911852801961014e-06, "loss": 0.0209, "step": 153240 }, { "epoch": 1.2399870539687676, "grad_norm": 0.5473846197128296, "learning_rate": 3.790500141892217e-06, "loss": 0.0183, "step": 153250 }, { "epoch": 1.2400679666639696, "grad_norm": 0.5011297464370728, "learning_rate": 3.789815027709254e-06, "loss": 0.0196, "step": 153260 }, { "epoch": 1.2401488793591715, "grad_norm": 0.5293536186218262, "learning_rate": 3.7891299376608766e-06, "loss": 0.0243, "step": 153270 }, { "epoch": 1.2402297920543734, "grad_norm": 0.27937522530555725, "learning_rate": 3.788444871760747e-06, "loss": 0.0148, "step": 153280 }, { "epoch": 1.2403107047495752, "grad_norm": 0.2070438265800476, "learning_rate": 3.7877598300225245e-06, "loss": 0.0266, "step": 153290 }, { "epoch": 1.240391617444777, "grad_norm": 0.2996928095817566, "learning_rate": 3.7870748124598735e-06, "loss": 0.0259, "step": 153300 }, { "epoch": 1.240472530139979, "grad_norm": 0.14509572088718414, "learning_rate": 3.786389819086456e-06, "loss": 0.0162, "step": 153310 }, { "epoch": 1.2405534428351808, "grad_norm": 0.5270760655403137, "learning_rate": 3.785704849915931e-06, "loss": 0.0271, "step": 153320 }, { "epoch": 1.2406343555303827, "grad_norm": 0.34769153594970703, "learning_rate": 3.7850199049619583e-06, "loss": 0.0273, "step": 153330 }, { "epoch": 1.2407152682255846, "grad_norm": 0.259918749332428, "learning_rate": 3.7843349842382003e-06, "loss": 0.016, "step": 153340 }, { "epoch": 1.2407961809207864, "grad_norm": 0.2806963622570038, "learning_rate": 3.7836500877583145e-06, "loss": 0.0279, "step": 153350 }, { "epoch": 1.2408770936159883, "grad_norm": 0.4701220691204071, "learning_rate": 3.782965215535958e-06, "loss": 0.0108, "step": 153360 }, { "epoch": 1.2409580063111902, "grad_norm": 0.21585723757743835, "learning_rate": 3.7822803675847935e-06, "loss": 0.0234, "step": 153370 }, { "epoch": 1.2410389190063922, "grad_norm": 0.20638929307460785, "learning_rate": 3.781595543918474e-06, "loss": 0.0112, "step": 153380 }, { "epoch": 1.241119831701594, "grad_norm": 0.23047761619091034, "learning_rate": 3.780910744550661e-06, "loss": 0.0168, "step": 153390 }, { "epoch": 1.2412007443967958, "grad_norm": 0.36776816844940186, "learning_rate": 3.7802259694950098e-06, "loss": 0.0217, "step": 153400 }, { "epoch": 1.2412816570919978, "grad_norm": 0.4214296340942383, "learning_rate": 3.7795412187651746e-06, "loss": 0.0141, "step": 153410 }, { "epoch": 1.2413625697871997, "grad_norm": 0.3679518699645996, "learning_rate": 3.778856492374814e-06, "loss": 0.0354, "step": 153420 }, { "epoch": 1.2414434824824014, "grad_norm": 0.8441891670227051, "learning_rate": 3.7781717903375838e-06, "loss": 0.0386, "step": 153430 }, { "epoch": 1.2415243951776034, "grad_norm": 0.4645460844039917, "learning_rate": 3.7774871126671376e-06, "loss": 0.0279, "step": 153440 }, { "epoch": 1.2416053078728053, "grad_norm": 0.13165968656539917, "learning_rate": 3.7768024593771284e-06, "loss": 0.0112, "step": 153450 }, { "epoch": 1.241686220568007, "grad_norm": 0.3367115557193756, "learning_rate": 3.776117830481213e-06, "loss": 0.024, "step": 153460 }, { "epoch": 1.241767133263209, "grad_norm": 0.3898586332798004, "learning_rate": 3.7754332259930454e-06, "loss": 0.0242, "step": 153470 }, { "epoch": 1.241848045958411, "grad_norm": 0.18579243123531342, "learning_rate": 3.774748645926274e-06, "loss": 0.0207, "step": 153480 }, { "epoch": 1.2419289586536126, "grad_norm": 0.3625110685825348, "learning_rate": 3.774064090294555e-06, "loss": 0.0201, "step": 153490 }, { "epoch": 1.2420098713488146, "grad_norm": 0.4364614188671112, "learning_rate": 3.7733795591115413e-06, "loss": 0.0251, "step": 153500 }, { "epoch": 1.2420907840440165, "grad_norm": 0.3742404282093048, "learning_rate": 3.7726950523908817e-06, "loss": 0.0171, "step": 153510 }, { "epoch": 1.2421716967392185, "grad_norm": 0.392147421836853, "learning_rate": 3.772010570146227e-06, "loss": 0.0143, "step": 153520 }, { "epoch": 1.2422526094344202, "grad_norm": 0.29615598917007446, "learning_rate": 3.771326112391232e-06, "loss": 0.0167, "step": 153530 }, { "epoch": 1.2423335221296221, "grad_norm": 0.2678363025188446, "learning_rate": 3.770641679139543e-06, "loss": 0.0173, "step": 153540 }, { "epoch": 1.242414434824824, "grad_norm": 0.558085560798645, "learning_rate": 3.769957270404808e-06, "loss": 0.0285, "step": 153550 }, { "epoch": 1.242495347520026, "grad_norm": 0.14629730582237244, "learning_rate": 3.7692728862006818e-06, "loss": 0.0392, "step": 153560 }, { "epoch": 1.2425762602152277, "grad_norm": 0.8268360495567322, "learning_rate": 3.768588526540807e-06, "loss": 0.0228, "step": 153570 }, { "epoch": 1.2426571729104297, "grad_norm": 0.25512152910232544, "learning_rate": 3.767904191438836e-06, "loss": 0.0218, "step": 153580 }, { "epoch": 1.2427380856056316, "grad_norm": 0.3968676030635834, "learning_rate": 3.7672198809084155e-06, "loss": 0.0174, "step": 153590 }, { "epoch": 1.2428189983008333, "grad_norm": 0.2524189054965973, "learning_rate": 3.76653559496319e-06, "loss": 0.0327, "step": 153600 }, { "epoch": 1.2428999109960353, "grad_norm": 0.5436311364173889, "learning_rate": 3.765851333616809e-06, "loss": 0.0195, "step": 153610 }, { "epoch": 1.2429808236912372, "grad_norm": 0.21704024076461792, "learning_rate": 3.7651670968829184e-06, "loss": 0.0254, "step": 153620 }, { "epoch": 1.243061736386439, "grad_norm": 0.4011719822883606, "learning_rate": 3.764482884775162e-06, "loss": 0.0196, "step": 153630 }, { "epoch": 1.2431426490816408, "grad_norm": 0.3297424614429474, "learning_rate": 3.7637986973071855e-06, "loss": 0.0197, "step": 153640 }, { "epoch": 1.2432235617768428, "grad_norm": 0.5680356025695801, "learning_rate": 3.7631145344926356e-06, "loss": 0.0324, "step": 153650 }, { "epoch": 1.2433044744720447, "grad_norm": 0.3164413273334503, "learning_rate": 3.7624303963451557e-06, "loss": 0.0195, "step": 153660 }, { "epoch": 1.2433853871672464, "grad_norm": 0.5821256637573242, "learning_rate": 3.7617462828783874e-06, "loss": 0.0276, "step": 153670 }, { "epoch": 1.2434662998624484, "grad_norm": 0.21671928465366364, "learning_rate": 3.7610621941059763e-06, "loss": 0.0171, "step": 153680 }, { "epoch": 1.2435472125576503, "grad_norm": 0.6412838101387024, "learning_rate": 3.7603781300415653e-06, "loss": 0.0234, "step": 153690 }, { "epoch": 1.2436281252528523, "grad_norm": 0.24003072082996368, "learning_rate": 3.7596940906987934e-06, "loss": 0.0183, "step": 153700 }, { "epoch": 1.243709037948054, "grad_norm": 0.2710514962673187, "learning_rate": 3.759010076091304e-06, "loss": 0.0163, "step": 153710 }, { "epoch": 1.243789950643256, "grad_norm": 0.4043630063533783, "learning_rate": 3.758326086232742e-06, "loss": 0.0262, "step": 153720 }, { "epoch": 1.2438708633384579, "grad_norm": 0.3697417974472046, "learning_rate": 3.757642121136743e-06, "loss": 0.0159, "step": 153730 }, { "epoch": 1.2439517760336596, "grad_norm": 0.3354056775569916, "learning_rate": 3.756958180816948e-06, "loss": 0.0401, "step": 153740 }, { "epoch": 1.2440326887288615, "grad_norm": 0.3727691173553467, "learning_rate": 3.7562742652870002e-06, "loss": 0.0326, "step": 153750 }, { "epoch": 1.2441136014240635, "grad_norm": 0.7754021286964417, "learning_rate": 3.755590374560536e-06, "loss": 0.0179, "step": 153760 }, { "epoch": 1.2441945141192654, "grad_norm": 0.33937492966651917, "learning_rate": 3.754906508651193e-06, "loss": 0.0368, "step": 153770 }, { "epoch": 1.2442754268144671, "grad_norm": 0.43968939781188965, "learning_rate": 3.7542226675726135e-06, "loss": 0.0263, "step": 153780 }, { "epoch": 1.244356339509669, "grad_norm": 0.28950035572052, "learning_rate": 3.7535388513384313e-06, "loss": 0.0152, "step": 153790 }, { "epoch": 1.244437252204871, "grad_norm": 0.40028858184814453, "learning_rate": 3.752855059962286e-06, "loss": 0.0149, "step": 153800 }, { "epoch": 1.244518164900073, "grad_norm": 0.5421611070632935, "learning_rate": 3.7521712934578126e-06, "loss": 0.0288, "step": 153810 }, { "epoch": 1.2445990775952747, "grad_norm": 0.011324487626552582, "learning_rate": 3.7514875518386505e-06, "loss": 0.0127, "step": 153820 }, { "epoch": 1.2446799902904766, "grad_norm": 0.14405186474323273, "learning_rate": 3.7508038351184323e-06, "loss": 0.0126, "step": 153830 }, { "epoch": 1.2447609029856785, "grad_norm": 0.3273710608482361, "learning_rate": 3.7501201433107935e-06, "loss": 0.0201, "step": 153840 }, { "epoch": 1.2448418156808803, "grad_norm": 0.26970186829566956, "learning_rate": 3.7494364764293722e-06, "loss": 0.0181, "step": 153850 }, { "epoch": 1.2449227283760822, "grad_norm": 0.33318474888801575, "learning_rate": 3.7487528344877976e-06, "loss": 0.023, "step": 153860 }, { "epoch": 1.2450036410712841, "grad_norm": 0.77936190366745, "learning_rate": 3.748069217499708e-06, "loss": 0.0089, "step": 153870 }, { "epoch": 1.2450845537664859, "grad_norm": 0.42425045371055603, "learning_rate": 3.7473856254787356e-06, "loss": 0.0166, "step": 153880 }, { "epoch": 1.2451654664616878, "grad_norm": 0.37669751048088074, "learning_rate": 3.7467020584385103e-06, "loss": 0.0144, "step": 153890 }, { "epoch": 1.2452463791568897, "grad_norm": 0.09345671534538269, "learning_rate": 3.7460185163926676e-06, "loss": 0.0093, "step": 153900 }, { "epoch": 1.2453272918520917, "grad_norm": 0.40032729506492615, "learning_rate": 3.7453349993548394e-06, "loss": 0.0142, "step": 153910 }, { "epoch": 1.2454082045472934, "grad_norm": 0.3942057192325592, "learning_rate": 3.7446515073386546e-06, "loss": 0.0211, "step": 153920 }, { "epoch": 1.2454891172424953, "grad_norm": 0.6109198927879333, "learning_rate": 3.743968040357745e-06, "loss": 0.0268, "step": 153930 }, { "epoch": 1.2455700299376973, "grad_norm": 0.2576468884944916, "learning_rate": 3.7432845984257425e-06, "loss": 0.0241, "step": 153940 }, { "epoch": 1.2456509426328992, "grad_norm": 0.4723447561264038, "learning_rate": 3.7426011815562757e-06, "loss": 0.0239, "step": 153950 }, { "epoch": 1.245731855328101, "grad_norm": 0.535069465637207, "learning_rate": 3.7419177897629723e-06, "loss": 0.0319, "step": 153960 }, { "epoch": 1.2458127680233029, "grad_norm": 0.21432165801525116, "learning_rate": 3.7412344230594646e-06, "loss": 0.016, "step": 153970 }, { "epoch": 1.2458936807185048, "grad_norm": 0.5210793018341064, "learning_rate": 3.740551081459377e-06, "loss": 0.0312, "step": 153980 }, { "epoch": 1.2459745934137065, "grad_norm": 0.5265430212020874, "learning_rate": 3.7398677649763405e-06, "loss": 0.016, "step": 153990 }, { "epoch": 1.2460555061089085, "grad_norm": 0.4058828055858612, "learning_rate": 3.7391844736239796e-06, "loss": 0.0159, "step": 154000 }, { "epoch": 1.2461364188041104, "grad_norm": 0.3570758104324341, "learning_rate": 3.7385012074159255e-06, "loss": 0.0381, "step": 154010 }, { "epoch": 1.2462173314993121, "grad_norm": 0.2633489966392517, "learning_rate": 3.7378179663658e-06, "loss": 0.022, "step": 154020 }, { "epoch": 1.246298244194514, "grad_norm": 0.3594949543476105, "learning_rate": 3.7371347504872297e-06, "loss": 0.0325, "step": 154030 }, { "epoch": 1.246379156889716, "grad_norm": 0.15858374536037445, "learning_rate": 3.736451559793843e-06, "loss": 0.0156, "step": 154040 }, { "epoch": 1.246460069584918, "grad_norm": 0.3478407859802246, "learning_rate": 3.73576839429926e-06, "loss": 0.0168, "step": 154050 }, { "epoch": 1.2465409822801197, "grad_norm": 0.6574262976646423, "learning_rate": 3.735085254017109e-06, "loss": 0.0267, "step": 154060 }, { "epoch": 1.2466218949753216, "grad_norm": 0.2730821669101715, "learning_rate": 3.734402138961013e-06, "loss": 0.0281, "step": 154070 }, { "epoch": 1.2467028076705235, "grad_norm": 0.4088101089000702, "learning_rate": 3.733719049144593e-06, "loss": 0.0229, "step": 154080 }, { "epoch": 1.2467837203657255, "grad_norm": 0.4870307147502899, "learning_rate": 3.7330359845814735e-06, "loss": 0.0228, "step": 154090 }, { "epoch": 1.2468646330609272, "grad_norm": 0.43913137912750244, "learning_rate": 3.732352945285278e-06, "loss": 0.0241, "step": 154100 }, { "epoch": 1.2469455457561291, "grad_norm": 0.41476017236709595, "learning_rate": 3.7316699312696246e-06, "loss": 0.0246, "step": 154110 }, { "epoch": 1.247026458451331, "grad_norm": 0.3180988132953644, "learning_rate": 3.7309869425481373e-06, "loss": 0.0241, "step": 154120 }, { "epoch": 1.2471073711465328, "grad_norm": 0.2147231251001358, "learning_rate": 3.7303039791344388e-06, "loss": 0.023, "step": 154130 }, { "epoch": 1.2471882838417347, "grad_norm": 0.18975546956062317, "learning_rate": 3.7296210410421452e-06, "loss": 0.0292, "step": 154140 }, { "epoch": 1.2472691965369367, "grad_norm": 0.6538833975791931, "learning_rate": 3.7289381282848773e-06, "loss": 0.0273, "step": 154150 }, { "epoch": 1.2473501092321384, "grad_norm": 0.473972886800766, "learning_rate": 3.7282552408762575e-06, "loss": 0.0216, "step": 154160 }, { "epoch": 1.2474310219273403, "grad_norm": 0.6605470776557922, "learning_rate": 3.7275723788299005e-06, "loss": 0.0158, "step": 154170 }, { "epoch": 1.2475119346225423, "grad_norm": 0.24687393009662628, "learning_rate": 3.7268895421594254e-06, "loss": 0.0233, "step": 154180 }, { "epoch": 1.2475928473177442, "grad_norm": 0.28737568855285645, "learning_rate": 3.726206730878451e-06, "loss": 0.0138, "step": 154190 }, { "epoch": 1.247673760012946, "grad_norm": 0.1771281510591507, "learning_rate": 3.725523945000596e-06, "loss": 0.0201, "step": 154200 }, { "epoch": 1.2477546727081479, "grad_norm": 0.23191028833389282, "learning_rate": 3.7248411845394742e-06, "loss": 0.0165, "step": 154210 }, { "epoch": 1.2478355854033498, "grad_norm": 0.4559308886528015, "learning_rate": 3.7241584495087024e-06, "loss": 0.0127, "step": 154220 }, { "epoch": 1.2479164980985518, "grad_norm": 0.4601913094520569, "learning_rate": 3.723475739921899e-06, "loss": 0.0278, "step": 154230 }, { "epoch": 1.2479974107937535, "grad_norm": 0.520370364189148, "learning_rate": 3.7227930557926755e-06, "loss": 0.0219, "step": 154240 }, { "epoch": 1.2480783234889554, "grad_norm": 0.5360919833183289, "learning_rate": 3.722110397134647e-06, "loss": 0.0236, "step": 154250 }, { "epoch": 1.2481592361841574, "grad_norm": 0.5529686808586121, "learning_rate": 3.721427763961432e-06, "loss": 0.0191, "step": 154260 }, { "epoch": 1.248240148879359, "grad_norm": 0.42754754424095154, "learning_rate": 3.720745156286638e-06, "loss": 0.0246, "step": 154270 }, { "epoch": 1.248321061574561, "grad_norm": 0.42279815673828125, "learning_rate": 3.7200625741238834e-06, "loss": 0.0186, "step": 154280 }, { "epoch": 1.248401974269763, "grad_norm": 0.736683189868927, "learning_rate": 3.719380017486779e-06, "loss": 0.0268, "step": 154290 }, { "epoch": 1.248482886964965, "grad_norm": 0.0605812706053257, "learning_rate": 3.7186974863889347e-06, "loss": 0.0181, "step": 154300 }, { "epoch": 1.2485637996601666, "grad_norm": 0.3418835401535034, "learning_rate": 3.718014980843965e-06, "loss": 0.0201, "step": 154310 }, { "epoch": 1.2486447123553686, "grad_norm": 0.44088125228881836, "learning_rate": 3.7173325008654814e-06, "loss": 0.0224, "step": 154320 }, { "epoch": 1.2487256250505705, "grad_norm": 0.7691391706466675, "learning_rate": 3.7166500464670906e-06, "loss": 0.0205, "step": 154330 }, { "epoch": 1.2488065377457722, "grad_norm": 0.3869122862815857, "learning_rate": 3.715967617662406e-06, "loss": 0.0157, "step": 154340 }, { "epoch": 1.2488874504409742, "grad_norm": 0.22215905785560608, "learning_rate": 3.715285214465038e-06, "loss": 0.0182, "step": 154350 }, { "epoch": 1.248968363136176, "grad_norm": 0.5322940349578857, "learning_rate": 3.7146028368885936e-06, "loss": 0.0231, "step": 154360 }, { "epoch": 1.249049275831378, "grad_norm": 0.2672711908817291, "learning_rate": 3.7139204849466808e-06, "loss": 0.0279, "step": 154370 }, { "epoch": 1.2491301885265798, "grad_norm": 0.5799291133880615, "learning_rate": 3.71323815865291e-06, "loss": 0.0208, "step": 154380 }, { "epoch": 1.2492111012217817, "grad_norm": 0.37123173475265503, "learning_rate": 3.7125558580208887e-06, "loss": 0.0203, "step": 154390 }, { "epoch": 1.2492920139169836, "grad_norm": 0.3623465299606323, "learning_rate": 3.711873583064221e-06, "loss": 0.0162, "step": 154400 }, { "epoch": 1.2493729266121854, "grad_norm": 0.3976886570453644, "learning_rate": 3.7111913337965154e-06, "loss": 0.0256, "step": 154410 }, { "epoch": 1.2494538393073873, "grad_norm": 0.3061923384666443, "learning_rate": 3.7105091102313794e-06, "loss": 0.0138, "step": 154420 }, { "epoch": 1.2495347520025892, "grad_norm": 0.32406577467918396, "learning_rate": 3.7098269123824166e-06, "loss": 0.0146, "step": 154430 }, { "epoch": 1.2496156646977912, "grad_norm": 0.6168735027313232, "learning_rate": 3.7091447402632306e-06, "loss": 0.023, "step": 154440 }, { "epoch": 1.249696577392993, "grad_norm": 0.3634799122810364, "learning_rate": 3.7084625938874304e-06, "loss": 0.0223, "step": 154450 }, { "epoch": 1.2497774900881948, "grad_norm": 0.3535867929458618, "learning_rate": 3.707780473268615e-06, "loss": 0.013, "step": 154460 }, { "epoch": 1.2498584027833968, "grad_norm": 0.2663934826850891, "learning_rate": 3.707098378420391e-06, "loss": 0.0163, "step": 154470 }, { "epoch": 1.2499393154785987, "grad_norm": 0.2865050435066223, "learning_rate": 3.706416309356361e-06, "loss": 0.0192, "step": 154480 }, { "epoch": 1.2500202281738004, "grad_norm": 0.23765835165977478, "learning_rate": 3.705734266090125e-06, "loss": 0.023, "step": 154490 }, { "epoch": 1.2501011408690024, "grad_norm": 0.4762696921825409, "learning_rate": 3.705052248635288e-06, "loss": 0.0208, "step": 154500 }, { "epoch": 1.2501820535642043, "grad_norm": 0.35908252000808716, "learning_rate": 3.704370257005451e-06, "loss": 0.0169, "step": 154510 }, { "epoch": 1.250262966259406, "grad_norm": 0.41680505871772766, "learning_rate": 3.7036882912142123e-06, "loss": 0.0206, "step": 154520 }, { "epoch": 1.250343878954608, "grad_norm": 0.39668774604797363, "learning_rate": 3.7030063512751737e-06, "loss": 0.0186, "step": 154530 }, { "epoch": 1.25042479164981, "grad_norm": 0.20110955834388733, "learning_rate": 3.702324437201936e-06, "loss": 0.0201, "step": 154540 }, { "epoch": 1.2505057043450116, "grad_norm": 0.12915240228176117, "learning_rate": 3.7016425490080997e-06, "loss": 0.0208, "step": 154550 }, { "epoch": 1.2505866170402136, "grad_norm": 0.30378395318984985, "learning_rate": 3.7009606867072595e-06, "loss": 0.0243, "step": 154560 }, { "epoch": 1.2506675297354155, "grad_norm": 0.49865177273750305, "learning_rate": 3.7002788503130173e-06, "loss": 0.0177, "step": 154570 }, { "epoch": 1.2507484424306174, "grad_norm": 0.9343805313110352, "learning_rate": 3.69959703983897e-06, "loss": 0.0246, "step": 154580 }, { "epoch": 1.2508293551258192, "grad_norm": 0.3341410756111145, "learning_rate": 3.6989152552987123e-06, "loss": 0.0186, "step": 154590 }, { "epoch": 1.250910267821021, "grad_norm": 0.4709428548812866, "learning_rate": 3.6982334967058435e-06, "loss": 0.0312, "step": 154600 }, { "epoch": 1.250991180516223, "grad_norm": 0.271016001701355, "learning_rate": 3.6975517640739616e-06, "loss": 0.0163, "step": 154610 }, { "epoch": 1.251072093211425, "grad_norm": 0.37660813331604004, "learning_rate": 3.6968700574166596e-06, "loss": 0.0159, "step": 154620 }, { "epoch": 1.2511530059066267, "grad_norm": 0.3079780042171478, "learning_rate": 3.6961883767475316e-06, "loss": 0.0168, "step": 154630 }, { "epoch": 1.2512339186018286, "grad_norm": 0.4036044180393219, "learning_rate": 3.6955067220801755e-06, "loss": 0.021, "step": 154640 }, { "epoch": 1.2513148312970306, "grad_norm": 0.6767419576644897, "learning_rate": 3.6948250934281837e-06, "loss": 0.016, "step": 154650 }, { "epoch": 1.2513957439922323, "grad_norm": 0.2134213149547577, "learning_rate": 3.6941434908051488e-06, "loss": 0.0198, "step": 154660 }, { "epoch": 1.2514766566874342, "grad_norm": 0.2420799732208252, "learning_rate": 3.6934619142246674e-06, "loss": 0.0186, "step": 154670 }, { "epoch": 1.2515575693826362, "grad_norm": 0.42098677158355713, "learning_rate": 3.6927803637003277e-06, "loss": 0.026, "step": 154680 }, { "epoch": 1.251638482077838, "grad_norm": 0.40695226192474365, "learning_rate": 3.6920988392457246e-06, "loss": 0.0183, "step": 154690 }, { "epoch": 1.2517193947730398, "grad_norm": 0.29852887988090515, "learning_rate": 3.6914173408744507e-06, "loss": 0.0184, "step": 154700 }, { "epoch": 1.2518003074682418, "grad_norm": 0.21146823465824127, "learning_rate": 3.6907358686000934e-06, "loss": 0.0143, "step": 154710 }, { "epoch": 1.2518812201634437, "grad_norm": 0.39223578572273254, "learning_rate": 3.6900544224362458e-06, "loss": 0.024, "step": 154720 }, { "epoch": 1.2519621328586457, "grad_norm": 0.18350383639335632, "learning_rate": 3.689373002396497e-06, "loss": 0.0154, "step": 154730 }, { "epoch": 1.2520430455538474, "grad_norm": 0.23185431957244873, "learning_rate": 3.688691608494438e-06, "loss": 0.0281, "step": 154740 }, { "epoch": 1.2521239582490493, "grad_norm": 0.3706808388233185, "learning_rate": 3.6880102407436547e-06, "loss": 0.0234, "step": 154750 }, { "epoch": 1.2522048709442513, "grad_norm": 0.3701370358467102, "learning_rate": 3.687328899157739e-06, "loss": 0.0419, "step": 154760 }, { "epoch": 1.252285783639453, "grad_norm": 0.24194782972335815, "learning_rate": 3.686647583750278e-06, "loss": 0.0205, "step": 154770 }, { "epoch": 1.252366696334655, "grad_norm": 0.38068681955337524, "learning_rate": 3.6859662945348563e-06, "loss": 0.0181, "step": 154780 }, { "epoch": 1.2524476090298569, "grad_norm": 0.13969117403030396, "learning_rate": 3.685285031525063e-06, "loss": 0.0194, "step": 154790 }, { "epoch": 1.2525285217250586, "grad_norm": 0.09966258704662323, "learning_rate": 3.684603794734486e-06, "loss": 0.0347, "step": 154800 }, { "epoch": 1.2526094344202605, "grad_norm": 0.4197935163974762, "learning_rate": 3.6839225841767078e-06, "loss": 0.018, "step": 154810 }, { "epoch": 1.2526903471154625, "grad_norm": 0.522659420967102, "learning_rate": 3.683241399865315e-06, "loss": 0.0208, "step": 154820 }, { "epoch": 1.2527712598106642, "grad_norm": 0.18132105469703674, "learning_rate": 3.682560241813894e-06, "loss": 0.0174, "step": 154830 }, { "epoch": 1.2528521725058661, "grad_norm": 0.5375286936759949, "learning_rate": 3.6818791100360274e-06, "loss": 0.0243, "step": 154840 }, { "epoch": 1.252933085201068, "grad_norm": 0.42890632152557373, "learning_rate": 3.6811980045452978e-06, "loss": 0.0198, "step": 154850 }, { "epoch": 1.25301399789627, "grad_norm": 0.3974464237689972, "learning_rate": 3.6805169253552927e-06, "loss": 0.0336, "step": 154860 }, { "epoch": 1.253094910591472, "grad_norm": 0.7454802989959717, "learning_rate": 3.67983587247959e-06, "loss": 0.0186, "step": 154870 }, { "epoch": 1.2531758232866737, "grad_norm": 0.7018547654151917, "learning_rate": 3.679154845931774e-06, "loss": 0.0259, "step": 154880 }, { "epoch": 1.2532567359818756, "grad_norm": 0.17060236632823944, "learning_rate": 3.6784738457254277e-06, "loss": 0.0139, "step": 154890 }, { "epoch": 1.2533376486770775, "grad_norm": 0.5530403852462769, "learning_rate": 3.677792871874128e-06, "loss": 0.0299, "step": 154900 }, { "epoch": 1.2534185613722792, "grad_norm": 0.2808263301849365, "learning_rate": 3.67711192439146e-06, "loss": 0.0207, "step": 154910 }, { "epoch": 1.2534994740674812, "grad_norm": 0.23147574067115784, "learning_rate": 3.676431003291e-06, "loss": 0.0265, "step": 154920 }, { "epoch": 1.2535803867626831, "grad_norm": 0.2972731590270996, "learning_rate": 3.675750108586332e-06, "loss": 0.0212, "step": 154930 }, { "epoch": 1.2536612994578848, "grad_norm": 0.4887083172798157, "learning_rate": 3.6750692402910316e-06, "loss": 0.0304, "step": 154940 }, { "epoch": 1.2537422121530868, "grad_norm": 0.33119362592697144, "learning_rate": 3.674388398418677e-06, "loss": 0.0251, "step": 154950 }, { "epoch": 1.2538231248482887, "grad_norm": 0.01609647460281849, "learning_rate": 3.6737075829828494e-06, "loss": 0.0163, "step": 154960 }, { "epoch": 1.2539040375434904, "grad_norm": 0.2749592363834381, "learning_rate": 3.673026793997122e-06, "loss": 0.0161, "step": 154970 }, { "epoch": 1.2539849502386924, "grad_norm": 0.19249509274959564, "learning_rate": 3.6723460314750747e-06, "loss": 0.0247, "step": 154980 }, { "epoch": 1.2540658629338943, "grad_norm": 0.15098883211612701, "learning_rate": 3.6716652954302846e-06, "loss": 0.0223, "step": 154990 }, { "epoch": 1.2541467756290963, "grad_norm": 0.21766528487205505, "learning_rate": 3.6709845858763237e-06, "loss": 0.0167, "step": 155000 }, { "epoch": 1.2542276883242982, "grad_norm": 0.6218220591545105, "learning_rate": 3.6703039028267696e-06, "loss": 0.031, "step": 155010 }, { "epoch": 1.2543086010195, "grad_norm": 0.4385712742805481, "learning_rate": 3.6696232462951984e-06, "loss": 0.0282, "step": 155020 }, { "epoch": 1.2543895137147019, "grad_norm": 0.4013165533542633, "learning_rate": 3.6689426162951822e-06, "loss": 0.0201, "step": 155030 }, { "epoch": 1.2544704264099038, "grad_norm": 0.4403097629547119, "learning_rate": 3.668262012840295e-06, "loss": 0.0295, "step": 155040 }, { "epoch": 1.2545513391051055, "grad_norm": 0.7098623514175415, "learning_rate": 3.6675814359441126e-06, "loss": 0.0234, "step": 155050 }, { "epoch": 1.2546322518003075, "grad_norm": 0.19992657005786896, "learning_rate": 3.6669008856202044e-06, "loss": 0.0183, "step": 155060 }, { "epoch": 1.2547131644955094, "grad_norm": 0.36599746346473694, "learning_rate": 3.6662203618821423e-06, "loss": 0.0191, "step": 155070 }, { "epoch": 1.2547940771907111, "grad_norm": 0.43607568740844727, "learning_rate": 3.6655398647435e-06, "loss": 0.0162, "step": 155080 }, { "epoch": 1.254874989885913, "grad_norm": 0.3030533790588379, "learning_rate": 3.6648593942178494e-06, "loss": 0.0207, "step": 155090 }, { "epoch": 1.254955902581115, "grad_norm": 0.3865235447883606, "learning_rate": 3.6641789503187593e-06, "loss": 0.0207, "step": 155100 }, { "epoch": 1.2550368152763167, "grad_norm": 0.28949543833732605, "learning_rate": 3.6634985330597982e-06, "loss": 0.0154, "step": 155110 }, { "epoch": 1.2551177279715187, "grad_norm": 0.684106171131134, "learning_rate": 3.6628181424545393e-06, "loss": 0.0216, "step": 155120 }, { "epoch": 1.2551986406667206, "grad_norm": 0.43398699164390564, "learning_rate": 3.6621377785165496e-06, "loss": 0.0189, "step": 155130 }, { "epoch": 1.2552795533619225, "grad_norm": 0.4404716491699219, "learning_rate": 3.6614574412593963e-06, "loss": 0.0216, "step": 155140 }, { "epoch": 1.2553604660571245, "grad_norm": 0.10511647909879684, "learning_rate": 3.66077713069665e-06, "loss": 0.0209, "step": 155150 }, { "epoch": 1.2554413787523262, "grad_norm": 0.05398304760456085, "learning_rate": 3.6600968468418746e-06, "loss": 0.0144, "step": 155160 }, { "epoch": 1.2555222914475281, "grad_norm": 0.22450990974903107, "learning_rate": 3.6594165897086396e-06, "loss": 0.015, "step": 155170 }, { "epoch": 1.25560320414273, "grad_norm": 0.3395146429538727, "learning_rate": 3.658736359310513e-06, "loss": 0.021, "step": 155180 }, { "epoch": 1.2556841168379318, "grad_norm": 0.19432343542575836, "learning_rate": 3.658056155661055e-06, "loss": 0.0168, "step": 155190 }, { "epoch": 1.2557650295331337, "grad_norm": 0.38110071420669556, "learning_rate": 3.6573759787738356e-06, "loss": 0.0212, "step": 155200 }, { "epoch": 1.2558459422283357, "grad_norm": 0.18592196702957153, "learning_rate": 3.656695828662419e-06, "loss": 0.0182, "step": 155210 }, { "epoch": 1.2559268549235374, "grad_norm": 0.4430897533893585, "learning_rate": 3.6560157053403657e-06, "loss": 0.0291, "step": 155220 }, { "epoch": 1.2560077676187393, "grad_norm": 0.8803776502609253, "learning_rate": 3.655335608821242e-06, "loss": 0.0171, "step": 155230 }, { "epoch": 1.2560886803139413, "grad_norm": 0.5945136547088623, "learning_rate": 3.6546555391186127e-06, "loss": 0.0167, "step": 155240 }, { "epoch": 1.2561695930091432, "grad_norm": 0.32196688652038574, "learning_rate": 3.6539754962460382e-06, "loss": 0.0306, "step": 155250 }, { "epoch": 1.256250505704345, "grad_norm": 0.1043943390250206, "learning_rate": 3.6532954802170797e-06, "loss": 0.0239, "step": 155260 }, { "epoch": 1.2563314183995469, "grad_norm": 0.2588275671005249, "learning_rate": 3.6526154910452997e-06, "loss": 0.0296, "step": 155270 }, { "epoch": 1.2564123310947488, "grad_norm": 0.4447914958000183, "learning_rate": 3.6519355287442614e-06, "loss": 0.0334, "step": 155280 }, { "epoch": 1.2564932437899508, "grad_norm": 0.19326718151569366, "learning_rate": 3.651255593327521e-06, "loss": 0.0157, "step": 155290 }, { "epoch": 1.2565741564851525, "grad_norm": 0.26953455805778503, "learning_rate": 3.6505756848086405e-06, "loss": 0.0199, "step": 155300 }, { "epoch": 1.2566550691803544, "grad_norm": 0.30847740173339844, "learning_rate": 3.6498958032011806e-06, "loss": 0.0155, "step": 155310 }, { "epoch": 1.2567359818755564, "grad_norm": 0.33702322840690613, "learning_rate": 3.6492159485186984e-06, "loss": 0.021, "step": 155320 }, { "epoch": 1.256816894570758, "grad_norm": 0.19793404638767242, "learning_rate": 3.6485361207747506e-06, "loss": 0.0103, "step": 155330 }, { "epoch": 1.25689780726596, "grad_norm": 0.28624480962753296, "learning_rate": 3.6478563199829e-06, "loss": 0.0225, "step": 155340 }, { "epoch": 1.256978719961162, "grad_norm": 0.2095310389995575, "learning_rate": 3.647176546156699e-06, "loss": 0.0145, "step": 155350 }, { "epoch": 1.2570596326563637, "grad_norm": 0.2476046234369278, "learning_rate": 3.6464967993097046e-06, "loss": 0.0157, "step": 155360 }, { "epoch": 1.2571405453515656, "grad_norm": 0.6061119437217712, "learning_rate": 3.6458170794554764e-06, "loss": 0.0247, "step": 155370 }, { "epoch": 1.2572214580467675, "grad_norm": 0.4206654727458954, "learning_rate": 3.6451373866075657e-06, "loss": 0.0183, "step": 155380 }, { "epoch": 1.2573023707419695, "grad_norm": 0.2956068515777588, "learning_rate": 3.6444577207795307e-06, "loss": 0.0189, "step": 155390 }, { "epoch": 1.2573832834371714, "grad_norm": 0.36563780903816223, "learning_rate": 3.6437780819849254e-06, "loss": 0.023, "step": 155400 }, { "epoch": 1.2574641961323731, "grad_norm": 0.33960068225860596, "learning_rate": 3.643098470237302e-06, "loss": 0.0209, "step": 155410 }, { "epoch": 1.257545108827575, "grad_norm": 0.39169469475746155, "learning_rate": 3.642418885550215e-06, "loss": 0.0112, "step": 155420 }, { "epoch": 1.257626021522777, "grad_norm": 0.39735016226768494, "learning_rate": 3.6417393279372193e-06, "loss": 0.0247, "step": 155430 }, { "epoch": 1.2577069342179787, "grad_norm": 0.5535113215446472, "learning_rate": 3.641059797411862e-06, "loss": 0.0216, "step": 155440 }, { "epoch": 1.2577878469131807, "grad_norm": 0.4294659197330475, "learning_rate": 3.6403802939876996e-06, "loss": 0.0353, "step": 155450 }, { "epoch": 1.2578687596083826, "grad_norm": 0.38274088501930237, "learning_rate": 3.6397008176782823e-06, "loss": 0.0253, "step": 155460 }, { "epoch": 1.2579496723035843, "grad_norm": 0.4104353189468384, "learning_rate": 3.639021368497162e-06, "loss": 0.028, "step": 155470 }, { "epoch": 1.2580305849987863, "grad_norm": 1.0708354711532593, "learning_rate": 3.6383419464578847e-06, "loss": 0.0286, "step": 155480 }, { "epoch": 1.2581114976939882, "grad_norm": 0.19687677919864655, "learning_rate": 3.637662551574004e-06, "loss": 0.0279, "step": 155490 }, { "epoch": 1.25819241038919, "grad_norm": 0.4430650770664215, "learning_rate": 3.6369831838590673e-06, "loss": 0.0215, "step": 155500 }, { "epoch": 1.2582733230843919, "grad_norm": 0.5522352457046509, "learning_rate": 3.636303843326624e-06, "loss": 0.0263, "step": 155510 }, { "epoch": 1.2583542357795938, "grad_norm": 0.20272128283977509, "learning_rate": 3.6356245299902204e-06, "loss": 0.0284, "step": 155520 }, { "epoch": 1.2584351484747958, "grad_norm": 0.17035679519176483, "learning_rate": 3.6349452438634074e-06, "loss": 0.0203, "step": 155530 }, { "epoch": 1.2585160611699977, "grad_norm": 0.6345595717430115, "learning_rate": 3.6342659849597285e-06, "loss": 0.0186, "step": 155540 }, { "epoch": 1.2585969738651994, "grad_norm": 0.5872098207473755, "learning_rate": 3.6335867532927303e-06, "loss": 0.0281, "step": 155550 }, { "epoch": 1.2586778865604014, "grad_norm": 0.16120024025440216, "learning_rate": 3.6329075488759623e-06, "loss": 0.0191, "step": 155560 }, { "epoch": 1.2587587992556033, "grad_norm": 0.5143738389015198, "learning_rate": 3.6322283717229645e-06, "loss": 0.0133, "step": 155570 }, { "epoch": 1.258839711950805, "grad_norm": 0.30402836203575134, "learning_rate": 3.631549221847286e-06, "loss": 0.0149, "step": 155580 }, { "epoch": 1.258920624646007, "grad_norm": 0.19510476291179657, "learning_rate": 3.63087009926247e-06, "loss": 0.0149, "step": 155590 }, { "epoch": 1.259001537341209, "grad_norm": 0.6283817887306213, "learning_rate": 3.6301910039820577e-06, "loss": 0.021, "step": 155600 }, { "epoch": 1.2590824500364106, "grad_norm": 0.3969791531562805, "learning_rate": 3.6295119360195942e-06, "loss": 0.0141, "step": 155610 }, { "epoch": 1.2591633627316126, "grad_norm": 0.45090553164482117, "learning_rate": 3.6288328953886216e-06, "loss": 0.0236, "step": 155620 }, { "epoch": 1.2592442754268145, "grad_norm": 0.3787028193473816, "learning_rate": 3.628153882102684e-06, "loss": 0.0342, "step": 155630 }, { "epoch": 1.2593251881220162, "grad_norm": 0.3059706687927246, "learning_rate": 3.6274748961753193e-06, "loss": 0.0226, "step": 155640 }, { "epoch": 1.2594061008172182, "grad_norm": 0.468337744474411, "learning_rate": 3.626795937620071e-06, "loss": 0.0207, "step": 155650 }, { "epoch": 1.25948701351242, "grad_norm": 0.3669589161872864, "learning_rate": 3.6261170064504805e-06, "loss": 0.014, "step": 155660 }, { "epoch": 1.259567926207622, "grad_norm": 0.3201170861721039, "learning_rate": 3.625438102680083e-06, "loss": 0.0221, "step": 155670 }, { "epoch": 1.259648838902824, "grad_norm": 0.40085986256599426, "learning_rate": 3.624759226322422e-06, "loss": 0.0216, "step": 155680 }, { "epoch": 1.2597297515980257, "grad_norm": 0.3153845965862274, "learning_rate": 3.624080377391036e-06, "loss": 0.0226, "step": 155690 }, { "epoch": 1.2598106642932276, "grad_norm": 0.12800051271915436, "learning_rate": 3.6234015558994606e-06, "loss": 0.0197, "step": 155700 }, { "epoch": 1.2598915769884296, "grad_norm": 0.6230905652046204, "learning_rate": 3.6227227618612347e-06, "loss": 0.0152, "step": 155710 }, { "epoch": 1.2599724896836313, "grad_norm": 0.27311405539512634, "learning_rate": 3.6220439952898977e-06, "loss": 0.0195, "step": 155720 }, { "epoch": 1.2600534023788332, "grad_norm": 0.37428227066993713, "learning_rate": 3.621365256198983e-06, "loss": 0.02, "step": 155730 }, { "epoch": 1.2601343150740352, "grad_norm": 0.4379180669784546, "learning_rate": 3.620686544602027e-06, "loss": 0.027, "step": 155740 }, { "epoch": 1.260215227769237, "grad_norm": 0.14665964245796204, "learning_rate": 3.6200078605125686e-06, "loss": 0.0162, "step": 155750 }, { "epoch": 1.2602961404644388, "grad_norm": 0.10555414110422134, "learning_rate": 3.6193292039441385e-06, "loss": 0.0204, "step": 155760 }, { "epoch": 1.2603770531596408, "grad_norm": 0.27251487970352173, "learning_rate": 3.618650574910272e-06, "loss": 0.0195, "step": 155770 }, { "epoch": 1.2604579658548425, "grad_norm": 0.5529559850692749, "learning_rate": 3.6179719734245056e-06, "loss": 0.0296, "step": 155780 }, { "epoch": 1.2605388785500444, "grad_norm": 0.4878374934196472, "learning_rate": 3.6172933995003677e-06, "loss": 0.0233, "step": 155790 }, { "epoch": 1.2606197912452464, "grad_norm": 0.4309328496456146, "learning_rate": 3.6166148531513964e-06, "loss": 0.027, "step": 155800 }, { "epoch": 1.2607007039404483, "grad_norm": 0.5194452404975891, "learning_rate": 3.615936334391119e-06, "loss": 0.0173, "step": 155810 }, { "epoch": 1.2607816166356502, "grad_norm": 0.3406357765197754, "learning_rate": 3.6152578432330725e-06, "loss": 0.0209, "step": 155820 }, { "epoch": 1.260862529330852, "grad_norm": 0.12187721580266953, "learning_rate": 3.6145793796907837e-06, "loss": 0.0371, "step": 155830 }, { "epoch": 1.260943442026054, "grad_norm": 0.31136226654052734, "learning_rate": 3.6139009437777837e-06, "loss": 0.0139, "step": 155840 }, { "epoch": 1.2610243547212558, "grad_norm": 0.8369305729866028, "learning_rate": 3.6132225355076045e-06, "loss": 0.0297, "step": 155850 }, { "epoch": 1.2611052674164576, "grad_norm": 0.2289036214351654, "learning_rate": 3.6125441548937727e-06, "loss": 0.0147, "step": 155860 }, { "epoch": 1.2611861801116595, "grad_norm": 0.32607144117355347, "learning_rate": 3.61186580194982e-06, "loss": 0.0202, "step": 155870 }, { "epoch": 1.2612670928068614, "grad_norm": 0.3612646162509918, "learning_rate": 3.611187476689274e-06, "loss": 0.0245, "step": 155880 }, { "epoch": 1.2613480055020632, "grad_norm": 0.12804575264453888, "learning_rate": 3.61050917912566e-06, "loss": 0.0249, "step": 155890 }, { "epoch": 1.261428918197265, "grad_norm": 0.6954081654548645, "learning_rate": 3.6098309092725082e-06, "loss": 0.0241, "step": 155900 }, { "epoch": 1.261509830892467, "grad_norm": 0.6382588744163513, "learning_rate": 3.6091526671433443e-06, "loss": 0.0141, "step": 155910 }, { "epoch": 1.261590743587669, "grad_norm": 0.39974430203437805, "learning_rate": 3.608474452751693e-06, "loss": 0.0202, "step": 155920 }, { "epoch": 1.2616716562828707, "grad_norm": 0.31781747937202454, "learning_rate": 3.607796266111081e-06, "loss": 0.0158, "step": 155930 }, { "epoch": 1.2617525689780726, "grad_norm": 0.3161892890930176, "learning_rate": 3.6071181072350354e-06, "loss": 0.0172, "step": 155940 }, { "epoch": 1.2618334816732746, "grad_norm": 0.15585923194885254, "learning_rate": 3.606439976137077e-06, "loss": 0.0191, "step": 155950 }, { "epoch": 1.2619143943684765, "grad_norm": 0.5211132168769836, "learning_rate": 3.6057618728307314e-06, "loss": 0.0165, "step": 155960 }, { "epoch": 1.2619953070636782, "grad_norm": 0.4404914081096649, "learning_rate": 3.6050837973295233e-06, "loss": 0.0264, "step": 155970 }, { "epoch": 1.2620762197588802, "grad_norm": 0.3968820571899414, "learning_rate": 3.6044057496469732e-06, "loss": 0.0174, "step": 155980 }, { "epoch": 1.2621571324540821, "grad_norm": 0.654180645942688, "learning_rate": 3.603727729796603e-06, "loss": 0.0243, "step": 155990 }, { "epoch": 1.2622380451492838, "grad_norm": 0.3378596007823944, "learning_rate": 3.603049737791936e-06, "loss": 0.0191, "step": 156000 }, { "epoch": 1.2623189578444858, "grad_norm": 0.3227827847003937, "learning_rate": 3.6023717736464945e-06, "loss": 0.0153, "step": 156010 }, { "epoch": 1.2623998705396877, "grad_norm": 0.7647005915641785, "learning_rate": 3.6016938373737962e-06, "loss": 0.0187, "step": 156020 }, { "epoch": 1.2624807832348894, "grad_norm": 0.21208342909812927, "learning_rate": 3.601015928987362e-06, "loss": 0.0311, "step": 156030 }, { "epoch": 1.2625616959300914, "grad_norm": 0.5775209665298462, "learning_rate": 3.6003380485007134e-06, "loss": 0.0293, "step": 156040 }, { "epoch": 1.2626426086252933, "grad_norm": 0.48727306723594666, "learning_rate": 3.5996601959273657e-06, "loss": 0.0258, "step": 156050 }, { "epoch": 1.2627235213204953, "grad_norm": 0.32719001173973083, "learning_rate": 3.5989823712808404e-06, "loss": 0.0259, "step": 156060 }, { "epoch": 1.2628044340156972, "grad_norm": 0.23588232696056366, "learning_rate": 3.598304574574655e-06, "loss": 0.0196, "step": 156070 }, { "epoch": 1.262885346710899, "grad_norm": 0.21331007778644562, "learning_rate": 3.597626805822323e-06, "loss": 0.0167, "step": 156080 }, { "epoch": 1.2629662594061009, "grad_norm": 0.4509246051311493, "learning_rate": 3.596949065037365e-06, "loss": 0.0261, "step": 156090 }, { "epoch": 1.2630471721013028, "grad_norm": 0.25763341784477234, "learning_rate": 3.596271352233297e-06, "loss": 0.0261, "step": 156100 }, { "epoch": 1.2631280847965045, "grad_norm": 0.5006033778190613, "learning_rate": 3.5955936674236325e-06, "loss": 0.024, "step": 156110 }, { "epoch": 1.2632089974917065, "grad_norm": 0.40500709414482117, "learning_rate": 3.594916010621886e-06, "loss": 0.0219, "step": 156120 }, { "epoch": 1.2632899101869084, "grad_norm": 0.485655277967453, "learning_rate": 3.594238381841576e-06, "loss": 0.0274, "step": 156130 }, { "epoch": 1.2633708228821101, "grad_norm": 0.471293181180954, "learning_rate": 3.593560781096213e-06, "loss": 0.0204, "step": 156140 }, { "epoch": 1.263451735577312, "grad_norm": 0.49671226739883423, "learning_rate": 3.5928832083993104e-06, "loss": 0.0356, "step": 156150 }, { "epoch": 1.263532648272514, "grad_norm": 0.6209225654602051, "learning_rate": 3.5922056637643833e-06, "loss": 0.0135, "step": 156160 }, { "epoch": 1.2636135609677157, "grad_norm": 0.39040857553482056, "learning_rate": 3.591528147204941e-06, "loss": 0.019, "step": 156170 }, { "epoch": 1.2636944736629177, "grad_norm": 0.35032206773757935, "learning_rate": 3.590850658734495e-06, "loss": 0.0281, "step": 156180 }, { "epoch": 1.2637753863581196, "grad_norm": 0.5999857187271118, "learning_rate": 3.5901731983665585e-06, "loss": 0.0352, "step": 156190 }, { "epoch": 1.2638562990533215, "grad_norm": 0.5978662371635437, "learning_rate": 3.5894957661146427e-06, "loss": 0.0282, "step": 156200 }, { "epoch": 1.2639372117485235, "grad_norm": 0.5068869590759277, "learning_rate": 3.5888183619922557e-06, "loss": 0.0142, "step": 156210 }, { "epoch": 1.2640181244437252, "grad_norm": 0.29607951641082764, "learning_rate": 3.5881409860129065e-06, "loss": 0.0186, "step": 156220 }, { "epoch": 1.2640990371389271, "grad_norm": 0.4193187952041626, "learning_rate": 3.5874636381901064e-06, "loss": 0.0229, "step": 156230 }, { "epoch": 1.264179949834129, "grad_norm": 0.3448072671890259, "learning_rate": 3.586786318537361e-06, "loss": 0.0179, "step": 156240 }, { "epoch": 1.2642608625293308, "grad_norm": 0.749474048614502, "learning_rate": 3.5861090270681774e-06, "loss": 0.0381, "step": 156250 }, { "epoch": 1.2643417752245327, "grad_norm": 0.4904156029224396, "learning_rate": 3.585431763796068e-06, "loss": 0.0174, "step": 156260 }, { "epoch": 1.2644226879197347, "grad_norm": 0.3867056369781494, "learning_rate": 3.5847545287345328e-06, "loss": 0.0204, "step": 156270 }, { "epoch": 1.2645036006149364, "grad_norm": 0.26263561844825745, "learning_rate": 3.5840773218970815e-06, "loss": 0.013, "step": 156280 }, { "epoch": 1.2645845133101383, "grad_norm": 0.601021409034729, "learning_rate": 3.5834001432972205e-06, "loss": 0.0255, "step": 156290 }, { "epoch": 1.2646654260053403, "grad_norm": 0.40168297290802, "learning_rate": 3.5827229929484507e-06, "loss": 0.0218, "step": 156300 }, { "epoch": 1.264746338700542, "grad_norm": 0.21624654531478882, "learning_rate": 3.58204587086428e-06, "loss": 0.0189, "step": 156310 }, { "epoch": 1.264827251395744, "grad_norm": 0.3839937448501587, "learning_rate": 3.5813687770582118e-06, "loss": 0.016, "step": 156320 }, { "epoch": 1.2649081640909459, "grad_norm": 0.010196216404438019, "learning_rate": 3.580691711543747e-06, "loss": 0.0271, "step": 156330 }, { "epoch": 1.2649890767861478, "grad_norm": 0.2930561900138855, "learning_rate": 3.580014674334389e-06, "loss": 0.0149, "step": 156340 }, { "epoch": 1.2650699894813497, "grad_norm": 0.6701322197914124, "learning_rate": 3.579337665443642e-06, "loss": 0.0247, "step": 156350 }, { "epoch": 1.2651509021765515, "grad_norm": 0.35730427503585815, "learning_rate": 3.578660684885007e-06, "loss": 0.0267, "step": 156360 }, { "epoch": 1.2652318148717534, "grad_norm": 0.4402150511741638, "learning_rate": 3.577983732671982e-06, "loss": 0.033, "step": 156370 }, { "epoch": 1.2653127275669553, "grad_norm": 0.11596997082233429, "learning_rate": 3.5773068088180705e-06, "loss": 0.02, "step": 156380 }, { "epoch": 1.265393640262157, "grad_norm": 0.5177624821662903, "learning_rate": 3.576629913336772e-06, "loss": 0.0267, "step": 156390 }, { "epoch": 1.265474552957359, "grad_norm": 0.24854038655757904, "learning_rate": 3.575953046241584e-06, "loss": 0.0215, "step": 156400 }, { "epoch": 1.265555465652561, "grad_norm": 0.6984561085700989, "learning_rate": 3.5752762075460047e-06, "loss": 0.0145, "step": 156410 }, { "epoch": 1.2656363783477627, "grad_norm": 0.20794568955898285, "learning_rate": 3.574599397263537e-06, "loss": 0.0121, "step": 156420 }, { "epoch": 1.2657172910429646, "grad_norm": 0.5681817531585693, "learning_rate": 3.5739226154076733e-06, "loss": 0.0206, "step": 156430 }, { "epoch": 1.2657982037381665, "grad_norm": 0.40148335695266724, "learning_rate": 3.5732458619919118e-06, "loss": 0.0222, "step": 156440 }, { "epoch": 1.2658791164333685, "grad_norm": 0.4072367250919342, "learning_rate": 3.5725691370297517e-06, "loss": 0.0122, "step": 156450 }, { "epoch": 1.2659600291285702, "grad_norm": 0.2454853653907776, "learning_rate": 3.5718924405346855e-06, "loss": 0.0257, "step": 156460 }, { "epoch": 1.2660409418237721, "grad_norm": 0.20975171029567719, "learning_rate": 3.571215772520209e-06, "loss": 0.0224, "step": 156470 }, { "epoch": 1.266121854518974, "grad_norm": 0.3232795000076294, "learning_rate": 3.5705391329998195e-06, "loss": 0.0199, "step": 156480 }, { "epoch": 1.266202767214176, "grad_norm": 0.4346905052661896, "learning_rate": 3.569862521987007e-06, "loss": 0.0297, "step": 156490 }, { "epoch": 1.2662836799093777, "grad_norm": 0.24821805953979492, "learning_rate": 3.5691859394952686e-06, "loss": 0.0227, "step": 156500 }, { "epoch": 1.2663645926045797, "grad_norm": 0.3307507336139679, "learning_rate": 3.568509385538097e-06, "loss": 0.0167, "step": 156510 }, { "epoch": 1.2664455052997816, "grad_norm": 0.5699498057365417, "learning_rate": 3.5678328601289812e-06, "loss": 0.0246, "step": 156520 }, { "epoch": 1.2665264179949833, "grad_norm": 0.2699446976184845, "learning_rate": 3.5671563632814175e-06, "loss": 0.0242, "step": 156530 }, { "epoch": 1.2666073306901853, "grad_norm": 0.5490309000015259, "learning_rate": 3.5664798950088935e-06, "loss": 0.0159, "step": 156540 }, { "epoch": 1.2666882433853872, "grad_norm": 0.23323482275009155, "learning_rate": 3.5658034553249042e-06, "loss": 0.0175, "step": 156550 }, { "epoch": 1.266769156080589, "grad_norm": 0.27117016911506653, "learning_rate": 3.5651270442429352e-06, "loss": 0.022, "step": 156560 }, { "epoch": 1.2668500687757909, "grad_norm": 0.2920032739639282, "learning_rate": 3.564450661776479e-06, "loss": 0.0274, "step": 156570 }, { "epoch": 1.2669309814709928, "grad_norm": 0.5801329612731934, "learning_rate": 3.563774307939025e-06, "loss": 0.0216, "step": 156580 }, { "epoch": 1.2670118941661948, "grad_norm": 0.399332195520401, "learning_rate": 3.5630979827440582e-06, "loss": 0.0314, "step": 156590 }, { "epoch": 1.2670928068613965, "grad_norm": 0.27569836378097534, "learning_rate": 3.562421686205069e-06, "loss": 0.0159, "step": 156600 }, { "epoch": 1.2671737195565984, "grad_norm": 0.27366185188293457, "learning_rate": 3.561745418335546e-06, "loss": 0.0225, "step": 156610 }, { "epoch": 1.2672546322518004, "grad_norm": 0.15661166608333588, "learning_rate": 3.5610691791489734e-06, "loss": 0.0156, "step": 156620 }, { "epoch": 1.2673355449470023, "grad_norm": 0.4758327603340149, "learning_rate": 3.5603929686588368e-06, "loss": 0.0136, "step": 156630 }, { "epoch": 1.267416457642204, "grad_norm": 1.1417583227157593, "learning_rate": 3.5597167868786265e-06, "loss": 0.0219, "step": 156640 }, { "epoch": 1.267497370337406, "grad_norm": 0.41972166299819946, "learning_rate": 3.5590406338218223e-06, "loss": 0.0278, "step": 156650 }, { "epoch": 1.2675782830326079, "grad_norm": 0.33185744285583496, "learning_rate": 3.5583645095019102e-06, "loss": 0.0206, "step": 156660 }, { "epoch": 1.2676591957278096, "grad_norm": 0.44513195753097534, "learning_rate": 3.5576884139323762e-06, "loss": 0.023, "step": 156670 }, { "epoch": 1.2677401084230115, "grad_norm": 0.40381142497062683, "learning_rate": 3.5570123471267005e-06, "loss": 0.0282, "step": 156680 }, { "epoch": 1.2678210211182135, "grad_norm": 0.38722917437553406, "learning_rate": 3.5563363090983677e-06, "loss": 0.0207, "step": 156690 }, { "epoch": 1.2679019338134152, "grad_norm": 0.6708945631980896, "learning_rate": 3.55566029986086e-06, "loss": 0.0212, "step": 156700 }, { "epoch": 1.2679828465086171, "grad_norm": 0.260768860578537, "learning_rate": 3.554984319427658e-06, "loss": 0.0233, "step": 156710 }, { "epoch": 1.268063759203819, "grad_norm": 0.5037230849266052, "learning_rate": 3.554308367812243e-06, "loss": 0.0182, "step": 156720 }, { "epoch": 1.268144671899021, "grad_norm": 0.4746505320072174, "learning_rate": 3.5536324450280944e-06, "loss": 0.0179, "step": 156730 }, { "epoch": 1.268225584594223, "grad_norm": 0.40193837881088257, "learning_rate": 3.5529565510886964e-06, "loss": 0.008, "step": 156740 }, { "epoch": 1.2683064972894247, "grad_norm": 0.3461441695690155, "learning_rate": 3.552280686007523e-06, "loss": 0.0161, "step": 156750 }, { "epoch": 1.2683874099846266, "grad_norm": 0.45728808641433716, "learning_rate": 3.5516048497980547e-06, "loss": 0.0257, "step": 156760 }, { "epoch": 1.2684683226798286, "grad_norm": 0.5931256413459778, "learning_rate": 3.5509290424737725e-06, "loss": 0.0284, "step": 156770 }, { "epoch": 1.2685492353750303, "grad_norm": 0.4451225697994232, "learning_rate": 3.5502532640481483e-06, "loss": 0.0248, "step": 156780 }, { "epoch": 1.2686301480702322, "grad_norm": 0.2948567271232605, "learning_rate": 3.549577514534664e-06, "loss": 0.0219, "step": 156790 }, { "epoch": 1.2687110607654342, "grad_norm": 0.3626709580421448, "learning_rate": 3.5489017939467953e-06, "loss": 0.04, "step": 156800 }, { "epoch": 1.2687919734606359, "grad_norm": 0.24413637816905975, "learning_rate": 3.548226102298014e-06, "loss": 0.0077, "step": 156810 }, { "epoch": 1.2688728861558378, "grad_norm": 0.3451717495918274, "learning_rate": 3.547550439601799e-06, "loss": 0.0171, "step": 156820 }, { "epoch": 1.2689537988510398, "grad_norm": 0.6000708341598511, "learning_rate": 3.5468748058716264e-06, "loss": 0.0304, "step": 156830 }, { "epoch": 1.2690347115462415, "grad_norm": 0.07767753303050995, "learning_rate": 3.546199201120966e-06, "loss": 0.0135, "step": 156840 }, { "epoch": 1.2691156242414434, "grad_norm": 0.4175598919391632, "learning_rate": 3.545523625363293e-06, "loss": 0.022, "step": 156850 }, { "epoch": 1.2691965369366454, "grad_norm": 0.27876758575439453, "learning_rate": 3.544848078612083e-06, "loss": 0.0199, "step": 156860 }, { "epoch": 1.2692774496318473, "grad_norm": 0.3493479788303375, "learning_rate": 3.5441725608808053e-06, "loss": 0.0137, "step": 156870 }, { "epoch": 1.2693583623270492, "grad_norm": 0.21063652634620667, "learning_rate": 3.5434970721829307e-06, "loss": 0.0343, "step": 156880 }, { "epoch": 1.269439275022251, "grad_norm": 0.18169857561588287, "learning_rate": 3.542821612531933e-06, "loss": 0.028, "step": 156890 }, { "epoch": 1.269520187717453, "grad_norm": 0.5657616853713989, "learning_rate": 3.542146181941284e-06, "loss": 0.022, "step": 156900 }, { "epoch": 1.2696011004126548, "grad_norm": 0.7614794969558716, "learning_rate": 3.54147078042445e-06, "loss": 0.0136, "step": 156910 }, { "epoch": 1.2696820131078566, "grad_norm": 0.19348640739917755, "learning_rate": 3.540795407994901e-06, "loss": 0.0261, "step": 156920 }, { "epoch": 1.2697629258030585, "grad_norm": 0.31843292713165283, "learning_rate": 3.5401200646661107e-06, "loss": 0.0171, "step": 156930 }, { "epoch": 1.2698438384982604, "grad_norm": 0.09103738516569138, "learning_rate": 3.5394447504515417e-06, "loss": 0.0208, "step": 156940 }, { "epoch": 1.2699247511934622, "grad_norm": 0.20983152091503143, "learning_rate": 3.5387694653646625e-06, "loss": 0.0173, "step": 156950 }, { "epoch": 1.270005663888664, "grad_norm": 0.4458784759044647, "learning_rate": 3.5380942094189443e-06, "loss": 0.024, "step": 156960 }, { "epoch": 1.270086576583866, "grad_norm": 0.3442647159099579, "learning_rate": 3.537418982627848e-06, "loss": 0.0256, "step": 156970 }, { "epoch": 1.2701674892790678, "grad_norm": 0.19370117783546448, "learning_rate": 3.5367437850048436e-06, "loss": 0.0159, "step": 156980 }, { "epoch": 1.2702484019742697, "grad_norm": 0.5122284889221191, "learning_rate": 3.5360686165633968e-06, "loss": 0.02, "step": 156990 }, { "epoch": 1.2703293146694716, "grad_norm": 0.37959548830986023, "learning_rate": 3.5353934773169686e-06, "loss": 0.0293, "step": 157000 }, { "epoch": 1.2704102273646736, "grad_norm": 0.44250524044036865, "learning_rate": 3.534718367279027e-06, "loss": 0.0207, "step": 157010 }, { "epoch": 1.2704911400598755, "grad_norm": 0.2254369556903839, "learning_rate": 3.534043286463035e-06, "loss": 0.0327, "step": 157020 }, { "epoch": 1.2705720527550772, "grad_norm": 0.4394747018814087, "learning_rate": 3.5333682348824526e-06, "loss": 0.023, "step": 157030 }, { "epoch": 1.2706529654502792, "grad_norm": 0.4461134374141693, "learning_rate": 3.5326932125507446e-06, "loss": 0.0339, "step": 157040 }, { "epoch": 1.270733878145481, "grad_norm": 0.6000203490257263, "learning_rate": 3.532018219481375e-06, "loss": 0.0373, "step": 157050 }, { "epoch": 1.2708147908406828, "grad_norm": 0.3999563157558441, "learning_rate": 3.5313432556878025e-06, "loss": 0.0202, "step": 157060 }, { "epoch": 1.2708957035358848, "grad_norm": 0.44587385654449463, "learning_rate": 3.5306683211834865e-06, "loss": 0.0212, "step": 157070 }, { "epoch": 1.2709766162310867, "grad_norm": 0.48358067870140076, "learning_rate": 3.5299934159818904e-06, "loss": 0.0257, "step": 157080 }, { "epoch": 1.2710575289262884, "grad_norm": 0.15028992295265198, "learning_rate": 3.5293185400964717e-06, "loss": 0.0223, "step": 157090 }, { "epoch": 1.2711384416214904, "grad_norm": 0.21044884622097015, "learning_rate": 3.5286436935406903e-06, "loss": 0.0139, "step": 157100 }, { "epoch": 1.2712193543166923, "grad_norm": 0.7158397436141968, "learning_rate": 3.5279688763280035e-06, "loss": 0.0247, "step": 157110 }, { "epoch": 1.2713002670118942, "grad_norm": 0.5373969674110413, "learning_rate": 3.527294088471872e-06, "loss": 0.0242, "step": 157120 }, { "epoch": 1.271381179707096, "grad_norm": 0.4885769784450531, "learning_rate": 3.5266193299857494e-06, "loss": 0.0388, "step": 157130 }, { "epoch": 1.271462092402298, "grad_norm": 0.2680274546146393, "learning_rate": 3.5259446008830932e-06, "loss": 0.0201, "step": 157140 }, { "epoch": 1.2715430050974998, "grad_norm": 0.435006707906723, "learning_rate": 3.525269901177362e-06, "loss": 0.0199, "step": 157150 }, { "epoch": 1.2716239177927018, "grad_norm": 0.4469914734363556, "learning_rate": 3.524595230882007e-06, "loss": 0.0274, "step": 157160 }, { "epoch": 1.2717048304879035, "grad_norm": 0.33768829703330994, "learning_rate": 3.523920590010487e-06, "loss": 0.0256, "step": 157170 }, { "epoch": 1.2717857431831054, "grad_norm": 0.4352269470691681, "learning_rate": 3.5232459785762553e-06, "loss": 0.0185, "step": 157180 }, { "epoch": 1.2718666558783074, "grad_norm": 0.2978900074958801, "learning_rate": 3.522571396592762e-06, "loss": 0.0322, "step": 157190 }, { "epoch": 1.271947568573509, "grad_norm": 0.3900465667247772, "learning_rate": 3.521896844073466e-06, "loss": 0.0158, "step": 157200 }, { "epoch": 1.272028481268711, "grad_norm": 0.3955253064632416, "learning_rate": 3.521222321031817e-06, "loss": 0.0253, "step": 157210 }, { "epoch": 1.272109393963913, "grad_norm": 0.5408086180686951, "learning_rate": 3.5205478274812654e-06, "loss": 0.0181, "step": 157220 }, { "epoch": 1.2721903066591147, "grad_norm": 0.5296180844306946, "learning_rate": 3.519873363435263e-06, "loss": 0.026, "step": 157230 }, { "epoch": 1.2722712193543166, "grad_norm": 0.3623071312904358, "learning_rate": 3.5191989289072655e-06, "loss": 0.0158, "step": 157240 }, { "epoch": 1.2723521320495186, "grad_norm": 0.3785043954849243, "learning_rate": 3.5185245239107167e-06, "loss": 0.0302, "step": 157250 }, { "epoch": 1.2724330447447205, "grad_norm": 1.1194649934768677, "learning_rate": 3.517850148459069e-06, "loss": 0.0187, "step": 157260 }, { "epoch": 1.2725139574399225, "grad_norm": 0.46684038639068604, "learning_rate": 3.517175802565771e-06, "loss": 0.0167, "step": 157270 }, { "epoch": 1.2725948701351242, "grad_norm": 0.5549755692481995, "learning_rate": 3.516501486244273e-06, "loss": 0.0218, "step": 157280 }, { "epoch": 1.2726757828303261, "grad_norm": 0.5120166540145874, "learning_rate": 3.5158271995080186e-06, "loss": 0.0118, "step": 157290 }, { "epoch": 1.272756695525528, "grad_norm": 0.41442057490348816, "learning_rate": 3.515152942370458e-06, "loss": 0.0207, "step": 157300 }, { "epoch": 1.2728376082207298, "grad_norm": 0.6413852572441101, "learning_rate": 3.5144787148450387e-06, "loss": 0.02, "step": 157310 }, { "epoch": 1.2729185209159317, "grad_norm": 0.4329921305179596, "learning_rate": 3.513804516945205e-06, "loss": 0.0325, "step": 157320 }, { "epoch": 1.2729994336111337, "grad_norm": 0.2124684602022171, "learning_rate": 3.5131303486844016e-06, "loss": 0.0317, "step": 157330 }, { "epoch": 1.2730803463063354, "grad_norm": 0.43221092224121094, "learning_rate": 3.5124562100760763e-06, "loss": 0.0241, "step": 157340 }, { "epoch": 1.2731612590015373, "grad_norm": 0.4870538115501404, "learning_rate": 3.51178210113367e-06, "loss": 0.0355, "step": 157350 }, { "epoch": 1.2732421716967393, "grad_norm": 0.21896688640117645, "learning_rate": 3.5111080218706274e-06, "loss": 0.0227, "step": 157360 }, { "epoch": 1.273323084391941, "grad_norm": 0.2916187047958374, "learning_rate": 3.5104339723003946e-06, "loss": 0.0196, "step": 157370 }, { "epoch": 1.273403997087143, "grad_norm": 0.3996080756187439, "learning_rate": 3.509759952436409e-06, "loss": 0.0267, "step": 157380 }, { "epoch": 1.2734849097823449, "grad_norm": 0.3627797067165375, "learning_rate": 3.509085962292116e-06, "loss": 0.0166, "step": 157390 }, { "epoch": 1.2735658224775468, "grad_norm": 0.07902289181947708, "learning_rate": 3.5084120018809574e-06, "loss": 0.0149, "step": 157400 }, { "epoch": 1.2736467351727487, "grad_norm": 0.2928799092769623, "learning_rate": 3.50773807121637e-06, "loss": 0.0299, "step": 157410 }, { "epoch": 1.2737276478679505, "grad_norm": 0.0011909191962331533, "learning_rate": 3.507064170311798e-06, "loss": 0.0181, "step": 157420 }, { "epoch": 1.2738085605631524, "grad_norm": 0.10130791366100311, "learning_rate": 3.5063902991806807e-06, "loss": 0.0121, "step": 157430 }, { "epoch": 1.2738894732583543, "grad_norm": 0.20378732681274414, "learning_rate": 3.505716457836453e-06, "loss": 0.0114, "step": 157440 }, { "epoch": 1.273970385953556, "grad_norm": 0.2016199380159378, "learning_rate": 3.5050426462925565e-06, "loss": 0.0137, "step": 157450 }, { "epoch": 1.274051298648758, "grad_norm": 0.24190258979797363, "learning_rate": 3.504368864562429e-06, "loss": 0.0109, "step": 157460 }, { "epoch": 1.27413221134396, "grad_norm": 0.7518405318260193, "learning_rate": 3.503695112659509e-06, "loss": 0.0307, "step": 157470 }, { "epoch": 1.2742131240391616, "grad_norm": 0.22322076559066772, "learning_rate": 3.5030213905972277e-06, "loss": 0.0226, "step": 157480 }, { "epoch": 1.2742940367343636, "grad_norm": 0.43641653656959534, "learning_rate": 3.502347698389026e-06, "loss": 0.02, "step": 157490 }, { "epoch": 1.2743749494295655, "grad_norm": 0.5496145486831665, "learning_rate": 3.501674036048339e-06, "loss": 0.0174, "step": 157500 }, { "epoch": 1.2744558621247672, "grad_norm": 0.3181706666946411, "learning_rate": 3.5010004035885984e-06, "loss": 0.0302, "step": 157510 }, { "epoch": 1.2745367748199692, "grad_norm": 0.4039633572101593, "learning_rate": 3.5003268010232404e-06, "loss": 0.0199, "step": 157520 }, { "epoch": 1.2746176875151711, "grad_norm": 0.13938844203948975, "learning_rate": 3.4996532283656996e-06, "loss": 0.0166, "step": 157530 }, { "epoch": 1.274698600210373, "grad_norm": 0.21241727471351624, "learning_rate": 3.498979685629408e-06, "loss": 0.0221, "step": 157540 }, { "epoch": 1.274779512905575, "grad_norm": 0.29572921991348267, "learning_rate": 3.4983061728277955e-06, "loss": 0.0213, "step": 157550 }, { "epoch": 1.2748604256007767, "grad_norm": 0.4072814881801605, "learning_rate": 3.4976326899742985e-06, "loss": 0.0294, "step": 157560 }, { "epoch": 1.2749413382959787, "grad_norm": 0.09213819354772568, "learning_rate": 3.496959237082345e-06, "loss": 0.0479, "step": 157570 }, { "epoch": 1.2750222509911806, "grad_norm": 0.2853723168373108, "learning_rate": 3.4962858141653657e-06, "loss": 0.0212, "step": 157580 }, { "epoch": 1.2751031636863823, "grad_norm": 0.805841326713562, "learning_rate": 3.4956124212367937e-06, "loss": 0.0185, "step": 157590 }, { "epoch": 1.2751840763815843, "grad_norm": 0.04981236159801483, "learning_rate": 3.494939058310053e-06, "loss": 0.0149, "step": 157600 }, { "epoch": 1.2752649890767862, "grad_norm": 0.5905625224113464, "learning_rate": 3.4942657253985767e-06, "loss": 0.0214, "step": 157610 }, { "epoch": 1.275345901771988, "grad_norm": 0.348920077085495, "learning_rate": 3.4935924225157915e-06, "loss": 0.0124, "step": 157620 }, { "epoch": 1.2754268144671899, "grad_norm": 0.31059530377388, "learning_rate": 3.4929191496751267e-06, "loss": 0.0121, "step": 157630 }, { "epoch": 1.2755077271623918, "grad_norm": 0.37856578826904297, "learning_rate": 3.492245906890006e-06, "loss": 0.0148, "step": 157640 }, { "epoch": 1.2755886398575935, "grad_norm": 0.25072094798088074, "learning_rate": 3.4915726941738586e-06, "loss": 0.0254, "step": 157650 }, { "epoch": 1.2756695525527955, "grad_norm": 0.2600950300693512, "learning_rate": 3.4908995115401105e-06, "loss": 0.0178, "step": 157660 }, { "epoch": 1.2757504652479974, "grad_norm": 0.22145825624465942, "learning_rate": 3.4902263590021833e-06, "loss": 0.0275, "step": 157670 }, { "epoch": 1.2758313779431993, "grad_norm": 0.46124234795570374, "learning_rate": 3.4895532365735063e-06, "loss": 0.0187, "step": 157680 }, { "epoch": 1.2759122906384013, "grad_norm": 0.3549489974975586, "learning_rate": 3.488880144267501e-06, "loss": 0.02, "step": 157690 }, { "epoch": 1.275993203333603, "grad_norm": 0.2434125691652298, "learning_rate": 3.48820708209759e-06, "loss": 0.0281, "step": 157700 }, { "epoch": 1.276074116028805, "grad_norm": 0.3393144905567169, "learning_rate": 3.487534050077197e-06, "loss": 0.0186, "step": 157710 }, { "epoch": 1.2761550287240069, "grad_norm": 0.5614421963691711, "learning_rate": 3.4868610482197464e-06, "loss": 0.0205, "step": 157720 }, { "epoch": 1.2762359414192086, "grad_norm": 0.12169162929058075, "learning_rate": 3.4861880765386563e-06, "loss": 0.0256, "step": 157730 }, { "epoch": 1.2763168541144105, "grad_norm": 0.2597903609275818, "learning_rate": 3.485515135047348e-06, "loss": 0.0235, "step": 157740 }, { "epoch": 1.2763977668096125, "grad_norm": 0.689854085445404, "learning_rate": 3.484842223759246e-06, "loss": 0.0148, "step": 157750 }, { "epoch": 1.2764786795048142, "grad_norm": 0.33489173650741577, "learning_rate": 3.4841693426877666e-06, "loss": 0.0383, "step": 157760 }, { "epoch": 1.2765595922000161, "grad_norm": 0.609609842300415, "learning_rate": 3.4834964918463276e-06, "loss": 0.0206, "step": 157770 }, { "epoch": 1.276640504895218, "grad_norm": 0.6620964407920837, "learning_rate": 3.482823671248352e-06, "loss": 0.0284, "step": 157780 }, { "epoch": 1.27672141759042, "grad_norm": 0.28983232378959656, "learning_rate": 3.4821508809072528e-06, "loss": 0.0193, "step": 157790 }, { "epoch": 1.2768023302856217, "grad_norm": 0.32129570841789246, "learning_rate": 3.4814781208364517e-06, "loss": 0.0218, "step": 157800 }, { "epoch": 1.2768832429808237, "grad_norm": 0.5407509803771973, "learning_rate": 3.480805391049362e-06, "loss": 0.0183, "step": 157810 }, { "epoch": 1.2769641556760256, "grad_norm": 0.547744631767273, "learning_rate": 3.4801326915594047e-06, "loss": 0.0257, "step": 157820 }, { "epoch": 1.2770450683712276, "grad_norm": 0.5905135273933411, "learning_rate": 3.4794600223799903e-06, "loss": 0.0197, "step": 157830 }, { "epoch": 1.2771259810664293, "grad_norm": 0.33986881375312805, "learning_rate": 3.4787873835245354e-06, "loss": 0.0286, "step": 157840 }, { "epoch": 1.2772068937616312, "grad_norm": 0.6126912832260132, "learning_rate": 3.4781147750064566e-06, "loss": 0.0333, "step": 157850 }, { "epoch": 1.2772878064568332, "grad_norm": 0.7215332388877869, "learning_rate": 3.4774421968391637e-06, "loss": 0.0189, "step": 157860 }, { "epoch": 1.2773687191520349, "grad_norm": 0.3055201768875122, "learning_rate": 3.4767696490360724e-06, "loss": 0.016, "step": 157870 }, { "epoch": 1.2774496318472368, "grad_norm": 0.24099226295948029, "learning_rate": 3.4760971316105963e-06, "loss": 0.0223, "step": 157880 }, { "epoch": 1.2775305445424388, "grad_norm": 0.2427314966917038, "learning_rate": 3.4754246445761436e-06, "loss": 0.016, "step": 157890 }, { "epoch": 1.2776114572376405, "grad_norm": 0.0840773954987526, "learning_rate": 3.4747521879461287e-06, "loss": 0.0175, "step": 157900 }, { "epoch": 1.2776923699328424, "grad_norm": 0.4579409062862396, "learning_rate": 3.4740797617339626e-06, "loss": 0.0214, "step": 157910 }, { "epoch": 1.2777732826280443, "grad_norm": 0.23252181708812714, "learning_rate": 3.473407365953053e-06, "loss": 0.0156, "step": 157920 }, { "epoch": 1.2778541953232463, "grad_norm": 0.248870387673378, "learning_rate": 3.4727350006168104e-06, "loss": 0.0122, "step": 157930 }, { "epoch": 1.2779351080184482, "grad_norm": 0.31267181038856506, "learning_rate": 3.472062665738645e-06, "loss": 0.0216, "step": 157940 }, { "epoch": 1.27801602071365, "grad_norm": 0.23432791233062744, "learning_rate": 3.4713903613319645e-06, "loss": 0.0348, "step": 157950 }, { "epoch": 1.2780969334088519, "grad_norm": 0.5235417485237122, "learning_rate": 3.470718087410174e-06, "loss": 0.0249, "step": 157960 }, { "epoch": 1.2781778461040538, "grad_norm": 0.2677251398563385, "learning_rate": 3.4700458439866856e-06, "loss": 0.0208, "step": 157970 }, { "epoch": 1.2782587587992555, "grad_norm": 0.2917913794517517, "learning_rate": 3.4693736310749014e-06, "loss": 0.0101, "step": 157980 }, { "epoch": 1.2783396714944575, "grad_norm": 0.35464954376220703, "learning_rate": 3.4687014486882288e-06, "loss": 0.0244, "step": 157990 }, { "epoch": 1.2784205841896594, "grad_norm": 0.32659611105918884, "learning_rate": 3.468029296840072e-06, "loss": 0.0222, "step": 158000 }, { "epoch": 1.2785014968848611, "grad_norm": 0.2013498842716217, "learning_rate": 3.4673571755438394e-06, "loss": 0.0271, "step": 158010 }, { "epoch": 1.278582409580063, "grad_norm": 0.18404163420200348, "learning_rate": 3.466685084812931e-06, "loss": 0.0262, "step": 158020 }, { "epoch": 1.278663322275265, "grad_norm": 0.212554931640625, "learning_rate": 3.4660130246607502e-06, "loss": 0.0249, "step": 158030 }, { "epoch": 1.2787442349704667, "grad_norm": 0.1272333264350891, "learning_rate": 3.4653409951007044e-06, "loss": 0.0102, "step": 158040 }, { "epoch": 1.2788251476656687, "grad_norm": 0.15588022768497467, "learning_rate": 3.464668996146191e-06, "loss": 0.0267, "step": 158050 }, { "epoch": 1.2789060603608706, "grad_norm": 0.8379594683647156, "learning_rate": 3.4639970278106123e-06, "loss": 0.0238, "step": 158060 }, { "epoch": 1.2789869730560726, "grad_norm": 0.42334696650505066, "learning_rate": 3.463325090107372e-06, "loss": 0.0166, "step": 158070 }, { "epoch": 1.2790678857512745, "grad_norm": 0.31327497959136963, "learning_rate": 3.4626531830498674e-06, "loss": 0.022, "step": 158080 }, { "epoch": 1.2791487984464762, "grad_norm": 0.4631817042827606, "learning_rate": 3.4619813066514994e-06, "loss": 0.0269, "step": 158090 }, { "epoch": 1.2792297111416782, "grad_norm": 0.6091910004615784, "learning_rate": 3.461309460925669e-06, "loss": 0.0216, "step": 158100 }, { "epoch": 1.27931062383688, "grad_norm": 0.15403014421463013, "learning_rate": 3.4606376458857716e-06, "loss": 0.0196, "step": 158110 }, { "epoch": 1.2793915365320818, "grad_norm": 0.5304278135299683, "learning_rate": 3.4599658615452068e-06, "loss": 0.0148, "step": 158120 }, { "epoch": 1.2794724492272838, "grad_norm": 0.7402802109718323, "learning_rate": 3.4592941079173727e-06, "loss": 0.0295, "step": 158130 }, { "epoch": 1.2795533619224857, "grad_norm": 0.322299599647522, "learning_rate": 3.458622385015664e-06, "loss": 0.0181, "step": 158140 }, { "epoch": 1.2796342746176874, "grad_norm": 0.4064719080924988, "learning_rate": 3.4579506928534772e-06, "loss": 0.0171, "step": 158150 }, { "epoch": 1.2797151873128894, "grad_norm": 0.27095070481300354, "learning_rate": 3.4572790314442097e-06, "loss": 0.019, "step": 158160 }, { "epoch": 1.2797961000080913, "grad_norm": 0.4015199542045593, "learning_rate": 3.456607400801256e-06, "loss": 0.0159, "step": 158170 }, { "epoch": 1.279877012703293, "grad_norm": 0.4647523760795593, "learning_rate": 3.4559358009380073e-06, "loss": 0.015, "step": 158180 }, { "epoch": 1.279957925398495, "grad_norm": 0.25803083181381226, "learning_rate": 3.455264231867859e-06, "loss": 0.0194, "step": 158190 }, { "epoch": 1.280038838093697, "grad_norm": 0.47789767384529114, "learning_rate": 3.4545926936042072e-06, "loss": 0.0308, "step": 158200 }, { "epoch": 1.2801197507888988, "grad_norm": 0.36011001467704773, "learning_rate": 3.4539211861604404e-06, "loss": 0.0188, "step": 158210 }, { "epoch": 1.2802006634841008, "grad_norm": 0.49566325545310974, "learning_rate": 3.4532497095499506e-06, "loss": 0.0159, "step": 158220 }, { "epoch": 1.2802815761793025, "grad_norm": 0.4695207178592682, "learning_rate": 3.4525782637861315e-06, "loss": 0.025, "step": 158230 }, { "epoch": 1.2803624888745044, "grad_norm": 0.3474366366863251, "learning_rate": 3.4519068488823722e-06, "loss": 0.0207, "step": 158240 }, { "epoch": 1.2804434015697064, "grad_norm": 0.1936003416776657, "learning_rate": 3.451235464852061e-06, "loss": 0.0268, "step": 158250 }, { "epoch": 1.280524314264908, "grad_norm": 0.38189697265625, "learning_rate": 3.450564111708591e-06, "loss": 0.0195, "step": 158260 }, { "epoch": 1.28060522696011, "grad_norm": 0.29504451155662537, "learning_rate": 3.4498927894653465e-06, "loss": 0.0203, "step": 158270 }, { "epoch": 1.280686139655312, "grad_norm": 0.39321890473365784, "learning_rate": 3.449221498135719e-06, "loss": 0.0164, "step": 158280 }, { "epoch": 1.2807670523505137, "grad_norm": 0.004088704474270344, "learning_rate": 3.448550237733096e-06, "loss": 0.0208, "step": 158290 }, { "epoch": 1.2808479650457156, "grad_norm": 0.2857438921928406, "learning_rate": 3.4478790082708614e-06, "loss": 0.0211, "step": 158300 }, { "epoch": 1.2809288777409176, "grad_norm": 0.4283430874347687, "learning_rate": 3.447207809762404e-06, "loss": 0.0205, "step": 158310 }, { "epoch": 1.2810097904361193, "grad_norm": 0.2025664895772934, "learning_rate": 3.44653664222111e-06, "loss": 0.0206, "step": 158320 }, { "epoch": 1.2810907031313212, "grad_norm": 0.4732162356376648, "learning_rate": 3.4458655056603613e-06, "loss": 0.0145, "step": 158330 }, { "epoch": 1.2811716158265232, "grad_norm": 0.33380335569381714, "learning_rate": 3.445194400093544e-06, "loss": 0.015, "step": 158340 }, { "epoch": 1.281252528521725, "grad_norm": 0.5392194986343384, "learning_rate": 3.444523325534043e-06, "loss": 0.0291, "step": 158350 }, { "epoch": 1.281333441216927, "grad_norm": 0.6610472202301025, "learning_rate": 3.4438522819952415e-06, "loss": 0.0179, "step": 158360 }, { "epoch": 1.2814143539121288, "grad_norm": 0.11750059574842453, "learning_rate": 3.4431812694905185e-06, "loss": 0.0142, "step": 158370 }, { "epoch": 1.2814952666073307, "grad_norm": 0.272074431180954, "learning_rate": 3.4425102880332606e-06, "loss": 0.016, "step": 158380 }, { "epoch": 1.2815761793025326, "grad_norm": 0.35209864377975464, "learning_rate": 3.4418393376368473e-06, "loss": 0.0187, "step": 158390 }, { "epoch": 1.2816570919977344, "grad_norm": 0.6314521431922913, "learning_rate": 3.4411684183146577e-06, "loss": 0.0209, "step": 158400 }, { "epoch": 1.2817380046929363, "grad_norm": 0.6950510144233704, "learning_rate": 3.4404975300800726e-06, "loss": 0.019, "step": 158410 }, { "epoch": 1.2818189173881382, "grad_norm": 0.6565167903900146, "learning_rate": 3.4398266729464736e-06, "loss": 0.0292, "step": 158420 }, { "epoch": 1.28189983008334, "grad_norm": 0.21970009803771973, "learning_rate": 3.4391558469272374e-06, "loss": 0.0118, "step": 158430 }, { "epoch": 1.281980742778542, "grad_norm": 0.21921329200267792, "learning_rate": 3.4384850520357416e-06, "loss": 0.0184, "step": 158440 }, { "epoch": 1.2820616554737438, "grad_norm": 0.438571959733963, "learning_rate": 3.437814288285367e-06, "loss": 0.017, "step": 158450 }, { "epoch": 1.2821425681689458, "grad_norm": 0.13850608468055725, "learning_rate": 3.4371435556894876e-06, "loss": 0.027, "step": 158460 }, { "epoch": 1.2822234808641475, "grad_norm": 0.1622879058122635, "learning_rate": 3.4364728542614795e-06, "loss": 0.01, "step": 158470 }, { "epoch": 1.2823043935593494, "grad_norm": 0.2562496066093445, "learning_rate": 3.4358021840147215e-06, "loss": 0.0208, "step": 158480 }, { "epoch": 1.2823853062545514, "grad_norm": 0.49309995770454407, "learning_rate": 3.4351315449625856e-06, "loss": 0.0262, "step": 158490 }, { "epoch": 1.2824662189497533, "grad_norm": 0.5111656785011292, "learning_rate": 3.4344609371184474e-06, "loss": 0.0279, "step": 158500 }, { "epoch": 1.282547131644955, "grad_norm": 0.4431898295879364, "learning_rate": 3.4337903604956822e-06, "loss": 0.0179, "step": 158510 }, { "epoch": 1.282628044340157, "grad_norm": 0.19711609184741974, "learning_rate": 3.4331198151076594e-06, "loss": 0.0248, "step": 158520 }, { "epoch": 1.282708957035359, "grad_norm": 0.5781208872795105, "learning_rate": 3.4324493009677564e-06, "loss": 0.0392, "step": 158530 }, { "epoch": 1.2827898697305606, "grad_norm": 0.47872936725616455, "learning_rate": 3.4317788180893403e-06, "loss": 0.0323, "step": 158540 }, { "epoch": 1.2828707824257626, "grad_norm": 0.3789842128753662, "learning_rate": 3.431108366485788e-06, "loss": 0.0185, "step": 158550 }, { "epoch": 1.2829516951209645, "grad_norm": 0.3035467267036438, "learning_rate": 3.4304379461704646e-06, "loss": 0.0184, "step": 158560 }, { "epoch": 1.2830326078161662, "grad_norm": 0.31927427649497986, "learning_rate": 3.429767557156744e-06, "loss": 0.0177, "step": 158570 }, { "epoch": 1.2831135205113682, "grad_norm": 0.2552330791950226, "learning_rate": 3.429097199457996e-06, "loss": 0.022, "step": 158580 }, { "epoch": 1.2831944332065701, "grad_norm": 0.5687428116798401, "learning_rate": 3.428426873087586e-06, "loss": 0.0188, "step": 158590 }, { "epoch": 1.283275345901772, "grad_norm": 0.5695645213127136, "learning_rate": 3.427756578058885e-06, "loss": 0.0185, "step": 158600 }, { "epoch": 1.283356258596974, "grad_norm": 0.5366696715354919, "learning_rate": 3.4270863143852605e-06, "loss": 0.0302, "step": 158610 }, { "epoch": 1.2834371712921757, "grad_norm": 0.07577541470527649, "learning_rate": 3.4264160820800773e-06, "loss": 0.021, "step": 158620 }, { "epoch": 1.2835180839873777, "grad_norm": 0.3810282051563263, "learning_rate": 3.4257458811567036e-06, "loss": 0.014, "step": 158630 }, { "epoch": 1.2835989966825796, "grad_norm": 0.44323575496673584, "learning_rate": 3.4250757116285066e-06, "loss": 0.0263, "step": 158640 }, { "epoch": 1.2836799093777813, "grad_norm": 0.374226838350296, "learning_rate": 3.4244055735088487e-06, "loss": 0.0216, "step": 158650 }, { "epoch": 1.2837608220729833, "grad_norm": 0.5072699189186096, "learning_rate": 3.4237354668110947e-06, "loss": 0.0119, "step": 158660 }, { "epoch": 1.2838417347681852, "grad_norm": 0.18522688746452332, "learning_rate": 3.423065391548611e-06, "loss": 0.024, "step": 158670 }, { "epoch": 1.283922647463387, "grad_norm": 0.3576478362083435, "learning_rate": 3.4223953477347573e-06, "loss": 0.0279, "step": 158680 }, { "epoch": 1.2840035601585889, "grad_norm": 0.5900814533233643, "learning_rate": 3.421725335382898e-06, "loss": 0.033, "step": 158690 }, { "epoch": 1.2840844728537908, "grad_norm": 0.33211395144462585, "learning_rate": 3.421055354506395e-06, "loss": 0.0193, "step": 158700 }, { "epoch": 1.2841653855489925, "grad_norm": 0.2399570196866989, "learning_rate": 3.4203854051186115e-06, "loss": 0.0191, "step": 158710 }, { "epoch": 1.2842462982441945, "grad_norm": 0.4723215699195862, "learning_rate": 3.419715487232905e-06, "loss": 0.0277, "step": 158720 }, { "epoch": 1.2843272109393964, "grad_norm": 0.43257981538772583, "learning_rate": 3.419045600862636e-06, "loss": 0.0236, "step": 158730 }, { "epoch": 1.2844081236345983, "grad_norm": 0.521187424659729, "learning_rate": 3.418375746021167e-06, "loss": 0.0339, "step": 158740 }, { "epoch": 1.2844890363298003, "grad_norm": 0.42162761092185974, "learning_rate": 3.417705922721853e-06, "loss": 0.0131, "step": 158750 }, { "epoch": 1.284569949025002, "grad_norm": 0.5626024603843689, "learning_rate": 3.4170361309780546e-06, "loss": 0.0248, "step": 158760 }, { "epoch": 1.284650861720204, "grad_norm": 0.3601769506931305, "learning_rate": 3.41636637080313e-06, "loss": 0.0184, "step": 158770 }, { "epoch": 1.2847317744154059, "grad_norm": 0.29486986994743347, "learning_rate": 3.415696642210433e-06, "loss": 0.0137, "step": 158780 }, { "epoch": 1.2848126871106076, "grad_norm": 0.30652114748954773, "learning_rate": 3.4150269452133233e-06, "loss": 0.0258, "step": 158790 }, { "epoch": 1.2848935998058095, "grad_norm": 0.46604910492897034, "learning_rate": 3.414357279825156e-06, "loss": 0.0288, "step": 158800 }, { "epoch": 1.2849745125010115, "grad_norm": 0.38208821415901184, "learning_rate": 3.413687646059283e-06, "loss": 0.0202, "step": 158810 }, { "epoch": 1.2850554251962132, "grad_norm": 0.36726367473602295, "learning_rate": 3.4130180439290606e-06, "loss": 0.0172, "step": 158820 }, { "epoch": 1.2851363378914151, "grad_norm": 0.4155101478099823, "learning_rate": 3.4123484734478453e-06, "loss": 0.0286, "step": 158830 }, { "epoch": 1.285217250586617, "grad_norm": 0.33723026514053345, "learning_rate": 3.4116789346289875e-06, "loss": 0.0118, "step": 158840 }, { "epoch": 1.2852981632818188, "grad_norm": 0.22382286190986633, "learning_rate": 3.4110094274858387e-06, "loss": 0.0243, "step": 158850 }, { "epoch": 1.2853790759770207, "grad_norm": 0.3445250391960144, "learning_rate": 3.4103399520317547e-06, "loss": 0.0231, "step": 158860 }, { "epoch": 1.2854599886722227, "grad_norm": 0.30248549580574036, "learning_rate": 3.4096705082800836e-06, "loss": 0.0175, "step": 158870 }, { "epoch": 1.2855409013674246, "grad_norm": 0.3080192506313324, "learning_rate": 3.4090010962441755e-06, "loss": 0.0175, "step": 158880 }, { "epoch": 1.2856218140626265, "grad_norm": 0.17994524538516998, "learning_rate": 3.4083317159373817e-06, "loss": 0.0147, "step": 158890 }, { "epoch": 1.2857027267578283, "grad_norm": 0.1459970325231552, "learning_rate": 3.407662367373054e-06, "loss": 0.0156, "step": 158900 }, { "epoch": 1.2857836394530302, "grad_norm": 0.39039552211761475, "learning_rate": 3.406993050564538e-06, "loss": 0.0246, "step": 158910 }, { "epoch": 1.2858645521482321, "grad_norm": 0.38270333409309387, "learning_rate": 3.406323765525181e-06, "loss": 0.0169, "step": 158920 }, { "epoch": 1.2859454648434339, "grad_norm": 0.3108465075492859, "learning_rate": 3.405654512268335e-06, "loss": 0.021, "step": 158930 }, { "epoch": 1.2860263775386358, "grad_norm": 0.1866496354341507, "learning_rate": 3.404985290807342e-06, "loss": 0.0228, "step": 158940 }, { "epoch": 1.2861072902338377, "grad_norm": 0.13963553309440613, "learning_rate": 3.404316101155549e-06, "loss": 0.0147, "step": 158950 }, { "epoch": 1.2861882029290395, "grad_norm": 0.4861307442188263, "learning_rate": 3.403646943326305e-06, "loss": 0.0334, "step": 158960 }, { "epoch": 1.2862691156242414, "grad_norm": 0.5225846171379089, "learning_rate": 3.402977817332952e-06, "loss": 0.0169, "step": 158970 }, { "epoch": 1.2863500283194433, "grad_norm": 0.15984858572483063, "learning_rate": 3.402308723188834e-06, "loss": 0.014, "step": 158980 }, { "epoch": 1.2864309410146453, "grad_norm": 0.32902589440345764, "learning_rate": 3.4016396609072977e-06, "loss": 0.0157, "step": 158990 }, { "epoch": 1.286511853709847, "grad_norm": 0.23037318885326385, "learning_rate": 3.400970630501682e-06, "loss": 0.019, "step": 159000 }, { "epoch": 1.286592766405049, "grad_norm": 0.3954814076423645, "learning_rate": 3.4003016319853322e-06, "loss": 0.0185, "step": 159010 }, { "epoch": 1.2866736791002509, "grad_norm": 0.3745087683200836, "learning_rate": 3.3996326653715906e-06, "loss": 0.0237, "step": 159020 }, { "epoch": 1.2867545917954528, "grad_norm": 0.22723284363746643, "learning_rate": 3.3989637306737943e-06, "loss": 0.0172, "step": 159030 }, { "epoch": 1.2868355044906545, "grad_norm": 0.4661870002746582, "learning_rate": 3.3982948279052873e-06, "loss": 0.0193, "step": 159040 }, { "epoch": 1.2869164171858565, "grad_norm": 0.7198070287704468, "learning_rate": 3.39762595707941e-06, "loss": 0.0349, "step": 159050 }, { "epoch": 1.2869973298810584, "grad_norm": 0.32721391320228577, "learning_rate": 3.3969571182094995e-06, "loss": 0.0226, "step": 159060 }, { "epoch": 1.2870782425762601, "grad_norm": 0.2415635585784912, "learning_rate": 3.396288311308894e-06, "loss": 0.0238, "step": 159070 }, { "epoch": 1.287159155271462, "grad_norm": 0.4259222447872162, "learning_rate": 3.3956195363909335e-06, "loss": 0.0209, "step": 159080 }, { "epoch": 1.287240067966664, "grad_norm": 0.2361031472682953, "learning_rate": 3.3949507934689554e-06, "loss": 0.0168, "step": 159090 }, { "epoch": 1.2873209806618657, "grad_norm": 0.7389777898788452, "learning_rate": 3.394282082556293e-06, "loss": 0.0368, "step": 159100 }, { "epoch": 1.2874018933570677, "grad_norm": 0.2653675675392151, "learning_rate": 3.393613403666285e-06, "loss": 0.0157, "step": 159110 }, { "epoch": 1.2874828060522696, "grad_norm": 0.4126560389995575, "learning_rate": 3.392944756812268e-06, "loss": 0.0136, "step": 159120 }, { "epoch": 1.2875637187474716, "grad_norm": 0.3073379099369049, "learning_rate": 3.3922761420075743e-06, "loss": 0.0185, "step": 159130 }, { "epoch": 1.2876446314426735, "grad_norm": 0.2786485254764557, "learning_rate": 3.3916075592655383e-06, "loss": 0.0169, "step": 159140 }, { "epoch": 1.2877255441378752, "grad_norm": 0.34243154525756836, "learning_rate": 3.3909390085994965e-06, "loss": 0.0153, "step": 159150 }, { "epoch": 1.2878064568330772, "grad_norm": 0.18998202681541443, "learning_rate": 3.3902704900227774e-06, "loss": 0.017, "step": 159160 }, { "epoch": 1.287887369528279, "grad_norm": 0.38469842076301575, "learning_rate": 3.389602003548714e-06, "loss": 0.0185, "step": 159170 }, { "epoch": 1.2879682822234808, "grad_norm": 0.1252131313085556, "learning_rate": 3.3889335491906416e-06, "loss": 0.0261, "step": 159180 }, { "epoch": 1.2880491949186827, "grad_norm": 0.24808134138584137, "learning_rate": 3.388265126961886e-06, "loss": 0.0207, "step": 159190 }, { "epoch": 1.2881301076138847, "grad_norm": 0.5550106763839722, "learning_rate": 3.3875967368757812e-06, "loss": 0.018, "step": 159200 }, { "epoch": 1.2882110203090864, "grad_norm": 0.5717602968215942, "learning_rate": 3.3869283789456563e-06, "loss": 0.02, "step": 159210 }, { "epoch": 1.2882919330042883, "grad_norm": 0.3282541334629059, "learning_rate": 3.3862600531848378e-06, "loss": 0.0273, "step": 159220 }, { "epoch": 1.2883728456994903, "grad_norm": 0.2885236144065857, "learning_rate": 3.3855917596066557e-06, "loss": 0.0245, "step": 159230 }, { "epoch": 1.288453758394692, "grad_norm": 0.3706870675086975, "learning_rate": 3.3849234982244395e-06, "loss": 0.0167, "step": 159240 }, { "epoch": 1.288534671089894, "grad_norm": 0.3233661353588104, "learning_rate": 3.384255269051514e-06, "loss": 0.0271, "step": 159250 }, { "epoch": 1.2886155837850959, "grad_norm": 0.2409054934978485, "learning_rate": 3.383587072101205e-06, "loss": 0.0137, "step": 159260 }, { "epoch": 1.2886964964802978, "grad_norm": 0.4762958586215973, "learning_rate": 3.3829189073868403e-06, "loss": 0.02, "step": 159270 }, { "epoch": 1.2887774091754998, "grad_norm": 0.5528576970100403, "learning_rate": 3.382250774921745e-06, "loss": 0.0196, "step": 159280 }, { "epoch": 1.2888583218707015, "grad_norm": 0.22604724764823914, "learning_rate": 3.3815826747192415e-06, "loss": 0.0339, "step": 159290 }, { "epoch": 1.2889392345659034, "grad_norm": 0.2336147278547287, "learning_rate": 3.3809146067926542e-06, "loss": 0.0131, "step": 159300 }, { "epoch": 1.2890201472611054, "grad_norm": 0.47164401412010193, "learning_rate": 3.380246571155309e-06, "loss": 0.0223, "step": 159310 }, { "epoch": 1.289101059956307, "grad_norm": 0.22972702980041504, "learning_rate": 3.379578567820526e-06, "loss": 0.0188, "step": 159320 }, { "epoch": 1.289181972651509, "grad_norm": 0.8113715648651123, "learning_rate": 3.3789105968016268e-06, "loss": 0.0198, "step": 159330 }, { "epoch": 1.289262885346711, "grad_norm": 0.10827277600765228, "learning_rate": 3.378242658111935e-06, "loss": 0.0115, "step": 159340 }, { "epoch": 1.2893437980419127, "grad_norm": 0.23947252333164215, "learning_rate": 3.3775747517647683e-06, "loss": 0.0157, "step": 159350 }, { "epoch": 1.2894247107371146, "grad_norm": 0.20413801074028015, "learning_rate": 3.3769068777734475e-06, "loss": 0.0299, "step": 159360 }, { "epoch": 1.2895056234323166, "grad_norm": 0.08979521691799164, "learning_rate": 3.3762390361512946e-06, "loss": 0.0222, "step": 159370 }, { "epoch": 1.2895865361275183, "grad_norm": 0.3877938985824585, "learning_rate": 3.3755712269116247e-06, "loss": 0.0296, "step": 159380 }, { "epoch": 1.2896674488227202, "grad_norm": 0.24825750291347504, "learning_rate": 3.3749034500677572e-06, "loss": 0.0196, "step": 159390 }, { "epoch": 1.2897483615179222, "grad_norm": 0.4960608184337616, "learning_rate": 3.3742357056330115e-06, "loss": 0.0161, "step": 159400 }, { "epoch": 1.289829274213124, "grad_norm": 0.2387634813785553, "learning_rate": 3.3735679936207007e-06, "loss": 0.0189, "step": 159410 }, { "epoch": 1.289910186908326, "grad_norm": 0.5925619602203369, "learning_rate": 3.372900314044143e-06, "loss": 0.0268, "step": 159420 }, { "epoch": 1.2899910996035278, "grad_norm": 0.4113768935203552, "learning_rate": 3.372232666916653e-06, "loss": 0.0191, "step": 159430 }, { "epoch": 1.2900720122987297, "grad_norm": 0.49233007431030273, "learning_rate": 3.371565052251548e-06, "loss": 0.0228, "step": 159440 }, { "epoch": 1.2901529249939316, "grad_norm": 0.343624085187912, "learning_rate": 3.3708974700621385e-06, "loss": 0.0133, "step": 159450 }, { "epoch": 1.2902338376891334, "grad_norm": 0.5706363320350647, "learning_rate": 3.3702299203617407e-06, "loss": 0.0183, "step": 159460 }, { "epoch": 1.2903147503843353, "grad_norm": 0.5776071548461914, "learning_rate": 3.369562403163667e-06, "loss": 0.0189, "step": 159470 }, { "epoch": 1.2903956630795372, "grad_norm": 0.3458300530910492, "learning_rate": 3.3688949184812274e-06, "loss": 0.0199, "step": 159480 }, { "epoch": 1.290476575774739, "grad_norm": 1.1469279527664185, "learning_rate": 3.3682274663277366e-06, "loss": 0.0156, "step": 159490 }, { "epoch": 1.290557488469941, "grad_norm": 0.21765142679214478, "learning_rate": 3.367560046716505e-06, "loss": 0.0181, "step": 159500 }, { "epoch": 1.2906384011651428, "grad_norm": 0.6161353588104248, "learning_rate": 3.36689265966084e-06, "loss": 0.0277, "step": 159510 }, { "epoch": 1.2907193138603446, "grad_norm": 0.5680205225944519, "learning_rate": 3.366225305174053e-06, "loss": 0.0076, "step": 159520 }, { "epoch": 1.2908002265555465, "grad_norm": 0.3238552510738373, "learning_rate": 3.3655579832694553e-06, "loss": 0.0225, "step": 159530 }, { "epoch": 1.2908811392507484, "grad_norm": 0.1880057007074356, "learning_rate": 3.364890693960352e-06, "loss": 0.0271, "step": 159540 }, { "epoch": 1.2909620519459504, "grad_norm": 0.3762253224849701, "learning_rate": 3.364223437260051e-06, "loss": 0.0206, "step": 159550 }, { "epoch": 1.2910429646411523, "grad_norm": 0.3295031785964966, "learning_rate": 3.3635562131818632e-06, "loss": 0.0237, "step": 159560 }, { "epoch": 1.291123877336354, "grad_norm": 0.1442379653453827, "learning_rate": 3.3628890217390904e-06, "loss": 0.0173, "step": 159570 }, { "epoch": 1.291204790031556, "grad_norm": 0.38013720512390137, "learning_rate": 3.3622218629450385e-06, "loss": 0.0241, "step": 159580 }, { "epoch": 1.291285702726758, "grad_norm": 0.4792056977748871, "learning_rate": 3.361554736813017e-06, "loss": 0.0202, "step": 159590 }, { "epoch": 1.2913666154219596, "grad_norm": 0.5683022141456604, "learning_rate": 3.3608876433563244e-06, "loss": 0.0275, "step": 159600 }, { "epoch": 1.2914475281171616, "grad_norm": 0.4428476393222809, "learning_rate": 3.3602205825882694e-06, "loss": 0.0319, "step": 159610 }, { "epoch": 1.2915284408123635, "grad_norm": 0.25286969542503357, "learning_rate": 3.359553554522151e-06, "loss": 0.0165, "step": 159620 }, { "epoch": 1.2916093535075652, "grad_norm": 0.3158058822154999, "learning_rate": 3.3588865591712766e-06, "loss": 0.0199, "step": 159630 }, { "epoch": 1.2916902662027672, "grad_norm": 0.2870660424232483, "learning_rate": 3.358219596548944e-06, "loss": 0.029, "step": 159640 }, { "epoch": 1.291771178897969, "grad_norm": 0.19663025438785553, "learning_rate": 3.357552666668455e-06, "loss": 0.0167, "step": 159650 }, { "epoch": 1.291852091593171, "grad_norm": 0.14452381432056427, "learning_rate": 3.3568857695431124e-06, "loss": 0.0148, "step": 159660 }, { "epoch": 1.2919330042883728, "grad_norm": 0.5602425932884216, "learning_rate": 3.356218905186212e-06, "loss": 0.0307, "step": 159670 }, { "epoch": 1.2920139169835747, "grad_norm": 0.33928319811820984, "learning_rate": 3.355552073611057e-06, "loss": 0.0282, "step": 159680 }, { "epoch": 1.2920948296787766, "grad_norm": 0.892898440361023, "learning_rate": 3.3548852748309444e-06, "loss": 0.0219, "step": 159690 }, { "epoch": 1.2921757423739786, "grad_norm": 0.3947105407714844, "learning_rate": 3.354218508859171e-06, "loss": 0.0243, "step": 159700 }, { "epoch": 1.2922566550691803, "grad_norm": 0.18010300397872925, "learning_rate": 3.3535517757090357e-06, "loss": 0.0207, "step": 159710 }, { "epoch": 1.2923375677643822, "grad_norm": 0.24773187935352325, "learning_rate": 3.3528850753938346e-06, "loss": 0.0385, "step": 159720 }, { "epoch": 1.2924184804595842, "grad_norm": 0.2941301465034485, "learning_rate": 3.3522184079268627e-06, "loss": 0.0213, "step": 159730 }, { "epoch": 1.292499393154786, "grad_norm": 0.4686371982097626, "learning_rate": 3.351551773321416e-06, "loss": 0.0241, "step": 159740 }, { "epoch": 1.2925803058499878, "grad_norm": 0.5491641163825989, "learning_rate": 3.3508851715907907e-06, "loss": 0.015, "step": 159750 }, { "epoch": 1.2926612185451898, "grad_norm": 0.35656872391700745, "learning_rate": 3.350218602748278e-06, "loss": 0.0245, "step": 159760 }, { "epoch": 1.2927421312403915, "grad_norm": 0.41043156385421753, "learning_rate": 3.349552066807171e-06, "loss": 0.0183, "step": 159770 }, { "epoch": 1.2928230439355934, "grad_norm": 0.3540094494819641, "learning_rate": 3.348885563780767e-06, "loss": 0.0212, "step": 159780 }, { "epoch": 1.2929039566307954, "grad_norm": 0.04625554755330086, "learning_rate": 3.348219093682352e-06, "loss": 0.0213, "step": 159790 }, { "epoch": 1.2929848693259973, "grad_norm": 0.4962809085845947, "learning_rate": 3.3475526565252216e-06, "loss": 0.0146, "step": 159800 }, { "epoch": 1.2930657820211993, "grad_norm": 0.5842797160148621, "learning_rate": 3.3468862523226634e-06, "loss": 0.0257, "step": 159810 }, { "epoch": 1.293146694716401, "grad_norm": 0.21324951946735382, "learning_rate": 3.346219881087972e-06, "loss": 0.0161, "step": 159820 }, { "epoch": 1.293227607411603, "grad_norm": 0.36943745613098145, "learning_rate": 3.345553542834432e-06, "loss": 0.0173, "step": 159830 }, { "epoch": 1.2933085201068049, "grad_norm": 0.002798479748889804, "learning_rate": 3.344887237575333e-06, "loss": 0.0155, "step": 159840 }, { "epoch": 1.2933894328020066, "grad_norm": 0.3638725280761719, "learning_rate": 3.3442209653239654e-06, "loss": 0.0179, "step": 159850 }, { "epoch": 1.2934703454972085, "grad_norm": 0.3876785635948181, "learning_rate": 3.343554726093614e-06, "loss": 0.026, "step": 159860 }, { "epoch": 1.2935512581924105, "grad_norm": 0.45151326060295105, "learning_rate": 3.3428885198975675e-06, "loss": 0.0226, "step": 159870 }, { "epoch": 1.2936321708876122, "grad_norm": 0.5878350138664246, "learning_rate": 3.342222346749112e-06, "loss": 0.0228, "step": 159880 }, { "epoch": 1.2937130835828141, "grad_norm": 0.34123164415359497, "learning_rate": 3.3415562066615303e-06, "loss": 0.0179, "step": 159890 }, { "epoch": 1.293793996278016, "grad_norm": 0.40854761004447937, "learning_rate": 3.3408900996481096e-06, "loss": 0.0349, "step": 159900 }, { "epoch": 1.2938749089732178, "grad_norm": 0.34623488783836365, "learning_rate": 3.340224025722134e-06, "loss": 0.0298, "step": 159910 }, { "epoch": 1.2939558216684197, "grad_norm": 0.27145564556121826, "learning_rate": 3.3395579848968853e-06, "loss": 0.0236, "step": 159920 }, { "epoch": 1.2940367343636217, "grad_norm": 0.29865145683288574, "learning_rate": 3.3388919771856467e-06, "loss": 0.0258, "step": 159930 }, { "epoch": 1.2941176470588236, "grad_norm": 0.7594157457351685, "learning_rate": 3.3382260026017027e-06, "loss": 0.0191, "step": 159940 }, { "epoch": 1.2941985597540255, "grad_norm": 0.30315884947776794, "learning_rate": 3.337560061158332e-06, "loss": 0.0179, "step": 159950 }, { "epoch": 1.2942794724492273, "grad_norm": 0.21372942626476288, "learning_rate": 3.3368941528688154e-06, "loss": 0.014, "step": 159960 }, { "epoch": 1.2943603851444292, "grad_norm": 0.31502869725227356, "learning_rate": 3.336228277746435e-06, "loss": 0.0146, "step": 159970 }, { "epoch": 1.2944412978396311, "grad_norm": 0.538878321647644, "learning_rate": 3.33556243580447e-06, "loss": 0.0147, "step": 159980 }, { "epoch": 1.2945222105348329, "grad_norm": 0.08882088959217072, "learning_rate": 3.3348966270561966e-06, "loss": 0.0143, "step": 159990 }, { "epoch": 1.2946031232300348, "grad_norm": 0.286144495010376, "learning_rate": 3.334230851514895e-06, "loss": 0.0126, "step": 160000 }, { "epoch": 1.2946840359252367, "grad_norm": 0.24355390667915344, "learning_rate": 3.3335651091938436e-06, "loss": 0.0227, "step": 160010 }, { "epoch": 1.2947649486204384, "grad_norm": 0.27567771077156067, "learning_rate": 3.332899400106318e-06, "loss": 0.016, "step": 160020 }, { "epoch": 1.2948458613156404, "grad_norm": 0.381181538105011, "learning_rate": 3.3322337242655935e-06, "loss": 0.0237, "step": 160030 }, { "epoch": 1.2949267740108423, "grad_norm": 0.3973488509654999, "learning_rate": 3.3315680816849483e-06, "loss": 0.0122, "step": 160040 }, { "epoch": 1.295007686706044, "grad_norm": 0.3878679573535919, "learning_rate": 3.330902472377654e-06, "loss": 0.0217, "step": 160050 }, { "epoch": 1.295088599401246, "grad_norm": 0.8978599309921265, "learning_rate": 3.3302368963569853e-06, "loss": 0.0157, "step": 160060 }, { "epoch": 1.295169512096448, "grad_norm": 0.2558680772781372, "learning_rate": 3.3295713536362196e-06, "loss": 0.0153, "step": 160070 }, { "epoch": 1.2952504247916499, "grad_norm": 0.7432816624641418, "learning_rate": 3.3289058442286244e-06, "loss": 0.0201, "step": 160080 }, { "epoch": 1.2953313374868518, "grad_norm": 0.3055188059806824, "learning_rate": 3.3282403681474753e-06, "loss": 0.0214, "step": 160090 }, { "epoch": 1.2954122501820535, "grad_norm": 0.30332112312316895, "learning_rate": 3.3275749254060436e-06, "loss": 0.0138, "step": 160100 }, { "epoch": 1.2954931628772555, "grad_norm": 0.2976782023906708, "learning_rate": 3.326909516017597e-06, "loss": 0.0185, "step": 160110 }, { "epoch": 1.2955740755724574, "grad_norm": 0.431395560503006, "learning_rate": 3.32624413999541e-06, "loss": 0.0233, "step": 160120 }, { "epoch": 1.2956549882676591, "grad_norm": 0.38453730940818787, "learning_rate": 3.3255787973527507e-06, "loss": 0.0167, "step": 160130 }, { "epoch": 1.295735900962861, "grad_norm": 0.42449986934661865, "learning_rate": 3.3249134881028856e-06, "loss": 0.0225, "step": 160140 }, { "epoch": 1.295816813658063, "grad_norm": 0.6578623056411743, "learning_rate": 3.324248212259084e-06, "loss": 0.0295, "step": 160150 }, { "epoch": 1.2958977263532647, "grad_norm": 0.6070812940597534, "learning_rate": 3.3235829698346156e-06, "loss": 0.0391, "step": 160160 }, { "epoch": 1.2959786390484667, "grad_norm": 0.4460415840148926, "learning_rate": 3.322917760842747e-06, "loss": 0.0226, "step": 160170 }, { "epoch": 1.2960595517436686, "grad_norm": 0.3958260715007782, "learning_rate": 3.3222525852967413e-06, "loss": 0.0258, "step": 160180 }, { "epoch": 1.2961404644388703, "grad_norm": 0.4492506980895996, "learning_rate": 3.3215874432098668e-06, "loss": 0.0208, "step": 160190 }, { "epoch": 1.2962213771340723, "grad_norm": 0.12178999930620193, "learning_rate": 3.3209223345953877e-06, "loss": 0.0296, "step": 160200 }, { "epoch": 1.2963022898292742, "grad_norm": 0.37148672342300415, "learning_rate": 3.320257259466567e-06, "loss": 0.0191, "step": 160210 }, { "epoch": 1.2963832025244761, "grad_norm": 0.1420569270849228, "learning_rate": 3.319592217836669e-06, "loss": 0.016, "step": 160220 }, { "epoch": 1.296464115219678, "grad_norm": 0.4960959553718567, "learning_rate": 3.3189272097189586e-06, "loss": 0.0139, "step": 160230 }, { "epoch": 1.2965450279148798, "grad_norm": 0.3280629515647888, "learning_rate": 3.3182622351266945e-06, "loss": 0.017, "step": 160240 }, { "epoch": 1.2966259406100817, "grad_norm": 0.29801976680755615, "learning_rate": 3.3175972940731395e-06, "loss": 0.0124, "step": 160250 }, { "epoch": 1.2967068533052837, "grad_norm": 0.4392504394054413, "learning_rate": 3.3169323865715566e-06, "loss": 0.022, "step": 160260 }, { "epoch": 1.2967877660004854, "grad_norm": 0.14596027135849, "learning_rate": 3.3162675126352017e-06, "loss": 0.0195, "step": 160270 }, { "epoch": 1.2968686786956873, "grad_norm": 0.6454938054084778, "learning_rate": 3.315602672277338e-06, "loss": 0.023, "step": 160280 }, { "epoch": 1.2969495913908893, "grad_norm": 0.7508395314216614, "learning_rate": 3.314937865511224e-06, "loss": 0.0318, "step": 160290 }, { "epoch": 1.297030504086091, "grad_norm": 0.7009519338607788, "learning_rate": 3.3142730923501155e-06, "loss": 0.0322, "step": 160300 }, { "epoch": 1.297111416781293, "grad_norm": 0.15723970532417297, "learning_rate": 3.313608352807272e-06, "loss": 0.0214, "step": 160310 }, { "epoch": 1.2971923294764949, "grad_norm": 0.2441638559103012, "learning_rate": 3.312943646895951e-06, "loss": 0.0191, "step": 160320 }, { "epoch": 1.2972732421716968, "grad_norm": 0.45804068446159363, "learning_rate": 3.312278974629405e-06, "loss": 0.0168, "step": 160330 }, { "epoch": 1.2973541548668985, "grad_norm": 0.3446653187274933, "learning_rate": 3.3116143360208926e-06, "loss": 0.0254, "step": 160340 }, { "epoch": 1.2974350675621005, "grad_norm": 0.5707917213439941, "learning_rate": 3.310949731083668e-06, "loss": 0.021, "step": 160350 }, { "epoch": 1.2975159802573024, "grad_norm": 0.6513200998306274, "learning_rate": 3.310285159830987e-06, "loss": 0.0247, "step": 160360 }, { "epoch": 1.2975968929525044, "grad_norm": 0.2891332507133484, "learning_rate": 3.3096206222761e-06, "loss": 0.0211, "step": 160370 }, { "epoch": 1.297677805647706, "grad_norm": 0.5956494808197021, "learning_rate": 3.3089561184322607e-06, "loss": 0.0245, "step": 160380 }, { "epoch": 1.297758718342908, "grad_norm": 0.3149332106113434, "learning_rate": 3.3082916483127232e-06, "loss": 0.0152, "step": 160390 }, { "epoch": 1.29783963103811, "grad_norm": 0.23962129652500153, "learning_rate": 3.307627211930735e-06, "loss": 0.0217, "step": 160400 }, { "epoch": 1.2979205437333117, "grad_norm": 0.40993884205818176, "learning_rate": 3.3069628092995497e-06, "loss": 0.0378, "step": 160410 }, { "epoch": 1.2980014564285136, "grad_norm": 0.4168713390827179, "learning_rate": 3.3062984404324195e-06, "loss": 0.0183, "step": 160420 }, { "epoch": 1.2980823691237156, "grad_norm": 0.38123390078544617, "learning_rate": 3.3056341053425898e-06, "loss": 0.0236, "step": 160430 }, { "epoch": 1.2981632818189173, "grad_norm": 0.13274119794368744, "learning_rate": 3.30496980404331e-06, "loss": 0.013, "step": 160440 }, { "epoch": 1.2982441945141192, "grad_norm": 0.6547654867172241, "learning_rate": 3.3043055365478317e-06, "loss": 0.0234, "step": 160450 }, { "epoch": 1.2983251072093211, "grad_norm": 0.5944104790687561, "learning_rate": 3.303641302869398e-06, "loss": 0.0296, "step": 160460 }, { "epoch": 1.298406019904523, "grad_norm": 0.560395359992981, "learning_rate": 3.3029771030212563e-06, "loss": 0.0149, "step": 160470 }, { "epoch": 1.298486932599725, "grad_norm": 0.26622021198272705, "learning_rate": 3.3023129370166556e-06, "loss": 0.0218, "step": 160480 }, { "epoch": 1.2985678452949267, "grad_norm": 0.3122249245643616, "learning_rate": 3.3016488048688376e-06, "loss": 0.0265, "step": 160490 }, { "epoch": 1.2986487579901287, "grad_norm": 0.11232973635196686, "learning_rate": 3.30098470659105e-06, "loss": 0.0205, "step": 160500 }, { "epoch": 1.2987296706853306, "grad_norm": 0.27098941802978516, "learning_rate": 3.3003206421965367e-06, "loss": 0.0208, "step": 160510 }, { "epoch": 1.2988105833805323, "grad_norm": 0.2897869050502777, "learning_rate": 3.2996566116985373e-06, "loss": 0.023, "step": 160520 }, { "epoch": 1.2988914960757343, "grad_norm": 0.09113790839910507, "learning_rate": 3.298992615110299e-06, "loss": 0.0269, "step": 160530 }, { "epoch": 1.2989724087709362, "grad_norm": 0.40299737453460693, "learning_rate": 3.2983286524450608e-06, "loss": 0.0203, "step": 160540 }, { "epoch": 1.299053321466138, "grad_norm": 0.20394334197044373, "learning_rate": 3.2976647237160675e-06, "loss": 0.0122, "step": 160550 }, { "epoch": 1.2991342341613399, "grad_norm": 0.3155132234096527, "learning_rate": 3.297000828936555e-06, "loss": 0.0163, "step": 160560 }, { "epoch": 1.2992151468565418, "grad_norm": 0.17167671024799347, "learning_rate": 3.2963369681197666e-06, "loss": 0.0282, "step": 160570 }, { "epoch": 1.2992960595517435, "grad_norm": 0.3022560179233551, "learning_rate": 3.2956731412789423e-06, "loss": 0.0147, "step": 160580 }, { "epoch": 1.2993769722469455, "grad_norm": 0.9247854351997375, "learning_rate": 3.295009348427317e-06, "loss": 0.0252, "step": 160590 }, { "epoch": 1.2994578849421474, "grad_norm": 0.561534583568573, "learning_rate": 3.294345589578132e-06, "loss": 0.0198, "step": 160600 }, { "epoch": 1.2995387976373494, "grad_norm": 0.38177984952926636, "learning_rate": 3.2936818647446243e-06, "loss": 0.0242, "step": 160610 }, { "epoch": 1.2996197103325513, "grad_norm": 0.3228362500667572, "learning_rate": 3.2930181739400272e-06, "loss": 0.0177, "step": 160620 }, { "epoch": 1.299700623027753, "grad_norm": 0.3041318953037262, "learning_rate": 3.2923545171775793e-06, "loss": 0.0121, "step": 160630 }, { "epoch": 1.299781535722955, "grad_norm": 0.3891902565956116, "learning_rate": 3.2916908944705183e-06, "loss": 0.019, "step": 160640 }, { "epoch": 1.299862448418157, "grad_norm": 0.3693794906139374, "learning_rate": 3.291027305832074e-06, "loss": 0.0223, "step": 160650 }, { "epoch": 1.2999433611133586, "grad_norm": 0.6944921612739563, "learning_rate": 3.2903637512754816e-06, "loss": 0.0284, "step": 160660 }, { "epoch": 1.3000242738085606, "grad_norm": 0.48053842782974243, "learning_rate": 3.289700230813977e-06, "loss": 0.0298, "step": 160670 }, { "epoch": 1.3001051865037625, "grad_norm": 0.038188859820365906, "learning_rate": 3.2890367444607902e-06, "loss": 0.0191, "step": 160680 }, { "epoch": 1.3001860991989642, "grad_norm": 0.21419695019721985, "learning_rate": 3.288373292229152e-06, "loss": 0.0271, "step": 160690 }, { "epoch": 1.3002670118941662, "grad_norm": 0.5149732232093811, "learning_rate": 3.287709874132295e-06, "loss": 0.02, "step": 160700 }, { "epoch": 1.300347924589368, "grad_norm": 0.25750410556793213, "learning_rate": 3.287046490183452e-06, "loss": 0.0232, "step": 160710 }, { "epoch": 1.3004288372845698, "grad_norm": 0.14083006978034973, "learning_rate": 3.2863831403958492e-06, "loss": 0.0155, "step": 160720 }, { "epoch": 1.3005097499797718, "grad_norm": 0.3586888313293457, "learning_rate": 3.2857198247827165e-06, "loss": 0.0289, "step": 160730 }, { "epoch": 1.3005906626749737, "grad_norm": 0.6309822797775269, "learning_rate": 3.2850565433572845e-06, "loss": 0.0218, "step": 160740 }, { "epoch": 1.3006715753701756, "grad_norm": 0.46106797456741333, "learning_rate": 3.2843932961327784e-06, "loss": 0.0124, "step": 160750 }, { "epoch": 1.3007524880653776, "grad_norm": 0.2428821474313736, "learning_rate": 3.2837300831224255e-06, "loss": 0.0149, "step": 160760 }, { "epoch": 1.3008334007605793, "grad_norm": 0.16290302574634552, "learning_rate": 3.2830669043394546e-06, "loss": 0.0151, "step": 160770 }, { "epoch": 1.3009143134557812, "grad_norm": 0.2525752782821655, "learning_rate": 3.282403759797088e-06, "loss": 0.0187, "step": 160780 }, { "epoch": 1.3009952261509832, "grad_norm": 0.33293280005455017, "learning_rate": 3.2817406495085537e-06, "loss": 0.0194, "step": 160790 }, { "epoch": 1.301076138846185, "grad_norm": 0.5794699192047119, "learning_rate": 3.281077573487075e-06, "loss": 0.0224, "step": 160800 }, { "epoch": 1.3011570515413868, "grad_norm": 0.6911867260932922, "learning_rate": 3.2804145317458733e-06, "loss": 0.017, "step": 160810 }, { "epoch": 1.3012379642365888, "grad_norm": 0.0002985547762364149, "learning_rate": 3.2797515242981736e-06, "loss": 0.0177, "step": 160820 }, { "epoch": 1.3013188769317905, "grad_norm": 0.5861226320266724, "learning_rate": 3.2790885511572006e-06, "loss": 0.0212, "step": 160830 }, { "epoch": 1.3013997896269924, "grad_norm": 0.2061099410057068, "learning_rate": 3.2784256123361725e-06, "loss": 0.0153, "step": 160840 }, { "epoch": 1.3014807023221944, "grad_norm": 0.461908221244812, "learning_rate": 3.27776270784831e-06, "loss": 0.0323, "step": 160850 }, { "epoch": 1.3015616150173963, "grad_norm": 0.4876868724822998, "learning_rate": 3.2770998377068364e-06, "loss": 0.0268, "step": 160860 }, { "epoch": 1.301642527712598, "grad_norm": 0.6031381487846375, "learning_rate": 3.276437001924968e-06, "loss": 0.0222, "step": 160870 }, { "epoch": 1.3017234404078, "grad_norm": 0.38222387433052063, "learning_rate": 3.2757742005159244e-06, "loss": 0.0197, "step": 160880 }, { "epoch": 1.301804353103002, "grad_norm": 0.3711465001106262, "learning_rate": 3.2751114334929237e-06, "loss": 0.0196, "step": 160890 }, { "epoch": 1.3018852657982039, "grad_norm": 0.4840967357158661, "learning_rate": 3.274448700869187e-06, "loss": 0.0159, "step": 160900 }, { "epoch": 1.3019661784934056, "grad_norm": 0.5526808500289917, "learning_rate": 3.2737860026579266e-06, "loss": 0.0273, "step": 160910 }, { "epoch": 1.3020470911886075, "grad_norm": 0.3468133211135864, "learning_rate": 3.2731233388723595e-06, "loss": 0.0326, "step": 160920 }, { "epoch": 1.3021280038838094, "grad_norm": 0.11488057672977448, "learning_rate": 3.2724607095257035e-06, "loss": 0.0222, "step": 160930 }, { "epoch": 1.3022089165790112, "grad_norm": 0.27255865931510925, "learning_rate": 3.2717981146311707e-06, "loss": 0.0144, "step": 160940 }, { "epoch": 1.302289829274213, "grad_norm": 0.31945762038230896, "learning_rate": 3.2711355542019753e-06, "loss": 0.0134, "step": 160950 }, { "epoch": 1.302370741969415, "grad_norm": 0.34582120180130005, "learning_rate": 3.270473028251333e-06, "loss": 0.0165, "step": 160960 }, { "epoch": 1.3024516546646168, "grad_norm": 0.47924062609672546, "learning_rate": 3.2698105367924537e-06, "loss": 0.0171, "step": 160970 }, { "epoch": 1.3025325673598187, "grad_norm": 0.2062363177537918, "learning_rate": 3.269148079838551e-06, "loss": 0.0182, "step": 160980 }, { "epoch": 1.3026134800550206, "grad_norm": 0.3024488687515259, "learning_rate": 3.268485657402837e-06, "loss": 0.0184, "step": 160990 }, { "epoch": 1.3026943927502226, "grad_norm": 0.5040630102157593, "learning_rate": 3.2678232694985194e-06, "loss": 0.0248, "step": 161000 }, { "epoch": 1.3027753054454245, "grad_norm": 0.3580121099948883, "learning_rate": 3.267160916138811e-06, "loss": 0.0181, "step": 161010 }, { "epoch": 1.3028562181406262, "grad_norm": 0.20043998956680298, "learning_rate": 3.266498597336921e-06, "loss": 0.0255, "step": 161020 }, { "epoch": 1.3029371308358282, "grad_norm": 0.04514436423778534, "learning_rate": 3.2658363131060545e-06, "loss": 0.0168, "step": 161030 }, { "epoch": 1.3030180435310301, "grad_norm": 0.4719822406768799, "learning_rate": 3.265174063459422e-06, "loss": 0.0199, "step": 161040 }, { "epoch": 1.3030989562262318, "grad_norm": 0.4218842685222626, "learning_rate": 3.264511848410232e-06, "loss": 0.0182, "step": 161050 }, { "epoch": 1.3031798689214338, "grad_norm": 0.021852172911167145, "learning_rate": 3.263849667971689e-06, "loss": 0.0248, "step": 161060 }, { "epoch": 1.3032607816166357, "grad_norm": 0.2980298101902008, "learning_rate": 3.2631875221569973e-06, "loss": 0.0219, "step": 161070 }, { "epoch": 1.3033416943118374, "grad_norm": 0.17690938711166382, "learning_rate": 3.262525410979366e-06, "loss": 0.0196, "step": 161080 }, { "epoch": 1.3034226070070394, "grad_norm": 0.14383931457996368, "learning_rate": 3.2618633344519978e-06, "loss": 0.0255, "step": 161090 }, { "epoch": 1.3035035197022413, "grad_norm": 0.32921871542930603, "learning_rate": 3.2612012925880944e-06, "loss": 0.0161, "step": 161100 }, { "epoch": 1.303584432397443, "grad_norm": 0.36333033442497253, "learning_rate": 3.26053928540086e-06, "loss": 0.0206, "step": 161110 }, { "epoch": 1.303665345092645, "grad_norm": 0.41522619128227234, "learning_rate": 3.2598773129035e-06, "loss": 0.0195, "step": 161120 }, { "epoch": 1.303746257787847, "grad_norm": 0.5080431699752808, "learning_rate": 3.2592153751092114e-06, "loss": 0.0241, "step": 161130 }, { "epoch": 1.3038271704830489, "grad_norm": 0.19632713496685028, "learning_rate": 3.2585534720311973e-06, "loss": 0.019, "step": 161140 }, { "epoch": 1.3039080831782508, "grad_norm": 0.4580707848072052, "learning_rate": 3.257891603682659e-06, "loss": 0.0242, "step": 161150 }, { "epoch": 1.3039889958734525, "grad_norm": 0.8992816209793091, "learning_rate": 3.2572297700767947e-06, "loss": 0.038, "step": 161160 }, { "epoch": 1.3040699085686545, "grad_norm": 0.19444169104099274, "learning_rate": 3.256567971226802e-06, "loss": 0.0131, "step": 161170 }, { "epoch": 1.3041508212638564, "grad_norm": 0.4349680244922638, "learning_rate": 3.2559062071458826e-06, "loss": 0.0233, "step": 161180 }, { "epoch": 1.3042317339590581, "grad_norm": 0.2122819721698761, "learning_rate": 3.2552444778472304e-06, "loss": 0.0204, "step": 161190 }, { "epoch": 1.30431264665426, "grad_norm": 0.3431200087070465, "learning_rate": 3.2545827833440437e-06, "loss": 0.0152, "step": 161200 }, { "epoch": 1.304393559349462, "grad_norm": 0.41090261936187744, "learning_rate": 3.2539211236495205e-06, "loss": 0.0196, "step": 161210 }, { "epoch": 1.3044744720446637, "grad_norm": 0.44301268458366394, "learning_rate": 3.2532594987768517e-06, "loss": 0.017, "step": 161220 }, { "epoch": 1.3045553847398657, "grad_norm": 0.3572489321231842, "learning_rate": 3.2525979087392366e-06, "loss": 0.0378, "step": 161230 }, { "epoch": 1.3046362974350676, "grad_norm": 0.16911175847053528, "learning_rate": 3.2519363535498656e-06, "loss": 0.0164, "step": 161240 }, { "epoch": 1.3047172101302693, "grad_norm": 0.09136240184307098, "learning_rate": 3.2512748332219355e-06, "loss": 0.0169, "step": 161250 }, { "epoch": 1.3047981228254713, "grad_norm": 0.5254941582679749, "learning_rate": 3.250613347768636e-06, "loss": 0.0356, "step": 161260 }, { "epoch": 1.3048790355206732, "grad_norm": 0.47555533051490784, "learning_rate": 3.2499518972031605e-06, "loss": 0.0199, "step": 161270 }, { "epoch": 1.3049599482158751, "grad_norm": 0.14619164168834686, "learning_rate": 3.249290481538701e-06, "loss": 0.0217, "step": 161280 }, { "epoch": 1.305040860911077, "grad_norm": 0.25500330328941345, "learning_rate": 3.248629100788445e-06, "loss": 0.0175, "step": 161290 }, { "epoch": 1.3051217736062788, "grad_norm": 0.3724226653575897, "learning_rate": 3.247967754965585e-06, "loss": 0.0217, "step": 161300 }, { "epoch": 1.3052026863014807, "grad_norm": 0.20592613518238068, "learning_rate": 3.2473064440833102e-06, "loss": 0.0165, "step": 161310 }, { "epoch": 1.3052835989966827, "grad_norm": 0.7199425101280212, "learning_rate": 3.246645168154807e-06, "loss": 0.028, "step": 161320 }, { "epoch": 1.3053645116918844, "grad_norm": 0.5303881168365479, "learning_rate": 3.2459839271932647e-06, "loss": 0.0271, "step": 161330 }, { "epoch": 1.3054454243870863, "grad_norm": 0.38487058877944946, "learning_rate": 3.2453227212118716e-06, "loss": 0.0124, "step": 161340 }, { "epoch": 1.3055263370822883, "grad_norm": 0.5053668022155762, "learning_rate": 3.2446615502238116e-06, "loss": 0.0194, "step": 161350 }, { "epoch": 1.30560724977749, "grad_norm": 0.4214823246002197, "learning_rate": 3.24400041424227e-06, "loss": 0.0256, "step": 161360 }, { "epoch": 1.305688162472692, "grad_norm": 0.3116826117038727, "learning_rate": 3.243339313280436e-06, "loss": 0.0256, "step": 161370 }, { "epoch": 1.3057690751678939, "grad_norm": 0.4056820273399353, "learning_rate": 3.242678247351487e-06, "loss": 0.0203, "step": 161380 }, { "epoch": 1.3058499878630956, "grad_norm": 0.6637219190597534, "learning_rate": 3.242017216468613e-06, "loss": 0.0174, "step": 161390 }, { "epoch": 1.3059309005582975, "grad_norm": 0.43637925386428833, "learning_rate": 3.2413562206449957e-06, "loss": 0.0338, "step": 161400 }, { "epoch": 1.3060118132534995, "grad_norm": 0.42286810278892517, "learning_rate": 3.2406952598938125e-06, "loss": 0.0232, "step": 161410 }, { "epoch": 1.3060927259487014, "grad_norm": 0.4708978831768036, "learning_rate": 3.24003433422825e-06, "loss": 0.0186, "step": 161420 }, { "epoch": 1.3061736386439033, "grad_norm": 0.33537301421165466, "learning_rate": 3.2393734436614863e-06, "loss": 0.0291, "step": 161430 }, { "epoch": 1.306254551339105, "grad_norm": 0.33764535188674927, "learning_rate": 3.2387125882067048e-06, "loss": 0.0192, "step": 161440 }, { "epoch": 1.306335464034307, "grad_norm": 0.23859809339046478, "learning_rate": 3.2380517678770793e-06, "loss": 0.0187, "step": 161450 }, { "epoch": 1.306416376729509, "grad_norm": 0.39059293270111084, "learning_rate": 3.237390982685794e-06, "loss": 0.0144, "step": 161460 }, { "epoch": 1.3064972894247107, "grad_norm": 1.1301875114440918, "learning_rate": 3.2367302326460247e-06, "loss": 0.0265, "step": 161470 }, { "epoch": 1.3065782021199126, "grad_norm": 0.07681458443403244, "learning_rate": 3.2360695177709466e-06, "loss": 0.0203, "step": 161480 }, { "epoch": 1.3066591148151145, "grad_norm": 0.5637093782424927, "learning_rate": 3.2354088380737398e-06, "loss": 0.0237, "step": 161490 }, { "epoch": 1.3067400275103163, "grad_norm": 0.324260950088501, "learning_rate": 3.234748193567579e-06, "loss": 0.0165, "step": 161500 }, { "epoch": 1.3068209402055182, "grad_norm": 0.38806089758872986, "learning_rate": 3.2340875842656367e-06, "loss": 0.0188, "step": 161510 }, { "epoch": 1.3069018529007201, "grad_norm": 0.22948358952999115, "learning_rate": 3.2334270101810895e-06, "loss": 0.0105, "step": 161520 }, { "epoch": 1.306982765595922, "grad_norm": 0.4248676896095276, "learning_rate": 3.2327664713271133e-06, "loss": 0.027, "step": 161530 }, { "epoch": 1.3070636782911238, "grad_norm": 0.39719483256340027, "learning_rate": 3.232105967716879e-06, "loss": 0.0101, "step": 161540 }, { "epoch": 1.3071445909863257, "grad_norm": 0.259635865688324, "learning_rate": 3.2314454993635564e-06, "loss": 0.0092, "step": 161550 }, { "epoch": 1.3072255036815277, "grad_norm": 0.3998209238052368, "learning_rate": 3.2307850662803225e-06, "loss": 0.0334, "step": 161560 }, { "epoch": 1.3073064163767296, "grad_norm": 0.39775803685188293, "learning_rate": 3.2301246684803442e-06, "loss": 0.0116, "step": 161570 }, { "epoch": 1.3073873290719313, "grad_norm": 0.2551398277282715, "learning_rate": 3.2294643059767922e-06, "loss": 0.0233, "step": 161580 }, { "epoch": 1.3074682417671333, "grad_norm": 0.49363797903060913, "learning_rate": 3.228803978782839e-06, "loss": 0.0176, "step": 161590 }, { "epoch": 1.3075491544623352, "grad_norm": 0.2812524139881134, "learning_rate": 3.2281436869116488e-06, "loss": 0.0292, "step": 161600 }, { "epoch": 1.307630067157537, "grad_norm": 0.006572913844138384, "learning_rate": 3.2274834303763937e-06, "loss": 0.0149, "step": 161610 }, { "epoch": 1.3077109798527389, "grad_norm": 0.5421080589294434, "learning_rate": 3.2268232091902385e-06, "loss": 0.0227, "step": 161620 }, { "epoch": 1.3077918925479408, "grad_norm": 0.6537254452705383, "learning_rate": 3.226163023366353e-06, "loss": 0.0182, "step": 161630 }, { "epoch": 1.3078728052431425, "grad_norm": 1.26583731174469, "learning_rate": 3.2255028729179005e-06, "loss": 0.0376, "step": 161640 }, { "epoch": 1.3079537179383445, "grad_norm": 0.21648287773132324, "learning_rate": 3.2248427578580457e-06, "loss": 0.0397, "step": 161650 }, { "epoch": 1.3080346306335464, "grad_norm": 0.228980153799057, "learning_rate": 3.224182678199957e-06, "loss": 0.0117, "step": 161660 }, { "epoch": 1.3081155433287484, "grad_norm": 0.15676303207874298, "learning_rate": 3.223522633956793e-06, "loss": 0.0167, "step": 161670 }, { "epoch": 1.3081964560239503, "grad_norm": 0.611358106136322, "learning_rate": 3.2228626251417217e-06, "loss": 0.0292, "step": 161680 }, { "epoch": 1.308277368719152, "grad_norm": 0.3339431583881378, "learning_rate": 3.2222026517679038e-06, "loss": 0.0136, "step": 161690 }, { "epoch": 1.308358281414354, "grad_norm": 0.5561009645462036, "learning_rate": 3.2215427138485e-06, "loss": 0.0305, "step": 161700 }, { "epoch": 1.308439194109556, "grad_norm": 0.36001262068748474, "learning_rate": 3.2208828113966727e-06, "loss": 0.0205, "step": 161710 }, { "epoch": 1.3085201068047576, "grad_norm": 0.2123630940914154, "learning_rate": 3.2202229444255827e-06, "loss": 0.0207, "step": 161720 }, { "epoch": 1.3086010194999596, "grad_norm": 0.621915340423584, "learning_rate": 3.2195631129483873e-06, "loss": 0.0412, "step": 161730 }, { "epoch": 1.3086819321951615, "grad_norm": 0.3179779052734375, "learning_rate": 3.218903316978246e-06, "loss": 0.0165, "step": 161740 }, { "epoch": 1.3087628448903632, "grad_norm": 0.31415116786956787, "learning_rate": 3.218243556528321e-06, "loss": 0.0208, "step": 161750 }, { "epoch": 1.3088437575855651, "grad_norm": 0.5928645730018616, "learning_rate": 3.2175838316117647e-06, "loss": 0.023, "step": 161760 }, { "epoch": 1.308924670280767, "grad_norm": 0.20003141462802887, "learning_rate": 3.216924142241736e-06, "loss": 0.0106, "step": 161770 }, { "epoch": 1.3090055829759688, "grad_norm": 0.42535579204559326, "learning_rate": 3.216264488431392e-06, "loss": 0.0153, "step": 161780 }, { "epoch": 1.3090864956711707, "grad_norm": 0.18347397446632385, "learning_rate": 3.215604870193887e-06, "loss": 0.0137, "step": 161790 }, { "epoch": 1.3091674083663727, "grad_norm": 0.5438123941421509, "learning_rate": 3.2149452875423755e-06, "loss": 0.0257, "step": 161800 }, { "epoch": 1.3092483210615746, "grad_norm": 0.3853567838668823, "learning_rate": 3.214285740490011e-06, "loss": 0.0171, "step": 161810 }, { "epoch": 1.3093292337567766, "grad_norm": 1.028083324432373, "learning_rate": 3.2136262290499497e-06, "loss": 0.023, "step": 161820 }, { "epoch": 1.3094101464519783, "grad_norm": 0.29392698407173157, "learning_rate": 3.212966753235341e-06, "loss": 0.0188, "step": 161830 }, { "epoch": 1.3094910591471802, "grad_norm": 0.1988244205713272, "learning_rate": 3.2123073130593362e-06, "loss": 0.0172, "step": 161840 }, { "epoch": 1.3095719718423822, "grad_norm": 0.30074751377105713, "learning_rate": 3.211647908535091e-06, "loss": 0.0189, "step": 161850 }, { "epoch": 1.3096528845375839, "grad_norm": 0.14255674183368683, "learning_rate": 3.2109885396757505e-06, "loss": 0.0144, "step": 161860 }, { "epoch": 1.3097337972327858, "grad_norm": 0.3541543483734131, "learning_rate": 3.210329206494468e-06, "loss": 0.0137, "step": 161870 }, { "epoch": 1.3098147099279878, "grad_norm": 1.1067051887512207, "learning_rate": 3.2096699090043927e-06, "loss": 0.0184, "step": 161880 }, { "epoch": 1.3098956226231895, "grad_norm": 0.1728387475013733, "learning_rate": 3.209010647218669e-06, "loss": 0.0178, "step": 161890 }, { "epoch": 1.3099765353183914, "grad_norm": 0.3173454701900482, "learning_rate": 3.2083514211504474e-06, "loss": 0.0107, "step": 161900 }, { "epoch": 1.3100574480135934, "grad_norm": 0.27515047788619995, "learning_rate": 3.207692230812876e-06, "loss": 0.019, "step": 161910 }, { "epoch": 1.310138360708795, "grad_norm": 0.9345093965530396, "learning_rate": 3.207033076219097e-06, "loss": 0.0153, "step": 161920 }, { "epoch": 1.310219273403997, "grad_norm": 0.10207826644182205, "learning_rate": 3.206373957382258e-06, "loss": 0.0174, "step": 161930 }, { "epoch": 1.310300186099199, "grad_norm": 0.7044234871864319, "learning_rate": 3.205714874315507e-06, "loss": 0.0176, "step": 161940 }, { "epoch": 1.310381098794401, "grad_norm": 0.43730488419532776, "learning_rate": 3.205055827031983e-06, "loss": 0.0195, "step": 161950 }, { "epoch": 1.3104620114896028, "grad_norm": 0.5064066052436829, "learning_rate": 3.204396815544831e-06, "loss": 0.0218, "step": 161960 }, { "epoch": 1.3105429241848046, "grad_norm": 0.3979422450065613, "learning_rate": 3.203737839867194e-06, "loss": 0.0113, "step": 161970 }, { "epoch": 1.3106238368800065, "grad_norm": 0.39578306674957275, "learning_rate": 3.2030789000122155e-06, "loss": 0.0212, "step": 161980 }, { "epoch": 1.3107047495752084, "grad_norm": 0.3887602984905243, "learning_rate": 3.2024199959930325e-06, "loss": 0.02, "step": 161990 }, { "epoch": 1.3107856622704102, "grad_norm": 0.16992580890655518, "learning_rate": 3.201761127822787e-06, "loss": 0.0237, "step": 162000 }, { "epoch": 1.310866574965612, "grad_norm": 0.04114875942468643, "learning_rate": 3.2011022955146226e-06, "loss": 0.0164, "step": 162010 }, { "epoch": 1.310947487660814, "grad_norm": 0.7972283363342285, "learning_rate": 3.200443499081674e-06, "loss": 0.031, "step": 162020 }, { "epoch": 1.3110284003560158, "grad_norm": 0.3344062268733978, "learning_rate": 3.1997847385370796e-06, "loss": 0.0326, "step": 162030 }, { "epoch": 1.3111093130512177, "grad_norm": 0.30807995796203613, "learning_rate": 3.1991260138939804e-06, "loss": 0.0135, "step": 162040 }, { "epoch": 1.3111902257464196, "grad_norm": 0.5160409212112427, "learning_rate": 3.1984673251655097e-06, "loss": 0.032, "step": 162050 }, { "epoch": 1.3112711384416214, "grad_norm": 0.44344863295555115, "learning_rate": 3.1978086723648046e-06, "loss": 0.0171, "step": 162060 }, { "epoch": 1.3113520511368233, "grad_norm": 0.30166682600975037, "learning_rate": 3.197150055505003e-06, "loss": 0.0234, "step": 162070 }, { "epoch": 1.3114329638320252, "grad_norm": 0.3818473815917969, "learning_rate": 3.1964914745992347e-06, "loss": 0.022, "step": 162080 }, { "epoch": 1.3115138765272272, "grad_norm": 0.31268882751464844, "learning_rate": 3.195832929660638e-06, "loss": 0.0253, "step": 162090 }, { "epoch": 1.3115947892224291, "grad_norm": 0.5719522833824158, "learning_rate": 3.195174420702346e-06, "loss": 0.016, "step": 162100 }, { "epoch": 1.3116757019176308, "grad_norm": 0.4501696228981018, "learning_rate": 3.1945159477374875e-06, "loss": 0.0273, "step": 162110 }, { "epoch": 1.3117566146128328, "grad_norm": 0.10242985188961029, "learning_rate": 3.193857510779198e-06, "loss": 0.025, "step": 162120 }, { "epoch": 1.3118375273080347, "grad_norm": 0.25597521662712097, "learning_rate": 3.193199109840609e-06, "loss": 0.0162, "step": 162130 }, { "epoch": 1.3119184400032364, "grad_norm": 0.31150326132774353, "learning_rate": 3.1925407449348465e-06, "loss": 0.022, "step": 162140 }, { "epoch": 1.3119993526984384, "grad_norm": 0.2485010176897049, "learning_rate": 3.1918824160750438e-06, "loss": 0.0159, "step": 162150 }, { "epoch": 1.3120802653936403, "grad_norm": 0.44054439663887024, "learning_rate": 3.1912241232743302e-06, "loss": 0.0208, "step": 162160 }, { "epoch": 1.312161178088842, "grad_norm": 0.1263258159160614, "learning_rate": 3.190565866545834e-06, "loss": 0.0227, "step": 162170 }, { "epoch": 1.312242090784044, "grad_norm": 0.24530957639217377, "learning_rate": 3.1899076459026803e-06, "loss": 0.0166, "step": 162180 }, { "epoch": 1.312323003479246, "grad_norm": 0.4729427993297577, "learning_rate": 3.1892494613579973e-06, "loss": 0.0181, "step": 162190 }, { "epoch": 1.3124039161744478, "grad_norm": 0.3595905900001526, "learning_rate": 3.1885913129249122e-06, "loss": 0.0166, "step": 162200 }, { "epoch": 1.3124848288696496, "grad_norm": 0.32363995909690857, "learning_rate": 3.1879332006165474e-06, "loss": 0.0287, "step": 162210 }, { "epoch": 1.3125657415648515, "grad_norm": 0.4297197759151459, "learning_rate": 3.1872751244460297e-06, "loss": 0.0189, "step": 162220 }, { "epoch": 1.3126466542600534, "grad_norm": 0.26849520206451416, "learning_rate": 3.1866170844264847e-06, "loss": 0.0117, "step": 162230 }, { "epoch": 1.3127275669552554, "grad_norm": 0.21014556288719177, "learning_rate": 3.1859590805710326e-06, "loss": 0.0216, "step": 162240 }, { "epoch": 1.312808479650457, "grad_norm": 0.3707835078239441, "learning_rate": 3.185301112892797e-06, "loss": 0.0169, "step": 162250 }, { "epoch": 1.312889392345659, "grad_norm": 0.24587410688400269, "learning_rate": 3.1846431814049006e-06, "loss": 0.0138, "step": 162260 }, { "epoch": 1.312970305040861, "grad_norm": 0.7702353596687317, "learning_rate": 3.183985286120463e-06, "loss": 0.015, "step": 162270 }, { "epoch": 1.3130512177360627, "grad_norm": 0.22801122069358826, "learning_rate": 3.183327427052604e-06, "loss": 0.0218, "step": 162280 }, { "epoch": 1.3131321304312646, "grad_norm": 0.7578248977661133, "learning_rate": 3.1826696042144458e-06, "loss": 0.022, "step": 162290 }, { "epoch": 1.3132130431264666, "grad_norm": 0.5837726593017578, "learning_rate": 3.1820118176191046e-06, "loss": 0.0356, "step": 162300 }, { "epoch": 1.3132939558216683, "grad_norm": 0.21746166050434113, "learning_rate": 3.1813540672797005e-06, "loss": 0.0286, "step": 162310 }, { "epoch": 1.3133748685168702, "grad_norm": 0.5364417433738708, "learning_rate": 3.1806963532093507e-06, "loss": 0.0229, "step": 162320 }, { "epoch": 1.3134557812120722, "grad_norm": 0.21332277357578278, "learning_rate": 3.18003867542117e-06, "loss": 0.0205, "step": 162330 }, { "epoch": 1.3135366939072741, "grad_norm": 0.1456099897623062, "learning_rate": 3.1793810339282754e-06, "loss": 0.0152, "step": 162340 }, { "epoch": 1.313617606602476, "grad_norm": 0.19654513895511627, "learning_rate": 3.178723428743784e-06, "loss": 0.0226, "step": 162350 }, { "epoch": 1.3136985192976778, "grad_norm": 0.003572263987734914, "learning_rate": 3.17806585988081e-06, "loss": 0.0232, "step": 162360 }, { "epoch": 1.3137794319928797, "grad_norm": 0.530408501625061, "learning_rate": 3.1774083273524636e-06, "loss": 0.0175, "step": 162370 }, { "epoch": 1.3138603446880817, "grad_norm": 0.20602279901504517, "learning_rate": 3.176750831171862e-06, "loss": 0.0118, "step": 162380 }, { "epoch": 1.3139412573832834, "grad_norm": 0.37078866362571716, "learning_rate": 3.1760933713521164e-06, "loss": 0.0251, "step": 162390 }, { "epoch": 1.3140221700784853, "grad_norm": 0.3425992727279663, "learning_rate": 3.175435947906337e-06, "loss": 0.0242, "step": 162400 }, { "epoch": 1.3141030827736873, "grad_norm": 0.3677610158920288, "learning_rate": 3.1747785608476346e-06, "loss": 0.0305, "step": 162410 }, { "epoch": 1.314183995468889, "grad_norm": 0.5589055418968201, "learning_rate": 3.1741212101891233e-06, "loss": 0.0218, "step": 162420 }, { "epoch": 1.314264908164091, "grad_norm": 0.2969258427619934, "learning_rate": 3.173463895943909e-06, "loss": 0.0273, "step": 162430 }, { "epoch": 1.3143458208592929, "grad_norm": 0.3215252757072449, "learning_rate": 3.1728066181251e-06, "loss": 0.0236, "step": 162440 }, { "epoch": 1.3144267335544946, "grad_norm": 0.396950900554657, "learning_rate": 3.172149376745808e-06, "loss": 0.0172, "step": 162450 }, { "epoch": 1.3145076462496965, "grad_norm": 0.4902876615524292, "learning_rate": 3.171492171819136e-06, "loss": 0.0235, "step": 162460 }, { "epoch": 1.3145885589448985, "grad_norm": 0.3073599636554718, "learning_rate": 3.1708350033581927e-06, "loss": 0.027, "step": 162470 }, { "epoch": 1.3146694716401004, "grad_norm": 0.37351158261299133, "learning_rate": 3.1701778713760844e-06, "loss": 0.0242, "step": 162480 }, { "epoch": 1.3147503843353023, "grad_norm": 0.23427964746952057, "learning_rate": 3.1695207758859147e-06, "loss": 0.022, "step": 162490 }, { "epoch": 1.314831297030504, "grad_norm": 0.77630615234375, "learning_rate": 3.16886371690079e-06, "loss": 0.0212, "step": 162500 }, { "epoch": 1.314912209725706, "grad_norm": 0.3660842478275299, "learning_rate": 3.168206694433811e-06, "loss": 0.0205, "step": 162510 }, { "epoch": 1.314993122420908, "grad_norm": 0.4503422677516937, "learning_rate": 3.167549708498084e-06, "loss": 0.0296, "step": 162520 }, { "epoch": 1.3150740351161097, "grad_norm": 0.22803063690662384, "learning_rate": 3.166892759106709e-06, "loss": 0.0254, "step": 162530 }, { "epoch": 1.3151549478113116, "grad_norm": 0.38607358932495117, "learning_rate": 3.166235846272787e-06, "loss": 0.0262, "step": 162540 }, { "epoch": 1.3152358605065135, "grad_norm": 0.3733702301979065, "learning_rate": 3.165578970009422e-06, "loss": 0.0162, "step": 162550 }, { "epoch": 1.3153167732017153, "grad_norm": 0.48903799057006836, "learning_rate": 3.16492213032971e-06, "loss": 0.0197, "step": 162560 }, { "epoch": 1.3153976858969172, "grad_norm": 0.3352905809879303, "learning_rate": 3.1642653272467526e-06, "loss": 0.0179, "step": 162570 }, { "epoch": 1.3154785985921191, "grad_norm": 0.8784321546554565, "learning_rate": 3.1636085607736496e-06, "loss": 0.0351, "step": 162580 }, { "epoch": 1.3155595112873208, "grad_norm": 0.011340126395225525, "learning_rate": 3.162951830923494e-06, "loss": 0.0117, "step": 162590 }, { "epoch": 1.3156404239825228, "grad_norm": 0.3377457559108734, "learning_rate": 3.1622951377093882e-06, "loss": 0.0237, "step": 162600 }, { "epoch": 1.3157213366777247, "grad_norm": 0.6855118274688721, "learning_rate": 3.161638481144427e-06, "loss": 0.0233, "step": 162610 }, { "epoch": 1.3158022493729267, "grad_norm": 0.5205461978912354, "learning_rate": 3.160981861241702e-06, "loss": 0.0133, "step": 162620 }, { "epoch": 1.3158831620681286, "grad_norm": 0.08827827870845795, "learning_rate": 3.1603252780143124e-06, "loss": 0.0234, "step": 162630 }, { "epoch": 1.3159640747633303, "grad_norm": 0.4322344958782196, "learning_rate": 3.159668731475354e-06, "loss": 0.0295, "step": 162640 }, { "epoch": 1.3160449874585323, "grad_norm": 0.26376745104789734, "learning_rate": 3.159012221637916e-06, "loss": 0.0162, "step": 162650 }, { "epoch": 1.3161259001537342, "grad_norm": 0.5299333930015564, "learning_rate": 3.158355748515092e-06, "loss": 0.0212, "step": 162660 }, { "epoch": 1.316206812848936, "grad_norm": 0.5517048239707947, "learning_rate": 3.157699312119976e-06, "loss": 0.0247, "step": 162670 }, { "epoch": 1.3162877255441379, "grad_norm": 0.4868042469024658, "learning_rate": 3.1570429124656575e-06, "loss": 0.0236, "step": 162680 }, { "epoch": 1.3163686382393398, "grad_norm": 0.3792479336261749, "learning_rate": 3.156386549565227e-06, "loss": 0.0116, "step": 162690 }, { "epoch": 1.3164495509345415, "grad_norm": 0.027652133256196976, "learning_rate": 3.155730223431773e-06, "loss": 0.0309, "step": 162700 }, { "epoch": 1.3165304636297435, "grad_norm": 0.20940303802490234, "learning_rate": 3.1550739340783897e-06, "loss": 0.0208, "step": 162710 }, { "epoch": 1.3166113763249454, "grad_norm": 0.46810734272003174, "learning_rate": 3.154417681518161e-06, "loss": 0.0228, "step": 162720 }, { "epoch": 1.3166922890201473, "grad_norm": 0.29601433873176575, "learning_rate": 3.153761465764174e-06, "loss": 0.0121, "step": 162730 }, { "epoch": 1.316773201715349, "grad_norm": 0.4269343912601471, "learning_rate": 3.153105286829519e-06, "loss": 0.0199, "step": 162740 }, { "epoch": 1.316854114410551, "grad_norm": 0.23540475964546204, "learning_rate": 3.15244914472728e-06, "loss": 0.0283, "step": 162750 }, { "epoch": 1.316935027105753, "grad_norm": 0.45490866899490356, "learning_rate": 3.15179303947054e-06, "loss": 0.018, "step": 162760 }, { "epoch": 1.3170159398009549, "grad_norm": 0.47424253821372986, "learning_rate": 3.151136971072389e-06, "loss": 0.0298, "step": 162770 }, { "epoch": 1.3170968524961566, "grad_norm": 0.3852784335613251, "learning_rate": 3.1504809395459057e-06, "loss": 0.0159, "step": 162780 }, { "epoch": 1.3171777651913585, "grad_norm": 0.6102083921432495, "learning_rate": 3.149824944904176e-06, "loss": 0.0159, "step": 162790 }, { "epoch": 1.3172586778865605, "grad_norm": 0.33813291788101196, "learning_rate": 3.1491689871602837e-06, "loss": 0.0196, "step": 162800 }, { "epoch": 1.3173395905817622, "grad_norm": 0.19070835411548615, "learning_rate": 3.148513066327307e-06, "loss": 0.021, "step": 162810 }, { "epoch": 1.3174205032769641, "grad_norm": 0.24463801085948944, "learning_rate": 3.147857182418329e-06, "loss": 0.0154, "step": 162820 }, { "epoch": 1.317501415972166, "grad_norm": 0.2667180299758911, "learning_rate": 3.147201335446429e-06, "loss": 0.0165, "step": 162830 }, { "epoch": 1.3175823286673678, "grad_norm": 0.2586504817008972, "learning_rate": 3.146545525424688e-06, "loss": 0.0182, "step": 162840 }, { "epoch": 1.3176632413625697, "grad_norm": 0.3170592486858368, "learning_rate": 3.145889752366181e-06, "loss": 0.0219, "step": 162850 }, { "epoch": 1.3177441540577717, "grad_norm": 0.35692331194877625, "learning_rate": 3.1452340162839925e-06, "loss": 0.0165, "step": 162860 }, { "epoch": 1.3178250667529736, "grad_norm": 0.5309211611747742, "learning_rate": 3.1445783171911935e-06, "loss": 0.0251, "step": 162870 }, { "epoch": 1.3179059794481756, "grad_norm": 0.49873238801956177, "learning_rate": 3.1439226551008628e-06, "loss": 0.0184, "step": 162880 }, { "epoch": 1.3179868921433773, "grad_norm": 0.17723266780376434, "learning_rate": 3.1432670300260764e-06, "loss": 0.0169, "step": 162890 }, { "epoch": 1.3180678048385792, "grad_norm": 0.011505364440381527, "learning_rate": 3.1426114419799113e-06, "loss": 0.0166, "step": 162900 }, { "epoch": 1.3181487175337812, "grad_norm": 0.3349466919898987, "learning_rate": 3.141955890975439e-06, "loss": 0.0345, "step": 162910 }, { "epoch": 1.3182296302289829, "grad_norm": 0.6283305883407593, "learning_rate": 3.1413003770257327e-06, "loss": 0.0308, "step": 162920 }, { "epoch": 1.3183105429241848, "grad_norm": 0.38740110397338867, "learning_rate": 3.140644900143869e-06, "loss": 0.0149, "step": 162930 }, { "epoch": 1.3183914556193868, "grad_norm": 0.29558539390563965, "learning_rate": 3.139989460342916e-06, "loss": 0.0142, "step": 162940 }, { "epoch": 1.3184723683145885, "grad_norm": 0.2888144850730896, "learning_rate": 3.1393340576359455e-06, "loss": 0.0175, "step": 162950 }, { "epoch": 1.3185532810097904, "grad_norm": 0.39367708563804626, "learning_rate": 3.1386786920360314e-06, "loss": 0.0238, "step": 162960 }, { "epoch": 1.3186341937049924, "grad_norm": 0.6274378895759583, "learning_rate": 3.1380233635562385e-06, "loss": 0.0244, "step": 162970 }, { "epoch": 1.318715106400194, "grad_norm": 0.3006354868412018, "learning_rate": 3.137368072209641e-06, "loss": 0.0225, "step": 162980 }, { "epoch": 1.318796019095396, "grad_norm": 0.5854172706604004, "learning_rate": 3.136712818009305e-06, "loss": 0.0246, "step": 162990 }, { "epoch": 1.318876931790598, "grad_norm": 0.1344326138496399, "learning_rate": 3.1360576009682974e-06, "loss": 0.0346, "step": 163000 }, { "epoch": 1.3189578444858, "grad_norm": 0.2307727038860321, "learning_rate": 3.1354024210996863e-06, "loss": 0.0197, "step": 163010 }, { "epoch": 1.3190387571810018, "grad_norm": 0.49028918147087097, "learning_rate": 3.134747278416539e-06, "loss": 0.0138, "step": 163020 }, { "epoch": 1.3191196698762035, "grad_norm": 0.3535199463367462, "learning_rate": 3.1340921729319173e-06, "loss": 0.0184, "step": 163030 }, { "epoch": 1.3192005825714055, "grad_norm": 0.22206540405750275, "learning_rate": 3.133437104658888e-06, "loss": 0.019, "step": 163040 }, { "epoch": 1.3192814952666074, "grad_norm": 0.5813913941383362, "learning_rate": 3.1327820736105163e-06, "loss": 0.0263, "step": 163050 }, { "epoch": 1.3193624079618091, "grad_norm": 0.31867074966430664, "learning_rate": 3.1321270797998648e-06, "loss": 0.0251, "step": 163060 }, { "epoch": 1.319443320657011, "grad_norm": 0.5805930495262146, "learning_rate": 3.1314721232399943e-06, "loss": 0.0437, "step": 163070 }, { "epoch": 1.319524233352213, "grad_norm": 0.499688059091568, "learning_rate": 3.1308172039439682e-06, "loss": 0.0187, "step": 163080 }, { "epoch": 1.3196051460474147, "grad_norm": 0.11895804852247238, "learning_rate": 3.130162321924848e-06, "loss": 0.0188, "step": 163090 }, { "epoch": 1.3196860587426167, "grad_norm": 0.23129138350486755, "learning_rate": 3.1295074771956906e-06, "loss": 0.0188, "step": 163100 }, { "epoch": 1.3197669714378186, "grad_norm": 0.3136471211910248, "learning_rate": 3.128852669769558e-06, "loss": 0.0285, "step": 163110 }, { "epoch": 1.3198478841330203, "grad_norm": 0.013880291022360325, "learning_rate": 3.128197899659511e-06, "loss": 0.0326, "step": 163120 }, { "epoch": 1.3199287968282223, "grad_norm": 0.27494245767593384, "learning_rate": 3.1275431668786038e-06, "loss": 0.016, "step": 163130 }, { "epoch": 1.3200097095234242, "grad_norm": 0.2608553469181061, "learning_rate": 3.126888471439895e-06, "loss": 0.0113, "step": 163140 }, { "epoch": 1.3200906222186262, "grad_norm": 0.6782314777374268, "learning_rate": 3.126233813356443e-06, "loss": 0.0232, "step": 163150 }, { "epoch": 1.320171534913828, "grad_norm": 0.4724136292934418, "learning_rate": 3.1255791926413e-06, "loss": 0.0202, "step": 163160 }, { "epoch": 1.3202524476090298, "grad_norm": 0.3279850482940674, "learning_rate": 3.124924609307523e-06, "loss": 0.0395, "step": 163170 }, { "epoch": 1.3203333603042318, "grad_norm": 0.6836313605308533, "learning_rate": 3.1242700633681678e-06, "loss": 0.0231, "step": 163180 }, { "epoch": 1.3204142729994337, "grad_norm": 0.4797872304916382, "learning_rate": 3.1236155548362844e-06, "loss": 0.0193, "step": 163190 }, { "epoch": 1.3204951856946354, "grad_norm": 0.28829070925712585, "learning_rate": 3.1229610837249293e-06, "loss": 0.025, "step": 163200 }, { "epoch": 1.3205760983898374, "grad_norm": 0.31863516569137573, "learning_rate": 3.122306650047152e-06, "loss": 0.0146, "step": 163210 }, { "epoch": 1.3206570110850393, "grad_norm": 0.19926586747169495, "learning_rate": 3.1216522538160045e-06, "loss": 0.0154, "step": 163220 }, { "epoch": 1.320737923780241, "grad_norm": 0.20634467899799347, "learning_rate": 3.1209978950445374e-06, "loss": 0.0248, "step": 163230 }, { "epoch": 1.320818836475443, "grad_norm": 0.2857857346534729, "learning_rate": 3.1203435737457994e-06, "loss": 0.0217, "step": 163240 }, { "epoch": 1.320899749170645, "grad_norm": 0.06745702773332596, "learning_rate": 3.119689289932843e-06, "loss": 0.019, "step": 163250 }, { "epoch": 1.3209806618658466, "grad_norm": 0.5077411532402039, "learning_rate": 3.119035043618712e-06, "loss": 0.0241, "step": 163260 }, { "epoch": 1.3210615745610486, "grad_norm": 0.20614708960056305, "learning_rate": 3.1183808348164574e-06, "loss": 0.0235, "step": 163270 }, { "epoch": 1.3211424872562505, "grad_norm": 0.28843262791633606, "learning_rate": 3.1177266635391255e-06, "loss": 0.0206, "step": 163280 }, { "epoch": 1.3212233999514524, "grad_norm": 0.21612782776355743, "learning_rate": 3.11707252979976e-06, "loss": 0.0083, "step": 163290 }, { "epoch": 1.3213043126466544, "grad_norm": 0.34216272830963135, "learning_rate": 3.116418433611409e-06, "loss": 0.0247, "step": 163300 }, { "epoch": 1.321385225341856, "grad_norm": 0.4840203821659088, "learning_rate": 3.1157643749871166e-06, "loss": 0.0225, "step": 163310 }, { "epoch": 1.321466138037058, "grad_norm": 0.6598905920982361, "learning_rate": 3.1151103539399243e-06, "loss": 0.0149, "step": 163320 }, { "epoch": 1.32154705073226, "grad_norm": 0.23908157646656036, "learning_rate": 3.1144563704828756e-06, "loss": 0.0211, "step": 163330 }, { "epoch": 1.3216279634274617, "grad_norm": 0.4091673195362091, "learning_rate": 3.1138024246290177e-06, "loss": 0.0364, "step": 163340 }, { "epoch": 1.3217088761226636, "grad_norm": 0.2862241864204407, "learning_rate": 3.1131485163913865e-06, "loss": 0.0223, "step": 163350 }, { "epoch": 1.3217897888178656, "grad_norm": 0.2892676591873169, "learning_rate": 3.112494645783024e-06, "loss": 0.0165, "step": 163360 }, { "epoch": 1.3218707015130673, "grad_norm": 0.520916223526001, "learning_rate": 3.111840812816973e-06, "loss": 0.0178, "step": 163370 }, { "epoch": 1.3219516142082692, "grad_norm": 0.13116782903671265, "learning_rate": 3.111187017506269e-06, "loss": 0.0219, "step": 163380 }, { "epoch": 1.3220325269034712, "grad_norm": 0.4332225024700165, "learning_rate": 3.110533259863954e-06, "loss": 0.0241, "step": 163390 }, { "epoch": 1.3221134395986731, "grad_norm": 0.31045717000961304, "learning_rate": 3.1098795399030647e-06, "loss": 0.0357, "step": 163400 }, { "epoch": 1.3221943522938748, "grad_norm": 0.3362574875354767, "learning_rate": 3.1092258576366354e-06, "loss": 0.0157, "step": 163410 }, { "epoch": 1.3222752649890768, "grad_norm": 0.6771503686904907, "learning_rate": 3.1085722130777065e-06, "loss": 0.0211, "step": 163420 }, { "epoch": 1.3223561776842787, "grad_norm": 0.011231536976993084, "learning_rate": 3.107918606239311e-06, "loss": 0.0225, "step": 163430 }, { "epoch": 1.3224370903794807, "grad_norm": 0.35937052965164185, "learning_rate": 3.1072650371344855e-06, "loss": 0.0254, "step": 163440 }, { "epoch": 1.3225180030746824, "grad_norm": 0.6896353363990784, "learning_rate": 3.1066115057762614e-06, "loss": 0.0174, "step": 163450 }, { "epoch": 1.3225989157698843, "grad_norm": 0.11444099992513657, "learning_rate": 3.1059580121776755e-06, "loss": 0.0113, "step": 163460 }, { "epoch": 1.3226798284650862, "grad_norm": 0.3720993101596832, "learning_rate": 3.1053045563517585e-06, "loss": 0.0227, "step": 163470 }, { "epoch": 1.322760741160288, "grad_norm": 0.29870352149009705, "learning_rate": 3.104651138311541e-06, "loss": 0.0153, "step": 163480 }, { "epoch": 1.32284165385549, "grad_norm": 0.41032013297080994, "learning_rate": 3.1039977580700564e-06, "loss": 0.0192, "step": 163490 }, { "epoch": 1.3229225665506918, "grad_norm": 1.0142959356307983, "learning_rate": 3.1033444156403343e-06, "loss": 0.0282, "step": 163500 }, { "epoch": 1.3230034792458936, "grad_norm": 0.32732173800468445, "learning_rate": 3.1026911110354017e-06, "loss": 0.0213, "step": 163510 }, { "epoch": 1.3230843919410955, "grad_norm": 0.37986356019973755, "learning_rate": 3.10203784426829e-06, "loss": 0.0199, "step": 163520 }, { "epoch": 1.3231653046362974, "grad_norm": 0.19342072308063507, "learning_rate": 3.101384615352029e-06, "loss": 0.0266, "step": 163530 }, { "epoch": 1.3232462173314994, "grad_norm": 0.37025997042655945, "learning_rate": 3.1007314242996424e-06, "loss": 0.0265, "step": 163540 }, { "epoch": 1.3233271300267013, "grad_norm": 0.776538610458374, "learning_rate": 3.1000782711241562e-06, "loss": 0.0198, "step": 163550 }, { "epoch": 1.323408042721903, "grad_norm": 0.3207849860191345, "learning_rate": 3.0994251558386014e-06, "loss": 0.0147, "step": 163560 }, { "epoch": 1.323488955417105, "grad_norm": 0.3464394807815552, "learning_rate": 3.0987720784559972e-06, "loss": 0.0234, "step": 163570 }, { "epoch": 1.323569868112307, "grad_norm": 0.4361715316772461, "learning_rate": 3.09811903898937e-06, "loss": 0.0236, "step": 163580 }, { "epoch": 1.3236507808075086, "grad_norm": 0.46198442578315735, "learning_rate": 3.097466037451743e-06, "loss": 0.0368, "step": 163590 }, { "epoch": 1.3237316935027106, "grad_norm": 0.3999917805194855, "learning_rate": 3.096813073856142e-06, "loss": 0.0196, "step": 163600 }, { "epoch": 1.3238126061979125, "grad_norm": 0.5482275485992432, "learning_rate": 3.0961601482155855e-06, "loss": 0.0276, "step": 163610 }, { "epoch": 1.3238935188931142, "grad_norm": 0.2924404740333557, "learning_rate": 3.095507260543094e-06, "loss": 0.0282, "step": 163620 }, { "epoch": 1.3239744315883162, "grad_norm": 0.4542754888534546, "learning_rate": 3.094854410851692e-06, "loss": 0.0199, "step": 163630 }, { "epoch": 1.3240553442835181, "grad_norm": 0.38834914565086365, "learning_rate": 3.0942015991543962e-06, "loss": 0.012, "step": 163640 }, { "epoch": 1.3241362569787198, "grad_norm": 0.32095032930374146, "learning_rate": 3.093548825464225e-06, "loss": 0.0242, "step": 163650 }, { "epoch": 1.3242171696739218, "grad_norm": 0.35141634941101074, "learning_rate": 3.0928960897941994e-06, "loss": 0.0226, "step": 163660 }, { "epoch": 1.3242980823691237, "grad_norm": 0.2951580882072449, "learning_rate": 3.0922433921573335e-06, "loss": 0.0257, "step": 163670 }, { "epoch": 1.3243789950643257, "grad_norm": 0.23034319281578064, "learning_rate": 3.091590732566647e-06, "loss": 0.0229, "step": 163680 }, { "epoch": 1.3244599077595276, "grad_norm": 0.31429392099380493, "learning_rate": 3.090938111035154e-06, "loss": 0.0169, "step": 163690 }, { "epoch": 1.3245408204547293, "grad_norm": 0.41571786999702454, "learning_rate": 3.09028552757587e-06, "loss": 0.0229, "step": 163700 }, { "epoch": 1.3246217331499313, "grad_norm": 0.5633751153945923, "learning_rate": 3.08963298220181e-06, "loss": 0.0357, "step": 163710 }, { "epoch": 1.3247026458451332, "grad_norm": 0.21517059206962585, "learning_rate": 3.088980474925988e-06, "loss": 0.0199, "step": 163720 }, { "epoch": 1.324783558540335, "grad_norm": 0.04288012161850929, "learning_rate": 3.088328005761414e-06, "loss": 0.0236, "step": 163730 }, { "epoch": 1.3248644712355369, "grad_norm": 0.22430665791034698, "learning_rate": 3.087675574721103e-06, "loss": 0.0184, "step": 163740 }, { "epoch": 1.3249453839307388, "grad_norm": 0.45728084444999695, "learning_rate": 3.087023181818066e-06, "loss": 0.0186, "step": 163750 }, { "epoch": 1.3250262966259405, "grad_norm": 0.07995900511741638, "learning_rate": 3.086370827065314e-06, "loss": 0.0231, "step": 163760 }, { "epoch": 1.3251072093211425, "grad_norm": 0.5227985382080078, "learning_rate": 3.0857185104758538e-06, "loss": 0.0208, "step": 163770 }, { "epoch": 1.3251881220163444, "grad_norm": 0.23458091914653778, "learning_rate": 3.085066232062698e-06, "loss": 0.0119, "step": 163780 }, { "epoch": 1.3252690347115461, "grad_norm": 0.4426451623439789, "learning_rate": 3.0844139918388545e-06, "loss": 0.0256, "step": 163790 }, { "epoch": 1.325349947406748, "grad_norm": 0.39809274673461914, "learning_rate": 3.0837617898173277e-06, "loss": 0.0278, "step": 163800 }, { "epoch": 1.32543086010195, "grad_norm": 0.5772947072982788, "learning_rate": 3.083109626011127e-06, "loss": 0.0187, "step": 163810 }, { "epoch": 1.325511772797152, "grad_norm": 0.2758791446685791, "learning_rate": 3.08245750043326e-06, "loss": 0.0108, "step": 163820 }, { "epoch": 1.3255926854923539, "grad_norm": 0.4292323887348175, "learning_rate": 3.0818054130967294e-06, "loss": 0.0155, "step": 163830 }, { "epoch": 1.3256735981875556, "grad_norm": 0.35130342841148376, "learning_rate": 3.081153364014539e-06, "loss": 0.0196, "step": 163840 }, { "epoch": 1.3257545108827575, "grad_norm": 0.38376009464263916, "learning_rate": 3.080501353199696e-06, "loss": 0.0193, "step": 163850 }, { "epoch": 1.3258354235779595, "grad_norm": 0.1571551412343979, "learning_rate": 3.0798493806652e-06, "loss": 0.0192, "step": 163860 }, { "epoch": 1.3259163362731612, "grad_norm": 0.1720850169658661, "learning_rate": 3.079197446424054e-06, "loss": 0.0154, "step": 163870 }, { "epoch": 1.3259972489683631, "grad_norm": 0.5407447814941406, "learning_rate": 3.0785455504892616e-06, "loss": 0.0257, "step": 163880 }, { "epoch": 1.326078161663565, "grad_norm": 0.3928089737892151, "learning_rate": 3.07789369287382e-06, "loss": 0.0166, "step": 163890 }, { "epoch": 1.3261590743587668, "grad_norm": 0.4851464331150055, "learning_rate": 3.0772418735907315e-06, "loss": 0.0258, "step": 163900 }, { "epoch": 1.3262399870539687, "grad_norm": 0.38024088740348816, "learning_rate": 3.0765900926529963e-06, "loss": 0.0161, "step": 163910 }, { "epoch": 1.3263208997491707, "grad_norm": 0.07104073464870453, "learning_rate": 3.0759383500736085e-06, "loss": 0.018, "step": 163920 }, { "epoch": 1.3264018124443724, "grad_norm": 0.2917593717575073, "learning_rate": 3.0752866458655696e-06, "loss": 0.0171, "step": 163930 }, { "epoch": 1.3264827251395743, "grad_norm": 0.5360897183418274, "learning_rate": 3.074634980041876e-06, "loss": 0.018, "step": 163940 }, { "epoch": 1.3265636378347763, "grad_norm": 0.3820699155330658, "learning_rate": 3.0739833526155226e-06, "loss": 0.0209, "step": 163950 }, { "epoch": 1.3266445505299782, "grad_norm": 0.1908310502767563, "learning_rate": 3.073331763599504e-06, "loss": 0.0384, "step": 163960 }, { "epoch": 1.3267254632251801, "grad_norm": 0.5241608023643494, "learning_rate": 3.0726802130068177e-06, "loss": 0.024, "step": 163970 }, { "epoch": 1.3268063759203819, "grad_norm": 0.29809269309043884, "learning_rate": 3.072028700850457e-06, "loss": 0.0101, "step": 163980 }, { "epoch": 1.3268872886155838, "grad_norm": 0.34386423230171204, "learning_rate": 3.0713772271434107e-06, "loss": 0.0213, "step": 163990 }, { "epoch": 1.3269682013107857, "grad_norm": 0.3277037441730499, "learning_rate": 3.070725791898674e-06, "loss": 0.0136, "step": 164000 }, { "epoch": 1.3270491140059875, "grad_norm": 0.003237942699342966, "learning_rate": 3.070074395129241e-06, "loss": 0.0198, "step": 164010 }, { "epoch": 1.3271300267011894, "grad_norm": 0.2710258960723877, "learning_rate": 3.0694230368480993e-06, "loss": 0.0162, "step": 164020 }, { "epoch": 1.3272109393963913, "grad_norm": 0.4069051146507263, "learning_rate": 3.0687717170682378e-06, "loss": 0.0152, "step": 164030 }, { "epoch": 1.327291852091593, "grad_norm": 0.3781047761440277, "learning_rate": 3.06812043580265e-06, "loss": 0.0162, "step": 164040 }, { "epoch": 1.327372764786795, "grad_norm": 0.23199820518493652, "learning_rate": 3.067469193064321e-06, "loss": 0.0199, "step": 164050 }, { "epoch": 1.327453677481997, "grad_norm": 0.24752242863178253, "learning_rate": 3.0668179888662373e-06, "loss": 0.0245, "step": 164060 }, { "epoch": 1.3275345901771989, "grad_norm": 0.41998279094696045, "learning_rate": 3.0661668232213903e-06, "loss": 0.0252, "step": 164070 }, { "epoch": 1.3276155028724006, "grad_norm": 0.2895800769329071, "learning_rate": 3.0655156961427614e-06, "loss": 0.029, "step": 164080 }, { "epoch": 1.3276964155676025, "grad_norm": 0.6314170360565186, "learning_rate": 3.0648646076433386e-06, "loss": 0.0196, "step": 164090 }, { "epoch": 1.3277773282628045, "grad_norm": 1.2694600820541382, "learning_rate": 3.064213557736107e-06, "loss": 0.0164, "step": 164100 }, { "epoch": 1.3278582409580064, "grad_norm": 0.21374289691448212, "learning_rate": 3.063562546434048e-06, "loss": 0.0238, "step": 164110 }, { "epoch": 1.3279391536532081, "grad_norm": 0.25319212675094604, "learning_rate": 3.0629115737501462e-06, "loss": 0.0229, "step": 164120 }, { "epoch": 1.32802006634841, "grad_norm": 0.5981480479240417, "learning_rate": 3.0622606396973855e-06, "loss": 0.0138, "step": 164130 }, { "epoch": 1.328100979043612, "grad_norm": 0.23981916904449463, "learning_rate": 3.0616097442887428e-06, "loss": 0.0111, "step": 164140 }, { "epoch": 1.3281818917388137, "grad_norm": 0.3565245270729065, "learning_rate": 3.060958887537202e-06, "loss": 0.0223, "step": 164150 }, { "epoch": 1.3282628044340157, "grad_norm": 0.0396057665348053, "learning_rate": 3.060308069455744e-06, "loss": 0.0168, "step": 164160 }, { "epoch": 1.3283437171292176, "grad_norm": 0.642911434173584, "learning_rate": 3.0596572900573475e-06, "loss": 0.0171, "step": 164170 }, { "epoch": 1.3284246298244193, "grad_norm": 0.5484945774078369, "learning_rate": 3.059006549354988e-06, "loss": 0.0167, "step": 164180 }, { "epoch": 1.3285055425196213, "grad_norm": 0.5366618037223816, "learning_rate": 3.058355847361646e-06, "loss": 0.0158, "step": 164190 }, { "epoch": 1.3285864552148232, "grad_norm": 0.2452317476272583, "learning_rate": 3.0577051840902994e-06, "loss": 0.0215, "step": 164200 }, { "epoch": 1.3286673679100252, "grad_norm": 0.3299197256565094, "learning_rate": 3.05705455955392e-06, "loss": 0.0186, "step": 164210 }, { "epoch": 1.328748280605227, "grad_norm": 0.05049138888716698, "learning_rate": 3.0564039737654857e-06, "loss": 0.0158, "step": 164220 }, { "epoch": 1.3288291933004288, "grad_norm": 0.5960729718208313, "learning_rate": 3.055753426737973e-06, "loss": 0.0334, "step": 164230 }, { "epoch": 1.3289101059956308, "grad_norm": 0.29549509286880493, "learning_rate": 3.055102918484353e-06, "loss": 0.0116, "step": 164240 }, { "epoch": 1.3289910186908327, "grad_norm": 0.2040991187095642, "learning_rate": 3.0544524490175975e-06, "loss": 0.0218, "step": 164250 }, { "epoch": 1.3290719313860344, "grad_norm": 0.17024971544742584, "learning_rate": 3.0538020183506837e-06, "loss": 0.0146, "step": 164260 }, { "epoch": 1.3291528440812364, "grad_norm": 0.20139412581920624, "learning_rate": 3.053151626496579e-06, "loss": 0.0153, "step": 164270 }, { "epoch": 1.3292337567764383, "grad_norm": 0.21500267088413239, "learning_rate": 3.0525012734682536e-06, "loss": 0.0143, "step": 164280 }, { "epoch": 1.32931466947164, "grad_norm": 0.2964136004447937, "learning_rate": 3.0518509592786815e-06, "loss": 0.0281, "step": 164290 }, { "epoch": 1.329395582166842, "grad_norm": 0.4381480813026428, "learning_rate": 3.051200683940827e-06, "loss": 0.0174, "step": 164300 }, { "epoch": 1.329476494862044, "grad_norm": 0.2696826159954071, "learning_rate": 3.0505504474676617e-06, "loss": 0.0203, "step": 164310 }, { "epoch": 1.3295574075572456, "grad_norm": 0.300859659910202, "learning_rate": 3.049900249872151e-06, "loss": 0.0233, "step": 164320 }, { "epoch": 1.3296383202524475, "grad_norm": 0.17613141238689423, "learning_rate": 3.049250091167265e-06, "loss": 0.0155, "step": 164330 }, { "epoch": 1.3297192329476495, "grad_norm": 0.2966769337654114, "learning_rate": 3.0485999713659677e-06, "loss": 0.0216, "step": 164340 }, { "epoch": 1.3298001456428514, "grad_norm": 0.2614070475101471, "learning_rate": 3.0479498904812223e-06, "loss": 0.0282, "step": 164350 }, { "epoch": 1.3298810583380534, "grad_norm": 0.27339422702789307, "learning_rate": 3.0472998485259976e-06, "loss": 0.0125, "step": 164360 }, { "epoch": 1.329961971033255, "grad_norm": 0.5603348016738892, "learning_rate": 3.0466498455132533e-06, "loss": 0.0214, "step": 164370 }, { "epoch": 1.330042883728457, "grad_norm": 0.20331206917762756, "learning_rate": 3.0459998814559553e-06, "loss": 0.0151, "step": 164380 }, { "epoch": 1.330123796423659, "grad_norm": 0.31231069564819336, "learning_rate": 3.0453499563670654e-06, "loss": 0.0179, "step": 164390 }, { "epoch": 1.3302047091188607, "grad_norm": 0.4378114938735962, "learning_rate": 3.0447000702595418e-06, "loss": 0.0248, "step": 164400 }, { "epoch": 1.3302856218140626, "grad_norm": 0.30025801062583923, "learning_rate": 3.044050223146349e-06, "loss": 0.0171, "step": 164410 }, { "epoch": 1.3303665345092646, "grad_norm": 0.33639654517173767, "learning_rate": 3.043400415040445e-06, "loss": 0.0175, "step": 164420 }, { "epoch": 1.3304474472044663, "grad_norm": 0.30572083592414856, "learning_rate": 3.0427506459547894e-06, "loss": 0.0271, "step": 164430 }, { "epoch": 1.3305283598996682, "grad_norm": 0.5284699201583862, "learning_rate": 3.0421009159023392e-06, "loss": 0.0186, "step": 164440 }, { "epoch": 1.3306092725948702, "grad_norm": 0.315107524394989, "learning_rate": 3.0414512248960547e-06, "loss": 0.0231, "step": 164450 }, { "epoch": 1.3306901852900719, "grad_norm": 0.3740381896495819, "learning_rate": 3.04080157294889e-06, "loss": 0.0161, "step": 164460 }, { "epoch": 1.3307710979852738, "grad_norm": 0.4295123219490051, "learning_rate": 3.0401519600738006e-06, "loss": 0.0279, "step": 164470 }, { "epoch": 1.3308520106804758, "grad_norm": 0.2854579985141754, "learning_rate": 3.0395023862837453e-06, "loss": 0.0227, "step": 164480 }, { "epoch": 1.3309329233756777, "grad_norm": 0.23040896654129028, "learning_rate": 3.0388528515916737e-06, "loss": 0.0309, "step": 164490 }, { "epoch": 1.3310138360708796, "grad_norm": 0.5957125425338745, "learning_rate": 3.0382033560105433e-06, "loss": 0.0151, "step": 164500 }, { "epoch": 1.3310947487660814, "grad_norm": 0.18983186781406403, "learning_rate": 3.0375538995533036e-06, "loss": 0.0114, "step": 164510 }, { "epoch": 1.3311756614612833, "grad_norm": 0.6114193797111511, "learning_rate": 3.036904482232912e-06, "loss": 0.0203, "step": 164520 }, { "epoch": 1.3312565741564852, "grad_norm": 0.5879924893379211, "learning_rate": 3.036255104062314e-06, "loss": 0.0177, "step": 164530 }, { "epoch": 1.331337486851687, "grad_norm": 0.4373946785926819, "learning_rate": 3.035605765054461e-06, "loss": 0.0194, "step": 164540 }, { "epoch": 1.331418399546889, "grad_norm": 0.29029372334480286, "learning_rate": 3.0349564652223073e-06, "loss": 0.0183, "step": 164550 }, { "epoch": 1.3314993122420908, "grad_norm": 0.30619359016418457, "learning_rate": 3.0343072045787956e-06, "loss": 0.0161, "step": 164560 }, { "epoch": 1.3315802249372926, "grad_norm": 0.46331822872161865, "learning_rate": 3.0336579831368784e-06, "loss": 0.0258, "step": 164570 }, { "epoch": 1.3316611376324945, "grad_norm": 0.18912605941295624, "learning_rate": 3.0330088009095022e-06, "loss": 0.0185, "step": 164580 }, { "epoch": 1.3317420503276964, "grad_norm": 0.49680453538894653, "learning_rate": 3.0323596579096114e-06, "loss": 0.0229, "step": 164590 }, { "epoch": 1.3318229630228982, "grad_norm": 0.28245675563812256, "learning_rate": 3.0317105541501536e-06, "loss": 0.0149, "step": 164600 }, { "epoch": 1.3319038757181, "grad_norm": 0.2777923345565796, "learning_rate": 3.031061489644075e-06, "loss": 0.0192, "step": 164610 }, { "epoch": 1.331984788413302, "grad_norm": 0.41182607412338257, "learning_rate": 3.0304124644043165e-06, "loss": 0.0214, "step": 164620 }, { "epoch": 1.332065701108504, "grad_norm": 0.38504815101623535, "learning_rate": 3.0297634784438233e-06, "loss": 0.0235, "step": 164630 }, { "epoch": 1.332146613803706, "grad_norm": 0.1910502314567566, "learning_rate": 3.02911453177554e-06, "loss": 0.0135, "step": 164640 }, { "epoch": 1.3322275264989076, "grad_norm": 0.2786352038383484, "learning_rate": 3.028465624412406e-06, "loss": 0.0143, "step": 164650 }, { "epoch": 1.3323084391941096, "grad_norm": 0.2973884046077728, "learning_rate": 3.0278167563673623e-06, "loss": 0.0262, "step": 164660 }, { "epoch": 1.3323893518893115, "grad_norm": 0.29756876826286316, "learning_rate": 3.027167927653351e-06, "loss": 0.0263, "step": 164670 }, { "epoch": 1.3324702645845132, "grad_norm": 0.19483226537704468, "learning_rate": 3.0265191382833104e-06, "loss": 0.0274, "step": 164680 }, { "epoch": 1.3325511772797152, "grad_norm": 0.6070358157157898, "learning_rate": 3.025870388270178e-06, "loss": 0.0266, "step": 164690 }, { "epoch": 1.3326320899749171, "grad_norm": 0.5068915486335754, "learning_rate": 3.025221677626893e-06, "loss": 0.0172, "step": 164700 }, { "epoch": 1.3327130026701188, "grad_norm": 0.05878260359168053, "learning_rate": 3.0245730063663946e-06, "loss": 0.0125, "step": 164710 }, { "epoch": 1.3327939153653208, "grad_norm": 0.3067798316478729, "learning_rate": 3.023924374501617e-06, "loss": 0.0317, "step": 164720 }, { "epoch": 1.3328748280605227, "grad_norm": 0.3431122601032257, "learning_rate": 3.023275782045494e-06, "loss": 0.0158, "step": 164730 }, { "epoch": 1.3329557407557246, "grad_norm": 0.24165815114974976, "learning_rate": 3.0226272290109653e-06, "loss": 0.0267, "step": 164740 }, { "epoch": 1.3330366534509264, "grad_norm": 0.323229044675827, "learning_rate": 3.021978715410961e-06, "loss": 0.0194, "step": 164750 }, { "epoch": 1.3331175661461283, "grad_norm": 0.3042176067829132, "learning_rate": 3.0213302412584146e-06, "loss": 0.0233, "step": 164760 }, { "epoch": 1.3331984788413302, "grad_norm": 0.31690075993537903, "learning_rate": 3.02068180656626e-06, "loss": 0.0195, "step": 164770 }, { "epoch": 1.3332793915365322, "grad_norm": 0.4217151999473572, "learning_rate": 3.020033411347428e-06, "loss": 0.0192, "step": 164780 }, { "epoch": 1.333360304231734, "grad_norm": 0.4146975874900818, "learning_rate": 3.0193850556148497e-06, "loss": 0.0363, "step": 164790 }, { "epoch": 1.3334412169269358, "grad_norm": 0.493775874376297, "learning_rate": 3.018736739381457e-06, "loss": 0.0145, "step": 164800 }, { "epoch": 1.3335221296221378, "grad_norm": 0.37839359045028687, "learning_rate": 3.0180884626601748e-06, "loss": 0.0236, "step": 164810 }, { "epoch": 1.3336030423173395, "grad_norm": 0.6324874758720398, "learning_rate": 3.017440225463935e-06, "loss": 0.0275, "step": 164820 }, { "epoch": 1.3336839550125414, "grad_norm": 1.028652548789978, "learning_rate": 3.016792027805666e-06, "loss": 0.0281, "step": 164830 }, { "epoch": 1.3337648677077434, "grad_norm": 0.3107301890850067, "learning_rate": 3.0161438696982914e-06, "loss": 0.0147, "step": 164840 }, { "epoch": 1.333845780402945, "grad_norm": 0.11582036316394806, "learning_rate": 3.015495751154739e-06, "loss": 0.0307, "step": 164850 }, { "epoch": 1.333926693098147, "grad_norm": 0.3064478635787964, "learning_rate": 3.014847672187935e-06, "loss": 0.0189, "step": 164860 }, { "epoch": 1.334007605793349, "grad_norm": 0.3461161255836487, "learning_rate": 3.0141996328108047e-06, "loss": 0.0152, "step": 164870 }, { "epoch": 1.334088518488551, "grad_norm": 0.376665323972702, "learning_rate": 3.0135516330362687e-06, "loss": 0.015, "step": 164880 }, { "epoch": 1.3341694311837529, "grad_norm": 0.34633857011795044, "learning_rate": 3.0129036728772527e-06, "loss": 0.0163, "step": 164890 }, { "epoch": 1.3342503438789546, "grad_norm": 0.5646267533302307, "learning_rate": 3.012255752346679e-06, "loss": 0.0313, "step": 164900 }, { "epoch": 1.3343312565741565, "grad_norm": 0.21461521089076996, "learning_rate": 3.011607871457466e-06, "loss": 0.0114, "step": 164910 }, { "epoch": 1.3344121692693585, "grad_norm": 0.24985797703266144, "learning_rate": 3.010960030222536e-06, "loss": 0.0103, "step": 164920 }, { "epoch": 1.3344930819645602, "grad_norm": 0.003839538199827075, "learning_rate": 3.0103122286548116e-06, "loss": 0.0192, "step": 164930 }, { "epoch": 1.3345739946597621, "grad_norm": 0.35384291410446167, "learning_rate": 3.0096644667672075e-06, "loss": 0.0168, "step": 164940 }, { "epoch": 1.334654907354964, "grad_norm": 0.5756669640541077, "learning_rate": 3.009016744572644e-06, "loss": 0.0167, "step": 164950 }, { "epoch": 1.3347358200501658, "grad_norm": 0.9831402897834778, "learning_rate": 3.0083690620840393e-06, "loss": 0.0103, "step": 164960 }, { "epoch": 1.3348167327453677, "grad_norm": 0.4880586266517639, "learning_rate": 3.007721419314308e-06, "loss": 0.026, "step": 164970 }, { "epoch": 1.3348976454405697, "grad_norm": 0.22737058997154236, "learning_rate": 3.007073816276368e-06, "loss": 0.0186, "step": 164980 }, { "epoch": 1.3349785581357714, "grad_norm": 0.28010448813438416, "learning_rate": 3.006426252983134e-06, "loss": 0.0144, "step": 164990 }, { "epoch": 1.3350594708309733, "grad_norm": 0.27181971073150635, "learning_rate": 3.0057787294475183e-06, "loss": 0.0174, "step": 165000 }, { "epoch": 1.3351403835261753, "grad_norm": 0.26349613070487976, "learning_rate": 3.0051312456824362e-06, "loss": 0.0172, "step": 165010 }, { "epoch": 1.3352212962213772, "grad_norm": 0.3401748538017273, "learning_rate": 3.0044838017008014e-06, "loss": 0.019, "step": 165020 }, { "epoch": 1.3353022089165791, "grad_norm": 0.05667132884263992, "learning_rate": 3.0038363975155226e-06, "loss": 0.0187, "step": 165030 }, { "epoch": 1.3353831216117809, "grad_norm": 0.7531188130378723, "learning_rate": 3.0031890331395125e-06, "loss": 0.0258, "step": 165040 }, { "epoch": 1.3354640343069828, "grad_norm": 0.18399952352046967, "learning_rate": 3.0025417085856828e-06, "loss": 0.0268, "step": 165050 }, { "epoch": 1.3355449470021847, "grad_norm": 0.6548342704772949, "learning_rate": 3.0018944238669427e-06, "loss": 0.0212, "step": 165060 }, { "epoch": 1.3356258596973865, "grad_norm": 0.6858502626419067, "learning_rate": 3.0012471789961984e-06, "loss": 0.0421, "step": 165070 }, { "epoch": 1.3357067723925884, "grad_norm": 0.5771891474723816, "learning_rate": 3.0005999739863605e-06, "loss": 0.0125, "step": 165080 }, { "epoch": 1.3357876850877903, "grad_norm": 0.5734643340110779, "learning_rate": 2.999952808850336e-06, "loss": 0.0375, "step": 165090 }, { "epoch": 1.335868597782992, "grad_norm": 0.49352172017097473, "learning_rate": 2.9993056836010294e-06, "loss": 0.031, "step": 165100 }, { "epoch": 1.335949510478194, "grad_norm": 0.40660011768341064, "learning_rate": 2.998658598251346e-06, "loss": 0.0236, "step": 165110 }, { "epoch": 1.336030423173396, "grad_norm": 0.4285357892513275, "learning_rate": 2.9980115528141947e-06, "loss": 0.0204, "step": 165120 }, { "epoch": 1.3361113358685976, "grad_norm": 0.6472920775413513, "learning_rate": 2.9973645473024747e-06, "loss": 0.0182, "step": 165130 }, { "epoch": 1.3361922485637996, "grad_norm": 0.30430150032043457, "learning_rate": 2.996717581729091e-06, "loss": 0.016, "step": 165140 }, { "epoch": 1.3362731612590015, "grad_norm": 0.21572408080101013, "learning_rate": 2.9960706561069475e-06, "loss": 0.024, "step": 165150 }, { "epoch": 1.3363540739542035, "grad_norm": 0.3226979672908783, "learning_rate": 2.9954237704489437e-06, "loss": 0.0327, "step": 165160 }, { "epoch": 1.3364349866494054, "grad_norm": 0.34479060769081116, "learning_rate": 2.994776924767979e-06, "loss": 0.0134, "step": 165170 }, { "epoch": 1.3365158993446071, "grad_norm": 0.30254223942756653, "learning_rate": 2.994130119076959e-06, "loss": 0.0158, "step": 165180 }, { "epoch": 1.336596812039809, "grad_norm": 0.37642887234687805, "learning_rate": 2.9934833533887757e-06, "loss": 0.0232, "step": 165190 }, { "epoch": 1.336677724735011, "grad_norm": 0.06340628117322922, "learning_rate": 2.9928366277163323e-06, "loss": 0.0221, "step": 165200 }, { "epoch": 1.3367586374302127, "grad_norm": 0.20874029397964478, "learning_rate": 2.9921899420725264e-06, "loss": 0.0224, "step": 165210 }, { "epoch": 1.3368395501254147, "grad_norm": 0.4911126494407654, "learning_rate": 2.991543296470251e-06, "loss": 0.0285, "step": 165220 }, { "epoch": 1.3369204628206166, "grad_norm": 0.5931302309036255, "learning_rate": 2.990896690922406e-06, "loss": 0.0276, "step": 165230 }, { "epoch": 1.3370013755158183, "grad_norm": 0.5463725924491882, "learning_rate": 2.9902501254418838e-06, "loss": 0.0382, "step": 165240 }, { "epoch": 1.3370822882110203, "grad_norm": 0.4111063778400421, "learning_rate": 2.989603600041582e-06, "loss": 0.022, "step": 165250 }, { "epoch": 1.3371632009062222, "grad_norm": 0.2667638659477234, "learning_rate": 2.988957114734391e-06, "loss": 0.018, "step": 165260 }, { "epoch": 1.3372441136014241, "grad_norm": 0.30235037207603455, "learning_rate": 2.988310669533205e-06, "loss": 0.0293, "step": 165270 }, { "epoch": 1.3373250262966259, "grad_norm": 0.9360891580581665, "learning_rate": 2.987664264450917e-06, "loss": 0.0226, "step": 165280 }, { "epoch": 1.3374059389918278, "grad_norm": 0.48488420248031616, "learning_rate": 2.987017899500415e-06, "loss": 0.0242, "step": 165290 }, { "epoch": 1.3374868516870297, "grad_norm": 0.41782549023628235, "learning_rate": 2.9863715746945928e-06, "loss": 0.0143, "step": 165300 }, { "epoch": 1.3375677643822317, "grad_norm": 0.4694792330265045, "learning_rate": 2.985725290046339e-06, "loss": 0.0183, "step": 165310 }, { "epoch": 1.3376486770774334, "grad_norm": 0.2910619378089905, "learning_rate": 2.98507904556854e-06, "loss": 0.0206, "step": 165320 }, { "epoch": 1.3377295897726353, "grad_norm": 0.23625260591506958, "learning_rate": 2.9844328412740865e-06, "loss": 0.0144, "step": 165330 }, { "epoch": 1.3378105024678373, "grad_norm": 0.47607314586639404, "learning_rate": 2.983786677175866e-06, "loss": 0.0116, "step": 165340 }, { "epoch": 1.337891415163039, "grad_norm": 0.43381139636039734, "learning_rate": 2.983140553286763e-06, "loss": 0.0176, "step": 165350 }, { "epoch": 1.337972327858241, "grad_norm": 0.2384837567806244, "learning_rate": 2.9824944696196634e-06, "loss": 0.0218, "step": 165360 }, { "epoch": 1.3380532405534429, "grad_norm": 0.2544013559818268, "learning_rate": 2.981848426187454e-06, "loss": 0.0329, "step": 165370 }, { "epoch": 1.3381341532486446, "grad_norm": 0.19896015524864197, "learning_rate": 2.9812024230030166e-06, "loss": 0.0149, "step": 165380 }, { "epoch": 1.3382150659438465, "grad_norm": 0.4647406041622162, "learning_rate": 2.9805564600792335e-06, "loss": 0.0262, "step": 165390 }, { "epoch": 1.3382959786390485, "grad_norm": 0.4215780794620514, "learning_rate": 2.9799105374289917e-06, "loss": 0.0207, "step": 165400 }, { "epoch": 1.3383768913342504, "grad_norm": 0.43430089950561523, "learning_rate": 2.979264655065166e-06, "loss": 0.0227, "step": 165410 }, { "epoch": 1.3384578040294524, "grad_norm": 0.326006144285202, "learning_rate": 2.9786188130006434e-06, "loss": 0.0213, "step": 165420 }, { "epoch": 1.338538716724654, "grad_norm": 0.2523525059223175, "learning_rate": 2.977973011248299e-06, "loss": 0.0211, "step": 165430 }, { "epoch": 1.338619629419856, "grad_norm": 0.6062928438186646, "learning_rate": 2.977327249821017e-06, "loss": 0.0254, "step": 165440 }, { "epoch": 1.338700542115058, "grad_norm": 0.2567528486251831, "learning_rate": 2.9766815287316714e-06, "loss": 0.0203, "step": 165450 }, { "epoch": 1.3387814548102597, "grad_norm": 0.42392271757125854, "learning_rate": 2.9760358479931407e-06, "loss": 0.0315, "step": 165460 }, { "epoch": 1.3388623675054616, "grad_norm": 0.24093417823314667, "learning_rate": 2.975390207618304e-06, "loss": 0.0204, "step": 165470 }, { "epoch": 1.3389432802006636, "grad_norm": 0.3399111032485962, "learning_rate": 2.974744607620033e-06, "loss": 0.0155, "step": 165480 }, { "epoch": 1.3390241928958653, "grad_norm": 0.30536872148513794, "learning_rate": 2.974099048011207e-06, "loss": 0.0202, "step": 165490 }, { "epoch": 1.3391051055910672, "grad_norm": 0.816667377948761, "learning_rate": 2.9734535288046995e-06, "loss": 0.0275, "step": 165500 }, { "epoch": 1.3391860182862692, "grad_norm": 0.29525619745254517, "learning_rate": 2.972808050013381e-06, "loss": 0.0223, "step": 165510 }, { "epoch": 1.3392669309814709, "grad_norm": 0.0755791962146759, "learning_rate": 2.9721626116501267e-06, "loss": 0.0153, "step": 165520 }, { "epoch": 1.3393478436766728, "grad_norm": 0.07366308569908142, "learning_rate": 2.971517213727809e-06, "loss": 0.0276, "step": 165530 }, { "epoch": 1.3394287563718748, "grad_norm": 0.3303599953651428, "learning_rate": 2.9708718562592986e-06, "loss": 0.0174, "step": 165540 }, { "epoch": 1.3395096690670767, "grad_norm": 0.519960880279541, "learning_rate": 2.9702265392574635e-06, "loss": 0.02, "step": 165550 }, { "epoch": 1.3395905817622786, "grad_norm": 0.9700416326522827, "learning_rate": 2.969581262735177e-06, "loss": 0.0273, "step": 165560 }, { "epoch": 1.3396714944574803, "grad_norm": 0.19020144641399384, "learning_rate": 2.968936026705306e-06, "loss": 0.0178, "step": 165570 }, { "epoch": 1.3397524071526823, "grad_norm": 0.3799755573272705, "learning_rate": 2.9682908311807157e-06, "loss": 0.0196, "step": 165580 }, { "epoch": 1.3398333198478842, "grad_norm": 0.22327974438667297, "learning_rate": 2.967645676174277e-06, "loss": 0.0179, "step": 165590 }, { "epoch": 1.339914232543086, "grad_norm": 0.29322028160095215, "learning_rate": 2.9670005616988555e-06, "loss": 0.0219, "step": 165600 }, { "epoch": 1.3399951452382879, "grad_norm": 0.3796844780445099, "learning_rate": 2.966355487767316e-06, "loss": 0.0186, "step": 165610 }, { "epoch": 1.3400760579334898, "grad_norm": 0.8153772950172424, "learning_rate": 2.965710454392521e-06, "loss": 0.0192, "step": 165620 }, { "epoch": 1.3401569706286915, "grad_norm": 0.4663545489311218, "learning_rate": 2.965065461587339e-06, "loss": 0.0211, "step": 165630 }, { "epoch": 1.3402378833238935, "grad_norm": 0.21310725808143616, "learning_rate": 2.9644205093646296e-06, "loss": 0.0139, "step": 165640 }, { "epoch": 1.3403187960190954, "grad_norm": 0.4748510718345642, "learning_rate": 2.9637755977372547e-06, "loss": 0.0149, "step": 165650 }, { "epoch": 1.3403997087142971, "grad_norm": 0.2906581163406372, "learning_rate": 2.963130726718079e-06, "loss": 0.0199, "step": 165660 }, { "epoch": 1.340480621409499, "grad_norm": 0.46502602100372314, "learning_rate": 2.9624858963199587e-06, "loss": 0.0238, "step": 165670 }, { "epoch": 1.340561534104701, "grad_norm": 0.9769712686538696, "learning_rate": 2.961841106555756e-06, "loss": 0.0236, "step": 165680 }, { "epoch": 1.340642446799903, "grad_norm": 0.5180217623710632, "learning_rate": 2.961196357438332e-06, "loss": 0.0368, "step": 165690 }, { "epoch": 1.340723359495105, "grad_norm": 0.3075866103172302, "learning_rate": 2.96055164898054e-06, "loss": 0.0289, "step": 165700 }, { "epoch": 1.3408042721903066, "grad_norm": 0.16739529371261597, "learning_rate": 2.9599069811952407e-06, "loss": 0.0137, "step": 165710 }, { "epoch": 1.3408851848855086, "grad_norm": 0.47048327326774597, "learning_rate": 2.9592623540952914e-06, "loss": 0.0164, "step": 165720 }, { "epoch": 1.3409660975807105, "grad_norm": 0.3887208104133606, "learning_rate": 2.958617767693544e-06, "loss": 0.0255, "step": 165730 }, { "epoch": 1.3410470102759122, "grad_norm": 0.20323480665683746, "learning_rate": 2.9579732220028557e-06, "loss": 0.0261, "step": 165740 }, { "epoch": 1.3411279229711142, "grad_norm": 0.42561954259872437, "learning_rate": 2.9573287170360827e-06, "loss": 0.0259, "step": 165750 }, { "epoch": 1.341208835666316, "grad_norm": 0.13162055611610413, "learning_rate": 2.9566842528060747e-06, "loss": 0.0145, "step": 165760 }, { "epoch": 1.3412897483615178, "grad_norm": 0.22414876520633698, "learning_rate": 2.956039829325685e-06, "loss": 0.0207, "step": 165770 }, { "epoch": 1.3413706610567198, "grad_norm": 0.308872252702713, "learning_rate": 2.9553954466077672e-06, "loss": 0.0222, "step": 165780 }, { "epoch": 1.3414515737519217, "grad_norm": 0.38824307918548584, "learning_rate": 2.9547511046651713e-06, "loss": 0.0236, "step": 165790 }, { "epoch": 1.3415324864471234, "grad_norm": 0.45725002884864807, "learning_rate": 2.9541068035107452e-06, "loss": 0.0285, "step": 165800 }, { "epoch": 1.3416133991423254, "grad_norm": 0.4052649736404419, "learning_rate": 2.9534625431573393e-06, "loss": 0.0234, "step": 165810 }, { "epoch": 1.3416943118375273, "grad_norm": 0.1718451976776123, "learning_rate": 2.9528183236178055e-06, "loss": 0.0194, "step": 165820 }, { "epoch": 1.3417752245327292, "grad_norm": 0.43672049045562744, "learning_rate": 2.9521741449049866e-06, "loss": 0.0238, "step": 165830 }, { "epoch": 1.3418561372279312, "grad_norm": 0.31495189666748047, "learning_rate": 2.951530007031731e-06, "loss": 0.0122, "step": 165840 }, { "epoch": 1.341937049923133, "grad_norm": 0.4421983063220978, "learning_rate": 2.950885910010886e-06, "loss": 0.0237, "step": 165850 }, { "epoch": 1.3420179626183348, "grad_norm": 0.46804362535476685, "learning_rate": 2.950241853855295e-06, "loss": 0.019, "step": 165860 }, { "epoch": 1.3420988753135368, "grad_norm": 0.18935580551624298, "learning_rate": 2.9495978385778023e-06, "loss": 0.0142, "step": 165870 }, { "epoch": 1.3421797880087385, "grad_norm": 0.3966177999973297, "learning_rate": 2.948953864191254e-06, "loss": 0.0169, "step": 165880 }, { "epoch": 1.3422607007039404, "grad_norm": 0.5508571863174438, "learning_rate": 2.9483099307084885e-06, "loss": 0.0332, "step": 165890 }, { "epoch": 1.3423416133991424, "grad_norm": 0.3922637403011322, "learning_rate": 2.9476660381423515e-06, "loss": 0.0196, "step": 165900 }, { "epoch": 1.342422526094344, "grad_norm": 0.33823850750923157, "learning_rate": 2.9470221865056835e-06, "loss": 0.0168, "step": 165910 }, { "epoch": 1.342503438789546, "grad_norm": 0.002841696608811617, "learning_rate": 2.9463783758113228e-06, "loss": 0.0199, "step": 165920 }, { "epoch": 1.342584351484748, "grad_norm": 0.658945620059967, "learning_rate": 2.9457346060721104e-06, "loss": 0.03, "step": 165930 }, { "epoch": 1.34266526417995, "grad_norm": 0.3617921769618988, "learning_rate": 2.945090877300885e-06, "loss": 0.0275, "step": 165940 }, { "epoch": 1.3427461768751516, "grad_norm": 0.37194427847862244, "learning_rate": 2.9444471895104827e-06, "loss": 0.0161, "step": 165950 }, { "epoch": 1.3428270895703536, "grad_norm": 0.28538474440574646, "learning_rate": 2.9438035427137413e-06, "loss": 0.0214, "step": 165960 }, { "epoch": 1.3429080022655555, "grad_norm": 0.13519197702407837, "learning_rate": 2.943159936923499e-06, "loss": 0.024, "step": 165970 }, { "epoch": 1.3429889149607575, "grad_norm": 0.24148516356945038, "learning_rate": 2.9425163721525895e-06, "loss": 0.0417, "step": 165980 }, { "epoch": 1.3430698276559592, "grad_norm": 0.3480282425880432, "learning_rate": 2.9418728484138467e-06, "loss": 0.029, "step": 165990 }, { "epoch": 1.343150740351161, "grad_norm": 0.407889187335968, "learning_rate": 2.941229365720105e-06, "loss": 0.018, "step": 166000 }, { "epoch": 1.343231653046363, "grad_norm": 0.18316452205181122, "learning_rate": 2.9405859240841976e-06, "loss": 0.0283, "step": 166010 }, { "epoch": 1.3433125657415648, "grad_norm": 0.4278623163700104, "learning_rate": 2.9399425235189565e-06, "loss": 0.0189, "step": 166020 }, { "epoch": 1.3433934784367667, "grad_norm": 0.5467223525047302, "learning_rate": 2.939299164037212e-06, "loss": 0.0268, "step": 166030 }, { "epoch": 1.3434743911319686, "grad_norm": 0.4578455984592438, "learning_rate": 2.938655845651797e-06, "loss": 0.0184, "step": 166040 }, { "epoch": 1.3435553038271704, "grad_norm": 0.3440694808959961, "learning_rate": 2.9380125683755388e-06, "loss": 0.0213, "step": 166050 }, { "epoch": 1.3436362165223723, "grad_norm": 0.30986398458480835, "learning_rate": 2.9373693322212655e-06, "loss": 0.0167, "step": 166060 }, { "epoch": 1.3437171292175742, "grad_norm": 0.3039703667163849, "learning_rate": 2.9367261372018086e-06, "loss": 0.0177, "step": 166070 }, { "epoch": 1.3437980419127762, "grad_norm": 0.4231272339820862, "learning_rate": 2.936082983329991e-06, "loss": 0.0199, "step": 166080 }, { "epoch": 1.3438789546079781, "grad_norm": 0.45379969477653503, "learning_rate": 2.9354398706186427e-06, "loss": 0.0271, "step": 166090 }, { "epoch": 1.3439598673031798, "grad_norm": 0.3707025349140167, "learning_rate": 2.934796799080588e-06, "loss": 0.0224, "step": 166100 }, { "epoch": 1.3440407799983818, "grad_norm": 0.35629022121429443, "learning_rate": 2.9341537687286497e-06, "loss": 0.0341, "step": 166110 }, { "epoch": 1.3441216926935837, "grad_norm": 0.3909967243671417, "learning_rate": 2.9335107795756544e-06, "loss": 0.0151, "step": 166120 }, { "epoch": 1.3442026053887854, "grad_norm": 0.32489389181137085, "learning_rate": 2.932867831634423e-06, "loss": 0.0122, "step": 166130 }, { "epoch": 1.3442835180839874, "grad_norm": 0.23849061131477356, "learning_rate": 2.9322249249177804e-06, "loss": 0.0244, "step": 166140 }, { "epoch": 1.3443644307791893, "grad_norm": 0.25962385535240173, "learning_rate": 2.931582059438545e-06, "loss": 0.0132, "step": 166150 }, { "epoch": 1.344445343474391, "grad_norm": 0.489444762468338, "learning_rate": 2.930939235209539e-06, "loss": 0.0267, "step": 166160 }, { "epoch": 1.344526256169593, "grad_norm": 0.3255743682384491, "learning_rate": 2.9302964522435838e-06, "loss": 0.0253, "step": 166170 }, { "epoch": 1.344607168864795, "grad_norm": 0.29674801230430603, "learning_rate": 2.9296537105534938e-06, "loss": 0.017, "step": 166180 }, { "epoch": 1.3446880815599966, "grad_norm": 0.32671260833740234, "learning_rate": 2.9290110101520913e-06, "loss": 0.0313, "step": 166190 }, { "epoch": 1.3447689942551986, "grad_norm": 0.26325780153274536, "learning_rate": 2.928368351052193e-06, "loss": 0.0157, "step": 166200 }, { "epoch": 1.3448499069504005, "grad_norm": 0.17230689525604248, "learning_rate": 2.927725733266613e-06, "loss": 0.0188, "step": 166210 }, { "epoch": 1.3449308196456025, "grad_norm": 0.1122075766324997, "learning_rate": 2.9270831568081683e-06, "loss": 0.0162, "step": 166220 }, { "epoch": 1.3450117323408044, "grad_norm": 0.57609623670578, "learning_rate": 2.9264406216896757e-06, "loss": 0.0219, "step": 166230 }, { "epoch": 1.3450926450360061, "grad_norm": 0.06513715535402298, "learning_rate": 2.925798127923946e-06, "loss": 0.0178, "step": 166240 }, { "epoch": 1.345173557731208, "grad_norm": 0.4989725351333618, "learning_rate": 2.925155675523793e-06, "loss": 0.0222, "step": 166250 }, { "epoch": 1.34525447042641, "grad_norm": 0.35273393988609314, "learning_rate": 2.9245132645020326e-06, "loss": 0.0223, "step": 166260 }, { "epoch": 1.3453353831216117, "grad_norm": 0.3057921826839447, "learning_rate": 2.9238708948714717e-06, "loss": 0.0066, "step": 166270 }, { "epoch": 1.3454162958168137, "grad_norm": 0.29849356412887573, "learning_rate": 2.9232285666449223e-06, "loss": 0.0106, "step": 166280 }, { "epoch": 1.3454972085120156, "grad_norm": 0.14778633415699005, "learning_rate": 2.9225862798351966e-06, "loss": 0.0193, "step": 166290 }, { "epoch": 1.3455781212072173, "grad_norm": 0.27071502804756165, "learning_rate": 2.921944034455101e-06, "loss": 0.0254, "step": 166300 }, { "epoch": 1.3456590339024193, "grad_norm": 0.12294107675552368, "learning_rate": 2.9213018305174443e-06, "loss": 0.0165, "step": 166310 }, { "epoch": 1.3457399465976212, "grad_norm": 0.3649042844772339, "learning_rate": 2.920659668035034e-06, "loss": 0.0101, "step": 166320 }, { "epoch": 1.345820859292823, "grad_norm": 0.31622254848480225, "learning_rate": 2.920017547020678e-06, "loss": 0.0161, "step": 166330 }, { "epoch": 1.3459017719880249, "grad_norm": 0.3822069466114044, "learning_rate": 2.919375467487181e-06, "loss": 0.0241, "step": 166340 }, { "epoch": 1.3459826846832268, "grad_norm": 0.25664228200912476, "learning_rate": 2.9187334294473458e-06, "loss": 0.0249, "step": 166350 }, { "epoch": 1.3460635973784287, "grad_norm": 0.6631972789764404, "learning_rate": 2.9180914329139828e-06, "loss": 0.0229, "step": 166360 }, { "epoch": 1.3461445100736307, "grad_norm": 0.4060465395450592, "learning_rate": 2.9174494778998887e-06, "loss": 0.015, "step": 166370 }, { "epoch": 1.3462254227688324, "grad_norm": 0.7408204078674316, "learning_rate": 2.916807564417866e-06, "loss": 0.0237, "step": 166380 }, { "epoch": 1.3463063354640343, "grad_norm": 0.4653363525867462, "learning_rate": 2.916165692480723e-06, "loss": 0.0213, "step": 166390 }, { "epoch": 1.3463872481592363, "grad_norm": 0.7774494886398315, "learning_rate": 2.9155238621012537e-06, "loss": 0.0209, "step": 166400 }, { "epoch": 1.346468160854438, "grad_norm": 0.33280420303344727, "learning_rate": 2.914882073292259e-06, "loss": 0.0156, "step": 166410 }, { "epoch": 1.34654907354964, "grad_norm": 0.3539029061794281, "learning_rate": 2.914240326066543e-06, "loss": 0.0201, "step": 166420 }, { "epoch": 1.3466299862448419, "grad_norm": 0.6347823143005371, "learning_rate": 2.913598620436897e-06, "loss": 0.02, "step": 166430 }, { "epoch": 1.3467108989400436, "grad_norm": 0.4345138669013977, "learning_rate": 2.912956956416124e-06, "loss": 0.0105, "step": 166440 }, { "epoch": 1.3467918116352455, "grad_norm": 0.3823001980781555, "learning_rate": 2.9123153340170194e-06, "loss": 0.012, "step": 166450 }, { "epoch": 1.3468727243304475, "grad_norm": 0.2769419848918915, "learning_rate": 2.911673753252375e-06, "loss": 0.017, "step": 166460 }, { "epoch": 1.3469536370256492, "grad_norm": 0.4511202275753021, "learning_rate": 2.911032214134991e-06, "loss": 0.0237, "step": 166470 }, { "epoch": 1.3470345497208511, "grad_norm": 0.43942689895629883, "learning_rate": 2.910390716677662e-06, "loss": 0.0257, "step": 166480 }, { "epoch": 1.347115462416053, "grad_norm": 0.21609552204608917, "learning_rate": 2.9097492608931745e-06, "loss": 0.0199, "step": 166490 }, { "epoch": 1.347196375111255, "grad_norm": 0.34537675976753235, "learning_rate": 2.909107846794327e-06, "loss": 0.0208, "step": 166500 }, { "epoch": 1.347277287806457, "grad_norm": 0.6282265782356262, "learning_rate": 2.9084664743939088e-06, "loss": 0.0159, "step": 166510 }, { "epoch": 1.3473582005016587, "grad_norm": 0.21582399308681488, "learning_rate": 2.907825143704712e-06, "loss": 0.0194, "step": 166520 }, { "epoch": 1.3474391131968606, "grad_norm": 0.49894678592681885, "learning_rate": 2.907183854739526e-06, "loss": 0.0145, "step": 166530 }, { "epoch": 1.3475200258920625, "grad_norm": 0.45964527130126953, "learning_rate": 2.906542607511139e-06, "loss": 0.022, "step": 166540 }, { "epoch": 1.3476009385872643, "grad_norm": 0.485746294260025, "learning_rate": 2.9059014020323416e-06, "loss": 0.0181, "step": 166550 }, { "epoch": 1.3476818512824662, "grad_norm": 0.21530941128730774, "learning_rate": 2.905260238315919e-06, "loss": 0.0177, "step": 166560 }, { "epoch": 1.3477627639776681, "grad_norm": 0.33406782150268555, "learning_rate": 2.9046191163746575e-06, "loss": 0.0176, "step": 166570 }, { "epoch": 1.3478436766728699, "grad_norm": 0.5374637842178345, "learning_rate": 2.9039780362213475e-06, "loss": 0.0147, "step": 166580 }, { "epoch": 1.3479245893680718, "grad_norm": 0.42689618468284607, "learning_rate": 2.9033369978687687e-06, "loss": 0.0219, "step": 166590 }, { "epoch": 1.3480055020632737, "grad_norm": 0.4798322319984436, "learning_rate": 2.902696001329706e-06, "loss": 0.0202, "step": 166600 }, { "epoch": 1.3480864147584757, "grad_norm": 0.1073802188038826, "learning_rate": 2.9020550466169465e-06, "loss": 0.0281, "step": 166610 }, { "epoch": 1.3481673274536774, "grad_norm": 0.4520964026451111, "learning_rate": 2.9014141337432687e-06, "loss": 0.0239, "step": 166620 }, { "epoch": 1.3482482401488793, "grad_norm": 0.03891579806804657, "learning_rate": 2.900773262721455e-06, "loss": 0.0143, "step": 166630 }, { "epoch": 1.3483291528440813, "grad_norm": 0.5376147031784058, "learning_rate": 2.900132433564289e-06, "loss": 0.0306, "step": 166640 }, { "epoch": 1.3484100655392832, "grad_norm": 0.3244847357273102, "learning_rate": 2.899491646284546e-06, "loss": 0.0169, "step": 166650 }, { "epoch": 1.348490978234485, "grad_norm": 0.382087379693985, "learning_rate": 2.898850900895009e-06, "loss": 0.0187, "step": 166660 }, { "epoch": 1.3485718909296869, "grad_norm": 0.2445778250694275, "learning_rate": 2.8982101974084555e-06, "loss": 0.0131, "step": 166670 }, { "epoch": 1.3486528036248888, "grad_norm": 0.4626883864402771, "learning_rate": 2.897569535837662e-06, "loss": 0.0192, "step": 166680 }, { "epoch": 1.3487337163200905, "grad_norm": 0.34196293354034424, "learning_rate": 2.8969289161954063e-06, "loss": 0.0184, "step": 166690 }, { "epoch": 1.3488146290152925, "grad_norm": 0.0005058886599726975, "learning_rate": 2.896288338494463e-06, "loss": 0.0215, "step": 166700 }, { "epoch": 1.3488955417104944, "grad_norm": 0.24736633896827698, "learning_rate": 2.8956478027476076e-06, "loss": 0.0283, "step": 166710 }, { "epoch": 1.3489764544056961, "grad_norm": 0.3232669234275818, "learning_rate": 2.8950073089676145e-06, "loss": 0.0161, "step": 166720 }, { "epoch": 1.349057367100898, "grad_norm": 0.7838327288627625, "learning_rate": 2.8943668571672574e-06, "loss": 0.0168, "step": 166730 }, { "epoch": 1.3491382797961, "grad_norm": 0.3400149941444397, "learning_rate": 2.8937264473593074e-06, "loss": 0.0195, "step": 166740 }, { "epoch": 1.349219192491302, "grad_norm": 0.3594937324523926, "learning_rate": 2.8930860795565374e-06, "loss": 0.0295, "step": 166750 }, { "epoch": 1.349300105186504, "grad_norm": 0.44618740677833557, "learning_rate": 2.892445753771717e-06, "loss": 0.0286, "step": 166760 }, { "epoch": 1.3493810178817056, "grad_norm": 0.25911223888397217, "learning_rate": 2.891805470017617e-06, "loss": 0.0209, "step": 166770 }, { "epoch": 1.3494619305769076, "grad_norm": 0.2943708598613739, "learning_rate": 2.8911652283070067e-06, "loss": 0.0196, "step": 166780 }, { "epoch": 1.3495428432721095, "grad_norm": 0.4698578417301178, "learning_rate": 2.890525028652652e-06, "loss": 0.0359, "step": 166790 }, { "epoch": 1.3496237559673112, "grad_norm": 0.622047483921051, "learning_rate": 2.8898848710673255e-06, "loss": 0.0254, "step": 166800 }, { "epoch": 1.3497046686625132, "grad_norm": 0.3029865324497223, "learning_rate": 2.8892447555637894e-06, "loss": 0.0297, "step": 166810 }, { "epoch": 1.349785581357715, "grad_norm": 0.5002264976501465, "learning_rate": 2.8886046821548084e-06, "loss": 0.0171, "step": 166820 }, { "epoch": 1.3498664940529168, "grad_norm": 0.09568587690591812, "learning_rate": 2.887964650853154e-06, "loss": 0.0139, "step": 166830 }, { "epoch": 1.3499474067481188, "grad_norm": 0.25688374042510986, "learning_rate": 2.887324661671581e-06, "loss": 0.0253, "step": 166840 }, { "epoch": 1.3500283194433207, "grad_norm": 0.48728907108306885, "learning_rate": 2.8866847146228607e-06, "loss": 0.0183, "step": 166850 }, { "epoch": 1.3501092321385224, "grad_norm": 0.37792831659317017, "learning_rate": 2.8860448097197512e-06, "loss": 0.0157, "step": 166860 }, { "epoch": 1.3501901448337243, "grad_norm": 0.16125626862049103, "learning_rate": 2.885404946975016e-06, "loss": 0.0149, "step": 166870 }, { "epoch": 1.3502710575289263, "grad_norm": 0.423980712890625, "learning_rate": 2.8847651264014155e-06, "loss": 0.0203, "step": 166880 }, { "epoch": 1.3503519702241282, "grad_norm": 0.3684252202510834, "learning_rate": 2.884125348011708e-06, "loss": 0.0151, "step": 166890 }, { "epoch": 1.3504328829193302, "grad_norm": 0.5528481006622314, "learning_rate": 2.883485611818654e-06, "loss": 0.0221, "step": 166900 }, { "epoch": 1.3505137956145319, "grad_norm": 0.14248645305633545, "learning_rate": 2.8828459178350116e-06, "loss": 0.0142, "step": 166910 }, { "epoch": 1.3505947083097338, "grad_norm": 0.35638830065727234, "learning_rate": 2.882206266073537e-06, "loss": 0.0213, "step": 166920 }, { "epoch": 1.3506756210049358, "grad_norm": 0.20853234827518463, "learning_rate": 2.881566656546989e-06, "loss": 0.0137, "step": 166930 }, { "epoch": 1.3507565337001375, "grad_norm": 0.66376131772995, "learning_rate": 2.880927089268121e-06, "loss": 0.0224, "step": 166940 }, { "epoch": 1.3508374463953394, "grad_norm": 0.45553621649742126, "learning_rate": 2.8802875642496888e-06, "loss": 0.0174, "step": 166950 }, { "epoch": 1.3509183590905414, "grad_norm": 0.24150770902633667, "learning_rate": 2.879648081504446e-06, "loss": 0.0136, "step": 166960 }, { "epoch": 1.350999271785743, "grad_norm": 0.4148920774459839, "learning_rate": 2.8790086410451458e-06, "loss": 0.0157, "step": 166970 }, { "epoch": 1.351080184480945, "grad_norm": 0.6149539947509766, "learning_rate": 2.878369242884539e-06, "loss": 0.0223, "step": 166980 }, { "epoch": 1.351161097176147, "grad_norm": 0.4784074127674103, "learning_rate": 2.8777298870353828e-06, "loss": 0.0255, "step": 166990 }, { "epoch": 1.3512420098713487, "grad_norm": 0.31458407640457153, "learning_rate": 2.8770905735104216e-06, "loss": 0.012, "step": 167000 }, { "epoch": 1.3513229225665506, "grad_norm": 0.3691856265068054, "learning_rate": 2.876451302322405e-06, "loss": 0.0156, "step": 167010 }, { "epoch": 1.3514038352617526, "grad_norm": 0.17540182173252106, "learning_rate": 2.875812073484088e-06, "loss": 0.0274, "step": 167020 }, { "epoch": 1.3514847479569545, "grad_norm": 0.3433423340320587, "learning_rate": 2.875172887008213e-06, "loss": 0.0177, "step": 167030 }, { "epoch": 1.3515656606521564, "grad_norm": 0.2996974587440491, "learning_rate": 2.874533742907527e-06, "loss": 0.0294, "step": 167040 }, { "epoch": 1.3516465733473582, "grad_norm": 0.06262526661157608, "learning_rate": 2.873894641194781e-06, "loss": 0.0188, "step": 167050 }, { "epoch": 1.35172748604256, "grad_norm": 0.7433143258094788, "learning_rate": 2.8732555818827174e-06, "loss": 0.0234, "step": 167060 }, { "epoch": 1.351808398737762, "grad_norm": 0.3705054521560669, "learning_rate": 2.8726165649840822e-06, "loss": 0.0122, "step": 167070 }, { "epoch": 1.3518893114329638, "grad_norm": 0.3661161959171295, "learning_rate": 2.8719775905116177e-06, "loss": 0.0205, "step": 167080 }, { "epoch": 1.3519702241281657, "grad_norm": 0.2564118206501007, "learning_rate": 2.8713386584780677e-06, "loss": 0.0202, "step": 167090 }, { "epoch": 1.3520511368233676, "grad_norm": 0.3927602469921112, "learning_rate": 2.8706997688961747e-06, "loss": 0.0328, "step": 167100 }, { "epoch": 1.3521320495185694, "grad_norm": 0.3856906592845917, "learning_rate": 2.8700609217786794e-06, "loss": 0.0166, "step": 167110 }, { "epoch": 1.3522129622137713, "grad_norm": 0.6283966898918152, "learning_rate": 2.869422117138323e-06, "loss": 0.0319, "step": 167120 }, { "epoch": 1.3522938749089732, "grad_norm": 0.30411040782928467, "learning_rate": 2.868783354987844e-06, "loss": 0.0212, "step": 167130 }, { "epoch": 1.3523747876041752, "grad_norm": 0.4687272310256958, "learning_rate": 2.868144635339982e-06, "loss": 0.0157, "step": 167140 }, { "epoch": 1.352455700299377, "grad_norm": 0.35513627529144287, "learning_rate": 2.867505958207475e-06, "loss": 0.0221, "step": 167150 }, { "epoch": 1.3525366129945788, "grad_norm": 0.2701861560344696, "learning_rate": 2.8668673236030597e-06, "loss": 0.0203, "step": 167160 }, { "epoch": 1.3526175256897808, "grad_norm": 0.4873643219470978, "learning_rate": 2.8662287315394722e-06, "loss": 0.0272, "step": 167170 }, { "epoch": 1.3526984383849827, "grad_norm": 0.37026846408843994, "learning_rate": 2.865590182029448e-06, "loss": 0.0152, "step": 167180 }, { "epoch": 1.3527793510801844, "grad_norm": 0.5557742714881897, "learning_rate": 2.864951675085722e-06, "loss": 0.0304, "step": 167190 }, { "epoch": 1.3528602637753864, "grad_norm": 0.4679136872291565, "learning_rate": 2.8643132107210255e-06, "loss": 0.0307, "step": 167200 }, { "epoch": 1.3529411764705883, "grad_norm": 0.3559033274650574, "learning_rate": 2.863674788948097e-06, "loss": 0.0175, "step": 167210 }, { "epoch": 1.35302208916579, "grad_norm": 0.40598347783088684, "learning_rate": 2.8630364097796627e-06, "loss": 0.0329, "step": 167220 }, { "epoch": 1.353103001860992, "grad_norm": 0.4757242202758789, "learning_rate": 2.8623980732284545e-06, "loss": 0.0215, "step": 167230 }, { "epoch": 1.353183914556194, "grad_norm": 0.21407224237918854, "learning_rate": 2.861759779307206e-06, "loss": 0.0148, "step": 167240 }, { "epoch": 1.3532648272513956, "grad_norm": 0.31034258008003235, "learning_rate": 2.8611215280286464e-06, "loss": 0.0313, "step": 167250 }, { "epoch": 1.3533457399465976, "grad_norm": 0.03763865306973457, "learning_rate": 2.860483319405498e-06, "loss": 0.0192, "step": 167260 }, { "epoch": 1.3534266526417995, "grad_norm": 0.08121876418590546, "learning_rate": 2.859845153450496e-06, "loss": 0.0285, "step": 167270 }, { "epoch": 1.3535075653370015, "grad_norm": 0.3659438192844391, "learning_rate": 2.859207030176364e-06, "loss": 0.019, "step": 167280 }, { "epoch": 1.3535884780322034, "grad_norm": 0.4702783226966858, "learning_rate": 2.858568949595829e-06, "loss": 0.0238, "step": 167290 }, { "epoch": 1.353669390727405, "grad_norm": 0.4056608974933624, "learning_rate": 2.8579309117216147e-06, "loss": 0.0173, "step": 167300 }, { "epoch": 1.353750303422607, "grad_norm": 0.3912610709667206, "learning_rate": 2.8572929165664464e-06, "loss": 0.0185, "step": 167310 }, { "epoch": 1.353831216117809, "grad_norm": 0.4705992043018341, "learning_rate": 2.8566549641430476e-06, "loss": 0.0273, "step": 167320 }, { "epoch": 1.3539121288130107, "grad_norm": 0.22151720523834229, "learning_rate": 2.8560170544641407e-06, "loss": 0.0156, "step": 167330 }, { "epoch": 1.3539930415082126, "grad_norm": 0.41981494426727295, "learning_rate": 2.8553791875424473e-06, "loss": 0.0244, "step": 167340 }, { "epoch": 1.3540739542034146, "grad_norm": 0.3246605694293976, "learning_rate": 2.854741363390689e-06, "loss": 0.0284, "step": 167350 }, { "epoch": 1.3541548668986163, "grad_norm": 0.7924293875694275, "learning_rate": 2.854103582021585e-06, "loss": 0.026, "step": 167360 }, { "epoch": 1.3542357795938182, "grad_norm": 0.5410882234573364, "learning_rate": 2.8534658434478544e-06, "loss": 0.0347, "step": 167370 }, { "epoch": 1.3543166922890202, "grad_norm": 0.35330405831336975, "learning_rate": 2.8528281476822174e-06, "loss": 0.0257, "step": 167380 }, { "epoch": 1.354397604984222, "grad_norm": 0.01720483973622322, "learning_rate": 2.8521904947373867e-06, "loss": 0.0122, "step": 167390 }, { "epoch": 1.3544785176794238, "grad_norm": 0.3328589200973511, "learning_rate": 2.8515528846260854e-06, "loss": 0.0225, "step": 167400 }, { "epoch": 1.3545594303746258, "grad_norm": 0.10384611040353775, "learning_rate": 2.850915317361027e-06, "loss": 0.016, "step": 167410 }, { "epoch": 1.3546403430698277, "grad_norm": 0.44573715329170227, "learning_rate": 2.850277792954923e-06, "loss": 0.0152, "step": 167420 }, { "epoch": 1.3547212557650297, "grad_norm": 0.2928745746612549, "learning_rate": 2.8496403114204917e-06, "loss": 0.0204, "step": 167430 }, { "epoch": 1.3548021684602314, "grad_norm": 0.3486948013305664, "learning_rate": 2.8490028727704468e-06, "loss": 0.0239, "step": 167440 }, { "epoch": 1.3548830811554333, "grad_norm": 0.011294669471681118, "learning_rate": 2.848365477017494e-06, "loss": 0.0197, "step": 167450 }, { "epoch": 1.3549639938506353, "grad_norm": 0.35218679904937744, "learning_rate": 2.8477281241743522e-06, "loss": 0.0225, "step": 167460 }, { "epoch": 1.355044906545837, "grad_norm": 0.38759875297546387, "learning_rate": 2.847090814253729e-06, "loss": 0.0254, "step": 167470 }, { "epoch": 1.355125819241039, "grad_norm": 0.605633020401001, "learning_rate": 2.846453547268335e-06, "loss": 0.0346, "step": 167480 }, { "epoch": 1.3552067319362409, "grad_norm": 0.519066572189331, "learning_rate": 2.845816323230878e-06, "loss": 0.0138, "step": 167490 }, { "epoch": 1.3552876446314426, "grad_norm": 0.2262984961271286, "learning_rate": 2.845179142154067e-06, "loss": 0.0287, "step": 167500 }, { "epoch": 1.3553685573266445, "grad_norm": 0.6398515105247498, "learning_rate": 2.844542004050609e-06, "loss": 0.0177, "step": 167510 }, { "epoch": 1.3554494700218465, "grad_norm": 0.2657759487628937, "learning_rate": 2.8439049089332105e-06, "loss": 0.0168, "step": 167520 }, { "epoch": 1.3555303827170482, "grad_norm": 0.4306158423423767, "learning_rate": 2.8432678568145766e-06, "loss": 0.0202, "step": 167530 }, { "epoch": 1.3556112954122501, "grad_norm": 0.49313923716545105, "learning_rate": 2.8426308477074126e-06, "loss": 0.0192, "step": 167540 }, { "epoch": 1.355692208107452, "grad_norm": 0.6441653370857239, "learning_rate": 2.8419938816244215e-06, "loss": 0.0278, "step": 167550 }, { "epoch": 1.355773120802654, "grad_norm": 0.4476068317890167, "learning_rate": 2.841356958578307e-06, "loss": 0.0193, "step": 167560 }, { "epoch": 1.355854033497856, "grad_norm": 0.42759251594543457, "learning_rate": 2.84072007858177e-06, "loss": 0.0197, "step": 167570 }, { "epoch": 1.3559349461930577, "grad_norm": 0.18579821288585663, "learning_rate": 2.8400832416475137e-06, "loss": 0.0225, "step": 167580 }, { "epoch": 1.3560158588882596, "grad_norm": 0.42530590295791626, "learning_rate": 2.8394464477882356e-06, "loss": 0.0339, "step": 167590 }, { "epoch": 1.3560967715834615, "grad_norm": 0.11859145015478134, "learning_rate": 2.83880969701664e-06, "loss": 0.0166, "step": 167600 }, { "epoch": 1.3561776842786633, "grad_norm": 0.31073763966560364, "learning_rate": 2.838172989345419e-06, "loss": 0.0229, "step": 167610 }, { "epoch": 1.3562585969738652, "grad_norm": 0.5484644770622253, "learning_rate": 2.8375363247872756e-06, "loss": 0.0208, "step": 167620 }, { "epoch": 1.3563395096690671, "grad_norm": 0.2869758903980255, "learning_rate": 2.8368997033549074e-06, "loss": 0.0211, "step": 167630 }, { "epoch": 1.3564204223642689, "grad_norm": 0.49861031770706177, "learning_rate": 2.836263125061004e-06, "loss": 0.0271, "step": 167640 }, { "epoch": 1.3565013350594708, "grad_norm": 0.5160744786262512, "learning_rate": 2.8356265899182665e-06, "loss": 0.0228, "step": 167650 }, { "epoch": 1.3565822477546727, "grad_norm": 0.16645285487174988, "learning_rate": 2.8349900979393895e-06, "loss": 0.007, "step": 167660 }, { "epoch": 1.3566631604498745, "grad_norm": 0.5278163552284241, "learning_rate": 2.8343536491370603e-06, "loss": 0.0203, "step": 167670 }, { "epoch": 1.3567440731450764, "grad_norm": 1.7342907190322876, "learning_rate": 2.833717243523978e-06, "loss": 0.024, "step": 167680 }, { "epoch": 1.3568249858402783, "grad_norm": 0.5243797302246094, "learning_rate": 2.833080881112832e-06, "loss": 0.0244, "step": 167690 }, { "epoch": 1.3569058985354803, "grad_norm": 0.5453090667724609, "learning_rate": 2.8324445619163134e-06, "loss": 0.0178, "step": 167700 }, { "epoch": 1.3569868112306822, "grad_norm": 0.5089791417121887, "learning_rate": 2.831808285947112e-06, "loss": 0.0135, "step": 167710 }, { "epoch": 1.357067723925884, "grad_norm": 0.624235987663269, "learning_rate": 2.8311720532179173e-06, "loss": 0.0259, "step": 167720 }, { "epoch": 1.3571486366210859, "grad_norm": 0.21923698484897614, "learning_rate": 2.8305358637414173e-06, "loss": 0.0159, "step": 167730 }, { "epoch": 1.3572295493162878, "grad_norm": 0.32887929677963257, "learning_rate": 2.829899717530299e-06, "loss": 0.0336, "step": 167740 }, { "epoch": 1.3573104620114895, "grad_norm": 0.4255634844303131, "learning_rate": 2.82926361459725e-06, "loss": 0.0177, "step": 167750 }, { "epoch": 1.3573913747066915, "grad_norm": 0.5015585422515869, "learning_rate": 2.8286275549549557e-06, "loss": 0.014, "step": 167760 }, { "epoch": 1.3574722874018934, "grad_norm": 0.3577883839607239, "learning_rate": 2.8279915386161007e-06, "loss": 0.0119, "step": 167770 }, { "epoch": 1.3575532000970951, "grad_norm": 0.3029015064239502, "learning_rate": 2.8273555655933672e-06, "loss": 0.0215, "step": 167780 }, { "epoch": 1.357634112792297, "grad_norm": 0.581081211566925, "learning_rate": 2.826719635899444e-06, "loss": 0.0147, "step": 167790 }, { "epoch": 1.357715025487499, "grad_norm": 0.6168042421340942, "learning_rate": 2.8260837495470077e-06, "loss": 0.028, "step": 167800 }, { "epoch": 1.357795938182701, "grad_norm": 0.0020923875272274017, "learning_rate": 2.82544790654874e-06, "loss": 0.018, "step": 167810 }, { "epoch": 1.3578768508779027, "grad_norm": 0.28418609499931335, "learning_rate": 2.8248121069173275e-06, "loss": 0.0121, "step": 167820 }, { "epoch": 1.3579577635731046, "grad_norm": 0.540001392364502, "learning_rate": 2.824176350665441e-06, "loss": 0.0285, "step": 167830 }, { "epoch": 1.3580386762683065, "grad_norm": 0.5896385908126831, "learning_rate": 2.823540637805765e-06, "loss": 0.0163, "step": 167840 }, { "epoch": 1.3581195889635085, "grad_norm": 0.3250649869441986, "learning_rate": 2.822904968350979e-06, "loss": 0.0241, "step": 167850 }, { "epoch": 1.3582005016587102, "grad_norm": 0.5509213209152222, "learning_rate": 2.8222693423137537e-06, "loss": 0.0184, "step": 167860 }, { "epoch": 1.3582814143539121, "grad_norm": 0.36883777379989624, "learning_rate": 2.821633759706771e-06, "loss": 0.0143, "step": 167870 }, { "epoch": 1.358362327049114, "grad_norm": 0.5952306985855103, "learning_rate": 2.8209982205427034e-06, "loss": 0.0284, "step": 167880 }, { "epoch": 1.3584432397443158, "grad_norm": 0.007077871356159449, "learning_rate": 2.820362724834227e-06, "loss": 0.0172, "step": 167890 }, { "epoch": 1.3585241524395177, "grad_norm": 0.2330719232559204, "learning_rate": 2.8197272725940146e-06, "loss": 0.0147, "step": 167900 }, { "epoch": 1.3586050651347197, "grad_norm": 0.3198467195034027, "learning_rate": 2.8190918638347387e-06, "loss": 0.0239, "step": 167910 }, { "epoch": 1.3586859778299214, "grad_norm": 0.481580913066864, "learning_rate": 2.818456498569071e-06, "loss": 0.0283, "step": 167920 }, { "epoch": 1.3587668905251233, "grad_norm": 0.5126122236251831, "learning_rate": 2.817821176809684e-06, "loss": 0.0164, "step": 167930 }, { "epoch": 1.3588478032203253, "grad_norm": 0.33118557929992676, "learning_rate": 2.8171858985692445e-06, "loss": 0.0212, "step": 167940 }, { "epoch": 1.3589287159155272, "grad_norm": 0.48645055294036865, "learning_rate": 2.816550663860428e-06, "loss": 0.0242, "step": 167950 }, { "epoch": 1.3590096286107292, "grad_norm": 0.30198949575424194, "learning_rate": 2.815915472695897e-06, "loss": 0.0166, "step": 167960 }, { "epoch": 1.3590905413059309, "grad_norm": 0.7064507603645325, "learning_rate": 2.815280325088319e-06, "loss": 0.0204, "step": 167970 }, { "epoch": 1.3591714540011328, "grad_norm": 0.5911794900894165, "learning_rate": 2.8146452210503673e-06, "loss": 0.0162, "step": 167980 }, { "epoch": 1.3592523666963348, "grad_norm": 0.7394247055053711, "learning_rate": 2.8140101605947002e-06, "loss": 0.0256, "step": 167990 }, { "epoch": 1.3593332793915365, "grad_norm": 0.19550751149654388, "learning_rate": 2.813375143733984e-06, "loss": 0.0148, "step": 168000 }, { "epoch": 1.3594141920867384, "grad_norm": 0.18701894581317902, "learning_rate": 2.8127401704808886e-06, "loss": 0.0156, "step": 168010 }, { "epoch": 1.3594951047819404, "grad_norm": 0.3938525319099426, "learning_rate": 2.8121052408480694e-06, "loss": 0.0298, "step": 168020 }, { "epoch": 1.359576017477142, "grad_norm": 0.5291107892990112, "learning_rate": 2.8114703548481937e-06, "loss": 0.0189, "step": 168030 }, { "epoch": 1.359656930172344, "grad_norm": 0.513148307800293, "learning_rate": 2.8108355124939235e-06, "loss": 0.0158, "step": 168040 }, { "epoch": 1.359737842867546, "grad_norm": 0.26293742656707764, "learning_rate": 2.810200713797914e-06, "loss": 0.0245, "step": 168050 }, { "epoch": 1.3598187555627477, "grad_norm": 0.5101709365844727, "learning_rate": 2.80956595877283e-06, "loss": 0.0134, "step": 168060 }, { "epoch": 1.3598996682579496, "grad_norm": 0.3031063675880432, "learning_rate": 2.8089312474313315e-06, "loss": 0.0227, "step": 168070 }, { "epoch": 1.3599805809531516, "grad_norm": 0.23842008411884308, "learning_rate": 2.8082965797860684e-06, "loss": 0.0181, "step": 168080 }, { "epoch": 1.3600614936483535, "grad_norm": 0.2534187436103821, "learning_rate": 2.8076619558497066e-06, "loss": 0.0226, "step": 168090 }, { "epoch": 1.3601424063435554, "grad_norm": 0.704199492931366, "learning_rate": 2.8070273756348976e-06, "loss": 0.0309, "step": 168100 }, { "epoch": 1.3602233190387572, "grad_norm": 0.31294095516204834, "learning_rate": 2.8063928391542984e-06, "loss": 0.0241, "step": 168110 }, { "epoch": 1.360304231733959, "grad_norm": 0.7064655423164368, "learning_rate": 2.8057583464205635e-06, "loss": 0.0205, "step": 168120 }, { "epoch": 1.360385144429161, "grad_norm": 0.4455793797969818, "learning_rate": 2.805123897446346e-06, "loss": 0.0233, "step": 168130 }, { "epoch": 1.3604660571243627, "grad_norm": 0.21552255749702454, "learning_rate": 2.8044894922442983e-06, "loss": 0.0165, "step": 168140 }, { "epoch": 1.3605469698195647, "grad_norm": 0.2050207555294037, "learning_rate": 2.8038551308270735e-06, "loss": 0.0241, "step": 168150 }, { "epoch": 1.3606278825147666, "grad_norm": 0.43331581354141235, "learning_rate": 2.803220813207319e-06, "loss": 0.0306, "step": 168160 }, { "epoch": 1.3607087952099683, "grad_norm": 0.5944920778274536, "learning_rate": 2.8025865393976926e-06, "loss": 0.0208, "step": 168170 }, { "epoch": 1.3607897079051703, "grad_norm": 0.4934467077255249, "learning_rate": 2.8019523094108357e-06, "loss": 0.0248, "step": 168180 }, { "epoch": 1.3608706206003722, "grad_norm": 0.594916582107544, "learning_rate": 2.8013181232593987e-06, "loss": 0.0154, "step": 168190 }, { "epoch": 1.360951533295574, "grad_norm": 0.1305658221244812, "learning_rate": 2.800683980956034e-06, "loss": 0.024, "step": 168200 }, { "epoch": 1.3610324459907759, "grad_norm": 0.38303300738334656, "learning_rate": 2.8000498825133814e-06, "loss": 0.021, "step": 168210 }, { "epoch": 1.3611133586859778, "grad_norm": 0.16983182728290558, "learning_rate": 2.7994158279440887e-06, "loss": 0.0105, "step": 168220 }, { "epoch": 1.3611942713811798, "grad_norm": 0.25790637731552124, "learning_rate": 2.798781817260805e-06, "loss": 0.0129, "step": 168230 }, { "epoch": 1.3612751840763817, "grad_norm": 0.33417290449142456, "learning_rate": 2.7981478504761673e-06, "loss": 0.0305, "step": 168240 }, { "epoch": 1.3613560967715834, "grad_norm": 0.40071508288383484, "learning_rate": 2.7975139276028246e-06, "loss": 0.0177, "step": 168250 }, { "epoch": 1.3614370094667854, "grad_norm": 0.344051718711853, "learning_rate": 2.7968800486534185e-06, "loss": 0.0296, "step": 168260 }, { "epoch": 1.3615179221619873, "grad_norm": 0.3502531051635742, "learning_rate": 2.7962462136405854e-06, "loss": 0.017, "step": 168270 }, { "epoch": 1.361598834857189, "grad_norm": 0.02377196028828621, "learning_rate": 2.795612422576971e-06, "loss": 0.0135, "step": 168280 }, { "epoch": 1.361679747552391, "grad_norm": 0.5262479782104492, "learning_rate": 2.7949786754752145e-06, "loss": 0.026, "step": 168290 }, { "epoch": 1.361760660247593, "grad_norm": 0.20316919684410095, "learning_rate": 2.79434497234795e-06, "loss": 0.0197, "step": 168300 }, { "epoch": 1.3618415729427946, "grad_norm": 0.578305721282959, "learning_rate": 2.7937113132078207e-06, "loss": 0.0192, "step": 168310 }, { "epoch": 1.3619224856379966, "grad_norm": 0.24224810302257538, "learning_rate": 2.7930776980674613e-06, "loss": 0.0223, "step": 168320 }, { "epoch": 1.3620033983331985, "grad_norm": 0.3068462312221527, "learning_rate": 2.792444126939508e-06, "loss": 0.026, "step": 168330 }, { "epoch": 1.3620843110284002, "grad_norm": 0.5720532536506653, "learning_rate": 2.7918105998365963e-06, "loss": 0.0126, "step": 168340 }, { "epoch": 1.3621652237236022, "grad_norm": 0.8852776885032654, "learning_rate": 2.7911771167713608e-06, "loss": 0.0296, "step": 168350 }, { "epoch": 1.362246136418804, "grad_norm": 0.18777918815612793, "learning_rate": 2.790543677756435e-06, "loss": 0.0146, "step": 168360 }, { "epoch": 1.362327049114006, "grad_norm": 0.4427133798599243, "learning_rate": 2.7899102828044505e-06, "loss": 0.0211, "step": 168370 }, { "epoch": 1.362407961809208, "grad_norm": 0.2642051875591278, "learning_rate": 2.7892769319280377e-06, "loss": 0.0158, "step": 168380 }, { "epoch": 1.3624888745044097, "grad_norm": 0.4181737005710602, "learning_rate": 2.788643625139834e-06, "loss": 0.0268, "step": 168390 }, { "epoch": 1.3625697871996116, "grad_norm": 0.4459789991378784, "learning_rate": 2.7880103624524625e-06, "loss": 0.0171, "step": 168400 }, { "epoch": 1.3626506998948136, "grad_norm": 0.38876232504844666, "learning_rate": 2.7873771438785523e-06, "loss": 0.0239, "step": 168410 }, { "epoch": 1.3627316125900153, "grad_norm": 0.8217918276786804, "learning_rate": 2.7867439694307384e-06, "loss": 0.0254, "step": 168420 }, { "epoch": 1.3628125252852172, "grad_norm": 0.31398439407348633, "learning_rate": 2.7861108391216398e-06, "loss": 0.0188, "step": 168430 }, { "epoch": 1.3628934379804192, "grad_norm": 0.06560388207435608, "learning_rate": 2.785477752963889e-06, "loss": 0.011, "step": 168440 }, { "epoch": 1.362974350675621, "grad_norm": 0.20800454914569855, "learning_rate": 2.7848447109701104e-06, "loss": 0.0077, "step": 168450 }, { "epoch": 1.3630552633708228, "grad_norm": 0.4501728415489197, "learning_rate": 2.7842117131529237e-06, "loss": 0.027, "step": 168460 }, { "epoch": 1.3631361760660248, "grad_norm": 0.37634652853012085, "learning_rate": 2.7835787595249597e-06, "loss": 0.0226, "step": 168470 }, { "epoch": 1.3632170887612267, "grad_norm": 0.679504930973053, "learning_rate": 2.782945850098837e-06, "loss": 0.0279, "step": 168480 }, { "epoch": 1.3632980014564284, "grad_norm": 0.5683857202529907, "learning_rate": 2.78231298488718e-06, "loss": 0.0312, "step": 168490 }, { "epoch": 1.3633789141516304, "grad_norm": 0.5548268556594849, "learning_rate": 2.781680163902608e-06, "loss": 0.0239, "step": 168500 }, { "epoch": 1.3634598268468323, "grad_norm": 0.221005380153656, "learning_rate": 2.7810473871577427e-06, "loss": 0.0183, "step": 168510 }, { "epoch": 1.3635407395420343, "grad_norm": 0.5003829002380371, "learning_rate": 2.780414654665202e-06, "loss": 0.0183, "step": 168520 }, { "epoch": 1.363621652237236, "grad_norm": 0.19608955085277557, "learning_rate": 2.7797819664376057e-06, "loss": 0.0129, "step": 168530 }, { "epoch": 1.363702564932438, "grad_norm": 0.5369895100593567, "learning_rate": 2.779149322487571e-06, "loss": 0.0274, "step": 168540 }, { "epoch": 1.3637834776276399, "grad_norm": 0.2869773209095001, "learning_rate": 2.7785167228277143e-06, "loss": 0.0289, "step": 168550 }, { "epoch": 1.3638643903228416, "grad_norm": 0.4335385262966156, "learning_rate": 2.7778841674706526e-06, "loss": 0.0205, "step": 168560 }, { "epoch": 1.3639453030180435, "grad_norm": 0.45471566915512085, "learning_rate": 2.7772516564289977e-06, "loss": 0.0177, "step": 168570 }, { "epoch": 1.3640262157132454, "grad_norm": 0.25055038928985596, "learning_rate": 2.77661918971537e-06, "loss": 0.0167, "step": 168580 }, { "epoch": 1.3641071284084472, "grad_norm": 0.19808076322078705, "learning_rate": 2.775986767342377e-06, "loss": 0.0156, "step": 168590 }, { "epoch": 1.364188041103649, "grad_norm": 0.31075021624565125, "learning_rate": 2.7753543893226304e-06, "loss": 0.0226, "step": 168600 }, { "epoch": 1.364268953798851, "grad_norm": 0.09126933664083481, "learning_rate": 2.774722055668748e-06, "loss": 0.0214, "step": 168610 }, { "epoch": 1.364349866494053, "grad_norm": 0.07390367984771729, "learning_rate": 2.774089766393335e-06, "loss": 0.022, "step": 168620 }, { "epoch": 1.364430779189255, "grad_norm": 0.28463220596313477, "learning_rate": 2.7734575215090005e-06, "loss": 0.0261, "step": 168630 }, { "epoch": 1.3645116918844566, "grad_norm": 0.4888712763786316, "learning_rate": 2.7728253210283585e-06, "loss": 0.0188, "step": 168640 }, { "epoch": 1.3645926045796586, "grad_norm": 0.20472276210784912, "learning_rate": 2.7721931649640104e-06, "loss": 0.0177, "step": 168650 }, { "epoch": 1.3646735172748605, "grad_norm": 0.39837536215782166, "learning_rate": 2.7715610533285687e-06, "loss": 0.028, "step": 168660 }, { "epoch": 1.3647544299700622, "grad_norm": 0.22058402001857758, "learning_rate": 2.7709289861346367e-06, "loss": 0.023, "step": 168670 }, { "epoch": 1.3648353426652642, "grad_norm": 0.40027695894241333, "learning_rate": 2.7702969633948208e-06, "loss": 0.0179, "step": 168680 }, { "epoch": 1.3649162553604661, "grad_norm": 0.7329269647598267, "learning_rate": 2.769664985121725e-06, "loss": 0.021, "step": 168690 }, { "epoch": 1.3649971680556678, "grad_norm": 0.1094912588596344, "learning_rate": 2.769033051327952e-06, "loss": 0.0139, "step": 168700 }, { "epoch": 1.3650780807508698, "grad_norm": 0.45875290036201477, "learning_rate": 2.7684011620261054e-06, "loss": 0.0221, "step": 168710 }, { "epoch": 1.3651589934460717, "grad_norm": 0.39399483799934387, "learning_rate": 2.767769317228786e-06, "loss": 0.0173, "step": 168720 }, { "epoch": 1.3652399061412734, "grad_norm": 0.5625720024108887, "learning_rate": 2.767137516948596e-06, "loss": 0.0242, "step": 168730 }, { "epoch": 1.3653208188364754, "grad_norm": 0.28369373083114624, "learning_rate": 2.766505761198134e-06, "loss": 0.02, "step": 168740 }, { "epoch": 1.3654017315316773, "grad_norm": 0.3506944477558136, "learning_rate": 2.7658740499899992e-06, "loss": 0.0268, "step": 168750 }, { "epoch": 1.3654826442268793, "grad_norm": 0.348510205745697, "learning_rate": 2.7652423833367904e-06, "loss": 0.0173, "step": 168760 }, { "epoch": 1.3655635569220812, "grad_norm": 0.28959155082702637, "learning_rate": 2.7646107612511043e-06, "loss": 0.0292, "step": 168770 }, { "epoch": 1.365644469617283, "grad_norm": 0.33733701705932617, "learning_rate": 2.7639791837455378e-06, "loss": 0.0227, "step": 168780 }, { "epoch": 1.3657253823124849, "grad_norm": 0.337887167930603, "learning_rate": 2.7633476508326827e-06, "loss": 0.0167, "step": 168790 }, { "epoch": 1.3658062950076868, "grad_norm": 0.46492645144462585, "learning_rate": 2.7627161625251424e-06, "loss": 0.0174, "step": 168800 }, { "epoch": 1.3658872077028885, "grad_norm": 0.14065515995025635, "learning_rate": 2.7620847188355027e-06, "loss": 0.0131, "step": 168810 }, { "epoch": 1.3659681203980905, "grad_norm": 0.3398807644844055, "learning_rate": 2.7614533197763573e-06, "loss": 0.0288, "step": 168820 }, { "epoch": 1.3660490330932924, "grad_norm": 0.3314025402069092, "learning_rate": 2.760821965360303e-06, "loss": 0.0182, "step": 168830 }, { "epoch": 1.3661299457884941, "grad_norm": 0.21823911368846893, "learning_rate": 2.7601906555999254e-06, "loss": 0.0207, "step": 168840 }, { "epoch": 1.366210858483696, "grad_norm": 0.49598783254623413, "learning_rate": 2.759559390507815e-06, "loss": 0.0171, "step": 168850 }, { "epoch": 1.366291771178898, "grad_norm": 0.6405356526374817, "learning_rate": 2.7589281700965646e-06, "loss": 0.0191, "step": 168860 }, { "epoch": 1.3663726838740997, "grad_norm": 0.2951127886772156, "learning_rate": 2.7582969943787608e-06, "loss": 0.0336, "step": 168870 }, { "epoch": 1.3664535965693017, "grad_norm": 0.2758899927139282, "learning_rate": 2.7576658633669912e-06, "loss": 0.0203, "step": 168880 }, { "epoch": 1.3665345092645036, "grad_norm": 0.47390371561050415, "learning_rate": 2.757034777073842e-06, "loss": 0.0204, "step": 168890 }, { "epoch": 1.3666154219597055, "grad_norm": 0.08428661525249481, "learning_rate": 2.756403735511899e-06, "loss": 0.0251, "step": 168900 }, { "epoch": 1.3666963346549075, "grad_norm": 0.13684149086475372, "learning_rate": 2.755772738693747e-06, "loss": 0.0112, "step": 168910 }, { "epoch": 1.3667772473501092, "grad_norm": 0.47904548048973083, "learning_rate": 2.7551417866319707e-06, "loss": 0.0219, "step": 168920 }, { "epoch": 1.3668581600453111, "grad_norm": 0.4162895381450653, "learning_rate": 2.754510879339152e-06, "loss": 0.017, "step": 168930 }, { "epoch": 1.366939072740513, "grad_norm": 0.17962582409381866, "learning_rate": 2.753880016827873e-06, "loss": 0.0173, "step": 168940 }, { "epoch": 1.3670199854357148, "grad_norm": 0.12680044770240784, "learning_rate": 2.7532491991107153e-06, "loss": 0.0166, "step": 168950 }, { "epoch": 1.3671008981309167, "grad_norm": 0.5245247483253479, "learning_rate": 2.7526184262002596e-06, "loss": 0.0176, "step": 168960 }, { "epoch": 1.3671818108261187, "grad_norm": 0.48346060514450073, "learning_rate": 2.7519876981090844e-06, "loss": 0.011, "step": 168970 }, { "epoch": 1.3672627235213204, "grad_norm": 0.3084257245063782, "learning_rate": 2.751357014849767e-06, "loss": 0.0275, "step": 168980 }, { "epoch": 1.3673436362165223, "grad_norm": 0.256170392036438, "learning_rate": 2.7507263764348914e-06, "loss": 0.0194, "step": 168990 }, { "epoch": 1.3674245489117243, "grad_norm": 0.22579623758792877, "learning_rate": 2.750095782877027e-06, "loss": 0.0148, "step": 169000 }, { "epoch": 1.3675054616069262, "grad_norm": 0.736581027507782, "learning_rate": 2.7494652341887507e-06, "loss": 0.0275, "step": 169010 }, { "epoch": 1.367586374302128, "grad_norm": 0.25610050559043884, "learning_rate": 2.7488347303826436e-06, "loss": 0.0146, "step": 169020 }, { "epoch": 1.3676672869973299, "grad_norm": 0.3672107756137848, "learning_rate": 2.748204271471273e-06, "loss": 0.0325, "step": 169030 }, { "epoch": 1.3677481996925318, "grad_norm": 0.48786813020706177, "learning_rate": 2.747573857467213e-06, "loss": 0.017, "step": 169040 }, { "epoch": 1.3678291123877337, "grad_norm": 0.22682799398899078, "learning_rate": 2.7469434883830386e-06, "loss": 0.0112, "step": 169050 }, { "epoch": 1.3679100250829355, "grad_norm": 0.36063918471336365, "learning_rate": 2.7463131642313202e-06, "loss": 0.019, "step": 169060 }, { "epoch": 1.3679909377781374, "grad_norm": 0.4540158212184906, "learning_rate": 2.7456828850246282e-06, "loss": 0.0165, "step": 169070 }, { "epoch": 1.3680718504733393, "grad_norm": 0.49397921562194824, "learning_rate": 2.7450526507755317e-06, "loss": 0.0227, "step": 169080 }, { "epoch": 1.368152763168541, "grad_norm": 0.34421834349632263, "learning_rate": 2.7444224614965997e-06, "loss": 0.0204, "step": 169090 }, { "epoch": 1.368233675863743, "grad_norm": 0.42513328790664673, "learning_rate": 2.7437923172004e-06, "loss": 0.0205, "step": 169100 }, { "epoch": 1.368314588558945, "grad_norm": 0.31593695282936096, "learning_rate": 2.7431622178994998e-06, "loss": 0.0128, "step": 169110 }, { "epoch": 1.3683955012541467, "grad_norm": 0.3868034780025482, "learning_rate": 2.7425321636064645e-06, "loss": 0.025, "step": 169120 }, { "epoch": 1.3684764139493486, "grad_norm": 0.6208928227424622, "learning_rate": 2.7419021543338592e-06, "loss": 0.0229, "step": 169130 }, { "epoch": 1.3685573266445505, "grad_norm": 0.36836716532707214, "learning_rate": 2.7412721900942485e-06, "loss": 0.0176, "step": 169140 }, { "epoch": 1.3686382393397525, "grad_norm": 0.0396743044257164, "learning_rate": 2.7406422709001956e-06, "loss": 0.0083, "step": 169150 }, { "epoch": 1.3687191520349544, "grad_norm": 0.17168375849723816, "learning_rate": 2.7400123967642635e-06, "loss": 0.0149, "step": 169160 }, { "epoch": 1.3688000647301561, "grad_norm": 0.298095166683197, "learning_rate": 2.7393825676990126e-06, "loss": 0.0155, "step": 169170 }, { "epoch": 1.368880977425358, "grad_norm": 0.11548919230699539, "learning_rate": 2.7387527837170035e-06, "loss": 0.0213, "step": 169180 }, { "epoch": 1.36896189012056, "grad_norm": 0.49312347173690796, "learning_rate": 2.7381230448307973e-06, "loss": 0.0227, "step": 169190 }, { "epoch": 1.3690428028157617, "grad_norm": 0.5671917200088501, "learning_rate": 2.73749335105295e-06, "loss": 0.0261, "step": 169200 }, { "epoch": 1.3691237155109637, "grad_norm": 0.3479393422603607, "learning_rate": 2.7368637023960225e-06, "loss": 0.0165, "step": 169210 }, { "epoch": 1.3692046282061656, "grad_norm": 0.41321852803230286, "learning_rate": 2.7362340988725735e-06, "loss": 0.0145, "step": 169220 }, { "epoch": 1.3692855409013673, "grad_norm": 0.394122838973999, "learning_rate": 2.735604540495152e-06, "loss": 0.0368, "step": 169230 }, { "epoch": 1.3693664535965693, "grad_norm": 0.4245058596134186, "learning_rate": 2.7349750272763197e-06, "loss": 0.0217, "step": 169240 }, { "epoch": 1.3694473662917712, "grad_norm": 0.23839789628982544, "learning_rate": 2.734345559228631e-06, "loss": 0.011, "step": 169250 }, { "epoch": 1.369528278986973, "grad_norm": 0.32600536942481995, "learning_rate": 2.733716136364633e-06, "loss": 0.0196, "step": 169260 }, { "epoch": 1.3696091916821749, "grad_norm": 0.9152481555938721, "learning_rate": 2.7330867586968847e-06, "loss": 0.0181, "step": 169270 }, { "epoch": 1.3696901043773768, "grad_norm": 0.3253726661205292, "learning_rate": 2.7324574262379346e-06, "loss": 0.0188, "step": 169280 }, { "epoch": 1.3697710170725788, "grad_norm": 0.3204314112663269, "learning_rate": 2.731828139000334e-06, "loss": 0.0129, "step": 169290 }, { "epoch": 1.3698519297677807, "grad_norm": 0.2587268054485321, "learning_rate": 2.7311988969966334e-06, "loss": 0.0157, "step": 169300 }, { "epoch": 1.3699328424629824, "grad_norm": 0.3395329415798187, "learning_rate": 2.7305697002393816e-06, "loss": 0.0206, "step": 169310 }, { "epoch": 1.3700137551581844, "grad_norm": 0.17996113002300262, "learning_rate": 2.7299405487411255e-06, "loss": 0.014, "step": 169320 }, { "epoch": 1.3700946678533863, "grad_norm": 0.0004513685416895896, "learning_rate": 2.729311442514413e-06, "loss": 0.033, "step": 169330 }, { "epoch": 1.370175580548588, "grad_norm": 0.010362002067267895, "learning_rate": 2.72868238157179e-06, "loss": 0.0205, "step": 169340 }, { "epoch": 1.37025649324379, "grad_norm": 0.23703765869140625, "learning_rate": 2.728053365925802e-06, "loss": 0.0169, "step": 169350 }, { "epoch": 1.370337405938992, "grad_norm": 0.6071069240570068, "learning_rate": 2.7274243955889933e-06, "loss": 0.0224, "step": 169360 }, { "epoch": 1.3704183186341936, "grad_norm": 0.4719249904155731, "learning_rate": 2.7267954705739073e-06, "loss": 0.013, "step": 169370 }, { "epoch": 1.3704992313293956, "grad_norm": 0.33958667516708374, "learning_rate": 2.726166590893087e-06, "loss": 0.0144, "step": 169380 }, { "epoch": 1.3705801440245975, "grad_norm": 0.8231051564216614, "learning_rate": 2.7255377565590737e-06, "loss": 0.0142, "step": 169390 }, { "epoch": 1.3706610567197992, "grad_norm": 0.40116778016090393, "learning_rate": 2.724908967584406e-06, "loss": 0.0205, "step": 169400 }, { "epoch": 1.3707419694150011, "grad_norm": 0.420226126909256, "learning_rate": 2.72428022398163e-06, "loss": 0.0288, "step": 169410 }, { "epoch": 1.370822882110203, "grad_norm": 0.06165314465761185, "learning_rate": 2.7236515257632767e-06, "loss": 0.017, "step": 169420 }, { "epoch": 1.370903794805405, "grad_norm": 0.339057594537735, "learning_rate": 2.7230228729418904e-06, "loss": 0.0208, "step": 169430 }, { "epoch": 1.370984707500607, "grad_norm": 0.623589038848877, "learning_rate": 2.7223942655300082e-06, "loss": 0.0186, "step": 169440 }, { "epoch": 1.3710656201958087, "grad_norm": 0.43178972601890564, "learning_rate": 2.7217657035401602e-06, "loss": 0.0147, "step": 169450 }, { "epoch": 1.3711465328910106, "grad_norm": 0.4039303958415985, "learning_rate": 2.721137186984887e-06, "loss": 0.0185, "step": 169460 }, { "epoch": 1.3712274455862126, "grad_norm": 0.19319626688957214, "learning_rate": 2.720508715876722e-06, "loss": 0.0088, "step": 169470 }, { "epoch": 1.3713083582814143, "grad_norm": 0.4354579448699951, "learning_rate": 2.7198802902281986e-06, "loss": 0.0203, "step": 169480 }, { "epoch": 1.3713892709766162, "grad_norm": 0.5189086198806763, "learning_rate": 2.7192519100518488e-06, "loss": 0.0181, "step": 169490 }, { "epoch": 1.3714701836718182, "grad_norm": 0.3557056486606598, "learning_rate": 2.718623575360205e-06, "loss": 0.0173, "step": 169500 }, { "epoch": 1.3715510963670199, "grad_norm": 0.3163824677467346, "learning_rate": 2.7179952861657975e-06, "loss": 0.0213, "step": 169510 }, { "epoch": 1.3716320090622218, "grad_norm": 0.2840098440647125, "learning_rate": 2.717367042481157e-06, "loss": 0.0193, "step": 169520 }, { "epoch": 1.3717129217574238, "grad_norm": 0.514287531375885, "learning_rate": 2.7167388443188127e-06, "loss": 0.036, "step": 169530 }, { "epoch": 1.3717938344526255, "grad_norm": 0.2951362431049347, "learning_rate": 2.7161106916912903e-06, "loss": 0.0155, "step": 169540 }, { "epoch": 1.3718747471478274, "grad_norm": 0.5155137181282043, "learning_rate": 2.71548258461112e-06, "loss": 0.0313, "step": 169550 }, { "epoch": 1.3719556598430294, "grad_norm": 0.38336655497550964, "learning_rate": 2.7148545230908264e-06, "loss": 0.0162, "step": 169560 }, { "epoch": 1.3720365725382313, "grad_norm": 0.586635172367096, "learning_rate": 2.7142265071429346e-06, "loss": 0.0324, "step": 169570 }, { "epoch": 1.3721174852334332, "grad_norm": 0.3593367338180542, "learning_rate": 2.71359853677997e-06, "loss": 0.0214, "step": 169580 }, { "epoch": 1.372198397928635, "grad_norm": 0.05320562794804573, "learning_rate": 2.712970612014454e-06, "loss": 0.0201, "step": 169590 }, { "epoch": 1.372279310623837, "grad_norm": 0.5031298398971558, "learning_rate": 2.7123427328589146e-06, "loss": 0.0182, "step": 169600 }, { "epoch": 1.3723602233190388, "grad_norm": 0.20208624005317688, "learning_rate": 2.7117148993258664e-06, "loss": 0.0174, "step": 169610 }, { "epoch": 1.3724411360142406, "grad_norm": 0.39327600598335266, "learning_rate": 2.711087111427835e-06, "loss": 0.0216, "step": 169620 }, { "epoch": 1.3725220487094425, "grad_norm": 0.49106594920158386, "learning_rate": 2.7104593691773416e-06, "loss": 0.0249, "step": 169630 }, { "epoch": 1.3726029614046444, "grad_norm": 0.22870637476444244, "learning_rate": 2.709831672586899e-06, "loss": 0.0142, "step": 169640 }, { "epoch": 1.3726838740998462, "grad_norm": 0.2919504940509796, "learning_rate": 2.70920402166903e-06, "loss": 0.0193, "step": 169650 }, { "epoch": 1.372764786795048, "grad_norm": 0.4681641757488251, "learning_rate": 2.708576416436254e-06, "loss": 0.0108, "step": 169660 }, { "epoch": 1.37284569949025, "grad_norm": 0.19177626073360443, "learning_rate": 2.707948856901079e-06, "loss": 0.013, "step": 169670 }, { "epoch": 1.372926612185452, "grad_norm": 0.4527391791343689, "learning_rate": 2.707321343076027e-06, "loss": 0.0128, "step": 169680 }, { "epoch": 1.3730075248806537, "grad_norm": 0.6400918960571289, "learning_rate": 2.7066938749736117e-06, "loss": 0.0128, "step": 169690 }, { "epoch": 1.3730884375758556, "grad_norm": 0.05959804356098175, "learning_rate": 2.7060664526063452e-06, "loss": 0.0281, "step": 169700 }, { "epoch": 1.3731693502710576, "grad_norm": 0.4533214867115021, "learning_rate": 2.7054390759867404e-06, "loss": 0.0221, "step": 169710 }, { "epoch": 1.3732502629662595, "grad_norm": 0.20515786111354828, "learning_rate": 2.704811745127309e-06, "loss": 0.0172, "step": 169720 }, { "epoch": 1.3733311756614612, "grad_norm": 0.3066313862800598, "learning_rate": 2.704184460040563e-06, "loss": 0.0161, "step": 169730 }, { "epoch": 1.3734120883566632, "grad_norm": 0.5317272543907166, "learning_rate": 2.7035572207390105e-06, "loss": 0.0212, "step": 169740 }, { "epoch": 1.3734930010518651, "grad_norm": 0.5957231521606445, "learning_rate": 2.7029300272351594e-06, "loss": 0.0236, "step": 169750 }, { "epoch": 1.3735739137470668, "grad_norm": 0.5499454736709595, "learning_rate": 2.7023028795415233e-06, "loss": 0.0238, "step": 169760 }, { "epoch": 1.3736548264422688, "grad_norm": 0.47003933787345886, "learning_rate": 2.7016757776706036e-06, "loss": 0.0249, "step": 169770 }, { "epoch": 1.3737357391374707, "grad_norm": 0.49985072016716003, "learning_rate": 2.701048721634907e-06, "loss": 0.0153, "step": 169780 }, { "epoch": 1.3738166518326724, "grad_norm": 0.185278058052063, "learning_rate": 2.7004217114469442e-06, "loss": 0.0215, "step": 169790 }, { "epoch": 1.3738975645278744, "grad_norm": 0.42815643548965454, "learning_rate": 2.6997947471192145e-06, "loss": 0.0251, "step": 169800 }, { "epoch": 1.3739784772230763, "grad_norm": 0.24166584014892578, "learning_rate": 2.6991678286642196e-06, "loss": 0.0168, "step": 169810 }, { "epoch": 1.3740593899182783, "grad_norm": 0.3466264605522156, "learning_rate": 2.698540956094469e-06, "loss": 0.0219, "step": 169820 }, { "epoch": 1.3741403026134802, "grad_norm": 0.7430739998817444, "learning_rate": 2.697914129422457e-06, "loss": 0.0122, "step": 169830 }, { "epoch": 1.374221215308682, "grad_norm": 0.6461272835731506, "learning_rate": 2.6972873486606893e-06, "loss": 0.0217, "step": 169840 }, { "epoch": 1.3743021280038838, "grad_norm": 0.13883207738399506, "learning_rate": 2.6966606138216665e-06, "loss": 0.0272, "step": 169850 }, { "epoch": 1.3743830406990858, "grad_norm": 0.5642642378807068, "learning_rate": 2.6960339249178812e-06, "loss": 0.0259, "step": 169860 }, { "epoch": 1.3744639533942875, "grad_norm": 0.16991619765758514, "learning_rate": 2.695407281961837e-06, "loss": 0.0304, "step": 169870 }, { "epoch": 1.3745448660894894, "grad_norm": 0.31260281801223755, "learning_rate": 2.694780684966032e-06, "loss": 0.0307, "step": 169880 }, { "epoch": 1.3746257787846914, "grad_norm": 0.29639849066734314, "learning_rate": 2.6941541339429555e-06, "loss": 0.0176, "step": 169890 }, { "epoch": 1.374706691479893, "grad_norm": 0.21108224987983704, "learning_rate": 2.693527628905108e-06, "loss": 0.0148, "step": 169900 }, { "epoch": 1.374787604175095, "grad_norm": 0.5784043073654175, "learning_rate": 2.692901169864984e-06, "loss": 0.0334, "step": 169910 }, { "epoch": 1.374868516870297, "grad_norm": 0.4090352952480316, "learning_rate": 2.692274756835075e-06, "loss": 0.0138, "step": 169920 }, { "epoch": 1.3749494295654987, "grad_norm": 0.46019843220710754, "learning_rate": 2.6916483898278746e-06, "loss": 0.0174, "step": 169930 }, { "epoch": 1.3750303422607006, "grad_norm": 0.43133339285850525, "learning_rate": 2.6910220688558736e-06, "loss": 0.0236, "step": 169940 }, { "epoch": 1.3751112549559026, "grad_norm": 0.4097122550010681, "learning_rate": 2.690395793931563e-06, "loss": 0.0298, "step": 169950 }, { "epoch": 1.3751921676511045, "grad_norm": 0.3272750973701477, "learning_rate": 2.689769565067433e-06, "loss": 0.0184, "step": 169960 }, { "epoch": 1.3752730803463065, "grad_norm": 0.46203458309173584, "learning_rate": 2.6891433822759695e-06, "loss": 0.0209, "step": 169970 }, { "epoch": 1.3753539930415082, "grad_norm": 0.2961089313030243, "learning_rate": 2.688517245569666e-06, "loss": 0.0262, "step": 169980 }, { "epoch": 1.3754349057367101, "grad_norm": 0.19732779264450073, "learning_rate": 2.6878911549610055e-06, "loss": 0.0207, "step": 169990 }, { "epoch": 1.375515818431912, "grad_norm": 0.5089918971061707, "learning_rate": 2.6872651104624723e-06, "loss": 0.0231, "step": 170000 }, { "epoch": 1.3755967311271138, "grad_norm": 0.3930474817752838, "learning_rate": 2.686639112086558e-06, "loss": 0.0171, "step": 170010 }, { "epoch": 1.3756776438223157, "grad_norm": 0.26803404092788696, "learning_rate": 2.6860131598457397e-06, "loss": 0.0239, "step": 170020 }, { "epoch": 1.3757585565175177, "grad_norm": 0.37848180532455444, "learning_rate": 2.685387253752505e-06, "loss": 0.0217, "step": 170030 }, { "epoch": 1.3758394692127194, "grad_norm": 0.38087964057922363, "learning_rate": 2.6847613938193372e-06, "loss": 0.0217, "step": 170040 }, { "epoch": 1.3759203819079213, "grad_norm": 0.13190269470214844, "learning_rate": 2.6841355800587123e-06, "loss": 0.0154, "step": 170050 }, { "epoch": 1.3760012946031233, "grad_norm": 0.2995418906211853, "learning_rate": 2.683509812483115e-06, "loss": 0.0151, "step": 170060 }, { "epoch": 1.376082207298325, "grad_norm": 0.36223500967025757, "learning_rate": 2.6828840911050268e-06, "loss": 0.0138, "step": 170070 }, { "epoch": 1.376163119993527, "grad_norm": 0.38316914439201355, "learning_rate": 2.6822584159369202e-06, "loss": 0.0209, "step": 170080 }, { "epoch": 1.3762440326887289, "grad_norm": 0.5010007619857788, "learning_rate": 2.6816327869912784e-06, "loss": 0.0184, "step": 170090 }, { "epoch": 1.3763249453839308, "grad_norm": 0.45683565735816956, "learning_rate": 2.681007204280577e-06, "loss": 0.0197, "step": 170100 }, { "epoch": 1.3764058580791327, "grad_norm": 0.503693163394928, "learning_rate": 2.68038166781729e-06, "loss": 0.0123, "step": 170110 }, { "epoch": 1.3764867707743345, "grad_norm": 0.043267831206321716, "learning_rate": 2.6797561776138942e-06, "loss": 0.0245, "step": 170120 }, { "epoch": 1.3765676834695364, "grad_norm": 0.27661651372909546, "learning_rate": 2.679130733682863e-06, "loss": 0.021, "step": 170130 }, { "epoch": 1.3766485961647383, "grad_norm": 0.20038250088691711, "learning_rate": 2.6785053360366703e-06, "loss": 0.0219, "step": 170140 }, { "epoch": 1.37672950885994, "grad_norm": 0.1892004758119583, "learning_rate": 2.677879984687788e-06, "loss": 0.0245, "step": 170150 }, { "epoch": 1.376810421555142, "grad_norm": 0.32861971855163574, "learning_rate": 2.6772546796486843e-06, "loss": 0.0205, "step": 170160 }, { "epoch": 1.376891334250344, "grad_norm": 0.6811236143112183, "learning_rate": 2.6766294209318365e-06, "loss": 0.0164, "step": 170170 }, { "epoch": 1.3769722469455457, "grad_norm": 0.3315722346305847, "learning_rate": 2.6760042085497086e-06, "loss": 0.0165, "step": 170180 }, { "epoch": 1.3770531596407476, "grad_norm": 0.2118912935256958, "learning_rate": 2.6753790425147684e-06, "loss": 0.0186, "step": 170190 }, { "epoch": 1.3771340723359495, "grad_norm": 0.15756773948669434, "learning_rate": 2.67475392283949e-06, "loss": 0.0158, "step": 170200 }, { "epoch": 1.3772149850311513, "grad_norm": 0.3799356520175934, "learning_rate": 2.674128849536333e-06, "loss": 0.014, "step": 170210 }, { "epoch": 1.3772958977263532, "grad_norm": 0.29596444964408875, "learning_rate": 2.6735038226177644e-06, "loss": 0.0288, "step": 170220 }, { "epoch": 1.3773768104215551, "grad_norm": 0.3119357228279114, "learning_rate": 2.6728788420962547e-06, "loss": 0.0152, "step": 170230 }, { "epoch": 1.377457723116757, "grad_norm": 0.36429017782211304, "learning_rate": 2.6722539079842603e-06, "loss": 0.0141, "step": 170240 }, { "epoch": 1.377538635811959, "grad_norm": 0.326029509305954, "learning_rate": 2.6716290202942484e-06, "loss": 0.0164, "step": 170250 }, { "epoch": 1.3776195485071607, "grad_norm": 0.3432043194770813, "learning_rate": 2.671004179038683e-06, "loss": 0.0134, "step": 170260 }, { "epoch": 1.3777004612023627, "grad_norm": 0.3590518534183502, "learning_rate": 2.6703793842300186e-06, "loss": 0.0115, "step": 170270 }, { "epoch": 1.3777813738975646, "grad_norm": 0.3756798803806305, "learning_rate": 2.6697546358807213e-06, "loss": 0.0277, "step": 170280 }, { "epoch": 1.3778622865927663, "grad_norm": 0.28239431977272034, "learning_rate": 2.6691299340032496e-06, "loss": 0.0181, "step": 170290 }, { "epoch": 1.3779431992879683, "grad_norm": 0.16417233645915985, "learning_rate": 2.6685052786100575e-06, "loss": 0.0106, "step": 170300 }, { "epoch": 1.3780241119831702, "grad_norm": 0.27360519766807556, "learning_rate": 2.6678806697136074e-06, "loss": 0.0174, "step": 170310 }, { "epoch": 1.378105024678372, "grad_norm": 0.10064762085676193, "learning_rate": 2.667256107326354e-06, "loss": 0.0298, "step": 170320 }, { "epoch": 1.3781859373735739, "grad_norm": 0.3373158276081085, "learning_rate": 2.6666315914607533e-06, "loss": 0.0223, "step": 170330 }, { "epoch": 1.3782668500687758, "grad_norm": 0.29049280285835266, "learning_rate": 2.6660071221292595e-06, "loss": 0.0302, "step": 170340 }, { "epoch": 1.3783477627639777, "grad_norm": 0.3593105673789978, "learning_rate": 2.6653826993443256e-06, "loss": 0.0164, "step": 170350 }, { "epoch": 1.3784286754591795, "grad_norm": 0.6050651669502258, "learning_rate": 2.664758323118406e-06, "loss": 0.0171, "step": 170360 }, { "epoch": 1.3785095881543814, "grad_norm": 0.6412796378135681, "learning_rate": 2.664133993463951e-06, "loss": 0.013, "step": 170370 }, { "epoch": 1.3785905008495833, "grad_norm": 0.3235081732273102, "learning_rate": 2.663509710393411e-06, "loss": 0.0191, "step": 170380 }, { "epoch": 1.3786714135447853, "grad_norm": 0.5106979608535767, "learning_rate": 2.6628854739192413e-06, "loss": 0.0274, "step": 170390 }, { "epoch": 1.378752326239987, "grad_norm": 0.35728657245635986, "learning_rate": 2.662261284053884e-06, "loss": 0.0243, "step": 170400 }, { "epoch": 1.378833238935189, "grad_norm": 0.1637914478778839, "learning_rate": 2.6616371408097886e-06, "loss": 0.0132, "step": 170410 }, { "epoch": 1.3789141516303909, "grad_norm": 0.2567473351955414, "learning_rate": 2.661013044199409e-06, "loss": 0.0164, "step": 170420 }, { "epoch": 1.3789950643255926, "grad_norm": 0.42483973503112793, "learning_rate": 2.660388994235183e-06, "loss": 0.0258, "step": 170430 }, { "epoch": 1.3790759770207945, "grad_norm": 0.2543736696243286, "learning_rate": 2.6597649909295574e-06, "loss": 0.0192, "step": 170440 }, { "epoch": 1.3791568897159965, "grad_norm": 0.3041194975376129, "learning_rate": 2.6591410342949835e-06, "loss": 0.0158, "step": 170450 }, { "epoch": 1.3792378024111982, "grad_norm": 0.523638129234314, "learning_rate": 2.6585171243438957e-06, "loss": 0.0247, "step": 170460 }, { "epoch": 1.3793187151064001, "grad_norm": 0.18980859220027924, "learning_rate": 2.657893261088742e-06, "loss": 0.0121, "step": 170470 }, { "epoch": 1.379399627801602, "grad_norm": 0.13882651925086975, "learning_rate": 2.657269444541964e-06, "loss": 0.0177, "step": 170480 }, { "epoch": 1.379480540496804, "grad_norm": 0.4614960849285126, "learning_rate": 2.6566456747160003e-06, "loss": 0.0232, "step": 170490 }, { "epoch": 1.379561453192006, "grad_norm": 0.4349816143512726, "learning_rate": 2.6560219516232923e-06, "loss": 0.0191, "step": 170500 }, { "epoch": 1.3796423658872077, "grad_norm": 0.41539666056632996, "learning_rate": 2.6553982752762774e-06, "loss": 0.0144, "step": 170510 }, { "epoch": 1.3797232785824096, "grad_norm": 0.45158448815345764, "learning_rate": 2.6547746456873946e-06, "loss": 0.0191, "step": 170520 }, { "epoch": 1.3798041912776116, "grad_norm": 0.17541372776031494, "learning_rate": 2.6541510628690804e-06, "loss": 0.0176, "step": 170530 }, { "epoch": 1.3798851039728133, "grad_norm": 0.2661362588405609, "learning_rate": 2.6535275268337707e-06, "loss": 0.0128, "step": 170540 }, { "epoch": 1.3799660166680152, "grad_norm": 0.6228001713752747, "learning_rate": 2.6529040375939007e-06, "loss": 0.0213, "step": 170550 }, { "epoch": 1.3800469293632172, "grad_norm": 0.1385851502418518, "learning_rate": 2.6522805951619057e-06, "loss": 0.0122, "step": 170560 }, { "epoch": 1.3801278420584189, "grad_norm": 0.440190851688385, "learning_rate": 2.651657199550215e-06, "loss": 0.0292, "step": 170570 }, { "epoch": 1.3802087547536208, "grad_norm": 0.5067793726921082, "learning_rate": 2.6510338507712687e-06, "loss": 0.0288, "step": 170580 }, { "epoch": 1.3802896674488228, "grad_norm": 0.23151175677776337, "learning_rate": 2.650410548837491e-06, "loss": 0.0179, "step": 170590 }, { "epoch": 1.3803705801440245, "grad_norm": 0.5167950987815857, "learning_rate": 2.6497872937613124e-06, "loss": 0.0241, "step": 170600 }, { "epoch": 1.3804514928392264, "grad_norm": 0.5100075006484985, "learning_rate": 2.649164085555169e-06, "loss": 0.0144, "step": 170610 }, { "epoch": 1.3805324055344284, "grad_norm": 0.47572118043899536, "learning_rate": 2.648540924231483e-06, "loss": 0.0224, "step": 170620 }, { "epoch": 1.3806133182296303, "grad_norm": 0.32958412170410156, "learning_rate": 2.647917809802682e-06, "loss": 0.0193, "step": 170630 }, { "epoch": 1.3806942309248322, "grad_norm": 0.2702564299106598, "learning_rate": 2.647294742281199e-06, "loss": 0.0112, "step": 170640 }, { "epoch": 1.380775143620034, "grad_norm": 0.37752819061279297, "learning_rate": 2.646671721679451e-06, "loss": 0.0182, "step": 170650 }, { "epoch": 1.380856056315236, "grad_norm": 0.17405402660369873, "learning_rate": 2.646048748009871e-06, "loss": 0.0239, "step": 170660 }, { "epoch": 1.3809369690104378, "grad_norm": 0.5307783484458923, "learning_rate": 2.6454258212848784e-06, "loss": 0.0224, "step": 170670 }, { "epoch": 1.3810178817056395, "grad_norm": 0.36787763237953186, "learning_rate": 2.6448029415168964e-06, "loss": 0.0173, "step": 170680 }, { "epoch": 1.3810987944008415, "grad_norm": 0.43996158242225647, "learning_rate": 2.644180108718348e-06, "loss": 0.0177, "step": 170690 }, { "epoch": 1.3811797070960434, "grad_norm": 0.13124467432498932, "learning_rate": 2.6435573229016543e-06, "loss": 0.0115, "step": 170700 }, { "epoch": 1.3812606197912451, "grad_norm": 0.27345991134643555, "learning_rate": 2.6429345840792355e-06, "loss": 0.0166, "step": 170710 }, { "epoch": 1.381341532486447, "grad_norm": 0.41396740078926086, "learning_rate": 2.6423118922635094e-06, "loss": 0.0219, "step": 170720 }, { "epoch": 1.381422445181649, "grad_norm": 0.3868761658668518, "learning_rate": 2.6416892474668965e-06, "loss": 0.0165, "step": 170730 }, { "epoch": 1.3815033578768507, "grad_norm": 0.3500429391860962, "learning_rate": 2.641066649701812e-06, "loss": 0.0242, "step": 170740 }, { "epoch": 1.3815842705720527, "grad_norm": 0.26099276542663574, "learning_rate": 2.6404440989806734e-06, "loss": 0.0201, "step": 170750 }, { "epoch": 1.3816651832672546, "grad_norm": 0.1897863745689392, "learning_rate": 2.639821595315896e-06, "loss": 0.0256, "step": 170760 }, { "epoch": 1.3817460959624566, "grad_norm": 0.4468905031681061, "learning_rate": 2.6391991387198947e-06, "loss": 0.0217, "step": 170770 }, { "epoch": 1.3818270086576585, "grad_norm": 0.3010253310203552, "learning_rate": 2.638576729205082e-06, "loss": 0.0167, "step": 170780 }, { "epoch": 1.3819079213528602, "grad_norm": 0.18549641966819763, "learning_rate": 2.6379543667838704e-06, "loss": 0.0241, "step": 170790 }, { "epoch": 1.3819888340480622, "grad_norm": 0.3047175407409668, "learning_rate": 2.6373320514686757e-06, "loss": 0.0189, "step": 170800 }, { "epoch": 1.382069746743264, "grad_norm": 0.15577319264411926, "learning_rate": 2.6367097832719036e-06, "loss": 0.0134, "step": 170810 }, { "epoch": 1.3821506594384658, "grad_norm": 0.17553511261940002, "learning_rate": 2.6360875622059633e-06, "loss": 0.018, "step": 170820 }, { "epoch": 1.3822315721336678, "grad_norm": 0.34696128964424133, "learning_rate": 2.6354653882832705e-06, "loss": 0.0172, "step": 170830 }, { "epoch": 1.3823124848288697, "grad_norm": 0.38167089223861694, "learning_rate": 2.6348432615162266e-06, "loss": 0.0154, "step": 170840 }, { "epoch": 1.3823933975240714, "grad_norm": 0.32635697722435, "learning_rate": 2.634221181917239e-06, "loss": 0.0156, "step": 170850 }, { "epoch": 1.3824743102192734, "grad_norm": 0.22114744782447815, "learning_rate": 2.633599149498718e-06, "loss": 0.0126, "step": 170860 }, { "epoch": 1.3825552229144753, "grad_norm": 0.4020783305168152, "learning_rate": 2.6329771642730654e-06, "loss": 0.0207, "step": 170870 }, { "epoch": 1.3826361356096772, "grad_norm": 0.4588180482387543, "learning_rate": 2.632355226252687e-06, "loss": 0.0225, "step": 170880 }, { "epoch": 1.382717048304879, "grad_norm": 0.3203652501106262, "learning_rate": 2.6317333354499846e-06, "loss": 0.0154, "step": 170890 }, { "epoch": 1.382797961000081, "grad_norm": 0.3338574767112732, "learning_rate": 2.6311114918773618e-06, "loss": 0.0192, "step": 170900 }, { "epoch": 1.3828788736952828, "grad_norm": 1.2857823371887207, "learning_rate": 2.6304896955472193e-06, "loss": 0.0224, "step": 170910 }, { "epoch": 1.3829597863904848, "grad_norm": 0.6836661100387573, "learning_rate": 2.6298679464719566e-06, "loss": 0.0281, "step": 170920 }, { "epoch": 1.3830406990856865, "grad_norm": 0.5212481021881104, "learning_rate": 2.6292462446639754e-06, "loss": 0.0195, "step": 170930 }, { "epoch": 1.3831216117808884, "grad_norm": 0.2075885385274887, "learning_rate": 2.628624590135672e-06, "loss": 0.0157, "step": 170940 }, { "epoch": 1.3832025244760904, "grad_norm": 0.5053066611289978, "learning_rate": 2.6280029828994457e-06, "loss": 0.0142, "step": 170950 }, { "epoch": 1.383283437171292, "grad_norm": 0.5158957839012146, "learning_rate": 2.627381422967692e-06, "loss": 0.0225, "step": 170960 }, { "epoch": 1.383364349866494, "grad_norm": 0.2201414853334427, "learning_rate": 2.626759910352807e-06, "loss": 0.0172, "step": 170970 }, { "epoch": 1.383445262561696, "grad_norm": 0.23879936337471008, "learning_rate": 2.6261384450671855e-06, "loss": 0.0267, "step": 170980 }, { "epoch": 1.3835261752568977, "grad_norm": 0.25405803322792053, "learning_rate": 2.625517027123221e-06, "loss": 0.0221, "step": 170990 }, { "epoch": 1.3836070879520996, "grad_norm": 0.17539064586162567, "learning_rate": 2.624895656533307e-06, "loss": 0.0149, "step": 171000 }, { "epoch": 1.3836880006473016, "grad_norm": 0.4695659577846527, "learning_rate": 2.624274333309833e-06, "loss": 0.0204, "step": 171010 }, { "epoch": 1.3837689133425035, "grad_norm": 0.3731648027896881, "learning_rate": 2.623653057465194e-06, "loss": 0.0184, "step": 171020 }, { "epoch": 1.3838498260377052, "grad_norm": 0.40478992462158203, "learning_rate": 2.623031829011781e-06, "loss": 0.0267, "step": 171030 }, { "epoch": 1.3839307387329072, "grad_norm": 0.7021138072013855, "learning_rate": 2.6224106479619747e-06, "loss": 0.0318, "step": 171040 }, { "epoch": 1.3840116514281091, "grad_norm": 0.37694916129112244, "learning_rate": 2.6217895143281714e-06, "loss": 0.0106, "step": 171050 }, { "epoch": 1.384092564123311, "grad_norm": 0.397950142621994, "learning_rate": 2.6211684281227556e-06, "loss": 0.0213, "step": 171060 }, { "epoch": 1.3841734768185128, "grad_norm": 0.4669066369533539, "learning_rate": 2.6205473893581137e-06, "loss": 0.0327, "step": 171070 }, { "epoch": 1.3842543895137147, "grad_norm": 0.02370482310652733, "learning_rate": 2.6199263980466304e-06, "loss": 0.0256, "step": 171080 }, { "epoch": 1.3843353022089167, "grad_norm": 0.2528481185436249, "learning_rate": 2.6193054542006906e-06, "loss": 0.0132, "step": 171090 }, { "epoch": 1.3844162149041184, "grad_norm": 0.14948917925357819, "learning_rate": 2.618684557832678e-06, "loss": 0.0222, "step": 171100 }, { "epoch": 1.3844971275993203, "grad_norm": 0.3942679166793823, "learning_rate": 2.618063708954974e-06, "loss": 0.0141, "step": 171110 }, { "epoch": 1.3845780402945222, "grad_norm": 0.2661825716495514, "learning_rate": 2.617442907579962e-06, "loss": 0.0315, "step": 171120 }, { "epoch": 1.384658952989724, "grad_norm": 0.5115101933479309, "learning_rate": 2.6168221537200207e-06, "loss": 0.0223, "step": 171130 }, { "epoch": 1.384739865684926, "grad_norm": 0.5616542100906372, "learning_rate": 2.6162014473875306e-06, "loss": 0.028, "step": 171140 }, { "epoch": 1.3848207783801278, "grad_norm": 0.553318440914154, "learning_rate": 2.61558078859487e-06, "loss": 0.0213, "step": 171150 }, { "epoch": 1.3849016910753298, "grad_norm": 0.6913869976997375, "learning_rate": 2.614960177354417e-06, "loss": 0.0219, "step": 171160 }, { "epoch": 1.3849826037705317, "grad_norm": 0.4155220687389374, "learning_rate": 2.614339613678548e-06, "loss": 0.0126, "step": 171170 }, { "epoch": 1.3850635164657334, "grad_norm": 0.36150431632995605, "learning_rate": 2.613719097579639e-06, "loss": 0.0161, "step": 171180 }, { "epoch": 1.3851444291609354, "grad_norm": 0.1821732074022293, "learning_rate": 2.6130986290700653e-06, "loss": 0.0125, "step": 171190 }, { "epoch": 1.3852253418561373, "grad_norm": 0.054045747965574265, "learning_rate": 2.6124782081621985e-06, "loss": 0.0244, "step": 171200 }, { "epoch": 1.385306254551339, "grad_norm": 0.28719669580459595, "learning_rate": 2.6118578348684155e-06, "loss": 0.021, "step": 171210 }, { "epoch": 1.385387167246541, "grad_norm": 0.4326937198638916, "learning_rate": 2.6112375092010883e-06, "loss": 0.0199, "step": 171220 }, { "epoch": 1.385468079941743, "grad_norm": 0.30855000019073486, "learning_rate": 2.610617231172582e-06, "loss": 0.0099, "step": 171230 }, { "epoch": 1.3855489926369446, "grad_norm": 0.206807941198349, "learning_rate": 2.609997000795273e-06, "loss": 0.0282, "step": 171240 }, { "epoch": 1.3856299053321466, "grad_norm": 0.4234079420566559, "learning_rate": 2.6093768180815302e-06, "loss": 0.0178, "step": 171250 }, { "epoch": 1.3857108180273485, "grad_norm": 0.2627013623714447, "learning_rate": 2.6087566830437162e-06, "loss": 0.0144, "step": 171260 }, { "epoch": 1.3857917307225502, "grad_norm": 0.18177010118961334, "learning_rate": 2.6081365956942036e-06, "loss": 0.0111, "step": 171270 }, { "epoch": 1.3858726434177522, "grad_norm": 0.30517587065696716, "learning_rate": 2.6075165560453573e-06, "loss": 0.009, "step": 171280 }, { "epoch": 1.3859535561129541, "grad_norm": 0.019914155825972557, "learning_rate": 2.6068965641095423e-06, "loss": 0.0213, "step": 171290 }, { "epoch": 1.386034468808156, "grad_norm": 0.3823395073413849, "learning_rate": 2.6062766198991237e-06, "loss": 0.0153, "step": 171300 }, { "epoch": 1.386115381503358, "grad_norm": 0.276052325963974, "learning_rate": 2.605656723426464e-06, "loss": 0.0184, "step": 171310 }, { "epoch": 1.3861962941985597, "grad_norm": 0.22009479999542236, "learning_rate": 2.605036874703927e-06, "loss": 0.0181, "step": 171320 }, { "epoch": 1.3862772068937617, "grad_norm": 0.190055713057518, "learning_rate": 2.604417073743873e-06, "loss": 0.0232, "step": 171330 }, { "epoch": 1.3863581195889636, "grad_norm": 0.34830695390701294, "learning_rate": 2.603797320558663e-06, "loss": 0.027, "step": 171340 }, { "epoch": 1.3864390322841653, "grad_norm": 0.473584920167923, "learning_rate": 2.603177615160657e-06, "loss": 0.0246, "step": 171350 }, { "epoch": 1.3865199449793673, "grad_norm": 0.3798108994960785, "learning_rate": 2.602557957562214e-06, "loss": 0.0317, "step": 171360 }, { "epoch": 1.3866008576745692, "grad_norm": 0.45264509320259094, "learning_rate": 2.601938347775691e-06, "loss": 0.0117, "step": 171370 }, { "epoch": 1.386681770369771, "grad_norm": 0.49576911330223083, "learning_rate": 2.601318785813445e-06, "loss": 0.0246, "step": 171380 }, { "epoch": 1.3867626830649729, "grad_norm": 0.33191388845443726, "learning_rate": 2.600699271687832e-06, "loss": 0.0207, "step": 171390 }, { "epoch": 1.3868435957601748, "grad_norm": 0.01228626910597086, "learning_rate": 2.6000798054112055e-06, "loss": 0.0131, "step": 171400 }, { "epoch": 1.3869245084553765, "grad_norm": 0.12246400117874146, "learning_rate": 2.599460386995924e-06, "loss": 0.0233, "step": 171410 }, { "epoch": 1.3870054211505785, "grad_norm": 0.3966808617115021, "learning_rate": 2.5988410164543338e-06, "loss": 0.0228, "step": 171420 }, { "epoch": 1.3870863338457804, "grad_norm": 0.37230122089385986, "learning_rate": 2.598221693798792e-06, "loss": 0.0217, "step": 171430 }, { "epoch": 1.3871672465409823, "grad_norm": 0.47265881299972534, "learning_rate": 2.5976024190416506e-06, "loss": 0.0202, "step": 171440 }, { "epoch": 1.3872481592361843, "grad_norm": 0.7694887518882751, "learning_rate": 2.596983192195253e-06, "loss": 0.0204, "step": 171450 }, { "epoch": 1.387329071931386, "grad_norm": 0.6801693439483643, "learning_rate": 2.596364013271955e-06, "loss": 0.0253, "step": 171460 }, { "epoch": 1.387409984626588, "grad_norm": 0.5937861800193787, "learning_rate": 2.5957448822841037e-06, "loss": 0.0193, "step": 171470 }, { "epoch": 1.3874908973217899, "grad_norm": 0.3709501624107361, "learning_rate": 2.5951257992440417e-06, "loss": 0.0175, "step": 171480 }, { "epoch": 1.3875718100169916, "grad_norm": 0.44557467103004456, "learning_rate": 2.594506764164121e-06, "loss": 0.0247, "step": 171490 }, { "epoch": 1.3876527227121935, "grad_norm": 0.16493064165115356, "learning_rate": 2.5938877770566837e-06, "loss": 0.0127, "step": 171500 }, { "epoch": 1.3877336354073955, "grad_norm": 0.3107963800430298, "learning_rate": 2.593268837934076e-06, "loss": 0.0161, "step": 171510 }, { "epoch": 1.3878145481025972, "grad_norm": 0.8762153387069702, "learning_rate": 2.5926499468086404e-06, "loss": 0.042, "step": 171520 }, { "epoch": 1.3878954607977991, "grad_norm": 0.15732525289058685, "learning_rate": 2.5920311036927194e-06, "loss": 0.0158, "step": 171530 }, { "epoch": 1.387976373493001, "grad_norm": 0.2008642852306366, "learning_rate": 2.591412308598654e-06, "loss": 0.0138, "step": 171540 }, { "epoch": 1.388057286188203, "grad_norm": 0.2129957228899002, "learning_rate": 2.590793561538786e-06, "loss": 0.0217, "step": 171550 }, { "epoch": 1.3881381988834047, "grad_norm": 0.5828642845153809, "learning_rate": 2.590174862525452e-06, "loss": 0.0358, "step": 171560 }, { "epoch": 1.3882191115786067, "grad_norm": 0.4003422260284424, "learning_rate": 2.589556211570997e-06, "loss": 0.015, "step": 171570 }, { "epoch": 1.3883000242738086, "grad_norm": 0.6487656235694885, "learning_rate": 2.5889376086877534e-06, "loss": 0.0219, "step": 171580 }, { "epoch": 1.3883809369690105, "grad_norm": 0.556326687335968, "learning_rate": 2.5883190538880567e-06, "loss": 0.0253, "step": 171590 }, { "epoch": 1.3884618496642123, "grad_norm": 0.12980927526950836, "learning_rate": 2.5877005471842485e-06, "loss": 0.0151, "step": 171600 }, { "epoch": 1.3885427623594142, "grad_norm": 0.3619043231010437, "learning_rate": 2.587082088588657e-06, "loss": 0.022, "step": 171610 }, { "epoch": 1.3886236750546161, "grad_norm": 0.2277640551328659, "learning_rate": 2.586463678113622e-06, "loss": 0.0212, "step": 171620 }, { "epoch": 1.3887045877498179, "grad_norm": 0.24079500138759613, "learning_rate": 2.5858453157714747e-06, "loss": 0.0147, "step": 171630 }, { "epoch": 1.3887855004450198, "grad_norm": 0.23886531591415405, "learning_rate": 2.5852270015745427e-06, "loss": 0.011, "step": 171640 }, { "epoch": 1.3888664131402217, "grad_norm": 0.24745206534862518, "learning_rate": 2.5846087355351624e-06, "loss": 0.0278, "step": 171650 }, { "epoch": 1.3889473258354235, "grad_norm": 0.835570752620697, "learning_rate": 2.5839905176656632e-06, "loss": 0.0281, "step": 171660 }, { "epoch": 1.3890282385306254, "grad_norm": 0.3750862777233124, "learning_rate": 2.583372347978369e-06, "loss": 0.0297, "step": 171670 }, { "epoch": 1.3891091512258273, "grad_norm": 0.4373420476913452, "learning_rate": 2.582754226485613e-06, "loss": 0.0192, "step": 171680 }, { "epoch": 1.3891900639210293, "grad_norm": 0.4042925536632538, "learning_rate": 2.5821361531997215e-06, "loss": 0.0307, "step": 171690 }, { "epoch": 1.3892709766162312, "grad_norm": 0.20478101074695587, "learning_rate": 2.5815181281330205e-06, "loss": 0.0109, "step": 171700 }, { "epoch": 1.389351889311433, "grad_norm": 0.4282006323337555, "learning_rate": 2.5809001512978337e-06, "loss": 0.0163, "step": 171710 }, { "epoch": 1.3894328020066349, "grad_norm": 0.44481945037841797, "learning_rate": 2.580282222706487e-06, "loss": 0.0152, "step": 171720 }, { "epoch": 1.3895137147018368, "grad_norm": 0.5780158042907715, "learning_rate": 2.5796643423713038e-06, "loss": 0.0167, "step": 171730 }, { "epoch": 1.3895946273970385, "grad_norm": 0.4219359755516052, "learning_rate": 2.5790465103046046e-06, "loss": 0.02, "step": 171740 }, { "epoch": 1.3896755400922405, "grad_norm": 0.5181965827941895, "learning_rate": 2.5784287265187107e-06, "loss": 0.0302, "step": 171750 }, { "epoch": 1.3897564527874424, "grad_norm": 0.4897870123386383, "learning_rate": 2.5778109910259474e-06, "loss": 0.0369, "step": 171760 }, { "epoch": 1.3898373654826441, "grad_norm": 0.40093278884887695, "learning_rate": 2.5771933038386287e-06, "loss": 0.0233, "step": 171770 }, { "epoch": 1.389918278177846, "grad_norm": 0.3979991376399994, "learning_rate": 2.576575664969072e-06, "loss": 0.0233, "step": 171780 }, { "epoch": 1.389999190873048, "grad_norm": 0.36447829008102417, "learning_rate": 2.575958074429602e-06, "loss": 0.0217, "step": 171790 }, { "epoch": 1.3900801035682497, "grad_norm": 0.3581709563732147, "learning_rate": 2.5753405322325294e-06, "loss": 0.0262, "step": 171800 }, { "epoch": 1.3901610162634517, "grad_norm": 0.1426195353269577, "learning_rate": 2.574723038390169e-06, "loss": 0.0166, "step": 171810 }, { "epoch": 1.3902419289586536, "grad_norm": 0.3836055099964142, "learning_rate": 2.5741055929148405e-06, "loss": 0.0173, "step": 171820 }, { "epoch": 1.3903228416538556, "grad_norm": 0.3188350796699524, "learning_rate": 2.573488195818852e-06, "loss": 0.0303, "step": 171830 }, { "epoch": 1.3904037543490575, "grad_norm": 0.24413922429084778, "learning_rate": 2.5728708471145203e-06, "loss": 0.0244, "step": 171840 }, { "epoch": 1.3904846670442592, "grad_norm": 0.6056327223777771, "learning_rate": 2.5722535468141572e-06, "loss": 0.0241, "step": 171850 }, { "epoch": 1.3905655797394612, "grad_norm": 0.40890851616859436, "learning_rate": 2.5716362949300687e-06, "loss": 0.0186, "step": 171860 }, { "epoch": 1.390646492434663, "grad_norm": 0.1318320482969284, "learning_rate": 2.571019091474569e-06, "loss": 0.0193, "step": 171870 }, { "epoch": 1.3907274051298648, "grad_norm": 0.24218033254146576, "learning_rate": 2.5704019364599688e-06, "loss": 0.0241, "step": 171880 }, { "epoch": 1.3908083178250668, "grad_norm": 0.3505050539970398, "learning_rate": 2.5697848298985684e-06, "loss": 0.0183, "step": 171890 }, { "epoch": 1.3908892305202687, "grad_norm": 0.31060874462127686, "learning_rate": 2.569167771802681e-06, "loss": 0.0181, "step": 171900 }, { "epoch": 1.3909701432154704, "grad_norm": 0.1146259754896164, "learning_rate": 2.5685507621846106e-06, "loss": 0.0186, "step": 171910 }, { "epoch": 1.3910510559106724, "grad_norm": 0.20912683010101318, "learning_rate": 2.5679338010566624e-06, "loss": 0.0291, "step": 171920 }, { "epoch": 1.3911319686058743, "grad_norm": 0.4513378143310547, "learning_rate": 2.5673168884311396e-06, "loss": 0.0298, "step": 171930 }, { "epoch": 1.391212881301076, "grad_norm": 0.4401272237300873, "learning_rate": 2.5667000243203466e-06, "loss": 0.0188, "step": 171940 }, { "epoch": 1.391293793996278, "grad_norm": 0.13030003011226654, "learning_rate": 2.5660832087365845e-06, "loss": 0.0115, "step": 171950 }, { "epoch": 1.39137470669148, "grad_norm": 0.9235125780105591, "learning_rate": 2.565466441692155e-06, "loss": 0.03, "step": 171960 }, { "epoch": 1.3914556193866818, "grad_norm": 0.47347766160964966, "learning_rate": 2.564849723199355e-06, "loss": 0.0296, "step": 171970 }, { "epoch": 1.3915365320818838, "grad_norm": 0.5260983109474182, "learning_rate": 2.5642330532704905e-06, "loss": 0.0201, "step": 171980 }, { "epoch": 1.3916174447770855, "grad_norm": 0.32886090874671936, "learning_rate": 2.563616431917854e-06, "loss": 0.0275, "step": 171990 }, { "epoch": 1.3916983574722874, "grad_norm": 0.42040327191352844, "learning_rate": 2.5629998591537427e-06, "loss": 0.0129, "step": 172000 }, { "epoch": 1.3917792701674894, "grad_norm": 0.5907443165779114, "learning_rate": 2.5623833349904574e-06, "loss": 0.022, "step": 172010 }, { "epoch": 1.391860182862691, "grad_norm": 0.3664668798446655, "learning_rate": 2.561766859440289e-06, "loss": 0.0211, "step": 172020 }, { "epoch": 1.391941095557893, "grad_norm": 0.17959849536418915, "learning_rate": 2.5611504325155317e-06, "loss": 0.0275, "step": 172030 }, { "epoch": 1.392022008253095, "grad_norm": 1.4271777868270874, "learning_rate": 2.560534054228484e-06, "loss": 0.0247, "step": 172040 }, { "epoch": 1.3921029209482967, "grad_norm": 0.33663222193717957, "learning_rate": 2.5599177245914308e-06, "loss": 0.0204, "step": 172050 }, { "epoch": 1.3921838336434986, "grad_norm": 0.2955470085144043, "learning_rate": 2.5593014436166684e-06, "loss": 0.0236, "step": 172060 }, { "epoch": 1.3922647463387006, "grad_norm": 0.12284136563539505, "learning_rate": 2.5586852113164896e-06, "loss": 0.0205, "step": 172070 }, { "epoch": 1.3923456590339023, "grad_norm": 0.41556641459465027, "learning_rate": 2.5580690277031755e-06, "loss": 0.0186, "step": 172080 }, { "epoch": 1.3924265717291042, "grad_norm": 0.3599788546562195, "learning_rate": 2.557452892789022e-06, "loss": 0.0233, "step": 172090 }, { "epoch": 1.3925074844243062, "grad_norm": 0.3935631513595581, "learning_rate": 2.556836806586314e-06, "loss": 0.0321, "step": 172100 }, { "epoch": 1.392588397119508, "grad_norm": 0.5516219735145569, "learning_rate": 2.5562207691073377e-06, "loss": 0.0267, "step": 172110 }, { "epoch": 1.39266930981471, "grad_norm": 0.3271341621875763, "learning_rate": 2.5556047803643793e-06, "loss": 0.0123, "step": 172120 }, { "epoch": 1.3927502225099118, "grad_norm": 0.279479444026947, "learning_rate": 2.554988840369723e-06, "loss": 0.0259, "step": 172130 }, { "epoch": 1.3928311352051137, "grad_norm": 0.23842376470565796, "learning_rate": 2.5543729491356533e-06, "loss": 0.0274, "step": 172140 }, { "epoch": 1.3929120479003156, "grad_norm": 0.35210496187210083, "learning_rate": 2.553757106674452e-06, "loss": 0.0207, "step": 172150 }, { "epoch": 1.3929929605955174, "grad_norm": 0.26055532693862915, "learning_rate": 2.553141312998399e-06, "loss": 0.01, "step": 172160 }, { "epoch": 1.3930738732907193, "grad_norm": 0.2602175772190094, "learning_rate": 2.5525255681197803e-06, "loss": 0.016, "step": 172170 }, { "epoch": 1.3931547859859212, "grad_norm": 0.0010035036830231547, "learning_rate": 2.551909872050871e-06, "loss": 0.0238, "step": 172180 }, { "epoch": 1.393235698681123, "grad_norm": 0.3977634012699127, "learning_rate": 2.551294224803949e-06, "loss": 0.0348, "step": 172190 }, { "epoch": 1.393316611376325, "grad_norm": 0.5706931948661804, "learning_rate": 2.550678626391298e-06, "loss": 0.0301, "step": 172200 }, { "epoch": 1.3933975240715268, "grad_norm": 0.22375932335853577, "learning_rate": 2.5500630768251895e-06, "loss": 0.0154, "step": 172210 }, { "epoch": 1.3934784367667288, "grad_norm": 0.2378961592912674, "learning_rate": 2.5494475761178984e-06, "loss": 0.0241, "step": 172220 }, { "epoch": 1.3935593494619305, "grad_norm": 0.3737694025039673, "learning_rate": 2.548832124281706e-06, "loss": 0.0193, "step": 172230 }, { "epoch": 1.3936402621571324, "grad_norm": 0.19858162105083466, "learning_rate": 2.5482167213288788e-06, "loss": 0.0131, "step": 172240 }, { "epoch": 1.3937211748523344, "grad_norm": 0.5341928005218506, "learning_rate": 2.5476013672716947e-06, "loss": 0.0185, "step": 172250 }, { "epoch": 1.3938020875475363, "grad_norm": 0.48925942182540894, "learning_rate": 2.5469860621224263e-06, "loss": 0.0245, "step": 172260 }, { "epoch": 1.393883000242738, "grad_norm": 0.3677543103694916, "learning_rate": 2.5463708058933383e-06, "loss": 0.0325, "step": 172270 }, { "epoch": 1.39396391293794, "grad_norm": 0.529080867767334, "learning_rate": 2.5457555985967062e-06, "loss": 0.0155, "step": 172280 }, { "epoch": 1.394044825633142, "grad_norm": 0.24531835317611694, "learning_rate": 2.5451404402447986e-06, "loss": 0.0129, "step": 172290 }, { "epoch": 1.3941257383283436, "grad_norm": 0.26485562324523926, "learning_rate": 2.5445253308498817e-06, "loss": 0.024, "step": 172300 }, { "epoch": 1.3942066510235456, "grad_norm": 0.3686375021934509, "learning_rate": 2.5439102704242234e-06, "loss": 0.0213, "step": 172310 }, { "epoch": 1.3942875637187475, "grad_norm": 0.4694247841835022, "learning_rate": 2.54329525898009e-06, "loss": 0.0246, "step": 172320 }, { "epoch": 1.3943684764139492, "grad_norm": 0.27418428659439087, "learning_rate": 2.5426802965297466e-06, "loss": 0.0126, "step": 172330 }, { "epoch": 1.3944493891091512, "grad_norm": 0.3892434537410736, "learning_rate": 2.5420653830854564e-06, "loss": 0.0201, "step": 172340 }, { "epoch": 1.3945303018043531, "grad_norm": 0.20238831639289856, "learning_rate": 2.5414505186594833e-06, "loss": 0.0167, "step": 172350 }, { "epoch": 1.394611214499555, "grad_norm": 0.24968884885311127, "learning_rate": 2.5408357032640895e-06, "loss": 0.0299, "step": 172360 }, { "epoch": 1.394692127194757, "grad_norm": 0.1539337933063507, "learning_rate": 2.540220936911536e-06, "loss": 0.0201, "step": 172370 }, { "epoch": 1.3947730398899587, "grad_norm": 0.20495222508907318, "learning_rate": 2.5396062196140815e-06, "loss": 0.0206, "step": 172380 }, { "epoch": 1.3948539525851607, "grad_norm": 0.2997957766056061, "learning_rate": 2.5389915513839906e-06, "loss": 0.0297, "step": 172390 }, { "epoch": 1.3949348652803626, "grad_norm": 0.2633841037750244, "learning_rate": 2.5383769322335163e-06, "loss": 0.0149, "step": 172400 }, { "epoch": 1.3950157779755643, "grad_norm": 0.4802071750164032, "learning_rate": 2.537762362174914e-06, "loss": 0.028, "step": 172410 }, { "epoch": 1.3950966906707662, "grad_norm": 0.30315345525741577, "learning_rate": 2.537147841220449e-06, "loss": 0.0167, "step": 172420 }, { "epoch": 1.3951776033659682, "grad_norm": 0.38014569878578186, "learning_rate": 2.536533369382368e-06, "loss": 0.0154, "step": 172430 }, { "epoch": 1.39525851606117, "grad_norm": 0.15264995396137238, "learning_rate": 2.5359189466729262e-06, "loss": 0.0178, "step": 172440 }, { "epoch": 1.3953394287563718, "grad_norm": 0.27437666058540344, "learning_rate": 2.5353045731043833e-06, "loss": 0.0193, "step": 172450 }, { "epoch": 1.3954203414515738, "grad_norm": 0.16503645479679108, "learning_rate": 2.5346902486889836e-06, "loss": 0.0146, "step": 172460 }, { "epoch": 1.3955012541467755, "grad_norm": 0.2623527944087982, "learning_rate": 2.534075973438984e-06, "loss": 0.0126, "step": 172470 }, { "epoch": 1.3955821668419774, "grad_norm": 0.16380077600479126, "learning_rate": 2.5334617473666346e-06, "loss": 0.0144, "step": 172480 }, { "epoch": 1.3956630795371794, "grad_norm": 0.3565550148487091, "learning_rate": 2.5328475704841828e-06, "loss": 0.0204, "step": 172490 }, { "epoch": 1.3957439922323813, "grad_norm": 0.39384350180625916, "learning_rate": 2.532233442803877e-06, "loss": 0.0181, "step": 172500 }, { "epoch": 1.3958249049275833, "grad_norm": 0.3745685815811157, "learning_rate": 2.531619364337966e-06, "loss": 0.0156, "step": 172510 }, { "epoch": 1.395905817622785, "grad_norm": 0.6052621006965637, "learning_rate": 2.5310053350986962e-06, "loss": 0.028, "step": 172520 }, { "epoch": 1.395986730317987, "grad_norm": 0.385375440120697, "learning_rate": 2.5303913550983126e-06, "loss": 0.0096, "step": 172530 }, { "epoch": 1.3960676430131889, "grad_norm": 0.25907012820243835, "learning_rate": 2.52977742434906e-06, "loss": 0.0229, "step": 172540 }, { "epoch": 1.3961485557083906, "grad_norm": 0.16903743147850037, "learning_rate": 2.529163542863181e-06, "loss": 0.0184, "step": 172550 }, { "epoch": 1.3962294684035925, "grad_norm": 0.4375746250152588, "learning_rate": 2.52854971065292e-06, "loss": 0.0235, "step": 172560 }, { "epoch": 1.3963103810987945, "grad_norm": 0.343948096036911, "learning_rate": 2.5279359277305167e-06, "loss": 0.022, "step": 172570 }, { "epoch": 1.3963912937939962, "grad_norm": 0.15740643441677094, "learning_rate": 2.527322194108213e-06, "loss": 0.0181, "step": 172580 }, { "epoch": 1.3964722064891981, "grad_norm": 0.4396930932998657, "learning_rate": 2.526708509798248e-06, "loss": 0.0229, "step": 172590 }, { "epoch": 1.3965531191844, "grad_norm": 0.3372248411178589, "learning_rate": 2.5260948748128582e-06, "loss": 0.0161, "step": 172600 }, { "epoch": 1.3966340318796018, "grad_norm": 0.20588064193725586, "learning_rate": 2.5254812891642875e-06, "loss": 0.0253, "step": 172610 }, { "epoch": 1.3967149445748037, "grad_norm": 0.07501764595508575, "learning_rate": 2.524867752864767e-06, "loss": 0.0138, "step": 172620 }, { "epoch": 1.3967958572700057, "grad_norm": 0.3649185299873352, "learning_rate": 2.5242542659265318e-06, "loss": 0.0222, "step": 172630 }, { "epoch": 1.3968767699652076, "grad_norm": 0.6604465842247009, "learning_rate": 2.5236408283618226e-06, "loss": 0.0258, "step": 172640 }, { "epoch": 1.3969576826604095, "grad_norm": 0.22002652287483215, "learning_rate": 2.5230274401828653e-06, "loss": 0.0159, "step": 172650 }, { "epoch": 1.3970385953556113, "grad_norm": 0.48626938462257385, "learning_rate": 2.522414101401899e-06, "loss": 0.0217, "step": 172660 }, { "epoch": 1.3971195080508132, "grad_norm": 0.49507883191108704, "learning_rate": 2.5218008120311523e-06, "loss": 0.0165, "step": 172670 }, { "epoch": 1.3972004207460151, "grad_norm": 0.3948270082473755, "learning_rate": 2.521187572082857e-06, "loss": 0.0132, "step": 172680 }, { "epoch": 1.3972813334412169, "grad_norm": 0.7392638325691223, "learning_rate": 2.5205743815692423e-06, "loss": 0.0216, "step": 172690 }, { "epoch": 1.3973622461364188, "grad_norm": 0.1589447408914566, "learning_rate": 2.5199612405025375e-06, "loss": 0.0222, "step": 172700 }, { "epoch": 1.3974431588316207, "grad_norm": 0.05795959010720253, "learning_rate": 2.51934814889497e-06, "loss": 0.0158, "step": 172710 }, { "epoch": 1.3975240715268225, "grad_norm": 0.41531533002853394, "learning_rate": 2.5187351067587672e-06, "loss": 0.0184, "step": 172720 }, { "epoch": 1.3976049842220244, "grad_norm": 0.27990055084228516, "learning_rate": 2.518122114106154e-06, "loss": 0.0165, "step": 172730 }, { "epoch": 1.3976858969172263, "grad_norm": 0.26274749636650085, "learning_rate": 2.517509170949356e-06, "loss": 0.0182, "step": 172740 }, { "epoch": 1.397766809612428, "grad_norm": 0.5088269710540771, "learning_rate": 2.5168962773005967e-06, "loss": 0.02, "step": 172750 }, { "epoch": 1.39784772230763, "grad_norm": 0.3483021557331085, "learning_rate": 2.5162834331720996e-06, "loss": 0.015, "step": 172760 }, { "epoch": 1.397928635002832, "grad_norm": 0.26989278197288513, "learning_rate": 2.5156706385760852e-06, "loss": 0.0182, "step": 172770 }, { "epoch": 1.3980095476980339, "grad_norm": 0.20063838362693787, "learning_rate": 2.515057893524776e-06, "loss": 0.0205, "step": 172780 }, { "epoch": 1.3980904603932358, "grad_norm": 0.3040473759174347, "learning_rate": 2.5144451980303893e-06, "loss": 0.0196, "step": 172790 }, { "epoch": 1.3981713730884375, "grad_norm": 0.1736382097005844, "learning_rate": 2.5138325521051493e-06, "loss": 0.0189, "step": 172800 }, { "epoch": 1.3982522857836395, "grad_norm": 0.28795933723449707, "learning_rate": 2.513219955761269e-06, "loss": 0.0228, "step": 172810 }, { "epoch": 1.3983331984788414, "grad_norm": 0.48987579345703125, "learning_rate": 2.5126074090109644e-06, "loss": 0.0285, "step": 172820 }, { "epoch": 1.3984141111740431, "grad_norm": 0.20669536292552948, "learning_rate": 2.511994911866456e-06, "loss": 0.011, "step": 172830 }, { "epoch": 1.398495023869245, "grad_norm": 0.4300515055656433, "learning_rate": 2.5113824643399585e-06, "loss": 0.0253, "step": 172840 }, { "epoch": 1.398575936564447, "grad_norm": 0.5250355005264282, "learning_rate": 2.5107700664436807e-06, "loss": 0.0208, "step": 172850 }, { "epoch": 1.3986568492596487, "grad_norm": 0.43934300541877747, "learning_rate": 2.5101577181898407e-06, "loss": 0.0173, "step": 172860 }, { "epoch": 1.3987377619548507, "grad_norm": 0.5946778655052185, "learning_rate": 2.5095454195906483e-06, "loss": 0.024, "step": 172870 }, { "epoch": 1.3988186746500526, "grad_norm": 0.2915051579475403, "learning_rate": 2.5089331706583153e-06, "loss": 0.0154, "step": 172880 }, { "epoch": 1.3988995873452545, "grad_norm": 0.07571449130773544, "learning_rate": 2.508320971405051e-06, "loss": 0.0169, "step": 172890 }, { "epoch": 1.3989805000404563, "grad_norm": 0.35945287346839905, "learning_rate": 2.507708821843065e-06, "loss": 0.0238, "step": 172900 }, { "epoch": 1.3990614127356582, "grad_norm": 0.3947470784187317, "learning_rate": 2.5070967219845656e-06, "loss": 0.0181, "step": 172910 }, { "epoch": 1.3991423254308601, "grad_norm": 0.23535685241222382, "learning_rate": 2.506484671841758e-06, "loss": 0.0204, "step": 172920 }, { "epoch": 1.399223238126062, "grad_norm": 0.1775437593460083, "learning_rate": 2.5058726714268505e-06, "loss": 0.0121, "step": 172930 }, { "epoch": 1.3993041508212638, "grad_norm": 0.2279014140367508, "learning_rate": 2.5052607207520473e-06, "loss": 0.0093, "step": 172940 }, { "epoch": 1.3993850635164657, "grad_norm": 0.5227178335189819, "learning_rate": 2.504648819829552e-06, "loss": 0.021, "step": 172950 }, { "epoch": 1.3994659762116677, "grad_norm": 0.5567004680633545, "learning_rate": 2.5040369686715683e-06, "loss": 0.03, "step": 172960 }, { "epoch": 1.3995468889068694, "grad_norm": 0.27007418870925903, "learning_rate": 2.503425167290298e-06, "loss": 0.0192, "step": 172970 }, { "epoch": 1.3996278016020713, "grad_norm": 0.05856010690331459, "learning_rate": 2.502813415697942e-06, "loss": 0.0131, "step": 172980 }, { "epoch": 1.3997087142972733, "grad_norm": 0.3832435607910156, "learning_rate": 2.502201713906701e-06, "loss": 0.0128, "step": 172990 }, { "epoch": 1.399789626992475, "grad_norm": 0.26850226521492004, "learning_rate": 2.5015900619287735e-06, "loss": 0.0167, "step": 173000 }, { "epoch": 1.399870539687677, "grad_norm": 0.3648381531238556, "learning_rate": 2.5009784597763563e-06, "loss": 0.0135, "step": 173010 }, { "epoch": 1.3999514523828789, "grad_norm": 0.5754572153091431, "learning_rate": 2.50036690746165e-06, "loss": 0.0151, "step": 173020 }, { "epoch": 1.4000323650780808, "grad_norm": 0.49097713828086853, "learning_rate": 2.4997554049968504e-06, "loss": 0.0192, "step": 173030 }, { "epoch": 1.4001132777732828, "grad_norm": 0.2294216752052307, "learning_rate": 2.499143952394147e-06, "loss": 0.0262, "step": 173040 }, { "epoch": 1.4001941904684845, "grad_norm": 0.2998470962047577, "learning_rate": 2.4985325496657396e-06, "loss": 0.0211, "step": 173050 }, { "epoch": 1.4002751031636864, "grad_norm": 0.10746238380670547, "learning_rate": 2.497921196823822e-06, "loss": 0.0177, "step": 173060 }, { "epoch": 1.4003560158588884, "grad_norm": 0.6273729205131531, "learning_rate": 2.49730989388058e-06, "loss": 0.0224, "step": 173070 }, { "epoch": 1.40043692855409, "grad_norm": 0.8130905032157898, "learning_rate": 2.4966986408482115e-06, "loss": 0.0215, "step": 173080 }, { "epoch": 1.400517841249292, "grad_norm": 0.44207262992858887, "learning_rate": 2.4960874377389026e-06, "loss": 0.0177, "step": 173090 }, { "epoch": 1.400598753944494, "grad_norm": 0.47805890440940857, "learning_rate": 2.4954762845648444e-06, "loss": 0.033, "step": 173100 }, { "epoch": 1.4006796666396957, "grad_norm": 0.2107262760400772, "learning_rate": 2.4948651813382247e-06, "loss": 0.0162, "step": 173110 }, { "epoch": 1.4007605793348976, "grad_norm": 0.25252071022987366, "learning_rate": 2.49425412807123e-06, "loss": 0.0184, "step": 173120 }, { "epoch": 1.4008414920300996, "grad_norm": 0.48544856905937195, "learning_rate": 2.493643124776047e-06, "loss": 0.0226, "step": 173130 }, { "epoch": 1.4009224047253013, "grad_norm": 0.3381885290145874, "learning_rate": 2.49303217146486e-06, "loss": 0.0181, "step": 173140 }, { "epoch": 1.4010033174205032, "grad_norm": 0.4191887676715851, "learning_rate": 2.492421268149855e-06, "loss": 0.0115, "step": 173150 }, { "epoch": 1.4010842301157052, "grad_norm": 0.3439503610134125, "learning_rate": 2.4918104148432136e-06, "loss": 0.0117, "step": 173160 }, { "epoch": 1.401165142810907, "grad_norm": 0.4523918628692627, "learning_rate": 2.4911996115571185e-06, "loss": 0.0322, "step": 173170 }, { "epoch": 1.401246055506109, "grad_norm": 0.08537928014993668, "learning_rate": 2.4905888583037506e-06, "loss": 0.019, "step": 173180 }, { "epoch": 1.4013269682013108, "grad_norm": 0.39285656809806824, "learning_rate": 2.4899781550952907e-06, "loss": 0.0131, "step": 173190 }, { "epoch": 1.4014078808965127, "grad_norm": 0.263497918844223, "learning_rate": 2.4893675019439157e-06, "loss": 0.0332, "step": 173200 }, { "epoch": 1.4014887935917146, "grad_norm": 0.5884730219841003, "learning_rate": 2.4887568988618072e-06, "loss": 0.0131, "step": 173210 }, { "epoch": 1.4015697062869164, "grad_norm": 0.5925459265708923, "learning_rate": 2.488146345861143e-06, "loss": 0.0134, "step": 173220 }, { "epoch": 1.4016506189821183, "grad_norm": 0.1799398958683014, "learning_rate": 2.4875358429540934e-06, "loss": 0.017, "step": 173230 }, { "epoch": 1.4017315316773202, "grad_norm": 0.505439043045044, "learning_rate": 2.4869253901528393e-06, "loss": 0.0183, "step": 173240 }, { "epoch": 1.401812444372522, "grad_norm": 0.32464879751205444, "learning_rate": 2.4863149874695553e-06, "loss": 0.0113, "step": 173250 }, { "epoch": 1.4018933570677239, "grad_norm": 0.7002299427986145, "learning_rate": 2.4857046349164078e-06, "loss": 0.0262, "step": 173260 }, { "epoch": 1.4019742697629258, "grad_norm": 0.33556777238845825, "learning_rate": 2.4850943325055752e-06, "loss": 0.0225, "step": 173270 }, { "epoch": 1.4020551824581275, "grad_norm": 0.44566360116004944, "learning_rate": 2.4844840802492275e-06, "loss": 0.0238, "step": 173280 }, { "epoch": 1.4021360951533295, "grad_norm": 0.5371233820915222, "learning_rate": 2.483873878159534e-06, "loss": 0.0246, "step": 173290 }, { "epoch": 1.4022170078485314, "grad_norm": 0.4766843318939209, "learning_rate": 2.4832637262486642e-06, "loss": 0.0163, "step": 173300 }, { "epoch": 1.4022979205437334, "grad_norm": 0.4181113541126251, "learning_rate": 2.4826536245287864e-06, "loss": 0.0253, "step": 173310 }, { "epoch": 1.4023788332389353, "grad_norm": 0.318532258272171, "learning_rate": 2.4820435730120678e-06, "loss": 0.0349, "step": 173320 }, { "epoch": 1.402459745934137, "grad_norm": 0.3719923794269562, "learning_rate": 2.481433571710674e-06, "loss": 0.0288, "step": 173330 }, { "epoch": 1.402540658629339, "grad_norm": 0.3896245062351227, "learning_rate": 2.4808236206367716e-06, "loss": 0.0223, "step": 173340 }, { "epoch": 1.402621571324541, "grad_norm": 0.37725040316581726, "learning_rate": 2.4802137198025234e-06, "loss": 0.0192, "step": 173350 }, { "epoch": 1.4027024840197426, "grad_norm": 0.6062941551208496, "learning_rate": 2.479603869220093e-06, "loss": 0.024, "step": 173360 }, { "epoch": 1.4027833967149446, "grad_norm": 0.44720616936683655, "learning_rate": 2.4789940689016424e-06, "loss": 0.0309, "step": 173370 }, { "epoch": 1.4028643094101465, "grad_norm": 0.452594518661499, "learning_rate": 2.478384318859333e-06, "loss": 0.0202, "step": 173380 }, { "epoch": 1.4029452221053482, "grad_norm": 0.4801548719406128, "learning_rate": 2.4777746191053253e-06, "loss": 0.0298, "step": 173390 }, { "epoch": 1.4030261348005502, "grad_norm": 0.22824256122112274, "learning_rate": 2.4771649696517762e-06, "loss": 0.0108, "step": 173400 }, { "epoch": 1.403107047495752, "grad_norm": 0.3311469256877899, "learning_rate": 2.4765553705108496e-06, "loss": 0.0139, "step": 173410 }, { "epoch": 1.403187960190954, "grad_norm": 0.07478916645050049, "learning_rate": 2.4759458216946957e-06, "loss": 0.0175, "step": 173420 }, { "epoch": 1.4032688728861558, "grad_norm": 0.36847594380378723, "learning_rate": 2.4753363232154746e-06, "loss": 0.0136, "step": 173430 }, { "epoch": 1.4033497855813577, "grad_norm": 0.6177654266357422, "learning_rate": 2.4747268750853436e-06, "loss": 0.0204, "step": 173440 }, { "epoch": 1.4034306982765596, "grad_norm": 0.3256952464580536, "learning_rate": 2.47411747731645e-06, "loss": 0.017, "step": 173450 }, { "epoch": 1.4035116109717616, "grad_norm": 0.3864206075668335, "learning_rate": 2.473508129920953e-06, "loss": 0.022, "step": 173460 }, { "epoch": 1.4035925236669633, "grad_norm": 0.01358706783503294, "learning_rate": 2.4728988329110043e-06, "loss": 0.0201, "step": 173470 }, { "epoch": 1.4036734363621652, "grad_norm": 0.22974789142608643, "learning_rate": 2.4722895862987495e-06, "loss": 0.0376, "step": 173480 }, { "epoch": 1.4037543490573672, "grad_norm": 0.46491318941116333, "learning_rate": 2.4716803900963443e-06, "loss": 0.0184, "step": 173490 }, { "epoch": 1.403835261752569, "grad_norm": 0.26681599020957947, "learning_rate": 2.4710712443159367e-06, "loss": 0.0207, "step": 173500 }, { "epoch": 1.4039161744477708, "grad_norm": 0.20835904777050018, "learning_rate": 2.4704621489696747e-06, "loss": 0.028, "step": 173510 }, { "epoch": 1.4039970871429728, "grad_norm": 0.3445248305797577, "learning_rate": 2.4698531040697044e-06, "loss": 0.0213, "step": 173520 }, { "epoch": 1.4040779998381745, "grad_norm": 0.47597190737724304, "learning_rate": 2.4692441096281726e-06, "loss": 0.0227, "step": 173530 }, { "epoch": 1.4041589125333764, "grad_norm": 0.3565177619457245, "learning_rate": 2.4686351656572242e-06, "loss": 0.0187, "step": 173540 }, { "epoch": 1.4042398252285784, "grad_norm": 0.747597873210907, "learning_rate": 2.468026272169004e-06, "loss": 0.0285, "step": 173550 }, { "epoch": 1.4043207379237803, "grad_norm": 0.2618791162967682, "learning_rate": 2.4674174291756525e-06, "loss": 0.0203, "step": 173560 }, { "epoch": 1.4044016506189823, "grad_norm": 0.37611278891563416, "learning_rate": 2.4668086366893173e-06, "loss": 0.017, "step": 173570 }, { "epoch": 1.404482563314184, "grad_norm": 0.2594369053840637, "learning_rate": 2.466199894722135e-06, "loss": 0.0202, "step": 173580 }, { "epoch": 1.404563476009386, "grad_norm": 0.23624922335147858, "learning_rate": 2.465591203286244e-06, "loss": 0.0211, "step": 173590 }, { "epoch": 1.4046443887045879, "grad_norm": 0.27140647172927856, "learning_rate": 2.46498256239379e-06, "loss": 0.0156, "step": 173600 }, { "epoch": 1.4047253013997896, "grad_norm": 0.2160889208316803, "learning_rate": 2.4643739720569055e-06, "loss": 0.014, "step": 173610 }, { "epoch": 1.4048062140949915, "grad_norm": 0.3882227838039398, "learning_rate": 2.463765432287727e-06, "loss": 0.0206, "step": 173620 }, { "epoch": 1.4048871267901935, "grad_norm": 0.4154919981956482, "learning_rate": 2.4631569430983966e-06, "loss": 0.0223, "step": 173630 }, { "epoch": 1.4049680394853952, "grad_norm": 0.3388141393661499, "learning_rate": 2.4625485045010415e-06, "loss": 0.0173, "step": 173640 }, { "epoch": 1.405048952180597, "grad_norm": 0.31619930267333984, "learning_rate": 2.461940116507802e-06, "loss": 0.02, "step": 173650 }, { "epoch": 1.405129864875799, "grad_norm": 0.004212082829326391, "learning_rate": 2.4613317791308105e-06, "loss": 0.0245, "step": 173660 }, { "epoch": 1.4052107775710008, "grad_norm": 0.3207913935184479, "learning_rate": 2.4607234923821936e-06, "loss": 0.0182, "step": 173670 }, { "epoch": 1.4052916902662027, "grad_norm": 0.4390455484390259, "learning_rate": 2.460115256274088e-06, "loss": 0.0169, "step": 173680 }, { "epoch": 1.4053726029614046, "grad_norm": 0.34422802925109863, "learning_rate": 2.4595070708186213e-06, "loss": 0.0158, "step": 173690 }, { "epoch": 1.4054535156566066, "grad_norm": 0.356723815202713, "learning_rate": 2.458898936027923e-06, "loss": 0.0248, "step": 173700 }, { "epoch": 1.4055344283518085, "grad_norm": 0.45065024495124817, "learning_rate": 2.4582908519141207e-06, "loss": 0.011, "step": 173710 }, { "epoch": 1.4056153410470102, "grad_norm": 0.23208680748939514, "learning_rate": 2.457682818489342e-06, "loss": 0.018, "step": 173720 }, { "epoch": 1.4056962537422122, "grad_norm": 0.3150193989276886, "learning_rate": 2.457074835765712e-06, "loss": 0.023, "step": 173730 }, { "epoch": 1.4057771664374141, "grad_norm": 0.6343648433685303, "learning_rate": 2.456466903755357e-06, "loss": 0.0259, "step": 173740 }, { "epoch": 1.4058580791326158, "grad_norm": 0.1896401196718216, "learning_rate": 2.455859022470397e-06, "loss": 0.0169, "step": 173750 }, { "epoch": 1.4059389918278178, "grad_norm": 0.5322706699371338, "learning_rate": 2.455251191922962e-06, "loss": 0.0216, "step": 173760 }, { "epoch": 1.4060199045230197, "grad_norm": 0.06400437653064728, "learning_rate": 2.454643412125168e-06, "loss": 0.0168, "step": 173770 }, { "epoch": 1.4061008172182214, "grad_norm": 0.3937208652496338, "learning_rate": 2.4540356830891355e-06, "loss": 0.022, "step": 173780 }, { "epoch": 1.4061817299134234, "grad_norm": 0.5109426379203796, "learning_rate": 2.4534280048269905e-06, "loss": 0.0271, "step": 173790 }, { "epoch": 1.4062626426086253, "grad_norm": 0.22937284409999847, "learning_rate": 2.452820377350846e-06, "loss": 0.0322, "step": 173800 }, { "epoch": 1.406343555303827, "grad_norm": 0.5016211271286011, "learning_rate": 2.4522128006728197e-06, "loss": 0.0186, "step": 173810 }, { "epoch": 1.406424467999029, "grad_norm": 0.14901885390281677, "learning_rate": 2.4516052748050343e-06, "loss": 0.0158, "step": 173820 }, { "epoch": 1.406505380694231, "grad_norm": 0.31107786297798157, "learning_rate": 2.450997799759598e-06, "loss": 0.0103, "step": 173830 }, { "epoch": 1.4065862933894329, "grad_norm": 0.42021241784095764, "learning_rate": 2.4503903755486315e-06, "loss": 0.0261, "step": 173840 }, { "epoch": 1.4066672060846348, "grad_norm": 0.522527277469635, "learning_rate": 2.4497830021842485e-06, "loss": 0.0284, "step": 173850 }, { "epoch": 1.4067481187798365, "grad_norm": 0.3743305504322052, "learning_rate": 2.4491756796785553e-06, "loss": 0.0125, "step": 173860 }, { "epoch": 1.4068290314750385, "grad_norm": 0.36508429050445557, "learning_rate": 2.4485684080436704e-06, "loss": 0.0414, "step": 173870 }, { "epoch": 1.4069099441702404, "grad_norm": 0.5312705636024475, "learning_rate": 2.4479611872917046e-06, "loss": 0.0154, "step": 173880 }, { "epoch": 1.4069908568654421, "grad_norm": 0.16167429089546204, "learning_rate": 2.4473540174347617e-06, "loss": 0.0137, "step": 173890 }, { "epoch": 1.407071769560644, "grad_norm": 0.5098168849945068, "learning_rate": 2.446746898484955e-06, "loss": 0.0133, "step": 173900 }, { "epoch": 1.407152682255846, "grad_norm": 0.6384080648422241, "learning_rate": 2.4461398304543917e-06, "loss": 0.0198, "step": 173910 }, { "epoch": 1.4072335949510477, "grad_norm": 0.34792155027389526, "learning_rate": 2.445532813355178e-06, "loss": 0.0225, "step": 173920 }, { "epoch": 1.4073145076462497, "grad_norm": 0.22339197993278503, "learning_rate": 2.44492584719942e-06, "loss": 0.0169, "step": 173930 }, { "epoch": 1.4073954203414516, "grad_norm": 0.20650264620780945, "learning_rate": 2.4443189319992217e-06, "loss": 0.0165, "step": 173940 }, { "epoch": 1.4074763330366533, "grad_norm": 0.6439015865325928, "learning_rate": 2.4437120677666866e-06, "loss": 0.0211, "step": 173950 }, { "epoch": 1.4075572457318553, "grad_norm": 0.10784268379211426, "learning_rate": 2.4431052545139183e-06, "loss": 0.0151, "step": 173960 }, { "epoch": 1.4076381584270572, "grad_norm": 0.4320566952228546, "learning_rate": 2.442498492253016e-06, "loss": 0.0207, "step": 173970 }, { "epoch": 1.4077190711222591, "grad_norm": 0.25944483280181885, "learning_rate": 2.441891780996086e-06, "loss": 0.0165, "step": 173980 }, { "epoch": 1.407799983817461, "grad_norm": 0.43979400396347046, "learning_rate": 2.4412851207552214e-06, "loss": 0.0106, "step": 173990 }, { "epoch": 1.4078808965126628, "grad_norm": 0.2437053918838501, "learning_rate": 2.440678511542522e-06, "loss": 0.0234, "step": 174000 }, { "epoch": 1.4079618092078647, "grad_norm": 0.6117781400680542, "learning_rate": 2.4400719533700902e-06, "loss": 0.0245, "step": 174010 }, { "epoch": 1.4080427219030667, "grad_norm": 0.34072670340538025, "learning_rate": 2.4394654462500174e-06, "loss": 0.0254, "step": 174020 }, { "epoch": 1.4081236345982684, "grad_norm": 0.20771309733390808, "learning_rate": 2.438858990194399e-06, "loss": 0.0189, "step": 174030 }, { "epoch": 1.4082045472934703, "grad_norm": 0.32351845502853394, "learning_rate": 2.438252585215335e-06, "loss": 0.0112, "step": 174040 }, { "epoch": 1.4082854599886723, "grad_norm": 0.6736997961997986, "learning_rate": 2.437646231324911e-06, "loss": 0.0217, "step": 174050 }, { "epoch": 1.408366372683874, "grad_norm": 0.23332761228084564, "learning_rate": 2.4370399285352254e-06, "loss": 0.0158, "step": 174060 }, { "epoch": 1.408447285379076, "grad_norm": 0.1266447752714157, "learning_rate": 2.4364336768583703e-06, "loss": 0.0218, "step": 174070 }, { "epoch": 1.4085281980742779, "grad_norm": 0.45630860328674316, "learning_rate": 2.4358274763064288e-06, "loss": 0.0242, "step": 174080 }, { "epoch": 1.4086091107694798, "grad_norm": 0.1329268515110016, "learning_rate": 2.4352213268914976e-06, "loss": 0.0106, "step": 174090 }, { "epoch": 1.4086900234646815, "grad_norm": 0.5368356704711914, "learning_rate": 2.4346152286256623e-06, "loss": 0.017, "step": 174100 }, { "epoch": 1.4087709361598835, "grad_norm": 0.3957541584968567, "learning_rate": 2.434009181521011e-06, "loss": 0.0147, "step": 174110 }, { "epoch": 1.4088518488550854, "grad_norm": 0.27628347277641296, "learning_rate": 2.4334031855896297e-06, "loss": 0.0212, "step": 174120 }, { "epoch": 1.4089327615502873, "grad_norm": 0.22090862691402435, "learning_rate": 2.432797240843603e-06, "loss": 0.0238, "step": 174130 }, { "epoch": 1.409013674245489, "grad_norm": 0.2731417119503021, "learning_rate": 2.4321913472950155e-06, "loss": 0.0181, "step": 174140 }, { "epoch": 1.409094586940691, "grad_norm": 0.16639062762260437, "learning_rate": 2.4315855049559513e-06, "loss": 0.0218, "step": 174150 }, { "epoch": 1.409175499635893, "grad_norm": 0.4069221019744873, "learning_rate": 2.430979713838491e-06, "loss": 0.02, "step": 174160 }, { "epoch": 1.4092564123310947, "grad_norm": 0.8042177557945251, "learning_rate": 2.430373973954718e-06, "loss": 0.0256, "step": 174170 }, { "epoch": 1.4093373250262966, "grad_norm": 0.21787795424461365, "learning_rate": 2.4297682853167108e-06, "loss": 0.019, "step": 174180 }, { "epoch": 1.4094182377214985, "grad_norm": 0.522162675857544, "learning_rate": 2.4291626479365475e-06, "loss": 0.0182, "step": 174190 }, { "epoch": 1.4094991504167003, "grad_norm": 0.20182664692401886, "learning_rate": 2.4285570618263115e-06, "loss": 0.0203, "step": 174200 }, { "epoch": 1.4095800631119022, "grad_norm": 0.20063236355781555, "learning_rate": 2.4279515269980746e-06, "loss": 0.0257, "step": 174210 }, { "epoch": 1.4096609758071041, "grad_norm": 0.4843389689922333, "learning_rate": 2.427346043463913e-06, "loss": 0.0173, "step": 174220 }, { "epoch": 1.409741888502306, "grad_norm": 0.3549833595752716, "learning_rate": 2.4267406112359063e-06, "loss": 0.0168, "step": 174230 }, { "epoch": 1.409822801197508, "grad_norm": 0.3185743987560272, "learning_rate": 2.426135230326123e-06, "loss": 0.0226, "step": 174240 }, { "epoch": 1.4099037138927097, "grad_norm": 0.5121272206306458, "learning_rate": 2.4255299007466404e-06, "loss": 0.0226, "step": 174250 }, { "epoch": 1.4099846265879117, "grad_norm": 0.4309743642807007, "learning_rate": 2.4249246225095306e-06, "loss": 0.0209, "step": 174260 }, { "epoch": 1.4100655392831136, "grad_norm": 0.3547872006893158, "learning_rate": 2.4243193956268594e-06, "loss": 0.0181, "step": 174270 }, { "epoch": 1.4101464519783153, "grad_norm": 0.38844460248947144, "learning_rate": 2.4237142201107022e-06, "loss": 0.0252, "step": 174280 }, { "epoch": 1.4102273646735173, "grad_norm": 0.5887465476989746, "learning_rate": 2.4231090959731263e-06, "loss": 0.0206, "step": 174290 }, { "epoch": 1.4103082773687192, "grad_norm": 0.6085376143455505, "learning_rate": 2.4225040232261993e-06, "loss": 0.0188, "step": 174300 }, { "epoch": 1.410389190063921, "grad_norm": 0.3949333429336548, "learning_rate": 2.4218990018819884e-06, "loss": 0.0226, "step": 174310 }, { "epoch": 1.4104701027591229, "grad_norm": 0.13997091352939606, "learning_rate": 2.421294031952559e-06, "loss": 0.0128, "step": 174320 }, { "epoch": 1.4105510154543248, "grad_norm": 0.4863322973251343, "learning_rate": 2.420689113449976e-06, "loss": 0.0124, "step": 174330 }, { "epoch": 1.4106319281495265, "grad_norm": 0.6365358829498291, "learning_rate": 2.420084246386304e-06, "loss": 0.0243, "step": 174340 }, { "epoch": 1.4107128408447285, "grad_norm": 0.9245655536651611, "learning_rate": 2.4194794307736054e-06, "loss": 0.0283, "step": 174350 }, { "epoch": 1.4107937535399304, "grad_norm": 0.39028066396713257, "learning_rate": 2.4188746666239415e-06, "loss": 0.0134, "step": 174360 }, { "epoch": 1.4108746662351324, "grad_norm": 0.45062246918678284, "learning_rate": 2.418269953949373e-06, "loss": 0.0238, "step": 174370 }, { "epoch": 1.4109555789303343, "grad_norm": 0.6645148396492004, "learning_rate": 2.4176652927619587e-06, "loss": 0.0174, "step": 174380 }, { "epoch": 1.411036491625536, "grad_norm": 0.4054917097091675, "learning_rate": 2.417060683073762e-06, "loss": 0.0274, "step": 174390 }, { "epoch": 1.411117404320738, "grad_norm": 0.2225770354270935, "learning_rate": 2.4164561248968353e-06, "loss": 0.0147, "step": 174400 }, { "epoch": 1.41119831701594, "grad_norm": 0.635311484336853, "learning_rate": 2.4158516182432357e-06, "loss": 0.024, "step": 174410 }, { "epoch": 1.4112792297111416, "grad_norm": 0.5167704820632935, "learning_rate": 2.4152471631250236e-06, "loss": 0.0251, "step": 174420 }, { "epoch": 1.4113601424063436, "grad_norm": 0.4712851941585541, "learning_rate": 2.4146427595542483e-06, "loss": 0.0292, "step": 174430 }, { "epoch": 1.4114410551015455, "grad_norm": 0.3685878813266754, "learning_rate": 2.414038407542963e-06, "loss": 0.0153, "step": 174440 }, { "epoch": 1.4115219677967472, "grad_norm": 0.6683821082115173, "learning_rate": 2.4134341071032264e-06, "loss": 0.012, "step": 174450 }, { "epoch": 1.4116028804919492, "grad_norm": 0.14235621690750122, "learning_rate": 2.4128298582470826e-06, "loss": 0.022, "step": 174460 }, { "epoch": 1.411683793187151, "grad_norm": 0.4822183847427368, "learning_rate": 2.4122256609865874e-06, "loss": 0.0174, "step": 174470 }, { "epoch": 1.4117647058823528, "grad_norm": 0.4955366849899292, "learning_rate": 2.411621515333788e-06, "loss": 0.0185, "step": 174480 }, { "epoch": 1.4118456185775548, "grad_norm": 0.14128397405147552, "learning_rate": 2.411017421300733e-06, "loss": 0.0216, "step": 174490 }, { "epoch": 1.4119265312727567, "grad_norm": 0.3465084731578827, "learning_rate": 2.4104133788994705e-06, "loss": 0.0144, "step": 174500 }, { "epoch": 1.4120074439679586, "grad_norm": 0.4956071972846985, "learning_rate": 2.409809388142046e-06, "loss": 0.0262, "step": 174510 }, { "epoch": 1.4120883566631606, "grad_norm": 0.4087103605270386, "learning_rate": 2.4092054490405055e-06, "loss": 0.013, "step": 174520 }, { "epoch": 1.4121692693583623, "grad_norm": 0.5458714365959167, "learning_rate": 2.4086015616068925e-06, "loss": 0.0105, "step": 174530 }, { "epoch": 1.4122501820535642, "grad_norm": 0.3545357882976532, "learning_rate": 2.4079977258532503e-06, "loss": 0.0144, "step": 174540 }, { "epoch": 1.4123310947487662, "grad_norm": 0.32514676451683044, "learning_rate": 2.4073939417916226e-06, "loss": 0.016, "step": 174550 }, { "epoch": 1.4124120074439679, "grad_norm": 0.5071262121200562, "learning_rate": 2.406790209434049e-06, "loss": 0.0222, "step": 174560 }, { "epoch": 1.4124929201391698, "grad_norm": 0.6922034621238708, "learning_rate": 2.4061865287925693e-06, "loss": 0.0247, "step": 174570 }, { "epoch": 1.4125738328343718, "grad_norm": 0.3597404360771179, "learning_rate": 2.4055828998792247e-06, "loss": 0.0286, "step": 174580 }, { "epoch": 1.4126547455295735, "grad_norm": 0.4083443880081177, "learning_rate": 2.404979322706052e-06, "loss": 0.0146, "step": 174590 }, { "epoch": 1.4127356582247754, "grad_norm": 0.289772093296051, "learning_rate": 2.4043757972850854e-06, "loss": 0.0316, "step": 174600 }, { "epoch": 1.4128165709199774, "grad_norm": 0.44382333755493164, "learning_rate": 2.4037723236283687e-06, "loss": 0.031, "step": 174610 }, { "epoch": 1.412897483615179, "grad_norm": 0.19038991630077362, "learning_rate": 2.4031689017479304e-06, "loss": 0.024, "step": 174620 }, { "epoch": 1.412978396310381, "grad_norm": 0.460287481546402, "learning_rate": 2.402565531655804e-06, "loss": 0.0143, "step": 174630 }, { "epoch": 1.413059309005583, "grad_norm": 0.16557274758815765, "learning_rate": 2.4019622133640256e-06, "loss": 0.0123, "step": 174640 }, { "epoch": 1.413140221700785, "grad_norm": 0.4435906708240509, "learning_rate": 2.4013589468846293e-06, "loss": 0.016, "step": 174650 }, { "epoch": 1.4132211343959868, "grad_norm": 0.6981438398361206, "learning_rate": 2.4007557322296386e-06, "loss": 0.0166, "step": 174660 }, { "epoch": 1.4133020470911886, "grad_norm": 0.15566079318523407, "learning_rate": 2.4001525694110893e-06, "loss": 0.0115, "step": 174670 }, { "epoch": 1.4133829597863905, "grad_norm": 0.11315056681632996, "learning_rate": 2.399549458441009e-06, "loss": 0.0079, "step": 174680 }, { "epoch": 1.4134638724815924, "grad_norm": 0.08431535214185715, "learning_rate": 2.3989463993314244e-06, "loss": 0.0164, "step": 174690 }, { "epoch": 1.4135447851767942, "grad_norm": 0.13335178792476654, "learning_rate": 2.398343392094363e-06, "loss": 0.0113, "step": 174700 }, { "epoch": 1.413625697871996, "grad_norm": 0.4287165403366089, "learning_rate": 2.3977404367418506e-06, "loss": 0.0317, "step": 174710 }, { "epoch": 1.413706610567198, "grad_norm": 0.48982980847358704, "learning_rate": 2.397137533285911e-06, "loss": 0.022, "step": 174720 }, { "epoch": 1.4137875232623998, "grad_norm": 0.6266401410102844, "learning_rate": 2.396534681738569e-06, "loss": 0.0151, "step": 174730 }, { "epoch": 1.4138684359576017, "grad_norm": 0.5860128998756409, "learning_rate": 2.395931882111846e-06, "loss": 0.0137, "step": 174740 }, { "epoch": 1.4139493486528036, "grad_norm": 0.225476935505867, "learning_rate": 2.3953291344177645e-06, "loss": 0.017, "step": 174750 }, { "epoch": 1.4140302613480056, "grad_norm": 0.22810198366641998, "learning_rate": 2.394726438668345e-06, "loss": 0.0139, "step": 174760 }, { "epoch": 1.4141111740432073, "grad_norm": 0.3310527205467224, "learning_rate": 2.394123794875606e-06, "loss": 0.0158, "step": 174770 }, { "epoch": 1.4141920867384092, "grad_norm": 0.7563307285308838, "learning_rate": 2.393521203051567e-06, "loss": 0.019, "step": 174780 }, { "epoch": 1.4142729994336112, "grad_norm": 0.40805110335350037, "learning_rate": 2.3929186632082435e-06, "loss": 0.021, "step": 174790 }, { "epoch": 1.4143539121288131, "grad_norm": 0.5055603384971619, "learning_rate": 2.392316175357657e-06, "loss": 0.0264, "step": 174800 }, { "epoch": 1.4144348248240148, "grad_norm": 0.4467560052871704, "learning_rate": 2.3917137395118174e-06, "loss": 0.0121, "step": 174810 }, { "epoch": 1.4145157375192168, "grad_norm": 0.6026027798652649, "learning_rate": 2.391111355682739e-06, "loss": 0.0309, "step": 174820 }, { "epoch": 1.4145966502144187, "grad_norm": 0.43029022216796875, "learning_rate": 2.390509023882439e-06, "loss": 0.0224, "step": 174830 }, { "epoch": 1.4146775629096204, "grad_norm": 0.6365059614181519, "learning_rate": 2.3899067441229296e-06, "loss": 0.0399, "step": 174840 }, { "epoch": 1.4147584756048224, "grad_norm": 0.316035658121109, "learning_rate": 2.389304516416216e-06, "loss": 0.0188, "step": 174850 }, { "epoch": 1.4148393883000243, "grad_norm": 0.10598403215408325, "learning_rate": 2.388702340774315e-06, "loss": 0.0176, "step": 174860 }, { "epoch": 1.414920300995226, "grad_norm": 0.2788539528846741, "learning_rate": 2.3881002172092323e-06, "loss": 0.0164, "step": 174870 }, { "epoch": 1.415001213690428, "grad_norm": 0.39677682518959045, "learning_rate": 2.387498145732977e-06, "loss": 0.0217, "step": 174880 }, { "epoch": 1.41508212638563, "grad_norm": 0.25769954919815063, "learning_rate": 2.3868961263575556e-06, "loss": 0.0268, "step": 174890 }, { "epoch": 1.4151630390808319, "grad_norm": 0.4817765951156616, "learning_rate": 2.3862941590949753e-06, "loss": 0.0144, "step": 174900 }, { "epoch": 1.4152439517760338, "grad_norm": 0.4196981191635132, "learning_rate": 2.3856922439572394e-06, "loss": 0.0192, "step": 174910 }, { "epoch": 1.4153248644712355, "grad_norm": 0.5965697169303894, "learning_rate": 2.3850903809563536e-06, "loss": 0.0245, "step": 174920 }, { "epoch": 1.4154057771664375, "grad_norm": 0.37281325459480286, "learning_rate": 2.384488570104319e-06, "loss": 0.0269, "step": 174930 }, { "epoch": 1.4154866898616394, "grad_norm": 0.3238498270511627, "learning_rate": 2.3838868114131385e-06, "loss": 0.0142, "step": 174940 }, { "epoch": 1.415567602556841, "grad_norm": 0.26645028591156006, "learning_rate": 2.383285104894813e-06, "loss": 0.0206, "step": 174950 }, { "epoch": 1.415648515252043, "grad_norm": 0.1637473702430725, "learning_rate": 2.382683450561342e-06, "loss": 0.0248, "step": 174960 }, { "epoch": 1.415729427947245, "grad_norm": 0.3954162895679474, "learning_rate": 2.3820818484247233e-06, "loss": 0.0217, "step": 174970 }, { "epoch": 1.4158103406424467, "grad_norm": 0.3335916996002197, "learning_rate": 2.381480298496956e-06, "loss": 0.03, "step": 174980 }, { "epoch": 1.4158912533376486, "grad_norm": 0.32103195786476135, "learning_rate": 2.3808788007900362e-06, "loss": 0.0102, "step": 174990 }, { "epoch": 1.4159721660328506, "grad_norm": 0.49304282665252686, "learning_rate": 2.38027735531596e-06, "loss": 0.0258, "step": 175000 }, { "epoch": 1.4160530787280523, "grad_norm": 0.3821543753147125, "learning_rate": 2.379675962086719e-06, "loss": 0.0196, "step": 175010 }, { "epoch": 1.4161339914232542, "grad_norm": 0.31324151158332825, "learning_rate": 2.3790746211143113e-06, "loss": 0.0264, "step": 175020 }, { "epoch": 1.4162149041184562, "grad_norm": 0.33318936824798584, "learning_rate": 2.378473332410729e-06, "loss": 0.0145, "step": 175030 }, { "epoch": 1.4162958168136581, "grad_norm": 0.7569507360458374, "learning_rate": 2.3778720959879574e-06, "loss": 0.0275, "step": 175040 }, { "epoch": 1.41637672950886, "grad_norm": 0.15486903488636017, "learning_rate": 2.377270911857994e-06, "loss": 0.014, "step": 175050 }, { "epoch": 1.4164576422040618, "grad_norm": 0.6887615919113159, "learning_rate": 2.3766697800328277e-06, "loss": 0.0185, "step": 175060 }, { "epoch": 1.4165385548992637, "grad_norm": 0.8020652532577515, "learning_rate": 2.37606870052444e-06, "loss": 0.0227, "step": 175070 }, { "epoch": 1.4166194675944657, "grad_norm": 0.49238014221191406, "learning_rate": 2.3754676733448245e-06, "loss": 0.0191, "step": 175080 }, { "epoch": 1.4167003802896674, "grad_norm": 0.2098659873008728, "learning_rate": 2.374866698505966e-06, "loss": 0.0173, "step": 175090 }, { "epoch": 1.4167812929848693, "grad_norm": 0.6009799838066101, "learning_rate": 2.3742657760198496e-06, "loss": 0.0148, "step": 175100 }, { "epoch": 1.4168622056800713, "grad_norm": 0.3268892467021942, "learning_rate": 2.3736649058984583e-06, "loss": 0.0126, "step": 175110 }, { "epoch": 1.416943118375273, "grad_norm": 0.2463477998971939, "learning_rate": 2.373064088153777e-06, "loss": 0.0258, "step": 175120 }, { "epoch": 1.417024031070475, "grad_norm": 0.6386948823928833, "learning_rate": 2.3724633227977863e-06, "loss": 0.0215, "step": 175130 }, { "epoch": 1.4171049437656769, "grad_norm": 0.39357414841651917, "learning_rate": 2.3718626098424673e-06, "loss": 0.0212, "step": 175140 }, { "epoch": 1.4171858564608786, "grad_norm": 0.40168997645378113, "learning_rate": 2.3712619492998005e-06, "loss": 0.0164, "step": 175150 }, { "epoch": 1.4172667691560805, "grad_norm": 1.0831825733184814, "learning_rate": 2.3706613411817647e-06, "loss": 0.0176, "step": 175160 }, { "epoch": 1.4173476818512825, "grad_norm": 0.695016622543335, "learning_rate": 2.3700607855003376e-06, "loss": 0.0302, "step": 175170 }, { "epoch": 1.4174285945464844, "grad_norm": 0.33178579807281494, "learning_rate": 2.369460282267496e-06, "loss": 0.0286, "step": 175180 }, { "epoch": 1.4175095072416863, "grad_norm": 0.18406935036182404, "learning_rate": 2.368859831495216e-06, "loss": 0.0213, "step": 175190 }, { "epoch": 1.417590419936888, "grad_norm": 0.5009024739265442, "learning_rate": 2.3682594331954723e-06, "loss": 0.0197, "step": 175200 }, { "epoch": 1.41767133263209, "grad_norm": 0.6913208961486816, "learning_rate": 2.367659087380236e-06, "loss": 0.0202, "step": 175210 }, { "epoch": 1.417752245327292, "grad_norm": 0.5360564589500427, "learning_rate": 2.3670587940614857e-06, "loss": 0.0233, "step": 175220 }, { "epoch": 1.4178331580224937, "grad_norm": 0.47189998626708984, "learning_rate": 2.3664585532511864e-06, "loss": 0.0111, "step": 175230 }, { "epoch": 1.4179140707176956, "grad_norm": 0.5001083612442017, "learning_rate": 2.365858364961313e-06, "loss": 0.0212, "step": 175240 }, { "epoch": 1.4179949834128975, "grad_norm": 0.45779985189437866, "learning_rate": 2.3652582292038355e-06, "loss": 0.0142, "step": 175250 }, { "epoch": 1.4180758961080993, "grad_norm": 0.489308625459671, "learning_rate": 2.3646581459907165e-06, "loss": 0.0168, "step": 175260 }, { "epoch": 1.4181568088033012, "grad_norm": 0.4140941798686981, "learning_rate": 2.3640581153339293e-06, "loss": 0.0179, "step": 175270 }, { "epoch": 1.4182377214985031, "grad_norm": 0.48712512850761414, "learning_rate": 2.3634581372454384e-06, "loss": 0.0153, "step": 175280 }, { "epoch": 1.418318634193705, "grad_norm": 0.3042896091938019, "learning_rate": 2.3628582117372088e-06, "loss": 0.0154, "step": 175290 }, { "epoch": 1.4183995468889068, "grad_norm": 0.08212567120790482, "learning_rate": 2.362258338821205e-06, "loss": 0.0157, "step": 175300 }, { "epoch": 1.4184804595841087, "grad_norm": 0.08262572437524796, "learning_rate": 2.3616585185093902e-06, "loss": 0.0129, "step": 175310 }, { "epoch": 1.4185613722793107, "grad_norm": 0.3361980617046356, "learning_rate": 2.361058750813726e-06, "loss": 0.0179, "step": 175320 }, { "epoch": 1.4186422849745126, "grad_norm": 0.5828210711479187, "learning_rate": 2.360459035746175e-06, "loss": 0.0182, "step": 175330 }, { "epoch": 1.4187231976697143, "grad_norm": 0.6057043671607971, "learning_rate": 2.3598593733186953e-06, "loss": 0.0128, "step": 175340 }, { "epoch": 1.4188041103649163, "grad_norm": 0.250002920627594, "learning_rate": 2.359259763543247e-06, "loss": 0.0194, "step": 175350 }, { "epoch": 1.4188850230601182, "grad_norm": 0.010900271125137806, "learning_rate": 2.358660206431788e-06, "loss": 0.0195, "step": 175360 }, { "epoch": 1.41896593575532, "grad_norm": 0.39909884333610535, "learning_rate": 2.358060701996273e-06, "loss": 0.0129, "step": 175370 }, { "epoch": 1.4190468484505219, "grad_norm": 0.4974905848503113, "learning_rate": 2.3574612502486636e-06, "loss": 0.0158, "step": 175380 }, { "epoch": 1.4191277611457238, "grad_norm": 0.5315442681312561, "learning_rate": 2.3568618512009095e-06, "loss": 0.0111, "step": 175390 }, { "epoch": 1.4192086738409255, "grad_norm": 0.15079070627689362, "learning_rate": 2.3562625048649635e-06, "loss": 0.0177, "step": 175400 }, { "epoch": 1.4192895865361275, "grad_norm": 0.32851657271385193, "learning_rate": 2.355663211252785e-06, "loss": 0.0226, "step": 175410 }, { "epoch": 1.4193704992313294, "grad_norm": 0.4752170741558075, "learning_rate": 2.355063970376317e-06, "loss": 0.0241, "step": 175420 }, { "epoch": 1.4194514119265313, "grad_norm": 0.13228270411491394, "learning_rate": 2.3544647822475166e-06, "loss": 0.017, "step": 175430 }, { "epoch": 1.4195323246217333, "grad_norm": 0.3675115704536438, "learning_rate": 2.3538656468783326e-06, "loss": 0.0173, "step": 175440 }, { "epoch": 1.419613237316935, "grad_norm": 0.5446800589561462, "learning_rate": 2.353266564280709e-06, "loss": 0.0314, "step": 175450 }, { "epoch": 1.419694150012137, "grad_norm": 0.6630535125732422, "learning_rate": 2.352667534466597e-06, "loss": 0.012, "step": 175460 }, { "epoch": 1.4197750627073389, "grad_norm": 0.2071770429611206, "learning_rate": 2.352068557447945e-06, "loss": 0.0088, "step": 175470 }, { "epoch": 1.4198559754025406, "grad_norm": 0.2152911275625229, "learning_rate": 2.351469633236692e-06, "loss": 0.0175, "step": 175480 }, { "epoch": 1.4199368880977425, "grad_norm": 0.3509872555732727, "learning_rate": 2.3508707618447874e-06, "loss": 0.0116, "step": 175490 }, { "epoch": 1.4200178007929445, "grad_norm": 0.2106034904718399, "learning_rate": 2.350271943284173e-06, "loss": 0.0165, "step": 175500 }, { "epoch": 1.4200987134881462, "grad_norm": 0.3302455544471741, "learning_rate": 2.349673177566791e-06, "loss": 0.0173, "step": 175510 }, { "epoch": 1.4201796261833481, "grad_norm": 0.5201960802078247, "learning_rate": 2.349074464704582e-06, "loss": 0.022, "step": 175520 }, { "epoch": 1.42026053887855, "grad_norm": 0.2509247660636902, "learning_rate": 2.348475804709487e-06, "loss": 0.0181, "step": 175530 }, { "epoch": 1.4203414515737518, "grad_norm": 0.35304513573646545, "learning_rate": 2.3478771975934438e-06, "loss": 0.0235, "step": 175540 }, { "epoch": 1.4204223642689537, "grad_norm": 0.12154367566108704, "learning_rate": 2.3472786433683912e-06, "loss": 0.0208, "step": 175550 }, { "epoch": 1.4205032769641557, "grad_norm": 0.34683868288993835, "learning_rate": 2.346680142046264e-06, "loss": 0.0261, "step": 175560 }, { "epoch": 1.4205841896593576, "grad_norm": 0.3137753903865814, "learning_rate": 2.346081693639004e-06, "loss": 0.028, "step": 175570 }, { "epoch": 1.4206651023545596, "grad_norm": 0.5642436742782593, "learning_rate": 2.34548329815854e-06, "loss": 0.0156, "step": 175580 }, { "epoch": 1.4207460150497613, "grad_norm": 0.43946948647499084, "learning_rate": 2.344884955616806e-06, "loss": 0.034, "step": 175590 }, { "epoch": 1.4208269277449632, "grad_norm": 0.39072293043136597, "learning_rate": 2.3442866660257395e-06, "loss": 0.0177, "step": 175600 }, { "epoch": 1.4209078404401652, "grad_norm": 0.8334196209907532, "learning_rate": 2.3436884293972677e-06, "loss": 0.0208, "step": 175610 }, { "epoch": 1.4209887531353669, "grad_norm": 0.6219910383224487, "learning_rate": 2.3430902457433203e-06, "loss": 0.0327, "step": 175620 }, { "epoch": 1.4210696658305688, "grad_norm": 0.5144705772399902, "learning_rate": 2.3424921150758326e-06, "loss": 0.0204, "step": 175630 }, { "epoch": 1.4211505785257708, "grad_norm": 0.2350487858057022, "learning_rate": 2.341894037406726e-06, "loss": 0.0109, "step": 175640 }, { "epoch": 1.4212314912209725, "grad_norm": 0.435068815946579, "learning_rate": 2.341296012747933e-06, "loss": 0.02, "step": 175650 }, { "epoch": 1.4213124039161744, "grad_norm": 0.21708594262599945, "learning_rate": 2.34069804111138e-06, "loss": 0.0404, "step": 175660 }, { "epoch": 1.4213933166113764, "grad_norm": 0.39950355887413025, "learning_rate": 2.340100122508987e-06, "loss": 0.0217, "step": 175670 }, { "epoch": 1.421474229306578, "grad_norm": 0.195933997631073, "learning_rate": 2.3395022569526837e-06, "loss": 0.0128, "step": 175680 }, { "epoch": 1.42155514200178, "grad_norm": 0.2523292005062103, "learning_rate": 2.3389044444543914e-06, "loss": 0.0302, "step": 175690 }, { "epoch": 1.421636054696982, "grad_norm": 0.28622210025787354, "learning_rate": 2.3383066850260316e-06, "loss": 0.0215, "step": 175700 }, { "epoch": 1.421716967392184, "grad_norm": 0.5434716939926147, "learning_rate": 2.3377089786795265e-06, "loss": 0.0172, "step": 175710 }, { "epoch": 1.4217978800873858, "grad_norm": 0.38917434215545654, "learning_rate": 2.3371113254267953e-06, "loss": 0.0181, "step": 175720 }, { "epoch": 1.4218787927825876, "grad_norm": 0.370687335729599, "learning_rate": 2.3365137252797565e-06, "loss": 0.0212, "step": 175730 }, { "epoch": 1.4219597054777895, "grad_norm": 0.7428614497184753, "learning_rate": 2.335916178250329e-06, "loss": 0.027, "step": 175740 }, { "epoch": 1.4220406181729914, "grad_norm": 0.5021889209747314, "learning_rate": 2.3353186843504293e-06, "loss": 0.0138, "step": 175750 }, { "epoch": 1.4221215308681932, "grad_norm": 0.41811704635620117, "learning_rate": 2.334721243591973e-06, "loss": 0.0191, "step": 175760 }, { "epoch": 1.422202443563395, "grad_norm": 0.44169920682907104, "learning_rate": 2.334123855986875e-06, "loss": 0.0209, "step": 175770 }, { "epoch": 1.422283356258597, "grad_norm": 0.23238573968410492, "learning_rate": 2.333526521547047e-06, "loss": 0.0154, "step": 175780 }, { "epoch": 1.4223642689537987, "grad_norm": 0.43627601861953735, "learning_rate": 2.332929240284407e-06, "loss": 0.0118, "step": 175790 }, { "epoch": 1.4224451816490007, "grad_norm": 0.3969927132129669, "learning_rate": 2.3323320122108606e-06, "loss": 0.0275, "step": 175800 }, { "epoch": 1.4225260943442026, "grad_norm": 0.3037623465061188, "learning_rate": 2.331734837338319e-06, "loss": 0.015, "step": 175810 }, { "epoch": 1.4226070070394043, "grad_norm": 0.32643353939056396, "learning_rate": 2.3311377156786967e-06, "loss": 0.0144, "step": 175820 }, { "epoch": 1.4226879197346063, "grad_norm": 0.27772217988967896, "learning_rate": 2.330540647243895e-06, "loss": 0.0114, "step": 175830 }, { "epoch": 1.4227688324298082, "grad_norm": 0.43569889664649963, "learning_rate": 2.3299436320458264e-06, "loss": 0.0205, "step": 175840 }, { "epoch": 1.4228497451250102, "grad_norm": 0.3576725423336029, "learning_rate": 2.329346670096397e-06, "loss": 0.021, "step": 175850 }, { "epoch": 1.422930657820212, "grad_norm": 0.35172146558761597, "learning_rate": 2.328749761407506e-06, "loss": 0.0256, "step": 175860 }, { "epoch": 1.4230115705154138, "grad_norm": 0.41330742835998535, "learning_rate": 2.3281529059910647e-06, "loss": 0.0148, "step": 175870 }, { "epoch": 1.4230924832106158, "grad_norm": 0.36181867122650146, "learning_rate": 2.3275561038589745e-06, "loss": 0.0186, "step": 175880 }, { "epoch": 1.4231733959058177, "grad_norm": 0.277349054813385, "learning_rate": 2.3269593550231324e-06, "loss": 0.0213, "step": 175890 }, { "epoch": 1.4232543086010194, "grad_norm": 0.255754292011261, "learning_rate": 2.326362659495444e-06, "loss": 0.0113, "step": 175900 }, { "epoch": 1.4233352212962214, "grad_norm": 0.6617109179496765, "learning_rate": 2.325766017287809e-06, "loss": 0.0233, "step": 175910 }, { "epoch": 1.4234161339914233, "grad_norm": 0.3989369869232178, "learning_rate": 2.325169428412125e-06, "loss": 0.0174, "step": 175920 }, { "epoch": 1.423497046686625, "grad_norm": 0.26214277744293213, "learning_rate": 2.32457289288029e-06, "loss": 0.0253, "step": 175930 }, { "epoch": 1.423577959381827, "grad_norm": 0.5657521486282349, "learning_rate": 2.3239764107042006e-06, "loss": 0.0256, "step": 175940 }, { "epoch": 1.423658872077029, "grad_norm": 0.454796701669693, "learning_rate": 2.323379981895752e-06, "loss": 0.0151, "step": 175950 }, { "epoch": 1.4237397847722308, "grad_norm": 0.4520972967147827, "learning_rate": 2.3227836064668397e-06, "loss": 0.0198, "step": 175960 }, { "epoch": 1.4238206974674326, "grad_norm": 0.2095443159341812, "learning_rate": 2.3221872844293542e-06, "loss": 0.0136, "step": 175970 }, { "epoch": 1.4239016101626345, "grad_norm": 0.4440070390701294, "learning_rate": 2.3215910157951937e-06, "loss": 0.024, "step": 175980 }, { "epoch": 1.4239825228578364, "grad_norm": 0.46871668100357056, "learning_rate": 2.3209948005762446e-06, "loss": 0.0189, "step": 175990 }, { "epoch": 1.4240634355530384, "grad_norm": 0.5140416026115417, "learning_rate": 2.3203986387843963e-06, "loss": 0.0118, "step": 176000 }, { "epoch": 1.42414434824824, "grad_norm": 0.4271506369113922, "learning_rate": 2.3198025304315445e-06, "loss": 0.0205, "step": 176010 }, { "epoch": 1.424225260943442, "grad_norm": 0.4915369153022766, "learning_rate": 2.3192064755295714e-06, "loss": 0.025, "step": 176020 }, { "epoch": 1.424306173638644, "grad_norm": 0.4958707392215729, "learning_rate": 2.318610474090363e-06, "loss": 0.0211, "step": 176030 }, { "epoch": 1.4243870863338457, "grad_norm": 0.4094771146774292, "learning_rate": 2.3180145261258124e-06, "loss": 0.02, "step": 176040 }, { "epoch": 1.4244679990290476, "grad_norm": 0.33759844303131104, "learning_rate": 2.317418631647796e-06, "loss": 0.0142, "step": 176050 }, { "epoch": 1.4245489117242496, "grad_norm": 0.3113006055355072, "learning_rate": 2.316822790668204e-06, "loss": 0.0198, "step": 176060 }, { "epoch": 1.4246298244194513, "grad_norm": 0.49964413046836853, "learning_rate": 2.3162270031989187e-06, "loss": 0.0206, "step": 176070 }, { "epoch": 1.4247107371146532, "grad_norm": 0.6385756731033325, "learning_rate": 2.3156312692518163e-06, "loss": 0.0264, "step": 176080 }, { "epoch": 1.4247916498098552, "grad_norm": 0.41848963499069214, "learning_rate": 2.315035588838783e-06, "loss": 0.0212, "step": 176090 }, { "epoch": 1.4248725625050571, "grad_norm": 0.48072656989097595, "learning_rate": 2.3144399619716974e-06, "loss": 0.0212, "step": 176100 }, { "epoch": 1.424953475200259, "grad_norm": 0.37151777744293213, "learning_rate": 2.313844388662437e-06, "loss": 0.0102, "step": 176110 }, { "epoch": 1.4250343878954608, "grad_norm": 0.3340468406677246, "learning_rate": 2.313248868922879e-06, "loss": 0.0197, "step": 176120 }, { "epoch": 1.4251153005906627, "grad_norm": 0.28161028027534485, "learning_rate": 2.3126534027649013e-06, "loss": 0.0212, "step": 176130 }, { "epoch": 1.4251962132858647, "grad_norm": 0.5828272104263306, "learning_rate": 2.3120579902003776e-06, "loss": 0.0272, "step": 176140 }, { "epoch": 1.4252771259810664, "grad_norm": 0.35055264830589294, "learning_rate": 2.3114626312411823e-06, "loss": 0.0181, "step": 176150 }, { "epoch": 1.4253580386762683, "grad_norm": 0.35289525985717773, "learning_rate": 2.31086732589919e-06, "loss": 0.0268, "step": 176160 }, { "epoch": 1.4254389513714703, "grad_norm": 0.3520263135433197, "learning_rate": 2.3102720741862717e-06, "loss": 0.0172, "step": 176170 }, { "epoch": 1.425519864066672, "grad_norm": 0.2047116607427597, "learning_rate": 2.3096768761142983e-06, "loss": 0.018, "step": 176180 }, { "epoch": 1.425600776761874, "grad_norm": 0.3031744658946991, "learning_rate": 2.309081731695138e-06, "loss": 0.0194, "step": 176190 }, { "epoch": 1.4256816894570759, "grad_norm": 0.21047712862491608, "learning_rate": 2.308486640940666e-06, "loss": 0.0107, "step": 176200 }, { "epoch": 1.4257626021522776, "grad_norm": 0.3942456841468811, "learning_rate": 2.3078916038627442e-06, "loss": 0.0178, "step": 176210 }, { "epoch": 1.4258435148474795, "grad_norm": 0.2616548538208008, "learning_rate": 2.307296620473239e-06, "loss": 0.0135, "step": 176220 }, { "epoch": 1.4259244275426814, "grad_norm": 0.7661797404289246, "learning_rate": 2.3067016907840216e-06, "loss": 0.0322, "step": 176230 }, { "epoch": 1.4260053402378834, "grad_norm": 0.39884087443351746, "learning_rate": 2.3061068148069494e-06, "loss": 0.0212, "step": 176240 }, { "epoch": 1.4260862529330853, "grad_norm": 0.7129748463630676, "learning_rate": 2.3055119925538923e-06, "loss": 0.0174, "step": 176250 }, { "epoch": 1.426167165628287, "grad_norm": 0.35018351674079895, "learning_rate": 2.3049172240367112e-06, "loss": 0.0171, "step": 176260 }, { "epoch": 1.426248078323489, "grad_norm": 0.31348493695259094, "learning_rate": 2.3043225092672634e-06, "loss": 0.0207, "step": 176270 }, { "epoch": 1.426328991018691, "grad_norm": 0.19208167493343353, "learning_rate": 2.3037278482574136e-06, "loss": 0.023, "step": 176280 }, { "epoch": 1.4264099037138926, "grad_norm": 0.1433403044939041, "learning_rate": 2.30313324101902e-06, "loss": 0.0149, "step": 176290 }, { "epoch": 1.4264908164090946, "grad_norm": 0.41330477595329285, "learning_rate": 2.3025386875639404e-06, "loss": 0.0236, "step": 176300 }, { "epoch": 1.4265717291042965, "grad_norm": 0.24584990739822388, "learning_rate": 2.3019441879040326e-06, "loss": 0.0231, "step": 176310 }, { "epoch": 1.4266526417994982, "grad_norm": 0.3560406267642975, "learning_rate": 2.301349742051152e-06, "loss": 0.0291, "step": 176320 }, { "epoch": 1.4267335544947002, "grad_norm": 0.21138811111450195, "learning_rate": 2.3007553500171538e-06, "loss": 0.015, "step": 176330 }, { "epoch": 1.4268144671899021, "grad_norm": 0.1541517972946167, "learning_rate": 2.3001610118138912e-06, "loss": 0.0141, "step": 176340 }, { "epoch": 1.4268953798851038, "grad_norm": 0.4073323905467987, "learning_rate": 2.2995667274532178e-06, "loss": 0.02, "step": 176350 }, { "epoch": 1.4269762925803058, "grad_norm": 0.32300427556037903, "learning_rate": 2.2989724969469855e-06, "loss": 0.0201, "step": 176360 }, { "epoch": 1.4270572052755077, "grad_norm": 0.22750349342823029, "learning_rate": 2.298378320307045e-06, "loss": 0.0266, "step": 176370 }, { "epoch": 1.4271381179707097, "grad_norm": 0.17850665748119354, "learning_rate": 2.2977841975452423e-06, "loss": 0.03, "step": 176380 }, { "epoch": 1.4272190306659116, "grad_norm": 0.5406649708747864, "learning_rate": 2.297190128673434e-06, "loss": 0.0184, "step": 176390 }, { "epoch": 1.4272999433611133, "grad_norm": 0.002216774970293045, "learning_rate": 2.2965961137034604e-06, "loss": 0.0121, "step": 176400 }, { "epoch": 1.4273808560563153, "grad_norm": 0.45234498381614685, "learning_rate": 2.2960021526471686e-06, "loss": 0.0183, "step": 176410 }, { "epoch": 1.4274617687515172, "grad_norm": 0.6721468567848206, "learning_rate": 2.295408245516409e-06, "loss": 0.0153, "step": 176420 }, { "epoch": 1.427542681446719, "grad_norm": 0.3467828631401062, "learning_rate": 2.2948143923230204e-06, "loss": 0.0288, "step": 176430 }, { "epoch": 1.4276235941419209, "grad_norm": 0.4701399505138397, "learning_rate": 2.2942205930788457e-06, "loss": 0.0291, "step": 176440 }, { "epoch": 1.4277045068371228, "grad_norm": 0.23712508380413055, "learning_rate": 2.293626847795732e-06, "loss": 0.0197, "step": 176450 }, { "epoch": 1.4277854195323245, "grad_norm": 0.2273740917444229, "learning_rate": 2.2930331564855162e-06, "loss": 0.0142, "step": 176460 }, { "epoch": 1.4278663322275265, "grad_norm": 0.3035832345485687, "learning_rate": 2.2924395191600403e-06, "loss": 0.0153, "step": 176470 }, { "epoch": 1.4279472449227284, "grad_norm": 0.6753275990486145, "learning_rate": 2.291845935831142e-06, "loss": 0.0272, "step": 176480 }, { "epoch": 1.4280281576179301, "grad_norm": 0.4430670142173767, "learning_rate": 2.291252406510659e-06, "loss": 0.0219, "step": 176490 }, { "epoch": 1.428109070313132, "grad_norm": 0.6227287650108337, "learning_rate": 2.290658931210429e-06, "loss": 0.0138, "step": 176500 }, { "epoch": 1.428189983008334, "grad_norm": 0.3050200939178467, "learning_rate": 2.2900655099422865e-06, "loss": 0.029, "step": 176510 }, { "epoch": 1.428270895703536, "grad_norm": 0.43716728687286377, "learning_rate": 2.289472142718066e-06, "loss": 0.023, "step": 176520 }, { "epoch": 1.4283518083987379, "grad_norm": 0.3782910108566284, "learning_rate": 2.2888788295496024e-06, "loss": 0.0244, "step": 176530 }, { "epoch": 1.4284327210939396, "grad_norm": 0.16139747202396393, "learning_rate": 2.288285570448727e-06, "loss": 0.0182, "step": 176540 }, { "epoch": 1.4285136337891415, "grad_norm": 0.05481085553765297, "learning_rate": 2.2876923654272704e-06, "loss": 0.0153, "step": 176550 }, { "epoch": 1.4285945464843435, "grad_norm": 0.3782854378223419, "learning_rate": 2.2870992144970643e-06, "loss": 0.0256, "step": 176560 }, { "epoch": 1.4286754591795452, "grad_norm": 0.2715721130371094, "learning_rate": 2.286506117669937e-06, "loss": 0.03, "step": 176570 }, { "epoch": 1.4287563718747471, "grad_norm": 0.6698210835456848, "learning_rate": 2.285913074957717e-06, "loss": 0.0219, "step": 176580 }, { "epoch": 1.428837284569949, "grad_norm": 0.0077245598658919334, "learning_rate": 2.285320086372231e-06, "loss": 0.0247, "step": 176590 }, { "epoch": 1.4289181972651508, "grad_norm": 0.4192187190055847, "learning_rate": 2.2847271519253024e-06, "loss": 0.0171, "step": 176600 }, { "epoch": 1.4289991099603527, "grad_norm": 0.7370817065238953, "learning_rate": 2.284134271628763e-06, "loss": 0.0181, "step": 176610 }, { "epoch": 1.4290800226555547, "grad_norm": 0.05002419278025627, "learning_rate": 2.2835414454944298e-06, "loss": 0.016, "step": 176620 }, { "epoch": 1.4291609353507566, "grad_norm": 0.45905575156211853, "learning_rate": 2.282948673534126e-06, "loss": 0.0221, "step": 176630 }, { "epoch": 1.4292418480459583, "grad_norm": 0.5103424787521362, "learning_rate": 2.282355955759677e-06, "loss": 0.0201, "step": 176640 }, { "epoch": 1.4293227607411603, "grad_norm": 0.37273523211479187, "learning_rate": 2.281763292182903e-06, "loss": 0.0215, "step": 176650 }, { "epoch": 1.4294036734363622, "grad_norm": 0.3580529987812042, "learning_rate": 2.2811706828156182e-06, "loss": 0.0222, "step": 176660 }, { "epoch": 1.4294845861315641, "grad_norm": 0.4392637312412262, "learning_rate": 2.280578127669646e-06, "loss": 0.0239, "step": 176670 }, { "epoch": 1.4295654988267659, "grad_norm": 0.33478695154190063, "learning_rate": 2.2799856267568023e-06, "loss": 0.0223, "step": 176680 }, { "epoch": 1.4296464115219678, "grad_norm": 0.6490886807441711, "learning_rate": 2.279393180088903e-06, "loss": 0.0227, "step": 176690 }, { "epoch": 1.4297273242171697, "grad_norm": 0.12882369756698608, "learning_rate": 2.278800787677763e-06, "loss": 0.0171, "step": 176700 }, { "epoch": 1.4298082369123715, "grad_norm": 0.23984485864639282, "learning_rate": 2.278208449535197e-06, "loss": 0.021, "step": 176710 }, { "epoch": 1.4298891496075734, "grad_norm": 0.3565845787525177, "learning_rate": 2.2776161656730173e-06, "loss": 0.0282, "step": 176720 }, { "epoch": 1.4299700623027753, "grad_norm": 0.4925349950790405, "learning_rate": 2.2770239361030356e-06, "loss": 0.0388, "step": 176730 }, { "epoch": 1.430050974997977, "grad_norm": 0.4400525987148285, "learning_rate": 2.2764317608370633e-06, "loss": 0.0297, "step": 176740 }, { "epoch": 1.430131887693179, "grad_norm": 0.3288065493106842, "learning_rate": 2.27583963988691e-06, "loss": 0.0212, "step": 176750 }, { "epoch": 1.430212800388381, "grad_norm": 0.4374554753303528, "learning_rate": 2.2752475732643843e-06, "loss": 0.0188, "step": 176760 }, { "epoch": 1.4302937130835829, "grad_norm": 0.5309978723526001, "learning_rate": 2.2746555609812933e-06, "loss": 0.0224, "step": 176770 }, { "epoch": 1.4303746257787848, "grad_norm": 0.5779411792755127, "learning_rate": 2.274063603049444e-06, "loss": 0.0223, "step": 176780 }, { "epoch": 1.4304555384739865, "grad_norm": 0.43423599004745483, "learning_rate": 2.273471699480639e-06, "loss": 0.0139, "step": 176790 }, { "epoch": 1.4305364511691885, "grad_norm": 0.32593265175819397, "learning_rate": 2.2728798502866887e-06, "loss": 0.031, "step": 176800 }, { "epoch": 1.4306173638643904, "grad_norm": 0.3615182638168335, "learning_rate": 2.2722880554793912e-06, "loss": 0.0253, "step": 176810 }, { "epoch": 1.4306982765595921, "grad_norm": 0.5086110830307007, "learning_rate": 2.2716963150705478e-06, "loss": 0.0185, "step": 176820 }, { "epoch": 1.430779189254794, "grad_norm": 0.46918830275535583, "learning_rate": 2.2711046290719642e-06, "loss": 0.0271, "step": 176830 }, { "epoch": 1.430860101949996, "grad_norm": 0.18289721012115479, "learning_rate": 2.2705129974954386e-06, "loss": 0.0133, "step": 176840 }, { "epoch": 1.4309410146451977, "grad_norm": 0.4001957178115845, "learning_rate": 2.2699214203527666e-06, "loss": 0.0204, "step": 176850 }, { "epoch": 1.4310219273403997, "grad_norm": 0.24156539142131805, "learning_rate": 2.269329897655749e-06, "loss": 0.0161, "step": 176860 }, { "epoch": 1.4311028400356016, "grad_norm": 0.4237651526927948, "learning_rate": 2.2687384294161824e-06, "loss": 0.0261, "step": 176870 }, { "epoch": 1.4311837527308033, "grad_norm": 0.33766546845436096, "learning_rate": 2.2681470156458625e-06, "loss": 0.0264, "step": 176880 }, { "epoch": 1.4312646654260053, "grad_norm": 0.09927777945995331, "learning_rate": 2.267555656356582e-06, "loss": 0.014, "step": 176890 }, { "epoch": 1.4313455781212072, "grad_norm": 0.5879150629043579, "learning_rate": 2.2669643515601366e-06, "loss": 0.0202, "step": 176900 }, { "epoch": 1.4314264908164092, "grad_norm": 0.32245323061943054, "learning_rate": 2.266373101268317e-06, "loss": 0.0158, "step": 176910 }, { "epoch": 1.431507403511611, "grad_norm": 0.7071202993392944, "learning_rate": 2.2657819054929146e-06, "loss": 0.0245, "step": 176920 }, { "epoch": 1.4315883162068128, "grad_norm": 0.47353243827819824, "learning_rate": 2.2651907642457203e-06, "loss": 0.0158, "step": 176930 }, { "epoch": 1.4316692289020148, "grad_norm": 0.36756691336631775, "learning_rate": 2.264599677538523e-06, "loss": 0.0223, "step": 176940 }, { "epoch": 1.4317501415972167, "grad_norm": 0.0039408546872437, "learning_rate": 2.2640086453831102e-06, "loss": 0.0115, "step": 176950 }, { "epoch": 1.4318310542924184, "grad_norm": 0.23781782388687134, "learning_rate": 2.263417667791269e-06, "loss": 0.0164, "step": 176960 }, { "epoch": 1.4319119669876204, "grad_norm": 0.42853277921676636, "learning_rate": 2.2628267447747855e-06, "loss": 0.016, "step": 176970 }, { "epoch": 1.4319928796828223, "grad_norm": 0.41496795415878296, "learning_rate": 2.262235876345444e-06, "loss": 0.0155, "step": 176980 }, { "epoch": 1.432073792378024, "grad_norm": 0.27867618203163147, "learning_rate": 2.2616450625150277e-06, "loss": 0.0269, "step": 176990 }, { "epoch": 1.432154705073226, "grad_norm": 0.10281339287757874, "learning_rate": 2.2610543032953203e-06, "loss": 0.0115, "step": 177000 }, { "epoch": 1.432235617768428, "grad_norm": 0.3795073628425598, "learning_rate": 2.2604635986981005e-06, "loss": 0.0359, "step": 177010 }, { "epoch": 1.4323165304636296, "grad_norm": 0.3371090292930603, "learning_rate": 2.259872948735153e-06, "loss": 0.0365, "step": 177020 }, { "epoch": 1.4323974431588316, "grad_norm": 0.7353960275650024, "learning_rate": 2.2592823534182568e-06, "loss": 0.0263, "step": 177030 }, { "epoch": 1.4324783558540335, "grad_norm": 0.31744885444641113, "learning_rate": 2.258691812759184e-06, "loss": 0.0227, "step": 177040 }, { "epoch": 1.4325592685492354, "grad_norm": 0.41638216376304626, "learning_rate": 2.258101326769718e-06, "loss": 0.0187, "step": 177050 }, { "epoch": 1.4326401812444374, "grad_norm": 0.44575995206832886, "learning_rate": 2.2575108954616345e-06, "loss": 0.0099, "step": 177060 }, { "epoch": 1.432721093939639, "grad_norm": 0.8571671843528748, "learning_rate": 2.2569205188467026e-06, "loss": 0.0192, "step": 177070 }, { "epoch": 1.432802006634841, "grad_norm": 0.31411394476890564, "learning_rate": 2.2563301969367017e-06, "loss": 0.0273, "step": 177080 }, { "epoch": 1.432882919330043, "grad_norm": 0.261220246553421, "learning_rate": 2.2557399297434033e-06, "loss": 0.017, "step": 177090 }, { "epoch": 1.4329638320252447, "grad_norm": 0.41888147592544556, "learning_rate": 2.2551497172785786e-06, "loss": 0.0212, "step": 177100 }, { "epoch": 1.4330447447204466, "grad_norm": 0.4061315953731537, "learning_rate": 2.254559559553998e-06, "loss": 0.0153, "step": 177110 }, { "epoch": 1.4331256574156486, "grad_norm": 0.26559826731681824, "learning_rate": 2.2539694565814315e-06, "loss": 0.0223, "step": 177120 }, { "epoch": 1.4332065701108503, "grad_norm": 0.12077181041240692, "learning_rate": 2.253379408372647e-06, "loss": 0.0121, "step": 177130 }, { "epoch": 1.4332874828060522, "grad_norm": 0.6119199395179749, "learning_rate": 2.252789414939411e-06, "loss": 0.0265, "step": 177140 }, { "epoch": 1.4333683955012542, "grad_norm": 0.2070007473230362, "learning_rate": 2.2521994762934914e-06, "loss": 0.0165, "step": 177150 }, { "epoch": 1.433449308196456, "grad_norm": 0.3265356421470642, "learning_rate": 2.2516095924466517e-06, "loss": 0.0121, "step": 177160 }, { "epoch": 1.4335302208916578, "grad_norm": 0.08419644087553024, "learning_rate": 2.2510197634106567e-06, "loss": 0.0159, "step": 177170 }, { "epoch": 1.4336111335868598, "grad_norm": 0.32390493154525757, "learning_rate": 2.250429989197268e-06, "loss": 0.0272, "step": 177180 }, { "epoch": 1.4336920462820617, "grad_norm": 0.3632230758666992, "learning_rate": 2.2498402698182515e-06, "loss": 0.0308, "step": 177190 }, { "epoch": 1.4337729589772636, "grad_norm": 0.2962222695350647, "learning_rate": 2.2492506052853634e-06, "loss": 0.0298, "step": 177200 }, { "epoch": 1.4338538716724654, "grad_norm": 0.39212435483932495, "learning_rate": 2.2486609956103633e-06, "loss": 0.0197, "step": 177210 }, { "epoch": 1.4339347843676673, "grad_norm": 0.11036045849323273, "learning_rate": 2.248071440805015e-06, "loss": 0.0151, "step": 177220 }, { "epoch": 1.4340156970628692, "grad_norm": 0.33213067054748535, "learning_rate": 2.247481940881068e-06, "loss": 0.0142, "step": 177230 }, { "epoch": 1.434096609758071, "grad_norm": 0.323809415102005, "learning_rate": 2.2468924958502847e-06, "loss": 0.0233, "step": 177240 }, { "epoch": 1.434177522453273, "grad_norm": 0.1827307939529419, "learning_rate": 2.2463031057244213e-06, "loss": 0.0316, "step": 177250 }, { "epoch": 1.4342584351484748, "grad_norm": 0.48666179180145264, "learning_rate": 2.2457137705152244e-06, "loss": 0.0121, "step": 177260 }, { "epoch": 1.4343393478436766, "grad_norm": 0.5751258134841919, "learning_rate": 2.2451244902344545e-06, "loss": 0.0235, "step": 177270 }, { "epoch": 1.4344202605388785, "grad_norm": 0.1981963813304901, "learning_rate": 2.24453526489386e-06, "loss": 0.0187, "step": 177280 }, { "epoch": 1.4345011732340804, "grad_norm": 0.5668761730194092, "learning_rate": 2.243946094505193e-06, "loss": 0.027, "step": 177290 }, { "epoch": 1.4345820859292824, "grad_norm": 0.30600252747535706, "learning_rate": 2.243356979080203e-06, "loss": 0.0242, "step": 177300 }, { "epoch": 1.4346629986244843, "grad_norm": 0.0878656655550003, "learning_rate": 2.2427679186306383e-06, "loss": 0.0242, "step": 177310 }, { "epoch": 1.434743911319686, "grad_norm": 0.3341147303581238, "learning_rate": 2.2421789131682465e-06, "loss": 0.0186, "step": 177320 }, { "epoch": 1.434824824014888, "grad_norm": 0.5532216429710388, "learning_rate": 2.2415899627047743e-06, "loss": 0.0231, "step": 177330 }, { "epoch": 1.43490573671009, "grad_norm": 0.2930903136730194, "learning_rate": 2.2410010672519677e-06, "loss": 0.0153, "step": 177340 }, { "epoch": 1.4349866494052916, "grad_norm": 0.5790863633155823, "learning_rate": 2.24041222682157e-06, "loss": 0.023, "step": 177350 }, { "epoch": 1.4350675621004936, "grad_norm": 0.6063104271888733, "learning_rate": 2.2398234414253244e-06, "loss": 0.0178, "step": 177360 }, { "epoch": 1.4351484747956955, "grad_norm": 0.2832584083080292, "learning_rate": 2.2392347110749712e-06, "loss": 0.024, "step": 177370 }, { "epoch": 1.4352293874908972, "grad_norm": 0.6090068221092224, "learning_rate": 2.2386460357822577e-06, "loss": 0.0174, "step": 177380 }, { "epoch": 1.4353103001860992, "grad_norm": 0.021087298169732094, "learning_rate": 2.2380574155589172e-06, "loss": 0.0178, "step": 177390 }, { "epoch": 1.4353912128813011, "grad_norm": 0.6583229899406433, "learning_rate": 2.23746885041669e-06, "loss": 0.0271, "step": 177400 }, { "epoch": 1.4354721255765028, "grad_norm": 0.20931805670261383, "learning_rate": 2.236880340367317e-06, "loss": 0.031, "step": 177410 }, { "epoch": 1.4355530382717048, "grad_norm": 0.2046414017677307, "learning_rate": 2.2362918854225294e-06, "loss": 0.0181, "step": 177420 }, { "epoch": 1.4356339509669067, "grad_norm": 0.3517436981201172, "learning_rate": 2.2357034855940673e-06, "loss": 0.0232, "step": 177430 }, { "epoch": 1.4357148636621087, "grad_norm": 0.5625166893005371, "learning_rate": 2.2351151408936654e-06, "loss": 0.0265, "step": 177440 }, { "epoch": 1.4357957763573106, "grad_norm": 0.2950027585029602, "learning_rate": 2.234526851333052e-06, "loss": 0.0297, "step": 177450 }, { "epoch": 1.4358766890525123, "grad_norm": 0.45277485251426697, "learning_rate": 2.2339386169239637e-06, "loss": 0.0176, "step": 177460 }, { "epoch": 1.4359576017477143, "grad_norm": 0.4715467691421509, "learning_rate": 2.233350437678132e-06, "loss": 0.0296, "step": 177470 }, { "epoch": 1.4360385144429162, "grad_norm": 0.43179911375045776, "learning_rate": 2.232762313607282e-06, "loss": 0.0171, "step": 177480 }, { "epoch": 1.436119427138118, "grad_norm": 0.6937277913093567, "learning_rate": 2.232174244723147e-06, "loss": 0.0161, "step": 177490 }, { "epoch": 1.4362003398333199, "grad_norm": 0.35220539569854736, "learning_rate": 2.2315862310374538e-06, "loss": 0.018, "step": 177500 }, { "epoch": 1.4362812525285218, "grad_norm": 0.47013911604881287, "learning_rate": 2.2309982725619284e-06, "loss": 0.0193, "step": 177510 }, { "epoch": 1.4363621652237235, "grad_norm": 0.6448386907577515, "learning_rate": 2.2304103693082972e-06, "loss": 0.0148, "step": 177520 }, { "epoch": 1.4364430779189254, "grad_norm": 0.36419177055358887, "learning_rate": 2.2298225212882848e-06, "loss": 0.0164, "step": 177530 }, { "epoch": 1.4365239906141274, "grad_norm": 0.46565815806388855, "learning_rate": 2.2292347285136136e-06, "loss": 0.0258, "step": 177540 }, { "epoch": 1.436604903309329, "grad_norm": 0.4001944959163666, "learning_rate": 2.2286469909960063e-06, "loss": 0.0149, "step": 177550 }, { "epoch": 1.436685816004531, "grad_norm": 0.49577969312667847, "learning_rate": 2.2280593087471823e-06, "loss": 0.0246, "step": 177560 }, { "epoch": 1.436766728699733, "grad_norm": 0.6160478591918945, "learning_rate": 2.227471681778868e-06, "loss": 0.0266, "step": 177570 }, { "epoch": 1.436847641394935, "grad_norm": 0.15839529037475586, "learning_rate": 2.226884110102775e-06, "loss": 0.0118, "step": 177580 }, { "epoch": 1.4369285540901369, "grad_norm": 0.2466852068901062, "learning_rate": 2.226296593730623e-06, "loss": 0.0221, "step": 177590 }, { "epoch": 1.4370094667853386, "grad_norm": 0.35045987367630005, "learning_rate": 2.225709132674133e-06, "loss": 0.0194, "step": 177600 }, { "epoch": 1.4370903794805405, "grad_norm": 0.16900357604026794, "learning_rate": 2.2251217269450164e-06, "loss": 0.0104, "step": 177610 }, { "epoch": 1.4371712921757425, "grad_norm": 0.5071204900741577, "learning_rate": 2.2245343765549864e-06, "loss": 0.0171, "step": 177620 }, { "epoch": 1.4372522048709442, "grad_norm": 0.004903323482722044, "learning_rate": 2.223947081515763e-06, "loss": 0.0171, "step": 177630 }, { "epoch": 1.4373331175661461, "grad_norm": 0.4838678538799286, "learning_rate": 2.223359841839051e-06, "loss": 0.0287, "step": 177640 }, { "epoch": 1.437414030261348, "grad_norm": 0.41573387384414673, "learning_rate": 2.222772657536567e-06, "loss": 0.0202, "step": 177650 }, { "epoch": 1.4374949429565498, "grad_norm": 0.5644814968109131, "learning_rate": 2.222185528620022e-06, "loss": 0.0129, "step": 177660 }, { "epoch": 1.4375758556517517, "grad_norm": 0.27504247426986694, "learning_rate": 2.221598455101117e-06, "loss": 0.0217, "step": 177670 }, { "epoch": 1.4376567683469537, "grad_norm": 0.3288845121860504, "learning_rate": 2.2210114369915685e-06, "loss": 0.0154, "step": 177680 }, { "epoch": 1.4377376810421554, "grad_norm": 0.04869057238101959, "learning_rate": 2.2204244743030815e-06, "loss": 0.0241, "step": 177690 }, { "epoch": 1.4378185937373573, "grad_norm": 0.7047961950302124, "learning_rate": 2.219837567047357e-06, "loss": 0.0232, "step": 177700 }, { "epoch": 1.4378995064325593, "grad_norm": 0.2361411601305008, "learning_rate": 2.2192507152361046e-06, "loss": 0.0307, "step": 177710 }, { "epoch": 1.4379804191277612, "grad_norm": 0.39972618222236633, "learning_rate": 2.2186639188810267e-06, "loss": 0.0168, "step": 177720 }, { "epoch": 1.4380613318229631, "grad_norm": 0.6306209564208984, "learning_rate": 2.2180771779938247e-06, "loss": 0.0174, "step": 177730 }, { "epoch": 1.4381422445181649, "grad_norm": 0.5458841919898987, "learning_rate": 2.2174904925862008e-06, "loss": 0.025, "step": 177740 }, { "epoch": 1.4382231572133668, "grad_norm": 0.019340036436915398, "learning_rate": 2.2169038626698546e-06, "loss": 0.0207, "step": 177750 }, { "epoch": 1.4383040699085687, "grad_norm": 0.2428595870733261, "learning_rate": 2.216317288256486e-06, "loss": 0.0115, "step": 177760 }, { "epoch": 1.4383849826037705, "grad_norm": 0.2592311501502991, "learning_rate": 2.215730769357792e-06, "loss": 0.0219, "step": 177770 }, { "epoch": 1.4384658952989724, "grad_norm": 0.3263644278049469, "learning_rate": 2.215144305985468e-06, "loss": 0.0211, "step": 177780 }, { "epoch": 1.4385468079941743, "grad_norm": 0.16054049134254456, "learning_rate": 2.214557898151216e-06, "loss": 0.0207, "step": 177790 }, { "epoch": 1.438627720689376, "grad_norm": 0.17169831693172455, "learning_rate": 2.2139715458667243e-06, "loss": 0.015, "step": 177800 }, { "epoch": 1.438708633384578, "grad_norm": 0.22734816372394562, "learning_rate": 2.213385249143687e-06, "loss": 0.0216, "step": 177810 }, { "epoch": 1.43878954607978, "grad_norm": 0.31092602014541626, "learning_rate": 2.212799007993801e-06, "loss": 0.0114, "step": 177820 }, { "epoch": 1.4388704587749819, "grad_norm": 0.3306429982185364, "learning_rate": 2.2122128224287515e-06, "loss": 0.0148, "step": 177830 }, { "epoch": 1.4389513714701836, "grad_norm": 0.3278692662715912, "learning_rate": 2.211626692460234e-06, "loss": 0.0278, "step": 177840 }, { "epoch": 1.4390322841653855, "grad_norm": 0.23199462890625, "learning_rate": 2.211040618099937e-06, "loss": 0.0217, "step": 177850 }, { "epoch": 1.4391131968605875, "grad_norm": 0.29973164200782776, "learning_rate": 2.210454599359544e-06, "loss": 0.0226, "step": 177860 }, { "epoch": 1.4391941095557894, "grad_norm": 0.17610591650009155, "learning_rate": 2.2098686362507467e-06, "loss": 0.0138, "step": 177870 }, { "epoch": 1.4392750222509911, "grad_norm": 0.28758159279823303, "learning_rate": 2.2092827287852307e-06, "loss": 0.0188, "step": 177880 }, { "epoch": 1.439355934946193, "grad_norm": 0.34766751527786255, "learning_rate": 2.208696876974676e-06, "loss": 0.0263, "step": 177890 }, { "epoch": 1.439436847641395, "grad_norm": 0.3720863461494446, "learning_rate": 2.208111080830771e-06, "loss": 0.0343, "step": 177900 }, { "epoch": 1.4395177603365967, "grad_norm": 0.35271137952804565, "learning_rate": 2.207525340365196e-06, "loss": 0.0216, "step": 177910 }, { "epoch": 1.4395986730317987, "grad_norm": 0.00600305013358593, "learning_rate": 2.206939655589634e-06, "loss": 0.0152, "step": 177920 }, { "epoch": 1.4396795857270006, "grad_norm": 0.6062629818916321, "learning_rate": 2.2063540265157635e-06, "loss": 0.0253, "step": 177930 }, { "epoch": 1.4397604984222023, "grad_norm": 0.39087995886802673, "learning_rate": 2.205768453155265e-06, "loss": 0.0149, "step": 177940 }, { "epoch": 1.4398414111174043, "grad_norm": 0.4774762690067291, "learning_rate": 2.2051829355198145e-06, "loss": 0.0233, "step": 177950 }, { "epoch": 1.4399223238126062, "grad_norm": 0.30152612924575806, "learning_rate": 2.2045974736210912e-06, "loss": 0.0249, "step": 177960 }, { "epoch": 1.4400032365078081, "grad_norm": 0.25027698278427124, "learning_rate": 2.2040120674707676e-06, "loss": 0.0235, "step": 177970 }, { "epoch": 1.44008414920301, "grad_norm": 0.8264731764793396, "learning_rate": 2.203426717080525e-06, "loss": 0.04, "step": 177980 }, { "epoch": 1.4401650618982118, "grad_norm": 0.2674213647842407, "learning_rate": 2.2028414224620302e-06, "loss": 0.017, "step": 177990 }, { "epoch": 1.4402459745934137, "grad_norm": 0.779120922088623, "learning_rate": 2.202256183626957e-06, "loss": 0.029, "step": 178000 }, { "epoch": 1.4403268872886157, "grad_norm": 0.39165759086608887, "learning_rate": 2.2016710005869814e-06, "loss": 0.0196, "step": 178010 }, { "epoch": 1.4404077999838174, "grad_norm": 0.4141368865966797, "learning_rate": 2.201085873353768e-06, "loss": 0.0234, "step": 178020 }, { "epoch": 1.4404887126790193, "grad_norm": 0.612718403339386, "learning_rate": 2.200500801938986e-06, "loss": 0.0248, "step": 178030 }, { "epoch": 1.4405696253742213, "grad_norm": 0.4550173580646515, "learning_rate": 2.19991578635431e-06, "loss": 0.0167, "step": 178040 }, { "epoch": 1.440650538069423, "grad_norm": 0.12373142689466476, "learning_rate": 2.199330826611398e-06, "loss": 0.011, "step": 178050 }, { "epoch": 1.440731450764625, "grad_norm": 0.5126376748085022, "learning_rate": 2.1987459227219225e-06, "loss": 0.0135, "step": 178060 }, { "epoch": 1.4408123634598269, "grad_norm": 0.29059964418411255, "learning_rate": 2.1981610746975473e-06, "loss": 0.0221, "step": 178070 }, { "epoch": 1.4408932761550286, "grad_norm": 0.23978139460086823, "learning_rate": 2.1975762825499316e-06, "loss": 0.0222, "step": 178080 }, { "epoch": 1.4409741888502305, "grad_norm": 0.43562883138656616, "learning_rate": 2.196991546290742e-06, "loss": 0.0133, "step": 178090 }, { "epoch": 1.4410551015454325, "grad_norm": 0.5414988994598389, "learning_rate": 2.1964068659316385e-06, "loss": 0.0176, "step": 178100 }, { "epoch": 1.4411360142406344, "grad_norm": 0.4005764126777649, "learning_rate": 2.1958222414842817e-06, "loss": 0.0197, "step": 178110 }, { "epoch": 1.4412169269358364, "grad_norm": 0.17540746927261353, "learning_rate": 2.1952376729603304e-06, "loss": 0.0177, "step": 178120 }, { "epoch": 1.441297839631038, "grad_norm": 0.10713736712932587, "learning_rate": 2.194653160371442e-06, "loss": 0.0151, "step": 178130 }, { "epoch": 1.44137875232624, "grad_norm": 0.18951678276062012, "learning_rate": 2.1940687037292746e-06, "loss": 0.0272, "step": 178140 }, { "epoch": 1.441459665021442, "grad_norm": 0.44690197706222534, "learning_rate": 2.1934843030454833e-06, "loss": 0.0254, "step": 178150 }, { "epoch": 1.4415405777166437, "grad_norm": 0.1714063435792923, "learning_rate": 2.192899958331723e-06, "loss": 0.0242, "step": 178160 }, { "epoch": 1.4416214904118456, "grad_norm": 0.5132441520690918, "learning_rate": 2.192315669599646e-06, "loss": 0.0218, "step": 178170 }, { "epoch": 1.4417024031070476, "grad_norm": 0.11492379754781723, "learning_rate": 2.1917314368609067e-06, "loss": 0.0186, "step": 178180 }, { "epoch": 1.4417833158022493, "grad_norm": 0.32442304491996765, "learning_rate": 2.191147260127153e-06, "loss": 0.0151, "step": 178190 }, { "epoch": 1.4418642284974512, "grad_norm": 0.16619642078876495, "learning_rate": 2.190563139410041e-06, "loss": 0.0236, "step": 178200 }, { "epoch": 1.4419451411926532, "grad_norm": 0.0643894299864769, "learning_rate": 2.189979074721215e-06, "loss": 0.015, "step": 178210 }, { "epoch": 1.4420260538878549, "grad_norm": 0.6157031655311584, "learning_rate": 2.1893950660723216e-06, "loss": 0.0174, "step": 178220 }, { "epoch": 1.4421069665830568, "grad_norm": 0.550514817237854, "learning_rate": 2.1888111134750145e-06, "loss": 0.0142, "step": 178230 }, { "epoch": 1.4421878792782588, "grad_norm": 0.2563369870185852, "learning_rate": 2.188227216940933e-06, "loss": 0.0218, "step": 178240 }, { "epoch": 1.4422687919734607, "grad_norm": 0.30935102701187134, "learning_rate": 2.187643376481722e-06, "loss": 0.0125, "step": 178250 }, { "epoch": 1.4423497046686626, "grad_norm": 0.20062203705310822, "learning_rate": 2.18705959210903e-06, "loss": 0.0197, "step": 178260 }, { "epoch": 1.4424306173638644, "grad_norm": 0.1940799057483673, "learning_rate": 2.1864758638344926e-06, "loss": 0.0242, "step": 178270 }, { "epoch": 1.4425115300590663, "grad_norm": 0.3213764429092407, "learning_rate": 2.185892191669756e-06, "loss": 0.0165, "step": 178280 }, { "epoch": 1.4425924427542682, "grad_norm": 0.23778249323368073, "learning_rate": 2.185308575626459e-06, "loss": 0.0218, "step": 178290 }, { "epoch": 1.44267335544947, "grad_norm": 0.20026633143424988, "learning_rate": 2.1847250157162407e-06, "loss": 0.0211, "step": 178300 }, { "epoch": 1.442754268144672, "grad_norm": 0.4972458779811859, "learning_rate": 2.184141511950738e-06, "loss": 0.0137, "step": 178310 }, { "epoch": 1.4428351808398738, "grad_norm": 0.2635205388069153, "learning_rate": 2.183558064341589e-06, "loss": 0.0107, "step": 178320 }, { "epoch": 1.4429160935350756, "grad_norm": 0.19335860013961792, "learning_rate": 2.182974672900428e-06, "loss": 0.0118, "step": 178330 }, { "epoch": 1.4429970062302775, "grad_norm": 0.28670212626457214, "learning_rate": 2.1823913376388906e-06, "loss": 0.0202, "step": 178340 }, { "epoch": 1.4430779189254794, "grad_norm": 0.21946243941783905, "learning_rate": 2.1818080585686096e-06, "loss": 0.0136, "step": 178350 }, { "epoch": 1.4431588316206811, "grad_norm": 0.45059502124786377, "learning_rate": 2.181224835701218e-06, "loss": 0.019, "step": 178360 }, { "epoch": 1.443239744315883, "grad_norm": 0.6562386751174927, "learning_rate": 2.1806416690483455e-06, "loss": 0.0231, "step": 178370 }, { "epoch": 1.443320657011085, "grad_norm": 0.49961647391319275, "learning_rate": 2.1800585586216217e-06, "loss": 0.0159, "step": 178380 }, { "epoch": 1.443401569706287, "grad_norm": 0.241461381316185, "learning_rate": 2.17947550443268e-06, "loss": 0.021, "step": 178390 }, { "epoch": 1.443482482401489, "grad_norm": 0.4234701693058014, "learning_rate": 2.178892506493143e-06, "loss": 0.0258, "step": 178400 }, { "epoch": 1.4435633950966906, "grad_norm": 0.3530769944190979, "learning_rate": 2.178309564814638e-06, "loss": 0.0193, "step": 178410 }, { "epoch": 1.4436443077918926, "grad_norm": 0.11611128598451614, "learning_rate": 2.1777266794087953e-06, "loss": 0.0246, "step": 178420 }, { "epoch": 1.4437252204870945, "grad_norm": 0.6787950992584229, "learning_rate": 2.1771438502872343e-06, "loss": 0.0213, "step": 178430 }, { "epoch": 1.4438061331822962, "grad_norm": 0.5685684680938721, "learning_rate": 2.1765610774615777e-06, "loss": 0.0136, "step": 178440 }, { "epoch": 1.4438870458774982, "grad_norm": 0.31324464082717896, "learning_rate": 2.1759783609434523e-06, "loss": 0.0154, "step": 178450 }, { "epoch": 1.4439679585727, "grad_norm": 0.1515883207321167, "learning_rate": 2.175395700744476e-06, "loss": 0.0228, "step": 178460 }, { "epoch": 1.4440488712679018, "grad_norm": 0.4145209491252899, "learning_rate": 2.17481309687627e-06, "loss": 0.0159, "step": 178470 }, { "epoch": 1.4441297839631038, "grad_norm": 0.1552438735961914, "learning_rate": 2.174230549350452e-06, "loss": 0.017, "step": 178480 }, { "epoch": 1.4442106966583057, "grad_norm": 0.35766878724098206, "learning_rate": 2.173648058178641e-06, "loss": 0.019, "step": 178490 }, { "epoch": 1.4442916093535076, "grad_norm": 0.19683964550495148, "learning_rate": 2.1730656233724517e-06, "loss": 0.0148, "step": 178500 }, { "epoch": 1.4443725220487094, "grad_norm": 0.3035690188407898, "learning_rate": 2.1724832449435013e-06, "loss": 0.024, "step": 178510 }, { "epoch": 1.4444534347439113, "grad_norm": 0.6321967244148254, "learning_rate": 2.171900922903403e-06, "loss": 0.0199, "step": 178520 }, { "epoch": 1.4445343474391132, "grad_norm": 0.3648940324783325, "learning_rate": 2.1713186572637712e-06, "loss": 0.0127, "step": 178530 }, { "epoch": 1.4446152601343152, "grad_norm": 0.5205386877059937, "learning_rate": 2.170736448036217e-06, "loss": 0.0251, "step": 178540 }, { "epoch": 1.444696172829517, "grad_norm": 0.3052257001399994, "learning_rate": 2.170154295232351e-06, "loss": 0.0171, "step": 178550 }, { "epoch": 1.4447770855247188, "grad_norm": 0.3298231363296509, "learning_rate": 2.1695721988637836e-06, "loss": 0.0304, "step": 178560 }, { "epoch": 1.4448579982199208, "grad_norm": 0.38351136445999146, "learning_rate": 2.168990158942124e-06, "loss": 0.0126, "step": 178570 }, { "epoch": 1.4449389109151225, "grad_norm": 0.5631423592567444, "learning_rate": 2.168408175478978e-06, "loss": 0.0204, "step": 178580 }, { "epoch": 1.4450198236103244, "grad_norm": 0.48191073536872864, "learning_rate": 2.167826248485954e-06, "loss": 0.0205, "step": 178590 }, { "epoch": 1.4451007363055264, "grad_norm": 0.6195868253707886, "learning_rate": 2.167244377974655e-06, "loss": 0.0217, "step": 178600 }, { "epoch": 1.445181649000728, "grad_norm": 1.060619831085205, "learning_rate": 2.1666625639566896e-06, "loss": 0.0247, "step": 178610 }, { "epoch": 1.44526256169593, "grad_norm": 0.468641072511673, "learning_rate": 2.1660808064436567e-06, "loss": 0.0147, "step": 178620 }, { "epoch": 1.445343474391132, "grad_norm": 0.19567151367664337, "learning_rate": 2.165499105447157e-06, "loss": 0.0184, "step": 178630 }, { "epoch": 1.445424387086334, "grad_norm": 0.26545554399490356, "learning_rate": 2.164917460978796e-06, "loss": 0.0094, "step": 178640 }, { "epoch": 1.4455052997815359, "grad_norm": 0.5026928782463074, "learning_rate": 2.164335873050173e-06, "loss": 0.0271, "step": 178650 }, { "epoch": 1.4455862124767376, "grad_norm": 0.40631070733070374, "learning_rate": 2.1637543416728807e-06, "loss": 0.0169, "step": 178660 }, { "epoch": 1.4456671251719395, "grad_norm": 0.21748192608356476, "learning_rate": 2.163172866858522e-06, "loss": 0.0193, "step": 178670 }, { "epoch": 1.4457480378671415, "grad_norm": 0.09241801500320435, "learning_rate": 2.162591448618692e-06, "loss": 0.0158, "step": 178680 }, { "epoch": 1.4458289505623432, "grad_norm": 0.4277973175048828, "learning_rate": 2.162010086964986e-06, "loss": 0.0202, "step": 178690 }, { "epoch": 1.4459098632575451, "grad_norm": 0.3811875283718109, "learning_rate": 2.1614287819089967e-06, "loss": 0.0243, "step": 178700 }, { "epoch": 1.445990775952747, "grad_norm": 0.030156195163726807, "learning_rate": 2.1608475334623187e-06, "loss": 0.0106, "step": 178710 }, { "epoch": 1.4460716886479488, "grad_norm": 0.16583585739135742, "learning_rate": 2.160266341636543e-06, "loss": 0.0152, "step": 178720 }, { "epoch": 1.4461526013431507, "grad_norm": 0.20090578496456146, "learning_rate": 2.1596852064432593e-06, "loss": 0.0121, "step": 178730 }, { "epoch": 1.4462335140383527, "grad_norm": 0.525632917881012, "learning_rate": 2.159104127894058e-06, "loss": 0.0189, "step": 178740 }, { "epoch": 1.4463144267335544, "grad_norm": 0.25433817505836487, "learning_rate": 2.158523106000528e-06, "loss": 0.0192, "step": 178750 }, { "epoch": 1.4463953394287563, "grad_norm": 0.4981714189052582, "learning_rate": 2.1579421407742564e-06, "loss": 0.0284, "step": 178760 }, { "epoch": 1.4464762521239583, "grad_norm": 0.30561360716819763, "learning_rate": 2.1573612322268285e-06, "loss": 0.0165, "step": 178770 }, { "epoch": 1.4465571648191602, "grad_norm": 0.35922208428382874, "learning_rate": 2.1567803803698296e-06, "loss": 0.0234, "step": 178780 }, { "epoch": 1.4466380775143621, "grad_norm": 0.5531601905822754, "learning_rate": 2.156199585214844e-06, "loss": 0.0237, "step": 178790 }, { "epoch": 1.4467189902095638, "grad_norm": 0.29312506318092346, "learning_rate": 2.1556188467734534e-06, "loss": 0.017, "step": 178800 }, { "epoch": 1.4467999029047658, "grad_norm": 0.455440878868103, "learning_rate": 2.155038165057241e-06, "loss": 0.0212, "step": 178810 }, { "epoch": 1.4468808155999677, "grad_norm": 0.3757822513580322, "learning_rate": 2.154457540077784e-06, "loss": 0.0278, "step": 178820 }, { "epoch": 1.4469617282951694, "grad_norm": 0.5055583715438843, "learning_rate": 2.1538769718466664e-06, "loss": 0.0198, "step": 178830 }, { "epoch": 1.4470426409903714, "grad_norm": 0.1943083256483078, "learning_rate": 2.1532964603754656e-06, "loss": 0.0167, "step": 178840 }, { "epoch": 1.4471235536855733, "grad_norm": 0.24159210920333862, "learning_rate": 2.152716005675754e-06, "loss": 0.0235, "step": 178850 }, { "epoch": 1.447204466380775, "grad_norm": 0.17456120252609253, "learning_rate": 2.152135607759113e-06, "loss": 0.0118, "step": 178860 }, { "epoch": 1.447285379075977, "grad_norm": 0.25011977553367615, "learning_rate": 2.151555266637114e-06, "loss": 0.0208, "step": 178870 }, { "epoch": 1.447366291771179, "grad_norm": 0.768157958984375, "learning_rate": 2.150974982321333e-06, "loss": 0.0218, "step": 178880 }, { "epoch": 1.4474472044663806, "grad_norm": 0.17215505242347717, "learning_rate": 2.1503947548233406e-06, "loss": 0.0243, "step": 178890 }, { "epoch": 1.4475281171615826, "grad_norm": 0.20913317799568176, "learning_rate": 2.14981458415471e-06, "loss": 0.0187, "step": 178900 }, { "epoch": 1.4476090298567845, "grad_norm": 0.26693645119667053, "learning_rate": 2.149234470327009e-06, "loss": 0.0168, "step": 178910 }, { "epoch": 1.4476899425519865, "grad_norm": 0.3159092366695404, "learning_rate": 2.1486544133518096e-06, "loss": 0.0215, "step": 178920 }, { "epoch": 1.4477708552471884, "grad_norm": 0.13736103475093842, "learning_rate": 2.1480744132406777e-06, "loss": 0.0133, "step": 178930 }, { "epoch": 1.4478517679423901, "grad_norm": 0.5833759903907776, "learning_rate": 2.1474944700051815e-06, "loss": 0.0214, "step": 178940 }, { "epoch": 1.447932680637592, "grad_norm": 0.4384128451347351, "learning_rate": 2.146914583656886e-06, "loss": 0.0183, "step": 178950 }, { "epoch": 1.448013593332794, "grad_norm": 0.29430773854255676, "learning_rate": 2.146334754207356e-06, "loss": 0.0175, "step": 178960 }, { "epoch": 1.4480945060279957, "grad_norm": 0.33578652143478394, "learning_rate": 2.1457549816681543e-06, "loss": 0.0185, "step": 178970 }, { "epoch": 1.4481754187231977, "grad_norm": 0.3812527060508728, "learning_rate": 2.145175266050845e-06, "loss": 0.0238, "step": 178980 }, { "epoch": 1.4482563314183996, "grad_norm": 0.46219602227211, "learning_rate": 2.144595607366986e-06, "loss": 0.0166, "step": 178990 }, { "epoch": 1.4483372441136013, "grad_norm": 0.5699965357780457, "learning_rate": 2.1440160056281428e-06, "loss": 0.0149, "step": 179000 }, { "epoch": 1.4484181568088033, "grad_norm": 0.3347955048084259, "learning_rate": 2.143436460845868e-06, "loss": 0.0135, "step": 179010 }, { "epoch": 1.4484990695040052, "grad_norm": 0.4294765591621399, "learning_rate": 2.1428569730317243e-06, "loss": 0.0164, "step": 179020 }, { "epoch": 1.448579982199207, "grad_norm": 0.40111398696899414, "learning_rate": 2.142277542197268e-06, "loss": 0.0064, "step": 179030 }, { "epoch": 1.4486608948944089, "grad_norm": 0.39138063788414, "learning_rate": 2.14169816835405e-06, "loss": 0.0178, "step": 179040 }, { "epoch": 1.4487418075896108, "grad_norm": 0.3978966772556305, "learning_rate": 2.1411188515136294e-06, "loss": 0.0174, "step": 179050 }, { "epoch": 1.4488227202848127, "grad_norm": 0.3700573742389679, "learning_rate": 2.1405395916875598e-06, "loss": 0.014, "step": 179060 }, { "epoch": 1.4489036329800147, "grad_norm": 0.16913685202598572, "learning_rate": 2.1399603888873883e-06, "loss": 0.0219, "step": 179070 }, { "epoch": 1.4489845456752164, "grad_norm": 0.2891196310520172, "learning_rate": 2.13938124312467e-06, "loss": 0.0103, "step": 179080 }, { "epoch": 1.4490654583704183, "grad_norm": 0.4279026687145233, "learning_rate": 2.1388021544109543e-06, "loss": 0.01, "step": 179090 }, { "epoch": 1.4491463710656203, "grad_norm": 0.4088614583015442, "learning_rate": 2.1382231227577895e-06, "loss": 0.0348, "step": 179100 }, { "epoch": 1.449227283760822, "grad_norm": 0.24129652976989746, "learning_rate": 2.137644148176723e-06, "loss": 0.0151, "step": 179110 }, { "epoch": 1.449308196456024, "grad_norm": 0.23061247169971466, "learning_rate": 2.137065230679301e-06, "loss": 0.0227, "step": 179120 }, { "epoch": 1.4493891091512259, "grad_norm": 0.5684285163879395, "learning_rate": 2.1364863702770692e-06, "loss": 0.0233, "step": 179130 }, { "epoch": 1.4494700218464276, "grad_norm": 0.5658754706382751, "learning_rate": 2.1359075669815717e-06, "loss": 0.0204, "step": 179140 }, { "epoch": 1.4495509345416295, "grad_norm": 0.31178292632102966, "learning_rate": 2.135328820804351e-06, "loss": 0.0218, "step": 179150 }, { "epoch": 1.4496318472368315, "grad_norm": 0.4640656113624573, "learning_rate": 2.1347501317569496e-06, "loss": 0.0244, "step": 179160 }, { "epoch": 1.4497127599320334, "grad_norm": 0.35943225026130676, "learning_rate": 2.1341714998509084e-06, "loss": 0.0157, "step": 179170 }, { "epoch": 1.4497936726272351, "grad_norm": 0.3130325973033905, "learning_rate": 2.1335929250977644e-06, "loss": 0.0181, "step": 179180 }, { "epoch": 1.449874585322437, "grad_norm": 0.18228869140148163, "learning_rate": 2.133014407509062e-06, "loss": 0.044, "step": 179190 }, { "epoch": 1.449955498017639, "grad_norm": 0.5919809341430664, "learning_rate": 2.1324359470963324e-06, "loss": 0.0249, "step": 179200 }, { "epoch": 1.450036410712841, "grad_norm": 0.16121424734592438, "learning_rate": 2.131857543871113e-06, "loss": 0.0171, "step": 179210 }, { "epoch": 1.4501173234080427, "grad_norm": 0.36824700236320496, "learning_rate": 2.1312791978449434e-06, "loss": 0.0151, "step": 179220 }, { "epoch": 1.4501982361032446, "grad_norm": 0.19153417646884918, "learning_rate": 2.1307009090293505e-06, "loss": 0.027, "step": 179230 }, { "epoch": 1.4502791487984465, "grad_norm": 0.4842199683189392, "learning_rate": 2.1301226774358723e-06, "loss": 0.0124, "step": 179240 }, { "epoch": 1.4503600614936483, "grad_norm": 0.5441147089004517, "learning_rate": 2.129544503076041e-06, "loss": 0.019, "step": 179250 }, { "epoch": 1.4504409741888502, "grad_norm": 0.314018189907074, "learning_rate": 2.12896638596138e-06, "loss": 0.0201, "step": 179260 }, { "epoch": 1.4505218868840521, "grad_norm": 0.3541751801967621, "learning_rate": 2.1283883261034266e-06, "loss": 0.0154, "step": 179270 }, { "epoch": 1.4506027995792539, "grad_norm": 0.19859856367111206, "learning_rate": 2.1278103235137065e-06, "loss": 0.0128, "step": 179280 }, { "epoch": 1.4506837122744558, "grad_norm": 0.6914514303207397, "learning_rate": 2.127232378203743e-06, "loss": 0.0177, "step": 179290 }, { "epoch": 1.4507646249696577, "grad_norm": 0.5628271698951721, "learning_rate": 2.126654490185067e-06, "loss": 0.0327, "step": 179300 }, { "epoch": 1.4508455376648597, "grad_norm": 0.44078579545021057, "learning_rate": 2.126076659469201e-06, "loss": 0.0267, "step": 179310 }, { "epoch": 1.4509264503600616, "grad_norm": 0.1302345246076584, "learning_rate": 2.125498886067669e-06, "loss": 0.022, "step": 179320 }, { "epoch": 1.4510073630552633, "grad_norm": 0.5439097285270691, "learning_rate": 2.124921169991993e-06, "loss": 0.0212, "step": 179330 }, { "epoch": 1.4510882757504653, "grad_norm": 0.626929759979248, "learning_rate": 2.1243435112536954e-06, "loss": 0.0152, "step": 179340 }, { "epoch": 1.4511691884456672, "grad_norm": 0.25408971309661865, "learning_rate": 2.1237659098642953e-06, "loss": 0.0175, "step": 179350 }, { "epoch": 1.451250101140869, "grad_norm": 0.08801363408565521, "learning_rate": 2.123188365835312e-06, "loss": 0.0134, "step": 179360 }, { "epoch": 1.4513310138360709, "grad_norm": 0.5113698244094849, "learning_rate": 2.122610879178262e-06, "loss": 0.0245, "step": 179370 }, { "epoch": 1.4514119265312728, "grad_norm": 0.235351100564003, "learning_rate": 2.122033449904667e-06, "loss": 0.019, "step": 179380 }, { "epoch": 1.4514928392264745, "grad_norm": 0.3322978913784027, "learning_rate": 2.1214560780260374e-06, "loss": 0.016, "step": 179390 }, { "epoch": 1.4515737519216765, "grad_norm": 0.3511805832386017, "learning_rate": 2.1208787635538876e-06, "loss": 0.025, "step": 179400 }, { "epoch": 1.4516546646168784, "grad_norm": 0.43089747428894043, "learning_rate": 2.1203015064997367e-06, "loss": 0.0217, "step": 179410 }, { "epoch": 1.4517355773120801, "grad_norm": 0.2161894589662552, "learning_rate": 2.119724306875089e-06, "loss": 0.0201, "step": 179420 }, { "epoch": 1.451816490007282, "grad_norm": 0.14640048146247864, "learning_rate": 2.119147164691461e-06, "loss": 0.0229, "step": 179430 }, { "epoch": 1.451897402702484, "grad_norm": 0.2769760191440582, "learning_rate": 2.1185700799603638e-06, "loss": 0.0174, "step": 179440 }, { "epoch": 1.451978315397686, "grad_norm": 0.22661590576171875, "learning_rate": 2.117993052693299e-06, "loss": 0.0229, "step": 179450 }, { "epoch": 1.452059228092888, "grad_norm": 0.2779077887535095, "learning_rate": 2.1174160829017804e-06, "loss": 0.0145, "step": 179460 }, { "epoch": 1.4521401407880896, "grad_norm": 0.43529948592185974, "learning_rate": 2.1168391705973147e-06, "loss": 0.0236, "step": 179470 }, { "epoch": 1.4522210534832916, "grad_norm": 0.31595948338508606, "learning_rate": 2.1162623157914013e-06, "loss": 0.0166, "step": 179480 }, { "epoch": 1.4523019661784935, "grad_norm": 0.31099557876586914, "learning_rate": 2.11568551849555e-06, "loss": 0.0244, "step": 179490 }, { "epoch": 1.4523828788736952, "grad_norm": 0.2891589403152466, "learning_rate": 2.1151087787212623e-06, "loss": 0.0144, "step": 179500 }, { "epoch": 1.4524637915688972, "grad_norm": 0.3758377730846405, "learning_rate": 2.114532096480039e-06, "loss": 0.0147, "step": 179510 }, { "epoch": 1.452544704264099, "grad_norm": 0.5041072964668274, "learning_rate": 2.113955471783382e-06, "loss": 0.0196, "step": 179520 }, { "epoch": 1.4526256169593008, "grad_norm": 0.16983386874198914, "learning_rate": 2.1133789046427905e-06, "loss": 0.0279, "step": 179530 }, { "epoch": 1.4527065296545028, "grad_norm": 0.34393852949142456, "learning_rate": 2.1128023950697627e-06, "loss": 0.0198, "step": 179540 }, { "epoch": 1.4527874423497047, "grad_norm": 1.0438963174819946, "learning_rate": 2.1122259430757964e-06, "loss": 0.022, "step": 179550 }, { "epoch": 1.4528683550449064, "grad_norm": 0.33668914437294006, "learning_rate": 2.111649548672385e-06, "loss": 0.021, "step": 179560 }, { "epoch": 1.4529492677401084, "grad_norm": 0.2648318409919739, "learning_rate": 2.1110732118710297e-06, "loss": 0.0081, "step": 179570 }, { "epoch": 1.4530301804353103, "grad_norm": 0.2608751058578491, "learning_rate": 2.110496932683218e-06, "loss": 0.0132, "step": 179580 }, { "epoch": 1.4531110931305122, "grad_norm": 0.4837294816970825, "learning_rate": 2.109920711120444e-06, "loss": 0.0224, "step": 179590 }, { "epoch": 1.4531920058257142, "grad_norm": 0.18355262279510498, "learning_rate": 2.1093445471942033e-06, "loss": 0.015, "step": 179600 }, { "epoch": 1.453272918520916, "grad_norm": 0.3200232684612274, "learning_rate": 2.1087684409159813e-06, "loss": 0.014, "step": 179610 }, { "epoch": 1.4533538312161178, "grad_norm": 0.2750566899776459, "learning_rate": 2.1081923922972674e-06, "loss": 0.0177, "step": 179620 }, { "epoch": 1.4534347439113198, "grad_norm": 0.26581212878227234, "learning_rate": 2.1076164013495553e-06, "loss": 0.0227, "step": 179630 }, { "epoch": 1.4535156566065215, "grad_norm": 0.22239965200424194, "learning_rate": 2.1070404680843236e-06, "loss": 0.0158, "step": 179640 }, { "epoch": 1.4535965693017234, "grad_norm": 0.4468155801296234, "learning_rate": 2.106464592513065e-06, "loss": 0.0163, "step": 179650 }, { "epoch": 1.4536774819969254, "grad_norm": 0.5669438242912292, "learning_rate": 2.105888774647263e-06, "loss": 0.0174, "step": 179660 }, { "epoch": 1.453758394692127, "grad_norm": 0.25334808230400085, "learning_rate": 2.1053130144983963e-06, "loss": 0.0263, "step": 179670 }, { "epoch": 1.453839307387329, "grad_norm": 0.35802513360977173, "learning_rate": 2.1047373120779518e-06, "loss": 0.0183, "step": 179680 }, { "epoch": 1.453920220082531, "grad_norm": 0.21449598670005798, "learning_rate": 2.104161667397412e-06, "loss": 0.0246, "step": 179690 }, { "epoch": 1.454001132777733, "grad_norm": 0.3672030568122864, "learning_rate": 2.103586080468251e-06, "loss": 0.0178, "step": 179700 }, { "epoch": 1.4540820454729346, "grad_norm": 0.13268305361270905, "learning_rate": 2.1030105513019514e-06, "loss": 0.0188, "step": 179710 }, { "epoch": 1.4541629581681366, "grad_norm": 0.44741570949554443, "learning_rate": 2.102435079909992e-06, "loss": 0.0171, "step": 179720 }, { "epoch": 1.4542438708633385, "grad_norm": 0.23352691531181335, "learning_rate": 2.101859666303847e-06, "loss": 0.0132, "step": 179730 }, { "epoch": 1.4543247835585404, "grad_norm": 0.5157003998756409, "learning_rate": 2.1012843104949927e-06, "loss": 0.0249, "step": 179740 }, { "epoch": 1.4544056962537422, "grad_norm": 0.7424721717834473, "learning_rate": 2.1007090124949037e-06, "loss": 0.0279, "step": 179750 }, { "epoch": 1.454486608948944, "grad_norm": 0.13627532124519348, "learning_rate": 2.1001337723150528e-06, "loss": 0.0072, "step": 179760 }, { "epoch": 1.454567521644146, "grad_norm": 0.03727135434746742, "learning_rate": 2.099558589966912e-06, "loss": 0.0143, "step": 179770 }, { "epoch": 1.4546484343393478, "grad_norm": 0.20514731109142303, "learning_rate": 2.09898346546195e-06, "loss": 0.0247, "step": 179780 }, { "epoch": 1.4547293470345497, "grad_norm": 0.31143829226493835, "learning_rate": 2.098408398811642e-06, "loss": 0.0121, "step": 179790 }, { "epoch": 1.4548102597297516, "grad_norm": 1.0147963762283325, "learning_rate": 2.0978333900274513e-06, "loss": 0.0245, "step": 179800 }, { "epoch": 1.4548911724249534, "grad_norm": 0.6261982321739197, "learning_rate": 2.097258439120845e-06, "loss": 0.0223, "step": 179810 }, { "epoch": 1.4549720851201553, "grad_norm": 0.581509530544281, "learning_rate": 2.096683546103295e-06, "loss": 0.0131, "step": 179820 }, { "epoch": 1.4550529978153572, "grad_norm": 0.6291777491569519, "learning_rate": 2.0961087109862604e-06, "loss": 0.0149, "step": 179830 }, { "epoch": 1.4551339105105592, "grad_norm": 0.24687926471233368, "learning_rate": 2.095533933781206e-06, "loss": 0.0208, "step": 179840 }, { "epoch": 1.4552148232057611, "grad_norm": 0.29793861508369446, "learning_rate": 2.0949592144995986e-06, "loss": 0.0236, "step": 179850 }, { "epoch": 1.4552957359009628, "grad_norm": 0.5522392392158508, "learning_rate": 2.0943845531528932e-06, "loss": 0.0175, "step": 179860 }, { "epoch": 1.4553766485961648, "grad_norm": 0.5333930850028992, "learning_rate": 2.093809949752556e-06, "loss": 0.0131, "step": 179870 }, { "epoch": 1.4554575612913667, "grad_norm": 0.7039059996604919, "learning_rate": 2.093235404310046e-06, "loss": 0.0213, "step": 179880 }, { "epoch": 1.4555384739865684, "grad_norm": 0.42225679755210876, "learning_rate": 2.0926609168368156e-06, "loss": 0.0129, "step": 179890 }, { "epoch": 1.4556193866817704, "grad_norm": 0.43077704310417175, "learning_rate": 2.0920864873443274e-06, "loss": 0.019, "step": 179900 }, { "epoch": 1.4557002993769723, "grad_norm": 0.09445598721504211, "learning_rate": 2.0915121158440355e-06, "loss": 0.024, "step": 179910 }, { "epoch": 1.455781212072174, "grad_norm": 0.37088704109191895, "learning_rate": 2.0909378023473935e-06, "loss": 0.0192, "step": 179920 }, { "epoch": 1.455862124767376, "grad_norm": 0.4184921085834503, "learning_rate": 2.0903635468658566e-06, "loss": 0.0165, "step": 179930 }, { "epoch": 1.455943037462578, "grad_norm": 0.4983593225479126, "learning_rate": 2.0897893494108763e-06, "loss": 0.0159, "step": 179940 }, { "epoch": 1.4560239501577796, "grad_norm": 0.15968956053256989, "learning_rate": 2.0892152099939032e-06, "loss": 0.0202, "step": 179950 }, { "epoch": 1.4561048628529816, "grad_norm": 0.42922157049179077, "learning_rate": 2.088641128626388e-06, "loss": 0.0162, "step": 179960 }, { "epoch": 1.4561857755481835, "grad_norm": 0.3235747814178467, "learning_rate": 2.0880671053197778e-06, "loss": 0.0192, "step": 179970 }, { "epoch": 1.4562666882433855, "grad_norm": 0.5044263005256653, "learning_rate": 2.0874931400855254e-06, "loss": 0.02, "step": 179980 }, { "epoch": 1.4563476009385874, "grad_norm": 0.4695693254470825, "learning_rate": 2.0869192329350716e-06, "loss": 0.0267, "step": 179990 }, { "epoch": 1.4564285136337891, "grad_norm": 0.5806146860122681, "learning_rate": 2.0863453838798625e-06, "loss": 0.0318, "step": 180000 }, { "epoch": 1.456509426328991, "grad_norm": 0.5507019758224487, "learning_rate": 2.0857715929313473e-06, "loss": 0.021, "step": 180010 }, { "epoch": 1.456590339024193, "grad_norm": 0.26597854495048523, "learning_rate": 2.085197860100963e-06, "loss": 0.0213, "step": 180020 }, { "epoch": 1.4566712517193947, "grad_norm": 0.16317616403102875, "learning_rate": 2.0846241854001526e-06, "loss": 0.0146, "step": 180030 }, { "epoch": 1.4567521644145967, "grad_norm": 0.4785682260990143, "learning_rate": 2.0840505688403616e-06, "loss": 0.0156, "step": 180040 }, { "epoch": 1.4568330771097986, "grad_norm": 0.2859852910041809, "learning_rate": 2.083477010433023e-06, "loss": 0.0277, "step": 180050 }, { "epoch": 1.4569139898050003, "grad_norm": 0.1099487766623497, "learning_rate": 2.08290351018958e-06, "loss": 0.0216, "step": 180060 }, { "epoch": 1.4569949025002022, "grad_norm": 0.19631890952587128, "learning_rate": 2.08233006812147e-06, "loss": 0.0126, "step": 180070 }, { "epoch": 1.4570758151954042, "grad_norm": 0.4921557903289795, "learning_rate": 2.081756684240124e-06, "loss": 0.0255, "step": 180080 }, { "epoch": 1.457156727890606, "grad_norm": 0.5911749005317688, "learning_rate": 2.0811833585569814e-06, "loss": 0.0249, "step": 180090 }, { "epoch": 1.4572376405858078, "grad_norm": 0.2757234275341034, "learning_rate": 2.0806100910834754e-06, "loss": 0.0112, "step": 180100 }, { "epoch": 1.4573185532810098, "grad_norm": 0.6500216722488403, "learning_rate": 2.080036881831038e-06, "loss": 0.015, "step": 180110 }, { "epoch": 1.4573994659762117, "grad_norm": 0.4090525805950165, "learning_rate": 2.0794637308111e-06, "loss": 0.013, "step": 180120 }, { "epoch": 1.4574803786714137, "grad_norm": 0.3203814923763275, "learning_rate": 2.0788906380350933e-06, "loss": 0.0177, "step": 180130 }, { "epoch": 1.4575612913666154, "grad_norm": 0.4809171259403229, "learning_rate": 2.0783176035144455e-06, "loss": 0.012, "step": 180140 }, { "epoch": 1.4576422040618173, "grad_norm": 0.4658553898334503, "learning_rate": 2.077744627260585e-06, "loss": 0.0139, "step": 180150 }, { "epoch": 1.4577231167570193, "grad_norm": 0.21032696962356567, "learning_rate": 2.0771717092849393e-06, "loss": 0.0132, "step": 180160 }, { "epoch": 1.457804029452221, "grad_norm": 0.5595894455909729, "learning_rate": 2.0765988495989333e-06, "loss": 0.0196, "step": 180170 }, { "epoch": 1.457884942147423, "grad_norm": 0.6416497230529785, "learning_rate": 2.076026048213991e-06, "loss": 0.0155, "step": 180180 }, { "epoch": 1.4579658548426249, "grad_norm": 0.33400991559028625, "learning_rate": 2.0754533051415356e-06, "loss": 0.0225, "step": 180190 }, { "epoch": 1.4580467675378266, "grad_norm": 0.3811057507991791, "learning_rate": 2.0748806203929935e-06, "loss": 0.0166, "step": 180200 }, { "epoch": 1.4581276802330285, "grad_norm": 0.26380455493927, "learning_rate": 2.0743079939797794e-06, "loss": 0.0231, "step": 180210 }, { "epoch": 1.4582085929282305, "grad_norm": 0.898219645023346, "learning_rate": 2.073735425913315e-06, "loss": 0.0222, "step": 180220 }, { "epoch": 1.4582895056234322, "grad_norm": 0.6389216184616089, "learning_rate": 2.0731629162050235e-06, "loss": 0.0186, "step": 180230 }, { "epoch": 1.4583704183186341, "grad_norm": 0.31347009539604187, "learning_rate": 2.072590464866317e-06, "loss": 0.0163, "step": 180240 }, { "epoch": 1.458451331013836, "grad_norm": 0.16345001757144928, "learning_rate": 2.072018071908612e-06, "loss": 0.01, "step": 180250 }, { "epoch": 1.458532243709038, "grad_norm": 0.2616547644138336, "learning_rate": 2.0714457373433266e-06, "loss": 0.0167, "step": 180260 }, { "epoch": 1.45861315640424, "grad_norm": 0.5458618998527527, "learning_rate": 2.0708734611818738e-06, "loss": 0.0229, "step": 180270 }, { "epoch": 1.4586940690994417, "grad_norm": 0.22474995255470276, "learning_rate": 2.070301243435666e-06, "loss": 0.0097, "step": 180280 }, { "epoch": 1.4587749817946436, "grad_norm": 0.3636105954647064, "learning_rate": 2.0697290841161154e-06, "loss": 0.027, "step": 180290 }, { "epoch": 1.4588558944898455, "grad_norm": 0.1322035938501358, "learning_rate": 2.069156983234632e-06, "loss": 0.0163, "step": 180300 }, { "epoch": 1.4589368071850473, "grad_norm": 0.38868844509124756, "learning_rate": 2.0685849408026247e-06, "loss": 0.0267, "step": 180310 }, { "epoch": 1.4590177198802492, "grad_norm": 0.33794406056404114, "learning_rate": 2.0680129568315034e-06, "loss": 0.0119, "step": 180320 }, { "epoch": 1.4590986325754511, "grad_norm": 0.5731034874916077, "learning_rate": 2.0674410313326732e-06, "loss": 0.0183, "step": 180330 }, { "epoch": 1.4591795452706529, "grad_norm": 0.0790877416729927, "learning_rate": 2.066869164317541e-06, "loss": 0.0104, "step": 180340 }, { "epoch": 1.4592604579658548, "grad_norm": 0.4786701202392578, "learning_rate": 2.0662973557975106e-06, "loss": 0.0191, "step": 180350 }, { "epoch": 1.4593413706610567, "grad_norm": 0.24457165598869324, "learning_rate": 2.065725605783987e-06, "loss": 0.016, "step": 180360 }, { "epoch": 1.4594222833562587, "grad_norm": 0.6472233533859253, "learning_rate": 2.0651539142883707e-06, "loss": 0.031, "step": 180370 }, { "epoch": 1.4595031960514604, "grad_norm": 0.3514723479747772, "learning_rate": 2.0645822813220645e-06, "loss": 0.0194, "step": 180380 }, { "epoch": 1.4595841087466623, "grad_norm": 0.29522883892059326, "learning_rate": 2.0640107068964676e-06, "loss": 0.0093, "step": 180390 }, { "epoch": 1.4596650214418643, "grad_norm": 0.5808104872703552, "learning_rate": 2.0634391910229796e-06, "loss": 0.0235, "step": 180400 }, { "epoch": 1.4597459341370662, "grad_norm": 0.12790830433368683, "learning_rate": 2.062867733712995e-06, "loss": 0.0207, "step": 180410 }, { "epoch": 1.459826846832268, "grad_norm": 0.5032541751861572, "learning_rate": 2.0622963349779167e-06, "loss": 0.0358, "step": 180420 }, { "epoch": 1.4599077595274699, "grad_norm": 0.30187636613845825, "learning_rate": 2.0617249948291346e-06, "loss": 0.0171, "step": 180430 }, { "epoch": 1.4599886722226718, "grad_norm": 0.6169915795326233, "learning_rate": 2.0611537132780423e-06, "loss": 0.0232, "step": 180440 }, { "epoch": 1.4600695849178735, "grad_norm": 0.36336714029312134, "learning_rate": 2.060582490336037e-06, "loss": 0.0203, "step": 180450 }, { "epoch": 1.4601504976130755, "grad_norm": 0.43923425674438477, "learning_rate": 2.060011326014509e-06, "loss": 0.0194, "step": 180460 }, { "epoch": 1.4602314103082774, "grad_norm": 0.6367396116256714, "learning_rate": 2.059440220324848e-06, "loss": 0.0318, "step": 180470 }, { "epoch": 1.4603123230034791, "grad_norm": 0.4443206489086151, "learning_rate": 2.0588691732784436e-06, "loss": 0.0215, "step": 180480 }, { "epoch": 1.460393235698681, "grad_norm": 0.33654847741127014, "learning_rate": 2.0582981848866856e-06, "loss": 0.0243, "step": 180490 }, { "epoch": 1.460474148393883, "grad_norm": 0.5609886050224304, "learning_rate": 2.0577272551609596e-06, "loss": 0.0251, "step": 180500 }, { "epoch": 1.460555061089085, "grad_norm": 0.11108115315437317, "learning_rate": 2.0571563841126517e-06, "loss": 0.0101, "step": 180510 }, { "epoch": 1.460635973784287, "grad_norm": 0.5001978874206543, "learning_rate": 2.0565855717531476e-06, "loss": 0.0182, "step": 180520 }, { "epoch": 1.4607168864794886, "grad_norm": 0.3852542042732239, "learning_rate": 2.0560148180938306e-06, "loss": 0.0284, "step": 180530 }, { "epoch": 1.4607977991746905, "grad_norm": 0.3570888936519623, "learning_rate": 2.0554441231460826e-06, "loss": 0.0191, "step": 180540 }, { "epoch": 1.4608787118698925, "grad_norm": 0.2029050886631012, "learning_rate": 2.0548734869212857e-06, "loss": 0.0174, "step": 180550 }, { "epoch": 1.4609596245650942, "grad_norm": 0.3327609598636627, "learning_rate": 2.0543029094308194e-06, "loss": 0.0164, "step": 180560 }, { "epoch": 1.4610405372602961, "grad_norm": 0.445629745721817, "learning_rate": 2.0537323906860623e-06, "loss": 0.0211, "step": 180570 }, { "epoch": 1.461121449955498, "grad_norm": 0.37117117643356323, "learning_rate": 2.0531619306983933e-06, "loss": 0.0185, "step": 180580 }, { "epoch": 1.4612023626506998, "grad_norm": 0.5043029189109802, "learning_rate": 2.052591529479189e-06, "loss": 0.015, "step": 180590 }, { "epoch": 1.4612832753459017, "grad_norm": 0.36770787835121155, "learning_rate": 2.0520211870398227e-06, "loss": 0.0277, "step": 180600 }, { "epoch": 1.4613641880411037, "grad_norm": 1.701719880104065, "learning_rate": 2.0514509033916736e-06, "loss": 0.0199, "step": 180610 }, { "epoch": 1.4614451007363054, "grad_norm": 0.7344285845756531, "learning_rate": 2.0508806785461094e-06, "loss": 0.0307, "step": 180620 }, { "epoch": 1.4615260134315073, "grad_norm": 0.4378975033760071, "learning_rate": 2.0503105125145027e-06, "loss": 0.0221, "step": 180630 }, { "epoch": 1.4616069261267093, "grad_norm": 0.3841504752635956, "learning_rate": 2.0497404053082276e-06, "loss": 0.0391, "step": 180640 }, { "epoch": 1.4616878388219112, "grad_norm": 0.21643134951591492, "learning_rate": 2.049170356938654e-06, "loss": 0.0181, "step": 180650 }, { "epoch": 1.4617687515171132, "grad_norm": 0.41131922602653503, "learning_rate": 2.048600367417144e-06, "loss": 0.0128, "step": 180660 }, { "epoch": 1.4618496642123149, "grad_norm": 0.20805954933166504, "learning_rate": 2.0480304367550706e-06, "loss": 0.0119, "step": 180670 }, { "epoch": 1.4619305769075168, "grad_norm": 0.19425345957279205, "learning_rate": 2.047460564963799e-06, "loss": 0.0137, "step": 180680 }, { "epoch": 1.4620114896027188, "grad_norm": 0.3872709572315216, "learning_rate": 2.0468907520546927e-06, "loss": 0.0178, "step": 180690 }, { "epoch": 1.4620924022979205, "grad_norm": 0.6168879866600037, "learning_rate": 2.0463209980391163e-06, "loss": 0.0204, "step": 180700 }, { "epoch": 1.4621733149931224, "grad_norm": 0.47208261489868164, "learning_rate": 2.0457513029284325e-06, "loss": 0.0229, "step": 180710 }, { "epoch": 1.4622542276883244, "grad_norm": 0.19044283032417297, "learning_rate": 2.045181666734002e-06, "loss": 0.0139, "step": 180720 }, { "epoch": 1.462335140383526, "grad_norm": 0.33634525537490845, "learning_rate": 2.044612089467185e-06, "loss": 0.0178, "step": 180730 }, { "epoch": 1.462416053078728, "grad_norm": 0.3335104286670685, "learning_rate": 2.044042571139341e-06, "loss": 0.0145, "step": 180740 }, { "epoch": 1.46249696577393, "grad_norm": 0.313954621553421, "learning_rate": 2.0434731117618273e-06, "loss": 0.0201, "step": 180750 }, { "epoch": 1.4625778784691317, "grad_norm": 0.1059175357222557, "learning_rate": 2.0429037113460015e-06, "loss": 0.0209, "step": 180760 }, { "epoch": 1.4626587911643336, "grad_norm": 0.5519571304321289, "learning_rate": 2.042334369903218e-06, "loss": 0.0265, "step": 180770 }, { "epoch": 1.4627397038595356, "grad_norm": 0.2778666913509369, "learning_rate": 2.0417650874448314e-06, "loss": 0.0177, "step": 180780 }, { "epoch": 1.4628206165547375, "grad_norm": 0.1600971519947052, "learning_rate": 2.0411958639821957e-06, "loss": 0.0255, "step": 180790 }, { "epoch": 1.4629015292499394, "grad_norm": 0.8202207088470459, "learning_rate": 2.04062669952666e-06, "loss": 0.026, "step": 180800 }, { "epoch": 1.4629824419451412, "grad_norm": 0.34210914373397827, "learning_rate": 2.040057594089581e-06, "loss": 0.0202, "step": 180810 }, { "epoch": 1.463063354640343, "grad_norm": 0.4112250506877899, "learning_rate": 2.0394885476823005e-06, "loss": 0.0145, "step": 180820 }, { "epoch": 1.463144267335545, "grad_norm": 0.20199334621429443, "learning_rate": 2.038919560316173e-06, "loss": 0.0087, "step": 180830 }, { "epoch": 1.4632251800307468, "grad_norm": 0.6338939666748047, "learning_rate": 2.038350632002546e-06, "loss": 0.0204, "step": 180840 }, { "epoch": 1.4633060927259487, "grad_norm": 0.5668882727622986, "learning_rate": 2.037781762752759e-06, "loss": 0.0182, "step": 180850 }, { "epoch": 1.4633870054211506, "grad_norm": 0.46236076951026917, "learning_rate": 2.0372129525781637e-06, "loss": 0.0174, "step": 180860 }, { "epoch": 1.4634679181163524, "grad_norm": 0.23460987210273743, "learning_rate": 2.036644201490103e-06, "loss": 0.0202, "step": 180870 }, { "epoch": 1.4635488308115543, "grad_norm": 0.28322634100914, "learning_rate": 2.0360755094999136e-06, "loss": 0.0208, "step": 180880 }, { "epoch": 1.4636297435067562, "grad_norm": 0.4009098410606384, "learning_rate": 2.0355068766189433e-06, "loss": 0.0128, "step": 180890 }, { "epoch": 1.463710656201958, "grad_norm": 0.3334839940071106, "learning_rate": 2.03493830285853e-06, "loss": 0.028, "step": 180900 }, { "epoch": 1.46379156889716, "grad_norm": 0.5024486780166626, "learning_rate": 2.0343697882300127e-06, "loss": 0.0198, "step": 180910 }, { "epoch": 1.4638724815923618, "grad_norm": 0.4763806462287903, "learning_rate": 2.0338013327447292e-06, "loss": 0.0203, "step": 180920 }, { "epoch": 1.4639533942875638, "grad_norm": 0.34248459339141846, "learning_rate": 2.0332329364140165e-06, "loss": 0.0231, "step": 180930 }, { "epoch": 1.4640343069827657, "grad_norm": 0.225735142827034, "learning_rate": 2.0326645992492094e-06, "loss": 0.0174, "step": 180940 }, { "epoch": 1.4641152196779674, "grad_norm": 0.09221754968166351, "learning_rate": 2.032096321261643e-06, "loss": 0.0066, "step": 180950 }, { "epoch": 1.4641961323731694, "grad_norm": 0.4802131950855255, "learning_rate": 2.0315281024626494e-06, "loss": 0.019, "step": 180960 }, { "epoch": 1.4642770450683713, "grad_norm": 0.5419164896011353, "learning_rate": 2.0309599428635614e-06, "loss": 0.0204, "step": 180970 }, { "epoch": 1.464357957763573, "grad_norm": 0.28568515181541443, "learning_rate": 2.0303918424757093e-06, "loss": 0.0211, "step": 180980 }, { "epoch": 1.464438870458775, "grad_norm": 0.2993871867656708, "learning_rate": 2.029823801310421e-06, "loss": 0.0176, "step": 180990 }, { "epoch": 1.464519783153977, "grad_norm": 0.2177523970603943, "learning_rate": 2.02925581937903e-06, "loss": 0.0131, "step": 181000 }, { "epoch": 1.4646006958491786, "grad_norm": 0.3056284785270691, "learning_rate": 2.0286878966928567e-06, "loss": 0.0168, "step": 181010 }, { "epoch": 1.4646816085443806, "grad_norm": 0.1930013746023178, "learning_rate": 2.0281200332632327e-06, "loss": 0.0203, "step": 181020 }, { "epoch": 1.4647625212395825, "grad_norm": 0.42312952876091003, "learning_rate": 2.0275522291014827e-06, "loss": 0.025, "step": 181030 }, { "epoch": 1.4648434339347844, "grad_norm": 0.2454633116722107, "learning_rate": 2.0269844842189244e-06, "loss": 0.0197, "step": 181040 }, { "epoch": 1.4649243466299862, "grad_norm": 0.2871169149875641, "learning_rate": 2.026416798626886e-06, "loss": 0.0208, "step": 181050 }, { "epoch": 1.465005259325188, "grad_norm": 0.5535789728164673, "learning_rate": 2.025849172336689e-06, "loss": 0.0149, "step": 181060 }, { "epoch": 1.46508617202039, "grad_norm": 0.5387230515480042, "learning_rate": 2.0252816053596476e-06, "loss": 0.0134, "step": 181070 }, { "epoch": 1.465167084715592, "grad_norm": 0.21657533943653107, "learning_rate": 2.0247140977070867e-06, "loss": 0.0201, "step": 181080 }, { "epoch": 1.4652479974107937, "grad_norm": 0.2217683345079422, "learning_rate": 2.024146649390322e-06, "loss": 0.013, "step": 181090 }, { "epoch": 1.4653289101059956, "grad_norm": 0.46425163745880127, "learning_rate": 2.02357926042067e-06, "loss": 0.0164, "step": 181100 }, { "epoch": 1.4654098228011976, "grad_norm": 0.37352073192596436, "learning_rate": 2.0230119308094465e-06, "loss": 0.0159, "step": 181110 }, { "epoch": 1.4654907354963993, "grad_norm": 0.3254435360431671, "learning_rate": 2.022444660567965e-06, "loss": 0.019, "step": 181120 }, { "epoch": 1.4655716481916012, "grad_norm": 0.055705174803733826, "learning_rate": 2.021877449707539e-06, "loss": 0.0162, "step": 181130 }, { "epoch": 1.4656525608868032, "grad_norm": 0.4756613075733185, "learning_rate": 2.02131029823948e-06, "loss": 0.0207, "step": 181140 }, { "epoch": 1.465733473582005, "grad_norm": 0.4511968791484833, "learning_rate": 2.0207432061750993e-06, "loss": 0.0193, "step": 181150 }, { "epoch": 1.4658143862772068, "grad_norm": 0.2983172833919525, "learning_rate": 2.020176173525705e-06, "loss": 0.0183, "step": 181160 }, { "epoch": 1.4658952989724088, "grad_norm": 1.4418752193450928, "learning_rate": 2.0196092003026068e-06, "loss": 0.028, "step": 181170 }, { "epoch": 1.4659762116676107, "grad_norm": 0.43042895197868347, "learning_rate": 2.0190422865171093e-06, "loss": 0.0246, "step": 181180 }, { "epoch": 1.4660571243628127, "grad_norm": 0.13017652928829193, "learning_rate": 2.018475432180524e-06, "loss": 0.0119, "step": 181190 }, { "epoch": 1.4661380370580144, "grad_norm": 0.39464572072029114, "learning_rate": 2.01790863730415e-06, "loss": 0.0199, "step": 181200 }, { "epoch": 1.4662189497532163, "grad_norm": 0.26098188757896423, "learning_rate": 2.017341901899291e-06, "loss": 0.0202, "step": 181210 }, { "epoch": 1.4662998624484183, "grad_norm": 0.08357756584882736, "learning_rate": 2.016775225977255e-06, "loss": 0.0129, "step": 181220 }, { "epoch": 1.46638077514362, "grad_norm": 0.3605974316596985, "learning_rate": 2.0162086095493357e-06, "loss": 0.0211, "step": 181230 }, { "epoch": 1.466461687838822, "grad_norm": 0.39921948313713074, "learning_rate": 2.0156420526268377e-06, "loss": 0.0245, "step": 181240 }, { "epoch": 1.4665426005340239, "grad_norm": 0.36275601387023926, "learning_rate": 2.015075555221061e-06, "loss": 0.0182, "step": 181250 }, { "epoch": 1.4666235132292256, "grad_norm": 0.24557149410247803, "learning_rate": 2.0145091173432972e-06, "loss": 0.0149, "step": 181260 }, { "epoch": 1.4667044259244275, "grad_norm": 0.2741321325302124, "learning_rate": 2.0139427390048484e-06, "loss": 0.0287, "step": 181270 }, { "epoch": 1.4667853386196295, "grad_norm": 0.5214640498161316, "learning_rate": 2.013376420217009e-06, "loss": 0.016, "step": 181280 }, { "epoch": 1.4668662513148312, "grad_norm": 0.2698257863521576, "learning_rate": 2.012810160991069e-06, "loss": 0.0186, "step": 181290 }, { "epoch": 1.4669471640100331, "grad_norm": 0.3425169885158539, "learning_rate": 2.0122439613383254e-06, "loss": 0.0234, "step": 181300 }, { "epoch": 1.467028076705235, "grad_norm": 0.36246395111083984, "learning_rate": 2.011677821270069e-06, "loss": 0.0194, "step": 181310 }, { "epoch": 1.467108989400437, "grad_norm": 0.1767893135547638, "learning_rate": 2.0111117407975894e-06, "loss": 0.0218, "step": 181320 }, { "epoch": 1.467189902095639, "grad_norm": 0.46552225947380066, "learning_rate": 2.0105457199321767e-06, "loss": 0.012, "step": 181330 }, { "epoch": 1.4672708147908406, "grad_norm": 0.06371841579675674, "learning_rate": 2.0099797586851182e-06, "loss": 0.017, "step": 181340 }, { "epoch": 1.4673517274860426, "grad_norm": 0.09555216133594513, "learning_rate": 2.009413857067701e-06, "loss": 0.0191, "step": 181350 }, { "epoch": 1.4674326401812445, "grad_norm": 0.3870522081851959, "learning_rate": 2.0088480150912114e-06, "loss": 0.0115, "step": 181360 }, { "epoch": 1.4675135528764462, "grad_norm": 0.26300927996635437, "learning_rate": 2.0082822327669322e-06, "loss": 0.0238, "step": 181370 }, { "epoch": 1.4675944655716482, "grad_norm": 0.36718517541885376, "learning_rate": 2.007716510106151e-06, "loss": 0.0249, "step": 181380 }, { "epoch": 1.4676753782668501, "grad_norm": 0.36676472425460815, "learning_rate": 2.007150847120145e-06, "loss": 0.0166, "step": 181390 }, { "epoch": 1.4677562909620518, "grad_norm": 0.48222118616104126, "learning_rate": 2.006585243820196e-06, "loss": 0.0298, "step": 181400 }, { "epoch": 1.4678372036572538, "grad_norm": 0.5045180320739746, "learning_rate": 2.0060197002175886e-06, "loss": 0.0304, "step": 181410 }, { "epoch": 1.4679181163524557, "grad_norm": 0.43076014518737793, "learning_rate": 2.005454216323595e-06, "loss": 0.0389, "step": 181420 }, { "epoch": 1.4679990290476574, "grad_norm": 0.16446246206760406, "learning_rate": 2.0048887921494946e-06, "loss": 0.0119, "step": 181430 }, { "epoch": 1.4680799417428594, "grad_norm": 0.5326786637306213, "learning_rate": 2.004323427706567e-06, "loss": 0.0175, "step": 181440 }, { "epoch": 1.4681608544380613, "grad_norm": 0.558066189289093, "learning_rate": 2.0037581230060817e-06, "loss": 0.0166, "step": 181450 }, { "epoch": 1.4682417671332633, "grad_norm": 0.44140103459358215, "learning_rate": 2.0031928780593164e-06, "loss": 0.015, "step": 181460 }, { "epoch": 1.4683226798284652, "grad_norm": 0.31339508295059204, "learning_rate": 2.0026276928775445e-06, "loss": 0.0137, "step": 181470 }, { "epoch": 1.468403592523667, "grad_norm": 0.8652069568634033, "learning_rate": 2.0020625674720325e-06, "loss": 0.0151, "step": 181480 }, { "epoch": 1.4684845052188689, "grad_norm": 0.6222938895225525, "learning_rate": 2.001497501854055e-06, "loss": 0.0141, "step": 181490 }, { "epoch": 1.4685654179140708, "grad_norm": 0.37470462918281555, "learning_rate": 2.00093249603488e-06, "loss": 0.0199, "step": 181500 }, { "epoch": 1.4686463306092725, "grad_norm": 0.48102450370788574, "learning_rate": 2.0003675500257745e-06, "loss": 0.0113, "step": 181510 }, { "epoch": 1.4687272433044745, "grad_norm": 0.4204295575618744, "learning_rate": 1.9998026638380056e-06, "loss": 0.0269, "step": 181520 }, { "epoch": 1.4688081559996764, "grad_norm": 0.3985995650291443, "learning_rate": 1.99923783748284e-06, "loss": 0.0206, "step": 181530 }, { "epoch": 1.4688890686948781, "grad_norm": 0.3141591250896454, "learning_rate": 1.99867307097154e-06, "loss": 0.0263, "step": 181540 }, { "epoch": 1.46896998139008, "grad_norm": 0.19355352222919464, "learning_rate": 1.9981083643153697e-06, "loss": 0.019, "step": 181550 }, { "epoch": 1.469050894085282, "grad_norm": 0.37829285860061646, "learning_rate": 1.997543717525589e-06, "loss": 0.0237, "step": 181560 }, { "epoch": 1.469131806780484, "grad_norm": 0.25458940863609314, "learning_rate": 1.996979130613464e-06, "loss": 0.0246, "step": 181570 }, { "epoch": 1.4692127194756857, "grad_norm": 0.1862761229276657, "learning_rate": 1.9964146035902488e-06, "loss": 0.0155, "step": 181580 }, { "epoch": 1.4692936321708876, "grad_norm": 0.4381074011325836, "learning_rate": 1.995850136467201e-06, "loss": 0.01, "step": 181590 }, { "epoch": 1.4693745448660895, "grad_norm": 0.3477187156677246, "learning_rate": 1.995285729255585e-06, "loss": 0.0241, "step": 181600 }, { "epoch": 1.4694554575612915, "grad_norm": 0.362093061208725, "learning_rate": 1.9947213819666493e-06, "loss": 0.0119, "step": 181610 }, { "epoch": 1.4695363702564932, "grad_norm": 0.6689829230308533, "learning_rate": 1.9941570946116496e-06, "loss": 0.0262, "step": 181620 }, { "epoch": 1.4696172829516951, "grad_norm": 0.3669290542602539, "learning_rate": 1.9935928672018445e-06, "loss": 0.028, "step": 181630 }, { "epoch": 1.469698195646897, "grad_norm": 0.22980058193206787, "learning_rate": 1.9930286997484792e-06, "loss": 0.0198, "step": 181640 }, { "epoch": 1.4697791083420988, "grad_norm": 0.6935886740684509, "learning_rate": 1.9924645922628105e-06, "loss": 0.0411, "step": 181650 }, { "epoch": 1.4698600210373007, "grad_norm": 0.20289358496665955, "learning_rate": 1.9919005447560885e-06, "loss": 0.0083, "step": 181660 }, { "epoch": 1.4699409337325027, "grad_norm": 0.23768974840641022, "learning_rate": 1.9913365572395556e-06, "loss": 0.0206, "step": 181670 }, { "epoch": 1.4700218464277044, "grad_norm": 0.6415589451789856, "learning_rate": 1.9907726297244658e-06, "loss": 0.0272, "step": 181680 }, { "epoch": 1.4701027591229063, "grad_norm": 0.6194608211517334, "learning_rate": 1.9902087622220643e-06, "loss": 0.0225, "step": 181690 }, { "epoch": 1.4701836718181083, "grad_norm": 0.5142683386802673, "learning_rate": 1.989644954743592e-06, "loss": 0.023, "step": 181700 }, { "epoch": 1.4702645845133102, "grad_norm": 0.366408109664917, "learning_rate": 1.9890812073002976e-06, "loss": 0.0158, "step": 181710 }, { "epoch": 1.4703454972085122, "grad_norm": 0.4354023039340973, "learning_rate": 1.988517519903422e-06, "loss": 0.0313, "step": 181720 }, { "epoch": 1.4704264099037139, "grad_norm": 0.18634001910686493, "learning_rate": 1.987953892564207e-06, "loss": 0.0127, "step": 181730 }, { "epoch": 1.4705073225989158, "grad_norm": 0.2189415693283081, "learning_rate": 1.9873903252938927e-06, "loss": 0.0167, "step": 181740 }, { "epoch": 1.4705882352941178, "grad_norm": 0.7307165861129761, "learning_rate": 1.9868268181037186e-06, "loss": 0.0273, "step": 181750 }, { "epoch": 1.4706691479893195, "grad_norm": 0.2392573207616806, "learning_rate": 1.9862633710049227e-06, "loss": 0.015, "step": 181760 }, { "epoch": 1.4707500606845214, "grad_norm": 0.22006995975971222, "learning_rate": 1.9856999840087416e-06, "loss": 0.0238, "step": 181770 }, { "epoch": 1.4708309733797233, "grad_norm": 0.3694322109222412, "learning_rate": 1.9851366571264092e-06, "loss": 0.0166, "step": 181780 }, { "epoch": 1.470911886074925, "grad_norm": 0.3503381311893463, "learning_rate": 1.9845733903691643e-06, "loss": 0.0283, "step": 181790 }, { "epoch": 1.470992798770127, "grad_norm": 0.8102941513061523, "learning_rate": 1.984010183748236e-06, "loss": 0.0154, "step": 181800 }, { "epoch": 1.471073711465329, "grad_norm": 0.31367355585098267, "learning_rate": 1.9834470372748565e-06, "loss": 0.0196, "step": 181810 }, { "epoch": 1.4711546241605307, "grad_norm": 0.2961743474006653, "learning_rate": 1.9828839509602606e-06, "loss": 0.0182, "step": 181820 }, { "epoch": 1.4712355368557326, "grad_norm": 0.5468209385871887, "learning_rate": 1.9823209248156726e-06, "loss": 0.0222, "step": 181830 }, { "epoch": 1.4713164495509345, "grad_norm": 0.3497891128063202, "learning_rate": 1.981757958852322e-06, "loss": 0.0134, "step": 181840 }, { "epoch": 1.4713973622461365, "grad_norm": 0.34489157795906067, "learning_rate": 1.9811950530814405e-06, "loss": 0.0241, "step": 181850 }, { "epoch": 1.4714782749413384, "grad_norm": 0.004885021131485701, "learning_rate": 1.980632207514247e-06, "loss": 0.0153, "step": 181860 }, { "epoch": 1.4715591876365401, "grad_norm": 0.2907089293003082, "learning_rate": 1.9800694221619715e-06, "loss": 0.0244, "step": 181870 }, { "epoch": 1.471640100331742, "grad_norm": 0.16683681309223175, "learning_rate": 1.9795066970358383e-06, "loss": 0.0176, "step": 181880 }, { "epoch": 1.471721013026944, "grad_norm": 0.2034759223461151, "learning_rate": 1.978944032147064e-06, "loss": 0.0136, "step": 181890 }, { "epoch": 1.4718019257221457, "grad_norm": 0.24992340803146362, "learning_rate": 1.9783814275068747e-06, "loss": 0.0241, "step": 181900 }, { "epoch": 1.4718828384173477, "grad_norm": 0.2687118947505951, "learning_rate": 1.977818883126489e-06, "loss": 0.0182, "step": 181910 }, { "epoch": 1.4719637511125496, "grad_norm": 0.5617858171463013, "learning_rate": 1.9772563990171258e-06, "loss": 0.0271, "step": 181920 }, { "epoch": 1.4720446638077513, "grad_norm": 0.2936176359653473, "learning_rate": 1.9766939751900017e-06, "loss": 0.0195, "step": 181930 }, { "epoch": 1.4721255765029533, "grad_norm": 0.39510801434516907, "learning_rate": 1.9761316116563345e-06, "loss": 0.0165, "step": 181940 }, { "epoch": 1.4722064891981552, "grad_norm": 0.4945048689842224, "learning_rate": 1.975569308427338e-06, "loss": 0.013, "step": 181950 }, { "epoch": 1.472287401893357, "grad_norm": 0.30535265803337097, "learning_rate": 1.975007065514227e-06, "loss": 0.0212, "step": 181960 }, { "epoch": 1.4723683145885589, "grad_norm": 0.20853176712989807, "learning_rate": 1.9744448829282148e-06, "loss": 0.0114, "step": 181970 }, { "epoch": 1.4724492272837608, "grad_norm": 0.43926191329956055, "learning_rate": 1.9738827606805118e-06, "loss": 0.0286, "step": 181980 }, { "epoch": 1.4725301399789628, "grad_norm": 0.5551876425743103, "learning_rate": 1.9733206987823282e-06, "loss": 0.0257, "step": 181990 }, { "epoch": 1.4726110526741647, "grad_norm": 0.30841511487960815, "learning_rate": 1.972758697244872e-06, "loss": 0.0255, "step": 182000 }, { "epoch": 1.4726919653693664, "grad_norm": 0.5924780964851379, "learning_rate": 1.9721967560793564e-06, "loss": 0.0226, "step": 182010 }, { "epoch": 1.4727728780645684, "grad_norm": 0.4360445737838745, "learning_rate": 1.971634875296983e-06, "loss": 0.0387, "step": 182020 }, { "epoch": 1.4728537907597703, "grad_norm": 0.30598604679107666, "learning_rate": 1.9710730549089574e-06, "loss": 0.0182, "step": 182030 }, { "epoch": 1.472934703454972, "grad_norm": 0.25573498010635376, "learning_rate": 1.970511294926488e-06, "loss": 0.022, "step": 182040 }, { "epoch": 1.473015616150174, "grad_norm": 0.9458123445510864, "learning_rate": 1.969949595360772e-06, "loss": 0.0183, "step": 182050 }, { "epoch": 1.473096528845376, "grad_norm": 0.27503618597984314, "learning_rate": 1.969387956223017e-06, "loss": 0.0137, "step": 182060 }, { "epoch": 1.4731774415405776, "grad_norm": 0.2988298535346985, "learning_rate": 1.968826377524421e-06, "loss": 0.0207, "step": 182070 }, { "epoch": 1.4732583542357796, "grad_norm": 0.16850018501281738, "learning_rate": 1.968264859276184e-06, "loss": 0.0157, "step": 182080 }, { "epoch": 1.4733392669309815, "grad_norm": 0.28021010756492615, "learning_rate": 1.9677034014895035e-06, "loss": 0.0214, "step": 182090 }, { "epoch": 1.4734201796261832, "grad_norm": 0.4877571761608124, "learning_rate": 1.9671420041755773e-06, "loss": 0.0207, "step": 182100 }, { "epoch": 1.4735010923213852, "grad_norm": 0.37900564074516296, "learning_rate": 1.9665806673456015e-06, "loss": 0.016, "step": 182110 }, { "epoch": 1.473582005016587, "grad_norm": 0.43511101603507996, "learning_rate": 1.9660193910107705e-06, "loss": 0.0146, "step": 182120 }, { "epoch": 1.473662917711789, "grad_norm": 0.5058386325836182, "learning_rate": 1.9654581751822778e-06, "loss": 0.0202, "step": 182130 }, { "epoch": 1.473743830406991, "grad_norm": 0.4321618676185608, "learning_rate": 1.964897019871315e-06, "loss": 0.0213, "step": 182140 }, { "epoch": 1.4738247431021927, "grad_norm": 0.23143735527992249, "learning_rate": 1.964335925089074e-06, "loss": 0.0228, "step": 182150 }, { "epoch": 1.4739056557973946, "grad_norm": 0.20268969237804413, "learning_rate": 1.963774890846745e-06, "loss": 0.0204, "step": 182160 }, { "epoch": 1.4739865684925966, "grad_norm": 0.3720424473285675, "learning_rate": 1.9632139171555153e-06, "loss": 0.032, "step": 182170 }, { "epoch": 1.4740674811877983, "grad_norm": 0.4771963357925415, "learning_rate": 1.9626530040265736e-06, "loss": 0.0154, "step": 182180 }, { "epoch": 1.4741483938830002, "grad_norm": 0.5247911214828491, "learning_rate": 1.9620921514711035e-06, "loss": 0.0159, "step": 182190 }, { "epoch": 1.4742293065782022, "grad_norm": 0.23354540765285492, "learning_rate": 1.961531359500296e-06, "loss": 0.0125, "step": 182200 }, { "epoch": 1.4743102192734039, "grad_norm": 0.19357523322105408, "learning_rate": 1.960970628125329e-06, "loss": 0.0233, "step": 182210 }, { "epoch": 1.4743911319686058, "grad_norm": 0.082475446164608, "learning_rate": 1.9604099573573856e-06, "loss": 0.0154, "step": 182220 }, { "epoch": 1.4744720446638078, "grad_norm": 0.2612078785896301, "learning_rate": 1.959849347207652e-06, "loss": 0.0174, "step": 182230 }, { "epoch": 1.4745529573590097, "grad_norm": 0.32731983065605164, "learning_rate": 1.9592887976873032e-06, "loss": 0.0274, "step": 182240 }, { "epoch": 1.4746338700542114, "grad_norm": 0.28675001859664917, "learning_rate": 1.9587283088075186e-06, "loss": 0.0151, "step": 182250 }, { "epoch": 1.4747147827494134, "grad_norm": 0.3174495995044708, "learning_rate": 1.9581678805794795e-06, "loss": 0.0379, "step": 182260 }, { "epoch": 1.4747956954446153, "grad_norm": 0.47400224208831787, "learning_rate": 1.9576075130143603e-06, "loss": 0.023, "step": 182270 }, { "epoch": 1.4748766081398172, "grad_norm": 0.24164296686649323, "learning_rate": 1.957047206123336e-06, "loss": 0.0307, "step": 182280 }, { "epoch": 1.474957520835019, "grad_norm": 0.28097134828567505, "learning_rate": 1.956486959917582e-06, "loss": 0.0209, "step": 182290 }, { "epoch": 1.475038433530221, "grad_norm": 0.2162824273109436, "learning_rate": 1.9559267744082696e-06, "loss": 0.0159, "step": 182300 }, { "epoch": 1.4751193462254228, "grad_norm": 0.286153644323349, "learning_rate": 1.9553666496065717e-06, "loss": 0.0122, "step": 182310 }, { "epoch": 1.4752002589206246, "grad_norm": 0.8189910650253296, "learning_rate": 1.9548065855236586e-06, "loss": 0.023, "step": 182320 }, { "epoch": 1.4752811716158265, "grad_norm": 0.18113648891448975, "learning_rate": 1.9542465821706996e-06, "loss": 0.0155, "step": 182330 }, { "epoch": 1.4753620843110284, "grad_norm": 0.45327234268188477, "learning_rate": 1.953686639558862e-06, "loss": 0.0233, "step": 182340 }, { "epoch": 1.4754429970062302, "grad_norm": 0.3212288022041321, "learning_rate": 1.953126757699314e-06, "loss": 0.0257, "step": 182350 }, { "epoch": 1.475523909701432, "grad_norm": 0.3627417981624603, "learning_rate": 1.95256693660322e-06, "loss": 0.0199, "step": 182360 }, { "epoch": 1.475604822396634, "grad_norm": 0.5432976484298706, "learning_rate": 1.952007176281746e-06, "loss": 0.0201, "step": 182370 }, { "epoch": 1.475685735091836, "grad_norm": 0.1382424384355545, "learning_rate": 1.9514474767460534e-06, "loss": 0.0165, "step": 182380 }, { "epoch": 1.475766647787038, "grad_norm": 0.3336755633354187, "learning_rate": 1.9508878380073056e-06, "loss": 0.0225, "step": 182390 }, { "epoch": 1.4758475604822396, "grad_norm": 0.5167114734649658, "learning_rate": 1.9503282600766626e-06, "loss": 0.008, "step": 182400 }, { "epoch": 1.4759284731774416, "grad_norm": 0.6379678845405579, "learning_rate": 1.9497687429652824e-06, "loss": 0.0266, "step": 182410 }, { "epoch": 1.4760093858726435, "grad_norm": 0.4180164933204651, "learning_rate": 1.9492092866843293e-06, "loss": 0.0203, "step": 182420 }, { "epoch": 1.4760902985678452, "grad_norm": 0.34526005387306213, "learning_rate": 1.948649891244954e-06, "loss": 0.0146, "step": 182430 }, { "epoch": 1.4761712112630472, "grad_norm": 0.3280890882015228, "learning_rate": 1.948090556658314e-06, "loss": 0.0194, "step": 182440 }, { "epoch": 1.4762521239582491, "grad_norm": 0.7784926891326904, "learning_rate": 1.9475312829355663e-06, "loss": 0.0387, "step": 182450 }, { "epoch": 1.4763330366534508, "grad_norm": 0.34122535586357117, "learning_rate": 1.9469720700878647e-06, "loss": 0.0298, "step": 182460 }, { "epoch": 1.4764139493486528, "grad_norm": 0.17715276777744293, "learning_rate": 1.9464129181263567e-06, "loss": 0.0116, "step": 182470 }, { "epoch": 1.4764948620438547, "grad_norm": 0.11739442497491837, "learning_rate": 1.9458538270621987e-06, "loss": 0.031, "step": 182480 }, { "epoch": 1.4765757747390564, "grad_norm": 0.5625163912773132, "learning_rate": 1.945294796906538e-06, "loss": 0.0187, "step": 182490 }, { "epoch": 1.4766566874342584, "grad_norm": 0.5340216159820557, "learning_rate": 1.9447358276705242e-06, "loss": 0.0246, "step": 182500 }, { "epoch": 1.4767376001294603, "grad_norm": 0.2868167757987976, "learning_rate": 1.944176919365304e-06, "loss": 0.0273, "step": 182510 }, { "epoch": 1.4768185128246623, "grad_norm": 0.3914072513580322, "learning_rate": 1.9436180720020242e-06, "loss": 0.023, "step": 182520 }, { "epoch": 1.4768994255198642, "grad_norm": 0.28285592794418335, "learning_rate": 1.9430592855918296e-06, "loss": 0.0206, "step": 182530 }, { "epoch": 1.476980338215066, "grad_norm": 0.17880377173423767, "learning_rate": 1.942500560145864e-06, "loss": 0.0183, "step": 182540 }, { "epoch": 1.4770612509102679, "grad_norm": 0.4767206609249115, "learning_rate": 1.94194189567527e-06, "loss": 0.0206, "step": 182550 }, { "epoch": 1.4771421636054698, "grad_norm": 0.6483039855957031, "learning_rate": 1.941383292191189e-06, "loss": 0.0236, "step": 182560 }, { "epoch": 1.4772230763006715, "grad_norm": 0.46531280875205994, "learning_rate": 1.9408247497047615e-06, "loss": 0.0241, "step": 182570 }, { "epoch": 1.4773039889958735, "grad_norm": 0.017444901168346405, "learning_rate": 1.9402662682271263e-06, "loss": 0.0147, "step": 182580 }, { "epoch": 1.4773849016910754, "grad_norm": 0.3252454996109009, "learning_rate": 1.939707847769421e-06, "loss": 0.0227, "step": 182590 }, { "epoch": 1.477465814386277, "grad_norm": 0.13984981179237366, "learning_rate": 1.9391494883427796e-06, "loss": 0.0162, "step": 182600 }, { "epoch": 1.477546727081479, "grad_norm": 0.19324976205825806, "learning_rate": 1.9385911899583425e-06, "loss": 0.0105, "step": 182610 }, { "epoch": 1.477627639776681, "grad_norm": 0.27541497349739075, "learning_rate": 1.9380329526272423e-06, "loss": 0.0168, "step": 182620 }, { "epoch": 1.4777085524718827, "grad_norm": 0.3246462643146515, "learning_rate": 1.937474776360608e-06, "loss": 0.0106, "step": 182630 }, { "epoch": 1.4777894651670846, "grad_norm": 0.3595258295536041, "learning_rate": 1.936916661169575e-06, "loss": 0.0118, "step": 182640 }, { "epoch": 1.4778703778622866, "grad_norm": 0.2133726328611374, "learning_rate": 1.936358607065275e-06, "loss": 0.02, "step": 182650 }, { "epoch": 1.4779512905574885, "grad_norm": 0.47699227929115295, "learning_rate": 1.9358006140588316e-06, "loss": 0.0193, "step": 182660 }, { "epoch": 1.4780322032526905, "grad_norm": 0.3482154905796051, "learning_rate": 1.9352426821613773e-06, "loss": 0.0319, "step": 182670 }, { "epoch": 1.4781131159478922, "grad_norm": 0.29000234603881836, "learning_rate": 1.934684811384038e-06, "loss": 0.0241, "step": 182680 }, { "epoch": 1.4781940286430941, "grad_norm": 0.39351993799209595, "learning_rate": 1.9341270017379388e-06, "loss": 0.0151, "step": 182690 }, { "epoch": 1.478274941338296, "grad_norm": 0.4920042157173157, "learning_rate": 1.9335692532342044e-06, "loss": 0.0133, "step": 182700 }, { "epoch": 1.4783558540334978, "grad_norm": 0.23812268674373627, "learning_rate": 1.9330115658839575e-06, "loss": 0.0202, "step": 182710 }, { "epoch": 1.4784367667286997, "grad_norm": 0.39109402894973755, "learning_rate": 1.9324539396983207e-06, "loss": 0.0205, "step": 182720 }, { "epoch": 1.4785176794239017, "grad_norm": 0.44314974546432495, "learning_rate": 1.931896374688414e-06, "loss": 0.0159, "step": 182730 }, { "epoch": 1.4785985921191034, "grad_norm": 0.12087614089250565, "learning_rate": 1.931338870865357e-06, "loss": 0.0134, "step": 182740 }, { "epoch": 1.4786795048143053, "grad_norm": 0.3565766215324402, "learning_rate": 1.9307814282402674e-06, "loss": 0.0205, "step": 182750 }, { "epoch": 1.4787604175095073, "grad_norm": 0.6178938746452332, "learning_rate": 1.9302240468242634e-06, "loss": 0.0142, "step": 182760 }, { "epoch": 1.478841330204709, "grad_norm": 0.406134694814682, "learning_rate": 1.92966672662846e-06, "loss": 0.0241, "step": 182770 }, { "epoch": 1.478922242899911, "grad_norm": 0.6525704860687256, "learning_rate": 1.9291094676639726e-06, "loss": 0.0274, "step": 182780 }, { "epoch": 1.4790031555951129, "grad_norm": 0.39538952708244324, "learning_rate": 1.9285522699419134e-06, "loss": 0.0134, "step": 182790 }, { "epoch": 1.4790840682903148, "grad_norm": 0.40831562876701355, "learning_rate": 1.927995133473393e-06, "loss": 0.0221, "step": 182800 }, { "epoch": 1.4791649809855167, "grad_norm": 0.25807198882102966, "learning_rate": 1.9274380582695286e-06, "loss": 0.0151, "step": 182810 }, { "epoch": 1.4792458936807185, "grad_norm": 0.37292417883872986, "learning_rate": 1.9268810443414216e-06, "loss": 0.016, "step": 182820 }, { "epoch": 1.4793268063759204, "grad_norm": 0.20909689366817474, "learning_rate": 1.926324091700186e-06, "loss": 0.0221, "step": 182830 }, { "epoch": 1.4794077190711223, "grad_norm": 0.3162575364112854, "learning_rate": 1.9257672003569307e-06, "loss": 0.0276, "step": 182840 }, { "epoch": 1.479488631766324, "grad_norm": 0.4721400737762451, "learning_rate": 1.925210370322754e-06, "loss": 0.0126, "step": 182850 }, { "epoch": 1.479569544461526, "grad_norm": 0.5593545436859131, "learning_rate": 1.924653601608767e-06, "loss": 0.0216, "step": 182860 }, { "epoch": 1.479650457156728, "grad_norm": 0.45002469420433044, "learning_rate": 1.9240968942260734e-06, "loss": 0.0301, "step": 182870 }, { "epoch": 1.4797313698519297, "grad_norm": 0.2999580502510071, "learning_rate": 1.9235402481857706e-06, "loss": 0.0119, "step": 182880 }, { "epoch": 1.4798122825471316, "grad_norm": 0.14277245104312897, "learning_rate": 1.9229836634989644e-06, "loss": 0.0181, "step": 182890 }, { "epoch": 1.4798931952423335, "grad_norm": 0.6963613033294678, "learning_rate": 1.922427140176753e-06, "loss": 0.0258, "step": 182900 }, { "epoch": 1.4799741079375355, "grad_norm": 0.020518846809864044, "learning_rate": 1.9218706782302353e-06, "loss": 0.0139, "step": 182910 }, { "epoch": 1.4800550206327372, "grad_norm": 0.3309573233127594, "learning_rate": 1.921314277670509e-06, "loss": 0.0305, "step": 182920 }, { "epoch": 1.4801359333279391, "grad_norm": 0.4334609806537628, "learning_rate": 1.92075793850867e-06, "loss": 0.0222, "step": 182930 }, { "epoch": 1.480216846023141, "grad_norm": 0.38032782077789307, "learning_rate": 1.9202016607558134e-06, "loss": 0.0209, "step": 182940 }, { "epoch": 1.480297758718343, "grad_norm": 0.5120156407356262, "learning_rate": 1.9196454444230323e-06, "loss": 0.0443, "step": 182950 }, { "epoch": 1.4803786714135447, "grad_norm": 0.8957671523094177, "learning_rate": 1.9190892895214203e-06, "loss": 0.0295, "step": 182960 }, { "epoch": 1.4804595841087467, "grad_norm": 0.3023029565811157, "learning_rate": 1.9185331960620685e-06, "loss": 0.0149, "step": 182970 }, { "epoch": 1.4805404968039486, "grad_norm": 0.25356829166412354, "learning_rate": 1.9179771640560667e-06, "loss": 0.0171, "step": 182980 }, { "epoch": 1.4806214094991503, "grad_norm": 0.5089425444602966, "learning_rate": 1.917421193514502e-06, "loss": 0.0168, "step": 182990 }, { "epoch": 1.4807023221943523, "grad_norm": 0.6061645150184631, "learning_rate": 1.9168652844484676e-06, "loss": 0.0128, "step": 183000 }, { "epoch": 1.4807023221943523, "eval_loss": 0.01994890719652176, "eval_runtime": 3.8263, "eval_samples_per_second": 52.27, "eval_steps_per_second": 26.135, "step": 183000 }, { "epoch": 1.4807832348895542, "grad_norm": 0.341314435005188, "learning_rate": 1.916309436869045e-06, "loss": 0.018, "step": 183010 }, { "epoch": 1.480864147584756, "grad_norm": 0.1353106051683426, "learning_rate": 1.915753650787318e-06, "loss": 0.0148, "step": 183020 }, { "epoch": 1.4809450602799579, "grad_norm": 0.2089606374502182, "learning_rate": 1.915197926214378e-06, "loss": 0.0174, "step": 183030 }, { "epoch": 1.4810259729751598, "grad_norm": 0.3959057033061981, "learning_rate": 1.9146422631612987e-06, "loss": 0.0163, "step": 183040 }, { "epoch": 1.4811068856703618, "grad_norm": 0.4268173277378082, "learning_rate": 1.914086661639168e-06, "loss": 0.0191, "step": 183050 }, { "epoch": 1.4811877983655637, "grad_norm": 0.12095839530229568, "learning_rate": 1.913531121659066e-06, "loss": 0.0124, "step": 183060 }, { "epoch": 1.4812687110607654, "grad_norm": 0.3858180642127991, "learning_rate": 1.9129756432320663e-06, "loss": 0.0185, "step": 183070 }, { "epoch": 1.4813496237559673, "grad_norm": 0.3094237446784973, "learning_rate": 1.912420226369252e-06, "loss": 0.02, "step": 183080 }, { "epoch": 1.4814305364511693, "grad_norm": 0.9818902611732483, "learning_rate": 1.9118648710816985e-06, "loss": 0.0214, "step": 183090 }, { "epoch": 1.481511449146371, "grad_norm": 0.6030592918395996, "learning_rate": 1.9113095773804806e-06, "loss": 0.0224, "step": 183100 }, { "epoch": 1.481592361841573, "grad_norm": 0.33237341046333313, "learning_rate": 1.9107543452766726e-06, "loss": 0.0115, "step": 183110 }, { "epoch": 1.4816732745367749, "grad_norm": 0.5886693596839905, "learning_rate": 1.9101991747813474e-06, "loss": 0.0228, "step": 183120 }, { "epoch": 1.4817541872319766, "grad_norm": 0.24552057683467865, "learning_rate": 1.909644065905577e-06, "loss": 0.0155, "step": 183130 }, { "epoch": 1.4818350999271785, "grad_norm": 0.4993553161621094, "learning_rate": 1.9090890186604323e-06, "loss": 0.021, "step": 183140 }, { "epoch": 1.4819160126223805, "grad_norm": 0.3073701560497284, "learning_rate": 1.908534033056981e-06, "loss": 0.0197, "step": 183150 }, { "epoch": 1.4819969253175822, "grad_norm": 0.28096991777420044, "learning_rate": 1.907979109106292e-06, "loss": 0.0277, "step": 183160 }, { "epoch": 1.4820778380127841, "grad_norm": 0.023067185655236244, "learning_rate": 1.9074242468194326e-06, "loss": 0.0149, "step": 183170 }, { "epoch": 1.482158750707986, "grad_norm": 0.214035302400589, "learning_rate": 1.9068694462074661e-06, "loss": 0.0119, "step": 183180 }, { "epoch": 1.482239663403188, "grad_norm": 0.15178155899047852, "learning_rate": 1.906314707281462e-06, "loss": 0.0108, "step": 183190 }, { "epoch": 1.48232057609839, "grad_norm": 0.4160704016685486, "learning_rate": 1.905760030052478e-06, "loss": 0.02, "step": 183200 }, { "epoch": 1.4824014887935917, "grad_norm": 0.3264274001121521, "learning_rate": 1.9052054145315768e-06, "loss": 0.0138, "step": 183210 }, { "epoch": 1.4824824014887936, "grad_norm": 0.3541291654109955, "learning_rate": 1.904650860729823e-06, "loss": 0.026, "step": 183220 }, { "epoch": 1.4825633141839956, "grad_norm": 0.30613645911216736, "learning_rate": 1.9040963686582702e-06, "loss": 0.029, "step": 183230 }, { "epoch": 1.4826442268791973, "grad_norm": 0.4503248333930969, "learning_rate": 1.9035419383279813e-06, "loss": 0.0161, "step": 183240 }, { "epoch": 1.4827251395743992, "grad_norm": 0.2861877381801605, "learning_rate": 1.9029875697500133e-06, "loss": 0.0153, "step": 183250 }, { "epoch": 1.4828060522696012, "grad_norm": 0.5627970695495605, "learning_rate": 1.9024332629354163e-06, "loss": 0.0191, "step": 183260 }, { "epoch": 1.4828869649648029, "grad_norm": 0.5134527683258057, "learning_rate": 1.9018790178952507e-06, "loss": 0.0257, "step": 183270 }, { "epoch": 1.4829678776600048, "grad_norm": 0.32085052132606506, "learning_rate": 1.9013248346405693e-06, "loss": 0.0197, "step": 183280 }, { "epoch": 1.4830487903552068, "grad_norm": 0.3645528554916382, "learning_rate": 1.9007707131824187e-06, "loss": 0.0179, "step": 183290 }, { "epoch": 1.4831297030504085, "grad_norm": 0.3240070044994354, "learning_rate": 1.9002166535318556e-06, "loss": 0.0235, "step": 183300 }, { "epoch": 1.4832106157456104, "grad_norm": 0.4859461188316345, "learning_rate": 1.8996626556999276e-06, "loss": 0.0129, "step": 183310 }, { "epoch": 1.4832915284408124, "grad_norm": 0.6098852753639221, "learning_rate": 1.8991087196976821e-06, "loss": 0.0176, "step": 183320 }, { "epoch": 1.4833724411360143, "grad_norm": 0.5228163599967957, "learning_rate": 1.8985548455361673e-06, "loss": 0.0223, "step": 183330 }, { "epoch": 1.4834533538312162, "grad_norm": 0.6280640959739685, "learning_rate": 1.8980010332264292e-06, "loss": 0.0185, "step": 183340 }, { "epoch": 1.483534266526418, "grad_norm": 0.3345191776752472, "learning_rate": 1.897447282779511e-06, "loss": 0.0153, "step": 183350 }, { "epoch": 1.48361517922162, "grad_norm": 0.5791812539100647, "learning_rate": 1.8968935942064576e-06, "loss": 0.0253, "step": 183360 }, { "epoch": 1.4836960919168218, "grad_norm": 0.6221809983253479, "learning_rate": 1.8963399675183086e-06, "loss": 0.0247, "step": 183370 }, { "epoch": 1.4837770046120236, "grad_norm": 0.5952371954917908, "learning_rate": 1.8957864027261102e-06, "loss": 0.0277, "step": 183380 }, { "epoch": 1.4838579173072255, "grad_norm": 0.43580177426338196, "learning_rate": 1.895232899840897e-06, "loss": 0.0129, "step": 183390 }, { "epoch": 1.4839388300024274, "grad_norm": 0.27838024497032166, "learning_rate": 1.8946794588737071e-06, "loss": 0.0223, "step": 183400 }, { "epoch": 1.4840197426976292, "grad_norm": 0.22976814210414886, "learning_rate": 1.8941260798355837e-06, "loss": 0.0156, "step": 183410 }, { "epoch": 1.484100655392831, "grad_norm": 0.5906572937965393, "learning_rate": 1.8935727627375572e-06, "loss": 0.0145, "step": 183420 }, { "epoch": 1.484181568088033, "grad_norm": 0.43566328287124634, "learning_rate": 1.8930195075906622e-06, "loss": 0.0276, "step": 183430 }, { "epoch": 1.484262480783235, "grad_norm": 0.17640703916549683, "learning_rate": 1.8924663144059374e-06, "loss": 0.0154, "step": 183440 }, { "epoch": 1.4843433934784367, "grad_norm": 0.32605934143066406, "learning_rate": 1.8919131831944077e-06, "loss": 0.0162, "step": 183450 }, { "epoch": 1.4844243061736386, "grad_norm": 0.19259434938430786, "learning_rate": 1.8913601139671105e-06, "loss": 0.0163, "step": 183460 }, { "epoch": 1.4845052188688406, "grad_norm": 0.43844854831695557, "learning_rate": 1.8908071067350748e-06, "loss": 0.016, "step": 183470 }, { "epoch": 1.4845861315640425, "grad_norm": 0.48184236884117126, "learning_rate": 1.8902541615093239e-06, "loss": 0.0198, "step": 183480 }, { "epoch": 1.4846670442592442, "grad_norm": 0.2112482488155365, "learning_rate": 1.8897012783008905e-06, "loss": 0.0191, "step": 183490 }, { "epoch": 1.4847479569544462, "grad_norm": 0.41056379675865173, "learning_rate": 1.8891484571208008e-06, "loss": 0.0258, "step": 183500 }, { "epoch": 1.484828869649648, "grad_norm": 0.5338572859764099, "learning_rate": 1.8885956979800747e-06, "loss": 0.0191, "step": 183510 }, { "epoch": 1.4849097823448498, "grad_norm": 0.3266383707523346, "learning_rate": 1.8880430008897398e-06, "loss": 0.0268, "step": 183520 }, { "epoch": 1.4849906950400518, "grad_norm": 0.27684399485588074, "learning_rate": 1.8874903658608184e-06, "loss": 0.008, "step": 183530 }, { "epoch": 1.4850716077352537, "grad_norm": 0.13569042086601257, "learning_rate": 1.8869377929043302e-06, "loss": 0.0256, "step": 183540 }, { "epoch": 1.4851525204304554, "grad_norm": 0.517365574836731, "learning_rate": 1.8863852820312966e-06, "loss": 0.0222, "step": 183550 }, { "epoch": 1.4852334331256574, "grad_norm": 0.47497865557670593, "learning_rate": 1.8858328332527347e-06, "loss": 0.0231, "step": 183560 }, { "epoch": 1.4853143458208593, "grad_norm": 0.1941782534122467, "learning_rate": 1.8852804465796632e-06, "loss": 0.0243, "step": 183570 }, { "epoch": 1.4853952585160612, "grad_norm": 0.3855714201927185, "learning_rate": 1.8847281220230972e-06, "loss": 0.0225, "step": 183580 }, { "epoch": 1.4854761712112632, "grad_norm": 0.33438947796821594, "learning_rate": 1.8841758595940508e-06, "loss": 0.0276, "step": 183590 }, { "epoch": 1.485557083906465, "grad_norm": 0.4971421957015991, "learning_rate": 1.8836236593035423e-06, "loss": 0.018, "step": 183600 }, { "epoch": 1.4856379966016668, "grad_norm": 0.3058438301086426, "learning_rate": 1.8830715211625794e-06, "loss": 0.0197, "step": 183610 }, { "epoch": 1.4857189092968688, "grad_norm": 0.5162565112113953, "learning_rate": 1.8825194451821732e-06, "loss": 0.0263, "step": 183620 }, { "epoch": 1.4857998219920705, "grad_norm": 0.23014183342456818, "learning_rate": 1.8819674313733387e-06, "loss": 0.0179, "step": 183630 }, { "epoch": 1.4858807346872724, "grad_norm": 0.3916078507900238, "learning_rate": 1.881415479747078e-06, "loss": 0.025, "step": 183640 }, { "epoch": 1.4859616473824744, "grad_norm": 0.31505143642425537, "learning_rate": 1.8808635903144035e-06, "loss": 0.0112, "step": 183650 }, { "epoch": 1.486042560077676, "grad_norm": 0.4615457057952881, "learning_rate": 1.8803117630863216e-06, "loss": 0.0214, "step": 183660 }, { "epoch": 1.486123472772878, "grad_norm": 0.3306245505809784, "learning_rate": 1.8797599980738324e-06, "loss": 0.0137, "step": 183670 }, { "epoch": 1.48620438546808, "grad_norm": 0.3251292109489441, "learning_rate": 1.8792082952879443e-06, "loss": 0.0215, "step": 183680 }, { "epoch": 1.4862852981632817, "grad_norm": 0.3688810467720032, "learning_rate": 1.8786566547396607e-06, "loss": 0.0208, "step": 183690 }, { "epoch": 1.4863662108584836, "grad_norm": 0.003128589130938053, "learning_rate": 1.8781050764399767e-06, "loss": 0.0223, "step": 183700 }, { "epoch": 1.4864471235536856, "grad_norm": 0.3233047127723694, "learning_rate": 1.8775535603998984e-06, "loss": 0.0156, "step": 183710 }, { "epoch": 1.4865280362488875, "grad_norm": 0.2566840648651123, "learning_rate": 1.8770021066304223e-06, "loss": 0.0196, "step": 183720 }, { "epoch": 1.4866089489440895, "grad_norm": 0.36393558979034424, "learning_rate": 1.8764507151425465e-06, "loss": 0.0201, "step": 183730 }, { "epoch": 1.4866898616392912, "grad_norm": 0.2610551714897156, "learning_rate": 1.875899385947267e-06, "loss": 0.0233, "step": 183740 }, { "epoch": 1.4867707743344931, "grad_norm": 0.3046847879886627, "learning_rate": 1.875348119055579e-06, "loss": 0.0137, "step": 183750 }, { "epoch": 1.486851687029695, "grad_norm": 0.2888919413089752, "learning_rate": 1.874796914478476e-06, "loss": 0.0161, "step": 183760 }, { "epoch": 1.4869325997248968, "grad_norm": 0.26893582940101624, "learning_rate": 1.8742457722269513e-06, "loss": 0.016, "step": 183770 }, { "epoch": 1.4870135124200987, "grad_norm": 0.1397436410188675, "learning_rate": 1.8736946923119942e-06, "loss": 0.0135, "step": 183780 }, { "epoch": 1.4870944251153007, "grad_norm": 0.3882257342338562, "learning_rate": 1.8731436747445997e-06, "loss": 0.0153, "step": 183790 }, { "epoch": 1.4871753378105024, "grad_norm": 0.09737586975097656, "learning_rate": 1.8725927195357518e-06, "loss": 0.0191, "step": 183800 }, { "epoch": 1.4872562505057043, "grad_norm": 0.5996116399765015, "learning_rate": 1.8720418266964385e-06, "loss": 0.0332, "step": 183810 }, { "epoch": 1.4873371632009063, "grad_norm": 0.4747655689716339, "learning_rate": 1.871490996237651e-06, "loss": 0.017, "step": 183820 }, { "epoch": 1.487418075896108, "grad_norm": 0.5792122483253479, "learning_rate": 1.8709402281703692e-06, "loss": 0.0159, "step": 183830 }, { "epoch": 1.48749898859131, "grad_norm": 0.14213736355304718, "learning_rate": 1.8703895225055768e-06, "loss": 0.0116, "step": 183840 }, { "epoch": 1.4875799012865119, "grad_norm": 0.6424980759620667, "learning_rate": 1.8698388792542621e-06, "loss": 0.0181, "step": 183850 }, { "epoch": 1.4876608139817138, "grad_norm": 0.5779731273651123, "learning_rate": 1.8692882984273996e-06, "loss": 0.0139, "step": 183860 }, { "epoch": 1.4877417266769157, "grad_norm": 0.3228272497653961, "learning_rate": 1.8687377800359746e-06, "loss": 0.0171, "step": 183870 }, { "epoch": 1.4878226393721175, "grad_norm": 0.6626508831977844, "learning_rate": 1.8681873240909636e-06, "loss": 0.0109, "step": 183880 }, { "epoch": 1.4879035520673194, "grad_norm": 0.21780173480510712, "learning_rate": 1.8676369306033454e-06, "loss": 0.0212, "step": 183890 }, { "epoch": 1.4879844647625213, "grad_norm": 0.7580437660217285, "learning_rate": 1.8670865995840958e-06, "loss": 0.0177, "step": 183900 }, { "epoch": 1.488065377457723, "grad_norm": 0.3459862172603607, "learning_rate": 1.8665363310441897e-06, "loss": 0.0118, "step": 183910 }, { "epoch": 1.488146290152925, "grad_norm": 0.2800224721431732, "learning_rate": 1.865986124994602e-06, "loss": 0.0224, "step": 183920 }, { "epoch": 1.488227202848127, "grad_norm": 0.1070839986205101, "learning_rate": 1.8654359814463048e-06, "loss": 0.0214, "step": 183930 }, { "epoch": 1.4883081155433286, "grad_norm": 0.32899099588394165, "learning_rate": 1.8648859004102693e-06, "loss": 0.0153, "step": 183940 }, { "epoch": 1.4883890282385306, "grad_norm": 0.39458775520324707, "learning_rate": 1.864335881897466e-06, "loss": 0.0264, "step": 183950 }, { "epoch": 1.4884699409337325, "grad_norm": 0.25768136978149414, "learning_rate": 1.863785925918864e-06, "loss": 0.0218, "step": 183960 }, { "epoch": 1.4885508536289342, "grad_norm": 0.9703513979911804, "learning_rate": 1.863236032485431e-06, "loss": 0.0256, "step": 183970 }, { "epoch": 1.4886317663241362, "grad_norm": 0.1420266181230545, "learning_rate": 1.8626862016081326e-06, "loss": 0.0174, "step": 183980 }, { "epoch": 1.4887126790193381, "grad_norm": 0.4190252721309662, "learning_rate": 1.8621364332979358e-06, "loss": 0.0192, "step": 183990 }, { "epoch": 1.48879359171454, "grad_norm": 0.3391733169555664, "learning_rate": 1.8615867275658012e-06, "loss": 0.0272, "step": 184000 }, { "epoch": 1.488874504409742, "grad_norm": 0.050795525312423706, "learning_rate": 1.861037084422697e-06, "loss": 0.0248, "step": 184010 }, { "epoch": 1.4889554171049437, "grad_norm": 0.1261039674282074, "learning_rate": 1.8604875038795805e-06, "loss": 0.0153, "step": 184020 }, { "epoch": 1.4890363298001457, "grad_norm": 0.16972486674785614, "learning_rate": 1.859937985947411e-06, "loss": 0.0174, "step": 184030 }, { "epoch": 1.4891172424953476, "grad_norm": 0.30295974016189575, "learning_rate": 1.8593885306371522e-06, "loss": 0.0206, "step": 184040 }, { "epoch": 1.4891981551905493, "grad_norm": 0.3527820110321045, "learning_rate": 1.858839137959758e-06, "loss": 0.0167, "step": 184050 }, { "epoch": 1.4892790678857513, "grad_norm": 0.8111048936843872, "learning_rate": 1.858289807926184e-06, "loss": 0.022, "step": 184060 }, { "epoch": 1.4893599805809532, "grad_norm": 0.5654907822608948, "learning_rate": 1.8577405405473892e-06, "loss": 0.0177, "step": 184070 }, { "epoch": 1.489440893276155, "grad_norm": 0.465792715549469, "learning_rate": 1.8571913358343251e-06, "loss": 0.0158, "step": 184080 }, { "epoch": 1.4895218059713569, "grad_norm": 0.11343506723642349, "learning_rate": 1.8566421937979457e-06, "loss": 0.0245, "step": 184090 }, { "epoch": 1.4896027186665588, "grad_norm": 0.3305193781852722, "learning_rate": 1.8560931144492018e-06, "loss": 0.0235, "step": 184100 }, { "epoch": 1.4896836313617607, "grad_norm": 0.3692118227481842, "learning_rate": 1.8555440977990435e-06, "loss": 0.022, "step": 184110 }, { "epoch": 1.4897645440569625, "grad_norm": 0.41810348629951477, "learning_rate": 1.8549951438584201e-06, "loss": 0.0189, "step": 184120 }, { "epoch": 1.4898454567521644, "grad_norm": 0.11757875233888626, "learning_rate": 1.854446252638279e-06, "loss": 0.012, "step": 184130 }, { "epoch": 1.4899263694473663, "grad_norm": 0.36911526322364807, "learning_rate": 1.8538974241495667e-06, "loss": 0.0203, "step": 184140 }, { "epoch": 1.4900072821425683, "grad_norm": 0.17781256139278412, "learning_rate": 1.8533486584032284e-06, "loss": 0.0176, "step": 184150 }, { "epoch": 1.49008819483777, "grad_norm": 0.6717125177383423, "learning_rate": 1.8527999554102088e-06, "loss": 0.0262, "step": 184160 }, { "epoch": 1.490169107532972, "grad_norm": 0.016430899500846863, "learning_rate": 1.8522513151814496e-06, "loss": 0.0204, "step": 184170 }, { "epoch": 1.4902500202281739, "grad_norm": 0.24100756645202637, "learning_rate": 1.851702737727893e-06, "loss": 0.0248, "step": 184180 }, { "epoch": 1.4903309329233756, "grad_norm": 0.2846524715423584, "learning_rate": 1.8511542230604767e-06, "loss": 0.0139, "step": 184190 }, { "epoch": 1.4904118456185775, "grad_norm": 0.29447829723358154, "learning_rate": 1.850605771190146e-06, "loss": 0.0166, "step": 184200 }, { "epoch": 1.4904927583137795, "grad_norm": 0.3653431534767151, "learning_rate": 1.8500573821278328e-06, "loss": 0.0309, "step": 184210 }, { "epoch": 1.4905736710089812, "grad_norm": 0.36363300681114197, "learning_rate": 1.849509055884473e-06, "loss": 0.0163, "step": 184220 }, { "epoch": 1.4906545837041831, "grad_norm": 0.3268134593963623, "learning_rate": 1.848960792471008e-06, "loss": 0.017, "step": 184230 }, { "epoch": 1.490735496399385, "grad_norm": 0.48156556487083435, "learning_rate": 1.8484125918983659e-06, "loss": 0.0297, "step": 184240 }, { "epoch": 1.490816409094587, "grad_norm": 0.15545953810214996, "learning_rate": 1.8478644541774798e-06, "loss": 0.0236, "step": 184250 }, { "epoch": 1.490897321789789, "grad_norm": 0.13917900621891022, "learning_rate": 1.8473163793192845e-06, "loss": 0.0137, "step": 184260 }, { "epoch": 1.4909782344849907, "grad_norm": 0.2761547565460205, "learning_rate": 1.846768367334708e-06, "loss": 0.0154, "step": 184270 }, { "epoch": 1.4910591471801926, "grad_norm": 0.05104950815439224, "learning_rate": 1.84622041823468e-06, "loss": 0.0142, "step": 184280 }, { "epoch": 1.4911400598753946, "grad_norm": 0.3273794651031494, "learning_rate": 1.8456725320301278e-06, "loss": 0.02, "step": 184290 }, { "epoch": 1.4912209725705963, "grad_norm": 0.489803671836853, "learning_rate": 1.8451247087319779e-06, "loss": 0.0281, "step": 184300 }, { "epoch": 1.4913018852657982, "grad_norm": 0.7899622917175293, "learning_rate": 1.8445769483511556e-06, "loss": 0.0322, "step": 184310 }, { "epoch": 1.4913827979610002, "grad_norm": 0.34514543414115906, "learning_rate": 1.844029250898584e-06, "loss": 0.0158, "step": 184320 }, { "epoch": 1.4914637106562019, "grad_norm": 0.31551218032836914, "learning_rate": 1.843481616385187e-06, "loss": 0.0136, "step": 184330 }, { "epoch": 1.4915446233514038, "grad_norm": 0.5011345148086548, "learning_rate": 1.8429340448218858e-06, "loss": 0.017, "step": 184340 }, { "epoch": 1.4916255360466057, "grad_norm": 0.2520178556442261, "learning_rate": 1.8423865362195996e-06, "loss": 0.0303, "step": 184350 }, { "epoch": 1.4917064487418075, "grad_norm": 0.26933830976486206, "learning_rate": 1.841839090589248e-06, "loss": 0.0142, "step": 184360 }, { "epoch": 1.4917873614370094, "grad_norm": 0.1542239636182785, "learning_rate": 1.8412917079417485e-06, "loss": 0.0358, "step": 184370 }, { "epoch": 1.4918682741322113, "grad_norm": 0.2087680846452713, "learning_rate": 1.8407443882880178e-06, "loss": 0.0157, "step": 184380 }, { "epoch": 1.4919491868274133, "grad_norm": 0.020355667918920517, "learning_rate": 1.8401971316389706e-06, "loss": 0.0107, "step": 184390 }, { "epoch": 1.4920300995226152, "grad_norm": 0.17972050607204437, "learning_rate": 1.839649938005521e-06, "loss": 0.0153, "step": 184400 }, { "epoch": 1.492111012217817, "grad_norm": 0.47063571214675903, "learning_rate": 1.83910280739858e-06, "loss": 0.0218, "step": 184410 }, { "epoch": 1.4921919249130189, "grad_norm": 0.6051422953605652, "learning_rate": 1.838555739829062e-06, "loss": 0.0167, "step": 184420 }, { "epoch": 1.4922728376082208, "grad_norm": 0.2801383137702942, "learning_rate": 1.8380087353078779e-06, "loss": 0.0224, "step": 184430 }, { "epoch": 1.4923537503034225, "grad_norm": 0.4315326511859894, "learning_rate": 1.8374617938459305e-06, "loss": 0.0322, "step": 184440 }, { "epoch": 1.4924346629986245, "grad_norm": 0.21243439614772797, "learning_rate": 1.8369149154541333e-06, "loss": 0.0214, "step": 184450 }, { "epoch": 1.4925155756938264, "grad_norm": 0.20613637566566467, "learning_rate": 1.8363681001433919e-06, "loss": 0.0198, "step": 184460 }, { "epoch": 1.4925964883890281, "grad_norm": 0.3384378254413605, "learning_rate": 1.8358213479246062e-06, "loss": 0.0134, "step": 184470 }, { "epoch": 1.49267740108423, "grad_norm": 0.6694689393043518, "learning_rate": 1.835274658808686e-06, "loss": 0.039, "step": 184480 }, { "epoch": 1.492758313779432, "grad_norm": 0.12658940255641937, "learning_rate": 1.834728032806532e-06, "loss": 0.0133, "step": 184490 }, { "epoch": 1.4928392264746337, "grad_norm": 0.24234478175640106, "learning_rate": 1.834181469929045e-06, "loss": 0.0162, "step": 184500 }, { "epoch": 1.4929201391698357, "grad_norm": 0.593588650226593, "learning_rate": 1.8336349701871247e-06, "loss": 0.0219, "step": 184510 }, { "epoch": 1.4930010518650376, "grad_norm": 0.557064950466156, "learning_rate": 1.833088533591671e-06, "loss": 0.0189, "step": 184520 }, { "epoch": 1.4930819645602396, "grad_norm": 0.1815788596868515, "learning_rate": 1.832542160153581e-06, "loss": 0.0197, "step": 184530 }, { "epoch": 1.4931628772554415, "grad_norm": 0.28182992339134216, "learning_rate": 1.8319958498837504e-06, "loss": 0.0166, "step": 184540 }, { "epoch": 1.4932437899506432, "grad_norm": 0.4742491841316223, "learning_rate": 1.8314496027930751e-06, "loss": 0.0245, "step": 184550 }, { "epoch": 1.4933247026458452, "grad_norm": 0.26923999190330505, "learning_rate": 1.830903418892448e-06, "loss": 0.02, "step": 184560 }, { "epoch": 1.493405615341047, "grad_norm": 0.516066312789917, "learning_rate": 1.8303572981927614e-06, "loss": 0.0192, "step": 184570 }, { "epoch": 1.4934865280362488, "grad_norm": 0.25492313504219055, "learning_rate": 1.829811240704908e-06, "loss": 0.0304, "step": 184580 }, { "epoch": 1.4935674407314508, "grad_norm": 0.3060387969017029, "learning_rate": 1.8292652464397765e-06, "loss": 0.0277, "step": 184590 }, { "epoch": 1.4936483534266527, "grad_norm": 0.2221280187368393, "learning_rate": 1.8287193154082555e-06, "loss": 0.0119, "step": 184600 }, { "epoch": 1.4937292661218544, "grad_norm": 0.4675937592983246, "learning_rate": 1.828173447621232e-06, "loss": 0.0177, "step": 184610 }, { "epoch": 1.4938101788170564, "grad_norm": 0.4108736515045166, "learning_rate": 1.8276276430895963e-06, "loss": 0.0238, "step": 184620 }, { "epoch": 1.4938910915122583, "grad_norm": 0.7632492184638977, "learning_rate": 1.8270819018242263e-06, "loss": 0.0236, "step": 184630 }, { "epoch": 1.49397200420746, "grad_norm": 0.3042823076248169, "learning_rate": 1.8265362238360114e-06, "loss": 0.0183, "step": 184640 }, { "epoch": 1.494052916902662, "grad_norm": 0.13949401676654816, "learning_rate": 1.825990609135833e-06, "loss": 0.012, "step": 184650 }, { "epoch": 1.494133829597864, "grad_norm": 0.1589849293231964, "learning_rate": 1.825445057734569e-06, "loss": 0.0114, "step": 184660 }, { "epoch": 1.4942147422930658, "grad_norm": 0.2683676779270172, "learning_rate": 1.8248995696431027e-06, "loss": 0.0238, "step": 184670 }, { "epoch": 1.4942956549882678, "grad_norm": 0.6413825750350952, "learning_rate": 1.8243541448723117e-06, "loss": 0.0278, "step": 184680 }, { "epoch": 1.4943765676834695, "grad_norm": 0.30152055621147156, "learning_rate": 1.8238087834330731e-06, "loss": 0.0226, "step": 184690 }, { "epoch": 1.4944574803786714, "grad_norm": 0.39085638523101807, "learning_rate": 1.823263485336263e-06, "loss": 0.0223, "step": 184700 }, { "epoch": 1.4945383930738734, "grad_norm": 0.4100748896598816, "learning_rate": 1.822718250592756e-06, "loss": 0.0264, "step": 184710 }, { "epoch": 1.494619305769075, "grad_norm": 0.3446556627750397, "learning_rate": 1.8221730792134269e-06, "loss": 0.0255, "step": 184720 }, { "epoch": 1.494700218464277, "grad_norm": 0.44185537099838257, "learning_rate": 1.8216279712091462e-06, "loss": 0.0164, "step": 184730 }, { "epoch": 1.494781131159479, "grad_norm": 0.6218599081039429, "learning_rate": 1.821082926590786e-06, "loss": 0.0206, "step": 184740 }, { "epoch": 1.4948620438546807, "grad_norm": 0.21717268228530884, "learning_rate": 1.8205379453692156e-06, "loss": 0.0252, "step": 184750 }, { "epoch": 1.4949429565498826, "grad_norm": 0.5085030198097229, "learning_rate": 1.819993027555304e-06, "loss": 0.0183, "step": 184760 }, { "epoch": 1.4950238692450846, "grad_norm": 0.3161078989505768, "learning_rate": 1.8194481731599184e-06, "loss": 0.0162, "step": 184770 }, { "epoch": 1.4951047819402865, "grad_norm": 0.3039960265159607, "learning_rate": 1.8189033821939244e-06, "loss": 0.0265, "step": 184780 }, { "epoch": 1.4951856946354882, "grad_norm": 0.23472438752651215, "learning_rate": 1.818358654668187e-06, "loss": 0.0238, "step": 184790 }, { "epoch": 1.4952666073306902, "grad_norm": 0.3981534242630005, "learning_rate": 1.8178139905935671e-06, "loss": 0.0176, "step": 184800 }, { "epoch": 1.495347520025892, "grad_norm": 0.23621366918087006, "learning_rate": 1.8172693899809336e-06, "loss": 0.0215, "step": 184810 }, { "epoch": 1.495428432721094, "grad_norm": 0.09148386865854263, "learning_rate": 1.8167248528411391e-06, "loss": 0.0213, "step": 184820 }, { "epoch": 1.4955093454162958, "grad_norm": 0.3527258336544037, "learning_rate": 1.8161803791850491e-06, "loss": 0.0179, "step": 184830 }, { "epoch": 1.4955902581114977, "grad_norm": 0.7349289655685425, "learning_rate": 1.8156359690235214e-06, "loss": 0.0335, "step": 184840 }, { "epoch": 1.4956711708066996, "grad_norm": 0.422709196805954, "learning_rate": 1.8150916223674087e-06, "loss": 0.0208, "step": 184850 }, { "epoch": 1.4957520835019014, "grad_norm": 0.34477370977401733, "learning_rate": 1.814547339227572e-06, "loss": 0.0242, "step": 184860 }, { "epoch": 1.4958329961971033, "grad_norm": 0.4682811498641968, "learning_rate": 1.814003119614865e-06, "loss": 0.0209, "step": 184870 }, { "epoch": 1.4959139088923052, "grad_norm": 0.26856863498687744, "learning_rate": 1.8134589635401368e-06, "loss": 0.0251, "step": 184880 }, { "epoch": 1.495994821587507, "grad_norm": 0.3939891755580902, "learning_rate": 1.8129148710142442e-06, "loss": 0.0194, "step": 184890 }, { "epoch": 1.496075734282709, "grad_norm": 0.2742469608783722, "learning_rate": 1.8123708420480358e-06, "loss": 0.0174, "step": 184900 }, { "epoch": 1.4961566469779108, "grad_norm": 0.5355791449546814, "learning_rate": 1.8118268766523617e-06, "loss": 0.0141, "step": 184910 }, { "epoch": 1.4962375596731128, "grad_norm": 0.13738122582435608, "learning_rate": 1.8112829748380705e-06, "loss": 0.0164, "step": 184920 }, { "epoch": 1.4963184723683147, "grad_norm": 0.08850686252117157, "learning_rate": 1.8107391366160083e-06, "loss": 0.0176, "step": 184930 }, { "epoch": 1.4963993850635164, "grad_norm": 0.0024709361605346203, "learning_rate": 1.8101953619970214e-06, "loss": 0.016, "step": 184940 }, { "epoch": 1.4964802977587184, "grad_norm": 0.6314390897750854, "learning_rate": 1.8096516509919537e-06, "loss": 0.0161, "step": 184950 }, { "epoch": 1.4965612104539203, "grad_norm": 0.1290067583322525, "learning_rate": 1.8091080036116487e-06, "loss": 0.0084, "step": 184960 }, { "epoch": 1.496642123149122, "grad_norm": 0.29522228240966797, "learning_rate": 1.808564419866949e-06, "loss": 0.0209, "step": 184970 }, { "epoch": 1.496723035844324, "grad_norm": 0.5721245408058167, "learning_rate": 1.8080208997686938e-06, "loss": 0.0158, "step": 184980 }, { "epoch": 1.496803948539526, "grad_norm": 0.5627783536911011, "learning_rate": 1.8074774433277214e-06, "loss": 0.0226, "step": 184990 }, { "epoch": 1.4968848612347276, "grad_norm": 0.42913150787353516, "learning_rate": 1.8069340505548755e-06, "loss": 0.0101, "step": 185000 }, { "epoch": 1.4969657739299296, "grad_norm": 0.21878954768180847, "learning_rate": 1.8063907214609872e-06, "loss": 0.0158, "step": 185010 }, { "epoch": 1.4970466866251315, "grad_norm": 0.13334870338439941, "learning_rate": 1.8058474560568924e-06, "loss": 0.0133, "step": 185020 }, { "epoch": 1.4971275993203332, "grad_norm": 0.3956700265407562, "learning_rate": 1.8053042543534305e-06, "loss": 0.0138, "step": 185030 }, { "epoch": 1.4972085120155352, "grad_norm": 0.5529960989952087, "learning_rate": 1.804761116361427e-06, "loss": 0.0245, "step": 185040 }, { "epoch": 1.4972894247107371, "grad_norm": 0.5115861892700195, "learning_rate": 1.80421804209172e-06, "loss": 0.0217, "step": 185050 }, { "epoch": 1.497370337405939, "grad_norm": 0.31904351711273193, "learning_rate": 1.8036750315551388e-06, "loss": 0.0116, "step": 185060 }, { "epoch": 1.497451250101141, "grad_norm": 0.20913539826869965, "learning_rate": 1.8031320847625078e-06, "loss": 0.0133, "step": 185070 }, { "epoch": 1.4975321627963427, "grad_norm": 0.2811655104160309, "learning_rate": 1.8025892017246604e-06, "loss": 0.0201, "step": 185080 }, { "epoch": 1.4976130754915447, "grad_norm": 0.13522951304912567, "learning_rate": 1.8020463824524231e-06, "loss": 0.0143, "step": 185090 }, { "epoch": 1.4976939881867466, "grad_norm": 0.3209304213523865, "learning_rate": 1.801503626956616e-06, "loss": 0.0165, "step": 185100 }, { "epoch": 1.4977749008819483, "grad_norm": 0.27751898765563965, "learning_rate": 1.8009609352480683e-06, "loss": 0.0226, "step": 185110 }, { "epoch": 1.4978558135771503, "grad_norm": 0.0008387714624404907, "learning_rate": 1.8004183073376015e-06, "loss": 0.019, "step": 185120 }, { "epoch": 1.4979367262723522, "grad_norm": 0.39181068539619446, "learning_rate": 1.799875743236037e-06, "loss": 0.0144, "step": 185130 }, { "epoch": 1.498017638967554, "grad_norm": 0.13359741866588593, "learning_rate": 1.7993332429541949e-06, "loss": 0.0203, "step": 185140 }, { "epoch": 1.4980985516627559, "grad_norm": 0.55653315782547, "learning_rate": 1.7987908065028942e-06, "loss": 0.0266, "step": 185150 }, { "epoch": 1.4981794643579578, "grad_norm": 0.5405876040458679, "learning_rate": 1.7982484338929534e-06, "loss": 0.0271, "step": 185160 }, { "epoch": 1.4982603770531595, "grad_norm": 0.41836017370224, "learning_rate": 1.797706125135188e-06, "loss": 0.0153, "step": 185170 }, { "epoch": 1.4983412897483614, "grad_norm": 0.5346456170082092, "learning_rate": 1.7971638802404119e-06, "loss": 0.0167, "step": 185180 }, { "epoch": 1.4984222024435634, "grad_norm": 0.5616549253463745, "learning_rate": 1.7966216992194441e-06, "loss": 0.0372, "step": 185190 }, { "epoch": 1.4985031151387653, "grad_norm": 0.3105889558792114, "learning_rate": 1.7960795820830923e-06, "loss": 0.0112, "step": 185200 }, { "epoch": 1.4985840278339673, "grad_norm": 0.14741137623786926, "learning_rate": 1.7955375288421674e-06, "loss": 0.0227, "step": 185210 }, { "epoch": 1.498664940529169, "grad_norm": 0.490529328584671, "learning_rate": 1.7949955395074854e-06, "loss": 0.0247, "step": 185220 }, { "epoch": 1.498745853224371, "grad_norm": 0.24491682648658752, "learning_rate": 1.7944536140898478e-06, "loss": 0.0191, "step": 185230 }, { "epoch": 1.4988267659195729, "grad_norm": 0.0635874941945076, "learning_rate": 1.793911752600067e-06, "loss": 0.0097, "step": 185240 }, { "epoch": 1.4989076786147746, "grad_norm": 0.013971439562737942, "learning_rate": 1.7933699550489496e-06, "loss": 0.0115, "step": 185250 }, { "epoch": 1.4989885913099765, "grad_norm": 0.3792530298233032, "learning_rate": 1.7928282214472965e-06, "loss": 0.0241, "step": 185260 }, { "epoch": 1.4990695040051785, "grad_norm": 0.25966861844062805, "learning_rate": 1.7922865518059146e-06, "loss": 0.0178, "step": 185270 }, { "epoch": 1.4991504167003802, "grad_norm": 0.22167092561721802, "learning_rate": 1.791744946135608e-06, "loss": 0.0319, "step": 185280 }, { "epoch": 1.4992313293955821, "grad_norm": 0.38230761885643005, "learning_rate": 1.7912034044471715e-06, "loss": 0.0174, "step": 185290 }, { "epoch": 1.499312242090784, "grad_norm": 0.24322402477264404, "learning_rate": 1.7906619267514114e-06, "loss": 0.0269, "step": 185300 }, { "epoch": 1.499393154785986, "grad_norm": 0.32402530312538147, "learning_rate": 1.7901205130591231e-06, "loss": 0.0173, "step": 185310 }, { "epoch": 1.4994740674811877, "grad_norm": 0.4274120330810547, "learning_rate": 1.7895791633811054e-06, "loss": 0.0156, "step": 185320 }, { "epoch": 1.4995549801763897, "grad_norm": 0.3391225039958954, "learning_rate": 1.7890378777281542e-06, "loss": 0.0314, "step": 185330 }, { "epoch": 1.4996358928715916, "grad_norm": 0.3662809133529663, "learning_rate": 1.7884966561110634e-06, "loss": 0.0112, "step": 185340 }, { "epoch": 1.4997168055667935, "grad_norm": 0.9063756465911865, "learning_rate": 1.7879554985406266e-06, "loss": 0.0216, "step": 185350 }, { "epoch": 1.4997977182619953, "grad_norm": 0.9506863355636597, "learning_rate": 1.787414405027637e-06, "loss": 0.0173, "step": 185360 }, { "epoch": 1.4998786309571972, "grad_norm": 0.5217586755752563, "learning_rate": 1.7868733755828833e-06, "loss": 0.0106, "step": 185370 }, { "epoch": 1.4999595436523991, "grad_norm": 0.38553568720817566, "learning_rate": 1.7863324102171604e-06, "loss": 0.0189, "step": 185380 }, { "epoch": 1.5000404563476009, "grad_norm": 0.3952159285545349, "learning_rate": 1.7857915089412508e-06, "loss": 0.0149, "step": 185390 }, { "epoch": 1.5001213690428028, "grad_norm": 0.15116851031780243, "learning_rate": 1.7852506717659429e-06, "loss": 0.0284, "step": 185400 }, { "epoch": 1.5002022817380047, "grad_norm": 0.13112667202949524, "learning_rate": 1.7847098987020272e-06, "loss": 0.0151, "step": 185410 }, { "epoch": 1.5002831944332065, "grad_norm": 0.3500026762485504, "learning_rate": 1.7841691897602832e-06, "loss": 0.0167, "step": 185420 }, { "epoch": 1.5003641071284084, "grad_norm": 0.10526331514120102, "learning_rate": 1.7836285449514939e-06, "loss": 0.0192, "step": 185430 }, { "epoch": 1.5004450198236103, "grad_norm": 0.23944056034088135, "learning_rate": 1.7830879642864463e-06, "loss": 0.0226, "step": 185440 }, { "epoch": 1.500525932518812, "grad_norm": 0.35994020104408264, "learning_rate": 1.7825474477759154e-06, "loss": 0.0266, "step": 185450 }, { "epoch": 1.5006068452140142, "grad_norm": 0.29607462882995605, "learning_rate": 1.782006995430684e-06, "loss": 0.0129, "step": 185460 }, { "epoch": 1.500687757909216, "grad_norm": 0.23541709780693054, "learning_rate": 1.7814666072615321e-06, "loss": 0.0175, "step": 185470 }, { "epoch": 1.5007686706044179, "grad_norm": 0.4679076671600342, "learning_rate": 1.78092628327923e-06, "loss": 0.0195, "step": 185480 }, { "epoch": 1.5008495832996198, "grad_norm": 0.28971824049949646, "learning_rate": 1.7803860234945596e-06, "loss": 0.0191, "step": 185490 }, { "epoch": 1.5009304959948215, "grad_norm": 0.48772308230400085, "learning_rate": 1.779845827918295e-06, "loss": 0.0166, "step": 185500 }, { "epoch": 1.5010114086900235, "grad_norm": 0.49856144189834595, "learning_rate": 1.7793056965612038e-06, "loss": 0.0218, "step": 185510 }, { "epoch": 1.5010923213852254, "grad_norm": 0.3339245319366455, "learning_rate": 1.7787656294340626e-06, "loss": 0.0201, "step": 185520 }, { "epoch": 1.5011732340804271, "grad_norm": 0.7414159178733826, "learning_rate": 1.778225626547641e-06, "loss": 0.0351, "step": 185530 }, { "epoch": 1.501254146775629, "grad_norm": 0.4941486716270447, "learning_rate": 1.7776856879127074e-06, "loss": 0.0233, "step": 185540 }, { "epoch": 1.501335059470831, "grad_norm": 0.4334649443626404, "learning_rate": 1.7771458135400305e-06, "loss": 0.0184, "step": 185550 }, { "epoch": 1.5014159721660327, "grad_norm": 0.4366541802883148, "learning_rate": 1.7766060034403764e-06, "loss": 0.0293, "step": 185560 }, { "epoch": 1.501496884861235, "grad_norm": 0.05460367351770401, "learning_rate": 1.7760662576245107e-06, "loss": 0.0145, "step": 185570 }, { "epoch": 1.5015777975564366, "grad_norm": 0.21543429791927338, "learning_rate": 1.7755265761031977e-06, "loss": 0.0259, "step": 185580 }, { "epoch": 1.5016587102516383, "grad_norm": 0.40517309308052063, "learning_rate": 1.7749869588871976e-06, "loss": 0.0107, "step": 185590 }, { "epoch": 1.5017396229468405, "grad_norm": 0.16105115413665771, "learning_rate": 1.7744474059872785e-06, "loss": 0.0228, "step": 185600 }, { "epoch": 1.5018205356420422, "grad_norm": 0.30757415294647217, "learning_rate": 1.7739079174141943e-06, "loss": 0.0157, "step": 185610 }, { "epoch": 1.5019014483372441, "grad_norm": 0.28678542375564575, "learning_rate": 1.7733684931787049e-06, "loss": 0.0178, "step": 185620 }, { "epoch": 1.501982361032446, "grad_norm": 0.9917743802070618, "learning_rate": 1.772829133291572e-06, "loss": 0.0203, "step": 185630 }, { "epoch": 1.5020632737276478, "grad_norm": 0.29485365748405457, "learning_rate": 1.7722898377635477e-06, "loss": 0.0188, "step": 185640 }, { "epoch": 1.5021441864228497, "grad_norm": 0.601483166217804, "learning_rate": 1.7717506066053868e-06, "loss": 0.0146, "step": 185650 }, { "epoch": 1.5022250991180517, "grad_norm": 0.41368958353996277, "learning_rate": 1.7712114398278485e-06, "loss": 0.0261, "step": 185660 }, { "epoch": 1.5023060118132534, "grad_norm": 0.2776569128036499, "learning_rate": 1.7706723374416779e-06, "loss": 0.016, "step": 185670 }, { "epoch": 1.5023869245084553, "grad_norm": 0.16312600672245026, "learning_rate": 1.7701332994576326e-06, "loss": 0.0167, "step": 185680 }, { "epoch": 1.5024678372036573, "grad_norm": 0.4364933371543884, "learning_rate": 1.7695943258864596e-06, "loss": 0.0151, "step": 185690 }, { "epoch": 1.502548749898859, "grad_norm": 0.46052414178848267, "learning_rate": 1.769055416738908e-06, "loss": 0.0248, "step": 185700 }, { "epoch": 1.5026296625940612, "grad_norm": 0.5334877967834473, "learning_rate": 1.7685165720257253e-06, "loss": 0.0115, "step": 185710 }, { "epoch": 1.5027105752892629, "grad_norm": 0.4934559762477875, "learning_rate": 1.767977791757658e-06, "loss": 0.0278, "step": 185720 }, { "epoch": 1.5027914879844646, "grad_norm": 0.5662171840667725, "learning_rate": 1.7674390759454501e-06, "loss": 0.0377, "step": 185730 }, { "epoch": 1.5028724006796668, "grad_norm": 0.4587690532207489, "learning_rate": 1.7669004245998455e-06, "loss": 0.0176, "step": 185740 }, { "epoch": 1.5029533133748685, "grad_norm": 0.28979867696762085, "learning_rate": 1.7663618377315871e-06, "loss": 0.0247, "step": 185750 }, { "epoch": 1.5030342260700704, "grad_norm": 0.1114252433180809, "learning_rate": 1.7658233153514148e-06, "loss": 0.0295, "step": 185760 }, { "epoch": 1.5031151387652724, "grad_norm": 0.24171888828277588, "learning_rate": 1.7652848574700693e-06, "loss": 0.0205, "step": 185770 }, { "epoch": 1.503196051460474, "grad_norm": 0.21257384121418, "learning_rate": 1.7647464640982863e-06, "loss": 0.0219, "step": 185780 }, { "epoch": 1.503276964155676, "grad_norm": 0.458383172750473, "learning_rate": 1.7642081352468093e-06, "loss": 0.0154, "step": 185790 }, { "epoch": 1.503357876850878, "grad_norm": 0.3800234794616699, "learning_rate": 1.7636698709263677e-06, "loss": 0.0176, "step": 185800 }, { "epoch": 1.5034387895460797, "grad_norm": 0.4804374873638153, "learning_rate": 1.763131671147697e-06, "loss": 0.016, "step": 185810 }, { "epoch": 1.5035197022412816, "grad_norm": 0.44243863224983215, "learning_rate": 1.7625935359215346e-06, "loss": 0.0151, "step": 185820 }, { "epoch": 1.5036006149364836, "grad_norm": 0.5330665111541748, "learning_rate": 1.7620554652586086e-06, "loss": 0.0167, "step": 185830 }, { "epoch": 1.5036815276316853, "grad_norm": 0.17908263206481934, "learning_rate": 1.7615174591696487e-06, "loss": 0.0273, "step": 185840 }, { "epoch": 1.5037624403268874, "grad_norm": 0.6219460368156433, "learning_rate": 1.7609795176653898e-06, "loss": 0.0135, "step": 185850 }, { "epoch": 1.5038433530220892, "grad_norm": 0.4967270493507385, "learning_rate": 1.7604416407565533e-06, "loss": 0.0192, "step": 185860 }, { "epoch": 1.503924265717291, "grad_norm": 0.4312112033367157, "learning_rate": 1.7599038284538705e-06, "loss": 0.019, "step": 185870 }, { "epoch": 1.504005178412493, "grad_norm": 0.39718276262283325, "learning_rate": 1.7593660807680662e-06, "loss": 0.0219, "step": 185880 }, { "epoch": 1.5040860911076948, "grad_norm": 0.6043379306793213, "learning_rate": 1.7588283977098642e-06, "loss": 0.0256, "step": 185890 }, { "epoch": 1.5041670038028967, "grad_norm": 0.28767362236976624, "learning_rate": 1.7582907792899867e-06, "loss": 0.02, "step": 185900 }, { "epoch": 1.5042479164980986, "grad_norm": 0.5919480919837952, "learning_rate": 1.7577532255191564e-06, "loss": 0.0209, "step": 185910 }, { "epoch": 1.5043288291933004, "grad_norm": 0.5911917686462402, "learning_rate": 1.757215736408094e-06, "loss": 0.0193, "step": 185920 }, { "epoch": 1.5044097418885023, "grad_norm": 0.0569157674908638, "learning_rate": 1.7566783119675169e-06, "loss": 0.0133, "step": 185930 }, { "epoch": 1.5044906545837042, "grad_norm": 0.7252475619316101, "learning_rate": 1.7561409522081447e-06, "loss": 0.0171, "step": 185940 }, { "epoch": 1.504571567278906, "grad_norm": 0.30694374442100525, "learning_rate": 1.7556036571406926e-06, "loss": 0.0122, "step": 185950 }, { "epoch": 1.504652479974108, "grad_norm": 0.157391756772995, "learning_rate": 1.7550664267758766e-06, "loss": 0.0105, "step": 185960 }, { "epoch": 1.5047333926693098, "grad_norm": 0.1850486397743225, "learning_rate": 1.7545292611244103e-06, "loss": 0.0169, "step": 185970 }, { "epoch": 1.5048143053645116, "grad_norm": 0.4815339148044586, "learning_rate": 1.753992160197006e-06, "loss": 0.0198, "step": 185980 }, { "epoch": 1.5048952180597137, "grad_norm": 0.34500953555107117, "learning_rate": 1.7534551240043756e-06, "loss": 0.0211, "step": 185990 }, { "epoch": 1.5049761307549154, "grad_norm": 0.2921568751335144, "learning_rate": 1.7529181525572276e-06, "loss": 0.0347, "step": 186000 }, { "epoch": 1.5050570434501174, "grad_norm": 0.11943376809358597, "learning_rate": 1.7523812458662754e-06, "loss": 0.0206, "step": 186010 }, { "epoch": 1.5051379561453193, "grad_norm": 0.23690980672836304, "learning_rate": 1.7518444039422216e-06, "loss": 0.0233, "step": 186020 }, { "epoch": 1.505218868840521, "grad_norm": 1.5967471599578857, "learning_rate": 1.7513076267957724e-06, "loss": 0.0142, "step": 186030 }, { "epoch": 1.505299781535723, "grad_norm": 0.1246969923377037, "learning_rate": 1.7507709144376378e-06, "loss": 0.0123, "step": 186040 }, { "epoch": 1.505380694230925, "grad_norm": 0.9198890328407288, "learning_rate": 1.7502342668785155e-06, "loss": 0.0355, "step": 186050 }, { "epoch": 1.5054616069261266, "grad_norm": 0.26626792550086975, "learning_rate": 1.749697684129109e-06, "loss": 0.0194, "step": 186060 }, { "epoch": 1.5055425196213286, "grad_norm": 0.06815798580646515, "learning_rate": 1.7491611662001224e-06, "loss": 0.0096, "step": 186070 }, { "epoch": 1.5056234323165305, "grad_norm": 0.31863608956336975, "learning_rate": 1.7486247131022532e-06, "loss": 0.0139, "step": 186080 }, { "epoch": 1.5057043450117322, "grad_norm": 0.36859196424484253, "learning_rate": 1.7480883248462e-06, "loss": 0.0278, "step": 186090 }, { "epoch": 1.5057852577069342, "grad_norm": 0.4468030035495758, "learning_rate": 1.7475520014426605e-06, "loss": 0.0109, "step": 186100 }, { "epoch": 1.505866170402136, "grad_norm": 0.20850688219070435, "learning_rate": 1.74701574290233e-06, "loss": 0.0171, "step": 186110 }, { "epoch": 1.5059470830973378, "grad_norm": 0.5894250273704529, "learning_rate": 1.7464795492359027e-06, "loss": 0.0239, "step": 186120 }, { "epoch": 1.50602799579254, "grad_norm": 0.4089145064353943, "learning_rate": 1.7459434204540726e-06, "loss": 0.0193, "step": 186130 }, { "epoch": 1.5061089084877417, "grad_norm": 0.18384474515914917, "learning_rate": 1.7454073565675312e-06, "loss": 0.0131, "step": 186140 }, { "epoch": 1.5061898211829436, "grad_norm": 0.42890122532844543, "learning_rate": 1.7448713575869697e-06, "loss": 0.0217, "step": 186150 }, { "epoch": 1.5062707338781456, "grad_norm": 0.372618168592453, "learning_rate": 1.744335423523077e-06, "loss": 0.0191, "step": 186160 }, { "epoch": 1.5063516465733473, "grad_norm": 0.34664586186408997, "learning_rate": 1.743799554386541e-06, "loss": 0.032, "step": 186170 }, { "epoch": 1.5064325592685492, "grad_norm": 0.044968146830797195, "learning_rate": 1.7432637501880488e-06, "loss": 0.0107, "step": 186180 }, { "epoch": 1.5065134719637512, "grad_norm": 0.2788122296333313, "learning_rate": 1.742728010938286e-06, "loss": 0.0137, "step": 186190 }, { "epoch": 1.506594384658953, "grad_norm": 0.316013902425766, "learning_rate": 1.7421923366479365e-06, "loss": 0.0148, "step": 186200 }, { "epoch": 1.5066752973541548, "grad_norm": 0.1673801988363266, "learning_rate": 1.7416567273276836e-06, "loss": 0.0237, "step": 186210 }, { "epoch": 1.5067562100493568, "grad_norm": 0.5388879776000977, "learning_rate": 1.7411211829882064e-06, "loss": 0.0233, "step": 186220 }, { "epoch": 1.5068371227445585, "grad_norm": 0.3262883424758911, "learning_rate": 1.7405857036401913e-06, "loss": 0.0168, "step": 186230 }, { "epoch": 1.5069180354397607, "grad_norm": 0.3138347268104553, "learning_rate": 1.740050289294311e-06, "loss": 0.0204, "step": 186240 }, { "epoch": 1.5069989481349624, "grad_norm": 0.3199205994606018, "learning_rate": 1.7395149399612449e-06, "loss": 0.0225, "step": 186250 }, { "epoch": 1.507079860830164, "grad_norm": 0.36000609397888184, "learning_rate": 1.7389796556516714e-06, "loss": 0.0156, "step": 186260 }, { "epoch": 1.5071607735253663, "grad_norm": 0.2663305699825287, "learning_rate": 1.7384444363762643e-06, "loss": 0.0183, "step": 186270 }, { "epoch": 1.507241686220568, "grad_norm": 0.4933733344078064, "learning_rate": 1.7379092821456972e-06, "loss": 0.0287, "step": 186280 }, { "epoch": 1.50732259891577, "grad_norm": 0.4534046947956085, "learning_rate": 1.7373741929706433e-06, "loss": 0.0178, "step": 186290 }, { "epoch": 1.5074035116109719, "grad_norm": 0.4428502917289734, "learning_rate": 1.736839168861773e-06, "loss": 0.0113, "step": 186300 }, { "epoch": 1.5074844243061736, "grad_norm": 0.26172396540641785, "learning_rate": 1.7363042098297566e-06, "loss": 0.0147, "step": 186310 }, { "epoch": 1.5075653370013755, "grad_norm": 0.279570996761322, "learning_rate": 1.7357693158852635e-06, "loss": 0.0254, "step": 186320 }, { "epoch": 1.5076462496965775, "grad_norm": 0.2949850261211395, "learning_rate": 1.73523448703896e-06, "loss": 0.0188, "step": 186330 }, { "epoch": 1.5077271623917792, "grad_norm": 0.12363019585609436, "learning_rate": 1.7346997233015122e-06, "loss": 0.0195, "step": 186340 }, { "epoch": 1.5078080750869811, "grad_norm": 0.11282163858413696, "learning_rate": 1.734165024683585e-06, "loss": 0.0101, "step": 186350 }, { "epoch": 1.507888987782183, "grad_norm": 0.4209422767162323, "learning_rate": 1.7336303911958419e-06, "loss": 0.0162, "step": 186360 }, { "epoch": 1.5079699004773848, "grad_norm": 0.4569036662578583, "learning_rate": 1.7330958228489452e-06, "loss": 0.0261, "step": 186370 }, { "epoch": 1.508050813172587, "grad_norm": 0.22888916730880737, "learning_rate": 1.7325613196535557e-06, "loss": 0.0154, "step": 186380 }, { "epoch": 1.5081317258677887, "grad_norm": 0.4365433156490326, "learning_rate": 1.732026881620333e-06, "loss": 0.0162, "step": 186390 }, { "epoch": 1.5082126385629904, "grad_norm": 0.016505081206560135, "learning_rate": 1.7314925087599344e-06, "loss": 0.0114, "step": 186400 }, { "epoch": 1.5082935512581925, "grad_norm": 0.4045577943325043, "learning_rate": 1.7309582010830161e-06, "loss": 0.0129, "step": 186410 }, { "epoch": 1.5083744639533943, "grad_norm": 0.17485275864601135, "learning_rate": 1.7304239586002375e-06, "loss": 0.0157, "step": 186420 }, { "epoch": 1.5084553766485962, "grad_norm": 0.3504648804664612, "learning_rate": 1.729889781322252e-06, "loss": 0.0234, "step": 186430 }, { "epoch": 1.5085362893437981, "grad_norm": 0.13446015119552612, "learning_rate": 1.729355669259708e-06, "loss": 0.0124, "step": 186440 }, { "epoch": 1.5086172020389998, "grad_norm": 0.5776281952857971, "learning_rate": 1.7288216224232619e-06, "loss": 0.0141, "step": 186450 }, { "epoch": 1.5086981147342018, "grad_norm": 0.45152202248573303, "learning_rate": 1.728287640823565e-06, "loss": 0.0344, "step": 186460 }, { "epoch": 1.5087790274294037, "grad_norm": 0.4481455385684967, "learning_rate": 1.72775372447126e-06, "loss": 0.0196, "step": 186470 }, { "epoch": 1.5088599401246054, "grad_norm": 0.4385775923728943, "learning_rate": 1.7272198733770012e-06, "loss": 0.0176, "step": 186480 }, { "epoch": 1.5089408528198074, "grad_norm": 0.4129886329174042, "learning_rate": 1.726686087551433e-06, "loss": 0.0144, "step": 186490 }, { "epoch": 1.5090217655150093, "grad_norm": 0.1704871505498886, "learning_rate": 1.7261523670052006e-06, "loss": 0.0239, "step": 186500 }, { "epoch": 1.509102678210211, "grad_norm": 0.45963501930236816, "learning_rate": 1.7256187117489476e-06, "loss": 0.0205, "step": 186510 }, { "epoch": 1.5091835909054132, "grad_norm": 0.3146315813064575, "learning_rate": 1.7250851217933168e-06, "loss": 0.0189, "step": 186520 }, { "epoch": 1.509264503600615, "grad_norm": 0.5249379873275757, "learning_rate": 1.7245515971489495e-06, "loss": 0.0201, "step": 186530 }, { "epoch": 1.5093454162958169, "grad_norm": 0.2602348029613495, "learning_rate": 1.7240181378264858e-06, "loss": 0.0203, "step": 186540 }, { "epoch": 1.5094263289910188, "grad_norm": 0.29144400358200073, "learning_rate": 1.7234847438365648e-06, "loss": 0.0228, "step": 186550 }, { "epoch": 1.5095072416862205, "grad_norm": 0.6733604669570923, "learning_rate": 1.7229514151898236e-06, "loss": 0.0334, "step": 186560 }, { "epoch": 1.5095881543814225, "grad_norm": 0.6320548057556152, "learning_rate": 1.7224181518968979e-06, "loss": 0.024, "step": 186570 }, { "epoch": 1.5096690670766244, "grad_norm": 0.8874930739402771, "learning_rate": 1.7218849539684229e-06, "loss": 0.0322, "step": 186580 }, { "epoch": 1.5097499797718261, "grad_norm": 0.1903166025876999, "learning_rate": 1.7213518214150321e-06, "loss": 0.0251, "step": 186590 }, { "epoch": 1.509830892467028, "grad_norm": 0.31573888659477234, "learning_rate": 1.7208187542473581e-06, "loss": 0.0207, "step": 186600 }, { "epoch": 1.50991180516223, "grad_norm": 0.2986123561859131, "learning_rate": 1.7202857524760292e-06, "loss": 0.0246, "step": 186610 }, { "epoch": 1.5099927178574317, "grad_norm": 0.25183412432670593, "learning_rate": 1.719752816111681e-06, "loss": 0.0176, "step": 186620 }, { "epoch": 1.5100736305526337, "grad_norm": 0.2287859320640564, "learning_rate": 1.7192199451649345e-06, "loss": 0.0182, "step": 186630 }, { "epoch": 1.5101545432478356, "grad_norm": 0.08050618320703506, "learning_rate": 1.7186871396464217e-06, "loss": 0.0121, "step": 186640 }, { "epoch": 1.5102354559430373, "grad_norm": 0.23684726655483246, "learning_rate": 1.7181543995667682e-06, "loss": 0.0154, "step": 186650 }, { "epoch": 1.5103163686382395, "grad_norm": 0.5839003324508667, "learning_rate": 1.7176217249365939e-06, "loss": 0.0142, "step": 186660 }, { "epoch": 1.5103972813334412, "grad_norm": 0.35915884375572205, "learning_rate": 1.7170891157665265e-06, "loss": 0.03, "step": 186670 }, { "epoch": 1.5104781940286431, "grad_norm": 0.38894200325012207, "learning_rate": 1.716556572067188e-06, "loss": 0.0263, "step": 186680 }, { "epoch": 1.510559106723845, "grad_norm": 0.4697297215461731, "learning_rate": 1.7160240938491929e-06, "loss": 0.0204, "step": 186690 }, { "epoch": 1.5106400194190468, "grad_norm": 0.2950659990310669, "learning_rate": 1.715491681123167e-06, "loss": 0.0147, "step": 186700 }, { "epoch": 1.5107209321142487, "grad_norm": 0.2232351154088974, "learning_rate": 1.714959333899725e-06, "loss": 0.0194, "step": 186710 }, { "epoch": 1.5108018448094507, "grad_norm": 0.10045547038316727, "learning_rate": 1.7144270521894846e-06, "loss": 0.0182, "step": 186720 }, { "epoch": 1.5108827575046524, "grad_norm": 0.19923904538154602, "learning_rate": 1.71389483600306e-06, "loss": 0.0209, "step": 186730 }, { "epoch": 1.5109636701998543, "grad_norm": 0.4211335778236389, "learning_rate": 1.713362685351066e-06, "loss": 0.0213, "step": 186740 }, { "epoch": 1.5110445828950563, "grad_norm": 0.877693235874176, "learning_rate": 1.7128306002441147e-06, "loss": 0.0162, "step": 186750 }, { "epoch": 1.511125495590258, "grad_norm": 0.3609222173690796, "learning_rate": 1.7122985806928177e-06, "loss": 0.0201, "step": 186760 }, { "epoch": 1.51120640828546, "grad_norm": 0.6270076632499695, "learning_rate": 1.7117666267077849e-06, "loss": 0.0156, "step": 186770 }, { "epoch": 1.5112873209806619, "grad_norm": 0.5706462264060974, "learning_rate": 1.7112347382996248e-06, "loss": 0.0257, "step": 186780 }, { "epoch": 1.5113682336758636, "grad_norm": 0.6687204241752625, "learning_rate": 1.7107029154789445e-06, "loss": 0.0207, "step": 186790 }, { "epoch": 1.5114491463710658, "grad_norm": 0.38609778881073, "learning_rate": 1.7101711582563496e-06, "loss": 0.0323, "step": 186800 }, { "epoch": 1.5115300590662675, "grad_norm": 0.521278977394104, "learning_rate": 1.7096394666424488e-06, "loss": 0.0211, "step": 186810 }, { "epoch": 1.5116109717614694, "grad_norm": 0.3806343972682953, "learning_rate": 1.7091078406478389e-06, "loss": 0.0192, "step": 186820 }, { "epoch": 1.5116918844566714, "grad_norm": 0.3306022882461548, "learning_rate": 1.708576280283128e-06, "loss": 0.0182, "step": 186830 }, { "epoch": 1.511772797151873, "grad_norm": 0.2735305428504944, "learning_rate": 1.708044785558916e-06, "loss": 0.0162, "step": 186840 }, { "epoch": 1.511853709847075, "grad_norm": 0.48914992809295654, "learning_rate": 1.7075133564857983e-06, "loss": 0.0189, "step": 186850 }, { "epoch": 1.511934622542277, "grad_norm": 0.5464953184127808, "learning_rate": 1.706981993074377e-06, "loss": 0.0278, "step": 186860 }, { "epoch": 1.5120155352374787, "grad_norm": 0.5252869129180908, "learning_rate": 1.70645069533525e-06, "loss": 0.0168, "step": 186870 }, { "epoch": 1.5120964479326806, "grad_norm": 0.32625052332878113, "learning_rate": 1.7059194632790076e-06, "loss": 0.0161, "step": 186880 }, { "epoch": 1.5121773606278825, "grad_norm": 0.3678915202617645, "learning_rate": 1.7053882969162488e-06, "loss": 0.025, "step": 186890 }, { "epoch": 1.5122582733230843, "grad_norm": 0.35556337237358093, "learning_rate": 1.7048571962575655e-06, "loss": 0.0146, "step": 186900 }, { "epoch": 1.5123391860182864, "grad_norm": 0.3275807499885559, "learning_rate": 1.7043261613135486e-06, "loss": 0.0178, "step": 186910 }, { "epoch": 1.5124200987134881, "grad_norm": 0.47633206844329834, "learning_rate": 1.7037951920947894e-06, "loss": 0.0111, "step": 186920 }, { "epoch": 1.5125010114086899, "grad_norm": 0.4303562641143799, "learning_rate": 1.7032642886118766e-06, "loss": 0.0155, "step": 186930 }, { "epoch": 1.512581924103892, "grad_norm": 0.14215734601020813, "learning_rate": 1.7027334508753974e-06, "loss": 0.0189, "step": 186940 }, { "epoch": 1.5126628367990937, "grad_norm": 0.22395546734333038, "learning_rate": 1.7022026788959389e-06, "loss": 0.0164, "step": 186950 }, { "epoch": 1.5127437494942957, "grad_norm": 0.20277082920074463, "learning_rate": 1.701671972684084e-06, "loss": 0.0102, "step": 186960 }, { "epoch": 1.5128246621894976, "grad_norm": 0.1824791431427002, "learning_rate": 1.7011413322504222e-06, "loss": 0.0267, "step": 186970 }, { "epoch": 1.5129055748846993, "grad_norm": 0.4491855204105377, "learning_rate": 1.7006107576055308e-06, "loss": 0.0103, "step": 186980 }, { "epoch": 1.5129864875799013, "grad_norm": 0.2708001136779785, "learning_rate": 1.7000802487599905e-06, "loss": 0.0228, "step": 186990 }, { "epoch": 1.5130674002751032, "grad_norm": 0.2431158870458603, "learning_rate": 1.699549805724387e-06, "loss": 0.0133, "step": 187000 }, { "epoch": 1.513148312970305, "grad_norm": 0.6354551315307617, "learning_rate": 1.699019428509293e-06, "loss": 0.0136, "step": 187010 }, { "epoch": 1.5132292256655069, "grad_norm": 0.29617762565612793, "learning_rate": 1.6984891171252865e-06, "loss": 0.0242, "step": 187020 }, { "epoch": 1.5133101383607088, "grad_norm": 0.36230185627937317, "learning_rate": 1.6979588715829476e-06, "loss": 0.0102, "step": 187030 }, { "epoch": 1.5133910510559105, "grad_norm": 0.2926217019557953, "learning_rate": 1.6974286918928452e-06, "loss": 0.0181, "step": 187040 }, { "epoch": 1.5134719637511127, "grad_norm": 0.22212515771389008, "learning_rate": 1.6968985780655567e-06, "loss": 0.0299, "step": 187050 }, { "epoch": 1.5135528764463144, "grad_norm": 0.2654447853565216, "learning_rate": 1.696368530111655e-06, "loss": 0.0104, "step": 187060 }, { "epoch": 1.5136337891415161, "grad_norm": 0.49590203166007996, "learning_rate": 1.695838548041705e-06, "loss": 0.0202, "step": 187070 }, { "epoch": 1.5137147018367183, "grad_norm": 0.5258897542953491, "learning_rate": 1.695308631866282e-06, "loss": 0.032, "step": 187080 }, { "epoch": 1.51379561453192, "grad_norm": 0.20345014333724976, "learning_rate": 1.6947787815959528e-06, "loss": 0.0242, "step": 187090 }, { "epoch": 1.513876527227122, "grad_norm": 0.45390820503234863, "learning_rate": 1.69424899724128e-06, "loss": 0.0224, "step": 187100 }, { "epoch": 1.513957439922324, "grad_norm": 0.14408357441425323, "learning_rate": 1.6937192788128342e-06, "loss": 0.013, "step": 187110 }, { "epoch": 1.5140383526175256, "grad_norm": 0.19465823471546173, "learning_rate": 1.6931896263211773e-06, "loss": 0.0196, "step": 187120 }, { "epoch": 1.5141192653127276, "grad_norm": 0.7586768269538879, "learning_rate": 1.6926600397768727e-06, "loss": 0.0204, "step": 187130 }, { "epoch": 1.5142001780079295, "grad_norm": 0.5914139747619629, "learning_rate": 1.6921305191904807e-06, "loss": 0.0156, "step": 187140 }, { "epoch": 1.5142810907031312, "grad_norm": 0.5685479044914246, "learning_rate": 1.6916010645725628e-06, "loss": 0.0185, "step": 187150 }, { "epoch": 1.5143620033983332, "grad_norm": 0.3673056662082672, "learning_rate": 1.6910716759336777e-06, "loss": 0.0165, "step": 187160 }, { "epoch": 1.514442916093535, "grad_norm": 0.43736860156059265, "learning_rate": 1.6905423532843817e-06, "loss": 0.022, "step": 187170 }, { "epoch": 1.5145238287887368, "grad_norm": 0.4795878529548645, "learning_rate": 1.6900130966352306e-06, "loss": 0.0224, "step": 187180 }, { "epoch": 1.514604741483939, "grad_norm": 0.2520790696144104, "learning_rate": 1.6894839059967844e-06, "loss": 0.0231, "step": 187190 }, { "epoch": 1.5146856541791407, "grad_norm": 0.4556761384010315, "learning_rate": 1.6889547813795909e-06, "loss": 0.0185, "step": 187200 }, { "epoch": 1.5147665668743426, "grad_norm": 0.026350652799010277, "learning_rate": 1.6884257227942024e-06, "loss": 0.0133, "step": 187210 }, { "epoch": 1.5148474795695446, "grad_norm": 0.16636033356189728, "learning_rate": 1.687896730251175e-06, "loss": 0.026, "step": 187220 }, { "epoch": 1.5149283922647463, "grad_norm": 0.5333999991416931, "learning_rate": 1.6873678037610542e-06, "loss": 0.0211, "step": 187230 }, { "epoch": 1.5150093049599482, "grad_norm": 0.4498465955257416, "learning_rate": 1.6868389433343874e-06, "loss": 0.0121, "step": 187240 }, { "epoch": 1.5150902176551502, "grad_norm": 0.2611280679702759, "learning_rate": 1.6863101489817262e-06, "loss": 0.0172, "step": 187250 }, { "epoch": 1.515171130350352, "grad_norm": 0.303871750831604, "learning_rate": 1.6857814207136108e-06, "loss": 0.0267, "step": 187260 }, { "epoch": 1.5152520430455538, "grad_norm": 0.40650439262390137, "learning_rate": 1.6852527585405897e-06, "loss": 0.0211, "step": 187270 }, { "epoch": 1.5153329557407558, "grad_norm": 0.1540231704711914, "learning_rate": 1.684724162473207e-06, "loss": 0.0175, "step": 187280 }, { "epoch": 1.5154138684359575, "grad_norm": 0.1353926658630371, "learning_rate": 1.6841956325219978e-06, "loss": 0.0154, "step": 187290 }, { "epoch": 1.5154947811311594, "grad_norm": 0.5761480331420898, "learning_rate": 1.6836671686975087e-06, "loss": 0.025, "step": 187300 }, { "epoch": 1.5155756938263614, "grad_norm": 0.3442944288253784, "learning_rate": 1.683138771010277e-06, "loss": 0.0118, "step": 187310 }, { "epoch": 1.515656606521563, "grad_norm": 0.23891346156597137, "learning_rate": 1.6826104394708403e-06, "loss": 0.0174, "step": 187320 }, { "epoch": 1.5157375192167653, "grad_norm": 0.40370362997055054, "learning_rate": 1.6820821740897348e-06, "loss": 0.0164, "step": 187330 }, { "epoch": 1.515818431911967, "grad_norm": 0.4964565336704254, "learning_rate": 1.681553974877496e-06, "loss": 0.0114, "step": 187340 }, { "epoch": 1.515899344607169, "grad_norm": 0.5094736218452454, "learning_rate": 1.681025841844658e-06, "loss": 0.0321, "step": 187350 }, { "epoch": 1.5159802573023708, "grad_norm": 0.4648364186286926, "learning_rate": 1.6804977750017526e-06, "loss": 0.0188, "step": 187360 }, { "epoch": 1.5160611699975726, "grad_norm": 0.4945339560508728, "learning_rate": 1.6799697743593097e-06, "loss": 0.0188, "step": 187370 }, { "epoch": 1.5161420826927745, "grad_norm": 0.5444551706314087, "learning_rate": 1.6794418399278638e-06, "loss": 0.034, "step": 187380 }, { "epoch": 1.5162229953879764, "grad_norm": 0.4345456659793854, "learning_rate": 1.6789139717179387e-06, "loss": 0.0158, "step": 187390 }, { "epoch": 1.5163039080831782, "grad_norm": 0.19748233258724213, "learning_rate": 1.6783861697400622e-06, "loss": 0.0124, "step": 187400 }, { "epoch": 1.51638482077838, "grad_norm": 0.3487246036529541, "learning_rate": 1.6778584340047644e-06, "loss": 0.0187, "step": 187410 }, { "epoch": 1.516465733473582, "grad_norm": 0.10776074975728989, "learning_rate": 1.6773307645225651e-06, "loss": 0.0238, "step": 187420 }, { "epoch": 1.5165466461687838, "grad_norm": 0.22232194244861603, "learning_rate": 1.6768031613039871e-06, "loss": 0.0203, "step": 187430 }, { "epoch": 1.516627558863986, "grad_norm": 0.4280712902545929, "learning_rate": 1.6762756243595585e-06, "loss": 0.0202, "step": 187440 }, { "epoch": 1.5167084715591876, "grad_norm": 0.19480907917022705, "learning_rate": 1.6757481536997921e-06, "loss": 0.0247, "step": 187450 }, { "epoch": 1.5167893842543894, "grad_norm": 0.045069143176078796, "learning_rate": 1.675220749335213e-06, "loss": 0.0178, "step": 187460 }, { "epoch": 1.5168702969495915, "grad_norm": 0.4611981213092804, "learning_rate": 1.6746934112763385e-06, "loss": 0.0283, "step": 187470 }, { "epoch": 1.5169512096447932, "grad_norm": 0.2619459927082062, "learning_rate": 1.6741661395336806e-06, "loss": 0.0292, "step": 187480 }, { "epoch": 1.5170321223399952, "grad_norm": 0.3385044038295746, "learning_rate": 1.6736389341177595e-06, "loss": 0.0196, "step": 187490 }, { "epoch": 1.5171130350351971, "grad_norm": 0.5064250230789185, "learning_rate": 1.6731117950390878e-06, "loss": 0.0138, "step": 187500 }, { "epoch": 1.5171939477303988, "grad_norm": 0.21351683139801025, "learning_rate": 1.6725847223081776e-06, "loss": 0.014, "step": 187510 }, { "epoch": 1.5172748604256008, "grad_norm": 0.36788296699523926, "learning_rate": 1.6720577159355412e-06, "loss": 0.0184, "step": 187520 }, { "epoch": 1.5173557731208027, "grad_norm": 0.392390638589859, "learning_rate": 1.671530775931688e-06, "loss": 0.0142, "step": 187530 }, { "epoch": 1.5174366858160044, "grad_norm": 0.3080621063709259, "learning_rate": 1.6710039023071262e-06, "loss": 0.0191, "step": 187540 }, { "epoch": 1.5175175985112064, "grad_norm": 0.32690542936325073, "learning_rate": 1.6704770950723643e-06, "loss": 0.0262, "step": 187550 }, { "epoch": 1.5175985112064083, "grad_norm": 0.4038143754005432, "learning_rate": 1.6699503542379074e-06, "loss": 0.0368, "step": 187560 }, { "epoch": 1.51767942390161, "grad_norm": 0.7911895513534546, "learning_rate": 1.669423679814261e-06, "loss": 0.0215, "step": 187570 }, { "epoch": 1.5177603365968122, "grad_norm": 0.1311189830303192, "learning_rate": 1.668897071811928e-06, "loss": 0.0181, "step": 187580 }, { "epoch": 1.517841249292014, "grad_norm": 0.3386297821998596, "learning_rate": 1.6683705302414089e-06, "loss": 0.0145, "step": 187590 }, { "epoch": 1.5179221619872156, "grad_norm": 0.2854008972644806, "learning_rate": 1.6678440551132091e-06, "loss": 0.0149, "step": 187600 }, { "epoch": 1.5180030746824178, "grad_norm": 0.16334019601345062, "learning_rate": 1.6673176464378243e-06, "loss": 0.0145, "step": 187610 }, { "epoch": 1.5180839873776195, "grad_norm": 0.37214943766593933, "learning_rate": 1.666791304225751e-06, "loss": 0.02, "step": 187620 }, { "epoch": 1.5181649000728215, "grad_norm": 0.2896166741847992, "learning_rate": 1.6662650284874927e-06, "loss": 0.0273, "step": 187630 }, { "epoch": 1.5182458127680234, "grad_norm": 0.18286485970020294, "learning_rate": 1.6657388192335377e-06, "loss": 0.0123, "step": 187640 }, { "epoch": 1.5183267254632251, "grad_norm": 0.394046813249588, "learning_rate": 1.6652126764743825e-06, "loss": 0.0297, "step": 187650 }, { "epoch": 1.518407638158427, "grad_norm": 0.7677273750305176, "learning_rate": 1.6646866002205231e-06, "loss": 0.024, "step": 187660 }, { "epoch": 1.518488550853629, "grad_norm": 0.2257331907749176, "learning_rate": 1.6641605904824448e-06, "loss": 0.0167, "step": 187670 }, { "epoch": 1.5185694635488307, "grad_norm": 0.34549784660339355, "learning_rate": 1.6636346472706434e-06, "loss": 0.0242, "step": 187680 }, { "epoch": 1.5186503762440327, "grad_norm": 0.22151349484920502, "learning_rate": 1.6631087705956057e-06, "loss": 0.0157, "step": 187690 }, { "epoch": 1.5187312889392346, "grad_norm": 0.30683740973472595, "learning_rate": 1.6625829604678184e-06, "loss": 0.018, "step": 187700 }, { "epoch": 1.5188122016344363, "grad_norm": 0.3495403230190277, "learning_rate": 1.662057216897769e-06, "loss": 0.0192, "step": 187710 }, { "epoch": 1.5188931143296385, "grad_norm": 0.5917994379997253, "learning_rate": 1.6615315398959413e-06, "loss": 0.0161, "step": 187720 }, { "epoch": 1.5189740270248402, "grad_norm": 0.2780010998249054, "learning_rate": 1.6610059294728198e-06, "loss": 0.0115, "step": 187730 }, { "epoch": 1.5190549397200421, "grad_norm": 0.48507896065711975, "learning_rate": 1.6604803856388857e-06, "loss": 0.0238, "step": 187740 }, { "epoch": 1.519135852415244, "grad_norm": 0.1838018298149109, "learning_rate": 1.6599549084046201e-06, "loss": 0.0214, "step": 187750 }, { "epoch": 1.5192167651104458, "grad_norm": 0.18673963844776154, "learning_rate": 1.6594294977805026e-06, "loss": 0.0254, "step": 187760 }, { "epoch": 1.5192976778056477, "grad_norm": 0.2628438174724579, "learning_rate": 1.6589041537770117e-06, "loss": 0.0165, "step": 187770 }, { "epoch": 1.5193785905008497, "grad_norm": 0.26406484842300415, "learning_rate": 1.6583788764046244e-06, "loss": 0.0265, "step": 187780 }, { "epoch": 1.5194595031960514, "grad_norm": 0.42385774850845337, "learning_rate": 1.657853665673816e-06, "loss": 0.0294, "step": 187790 }, { "epoch": 1.5195404158912533, "grad_norm": 0.2075641006231308, "learning_rate": 1.6573285215950601e-06, "loss": 0.0135, "step": 187800 }, { "epoch": 1.5196213285864553, "grad_norm": 0.2642565667629242, "learning_rate": 1.656803444178829e-06, "loss": 0.0219, "step": 187810 }, { "epoch": 1.519702241281657, "grad_norm": 0.35191383957862854, "learning_rate": 1.656278433435598e-06, "loss": 0.0227, "step": 187820 }, { "epoch": 1.519783153976859, "grad_norm": 0.6034656167030334, "learning_rate": 1.6557534893758336e-06, "loss": 0.0159, "step": 187830 }, { "epoch": 1.5198640666720609, "grad_norm": 1.9740381240844727, "learning_rate": 1.6552286120100042e-06, "loss": 0.0148, "step": 187840 }, { "epoch": 1.5199449793672626, "grad_norm": 0.6197444200515747, "learning_rate": 1.6547038013485822e-06, "loss": 0.0235, "step": 187850 }, { "epoch": 1.5200258920624647, "grad_norm": 0.4898495674133301, "learning_rate": 1.6541790574020272e-06, "loss": 0.0158, "step": 187860 }, { "epoch": 1.5201068047576665, "grad_norm": 0.3272487223148346, "learning_rate": 1.6536543801808096e-06, "loss": 0.0237, "step": 187870 }, { "epoch": 1.5201877174528684, "grad_norm": 0.3277791142463684, "learning_rate": 1.6531297696953907e-06, "loss": 0.0193, "step": 187880 }, { "epoch": 1.5202686301480703, "grad_norm": 0.6612391471862793, "learning_rate": 1.6526052259562331e-06, "loss": 0.0241, "step": 187890 }, { "epoch": 1.520349542843272, "grad_norm": 0.2630116641521454, "learning_rate": 1.6520807489737972e-06, "loss": 0.0135, "step": 187900 }, { "epoch": 1.520430455538474, "grad_norm": 0.083973228931427, "learning_rate": 1.6515563387585432e-06, "loss": 0.0171, "step": 187910 }, { "epoch": 1.520511368233676, "grad_norm": 0.17625808715820312, "learning_rate": 1.6510319953209296e-06, "loss": 0.0171, "step": 187920 }, { "epoch": 1.5205922809288777, "grad_norm": 0.4392276406288147, "learning_rate": 1.6505077186714125e-06, "loss": 0.021, "step": 187930 }, { "epoch": 1.5206731936240796, "grad_norm": 0.18934331834316254, "learning_rate": 1.649983508820448e-06, "loss": 0.0157, "step": 187940 }, { "epoch": 1.5207541063192815, "grad_norm": 0.30124804377555847, "learning_rate": 1.6494593657784897e-06, "loss": 0.0232, "step": 187950 }, { "epoch": 1.5208350190144833, "grad_norm": 0.04600948467850685, "learning_rate": 1.6489352895559918e-06, "loss": 0.0105, "step": 187960 }, { "epoch": 1.5209159317096852, "grad_norm": 0.23916566371917725, "learning_rate": 1.6484112801634045e-06, "loss": 0.0302, "step": 187970 }, { "epoch": 1.5209968444048871, "grad_norm": 0.5218417644500732, "learning_rate": 1.6478873376111792e-06, "loss": 0.0256, "step": 187980 }, { "epoch": 1.5210777571000889, "grad_norm": 0.4471633732318878, "learning_rate": 1.647363461909764e-06, "loss": 0.0255, "step": 187990 }, { "epoch": 1.521158669795291, "grad_norm": 0.4520888030529022, "learning_rate": 1.6468396530696052e-06, "loss": 0.0171, "step": 188000 }, { "epoch": 1.5212395824904927, "grad_norm": 0.04511994123458862, "learning_rate": 1.646315911101154e-06, "loss": 0.0206, "step": 188010 }, { "epoch": 1.5213204951856947, "grad_norm": 0.03871552646160126, "learning_rate": 1.6457922360148504e-06, "loss": 0.0275, "step": 188020 }, { "epoch": 1.5214014078808966, "grad_norm": 0.23375815153121948, "learning_rate": 1.6452686278211383e-06, "loss": 0.0134, "step": 188030 }, { "epoch": 1.5214823205760983, "grad_norm": 0.2623630464076996, "learning_rate": 1.6447450865304643e-06, "loss": 0.0164, "step": 188040 }, { "epoch": 1.5215632332713003, "grad_norm": 0.31999489665031433, "learning_rate": 1.6442216121532644e-06, "loss": 0.0275, "step": 188050 }, { "epoch": 1.5216441459665022, "grad_norm": 0.3185531497001648, "learning_rate": 1.6436982046999784e-06, "loss": 0.0211, "step": 188060 }, { "epoch": 1.521725058661704, "grad_norm": 0.30349117517471313, "learning_rate": 1.643174864181048e-06, "loss": 0.0272, "step": 188070 }, { "epoch": 1.5218059713569059, "grad_norm": 0.245257169008255, "learning_rate": 1.6426515906069085e-06, "loss": 0.0126, "step": 188080 }, { "epoch": 1.5218868840521078, "grad_norm": 0.2686144709587097, "learning_rate": 1.6421283839879954e-06, "loss": 0.018, "step": 188090 }, { "epoch": 1.5219677967473095, "grad_norm": 0.3607664704322815, "learning_rate": 1.641605244334743e-06, "loss": 0.0171, "step": 188100 }, { "epoch": 1.5220487094425117, "grad_norm": 0.3624226450920105, "learning_rate": 1.6410821716575837e-06, "loss": 0.014, "step": 188110 }, { "epoch": 1.5221296221377134, "grad_norm": 0.3865061402320862, "learning_rate": 1.6405591659669502e-06, "loss": 0.0254, "step": 188120 }, { "epoch": 1.5222105348329151, "grad_norm": 0.5455400943756104, "learning_rate": 1.6400362272732712e-06, "loss": 0.019, "step": 188130 }, { "epoch": 1.5222914475281173, "grad_norm": 0.3335859775543213, "learning_rate": 1.6395133555869769e-06, "loss": 0.0082, "step": 188140 }, { "epoch": 1.522372360223319, "grad_norm": 0.4677444398403168, "learning_rate": 1.6389905509184945e-06, "loss": 0.0171, "step": 188150 }, { "epoch": 1.522453272918521, "grad_norm": 0.2301301658153534, "learning_rate": 1.63846781327825e-06, "loss": 0.0307, "step": 188160 }, { "epoch": 1.522534185613723, "grad_norm": 0.4226117432117462, "learning_rate": 1.6379451426766684e-06, "loss": 0.0342, "step": 188170 }, { "epoch": 1.5226150983089246, "grad_norm": 0.3976205289363861, "learning_rate": 1.6374225391241732e-06, "loss": 0.0169, "step": 188180 }, { "epoch": 1.5226960110041265, "grad_norm": 0.6670793890953064, "learning_rate": 1.636900002631187e-06, "loss": 0.0345, "step": 188190 }, { "epoch": 1.5227769236993285, "grad_norm": 0.2596950829029083, "learning_rate": 1.6363775332081304e-06, "loss": 0.0103, "step": 188200 }, { "epoch": 1.5228578363945302, "grad_norm": 0.4205598533153534, "learning_rate": 1.635855130865423e-06, "loss": 0.0279, "step": 188210 }, { "epoch": 1.5229387490897321, "grad_norm": 0.3062201738357544, "learning_rate": 1.6353327956134813e-06, "loss": 0.0226, "step": 188220 }, { "epoch": 1.523019661784934, "grad_norm": 0.36635297536849976, "learning_rate": 1.6348105274627252e-06, "loss": 0.0261, "step": 188230 }, { "epoch": 1.5231005744801358, "grad_norm": 0.5188004374504089, "learning_rate": 1.6342883264235709e-06, "loss": 0.028, "step": 188240 }, { "epoch": 1.523181487175338, "grad_norm": 0.3670632839202881, "learning_rate": 1.6337661925064273e-06, "loss": 0.0206, "step": 188250 }, { "epoch": 1.5232623998705397, "grad_norm": 0.5110785365104675, "learning_rate": 1.6332441257217118e-06, "loss": 0.0181, "step": 188260 }, { "epoch": 1.5233433125657414, "grad_norm": 0.3314979672431946, "learning_rate": 1.632722126079837e-06, "loss": 0.0158, "step": 188270 }, { "epoch": 1.5234242252609436, "grad_norm": 0.4785538911819458, "learning_rate": 1.632200193591207e-06, "loss": 0.0234, "step": 188280 }, { "epoch": 1.5235051379561453, "grad_norm": 0.28521928191185, "learning_rate": 1.6316783282662364e-06, "loss": 0.0117, "step": 188290 }, { "epoch": 1.5235860506513472, "grad_norm": 0.3405732810497284, "learning_rate": 1.6311565301153303e-06, "loss": 0.0325, "step": 188300 }, { "epoch": 1.5236669633465492, "grad_norm": 0.3538096249103546, "learning_rate": 1.6306347991488953e-06, "loss": 0.0088, "step": 188310 }, { "epoch": 1.5237478760417509, "grad_norm": 0.33488258719444275, "learning_rate": 1.6301131353773364e-06, "loss": 0.025, "step": 188320 }, { "epoch": 1.5238287887369528, "grad_norm": 0.28302231431007385, "learning_rate": 1.6295915388110567e-06, "loss": 0.0202, "step": 188330 }, { "epoch": 1.5239097014321548, "grad_norm": 0.6478462815284729, "learning_rate": 1.629070009460459e-06, "loss": 0.0339, "step": 188340 }, { "epoch": 1.5239906141273565, "grad_norm": 0.478916198015213, "learning_rate": 1.628548547335943e-06, "loss": 0.014, "step": 188350 }, { "epoch": 1.5240715268225584, "grad_norm": 0.14172731339931488, "learning_rate": 1.6280271524479086e-06, "loss": 0.0188, "step": 188360 }, { "epoch": 1.5241524395177604, "grad_norm": 0.5463016033172607, "learning_rate": 1.6275058248067549e-06, "loss": 0.012, "step": 188370 }, { "epoch": 1.524233352212962, "grad_norm": 0.35144537687301636, "learning_rate": 1.6269845644228771e-06, "loss": 0.0162, "step": 188380 }, { "epoch": 1.5243142649081642, "grad_norm": 0.5531435012817383, "learning_rate": 1.626463371306672e-06, "loss": 0.0255, "step": 188390 }, { "epoch": 1.524395177603366, "grad_norm": 0.2590913772583008, "learning_rate": 1.6259422454685325e-06, "loss": 0.018, "step": 188400 }, { "epoch": 1.524476090298568, "grad_norm": 0.30359771847724915, "learning_rate": 1.6254211869188507e-06, "loss": 0.0172, "step": 188410 }, { "epoch": 1.5245570029937698, "grad_norm": 0.30751273036003113, "learning_rate": 1.624900195668021e-06, "loss": 0.0159, "step": 188420 }, { "epoch": 1.5246379156889716, "grad_norm": 0.37173864245414734, "learning_rate": 1.6243792717264334e-06, "loss": 0.0195, "step": 188430 }, { "epoch": 1.5247188283841735, "grad_norm": 0.5294756889343262, "learning_rate": 1.6238584151044718e-06, "loss": 0.0169, "step": 188440 }, { "epoch": 1.5247997410793754, "grad_norm": 0.3897280991077423, "learning_rate": 1.6233376258125278e-06, "loss": 0.0268, "step": 188450 }, { "epoch": 1.5248806537745772, "grad_norm": 0.5961251854896545, "learning_rate": 1.6228169038609886e-06, "loss": 0.0211, "step": 188460 }, { "epoch": 1.524961566469779, "grad_norm": 0.5883966088294983, "learning_rate": 1.6222962492602335e-06, "loss": 0.0203, "step": 188470 }, { "epoch": 1.525042479164981, "grad_norm": 0.02061976119875908, "learning_rate": 1.6217756620206503e-06, "loss": 0.0248, "step": 188480 }, { "epoch": 1.5251233918601828, "grad_norm": 0.4501206576824188, "learning_rate": 1.6212551421526196e-06, "loss": 0.0265, "step": 188490 }, { "epoch": 1.5252043045553847, "grad_norm": 0.2999036908149719, "learning_rate": 1.6207346896665228e-06, "loss": 0.0211, "step": 188500 }, { "epoch": 1.5252852172505866, "grad_norm": 0.0031564582604914904, "learning_rate": 1.620214304572738e-06, "loss": 0.015, "step": 188510 }, { "epoch": 1.5253661299457884, "grad_norm": 0.4231225848197937, "learning_rate": 1.6196939868816448e-06, "loss": 0.0123, "step": 188520 }, { "epoch": 1.5254470426409905, "grad_norm": 0.18365438282489777, "learning_rate": 1.619173736603618e-06, "loss": 0.0347, "step": 188530 }, { "epoch": 1.5255279553361922, "grad_norm": 0.42557936906814575, "learning_rate": 1.6186535537490344e-06, "loss": 0.0178, "step": 188540 }, { "epoch": 1.5256088680313942, "grad_norm": 0.31659239530563354, "learning_rate": 1.6181334383282672e-06, "loss": 0.0113, "step": 188550 }, { "epoch": 1.5256897807265961, "grad_norm": 0.09168604761362076, "learning_rate": 1.6176133903516888e-06, "loss": 0.0189, "step": 188560 }, { "epoch": 1.5257706934217978, "grad_norm": 0.2063843011856079, "learning_rate": 1.617093409829671e-06, "loss": 0.0194, "step": 188570 }, { "epoch": 1.5258516061169998, "grad_norm": 0.627435028553009, "learning_rate": 1.6165734967725837e-06, "loss": 0.015, "step": 188580 }, { "epoch": 1.5259325188122017, "grad_norm": 0.4705042243003845, "learning_rate": 1.6160536511907948e-06, "loss": 0.0162, "step": 188590 }, { "epoch": 1.5260134315074034, "grad_norm": 0.34738120436668396, "learning_rate": 1.6155338730946724e-06, "loss": 0.0302, "step": 188600 }, { "epoch": 1.5260943442026054, "grad_norm": 0.06969581544399261, "learning_rate": 1.6150141624945803e-06, "loss": 0.0191, "step": 188610 }, { "epoch": 1.5261752568978073, "grad_norm": 0.6004441976547241, "learning_rate": 1.6144945194008877e-06, "loss": 0.0163, "step": 188620 }, { "epoch": 1.526256169593009, "grad_norm": 0.23648318648338318, "learning_rate": 1.6139749438239521e-06, "loss": 0.0092, "step": 188630 }, { "epoch": 1.526337082288211, "grad_norm": 0.39307427406311035, "learning_rate": 1.6134554357741389e-06, "loss": 0.0173, "step": 188640 }, { "epoch": 1.526417994983413, "grad_norm": 0.2958524525165558, "learning_rate": 1.6129359952618101e-06, "loss": 0.0095, "step": 188650 }, { "epoch": 1.5264989076786146, "grad_norm": 0.3245265483856201, "learning_rate": 1.6124166222973187e-06, "loss": 0.0201, "step": 188660 }, { "epoch": 1.5265798203738168, "grad_norm": 0.3852074146270752, "learning_rate": 1.6118973168910278e-06, "loss": 0.0214, "step": 188670 }, { "epoch": 1.5266607330690185, "grad_norm": 0.4232543706893921, "learning_rate": 1.6113780790532946e-06, "loss": 0.0135, "step": 188680 }, { "epoch": 1.5267416457642204, "grad_norm": 0.4537072777748108, "learning_rate": 1.6108589087944681e-06, "loss": 0.0197, "step": 188690 }, { "epoch": 1.5268225584594224, "grad_norm": 0.6042889952659607, "learning_rate": 1.6103398061249075e-06, "loss": 0.0159, "step": 188700 }, { "epoch": 1.526903471154624, "grad_norm": 0.3267030119895935, "learning_rate": 1.609820771054964e-06, "loss": 0.0177, "step": 188710 }, { "epoch": 1.526984383849826, "grad_norm": 0.43787533044815063, "learning_rate": 1.6093018035949882e-06, "loss": 0.0147, "step": 188720 }, { "epoch": 1.527065296545028, "grad_norm": 0.3526194095611572, "learning_rate": 1.6087829037553292e-06, "loss": 0.0217, "step": 188730 }, { "epoch": 1.5271462092402297, "grad_norm": 0.4551090896129608, "learning_rate": 1.6082640715463366e-06, "loss": 0.0193, "step": 188740 }, { "epoch": 1.5272271219354316, "grad_norm": 0.3090839684009552, "learning_rate": 1.6077453069783566e-06, "loss": 0.023, "step": 188750 }, { "epoch": 1.5273080346306336, "grad_norm": 0.3867185711860657, "learning_rate": 1.6072266100617352e-06, "loss": 0.0231, "step": 188760 }, { "epoch": 1.5273889473258353, "grad_norm": 0.7061505317687988, "learning_rate": 1.6067079808068147e-06, "loss": 0.0191, "step": 188770 }, { "epoch": 1.5274698600210375, "grad_norm": 0.26191675662994385, "learning_rate": 1.6061894192239435e-06, "loss": 0.0112, "step": 188780 }, { "epoch": 1.5275507727162392, "grad_norm": 0.32457441091537476, "learning_rate": 1.6056709253234582e-06, "loss": 0.0098, "step": 188790 }, { "epoch": 1.527631685411441, "grad_norm": 0.5978029370307922, "learning_rate": 1.6051524991156986e-06, "loss": 0.0172, "step": 188800 }, { "epoch": 1.527712598106643, "grad_norm": 0.1822155863046646, "learning_rate": 1.604634140611009e-06, "loss": 0.0236, "step": 188810 }, { "epoch": 1.5277935108018448, "grad_norm": 0.5613092184066772, "learning_rate": 1.6041158498197219e-06, "loss": 0.025, "step": 188820 }, { "epoch": 1.5278744234970467, "grad_norm": 0.35773536562919617, "learning_rate": 1.603597626752173e-06, "loss": 0.0288, "step": 188830 }, { "epoch": 1.5279553361922487, "grad_norm": 0.3978824019432068, "learning_rate": 1.6030794714187031e-06, "loss": 0.0181, "step": 188840 }, { "epoch": 1.5280362488874504, "grad_norm": 0.4154171347618103, "learning_rate": 1.6025613838296384e-06, "loss": 0.0273, "step": 188850 }, { "epoch": 1.5281171615826523, "grad_norm": 0.4595109522342682, "learning_rate": 1.6020433639953164e-06, "loss": 0.0191, "step": 188860 }, { "epoch": 1.5281980742778543, "grad_norm": 0.1903463453054428, "learning_rate": 1.6015254119260677e-06, "loss": 0.0189, "step": 188870 }, { "epoch": 1.528278986973056, "grad_norm": 0.5042871832847595, "learning_rate": 1.6010075276322168e-06, "loss": 0.0187, "step": 188880 }, { "epoch": 1.528359899668258, "grad_norm": 0.22359693050384521, "learning_rate": 1.600489711124097e-06, "loss": 0.0152, "step": 188890 }, { "epoch": 1.5284408123634599, "grad_norm": 0.5573954582214355, "learning_rate": 1.5999719624120335e-06, "loss": 0.0252, "step": 188900 }, { "epoch": 1.5285217250586616, "grad_norm": 0.08933082967996597, "learning_rate": 1.5994542815063507e-06, "loss": 0.0189, "step": 188910 }, { "epoch": 1.5286026377538637, "grad_norm": 0.519314706325531, "learning_rate": 1.598936668417374e-06, "loss": 0.0202, "step": 188920 }, { "epoch": 1.5286835504490655, "grad_norm": 0.05345451459288597, "learning_rate": 1.5984191231554253e-06, "loss": 0.0146, "step": 188930 }, { "epoch": 1.5287644631442672, "grad_norm": 0.4488089382648468, "learning_rate": 1.597901645730826e-06, "loss": 0.0203, "step": 188940 }, { "epoch": 1.5288453758394693, "grad_norm": 0.5139719843864441, "learning_rate": 1.597384236153896e-06, "loss": 0.0142, "step": 188950 }, { "epoch": 1.528926288534671, "grad_norm": 0.298163503408432, "learning_rate": 1.596866894434953e-06, "loss": 0.022, "step": 188960 }, { "epoch": 1.529007201229873, "grad_norm": 0.31839340925216675, "learning_rate": 1.5963496205843188e-06, "loss": 0.0266, "step": 188970 }, { "epoch": 1.529088113925075, "grad_norm": 0.32186880707740784, "learning_rate": 1.595832414612304e-06, "loss": 0.0278, "step": 188980 }, { "epoch": 1.5291690266202767, "grad_norm": 0.33564940094947815, "learning_rate": 1.5953152765292234e-06, "loss": 0.0194, "step": 188990 }, { "epoch": 1.5292499393154786, "grad_norm": 0.4560157060623169, "learning_rate": 1.594798206345396e-06, "loss": 0.0132, "step": 189000 }, { "epoch": 1.5293308520106805, "grad_norm": 0.18390649557113647, "learning_rate": 1.5942812040711276e-06, "loss": 0.0128, "step": 189010 }, { "epoch": 1.5294117647058822, "grad_norm": 0.2903648614883423, "learning_rate": 1.5937642697167288e-06, "loss": 0.017, "step": 189020 }, { "epoch": 1.5294926774010842, "grad_norm": 0.7481499314308167, "learning_rate": 1.5932474032925143e-06, "loss": 0.0202, "step": 189030 }, { "epoch": 1.5295735900962861, "grad_norm": 0.5534464120864868, "learning_rate": 1.5927306048087855e-06, "loss": 0.0205, "step": 189040 }, { "epoch": 1.5296545027914878, "grad_norm": 0.2799032926559448, "learning_rate": 1.5922138742758531e-06, "loss": 0.0138, "step": 189050 }, { "epoch": 1.52973541548669, "grad_norm": 0.8495020866394043, "learning_rate": 1.591697211704022e-06, "loss": 0.0166, "step": 189060 }, { "epoch": 1.5298163281818917, "grad_norm": 0.5225039720535278, "learning_rate": 1.5911806171035921e-06, "loss": 0.0182, "step": 189070 }, { "epoch": 1.5298972408770937, "grad_norm": 0.3601885437965393, "learning_rate": 1.5906640904848697e-06, "loss": 0.0122, "step": 189080 }, { "epoch": 1.5299781535722956, "grad_norm": 0.27090755105018616, "learning_rate": 1.5901476318581565e-06, "loss": 0.0163, "step": 189090 }, { "epoch": 1.5300590662674973, "grad_norm": 0.34966135025024414, "learning_rate": 1.5896312412337467e-06, "loss": 0.0148, "step": 189100 }, { "epoch": 1.5301399789626993, "grad_norm": 0.3795103132724762, "learning_rate": 1.589114918621944e-06, "loss": 0.0249, "step": 189110 }, { "epoch": 1.5302208916579012, "grad_norm": 0.18570739030838013, "learning_rate": 1.5885986640330435e-06, "loss": 0.0256, "step": 189120 }, { "epoch": 1.530301804353103, "grad_norm": 0.6647528409957886, "learning_rate": 1.5880824774773411e-06, "loss": 0.0281, "step": 189130 }, { "epoch": 1.5303827170483049, "grad_norm": 0.16644702851772308, "learning_rate": 1.5875663589651307e-06, "loss": 0.0214, "step": 189140 }, { "epoch": 1.5304636297435068, "grad_norm": 0.421263724565506, "learning_rate": 1.5870503085067052e-06, "loss": 0.0183, "step": 189150 }, { "epoch": 1.5305445424387085, "grad_norm": 0.23616071045398712, "learning_rate": 1.5865343261123561e-06, "loss": 0.0218, "step": 189160 }, { "epoch": 1.5306254551339105, "grad_norm": 0.4369950592517853, "learning_rate": 1.5860184117923738e-06, "loss": 0.0157, "step": 189170 }, { "epoch": 1.5307063678291124, "grad_norm": 0.41987836360931396, "learning_rate": 1.5855025655570455e-06, "loss": 0.0186, "step": 189180 }, { "epoch": 1.5307872805243141, "grad_norm": 0.43303173780441284, "learning_rate": 1.5849867874166637e-06, "loss": 0.0196, "step": 189190 }, { "epoch": 1.5308681932195163, "grad_norm": 0.49972543120384216, "learning_rate": 1.5844710773815092e-06, "loss": 0.0151, "step": 189200 }, { "epoch": 1.530949105914718, "grad_norm": 0.006122025661170483, "learning_rate": 1.583955435461867e-06, "loss": 0.0236, "step": 189210 }, { "epoch": 1.53103001860992, "grad_norm": 0.0661730095744133, "learning_rate": 1.5834398616680246e-06, "loss": 0.0151, "step": 189220 }, { "epoch": 1.5311109313051219, "grad_norm": 0.4396664500236511, "learning_rate": 1.582924356010261e-06, "loss": 0.0114, "step": 189230 }, { "epoch": 1.5311918440003236, "grad_norm": 0.20872275531291962, "learning_rate": 1.582408918498855e-06, "loss": 0.0191, "step": 189240 }, { "epoch": 1.5312727566955255, "grad_norm": 0.0906100794672966, "learning_rate": 1.5818935491440918e-06, "loss": 0.015, "step": 189250 }, { "epoch": 1.5313536693907275, "grad_norm": 0.4143432378768921, "learning_rate": 1.5813782479562424e-06, "loss": 0.024, "step": 189260 }, { "epoch": 1.5314345820859292, "grad_norm": 0.3838622570037842, "learning_rate": 1.5808630149455883e-06, "loss": 0.0179, "step": 189270 }, { "epoch": 1.5315154947811311, "grad_norm": 0.29582473635673523, "learning_rate": 1.580347850122405e-06, "loss": 0.0139, "step": 189280 }, { "epoch": 1.531596407476333, "grad_norm": 0.35763993859291077, "learning_rate": 1.5798327534969614e-06, "loss": 0.0206, "step": 189290 }, { "epoch": 1.5316773201715348, "grad_norm": 0.02870604395866394, "learning_rate": 1.579317725079535e-06, "loss": 0.0198, "step": 189300 }, { "epoch": 1.531758232866737, "grad_norm": 0.0639684796333313, "learning_rate": 1.578802764880395e-06, "loss": 0.0108, "step": 189310 }, { "epoch": 1.5318391455619387, "grad_norm": 0.36960160732269287, "learning_rate": 1.5782878729098117e-06, "loss": 0.0193, "step": 189320 }, { "epoch": 1.5319200582571404, "grad_norm": 0.3594573140144348, "learning_rate": 1.577773049178053e-06, "loss": 0.0222, "step": 189330 }, { "epoch": 1.5320009709523426, "grad_norm": 0.4871216118335724, "learning_rate": 1.5772582936953862e-06, "loss": 0.0128, "step": 189340 }, { "epoch": 1.5320818836475443, "grad_norm": 0.49644437432289124, "learning_rate": 1.576743606472077e-06, "loss": 0.0199, "step": 189350 }, { "epoch": 1.5321627963427462, "grad_norm": 0.38165998458862305, "learning_rate": 1.5762289875183895e-06, "loss": 0.023, "step": 189360 }, { "epoch": 1.5322437090379482, "grad_norm": 0.33460891246795654, "learning_rate": 1.5757144368445876e-06, "loss": 0.0207, "step": 189370 }, { "epoch": 1.5323246217331499, "grad_norm": 0.4597364664077759, "learning_rate": 1.5751999544609314e-06, "loss": 0.0172, "step": 189380 }, { "epoch": 1.5324055344283518, "grad_norm": 0.2751327157020569, "learning_rate": 1.5746855403776829e-06, "loss": 0.0219, "step": 189390 }, { "epoch": 1.5324864471235538, "grad_norm": 0.5973648428916931, "learning_rate": 1.5741711946050981e-06, "loss": 0.0095, "step": 189400 }, { "epoch": 1.5325673598187555, "grad_norm": 0.1604260802268982, "learning_rate": 1.5736569171534393e-06, "loss": 0.0254, "step": 189410 }, { "epoch": 1.5326482725139574, "grad_norm": 0.37946265935897827, "learning_rate": 1.5731427080329593e-06, "loss": 0.0216, "step": 189420 }, { "epoch": 1.5327291852091594, "grad_norm": 0.052017033100128174, "learning_rate": 1.5726285672539116e-06, "loss": 0.019, "step": 189430 }, { "epoch": 1.532810097904361, "grad_norm": 0.49092450737953186, "learning_rate": 1.5721144948265549e-06, "loss": 0.0341, "step": 189440 }, { "epoch": 1.5328910105995632, "grad_norm": 0.32807686924934387, "learning_rate": 1.5716004907611343e-06, "loss": 0.0176, "step": 189450 }, { "epoch": 1.532971923294765, "grad_norm": 0.635482132434845, "learning_rate": 1.5710865550679061e-06, "loss": 0.0227, "step": 189460 }, { "epoch": 1.5330528359899667, "grad_norm": 0.49979162216186523, "learning_rate": 1.5705726877571197e-06, "loss": 0.0182, "step": 189470 }, { "epoch": 1.5331337486851688, "grad_norm": 0.3495912253856659, "learning_rate": 1.5700588888390179e-06, "loss": 0.0141, "step": 189480 }, { "epoch": 1.5332146613803705, "grad_norm": 0.3706452250480652, "learning_rate": 1.569545158323852e-06, "loss": 0.0191, "step": 189490 }, { "epoch": 1.5332955740755725, "grad_norm": 0.31639164686203003, "learning_rate": 1.5690314962218661e-06, "loss": 0.014, "step": 189500 }, { "epoch": 1.5333764867707744, "grad_norm": 0.09950955212116241, "learning_rate": 1.568517902543304e-06, "loss": 0.0135, "step": 189510 }, { "epoch": 1.5334573994659761, "grad_norm": 0.5155535936355591, "learning_rate": 1.5680043772984078e-06, "loss": 0.0284, "step": 189520 }, { "epoch": 1.533538312161178, "grad_norm": 0.21830378472805023, "learning_rate": 1.5674909204974192e-06, "loss": 0.0141, "step": 189530 }, { "epoch": 1.53361922485638, "grad_norm": 0.2588229775428772, "learning_rate": 1.5669775321505781e-06, "loss": 0.0139, "step": 189540 }, { "epoch": 1.5337001375515817, "grad_norm": 0.8389936089515686, "learning_rate": 1.5664642122681223e-06, "loss": 0.0173, "step": 189550 }, { "epoch": 1.5337810502467837, "grad_norm": 0.25808483362197876, "learning_rate": 1.5659509608602895e-06, "loss": 0.0164, "step": 189560 }, { "epoch": 1.5338619629419856, "grad_norm": 0.1942775398492813, "learning_rate": 1.5654377779373152e-06, "loss": 0.0173, "step": 189570 }, { "epoch": 1.5339428756371873, "grad_norm": 1.04310941696167, "learning_rate": 1.564924663509434e-06, "loss": 0.024, "step": 189580 }, { "epoch": 1.5340237883323895, "grad_norm": 0.29033955931663513, "learning_rate": 1.5644116175868762e-06, "loss": 0.0188, "step": 189590 }, { "epoch": 1.5341047010275912, "grad_norm": 0.503363311290741, "learning_rate": 1.56389864017988e-06, "loss": 0.0163, "step": 189600 }, { "epoch": 1.534185613722793, "grad_norm": 0.7323974370956421, "learning_rate": 1.5633857312986695e-06, "loss": 0.0211, "step": 189610 }, { "epoch": 1.534266526417995, "grad_norm": 0.4540530741214752, "learning_rate": 1.5628728909534735e-06, "loss": 0.0276, "step": 189620 }, { "epoch": 1.5343474391131968, "grad_norm": 0.3711126744747162, "learning_rate": 1.5623601191545252e-06, "loss": 0.0131, "step": 189630 }, { "epoch": 1.5344283518083988, "grad_norm": 1.2948524951934814, "learning_rate": 1.5618474159120457e-06, "loss": 0.0263, "step": 189640 }, { "epoch": 1.5345092645036007, "grad_norm": 0.402678906917572, "learning_rate": 1.5613347812362596e-06, "loss": 0.0335, "step": 189650 }, { "epoch": 1.5345901771988024, "grad_norm": 0.33721786737442017, "learning_rate": 1.5608222151373946e-06, "loss": 0.0271, "step": 189660 }, { "epoch": 1.5346710898940044, "grad_norm": 0.21561257541179657, "learning_rate": 1.5603097176256676e-06, "loss": 0.0156, "step": 189670 }, { "epoch": 1.5347520025892063, "grad_norm": 0.2036677747964859, "learning_rate": 1.5597972887113033e-06, "loss": 0.0097, "step": 189680 }, { "epoch": 1.534832915284408, "grad_norm": 0.34027427434921265, "learning_rate": 1.5592849284045192e-06, "loss": 0.0149, "step": 189690 }, { "epoch": 1.53491382797961, "grad_norm": 0.3846531808376312, "learning_rate": 1.5587726367155337e-06, "loss": 0.0221, "step": 189700 }, { "epoch": 1.534994740674812, "grad_norm": 0.2911316156387329, "learning_rate": 1.5582604136545632e-06, "loss": 0.0153, "step": 189710 }, { "epoch": 1.5350756533700136, "grad_norm": 0.3874364495277405, "learning_rate": 1.5577482592318227e-06, "loss": 0.0195, "step": 189720 }, { "epoch": 1.5351565660652158, "grad_norm": 0.41790124773979187, "learning_rate": 1.5572361734575265e-06, "loss": 0.0142, "step": 189730 }, { "epoch": 1.5352374787604175, "grad_norm": 0.3483874201774597, "learning_rate": 1.5567241563418867e-06, "loss": 0.0161, "step": 189740 }, { "epoch": 1.5353183914556194, "grad_norm": 0.32631915807724, "learning_rate": 1.556212207895114e-06, "loss": 0.0111, "step": 189750 }, { "epoch": 1.5353993041508214, "grad_norm": 0.5105257034301758, "learning_rate": 1.5557003281274197e-06, "loss": 0.0213, "step": 189760 }, { "epoch": 1.535480216846023, "grad_norm": 0.3800119161605835, "learning_rate": 1.5551885170490105e-06, "loss": 0.0124, "step": 189770 }, { "epoch": 1.535561129541225, "grad_norm": 0.5047524571418762, "learning_rate": 1.554676774670094e-06, "loss": 0.033, "step": 189780 }, { "epoch": 1.535642042236427, "grad_norm": 0.03444459289312363, "learning_rate": 1.5541651010008756e-06, "loss": 0.0214, "step": 189790 }, { "epoch": 1.5357229549316287, "grad_norm": 0.6638785600662231, "learning_rate": 1.5536534960515603e-06, "loss": 0.0167, "step": 189800 }, { "epoch": 1.5358038676268306, "grad_norm": 0.2795383036136627, "learning_rate": 1.5531419598323488e-06, "loss": 0.0113, "step": 189810 }, { "epoch": 1.5358847803220326, "grad_norm": 0.3619678318500519, "learning_rate": 1.5526304923534474e-06, "loss": 0.014, "step": 189820 }, { "epoch": 1.5359656930172343, "grad_norm": 0.41720181703567505, "learning_rate": 1.5521190936250513e-06, "loss": 0.026, "step": 189830 }, { "epoch": 1.5360466057124362, "grad_norm": 0.5494118332862854, "learning_rate": 1.5516077636573596e-06, "loss": 0.0205, "step": 189840 }, { "epoch": 1.5361275184076382, "grad_norm": 0.33097562193870544, "learning_rate": 1.5510965024605745e-06, "loss": 0.0201, "step": 189850 }, { "epoch": 1.5362084311028399, "grad_norm": 0.339618057012558, "learning_rate": 1.550585310044887e-06, "loss": 0.0149, "step": 189860 }, { "epoch": 1.536289343798042, "grad_norm": 0.5602640509605408, "learning_rate": 1.550074186420492e-06, "loss": 0.0209, "step": 189870 }, { "epoch": 1.5363702564932438, "grad_norm": 0.2652903199195862, "learning_rate": 1.5495631315975857e-06, "loss": 0.0145, "step": 189880 }, { "epoch": 1.5364511691884457, "grad_norm": 0.483058363199234, "learning_rate": 1.5490521455863582e-06, "loss": 0.0301, "step": 189890 }, { "epoch": 1.5365320818836476, "grad_norm": 0.3334938585758209, "learning_rate": 1.5485412283970009e-06, "loss": 0.0128, "step": 189900 }, { "epoch": 1.5366129945788494, "grad_norm": 0.26350274682044983, "learning_rate": 1.5480303800397023e-06, "loss": 0.0251, "step": 189910 }, { "epoch": 1.5366939072740513, "grad_norm": 0.33229199051856995, "learning_rate": 1.54751960052465e-06, "loss": 0.0281, "step": 189920 }, { "epoch": 1.5367748199692532, "grad_norm": 0.3667355179786682, "learning_rate": 1.5470088898620317e-06, "loss": 0.016, "step": 189930 }, { "epoch": 1.536855732664455, "grad_norm": 0.45242658257484436, "learning_rate": 1.5464982480620305e-06, "loss": 0.0163, "step": 189940 }, { "epoch": 1.536936645359657, "grad_norm": 0.4283197224140167, "learning_rate": 1.5459876751348317e-06, "loss": 0.0262, "step": 189950 }, { "epoch": 1.5370175580548588, "grad_norm": 0.44471171498298645, "learning_rate": 1.5454771710906164e-06, "loss": 0.014, "step": 189960 }, { "epoch": 1.5370984707500606, "grad_norm": 0.3923569321632385, "learning_rate": 1.5449667359395664e-06, "loss": 0.0167, "step": 189970 }, { "epoch": 1.5371793834452627, "grad_norm": 0.455228328704834, "learning_rate": 1.5444563696918614e-06, "loss": 0.0202, "step": 189980 }, { "epoch": 1.5372602961404644, "grad_norm": 0.40130266547203064, "learning_rate": 1.5439460723576787e-06, "loss": 0.0156, "step": 189990 }, { "epoch": 1.5373412088356662, "grad_norm": 0.3356005549430847, "learning_rate": 1.5434358439471936e-06, "loss": 0.0162, "step": 190000 }, { "epoch": 1.5374221215308683, "grad_norm": 0.4527864456176758, "learning_rate": 1.542925684470587e-06, "loss": 0.0304, "step": 190010 }, { "epoch": 1.53750303422607, "grad_norm": 0.21591317653656006, "learning_rate": 1.5424155939380282e-06, "loss": 0.0164, "step": 190020 }, { "epoch": 1.537583946921272, "grad_norm": 0.17794401943683624, "learning_rate": 1.5419055723596888e-06, "loss": 0.0185, "step": 190030 }, { "epoch": 1.537664859616474, "grad_norm": 0.5357354879379272, "learning_rate": 1.5413956197457446e-06, "loss": 0.026, "step": 190040 }, { "epoch": 1.5377457723116756, "grad_norm": 0.3249863386154175, "learning_rate": 1.5408857361063645e-06, "loss": 0.0094, "step": 190050 }, { "epoch": 1.5378266850068776, "grad_norm": 0.3849373459815979, "learning_rate": 1.5403759214517128e-06, "loss": 0.0177, "step": 190060 }, { "epoch": 1.5379075977020795, "grad_norm": 0.3746972382068634, "learning_rate": 1.5398661757919615e-06, "loss": 0.0193, "step": 190070 }, { "epoch": 1.5379885103972812, "grad_norm": 0.473422646522522, "learning_rate": 1.5393564991372745e-06, "loss": 0.0137, "step": 190080 }, { "epoch": 1.5380694230924832, "grad_norm": 0.23562520742416382, "learning_rate": 1.5388468914978167e-06, "loss": 0.0171, "step": 190090 }, { "epoch": 1.5381503357876851, "grad_norm": 0.17363809049129486, "learning_rate": 1.5383373528837503e-06, "loss": 0.0168, "step": 190100 }, { "epoch": 1.5382312484828868, "grad_norm": 0.5861110687255859, "learning_rate": 1.537827883305238e-06, "loss": 0.0271, "step": 190110 }, { "epoch": 1.538312161178089, "grad_norm": 0.47015950083732605, "learning_rate": 1.5373184827724392e-06, "loss": 0.0229, "step": 190120 }, { "epoch": 1.5383930738732907, "grad_norm": 0.26287126541137695, "learning_rate": 1.536809151295513e-06, "loss": 0.0154, "step": 190130 }, { "epoch": 1.5384739865684924, "grad_norm": 0.18134450912475586, "learning_rate": 1.5362998888846175e-06, "loss": 0.0112, "step": 190140 }, { "epoch": 1.5385548992636946, "grad_norm": 0.370821088552475, "learning_rate": 1.5357906955499086e-06, "loss": 0.0181, "step": 190150 }, { "epoch": 1.5386358119588963, "grad_norm": 0.06941548734903336, "learning_rate": 1.5352815713015413e-06, "loss": 0.0131, "step": 190160 }, { "epoch": 1.5387167246540983, "grad_norm": 0.7018948793411255, "learning_rate": 1.5347725161496685e-06, "loss": 0.0154, "step": 190170 }, { "epoch": 1.5387976373493002, "grad_norm": 0.34041690826416016, "learning_rate": 1.5342635301044423e-06, "loss": 0.0209, "step": 190180 }, { "epoch": 1.538878550044502, "grad_norm": 0.6201601028442383, "learning_rate": 1.5337546131760133e-06, "loss": 0.0281, "step": 190190 }, { "epoch": 1.5389594627397039, "grad_norm": 0.37790045142173767, "learning_rate": 1.5332457653745314e-06, "loss": 0.0217, "step": 190200 }, { "epoch": 1.5390403754349058, "grad_norm": 0.23108680546283722, "learning_rate": 1.5327369867101438e-06, "loss": 0.0156, "step": 190210 }, { "epoch": 1.5391212881301075, "grad_norm": 0.40086349844932556, "learning_rate": 1.532228277192996e-06, "loss": 0.0189, "step": 190220 }, { "epoch": 1.5392022008253095, "grad_norm": 0.47030919790267944, "learning_rate": 1.5317196368332354e-06, "loss": 0.0189, "step": 190230 }, { "epoch": 1.5392831135205114, "grad_norm": 0.363799124956131, "learning_rate": 1.5312110656410066e-06, "loss": 0.0186, "step": 190240 }, { "epoch": 1.539364026215713, "grad_norm": 0.03491400182247162, "learning_rate": 1.5307025636264472e-06, "loss": 0.0182, "step": 190250 }, { "epoch": 1.5394449389109153, "grad_norm": 0.3166523873806, "learning_rate": 1.5301941307997025e-06, "loss": 0.024, "step": 190260 }, { "epoch": 1.539525851606117, "grad_norm": 0.22934581339359283, "learning_rate": 1.5296857671709125e-06, "loss": 0.016, "step": 190270 }, { "epoch": 1.539606764301319, "grad_norm": 0.28400295972824097, "learning_rate": 1.5291774727502106e-06, "loss": 0.0282, "step": 190280 }, { "epoch": 1.5396876769965209, "grad_norm": 0.3703850507736206, "learning_rate": 1.5286692475477383e-06, "loss": 0.0197, "step": 190290 }, { "epoch": 1.5397685896917226, "grad_norm": 0.45916303992271423, "learning_rate": 1.5281610915736294e-06, "loss": 0.0257, "step": 190300 }, { "epoch": 1.5398495023869245, "grad_norm": 0.31661081314086914, "learning_rate": 1.5276530048380184e-06, "loss": 0.0228, "step": 190310 }, { "epoch": 1.5399304150821265, "grad_norm": 0.23069731891155243, "learning_rate": 1.527144987351038e-06, "loss": 0.0224, "step": 190320 }, { "epoch": 1.5400113277773282, "grad_norm": 0.205572709441185, "learning_rate": 1.526637039122819e-06, "loss": 0.013, "step": 190330 }, { "epoch": 1.5400922404725301, "grad_norm": 0.2979294955730438, "learning_rate": 1.5261291601634915e-06, "loss": 0.0268, "step": 190340 }, { "epoch": 1.540173153167732, "grad_norm": 0.8021060228347778, "learning_rate": 1.5256213504831846e-06, "loss": 0.0116, "step": 190350 }, { "epoch": 1.5402540658629338, "grad_norm": 0.2351778894662857, "learning_rate": 1.5251136100920249e-06, "loss": 0.0126, "step": 190360 }, { "epoch": 1.5403349785581357, "grad_norm": 0.03530779108405113, "learning_rate": 1.5246059390001383e-06, "loss": 0.0168, "step": 190370 }, { "epoch": 1.5404158912533377, "grad_norm": 0.19943851232528687, "learning_rate": 1.5240983372176493e-06, "loss": 0.0148, "step": 190380 }, { "epoch": 1.5404968039485394, "grad_norm": 0.2546260952949524, "learning_rate": 1.5235908047546816e-06, "loss": 0.019, "step": 190390 }, { "epoch": 1.5405777166437415, "grad_norm": 0.626494824886322, "learning_rate": 1.523083341621356e-06, "loss": 0.02, "step": 190400 }, { "epoch": 1.5406586293389433, "grad_norm": 0.27223941683769226, "learning_rate": 1.522575947827793e-06, "loss": 0.0131, "step": 190410 }, { "epoch": 1.5407395420341452, "grad_norm": 0.3812236785888672, "learning_rate": 1.5220686233841098e-06, "loss": 0.0237, "step": 190420 }, { "epoch": 1.5408204547293471, "grad_norm": 0.0554778091609478, "learning_rate": 1.5215613683004288e-06, "loss": 0.03, "step": 190430 }, { "epoch": 1.5409013674245489, "grad_norm": 0.28972548246383667, "learning_rate": 1.5210541825868602e-06, "loss": 0.0225, "step": 190440 }, { "epoch": 1.5409822801197508, "grad_norm": 0.5758192539215088, "learning_rate": 1.5205470662535226e-06, "loss": 0.016, "step": 190450 }, { "epoch": 1.5410631928149527, "grad_norm": 0.39173731207847595, "learning_rate": 1.5200400193105302e-06, "loss": 0.0123, "step": 190460 }, { "epoch": 1.5411441055101545, "grad_norm": 0.1907898634672165, "learning_rate": 1.5195330417679899e-06, "loss": 0.0201, "step": 190470 }, { "epoch": 1.5412250182053564, "grad_norm": 0.3117116689682007, "learning_rate": 1.5190261336360174e-06, "loss": 0.0092, "step": 190480 }, { "epoch": 1.5413059309005583, "grad_norm": 0.5158648490905762, "learning_rate": 1.5185192949247197e-06, "loss": 0.0229, "step": 190490 }, { "epoch": 1.54138684359576, "grad_norm": 0.35623931884765625, "learning_rate": 1.5180125256442047e-06, "loss": 0.0218, "step": 190500 }, { "epoch": 1.541467756290962, "grad_norm": 0.3451787531375885, "learning_rate": 1.5175058258045794e-06, "loss": 0.0271, "step": 190510 }, { "epoch": 1.541548668986164, "grad_norm": 0.3441033363342285, "learning_rate": 1.5169991954159484e-06, "loss": 0.0197, "step": 190520 }, { "epoch": 1.5416295816813657, "grad_norm": 0.21660709381103516, "learning_rate": 1.5164926344884157e-06, "loss": 0.0108, "step": 190530 }, { "epoch": 1.5417104943765678, "grad_norm": 0.40139254927635193, "learning_rate": 1.5159861430320833e-06, "loss": 0.022, "step": 190540 }, { "epoch": 1.5417914070717695, "grad_norm": 0.5174697637557983, "learning_rate": 1.515479721057052e-06, "loss": 0.029, "step": 190550 }, { "epoch": 1.5418723197669715, "grad_norm": 0.2604166567325592, "learning_rate": 1.5149733685734219e-06, "loss": 0.0117, "step": 190560 }, { "epoch": 1.5419532324621734, "grad_norm": 0.5280420780181885, "learning_rate": 1.5144670855912908e-06, "loss": 0.0263, "step": 190570 }, { "epoch": 1.5420341451573751, "grad_norm": 0.44209712743759155, "learning_rate": 1.5139608721207532e-06, "loss": 0.0166, "step": 190580 }, { "epoch": 1.542115057852577, "grad_norm": 0.6060330271720886, "learning_rate": 1.5134547281719103e-06, "loss": 0.0208, "step": 190590 }, { "epoch": 1.542195970547779, "grad_norm": 0.1817963868379593, "learning_rate": 1.5129486537548505e-06, "loss": 0.0177, "step": 190600 }, { "epoch": 1.5422768832429807, "grad_norm": 0.4868391454219818, "learning_rate": 1.5124426488796668e-06, "loss": 0.0315, "step": 190610 }, { "epoch": 1.5423577959381827, "grad_norm": 0.4000928997993469, "learning_rate": 1.5119367135564555e-06, "loss": 0.0198, "step": 190620 }, { "epoch": 1.5424387086333846, "grad_norm": 0.1487213671207428, "learning_rate": 1.5114308477952994e-06, "loss": 0.0176, "step": 190630 }, { "epoch": 1.5425196213285863, "grad_norm": 0.6170963644981384, "learning_rate": 1.5109250516062917e-06, "loss": 0.0261, "step": 190640 }, { "epoch": 1.5426005340237885, "grad_norm": 0.2822800874710083, "learning_rate": 1.5104193249995196e-06, "loss": 0.0204, "step": 190650 }, { "epoch": 1.5426814467189902, "grad_norm": 0.5128058791160583, "learning_rate": 1.5099136679850647e-06, "loss": 0.0171, "step": 190660 }, { "epoch": 1.542762359414192, "grad_norm": 0.22123385965824127, "learning_rate": 1.5094080805730148e-06, "loss": 0.022, "step": 190670 }, { "epoch": 1.542843272109394, "grad_norm": 0.5814074277877808, "learning_rate": 1.5089025627734538e-06, "loss": 0.0239, "step": 190680 }, { "epoch": 1.5429241848045958, "grad_norm": 0.2386779934167862, "learning_rate": 1.5083971145964577e-06, "loss": 0.0145, "step": 190690 }, { "epoch": 1.5430050974997978, "grad_norm": 0.12239392846822739, "learning_rate": 1.5078917360521117e-06, "loss": 0.0139, "step": 190700 }, { "epoch": 1.5430860101949997, "grad_norm": 0.3454565703868866, "learning_rate": 1.5073864271504923e-06, "loss": 0.034, "step": 190710 }, { "epoch": 1.5431669228902014, "grad_norm": 0.46616819500923157, "learning_rate": 1.5068811879016782e-06, "loss": 0.0171, "step": 190720 }, { "epoch": 1.5432478355854033, "grad_norm": 0.09493325650691986, "learning_rate": 1.506376018315744e-06, "loss": 0.0179, "step": 190730 }, { "epoch": 1.5433287482806053, "grad_norm": 0.5562183856964111, "learning_rate": 1.5058709184027652e-06, "loss": 0.0098, "step": 190740 }, { "epoch": 1.543409660975807, "grad_norm": 1.0909208059310913, "learning_rate": 1.505365888172814e-06, "loss": 0.0154, "step": 190750 }, { "epoch": 1.543490573671009, "grad_norm": 0.6178295016288757, "learning_rate": 1.5048609276359632e-06, "loss": 0.0242, "step": 190760 }, { "epoch": 1.5435714863662109, "grad_norm": 0.3516148030757904, "learning_rate": 1.5043560368022803e-06, "loss": 0.025, "step": 190770 }, { "epoch": 1.5436523990614126, "grad_norm": 0.26211902499198914, "learning_rate": 1.5038512156818402e-06, "loss": 0.0142, "step": 190780 }, { "epoch": 1.5437333117566148, "grad_norm": 0.5756239891052246, "learning_rate": 1.5033464642847056e-06, "loss": 0.0244, "step": 190790 }, { "epoch": 1.5438142244518165, "grad_norm": 0.4354715049266815, "learning_rate": 1.5028417826209425e-06, "loss": 0.0118, "step": 190800 }, { "epoch": 1.5438951371470182, "grad_norm": 0.43855276703834534, "learning_rate": 1.5023371707006196e-06, "loss": 0.0209, "step": 190810 }, { "epoch": 1.5439760498422204, "grad_norm": 0.1899714320898056, "learning_rate": 1.501832628533797e-06, "loss": 0.0244, "step": 190820 }, { "epoch": 1.544056962537422, "grad_norm": 0.5221223831176758, "learning_rate": 1.501328156130536e-06, "loss": 0.016, "step": 190830 }, { "epoch": 1.544137875232624, "grad_norm": 0.33143526315689087, "learning_rate": 1.5008237535009018e-06, "loss": 0.0157, "step": 190840 }, { "epoch": 1.544218787927826, "grad_norm": 0.5150995850563049, "learning_rate": 1.5003194206549477e-06, "loss": 0.0238, "step": 190850 }, { "epoch": 1.5442997006230277, "grad_norm": 0.493904173374176, "learning_rate": 1.4998151576027364e-06, "loss": 0.0378, "step": 190860 }, { "epoch": 1.5443806133182296, "grad_norm": 0.36835336685180664, "learning_rate": 1.4993109643543242e-06, "loss": 0.017, "step": 190870 }, { "epoch": 1.5444615260134316, "grad_norm": 0.012846049852669239, "learning_rate": 1.4988068409197609e-06, "loss": 0.0199, "step": 190880 }, { "epoch": 1.5445424387086333, "grad_norm": 0.7279520034790039, "learning_rate": 1.4983027873091056e-06, "loss": 0.0192, "step": 190890 }, { "epoch": 1.5446233514038352, "grad_norm": 0.5287958383560181, "learning_rate": 1.4977988035324103e-06, "loss": 0.0263, "step": 190900 }, { "epoch": 1.5447042640990372, "grad_norm": 0.48889651894569397, "learning_rate": 1.4972948895997214e-06, "loss": 0.0252, "step": 190910 }, { "epoch": 1.5447851767942389, "grad_norm": 0.4500894546508789, "learning_rate": 1.4967910455210926e-06, "loss": 0.014, "step": 190920 }, { "epoch": 1.544866089489441, "grad_norm": 0.37139034271240234, "learning_rate": 1.4962872713065714e-06, "loss": 0.0193, "step": 190930 }, { "epoch": 1.5449470021846428, "grad_norm": 0.7458708882331848, "learning_rate": 1.4957835669662035e-06, "loss": 0.0255, "step": 190940 }, { "epoch": 1.5450279148798447, "grad_norm": 0.30085185170173645, "learning_rate": 1.4952799325100347e-06, "loss": 0.0113, "step": 190950 }, { "epoch": 1.5451088275750466, "grad_norm": 0.4502141773700714, "learning_rate": 1.4947763679481092e-06, "loss": 0.029, "step": 190960 }, { "epoch": 1.5451897402702484, "grad_norm": 0.35844099521636963, "learning_rate": 1.4942728732904688e-06, "loss": 0.0143, "step": 190970 }, { "epoch": 1.5452706529654503, "grad_norm": 0.24850012362003326, "learning_rate": 1.4937694485471548e-06, "loss": 0.0224, "step": 190980 }, { "epoch": 1.5453515656606522, "grad_norm": 0.5299362540245056, "learning_rate": 1.4932660937282062e-06, "loss": 0.023, "step": 190990 }, { "epoch": 1.545432478355854, "grad_norm": 0.44300901889801025, "learning_rate": 1.4927628088436653e-06, "loss": 0.018, "step": 191000 }, { "epoch": 1.545513391051056, "grad_norm": 0.5620959401130676, "learning_rate": 1.4922595939035639e-06, "loss": 0.017, "step": 191010 }, { "epoch": 1.5455943037462578, "grad_norm": 0.2244056761264801, "learning_rate": 1.4917564489179388e-06, "loss": 0.0132, "step": 191020 }, { "epoch": 1.5456752164414596, "grad_norm": 0.3218197226524353, "learning_rate": 1.4912533738968283e-06, "loss": 0.0164, "step": 191030 }, { "epoch": 1.5457561291366615, "grad_norm": 0.27463802695274353, "learning_rate": 1.4907503688502584e-06, "loss": 0.021, "step": 191040 }, { "epoch": 1.5458370418318634, "grad_norm": 0.32879117131233215, "learning_rate": 1.4902474337882655e-06, "loss": 0.0155, "step": 191050 }, { "epoch": 1.5459179545270652, "grad_norm": 0.596312940120697, "learning_rate": 1.48974456872088e-06, "loss": 0.0188, "step": 191060 }, { "epoch": 1.5459988672222673, "grad_norm": 0.8702014684677124, "learning_rate": 1.4892417736581254e-06, "loss": 0.0335, "step": 191070 }, { "epoch": 1.546079779917469, "grad_norm": 0.4199484586715698, "learning_rate": 1.488739048610034e-06, "loss": 0.0133, "step": 191080 }, { "epoch": 1.546160692612671, "grad_norm": 0.20915257930755615, "learning_rate": 1.4882363935866313e-06, "loss": 0.0151, "step": 191090 }, { "epoch": 1.546241605307873, "grad_norm": 0.15342552959918976, "learning_rate": 1.4877338085979364e-06, "loss": 0.0157, "step": 191100 }, { "epoch": 1.5463225180030746, "grad_norm": 0.29397308826446533, "learning_rate": 1.4872312936539772e-06, "loss": 0.0115, "step": 191110 }, { "epoch": 1.5464034306982766, "grad_norm": 0.2895689308643341, "learning_rate": 1.4867288487647745e-06, "loss": 0.0169, "step": 191120 }, { "epoch": 1.5464843433934785, "grad_norm": 0.3425927460193634, "learning_rate": 1.4862264739403487e-06, "loss": 0.015, "step": 191130 }, { "epoch": 1.5465652560886802, "grad_norm": 0.20761646330356598, "learning_rate": 1.485724169190717e-06, "loss": 0.0156, "step": 191140 }, { "epoch": 1.5466461687838822, "grad_norm": 0.4712092876434326, "learning_rate": 1.4852219345258983e-06, "loss": 0.026, "step": 191150 }, { "epoch": 1.546727081479084, "grad_norm": 0.29340532422065735, "learning_rate": 1.484719769955908e-06, "loss": 0.0142, "step": 191160 }, { "epoch": 1.5468079941742858, "grad_norm": 0.10983157157897949, "learning_rate": 1.4842176754907611e-06, "loss": 0.019, "step": 191170 }, { "epoch": 1.5468889068694878, "grad_norm": 0.5092175602912903, "learning_rate": 1.4837156511404681e-06, "loss": 0.0169, "step": 191180 }, { "epoch": 1.5469698195646897, "grad_norm": 0.19862186908721924, "learning_rate": 1.4832136969150473e-06, "loss": 0.0149, "step": 191190 }, { "epoch": 1.5470507322598914, "grad_norm": 0.24971938133239746, "learning_rate": 1.4827118128245028e-06, "loss": 0.0127, "step": 191200 }, { "epoch": 1.5471316449550936, "grad_norm": 0.31638336181640625, "learning_rate": 1.4822099988788446e-06, "loss": 0.0153, "step": 191210 }, { "epoch": 1.5472125576502953, "grad_norm": 0.2378072887659073, "learning_rate": 1.4817082550880846e-06, "loss": 0.0146, "step": 191220 }, { "epoch": 1.5472934703454972, "grad_norm": 0.28566521406173706, "learning_rate": 1.481206581462225e-06, "loss": 0.0165, "step": 191230 }, { "epoch": 1.5473743830406992, "grad_norm": 0.5817863941192627, "learning_rate": 1.4807049780112693e-06, "loss": 0.0229, "step": 191240 }, { "epoch": 1.547455295735901, "grad_norm": 0.09384233504533768, "learning_rate": 1.4802034447452268e-06, "loss": 0.0201, "step": 191250 }, { "epoch": 1.5475362084311028, "grad_norm": 0.6298761963844299, "learning_rate": 1.4797019816740926e-06, "loss": 0.0201, "step": 191260 }, { "epoch": 1.5476171211263048, "grad_norm": 0.34944450855255127, "learning_rate": 1.4792005888078725e-06, "loss": 0.022, "step": 191270 }, { "epoch": 1.5476980338215065, "grad_norm": 0.3819049596786499, "learning_rate": 1.4786992661565652e-06, "loss": 0.0197, "step": 191280 }, { "epoch": 1.5477789465167084, "grad_norm": 0.7752323746681213, "learning_rate": 1.4781980137301644e-06, "loss": 0.0202, "step": 191290 }, { "epoch": 1.5478598592119104, "grad_norm": 0.2820030450820923, "learning_rate": 1.477696831538671e-06, "loss": 0.0147, "step": 191300 }, { "epoch": 1.547940771907112, "grad_norm": 0.4022228717803955, "learning_rate": 1.4771957195920782e-06, "loss": 0.0173, "step": 191310 }, { "epoch": 1.5480216846023143, "grad_norm": 0.3261450231075287, "learning_rate": 1.47669467790038e-06, "loss": 0.015, "step": 191320 }, { "epoch": 1.548102597297516, "grad_norm": 0.23153537511825562, "learning_rate": 1.4761937064735683e-06, "loss": 0.0162, "step": 191330 }, { "epoch": 1.5481835099927177, "grad_norm": 0.5543911457061768, "learning_rate": 1.4756928053216334e-06, "loss": 0.0151, "step": 191340 }, { "epoch": 1.5482644226879199, "grad_norm": 0.4198293089866638, "learning_rate": 1.4751919744545661e-06, "loss": 0.0287, "step": 191350 }, { "epoch": 1.5483453353831216, "grad_norm": 0.4792092740535736, "learning_rate": 1.4746912138823538e-06, "loss": 0.0223, "step": 191360 }, { "epoch": 1.5484262480783235, "grad_norm": 0.46713393926620483, "learning_rate": 1.4741905236149822e-06, "loss": 0.0375, "step": 191370 }, { "epoch": 1.5485071607735255, "grad_norm": 0.13933461904525757, "learning_rate": 1.473689903662438e-06, "loss": 0.021, "step": 191380 }, { "epoch": 1.5485880734687272, "grad_norm": 0.5927649140357971, "learning_rate": 1.4731893540347043e-06, "loss": 0.0139, "step": 191390 }, { "epoch": 1.5486689861639291, "grad_norm": 0.31169524788856506, "learning_rate": 1.4726888747417618e-06, "loss": 0.0219, "step": 191400 }, { "epoch": 1.548749898859131, "grad_norm": 0.49940404295921326, "learning_rate": 1.4721884657935958e-06, "loss": 0.012, "step": 191410 }, { "epoch": 1.5488308115543328, "grad_norm": 0.22117172181606293, "learning_rate": 1.471688127200182e-06, "loss": 0.018, "step": 191420 }, { "epoch": 1.5489117242495347, "grad_norm": 0.4057428538799286, "learning_rate": 1.471187858971499e-06, "loss": 0.0203, "step": 191430 }, { "epoch": 1.5489926369447367, "grad_norm": 0.19982102513313293, "learning_rate": 1.4706876611175269e-06, "loss": 0.014, "step": 191440 }, { "epoch": 1.5490735496399384, "grad_norm": 0.6662197113037109, "learning_rate": 1.4701875336482374e-06, "loss": 0.0193, "step": 191450 }, { "epoch": 1.5491544623351405, "grad_norm": 0.2517136037349701, "learning_rate": 1.4696874765736041e-06, "loss": 0.0224, "step": 191460 }, { "epoch": 1.5492353750303423, "grad_norm": 0.24216507375240326, "learning_rate": 1.4691874899036047e-06, "loss": 0.0165, "step": 191470 }, { "epoch": 1.549316287725544, "grad_norm": 0.4079399108886719, "learning_rate": 1.4686875736482038e-06, "loss": 0.0232, "step": 191480 }, { "epoch": 1.5493972004207461, "grad_norm": 1.055677890777588, "learning_rate": 1.4681877278173756e-06, "loss": 0.0104, "step": 191490 }, { "epoch": 1.5494781131159479, "grad_norm": 0.5515174269676208, "learning_rate": 1.4676879524210875e-06, "loss": 0.0192, "step": 191500 }, { "epoch": 1.5495590258111498, "grad_norm": 0.4595435857772827, "learning_rate": 1.4671882474693055e-06, "loss": 0.0167, "step": 191510 }, { "epoch": 1.5496399385063517, "grad_norm": 0.28405240178108215, "learning_rate": 1.4666886129719964e-06, "loss": 0.021, "step": 191520 }, { "epoch": 1.5497208512015535, "grad_norm": 0.22342318296432495, "learning_rate": 1.4661890489391233e-06, "loss": 0.0218, "step": 191530 }, { "epoch": 1.5498017638967554, "grad_norm": 0.3455648124217987, "learning_rate": 1.46568955538065e-06, "loss": 0.0167, "step": 191540 }, { "epoch": 1.5498826765919573, "grad_norm": 0.4187302589416504, "learning_rate": 1.4651901323065372e-06, "loss": 0.0203, "step": 191550 }, { "epoch": 1.549963589287159, "grad_norm": 0.39667394757270813, "learning_rate": 1.464690779726744e-06, "loss": 0.0252, "step": 191560 }, { "epoch": 1.550044501982361, "grad_norm": 0.3226339519023895, "learning_rate": 1.4641914976512305e-06, "loss": 0.0163, "step": 191570 }, { "epoch": 1.550125414677563, "grad_norm": 0.30468934774398804, "learning_rate": 1.4636922860899532e-06, "loss": 0.0114, "step": 191580 }, { "epoch": 1.5502063273727646, "grad_norm": 0.1509942263364792, "learning_rate": 1.4631931450528663e-06, "loss": 0.0105, "step": 191590 }, { "epoch": 1.5502872400679668, "grad_norm": 0.4969467222690582, "learning_rate": 1.4626940745499286e-06, "loss": 0.0247, "step": 191600 }, { "epoch": 1.5503681527631685, "grad_norm": 0.7983325123786926, "learning_rate": 1.4621950745910885e-06, "loss": 0.0114, "step": 191610 }, { "epoch": 1.5504490654583705, "grad_norm": 0.3225751519203186, "learning_rate": 1.461696145186297e-06, "loss": 0.0223, "step": 191620 }, { "epoch": 1.5505299781535724, "grad_norm": 0.5049090385437012, "learning_rate": 1.4611972863455098e-06, "loss": 0.0137, "step": 191630 }, { "epoch": 1.5506108908487741, "grad_norm": 0.2942807078361511, "learning_rate": 1.4606984980786704e-06, "loss": 0.0249, "step": 191640 }, { "epoch": 1.550691803543976, "grad_norm": 0.2801535427570343, "learning_rate": 1.4601997803957258e-06, "loss": 0.0104, "step": 191650 }, { "epoch": 1.550772716239178, "grad_norm": 0.26042765378952026, "learning_rate": 1.4597011333066274e-06, "loss": 0.0166, "step": 191660 }, { "epoch": 1.5508536289343797, "grad_norm": 0.21522387862205505, "learning_rate": 1.4592025568213125e-06, "loss": 0.0257, "step": 191670 }, { "epoch": 1.5509345416295817, "grad_norm": 0.3443397879600525, "learning_rate": 1.4587040509497291e-06, "loss": 0.0194, "step": 191680 }, { "epoch": 1.5510154543247836, "grad_norm": 0.7869383096694946, "learning_rate": 1.4582056157018176e-06, "loss": 0.0244, "step": 191690 }, { "epoch": 1.5510963670199853, "grad_norm": 0.6570045351982117, "learning_rate": 1.457707251087518e-06, "loss": 0.0264, "step": 191700 }, { "epoch": 1.5511772797151873, "grad_norm": 0.42867302894592285, "learning_rate": 1.4572089571167686e-06, "loss": 0.0229, "step": 191710 }, { "epoch": 1.5512581924103892, "grad_norm": 1.2016544342041016, "learning_rate": 1.4567107337995079e-06, "loss": 0.017, "step": 191720 }, { "epoch": 1.551339105105591, "grad_norm": 0.306566059589386, "learning_rate": 1.4562125811456718e-06, "loss": 0.0117, "step": 191730 }, { "epoch": 1.551420017800793, "grad_norm": 0.14561644196510315, "learning_rate": 1.455714499165194e-06, "loss": 0.0187, "step": 191740 }, { "epoch": 1.5515009304959948, "grad_norm": 0.29147088527679443, "learning_rate": 1.455216487868008e-06, "loss": 0.0179, "step": 191750 }, { "epoch": 1.5515818431911967, "grad_norm": 0.5751509666442871, "learning_rate": 1.4547185472640463e-06, "loss": 0.0185, "step": 191760 }, { "epoch": 1.5516627558863987, "grad_norm": 0.4145169258117676, "learning_rate": 1.4542206773632384e-06, "loss": 0.0199, "step": 191770 }, { "epoch": 1.5517436685816004, "grad_norm": 0.3969607651233673, "learning_rate": 1.4537228781755136e-06, "loss": 0.0247, "step": 191780 }, { "epoch": 1.5518245812768023, "grad_norm": 0.10008898377418518, "learning_rate": 1.4532251497107997e-06, "loss": 0.009, "step": 191790 }, { "epoch": 1.5519054939720043, "grad_norm": 0.34321528673171997, "learning_rate": 1.4527274919790225e-06, "loss": 0.0139, "step": 191800 }, { "epoch": 1.551986406667206, "grad_norm": 0.2619072198867798, "learning_rate": 1.4522299049901056e-06, "loss": 0.0119, "step": 191810 }, { "epoch": 1.552067319362408, "grad_norm": 0.2715795636177063, "learning_rate": 1.4517323887539764e-06, "loss": 0.0177, "step": 191820 }, { "epoch": 1.5521482320576099, "grad_norm": 0.6160914301872253, "learning_rate": 1.4512349432805523e-06, "loss": 0.0248, "step": 191830 }, { "epoch": 1.5522291447528116, "grad_norm": 0.26150956749916077, "learning_rate": 1.4507375685797543e-06, "loss": 0.0124, "step": 191840 }, { "epoch": 1.5523100574480138, "grad_norm": 0.549508273601532, "learning_rate": 1.4502402646615038e-06, "loss": 0.0175, "step": 191850 }, { "epoch": 1.5523909701432155, "grad_norm": 0.517382800579071, "learning_rate": 1.4497430315357191e-06, "loss": 0.0253, "step": 191860 }, { "epoch": 1.5524718828384172, "grad_norm": 0.5823975205421448, "learning_rate": 1.4492458692123112e-06, "loss": 0.0184, "step": 191870 }, { "epoch": 1.5525527955336194, "grad_norm": 0.27388685941696167, "learning_rate": 1.4487487777012006e-06, "loss": 0.0308, "step": 191880 }, { "epoch": 1.552633708228821, "grad_norm": 0.4177039861679077, "learning_rate": 1.4482517570122983e-06, "loss": 0.017, "step": 191890 }, { "epoch": 1.552714620924023, "grad_norm": 0.35207927227020264, "learning_rate": 1.447754807155517e-06, "loss": 0.0116, "step": 191900 }, { "epoch": 1.552795533619225, "grad_norm": 0.385959655046463, "learning_rate": 1.4472579281407667e-06, "loss": 0.0196, "step": 191910 }, { "epoch": 1.5528764463144267, "grad_norm": 0.738366961479187, "learning_rate": 1.4467611199779569e-06, "loss": 0.0258, "step": 191920 }, { "epoch": 1.5529573590096286, "grad_norm": 0.24918188154697418, "learning_rate": 1.446264382676995e-06, "loss": 0.0184, "step": 191930 }, { "epoch": 1.5530382717048306, "grad_norm": 0.3085488975048065, "learning_rate": 1.4457677162477885e-06, "loss": 0.0196, "step": 191940 }, { "epoch": 1.5531191844000323, "grad_norm": 0.28993695974349976, "learning_rate": 1.4452711207002407e-06, "loss": 0.0209, "step": 191950 }, { "epoch": 1.5532000970952342, "grad_norm": 0.41137784719467163, "learning_rate": 1.4447745960442567e-06, "loss": 0.0158, "step": 191960 }, { "epoch": 1.5532810097904362, "grad_norm": 0.9047826528549194, "learning_rate": 1.4442781422897374e-06, "loss": 0.0165, "step": 191970 }, { "epoch": 1.5533619224856379, "grad_norm": 0.5894948840141296, "learning_rate": 1.4437817594465846e-06, "loss": 0.0207, "step": 191980 }, { "epoch": 1.55344283518084, "grad_norm": 0.38092470169067383, "learning_rate": 1.4432854475246965e-06, "loss": 0.0157, "step": 191990 }, { "epoch": 1.5535237478760417, "grad_norm": 0.33000800013542175, "learning_rate": 1.4427892065339721e-06, "loss": 0.0105, "step": 192000 }, { "epoch": 1.5536046605712435, "grad_norm": 0.3573591411113739, "learning_rate": 1.4422930364843075e-06, "loss": 0.0142, "step": 192010 }, { "epoch": 1.5536855732664456, "grad_norm": 0.43271759152412415, "learning_rate": 1.4417969373855973e-06, "loss": 0.0153, "step": 192020 }, { "epoch": 1.5537664859616473, "grad_norm": 0.36377477645874023, "learning_rate": 1.4413009092477337e-06, "loss": 0.0181, "step": 192030 }, { "epoch": 1.5538473986568493, "grad_norm": 0.3820253610610962, "learning_rate": 1.4408049520806128e-06, "loss": 0.0276, "step": 192040 }, { "epoch": 1.5539283113520512, "grad_norm": 0.5567811131477356, "learning_rate": 1.440309065894125e-06, "loss": 0.0167, "step": 192050 }, { "epoch": 1.554009224047253, "grad_norm": 0.4935010075569153, "learning_rate": 1.4398132506981544e-06, "loss": 0.0229, "step": 192060 }, { "epoch": 1.5540901367424549, "grad_norm": 0.2042941153049469, "learning_rate": 1.4393175065025943e-06, "loss": 0.0202, "step": 192070 }, { "epoch": 1.5541710494376568, "grad_norm": 0.2407694011926651, "learning_rate": 1.4388218333173299e-06, "loss": 0.0111, "step": 192080 }, { "epoch": 1.5542519621328585, "grad_norm": 0.25116831064224243, "learning_rate": 1.4383262311522455e-06, "loss": 0.0183, "step": 192090 }, { "epoch": 1.5543328748280605, "grad_norm": 0.2255123257637024, "learning_rate": 1.437830700017226e-06, "loss": 0.0281, "step": 192100 }, { "epoch": 1.5544137875232624, "grad_norm": 0.3074731230735779, "learning_rate": 1.4373352399221525e-06, "loss": 0.014, "step": 192110 }, { "epoch": 1.5544947002184641, "grad_norm": 0.2635669410228729, "learning_rate": 1.436839850876907e-06, "loss": 0.0228, "step": 192120 }, { "epoch": 1.5545756129136663, "grad_norm": 0.3565923571586609, "learning_rate": 1.4363445328913682e-06, "loss": 0.0191, "step": 192130 }, { "epoch": 1.554656525608868, "grad_norm": 0.33882638812065125, "learning_rate": 1.4358492859754148e-06, "loss": 0.0195, "step": 192140 }, { "epoch": 1.55473743830407, "grad_norm": 0.3160227835178375, "learning_rate": 1.435354110138923e-06, "loss": 0.0143, "step": 192150 }, { "epoch": 1.554818350999272, "grad_norm": 0.2892349362373352, "learning_rate": 1.4348590053917683e-06, "loss": 0.0209, "step": 192160 }, { "epoch": 1.5548992636944736, "grad_norm": 0.03731481730937958, "learning_rate": 1.434363971743824e-06, "loss": 0.0126, "step": 192170 }, { "epoch": 1.5549801763896756, "grad_norm": 0.2904910743236542, "learning_rate": 1.4338690092049634e-06, "loss": 0.0141, "step": 192180 }, { "epoch": 1.5550610890848775, "grad_norm": 0.37145692110061646, "learning_rate": 1.4333741177850563e-06, "loss": 0.0178, "step": 192190 }, { "epoch": 1.5551420017800792, "grad_norm": 0.5604051947593689, "learning_rate": 1.4328792974939738e-06, "loss": 0.0203, "step": 192200 }, { "epoch": 1.5552229144752812, "grad_norm": 0.22989821434020996, "learning_rate": 1.4323845483415827e-06, "loss": 0.0416, "step": 192210 }, { "epoch": 1.555303827170483, "grad_norm": 0.24284230172634125, "learning_rate": 1.4318898703377488e-06, "loss": 0.0175, "step": 192220 }, { "epoch": 1.5553847398656848, "grad_norm": 0.5341348052024841, "learning_rate": 1.43139526349234e-06, "loss": 0.0234, "step": 192230 }, { "epoch": 1.5554656525608868, "grad_norm": 0.18752442300319672, "learning_rate": 1.4309007278152208e-06, "loss": 0.031, "step": 192240 }, { "epoch": 1.5555465652560887, "grad_norm": 0.34763103723526, "learning_rate": 1.4304062633162491e-06, "loss": 0.0106, "step": 192250 }, { "epoch": 1.5556274779512904, "grad_norm": 0.34746354818344116, "learning_rate": 1.42991187000529e-06, "loss": 0.0171, "step": 192260 }, { "epoch": 1.5557083906464926, "grad_norm": 0.3562268912792206, "learning_rate": 1.429417547892203e-06, "loss": 0.0149, "step": 192270 }, { "epoch": 1.5557893033416943, "grad_norm": 0.371080219745636, "learning_rate": 1.4289232969868427e-06, "loss": 0.0172, "step": 192280 }, { "epoch": 1.5558702160368962, "grad_norm": 0.5218178629875183, "learning_rate": 1.4284291172990693e-06, "loss": 0.014, "step": 192290 }, { "epoch": 1.5559511287320982, "grad_norm": 0.5419762134552002, "learning_rate": 1.4279350088387378e-06, "loss": 0.0178, "step": 192300 }, { "epoch": 1.5560320414273, "grad_norm": 0.4022580087184906, "learning_rate": 1.4274409716157016e-06, "loss": 0.0129, "step": 192310 }, { "epoch": 1.5561129541225018, "grad_norm": 0.2357722818851471, "learning_rate": 1.4269470056398133e-06, "loss": 0.0151, "step": 192320 }, { "epoch": 1.5561938668177038, "grad_norm": 0.49879196286201477, "learning_rate": 1.4264531109209234e-06, "loss": 0.0214, "step": 192330 }, { "epoch": 1.5562747795129055, "grad_norm": 0.32462507486343384, "learning_rate": 1.425959287468882e-06, "loss": 0.0223, "step": 192340 }, { "epoch": 1.5563556922081074, "grad_norm": 0.6308854222297668, "learning_rate": 1.4254655352935382e-06, "loss": 0.0256, "step": 192350 }, { "epoch": 1.5564366049033094, "grad_norm": 0.5459569096565247, "learning_rate": 1.4249718544047374e-06, "loss": 0.0234, "step": 192360 }, { "epoch": 1.556517517598511, "grad_norm": 0.3920758366584778, "learning_rate": 1.4244782448123261e-06, "loss": 0.0143, "step": 192370 }, { "epoch": 1.556598430293713, "grad_norm": 0.7265533804893494, "learning_rate": 1.4239847065261485e-06, "loss": 0.027, "step": 192380 }, { "epoch": 1.556679342988915, "grad_norm": 0.21962414681911469, "learning_rate": 1.4234912395560446e-06, "loss": 0.0155, "step": 192390 }, { "epoch": 1.5567602556841167, "grad_norm": 0.46702519059181213, "learning_rate": 1.4229978439118603e-06, "loss": 0.0291, "step": 192400 }, { "epoch": 1.5568411683793189, "grad_norm": 0.30555459856987, "learning_rate": 1.4225045196034315e-06, "loss": 0.0204, "step": 192410 }, { "epoch": 1.5569220810745206, "grad_norm": 0.2150389850139618, "learning_rate": 1.422011266640596e-06, "loss": 0.0199, "step": 192420 }, { "epoch": 1.5570029937697225, "grad_norm": 0.5979526042938232, "learning_rate": 1.4215180850331956e-06, "loss": 0.0262, "step": 192430 }, { "epoch": 1.5570839064649244, "grad_norm": 0.3726496398448944, "learning_rate": 1.4210249747910587e-06, "loss": 0.0152, "step": 192440 }, { "epoch": 1.5571648191601262, "grad_norm": 0.6109780073165894, "learning_rate": 1.4205319359240254e-06, "loss": 0.0161, "step": 192450 }, { "epoch": 1.557245731855328, "grad_norm": 0.42904728651046753, "learning_rate": 1.4200389684419274e-06, "loss": 0.0194, "step": 192460 }, { "epoch": 1.55732664455053, "grad_norm": 0.3678138256072998, "learning_rate": 1.4195460723545913e-06, "loss": 0.0185, "step": 192470 }, { "epoch": 1.5574075572457318, "grad_norm": 0.3845634460449219, "learning_rate": 1.4190532476718515e-06, "loss": 0.0158, "step": 192480 }, { "epoch": 1.5574884699409337, "grad_norm": 0.2803228199481964, "learning_rate": 1.418560494403537e-06, "loss": 0.0439, "step": 192490 }, { "epoch": 1.5575693826361356, "grad_norm": 0.63446444272995, "learning_rate": 1.4180678125594694e-06, "loss": 0.0242, "step": 192500 }, { "epoch": 1.5576502953313374, "grad_norm": 0.47174763679504395, "learning_rate": 1.4175752021494787e-06, "loss": 0.0226, "step": 192510 }, { "epoch": 1.5577312080265395, "grad_norm": 0.8425513505935669, "learning_rate": 1.4170826631833884e-06, "loss": 0.0237, "step": 192520 }, { "epoch": 1.5578121207217412, "grad_norm": 0.6100754141807556, "learning_rate": 1.41659019567102e-06, "loss": 0.0281, "step": 192530 }, { "epoch": 1.557893033416943, "grad_norm": 0.35455965995788574, "learning_rate": 1.4160977996221953e-06, "loss": 0.0152, "step": 192540 }, { "epoch": 1.5579739461121451, "grad_norm": 0.3402650058269501, "learning_rate": 1.415605475046734e-06, "loss": 0.025, "step": 192550 }, { "epoch": 1.5580548588073468, "grad_norm": 0.6965414881706238, "learning_rate": 1.415113221954455e-06, "loss": 0.0296, "step": 192560 }, { "epoch": 1.5581357715025488, "grad_norm": 0.37497642636299133, "learning_rate": 1.4146210403551741e-06, "loss": 0.0146, "step": 192570 }, { "epoch": 1.5582166841977507, "grad_norm": 0.4194571077823639, "learning_rate": 1.4141289302587064e-06, "loss": 0.024, "step": 192580 }, { "epoch": 1.5582975968929524, "grad_norm": 0.7883598208427429, "learning_rate": 1.41363689167487e-06, "loss": 0.02, "step": 192590 }, { "epoch": 1.5583785095881544, "grad_norm": 0.19432343542575836, "learning_rate": 1.4131449246134727e-06, "loss": 0.0234, "step": 192600 }, { "epoch": 1.5584594222833563, "grad_norm": 0.20109237730503082, "learning_rate": 1.4126530290843266e-06, "loss": 0.0179, "step": 192610 }, { "epoch": 1.558540334978558, "grad_norm": 0.33203551173210144, "learning_rate": 1.4121612050972455e-06, "loss": 0.0175, "step": 192620 }, { "epoch": 1.55862124767376, "grad_norm": 0.3034682869911194, "learning_rate": 1.4116694526620322e-06, "loss": 0.0186, "step": 192630 }, { "epoch": 1.558702160368962, "grad_norm": 0.460679829120636, "learning_rate": 1.4111777717884978e-06, "loss": 0.0173, "step": 192640 }, { "epoch": 1.5587830730641636, "grad_norm": 0.06283417344093323, "learning_rate": 1.4106861624864477e-06, "loss": 0.0136, "step": 192650 }, { "epoch": 1.5588639857593658, "grad_norm": 0.18765074014663696, "learning_rate": 1.4101946247656816e-06, "loss": 0.0133, "step": 192660 }, { "epoch": 1.5589448984545675, "grad_norm": 0.20029494166374207, "learning_rate": 1.4097031586360072e-06, "loss": 0.0193, "step": 192670 }, { "epoch": 1.5590258111497692, "grad_norm": 0.34928449988365173, "learning_rate": 1.409211764107225e-06, "loss": 0.0279, "step": 192680 }, { "epoch": 1.5591067238449714, "grad_norm": 0.43148234486579895, "learning_rate": 1.408720441189131e-06, "loss": 0.017, "step": 192690 }, { "epoch": 1.5591876365401731, "grad_norm": 0.24138645827770233, "learning_rate": 1.4082291898915274e-06, "loss": 0.0151, "step": 192700 }, { "epoch": 1.559268549235375, "grad_norm": 0.556932270526886, "learning_rate": 1.40773801022421e-06, "loss": 0.0228, "step": 192710 }, { "epoch": 1.559349461930577, "grad_norm": 0.7295939326286316, "learning_rate": 1.4072469021969743e-06, "loss": 0.0262, "step": 192720 }, { "epoch": 1.5594303746257787, "grad_norm": 0.3120153248310089, "learning_rate": 1.406755865819615e-06, "loss": 0.0132, "step": 192730 }, { "epoch": 1.5595112873209807, "grad_norm": 0.237221822142601, "learning_rate": 1.4062649011019241e-06, "loss": 0.0084, "step": 192740 }, { "epoch": 1.5595922000161826, "grad_norm": 0.4838283061981201, "learning_rate": 1.405774008053693e-06, "loss": 0.0277, "step": 192750 }, { "epoch": 1.5596731127113843, "grad_norm": 0.5513643026351929, "learning_rate": 1.4052831866847117e-06, "loss": 0.0215, "step": 192760 }, { "epoch": 1.5597540254065863, "grad_norm": 0.29289060831069946, "learning_rate": 1.4047924370047665e-06, "loss": 0.0129, "step": 192770 }, { "epoch": 1.5598349381017882, "grad_norm": 0.4288678467273712, "learning_rate": 1.4043017590236496e-06, "loss": 0.018, "step": 192780 }, { "epoch": 1.55991585079699, "grad_norm": 0.3082481622695923, "learning_rate": 1.4038111527511416e-06, "loss": 0.0275, "step": 192790 }, { "epoch": 1.559996763492192, "grad_norm": 0.42869487404823303, "learning_rate": 1.403320618197027e-06, "loss": 0.0217, "step": 192800 }, { "epoch": 1.5600776761873938, "grad_norm": 0.38722702860832214, "learning_rate": 1.4028301553710933e-06, "loss": 0.0124, "step": 192810 }, { "epoch": 1.5601585888825957, "grad_norm": 0.2993360757827759, "learning_rate": 1.402339764283116e-06, "loss": 0.0241, "step": 192820 }, { "epoch": 1.5602395015777977, "grad_norm": 0.30925846099853516, "learning_rate": 1.4018494449428754e-06, "loss": 0.0105, "step": 192830 }, { "epoch": 1.5603204142729994, "grad_norm": 0.12248393893241882, "learning_rate": 1.4013591973601548e-06, "loss": 0.0257, "step": 192840 }, { "epoch": 1.5604013269682013, "grad_norm": 0.46570029854774475, "learning_rate": 1.4008690215447252e-06, "loss": 0.0372, "step": 192850 }, { "epoch": 1.5604822396634033, "grad_norm": 0.0049645500257611275, "learning_rate": 1.4003789175063659e-06, "loss": 0.0166, "step": 192860 }, { "epoch": 1.560563152358605, "grad_norm": 0.36131876707077026, "learning_rate": 1.399888885254852e-06, "loss": 0.0108, "step": 192870 }, { "epoch": 1.560644065053807, "grad_norm": 0.9818131327629089, "learning_rate": 1.3993989247999506e-06, "loss": 0.0205, "step": 192880 }, { "epoch": 1.5607249777490089, "grad_norm": 0.5545316338539124, "learning_rate": 1.3989090361514385e-06, "loss": 0.0188, "step": 192890 }, { "epoch": 1.5608058904442106, "grad_norm": 0.26261138916015625, "learning_rate": 1.398419219319085e-06, "loss": 0.0222, "step": 192900 }, { "epoch": 1.5608868031394125, "grad_norm": 0.4136073589324951, "learning_rate": 1.3979294743126532e-06, "loss": 0.0212, "step": 192910 }, { "epoch": 1.5609677158346145, "grad_norm": 0.37805917859077454, "learning_rate": 1.3974398011419155e-06, "loss": 0.0171, "step": 192920 }, { "epoch": 1.5610486285298162, "grad_norm": 0.31796297430992126, "learning_rate": 1.3969501998166357e-06, "loss": 0.0165, "step": 192930 }, { "epoch": 1.5611295412250183, "grad_norm": 0.09167615324258804, "learning_rate": 1.3964606703465777e-06, "loss": 0.0105, "step": 192940 }, { "epoch": 1.56121045392022, "grad_norm": 0.3902861177921295, "learning_rate": 1.3959712127415043e-06, "loss": 0.0307, "step": 192950 }, { "epoch": 1.561291366615422, "grad_norm": 0.6062818765640259, "learning_rate": 1.3954818270111765e-06, "loss": 0.0197, "step": 192960 }, { "epoch": 1.561372279310624, "grad_norm": 0.2688486874103546, "learning_rate": 1.3949925131653542e-06, "loss": 0.0175, "step": 192970 }, { "epoch": 1.5614531920058257, "grad_norm": 0.33909574151039124, "learning_rate": 1.3945032712137963e-06, "loss": 0.0212, "step": 192980 }, { "epoch": 1.5615341047010276, "grad_norm": 0.3995663523674011, "learning_rate": 1.3940141011662566e-06, "loss": 0.0159, "step": 192990 }, { "epoch": 1.5616150173962295, "grad_norm": 0.735305905342102, "learning_rate": 1.393525003032497e-06, "loss": 0.0204, "step": 193000 }, { "epoch": 1.5616959300914313, "grad_norm": 0.3970178961753845, "learning_rate": 1.3930359768222656e-06, "loss": 0.0187, "step": 193010 }, { "epoch": 1.5617768427866332, "grad_norm": 0.5563597083091736, "learning_rate": 1.3925470225453153e-06, "loss": 0.022, "step": 193020 }, { "epoch": 1.5618577554818351, "grad_norm": 0.264557808637619, "learning_rate": 1.3920581402114024e-06, "loss": 0.0294, "step": 193030 }, { "epoch": 1.5619386681770369, "grad_norm": 0.5112789273262024, "learning_rate": 1.3915693298302718e-06, "loss": 0.016, "step": 193040 }, { "epoch": 1.5620195808722388, "grad_norm": 0.16900750994682312, "learning_rate": 1.3910805914116715e-06, "loss": 0.0204, "step": 193050 }, { "epoch": 1.5621004935674407, "grad_norm": 0.4755719304084778, "learning_rate": 1.3905919249653527e-06, "loss": 0.0187, "step": 193060 }, { "epoch": 1.5621814062626425, "grad_norm": 0.3882889151573181, "learning_rate": 1.3901033305010552e-06, "loss": 0.0256, "step": 193070 }, { "epoch": 1.5622623189578446, "grad_norm": 0.3034636974334717, "learning_rate": 1.3896148080285282e-06, "loss": 0.0216, "step": 193080 }, { "epoch": 1.5623432316530463, "grad_norm": 0.15331090986728668, "learning_rate": 1.3891263575575126e-06, "loss": 0.0144, "step": 193090 }, { "epoch": 1.5624241443482483, "grad_norm": 0.6234062910079956, "learning_rate": 1.3886379790977467e-06, "loss": 0.019, "step": 193100 }, { "epoch": 1.5625050570434502, "grad_norm": 0.2760421335697174, "learning_rate": 1.3881496726589733e-06, "loss": 0.0196, "step": 193110 }, { "epoch": 1.562585969738652, "grad_norm": 0.23246534168720245, "learning_rate": 1.3876614382509296e-06, "loss": 0.0144, "step": 193120 }, { "epoch": 1.5626668824338539, "grad_norm": 0.730359673500061, "learning_rate": 1.3871732758833528e-06, "loss": 0.0204, "step": 193130 }, { "epoch": 1.5627477951290558, "grad_norm": 0.8500255942344666, "learning_rate": 1.3866851855659785e-06, "loss": 0.0203, "step": 193140 }, { "epoch": 1.5628287078242575, "grad_norm": 0.4344070255756378, "learning_rate": 1.3861971673085395e-06, "loss": 0.0121, "step": 193150 }, { "epoch": 1.5629096205194595, "grad_norm": 0.23056256771087646, "learning_rate": 1.385709221120769e-06, "loss": 0.0151, "step": 193160 }, { "epoch": 1.5629905332146614, "grad_norm": 0.34576818346977234, "learning_rate": 1.3852213470123982e-06, "loss": 0.0148, "step": 193170 }, { "epoch": 1.5630714459098631, "grad_norm": 0.0658554956316948, "learning_rate": 1.3847335449931548e-06, "loss": 0.0121, "step": 193180 }, { "epoch": 1.5631523586050653, "grad_norm": 0.2641107439994812, "learning_rate": 1.3842458150727717e-06, "loss": 0.0182, "step": 193190 }, { "epoch": 1.563233271300267, "grad_norm": 0.6342376470565796, "learning_rate": 1.383758157260971e-06, "loss": 0.0151, "step": 193200 }, { "epoch": 1.5633141839954687, "grad_norm": 0.5917468070983887, "learning_rate": 1.3832705715674782e-06, "loss": 0.0247, "step": 193210 }, { "epoch": 1.563395096690671, "grad_norm": 0.27763134241104126, "learning_rate": 1.3827830580020217e-06, "loss": 0.0101, "step": 193220 }, { "epoch": 1.5634760093858726, "grad_norm": 0.18566732108592987, "learning_rate": 1.3822956165743195e-06, "loss": 0.0198, "step": 193230 }, { "epoch": 1.5635569220810746, "grad_norm": 0.32924556732177734, "learning_rate": 1.381808247294092e-06, "loss": 0.0191, "step": 193240 }, { "epoch": 1.5636378347762765, "grad_norm": 0.14263476431369781, "learning_rate": 1.381320950171064e-06, "loss": 0.0079, "step": 193250 }, { "epoch": 1.5637187474714782, "grad_norm": 0.5019482970237732, "learning_rate": 1.3808337252149473e-06, "loss": 0.0195, "step": 193260 }, { "epoch": 1.5637996601666801, "grad_norm": 0.6311221122741699, "learning_rate": 1.3803465724354625e-06, "loss": 0.0311, "step": 193270 }, { "epoch": 1.563880572861882, "grad_norm": 0.02020823396742344, "learning_rate": 1.3798594918423264e-06, "loss": 0.0153, "step": 193280 }, { "epoch": 1.5639614855570838, "grad_norm": 0.3956589996814728, "learning_rate": 1.3793724834452466e-06, "loss": 0.018, "step": 193290 }, { "epoch": 1.5640423982522857, "grad_norm": 0.09369300305843353, "learning_rate": 1.3788855472539403e-06, "loss": 0.0105, "step": 193300 }, { "epoch": 1.5641233109474877, "grad_norm": 0.3482847511768341, "learning_rate": 1.3783986832781178e-06, "loss": 0.0257, "step": 193310 }, { "epoch": 1.5642042236426894, "grad_norm": 0.4733000695705414, "learning_rate": 1.377911891527488e-06, "loss": 0.0225, "step": 193320 }, { "epoch": 1.5642851363378916, "grad_norm": 0.29233574867248535, "learning_rate": 1.3774251720117587e-06, "loss": 0.0197, "step": 193330 }, { "epoch": 1.5643660490330933, "grad_norm": 0.30075645446777344, "learning_rate": 1.376938524740637e-06, "loss": 0.0159, "step": 193340 }, { "epoch": 1.564446961728295, "grad_norm": 0.3107820749282837, "learning_rate": 1.3764519497238277e-06, "loss": 0.0194, "step": 193350 }, { "epoch": 1.5645278744234972, "grad_norm": 0.572289228439331, "learning_rate": 1.3759654469710344e-06, "loss": 0.0212, "step": 193360 }, { "epoch": 1.5646087871186989, "grad_norm": 0.5032978057861328, "learning_rate": 1.3754790164919602e-06, "loss": 0.0164, "step": 193370 }, { "epoch": 1.5646896998139008, "grad_norm": 0.15555953979492188, "learning_rate": 1.3749926582963046e-06, "loss": 0.0237, "step": 193380 }, { "epoch": 1.5647706125091028, "grad_norm": 0.22325751185417175, "learning_rate": 1.374506372393768e-06, "loss": 0.0159, "step": 193390 }, { "epoch": 1.5648515252043045, "grad_norm": 0.6660409569740295, "learning_rate": 1.3740201587940465e-06, "loss": 0.0235, "step": 193400 }, { "epoch": 1.5649324378995064, "grad_norm": 0.32789909839630127, "learning_rate": 1.3735340175068413e-06, "loss": 0.0174, "step": 193410 }, { "epoch": 1.5650133505947084, "grad_norm": 0.417426198720932, "learning_rate": 1.3730479485418424e-06, "loss": 0.0185, "step": 193420 }, { "epoch": 1.56509426328991, "grad_norm": 0.45995399355888367, "learning_rate": 1.3725619519087447e-06, "loss": 0.0249, "step": 193430 }, { "epoch": 1.565175175985112, "grad_norm": 0.5772237777709961, "learning_rate": 1.3720760276172434e-06, "loss": 0.0221, "step": 193440 }, { "epoch": 1.565256088680314, "grad_norm": 0.14572106301784515, "learning_rate": 1.371590175677025e-06, "loss": 0.0232, "step": 193450 }, { "epoch": 1.5653370013755157, "grad_norm": 0.5081575512886047, "learning_rate": 1.37110439609778e-06, "loss": 0.0242, "step": 193460 }, { "epoch": 1.5654179140707178, "grad_norm": 0.2476973533630371, "learning_rate": 1.3706186888891992e-06, "loss": 0.0125, "step": 193470 }, { "epoch": 1.5654988267659196, "grad_norm": 0.2683389186859131, "learning_rate": 1.3701330540609636e-06, "loss": 0.0219, "step": 193480 }, { "epoch": 1.5655797394611215, "grad_norm": 0.24329619109630585, "learning_rate": 1.3696474916227632e-06, "loss": 0.0134, "step": 193490 }, { "epoch": 1.5656606521563234, "grad_norm": 0.5044171810150146, "learning_rate": 1.3691620015842794e-06, "loss": 0.0124, "step": 193500 }, { "epoch": 1.5657415648515252, "grad_norm": 0.4700436294078827, "learning_rate": 1.3686765839551946e-06, "loss": 0.0266, "step": 193510 }, { "epoch": 1.565822477546727, "grad_norm": 0.23864933848381042, "learning_rate": 1.368191238745189e-06, "loss": 0.0299, "step": 193520 }, { "epoch": 1.565903390241929, "grad_norm": 0.9332107901573181, "learning_rate": 1.367705965963942e-06, "loss": 0.0252, "step": 193530 }, { "epoch": 1.5659843029371308, "grad_norm": 0.3333747386932373, "learning_rate": 1.3672207656211316e-06, "loss": 0.0195, "step": 193540 }, { "epoch": 1.5660652156323327, "grad_norm": 0.5641093850135803, "learning_rate": 1.366735637726434e-06, "loss": 0.0233, "step": 193550 }, { "epoch": 1.5661461283275346, "grad_norm": 0.6361833214759827, "learning_rate": 1.366250582289524e-06, "loss": 0.0165, "step": 193560 }, { "epoch": 1.5662270410227364, "grad_norm": 0.4216761887073517, "learning_rate": 1.3657655993200746e-06, "loss": 0.0127, "step": 193570 }, { "epoch": 1.5663079537179383, "grad_norm": 0.4012928903102875, "learning_rate": 1.3652806888277582e-06, "loss": 0.023, "step": 193580 }, { "epoch": 1.5663888664131402, "grad_norm": 0.09211508184671402, "learning_rate": 1.3647958508222454e-06, "loss": 0.0201, "step": 193590 }, { "epoch": 1.566469779108342, "grad_norm": 0.4297579824924469, "learning_rate": 1.3643110853132047e-06, "loss": 0.0117, "step": 193600 }, { "epoch": 1.5665506918035441, "grad_norm": 0.22581809759140015, "learning_rate": 1.3638263923103046e-06, "loss": 0.0144, "step": 193610 }, { "epoch": 1.5666316044987458, "grad_norm": 0.1636747419834137, "learning_rate": 1.363341771823209e-06, "loss": 0.0196, "step": 193620 }, { "epoch": 1.5667125171939478, "grad_norm": 0.3456317186355591, "learning_rate": 1.3628572238615878e-06, "loss": 0.0127, "step": 193630 }, { "epoch": 1.5667934298891497, "grad_norm": 0.7500210404396057, "learning_rate": 1.3623727484350991e-06, "loss": 0.0318, "step": 193640 }, { "epoch": 1.5668743425843514, "grad_norm": 0.7986199855804443, "learning_rate": 1.361888345553405e-06, "loss": 0.0229, "step": 193650 }, { "epoch": 1.5669552552795534, "grad_norm": 0.3144749104976654, "learning_rate": 1.3614040152261693e-06, "loss": 0.0152, "step": 193660 }, { "epoch": 1.5670361679747553, "grad_norm": 0.5344556570053101, "learning_rate": 1.360919757463049e-06, "loss": 0.0187, "step": 193670 }, { "epoch": 1.567117080669957, "grad_norm": 0.5120384097099304, "learning_rate": 1.3604355722737011e-06, "loss": 0.0208, "step": 193680 }, { "epoch": 1.567197993365159, "grad_norm": 0.3248711824417114, "learning_rate": 1.359951459667783e-06, "loss": 0.0247, "step": 193690 }, { "epoch": 1.567278906060361, "grad_norm": 0.7241166234016418, "learning_rate": 1.3594674196549485e-06, "loss": 0.0211, "step": 193700 }, { "epoch": 1.5673598187555626, "grad_norm": 0.18964843451976776, "learning_rate": 1.3589834522448503e-06, "loss": 0.0219, "step": 193710 }, { "epoch": 1.5674407314507648, "grad_norm": 0.45031067728996277, "learning_rate": 1.358499557447141e-06, "loss": 0.0184, "step": 193720 }, { "epoch": 1.5675216441459665, "grad_norm": 0.5092394351959229, "learning_rate": 1.3580157352714702e-06, "loss": 0.0197, "step": 193730 }, { "epoch": 1.5676025568411682, "grad_norm": 0.4702492356300354, "learning_rate": 1.3575319857274872e-06, "loss": 0.0249, "step": 193740 }, { "epoch": 1.5676834695363704, "grad_norm": 0.25305479764938354, "learning_rate": 1.3570483088248392e-06, "loss": 0.0179, "step": 193750 }, { "epoch": 1.567764382231572, "grad_norm": 0.5094743967056274, "learning_rate": 1.3565647045731723e-06, "loss": 0.0324, "step": 193760 }, { "epoch": 1.567845294926774, "grad_norm": 0.3364826738834381, "learning_rate": 1.3560811729821306e-06, "loss": 0.021, "step": 193770 }, { "epoch": 1.567926207621976, "grad_norm": 0.3556925356388092, "learning_rate": 1.355597714061357e-06, "loss": 0.0203, "step": 193780 }, { "epoch": 1.5680071203171777, "grad_norm": 0.7323254942893982, "learning_rate": 1.3551143278204932e-06, "loss": 0.0298, "step": 193790 }, { "epoch": 1.5680880330123796, "grad_norm": 0.09436865150928497, "learning_rate": 1.3546310142691798e-06, "loss": 0.0263, "step": 193800 }, { "epoch": 1.5681689457075816, "grad_norm": 0.18020157516002655, "learning_rate": 1.3541477734170532e-06, "loss": 0.0172, "step": 193810 }, { "epoch": 1.5682498584027833, "grad_norm": 0.5216124653816223, "learning_rate": 1.3536646052737556e-06, "loss": 0.0163, "step": 193820 }, { "epoch": 1.5683307710979852, "grad_norm": 0.3548571467399597, "learning_rate": 1.3531815098489186e-06, "loss": 0.0194, "step": 193830 }, { "epoch": 1.5684116837931872, "grad_norm": 0.06895329058170319, "learning_rate": 1.352698487152176e-06, "loss": 0.019, "step": 193840 }, { "epoch": 1.568492596488389, "grad_norm": 0.5106825232505798, "learning_rate": 1.3522155371931634e-06, "loss": 0.0203, "step": 193850 }, { "epoch": 1.568573509183591, "grad_norm": 0.37423989176750183, "learning_rate": 1.3517326599815129e-06, "loss": 0.0151, "step": 193860 }, { "epoch": 1.5686544218787928, "grad_norm": 0.5368314981460571, "learning_rate": 1.35124985552685e-06, "loss": 0.0141, "step": 193870 }, { "epoch": 1.5687353345739945, "grad_norm": 0.19706176221370697, "learning_rate": 1.350767123838807e-06, "loss": 0.0223, "step": 193880 }, { "epoch": 1.5688162472691967, "grad_norm": 0.1815885603427887, "learning_rate": 1.3502844649270098e-06, "loss": 0.0153, "step": 193890 }, { "epoch": 1.5688971599643984, "grad_norm": 0.6603992581367493, "learning_rate": 1.3498018788010842e-06, "loss": 0.0237, "step": 193900 }, { "epoch": 1.5689780726596003, "grad_norm": 0.2128487229347229, "learning_rate": 1.3493193654706538e-06, "loss": 0.0118, "step": 193910 }, { "epoch": 1.5690589853548023, "grad_norm": 0.30337241291999817, "learning_rate": 1.348836924945342e-06, "loss": 0.0229, "step": 193920 }, { "epoch": 1.569139898050004, "grad_norm": 0.48598241806030273, "learning_rate": 1.34835455723477e-06, "loss": 0.0169, "step": 193930 }, { "epoch": 1.569220810745206, "grad_norm": 0.46998730301856995, "learning_rate": 1.3478722623485575e-06, "loss": 0.0277, "step": 193940 }, { "epoch": 1.5693017234404079, "grad_norm": 0.38342779874801636, "learning_rate": 1.3473900402963219e-06, "loss": 0.0152, "step": 193950 }, { "epoch": 1.5693826361356096, "grad_norm": 0.6578203439712524, "learning_rate": 1.3469078910876816e-06, "loss": 0.0309, "step": 193960 }, { "epoch": 1.5694635488308115, "grad_norm": 0.3624677360057831, "learning_rate": 1.3464258147322513e-06, "loss": 0.0157, "step": 193970 }, { "epoch": 1.5695444615260135, "grad_norm": 0.5556983351707458, "learning_rate": 1.345943811239645e-06, "loss": 0.0263, "step": 193980 }, { "epoch": 1.5696253742212152, "grad_norm": 0.02668745256960392, "learning_rate": 1.3454618806194757e-06, "loss": 0.016, "step": 193990 }, { "epoch": 1.5697062869164173, "grad_norm": 0.35993340611457825, "learning_rate": 1.3449800228813537e-06, "loss": 0.0201, "step": 194000 }, { "epoch": 1.569787199611619, "grad_norm": 0.2901374399662018, "learning_rate": 1.3444982380348893e-06, "loss": 0.0147, "step": 194010 }, { "epoch": 1.569868112306821, "grad_norm": 0.5026450157165527, "learning_rate": 1.3440165260896904e-06, "loss": 0.0216, "step": 194020 }, { "epoch": 1.569949025002023, "grad_norm": 0.3753160536289215, "learning_rate": 1.3435348870553622e-06, "loss": 0.0316, "step": 194030 }, { "epoch": 1.5700299376972247, "grad_norm": 0.5477102994918823, "learning_rate": 1.343053320941513e-06, "loss": 0.0172, "step": 194040 }, { "epoch": 1.5701108503924266, "grad_norm": 0.38995760679244995, "learning_rate": 1.3425718277577466e-06, "loss": 0.0203, "step": 194050 }, { "epoch": 1.5701917630876285, "grad_norm": 0.3097293972969055, "learning_rate": 1.342090407513661e-06, "loss": 0.0178, "step": 194060 }, { "epoch": 1.5702726757828303, "grad_norm": 0.1317460536956787, "learning_rate": 1.3416090602188615e-06, "loss": 0.0118, "step": 194070 }, { "epoch": 1.5703535884780322, "grad_norm": 0.3659115731716156, "learning_rate": 1.3411277858829474e-06, "loss": 0.0171, "step": 194080 }, { "epoch": 1.5704345011732341, "grad_norm": 0.5223232507705688, "learning_rate": 1.340646584515513e-06, "loss": 0.0336, "step": 194090 }, { "epoch": 1.5705154138684359, "grad_norm": 0.5250966548919678, "learning_rate": 1.3401654561261585e-06, "loss": 0.018, "step": 194100 }, { "epoch": 1.5705963265636378, "grad_norm": 0.3544439375400543, "learning_rate": 1.3396844007244775e-06, "loss": 0.0212, "step": 194110 }, { "epoch": 1.5706772392588397, "grad_norm": 0.523173451423645, "learning_rate": 1.3392034183200636e-06, "loss": 0.0189, "step": 194120 }, { "epoch": 1.5707581519540414, "grad_norm": 0.23738063871860504, "learning_rate": 1.3387225089225098e-06, "loss": 0.0088, "step": 194130 }, { "epoch": 1.5708390646492436, "grad_norm": 0.6625652313232422, "learning_rate": 1.338241672541406e-06, "loss": 0.0218, "step": 194140 }, { "epoch": 1.5709199773444453, "grad_norm": 0.33940812945365906, "learning_rate": 1.3377609091863418e-06, "loss": 0.0284, "step": 194150 }, { "epoch": 1.5710008900396473, "grad_norm": 0.22461754083633423, "learning_rate": 1.3372802188669049e-06, "loss": 0.0076, "step": 194160 }, { "epoch": 1.5710818027348492, "grad_norm": 0.3302481174468994, "learning_rate": 1.3367996015926814e-06, "loss": 0.0181, "step": 194170 }, { "epoch": 1.571162715430051, "grad_norm": 0.25334909558296204, "learning_rate": 1.3363190573732571e-06, "loss": 0.0112, "step": 194180 }, { "epoch": 1.5712436281252529, "grad_norm": 0.6069832444190979, "learning_rate": 1.3358385862182143e-06, "loss": 0.0231, "step": 194190 }, { "epoch": 1.5713245408204548, "grad_norm": 0.18067650496959686, "learning_rate": 1.3353581881371341e-06, "loss": 0.012, "step": 194200 }, { "epoch": 1.5714054535156565, "grad_norm": 0.30331477522850037, "learning_rate": 1.334877863139602e-06, "loss": 0.0106, "step": 194210 }, { "epoch": 1.5714863662108585, "grad_norm": 0.5144851803779602, "learning_rate": 1.3343976112351897e-06, "loss": 0.0158, "step": 194220 }, { "epoch": 1.5715672789060604, "grad_norm": 0.45060089230537415, "learning_rate": 1.3339174324334807e-06, "loss": 0.0174, "step": 194230 }, { "epoch": 1.5716481916012621, "grad_norm": 0.19769378006458282, "learning_rate": 1.3334373267440504e-06, "loss": 0.0136, "step": 194240 }, { "epoch": 1.571729104296464, "grad_norm": 0.9257219433784485, "learning_rate": 1.332957294176469e-06, "loss": 0.0267, "step": 194250 }, { "epoch": 1.571810016991666, "grad_norm": 0.8222692012786865, "learning_rate": 1.332477334740314e-06, "loss": 0.0281, "step": 194260 }, { "epoch": 1.5718909296868677, "grad_norm": 0.4235246777534485, "learning_rate": 1.3319974484451586e-06, "loss": 0.0214, "step": 194270 }, { "epoch": 1.5719718423820699, "grad_norm": 0.5134044289588928, "learning_rate": 1.331517635300567e-06, "loss": 0.0203, "step": 194280 }, { "epoch": 1.5720527550772716, "grad_norm": 0.5442884564399719, "learning_rate": 1.3310378953161129e-06, "loss": 0.0221, "step": 194290 }, { "epoch": 1.5721336677724735, "grad_norm": 0.25139206647872925, "learning_rate": 1.330558228501363e-06, "loss": 0.0158, "step": 194300 }, { "epoch": 1.5722145804676755, "grad_norm": 0.24462850391864777, "learning_rate": 1.3300786348658824e-06, "loss": 0.0159, "step": 194310 }, { "epoch": 1.5722954931628772, "grad_norm": 0.008183012716472149, "learning_rate": 1.3295991144192361e-06, "loss": 0.019, "step": 194320 }, { "epoch": 1.5723764058580791, "grad_norm": 0.16510765254497528, "learning_rate": 1.3291196671709867e-06, "loss": 0.0162, "step": 194330 }, { "epoch": 1.572457318553281, "grad_norm": 0.26936471462249756, "learning_rate": 1.3286402931306957e-06, "loss": 0.0093, "step": 194340 }, { "epoch": 1.5725382312484828, "grad_norm": 0.3593365252017975, "learning_rate": 1.3281609923079242e-06, "loss": 0.0256, "step": 194350 }, { "epoch": 1.5726191439436847, "grad_norm": 0.4828703999519348, "learning_rate": 1.3276817647122297e-06, "loss": 0.0262, "step": 194360 }, { "epoch": 1.5727000566388867, "grad_norm": 0.2872883975505829, "learning_rate": 1.3272026103531699e-06, "loss": 0.019, "step": 194370 }, { "epoch": 1.5727809693340884, "grad_norm": 0.5015137791633606, "learning_rate": 1.3267235292403003e-06, "loss": 0.0194, "step": 194380 }, { "epoch": 1.5728618820292906, "grad_norm": 0.3104906380176544, "learning_rate": 1.3262445213831737e-06, "loss": 0.0156, "step": 194390 }, { "epoch": 1.5729427947244923, "grad_norm": 0.25373461842536926, "learning_rate": 1.3257655867913477e-06, "loss": 0.0296, "step": 194400 }, { "epoch": 1.573023707419694, "grad_norm": 0.5007390379905701, "learning_rate": 1.3252867254743684e-06, "loss": 0.0216, "step": 194410 }, { "epoch": 1.5731046201148962, "grad_norm": 0.19509483873844147, "learning_rate": 1.3248079374417867e-06, "loss": 0.0104, "step": 194420 }, { "epoch": 1.5731855328100979, "grad_norm": 0.569858968257904, "learning_rate": 1.3243292227031552e-06, "loss": 0.0176, "step": 194430 }, { "epoch": 1.5732664455052998, "grad_norm": 0.25330308079719543, "learning_rate": 1.323850581268014e-06, "loss": 0.0261, "step": 194440 }, { "epoch": 1.5733473582005018, "grad_norm": 0.4009173512458801, "learning_rate": 1.3233720131459143e-06, "loss": 0.0143, "step": 194450 }, { "epoch": 1.5734282708957035, "grad_norm": 0.33377835154533386, "learning_rate": 1.3228935183463993e-06, "loss": 0.0248, "step": 194460 }, { "epoch": 1.5735091835909054, "grad_norm": 0.3567396402359009, "learning_rate": 1.3224150968790078e-06, "loss": 0.0189, "step": 194470 }, { "epoch": 1.5735900962861074, "grad_norm": 0.5761469006538391, "learning_rate": 1.321936748753284e-06, "loss": 0.0218, "step": 194480 }, { "epoch": 1.573671008981309, "grad_norm": 0.4410286843776703, "learning_rate": 1.3214584739787694e-06, "loss": 0.0172, "step": 194490 }, { "epoch": 1.573751921676511, "grad_norm": 0.08734425902366638, "learning_rate": 1.3209802725649968e-06, "loss": 0.019, "step": 194500 }, { "epoch": 1.573832834371713, "grad_norm": 0.22503766417503357, "learning_rate": 1.3205021445215072e-06, "loss": 0.019, "step": 194510 }, { "epoch": 1.5739137470669147, "grad_norm": 0.5012041330337524, "learning_rate": 1.3200240898578342e-06, "loss": 0.0184, "step": 194520 }, { "epoch": 1.5739946597621168, "grad_norm": 0.43624624609947205, "learning_rate": 1.3195461085835125e-06, "loss": 0.0169, "step": 194530 }, { "epoch": 1.5740755724573186, "grad_norm": 0.42144253849983215, "learning_rate": 1.3190682007080734e-06, "loss": 0.019, "step": 194540 }, { "epoch": 1.5741564851525203, "grad_norm": 0.2051822394132614, "learning_rate": 1.3185903662410488e-06, "loss": 0.0212, "step": 194550 }, { "epoch": 1.5742373978477224, "grad_norm": 0.3061341941356659, "learning_rate": 1.3181126051919673e-06, "loss": 0.0144, "step": 194560 }, { "epoch": 1.5743183105429241, "grad_norm": 0.4990403354167938, "learning_rate": 1.3176349175703569e-06, "loss": 0.0264, "step": 194570 }, { "epoch": 1.574399223238126, "grad_norm": 0.41310518980026245, "learning_rate": 1.3171573033857427e-06, "loss": 0.0157, "step": 194580 }, { "epoch": 1.574480135933328, "grad_norm": 0.3637442886829376, "learning_rate": 1.3166797626476546e-06, "loss": 0.0166, "step": 194590 }, { "epoch": 1.5745610486285297, "grad_norm": 0.5687440633773804, "learning_rate": 1.3162022953656111e-06, "loss": 0.0181, "step": 194600 }, { "epoch": 1.5746419613237317, "grad_norm": 0.4372296929359436, "learning_rate": 1.3157249015491346e-06, "loss": 0.0231, "step": 194610 }, { "epoch": 1.5747228740189336, "grad_norm": 0.38328760862350464, "learning_rate": 1.3152475812077497e-06, "loss": 0.0145, "step": 194620 }, { "epoch": 1.5748037867141353, "grad_norm": 0.5123530626296997, "learning_rate": 1.3147703343509722e-06, "loss": 0.0174, "step": 194630 }, { "epoch": 1.5748846994093373, "grad_norm": 0.14905108511447906, "learning_rate": 1.3142931609883187e-06, "loss": 0.0214, "step": 194640 }, { "epoch": 1.5749656121045392, "grad_norm": 0.2324405312538147, "learning_rate": 1.31381606112931e-06, "loss": 0.0264, "step": 194650 }, { "epoch": 1.575046524799741, "grad_norm": 0.23576101660728455, "learning_rate": 1.3133390347834552e-06, "loss": 0.0169, "step": 194660 }, { "epoch": 1.575127437494943, "grad_norm": 0.40790626406669617, "learning_rate": 1.3128620819602717e-06, "loss": 0.0222, "step": 194670 }, { "epoch": 1.5752083501901448, "grad_norm": 0.6059476733207703, "learning_rate": 1.3123852026692719e-06, "loss": 0.0259, "step": 194680 }, { "epoch": 1.5752892628853468, "grad_norm": 0.33495476841926575, "learning_rate": 1.3119083969199615e-06, "loss": 0.0282, "step": 194690 }, { "epoch": 1.5753701755805487, "grad_norm": 0.19632068276405334, "learning_rate": 1.3114316647218533e-06, "loss": 0.0171, "step": 194700 }, { "epoch": 1.5754510882757504, "grad_norm": 0.3238441050052643, "learning_rate": 1.3109550060844533e-06, "loss": 0.0195, "step": 194710 }, { "epoch": 1.5755320009709524, "grad_norm": 0.6306163668632507, "learning_rate": 1.3104784210172688e-06, "loss": 0.0216, "step": 194720 }, { "epoch": 1.5756129136661543, "grad_norm": 0.38065919280052185, "learning_rate": 1.3100019095298028e-06, "loss": 0.0129, "step": 194730 }, { "epoch": 1.575693826361356, "grad_norm": 0.2510652542114258, "learning_rate": 1.3095254716315587e-06, "loss": 0.0184, "step": 194740 }, { "epoch": 1.575774739056558, "grad_norm": 0.4400785267353058, "learning_rate": 1.3090491073320378e-06, "loss": 0.0217, "step": 194750 }, { "epoch": 1.57585565175176, "grad_norm": 0.4112212061882019, "learning_rate": 1.3085728166407414e-06, "loss": 0.0216, "step": 194760 }, { "epoch": 1.5759365644469616, "grad_norm": 0.5840732455253601, "learning_rate": 1.3080965995671647e-06, "loss": 0.0156, "step": 194770 }, { "epoch": 1.5760174771421636, "grad_norm": 0.328791081905365, "learning_rate": 1.3076204561208111e-06, "loss": 0.0149, "step": 194780 }, { "epoch": 1.5760983898373655, "grad_norm": 0.37370049953460693, "learning_rate": 1.3071443863111705e-06, "loss": 0.0175, "step": 194790 }, { "epoch": 1.5761793025325672, "grad_norm": 0.2990460991859436, "learning_rate": 1.3066683901477385e-06, "loss": 0.0206, "step": 194800 }, { "epoch": 1.5762602152277694, "grad_norm": 0.5348486304283142, "learning_rate": 1.3061924676400107e-06, "loss": 0.0119, "step": 194810 }, { "epoch": 1.576341127922971, "grad_norm": 0.43313395977020264, "learning_rate": 1.3057166187974751e-06, "loss": 0.0182, "step": 194820 }, { "epoch": 1.576422040618173, "grad_norm": 0.45956629514694214, "learning_rate": 1.3052408436296204e-06, "loss": 0.0103, "step": 194830 }, { "epoch": 1.576502953313375, "grad_norm": 0.2767736315727234, "learning_rate": 1.3047651421459406e-06, "loss": 0.018, "step": 194840 }, { "epoch": 1.5765838660085767, "grad_norm": 0.5308359861373901, "learning_rate": 1.3042895143559165e-06, "loss": 0.018, "step": 194850 }, { "epoch": 1.5766647787037786, "grad_norm": 0.46153008937835693, "learning_rate": 1.3038139602690369e-06, "loss": 0.0218, "step": 194860 }, { "epoch": 1.5767456913989806, "grad_norm": 0.9797713756561279, "learning_rate": 1.3033384798947861e-06, "loss": 0.025, "step": 194870 }, { "epoch": 1.5768266040941823, "grad_norm": 0.30349966883659363, "learning_rate": 1.3028630732426428e-06, "loss": 0.0145, "step": 194880 }, { "epoch": 1.5769075167893842, "grad_norm": 0.32753244042396545, "learning_rate": 1.3023877403220925e-06, "loss": 0.0179, "step": 194890 }, { "epoch": 1.5769884294845862, "grad_norm": 0.2964467704296112, "learning_rate": 1.301912481142613e-06, "loss": 0.0189, "step": 194900 }, { "epoch": 1.577069342179788, "grad_norm": 0.11368878930807114, "learning_rate": 1.3014372957136801e-06, "loss": 0.0206, "step": 194910 }, { "epoch": 1.5771502548749898, "grad_norm": 0.30612775683403015, "learning_rate": 1.3009621840447733e-06, "loss": 0.024, "step": 194920 }, { "epoch": 1.5772311675701918, "grad_norm": 0.2586591839790344, "learning_rate": 1.3004871461453672e-06, "loss": 0.0149, "step": 194930 }, { "epoch": 1.5773120802653935, "grad_norm": 0.40848487615585327, "learning_rate": 1.3000121820249345e-06, "loss": 0.0171, "step": 194940 }, { "epoch": 1.5773929929605957, "grad_norm": 0.13897447288036346, "learning_rate": 1.2995372916929478e-06, "loss": 0.016, "step": 194950 }, { "epoch": 1.5774739056557974, "grad_norm": 0.32355111837387085, "learning_rate": 1.299062475158878e-06, "loss": 0.0124, "step": 194960 }, { "epoch": 1.5775548183509993, "grad_norm": 0.3785461485385895, "learning_rate": 1.2985877324321938e-06, "loss": 0.0092, "step": 194970 }, { "epoch": 1.5776357310462013, "grad_norm": 0.4226323664188385, "learning_rate": 1.2981130635223637e-06, "loss": 0.0281, "step": 194980 }, { "epoch": 1.577716643741403, "grad_norm": 0.3614126741886139, "learning_rate": 1.297638468438852e-06, "loss": 0.0315, "step": 194990 }, { "epoch": 1.577797556436605, "grad_norm": 0.5252849459648132, "learning_rate": 1.297163947191128e-06, "loss": 0.0216, "step": 195000 }, { "epoch": 1.5778784691318068, "grad_norm": 0.2248598039150238, "learning_rate": 1.2966894997886499e-06, "loss": 0.0179, "step": 195010 }, { "epoch": 1.5779593818270086, "grad_norm": 0.8824974298477173, "learning_rate": 1.2962151262408807e-06, "loss": 0.0308, "step": 195020 }, { "epoch": 1.5780402945222105, "grad_norm": 0.035363901406526566, "learning_rate": 1.2957408265572846e-06, "loss": 0.0124, "step": 195030 }, { "epoch": 1.5781212072174124, "grad_norm": 0.4490935802459717, "learning_rate": 1.2952666007473163e-06, "loss": 0.0373, "step": 195040 }, { "epoch": 1.5782021199126142, "grad_norm": 0.3607196807861328, "learning_rate": 1.2947924488204334e-06, "loss": 0.0146, "step": 195050 }, { "epoch": 1.5782830326078163, "grad_norm": 0.3954817056655884, "learning_rate": 1.2943183707860957e-06, "loss": 0.0213, "step": 195060 }, { "epoch": 1.578363945303018, "grad_norm": 0.3873703181743622, "learning_rate": 1.293844366653752e-06, "loss": 0.0116, "step": 195070 }, { "epoch": 1.5784448579982198, "grad_norm": 0.33880242705345154, "learning_rate": 1.2933704364328604e-06, "loss": 0.0142, "step": 195080 }, { "epoch": 1.578525770693422, "grad_norm": 0.6561703681945801, "learning_rate": 1.2928965801328713e-06, "loss": 0.0167, "step": 195090 }, { "epoch": 1.5786066833886236, "grad_norm": 0.3952414393424988, "learning_rate": 1.2924227977632308e-06, "loss": 0.0245, "step": 195100 }, { "epoch": 1.5786875960838256, "grad_norm": 0.3443869650363922, "learning_rate": 1.2919490893333925e-06, "loss": 0.0232, "step": 195110 }, { "epoch": 1.5787685087790275, "grad_norm": 0.3114664852619171, "learning_rate": 1.2914754548528014e-06, "loss": 0.0184, "step": 195120 }, { "epoch": 1.5788494214742292, "grad_norm": 0.2533332109451294, "learning_rate": 1.2910018943309033e-06, "loss": 0.0339, "step": 195130 }, { "epoch": 1.5789303341694312, "grad_norm": 0.2861807942390442, "learning_rate": 1.2905284077771423e-06, "loss": 0.0273, "step": 195140 }, { "epoch": 1.5790112468646331, "grad_norm": 0.7641224265098572, "learning_rate": 1.2900549952009612e-06, "loss": 0.0194, "step": 195150 }, { "epoch": 1.5790921595598348, "grad_norm": 0.15472032129764557, "learning_rate": 1.2895816566118014e-06, "loss": 0.0128, "step": 195160 }, { "epoch": 1.5791730722550368, "grad_norm": 0.3803578019142151, "learning_rate": 1.2891083920191023e-06, "loss": 0.0136, "step": 195170 }, { "epoch": 1.5792539849502387, "grad_norm": 0.2778491675853729, "learning_rate": 1.2886352014323023e-06, "loss": 0.0146, "step": 195180 }, { "epoch": 1.5793348976454404, "grad_norm": 0.3601343333721161, "learning_rate": 1.2881620848608378e-06, "loss": 0.0243, "step": 195190 }, { "epoch": 1.5794158103406426, "grad_norm": 0.5979670286178589, "learning_rate": 1.2876890423141446e-06, "loss": 0.0207, "step": 195200 }, { "epoch": 1.5794967230358443, "grad_norm": 0.47068002820014954, "learning_rate": 1.2872160738016554e-06, "loss": 0.0173, "step": 195210 }, { "epoch": 1.579577635731046, "grad_norm": 0.3757019340991974, "learning_rate": 1.2867431793328062e-06, "loss": 0.019, "step": 195220 }, { "epoch": 1.5796585484262482, "grad_norm": 0.14868466556072235, "learning_rate": 1.2862703589170238e-06, "loss": 0.0122, "step": 195230 }, { "epoch": 1.57973946112145, "grad_norm": 0.5906819701194763, "learning_rate": 1.2857976125637378e-06, "loss": 0.0188, "step": 195240 }, { "epoch": 1.5798203738166519, "grad_norm": 0.41242098808288574, "learning_rate": 1.2853249402823797e-06, "loss": 0.0104, "step": 195250 }, { "epoch": 1.5799012865118538, "grad_norm": 0.22463294863700867, "learning_rate": 1.284852342082371e-06, "loss": 0.0161, "step": 195260 }, { "epoch": 1.5799821992070555, "grad_norm": 0.1317579746246338, "learning_rate": 1.2843798179731414e-06, "loss": 0.0209, "step": 195270 }, { "epoch": 1.5800631119022575, "grad_norm": 0.3950454294681549, "learning_rate": 1.2839073679641135e-06, "loss": 0.0185, "step": 195280 }, { "epoch": 1.5801440245974594, "grad_norm": 0.2564832270145416, "learning_rate": 1.2834349920647044e-06, "loss": 0.0134, "step": 195290 }, { "epoch": 1.5802249372926611, "grad_norm": 0.4071229100227356, "learning_rate": 1.2829626902843407e-06, "loss": 0.0187, "step": 195300 }, { "epoch": 1.580305849987863, "grad_norm": 0.1810622662305832, "learning_rate": 1.2824904626324386e-06, "loss": 0.0315, "step": 195310 }, { "epoch": 1.580386762683065, "grad_norm": 0.3977459669113159, "learning_rate": 1.282018309118417e-06, "loss": 0.014, "step": 195320 }, { "epoch": 1.5804676753782667, "grad_norm": 0.31398600339889526, "learning_rate": 1.28154622975169e-06, "loss": 0.015, "step": 195330 }, { "epoch": 1.5805485880734689, "grad_norm": 0.689826488494873, "learning_rate": 1.2810742245416746e-06, "loss": 0.0249, "step": 195340 }, { "epoch": 1.5806295007686706, "grad_norm": 0.46246257424354553, "learning_rate": 1.2806022934977818e-06, "loss": 0.0213, "step": 195350 }, { "epoch": 1.5807104134638725, "grad_norm": 0.2966441810131073, "learning_rate": 1.2801304366294254e-06, "loss": 0.0094, "step": 195360 }, { "epoch": 1.5807913261590745, "grad_norm": 0.4041379988193512, "learning_rate": 1.2796586539460137e-06, "loss": 0.0157, "step": 195370 }, { "epoch": 1.5808722388542762, "grad_norm": 0.14422371983528137, "learning_rate": 1.279186945456956e-06, "loss": 0.022, "step": 195380 }, { "epoch": 1.5809531515494781, "grad_norm": 0.3793698251247406, "learning_rate": 1.27871531117166e-06, "loss": 0.0162, "step": 195390 }, { "epoch": 1.58103406424468, "grad_norm": 0.5467894673347473, "learning_rate": 1.2782437510995299e-06, "loss": 0.0249, "step": 195400 }, { "epoch": 1.5811149769398818, "grad_norm": 0.4660378396511078, "learning_rate": 1.2777722652499736e-06, "loss": 0.0088, "step": 195410 }, { "epoch": 1.5811958896350837, "grad_norm": 0.2748298943042755, "learning_rate": 1.2773008536323906e-06, "loss": 0.0218, "step": 195420 }, { "epoch": 1.5812768023302857, "grad_norm": 0.674270749092102, "learning_rate": 1.2768295162561812e-06, "loss": 0.0174, "step": 195430 }, { "epoch": 1.5813577150254874, "grad_norm": 0.41482821106910706, "learning_rate": 1.27635825313075e-06, "loss": 0.0226, "step": 195440 }, { "epoch": 1.5814386277206893, "grad_norm": 0.3507642149925232, "learning_rate": 1.2758870642654913e-06, "loss": 0.0153, "step": 195450 }, { "epoch": 1.5815195404158913, "grad_norm": 0.31716734170913696, "learning_rate": 1.2754159496698015e-06, "loss": 0.0151, "step": 195460 }, { "epoch": 1.581600453111093, "grad_norm": 0.49769672751426697, "learning_rate": 1.2749449093530798e-06, "loss": 0.0091, "step": 195470 }, { "epoch": 1.5816813658062951, "grad_norm": 0.2628585994243622, "learning_rate": 1.2744739433247171e-06, "loss": 0.0149, "step": 195480 }, { "epoch": 1.5817622785014969, "grad_norm": 0.4647241234779358, "learning_rate": 1.2740030515941072e-06, "loss": 0.0198, "step": 195490 }, { "epoch": 1.5818431911966988, "grad_norm": 0.03909875825047493, "learning_rate": 1.2735322341706397e-06, "loss": 0.0173, "step": 195500 }, { "epoch": 1.5819241038919007, "grad_norm": 0.37174269556999207, "learning_rate": 1.2730614910637057e-06, "loss": 0.0116, "step": 195510 }, { "epoch": 1.5820050165871025, "grad_norm": 0.7395440340042114, "learning_rate": 1.2725908222826922e-06, "loss": 0.0173, "step": 195520 }, { "epoch": 1.5820859292823044, "grad_norm": 0.006306121125817299, "learning_rate": 1.2721202278369855e-06, "loss": 0.0193, "step": 195530 }, { "epoch": 1.5821668419775063, "grad_norm": 0.27677178382873535, "learning_rate": 1.2716497077359713e-06, "loss": 0.0158, "step": 195540 }, { "epoch": 1.582247754672708, "grad_norm": 0.2648894488811493, "learning_rate": 1.2711792619890324e-06, "loss": 0.0179, "step": 195550 }, { "epoch": 1.58232866736791, "grad_norm": 0.21710428595542908, "learning_rate": 1.2707088906055515e-06, "loss": 0.0085, "step": 195560 }, { "epoch": 1.582409580063112, "grad_norm": 0.35441914200782776, "learning_rate": 1.270238593594909e-06, "loss": 0.0174, "step": 195570 }, { "epoch": 1.5824904927583137, "grad_norm": 0.13369788229465485, "learning_rate": 1.2697683709664838e-06, "loss": 0.023, "step": 195580 }, { "epoch": 1.5825714054535158, "grad_norm": 0.23087410628795624, "learning_rate": 1.2692982227296534e-06, "loss": 0.0174, "step": 195590 }, { "epoch": 1.5826523181487175, "grad_norm": 0.7523202896118164, "learning_rate": 1.2688281488937943e-06, "loss": 0.0332, "step": 195600 }, { "epoch": 1.5827332308439193, "grad_norm": 0.39779379963874817, "learning_rate": 1.2683581494682806e-06, "loss": 0.0221, "step": 195610 }, { "epoch": 1.5828141435391214, "grad_norm": 0.1028672605752945, "learning_rate": 1.2678882244624847e-06, "loss": 0.0247, "step": 195620 }, { "epoch": 1.5828950562343231, "grad_norm": 0.3262469470500946, "learning_rate": 1.2674183738857821e-06, "loss": 0.0124, "step": 195630 }, { "epoch": 1.582975968929525, "grad_norm": 0.24259717762470245, "learning_rate": 1.2669485977475389e-06, "loss": 0.017, "step": 195640 }, { "epoch": 1.583056881624727, "grad_norm": 0.2979283332824707, "learning_rate": 1.2664788960571228e-06, "loss": 0.0237, "step": 195650 }, { "epoch": 1.5831377943199287, "grad_norm": 0.30333760380744934, "learning_rate": 1.2660092688239055e-06, "loss": 0.0081, "step": 195660 }, { "epoch": 1.5832187070151307, "grad_norm": 0.5281480550765991, "learning_rate": 1.2655397160572519e-06, "loss": 0.0256, "step": 195670 }, { "epoch": 1.5832996197103326, "grad_norm": 0.39963898062705994, "learning_rate": 1.265070237766522e-06, "loss": 0.0117, "step": 195680 }, { "epoch": 1.5833805324055343, "grad_norm": 0.21531127393245697, "learning_rate": 1.2646008339610826e-06, "loss": 0.0179, "step": 195690 }, { "epoch": 1.5834614451007363, "grad_norm": 0.3638491630554199, "learning_rate": 1.2641315046502945e-06, "loss": 0.014, "step": 195700 }, { "epoch": 1.5835423577959382, "grad_norm": 0.5872242450714111, "learning_rate": 1.2636622498435163e-06, "loss": 0.0254, "step": 195710 }, { "epoch": 1.58362327049114, "grad_norm": 0.41113728284835815, "learning_rate": 1.2631930695501066e-06, "loss": 0.011, "step": 195720 }, { "epoch": 1.583704183186342, "grad_norm": 0.6768860220909119, "learning_rate": 1.2627239637794226e-06, "loss": 0.0187, "step": 195730 }, { "epoch": 1.5837850958815438, "grad_norm": 0.272811621427536, "learning_rate": 1.262254932540819e-06, "loss": 0.0368, "step": 195740 }, { "epoch": 1.5838660085767455, "grad_norm": 0.18942414224147797, "learning_rate": 1.2617859758436507e-06, "loss": 0.0176, "step": 195750 }, { "epoch": 1.5839469212719477, "grad_norm": 0.42674192786216736, "learning_rate": 1.261317093697269e-06, "loss": 0.0242, "step": 195760 }, { "epoch": 1.5840278339671494, "grad_norm": 0.39135733246803284, "learning_rate": 1.260848286111025e-06, "loss": 0.0158, "step": 195770 }, { "epoch": 1.5841087466623514, "grad_norm": 0.4673939347267151, "learning_rate": 1.2603795530942687e-06, "loss": 0.0207, "step": 195780 }, { "epoch": 1.5841896593575533, "grad_norm": 0.5023181438446045, "learning_rate": 1.2599108946563471e-06, "loss": 0.0182, "step": 195790 }, { "epoch": 1.584270572052755, "grad_norm": 0.26051414012908936, "learning_rate": 1.2594423108066074e-06, "loss": 0.0203, "step": 195800 }, { "epoch": 1.584351484747957, "grad_norm": 0.3970929980278015, "learning_rate": 1.2589738015543922e-06, "loss": 0.0276, "step": 195810 }, { "epoch": 1.584432397443159, "grad_norm": 0.47334423661231995, "learning_rate": 1.2585053669090502e-06, "loss": 0.014, "step": 195820 }, { "epoch": 1.5845133101383606, "grad_norm": 0.17307765781879425, "learning_rate": 1.2580370068799186e-06, "loss": 0.0202, "step": 195830 }, { "epoch": 1.5845942228335625, "grad_norm": 0.42450809478759766, "learning_rate": 1.2575687214763376e-06, "loss": 0.0175, "step": 195840 }, { "epoch": 1.5846751355287645, "grad_norm": 0.22002501785755157, "learning_rate": 1.2571005107076494e-06, "loss": 0.0138, "step": 195850 }, { "epoch": 1.5847560482239662, "grad_norm": 0.6745476722717285, "learning_rate": 1.256632374583191e-06, "loss": 0.0144, "step": 195860 }, { "epoch": 1.5848369609191684, "grad_norm": 0.6043362021446228, "learning_rate": 1.2561643131122947e-06, "loss": 0.0227, "step": 195870 }, { "epoch": 1.58491787361437, "grad_norm": 0.4619528353214264, "learning_rate": 1.2556963263042992e-06, "loss": 0.016, "step": 195880 }, { "epoch": 1.5849987863095718, "grad_norm": 0.16276654601097107, "learning_rate": 1.2552284141685356e-06, "loss": 0.0158, "step": 195890 }, { "epoch": 1.585079699004774, "grad_norm": 0.38338467478752136, "learning_rate": 1.254760576714336e-06, "loss": 0.0221, "step": 195900 }, { "epoch": 1.5851606116999757, "grad_norm": 0.3243880271911621, "learning_rate": 1.2542928139510296e-06, "loss": 0.0147, "step": 195910 }, { "epoch": 1.5852415243951776, "grad_norm": 0.30818963050842285, "learning_rate": 1.2538251258879463e-06, "loss": 0.0135, "step": 195920 }, { "epoch": 1.5853224370903796, "grad_norm": 0.006610303185880184, "learning_rate": 1.2533575125344117e-06, "loss": 0.0176, "step": 195930 }, { "epoch": 1.5854033497855813, "grad_norm": 0.497077077627182, "learning_rate": 1.2528899738997523e-06, "loss": 0.0174, "step": 195940 }, { "epoch": 1.5854842624807832, "grad_norm": 0.7159580588340759, "learning_rate": 1.252422509993292e-06, "loss": 0.0325, "step": 195950 }, { "epoch": 1.5855651751759852, "grad_norm": 0.13983900845050812, "learning_rate": 1.251955120824353e-06, "loss": 0.0201, "step": 195960 }, { "epoch": 1.5856460878711869, "grad_norm": 0.2003641426563263, "learning_rate": 1.2514878064022562e-06, "loss": 0.0212, "step": 195970 }, { "epoch": 1.5857270005663888, "grad_norm": 0.19275861978530884, "learning_rate": 1.2510205667363224e-06, "loss": 0.0204, "step": 195980 }, { "epoch": 1.5858079132615908, "grad_norm": 0.6535763144493103, "learning_rate": 1.2505534018358684e-06, "loss": 0.0275, "step": 195990 }, { "epoch": 1.5858888259567925, "grad_norm": 0.4565538465976715, "learning_rate": 1.2500863117102118e-06, "loss": 0.017, "step": 196000 }, { "epoch": 1.5859697386519946, "grad_norm": 0.8827694058418274, "learning_rate": 1.249619296368667e-06, "loss": 0.0247, "step": 196010 }, { "epoch": 1.5860506513471964, "grad_norm": 0.2384014129638672, "learning_rate": 1.2491523558205482e-06, "loss": 0.0184, "step": 196020 }, { "epoch": 1.5861315640423983, "grad_norm": 0.35982292890548706, "learning_rate": 1.2486854900751656e-06, "loss": 0.02, "step": 196030 }, { "epoch": 1.5862124767376002, "grad_norm": 0.25487223267555237, "learning_rate": 1.2482186991418333e-06, "loss": 0.0159, "step": 196040 }, { "epoch": 1.586293389432802, "grad_norm": 0.6795988082885742, "learning_rate": 1.2477519830298595e-06, "loss": 0.0295, "step": 196050 }, { "epoch": 1.586374302128004, "grad_norm": 0.6725874543190002, "learning_rate": 1.2472853417485486e-06, "loss": 0.0208, "step": 196060 }, { "epoch": 1.5864552148232058, "grad_norm": 0.19220787286758423, "learning_rate": 1.2468187753072103e-06, "loss": 0.018, "step": 196070 }, { "epoch": 1.5865361275184076, "grad_norm": 0.689386248588562, "learning_rate": 1.2463522837151504e-06, "loss": 0.0234, "step": 196080 }, { "epoch": 1.5866170402136095, "grad_norm": 0.5252165794372559, "learning_rate": 1.2458858669816665e-06, "loss": 0.0197, "step": 196090 }, { "epoch": 1.5866979529088114, "grad_norm": 0.4126623272895813, "learning_rate": 1.245419525116065e-06, "loss": 0.0122, "step": 196100 }, { "epoch": 1.5867788656040132, "grad_norm": 0.3116239905357361, "learning_rate": 1.2449532581276453e-06, "loss": 0.0281, "step": 196110 }, { "epoch": 1.586859778299215, "grad_norm": 0.027580881491303444, "learning_rate": 1.2444870660257053e-06, "loss": 0.0161, "step": 196120 }, { "epoch": 1.586940690994417, "grad_norm": 0.3162533938884735, "learning_rate": 1.2440209488195425e-06, "loss": 0.0106, "step": 196130 }, { "epoch": 1.5870216036896188, "grad_norm": 0.5075695514678955, "learning_rate": 1.2435549065184527e-06, "loss": 0.0317, "step": 196140 }, { "epoch": 1.587102516384821, "grad_norm": 0.31463637948036194, "learning_rate": 1.2430889391317297e-06, "loss": 0.0168, "step": 196150 }, { "epoch": 1.5871834290800226, "grad_norm": 0.3503698408603668, "learning_rate": 1.2426230466686672e-06, "loss": 0.0195, "step": 196160 }, { "epoch": 1.5872643417752246, "grad_norm": 0.4525358974933624, "learning_rate": 1.2421572291385559e-06, "loss": 0.0142, "step": 196170 }, { "epoch": 1.5873452544704265, "grad_norm": 0.19541653990745544, "learning_rate": 1.241691486550685e-06, "loss": 0.0156, "step": 196180 }, { "epoch": 1.5874261671656282, "grad_norm": 0.48753249645233154, "learning_rate": 1.2412258189143434e-06, "loss": 0.0249, "step": 196190 }, { "epoch": 1.5875070798608302, "grad_norm": 0.0006286841817200184, "learning_rate": 1.2407602262388163e-06, "loss": 0.0131, "step": 196200 }, { "epoch": 1.5875879925560321, "grad_norm": 0.603749692440033, "learning_rate": 1.2402947085333939e-06, "loss": 0.0265, "step": 196210 }, { "epoch": 1.5876689052512338, "grad_norm": 0.229909285902977, "learning_rate": 1.239829265807354e-06, "loss": 0.0222, "step": 196220 }, { "epoch": 1.5877498179464358, "grad_norm": 0.26328086853027344, "learning_rate": 1.2393638980699807e-06, "loss": 0.0219, "step": 196230 }, { "epoch": 1.5878307306416377, "grad_norm": 0.11197008192539215, "learning_rate": 1.238898605330558e-06, "loss": 0.0166, "step": 196240 }, { "epoch": 1.5879116433368394, "grad_norm": 0.4007585942745209, "learning_rate": 1.2384333875983595e-06, "loss": 0.0153, "step": 196250 }, { "epoch": 1.5879925560320416, "grad_norm": 0.1877850741147995, "learning_rate": 1.2379682448826675e-06, "loss": 0.0214, "step": 196260 }, { "epoch": 1.5880734687272433, "grad_norm": 0.22873520851135254, "learning_rate": 1.2375031771927586e-06, "loss": 0.0151, "step": 196270 }, { "epoch": 1.588154381422445, "grad_norm": 0.33063697814941406, "learning_rate": 1.2370381845379026e-06, "loss": 0.0166, "step": 196280 }, { "epoch": 1.5882352941176472, "grad_norm": 0.25462502241134644, "learning_rate": 1.2365732669273778e-06, "loss": 0.0125, "step": 196290 }, { "epoch": 1.588316206812849, "grad_norm": 0.027600985020399094, "learning_rate": 1.236108424370454e-06, "loss": 0.0231, "step": 196300 }, { "epoch": 1.5883971195080508, "grad_norm": 0.24837611615657806, "learning_rate": 1.235643656876402e-06, "loss": 0.0157, "step": 196310 }, { "epoch": 1.5884780322032528, "grad_norm": 0.1973632276058197, "learning_rate": 1.2351789644544903e-06, "loss": 0.0312, "step": 196320 }, { "epoch": 1.5885589448984545, "grad_norm": 0.3540809154510498, "learning_rate": 1.2347143471139861e-06, "loss": 0.0207, "step": 196330 }, { "epoch": 1.5886398575936564, "grad_norm": 0.21079590916633606, "learning_rate": 1.2342498048641555e-06, "loss": 0.019, "step": 196340 }, { "epoch": 1.5887207702888584, "grad_norm": 0.4844026267528534, "learning_rate": 1.2337853377142629e-06, "loss": 0.0229, "step": 196350 }, { "epoch": 1.58880168298406, "grad_norm": 0.38491925597190857, "learning_rate": 1.2333209456735707e-06, "loss": 0.0362, "step": 196360 }, { "epoch": 1.588882595679262, "grad_norm": 0.4311814606189728, "learning_rate": 1.23285662875134e-06, "loss": 0.0193, "step": 196370 }, { "epoch": 1.588963508374464, "grad_norm": 0.02977878414094448, "learning_rate": 1.2323923869568317e-06, "loss": 0.0089, "step": 196380 }, { "epoch": 1.5890444210696657, "grad_norm": 0.21397092938423157, "learning_rate": 1.2319282202993015e-06, "loss": 0.021, "step": 196390 }, { "epoch": 1.5891253337648679, "grad_norm": 0.28065207600593567, "learning_rate": 1.2314641287880114e-06, "loss": 0.0146, "step": 196400 }, { "epoch": 1.5892062464600696, "grad_norm": 0.3713715672492981, "learning_rate": 1.2310001124322118e-06, "loss": 0.0203, "step": 196410 }, { "epoch": 1.5892871591552713, "grad_norm": 1.1922804117202759, "learning_rate": 1.2305361712411574e-06, "loss": 0.0317, "step": 196420 }, { "epoch": 1.5893680718504735, "grad_norm": 0.30853620171546936, "learning_rate": 1.2300723052241036e-06, "loss": 0.0169, "step": 196430 }, { "epoch": 1.5894489845456752, "grad_norm": 0.2590756416320801, "learning_rate": 1.2296085143902963e-06, "loss": 0.016, "step": 196440 }, { "epoch": 1.5895298972408771, "grad_norm": 0.13279667496681213, "learning_rate": 1.2291447987489896e-06, "loss": 0.0228, "step": 196450 }, { "epoch": 1.589610809936079, "grad_norm": 0.36035025119781494, "learning_rate": 1.2286811583094299e-06, "loss": 0.0216, "step": 196460 }, { "epoch": 1.5896917226312808, "grad_norm": 0.3801417350769043, "learning_rate": 1.2282175930808604e-06, "loss": 0.0128, "step": 196470 }, { "epoch": 1.5897726353264827, "grad_norm": 0.4403834342956543, "learning_rate": 1.22775410307253e-06, "loss": 0.0195, "step": 196480 }, { "epoch": 1.5898535480216847, "grad_norm": 0.314153790473938, "learning_rate": 1.2272906882936813e-06, "loss": 0.0202, "step": 196490 }, { "epoch": 1.5899344607168864, "grad_norm": 0.6265918612480164, "learning_rate": 1.2268273487535526e-06, "loss": 0.0139, "step": 196500 }, { "epoch": 1.5900153734120883, "grad_norm": 0.10537179559469223, "learning_rate": 1.226364084461389e-06, "loss": 0.0215, "step": 196510 }, { "epoch": 1.5900962861072903, "grad_norm": 0.32034847140312195, "learning_rate": 1.2259008954264262e-06, "loss": 0.0144, "step": 196520 }, { "epoch": 1.590177198802492, "grad_norm": 0.2627258896827698, "learning_rate": 1.2254377816579033e-06, "loss": 0.0177, "step": 196530 }, { "epoch": 1.5902581114976941, "grad_norm": 0.23480239510536194, "learning_rate": 1.2249747431650555e-06, "loss": 0.0186, "step": 196540 }, { "epoch": 1.5903390241928959, "grad_norm": 0.2834336757659912, "learning_rate": 1.2245117799571165e-06, "loss": 0.0321, "step": 196550 }, { "epoch": 1.5904199368880978, "grad_norm": 0.47058457136154175, "learning_rate": 1.2240488920433198e-06, "loss": 0.0178, "step": 196560 }, { "epoch": 1.5905008495832997, "grad_norm": 0.371587336063385, "learning_rate": 1.2235860794328963e-06, "loss": 0.0213, "step": 196570 }, { "epoch": 1.5905817622785015, "grad_norm": 0.04525793343782425, "learning_rate": 1.2231233421350752e-06, "loss": 0.0134, "step": 196580 }, { "epoch": 1.5906626749737034, "grad_norm": 0.21023628115653992, "learning_rate": 1.2226606801590885e-06, "loss": 0.0231, "step": 196590 }, { "epoch": 1.5907435876689053, "grad_norm": 0.3771092891693115, "learning_rate": 1.2221980935141586e-06, "loss": 0.022, "step": 196600 }, { "epoch": 1.590824500364107, "grad_norm": 0.3946102261543274, "learning_rate": 1.2217355822095106e-06, "loss": 0.0259, "step": 196610 }, { "epoch": 1.590905413059309, "grad_norm": 0.4993323087692261, "learning_rate": 1.2212731462543736e-06, "loss": 0.024, "step": 196620 }, { "epoch": 1.590986325754511, "grad_norm": 0.45923641324043274, "learning_rate": 1.220810785657965e-06, "loss": 0.0171, "step": 196630 }, { "epoch": 1.5910672384497127, "grad_norm": 0.4871709644794464, "learning_rate": 1.2203485004295052e-06, "loss": 0.0152, "step": 196640 }, { "epoch": 1.5911481511449146, "grad_norm": 0.2319263517856598, "learning_rate": 1.2198862905782188e-06, "loss": 0.0092, "step": 196650 }, { "epoch": 1.5912290638401165, "grad_norm": 0.6114771962165833, "learning_rate": 1.2194241561133168e-06, "loss": 0.0289, "step": 196660 }, { "epoch": 1.5913099765353182, "grad_norm": 0.36547034978866577, "learning_rate": 1.2189620970440202e-06, "loss": 0.0125, "step": 196670 }, { "epoch": 1.5913908892305204, "grad_norm": 0.4716389775276184, "learning_rate": 1.2185001133795444e-06, "loss": 0.0294, "step": 196680 }, { "epoch": 1.5914718019257221, "grad_norm": 0.43406063318252563, "learning_rate": 1.2180382051290974e-06, "loss": 0.017, "step": 196690 }, { "epoch": 1.591552714620924, "grad_norm": 0.6501254439353943, "learning_rate": 1.2175763723018964e-06, "loss": 0.0206, "step": 196700 }, { "epoch": 1.591633627316126, "grad_norm": 0.4203300476074219, "learning_rate": 1.2171146149071488e-06, "loss": 0.0179, "step": 196710 }, { "epoch": 1.5917145400113277, "grad_norm": 0.6992178559303284, "learning_rate": 1.216652932954065e-06, "loss": 0.0206, "step": 196720 }, { "epoch": 1.5917954527065297, "grad_norm": 0.2192048877477646, "learning_rate": 1.2161913264518506e-06, "loss": 0.0129, "step": 196730 }, { "epoch": 1.5918763654017316, "grad_norm": 0.2902044355869293, "learning_rate": 1.2157297954097124e-06, "loss": 0.0175, "step": 196740 }, { "epoch": 1.5919572780969333, "grad_norm": 0.2600612938404083, "learning_rate": 1.2152683398368549e-06, "loss": 0.017, "step": 196750 }, { "epoch": 1.5920381907921353, "grad_norm": 0.5675525665283203, "learning_rate": 1.2148069597424804e-06, "loss": 0.0253, "step": 196760 }, { "epoch": 1.5921191034873372, "grad_norm": 0.4888414740562439, "learning_rate": 1.2143456551357903e-06, "loss": 0.0245, "step": 196770 }, { "epoch": 1.592200016182539, "grad_norm": 0.257678359746933, "learning_rate": 1.2138844260259836e-06, "loss": 0.0069, "step": 196780 }, { "epoch": 1.5922809288777409, "grad_norm": 0.5480420589447021, "learning_rate": 1.2134232724222595e-06, "loss": 0.0143, "step": 196790 }, { "epoch": 1.5923618415729428, "grad_norm": 0.28377044200897217, "learning_rate": 1.2129621943338133e-06, "loss": 0.0189, "step": 196800 }, { "epoch": 1.5924427542681445, "grad_norm": 0.036541882902383804, "learning_rate": 1.2125011917698437e-06, "loss": 0.0102, "step": 196810 }, { "epoch": 1.5925236669633467, "grad_norm": 0.25995880365371704, "learning_rate": 1.2120402647395412e-06, "loss": 0.0235, "step": 196820 }, { "epoch": 1.5926045796585484, "grad_norm": 0.4394978880882263, "learning_rate": 1.2115794132520968e-06, "loss": 0.0265, "step": 196830 }, { "epoch": 1.5926854923537503, "grad_norm": 0.41300687193870544, "learning_rate": 1.2111186373167066e-06, "loss": 0.0146, "step": 196840 }, { "epoch": 1.5927664050489523, "grad_norm": 0.37007471919059753, "learning_rate": 1.2106579369425537e-06, "loss": 0.0184, "step": 196850 }, { "epoch": 1.592847317744154, "grad_norm": 0.2989601492881775, "learning_rate": 1.21019731213883e-06, "loss": 0.0147, "step": 196860 }, { "epoch": 1.592928230439356, "grad_norm": 0.5787564516067505, "learning_rate": 1.209736762914721e-06, "loss": 0.0127, "step": 196870 }, { "epoch": 1.5930091431345579, "grad_norm": 0.261005699634552, "learning_rate": 1.2092762892794085e-06, "loss": 0.0356, "step": 196880 }, { "epoch": 1.5930900558297596, "grad_norm": 0.5172159671783447, "learning_rate": 1.2088158912420794e-06, "loss": 0.0161, "step": 196890 }, { "epoch": 1.5931709685249615, "grad_norm": 0.23664502799510956, "learning_rate": 1.2083555688119147e-06, "loss": 0.0133, "step": 196900 }, { "epoch": 1.5932518812201635, "grad_norm": 0.036438170820474625, "learning_rate": 1.207895321998091e-06, "loss": 0.0131, "step": 196910 }, { "epoch": 1.5933327939153652, "grad_norm": 0.2790302336215973, "learning_rate": 1.2074351508097916e-06, "loss": 0.0222, "step": 196920 }, { "epoch": 1.5934137066105674, "grad_norm": 0.45185375213623047, "learning_rate": 1.206975055256191e-06, "loss": 0.032, "step": 196930 }, { "epoch": 1.593494619305769, "grad_norm": 0.19947561621665955, "learning_rate": 1.2065150353464655e-06, "loss": 0.0164, "step": 196940 }, { "epoch": 1.5935755320009708, "grad_norm": 0.45221346616744995, "learning_rate": 1.2060550910897895e-06, "loss": 0.0235, "step": 196950 }, { "epoch": 1.593656444696173, "grad_norm": 0.28751805424690247, "learning_rate": 1.2055952224953349e-06, "loss": 0.02, "step": 196960 }, { "epoch": 1.5937373573913747, "grad_norm": 0.2812732458114624, "learning_rate": 1.2051354295722739e-06, "loss": 0.0097, "step": 196970 }, { "epoch": 1.5938182700865766, "grad_norm": 0.21858446300029755, "learning_rate": 1.2046757123297748e-06, "loss": 0.0151, "step": 196980 }, { "epoch": 1.5938991827817786, "grad_norm": 0.1555987447500229, "learning_rate": 1.204216070777005e-06, "loss": 0.0151, "step": 196990 }, { "epoch": 1.5939800954769803, "grad_norm": 0.45212823152542114, "learning_rate": 1.2037565049231359e-06, "loss": 0.0145, "step": 197000 }, { "epoch": 1.5940610081721822, "grad_norm": 0.6513184905052185, "learning_rate": 1.2032970147773272e-06, "loss": 0.0257, "step": 197010 }, { "epoch": 1.5941419208673842, "grad_norm": 0.8143689036369324, "learning_rate": 1.202837600348743e-06, "loss": 0.0311, "step": 197020 }, { "epoch": 1.5942228335625859, "grad_norm": 0.3886951208114624, "learning_rate": 1.2023782616465495e-06, "loss": 0.016, "step": 197030 }, { "epoch": 1.5943037462577878, "grad_norm": 0.18466193974018097, "learning_rate": 1.201918998679903e-06, "loss": 0.0068, "step": 197040 }, { "epoch": 1.5943846589529898, "grad_norm": 0.5323919057846069, "learning_rate": 1.2014598114579624e-06, "loss": 0.0186, "step": 197050 }, { "epoch": 1.5944655716481915, "grad_norm": 0.420956552028656, "learning_rate": 1.2010006999898898e-06, "loss": 0.0251, "step": 197060 }, { "epoch": 1.5945464843433936, "grad_norm": 0.3303517997264862, "learning_rate": 1.2005416642848355e-06, "loss": 0.0165, "step": 197070 }, { "epoch": 1.5946273970385954, "grad_norm": 0.5693265199661255, "learning_rate": 1.200082704351958e-06, "loss": 0.0135, "step": 197080 }, { "epoch": 1.594708309733797, "grad_norm": 0.3828183710575104, "learning_rate": 1.19962382020041e-06, "loss": 0.015, "step": 197090 }, { "epoch": 1.5947892224289992, "grad_norm": 0.7122418284416199, "learning_rate": 1.1991650118393395e-06, "loss": 0.0211, "step": 197100 }, { "epoch": 1.594870135124201, "grad_norm": 0.2122986763715744, "learning_rate": 1.1987062792779004e-06, "loss": 0.0108, "step": 197110 }, { "epoch": 1.594951047819403, "grad_norm": 0.23557977378368378, "learning_rate": 1.1982476225252398e-06, "loss": 0.0156, "step": 197120 }, { "epoch": 1.5950319605146048, "grad_norm": 0.33893558382987976, "learning_rate": 1.197789041590504e-06, "loss": 0.0217, "step": 197130 }, { "epoch": 1.5951128732098065, "grad_norm": 0.22198015451431274, "learning_rate": 1.1973305364828392e-06, "loss": 0.0176, "step": 197140 }, { "epoch": 1.5951937859050085, "grad_norm": 0.29460829496383667, "learning_rate": 1.1968721072113892e-06, "loss": 0.0172, "step": 197150 }, { "epoch": 1.5952746986002104, "grad_norm": 0.049476396292448044, "learning_rate": 1.196413753785296e-06, "loss": 0.01, "step": 197160 }, { "epoch": 1.5953556112954121, "grad_norm": 0.5010408759117126, "learning_rate": 1.195955476213701e-06, "loss": 0.0148, "step": 197170 }, { "epoch": 1.595436523990614, "grad_norm": 0.36357054114341736, "learning_rate": 1.1954972745057436e-06, "loss": 0.0234, "step": 197180 }, { "epoch": 1.595517436685816, "grad_norm": 0.3606552183628082, "learning_rate": 1.195039148670561e-06, "loss": 0.0112, "step": 197190 }, { "epoch": 1.5955983493810177, "grad_norm": 0.5050954222679138, "learning_rate": 1.1945810987172895e-06, "loss": 0.0203, "step": 197200 }, { "epoch": 1.59567926207622, "grad_norm": 0.21800681948661804, "learning_rate": 1.1941231246550639e-06, "loss": 0.0156, "step": 197210 }, { "epoch": 1.5957601747714216, "grad_norm": 0.5160918235778809, "learning_rate": 1.1936652264930204e-06, "loss": 0.0133, "step": 197220 }, { "epoch": 1.5958410874666236, "grad_norm": 0.8302992582321167, "learning_rate": 1.193207404240287e-06, "loss": 0.0141, "step": 197230 }, { "epoch": 1.5959220001618255, "grad_norm": 0.28629234433174133, "learning_rate": 1.192749657905994e-06, "loss": 0.0185, "step": 197240 }, { "epoch": 1.5960029128570272, "grad_norm": 0.5996227264404297, "learning_rate": 1.1922919874992745e-06, "loss": 0.0248, "step": 197250 }, { "epoch": 1.5960838255522292, "grad_norm": 0.3967129588127136, "learning_rate": 1.1918343930292498e-06, "loss": 0.0175, "step": 197260 }, { "epoch": 1.596164738247431, "grad_norm": 0.18444029986858368, "learning_rate": 1.1913768745050503e-06, "loss": 0.0178, "step": 197270 }, { "epoch": 1.5962456509426328, "grad_norm": 0.6111803650856018, "learning_rate": 1.1909194319357992e-06, "loss": 0.0171, "step": 197280 }, { "epoch": 1.5963265636378348, "grad_norm": 0.31205087900161743, "learning_rate": 1.1904620653306181e-06, "loss": 0.0114, "step": 197290 }, { "epoch": 1.5964074763330367, "grad_norm": 0.3760751783847809, "learning_rate": 1.1900047746986287e-06, "loss": 0.017, "step": 197300 }, { "epoch": 1.5964883890282384, "grad_norm": 0.3683299124240875, "learning_rate": 1.1895475600489515e-06, "loss": 0.0119, "step": 197310 }, { "epoch": 1.5965693017234404, "grad_norm": 0.42032182216644287, "learning_rate": 1.1890904213907033e-06, "loss": 0.0199, "step": 197320 }, { "epoch": 1.5966502144186423, "grad_norm": 0.5042151808738708, "learning_rate": 1.1886333587330018e-06, "loss": 0.0253, "step": 197330 }, { "epoch": 1.596731127113844, "grad_norm": 0.500729501247406, "learning_rate": 1.188176372084962e-06, "loss": 0.0201, "step": 197340 }, { "epoch": 1.5968120398090462, "grad_norm": 0.34729620814323425, "learning_rate": 1.187719461455697e-06, "loss": 0.0129, "step": 197350 }, { "epoch": 1.596892952504248, "grad_norm": 0.2423604130744934, "learning_rate": 1.1872626268543191e-06, "loss": 0.0176, "step": 197360 }, { "epoch": 1.5969738651994498, "grad_norm": 0.3651016056537628, "learning_rate": 1.1868058682899397e-06, "loss": 0.0225, "step": 197370 }, { "epoch": 1.5970547778946518, "grad_norm": 0.19571882486343384, "learning_rate": 1.186349185771667e-06, "loss": 0.0138, "step": 197380 }, { "epoch": 1.5971356905898535, "grad_norm": 0.3859564960002899, "learning_rate": 1.1858925793086084e-06, "loss": 0.0184, "step": 197390 }, { "epoch": 1.5972166032850554, "grad_norm": 0.23690879344940186, "learning_rate": 1.1854360489098693e-06, "loss": 0.0155, "step": 197400 }, { "epoch": 1.5972975159802574, "grad_norm": 0.1859348714351654, "learning_rate": 1.1849795945845577e-06, "loss": 0.0218, "step": 197410 }, { "epoch": 1.597378428675459, "grad_norm": 0.2668244540691376, "learning_rate": 1.1845232163417735e-06, "loss": 0.0202, "step": 197420 }, { "epoch": 1.597459341370661, "grad_norm": 0.3295203447341919, "learning_rate": 1.184066914190617e-06, "loss": 0.0156, "step": 197430 }, { "epoch": 1.597540254065863, "grad_norm": 0.5887957215309143, "learning_rate": 1.1836106881401933e-06, "loss": 0.0223, "step": 197440 }, { "epoch": 1.5976211667610647, "grad_norm": 0.3223468065261841, "learning_rate": 1.1831545381995963e-06, "loss": 0.0128, "step": 197450 }, { "epoch": 1.5977020794562666, "grad_norm": 0.368651419878006, "learning_rate": 1.182698464377922e-06, "loss": 0.0149, "step": 197460 }, { "epoch": 1.5977829921514686, "grad_norm": 0.24690312147140503, "learning_rate": 1.1822424666842709e-06, "loss": 0.0152, "step": 197470 }, { "epoch": 1.5978639048466703, "grad_norm": 0.46010085940361023, "learning_rate": 1.1817865451277333e-06, "loss": 0.0288, "step": 197480 }, { "epoch": 1.5979448175418725, "grad_norm": 0.4729270339012146, "learning_rate": 1.1813306997174023e-06, "loss": 0.0194, "step": 197490 }, { "epoch": 1.5980257302370742, "grad_norm": 0.3727872371673584, "learning_rate": 1.1808749304623696e-06, "loss": 0.0347, "step": 197500 }, { "epoch": 1.5981066429322761, "grad_norm": 0.3855551481246948, "learning_rate": 1.1804192373717233e-06, "loss": 0.0224, "step": 197510 }, { "epoch": 1.598187555627478, "grad_norm": 0.18236377835273743, "learning_rate": 1.1799636204545523e-06, "loss": 0.0219, "step": 197520 }, { "epoch": 1.5982684683226798, "grad_norm": 0.15452410280704498, "learning_rate": 1.179508079719942e-06, "loss": 0.0073, "step": 197530 }, { "epoch": 1.5983493810178817, "grad_norm": 0.6323766708374023, "learning_rate": 1.1790526151769782e-06, "loss": 0.0297, "step": 197540 }, { "epoch": 1.5984302937130836, "grad_norm": 0.24766553938388824, "learning_rate": 1.1785972268347428e-06, "loss": 0.0241, "step": 197550 }, { "epoch": 1.5985112064082854, "grad_norm": 0.36359745264053345, "learning_rate": 1.1781419147023193e-06, "loss": 0.0174, "step": 197560 }, { "epoch": 1.5985921191034873, "grad_norm": 0.47542789578437805, "learning_rate": 1.1776866787887864e-06, "loss": 0.0172, "step": 197570 }, { "epoch": 1.5986730317986892, "grad_norm": 0.2275344729423523, "learning_rate": 1.1772315191032235e-06, "loss": 0.0237, "step": 197580 }, { "epoch": 1.598753944493891, "grad_norm": 0.20944716036319733, "learning_rate": 1.1767764356547078e-06, "loss": 0.0176, "step": 197590 }, { "epoch": 1.5988348571890931, "grad_norm": 0.31493979692459106, "learning_rate": 1.176321428452315e-06, "loss": 0.0178, "step": 197600 }, { "epoch": 1.5989157698842948, "grad_norm": 0.5599105358123779, "learning_rate": 1.1758664975051187e-06, "loss": 0.0184, "step": 197610 }, { "epoch": 1.5989966825794966, "grad_norm": 0.09349633753299713, "learning_rate": 1.1754116428221913e-06, "loss": 0.0311, "step": 197620 }, { "epoch": 1.5990775952746987, "grad_norm": 0.3201221227645874, "learning_rate": 1.1749568644126069e-06, "loss": 0.0102, "step": 197630 }, { "epoch": 1.5991585079699004, "grad_norm": 0.22120913863182068, "learning_rate": 1.1745021622854318e-06, "loss": 0.0109, "step": 197640 }, { "epoch": 1.5992394206651024, "grad_norm": 0.39393091201782227, "learning_rate": 1.1740475364497334e-06, "loss": 0.0187, "step": 197650 }, { "epoch": 1.5993203333603043, "grad_norm": 0.3038496673107147, "learning_rate": 1.1735929869145813e-06, "loss": 0.0243, "step": 197660 }, { "epoch": 1.599401246055506, "grad_norm": 0.39886772632598877, "learning_rate": 1.173138513689041e-06, "loss": 0.0131, "step": 197670 }, { "epoch": 1.599482158750708, "grad_norm": 0.11547566950321198, "learning_rate": 1.1726841167821712e-06, "loss": 0.0167, "step": 197680 }, { "epoch": 1.59956307144591, "grad_norm": 0.4756937623023987, "learning_rate": 1.1722297962030377e-06, "loss": 0.0081, "step": 197690 }, { "epoch": 1.5996439841411116, "grad_norm": 0.44751086831092834, "learning_rate": 1.171775551960701e-06, "loss": 0.0187, "step": 197700 }, { "epoch": 1.5997248968363136, "grad_norm": 0.2590256631374359, "learning_rate": 1.1713213840642184e-06, "loss": 0.0121, "step": 197710 }, { "epoch": 1.5998058095315155, "grad_norm": 0.4588765501976013, "learning_rate": 1.1708672925226488e-06, "loss": 0.0157, "step": 197720 }, { "epoch": 1.5998867222267172, "grad_norm": 0.24119091033935547, "learning_rate": 1.1704132773450465e-06, "loss": 0.0182, "step": 197730 }, { "epoch": 1.5999676349219194, "grad_norm": 0.47190651297569275, "learning_rate": 1.169959338540468e-06, "loss": 0.029, "step": 197740 }, { "epoch": 1.6000485476171211, "grad_norm": 0.21122343838214874, "learning_rate": 1.169505476117964e-06, "loss": 0.0108, "step": 197750 }, { "epoch": 1.6001294603123228, "grad_norm": 0.4808051288127899, "learning_rate": 1.1690516900865867e-06, "loss": 0.0209, "step": 197760 }, { "epoch": 1.600210373007525, "grad_norm": 0.5658376812934875, "learning_rate": 1.1685979804553865e-06, "loss": 0.0271, "step": 197770 }, { "epoch": 1.6002912857027267, "grad_norm": 0.4812750220298767, "learning_rate": 1.1681443472334109e-06, "loss": 0.0119, "step": 197780 }, { "epoch": 1.6003721983979287, "grad_norm": 0.27142658829689026, "learning_rate": 1.167690790429707e-06, "loss": 0.0289, "step": 197790 }, { "epoch": 1.6004531110931306, "grad_norm": 0.1869468241930008, "learning_rate": 1.1672373100533197e-06, "loss": 0.015, "step": 197800 }, { "epoch": 1.6005340237883323, "grad_norm": 0.2331829071044922, "learning_rate": 1.1667839061132913e-06, "loss": 0.0176, "step": 197810 }, { "epoch": 1.6006149364835343, "grad_norm": 0.33315640687942505, "learning_rate": 1.166330578618669e-06, "loss": 0.0223, "step": 197820 }, { "epoch": 1.6006958491787362, "grad_norm": 0.29414936900138855, "learning_rate": 1.1658773275784885e-06, "loss": 0.0178, "step": 197830 }, { "epoch": 1.600776761873938, "grad_norm": 0.3504408895969391, "learning_rate": 1.165424153001789e-06, "loss": 0.0182, "step": 197840 }, { "epoch": 1.6008576745691399, "grad_norm": 0.3478168249130249, "learning_rate": 1.1649710548976107e-06, "loss": 0.0233, "step": 197850 }, { "epoch": 1.6009385872643418, "grad_norm": 0.3141081929206848, "learning_rate": 1.1645180332749906e-06, "loss": 0.0161, "step": 197860 }, { "epoch": 1.6010194999595435, "grad_norm": 0.15136326849460602, "learning_rate": 1.1640650881429582e-06, "loss": 0.0205, "step": 197870 }, { "epoch": 1.6011004126547457, "grad_norm": 0.24899893999099731, "learning_rate": 1.163612219510551e-06, "loss": 0.0228, "step": 197880 }, { "epoch": 1.6011813253499474, "grad_norm": 0.33439168334007263, "learning_rate": 1.1631594273867991e-06, "loss": 0.0193, "step": 197890 }, { "epoch": 1.6012622380451493, "grad_norm": 0.2736611068248749, "learning_rate": 1.1627067117807323e-06, "loss": 0.0323, "step": 197900 }, { "epoch": 1.6013431507403513, "grad_norm": 0.3016921877861023, "learning_rate": 1.1622540727013798e-06, "loss": 0.0135, "step": 197910 }, { "epoch": 1.601424063435553, "grad_norm": 0.26291733980178833, "learning_rate": 1.1618015101577673e-06, "loss": 0.0166, "step": 197920 }, { "epoch": 1.601504976130755, "grad_norm": 0.37449389696121216, "learning_rate": 1.1613490241589216e-06, "loss": 0.0169, "step": 197930 }, { "epoch": 1.6015858888259569, "grad_norm": 0.45021048188209534, "learning_rate": 1.1608966147138652e-06, "loss": 0.0109, "step": 197940 }, { "epoch": 1.6016668015211586, "grad_norm": 0.3497640788555145, "learning_rate": 1.1604442818316213e-06, "loss": 0.0097, "step": 197950 }, { "epoch": 1.6017477142163605, "grad_norm": 0.7035878896713257, "learning_rate": 1.1599920255212104e-06, "loss": 0.017, "step": 197960 }, { "epoch": 1.6018286269115625, "grad_norm": 0.37104153633117676, "learning_rate": 1.1595398457916523e-06, "loss": 0.0166, "step": 197970 }, { "epoch": 1.6019095396067642, "grad_norm": 0.4627656638622284, "learning_rate": 1.1590877426519643e-06, "loss": 0.0151, "step": 197980 }, { "epoch": 1.6019904523019661, "grad_norm": 0.5611015558242798, "learning_rate": 1.1586357161111627e-06, "loss": 0.0138, "step": 197990 }, { "epoch": 1.602071364997168, "grad_norm": 0.030512532219290733, "learning_rate": 1.1581837661782625e-06, "loss": 0.0202, "step": 198000 }, { "epoch": 1.6021522776923698, "grad_norm": 0.2921161949634552, "learning_rate": 1.1577318928622749e-06, "loss": 0.0189, "step": 198010 }, { "epoch": 1.602233190387572, "grad_norm": 0.3150452971458435, "learning_rate": 1.1572800961722163e-06, "loss": 0.0157, "step": 198020 }, { "epoch": 1.6023141030827737, "grad_norm": 0.3677634000778198, "learning_rate": 1.1568283761170912e-06, "loss": 0.0179, "step": 198030 }, { "epoch": 1.6023950157779756, "grad_norm": 0.4945150911808014, "learning_rate": 1.1563767327059117e-06, "loss": 0.0149, "step": 198040 }, { "epoch": 1.6024759284731775, "grad_norm": 0.23799581825733185, "learning_rate": 1.155925165947686e-06, "loss": 0.0143, "step": 198050 }, { "epoch": 1.6025568411683793, "grad_norm": 0.16753321886062622, "learning_rate": 1.1554736758514145e-06, "loss": 0.0242, "step": 198060 }, { "epoch": 1.6026377538635812, "grad_norm": 0.3957788050174713, "learning_rate": 1.1550222624261059e-06, "loss": 0.0174, "step": 198070 }, { "epoch": 1.6027186665587831, "grad_norm": 0.3635767996311188, "learning_rate": 1.1545709256807625e-06, "loss": 0.0155, "step": 198080 }, { "epoch": 1.6027995792539849, "grad_norm": 0.6230932474136353, "learning_rate": 1.1541196656243814e-06, "loss": 0.0288, "step": 198090 }, { "epoch": 1.6028804919491868, "grad_norm": 0.30328434705734253, "learning_rate": 1.1536684822659655e-06, "loss": 0.0188, "step": 198100 }, { "epoch": 1.6029614046443887, "grad_norm": 0.3812221884727478, "learning_rate": 1.153217375614512e-06, "loss": 0.0124, "step": 198110 }, { "epoch": 1.6030423173395905, "grad_norm": 0.6089500784873962, "learning_rate": 1.1527663456790172e-06, "loss": 0.0219, "step": 198120 }, { "epoch": 1.6031232300347926, "grad_norm": 0.48368415236473083, "learning_rate": 1.1523153924684754e-06, "loss": 0.0143, "step": 198130 }, { "epoch": 1.6032041427299943, "grad_norm": 0.29922425746917725, "learning_rate": 1.1518645159918807e-06, "loss": 0.0247, "step": 198140 }, { "epoch": 1.603285055425196, "grad_norm": 0.377750039100647, "learning_rate": 1.151413716258224e-06, "loss": 0.0216, "step": 198150 }, { "epoch": 1.6033659681203982, "grad_norm": 0.4320223033428192, "learning_rate": 1.1509629932764959e-06, "loss": 0.0279, "step": 198160 }, { "epoch": 1.6034468808156, "grad_norm": 0.4085063934326172, "learning_rate": 1.1505123470556857e-06, "loss": 0.0163, "step": 198170 }, { "epoch": 1.6035277935108019, "grad_norm": 0.24730554223060608, "learning_rate": 1.1500617776047802e-06, "loss": 0.0113, "step": 198180 }, { "epoch": 1.6036087062060038, "grad_norm": 0.4458935260772705, "learning_rate": 1.1496112849327646e-06, "loss": 0.0219, "step": 198190 }, { "epoch": 1.6036896189012055, "grad_norm": 0.28899040818214417, "learning_rate": 1.1491608690486222e-06, "loss": 0.0103, "step": 198200 }, { "epoch": 1.6037705315964075, "grad_norm": 0.5533283948898315, "learning_rate": 1.1487105299613393e-06, "loss": 0.0115, "step": 198210 }, { "epoch": 1.6038514442916094, "grad_norm": 0.2866235673427582, "learning_rate": 1.1482602676798933e-06, "loss": 0.0312, "step": 198220 }, { "epoch": 1.6039323569868111, "grad_norm": 0.45068639516830444, "learning_rate": 1.1478100822132632e-06, "loss": 0.0127, "step": 198230 }, { "epoch": 1.604013269682013, "grad_norm": 0.3577409088611603, "learning_rate": 1.1473599735704316e-06, "loss": 0.0242, "step": 198240 }, { "epoch": 1.604094182377215, "grad_norm": 0.02158496528863907, "learning_rate": 1.1469099417603696e-06, "loss": 0.0183, "step": 198250 }, { "epoch": 1.6041750950724167, "grad_norm": 0.3833214044570923, "learning_rate": 1.1464599867920557e-06, "loss": 0.0174, "step": 198260 }, { "epoch": 1.604256007767619, "grad_norm": 0.6796612739562988, "learning_rate": 1.1460101086744635e-06, "loss": 0.0133, "step": 198270 }, { "epoch": 1.6043369204628206, "grad_norm": 0.2314966320991516, "learning_rate": 1.145560307416561e-06, "loss": 0.0187, "step": 198280 }, { "epoch": 1.6044178331580223, "grad_norm": 0.6896091103553772, "learning_rate": 1.1451105830273223e-06, "loss": 0.0253, "step": 198290 }, { "epoch": 1.6044987458532245, "grad_norm": 0.3169368803501129, "learning_rate": 1.144660935515715e-06, "loss": 0.0154, "step": 198300 }, { "epoch": 1.6045796585484262, "grad_norm": 0.2963050901889801, "learning_rate": 1.144211364890706e-06, "loss": 0.0212, "step": 198310 }, { "epoch": 1.6046605712436282, "grad_norm": 0.1657986342906952, "learning_rate": 1.143761871161262e-06, "loss": 0.0187, "step": 198320 }, { "epoch": 1.60474148393883, "grad_norm": 0.11958954483270645, "learning_rate": 1.143312454336346e-06, "loss": 0.0274, "step": 198330 }, { "epoch": 1.6048223966340318, "grad_norm": 0.2076481133699417, "learning_rate": 1.1428631144249218e-06, "loss": 0.0194, "step": 198340 }, { "epoch": 1.6049033093292338, "grad_norm": 0.2898986339569092, "learning_rate": 1.14241385143595e-06, "loss": 0.0275, "step": 198350 }, { "epoch": 1.6049842220244357, "grad_norm": 0.17621488869190216, "learning_rate": 1.14196466537839e-06, "loss": 0.0167, "step": 198360 }, { "epoch": 1.6050651347196374, "grad_norm": 0.22388318181037903, "learning_rate": 1.1415155562612e-06, "loss": 0.0208, "step": 198370 }, { "epoch": 1.6051460474148393, "grad_norm": 0.9388719797134399, "learning_rate": 1.1410665240933372e-06, "loss": 0.0245, "step": 198380 }, { "epoch": 1.6052269601100413, "grad_norm": 0.46330687403678894, "learning_rate": 1.1406175688837539e-06, "loss": 0.0155, "step": 198390 }, { "epoch": 1.605307872805243, "grad_norm": 0.41713613271713257, "learning_rate": 1.140168690641409e-06, "loss": 0.0116, "step": 198400 }, { "epoch": 1.6053887855004452, "grad_norm": 0.4514263868331909, "learning_rate": 1.1397198893752493e-06, "loss": 0.0203, "step": 198410 }, { "epoch": 1.6054696981956469, "grad_norm": 0.6576203107833862, "learning_rate": 1.1392711650942256e-06, "loss": 0.0205, "step": 198420 }, { "epoch": 1.6055506108908488, "grad_norm": 0.4511909782886505, "learning_rate": 1.1388225178072909e-06, "loss": 0.0142, "step": 198430 }, { "epoch": 1.6056315235860508, "grad_norm": 0.49335166811943054, "learning_rate": 1.1383739475233874e-06, "loss": 0.017, "step": 198440 }, { "epoch": 1.6057124362812525, "grad_norm": 0.28516432642936707, "learning_rate": 1.137925454251464e-06, "loss": 0.0199, "step": 198450 }, { "epoch": 1.6057933489764544, "grad_norm": 0.41615477204322815, "learning_rate": 1.1374770380004663e-06, "loss": 0.0178, "step": 198460 }, { "epoch": 1.6058742616716564, "grad_norm": 0.5043818950653076, "learning_rate": 1.1370286987793317e-06, "loss": 0.0187, "step": 198470 }, { "epoch": 1.605955174366858, "grad_norm": 0.1980941742658615, "learning_rate": 1.1365804365970062e-06, "loss": 0.0168, "step": 198480 }, { "epoch": 1.60603608706206, "grad_norm": 0.2173365205526352, "learning_rate": 1.136132251462429e-06, "loss": 0.0188, "step": 198490 }, { "epoch": 1.606116999757262, "grad_norm": 0.25138434767723083, "learning_rate": 1.1356841433845345e-06, "loss": 0.0198, "step": 198500 }, { "epoch": 1.6061979124524637, "grad_norm": 0.3426143229007721, "learning_rate": 1.1352361123722632e-06, "loss": 0.032, "step": 198510 }, { "epoch": 1.6062788251476656, "grad_norm": 0.34123367071151733, "learning_rate": 1.1347881584345488e-06, "loss": 0.0181, "step": 198520 }, { "epoch": 1.6063597378428676, "grad_norm": 0.5762198567390442, "learning_rate": 1.1343402815803245e-06, "loss": 0.0169, "step": 198530 }, { "epoch": 1.6064406505380693, "grad_norm": 0.2715696096420288, "learning_rate": 1.133892481818523e-06, "loss": 0.0313, "step": 198540 }, { "epoch": 1.6065215632332714, "grad_norm": 0.1929967999458313, "learning_rate": 1.1334447591580738e-06, "loss": 0.0169, "step": 198550 }, { "epoch": 1.6066024759284732, "grad_norm": 0.5921836495399475, "learning_rate": 1.1329971136079064e-06, "loss": 0.0225, "step": 198560 }, { "epoch": 1.606683388623675, "grad_norm": 0.22853246331214905, "learning_rate": 1.132549545176948e-06, "loss": 0.0206, "step": 198570 }, { "epoch": 1.606764301318877, "grad_norm": 0.4357672333717346, "learning_rate": 1.1321020538741228e-06, "loss": 0.0119, "step": 198580 }, { "epoch": 1.6068452140140788, "grad_norm": 0.26415470242500305, "learning_rate": 1.1316546397083595e-06, "loss": 0.0195, "step": 198590 }, { "epoch": 1.6069261267092807, "grad_norm": 0.4239506125450134, "learning_rate": 1.1312073026885767e-06, "loss": 0.0196, "step": 198600 }, { "epoch": 1.6070070394044826, "grad_norm": 0.2962835431098938, "learning_rate": 1.1307600428236954e-06, "loss": 0.0141, "step": 198610 }, { "epoch": 1.6070879520996844, "grad_norm": 0.10326845198869705, "learning_rate": 1.1303128601226392e-06, "loss": 0.0254, "step": 198620 }, { "epoch": 1.6071688647948863, "grad_norm": 0.3462146520614624, "learning_rate": 1.129865754594323e-06, "loss": 0.0181, "step": 198630 }, { "epoch": 1.6072497774900882, "grad_norm": 0.42240646481513977, "learning_rate": 1.1294187262476624e-06, "loss": 0.0097, "step": 198640 }, { "epoch": 1.60733069018529, "grad_norm": 0.2844271659851074, "learning_rate": 1.1289717750915769e-06, "loss": 0.0127, "step": 198650 }, { "epoch": 1.607411602880492, "grad_norm": 0.46311187744140625, "learning_rate": 1.1285249011349742e-06, "loss": 0.0258, "step": 198660 }, { "epoch": 1.6074925155756938, "grad_norm": 0.31311237812042236, "learning_rate": 1.1280781043867712e-06, "loss": 0.014, "step": 198670 }, { "epoch": 1.6075734282708956, "grad_norm": 0.01546822115778923, "learning_rate": 1.1276313848558774e-06, "loss": 0.0118, "step": 198680 }, { "epoch": 1.6076543409660977, "grad_norm": 0.2724854052066803, "learning_rate": 1.127184742551198e-06, "loss": 0.0114, "step": 198690 }, { "epoch": 1.6077352536612994, "grad_norm": 0.11596047878265381, "learning_rate": 1.1267381774816444e-06, "loss": 0.0117, "step": 198700 }, { "epoch": 1.6078161663565014, "grad_norm": 0.1452888548374176, "learning_rate": 1.1262916896561227e-06, "loss": 0.0139, "step": 198710 }, { "epoch": 1.6078970790517033, "grad_norm": 0.5679036974906921, "learning_rate": 1.1258452790835322e-06, "loss": 0.0171, "step": 198720 }, { "epoch": 1.607977991746905, "grad_norm": 0.3961470425128937, "learning_rate": 1.125398945772781e-06, "loss": 0.0264, "step": 198730 }, { "epoch": 1.608058904442107, "grad_norm": 0.6333884000778198, "learning_rate": 1.1249526897327678e-06, "loss": 0.0244, "step": 198740 }, { "epoch": 1.608139817137309, "grad_norm": 0.46274903416633606, "learning_rate": 1.1245065109723924e-06, "loss": 0.018, "step": 198750 }, { "epoch": 1.6082207298325106, "grad_norm": 0.3023882806301117, "learning_rate": 1.1240604095005537e-06, "loss": 0.0145, "step": 198760 }, { "epoch": 1.6083016425277126, "grad_norm": 0.0012843779986724257, "learning_rate": 1.1236143853261472e-06, "loss": 0.012, "step": 198770 }, { "epoch": 1.6083825552229145, "grad_norm": 0.09821200370788574, "learning_rate": 1.1231684384580682e-06, "loss": 0.0096, "step": 198780 }, { "epoch": 1.6084634679181162, "grad_norm": 0.25520405173301697, "learning_rate": 1.1227225689052108e-06, "loss": 0.0125, "step": 198790 }, { "epoch": 1.6085443806133184, "grad_norm": 0.5434890985488892, "learning_rate": 1.1222767766764647e-06, "loss": 0.0179, "step": 198800 }, { "epoch": 1.60862529330852, "grad_norm": 0.6968173980712891, "learning_rate": 1.1218310617807248e-06, "loss": 0.0278, "step": 198810 }, { "epoch": 1.6087062060037218, "grad_norm": 0.4685293734073639, "learning_rate": 1.1213854242268763e-06, "loss": 0.0183, "step": 198820 }, { "epoch": 1.608787118698924, "grad_norm": 0.2708599269390106, "learning_rate": 1.1209398640238056e-06, "loss": 0.0254, "step": 198830 }, { "epoch": 1.6088680313941257, "grad_norm": 0.17415639758110046, "learning_rate": 1.1204943811804031e-06, "loss": 0.0065, "step": 198840 }, { "epoch": 1.6089489440893276, "grad_norm": 0.5314310193061829, "learning_rate": 1.120048975705547e-06, "loss": 0.026, "step": 198850 }, { "epoch": 1.6090298567845296, "grad_norm": 0.4544307291507721, "learning_rate": 1.119603647608125e-06, "loss": 0.0126, "step": 198860 }, { "epoch": 1.6091107694797313, "grad_norm": 0.3890807032585144, "learning_rate": 1.1191583968970181e-06, "loss": 0.0114, "step": 198870 }, { "epoch": 1.6091916821749332, "grad_norm": 0.13747768104076385, "learning_rate": 1.118713223581101e-06, "loss": 0.0167, "step": 198880 }, { "epoch": 1.6092725948701352, "grad_norm": 0.13324429094791412, "learning_rate": 1.1182681276692565e-06, "loss": 0.0158, "step": 198890 }, { "epoch": 1.609353507565337, "grad_norm": 0.46802476048469543, "learning_rate": 1.1178231091703605e-06, "loss": 0.0166, "step": 198900 }, { "epoch": 1.6094344202605388, "grad_norm": 0.22904352843761444, "learning_rate": 1.1173781680932845e-06, "loss": 0.0086, "step": 198910 }, { "epoch": 1.6095153329557408, "grad_norm": 0.29037290811538696, "learning_rate": 1.1169333044469055e-06, "loss": 0.0177, "step": 198920 }, { "epoch": 1.6095962456509425, "grad_norm": 0.21678340435028076, "learning_rate": 1.1164885182400941e-06, "loss": 0.0142, "step": 198930 }, { "epoch": 1.6096771583461447, "grad_norm": 0.2901305556297302, "learning_rate": 1.1160438094817212e-06, "loss": 0.0077, "step": 198940 }, { "epoch": 1.6097580710413464, "grad_norm": 0.10791254043579102, "learning_rate": 1.1155991781806547e-06, "loss": 0.015, "step": 198950 }, { "epoch": 1.609838983736548, "grad_norm": 0.28210705518722534, "learning_rate": 1.1151546243457623e-06, "loss": 0.0115, "step": 198960 }, { "epoch": 1.6099198964317503, "grad_norm": 0.44264742732048035, "learning_rate": 1.1147101479859097e-06, "loss": 0.0242, "step": 198970 }, { "epoch": 1.610000809126952, "grad_norm": 0.20250117778778076, "learning_rate": 1.1142657491099612e-06, "loss": 0.0198, "step": 198980 }, { "epoch": 1.610081721822154, "grad_norm": 0.3312526047229767, "learning_rate": 1.1138214277267777e-06, "loss": 0.0193, "step": 198990 }, { "epoch": 1.6101626345173559, "grad_norm": 0.3790353238582611, "learning_rate": 1.1133771838452245e-06, "loss": 0.0184, "step": 199000 }, { "epoch": 1.6102435472125576, "grad_norm": 0.4758913516998291, "learning_rate": 1.112933017474157e-06, "loss": 0.0311, "step": 199010 }, { "epoch": 1.6103244599077595, "grad_norm": 0.5833202600479126, "learning_rate": 1.1124889286224326e-06, "loss": 0.0108, "step": 199020 }, { "epoch": 1.6104053726029615, "grad_norm": 0.38742128014564514, "learning_rate": 1.1120449172989134e-06, "loss": 0.0151, "step": 199030 }, { "epoch": 1.6104862852981632, "grad_norm": 0.28050118684768677, "learning_rate": 1.111600983512448e-06, "loss": 0.0125, "step": 199040 }, { "epoch": 1.6105671979933651, "grad_norm": 0.29461905360221863, "learning_rate": 1.111157127271892e-06, "loss": 0.012, "step": 199050 }, { "epoch": 1.610648110688567, "grad_norm": 0.0006342886481434107, "learning_rate": 1.1107133485860993e-06, "loss": 0.0214, "step": 199060 }, { "epoch": 1.6107290233837688, "grad_norm": 0.31572094559669495, "learning_rate": 1.1102696474639158e-06, "loss": 0.0147, "step": 199070 }, { "epoch": 1.610809936078971, "grad_norm": 0.0580950602889061, "learning_rate": 1.1098260239141933e-06, "loss": 0.0207, "step": 199080 }, { "epoch": 1.6108908487741727, "grad_norm": 0.6094371676445007, "learning_rate": 1.1093824779457807e-06, "loss": 0.018, "step": 199090 }, { "epoch": 1.6109717614693746, "grad_norm": 0.04658042639493942, "learning_rate": 1.1089390095675174e-06, "loss": 0.0161, "step": 199100 }, { "epoch": 1.6110526741645765, "grad_norm": 0.45371222496032715, "learning_rate": 1.1084956187882534e-06, "loss": 0.01, "step": 199110 }, { "epoch": 1.6111335868597783, "grad_norm": 0.31146010756492615, "learning_rate": 1.1080523056168286e-06, "loss": 0.0229, "step": 199120 }, { "epoch": 1.6112144995549802, "grad_norm": 0.3335164189338684, "learning_rate": 1.107609070062084e-06, "loss": 0.0224, "step": 199130 }, { "epoch": 1.6112954122501821, "grad_norm": 0.3822745680809021, "learning_rate": 1.1071659121328592e-06, "loss": 0.0149, "step": 199140 }, { "epoch": 1.6113763249453839, "grad_norm": 0.35450130701065063, "learning_rate": 1.1067228318379924e-06, "loss": 0.0149, "step": 199150 }, { "epoch": 1.6114572376405858, "grad_norm": 0.11782018840312958, "learning_rate": 1.1062798291863196e-06, "loss": 0.0217, "step": 199160 }, { "epoch": 1.6115381503357877, "grad_norm": 0.3096023499965668, "learning_rate": 1.1058369041866762e-06, "loss": 0.0262, "step": 199170 }, { "epoch": 1.6116190630309895, "grad_norm": 0.4671062231063843, "learning_rate": 1.1053940568478945e-06, "loss": 0.0114, "step": 199180 }, { "epoch": 1.6116999757261914, "grad_norm": 0.3538225293159485, "learning_rate": 1.104951287178806e-06, "loss": 0.0177, "step": 199190 }, { "epoch": 1.6117808884213933, "grad_norm": 0.28005966544151306, "learning_rate": 1.1045085951882411e-06, "loss": 0.0188, "step": 199200 }, { "epoch": 1.611861801116595, "grad_norm": 0.5328633189201355, "learning_rate": 1.1040659808850279e-06, "loss": 0.0234, "step": 199210 }, { "epoch": 1.6119427138117972, "grad_norm": 0.3776596486568451, "learning_rate": 1.1036234442779965e-06, "loss": 0.0179, "step": 199220 }, { "epoch": 1.612023626506999, "grad_norm": 0.3157002031803131, "learning_rate": 1.1031809853759683e-06, "loss": 0.0231, "step": 199230 }, { "epoch": 1.6121045392022009, "grad_norm": 0.27789461612701416, "learning_rate": 1.1027386041877674e-06, "loss": 0.0199, "step": 199240 }, { "epoch": 1.6121854518974028, "grad_norm": 0.5407437086105347, "learning_rate": 1.1022963007222209e-06, "loss": 0.0217, "step": 199250 }, { "epoch": 1.6122663645926045, "grad_norm": 0.2893226146697998, "learning_rate": 1.1018540749881447e-06, "loss": 0.0142, "step": 199260 }, { "epoch": 1.6123472772878065, "grad_norm": 0.49267059564590454, "learning_rate": 1.101411926994358e-06, "loss": 0.0212, "step": 199270 }, { "epoch": 1.6124281899830084, "grad_norm": 0.4169113039970398, "learning_rate": 1.1009698567496812e-06, "loss": 0.0251, "step": 199280 }, { "epoch": 1.6125091026782101, "grad_norm": 0.3382278382778168, "learning_rate": 1.1005278642629298e-06, "loss": 0.0168, "step": 199290 }, { "epoch": 1.612590015373412, "grad_norm": 0.5603755712509155, "learning_rate": 1.1000859495429179e-06, "loss": 0.0149, "step": 199300 }, { "epoch": 1.612670928068614, "grad_norm": 0.08285494893789291, "learning_rate": 1.0996441125984586e-06, "loss": 0.0141, "step": 199310 }, { "epoch": 1.6127518407638157, "grad_norm": 0.20579282939434052, "learning_rate": 1.0992023534383634e-06, "loss": 0.0238, "step": 199320 }, { "epoch": 1.6128327534590177, "grad_norm": 0.28617995977401733, "learning_rate": 1.0987606720714428e-06, "loss": 0.0228, "step": 199330 }, { "epoch": 1.6129136661542196, "grad_norm": 0.41536957025527954, "learning_rate": 1.0983190685065042e-06, "loss": 0.0186, "step": 199340 }, { "epoch": 1.6129945788494213, "grad_norm": 0.47049373388290405, "learning_rate": 1.0978775427523552e-06, "loss": 0.0305, "step": 199350 }, { "epoch": 1.6130754915446235, "grad_norm": 0.2819115221500397, "learning_rate": 1.097436094817801e-06, "loss": 0.0136, "step": 199360 }, { "epoch": 1.6131564042398252, "grad_norm": 0.6210606694221497, "learning_rate": 1.0969947247116446e-06, "loss": 0.015, "step": 199370 }, { "epoch": 1.6132373169350271, "grad_norm": 0.28440672159194946, "learning_rate": 1.0965534324426891e-06, "loss": 0.0203, "step": 199380 }, { "epoch": 1.613318229630229, "grad_norm": 0.3218575417995453, "learning_rate": 1.0961122180197353e-06, "loss": 0.0132, "step": 199390 }, { "epoch": 1.6133991423254308, "grad_norm": 0.36570048332214355, "learning_rate": 1.0956710814515797e-06, "loss": 0.0283, "step": 199400 }, { "epoch": 1.6134800550206327, "grad_norm": 0.619705319404602, "learning_rate": 1.0952300227470252e-06, "loss": 0.0247, "step": 199410 }, { "epoch": 1.6135609677158347, "grad_norm": 0.27204230427742004, "learning_rate": 1.0947890419148632e-06, "loss": 0.0128, "step": 199420 }, { "epoch": 1.6136418804110364, "grad_norm": 0.21660272777080536, "learning_rate": 1.0943481389638883e-06, "loss": 0.0301, "step": 199430 }, { "epoch": 1.6137227931062383, "grad_norm": 0.07582840323448181, "learning_rate": 1.093907313902897e-06, "loss": 0.0148, "step": 199440 }, { "epoch": 1.6138037058014403, "grad_norm": 0.6533350348472595, "learning_rate": 1.0934665667406768e-06, "loss": 0.0256, "step": 199450 }, { "epoch": 1.613884618496642, "grad_norm": 0.14228439331054688, "learning_rate": 1.0930258974860175e-06, "loss": 0.0154, "step": 199460 }, { "epoch": 1.6139655311918442, "grad_norm": 0.2389521449804306, "learning_rate": 1.0925853061477105e-06, "loss": 0.0178, "step": 199470 }, { "epoch": 1.6140464438870459, "grad_norm": 0.4124678075313568, "learning_rate": 1.0921447927345401e-06, "loss": 0.0157, "step": 199480 }, { "epoch": 1.6141273565822476, "grad_norm": 0.4119122326374054, "learning_rate": 1.091704357255292e-06, "loss": 0.0127, "step": 199490 }, { "epoch": 1.6142082692774498, "grad_norm": 0.1406117081642151, "learning_rate": 1.0912639997187502e-06, "loss": 0.0148, "step": 199500 }, { "epoch": 1.6142891819726515, "grad_norm": 0.2390303909778595, "learning_rate": 1.0908237201336958e-06, "loss": 0.0062, "step": 199510 }, { "epoch": 1.6143700946678534, "grad_norm": 0.3225855231285095, "learning_rate": 1.0903835185089096e-06, "loss": 0.0171, "step": 199520 }, { "epoch": 1.6144510073630554, "grad_norm": 0.34800592064857483, "learning_rate": 1.0899433948531708e-06, "loss": 0.0205, "step": 199530 }, { "epoch": 1.614531920058257, "grad_norm": 0.5938066244125366, "learning_rate": 1.0895033491752565e-06, "loss": 0.0246, "step": 199540 }, { "epoch": 1.614612832753459, "grad_norm": 0.4732610583305359, "learning_rate": 1.0890633814839424e-06, "loss": 0.0232, "step": 199550 }, { "epoch": 1.614693745448661, "grad_norm": 0.327687531709671, "learning_rate": 1.0886234917880028e-06, "loss": 0.0211, "step": 199560 }, { "epoch": 1.6147746581438627, "grad_norm": 0.27309533953666687, "learning_rate": 1.0881836800962103e-06, "loss": 0.0185, "step": 199570 }, { "epoch": 1.6148555708390646, "grad_norm": 0.5171592831611633, "learning_rate": 1.0877439464173362e-06, "loss": 0.0168, "step": 199580 }, { "epoch": 1.6149364835342666, "grad_norm": 0.42315641045570374, "learning_rate": 1.08730429076015e-06, "loss": 0.0242, "step": 199590 }, { "epoch": 1.6150173962294683, "grad_norm": 0.3145422637462616, "learning_rate": 1.0868647131334193e-06, "loss": 0.0141, "step": 199600 }, { "epoch": 1.6150983089246704, "grad_norm": 0.2571994960308075, "learning_rate": 1.0864252135459114e-06, "loss": 0.0104, "step": 199610 }, { "epoch": 1.6151792216198722, "grad_norm": 0.15841005742549896, "learning_rate": 1.0859857920063887e-06, "loss": 0.0134, "step": 199620 }, { "epoch": 1.6152601343150739, "grad_norm": 0.12524902820587158, "learning_rate": 1.08554644852362e-06, "loss": 0.0181, "step": 199630 }, { "epoch": 1.615341047010276, "grad_norm": 0.1542855203151703, "learning_rate": 1.085107183106362e-06, "loss": 0.0142, "step": 199640 }, { "epoch": 1.6154219597054778, "grad_norm": 0.33318474888801575, "learning_rate": 1.084667995763375e-06, "loss": 0.0195, "step": 199650 }, { "epoch": 1.6155028724006797, "grad_norm": 0.48486244678497314, "learning_rate": 1.0842288865034206e-06, "loss": 0.0191, "step": 199660 }, { "epoch": 1.6155837850958816, "grad_norm": 0.24686409533023834, "learning_rate": 1.0837898553352561e-06, "loss": 0.0135, "step": 199670 }, { "epoch": 1.6156646977910833, "grad_norm": 0.19560760259628296, "learning_rate": 1.083350902267632e-06, "loss": 0.0115, "step": 199680 }, { "epoch": 1.6157456104862853, "grad_norm": 0.20509853959083557, "learning_rate": 1.082912027309308e-06, "loss": 0.0233, "step": 199690 }, { "epoch": 1.6158265231814872, "grad_norm": 0.38323521614074707, "learning_rate": 1.0824732304690338e-06, "loss": 0.0167, "step": 199700 }, { "epoch": 1.615907435876689, "grad_norm": 0.24005936086177826, "learning_rate": 1.082034511755561e-06, "loss": 0.0137, "step": 199710 }, { "epoch": 1.6159883485718909, "grad_norm": 0.1786831170320511, "learning_rate": 1.0815958711776382e-06, "loss": 0.0191, "step": 199720 }, { "epoch": 1.6160692612670928, "grad_norm": 0.5130757689476013, "learning_rate": 1.081157308744014e-06, "loss": 0.022, "step": 199730 }, { "epoch": 1.6161501739622945, "grad_norm": 0.5447347164154053, "learning_rate": 1.0807188244634341e-06, "loss": 0.0132, "step": 199740 }, { "epoch": 1.6162310866574967, "grad_norm": 0.28568071126937866, "learning_rate": 1.080280418344643e-06, "loss": 0.0215, "step": 199750 }, { "epoch": 1.6163119993526984, "grad_norm": 0.18219508230686188, "learning_rate": 1.0798420903963848e-06, "loss": 0.0185, "step": 199760 }, { "epoch": 1.6163929120479004, "grad_norm": 0.4872108995914459, "learning_rate": 1.0794038406273998e-06, "loss": 0.02, "step": 199770 }, { "epoch": 1.6164738247431023, "grad_norm": 0.5250245332717896, "learning_rate": 1.0789656690464285e-06, "loss": 0.0156, "step": 199780 }, { "epoch": 1.616554737438304, "grad_norm": 0.4423377811908722, "learning_rate": 1.0785275756622094e-06, "loss": 0.0142, "step": 199790 }, { "epoch": 1.616635650133506, "grad_norm": 0.45702433586120605, "learning_rate": 1.078089560483479e-06, "loss": 0.0164, "step": 199800 }, { "epoch": 1.616716562828708, "grad_norm": 0.28100964426994324, "learning_rate": 1.0776516235189727e-06, "loss": 0.0167, "step": 199810 }, { "epoch": 1.6167974755239096, "grad_norm": 0.2674539089202881, "learning_rate": 1.0772137647774233e-06, "loss": 0.0182, "step": 199820 }, { "epoch": 1.6168783882191116, "grad_norm": 0.211974635720253, "learning_rate": 1.0767759842675668e-06, "loss": 0.0111, "step": 199830 }, { "epoch": 1.6169593009143135, "grad_norm": 0.31655153632164, "learning_rate": 1.0763382819981278e-06, "loss": 0.0139, "step": 199840 }, { "epoch": 1.6170402136095152, "grad_norm": 0.6819803714752197, "learning_rate": 1.0759006579778403e-06, "loss": 0.0299, "step": 199850 }, { "epoch": 1.6171211263047172, "grad_norm": 0.46419697999954224, "learning_rate": 1.0754631122154313e-06, "loss": 0.0136, "step": 199860 }, { "epoch": 1.617202038999919, "grad_norm": 0.045349739491939545, "learning_rate": 1.0750256447196228e-06, "loss": 0.0116, "step": 199870 }, { "epoch": 1.6172829516951208, "grad_norm": 0.006923331413418055, "learning_rate": 1.0745882554991433e-06, "loss": 0.0155, "step": 199880 }, { "epoch": 1.617363864390323, "grad_norm": 0.3489387631416321, "learning_rate": 1.0741509445627136e-06, "loss": 0.0109, "step": 199890 }, { "epoch": 1.6174447770855247, "grad_norm": 0.5217750072479248, "learning_rate": 1.073713711919056e-06, "loss": 0.0146, "step": 199900 }, { "epoch": 1.6175256897807266, "grad_norm": 0.5702049136161804, "learning_rate": 1.0732765575768895e-06, "loss": 0.03, "step": 199910 }, { "epoch": 1.6176066024759286, "grad_norm": 0.24352455139160156, "learning_rate": 1.0728394815449322e-06, "loss": 0.0166, "step": 199920 }, { "epoch": 1.6176875151711303, "grad_norm": 0.2710544764995575, "learning_rate": 1.0724024838319008e-06, "loss": 0.0283, "step": 199930 }, { "epoch": 1.6177684278663322, "grad_norm": 0.2684382498264313, "learning_rate": 1.0719655644465105e-06, "loss": 0.0286, "step": 199940 }, { "epoch": 1.6178493405615342, "grad_norm": 0.2511449456214905, "learning_rate": 1.0715287233974748e-06, "loss": 0.0121, "step": 199950 }, { "epoch": 1.617930253256736, "grad_norm": 0.07520538568496704, "learning_rate": 1.071091960693505e-06, "loss": 0.0117, "step": 199960 }, { "epoch": 1.6180111659519378, "grad_norm": 0.445241242647171, "learning_rate": 1.070655276343312e-06, "loss": 0.017, "step": 199970 }, { "epoch": 1.6180920786471398, "grad_norm": 0.09631791710853577, "learning_rate": 1.0702186703556038e-06, "loss": 0.0117, "step": 199980 }, { "epoch": 1.6181729913423415, "grad_norm": 0.3311402201652527, "learning_rate": 1.0697821427390887e-06, "loss": 0.0185, "step": 199990 }, { "epoch": 1.6182539040375437, "grad_norm": 0.49968278408050537, "learning_rate": 1.0693456935024715e-06, "loss": 0.0251, "step": 200000 }, { "epoch": 1.6183348167327454, "grad_norm": 0.40212830901145935, "learning_rate": 1.0689093226544551e-06, "loss": 0.0202, "step": 200010 }, { "epoch": 1.618415729427947, "grad_norm": 0.538802444934845, "learning_rate": 1.068473030203746e-06, "loss": 0.0154, "step": 200020 }, { "epoch": 1.6184966421231493, "grad_norm": 0.1847478747367859, "learning_rate": 1.0680368161590399e-06, "loss": 0.0205, "step": 200030 }, { "epoch": 1.618577554818351, "grad_norm": 0.471966028213501, "learning_rate": 1.0676006805290401e-06, "loss": 0.0187, "step": 200040 }, { "epoch": 1.618658467513553, "grad_norm": 0.4735524356365204, "learning_rate": 1.0671646233224443e-06, "loss": 0.0186, "step": 200050 }, { "epoch": 1.6187393802087549, "grad_norm": 0.698841392993927, "learning_rate": 1.0667286445479446e-06, "loss": 0.0417, "step": 200060 }, { "epoch": 1.6188202929039566, "grad_norm": 0.24001777172088623, "learning_rate": 1.06629274421424e-06, "loss": 0.0203, "step": 200070 }, { "epoch": 1.6189012055991585, "grad_norm": 0.7614918351173401, "learning_rate": 1.0658569223300242e-06, "loss": 0.0097, "step": 200080 }, { "epoch": 1.6189821182943605, "grad_norm": 0.4625107944011688, "learning_rate": 1.0654211789039836e-06, "loss": 0.012, "step": 200090 }, { "epoch": 1.6190630309895622, "grad_norm": 0.3692778944969177, "learning_rate": 1.0649855139448124e-06, "loss": 0.0174, "step": 200100 }, { "epoch": 1.619143943684764, "grad_norm": 0.488260418176651, "learning_rate": 1.064549927461198e-06, "loss": 0.0206, "step": 200110 }, { "epoch": 1.619224856379966, "grad_norm": 0.43450406193733215, "learning_rate": 1.0641144194618276e-06, "loss": 0.0201, "step": 200120 }, { "epoch": 1.6193057690751678, "grad_norm": 0.5132456421852112, "learning_rate": 1.0636789899553856e-06, "loss": 0.0154, "step": 200130 }, { "epoch": 1.61938668177037, "grad_norm": 0.29119741916656494, "learning_rate": 1.0632436389505568e-06, "loss": 0.0196, "step": 200140 }, { "epoch": 1.6194675944655716, "grad_norm": 0.34173139929771423, "learning_rate": 1.0628083664560223e-06, "loss": 0.018, "step": 200150 }, { "epoch": 1.6195485071607734, "grad_norm": 0.3833739161491394, "learning_rate": 1.0623731724804632e-06, "loss": 0.0298, "step": 200160 }, { "epoch": 1.6196294198559755, "grad_norm": 0.5757009387016296, "learning_rate": 1.0619380570325587e-06, "loss": 0.0202, "step": 200170 }, { "epoch": 1.6197103325511772, "grad_norm": 0.12212346494197845, "learning_rate": 1.0615030201209858e-06, "loss": 0.0213, "step": 200180 }, { "epoch": 1.6197912452463792, "grad_norm": 0.24689626693725586, "learning_rate": 1.0610680617544207e-06, "loss": 0.0177, "step": 200190 }, { "epoch": 1.6198721579415811, "grad_norm": 0.64731764793396, "learning_rate": 1.0606331819415367e-06, "loss": 0.0167, "step": 200200 }, { "epoch": 1.6199530706367828, "grad_norm": 0.5251492857933044, "learning_rate": 1.0601983806910093e-06, "loss": 0.0142, "step": 200210 }, { "epoch": 1.6200339833319848, "grad_norm": 0.205735981464386, "learning_rate": 1.0597636580115072e-06, "loss": 0.0093, "step": 200220 }, { "epoch": 1.6201148960271867, "grad_norm": 0.19936688244342804, "learning_rate": 1.0593290139116995e-06, "loss": 0.026, "step": 200230 }, { "epoch": 1.6201958087223884, "grad_norm": 0.39641857147216797, "learning_rate": 1.0588944484002577e-06, "loss": 0.0153, "step": 200240 }, { "epoch": 1.6202767214175904, "grad_norm": 0.7016860842704773, "learning_rate": 1.0584599614858437e-06, "loss": 0.0201, "step": 200250 }, { "epoch": 1.6203576341127923, "grad_norm": 0.3974543511867523, "learning_rate": 1.0580255531771262e-06, "loss": 0.0155, "step": 200260 }, { "epoch": 1.620438546807994, "grad_norm": 1.6744567155838013, "learning_rate": 1.0575912234827684e-06, "loss": 0.0223, "step": 200270 }, { "epoch": 1.6205194595031962, "grad_norm": 0.20773111283779144, "learning_rate": 1.0571569724114283e-06, "loss": 0.0187, "step": 200280 }, { "epoch": 1.620600372198398, "grad_norm": 0.7718110084533691, "learning_rate": 1.05672279997177e-06, "loss": 0.0192, "step": 200290 }, { "epoch": 1.6206812848935999, "grad_norm": 0.38420870900154114, "learning_rate": 1.056288706172452e-06, "loss": 0.0178, "step": 200300 }, { "epoch": 1.6207621975888018, "grad_norm": 0.6925604343414307, "learning_rate": 1.0558546910221272e-06, "loss": 0.018, "step": 200310 }, { "epoch": 1.6208431102840035, "grad_norm": 0.4980071783065796, "learning_rate": 1.055420754529456e-06, "loss": 0.0116, "step": 200320 }, { "epoch": 1.6209240229792055, "grad_norm": 0.2687302529811859, "learning_rate": 1.0549868967030902e-06, "loss": 0.0132, "step": 200330 }, { "epoch": 1.6210049356744074, "grad_norm": 0.29040637612342834, "learning_rate": 1.0545531175516826e-06, "loss": 0.0254, "step": 200340 }, { "epoch": 1.6210858483696091, "grad_norm": 0.21268205344676971, "learning_rate": 1.0541194170838842e-06, "loss": 0.0181, "step": 200350 }, { "epoch": 1.621166761064811, "grad_norm": 0.3325517177581787, "learning_rate": 1.0536857953083435e-06, "loss": 0.0163, "step": 200360 }, { "epoch": 1.621247673760013, "grad_norm": 0.30521905422210693, "learning_rate": 1.0532522522337086e-06, "loss": 0.0209, "step": 200370 }, { "epoch": 1.6213285864552147, "grad_norm": 0.3905077576637268, "learning_rate": 1.0528187878686257e-06, "loss": 0.0151, "step": 200380 }, { "epoch": 1.6214094991504167, "grad_norm": 0.360781192779541, "learning_rate": 1.0523854022217378e-06, "loss": 0.0212, "step": 200390 }, { "epoch": 1.6214904118456186, "grad_norm": 0.3213898837566376, "learning_rate": 1.0519520953016916e-06, "loss": 0.0138, "step": 200400 }, { "epoch": 1.6215713245408203, "grad_norm": 0.25000762939453125, "learning_rate": 1.0515188671171251e-06, "loss": 0.0106, "step": 200410 }, { "epoch": 1.6216522372360225, "grad_norm": 0.6486683487892151, "learning_rate": 1.051085717676678e-06, "loss": 0.0279, "step": 200420 }, { "epoch": 1.6217331499312242, "grad_norm": 0.3603663146495819, "learning_rate": 1.0506526469889922e-06, "loss": 0.0245, "step": 200430 }, { "epoch": 1.6218140626264261, "grad_norm": 0.0013619427336379886, "learning_rate": 1.0502196550626997e-06, "loss": 0.0174, "step": 200440 }, { "epoch": 1.621894975321628, "grad_norm": 0.29387369751930237, "learning_rate": 1.0497867419064383e-06, "loss": 0.0098, "step": 200450 }, { "epoch": 1.6219758880168298, "grad_norm": 0.611770749092102, "learning_rate": 1.0493539075288434e-06, "loss": 0.0153, "step": 200460 }, { "epoch": 1.6220568007120317, "grad_norm": 0.23235543072223663, "learning_rate": 1.048921151938541e-06, "loss": 0.0126, "step": 200470 }, { "epoch": 1.6221377134072337, "grad_norm": 0.5491460561752319, "learning_rate": 1.0484884751441671e-06, "loss": 0.027, "step": 200480 }, { "epoch": 1.6222186261024354, "grad_norm": 0.2859869599342346, "learning_rate": 1.0480558771543498e-06, "loss": 0.0076, "step": 200490 }, { "epoch": 1.6222995387976373, "grad_norm": 0.2603861689567566, "learning_rate": 1.0476233579777124e-06, "loss": 0.0101, "step": 200500 }, { "epoch": 1.6223804514928393, "grad_norm": 0.2951783537864685, "learning_rate": 1.0471909176228844e-06, "loss": 0.0151, "step": 200510 }, { "epoch": 1.622461364188041, "grad_norm": 0.5345235466957092, "learning_rate": 1.0467585560984888e-06, "loss": 0.0172, "step": 200520 }, { "epoch": 1.622542276883243, "grad_norm": 0.43114808201789856, "learning_rate": 1.0463262734131485e-06, "loss": 0.0177, "step": 200530 }, { "epoch": 1.6226231895784449, "grad_norm": 0.14957769215106964, "learning_rate": 1.0458940695754837e-06, "loss": 0.0181, "step": 200540 }, { "epoch": 1.6227041022736466, "grad_norm": 0.07461140304803848, "learning_rate": 1.0454619445941144e-06, "loss": 0.0157, "step": 200550 }, { "epoch": 1.6227850149688487, "grad_norm": 0.2229076772928238, "learning_rate": 1.0450298984776586e-06, "loss": 0.0215, "step": 200560 }, { "epoch": 1.6228659276640505, "grad_norm": 0.5054015517234802, "learning_rate": 1.0445979312347316e-06, "loss": 0.0197, "step": 200570 }, { "epoch": 1.6229468403592524, "grad_norm": 0.5972140431404114, "learning_rate": 1.0441660428739476e-06, "loss": 0.0234, "step": 200580 }, { "epoch": 1.6230277530544543, "grad_norm": 0.3750339448451996, "learning_rate": 1.043734233403923e-06, "loss": 0.0189, "step": 200590 }, { "epoch": 1.623108665749656, "grad_norm": 0.2536107897758484, "learning_rate": 1.0433025028332661e-06, "loss": 0.0199, "step": 200600 }, { "epoch": 1.623189578444858, "grad_norm": 0.29084667563438416, "learning_rate": 1.0428708511705865e-06, "loss": 0.0206, "step": 200610 }, { "epoch": 1.62327049114006, "grad_norm": 0.11304503679275513, "learning_rate": 1.0424392784244964e-06, "loss": 0.014, "step": 200620 }, { "epoch": 1.6233514038352617, "grad_norm": 0.4850819706916809, "learning_rate": 1.0420077846035986e-06, "loss": 0.0142, "step": 200630 }, { "epoch": 1.6234323165304636, "grad_norm": 0.4143800139427185, "learning_rate": 1.0415763697164989e-06, "loss": 0.015, "step": 200640 }, { "epoch": 1.6235132292256655, "grad_norm": 0.27135246992111206, "learning_rate": 1.0411450337718044e-06, "loss": 0.0201, "step": 200650 }, { "epoch": 1.6235941419208673, "grad_norm": 0.39476123452186584, "learning_rate": 1.0407137767781123e-06, "loss": 0.0109, "step": 200660 }, { "epoch": 1.6236750546160694, "grad_norm": 0.2446277141571045, "learning_rate": 1.0402825987440263e-06, "loss": 0.0138, "step": 200670 }, { "epoch": 1.6237559673112711, "grad_norm": 0.4210735261440277, "learning_rate": 1.0398514996781461e-06, "loss": 0.0165, "step": 200680 }, { "epoch": 1.6238368800064729, "grad_norm": 0.006514246109873056, "learning_rate": 1.039420479589065e-06, "loss": 0.021, "step": 200690 }, { "epoch": 1.623917792701675, "grad_norm": 0.16200363636016846, "learning_rate": 1.0389895384853826e-06, "loss": 0.0162, "step": 200700 }, { "epoch": 1.6239987053968767, "grad_norm": 0.4844353199005127, "learning_rate": 1.0385586763756927e-06, "loss": 0.019, "step": 200710 }, { "epoch": 1.6240796180920787, "grad_norm": 0.3357386887073517, "learning_rate": 1.0381278932685852e-06, "loss": 0.0147, "step": 200720 }, { "epoch": 1.6241605307872806, "grad_norm": 0.6589234471321106, "learning_rate": 1.0376971891726534e-06, "loss": 0.0158, "step": 200730 }, { "epoch": 1.6242414434824823, "grad_norm": 0.09648894518613815, "learning_rate": 1.0372665640964868e-06, "loss": 0.011, "step": 200740 }, { "epoch": 1.6243223561776843, "grad_norm": 0.4969114661216736, "learning_rate": 1.036836018048673e-06, "loss": 0.0187, "step": 200750 }, { "epoch": 1.6244032688728862, "grad_norm": 0.1462157815694809, "learning_rate": 1.0364055510377978e-06, "loss": 0.0101, "step": 200760 }, { "epoch": 1.624484181568088, "grad_norm": 0.4727596044540405, "learning_rate": 1.0359751630724469e-06, "loss": 0.0282, "step": 200770 }, { "epoch": 1.6245650942632899, "grad_norm": 0.3898674547672272, "learning_rate": 1.0355448541612024e-06, "loss": 0.0117, "step": 200780 }, { "epoch": 1.6246460069584918, "grad_norm": 0.18791194260120392, "learning_rate": 1.035114624312647e-06, "loss": 0.0143, "step": 200790 }, { "epoch": 1.6247269196536935, "grad_norm": 0.4067077040672302, "learning_rate": 1.0346844735353584e-06, "loss": 0.0179, "step": 200800 }, { "epoch": 1.6248078323488957, "grad_norm": 0.451814204454422, "learning_rate": 1.0342544018379198e-06, "loss": 0.0199, "step": 200810 }, { "epoch": 1.6248887450440974, "grad_norm": 0.5953519940376282, "learning_rate": 1.0338244092289035e-06, "loss": 0.0243, "step": 200820 }, { "epoch": 1.6249696577392991, "grad_norm": 0.49785542488098145, "learning_rate": 1.033394495716885e-06, "loss": 0.0201, "step": 200830 }, { "epoch": 1.6250505704345013, "grad_norm": 0.36891478300094604, "learning_rate": 1.0329646613104422e-06, "loss": 0.0213, "step": 200840 }, { "epoch": 1.625131483129703, "grad_norm": 0.385133296251297, "learning_rate": 1.0325349060181433e-06, "loss": 0.0223, "step": 200850 }, { "epoch": 1.625212395824905, "grad_norm": 0.7656163573265076, "learning_rate": 1.0321052298485585e-06, "loss": 0.0206, "step": 200860 }, { "epoch": 1.625293308520107, "grad_norm": 0.45917725563049316, "learning_rate": 1.0316756328102613e-06, "loss": 0.0169, "step": 200870 }, { "epoch": 1.6253742212153086, "grad_norm": 0.5936164855957031, "learning_rate": 1.0312461149118131e-06, "loss": 0.0299, "step": 200880 }, { "epoch": 1.6254551339105106, "grad_norm": 0.4495074450969696, "learning_rate": 1.0308166761617838e-06, "loss": 0.0151, "step": 200890 }, { "epoch": 1.6255360466057125, "grad_norm": 0.385585218667984, "learning_rate": 1.0303873165687384e-06, "loss": 0.0157, "step": 200900 }, { "epoch": 1.6256169593009142, "grad_norm": 0.4123780429363251, "learning_rate": 1.0299580361412348e-06, "loss": 0.016, "step": 200910 }, { "epoch": 1.6256978719961162, "grad_norm": 0.40597525238990784, "learning_rate": 1.0295288348878386e-06, "loss": 0.0227, "step": 200920 }, { "epoch": 1.625778784691318, "grad_norm": 0.292095810174942, "learning_rate": 1.029099712817107e-06, "loss": 0.016, "step": 200930 }, { "epoch": 1.6258596973865198, "grad_norm": 0.40373677015304565, "learning_rate": 1.0286706699375998e-06, "loss": 0.0167, "step": 200940 }, { "epoch": 1.625940610081722, "grad_norm": 0.24780085682868958, "learning_rate": 1.0282417062578714e-06, "loss": 0.0179, "step": 200950 }, { "epoch": 1.6260215227769237, "grad_norm": 0.3499579131603241, "learning_rate": 1.027812821786478e-06, "loss": 0.0159, "step": 200960 }, { "epoch": 1.6261024354721256, "grad_norm": 0.4664487838745117, "learning_rate": 1.027384016531972e-06, "loss": 0.0211, "step": 200970 }, { "epoch": 1.6261833481673276, "grad_norm": 0.3471020758152008, "learning_rate": 1.0269552905029052e-06, "loss": 0.0225, "step": 200980 }, { "epoch": 1.6262642608625293, "grad_norm": 0.33334672451019287, "learning_rate": 1.0265266437078259e-06, "loss": 0.0164, "step": 200990 }, { "epoch": 1.6263451735577312, "grad_norm": 0.41866564750671387, "learning_rate": 1.0260980761552874e-06, "loss": 0.0211, "step": 201000 }, { "epoch": 1.6264260862529332, "grad_norm": 0.32263341546058655, "learning_rate": 1.0256695878538326e-06, "loss": 0.0156, "step": 201010 }, { "epoch": 1.6265069989481349, "grad_norm": 0.29308590292930603, "learning_rate": 1.0252411788120054e-06, "loss": 0.0132, "step": 201020 }, { "epoch": 1.6265879116433368, "grad_norm": 1.1161640882492065, "learning_rate": 1.0248128490383552e-06, "loss": 0.0164, "step": 201030 }, { "epoch": 1.6266688243385388, "grad_norm": 0.3856368064880371, "learning_rate": 1.0243845985414191e-06, "loss": 0.0292, "step": 201040 }, { "epoch": 1.6267497370337405, "grad_norm": 0.06788202375173569, "learning_rate": 1.0239564273297381e-06, "loss": 0.0132, "step": 201050 }, { "epoch": 1.6268306497289424, "grad_norm": 0.3248506188392639, "learning_rate": 1.0235283354118547e-06, "loss": 0.0154, "step": 201060 }, { "epoch": 1.6269115624241444, "grad_norm": 0.42010945081710815, "learning_rate": 1.0231003227963015e-06, "loss": 0.0294, "step": 201070 }, { "epoch": 1.626992475119346, "grad_norm": 0.4515925943851471, "learning_rate": 1.0226723894916184e-06, "loss": 0.0095, "step": 201080 }, { "epoch": 1.6270733878145482, "grad_norm": 0.6593226194381714, "learning_rate": 1.0222445355063377e-06, "loss": 0.0179, "step": 201090 }, { "epoch": 1.62715430050975, "grad_norm": 0.4893582761287689, "learning_rate": 1.021816760848992e-06, "loss": 0.0241, "step": 201100 }, { "epoch": 1.627235213204952, "grad_norm": 0.2097373604774475, "learning_rate": 1.0213890655281133e-06, "loss": 0.0114, "step": 201110 }, { "epoch": 1.6273161259001538, "grad_norm": 0.37478986382484436, "learning_rate": 1.0209614495522302e-06, "loss": 0.0173, "step": 201120 }, { "epoch": 1.6273970385953556, "grad_norm": 0.24424409866333008, "learning_rate": 1.0205339129298709e-06, "loss": 0.0171, "step": 201130 }, { "epoch": 1.6274779512905575, "grad_norm": 0.33254367113113403, "learning_rate": 1.0201064556695611e-06, "loss": 0.0204, "step": 201140 }, { "epoch": 1.6275588639857594, "grad_norm": 0.5449259281158447, "learning_rate": 1.0196790777798272e-06, "loss": 0.0209, "step": 201150 }, { "epoch": 1.6276397766809612, "grad_norm": 0.2743471562862396, "learning_rate": 1.0192517792691903e-06, "loss": 0.0156, "step": 201160 }, { "epoch": 1.627720689376163, "grad_norm": 0.36703062057495117, "learning_rate": 1.0188245601461734e-06, "loss": 0.0165, "step": 201170 }, { "epoch": 1.627801602071365, "grad_norm": 0.38117653131484985, "learning_rate": 1.018397420419296e-06, "loss": 0.0154, "step": 201180 }, { "epoch": 1.6278825147665668, "grad_norm": 0.4938383400440216, "learning_rate": 1.0179703600970764e-06, "loss": 0.0251, "step": 201190 }, { "epoch": 1.6279634274617687, "grad_norm": 0.40886202454566956, "learning_rate": 1.017543379188032e-06, "loss": 0.0166, "step": 201200 }, { "epoch": 1.6280443401569706, "grad_norm": 0.38708484172821045, "learning_rate": 1.0171164777006753e-06, "loss": 0.0259, "step": 201210 }, { "epoch": 1.6281252528521724, "grad_norm": 0.5232579708099365, "learning_rate": 1.0166896556435257e-06, "loss": 0.0265, "step": 201220 }, { "epoch": 1.6282061655473745, "grad_norm": 0.432355672121048, "learning_rate": 1.0162629130250901e-06, "loss": 0.0116, "step": 201230 }, { "epoch": 1.6282870782425762, "grad_norm": 0.8736002445220947, "learning_rate": 1.0158362498538793e-06, "loss": 0.0225, "step": 201240 }, { "epoch": 1.6283679909377782, "grad_norm": 0.31462931632995605, "learning_rate": 1.015409666138406e-06, "loss": 0.0112, "step": 201250 }, { "epoch": 1.6284489036329801, "grad_norm": 0.5372375249862671, "learning_rate": 1.014983161887174e-06, "loss": 0.0182, "step": 201260 }, { "epoch": 1.6285298163281818, "grad_norm": 0.7382150888442993, "learning_rate": 1.014556737108689e-06, "loss": 0.0253, "step": 201270 }, { "epoch": 1.6286107290233838, "grad_norm": 0.39101529121398926, "learning_rate": 1.014130391811457e-06, "loss": 0.0187, "step": 201280 }, { "epoch": 1.6286916417185857, "grad_norm": 0.4053838551044464, "learning_rate": 1.0137041260039803e-06, "loss": 0.0274, "step": 201290 }, { "epoch": 1.6287725544137874, "grad_norm": 0.32285767793655396, "learning_rate": 1.0132779396947595e-06, "loss": 0.0222, "step": 201300 }, { "epoch": 1.6288534671089894, "grad_norm": 0.18004177510738373, "learning_rate": 1.0128518328922931e-06, "loss": 0.0157, "step": 201310 }, { "epoch": 1.6289343798041913, "grad_norm": 0.31550610065460205, "learning_rate": 1.01242580560508e-06, "loss": 0.0196, "step": 201320 }, { "epoch": 1.629015292499393, "grad_norm": 0.2652769088745117, "learning_rate": 1.0119998578416162e-06, "loss": 0.0169, "step": 201330 }, { "epoch": 1.6290962051945952, "grad_norm": 0.3180077373981476, "learning_rate": 1.0115739896103965e-06, "loss": 0.0127, "step": 201340 }, { "epoch": 1.629177117889797, "grad_norm": 0.40889886021614075, "learning_rate": 1.011148200919913e-06, "loss": 0.026, "step": 201350 }, { "epoch": 1.6292580305849986, "grad_norm": 0.6024144887924194, "learning_rate": 1.0107224917786584e-06, "loss": 0.0194, "step": 201360 }, { "epoch": 1.6293389432802008, "grad_norm": 0.3008282482624054, "learning_rate": 1.0102968621951214e-06, "loss": 0.0153, "step": 201370 }, { "epoch": 1.6294198559754025, "grad_norm": 0.3903923034667969, "learning_rate": 1.0098713121777913e-06, "loss": 0.0284, "step": 201380 }, { "epoch": 1.6295007686706044, "grad_norm": 0.2968038022518158, "learning_rate": 1.0094458417351538e-06, "loss": 0.0111, "step": 201390 }, { "epoch": 1.6295816813658064, "grad_norm": 0.16522036492824554, "learning_rate": 1.0090204508756951e-06, "loss": 0.0163, "step": 201400 }, { "epoch": 1.629662594061008, "grad_norm": 0.47769203782081604, "learning_rate": 1.008595139607898e-06, "loss": 0.0124, "step": 201410 }, { "epoch": 1.62974350675621, "grad_norm": 0.3706885576248169, "learning_rate": 1.0081699079402446e-06, "loss": 0.0178, "step": 201420 }, { "epoch": 1.629824419451412, "grad_norm": 0.6005082130432129, "learning_rate": 1.0077447558812136e-06, "loss": 0.0239, "step": 201430 }, { "epoch": 1.6299053321466137, "grad_norm": 0.4815385341644287, "learning_rate": 1.0073196834392885e-06, "loss": 0.0129, "step": 201440 }, { "epoch": 1.6299862448418156, "grad_norm": 0.08732901513576508, "learning_rate": 1.0068946906229422e-06, "loss": 0.0156, "step": 201450 }, { "epoch": 1.6300671575370176, "grad_norm": 0.2862358093261719, "learning_rate": 1.0064697774406496e-06, "loss": 0.0131, "step": 201460 }, { "epoch": 1.6301480702322193, "grad_norm": 0.39689698815345764, "learning_rate": 1.0060449439008884e-06, "loss": 0.0143, "step": 201470 }, { "epoch": 1.6302289829274215, "grad_norm": 0.48875778913497925, "learning_rate": 1.0056201900121288e-06, "loss": 0.0219, "step": 201480 }, { "epoch": 1.6303098956226232, "grad_norm": 0.5649400353431702, "learning_rate": 1.0051955157828425e-06, "loss": 0.0142, "step": 201490 }, { "epoch": 1.630390808317825, "grad_norm": 0.49481019377708435, "learning_rate": 1.0047709212214978e-06, "loss": 0.0205, "step": 201500 }, { "epoch": 1.630471721013027, "grad_norm": 0.5789694786071777, "learning_rate": 1.0043464063365627e-06, "loss": 0.0133, "step": 201510 }, { "epoch": 1.6305526337082288, "grad_norm": 0.21556870639324188, "learning_rate": 1.0039219711365038e-06, "loss": 0.0101, "step": 201520 }, { "epoch": 1.6306335464034307, "grad_norm": 0.6443472504615784, "learning_rate": 1.0034976156297848e-06, "loss": 0.014, "step": 201530 }, { "epoch": 1.6307144590986327, "grad_norm": 0.3662497103214264, "learning_rate": 1.0030733398248688e-06, "loss": 0.0144, "step": 201540 }, { "epoch": 1.6307953717938344, "grad_norm": 0.5773325562477112, "learning_rate": 1.0026491437302172e-06, "loss": 0.0237, "step": 201550 }, { "epoch": 1.6308762844890363, "grad_norm": 0.31142815947532654, "learning_rate": 1.0022250273542895e-06, "loss": 0.0075, "step": 201560 }, { "epoch": 1.6309571971842383, "grad_norm": 0.49787992238998413, "learning_rate": 1.0018009907055442e-06, "loss": 0.0147, "step": 201570 }, { "epoch": 1.63103810987944, "grad_norm": 0.5134344696998596, "learning_rate": 1.0013770337924377e-06, "loss": 0.0122, "step": 201580 }, { "epoch": 1.631119022574642, "grad_norm": 0.21344850957393646, "learning_rate": 1.0009531566234242e-06, "loss": 0.0107, "step": 201590 }, { "epoch": 1.6311999352698439, "grad_norm": 0.2120044082403183, "learning_rate": 1.0005293592069582e-06, "loss": 0.0111, "step": 201600 }, { "epoch": 1.6312808479650456, "grad_norm": 0.44466522336006165, "learning_rate": 1.0001056415514909e-06, "loss": 0.0328, "step": 201610 }, { "epoch": 1.6313617606602477, "grad_norm": 0.4931838810443878, "learning_rate": 9.996820036654708e-07, "loss": 0.0215, "step": 201620 }, { "epoch": 1.6314426733554495, "grad_norm": 0.4968501329421997, "learning_rate": 9.992584455573496e-07, "loss": 0.0153, "step": 201630 }, { "epoch": 1.6315235860506514, "grad_norm": 0.25485122203826904, "learning_rate": 9.988349672355736e-07, "loss": 0.0182, "step": 201640 }, { "epoch": 1.6316044987458533, "grad_norm": 0.3261509835720062, "learning_rate": 9.984115687085855e-07, "loss": 0.0176, "step": 201650 }, { "epoch": 1.631685411441055, "grad_norm": 0.010511110536754131, "learning_rate": 9.979882499848315e-07, "loss": 0.0236, "step": 201660 }, { "epoch": 1.631766324136257, "grad_norm": 0.1818477362394333, "learning_rate": 9.975650110727546e-07, "loss": 0.0182, "step": 201670 }, { "epoch": 1.631847236831459, "grad_norm": 0.1849127560853958, "learning_rate": 9.971418519807918e-07, "loss": 0.0128, "step": 201680 }, { "epoch": 1.6319281495266607, "grad_norm": 0.49294161796569824, "learning_rate": 9.967187727173861e-07, "loss": 0.0213, "step": 201690 }, { "epoch": 1.6320090622218626, "grad_norm": 0.18856686353683472, "learning_rate": 9.962957732909722e-07, "loss": 0.0096, "step": 201700 }, { "epoch": 1.6320899749170645, "grad_norm": 0.3052847981452942, "learning_rate": 9.958728537099876e-07, "loss": 0.017, "step": 201710 }, { "epoch": 1.6321708876122663, "grad_norm": 0.2523518204689026, "learning_rate": 9.954500139828654e-07, "loss": 0.0167, "step": 201720 }, { "epoch": 1.6322518003074682, "grad_norm": 0.5324560403823853, "learning_rate": 9.950272541180389e-07, "loss": 0.0112, "step": 201730 }, { "epoch": 1.6323327130026701, "grad_norm": 0.25433915853500366, "learning_rate": 9.94604574123939e-07, "loss": 0.0127, "step": 201740 }, { "epoch": 1.6324136256978719, "grad_norm": 0.2716224789619446, "learning_rate": 9.941819740089953e-07, "loss": 0.0179, "step": 201750 }, { "epoch": 1.632494538393074, "grad_norm": 0.8603891730308533, "learning_rate": 9.937594537816347e-07, "loss": 0.0293, "step": 201760 }, { "epoch": 1.6325754510882757, "grad_norm": 0.30791717767715454, "learning_rate": 9.93337013450285e-07, "loss": 0.0184, "step": 201770 }, { "epoch": 1.6326563637834777, "grad_norm": 0.007642843760550022, "learning_rate": 9.929146530233696e-07, "loss": 0.0084, "step": 201780 }, { "epoch": 1.6327372764786796, "grad_norm": 0.33371099829673767, "learning_rate": 9.924923725093127e-07, "loss": 0.017, "step": 201790 }, { "epoch": 1.6328181891738813, "grad_norm": 0.43694862723350525, "learning_rate": 9.920701719165349e-07, "loss": 0.0223, "step": 201800 }, { "epoch": 1.6328991018690833, "grad_norm": 0.47864973545074463, "learning_rate": 9.916480512534565e-07, "loss": 0.0221, "step": 201810 }, { "epoch": 1.6329800145642852, "grad_norm": 0.22276774048805237, "learning_rate": 9.912260105284944e-07, "loss": 0.0329, "step": 201820 }, { "epoch": 1.633060927259487, "grad_norm": 0.19482311606407166, "learning_rate": 9.908040497500687e-07, "loss": 0.0203, "step": 201830 }, { "epoch": 1.6331418399546889, "grad_norm": 0.23927105963230133, "learning_rate": 9.903821689265903e-07, "loss": 0.0133, "step": 201840 }, { "epoch": 1.6332227526498908, "grad_norm": 0.2464764416217804, "learning_rate": 9.899603680664765e-07, "loss": 0.0318, "step": 201850 }, { "epoch": 1.6333036653450925, "grad_norm": 0.47043702006340027, "learning_rate": 9.895386471781381e-07, "loss": 0.0068, "step": 201860 }, { "epoch": 1.6333845780402947, "grad_norm": 0.7734962105751038, "learning_rate": 9.89117006269983e-07, "loss": 0.0299, "step": 201870 }, { "epoch": 1.6334654907354964, "grad_norm": 0.2763087749481201, "learning_rate": 9.886954453504227e-07, "loss": 0.0137, "step": 201880 }, { "epoch": 1.6335464034306981, "grad_norm": 0.6260592341423035, "learning_rate": 9.882739644278655e-07, "loss": 0.0173, "step": 201890 }, { "epoch": 1.6336273161259003, "grad_norm": 0.31219229102134705, "learning_rate": 9.878525635107116e-07, "loss": 0.0137, "step": 201900 }, { "epoch": 1.633708228821102, "grad_norm": 0.1916493922472, "learning_rate": 9.874312426073702e-07, "loss": 0.0152, "step": 201910 }, { "epoch": 1.633789141516304, "grad_norm": 0.5924240946769714, "learning_rate": 9.870100017262414e-07, "loss": 0.0246, "step": 201920 }, { "epoch": 1.6338700542115059, "grad_norm": 0.24624747037887573, "learning_rate": 9.865888408757262e-07, "loss": 0.0162, "step": 201930 }, { "epoch": 1.6339509669067076, "grad_norm": 0.417621910572052, "learning_rate": 9.861677600642239e-07, "loss": 0.0124, "step": 201940 }, { "epoch": 1.6340318796019095, "grad_norm": 0.4786466360092163, "learning_rate": 9.857467593001324e-07, "loss": 0.029, "step": 201950 }, { "epoch": 1.6341127922971115, "grad_norm": 0.5589735507965088, "learning_rate": 9.853258385918467e-07, "loss": 0.0169, "step": 201960 }, { "epoch": 1.6341937049923132, "grad_norm": 0.5968777537345886, "learning_rate": 9.849049979477626e-07, "loss": 0.0159, "step": 201970 }, { "epoch": 1.6342746176875151, "grad_norm": 0.6593871712684631, "learning_rate": 9.844842373762715e-07, "loss": 0.023, "step": 201980 }, { "epoch": 1.634355530382717, "grad_norm": 0.4096255302429199, "learning_rate": 9.840635568857654e-07, "loss": 0.0169, "step": 201990 }, { "epoch": 1.6344364430779188, "grad_norm": 0.21207007765769958, "learning_rate": 9.836429564846335e-07, "loss": 0.0158, "step": 202000 }, { "epoch": 1.634517355773121, "grad_norm": 0.4290551245212555, "learning_rate": 9.832224361812625e-07, "loss": 0.0178, "step": 202010 }, { "epoch": 1.6345982684683227, "grad_norm": 0.22504137456417084, "learning_rate": 9.82801995984043e-07, "loss": 0.0177, "step": 202020 }, { "epoch": 1.6346791811635244, "grad_norm": 0.3235554099082947, "learning_rate": 9.823816359013543e-07, "loss": 0.0148, "step": 202030 }, { "epoch": 1.6347600938587266, "grad_norm": 0.3407396078109741, "learning_rate": 9.819613559415842e-07, "loss": 0.0116, "step": 202040 }, { "epoch": 1.6348410065539283, "grad_norm": 0.5578231811523438, "learning_rate": 9.815411561131128e-07, "loss": 0.0171, "step": 202050 }, { "epoch": 1.6349219192491302, "grad_norm": 0.6993831396102905, "learning_rate": 9.811210364243179e-07, "loss": 0.0181, "step": 202060 }, { "epoch": 1.6350028319443322, "grad_norm": 0.30280551314353943, "learning_rate": 9.807009968835807e-07, "loss": 0.0201, "step": 202070 }, { "epoch": 1.6350837446395339, "grad_norm": 0.3936808109283447, "learning_rate": 9.802810374992777e-07, "loss": 0.0251, "step": 202080 }, { "epoch": 1.6351646573347358, "grad_norm": 0.5054006576538086, "learning_rate": 9.798611582797818e-07, "loss": 0.0206, "step": 202090 }, { "epoch": 1.6352455700299378, "grad_norm": 0.4353826344013214, "learning_rate": 9.794413592334689e-07, "loss": 0.0164, "step": 202100 }, { "epoch": 1.6353264827251395, "grad_norm": 0.5055380463600159, "learning_rate": 9.790216403687103e-07, "loss": 0.0228, "step": 202110 }, { "epoch": 1.6354073954203414, "grad_norm": 0.2189035266637802, "learning_rate": 9.786020016938769e-07, "loss": 0.0165, "step": 202120 }, { "epoch": 1.6354883081155434, "grad_norm": 0.15347403287887573, "learning_rate": 9.781824432173365e-07, "loss": 0.0174, "step": 202130 }, { "epoch": 1.635569220810745, "grad_norm": 0.6475096344947815, "learning_rate": 9.777629649474568e-07, "loss": 0.0135, "step": 202140 }, { "epoch": 1.6356501335059472, "grad_norm": 0.5066447854042053, "learning_rate": 9.77343566892604e-07, "loss": 0.0236, "step": 202150 }, { "epoch": 1.635731046201149, "grad_norm": 0.28520846366882324, "learning_rate": 9.769242490611408e-07, "loss": 0.0174, "step": 202160 }, { "epoch": 1.635811958896351, "grad_norm": 0.6510792374610901, "learning_rate": 9.7650501146143e-07, "loss": 0.0309, "step": 202170 }, { "epoch": 1.6358928715915528, "grad_norm": 0.20347797870635986, "learning_rate": 9.760858541018347e-07, "loss": 0.0211, "step": 202180 }, { "epoch": 1.6359737842867546, "grad_norm": 0.5648415088653564, "learning_rate": 9.756667769907107e-07, "loss": 0.0223, "step": 202190 }, { "epoch": 1.6360546969819565, "grad_norm": 0.24645477533340454, "learning_rate": 9.752477801364162e-07, "loss": 0.0108, "step": 202200 }, { "epoch": 1.6361356096771584, "grad_norm": 0.39333775639533997, "learning_rate": 9.748288635473107e-07, "loss": 0.0209, "step": 202210 }, { "epoch": 1.6362165223723601, "grad_norm": 0.19875375926494598, "learning_rate": 9.744100272317448e-07, "loss": 0.0158, "step": 202220 }, { "epoch": 1.636297435067562, "grad_norm": 0.4183054566383362, "learning_rate": 9.73991271198071e-07, "loss": 0.0097, "step": 202230 }, { "epoch": 1.636378347762764, "grad_norm": 0.39062052965164185, "learning_rate": 9.735725954546448e-07, "loss": 0.0091, "step": 202240 }, { "epoch": 1.6364592604579657, "grad_norm": 0.3025496006011963, "learning_rate": 9.7315400000981e-07, "loss": 0.0142, "step": 202250 }, { "epoch": 1.6365401731531677, "grad_norm": 0.4380600154399872, "learning_rate": 9.727354848719194e-07, "loss": 0.0218, "step": 202260 }, { "epoch": 1.6366210858483696, "grad_norm": 0.1772458553314209, "learning_rate": 9.72317050049319e-07, "loss": 0.0155, "step": 202270 }, { "epoch": 1.6367019985435713, "grad_norm": 0.2220587432384491, "learning_rate": 9.71898695550349e-07, "loss": 0.0196, "step": 202280 }, { "epoch": 1.6367829112387735, "grad_norm": 0.19643253087997437, "learning_rate": 9.714804213833573e-07, "loss": 0.0089, "step": 202290 }, { "epoch": 1.6368638239339752, "grad_norm": 0.3143172562122345, "learning_rate": 9.710622275566855e-07, "loss": 0.0247, "step": 202300 }, { "epoch": 1.6369447366291772, "grad_norm": 0.5411675572395325, "learning_rate": 9.706441140786699e-07, "loss": 0.0166, "step": 202310 }, { "epoch": 1.637025649324379, "grad_norm": 0.3554624319076538, "learning_rate": 9.702260809576518e-07, "loss": 0.0139, "step": 202320 }, { "epoch": 1.6371065620195808, "grad_norm": 0.5143857598304749, "learning_rate": 9.698081282019672e-07, "loss": 0.0213, "step": 202330 }, { "epoch": 1.6371874747147828, "grad_norm": 0.08538052439689636, "learning_rate": 9.69390255819952e-07, "loss": 0.0281, "step": 202340 }, { "epoch": 1.6372683874099847, "grad_norm": 0.23116658627986908, "learning_rate": 9.689724638199388e-07, "loss": 0.0103, "step": 202350 }, { "epoch": 1.6373493001051864, "grad_norm": 0.4056096076965332, "learning_rate": 9.685547522102595e-07, "loss": 0.0276, "step": 202360 }, { "epoch": 1.6374302128003884, "grad_norm": 0.15016832947731018, "learning_rate": 9.681371209992457e-07, "loss": 0.0147, "step": 202370 }, { "epoch": 1.6375111254955903, "grad_norm": 0.31448981165885925, "learning_rate": 9.677195701952247e-07, "loss": 0.0156, "step": 202380 }, { "epoch": 1.637592038190792, "grad_norm": 0.3052366077899933, "learning_rate": 9.673020998065235e-07, "loss": 0.0234, "step": 202390 }, { "epoch": 1.637672950885994, "grad_norm": 0.36221766471862793, "learning_rate": 9.668847098414712e-07, "loss": 0.0097, "step": 202400 }, { "epoch": 1.637753863581196, "grad_norm": 0.4838391840457916, "learning_rate": 9.664674003083874e-07, "loss": 0.0207, "step": 202410 }, { "epoch": 1.6378347762763976, "grad_norm": 0.47508084774017334, "learning_rate": 9.66050171215595e-07, "loss": 0.0321, "step": 202420 }, { "epoch": 1.6379156889715998, "grad_norm": 0.4625738859176636, "learning_rate": 9.656330225714184e-07, "loss": 0.0141, "step": 202430 }, { "epoch": 1.6379966016668015, "grad_norm": 0.505253255367279, "learning_rate": 9.652159543841732e-07, "loss": 0.0211, "step": 202440 }, { "epoch": 1.6380775143620034, "grad_norm": 0.3848375678062439, "learning_rate": 9.647989666621766e-07, "loss": 0.0201, "step": 202450 }, { "epoch": 1.6381584270572054, "grad_norm": 0.1306750774383545, "learning_rate": 9.643820594137481e-07, "loss": 0.0202, "step": 202460 }, { "epoch": 1.638239339752407, "grad_norm": 0.14440740644931793, "learning_rate": 9.639652326471976e-07, "loss": 0.0157, "step": 202470 }, { "epoch": 1.638320252447609, "grad_norm": 0.3593665063381195, "learning_rate": 9.635484863708417e-07, "loss": 0.0398, "step": 202480 }, { "epoch": 1.638401165142811, "grad_norm": 0.14603616297245026, "learning_rate": 9.631318205929908e-07, "loss": 0.0194, "step": 202490 }, { "epoch": 1.6384820778380127, "grad_norm": 0.4788804352283478, "learning_rate": 9.62715235321951e-07, "loss": 0.0338, "step": 202500 }, { "epoch": 1.6385629905332146, "grad_norm": 0.9406419992446899, "learning_rate": 9.622987305660341e-07, "loss": 0.0312, "step": 202510 }, { "epoch": 1.6386439032284166, "grad_norm": 0.5532739758491516, "learning_rate": 9.618823063335447e-07, "loss": 0.0206, "step": 202520 }, { "epoch": 1.6387248159236183, "grad_norm": 0.5322594046592712, "learning_rate": 9.614659626327883e-07, "loss": 0.0173, "step": 202530 }, { "epoch": 1.6388057286188205, "grad_norm": 0.3561728596687317, "learning_rate": 9.610496994720675e-07, "loss": 0.0122, "step": 202540 }, { "epoch": 1.6388866413140222, "grad_norm": 0.6222570538520813, "learning_rate": 9.606335168596836e-07, "loss": 0.0204, "step": 202550 }, { "epoch": 1.638967554009224, "grad_norm": 0.3319229185581207, "learning_rate": 9.602174148039372e-07, "loss": 0.0148, "step": 202560 }, { "epoch": 1.639048466704426, "grad_norm": 0.5410130620002747, "learning_rate": 9.598013933131256e-07, "loss": 0.0167, "step": 202570 }, { "epoch": 1.6391293793996278, "grad_norm": 0.28180018067359924, "learning_rate": 9.593854523955447e-07, "loss": 0.015, "step": 202580 }, { "epoch": 1.6392102920948297, "grad_norm": 0.003727078903466463, "learning_rate": 9.589695920594938e-07, "loss": 0.0209, "step": 202590 }, { "epoch": 1.6392912047900317, "grad_norm": 0.48569104075431824, "learning_rate": 9.58553812313262e-07, "loss": 0.0236, "step": 202600 }, { "epoch": 1.6393721174852334, "grad_norm": 0.5016211867332458, "learning_rate": 9.581381131651412e-07, "loss": 0.0205, "step": 202610 }, { "epoch": 1.6394530301804353, "grad_norm": 0.4519445598125458, "learning_rate": 9.577224946234254e-07, "loss": 0.0215, "step": 202620 }, { "epoch": 1.6395339428756373, "grad_norm": 0.405678391456604, "learning_rate": 9.573069566964e-07, "loss": 0.0101, "step": 202630 }, { "epoch": 1.639614855570839, "grad_norm": 0.1700609028339386, "learning_rate": 9.568914993923506e-07, "loss": 0.0254, "step": 202640 }, { "epoch": 1.639695768266041, "grad_norm": 0.3560033440589905, "learning_rate": 9.564761227195685e-07, "loss": 0.0259, "step": 202650 }, { "epoch": 1.6397766809612428, "grad_norm": 0.30932849645614624, "learning_rate": 9.56060826686331e-07, "loss": 0.0249, "step": 202660 }, { "epoch": 1.6398575936564446, "grad_norm": 0.1150767058134079, "learning_rate": 9.556456113009239e-07, "loss": 0.0192, "step": 202670 }, { "epoch": 1.6399385063516467, "grad_norm": 0.20347000658512115, "learning_rate": 9.552304765716286e-07, "loss": 0.0227, "step": 202680 }, { "epoch": 1.6400194190468484, "grad_norm": 0.40764227509498596, "learning_rate": 9.548154225067203e-07, "loss": 0.0258, "step": 202690 }, { "epoch": 1.6401003317420502, "grad_norm": 0.31141015887260437, "learning_rate": 9.544004491144798e-07, "loss": 0.0212, "step": 202700 }, { "epoch": 1.6401812444372523, "grad_norm": 0.2374899685382843, "learning_rate": 9.53985556403183e-07, "loss": 0.0152, "step": 202710 }, { "epoch": 1.640262157132454, "grad_norm": 0.33404141664505005, "learning_rate": 9.535707443810999e-07, "loss": 0.014, "step": 202720 }, { "epoch": 1.640343069827656, "grad_norm": 0.38445591926574707, "learning_rate": 9.531560130565076e-07, "loss": 0.0113, "step": 202730 }, { "epoch": 1.640423982522858, "grad_norm": 0.10046301782131195, "learning_rate": 9.527413624376753e-07, "loss": 0.0175, "step": 202740 }, { "epoch": 1.6405048952180596, "grad_norm": 0.4268931746482849, "learning_rate": 9.523267925328722e-07, "loss": 0.0167, "step": 202750 }, { "epoch": 1.6405858079132616, "grad_norm": 0.04103242978453636, "learning_rate": 9.519123033503663e-07, "loss": 0.031, "step": 202760 }, { "epoch": 1.6406667206084635, "grad_norm": 0.23582854866981506, "learning_rate": 9.514978948984238e-07, "loss": 0.023, "step": 202770 }, { "epoch": 1.6407476333036652, "grad_norm": 0.4421556293964386, "learning_rate": 9.510835671853086e-07, "loss": 0.0226, "step": 202780 }, { "epoch": 1.6408285459988672, "grad_norm": 0.26257264614105225, "learning_rate": 9.506693202192846e-07, "loss": 0.0199, "step": 202790 }, { "epoch": 1.6409094586940691, "grad_norm": 0.7372409701347351, "learning_rate": 9.502551540086108e-07, "loss": 0.0295, "step": 202800 }, { "epoch": 1.6409903713892708, "grad_norm": 0.35497936606407166, "learning_rate": 9.498410685615511e-07, "loss": 0.0237, "step": 202810 }, { "epoch": 1.641071284084473, "grad_norm": 0.6588711738586426, "learning_rate": 9.494270638863595e-07, "loss": 0.0125, "step": 202820 }, { "epoch": 1.6411521967796747, "grad_norm": 0.28717872500419617, "learning_rate": 9.490131399912933e-07, "loss": 0.0165, "step": 202830 }, { "epoch": 1.6412331094748767, "grad_norm": 0.46073004603385925, "learning_rate": 9.485992968846103e-07, "loss": 0.0254, "step": 202840 }, { "epoch": 1.6413140221700786, "grad_norm": 0.2634817361831665, "learning_rate": 9.481855345745599e-07, "loss": 0.0177, "step": 202850 }, { "epoch": 1.6413949348652803, "grad_norm": 0.3835236430168152, "learning_rate": 9.477718530693941e-07, "loss": 0.0229, "step": 202860 }, { "epoch": 1.6414758475604823, "grad_norm": 0.42502301931381226, "learning_rate": 9.473582523773673e-07, "loss": 0.0122, "step": 202870 }, { "epoch": 1.6415567602556842, "grad_norm": 0.2803176939487457, "learning_rate": 9.46944732506721e-07, "loss": 0.0285, "step": 202880 }, { "epoch": 1.641637672950886, "grad_norm": 0.27677831053733826, "learning_rate": 9.465312934657078e-07, "loss": 0.0207, "step": 202890 }, { "epoch": 1.6417185856460879, "grad_norm": 0.5174588561058044, "learning_rate": 9.461179352625699e-07, "loss": 0.0152, "step": 202900 }, { "epoch": 1.6417994983412898, "grad_norm": 0.4578606188297272, "learning_rate": 9.457046579055523e-07, "loss": 0.0168, "step": 202910 }, { "epoch": 1.6418804110364915, "grad_norm": 0.2508465051651001, "learning_rate": 9.452914614028963e-07, "loss": 0.0189, "step": 202920 }, { "epoch": 1.6419613237316935, "grad_norm": 0.28537777066230774, "learning_rate": 9.448783457628418e-07, "loss": 0.032, "step": 202930 }, { "epoch": 1.6420422364268954, "grad_norm": 0.2485140711069107, "learning_rate": 9.444653109936281e-07, "loss": 0.016, "step": 202940 }, { "epoch": 1.6421231491220971, "grad_norm": 0.41876840591430664, "learning_rate": 9.44052357103492e-07, "loss": 0.0346, "step": 202950 }, { "epoch": 1.6422040618172993, "grad_norm": 0.4633290767669678, "learning_rate": 9.436394841006697e-07, "loss": 0.0309, "step": 202960 }, { "epoch": 1.642284974512501, "grad_norm": 0.5784764289855957, "learning_rate": 9.432266919933941e-07, "loss": 0.0166, "step": 202970 }, { "epoch": 1.642365887207703, "grad_norm": 0.18995097279548645, "learning_rate": 9.428139807898983e-07, "loss": 0.0203, "step": 202980 }, { "epoch": 1.6424467999029049, "grad_norm": 0.2528819143772125, "learning_rate": 9.42401350498412e-07, "loss": 0.0123, "step": 202990 }, { "epoch": 1.6425277125981066, "grad_norm": 0.28019386529922485, "learning_rate": 9.419888011271649e-07, "loss": 0.0203, "step": 203000 }, { "epoch": 1.6426086252933085, "grad_norm": 0.6149090528488159, "learning_rate": 9.415763326843846e-07, "loss": 0.0181, "step": 203010 }, { "epoch": 1.6426895379885105, "grad_norm": 0.2602284252643585, "learning_rate": 9.411639451782955e-07, "loss": 0.0143, "step": 203020 }, { "epoch": 1.6427704506837122, "grad_norm": 0.48510363698005676, "learning_rate": 9.407516386171251e-07, "loss": 0.0228, "step": 203030 }, { "epoch": 1.6428513633789141, "grad_norm": 0.14309220016002655, "learning_rate": 9.403394130090926e-07, "loss": 0.0128, "step": 203040 }, { "epoch": 1.642932276074116, "grad_norm": 0.09860535711050034, "learning_rate": 9.399272683624188e-07, "loss": 0.0197, "step": 203050 }, { "epoch": 1.6430131887693178, "grad_norm": 0.30933597683906555, "learning_rate": 9.395152046853273e-07, "loss": 0.018, "step": 203060 }, { "epoch": 1.6430941014645197, "grad_norm": 0.35408300161361694, "learning_rate": 9.391032219860302e-07, "loss": 0.0186, "step": 203070 }, { "epoch": 1.6431750141597217, "grad_norm": 0.5436526536941528, "learning_rate": 9.386913202727476e-07, "loss": 0.0207, "step": 203080 }, { "epoch": 1.6432559268549234, "grad_norm": 0.3922780454158783, "learning_rate": 9.382794995536931e-07, "loss": 0.0234, "step": 203090 }, { "epoch": 1.6433368395501255, "grad_norm": 0.510895848274231, "learning_rate": 9.378677598370795e-07, "loss": 0.022, "step": 203100 }, { "epoch": 1.6434177522453273, "grad_norm": 0.40860074758529663, "learning_rate": 9.374561011311173e-07, "loss": 0.0149, "step": 203110 }, { "epoch": 1.6434986649405292, "grad_norm": 0.3000853359699249, "learning_rate": 9.370445234440173e-07, "loss": 0.0244, "step": 203120 }, { "epoch": 1.6435795776357311, "grad_norm": 0.5205962657928467, "learning_rate": 9.366330267839868e-07, "loss": 0.0182, "step": 203130 }, { "epoch": 1.6436604903309329, "grad_norm": 0.5941992402076721, "learning_rate": 9.36221611159232e-07, "loss": 0.035, "step": 203140 }, { "epoch": 1.6437414030261348, "grad_norm": 0.7432891130447388, "learning_rate": 9.358102765779586e-07, "loss": 0.0174, "step": 203150 }, { "epoch": 1.6438223157213367, "grad_norm": 0.29025503993034363, "learning_rate": 9.353990230483695e-07, "loss": 0.0238, "step": 203160 }, { "epoch": 1.6439032284165385, "grad_norm": 0.29437726736068726, "learning_rate": 9.349878505786658e-07, "loss": 0.0072, "step": 203170 }, { "epoch": 1.6439841411117404, "grad_norm": 0.2734096646308899, "learning_rate": 9.345767591770477e-07, "loss": 0.016, "step": 203180 }, { "epoch": 1.6440650538069423, "grad_norm": 0.4438989758491516, "learning_rate": 9.341657488517142e-07, "loss": 0.0107, "step": 203190 }, { "epoch": 1.644145966502144, "grad_norm": 0.7144386768341064, "learning_rate": 9.337548196108609e-07, "loss": 0.0192, "step": 203200 }, { "epoch": 1.6442268791973462, "grad_norm": 0.3195420205593109, "learning_rate": 9.333439714626824e-07, "loss": 0.009, "step": 203210 }, { "epoch": 1.644307791892548, "grad_norm": 0.5109350681304932, "learning_rate": 9.32933204415376e-07, "loss": 0.0246, "step": 203220 }, { "epoch": 1.6443887045877497, "grad_norm": 0.3887309432029724, "learning_rate": 9.325225184771298e-07, "loss": 0.0185, "step": 203230 }, { "epoch": 1.6444696172829518, "grad_norm": 0.07678434997797012, "learning_rate": 9.321119136561335e-07, "loss": 0.0093, "step": 203240 }, { "epoch": 1.6445505299781535, "grad_norm": 0.4347119927406311, "learning_rate": 9.317013899605798e-07, "loss": 0.0124, "step": 203250 }, { "epoch": 1.6446314426733555, "grad_norm": 0.4711619019508362, "learning_rate": 9.31290947398652e-07, "loss": 0.0186, "step": 203260 }, { "epoch": 1.6447123553685574, "grad_norm": 0.5436983108520508, "learning_rate": 9.308805859785358e-07, "loss": 0.0239, "step": 203270 }, { "epoch": 1.6447932680637591, "grad_norm": 0.3982733488082886, "learning_rate": 9.304703057084175e-07, "loss": 0.0179, "step": 203280 }, { "epoch": 1.644874180758961, "grad_norm": 0.35476481914520264, "learning_rate": 9.300601065964777e-07, "loss": 0.0251, "step": 203290 }, { "epoch": 1.644955093454163, "grad_norm": 0.5214197635650635, "learning_rate": 9.29649988650897e-07, "loss": 0.0204, "step": 203300 }, { "epoch": 1.6450360061493647, "grad_norm": 0.2604447305202484, "learning_rate": 9.292399518798545e-07, "loss": 0.022, "step": 203310 }, { "epoch": 1.6451169188445667, "grad_norm": 0.2880004644393921, "learning_rate": 9.28829996291527e-07, "loss": 0.0191, "step": 203320 }, { "epoch": 1.6451978315397686, "grad_norm": 0.04335728660225868, "learning_rate": 9.284201218940908e-07, "loss": 0.0153, "step": 203330 }, { "epoch": 1.6452787442349703, "grad_norm": 0.4538816809654236, "learning_rate": 9.280103286957199e-07, "loss": 0.0176, "step": 203340 }, { "epoch": 1.6453596569301725, "grad_norm": 0.4734386205673218, "learning_rate": 9.276006167045865e-07, "loss": 0.0144, "step": 203350 }, { "epoch": 1.6454405696253742, "grad_norm": 0.756844699382782, "learning_rate": 9.27190985928862e-07, "loss": 0.0201, "step": 203360 }, { "epoch": 1.645521482320576, "grad_norm": 0.23146562278270721, "learning_rate": 9.267814363767147e-07, "loss": 0.0171, "step": 203370 }, { "epoch": 1.645602395015778, "grad_norm": 0.46634504199028015, "learning_rate": 9.263719680563127e-07, "loss": 0.0156, "step": 203380 }, { "epoch": 1.6456833077109798, "grad_norm": 0.5061715841293335, "learning_rate": 9.259625809758222e-07, "loss": 0.0264, "step": 203390 }, { "epoch": 1.6457642204061818, "grad_norm": 0.2888886332511902, "learning_rate": 9.255532751434071e-07, "loss": 0.0181, "step": 203400 }, { "epoch": 1.6458451331013837, "grad_norm": 0.38728609681129456, "learning_rate": 9.251440505672304e-07, "loss": 0.0261, "step": 203410 }, { "epoch": 1.6459260457965854, "grad_norm": 0.6936681866645813, "learning_rate": 9.247349072554535e-07, "loss": 0.0227, "step": 203420 }, { "epoch": 1.6460069584917874, "grad_norm": 0.6664934158325195, "learning_rate": 9.243258452162346e-07, "loss": 0.0258, "step": 203430 }, { "epoch": 1.6460878711869893, "grad_norm": 0.06201225891709328, "learning_rate": 9.239168644577335e-07, "loss": 0.0334, "step": 203440 }, { "epoch": 1.646168783882191, "grad_norm": 0.5013487935066223, "learning_rate": 9.235079649881068e-07, "loss": 0.0294, "step": 203450 }, { "epoch": 1.646249696577393, "grad_norm": 0.007973890751600266, "learning_rate": 9.230991468155054e-07, "loss": 0.0166, "step": 203460 }, { "epoch": 1.646330609272595, "grad_norm": 0.3686421811580658, "learning_rate": 9.22690409948086e-07, "loss": 0.0112, "step": 203470 }, { "epoch": 1.6464115219677966, "grad_norm": 0.16914740204811096, "learning_rate": 9.22281754394e-07, "loss": 0.0097, "step": 203480 }, { "epoch": 1.6464924346629988, "grad_norm": 0.289898157119751, "learning_rate": 9.218731801613934e-07, "loss": 0.0145, "step": 203490 }, { "epoch": 1.6465733473582005, "grad_norm": 0.2696565091609955, "learning_rate": 9.214646872584182e-07, "loss": 0.0154, "step": 203500 }, { "epoch": 1.6466542600534024, "grad_norm": 0.015188891440629959, "learning_rate": 9.210562756932196e-07, "loss": 0.016, "step": 203510 }, { "epoch": 1.6467351727486044, "grad_norm": 0.16450181603431702, "learning_rate": 9.206479454739431e-07, "loss": 0.0132, "step": 203520 }, { "epoch": 1.646816085443806, "grad_norm": 0.39675748348236084, "learning_rate": 9.202396966087307e-07, "loss": 0.0237, "step": 203530 }, { "epoch": 1.646896998139008, "grad_norm": 0.09762389957904816, "learning_rate": 9.198315291057253e-07, "loss": 0.0159, "step": 203540 }, { "epoch": 1.64697791083421, "grad_norm": 0.29494544863700867, "learning_rate": 9.194234429730659e-07, "loss": 0.0224, "step": 203550 }, { "epoch": 1.6470588235294117, "grad_norm": 0.12486786395311356, "learning_rate": 9.190154382188921e-07, "loss": 0.0157, "step": 203560 }, { "epoch": 1.6471397362246136, "grad_norm": 0.13713404536247253, "learning_rate": 9.186075148513396e-07, "loss": 0.0198, "step": 203570 }, { "epoch": 1.6472206489198156, "grad_norm": 0.09024859219789505, "learning_rate": 9.181996728785442e-07, "loss": 0.0126, "step": 203580 }, { "epoch": 1.6473015616150173, "grad_norm": 0.725100576877594, "learning_rate": 9.177919123086393e-07, "loss": 0.0267, "step": 203590 }, { "epoch": 1.6473824743102192, "grad_norm": 0.4464181661605835, "learning_rate": 9.173842331497568e-07, "loss": 0.0248, "step": 203600 }, { "epoch": 1.6474633870054212, "grad_norm": 0.13162606954574585, "learning_rate": 9.169766354100268e-07, "loss": 0.0169, "step": 203610 }, { "epoch": 1.6475442997006229, "grad_norm": 0.19310879707336426, "learning_rate": 9.165691190975768e-07, "loss": 0.0142, "step": 203620 }, { "epoch": 1.647625212395825, "grad_norm": 0.13405515253543854, "learning_rate": 9.161616842205367e-07, "loss": 0.0176, "step": 203630 }, { "epoch": 1.6477061250910268, "grad_norm": 0.31378042697906494, "learning_rate": 9.157543307870315e-07, "loss": 0.01, "step": 203640 }, { "epoch": 1.6477870377862287, "grad_norm": 0.3170435130596161, "learning_rate": 9.153470588051815e-07, "loss": 0.0132, "step": 203650 }, { "epoch": 1.6478679504814306, "grad_norm": 0.5115718841552734, "learning_rate": 9.149398682831128e-07, "loss": 0.0176, "step": 203660 }, { "epoch": 1.6479488631766324, "grad_norm": 0.2647398114204407, "learning_rate": 9.14532759228946e-07, "loss": 0.0212, "step": 203670 }, { "epoch": 1.6480297758718343, "grad_norm": 0.5807589888572693, "learning_rate": 9.141257316507951e-07, "loss": 0.0154, "step": 203680 }, { "epoch": 1.6481106885670362, "grad_norm": 0.6811927556991577, "learning_rate": 9.13718785556783e-07, "loss": 0.0246, "step": 203690 }, { "epoch": 1.648191601262238, "grad_norm": 0.2694936990737915, "learning_rate": 9.133119209550228e-07, "loss": 0.0156, "step": 203700 }, { "epoch": 1.64827251395744, "grad_norm": 0.1972205489873886, "learning_rate": 9.129051378536286e-07, "loss": 0.0089, "step": 203710 }, { "epoch": 1.6483534266526418, "grad_norm": 0.31112122535705566, "learning_rate": 9.124984362607137e-07, "loss": 0.0127, "step": 203720 }, { "epoch": 1.6484343393478436, "grad_norm": 0.24449288845062256, "learning_rate": 9.120918161843884e-07, "loss": 0.0132, "step": 203730 }, { "epoch": 1.6485152520430457, "grad_norm": 0.3174130916595459, "learning_rate": 9.116852776327617e-07, "loss": 0.0204, "step": 203740 }, { "epoch": 1.6485961647382474, "grad_norm": 0.28343164920806885, "learning_rate": 9.112788206139411e-07, "loss": 0.0175, "step": 203750 }, { "epoch": 1.6486770774334492, "grad_norm": 0.5254454016685486, "learning_rate": 9.108724451360329e-07, "loss": 0.0181, "step": 203760 }, { "epoch": 1.6487579901286513, "grad_norm": 0.3039143681526184, "learning_rate": 9.104661512071416e-07, "loss": 0.0259, "step": 203770 }, { "epoch": 1.648838902823853, "grad_norm": 0.21837155520915985, "learning_rate": 9.100599388353692e-07, "loss": 0.0165, "step": 203780 }, { "epoch": 1.648919815519055, "grad_norm": 0.38868603110313416, "learning_rate": 9.09653808028817e-07, "loss": 0.0199, "step": 203790 }, { "epoch": 1.649000728214257, "grad_norm": 0.41531574726104736, "learning_rate": 9.092477587955844e-07, "loss": 0.0179, "step": 203800 }, { "epoch": 1.6490816409094586, "grad_norm": 0.28011009097099304, "learning_rate": 9.088417911437697e-07, "loss": 0.0138, "step": 203810 }, { "epoch": 1.6491625536046606, "grad_norm": 0.47327059507369995, "learning_rate": 9.084359050814673e-07, "loss": 0.0305, "step": 203820 }, { "epoch": 1.6492434662998625, "grad_norm": 0.5785698294639587, "learning_rate": 9.080301006167753e-07, "loss": 0.0211, "step": 203830 }, { "epoch": 1.6493243789950642, "grad_norm": 0.512207567691803, "learning_rate": 9.07624377757782e-07, "loss": 0.0187, "step": 203840 }, { "epoch": 1.6494052916902662, "grad_norm": 0.29755204916000366, "learning_rate": 9.072187365125829e-07, "loss": 0.0155, "step": 203850 }, { "epoch": 1.6494862043854681, "grad_norm": 0.28690779209136963, "learning_rate": 9.068131768892663e-07, "loss": 0.0167, "step": 203860 }, { "epoch": 1.6495671170806698, "grad_norm": 0.1939789205789566, "learning_rate": 9.064076988959181e-07, "loss": 0.013, "step": 203870 }, { "epoch": 1.649648029775872, "grad_norm": 0.3692256808280945, "learning_rate": 9.060023025406267e-07, "loss": 0.0195, "step": 203880 }, { "epoch": 1.6497289424710737, "grad_norm": 0.11848986893892288, "learning_rate": 9.055969878314785e-07, "loss": 0.0066, "step": 203890 }, { "epoch": 1.6498098551662754, "grad_norm": 0.2687893211841583, "learning_rate": 9.051917547765521e-07, "loss": 0.0195, "step": 203900 }, { "epoch": 1.6498907678614776, "grad_norm": 0.1549856960773468, "learning_rate": 9.047866033839325e-07, "loss": 0.0146, "step": 203910 }, { "epoch": 1.6499716805566793, "grad_norm": 0.7318930625915527, "learning_rate": 9.043815336616996e-07, "loss": 0.0284, "step": 203920 }, { "epoch": 1.6500525932518812, "grad_norm": 0.3190307915210724, "learning_rate": 9.0397654561793e-07, "loss": 0.0176, "step": 203930 }, { "epoch": 1.6501335059470832, "grad_norm": 0.36135926842689514, "learning_rate": 9.035716392607013e-07, "loss": 0.0186, "step": 203940 }, { "epoch": 1.650214418642285, "grad_norm": 0.3735285997390747, "learning_rate": 9.031668145980882e-07, "loss": 0.018, "step": 203950 }, { "epoch": 1.6502953313374868, "grad_norm": 0.2979678809642792, "learning_rate": 9.027620716381641e-07, "loss": 0.0128, "step": 203960 }, { "epoch": 1.6503762440326888, "grad_norm": 0.2146102786064148, "learning_rate": 9.023574103890015e-07, "loss": 0.0247, "step": 203970 }, { "epoch": 1.6504571567278905, "grad_norm": 0.17045249044895172, "learning_rate": 9.01952830858669e-07, "loss": 0.0174, "step": 203980 }, { "epoch": 1.6505380694230924, "grad_norm": 0.5256614089012146, "learning_rate": 9.015483330552366e-07, "loss": 0.0219, "step": 203990 }, { "epoch": 1.6506189821182944, "grad_norm": 0.45560047030448914, "learning_rate": 9.011439169867698e-07, "loss": 0.0286, "step": 204000 }, { "epoch": 1.650699894813496, "grad_norm": 0.40357163548469543, "learning_rate": 9.007395826613336e-07, "loss": 0.0238, "step": 204010 }, { "epoch": 1.6507808075086983, "grad_norm": 0.02278025634586811, "learning_rate": 9.003353300869955e-07, "loss": 0.0127, "step": 204020 }, { "epoch": 1.6508617202039, "grad_norm": 0.15054793655872345, "learning_rate": 8.999311592718125e-07, "loss": 0.0146, "step": 204030 }, { "epoch": 1.6509426328991017, "grad_norm": 0.2403983473777771, "learning_rate": 8.995270702238457e-07, "loss": 0.0133, "step": 204040 }, { "epoch": 1.6510235455943039, "grad_norm": 0.29607686400413513, "learning_rate": 8.991230629511577e-07, "loss": 0.0279, "step": 204050 }, { "epoch": 1.6511044582895056, "grad_norm": 0.42465969920158386, "learning_rate": 8.987191374618004e-07, "loss": 0.0144, "step": 204060 }, { "epoch": 1.6511853709847075, "grad_norm": 0.3861915171146393, "learning_rate": 8.983152937638329e-07, "loss": 0.0225, "step": 204070 }, { "epoch": 1.6512662836799095, "grad_norm": 0.6034689545631409, "learning_rate": 8.979115318653092e-07, "loss": 0.022, "step": 204080 }, { "epoch": 1.6513471963751112, "grad_norm": 0.3976269066333771, "learning_rate": 8.975078517742775e-07, "loss": 0.0198, "step": 204090 }, { "epoch": 1.6514281090703131, "grad_norm": 0.4662558436393738, "learning_rate": 8.971042534987922e-07, "loss": 0.0168, "step": 204100 }, { "epoch": 1.651509021765515, "grad_norm": 0.34805506467819214, "learning_rate": 8.967007370469011e-07, "loss": 0.017, "step": 204110 }, { "epoch": 1.6515899344607168, "grad_norm": 0.18058988451957703, "learning_rate": 8.962973024266514e-07, "loss": 0.014, "step": 204120 }, { "epoch": 1.6516708471559187, "grad_norm": 0.324556827545166, "learning_rate": 8.958939496460883e-07, "loss": 0.0131, "step": 204130 }, { "epoch": 1.6517517598511207, "grad_norm": 0.001551780034787953, "learning_rate": 8.954906787132567e-07, "loss": 0.019, "step": 204140 }, { "epoch": 1.6518326725463224, "grad_norm": 0.43882086873054504, "learning_rate": 8.950874896361982e-07, "loss": 0.0105, "step": 204150 }, { "epoch": 1.6519135852415245, "grad_norm": 0.3380606472492218, "learning_rate": 8.94684382422954e-07, "loss": 0.0134, "step": 204160 }, { "epoch": 1.6519944979367263, "grad_norm": 0.34184956550598145, "learning_rate": 8.942813570815617e-07, "loss": 0.0174, "step": 204170 }, { "epoch": 1.6520754106319282, "grad_norm": 0.5867283344268799, "learning_rate": 8.938784136200629e-07, "loss": 0.0387, "step": 204180 }, { "epoch": 1.6521563233271301, "grad_norm": 0.360589861869812, "learning_rate": 8.934755520464893e-07, "loss": 0.0158, "step": 204190 }, { "epoch": 1.6522372360223319, "grad_norm": 0.3955887258052826, "learning_rate": 8.930727723688753e-07, "loss": 0.02, "step": 204200 }, { "epoch": 1.6523181487175338, "grad_norm": 0.44436439871788025, "learning_rate": 8.926700745952571e-07, "loss": 0.0196, "step": 204210 }, { "epoch": 1.6523990614127357, "grad_norm": 0.4564019739627838, "learning_rate": 8.922674587336622e-07, "loss": 0.015, "step": 204220 }, { "epoch": 1.6524799741079375, "grad_norm": 0.2320006638765335, "learning_rate": 8.918649247921201e-07, "loss": 0.0144, "step": 204230 }, { "epoch": 1.6525608868031394, "grad_norm": 0.2585696876049042, "learning_rate": 8.914624727786614e-07, "loss": 0.0173, "step": 204240 }, { "epoch": 1.6526417994983413, "grad_norm": 0.4052944481372833, "learning_rate": 8.910601027013083e-07, "loss": 0.0245, "step": 204250 }, { "epoch": 1.652722712193543, "grad_norm": 0.3912021517753601, "learning_rate": 8.906578145680877e-07, "loss": 0.0143, "step": 204260 }, { "epoch": 1.652803624888745, "grad_norm": 0.47164297103881836, "learning_rate": 8.902556083870234e-07, "loss": 0.0171, "step": 204270 }, { "epoch": 1.652884537583947, "grad_norm": 0.27636203169822693, "learning_rate": 8.898534841661321e-07, "loss": 0.0236, "step": 204280 }, { "epoch": 1.6529654502791487, "grad_norm": 0.14543747901916504, "learning_rate": 8.894514419134375e-07, "loss": 0.014, "step": 204290 }, { "epoch": 1.6530463629743508, "grad_norm": 0.3223821818828583, "learning_rate": 8.890494816369577e-07, "loss": 0.0227, "step": 204300 }, { "epoch": 1.6531272756695525, "grad_norm": 0.5742441415786743, "learning_rate": 8.88647603344705e-07, "loss": 0.0243, "step": 204310 }, { "epoch": 1.6532081883647545, "grad_norm": 0.0931808203458786, "learning_rate": 8.882458070446969e-07, "loss": 0.0101, "step": 204320 }, { "epoch": 1.6532891010599564, "grad_norm": 0.27432364225387573, "learning_rate": 8.878440927449467e-07, "loss": 0.0187, "step": 204330 }, { "epoch": 1.6533700137551581, "grad_norm": 0.45558324456214905, "learning_rate": 8.874424604534643e-07, "loss": 0.0165, "step": 204340 }, { "epoch": 1.65345092645036, "grad_norm": 0.26233458518981934, "learning_rate": 8.870409101782606e-07, "loss": 0.012, "step": 204350 }, { "epoch": 1.653531839145562, "grad_norm": 0.20681387186050415, "learning_rate": 8.866394419273427e-07, "loss": 0.0173, "step": 204360 }, { "epoch": 1.6536127518407637, "grad_norm": 0.2567713260650635, "learning_rate": 8.862380557087175e-07, "loss": 0.0148, "step": 204370 }, { "epoch": 1.6536936645359657, "grad_norm": 0.5702130794525146, "learning_rate": 8.858367515303901e-07, "loss": 0.0233, "step": 204380 }, { "epoch": 1.6537745772311676, "grad_norm": 0.6110172271728516, "learning_rate": 8.854355294003614e-07, "loss": 0.02, "step": 204390 }, { "epoch": 1.6538554899263693, "grad_norm": 0.2676358222961426, "learning_rate": 8.850343893266377e-07, "loss": 0.0158, "step": 204400 }, { "epoch": 1.6539364026215715, "grad_norm": 0.15786929428577423, "learning_rate": 8.846333313172151e-07, "loss": 0.0144, "step": 204410 }, { "epoch": 1.6540173153167732, "grad_norm": 0.259368360042572, "learning_rate": 8.842323553800908e-07, "loss": 0.0196, "step": 204420 }, { "epoch": 1.654098228011975, "grad_norm": 0.08680781722068787, "learning_rate": 8.838314615232662e-07, "loss": 0.0212, "step": 204430 }, { "epoch": 1.654179140707177, "grad_norm": 0.39057105779647827, "learning_rate": 8.834306497547324e-07, "loss": 0.0196, "step": 204440 }, { "epoch": 1.6542600534023788, "grad_norm": 0.26645246148109436, "learning_rate": 8.830299200824827e-07, "loss": 0.0211, "step": 204450 }, { "epoch": 1.6543409660975807, "grad_norm": 0.42284464836120605, "learning_rate": 8.826292725145119e-07, "loss": 0.0166, "step": 204460 }, { "epoch": 1.6544218787927827, "grad_norm": 0.4095294773578644, "learning_rate": 8.82228707058806e-07, "loss": 0.0254, "step": 204470 }, { "epoch": 1.6545027914879844, "grad_norm": 0.32865211367607117, "learning_rate": 8.818282237233567e-07, "loss": 0.0131, "step": 204480 }, { "epoch": 1.6545837041831863, "grad_norm": 0.3918997645378113, "learning_rate": 8.81427822516151e-07, "loss": 0.0189, "step": 204490 }, { "epoch": 1.6546646168783883, "grad_norm": 0.24107606709003448, "learning_rate": 8.810275034451704e-07, "loss": 0.0162, "step": 204500 }, { "epoch": 1.65474552957359, "grad_norm": 0.4195979833602905, "learning_rate": 8.806272665184012e-07, "loss": 0.0151, "step": 204510 }, { "epoch": 1.654826442268792, "grad_norm": 0.8989406824111938, "learning_rate": 8.802271117438266e-07, "loss": 0.021, "step": 204520 }, { "epoch": 1.6549073549639939, "grad_norm": 0.28293025493621826, "learning_rate": 8.798270391294228e-07, "loss": 0.0204, "step": 204530 }, { "epoch": 1.6549882676591956, "grad_norm": 0.3083611726760864, "learning_rate": 8.794270486831713e-07, "loss": 0.0255, "step": 204540 }, { "epoch": 1.6550691803543978, "grad_norm": 0.14591889083385468, "learning_rate": 8.790271404130485e-07, "loss": 0.0114, "step": 204550 }, { "epoch": 1.6551500930495995, "grad_norm": 0.40836426615715027, "learning_rate": 8.786273143270296e-07, "loss": 0.0249, "step": 204560 }, { "epoch": 1.6552310057448012, "grad_norm": 0.21454256772994995, "learning_rate": 8.78227570433089e-07, "loss": 0.0254, "step": 204570 }, { "epoch": 1.6553119184400034, "grad_norm": 0.09387756139039993, "learning_rate": 8.778279087391972e-07, "loss": 0.015, "step": 204580 }, { "epoch": 1.655392831135205, "grad_norm": 0.33192136883735657, "learning_rate": 8.774283292533259e-07, "loss": 0.0168, "step": 204590 }, { "epoch": 1.655473743830407, "grad_norm": 0.29307448863983154, "learning_rate": 8.770288319834431e-07, "loss": 0.0167, "step": 204600 }, { "epoch": 1.655554656525609, "grad_norm": 0.30465736985206604, "learning_rate": 8.76629416937515e-07, "loss": 0.0154, "step": 204610 }, { "epoch": 1.6556355692208107, "grad_norm": 0.37773051857948303, "learning_rate": 8.762300841235116e-07, "loss": 0.0234, "step": 204620 }, { "epoch": 1.6557164819160126, "grad_norm": 0.48887449502944946, "learning_rate": 8.758308335493915e-07, "loss": 0.0211, "step": 204630 }, { "epoch": 1.6557973946112146, "grad_norm": 0.3452477753162384, "learning_rate": 8.754316652231182e-07, "loss": 0.0138, "step": 204640 }, { "epoch": 1.6558783073064163, "grad_norm": 0.26368004083633423, "learning_rate": 8.750325791526548e-07, "loss": 0.0251, "step": 204650 }, { "epoch": 1.6559592200016182, "grad_norm": 0.050123464316129684, "learning_rate": 8.746335753459562e-07, "loss": 0.0186, "step": 204660 }, { "epoch": 1.6560401326968202, "grad_norm": 0.3250272274017334, "learning_rate": 8.742346538109836e-07, "loss": 0.0196, "step": 204670 }, { "epoch": 1.6561210453920219, "grad_norm": 0.4039469361305237, "learning_rate": 8.738358145556918e-07, "loss": 0.021, "step": 204680 }, { "epoch": 1.656201958087224, "grad_norm": 0.26370733976364136, "learning_rate": 8.734370575880313e-07, "loss": 0.0142, "step": 204690 }, { "epoch": 1.6562828707824258, "grad_norm": 0.25242093205451965, "learning_rate": 8.730383829159589e-07, "loss": 0.0088, "step": 204700 }, { "epoch": 1.6563637834776277, "grad_norm": 0.5590860247612, "learning_rate": 8.726397905474232e-07, "loss": 0.0178, "step": 204710 }, { "epoch": 1.6564446961728296, "grad_norm": 0.4907344579696655, "learning_rate": 8.722412804903734e-07, "loss": 0.0153, "step": 204720 }, { "epoch": 1.6565256088680314, "grad_norm": 0.4349566698074341, "learning_rate": 8.718428527527578e-07, "loss": 0.018, "step": 204730 }, { "epoch": 1.6566065215632333, "grad_norm": 0.03615092486143112, "learning_rate": 8.71444507342521e-07, "loss": 0.0206, "step": 204740 }, { "epoch": 1.6566874342584352, "grad_norm": 0.3198952376842499, "learning_rate": 8.710462442676082e-07, "loss": 0.0163, "step": 204750 }, { "epoch": 1.656768346953637, "grad_norm": 0.35599425435066223, "learning_rate": 8.70648063535961e-07, "loss": 0.0138, "step": 204760 }, { "epoch": 1.656849259648839, "grad_norm": 0.29572588205337524, "learning_rate": 8.702499651555213e-07, "loss": 0.0215, "step": 204770 }, { "epoch": 1.6569301723440408, "grad_norm": 0.29897910356521606, "learning_rate": 8.698519491342272e-07, "loss": 0.0256, "step": 204780 }, { "epoch": 1.6570110850392425, "grad_norm": 0.3368745744228363, "learning_rate": 8.694540154800168e-07, "loss": 0.0169, "step": 204790 }, { "epoch": 1.6570919977344445, "grad_norm": 0.36271077394485474, "learning_rate": 8.690561642008255e-07, "loss": 0.0143, "step": 204800 }, { "epoch": 1.6571729104296464, "grad_norm": 0.21169468760490417, "learning_rate": 8.686583953045902e-07, "loss": 0.0183, "step": 204810 }, { "epoch": 1.6572538231248481, "grad_norm": 0.5525238513946533, "learning_rate": 8.682607087992406e-07, "loss": 0.0207, "step": 204820 }, { "epoch": 1.6573347358200503, "grad_norm": 0.012193027883768082, "learning_rate": 8.678631046927072e-07, "loss": 0.013, "step": 204830 }, { "epoch": 1.657415648515252, "grad_norm": 0.2320440262556076, "learning_rate": 8.67465582992923e-07, "loss": 0.0201, "step": 204840 }, { "epoch": 1.657496561210454, "grad_norm": 0.4706341326236725, "learning_rate": 8.670681437078127e-07, "loss": 0.0211, "step": 204850 }, { "epoch": 1.657577473905656, "grad_norm": 0.32302817702293396, "learning_rate": 8.666707868453023e-07, "loss": 0.0126, "step": 204860 }, { "epoch": 1.6576583866008576, "grad_norm": 0.2763570547103882, "learning_rate": 8.662735124133193e-07, "loss": 0.0187, "step": 204870 }, { "epoch": 1.6577392992960596, "grad_norm": 0.22432048618793488, "learning_rate": 8.658763204197823e-07, "loss": 0.0229, "step": 204880 }, { "epoch": 1.6578202119912615, "grad_norm": 0.24427393078804016, "learning_rate": 8.654792108726151e-07, "loss": 0.0205, "step": 204890 }, { "epoch": 1.6579011246864632, "grad_norm": 0.03509662672877312, "learning_rate": 8.650821837797374e-07, "loss": 0.0073, "step": 204900 }, { "epoch": 1.6579820373816652, "grad_norm": 0.49685609340667725, "learning_rate": 8.646852391490662e-07, "loss": 0.0169, "step": 204910 }, { "epoch": 1.658062950076867, "grad_norm": 0.09443437308073044, "learning_rate": 8.642883769885174e-07, "loss": 0.0076, "step": 204920 }, { "epoch": 1.6581438627720688, "grad_norm": 0.44375526905059814, "learning_rate": 8.638915973060063e-07, "loss": 0.0174, "step": 204930 }, { "epoch": 1.6582247754672708, "grad_norm": 0.25495049357414246, "learning_rate": 8.634949001094462e-07, "loss": 0.0185, "step": 204940 }, { "epoch": 1.6583056881624727, "grad_norm": 0.18704640865325928, "learning_rate": 8.630982854067471e-07, "loss": 0.0269, "step": 204950 }, { "epoch": 1.6583866008576744, "grad_norm": 0.0810922309756279, "learning_rate": 8.627017532058196e-07, "loss": 0.023, "step": 204960 }, { "epoch": 1.6584675135528766, "grad_norm": 0.43766868114471436, "learning_rate": 8.623053035145712e-07, "loss": 0.0141, "step": 204970 }, { "epoch": 1.6585484262480783, "grad_norm": 0.5895628333091736, "learning_rate": 8.619089363409083e-07, "loss": 0.0202, "step": 204980 }, { "epoch": 1.6586293389432802, "grad_norm": 0.19269369542598724, "learning_rate": 8.615126516927363e-07, "loss": 0.0169, "step": 204990 }, { "epoch": 1.6587102516384822, "grad_norm": 1.0332139730453491, "learning_rate": 8.611164495779572e-07, "loss": 0.0182, "step": 205000 }, { "epoch": 1.658791164333684, "grad_norm": 0.23991574347019196, "learning_rate": 8.607203300044731e-07, "loss": 0.0232, "step": 205010 }, { "epoch": 1.6588720770288858, "grad_norm": 0.17822790145874023, "learning_rate": 8.603242929801825e-07, "loss": 0.0171, "step": 205020 }, { "epoch": 1.6589529897240878, "grad_norm": 0.2701944410800934, "learning_rate": 8.59928338512987e-07, "loss": 0.019, "step": 205030 }, { "epoch": 1.6590339024192895, "grad_norm": 0.35775113105773926, "learning_rate": 8.595324666107796e-07, "loss": 0.0233, "step": 205040 }, { "epoch": 1.6591148151144914, "grad_norm": 0.21807223558425903, "learning_rate": 8.591366772814547e-07, "loss": 0.0302, "step": 205050 }, { "epoch": 1.6591957278096934, "grad_norm": 0.3994629979133606, "learning_rate": 8.587409705329097e-07, "loss": 0.0166, "step": 205060 }, { "epoch": 1.659276640504895, "grad_norm": 0.18839438259601593, "learning_rate": 8.583453463730323e-07, "loss": 0.0192, "step": 205070 }, { "epoch": 1.6593575532000973, "grad_norm": 0.132343590259552, "learning_rate": 8.579498048097124e-07, "loss": 0.0302, "step": 205080 }, { "epoch": 1.659438465895299, "grad_norm": 0.32152608036994934, "learning_rate": 8.575543458508406e-07, "loss": 0.0245, "step": 205090 }, { "epoch": 1.6595193785905007, "grad_norm": 0.3643718659877777, "learning_rate": 8.57158969504302e-07, "loss": 0.023, "step": 205100 }, { "epoch": 1.6596002912857029, "grad_norm": 0.535552442073822, "learning_rate": 8.567636757779818e-07, "loss": 0.028, "step": 205110 }, { "epoch": 1.6596812039809046, "grad_norm": 0.2165180891752243, "learning_rate": 8.563684646797638e-07, "loss": 0.018, "step": 205120 }, { "epoch": 1.6597621166761065, "grad_norm": 0.42330828309059143, "learning_rate": 8.559733362175287e-07, "loss": 0.0186, "step": 205130 }, { "epoch": 1.6598430293713085, "grad_norm": 0.36797162890434265, "learning_rate": 8.555782903991572e-07, "loss": 0.0258, "step": 205140 }, { "epoch": 1.6599239420665102, "grad_norm": 0.7562450170516968, "learning_rate": 8.551833272325278e-07, "loss": 0.0392, "step": 205150 }, { "epoch": 1.6600048547617121, "grad_norm": 0.39684781432151794, "learning_rate": 8.547884467255163e-07, "loss": 0.0131, "step": 205160 }, { "epoch": 1.660085767456914, "grad_norm": 0.513196587562561, "learning_rate": 8.543936488859983e-07, "loss": 0.0239, "step": 205170 }, { "epoch": 1.6601666801521158, "grad_norm": 0.4547397494316101, "learning_rate": 8.539989337218473e-07, "loss": 0.021, "step": 205180 }, { "epoch": 1.6602475928473177, "grad_norm": 0.4638803005218506, "learning_rate": 8.53604301240935e-07, "loss": 0.0104, "step": 205190 }, { "epoch": 1.6603285055425197, "grad_norm": 0.07771436870098114, "learning_rate": 8.532097514511317e-07, "loss": 0.0098, "step": 205200 }, { "epoch": 1.6604094182377214, "grad_norm": 0.2902085781097412, "learning_rate": 8.528152843603044e-07, "loss": 0.0104, "step": 205210 }, { "epoch": 1.6604903309329235, "grad_norm": 0.6049702167510986, "learning_rate": 8.524208999763234e-07, "loss": 0.0189, "step": 205220 }, { "epoch": 1.6605712436281252, "grad_norm": 0.31072285771369934, "learning_rate": 8.520265983070503e-07, "loss": 0.0155, "step": 205230 }, { "epoch": 1.660652156323327, "grad_norm": 0.8658475279808044, "learning_rate": 8.516323793603487e-07, "loss": 0.0229, "step": 205240 }, { "epoch": 1.6607330690185291, "grad_norm": 0.28931906819343567, "learning_rate": 8.51238243144083e-07, "loss": 0.0109, "step": 205250 }, { "epoch": 1.6608139817137308, "grad_norm": 0.33606958389282227, "learning_rate": 8.508441896661134e-07, "loss": 0.0152, "step": 205260 }, { "epoch": 1.6608948944089328, "grad_norm": 0.23190833628177643, "learning_rate": 8.50450218934295e-07, "loss": 0.0187, "step": 205270 }, { "epoch": 1.6609758071041347, "grad_norm": 0.07754157483577728, "learning_rate": 8.50056330956488e-07, "loss": 0.0103, "step": 205280 }, { "epoch": 1.6610567197993364, "grad_norm": 0.386008620262146, "learning_rate": 8.496625257405461e-07, "loss": 0.0161, "step": 205290 }, { "epoch": 1.6611376324945384, "grad_norm": 0.5347093343734741, "learning_rate": 8.492688032943236e-07, "loss": 0.0133, "step": 205300 }, { "epoch": 1.6612185451897403, "grad_norm": 0.3436005413532257, "learning_rate": 8.488751636256726e-07, "loss": 0.022, "step": 205310 }, { "epoch": 1.661299457884942, "grad_norm": 0.2806837558746338, "learning_rate": 8.484816067424429e-07, "loss": 0.0131, "step": 205320 }, { "epoch": 1.661380370580144, "grad_norm": 0.591196596622467, "learning_rate": 8.480881326524832e-07, "loss": 0.0204, "step": 205330 }, { "epoch": 1.661461283275346, "grad_norm": 0.2295948565006256, "learning_rate": 8.476947413636405e-07, "loss": 0.0068, "step": 205340 }, { "epoch": 1.6615421959705476, "grad_norm": 0.44228506088256836, "learning_rate": 8.473014328837603e-07, "loss": 0.0159, "step": 205350 }, { "epoch": 1.6616231086657498, "grad_norm": 0.38621389865875244, "learning_rate": 8.469082072206863e-07, "loss": 0.0211, "step": 205360 }, { "epoch": 1.6617040213609515, "grad_norm": 1.0214831829071045, "learning_rate": 8.465150643822606e-07, "loss": 0.0197, "step": 205370 }, { "epoch": 1.6617849340561535, "grad_norm": 0.5382938981056213, "learning_rate": 8.461220043763235e-07, "loss": 0.0283, "step": 205380 }, { "epoch": 1.6618658467513554, "grad_norm": 0.30461859703063965, "learning_rate": 8.457290272107138e-07, "loss": 0.0151, "step": 205390 }, { "epoch": 1.6619467594465571, "grad_norm": 0.15566754341125488, "learning_rate": 8.453361328932685e-07, "loss": 0.0144, "step": 205400 }, { "epoch": 1.662027672141759, "grad_norm": 0.09838457405567169, "learning_rate": 8.449433214318237e-07, "loss": 0.0119, "step": 205410 }, { "epoch": 1.662108584836961, "grad_norm": 0.7484480142593384, "learning_rate": 8.445505928342124e-07, "loss": 0.0236, "step": 205420 }, { "epoch": 1.6621894975321627, "grad_norm": 0.3883189260959625, "learning_rate": 8.441579471082656e-07, "loss": 0.013, "step": 205430 }, { "epoch": 1.6622704102273647, "grad_norm": 0.2806808352470398, "learning_rate": 8.43765384261816e-07, "loss": 0.0106, "step": 205440 }, { "epoch": 1.6623513229225666, "grad_norm": 0.26711153984069824, "learning_rate": 8.433729043026934e-07, "loss": 0.0211, "step": 205450 }, { "epoch": 1.6624322356177683, "grad_norm": 0.520043671131134, "learning_rate": 8.429805072387204e-07, "loss": 0.0209, "step": 205460 }, { "epoch": 1.6625131483129703, "grad_norm": 0.4636824131011963, "learning_rate": 8.425881930777269e-07, "loss": 0.0205, "step": 205470 }, { "epoch": 1.6625940610081722, "grad_norm": 0.42384395003318787, "learning_rate": 8.421959618275361e-07, "loss": 0.0232, "step": 205480 }, { "epoch": 1.662674973703374, "grad_norm": 0.36606624722480774, "learning_rate": 8.418038134959672e-07, "loss": 0.0162, "step": 205490 }, { "epoch": 1.662755886398576, "grad_norm": 0.27542513608932495, "learning_rate": 8.41411748090844e-07, "loss": 0.0124, "step": 205500 }, { "epoch": 1.6628367990937778, "grad_norm": 0.4467889666557312, "learning_rate": 8.410197656199842e-07, "loss": 0.0205, "step": 205510 }, { "epoch": 1.6629177117889797, "grad_norm": 0.18659336864948273, "learning_rate": 8.406278660912053e-07, "loss": 0.0178, "step": 205520 }, { "epoch": 1.6629986244841817, "grad_norm": 0.301848441362381, "learning_rate": 8.402360495123229e-07, "loss": 0.0101, "step": 205530 }, { "epoch": 1.6630795371793834, "grad_norm": 0.5769155621528625, "learning_rate": 8.398443158911507e-07, "loss": 0.0158, "step": 205540 }, { "epoch": 1.6631604498745853, "grad_norm": 0.1956610083580017, "learning_rate": 8.394526652355012e-07, "loss": 0.0156, "step": 205550 }, { "epoch": 1.6632413625697873, "grad_norm": 0.41953542828559875, "learning_rate": 8.390610975531849e-07, "loss": 0.0306, "step": 205560 }, { "epoch": 1.663322275264989, "grad_norm": 0.31552740931510925, "learning_rate": 8.386696128520105e-07, "loss": 0.021, "step": 205570 }, { "epoch": 1.663403187960191, "grad_norm": 0.5366262197494507, "learning_rate": 8.382782111397863e-07, "loss": 0.0139, "step": 205580 }, { "epoch": 1.6634841006553929, "grad_norm": 0.33478280901908875, "learning_rate": 8.378868924243172e-07, "loss": 0.0153, "step": 205590 }, { "epoch": 1.6635650133505946, "grad_norm": 0.3869457244873047, "learning_rate": 8.374956567134074e-07, "loss": 0.008, "step": 205600 }, { "epoch": 1.6636459260457965, "grad_norm": 0.15578597784042358, "learning_rate": 8.37104504014859e-07, "loss": 0.0229, "step": 205610 }, { "epoch": 1.6637268387409985, "grad_norm": 0.11251913756132126, "learning_rate": 8.367134343364736e-07, "loss": 0.0143, "step": 205620 }, { "epoch": 1.6638077514362002, "grad_norm": 0.570837676525116, "learning_rate": 8.363224476860482e-07, "loss": 0.0181, "step": 205630 }, { "epoch": 1.6638886641314024, "grad_norm": 0.32918110489845276, "learning_rate": 8.359315440713839e-07, "loss": 0.0176, "step": 205640 }, { "epoch": 1.663969576826604, "grad_norm": 0.29730427265167236, "learning_rate": 8.355407235002722e-07, "loss": 0.0256, "step": 205650 }, { "epoch": 1.664050489521806, "grad_norm": 0.2874546945095062, "learning_rate": 8.351499859805101e-07, "loss": 0.0223, "step": 205660 }, { "epoch": 1.664131402217008, "grad_norm": 0.5116411447525024, "learning_rate": 8.347593315198904e-07, "loss": 0.0215, "step": 205670 }, { "epoch": 1.6642123149122097, "grad_norm": 0.46772822737693787, "learning_rate": 8.343687601262002e-07, "loss": 0.018, "step": 205680 }, { "epoch": 1.6642932276074116, "grad_norm": 0.6512619853019714, "learning_rate": 8.33978271807232e-07, "loss": 0.0278, "step": 205690 }, { "epoch": 1.6643741403026135, "grad_norm": 0.13248184323310852, "learning_rate": 8.335878665707725e-07, "loss": 0.0099, "step": 205700 }, { "epoch": 1.6644550529978153, "grad_norm": 0.34165751934051514, "learning_rate": 8.331975444246077e-07, "loss": 0.0168, "step": 205710 }, { "epoch": 1.6645359656930172, "grad_norm": 0.33772721886634827, "learning_rate": 8.328073053765212e-07, "loss": 0.016, "step": 205720 }, { "epoch": 1.6646168783882191, "grad_norm": 0.18362633883953094, "learning_rate": 8.324171494342953e-07, "loss": 0.0151, "step": 205730 }, { "epoch": 1.6646977910834209, "grad_norm": 0.41623827815055847, "learning_rate": 8.320270766057115e-07, "loss": 0.0136, "step": 205740 }, { "epoch": 1.664778703778623, "grad_norm": 0.5246970653533936, "learning_rate": 8.316370868985485e-07, "loss": 0.0254, "step": 205750 }, { "epoch": 1.6648596164738247, "grad_norm": 0.35393086075782776, "learning_rate": 8.31247180320584e-07, "loss": 0.018, "step": 205760 }, { "epoch": 1.6649405291690265, "grad_norm": 0.3457569181919098, "learning_rate": 8.308573568795941e-07, "loss": 0.0424, "step": 205770 }, { "epoch": 1.6650214418642286, "grad_norm": 0.13013668358325958, "learning_rate": 8.304676165833525e-07, "loss": 0.0168, "step": 205780 }, { "epoch": 1.6651023545594303, "grad_norm": 0.12522828578948975, "learning_rate": 8.300779594396324e-07, "loss": 0.0257, "step": 205790 }, { "epoch": 1.6651832672546323, "grad_norm": 0.28139084577560425, "learning_rate": 8.296883854562038e-07, "loss": 0.022, "step": 205800 }, { "epoch": 1.6652641799498342, "grad_norm": 0.7620553970336914, "learning_rate": 8.29298894640837e-07, "loss": 0.0226, "step": 205810 }, { "epoch": 1.665345092645036, "grad_norm": 0.27505412697792053, "learning_rate": 8.28909487001297e-07, "loss": 0.0115, "step": 205820 }, { "epoch": 1.6654260053402379, "grad_norm": 0.18490178883075714, "learning_rate": 8.285201625453548e-07, "loss": 0.0194, "step": 205830 }, { "epoch": 1.6655069180354398, "grad_norm": 0.636744499206543, "learning_rate": 8.281309212807692e-07, "loss": 0.0154, "step": 205840 }, { "epoch": 1.6655878307306415, "grad_norm": 0.3464474380016327, "learning_rate": 8.277417632153056e-07, "loss": 0.014, "step": 205850 }, { "epoch": 1.6656687434258435, "grad_norm": 0.3032878637313843, "learning_rate": 8.273526883567268e-07, "loss": 0.0144, "step": 205860 }, { "epoch": 1.6657496561210454, "grad_norm": 0.2948945462703705, "learning_rate": 8.269636967127864e-07, "loss": 0.018, "step": 205870 }, { "epoch": 1.6658305688162471, "grad_norm": 0.12640461325645447, "learning_rate": 8.265747882912473e-07, "loss": 0.0214, "step": 205880 }, { "epoch": 1.6659114815114493, "grad_norm": 0.2707204222679138, "learning_rate": 8.261859630998642e-07, "loss": 0.0154, "step": 205890 }, { "epoch": 1.665992394206651, "grad_norm": 0.16947512328624725, "learning_rate": 8.257972211463889e-07, "loss": 0.0163, "step": 205900 }, { "epoch": 1.6660733069018527, "grad_norm": 0.27972421050071716, "learning_rate": 8.254085624385766e-07, "loss": 0.0255, "step": 205910 }, { "epoch": 1.666154219597055, "grad_norm": 0.5486173629760742, "learning_rate": 8.25019986984178e-07, "loss": 0.0251, "step": 205920 }, { "epoch": 1.6662351322922566, "grad_norm": 0.020292865112423897, "learning_rate": 8.246314947909417e-07, "loss": 0.0133, "step": 205930 }, { "epoch": 1.6663160449874586, "grad_norm": 0.4233115017414093, "learning_rate": 8.242430858666156e-07, "loss": 0.0191, "step": 205940 }, { "epoch": 1.6663969576826605, "grad_norm": 0.003111580153927207, "learning_rate": 8.238547602189461e-07, "loss": 0.0279, "step": 205950 }, { "epoch": 1.6664778703778622, "grad_norm": 0.41403308510780334, "learning_rate": 8.234665178556766e-07, "loss": 0.0124, "step": 205960 }, { "epoch": 1.6665587830730642, "grad_norm": 0.4219348430633545, "learning_rate": 8.230783587845509e-07, "loss": 0.0213, "step": 205970 }, { "epoch": 1.666639695768266, "grad_norm": 0.05396309494972229, "learning_rate": 8.226902830133082e-07, "loss": 0.0263, "step": 205980 }, { "epoch": 1.6667206084634678, "grad_norm": 0.2930498719215393, "learning_rate": 8.223022905496913e-07, "loss": 0.0148, "step": 205990 }, { "epoch": 1.6668015211586698, "grad_norm": 0.6132137775421143, "learning_rate": 8.219143814014346e-07, "loss": 0.0144, "step": 206000 }, { "epoch": 1.6668824338538717, "grad_norm": 0.5376596450805664, "learning_rate": 8.215265555762735e-07, "loss": 0.0173, "step": 206010 }, { "epoch": 1.6669633465490734, "grad_norm": 0.0432380810379982, "learning_rate": 8.211388130819475e-07, "loss": 0.0147, "step": 206020 }, { "epoch": 1.6670442592442756, "grad_norm": 0.2191711813211441, "learning_rate": 8.207511539261842e-07, "loss": 0.0145, "step": 206030 }, { "epoch": 1.6671251719394773, "grad_norm": 0.29117417335510254, "learning_rate": 8.203635781167151e-07, "loss": 0.0156, "step": 206040 }, { "epoch": 1.6672060846346792, "grad_norm": 0.2927129566669464, "learning_rate": 8.199760856612732e-07, "loss": 0.0275, "step": 206050 }, { "epoch": 1.6672869973298812, "grad_norm": 0.8266236186027527, "learning_rate": 8.195886765675815e-07, "loss": 0.0199, "step": 206060 }, { "epoch": 1.6673679100250829, "grad_norm": 0.5440625548362732, "learning_rate": 8.192013508433694e-07, "loss": 0.0219, "step": 206070 }, { "epoch": 1.6674488227202848, "grad_norm": 0.5565234422683716, "learning_rate": 8.188141084963619e-07, "loss": 0.0219, "step": 206080 }, { "epoch": 1.6675297354154868, "grad_norm": 0.20333199203014374, "learning_rate": 8.184269495342778e-07, "loss": 0.0113, "step": 206090 }, { "epoch": 1.6676106481106885, "grad_norm": 0.2838537395000458, "learning_rate": 8.180398739648415e-07, "loss": 0.0224, "step": 206100 }, { "epoch": 1.6676915608058904, "grad_norm": 0.241718128323555, "learning_rate": 8.176528817957724e-07, "loss": 0.0158, "step": 206110 }, { "epoch": 1.6677724735010924, "grad_norm": 0.3093504011631012, "learning_rate": 8.172659730347848e-07, "loss": 0.0204, "step": 206120 }, { "epoch": 1.667853386196294, "grad_norm": 0.21085003018379211, "learning_rate": 8.168791476895988e-07, "loss": 0.0256, "step": 206130 }, { "epoch": 1.667934298891496, "grad_norm": 0.27777278423309326, "learning_rate": 8.164924057679269e-07, "loss": 0.0175, "step": 206140 }, { "epoch": 1.668015211586698, "grad_norm": 0.43845605850219727, "learning_rate": 8.161057472774825e-07, "loss": 0.0264, "step": 206150 }, { "epoch": 1.6680961242818997, "grad_norm": 0.023558951914310455, "learning_rate": 8.157191722259766e-07, "loss": 0.0141, "step": 206160 }, { "epoch": 1.6681770369771018, "grad_norm": 0.2507610321044922, "learning_rate": 8.153326806211176e-07, "loss": 0.0156, "step": 206170 }, { "epoch": 1.6682579496723036, "grad_norm": 0.24766553938388824, "learning_rate": 8.149462724706147e-07, "loss": 0.015, "step": 206180 }, { "epoch": 1.6683388623675055, "grad_norm": 0.5371096730232239, "learning_rate": 8.145599477821731e-07, "loss": 0.021, "step": 206190 }, { "epoch": 1.6684197750627074, "grad_norm": 0.18408459424972534, "learning_rate": 8.141737065634964e-07, "loss": 0.0177, "step": 206200 }, { "epoch": 1.6685006877579092, "grad_norm": 0.17035344243049622, "learning_rate": 8.137875488222907e-07, "loss": 0.0131, "step": 206210 }, { "epoch": 1.668581600453111, "grad_norm": 0.35422995686531067, "learning_rate": 8.134014745662539e-07, "loss": 0.0152, "step": 206220 }, { "epoch": 1.668662513148313, "grad_norm": 0.03478015214204788, "learning_rate": 8.130154838030851e-07, "loss": 0.0152, "step": 206230 }, { "epoch": 1.6687434258435148, "grad_norm": 0.425142765045166, "learning_rate": 8.126295765404857e-07, "loss": 0.0188, "step": 206240 }, { "epoch": 1.6688243385387167, "grad_norm": 0.4455672800540924, "learning_rate": 8.122437527861471e-07, "loss": 0.0163, "step": 206250 }, { "epoch": 1.6689052512339186, "grad_norm": 0.2823316156864166, "learning_rate": 8.118580125477671e-07, "loss": 0.0183, "step": 206260 }, { "epoch": 1.6689861639291204, "grad_norm": 0.5014564394950867, "learning_rate": 8.11472355833039e-07, "loss": 0.019, "step": 206270 }, { "epoch": 1.6690670766243225, "grad_norm": 0.4149790108203888, "learning_rate": 8.110867826496499e-07, "loss": 0.0249, "step": 206280 }, { "epoch": 1.6691479893195242, "grad_norm": 0.4108055531978607, "learning_rate": 8.107012930052926e-07, "loss": 0.0299, "step": 206290 }, { "epoch": 1.669228902014726, "grad_norm": 0.23804214596748352, "learning_rate": 8.103158869076555e-07, "loss": 0.0101, "step": 206300 }, { "epoch": 1.6693098147099281, "grad_norm": 0.5216634273529053, "learning_rate": 8.099305643644212e-07, "loss": 0.017, "step": 206310 }, { "epoch": 1.6693907274051298, "grad_norm": 0.45573607087135315, "learning_rate": 8.095453253832769e-07, "loss": 0.0176, "step": 206320 }, { "epoch": 1.6694716401003318, "grad_norm": 0.2436862289905548, "learning_rate": 8.091601699719048e-07, "loss": 0.0147, "step": 206330 }, { "epoch": 1.6695525527955337, "grad_norm": 0.4420028030872345, "learning_rate": 8.087750981379855e-07, "loss": 0.0158, "step": 206340 }, { "epoch": 1.6696334654907354, "grad_norm": 0.07631022483110428, "learning_rate": 8.083901098891994e-07, "loss": 0.0225, "step": 206350 }, { "epoch": 1.6697143781859374, "grad_norm": 0.22323527932167053, "learning_rate": 8.080052052332232e-07, "loss": 0.0181, "step": 206360 }, { "epoch": 1.6697952908811393, "grad_norm": 0.34778377413749695, "learning_rate": 8.076203841777336e-07, "loss": 0.0195, "step": 206370 }, { "epoch": 1.669876203576341, "grad_norm": 0.4776521921157837, "learning_rate": 8.072356467304049e-07, "loss": 0.0194, "step": 206380 }, { "epoch": 1.669957116271543, "grad_norm": 0.16635853052139282, "learning_rate": 8.068509928989088e-07, "loss": 0.0196, "step": 206390 }, { "epoch": 1.670038028966745, "grad_norm": 0.2658426761627197, "learning_rate": 8.064664226909197e-07, "loss": 0.0269, "step": 206400 }, { "epoch": 1.6701189416619466, "grad_norm": 0.07488652318716049, "learning_rate": 8.060819361141042e-07, "loss": 0.019, "step": 206410 }, { "epoch": 1.6701998543571488, "grad_norm": 0.6614619493484497, "learning_rate": 8.056975331761291e-07, "loss": 0.0232, "step": 206420 }, { "epoch": 1.6702807670523505, "grad_norm": 0.14122019708156586, "learning_rate": 8.053132138846647e-07, "loss": 0.0221, "step": 206430 }, { "epoch": 1.6703616797475522, "grad_norm": 0.3897090554237366, "learning_rate": 8.049289782473713e-07, "loss": 0.0286, "step": 206440 }, { "epoch": 1.6704425924427544, "grad_norm": 0.5956182479858398, "learning_rate": 8.045448262719125e-07, "loss": 0.0159, "step": 206450 }, { "epoch": 1.670523505137956, "grad_norm": 0.30743202567100525, "learning_rate": 8.041607579659522e-07, "loss": 0.0112, "step": 206460 }, { "epoch": 1.670604417833158, "grad_norm": 0.38920289278030396, "learning_rate": 8.037767733371454e-07, "loss": 0.0212, "step": 206470 }, { "epoch": 1.67068533052836, "grad_norm": 0.3628155291080475, "learning_rate": 8.033928723931539e-07, "loss": 0.017, "step": 206480 }, { "epoch": 1.6707662432235617, "grad_norm": 0.07538527250289917, "learning_rate": 8.030090551416331e-07, "loss": 0.0161, "step": 206490 }, { "epoch": 1.6708471559187636, "grad_norm": 0.3102499842643738, "learning_rate": 8.026253215902335e-07, "loss": 0.0195, "step": 206500 }, { "epoch": 1.6709280686139656, "grad_norm": 0.6297762989997864, "learning_rate": 8.02241671746613e-07, "loss": 0.0422, "step": 206510 }, { "epoch": 1.6710089813091673, "grad_norm": 0.4460095465183258, "learning_rate": 8.018581056184199e-07, "loss": 0.013, "step": 206520 }, { "epoch": 1.6710898940043692, "grad_norm": 0.22150269150733948, "learning_rate": 8.014746232133042e-07, "loss": 0.0155, "step": 206530 }, { "epoch": 1.6711708066995712, "grad_norm": 0.5184540152549744, "learning_rate": 8.010912245389136e-07, "loss": 0.0253, "step": 206540 }, { "epoch": 1.671251719394773, "grad_norm": 0.36784595251083374, "learning_rate": 8.007079096028947e-07, "loss": 0.0189, "step": 206550 }, { "epoch": 1.671332632089975, "grad_norm": 0.22615474462509155, "learning_rate": 8.00324678412891e-07, "loss": 0.0291, "step": 206560 }, { "epoch": 1.6714135447851768, "grad_norm": 0.7917117476463318, "learning_rate": 7.999415309765457e-07, "loss": 0.0198, "step": 206570 }, { "epoch": 1.6714944574803787, "grad_norm": 0.16389679908752441, "learning_rate": 7.995584673015e-07, "loss": 0.0123, "step": 206580 }, { "epoch": 1.6715753701755807, "grad_norm": 0.35934731364250183, "learning_rate": 7.991754873953933e-07, "loss": 0.01, "step": 206590 }, { "epoch": 1.6716562828707824, "grad_norm": 0.44754859805107117, "learning_rate": 7.987925912658629e-07, "loss": 0.0222, "step": 206600 }, { "epoch": 1.6717371955659843, "grad_norm": 0.35194823145866394, "learning_rate": 7.984097789205436e-07, "loss": 0.0168, "step": 206610 }, { "epoch": 1.6718181082611863, "grad_norm": 0.6726698279380798, "learning_rate": 7.980270503670745e-07, "loss": 0.0256, "step": 206620 }, { "epoch": 1.671899020956388, "grad_norm": 0.6177241802215576, "learning_rate": 7.976444056130833e-07, "loss": 0.0334, "step": 206630 }, { "epoch": 1.67197993365159, "grad_norm": 0.17396502196788788, "learning_rate": 7.972618446662017e-07, "loss": 0.0177, "step": 206640 }, { "epoch": 1.6720608463467919, "grad_norm": 0.12419277429580688, "learning_rate": 7.96879367534063e-07, "loss": 0.0245, "step": 206650 }, { "epoch": 1.6721417590419936, "grad_norm": 0.48342567682266235, "learning_rate": 7.964969742242907e-07, "loss": 0.0119, "step": 206660 }, { "epoch": 1.6722226717371955, "grad_norm": 0.17874670028686523, "learning_rate": 7.961146647445112e-07, "loss": 0.0295, "step": 206670 }, { "epoch": 1.6723035844323975, "grad_norm": 0.42140305042266846, "learning_rate": 7.957324391023524e-07, "loss": 0.0264, "step": 206680 }, { "epoch": 1.6723844971275992, "grad_norm": 0.2957017421722412, "learning_rate": 7.953502973054317e-07, "loss": 0.0105, "step": 206690 }, { "epoch": 1.6724654098228013, "grad_norm": 1.377644419670105, "learning_rate": 7.949682393613744e-07, "loss": 0.0193, "step": 206700 }, { "epoch": 1.672546322518003, "grad_norm": 0.17073620855808258, "learning_rate": 7.945862652777986e-07, "loss": 0.0164, "step": 206710 }, { "epoch": 1.672627235213205, "grad_norm": 0.4127926826477051, "learning_rate": 7.942043750623212e-07, "loss": 0.0247, "step": 206720 }, { "epoch": 1.672708147908407, "grad_norm": 0.31615322828292847, "learning_rate": 7.938225687225586e-07, "loss": 0.0127, "step": 206730 }, { "epoch": 1.6727890606036087, "grad_norm": 0.26941993832588196, "learning_rate": 7.934408462661258e-07, "loss": 0.0132, "step": 206740 }, { "epoch": 1.6728699732988106, "grad_norm": 0.3373965322971344, "learning_rate": 7.93059207700635e-07, "loss": 0.0161, "step": 206750 }, { "epoch": 1.6729508859940125, "grad_norm": 0.6793763041496277, "learning_rate": 7.926776530336965e-07, "loss": 0.0202, "step": 206760 }, { "epoch": 1.6730317986892143, "grad_norm": 0.15722760558128357, "learning_rate": 7.922961822729203e-07, "loss": 0.0186, "step": 206770 }, { "epoch": 1.6731127113844162, "grad_norm": 0.7485611438751221, "learning_rate": 7.919147954259138e-07, "loss": 0.0168, "step": 206780 }, { "epoch": 1.6731936240796181, "grad_norm": 0.3642275929450989, "learning_rate": 7.915334925002832e-07, "loss": 0.0217, "step": 206790 }, { "epoch": 1.6732745367748199, "grad_norm": 0.1022939532995224, "learning_rate": 7.911522735036314e-07, "loss": 0.0192, "step": 206800 }, { "epoch": 1.6733554494700218, "grad_norm": 0.6647599935531616, "learning_rate": 7.907711384435646e-07, "loss": 0.0201, "step": 206810 }, { "epoch": 1.6734363621652237, "grad_norm": 0.26711609959602356, "learning_rate": 7.903900873276804e-07, "loss": 0.0189, "step": 206820 }, { "epoch": 1.6735172748604255, "grad_norm": 0.1383850872516632, "learning_rate": 7.900091201635773e-07, "loss": 0.0137, "step": 206830 }, { "epoch": 1.6735981875556276, "grad_norm": 0.6475138664245605, "learning_rate": 7.896282369588576e-07, "loss": 0.0224, "step": 206840 }, { "epoch": 1.6736791002508293, "grad_norm": 0.1381559520959854, "learning_rate": 7.892474377211129e-07, "loss": 0.0158, "step": 206850 }, { "epoch": 1.6737600129460313, "grad_norm": 0.2666741907596588, "learning_rate": 7.888667224579372e-07, "loss": 0.0232, "step": 206860 }, { "epoch": 1.6738409256412332, "grad_norm": 0.12204734981060028, "learning_rate": 7.884860911769276e-07, "loss": 0.0141, "step": 206870 }, { "epoch": 1.673921838336435, "grad_norm": 0.45038968324661255, "learning_rate": 7.881055438856689e-07, "loss": 0.0167, "step": 206880 }, { "epoch": 1.6740027510316369, "grad_norm": 0.5191385746002197, "learning_rate": 7.877250805917552e-07, "loss": 0.0182, "step": 206890 }, { "epoch": 1.6740836637268388, "grad_norm": 0.4402310848236084, "learning_rate": 7.873447013027718e-07, "loss": 0.0198, "step": 206900 }, { "epoch": 1.6741645764220405, "grad_norm": 0.4229034185409546, "learning_rate": 7.869644060263055e-07, "loss": 0.0141, "step": 206910 }, { "epoch": 1.6742454891172425, "grad_norm": 0.021693814545869827, "learning_rate": 7.865841947699399e-07, "loss": 0.0154, "step": 206920 }, { "epoch": 1.6743264018124444, "grad_norm": 0.4475116729736328, "learning_rate": 7.862040675412575e-07, "loss": 0.0191, "step": 206930 }, { "epoch": 1.6744073145076461, "grad_norm": 0.3893425762653351, "learning_rate": 7.858240243478393e-07, "loss": 0.021, "step": 206940 }, { "epoch": 1.6744882272028483, "grad_norm": 0.44761228561401367, "learning_rate": 7.854440651972644e-07, "loss": 0.0157, "step": 206950 }, { "epoch": 1.67456913989805, "grad_norm": 0.5209549069404602, "learning_rate": 7.850641900971101e-07, "loss": 0.0304, "step": 206960 }, { "epoch": 1.6746500525932517, "grad_norm": 0.40914011001586914, "learning_rate": 7.846843990549525e-07, "loss": 0.0155, "step": 206970 }, { "epoch": 1.6747309652884539, "grad_norm": 0.346150666475296, "learning_rate": 7.843046920783659e-07, "loss": 0.0195, "step": 206980 }, { "epoch": 1.6748118779836556, "grad_norm": 0.31906014680862427, "learning_rate": 7.839250691749223e-07, "loss": 0.0257, "step": 206990 }, { "epoch": 1.6748927906788575, "grad_norm": 0.42032989859580994, "learning_rate": 7.835455303521927e-07, "loss": 0.0137, "step": 207000 }, { "epoch": 1.6749737033740595, "grad_norm": 0.3101758658885956, "learning_rate": 7.831660756177462e-07, "loss": 0.0229, "step": 207010 }, { "epoch": 1.6750546160692612, "grad_norm": 0.48726704716682434, "learning_rate": 7.827867049791488e-07, "loss": 0.023, "step": 207020 }, { "epoch": 1.6751355287644631, "grad_norm": 0.1941405087709427, "learning_rate": 7.8240741844397e-07, "loss": 0.0167, "step": 207030 }, { "epoch": 1.675216441459665, "grad_norm": 0.6515725255012512, "learning_rate": 7.820282160197707e-07, "loss": 0.0107, "step": 207040 }, { "epoch": 1.6752973541548668, "grad_norm": 0.10158855468034744, "learning_rate": 7.816490977141128e-07, "loss": 0.0247, "step": 207050 }, { "epoch": 1.6753782668500687, "grad_norm": 0.6821381449699402, "learning_rate": 7.812700635345605e-07, "loss": 0.0148, "step": 207060 }, { "epoch": 1.6754591795452707, "grad_norm": 0.3476363718509674, "learning_rate": 7.808911134886693e-07, "loss": 0.0186, "step": 207070 }, { "epoch": 1.6755400922404724, "grad_norm": 0.30617886781692505, "learning_rate": 7.805122475839972e-07, "loss": 0.0175, "step": 207080 }, { "epoch": 1.6756210049356746, "grad_norm": 0.17918920516967773, "learning_rate": 7.801334658281012e-07, "loss": 0.0164, "step": 207090 }, { "epoch": 1.6757019176308763, "grad_norm": 0.08291912823915482, "learning_rate": 7.797547682285351e-07, "loss": 0.0199, "step": 207100 }, { "epoch": 1.675782830326078, "grad_norm": 0.04528593644499779, "learning_rate": 7.793761547928508e-07, "loss": 0.014, "step": 207110 }, { "epoch": 1.6758637430212802, "grad_norm": 0.15159976482391357, "learning_rate": 7.789976255285991e-07, "loss": 0.0123, "step": 207120 }, { "epoch": 1.6759446557164819, "grad_norm": 0.2163323611021042, "learning_rate": 7.786191804433285e-07, "loss": 0.0166, "step": 207130 }, { "epoch": 1.6760255684116838, "grad_norm": 0.2926882803440094, "learning_rate": 7.782408195445868e-07, "loss": 0.0188, "step": 207140 }, { "epoch": 1.6761064811068858, "grad_norm": 0.8425076007843018, "learning_rate": 7.778625428399194e-07, "loss": 0.0275, "step": 207150 }, { "epoch": 1.6761873938020875, "grad_norm": 0.5894491076469421, "learning_rate": 7.7748435033687e-07, "loss": 0.0152, "step": 207160 }, { "epoch": 1.6762683064972894, "grad_norm": 0.3107801079750061, "learning_rate": 7.771062420429814e-07, "loss": 0.025, "step": 207170 }, { "epoch": 1.6763492191924914, "grad_norm": 0.4179185628890991, "learning_rate": 7.767282179657936e-07, "loss": 0.0198, "step": 207180 }, { "epoch": 1.676430131887693, "grad_norm": 0.8096086978912354, "learning_rate": 7.763502781128462e-07, "loss": 0.0312, "step": 207190 }, { "epoch": 1.676511044582895, "grad_norm": 0.22027692198753357, "learning_rate": 7.759724224916759e-07, "loss": 0.0198, "step": 207200 }, { "epoch": 1.676591957278097, "grad_norm": 0.24573826789855957, "learning_rate": 7.755946511098184e-07, "loss": 0.0154, "step": 207210 }, { "epoch": 1.6766728699732987, "grad_norm": 0.4752001166343689, "learning_rate": 7.752169639748069e-07, "loss": 0.0239, "step": 207220 }, { "epoch": 1.6767537826685008, "grad_norm": 0.3787784278392792, "learning_rate": 7.748393610941745e-07, "loss": 0.0134, "step": 207230 }, { "epoch": 1.6768346953637026, "grad_norm": 0.39918819069862366, "learning_rate": 7.744618424754502e-07, "loss": 0.0208, "step": 207240 }, { "epoch": 1.6769156080589045, "grad_norm": 0.4554632604122162, "learning_rate": 7.74084408126165e-07, "loss": 0.0286, "step": 207250 }, { "epoch": 1.6769965207541064, "grad_norm": 0.06474517285823822, "learning_rate": 7.737070580538464e-07, "loss": 0.0155, "step": 207260 }, { "epoch": 1.6770774334493082, "grad_norm": 0.37269288301467896, "learning_rate": 7.73329792266016e-07, "loss": 0.016, "step": 207270 }, { "epoch": 1.67715834614451, "grad_norm": 0.4376753568649292, "learning_rate": 7.729526107702007e-07, "loss": 0.0171, "step": 207280 }, { "epoch": 1.677239258839712, "grad_norm": 0.35282012820243835, "learning_rate": 7.725755135739227e-07, "loss": 0.034, "step": 207290 }, { "epoch": 1.6773201715349138, "grad_norm": 0.26974475383758545, "learning_rate": 7.72198500684701e-07, "loss": 0.0191, "step": 207300 }, { "epoch": 1.6774010842301157, "grad_norm": 0.8626517653465271, "learning_rate": 7.718215721100547e-07, "loss": 0.0119, "step": 207310 }, { "epoch": 1.6774819969253176, "grad_norm": 0.3697517514228821, "learning_rate": 7.714447278575016e-07, "loss": 0.0248, "step": 207320 }, { "epoch": 1.6775629096205193, "grad_norm": 0.5940667986869812, "learning_rate": 7.710679679345562e-07, "loss": 0.0186, "step": 207330 }, { "epoch": 1.6776438223157213, "grad_norm": 0.05917268991470337, "learning_rate": 7.70691292348733e-07, "loss": 0.0178, "step": 207340 }, { "epoch": 1.6777247350109232, "grad_norm": 0.15569965541362762, "learning_rate": 7.703147011075429e-07, "loss": 0.0108, "step": 207350 }, { "epoch": 1.677805647706125, "grad_norm": 0.44574248790740967, "learning_rate": 7.699381942184974e-07, "loss": 0.0108, "step": 207360 }, { "epoch": 1.677886560401327, "grad_norm": 0.3746815025806427, "learning_rate": 7.695617716891041e-07, "loss": 0.0179, "step": 207370 }, { "epoch": 1.6779674730965288, "grad_norm": 0.135539248585701, "learning_rate": 7.691854335268706e-07, "loss": 0.0144, "step": 207380 }, { "epoch": 1.6780483857917308, "grad_norm": 0.7241682410240173, "learning_rate": 7.688091797393021e-07, "loss": 0.021, "step": 207390 }, { "epoch": 1.6781292984869327, "grad_norm": 0.34171611070632935, "learning_rate": 7.684330103339016e-07, "loss": 0.0136, "step": 207400 }, { "epoch": 1.6782102111821344, "grad_norm": 0.34104594588279724, "learning_rate": 7.680569253181719e-07, "loss": 0.0138, "step": 207410 }, { "epoch": 1.6782911238773364, "grad_norm": 0.4137289524078369, "learning_rate": 7.676809246996125e-07, "loss": 0.0193, "step": 207420 }, { "epoch": 1.6783720365725383, "grad_norm": 0.4450749456882477, "learning_rate": 7.673050084857214e-07, "loss": 0.0252, "step": 207430 }, { "epoch": 1.67845294926774, "grad_norm": 0.26958656311035156, "learning_rate": 7.669291766839976e-07, "loss": 0.021, "step": 207440 }, { "epoch": 1.678533861962942, "grad_norm": 1.0015665292739868, "learning_rate": 7.665534293019355e-07, "loss": 0.02, "step": 207450 }, { "epoch": 1.678614774658144, "grad_norm": 0.6979461908340454, "learning_rate": 7.66177766347026e-07, "loss": 0.0291, "step": 207460 }, { "epoch": 1.6786956873533456, "grad_norm": 0.41531065106391907, "learning_rate": 7.658021878267646e-07, "loss": 0.0166, "step": 207470 }, { "epoch": 1.6787766000485476, "grad_norm": 0.1256789118051529, "learning_rate": 7.654266937486404e-07, "loss": 0.0154, "step": 207480 }, { "epoch": 1.6788575127437495, "grad_norm": 0.23002862930297852, "learning_rate": 7.650512841201391e-07, "loss": 0.0159, "step": 207490 }, { "epoch": 1.6789384254389512, "grad_norm": 0.4962247312068939, "learning_rate": 7.646759589487507e-07, "loss": 0.0218, "step": 207500 }, { "epoch": 1.6790193381341534, "grad_norm": 0.2132471650838852, "learning_rate": 7.643007182419588e-07, "loss": 0.0235, "step": 207510 }, { "epoch": 1.679100250829355, "grad_norm": 0.618547797203064, "learning_rate": 7.639255620072472e-07, "loss": 0.0135, "step": 207520 }, { "epoch": 1.679181163524557, "grad_norm": 0.4323873817920685, "learning_rate": 7.635504902520979e-07, "loss": 0.0134, "step": 207530 }, { "epoch": 1.679262076219759, "grad_norm": 0.28842729330062866, "learning_rate": 7.631755029839905e-07, "loss": 0.0173, "step": 207540 }, { "epoch": 1.6793429889149607, "grad_norm": 0.249696284532547, "learning_rate": 7.628006002104033e-07, "loss": 0.0081, "step": 207550 }, { "epoch": 1.6794239016101626, "grad_norm": 0.30939745903015137, "learning_rate": 7.624257819388126e-07, "loss": 0.0177, "step": 207560 }, { "epoch": 1.6795048143053646, "grad_norm": 0.5000531077384949, "learning_rate": 7.620510481766941e-07, "loss": 0.0168, "step": 207570 }, { "epoch": 1.6795857270005663, "grad_norm": 0.6355414390563965, "learning_rate": 7.616763989315207e-07, "loss": 0.0088, "step": 207580 }, { "epoch": 1.6796666396957682, "grad_norm": 0.24747706949710846, "learning_rate": 7.613018342107642e-07, "loss": 0.0254, "step": 207590 }, { "epoch": 1.6797475523909702, "grad_norm": 0.2761688232421875, "learning_rate": 7.609273540218942e-07, "loss": 0.0129, "step": 207600 }, { "epoch": 1.679828465086172, "grad_norm": 0.4144209325313568, "learning_rate": 7.605529583723787e-07, "loss": 0.0182, "step": 207610 }, { "epoch": 1.679909377781374, "grad_norm": 0.20903006196022034, "learning_rate": 7.601786472696848e-07, "loss": 0.0112, "step": 207620 }, { "epoch": 1.6799902904765758, "grad_norm": 0.2875693142414093, "learning_rate": 7.598044207212762e-07, "loss": 0.018, "step": 207630 }, { "epoch": 1.6800712031717775, "grad_norm": 0.918709933757782, "learning_rate": 7.594302787346192e-07, "loss": 0.0219, "step": 207640 }, { "epoch": 1.6801521158669797, "grad_norm": 0.3931932747364044, "learning_rate": 7.590562213171704e-07, "loss": 0.0193, "step": 207650 }, { "epoch": 1.6802330285621814, "grad_norm": 0.26488301157951355, "learning_rate": 7.586822484763939e-07, "loss": 0.0166, "step": 207660 }, { "epoch": 1.6803139412573833, "grad_norm": 0.35338759422302246, "learning_rate": 7.583083602197472e-07, "loss": 0.0247, "step": 207670 }, { "epoch": 1.6803948539525853, "grad_norm": 0.151199609041214, "learning_rate": 7.579345565546831e-07, "loss": 0.0209, "step": 207680 }, { "epoch": 1.680475766647787, "grad_norm": 0.48605823516845703, "learning_rate": 7.575608374886601e-07, "loss": 0.0111, "step": 207690 }, { "epoch": 1.680556679342989, "grad_norm": 0.20109768211841583, "learning_rate": 7.57187203029131e-07, "loss": 0.0149, "step": 207700 }, { "epoch": 1.6806375920381909, "grad_norm": 0.21558748185634613, "learning_rate": 7.568136531835441e-07, "loss": 0.0188, "step": 207710 }, { "epoch": 1.6807185047333926, "grad_norm": 0.2953947186470032, "learning_rate": 7.564401879593524e-07, "loss": 0.0118, "step": 207720 }, { "epoch": 1.6807994174285945, "grad_norm": 0.34389862418174744, "learning_rate": 7.560668073640021e-07, "loss": 0.0145, "step": 207730 }, { "epoch": 1.6808803301237965, "grad_norm": 0.5998127460479736, "learning_rate": 7.556935114049407e-07, "loss": 0.0175, "step": 207740 }, { "epoch": 1.6809612428189982, "grad_norm": 0.05135621875524521, "learning_rate": 7.553203000896115e-07, "loss": 0.011, "step": 207750 }, { "epoch": 1.6810421555142003, "grad_norm": 0.006552115082740784, "learning_rate": 7.549471734254582e-07, "loss": 0.0156, "step": 207760 }, { "epoch": 1.681123068209402, "grad_norm": 0.7020780444145203, "learning_rate": 7.545741314199218e-07, "loss": 0.0215, "step": 207770 }, { "epoch": 1.6812039809046038, "grad_norm": 0.2672421932220459, "learning_rate": 7.542011740804417e-07, "loss": 0.0149, "step": 207780 }, { "epoch": 1.681284893599806, "grad_norm": 0.2511093318462372, "learning_rate": 7.538283014144549e-07, "loss": 0.0155, "step": 207790 }, { "epoch": 1.6813658062950076, "grad_norm": 0.2332300841808319, "learning_rate": 7.534555134294009e-07, "loss": 0.0142, "step": 207800 }, { "epoch": 1.6814467189902096, "grad_norm": 0.24470598995685577, "learning_rate": 7.530828101327109e-07, "loss": 0.0176, "step": 207810 }, { "epoch": 1.6815276316854115, "grad_norm": 0.271471232175827, "learning_rate": 7.527101915318169e-07, "loss": 0.028, "step": 207820 }, { "epoch": 1.6816085443806132, "grad_norm": 0.40259385108947754, "learning_rate": 7.523376576341546e-07, "loss": 0.0231, "step": 207830 }, { "epoch": 1.6816894570758152, "grad_norm": 0.2570849061012268, "learning_rate": 7.519652084471479e-07, "loss": 0.0112, "step": 207840 }, { "epoch": 1.6817703697710171, "grad_norm": 0.2897126078605652, "learning_rate": 7.515928439782283e-07, "loss": 0.0124, "step": 207850 }, { "epoch": 1.6818512824662188, "grad_norm": 0.6232751607894897, "learning_rate": 7.512205642348219e-07, "loss": 0.0133, "step": 207860 }, { "epoch": 1.6819321951614208, "grad_norm": 0.14657637476921082, "learning_rate": 7.508483692243491e-07, "loss": 0.0134, "step": 207870 }, { "epoch": 1.6820131078566227, "grad_norm": 0.31945639848709106, "learning_rate": 7.504762589542369e-07, "loss": 0.0184, "step": 207880 }, { "epoch": 1.6820940205518244, "grad_norm": 0.4756549000740051, "learning_rate": 7.501042334319053e-07, "loss": 0.0129, "step": 207890 }, { "epoch": 1.6821749332470266, "grad_norm": 0.19508692622184753, "learning_rate": 7.49732292664771e-07, "loss": 0.0211, "step": 207900 }, { "epoch": 1.6822558459422283, "grad_norm": 0.30100712180137634, "learning_rate": 7.493604366602542e-07, "loss": 0.0085, "step": 207910 }, { "epoch": 1.6823367586374303, "grad_norm": 0.3031668961048126, "learning_rate": 7.4898866542577e-07, "loss": 0.0267, "step": 207920 }, { "epoch": 1.6824176713326322, "grad_norm": 0.17204993963241577, "learning_rate": 7.486169789687325e-07, "loss": 0.0135, "step": 207930 }, { "epoch": 1.682498584027834, "grad_norm": 0.9227681159973145, "learning_rate": 7.482453772965542e-07, "loss": 0.0192, "step": 207940 }, { "epoch": 1.6825794967230359, "grad_norm": 0.3786884844303131, "learning_rate": 7.478738604166463e-07, "loss": 0.0174, "step": 207950 }, { "epoch": 1.6826604094182378, "grad_norm": 0.3891862630844116, "learning_rate": 7.475024283364174e-07, "loss": 0.0193, "step": 207960 }, { "epoch": 1.6827413221134395, "grad_norm": 0.43114811182022095, "learning_rate": 7.471310810632754e-07, "loss": 0.0228, "step": 207970 }, { "epoch": 1.6828222348086415, "grad_norm": 0.3843551278114319, "learning_rate": 7.467598186046248e-07, "loss": 0.0148, "step": 207980 }, { "epoch": 1.6829031475038434, "grad_norm": 0.2059422880411148, "learning_rate": 7.463886409678728e-07, "loss": 0.0236, "step": 207990 }, { "epoch": 1.6829840601990451, "grad_norm": 0.3917522728443146, "learning_rate": 7.460175481604181e-07, "loss": 0.0177, "step": 208000 }, { "epoch": 1.683064972894247, "grad_norm": 0.30781203508377075, "learning_rate": 7.456465401896617e-07, "loss": 0.026, "step": 208010 }, { "epoch": 1.683145885589449, "grad_norm": 0.5009530186653137, "learning_rate": 7.452756170630066e-07, "loss": 0.0168, "step": 208020 }, { "epoch": 1.6832267982846507, "grad_norm": 0.2621578872203827, "learning_rate": 7.449047787878455e-07, "loss": 0.0182, "step": 208030 }, { "epoch": 1.6833077109798529, "grad_norm": 0.338640421628952, "learning_rate": 7.445340253715744e-07, "loss": 0.0136, "step": 208040 }, { "epoch": 1.6833886236750546, "grad_norm": 0.5234189033508301, "learning_rate": 7.441633568215911e-07, "loss": 0.021, "step": 208050 }, { "epoch": 1.6834695363702565, "grad_norm": 0.38464826345443726, "learning_rate": 7.437927731452827e-07, "loss": 0.0182, "step": 208060 }, { "epoch": 1.6835504490654585, "grad_norm": 0.3138248026371002, "learning_rate": 7.434222743500435e-07, "loss": 0.0238, "step": 208070 }, { "epoch": 1.6836313617606602, "grad_norm": 0.6049676537513733, "learning_rate": 7.430518604432624e-07, "loss": 0.0268, "step": 208080 }, { "epoch": 1.6837122744558621, "grad_norm": 0.29690927267074585, "learning_rate": 7.426815314323221e-07, "loss": 0.0204, "step": 208090 }, { "epoch": 1.683793187151064, "grad_norm": 0.20521733164787292, "learning_rate": 7.42311287324613e-07, "loss": 0.0178, "step": 208100 }, { "epoch": 1.6838740998462658, "grad_norm": 0.5626742243766785, "learning_rate": 7.419411281275179e-07, "loss": 0.0199, "step": 208110 }, { "epoch": 1.6839550125414677, "grad_norm": 0.3864908516407013, "learning_rate": 7.41571053848415e-07, "loss": 0.0205, "step": 208120 }, { "epoch": 1.6840359252366697, "grad_norm": 0.531268298625946, "learning_rate": 7.412010644946893e-07, "loss": 0.015, "step": 208130 }, { "epoch": 1.6841168379318714, "grad_norm": 0.6334229707717896, "learning_rate": 7.408311600737173e-07, "loss": 0.0226, "step": 208140 }, { "epoch": 1.6841977506270736, "grad_norm": 0.1936444342136383, "learning_rate": 7.404613405928763e-07, "loss": 0.0142, "step": 208150 }, { "epoch": 1.6842786633222753, "grad_norm": 0.509470522403717, "learning_rate": 7.400916060595414e-07, "loss": 0.0249, "step": 208160 }, { "epoch": 1.684359576017477, "grad_norm": 0.24290908873081207, "learning_rate": 7.397219564810865e-07, "loss": 0.0186, "step": 208170 }, { "epoch": 1.6844404887126792, "grad_norm": 0.7167633771896362, "learning_rate": 7.393523918648837e-07, "loss": 0.0195, "step": 208180 }, { "epoch": 1.6845214014078809, "grad_norm": 0.42862066626548767, "learning_rate": 7.389829122183023e-07, "loss": 0.0173, "step": 208190 }, { "epoch": 1.6846023141030828, "grad_norm": 0.4068213701248169, "learning_rate": 7.386135175487103e-07, "loss": 0.0319, "step": 208200 }, { "epoch": 1.6846832267982847, "grad_norm": 0.1634320318698883, "learning_rate": 7.382442078634777e-07, "loss": 0.0238, "step": 208210 }, { "epoch": 1.6847641394934865, "grad_norm": 0.3243829011917114, "learning_rate": 7.378749831699666e-07, "loss": 0.0186, "step": 208220 }, { "epoch": 1.6848450521886884, "grad_norm": 0.21933741867542267, "learning_rate": 7.3750584347554e-07, "loss": 0.015, "step": 208230 }, { "epoch": 1.6849259648838903, "grad_norm": 0.2330106794834137, "learning_rate": 7.37136788787563e-07, "loss": 0.0186, "step": 208240 }, { "epoch": 1.685006877579092, "grad_norm": 0.21039898693561554, "learning_rate": 7.367678191133925e-07, "loss": 0.0186, "step": 208250 }, { "epoch": 1.685087790274294, "grad_norm": 0.5914091467857361, "learning_rate": 7.363989344603861e-07, "loss": 0.0197, "step": 208260 }, { "epoch": 1.685168702969496, "grad_norm": 0.28868430852890015, "learning_rate": 7.36030134835905e-07, "loss": 0.0104, "step": 208270 }, { "epoch": 1.6852496156646977, "grad_norm": 0.11226090788841248, "learning_rate": 7.356614202472984e-07, "loss": 0.0257, "step": 208280 }, { "epoch": 1.6853305283598998, "grad_norm": 0.3760523796081543, "learning_rate": 7.352927907019242e-07, "loss": 0.0191, "step": 208290 }, { "epoch": 1.6854114410551015, "grad_norm": 0.9136833548545837, "learning_rate": 7.349242462071326e-07, "loss": 0.0225, "step": 208300 }, { "epoch": 1.6854923537503033, "grad_norm": 0.19719751179218292, "learning_rate": 7.345557867702707e-07, "loss": 0.0133, "step": 208310 }, { "epoch": 1.6855732664455054, "grad_norm": 0.3260858356952667, "learning_rate": 7.341874123986903e-07, "loss": 0.0106, "step": 208320 }, { "epoch": 1.6856541791407071, "grad_norm": 0.6613368988037109, "learning_rate": 7.338191230997365e-07, "loss": 0.015, "step": 208330 }, { "epoch": 1.685735091835909, "grad_norm": 0.5416080951690674, "learning_rate": 7.334509188807537e-07, "loss": 0.0201, "step": 208340 }, { "epoch": 1.685816004531111, "grad_norm": 0.1126607209444046, "learning_rate": 7.330827997490852e-07, "loss": 0.0147, "step": 208350 }, { "epoch": 1.6858969172263127, "grad_norm": 0.025501979514956474, "learning_rate": 7.327147657120725e-07, "loss": 0.0197, "step": 208360 }, { "epoch": 1.6859778299215147, "grad_norm": 0.49156588315963745, "learning_rate": 7.323468167770553e-07, "loss": 0.0169, "step": 208370 }, { "epoch": 1.6860587426167166, "grad_norm": 0.22938302159309387, "learning_rate": 7.31978952951371e-07, "loss": 0.0173, "step": 208380 }, { "epoch": 1.6861396553119183, "grad_norm": 0.17598547041416168, "learning_rate": 7.316111742423554e-07, "loss": 0.0155, "step": 208390 }, { "epoch": 1.6862205680071203, "grad_norm": 0.5424051880836487, "learning_rate": 7.312434806573465e-07, "loss": 0.0101, "step": 208400 }, { "epoch": 1.6863014807023222, "grad_norm": 0.28212594985961914, "learning_rate": 7.308758722036735e-07, "loss": 0.0143, "step": 208410 }, { "epoch": 1.686382393397524, "grad_norm": 0.4349152445793152, "learning_rate": 7.305083488886672e-07, "loss": 0.019, "step": 208420 }, { "epoch": 1.686463306092726, "grad_norm": 0.24252425134181976, "learning_rate": 7.301409107196611e-07, "loss": 0.0165, "step": 208430 }, { "epoch": 1.6865442187879278, "grad_norm": 0.4538879692554474, "learning_rate": 7.297735577039794e-07, "loss": 0.0169, "step": 208440 }, { "epoch": 1.6866251314831298, "grad_norm": 0.36971965432167053, "learning_rate": 7.294062898489479e-07, "loss": 0.0129, "step": 208450 }, { "epoch": 1.6867060441783317, "grad_norm": 0.4520987570285797, "learning_rate": 7.290391071618946e-07, "loss": 0.0162, "step": 208460 }, { "epoch": 1.6867869568735334, "grad_norm": 0.571851372718811, "learning_rate": 7.286720096501376e-07, "loss": 0.0201, "step": 208470 }, { "epoch": 1.6868678695687354, "grad_norm": 0.3344075679779053, "learning_rate": 7.283049973210016e-07, "loss": 0.0134, "step": 208480 }, { "epoch": 1.6869487822639373, "grad_norm": 0.23340196907520294, "learning_rate": 7.279380701818051e-07, "loss": 0.0186, "step": 208490 }, { "epoch": 1.687029694959139, "grad_norm": 0.18684369325637817, "learning_rate": 7.275712282398634e-07, "loss": 0.0221, "step": 208500 }, { "epoch": 1.687110607654341, "grad_norm": 0.33388322591781616, "learning_rate": 7.272044715024945e-07, "loss": 0.0133, "step": 208510 }, { "epoch": 1.687191520349543, "grad_norm": 0.7076943516731262, "learning_rate": 7.268377999770127e-07, "loss": 0.0291, "step": 208520 }, { "epoch": 1.6872724330447446, "grad_norm": 0.6375203132629395, "learning_rate": 7.264712136707297e-07, "loss": 0.025, "step": 208530 }, { "epoch": 1.6873533457399466, "grad_norm": 0.07007425278425217, "learning_rate": 7.261047125909571e-07, "loss": 0.0223, "step": 208540 }, { "epoch": 1.6874342584351485, "grad_norm": 0.6204449534416199, "learning_rate": 7.257382967450027e-07, "loss": 0.0176, "step": 208550 }, { "epoch": 1.6875151711303502, "grad_norm": 0.3584047853946686, "learning_rate": 7.253719661401748e-07, "loss": 0.0153, "step": 208560 }, { "epoch": 1.6875960838255524, "grad_norm": 0.1069910004734993, "learning_rate": 7.250057207837791e-07, "loss": 0.0211, "step": 208570 }, { "epoch": 1.687676996520754, "grad_norm": 0.3159853518009186, "learning_rate": 7.246395606831191e-07, "loss": 0.0155, "step": 208580 }, { "epoch": 1.687757909215956, "grad_norm": 0.6268966794013977, "learning_rate": 7.242734858454975e-07, "loss": 0.0217, "step": 208590 }, { "epoch": 1.687838821911158, "grad_norm": 0.1898757368326187, "learning_rate": 7.23907496278215e-07, "loss": 0.012, "step": 208600 }, { "epoch": 1.6879197346063597, "grad_norm": 0.00438074953854084, "learning_rate": 7.235415919885686e-07, "loss": 0.017, "step": 208610 }, { "epoch": 1.6880006473015616, "grad_norm": 0.32888317108154297, "learning_rate": 7.231757729838596e-07, "loss": 0.013, "step": 208620 }, { "epoch": 1.6880815599967636, "grad_norm": 0.08744307607412338, "learning_rate": 7.228100392713799e-07, "loss": 0.0137, "step": 208630 }, { "epoch": 1.6881624726919653, "grad_norm": 0.26658573746681213, "learning_rate": 7.224443908584239e-07, "loss": 0.0131, "step": 208640 }, { "epoch": 1.6882433853871672, "grad_norm": 0.40908053517341614, "learning_rate": 7.220788277522855e-07, "loss": 0.019, "step": 208650 }, { "epoch": 1.6883242980823692, "grad_norm": 0.2056598663330078, "learning_rate": 7.217133499602536e-07, "loss": 0.0224, "step": 208660 }, { "epoch": 1.6884052107775709, "grad_norm": 0.43892577290534973, "learning_rate": 7.213479574896154e-07, "loss": 0.0256, "step": 208670 }, { "epoch": 1.6884861234727728, "grad_norm": 0.3369806706905365, "learning_rate": 7.209826503476613e-07, "loss": 0.014, "step": 208680 }, { "epoch": 1.6885670361679748, "grad_norm": 0.4775499105453491, "learning_rate": 7.206174285416734e-07, "loss": 0.0151, "step": 208690 }, { "epoch": 1.6886479488631765, "grad_norm": 0.34087809920310974, "learning_rate": 7.202522920789379e-07, "loss": 0.0182, "step": 208700 }, { "epoch": 1.6887288615583786, "grad_norm": 0.3213619589805603, "learning_rate": 7.198872409667351e-07, "loss": 0.0129, "step": 208710 }, { "epoch": 1.6888097742535804, "grad_norm": 0.46489596366882324, "learning_rate": 7.195222752123454e-07, "loss": 0.0161, "step": 208720 }, { "epoch": 1.6888906869487823, "grad_norm": 0.2173299789428711, "learning_rate": 7.191573948230479e-07, "loss": 0.0158, "step": 208730 }, { "epoch": 1.6889715996439842, "grad_norm": 0.2954733073711395, "learning_rate": 7.187925998061184e-07, "loss": 0.0139, "step": 208740 }, { "epoch": 1.689052512339186, "grad_norm": 0.14705507457256317, "learning_rate": 7.18427890168833e-07, "loss": 0.016, "step": 208750 }, { "epoch": 1.689133425034388, "grad_norm": 0.4308481812477112, "learning_rate": 7.180632659184639e-07, "loss": 0.0188, "step": 208760 }, { "epoch": 1.6892143377295898, "grad_norm": 0.6025606393814087, "learning_rate": 7.176987270622837e-07, "loss": 0.0388, "step": 208770 }, { "epoch": 1.6892952504247916, "grad_norm": 0.23914548754692078, "learning_rate": 7.173342736075622e-07, "loss": 0.0207, "step": 208780 }, { "epoch": 1.6893761631199935, "grad_norm": 0.4518732726573944, "learning_rate": 7.169699055615676e-07, "loss": 0.0175, "step": 208790 }, { "epoch": 1.6894570758151954, "grad_norm": 0.36434105038642883, "learning_rate": 7.16605622931566e-07, "loss": 0.0178, "step": 208800 }, { "epoch": 1.6895379885103972, "grad_norm": 0.37382036447525024, "learning_rate": 7.162414257248229e-07, "loss": 0.019, "step": 208810 }, { "epoch": 1.6896189012055993, "grad_norm": 0.408344030380249, "learning_rate": 7.158773139486008e-07, "loss": 0.0137, "step": 208820 }, { "epoch": 1.689699813900801, "grad_norm": 0.48738884925842285, "learning_rate": 7.155132876101606e-07, "loss": 0.0105, "step": 208830 }, { "epoch": 1.6897807265960028, "grad_norm": 0.6027119755744934, "learning_rate": 7.151493467167653e-07, "loss": 0.0242, "step": 208840 }, { "epoch": 1.689861639291205, "grad_norm": 0.09093628823757172, "learning_rate": 7.147854912756691e-07, "loss": 0.0262, "step": 208850 }, { "epoch": 1.6899425519864066, "grad_norm": 0.25130486488342285, "learning_rate": 7.14421721294129e-07, "loss": 0.0154, "step": 208860 }, { "epoch": 1.6900234646816086, "grad_norm": 0.6192461252212524, "learning_rate": 7.140580367794026e-07, "loss": 0.0147, "step": 208870 }, { "epoch": 1.6901043773768105, "grad_norm": 0.28103873133659363, "learning_rate": 7.136944377387384e-07, "loss": 0.011, "step": 208880 }, { "epoch": 1.6901852900720122, "grad_norm": 0.13700835406780243, "learning_rate": 7.133309241793906e-07, "loss": 0.0148, "step": 208890 }, { "epoch": 1.6902662027672142, "grad_norm": 0.5304850339889526, "learning_rate": 7.129674961086081e-07, "loss": 0.0178, "step": 208900 }, { "epoch": 1.6903471154624161, "grad_norm": 0.4344831705093384, "learning_rate": 7.126041535336387e-07, "loss": 0.0189, "step": 208910 }, { "epoch": 1.6904280281576178, "grad_norm": 0.4990748465061188, "learning_rate": 7.122408964617283e-07, "loss": 0.0232, "step": 208920 }, { "epoch": 1.6905089408528198, "grad_norm": 0.15492630004882812, "learning_rate": 7.118777249001213e-07, "loss": 0.0094, "step": 208930 }, { "epoch": 1.6905898535480217, "grad_norm": 0.5807963013648987, "learning_rate": 7.115146388560607e-07, "loss": 0.0149, "step": 208940 }, { "epoch": 1.6906707662432234, "grad_norm": 0.08144547045230865, "learning_rate": 7.111516383367873e-07, "loss": 0.0174, "step": 208950 }, { "epoch": 1.6907516789384256, "grad_norm": 0.45953118801116943, "learning_rate": 7.107887233495398e-07, "loss": 0.0199, "step": 208960 }, { "epoch": 1.6908325916336273, "grad_norm": 0.8860967755317688, "learning_rate": 7.104258939015568e-07, "loss": 0.0221, "step": 208970 }, { "epoch": 1.690913504328829, "grad_norm": 0.39467331767082214, "learning_rate": 7.100631500000732e-07, "loss": 0.0189, "step": 208980 }, { "epoch": 1.6909944170240312, "grad_norm": 0.31655189394950867, "learning_rate": 7.097004916523243e-07, "loss": 0.0176, "step": 208990 }, { "epoch": 1.691075329719233, "grad_norm": 0.1651729792356491, "learning_rate": 7.093379188655414e-07, "loss": 0.0157, "step": 209000 }, { "epoch": 1.6911562424144349, "grad_norm": 0.35517606139183044, "learning_rate": 7.089754316469554e-07, "loss": 0.0176, "step": 209010 }, { "epoch": 1.6912371551096368, "grad_norm": 0.3628454804420471, "learning_rate": 7.086130300037953e-07, "loss": 0.0194, "step": 209020 }, { "epoch": 1.6913180678048385, "grad_norm": 0.4954923093318939, "learning_rate": 7.082507139432904e-07, "loss": 0.0212, "step": 209030 }, { "epoch": 1.6913989805000404, "grad_norm": 0.58644038438797, "learning_rate": 7.07888483472664e-07, "loss": 0.0215, "step": 209040 }, { "epoch": 1.6914798931952424, "grad_norm": 0.508438229560852, "learning_rate": 7.075263385991394e-07, "loss": 0.02, "step": 209050 }, { "epoch": 1.691560805890444, "grad_norm": 0.4531589448451996, "learning_rate": 7.07164279329941e-07, "loss": 0.0164, "step": 209060 }, { "epoch": 1.691641718585646, "grad_norm": 0.20440557599067688, "learning_rate": 7.0680230567229e-07, "loss": 0.0279, "step": 209070 }, { "epoch": 1.691722631280848, "grad_norm": 0.11854103952646255, "learning_rate": 7.064404176334017e-07, "loss": 0.0162, "step": 209080 }, { "epoch": 1.6918035439760497, "grad_norm": 0.169923335313797, "learning_rate": 7.060786152204957e-07, "loss": 0.0242, "step": 209090 }, { "epoch": 1.6918844566712519, "grad_norm": 0.4041852355003357, "learning_rate": 7.057168984407869e-07, "loss": 0.0174, "step": 209100 }, { "epoch": 1.6919653693664536, "grad_norm": 0.17914600670337677, "learning_rate": 7.053552673014896e-07, "loss": 0.0218, "step": 209110 }, { "epoch": 1.6920462820616555, "grad_norm": 0.11967680603265762, "learning_rate": 7.049937218098147e-07, "loss": 0.0197, "step": 209120 }, { "epoch": 1.6921271947568575, "grad_norm": 0.25599828362464905, "learning_rate": 7.046322619729723e-07, "loss": 0.0141, "step": 209130 }, { "epoch": 1.6922081074520592, "grad_norm": 0.41045111417770386, "learning_rate": 7.042708877981724e-07, "loss": 0.0217, "step": 209140 }, { "epoch": 1.6922890201472611, "grad_norm": 0.34876349568367004, "learning_rate": 7.039095992926204e-07, "loss": 0.0211, "step": 209150 }, { "epoch": 1.692369932842463, "grad_norm": 0.2522251009941101, "learning_rate": 7.03548396463522e-07, "loss": 0.0213, "step": 209160 }, { "epoch": 1.6924508455376648, "grad_norm": 0.37660884857177734, "learning_rate": 7.031872793180811e-07, "loss": 0.0268, "step": 209170 }, { "epoch": 1.6925317582328667, "grad_norm": 0.5261776447296143, "learning_rate": 7.028262478634984e-07, "loss": 0.0261, "step": 209180 }, { "epoch": 1.6926126709280687, "grad_norm": 0.39586880803108215, "learning_rate": 7.024653021069744e-07, "loss": 0.0148, "step": 209190 }, { "epoch": 1.6926935836232704, "grad_norm": 0.2482355237007141, "learning_rate": 7.021044420557077e-07, "loss": 0.0207, "step": 209200 }, { "epoch": 1.6927744963184723, "grad_norm": 0.28803813457489014, "learning_rate": 7.017436677168948e-07, "loss": 0.0188, "step": 209210 }, { "epoch": 1.6928554090136743, "grad_norm": 0.38514918088912964, "learning_rate": 7.013829790977306e-07, "loss": 0.0247, "step": 209220 }, { "epoch": 1.692936321708876, "grad_norm": 0.3158520460128784, "learning_rate": 7.010223762054075e-07, "loss": 0.0128, "step": 209230 }, { "epoch": 1.6930172344040781, "grad_norm": 0.14462533593177795, "learning_rate": 7.006618590471165e-07, "loss": 0.0108, "step": 209240 }, { "epoch": 1.6930981470992799, "grad_norm": 0.4856274724006653, "learning_rate": 7.003014276300502e-07, "loss": 0.0322, "step": 209250 }, { "epoch": 1.6931790597944818, "grad_norm": 0.1397782415151596, "learning_rate": 6.999410819613955e-07, "loss": 0.0212, "step": 209260 }, { "epoch": 1.6932599724896837, "grad_norm": 0.4088376760482788, "learning_rate": 6.995808220483358e-07, "loss": 0.0206, "step": 209270 }, { "epoch": 1.6933408851848855, "grad_norm": 0.7708694338798523, "learning_rate": 6.99220647898059e-07, "loss": 0.0275, "step": 209280 }, { "epoch": 1.6934217978800874, "grad_norm": 0.3238854706287384, "learning_rate": 6.988605595177483e-07, "loss": 0.019, "step": 209290 }, { "epoch": 1.6935027105752893, "grad_norm": 0.5781298279762268, "learning_rate": 6.985005569145814e-07, "loss": 0.0282, "step": 209300 }, { "epoch": 1.693583623270491, "grad_norm": 0.22010838985443115, "learning_rate": 6.981406400957414e-07, "loss": 0.0115, "step": 209310 }, { "epoch": 1.693664535965693, "grad_norm": 0.2916402816772461, "learning_rate": 6.977808090684046e-07, "loss": 0.0248, "step": 209320 }, { "epoch": 1.693745448660895, "grad_norm": 0.5077095031738281, "learning_rate": 6.974210638397471e-07, "loss": 0.0275, "step": 209330 }, { "epoch": 1.6938263613560967, "grad_norm": 0.5464421510696411, "learning_rate": 6.970614044169432e-07, "loss": 0.0221, "step": 209340 }, { "epoch": 1.6939072740512986, "grad_norm": 0.4067747890949249, "learning_rate": 6.967018308071661e-07, "loss": 0.0237, "step": 209350 }, { "epoch": 1.6939881867465005, "grad_norm": 0.4409008026123047, "learning_rate": 6.963423430175859e-07, "loss": 0.0258, "step": 209360 }, { "epoch": 1.6940690994417023, "grad_norm": 0.039084821939468384, "learning_rate": 6.95982941055372e-07, "loss": 0.0271, "step": 209370 }, { "epoch": 1.6941500121369044, "grad_norm": 0.16198013722896576, "learning_rate": 6.956236249276926e-07, "loss": 0.0114, "step": 209380 }, { "epoch": 1.6942309248321061, "grad_norm": 0.5476803183555603, "learning_rate": 6.952643946417132e-07, "loss": 0.0092, "step": 209390 }, { "epoch": 1.694311837527308, "grad_norm": 0.23691734671592712, "learning_rate": 6.949052502045977e-07, "loss": 0.0127, "step": 209400 }, { "epoch": 1.69439275022251, "grad_norm": 0.401764839887619, "learning_rate": 6.94546191623508e-07, "loss": 0.0127, "step": 209410 }, { "epoch": 1.6944736629177117, "grad_norm": 0.24683436751365662, "learning_rate": 6.941872189056059e-07, "loss": 0.0197, "step": 209420 }, { "epoch": 1.6945545756129137, "grad_norm": 0.24542906880378723, "learning_rate": 6.938283320580485e-07, "loss": 0.0173, "step": 209430 }, { "epoch": 1.6946354883081156, "grad_norm": 0.2228868007659912, "learning_rate": 6.934695310879952e-07, "loss": 0.0216, "step": 209440 }, { "epoch": 1.6947164010033173, "grad_norm": 0.5431485772132874, "learning_rate": 6.931108160026018e-07, "loss": 0.0171, "step": 209450 }, { "epoch": 1.6947973136985193, "grad_norm": 0.4366428554058075, "learning_rate": 6.927521868090187e-07, "loss": 0.021, "step": 209460 }, { "epoch": 1.6948782263937212, "grad_norm": 0.38628771901130676, "learning_rate": 6.923936435144013e-07, "loss": 0.0209, "step": 209470 }, { "epoch": 1.694959139088923, "grad_norm": 0.18674367666244507, "learning_rate": 6.920351861259001e-07, "loss": 0.0366, "step": 209480 }, { "epoch": 1.695040051784125, "grad_norm": 0.4218727648258209, "learning_rate": 6.916768146506603e-07, "loss": 0.0121, "step": 209490 }, { "epoch": 1.6951209644793268, "grad_norm": 0.3683704137802124, "learning_rate": 6.913185290958324e-07, "loss": 0.0171, "step": 209500 }, { "epoch": 1.6952018771745285, "grad_norm": 0.23975585401058197, "learning_rate": 6.909603294685602e-07, "loss": 0.0174, "step": 209510 }, { "epoch": 1.6952827898697307, "grad_norm": 0.24538946151733398, "learning_rate": 6.906022157759878e-07, "loss": 0.0152, "step": 209520 }, { "epoch": 1.6953637025649324, "grad_norm": 0.7161343693733215, "learning_rate": 6.902441880252564e-07, "loss": 0.0254, "step": 209530 }, { "epoch": 1.6954446152601343, "grad_norm": 0.22489935159683228, "learning_rate": 6.898862462235062e-07, "loss": 0.0183, "step": 209540 }, { "epoch": 1.6955255279553363, "grad_norm": 0.5033699870109558, "learning_rate": 6.89528390377876e-07, "loss": 0.0123, "step": 209550 }, { "epoch": 1.695606440650538, "grad_norm": 0.17662237584590912, "learning_rate": 6.891706204955024e-07, "loss": 0.0133, "step": 209560 }, { "epoch": 1.69568735334574, "grad_norm": 0.28746742010116577, "learning_rate": 6.8881293658352e-07, "loss": 0.0104, "step": 209570 }, { "epoch": 1.6957682660409419, "grad_norm": 0.12497389316558838, "learning_rate": 6.884553386490622e-07, "loss": 0.0123, "step": 209580 }, { "epoch": 1.6958491787361436, "grad_norm": 0.44899845123291016, "learning_rate": 6.88097826699261e-07, "loss": 0.0199, "step": 209590 }, { "epoch": 1.6959300914313455, "grad_norm": 0.7734050750732422, "learning_rate": 6.877404007412442e-07, "loss": 0.0233, "step": 209600 }, { "epoch": 1.6960110041265475, "grad_norm": 0.2208855003118515, "learning_rate": 6.873830607821441e-07, "loss": 0.0164, "step": 209610 }, { "epoch": 1.6960919168217492, "grad_norm": 0.636803388595581, "learning_rate": 6.87025806829083e-07, "loss": 0.0233, "step": 209620 }, { "epoch": 1.6961728295169514, "grad_norm": 0.8175402283668518, "learning_rate": 6.866686388891864e-07, "loss": 0.0223, "step": 209630 }, { "epoch": 1.696253742212153, "grad_norm": 0.19614431262016296, "learning_rate": 6.8631155696958e-07, "loss": 0.0105, "step": 209640 }, { "epoch": 1.6963346549073548, "grad_norm": 0.24514691531658173, "learning_rate": 6.859545610773805e-07, "loss": 0.0107, "step": 209650 }, { "epoch": 1.696415567602557, "grad_norm": 0.36468422412872314, "learning_rate": 6.855976512197116e-07, "loss": 0.0157, "step": 209660 }, { "epoch": 1.6964964802977587, "grad_norm": 0.1405043751001358, "learning_rate": 6.852408274036904e-07, "loss": 0.0139, "step": 209670 }, { "epoch": 1.6965773929929606, "grad_norm": 0.3622055649757385, "learning_rate": 6.848840896364295e-07, "loss": 0.0208, "step": 209680 }, { "epoch": 1.6966583056881626, "grad_norm": 0.3630783259868622, "learning_rate": 6.845274379250472e-07, "loss": 0.0162, "step": 209690 }, { "epoch": 1.6967392183833643, "grad_norm": 0.36670032143592834, "learning_rate": 6.841708722766554e-07, "loss": 0.0192, "step": 209700 }, { "epoch": 1.6968201310785662, "grad_norm": 0.24132710695266724, "learning_rate": 6.838143926983631e-07, "loss": 0.018, "step": 209710 }, { "epoch": 1.6969010437737682, "grad_norm": 0.2669043242931366, "learning_rate": 6.834579991972812e-07, "loss": 0.0161, "step": 209720 }, { "epoch": 1.6969819564689699, "grad_norm": 0.22678890824317932, "learning_rate": 6.831016917805172e-07, "loss": 0.0226, "step": 209730 }, { "epoch": 1.6970628691641718, "grad_norm": 0.2708948254585266, "learning_rate": 6.827454704551767e-07, "loss": 0.0167, "step": 209740 }, { "epoch": 1.6971437818593738, "grad_norm": 0.2544391453266144, "learning_rate": 6.823893352283633e-07, "loss": 0.0246, "step": 209750 }, { "epoch": 1.6972246945545755, "grad_norm": 0.3945915699005127, "learning_rate": 6.820332861071799e-07, "loss": 0.0181, "step": 209760 }, { "epoch": 1.6973056072497776, "grad_norm": 0.06862399727106094, "learning_rate": 6.816773230987273e-07, "loss": 0.0196, "step": 209770 }, { "epoch": 1.6973865199449794, "grad_norm": 0.2011564075946808, "learning_rate": 6.813214462101037e-07, "loss": 0.0175, "step": 209780 }, { "epoch": 1.6974674326401813, "grad_norm": 0.15852272510528564, "learning_rate": 6.809656554484056e-07, "loss": 0.0195, "step": 209790 }, { "epoch": 1.6975483453353832, "grad_norm": 0.5506576895713806, "learning_rate": 6.80609950820732e-07, "loss": 0.0167, "step": 209800 }, { "epoch": 1.697629258030585, "grad_norm": 0.26242583990097046, "learning_rate": 6.802543323341731e-07, "loss": 0.0171, "step": 209810 }, { "epoch": 1.697710170725787, "grad_norm": 0.28793448209762573, "learning_rate": 6.798987999958212e-07, "loss": 0.0108, "step": 209820 }, { "epoch": 1.6977910834209888, "grad_norm": 0.22583958506584167, "learning_rate": 6.795433538127699e-07, "loss": 0.0192, "step": 209830 }, { "epoch": 1.6978719961161906, "grad_norm": 0.6841935515403748, "learning_rate": 6.791879937921042e-07, "loss": 0.0138, "step": 209840 }, { "epoch": 1.6979529088113925, "grad_norm": 0.45785096287727356, "learning_rate": 6.788327199409112e-07, "loss": 0.0234, "step": 209850 }, { "epoch": 1.6980338215065944, "grad_norm": 0.04323781281709671, "learning_rate": 6.784775322662796e-07, "loss": 0.0135, "step": 209860 }, { "epoch": 1.6981147342017961, "grad_norm": 0.39871302247047424, "learning_rate": 6.781224307752882e-07, "loss": 0.0137, "step": 209870 }, { "epoch": 1.698195646896998, "grad_norm": 0.3862256407737732, "learning_rate": 6.777674154750219e-07, "loss": 0.017, "step": 209880 }, { "epoch": 1.6982765595922, "grad_norm": 0.4289039671421051, "learning_rate": 6.774124863725611e-07, "loss": 0.0215, "step": 209890 }, { "epoch": 1.6983574722874017, "grad_norm": 0.33054855465888977, "learning_rate": 6.77057643474981e-07, "loss": 0.0202, "step": 209900 }, { "epoch": 1.698438384982604, "grad_norm": 0.33023953437805176, "learning_rate": 6.767028867893605e-07, "loss": 0.0116, "step": 209910 }, { "epoch": 1.6985192976778056, "grad_norm": 0.23785609006881714, "learning_rate": 6.763482163227747e-07, "loss": 0.0202, "step": 209920 }, { "epoch": 1.6986002103730076, "grad_norm": 0.17526185512542725, "learning_rate": 6.759936320822958e-07, "loss": 0.0154, "step": 209930 }, { "epoch": 1.6986811230682095, "grad_norm": 0.32342204451560974, "learning_rate": 6.756391340749957e-07, "loss": 0.0197, "step": 209940 }, { "epoch": 1.6987620357634112, "grad_norm": 0.6591234803199768, "learning_rate": 6.752847223079434e-07, "loss": 0.0179, "step": 209950 }, { "epoch": 1.6988429484586132, "grad_norm": 0.3957979679107666, "learning_rate": 6.749303967882081e-07, "loss": 0.0185, "step": 209960 }, { "epoch": 1.698923861153815, "grad_norm": 0.3046381175518036, "learning_rate": 6.745761575228549e-07, "loss": 0.0257, "step": 209970 }, { "epoch": 1.6990047738490168, "grad_norm": 0.25449976325035095, "learning_rate": 6.742220045189479e-07, "loss": 0.0185, "step": 209980 }, { "epoch": 1.6990856865442188, "grad_norm": 0.3670799732208252, "learning_rate": 6.738679377835533e-07, "loss": 0.014, "step": 209990 }, { "epoch": 1.6991665992394207, "grad_norm": 0.07929053157567978, "learning_rate": 6.735139573237287e-07, "loss": 0.0188, "step": 210000 }, { "epoch": 1.6992475119346224, "grad_norm": 0.33576732873916626, "learning_rate": 6.731600631465335e-07, "loss": 0.0114, "step": 210010 }, { "epoch": 1.6993284246298246, "grad_norm": 1.016082525253296, "learning_rate": 6.728062552590287e-07, "loss": 0.023, "step": 210020 }, { "epoch": 1.6994093373250263, "grad_norm": 0.307704895734787, "learning_rate": 6.724525336682663e-07, "loss": 0.0212, "step": 210030 }, { "epoch": 1.699490250020228, "grad_norm": 0.3247676491737366, "learning_rate": 6.720988983813021e-07, "loss": 0.0149, "step": 210040 }, { "epoch": 1.6995711627154302, "grad_norm": 0.3348418176174164, "learning_rate": 6.717453494051901e-07, "loss": 0.0158, "step": 210050 }, { "epoch": 1.699652075410632, "grad_norm": 0.35807403922080994, "learning_rate": 6.713918867469782e-07, "loss": 0.0183, "step": 210060 }, { "epoch": 1.6997329881058338, "grad_norm": 0.4181901514530182, "learning_rate": 6.710385104137179e-07, "loss": 0.0154, "step": 210070 }, { "epoch": 1.6998139008010358, "grad_norm": 0.326082319021225, "learning_rate": 6.70685220412457e-07, "loss": 0.0177, "step": 210080 }, { "epoch": 1.6998948134962375, "grad_norm": 0.032372429966926575, "learning_rate": 6.703320167502369e-07, "loss": 0.0268, "step": 210090 }, { "epoch": 1.6999757261914394, "grad_norm": 0.2547188699245453, "learning_rate": 6.699788994341061e-07, "loss": 0.0198, "step": 210100 }, { "epoch": 1.7000566388866414, "grad_norm": 0.3068036437034607, "learning_rate": 6.69625868471106e-07, "loss": 0.0311, "step": 210110 }, { "epoch": 1.700137551581843, "grad_norm": 0.23262549936771393, "learning_rate": 6.692729238682738e-07, "loss": 0.0073, "step": 210120 }, { "epoch": 1.700218464277045, "grad_norm": 0.26396653056144714, "learning_rate": 6.689200656326522e-07, "loss": 0.0535, "step": 210130 }, { "epoch": 1.700299376972247, "grad_norm": 0.4257710874080658, "learning_rate": 6.685672937712762e-07, "loss": 0.017, "step": 210140 }, { "epoch": 1.7003802896674487, "grad_norm": 0.34024810791015625, "learning_rate": 6.682146082911811e-07, "loss": 0.0239, "step": 210150 }, { "epoch": 1.7004612023626509, "grad_norm": 0.1876441240310669, "learning_rate": 6.678620091994014e-07, "loss": 0.0278, "step": 210160 }, { "epoch": 1.7005421150578526, "grad_norm": 0.212576761841774, "learning_rate": 6.675094965029683e-07, "loss": 0.0154, "step": 210170 }, { "epoch": 1.7006230277530543, "grad_norm": 0.29960134625434875, "learning_rate": 6.67157070208912e-07, "loss": 0.0233, "step": 210180 }, { "epoch": 1.7007039404482565, "grad_norm": 0.6813313961029053, "learning_rate": 6.668047303242603e-07, "loss": 0.0207, "step": 210190 }, { "epoch": 1.7007848531434582, "grad_norm": 0.352389931678772, "learning_rate": 6.664524768560398e-07, "loss": 0.0142, "step": 210200 }, { "epoch": 1.7008657658386601, "grad_norm": 0.47501444816589355, "learning_rate": 6.661003098112783e-07, "loss": 0.0124, "step": 210210 }, { "epoch": 1.700946678533862, "grad_norm": 0.3371339440345764, "learning_rate": 6.657482291969963e-07, "loss": 0.0225, "step": 210220 }, { "epoch": 1.7010275912290638, "grad_norm": 0.5013192892074585, "learning_rate": 6.653962350202142e-07, "loss": 0.0246, "step": 210230 }, { "epoch": 1.7011085039242657, "grad_norm": 0.4809322655200958, "learning_rate": 6.650443272879559e-07, "loss": 0.0241, "step": 210240 }, { "epoch": 1.7011894166194677, "grad_norm": 0.2465280294418335, "learning_rate": 6.646925060072357e-07, "loss": 0.0184, "step": 210250 }, { "epoch": 1.7012703293146694, "grad_norm": 0.3560587763786316, "learning_rate": 6.643407711850696e-07, "loss": 0.011, "step": 210260 }, { "epoch": 1.7013512420098713, "grad_norm": 0.38747939467430115, "learning_rate": 6.639891228284767e-07, "loss": 0.0157, "step": 210270 }, { "epoch": 1.7014321547050733, "grad_norm": 0.45160847902297974, "learning_rate": 6.636375609444651e-07, "loss": 0.0207, "step": 210280 }, { "epoch": 1.701513067400275, "grad_norm": 0.45988762378692627, "learning_rate": 6.632860855400481e-07, "loss": 0.014, "step": 210290 }, { "epoch": 1.7015939800954771, "grad_norm": 0.33442696928977966, "learning_rate": 6.629346966222367e-07, "loss": 0.0245, "step": 210300 }, { "epoch": 1.7016748927906789, "grad_norm": 0.41403815150260925, "learning_rate": 6.625833941980342e-07, "loss": 0.0177, "step": 210310 }, { "epoch": 1.7017558054858806, "grad_norm": 0.5527387261390686, "learning_rate": 6.622321782744507e-07, "loss": 0.0099, "step": 210320 }, { "epoch": 1.7018367181810827, "grad_norm": 0.7420069575309753, "learning_rate": 6.618810488584887e-07, "loss": 0.0247, "step": 210330 }, { "epoch": 1.7019176308762844, "grad_norm": 0.2601150572299957, "learning_rate": 6.615300059571511e-07, "loss": 0.0144, "step": 210340 }, { "epoch": 1.7019985435714864, "grad_norm": 0.519270658493042, "learning_rate": 6.611790495774389e-07, "loss": 0.0159, "step": 210350 }, { "epoch": 1.7020794562666883, "grad_norm": 0.19695129990577698, "learning_rate": 6.608281797263505e-07, "loss": 0.0194, "step": 210360 }, { "epoch": 1.70216036896189, "grad_norm": 0.18323881924152374, "learning_rate": 6.604773964108835e-07, "loss": 0.014, "step": 210370 }, { "epoch": 1.702241281657092, "grad_norm": 0.20392170548439026, "learning_rate": 6.60126699638034e-07, "loss": 0.0195, "step": 210380 }, { "epoch": 1.702322194352294, "grad_norm": 0.30920901894569397, "learning_rate": 6.597760894147958e-07, "loss": 0.0167, "step": 210390 }, { "epoch": 1.7024031070474956, "grad_norm": 0.17726914584636688, "learning_rate": 6.594255657481608e-07, "loss": 0.015, "step": 210400 }, { "epoch": 1.7024840197426976, "grad_norm": 0.3226865530014038, "learning_rate": 6.590751286451191e-07, "loss": 0.0119, "step": 210410 }, { "epoch": 1.7025649324378995, "grad_norm": 0.425218790769577, "learning_rate": 6.587247781126588e-07, "loss": 0.0156, "step": 210420 }, { "epoch": 1.7026458451331012, "grad_norm": 0.32904309034347534, "learning_rate": 6.583745141577708e-07, "loss": 0.0213, "step": 210430 }, { "epoch": 1.7027267578283034, "grad_norm": 0.13830988109111786, "learning_rate": 6.580243367874356e-07, "loss": 0.018, "step": 210440 }, { "epoch": 1.7028076705235051, "grad_norm": 0.456929087638855, "learning_rate": 6.576742460086383e-07, "loss": 0.0194, "step": 210450 }, { "epoch": 1.702888583218707, "grad_norm": 0.4589657485485077, "learning_rate": 6.573242418283632e-07, "loss": 0.0211, "step": 210460 }, { "epoch": 1.702969495913909, "grad_norm": 0.09783697128295898, "learning_rate": 6.569743242535858e-07, "loss": 0.0156, "step": 210470 }, { "epoch": 1.7030504086091107, "grad_norm": 0.26021808385849, "learning_rate": 6.56624493291288e-07, "loss": 0.0123, "step": 210480 }, { "epoch": 1.7031313213043127, "grad_norm": 0.46609556674957275, "learning_rate": 6.562747489484467e-07, "loss": 0.0153, "step": 210490 }, { "epoch": 1.7032122339995146, "grad_norm": 0.3562491834163666, "learning_rate": 6.559250912320337e-07, "loss": 0.0181, "step": 210500 }, { "epoch": 1.7032931466947163, "grad_norm": 0.7061545252799988, "learning_rate": 6.555755201490244e-07, "loss": 0.0122, "step": 210510 }, { "epoch": 1.7033740593899183, "grad_norm": 0.42489612102508545, "learning_rate": 6.5522603570639e-07, "loss": 0.0156, "step": 210520 }, { "epoch": 1.7034549720851202, "grad_norm": 0.4354812800884247, "learning_rate": 6.548766379111004e-07, "loss": 0.0166, "step": 210530 }, { "epoch": 1.703535884780322, "grad_norm": 0.27330395579338074, "learning_rate": 6.545273267701236e-07, "loss": 0.0115, "step": 210540 }, { "epoch": 1.7036167974755239, "grad_norm": 0.5307769179344177, "learning_rate": 6.541781022904248e-07, "loss": 0.0195, "step": 210550 }, { "epoch": 1.7036977101707258, "grad_norm": 0.31818780303001404, "learning_rate": 6.538289644789697e-07, "loss": 0.0214, "step": 210560 }, { "epoch": 1.7037786228659275, "grad_norm": 0.4388105571269989, "learning_rate": 6.534799133427205e-07, "loss": 0.0141, "step": 210570 }, { "epoch": 1.7038595355611297, "grad_norm": 0.37756192684173584, "learning_rate": 6.53130948888639e-07, "loss": 0.0146, "step": 210580 }, { "epoch": 1.7039404482563314, "grad_norm": 0.15631163120269775, "learning_rate": 6.527820711236837e-07, "loss": 0.021, "step": 210590 }, { "epoch": 1.7040213609515333, "grad_norm": 0.6812311410903931, "learning_rate": 6.524332800548128e-07, "loss": 0.0158, "step": 210600 }, { "epoch": 1.7041022736467353, "grad_norm": 0.26500385999679565, "learning_rate": 6.520845756889804e-07, "loss": 0.0128, "step": 210610 }, { "epoch": 1.704183186341937, "grad_norm": 0.4652189314365387, "learning_rate": 6.517359580331451e-07, "loss": 0.0188, "step": 210620 }, { "epoch": 1.704264099037139, "grad_norm": 0.2915354371070862, "learning_rate": 6.513874270942549e-07, "loss": 0.0152, "step": 210630 }, { "epoch": 1.7043450117323409, "grad_norm": 0.33047786355018616, "learning_rate": 6.510389828792619e-07, "loss": 0.0267, "step": 210640 }, { "epoch": 1.7044259244275426, "grad_norm": 0.3700200915336609, "learning_rate": 6.506906253951168e-07, "loss": 0.0185, "step": 210650 }, { "epoch": 1.7045068371227445, "grad_norm": 0.30527573823928833, "learning_rate": 6.503423546487642e-07, "loss": 0.013, "step": 210660 }, { "epoch": 1.7045877498179465, "grad_norm": 0.20573757588863373, "learning_rate": 6.499941706471502e-07, "loss": 0.0138, "step": 210670 }, { "epoch": 1.7046686625131482, "grad_norm": 0.3086912930011749, "learning_rate": 6.49646073397221e-07, "loss": 0.0165, "step": 210680 }, { "epoch": 1.7047495752083504, "grad_norm": 0.15377646684646606, "learning_rate": 6.492980629059148e-07, "loss": 0.0145, "step": 210690 }, { "epoch": 1.704830487903552, "grad_norm": 0.11744119971990585, "learning_rate": 6.48950139180175e-07, "loss": 0.0142, "step": 210700 }, { "epoch": 1.7049114005987538, "grad_norm": 0.2901841998100281, "learning_rate": 6.486023022269394e-07, "loss": 0.016, "step": 210710 }, { "epoch": 1.704992313293956, "grad_norm": 0.06740926206111908, "learning_rate": 6.482545520531446e-07, "loss": 0.0094, "step": 210720 }, { "epoch": 1.7050732259891577, "grad_norm": 0.21511957049369812, "learning_rate": 6.479068886657258e-07, "loss": 0.0124, "step": 210730 }, { "epoch": 1.7051541386843596, "grad_norm": 0.3114405870437622, "learning_rate": 6.475593120716167e-07, "loss": 0.0188, "step": 210740 }, { "epoch": 1.7052350513795616, "grad_norm": 0.532605767250061, "learning_rate": 6.472118222777485e-07, "loss": 0.0284, "step": 210750 }, { "epoch": 1.7053159640747633, "grad_norm": 0.22871074080467224, "learning_rate": 6.468644192910511e-07, "loss": 0.0118, "step": 210760 }, { "epoch": 1.7053968767699652, "grad_norm": 0.01751517504453659, "learning_rate": 6.465171031184531e-07, "loss": 0.0142, "step": 210770 }, { "epoch": 1.7054777894651671, "grad_norm": 0.18832674622535706, "learning_rate": 6.461698737668809e-07, "loss": 0.0169, "step": 210780 }, { "epoch": 1.7055587021603689, "grad_norm": 0.3511013686656952, "learning_rate": 6.458227312432591e-07, "loss": 0.0164, "step": 210790 }, { "epoch": 1.7056396148555708, "grad_norm": 0.32441094517707825, "learning_rate": 6.45475675554511e-07, "loss": 0.0247, "step": 210800 }, { "epoch": 1.7057205275507727, "grad_norm": 0.5001794695854187, "learning_rate": 6.451287067075579e-07, "loss": 0.0259, "step": 210810 }, { "epoch": 1.7058014402459745, "grad_norm": 0.42702415585517883, "learning_rate": 6.44781824709319e-07, "loss": 0.0191, "step": 210820 }, { "epoch": 1.7058823529411766, "grad_norm": 0.3162308633327484, "learning_rate": 6.444350295667112e-07, "loss": 0.0192, "step": 210830 }, { "epoch": 1.7059632656363783, "grad_norm": 0.40705904364585876, "learning_rate": 6.440883212866545e-07, "loss": 0.0128, "step": 210840 }, { "epoch": 1.70604417833158, "grad_norm": 0.2311715930700302, "learning_rate": 6.437416998760587e-07, "loss": 0.0113, "step": 210850 }, { "epoch": 1.7061250910267822, "grad_norm": 0.25020772218704224, "learning_rate": 6.433951653418374e-07, "loss": 0.0174, "step": 210860 }, { "epoch": 1.706206003721984, "grad_norm": 0.3446123003959656, "learning_rate": 6.430487176909034e-07, "loss": 0.0116, "step": 210870 }, { "epoch": 1.7062869164171859, "grad_norm": 1.0587689876556396, "learning_rate": 6.42702356930166e-07, "loss": 0.0185, "step": 210880 }, { "epoch": 1.7063678291123878, "grad_norm": 0.391417920589447, "learning_rate": 6.423560830665287e-07, "loss": 0.0122, "step": 210890 }, { "epoch": 1.7064487418075895, "grad_norm": 0.5010652542114258, "learning_rate": 6.420098961069015e-07, "loss": 0.019, "step": 210900 }, { "epoch": 1.7065296545027915, "grad_norm": 0.04115918651223183, "learning_rate": 6.416637960581867e-07, "loss": 0.0136, "step": 210910 }, { "epoch": 1.7066105671979934, "grad_norm": 0.3639720380306244, "learning_rate": 6.413177829272871e-07, "loss": 0.0217, "step": 210920 }, { "epoch": 1.7066914798931951, "grad_norm": 0.17751941084861755, "learning_rate": 6.409718567211021e-07, "loss": 0.0199, "step": 210930 }, { "epoch": 1.706772392588397, "grad_norm": 0.11290034651756287, "learning_rate": 6.406260174465312e-07, "loss": 0.0169, "step": 210940 }, { "epoch": 1.706853305283599, "grad_norm": 0.2468317449092865, "learning_rate": 6.402802651104716e-07, "loss": 0.0127, "step": 210950 }, { "epoch": 1.7069342179788007, "grad_norm": 0.2899598479270935, "learning_rate": 6.399345997198181e-07, "loss": 0.0121, "step": 210960 }, { "epoch": 1.707015130674003, "grad_norm": 0.1952618956565857, "learning_rate": 6.395890212814643e-07, "loss": 0.0176, "step": 210970 }, { "epoch": 1.7070960433692046, "grad_norm": 0.21559511125087738, "learning_rate": 6.392435298023025e-07, "loss": 0.0276, "step": 210980 }, { "epoch": 1.7071769560644066, "grad_norm": 0.2168361395597458, "learning_rate": 6.388981252892229e-07, "loss": 0.0127, "step": 210990 }, { "epoch": 1.7072578687596085, "grad_norm": 0.3914637267589569, "learning_rate": 6.385528077491127e-07, "loss": 0.0257, "step": 211000 }, { "epoch": 1.7073387814548102, "grad_norm": 0.3494647443294525, "learning_rate": 6.382075771888601e-07, "loss": 0.0154, "step": 211010 }, { "epoch": 1.7074196941500122, "grad_norm": 0.4071306884288788, "learning_rate": 6.378624336153477e-07, "loss": 0.0278, "step": 211020 }, { "epoch": 1.707500606845214, "grad_norm": 0.28811344504356384, "learning_rate": 6.375173770354626e-07, "loss": 0.019, "step": 211030 }, { "epoch": 1.7075815195404158, "grad_norm": 0.18554872274398804, "learning_rate": 6.371724074560825e-07, "loss": 0.0202, "step": 211040 }, { "epoch": 1.7076624322356178, "grad_norm": 3.840136766433716, "learning_rate": 6.368275248840871e-07, "loss": 0.0164, "step": 211050 }, { "epoch": 1.7077433449308197, "grad_norm": 0.14621929824352264, "learning_rate": 6.36482729326357e-07, "loss": 0.0193, "step": 211060 }, { "epoch": 1.7078242576260214, "grad_norm": 0.43303772807121277, "learning_rate": 6.361380207897682e-07, "loss": 0.0234, "step": 211070 }, { "epoch": 1.7079051703212234, "grad_norm": 0.27685314416885376, "learning_rate": 6.357933992811921e-07, "loss": 0.0162, "step": 211080 }, { "epoch": 1.7079860830164253, "grad_norm": 0.368068128824234, "learning_rate": 6.354488648075041e-07, "loss": 0.0215, "step": 211090 }, { "epoch": 1.708066995711627, "grad_norm": 0.5761148929595947, "learning_rate": 6.351044173755749e-07, "loss": 0.0178, "step": 211100 }, { "epoch": 1.7081479084068292, "grad_norm": 0.25435975193977356, "learning_rate": 6.347600569922729e-07, "loss": 0.0221, "step": 211110 }, { "epoch": 1.708228821102031, "grad_norm": 0.47814860939979553, "learning_rate": 6.344157836644665e-07, "loss": 0.0162, "step": 211120 }, { "epoch": 1.7083097337972328, "grad_norm": 0.37857845425605774, "learning_rate": 6.340715973990208e-07, "loss": 0.0113, "step": 211130 }, { "epoch": 1.7083906464924348, "grad_norm": 0.3650563955307007, "learning_rate": 6.337274982028007e-07, "loss": 0.0125, "step": 211140 }, { "epoch": 1.7084715591876365, "grad_norm": 0.2686516046524048, "learning_rate": 6.333834860826676e-07, "loss": 0.0085, "step": 211150 }, { "epoch": 1.7085524718828384, "grad_norm": 0.24192246794700623, "learning_rate": 6.330395610454826e-07, "loss": 0.0172, "step": 211160 }, { "epoch": 1.7086333845780404, "grad_norm": 0.011607913300395012, "learning_rate": 6.326957230981046e-07, "loss": 0.0141, "step": 211170 }, { "epoch": 1.708714297273242, "grad_norm": 0.6283367276191711, "learning_rate": 6.323519722473908e-07, "loss": 0.0145, "step": 211180 }, { "epoch": 1.708795209968444, "grad_norm": 0.23269598186016083, "learning_rate": 6.32008308500196e-07, "loss": 0.0132, "step": 211190 }, { "epoch": 1.708876122663646, "grad_norm": 0.5454142689704895, "learning_rate": 6.316647318633745e-07, "loss": 0.0203, "step": 211200 }, { "epoch": 1.7089570353588477, "grad_norm": 0.4980802834033966, "learning_rate": 6.313212423437781e-07, "loss": 0.0129, "step": 211210 }, { "epoch": 1.7090379480540496, "grad_norm": 0.48410484194755554, "learning_rate": 6.309778399482563e-07, "loss": 0.012, "step": 211220 }, { "epoch": 1.7091188607492516, "grad_norm": 0.2690354287624359, "learning_rate": 6.306345246836587e-07, "loss": 0.0157, "step": 211230 }, { "epoch": 1.7091997734444533, "grad_norm": 0.23843643069267273, "learning_rate": 6.302912965568297e-07, "loss": 0.0118, "step": 211240 }, { "epoch": 1.7092806861396554, "grad_norm": 0.19160409271717072, "learning_rate": 6.299481555746173e-07, "loss": 0.0206, "step": 211250 }, { "epoch": 1.7093615988348572, "grad_norm": 0.3846505582332611, "learning_rate": 6.296051017438648e-07, "loss": 0.0208, "step": 211260 }, { "epoch": 1.709442511530059, "grad_norm": 0.4204733967781067, "learning_rate": 6.292621350714096e-07, "loss": 0.016, "step": 211270 }, { "epoch": 1.709523424225261, "grad_norm": 0.24919025599956512, "learning_rate": 6.289192555640949e-07, "loss": 0.0122, "step": 211280 }, { "epoch": 1.7096043369204628, "grad_norm": 0.47064825892448425, "learning_rate": 6.285764632287594e-07, "loss": 0.0159, "step": 211290 }, { "epoch": 1.7096852496156647, "grad_norm": 0.32988032698631287, "learning_rate": 6.282337580722359e-07, "loss": 0.0087, "step": 211300 }, { "epoch": 1.7097661623108666, "grad_norm": 0.22811175882816315, "learning_rate": 6.278911401013616e-07, "loss": 0.0183, "step": 211310 }, { "epoch": 1.7098470750060684, "grad_norm": 0.42072972655296326, "learning_rate": 6.275486093229688e-07, "loss": 0.0177, "step": 211320 }, { "epoch": 1.7099279877012703, "grad_norm": 0.2985037565231323, "learning_rate": 6.272061657438883e-07, "loss": 0.0106, "step": 211330 }, { "epoch": 1.7100089003964722, "grad_norm": 0.5768496990203857, "learning_rate": 6.26863809370949e-07, "loss": 0.013, "step": 211340 }, { "epoch": 1.710089813091674, "grad_norm": 0.6090452075004578, "learning_rate": 6.265215402109792e-07, "loss": 0.0193, "step": 211350 }, { "epoch": 1.7101707257868761, "grad_norm": 0.4395294785499573, "learning_rate": 6.261793582708048e-07, "loss": 0.0258, "step": 211360 }, { "epoch": 1.7102516384820778, "grad_norm": 0.42869868874549866, "learning_rate": 6.258372635572491e-07, "loss": 0.0101, "step": 211370 }, { "epoch": 1.7103325511772796, "grad_norm": 0.29307860136032104, "learning_rate": 6.254952560771355e-07, "loss": 0.0237, "step": 211380 }, { "epoch": 1.7104134638724817, "grad_norm": 0.4850022494792938, "learning_rate": 6.251533358372836e-07, "loss": 0.0216, "step": 211390 }, { "epoch": 1.7104943765676834, "grad_norm": 0.08671265095472336, "learning_rate": 6.248115028445123e-07, "loss": 0.0111, "step": 211400 }, { "epoch": 1.7105752892628854, "grad_norm": 0.4586442708969116, "learning_rate": 6.244697571056386e-07, "loss": 0.0206, "step": 211410 }, { "epoch": 1.7106562019580873, "grad_norm": 0.44152122735977173, "learning_rate": 6.241280986274806e-07, "loss": 0.0167, "step": 211420 }, { "epoch": 1.710737114653289, "grad_norm": 0.3857405185699463, "learning_rate": 6.237865274168486e-07, "loss": 0.0135, "step": 211430 }, { "epoch": 1.710818027348491, "grad_norm": 0.2988779842853546, "learning_rate": 6.234450434805545e-07, "loss": 0.0216, "step": 211440 }, { "epoch": 1.710898940043693, "grad_norm": 0.0750042200088501, "learning_rate": 6.23103646825412e-07, "loss": 0.0105, "step": 211450 }, { "epoch": 1.7109798527388946, "grad_norm": 0.3751363456249237, "learning_rate": 6.227623374582253e-07, "loss": 0.0195, "step": 211460 }, { "epoch": 1.7110607654340966, "grad_norm": 0.5926979184150696, "learning_rate": 6.224211153858039e-07, "loss": 0.0381, "step": 211470 }, { "epoch": 1.7111416781292985, "grad_norm": 0.13572610914707184, "learning_rate": 6.22079980614953e-07, "loss": 0.0152, "step": 211480 }, { "epoch": 1.7112225908245002, "grad_norm": 0.47403785586357117, "learning_rate": 6.217389331524721e-07, "loss": 0.0242, "step": 211490 }, { "epoch": 1.7113035035197024, "grad_norm": 0.357877254486084, "learning_rate": 6.213979730051667e-07, "loss": 0.0196, "step": 211500 }, { "epoch": 1.7113844162149041, "grad_norm": 0.12897838652133942, "learning_rate": 6.210571001798355e-07, "loss": 0.0192, "step": 211510 }, { "epoch": 1.7114653289101058, "grad_norm": 0.3221987187862396, "learning_rate": 6.207163146832757e-07, "loss": 0.0141, "step": 211520 }, { "epoch": 1.711546241605308, "grad_norm": 0.17931222915649414, "learning_rate": 6.203756165222835e-07, "loss": 0.013, "step": 211530 }, { "epoch": 1.7116271543005097, "grad_norm": 0.7067688703536987, "learning_rate": 6.200350057036541e-07, "loss": 0.0194, "step": 211540 }, { "epoch": 1.7117080669957117, "grad_norm": 0.15735678374767303, "learning_rate": 6.196944822341805e-07, "loss": 0.0193, "step": 211550 }, { "epoch": 1.7117889796909136, "grad_norm": 0.4729344844818115, "learning_rate": 6.193540461206527e-07, "loss": 0.0238, "step": 211560 }, { "epoch": 1.7118698923861153, "grad_norm": 0.30090340971946716, "learning_rate": 6.190136973698607e-07, "loss": 0.0278, "step": 211570 }, { "epoch": 1.7119508050813173, "grad_norm": 0.25059446692466736, "learning_rate": 6.186734359885915e-07, "loss": 0.0123, "step": 211580 }, { "epoch": 1.7120317177765192, "grad_norm": 0.008307388052344322, "learning_rate": 6.183332619836313e-07, "loss": 0.0181, "step": 211590 }, { "epoch": 1.712112630471721, "grad_norm": 0.2287149876356125, "learning_rate": 6.179931753617635e-07, "loss": 0.017, "step": 211600 }, { "epoch": 1.7121935431669228, "grad_norm": 0.13664355874061584, "learning_rate": 6.176531761297728e-07, "loss": 0.0205, "step": 211610 }, { "epoch": 1.7122744558621248, "grad_norm": 0.25280556082725525, "learning_rate": 6.173132642944369e-07, "loss": 0.0114, "step": 211620 }, { "epoch": 1.7123553685573265, "grad_norm": 0.43484318256378174, "learning_rate": 6.169734398625349e-07, "loss": 0.0167, "step": 211630 }, { "epoch": 1.7124362812525287, "grad_norm": 0.30453258752822876, "learning_rate": 6.166337028408464e-07, "loss": 0.0161, "step": 211640 }, { "epoch": 1.7125171939477304, "grad_norm": 0.29057416319847107, "learning_rate": 6.162940532361428e-07, "loss": 0.0178, "step": 211650 }, { "epoch": 1.7125981066429323, "grad_norm": 0.26208066940307617, "learning_rate": 6.159544910552012e-07, "loss": 0.0241, "step": 211660 }, { "epoch": 1.7126790193381343, "grad_norm": 0.5044052004814148, "learning_rate": 6.156150163047931e-07, "loss": 0.0221, "step": 211670 }, { "epoch": 1.712759932033336, "grad_norm": 0.06127867102622986, "learning_rate": 6.152756289916856e-07, "loss": 0.0087, "step": 211680 }, { "epoch": 1.712840844728538, "grad_norm": 0.6043950915336609, "learning_rate": 6.149363291226502e-07, "loss": 0.0168, "step": 211690 }, { "epoch": 1.7129217574237399, "grad_norm": 0.2755005359649658, "learning_rate": 6.145971167044534e-07, "loss": 0.0217, "step": 211700 }, { "epoch": 1.7130026701189416, "grad_norm": 0.21485909819602966, "learning_rate": 6.142579917438574e-07, "loss": 0.0228, "step": 211710 }, { "epoch": 1.7130835828141435, "grad_norm": 0.3075707256793976, "learning_rate": 6.139189542476276e-07, "loss": 0.0213, "step": 211720 }, { "epoch": 1.7131644955093455, "grad_norm": 0.13656657934188843, "learning_rate": 6.135800042225248e-07, "loss": 0.0198, "step": 211730 }, { "epoch": 1.7132454082045472, "grad_norm": 0.6553542613983154, "learning_rate": 6.132411416753087e-07, "loss": 0.0207, "step": 211740 }, { "epoch": 1.7133263208997491, "grad_norm": 0.4636829197406769, "learning_rate": 6.129023666127376e-07, "loss": 0.0243, "step": 211750 }, { "epoch": 1.713407233594951, "grad_norm": 0.4252067804336548, "learning_rate": 6.125636790415668e-07, "loss": 0.0183, "step": 211760 }, { "epoch": 1.7134881462901528, "grad_norm": 0.33437836170196533, "learning_rate": 6.122250789685513e-07, "loss": 0.016, "step": 211770 }, { "epoch": 1.713569058985355, "grad_norm": 0.26277995109558105, "learning_rate": 6.118865664004436e-07, "loss": 0.0153, "step": 211780 }, { "epoch": 1.7136499716805567, "grad_norm": 0.31884539127349854, "learning_rate": 6.115481413439939e-07, "loss": 0.0105, "step": 211790 }, { "epoch": 1.7137308843757586, "grad_norm": 0.633210301399231, "learning_rate": 6.112098038059538e-07, "loss": 0.0196, "step": 211800 }, { "epoch": 1.7138117970709605, "grad_norm": 0.2950422465801239, "learning_rate": 6.108715537930676e-07, "loss": 0.0156, "step": 211810 }, { "epoch": 1.7138927097661623, "grad_norm": 0.10210071504116058, "learning_rate": 6.10533391312082e-07, "loss": 0.0186, "step": 211820 }, { "epoch": 1.7139736224613642, "grad_norm": 0.3729948401451111, "learning_rate": 6.101953163697433e-07, "loss": 0.0172, "step": 211830 }, { "epoch": 1.7140545351565661, "grad_norm": 0.2632363736629486, "learning_rate": 6.098573289727905e-07, "loss": 0.0156, "step": 211840 }, { "epoch": 1.7141354478517679, "grad_norm": 0.20686957240104675, "learning_rate": 6.095194291279644e-07, "loss": 0.011, "step": 211850 }, { "epoch": 1.7142163605469698, "grad_norm": 0.020319465547800064, "learning_rate": 6.091816168420062e-07, "loss": 0.0217, "step": 211860 }, { "epoch": 1.7142972732421717, "grad_norm": 0.5066226720809937, "learning_rate": 6.088438921216494e-07, "loss": 0.0133, "step": 211870 }, { "epoch": 1.7143781859373735, "grad_norm": 0.7131887078285217, "learning_rate": 6.085062549736326e-07, "loss": 0.0114, "step": 211880 }, { "epoch": 1.7144590986325754, "grad_norm": 0.15315864980220795, "learning_rate": 6.081687054046881e-07, "loss": 0.0132, "step": 211890 }, { "epoch": 1.7145400113277773, "grad_norm": 0.27349910140037537, "learning_rate": 6.078312434215455e-07, "loss": 0.015, "step": 211900 }, { "epoch": 1.714620924022979, "grad_norm": 0.2707100510597229, "learning_rate": 6.074938690309374e-07, "loss": 0.0166, "step": 211910 }, { "epoch": 1.7147018367181812, "grad_norm": 0.3699873089790344, "learning_rate": 6.07156582239592e-07, "loss": 0.0192, "step": 211920 }, { "epoch": 1.714782749413383, "grad_norm": 0.008433174341917038, "learning_rate": 6.068193830542324e-07, "loss": 0.0228, "step": 211930 }, { "epoch": 1.7148636621085849, "grad_norm": 0.3640226125717163, "learning_rate": 6.064822714815877e-07, "loss": 0.0141, "step": 211940 }, { "epoch": 1.7149445748037868, "grad_norm": 0.34908682107925415, "learning_rate": 6.061452475283785e-07, "loss": 0.0152, "step": 211950 }, { "epoch": 1.7150254874989885, "grad_norm": 0.10351221263408661, "learning_rate": 6.058083112013263e-07, "loss": 0.0127, "step": 211960 }, { "epoch": 1.7151064001941905, "grad_norm": 0.3143679201602936, "learning_rate": 6.054714625071511e-07, "loss": 0.0127, "step": 211970 }, { "epoch": 1.7151873128893924, "grad_norm": 0.3839344084262848, "learning_rate": 6.051347014525699e-07, "loss": 0.0154, "step": 211980 }, { "epoch": 1.7152682255845941, "grad_norm": 0.4799230694770813, "learning_rate": 6.047980280443e-07, "loss": 0.023, "step": 211990 }, { "epoch": 1.715349138279796, "grad_norm": 0.51296466588974, "learning_rate": 6.044614422890538e-07, "loss": 0.0145, "step": 212000 }, { "epoch": 1.715430050974998, "grad_norm": 0.4640869200229645, "learning_rate": 6.041249441935443e-07, "loss": 0.0237, "step": 212010 }, { "epoch": 1.7155109636701997, "grad_norm": 0.3472539782524109, "learning_rate": 6.037885337644845e-07, "loss": 0.0157, "step": 212020 }, { "epoch": 1.715591876365402, "grad_norm": 0.0024621535558253527, "learning_rate": 6.034522110085805e-07, "loss": 0.0141, "step": 212030 }, { "epoch": 1.7156727890606036, "grad_norm": 0.4130937457084656, "learning_rate": 6.0311597593254e-07, "loss": 0.0168, "step": 212040 }, { "epoch": 1.7157537017558053, "grad_norm": 0.184869185090065, "learning_rate": 6.027798285430708e-07, "loss": 0.0356, "step": 212050 }, { "epoch": 1.7158346144510075, "grad_norm": 0.4710838198661804, "learning_rate": 6.024437688468726e-07, "loss": 0.0122, "step": 212060 }, { "epoch": 1.7159155271462092, "grad_norm": 0.4188864827156067, "learning_rate": 6.021077968506517e-07, "loss": 0.0148, "step": 212070 }, { "epoch": 1.7159964398414111, "grad_norm": 0.2519286274909973, "learning_rate": 6.017719125611066e-07, "loss": 0.0225, "step": 212080 }, { "epoch": 1.716077352536613, "grad_norm": 0.004905075766146183, "learning_rate": 6.014361159849342e-07, "loss": 0.01, "step": 212090 }, { "epoch": 1.7161582652318148, "grad_norm": 0.08659805357456207, "learning_rate": 6.011004071288334e-07, "loss": 0.0224, "step": 212100 }, { "epoch": 1.7162391779270167, "grad_norm": 0.5814322233200073, "learning_rate": 6.007647859994997e-07, "loss": 0.0253, "step": 212110 }, { "epoch": 1.7163200906222187, "grad_norm": 0.4749721884727478, "learning_rate": 6.004292526036226e-07, "loss": 0.0159, "step": 212120 }, { "epoch": 1.7164010033174204, "grad_norm": 0.225808784365654, "learning_rate": 6.000938069478979e-07, "loss": 0.0191, "step": 212130 }, { "epoch": 1.7164819160126223, "grad_norm": 0.5377570390701294, "learning_rate": 5.99758449039013e-07, "loss": 0.0307, "step": 212140 }, { "epoch": 1.7165628287078243, "grad_norm": 0.13199840486049652, "learning_rate": 5.994231788836569e-07, "loss": 0.0069, "step": 212150 }, { "epoch": 1.716643741403026, "grad_norm": 0.25563907623291016, "learning_rate": 5.990879964885154e-07, "loss": 0.022, "step": 212160 }, { "epoch": 1.7167246540982282, "grad_norm": 0.002999508986249566, "learning_rate": 5.987529018602728e-07, "loss": 0.0099, "step": 212170 }, { "epoch": 1.7168055667934299, "grad_norm": 0.49649113416671753, "learning_rate": 5.984178950056124e-07, "loss": 0.0185, "step": 212180 }, { "epoch": 1.7168864794886316, "grad_norm": 0.710081160068512, "learning_rate": 5.980829759312146e-07, "loss": 0.0275, "step": 212190 }, { "epoch": 1.7169673921838338, "grad_norm": 0.3018221855163574, "learning_rate": 5.977481446437583e-07, "loss": 0.0257, "step": 212200 }, { "epoch": 1.7170483048790355, "grad_norm": 0.30354568362236023, "learning_rate": 5.974134011499239e-07, "loss": 0.0142, "step": 212210 }, { "epoch": 1.7171292175742374, "grad_norm": 0.3050629198551178, "learning_rate": 5.970787454563836e-07, "loss": 0.021, "step": 212220 }, { "epoch": 1.7172101302694394, "grad_norm": 0.29428979754447937, "learning_rate": 5.967441775698119e-07, "loss": 0.0185, "step": 212230 }, { "epoch": 1.717291042964641, "grad_norm": 0.2183281034231186, "learning_rate": 5.964096974968835e-07, "loss": 0.0106, "step": 212240 }, { "epoch": 1.717371955659843, "grad_norm": 0.2199273258447647, "learning_rate": 5.960753052442669e-07, "loss": 0.0179, "step": 212250 }, { "epoch": 1.717452868355045, "grad_norm": 0.47163981199264526, "learning_rate": 5.957410008186299e-07, "loss": 0.02, "step": 212260 }, { "epoch": 1.7175337810502467, "grad_norm": 0.6044396162033081, "learning_rate": 5.954067842266431e-07, "loss": 0.0146, "step": 212270 }, { "epoch": 1.7176146937454486, "grad_norm": 0.4023536741733551, "learning_rate": 5.95072655474967e-07, "loss": 0.0234, "step": 212280 }, { "epoch": 1.7176956064406506, "grad_norm": 0.09086526930332184, "learning_rate": 5.947386145702688e-07, "loss": 0.0165, "step": 212290 }, { "epoch": 1.7177765191358523, "grad_norm": 0.23383758962154388, "learning_rate": 5.944046615192101e-07, "loss": 0.0172, "step": 212300 }, { "epoch": 1.7178574318310544, "grad_norm": 0.15544509887695312, "learning_rate": 5.940707963284475e-07, "loss": 0.0234, "step": 212310 }, { "epoch": 1.7179383445262562, "grad_norm": 0.2460891306400299, "learning_rate": 5.937370190046432e-07, "loss": 0.0196, "step": 212320 }, { "epoch": 1.718019257221458, "grad_norm": 0.034472737461328506, "learning_rate": 5.934033295544517e-07, "loss": 0.0176, "step": 212330 }, { "epoch": 1.71810016991666, "grad_norm": 0.33876222372055054, "learning_rate": 5.930697279845276e-07, "loss": 0.0273, "step": 212340 }, { "epoch": 1.7181810826118618, "grad_norm": 0.39451777935028076, "learning_rate": 5.927362143015247e-07, "loss": 0.0142, "step": 212350 }, { "epoch": 1.7182619953070637, "grad_norm": 0.5192626118659973, "learning_rate": 5.924027885120942e-07, "loss": 0.0155, "step": 212360 }, { "epoch": 1.7183429080022656, "grad_norm": 0.39531561732292175, "learning_rate": 5.920694506228847e-07, "loss": 0.0247, "step": 212370 }, { "epoch": 1.7184238206974674, "grad_norm": 0.4296720623970032, "learning_rate": 5.917362006405447e-07, "loss": 0.0234, "step": 212380 }, { "epoch": 1.7185047333926693, "grad_norm": 0.3591099977493286, "learning_rate": 5.914030385717196e-07, "loss": 0.0149, "step": 212390 }, { "epoch": 1.7185856460878712, "grad_norm": 0.3323921263217926, "learning_rate": 5.910699644230545e-07, "loss": 0.0267, "step": 212400 }, { "epoch": 1.718666558783073, "grad_norm": 0.733392596244812, "learning_rate": 5.907369782011907e-07, "loss": 0.0241, "step": 212410 }, { "epoch": 1.718747471478275, "grad_norm": 0.44033753871917725, "learning_rate": 5.904040799127691e-07, "loss": 0.0116, "step": 212420 }, { "epoch": 1.7188283841734768, "grad_norm": 0.09569204598665237, "learning_rate": 5.900712695644306e-07, "loss": 0.022, "step": 212430 }, { "epoch": 1.7189092968686785, "grad_norm": 0.3660154938697815, "learning_rate": 5.897385471628098e-07, "loss": 0.0271, "step": 212440 }, { "epoch": 1.7189902095638807, "grad_norm": 0.020155925303697586, "learning_rate": 5.894059127145424e-07, "loss": 0.0038, "step": 212450 }, { "epoch": 1.7190711222590824, "grad_norm": 0.575464129447937, "learning_rate": 5.890733662262649e-07, "loss": 0.0294, "step": 212460 }, { "epoch": 1.7191520349542844, "grad_norm": 0.5172253251075745, "learning_rate": 5.887409077046064e-07, "loss": 0.0219, "step": 212470 }, { "epoch": 1.7192329476494863, "grad_norm": 0.2664695382118225, "learning_rate": 5.884085371561965e-07, "loss": 0.0181, "step": 212480 }, { "epoch": 1.719313860344688, "grad_norm": 0.2829267680644989, "learning_rate": 5.880762545876678e-07, "loss": 0.0174, "step": 212490 }, { "epoch": 1.71939477303989, "grad_norm": 0.03065512888133526, "learning_rate": 5.877440600056416e-07, "loss": 0.0149, "step": 212500 }, { "epoch": 1.719475685735092, "grad_norm": 0.4809228181838989, "learning_rate": 5.874119534167466e-07, "loss": 0.0166, "step": 212510 }, { "epoch": 1.7195565984302936, "grad_norm": 0.4210875332355499, "learning_rate": 5.870799348276046e-07, "loss": 0.026, "step": 212520 }, { "epoch": 1.7196375111254956, "grad_norm": 0.4539072811603546, "learning_rate": 5.867480042448376e-07, "loss": 0.0154, "step": 212530 }, { "epoch": 1.7197184238206975, "grad_norm": 0.33523252606391907, "learning_rate": 5.864161616750647e-07, "loss": 0.0322, "step": 212540 }, { "epoch": 1.7197993365158992, "grad_norm": 0.27303192019462585, "learning_rate": 5.860844071249033e-07, "loss": 0.015, "step": 212550 }, { "epoch": 1.7198802492111014, "grad_norm": 0.3284359276294708, "learning_rate": 5.857527406009711e-07, "loss": 0.0136, "step": 212560 }, { "epoch": 1.719961161906303, "grad_norm": 0.22267360985279083, "learning_rate": 5.854211621098815e-07, "loss": 0.0181, "step": 212570 }, { "epoch": 1.7200420746015048, "grad_norm": 0.09610728919506073, "learning_rate": 5.850896716582466e-07, "loss": 0.0169, "step": 212580 }, { "epoch": 1.720122987296707, "grad_norm": 0.13059185445308685, "learning_rate": 5.847582692526787e-07, "loss": 0.0137, "step": 212590 }, { "epoch": 1.7202038999919087, "grad_norm": 1.6220402717590332, "learning_rate": 5.844269548997855e-07, "loss": 0.0312, "step": 212600 }, { "epoch": 1.7202848126871106, "grad_norm": 0.2487754225730896, "learning_rate": 5.840957286061744e-07, "loss": 0.043, "step": 212610 }, { "epoch": 1.7203657253823126, "grad_norm": 0.25724363327026367, "learning_rate": 5.837645903784534e-07, "loss": 0.0174, "step": 212620 }, { "epoch": 1.7204466380775143, "grad_norm": 0.21453380584716797, "learning_rate": 5.834335402232239e-07, "loss": 0.0106, "step": 212630 }, { "epoch": 1.7205275507727162, "grad_norm": 0.2488163411617279, "learning_rate": 5.831025781470873e-07, "loss": 0.0202, "step": 212640 }, { "epoch": 1.7206084634679182, "grad_norm": 0.6349210143089294, "learning_rate": 5.827717041566472e-07, "loss": 0.0183, "step": 212650 }, { "epoch": 1.72068937616312, "grad_norm": 0.41914936900138855, "learning_rate": 5.824409182584994e-07, "loss": 0.0168, "step": 212660 }, { "epoch": 1.7207702888583218, "grad_norm": 0.42138800024986267, "learning_rate": 5.821102204592411e-07, "loss": 0.0212, "step": 212670 }, { "epoch": 1.7208512015535238, "grad_norm": 0.17923088371753693, "learning_rate": 5.817796107654683e-07, "loss": 0.0218, "step": 212680 }, { "epoch": 1.7209321142487255, "grad_norm": 0.6207237243652344, "learning_rate": 5.814490891837743e-07, "loss": 0.0227, "step": 212690 }, { "epoch": 1.7210130269439277, "grad_norm": 0.5911434888839722, "learning_rate": 5.811186557207499e-07, "loss": 0.0227, "step": 212700 }, { "epoch": 1.7210939396391294, "grad_norm": 0.2998337149620056, "learning_rate": 5.807883103829854e-07, "loss": 0.0115, "step": 212710 }, { "epoch": 1.721174852334331, "grad_norm": 0.4051804542541504, "learning_rate": 5.804580531770682e-07, "loss": 0.0225, "step": 212720 }, { "epoch": 1.7212557650295333, "grad_norm": 0.618413507938385, "learning_rate": 5.801278841095858e-07, "loss": 0.0194, "step": 212730 }, { "epoch": 1.721336677724735, "grad_norm": 0.4091450273990631, "learning_rate": 5.797978031871216e-07, "loss": 0.0161, "step": 212740 }, { "epoch": 1.721417590419937, "grad_norm": 0.4547711908817291, "learning_rate": 5.794678104162588e-07, "loss": 0.0133, "step": 212750 }, { "epoch": 1.7214985031151389, "grad_norm": 0.17224323749542236, "learning_rate": 5.791379058035784e-07, "loss": 0.0132, "step": 212760 }, { "epoch": 1.7215794158103406, "grad_norm": 0.2965056300163269, "learning_rate": 5.788080893556597e-07, "loss": 0.0143, "step": 212770 }, { "epoch": 1.7216603285055425, "grad_norm": 0.4420844614505768, "learning_rate": 5.784783610790795e-07, "loss": 0.0201, "step": 212780 }, { "epoch": 1.7217412412007445, "grad_norm": 0.3590445816516876, "learning_rate": 5.781487209804148e-07, "loss": 0.02, "step": 212790 }, { "epoch": 1.7218221538959462, "grad_norm": 0.36844220757484436, "learning_rate": 5.778191690662388e-07, "loss": 0.0117, "step": 212800 }, { "epoch": 1.7219030665911481, "grad_norm": 0.2805519700050354, "learning_rate": 5.774897053431238e-07, "loss": 0.0221, "step": 212810 }, { "epoch": 1.72198397928635, "grad_norm": 0.45981860160827637, "learning_rate": 5.771603298176403e-07, "loss": 0.0106, "step": 212820 }, { "epoch": 1.7220648919815518, "grad_norm": 0.23501801490783691, "learning_rate": 5.768310424963558e-07, "loss": 0.0191, "step": 212830 }, { "epoch": 1.722145804676754, "grad_norm": 0.3444318175315857, "learning_rate": 5.76501843385841e-07, "loss": 0.0246, "step": 212840 }, { "epoch": 1.7222267173719557, "grad_norm": 0.39276090264320374, "learning_rate": 5.761727324926569e-07, "loss": 0.0176, "step": 212850 }, { "epoch": 1.7223076300671576, "grad_norm": 0.2683981657028198, "learning_rate": 5.758437098233677e-07, "loss": 0.013, "step": 212860 }, { "epoch": 1.7223885427623595, "grad_norm": 0.7093353867530823, "learning_rate": 5.755147753845369e-07, "loss": 0.0142, "step": 212870 }, { "epoch": 1.7224694554575612, "grad_norm": 0.31221434473991394, "learning_rate": 5.751859291827244e-07, "loss": 0.0116, "step": 212880 }, { "epoch": 1.7225503681527632, "grad_norm": 0.9307655096054077, "learning_rate": 5.748571712244855e-07, "loss": 0.03, "step": 212890 }, { "epoch": 1.7226312808479651, "grad_norm": 0.3895930349826813, "learning_rate": 5.745285015163792e-07, "loss": 0.0266, "step": 212900 }, { "epoch": 1.7227121935431668, "grad_norm": 0.3692934215068817, "learning_rate": 5.741999200649595e-07, "loss": 0.021, "step": 212910 }, { "epoch": 1.7227931062383688, "grad_norm": 0.18754830956459045, "learning_rate": 5.738714268767792e-07, "loss": 0.0204, "step": 212920 }, { "epoch": 1.7228740189335707, "grad_norm": 0.0976623073220253, "learning_rate": 5.735430219583893e-07, "loss": 0.0139, "step": 212930 }, { "epoch": 1.7229549316287724, "grad_norm": 0.3645983040332794, "learning_rate": 5.732147053163389e-07, "loss": 0.0282, "step": 212940 }, { "epoch": 1.7230358443239744, "grad_norm": 0.2639550566673279, "learning_rate": 5.728864769571763e-07, "loss": 0.0108, "step": 212950 }, { "epoch": 1.7231167570191763, "grad_norm": 0.34768128395080566, "learning_rate": 5.725583368874466e-07, "loss": 0.0166, "step": 212960 }, { "epoch": 1.723197669714378, "grad_norm": 0.4183620810508728, "learning_rate": 5.72230285113694e-07, "loss": 0.0193, "step": 212970 }, { "epoch": 1.7232785824095802, "grad_norm": 0.5820289254188538, "learning_rate": 5.719023216424613e-07, "loss": 0.014, "step": 212980 }, { "epoch": 1.723359495104782, "grad_norm": 0.6568224430084229, "learning_rate": 5.71574446480288e-07, "loss": 0.0282, "step": 212990 }, { "epoch": 1.7234404077999839, "grad_norm": 0.7510204315185547, "learning_rate": 5.71246659633714e-07, "loss": 0.0292, "step": 213000 }, { "epoch": 1.7235213204951858, "grad_norm": 0.35049837827682495, "learning_rate": 5.709189611092763e-07, "loss": 0.012, "step": 213010 }, { "epoch": 1.7236022331903875, "grad_norm": 0.4769634008407593, "learning_rate": 5.705913509135091e-07, "loss": 0.0171, "step": 213020 }, { "epoch": 1.7236831458855895, "grad_norm": 0.6165713667869568, "learning_rate": 5.702638290529466e-07, "loss": 0.0212, "step": 213030 }, { "epoch": 1.7237640585807914, "grad_norm": 0.35611239075660706, "learning_rate": 5.699363955341203e-07, "loss": 0.0167, "step": 213040 }, { "epoch": 1.7238449712759931, "grad_norm": 0.33483752608299255, "learning_rate": 5.696090503635598e-07, "loss": 0.0165, "step": 213050 }, { "epoch": 1.723925883971195, "grad_norm": 0.5208714604377747, "learning_rate": 5.692817935477945e-07, "loss": 0.0176, "step": 213060 }, { "epoch": 1.724006796666397, "grad_norm": 0.6289610266685486, "learning_rate": 5.689546250933509e-07, "loss": 0.0254, "step": 213070 }, { "epoch": 1.7240877093615987, "grad_norm": 0.4782196283340454, "learning_rate": 5.686275450067513e-07, "loss": 0.0169, "step": 213080 }, { "epoch": 1.7241686220568007, "grad_norm": 0.6188614964485168, "learning_rate": 5.683005532945213e-07, "loss": 0.0178, "step": 213090 }, { "epoch": 1.7242495347520026, "grad_norm": 0.45819923281669617, "learning_rate": 5.679736499631811e-07, "loss": 0.0216, "step": 213100 }, { "epoch": 1.7243304474472043, "grad_norm": 0.0844057947397232, "learning_rate": 5.676468350192499e-07, "loss": 0.0213, "step": 213110 }, { "epoch": 1.7244113601424065, "grad_norm": 0.06280422955751419, "learning_rate": 5.673201084692454e-07, "loss": 0.0142, "step": 213120 }, { "epoch": 1.7244922728376082, "grad_norm": 0.2022300660610199, "learning_rate": 5.669934703196833e-07, "loss": 0.0181, "step": 213130 }, { "epoch": 1.7245731855328101, "grad_norm": 0.6930587887763977, "learning_rate": 5.666669205770787e-07, "loss": 0.023, "step": 213140 }, { "epoch": 1.724654098228012, "grad_norm": 0.4547484815120697, "learning_rate": 5.663404592479421e-07, "loss": 0.016, "step": 213150 }, { "epoch": 1.7247350109232138, "grad_norm": 0.1298740953207016, "learning_rate": 5.660140863387859e-07, "loss": 0.0103, "step": 213160 }, { "epoch": 1.7248159236184157, "grad_norm": 0.5369369983673096, "learning_rate": 5.656878018561185e-07, "loss": 0.0167, "step": 213170 }, { "epoch": 1.7248968363136177, "grad_norm": 0.16212330758571625, "learning_rate": 5.65361605806446e-07, "loss": 0.0175, "step": 213180 }, { "epoch": 1.7249777490088194, "grad_norm": 0.203004390001297, "learning_rate": 5.650354981962747e-07, "loss": 0.012, "step": 213190 }, { "epoch": 1.7250586617040213, "grad_norm": 0.9345111846923828, "learning_rate": 5.647094790321073e-07, "loss": 0.0166, "step": 213200 }, { "epoch": 1.7251395743992233, "grad_norm": 0.5008276700973511, "learning_rate": 5.643835483204463e-07, "loss": 0.0186, "step": 213210 }, { "epoch": 1.725220487094425, "grad_norm": 0.4184180796146393, "learning_rate": 5.640577060677904e-07, "loss": 0.0134, "step": 213220 }, { "epoch": 1.7253013997896272, "grad_norm": 0.20373524725437164, "learning_rate": 5.637319522806411e-07, "loss": 0.0098, "step": 213230 }, { "epoch": 1.7253823124848289, "grad_norm": 0.36508703231811523, "learning_rate": 5.634062869654905e-07, "loss": 0.0243, "step": 213240 }, { "epoch": 1.7254632251800306, "grad_norm": 0.8175627589225769, "learning_rate": 5.630807101288365e-07, "loss": 0.0223, "step": 213250 }, { "epoch": 1.7255441378752328, "grad_norm": 0.2728390097618103, "learning_rate": 5.62755221777172e-07, "loss": 0.0132, "step": 213260 }, { "epoch": 1.7256250505704345, "grad_norm": 0.7023683190345764, "learning_rate": 5.624298219169849e-07, "loss": 0.0204, "step": 213270 }, { "epoch": 1.7257059632656364, "grad_norm": 0.21241171658039093, "learning_rate": 5.621045105547679e-07, "loss": 0.0209, "step": 213280 }, { "epoch": 1.7257868759608384, "grad_norm": 0.22845079004764557, "learning_rate": 5.617792876970091e-07, "loss": 0.014, "step": 213290 }, { "epoch": 1.72586778865604, "grad_norm": 0.30894550681114197, "learning_rate": 5.614541533501905e-07, "loss": 0.0225, "step": 213300 }, { "epoch": 1.725948701351242, "grad_norm": 0.2512347996234894, "learning_rate": 5.611291075207992e-07, "loss": 0.0196, "step": 213310 }, { "epoch": 1.726029614046444, "grad_norm": 0.23278948664665222, "learning_rate": 5.608041502153178e-07, "loss": 0.0127, "step": 213320 }, { "epoch": 1.7261105267416457, "grad_norm": 0.3099290132522583, "learning_rate": 5.604792814402255e-07, "loss": 0.0184, "step": 213330 }, { "epoch": 1.7261914394368476, "grad_norm": 0.6660807132720947, "learning_rate": 5.601545012020021e-07, "loss": 0.0136, "step": 213340 }, { "epoch": 1.7262723521320495, "grad_norm": 0.2714175581932068, "learning_rate": 5.598298095071242e-07, "loss": 0.0232, "step": 213350 }, { "epoch": 1.7263532648272513, "grad_norm": 0.8332483172416687, "learning_rate": 5.595052063620665e-07, "loss": 0.0148, "step": 213360 }, { "epoch": 1.7264341775224534, "grad_norm": 0.15197327733039856, "learning_rate": 5.591806917733039e-07, "loss": 0.018, "step": 213370 }, { "epoch": 1.7265150902176551, "grad_norm": 0.3563579022884369, "learning_rate": 5.588562657473073e-07, "loss": 0.007, "step": 213380 }, { "epoch": 1.7265960029128569, "grad_norm": 0.2369457334280014, "learning_rate": 5.585319282905466e-07, "loss": 0.0181, "step": 213390 }, { "epoch": 1.726676915608059, "grad_norm": 0.05799511447548866, "learning_rate": 5.582076794094904e-07, "loss": 0.0167, "step": 213400 }, { "epoch": 1.7267578283032607, "grad_norm": 0.5046542286872864, "learning_rate": 5.578835191106036e-07, "loss": 0.0261, "step": 213410 }, { "epoch": 1.7268387409984627, "grad_norm": 0.29491281509399414, "learning_rate": 5.575594474003548e-07, "loss": 0.0115, "step": 213420 }, { "epoch": 1.7269196536936646, "grad_norm": 0.6537142992019653, "learning_rate": 5.572354642852034e-07, "loss": 0.024, "step": 213430 }, { "epoch": 1.7270005663888663, "grad_norm": 0.522598922252655, "learning_rate": 5.569115697716104e-07, "loss": 0.0215, "step": 213440 }, { "epoch": 1.7270814790840683, "grad_norm": 0.7921897768974304, "learning_rate": 5.565877638660383e-07, "loss": 0.0238, "step": 213450 }, { "epoch": 1.7271623917792702, "grad_norm": 0.11202681064605713, "learning_rate": 5.562640465749409e-07, "loss": 0.0134, "step": 213460 }, { "epoch": 1.727243304474472, "grad_norm": 0.27856767177581787, "learning_rate": 5.559404179047767e-07, "loss": 0.0177, "step": 213470 }, { "epoch": 1.7273242171696739, "grad_norm": 0.29710471630096436, "learning_rate": 5.556168778620003e-07, "loss": 0.0193, "step": 213480 }, { "epoch": 1.7274051298648758, "grad_norm": 0.1217091754078865, "learning_rate": 5.552934264530607e-07, "loss": 0.0108, "step": 213490 }, { "epoch": 1.7274860425600775, "grad_norm": 0.017435753718018532, "learning_rate": 5.549700636844113e-07, "loss": 0.0131, "step": 213500 }, { "epoch": 1.7275669552552797, "grad_norm": 0.35693004727363586, "learning_rate": 5.546467895625013e-07, "loss": 0.0141, "step": 213510 }, { "epoch": 1.7276478679504814, "grad_norm": 0.8004728555679321, "learning_rate": 5.543236040937744e-07, "loss": 0.0245, "step": 213520 }, { "epoch": 1.7277287806456834, "grad_norm": 0.38227543234825134, "learning_rate": 5.540005072846794e-07, "loss": 0.0219, "step": 213530 }, { "epoch": 1.7278096933408853, "grad_norm": 0.3726901710033417, "learning_rate": 5.536774991416582e-07, "loss": 0.0219, "step": 213540 }, { "epoch": 1.727890606036087, "grad_norm": 0.206150084733963, "learning_rate": 5.533545796711525e-07, "loss": 0.0192, "step": 213550 }, { "epoch": 1.727971518731289, "grad_norm": 0.4451181888580322, "learning_rate": 5.530317488796022e-07, "loss": 0.0109, "step": 213560 }, { "epoch": 1.728052431426491, "grad_norm": 0.008744459599256516, "learning_rate": 5.527090067734459e-07, "loss": 0.0199, "step": 213570 }, { "epoch": 1.7281333441216926, "grad_norm": 0.026689328253269196, "learning_rate": 5.523863533591201e-07, "loss": 0.0062, "step": 213580 }, { "epoch": 1.7282142568168946, "grad_norm": 0.6001180410385132, "learning_rate": 5.520637886430591e-07, "loss": 0.022, "step": 213590 }, { "epoch": 1.7282951695120965, "grad_norm": 0.4052526652812958, "learning_rate": 5.517413126316945e-07, "loss": 0.0235, "step": 213600 }, { "epoch": 1.7283760822072982, "grad_norm": 0.24208411574363708, "learning_rate": 5.514189253314611e-07, "loss": 0.0281, "step": 213610 }, { "epoch": 1.7284569949025002, "grad_norm": 0.38432469964027405, "learning_rate": 5.510966267487844e-07, "loss": 0.0224, "step": 213620 }, { "epoch": 1.728537907597702, "grad_norm": 0.38794225454330444, "learning_rate": 5.507744168900931e-07, "loss": 0.0215, "step": 213630 }, { "epoch": 1.7286188202929038, "grad_norm": 0.543972373008728, "learning_rate": 5.504522957618147e-07, "loss": 0.0191, "step": 213640 }, { "epoch": 1.728699732988106, "grad_norm": 0.4077652096748352, "learning_rate": 5.501302633703698e-07, "loss": 0.0181, "step": 213650 }, { "epoch": 1.7287806456833077, "grad_norm": 0.5811930894851685, "learning_rate": 5.498083197221843e-07, "loss": 0.0202, "step": 213660 }, { "epoch": 1.7288615583785096, "grad_norm": 0.21511198580265045, "learning_rate": 5.494864648236775e-07, "loss": 0.0153, "step": 213670 }, { "epoch": 1.7289424710737116, "grad_norm": 0.27214017510414124, "learning_rate": 5.49164698681266e-07, "loss": 0.0181, "step": 213680 }, { "epoch": 1.7290233837689133, "grad_norm": 0.4443775415420532, "learning_rate": 5.488430213013696e-07, "loss": 0.0254, "step": 213690 }, { "epoch": 1.7291042964641152, "grad_norm": 0.3999631404876709, "learning_rate": 5.485214326904032e-07, "loss": 0.0137, "step": 213700 }, { "epoch": 1.7291852091593172, "grad_norm": 0.11542191356420517, "learning_rate": 5.481999328547771e-07, "loss": 0.0131, "step": 213710 }, { "epoch": 1.729266121854519, "grad_norm": 0.4163447916507721, "learning_rate": 5.478785218009064e-07, "loss": 0.0199, "step": 213720 }, { "epoch": 1.7293470345497208, "grad_norm": 0.3672528564929962, "learning_rate": 5.475571995351997e-07, "loss": 0.021, "step": 213730 }, { "epoch": 1.7294279472449228, "grad_norm": 0.2508798837661743, "learning_rate": 5.472359660640653e-07, "loss": 0.0176, "step": 213740 }, { "epoch": 1.7295088599401245, "grad_norm": 0.26663801074028015, "learning_rate": 5.469148213939091e-07, "loss": 0.0331, "step": 213750 }, { "epoch": 1.7295897726353264, "grad_norm": 0.33100759983062744, "learning_rate": 5.46593765531136e-07, "loss": 0.0212, "step": 213760 }, { "epoch": 1.7296706853305284, "grad_norm": 0.1248847246170044, "learning_rate": 5.462727984821487e-07, "loss": 0.0199, "step": 213770 }, { "epoch": 1.72975159802573, "grad_norm": 0.27025043964385986, "learning_rate": 5.459519202533481e-07, "loss": 0.0103, "step": 213780 }, { "epoch": 1.7298325107209322, "grad_norm": 0.1913624405860901, "learning_rate": 5.456311308511326e-07, "loss": 0.0187, "step": 213790 }, { "epoch": 1.729913423416134, "grad_norm": 0.24831868708133698, "learning_rate": 5.453104302819029e-07, "loss": 0.0157, "step": 213800 }, { "epoch": 1.729994336111336, "grad_norm": 0.1766144335269928, "learning_rate": 5.449898185520507e-07, "loss": 0.0188, "step": 213810 }, { "epoch": 1.7300752488065378, "grad_norm": 0.4656100273132324, "learning_rate": 5.446692956679711e-07, "loss": 0.0138, "step": 213820 }, { "epoch": 1.7301561615017396, "grad_norm": 0.3309519588947296, "learning_rate": 5.443488616360587e-07, "loss": 0.0157, "step": 213830 }, { "epoch": 1.7302370741969415, "grad_norm": 0.15331582725048065, "learning_rate": 5.440285164627013e-07, "loss": 0.0206, "step": 213840 }, { "epoch": 1.7303179868921434, "grad_norm": 0.27660614252090454, "learning_rate": 5.43708260154287e-07, "loss": 0.019, "step": 213850 }, { "epoch": 1.7303988995873452, "grad_norm": 0.44947630167007446, "learning_rate": 5.433880927172053e-07, "loss": 0.0198, "step": 213860 }, { "epoch": 1.730479812282547, "grad_norm": 0.47088050842285156, "learning_rate": 5.430680141578382e-07, "loss": 0.0219, "step": 213870 }, { "epoch": 1.730560724977749, "grad_norm": 0.5498777031898499, "learning_rate": 5.427480244825717e-07, "loss": 0.0213, "step": 213880 }, { "epoch": 1.7306416376729508, "grad_norm": 0.40692856907844543, "learning_rate": 5.424281236977868e-07, "loss": 0.0181, "step": 213890 }, { "epoch": 1.730722550368153, "grad_norm": 0.4159921407699585, "learning_rate": 5.421083118098607e-07, "loss": 0.0178, "step": 213900 }, { "epoch": 1.7308034630633546, "grad_norm": 0.33346495032310486, "learning_rate": 5.417885888251745e-07, "loss": 0.0128, "step": 213910 }, { "epoch": 1.7308843757585564, "grad_norm": 0.38072824478149414, "learning_rate": 5.41468954750104e-07, "loss": 0.0131, "step": 213920 }, { "epoch": 1.7309652884537585, "grad_norm": 0.5465502738952637, "learning_rate": 5.411494095910208e-07, "loss": 0.0197, "step": 213930 }, { "epoch": 1.7310462011489602, "grad_norm": 0.19901679456233978, "learning_rate": 5.40829953354301e-07, "loss": 0.0105, "step": 213940 }, { "epoch": 1.7311271138441622, "grad_norm": 0.5081601142883301, "learning_rate": 5.405105860463133e-07, "loss": 0.0158, "step": 213950 }, { "epoch": 1.7312080265393641, "grad_norm": 0.3169511556625366, "learning_rate": 5.401913076734283e-07, "loss": 0.0209, "step": 213960 }, { "epoch": 1.7312889392345658, "grad_norm": 0.6828960180282593, "learning_rate": 5.398721182420119e-07, "loss": 0.0131, "step": 213970 }, { "epoch": 1.7313698519297678, "grad_norm": 0.1576712280511856, "learning_rate": 5.395530177584307e-07, "loss": 0.0125, "step": 213980 }, { "epoch": 1.7314507646249697, "grad_norm": 0.3768405318260193, "learning_rate": 5.392340062290486e-07, "loss": 0.0172, "step": 213990 }, { "epoch": 1.7315316773201714, "grad_norm": 0.29105857014656067, "learning_rate": 5.389150836602264e-07, "loss": 0.0222, "step": 214000 }, { "epoch": 1.7316125900153734, "grad_norm": 0.24482101202011108, "learning_rate": 5.385962500583248e-07, "loss": 0.0235, "step": 214010 }, { "epoch": 1.7316935027105753, "grad_norm": 0.4499695897102356, "learning_rate": 5.382775054297041e-07, "loss": 0.0141, "step": 214020 }, { "epoch": 1.731774415405777, "grad_norm": 0.5856263637542725, "learning_rate": 5.379588497807181e-07, "loss": 0.0277, "step": 214030 }, { "epoch": 1.7318553281009792, "grad_norm": 0.43446671962738037, "learning_rate": 5.37640283117723e-07, "loss": 0.0247, "step": 214040 }, { "epoch": 1.731936240796181, "grad_norm": 0.5101196765899658, "learning_rate": 5.37321805447073e-07, "loss": 0.0225, "step": 214050 }, { "epoch": 1.7320171534913826, "grad_norm": 0.616652250289917, "learning_rate": 5.370034167751182e-07, "loss": 0.0147, "step": 214060 }, { "epoch": 1.7320980661865848, "grad_norm": 0.26093944907188416, "learning_rate": 5.366851171082072e-07, "loss": 0.017, "step": 214070 }, { "epoch": 1.7321789788817865, "grad_norm": 0.15150286257266998, "learning_rate": 5.36366906452691e-07, "loss": 0.0266, "step": 214080 }, { "epoch": 1.7322598915769885, "grad_norm": 0.46152910590171814, "learning_rate": 5.360487848149115e-07, "loss": 0.0172, "step": 214090 }, { "epoch": 1.7323408042721904, "grad_norm": 0.49379393458366394, "learning_rate": 5.357307522012168e-07, "loss": 0.0159, "step": 214100 }, { "epoch": 1.7324217169673921, "grad_norm": 0.5801241993904114, "learning_rate": 5.354128086179483e-07, "loss": 0.0182, "step": 214110 }, { "epoch": 1.732502629662594, "grad_norm": 0.16358263790607452, "learning_rate": 5.350949540714439e-07, "loss": 0.0201, "step": 214120 }, { "epoch": 1.732583542357796, "grad_norm": 0.181136816740036, "learning_rate": 5.347771885680464e-07, "loss": 0.0275, "step": 214130 }, { "epoch": 1.7326644550529977, "grad_norm": 0.6382445693016052, "learning_rate": 5.344595121140911e-07, "loss": 0.0232, "step": 214140 }, { "epoch": 1.7327453677481996, "grad_norm": 0.13843828439712524, "learning_rate": 5.341419247159136e-07, "loss": 0.0243, "step": 214150 }, { "epoch": 1.7328262804434016, "grad_norm": 0.4327358305454254, "learning_rate": 5.338244263798475e-07, "loss": 0.0174, "step": 214160 }, { "epoch": 1.7329071931386033, "grad_norm": 0.18751047551631927, "learning_rate": 5.335070171122248e-07, "loss": 0.0143, "step": 214170 }, { "epoch": 1.7329881058338055, "grad_norm": 0.09354974329471588, "learning_rate": 5.331896969193756e-07, "loss": 0.0179, "step": 214180 }, { "epoch": 1.7330690185290072, "grad_norm": 0.33759090304374695, "learning_rate": 5.328724658076279e-07, "loss": 0.0097, "step": 214190 }, { "epoch": 1.7331499312242091, "grad_norm": 0.601440966129303, "learning_rate": 5.325553237833075e-07, "loss": 0.0245, "step": 214200 }, { "epoch": 1.733230843919411, "grad_norm": 0.14146751165390015, "learning_rate": 5.322382708527418e-07, "loss": 0.0153, "step": 214210 }, { "epoch": 1.7333117566146128, "grad_norm": 0.21462193131446838, "learning_rate": 5.319213070222506e-07, "loss": 0.0182, "step": 214220 }, { "epoch": 1.7333926693098147, "grad_norm": 0.3880365788936615, "learning_rate": 5.316044322981561e-07, "loss": 0.0103, "step": 214230 }, { "epoch": 1.7334735820050167, "grad_norm": 0.4679574966430664, "learning_rate": 5.312876466867795e-07, "loss": 0.0149, "step": 214240 }, { "epoch": 1.7335544947002184, "grad_norm": 0.3569199740886688, "learning_rate": 5.30970950194436e-07, "loss": 0.0201, "step": 214250 }, { "epoch": 1.7336354073954203, "grad_norm": 0.42981207370758057, "learning_rate": 5.306543428274413e-07, "loss": 0.0168, "step": 214260 }, { "epoch": 1.7337163200906223, "grad_norm": 0.36030086874961853, "learning_rate": 5.303378245921131e-07, "loss": 0.009, "step": 214270 }, { "epoch": 1.733797232785824, "grad_norm": 0.28219425678253174, "learning_rate": 5.300213954947586e-07, "loss": 0.0151, "step": 214280 }, { "epoch": 1.733878145481026, "grad_norm": 0.5474379062652588, "learning_rate": 5.297050555416917e-07, "loss": 0.0156, "step": 214290 }, { "epoch": 1.7339590581762279, "grad_norm": 0.4738869071006775, "learning_rate": 5.293888047392215e-07, "loss": 0.0183, "step": 214300 }, { "epoch": 1.7340399708714296, "grad_norm": 0.30224019289016724, "learning_rate": 5.290726430936516e-07, "loss": 0.0163, "step": 214310 }, { "epoch": 1.7341208835666317, "grad_norm": 0.6293721795082092, "learning_rate": 5.287565706112907e-07, "loss": 0.0127, "step": 214320 }, { "epoch": 1.7342017962618335, "grad_norm": 0.42146241664886475, "learning_rate": 5.284405872984405e-07, "loss": 0.0162, "step": 214330 }, { "epoch": 1.7342827089570354, "grad_norm": 0.24970926344394684, "learning_rate": 5.281246931614026e-07, "loss": 0.0144, "step": 214340 }, { "epoch": 1.7343636216522373, "grad_norm": 0.36963093280792236, "learning_rate": 5.278088882064775e-07, "loss": 0.0131, "step": 214350 }, { "epoch": 1.734444534347439, "grad_norm": 0.12659117579460144, "learning_rate": 5.274931724399629e-07, "loss": 0.0163, "step": 214360 }, { "epoch": 1.734525447042641, "grad_norm": 0.2771291136741638, "learning_rate": 5.27177545868155e-07, "loss": 0.0184, "step": 214370 }, { "epoch": 1.734606359737843, "grad_norm": 0.4444602131843567, "learning_rate": 5.26862008497348e-07, "loss": 0.0194, "step": 214380 }, { "epoch": 1.7346872724330447, "grad_norm": 0.03538667783141136, "learning_rate": 5.265465603338355e-07, "loss": 0.0158, "step": 214390 }, { "epoch": 1.7347681851282466, "grad_norm": 0.19887764751911163, "learning_rate": 5.262312013839083e-07, "loss": 0.0186, "step": 214400 }, { "epoch": 1.7348490978234485, "grad_norm": 0.2396003007888794, "learning_rate": 5.259159316538548e-07, "loss": 0.0182, "step": 214410 }, { "epoch": 1.7349300105186503, "grad_norm": 0.402128666639328, "learning_rate": 5.256007511499616e-07, "loss": 0.0122, "step": 214420 }, { "epoch": 1.7350109232138524, "grad_norm": 0.1517850011587143, "learning_rate": 5.252856598785177e-07, "loss": 0.0099, "step": 214430 }, { "epoch": 1.7350918359090541, "grad_norm": 0.28507012128829956, "learning_rate": 5.249706578458041e-07, "loss": 0.0219, "step": 214440 }, { "epoch": 1.7351727486042559, "grad_norm": 0.3851182460784912, "learning_rate": 5.246557450581024e-07, "loss": 0.0166, "step": 214450 }, { "epoch": 1.735253661299458, "grad_norm": 0.3467448651790619, "learning_rate": 5.243409215216961e-07, "loss": 0.013, "step": 214460 }, { "epoch": 1.7353345739946597, "grad_norm": 0.5690191388130188, "learning_rate": 5.240261872428603e-07, "loss": 0.0205, "step": 214470 }, { "epoch": 1.7354154866898617, "grad_norm": 0.1375429481267929, "learning_rate": 5.237115422278721e-07, "loss": 0.0239, "step": 214480 }, { "epoch": 1.7354963993850636, "grad_norm": 0.1226353719830513, "learning_rate": 5.23396986483008e-07, "loss": 0.0151, "step": 214490 }, { "epoch": 1.7355773120802653, "grad_norm": 0.4208415448665619, "learning_rate": 5.230825200145412e-07, "loss": 0.0178, "step": 214500 }, { "epoch": 1.7356582247754673, "grad_norm": 0.7004709839820862, "learning_rate": 5.227681428287417e-07, "loss": 0.0293, "step": 214510 }, { "epoch": 1.7357391374706692, "grad_norm": 0.3288619816303253, "learning_rate": 5.224538549318803e-07, "loss": 0.0129, "step": 214520 }, { "epoch": 1.735820050165871, "grad_norm": 0.31806743144989014, "learning_rate": 5.221396563302239e-07, "loss": 0.015, "step": 214530 }, { "epoch": 1.7359009628610729, "grad_norm": 0.20310260355472565, "learning_rate": 5.218255470300387e-07, "loss": 0.0153, "step": 214540 }, { "epoch": 1.7359818755562748, "grad_norm": 0.2536528706550598, "learning_rate": 5.215115270375892e-07, "loss": 0.0184, "step": 214550 }, { "epoch": 1.7360627882514765, "grad_norm": 0.21998579800128937, "learning_rate": 5.211975963591381e-07, "loss": 0.0237, "step": 214560 }, { "epoch": 1.7361437009466787, "grad_norm": 0.37613654136657715, "learning_rate": 5.208837550009455e-07, "loss": 0.0238, "step": 214570 }, { "epoch": 1.7362246136418804, "grad_norm": 0.40140920877456665, "learning_rate": 5.205700029692707e-07, "loss": 0.0238, "step": 214580 }, { "epoch": 1.7363055263370821, "grad_norm": 0.2539859712123871, "learning_rate": 5.202563402703703e-07, "loss": 0.0161, "step": 214590 }, { "epoch": 1.7363864390322843, "grad_norm": 0.26792219281196594, "learning_rate": 5.199427669105006e-07, "loss": 0.0197, "step": 214600 }, { "epoch": 1.736467351727486, "grad_norm": 0.039521459490060806, "learning_rate": 5.196292828959143e-07, "loss": 0.0162, "step": 214610 }, { "epoch": 1.736548264422688, "grad_norm": 0.5949604511260986, "learning_rate": 5.193158882328636e-07, "loss": 0.0195, "step": 214620 }, { "epoch": 1.7366291771178899, "grad_norm": 0.3876340687274933, "learning_rate": 5.190025829275986e-07, "loss": 0.025, "step": 214630 }, { "epoch": 1.7367100898130916, "grad_norm": 0.5010242462158203, "learning_rate": 5.186893669863657e-07, "loss": 0.0211, "step": 214640 }, { "epoch": 1.7367910025082935, "grad_norm": 0.4105589687824249, "learning_rate": 5.183762404154152e-07, "loss": 0.0175, "step": 214650 }, { "epoch": 1.7368719152034955, "grad_norm": 0.8346501588821411, "learning_rate": 5.180632032209887e-07, "loss": 0.023, "step": 214660 }, { "epoch": 1.7369528278986972, "grad_norm": 0.41703560948371887, "learning_rate": 5.177502554093284e-07, "loss": 0.0259, "step": 214670 }, { "epoch": 1.7370337405938991, "grad_norm": 0.3078143000602722, "learning_rate": 5.17437396986678e-07, "loss": 0.015, "step": 214680 }, { "epoch": 1.737114653289101, "grad_norm": 0.7263699173927307, "learning_rate": 5.171246279592757e-07, "loss": 0.0205, "step": 214690 }, { "epoch": 1.7371955659843028, "grad_norm": 0.4111878573894501, "learning_rate": 5.168119483333589e-07, "loss": 0.0174, "step": 214700 }, { "epoch": 1.737276478679505, "grad_norm": 0.30746984481811523, "learning_rate": 5.164993581151629e-07, "loss": 0.0314, "step": 214710 }, { "epoch": 1.7373573913747067, "grad_norm": 0.41489243507385254, "learning_rate": 5.161868573109224e-07, "loss": 0.019, "step": 214720 }, { "epoch": 1.7374383040699086, "grad_norm": 0.3241032361984253, "learning_rate": 5.158744459268695e-07, "loss": 0.0096, "step": 214730 }, { "epoch": 1.7375192167651106, "grad_norm": 0.24181069433689117, "learning_rate": 5.155621239692343e-07, "loss": 0.035, "step": 214740 }, { "epoch": 1.7376001294603123, "grad_norm": 0.4140608012676239, "learning_rate": 5.152498914442456e-07, "loss": 0.0226, "step": 214750 }, { "epoch": 1.7376810421555142, "grad_norm": 0.3258456587791443, "learning_rate": 5.149377483581297e-07, "loss": 0.0255, "step": 214760 }, { "epoch": 1.7377619548507162, "grad_norm": 0.3252216875553131, "learning_rate": 5.146256947171124e-07, "loss": 0.0202, "step": 214770 }, { "epoch": 1.7378428675459179, "grad_norm": 0.21320296823978424, "learning_rate": 5.143137305274165e-07, "loss": 0.0208, "step": 214780 }, { "epoch": 1.7379237802411198, "grad_norm": 0.18593449890613556, "learning_rate": 5.140018557952631e-07, "loss": 0.0137, "step": 214790 }, { "epoch": 1.7380046929363218, "grad_norm": 0.5334717631340027, "learning_rate": 5.136900705268727e-07, "loss": 0.0156, "step": 214800 }, { "epoch": 1.7380856056315235, "grad_norm": 0.3464302122592926, "learning_rate": 5.133783747284627e-07, "loss": 0.0183, "step": 214810 }, { "epoch": 1.7381665183267254, "grad_norm": 0.3554430902004242, "learning_rate": 5.130667684062496e-07, "loss": 0.0231, "step": 214820 }, { "epoch": 1.7382474310219274, "grad_norm": 0.24918346107006073, "learning_rate": 5.127552515664464e-07, "loss": 0.0165, "step": 214830 }, { "epoch": 1.738328343717129, "grad_norm": 0.4380670487880707, "learning_rate": 5.124438242152686e-07, "loss": 0.0188, "step": 214840 }, { "epoch": 1.7384092564123312, "grad_norm": 0.5183207392692566, "learning_rate": 5.121324863589239e-07, "loss": 0.0198, "step": 214850 }, { "epoch": 1.738490169107533, "grad_norm": 0.35391896963119507, "learning_rate": 5.118212380036219e-07, "loss": 0.018, "step": 214860 }, { "epoch": 1.738571081802735, "grad_norm": 0.5218855738639832, "learning_rate": 5.115100791555711e-07, "loss": 0.0172, "step": 214870 }, { "epoch": 1.7386519944979368, "grad_norm": 0.13510441780090332, "learning_rate": 5.111990098209768e-07, "loss": 0.0241, "step": 214880 }, { "epoch": 1.7387329071931386, "grad_norm": 0.4778693914413452, "learning_rate": 5.108880300060403e-07, "loss": 0.0315, "step": 214890 }, { "epoch": 1.7388138198883405, "grad_norm": 0.2536948323249817, "learning_rate": 5.105771397169662e-07, "loss": 0.0132, "step": 214900 }, { "epoch": 1.7388947325835424, "grad_norm": 0.004062039777636528, "learning_rate": 5.102663389599532e-07, "loss": 0.0153, "step": 214910 }, { "epoch": 1.7389756452787442, "grad_norm": 0.34472930431365967, "learning_rate": 5.099556277411998e-07, "loss": 0.0166, "step": 214920 }, { "epoch": 1.739056557973946, "grad_norm": 0.36164727807044983, "learning_rate": 5.096450060669028e-07, "loss": 0.0125, "step": 214930 }, { "epoch": 1.739137470669148, "grad_norm": 0.1694585084915161, "learning_rate": 5.093344739432566e-07, "loss": 0.0092, "step": 214940 }, { "epoch": 1.7392183833643498, "grad_norm": 0.4434588551521301, "learning_rate": 5.090240313764539e-07, "loss": 0.0175, "step": 214950 }, { "epoch": 1.7392992960595517, "grad_norm": 0.31020745635032654, "learning_rate": 5.087136783726859e-07, "loss": 0.0163, "step": 214960 }, { "epoch": 1.7393802087547536, "grad_norm": 0.1757546216249466, "learning_rate": 5.084034149381423e-07, "loss": 0.0187, "step": 214970 }, { "epoch": 1.7394611214499553, "grad_norm": 0.44894617795944214, "learning_rate": 5.080932410790107e-07, "loss": 0.0213, "step": 214980 }, { "epoch": 1.7395420341451575, "grad_norm": 0.21083006262779236, "learning_rate": 5.077831568014763e-07, "loss": 0.0175, "step": 214990 }, { "epoch": 1.7396229468403592, "grad_norm": 0.5765217542648315, "learning_rate": 5.07473162111723e-07, "loss": 0.019, "step": 215000 }, { "epoch": 1.7397038595355612, "grad_norm": 0.562735915184021, "learning_rate": 5.071632570159335e-07, "loss": 0.0312, "step": 215010 }, { "epoch": 1.739784772230763, "grad_norm": 0.26563000679016113, "learning_rate": 5.068534415202886e-07, "loss": 0.0269, "step": 215020 }, { "epoch": 1.7398656849259648, "grad_norm": 0.21465125679969788, "learning_rate": 5.065437156309649e-07, "loss": 0.0181, "step": 215030 }, { "epoch": 1.7399465976211668, "grad_norm": 0.5206040143966675, "learning_rate": 5.062340793541425e-07, "loss": 0.045, "step": 215040 }, { "epoch": 1.7400275103163687, "grad_norm": 0.4990500807762146, "learning_rate": 5.059245326959927e-07, "loss": 0.0225, "step": 215050 }, { "epoch": 1.7401084230115704, "grad_norm": 0.2730879485607147, "learning_rate": 5.05615075662692e-07, "loss": 0.0175, "step": 215060 }, { "epoch": 1.7401893357067724, "grad_norm": 0.43759235739707947, "learning_rate": 5.053057082604113e-07, "loss": 0.0206, "step": 215070 }, { "epoch": 1.7402702484019743, "grad_norm": 0.549706220626831, "learning_rate": 5.049964304953176e-07, "loss": 0.0141, "step": 215080 }, { "epoch": 1.740351161097176, "grad_norm": 0.16184085607528687, "learning_rate": 5.046872423735816e-07, "loss": 0.0133, "step": 215090 }, { "epoch": 1.7404320737923782, "grad_norm": 0.3463270962238312, "learning_rate": 5.043781439013695e-07, "loss": 0.013, "step": 215100 }, { "epoch": 1.74051298648758, "grad_norm": 0.20487840473651886, "learning_rate": 5.040691350848431e-07, "loss": 0.0183, "step": 215110 }, { "epoch": 1.7405938991827816, "grad_norm": 0.5399836897850037, "learning_rate": 5.037602159301668e-07, "loss": 0.0176, "step": 215120 }, { "epoch": 1.7406748118779838, "grad_norm": 0.2686760425567627, "learning_rate": 5.034513864435014e-07, "loss": 0.0147, "step": 215130 }, { "epoch": 1.7407557245731855, "grad_norm": 0.2506863474845886, "learning_rate": 5.031426466310058e-07, "loss": 0.0154, "step": 215140 }, { "epoch": 1.7408366372683874, "grad_norm": 0.12237724661827087, "learning_rate": 5.02833996498836e-07, "loss": 0.012, "step": 215150 }, { "epoch": 1.7409175499635894, "grad_norm": 0.3108687400817871, "learning_rate": 5.02525436053149e-07, "loss": 0.0238, "step": 215160 }, { "epoch": 1.740998462658791, "grad_norm": 0.17754791676998138, "learning_rate": 5.022169653000968e-07, "loss": 0.0157, "step": 215170 }, { "epoch": 1.741079375353993, "grad_norm": 0.7996717095375061, "learning_rate": 5.019085842458327e-07, "loss": 0.0107, "step": 215180 }, { "epoch": 1.741160288049195, "grad_norm": 0.30568721890449524, "learning_rate": 5.016002928965058e-07, "loss": 0.0244, "step": 215190 }, { "epoch": 1.7412412007443967, "grad_norm": 0.6688517332077026, "learning_rate": 5.012920912582641e-07, "loss": 0.0211, "step": 215200 }, { "epoch": 1.7413221134395986, "grad_norm": 0.18237143754959106, "learning_rate": 5.00983979337255e-07, "loss": 0.0181, "step": 215210 }, { "epoch": 1.7414030261348006, "grad_norm": 0.16345354914665222, "learning_rate": 5.006759571396214e-07, "loss": 0.0113, "step": 215220 }, { "epoch": 1.7414839388300023, "grad_norm": 0.2219846099615097, "learning_rate": 5.003680246715092e-07, "loss": 0.0112, "step": 215230 }, { "epoch": 1.7415648515252045, "grad_norm": 0.6719872355461121, "learning_rate": 5.00060181939056e-07, "loss": 0.0298, "step": 215240 }, { "epoch": 1.7416457642204062, "grad_norm": 0.004019506741315126, "learning_rate": 4.99752428948403e-07, "loss": 0.0159, "step": 215250 }, { "epoch": 1.741726676915608, "grad_norm": 0.6738337278366089, "learning_rate": 4.994447657056889e-07, "loss": 0.0225, "step": 215260 }, { "epoch": 1.74180758961081, "grad_norm": 0.28645071387290955, "learning_rate": 4.991371922170457e-07, "loss": 0.0144, "step": 215270 }, { "epoch": 1.7418885023060118, "grad_norm": 0.19797571003437042, "learning_rate": 4.988297084886107e-07, "loss": 0.0151, "step": 215280 }, { "epoch": 1.7419694150012137, "grad_norm": 0.3835645914077759, "learning_rate": 4.98522314526515e-07, "loss": 0.0156, "step": 215290 }, { "epoch": 1.7420503276964157, "grad_norm": 0.2943435311317444, "learning_rate": 4.982150103368876e-07, "loss": 0.0151, "step": 215300 }, { "epoch": 1.7421312403916174, "grad_norm": 0.2714850902557373, "learning_rate": 4.979077959258588e-07, "loss": 0.0145, "step": 215310 }, { "epoch": 1.7422121530868193, "grad_norm": 0.15202215313911438, "learning_rate": 4.976006712995546e-07, "loss": 0.0162, "step": 215320 }, { "epoch": 1.7422930657820213, "grad_norm": 0.6149230003356934, "learning_rate": 4.972936364641001e-07, "loss": 0.016, "step": 215330 }, { "epoch": 1.742373978477223, "grad_norm": 0.537624180316925, "learning_rate": 4.969866914256182e-07, "loss": 0.0182, "step": 215340 }, { "epoch": 1.742454891172425, "grad_norm": 0.09842866659164429, "learning_rate": 4.966798361902303e-07, "loss": 0.0137, "step": 215350 }, { "epoch": 1.7425358038676269, "grad_norm": 0.4620705246925354, "learning_rate": 4.963730707640568e-07, "loss": 0.0335, "step": 215360 }, { "epoch": 1.7426167165628286, "grad_norm": 0.40285584330558777, "learning_rate": 4.96066395153214e-07, "loss": 0.013, "step": 215370 }, { "epoch": 1.7426976292580307, "grad_norm": 0.5011897683143616, "learning_rate": 4.957598093638194e-07, "loss": 0.0146, "step": 215380 }, { "epoch": 1.7427785419532325, "grad_norm": 0.5712392926216125, "learning_rate": 4.954533134019862e-07, "loss": 0.0314, "step": 215390 }, { "epoch": 1.7428594546484344, "grad_norm": 0.5775241851806641, "learning_rate": 4.951469072738274e-07, "loss": 0.0175, "step": 215400 }, { "epoch": 1.7429403673436363, "grad_norm": 0.38926205039024353, "learning_rate": 4.948405909854526e-07, "loss": 0.0105, "step": 215410 }, { "epoch": 1.743021280038838, "grad_norm": 0.2718609869480133, "learning_rate": 4.945343645429729e-07, "loss": 0.0193, "step": 215420 }, { "epoch": 1.74310219273404, "grad_norm": 0.11170589923858643, "learning_rate": 4.94228227952493e-07, "loss": 0.0148, "step": 215430 }, { "epoch": 1.743183105429242, "grad_norm": 0.49074745178222656, "learning_rate": 4.939221812201179e-07, "loss": 0.0187, "step": 215440 }, { "epoch": 1.7432640181244436, "grad_norm": 0.5675808787345886, "learning_rate": 4.936162243519537e-07, "loss": 0.0363, "step": 215450 }, { "epoch": 1.7433449308196456, "grad_norm": 0.12207978963851929, "learning_rate": 4.933103573540993e-07, "loss": 0.0152, "step": 215460 }, { "epoch": 1.7434258435148475, "grad_norm": 0.3928716480731964, "learning_rate": 4.930045802326566e-07, "loss": 0.0265, "step": 215470 }, { "epoch": 1.7435067562100492, "grad_norm": 0.2799692451953888, "learning_rate": 4.926988929937238e-07, "loss": 0.0244, "step": 215480 }, { "epoch": 1.7435876689052512, "grad_norm": 0.25987350940704346, "learning_rate": 4.923932956433941e-07, "loss": 0.0329, "step": 215490 }, { "epoch": 1.7436685816004531, "grad_norm": 0.3343239426612854, "learning_rate": 4.920877881877651e-07, "loss": 0.0155, "step": 215500 }, { "epoch": 1.7437494942956548, "grad_norm": 0.2809058725833893, "learning_rate": 4.917823706329295e-07, "loss": 0.0164, "step": 215510 }, { "epoch": 1.743830406990857, "grad_norm": 0.40230366587638855, "learning_rate": 4.914770429849758e-07, "loss": 0.0275, "step": 215520 }, { "epoch": 1.7439113196860587, "grad_norm": 0.732601523399353, "learning_rate": 4.911718052499947e-07, "loss": 0.0173, "step": 215530 }, { "epoch": 1.7439922323812607, "grad_norm": 0.2199828177690506, "learning_rate": 4.908666574340742e-07, "loss": 0.0229, "step": 215540 }, { "epoch": 1.7440731450764626, "grad_norm": 0.500042200088501, "learning_rate": 4.905615995432983e-07, "loss": 0.02, "step": 215550 }, { "epoch": 1.7441540577716643, "grad_norm": 0.16991375386714935, "learning_rate": 4.90256631583752e-07, "loss": 0.0346, "step": 215560 }, { "epoch": 1.7442349704668663, "grad_norm": 0.22008872032165527, "learning_rate": 4.899517535615167e-07, "loss": 0.0094, "step": 215570 }, { "epoch": 1.7443158831620682, "grad_norm": 0.3509450852870941, "learning_rate": 4.896469654826725e-07, "loss": 0.0138, "step": 215580 }, { "epoch": 1.74439679585727, "grad_norm": 0.27226993441581726, "learning_rate": 4.893422673532972e-07, "loss": 0.0141, "step": 215590 }, { "epoch": 1.7444777085524719, "grad_norm": 0.26680707931518555, "learning_rate": 4.890376591794677e-07, "loss": 0.0166, "step": 215600 }, { "epoch": 1.7445586212476738, "grad_norm": 0.17468221485614777, "learning_rate": 4.887331409672607e-07, "loss": 0.0232, "step": 215610 }, { "epoch": 1.7446395339428755, "grad_norm": 0.14116312563419342, "learning_rate": 4.884287127227467e-07, "loss": 0.0164, "step": 215620 }, { "epoch": 1.7447204466380775, "grad_norm": 1.2611825466156006, "learning_rate": 4.881243744519964e-07, "loss": 0.0244, "step": 215630 }, { "epoch": 1.7448013593332794, "grad_norm": 0.7530478239059448, "learning_rate": 4.878201261610827e-07, "loss": 0.0103, "step": 215640 }, { "epoch": 1.7448822720284811, "grad_norm": 0.08452558517456055, "learning_rate": 4.875159678560698e-07, "loss": 0.0156, "step": 215650 }, { "epoch": 1.7449631847236833, "grad_norm": 0.26652035117149353, "learning_rate": 4.872118995430237e-07, "loss": 0.0248, "step": 215660 }, { "epoch": 1.745044097418885, "grad_norm": 0.19197271764278412, "learning_rate": 4.869079212280109e-07, "loss": 0.0212, "step": 215670 }, { "epoch": 1.745125010114087, "grad_norm": 0.47412586212158203, "learning_rate": 4.866040329170907e-07, "loss": 0.0274, "step": 215680 }, { "epoch": 1.7452059228092889, "grad_norm": 0.323405385017395, "learning_rate": 4.863002346163253e-07, "loss": 0.0163, "step": 215690 }, { "epoch": 1.7452868355044906, "grad_norm": 0.3274262547492981, "learning_rate": 4.859965263317745e-07, "loss": 0.0185, "step": 215700 }, { "epoch": 1.7453677481996925, "grad_norm": 0.5303484201431274, "learning_rate": 4.856929080694911e-07, "loss": 0.0221, "step": 215710 }, { "epoch": 1.7454486608948945, "grad_norm": 0.26564955711364746, "learning_rate": 4.853893798355336e-07, "loss": 0.0212, "step": 215720 }, { "epoch": 1.7455295735900962, "grad_norm": 0.2576703429222107, "learning_rate": 4.850859416359543e-07, "loss": 0.0119, "step": 215730 }, { "epoch": 1.7456104862852981, "grad_norm": 0.18811605870723724, "learning_rate": 4.847825934768041e-07, "loss": 0.0194, "step": 215740 }, { "epoch": 1.7456913989805, "grad_norm": 0.3935118317604065, "learning_rate": 4.844793353641336e-07, "loss": 0.0198, "step": 215750 }, { "epoch": 1.7457723116757018, "grad_norm": 0.2850140929222107, "learning_rate": 4.841761673039896e-07, "loss": 0.0232, "step": 215760 }, { "epoch": 1.745853224370904, "grad_norm": 0.5053084492683411, "learning_rate": 4.838730893024185e-07, "loss": 0.0282, "step": 215770 }, { "epoch": 1.7459341370661057, "grad_norm": 0.20907212793827057, "learning_rate": 4.835701013654654e-07, "loss": 0.0164, "step": 215780 }, { "epoch": 1.7460150497613074, "grad_norm": 0.37295955419540405, "learning_rate": 4.832672034991703e-07, "loss": 0.0203, "step": 215790 }, { "epoch": 1.7460959624565096, "grad_norm": 0.21973150968551636, "learning_rate": 4.829643957095781e-07, "loss": 0.0152, "step": 215800 }, { "epoch": 1.7461768751517113, "grad_norm": 0.2565702795982361, "learning_rate": 4.826616780027238e-07, "loss": 0.0133, "step": 215810 }, { "epoch": 1.7462577878469132, "grad_norm": 0.38961344957351685, "learning_rate": 4.823590503846448e-07, "loss": 0.013, "step": 215820 }, { "epoch": 1.7463387005421152, "grad_norm": 0.45571064949035645, "learning_rate": 4.820565128613797e-07, "loss": 0.0168, "step": 215830 }, { "epoch": 1.7464196132373169, "grad_norm": 0.5250520706176758, "learning_rate": 4.81754065438958e-07, "loss": 0.0186, "step": 215840 }, { "epoch": 1.7465005259325188, "grad_norm": 0.26072564721107483, "learning_rate": 4.814517081234127e-07, "loss": 0.0178, "step": 215850 }, { "epoch": 1.7465814386277208, "grad_norm": 0.13170793652534485, "learning_rate": 4.811494409207757e-07, "loss": 0.0264, "step": 215860 }, { "epoch": 1.7466623513229225, "grad_norm": 0.2150847464799881, "learning_rate": 4.808472638370715e-07, "loss": 0.0154, "step": 215870 }, { "epoch": 1.7467432640181244, "grad_norm": 0.3849613666534424, "learning_rate": 4.805451768783292e-07, "loss": 0.0075, "step": 215880 }, { "epoch": 1.7468241767133263, "grad_norm": 0.3772086501121521, "learning_rate": 4.802431800505731e-07, "loss": 0.0187, "step": 215890 }, { "epoch": 1.746905089408528, "grad_norm": 0.33534693717956543, "learning_rate": 4.799412733598236e-07, "loss": 0.0212, "step": 215900 }, { "epoch": 1.7469860021037302, "grad_norm": 0.23165656626224518, "learning_rate": 4.796394568121038e-07, "loss": 0.0173, "step": 215910 }, { "epoch": 1.747066914798932, "grad_norm": 0.2464444935321808, "learning_rate": 4.793377304134334e-07, "loss": 0.0192, "step": 215920 }, { "epoch": 1.7471478274941337, "grad_norm": 0.4654861390590668, "learning_rate": 4.790360941698263e-07, "loss": 0.027, "step": 215930 }, { "epoch": 1.7472287401893358, "grad_norm": 0.13697700202465057, "learning_rate": 4.787345480873013e-07, "loss": 0.0098, "step": 215940 }, { "epoch": 1.7473096528845375, "grad_norm": 0.3526504933834076, "learning_rate": 4.784330921718705e-07, "loss": 0.0169, "step": 215950 }, { "epoch": 1.7473905655797395, "grad_norm": 1.0810356140136719, "learning_rate": 4.781317264295465e-07, "loss": 0.0185, "step": 215960 }, { "epoch": 1.7474714782749414, "grad_norm": 0.22857578098773956, "learning_rate": 4.77830450866339e-07, "loss": 0.0164, "step": 215970 }, { "epoch": 1.7475523909701431, "grad_norm": 0.41512101888656616, "learning_rate": 4.775292654882568e-07, "loss": 0.0208, "step": 215980 }, { "epoch": 1.747633303665345, "grad_norm": 0.2174787074327469, "learning_rate": 4.772281703013054e-07, "loss": 0.0123, "step": 215990 }, { "epoch": 1.747714216360547, "grad_norm": 0.4197913408279419, "learning_rate": 4.769271653114904e-07, "loss": 0.0082, "step": 216000 }, { "epoch": 1.7477951290557487, "grad_norm": 0.5320416688919067, "learning_rate": 4.7662625052481373e-07, "loss": 0.0155, "step": 216010 }, { "epoch": 1.7478760417509507, "grad_norm": 0.2365705519914627, "learning_rate": 4.7632542594727935e-07, "loss": 0.0195, "step": 216020 }, { "epoch": 1.7479569544461526, "grad_norm": 0.3351876139640808, "learning_rate": 4.760246915848832e-07, "loss": 0.0194, "step": 216030 }, { "epoch": 1.7480378671413543, "grad_norm": 0.37623709440231323, "learning_rate": 4.757240474436231e-07, "loss": 0.0167, "step": 216040 }, { "epoch": 1.7481187798365565, "grad_norm": 0.42633986473083496, "learning_rate": 4.754234935294971e-07, "loss": 0.017, "step": 216050 }, { "epoch": 1.7481996925317582, "grad_norm": 0.6357143521308899, "learning_rate": 4.751230298484977e-07, "loss": 0.0382, "step": 216060 }, { "epoch": 1.7482806052269602, "grad_norm": 0.20401187241077423, "learning_rate": 4.7482265640661553e-07, "loss": 0.0223, "step": 216070 }, { "epoch": 1.748361517922162, "grad_norm": 0.35739243030548096, "learning_rate": 4.745223732098442e-07, "loss": 0.0243, "step": 216080 }, { "epoch": 1.7484424306173638, "grad_norm": 0.2849233150482178, "learning_rate": 4.7422218026416886e-07, "loss": 0.021, "step": 216090 }, { "epoch": 1.7485233433125658, "grad_norm": 0.538148820400238, "learning_rate": 4.7392207757557875e-07, "loss": 0.0144, "step": 216100 }, { "epoch": 1.7486042560077677, "grad_norm": 0.2678346633911133, "learning_rate": 4.736220651500584e-07, "loss": 0.0235, "step": 216110 }, { "epoch": 1.7486851687029694, "grad_norm": 0.3708053529262543, "learning_rate": 4.733221429935886e-07, "loss": 0.0212, "step": 216120 }, { "epoch": 1.7487660813981714, "grad_norm": 0.28109312057495117, "learning_rate": 4.7302231111215346e-07, "loss": 0.0151, "step": 216130 }, { "epoch": 1.7488469940933733, "grad_norm": 0.7647177577018738, "learning_rate": 4.727225695117316e-07, "loss": 0.0165, "step": 216140 }, { "epoch": 1.748927906788575, "grad_norm": 0.3586146831512451, "learning_rate": 4.7242291819830035e-07, "loss": 0.0255, "step": 216150 }, { "epoch": 1.749008819483777, "grad_norm": 0.41122007369995117, "learning_rate": 4.7212335717783607e-07, "loss": 0.0154, "step": 216160 }, { "epoch": 1.749089732178979, "grad_norm": 0.5307204127311707, "learning_rate": 4.718238864563124e-07, "loss": 0.0123, "step": 216170 }, { "epoch": 1.7491706448741806, "grad_norm": 0.3214643895626068, "learning_rate": 4.7152450603970225e-07, "loss": 0.0182, "step": 216180 }, { "epoch": 1.7492515575693828, "grad_norm": 0.6070910096168518, "learning_rate": 4.712252159339753e-07, "loss": 0.0257, "step": 216190 }, { "epoch": 1.7493324702645845, "grad_norm": 0.44373369216918945, "learning_rate": 4.7092601614510124e-07, "loss": 0.0244, "step": 216200 }, { "epoch": 1.7494133829597864, "grad_norm": 0.41984379291534424, "learning_rate": 4.7062690667904585e-07, "loss": 0.0179, "step": 216210 }, { "epoch": 1.7494942956549884, "grad_norm": 0.039028555154800415, "learning_rate": 4.7032788754177547e-07, "loss": 0.0129, "step": 216220 }, { "epoch": 1.74957520835019, "grad_norm": 0.32820701599121094, "learning_rate": 4.7002895873925146e-07, "loss": 0.0175, "step": 216230 }, { "epoch": 1.749656121045392, "grad_norm": 0.33767059445381165, "learning_rate": 4.6973012027743905e-07, "loss": 0.0191, "step": 216240 }, { "epoch": 1.749737033740594, "grad_norm": 0.3862658739089966, "learning_rate": 4.694313721622945e-07, "loss": 0.0232, "step": 216250 }, { "epoch": 1.7498179464357957, "grad_norm": 0.27274441719055176, "learning_rate": 4.691327143997754e-07, "loss": 0.013, "step": 216260 }, { "epoch": 1.7498988591309976, "grad_norm": 0.30898839235305786, "learning_rate": 4.6883414699584187e-07, "loss": 0.0154, "step": 216270 }, { "epoch": 1.7499797718261996, "grad_norm": 0.46547478437423706, "learning_rate": 4.685356699564431e-07, "loss": 0.0214, "step": 216280 }, { "epoch": 1.7500606845214013, "grad_norm": 0.11219866573810577, "learning_rate": 4.682372832875348e-07, "loss": 0.0119, "step": 216290 }, { "epoch": 1.7501415972166035, "grad_norm": 0.6505143642425537, "learning_rate": 4.6793898699506733e-07, "loss": 0.0257, "step": 216300 }, { "epoch": 1.7502225099118052, "grad_norm": 0.10087237507104874, "learning_rate": 4.676407810849892e-07, "loss": 0.0199, "step": 216310 }, { "epoch": 1.7503034226070069, "grad_norm": 0.3873251676559448, "learning_rate": 4.6734266556324735e-07, "loss": 0.019, "step": 216320 }, { "epoch": 1.750384335302209, "grad_norm": 0.29332950711250305, "learning_rate": 4.67044640435787e-07, "loss": 0.0251, "step": 216330 }, { "epoch": 1.7504652479974108, "grad_norm": 0.5094901919364929, "learning_rate": 4.667467057085517e-07, "loss": 0.0174, "step": 216340 }, { "epoch": 1.7505461606926127, "grad_norm": 0.10581207275390625, "learning_rate": 4.66448861387484e-07, "loss": 0.0162, "step": 216350 }, { "epoch": 1.7506270733878146, "grad_norm": 0.31239962577819824, "learning_rate": 4.6615110747852244e-07, "loss": 0.0211, "step": 216360 }, { "epoch": 1.7507079860830164, "grad_norm": 0.3890049457550049, "learning_rate": 4.658534439876056e-07, "loss": 0.0172, "step": 216370 }, { "epoch": 1.7507888987782183, "grad_norm": 0.27535825967788696, "learning_rate": 4.655558709206698e-07, "loss": 0.0158, "step": 216380 }, { "epoch": 1.7508698114734202, "grad_norm": 0.32730618119239807, "learning_rate": 4.652583882836498e-07, "loss": 0.0155, "step": 216390 }, { "epoch": 1.750950724168622, "grad_norm": 0.5154147744178772, "learning_rate": 4.6496099608247747e-07, "loss": 0.0101, "step": 216400 }, { "epoch": 1.751031636863824, "grad_norm": 0.22912150621414185, "learning_rate": 4.6466369432308477e-07, "loss": 0.0157, "step": 216410 }, { "epoch": 1.7511125495590258, "grad_norm": 0.4717820882797241, "learning_rate": 4.643664830113992e-07, "loss": 0.0192, "step": 216420 }, { "epoch": 1.7511934622542276, "grad_norm": 0.42973077297210693, "learning_rate": 4.640693621533504e-07, "loss": 0.0108, "step": 216430 }, { "epoch": 1.7512743749494297, "grad_norm": 0.6696274280548096, "learning_rate": 4.637723317548615e-07, "loss": 0.031, "step": 216440 }, { "epoch": 1.7513552876446314, "grad_norm": 0.34214863181114197, "learning_rate": 4.634753918218565e-07, "loss": 0.011, "step": 216450 }, { "epoch": 1.7514362003398332, "grad_norm": 0.34277990460395813, "learning_rate": 4.6317854236025974e-07, "loss": 0.0191, "step": 216460 }, { "epoch": 1.7515171130350353, "grad_norm": 0.033995598554611206, "learning_rate": 4.62881783375988e-07, "loss": 0.0135, "step": 216470 }, { "epoch": 1.751598025730237, "grad_norm": 0.3831930160522461, "learning_rate": 4.625851148749599e-07, "loss": 0.0205, "step": 216480 }, { "epoch": 1.751678938425439, "grad_norm": 0.5001027584075928, "learning_rate": 4.622885368630936e-07, "loss": 0.0242, "step": 216490 }, { "epoch": 1.751759851120641, "grad_norm": 0.6487706303596497, "learning_rate": 4.6199204934630316e-07, "loss": 0.0188, "step": 216500 }, { "epoch": 1.7518407638158426, "grad_norm": 0.2917037308216095, "learning_rate": 4.6169565233050105e-07, "loss": 0.0178, "step": 216510 }, { "epoch": 1.7519216765110446, "grad_norm": 0.36885812878608704, "learning_rate": 4.613993458215982e-07, "loss": 0.0165, "step": 216520 }, { "epoch": 1.7520025892062465, "grad_norm": 0.24575048685073853, "learning_rate": 4.611031298255042e-07, "loss": 0.0135, "step": 216530 }, { "epoch": 1.7520835019014482, "grad_norm": 0.49877843260765076, "learning_rate": 4.608070043481261e-07, "loss": 0.017, "step": 216540 }, { "epoch": 1.7521644145966502, "grad_norm": 0.0013152381870895624, "learning_rate": 4.605109693953697e-07, "loss": 0.0198, "step": 216550 }, { "epoch": 1.7522453272918521, "grad_norm": 0.29642388224601746, "learning_rate": 4.602150249731391e-07, "loss": 0.0144, "step": 216560 }, { "epoch": 1.7523262399870538, "grad_norm": 0.4630139172077179, "learning_rate": 4.599191710873352e-07, "loss": 0.023, "step": 216570 }, { "epoch": 1.752407152682256, "grad_norm": 0.14645950496196747, "learning_rate": 4.5962340774385936e-07, "loss": 0.0127, "step": 216580 }, { "epoch": 1.7524880653774577, "grad_norm": 0.15333427488803864, "learning_rate": 4.5932773494860973e-07, "loss": 0.0249, "step": 216590 }, { "epoch": 1.7525689780726597, "grad_norm": 0.46489691734313965, "learning_rate": 4.59032152707482e-07, "loss": 0.0139, "step": 216600 }, { "epoch": 1.7526498907678616, "grad_norm": 0.3030426800251007, "learning_rate": 4.5873666102637216e-07, "loss": 0.0141, "step": 216610 }, { "epoch": 1.7527308034630633, "grad_norm": 0.7263733148574829, "learning_rate": 4.5844125991117206e-07, "loss": 0.0125, "step": 216620 }, { "epoch": 1.7528117161582653, "grad_norm": 0.2509722113609314, "learning_rate": 4.581459493677737e-07, "loss": 0.0247, "step": 216630 }, { "epoch": 1.7528926288534672, "grad_norm": 0.2694467306137085, "learning_rate": 4.578507294020651e-07, "loss": 0.0159, "step": 216640 }, { "epoch": 1.752973541548669, "grad_norm": 0.3308631479740143, "learning_rate": 4.575556000199366e-07, "loss": 0.0206, "step": 216650 }, { "epoch": 1.7530544542438709, "grad_norm": 0.27794769406318665, "learning_rate": 4.5726056122727125e-07, "loss": 0.0106, "step": 216660 }, { "epoch": 1.7531353669390728, "grad_norm": 0.32888516783714294, "learning_rate": 4.569656130299532e-07, "loss": 0.0208, "step": 216670 }, { "epoch": 1.7532162796342745, "grad_norm": 0.4189929664134979, "learning_rate": 4.5667075543386564e-07, "loss": 0.0206, "step": 216680 }, { "epoch": 1.7532971923294765, "grad_norm": 0.3945596218109131, "learning_rate": 4.563759884448887e-07, "loss": 0.0187, "step": 216690 }, { "epoch": 1.7533781050246784, "grad_norm": 0.03232831507921219, "learning_rate": 4.560813120689006e-07, "loss": 0.0138, "step": 216700 }, { "epoch": 1.75345901771988, "grad_norm": 0.23728302121162415, "learning_rate": 4.5578672631177823e-07, "loss": 0.0122, "step": 216710 }, { "epoch": 1.7535399304150823, "grad_norm": 0.36067017912864685, "learning_rate": 4.5549223117939634e-07, "loss": 0.0162, "step": 216720 }, { "epoch": 1.753620843110284, "grad_norm": 0.3562811613082886, "learning_rate": 4.55197826677628e-07, "loss": 0.0122, "step": 216730 }, { "epoch": 1.753701755805486, "grad_norm": 0.43677225708961487, "learning_rate": 4.549035128123447e-07, "loss": 0.0134, "step": 216740 }, { "epoch": 1.7537826685006879, "grad_norm": 0.07712438702583313, "learning_rate": 4.5460928958941495e-07, "loss": 0.0106, "step": 216750 }, { "epoch": 1.7538635811958896, "grad_norm": 0.304959774017334, "learning_rate": 4.5431515701470805e-07, "loss": 0.0174, "step": 216760 }, { "epoch": 1.7539444938910915, "grad_norm": 0.3874105215072632, "learning_rate": 4.5402111509408874e-07, "loss": 0.0229, "step": 216770 }, { "epoch": 1.7540254065862935, "grad_norm": 0.3907536268234253, "learning_rate": 4.537271638334212e-07, "loss": 0.009, "step": 216780 }, { "epoch": 1.7541063192814952, "grad_norm": 0.38404393196105957, "learning_rate": 4.534333032385674e-07, "loss": 0.011, "step": 216790 }, { "epoch": 1.7541872319766971, "grad_norm": 0.2270815521478653, "learning_rate": 4.5313953331538874e-07, "loss": 0.0197, "step": 216800 }, { "epoch": 1.754268144671899, "grad_norm": 0.2530960142612457, "learning_rate": 4.528458540697428e-07, "loss": 0.0095, "step": 216810 }, { "epoch": 1.7543490573671008, "grad_norm": 0.09287150204181671, "learning_rate": 4.525522655074871e-07, "loss": 0.0144, "step": 216820 }, { "epoch": 1.7544299700623027, "grad_norm": 0.09057750552892685, "learning_rate": 4.522587676344753e-07, "loss": 0.0163, "step": 216830 }, { "epoch": 1.7545108827575047, "grad_norm": 0.2542537748813629, "learning_rate": 4.5196536045656383e-07, "loss": 0.0288, "step": 216840 }, { "epoch": 1.7545917954527064, "grad_norm": 0.44633325934410095, "learning_rate": 4.516720439796007e-07, "loss": 0.0141, "step": 216850 }, { "epoch": 1.7546727081479085, "grad_norm": 0.11979985237121582, "learning_rate": 4.5137881820943584e-07, "loss": 0.0109, "step": 216860 }, { "epoch": 1.7547536208431103, "grad_norm": 0.3073525130748749, "learning_rate": 4.510856831519189e-07, "loss": 0.0295, "step": 216870 }, { "epoch": 1.7548345335383122, "grad_norm": 0.17214776575565338, "learning_rate": 4.507926388128958e-07, "loss": 0.0186, "step": 216880 }, { "epoch": 1.7549154462335141, "grad_norm": 0.5901901721954346, "learning_rate": 4.504996851982074e-07, "loss": 0.0167, "step": 216890 }, { "epoch": 1.7549963589287159, "grad_norm": 0.1997581571340561, "learning_rate": 4.502068223136996e-07, "loss": 0.0205, "step": 216900 }, { "epoch": 1.7550772716239178, "grad_norm": 0.6914882063865662, "learning_rate": 4.4991405016521215e-07, "loss": 0.0222, "step": 216910 }, { "epoch": 1.7551581843191197, "grad_norm": 0.3579578101634979, "learning_rate": 4.4962136875858265e-07, "loss": 0.017, "step": 216920 }, { "epoch": 1.7552390970143215, "grad_norm": 0.18576495349407196, "learning_rate": 4.4932877809964917e-07, "loss": 0.0123, "step": 216930 }, { "epoch": 1.7553200097095234, "grad_norm": 0.4943750202655792, "learning_rate": 4.490362781942459e-07, "loss": 0.0136, "step": 216940 }, { "epoch": 1.7554009224047253, "grad_norm": 0.30489176511764526, "learning_rate": 4.4874386904820654e-07, "loss": 0.0133, "step": 216950 }, { "epoch": 1.755481835099927, "grad_norm": 0.5679923892021179, "learning_rate": 4.484515506673626e-07, "loss": 0.0209, "step": 216960 }, { "epoch": 1.7555627477951292, "grad_norm": 0.21392886340618134, "learning_rate": 4.481593230575443e-07, "loss": 0.016, "step": 216970 }, { "epoch": 1.755643660490331, "grad_norm": 0.22343866527080536, "learning_rate": 4.4786718622457814e-07, "loss": 0.0236, "step": 216980 }, { "epoch": 1.7557245731855327, "grad_norm": 0.3732278048992157, "learning_rate": 4.4757514017429173e-07, "loss": 0.0182, "step": 216990 }, { "epoch": 1.7558054858807348, "grad_norm": 0.3416205942630768, "learning_rate": 4.472831849125081e-07, "loss": 0.0294, "step": 217000 }, { "epoch": 1.7558863985759365, "grad_norm": 0.18017467856407166, "learning_rate": 4.469913204450499e-07, "loss": 0.0149, "step": 217010 }, { "epoch": 1.7559673112711385, "grad_norm": 0.24656397104263306, "learning_rate": 4.4669954677773855e-07, "loss": 0.0349, "step": 217020 }, { "epoch": 1.7560482239663404, "grad_norm": 0.5446913838386536, "learning_rate": 4.4640786391639104e-07, "loss": 0.0123, "step": 217030 }, { "epoch": 1.7561291366615421, "grad_norm": 0.4385295510292053, "learning_rate": 4.4611627186682774e-07, "loss": 0.02, "step": 217040 }, { "epoch": 1.756210049356744, "grad_norm": 0.4661940932273865, "learning_rate": 4.458247706348595e-07, "loss": 0.0234, "step": 217050 }, { "epoch": 1.756290962051946, "grad_norm": 0.1524258852005005, "learning_rate": 4.455333602263029e-07, "loss": 0.0082, "step": 217060 }, { "epoch": 1.7563718747471477, "grad_norm": 0.2290981411933899, "learning_rate": 4.452420406469699e-07, "loss": 0.0168, "step": 217070 }, { "epoch": 1.7564527874423497, "grad_norm": 0.17056001722812653, "learning_rate": 4.4495081190266687e-07, "loss": 0.0182, "step": 217080 }, { "epoch": 1.7565337001375516, "grad_norm": 0.1925487518310547, "learning_rate": 4.446596739992043e-07, "loss": 0.0179, "step": 217090 }, { "epoch": 1.7566146128327533, "grad_norm": 0.25545984506607056, "learning_rate": 4.4436862694238916e-07, "loss": 0.027, "step": 217100 }, { "epoch": 1.7566955255279555, "grad_norm": 0.01039960514754057, "learning_rate": 4.440776707380223e-07, "loss": 0.0129, "step": 217110 }, { "epoch": 1.7567764382231572, "grad_norm": 0.06659965217113495, "learning_rate": 4.437868053919092e-07, "loss": 0.0157, "step": 217120 }, { "epoch": 1.756857350918359, "grad_norm": 0.16972430050373077, "learning_rate": 4.434960309098496e-07, "loss": 0.0178, "step": 217130 }, { "epoch": 1.756938263613561, "grad_norm": 0.509994626045227, "learning_rate": 4.4320534729764274e-07, "loss": 0.0104, "step": 217140 }, { "epoch": 1.7570191763087628, "grad_norm": 0.46607282757759094, "learning_rate": 4.4291475456108513e-07, "loss": 0.0173, "step": 217150 }, { "epoch": 1.7571000890039647, "grad_norm": 0.28632375597953796, "learning_rate": 4.4262425270597263e-07, "loss": 0.0135, "step": 217160 }, { "epoch": 1.7571810016991667, "grad_norm": 0.9051337838172913, "learning_rate": 4.423338417380979e-07, "loss": 0.0167, "step": 217170 }, { "epoch": 1.7572619143943684, "grad_norm": 0.3794279396533966, "learning_rate": 4.420435216632535e-07, "loss": 0.0186, "step": 217180 }, { "epoch": 1.7573428270895703, "grad_norm": 0.5110986232757568, "learning_rate": 4.417532924872286e-07, "loss": 0.014, "step": 217190 }, { "epoch": 1.7574237397847723, "grad_norm": 0.5370842814445496, "learning_rate": 4.4146315421581096e-07, "loss": 0.0189, "step": 217200 }, { "epoch": 1.757504652479974, "grad_norm": 0.4076796770095825, "learning_rate": 4.411731068547881e-07, "loss": 0.0254, "step": 217210 }, { "epoch": 1.757585565175176, "grad_norm": 0.5626024603843689, "learning_rate": 4.40883150409942e-07, "loss": 0.0221, "step": 217220 }, { "epoch": 1.7576664778703779, "grad_norm": 0.39900052547454834, "learning_rate": 4.4059328488705865e-07, "loss": 0.0185, "step": 217230 }, { "epoch": 1.7577473905655796, "grad_norm": 0.9746137261390686, "learning_rate": 4.403035102919151e-07, "loss": 0.0188, "step": 217240 }, { "epoch": 1.7578283032607818, "grad_norm": 0.3274822533130646, "learning_rate": 4.4001382663029335e-07, "loss": 0.0198, "step": 217250 }, { "epoch": 1.7579092159559835, "grad_norm": 0.6358402967453003, "learning_rate": 4.3972423390796937e-07, "loss": 0.0237, "step": 217260 }, { "epoch": 1.7579901286511854, "grad_norm": 0.284690797328949, "learning_rate": 4.3943473213071743e-07, "loss": 0.0169, "step": 217270 }, { "epoch": 1.7580710413463874, "grad_norm": 0.2642922103404999, "learning_rate": 4.391453213043123e-07, "loss": 0.0212, "step": 217280 }, { "epoch": 1.758151954041589, "grad_norm": 0.2897729277610779, "learning_rate": 4.388560014345267e-07, "loss": 0.0148, "step": 217290 }, { "epoch": 1.758232866736791, "grad_norm": 0.13221964240074158, "learning_rate": 4.38566772527127e-07, "loss": 0.0196, "step": 217300 }, { "epoch": 1.758313779431993, "grad_norm": 0.31343111395835876, "learning_rate": 4.382776345878842e-07, "loss": 0.0164, "step": 217310 }, { "epoch": 1.7583946921271947, "grad_norm": 0.19333982467651367, "learning_rate": 4.3798858762256427e-07, "loss": 0.0115, "step": 217320 }, { "epoch": 1.7584756048223966, "grad_norm": 0.3653721511363983, "learning_rate": 4.3769963163693087e-07, "loss": 0.0227, "step": 217330 }, { "epoch": 1.7585565175175986, "grad_norm": 0.29459768533706665, "learning_rate": 4.3741076663674667e-07, "loss": 0.0146, "step": 217340 }, { "epoch": 1.7586374302128003, "grad_norm": 0.5309885144233704, "learning_rate": 4.371219926277731e-07, "loss": 0.0237, "step": 217350 }, { "epoch": 1.7587183429080022, "grad_norm": 0.35710975527763367, "learning_rate": 4.368333096157684e-07, "loss": 0.0257, "step": 217360 }, { "epoch": 1.7587992556032042, "grad_norm": 0.33321112394332886, "learning_rate": 4.365447176064902e-07, "loss": 0.014, "step": 217370 }, { "epoch": 1.7588801682984059, "grad_norm": 0.22105857729911804, "learning_rate": 4.362562166056933e-07, "loss": 0.0131, "step": 217380 }, { "epoch": 1.758961080993608, "grad_norm": 0.07899349182844162, "learning_rate": 4.3596780661913194e-07, "loss": 0.0109, "step": 217390 }, { "epoch": 1.7590419936888098, "grad_norm": 0.6937175989151001, "learning_rate": 4.356794876525577e-07, "loss": 0.0183, "step": 217400 }, { "epoch": 1.7591229063840117, "grad_norm": 0.2114291936159134, "learning_rate": 4.353912597117194e-07, "loss": 0.0147, "step": 217410 }, { "epoch": 1.7592038190792136, "grad_norm": 0.6507508158683777, "learning_rate": 4.3510312280236834e-07, "loss": 0.0231, "step": 217420 }, { "epoch": 1.7592847317744154, "grad_norm": 0.18302592635154724, "learning_rate": 4.3481507693024727e-07, "loss": 0.0188, "step": 217430 }, { "epoch": 1.7593656444696173, "grad_norm": 0.19444549083709717, "learning_rate": 4.3452712210110106e-07, "loss": 0.02, "step": 217440 }, { "epoch": 1.7594465571648192, "grad_norm": 0.34152504801750183, "learning_rate": 4.3423925832067504e-07, "loss": 0.0294, "step": 217450 }, { "epoch": 1.759527469860021, "grad_norm": 0.44488704204559326, "learning_rate": 4.3395148559470636e-07, "loss": 0.0167, "step": 217460 }, { "epoch": 1.759608382555223, "grad_norm": 0.3483852446079254, "learning_rate": 4.33663803928937e-07, "loss": 0.0204, "step": 217470 }, { "epoch": 1.7596892952504248, "grad_norm": 0.5257983207702637, "learning_rate": 4.333762133291042e-07, "loss": 0.0216, "step": 217480 }, { "epoch": 1.7597702079456266, "grad_norm": 0.6452791094779968, "learning_rate": 4.3308871380093986e-07, "loss": 0.0237, "step": 217490 }, { "epoch": 1.7598511206408285, "grad_norm": 0.21370761096477509, "learning_rate": 4.328013053501812e-07, "loss": 0.0099, "step": 217500 }, { "epoch": 1.7599320333360304, "grad_norm": 0.3770866096019745, "learning_rate": 4.325139879825596e-07, "loss": 0.018, "step": 217510 }, { "epoch": 1.7600129460312322, "grad_norm": 0.4182334244251251, "learning_rate": 4.3222676170380227e-07, "loss": 0.0242, "step": 217520 }, { "epoch": 1.7600938587264343, "grad_norm": 0.512515664100647, "learning_rate": 4.3193962651964016e-07, "loss": 0.0336, "step": 217530 }, { "epoch": 1.760174771421636, "grad_norm": 0.4750268757343292, "learning_rate": 4.3165258243579864e-07, "loss": 0.0194, "step": 217540 }, { "epoch": 1.760255684116838, "grad_norm": 0.33322873711586, "learning_rate": 4.313656294580021e-07, "loss": 0.0185, "step": 217550 }, { "epoch": 1.76033659681204, "grad_norm": 0.09207119792699814, "learning_rate": 4.3107876759197364e-07, "loss": 0.0068, "step": 217560 }, { "epoch": 1.7604175095072416, "grad_norm": 0.2620956003665924, "learning_rate": 4.307919968434332e-07, "loss": 0.0201, "step": 217570 }, { "epoch": 1.7604984222024436, "grad_norm": 0.42716678977012634, "learning_rate": 4.305053172181006e-07, "loss": 0.0369, "step": 217580 }, { "epoch": 1.7605793348976455, "grad_norm": 0.191142275929451, "learning_rate": 4.30218728721693e-07, "loss": 0.022, "step": 217590 }, { "epoch": 1.7606602475928472, "grad_norm": 0.5012081861495972, "learning_rate": 4.2993223135992413e-07, "loss": 0.0216, "step": 217600 }, { "epoch": 1.7607411602880492, "grad_norm": 0.40688589215278625, "learning_rate": 4.2964582513851163e-07, "loss": 0.015, "step": 217610 }, { "epoch": 1.760822072983251, "grad_norm": 0.6139885783195496, "learning_rate": 4.2935951006316314e-07, "loss": 0.0108, "step": 217620 }, { "epoch": 1.7609029856784528, "grad_norm": 0.5920014381408691, "learning_rate": 4.290732861395896e-07, "loss": 0.0235, "step": 217630 }, { "epoch": 1.760983898373655, "grad_norm": 0.36299559473991394, "learning_rate": 4.287871533735011e-07, "loss": 0.0193, "step": 217640 }, { "epoch": 1.7610648110688567, "grad_norm": 0.3391024172306061, "learning_rate": 4.2850111177060226e-07, "loss": 0.0163, "step": 217650 }, { "epoch": 1.7611457237640584, "grad_norm": 0.12961824238300323, "learning_rate": 4.2821516133659647e-07, "loss": 0.0125, "step": 217660 }, { "epoch": 1.7612266364592606, "grad_norm": 0.22771692276000977, "learning_rate": 4.2792930207719017e-07, "loss": 0.0178, "step": 217670 }, { "epoch": 1.7613075491544623, "grad_norm": 0.22970159351825714, "learning_rate": 4.2764353399807945e-07, "loss": 0.0159, "step": 217680 }, { "epoch": 1.7613884618496642, "grad_norm": 0.13823537528514862, "learning_rate": 4.273578571049675e-07, "loss": 0.0239, "step": 217690 }, { "epoch": 1.7614693745448662, "grad_norm": 0.574862539768219, "learning_rate": 4.270722714035502e-07, "loss": 0.0368, "step": 217700 }, { "epoch": 1.761550287240068, "grad_norm": 0.8455715775489807, "learning_rate": 4.2678677689952043e-07, "loss": 0.0237, "step": 217710 }, { "epoch": 1.7616311999352698, "grad_norm": 0.4689892530441284, "learning_rate": 4.265013735985751e-07, "loss": 0.0148, "step": 217720 }, { "epoch": 1.7617121126304718, "grad_norm": 0.2397962212562561, "learning_rate": 4.262160615064048e-07, "loss": 0.0116, "step": 217730 }, { "epoch": 1.7617930253256735, "grad_norm": 0.3018781542778015, "learning_rate": 4.259308406286999e-07, "loss": 0.0171, "step": 217740 }, { "epoch": 1.7618739380208754, "grad_norm": 0.03657068684697151, "learning_rate": 4.256457109711476e-07, "loss": 0.0156, "step": 217750 }, { "epoch": 1.7619548507160774, "grad_norm": 0.28773191571235657, "learning_rate": 4.2536067253943494e-07, "loss": 0.0204, "step": 217760 }, { "epoch": 1.762035763411279, "grad_norm": 0.48826736211776733, "learning_rate": 4.250757253392457e-07, "loss": 0.013, "step": 217770 }, { "epoch": 1.7621166761064813, "grad_norm": 0.5055207014083862, "learning_rate": 4.247908693762637e-07, "loss": 0.0162, "step": 217780 }, { "epoch": 1.762197588801683, "grad_norm": 0.467316210269928, "learning_rate": 4.245061046561677e-07, "loss": 0.0287, "step": 217790 }, { "epoch": 1.7622785014968847, "grad_norm": 0.6535823941230774, "learning_rate": 4.2422143118464045e-07, "loss": 0.027, "step": 217800 }, { "epoch": 1.7623594141920869, "grad_norm": 0.3039720952510834, "learning_rate": 4.2393684896735564e-07, "loss": 0.0234, "step": 217810 }, { "epoch": 1.7624403268872886, "grad_norm": 0.0374763123691082, "learning_rate": 4.2365235800998884e-07, "loss": 0.0137, "step": 217820 }, { "epoch": 1.7625212395824905, "grad_norm": 0.6764252185821533, "learning_rate": 4.2336795831821653e-07, "loss": 0.0217, "step": 217830 }, { "epoch": 1.7626021522776925, "grad_norm": 0.48232391476631165, "learning_rate": 4.2308364989770754e-07, "loss": 0.0284, "step": 217840 }, { "epoch": 1.7626830649728942, "grad_norm": 0.6274698972702026, "learning_rate": 4.227994327541324e-07, "loss": 0.0206, "step": 217850 }, { "epoch": 1.7627639776680961, "grad_norm": 0.4591429829597473, "learning_rate": 4.2251530689316143e-07, "loss": 0.0312, "step": 217860 }, { "epoch": 1.762844890363298, "grad_norm": 0.245622456073761, "learning_rate": 4.222312723204569e-07, "loss": 0.0139, "step": 217870 }, { "epoch": 1.7629258030584998, "grad_norm": 0.5883489847183228, "learning_rate": 4.21947329041687e-07, "loss": 0.0214, "step": 217880 }, { "epoch": 1.7630067157537017, "grad_norm": 0.47464755177497864, "learning_rate": 4.216634770625133e-07, "loss": 0.0162, "step": 217890 }, { "epoch": 1.7630876284489037, "grad_norm": 0.23286642134189606, "learning_rate": 4.213797163885952e-07, "loss": 0.0192, "step": 217900 }, { "epoch": 1.7631685411441054, "grad_norm": 0.18443289399147034, "learning_rate": 4.2109604702559314e-07, "loss": 0.023, "step": 217910 }, { "epoch": 1.7632494538393075, "grad_norm": 0.16651232540607452, "learning_rate": 4.208124689791648e-07, "loss": 0.0091, "step": 217920 }, { "epoch": 1.7633303665345093, "grad_norm": 0.41072365641593933, "learning_rate": 4.20528982254963e-07, "loss": 0.0302, "step": 217930 }, { "epoch": 1.7634112792297112, "grad_norm": 0.3846270740032196, "learning_rate": 4.202455868586436e-07, "loss": 0.0128, "step": 217940 }, { "epoch": 1.7634921919249131, "grad_norm": 0.5489598512649536, "learning_rate": 4.1996228279585773e-07, "loss": 0.0164, "step": 217950 }, { "epoch": 1.7635731046201149, "grad_norm": 0.5508912801742554, "learning_rate": 4.1967907007225527e-07, "loss": 0.0234, "step": 217960 }, { "epoch": 1.7636540173153168, "grad_norm": 0.5046993494033813, "learning_rate": 4.1939594869348454e-07, "loss": 0.0148, "step": 217970 }, { "epoch": 1.7637349300105187, "grad_norm": 0.3519110083580017, "learning_rate": 4.1911291866519154e-07, "loss": 0.0203, "step": 217980 }, { "epoch": 1.7638158427057204, "grad_norm": 0.5965684652328491, "learning_rate": 4.1882997999302064e-07, "loss": 0.022, "step": 217990 }, { "epoch": 1.7638967554009224, "grad_norm": 0.4254828691482544, "learning_rate": 4.1854713268261403e-07, "loss": 0.0295, "step": 218000 }, { "epoch": 1.7639776680961243, "grad_norm": 0.45284953713417053, "learning_rate": 4.1826437673961274e-07, "loss": 0.0153, "step": 218010 }, { "epoch": 1.764058580791326, "grad_norm": 0.32865577936172485, "learning_rate": 4.179817121696572e-07, "loss": 0.0195, "step": 218020 }, { "epoch": 1.764139493486528, "grad_norm": 0.356794536113739, "learning_rate": 4.1769913897838297e-07, "loss": 0.0304, "step": 218030 }, { "epoch": 1.76422040618173, "grad_norm": 0.4350234568119049, "learning_rate": 4.174166571714244e-07, "loss": 0.0123, "step": 218040 }, { "epoch": 1.7643013188769316, "grad_norm": 0.2955460548400879, "learning_rate": 4.1713426675441873e-07, "loss": 0.0115, "step": 218050 }, { "epoch": 1.7643822315721338, "grad_norm": 0.26269853115081787, "learning_rate": 4.1685196773299406e-07, "loss": 0.014, "step": 218060 }, { "epoch": 1.7644631442673355, "grad_norm": 0.34034979343414307, "learning_rate": 4.165697601127805e-07, "loss": 0.0419, "step": 218070 }, { "epoch": 1.7645440569625375, "grad_norm": 0.4515378773212433, "learning_rate": 4.1628764389940844e-07, "loss": 0.0205, "step": 218080 }, { "epoch": 1.7646249696577394, "grad_norm": 0.32846716046333313, "learning_rate": 4.160056190985012e-07, "loss": 0.013, "step": 218090 }, { "epoch": 1.7647058823529411, "grad_norm": 0.4874269366264343, "learning_rate": 4.15723685715686e-07, "loss": 0.0094, "step": 218100 }, { "epoch": 1.764786795048143, "grad_norm": 0.44181719422340393, "learning_rate": 4.1544184375658326e-07, "loss": 0.0165, "step": 218110 }, { "epoch": 1.764867707743345, "grad_norm": 0.47160014510154724, "learning_rate": 4.151600932268152e-07, "loss": 0.0211, "step": 218120 }, { "epoch": 1.7649486204385467, "grad_norm": 0.35532671213150024, "learning_rate": 4.1487843413199945e-07, "loss": 0.0131, "step": 218130 }, { "epoch": 1.7650295331337487, "grad_norm": 0.2354114055633545, "learning_rate": 4.145968664777539e-07, "loss": 0.0127, "step": 218140 }, { "epoch": 1.7651104458289506, "grad_norm": 0.26629799604415894, "learning_rate": 4.1431539026969394e-07, "loss": 0.0126, "step": 218150 }, { "epoch": 1.7651913585241523, "grad_norm": 0.29425254464149475, "learning_rate": 4.1403400551343233e-07, "loss": 0.0233, "step": 218160 }, { "epoch": 1.7652722712193545, "grad_norm": 0.250245064496994, "learning_rate": 4.1375271221458124e-07, "loss": 0.0141, "step": 218170 }, { "epoch": 1.7653531839145562, "grad_norm": 0.2601219415664673, "learning_rate": 4.1347151037875066e-07, "loss": 0.0162, "step": 218180 }, { "epoch": 1.765434096609758, "grad_norm": 0.17329572141170502, "learning_rate": 4.1319040001154774e-07, "loss": 0.0148, "step": 218190 }, { "epoch": 1.76551500930496, "grad_norm": 0.31472840905189514, "learning_rate": 4.1290938111857915e-07, "loss": 0.027, "step": 218200 }, { "epoch": 1.7655959220001618, "grad_norm": 0.4461668133735657, "learning_rate": 4.1262845370544923e-07, "loss": 0.016, "step": 218210 }, { "epoch": 1.7656768346953637, "grad_norm": 0.6272706985473633, "learning_rate": 4.123476177777602e-07, "loss": 0.0153, "step": 218220 }, { "epoch": 1.7657577473905657, "grad_norm": 0.24879585206508636, "learning_rate": 4.120668733411126e-07, "loss": 0.0135, "step": 218230 }, { "epoch": 1.7658386600857674, "grad_norm": 0.5859715342521667, "learning_rate": 4.1178622040110696e-07, "loss": 0.0282, "step": 218240 }, { "epoch": 1.7659195727809693, "grad_norm": 0.4151252806186676, "learning_rate": 4.115056589633387e-07, "loss": 0.0267, "step": 218250 }, { "epoch": 1.7660004854761713, "grad_norm": 0.5234605669975281, "learning_rate": 4.112251890334018e-07, "loss": 0.0155, "step": 218260 }, { "epoch": 1.766081398171373, "grad_norm": 0.38196220993995667, "learning_rate": 4.109448106168934e-07, "loss": 0.0256, "step": 218270 }, { "epoch": 1.766162310866575, "grad_norm": 0.5616946816444397, "learning_rate": 4.106645237194007e-07, "loss": 0.0186, "step": 218280 }, { "epoch": 1.7662432235617769, "grad_norm": 0.23589906096458435, "learning_rate": 4.1038432834651697e-07, "loss": 0.0177, "step": 218290 }, { "epoch": 1.7663241362569786, "grad_norm": 0.3413342833518982, "learning_rate": 4.1010422450382835e-07, "loss": 0.0205, "step": 218300 }, { "epoch": 1.7664050489521808, "grad_norm": 0.34724661707878113, "learning_rate": 4.098242121969209e-07, "loss": 0.0226, "step": 218310 }, { "epoch": 1.7664859616473825, "grad_norm": 0.15610189735889435, "learning_rate": 4.0954429143137964e-07, "loss": 0.0146, "step": 218320 }, { "epoch": 1.7665668743425842, "grad_norm": 0.39792400598526, "learning_rate": 4.092644622127867e-07, "loss": 0.0257, "step": 218330 }, { "epoch": 1.7666477870377864, "grad_norm": 0.33105096220970154, "learning_rate": 4.0898472454672264e-07, "loss": 0.0304, "step": 218340 }, { "epoch": 1.766728699732988, "grad_norm": 0.2695325016975403, "learning_rate": 4.087050784387658e-07, "loss": 0.0106, "step": 218350 }, { "epoch": 1.76680961242819, "grad_norm": 0.5615307092666626, "learning_rate": 4.08425523894494e-07, "loss": 0.0283, "step": 218360 }, { "epoch": 1.766890525123392, "grad_norm": 0.6840311288833618, "learning_rate": 4.081460609194815e-07, "loss": 0.017, "step": 218370 }, { "epoch": 1.7669714378185937, "grad_norm": 0.4256691336631775, "learning_rate": 4.078666895193023e-07, "loss": 0.0097, "step": 218380 }, { "epoch": 1.7670523505137956, "grad_norm": 0.37934428453445435, "learning_rate": 4.075874096995269e-07, "loss": 0.0283, "step": 218390 }, { "epoch": 1.7671332632089976, "grad_norm": 0.5204101800918579, "learning_rate": 4.0730822146572647e-07, "loss": 0.0255, "step": 218400 }, { "epoch": 1.7672141759041993, "grad_norm": 0.5800830125808716, "learning_rate": 4.0702912482346755e-07, "loss": 0.0297, "step": 218410 }, { "epoch": 1.7672950885994012, "grad_norm": 0.25707805156707764, "learning_rate": 4.0675011977831525e-07, "loss": 0.019, "step": 218420 }, { "epoch": 1.7673760012946031, "grad_norm": 0.48682186007499695, "learning_rate": 4.0647120633583723e-07, "loss": 0.0229, "step": 218430 }, { "epoch": 1.7674569139898049, "grad_norm": 0.14002138376235962, "learning_rate": 4.0619238450159304e-07, "loss": 0.0104, "step": 218440 }, { "epoch": 1.767537826685007, "grad_norm": 0.47722509503364563, "learning_rate": 4.059136542811426e-07, "loss": 0.0153, "step": 218450 }, { "epoch": 1.7676187393802087, "grad_norm": 0.32580310106277466, "learning_rate": 4.056350156800476e-07, "loss": 0.0182, "step": 218460 }, { "epoch": 1.7676996520754105, "grad_norm": 0.5208432078361511, "learning_rate": 4.053564687038619e-07, "loss": 0.0234, "step": 218470 }, { "epoch": 1.7677805647706126, "grad_norm": 0.41799476742744446, "learning_rate": 4.050780133581411e-07, "loss": 0.0185, "step": 218480 }, { "epoch": 1.7678614774658143, "grad_norm": 0.22743462026119232, "learning_rate": 4.0479964964843964e-07, "loss": 0.0093, "step": 218490 }, { "epoch": 1.7679423901610163, "grad_norm": 0.2049807757139206, "learning_rate": 4.045213775803086e-07, "loss": 0.0173, "step": 218500 }, { "epoch": 1.7680233028562182, "grad_norm": 0.28438735008239746, "learning_rate": 4.0424319715929696e-07, "loss": 0.0258, "step": 218510 }, { "epoch": 1.76810421555142, "grad_norm": 0.7999328970909119, "learning_rate": 4.0396510839095295e-07, "loss": 0.0121, "step": 218520 }, { "epoch": 1.7681851282466219, "grad_norm": 0.07132462412118912, "learning_rate": 4.0368711128082217e-07, "loss": 0.0177, "step": 218530 }, { "epoch": 1.7682660409418238, "grad_norm": 0.29017704725265503, "learning_rate": 4.0340920583444854e-07, "loss": 0.0147, "step": 218540 }, { "epoch": 1.7683469536370255, "grad_norm": 0.697174608707428, "learning_rate": 4.0313139205737426e-07, "loss": 0.0214, "step": 218550 }, { "epoch": 1.7684278663322275, "grad_norm": 1.064461350440979, "learning_rate": 4.028536699551405e-07, "loss": 0.0175, "step": 218560 }, { "epoch": 1.7685087790274294, "grad_norm": 0.30573180317878723, "learning_rate": 4.025760395332845e-07, "loss": 0.0089, "step": 218570 }, { "epoch": 1.7685896917226311, "grad_norm": 0.38963431119918823, "learning_rate": 4.0229850079734457e-07, "loss": 0.0222, "step": 218580 }, { "epoch": 1.7686706044178333, "grad_norm": 0.33726757764816284, "learning_rate": 4.020210537528546e-07, "loss": 0.0245, "step": 218590 }, { "epoch": 1.768751517113035, "grad_norm": 0.3990755081176758, "learning_rate": 4.01743698405348e-07, "loss": 0.0108, "step": 218600 }, { "epoch": 1.768832429808237, "grad_norm": 0.5377591848373413, "learning_rate": 4.0146643476035584e-07, "loss": 0.0141, "step": 218610 }, { "epoch": 1.768913342503439, "grad_norm": 0.49592509865760803, "learning_rate": 4.0118926282340764e-07, "loss": 0.019, "step": 218620 }, { "epoch": 1.7689942551986406, "grad_norm": 0.18361671268939972, "learning_rate": 4.009121826000312e-07, "loss": 0.0208, "step": 218630 }, { "epoch": 1.7690751678938426, "grad_norm": 0.3940732181072235, "learning_rate": 4.0063519409575147e-07, "loss": 0.0221, "step": 218640 }, { "epoch": 1.7691560805890445, "grad_norm": 0.36012741923332214, "learning_rate": 4.003582973160947e-07, "loss": 0.0228, "step": 218650 }, { "epoch": 1.7692369932842462, "grad_norm": 0.4210898280143738, "learning_rate": 4.000814922665802e-07, "loss": 0.0161, "step": 218660 }, { "epoch": 1.7693179059794482, "grad_norm": 0.3350486755371094, "learning_rate": 3.998047789527293e-07, "loss": 0.0139, "step": 218670 }, { "epoch": 1.76939881867465, "grad_norm": 0.4029509723186493, "learning_rate": 3.995281573800608e-07, "loss": 0.0177, "step": 218680 }, { "epoch": 1.7694797313698518, "grad_norm": 0.44777947664260864, "learning_rate": 3.9925162755409254e-07, "loss": 0.0084, "step": 218690 }, { "epoch": 1.7695606440650538, "grad_norm": 0.4829263389110565, "learning_rate": 3.989751894803362e-07, "loss": 0.0136, "step": 218700 }, { "epoch": 1.7696415567602557, "grad_norm": 0.25753024220466614, "learning_rate": 3.9869884316430684e-07, "loss": 0.0194, "step": 218710 }, { "epoch": 1.7697224694554574, "grad_norm": 0.12290466576814651, "learning_rate": 3.984225886115156e-07, "loss": 0.0117, "step": 218720 }, { "epoch": 1.7698033821506596, "grad_norm": 0.3547658920288086, "learning_rate": 3.98146425827472e-07, "loss": 0.0217, "step": 218730 }, { "epoch": 1.7698842948458613, "grad_norm": 0.35778123140335083, "learning_rate": 3.978703548176821e-07, "loss": 0.0211, "step": 218740 }, { "epoch": 1.7699652075410632, "grad_norm": 0.2705558240413666, "learning_rate": 3.9759437558765324e-07, "loss": 0.0235, "step": 218750 }, { "epoch": 1.7700461202362652, "grad_norm": 0.4466187655925751, "learning_rate": 3.973184881428882e-07, "loss": 0.0109, "step": 218760 }, { "epoch": 1.770127032931467, "grad_norm": 0.5985513925552368, "learning_rate": 3.970426924888887e-07, "loss": 0.0145, "step": 218770 }, { "epoch": 1.7702079456266688, "grad_norm": 0.5361762046813965, "learning_rate": 3.967669886311559e-07, "loss": 0.0289, "step": 218780 }, { "epoch": 1.7702888583218708, "grad_norm": 0.6413195133209229, "learning_rate": 3.9649137657518766e-07, "loss": 0.0237, "step": 218790 }, { "epoch": 1.7703697710170725, "grad_norm": 0.2769928574562073, "learning_rate": 3.9621585632648063e-07, "loss": 0.0283, "step": 218800 }, { "epoch": 1.7704506837122744, "grad_norm": 0.3382083475589752, "learning_rate": 3.959404278905288e-07, "loss": 0.0069, "step": 218810 }, { "epoch": 1.7705315964074764, "grad_norm": 0.1704469621181488, "learning_rate": 3.9566509127282606e-07, "loss": 0.0149, "step": 218820 }, { "epoch": 1.770612509102678, "grad_norm": 0.5730942487716675, "learning_rate": 3.953898464788619e-07, "loss": 0.0326, "step": 218830 }, { "epoch": 1.7706934217978803, "grad_norm": 0.3386180102825165, "learning_rate": 3.95114693514127e-07, "loss": 0.0082, "step": 218840 }, { "epoch": 1.770774334493082, "grad_norm": 0.42786216735839844, "learning_rate": 3.948396323841097e-07, "loss": 0.0261, "step": 218850 }, { "epoch": 1.7708552471882837, "grad_norm": 0.591335654258728, "learning_rate": 3.9456466309429174e-07, "loss": 0.0161, "step": 218860 }, { "epoch": 1.7709361598834858, "grad_norm": 0.06443315744400024, "learning_rate": 3.9428978565015987e-07, "loss": 0.0173, "step": 218870 }, { "epoch": 1.7710170725786876, "grad_norm": 0.22749532759189606, "learning_rate": 3.9401500005719627e-07, "loss": 0.0165, "step": 218880 }, { "epoch": 1.7710979852738895, "grad_norm": 0.4077252149581909, "learning_rate": 3.9374030632087834e-07, "loss": 0.0236, "step": 218890 }, { "epoch": 1.7711788979690914, "grad_norm": 0.34749019145965576, "learning_rate": 3.934657044466861e-07, "loss": 0.0106, "step": 218900 }, { "epoch": 1.7712598106642932, "grad_norm": 0.3172158896923065, "learning_rate": 3.931911944400962e-07, "loss": 0.0203, "step": 218910 }, { "epoch": 1.771340723359495, "grad_norm": 0.11444646120071411, "learning_rate": 3.929167763065817e-07, "loss": 0.0113, "step": 218920 }, { "epoch": 1.771421636054697, "grad_norm": 0.2765246033668518, "learning_rate": 3.926424500516168e-07, "loss": 0.0166, "step": 218930 }, { "epoch": 1.7715025487498988, "grad_norm": 0.9161515235900879, "learning_rate": 3.923682156806713e-07, "loss": 0.0473, "step": 218940 }, { "epoch": 1.7715834614451007, "grad_norm": 0.387830913066864, "learning_rate": 3.9209407319921454e-07, "loss": 0.0142, "step": 218950 }, { "epoch": 1.7716643741403026, "grad_norm": 0.43613240122795105, "learning_rate": 3.918200226127139e-07, "loss": 0.0208, "step": 218960 }, { "epoch": 1.7717452868355044, "grad_norm": 0.47170490026474, "learning_rate": 3.91546063926635e-07, "loss": 0.0103, "step": 218970 }, { "epoch": 1.7718261995307065, "grad_norm": 0.372416615486145, "learning_rate": 3.912721971464406e-07, "loss": 0.0153, "step": 218980 }, { "epoch": 1.7719071122259082, "grad_norm": 0.41562074422836304, "learning_rate": 3.9099842227759254e-07, "loss": 0.0187, "step": 218990 }, { "epoch": 1.77198802492111, "grad_norm": 0.11386124044656754, "learning_rate": 3.9072473932555143e-07, "loss": 0.0113, "step": 219000 }, { "epoch": 1.7720689376163121, "grad_norm": 0.5412214994430542, "learning_rate": 3.904511482957746e-07, "loss": 0.0218, "step": 219010 }, { "epoch": 1.7721498503115138, "grad_norm": 0.9678758978843689, "learning_rate": 3.9017764919371814e-07, "loss": 0.0329, "step": 219020 }, { "epoch": 1.7722307630067158, "grad_norm": 0.326311856508255, "learning_rate": 3.899042420248361e-07, "loss": 0.0162, "step": 219030 }, { "epoch": 1.7723116757019177, "grad_norm": 0.18987806141376495, "learning_rate": 3.89630926794583e-07, "loss": 0.0078, "step": 219040 }, { "epoch": 1.7723925883971194, "grad_norm": 0.29071107506752014, "learning_rate": 3.893577035084067e-07, "loss": 0.0268, "step": 219050 }, { "epoch": 1.7724735010923214, "grad_norm": 0.17915786802768707, "learning_rate": 3.8908457217175843e-07, "loss": 0.015, "step": 219060 }, { "epoch": 1.7725544137875233, "grad_norm": 0.2047107219696045, "learning_rate": 3.8881153279008544e-07, "loss": 0.0142, "step": 219070 }, { "epoch": 1.772635326482725, "grad_norm": 0.3162314295768738, "learning_rate": 3.885385853688295e-07, "loss": 0.014, "step": 219080 }, { "epoch": 1.772716239177927, "grad_norm": 0.23140934109687805, "learning_rate": 3.8826572991343735e-07, "loss": 0.0075, "step": 219090 }, { "epoch": 1.772797151873129, "grad_norm": 1.0827765464782715, "learning_rate": 3.8799296642935024e-07, "loss": 0.0156, "step": 219100 }, { "epoch": 1.7728780645683306, "grad_norm": 0.32843852043151855, "learning_rate": 3.8772029492200546e-07, "loss": 0.0163, "step": 219110 }, { "epoch": 1.7729589772635328, "grad_norm": 0.22559423744678497, "learning_rate": 3.874477153968431e-07, "loss": 0.0116, "step": 219120 }, { "epoch": 1.7730398899587345, "grad_norm": 0.33787623047828674, "learning_rate": 3.8717522785929883e-07, "loss": 0.0161, "step": 219130 }, { "epoch": 1.7731208026539365, "grad_norm": 0.5216505527496338, "learning_rate": 3.869028323148061e-07, "loss": 0.0176, "step": 219140 }, { "epoch": 1.7732017153491384, "grad_norm": 0.2916181981563568, "learning_rate": 3.866305287687977e-07, "loss": 0.0183, "step": 219150 }, { "epoch": 1.7732826280443401, "grad_norm": 0.02923634648323059, "learning_rate": 3.8635831722670435e-07, "loss": 0.0101, "step": 219160 }, { "epoch": 1.773363540739542, "grad_norm": 0.6604907512664795, "learning_rate": 3.860861976939545e-07, "loss": 0.0257, "step": 219170 }, { "epoch": 1.773444453434744, "grad_norm": 0.4753578305244446, "learning_rate": 3.8581417017597546e-07, "loss": 0.012, "step": 219180 }, { "epoch": 1.7735253661299457, "grad_norm": 0.5230489373207092, "learning_rate": 3.8554223467819117e-07, "loss": 0.015, "step": 219190 }, { "epoch": 1.7736062788251477, "grad_norm": 0.3821590840816498, "learning_rate": 3.8527039120602573e-07, "loss": 0.0182, "step": 219200 }, { "epoch": 1.7736871915203496, "grad_norm": 0.6077001094818115, "learning_rate": 3.8499863976490024e-07, "loss": 0.0224, "step": 219210 }, { "epoch": 1.7737681042155513, "grad_norm": 0.2795889675617218, "learning_rate": 3.8472698036023325e-07, "loss": 0.0313, "step": 219220 }, { "epoch": 1.7738490169107533, "grad_norm": 0.4664726257324219, "learning_rate": 3.8445541299744485e-07, "loss": 0.0167, "step": 219230 }, { "epoch": 1.7739299296059552, "grad_norm": 0.3926510810852051, "learning_rate": 3.8418393768194896e-07, "loss": 0.0194, "step": 219240 }, { "epoch": 1.774010842301157, "grad_norm": 1.0157169103622437, "learning_rate": 3.8391255441915907e-07, "loss": 0.0302, "step": 219250 }, { "epoch": 1.774091754996359, "grad_norm": 0.8942396640777588, "learning_rate": 3.836412632144898e-07, "loss": 0.0197, "step": 219260 }, { "epoch": 1.7741726676915608, "grad_norm": 0.26193150877952576, "learning_rate": 3.8337006407334844e-07, "loss": 0.0113, "step": 219270 }, { "epoch": 1.7742535803867627, "grad_norm": 0.4323654770851135, "learning_rate": 3.830989570011456e-07, "loss": 0.014, "step": 219280 }, { "epoch": 1.7743344930819647, "grad_norm": 0.1722310483455658, "learning_rate": 3.8282794200328876e-07, "loss": 0.0161, "step": 219290 }, { "epoch": 1.7744154057771664, "grad_norm": 0.2883701026439667, "learning_rate": 3.8255701908517905e-07, "loss": 0.0222, "step": 219300 }, { "epoch": 1.7744963184723683, "grad_norm": 0.1448742300271988, "learning_rate": 3.8228618825222275e-07, "loss": 0.0103, "step": 219310 }, { "epoch": 1.7745772311675703, "grad_norm": 0.30516543984413147, "learning_rate": 3.8201544950981996e-07, "loss": 0.0261, "step": 219320 }, { "epoch": 1.774658143862772, "grad_norm": 0.2659894824028015, "learning_rate": 3.8174480286337023e-07, "loss": 0.0168, "step": 219330 }, { "epoch": 1.774739056557974, "grad_norm": 0.2581962049007416, "learning_rate": 3.81474248318271e-07, "loss": 0.0157, "step": 219340 }, { "epoch": 1.7748199692531759, "grad_norm": 0.5719477534294128, "learning_rate": 3.812037858799172e-07, "loss": 0.0111, "step": 219350 }, { "epoch": 1.7749008819483776, "grad_norm": 0.23887720704078674, "learning_rate": 3.8093341555370365e-07, "loss": 0.0114, "step": 219360 }, { "epoch": 1.7749817946435795, "grad_norm": 0.46650123596191406, "learning_rate": 3.8066313734502147e-07, "loss": 0.0212, "step": 219370 }, { "epoch": 1.7750627073387815, "grad_norm": 0.5419756174087524, "learning_rate": 3.803929512592608e-07, "loss": 0.0133, "step": 219380 }, { "epoch": 1.7751436200339832, "grad_norm": 0.48596692085266113, "learning_rate": 3.8012285730181174e-07, "loss": 0.0188, "step": 219390 }, { "epoch": 1.7752245327291853, "grad_norm": 0.23046694695949554, "learning_rate": 3.798528554780584e-07, "loss": 0.0122, "step": 219400 }, { "epoch": 1.775305445424387, "grad_norm": 0.13821667432785034, "learning_rate": 3.7958294579338527e-07, "loss": 0.0306, "step": 219410 }, { "epoch": 1.775386358119589, "grad_norm": 0.21171993017196655, "learning_rate": 3.7931312825317814e-07, "loss": 0.0164, "step": 219420 }, { "epoch": 1.775467270814791, "grad_norm": 0.5454593300819397, "learning_rate": 3.790434028628148e-07, "loss": 0.0194, "step": 219430 }, { "epoch": 1.7755481835099927, "grad_norm": 0.07612137496471405, "learning_rate": 3.78773769627675e-07, "loss": 0.0194, "step": 219440 }, { "epoch": 1.7756290962051946, "grad_norm": 0.46588271856307983, "learning_rate": 3.785042285531382e-07, "loss": 0.0192, "step": 219450 }, { "epoch": 1.7757100089003965, "grad_norm": 0.5012422800064087, "learning_rate": 3.7823477964457623e-07, "loss": 0.0211, "step": 219460 }, { "epoch": 1.7757909215955983, "grad_norm": 0.2444961965084076, "learning_rate": 3.7796542290736594e-07, "loss": 0.0149, "step": 219470 }, { "epoch": 1.7758718342908002, "grad_norm": 0.6270140409469604, "learning_rate": 3.7769615834687803e-07, "loss": 0.0251, "step": 219480 }, { "epoch": 1.7759527469860021, "grad_norm": 0.302720308303833, "learning_rate": 3.7742698596848035e-07, "loss": 0.0148, "step": 219490 }, { "epoch": 1.7760336596812039, "grad_norm": 0.15971317887306213, "learning_rate": 3.771579057775443e-07, "loss": 0.009, "step": 219500 }, { "epoch": 1.776114572376406, "grad_norm": 0.42951953411102295, "learning_rate": 3.768889177794349e-07, "loss": 0.023, "step": 219510 }, { "epoch": 1.7761954850716077, "grad_norm": 0.7119084596633911, "learning_rate": 3.7662002197951463e-07, "loss": 0.024, "step": 219520 }, { "epoch": 1.7762763977668095, "grad_norm": 0.14441359043121338, "learning_rate": 3.76351218383148e-07, "loss": 0.0295, "step": 219530 }, { "epoch": 1.7763573104620116, "grad_norm": 0.3320688009262085, "learning_rate": 3.760825069956964e-07, "loss": 0.033, "step": 219540 }, { "epoch": 1.7764382231572133, "grad_norm": 0.28057581186294556, "learning_rate": 3.7581388782251705e-07, "loss": 0.0214, "step": 219550 }, { "epoch": 1.7765191358524153, "grad_norm": 0.36294659972190857, "learning_rate": 3.7554536086896745e-07, "loss": 0.0196, "step": 219560 }, { "epoch": 1.7766000485476172, "grad_norm": 0.407548189163208, "learning_rate": 3.752769261404032e-07, "loss": 0.0291, "step": 219570 }, { "epoch": 1.776680961242819, "grad_norm": 0.26013970375061035, "learning_rate": 3.750085836421774e-07, "loss": 0.0241, "step": 219580 }, { "epoch": 1.7767618739380209, "grad_norm": 0.31353259086608887, "learning_rate": 3.747403333796418e-07, "loss": 0.0137, "step": 219590 }, { "epoch": 1.7768427866332228, "grad_norm": 0.17114034295082092, "learning_rate": 3.7447217535814485e-07, "loss": 0.008, "step": 219600 }, { "epoch": 1.7769236993284245, "grad_norm": 0.462563693523407, "learning_rate": 3.742041095830373e-07, "loss": 0.0312, "step": 219610 }, { "epoch": 1.7770046120236265, "grad_norm": 0.4725468158721924, "learning_rate": 3.739361360596622e-07, "loss": 0.0185, "step": 219620 }, { "epoch": 1.7770855247188284, "grad_norm": 0.357835590839386, "learning_rate": 3.73668254793364e-07, "loss": 0.0139, "step": 219630 }, { "epoch": 1.7771664374140301, "grad_norm": 0.33790621161460876, "learning_rate": 3.734004657894874e-07, "loss": 0.0205, "step": 219640 }, { "epoch": 1.7772473501092323, "grad_norm": 0.36551254987716675, "learning_rate": 3.731327690533709e-07, "loss": 0.0152, "step": 219650 }, { "epoch": 1.777328262804434, "grad_norm": 0.2879734933376312, "learning_rate": 3.728651645903525e-07, "loss": 0.0213, "step": 219660 }, { "epoch": 1.7774091754996357, "grad_norm": 0.16346268355846405, "learning_rate": 3.725976524057717e-07, "loss": 0.017, "step": 219670 }, { "epoch": 1.777490088194838, "grad_norm": 0.24659158289432526, "learning_rate": 3.723302325049605e-07, "loss": 0.0182, "step": 219680 }, { "epoch": 1.7775710008900396, "grad_norm": 0.3175276219844818, "learning_rate": 3.720629048932539e-07, "loss": 0.0209, "step": 219690 }, { "epoch": 1.7776519135852415, "grad_norm": 0.27413684129714966, "learning_rate": 3.7179566957598335e-07, "loss": 0.0141, "step": 219700 }, { "epoch": 1.7777328262804435, "grad_norm": 0.23881851136684418, "learning_rate": 3.7152852655847613e-07, "loss": 0.0192, "step": 219710 }, { "epoch": 1.7778137389756452, "grad_norm": 0.3195593059062958, "learning_rate": 3.71261475846062e-07, "loss": 0.0228, "step": 219720 }, { "epoch": 1.7778946516708471, "grad_norm": 0.1840210109949112, "learning_rate": 3.709945174440671e-07, "loss": 0.0193, "step": 219730 }, { "epoch": 1.777975564366049, "grad_norm": 0.5080223083496094, "learning_rate": 3.7072765135781286e-07, "loss": 0.0228, "step": 219740 }, { "epoch": 1.7780564770612508, "grad_norm": 0.4815525710582733, "learning_rate": 3.7046087759262327e-07, "loss": 0.0212, "step": 219750 }, { "epoch": 1.7781373897564527, "grad_norm": 0.3160783052444458, "learning_rate": 3.7019419615381804e-07, "loss": 0.0205, "step": 219760 }, { "epoch": 1.7782183024516547, "grad_norm": 0.5353878140449524, "learning_rate": 3.699276070467156e-07, "loss": 0.0247, "step": 219770 }, { "epoch": 1.7782992151468564, "grad_norm": 0.36126863956451416, "learning_rate": 3.696611102766323e-07, "loss": 0.0184, "step": 219780 }, { "epoch": 1.7783801278420586, "grad_norm": 0.3523772656917572, "learning_rate": 3.6939470584888337e-07, "loss": 0.0161, "step": 219790 }, { "epoch": 1.7784610405372603, "grad_norm": 0.451945424079895, "learning_rate": 3.6912839376878173e-07, "loss": 0.0189, "step": 219800 }, { "epoch": 1.7785419532324622, "grad_norm": 0.3353232741355896, "learning_rate": 3.688621740416376e-07, "loss": 0.0185, "step": 219810 }, { "epoch": 1.7786228659276642, "grad_norm": 0.2149214893579483, "learning_rate": 3.6859604667276006e-07, "loss": 0.016, "step": 219820 }, { "epoch": 1.7787037786228659, "grad_norm": 0.32438355684280396, "learning_rate": 3.683300116674582e-07, "loss": 0.0168, "step": 219830 }, { "epoch": 1.7787846913180678, "grad_norm": 0.07941217720508575, "learning_rate": 3.680640690310361e-07, "loss": 0.018, "step": 219840 }, { "epoch": 1.7788656040132698, "grad_norm": 0.405283123254776, "learning_rate": 3.6779821876879674e-07, "loss": 0.0154, "step": 219850 }, { "epoch": 1.7789465167084715, "grad_norm": 0.6355006694793701, "learning_rate": 3.675324608860448e-07, "loss": 0.0193, "step": 219860 }, { "epoch": 1.7790274294036734, "grad_norm": 0.36198604106903076, "learning_rate": 3.67266795388076e-07, "loss": 0.0167, "step": 219870 }, { "epoch": 1.7791083420988754, "grad_norm": 0.3127744197845459, "learning_rate": 3.670012222801922e-07, "loss": 0.0185, "step": 219880 }, { "epoch": 1.779189254794077, "grad_norm": 0.4974927604198456, "learning_rate": 3.6673574156768923e-07, "loss": 0.0178, "step": 219890 }, { "epoch": 1.779270167489279, "grad_norm": 0.48220130801200867, "learning_rate": 3.6647035325585835e-07, "loss": 0.0173, "step": 219900 }, { "epoch": 1.779351080184481, "grad_norm": 0.2761872112751007, "learning_rate": 3.662050573499959e-07, "loss": 0.0137, "step": 219910 }, { "epoch": 1.7794319928796827, "grad_norm": 0.8802619576454163, "learning_rate": 3.659398538553915e-07, "loss": 0.0381, "step": 219920 }, { "epoch": 1.7795129055748848, "grad_norm": 0.2691798210144043, "learning_rate": 3.6567474277733274e-07, "loss": 0.0245, "step": 219930 }, { "epoch": 1.7795938182700866, "grad_norm": 0.21501798927783966, "learning_rate": 3.6540972412110855e-07, "loss": 0.0176, "step": 219940 }, { "epoch": 1.7796747309652885, "grad_norm": 0.04852627217769623, "learning_rate": 3.651447978920031e-07, "loss": 0.0196, "step": 219950 }, { "epoch": 1.7797556436604904, "grad_norm": 0.4383065402507782, "learning_rate": 3.648799640952999e-07, "loss": 0.0233, "step": 219960 }, { "epoch": 1.7798365563556922, "grad_norm": 0.3272096812725067, "learning_rate": 3.646152227362809e-07, "loss": 0.013, "step": 219970 }, { "epoch": 1.779917469050894, "grad_norm": 0.31554514169692993, "learning_rate": 3.643505738202252e-07, "loss": 0.0262, "step": 219980 }, { "epoch": 1.779998381746096, "grad_norm": 0.47721269726753235, "learning_rate": 3.640860173524113e-07, "loss": 0.0204, "step": 219990 }, { "epoch": 1.7800792944412978, "grad_norm": 0.2720910608768463, "learning_rate": 3.6382155333811507e-07, "loss": 0.0166, "step": 220000 }, { "epoch": 1.7801602071364997, "grad_norm": 0.25720107555389404, "learning_rate": 3.6355718178260947e-07, "loss": 0.0279, "step": 220010 }, { "epoch": 1.7802411198317016, "grad_norm": 0.34746307134628296, "learning_rate": 3.632929026911697e-07, "loss": 0.0133, "step": 220020 }, { "epoch": 1.7803220325269034, "grad_norm": 0.21218812465667725, "learning_rate": 3.630287160690632e-07, "loss": 0.011, "step": 220030 }, { "epoch": 1.7804029452221053, "grad_norm": 0.5294857621192932, "learning_rate": 3.627646219215597e-07, "loss": 0.0237, "step": 220040 }, { "epoch": 1.7804838579173072, "grad_norm": 0.18989312648773193, "learning_rate": 3.625006202539277e-07, "loss": 0.0158, "step": 220050 }, { "epoch": 1.780564770612509, "grad_norm": 0.289044588804245, "learning_rate": 3.622367110714298e-07, "loss": 0.0183, "step": 220060 }, { "epoch": 1.7806456833077111, "grad_norm": 0.5636069774627686, "learning_rate": 3.619728943793288e-07, "loss": 0.0153, "step": 220070 }, { "epoch": 1.7807265960029128, "grad_norm": 0.198051318526268, "learning_rate": 3.61709170182889e-07, "loss": 0.0143, "step": 220080 }, { "epoch": 1.7808075086981148, "grad_norm": 0.5762969851493835, "learning_rate": 3.614455384873661e-07, "loss": 0.013, "step": 220090 }, { "epoch": 1.7808884213933167, "grad_norm": 0.38020530343055725, "learning_rate": 3.6118199929802034e-07, "loss": 0.0124, "step": 220100 }, { "epoch": 1.7809693340885184, "grad_norm": 0.8110939264297485, "learning_rate": 3.60918552620107e-07, "loss": 0.0301, "step": 220110 }, { "epoch": 1.7810502467837204, "grad_norm": 0.3311648368835449, "learning_rate": 3.606551984588791e-07, "loss": 0.0108, "step": 220120 }, { "epoch": 1.7811311594789223, "grad_norm": 0.49121037125587463, "learning_rate": 3.603919368195896e-07, "loss": 0.0181, "step": 220130 }, { "epoch": 1.781212072174124, "grad_norm": 0.48177534341812134, "learning_rate": 3.601287677074883e-07, "loss": 0.0187, "step": 220140 }, { "epoch": 1.781292984869326, "grad_norm": 0.46853387355804443, "learning_rate": 3.598656911278231e-07, "loss": 0.0219, "step": 220150 }, { "epoch": 1.781373897564528, "grad_norm": 0.14005520939826965, "learning_rate": 3.59602707085841e-07, "loss": 0.0152, "step": 220160 }, { "epoch": 1.7814548102597296, "grad_norm": 0.33165717124938965, "learning_rate": 3.593398155867872e-07, "loss": 0.0244, "step": 220170 }, { "epoch": 1.7815357229549318, "grad_norm": 0.5400751233100891, "learning_rate": 3.590770166359031e-07, "loss": 0.0242, "step": 220180 }, { "epoch": 1.7816166356501335, "grad_norm": 0.1885746717453003, "learning_rate": 3.588143102384312e-07, "loss": 0.0135, "step": 220190 }, { "epoch": 1.7816975483453352, "grad_norm": 0.40404805541038513, "learning_rate": 3.585516963996094e-07, "loss": 0.0199, "step": 220200 }, { "epoch": 1.7817784610405374, "grad_norm": 0.3897843062877655, "learning_rate": 3.582891751246753e-07, "loss": 0.0195, "step": 220210 }, { "epoch": 1.781859373735739, "grad_norm": 0.23965755105018616, "learning_rate": 3.5802674641886524e-07, "loss": 0.0195, "step": 220220 }, { "epoch": 1.781940286430941, "grad_norm": 0.614852786064148, "learning_rate": 3.577644102874106e-07, "loss": 0.0101, "step": 220230 }, { "epoch": 1.782021199126143, "grad_norm": 0.25466278195381165, "learning_rate": 3.575021667355466e-07, "loss": 0.0278, "step": 220240 }, { "epoch": 1.7821021118213447, "grad_norm": 0.4301185607910156, "learning_rate": 3.572400157685002e-07, "loss": 0.0171, "step": 220250 }, { "epoch": 1.7821830245165466, "grad_norm": 0.27135682106018066, "learning_rate": 3.5697795739149943e-07, "loss": 0.0169, "step": 220260 }, { "epoch": 1.7822639372117486, "grad_norm": 0.23243379592895508, "learning_rate": 3.5671599160977346e-07, "loss": 0.0097, "step": 220270 }, { "epoch": 1.7823448499069503, "grad_norm": 0.14558719098567963, "learning_rate": 3.564541184285436e-07, "loss": 0.0113, "step": 220280 }, { "epoch": 1.7824257626021522, "grad_norm": 0.6826800107955933, "learning_rate": 3.561923378530324e-07, "loss": 0.0266, "step": 220290 }, { "epoch": 1.7825066752973542, "grad_norm": 0.6103337407112122, "learning_rate": 3.5593064988846293e-07, "loss": 0.0246, "step": 220300 }, { "epoch": 1.782587587992556, "grad_norm": 0.4256438910961151, "learning_rate": 3.556690545400521e-07, "loss": 0.0123, "step": 220310 }, { "epoch": 1.782668500687758, "grad_norm": 0.46415433287620544, "learning_rate": 3.5540755181301736e-07, "loss": 0.0114, "step": 220320 }, { "epoch": 1.7827494133829598, "grad_norm": 0.20142170786857605, "learning_rate": 3.55146141712574e-07, "loss": 0.0139, "step": 220330 }, { "epoch": 1.7828303260781615, "grad_norm": 0.48262616991996765, "learning_rate": 3.5488482424393457e-07, "loss": 0.0125, "step": 220340 }, { "epoch": 1.7829112387733637, "grad_norm": 0.3930649161338806, "learning_rate": 3.546235994123115e-07, "loss": 0.0228, "step": 220350 }, { "epoch": 1.7829921514685654, "grad_norm": 0.6880437135696411, "learning_rate": 3.54362467222914e-07, "loss": 0.0305, "step": 220360 }, { "epoch": 1.7830730641637673, "grad_norm": 0.5300248861312866, "learning_rate": 3.5410142768094955e-07, "loss": 0.0231, "step": 220370 }, { "epoch": 1.7831539768589693, "grad_norm": 0.290122389793396, "learning_rate": 3.538404807916246e-07, "loss": 0.0195, "step": 220380 }, { "epoch": 1.783234889554171, "grad_norm": 0.1520773321390152, "learning_rate": 3.5357962656014213e-07, "loss": 0.0123, "step": 220390 }, { "epoch": 1.783315802249373, "grad_norm": 0.421190470457077, "learning_rate": 3.533188649917052e-07, "loss": 0.0228, "step": 220400 }, { "epoch": 1.7833967149445749, "grad_norm": 0.2966664731502533, "learning_rate": 3.5305819609151416e-07, "loss": 0.0174, "step": 220410 }, { "epoch": 1.7834776276397766, "grad_norm": 0.48971784114837646, "learning_rate": 3.5279761986476646e-07, "loss": 0.0127, "step": 220420 }, { "epoch": 1.7835585403349785, "grad_norm": 0.3320268988609314, "learning_rate": 3.5253713631666077e-07, "loss": 0.015, "step": 220430 }, { "epoch": 1.7836394530301805, "grad_norm": 0.2171277552843094, "learning_rate": 3.522767454523901e-07, "loss": 0.0191, "step": 220440 }, { "epoch": 1.7837203657253822, "grad_norm": 0.38877370953559875, "learning_rate": 3.5201644727714757e-07, "loss": 0.0143, "step": 220450 }, { "epoch": 1.7838012784205843, "grad_norm": 0.1412612944841385, "learning_rate": 3.517562417961257e-07, "loss": 0.0187, "step": 220460 }, { "epoch": 1.783882191115786, "grad_norm": 0.9155945181846619, "learning_rate": 3.5149612901451193e-07, "loss": 0.0263, "step": 220470 }, { "epoch": 1.783963103810988, "grad_norm": 0.3294931650161743, "learning_rate": 3.512361089374938e-07, "loss": 0.0147, "step": 220480 }, { "epoch": 1.78404401650619, "grad_norm": 0.18744204938411713, "learning_rate": 3.5097618157025837e-07, "loss": 0.0088, "step": 220490 }, { "epoch": 1.7841249292013917, "grad_norm": 0.3558445870876312, "learning_rate": 3.507163469179886e-07, "loss": 0.015, "step": 220500 }, { "epoch": 1.7842058418965936, "grad_norm": 0.4138406217098236, "learning_rate": 3.50456604985866e-07, "loss": 0.0154, "step": 220510 }, { "epoch": 1.7842867545917955, "grad_norm": 0.25393810868263245, "learning_rate": 3.5019695577907075e-07, "loss": 0.0207, "step": 220520 }, { "epoch": 1.7843676672869972, "grad_norm": 0.21626001596450806, "learning_rate": 3.4993739930278104e-07, "loss": 0.0219, "step": 220530 }, { "epoch": 1.7844485799821992, "grad_norm": 0.2893652617931366, "learning_rate": 3.4967793556217324e-07, "loss": 0.0182, "step": 220540 }, { "epoch": 1.7845294926774011, "grad_norm": 0.19516682624816895, "learning_rate": 3.494185645624221e-07, "loss": 0.0256, "step": 220550 }, { "epoch": 1.7846104053726028, "grad_norm": 0.3157002329826355, "learning_rate": 3.4915928630869956e-07, "loss": 0.0185, "step": 220560 }, { "epoch": 1.7846913180678048, "grad_norm": 0.3661162257194519, "learning_rate": 3.489001008061765e-07, "loss": 0.0117, "step": 220570 }, { "epoch": 1.7847722307630067, "grad_norm": 0.18816246092319489, "learning_rate": 3.486410080600222e-07, "loss": 0.0104, "step": 220580 }, { "epoch": 1.7848531434582084, "grad_norm": 0.46801093220710754, "learning_rate": 3.4838200807540354e-07, "loss": 0.0194, "step": 220590 }, { "epoch": 1.7849340561534106, "grad_norm": 0.21977677941322327, "learning_rate": 3.481231008574859e-07, "loss": 0.0185, "step": 220600 }, { "epoch": 1.7850149688486123, "grad_norm": 0.4191223680973053, "learning_rate": 3.478642864114323e-07, "loss": 0.0177, "step": 220610 }, { "epoch": 1.7850958815438143, "grad_norm": 0.4860753118991852, "learning_rate": 3.4760556474240424e-07, "loss": 0.0152, "step": 220620 }, { "epoch": 1.7851767942390162, "grad_norm": 0.3283984065055847, "learning_rate": 3.47346935855562e-07, "loss": 0.0166, "step": 220630 }, { "epoch": 1.785257706934218, "grad_norm": 0.3626807928085327, "learning_rate": 3.4708839975606143e-07, "loss": 0.0251, "step": 220640 }, { "epoch": 1.7853386196294199, "grad_norm": 0.12426600605249405, "learning_rate": 3.468299564490613e-07, "loss": 0.0106, "step": 220650 }, { "epoch": 1.7854195323246218, "grad_norm": 0.35582780838012695, "learning_rate": 3.4657160593971504e-07, "loss": 0.0247, "step": 220660 }, { "epoch": 1.7855004450198235, "grad_norm": 0.007757094223052263, "learning_rate": 3.463133482331732e-07, "loss": 0.0118, "step": 220670 }, { "epoch": 1.7855813577150255, "grad_norm": 0.47793859243392944, "learning_rate": 3.4605518333458766e-07, "loss": 0.0248, "step": 220680 }, { "epoch": 1.7856622704102274, "grad_norm": 0.8217606544494629, "learning_rate": 3.457971112491071e-07, "loss": 0.0188, "step": 220690 }, { "epoch": 1.7857431831054291, "grad_norm": 0.3035918176174164, "learning_rate": 3.455391319818763e-07, "loss": 0.0226, "step": 220700 }, { "epoch": 1.7858240958006313, "grad_norm": 0.18738768994808197, "learning_rate": 3.452812455380422e-07, "loss": 0.0161, "step": 220710 }, { "epoch": 1.785905008495833, "grad_norm": 0.3745638430118561, "learning_rate": 3.4502345192274745e-07, "loss": 0.0125, "step": 220720 }, { "epoch": 1.7859859211910347, "grad_norm": 0.20773182809352875, "learning_rate": 3.4476575114113287e-07, "loss": 0.0224, "step": 220730 }, { "epoch": 1.7860668338862369, "grad_norm": 0.508319079875946, "learning_rate": 3.445081431983377e-07, "loss": 0.0172, "step": 220740 }, { "epoch": 1.7861477465814386, "grad_norm": 0.2961225211620331, "learning_rate": 3.442506280994989e-07, "loss": 0.0189, "step": 220750 }, { "epoch": 1.7862286592766405, "grad_norm": 0.04343046247959137, "learning_rate": 3.4399320584975307e-07, "loss": 0.0129, "step": 220760 }, { "epoch": 1.7863095719718425, "grad_norm": 0.025721527636051178, "learning_rate": 3.4373587645423365e-07, "loss": 0.0192, "step": 220770 }, { "epoch": 1.7863904846670442, "grad_norm": 0.4364013075828552, "learning_rate": 3.4347863991807174e-07, "loss": 0.0131, "step": 220780 }, { "epoch": 1.7864713973622461, "grad_norm": 0.6152388453483582, "learning_rate": 3.4322149624639865e-07, "loss": 0.0224, "step": 220790 }, { "epoch": 1.786552310057448, "grad_norm": 0.015307690016925335, "learning_rate": 3.429644454443415e-07, "loss": 0.0181, "step": 220800 }, { "epoch": 1.7866332227526498, "grad_norm": 0.44052618741989136, "learning_rate": 3.427074875170272e-07, "loss": 0.0177, "step": 220810 }, { "epoch": 1.7867141354478517, "grad_norm": 0.5386049151420593, "learning_rate": 3.4245062246958006e-07, "loss": 0.0138, "step": 220820 }, { "epoch": 1.7867950481430537, "grad_norm": 0.2591244578361511, "learning_rate": 3.4219385030712315e-07, "loss": 0.0153, "step": 220830 }, { "epoch": 1.7868759608382554, "grad_norm": 0.6126967072486877, "learning_rate": 3.419371710347752e-07, "loss": 0.0412, "step": 220840 }, { "epoch": 1.7869568735334576, "grad_norm": 0.46808817982673645, "learning_rate": 3.416805846576593e-07, "loss": 0.0153, "step": 220850 }, { "epoch": 1.7870377862286593, "grad_norm": 0.5104411244392395, "learning_rate": 3.414240911808875e-07, "loss": 0.0211, "step": 220860 }, { "epoch": 1.787118698923861, "grad_norm": 0.29440435767173767, "learning_rate": 3.41167690609579e-07, "loss": 0.0177, "step": 220870 }, { "epoch": 1.7871996116190632, "grad_norm": 0.29192426800727844, "learning_rate": 3.4091138294884587e-07, "loss": 0.0188, "step": 220880 }, { "epoch": 1.7872805243142649, "grad_norm": 0.13701987266540527, "learning_rate": 3.406551682037984e-07, "loss": 0.0145, "step": 220890 }, { "epoch": 1.7873614370094668, "grad_norm": 0.4124860167503357, "learning_rate": 3.4039904637954756e-07, "loss": 0.0178, "step": 220900 }, { "epoch": 1.7874423497046688, "grad_norm": 0.4870074987411499, "learning_rate": 3.401430174812015e-07, "loss": 0.0198, "step": 220910 }, { "epoch": 1.7875232623998705, "grad_norm": 0.3503803014755249, "learning_rate": 3.3988708151386493e-07, "loss": 0.0129, "step": 220920 }, { "epoch": 1.7876041750950724, "grad_norm": 0.28794118762016296, "learning_rate": 3.3963123848264337e-07, "loss": 0.0112, "step": 220930 }, { "epoch": 1.7876850877902744, "grad_norm": 0.08422495424747467, "learning_rate": 3.3937548839263757e-07, "loss": 0.0122, "step": 220940 }, { "epoch": 1.787766000485476, "grad_norm": 0.302859902381897, "learning_rate": 3.3911983124894855e-07, "loss": 0.018, "step": 220950 }, { "epoch": 1.787846913180678, "grad_norm": 0.5832112431526184, "learning_rate": 3.3886426705667553e-07, "loss": 0.0222, "step": 220960 }, { "epoch": 1.78792782587588, "grad_norm": 0.4656238555908203, "learning_rate": 3.3860879582091443e-07, "loss": 0.0188, "step": 220970 }, { "epoch": 1.7880087385710817, "grad_norm": 0.3139951229095459, "learning_rate": 3.383534175467601e-07, "loss": 0.0147, "step": 220980 }, { "epoch": 1.7880896512662838, "grad_norm": 0.5638482570648193, "learning_rate": 3.3809813223930564e-07, "loss": 0.0277, "step": 220990 }, { "epoch": 1.7881705639614855, "grad_norm": 0.12379739433526993, "learning_rate": 3.378429399036426e-07, "loss": 0.0141, "step": 221000 }, { "epoch": 1.7882514766566875, "grad_norm": 0.5088558793067932, "learning_rate": 3.375878405448596e-07, "loss": 0.0239, "step": 221010 }, { "epoch": 1.7883323893518894, "grad_norm": 0.25436317920684814, "learning_rate": 3.373328341680443e-07, "loss": 0.0159, "step": 221020 }, { "epoch": 1.7884133020470911, "grad_norm": 0.2566220462322235, "learning_rate": 3.370779207782815e-07, "loss": 0.0173, "step": 221030 }, { "epoch": 1.788494214742293, "grad_norm": 0.08281677216291428, "learning_rate": 3.368231003806571e-07, "loss": 0.0333, "step": 221040 }, { "epoch": 1.788575127437495, "grad_norm": 0.27145278453826904, "learning_rate": 3.3656837298025045e-07, "loss": 0.0211, "step": 221050 }, { "epoch": 1.7886560401326967, "grad_norm": 0.21575233340263367, "learning_rate": 3.36313738582143e-07, "loss": 0.0269, "step": 221060 }, { "epoch": 1.7887369528278987, "grad_norm": 0.48284047842025757, "learning_rate": 3.36059197191414e-07, "loss": 0.0172, "step": 221070 }, { "epoch": 1.7888178655231006, "grad_norm": 0.22609832882881165, "learning_rate": 3.3580474881313606e-07, "loss": 0.0204, "step": 221080 }, { "epoch": 1.7888987782183023, "grad_norm": 0.45813068747520447, "learning_rate": 3.355503934523868e-07, "loss": 0.0222, "step": 221090 }, { "epoch": 1.7889796909135043, "grad_norm": 0.3928792476654053, "learning_rate": 3.3529613111423884e-07, "loss": 0.0169, "step": 221100 }, { "epoch": 1.7890606036087062, "grad_norm": 0.4712304472923279, "learning_rate": 3.350419618037598e-07, "loss": 0.0218, "step": 221110 }, { "epoch": 1.789141516303908, "grad_norm": 0.10375110059976578, "learning_rate": 3.347878855260223e-07, "loss": 0.0146, "step": 221120 }, { "epoch": 1.78922242899911, "grad_norm": 0.3899035155773163, "learning_rate": 3.345339022860911e-07, "loss": 0.0172, "step": 221130 }, { "epoch": 1.7893033416943118, "grad_norm": 0.5609198808670044, "learning_rate": 3.3428001208903215e-07, "loss": 0.0139, "step": 221140 }, { "epoch": 1.7893842543895138, "grad_norm": 0.3904474675655365, "learning_rate": 3.3402621493990874e-07, "loss": 0.0207, "step": 221150 }, { "epoch": 1.7894651670847157, "grad_norm": 0.6856539249420166, "learning_rate": 3.337725108437817e-07, "loss": 0.0115, "step": 221160 }, { "epoch": 1.7895460797799174, "grad_norm": 0.025794025510549545, "learning_rate": 3.335188998057115e-07, "loss": 0.0136, "step": 221170 }, { "epoch": 1.7896269924751194, "grad_norm": 0.45004886388778687, "learning_rate": 3.332653818307557e-07, "loss": 0.0117, "step": 221180 }, { "epoch": 1.7897079051703213, "grad_norm": 0.3067459166049957, "learning_rate": 3.3301195692396873e-07, "loss": 0.0204, "step": 221190 }, { "epoch": 1.789788817865523, "grad_norm": 0.30194416642189026, "learning_rate": 3.3275862509040744e-07, "loss": 0.0272, "step": 221200 }, { "epoch": 1.789869730560725, "grad_norm": 0.5795177221298218, "learning_rate": 3.3250538633512186e-07, "loss": 0.0188, "step": 221210 }, { "epoch": 1.789950643255927, "grad_norm": 0.4449652135372162, "learning_rate": 3.3225224066316173e-07, "loss": 0.0136, "step": 221220 }, { "epoch": 1.7900315559511286, "grad_norm": 0.06838895380496979, "learning_rate": 3.319991880795781e-07, "loss": 0.0172, "step": 221230 }, { "epoch": 1.7901124686463306, "grad_norm": 0.674285888671875, "learning_rate": 3.3174622858941573e-07, "loss": 0.0206, "step": 221240 }, { "epoch": 1.7901933813415325, "grad_norm": 0.6338257193565369, "learning_rate": 3.3149336219771843e-07, "loss": 0.022, "step": 221250 }, { "epoch": 1.7902742940367342, "grad_norm": 0.35035109519958496, "learning_rate": 3.312405889095327e-07, "loss": 0.0181, "step": 221260 }, { "epoch": 1.7903552067319364, "grad_norm": 0.20615385472774506, "learning_rate": 3.309879087298951e-07, "loss": 0.0223, "step": 221270 }, { "epoch": 1.790436119427138, "grad_norm": 0.3193471133708954, "learning_rate": 3.307353216638476e-07, "loss": 0.0264, "step": 221280 }, { "epoch": 1.79051703212234, "grad_norm": 0.5192899107933044, "learning_rate": 3.3048282771642795e-07, "loss": 0.0137, "step": 221290 }, { "epoch": 1.790597944817542, "grad_norm": 0.4728534519672394, "learning_rate": 3.302304268926687e-07, "loss": 0.0205, "step": 221300 }, { "epoch": 1.7906788575127437, "grad_norm": 0.32833796739578247, "learning_rate": 3.299781191976065e-07, "loss": 0.0257, "step": 221310 }, { "epoch": 1.7907597702079456, "grad_norm": 0.4008313715457916, "learning_rate": 3.297259046362722e-07, "loss": 0.0223, "step": 221320 }, { "epoch": 1.7908406829031476, "grad_norm": 0.2199205607175827, "learning_rate": 3.294737832136935e-07, "loss": 0.0274, "step": 221330 }, { "epoch": 1.7909215955983493, "grad_norm": 0.33015871047973633, "learning_rate": 3.2922175493490196e-07, "loss": 0.0118, "step": 221340 }, { "epoch": 1.7910025082935512, "grad_norm": 0.6768724918365479, "learning_rate": 3.2896981980492127e-07, "loss": 0.0205, "step": 221350 }, { "epoch": 1.7910834209887532, "grad_norm": 0.35412049293518066, "learning_rate": 3.287179778287769e-07, "loss": 0.0167, "step": 221360 }, { "epoch": 1.791164333683955, "grad_norm": 0.4094889461994171, "learning_rate": 3.2846622901149096e-07, "loss": 0.012, "step": 221370 }, { "epoch": 1.791245246379157, "grad_norm": 0.5916716456413269, "learning_rate": 3.2821457335808383e-07, "loss": 0.0237, "step": 221380 }, { "epoch": 1.7913261590743588, "grad_norm": 0.4701899290084839, "learning_rate": 3.279630108735748e-07, "loss": 0.014, "step": 221390 }, { "epoch": 1.7914070717695605, "grad_norm": 0.31063568592071533, "learning_rate": 3.2771154156298e-07, "loss": 0.0229, "step": 221400 }, { "epoch": 1.7914879844647627, "grad_norm": 0.6130534410476685, "learning_rate": 3.274601654313142e-07, "loss": 0.0139, "step": 221410 }, { "epoch": 1.7915688971599644, "grad_norm": 0.6165128946304321, "learning_rate": 3.272088824835928e-07, "loss": 0.0205, "step": 221420 }, { "epoch": 1.7916498098551663, "grad_norm": 0.41372376680374146, "learning_rate": 3.2695769272482526e-07, "loss": 0.0218, "step": 221430 }, { "epoch": 1.7917307225503682, "grad_norm": 0.33248454332351685, "learning_rate": 3.267065961600202e-07, "loss": 0.0162, "step": 221440 }, { "epoch": 1.79181163524557, "grad_norm": 0.2494441270828247, "learning_rate": 3.264555927941876e-07, "loss": 0.02, "step": 221450 }, { "epoch": 1.791892547940772, "grad_norm": 0.28971830010414124, "learning_rate": 3.262046826323306e-07, "loss": 0.0181, "step": 221460 }, { "epoch": 1.7919734606359738, "grad_norm": 0.09735699743032455, "learning_rate": 3.2595386567945475e-07, "loss": 0.0084, "step": 221470 }, { "epoch": 1.7920543733311756, "grad_norm": 0.42335787415504456, "learning_rate": 3.257031419405632e-07, "loss": 0.021, "step": 221480 }, { "epoch": 1.7921352860263775, "grad_norm": 0.3375087380409241, "learning_rate": 3.254525114206525e-07, "loss": 0.0167, "step": 221490 }, { "epoch": 1.7922161987215794, "grad_norm": 0.3873060345649719, "learning_rate": 3.252019741247242e-07, "loss": 0.0159, "step": 221500 }, { "epoch": 1.7922971114167812, "grad_norm": 0.10337238758802414, "learning_rate": 3.2495153005777437e-07, "loss": 0.0159, "step": 221510 }, { "epoch": 1.7923780241119833, "grad_norm": 0.2846023142337799, "learning_rate": 3.247011792247945e-07, "loss": 0.0126, "step": 221520 }, { "epoch": 1.792458936807185, "grad_norm": 0.4609564542770386, "learning_rate": 3.244509216307812e-07, "loss": 0.0212, "step": 221530 }, { "epoch": 1.7925398495023868, "grad_norm": 0.15380558371543884, "learning_rate": 3.2420075728072376e-07, "loss": 0.0137, "step": 221540 }, { "epoch": 1.792620762197589, "grad_norm": 0.4507937431335449, "learning_rate": 3.2395068617961046e-07, "loss": 0.0117, "step": 221550 }, { "epoch": 1.7927016748927906, "grad_norm": 0.25799205899238586, "learning_rate": 3.2370070833243004e-07, "loss": 0.0079, "step": 221560 }, { "epoch": 1.7927825875879926, "grad_norm": 0.45917314291000366, "learning_rate": 3.234508237441664e-07, "loss": 0.0184, "step": 221570 }, { "epoch": 1.7928635002831945, "grad_norm": 0.3599154055118561, "learning_rate": 3.2320103241980315e-07, "loss": 0.0133, "step": 221580 }, { "epoch": 1.7929444129783962, "grad_norm": 0.2258649617433548, "learning_rate": 3.2295133436432257e-07, "loss": 0.0202, "step": 221590 }, { "epoch": 1.7930253256735982, "grad_norm": 0.5234658718109131, "learning_rate": 3.227017295827034e-07, "loss": 0.0156, "step": 221600 }, { "epoch": 1.7931062383688001, "grad_norm": 0.545097827911377, "learning_rate": 3.22452218079925e-07, "loss": 0.0167, "step": 221610 }, { "epoch": 1.7931871510640018, "grad_norm": 0.3442240357398987, "learning_rate": 3.222027998609617e-07, "loss": 0.0127, "step": 221620 }, { "epoch": 1.7932680637592038, "grad_norm": 0.1640186458826065, "learning_rate": 3.219534749307879e-07, "loss": 0.01, "step": 221630 }, { "epoch": 1.7933489764544057, "grad_norm": 0.2840384542942047, "learning_rate": 3.217042432943773e-07, "loss": 0.0189, "step": 221640 }, { "epoch": 1.7934298891496074, "grad_norm": 0.5923588871955872, "learning_rate": 3.2145510495669883e-07, "loss": 0.0215, "step": 221650 }, { "epoch": 1.7935108018448096, "grad_norm": 0.3374970555305481, "learning_rate": 3.212060599227207e-07, "loss": 0.0119, "step": 221660 }, { "epoch": 1.7935917145400113, "grad_norm": 0.9656815528869629, "learning_rate": 3.2095710819741164e-07, "loss": 0.016, "step": 221670 }, { "epoch": 1.7936726272352133, "grad_norm": 0.4409258961677551, "learning_rate": 3.207082497857339e-07, "loss": 0.0167, "step": 221680 }, { "epoch": 1.7937535399304152, "grad_norm": 0.8396209478378296, "learning_rate": 3.2045948469265233e-07, "loss": 0.0139, "step": 221690 }, { "epoch": 1.793834452625617, "grad_norm": 0.3658197224140167, "learning_rate": 3.2021081292312797e-07, "loss": 0.0205, "step": 221700 }, { "epoch": 1.7939153653208189, "grad_norm": 0.3783232867717743, "learning_rate": 3.1996223448211794e-07, "loss": 0.0164, "step": 221710 }, { "epoch": 1.7939962780160208, "grad_norm": 0.21871984004974365, "learning_rate": 3.197137493745822e-07, "loss": 0.0091, "step": 221720 }, { "epoch": 1.7940771907112225, "grad_norm": 0.3727802634239197, "learning_rate": 3.194653576054757e-07, "loss": 0.0169, "step": 221730 }, { "epoch": 1.7941581034064245, "grad_norm": 0.2808331847190857, "learning_rate": 3.1921705917975055e-07, "loss": 0.022, "step": 221740 }, { "epoch": 1.7942390161016264, "grad_norm": 0.32209789752960205, "learning_rate": 3.1896885410236e-07, "loss": 0.0243, "step": 221750 }, { "epoch": 1.7943199287968281, "grad_norm": 0.822928249835968, "learning_rate": 3.1872074237825344e-07, "loss": 0.0164, "step": 221760 }, { "epoch": 1.79440084149203, "grad_norm": 0.07279179990291595, "learning_rate": 3.184727240123792e-07, "loss": 0.0148, "step": 221770 }, { "epoch": 1.794481754187232, "grad_norm": 0.44830936193466187, "learning_rate": 3.1822479900968317e-07, "loss": 0.0226, "step": 221780 }, { "epoch": 1.7945626668824337, "grad_norm": 0.3639329671859741, "learning_rate": 3.179769673751104e-07, "loss": 0.0158, "step": 221790 }, { "epoch": 1.7946435795776359, "grad_norm": 0.2962827682495117, "learning_rate": 3.177292291136019e-07, "loss": 0.0116, "step": 221800 }, { "epoch": 1.7947244922728376, "grad_norm": 0.6174368858337402, "learning_rate": 3.1748158423009987e-07, "loss": 0.0137, "step": 221810 }, { "epoch": 1.7948054049680395, "grad_norm": 0.3638637959957123, "learning_rate": 3.1723403272954143e-07, "loss": 0.023, "step": 221820 }, { "epoch": 1.7948863176632415, "grad_norm": 0.5073443651199341, "learning_rate": 3.1698657461686656e-07, "loss": 0.0213, "step": 221830 }, { "epoch": 1.7949672303584432, "grad_norm": 0.4694962501525879, "learning_rate": 3.167392098970068e-07, "loss": 0.0164, "step": 221840 }, { "epoch": 1.7950481430536451, "grad_norm": 0.4169379770755768, "learning_rate": 3.16491938574896e-07, "loss": 0.0196, "step": 221850 }, { "epoch": 1.795129055748847, "grad_norm": 0.08176698535680771, "learning_rate": 3.162447606554675e-07, "loss": 0.0092, "step": 221860 }, { "epoch": 1.7952099684440488, "grad_norm": 0.3870357871055603, "learning_rate": 3.1599767614364896e-07, "loss": 0.0205, "step": 221870 }, { "epoch": 1.7952908811392507, "grad_norm": 0.07445493340492249, "learning_rate": 3.157506850443676e-07, "loss": 0.0106, "step": 221880 }, { "epoch": 1.7953717938344527, "grad_norm": 0.5218777060508728, "learning_rate": 3.155037873625516e-07, "loss": 0.0144, "step": 221890 }, { "epoch": 1.7954527065296544, "grad_norm": 0.6663287281990051, "learning_rate": 3.152569831031216e-07, "loss": 0.0175, "step": 221900 }, { "epoch": 1.7955336192248563, "grad_norm": 1.0485366582870483, "learning_rate": 3.1501027227100136e-07, "loss": 0.0192, "step": 221910 }, { "epoch": 1.7956145319200583, "grad_norm": 0.27545636892318726, "learning_rate": 3.1476365487111136e-07, "loss": 0.0105, "step": 221920 }, { "epoch": 1.79569544461526, "grad_norm": 0.247147336602211, "learning_rate": 3.145171309083689e-07, "loss": 0.0099, "step": 221930 }, { "epoch": 1.7957763573104621, "grad_norm": 0.2620653212070465, "learning_rate": 3.14270700387691e-07, "loss": 0.0219, "step": 221940 }, { "epoch": 1.7958572700056639, "grad_norm": 0.3001648783683777, "learning_rate": 3.140243633139922e-07, "loss": 0.0155, "step": 221950 }, { "epoch": 1.7959381827008658, "grad_norm": 0.7589752078056335, "learning_rate": 3.1377811969218463e-07, "loss": 0.026, "step": 221960 }, { "epoch": 1.7960190953960677, "grad_norm": 0.06488263607025146, "learning_rate": 3.1353196952717935e-07, "loss": 0.018, "step": 221970 }, { "epoch": 1.7961000080912695, "grad_norm": 0.26754072308540344, "learning_rate": 3.132859128238858e-07, "loss": 0.0124, "step": 221980 }, { "epoch": 1.7961809207864714, "grad_norm": 0.4209173023700714, "learning_rate": 3.1303994958720996e-07, "loss": 0.0238, "step": 221990 }, { "epoch": 1.7962618334816733, "grad_norm": 0.34056153893470764, "learning_rate": 3.12794079822058e-07, "loss": 0.0184, "step": 222000 }, { "epoch": 1.796342746176875, "grad_norm": 0.41408398747444153, "learning_rate": 3.1254830353333267e-07, "loss": 0.0235, "step": 222010 }, { "epoch": 1.796423658872077, "grad_norm": 0.48425760865211487, "learning_rate": 3.1230262072593666e-07, "loss": 0.0166, "step": 222020 }, { "epoch": 1.796504571567279, "grad_norm": 0.04275006800889969, "learning_rate": 3.120570314047683e-07, "loss": 0.0143, "step": 222030 }, { "epoch": 1.7965854842624807, "grad_norm": 0.6044994592666626, "learning_rate": 3.1181153557472477e-07, "loss": 0.0262, "step": 222040 }, { "epoch": 1.7966663969576828, "grad_norm": 0.20903551578521729, "learning_rate": 3.1156613324070493e-07, "loss": 0.0183, "step": 222050 }, { "epoch": 1.7967473096528845, "grad_norm": 0.5610242486000061, "learning_rate": 3.113208244075994e-07, "loss": 0.0151, "step": 222060 }, { "epoch": 1.7968282223480863, "grad_norm": 0.1910293847322464, "learning_rate": 3.1107560908030133e-07, "loss": 0.013, "step": 222070 }, { "epoch": 1.7969091350432884, "grad_norm": 0.7517403364181519, "learning_rate": 3.108304872637036e-07, "loss": 0.0225, "step": 222080 }, { "epoch": 1.7969900477384901, "grad_norm": 0.3947615623474121, "learning_rate": 3.1058545896268997e-07, "loss": 0.0106, "step": 222090 }, { "epoch": 1.797070960433692, "grad_norm": 0.2618544399738312, "learning_rate": 3.103405241821511e-07, "loss": 0.0134, "step": 222100 }, { "epoch": 1.797151873128894, "grad_norm": 0.07145559787750244, "learning_rate": 3.100956829269697e-07, "loss": 0.0061, "step": 222110 }, { "epoch": 1.7972327858240957, "grad_norm": 0.28444480895996094, "learning_rate": 3.098509352020296e-07, "loss": 0.0122, "step": 222120 }, { "epoch": 1.7973136985192977, "grad_norm": 0.14288800954818726, "learning_rate": 3.096062810122108e-07, "loss": 0.014, "step": 222130 }, { "epoch": 1.7973946112144996, "grad_norm": 0.28797101974487305, "learning_rate": 3.093617203623928e-07, "loss": 0.0154, "step": 222140 }, { "epoch": 1.7974755239097013, "grad_norm": 0.4494587779045105, "learning_rate": 3.091172532574532e-07, "loss": 0.0169, "step": 222150 }, { "epoch": 1.7975564366049033, "grad_norm": 0.5118721723556519, "learning_rate": 3.088728797022672e-07, "loss": 0.0234, "step": 222160 }, { "epoch": 1.7976373493001052, "grad_norm": 0.3653031587600708, "learning_rate": 3.08628599701708e-07, "loss": 0.0193, "step": 222170 }, { "epoch": 1.797718261995307, "grad_norm": 0.33670368790626526, "learning_rate": 3.0838441326064726e-07, "loss": 0.0167, "step": 222180 }, { "epoch": 1.797799174690509, "grad_norm": 0.5172804594039917, "learning_rate": 3.08140320383955e-07, "loss": 0.0119, "step": 222190 }, { "epoch": 1.7978800873857108, "grad_norm": 0.46231725811958313, "learning_rate": 3.0789632107649956e-07, "loss": 0.0206, "step": 222200 }, { "epoch": 1.7979610000809125, "grad_norm": 0.4346323609352112, "learning_rate": 3.076524153431459e-07, "loss": 0.0373, "step": 222210 }, { "epoch": 1.7980419127761147, "grad_norm": 0.6597572565078735, "learning_rate": 3.074086031887591e-07, "loss": 0.0339, "step": 222220 }, { "epoch": 1.7981228254713164, "grad_norm": 0.48994508385658264, "learning_rate": 3.071648846182007e-07, "loss": 0.0164, "step": 222230 }, { "epoch": 1.7982037381665184, "grad_norm": 0.12718068063259125, "learning_rate": 3.0692125963633304e-07, "loss": 0.0123, "step": 222240 }, { "epoch": 1.7982846508617203, "grad_norm": 0.3069879412651062, "learning_rate": 3.066777282480127e-07, "loss": 0.0132, "step": 222250 }, { "epoch": 1.798365563556922, "grad_norm": 0.2787042260169983, "learning_rate": 3.064342904580958e-07, "loss": 0.0191, "step": 222260 }, { "epoch": 1.798446476252124, "grad_norm": 0.18216678500175476, "learning_rate": 3.061909462714402e-07, "loss": 0.0142, "step": 222270 }, { "epoch": 1.798527388947326, "grad_norm": 0.3802392780780792, "learning_rate": 3.059476956928964e-07, "loss": 0.0195, "step": 222280 }, { "epoch": 1.7986083016425276, "grad_norm": 0.43773287534713745, "learning_rate": 3.057045387273155e-07, "loss": 0.0235, "step": 222290 }, { "epoch": 1.7986892143377295, "grad_norm": 0.20320159196853638, "learning_rate": 3.054614753795482e-07, "loss": 0.0205, "step": 222300 }, { "epoch": 1.7987701270329315, "grad_norm": 0.3309265077114105, "learning_rate": 3.05218505654441e-07, "loss": 0.0146, "step": 222310 }, { "epoch": 1.7988510397281332, "grad_norm": 0.1911391019821167, "learning_rate": 3.0497562955683956e-07, "loss": 0.022, "step": 222320 }, { "epoch": 1.7989319524233354, "grad_norm": 0.15051403641700745, "learning_rate": 3.047328470915878e-07, "loss": 0.0077, "step": 222330 }, { "epoch": 1.799012865118537, "grad_norm": 0.3967221975326538, "learning_rate": 3.0449015826352734e-07, "loss": 0.0128, "step": 222340 }, { "epoch": 1.799093777813739, "grad_norm": 0.28098204731941223, "learning_rate": 3.042475630774977e-07, "loss": 0.0171, "step": 222350 }, { "epoch": 1.799174690508941, "grad_norm": 0.10822883993387222, "learning_rate": 3.040050615383372e-07, "loss": 0.0239, "step": 222360 }, { "epoch": 1.7992556032041427, "grad_norm": 0.23780719935894012, "learning_rate": 3.037626536508825e-07, "loss": 0.0102, "step": 222370 }, { "epoch": 1.7993365158993446, "grad_norm": 0.3773810863494873, "learning_rate": 3.0352033941996705e-07, "loss": 0.0239, "step": 222380 }, { "epoch": 1.7994174285945466, "grad_norm": 0.41642045974731445, "learning_rate": 3.0327811885042415e-07, "loss": 0.0188, "step": 222390 }, { "epoch": 1.7994983412897483, "grad_norm": 0.3981183171272278, "learning_rate": 3.030359919470838e-07, "loss": 0.0191, "step": 222400 }, { "epoch": 1.7995792539849502, "grad_norm": 0.25838643312454224, "learning_rate": 3.0279395871477435e-07, "loss": 0.0168, "step": 222410 }, { "epoch": 1.7996601666801522, "grad_norm": 0.48802316188812256, "learning_rate": 3.025520191583237e-07, "loss": 0.0224, "step": 222420 }, { "epoch": 1.7997410793753539, "grad_norm": 0.6168241500854492, "learning_rate": 3.0231017328255617e-07, "loss": 0.0177, "step": 222430 }, { "epoch": 1.7998219920705558, "grad_norm": 0.42002224922180176, "learning_rate": 3.020684210922947e-07, "loss": 0.018, "step": 222440 }, { "epoch": 1.7999029047657578, "grad_norm": 0.2783769369125366, "learning_rate": 3.018267625923604e-07, "loss": 0.0171, "step": 222450 }, { "epoch": 1.7999838174609595, "grad_norm": 0.4445739984512329, "learning_rate": 3.0158519778757387e-07, "loss": 0.015, "step": 222460 }, { "epoch": 1.8000647301561616, "grad_norm": 0.6535977721214294, "learning_rate": 3.013437266827524e-07, "loss": 0.0239, "step": 222470 }, { "epoch": 1.8001456428513634, "grad_norm": 0.38056305050849915, "learning_rate": 3.011023492827092e-07, "loss": 0.0152, "step": 222480 }, { "epoch": 1.8002265555465653, "grad_norm": 0.531343400478363, "learning_rate": 3.008610655922606e-07, "loss": 0.0124, "step": 222490 }, { "epoch": 1.8003074682417672, "grad_norm": 0.25111910700798035, "learning_rate": 3.006198756162182e-07, "loss": 0.0142, "step": 222500 }, { "epoch": 1.800388380936969, "grad_norm": 0.4826146364212036, "learning_rate": 3.0037877935939096e-07, "loss": 0.014, "step": 222510 }, { "epoch": 1.800469293632171, "grad_norm": 0.504303514957428, "learning_rate": 3.0013777682658787e-07, "loss": 0.0267, "step": 222520 }, { "epoch": 1.8005502063273728, "grad_norm": 0.5793650150299072, "learning_rate": 2.99896868022615e-07, "loss": 0.0122, "step": 222530 }, { "epoch": 1.8006311190225746, "grad_norm": 0.3476024866104126, "learning_rate": 2.9965605295227684e-07, "loss": 0.0118, "step": 222540 }, { "epoch": 1.8007120317177765, "grad_norm": 0.12239936739206314, "learning_rate": 2.9941533162037574e-07, "loss": 0.0166, "step": 222550 }, { "epoch": 1.8007929444129784, "grad_norm": 0.17533452808856964, "learning_rate": 2.9917470403171277e-07, "loss": 0.015, "step": 222560 }, { "epoch": 1.8008738571081802, "grad_norm": 0.20200572907924652, "learning_rate": 2.9893417019108585e-07, "loss": 0.0203, "step": 222570 }, { "epoch": 1.8009547698033823, "grad_norm": 0.40414607524871826, "learning_rate": 2.986937301032933e-07, "loss": 0.0178, "step": 222580 }, { "epoch": 1.801035682498584, "grad_norm": 0.4413236975669861, "learning_rate": 2.984533837731285e-07, "loss": 0.0207, "step": 222590 }, { "epoch": 1.8011165951937858, "grad_norm": 0.14257708191871643, "learning_rate": 2.9821313120538653e-07, "loss": 0.0316, "step": 222600 }, { "epoch": 1.801197507888988, "grad_norm": 0.18691758811473846, "learning_rate": 2.979729724048569e-07, "loss": 0.0298, "step": 222610 }, { "epoch": 1.8012784205841896, "grad_norm": 0.5030999779701233, "learning_rate": 2.977329073763302e-07, "loss": 0.0151, "step": 222620 }, { "epoch": 1.8013593332793916, "grad_norm": 0.4374494254589081, "learning_rate": 2.9749293612459374e-07, "loss": 0.0208, "step": 222630 }, { "epoch": 1.8014402459745935, "grad_norm": 0.23930524289608002, "learning_rate": 2.9725305865443255e-07, "loss": 0.0257, "step": 222640 }, { "epoch": 1.8015211586697952, "grad_norm": 0.3852173388004303, "learning_rate": 2.9701327497063226e-07, "loss": 0.0197, "step": 222650 }, { "epoch": 1.8016020713649972, "grad_norm": 0.38704851269721985, "learning_rate": 2.9677358507797405e-07, "loss": 0.019, "step": 222660 }, { "epoch": 1.801682984060199, "grad_norm": 0.41588306427001953, "learning_rate": 2.965339889812363e-07, "loss": 0.0113, "step": 222670 }, { "epoch": 1.8017638967554008, "grad_norm": 0.38437455892562866, "learning_rate": 2.9629448668519913e-07, "loss": 0.0116, "step": 222680 }, { "epoch": 1.8018448094506028, "grad_norm": 0.5497220754623413, "learning_rate": 2.9605507819463976e-07, "loss": 0.0205, "step": 222690 }, { "epoch": 1.8019257221458047, "grad_norm": 0.34999343752861023, "learning_rate": 2.958157635143294e-07, "loss": 0.015, "step": 222700 }, { "epoch": 1.8020066348410064, "grad_norm": 0.1476365327835083, "learning_rate": 2.9557654264904366e-07, "loss": 0.018, "step": 222710 }, { "epoch": 1.8020875475362086, "grad_norm": 0.35604599118232727, "learning_rate": 2.953374156035527e-07, "loss": 0.0246, "step": 222720 }, { "epoch": 1.8021684602314103, "grad_norm": 0.16104929149150848, "learning_rate": 2.950983823826242e-07, "loss": 0.0212, "step": 222730 }, { "epoch": 1.802249372926612, "grad_norm": 0.37787219882011414, "learning_rate": 2.9485944299102677e-07, "loss": 0.0199, "step": 222740 }, { "epoch": 1.8023302856218142, "grad_norm": 0.2058870494365692, "learning_rate": 2.9462059743352424e-07, "loss": 0.0249, "step": 222750 }, { "epoch": 1.802411198317016, "grad_norm": 0.7033275365829468, "learning_rate": 2.943818457148806e-07, "loss": 0.0211, "step": 222760 }, { "epoch": 1.8024921110122178, "grad_norm": 0.16903647780418396, "learning_rate": 2.941431878398571e-07, "loss": 0.0251, "step": 222770 }, { "epoch": 1.8025730237074198, "grad_norm": 0.18253885209560394, "learning_rate": 2.9390462381321314e-07, "loss": 0.025, "step": 222780 }, { "epoch": 1.8026539364026215, "grad_norm": 0.2506721019744873, "learning_rate": 2.936661536397062e-07, "loss": 0.0123, "step": 222790 }, { "epoch": 1.8027348490978234, "grad_norm": 0.27495524287223816, "learning_rate": 2.934277773240923e-07, "loss": 0.0245, "step": 222800 }, { "epoch": 1.8028157617930254, "grad_norm": 0.20512385666370392, "learning_rate": 2.931894948711256e-07, "loss": 0.0154, "step": 222810 }, { "epoch": 1.802896674488227, "grad_norm": 0.4870890974998474, "learning_rate": 2.929513062855582e-07, "loss": 0.0142, "step": 222820 }, { "epoch": 1.802977587183429, "grad_norm": 0.48578616976737976, "learning_rate": 2.927132115721398e-07, "loss": 0.0157, "step": 222830 }, { "epoch": 1.803058499878631, "grad_norm": 0.376754492521286, "learning_rate": 2.924752107356177e-07, "loss": 0.0254, "step": 222840 }, { "epoch": 1.8031394125738327, "grad_norm": 0.21855588257312775, "learning_rate": 2.9223730378074133e-07, "loss": 0.009, "step": 222850 }, { "epoch": 1.8032203252690349, "grad_norm": 0.5401240587234497, "learning_rate": 2.91999490712252e-07, "loss": 0.0233, "step": 222860 }, { "epoch": 1.8033012379642366, "grad_norm": 1.0511137247085571, "learning_rate": 2.9176177153489414e-07, "loss": 0.0335, "step": 222870 }, { "epoch": 1.8033821506594385, "grad_norm": 0.6530522704124451, "learning_rate": 2.915241462534096e-07, "loss": 0.023, "step": 222880 }, { "epoch": 1.8034630633546405, "grad_norm": 0.623251736164093, "learning_rate": 2.9128661487253405e-07, "loss": 0.0205, "step": 222890 }, { "epoch": 1.8035439760498422, "grad_norm": 0.2975897789001465, "learning_rate": 2.91049177397007e-07, "loss": 0.0265, "step": 222900 }, { "epoch": 1.8036248887450441, "grad_norm": 0.3765420615673065, "learning_rate": 2.908118338315641e-07, "loss": 0.0113, "step": 222910 }, { "epoch": 1.803705801440246, "grad_norm": 0.02801424451172352, "learning_rate": 2.9057458418093597e-07, "loss": 0.0092, "step": 222920 }, { "epoch": 1.8037867141354478, "grad_norm": 0.34522566199302673, "learning_rate": 2.903374284498567e-07, "loss": 0.0186, "step": 222930 }, { "epoch": 1.8038676268306497, "grad_norm": 0.5155054926872253, "learning_rate": 2.9010036664305464e-07, "loss": 0.0187, "step": 222940 }, { "epoch": 1.8039485395258517, "grad_norm": 0.32856613397598267, "learning_rate": 2.898633987652577e-07, "loss": 0.0167, "step": 222950 }, { "epoch": 1.8040294522210534, "grad_norm": 0.37956976890563965, "learning_rate": 2.896265248211921e-07, "loss": 0.0107, "step": 222960 }, { "epoch": 1.8041103649162553, "grad_norm": 0.7974297404289246, "learning_rate": 2.8938974481558134e-07, "loss": 0.015, "step": 222970 }, { "epoch": 1.8041912776114573, "grad_norm": 0.3159165680408478, "learning_rate": 2.891530587531471e-07, "loss": 0.0137, "step": 222980 }, { "epoch": 1.804272190306659, "grad_norm": 0.34461328387260437, "learning_rate": 2.889164666386107e-07, "loss": 0.0233, "step": 222990 }, { "epoch": 1.8043531030018611, "grad_norm": 0.5854827165603638, "learning_rate": 2.8867996847668887e-07, "loss": 0.0177, "step": 223000 }, { "epoch": 1.8044340156970629, "grad_norm": 0.28511014580726624, "learning_rate": 2.884435642721001e-07, "loss": 0.0105, "step": 223010 }, { "epoch": 1.8045149283922648, "grad_norm": 0.35229918360710144, "learning_rate": 2.8820725402955775e-07, "loss": 0.0188, "step": 223020 }, { "epoch": 1.8045958410874667, "grad_norm": 0.38563480973243713, "learning_rate": 2.879710377537737e-07, "loss": 0.0141, "step": 223030 }, { "epoch": 1.8046767537826685, "grad_norm": 0.806125819683075, "learning_rate": 2.877349154494613e-07, "loss": 0.0159, "step": 223040 }, { "epoch": 1.8047576664778704, "grad_norm": 0.6213095188140869, "learning_rate": 2.8749888712132634e-07, "loss": 0.0166, "step": 223050 }, { "epoch": 1.8048385791730723, "grad_norm": 0.12030058354139328, "learning_rate": 2.872629527740783e-07, "loss": 0.0183, "step": 223060 }, { "epoch": 1.804919491868274, "grad_norm": 0.21876966953277588, "learning_rate": 2.870271124124224e-07, "loss": 0.0155, "step": 223070 }, { "epoch": 1.805000404563476, "grad_norm": 0.18234534561634064, "learning_rate": 2.8679136604105984e-07, "loss": 0.0351, "step": 223080 }, { "epoch": 1.805081317258678, "grad_norm": 0.17359906435012817, "learning_rate": 2.86555713664694e-07, "loss": 0.0138, "step": 223090 }, { "epoch": 1.8051622299538796, "grad_norm": 0.42802077531814575, "learning_rate": 2.8632015528802515e-07, "loss": 0.0204, "step": 223100 }, { "epoch": 1.8052431426490816, "grad_norm": 0.43704953789711, "learning_rate": 2.8608469091574775e-07, "loss": 0.0296, "step": 223110 }, { "epoch": 1.8053240553442835, "grad_norm": 0.1351744681596756, "learning_rate": 2.858493205525609e-07, "loss": 0.0151, "step": 223120 }, { "epoch": 1.8054049680394852, "grad_norm": 0.014187571592628956, "learning_rate": 2.8561404420315686e-07, "loss": 0.0154, "step": 223130 }, { "epoch": 1.8054858807346874, "grad_norm": 0.3091960847377777, "learning_rate": 2.853788618722281e-07, "loss": 0.0193, "step": 223140 }, { "epoch": 1.8055667934298891, "grad_norm": 0.14821358025074005, "learning_rate": 2.851437735644652e-07, "loss": 0.024, "step": 223150 }, { "epoch": 1.805647706125091, "grad_norm": 0.2544313073158264, "learning_rate": 2.8490877928455674e-07, "loss": 0.0149, "step": 223160 }, { "epoch": 1.805728618820293, "grad_norm": 0.23843632638454437, "learning_rate": 2.846738790371878e-07, "loss": 0.0265, "step": 223170 }, { "epoch": 1.8058095315154947, "grad_norm": 0.3690142035484314, "learning_rate": 2.8443907282704466e-07, "loss": 0.0167, "step": 223180 }, { "epoch": 1.8058904442106967, "grad_norm": 0.37759312987327576, "learning_rate": 2.84204360658808e-07, "loss": 0.0229, "step": 223190 }, { "epoch": 1.8059713569058986, "grad_norm": 0.21610136330127716, "learning_rate": 2.8396974253716123e-07, "loss": 0.0088, "step": 223200 }, { "epoch": 1.8060522696011003, "grad_norm": 0.6232396960258484, "learning_rate": 2.8373521846678185e-07, "loss": 0.0161, "step": 223210 }, { "epoch": 1.8061331822963023, "grad_norm": 0.6275827288627625, "learning_rate": 2.835007884523455e-07, "loss": 0.0206, "step": 223220 }, { "epoch": 1.8062140949915042, "grad_norm": 0.45691978931427, "learning_rate": 2.8326645249853126e-07, "loss": 0.0122, "step": 223230 }, { "epoch": 1.806295007686706, "grad_norm": 0.796456515789032, "learning_rate": 2.8303221061000865e-07, "loss": 0.0298, "step": 223240 }, { "epoch": 1.806375920381908, "grad_norm": 0.30736902356147766, "learning_rate": 2.827980627914506e-07, "loss": 0.0142, "step": 223250 }, { "epoch": 1.8064568330771098, "grad_norm": 0.13321059942245483, "learning_rate": 2.825640090475279e-07, "loss": 0.0128, "step": 223260 }, { "epoch": 1.8065377457723115, "grad_norm": 0.1883191466331482, "learning_rate": 2.823300493829051e-07, "loss": 0.0135, "step": 223270 }, { "epoch": 1.8066186584675137, "grad_norm": 0.37599682807922363, "learning_rate": 2.8209618380225126e-07, "loss": 0.0139, "step": 223280 }, { "epoch": 1.8066995711627154, "grad_norm": 0.3539370000362396, "learning_rate": 2.818624123102298e-07, "loss": 0.0175, "step": 223290 }, { "epoch": 1.8067804838579173, "grad_norm": 0.5320872664451599, "learning_rate": 2.81628734911501e-07, "loss": 0.0139, "step": 223300 }, { "epoch": 1.8068613965531193, "grad_norm": 0.4486214816570282, "learning_rate": 2.813951516107266e-07, "loss": 0.0157, "step": 223310 }, { "epoch": 1.806942309248321, "grad_norm": 0.23252621293067932, "learning_rate": 2.8116166241256514e-07, "loss": 0.0173, "step": 223320 }, { "epoch": 1.807023221943523, "grad_norm": 0.18828463554382324, "learning_rate": 2.809282673216707e-07, "loss": 0.0172, "step": 223330 }, { "epoch": 1.8071041346387249, "grad_norm": 0.027440229430794716, "learning_rate": 2.8069496634270055e-07, "loss": 0.0139, "step": 223340 }, { "epoch": 1.8071850473339266, "grad_norm": 0.20980992913246155, "learning_rate": 2.8046175948030663e-07, "loss": 0.0208, "step": 223350 }, { "epoch": 1.8072659600291285, "grad_norm": 0.3206806480884552, "learning_rate": 2.802286467391391e-07, "loss": 0.0259, "step": 223360 }, { "epoch": 1.8073468727243305, "grad_norm": 0.33432596921920776, "learning_rate": 2.799956281238475e-07, "loss": 0.0084, "step": 223370 }, { "epoch": 1.8074277854195322, "grad_norm": 0.21591666340827942, "learning_rate": 2.7976270363907876e-07, "loss": 0.0177, "step": 223380 }, { "epoch": 1.8075086981147344, "grad_norm": 0.5632249116897583, "learning_rate": 2.79529873289478e-07, "loss": 0.0269, "step": 223390 }, { "epoch": 1.807589610809936, "grad_norm": 0.31215932965278625, "learning_rate": 2.7929713707968876e-07, "loss": 0.0153, "step": 223400 }, { "epoch": 1.8076705235051378, "grad_norm": 0.2523052990436554, "learning_rate": 2.790644950143512e-07, "loss": 0.0116, "step": 223410 }, { "epoch": 1.80775143620034, "grad_norm": 0.2671729028224945, "learning_rate": 2.788319470981077e-07, "loss": 0.0111, "step": 223420 }, { "epoch": 1.8078323488955417, "grad_norm": 0.22496932744979858, "learning_rate": 2.7859949333559343e-07, "loss": 0.024, "step": 223430 }, { "epoch": 1.8079132615907436, "grad_norm": 0.7720943093299866, "learning_rate": 2.783671337314442e-07, "loss": 0.02, "step": 223440 }, { "epoch": 1.8079941742859456, "grad_norm": 0.2717922031879425, "learning_rate": 2.7813486829029624e-07, "loss": 0.0156, "step": 223450 }, { "epoch": 1.8080750869811473, "grad_norm": 0.00094039790565148, "learning_rate": 2.779026970167792e-07, "loss": 0.0157, "step": 223460 }, { "epoch": 1.8081559996763492, "grad_norm": 0.6708715558052063, "learning_rate": 2.7767061991552323e-07, "loss": 0.0189, "step": 223470 }, { "epoch": 1.8082369123715512, "grad_norm": 0.4345155358314514, "learning_rate": 2.7743863699115857e-07, "loss": 0.0242, "step": 223480 }, { "epoch": 1.8083178250667529, "grad_norm": 0.3326713442802429, "learning_rate": 2.7720674824830987e-07, "loss": 0.0125, "step": 223490 }, { "epoch": 1.8083987377619548, "grad_norm": 0.5942465662956238, "learning_rate": 2.7697495369160223e-07, "loss": 0.0212, "step": 223500 }, { "epoch": 1.8084796504571568, "grad_norm": 0.7372852563858032, "learning_rate": 2.767432533256598e-07, "loss": 0.0247, "step": 223510 }, { "epoch": 1.8085605631523585, "grad_norm": 0.376297265291214, "learning_rate": 2.765116471551005e-07, "loss": 0.0126, "step": 223520 }, { "epoch": 1.8086414758475606, "grad_norm": 0.14793884754180908, "learning_rate": 2.7628013518454513e-07, "loss": 0.0177, "step": 223530 }, { "epoch": 1.8087223885427623, "grad_norm": 0.3573188781738281, "learning_rate": 2.760487174186099e-07, "loss": 0.0138, "step": 223540 }, { "epoch": 1.8088033012379643, "grad_norm": 0.16375775635242462, "learning_rate": 2.758173938619108e-07, "loss": 0.0148, "step": 223550 }, { "epoch": 1.8088842139331662, "grad_norm": 0.29513758420944214, "learning_rate": 2.755861645190605e-07, "loss": 0.0166, "step": 223560 }, { "epoch": 1.808965126628368, "grad_norm": 0.25578027963638306, "learning_rate": 2.753550293946705e-07, "loss": 0.0183, "step": 223570 }, { "epoch": 1.8090460393235699, "grad_norm": 0.23066017031669617, "learning_rate": 2.7512398849334985e-07, "loss": 0.0112, "step": 223580 }, { "epoch": 1.8091269520187718, "grad_norm": 0.3015475869178772, "learning_rate": 2.748930418197071e-07, "loss": 0.0234, "step": 223590 }, { "epoch": 1.8092078647139735, "grad_norm": 0.2464895248413086, "learning_rate": 2.746621893783463e-07, "loss": 0.0217, "step": 223600 }, { "epoch": 1.8092887774091755, "grad_norm": 0.46855056285858154, "learning_rate": 2.744314311738744e-07, "loss": 0.0164, "step": 223610 }, { "epoch": 1.8093696901043774, "grad_norm": 0.009300372563302517, "learning_rate": 2.7420076721089096e-07, "loss": 0.0272, "step": 223620 }, { "epoch": 1.8094506027995791, "grad_norm": 0.22928859293460846, "learning_rate": 2.739701974939951e-07, "loss": 0.0216, "step": 223630 }, { "epoch": 1.809531515494781, "grad_norm": 0.24221038818359375, "learning_rate": 2.737397220277888e-07, "loss": 0.0162, "step": 223640 }, { "epoch": 1.809612428189983, "grad_norm": 0.24572469294071198, "learning_rate": 2.7350934081686487e-07, "loss": 0.0164, "step": 223650 }, { "epoch": 1.8096933408851847, "grad_norm": 0.4459383487701416, "learning_rate": 2.7327905386581863e-07, "loss": 0.0136, "step": 223660 }, { "epoch": 1.809774253580387, "grad_norm": 0.1673043966293335, "learning_rate": 2.730488611792448e-07, "loss": 0.0202, "step": 223670 }, { "epoch": 1.8098551662755886, "grad_norm": 0.742452085018158, "learning_rate": 2.7281876276173015e-07, "loss": 0.0237, "step": 223680 }, { "epoch": 1.8099360789707906, "grad_norm": 0.3796423077583313, "learning_rate": 2.7258875861786715e-07, "loss": 0.0209, "step": 223690 }, { "epoch": 1.8100169916659925, "grad_norm": 0.6412084698677063, "learning_rate": 2.7235884875224217e-07, "loss": 0.0207, "step": 223700 }, { "epoch": 1.8100979043611942, "grad_norm": 0.2880706489086151, "learning_rate": 2.7212903316943764e-07, "loss": 0.0126, "step": 223710 }, { "epoch": 1.8101788170563962, "grad_norm": 0.29816362261772156, "learning_rate": 2.718993118740393e-07, "loss": 0.0168, "step": 223720 }, { "epoch": 1.810259729751598, "grad_norm": 0.3241753578186035, "learning_rate": 2.7166968487062797e-07, "loss": 0.0249, "step": 223730 }, { "epoch": 1.8103406424467998, "grad_norm": 0.2523774802684784, "learning_rate": 2.7144015216378274e-07, "loss": 0.0342, "step": 223740 }, { "epoch": 1.8104215551420018, "grad_norm": 0.620144784450531, "learning_rate": 2.7121071375808104e-07, "loss": 0.0204, "step": 223750 }, { "epoch": 1.8105024678372037, "grad_norm": 0.48995935916900635, "learning_rate": 2.7098136965809875e-07, "loss": 0.0182, "step": 223760 }, { "epoch": 1.8105833805324054, "grad_norm": 0.2420583963394165, "learning_rate": 2.7075211986840986e-07, "loss": 0.0166, "step": 223770 }, { "epoch": 1.8106642932276074, "grad_norm": 0.43770113587379456, "learning_rate": 2.7052296439358584e-07, "loss": 0.02, "step": 223780 }, { "epoch": 1.8107452059228093, "grad_norm": 0.337253212928772, "learning_rate": 2.702939032381968e-07, "loss": 0.0176, "step": 223790 }, { "epoch": 1.810826118618011, "grad_norm": 0.3146330714225769, "learning_rate": 2.7006493640681143e-07, "loss": 0.0109, "step": 223800 }, { "epoch": 1.8109070313132132, "grad_norm": 0.3961560130119324, "learning_rate": 2.6983606390399544e-07, "loss": 0.0141, "step": 223810 }, { "epoch": 1.810987944008415, "grad_norm": 0.2705092132091522, "learning_rate": 2.696072857343124e-07, "loss": 0.0135, "step": 223820 }, { "epoch": 1.8110688567036168, "grad_norm": 0.24670711159706116, "learning_rate": 2.693786019023276e-07, "loss": 0.0223, "step": 223830 }, { "epoch": 1.8111497693988188, "grad_norm": 0.25236988067626953, "learning_rate": 2.6915001241259895e-07, "loss": 0.0195, "step": 223840 }, { "epoch": 1.8112306820940205, "grad_norm": 0.2504505515098572, "learning_rate": 2.689215172696852e-07, "loss": 0.0179, "step": 223850 }, { "epoch": 1.8113115947892224, "grad_norm": 0.2840518653392792, "learning_rate": 2.686931164781453e-07, "loss": 0.0083, "step": 223860 }, { "epoch": 1.8113925074844244, "grad_norm": 0.5809680819511414, "learning_rate": 2.684648100425324e-07, "loss": 0.0166, "step": 223870 }, { "epoch": 1.811473420179626, "grad_norm": 0.23610328137874603, "learning_rate": 2.682365979673995e-07, "loss": 0.0261, "step": 223880 }, { "epoch": 1.811554332874828, "grad_norm": 0.1986674815416336, "learning_rate": 2.680084802572996e-07, "loss": 0.016, "step": 223890 }, { "epoch": 1.81163524557003, "grad_norm": 0.49480071663856506, "learning_rate": 2.677804569167797e-07, "loss": 0.0169, "step": 223900 }, { "epoch": 1.8117161582652317, "grad_norm": 0.348698228597641, "learning_rate": 2.6755252795038933e-07, "loss": 0.0202, "step": 223910 }, { "epoch": 1.8117970709604339, "grad_norm": 0.5304847359657288, "learning_rate": 2.6732469336267274e-07, "loss": 0.0107, "step": 223920 }, { "epoch": 1.8118779836556356, "grad_norm": 0.43893054127693176, "learning_rate": 2.670969531581741e-07, "loss": 0.0153, "step": 223930 }, { "epoch": 1.8119588963508373, "grad_norm": 0.3398096263408661, "learning_rate": 2.6686930734143526e-07, "loss": 0.0147, "step": 223940 }, { "epoch": 1.8120398090460395, "grad_norm": 0.20835871994495392, "learning_rate": 2.6664175591699646e-07, "loss": 0.0166, "step": 223950 }, { "epoch": 1.8121207217412412, "grad_norm": 0.27782031893730164, "learning_rate": 2.6641429888939475e-07, "loss": 0.024, "step": 223960 }, { "epoch": 1.812201634436443, "grad_norm": 0.4040989577770233, "learning_rate": 2.661869362631669e-07, "loss": 0.0332, "step": 223970 }, { "epoch": 1.812282547131645, "grad_norm": 0.27724847197532654, "learning_rate": 2.659596680428467e-07, "loss": 0.0136, "step": 223980 }, { "epoch": 1.8123634598268468, "grad_norm": 0.3591587543487549, "learning_rate": 2.6573249423296755e-07, "loss": 0.022, "step": 223990 }, { "epoch": 1.8124443725220487, "grad_norm": 0.4039470851421356, "learning_rate": 2.6550541483805867e-07, "loss": 0.0295, "step": 224000 }, { "epoch": 1.8125252852172506, "grad_norm": 0.2586299180984497, "learning_rate": 2.652784298626498e-07, "loss": 0.0066, "step": 224010 }, { "epoch": 1.8126061979124524, "grad_norm": 0.47677767276763916, "learning_rate": 2.650515393112674e-07, "loss": 0.0211, "step": 224020 }, { "epoch": 1.8126871106076543, "grad_norm": 0.47250106930732727, "learning_rate": 2.64824743188436e-07, "loss": 0.0252, "step": 224030 }, { "epoch": 1.8127680233028562, "grad_norm": 0.3495924770832062, "learning_rate": 2.6459804149867764e-07, "loss": 0.0302, "step": 224040 }, { "epoch": 1.812848935998058, "grad_norm": 0.258395254611969, "learning_rate": 2.643714342465159e-07, "loss": 0.0317, "step": 224050 }, { "epoch": 1.8129298486932601, "grad_norm": 0.6849596500396729, "learning_rate": 2.641449214364683e-07, "loss": 0.0219, "step": 224060 }, { "epoch": 1.8130107613884618, "grad_norm": 0.22727221250534058, "learning_rate": 2.639185030730512e-07, "loss": 0.0147, "step": 224070 }, { "epoch": 1.8130916740836636, "grad_norm": 0.477124959230423, "learning_rate": 2.636921791607827e-07, "loss": 0.0134, "step": 224080 }, { "epoch": 1.8131725867788657, "grad_norm": 0.24792978167533875, "learning_rate": 2.634659497041736e-07, "loss": 0.011, "step": 224090 }, { "epoch": 1.8132534994740674, "grad_norm": 0.06144876778125763, "learning_rate": 2.632398147077381e-07, "loss": 0.0171, "step": 224100 }, { "epoch": 1.8133344121692694, "grad_norm": 0.33915796875953674, "learning_rate": 2.6301377417598417e-07, "loss": 0.0137, "step": 224110 }, { "epoch": 1.8134153248644713, "grad_norm": 0.4603286683559418, "learning_rate": 2.627878281134205e-07, "loss": 0.0308, "step": 224120 }, { "epoch": 1.813496237559673, "grad_norm": 0.3958490192890167, "learning_rate": 2.625619765245524e-07, "loss": 0.0162, "step": 224130 }, { "epoch": 1.813577150254875, "grad_norm": 0.2630273997783661, "learning_rate": 2.623362194138851e-07, "loss": 0.0258, "step": 224140 }, { "epoch": 1.813658062950077, "grad_norm": 0.07596489787101746, "learning_rate": 2.6211055678592e-07, "loss": 0.0177, "step": 224150 }, { "epoch": 1.8137389756452786, "grad_norm": 0.48648175597190857, "learning_rate": 2.61884988645158e-07, "loss": 0.0271, "step": 224160 }, { "epoch": 1.8138198883404806, "grad_norm": 0.1693929135799408, "learning_rate": 2.616595149960971e-07, "loss": 0.0148, "step": 224170 }, { "epoch": 1.8139008010356825, "grad_norm": 0.27028483152389526, "learning_rate": 2.6143413584323374e-07, "loss": 0.0121, "step": 224180 }, { "epoch": 1.8139817137308842, "grad_norm": 0.36783063411712646, "learning_rate": 2.6120885119106375e-07, "loss": 0.0132, "step": 224190 }, { "epoch": 1.8140626264260864, "grad_norm": 0.26144981384277344, "learning_rate": 2.609836610440786e-07, "loss": 0.0159, "step": 224200 }, { "epoch": 1.8141435391212881, "grad_norm": 0.2684252858161926, "learning_rate": 2.6075856540677015e-07, "loss": 0.0151, "step": 224210 }, { "epoch": 1.81422445181649, "grad_norm": 0.25473248958587646, "learning_rate": 2.605335642836265e-07, "loss": 0.0214, "step": 224220 }, { "epoch": 1.814305364511692, "grad_norm": 0.34962886571884155, "learning_rate": 2.6030865767913527e-07, "loss": 0.013, "step": 224230 }, { "epoch": 1.8143862772068937, "grad_norm": 0.38927653431892395, "learning_rate": 2.6008384559778335e-07, "loss": 0.0164, "step": 224240 }, { "epoch": 1.8144671899020957, "grad_norm": 0.605252742767334, "learning_rate": 2.5985912804405213e-07, "loss": 0.0175, "step": 224250 }, { "epoch": 1.8145481025972976, "grad_norm": 0.5698755979537964, "learning_rate": 2.596345050224225e-07, "loss": 0.0339, "step": 224260 }, { "epoch": 1.8146290152924993, "grad_norm": 0.05151950567960739, "learning_rate": 2.594099765373764e-07, "loss": 0.0173, "step": 224270 }, { "epoch": 1.8147099279877013, "grad_norm": 0.5362671613693237, "learning_rate": 2.591855425933915e-07, "loss": 0.0241, "step": 224280 }, { "epoch": 1.8147908406829032, "grad_norm": 0.32397255301475525, "learning_rate": 2.5896120319494067e-07, "loss": 0.0156, "step": 224290 }, { "epoch": 1.814871753378105, "grad_norm": 0.22196875512599945, "learning_rate": 2.5873695834650104e-07, "loss": 0.0142, "step": 224300 }, { "epoch": 1.8149526660733069, "grad_norm": 0.5391594171524048, "learning_rate": 2.5851280805254286e-07, "loss": 0.0259, "step": 224310 }, { "epoch": 1.8150335787685088, "grad_norm": 0.3272033631801605, "learning_rate": 2.582887523175376e-07, "loss": 0.0086, "step": 224320 }, { "epoch": 1.8151144914637105, "grad_norm": 0.5216637849807739, "learning_rate": 2.580647911459527e-07, "loss": 0.0177, "step": 224330 }, { "epoch": 1.8151954041589127, "grad_norm": 0.295014888048172, "learning_rate": 2.578409245422553e-07, "loss": 0.015, "step": 224340 }, { "epoch": 1.8152763168541144, "grad_norm": 0.4627935290336609, "learning_rate": 2.5761715251090893e-07, "loss": 0.0141, "step": 224350 }, { "epoch": 1.8153572295493163, "grad_norm": 0.1645774096250534, "learning_rate": 2.5739347505637725e-07, "loss": 0.0117, "step": 224360 }, { "epoch": 1.8154381422445183, "grad_norm": 0.5929746627807617, "learning_rate": 2.5716989218312005e-07, "loss": 0.0192, "step": 224370 }, { "epoch": 1.81551905493972, "grad_norm": 0.09904739260673523, "learning_rate": 2.5694640389559764e-07, "loss": 0.0215, "step": 224380 }, { "epoch": 1.815599967634922, "grad_norm": 0.2760884761810303, "learning_rate": 2.567230101982654e-07, "loss": 0.0141, "step": 224390 }, { "epoch": 1.8156808803301239, "grad_norm": 0.694803774356842, "learning_rate": 2.564997110955797e-07, "loss": 0.0253, "step": 224400 }, { "epoch": 1.8157617930253256, "grad_norm": 0.4519084095954895, "learning_rate": 2.562765065919931e-07, "loss": 0.0228, "step": 224410 }, { "epoch": 1.8158427057205275, "grad_norm": 0.4262014925479889, "learning_rate": 2.5605339669195705e-07, "loss": 0.0082, "step": 224420 }, { "epoch": 1.8159236184157295, "grad_norm": 0.33364036679267883, "learning_rate": 2.5583038139992135e-07, "loss": 0.0175, "step": 224430 }, { "epoch": 1.8160045311109312, "grad_norm": 0.37286657094955444, "learning_rate": 2.5560746072033296e-07, "loss": 0.0243, "step": 224440 }, { "epoch": 1.8160854438061333, "grad_norm": 0.522494375705719, "learning_rate": 2.5538463465763773e-07, "loss": 0.0309, "step": 224450 }, { "epoch": 1.816166356501335, "grad_norm": 0.6630224585533142, "learning_rate": 2.5516190321628e-07, "loss": 0.0283, "step": 224460 }, { "epoch": 1.8162472691965368, "grad_norm": 0.3503386676311493, "learning_rate": 2.549392664007022e-07, "loss": 0.0138, "step": 224470 }, { "epoch": 1.816328181891739, "grad_norm": 0.6364378333091736, "learning_rate": 2.547167242153414e-07, "loss": 0.0189, "step": 224480 }, { "epoch": 1.8164090945869407, "grad_norm": 0.4025176167488098, "learning_rate": 2.544942766646391e-07, "loss": 0.0363, "step": 224490 }, { "epoch": 1.8164900072821426, "grad_norm": 0.004633141681551933, "learning_rate": 2.5427192375303114e-07, "loss": 0.0126, "step": 224500 }, { "epoch": 1.8165709199773445, "grad_norm": 0.39016854763031006, "learning_rate": 2.540496654849489e-07, "loss": 0.0334, "step": 224510 }, { "epoch": 1.8166518326725463, "grad_norm": 0.24043862521648407, "learning_rate": 2.538275018648284e-07, "loss": 0.016, "step": 224520 }, { "epoch": 1.8167327453677482, "grad_norm": 0.4474172294139862, "learning_rate": 2.5360543289709826e-07, "loss": 0.0155, "step": 224530 }, { "epoch": 1.8168136580629501, "grad_norm": 0.18524079024791718, "learning_rate": 2.5338345858618773e-07, "loss": 0.0168, "step": 224540 }, { "epoch": 1.8168945707581519, "grad_norm": 0.3286972939968109, "learning_rate": 2.531615789365238e-07, "loss": 0.014, "step": 224550 }, { "epoch": 1.8169754834533538, "grad_norm": 0.289853572845459, "learning_rate": 2.5293979395253077e-07, "loss": 0.0113, "step": 224560 }, { "epoch": 1.8170563961485557, "grad_norm": 0.2867778241634369, "learning_rate": 2.5271810363863225e-07, "loss": 0.0278, "step": 224570 }, { "epoch": 1.8171373088437575, "grad_norm": 0.4015443027019501, "learning_rate": 2.5249650799924916e-07, "loss": 0.0149, "step": 224580 }, { "epoch": 1.8172182215389596, "grad_norm": 0.46174922585487366, "learning_rate": 2.522750070388014e-07, "loss": 0.016, "step": 224590 }, { "epoch": 1.8172991342341613, "grad_norm": 0.26369503140449524, "learning_rate": 2.520536007617047e-07, "loss": 0.0199, "step": 224600 }, { "epoch": 1.817380046929363, "grad_norm": 0.46937307715415955, "learning_rate": 2.518322891723762e-07, "loss": 0.0136, "step": 224610 }, { "epoch": 1.8174609596245652, "grad_norm": 0.32107871770858765, "learning_rate": 2.5161107227522905e-07, "loss": 0.0141, "step": 224620 }, { "epoch": 1.817541872319767, "grad_norm": 0.46682167053222656, "learning_rate": 2.5138995007467417e-07, "loss": 0.019, "step": 224630 }, { "epoch": 1.8176227850149689, "grad_norm": 0.42059725522994995, "learning_rate": 2.511689225751218e-07, "loss": 0.0169, "step": 224640 }, { "epoch": 1.8177036977101708, "grad_norm": 0.47860532999038696, "learning_rate": 2.509479897809802e-07, "loss": 0.0235, "step": 224650 }, { "epoch": 1.8177846104053725, "grad_norm": 0.49781501293182373, "learning_rate": 2.507271516966564e-07, "loss": 0.0156, "step": 224660 }, { "epoch": 1.8178655231005745, "grad_norm": 0.3555947542190552, "learning_rate": 2.505064083265518e-07, "loss": 0.0111, "step": 224670 }, { "epoch": 1.8179464357957764, "grad_norm": 0.44395560026168823, "learning_rate": 2.502857596750713e-07, "loss": 0.0137, "step": 224680 }, { "epoch": 1.8180273484909781, "grad_norm": 0.4210950434207916, "learning_rate": 2.5006520574661464e-07, "loss": 0.0145, "step": 224690 }, { "epoch": 1.81810826118618, "grad_norm": 0.8563183546066284, "learning_rate": 2.4984474654557835e-07, "loss": 0.0239, "step": 224700 }, { "epoch": 1.818189173881382, "grad_norm": 0.030083810910582542, "learning_rate": 2.4962438207636166e-07, "loss": 0.0204, "step": 224710 }, { "epoch": 1.8182700865765837, "grad_norm": 0.49238893389701843, "learning_rate": 2.494041123433577e-07, "loss": 0.0144, "step": 224720 }, { "epoch": 1.818350999271786, "grad_norm": 0.0731942355632782, "learning_rate": 2.4918393735096027e-07, "loss": 0.0126, "step": 224730 }, { "epoch": 1.8184319119669876, "grad_norm": 0.2360129952430725, "learning_rate": 2.489638571035591e-07, "loss": 0.0084, "step": 224740 }, { "epoch": 1.8185128246621893, "grad_norm": 0.35493993759155273, "learning_rate": 2.487438716055446e-07, "loss": 0.0132, "step": 224750 }, { "epoch": 1.8185937373573915, "grad_norm": 0.0029305440839380026, "learning_rate": 2.4852398086130327e-07, "loss": 0.015, "step": 224760 }, { "epoch": 1.8186746500525932, "grad_norm": 0.3405574560165405, "learning_rate": 2.483041848752199e-07, "loss": 0.0175, "step": 224770 }, { "epoch": 1.8187555627477952, "grad_norm": 0.4621146023273468, "learning_rate": 2.4808448365167824e-07, "loss": 0.0199, "step": 224780 }, { "epoch": 1.818836475442997, "grad_norm": 0.27506473660469055, "learning_rate": 2.478648771950598e-07, "loss": 0.0181, "step": 224790 }, { "epoch": 1.8189173881381988, "grad_norm": 0.21280886232852936, "learning_rate": 2.4764536550974495e-07, "loss": 0.0209, "step": 224800 }, { "epoch": 1.8189983008334007, "grad_norm": 0.3831799328327179, "learning_rate": 2.474259486001096e-07, "loss": 0.0219, "step": 224810 }, { "epoch": 1.8190792135286027, "grad_norm": 0.1259101778268814, "learning_rate": 2.472066264705314e-07, "loss": 0.0132, "step": 224820 }, { "epoch": 1.8191601262238044, "grad_norm": 0.41140952706336975, "learning_rate": 2.4698739912538296e-07, "loss": 0.0203, "step": 224830 }, { "epoch": 1.8192410389190063, "grad_norm": 0.21998238563537598, "learning_rate": 2.467682665690363e-07, "loss": 0.0192, "step": 224840 }, { "epoch": 1.8193219516142083, "grad_norm": 0.3487129509449005, "learning_rate": 2.465492288058635e-07, "loss": 0.0136, "step": 224850 }, { "epoch": 1.81940286430941, "grad_norm": 0.6343350410461426, "learning_rate": 2.4633028584022946e-07, "loss": 0.0354, "step": 224860 }, { "epoch": 1.8194837770046122, "grad_norm": 0.5944506525993347, "learning_rate": 2.4611143767650335e-07, "loss": 0.0182, "step": 224870 }, { "epoch": 1.8195646896998139, "grad_norm": 0.5665692090988159, "learning_rate": 2.4589268431904955e-07, "loss": 0.0235, "step": 224880 }, { "epoch": 1.8196456023950158, "grad_norm": 0.18086451292037964, "learning_rate": 2.4567402577222844e-07, "loss": 0.0162, "step": 224890 }, { "epoch": 1.8197265150902178, "grad_norm": 0.3814215362071991, "learning_rate": 2.4545546204040313e-07, "loss": 0.0252, "step": 224900 }, { "epoch": 1.8198074277854195, "grad_norm": 0.47339391708374023, "learning_rate": 2.4523699312793135e-07, "loss": 0.0207, "step": 224910 }, { "epoch": 1.8198883404806214, "grad_norm": 0.15330494940280914, "learning_rate": 2.45018619039169e-07, "loss": 0.0165, "step": 224920 }, { "epoch": 1.8199692531758234, "grad_norm": 0.4424898624420166, "learning_rate": 2.448003397784726e-07, "loss": 0.0176, "step": 224930 }, { "epoch": 1.820050165871025, "grad_norm": 0.2785133421421051, "learning_rate": 2.445821553501948e-07, "loss": 0.0258, "step": 224940 }, { "epoch": 1.820131078566227, "grad_norm": 0.31959202885627747, "learning_rate": 2.4436406575868654e-07, "loss": 0.0183, "step": 224950 }, { "epoch": 1.820211991261429, "grad_norm": 0.2172878533601761, "learning_rate": 2.4414607100829765e-07, "loss": 0.017, "step": 224960 }, { "epoch": 1.8202929039566307, "grad_norm": 0.6254210472106934, "learning_rate": 2.439281711033753e-07, "loss": 0.0189, "step": 224970 }, { "epoch": 1.8203738166518326, "grad_norm": 0.29395759105682373, "learning_rate": 2.437103660482654e-07, "loss": 0.0199, "step": 224980 }, { "epoch": 1.8204547293470346, "grad_norm": 0.43190255761146545, "learning_rate": 2.434926558473105e-07, "loss": 0.0206, "step": 224990 }, { "epoch": 1.8205356420422363, "grad_norm": 0.5567942261695862, "learning_rate": 2.432750405048534e-07, "loss": 0.018, "step": 225000 }, { "epoch": 1.8206165547374384, "grad_norm": 0.16959063708782196, "learning_rate": 2.430575200252344e-07, "loss": 0.0265, "step": 225010 }, { "epoch": 1.8206974674326402, "grad_norm": 0.36211884021759033, "learning_rate": 2.428400944127901e-07, "loss": 0.0337, "step": 225020 }, { "epoch": 1.820778380127842, "grad_norm": 0.6302443742752075, "learning_rate": 2.42622763671857e-07, "loss": 0.0252, "step": 225030 }, { "epoch": 1.820859292823044, "grad_norm": 0.542677104473114, "learning_rate": 2.424055278067711e-07, "loss": 0.0195, "step": 225040 }, { "epoch": 1.8209402055182458, "grad_norm": 0.2820475399494171, "learning_rate": 2.421883868218622e-07, "loss": 0.017, "step": 225050 }, { "epoch": 1.8210211182134477, "grad_norm": 0.3051204979419708, "learning_rate": 2.4197134072146077e-07, "loss": 0.0204, "step": 225060 }, { "epoch": 1.8211020309086496, "grad_norm": 0.37362074851989746, "learning_rate": 2.4175438950989837e-07, "loss": 0.0208, "step": 225070 }, { "epoch": 1.8211829436038514, "grad_norm": 0.3450670540332794, "learning_rate": 2.4153753319149766e-07, "loss": 0.0141, "step": 225080 }, { "epoch": 1.8212638562990533, "grad_norm": 0.5951293706893921, "learning_rate": 2.413207717705862e-07, "loss": 0.0208, "step": 225090 }, { "epoch": 1.8213447689942552, "grad_norm": 0.23796208202838898, "learning_rate": 2.4110410525148675e-07, "loss": 0.0187, "step": 225100 }, { "epoch": 1.821425681689457, "grad_norm": 0.17316731810569763, "learning_rate": 2.4088753363851745e-07, "loss": 0.0147, "step": 225110 }, { "epoch": 1.8215065943846591, "grad_norm": 0.5767846703529358, "learning_rate": 2.40671056936001e-07, "loss": 0.0133, "step": 225120 }, { "epoch": 1.8215875070798608, "grad_norm": 0.5297530293464661, "learning_rate": 2.404546751482523e-07, "loss": 0.0171, "step": 225130 }, { "epoch": 1.8216684197750626, "grad_norm": 0.3945179879665375, "learning_rate": 2.402383882795872e-07, "loss": 0.0272, "step": 225140 }, { "epoch": 1.8217493324702647, "grad_norm": 0.2500127851963043, "learning_rate": 2.4002219633431964e-07, "loss": 0.0219, "step": 225150 }, { "epoch": 1.8218302451654664, "grad_norm": 0.11605852842330933, "learning_rate": 2.3980609931676e-07, "loss": 0.0129, "step": 225160 }, { "epoch": 1.8219111578606684, "grad_norm": 0.22563892602920532, "learning_rate": 2.3959009723121874e-07, "loss": 0.0216, "step": 225170 }, { "epoch": 1.8219920705558703, "grad_norm": 0.33390700817108154, "learning_rate": 2.3937419008200344e-07, "loss": 0.0138, "step": 225180 }, { "epoch": 1.822072983251072, "grad_norm": 0.5038437843322754, "learning_rate": 2.391583778734191e-07, "loss": 0.0233, "step": 225190 }, { "epoch": 1.822153895946274, "grad_norm": 0.23560363054275513, "learning_rate": 2.3894266060977167e-07, "loss": 0.0129, "step": 225200 }, { "epoch": 1.822234808641476, "grad_norm": 0.18449310958385468, "learning_rate": 2.3872703829536105e-07, "loss": 0.0137, "step": 225210 }, { "epoch": 1.8223157213366776, "grad_norm": 0.9285510778427124, "learning_rate": 2.385115109344877e-07, "loss": 0.0257, "step": 225220 }, { "epoch": 1.8223966340318796, "grad_norm": 0.384750097990036, "learning_rate": 2.3829607853145142e-07, "loss": 0.0224, "step": 225230 }, { "epoch": 1.8224775467270815, "grad_norm": 0.23110902309417725, "learning_rate": 2.3808074109054725e-07, "loss": 0.0125, "step": 225240 }, { "epoch": 1.8225584594222832, "grad_norm": 0.5169960856437683, "learning_rate": 2.3786549861606834e-07, "loss": 0.0196, "step": 225250 }, { "epoch": 1.8226393721174854, "grad_norm": 0.16035524010658264, "learning_rate": 2.376503511123107e-07, "loss": 0.0148, "step": 225260 }, { "epoch": 1.822720284812687, "grad_norm": 0.597363293170929, "learning_rate": 2.374352985835615e-07, "loss": 0.0137, "step": 225270 }, { "epoch": 1.8228011975078888, "grad_norm": 0.1602574735879898, "learning_rate": 2.3722034103411173e-07, "loss": 0.016, "step": 225280 }, { "epoch": 1.822882110203091, "grad_norm": 0.8393409252166748, "learning_rate": 2.370054784682485e-07, "loss": 0.0176, "step": 225290 }, { "epoch": 1.8229630228982927, "grad_norm": 0.4004446566104889, "learning_rate": 2.3679071089025452e-07, "loss": 0.0236, "step": 225300 }, { "epoch": 1.8230439355934946, "grad_norm": 0.4381351172924042, "learning_rate": 2.3657603830441466e-07, "loss": 0.0164, "step": 225310 }, { "epoch": 1.8231248482886966, "grad_norm": 0.004633509088307619, "learning_rate": 2.3636146071501054e-07, "loss": 0.0203, "step": 225320 }, { "epoch": 1.8232057609838983, "grad_norm": 0.315901517868042, "learning_rate": 2.3614697812631871e-07, "loss": 0.034, "step": 225330 }, { "epoch": 1.8232866736791002, "grad_norm": 0.4070628583431244, "learning_rate": 2.3593259054261962e-07, "loss": 0.0171, "step": 225340 }, { "epoch": 1.8233675863743022, "grad_norm": 0.40469709038734436, "learning_rate": 2.3571829796818768e-07, "loss": 0.026, "step": 225350 }, { "epoch": 1.823448499069504, "grad_norm": 0.4413388967514038, "learning_rate": 2.355041004072961e-07, "loss": 0.0179, "step": 225360 }, { "epoch": 1.8235294117647058, "grad_norm": 0.014839664101600647, "learning_rate": 2.3528999786421758e-07, "loss": 0.0212, "step": 225370 }, { "epoch": 1.8236103244599078, "grad_norm": 0.3983244001865387, "learning_rate": 2.3507599034322092e-07, "loss": 0.0126, "step": 225380 }, { "epoch": 1.8236912371551095, "grad_norm": 0.5412670969963074, "learning_rate": 2.348620778485744e-07, "loss": 0.021, "step": 225390 }, { "epoch": 1.8237721498503117, "grad_norm": 0.7001131176948547, "learning_rate": 2.3464826038454403e-07, "loss": 0.0294, "step": 225400 }, { "epoch": 1.8238530625455134, "grad_norm": 0.14547672867774963, "learning_rate": 2.3443453795539306e-07, "loss": 0.0191, "step": 225410 }, { "epoch": 1.8239339752407153, "grad_norm": 0.4980621635913849, "learning_rate": 2.3422091056538643e-07, "loss": 0.0117, "step": 225420 }, { "epoch": 1.8240148879359173, "grad_norm": 0.3772752285003662, "learning_rate": 2.3400737821878184e-07, "loss": 0.0111, "step": 225430 }, { "epoch": 1.824095800631119, "grad_norm": 0.2784208655357361, "learning_rate": 2.3379394091983808e-07, "loss": 0.0106, "step": 225440 }, { "epoch": 1.824176713326321, "grad_norm": 0.4956074059009552, "learning_rate": 2.3358059867281402e-07, "loss": 0.0195, "step": 225450 }, { "epoch": 1.8242576260215229, "grad_norm": 0.41120800375938416, "learning_rate": 2.3336735148196122e-07, "loss": 0.0229, "step": 225460 }, { "epoch": 1.8243385387167246, "grad_norm": 0.6235213875770569, "learning_rate": 2.331541993515335e-07, "loss": 0.0224, "step": 225470 }, { "epoch": 1.8244194514119265, "grad_norm": 0.1863246113061905, "learning_rate": 2.3294114228578302e-07, "loss": 0.0144, "step": 225480 }, { "epoch": 1.8245003641071285, "grad_norm": 0.2744462788105011, "learning_rate": 2.3272818028895693e-07, "loss": 0.0171, "step": 225490 }, { "epoch": 1.8245812768023302, "grad_norm": 0.1606825739145279, "learning_rate": 2.3251531336530354e-07, "loss": 0.0127, "step": 225500 }, { "epoch": 1.8246621894975321, "grad_norm": 0.14720988273620605, "learning_rate": 2.323025415190683e-07, "loss": 0.0124, "step": 225510 }, { "epoch": 1.824743102192734, "grad_norm": 0.5312259793281555, "learning_rate": 2.3208986475449223e-07, "loss": 0.0184, "step": 225520 }, { "epoch": 1.8248240148879358, "grad_norm": 0.6341872215270996, "learning_rate": 2.3187728307581924e-07, "loss": 0.0189, "step": 225530 }, { "epoch": 1.824904927583138, "grad_norm": 0.308887243270874, "learning_rate": 2.3166479648728758e-07, "loss": 0.0193, "step": 225540 }, { "epoch": 1.8249858402783397, "grad_norm": 0.554434061050415, "learning_rate": 2.3145240499313547e-07, "loss": 0.0137, "step": 225550 }, { "epoch": 1.8250667529735416, "grad_norm": 0.2957676351070404, "learning_rate": 2.312401085975985e-07, "loss": 0.0181, "step": 225560 }, { "epoch": 1.8251476656687435, "grad_norm": 0.5683619976043701, "learning_rate": 2.3102790730490987e-07, "loss": 0.0251, "step": 225570 }, { "epoch": 1.8252285783639453, "grad_norm": 0.24216385185718536, "learning_rate": 2.308158011193018e-07, "loss": 0.0224, "step": 225580 }, { "epoch": 1.8253094910591472, "grad_norm": 0.2876836657524109, "learning_rate": 2.3060379004500476e-07, "loss": 0.0236, "step": 225590 }, { "epoch": 1.8253904037543491, "grad_norm": 0.11227187514305115, "learning_rate": 2.3039187408624596e-07, "loss": 0.0208, "step": 225600 }, { "epoch": 1.8254713164495509, "grad_norm": 0.12348268926143646, "learning_rate": 2.3018005324725256e-07, "loss": 0.0181, "step": 225610 }, { "epoch": 1.8255522291447528, "grad_norm": 0.3213313817977905, "learning_rate": 2.299683275322484e-07, "loss": 0.0234, "step": 225620 }, { "epoch": 1.8256331418399547, "grad_norm": 0.19707675278186798, "learning_rate": 2.297566969454551e-07, "loss": 0.0168, "step": 225630 }, { "epoch": 1.8257140545351564, "grad_norm": 0.21591724455356598, "learning_rate": 2.2954516149109595e-07, "loss": 0.012, "step": 225640 }, { "epoch": 1.8257949672303584, "grad_norm": 0.7723525166511536, "learning_rate": 2.2933372117338648e-07, "loss": 0.0205, "step": 225650 }, { "epoch": 1.8258758799255603, "grad_norm": 0.5412136912345886, "learning_rate": 2.2912237599654386e-07, "loss": 0.0194, "step": 225660 }, { "epoch": 1.825956792620762, "grad_norm": 0.10709129273891449, "learning_rate": 2.2891112596478525e-07, "loss": 0.0209, "step": 225670 }, { "epoch": 1.8260377053159642, "grad_norm": 0.5535822510719299, "learning_rate": 2.2869997108232067e-07, "loss": 0.0164, "step": 225680 }, { "epoch": 1.826118618011166, "grad_norm": 0.24098046123981476, "learning_rate": 2.2848891135336338e-07, "loss": 0.0275, "step": 225690 }, { "epoch": 1.8261995307063679, "grad_norm": 0.3660176694393158, "learning_rate": 2.2827794678212224e-07, "loss": 0.0161, "step": 225700 }, { "epoch": 1.8262804434015698, "grad_norm": 0.19982776045799255, "learning_rate": 2.280670773728022e-07, "loss": 0.027, "step": 225710 }, { "epoch": 1.8263613560967715, "grad_norm": 0.28416258096694946, "learning_rate": 2.278563031296116e-07, "loss": 0.0163, "step": 225720 }, { "epoch": 1.8264422687919735, "grad_norm": 0.5901951789855957, "learning_rate": 2.2764562405675207e-07, "loss": 0.0191, "step": 225730 }, { "epoch": 1.8265231814871754, "grad_norm": 0.10970450192689896, "learning_rate": 2.2743504015842577e-07, "loss": 0.0125, "step": 225740 }, { "epoch": 1.8266040941823771, "grad_norm": 0.395442932844162, "learning_rate": 2.2722455143883216e-07, "loss": 0.0201, "step": 225750 }, { "epoch": 1.826685006877579, "grad_norm": 0.281252920627594, "learning_rate": 2.270141579021695e-07, "loss": 0.0185, "step": 225760 }, { "epoch": 1.826765919572781, "grad_norm": 0.27435460686683655, "learning_rate": 2.2680385955263284e-07, "loss": 0.011, "step": 225770 }, { "epoch": 1.8268468322679827, "grad_norm": 0.5000187158584595, "learning_rate": 2.2659365639441654e-07, "loss": 0.0256, "step": 225780 }, { "epoch": 1.8269277449631849, "grad_norm": 0.40754830837249756, "learning_rate": 2.2638354843171284e-07, "loss": 0.0217, "step": 225790 }, { "epoch": 1.8270086576583866, "grad_norm": 0.5557286143302917, "learning_rate": 2.2617353566871168e-07, "loss": 0.0217, "step": 225800 }, { "epoch": 1.8270895703535883, "grad_norm": 0.8078687191009521, "learning_rate": 2.2596361810960145e-07, "loss": 0.0141, "step": 225810 }, { "epoch": 1.8271704830487905, "grad_norm": 0.4099009037017822, "learning_rate": 2.257537957585676e-07, "loss": 0.0124, "step": 225820 }, { "epoch": 1.8272513957439922, "grad_norm": 0.3284222483634949, "learning_rate": 2.2554406861979683e-07, "loss": 0.0225, "step": 225830 }, { "epoch": 1.8273323084391941, "grad_norm": 0.3217426836490631, "learning_rate": 2.2533443669746968e-07, "loss": 0.0232, "step": 225840 }, { "epoch": 1.827413221134396, "grad_norm": 0.26986607909202576, "learning_rate": 2.251248999957667e-07, "loss": 0.0221, "step": 225850 }, { "epoch": 1.8274941338295978, "grad_norm": 0.0925830528140068, "learning_rate": 2.2491545851886842e-07, "loss": 0.0219, "step": 225860 }, { "epoch": 1.8275750465247997, "grad_norm": 0.1813342571258545, "learning_rate": 2.2470611227095042e-07, "loss": 0.0272, "step": 225870 }, { "epoch": 1.8276559592200017, "grad_norm": 0.3518544137477875, "learning_rate": 2.2449686125618707e-07, "loss": 0.0134, "step": 225880 }, { "epoch": 1.8277368719152034, "grad_norm": 1.3231958150863647, "learning_rate": 2.2428770547875344e-07, "loss": 0.0152, "step": 225890 }, { "epoch": 1.8278177846104053, "grad_norm": 0.44335687160491943, "learning_rate": 2.2407864494281838e-07, "loss": 0.01, "step": 225900 }, { "epoch": 1.8278986973056073, "grad_norm": 0.24046114087104797, "learning_rate": 2.2386967965255302e-07, "loss": 0.0135, "step": 225910 }, { "epoch": 1.827979610000809, "grad_norm": 0.3122851252555847, "learning_rate": 2.2366080961212456e-07, "loss": 0.0181, "step": 225920 }, { "epoch": 1.8280605226960112, "grad_norm": 0.3482254445552826, "learning_rate": 2.2345203482569743e-07, "loss": 0.0134, "step": 225930 }, { "epoch": 1.8281414353912129, "grad_norm": 0.3777301609516144, "learning_rate": 2.2324335529743557e-07, "loss": 0.017, "step": 225940 }, { "epoch": 1.8282223480864146, "grad_norm": 0.5314823389053345, "learning_rate": 2.2303477103150118e-07, "loss": 0.02, "step": 225950 }, { "epoch": 1.8283032607816168, "grad_norm": 0.44868892431259155, "learning_rate": 2.2282628203205313e-07, "loss": 0.0152, "step": 225960 }, { "epoch": 1.8283841734768185, "grad_norm": 0.6327100992202759, "learning_rate": 2.2261788830325037e-07, "loss": 0.0135, "step": 225970 }, { "epoch": 1.8284650861720204, "grad_norm": 0.36527684330940247, "learning_rate": 2.2240958984924787e-07, "loss": 0.0186, "step": 225980 }, { "epoch": 1.8285459988672224, "grad_norm": 0.6426529884338379, "learning_rate": 2.2220138667420066e-07, "loss": 0.0207, "step": 225990 }, { "epoch": 1.828626911562424, "grad_norm": 0.5155646204948425, "learning_rate": 2.2199327878225986e-07, "loss": 0.0186, "step": 226000 }, { "epoch": 1.828707824257626, "grad_norm": 0.414869487285614, "learning_rate": 2.2178526617757658e-07, "loss": 0.0274, "step": 226010 }, { "epoch": 1.828788736952828, "grad_norm": 0.36108121275901794, "learning_rate": 2.2157734886429915e-07, "loss": 0.0111, "step": 226020 }, { "epoch": 1.8288696496480297, "grad_norm": 0.008740107528865337, "learning_rate": 2.2136952684657375e-07, "loss": 0.018, "step": 226030 }, { "epoch": 1.8289505623432316, "grad_norm": 0.30996131896972656, "learning_rate": 2.211618001285437e-07, "loss": 0.0193, "step": 226040 }, { "epoch": 1.8290314750384336, "grad_norm": 0.26515305042266846, "learning_rate": 2.2095416871435514e-07, "loss": 0.0213, "step": 226050 }, { "epoch": 1.8291123877336353, "grad_norm": 0.7287419438362122, "learning_rate": 2.2074663260814533e-07, "loss": 0.0223, "step": 226060 }, { "epoch": 1.8291933004288374, "grad_norm": 0.2348068654537201, "learning_rate": 2.2053919181405427e-07, "loss": 0.015, "step": 226070 }, { "epoch": 1.8292742131240392, "grad_norm": 0.3125196695327759, "learning_rate": 2.2033184633621974e-07, "loss": 0.03, "step": 226080 }, { "epoch": 1.829355125819241, "grad_norm": 0.20065976679325104, "learning_rate": 2.201245961787768e-07, "loss": 0.0163, "step": 226090 }, { "epoch": 1.829436038514443, "grad_norm": 0.2598430812358856, "learning_rate": 2.1991744134585658e-07, "loss": 0.0167, "step": 226100 }, { "epoch": 1.8295169512096447, "grad_norm": 0.2150825411081314, "learning_rate": 2.1971038184159243e-07, "loss": 0.0106, "step": 226110 }, { "epoch": 1.8295978639048467, "grad_norm": 0.4812176823616028, "learning_rate": 2.195034176701133e-07, "loss": 0.0164, "step": 226120 }, { "epoch": 1.8296787766000486, "grad_norm": 0.43239691853523254, "learning_rate": 2.192965488355464e-07, "loss": 0.0174, "step": 226130 }, { "epoch": 1.8297596892952503, "grad_norm": 0.3785093426704407, "learning_rate": 2.1908977534201736e-07, "loss": 0.022, "step": 226140 }, { "epoch": 1.8298406019904523, "grad_norm": 0.47741541266441345, "learning_rate": 2.1888309719364954e-07, "loss": 0.0143, "step": 226150 }, { "epoch": 1.8299215146856542, "grad_norm": 0.1616344302892685, "learning_rate": 2.1867651439456516e-07, "loss": 0.014, "step": 226160 }, { "epoch": 1.830002427380856, "grad_norm": 0.273514062166214, "learning_rate": 2.1847002694888375e-07, "loss": 0.0168, "step": 226170 }, { "epoch": 1.8300833400760579, "grad_norm": 0.37865346670150757, "learning_rate": 2.1826363486072367e-07, "loss": 0.0139, "step": 226180 }, { "epoch": 1.8301642527712598, "grad_norm": 0.18511539697647095, "learning_rate": 2.180573381342005e-07, "loss": 0.0132, "step": 226190 }, { "epoch": 1.8302451654664615, "grad_norm": 0.07968492060899734, "learning_rate": 2.1785113677342817e-07, "loss": 0.0194, "step": 226200 }, { "epoch": 1.8303260781616637, "grad_norm": 0.4921455383300781, "learning_rate": 2.1764503078252008e-07, "loss": 0.0212, "step": 226210 }, { "epoch": 1.8304069908568654, "grad_norm": 0.07231352478265762, "learning_rate": 2.1743902016558572e-07, "loss": 0.0164, "step": 226220 }, { "epoch": 1.8304879035520674, "grad_norm": 0.4858747720718384, "learning_rate": 2.1723310492673232e-07, "loss": 0.0161, "step": 226230 }, { "epoch": 1.8305688162472693, "grad_norm": 0.1969175785779953, "learning_rate": 2.1702728507006998e-07, "loss": 0.0178, "step": 226240 }, { "epoch": 1.830649728942471, "grad_norm": 0.8826763033866882, "learning_rate": 2.1682156059969983e-07, "loss": 0.026, "step": 226250 }, { "epoch": 1.830730641637673, "grad_norm": 0.5408263206481934, "learning_rate": 2.166159315197258e-07, "loss": 0.0145, "step": 226260 }, { "epoch": 1.830811554332875, "grad_norm": 0.015992645174264908, "learning_rate": 2.164103978342491e-07, "loss": 0.0122, "step": 226270 }, { "epoch": 1.8308924670280766, "grad_norm": 0.4716514050960541, "learning_rate": 2.162049595473692e-07, "loss": 0.0228, "step": 226280 }, { "epoch": 1.8309733797232786, "grad_norm": 0.09735102951526642, "learning_rate": 2.1599961666318115e-07, "loss": 0.011, "step": 226290 }, { "epoch": 1.8310542924184805, "grad_norm": 0.5025069713592529, "learning_rate": 2.1579436918578166e-07, "loss": 0.0151, "step": 226300 }, { "epoch": 1.8311352051136822, "grad_norm": 0.49339908361434937, "learning_rate": 2.1558921711926362e-07, "loss": 0.0106, "step": 226310 }, { "epoch": 1.8312161178088842, "grad_norm": 0.4152927100658417, "learning_rate": 2.1538416046771872e-07, "loss": 0.0192, "step": 226320 }, { "epoch": 1.831297030504086, "grad_norm": 0.6264362931251526, "learning_rate": 2.1517919923523535e-07, "loss": 0.0199, "step": 226330 }, { "epoch": 1.8313779431992878, "grad_norm": 0.19598166644573212, "learning_rate": 2.149743334259019e-07, "loss": 0.011, "step": 226340 }, { "epoch": 1.83145885589449, "grad_norm": 0.08882872760295868, "learning_rate": 2.1476956304380403e-07, "loss": 0.0132, "step": 226350 }, { "epoch": 1.8315397685896917, "grad_norm": 0.2877795696258545, "learning_rate": 2.145648880930251e-07, "loss": 0.0141, "step": 226360 }, { "epoch": 1.8316206812848936, "grad_norm": 0.18830156326293945, "learning_rate": 2.143603085776469e-07, "loss": 0.0205, "step": 226370 }, { "epoch": 1.8317015939800956, "grad_norm": 0.42657631635665894, "learning_rate": 2.1415582450174889e-07, "loss": 0.0185, "step": 226380 }, { "epoch": 1.8317825066752973, "grad_norm": 0.5426883697509766, "learning_rate": 2.1395143586941002e-07, "loss": 0.021, "step": 226390 }, { "epoch": 1.8318634193704992, "grad_norm": 0.44696545600891113, "learning_rate": 2.1374714268470597e-07, "loss": 0.0163, "step": 226400 }, { "epoch": 1.8319443320657012, "grad_norm": 0.4009196162223816, "learning_rate": 2.1354294495171124e-07, "loss": 0.0173, "step": 226410 }, { "epoch": 1.832025244760903, "grad_norm": 0.6173781156539917, "learning_rate": 2.13338842674497e-07, "loss": 0.0234, "step": 226420 }, { "epoch": 1.8321061574561048, "grad_norm": 0.23186306655406952, "learning_rate": 2.1313483585713503e-07, "loss": 0.0156, "step": 226430 }, { "epoch": 1.8321870701513068, "grad_norm": 0.07946177572011948, "learning_rate": 2.1293092450369312e-07, "loss": 0.0249, "step": 226440 }, { "epoch": 1.8322679828465085, "grad_norm": 0.18589606881141663, "learning_rate": 2.1272710861823753e-07, "loss": 0.0093, "step": 226450 }, { "epoch": 1.8323488955417107, "grad_norm": 0.3629668653011322, "learning_rate": 2.1252338820483386e-07, "loss": 0.0174, "step": 226460 }, { "epoch": 1.8324298082369124, "grad_norm": 0.005497198086231947, "learning_rate": 2.12319763267545e-07, "loss": 0.0137, "step": 226470 }, { "epoch": 1.832510720932114, "grad_norm": 0.5930254459381104, "learning_rate": 2.121162338104299e-07, "loss": 0.0136, "step": 226480 }, { "epoch": 1.8325916336273163, "grad_norm": 0.4088391363620758, "learning_rate": 2.1191279983754976e-07, "loss": 0.0294, "step": 226490 }, { "epoch": 1.832672546322518, "grad_norm": 0.13310018181800842, "learning_rate": 2.1170946135296134e-07, "loss": 0.0145, "step": 226500 }, { "epoch": 1.83275345901772, "grad_norm": 0.404631644487381, "learning_rate": 2.115062183607175e-07, "loss": 0.0154, "step": 226510 }, { "epoch": 1.8328343717129219, "grad_norm": 0.2270926684141159, "learning_rate": 2.1130307086487388e-07, "loss": 0.0182, "step": 226520 }, { "epoch": 1.8329152844081236, "grad_norm": 0.3102662265300751, "learning_rate": 2.1110001886948116e-07, "loss": 0.0122, "step": 226530 }, { "epoch": 1.8329961971033255, "grad_norm": 0.44275301694869995, "learning_rate": 2.1089706237858887e-07, "loss": 0.0149, "step": 226540 }, { "epoch": 1.8330771097985274, "grad_norm": 0.15025056898593903, "learning_rate": 2.1069420139624485e-07, "loss": 0.018, "step": 226550 }, { "epoch": 1.8331580224937292, "grad_norm": 0.262649804353714, "learning_rate": 2.1049143592649368e-07, "loss": 0.0385, "step": 226560 }, { "epoch": 1.833238935188931, "grad_norm": 0.11055406928062439, "learning_rate": 2.1028876597337988e-07, "loss": 0.0252, "step": 226570 }, { "epoch": 1.833319847884133, "grad_norm": 0.3672061860561371, "learning_rate": 2.1008619154094523e-07, "loss": 0.0093, "step": 226580 }, { "epoch": 1.8334007605793348, "grad_norm": 0.2523041069507599, "learning_rate": 2.0988371263322927e-07, "loss": 0.0162, "step": 226590 }, { "epoch": 1.833481673274537, "grad_norm": 0.4952279031276703, "learning_rate": 2.09681329254271e-07, "loss": 0.0214, "step": 226600 }, { "epoch": 1.8335625859697386, "grad_norm": 0.3403759300708771, "learning_rate": 2.0947904140810493e-07, "loss": 0.0239, "step": 226610 }, { "epoch": 1.8336434986649404, "grad_norm": 0.23516739904880524, "learning_rate": 2.0927684909876678e-07, "loss": 0.0139, "step": 226620 }, { "epoch": 1.8337244113601425, "grad_norm": 0.395499050617218, "learning_rate": 2.0907475233028772e-07, "loss": 0.0297, "step": 226630 }, { "epoch": 1.8338053240553442, "grad_norm": 0.4553280770778656, "learning_rate": 2.08872751106699e-07, "loss": 0.0179, "step": 226640 }, { "epoch": 1.8338862367505462, "grad_norm": 0.7859982252120972, "learning_rate": 2.0867084543202799e-07, "loss": 0.0187, "step": 226650 }, { "epoch": 1.8339671494457481, "grad_norm": 0.21867488324642181, "learning_rate": 2.0846903531030304e-07, "loss": 0.0259, "step": 226660 }, { "epoch": 1.8340480621409498, "grad_norm": 0.17024309933185577, "learning_rate": 2.0826732074554656e-07, "loss": 0.0193, "step": 226670 }, { "epoch": 1.8341289748361518, "grad_norm": 0.1896091252565384, "learning_rate": 2.0806570174178366e-07, "loss": 0.0104, "step": 226680 }, { "epoch": 1.8342098875313537, "grad_norm": 0.5372838973999023, "learning_rate": 2.078641783030344e-07, "loss": 0.0143, "step": 226690 }, { "epoch": 1.8342908002265554, "grad_norm": 0.24685215950012207, "learning_rate": 2.0766275043331619e-07, "loss": 0.018, "step": 226700 }, { "epoch": 1.8343717129217574, "grad_norm": 0.31350675225257874, "learning_rate": 2.07461418136648e-07, "loss": 0.0179, "step": 226710 }, { "epoch": 1.8344526256169593, "grad_norm": 0.2657739818096161, "learning_rate": 2.072601814170444e-07, "loss": 0.0128, "step": 226720 }, { "epoch": 1.834533538312161, "grad_norm": 0.6190968155860901, "learning_rate": 2.0705904027851887e-07, "loss": 0.0157, "step": 226730 }, { "epoch": 1.8346144510073632, "grad_norm": 0.24647830426692963, "learning_rate": 2.0685799472508206e-07, "loss": 0.019, "step": 226740 }, { "epoch": 1.834695363702565, "grad_norm": 0.2667364180088043, "learning_rate": 2.0665704476074355e-07, "loss": 0.0124, "step": 226750 }, { "epoch": 1.8347762763977669, "grad_norm": 0.1497441977262497, "learning_rate": 2.0645619038951126e-07, "loss": 0.0139, "step": 226760 }, { "epoch": 1.8348571890929688, "grad_norm": 0.3869538903236389, "learning_rate": 2.0625543161539087e-07, "loss": 0.0097, "step": 226770 }, { "epoch": 1.8349381017881705, "grad_norm": 0.36746618151664734, "learning_rate": 2.060547684423858e-07, "loss": 0.0154, "step": 226780 }, { "epoch": 1.8350190144833725, "grad_norm": 0.4458886384963989, "learning_rate": 2.0585420087449793e-07, "loss": 0.0149, "step": 226790 }, { "epoch": 1.8350999271785744, "grad_norm": 0.006320531014353037, "learning_rate": 2.0565372891572678e-07, "loss": 0.0139, "step": 226800 }, { "epoch": 1.8351808398737761, "grad_norm": 0.4131511449813843, "learning_rate": 2.0545335257007026e-07, "loss": 0.0228, "step": 226810 }, { "epoch": 1.835261752568978, "grad_norm": 0.35154110193252563, "learning_rate": 2.0525307184152632e-07, "loss": 0.0182, "step": 226820 }, { "epoch": 1.83534266526418, "grad_norm": 0.4385468661785126, "learning_rate": 2.0505288673408728e-07, "loss": 0.0165, "step": 226830 }, { "epoch": 1.8354235779593817, "grad_norm": 0.33414506912231445, "learning_rate": 2.0485279725174446e-07, "loss": 0.0204, "step": 226840 }, { "epoch": 1.8355044906545837, "grad_norm": 0.3763209283351898, "learning_rate": 2.0465280339849126e-07, "loss": 0.021, "step": 226850 }, { "epoch": 1.8355854033497856, "grad_norm": 0.4597078561782837, "learning_rate": 2.044529051783134e-07, "loss": 0.0182, "step": 226860 }, { "epoch": 1.8356663160449873, "grad_norm": 0.37258920073509216, "learning_rate": 2.0425310259519882e-07, "loss": 0.0123, "step": 226870 }, { "epoch": 1.8357472287401895, "grad_norm": 0.621323823928833, "learning_rate": 2.0405339565313264e-07, "loss": 0.0192, "step": 226880 }, { "epoch": 1.8358281414353912, "grad_norm": 0.17008551955223083, "learning_rate": 2.038537843560956e-07, "loss": 0.0167, "step": 226890 }, { "epoch": 1.8359090541305931, "grad_norm": 0.3574483394622803, "learning_rate": 2.0365426870807004e-07, "loss": 0.0154, "step": 226900 }, { "epoch": 1.835989966825795, "grad_norm": 0.22756808996200562, "learning_rate": 2.0345484871303556e-07, "loss": 0.0126, "step": 226910 }, { "epoch": 1.8360708795209968, "grad_norm": 0.20552243292331696, "learning_rate": 2.0325552437496677e-07, "loss": 0.0215, "step": 226920 }, { "epoch": 1.8361517922161987, "grad_norm": 0.5963674187660217, "learning_rate": 2.03056295697841e-07, "loss": 0.0201, "step": 226930 }, { "epoch": 1.8362327049114007, "grad_norm": 0.014983048662543297, "learning_rate": 2.0285716268563016e-07, "loss": 0.0084, "step": 226940 }, { "epoch": 1.8363136176066024, "grad_norm": 0.19045506417751312, "learning_rate": 2.02658125342306e-07, "loss": 0.0185, "step": 226950 }, { "epoch": 1.8363945303018043, "grad_norm": 0.4256593585014343, "learning_rate": 2.0245918367183813e-07, "loss": 0.0234, "step": 226960 }, { "epoch": 1.8364754429970063, "grad_norm": 0.36811304092407227, "learning_rate": 2.0226033767819397e-07, "loss": 0.0184, "step": 226970 }, { "epoch": 1.836556355692208, "grad_norm": 0.0037308181636035442, "learning_rate": 2.0206158736533866e-07, "loss": 0.0162, "step": 226980 }, { "epoch": 1.8366372683874101, "grad_norm": 0.4554908573627472, "learning_rate": 2.0186293273723623e-07, "loss": 0.0185, "step": 226990 }, { "epoch": 1.8367181810826119, "grad_norm": 0.3052904009819031, "learning_rate": 2.016643737978474e-07, "loss": 0.019, "step": 227000 }, { "epoch": 1.8367990937778136, "grad_norm": 0.2973221242427826, "learning_rate": 2.0146591055113406e-07, "loss": 0.0188, "step": 227010 }, { "epoch": 1.8368800064730157, "grad_norm": 0.23260851204395294, "learning_rate": 2.01267543001053e-07, "loss": 0.0111, "step": 227020 }, { "epoch": 1.8369609191682175, "grad_norm": 0.181112602353096, "learning_rate": 2.0106927115155883e-07, "loss": 0.0203, "step": 227030 }, { "epoch": 1.8370418318634194, "grad_norm": 0.6247169971466064, "learning_rate": 2.0087109500660895e-07, "loss": 0.0279, "step": 227040 }, { "epoch": 1.8371227445586213, "grad_norm": 0.4219217896461487, "learning_rate": 2.0067301457015242e-07, "loss": 0.0182, "step": 227050 }, { "epoch": 1.837203657253823, "grad_norm": 0.45959314703941345, "learning_rate": 2.0047502984614054e-07, "loss": 0.0203, "step": 227060 }, { "epoch": 1.837284569949025, "grad_norm": 0.2826348543167114, "learning_rate": 2.0027714083852346e-07, "loss": 0.0115, "step": 227070 }, { "epoch": 1.837365482644227, "grad_norm": 0.43313246965408325, "learning_rate": 2.0007934755124413e-07, "loss": 0.0224, "step": 227080 }, { "epoch": 1.8374463953394287, "grad_norm": 0.5136983394622803, "learning_rate": 1.9988164998825056e-07, "loss": 0.0171, "step": 227090 }, { "epoch": 1.8375273080346306, "grad_norm": 0.42126980423927307, "learning_rate": 1.9968404815348398e-07, "loss": 0.0214, "step": 227100 }, { "epoch": 1.8376082207298325, "grad_norm": 0.6651659607887268, "learning_rate": 1.9948654205088458e-07, "loss": 0.0183, "step": 227110 }, { "epoch": 1.8376891334250343, "grad_norm": 0.16961383819580078, "learning_rate": 1.99289131684392e-07, "loss": 0.0137, "step": 227120 }, { "epoch": 1.8377700461202364, "grad_norm": 0.963234007358551, "learning_rate": 1.990918170579431e-07, "loss": 0.0228, "step": 227130 }, { "epoch": 1.8378509588154381, "grad_norm": 0.2906824052333832, "learning_rate": 1.9889459817547196e-07, "loss": 0.0216, "step": 227140 }, { "epoch": 1.8379318715106399, "grad_norm": 0.2561277747154236, "learning_rate": 1.986974750409132e-07, "loss": 0.0177, "step": 227150 }, { "epoch": 1.838012784205842, "grad_norm": 0.43428778648376465, "learning_rate": 1.9850044765819697e-07, "loss": 0.0239, "step": 227160 }, { "epoch": 1.8380936969010437, "grad_norm": 0.325361430644989, "learning_rate": 1.9830351603125352e-07, "loss": 0.0077, "step": 227170 }, { "epoch": 1.8381746095962457, "grad_norm": 0.16194240748882294, "learning_rate": 1.9810668016400913e-07, "loss": 0.0129, "step": 227180 }, { "epoch": 1.8382555222914476, "grad_norm": 0.32746773958206177, "learning_rate": 1.9790994006038956e-07, "loss": 0.0176, "step": 227190 }, { "epoch": 1.8383364349866493, "grad_norm": 0.5278549194335938, "learning_rate": 1.9771329572431887e-07, "loss": 0.0123, "step": 227200 }, { "epoch": 1.8384173476818513, "grad_norm": 0.19918543100357056, "learning_rate": 1.9751674715971835e-07, "loss": 0.0171, "step": 227210 }, { "epoch": 1.8384982603770532, "grad_norm": 0.3696810007095337, "learning_rate": 1.973202943705077e-07, "loss": 0.022, "step": 227220 }, { "epoch": 1.838579173072255, "grad_norm": 0.2651650011539459, "learning_rate": 1.9712393736060543e-07, "loss": 0.0142, "step": 227230 }, { "epoch": 1.8386600857674569, "grad_norm": 0.24092629551887512, "learning_rate": 1.9692767613392673e-07, "loss": 0.014, "step": 227240 }, { "epoch": 1.8387409984626588, "grad_norm": 0.3372125029563904, "learning_rate": 1.9673151069438457e-07, "loss": 0.0154, "step": 227250 }, { "epoch": 1.8388219111578605, "grad_norm": 0.15345986187458038, "learning_rate": 1.9653544104589417e-07, "loss": 0.0165, "step": 227260 }, { "epoch": 1.8389028238530627, "grad_norm": 0.23676800727844238, "learning_rate": 1.9633946719236242e-07, "loss": 0.0197, "step": 227270 }, { "epoch": 1.8389837365482644, "grad_norm": 0.6766088008880615, "learning_rate": 1.9614358913769948e-07, "loss": 0.0132, "step": 227280 }, { "epoch": 1.8390646492434664, "grad_norm": 0.2780752182006836, "learning_rate": 1.9594780688581172e-07, "loss": 0.0158, "step": 227290 }, { "epoch": 1.8391455619386683, "grad_norm": 0.25274184346199036, "learning_rate": 1.957521204406021e-07, "loss": 0.0213, "step": 227300 }, { "epoch": 1.83922647463387, "grad_norm": 0.3858085572719574, "learning_rate": 1.9555652980597471e-07, "loss": 0.0208, "step": 227310 }, { "epoch": 1.839307387329072, "grad_norm": 0.29076775908470154, "learning_rate": 1.9536103498583093e-07, "loss": 0.0128, "step": 227320 }, { "epoch": 1.839388300024274, "grad_norm": 0.9610527157783508, "learning_rate": 1.9516563598406646e-07, "loss": 0.0217, "step": 227330 }, { "epoch": 1.8394692127194756, "grad_norm": 0.48785313963890076, "learning_rate": 1.9497033280458044e-07, "loss": 0.0185, "step": 227340 }, { "epoch": 1.8395501254146776, "grad_norm": 0.06312370300292969, "learning_rate": 1.9477512545126752e-07, "loss": 0.0192, "step": 227350 }, { "epoch": 1.8396310381098795, "grad_norm": 0.3317536413669586, "learning_rate": 1.9458001392802074e-07, "loss": 0.0141, "step": 227360 }, { "epoch": 1.8397119508050812, "grad_norm": 0.3550862669944763, "learning_rate": 1.9438499823873025e-07, "loss": 0.0119, "step": 227370 }, { "epoch": 1.8397928635002831, "grad_norm": 0.6306616067886353, "learning_rate": 1.9419007838728632e-07, "loss": 0.0329, "step": 227380 }, { "epoch": 1.839873776195485, "grad_norm": 0.5263770222663879, "learning_rate": 1.9399525437757582e-07, "loss": 0.0271, "step": 227390 }, { "epoch": 1.8399546888906868, "grad_norm": 0.3586858808994293, "learning_rate": 1.9380052621348344e-07, "loss": 0.0129, "step": 227400 }, { "epoch": 1.840035601585889, "grad_norm": 0.15153799951076508, "learning_rate": 1.9360589389889328e-07, "loss": 0.0138, "step": 227410 }, { "epoch": 1.8401165142810907, "grad_norm": 0.5765313506126404, "learning_rate": 1.934113574376878e-07, "loss": 0.0185, "step": 227420 }, { "epoch": 1.8401974269762926, "grad_norm": 0.6008664965629578, "learning_rate": 1.93216916833745e-07, "loss": 0.0275, "step": 227430 }, { "epoch": 1.8402783396714946, "grad_norm": 0.005855595227330923, "learning_rate": 1.9302257209094233e-07, "loss": 0.0243, "step": 227440 }, { "epoch": 1.8403592523666963, "grad_norm": 0.2487015277147293, "learning_rate": 1.9282832321315837e-07, "loss": 0.014, "step": 227450 }, { "epoch": 1.8404401650618982, "grad_norm": 0.20880582928657532, "learning_rate": 1.9263417020426389e-07, "loss": 0.0157, "step": 227460 }, { "epoch": 1.8405210777571002, "grad_norm": 0.21986965835094452, "learning_rate": 1.9244011306813192e-07, "loss": 0.0176, "step": 227470 }, { "epoch": 1.8406019904523019, "grad_norm": 0.18562428653240204, "learning_rate": 1.9224615180863383e-07, "loss": 0.0275, "step": 227480 }, { "epoch": 1.8406829031475038, "grad_norm": 0.3090430796146393, "learning_rate": 1.9205228642963537e-07, "loss": 0.0111, "step": 227490 }, { "epoch": 1.8407638158427058, "grad_norm": 0.49117621779441833, "learning_rate": 1.9185851693500456e-07, "loss": 0.0273, "step": 227500 }, { "epoch": 1.8408447285379075, "grad_norm": 0.4501465857028961, "learning_rate": 1.916648433286056e-07, "loss": 0.021, "step": 227510 }, { "epoch": 1.8409256412331094, "grad_norm": 0.4191167652606964, "learning_rate": 1.9147126561429917e-07, "loss": 0.0175, "step": 227520 }, { "epoch": 1.8410065539283114, "grad_norm": 0.528756320476532, "learning_rate": 1.9127778379594842e-07, "loss": 0.0171, "step": 227530 }, { "epoch": 1.841087466623513, "grad_norm": 0.042707353830337524, "learning_rate": 1.9108439787741017e-07, "loss": 0.0264, "step": 227540 }, { "epoch": 1.8411683793187152, "grad_norm": 0.22976407408714294, "learning_rate": 1.9089110786254138e-07, "loss": 0.0128, "step": 227550 }, { "epoch": 1.841249292013917, "grad_norm": 0.34521329402923584, "learning_rate": 1.906979137551973e-07, "loss": 0.0289, "step": 227560 }, { "epoch": 1.841330204709119, "grad_norm": 0.36572179198265076, "learning_rate": 1.9050481555923038e-07, "loss": 0.0224, "step": 227570 }, { "epoch": 1.8414111174043208, "grad_norm": 0.2976611852645874, "learning_rate": 1.9031181327849146e-07, "loss": 0.0093, "step": 227580 }, { "epoch": 1.8414920300995226, "grad_norm": 0.2632474899291992, "learning_rate": 1.9011890691682967e-07, "loss": 0.017, "step": 227590 }, { "epoch": 1.8415729427947245, "grad_norm": 0.3407411575317383, "learning_rate": 1.8992609647809245e-07, "loss": 0.0168, "step": 227600 }, { "epoch": 1.8416538554899264, "grad_norm": 0.3016563653945923, "learning_rate": 1.8973338196612456e-07, "loss": 0.0228, "step": 227610 }, { "epoch": 1.8417347681851282, "grad_norm": 0.002952298615127802, "learning_rate": 1.89540763384769e-07, "loss": 0.018, "step": 227620 }, { "epoch": 1.84181568088033, "grad_norm": 0.2552085816860199, "learning_rate": 1.8934824073786772e-07, "loss": 0.0208, "step": 227630 }, { "epoch": 1.841896593575532, "grad_norm": 0.4504900276660919, "learning_rate": 1.8915581402926098e-07, "loss": 0.025, "step": 227640 }, { "epoch": 1.8419775062707338, "grad_norm": 0.3463677763938904, "learning_rate": 1.889634832627846e-07, "loss": 0.0219, "step": 227650 }, { "epoch": 1.842058418965936, "grad_norm": 0.5172317624092102, "learning_rate": 1.8877124844227435e-07, "loss": 0.0276, "step": 227660 }, { "epoch": 1.8421393316611376, "grad_norm": 0.4501025378704071, "learning_rate": 1.8857910957156612e-07, "loss": 0.023, "step": 227670 }, { "epoch": 1.8422202443563394, "grad_norm": 0.3092581331729889, "learning_rate": 1.8838706665448958e-07, "loss": 0.0143, "step": 227680 }, { "epoch": 1.8423011570515415, "grad_norm": 0.44957512617111206, "learning_rate": 1.8819511969487447e-07, "loss": 0.0123, "step": 227690 }, { "epoch": 1.8423820697467432, "grad_norm": 0.4818560779094696, "learning_rate": 1.8800326869655104e-07, "loss": 0.0235, "step": 227700 }, { "epoch": 1.8424629824419452, "grad_norm": 0.2450794130563736, "learning_rate": 1.8781151366334239e-07, "loss": 0.0124, "step": 227710 }, { "epoch": 1.8425438951371471, "grad_norm": 0.019646992906928062, "learning_rate": 1.8761985459907428e-07, "loss": 0.0167, "step": 227720 }, { "epoch": 1.8426248078323488, "grad_norm": 0.14325036108493805, "learning_rate": 1.8742829150756925e-07, "loss": 0.0124, "step": 227730 }, { "epoch": 1.8427057205275508, "grad_norm": 0.7836846113204956, "learning_rate": 1.8723682439264701e-07, "loss": 0.0222, "step": 227740 }, { "epoch": 1.8427866332227527, "grad_norm": 0.3698849380016327, "learning_rate": 1.8704545325812616e-07, "loss": 0.0199, "step": 227750 }, { "epoch": 1.8428675459179544, "grad_norm": 0.3677396774291992, "learning_rate": 1.868541781078237e-07, "loss": 0.0212, "step": 227760 }, { "epoch": 1.8429484586131564, "grad_norm": 0.39514121413230896, "learning_rate": 1.8666299894555318e-07, "loss": 0.0153, "step": 227770 }, { "epoch": 1.8430293713083583, "grad_norm": 0.5453149080276489, "learning_rate": 1.864719157751277e-07, "loss": 0.0163, "step": 227780 }, { "epoch": 1.84311028400356, "grad_norm": 0.4094773232936859, "learning_rate": 1.8628092860035806e-07, "loss": 0.0136, "step": 227790 }, { "epoch": 1.8431911966987622, "grad_norm": 0.4380292594432831, "learning_rate": 1.8609003742505293e-07, "loss": 0.01, "step": 227800 }, { "epoch": 1.843272109393964, "grad_norm": 0.2202751487493515, "learning_rate": 1.858992422530198e-07, "loss": 0.0258, "step": 227810 }, { "epoch": 1.8433530220891656, "grad_norm": 0.4567309617996216, "learning_rate": 1.857085430880623e-07, "loss": 0.0157, "step": 227820 }, { "epoch": 1.8434339347843678, "grad_norm": 0.3804283142089844, "learning_rate": 1.855179399339857e-07, "loss": 0.0103, "step": 227830 }, { "epoch": 1.8435148474795695, "grad_norm": 0.052603352814912796, "learning_rate": 1.8532743279458976e-07, "loss": 0.0187, "step": 227840 }, { "epoch": 1.8435957601747714, "grad_norm": 0.2522192597389221, "learning_rate": 1.8513702167367254e-07, "loss": 0.0167, "step": 227850 }, { "epoch": 1.8436766728699734, "grad_norm": 0.27999022603034973, "learning_rate": 1.8494670657503488e-07, "loss": 0.0101, "step": 227860 }, { "epoch": 1.843757585565175, "grad_norm": 0.6990537643432617, "learning_rate": 1.8475648750246932e-07, "loss": 0.0196, "step": 227870 }, { "epoch": 1.843838498260377, "grad_norm": 0.3945474326610565, "learning_rate": 1.8456636445976894e-07, "loss": 0.0133, "step": 227880 }, { "epoch": 1.843919410955579, "grad_norm": 0.4759083390235901, "learning_rate": 1.8437633745072792e-07, "loss": 0.0117, "step": 227890 }, { "epoch": 1.8440003236507807, "grad_norm": 0.26284098625183105, "learning_rate": 1.8418640647913432e-07, "loss": 0.024, "step": 227900 }, { "epoch": 1.8440812363459826, "grad_norm": 0.6301441192626953, "learning_rate": 1.8399657154877627e-07, "loss": 0.0159, "step": 227910 }, { "epoch": 1.8441621490411846, "grad_norm": 0.4123052656650543, "learning_rate": 1.8380683266343956e-07, "loss": 0.0196, "step": 227920 }, { "epoch": 1.8442430617363863, "grad_norm": 0.35613277554512024, "learning_rate": 1.8361718982690847e-07, "loss": 0.0226, "step": 227930 }, { "epoch": 1.8443239744315885, "grad_norm": 0.2549680471420288, "learning_rate": 1.8342764304296434e-07, "loss": 0.0148, "step": 227940 }, { "epoch": 1.8444048871267902, "grad_norm": 0.36739957332611084, "learning_rate": 1.8323819231538753e-07, "loss": 0.0135, "step": 227950 }, { "epoch": 1.8444857998219921, "grad_norm": 0.19082771241664886, "learning_rate": 1.8304883764795668e-07, "loss": 0.0098, "step": 227960 }, { "epoch": 1.844566712517194, "grad_norm": 0.3407611846923828, "learning_rate": 1.8285957904444707e-07, "loss": 0.0137, "step": 227970 }, { "epoch": 1.8446476252123958, "grad_norm": 0.35432881116867065, "learning_rate": 1.8267041650863458e-07, "loss": 0.0166, "step": 227980 }, { "epoch": 1.8447285379075977, "grad_norm": 0.24137793481349945, "learning_rate": 1.8248135004429014e-07, "loss": 0.0181, "step": 227990 }, { "epoch": 1.8448094506027997, "grad_norm": 0.3761807084083557, "learning_rate": 1.822923796551851e-07, "loss": 0.0234, "step": 228000 }, { "epoch": 1.8448903632980014, "grad_norm": 0.40106138586997986, "learning_rate": 1.8210350534508814e-07, "loss": 0.0215, "step": 228010 }, { "epoch": 1.8449712759932033, "grad_norm": 0.44292253255844116, "learning_rate": 1.819147271177657e-07, "loss": 0.0149, "step": 228020 }, { "epoch": 1.8450521886884053, "grad_norm": 0.3192480802536011, "learning_rate": 1.8172604497698254e-07, "loss": 0.012, "step": 228030 }, { "epoch": 1.845133101383607, "grad_norm": 0.3827497959136963, "learning_rate": 1.8153745892650122e-07, "loss": 0.0135, "step": 228040 }, { "epoch": 1.845214014078809, "grad_norm": 0.5768111944198608, "learning_rate": 1.8134896897008368e-07, "loss": 0.0209, "step": 228050 }, { "epoch": 1.8452949267740109, "grad_norm": 0.3611583113670349, "learning_rate": 1.811605751114881e-07, "loss": 0.0253, "step": 228060 }, { "epoch": 1.8453758394692126, "grad_norm": 0.5004894733428955, "learning_rate": 1.8097227735447142e-07, "loss": 0.0206, "step": 228070 }, { "epoch": 1.8454567521644147, "grad_norm": 0.17818255722522736, "learning_rate": 1.8078407570278956e-07, "loss": 0.0248, "step": 228080 }, { "epoch": 1.8455376648596165, "grad_norm": 0.5172025561332703, "learning_rate": 1.8059597016019615e-07, "loss": 0.0161, "step": 228090 }, { "epoch": 1.8456185775548184, "grad_norm": 0.1938369870185852, "learning_rate": 1.8040796073044042e-07, "loss": 0.0156, "step": 228100 }, { "epoch": 1.8456994902500203, "grad_norm": 1.0498236417770386, "learning_rate": 1.802200474172744e-07, "loss": 0.0155, "step": 228110 }, { "epoch": 1.845780402945222, "grad_norm": 0.34932929277420044, "learning_rate": 1.8003223022444393e-07, "loss": 0.0123, "step": 228120 }, { "epoch": 1.845861315640424, "grad_norm": 0.49021437764167786, "learning_rate": 1.798445091556955e-07, "loss": 0.0176, "step": 228130 }, { "epoch": 1.845942228335626, "grad_norm": 0.14654280245304108, "learning_rate": 1.7965688421477278e-07, "loss": 0.0156, "step": 228140 }, { "epoch": 1.8460231410308277, "grad_norm": 0.3524560034275055, "learning_rate": 1.794693554054172e-07, "loss": 0.0121, "step": 228150 }, { "epoch": 1.8461040537260296, "grad_norm": 0.9573106169700623, "learning_rate": 1.7928192273136857e-07, "loss": 0.0258, "step": 228160 }, { "epoch": 1.8461849664212315, "grad_norm": 0.6347271203994751, "learning_rate": 1.7909458619636554e-07, "loss": 0.019, "step": 228170 }, { "epoch": 1.8462658791164333, "grad_norm": 0.24834062159061432, "learning_rate": 1.7890734580414294e-07, "loss": 0.0232, "step": 228180 }, { "epoch": 1.8463467918116352, "grad_norm": 0.41417959332466125, "learning_rate": 1.7872020155843605e-07, "loss": 0.0267, "step": 228190 }, { "epoch": 1.8464277045068371, "grad_norm": 0.42308562994003296, "learning_rate": 1.7853315346297585e-07, "loss": 0.0162, "step": 228200 }, { "epoch": 1.8465086172020388, "grad_norm": 0.2630910277366638, "learning_rate": 1.783462015214943e-07, "loss": 0.0124, "step": 228210 }, { "epoch": 1.846589529897241, "grad_norm": 0.231122225522995, "learning_rate": 1.7815934573771787e-07, "loss": 0.0164, "step": 228220 }, { "epoch": 1.8466704425924427, "grad_norm": 0.27159830927848816, "learning_rate": 1.779725861153747e-07, "loss": 0.011, "step": 228230 }, { "epoch": 1.8467513552876447, "grad_norm": 0.647741436958313, "learning_rate": 1.7778592265818794e-07, "loss": 0.0187, "step": 228240 }, { "epoch": 1.8468322679828466, "grad_norm": 0.03816654533147812, "learning_rate": 1.7759935536988125e-07, "loss": 0.0162, "step": 228250 }, { "epoch": 1.8469131806780483, "grad_norm": 0.5052437782287598, "learning_rate": 1.7741288425417392e-07, "loss": 0.0164, "step": 228260 }, { "epoch": 1.8469940933732503, "grad_norm": 0.35498401522636414, "learning_rate": 1.772265093147868e-07, "loss": 0.0194, "step": 228270 }, { "epoch": 1.8470750060684522, "grad_norm": 0.2720131576061249, "learning_rate": 1.7704023055543584e-07, "loss": 0.0128, "step": 228280 }, { "epoch": 1.847155918763654, "grad_norm": 0.2259756624698639, "learning_rate": 1.7685404797983473e-07, "loss": 0.0253, "step": 228290 }, { "epoch": 1.8472368314588559, "grad_norm": 0.2845725119113922, "learning_rate": 1.7666796159169775e-07, "loss": 0.0272, "step": 228300 }, { "epoch": 1.8473177441540578, "grad_norm": 0.23908625543117523, "learning_rate": 1.7648197139473634e-07, "loss": 0.0155, "step": 228310 }, { "epoch": 1.8473986568492595, "grad_norm": 0.3017420172691345, "learning_rate": 1.7629607739265863e-07, "loss": 0.0262, "step": 228320 }, { "epoch": 1.8474795695444617, "grad_norm": 0.32730114459991455, "learning_rate": 1.7611027958917226e-07, "loss": 0.0243, "step": 228330 }, { "epoch": 1.8475604822396634, "grad_norm": 0.33094748854637146, "learning_rate": 1.7592457798798313e-07, "loss": 0.0136, "step": 228340 }, { "epoch": 1.8476413949348651, "grad_norm": 0.2629357874393463, "learning_rate": 1.7573897259279382e-07, "loss": 0.0145, "step": 228350 }, { "epoch": 1.8477223076300673, "grad_norm": 0.19423352181911469, "learning_rate": 1.7555346340730693e-07, "loss": 0.0199, "step": 228360 }, { "epoch": 1.847803220325269, "grad_norm": 0.374216765165329, "learning_rate": 1.7536805043522065e-07, "loss": 0.0188, "step": 228370 }, { "epoch": 1.847884133020471, "grad_norm": 0.34065496921539307, "learning_rate": 1.7518273368023363e-07, "loss": 0.0112, "step": 228380 }, { "epoch": 1.8479650457156729, "grad_norm": 0.16606645286083221, "learning_rate": 1.749975131460413e-07, "loss": 0.0122, "step": 228390 }, { "epoch": 1.8480459584108746, "grad_norm": 0.38522952795028687, "learning_rate": 1.7481238883633733e-07, "loss": 0.0209, "step": 228400 }, { "epoch": 1.8481268711060765, "grad_norm": 0.35827013850212097, "learning_rate": 1.7462736075481435e-07, "loss": 0.0284, "step": 228410 }, { "epoch": 1.8482077838012785, "grad_norm": 0.4704152047634125, "learning_rate": 1.744424289051616e-07, "loss": 0.0228, "step": 228420 }, { "epoch": 1.8482886964964802, "grad_norm": 0.10937320441007614, "learning_rate": 1.7425759329106729e-07, "loss": 0.0138, "step": 228430 }, { "epoch": 1.8483696091916821, "grad_norm": 0.42663273215293884, "learning_rate": 1.7407285391621785e-07, "loss": 0.0205, "step": 228440 }, { "epoch": 1.848450521886884, "grad_norm": 0.19971142709255219, "learning_rate": 1.7388821078429652e-07, "loss": 0.0302, "step": 228450 }, { "epoch": 1.8485314345820858, "grad_norm": 0.18133655190467834, "learning_rate": 1.7370366389898696e-07, "loss": 0.0392, "step": 228460 }, { "epoch": 1.848612347277288, "grad_norm": 0.47431719303131104, "learning_rate": 1.7351921326397015e-07, "loss": 0.0136, "step": 228470 }, { "epoch": 1.8486932599724897, "grad_norm": 0.44242537021636963, "learning_rate": 1.73334858882922e-07, "loss": 0.0192, "step": 228480 }, { "epoch": 1.8487741726676914, "grad_norm": 0.21010129153728485, "learning_rate": 1.7315060075952128e-07, "loss": 0.0198, "step": 228490 }, { "epoch": 1.8488550853628936, "grad_norm": 0.4013949930667877, "learning_rate": 1.7296643889744225e-07, "loss": 0.026, "step": 228500 }, { "epoch": 1.8489359980580953, "grad_norm": 0.3653448522090912, "learning_rate": 1.7278237330035642e-07, "loss": 0.016, "step": 228510 }, { "epoch": 1.8490169107532972, "grad_norm": 0.32006481289863586, "learning_rate": 1.7259840397193584e-07, "loss": 0.0132, "step": 228520 }, { "epoch": 1.8490978234484992, "grad_norm": 0.19873611629009247, "learning_rate": 1.724145309158487e-07, "loss": 0.0189, "step": 228530 }, { "epoch": 1.8491787361437009, "grad_norm": 0.5308998823165894, "learning_rate": 1.7223075413576264e-07, "loss": 0.0215, "step": 228540 }, { "epoch": 1.8492596488389028, "grad_norm": 0.46903231739997864, "learning_rate": 1.7204707363534245e-07, "loss": 0.0259, "step": 228550 }, { "epoch": 1.8493405615341048, "grad_norm": 0.3436839282512665, "learning_rate": 1.7186348941825082e-07, "loss": 0.0207, "step": 228560 }, { "epoch": 1.8494214742293065, "grad_norm": 0.38220494985580444, "learning_rate": 1.7168000148814923e-07, "loss": 0.0198, "step": 228570 }, { "epoch": 1.8495023869245084, "grad_norm": 0.6557007431983948, "learning_rate": 1.7149660984869697e-07, "loss": 0.0184, "step": 228580 }, { "epoch": 1.8495832996197104, "grad_norm": 0.28860214352607727, "learning_rate": 1.713133145035517e-07, "loss": 0.0155, "step": 228590 }, { "epoch": 1.849664212314912, "grad_norm": 1.003024697303772, "learning_rate": 1.7113011545636827e-07, "loss": 0.0159, "step": 228600 }, { "epoch": 1.8497451250101142, "grad_norm": 0.47872236371040344, "learning_rate": 1.709470127108004e-07, "loss": 0.0145, "step": 228610 }, { "epoch": 1.849826037705316, "grad_norm": 0.32738959789276123, "learning_rate": 1.707640062704996e-07, "loss": 0.0129, "step": 228620 }, { "epoch": 1.849906950400518, "grad_norm": 0.1738167107105255, "learning_rate": 1.7058109613911634e-07, "loss": 0.0157, "step": 228630 }, { "epoch": 1.8499878630957198, "grad_norm": 0.6052209138870239, "learning_rate": 1.7039828232029765e-07, "loss": 0.022, "step": 228640 }, { "epoch": 1.8500687757909215, "grad_norm": 0.5602196455001831, "learning_rate": 1.7021556481768897e-07, "loss": 0.015, "step": 228650 }, { "epoch": 1.8501496884861235, "grad_norm": 0.4762076437473297, "learning_rate": 1.7003294363493571e-07, "loss": 0.0193, "step": 228660 }, { "epoch": 1.8502306011813254, "grad_norm": 0.3409629166126251, "learning_rate": 1.698504187756772e-07, "loss": 0.0179, "step": 228670 }, { "epoch": 1.8503115138765271, "grad_norm": 0.17755526304244995, "learning_rate": 1.6966799024355662e-07, "loss": 0.0124, "step": 228680 }, { "epoch": 1.850392426571729, "grad_norm": 0.25309810042381287, "learning_rate": 1.694856580422105e-07, "loss": 0.0149, "step": 228690 }, { "epoch": 1.850473339266931, "grad_norm": 0.23225407302379608, "learning_rate": 1.6930342217527485e-07, "loss": 0.0143, "step": 228700 }, { "epoch": 1.8505542519621327, "grad_norm": 0.029046490788459778, "learning_rate": 1.691212826463845e-07, "loss": 0.019, "step": 228710 }, { "epoch": 1.8506351646573347, "grad_norm": 0.5843105912208557, "learning_rate": 1.6893923945917266e-07, "loss": 0.0139, "step": 228720 }, { "epoch": 1.8507160773525366, "grad_norm": 0.13870869576931, "learning_rate": 1.6875729261726704e-07, "loss": 0.025, "step": 228730 }, { "epoch": 1.8507969900477383, "grad_norm": 0.31541645526885986, "learning_rate": 1.685754421242991e-07, "loss": 0.0124, "step": 228740 }, { "epoch": 1.8508779027429405, "grad_norm": 0.48175185918807983, "learning_rate": 1.6839368798389433e-07, "loss": 0.0218, "step": 228750 }, { "epoch": 1.8509588154381422, "grad_norm": 0.27975690364837646, "learning_rate": 1.682120301996776e-07, "loss": 0.0092, "step": 228760 }, { "epoch": 1.8510397281333442, "grad_norm": 0.10544124245643616, "learning_rate": 1.6803046877527153e-07, "loss": 0.0157, "step": 228770 }, { "epoch": 1.851120640828546, "grad_norm": 0.5965510606765747, "learning_rate": 1.6784900371429658e-07, "loss": 0.0184, "step": 228780 }, { "epoch": 1.8512015535237478, "grad_norm": 0.6267754435539246, "learning_rate": 1.6766763502037264e-07, "loss": 0.0207, "step": 228790 }, { "epoch": 1.8512824662189498, "grad_norm": 0.2912171185016632, "learning_rate": 1.6748636269711628e-07, "loss": 0.0155, "step": 228800 }, { "epoch": 1.8513633789141517, "grad_norm": 0.11624111980199814, "learning_rate": 1.6730518674814179e-07, "loss": 0.0197, "step": 228810 }, { "epoch": 1.8514442916093534, "grad_norm": 0.4949056804180145, "learning_rate": 1.6712410717706406e-07, "loss": 0.0142, "step": 228820 }, { "epoch": 1.8515252043045554, "grad_norm": 0.4063534736633301, "learning_rate": 1.6694312398749246e-07, "loss": 0.0219, "step": 228830 }, { "epoch": 1.8516061169997573, "grad_norm": 0.1218598410487175, "learning_rate": 1.6676223718303742e-07, "loss": 0.0141, "step": 228840 }, { "epoch": 1.851687029694959, "grad_norm": 0.22683291137218475, "learning_rate": 1.6658144676730658e-07, "loss": 0.0165, "step": 228850 }, { "epoch": 1.8517679423901612, "grad_norm": 0.3006175458431244, "learning_rate": 1.664007527439043e-07, "loss": 0.0225, "step": 228860 }, { "epoch": 1.851848855085363, "grad_norm": 0.15067879855632782, "learning_rate": 1.6622015511643553e-07, "loss": 0.0107, "step": 228870 }, { "epoch": 1.8519297677805646, "grad_norm": 0.2522045969963074, "learning_rate": 1.6603965388850117e-07, "loss": 0.0183, "step": 228880 }, { "epoch": 1.8520106804757668, "grad_norm": 0.2813451290130615, "learning_rate": 1.6585924906370064e-07, "loss": 0.0196, "step": 228890 }, { "epoch": 1.8520915931709685, "grad_norm": 0.35424649715423584, "learning_rate": 1.6567894064563218e-07, "loss": 0.026, "step": 228900 }, { "epoch": 1.8521725058661704, "grad_norm": 0.4216485917568207, "learning_rate": 1.6549872863789174e-07, "loss": 0.0203, "step": 228910 }, { "epoch": 1.8522534185613724, "grad_norm": 0.37305551767349243, "learning_rate": 1.653186130440726e-07, "loss": 0.0248, "step": 228920 }, { "epoch": 1.852334331256574, "grad_norm": 0.5098825693130493, "learning_rate": 1.6513859386776743e-07, "loss": 0.0144, "step": 228930 }, { "epoch": 1.852415243951776, "grad_norm": 0.2249426245689392, "learning_rate": 1.6495867111256614e-07, "loss": 0.0204, "step": 228940 }, { "epoch": 1.852496156646978, "grad_norm": 0.9253998398780823, "learning_rate": 1.6477884478205696e-07, "loss": 0.023, "step": 228950 }, { "epoch": 1.8525770693421797, "grad_norm": 0.14201728999614716, "learning_rate": 1.6459911487982648e-07, "loss": 0.0253, "step": 228960 }, { "epoch": 1.8526579820373816, "grad_norm": 0.3342774510383606, "learning_rate": 1.6441948140945852e-07, "loss": 0.0117, "step": 228970 }, { "epoch": 1.8527388947325836, "grad_norm": 0.13004733622074127, "learning_rate": 1.6423994437453515e-07, "loss": 0.0172, "step": 228980 }, { "epoch": 1.8528198074277853, "grad_norm": 0.3556062877178192, "learning_rate": 1.6406050377863803e-07, "loss": 0.0212, "step": 228990 }, { "epoch": 1.8529007201229875, "grad_norm": 0.48787721991539, "learning_rate": 1.6388115962534423e-07, "loss": 0.017, "step": 229000 }, { "epoch": 1.8529816328181892, "grad_norm": 0.34800922870635986, "learning_rate": 1.6370191191823204e-07, "loss": 0.0204, "step": 229010 }, { "epoch": 1.853062545513391, "grad_norm": 0.3374289572238922, "learning_rate": 1.6352276066087525e-07, "loss": 0.0195, "step": 229020 }, { "epoch": 1.853143458208593, "grad_norm": 0.11855659633874893, "learning_rate": 1.63343705856846e-07, "loss": 0.0254, "step": 229030 }, { "epoch": 1.8532243709037948, "grad_norm": 0.46056097745895386, "learning_rate": 1.6316474750971645e-07, "loss": 0.0181, "step": 229040 }, { "epoch": 1.8533052835989967, "grad_norm": 0.47446560859680176, "learning_rate": 1.629858856230554e-07, "loss": 0.0199, "step": 229050 }, { "epoch": 1.8533861962941987, "grad_norm": 0.31816837191581726, "learning_rate": 1.6280712020042832e-07, "loss": 0.0086, "step": 229060 }, { "epoch": 1.8534671089894004, "grad_norm": 0.425846666097641, "learning_rate": 1.6262845124540293e-07, "loss": 0.024, "step": 229070 }, { "epoch": 1.8535480216846023, "grad_norm": 0.2025759071111679, "learning_rate": 1.6244987876153917e-07, "loss": 0.0137, "step": 229080 }, { "epoch": 1.8536289343798042, "grad_norm": 0.37480759620666504, "learning_rate": 1.6227140275240138e-07, "loss": 0.0163, "step": 229090 }, { "epoch": 1.853709847075006, "grad_norm": 0.13093896210193634, "learning_rate": 1.6209302322154784e-07, "loss": 0.0123, "step": 229100 }, { "epoch": 1.853790759770208, "grad_norm": 0.5110591053962708, "learning_rate": 1.6191474017253406e-07, "loss": 0.0212, "step": 229110 }, { "epoch": 1.8538716724654098, "grad_norm": 0.41156718134880066, "learning_rate": 1.6173655360891826e-07, "loss": 0.0184, "step": 229120 }, { "epoch": 1.8539525851606116, "grad_norm": 0.2651195526123047, "learning_rate": 1.6155846353425376e-07, "loss": 0.0214, "step": 229130 }, { "epoch": 1.8540334978558137, "grad_norm": 0.25815919041633606, "learning_rate": 1.6138046995208935e-07, "loss": 0.0273, "step": 229140 }, { "epoch": 1.8541144105510154, "grad_norm": 0.7137284874916077, "learning_rate": 1.6120257286597774e-07, "loss": 0.0233, "step": 229150 }, { "epoch": 1.8541953232462174, "grad_norm": 0.4831935465335846, "learning_rate": 1.6102477227946555e-07, "loss": 0.0151, "step": 229160 }, { "epoch": 1.8542762359414193, "grad_norm": 0.35804715752601624, "learning_rate": 1.6084706819609887e-07, "loss": 0.0135, "step": 229170 }, { "epoch": 1.854357148636621, "grad_norm": 0.11200154572725296, "learning_rate": 1.6066946061942145e-07, "loss": 0.0131, "step": 229180 }, { "epoch": 1.854438061331823, "grad_norm": 0.33141860365867615, "learning_rate": 1.6049194955297498e-07, "loss": 0.0228, "step": 229190 }, { "epoch": 1.854518974027025, "grad_norm": 0.3406953513622284, "learning_rate": 1.6031453500030048e-07, "loss": 0.0282, "step": 229200 }, { "epoch": 1.8545998867222266, "grad_norm": 0.32138702273368835, "learning_rate": 1.601372169649351e-07, "loss": 0.0098, "step": 229210 }, { "epoch": 1.8546807994174286, "grad_norm": 0.1339656263589859, "learning_rate": 1.599599954504155e-07, "loss": 0.0181, "step": 229220 }, { "epoch": 1.8547617121126305, "grad_norm": 0.13635095953941345, "learning_rate": 1.5978287046027662e-07, "loss": 0.0151, "step": 229230 }, { "epoch": 1.8548426248078322, "grad_norm": 0.35155147314071655, "learning_rate": 1.5960584199805007e-07, "loss": 0.0312, "step": 229240 }, { "epoch": 1.8549235375030342, "grad_norm": 0.3794986605644226, "learning_rate": 1.594289100672658e-07, "loss": 0.0113, "step": 229250 }, { "epoch": 1.8550044501982361, "grad_norm": 0.52240389585495, "learning_rate": 1.5925207467145377e-07, "loss": 0.018, "step": 229260 }, { "epoch": 1.8550853628934378, "grad_norm": 0.45548397302627563, "learning_rate": 1.5907533581413947e-07, "loss": 0.0237, "step": 229270 }, { "epoch": 1.85516627558864, "grad_norm": 0.4253065288066864, "learning_rate": 1.588986934988479e-07, "loss": 0.0319, "step": 229280 }, { "epoch": 1.8552471882838417, "grad_norm": 0.3809325397014618, "learning_rate": 1.5872214772910234e-07, "loss": 0.0223, "step": 229290 }, { "epoch": 1.8553281009790437, "grad_norm": 0.27356579899787903, "learning_rate": 1.5854569850842217e-07, "loss": 0.0117, "step": 229300 }, { "epoch": 1.8554090136742456, "grad_norm": 0.5205004215240479, "learning_rate": 1.5836934584032793e-07, "loss": 0.0185, "step": 229310 }, { "epoch": 1.8554899263694473, "grad_norm": 0.5235301852226257, "learning_rate": 1.581930897283368e-07, "loss": 0.0216, "step": 229320 }, { "epoch": 1.8555708390646493, "grad_norm": 0.3183809816837311, "learning_rate": 1.5801693017596153e-07, "loss": 0.0195, "step": 229330 }, { "epoch": 1.8556517517598512, "grad_norm": 0.5857868790626526, "learning_rate": 1.578408671867171e-07, "loss": 0.0154, "step": 229340 }, { "epoch": 1.855732664455053, "grad_norm": 0.18805375695228577, "learning_rate": 1.5766490076411455e-07, "loss": 0.0183, "step": 229350 }, { "epoch": 1.8558135771502549, "grad_norm": 0.0978689044713974, "learning_rate": 1.5748903091166335e-07, "loss": 0.018, "step": 229360 }, { "epoch": 1.8558944898454568, "grad_norm": 0.47331827878952026, "learning_rate": 1.5731325763286954e-07, "loss": 0.0177, "step": 229370 }, { "epoch": 1.8559754025406585, "grad_norm": 0.5792451500892639, "learning_rate": 1.5713758093124033e-07, "loss": 0.0206, "step": 229380 }, { "epoch": 1.8560563152358605, "grad_norm": 0.4935058057308197, "learning_rate": 1.569620008102779e-07, "loss": 0.0139, "step": 229390 }, { "epoch": 1.8561372279310624, "grad_norm": 0.22148902714252472, "learning_rate": 1.5678651727348392e-07, "loss": 0.0206, "step": 229400 }, { "epoch": 1.8562181406262641, "grad_norm": 0.375973105430603, "learning_rate": 1.5661113032435837e-07, "loss": 0.0107, "step": 229410 }, { "epoch": 1.8562990533214663, "grad_norm": 0.5392550230026245, "learning_rate": 1.564358399664001e-07, "loss": 0.0161, "step": 229420 }, { "epoch": 1.856379966016668, "grad_norm": 0.13533823192119598, "learning_rate": 1.56260646203103e-07, "loss": 0.0119, "step": 229430 }, { "epoch": 1.85646087871187, "grad_norm": 0.60591059923172, "learning_rate": 1.560855490379609e-07, "loss": 0.0116, "step": 229440 }, { "epoch": 1.8565417914070719, "grad_norm": 0.21009472012519836, "learning_rate": 1.5591054847446829e-07, "loss": 0.0282, "step": 229450 }, { "epoch": 1.8566227041022736, "grad_norm": 0.06300529092550278, "learning_rate": 1.5573564451611233e-07, "loss": 0.024, "step": 229460 }, { "epoch": 1.8567036167974755, "grad_norm": 0.28288793563842773, "learning_rate": 1.5556083716638193e-07, "loss": 0.0329, "step": 229470 }, { "epoch": 1.8567845294926775, "grad_norm": 0.4728069007396698, "learning_rate": 1.5538612642876482e-07, "loss": 0.0248, "step": 229480 }, { "epoch": 1.8568654421878792, "grad_norm": 0.18555407226085663, "learning_rate": 1.552115123067427e-07, "loss": 0.0147, "step": 229490 }, { "epoch": 1.8569463548830811, "grad_norm": 0.2863042652606964, "learning_rate": 1.5503699480379995e-07, "loss": 0.0169, "step": 229500 }, { "epoch": 1.857027267578283, "grad_norm": 0.8721463084220886, "learning_rate": 1.5486257392341665e-07, "loss": 0.0288, "step": 229510 }, { "epoch": 1.8571081802734848, "grad_norm": 0.3343885540962219, "learning_rate": 1.5468824966906936e-07, "loss": 0.015, "step": 229520 }, { "epoch": 1.857189092968687, "grad_norm": 0.36005887389183044, "learning_rate": 1.5451402204423704e-07, "loss": 0.0098, "step": 229530 }, { "epoch": 1.8572700056638887, "grad_norm": 0.24682289361953735, "learning_rate": 1.54339891052393e-07, "loss": 0.0127, "step": 229540 }, { "epoch": 1.8573509183590904, "grad_norm": 0.4320511519908905, "learning_rate": 1.5416585669701e-07, "loss": 0.0218, "step": 229550 }, { "epoch": 1.8574318310542925, "grad_norm": 0.08946944028139114, "learning_rate": 1.5399191898155918e-07, "loss": 0.016, "step": 229560 }, { "epoch": 1.8575127437494943, "grad_norm": 0.3397849202156067, "learning_rate": 1.5381807790950943e-07, "loss": 0.0125, "step": 229570 }, { "epoch": 1.8575936564446962, "grad_norm": 0.17669391632080078, "learning_rate": 1.5364433348432683e-07, "loss": 0.0106, "step": 229580 }, { "epoch": 1.8576745691398981, "grad_norm": 0.19604170322418213, "learning_rate": 1.5347068570947753e-07, "loss": 0.0218, "step": 229590 }, { "epoch": 1.8577554818350999, "grad_norm": 0.2052147388458252, "learning_rate": 1.532971345884232e-07, "loss": 0.0179, "step": 229600 }, { "epoch": 1.8578363945303018, "grad_norm": 0.044659458100795746, "learning_rate": 1.531236801246261e-07, "loss": 0.0146, "step": 229610 }, { "epoch": 1.8579173072255037, "grad_norm": 0.35613468289375305, "learning_rate": 1.5295032232154504e-07, "loss": 0.0229, "step": 229620 }, { "epoch": 1.8579982199207055, "grad_norm": 0.31848058104515076, "learning_rate": 1.527770611826368e-07, "loss": 0.0251, "step": 229630 }, { "epoch": 1.8580791326159074, "grad_norm": 0.42760413885116577, "learning_rate": 1.52603896711358e-07, "loss": 0.0149, "step": 229640 }, { "epoch": 1.8581600453111093, "grad_norm": 0.4953848123550415, "learning_rate": 1.5243082891116034e-07, "loss": 0.0068, "step": 229650 }, { "epoch": 1.858240958006311, "grad_norm": 0.35337239503860474, "learning_rate": 1.5225785778549605e-07, "loss": 0.014, "step": 229660 }, { "epoch": 1.8583218707015132, "grad_norm": 0.4487125277519226, "learning_rate": 1.5208498333781574e-07, "loss": 0.0199, "step": 229670 }, { "epoch": 1.858402783396715, "grad_norm": 0.36052408814430237, "learning_rate": 1.5191220557156494e-07, "loss": 0.0204, "step": 229680 }, { "epoch": 1.8584836960919167, "grad_norm": 0.5223466753959656, "learning_rate": 1.5173952449019036e-07, "loss": 0.0184, "step": 229690 }, { "epoch": 1.8585646087871188, "grad_norm": 0.7098641991615295, "learning_rate": 1.5156694009713701e-07, "loss": 0.027, "step": 229700 }, { "epoch": 1.8586455214823205, "grad_norm": 0.41797298192977905, "learning_rate": 1.513944523958444e-07, "loss": 0.0178, "step": 229710 }, { "epoch": 1.8587264341775225, "grad_norm": 0.3389659523963928, "learning_rate": 1.5122206138975415e-07, "loss": 0.025, "step": 229720 }, { "epoch": 1.8588073468727244, "grad_norm": 1.0235936641693115, "learning_rate": 1.5104976708230357e-07, "loss": 0.0404, "step": 229730 }, { "epoch": 1.8588882595679261, "grad_norm": 0.26437246799468994, "learning_rate": 1.5087756947692876e-07, "loss": 0.0161, "step": 229740 }, { "epoch": 1.858969172263128, "grad_norm": 0.3497057855129242, "learning_rate": 1.5070546857706425e-07, "loss": 0.0189, "step": 229750 }, { "epoch": 1.85905008495833, "grad_norm": 0.510951042175293, "learning_rate": 1.5053346438614115e-07, "loss": 0.0173, "step": 229760 }, { "epoch": 1.8591309976535317, "grad_norm": 0.20650939643383026, "learning_rate": 1.5036155690759113e-07, "loss": 0.0177, "step": 229770 }, { "epoch": 1.8592119103487337, "grad_norm": 0.2880363166332245, "learning_rate": 1.501897461448415e-07, "loss": 0.0144, "step": 229780 }, { "epoch": 1.8592928230439356, "grad_norm": 0.18621426820755005, "learning_rate": 1.5001803210131893e-07, "loss": 0.0144, "step": 229790 }, { "epoch": 1.8593737357391373, "grad_norm": 0.2915084958076477, "learning_rate": 1.4984641478044792e-07, "loss": 0.0155, "step": 229800 }, { "epoch": 1.8594546484343395, "grad_norm": 0.6610205769538879, "learning_rate": 1.4967489418565129e-07, "loss": 0.0232, "step": 229810 }, { "epoch": 1.8595355611295412, "grad_norm": 0.3479335308074951, "learning_rate": 1.4950347032034907e-07, "loss": 0.0254, "step": 229820 }, { "epoch": 1.8596164738247432, "grad_norm": 0.22482459247112274, "learning_rate": 1.4933214318796073e-07, "loss": 0.0266, "step": 229830 }, { "epoch": 1.859697386519945, "grad_norm": 0.3709016442298889, "learning_rate": 1.4916091279190192e-07, "loss": 0.0144, "step": 229840 }, { "epoch": 1.8597782992151468, "grad_norm": 0.4593721330165863, "learning_rate": 1.489897791355882e-07, "loss": 0.0187, "step": 229850 }, { "epoch": 1.8598592119103488, "grad_norm": 0.24017375707626343, "learning_rate": 1.4881874222243296e-07, "loss": 0.014, "step": 229860 }, { "epoch": 1.8599401246055507, "grad_norm": 0.15580150485038757, "learning_rate": 1.4864780205584628e-07, "loss": 0.0197, "step": 229870 }, { "epoch": 1.8600210373007524, "grad_norm": 0.31512996554374695, "learning_rate": 1.4847695863923704e-07, "loss": 0.017, "step": 229880 }, { "epoch": 1.8601019499959544, "grad_norm": 0.5785536766052246, "learning_rate": 1.483062119760137e-07, "loss": 0.0203, "step": 229890 }, { "epoch": 1.8601828626911563, "grad_norm": 0.4146416187286377, "learning_rate": 1.4813556206958013e-07, "loss": 0.021, "step": 229900 }, { "epoch": 1.860263775386358, "grad_norm": 0.14661480486392975, "learning_rate": 1.479650089233403e-07, "loss": 0.0398, "step": 229910 }, { "epoch": 1.86034468808156, "grad_norm": 0.5117999315261841, "learning_rate": 1.477945525406954e-07, "loss": 0.018, "step": 229920 }, { "epoch": 1.860425600776762, "grad_norm": 0.38138556480407715, "learning_rate": 1.4762419292504437e-07, "loss": 0.0125, "step": 229930 }, { "epoch": 1.8605065134719636, "grad_norm": 0.4363110065460205, "learning_rate": 1.4745393007978503e-07, "loss": 0.0155, "step": 229940 }, { "epoch": 1.8605874261671658, "grad_norm": 0.5047771334648132, "learning_rate": 1.4728376400831302e-07, "loss": 0.0192, "step": 229950 }, { "epoch": 1.8606683388623675, "grad_norm": 0.5548155307769775, "learning_rate": 1.471136947140217e-07, "loss": 0.0175, "step": 229960 }, { "epoch": 1.8607492515575694, "grad_norm": 0.34496933221817017, "learning_rate": 1.4694372220030285e-07, "loss": 0.0172, "step": 229970 }, { "epoch": 1.8608301642527714, "grad_norm": 0.2807772755622864, "learning_rate": 1.4677384647054593e-07, "loss": 0.0281, "step": 229980 }, { "epoch": 1.860911076947973, "grad_norm": 0.22763949632644653, "learning_rate": 1.4660406752813937e-07, "loss": 0.0135, "step": 229990 }, { "epoch": 1.860991989643175, "grad_norm": 0.24244645237922668, "learning_rate": 1.4643438537646882e-07, "loss": 0.0124, "step": 230000 }, { "epoch": 1.861072902338377, "grad_norm": 0.47279033064842224, "learning_rate": 1.4626480001891764e-07, "loss": 0.0176, "step": 230010 }, { "epoch": 1.8611538150335787, "grad_norm": 0.4273361265659332, "learning_rate": 1.460953114588687e-07, "loss": 0.0175, "step": 230020 }, { "epoch": 1.8612347277287806, "grad_norm": 0.39593175053596497, "learning_rate": 1.459259196997015e-07, "loss": 0.0261, "step": 230030 }, { "epoch": 1.8613156404239826, "grad_norm": 0.34129995107650757, "learning_rate": 1.457566247447939e-07, "loss": 0.0199, "step": 230040 }, { "epoch": 1.8613965531191843, "grad_norm": 0.34973183274269104, "learning_rate": 1.4558742659752433e-07, "loss": 0.0148, "step": 230050 }, { "epoch": 1.8614774658143862, "grad_norm": 0.13508641719818115, "learning_rate": 1.4541832526126398e-07, "loss": 0.0091, "step": 230060 }, { "epoch": 1.8615583785095882, "grad_norm": 0.4851714074611664, "learning_rate": 1.4524932073938681e-07, "loss": 0.0204, "step": 230070 }, { "epoch": 1.8616392912047899, "grad_norm": 0.40889331698417664, "learning_rate": 1.4508041303526343e-07, "loss": 0.0172, "step": 230080 }, { "epoch": 1.861720203899992, "grad_norm": 0.5022828578948975, "learning_rate": 1.449116021522623e-07, "loss": 0.0164, "step": 230090 }, { "epoch": 1.8618011165951938, "grad_norm": 0.1075124740600586, "learning_rate": 1.4474288809374903e-07, "loss": 0.0195, "step": 230100 }, { "epoch": 1.8618820292903957, "grad_norm": 0.4054205119609833, "learning_rate": 1.4457427086308929e-07, "loss": 0.0161, "step": 230110 }, { "epoch": 1.8619629419855976, "grad_norm": 0.04794972762465477, "learning_rate": 1.4440575046364534e-07, "loss": 0.0118, "step": 230120 }, { "epoch": 1.8620438546807994, "grad_norm": 0.11253484338521957, "learning_rate": 1.4423732689877844e-07, "loss": 0.0136, "step": 230130 }, { "epoch": 1.8621247673760013, "grad_norm": 0.36547720432281494, "learning_rate": 1.44069000171847e-07, "loss": 0.0171, "step": 230140 }, { "epoch": 1.8622056800712032, "grad_norm": 0.4678318202495575, "learning_rate": 1.4390077028620775e-07, "loss": 0.023, "step": 230150 }, { "epoch": 1.862286592766405, "grad_norm": 0.3470490872859955, "learning_rate": 1.4373263724521634e-07, "loss": 0.0225, "step": 230160 }, { "epoch": 1.862367505461607, "grad_norm": 0.2706350088119507, "learning_rate": 1.4356460105222515e-07, "loss": 0.0289, "step": 230170 }, { "epoch": 1.8624484181568088, "grad_norm": 0.3558761775493622, "learning_rate": 1.433966617105853e-07, "loss": 0.0129, "step": 230180 }, { "epoch": 1.8625293308520106, "grad_norm": 0.41385525465011597, "learning_rate": 1.4322881922364695e-07, "loss": 0.0219, "step": 230190 }, { "epoch": 1.8626102435472127, "grad_norm": 0.35584545135498047, "learning_rate": 1.430610735947563e-07, "loss": 0.0214, "step": 230200 }, { "epoch": 1.8626911562424144, "grad_norm": 0.5915288925170898, "learning_rate": 1.4289342482725953e-07, "loss": 0.0133, "step": 230210 }, { "epoch": 1.8627720689376162, "grad_norm": 0.18500253558158875, "learning_rate": 1.4272587292449902e-07, "loss": 0.0144, "step": 230220 }, { "epoch": 1.8628529816328183, "grad_norm": 0.32155513763427734, "learning_rate": 1.4255841788981706e-07, "loss": 0.0163, "step": 230230 }, { "epoch": 1.86293389432802, "grad_norm": 0.5197516083717346, "learning_rate": 1.423910597265532e-07, "loss": 0.0222, "step": 230240 }, { "epoch": 1.863014807023222, "grad_norm": 0.5754839181900024, "learning_rate": 1.4222379843804423e-07, "loss": 0.0226, "step": 230250 }, { "epoch": 1.863095719718424, "grad_norm": 0.39187902212142944, "learning_rate": 1.4205663402762639e-07, "loss": 0.0214, "step": 230260 }, { "epoch": 1.8631766324136256, "grad_norm": 0.11026763916015625, "learning_rate": 1.418895664986336e-07, "loss": 0.017, "step": 230270 }, { "epoch": 1.8632575451088276, "grad_norm": 0.26306262612342834, "learning_rate": 1.417225958543983e-07, "loss": 0.0139, "step": 230280 }, { "epoch": 1.8633384578040295, "grad_norm": 0.5033441781997681, "learning_rate": 1.415557220982483e-07, "loss": 0.0138, "step": 230290 }, { "epoch": 1.8634193704992312, "grad_norm": 0.17399141192436218, "learning_rate": 1.413889452335132e-07, "loss": 0.0234, "step": 230300 }, { "epoch": 1.8635002831944332, "grad_norm": 0.3218807876110077, "learning_rate": 1.4122226526351923e-07, "loss": 0.0162, "step": 230310 }, { "epoch": 1.8635811958896351, "grad_norm": 0.6808648705482483, "learning_rate": 1.4105568219158872e-07, "loss": 0.0165, "step": 230320 }, { "epoch": 1.8636621085848368, "grad_norm": 0.31028592586517334, "learning_rate": 1.4088919602104568e-07, "loss": 0.0114, "step": 230330 }, { "epoch": 1.863743021280039, "grad_norm": 0.32651010155677795, "learning_rate": 1.4072280675520912e-07, "loss": 0.0193, "step": 230340 }, { "epoch": 1.8638239339752407, "grad_norm": 0.7045677900314331, "learning_rate": 1.405565143973986e-07, "loss": 0.0182, "step": 230350 }, { "epoch": 1.8639048466704424, "grad_norm": 0.2304474264383316, "learning_rate": 1.403903189509287e-07, "loss": 0.0173, "step": 230360 }, { "epoch": 1.8639857593656446, "grad_norm": 0.32877200841903687, "learning_rate": 1.4022422041911566e-07, "loss": 0.0123, "step": 230370 }, { "epoch": 1.8640666720608463, "grad_norm": 0.28854772448539734, "learning_rate": 1.400582188052707e-07, "loss": 0.0167, "step": 230380 }, { "epoch": 1.8641475847560482, "grad_norm": 0.9890501499176025, "learning_rate": 1.3989231411270453e-07, "loss": 0.0173, "step": 230390 }, { "epoch": 1.8642284974512502, "grad_norm": 0.5564588308334351, "learning_rate": 1.3972650634472674e-07, "loss": 0.0259, "step": 230400 }, { "epoch": 1.864309410146452, "grad_norm": 0.4935191571712494, "learning_rate": 1.3956079550464242e-07, "loss": 0.0218, "step": 230410 }, { "epoch": 1.8643903228416538, "grad_norm": 0.4816468358039856, "learning_rate": 1.3939518159575782e-07, "loss": 0.0189, "step": 230420 }, { "epoch": 1.8644712355368558, "grad_norm": 0.23352232575416565, "learning_rate": 1.3922966462137422e-07, "loss": 0.0108, "step": 230430 }, { "epoch": 1.8645521482320575, "grad_norm": 0.6177754402160645, "learning_rate": 1.3906424458479506e-07, "loss": 0.0276, "step": 230440 }, { "epoch": 1.8646330609272594, "grad_norm": 0.2999916076660156, "learning_rate": 1.3889892148931604e-07, "loss": 0.0244, "step": 230450 }, { "epoch": 1.8647139736224614, "grad_norm": 0.6461942791938782, "learning_rate": 1.3873369533823623e-07, "loss": 0.0148, "step": 230460 }, { "epoch": 1.864794886317663, "grad_norm": 0.4676847755908966, "learning_rate": 1.3856856613485126e-07, "loss": 0.0168, "step": 230470 }, { "epoch": 1.8648757990128653, "grad_norm": 0.3258620798587799, "learning_rate": 1.3840353388245187e-07, "loss": 0.0146, "step": 230480 }, { "epoch": 1.864956711708067, "grad_norm": 0.12513367831707, "learning_rate": 1.3823859858433152e-07, "loss": 0.0133, "step": 230490 }, { "epoch": 1.865037624403269, "grad_norm": 0.43390214443206787, "learning_rate": 1.3807376024377928e-07, "loss": 0.014, "step": 230500 }, { "epoch": 1.8651185370984709, "grad_norm": 0.42139923572540283, "learning_rate": 1.3790901886408026e-07, "loss": 0.013, "step": 230510 }, { "epoch": 1.8651994497936726, "grad_norm": 0.10518530756235123, "learning_rate": 1.377443744485224e-07, "loss": 0.0108, "step": 230520 }, { "epoch": 1.8652803624888745, "grad_norm": 0.2111365795135498, "learning_rate": 1.3757982700038863e-07, "loss": 0.0217, "step": 230530 }, { "epoch": 1.8653612751840765, "grad_norm": 0.1921961009502411, "learning_rate": 1.374153765229602e-07, "loss": 0.0117, "step": 230540 }, { "epoch": 1.8654421878792782, "grad_norm": 0.4367388188838959, "learning_rate": 1.3725102301951621e-07, "loss": 0.0246, "step": 230550 }, { "epoch": 1.8655231005744801, "grad_norm": 0.25424161553382874, "learning_rate": 1.3708676649333508e-07, "loss": 0.0109, "step": 230560 }, { "epoch": 1.865604013269682, "grad_norm": 0.804144024848938, "learning_rate": 1.3692260694769256e-07, "loss": 0.0194, "step": 230570 }, { "epoch": 1.8656849259648838, "grad_norm": 0.331142395734787, "learning_rate": 1.3675854438586155e-07, "loss": 0.0175, "step": 230580 }, { "epoch": 1.8657658386600857, "grad_norm": 0.18789225816726685, "learning_rate": 1.3659457881111504e-07, "loss": 0.0364, "step": 230590 }, { "epoch": 1.8658467513552877, "grad_norm": 0.4105475842952728, "learning_rate": 1.364307102267226e-07, "loss": 0.0185, "step": 230600 }, { "epoch": 1.8659276640504894, "grad_norm": 0.6931445002555847, "learning_rate": 1.362669386359522e-07, "loss": 0.0197, "step": 230610 }, { "epoch": 1.8660085767456915, "grad_norm": 0.15260019898414612, "learning_rate": 1.3610326404206952e-07, "loss": 0.0161, "step": 230620 }, { "epoch": 1.8660894894408933, "grad_norm": 0.455526739358902, "learning_rate": 1.3593968644834032e-07, "loss": 0.0217, "step": 230630 }, { "epoch": 1.8661704021360952, "grad_norm": 0.34972408413887024, "learning_rate": 1.3577620585802475e-07, "loss": 0.0223, "step": 230640 }, { "epoch": 1.8662513148312971, "grad_norm": 0.5356460213661194, "learning_rate": 1.3561282227438298e-07, "loss": 0.0285, "step": 230650 }, { "epoch": 1.8663322275264989, "grad_norm": 0.6993470788002014, "learning_rate": 1.3544953570067576e-07, "loss": 0.0221, "step": 230660 }, { "epoch": 1.8664131402217008, "grad_norm": 0.31999391317367554, "learning_rate": 1.352863461401571e-07, "loss": 0.0112, "step": 230670 }, { "epoch": 1.8664940529169027, "grad_norm": 0.5981101989746094, "learning_rate": 1.3512325359608282e-07, "loss": 0.0149, "step": 230680 }, { "epoch": 1.8665749656121045, "grad_norm": 0.2976010739803314, "learning_rate": 1.3496025807170575e-07, "loss": 0.0149, "step": 230690 }, { "epoch": 1.8666558783073064, "grad_norm": 0.2311851978302002, "learning_rate": 1.347973595702745e-07, "loss": 0.0142, "step": 230700 }, { "epoch": 1.8667367910025083, "grad_norm": 0.2995583415031433, "learning_rate": 1.346345580950392e-07, "loss": 0.0146, "step": 230710 }, { "epoch": 1.86681770369771, "grad_norm": 0.20070640742778778, "learning_rate": 1.344718536492473e-07, "loss": 0.0205, "step": 230720 }, { "epoch": 1.8668986163929122, "grad_norm": 0.5902143120765686, "learning_rate": 1.3430924623614173e-07, "loss": 0.0216, "step": 230730 }, { "epoch": 1.866979529088114, "grad_norm": 0.5105782151222229, "learning_rate": 1.3414673585896654e-07, "loss": 0.0187, "step": 230740 }, { "epoch": 1.8670604417833156, "grad_norm": 0.10199008136987686, "learning_rate": 1.3398432252096306e-07, "loss": 0.0098, "step": 230750 }, { "epoch": 1.8671413544785178, "grad_norm": 0.3438378572463989, "learning_rate": 1.3382200622536922e-07, "loss": 0.014, "step": 230760 }, { "epoch": 1.8672222671737195, "grad_norm": 0.4528804421424866, "learning_rate": 1.3365978697542248e-07, "loss": 0.0112, "step": 230770 }, { "epoch": 1.8673031798689215, "grad_norm": 0.5920117497444153, "learning_rate": 1.33497664774358e-07, "loss": 0.0272, "step": 230780 }, { "epoch": 1.8673840925641234, "grad_norm": 0.17630332708358765, "learning_rate": 1.3333563962540875e-07, "loss": 0.0117, "step": 230790 }, { "epoch": 1.8674650052593251, "grad_norm": 0.48437827825546265, "learning_rate": 1.3317371153180657e-07, "loss": 0.0147, "step": 230800 }, { "epoch": 1.867545917954527, "grad_norm": 0.5186091065406799, "learning_rate": 1.330118804967795e-07, "loss": 0.021, "step": 230810 }, { "epoch": 1.867626830649729, "grad_norm": 0.34107837080955505, "learning_rate": 1.328501465235571e-07, "loss": 0.0178, "step": 230820 }, { "epoch": 1.8677077433449307, "grad_norm": 0.27684223651885986, "learning_rate": 1.3268850961536295e-07, "loss": 0.0183, "step": 230830 }, { "epoch": 1.8677886560401327, "grad_norm": 0.40297701954841614, "learning_rate": 1.3252696977542057e-07, "loss": 0.0119, "step": 230840 }, { "epoch": 1.8678695687353346, "grad_norm": 0.42437753081321716, "learning_rate": 1.3236552700695294e-07, "loss": 0.0208, "step": 230850 }, { "epoch": 1.8679504814305363, "grad_norm": 0.5251851677894592, "learning_rate": 1.3220418131317858e-07, "loss": 0.0182, "step": 230860 }, { "epoch": 1.8680313941257385, "grad_norm": 0.37751030921936035, "learning_rate": 1.3204293269731493e-07, "loss": 0.0132, "step": 230870 }, { "epoch": 1.8681123068209402, "grad_norm": 0.13920274376869202, "learning_rate": 1.3188178116257944e-07, "loss": 0.0133, "step": 230880 }, { "epoch": 1.868193219516142, "grad_norm": 0.3372665047645569, "learning_rate": 1.317207267121834e-07, "loss": 0.0112, "step": 230890 }, { "epoch": 1.868274132211344, "grad_norm": 0.0077207256108522415, "learning_rate": 1.315597693493409e-07, "loss": 0.0189, "step": 230900 }, { "epoch": 1.8683550449065458, "grad_norm": 0.40477150678634644, "learning_rate": 1.313989090772616e-07, "loss": 0.0225, "step": 230910 }, { "epoch": 1.8684359576017477, "grad_norm": 0.07725220918655396, "learning_rate": 1.3123814589915186e-07, "loss": 0.0133, "step": 230920 }, { "epoch": 1.8685168702969497, "grad_norm": 0.26785337924957275, "learning_rate": 1.3107747981821905e-07, "loss": 0.0175, "step": 230930 }, { "epoch": 1.8685977829921514, "grad_norm": 0.42728808522224426, "learning_rate": 1.3091691083766733e-07, "loss": 0.0205, "step": 230940 }, { "epoch": 1.8686786956873533, "grad_norm": 0.2317631095647812, "learning_rate": 1.3075643896069911e-07, "loss": 0.0126, "step": 230950 }, { "epoch": 1.8687596083825553, "grad_norm": 0.7031252384185791, "learning_rate": 1.3059606419051408e-07, "loss": 0.014, "step": 230960 }, { "epoch": 1.868840521077757, "grad_norm": 0.20802633464336395, "learning_rate": 1.3043578653031074e-07, "loss": 0.0102, "step": 230970 }, { "epoch": 1.868921433772959, "grad_norm": 0.24068309366703033, "learning_rate": 1.3027560598328547e-07, "loss": 0.02, "step": 230980 }, { "epoch": 1.8690023464681609, "grad_norm": 0.028367700055241585, "learning_rate": 1.3011552255263295e-07, "loss": 0.0142, "step": 230990 }, { "epoch": 1.8690832591633626, "grad_norm": 0.3799012005329132, "learning_rate": 1.2995553624154445e-07, "loss": 0.011, "step": 231000 }, { "epoch": 1.8691641718585648, "grad_norm": 0.28241950273513794, "learning_rate": 1.2979564705321357e-07, "loss": 0.0168, "step": 231010 }, { "epoch": 1.8692450845537665, "grad_norm": 0.19037151336669922, "learning_rate": 1.296358549908261e-07, "loss": 0.0137, "step": 231020 }, { "epoch": 1.8693259972489684, "grad_norm": 0.23869550228118896, "learning_rate": 1.2947616005756892e-07, "loss": 0.0105, "step": 231030 }, { "epoch": 1.8694069099441704, "grad_norm": 0.32983091473579407, "learning_rate": 1.2931656225662893e-07, "loss": 0.0203, "step": 231040 }, { "epoch": 1.869487822639372, "grad_norm": 0.3978103697299957, "learning_rate": 1.2915706159118747e-07, "loss": 0.014, "step": 231050 }, { "epoch": 1.869568735334574, "grad_norm": 0.2928551435470581, "learning_rate": 1.2899765806442478e-07, "loss": 0.0232, "step": 231060 }, { "epoch": 1.869649648029776, "grad_norm": 0.5494774580001831, "learning_rate": 1.2883835167952163e-07, "loss": 0.0253, "step": 231070 }, { "epoch": 1.8697305607249777, "grad_norm": 0.3648001253604889, "learning_rate": 1.2867914243965274e-07, "loss": 0.02, "step": 231080 }, { "epoch": 1.8698114734201796, "grad_norm": 0.290666401386261, "learning_rate": 1.2852003034799554e-07, "loss": 0.0091, "step": 231090 }, { "epoch": 1.8698923861153816, "grad_norm": 0.35430121421813965, "learning_rate": 1.283610154077225e-07, "loss": 0.0152, "step": 231100 }, { "epoch": 1.8699732988105833, "grad_norm": 0.059303008019924164, "learning_rate": 1.2820209762200385e-07, "loss": 0.0122, "step": 231110 }, { "epoch": 1.8700542115057852, "grad_norm": 0.13171979784965515, "learning_rate": 1.280432769940093e-07, "loss": 0.0175, "step": 231120 }, { "epoch": 1.8701351242009872, "grad_norm": 0.3395382761955261, "learning_rate": 1.2788455352690744e-07, "loss": 0.02, "step": 231130 }, { "epoch": 1.8702160368961889, "grad_norm": 0.3052603304386139, "learning_rate": 1.277259272238618e-07, "loss": 0.0166, "step": 231140 }, { "epoch": 1.870296949591391, "grad_norm": 0.40717190504074097, "learning_rate": 1.275673980880371e-07, "loss": 0.0172, "step": 231150 }, { "epoch": 1.8703778622865928, "grad_norm": 0.08175656944513321, "learning_rate": 1.2740896612259413e-07, "loss": 0.015, "step": 231160 }, { "epoch": 1.8704587749817947, "grad_norm": 0.3828573524951935, "learning_rate": 1.2725063133069314e-07, "loss": 0.0229, "step": 231170 }, { "epoch": 1.8705396876769966, "grad_norm": 0.23806391656398773, "learning_rate": 1.2709239371549165e-07, "loss": 0.0198, "step": 231180 }, { "epoch": 1.8706206003721983, "grad_norm": 0.1729034185409546, "learning_rate": 1.2693425328014485e-07, "loss": 0.0247, "step": 231190 }, { "epoch": 1.8707015130674003, "grad_norm": 0.3928057849407196, "learning_rate": 1.2677621002780694e-07, "loss": 0.0207, "step": 231200 }, { "epoch": 1.8707824257626022, "grad_norm": 0.4535468518733978, "learning_rate": 1.2661826396162979e-07, "loss": 0.0175, "step": 231210 }, { "epoch": 1.870863338457804, "grad_norm": 0.3926721215248108, "learning_rate": 1.264604150847626e-07, "loss": 0.0159, "step": 231220 }, { "epoch": 1.8709442511530059, "grad_norm": 0.4986504018306732, "learning_rate": 1.2630266340035503e-07, "loss": 0.0166, "step": 231230 }, { "epoch": 1.8710251638482078, "grad_norm": 0.17418509721755981, "learning_rate": 1.261450089115518e-07, "loss": 0.0107, "step": 231240 }, { "epoch": 1.8711060765434095, "grad_norm": 0.24126192927360535, "learning_rate": 1.259874516214965e-07, "loss": 0.0194, "step": 231250 }, { "epoch": 1.8711869892386115, "grad_norm": 0.3243091404438019, "learning_rate": 1.2582999153333276e-07, "loss": 0.0169, "step": 231260 }, { "epoch": 1.8712679019338134, "grad_norm": 0.2570716440677643, "learning_rate": 1.2567262865019969e-07, "loss": 0.0074, "step": 231270 }, { "epoch": 1.8713488146290151, "grad_norm": 0.2797189950942993, "learning_rate": 1.2551536297523537e-07, "loss": 0.0162, "step": 231280 }, { "epoch": 1.8714297273242173, "grad_norm": 0.07177379727363586, "learning_rate": 1.2535819451157727e-07, "loss": 0.0144, "step": 231290 }, { "epoch": 1.871510640019419, "grad_norm": 0.556075930595398, "learning_rate": 1.2520112326235846e-07, "loss": 0.0219, "step": 231300 }, { "epoch": 1.871591552714621, "grad_norm": 0.03560452163219452, "learning_rate": 1.250441492307125e-07, "loss": 0.0143, "step": 231310 }, { "epoch": 1.871672465409823, "grad_norm": 0.19272027909755707, "learning_rate": 1.248872724197703e-07, "loss": 0.0103, "step": 231320 }, { "epoch": 1.8717533781050246, "grad_norm": 0.2061382234096527, "learning_rate": 1.247304928326587e-07, "loss": 0.0182, "step": 231330 }, { "epoch": 1.8718342908002266, "grad_norm": 0.10454026609659195, "learning_rate": 1.2457381047250527e-07, "loss": 0.0102, "step": 231340 }, { "epoch": 1.8719152034954285, "grad_norm": 0.2660134434700012, "learning_rate": 1.244172253424347e-07, "loss": 0.0165, "step": 231350 }, { "epoch": 1.8719961161906302, "grad_norm": 0.5734927654266357, "learning_rate": 1.242607374455701e-07, "loss": 0.0222, "step": 231360 }, { "epoch": 1.8720770288858322, "grad_norm": 0.2356182485818863, "learning_rate": 1.2410434678503225e-07, "loss": 0.0208, "step": 231370 }, { "epoch": 1.872157941581034, "grad_norm": 0.7357816696166992, "learning_rate": 1.2394805336393922e-07, "loss": 0.0132, "step": 231380 }, { "epoch": 1.8722388542762358, "grad_norm": 0.4642289876937866, "learning_rate": 1.2379185718540854e-07, "loss": 0.0142, "step": 231390 }, { "epoch": 1.872319766971438, "grad_norm": 0.61119145154953, "learning_rate": 1.2363575825255492e-07, "loss": 0.027, "step": 231400 }, { "epoch": 1.8724006796666397, "grad_norm": 0.4169650673866272, "learning_rate": 1.23479756568492e-07, "loss": 0.0212, "step": 231410 }, { "epoch": 1.8724815923618414, "grad_norm": 0.3755098283290863, "learning_rate": 1.2332385213633058e-07, "loss": 0.0239, "step": 231420 }, { "epoch": 1.8725625050570436, "grad_norm": 0.9735487103462219, "learning_rate": 1.2316804495917932e-07, "loss": 0.019, "step": 231430 }, { "epoch": 1.8726434177522453, "grad_norm": 0.19058258831501007, "learning_rate": 1.230123350401463e-07, "loss": 0.0164, "step": 231440 }, { "epoch": 1.8727243304474472, "grad_norm": 0.5931143164634705, "learning_rate": 1.2285672238233683e-07, "loss": 0.0197, "step": 231450 }, { "epoch": 1.8728052431426492, "grad_norm": 1.2604384422302246, "learning_rate": 1.2270120698885336e-07, "loss": 0.0213, "step": 231460 }, { "epoch": 1.872886155837851, "grad_norm": 0.3235422372817993, "learning_rate": 1.225457888627979e-07, "loss": 0.015, "step": 231470 }, { "epoch": 1.8729670685330528, "grad_norm": 0.18264874815940857, "learning_rate": 1.2239046800727072e-07, "loss": 0.0167, "step": 231480 }, { "epoch": 1.8730479812282548, "grad_norm": 0.1890539675951004, "learning_rate": 1.2223524442536716e-07, "loss": 0.0175, "step": 231490 }, { "epoch": 1.8731288939234565, "grad_norm": 0.2923528254032135, "learning_rate": 1.2208011812018526e-07, "loss": 0.0205, "step": 231500 }, { "epoch": 1.8732098066186584, "grad_norm": 0.254752516746521, "learning_rate": 1.219250890948176e-07, "loss": 0.0257, "step": 231510 }, { "epoch": 1.8732907193138604, "grad_norm": 0.43666326999664307, "learning_rate": 1.2177015735235554e-07, "loss": 0.0162, "step": 231520 }, { "epoch": 1.873371632009062, "grad_norm": 0.35421910881996155, "learning_rate": 1.216153228958894e-07, "loss": 0.0189, "step": 231530 }, { "epoch": 1.8734525447042643, "grad_norm": 0.5063210725784302, "learning_rate": 1.2146058572850726e-07, "loss": 0.0147, "step": 231540 }, { "epoch": 1.873533457399466, "grad_norm": 0.37418079376220703, "learning_rate": 1.2130594585329448e-07, "loss": 0.0151, "step": 231550 }, { "epoch": 1.8736143700946677, "grad_norm": 0.2552913725376129, "learning_rate": 1.2115140327333518e-07, "loss": 0.0225, "step": 231560 }, { "epoch": 1.8736952827898699, "grad_norm": 0.13880036771297455, "learning_rate": 1.209969579917114e-07, "loss": 0.0145, "step": 231570 }, { "epoch": 1.8737761954850716, "grad_norm": 0.19676905870437622, "learning_rate": 1.2084261001150343e-07, "loss": 0.0259, "step": 231580 }, { "epoch": 1.8738571081802735, "grad_norm": 0.4507734775543213, "learning_rate": 1.2068835933578938e-07, "loss": 0.0203, "step": 231590 }, { "epoch": 1.8739380208754755, "grad_norm": 0.349042683839798, "learning_rate": 1.2053420596764508e-07, "loss": 0.0103, "step": 231600 }, { "epoch": 1.8740189335706772, "grad_norm": 0.40543392300605774, "learning_rate": 1.203801499101448e-07, "loss": 0.018, "step": 231610 }, { "epoch": 1.874099846265879, "grad_norm": 0.2247849553823471, "learning_rate": 1.2022619116636157e-07, "loss": 0.0178, "step": 231620 }, { "epoch": 1.874180758961081, "grad_norm": 0.1585109680891037, "learning_rate": 1.2007232973936468e-07, "loss": 0.0202, "step": 231630 }, { "epoch": 1.8742616716562828, "grad_norm": 0.7063809633255005, "learning_rate": 1.1991856563222382e-07, "loss": 0.0104, "step": 231640 }, { "epoch": 1.8743425843514847, "grad_norm": 0.29103830456733704, "learning_rate": 1.197648988480049e-07, "loss": 0.0104, "step": 231650 }, { "epoch": 1.8744234970466866, "grad_norm": 0.20239748060703278, "learning_rate": 1.196113293897716e-07, "loss": 0.0103, "step": 231660 }, { "epoch": 1.8745044097418884, "grad_norm": 0.2646493911743164, "learning_rate": 1.1945785726058812e-07, "loss": 0.0204, "step": 231670 }, { "epoch": 1.8745853224370905, "grad_norm": 0.535766065120697, "learning_rate": 1.1930448246351424e-07, "loss": 0.0105, "step": 231680 }, { "epoch": 1.8746662351322922, "grad_norm": 0.4658214747905731, "learning_rate": 1.1915120500160859e-07, "loss": 0.0176, "step": 231690 }, { "epoch": 1.8747471478274942, "grad_norm": 0.4792921245098114, "learning_rate": 1.1899802487792878e-07, "loss": 0.0215, "step": 231700 }, { "epoch": 1.8748280605226961, "grad_norm": 0.28749850392341614, "learning_rate": 1.1884494209552843e-07, "loss": 0.0118, "step": 231710 }, { "epoch": 1.8749089732178978, "grad_norm": 0.3319237232208252, "learning_rate": 1.186919566574618e-07, "loss": 0.0295, "step": 231720 }, { "epoch": 1.8749898859130998, "grad_norm": 0.3377103805541992, "learning_rate": 1.1853906856677866e-07, "loss": 0.0136, "step": 231730 }, { "epoch": 1.8750707986083017, "grad_norm": 0.3096349537372589, "learning_rate": 1.1838627782652878e-07, "loss": 0.0181, "step": 231740 }, { "epoch": 1.8751517113035034, "grad_norm": 0.20986607670783997, "learning_rate": 1.1823358443975919e-07, "loss": 0.0232, "step": 231750 }, { "epoch": 1.8752326239987054, "grad_norm": 0.3245997428894043, "learning_rate": 1.1808098840951465e-07, "loss": 0.0192, "step": 231760 }, { "epoch": 1.8753135366939073, "grad_norm": 0.144929900765419, "learning_rate": 1.1792848973883886e-07, "loss": 0.0169, "step": 231770 }, { "epoch": 1.875394449389109, "grad_norm": 0.2984161078929901, "learning_rate": 1.1777608843077271e-07, "loss": 0.0103, "step": 231780 }, { "epoch": 1.875475362084311, "grad_norm": 0.14693836867809296, "learning_rate": 1.1762378448835599e-07, "loss": 0.0217, "step": 231790 }, { "epoch": 1.875556274779513, "grad_norm": 0.40463849902153015, "learning_rate": 1.1747157791462515e-07, "loss": 0.0148, "step": 231800 }, { "epoch": 1.8756371874747146, "grad_norm": 0.4453214704990387, "learning_rate": 1.1731946871261668e-07, "loss": 0.0319, "step": 231810 }, { "epoch": 1.8757181001699168, "grad_norm": 0.4250377118587494, "learning_rate": 1.1716745688536313e-07, "loss": 0.0272, "step": 231820 }, { "epoch": 1.8757990128651185, "grad_norm": 0.585246741771698, "learning_rate": 1.1701554243589708e-07, "loss": 0.0191, "step": 231830 }, { "epoch": 1.8758799255603205, "grad_norm": 0.1932532638311386, "learning_rate": 1.1686372536724722e-07, "loss": 0.0142, "step": 231840 }, { "epoch": 1.8759608382555224, "grad_norm": 0.1325518637895584, "learning_rate": 1.1671200568244112e-07, "loss": 0.0187, "step": 231850 }, { "epoch": 1.8760417509507241, "grad_norm": 0.11950796097517014, "learning_rate": 1.1656038338450636e-07, "loss": 0.0137, "step": 231860 }, { "epoch": 1.876122663645926, "grad_norm": 0.2875324487686157, "learning_rate": 1.1640885847646443e-07, "loss": 0.0279, "step": 231870 }, { "epoch": 1.876203576341128, "grad_norm": 0.274773508310318, "learning_rate": 1.162574309613379e-07, "loss": 0.0123, "step": 231880 }, { "epoch": 1.8762844890363297, "grad_norm": 0.4101565480232239, "learning_rate": 1.1610610084214713e-07, "loss": 0.0346, "step": 231890 }, { "epoch": 1.8763654017315317, "grad_norm": 0.20476682484149933, "learning_rate": 1.1595486812191026e-07, "loss": 0.0238, "step": 231900 }, { "epoch": 1.8764463144267336, "grad_norm": 0.1404220163822174, "learning_rate": 1.158037328036421e-07, "loss": 0.0263, "step": 231910 }, { "epoch": 1.8765272271219353, "grad_norm": 0.31886547803878784, "learning_rate": 1.1565269489035802e-07, "loss": 0.0264, "step": 231920 }, { "epoch": 1.8766081398171373, "grad_norm": 0.357695996761322, "learning_rate": 1.155017543850695e-07, "loss": 0.0231, "step": 231930 }, { "epoch": 1.8766890525123392, "grad_norm": 0.23308949172496796, "learning_rate": 1.1535091129078691e-07, "loss": 0.0113, "step": 231940 }, { "epoch": 1.876769965207541, "grad_norm": 0.31469547748565674, "learning_rate": 1.1520016561051839e-07, "loss": 0.0226, "step": 231950 }, { "epoch": 1.876850877902743, "grad_norm": 0.1319650560617447, "learning_rate": 1.1504951734727043e-07, "loss": 0.021, "step": 231960 }, { "epoch": 1.8769317905979448, "grad_norm": 0.2493007630109787, "learning_rate": 1.1489896650404675e-07, "loss": 0.0123, "step": 231970 }, { "epoch": 1.8770127032931467, "grad_norm": 0.34086060523986816, "learning_rate": 1.1474851308385104e-07, "loss": 0.0241, "step": 231980 }, { "epoch": 1.8770936159883487, "grad_norm": 0.33561941981315613, "learning_rate": 1.1459815708968258e-07, "loss": 0.0096, "step": 231990 }, { "epoch": 1.8771745286835504, "grad_norm": 0.21360059082508087, "learning_rate": 1.1444789852454008e-07, "loss": 0.0116, "step": 232000 }, { "epoch": 1.8772554413787523, "grad_norm": 0.6044825911521912, "learning_rate": 1.1429773739142058e-07, "loss": 0.0204, "step": 232010 }, { "epoch": 1.8773363540739543, "grad_norm": 0.29687735438346863, "learning_rate": 1.1414767369331892e-07, "loss": 0.0167, "step": 232020 }, { "epoch": 1.877417266769156, "grad_norm": 0.7610193490982056, "learning_rate": 1.1399770743322713e-07, "loss": 0.0267, "step": 232030 }, { "epoch": 1.877498179464358, "grad_norm": 0.09861389547586441, "learning_rate": 1.1384783861413562e-07, "loss": 0.0134, "step": 232040 }, { "epoch": 1.8775790921595599, "grad_norm": 0.45363518595695496, "learning_rate": 1.1369806723903531e-07, "loss": 0.0324, "step": 232050 }, { "epoch": 1.8776600048547616, "grad_norm": 0.18992002308368683, "learning_rate": 1.135483933109105e-07, "loss": 0.0127, "step": 232060 }, { "epoch": 1.8777409175499638, "grad_norm": 0.30841630697250366, "learning_rate": 1.1339881683274712e-07, "loss": 0.0167, "step": 232070 }, { "epoch": 1.8778218302451655, "grad_norm": 0.46834397315979004, "learning_rate": 1.132493378075289e-07, "loss": 0.0166, "step": 232080 }, { "epoch": 1.8779027429403672, "grad_norm": 0.3074188232421875, "learning_rate": 1.1309995623823622e-07, "loss": 0.0102, "step": 232090 }, { "epoch": 1.8779836556355693, "grad_norm": 0.5633866190910339, "learning_rate": 1.1295067212784782e-07, "loss": 0.0099, "step": 232100 }, { "epoch": 1.878064568330771, "grad_norm": 0.23034299910068512, "learning_rate": 1.1280148547934134e-07, "loss": 0.0143, "step": 232110 }, { "epoch": 1.878145481025973, "grad_norm": 0.41707172989845276, "learning_rate": 1.1265239629569214e-07, "loss": 0.0133, "step": 232120 }, { "epoch": 1.878226393721175, "grad_norm": 0.13971008360385895, "learning_rate": 1.1250340457987286e-07, "loss": 0.0173, "step": 232130 }, { "epoch": 1.8783073064163767, "grad_norm": 0.5366643667221069, "learning_rate": 1.1235451033485556e-07, "loss": 0.0262, "step": 232140 }, { "epoch": 1.8783882191115786, "grad_norm": 0.1766887605190277, "learning_rate": 1.1220571356360898e-07, "loss": 0.0235, "step": 232150 }, { "epoch": 1.8784691318067805, "grad_norm": 0.3589416742324829, "learning_rate": 1.1205701426910132e-07, "loss": 0.0116, "step": 232160 }, { "epoch": 1.8785500445019823, "grad_norm": 0.33194583654403687, "learning_rate": 1.119084124542974e-07, "loss": 0.0148, "step": 232170 }, { "epoch": 1.8786309571971842, "grad_norm": 0.47797533869743347, "learning_rate": 1.1175990812216097e-07, "loss": 0.0168, "step": 232180 }, { "epoch": 1.8787118698923861, "grad_norm": 0.32211554050445557, "learning_rate": 1.1161150127565357e-07, "loss": 0.0162, "step": 232190 }, { "epoch": 1.8787927825875879, "grad_norm": 0.288418710231781, "learning_rate": 1.1146319191773503e-07, "loss": 0.0229, "step": 232200 }, { "epoch": 1.87887369528279, "grad_norm": 0.3395719528198242, "learning_rate": 1.1131498005136243e-07, "loss": 0.0164, "step": 232210 }, { "epoch": 1.8789546079779917, "grad_norm": 0.17438504099845886, "learning_rate": 1.1116686567949287e-07, "loss": 0.0113, "step": 232220 }, { "epoch": 1.8790355206731935, "grad_norm": 0.7140975594520569, "learning_rate": 1.1101884880507896e-07, "loss": 0.0263, "step": 232230 }, { "epoch": 1.8791164333683956, "grad_norm": 0.5330913066864014, "learning_rate": 1.1087092943107281e-07, "loss": 0.0337, "step": 232240 }, { "epoch": 1.8791973460635973, "grad_norm": 0.41473743319511414, "learning_rate": 1.1072310756042481e-07, "loss": 0.012, "step": 232250 }, { "epoch": 1.8792782587587993, "grad_norm": 0.28050023317337036, "learning_rate": 1.1057538319608263e-07, "loss": 0.0249, "step": 232260 }, { "epoch": 1.8793591714540012, "grad_norm": 0.6387547850608826, "learning_rate": 1.1042775634099223e-07, "loss": 0.0189, "step": 232270 }, { "epoch": 1.879440084149203, "grad_norm": 0.2801707684993744, "learning_rate": 1.1028022699809904e-07, "loss": 0.012, "step": 232280 }, { "epoch": 1.8795209968444049, "grad_norm": 0.20022015273571014, "learning_rate": 1.1013279517034237e-07, "loss": 0.0274, "step": 232290 }, { "epoch": 1.8796019095396068, "grad_norm": 0.3204880654811859, "learning_rate": 1.0998546086066542e-07, "loss": 0.0175, "step": 232300 }, { "epoch": 1.8796828222348085, "grad_norm": 0.37392348051071167, "learning_rate": 1.0983822407200473e-07, "loss": 0.0102, "step": 232310 }, { "epoch": 1.8797637349300105, "grad_norm": 0.3044486343860626, "learning_rate": 1.0969108480729684e-07, "loss": 0.0147, "step": 232320 }, { "epoch": 1.8798446476252124, "grad_norm": 0.22818920016288757, "learning_rate": 1.0954404306947664e-07, "loss": 0.006, "step": 232330 }, { "epoch": 1.8799255603204141, "grad_norm": 0.29491350054740906, "learning_rate": 1.093970988614762e-07, "loss": 0.0246, "step": 232340 }, { "epoch": 1.8800064730156163, "grad_norm": 0.1974066197872162, "learning_rate": 1.0925025218622598e-07, "loss": 0.0114, "step": 232350 }, { "epoch": 1.880087385710818, "grad_norm": 0.5739601254463196, "learning_rate": 1.0910350304665474e-07, "loss": 0.0185, "step": 232360 }, { "epoch": 1.88016829840602, "grad_norm": 0.19969458878040314, "learning_rate": 1.0895685144568902e-07, "loss": 0.0137, "step": 232370 }, { "epoch": 1.880249211101222, "grad_norm": 0.17798975110054016, "learning_rate": 1.088102973862537e-07, "loss": 0.018, "step": 232380 }, { "epoch": 1.8803301237964236, "grad_norm": 0.23083113133907318, "learning_rate": 1.0866384087127092e-07, "loss": 0.0237, "step": 232390 }, { "epoch": 1.8804110364916256, "grad_norm": 0.30604287981987, "learning_rate": 1.0851748190366218e-07, "loss": 0.0126, "step": 232400 }, { "epoch": 1.8804919491868275, "grad_norm": 0.023472419008612633, "learning_rate": 1.0837122048634518e-07, "loss": 0.0223, "step": 232410 }, { "epoch": 1.8805728618820292, "grad_norm": 0.2626926004886627, "learning_rate": 1.0822505662223814e-07, "loss": 0.0132, "step": 232420 }, { "epoch": 1.8806537745772312, "grad_norm": 0.02542162872850895, "learning_rate": 1.0807899031425484e-07, "loss": 0.0161, "step": 232430 }, { "epoch": 1.880734687272433, "grad_norm": 0.21699553728103638, "learning_rate": 1.0793302156530905e-07, "loss": 0.0198, "step": 232440 }, { "epoch": 1.8808155999676348, "grad_norm": 0.18132087588310242, "learning_rate": 1.0778715037831178e-07, "loss": 0.0148, "step": 232450 }, { "epoch": 1.8808965126628368, "grad_norm": 0.18795958161354065, "learning_rate": 1.0764137675617125e-07, "loss": 0.0187, "step": 232460 }, { "epoch": 1.8809774253580387, "grad_norm": 0.23727189004421234, "learning_rate": 1.0749570070179627e-07, "loss": 0.0163, "step": 232470 }, { "epoch": 1.8810583380532404, "grad_norm": 0.33377036452293396, "learning_rate": 1.0735012221808949e-07, "loss": 0.0108, "step": 232480 }, { "epoch": 1.8811392507484426, "grad_norm": 0.3648749589920044, "learning_rate": 1.0720464130795694e-07, "loss": 0.0158, "step": 232490 }, { "epoch": 1.8812201634436443, "grad_norm": 0.46877944469451904, "learning_rate": 1.0705925797429851e-07, "loss": 0.0189, "step": 232500 }, { "epoch": 1.8813010761388462, "grad_norm": 0.30634236335754395, "learning_rate": 1.0691397222001299e-07, "loss": 0.0254, "step": 232510 }, { "epoch": 1.8813819888340482, "grad_norm": 0.2681677043437958, "learning_rate": 1.0676878404799918e-07, "loss": 0.0108, "step": 232520 }, { "epoch": 1.8814629015292499, "grad_norm": 0.25859254598617554, "learning_rate": 1.0662369346115142e-07, "loss": 0.0234, "step": 232530 }, { "epoch": 1.8815438142244518, "grad_norm": 0.5957933664321899, "learning_rate": 1.0647870046236409e-07, "loss": 0.0235, "step": 232540 }, { "epoch": 1.8816247269196538, "grad_norm": 0.21579745411872864, "learning_rate": 1.0633380505452817e-07, "loss": 0.0143, "step": 232550 }, { "epoch": 1.8817056396148555, "grad_norm": 0.20908550918102264, "learning_rate": 1.0618900724053415e-07, "loss": 0.0169, "step": 232560 }, { "epoch": 1.8817865523100574, "grad_norm": 0.3582686185836792, "learning_rate": 1.0604430702326863e-07, "loss": 0.0261, "step": 232570 }, { "epoch": 1.8818674650052594, "grad_norm": 0.3737356662750244, "learning_rate": 1.0589970440561758e-07, "loss": 0.0236, "step": 232580 }, { "epoch": 1.881948377700461, "grad_norm": 0.3854082524776459, "learning_rate": 1.0575519939046541e-07, "loss": 0.012, "step": 232590 }, { "epoch": 1.8820292903956632, "grad_norm": 0.20455008745193481, "learning_rate": 1.0561079198069313e-07, "loss": 0.0158, "step": 232600 }, { "epoch": 1.882110203090865, "grad_norm": 0.37488073110580444, "learning_rate": 1.0546648217918176e-07, "loss": 0.0224, "step": 232610 }, { "epoch": 1.8821911157860667, "grad_norm": 0.3729456961154938, "learning_rate": 1.0532226998880734e-07, "loss": 0.0154, "step": 232620 }, { "epoch": 1.8822720284812688, "grad_norm": 0.6608521938323975, "learning_rate": 1.0517815541244869e-07, "loss": 0.0148, "step": 232630 }, { "epoch": 1.8823529411764706, "grad_norm": 0.24461868405342102, "learning_rate": 1.0503413845297739e-07, "loss": 0.0177, "step": 232640 }, { "epoch": 1.8824338538716725, "grad_norm": 0.3181142508983612, "learning_rate": 1.0489021911326668e-07, "loss": 0.0151, "step": 232650 }, { "epoch": 1.8825147665668744, "grad_norm": 0.2308449000120163, "learning_rate": 1.0474639739618653e-07, "loss": 0.0185, "step": 232660 }, { "epoch": 1.8825956792620762, "grad_norm": 0.6378964781761169, "learning_rate": 1.0460267330460516e-07, "loss": 0.0285, "step": 232670 }, { "epoch": 1.882676591957278, "grad_norm": 0.2802508473396301, "learning_rate": 1.0445904684138863e-07, "loss": 0.0249, "step": 232680 }, { "epoch": 1.88275750465248, "grad_norm": 0.25641554594039917, "learning_rate": 1.0431551800940187e-07, "loss": 0.0161, "step": 232690 }, { "epoch": 1.8828384173476818, "grad_norm": 0.04086410999298096, "learning_rate": 1.0417208681150648e-07, "loss": 0.0205, "step": 232700 }, { "epoch": 1.8829193300428837, "grad_norm": 0.3154420852661133, "learning_rate": 1.0402875325056349e-07, "loss": 0.0158, "step": 232710 }, { "epoch": 1.8830002427380856, "grad_norm": 0.6539326310157776, "learning_rate": 1.0388551732943119e-07, "loss": 0.0281, "step": 232720 }, { "epoch": 1.8830811554332874, "grad_norm": 0.2220565378665924, "learning_rate": 1.0374237905096563e-07, "loss": 0.0163, "step": 232730 }, { "epoch": 1.8831620681284895, "grad_norm": 0.8160320520401001, "learning_rate": 1.0359933841802228e-07, "loss": 0.02, "step": 232740 }, { "epoch": 1.8832429808236912, "grad_norm": 0.3447433114051819, "learning_rate": 1.0345639543345332e-07, "loss": 0.027, "step": 232750 }, { "epoch": 1.883323893518893, "grad_norm": 0.6011978983879089, "learning_rate": 1.0331355010010924e-07, "loss": 0.0136, "step": 232760 }, { "epoch": 1.8834048062140951, "grad_norm": 0.9943894743919373, "learning_rate": 1.0317080242083943e-07, "loss": 0.0192, "step": 232770 }, { "epoch": 1.8834857189092968, "grad_norm": 0.26922836899757385, "learning_rate": 1.0302815239848996e-07, "loss": 0.0139, "step": 232780 }, { "epoch": 1.8835666316044988, "grad_norm": 0.22739537060260773, "learning_rate": 1.0288560003590631e-07, "loss": 0.014, "step": 232790 }, { "epoch": 1.8836475442997007, "grad_norm": 0.3170163333415985, "learning_rate": 1.0274314533593122e-07, "loss": 0.0143, "step": 232800 }, { "epoch": 1.8837284569949024, "grad_norm": 0.3534499406814575, "learning_rate": 1.0260078830140464e-07, "loss": 0.0203, "step": 232810 }, { "epoch": 1.8838093696901044, "grad_norm": 0.30632123351097107, "learning_rate": 1.0245852893516761e-07, "loss": 0.0096, "step": 232820 }, { "epoch": 1.8838902823853063, "grad_norm": 0.35985296964645386, "learning_rate": 1.0231636724005567e-07, "loss": 0.0177, "step": 232830 }, { "epoch": 1.883971195080508, "grad_norm": 0.36116427183151245, "learning_rate": 1.0217430321890375e-07, "loss": 0.0172, "step": 232840 }, { "epoch": 1.88405210777571, "grad_norm": 0.532689094543457, "learning_rate": 1.0203233687454684e-07, "loss": 0.0229, "step": 232850 }, { "epoch": 1.884133020470912, "grad_norm": 0.16373862326145172, "learning_rate": 1.0189046820981374e-07, "loss": 0.0116, "step": 232860 }, { "epoch": 1.8842139331661136, "grad_norm": 0.247468501329422, "learning_rate": 1.01748697227535e-07, "loss": 0.0208, "step": 232870 }, { "epoch": 1.8842948458613158, "grad_norm": 0.5189964771270752, "learning_rate": 1.0160702393053833e-07, "loss": 0.0133, "step": 232880 }, { "epoch": 1.8843757585565175, "grad_norm": 0.10281442850828171, "learning_rate": 1.0146544832164817e-07, "loss": 0.0233, "step": 232890 }, { "epoch": 1.8844566712517192, "grad_norm": 0.3969533145427704, "learning_rate": 1.0132397040368836e-07, "loss": 0.0176, "step": 232900 }, { "epoch": 1.8845375839469214, "grad_norm": 0.488942414522171, "learning_rate": 1.0118259017948106e-07, "loss": 0.0221, "step": 232910 }, { "epoch": 1.884618496642123, "grad_norm": 0.4442540109157562, "learning_rate": 1.0104130765184461e-07, "loss": 0.011, "step": 232920 }, { "epoch": 1.884699409337325, "grad_norm": 0.13039162755012512, "learning_rate": 1.0090012282359674e-07, "loss": 0.0248, "step": 232930 }, { "epoch": 1.884780322032527, "grad_norm": 0.4678920805454254, "learning_rate": 1.0075903569755462e-07, "loss": 0.0184, "step": 232940 }, { "epoch": 1.8848612347277287, "grad_norm": 0.4068466126918793, "learning_rate": 1.0061804627652938e-07, "loss": 0.026, "step": 232950 }, { "epoch": 1.8849421474229306, "grad_norm": 0.6426492929458618, "learning_rate": 1.0047715456333485e-07, "loss": 0.0202, "step": 232960 }, { "epoch": 1.8850230601181326, "grad_norm": 0.3355419337749481, "learning_rate": 1.0033636056078044e-07, "loss": 0.0218, "step": 232970 }, { "epoch": 1.8851039728133343, "grad_norm": 0.6958321928977966, "learning_rate": 1.0019566427167338e-07, "loss": 0.0181, "step": 232980 }, { "epoch": 1.8851848855085362, "grad_norm": 0.5158588290214539, "learning_rate": 1.0005506569881973e-07, "loss": 0.0167, "step": 232990 }, { "epoch": 1.8852657982037382, "grad_norm": 0.30467870831489563, "learning_rate": 9.991456484502337e-08, "loss": 0.0151, "step": 233000 }, { "epoch": 1.88534671089894, "grad_norm": 0.6367955207824707, "learning_rate": 9.977416171308707e-08, "loss": 0.017, "step": 233010 }, { "epoch": 1.885427623594142, "grad_norm": 0.17014434933662415, "learning_rate": 9.963385630580969e-08, "loss": 0.0221, "step": 233020 }, { "epoch": 1.8855085362893438, "grad_norm": 0.12809628248214722, "learning_rate": 9.949364862599009e-08, "loss": 0.0241, "step": 233030 }, { "epoch": 1.8855894489845457, "grad_norm": 0.029661567881703377, "learning_rate": 9.935353867642439e-08, "loss": 0.012, "step": 233040 }, { "epoch": 1.8856703616797477, "grad_norm": 0.92763751745224, "learning_rate": 9.9213526459907e-08, "loss": 0.0131, "step": 233050 }, { "epoch": 1.8857512743749494, "grad_norm": 0.3486931025981903, "learning_rate": 9.907361197922849e-08, "loss": 0.0195, "step": 233060 }, { "epoch": 1.8858321870701513, "grad_norm": 0.6439037919044495, "learning_rate": 9.893379523718216e-08, "loss": 0.0204, "step": 233070 }, { "epoch": 1.8859130997653533, "grad_norm": 0.2144538313150406, "learning_rate": 9.879407623655357e-08, "loss": 0.0205, "step": 233080 }, { "epoch": 1.885994012460555, "grad_norm": 0.48999351263046265, "learning_rate": 9.865445498013049e-08, "loss": 0.0203, "step": 233090 }, { "epoch": 1.886074925155757, "grad_norm": 0.3159100115299225, "learning_rate": 9.851493147069735e-08, "loss": 0.0209, "step": 233100 }, { "epoch": 1.8861558378509589, "grad_norm": 0.15703646838665009, "learning_rate": 9.837550571103582e-08, "loss": 0.0186, "step": 233110 }, { "epoch": 1.8862367505461606, "grad_norm": 0.26421302556991577, "learning_rate": 9.823617770392757e-08, "loss": 0.016, "step": 233120 }, { "epoch": 1.8863176632413625, "grad_norm": 0.33212608098983765, "learning_rate": 9.809694745215093e-08, "loss": 0.0132, "step": 233130 }, { "epoch": 1.8863985759365645, "grad_norm": 0.3700060248374939, "learning_rate": 9.795781495848145e-08, "loss": 0.0216, "step": 233140 }, { "epoch": 1.8864794886317662, "grad_norm": 1.186850666999817, "learning_rate": 9.781878022569524e-08, "loss": 0.0237, "step": 233150 }, { "epoch": 1.8865604013269683, "grad_norm": 0.28886106610298157, "learning_rate": 9.767984325656454e-08, "loss": 0.0223, "step": 233160 }, { "epoch": 1.88664131402217, "grad_norm": 0.4223133325576782, "learning_rate": 9.754100405385991e-08, "loss": 0.0152, "step": 233170 }, { "epoch": 1.886722226717372, "grad_norm": 0.3559381067752838, "learning_rate": 9.740226262035024e-08, "loss": 0.0202, "step": 233180 }, { "epoch": 1.886803139412574, "grad_norm": 0.5878408551216125, "learning_rate": 9.726361895880276e-08, "loss": 0.0246, "step": 233190 }, { "epoch": 1.8868840521077757, "grad_norm": 0.22940939664840698, "learning_rate": 9.712507307198249e-08, "loss": 0.0111, "step": 233200 }, { "epoch": 1.8869649648029776, "grad_norm": 0.3647826910018921, "learning_rate": 9.698662496265165e-08, "loss": 0.0162, "step": 233210 }, { "epoch": 1.8870458774981795, "grad_norm": 0.4420846998691559, "learning_rate": 9.684827463357193e-08, "loss": 0.0233, "step": 233220 }, { "epoch": 1.8871267901933813, "grad_norm": 0.38629063963890076, "learning_rate": 9.671002208750336e-08, "loss": 0.0317, "step": 233230 }, { "epoch": 1.8872077028885832, "grad_norm": 0.3039691746234894, "learning_rate": 9.657186732720148e-08, "loss": 0.0163, "step": 233240 }, { "epoch": 1.8872886155837851, "grad_norm": 0.15681850910186768, "learning_rate": 9.643381035542133e-08, "loss": 0.0131, "step": 233250 }, { "epoch": 1.8873695282789869, "grad_norm": 0.5086527466773987, "learning_rate": 9.629585117491846e-08, "loss": 0.0196, "step": 233260 }, { "epoch": 1.887450440974189, "grad_norm": 0.11411819607019424, "learning_rate": 9.615798978844182e-08, "loss": 0.0212, "step": 233270 }, { "epoch": 1.8875313536693907, "grad_norm": 0.2586856186389923, "learning_rate": 9.602022619874086e-08, "loss": 0.0157, "step": 233280 }, { "epoch": 1.8876122663645925, "grad_norm": 0.3256760537624359, "learning_rate": 9.588256040856502e-08, "loss": 0.0101, "step": 233290 }, { "epoch": 1.8876931790597946, "grad_norm": 0.2576252222061157, "learning_rate": 9.574499242065771e-08, "loss": 0.0159, "step": 233300 }, { "epoch": 1.8877740917549963, "grad_norm": 0.70185786485672, "learning_rate": 9.560752223776337e-08, "loss": 0.0285, "step": 233310 }, { "epoch": 1.8878550044501983, "grad_norm": 0.689415693283081, "learning_rate": 9.54701498626237e-08, "loss": 0.0211, "step": 233320 }, { "epoch": 1.8879359171454002, "grad_norm": 0.2740034759044647, "learning_rate": 9.533287529797708e-08, "loss": 0.0146, "step": 233330 }, { "epoch": 1.888016829840602, "grad_norm": 0.4277360141277313, "learning_rate": 9.519569854656297e-08, "loss": 0.0145, "step": 233340 }, { "epoch": 1.8880977425358039, "grad_norm": 0.4660569131374359, "learning_rate": 9.505861961111585e-08, "loss": 0.0172, "step": 233350 }, { "epoch": 1.8881786552310058, "grad_norm": 0.06209354102611542, "learning_rate": 9.492163849436963e-08, "loss": 0.011, "step": 233360 }, { "epoch": 1.8882595679262075, "grad_norm": 0.1975366622209549, "learning_rate": 9.478475519905662e-08, "loss": 0.024, "step": 233370 }, { "epoch": 1.8883404806214095, "grad_norm": 0.4041268825531006, "learning_rate": 9.464796972790569e-08, "loss": 0.0198, "step": 233380 }, { "epoch": 1.8884213933166114, "grad_norm": 0.31560057401657104, "learning_rate": 9.45112820836458e-08, "loss": 0.0135, "step": 233390 }, { "epoch": 1.8885023060118131, "grad_norm": 0.32094743847846985, "learning_rate": 9.437469226900253e-08, "loss": 0.0183, "step": 233400 }, { "epoch": 1.8885832187070153, "grad_norm": 0.26713448762893677, "learning_rate": 9.423820028669983e-08, "loss": 0.0236, "step": 233410 }, { "epoch": 1.888664131402217, "grad_norm": 0.1008353978395462, "learning_rate": 9.410180613945941e-08, "loss": 0.0125, "step": 233420 }, { "epoch": 1.8887450440974187, "grad_norm": 0.3356759548187256, "learning_rate": 9.39655098300013e-08, "loss": 0.0259, "step": 233430 }, { "epoch": 1.8888259567926209, "grad_norm": 0.5024222731590271, "learning_rate": 9.382931136104445e-08, "loss": 0.0164, "step": 233440 }, { "epoch": 1.8889068694878226, "grad_norm": 0.3448132872581482, "learning_rate": 9.369321073530501e-08, "loss": 0.01, "step": 233450 }, { "epoch": 1.8889877821830245, "grad_norm": 0.430074542760849, "learning_rate": 9.355720795549638e-08, "loss": 0.0131, "step": 233460 }, { "epoch": 1.8890686948782265, "grad_norm": 0.24565242230892181, "learning_rate": 9.342130302433084e-08, "loss": 0.0165, "step": 233470 }, { "epoch": 1.8891496075734282, "grad_norm": 0.28064823150634766, "learning_rate": 9.328549594452007e-08, "loss": 0.0159, "step": 233480 }, { "epoch": 1.8892305202686301, "grad_norm": 0.7420569062232971, "learning_rate": 9.314978671877084e-08, "loss": 0.0354, "step": 233490 }, { "epoch": 1.889311432963832, "grad_norm": 0.2982546091079712, "learning_rate": 9.301417534979041e-08, "loss": 0.0217, "step": 233500 }, { "epoch": 1.8893923456590338, "grad_norm": 0.1732311248779297, "learning_rate": 9.287866184028327e-08, "loss": 0.0212, "step": 233510 }, { "epoch": 1.8894732583542357, "grad_norm": 0.03407158702611923, "learning_rate": 9.274324619295171e-08, "loss": 0.0155, "step": 233520 }, { "epoch": 1.8895541710494377, "grad_norm": 0.20011676847934723, "learning_rate": 9.260792841049637e-08, "loss": 0.0141, "step": 233530 }, { "epoch": 1.8896350837446394, "grad_norm": 0.4750920832157135, "learning_rate": 9.247270849561618e-08, "loss": 0.0211, "step": 233540 }, { "epoch": 1.8897159964398416, "grad_norm": 0.2741633355617523, "learning_rate": 9.233758645100732e-08, "loss": 0.0153, "step": 233550 }, { "epoch": 1.8897969091350433, "grad_norm": 0.31871268153190613, "learning_rate": 9.220256227936541e-08, "loss": 0.0184, "step": 233560 }, { "epoch": 1.8898778218302452, "grad_norm": 0.5134080648422241, "learning_rate": 9.206763598338164e-08, "loss": 0.0268, "step": 233570 }, { "epoch": 1.8899587345254472, "grad_norm": 0.3698221743106842, "learning_rate": 9.19328075657483e-08, "loss": 0.0152, "step": 233580 }, { "epoch": 1.8900396472206489, "grad_norm": 0.5442391037940979, "learning_rate": 9.179807702915378e-08, "loss": 0.0296, "step": 233590 }, { "epoch": 1.8901205599158508, "grad_norm": 0.6710761785507202, "learning_rate": 9.166344437628483e-08, "loss": 0.0183, "step": 233600 }, { "epoch": 1.8902014726110528, "grad_norm": 0.2802222967147827, "learning_rate": 9.152890960982652e-08, "loss": 0.0231, "step": 233610 }, { "epoch": 1.8902823853062545, "grad_norm": 0.25688081979751587, "learning_rate": 9.139447273246172e-08, "loss": 0.0207, "step": 233620 }, { "epoch": 1.8903632980014564, "grad_norm": 0.3132546842098236, "learning_rate": 9.12601337468716e-08, "loss": 0.0118, "step": 233630 }, { "epoch": 1.8904442106966584, "grad_norm": 0.2376895695924759, "learning_rate": 9.112589265573569e-08, "loss": 0.0242, "step": 233640 }, { "epoch": 1.89052512339186, "grad_norm": 0.10023364424705505, "learning_rate": 9.099174946173072e-08, "loss": 0.017, "step": 233650 }, { "epoch": 1.890606036087062, "grad_norm": 0.18530793488025665, "learning_rate": 9.085770416753126e-08, "loss": 0.0304, "step": 233660 }, { "epoch": 1.890686948782264, "grad_norm": 0.5918826460838318, "learning_rate": 9.072375677581235e-08, "loss": 0.0184, "step": 233670 }, { "epoch": 1.8907678614774657, "grad_norm": 0.10525673627853394, "learning_rate": 9.058990728924355e-08, "loss": 0.0213, "step": 233680 }, { "epoch": 1.8908487741726678, "grad_norm": 0.25056371092796326, "learning_rate": 9.045615571049438e-08, "loss": 0.0124, "step": 233690 }, { "epoch": 1.8909296868678696, "grad_norm": 0.3639441430568695, "learning_rate": 9.032250204223325e-08, "loss": 0.0265, "step": 233700 }, { "epoch": 1.8910105995630715, "grad_norm": 0.3159109652042389, "learning_rate": 9.018894628712472e-08, "loss": 0.0161, "step": 233710 }, { "epoch": 1.8910915122582734, "grad_norm": 0.22168384492397308, "learning_rate": 9.005548844783274e-08, "loss": 0.0132, "step": 233720 }, { "epoch": 1.8911724249534752, "grad_norm": 0.2876196801662445, "learning_rate": 8.992212852701854e-08, "loss": 0.0129, "step": 233730 }, { "epoch": 1.891253337648677, "grad_norm": 0.9864765405654907, "learning_rate": 8.97888665273422e-08, "loss": 0.0149, "step": 233740 }, { "epoch": 1.891334250343879, "grad_norm": 0.20408840477466583, "learning_rate": 8.965570245146104e-08, "loss": 0.0154, "step": 233750 }, { "epoch": 1.8914151630390807, "grad_norm": 0.404948353767395, "learning_rate": 8.952263630203018e-08, "loss": 0.0217, "step": 233760 }, { "epoch": 1.8914960757342827, "grad_norm": 0.10678064078092575, "learning_rate": 8.938966808170413e-08, "loss": 0.0103, "step": 233770 }, { "epoch": 1.8915769884294846, "grad_norm": 0.3672899603843689, "learning_rate": 8.925679779313467e-08, "loss": 0.0177, "step": 233780 }, { "epoch": 1.8916579011246863, "grad_norm": 0.22281886637210846, "learning_rate": 8.912402543897081e-08, "loss": 0.0148, "step": 233790 }, { "epoch": 1.8917388138198883, "grad_norm": 0.1713874340057373, "learning_rate": 8.899135102186152e-08, "loss": 0.0268, "step": 233800 }, { "epoch": 1.8918197265150902, "grad_norm": 0.08642860502004623, "learning_rate": 8.885877454445191e-08, "loss": 0.0208, "step": 233810 }, { "epoch": 1.891900639210292, "grad_norm": 0.015962539240717888, "learning_rate": 8.872629600938598e-08, "loss": 0.016, "step": 233820 }, { "epoch": 1.891981551905494, "grad_norm": 0.4556019604206085, "learning_rate": 8.859391541930606e-08, "loss": 0.0269, "step": 233830 }, { "epoch": 1.8920624646006958, "grad_norm": 0.668069064617157, "learning_rate": 8.846163277685227e-08, "loss": 0.0201, "step": 233840 }, { "epoch": 1.8921433772958978, "grad_norm": 0.2857609689235687, "learning_rate": 8.832944808466193e-08, "loss": 0.0182, "step": 233850 }, { "epoch": 1.8922242899910997, "grad_norm": 0.08390162885189056, "learning_rate": 8.819736134537238e-08, "loss": 0.0182, "step": 233860 }, { "epoch": 1.8923052026863014, "grad_norm": 0.533115565776825, "learning_rate": 8.806537256161651e-08, "loss": 0.0216, "step": 233870 }, { "epoch": 1.8923861153815034, "grad_norm": 0.2619839310646057, "learning_rate": 8.793348173602722e-08, "loss": 0.0161, "step": 233880 }, { "epoch": 1.8924670280767053, "grad_norm": 0.3638959527015686, "learning_rate": 8.78016888712352e-08, "loss": 0.0146, "step": 233890 }, { "epoch": 1.892547940771907, "grad_norm": 0.3390975296497345, "learning_rate": 8.766999396986886e-08, "loss": 0.026, "step": 233900 }, { "epoch": 1.892628853467109, "grad_norm": 0.2631685435771942, "learning_rate": 8.753839703455336e-08, "loss": 0.0146, "step": 233910 }, { "epoch": 1.892709766162311, "grad_norm": 0.5753151178359985, "learning_rate": 8.740689806791436e-08, "loss": 0.0168, "step": 233920 }, { "epoch": 1.8927906788575126, "grad_norm": 0.5017684102058411, "learning_rate": 8.727549707257366e-08, "loss": 0.0221, "step": 233930 }, { "epoch": 1.8928715915527148, "grad_norm": 0.6001068949699402, "learning_rate": 8.714419405115137e-08, "loss": 0.0223, "step": 233940 }, { "epoch": 1.8929525042479165, "grad_norm": 0.2771638333797455, "learning_rate": 8.701298900626765e-08, "loss": 0.0155, "step": 233950 }, { "epoch": 1.8930334169431182, "grad_norm": 0.3588216304779053, "learning_rate": 8.688188194053759e-08, "loss": 0.0221, "step": 233960 }, { "epoch": 1.8931143296383204, "grad_norm": 0.33934906125068665, "learning_rate": 8.675087285657635e-08, "loss": 0.0101, "step": 233970 }, { "epoch": 1.893195242333522, "grad_norm": 0.3211554288864136, "learning_rate": 8.661996175699628e-08, "loss": 0.0192, "step": 233980 }, { "epoch": 1.893276155028724, "grad_norm": 0.35574871301651, "learning_rate": 8.648914864440916e-08, "loss": 0.0284, "step": 233990 }, { "epoch": 1.893357067723926, "grad_norm": 0.4471539258956909, "learning_rate": 8.635843352142237e-08, "loss": 0.0215, "step": 234000 }, { "epoch": 1.8934379804191277, "grad_norm": 0.2715242803096771, "learning_rate": 8.622781639064382e-08, "loss": 0.0164, "step": 234010 }, { "epoch": 1.8935188931143296, "grad_norm": 0.6306957006454468, "learning_rate": 8.609729725467808e-08, "loss": 0.0275, "step": 234020 }, { "epoch": 1.8935998058095316, "grad_norm": 0.02534107118844986, "learning_rate": 8.596687611612809e-08, "loss": 0.0155, "step": 234030 }, { "epoch": 1.8936807185047333, "grad_norm": 0.1639775037765503, "learning_rate": 8.583655297759452e-08, "loss": 0.0212, "step": 234040 }, { "epoch": 1.8937616311999352, "grad_norm": 0.35782289505004883, "learning_rate": 8.570632784167643e-08, "loss": 0.0161, "step": 234050 }, { "epoch": 1.8938425438951372, "grad_norm": 0.42582789063453674, "learning_rate": 8.557620071097228e-08, "loss": 0.0176, "step": 234060 }, { "epoch": 1.893923456590339, "grad_norm": 1.0812103748321533, "learning_rate": 8.544617158807444e-08, "loss": 0.0179, "step": 234070 }, { "epoch": 1.894004369285541, "grad_norm": 0.3870353102684021, "learning_rate": 8.531624047557863e-08, "loss": 0.0204, "step": 234080 }, { "epoch": 1.8940852819807428, "grad_norm": 0.5262851119041443, "learning_rate": 8.518640737607498e-08, "loss": 0.0167, "step": 234090 }, { "epoch": 1.8941661946759445, "grad_norm": 0.5634157061576843, "learning_rate": 8.505667229215254e-08, "loss": 0.0206, "step": 234100 }, { "epoch": 1.8942471073711467, "grad_norm": 0.5477736592292786, "learning_rate": 8.492703522639867e-08, "loss": 0.0226, "step": 234110 }, { "epoch": 1.8943280200663484, "grad_norm": 0.1021433174610138, "learning_rate": 8.47974961813991e-08, "loss": 0.0062, "step": 234120 }, { "epoch": 1.8944089327615503, "grad_norm": 0.3868023455142975, "learning_rate": 8.46680551597373e-08, "loss": 0.0115, "step": 234130 }, { "epoch": 1.8944898454567523, "grad_norm": 0.049514397978782654, "learning_rate": 8.453871216399401e-08, "loss": 0.0154, "step": 234140 }, { "epoch": 1.894570758151954, "grad_norm": 0.38237863779067993, "learning_rate": 8.440946719674937e-08, "loss": 0.0161, "step": 234150 }, { "epoch": 1.894651670847156, "grad_norm": 0.2993018329143524, "learning_rate": 8.428032026058075e-08, "loss": 0.0154, "step": 234160 }, { "epoch": 1.8947325835423579, "grad_norm": 0.295833945274353, "learning_rate": 8.415127135806334e-08, "loss": 0.0177, "step": 234170 }, { "epoch": 1.8948134962375596, "grad_norm": 0.4534650146961212, "learning_rate": 8.402232049177118e-08, "loss": 0.0107, "step": 234180 }, { "epoch": 1.8948944089327615, "grad_norm": 0.39424851536750793, "learning_rate": 8.389346766427553e-08, "loss": 0.0158, "step": 234190 }, { "epoch": 1.8949753216279634, "grad_norm": 0.45968320965766907, "learning_rate": 8.376471287814658e-08, "loss": 0.0092, "step": 234200 }, { "epoch": 1.8950562343231652, "grad_norm": 0.35158658027648926, "learning_rate": 8.363605613595171e-08, "loss": 0.0095, "step": 234210 }, { "epoch": 1.8951371470183673, "grad_norm": 0.29415926337242126, "learning_rate": 8.350749744025666e-08, "loss": 0.014, "step": 234220 }, { "epoch": 1.895218059713569, "grad_norm": 0.07538039982318878, "learning_rate": 8.337903679362546e-08, "loss": 0.0137, "step": 234230 }, { "epoch": 1.895298972408771, "grad_norm": 0.6513815522193909, "learning_rate": 8.325067419861944e-08, "loss": 0.0184, "step": 234240 }, { "epoch": 1.895379885103973, "grad_norm": 0.2847033143043518, "learning_rate": 8.312240965779983e-08, "loss": 0.0212, "step": 234250 }, { "epoch": 1.8954607977991746, "grad_norm": 0.4009910821914673, "learning_rate": 8.299424317372295e-08, "loss": 0.0155, "step": 234260 }, { "epoch": 1.8955417104943766, "grad_norm": 0.4188630282878876, "learning_rate": 8.28661747489462e-08, "loss": 0.0234, "step": 234270 }, { "epoch": 1.8956226231895785, "grad_norm": 0.5593505501747131, "learning_rate": 8.273820438602364e-08, "loss": 0.0169, "step": 234280 }, { "epoch": 1.8957035358847802, "grad_norm": 0.27134519815444946, "learning_rate": 8.261033208750546e-08, "loss": 0.0205, "step": 234290 }, { "epoch": 1.8957844485799822, "grad_norm": 0.5275682210922241, "learning_rate": 8.248255785594406e-08, "loss": 0.0111, "step": 234300 }, { "epoch": 1.8958653612751841, "grad_norm": 0.6402187943458557, "learning_rate": 8.235488169388684e-08, "loss": 0.0125, "step": 234310 }, { "epoch": 1.8959462739703858, "grad_norm": 0.3051341772079468, "learning_rate": 8.222730360387898e-08, "loss": 0.0245, "step": 234320 }, { "epoch": 1.8960271866655878, "grad_norm": 0.3806471824645996, "learning_rate": 8.209982358846625e-08, "loss": 0.0171, "step": 234330 }, { "epoch": 1.8961080993607897, "grad_norm": 0.47582414746284485, "learning_rate": 8.197244165019047e-08, "loss": 0.0157, "step": 234340 }, { "epoch": 1.8961890120559914, "grad_norm": 0.40186405181884766, "learning_rate": 8.184515779159185e-08, "loss": 0.0242, "step": 234350 }, { "epoch": 1.8962699247511936, "grad_norm": 0.4691579341888428, "learning_rate": 8.171797201520892e-08, "loss": 0.0178, "step": 234360 }, { "epoch": 1.8963508374463953, "grad_norm": 0.20376573503017426, "learning_rate": 8.159088432357798e-08, "loss": 0.0133, "step": 234370 }, { "epoch": 1.8964317501415973, "grad_norm": 0.3117293417453766, "learning_rate": 8.146389471923366e-08, "loss": 0.0264, "step": 234380 }, { "epoch": 1.8965126628367992, "grad_norm": 0.7644990086555481, "learning_rate": 8.133700320470839e-08, "loss": 0.0317, "step": 234390 }, { "epoch": 1.896593575532001, "grad_norm": 0.30443456768989563, "learning_rate": 8.121020978253291e-08, "loss": 0.0108, "step": 234400 }, { "epoch": 1.8966744882272029, "grad_norm": 0.1630556881427765, "learning_rate": 8.108351445523576e-08, "loss": 0.0212, "step": 234410 }, { "epoch": 1.8967554009224048, "grad_norm": 0.5836718678474426, "learning_rate": 8.095691722534327e-08, "loss": 0.0251, "step": 234420 }, { "epoch": 1.8968363136176065, "grad_norm": 0.4361630380153656, "learning_rate": 8.083041809538062e-08, "loss": 0.0214, "step": 234430 }, { "epoch": 1.8969172263128085, "grad_norm": 0.3186708688735962, "learning_rate": 8.070401706787135e-08, "loss": 0.0242, "step": 234440 }, { "epoch": 1.8969981390080104, "grad_norm": 0.6716892719268799, "learning_rate": 8.057771414533455e-08, "loss": 0.0185, "step": 234450 }, { "epoch": 1.8970790517032121, "grad_norm": 0.21816949546337128, "learning_rate": 8.045150933028989e-08, "loss": 0.019, "step": 234460 }, { "epoch": 1.897159964398414, "grad_norm": 0.3213544487953186, "learning_rate": 8.032540262525479e-08, "loss": 0.0168, "step": 234470 }, { "epoch": 1.897240877093616, "grad_norm": 0.13251721858978271, "learning_rate": 8.019939403274279e-08, "loss": 0.0272, "step": 234480 }, { "epoch": 1.8973217897888177, "grad_norm": 0.3681265413761139, "learning_rate": 8.007348355526801e-08, "loss": 0.016, "step": 234490 }, { "epoch": 1.8974027024840199, "grad_norm": 0.3772013485431671, "learning_rate": 7.994767119534175e-08, "loss": 0.0145, "step": 234500 }, { "epoch": 1.8974836151792216, "grad_norm": 0.3707999885082245, "learning_rate": 7.982195695547145e-08, "loss": 0.0152, "step": 234510 }, { "epoch": 1.8975645278744235, "grad_norm": 0.5323362350463867, "learning_rate": 7.969634083816624e-08, "loss": 0.0207, "step": 234520 }, { "epoch": 1.8976454405696255, "grad_norm": 0.39532288908958435, "learning_rate": 7.957082284593021e-08, "loss": 0.0135, "step": 234530 }, { "epoch": 1.8977263532648272, "grad_norm": 0.3232540786266327, "learning_rate": 7.94454029812658e-08, "loss": 0.0181, "step": 234540 }, { "epoch": 1.8978072659600291, "grad_norm": 0.023740995675325394, "learning_rate": 7.9320081246676e-08, "loss": 0.014, "step": 234550 }, { "epoch": 1.897888178655231, "grad_norm": 0.4142938256263733, "learning_rate": 7.919485764465884e-08, "loss": 0.0298, "step": 234560 }, { "epoch": 1.8979690913504328, "grad_norm": 0.2835574150085449, "learning_rate": 7.906973217771175e-08, "loss": 0.0142, "step": 234570 }, { "epoch": 1.8980500040456347, "grad_norm": 0.4202655255794525, "learning_rate": 7.89447048483305e-08, "loss": 0.0153, "step": 234580 }, { "epoch": 1.8981309167408367, "grad_norm": 0.3492329716682434, "learning_rate": 7.881977565900812e-08, "loss": 0.0291, "step": 234590 }, { "epoch": 1.8982118294360384, "grad_norm": 0.402047723531723, "learning_rate": 7.869494461223647e-08, "loss": 0.0154, "step": 234600 }, { "epoch": 1.8982927421312406, "grad_norm": 0.6900389194488525, "learning_rate": 7.857021171050472e-08, "loss": 0.0158, "step": 234610 }, { "epoch": 1.8983736548264423, "grad_norm": 0.34414035081863403, "learning_rate": 7.844557695630029e-08, "loss": 0.0056, "step": 234620 }, { "epoch": 1.898454567521644, "grad_norm": 0.48985806107521057, "learning_rate": 7.832104035210952e-08, "loss": 0.0138, "step": 234630 }, { "epoch": 1.8985354802168461, "grad_norm": 0.16936790943145752, "learning_rate": 7.819660190041544e-08, "loss": 0.0143, "step": 234640 }, { "epoch": 1.8986163929120479, "grad_norm": 0.45933762192726135, "learning_rate": 7.807226160369885e-08, "loss": 0.0144, "step": 234650 }, { "epoch": 1.8986973056072498, "grad_norm": 0.16092899441719055, "learning_rate": 7.794801946444163e-08, "loss": 0.0105, "step": 234660 }, { "epoch": 1.8987782183024517, "grad_norm": 0.3632509410381317, "learning_rate": 7.782387548511905e-08, "loss": 0.0076, "step": 234670 }, { "epoch": 1.8988591309976535, "grad_norm": 0.1432170867919922, "learning_rate": 7.769982966820855e-08, "loss": 0.0185, "step": 234680 }, { "epoch": 1.8989400436928554, "grad_norm": 0.34851178526878357, "learning_rate": 7.757588201618427e-08, "loss": 0.0229, "step": 234690 }, { "epoch": 1.8990209563880573, "grad_norm": 0.5238375663757324, "learning_rate": 7.74520325315159e-08, "loss": 0.0139, "step": 234700 }, { "epoch": 1.899101869083259, "grad_norm": 0.6811633706092834, "learning_rate": 7.73282812166759e-08, "loss": 0.0188, "step": 234710 }, { "epoch": 1.899182781778461, "grad_norm": 0.2284024953842163, "learning_rate": 7.720462807413065e-08, "loss": 0.0134, "step": 234720 }, { "epoch": 1.899263694473663, "grad_norm": 0.5873277187347412, "learning_rate": 7.70810731063465e-08, "loss": 0.0248, "step": 234730 }, { "epoch": 1.8993446071688647, "grad_norm": 0.13817764818668365, "learning_rate": 7.695761631578813e-08, "loss": 0.0152, "step": 234740 }, { "epoch": 1.8994255198640668, "grad_norm": 0.28708648681640625, "learning_rate": 7.683425770491637e-08, "loss": 0.0255, "step": 234750 }, { "epoch": 1.8995064325592685, "grad_norm": 0.31626662611961365, "learning_rate": 7.671099727619258e-08, "loss": 0.0128, "step": 234760 }, { "epoch": 1.8995873452544703, "grad_norm": 0.5054442882537842, "learning_rate": 7.658783503207423e-08, "loss": 0.0247, "step": 234770 }, { "epoch": 1.8996682579496724, "grad_norm": 0.1171250268816948, "learning_rate": 7.646477097501825e-08, "loss": 0.0227, "step": 234780 }, { "epoch": 1.8997491706448741, "grad_norm": 0.12731559574604034, "learning_rate": 7.634180510747768e-08, "loss": 0.0133, "step": 234790 }, { "epoch": 1.899830083340076, "grad_norm": 0.35885390639305115, "learning_rate": 7.621893743190611e-08, "loss": 0.0158, "step": 234800 }, { "epoch": 1.899910996035278, "grad_norm": 0.23957093060016632, "learning_rate": 7.609616795075214e-08, "loss": 0.018, "step": 234810 }, { "epoch": 1.8999919087304797, "grad_norm": 0.21884556114673615, "learning_rate": 7.597349666646658e-08, "loss": 0.0208, "step": 234820 }, { "epoch": 1.9000728214256817, "grad_norm": 0.41993069648742676, "learning_rate": 7.585092358149415e-08, "loss": 0.0117, "step": 234830 }, { "epoch": 1.9001537341208836, "grad_norm": 0.32386237382888794, "learning_rate": 7.5728448698279e-08, "loss": 0.0272, "step": 234840 }, { "epoch": 1.9002346468160853, "grad_norm": 0.3953704833984375, "learning_rate": 7.560607201926528e-08, "loss": 0.0192, "step": 234850 }, { "epoch": 1.9003155595112873, "grad_norm": 0.3676254451274872, "learning_rate": 7.548379354689273e-08, "loss": 0.0176, "step": 234860 }, { "epoch": 1.9003964722064892, "grad_norm": 0.08854246139526367, "learning_rate": 7.536161328359882e-08, "loss": 0.0144, "step": 234870 }, { "epoch": 1.900477384901691, "grad_norm": 0.7019797563552856, "learning_rate": 7.523953123182215e-08, "loss": 0.0197, "step": 234880 }, { "epoch": 1.900558297596893, "grad_norm": 0.6288848519325256, "learning_rate": 7.511754739399524e-08, "loss": 0.0141, "step": 234890 }, { "epoch": 1.9006392102920948, "grad_norm": 0.3428080976009369, "learning_rate": 7.499566177255279e-08, "loss": 0.0232, "step": 234900 }, { "epoch": 1.9007201229872968, "grad_norm": 0.30785587430000305, "learning_rate": 7.48738743699251e-08, "loss": 0.0102, "step": 234910 }, { "epoch": 1.9008010356824987, "grad_norm": 0.4218045771121979, "learning_rate": 7.475218518853966e-08, "loss": 0.0161, "step": 234920 }, { "epoch": 1.9008819483777004, "grad_norm": 0.3396570682525635, "learning_rate": 7.463059423082452e-08, "loss": 0.0188, "step": 234930 }, { "epoch": 1.9009628610729024, "grad_norm": 0.20119237899780273, "learning_rate": 7.450910149920499e-08, "loss": 0.0194, "step": 234940 }, { "epoch": 1.9010437737681043, "grad_norm": 0.18029463291168213, "learning_rate": 7.438770699610187e-08, "loss": 0.0116, "step": 234950 }, { "epoch": 1.901124686463306, "grad_norm": 0.2576780617237091, "learning_rate": 7.426641072393826e-08, "loss": 0.0273, "step": 234960 }, { "epoch": 1.901205599158508, "grad_norm": 0.5238797664642334, "learning_rate": 7.414521268513275e-08, "loss": 0.0148, "step": 234970 }, { "epoch": 1.90128651185371, "grad_norm": 0.47200220823287964, "learning_rate": 7.402411288210176e-08, "loss": 0.013, "step": 234980 }, { "epoch": 1.9013674245489116, "grad_norm": 0.4967445433139801, "learning_rate": 7.390311131726057e-08, "loss": 0.0205, "step": 234990 }, { "epoch": 1.9014483372441136, "grad_norm": 0.44631728529930115, "learning_rate": 7.378220799302227e-08, "loss": 0.0137, "step": 235000 }, { "epoch": 1.9015292499393155, "grad_norm": 0.17788055539131165, "learning_rate": 7.366140291179824e-08, "loss": 0.0231, "step": 235010 }, { "epoch": 1.9016101626345172, "grad_norm": 0.04280063137412071, "learning_rate": 7.354069607599768e-08, "loss": 0.0136, "step": 235020 }, { "epoch": 1.9016910753297194, "grad_norm": 0.4569491744041443, "learning_rate": 7.342008748802698e-08, "loss": 0.0182, "step": 235030 }, { "epoch": 1.901771988024921, "grad_norm": 0.5863567590713501, "learning_rate": 7.329957715029312e-08, "loss": 0.0157, "step": 235040 }, { "epoch": 1.901852900720123, "grad_norm": 0.44353219866752625, "learning_rate": 7.317916506519807e-08, "loss": 0.0133, "step": 235050 }, { "epoch": 1.901933813415325, "grad_norm": 0.5884631276130676, "learning_rate": 7.305885123514322e-08, "loss": 0.0176, "step": 235060 }, { "epoch": 1.9020147261105267, "grad_norm": 0.3521028161048889, "learning_rate": 7.293863566252945e-08, "loss": 0.0199, "step": 235070 }, { "epoch": 1.9020956388057286, "grad_norm": 0.4880625307559967, "learning_rate": 7.281851834975262e-08, "loss": 0.0172, "step": 235080 }, { "epoch": 1.9021765515009306, "grad_norm": 0.3971758484840393, "learning_rate": 7.269849929920803e-08, "loss": 0.0118, "step": 235090 }, { "epoch": 1.9022574641961323, "grad_norm": 0.5628172755241394, "learning_rate": 7.257857851329042e-08, "loss": 0.0132, "step": 235100 }, { "epoch": 1.9023383768913342, "grad_norm": 0.0890110582113266, "learning_rate": 7.245875599439067e-08, "loss": 0.0089, "step": 235110 }, { "epoch": 1.9024192895865362, "grad_norm": 0.5192533731460571, "learning_rate": 7.233903174489854e-08, "loss": 0.0181, "step": 235120 }, { "epoch": 1.9025002022817379, "grad_norm": 0.419147789478302, "learning_rate": 7.22194057672021e-08, "loss": 0.0247, "step": 235130 }, { "epoch": 1.90258111497694, "grad_norm": 0.22219721972942352, "learning_rate": 7.209987806368613e-08, "loss": 0.0176, "step": 235140 }, { "epoch": 1.9026620276721418, "grad_norm": 0.1602356731891632, "learning_rate": 7.198044863673481e-08, "loss": 0.0113, "step": 235150 }, { "epoch": 1.9027429403673435, "grad_norm": 0.49776163697242737, "learning_rate": 7.186111748873015e-08, "loss": 0.0209, "step": 235160 }, { "epoch": 1.9028238530625456, "grad_norm": 0.3758794665336609, "learning_rate": 7.174188462205133e-08, "loss": 0.0186, "step": 235170 }, { "epoch": 1.9029047657577474, "grad_norm": 0.6558619737625122, "learning_rate": 7.162275003907704e-08, "loss": 0.0271, "step": 235180 }, { "epoch": 1.9029856784529493, "grad_norm": 0.15702345967292786, "learning_rate": 7.150371374218257e-08, "loss": 0.017, "step": 235190 }, { "epoch": 1.9030665911481512, "grad_norm": 0.42161083221435547, "learning_rate": 7.138477573374214e-08, "loss": 0.0111, "step": 235200 }, { "epoch": 1.903147503843353, "grad_norm": 0.06760477274656296, "learning_rate": 7.126593601612775e-08, "loss": 0.0143, "step": 235210 }, { "epoch": 1.903228416538555, "grad_norm": 0.521394670009613, "learning_rate": 7.11471945917086e-08, "loss": 0.0151, "step": 235220 }, { "epoch": 1.9033093292337568, "grad_norm": 0.6475527882575989, "learning_rate": 7.102855146285448e-08, "loss": 0.0207, "step": 235230 }, { "epoch": 1.9033902419289586, "grad_norm": 0.4551997184753418, "learning_rate": 7.091000663192959e-08, "loss": 0.012, "step": 235240 }, { "epoch": 1.9034711546241605, "grad_norm": 0.5104246735572815, "learning_rate": 7.079156010129873e-08, "loss": 0.0106, "step": 235250 }, { "epoch": 1.9035520673193624, "grad_norm": 0.528427243232727, "learning_rate": 7.067321187332444e-08, "loss": 0.0209, "step": 235260 }, { "epoch": 1.9036329800145642, "grad_norm": 0.20480206608772278, "learning_rate": 7.055496195036704e-08, "loss": 0.0154, "step": 235270 }, { "epoch": 1.9037138927097663, "grad_norm": 0.08264278620481491, "learning_rate": 7.043681033478356e-08, "loss": 0.0106, "step": 235280 }, { "epoch": 1.903794805404968, "grad_norm": 0.4214419424533844, "learning_rate": 7.031875702893154e-08, "loss": 0.012, "step": 235290 }, { "epoch": 1.9038757181001698, "grad_norm": 0.4550919234752655, "learning_rate": 7.020080203516466e-08, "loss": 0.0158, "step": 235300 }, { "epoch": 1.903956630795372, "grad_norm": 0.46262967586517334, "learning_rate": 7.008294535583605e-08, "loss": 0.0197, "step": 235310 }, { "epoch": 1.9040375434905736, "grad_norm": 0.3279147148132324, "learning_rate": 6.996518699329546e-08, "loss": 0.0114, "step": 235320 }, { "epoch": 1.9041184561857756, "grad_norm": 0.14966481924057007, "learning_rate": 6.984752694989105e-08, "loss": 0.0166, "step": 235330 }, { "epoch": 1.9041993688809775, "grad_norm": 0.6470756530761719, "learning_rate": 6.972996522796982e-08, "loss": 0.0162, "step": 235340 }, { "epoch": 1.9042802815761792, "grad_norm": 0.3229256868362427, "learning_rate": 6.961250182987655e-08, "loss": 0.0226, "step": 235350 }, { "epoch": 1.9043611942713812, "grad_norm": 0.4589073359966278, "learning_rate": 6.949513675795328e-08, "loss": 0.0266, "step": 235360 }, { "epoch": 1.9044421069665831, "grad_norm": 0.17215628921985626, "learning_rate": 6.937787001454033e-08, "loss": 0.0149, "step": 235370 }, { "epoch": 1.9045230196617848, "grad_norm": 0.5592895746231079, "learning_rate": 6.926070160197752e-08, "loss": 0.0248, "step": 235380 }, { "epoch": 1.9046039323569868, "grad_norm": 0.08841373026371002, "learning_rate": 6.914363152260018e-08, "loss": 0.0189, "step": 235390 }, { "epoch": 1.9046848450521887, "grad_norm": 0.2512882947921753, "learning_rate": 6.902665977874368e-08, "loss": 0.0257, "step": 235400 }, { "epoch": 1.9047657577473904, "grad_norm": 0.14155477285385132, "learning_rate": 6.890978637274115e-08, "loss": 0.0221, "step": 235410 }, { "epoch": 1.9048466704425926, "grad_norm": 0.3097575604915619, "learning_rate": 6.87930113069224e-08, "loss": 0.0207, "step": 235420 }, { "epoch": 1.9049275831377943, "grad_norm": 0.533286452293396, "learning_rate": 6.867633458361667e-08, "loss": 0.0313, "step": 235430 }, { "epoch": 1.9050084958329963, "grad_norm": 0.13333426415920258, "learning_rate": 6.855975620515098e-08, "loss": 0.014, "step": 235440 }, { "epoch": 1.9050894085281982, "grad_norm": 0.18351079523563385, "learning_rate": 6.844327617385127e-08, "loss": 0.018, "step": 235450 }, { "epoch": 1.9051703212234, "grad_norm": 0.393587201833725, "learning_rate": 6.832689449203845e-08, "loss": 0.0109, "step": 235460 }, { "epoch": 1.9052512339186018, "grad_norm": 0.4429711699485779, "learning_rate": 6.821061116203454e-08, "loss": 0.0201, "step": 235470 }, { "epoch": 1.9053321466138038, "grad_norm": 0.47227969765663147, "learning_rate": 6.809442618615881e-08, "loss": 0.0132, "step": 235480 }, { "epoch": 1.9054130593090055, "grad_norm": 0.5896017551422119, "learning_rate": 6.797833956672828e-08, "loss": 0.0142, "step": 235490 }, { "epoch": 1.9054939720042074, "grad_norm": 0.46881452202796936, "learning_rate": 6.786235130605667e-08, "loss": 0.0233, "step": 235500 }, { "epoch": 1.9055748846994094, "grad_norm": 0.18177838623523712, "learning_rate": 6.774646140645936e-08, "loss": 0.0161, "step": 235510 }, { "epoch": 1.905655797394611, "grad_norm": 0.23402632772922516, "learning_rate": 6.763066987024613e-08, "loss": 0.0169, "step": 235520 }, { "epoch": 1.905736710089813, "grad_norm": 0.04800291731953621, "learning_rate": 6.751497669972629e-08, "loss": 0.012, "step": 235530 }, { "epoch": 1.905817622785015, "grad_norm": 0.2007790058851242, "learning_rate": 6.73993818972074e-08, "loss": 0.0122, "step": 235540 }, { "epoch": 1.9058985354802167, "grad_norm": 0.45441940426826477, "learning_rate": 6.728388546499487e-08, "loss": 0.0167, "step": 235550 }, { "epoch": 1.9059794481754189, "grad_norm": 0.5168029069900513, "learning_rate": 6.716848740539184e-08, "loss": 0.0158, "step": 235560 }, { "epoch": 1.9060603608706206, "grad_norm": 0.4072713851928711, "learning_rate": 6.705318772069924e-08, "loss": 0.0159, "step": 235570 }, { "epoch": 1.9061412735658225, "grad_norm": 0.43227607011795044, "learning_rate": 6.693798641321747e-08, "loss": 0.0118, "step": 235580 }, { "epoch": 1.9062221862610245, "grad_norm": 0.01623888500034809, "learning_rate": 6.682288348524302e-08, "loss": 0.0108, "step": 235590 }, { "epoch": 1.9063030989562262, "grad_norm": 0.2424153834581375, "learning_rate": 6.670787893907182e-08, "loss": 0.0257, "step": 235600 }, { "epoch": 1.9063840116514281, "grad_norm": 0.20603080093860626, "learning_rate": 6.659297277699761e-08, "loss": 0.0101, "step": 235610 }, { "epoch": 1.90646492434663, "grad_norm": 0.4419287145137787, "learning_rate": 6.647816500131132e-08, "loss": 0.0147, "step": 235620 }, { "epoch": 1.9065458370418318, "grad_norm": 0.5400547385215759, "learning_rate": 6.63634556143028e-08, "loss": 0.0153, "step": 235630 }, { "epoch": 1.9066267497370337, "grad_norm": 0.11223886162042618, "learning_rate": 6.62488446182602e-08, "loss": 0.0119, "step": 235640 }, { "epoch": 1.9067076624322357, "grad_norm": 0.07250600308179855, "learning_rate": 6.613433201546837e-08, "loss": 0.013, "step": 235650 }, { "epoch": 1.9067885751274374, "grad_norm": 0.1913086622953415, "learning_rate": 6.60199178082116e-08, "loss": 0.014, "step": 235660 }, { "epoch": 1.9068694878226393, "grad_norm": 0.2764703631401062, "learning_rate": 6.590560199877194e-08, "loss": 0.0145, "step": 235670 }, { "epoch": 1.9069504005178413, "grad_norm": 0.4592852294445038, "learning_rate": 6.579138458942813e-08, "loss": 0.0251, "step": 235680 }, { "epoch": 1.907031313213043, "grad_norm": 0.3114217519760132, "learning_rate": 6.567726558245835e-08, "loss": 0.01, "step": 235690 }, { "epoch": 1.9071122259082451, "grad_norm": 0.4984973669052124, "learning_rate": 6.556324498013911e-08, "loss": 0.0346, "step": 235700 }, { "epoch": 1.9071931386034469, "grad_norm": 0.39082053303718567, "learning_rate": 6.544932278474414e-08, "loss": 0.0125, "step": 235710 }, { "epoch": 1.9072740512986488, "grad_norm": 0.09346845746040344, "learning_rate": 6.53354989985444e-08, "loss": 0.0181, "step": 235720 }, { "epoch": 1.9073549639938507, "grad_norm": 0.1771089881658554, "learning_rate": 6.522177362381144e-08, "loss": 0.0195, "step": 235730 }, { "epoch": 1.9074358766890525, "grad_norm": 0.4337383508682251, "learning_rate": 6.510814666281173e-08, "loss": 0.0254, "step": 235740 }, { "epoch": 1.9075167893842544, "grad_norm": 0.39635327458381653, "learning_rate": 6.499461811781238e-08, "loss": 0.0115, "step": 235750 }, { "epoch": 1.9075977020794563, "grad_norm": 0.35337531566619873, "learning_rate": 6.488118799107712e-08, "loss": 0.0212, "step": 235760 }, { "epoch": 1.907678614774658, "grad_norm": 0.6338609457015991, "learning_rate": 6.476785628486804e-08, "loss": 0.0224, "step": 235770 }, { "epoch": 1.90775952746986, "grad_norm": 0.24070818722248077, "learning_rate": 6.465462300144554e-08, "loss": 0.0127, "step": 235780 }, { "epoch": 1.907840440165062, "grad_norm": 0.3263096213340759, "learning_rate": 6.454148814306727e-08, "loss": 0.0236, "step": 235790 }, { "epoch": 1.9079213528602637, "grad_norm": 0.6478808522224426, "learning_rate": 6.442845171198975e-08, "loss": 0.0249, "step": 235800 }, { "epoch": 1.9080022655554658, "grad_norm": 0.24254317581653595, "learning_rate": 6.431551371046729e-08, "loss": 0.0168, "step": 235810 }, { "epoch": 1.9080831782506675, "grad_norm": 0.8651109337806702, "learning_rate": 6.420267414075254e-08, "loss": 0.0328, "step": 235820 }, { "epoch": 1.9081640909458693, "grad_norm": 0.36947324872016907, "learning_rate": 6.408993300509536e-08, "loss": 0.0177, "step": 235830 }, { "epoch": 1.9082450036410714, "grad_norm": 0.35404524207115173, "learning_rate": 6.397729030574451e-08, "loss": 0.018, "step": 235840 }, { "epoch": 1.9083259163362731, "grad_norm": 0.3812462389469147, "learning_rate": 6.386474604494541e-08, "loss": 0.0328, "step": 235850 }, { "epoch": 1.908406829031475, "grad_norm": 0.20751798152923584, "learning_rate": 6.375230022494406e-08, "loss": 0.0125, "step": 235860 }, { "epoch": 1.908487741726677, "grad_norm": 0.27161628007888794, "learning_rate": 6.363995284798253e-08, "loss": 0.0168, "step": 235870 }, { "epoch": 1.9085686544218787, "grad_norm": 0.5794501304626465, "learning_rate": 6.352770391630014e-08, "loss": 0.0216, "step": 235880 }, { "epoch": 1.9086495671170807, "grad_norm": 0.30030426383018494, "learning_rate": 6.341555343213679e-08, "loss": 0.0192, "step": 235890 }, { "epoch": 1.9087304798122826, "grad_norm": 0.6964510083198547, "learning_rate": 6.330350139772845e-08, "loss": 0.022, "step": 235900 }, { "epoch": 1.9088113925074843, "grad_norm": 0.24076372385025024, "learning_rate": 6.319154781531e-08, "loss": 0.0272, "step": 235910 }, { "epoch": 1.9088923052026863, "grad_norm": 0.49853023886680603, "learning_rate": 6.307969268711411e-08, "loss": 0.0181, "step": 235920 }, { "epoch": 1.9089732178978882, "grad_norm": 0.12434442341327667, "learning_rate": 6.296793601537122e-08, "loss": 0.0301, "step": 235930 }, { "epoch": 1.90905413059309, "grad_norm": 0.4694010019302368, "learning_rate": 6.285627780231063e-08, "loss": 0.0207, "step": 235940 }, { "epoch": 1.909135043288292, "grad_norm": 0.5568390488624573, "learning_rate": 6.274471805015892e-08, "loss": 0.0222, "step": 235950 }, { "epoch": 1.9092159559834938, "grad_norm": 0.5021317601203918, "learning_rate": 6.263325676114041e-08, "loss": 0.0114, "step": 235960 }, { "epoch": 1.9092968686786955, "grad_norm": 0.0811130702495575, "learning_rate": 6.252189393747888e-08, "loss": 0.0133, "step": 235970 }, { "epoch": 1.9093777813738977, "grad_norm": 0.23279164731502533, "learning_rate": 6.241062958139421e-08, "loss": 0.0232, "step": 235980 }, { "epoch": 1.9094586940690994, "grad_norm": 0.7840554714202881, "learning_rate": 6.229946369510575e-08, "loss": 0.02, "step": 235990 }, { "epoch": 1.9095396067643013, "grad_norm": 0.24594715237617493, "learning_rate": 6.218839628083117e-08, "loss": 0.0211, "step": 236000 }, { "epoch": 1.9096205194595033, "grad_norm": 0.37199312448501587, "learning_rate": 6.207742734078426e-08, "loss": 0.0126, "step": 236010 }, { "epoch": 1.909701432154705, "grad_norm": 0.4769248366355896, "learning_rate": 6.196655687717879e-08, "loss": 0.0297, "step": 236020 }, { "epoch": 1.909782344849907, "grad_norm": 0.29969844222068787, "learning_rate": 6.185578489222577e-08, "loss": 0.0137, "step": 236030 }, { "epoch": 1.9098632575451089, "grad_norm": 0.6128672957420349, "learning_rate": 6.1745111388134e-08, "loss": 0.0256, "step": 236040 }, { "epoch": 1.9099441702403106, "grad_norm": 0.3806028664112091, "learning_rate": 6.163453636711114e-08, "loss": 0.0182, "step": 236050 }, { "epoch": 1.9100250829355125, "grad_norm": 0.15850333869457245, "learning_rate": 6.152405983136211e-08, "loss": 0.0147, "step": 236060 }, { "epoch": 1.9101059956307145, "grad_norm": 0.3446011245250702, "learning_rate": 6.14136817830896e-08, "loss": 0.0141, "step": 236070 }, { "epoch": 1.9101869083259162, "grad_norm": 0.32168641686439514, "learning_rate": 6.130340222449627e-08, "loss": 0.0164, "step": 236080 }, { "epoch": 1.9102678210211184, "grad_norm": 0.5075919032096863, "learning_rate": 6.119322115778037e-08, "loss": 0.0189, "step": 236090 }, { "epoch": 1.91034873371632, "grad_norm": 0.35564082860946655, "learning_rate": 6.108313858513848e-08, "loss": 0.0115, "step": 236100 }, { "epoch": 1.910429646411522, "grad_norm": 0.21631476283073425, "learning_rate": 6.097315450876773e-08, "loss": 0.0205, "step": 236110 }, { "epoch": 1.910510559106724, "grad_norm": 0.3576790988445282, "learning_rate": 6.086326893086081e-08, "loss": 0.0138, "step": 236120 }, { "epoch": 1.9105914718019257, "grad_norm": 0.6077043414115906, "learning_rate": 6.075348185360819e-08, "loss": 0.0199, "step": 236130 }, { "epoch": 1.9106723844971276, "grad_norm": 0.34269219636917114, "learning_rate": 6.064379327920034e-08, "loss": 0.0259, "step": 236140 }, { "epoch": 1.9107532971923296, "grad_norm": 0.3926878571510315, "learning_rate": 6.053420320982495e-08, "loss": 0.0221, "step": 236150 }, { "epoch": 1.9108342098875313, "grad_norm": 0.07400887459516525, "learning_rate": 6.04247116476675e-08, "loss": 0.0107, "step": 236160 }, { "epoch": 1.9109151225827332, "grad_norm": 0.246194526553154, "learning_rate": 6.031531859491069e-08, "loss": 0.0083, "step": 236170 }, { "epoch": 1.9109960352779352, "grad_norm": 0.49243631958961487, "learning_rate": 6.020602405373665e-08, "loss": 0.0291, "step": 236180 }, { "epoch": 1.9110769479731369, "grad_norm": 0.4269154667854309, "learning_rate": 6.009682802632533e-08, "loss": 0.0192, "step": 236190 }, { "epoch": 1.9111578606683388, "grad_norm": 0.4638839364051819, "learning_rate": 5.99877305148544e-08, "loss": 0.0119, "step": 236200 }, { "epoch": 1.9112387733635408, "grad_norm": 0.12580826878547668, "learning_rate": 5.987873152149937e-08, "loss": 0.0249, "step": 236210 }, { "epoch": 1.9113196860587425, "grad_norm": 0.4423263967037201, "learning_rate": 5.976983104843348e-08, "loss": 0.0135, "step": 236220 }, { "epoch": 1.9114005987539446, "grad_norm": 0.5511059761047363, "learning_rate": 5.966102909782945e-08, "loss": 0.0237, "step": 236230 }, { "epoch": 1.9114815114491464, "grad_norm": 0.3423944115638733, "learning_rate": 5.9552325671856095e-08, "loss": 0.0184, "step": 236240 }, { "epoch": 1.9115624241443483, "grad_norm": 0.3332745432853699, "learning_rate": 5.944372077268279e-08, "loss": 0.0132, "step": 236250 }, { "epoch": 1.9116433368395502, "grad_norm": 0.5052919387817383, "learning_rate": 5.933521440247336e-08, "loss": 0.0237, "step": 236260 }, { "epoch": 1.911724249534752, "grad_norm": 0.4840569496154785, "learning_rate": 5.9226806563393856e-08, "loss": 0.0225, "step": 236270 }, { "epoch": 1.911805162229954, "grad_norm": 0.4052272439002991, "learning_rate": 5.911849725760532e-08, "loss": 0.0156, "step": 236280 }, { "epoch": 1.9118860749251558, "grad_norm": 0.5456230044364929, "learning_rate": 5.901028648726659e-08, "loss": 0.0119, "step": 236290 }, { "epoch": 1.9119669876203575, "grad_norm": 0.41632986068725586, "learning_rate": 5.890217425453759e-08, "loss": 0.0158, "step": 236300 }, { "epoch": 1.9120479003155595, "grad_norm": 0.3691521883010864, "learning_rate": 5.879416056157383e-08, "loss": 0.0106, "step": 236310 }, { "epoch": 1.9121288130107614, "grad_norm": 0.2391962707042694, "learning_rate": 5.868624541052859e-08, "loss": 0.0187, "step": 236320 }, { "epoch": 1.9122097257059631, "grad_norm": 0.2873256504535675, "learning_rate": 5.857842880355513e-08, "loss": 0.0142, "step": 236330 }, { "epoch": 1.912290638401165, "grad_norm": 0.4690476059913635, "learning_rate": 5.847071074280286e-08, "loss": 0.0222, "step": 236340 }, { "epoch": 1.912371551096367, "grad_norm": 0.3602987825870514, "learning_rate": 5.836309123042006e-08, "loss": 0.0138, "step": 236350 }, { "epoch": 1.9124524637915687, "grad_norm": 0.15962593257427216, "learning_rate": 5.825557026855333e-08, "loss": 0.0194, "step": 236360 }, { "epoch": 1.912533376486771, "grad_norm": 0.5087319016456604, "learning_rate": 5.814814785934708e-08, "loss": 0.0252, "step": 236370 }, { "epoch": 1.9126142891819726, "grad_norm": 0.48929363489151, "learning_rate": 5.8040824004942934e-08, "loss": 0.0193, "step": 236380 }, { "epoch": 1.9126952018771746, "grad_norm": 0.06803528964519501, "learning_rate": 5.793359870748139e-08, "loss": 0.0154, "step": 236390 }, { "epoch": 1.9127761145723765, "grad_norm": 0.09061030298471451, "learning_rate": 5.782647196910129e-08, "loss": 0.0146, "step": 236400 }, { "epoch": 1.9128570272675782, "grad_norm": 0.33106571435928345, "learning_rate": 5.771944379193928e-08, "loss": 0.0106, "step": 236410 }, { "epoch": 1.9129379399627802, "grad_norm": 0.6318354606628418, "learning_rate": 5.761251417812863e-08, "loss": 0.0275, "step": 236420 }, { "epoch": 1.913018852657982, "grad_norm": 0.3238306939601898, "learning_rate": 5.7505683129802645e-08, "loss": 0.0211, "step": 236430 }, { "epoch": 1.9130997653531838, "grad_norm": 0.47834643721580505, "learning_rate": 5.73989506490924e-08, "loss": 0.0142, "step": 236440 }, { "epoch": 1.9131806780483858, "grad_norm": 0.5448523163795471, "learning_rate": 5.729231673812507e-08, "loss": 0.0211, "step": 236450 }, { "epoch": 1.9132615907435877, "grad_norm": 0.024819979444146156, "learning_rate": 5.7185781399027864e-08, "loss": 0.0174, "step": 236460 }, { "epoch": 1.9133425034387894, "grad_norm": 0.31132256984710693, "learning_rate": 5.707934463392628e-08, "loss": 0.0139, "step": 236470 }, { "epoch": 1.9134234161339916, "grad_norm": 0.32811540365219116, "learning_rate": 5.6973006444941416e-08, "loss": 0.0249, "step": 236480 }, { "epoch": 1.9135043288291933, "grad_norm": 0.4305489957332611, "learning_rate": 5.6866766834195455e-08, "loss": 0.0196, "step": 236490 }, { "epoch": 1.913585241524395, "grad_norm": 0.8890470862388611, "learning_rate": 5.676062580380615e-08, "loss": 0.0236, "step": 236500 }, { "epoch": 1.9136661542195972, "grad_norm": 0.30452021956443787, "learning_rate": 5.6654583355890134e-08, "loss": 0.0162, "step": 236510 }, { "epoch": 1.913747066914799, "grad_norm": 0.2648058831691742, "learning_rate": 5.654863949256295e-08, "loss": 0.0261, "step": 236520 }, { "epoch": 1.9138279796100008, "grad_norm": 0.18300852179527283, "learning_rate": 5.6442794215937344e-08, "loss": 0.0146, "step": 236530 }, { "epoch": 1.9139088923052028, "grad_norm": 0.30959320068359375, "learning_rate": 5.6337047528123304e-08, "loss": 0.0159, "step": 236540 }, { "epoch": 1.9139898050004045, "grad_norm": 0.15934410691261292, "learning_rate": 5.6231399431230794e-08, "loss": 0.0184, "step": 236550 }, { "epoch": 1.9140707176956064, "grad_norm": 0.2399425059556961, "learning_rate": 5.612584992736591e-08, "loss": 0.019, "step": 236560 }, { "epoch": 1.9141516303908084, "grad_norm": 0.5270121693611145, "learning_rate": 5.602039901863421e-08, "loss": 0.0156, "step": 236570 }, { "epoch": 1.91423254308601, "grad_norm": 0.24606820940971375, "learning_rate": 5.591504670713843e-08, "loss": 0.0137, "step": 236580 }, { "epoch": 1.914313455781212, "grad_norm": 0.025950899347662926, "learning_rate": 5.580979299497913e-08, "loss": 0.0131, "step": 236590 }, { "epoch": 1.914394368476414, "grad_norm": 0.2439149171113968, "learning_rate": 5.570463788425629e-08, "loss": 0.0117, "step": 236600 }, { "epoch": 1.9144752811716157, "grad_norm": 0.41599658131599426, "learning_rate": 5.559958137706656e-08, "loss": 0.0312, "step": 236610 }, { "epoch": 1.9145561938668179, "grad_norm": 0.3568357229232788, "learning_rate": 5.549462347550494e-08, "loss": 0.0215, "step": 236620 }, { "epoch": 1.9146371065620196, "grad_norm": 0.17857278883457184, "learning_rate": 5.5389764181665306e-08, "loss": 0.0179, "step": 236630 }, { "epoch": 1.9147180192572213, "grad_norm": 0.34582623839378357, "learning_rate": 5.528500349763766e-08, "loss": 0.0143, "step": 236640 }, { "epoch": 1.9147989319524235, "grad_norm": 0.6015791296958923, "learning_rate": 5.5180341425511985e-08, "loss": 0.0121, "step": 236650 }, { "epoch": 1.9148798446476252, "grad_norm": 0.4175756573677063, "learning_rate": 5.507577796737551e-08, "loss": 0.012, "step": 236660 }, { "epoch": 1.9149607573428271, "grad_norm": 0.4808451533317566, "learning_rate": 5.4971313125313787e-08, "loss": 0.0324, "step": 236670 }, { "epoch": 1.915041670038029, "grad_norm": 0.35911494493484497, "learning_rate": 5.4866946901409035e-08, "loss": 0.015, "step": 236680 }, { "epoch": 1.9151225827332308, "grad_norm": 0.2020435482263565, "learning_rate": 5.476267929774404e-08, "loss": 0.0211, "step": 236690 }, { "epoch": 1.9152034954284327, "grad_norm": 0.04775276780128479, "learning_rate": 5.465851031639713e-08, "loss": 0.0094, "step": 236700 }, { "epoch": 1.9152844081236347, "grad_norm": 0.32066237926483154, "learning_rate": 5.4554439959446095e-08, "loss": 0.0237, "step": 236710 }, { "epoch": 1.9153653208188364, "grad_norm": 0.26635822653770447, "learning_rate": 5.4450468228967046e-08, "loss": 0.0155, "step": 236720 }, { "epoch": 1.9154462335140383, "grad_norm": 0.4144715666770935, "learning_rate": 5.434659512703222e-08, "loss": 0.01, "step": 236730 }, { "epoch": 1.9155271462092403, "grad_norm": 0.4739997386932373, "learning_rate": 5.42428206557144e-08, "loss": 0.0127, "step": 236740 }, { "epoch": 1.915608058904442, "grad_norm": 0.21995025873184204, "learning_rate": 5.4139144817081934e-08, "loss": 0.0168, "step": 236750 }, { "epoch": 1.9156889715996441, "grad_norm": 0.4525047242641449, "learning_rate": 5.403556761320372e-08, "loss": 0.019, "step": 236760 }, { "epoch": 1.9157698842948458, "grad_norm": 0.3011232614517212, "learning_rate": 5.393208904614422e-08, "loss": 0.0157, "step": 236770 }, { "epoch": 1.9158507969900478, "grad_norm": 0.1934731900691986, "learning_rate": 5.3828709117967895e-08, "loss": 0.0157, "step": 236780 }, { "epoch": 1.9159317096852497, "grad_norm": 0.30721622705459595, "learning_rate": 5.372542783073587e-08, "loss": 0.0114, "step": 236790 }, { "epoch": 1.9160126223804514, "grad_norm": 1.212720274925232, "learning_rate": 5.362224518650871e-08, "loss": 0.0181, "step": 236800 }, { "epoch": 1.9160935350756534, "grad_norm": 0.020501652732491493, "learning_rate": 5.351916118734257e-08, "loss": 0.0105, "step": 236810 }, { "epoch": 1.9161744477708553, "grad_norm": 0.3647036552429199, "learning_rate": 5.3416175835295234e-08, "loss": 0.0171, "step": 236820 }, { "epoch": 1.916255360466057, "grad_norm": 0.001454247278161347, "learning_rate": 5.331328913241951e-08, "loss": 0.0108, "step": 236830 }, { "epoch": 1.916336273161259, "grad_norm": 0.3645637631416321, "learning_rate": 5.321050108076653e-08, "loss": 0.0174, "step": 236840 }, { "epoch": 1.916417185856461, "grad_norm": 0.15865030884742737, "learning_rate": 5.310781168238799e-08, "loss": 0.0191, "step": 236850 }, { "epoch": 1.9164980985516626, "grad_norm": 0.19179965555667877, "learning_rate": 5.3005220939330026e-08, "loss": 0.0327, "step": 236860 }, { "epoch": 1.9165790112468646, "grad_norm": 0.49484309554100037, "learning_rate": 5.290272885363934e-08, "loss": 0.0164, "step": 236870 }, { "epoch": 1.9166599239420665, "grad_norm": 0.2871313691139221, "learning_rate": 5.280033542736096e-08, "loss": 0.0119, "step": 236880 }, { "epoch": 1.9167408366372682, "grad_norm": 0.026221971958875656, "learning_rate": 5.269804066253437e-08, "loss": 0.024, "step": 236890 }, { "epoch": 1.9168217493324704, "grad_norm": 0.3186284005641937, "learning_rate": 5.259584456120237e-08, "loss": 0.015, "step": 236900 }, { "epoch": 1.9169026620276721, "grad_norm": 0.31922662258148193, "learning_rate": 5.2493747125401675e-08, "loss": 0.0082, "step": 236910 }, { "epoch": 1.916983574722874, "grad_norm": 0.2992717921733856, "learning_rate": 5.239174835716787e-08, "loss": 0.0107, "step": 236920 }, { "epoch": 1.917064487418076, "grad_norm": 0.3097183108329773, "learning_rate": 5.2289848258536e-08, "loss": 0.0235, "step": 236930 }, { "epoch": 1.9171454001132777, "grad_norm": 0.8651483058929443, "learning_rate": 5.218804683153833e-08, "loss": 0.0161, "step": 236940 }, { "epoch": 1.9172263128084797, "grad_norm": 0.329456090927124, "learning_rate": 5.208634407820434e-08, "loss": 0.0246, "step": 236950 }, { "epoch": 1.9173072255036816, "grad_norm": 0.49055275321006775, "learning_rate": 5.1984740000562974e-08, "loss": 0.0128, "step": 236960 }, { "epoch": 1.9173881381988833, "grad_norm": 0.4549384117126465, "learning_rate": 5.188323460064038e-08, "loss": 0.0163, "step": 236970 }, { "epoch": 1.9174690508940853, "grad_norm": 0.0018996167927980423, "learning_rate": 5.178182788046049e-08, "loss": 0.0206, "step": 236980 }, { "epoch": 1.9175499635892872, "grad_norm": 0.4342767894268036, "learning_rate": 5.168051984204614e-08, "loss": 0.0218, "step": 236990 }, { "epoch": 1.917630876284489, "grad_norm": 0.22745990753173828, "learning_rate": 5.1579310487417375e-08, "loss": 0.0119, "step": 237000 }, { "epoch": 1.917711788979691, "grad_norm": 0.333636999130249, "learning_rate": 5.147819981859259e-08, "loss": 0.0152, "step": 237010 }, { "epoch": 1.9177927016748928, "grad_norm": 0.17838498950004578, "learning_rate": 5.137718783758849e-08, "loss": 0.0274, "step": 237020 }, { "epoch": 1.9178736143700945, "grad_norm": 0.036207906901836395, "learning_rate": 5.127627454641904e-08, "loss": 0.0097, "step": 237030 }, { "epoch": 1.9179545270652967, "grad_norm": 0.44409066438674927, "learning_rate": 5.1175459947097605e-08, "loss": 0.0149, "step": 237040 }, { "epoch": 1.9180354397604984, "grad_norm": 0.3294028341770172, "learning_rate": 5.1074744041633704e-08, "loss": 0.0173, "step": 237050 }, { "epoch": 1.9181163524557003, "grad_norm": 0.3449544906616211, "learning_rate": 5.0974126832036286e-08, "loss": 0.0154, "step": 237060 }, { "epoch": 1.9181972651509023, "grad_norm": 0.0059366123750805855, "learning_rate": 5.0873608320312626e-08, "loss": 0.016, "step": 237070 }, { "epoch": 1.918278177846104, "grad_norm": 0.1663852334022522, "learning_rate": 5.0773188508466686e-08, "loss": 0.0116, "step": 237080 }, { "epoch": 1.918359090541306, "grad_norm": 0.4551653563976288, "learning_rate": 5.0672867398500746e-08, "loss": 0.0156, "step": 237090 }, { "epoch": 1.9184400032365079, "grad_norm": 0.21863088011741638, "learning_rate": 5.057264499241654e-08, "loss": 0.0184, "step": 237100 }, { "epoch": 1.9185209159317096, "grad_norm": 0.3115006983280182, "learning_rate": 5.0472521292211366e-08, "loss": 0.0142, "step": 237110 }, { "epoch": 1.9186018286269115, "grad_norm": 0.19990062713623047, "learning_rate": 5.037249629988361e-08, "loss": 0.0193, "step": 237120 }, { "epoch": 1.9186827413221135, "grad_norm": 0.49755433201789856, "learning_rate": 5.027257001742669e-08, "loss": 0.0203, "step": 237130 }, { "epoch": 1.9187636540173152, "grad_norm": 0.29361432790756226, "learning_rate": 5.017274244683457e-08, "loss": 0.0208, "step": 237140 }, { "epoch": 1.9188445667125174, "grad_norm": 0.3123808205127716, "learning_rate": 5.007301359009731e-08, "loss": 0.0203, "step": 237150 }, { "epoch": 1.918925479407719, "grad_norm": 0.28412482142448425, "learning_rate": 4.9973383449204435e-08, "loss": 0.0071, "step": 237160 }, { "epoch": 1.9190063921029208, "grad_norm": 0.5854151248931885, "learning_rate": 4.987385202614214e-08, "loss": 0.019, "step": 237170 }, { "epoch": 1.919087304798123, "grad_norm": 0.5287166833877563, "learning_rate": 4.977441932289551e-08, "loss": 0.0193, "step": 237180 }, { "epoch": 1.9191682174933247, "grad_norm": 0.5447837114334106, "learning_rate": 4.967508534144794e-08, "loss": 0.0111, "step": 237190 }, { "epoch": 1.9192491301885266, "grad_norm": 0.43534576892852783, "learning_rate": 4.9575850083780074e-08, "loss": 0.016, "step": 237200 }, { "epoch": 1.9193300428837285, "grad_norm": 0.5987301468849182, "learning_rate": 4.9476713551870895e-08, "loss": 0.0167, "step": 237210 }, { "epoch": 1.9194109555789303, "grad_norm": 0.22420287132263184, "learning_rate": 4.937767574769825e-08, "loss": 0.0193, "step": 237220 }, { "epoch": 1.9194918682741322, "grad_norm": 0.3860211670398712, "learning_rate": 4.927873667323613e-08, "loss": 0.0208, "step": 237230 }, { "epoch": 1.9195727809693341, "grad_norm": 0.4512924253940582, "learning_rate": 4.917989633045794e-08, "loss": 0.0199, "step": 237240 }, { "epoch": 1.9196536936645359, "grad_norm": 0.36028581857681274, "learning_rate": 4.908115472133546e-08, "loss": 0.0192, "step": 237250 }, { "epoch": 1.9197346063597378, "grad_norm": 0.4433028995990753, "learning_rate": 4.8982511847837644e-08, "loss": 0.0149, "step": 237260 }, { "epoch": 1.9198155190549397, "grad_norm": 0.3154814541339874, "learning_rate": 4.888396771193127e-08, "loss": 0.0214, "step": 237270 }, { "epoch": 1.9198964317501415, "grad_norm": 0.27490168809890747, "learning_rate": 4.878552231558198e-08, "loss": 0.016, "step": 237280 }, { "epoch": 1.9199773444453436, "grad_norm": 0.3300171494483948, "learning_rate": 4.868717566075321e-08, "loss": 0.021, "step": 237290 }, { "epoch": 1.9200582571405453, "grad_norm": 0.3642425835132599, "learning_rate": 4.85889277494056e-08, "loss": 0.0125, "step": 237300 }, { "epoch": 1.9201391698357473, "grad_norm": 0.2283235341310501, "learning_rate": 4.849077858349927e-08, "loss": 0.0245, "step": 237310 }, { "epoch": 1.9202200825309492, "grad_norm": 0.46860653162002563, "learning_rate": 4.839272816499097e-08, "loss": 0.0149, "step": 237320 }, { "epoch": 1.920300995226151, "grad_norm": 0.12289007008075714, "learning_rate": 4.8294776495836357e-08, "loss": 0.0142, "step": 237330 }, { "epoch": 1.9203819079213529, "grad_norm": 0.45642536878585815, "learning_rate": 4.819692357798944e-08, "loss": 0.0267, "step": 237340 }, { "epoch": 1.9204628206165548, "grad_norm": 0.3879375755786896, "learning_rate": 4.809916941340087e-08, "loss": 0.0265, "step": 237350 }, { "epoch": 1.9205437333117565, "grad_norm": 0.6468203663825989, "learning_rate": 4.80015140040202e-08, "loss": 0.0232, "step": 237360 }, { "epoch": 1.9206246460069585, "grad_norm": 0.21435163915157318, "learning_rate": 4.7903957351795315e-08, "loss": 0.0093, "step": 237370 }, { "epoch": 1.9207055587021604, "grad_norm": 0.22456669807434082, "learning_rate": 4.780649945867188e-08, "loss": 0.0266, "step": 237380 }, { "epoch": 1.9207864713973621, "grad_norm": 0.36505594849586487, "learning_rate": 4.770914032659335e-08, "loss": 0.0179, "step": 237390 }, { "epoch": 1.920867384092564, "grad_norm": 0.4845196008682251, "learning_rate": 4.761187995750094e-08, "loss": 0.0159, "step": 237400 }, { "epoch": 1.920948296787766, "grad_norm": 1.0177770853042603, "learning_rate": 4.7514718353334766e-08, "loss": 0.0202, "step": 237410 }, { "epoch": 1.9210292094829677, "grad_norm": 0.6335923075675964, "learning_rate": 4.741765551603272e-08, "loss": 0.0175, "step": 237420 }, { "epoch": 1.92111012217817, "grad_norm": 0.38761448860168457, "learning_rate": 4.732069144752993e-08, "loss": 0.0131, "step": 237430 }, { "epoch": 1.9211910348733716, "grad_norm": 0.3449862599372864, "learning_rate": 4.722382614975984e-08, "loss": 0.0155, "step": 237440 }, { "epoch": 1.9212719475685736, "grad_norm": 0.10944215953350067, "learning_rate": 4.712705962465536e-08, "loss": 0.0162, "step": 237450 }, { "epoch": 1.9213528602637755, "grad_norm": 0.09156042337417603, "learning_rate": 4.7030391874145486e-08, "loss": 0.0167, "step": 237460 }, { "epoch": 1.9214337729589772, "grad_norm": 0.5718046426773071, "learning_rate": 4.693382290015813e-08, "loss": 0.0256, "step": 237470 }, { "epoch": 1.9215146856541792, "grad_norm": 0.05960233882069588, "learning_rate": 4.6837352704619535e-08, "loss": 0.0201, "step": 237480 }, { "epoch": 1.921595598349381, "grad_norm": 0.439441442489624, "learning_rate": 4.67409812894537e-08, "loss": 0.0183, "step": 237490 }, { "epoch": 1.9216765110445828, "grad_norm": 0.3401564359664917, "learning_rate": 4.6644708656581326e-08, "loss": 0.0169, "step": 237500 }, { "epoch": 1.9217574237397848, "grad_norm": 0.45661160349845886, "learning_rate": 4.654853480792365e-08, "loss": 0.0205, "step": 237510 }, { "epoch": 1.9218383364349867, "grad_norm": 0.491720974445343, "learning_rate": 4.6452459745398584e-08, "loss": 0.011, "step": 237520 }, { "epoch": 1.9219192491301884, "grad_norm": 0.6335533261299133, "learning_rate": 4.6356483470921255e-08, "loss": 0.0122, "step": 237530 }, { "epoch": 1.9220001618253904, "grad_norm": 0.2972162663936615, "learning_rate": 4.62606059864068e-08, "loss": 0.0179, "step": 237540 }, { "epoch": 1.9220810745205923, "grad_norm": 0.4062803089618683, "learning_rate": 4.6164827293766476e-08, "loss": 0.0131, "step": 237550 }, { "epoch": 1.922161987215794, "grad_norm": 0.3034387230873108, "learning_rate": 4.606914739491042e-08, "loss": 0.008, "step": 237560 }, { "epoch": 1.9222428999109962, "grad_norm": 0.7156654000282288, "learning_rate": 4.59735662917471e-08, "loss": 0.0283, "step": 237570 }, { "epoch": 1.922323812606198, "grad_norm": 1.3937699794769287, "learning_rate": 4.5878083986182764e-08, "loss": 0.0334, "step": 237580 }, { "epoch": 1.9224047253013998, "grad_norm": 0.3644533157348633, "learning_rate": 4.578270048012145e-08, "loss": 0.0166, "step": 237590 }, { "epoch": 1.9224856379966018, "grad_norm": 0.5532845258712769, "learning_rate": 4.568741577546554e-08, "loss": 0.0172, "step": 237600 }, { "epoch": 1.9225665506918035, "grad_norm": 0.46336543560028076, "learning_rate": 4.55922298741146e-08, "loss": 0.0332, "step": 237610 }, { "epoch": 1.9226474633870054, "grad_norm": 0.4556630253791809, "learning_rate": 4.5497142777967687e-08, "loss": 0.0216, "step": 237620 }, { "epoch": 1.9227283760822074, "grad_norm": 0.534515917301178, "learning_rate": 4.5402154488921044e-08, "loss": 0.0139, "step": 237630 }, { "epoch": 1.922809288777409, "grad_norm": 0.14186809957027435, "learning_rate": 4.530726500886873e-08, "loss": 0.0195, "step": 237640 }, { "epoch": 1.922890201472611, "grad_norm": 0.5324267148971558, "learning_rate": 4.5212474339703106e-08, "loss": 0.0147, "step": 237650 }, { "epoch": 1.922971114167813, "grad_norm": 0.20648112893104553, "learning_rate": 4.511778248331433e-08, "loss": 0.0182, "step": 237660 }, { "epoch": 1.9230520268630147, "grad_norm": 0.5943349003791809, "learning_rate": 4.502318944159145e-08, "loss": 0.0204, "step": 237670 }, { "epoch": 1.9231329395582168, "grad_norm": 0.5157280564308167, "learning_rate": 4.492869521642074e-08, "loss": 0.0152, "step": 237680 }, { "epoch": 1.9232138522534186, "grad_norm": 0.5156855583190918, "learning_rate": 4.4834299809686235e-08, "loss": 0.0202, "step": 237690 }, { "epoch": 1.9232947649486203, "grad_norm": 0.3352922201156616, "learning_rate": 4.474000322327143e-08, "loss": 0.0145, "step": 237700 }, { "epoch": 1.9233756776438224, "grad_norm": 0.49886858463287354, "learning_rate": 4.464580545905595e-08, "loss": 0.0162, "step": 237710 }, { "epoch": 1.9234565903390242, "grad_norm": 0.20655347406864166, "learning_rate": 4.455170651891827e-08, "loss": 0.0171, "step": 237720 }, { "epoch": 1.923537503034226, "grad_norm": 0.2139090597629547, "learning_rate": 4.445770640473579e-08, "loss": 0.0165, "step": 237730 }, { "epoch": 1.923618415729428, "grad_norm": 0.48451757431030273, "learning_rate": 4.436380511838256e-08, "loss": 0.0192, "step": 237740 }, { "epoch": 1.9236993284246298, "grad_norm": 0.23056663572788239, "learning_rate": 4.427000266173154e-08, "loss": 0.0158, "step": 237750 }, { "epoch": 1.9237802411198317, "grad_norm": 0.1657140702009201, "learning_rate": 4.4176299036653436e-08, "loss": 0.0136, "step": 237760 }, { "epoch": 1.9238611538150336, "grad_norm": 0.11132945865392685, "learning_rate": 4.408269424501677e-08, "loss": 0.0164, "step": 237770 }, { "epoch": 1.9239420665102354, "grad_norm": 0.24178054928779602, "learning_rate": 4.398918828868837e-08, "loss": 0.0161, "step": 237780 }, { "epoch": 1.9240229792054373, "grad_norm": 0.4194687604904175, "learning_rate": 4.389578116953286e-08, "loss": 0.0143, "step": 237790 }, { "epoch": 1.9241038919006392, "grad_norm": 0.008165040984749794, "learning_rate": 4.38024728894132e-08, "loss": 0.0208, "step": 237800 }, { "epoch": 1.924184804595841, "grad_norm": 0.312531441450119, "learning_rate": 4.370926345019011e-08, "loss": 0.0129, "step": 237810 }, { "epoch": 1.9242657172910431, "grad_norm": 0.34313225746154785, "learning_rate": 4.361615285372267e-08, "loss": 0.0139, "step": 237820 }, { "epoch": 1.9243466299862448, "grad_norm": 0.2960604429244995, "learning_rate": 4.352314110186773e-08, "loss": 0.0215, "step": 237830 }, { "epoch": 1.9244275426814466, "grad_norm": 0.480852335691452, "learning_rate": 4.343022819648046e-08, "loss": 0.0195, "step": 237840 }, { "epoch": 1.9245084553766487, "grad_norm": 0.4400949776172638, "learning_rate": 4.333741413941273e-08, "loss": 0.0127, "step": 237850 }, { "epoch": 1.9245893680718504, "grad_norm": 0.2945898175239563, "learning_rate": 4.3244698932516926e-08, "loss": 0.0171, "step": 237860 }, { "epoch": 1.9246702807670524, "grad_norm": 0.3651193380355835, "learning_rate": 4.3152082577641584e-08, "loss": 0.0225, "step": 237870 }, { "epoch": 1.9247511934622543, "grad_norm": 0.32880258560180664, "learning_rate": 4.3059565076633e-08, "loss": 0.0155, "step": 237880 }, { "epoch": 1.924832106157456, "grad_norm": 0.611109733581543, "learning_rate": 4.296714643133693e-08, "loss": 0.0284, "step": 237890 }, { "epoch": 1.924913018852658, "grad_norm": 0.5905559062957764, "learning_rate": 4.287482664359688e-08, "loss": 0.0157, "step": 237900 }, { "epoch": 1.92499393154786, "grad_norm": 0.46417737007141113, "learning_rate": 4.2782605715253053e-08, "loss": 0.0121, "step": 237910 }, { "epoch": 1.9250748442430616, "grad_norm": 0.20880310237407684, "learning_rate": 4.26904836481451e-08, "loss": 0.009, "step": 237920 }, { "epoch": 1.9251557569382636, "grad_norm": 0.5476949214935303, "learning_rate": 4.2598460444109866e-08, "loss": 0.018, "step": 237930 }, { "epoch": 1.9252366696334655, "grad_norm": 0.32791656255722046, "learning_rate": 4.250653610498312e-08, "loss": 0.0106, "step": 237940 }, { "epoch": 1.9253175823286672, "grad_norm": 0.17634724080562592, "learning_rate": 4.2414710632597835e-08, "loss": 0.0159, "step": 237950 }, { "epoch": 1.9253984950238694, "grad_norm": 0.2602293789386749, "learning_rate": 4.232298402878476e-08, "loss": 0.0131, "step": 237960 }, { "epoch": 1.9254794077190711, "grad_norm": 0.6432095170021057, "learning_rate": 4.223135629537412e-08, "loss": 0.023, "step": 237970 }, { "epoch": 1.925560320414273, "grad_norm": 0.4120767116546631, "learning_rate": 4.213982743419276e-08, "loss": 0.0191, "step": 237980 }, { "epoch": 1.925641233109475, "grad_norm": 0.6066969037055969, "learning_rate": 4.20483974470659e-08, "loss": 0.0158, "step": 237990 }, { "epoch": 1.9257221458046767, "grad_norm": 0.37601226568222046, "learning_rate": 4.195706633581709e-08, "loss": 0.0176, "step": 238000 }, { "epoch": 1.9258030584998787, "grad_norm": 0.6184188723564148, "learning_rate": 4.1865834102267634e-08, "loss": 0.0161, "step": 238010 }, { "epoch": 1.9258839711950806, "grad_norm": 0.39895060658454895, "learning_rate": 4.177470074823664e-08, "loss": 0.0394, "step": 238020 }, { "epoch": 1.9259648838902823, "grad_norm": 0.023401383310556412, "learning_rate": 4.1683666275542654e-08, "loss": 0.0139, "step": 238030 }, { "epoch": 1.9260457965854842, "grad_norm": 0.5290699005126953, "learning_rate": 4.159273068599978e-08, "loss": 0.0227, "step": 238040 }, { "epoch": 1.9261267092806862, "grad_norm": 0.34053683280944824, "learning_rate": 4.150189398142268e-08, "loss": 0.0143, "step": 238050 }, { "epoch": 1.926207621975888, "grad_norm": 0.37422648072242737, "learning_rate": 4.141115616362268e-08, "loss": 0.0147, "step": 238060 }, { "epoch": 1.9262885346710898, "grad_norm": 0.4515268802642822, "learning_rate": 4.132051723440833e-08, "loss": 0.0242, "step": 238070 }, { "epoch": 1.9263694473662918, "grad_norm": 0.3306364119052887, "learning_rate": 4.1229977195588744e-08, "loss": 0.0175, "step": 238080 }, { "epoch": 1.9264503600614935, "grad_norm": 0.3161409795284271, "learning_rate": 4.113953604896859e-08, "loss": 0.0167, "step": 238090 }, { "epoch": 1.9265312727566957, "grad_norm": 0.31317996978759766, "learning_rate": 4.104919379635197e-08, "loss": 0.022, "step": 238100 }, { "epoch": 1.9266121854518974, "grad_norm": 0.4054119884967804, "learning_rate": 4.095895043953968e-08, "loss": 0.0219, "step": 238110 }, { "epoch": 1.9266930981470993, "grad_norm": 0.34728971123695374, "learning_rate": 4.086880598033305e-08, "loss": 0.0123, "step": 238120 }, { "epoch": 1.9267740108423013, "grad_norm": 0.025573618710041046, "learning_rate": 4.077876042052842e-08, "loss": 0.0122, "step": 238130 }, { "epoch": 1.926854923537503, "grad_norm": 0.2755664885044098, "learning_rate": 4.068881376192213e-08, "loss": 0.0165, "step": 238140 }, { "epoch": 1.926935836232705, "grad_norm": 0.35817283391952515, "learning_rate": 4.059896600630775e-08, "loss": 0.0167, "step": 238150 }, { "epoch": 1.9270167489279069, "grad_norm": 0.42685946822166443, "learning_rate": 4.0509217155477174e-08, "loss": 0.0267, "step": 238160 }, { "epoch": 1.9270976616231086, "grad_norm": 0.3550495505332947, "learning_rate": 4.041956721122065e-08, "loss": 0.0312, "step": 238170 }, { "epoch": 1.9271785743183105, "grad_norm": 0.39010557532310486, "learning_rate": 4.033001617532561e-08, "loss": 0.0234, "step": 238180 }, { "epoch": 1.9272594870135125, "grad_norm": 0.07537063956260681, "learning_rate": 4.024056404957788e-08, "loss": 0.017, "step": 238190 }, { "epoch": 1.9273403997087142, "grad_norm": 0.31234318017959595, "learning_rate": 4.0151210835761567e-08, "loss": 0.0119, "step": 238200 }, { "epoch": 1.9274213124039161, "grad_norm": 0.37528371810913086, "learning_rate": 4.006195653565914e-08, "loss": 0.0131, "step": 238210 }, { "epoch": 1.927502225099118, "grad_norm": 0.3159715533256531, "learning_rate": 3.9972801151049734e-08, "loss": 0.0224, "step": 238220 }, { "epoch": 1.9275831377943198, "grad_norm": 0.05553380027413368, "learning_rate": 3.9883744683711925e-08, "loss": 0.0142, "step": 238230 }, { "epoch": 1.927664050489522, "grad_norm": 0.19132877886295319, "learning_rate": 3.9794787135420956e-08, "loss": 0.0124, "step": 238240 }, { "epoch": 1.9277449631847237, "grad_norm": 0.38519254326820374, "learning_rate": 3.9705928507952075e-08, "loss": 0.0104, "step": 238250 }, { "epoch": 1.9278258758799256, "grad_norm": 0.31549301743507385, "learning_rate": 3.961716880307609e-08, "loss": 0.0289, "step": 238260 }, { "epoch": 1.9279067885751275, "grad_norm": 0.22814278304576874, "learning_rate": 3.9528508022564916e-08, "loss": 0.0161, "step": 238270 }, { "epoch": 1.9279877012703293, "grad_norm": 0.6403940320014954, "learning_rate": 3.943994616818492e-08, "loss": 0.0246, "step": 238280 }, { "epoch": 1.9280686139655312, "grad_norm": 0.2510818541049957, "learning_rate": 3.9351483241703013e-08, "loss": 0.0277, "step": 238290 }, { "epoch": 1.9281495266607331, "grad_norm": 0.5854202508926392, "learning_rate": 3.926311924488391e-08, "loss": 0.0176, "step": 238300 }, { "epoch": 1.9282304393559349, "grad_norm": 0.5118860602378845, "learning_rate": 3.9174854179488966e-08, "loss": 0.0112, "step": 238310 }, { "epoch": 1.9283113520511368, "grad_norm": 0.19169753789901733, "learning_rate": 3.9086688047278445e-08, "loss": 0.0153, "step": 238320 }, { "epoch": 1.9283922647463387, "grad_norm": 0.4622333347797394, "learning_rate": 3.8998620850011494e-08, "loss": 0.016, "step": 238330 }, { "epoch": 1.9284731774415405, "grad_norm": 0.18026882410049438, "learning_rate": 3.891065258944338e-08, "loss": 0.0201, "step": 238340 }, { "epoch": 1.9285540901367426, "grad_norm": 0.5824918746948242, "learning_rate": 3.882278326732936e-08, "loss": 0.019, "step": 238350 }, { "epoch": 1.9286350028319443, "grad_norm": 0.27187371253967285, "learning_rate": 3.873501288542137e-08, "loss": 0.0088, "step": 238360 }, { "epoch": 1.928715915527146, "grad_norm": 0.29926061630249023, "learning_rate": 3.864734144547022e-08, "loss": 0.0199, "step": 238370 }, { "epoch": 1.9287968282223482, "grad_norm": 0.35378044843673706, "learning_rate": 3.855976894922342e-08, "loss": 0.0165, "step": 238380 }, { "epoch": 1.92887774091755, "grad_norm": 0.3449859917163849, "learning_rate": 3.847229539842845e-08, "loss": 0.0172, "step": 238390 }, { "epoch": 1.9289586536127519, "grad_norm": 0.368833065032959, "learning_rate": 3.8384920794828365e-08, "loss": 0.0194, "step": 238400 }, { "epoch": 1.9290395663079538, "grad_norm": 0.4530235528945923, "learning_rate": 3.829764514016787e-08, "loss": 0.0111, "step": 238410 }, { "epoch": 1.9291204790031555, "grad_norm": 1.0058780908584595, "learning_rate": 3.821046843618559e-08, "loss": 0.0155, "step": 238420 }, { "epoch": 1.9292013916983575, "grad_norm": 0.25398313999176025, "learning_rate": 3.812339068462012e-08, "loss": 0.0122, "step": 238430 }, { "epoch": 1.9292823043935594, "grad_norm": 0.234984889626503, "learning_rate": 3.8036411887209526e-08, "loss": 0.0261, "step": 238440 }, { "epoch": 1.9293632170887611, "grad_norm": 0.5805809497833252, "learning_rate": 3.794953204568741e-08, "loss": 0.0157, "step": 238450 }, { "epoch": 1.929444129783963, "grad_norm": 0.13033060729503632, "learning_rate": 3.786275116178628e-08, "loss": 0.0101, "step": 238460 }, { "epoch": 1.929525042479165, "grad_norm": 0.5011539459228516, "learning_rate": 3.777606923723698e-08, "loss": 0.0251, "step": 238470 }, { "epoch": 1.9296059551743667, "grad_norm": 0.49428442120552063, "learning_rate": 3.768948627376812e-08, "loss": 0.0165, "step": 238480 }, { "epoch": 1.929686867869569, "grad_norm": 0.43612122535705566, "learning_rate": 3.76030022731072e-08, "loss": 0.0139, "step": 238490 }, { "epoch": 1.9297677805647706, "grad_norm": 0.12778230011463165, "learning_rate": 3.751661723697786e-08, "loss": 0.02, "step": 238500 }, { "epoch": 1.9298486932599723, "grad_norm": 0.2267606407403946, "learning_rate": 3.743033116710315e-08, "loss": 0.0183, "step": 238510 }, { "epoch": 1.9299296059551745, "grad_norm": 0.44556134939193726, "learning_rate": 3.734414406520448e-08, "loss": 0.0163, "step": 238520 }, { "epoch": 1.9300105186503762, "grad_norm": 0.3902510702610016, "learning_rate": 3.725805593300047e-08, "loss": 0.0125, "step": 238530 }, { "epoch": 1.9300914313455781, "grad_norm": 0.33740249276161194, "learning_rate": 3.7172066772206975e-08, "loss": 0.0119, "step": 238540 }, { "epoch": 1.93017234404078, "grad_norm": 0.5150240659713745, "learning_rate": 3.7086176584539834e-08, "loss": 0.0185, "step": 238550 }, { "epoch": 1.9302532567359818, "grad_norm": 0.814717710018158, "learning_rate": 3.7000385371711575e-08, "loss": 0.0231, "step": 238560 }, { "epoch": 1.9303341694311837, "grad_norm": 0.48431286215782166, "learning_rate": 3.691469313543361e-08, "loss": 0.016, "step": 238570 }, { "epoch": 1.9304150821263857, "grad_norm": 0.256314218044281, "learning_rate": 3.6829099877414563e-08, "loss": 0.0226, "step": 238580 }, { "epoch": 1.9304959948215874, "grad_norm": 0.07282917201519012, "learning_rate": 3.674360559936141e-08, "loss": 0.013, "step": 238590 }, { "epoch": 1.9305769075167893, "grad_norm": 0.10311681032180786, "learning_rate": 3.665821030297889e-08, "loss": 0.013, "step": 238600 }, { "epoch": 1.9306578202119913, "grad_norm": 0.08966629207134247, "learning_rate": 3.657291398997065e-08, "loss": 0.0085, "step": 238610 }, { "epoch": 1.930738732907193, "grad_norm": 1.1641831398010254, "learning_rate": 3.6487716662036985e-08, "loss": 0.0258, "step": 238620 }, { "epoch": 1.9308196456023952, "grad_norm": 0.35873687267303467, "learning_rate": 3.6402618320877656e-08, "loss": 0.012, "step": 238630 }, { "epoch": 1.9309005582975969, "grad_norm": 0.25964081287384033, "learning_rate": 3.6317618968189084e-08, "loss": 0.0137, "step": 238640 }, { "epoch": 1.9309814709927988, "grad_norm": 0.6831008195877075, "learning_rate": 3.623271860566713e-08, "loss": 0.0116, "step": 238650 }, { "epoch": 1.9310623836880008, "grad_norm": 0.607071042060852, "learning_rate": 3.614791723500488e-08, "loss": 0.0204, "step": 238660 }, { "epoch": 1.9311432963832025, "grad_norm": 0.4258560240268707, "learning_rate": 3.6063214857892656e-08, "loss": 0.0142, "step": 238670 }, { "epoch": 1.9312242090784044, "grad_norm": 0.3351978361606598, "learning_rate": 3.5978611476020776e-08, "loss": 0.0201, "step": 238680 }, { "epoch": 1.9313051217736064, "grad_norm": 0.4064534604549408, "learning_rate": 3.589410709107566e-08, "loss": 0.0147, "step": 238690 }, { "epoch": 1.931386034468808, "grad_norm": 0.3952770233154297, "learning_rate": 3.580970170474318e-08, "loss": 0.0188, "step": 238700 }, { "epoch": 1.93146694716401, "grad_norm": 0.27724725008010864, "learning_rate": 3.572539531870589e-08, "loss": 0.0082, "step": 238710 }, { "epoch": 1.931547859859212, "grad_norm": 0.4496099650859833, "learning_rate": 3.564118793464633e-08, "loss": 0.0307, "step": 238720 }, { "epoch": 1.9316287725544137, "grad_norm": 0.355726033449173, "learning_rate": 3.5557079554242034e-08, "loss": 0.0159, "step": 238730 }, { "epoch": 1.9317096852496156, "grad_norm": 0.04642411693930626, "learning_rate": 3.5473070179171674e-08, "loss": 0.0185, "step": 238740 }, { "epoch": 1.9317905979448176, "grad_norm": 0.21211844682693481, "learning_rate": 3.538915981111057e-08, "loss": 0.0128, "step": 238750 }, { "epoch": 1.9318715106400193, "grad_norm": 0.2186059206724167, "learning_rate": 3.530534845173239e-08, "loss": 0.0161, "step": 238760 }, { "epoch": 1.9319524233352214, "grad_norm": 0.33966559171676636, "learning_rate": 3.5221636102707456e-08, "loss": 0.0172, "step": 238770 }, { "epoch": 1.9320333360304232, "grad_norm": 0.11197374761104584, "learning_rate": 3.5138022765706105e-08, "loss": 0.0174, "step": 238780 }, { "epoch": 1.932114248725625, "grad_norm": 0.26928576827049255, "learning_rate": 3.505450844239533e-08, "loss": 0.0149, "step": 238790 }, { "epoch": 1.932195161420827, "grad_norm": 0.3509538769721985, "learning_rate": 3.4971093134441026e-08, "loss": 0.0142, "step": 238800 }, { "epoch": 1.9322760741160288, "grad_norm": 0.31736043095588684, "learning_rate": 3.488777684350686e-08, "loss": 0.0185, "step": 238810 }, { "epoch": 1.9323569868112307, "grad_norm": 0.3568043112754822, "learning_rate": 3.480455957125428e-08, "loss": 0.0157, "step": 238820 }, { "epoch": 1.9324378995064326, "grad_norm": 0.7676675319671631, "learning_rate": 3.472144131934252e-08, "loss": 0.0298, "step": 238830 }, { "epoch": 1.9325188122016344, "grad_norm": 0.2268141657114029, "learning_rate": 3.4638422089429134e-08, "loss": 0.0188, "step": 238840 }, { "epoch": 1.9325997248968363, "grad_norm": 0.15630857646465302, "learning_rate": 3.455550188317058e-08, "loss": 0.0168, "step": 238850 }, { "epoch": 1.9326806375920382, "grad_norm": 0.20637235045433044, "learning_rate": 3.4472680702219984e-08, "loss": 0.0285, "step": 238860 }, { "epoch": 1.93276155028724, "grad_norm": 0.21643221378326416, "learning_rate": 3.43899585482288e-08, "loss": 0.0126, "step": 238870 }, { "epoch": 1.9328424629824421, "grad_norm": 0.5518991351127625, "learning_rate": 3.4307335422847366e-08, "loss": 0.0073, "step": 238880 }, { "epoch": 1.9329233756776438, "grad_norm": 0.43068116903305054, "learning_rate": 3.4224811327722705e-08, "loss": 0.0211, "step": 238890 }, { "epoch": 1.9330042883728455, "grad_norm": 0.8869194984436035, "learning_rate": 3.414238626450073e-08, "loss": 0.0174, "step": 238900 }, { "epoch": 1.9330852010680477, "grad_norm": 1.2219243049621582, "learning_rate": 3.406006023482622e-08, "loss": 0.0268, "step": 238910 }, { "epoch": 1.9331661137632494, "grad_norm": 0.48994824290275574, "learning_rate": 3.397783324033954e-08, "loss": 0.0132, "step": 238920 }, { "epoch": 1.9332470264584514, "grad_norm": 0.47674646973609924, "learning_rate": 3.38957052826816e-08, "loss": 0.0097, "step": 238930 }, { "epoch": 1.9333279391536533, "grad_norm": 0.42412272095680237, "learning_rate": 3.381367636348998e-08, "loss": 0.0145, "step": 238940 }, { "epoch": 1.933408851848855, "grad_norm": 0.6849055290222168, "learning_rate": 3.3731746484400585e-08, "loss": 0.0202, "step": 238950 }, { "epoch": 1.933489764544057, "grad_norm": 0.26940861344337463, "learning_rate": 3.3649915647047115e-08, "loss": 0.0119, "step": 238960 }, { "epoch": 1.933570677239259, "grad_norm": 0.5832215547561646, "learning_rate": 3.35681838530616e-08, "loss": 0.0134, "step": 238970 }, { "epoch": 1.9336515899344606, "grad_norm": 0.24339494109153748, "learning_rate": 3.348655110407384e-08, "loss": 0.0198, "step": 238980 }, { "epoch": 1.9337325026296626, "grad_norm": 0.3044474422931671, "learning_rate": 3.340501740171254e-08, "loss": 0.0164, "step": 238990 }, { "epoch": 1.9338134153248645, "grad_norm": 0.7720367908477783, "learning_rate": 3.332358274760306e-08, "loss": 0.0179, "step": 239000 }, { "epoch": 1.9338943280200662, "grad_norm": 0.2179841250181198, "learning_rate": 3.3242247143369654e-08, "loss": 0.0129, "step": 239010 }, { "epoch": 1.9339752407152684, "grad_norm": 0.1305253654718399, "learning_rate": 3.3161010590633816e-08, "loss": 0.0112, "step": 239020 }, { "epoch": 1.93405615341047, "grad_norm": 0.39300310611724854, "learning_rate": 3.3079873091016455e-08, "loss": 0.017, "step": 239030 }, { "epoch": 1.9341370661056718, "grad_norm": 0.2834547460079193, "learning_rate": 3.2998834646135734e-08, "loss": 0.0173, "step": 239040 }, { "epoch": 1.934217978800874, "grad_norm": 0.21113231778144836, "learning_rate": 3.2917895257607025e-08, "loss": 0.0123, "step": 239050 }, { "epoch": 1.9342988914960757, "grad_norm": 0.19608230888843536, "learning_rate": 3.2837054927045144e-08, "loss": 0.0282, "step": 239060 }, { "epoch": 1.9343798041912776, "grad_norm": 0.1644613891839981, "learning_rate": 3.275631365606213e-08, "loss": 0.0188, "step": 239070 }, { "epoch": 1.9344607168864796, "grad_norm": 0.242248997092247, "learning_rate": 3.2675671446267824e-08, "loss": 0.0179, "step": 239080 }, { "epoch": 1.9345416295816813, "grad_norm": 0.6196688413619995, "learning_rate": 3.259512829927092e-08, "loss": 0.024, "step": 239090 }, { "epoch": 1.9346225422768832, "grad_norm": 0.43032106757164, "learning_rate": 3.251468421667792e-08, "loss": 0.0171, "step": 239100 }, { "epoch": 1.9347034549720852, "grad_norm": 0.36750465631484985, "learning_rate": 3.243433920009198e-08, "loss": 0.0201, "step": 239110 }, { "epoch": 1.934784367667287, "grad_norm": 0.33443745970726013, "learning_rate": 3.235409325111627e-08, "loss": 0.021, "step": 239120 }, { "epoch": 1.9348652803624888, "grad_norm": 0.4155198931694031, "learning_rate": 3.227394637135117e-08, "loss": 0.0181, "step": 239130 }, { "epoch": 1.9349461930576908, "grad_norm": 0.00751157570630312, "learning_rate": 3.2193898562394855e-08, "loss": 0.0124, "step": 239140 }, { "epoch": 1.9350271057528925, "grad_norm": 0.30203360319137573, "learning_rate": 3.211394982584437e-08, "loss": 0.0154, "step": 239150 }, { "epoch": 1.9351080184480947, "grad_norm": 0.45487868785858154, "learning_rate": 3.203410016329289e-08, "loss": 0.01, "step": 239160 }, { "epoch": 1.9351889311432964, "grad_norm": 0.15673330426216125, "learning_rate": 3.1954349576333034e-08, "loss": 0.0253, "step": 239170 }, { "epoch": 1.935269843838498, "grad_norm": 0.4425937235355377, "learning_rate": 3.18746980665563e-08, "loss": 0.0184, "step": 239180 }, { "epoch": 1.9353507565337003, "grad_norm": 0.486157089471817, "learning_rate": 3.1795145635550305e-08, "loss": 0.021, "step": 239190 }, { "epoch": 1.935431669228902, "grad_norm": 0.5134220719337463, "learning_rate": 3.1715692284902125e-08, "loss": 0.0135, "step": 239200 }, { "epoch": 1.935512581924104, "grad_norm": 0.2764894366264343, "learning_rate": 3.1636338016195476e-08, "loss": 0.0229, "step": 239210 }, { "epoch": 1.9355934946193059, "grad_norm": 0.32683712244033813, "learning_rate": 3.155708283101355e-08, "loss": 0.0272, "step": 239220 }, { "epoch": 1.9356744073145076, "grad_norm": 0.5441148281097412, "learning_rate": 3.147792673093675e-08, "loss": 0.022, "step": 239230 }, { "epoch": 1.9357553200097095, "grad_norm": 0.47385090589523315, "learning_rate": 3.13988697175438e-08, "loss": 0.0213, "step": 239240 }, { "epoch": 1.9358362327049115, "grad_norm": 0.4108215868473053, "learning_rate": 3.131991179241067e-08, "loss": 0.0094, "step": 239250 }, { "epoch": 1.9359171454001132, "grad_norm": 0.38444268703460693, "learning_rate": 3.124105295711333e-08, "loss": 0.0104, "step": 239260 }, { "epoch": 1.935998058095315, "grad_norm": 0.1454204022884369, "learning_rate": 3.116229321322328e-08, "loss": 0.0144, "step": 239270 }, { "epoch": 1.936078970790517, "grad_norm": 0.25843966007232666, "learning_rate": 3.108363256231151e-08, "loss": 0.018, "step": 239280 }, { "epoch": 1.9361598834857188, "grad_norm": 0.19327309727668762, "learning_rate": 3.1005071005947294e-08, "loss": 0.0239, "step": 239290 }, { "epoch": 1.936240796180921, "grad_norm": 0.3181907534599304, "learning_rate": 3.0926608545696626e-08, "loss": 0.0172, "step": 239300 }, { "epoch": 1.9363217088761226, "grad_norm": 0.3507426381111145, "learning_rate": 3.084824518312435e-08, "loss": 0.0309, "step": 239310 }, { "epoch": 1.9364026215713246, "grad_norm": 0.3256869614124298, "learning_rate": 3.0769980919793664e-08, "loss": 0.022, "step": 239320 }, { "epoch": 1.9364835342665265, "grad_norm": 0.2620086967945099, "learning_rate": 3.069181575726554e-08, "loss": 0.0128, "step": 239330 }, { "epoch": 1.9365644469617282, "grad_norm": 0.008331737481057644, "learning_rate": 3.0613749697098184e-08, "loss": 0.0202, "step": 239340 }, { "epoch": 1.9366453596569302, "grad_norm": 0.5088812708854675, "learning_rate": 3.053578274084867e-08, "loss": 0.0171, "step": 239350 }, { "epoch": 1.9367262723521321, "grad_norm": 0.4658595025539398, "learning_rate": 3.045791489007188e-08, "loss": 0.0231, "step": 239360 }, { "epoch": 1.9368071850473338, "grad_norm": 0.49891388416290283, "learning_rate": 3.038014614632101e-08, "loss": 0.0163, "step": 239370 }, { "epoch": 1.9368880977425358, "grad_norm": 0.37587666511535645, "learning_rate": 3.030247651114648e-08, "loss": 0.0147, "step": 239380 }, { "epoch": 1.9369690104377377, "grad_norm": 0.4152551591396332, "learning_rate": 3.0224905986098175e-08, "loss": 0.0151, "step": 239390 }, { "epoch": 1.9370499231329394, "grad_norm": 0.35980716347694397, "learning_rate": 3.0147434572722066e-08, "loss": 0.0167, "step": 239400 }, { "epoch": 1.9371308358281414, "grad_norm": 0.18950894474983215, "learning_rate": 3.007006227256359e-08, "loss": 0.0126, "step": 239410 }, { "epoch": 1.9372117485233433, "grad_norm": 0.2253396064043045, "learning_rate": 2.999278908716596e-08, "loss": 0.0151, "step": 239420 }, { "epoch": 1.937292661218545, "grad_norm": 0.3445513844490051, "learning_rate": 2.991561501806961e-08, "loss": 0.0173, "step": 239430 }, { "epoch": 1.9373735739137472, "grad_norm": 0.312431275844574, "learning_rate": 2.983854006681386e-08, "loss": 0.0138, "step": 239440 }, { "epoch": 1.937454486608949, "grad_norm": 0.49163618683815, "learning_rate": 2.976156423493637e-08, "loss": 0.0208, "step": 239450 }, { "epoch": 1.9375353993041509, "grad_norm": 0.3962723910808563, "learning_rate": 2.9684687523972024e-08, "loss": 0.0123, "step": 239460 }, { "epoch": 1.9376163119993528, "grad_norm": 0.5566253066062927, "learning_rate": 2.9607909935453483e-08, "loss": 0.0219, "step": 239470 }, { "epoch": 1.9376972246945545, "grad_norm": 0.25407445430755615, "learning_rate": 2.9531231470912304e-08, "loss": 0.0141, "step": 239480 }, { "epoch": 1.9377781373897565, "grad_norm": 0.5001180768013, "learning_rate": 2.945465213187726e-08, "loss": 0.0163, "step": 239490 }, { "epoch": 1.9378590500849584, "grad_norm": 0.25324681401252747, "learning_rate": 2.937817191987602e-08, "loss": 0.0247, "step": 239500 }, { "epoch": 1.9379399627801601, "grad_norm": 0.3473075032234192, "learning_rate": 2.930179083643403e-08, "loss": 0.0167, "step": 239510 }, { "epoch": 1.938020875475362, "grad_norm": 0.5384989380836487, "learning_rate": 2.922550888307396e-08, "loss": 0.0227, "step": 239520 }, { "epoch": 1.938101788170564, "grad_norm": 0.24191288650035858, "learning_rate": 2.9149326061317373e-08, "loss": 0.0273, "step": 239530 }, { "epoch": 1.9381827008657657, "grad_norm": 0.3563058376312256, "learning_rate": 2.9073242372683607e-08, "loss": 0.0187, "step": 239540 }, { "epoch": 1.9382636135609679, "grad_norm": 0.5499951839447021, "learning_rate": 2.8997257818689783e-08, "loss": 0.0257, "step": 239550 }, { "epoch": 1.9383445262561696, "grad_norm": 0.4280029535293579, "learning_rate": 2.8921372400851355e-08, "loss": 0.0155, "step": 239560 }, { "epoch": 1.9384254389513713, "grad_norm": 0.14617782831192017, "learning_rate": 2.884558612068211e-08, "loss": 0.022, "step": 239570 }, { "epoch": 1.9385063516465735, "grad_norm": 0.3022330105304718, "learning_rate": 2.8769898979693066e-08, "loss": 0.0199, "step": 239580 }, { "epoch": 1.9385872643417752, "grad_norm": 0.5461872816085815, "learning_rate": 2.869431097939357e-08, "loss": 0.0241, "step": 239590 }, { "epoch": 1.9386681770369771, "grad_norm": 0.3973209857940674, "learning_rate": 2.8618822121290746e-08, "loss": 0.0136, "step": 239600 }, { "epoch": 1.938749089732179, "grad_norm": 0.09533912688493729, "learning_rate": 2.8543432406890615e-08, "loss": 0.0174, "step": 239610 }, { "epoch": 1.9388300024273808, "grad_norm": 0.3670269250869751, "learning_rate": 2.8468141837696972e-08, "loss": 0.0108, "step": 239620 }, { "epoch": 1.9389109151225827, "grad_norm": 0.21390044689178467, "learning_rate": 2.8392950415210285e-08, "loss": 0.0142, "step": 239630 }, { "epoch": 1.9389918278177847, "grad_norm": 0.4384661912918091, "learning_rate": 2.831785814093102e-08, "loss": 0.0181, "step": 239640 }, { "epoch": 1.9390727405129864, "grad_norm": 0.24261854588985443, "learning_rate": 2.8242865016356312e-08, "loss": 0.0168, "step": 239650 }, { "epoch": 1.9391536532081883, "grad_norm": 0.39185675978660583, "learning_rate": 2.8167971042981633e-08, "loss": 0.0201, "step": 239660 }, { "epoch": 1.9392345659033903, "grad_norm": 0.24358311295509338, "learning_rate": 2.8093176222300788e-08, "loss": 0.0186, "step": 239670 }, { "epoch": 1.939315478598592, "grad_norm": 0.4115478992462158, "learning_rate": 2.8018480555805915e-08, "loss": 0.0262, "step": 239680 }, { "epoch": 1.9393963912937942, "grad_norm": 0.4519733786582947, "learning_rate": 2.7943884044985825e-08, "loss": 0.015, "step": 239690 }, { "epoch": 1.9394773039889959, "grad_norm": 0.21909913420677185, "learning_rate": 2.7869386691328214e-08, "loss": 0.0216, "step": 239700 }, { "epoch": 1.9395582166841976, "grad_norm": 0.7442082762718201, "learning_rate": 2.7794988496319675e-08, "loss": 0.0216, "step": 239710 }, { "epoch": 1.9396391293793998, "grad_norm": 0.3858875334262848, "learning_rate": 2.772068946144235e-08, "loss": 0.0172, "step": 239720 }, { "epoch": 1.9397200420746015, "grad_norm": 0.3189830780029297, "learning_rate": 2.7646489588179505e-08, "loss": 0.0108, "step": 239730 }, { "epoch": 1.9398009547698034, "grad_norm": 0.483359158039093, "learning_rate": 2.7572388878010502e-08, "loss": 0.0226, "step": 239740 }, { "epoch": 1.9398818674650053, "grad_norm": 0.35286417603492737, "learning_rate": 2.7498387332412502e-08, "loss": 0.0128, "step": 239750 }, { "epoch": 1.939962780160207, "grad_norm": 0.404908686876297, "learning_rate": 2.7424484952862097e-08, "loss": 0.0075, "step": 239760 }, { "epoch": 1.940043692855409, "grad_norm": 0.21888776123523712, "learning_rate": 2.735068174083255e-08, "loss": 0.022, "step": 239770 }, { "epoch": 1.940124605550611, "grad_norm": 0.5944998264312744, "learning_rate": 2.7276977697796025e-08, "loss": 0.0167, "step": 239780 }, { "epoch": 1.9402055182458127, "grad_norm": 0.26903024315834045, "learning_rate": 2.7203372825222453e-08, "loss": 0.0237, "step": 239790 }, { "epoch": 1.9402864309410146, "grad_norm": 0.1511240005493164, "learning_rate": 2.7129867124579544e-08, "loss": 0.009, "step": 239800 }, { "epoch": 1.9403673436362165, "grad_norm": 0.473690003156662, "learning_rate": 2.70564605973328e-08, "loss": 0.015, "step": 239810 }, { "epoch": 1.9404482563314183, "grad_norm": 0.6760120391845703, "learning_rate": 2.6983153244947158e-08, "loss": 0.0155, "step": 239820 }, { "epoch": 1.9405291690266204, "grad_norm": 0.6257482767105103, "learning_rate": 2.690994506888367e-08, "loss": 0.0268, "step": 239830 }, { "epoch": 1.9406100817218221, "grad_norm": 0.6652224063873291, "learning_rate": 2.6836836070602835e-08, "loss": 0.0249, "step": 239840 }, { "epoch": 1.940690994417024, "grad_norm": 0.2894134819507599, "learning_rate": 2.676382625156182e-08, "loss": 0.0093, "step": 239850 }, { "epoch": 1.940771907112226, "grad_norm": 0.19473962485790253, "learning_rate": 2.669091561321835e-08, "loss": 0.0191, "step": 239860 }, { "epoch": 1.9408528198074277, "grad_norm": 0.22498472034931183, "learning_rate": 2.661810415702515e-08, "loss": 0.0229, "step": 239870 }, { "epoch": 1.9409337325026297, "grad_norm": 0.29214975237846375, "learning_rate": 2.654539188443439e-08, "loss": 0.0174, "step": 239880 }, { "epoch": 1.9410146451978316, "grad_norm": 0.3100462853908539, "learning_rate": 2.6472778796896025e-08, "loss": 0.023, "step": 239890 }, { "epoch": 1.9410955578930333, "grad_norm": 0.3338352143764496, "learning_rate": 2.640026489585945e-08, "loss": 0.0291, "step": 239900 }, { "epoch": 1.9411764705882353, "grad_norm": 0.3180738389492035, "learning_rate": 2.6327850182769065e-08, "loss": 0.0198, "step": 239910 }, { "epoch": 1.9412573832834372, "grad_norm": 0.6108052134513855, "learning_rate": 2.6255534659069825e-08, "loss": 0.0172, "step": 239920 }, { "epoch": 1.941338295978639, "grad_norm": 0.10585300624370575, "learning_rate": 2.6183318326204464e-08, "loss": 0.0144, "step": 239930 }, { "epoch": 1.9414192086738409, "grad_norm": 0.4436699450016022, "learning_rate": 2.611120118561239e-08, "loss": 0.0207, "step": 239940 }, { "epoch": 1.9415001213690428, "grad_norm": 0.3152073323726654, "learning_rate": 2.603918323873189e-08, "loss": 0.024, "step": 239950 }, { "epoch": 1.9415810340642445, "grad_norm": 0.0006625687819905579, "learning_rate": 2.59672644869996e-08, "loss": 0.0138, "step": 239960 }, { "epoch": 1.9416619467594467, "grad_norm": 0.20755429565906525, "learning_rate": 2.5895444931849368e-08, "loss": 0.0149, "step": 239970 }, { "epoch": 1.9417428594546484, "grad_norm": 0.10818314552307129, "learning_rate": 2.582372457471338e-08, "loss": 0.0142, "step": 239980 }, { "epoch": 1.9418237721498504, "grad_norm": 0.3604543209075928, "learning_rate": 2.5752103417022723e-08, "loss": 0.0221, "step": 239990 }, { "epoch": 1.9419046848450523, "grad_norm": 0.10600405186414719, "learning_rate": 2.5680581460205136e-08, "loss": 0.0162, "step": 240000 }, { "epoch": 1.941985597540254, "grad_norm": 0.24928542971611023, "learning_rate": 2.5609158705687254e-08, "loss": 0.0127, "step": 240010 }, { "epoch": 1.942066510235456, "grad_norm": 0.2541627287864685, "learning_rate": 2.5537835154893498e-08, "loss": 0.0119, "step": 240020 }, { "epoch": 1.942147422930658, "grad_norm": 0.17855753004550934, "learning_rate": 2.5466610809245508e-08, "loss": 0.0138, "step": 240030 }, { "epoch": 1.9422283356258596, "grad_norm": 0.17775803804397583, "learning_rate": 2.5395485670164367e-08, "loss": 0.0139, "step": 240040 }, { "epoch": 1.9423092483210616, "grad_norm": 0.4148862063884735, "learning_rate": 2.5324459739068387e-08, "loss": 0.0232, "step": 240050 }, { "epoch": 1.9423901610162635, "grad_norm": 0.633438766002655, "learning_rate": 2.5253533017374766e-08, "loss": 0.017, "step": 240060 }, { "epoch": 1.9424710737114652, "grad_norm": 0.3842002749443054, "learning_rate": 2.5182705506496265e-08, "loss": 0.0175, "step": 240070 }, { "epoch": 1.9425519864066672, "grad_norm": 0.028100762516260147, "learning_rate": 2.511197720784675e-08, "loss": 0.0102, "step": 240080 }, { "epoch": 1.942632899101869, "grad_norm": 0.5361628532409668, "learning_rate": 2.5041348122836762e-08, "loss": 0.0209, "step": 240090 }, { "epoch": 1.9427138117970708, "grad_norm": 0.3804921507835388, "learning_rate": 2.4970818252874064e-08, "loss": 0.018, "step": 240100 }, { "epoch": 1.942794724492273, "grad_norm": 0.3486155569553375, "learning_rate": 2.490038759936586e-08, "loss": 0.0132, "step": 240110 }, { "epoch": 1.9428756371874747, "grad_norm": 0.39851924777030945, "learning_rate": 2.483005616371659e-08, "loss": 0.0181, "step": 240120 }, { "epoch": 1.9429565498826766, "grad_norm": 0.3524777293205261, "learning_rate": 2.4759823947328455e-08, "loss": 0.0081, "step": 240130 }, { "epoch": 1.9430374625778786, "grad_norm": 0.3711027503013611, "learning_rate": 2.4689690951603117e-08, "loss": 0.0184, "step": 240140 }, { "epoch": 1.9431183752730803, "grad_norm": 0.2105608880519867, "learning_rate": 2.4619657177937794e-08, "loss": 0.0174, "step": 240150 }, { "epoch": 1.9431992879682822, "grad_norm": 0.2893078029155731, "learning_rate": 2.4549722627730256e-08, "loss": 0.0261, "step": 240160 }, { "epoch": 1.9432802006634842, "grad_norm": 0.23917372524738312, "learning_rate": 2.447988730237494e-08, "loss": 0.0159, "step": 240170 }, { "epoch": 1.9433611133586859, "grad_norm": 0.40626633167266846, "learning_rate": 2.4410151203264065e-08, "loss": 0.017, "step": 240180 }, { "epoch": 1.9434420260538878, "grad_norm": 0.24540740251541138, "learning_rate": 2.43405143317893e-08, "loss": 0.0242, "step": 240190 }, { "epoch": 1.9435229387490898, "grad_norm": 0.4475669264793396, "learning_rate": 2.427097668933842e-08, "loss": 0.0171, "step": 240200 }, { "epoch": 1.9436038514442915, "grad_norm": 0.37669309973716736, "learning_rate": 2.4201538277298654e-08, "loss": 0.0164, "step": 240210 }, { "epoch": 1.9436847641394936, "grad_norm": 0.07026585191488266, "learning_rate": 2.4132199097055554e-08, "loss": 0.0262, "step": 240220 }, { "epoch": 1.9437656768346954, "grad_norm": 0.3876537084579468, "learning_rate": 2.4062959149990793e-08, "loss": 0.0133, "step": 240230 }, { "epoch": 1.943846589529897, "grad_norm": 0.26473793387413025, "learning_rate": 2.3993818437485494e-08, "loss": 0.0144, "step": 240240 }, { "epoch": 1.9439275022250992, "grad_norm": 0.8630541563034058, "learning_rate": 2.3924776960919103e-08, "loss": 0.0197, "step": 240250 }, { "epoch": 1.944008414920301, "grad_norm": 0.37704575061798096, "learning_rate": 2.385583472166775e-08, "loss": 0.0289, "step": 240260 }, { "epoch": 1.944089327615503, "grad_norm": 0.2798137664794922, "learning_rate": 2.3786991721106435e-08, "loss": 0.012, "step": 240270 }, { "epoch": 1.9441702403107048, "grad_norm": 0.27213454246520996, "learning_rate": 2.371824796060851e-08, "loss": 0.0166, "step": 240280 }, { "epoch": 1.9442511530059066, "grad_norm": 0.39110878109931946, "learning_rate": 2.3649603441544543e-08, "loss": 0.0165, "step": 240290 }, { "epoch": 1.9443320657011085, "grad_norm": 0.3199186623096466, "learning_rate": 2.3581058165283997e-08, "loss": 0.028, "step": 240300 }, { "epoch": 1.9444129783963104, "grad_norm": 0.48653215169906616, "learning_rate": 2.3512612133194113e-08, "loss": 0.0153, "step": 240310 }, { "epoch": 1.9444938910915122, "grad_norm": 0.28160351514816284, "learning_rate": 2.344426534663824e-08, "loss": 0.0201, "step": 240320 }, { "epoch": 1.944574803786714, "grad_norm": 0.17584660649299622, "learning_rate": 2.33760178069814e-08, "loss": 0.0232, "step": 240330 }, { "epoch": 1.944655716481916, "grad_norm": 0.7190033197402954, "learning_rate": 2.330786951558306e-08, "loss": 0.0257, "step": 240340 }, { "epoch": 1.9447366291771178, "grad_norm": 0.3612607419490814, "learning_rate": 2.3239820473803242e-08, "loss": 0.0135, "step": 240350 }, { "epoch": 1.94481754187232, "grad_norm": 0.38752326369285583, "learning_rate": 2.31718706829992e-08, "loss": 0.03, "step": 240360 }, { "epoch": 1.9448984545675216, "grad_norm": 0.289635568857193, "learning_rate": 2.3104020144525396e-08, "loss": 0.0201, "step": 240370 }, { "epoch": 1.9449793672627234, "grad_norm": 0.3668271005153656, "learning_rate": 2.30362688597352e-08, "loss": 0.017, "step": 240380 }, { "epoch": 1.9450602799579255, "grad_norm": 0.20166918635368347, "learning_rate": 2.296861682997975e-08, "loss": 0.0131, "step": 240390 }, { "epoch": 1.9451411926531272, "grad_norm": 0.20058903098106384, "learning_rate": 2.2901064056607968e-08, "loss": 0.0116, "step": 240400 }, { "epoch": 1.9452221053483292, "grad_norm": 0.4624469578266144, "learning_rate": 2.2833610540967666e-08, "loss": 0.0129, "step": 240410 }, { "epoch": 1.9453030180435311, "grad_norm": 0.34765997529029846, "learning_rate": 2.2766256284403874e-08, "loss": 0.0204, "step": 240420 }, { "epoch": 1.9453839307387328, "grad_norm": 0.1620347946882248, "learning_rate": 2.269900128825886e-08, "loss": 0.0181, "step": 240430 }, { "epoch": 1.9454648434339348, "grad_norm": 0.3218713700771332, "learning_rate": 2.2631845553875986e-08, "loss": 0.0229, "step": 240440 }, { "epoch": 1.9455457561291367, "grad_norm": 0.27391666173934937, "learning_rate": 2.256478908259252e-08, "loss": 0.0162, "step": 240450 }, { "epoch": 1.9456266688243384, "grad_norm": 0.13843894004821777, "learning_rate": 2.249783187574628e-08, "loss": 0.0202, "step": 240460 }, { "epoch": 1.9457075815195404, "grad_norm": 0.314656138420105, "learning_rate": 2.2430973934673417e-08, "loss": 0.0119, "step": 240470 }, { "epoch": 1.9457884942147423, "grad_norm": 0.43488726019859314, "learning_rate": 2.2364215260706203e-08, "loss": 0.0105, "step": 240480 }, { "epoch": 1.945869406909944, "grad_norm": 0.1490616351366043, "learning_rate": 2.22975558551769e-08, "loss": 0.0308, "step": 240490 }, { "epoch": 1.9459503196051462, "grad_norm": 0.4227881133556366, "learning_rate": 2.223099571941445e-08, "loss": 0.0199, "step": 240500 }, { "epoch": 1.946031232300348, "grad_norm": 0.29790201783180237, "learning_rate": 2.2164534854745567e-08, "loss": 0.0139, "step": 240510 }, { "epoch": 1.9461121449955499, "grad_norm": 0.2454429566860199, "learning_rate": 2.209817326249697e-08, "loss": 0.0244, "step": 240520 }, { "epoch": 1.9461930576907518, "grad_norm": 0.02108743228018284, "learning_rate": 2.2031910943991485e-08, "loss": 0.027, "step": 240530 }, { "epoch": 1.9462739703859535, "grad_norm": 0.5551291704177856, "learning_rate": 2.1965747900550284e-08, "loss": 0.0165, "step": 240540 }, { "epoch": 1.9463548830811555, "grad_norm": 0.581621527671814, "learning_rate": 2.1899684133493414e-08, "loss": 0.0112, "step": 240550 }, { "epoch": 1.9464357957763574, "grad_norm": 0.5386511087417603, "learning_rate": 2.183371964413761e-08, "loss": 0.0173, "step": 240560 }, { "epoch": 1.946516708471559, "grad_norm": 0.3430105149745941, "learning_rate": 2.1767854433799586e-08, "loss": 0.009, "step": 240570 }, { "epoch": 1.946597621166761, "grad_norm": 0.36136212944984436, "learning_rate": 2.170208850379163e-08, "loss": 0.017, "step": 240580 }, { "epoch": 1.946678533861963, "grad_norm": 0.2529604732990265, "learning_rate": 2.1636421855426026e-08, "loss": 0.0151, "step": 240590 }, { "epoch": 1.9467594465571647, "grad_norm": 0.22597017884254456, "learning_rate": 2.1570854490012284e-08, "loss": 0.0075, "step": 240600 }, { "epoch": 1.9468403592523666, "grad_norm": 0.33978816866874695, "learning_rate": 2.150538640885824e-08, "loss": 0.0194, "step": 240610 }, { "epoch": 1.9469212719475686, "grad_norm": 0.4761344790458679, "learning_rate": 2.1440017613268415e-08, "loss": 0.0225, "step": 240620 }, { "epoch": 1.9470021846427703, "grad_norm": 0.30090880393981934, "learning_rate": 2.137474810454787e-08, "loss": 0.0178, "step": 240630 }, { "epoch": 1.9470830973379725, "grad_norm": 0.26666927337646484, "learning_rate": 2.130957788399779e-08, "loss": 0.0217, "step": 240640 }, { "epoch": 1.9471640100331742, "grad_norm": 0.35901939868927, "learning_rate": 2.1244506952917132e-08, "loss": 0.0187, "step": 240650 }, { "epoch": 1.9472449227283761, "grad_norm": 0.2696644365787506, "learning_rate": 2.1179535312604304e-08, "loss": 0.0192, "step": 240660 }, { "epoch": 1.947325835423578, "grad_norm": 0.5220447182655334, "learning_rate": 2.1114662964354937e-08, "loss": 0.0147, "step": 240670 }, { "epoch": 1.9474067481187798, "grad_norm": 0.24003034830093384, "learning_rate": 2.1049889909462995e-08, "loss": 0.0136, "step": 240680 }, { "epoch": 1.9474876608139817, "grad_norm": 0.20674802362918854, "learning_rate": 2.098521614921967e-08, "loss": 0.0147, "step": 240690 }, { "epoch": 1.9475685735091837, "grad_norm": 0.02982979454100132, "learning_rate": 2.0920641684915034e-08, "loss": 0.0177, "step": 240700 }, { "epoch": 1.9476494862043854, "grad_norm": 0.506695032119751, "learning_rate": 2.0856166517836952e-08, "loss": 0.0147, "step": 240710 }, { "epoch": 1.9477303988995873, "grad_norm": 0.07123066484928131, "learning_rate": 2.0791790649271613e-08, "loss": 0.0178, "step": 240720 }, { "epoch": 1.9478113115947893, "grad_norm": 0.5850152969360352, "learning_rate": 2.0727514080501332e-08, "loss": 0.0134, "step": 240730 }, { "epoch": 1.947892224289991, "grad_norm": 0.45994529128074646, "learning_rate": 2.0663336812810076e-08, "loss": 0.0239, "step": 240740 }, { "epoch": 1.9479731369851931, "grad_norm": 0.013085994869470596, "learning_rate": 2.0599258847476267e-08, "loss": 0.0188, "step": 240750 }, { "epoch": 1.9480540496803949, "grad_norm": 0.4003070294857025, "learning_rate": 2.0535280185778328e-08, "loss": 0.0273, "step": 240760 }, { "epoch": 1.9481349623755966, "grad_norm": 0.25957566499710083, "learning_rate": 2.047140082899246e-08, "loss": 0.0184, "step": 240770 }, { "epoch": 1.9482158750707987, "grad_norm": 0.4937014877796173, "learning_rate": 2.0407620778392088e-08, "loss": 0.0214, "step": 240780 }, { "epoch": 1.9482967877660005, "grad_norm": 0.3538829982280731, "learning_rate": 2.0343940035248976e-08, "loss": 0.0147, "step": 240790 }, { "epoch": 1.9483777004612024, "grad_norm": 0.3513084053993225, "learning_rate": 2.028035860083377e-08, "loss": 0.0168, "step": 240800 }, { "epoch": 1.9484586131564043, "grad_norm": 0.8416820764541626, "learning_rate": 2.0216876476414348e-08, "loss": 0.0142, "step": 240810 }, { "epoch": 1.948539525851606, "grad_norm": 0.10932853817939758, "learning_rate": 2.015349366325581e-08, "loss": 0.0242, "step": 240820 }, { "epoch": 1.948620438546808, "grad_norm": 0.22081732749938965, "learning_rate": 2.0090210162623802e-08, "loss": 0.0153, "step": 240830 }, { "epoch": 1.94870135124201, "grad_norm": 0.33471208810806274, "learning_rate": 2.0027025975778437e-08, "loss": 0.0232, "step": 240840 }, { "epoch": 1.9487822639372117, "grad_norm": 0.4434165954589844, "learning_rate": 1.9963941103982033e-08, "loss": 0.0225, "step": 240850 }, { "epoch": 1.9488631766324136, "grad_norm": 0.28912657499313354, "learning_rate": 1.9900955548490808e-08, "loss": 0.0234, "step": 240860 }, { "epoch": 1.9489440893276155, "grad_norm": 0.5401239991188049, "learning_rate": 1.9838069310561537e-08, "loss": 0.0231, "step": 240870 }, { "epoch": 1.9490250020228173, "grad_norm": 0.3654966652393341, "learning_rate": 1.977528239144877e-08, "loss": 0.0267, "step": 240880 }, { "epoch": 1.9491059147180194, "grad_norm": 0.8714866042137146, "learning_rate": 1.9712594792404284e-08, "loss": 0.0127, "step": 240890 }, { "epoch": 1.9491868274132211, "grad_norm": 0.4693315625190735, "learning_rate": 1.9650006514677633e-08, "loss": 0.0197, "step": 240900 }, { "epoch": 1.9492677401084229, "grad_norm": 0.4954775273799896, "learning_rate": 1.9587517559518378e-08, "loss": 0.0237, "step": 240910 }, { "epoch": 1.949348652803625, "grad_norm": 0.4107518196105957, "learning_rate": 1.9525127928171628e-08, "loss": 0.0204, "step": 240920 }, { "epoch": 1.9494295654988267, "grad_norm": 0.4896520972251892, "learning_rate": 1.9462837621881946e-08, "loss": 0.0148, "step": 240930 }, { "epoch": 1.9495104781940287, "grad_norm": 0.40416139364242554, "learning_rate": 1.940064664189112e-08, "loss": 0.0211, "step": 240940 }, { "epoch": 1.9495913908892306, "grad_norm": 0.3395027220249176, "learning_rate": 1.9338554989440374e-08, "loss": 0.0164, "step": 240950 }, { "epoch": 1.9496723035844323, "grad_norm": 0.6901800632476807, "learning_rate": 1.9276562665767606e-08, "loss": 0.0152, "step": 240960 }, { "epoch": 1.9497532162796343, "grad_norm": 0.5617108941078186, "learning_rate": 1.9214669672109054e-08, "loss": 0.018, "step": 240970 }, { "epoch": 1.9498341289748362, "grad_norm": 0.4603554606437683, "learning_rate": 1.915287600969873e-08, "loss": 0.0249, "step": 240980 }, { "epoch": 1.949915041670038, "grad_norm": 0.3599540591239929, "learning_rate": 1.9091181679769533e-08, "loss": 0.0211, "step": 240990 }, { "epoch": 1.9499959543652399, "grad_norm": 0.23314543068408966, "learning_rate": 1.9029586683551594e-08, "loss": 0.0151, "step": 241000 }, { "epoch": 1.9500768670604418, "grad_norm": 0.22752012312412262, "learning_rate": 1.8968091022272818e-08, "loss": 0.0105, "step": 241010 }, { "epoch": 1.9501577797556435, "grad_norm": 0.18384838104248047, "learning_rate": 1.8906694697160556e-08, "loss": 0.0135, "step": 241020 }, { "epoch": 1.9502386924508457, "grad_norm": 0.5524950623512268, "learning_rate": 1.8845397709438273e-08, "loss": 0.0152, "step": 241030 }, { "epoch": 1.9503196051460474, "grad_norm": 0.494660884141922, "learning_rate": 1.878420006032888e-08, "loss": 0.0156, "step": 241040 }, { "epoch": 1.9504005178412491, "grad_norm": 0.17913858592510223, "learning_rate": 1.8723101751053074e-08, "loss": 0.0156, "step": 241050 }, { "epoch": 1.9504814305364513, "grad_norm": 0.20124195516109467, "learning_rate": 1.866210278282876e-08, "loss": 0.0222, "step": 241060 }, { "epoch": 1.950562343231653, "grad_norm": 0.21069779992103577, "learning_rate": 1.8601203156873305e-08, "loss": 0.0127, "step": 241070 }, { "epoch": 1.950643255926855, "grad_norm": 0.2854355275630951, "learning_rate": 1.8540402874400176e-08, "loss": 0.0172, "step": 241080 }, { "epoch": 1.9507241686220569, "grad_norm": 0.4951326549053192, "learning_rate": 1.8479701936622297e-08, "loss": 0.0168, "step": 241090 }, { "epoch": 1.9508050813172586, "grad_norm": 0.2432987242937088, "learning_rate": 1.8419100344750364e-08, "loss": 0.0099, "step": 241100 }, { "epoch": 1.9508859940124605, "grad_norm": 0.33891037106513977, "learning_rate": 1.835859809999285e-08, "loss": 0.0115, "step": 241110 }, { "epoch": 1.9509669067076625, "grad_norm": 0.39837637543678284, "learning_rate": 1.8298195203556578e-08, "loss": 0.0203, "step": 241120 }, { "epoch": 1.9510478194028642, "grad_norm": 0.11900696903467178, "learning_rate": 1.8237891656646135e-08, "loss": 0.0167, "step": 241130 }, { "epoch": 1.9511287320980661, "grad_norm": 0.1352246105670929, "learning_rate": 1.8177687460463887e-08, "loss": 0.0134, "step": 241140 }, { "epoch": 1.951209644793268, "grad_norm": 0.39609766006469727, "learning_rate": 1.8117582616209994e-08, "loss": 0.0205, "step": 241150 }, { "epoch": 1.9512905574884698, "grad_norm": 0.19521203637123108, "learning_rate": 1.80575771250846e-08, "loss": 0.0129, "step": 241160 }, { "epoch": 1.951371470183672, "grad_norm": 0.1975374072790146, "learning_rate": 1.799767098828287e-08, "loss": 0.0192, "step": 241170 }, { "epoch": 1.9514523828788737, "grad_norm": 0.3329631984233856, "learning_rate": 1.7937864206999944e-08, "loss": 0.0139, "step": 241180 }, { "epoch": 1.9515332955740756, "grad_norm": 0.15567849576473236, "learning_rate": 1.7878156782429324e-08, "loss": 0.0158, "step": 241190 }, { "epoch": 1.9516142082692776, "grad_norm": 0.14490950107574463, "learning_rate": 1.781854871576061e-08, "loss": 0.0077, "step": 241200 }, { "epoch": 1.9516951209644793, "grad_norm": 0.44286543130874634, "learning_rate": 1.7759040008183404e-08, "loss": 0.016, "step": 241210 }, { "epoch": 1.9517760336596812, "grad_norm": 0.4704393744468689, "learning_rate": 1.7699630660884537e-08, "loss": 0.0134, "step": 241220 }, { "epoch": 1.9518569463548832, "grad_norm": 0.2934139370918274, "learning_rate": 1.7640320675048063e-08, "loss": 0.0168, "step": 241230 }, { "epoch": 1.9519378590500849, "grad_norm": 0.1055600494146347, "learning_rate": 1.758111005185692e-08, "loss": 0.0102, "step": 241240 }, { "epoch": 1.9520187717452868, "grad_norm": 0.2591609060764313, "learning_rate": 1.752199879249239e-08, "loss": 0.0121, "step": 241250 }, { "epoch": 1.9520996844404888, "grad_norm": 0.34249523282051086, "learning_rate": 1.746298689813297e-08, "loss": 0.0172, "step": 241260 }, { "epoch": 1.9521805971356905, "grad_norm": 0.5881128311157227, "learning_rate": 1.7404074369956058e-08, "loss": 0.0182, "step": 241270 }, { "epoch": 1.9522615098308924, "grad_norm": 0.6537858843803406, "learning_rate": 1.734526120913571e-08, "loss": 0.0281, "step": 241280 }, { "epoch": 1.9523424225260944, "grad_norm": 0.3395092785358429, "learning_rate": 1.7286547416845433e-08, "loss": 0.0157, "step": 241290 }, { "epoch": 1.952423335221296, "grad_norm": 0.3252200782299042, "learning_rate": 1.7227932994255958e-08, "loss": 0.0221, "step": 241300 }, { "epoch": 1.9525042479164982, "grad_norm": 0.34715259075164795, "learning_rate": 1.716941794253635e-08, "loss": 0.0362, "step": 241310 }, { "epoch": 1.9525851606117, "grad_norm": 0.41467174887657166, "learning_rate": 1.711100226285345e-08, "loss": 0.0282, "step": 241320 }, { "epoch": 1.952666073306902, "grad_norm": 0.46681174635887146, "learning_rate": 1.705268595637244e-08, "loss": 0.0216, "step": 241330 }, { "epoch": 1.9527469860021038, "grad_norm": 0.4382636547088623, "learning_rate": 1.6994469024255722e-08, "loss": 0.0214, "step": 241340 }, { "epoch": 1.9528278986973056, "grad_norm": 0.37481632828712463, "learning_rate": 1.6936351467665147e-08, "loss": 0.0176, "step": 241350 }, { "epoch": 1.9529088113925075, "grad_norm": 0.38294920325279236, "learning_rate": 1.687833328775923e-08, "loss": 0.0163, "step": 241360 }, { "epoch": 1.9529897240877094, "grad_norm": 0.3078697621822357, "learning_rate": 1.6820414485694824e-08, "loss": 0.029, "step": 241370 }, { "epoch": 1.9530706367829112, "grad_norm": 0.2923288345336914, "learning_rate": 1.6762595062627673e-08, "loss": 0.0223, "step": 241380 }, { "epoch": 1.953151549478113, "grad_norm": 0.399456262588501, "learning_rate": 1.6704875019710188e-08, "loss": 0.012, "step": 241390 }, { "epoch": 1.953232462173315, "grad_norm": 0.284023255109787, "learning_rate": 1.664725435809422e-08, "loss": 0.022, "step": 241400 }, { "epoch": 1.9533133748685167, "grad_norm": 0.3795687258243561, "learning_rate": 1.6589733078928306e-08, "loss": 0.0161, "step": 241410 }, { "epoch": 1.953394287563719, "grad_norm": 0.06483761966228485, "learning_rate": 1.6532311183359295e-08, "loss": 0.0115, "step": 241420 }, { "epoch": 1.9534752002589206, "grad_norm": 0.274600088596344, "learning_rate": 1.6474988672532943e-08, "loss": 0.0145, "step": 241430 }, { "epoch": 1.9535561129541223, "grad_norm": 0.2550327479839325, "learning_rate": 1.6417765547592225e-08, "loss": 0.0131, "step": 241440 }, { "epoch": 1.9536370256493245, "grad_norm": 0.4427853226661682, "learning_rate": 1.6360641809679e-08, "loss": 0.0256, "step": 241450 }, { "epoch": 1.9537179383445262, "grad_norm": 0.3181948959827423, "learning_rate": 1.6303617459931252e-08, "loss": 0.0201, "step": 241460 }, { "epoch": 1.9537988510397282, "grad_norm": 0.09971653670072556, "learning_rate": 1.6246692499486406e-08, "loss": 0.0195, "step": 241470 }, { "epoch": 1.95387976373493, "grad_norm": 0.25444674491882324, "learning_rate": 1.618986692948077e-08, "loss": 0.0093, "step": 241480 }, { "epoch": 1.9539606764301318, "grad_norm": 0.39051979780197144, "learning_rate": 1.613314075104677e-08, "loss": 0.0117, "step": 241490 }, { "epoch": 1.9540415891253338, "grad_norm": 0.29854634404182434, "learning_rate": 1.607651396531573e-08, "loss": 0.0146, "step": 241500 }, { "epoch": 1.9541225018205357, "grad_norm": 0.49830639362335205, "learning_rate": 1.60199865734173e-08, "loss": 0.0297, "step": 241510 }, { "epoch": 1.9542034145157374, "grad_norm": 0.2739136815071106, "learning_rate": 1.5963558576478356e-08, "loss": 0.0179, "step": 241520 }, { "epoch": 1.9542843272109394, "grad_norm": 0.5627134442329407, "learning_rate": 1.590722997562466e-08, "loss": 0.0169, "step": 241530 }, { "epoch": 1.9543652399061413, "grad_norm": 0.5240078568458557, "learning_rate": 1.5851000771979208e-08, "loss": 0.0246, "step": 241540 }, { "epoch": 1.954446152601343, "grad_norm": 0.028608007356524467, "learning_rate": 1.579487096666388e-08, "loss": 0.0187, "step": 241550 }, { "epoch": 1.9545270652965452, "grad_norm": 0.47662103176116943, "learning_rate": 1.5738840560797775e-08, "loss": 0.0269, "step": 241560 }, { "epoch": 1.954607977991747, "grad_norm": 0.3473969101905823, "learning_rate": 1.5682909555497782e-08, "loss": 0.0123, "step": 241570 }, { "epoch": 1.9546888906869486, "grad_norm": 0.2885311245918274, "learning_rate": 1.562707795188023e-08, "loss": 0.0147, "step": 241580 }, { "epoch": 1.9547698033821508, "grad_norm": 0.2859050929546356, "learning_rate": 1.5571345751057565e-08, "loss": 0.0205, "step": 241590 }, { "epoch": 1.9548507160773525, "grad_norm": 0.2902771532535553, "learning_rate": 1.5515712954142224e-08, "loss": 0.0122, "step": 241600 }, { "epoch": 1.9549316287725544, "grad_norm": 0.46800515055656433, "learning_rate": 1.5460179562243326e-08, "loss": 0.0316, "step": 241610 }, { "epoch": 1.9550125414677564, "grad_norm": 0.30003008246421814, "learning_rate": 1.5404745576468316e-08, "loss": 0.0141, "step": 241620 }, { "epoch": 1.955093454162958, "grad_norm": 0.19096598029136658, "learning_rate": 1.534941099792242e-08, "loss": 0.0199, "step": 241630 }, { "epoch": 1.95517436685816, "grad_norm": 0.22624517977237701, "learning_rate": 1.5294175827709757e-08, "loss": 0.0113, "step": 241640 }, { "epoch": 1.955255279553362, "grad_norm": 0.4388100504875183, "learning_rate": 1.523904006693111e-08, "loss": 0.0146, "step": 241650 }, { "epoch": 1.9553361922485637, "grad_norm": 0.3276727795600891, "learning_rate": 1.5184003716686713e-08, "loss": 0.0205, "step": 241660 }, { "epoch": 1.9554171049437656, "grad_norm": 0.3629421889781952, "learning_rate": 1.512906677807402e-08, "loss": 0.0137, "step": 241670 }, { "epoch": 1.9554980176389676, "grad_norm": 0.2163008600473404, "learning_rate": 1.507422925218882e-08, "loss": 0.0094, "step": 241680 }, { "epoch": 1.9555789303341693, "grad_norm": 0.2871609926223755, "learning_rate": 1.5019491140124132e-08, "loss": 0.0232, "step": 241690 }, { "epoch": 1.9556598430293715, "grad_norm": 0.6486411690711975, "learning_rate": 1.496485244297241e-08, "loss": 0.0359, "step": 241700 }, { "epoch": 1.9557407557245732, "grad_norm": 0.6895115971565247, "learning_rate": 1.4910313161822786e-08, "loss": 0.0175, "step": 241710 }, { "epoch": 1.9558216684197751, "grad_norm": 0.35473132133483887, "learning_rate": 1.4855873297762169e-08, "loss": 0.0179, "step": 241720 }, { "epoch": 1.955902581114977, "grad_norm": 0.07044075429439545, "learning_rate": 1.480153285187802e-08, "loss": 0.0146, "step": 241730 }, { "epoch": 1.9559834938101788, "grad_norm": 0.3002687990665436, "learning_rate": 1.4747291825252252e-08, "loss": 0.019, "step": 241740 }, { "epoch": 1.9560644065053807, "grad_norm": 0.5858721733093262, "learning_rate": 1.469315021896789e-08, "loss": 0.014, "step": 241750 }, { "epoch": 1.9561453192005827, "grad_norm": 0.2858774662017822, "learning_rate": 1.4639108034104065e-08, "loss": 0.0185, "step": 241760 }, { "epoch": 1.9562262318957844, "grad_norm": 0.6280713677406311, "learning_rate": 1.458516527173881e-08, "loss": 0.014, "step": 241770 }, { "epoch": 1.9563071445909863, "grad_norm": 0.591415286064148, "learning_rate": 1.4531321932947373e-08, "loss": 0.0217, "step": 241780 }, { "epoch": 1.9563880572861883, "grad_norm": 0.4380642771720886, "learning_rate": 1.4477578018804451e-08, "loss": 0.013, "step": 241790 }, { "epoch": 1.95646896998139, "grad_norm": 0.4473980665206909, "learning_rate": 1.4423933530380851e-08, "loss": 0.0205, "step": 241800 }, { "epoch": 1.956549882676592, "grad_norm": 0.24729861319065094, "learning_rate": 1.4370388468747386e-08, "loss": 0.0204, "step": 241810 }, { "epoch": 1.9566307953717939, "grad_norm": 0.7167255878448486, "learning_rate": 1.4316942834970981e-08, "loss": 0.0091, "step": 241820 }, { "epoch": 1.9567117080669956, "grad_norm": 0.16677649319171906, "learning_rate": 1.4263596630118003e-08, "loss": 0.0115, "step": 241830 }, { "epoch": 1.9567926207621977, "grad_norm": 0.35555756092071533, "learning_rate": 1.4210349855252603e-08, "loss": 0.0281, "step": 241840 }, { "epoch": 1.9568735334573994, "grad_norm": 0.24801643192768097, "learning_rate": 1.4157202511435597e-08, "loss": 0.0218, "step": 241850 }, { "epoch": 1.9569544461526014, "grad_norm": 0.5746963024139404, "learning_rate": 1.4104154599727804e-08, "loss": 0.0225, "step": 241860 }, { "epoch": 1.9570353588478033, "grad_norm": 0.37632817029953003, "learning_rate": 1.4051206121187822e-08, "loss": 0.0178, "step": 241870 }, { "epoch": 1.957116271543005, "grad_norm": 0.2362876534461975, "learning_rate": 1.3998357076869807e-08, "loss": 0.0167, "step": 241880 }, { "epoch": 1.957197184238207, "grad_norm": 1.3045051097869873, "learning_rate": 1.394560746782847e-08, "loss": 0.0249, "step": 241890 }, { "epoch": 1.957278096933409, "grad_norm": 0.39836639165878296, "learning_rate": 1.389295729511686e-08, "loss": 0.0212, "step": 241900 }, { "epoch": 1.9573590096286106, "grad_norm": 0.19037437438964844, "learning_rate": 1.3840406559783581e-08, "loss": 0.0181, "step": 241910 }, { "epoch": 1.9574399223238126, "grad_norm": 0.5872962474822998, "learning_rate": 1.3787955262877239e-08, "loss": 0.0209, "step": 241920 }, { "epoch": 1.9575208350190145, "grad_norm": 0.3606051504611969, "learning_rate": 1.3735603405443665e-08, "loss": 0.0148, "step": 241930 }, { "epoch": 1.9576017477142162, "grad_norm": 0.26011064648628235, "learning_rate": 1.3683350988527022e-08, "loss": 0.0091, "step": 241940 }, { "epoch": 1.9576826604094182, "grad_norm": 0.35355904698371887, "learning_rate": 1.3631198013169256e-08, "loss": 0.0114, "step": 241950 }, { "epoch": 1.9577635731046201, "grad_norm": 0.6712324023246765, "learning_rate": 1.3579144480410644e-08, "loss": 0.0108, "step": 241960 }, { "epoch": 1.9578444857998218, "grad_norm": 0.21064172685146332, "learning_rate": 1.3527190391289246e-08, "loss": 0.0134, "step": 241970 }, { "epoch": 1.957925398495024, "grad_norm": 0.3070618510246277, "learning_rate": 1.3475335746840902e-08, "loss": 0.0078, "step": 241980 }, { "epoch": 1.9580063111902257, "grad_norm": 0.6128497123718262, "learning_rate": 1.3423580548100334e-08, "loss": 0.0313, "step": 241990 }, { "epoch": 1.9580872238854277, "grad_norm": 0.23650714755058289, "learning_rate": 1.3371924796098945e-08, "loss": 0.0182, "step": 242000 }, { "epoch": 1.9581681365806296, "grad_norm": 0.33603355288505554, "learning_rate": 1.3320368491867575e-08, "loss": 0.017, "step": 242010 }, { "epoch": 1.9582490492758313, "grad_norm": 0.40143099427223206, "learning_rate": 1.3268911636433734e-08, "loss": 0.0189, "step": 242020 }, { "epoch": 1.9583299619710333, "grad_norm": 0.31289857625961304, "learning_rate": 1.3217554230824937e-08, "loss": 0.014, "step": 242030 }, { "epoch": 1.9584108746662352, "grad_norm": 0.3800906836986542, "learning_rate": 1.3166296276063695e-08, "loss": 0.0145, "step": 242040 }, { "epoch": 1.958491787361437, "grad_norm": 0.17081420123577118, "learning_rate": 1.3115137773172526e-08, "loss": 0.0328, "step": 242050 }, { "epoch": 1.9585727000566389, "grad_norm": 0.31720447540283203, "learning_rate": 1.306407872317339e-08, "loss": 0.0205, "step": 242060 }, { "epoch": 1.9586536127518408, "grad_norm": 0.6536924839019775, "learning_rate": 1.3013119127082142e-08, "loss": 0.0255, "step": 242070 }, { "epoch": 1.9587345254470425, "grad_norm": 0.46722257137298584, "learning_rate": 1.2962258985916853e-08, "loss": 0.0177, "step": 242080 }, { "epoch": 1.9588154381422447, "grad_norm": 0.35920995473861694, "learning_rate": 1.2911498300691161e-08, "loss": 0.0191, "step": 242090 }, { "epoch": 1.9588963508374464, "grad_norm": 0.2566526532173157, "learning_rate": 1.2860837072417588e-08, "loss": 0.0121, "step": 242100 }, { "epoch": 1.9589772635326481, "grad_norm": 0.37293440103530884, "learning_rate": 1.281027530210588e-08, "loss": 0.0142, "step": 242110 }, { "epoch": 1.9590581762278503, "grad_norm": 0.37390607595443726, "learning_rate": 1.275981299076523e-08, "loss": 0.0193, "step": 242120 }, { "epoch": 1.959139088923052, "grad_norm": 0.11400339007377625, "learning_rate": 1.2709450139400948e-08, "loss": 0.011, "step": 242130 }, { "epoch": 1.959220001618254, "grad_norm": 0.3520996868610382, "learning_rate": 1.2659186749018893e-08, "loss": 0.0122, "step": 242140 }, { "epoch": 1.9593009143134559, "grad_norm": 0.27622780203819275, "learning_rate": 1.2609022820619931e-08, "loss": 0.0139, "step": 242150 }, { "epoch": 1.9593818270086576, "grad_norm": 0.26665163040161133, "learning_rate": 1.2558958355205487e-08, "loss": 0.0171, "step": 242160 }, { "epoch": 1.9594627397038595, "grad_norm": 0.16863802075386047, "learning_rate": 1.2508993353773646e-08, "loss": 0.0191, "step": 242170 }, { "epoch": 1.9595436523990615, "grad_norm": 0.0548846460878849, "learning_rate": 1.2459127817320838e-08, "loss": 0.0113, "step": 242180 }, { "epoch": 1.9596245650942632, "grad_norm": 0.11296168714761734, "learning_rate": 1.2409361746841264e-08, "loss": 0.0121, "step": 242190 }, { "epoch": 1.9597054777894651, "grad_norm": 0.5474591255187988, "learning_rate": 1.2359695143328021e-08, "loss": 0.0241, "step": 242200 }, { "epoch": 1.959786390484667, "grad_norm": 0.4801633358001709, "learning_rate": 1.2310128007771427e-08, "loss": 0.0317, "step": 242210 }, { "epoch": 1.9598673031798688, "grad_norm": 0.23050746321678162, "learning_rate": 1.226066034115958e-08, "loss": 0.0096, "step": 242220 }, { "epoch": 1.959948215875071, "grad_norm": 0.11998666822910309, "learning_rate": 1.221129214447947e-08, "loss": 0.013, "step": 242230 }, { "epoch": 1.9600291285702727, "grad_norm": 0.13518781960010529, "learning_rate": 1.2162023418715308e-08, "loss": 0.0156, "step": 242240 }, { "epoch": 1.9601100412654744, "grad_norm": 0.539334237575531, "learning_rate": 1.2112854164849642e-08, "loss": 0.0193, "step": 242250 }, { "epoch": 1.9601909539606766, "grad_norm": 0.4557611346244812, "learning_rate": 1.2063784383863353e-08, "loss": 0.0182, "step": 242260 }, { "epoch": 1.9602718666558783, "grad_norm": 0.13893026113510132, "learning_rate": 1.2014814076735104e-08, "loss": 0.0186, "step": 242270 }, { "epoch": 1.9603527793510802, "grad_norm": 0.37606775760650635, "learning_rate": 1.1965943244440781e-08, "loss": 0.0075, "step": 242280 }, { "epoch": 1.9604336920462822, "grad_norm": 0.5402942895889282, "learning_rate": 1.1917171887955715e-08, "loss": 0.0116, "step": 242290 }, { "epoch": 1.9605146047414839, "grad_norm": 0.08040431141853333, "learning_rate": 1.186850000825246e-08, "loss": 0.0105, "step": 242300 }, { "epoch": 1.9605955174366858, "grad_norm": 0.3524116277694702, "learning_rate": 1.1819927606301906e-08, "loss": 0.0116, "step": 242310 }, { "epoch": 1.9606764301318877, "grad_norm": 0.629323422908783, "learning_rate": 1.177145468307217e-08, "loss": 0.0206, "step": 242320 }, { "epoch": 1.9607573428270895, "grad_norm": 0.32039833068847656, "learning_rate": 1.1723081239529699e-08, "loss": 0.019, "step": 242330 }, { "epoch": 1.9608382555222914, "grad_norm": 0.4573156535625458, "learning_rate": 1.1674807276640388e-08, "loss": 0.0135, "step": 242340 }, { "epoch": 1.9609191682174933, "grad_norm": 0.9484503269195557, "learning_rate": 1.1626632795365688e-08, "loss": 0.0157, "step": 242350 }, { "epoch": 1.961000080912695, "grad_norm": 0.36227312684059143, "learning_rate": 1.15785577966665e-08, "loss": 0.0161, "step": 242360 }, { "epoch": 1.9610809936078972, "grad_norm": 0.3664003610610962, "learning_rate": 1.1530582281502612e-08, "loss": 0.014, "step": 242370 }, { "epoch": 1.961161906303099, "grad_norm": 0.2744506001472473, "learning_rate": 1.1482706250829923e-08, "loss": 0.0278, "step": 242380 }, { "epoch": 1.9612428189983009, "grad_norm": 0.6430068016052246, "learning_rate": 1.1434929705603782e-08, "loss": 0.0233, "step": 242390 }, { "epoch": 1.9613237316935028, "grad_norm": 0.2551167905330658, "learning_rate": 1.1387252646776204e-08, "loss": 0.0171, "step": 242400 }, { "epoch": 1.9614046443887045, "grad_norm": 0.45298439264297485, "learning_rate": 1.1339675075298651e-08, "loss": 0.0104, "step": 242410 }, { "epoch": 1.9614855570839065, "grad_norm": 0.9308999180793762, "learning_rate": 1.1292196992119809e-08, "loss": 0.0225, "step": 242420 }, { "epoch": 1.9615664697791084, "grad_norm": 0.2646509110927582, "learning_rate": 1.1244818398186141e-08, "loss": 0.0267, "step": 242430 }, { "epoch": 1.9616473824743101, "grad_norm": 0.6539987921714783, "learning_rate": 1.1197539294443005e-08, "loss": 0.0288, "step": 242440 }, { "epoch": 1.961728295169512, "grad_norm": 0.4167581796646118, "learning_rate": 1.1150359681832979e-08, "loss": 0.0236, "step": 242450 }, { "epoch": 1.961809207864714, "grad_norm": 0.3098532557487488, "learning_rate": 1.1103279561296975e-08, "loss": 0.0109, "step": 242460 }, { "epoch": 1.9618901205599157, "grad_norm": 0.19668114185333252, "learning_rate": 1.1056298933774801e-08, "loss": 0.0136, "step": 242470 }, { "epoch": 1.9619710332551177, "grad_norm": 0.5216975808143616, "learning_rate": 1.1009417800201816e-08, "loss": 0.0289, "step": 242480 }, { "epoch": 1.9620519459503196, "grad_norm": 0.47811686992645264, "learning_rate": 1.096263616151394e-08, "loss": 0.0181, "step": 242490 }, { "epoch": 1.9621328586455213, "grad_norm": 0.4868778586387634, "learning_rate": 1.0915954018644315e-08, "loss": 0.0197, "step": 242500 }, { "epoch": 1.9622137713407235, "grad_norm": 0.13693702220916748, "learning_rate": 1.0869371372523307e-08, "loss": 0.0162, "step": 242510 }, { "epoch": 1.9622946840359252, "grad_norm": 0.31108522415161133, "learning_rate": 1.0822888224080175e-08, "loss": 0.0284, "step": 242520 }, { "epoch": 1.9623755967311272, "grad_norm": 0.33741894364356995, "learning_rate": 1.0776504574242508e-08, "loss": 0.0146, "step": 242530 }, { "epoch": 1.962456509426329, "grad_norm": 0.08992381393909454, "learning_rate": 1.0730220423934013e-08, "loss": 0.0226, "step": 242540 }, { "epoch": 1.9625374221215308, "grad_norm": 0.34449073672294617, "learning_rate": 1.0684035774078394e-08, "loss": 0.0142, "step": 242550 }, { "epoch": 1.9626183348167328, "grad_norm": 0.272393137216568, "learning_rate": 1.0637950625597137e-08, "loss": 0.0144, "step": 242560 }, { "epoch": 1.9626992475119347, "grad_norm": 0.3278562128543854, "learning_rate": 1.0591964979408954e-08, "loss": 0.018, "step": 242570 }, { "epoch": 1.9627801602071364, "grad_norm": 0.14268560707569122, "learning_rate": 1.0546078836430329e-08, "loss": 0.0139, "step": 242580 }, { "epoch": 1.9628610729023384, "grad_norm": 0.36587172746658325, "learning_rate": 1.0500292197577756e-08, "loss": 0.0125, "step": 242590 }, { "epoch": 1.9629419855975403, "grad_norm": 0.27562886476516724, "learning_rate": 1.0454605063763279e-08, "loss": 0.0133, "step": 242600 }, { "epoch": 1.963022898292742, "grad_norm": 0.31547316908836365, "learning_rate": 1.0409017435897838e-08, "loss": 0.0152, "step": 242610 }, { "epoch": 1.963103810987944, "grad_norm": 0.5954700112342834, "learning_rate": 1.0363529314891818e-08, "loss": 0.0161, "step": 242620 }, { "epoch": 1.963184723683146, "grad_norm": 0.2279885709285736, "learning_rate": 1.0318140701651159e-08, "loss": 0.0118, "step": 242630 }, { "epoch": 1.9632656363783476, "grad_norm": 0.212217777967453, "learning_rate": 1.02728515970818e-08, "loss": 0.0187, "step": 242640 }, { "epoch": 1.9633465490735498, "grad_norm": 0.29413893818855286, "learning_rate": 1.0227662002085803e-08, "loss": 0.0093, "step": 242650 }, { "epoch": 1.9634274617687515, "grad_norm": 0.25211113691329956, "learning_rate": 1.018257191756633e-08, "loss": 0.011, "step": 242660 }, { "epoch": 1.9635083744639534, "grad_norm": 0.7174833416938782, "learning_rate": 1.0137581344420999e-08, "loss": 0.0238, "step": 242670 }, { "epoch": 1.9635892871591554, "grad_norm": 0.511253833770752, "learning_rate": 1.0092690283547424e-08, "loss": 0.0351, "step": 242680 }, { "epoch": 1.963670199854357, "grad_norm": 0.7468361854553223, "learning_rate": 1.0047898735840999e-08, "loss": 0.0218, "step": 242690 }, { "epoch": 1.963751112549559, "grad_norm": 0.551484227180481, "learning_rate": 1.00032067021949e-08, "loss": 0.0205, "step": 242700 }, { "epoch": 1.963832025244761, "grad_norm": 0.2564946115016937, "learning_rate": 9.958614183500636e-09, "loss": 0.0171, "step": 242710 }, { "epoch": 1.9639129379399627, "grad_norm": 0.37275636196136475, "learning_rate": 9.914121180647495e-09, "loss": 0.0174, "step": 242720 }, { "epoch": 1.9639938506351646, "grad_norm": 0.4957912862300873, "learning_rate": 9.869727694522547e-09, "loss": 0.0177, "step": 242730 }, { "epoch": 1.9640747633303666, "grad_norm": 0.25422918796539307, "learning_rate": 9.82543372601119e-09, "loss": 0.0156, "step": 242740 }, { "epoch": 1.9641556760255683, "grad_norm": 0.5848363041877747, "learning_rate": 9.781239275997168e-09, "loss": 0.026, "step": 242750 }, { "epoch": 1.9642365887207704, "grad_norm": 0.14388668537139893, "learning_rate": 9.737144345361438e-09, "loss": 0.0207, "step": 242760 }, { "epoch": 1.9643175014159722, "grad_norm": 0.6469393968582153, "learning_rate": 9.693148934982744e-09, "loss": 0.0234, "step": 242770 }, { "epoch": 1.9643984141111739, "grad_norm": 0.2885570228099823, "learning_rate": 9.649253045739826e-09, "loss": 0.0196, "step": 242780 }, { "epoch": 1.964479326806376, "grad_norm": 0.2857624888420105, "learning_rate": 9.60545667850754e-09, "loss": 0.0165, "step": 242790 }, { "epoch": 1.9645602395015778, "grad_norm": 0.2842554748058319, "learning_rate": 9.56175983415908e-09, "loss": 0.0297, "step": 242800 }, { "epoch": 1.9646411521967797, "grad_norm": 0.48653265833854675, "learning_rate": 9.518162513565966e-09, "loss": 0.0159, "step": 242810 }, { "epoch": 1.9647220648919816, "grad_norm": 0.369535356760025, "learning_rate": 9.474664717597504e-09, "loss": 0.0189, "step": 242820 }, { "epoch": 1.9648029775871834, "grad_norm": 0.3610207438468933, "learning_rate": 9.43126644712189e-09, "loss": 0.0214, "step": 242830 }, { "epoch": 1.9648838902823853, "grad_norm": 0.7034962177276611, "learning_rate": 9.38796770300343e-09, "loss": 0.0192, "step": 242840 }, { "epoch": 1.9649648029775872, "grad_norm": 0.644182026386261, "learning_rate": 9.344768486106991e-09, "loss": 0.0205, "step": 242850 }, { "epoch": 1.965045715672789, "grad_norm": 0.2538638710975647, "learning_rate": 9.301668797292995e-09, "loss": 0.0137, "step": 242860 }, { "epoch": 1.965126628367991, "grad_norm": 0.4764127731323242, "learning_rate": 9.258668637421863e-09, "loss": 0.0218, "step": 242870 }, { "epoch": 1.9652075410631928, "grad_norm": 0.5787283778190613, "learning_rate": 9.215768007350135e-09, "loss": 0.0122, "step": 242880 }, { "epoch": 1.9652884537583946, "grad_norm": 0.5782214403152466, "learning_rate": 9.172966907934344e-09, "loss": 0.0207, "step": 242890 }, { "epoch": 1.9653693664535967, "grad_norm": 0.20304609835147858, "learning_rate": 9.130265340027144e-09, "loss": 0.0133, "step": 242900 }, { "epoch": 1.9654502791487984, "grad_norm": 0.31677424907684326, "learning_rate": 9.087663304481187e-09, "loss": 0.0332, "step": 242910 }, { "epoch": 1.9655311918440002, "grad_norm": 0.7144857048988342, "learning_rate": 9.045160802144682e-09, "loss": 0.0211, "step": 242920 }, { "epoch": 1.9656121045392023, "grad_norm": 0.42584580183029175, "learning_rate": 9.002757833866948e-09, "loss": 0.0095, "step": 242930 }, { "epoch": 1.965693017234404, "grad_norm": 0.006079687271267176, "learning_rate": 8.960454400492313e-09, "loss": 0.0164, "step": 242940 }, { "epoch": 1.965773929929606, "grad_norm": 0.5207670331001282, "learning_rate": 8.918250502864544e-09, "loss": 0.0239, "step": 242950 }, { "epoch": 1.965854842624808, "grad_norm": 0.16336248815059662, "learning_rate": 8.876146141826303e-09, "loss": 0.0225, "step": 242960 }, { "epoch": 1.9659357553200096, "grad_norm": 0.30399882793426514, "learning_rate": 8.83414131821636e-09, "loss": 0.0134, "step": 242970 }, { "epoch": 1.9660166680152116, "grad_norm": 0.22042298316955566, "learning_rate": 8.79223603287238e-09, "loss": 0.0104, "step": 242980 }, { "epoch": 1.9660975807104135, "grad_norm": 0.21282431483268738, "learning_rate": 8.750430286630363e-09, "loss": 0.0189, "step": 242990 }, { "epoch": 1.9661784934056152, "grad_norm": 0.3259095251560211, "learning_rate": 8.708724080324083e-09, "loss": 0.0227, "step": 243000 }, { "epoch": 1.9662594061008172, "grad_norm": 0.4028984010219574, "learning_rate": 8.667117414785653e-09, "loss": 0.0198, "step": 243010 }, { "epoch": 1.9663403187960191, "grad_norm": 0.4311635494232178, "learning_rate": 8.625610290843856e-09, "loss": 0.0173, "step": 243020 }, { "epoch": 1.9664212314912208, "grad_norm": 0.5179760456085205, "learning_rate": 8.584202709326916e-09, "loss": 0.0183, "step": 243030 }, { "epoch": 1.966502144186423, "grad_norm": 0.5234264731407166, "learning_rate": 8.542894671061398e-09, "loss": 0.0101, "step": 243040 }, { "epoch": 1.9665830568816247, "grad_norm": 0.2638970911502838, "learning_rate": 8.501686176869972e-09, "loss": 0.0171, "step": 243050 }, { "epoch": 1.9666639695768267, "grad_norm": 0.7276357412338257, "learning_rate": 8.460577227574762e-09, "loss": 0.0186, "step": 243060 }, { "epoch": 1.9667448822720286, "grad_norm": 0.2595824897289276, "learning_rate": 8.419567823996222e-09, "loss": 0.0188, "step": 243070 }, { "epoch": 1.9668257949672303, "grad_norm": 0.270929217338562, "learning_rate": 8.378657966951476e-09, "loss": 0.0079, "step": 243080 }, { "epoch": 1.9669067076624323, "grad_norm": 0.2770875096321106, "learning_rate": 8.337847657255982e-09, "loss": 0.0111, "step": 243090 }, { "epoch": 1.9669876203576342, "grad_norm": 0.3576401174068451, "learning_rate": 8.297136895725199e-09, "loss": 0.0182, "step": 243100 }, { "epoch": 1.967068533052836, "grad_norm": 0.02428540773689747, "learning_rate": 8.256525683169591e-09, "loss": 0.0112, "step": 243110 }, { "epoch": 1.9671494457480379, "grad_norm": 0.17753282189369202, "learning_rate": 8.21601402039962e-09, "loss": 0.0149, "step": 243120 }, { "epoch": 1.9672303584432398, "grad_norm": 0.3964169919490814, "learning_rate": 8.175601908222975e-09, "loss": 0.017, "step": 243130 }, { "epoch": 1.9673112711384415, "grad_norm": 0.3196459412574768, "learning_rate": 8.135289347446229e-09, "loss": 0.0145, "step": 243140 }, { "epoch": 1.9673921838336434, "grad_norm": 0.620933473110199, "learning_rate": 8.095076338872076e-09, "loss": 0.0172, "step": 243150 }, { "epoch": 1.9674730965288454, "grad_norm": 0.19491314888000488, "learning_rate": 8.054962883303763e-09, "loss": 0.023, "step": 243160 }, { "epoch": 1.967554009224047, "grad_norm": 0.041920002549886703, "learning_rate": 8.014948981540649e-09, "loss": 0.0212, "step": 243170 }, { "epoch": 1.9676349219192493, "grad_norm": 0.19140009582042694, "learning_rate": 7.975034634380984e-09, "loss": 0.0129, "step": 243180 }, { "epoch": 1.967715834614451, "grad_norm": 0.24645982682704926, "learning_rate": 7.9352198426208e-09, "loss": 0.0203, "step": 243190 }, { "epoch": 1.967796747309653, "grad_norm": 0.21701933443546295, "learning_rate": 7.89550460705335e-09, "loss": 0.0261, "step": 243200 }, { "epoch": 1.9678776600048549, "grad_norm": 0.2558586597442627, "learning_rate": 7.855888928471334e-09, "loss": 0.0164, "step": 243210 }, { "epoch": 1.9679585727000566, "grad_norm": 0.3080761730670929, "learning_rate": 7.81637280766523e-09, "loss": 0.0158, "step": 243220 }, { "epoch": 1.9680394853952585, "grad_norm": 0.3517237901687622, "learning_rate": 7.776956245422185e-09, "loss": 0.0204, "step": 243230 }, { "epoch": 1.9681203980904605, "grad_norm": 0.5244665741920471, "learning_rate": 7.737639242528793e-09, "loss": 0.0176, "step": 243240 }, { "epoch": 1.9682013107856622, "grad_norm": 0.6666042804718018, "learning_rate": 7.698421799768873e-09, "loss": 0.0211, "step": 243250 }, { "epoch": 1.9682822234808641, "grad_norm": 0.36880970001220703, "learning_rate": 7.659303917925131e-09, "loss": 0.0208, "step": 243260 }, { "epoch": 1.968363136176066, "grad_norm": 0.05787171795964241, "learning_rate": 7.6202855977775e-09, "loss": 0.0141, "step": 243270 }, { "epoch": 1.9684440488712678, "grad_norm": 0.32062602043151855, "learning_rate": 7.581366840103132e-09, "loss": 0.0182, "step": 243280 }, { "epoch": 1.96852496156647, "grad_norm": 0.2658701241016388, "learning_rate": 7.542547645679188e-09, "loss": 0.0129, "step": 243290 }, { "epoch": 1.9686058742616717, "grad_norm": 0.3348199725151062, "learning_rate": 7.503828015280046e-09, "loss": 0.0182, "step": 243300 }, { "epoch": 1.9686867869568734, "grad_norm": 0.44806230068206787, "learning_rate": 7.465207949676756e-09, "loss": 0.012, "step": 243310 }, { "epoch": 1.9687676996520755, "grad_norm": 0.30742940306663513, "learning_rate": 7.426687449640368e-09, "loss": 0.0182, "step": 243320 }, { "epoch": 1.9688486123472773, "grad_norm": 0.3247622549533844, "learning_rate": 7.388266515939158e-09, "loss": 0.0249, "step": 243330 }, { "epoch": 1.9689295250424792, "grad_norm": 0.42559579014778137, "learning_rate": 7.349945149338622e-09, "loss": 0.0195, "step": 243340 }, { "epoch": 1.9690104377376811, "grad_norm": 0.2296980917453766, "learning_rate": 7.31172335060426e-09, "loss": 0.0143, "step": 243350 }, { "epoch": 1.9690913504328829, "grad_norm": 0.19556596875190735, "learning_rate": 7.273601120497132e-09, "loss": 0.011, "step": 243360 }, { "epoch": 1.9691722631280848, "grad_norm": 0.17864461243152618, "learning_rate": 7.235578459777737e-09, "loss": 0.0175, "step": 243370 }, { "epoch": 1.9692531758232867, "grad_norm": 0.6794975996017456, "learning_rate": 7.19765536920436e-09, "loss": 0.0158, "step": 243380 }, { "epoch": 1.9693340885184885, "grad_norm": 0.32882118225097656, "learning_rate": 7.159831849534171e-09, "loss": 0.0173, "step": 243390 }, { "epoch": 1.9694150012136904, "grad_norm": 0.0702669620513916, "learning_rate": 7.1221079015204586e-09, "loss": 0.0087, "step": 243400 }, { "epoch": 1.9694959139088923, "grad_norm": 0.3388812839984894, "learning_rate": 7.084483525915953e-09, "loss": 0.0155, "step": 243410 }, { "epoch": 1.969576826604094, "grad_norm": 0.4619764983654022, "learning_rate": 7.046958723470609e-09, "loss": 0.0153, "step": 243420 }, { "epoch": 1.9696577392992962, "grad_norm": 0.6038585305213928, "learning_rate": 7.009533494933274e-09, "loss": 0.015, "step": 243430 }, { "epoch": 1.969738651994498, "grad_norm": 0.22775191068649292, "learning_rate": 6.972207841050016e-09, "loss": 0.011, "step": 243440 }, { "epoch": 1.9698195646896997, "grad_norm": 0.41926997900009155, "learning_rate": 6.9349817625652405e-09, "loss": 0.0248, "step": 243450 }, { "epoch": 1.9699004773849018, "grad_norm": 0.29337114095687866, "learning_rate": 6.897855260221686e-09, "loss": 0.0198, "step": 243460 }, { "epoch": 1.9699813900801035, "grad_norm": 0.5098801851272583, "learning_rate": 6.860828334759317e-09, "loss": 0.0115, "step": 243470 }, { "epoch": 1.9700623027753055, "grad_norm": 0.19783519208431244, "learning_rate": 6.823900986916987e-09, "loss": 0.0109, "step": 243480 }, { "epoch": 1.9701432154705074, "grad_norm": 0.2825050950050354, "learning_rate": 6.7870732174307726e-09, "loss": 0.023, "step": 243490 }, { "epoch": 1.9702241281657091, "grad_norm": 0.10438186675310135, "learning_rate": 6.750345027035088e-09, "loss": 0.0201, "step": 243500 }, { "epoch": 1.970305040860911, "grad_norm": 0.2593397796154022, "learning_rate": 6.713716416462679e-09, "loss": 0.0115, "step": 243510 }, { "epoch": 1.970385953556113, "grad_norm": 0.7170579433441162, "learning_rate": 6.6771873864440725e-09, "loss": 0.0227, "step": 243520 }, { "epoch": 1.9704668662513147, "grad_norm": 0.4300304353237152, "learning_rate": 6.640757937707021e-09, "loss": 0.0142, "step": 243530 }, { "epoch": 1.9705477789465167, "grad_norm": 0.4894808530807495, "learning_rate": 6.604428070979274e-09, "loss": 0.0136, "step": 243540 }, { "epoch": 1.9706286916417186, "grad_norm": 0.5025160312652588, "learning_rate": 6.5681977869841425e-09, "loss": 0.0133, "step": 243550 }, { "epoch": 1.9707096043369203, "grad_norm": 0.42737507820129395, "learning_rate": 6.532067086445493e-09, "loss": 0.0176, "step": 243560 }, { "epoch": 1.9707905170321225, "grad_norm": 0.15150365233421326, "learning_rate": 6.496035970082193e-09, "loss": 0.0108, "step": 243570 }, { "epoch": 1.9708714297273242, "grad_norm": 1.2519943714141846, "learning_rate": 6.4601044386142235e-09, "loss": 0.0228, "step": 243580 }, { "epoch": 1.9709523424225261, "grad_norm": 0.2624964416027069, "learning_rate": 6.424272492757677e-09, "loss": 0.0084, "step": 243590 }, { "epoch": 1.971033255117728, "grad_norm": 0.23458996415138245, "learning_rate": 6.388540133226983e-09, "loss": 0.0131, "step": 243600 }, { "epoch": 1.9711141678129298, "grad_norm": 0.3707382380962372, "learning_rate": 6.35290736073435e-09, "loss": 0.0193, "step": 243610 }, { "epoch": 1.9711950805081317, "grad_norm": 0.5448655486106873, "learning_rate": 6.317374175991431e-09, "loss": 0.0218, "step": 243620 }, { "epoch": 1.9712759932033337, "grad_norm": 0.368251770734787, "learning_rate": 6.281940579706547e-09, "loss": 0.0135, "step": 243630 }, { "epoch": 1.9713569058985354, "grad_norm": 0.4060388505458832, "learning_rate": 6.2466065725858e-09, "loss": 0.0212, "step": 243640 }, { "epoch": 1.9714378185937373, "grad_norm": 0.4564799666404724, "learning_rate": 6.211372155334183e-09, "loss": 0.0186, "step": 243650 }, { "epoch": 1.9715187312889393, "grad_norm": 0.10243640840053558, "learning_rate": 6.176237328654467e-09, "loss": 0.0183, "step": 243660 }, { "epoch": 1.971599643984141, "grad_norm": 0.12990467250347137, "learning_rate": 6.141202093247755e-09, "loss": 0.0164, "step": 243670 }, { "epoch": 1.971680556679343, "grad_norm": 0.28875941038131714, "learning_rate": 6.106266449811826e-09, "loss": 0.0145, "step": 243680 }, { "epoch": 1.9717614693745449, "grad_norm": 0.17739161849021912, "learning_rate": 6.0714303990438984e-09, "loss": 0.0164, "step": 243690 }, { "epoch": 1.9718423820697466, "grad_norm": 0.34236761927604675, "learning_rate": 6.0366939416384164e-09, "loss": 0.0101, "step": 243700 }, { "epoch": 1.9719232947649488, "grad_norm": 0.3346386253833771, "learning_rate": 6.002057078288715e-09, "loss": 0.017, "step": 243710 }, { "epoch": 1.9720042074601505, "grad_norm": 0.3427480161190033, "learning_rate": 5.967519809684796e-09, "loss": 0.0136, "step": 243720 }, { "epoch": 1.9720851201553524, "grad_norm": 0.49833688139915466, "learning_rate": 5.9330821365161104e-09, "loss": 0.0277, "step": 243730 }, { "epoch": 1.9721660328505544, "grad_norm": 0.200691357254982, "learning_rate": 5.898744059468775e-09, "loss": 0.0174, "step": 243740 }, { "epoch": 1.972246945545756, "grad_norm": 0.5191700458526611, "learning_rate": 5.864505579228352e-09, "loss": 0.0207, "step": 243750 }, { "epoch": 1.972327858240958, "grad_norm": 0.27965521812438965, "learning_rate": 5.830366696477074e-09, "loss": 0.0087, "step": 243760 }, { "epoch": 1.97240877093616, "grad_norm": 0.3084433376789093, "learning_rate": 5.796327411896619e-09, "loss": 0.0197, "step": 243770 }, { "epoch": 1.9724896836313617, "grad_norm": 0.34939122200012207, "learning_rate": 5.76238772616422e-09, "loss": 0.0219, "step": 243780 }, { "epoch": 1.9725705963265636, "grad_norm": 0.33121418952941895, "learning_rate": 5.728547639958226e-09, "loss": 0.0243, "step": 243790 }, { "epoch": 1.9726515090217656, "grad_norm": 0.17704211175441742, "learning_rate": 5.694807153953097e-09, "loss": 0.018, "step": 243800 }, { "epoch": 1.9727324217169673, "grad_norm": 0.17767098546028137, "learning_rate": 5.661166268821072e-09, "loss": 0.0152, "step": 243810 }, { "epoch": 1.9728133344121692, "grad_norm": 0.2032739669084549, "learning_rate": 5.627624985233837e-09, "loss": 0.0098, "step": 243820 }, { "epoch": 1.9728942471073712, "grad_norm": 0.18165040016174316, "learning_rate": 5.594183303860301e-09, "loss": 0.0254, "step": 243830 }, { "epoch": 1.9729751598025729, "grad_norm": 0.2386282980442047, "learning_rate": 5.560841225367153e-09, "loss": 0.02, "step": 243840 }, { "epoch": 1.973056072497775, "grad_norm": 0.2073112279176712, "learning_rate": 5.527598750418861e-09, "loss": 0.0177, "step": 243850 }, { "epoch": 1.9731369851929768, "grad_norm": 0.31885865330696106, "learning_rate": 5.494455879678784e-09, "loss": 0.0219, "step": 243860 }, { "epoch": 1.9732178978881787, "grad_norm": 0.5618539452552795, "learning_rate": 5.461412613808614e-09, "loss": 0.0168, "step": 243870 }, { "epoch": 1.9732988105833806, "grad_norm": 0.19358308613300323, "learning_rate": 5.428468953465604e-09, "loss": 0.0194, "step": 243880 }, { "epoch": 1.9733797232785824, "grad_norm": 0.22200678288936615, "learning_rate": 5.395624899308671e-09, "loss": 0.0254, "step": 243890 }, { "epoch": 1.9734606359737843, "grad_norm": 0.33302050828933716, "learning_rate": 5.3628804519917365e-09, "loss": 0.0147, "step": 243900 }, { "epoch": 1.9735415486689862, "grad_norm": 0.38688915967941284, "learning_rate": 5.330235612167611e-09, "loss": 0.0277, "step": 243910 }, { "epoch": 1.973622461364188, "grad_norm": 0.32343044877052307, "learning_rate": 5.297690380487997e-09, "loss": 0.0101, "step": 243920 }, { "epoch": 1.97370337405939, "grad_norm": 0.37151145935058594, "learning_rate": 5.265244757601817e-09, "loss": 0.0259, "step": 243930 }, { "epoch": 1.9737842867545918, "grad_norm": 0.3684297502040863, "learning_rate": 5.232898744155224e-09, "loss": 0.0124, "step": 243940 }, { "epoch": 1.9738651994497936, "grad_norm": 0.5051050186157227, "learning_rate": 5.2006523407949205e-09, "loss": 0.0138, "step": 243950 }, { "epoch": 1.9739461121449957, "grad_norm": 0.3127800226211548, "learning_rate": 5.168505548162617e-09, "loss": 0.0185, "step": 243960 }, { "epoch": 1.9740270248401974, "grad_norm": 0.41506168246269226, "learning_rate": 5.136458366900021e-09, "loss": 0.0232, "step": 243970 }, { "epoch": 1.9741079375353991, "grad_norm": 0.629199743270874, "learning_rate": 5.104510797646067e-09, "loss": 0.0161, "step": 243980 }, { "epoch": 1.9741888502306013, "grad_norm": 0.6985025405883789, "learning_rate": 5.072662841038578e-09, "loss": 0.0247, "step": 243990 }, { "epoch": 1.974269762925803, "grad_norm": 0.3941410779953003, "learning_rate": 5.040914497711491e-09, "loss": 0.0073, "step": 244000 }, { "epoch": 1.974269762925803, "eval_loss": 0.018561428412795067, "eval_runtime": 3.8204, "eval_samples_per_second": 52.351, "eval_steps_per_second": 26.176, "step": 244000 }, { "epoch": 1.974350675621005, "grad_norm": 0.2639777362346649, "learning_rate": 5.009265768298743e-09, "loss": 0.0124, "step": 244010 }, { "epoch": 1.974431588316207, "grad_norm": 0.43595537543296814, "learning_rate": 4.9777166534309415e-09, "loss": 0.0145, "step": 244020 }, { "epoch": 1.9745125010114086, "grad_norm": 0.43571963906288147, "learning_rate": 4.9462671537375826e-09, "loss": 0.0192, "step": 244030 }, { "epoch": 1.9745934137066106, "grad_norm": 0.120612233877182, "learning_rate": 4.914917269846498e-09, "loss": 0.0212, "step": 244040 }, { "epoch": 1.9746743264018125, "grad_norm": 0.33885958790779114, "learning_rate": 4.8836670023816315e-09, "loss": 0.0202, "step": 244050 }, { "epoch": 1.9747552390970142, "grad_norm": 0.4157070517539978, "learning_rate": 4.852516351967485e-09, "loss": 0.0265, "step": 244060 }, { "epoch": 1.9748361517922162, "grad_norm": 0.5698933005332947, "learning_rate": 4.821465319224117e-09, "loss": 0.0173, "step": 244070 }, { "epoch": 1.974917064487418, "grad_norm": 0.3157826364040375, "learning_rate": 4.7905139047715875e-09, "loss": 0.0191, "step": 244080 }, { "epoch": 1.9749979771826198, "grad_norm": 0.09830890595912933, "learning_rate": 4.7596621092271815e-09, "loss": 0.0193, "step": 244090 }, { "epoch": 1.975078889877822, "grad_norm": 0.30038902163505554, "learning_rate": 4.728909933205405e-09, "loss": 0.0181, "step": 244100 }, { "epoch": 1.9751598025730237, "grad_norm": 0.5310422778129578, "learning_rate": 4.698257377320769e-09, "loss": 0.0228, "step": 244110 }, { "epoch": 1.9752407152682254, "grad_norm": 0.0985776036977768, "learning_rate": 4.667704442183341e-09, "loss": 0.0183, "step": 244120 }, { "epoch": 1.9753216279634276, "grad_norm": 0.5535323023796082, "learning_rate": 4.637251128402631e-09, "loss": 0.0188, "step": 244130 }, { "epoch": 1.9754025406586293, "grad_norm": 0.13852868974208832, "learning_rate": 4.606897436586488e-09, "loss": 0.0235, "step": 244140 }, { "epoch": 1.9754834533538312, "grad_norm": 0.7199172377586365, "learning_rate": 4.576643367340539e-09, "loss": 0.0274, "step": 244150 }, { "epoch": 1.9755643660490332, "grad_norm": 0.4015326499938965, "learning_rate": 4.546488921267078e-09, "loss": 0.0159, "step": 244160 }, { "epoch": 1.975645278744235, "grad_norm": 0.27439987659454346, "learning_rate": 4.516434098968403e-09, "loss": 0.0177, "step": 244170 }, { "epoch": 1.9757261914394368, "grad_norm": 0.441413551568985, "learning_rate": 4.4864789010429234e-09, "loss": 0.0168, "step": 244180 }, { "epoch": 1.9758071041346388, "grad_norm": 0.39084553718566895, "learning_rate": 4.456623328089049e-09, "loss": 0.0168, "step": 244190 }, { "epoch": 1.9758880168298405, "grad_norm": 0.12479285895824432, "learning_rate": 4.42686738070186e-09, "loss": 0.012, "step": 244200 }, { "epoch": 1.9759689295250424, "grad_norm": 0.2465449869632721, "learning_rate": 4.397211059474216e-09, "loss": 0.0176, "step": 244210 }, { "epoch": 1.9760498422202444, "grad_norm": 0.39015892148017883, "learning_rate": 4.3676543649984195e-09, "loss": 0.0191, "step": 244220 }, { "epoch": 1.976130754915446, "grad_norm": 0.20596960186958313, "learning_rate": 4.338197297863444e-09, "loss": 0.0144, "step": 244230 }, { "epoch": 1.9762116676106483, "grad_norm": 0.2995658218860626, "learning_rate": 4.308839858656599e-09, "loss": 0.0135, "step": 244240 }, { "epoch": 1.97629258030585, "grad_norm": 0.5009588599205017, "learning_rate": 4.279582047964081e-09, "loss": 0.0148, "step": 244250 }, { "epoch": 1.976373493001052, "grad_norm": 0.39642879366874695, "learning_rate": 4.250423866368203e-09, "loss": 0.0197, "step": 244260 }, { "epoch": 1.9764544056962539, "grad_norm": 0.287536084651947, "learning_rate": 4.2213653144518305e-09, "loss": 0.0239, "step": 244270 }, { "epoch": 1.9765353183914556, "grad_norm": 0.3108811676502228, "learning_rate": 4.192406392793391e-09, "loss": 0.0109, "step": 244280 }, { "epoch": 1.9766162310866575, "grad_norm": 0.6134787201881409, "learning_rate": 4.163547101970755e-09, "loss": 0.0181, "step": 244290 }, { "epoch": 1.9766971437818595, "grad_norm": 0.6339853405952454, "learning_rate": 4.134787442559574e-09, "loss": 0.0183, "step": 244300 }, { "epoch": 1.9767780564770612, "grad_norm": 0.2352723777294159, "learning_rate": 4.1061274151332765e-09, "loss": 0.0128, "step": 244310 }, { "epoch": 1.9768589691722631, "grad_norm": 0.32401299476623535, "learning_rate": 4.077567020263073e-09, "loss": 0.0193, "step": 244320 }, { "epoch": 1.976939881867465, "grad_norm": 0.349124938249588, "learning_rate": 4.04910625851962e-09, "loss": 0.0169, "step": 244330 }, { "epoch": 1.9770207945626668, "grad_norm": 0.3561212122440338, "learning_rate": 4.020745130469683e-09, "loss": 0.0187, "step": 244340 }, { "epoch": 1.9771017072578687, "grad_norm": 0.25657644867897034, "learning_rate": 3.9924836366783685e-09, "loss": 0.0142, "step": 244350 }, { "epoch": 1.9771826199530707, "grad_norm": 0.6095962524414062, "learning_rate": 3.964321777710778e-09, "loss": 0.0214, "step": 244360 }, { "epoch": 1.9772635326482724, "grad_norm": 0.8060789704322815, "learning_rate": 3.93625955412702e-09, "loss": 0.0187, "step": 244370 }, { "epoch": 1.9773444453434745, "grad_norm": 1.2909421920776367, "learning_rate": 3.9082969664877566e-09, "loss": 0.0257, "step": 244380 }, { "epoch": 1.9774253580386763, "grad_norm": 0.2998043894767761, "learning_rate": 3.880434015350321e-09, "loss": 0.0121, "step": 244390 }, { "epoch": 1.9775062707338782, "grad_norm": 0.20288199186325073, "learning_rate": 3.852670701270378e-09, "loss": 0.0166, "step": 244400 }, { "epoch": 1.9775871834290801, "grad_norm": 0.6078183054924011, "learning_rate": 3.825007024801375e-09, "loss": 0.009, "step": 244410 }, { "epoch": 1.9776680961242818, "grad_norm": 0.17547817528247833, "learning_rate": 3.797442986495093e-09, "loss": 0.026, "step": 244420 }, { "epoch": 1.9777490088194838, "grad_norm": 0.22692899405956268, "learning_rate": 3.769978586901646e-09, "loss": 0.0203, "step": 244430 }, { "epoch": 1.9778299215146857, "grad_norm": 0.07168863713741302, "learning_rate": 3.7426138265683755e-09, "loss": 0.0156, "step": 244440 }, { "epoch": 1.9779108342098874, "grad_norm": 0.22695086896419525, "learning_rate": 3.7153487060415105e-09, "loss": 0.0109, "step": 244450 }, { "epoch": 1.9779917469050894, "grad_norm": 0.4287131130695343, "learning_rate": 3.6881832258639506e-09, "loss": 0.0177, "step": 244460 }, { "epoch": 1.9780726596002913, "grad_norm": 0.44662460684776306, "learning_rate": 3.661117386578039e-09, "loss": 0.0216, "step": 244470 }, { "epoch": 1.978153572295493, "grad_norm": 0.25111180543899536, "learning_rate": 3.6341511887233448e-09, "loss": 0.0173, "step": 244480 }, { "epoch": 1.978234484990695, "grad_norm": 0.2097553312778473, "learning_rate": 3.6072846328372156e-09, "loss": 0.0118, "step": 244490 }, { "epoch": 1.978315397685897, "grad_norm": 0.20114901661872864, "learning_rate": 3.5805177194569995e-09, "loss": 0.0136, "step": 244500 }, { "epoch": 1.9783963103810986, "grad_norm": 0.5851539373397827, "learning_rate": 3.553850449114493e-09, "loss": 0.0229, "step": 244510 }, { "epoch": 1.9784772230763008, "grad_norm": 0.6564391851425171, "learning_rate": 3.5272828223431586e-09, "loss": 0.0193, "step": 244520 }, { "epoch": 1.9785581357715025, "grad_norm": 0.29032906889915466, "learning_rate": 3.5008148396714626e-09, "loss": 0.0166, "step": 244530 }, { "epoch": 1.9786390484667045, "grad_norm": 0.471716046333313, "learning_rate": 3.4744465016284257e-09, "loss": 0.0153, "step": 244540 }, { "epoch": 1.9787199611619064, "grad_norm": 0.010439474135637283, "learning_rate": 3.4481778087391835e-09, "loss": 0.0122, "step": 244550 }, { "epoch": 1.9788008738571081, "grad_norm": 0.7382277846336365, "learning_rate": 3.4220087615283172e-09, "loss": 0.0265, "step": 244560 }, { "epoch": 1.97888178655231, "grad_norm": 0.4315362274646759, "learning_rate": 3.3959393605165203e-09, "loss": 0.0211, "step": 244570 }, { "epoch": 1.978962699247512, "grad_norm": 0.5944314002990723, "learning_rate": 3.3699696062250432e-09, "loss": 0.0206, "step": 244580 }, { "epoch": 1.9790436119427137, "grad_norm": 0.2931724786758423, "learning_rate": 3.3440994991706943e-09, "loss": 0.0201, "step": 244590 }, { "epoch": 1.9791245246379157, "grad_norm": 0.10235738009214401, "learning_rate": 3.3183290398702827e-09, "loss": 0.0129, "step": 244600 }, { "epoch": 1.9792054373331176, "grad_norm": 0.2927975356578827, "learning_rate": 3.2926582288372867e-09, "loss": 0.0129, "step": 244610 }, { "epoch": 1.9792863500283193, "grad_norm": 0.11962106823921204, "learning_rate": 3.2670870665835188e-09, "loss": 0.0208, "step": 244620 }, { "epoch": 1.9793672627235215, "grad_norm": 0.19469034671783447, "learning_rate": 3.241615553619126e-09, "loss": 0.0139, "step": 244630 }, { "epoch": 1.9794481754187232, "grad_norm": 0.42917904257774353, "learning_rate": 3.2162436904520366e-09, "loss": 0.0191, "step": 244640 }, { "epoch": 1.979529088113925, "grad_norm": 0.22686974704265594, "learning_rate": 3.1909714775885114e-09, "loss": 0.0132, "step": 244650 }, { "epoch": 1.979610000809127, "grad_norm": 0.7207342386245728, "learning_rate": 3.165798915532037e-09, "loss": 0.0159, "step": 244660 }, { "epoch": 1.9796909135043288, "grad_norm": 0.24814291298389435, "learning_rate": 3.1407260047849885e-09, "loss": 0.0198, "step": 244670 }, { "epoch": 1.9797718261995307, "grad_norm": 0.2920747399330139, "learning_rate": 3.1157527458475225e-09, "loss": 0.0208, "step": 244680 }, { "epoch": 1.9798527388947327, "grad_norm": 0.5571433901786804, "learning_rate": 3.090879139217573e-09, "loss": 0.0253, "step": 244690 }, { "epoch": 1.9799336515899344, "grad_norm": 0.5137393474578857, "learning_rate": 3.066105185390855e-09, "loss": 0.0183, "step": 244700 }, { "epoch": 1.9800145642851363, "grad_norm": 0.19162005186080933, "learning_rate": 3.0414308848614183e-09, "loss": 0.016, "step": 244710 }, { "epoch": 1.9800954769803383, "grad_norm": 0.5380616784095764, "learning_rate": 3.0168562381222012e-09, "loss": 0.0135, "step": 244720 }, { "epoch": 1.98017638967554, "grad_norm": 0.0823964923620224, "learning_rate": 2.992381245662257e-09, "loss": 0.0138, "step": 244730 }, { "epoch": 1.980257302370742, "grad_norm": 0.09891463071107864, "learning_rate": 2.968005907970084e-09, "loss": 0.0152, "step": 244740 }, { "epoch": 1.9803382150659439, "grad_norm": 0.36569222807884216, "learning_rate": 2.94373022553196e-09, "loss": 0.0157, "step": 244750 }, { "epoch": 1.9804191277611456, "grad_norm": 0.41718366742134094, "learning_rate": 2.919554198831942e-09, "loss": 0.0133, "step": 244760 }, { "epoch": 1.9805000404563478, "grad_norm": 0.20148754119873047, "learning_rate": 2.895477828352422e-09, "loss": 0.0095, "step": 244770 }, { "epoch": 1.9805809531515495, "grad_norm": 0.7079442739486694, "learning_rate": 2.8715011145724613e-09, "loss": 0.0155, "step": 244780 }, { "epoch": 1.9806618658467512, "grad_norm": 0.43966346979141235, "learning_rate": 2.8476240579716762e-09, "loss": 0.0192, "step": 244790 }, { "epoch": 1.9807427785419534, "grad_norm": 0.663456916809082, "learning_rate": 2.8238466590252424e-09, "loss": 0.031, "step": 244800 }, { "epoch": 1.980823691237155, "grad_norm": 0.3665258288383484, "learning_rate": 2.8001689182077796e-09, "loss": 0.0184, "step": 244810 }, { "epoch": 1.980904603932357, "grad_norm": 0.40418902039527893, "learning_rate": 2.776590835991133e-09, "loss": 0.018, "step": 244820 }, { "epoch": 1.980985516627559, "grad_norm": 0.28125810623168945, "learning_rate": 2.7531124128460373e-09, "loss": 0.0187, "step": 244830 }, { "epoch": 1.9810664293227607, "grad_norm": 0.802724301815033, "learning_rate": 2.7297336492404515e-09, "loss": 0.0277, "step": 244840 }, { "epoch": 1.9811473420179626, "grad_norm": 0.5031706094741821, "learning_rate": 2.706454545640669e-09, "loss": 0.0181, "step": 244850 }, { "epoch": 1.9812282547131645, "grad_norm": 0.6285778284072876, "learning_rate": 2.683275102511318e-09, "loss": 0.019, "step": 244860 }, { "epoch": 1.9813091674083663, "grad_norm": 0.578843355178833, "learning_rate": 2.6601953203136965e-09, "loss": 0.0133, "step": 244870 }, { "epoch": 1.9813900801035682, "grad_norm": 0.6850372552871704, "learning_rate": 2.637215199508547e-09, "loss": 0.0201, "step": 244880 }, { "epoch": 1.9814709927987701, "grad_norm": 0.6513057351112366, "learning_rate": 2.6143347405543918e-09, "loss": 0.0134, "step": 244890 }, { "epoch": 1.9815519054939719, "grad_norm": 0.2793193757534027, "learning_rate": 2.591553943907532e-09, "loss": 0.0115, "step": 244900 }, { "epoch": 1.981632818189174, "grad_norm": 0.43427059054374695, "learning_rate": 2.5688728100220493e-09, "loss": 0.0235, "step": 244910 }, { "epoch": 1.9817137308843757, "grad_norm": 0.7216499447822571, "learning_rate": 2.5462913393498047e-09, "loss": 0.0172, "step": 244920 }, { "epoch": 1.9817946435795777, "grad_norm": 0.35227835178375244, "learning_rate": 2.523809532342658e-09, "loss": 0.0138, "step": 244930 }, { "epoch": 1.9818755562747796, "grad_norm": 0.23661160469055176, "learning_rate": 2.501427389447475e-09, "loss": 0.0117, "step": 244940 }, { "epoch": 1.9819564689699813, "grad_norm": 1.5304266214370728, "learning_rate": 2.47914491111112e-09, "loss": 0.0169, "step": 244950 }, { "epoch": 1.9820373816651833, "grad_norm": 0.46095916628837585, "learning_rate": 2.456962097777682e-09, "loss": 0.0233, "step": 244960 }, { "epoch": 1.9821182943603852, "grad_norm": 0.44592005014419556, "learning_rate": 2.4348789498901403e-09, "loss": 0.0137, "step": 244970 }, { "epoch": 1.982199207055587, "grad_norm": 0.2776614725589752, "learning_rate": 2.4128954678886985e-09, "loss": 0.0274, "step": 244980 }, { "epoch": 1.9822801197507889, "grad_norm": 0.2501801550388336, "learning_rate": 2.3910116522118943e-09, "loss": 0.0182, "step": 244990 }, { "epoch": 1.9823610324459908, "grad_norm": 0.10828746110200882, "learning_rate": 2.36922750329549e-09, "loss": 0.0207, "step": 245000 }, { "epoch": 1.9824419451411925, "grad_norm": 0.7337175011634827, "learning_rate": 2.347543021574694e-09, "loss": 0.023, "step": 245010 }, { "epoch": 1.9825228578363945, "grad_norm": 0.6116883158683777, "learning_rate": 2.3259582074813825e-09, "loss": 0.0196, "step": 245020 }, { "epoch": 1.9826037705315964, "grad_norm": 0.2841247022151947, "learning_rate": 2.304473061446877e-09, "loss": 0.0192, "step": 245030 }, { "epoch": 1.9826846832267981, "grad_norm": 0.6384320855140686, "learning_rate": 2.2830875838986133e-09, "loss": 0.0167, "step": 245040 }, { "epoch": 1.9827655959220003, "grad_norm": 0.26871833205223083, "learning_rate": 2.2618017752634723e-09, "loss": 0.027, "step": 245050 }, { "epoch": 1.982846508617202, "grad_norm": 0.21151000261306763, "learning_rate": 2.2406156359655594e-09, "loss": 0.0168, "step": 245060 }, { "epoch": 1.982927421312404, "grad_norm": 0.3479049503803253, "learning_rate": 2.2195291664284247e-09, "loss": 0.0197, "step": 245070 }, { "epoch": 1.983008334007606, "grad_norm": 0.5247193574905396, "learning_rate": 2.198542367072287e-09, "loss": 0.0185, "step": 245080 }, { "epoch": 1.9830892467028076, "grad_norm": 0.2879226505756378, "learning_rate": 2.177655238314591e-09, "loss": 0.0066, "step": 245090 }, { "epoch": 1.9831701593980096, "grad_norm": 0.7774472832679749, "learning_rate": 2.1568677805733353e-09, "loss": 0.0151, "step": 245100 }, { "epoch": 1.9832510720932115, "grad_norm": 0.2685390114784241, "learning_rate": 2.1361799942620776e-09, "loss": 0.0145, "step": 245110 }, { "epoch": 1.9833319847884132, "grad_norm": 0.27693912386894226, "learning_rate": 2.1155918797938213e-09, "loss": 0.0248, "step": 245120 }, { "epoch": 1.9834128974836152, "grad_norm": 0.3003263473510742, "learning_rate": 2.095103437579349e-09, "loss": 0.0159, "step": 245130 }, { "epoch": 1.983493810178817, "grad_norm": 0.2977968156337738, "learning_rate": 2.0747146680266673e-09, "loss": 0.0129, "step": 245140 }, { "epoch": 1.9835747228740188, "grad_norm": 0.6046836972236633, "learning_rate": 2.054425571542673e-09, "loss": 0.0183, "step": 245150 }, { "epoch": 1.983655635569221, "grad_norm": 0.24464909732341766, "learning_rate": 2.034236148532043e-09, "loss": 0.022, "step": 245160 }, { "epoch": 1.9837365482644227, "grad_norm": 0.5215844511985779, "learning_rate": 2.014146399397787e-09, "loss": 0.0181, "step": 245170 }, { "epoch": 1.9838174609596244, "grad_norm": 0.6240866184234619, "learning_rate": 1.994156324540142e-09, "loss": 0.0162, "step": 245180 }, { "epoch": 1.9838983736548266, "grad_norm": 0.374606192111969, "learning_rate": 1.9742659243571214e-09, "loss": 0.015, "step": 245190 }, { "epoch": 1.9839792863500283, "grad_norm": 0.2876415550708771, "learning_rate": 1.9544751992467414e-09, "loss": 0.0337, "step": 245200 }, { "epoch": 1.9840601990452302, "grad_norm": 0.3022836446762085, "learning_rate": 1.9347841496025756e-09, "loss": 0.0206, "step": 245210 }, { "epoch": 1.9841411117404322, "grad_norm": 0.3890942633152008, "learning_rate": 1.9151927758181978e-09, "loss": 0.0163, "step": 245220 }, { "epoch": 1.984222024435634, "grad_norm": 0.2892606854438782, "learning_rate": 1.8957010782838515e-09, "loss": 0.0125, "step": 245230 }, { "epoch": 1.9843029371308358, "grad_norm": 0.21043910086154938, "learning_rate": 1.876309057388115e-09, "loss": 0.0173, "step": 245240 }, { "epoch": 1.9843838498260378, "grad_norm": 0.3012423813343048, "learning_rate": 1.8570167135173456e-09, "loss": 0.0256, "step": 245250 }, { "epoch": 1.9844647625212395, "grad_norm": 0.03764597326517105, "learning_rate": 1.8378240470573461e-09, "loss": 0.0089, "step": 245260 }, { "epoch": 1.9845456752164414, "grad_norm": 0.2236884981393814, "learning_rate": 1.8187310583905881e-09, "loss": 0.0203, "step": 245270 }, { "epoch": 1.9846265879116434, "grad_norm": 0.4439089298248291, "learning_rate": 1.7997377478973232e-09, "loss": 0.0162, "step": 245280 }, { "epoch": 1.984707500606845, "grad_norm": 0.3033960461616516, "learning_rate": 1.7808441159566925e-09, "loss": 0.0124, "step": 245290 }, { "epoch": 1.9847884133020472, "grad_norm": 0.00035617881803773344, "learning_rate": 1.7620501629456167e-09, "loss": 0.024, "step": 245300 }, { "epoch": 1.984869325997249, "grad_norm": 0.03002798929810524, "learning_rate": 1.7433558892382406e-09, "loss": 0.0224, "step": 245310 }, { "epoch": 1.9849502386924507, "grad_norm": 0.743029773235321, "learning_rate": 1.7247612952081549e-09, "loss": 0.0153, "step": 245320 }, { "epoch": 1.9850311513876528, "grad_norm": 0.20929159224033356, "learning_rate": 1.7062663812256186e-09, "loss": 0.0164, "step": 245330 }, { "epoch": 1.9851120640828546, "grad_norm": 0.059663768857717514, "learning_rate": 1.687871147659781e-09, "loss": 0.0097, "step": 245340 }, { "epoch": 1.9851929767780565, "grad_norm": 0.3365391492843628, "learning_rate": 1.6695755948775705e-09, "loss": 0.0093, "step": 245350 }, { "epoch": 1.9852738894732584, "grad_norm": 0.273875892162323, "learning_rate": 1.6513797232436956e-09, "loss": 0.009, "step": 245360 }, { "epoch": 1.9853548021684602, "grad_norm": 1.0174474716186523, "learning_rate": 1.6332835331211993e-09, "loss": 0.0285, "step": 245370 }, { "epoch": 1.985435714863662, "grad_norm": 0.43150681257247925, "learning_rate": 1.615287024870904e-09, "loss": 0.0142, "step": 245380 }, { "epoch": 1.985516627558864, "grad_norm": 0.3743624687194824, "learning_rate": 1.5973901988514118e-09, "loss": 0.0161, "step": 245390 }, { "epoch": 1.9855975402540658, "grad_norm": 0.29398319125175476, "learning_rate": 1.5795930554202144e-09, "loss": 0.0228, "step": 245400 }, { "epoch": 1.9856784529492677, "grad_norm": 0.049956995993852615, "learning_rate": 1.561895594931473e-09, "loss": 0.0154, "step": 245410 }, { "epoch": 1.9857593656444696, "grad_norm": 0.4029032289981842, "learning_rate": 1.5442978177387936e-09, "loss": 0.0166, "step": 245420 }, { "epoch": 1.9858402783396714, "grad_norm": 0.4613976776599884, "learning_rate": 1.5267997241924515e-09, "loss": 0.0195, "step": 245430 }, { "epoch": 1.9859211910348735, "grad_norm": 0.3772493600845337, "learning_rate": 1.5094013146427222e-09, "loss": 0.0135, "step": 245440 }, { "epoch": 1.9860021037300752, "grad_norm": 0.37234506011009216, "learning_rate": 1.492102589434885e-09, "loss": 0.0129, "step": 245450 }, { "epoch": 1.9860830164252772, "grad_norm": 0.33991026878356934, "learning_rate": 1.4749035489153297e-09, "loss": 0.0105, "step": 245460 }, { "epoch": 1.9861639291204791, "grad_norm": 0.22889584302902222, "learning_rate": 1.4578041934260046e-09, "loss": 0.0174, "step": 245470 }, { "epoch": 1.9862448418156808, "grad_norm": 0.35091233253479004, "learning_rate": 1.4408045233083034e-09, "loss": 0.0227, "step": 245480 }, { "epoch": 1.9863257545108828, "grad_norm": 0.4737122058868408, "learning_rate": 1.4239045389019545e-09, "loss": 0.0207, "step": 245490 }, { "epoch": 1.9864066672060847, "grad_norm": 0.2775018811225891, "learning_rate": 1.4071042405428004e-09, "loss": 0.0221, "step": 245500 }, { "epoch": 1.9864875799012864, "grad_norm": 0.6250790357589722, "learning_rate": 1.3904036285666833e-09, "loss": 0.0181, "step": 245510 }, { "epoch": 1.9865684925964884, "grad_norm": 0.14691615104675293, "learning_rate": 1.3738027033066704e-09, "loss": 0.019, "step": 245520 }, { "epoch": 1.9866494052916903, "grad_norm": 0.5573200583457947, "learning_rate": 1.3573014650930527e-09, "loss": 0.0347, "step": 245530 }, { "epoch": 1.986730317986892, "grad_norm": 0.5748445987701416, "learning_rate": 1.3408999142561218e-09, "loss": 0.0251, "step": 245540 }, { "epoch": 1.986811230682094, "grad_norm": 0.32129746675491333, "learning_rate": 1.3245980511217282e-09, "loss": 0.0094, "step": 245550 }, { "epoch": 1.986892143377296, "grad_norm": 0.3588256537914276, "learning_rate": 1.3083958760157222e-09, "loss": 0.0371, "step": 245560 }, { "epoch": 1.9869730560724976, "grad_norm": 0.5307402610778809, "learning_rate": 1.292293389261179e-09, "loss": 0.0218, "step": 245570 }, { "epoch": 1.9870539687676998, "grad_norm": 0.286751925945282, "learning_rate": 1.2762905911789525e-09, "loss": 0.0155, "step": 245580 }, { "epoch": 1.9871348814629015, "grad_norm": 0.31746670603752136, "learning_rate": 1.2603874820882323e-09, "loss": 0.0068, "step": 245590 }, { "epoch": 1.9872157941581035, "grad_norm": 0.21181906759738922, "learning_rate": 1.244584062306542e-09, "loss": 0.0117, "step": 245600 }, { "epoch": 1.9872967068533054, "grad_norm": 0.5315128564834595, "learning_rate": 1.2288803321486298e-09, "loss": 0.0227, "step": 245610 }, { "epoch": 1.9873776195485071, "grad_norm": 0.15372546017169952, "learning_rate": 1.2132762919281338e-09, "loss": 0.0114, "step": 245620 }, { "epoch": 1.987458532243709, "grad_norm": 0.017586784437298775, "learning_rate": 1.1977719419553613e-09, "loss": 0.0106, "step": 245630 }, { "epoch": 1.987539444938911, "grad_norm": 0.32691508531570435, "learning_rate": 1.1823672825406197e-09, "loss": 0.0122, "step": 245640 }, { "epoch": 1.9876203576341127, "grad_norm": 0.32522106170654297, "learning_rate": 1.1670623139903303e-09, "loss": 0.0215, "step": 245650 }, { "epoch": 1.9877012703293147, "grad_norm": 0.40795812010765076, "learning_rate": 1.1518570366098048e-09, "loss": 0.0149, "step": 245660 }, { "epoch": 1.9877821830245166, "grad_norm": 0.41469287872314453, "learning_rate": 1.136751450702689e-09, "loss": 0.0124, "step": 245670 }, { "epoch": 1.9878630957197183, "grad_norm": 0.4973617196083069, "learning_rate": 1.1217455565704083e-09, "loss": 0.0191, "step": 245680 }, { "epoch": 1.9879440084149202, "grad_norm": 0.24397730827331543, "learning_rate": 1.1068393545110579e-09, "loss": 0.0279, "step": 245690 }, { "epoch": 1.9880249211101222, "grad_norm": 0.2467420995235443, "learning_rate": 1.0920328448232875e-09, "loss": 0.0256, "step": 245700 }, { "epoch": 1.988105833805324, "grad_norm": 0.41104012727737427, "learning_rate": 1.0773260278013065e-09, "loss": 0.0187, "step": 245710 }, { "epoch": 1.988186746500526, "grad_norm": 0.3285995423793793, "learning_rate": 1.0627189037387686e-09, "loss": 0.0211, "step": 245720 }, { "epoch": 1.9882676591957278, "grad_norm": 0.34432682394981384, "learning_rate": 1.0482114729276627e-09, "loss": 0.0169, "step": 245730 }, { "epoch": 1.9883485718909297, "grad_norm": 0.21367888152599335, "learning_rate": 1.0338037356560914e-09, "loss": 0.0226, "step": 245740 }, { "epoch": 1.9884294845861317, "grad_norm": 0.2350316047668457, "learning_rate": 1.019495692212713e-09, "loss": 0.0095, "step": 245750 }, { "epoch": 1.9885103972813334, "grad_norm": 1.1170581579208374, "learning_rate": 1.0052873428817445e-09, "loss": 0.0175, "step": 245760 }, { "epoch": 1.9885913099765353, "grad_norm": 0.33710137009620667, "learning_rate": 9.911786879468476e-10, "loss": 0.0146, "step": 245770 }, { "epoch": 1.9886722226717373, "grad_norm": 0.43970245122909546, "learning_rate": 9.771697276894642e-10, "loss": 0.0189, "step": 245780 }, { "epoch": 1.988753135366939, "grad_norm": 0.1212400496006012, "learning_rate": 9.632604623893704e-10, "loss": 0.0169, "step": 245790 }, { "epoch": 1.988834048062141, "grad_norm": 0.17014053463935852, "learning_rate": 9.494508923235668e-10, "loss": 0.0157, "step": 245800 }, { "epoch": 1.9889149607573429, "grad_norm": 0.2190757542848587, "learning_rate": 9.357410177673888e-10, "loss": 0.0184, "step": 245810 }, { "epoch": 1.9889958734525446, "grad_norm": 0.6078625917434692, "learning_rate": 9.22130838994506e-10, "loss": 0.0193, "step": 245820 }, { "epoch": 1.9890767861477467, "grad_norm": 0.5871521830558777, "learning_rate": 9.086203562758133e-10, "loss": 0.0268, "step": 245830 }, { "epoch": 1.9891576988429485, "grad_norm": 0.28556618094444275, "learning_rate": 8.952095698816499e-10, "loss": 0.0265, "step": 245840 }, { "epoch": 1.9892386115381502, "grad_norm": 0.4120951294898987, "learning_rate": 8.818984800784691e-10, "loss": 0.0201, "step": 245850 }, { "epoch": 1.9893195242333523, "grad_norm": 0.3517255485057831, "learning_rate": 8.686870871327246e-10, "loss": 0.0187, "step": 245860 }, { "epoch": 1.989400436928554, "grad_norm": 0.3997623026371002, "learning_rate": 8.555753913069842e-10, "loss": 0.0149, "step": 245870 }, { "epoch": 1.989481349623756, "grad_norm": 0.43703946471214294, "learning_rate": 8.425633928632604e-10, "loss": 0.0173, "step": 245880 }, { "epoch": 1.989562262318958, "grad_norm": 0.37498778104782104, "learning_rate": 8.296510920607903e-10, "loss": 0.0144, "step": 245890 }, { "epoch": 1.9896431750141597, "grad_norm": 0.34745338559150696, "learning_rate": 8.168384891577008e-10, "loss": 0.0159, "step": 245900 }, { "epoch": 1.9897240877093616, "grad_norm": 0.32774460315704346, "learning_rate": 8.04125584408788e-10, "loss": 0.0243, "step": 245910 }, { "epoch": 1.9898050004045635, "grad_norm": 0.6115894317626953, "learning_rate": 7.915123780677381e-10, "loss": 0.0142, "step": 245920 }, { "epoch": 1.9898859130997653, "grad_norm": 0.107940174639225, "learning_rate": 7.789988703860163e-10, "loss": 0.0096, "step": 245930 }, { "epoch": 1.9899668257949672, "grad_norm": 0.14077138900756836, "learning_rate": 7.66585061613423e-10, "loss": 0.0148, "step": 245940 }, { "epoch": 1.9900477384901691, "grad_norm": 0.28360384702682495, "learning_rate": 7.542709519975378e-10, "loss": 0.0173, "step": 245950 }, { "epoch": 1.9901286511853709, "grad_norm": 0.4509262144565582, "learning_rate": 7.4205654178372e-10, "loss": 0.0189, "step": 245960 }, { "epoch": 1.990209563880573, "grad_norm": 0.4628470242023468, "learning_rate": 7.299418312156637e-10, "loss": 0.0088, "step": 245970 }, { "epoch": 1.9902904765757747, "grad_norm": 0.46294209361076355, "learning_rate": 7.179268205353973e-10, "loss": 0.026, "step": 245980 }, { "epoch": 1.9903713892709765, "grad_norm": 0.5004953145980835, "learning_rate": 7.060115099816189e-10, "loss": 0.0224, "step": 245990 }, { "epoch": 1.9904523019661786, "grad_norm": 0.3259005844593048, "learning_rate": 6.941958997924713e-10, "loss": 0.024, "step": 246000 }, { "epoch": 1.9905332146613803, "grad_norm": 0.22713039815425873, "learning_rate": 6.82479990203877e-10, "loss": 0.0178, "step": 246010 }, { "epoch": 1.9906141273565823, "grad_norm": 0.46126067638397217, "learning_rate": 6.708637814489827e-10, "loss": 0.0166, "step": 246020 }, { "epoch": 1.9906950400517842, "grad_norm": 0.4575682580471039, "learning_rate": 6.59347273759825e-10, "loss": 0.0169, "step": 246030 }, { "epoch": 1.990775952746986, "grad_norm": 0.26459264755249023, "learning_rate": 6.47930467365665e-10, "loss": 0.021, "step": 246040 }, { "epoch": 1.9908568654421879, "grad_norm": 0.21932992339134216, "learning_rate": 6.366133624946536e-10, "loss": 0.007, "step": 246050 }, { "epoch": 1.9909377781373898, "grad_norm": 0.507438600063324, "learning_rate": 6.253959593721659e-10, "loss": 0.021, "step": 246060 }, { "epoch": 1.9910186908325915, "grad_norm": 0.412146657705307, "learning_rate": 6.142782582224671e-10, "loss": 0.0156, "step": 246070 }, { "epoch": 1.9910996035277935, "grad_norm": 0.3924744725227356, "learning_rate": 6.032602592664916e-10, "loss": 0.0172, "step": 246080 }, { "epoch": 1.9911805162229954, "grad_norm": 0.24027374386787415, "learning_rate": 5.923419627240634e-10, "loss": 0.016, "step": 246090 }, { "epoch": 1.9912614289181971, "grad_norm": 0.3467082679271698, "learning_rate": 5.815233688133415e-10, "loss": 0.0187, "step": 246100 }, { "epoch": 1.9913423416133993, "grad_norm": 0.32763156294822693, "learning_rate": 5.70804477749709e-10, "loss": 0.018, "step": 246110 }, { "epoch": 1.991423254308601, "grad_norm": 0.6405308842658997, "learning_rate": 5.601852897474391e-10, "loss": 0.0203, "step": 246120 }, { "epoch": 1.991504167003803, "grad_norm": 0.44063329696655273, "learning_rate": 5.496658050174741e-10, "loss": 0.0207, "step": 246130 }, { "epoch": 1.991585079699005, "grad_norm": 0.26324933767318726, "learning_rate": 5.392460237707564e-10, "loss": 0.0139, "step": 246140 }, { "epoch": 1.9916659923942066, "grad_norm": 0.3365533649921417, "learning_rate": 5.289259462143426e-10, "loss": 0.0162, "step": 246150 }, { "epoch": 1.9917469050894085, "grad_norm": 0.2738516628742218, "learning_rate": 5.187055725536239e-10, "loss": 0.029, "step": 246160 }, { "epoch": 1.9918278177846105, "grad_norm": 0.22197537124156952, "learning_rate": 5.085849029934365e-10, "loss": 0.0172, "step": 246170 }, { "epoch": 1.9919087304798122, "grad_norm": 0.48832499980926514, "learning_rate": 4.985639377347307e-10, "loss": 0.0145, "step": 246180 }, { "epoch": 1.9919896431750141, "grad_norm": 0.34709593653678894, "learning_rate": 4.886426769784569e-10, "loss": 0.0097, "step": 246190 }, { "epoch": 1.992070555870216, "grad_norm": 0.5761618614196777, "learning_rate": 4.788211209211247e-10, "loss": 0.0277, "step": 246200 }, { "epoch": 1.9921514685654178, "grad_norm": 0.4724327325820923, "learning_rate": 4.690992697592433e-10, "loss": 0.0126, "step": 246210 }, { "epoch": 1.9922323812606197, "grad_norm": 0.7985678911209106, "learning_rate": 4.5947712368710205e-10, "loss": 0.0232, "step": 246220 }, { "epoch": 1.9923132939558217, "grad_norm": 0.53978431224823, "learning_rate": 4.4995468289621425e-10, "loss": 0.0262, "step": 246230 }, { "epoch": 1.9923942066510234, "grad_norm": 0.18454192578792572, "learning_rate": 4.405319475758729e-10, "loss": 0.021, "step": 246240 }, { "epoch": 1.9924751193462256, "grad_norm": 0.36615753173828125, "learning_rate": 4.312089179153711e-10, "loss": 0.0113, "step": 246250 }, { "epoch": 1.9925560320414273, "grad_norm": 0.640207827091217, "learning_rate": 4.2198559409900585e-10, "loss": 0.0201, "step": 246260 }, { "epoch": 1.9926369447366292, "grad_norm": 0.34568116068840027, "learning_rate": 4.128619763121844e-10, "loss": 0.0212, "step": 246270 }, { "epoch": 1.9927178574318312, "grad_norm": 0.24245046079158783, "learning_rate": 4.0383806473587305e-10, "loss": 0.0092, "step": 246280 }, { "epoch": 1.9927987701270329, "grad_norm": 0.6423857808113098, "learning_rate": 3.9491385955048313e-10, "loss": 0.0174, "step": 246290 }, { "epoch": 1.9928796828222348, "grad_norm": 0.4055318534374237, "learning_rate": 3.860893609342053e-10, "loss": 0.0268, "step": 246300 }, { "epoch": 1.9929605955174368, "grad_norm": 0.27720263600349426, "learning_rate": 3.77364569062455e-10, "loss": 0.0164, "step": 246310 }, { "epoch": 1.9930415082126385, "grad_norm": 0.297626256942749, "learning_rate": 3.6873948410953706e-10, "loss": 0.0195, "step": 246320 }, { "epoch": 1.9931224209078404, "grad_norm": 0.19176696240901947, "learning_rate": 3.6021410624753615e-10, "loss": 0.0214, "step": 246330 }, { "epoch": 1.9932033336030424, "grad_norm": 0.28294458985328674, "learning_rate": 3.5178843564631637e-10, "loss": 0.0121, "step": 246340 }, { "epoch": 1.993284246298244, "grad_norm": 0.6645389795303345, "learning_rate": 3.434624724735214e-10, "loss": 0.0196, "step": 246350 }, { "epoch": 1.993365158993446, "grad_norm": 0.6484402418136597, "learning_rate": 3.3523621689623976e-10, "loss": 0.0283, "step": 246360 }, { "epoch": 1.993446071688648, "grad_norm": 0.3136989176273346, "learning_rate": 3.271096690776743e-10, "loss": 0.0145, "step": 246370 }, { "epoch": 1.9935269843838497, "grad_norm": 0.3990887701511383, "learning_rate": 3.190828291799175e-10, "loss": 0.0219, "step": 246380 }, { "epoch": 1.9936078970790518, "grad_norm": 0.22331348061561584, "learning_rate": 3.1115569736339666e-10, "loss": 0.017, "step": 246390 }, { "epoch": 1.9936888097742536, "grad_norm": 0.33059829473495483, "learning_rate": 3.0332827378631857e-10, "loss": 0.0214, "step": 246400 }, { "epoch": 1.9937697224694555, "grad_norm": 0.18124093115329742, "learning_rate": 2.956005586041144e-10, "loss": 0.0166, "step": 246410 }, { "epoch": 1.9938506351646574, "grad_norm": 0.2513602674007416, "learning_rate": 2.879725519716603e-10, "loss": 0.0164, "step": 246420 }, { "epoch": 1.9939315478598592, "grad_norm": 0.6659533381462097, "learning_rate": 2.8044425404050166e-10, "loss": 0.0274, "step": 246430 }, { "epoch": 1.994012460555061, "grad_norm": 0.6305739283561707, "learning_rate": 2.730156649610738e-10, "loss": 0.0277, "step": 246440 }, { "epoch": 1.994093373250263, "grad_norm": 0.35399338603019714, "learning_rate": 2.656867848815914e-10, "loss": 0.0114, "step": 246450 }, { "epoch": 1.9941742859454648, "grad_norm": 0.12575387954711914, "learning_rate": 2.584576139480488e-10, "loss": 0.0109, "step": 246460 }, { "epoch": 1.9942551986406667, "grad_norm": 0.467842161655426, "learning_rate": 2.5132815230421994e-10, "loss": 0.0191, "step": 246470 }, { "epoch": 1.9943361113358686, "grad_norm": 0.2897701859474182, "learning_rate": 2.4429840009332353e-10, "loss": 0.0145, "step": 246480 }, { "epoch": 1.9944170240310704, "grad_norm": 0.3182433545589447, "learning_rate": 2.373683574546926e-10, "loss": 0.0126, "step": 246490 }, { "epoch": 1.9944979367262725, "grad_norm": 0.25922757387161255, "learning_rate": 2.3053802452654984e-10, "loss": 0.0169, "step": 246500 }, { "epoch": 1.9945788494214742, "grad_norm": 0.3405058979988098, "learning_rate": 2.2380740144545275e-10, "loss": 0.0162, "step": 246510 }, { "epoch": 1.994659762116676, "grad_norm": 0.3306869864463806, "learning_rate": 2.1717648834573834e-10, "loss": 0.0137, "step": 246520 }, { "epoch": 1.9947406748118781, "grad_norm": 0.47039979696273804, "learning_rate": 2.1064528535952312e-10, "loss": 0.0189, "step": 246530 }, { "epoch": 1.9948215875070798, "grad_norm": 0.28511226177215576, "learning_rate": 2.0421379261670316e-10, "loss": 0.0172, "step": 246540 }, { "epoch": 1.9949025002022818, "grad_norm": 0.2208896428346634, "learning_rate": 1.9788201024606434e-10, "loss": 0.0162, "step": 246550 }, { "epoch": 1.9949834128974837, "grad_norm": 0.2160096913576126, "learning_rate": 1.916499383730619e-10, "loss": 0.0136, "step": 246560 }, { "epoch": 1.9950643255926854, "grad_norm": 0.284795880317688, "learning_rate": 1.85517577123151e-10, "loss": 0.0181, "step": 246570 }, { "epoch": 1.9951452382878874, "grad_norm": 0.4517786502838135, "learning_rate": 1.7948492661790107e-10, "loss": 0.0154, "step": 246580 }, { "epoch": 1.9952261509830893, "grad_norm": 0.41497018933296204, "learning_rate": 1.735519869772162e-10, "loss": 0.0209, "step": 246590 }, { "epoch": 1.995307063678291, "grad_norm": 0.26517829298973083, "learning_rate": 1.6771875832044539e-10, "loss": 0.0137, "step": 246600 }, { "epoch": 1.995387976373493, "grad_norm": 0.7944148182868958, "learning_rate": 1.6198524076305177e-10, "loss": 0.028, "step": 246610 }, { "epoch": 1.995468889068695, "grad_norm": 0.34677839279174805, "learning_rate": 1.563514344199435e-10, "loss": 0.02, "step": 246620 }, { "epoch": 1.9955498017638966, "grad_norm": 0.611608624458313, "learning_rate": 1.5081733940380817e-10, "loss": 0.0146, "step": 246630 }, { "epoch": 1.9956307144590988, "grad_norm": 0.2759859263896942, "learning_rate": 1.4538295582344762e-10, "loss": 0.0224, "step": 246640 }, { "epoch": 1.9957116271543005, "grad_norm": 0.013456554152071476, "learning_rate": 1.40048283788774e-10, "loss": 0.0159, "step": 246650 }, { "epoch": 1.9957925398495022, "grad_norm": 0.508303165435791, "learning_rate": 1.3481332340581355e-10, "loss": 0.0204, "step": 246660 }, { "epoch": 1.9958734525447044, "grad_norm": 0.228586807847023, "learning_rate": 1.2967807477892724e-10, "loss": 0.0098, "step": 246670 }, { "epoch": 1.995954365239906, "grad_norm": 0.7715239524841309, "learning_rate": 1.246425380097005e-10, "loss": 0.0275, "step": 246680 }, { "epoch": 1.996035277935108, "grad_norm": 0.265023410320282, "learning_rate": 1.1970671320027382e-10, "loss": 0.0211, "step": 246690 }, { "epoch": 1.99611619063031, "grad_norm": 0.6082337498664856, "learning_rate": 1.1487060044723664e-10, "loss": 0.021, "step": 246700 }, { "epoch": 1.9961971033255117, "grad_norm": 0.25668948888778687, "learning_rate": 1.1013419984828854e-10, "loss": 0.0113, "step": 246710 }, { "epoch": 1.9962780160207136, "grad_norm": 0.38126030564308167, "learning_rate": 1.0549751149724341e-10, "loss": 0.015, "step": 246720 }, { "epoch": 1.9963589287159156, "grad_norm": 0.3349926173686981, "learning_rate": 1.0096053548680485e-10, "loss": 0.0201, "step": 246730 }, { "epoch": 1.9964398414111173, "grad_norm": 0.36557433009147644, "learning_rate": 9.652327190801114e-11, "loss": 0.023, "step": 246740 }, { "epoch": 1.9965207541063192, "grad_norm": 0.17278684675693512, "learning_rate": 9.21857208480148e-11, "loss": 0.0264, "step": 246750 }, { "epoch": 1.9966016668015212, "grad_norm": 0.17663492262363434, "learning_rate": 8.794788239452345e-11, "loss": 0.0232, "step": 246760 }, { "epoch": 1.996682579496723, "grad_norm": 0.36911043524742126, "learning_rate": 8.380975663135893e-11, "loss": 0.0157, "step": 246770 }, { "epoch": 1.996763492191925, "grad_norm": 0.3882579803466797, "learning_rate": 7.977134364178796e-11, "loss": 0.0174, "step": 246780 }, { "epoch": 1.9968444048871268, "grad_norm": 0.13166543841362, "learning_rate": 7.58326435057466e-11, "loss": 0.0108, "step": 246790 }, { "epoch": 1.9969253175823287, "grad_norm": 0.22130583226680756, "learning_rate": 7.199365630150557e-11, "loss": 0.0226, "step": 246800 }, { "epoch": 1.9970062302775307, "grad_norm": 0.5950666666030884, "learning_rate": 6.825438210622536e-11, "loss": 0.0147, "step": 246810 }, { "epoch": 1.9970871429727324, "grad_norm": 0.2467559427022934, "learning_rate": 6.461482099429095e-11, "loss": 0.0156, "step": 246820 }, { "epoch": 1.9971680556679343, "grad_norm": 0.6484383940696716, "learning_rate": 6.107497303842192e-11, "loss": 0.0219, "step": 246830 }, { "epoch": 1.9972489683631363, "grad_norm": 0.0298758614808321, "learning_rate": 5.763483830911743e-11, "loss": 0.0195, "step": 246840 }, { "epoch": 1.997329881058338, "grad_norm": 0.6223955154418945, "learning_rate": 5.4294416874656195e-11, "loss": 0.0351, "step": 246850 }, { "epoch": 1.99741079375354, "grad_norm": 0.4903997480869293, "learning_rate": 5.1053708802206727e-11, "loss": 0.0134, "step": 246860 }, { "epoch": 1.9974917064487419, "grad_norm": 0.18472500145435333, "learning_rate": 4.7912714156161944e-11, "loss": 0.0173, "step": 246870 }, { "epoch": 1.9975726191439436, "grad_norm": 0.4393630921840668, "learning_rate": 4.487143299924945e-11, "loss": 0.0242, "step": 246880 }, { "epoch": 1.9976535318391455, "grad_norm": 0.5189626216888428, "learning_rate": 4.19298653914213e-11, "loss": 0.0151, "step": 246890 }, { "epoch": 1.9977344445343475, "grad_norm": 0.4911949336528778, "learning_rate": 3.908801139207441e-11, "loss": 0.0221, "step": 246900 }, { "epoch": 1.9978153572295492, "grad_norm": 0.22949077188968658, "learning_rate": 3.6345871058385274e-11, "loss": 0.0149, "step": 246910 }, { "epoch": 1.9978962699247513, "grad_norm": 0.3042394816875458, "learning_rate": 3.37034444436446e-11, "loss": 0.0173, "step": 246920 }, { "epoch": 1.997977182619953, "grad_norm": 0.5448695421218872, "learning_rate": 3.116073160169819e-11, "loss": 0.0208, "step": 246930 }, { "epoch": 1.998058095315155, "grad_norm": 0.42889025807380676, "learning_rate": 2.8717732582506098e-11, "loss": 0.0217, "step": 246940 }, { "epoch": 1.998139008010357, "grad_norm": 0.376738041639328, "learning_rate": 2.6374447435473237e-11, "loss": 0.0105, "step": 246950 }, { "epoch": 1.9982199207055586, "grad_norm": 0.8397945165634155, "learning_rate": 2.413087620667387e-11, "loss": 0.024, "step": 246960 }, { "epoch": 1.9983008334007606, "grad_norm": 0.196078822016716, "learning_rate": 2.1987018941627135e-11, "loss": 0.0173, "step": 246970 }, { "epoch": 1.9983817460959625, "grad_norm": 0.5879111886024475, "learning_rate": 1.9942875682521512e-11, "loss": 0.0286, "step": 246980 }, { "epoch": 1.9984626587911642, "grad_norm": 0.16509199142456055, "learning_rate": 1.7998446469880138e-11, "loss": 0.0152, "step": 246990 }, { "epoch": 1.9985435714863662, "grad_norm": 0.35671865940093994, "learning_rate": 1.615373134311593e-11, "loss": 0.0125, "step": 247000 }, { "epoch": 1.9986244841815681, "grad_norm": 0.31835418939590454, "learning_rate": 1.4408730338311139e-11, "loss": 0.017, "step": 247010 }, { "epoch": 1.9987053968767698, "grad_norm": 0.35233697295188904, "learning_rate": 1.27634434909929e-11, "loss": 0.0189, "step": 247020 }, { "epoch": 1.998786309571972, "grad_norm": 0.6503198742866516, "learning_rate": 1.1217870833912791e-11, "loss": 0.0236, "step": 247030 }, { "epoch": 1.9988672222671737, "grad_norm": 0.12692949175834656, "learning_rate": 9.772012397046837e-12, "loss": 0.0115, "step": 247040 }, { "epoch": 1.9989481349623754, "grad_norm": 0.386295348405838, "learning_rate": 8.425868210371058e-12, "loss": 0.0159, "step": 247050 }, { "epoch": 1.9990290476575776, "grad_norm": 0.25442469120025635, "learning_rate": 7.179438299975694e-12, "loss": 0.0121, "step": 247060 }, { "epoch": 1.9991099603527793, "grad_norm": 0.47224125266075134, "learning_rate": 6.0327226908407645e-12, "loss": 0.0224, "step": 247070 }, { "epoch": 1.9991908730479813, "grad_norm": 0.8080037236213684, "learning_rate": 4.985721406280952e-12, "loss": 0.0081, "step": 247080 }, { "epoch": 1.9992717857431832, "grad_norm": 0.444012314081192, "learning_rate": 4.038434466835384e-12, "loss": 0.0155, "step": 247090 }, { "epoch": 1.999352698438385, "grad_norm": 0.39353787899017334, "learning_rate": 3.1908618908227384e-12, "loss": 0.0149, "step": 247100 }, { "epoch": 1.9994336111335869, "grad_norm": 0.3516027331352234, "learning_rate": 2.443003696006585e-12, "loss": 0.0235, "step": 247110 }, { "epoch": 1.9995145238287888, "grad_norm": 0.7125397324562073, "learning_rate": 1.7948598973749343e-12, "loss": 0.0166, "step": 247120 }, { "epoch": 1.9995954365239905, "grad_norm": 0.1502450406551361, "learning_rate": 1.2464305071402395e-12, "loss": 0.0161, "step": 247130 }, { "epoch": 1.9996763492191925, "grad_norm": 0.08991529792547226, "learning_rate": 7.977155364047307e-13, "loss": 0.0099, "step": 247140 }, { "epoch": 1.9997572619143944, "grad_norm": 0.6391549706459045, "learning_rate": 4.4871499460530376e-13, "loss": 0.0143, "step": 247150 }, { "epoch": 1.9998381746095961, "grad_norm": 0.1527327001094818, "learning_rate": 1.9942888784818536e-13, "loss": 0.008, "step": 247160 }, { "epoch": 1.9999190873047983, "grad_norm": 0.052670933306217194, "learning_rate": 4.98572222396021e-14, "loss": 0.0161, "step": 247170 }, { "epoch": 2.0, "grad_norm": 0.27219924330711365, "learning_rate": 0.0, "loss": 0.009, "step": 247180 }, { "epoch": 2.0, "step": 247180, "total_flos": 1.1570533976082743e+19, "train_loss": 0.03249076200670694, "train_runtime": 108704.5915, "train_samples_per_second": 9.095, "train_steps_per_second": 2.274 } ], "logging_steps": 10, "max_steps": 247180, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.1570533976082743e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null }