{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9963614702236248, "eval_steps": 500, "global_step": 31000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.000643987571039879, "grad_norm": 2.7412095069885254, "learning_rate": 0.00019995706680405291, "loss": 1.8998, "step": 10 }, { "epoch": 0.001287975142079758, "grad_norm": 0.5238905549049377, "learning_rate": 0.0001999141336081058, "loss": 0.2837, "step": 20 }, { "epoch": 0.0019319627131196369, "grad_norm": 0.25239691138267517, "learning_rate": 0.0001998712004121587, "loss": 0.2159, "step": 30 }, { "epoch": 0.002575950284159516, "grad_norm": 0.3675299882888794, "learning_rate": 0.0001998282672162116, "loss": 0.2081, "step": 40 }, { "epoch": 0.0032199378551993948, "grad_norm": 0.25016647577285767, "learning_rate": 0.00019978533402026448, "loss": 0.2023, "step": 50 }, { "epoch": 0.0038639254262392737, "grad_norm": 0.2116996794939041, "learning_rate": 0.00019974240082431735, "loss": 0.2076, "step": 60 }, { "epoch": 0.004507912997279153, "grad_norm": 0.3199557065963745, "learning_rate": 0.00019969946762837026, "loss": 0.203, "step": 70 }, { "epoch": 0.005151900568319032, "grad_norm": 0.22560523450374603, "learning_rate": 0.00019965653443242316, "loss": 0.1968, "step": 80 }, { "epoch": 0.005795888139358911, "grad_norm": 0.17416805028915405, "learning_rate": 0.00019961360123647607, "loss": 0.1954, "step": 90 }, { "epoch": 0.0064398757103987896, "grad_norm": 0.1828640103340149, "learning_rate": 0.00019957066804052894, "loss": 0.2007, "step": 100 }, { "epoch": 0.0070838632814386685, "grad_norm": 0.18520274758338928, "learning_rate": 0.00019952773484458185, "loss": 0.2055, "step": 110 }, { "epoch": 0.0077278508524785475, "grad_norm": 0.1818382889032364, "learning_rate": 0.00019948480164863475, "loss": 0.2006, "step": 120 }, { "epoch": 0.008371838423518426, "grad_norm": 0.1857714205980301, "learning_rate": 0.00019944186845268763, "loss": 0.1979, "step": 130 }, { "epoch": 0.009015825994558305, "grad_norm": 0.18299847841262817, "learning_rate": 0.0001993989352567405, "loss": 0.1989, "step": 140 }, { "epoch": 0.009659813565598184, "grad_norm": 0.17529398202896118, "learning_rate": 0.0001993560020607934, "loss": 0.2035, "step": 150 }, { "epoch": 0.010303801136638063, "grad_norm": 0.168183833360672, "learning_rate": 0.00019931306886484631, "loss": 0.1917, "step": 160 }, { "epoch": 0.010947788707677942, "grad_norm": 0.15697088837623596, "learning_rate": 0.0001992701356688992, "loss": 0.1885, "step": 170 }, { "epoch": 0.011591776278717821, "grad_norm": 0.1728343963623047, "learning_rate": 0.0001992272024729521, "loss": 0.1915, "step": 180 }, { "epoch": 0.0122357638497577, "grad_norm": 0.14803245663642883, "learning_rate": 0.000199184269277005, "loss": 0.1985, "step": 190 }, { "epoch": 0.012879751420797579, "grad_norm": 0.1496073454618454, "learning_rate": 0.0001991413360810579, "loss": 0.1948, "step": 200 }, { "epoch": 0.013523738991837458, "grad_norm": 0.16709764301776886, "learning_rate": 0.00019909840288511078, "loss": 0.1976, "step": 210 }, { "epoch": 0.014167726562877337, "grad_norm": 0.1576743870973587, "learning_rate": 0.00019905546968916366, "loss": 0.1931, "step": 220 }, { "epoch": 0.014811714133917216, "grad_norm": 0.1579398214817047, "learning_rate": 0.00019901253649321656, "loss": 0.1893, "step": 230 }, { "epoch": 0.015455701704957095, "grad_norm": 0.16362576186656952, "learning_rate": 0.00019896960329726947, "loss": 0.1933, "step": 240 }, { "epoch": 0.016099689275996972, "grad_norm": 0.15291285514831543, "learning_rate": 0.00019892667010132234, "loss": 0.1918, "step": 250 }, { "epoch": 0.016743676847036853, "grad_norm": 0.1412012279033661, "learning_rate": 0.00019888373690537525, "loss": 0.1962, "step": 260 }, { "epoch": 0.01738766441807673, "grad_norm": 0.15624891221523285, "learning_rate": 0.00019884080370942815, "loss": 0.1903, "step": 270 }, { "epoch": 0.01803165198911661, "grad_norm": 0.16451551020145416, "learning_rate": 0.00019879787051348106, "loss": 0.1917, "step": 280 }, { "epoch": 0.018675639560156488, "grad_norm": 0.15273632109165192, "learning_rate": 0.0001987549373175339, "loss": 0.1838, "step": 290 }, { "epoch": 0.01931962713119637, "grad_norm": 0.12786222994327545, "learning_rate": 0.0001987120041215868, "loss": 0.1867, "step": 300 }, { "epoch": 0.019963614702236246, "grad_norm": 0.15592075884342194, "learning_rate": 0.00019866907092563972, "loss": 0.1936, "step": 310 }, { "epoch": 0.020607602273276127, "grad_norm": 0.16774752736091614, "learning_rate": 0.00019862613772969262, "loss": 0.1935, "step": 320 }, { "epoch": 0.021251589844316004, "grad_norm": 0.1367022842168808, "learning_rate": 0.0001985832045337455, "loss": 0.1914, "step": 330 }, { "epoch": 0.021895577415355885, "grad_norm": 0.14718040823936462, "learning_rate": 0.0001985402713377984, "loss": 0.1915, "step": 340 }, { "epoch": 0.02253956498639576, "grad_norm": 0.1525706648826599, "learning_rate": 0.0001984973381418513, "loss": 0.1942, "step": 350 }, { "epoch": 0.023183552557435642, "grad_norm": 0.14309810101985931, "learning_rate": 0.00019845440494590418, "loss": 0.1912, "step": 360 }, { "epoch": 0.02382754012847552, "grad_norm": 0.13245372474193573, "learning_rate": 0.00019841147174995706, "loss": 0.1894, "step": 370 }, { "epoch": 0.0244715276995154, "grad_norm": 0.13465426862239838, "learning_rate": 0.00019836853855400996, "loss": 0.2004, "step": 380 }, { "epoch": 0.025115515270555278, "grad_norm": 0.1574591100215912, "learning_rate": 0.00019832560535806287, "loss": 0.1928, "step": 390 }, { "epoch": 0.025759502841595158, "grad_norm": 0.12459868937730789, "learning_rate": 0.00019828267216211574, "loss": 0.1829, "step": 400 }, { "epoch": 0.026403490412635035, "grad_norm": 0.15623831748962402, "learning_rate": 0.00019823973896616865, "loss": 0.192, "step": 410 }, { "epoch": 0.027047477983674916, "grad_norm": 0.14335164427757263, "learning_rate": 0.00019819680577022155, "loss": 0.1931, "step": 420 }, { "epoch": 0.027691465554714793, "grad_norm": 0.13950879871845245, "learning_rate": 0.00019815387257427446, "loss": 0.1883, "step": 430 }, { "epoch": 0.028335453125754674, "grad_norm": 0.13386070728302002, "learning_rate": 0.00019811093937832733, "loss": 0.1959, "step": 440 }, { "epoch": 0.02897944069679455, "grad_norm": 0.14737282693386078, "learning_rate": 0.0001980680061823802, "loss": 0.1948, "step": 450 }, { "epoch": 0.029623428267834432, "grad_norm": 0.15332983434200287, "learning_rate": 0.00019802507298643312, "loss": 0.1926, "step": 460 }, { "epoch": 0.03026741583887431, "grad_norm": 0.12397896498441696, "learning_rate": 0.00019798213979048602, "loss": 0.1926, "step": 470 }, { "epoch": 0.03091140340991419, "grad_norm": 0.13465717434883118, "learning_rate": 0.0001979392065945389, "loss": 0.1936, "step": 480 }, { "epoch": 0.03155539098095407, "grad_norm": 0.1298041194677353, "learning_rate": 0.0001978962733985918, "loss": 0.1925, "step": 490 }, { "epoch": 0.032199378551993944, "grad_norm": 0.15924254059791565, "learning_rate": 0.0001978533402026447, "loss": 0.1885, "step": 500 }, { "epoch": 0.032843366123033825, "grad_norm": 0.13485732674598694, "learning_rate": 0.00019781040700669758, "loss": 0.1903, "step": 510 }, { "epoch": 0.033487353694073706, "grad_norm": 0.1586502343416214, "learning_rate": 0.0001977674738107505, "loss": 0.1952, "step": 520 }, { "epoch": 0.034131341265113586, "grad_norm": 0.12863652408123016, "learning_rate": 0.00019772454061480336, "loss": 0.1945, "step": 530 }, { "epoch": 0.03477532883615346, "grad_norm": 0.14454385638237, "learning_rate": 0.00019768160741885627, "loss": 0.1917, "step": 540 }, { "epoch": 0.03541931640719334, "grad_norm": 0.1240016520023346, "learning_rate": 0.00019763867422290917, "loss": 0.1842, "step": 550 }, { "epoch": 0.03606330397823322, "grad_norm": 0.15586091578006744, "learning_rate": 0.00019759574102696205, "loss": 0.1852, "step": 560 }, { "epoch": 0.0367072915492731, "grad_norm": 0.11573415249586105, "learning_rate": 0.00019755280783101495, "loss": 0.1934, "step": 570 }, { "epoch": 0.037351279120312976, "grad_norm": 0.14882509410381317, "learning_rate": 0.00019750987463506786, "loss": 0.1954, "step": 580 }, { "epoch": 0.03799526669135286, "grad_norm": 0.11247146129608154, "learning_rate": 0.00019746694143912074, "loss": 0.1922, "step": 590 }, { "epoch": 0.03863925426239274, "grad_norm": 0.15223833918571472, "learning_rate": 0.00019742400824317364, "loss": 0.1895, "step": 600 }, { "epoch": 0.03928324183343262, "grad_norm": 0.1385798156261444, "learning_rate": 0.00019738107504722652, "loss": 0.1939, "step": 610 }, { "epoch": 0.03992722940447249, "grad_norm": 0.13057337701320648, "learning_rate": 0.00019733814185127942, "loss": 0.1928, "step": 620 }, { "epoch": 0.04057121697551237, "grad_norm": 0.14241348206996918, "learning_rate": 0.0001972952086553323, "loss": 0.1903, "step": 630 }, { "epoch": 0.04121520454655225, "grad_norm": 0.15680907666683197, "learning_rate": 0.0001972522754593852, "loss": 0.1849, "step": 640 }, { "epoch": 0.041859192117592134, "grad_norm": 0.13233844935894012, "learning_rate": 0.0001972093422634381, "loss": 0.1916, "step": 650 }, { "epoch": 0.04250317968863201, "grad_norm": 0.12605097889900208, "learning_rate": 0.000197166409067491, "loss": 0.1926, "step": 660 }, { "epoch": 0.04314716725967189, "grad_norm": 0.13506381213665009, "learning_rate": 0.0001971234758715439, "loss": 0.1813, "step": 670 }, { "epoch": 0.04379115483071177, "grad_norm": 0.11859085410833359, "learning_rate": 0.0001970805426755968, "loss": 0.1868, "step": 680 }, { "epoch": 0.04443514240175164, "grad_norm": 0.14204798638820648, "learning_rate": 0.00019703760947964967, "loss": 0.198, "step": 690 }, { "epoch": 0.04507912997279152, "grad_norm": 0.1160082295536995, "learning_rate": 0.00019699467628370257, "loss": 0.1844, "step": 700 }, { "epoch": 0.045723117543831404, "grad_norm": 0.15759636461734772, "learning_rate": 0.00019695174308775545, "loss": 0.1877, "step": 710 }, { "epoch": 0.046367105114871285, "grad_norm": 0.1493363380432129, "learning_rate": 0.00019690880989180835, "loss": 0.1893, "step": 720 }, { "epoch": 0.04701109268591116, "grad_norm": 0.13622109591960907, "learning_rate": 0.00019686587669586126, "loss": 0.1897, "step": 730 }, { "epoch": 0.04765508025695104, "grad_norm": 0.15356650948524475, "learning_rate": 0.00019682294349991414, "loss": 0.1898, "step": 740 }, { "epoch": 0.04829906782799092, "grad_norm": 0.13751520216464996, "learning_rate": 0.00019678001030396704, "loss": 0.191, "step": 750 }, { "epoch": 0.0489430553990308, "grad_norm": 0.12410351634025574, "learning_rate": 0.00019673707710801994, "loss": 0.1982, "step": 760 }, { "epoch": 0.049587042970070674, "grad_norm": 0.13539336621761322, "learning_rate": 0.00019669414391207282, "loss": 0.1858, "step": 770 }, { "epoch": 0.050231030541110555, "grad_norm": 0.15427105128765106, "learning_rate": 0.0001966512107161257, "loss": 0.1873, "step": 780 }, { "epoch": 0.050875018112150436, "grad_norm": 0.1294224113225937, "learning_rate": 0.0001966082775201786, "loss": 0.1866, "step": 790 }, { "epoch": 0.051519005683190316, "grad_norm": 0.13548098504543304, "learning_rate": 0.0001965653443242315, "loss": 0.1895, "step": 800 }, { "epoch": 0.05216299325423019, "grad_norm": 0.1378280073404312, "learning_rate": 0.0001965224111282844, "loss": 0.1925, "step": 810 }, { "epoch": 0.05280698082527007, "grad_norm": 0.1243923082947731, "learning_rate": 0.0001964794779323373, "loss": 0.1912, "step": 820 }, { "epoch": 0.05345096839630995, "grad_norm": 0.13750891387462616, "learning_rate": 0.0001964365447363902, "loss": 0.1873, "step": 830 }, { "epoch": 0.05409495596734983, "grad_norm": 0.12474951148033142, "learning_rate": 0.0001963936115404431, "loss": 0.1818, "step": 840 }, { "epoch": 0.054738943538389706, "grad_norm": 0.11818917095661163, "learning_rate": 0.00019635067834449597, "loss": 0.1884, "step": 850 }, { "epoch": 0.05538293110942959, "grad_norm": 0.12633201479911804, "learning_rate": 0.00019630774514854885, "loss": 0.1852, "step": 860 }, { "epoch": 0.05602691868046947, "grad_norm": 0.12080872058868408, "learning_rate": 0.00019626481195260176, "loss": 0.1913, "step": 870 }, { "epoch": 0.05667090625150935, "grad_norm": 0.13261809945106506, "learning_rate": 0.00019622187875665466, "loss": 0.1881, "step": 880 }, { "epoch": 0.05731489382254922, "grad_norm": 0.1371004432439804, "learning_rate": 0.00019617894556070754, "loss": 0.1866, "step": 890 }, { "epoch": 0.0579588813935891, "grad_norm": 0.12731194496154785, "learning_rate": 0.00019613601236476044, "loss": 0.1873, "step": 900 }, { "epoch": 0.05860286896462898, "grad_norm": 0.1414366513490677, "learning_rate": 0.00019609307916881335, "loss": 0.194, "step": 910 }, { "epoch": 0.059246856535668864, "grad_norm": 0.12785807251930237, "learning_rate": 0.00019605014597286625, "loss": 0.1871, "step": 920 }, { "epoch": 0.05989084410670874, "grad_norm": 0.15242384374141693, "learning_rate": 0.00019600721277691913, "loss": 0.1942, "step": 930 }, { "epoch": 0.06053483167774862, "grad_norm": 0.12848378717899323, "learning_rate": 0.000195964279580972, "loss": 0.1832, "step": 940 }, { "epoch": 0.0611788192487885, "grad_norm": 0.11843331158161163, "learning_rate": 0.0001959213463850249, "loss": 0.1953, "step": 950 }, { "epoch": 0.06182280681982838, "grad_norm": 0.1435762196779251, "learning_rate": 0.0001958784131890778, "loss": 0.1906, "step": 960 }, { "epoch": 0.062466794390868254, "grad_norm": 0.1540411114692688, "learning_rate": 0.0001958354799931307, "loss": 0.1913, "step": 970 }, { "epoch": 0.06311078196190814, "grad_norm": 0.11884116381406784, "learning_rate": 0.0001957925467971836, "loss": 0.1904, "step": 980 }, { "epoch": 0.06375476953294801, "grad_norm": 0.13739290833473206, "learning_rate": 0.0001957496136012365, "loss": 0.1894, "step": 990 }, { "epoch": 0.06439875710398789, "grad_norm": 0.13768114149570465, "learning_rate": 0.0001957066804052894, "loss": 0.1829, "step": 1000 }, { "epoch": 0.06504274467502777, "grad_norm": 0.148823544383049, "learning_rate": 0.00019566374720934225, "loss": 0.1861, "step": 1010 }, { "epoch": 0.06568673224606765, "grad_norm": 0.14453282952308655, "learning_rate": 0.00019562081401339516, "loss": 0.1875, "step": 1020 }, { "epoch": 0.06633071981710753, "grad_norm": 0.12290053814649582, "learning_rate": 0.00019557788081744806, "loss": 0.1913, "step": 1030 }, { "epoch": 0.06697470738814741, "grad_norm": 0.12319938838481903, "learning_rate": 0.00019553494762150096, "loss": 0.1883, "step": 1040 }, { "epoch": 0.06761869495918729, "grad_norm": 0.1291642040014267, "learning_rate": 0.00019549201442555384, "loss": 0.1905, "step": 1050 }, { "epoch": 0.06826268253022717, "grad_norm": 0.12210606038570404, "learning_rate": 0.00019544908122960675, "loss": 0.1826, "step": 1060 }, { "epoch": 0.06890667010126704, "grad_norm": 0.14755992591381073, "learning_rate": 0.00019540614803365965, "loss": 0.1844, "step": 1070 }, { "epoch": 0.06955065767230692, "grad_norm": 0.11359313875436783, "learning_rate": 0.00019536321483771253, "loss": 0.1898, "step": 1080 }, { "epoch": 0.0701946452433468, "grad_norm": 0.12423041462898254, "learning_rate": 0.0001953202816417654, "loss": 0.187, "step": 1090 }, { "epoch": 0.07083863281438668, "grad_norm": 0.1241864487528801, "learning_rate": 0.0001952773484458183, "loss": 0.1819, "step": 1100 }, { "epoch": 0.07148262038542656, "grad_norm": 0.11481311917304993, "learning_rate": 0.0001952344152498712, "loss": 0.1868, "step": 1110 }, { "epoch": 0.07212660795646644, "grad_norm": 0.13524523377418518, "learning_rate": 0.0001951914820539241, "loss": 0.1944, "step": 1120 }, { "epoch": 0.07277059552750632, "grad_norm": 0.12383612245321274, "learning_rate": 0.000195148548857977, "loss": 0.1929, "step": 1130 }, { "epoch": 0.0734145830985462, "grad_norm": 0.13220486044883728, "learning_rate": 0.0001951056156620299, "loss": 0.1926, "step": 1140 }, { "epoch": 0.07405857066958607, "grad_norm": 0.12388086318969727, "learning_rate": 0.0001950626824660828, "loss": 0.1893, "step": 1150 }, { "epoch": 0.07470255824062595, "grad_norm": 0.12133756279945374, "learning_rate": 0.00019501974927013568, "loss": 0.1936, "step": 1160 }, { "epoch": 0.07534654581166583, "grad_norm": 0.13118097186088562, "learning_rate": 0.00019497681607418856, "loss": 0.1852, "step": 1170 }, { "epoch": 0.07599053338270571, "grad_norm": 0.1276014745235443, "learning_rate": 0.00019493388287824146, "loss": 0.1861, "step": 1180 }, { "epoch": 0.0766345209537456, "grad_norm": 0.13258102536201477, "learning_rate": 0.00019489094968229437, "loss": 0.1827, "step": 1190 }, { "epoch": 0.07727850852478547, "grad_norm": 0.1188349649310112, "learning_rate": 0.00019484801648634724, "loss": 0.181, "step": 1200 }, { "epoch": 0.07792249609582536, "grad_norm": 0.1269504725933075, "learning_rate": 0.00019480508329040015, "loss": 0.1862, "step": 1210 }, { "epoch": 0.07856648366686524, "grad_norm": 0.12257451564073563, "learning_rate": 0.00019476215009445305, "loss": 0.1938, "step": 1220 }, { "epoch": 0.0792104712379051, "grad_norm": 0.13650836050510406, "learning_rate": 0.00019471921689850593, "loss": 0.1879, "step": 1230 }, { "epoch": 0.07985445880894498, "grad_norm": 0.14198192954063416, "learning_rate": 0.00019467628370255883, "loss": 0.1986, "step": 1240 }, { "epoch": 0.08049844637998486, "grad_norm": 0.1368267685174942, "learning_rate": 0.0001946333505066117, "loss": 0.1877, "step": 1250 }, { "epoch": 0.08114243395102474, "grad_norm": 0.12303021550178528, "learning_rate": 0.0001945904173106646, "loss": 0.1919, "step": 1260 }, { "epoch": 0.08178642152206463, "grad_norm": 0.13155768811702728, "learning_rate": 0.00019454748411471752, "loss": 0.1895, "step": 1270 }, { "epoch": 0.0824304090931045, "grad_norm": 0.12290187925100327, "learning_rate": 0.0001945045509187704, "loss": 0.1876, "step": 1280 }, { "epoch": 0.08307439666414439, "grad_norm": 0.11622683703899384, "learning_rate": 0.0001944616177228233, "loss": 0.1964, "step": 1290 }, { "epoch": 0.08371838423518427, "grad_norm": 0.12428302317857742, "learning_rate": 0.0001944186845268762, "loss": 0.191, "step": 1300 }, { "epoch": 0.08436237180622413, "grad_norm": 0.11611688137054443, "learning_rate": 0.00019437575133092908, "loss": 0.1936, "step": 1310 }, { "epoch": 0.08500635937726402, "grad_norm": 0.13114310801029205, "learning_rate": 0.00019433281813498198, "loss": 0.1899, "step": 1320 }, { "epoch": 0.0856503469483039, "grad_norm": 0.17882029712200165, "learning_rate": 0.00019428988493903486, "loss": 0.1884, "step": 1330 }, { "epoch": 0.08629433451934378, "grad_norm": 0.143156960606575, "learning_rate": 0.00019424695174308777, "loss": 0.194, "step": 1340 }, { "epoch": 0.08693832209038366, "grad_norm": 0.1135677844285965, "learning_rate": 0.00019420401854714064, "loss": 0.1918, "step": 1350 }, { "epoch": 0.08758230966142354, "grad_norm": 0.10851840674877167, "learning_rate": 0.00019416108535119355, "loss": 0.1845, "step": 1360 }, { "epoch": 0.08822629723246342, "grad_norm": 0.1202281191945076, "learning_rate": 0.00019411815215524645, "loss": 0.1863, "step": 1370 }, { "epoch": 0.08887028480350329, "grad_norm": 0.13040275871753693, "learning_rate": 0.00019407521895929936, "loss": 0.1925, "step": 1380 }, { "epoch": 0.08951427237454317, "grad_norm": 0.12484655529260635, "learning_rate": 0.00019403228576335223, "loss": 0.1857, "step": 1390 }, { "epoch": 0.09015825994558305, "grad_norm": 0.15478584170341492, "learning_rate": 0.00019398935256740514, "loss": 0.1895, "step": 1400 }, { "epoch": 0.09080224751662293, "grad_norm": 0.1312055140733719, "learning_rate": 0.00019394641937145801, "loss": 0.1913, "step": 1410 }, { "epoch": 0.09144623508766281, "grad_norm": 0.12741465866565704, "learning_rate": 0.00019390348617551092, "loss": 0.184, "step": 1420 }, { "epoch": 0.09209022265870269, "grad_norm": 0.12442752718925476, "learning_rate": 0.0001938605529795638, "loss": 0.1866, "step": 1430 }, { "epoch": 0.09273421022974257, "grad_norm": 0.11229942739009857, "learning_rate": 0.0001938176197836167, "loss": 0.1995, "step": 1440 }, { "epoch": 0.09337819780078245, "grad_norm": 0.11654070019721985, "learning_rate": 0.0001937746865876696, "loss": 0.1921, "step": 1450 }, { "epoch": 0.09402218537182232, "grad_norm": 0.14681275188922882, "learning_rate": 0.00019373175339172248, "loss": 0.1925, "step": 1460 }, { "epoch": 0.0946661729428622, "grad_norm": 0.11613903194665909, "learning_rate": 0.00019368882019577539, "loss": 0.1856, "step": 1470 }, { "epoch": 0.09531016051390208, "grad_norm": 0.12478963285684586, "learning_rate": 0.0001936458869998283, "loss": 0.1954, "step": 1480 }, { "epoch": 0.09595414808494196, "grad_norm": 0.13673624396324158, "learning_rate": 0.00019360295380388117, "loss": 0.1867, "step": 1490 }, { "epoch": 0.09659813565598184, "grad_norm": 0.12692295014858246, "learning_rate": 0.00019356002060793404, "loss": 0.1906, "step": 1500 }, { "epoch": 0.09724212322702172, "grad_norm": 0.13169437646865845, "learning_rate": 0.00019351708741198695, "loss": 0.1856, "step": 1510 }, { "epoch": 0.0978861107980616, "grad_norm": 0.1501220315694809, "learning_rate": 0.00019347415421603985, "loss": 0.1823, "step": 1520 }, { "epoch": 0.09853009836910148, "grad_norm": 0.10876377671957016, "learning_rate": 0.00019343122102009276, "loss": 0.1888, "step": 1530 }, { "epoch": 0.09917408594014135, "grad_norm": 0.1202109307050705, "learning_rate": 0.00019338828782414563, "loss": 0.1857, "step": 1540 }, { "epoch": 0.09981807351118123, "grad_norm": 0.1165483295917511, "learning_rate": 0.00019334535462819854, "loss": 0.1891, "step": 1550 }, { "epoch": 0.10046206108222111, "grad_norm": 0.14654968678951263, "learning_rate": 0.00019330242143225144, "loss": 0.1904, "step": 1560 }, { "epoch": 0.10110604865326099, "grad_norm": 0.13336946070194244, "learning_rate": 0.00019325948823630432, "loss": 0.1879, "step": 1570 }, { "epoch": 0.10175003622430087, "grad_norm": 0.1319202333688736, "learning_rate": 0.0001932165550403572, "loss": 0.1839, "step": 1580 }, { "epoch": 0.10239402379534075, "grad_norm": 0.14524778723716736, "learning_rate": 0.0001931736218444101, "loss": 0.1862, "step": 1590 }, { "epoch": 0.10303801136638063, "grad_norm": 0.11064255237579346, "learning_rate": 0.000193130688648463, "loss": 0.1901, "step": 1600 }, { "epoch": 0.10368199893742051, "grad_norm": 0.12047114223241806, "learning_rate": 0.00019308775545251588, "loss": 0.1888, "step": 1610 }, { "epoch": 0.10432598650846038, "grad_norm": 0.13456296920776367, "learning_rate": 0.00019304482225656879, "loss": 0.1898, "step": 1620 }, { "epoch": 0.10496997407950026, "grad_norm": 0.12299712747335434, "learning_rate": 0.0001930018890606217, "loss": 0.1896, "step": 1630 }, { "epoch": 0.10561396165054014, "grad_norm": 0.13578340411186218, "learning_rate": 0.0001929589558646746, "loss": 0.1863, "step": 1640 }, { "epoch": 0.10625794922158002, "grad_norm": 0.13501976430416107, "learning_rate": 0.00019291602266872747, "loss": 0.1891, "step": 1650 }, { "epoch": 0.1069019367926199, "grad_norm": 0.12338942289352417, "learning_rate": 0.00019287308947278035, "loss": 0.1948, "step": 1660 }, { "epoch": 0.10754592436365978, "grad_norm": 0.14314007759094238, "learning_rate": 0.00019283015627683325, "loss": 0.1889, "step": 1670 }, { "epoch": 0.10818991193469966, "grad_norm": 0.14502228796482086, "learning_rate": 0.00019278722308088616, "loss": 0.1799, "step": 1680 }, { "epoch": 0.10883389950573955, "grad_norm": 0.11983921378850937, "learning_rate": 0.00019274428988493903, "loss": 0.1922, "step": 1690 }, { "epoch": 0.10947788707677941, "grad_norm": 0.12542444467544556, "learning_rate": 0.00019270135668899194, "loss": 0.188, "step": 1700 }, { "epoch": 0.11012187464781929, "grad_norm": 0.139082133769989, "learning_rate": 0.00019265842349304484, "loss": 0.1835, "step": 1710 }, { "epoch": 0.11076586221885917, "grad_norm": 0.15210776031017303, "learning_rate": 0.00019261549029709775, "loss": 0.1932, "step": 1720 }, { "epoch": 0.11140984978989905, "grad_norm": 0.12085700035095215, "learning_rate": 0.0001925725571011506, "loss": 0.1891, "step": 1730 }, { "epoch": 0.11205383736093893, "grad_norm": 0.10863097757101059, "learning_rate": 0.0001925296239052035, "loss": 0.1836, "step": 1740 }, { "epoch": 0.11269782493197882, "grad_norm": 0.13292193412780762, "learning_rate": 0.0001924866907092564, "loss": 0.1788, "step": 1750 }, { "epoch": 0.1133418125030187, "grad_norm": 0.14304505288600922, "learning_rate": 0.0001924437575133093, "loss": 0.1934, "step": 1760 }, { "epoch": 0.11398580007405858, "grad_norm": 0.11966352164745331, "learning_rate": 0.0001924008243173622, "loss": 0.1818, "step": 1770 }, { "epoch": 0.11462978764509844, "grad_norm": 0.12158819288015366, "learning_rate": 0.0001923578911214151, "loss": 0.19, "step": 1780 }, { "epoch": 0.11527377521613832, "grad_norm": 0.13564924895763397, "learning_rate": 0.000192314957925468, "loss": 0.194, "step": 1790 }, { "epoch": 0.1159177627871782, "grad_norm": 0.11015324294567108, "learning_rate": 0.00019227202472952087, "loss": 0.1953, "step": 1800 }, { "epoch": 0.11656175035821809, "grad_norm": 0.13461442291736603, "learning_rate": 0.00019222909153357375, "loss": 0.1855, "step": 1810 }, { "epoch": 0.11720573792925797, "grad_norm": 0.13253551721572876, "learning_rate": 0.00019218615833762665, "loss": 0.1913, "step": 1820 }, { "epoch": 0.11784972550029785, "grad_norm": 0.13126640021800995, "learning_rate": 0.00019214322514167956, "loss": 0.1829, "step": 1830 }, { "epoch": 0.11849371307133773, "grad_norm": 0.11623182892799377, "learning_rate": 0.00019210029194573243, "loss": 0.1897, "step": 1840 }, { "epoch": 0.11913770064237761, "grad_norm": 0.1050465926527977, "learning_rate": 0.00019205735874978534, "loss": 0.1929, "step": 1850 }, { "epoch": 0.11978168821341748, "grad_norm": 0.16369561851024628, "learning_rate": 0.00019201442555383824, "loss": 0.1898, "step": 1860 }, { "epoch": 0.12042567578445736, "grad_norm": 0.14770829677581787, "learning_rate": 0.00019197149235789115, "loss": 0.1883, "step": 1870 }, { "epoch": 0.12106966335549724, "grad_norm": 0.1214405819773674, "learning_rate": 0.00019192855916194402, "loss": 0.1902, "step": 1880 }, { "epoch": 0.12171365092653712, "grad_norm": 0.12661924958229065, "learning_rate": 0.00019188562596599693, "loss": 0.1891, "step": 1890 }, { "epoch": 0.122357638497577, "grad_norm": 0.1316099911928177, "learning_rate": 0.0001918426927700498, "loss": 0.1866, "step": 1900 }, { "epoch": 0.12300162606861688, "grad_norm": 0.11886880546808243, "learning_rate": 0.0001917997595741027, "loss": 0.1977, "step": 1910 }, { "epoch": 0.12364561363965676, "grad_norm": 0.10805089771747589, "learning_rate": 0.0001917568263781556, "loss": 0.1905, "step": 1920 }, { "epoch": 0.12428960121069664, "grad_norm": 0.14096061885356903, "learning_rate": 0.0001917138931822085, "loss": 0.1915, "step": 1930 }, { "epoch": 0.12493358878173651, "grad_norm": 0.12838415801525116, "learning_rate": 0.0001916709599862614, "loss": 0.1881, "step": 1940 }, { "epoch": 0.1255775763527764, "grad_norm": 0.12060903012752533, "learning_rate": 0.00019162802679031427, "loss": 0.1848, "step": 1950 }, { "epoch": 0.12622156392381628, "grad_norm": 0.13352909684181213, "learning_rate": 0.00019158509359436718, "loss": 0.1861, "step": 1960 }, { "epoch": 0.12686555149485615, "grad_norm": 0.11928246915340424, "learning_rate": 0.00019154216039842008, "loss": 0.1865, "step": 1970 }, { "epoch": 0.12750953906589602, "grad_norm": 0.12565603852272034, "learning_rate": 0.00019149922720247296, "loss": 0.1843, "step": 1980 }, { "epoch": 0.1281535266369359, "grad_norm": 0.11372803896665573, "learning_rate": 0.00019145629400652586, "loss": 0.1855, "step": 1990 }, { "epoch": 0.12879751420797578, "grad_norm": 0.12066502124071121, "learning_rate": 0.00019141336081057874, "loss": 0.1904, "step": 2000 }, { "epoch": 0.12944150177901567, "grad_norm": 0.12696704268455505, "learning_rate": 0.00019137042761463164, "loss": 0.1945, "step": 2010 }, { "epoch": 0.13008548935005554, "grad_norm": 0.12330330908298492, "learning_rate": 0.00019132749441868455, "loss": 0.1817, "step": 2020 }, { "epoch": 0.13072947692109543, "grad_norm": 0.13744008541107178, "learning_rate": 0.00019128456122273743, "loss": 0.1939, "step": 2030 }, { "epoch": 0.1313734644921353, "grad_norm": 0.11562083661556244, "learning_rate": 0.00019124162802679033, "loss": 0.1842, "step": 2040 }, { "epoch": 0.13201745206317517, "grad_norm": 0.1592063009738922, "learning_rate": 0.00019119869483084323, "loss": 0.1928, "step": 2050 }, { "epoch": 0.13266143963421506, "grad_norm": 0.11548040807247162, "learning_rate": 0.0001911557616348961, "loss": 0.1943, "step": 2060 }, { "epoch": 0.13330542720525493, "grad_norm": 0.13047988712787628, "learning_rate": 0.000191112828438949, "loss": 0.1874, "step": 2070 }, { "epoch": 0.13394941477629482, "grad_norm": 0.12109759449958801, "learning_rate": 0.0001910698952430019, "loss": 0.1908, "step": 2080 }, { "epoch": 0.1345934023473347, "grad_norm": 0.11847807466983795, "learning_rate": 0.0001910269620470548, "loss": 0.1852, "step": 2090 }, { "epoch": 0.13523738991837458, "grad_norm": 0.11731862276792526, "learning_rate": 0.0001909840288511077, "loss": 0.1887, "step": 2100 }, { "epoch": 0.13588137748941445, "grad_norm": 0.12323199212551117, "learning_rate": 0.00019094109565516058, "loss": 0.1827, "step": 2110 }, { "epoch": 0.13652536506045435, "grad_norm": 0.1216215044260025, "learning_rate": 0.00019089816245921348, "loss": 0.1828, "step": 2120 }, { "epoch": 0.1371693526314942, "grad_norm": 0.12557610869407654, "learning_rate": 0.00019085522926326639, "loss": 0.1831, "step": 2130 }, { "epoch": 0.13781334020253408, "grad_norm": 0.14151227474212646, "learning_rate": 0.00019081229606731926, "loss": 0.1897, "step": 2140 }, { "epoch": 0.13845732777357397, "grad_norm": 0.13856565952301025, "learning_rate": 0.00019076936287137214, "loss": 0.1886, "step": 2150 }, { "epoch": 0.13910131534461384, "grad_norm": 0.1119798943400383, "learning_rate": 0.00019072642967542504, "loss": 0.1872, "step": 2160 }, { "epoch": 0.13974530291565374, "grad_norm": 0.1279832422733307, "learning_rate": 0.00019068349647947795, "loss": 0.1874, "step": 2170 }, { "epoch": 0.1403892904866936, "grad_norm": 0.1433877944946289, "learning_rate": 0.00019064056328353083, "loss": 0.1893, "step": 2180 }, { "epoch": 0.1410332780577335, "grad_norm": 0.14069855213165283, "learning_rate": 0.00019059763008758373, "loss": 0.182, "step": 2190 }, { "epoch": 0.14167726562877336, "grad_norm": 0.13060183823108673, "learning_rate": 0.00019055469689163663, "loss": 0.1866, "step": 2200 }, { "epoch": 0.14232125319981323, "grad_norm": 0.11641053110361099, "learning_rate": 0.00019051176369568954, "loss": 0.1851, "step": 2210 }, { "epoch": 0.14296524077085312, "grad_norm": 0.12291021645069122, "learning_rate": 0.0001904688304997424, "loss": 0.19, "step": 2220 }, { "epoch": 0.143609228341893, "grad_norm": 0.11829876154661179, "learning_rate": 0.0001904258973037953, "loss": 0.1873, "step": 2230 }, { "epoch": 0.1442532159129329, "grad_norm": 0.12123451381921768, "learning_rate": 0.0001903829641078482, "loss": 0.1905, "step": 2240 }, { "epoch": 0.14489720348397275, "grad_norm": 0.15051627159118652, "learning_rate": 0.0001903400309119011, "loss": 0.1813, "step": 2250 }, { "epoch": 0.14554119105501265, "grad_norm": 0.11060353368520737, "learning_rate": 0.00019029709771595398, "loss": 0.1862, "step": 2260 }, { "epoch": 0.14618517862605251, "grad_norm": 0.1203673928976059, "learning_rate": 0.00019025416452000688, "loss": 0.1801, "step": 2270 }, { "epoch": 0.1468291661970924, "grad_norm": 0.13593044877052307, "learning_rate": 0.0001902112313240598, "loss": 0.1869, "step": 2280 }, { "epoch": 0.14747315376813228, "grad_norm": 0.11886560916900635, "learning_rate": 0.00019016829812811266, "loss": 0.1875, "step": 2290 }, { "epoch": 0.14811714133917214, "grad_norm": 0.13132813572883606, "learning_rate": 0.00019012536493216554, "loss": 0.1914, "step": 2300 }, { "epoch": 0.14876112891021204, "grad_norm": 0.14411859214305878, "learning_rate": 0.00019008243173621845, "loss": 0.1832, "step": 2310 }, { "epoch": 0.1494051164812519, "grad_norm": 0.13294664025306702, "learning_rate": 0.00019003949854027135, "loss": 0.1776, "step": 2320 }, { "epoch": 0.1500491040522918, "grad_norm": 0.11205649375915527, "learning_rate": 0.00018999656534432423, "loss": 0.1836, "step": 2330 }, { "epoch": 0.15069309162333167, "grad_norm": 0.12043093144893646, "learning_rate": 0.00018995363214837713, "loss": 0.1874, "step": 2340 }, { "epoch": 0.15133707919437156, "grad_norm": 0.12885698676109314, "learning_rate": 0.00018991069895243003, "loss": 0.1909, "step": 2350 }, { "epoch": 0.15198106676541143, "grad_norm": 0.12911555171012878, "learning_rate": 0.00018986776575648294, "loss": 0.1837, "step": 2360 }, { "epoch": 0.1526250543364513, "grad_norm": 0.11271936446428299, "learning_rate": 0.00018982483256053582, "loss": 0.1823, "step": 2370 }, { "epoch": 0.1532690419074912, "grad_norm": 0.1462479531764984, "learning_rate": 0.0001897818993645887, "loss": 0.1848, "step": 2380 }, { "epoch": 0.15391302947853105, "grad_norm": 0.11530794948339462, "learning_rate": 0.0001897389661686416, "loss": 0.1899, "step": 2390 }, { "epoch": 0.15455701704957095, "grad_norm": 0.11966421455144882, "learning_rate": 0.0001896960329726945, "loss": 0.1837, "step": 2400 }, { "epoch": 0.15520100462061082, "grad_norm": 0.11982399225234985, "learning_rate": 0.00018965309977674738, "loss": 0.1811, "step": 2410 }, { "epoch": 0.1558449921916507, "grad_norm": 0.14573408663272858, "learning_rate": 0.00018961016658080028, "loss": 0.1829, "step": 2420 }, { "epoch": 0.15648897976269058, "grad_norm": 0.15061990916728973, "learning_rate": 0.0001895672333848532, "loss": 0.182, "step": 2430 }, { "epoch": 0.15713296733373047, "grad_norm": 0.11393927037715912, "learning_rate": 0.0001895243001889061, "loss": 0.1841, "step": 2440 }, { "epoch": 0.15777695490477034, "grad_norm": 0.12879756093025208, "learning_rate": 0.00018948136699295897, "loss": 0.1975, "step": 2450 }, { "epoch": 0.1584209424758102, "grad_norm": 0.11175518482923508, "learning_rate": 0.00018943843379701185, "loss": 0.1835, "step": 2460 }, { "epoch": 0.1590649300468501, "grad_norm": 0.12546314299106598, "learning_rate": 0.00018939550060106475, "loss": 0.1928, "step": 2470 }, { "epoch": 0.15970891761788997, "grad_norm": 0.12330900877714157, "learning_rate": 0.00018935256740511765, "loss": 0.1815, "step": 2480 }, { "epoch": 0.16035290518892986, "grad_norm": 0.1416454017162323, "learning_rate": 0.00018930963420917053, "loss": 0.1892, "step": 2490 }, { "epoch": 0.16099689275996973, "grad_norm": 0.1188899576663971, "learning_rate": 0.00018926670101322344, "loss": 0.1862, "step": 2500 }, { "epoch": 0.16164088033100962, "grad_norm": 0.12594597041606903, "learning_rate": 0.00018922376781727634, "loss": 0.1825, "step": 2510 }, { "epoch": 0.1622848679020495, "grad_norm": 0.11091957241296768, "learning_rate": 0.00018918083462132922, "loss": 0.1879, "step": 2520 }, { "epoch": 0.16292885547308936, "grad_norm": 0.12016882747411728, "learning_rate": 0.00018913790142538212, "loss": 0.196, "step": 2530 }, { "epoch": 0.16357284304412925, "grad_norm": 0.13156910240650177, "learning_rate": 0.000189094968229435, "loss": 0.1879, "step": 2540 }, { "epoch": 0.16421683061516912, "grad_norm": 0.1292656809091568, "learning_rate": 0.0001890520350334879, "loss": 0.1788, "step": 2550 }, { "epoch": 0.164860818186209, "grad_norm": 0.1124243289232254, "learning_rate": 0.00018900910183754078, "loss": 0.1837, "step": 2560 }, { "epoch": 0.16550480575724888, "grad_norm": 0.1369020789861679, "learning_rate": 0.00018896616864159368, "loss": 0.192, "step": 2570 }, { "epoch": 0.16614879332828877, "grad_norm": 0.1176152154803276, "learning_rate": 0.0001889232354456466, "loss": 0.1845, "step": 2580 }, { "epoch": 0.16679278089932864, "grad_norm": 0.11664386093616486, "learning_rate": 0.0001888803022496995, "loss": 0.1895, "step": 2590 }, { "epoch": 0.16743676847036854, "grad_norm": 0.1124495416879654, "learning_rate": 0.00018883736905375237, "loss": 0.1816, "step": 2600 }, { "epoch": 0.1680807560414084, "grad_norm": 0.13833622634410858, "learning_rate": 0.00018879443585780527, "loss": 0.1827, "step": 2610 }, { "epoch": 0.16872474361244827, "grad_norm": 0.10604006052017212, "learning_rate": 0.00018875150266185815, "loss": 0.1799, "step": 2620 }, { "epoch": 0.16936873118348816, "grad_norm": 0.12259844690561295, "learning_rate": 0.00018870856946591106, "loss": 0.1834, "step": 2630 }, { "epoch": 0.17001271875452803, "grad_norm": 0.1223016306757927, "learning_rate": 0.00018866563626996393, "loss": 0.1886, "step": 2640 }, { "epoch": 0.17065670632556793, "grad_norm": 0.12070266157388687, "learning_rate": 0.00018862270307401684, "loss": 0.1866, "step": 2650 }, { "epoch": 0.1713006938966078, "grad_norm": 0.11773532629013062, "learning_rate": 0.00018857976987806974, "loss": 0.1898, "step": 2660 }, { "epoch": 0.1719446814676477, "grad_norm": 0.11706838011741638, "learning_rate": 0.00018853683668212262, "loss": 0.183, "step": 2670 }, { "epoch": 0.17258866903868755, "grad_norm": 0.11816330254077911, "learning_rate": 0.00018849390348617552, "loss": 0.1891, "step": 2680 }, { "epoch": 0.17323265660972742, "grad_norm": 0.12328661978244781, "learning_rate": 0.00018845097029022843, "loss": 0.1857, "step": 2690 }, { "epoch": 0.17387664418076731, "grad_norm": 0.12047053873538971, "learning_rate": 0.0001884080370942813, "loss": 0.1813, "step": 2700 }, { "epoch": 0.17452063175180718, "grad_norm": 0.12222851812839508, "learning_rate": 0.0001883651038983342, "loss": 0.1955, "step": 2710 }, { "epoch": 0.17516461932284708, "grad_norm": 0.13964077830314636, "learning_rate": 0.00018832217070238708, "loss": 0.1877, "step": 2720 }, { "epoch": 0.17580860689388694, "grad_norm": 0.11761181056499481, "learning_rate": 0.00018827923750644, "loss": 0.1825, "step": 2730 }, { "epoch": 0.17645259446492684, "grad_norm": 0.11494957655668259, "learning_rate": 0.0001882363043104929, "loss": 0.1795, "step": 2740 }, { "epoch": 0.1770965820359667, "grad_norm": 0.12171987444162369, "learning_rate": 0.00018819337111454577, "loss": 0.1904, "step": 2750 }, { "epoch": 0.17774056960700657, "grad_norm": 0.1145947203040123, "learning_rate": 0.00018815043791859867, "loss": 0.1875, "step": 2760 }, { "epoch": 0.17838455717804647, "grad_norm": 0.13245347142219543, "learning_rate": 0.00018810750472265158, "loss": 0.1907, "step": 2770 }, { "epoch": 0.17902854474908633, "grad_norm": 0.12940892577171326, "learning_rate": 0.00018806457152670446, "loss": 0.1839, "step": 2780 }, { "epoch": 0.17967253232012623, "grad_norm": 0.12561967968940735, "learning_rate": 0.00018802163833075733, "loss": 0.1943, "step": 2790 }, { "epoch": 0.1803165198911661, "grad_norm": 0.1084495484828949, "learning_rate": 0.00018797870513481024, "loss": 0.1868, "step": 2800 }, { "epoch": 0.180960507462206, "grad_norm": 0.12326683849096298, "learning_rate": 0.00018793577193886314, "loss": 0.1867, "step": 2810 }, { "epoch": 0.18160449503324586, "grad_norm": 0.11379634588956833, "learning_rate": 0.00018789283874291605, "loss": 0.193, "step": 2820 }, { "epoch": 0.18224848260428575, "grad_norm": 0.1292853206396103, "learning_rate": 0.00018784990554696892, "loss": 0.1888, "step": 2830 }, { "epoch": 0.18289247017532562, "grad_norm": 0.12383440136909485, "learning_rate": 0.00018780697235102183, "loss": 0.1838, "step": 2840 }, { "epoch": 0.18353645774636548, "grad_norm": 0.12261859327554703, "learning_rate": 0.00018776403915507473, "loss": 0.1862, "step": 2850 }, { "epoch": 0.18418044531740538, "grad_norm": 0.13619232177734375, "learning_rate": 0.0001877211059591276, "loss": 0.183, "step": 2860 }, { "epoch": 0.18482443288844524, "grad_norm": 0.157752126455307, "learning_rate": 0.00018767817276318049, "loss": 0.1887, "step": 2870 }, { "epoch": 0.18546842045948514, "grad_norm": 0.13743528723716736, "learning_rate": 0.0001876352395672334, "loss": 0.1754, "step": 2880 }, { "epoch": 0.186112408030525, "grad_norm": 0.132490336894989, "learning_rate": 0.0001875923063712863, "loss": 0.1842, "step": 2890 }, { "epoch": 0.1867563956015649, "grad_norm": 0.09999225288629532, "learning_rate": 0.00018754937317533917, "loss": 0.1877, "step": 2900 }, { "epoch": 0.18740038317260477, "grad_norm": 0.11113845556974411, "learning_rate": 0.00018750643997939208, "loss": 0.1876, "step": 2910 }, { "epoch": 0.18804437074364463, "grad_norm": 0.12223660945892334, "learning_rate": 0.00018746350678344498, "loss": 0.1901, "step": 2920 }, { "epoch": 0.18868835831468453, "grad_norm": 0.15062910318374634, "learning_rate": 0.00018742057358749788, "loss": 0.1904, "step": 2930 }, { "epoch": 0.1893323458857244, "grad_norm": 0.12959827482700348, "learning_rate": 0.00018737764039155073, "loss": 0.19, "step": 2940 }, { "epoch": 0.1899763334567643, "grad_norm": 0.11317887157201767, "learning_rate": 0.00018733470719560364, "loss": 0.1842, "step": 2950 }, { "epoch": 0.19062032102780416, "grad_norm": 0.12884590029716492, "learning_rate": 0.00018729177399965654, "loss": 0.1863, "step": 2960 }, { "epoch": 0.19126430859884405, "grad_norm": 0.12452477961778641, "learning_rate": 0.00018724884080370945, "loss": 0.1947, "step": 2970 }, { "epoch": 0.19190829616988392, "grad_norm": 0.10875706374645233, "learning_rate": 0.00018720590760776232, "loss": 0.1926, "step": 2980 }, { "epoch": 0.1925522837409238, "grad_norm": 0.11797066777944565, "learning_rate": 0.00018716297441181523, "loss": 0.1875, "step": 2990 }, { "epoch": 0.19319627131196368, "grad_norm": 0.1267586648464203, "learning_rate": 0.00018712004121586813, "loss": 0.1858, "step": 3000 }, { "epoch": 0.19384025888300355, "grad_norm": 0.12350243330001831, "learning_rate": 0.000187077108019921, "loss": 0.1907, "step": 3010 }, { "epoch": 0.19448424645404344, "grad_norm": 0.12335234880447388, "learning_rate": 0.00018703417482397389, "loss": 0.1932, "step": 3020 }, { "epoch": 0.1951282340250833, "grad_norm": 0.13383042812347412, "learning_rate": 0.0001869912416280268, "loss": 0.1957, "step": 3030 }, { "epoch": 0.1957722215961232, "grad_norm": 0.1341938078403473, "learning_rate": 0.0001869483084320797, "loss": 0.1882, "step": 3040 }, { "epoch": 0.19641620916716307, "grad_norm": 0.13048155605793, "learning_rate": 0.00018690537523613257, "loss": 0.1909, "step": 3050 }, { "epoch": 0.19706019673820296, "grad_norm": 0.13441182672977448, "learning_rate": 0.00018686244204018548, "loss": 0.1833, "step": 3060 }, { "epoch": 0.19770418430924283, "grad_norm": 0.1247468963265419, "learning_rate": 0.00018681950884423838, "loss": 0.181, "step": 3070 }, { "epoch": 0.1983481718802827, "grad_norm": 0.13333657383918762, "learning_rate": 0.00018677657564829128, "loss": 0.194, "step": 3080 }, { "epoch": 0.1989921594513226, "grad_norm": 0.15299059450626373, "learning_rate": 0.00018673364245234416, "loss": 0.1821, "step": 3090 }, { "epoch": 0.19963614702236246, "grad_norm": 0.13455583155155182, "learning_rate": 0.00018669070925639704, "loss": 0.1774, "step": 3100 }, { "epoch": 0.20028013459340235, "grad_norm": 0.12858708202838898, "learning_rate": 0.00018664777606044994, "loss": 0.1932, "step": 3110 }, { "epoch": 0.20092412216444222, "grad_norm": 0.13834957778453827, "learning_rate": 0.00018660484286450285, "loss": 0.1939, "step": 3120 }, { "epoch": 0.20156810973548211, "grad_norm": 0.1380980759859085, "learning_rate": 0.00018656190966855572, "loss": 0.1815, "step": 3130 }, { "epoch": 0.20221209730652198, "grad_norm": 0.11065258830785751, "learning_rate": 0.00018651897647260863, "loss": 0.1861, "step": 3140 }, { "epoch": 0.20285608487756188, "grad_norm": 0.1309739500284195, "learning_rate": 0.00018647604327666153, "loss": 0.1873, "step": 3150 }, { "epoch": 0.20350007244860174, "grad_norm": 0.1193334311246872, "learning_rate": 0.00018643311008071444, "loss": 0.1885, "step": 3160 }, { "epoch": 0.2041440600196416, "grad_norm": 0.14658281207084656, "learning_rate": 0.00018639017688476731, "loss": 0.1901, "step": 3170 }, { "epoch": 0.2047880475906815, "grad_norm": 0.11737065017223358, "learning_rate": 0.0001863472436888202, "loss": 0.1883, "step": 3180 }, { "epoch": 0.20543203516172137, "grad_norm": 0.1229642704129219, "learning_rate": 0.0001863043104928731, "loss": 0.1867, "step": 3190 }, { "epoch": 0.20607602273276127, "grad_norm": 0.1241893544793129, "learning_rate": 0.000186261377296926, "loss": 0.1887, "step": 3200 }, { "epoch": 0.20672001030380113, "grad_norm": 0.1129128634929657, "learning_rate": 0.00018621844410097888, "loss": 0.1848, "step": 3210 }, { "epoch": 0.20736399787484103, "grad_norm": 0.12819507718086243, "learning_rate": 0.00018617551090503178, "loss": 0.1836, "step": 3220 }, { "epoch": 0.2080079854458809, "grad_norm": 0.13154222071170807, "learning_rate": 0.00018613257770908468, "loss": 0.1923, "step": 3230 }, { "epoch": 0.20865197301692076, "grad_norm": 0.11395961046218872, "learning_rate": 0.00018608964451313756, "loss": 0.1888, "step": 3240 }, { "epoch": 0.20929596058796066, "grad_norm": 0.12984494864940643, "learning_rate": 0.00018604671131719047, "loss": 0.1847, "step": 3250 }, { "epoch": 0.20993994815900052, "grad_norm": 0.13540759682655334, "learning_rate": 0.00018600377812124334, "loss": 0.1825, "step": 3260 }, { "epoch": 0.21058393573004042, "grad_norm": 0.12186232954263687, "learning_rate": 0.00018596084492529625, "loss": 0.1854, "step": 3270 }, { "epoch": 0.21122792330108028, "grad_norm": 0.13595296442508698, "learning_rate": 0.00018591791172934912, "loss": 0.192, "step": 3280 }, { "epoch": 0.21187191087212018, "grad_norm": 0.12268677353858948, "learning_rate": 0.00018587497853340203, "loss": 0.1925, "step": 3290 }, { "epoch": 0.21251589844316005, "grad_norm": 0.13079483807086945, "learning_rate": 0.00018583204533745493, "loss": 0.185, "step": 3300 }, { "epoch": 0.21315988601419994, "grad_norm": 0.13907180726528168, "learning_rate": 0.00018578911214150784, "loss": 0.1911, "step": 3310 }, { "epoch": 0.2138038735852398, "grad_norm": 0.11527260392904282, "learning_rate": 0.00018574617894556071, "loss": 0.1859, "step": 3320 }, { "epoch": 0.21444786115627967, "grad_norm": 0.13353483378887177, "learning_rate": 0.00018570324574961362, "loss": 0.182, "step": 3330 }, { "epoch": 0.21509184872731957, "grad_norm": 0.11006516963243484, "learning_rate": 0.0001856603125536665, "loss": 0.1934, "step": 3340 }, { "epoch": 0.21573583629835943, "grad_norm": 0.11658990383148193, "learning_rate": 0.0001856173793577194, "loss": 0.1859, "step": 3350 }, { "epoch": 0.21637982386939933, "grad_norm": 0.1118716150522232, "learning_rate": 0.00018557444616177228, "loss": 0.1872, "step": 3360 }, { "epoch": 0.2170238114404392, "grad_norm": 0.12155017256736755, "learning_rate": 0.00018553151296582518, "loss": 0.1874, "step": 3370 }, { "epoch": 0.2176677990114791, "grad_norm": 0.13152168691158295, "learning_rate": 0.00018548857976987809, "loss": 0.1891, "step": 3380 }, { "epoch": 0.21831178658251896, "grad_norm": 0.13325989246368408, "learning_rate": 0.00018544564657393096, "loss": 0.1889, "step": 3390 }, { "epoch": 0.21895577415355882, "grad_norm": 0.11738722026348114, "learning_rate": 0.00018540271337798387, "loss": 0.192, "step": 3400 }, { "epoch": 0.21959976172459872, "grad_norm": 0.10563652962446213, "learning_rate": 0.00018535978018203677, "loss": 0.1866, "step": 3410 }, { "epoch": 0.22024374929563859, "grad_norm": 0.1138257160782814, "learning_rate": 0.00018531684698608965, "loss": 0.1892, "step": 3420 }, { "epoch": 0.22088773686667848, "grad_norm": 0.12300156056880951, "learning_rate": 0.00018527391379014255, "loss": 0.1835, "step": 3430 }, { "epoch": 0.22153172443771835, "grad_norm": 0.13220950961112976, "learning_rate": 0.00018523098059419543, "loss": 0.1949, "step": 3440 }, { "epoch": 0.22217571200875824, "grad_norm": 0.15940818190574646, "learning_rate": 0.00018518804739824833, "loss": 0.1971, "step": 3450 }, { "epoch": 0.2228196995797981, "grad_norm": 0.13176622986793518, "learning_rate": 0.00018514511420230124, "loss": 0.1854, "step": 3460 }, { "epoch": 0.22346368715083798, "grad_norm": 0.14342670142650604, "learning_rate": 0.00018510218100635412, "loss": 0.1892, "step": 3470 }, { "epoch": 0.22410767472187787, "grad_norm": 0.12839195132255554, "learning_rate": 0.00018505924781040702, "loss": 0.1839, "step": 3480 }, { "epoch": 0.22475166229291774, "grad_norm": 0.11127232760190964, "learning_rate": 0.00018501631461445992, "loss": 0.188, "step": 3490 }, { "epoch": 0.22539564986395763, "grad_norm": 0.11903993785381317, "learning_rate": 0.0001849733814185128, "loss": 0.1884, "step": 3500 }, { "epoch": 0.2260396374349975, "grad_norm": 0.11980284005403519, "learning_rate": 0.00018493044822256568, "loss": 0.197, "step": 3510 }, { "epoch": 0.2266836250060374, "grad_norm": 0.11008738726377487, "learning_rate": 0.00018488751502661858, "loss": 0.1811, "step": 3520 }, { "epoch": 0.22732761257707726, "grad_norm": 0.1271456778049469, "learning_rate": 0.00018484458183067149, "loss": 0.1828, "step": 3530 }, { "epoch": 0.22797160014811715, "grad_norm": 0.11039859801530838, "learning_rate": 0.0001848016486347244, "loss": 0.1834, "step": 3540 }, { "epoch": 0.22861558771915702, "grad_norm": 0.12219779193401337, "learning_rate": 0.00018475871543877727, "loss": 0.1869, "step": 3550 }, { "epoch": 0.2292595752901969, "grad_norm": 0.1432413011789322, "learning_rate": 0.00018471578224283017, "loss": 0.1786, "step": 3560 }, { "epoch": 0.22990356286123678, "grad_norm": 0.13823896646499634, "learning_rate": 0.00018467284904688308, "loss": 0.1883, "step": 3570 }, { "epoch": 0.23054755043227665, "grad_norm": 0.1058020070195198, "learning_rate": 0.00018462991585093595, "loss": 0.184, "step": 3580 }, { "epoch": 0.23119153800331654, "grad_norm": 0.12764045596122742, "learning_rate": 0.00018458698265498883, "loss": 0.1861, "step": 3590 }, { "epoch": 0.2318355255743564, "grad_norm": 0.12039545178413391, "learning_rate": 0.00018454404945904173, "loss": 0.1824, "step": 3600 }, { "epoch": 0.2324795131453963, "grad_norm": 0.12375465035438538, "learning_rate": 0.00018450111626309464, "loss": 0.1904, "step": 3610 }, { "epoch": 0.23312350071643617, "grad_norm": 0.1243257075548172, "learning_rate": 0.00018445818306714752, "loss": 0.1864, "step": 3620 }, { "epoch": 0.23376748828747604, "grad_norm": 0.1371324509382248, "learning_rate": 0.00018441524987120042, "loss": 0.1879, "step": 3630 }, { "epoch": 0.23441147585851593, "grad_norm": 0.1262369453907013, "learning_rate": 0.00018437231667525332, "loss": 0.1932, "step": 3640 }, { "epoch": 0.2350554634295558, "grad_norm": 0.14259742200374603, "learning_rate": 0.00018432938347930623, "loss": 0.1891, "step": 3650 }, { "epoch": 0.2356994510005957, "grad_norm": 0.11607065796852112, "learning_rate": 0.00018428645028335908, "loss": 0.1864, "step": 3660 }, { "epoch": 0.23634343857163556, "grad_norm": 0.14228256046772003, "learning_rate": 0.00018424351708741198, "loss": 0.1845, "step": 3670 }, { "epoch": 0.23698742614267546, "grad_norm": 0.11941976100206375, "learning_rate": 0.0001842005838914649, "loss": 0.1898, "step": 3680 }, { "epoch": 0.23763141371371532, "grad_norm": 0.12153748422861099, "learning_rate": 0.0001841576506955178, "loss": 0.1859, "step": 3690 }, { "epoch": 0.23827540128475522, "grad_norm": 0.14496752619743347, "learning_rate": 0.00018411471749957067, "loss": 0.1839, "step": 3700 }, { "epoch": 0.23891938885579508, "grad_norm": 0.13618862628936768, "learning_rate": 0.00018407178430362357, "loss": 0.1845, "step": 3710 }, { "epoch": 0.23956337642683495, "grad_norm": 0.11352390050888062, "learning_rate": 0.00018402885110767648, "loss": 0.1853, "step": 3720 }, { "epoch": 0.24020736399787485, "grad_norm": 0.1252513974905014, "learning_rate": 0.00018398591791172935, "loss": 0.1886, "step": 3730 }, { "epoch": 0.2408513515689147, "grad_norm": 0.12618067860603333, "learning_rate": 0.00018394298471578223, "loss": 0.1829, "step": 3740 }, { "epoch": 0.2414953391399546, "grad_norm": 0.12123875319957733, "learning_rate": 0.00018390005151983514, "loss": 0.1868, "step": 3750 }, { "epoch": 0.24213932671099447, "grad_norm": 0.11724622547626495, "learning_rate": 0.00018385711832388804, "loss": 0.1799, "step": 3760 }, { "epoch": 0.24278331428203437, "grad_norm": 0.12793110311031342, "learning_rate": 0.00018381418512794092, "loss": 0.1843, "step": 3770 }, { "epoch": 0.24342730185307423, "grad_norm": 0.12920093536376953, "learning_rate": 0.00018377125193199382, "loss": 0.184, "step": 3780 }, { "epoch": 0.2440712894241141, "grad_norm": 0.13967235386371613, "learning_rate": 0.00018372831873604672, "loss": 0.1778, "step": 3790 }, { "epoch": 0.244715276995154, "grad_norm": 0.10601527988910675, "learning_rate": 0.00018368538554009963, "loss": 0.1842, "step": 3800 }, { "epoch": 0.24535926456619386, "grad_norm": 0.13530969619750977, "learning_rate": 0.0001836424523441525, "loss": 0.1845, "step": 3810 }, { "epoch": 0.24600325213723376, "grad_norm": 0.1250510811805725, "learning_rate": 0.0001835995191482054, "loss": 0.1869, "step": 3820 }, { "epoch": 0.24664723970827362, "grad_norm": 0.12066790461540222, "learning_rate": 0.0001835565859522583, "loss": 0.1933, "step": 3830 }, { "epoch": 0.24729122727931352, "grad_norm": 0.11606336385011673, "learning_rate": 0.0001835136527563112, "loss": 0.1895, "step": 3840 }, { "epoch": 0.24793521485035339, "grad_norm": 0.1170002669095993, "learning_rate": 0.00018347071956036407, "loss": 0.1903, "step": 3850 }, { "epoch": 0.24857920242139328, "grad_norm": 0.13147616386413574, "learning_rate": 0.00018342778636441697, "loss": 0.1846, "step": 3860 }, { "epoch": 0.24922318999243315, "grad_norm": 0.11179982125759125, "learning_rate": 0.00018338485316846988, "loss": 0.1882, "step": 3870 }, { "epoch": 0.24986717756347301, "grad_norm": 0.13078872859477997, "learning_rate": 0.00018334191997252278, "loss": 0.185, "step": 3880 }, { "epoch": 0.2505111651345129, "grad_norm": 0.12739327549934387, "learning_rate": 0.00018329898677657566, "loss": 0.1871, "step": 3890 }, { "epoch": 0.2511551527055528, "grad_norm": 0.14129404723644257, "learning_rate": 0.00018325605358062856, "loss": 0.1815, "step": 3900 }, { "epoch": 0.25179914027659267, "grad_norm": 0.12478490173816681, "learning_rate": 0.00018321312038468144, "loss": 0.1868, "step": 3910 }, { "epoch": 0.25244312784763256, "grad_norm": 0.1112632006406784, "learning_rate": 0.00018317018718873434, "loss": 0.1936, "step": 3920 }, { "epoch": 0.2530871154186724, "grad_norm": 0.1175381988286972, "learning_rate": 0.00018312725399278722, "loss": 0.1807, "step": 3930 }, { "epoch": 0.2537311029897123, "grad_norm": 0.14305385947227478, "learning_rate": 0.00018308432079684013, "loss": 0.1887, "step": 3940 }, { "epoch": 0.2543750905607522, "grad_norm": 0.11945531517267227, "learning_rate": 0.00018304138760089303, "loss": 0.1844, "step": 3950 }, { "epoch": 0.25501907813179203, "grad_norm": 0.12495551258325577, "learning_rate": 0.0001829984544049459, "loss": 0.1831, "step": 3960 }, { "epoch": 0.2556630657028319, "grad_norm": 0.10794277489185333, "learning_rate": 0.0001829555212089988, "loss": 0.1776, "step": 3970 }, { "epoch": 0.2563070532738718, "grad_norm": 0.12116601318120956, "learning_rate": 0.00018291258801305172, "loss": 0.1847, "step": 3980 }, { "epoch": 0.2569510408449117, "grad_norm": 0.12584926187992096, "learning_rate": 0.0001828696548171046, "loss": 0.1862, "step": 3990 }, { "epoch": 0.25759502841595155, "grad_norm": 0.1230476051568985, "learning_rate": 0.00018282672162115747, "loss": 0.1851, "step": 4000 }, { "epoch": 0.25823901598699145, "grad_norm": 0.1409701406955719, "learning_rate": 0.00018278378842521037, "loss": 0.184, "step": 4010 }, { "epoch": 0.25888300355803134, "grad_norm": 0.11237083375453949, "learning_rate": 0.00018274085522926328, "loss": 0.1935, "step": 4020 }, { "epoch": 0.2595269911290712, "grad_norm": 0.12274663895368576, "learning_rate": 0.00018269792203331618, "loss": 0.1861, "step": 4030 }, { "epoch": 0.2601709787001111, "grad_norm": 0.1473870426416397, "learning_rate": 0.00018265498883736906, "loss": 0.1852, "step": 4040 }, { "epoch": 0.26081496627115097, "grad_norm": 0.1144891157746315, "learning_rate": 0.00018261205564142196, "loss": 0.1958, "step": 4050 }, { "epoch": 0.26145895384219087, "grad_norm": 0.10764404386281967, "learning_rate": 0.00018256912244547487, "loss": 0.1882, "step": 4060 }, { "epoch": 0.2621029414132307, "grad_norm": 0.12244770675897598, "learning_rate": 0.00018252618924952774, "loss": 0.1915, "step": 4070 }, { "epoch": 0.2627469289842706, "grad_norm": 0.1270090490579605, "learning_rate": 0.00018248325605358062, "loss": 0.1838, "step": 4080 }, { "epoch": 0.2633909165553105, "grad_norm": 0.13736659288406372, "learning_rate": 0.00018244032285763353, "loss": 0.1856, "step": 4090 }, { "epoch": 0.26403490412635033, "grad_norm": 0.10951870679855347, "learning_rate": 0.00018239738966168643, "loss": 0.1848, "step": 4100 }, { "epoch": 0.26467889169739023, "grad_norm": 0.12814417481422424, "learning_rate": 0.0001823544564657393, "loss": 0.1868, "step": 4110 }, { "epoch": 0.2653228792684301, "grad_norm": 0.11512584239244461, "learning_rate": 0.0001823115232697922, "loss": 0.1834, "step": 4120 }, { "epoch": 0.26596686683947, "grad_norm": 0.13139772415161133, "learning_rate": 0.00018226859007384512, "loss": 0.1853, "step": 4130 }, { "epoch": 0.26661085441050986, "grad_norm": 0.1249198317527771, "learning_rate": 0.00018222565687789802, "loss": 0.1816, "step": 4140 }, { "epoch": 0.26725484198154975, "grad_norm": 0.11116447299718857, "learning_rate": 0.0001821827236819509, "loss": 0.1876, "step": 4150 }, { "epoch": 0.26789882955258965, "grad_norm": 0.12510688602924347, "learning_rate": 0.00018213979048600377, "loss": 0.1803, "step": 4160 }, { "epoch": 0.26854281712362954, "grad_norm": 0.1158675029873848, "learning_rate": 0.00018209685729005668, "loss": 0.1827, "step": 4170 }, { "epoch": 0.2691868046946694, "grad_norm": 0.10818415880203247, "learning_rate": 0.00018205392409410958, "loss": 0.1854, "step": 4180 }, { "epoch": 0.2698307922657093, "grad_norm": 0.12388768047094345, "learning_rate": 0.00018201099089816246, "loss": 0.1968, "step": 4190 }, { "epoch": 0.27047477983674917, "grad_norm": 0.11823561042547226, "learning_rate": 0.00018196805770221536, "loss": 0.1733, "step": 4200 }, { "epoch": 0.271118767407789, "grad_norm": 0.11463792622089386, "learning_rate": 0.00018192512450626827, "loss": 0.1878, "step": 4210 }, { "epoch": 0.2717627549788289, "grad_norm": 0.14099855720996857, "learning_rate": 0.00018188219131032117, "loss": 0.1822, "step": 4220 }, { "epoch": 0.2724067425498688, "grad_norm": 0.1272314041852951, "learning_rate": 0.00018183925811437402, "loss": 0.1832, "step": 4230 }, { "epoch": 0.2730507301209087, "grad_norm": 0.11244126409292221, "learning_rate": 0.00018179632491842693, "loss": 0.1928, "step": 4240 }, { "epoch": 0.27369471769194853, "grad_norm": 0.12738147377967834, "learning_rate": 0.00018175339172247983, "loss": 0.179, "step": 4250 }, { "epoch": 0.2743387052629884, "grad_norm": 0.12346205860376358, "learning_rate": 0.00018171045852653274, "loss": 0.1843, "step": 4260 }, { "epoch": 0.2749826928340283, "grad_norm": 0.12365183234214783, "learning_rate": 0.0001816675253305856, "loss": 0.1886, "step": 4270 }, { "epoch": 0.27562668040506816, "grad_norm": 0.12525109946727753, "learning_rate": 0.00018162459213463852, "loss": 0.1877, "step": 4280 }, { "epoch": 0.27627066797610805, "grad_norm": 0.1301872432231903, "learning_rate": 0.00018158165893869142, "loss": 0.184, "step": 4290 }, { "epoch": 0.27691465554714795, "grad_norm": 0.12474730610847473, "learning_rate": 0.0001815387257427443, "loss": 0.1874, "step": 4300 }, { "epoch": 0.27755864311818784, "grad_norm": 0.10508374869823456, "learning_rate": 0.00018149579254679718, "loss": 0.1822, "step": 4310 }, { "epoch": 0.2782026306892277, "grad_norm": 0.132108673453331, "learning_rate": 0.00018145285935085008, "loss": 0.1831, "step": 4320 }, { "epoch": 0.2788466182602676, "grad_norm": 0.1316842883825302, "learning_rate": 0.00018140992615490298, "loss": 0.1794, "step": 4330 }, { "epoch": 0.27949060583130747, "grad_norm": 0.10906142741441727, "learning_rate": 0.00018136699295895586, "loss": 0.1845, "step": 4340 }, { "epoch": 0.2801345934023473, "grad_norm": 0.13461710512638092, "learning_rate": 0.00018132405976300876, "loss": 0.1886, "step": 4350 }, { "epoch": 0.2807785809733872, "grad_norm": 0.12971356511116028, "learning_rate": 0.00018128112656706167, "loss": 0.1876, "step": 4360 }, { "epoch": 0.2814225685444271, "grad_norm": 0.11699023842811584, "learning_rate": 0.00018123819337111457, "loss": 0.1813, "step": 4370 }, { "epoch": 0.282066556115467, "grad_norm": 0.10587301850318909, "learning_rate": 0.00018119526017516745, "loss": 0.1702, "step": 4380 }, { "epoch": 0.28271054368650683, "grad_norm": 0.1191600114107132, "learning_rate": 0.00018115232697922033, "loss": 0.1861, "step": 4390 }, { "epoch": 0.2833545312575467, "grad_norm": 0.12814773619174957, "learning_rate": 0.00018110939378327323, "loss": 0.1954, "step": 4400 }, { "epoch": 0.2839985188285866, "grad_norm": 0.12153156101703644, "learning_rate": 0.00018106646058732614, "loss": 0.1877, "step": 4410 }, { "epoch": 0.28464250639962646, "grad_norm": 0.12333043664693832, "learning_rate": 0.000181023527391379, "loss": 0.1803, "step": 4420 }, { "epoch": 0.28528649397066635, "grad_norm": 0.1354047805070877, "learning_rate": 0.00018098059419543192, "loss": 0.1911, "step": 4430 }, { "epoch": 0.28593048154170625, "grad_norm": 0.12450645864009857, "learning_rate": 0.00018093766099948482, "loss": 0.1864, "step": 4440 }, { "epoch": 0.28657446911274614, "grad_norm": 0.1095360517501831, "learning_rate": 0.0001808947278035377, "loss": 0.1806, "step": 4450 }, { "epoch": 0.287218456683786, "grad_norm": 0.1176232323050499, "learning_rate": 0.0001808517946075906, "loss": 0.184, "step": 4460 }, { "epoch": 0.2878624442548259, "grad_norm": 0.12675872445106506, "learning_rate": 0.00018080886141164348, "loss": 0.1819, "step": 4470 }, { "epoch": 0.2885064318258658, "grad_norm": 0.11450546234846115, "learning_rate": 0.00018076592821569638, "loss": 0.1875, "step": 4480 }, { "epoch": 0.28915041939690567, "grad_norm": 0.12686990201473236, "learning_rate": 0.00018072299501974926, "loss": 0.1873, "step": 4490 }, { "epoch": 0.2897944069679455, "grad_norm": 0.13355441391468048, "learning_rate": 0.00018068006182380217, "loss": 0.1923, "step": 4500 }, { "epoch": 0.2904383945389854, "grad_norm": 0.1133539155125618, "learning_rate": 0.00018063712862785507, "loss": 0.1867, "step": 4510 }, { "epoch": 0.2910823821100253, "grad_norm": 0.13349394500255585, "learning_rate": 0.00018059419543190797, "loss": 0.1917, "step": 4520 }, { "epoch": 0.29172636968106513, "grad_norm": 0.12096794694662094, "learning_rate": 0.00018055126223596085, "loss": 0.1917, "step": 4530 }, { "epoch": 0.29237035725210503, "grad_norm": 0.12479700893163681, "learning_rate": 0.00018050832904001376, "loss": 0.1826, "step": 4540 }, { "epoch": 0.2930143448231449, "grad_norm": 0.10523095726966858, "learning_rate": 0.00018046539584406663, "loss": 0.1777, "step": 4550 }, { "epoch": 0.2936583323941848, "grad_norm": 0.12972787022590637, "learning_rate": 0.00018042246264811954, "loss": 0.1788, "step": 4560 }, { "epoch": 0.29430231996522466, "grad_norm": 0.13490504026412964, "learning_rate": 0.00018037952945217241, "loss": 0.1806, "step": 4570 }, { "epoch": 0.29494630753626455, "grad_norm": 0.11496030539274216, "learning_rate": 0.00018033659625622532, "loss": 0.1886, "step": 4580 }, { "epoch": 0.29559029510730445, "grad_norm": 0.12010306864976883, "learning_rate": 0.00018029366306027822, "loss": 0.1758, "step": 4590 }, { "epoch": 0.2962342826783443, "grad_norm": 0.13102851808071136, "learning_rate": 0.00018025072986433113, "loss": 0.1841, "step": 4600 }, { "epoch": 0.2968782702493842, "grad_norm": 0.10861606150865555, "learning_rate": 0.000180207796668384, "loss": 0.184, "step": 4610 }, { "epoch": 0.2975222578204241, "grad_norm": 0.12917938828468323, "learning_rate": 0.0001801648634724369, "loss": 0.1847, "step": 4620 }, { "epoch": 0.29816624539146397, "grad_norm": 0.1349819004535675, "learning_rate": 0.00018012193027648979, "loss": 0.181, "step": 4630 }, { "epoch": 0.2988102329625038, "grad_norm": 0.14248323440551758, "learning_rate": 0.0001800789970805427, "loss": 0.1896, "step": 4640 }, { "epoch": 0.2994542205335437, "grad_norm": 0.1255524456501007, "learning_rate": 0.00018003606388459557, "loss": 0.1792, "step": 4650 }, { "epoch": 0.3000982081045836, "grad_norm": 0.11533594876527786, "learning_rate": 0.00017999313068864847, "loss": 0.1872, "step": 4660 }, { "epoch": 0.30074219567562344, "grad_norm": 0.12790139019489288, "learning_rate": 0.00017995019749270137, "loss": 0.1841, "step": 4670 }, { "epoch": 0.30138618324666333, "grad_norm": 0.10950981825590134, "learning_rate": 0.00017990726429675425, "loss": 0.1835, "step": 4680 }, { "epoch": 0.3020301708177032, "grad_norm": 0.1191803440451622, "learning_rate": 0.00017986433110080716, "loss": 0.1784, "step": 4690 }, { "epoch": 0.3026741583887431, "grad_norm": 0.12272847443819046, "learning_rate": 0.00017982139790486006, "loss": 0.1962, "step": 4700 }, { "epoch": 0.30331814595978296, "grad_norm": 0.10753761976957321, "learning_rate": 0.00017977846470891294, "loss": 0.1839, "step": 4710 }, { "epoch": 0.30396213353082285, "grad_norm": 0.11541954427957535, "learning_rate": 0.00017973553151296581, "loss": 0.1827, "step": 4720 }, { "epoch": 0.30460612110186275, "grad_norm": 0.11564352363348007, "learning_rate": 0.00017969259831701872, "loss": 0.1888, "step": 4730 }, { "epoch": 0.3052501086729026, "grad_norm": 0.13835163414478302, "learning_rate": 0.00017964966512107162, "loss": 0.1951, "step": 4740 }, { "epoch": 0.3058940962439425, "grad_norm": 0.12447880208492279, "learning_rate": 0.00017960673192512453, "loss": 0.1894, "step": 4750 }, { "epoch": 0.3065380838149824, "grad_norm": 0.1283678114414215, "learning_rate": 0.0001795637987291774, "loss": 0.1903, "step": 4760 }, { "epoch": 0.30718207138602227, "grad_norm": 0.13126268982887268, "learning_rate": 0.0001795208655332303, "loss": 0.1843, "step": 4770 }, { "epoch": 0.3078260589570621, "grad_norm": 0.11850028485059738, "learning_rate": 0.0001794779323372832, "loss": 0.1821, "step": 4780 }, { "epoch": 0.308470046528102, "grad_norm": 0.1192048192024231, "learning_rate": 0.0001794349991413361, "loss": 0.1837, "step": 4790 }, { "epoch": 0.3091140340991419, "grad_norm": 0.11942605674266815, "learning_rate": 0.00017939206594538897, "loss": 0.1856, "step": 4800 }, { "epoch": 0.30975802167018174, "grad_norm": 0.1306225210428238, "learning_rate": 0.00017934913274944187, "loss": 0.1836, "step": 4810 }, { "epoch": 0.31040200924122163, "grad_norm": 0.1107044592499733, "learning_rate": 0.00017930619955349478, "loss": 0.1838, "step": 4820 }, { "epoch": 0.3110459968122615, "grad_norm": 0.11753334105014801, "learning_rate": 0.00017926326635754765, "loss": 0.1915, "step": 4830 }, { "epoch": 0.3116899843833014, "grad_norm": 0.12636497616767883, "learning_rate": 0.00017922033316160056, "loss": 0.1855, "step": 4840 }, { "epoch": 0.31233397195434126, "grad_norm": 0.1287175863981247, "learning_rate": 0.00017917739996565346, "loss": 0.1843, "step": 4850 }, { "epoch": 0.31297795952538116, "grad_norm": 0.10306259244680405, "learning_rate": 0.00017913446676970637, "loss": 0.1909, "step": 4860 }, { "epoch": 0.31362194709642105, "grad_norm": 0.12376664578914642, "learning_rate": 0.00017909153357375924, "loss": 0.1812, "step": 4870 }, { "epoch": 0.31426593466746094, "grad_norm": 0.13065123558044434, "learning_rate": 0.00017904860037781212, "loss": 0.1907, "step": 4880 }, { "epoch": 0.3149099222385008, "grad_norm": 0.1251327097415924, "learning_rate": 0.00017900566718186502, "loss": 0.1815, "step": 4890 }, { "epoch": 0.3155539098095407, "grad_norm": 0.11537982523441315, "learning_rate": 0.00017896273398591793, "loss": 0.1954, "step": 4900 }, { "epoch": 0.3161978973805806, "grad_norm": 0.1327873170375824, "learning_rate": 0.0001789198007899708, "loss": 0.1847, "step": 4910 }, { "epoch": 0.3168418849516204, "grad_norm": 0.13201352953910828, "learning_rate": 0.0001788768675940237, "loss": 0.1866, "step": 4920 }, { "epoch": 0.3174858725226603, "grad_norm": 0.1246054396033287, "learning_rate": 0.0001788339343980766, "loss": 0.1895, "step": 4930 }, { "epoch": 0.3181298600937002, "grad_norm": 0.11849283427000046, "learning_rate": 0.00017879100120212952, "loss": 0.1895, "step": 4940 }, { "epoch": 0.3187738476647401, "grad_norm": 0.11482515931129456, "learning_rate": 0.00017874806800618237, "loss": 0.1882, "step": 4950 }, { "epoch": 0.31941783523577993, "grad_norm": 0.1252458393573761, "learning_rate": 0.00017870513481023527, "loss": 0.1859, "step": 4960 }, { "epoch": 0.32006182280681983, "grad_norm": 0.15002459287643433, "learning_rate": 0.00017866220161428818, "loss": 0.1795, "step": 4970 }, { "epoch": 0.3207058103778597, "grad_norm": 0.12242058664560318, "learning_rate": 0.00017861926841834108, "loss": 0.1944, "step": 4980 }, { "epoch": 0.32134979794889956, "grad_norm": 0.11282414942979813, "learning_rate": 0.00017857633522239396, "loss": 0.1871, "step": 4990 }, { "epoch": 0.32199378551993946, "grad_norm": 0.12209327518939972, "learning_rate": 0.00017853340202644686, "loss": 0.1835, "step": 5000 }, { "epoch": 0.32263777309097935, "grad_norm": 0.11756864935159683, "learning_rate": 0.00017849046883049977, "loss": 0.1838, "step": 5010 }, { "epoch": 0.32328176066201925, "grad_norm": 0.11666441708803177, "learning_rate": 0.00017844753563455264, "loss": 0.1756, "step": 5020 }, { "epoch": 0.3239257482330591, "grad_norm": 0.121433325111866, "learning_rate": 0.00017840460243860552, "loss": 0.1872, "step": 5030 }, { "epoch": 0.324569735804099, "grad_norm": 0.12918873131275177, "learning_rate": 0.00017836166924265842, "loss": 0.1832, "step": 5040 }, { "epoch": 0.3252137233751389, "grad_norm": 0.12109582126140594, "learning_rate": 0.00017831873604671133, "loss": 0.1918, "step": 5050 }, { "epoch": 0.3258577109461787, "grad_norm": 0.1399858295917511, "learning_rate": 0.0001782758028507642, "loss": 0.1878, "step": 5060 }, { "epoch": 0.3265016985172186, "grad_norm": 0.11139260977506638, "learning_rate": 0.0001782328696548171, "loss": 0.1848, "step": 5070 }, { "epoch": 0.3271456860882585, "grad_norm": 0.11643856018781662, "learning_rate": 0.00017818993645887001, "loss": 0.1814, "step": 5080 }, { "epoch": 0.3277896736592984, "grad_norm": 0.10988971590995789, "learning_rate": 0.00017814700326292292, "loss": 0.1828, "step": 5090 }, { "epoch": 0.32843366123033824, "grad_norm": 0.1102035641670227, "learning_rate": 0.0001781040700669758, "loss": 0.181, "step": 5100 }, { "epoch": 0.32907764880137813, "grad_norm": 0.1166299358010292, "learning_rate": 0.00017806113687102867, "loss": 0.1805, "step": 5110 }, { "epoch": 0.329721636372418, "grad_norm": 0.12125291675329208, "learning_rate": 0.00017801820367508158, "loss": 0.1743, "step": 5120 }, { "epoch": 0.33036562394345786, "grad_norm": 0.11072365939617157, "learning_rate": 0.00017797527047913448, "loss": 0.1817, "step": 5130 }, { "epoch": 0.33100961151449776, "grad_norm": 0.12799836695194244, "learning_rate": 0.00017793233728318736, "loss": 0.1894, "step": 5140 }, { "epoch": 0.33165359908553765, "grad_norm": 0.11433541774749756, "learning_rate": 0.00017788940408724026, "loss": 0.1835, "step": 5150 }, { "epoch": 0.33229758665657755, "grad_norm": 0.13004042208194733, "learning_rate": 0.00017784647089129317, "loss": 0.1846, "step": 5160 }, { "epoch": 0.3329415742276174, "grad_norm": 0.13145901262760162, "learning_rate": 0.00017780353769534604, "loss": 0.1798, "step": 5170 }, { "epoch": 0.3335855617986573, "grad_norm": 0.12145350128412247, "learning_rate": 0.00017776060449939895, "loss": 0.1831, "step": 5180 }, { "epoch": 0.3342295493696972, "grad_norm": 0.10960350930690765, "learning_rate": 0.00017771767130345183, "loss": 0.1878, "step": 5190 }, { "epoch": 0.33487353694073707, "grad_norm": 0.13064400851726532, "learning_rate": 0.00017767473810750473, "loss": 0.1853, "step": 5200 }, { "epoch": 0.3355175245117769, "grad_norm": 0.13122045993804932, "learning_rate": 0.0001776318049115576, "loss": 0.1847, "step": 5210 }, { "epoch": 0.3361615120828168, "grad_norm": 0.10970936715602875, "learning_rate": 0.0001775888717156105, "loss": 0.1834, "step": 5220 }, { "epoch": 0.3368054996538567, "grad_norm": 0.157416433095932, "learning_rate": 0.00017754593851966341, "loss": 0.1879, "step": 5230 }, { "epoch": 0.33744948722489654, "grad_norm": 0.11380619555711746, "learning_rate": 0.00017750300532371632, "loss": 0.1883, "step": 5240 }, { "epoch": 0.33809347479593643, "grad_norm": 0.12751874327659607, "learning_rate": 0.0001774600721277692, "loss": 0.1874, "step": 5250 }, { "epoch": 0.3387374623669763, "grad_norm": 0.13624587655067444, "learning_rate": 0.0001774171389318221, "loss": 0.1872, "step": 5260 }, { "epoch": 0.3393814499380162, "grad_norm": 0.10920032858848572, "learning_rate": 0.00017737420573587498, "loss": 0.1842, "step": 5270 }, { "epoch": 0.34002543750905606, "grad_norm": 0.11887515336275101, "learning_rate": 0.00017733127253992788, "loss": 0.1842, "step": 5280 }, { "epoch": 0.34066942508009596, "grad_norm": 0.11919160187244415, "learning_rate": 0.00017728833934398076, "loss": 0.1822, "step": 5290 }, { "epoch": 0.34131341265113585, "grad_norm": 0.12469633668661118, "learning_rate": 0.00017724540614803366, "loss": 0.1767, "step": 5300 }, { "epoch": 0.3419574002221757, "grad_norm": 0.1259453147649765, "learning_rate": 0.00017720247295208657, "loss": 0.1843, "step": 5310 }, { "epoch": 0.3426013877932156, "grad_norm": 0.12387842684984207, "learning_rate": 0.00017715953975613947, "loss": 0.1871, "step": 5320 }, { "epoch": 0.3432453753642555, "grad_norm": 0.12509110569953918, "learning_rate": 0.00017711660656019235, "loss": 0.1864, "step": 5330 }, { "epoch": 0.3438893629352954, "grad_norm": 0.11470100283622742, "learning_rate": 0.00017707367336424525, "loss": 0.176, "step": 5340 }, { "epoch": 0.3445333505063352, "grad_norm": 0.10767513513565063, "learning_rate": 0.00017703074016829813, "loss": 0.1847, "step": 5350 }, { "epoch": 0.3451773380773751, "grad_norm": 0.1224648654460907, "learning_rate": 0.00017698780697235103, "loss": 0.1871, "step": 5360 }, { "epoch": 0.345821325648415, "grad_norm": 0.11174054443836212, "learning_rate": 0.0001769448737764039, "loss": 0.1832, "step": 5370 }, { "epoch": 0.34646531321945484, "grad_norm": 0.11595217883586884, "learning_rate": 0.00017690194058045682, "loss": 0.1791, "step": 5380 }, { "epoch": 0.34710930079049473, "grad_norm": 0.11419301480054855, "learning_rate": 0.00017685900738450972, "loss": 0.1766, "step": 5390 }, { "epoch": 0.34775328836153463, "grad_norm": 0.11813237518072128, "learning_rate": 0.0001768160741885626, "loss": 0.1916, "step": 5400 }, { "epoch": 0.3483972759325745, "grad_norm": 0.11515983939170837, "learning_rate": 0.0001767731409926155, "loss": 0.1892, "step": 5410 }, { "epoch": 0.34904126350361436, "grad_norm": 0.12292594462633133, "learning_rate": 0.0001767302077966684, "loss": 0.186, "step": 5420 }, { "epoch": 0.34968525107465426, "grad_norm": 0.12649665772914886, "learning_rate": 0.00017668727460072128, "loss": 0.1827, "step": 5430 }, { "epoch": 0.35032923864569415, "grad_norm": 0.1321176141500473, "learning_rate": 0.00017664434140477416, "loss": 0.184, "step": 5440 }, { "epoch": 0.350973226216734, "grad_norm": 0.1167869046330452, "learning_rate": 0.00017660140820882706, "loss": 0.1886, "step": 5450 }, { "epoch": 0.3516172137877739, "grad_norm": 0.12226454168558121, "learning_rate": 0.00017655847501287997, "loss": 0.1885, "step": 5460 }, { "epoch": 0.3522612013588138, "grad_norm": 0.11228048801422119, "learning_rate": 0.00017651554181693287, "loss": 0.1823, "step": 5470 }, { "epoch": 0.3529051889298537, "grad_norm": 0.11963342875242233, "learning_rate": 0.00017647260862098575, "loss": 0.1855, "step": 5480 }, { "epoch": 0.3535491765008935, "grad_norm": 0.11624482274055481, "learning_rate": 0.00017642967542503865, "loss": 0.1866, "step": 5490 }, { "epoch": 0.3541931640719334, "grad_norm": 0.13675066828727722, "learning_rate": 0.00017638674222909156, "loss": 0.1741, "step": 5500 }, { "epoch": 0.3548371516429733, "grad_norm": 0.12464158236980438, "learning_rate": 0.00017634380903314443, "loss": 0.1922, "step": 5510 }, { "epoch": 0.35548113921401314, "grad_norm": 0.11980850994586945, "learning_rate": 0.0001763008758371973, "loss": 0.1782, "step": 5520 }, { "epoch": 0.35612512678505304, "grad_norm": 0.11533008515834808, "learning_rate": 0.00017625794264125022, "loss": 0.1893, "step": 5530 }, { "epoch": 0.35676911435609293, "grad_norm": 0.11527326703071594, "learning_rate": 0.00017621500944530312, "loss": 0.1752, "step": 5540 }, { "epoch": 0.3574131019271328, "grad_norm": 0.1318752020597458, "learning_rate": 0.000176172076249356, "loss": 0.1848, "step": 5550 }, { "epoch": 0.35805708949817266, "grad_norm": 0.12980863451957703, "learning_rate": 0.0001761291430534089, "loss": 0.1913, "step": 5560 }, { "epoch": 0.35870107706921256, "grad_norm": 0.13323058187961578, "learning_rate": 0.0001760862098574618, "loss": 0.1881, "step": 5570 }, { "epoch": 0.35934506464025245, "grad_norm": 0.13579212129116058, "learning_rate": 0.0001760432766615147, "loss": 0.1889, "step": 5580 }, { "epoch": 0.35998905221129235, "grad_norm": 0.13235555589199066, "learning_rate": 0.0001760003434655676, "loss": 0.1882, "step": 5590 }, { "epoch": 0.3606330397823322, "grad_norm": 0.10737158358097076, "learning_rate": 0.00017595741026962046, "loss": 0.1868, "step": 5600 }, { "epoch": 0.3612770273533721, "grad_norm": 0.11136142164468765, "learning_rate": 0.00017591447707367337, "loss": 0.1834, "step": 5610 }, { "epoch": 0.361921014924412, "grad_norm": 0.13880717754364014, "learning_rate": 0.00017587154387772627, "loss": 0.1813, "step": 5620 }, { "epoch": 0.3625650024954518, "grad_norm": 0.11593722552061081, "learning_rate": 0.00017582861068177915, "loss": 0.1849, "step": 5630 }, { "epoch": 0.3632089900664917, "grad_norm": 0.14474079012870789, "learning_rate": 0.00017578567748583205, "loss": 0.184, "step": 5640 }, { "epoch": 0.3638529776375316, "grad_norm": 0.1221480518579483, "learning_rate": 0.00017574274428988496, "loss": 0.1798, "step": 5650 }, { "epoch": 0.3644969652085715, "grad_norm": 0.13611525297164917, "learning_rate": 0.00017569981109393786, "loss": 0.1834, "step": 5660 }, { "epoch": 0.36514095277961134, "grad_norm": 0.10886432230472565, "learning_rate": 0.00017565687789799074, "loss": 0.1836, "step": 5670 }, { "epoch": 0.36578494035065123, "grad_norm": 0.12883725762367249, "learning_rate": 0.00017561394470204362, "loss": 0.1799, "step": 5680 }, { "epoch": 0.3664289279216911, "grad_norm": 0.12113569676876068, "learning_rate": 0.00017557101150609652, "loss": 0.1921, "step": 5690 }, { "epoch": 0.36707291549273097, "grad_norm": 0.12606143951416016, "learning_rate": 0.00017552807831014943, "loss": 0.1862, "step": 5700 }, { "epoch": 0.36771690306377086, "grad_norm": 0.12937669456005096, "learning_rate": 0.0001754851451142023, "loss": 0.1841, "step": 5710 }, { "epoch": 0.36836089063481076, "grad_norm": 0.11822612583637238, "learning_rate": 0.0001754422119182552, "loss": 0.2004, "step": 5720 }, { "epoch": 0.36900487820585065, "grad_norm": 0.1307377964258194, "learning_rate": 0.0001753992787223081, "loss": 0.1853, "step": 5730 }, { "epoch": 0.3696488657768905, "grad_norm": 0.12593182921409607, "learning_rate": 0.000175356345526361, "loss": 0.1826, "step": 5740 }, { "epoch": 0.3702928533479304, "grad_norm": 0.12490664422512054, "learning_rate": 0.0001753134123304139, "loss": 0.1851, "step": 5750 }, { "epoch": 0.3709368409189703, "grad_norm": 0.13299356400966644, "learning_rate": 0.00017527047913446677, "loss": 0.1796, "step": 5760 }, { "epoch": 0.3715808284900101, "grad_norm": 0.12398755550384521, "learning_rate": 0.00017522754593851967, "loss": 0.1885, "step": 5770 }, { "epoch": 0.37222481606105, "grad_norm": 0.12786270678043365, "learning_rate": 0.00017518461274257255, "loss": 0.1841, "step": 5780 }, { "epoch": 0.3728688036320899, "grad_norm": 0.12290838360786438, "learning_rate": 0.00017514167954662545, "loss": 0.1914, "step": 5790 }, { "epoch": 0.3735127912031298, "grad_norm": 0.11472488939762115, "learning_rate": 0.00017509874635067836, "loss": 0.186, "step": 5800 }, { "epoch": 0.37415677877416964, "grad_norm": 0.10468455404043198, "learning_rate": 0.00017505581315473126, "loss": 0.1828, "step": 5810 }, { "epoch": 0.37480076634520954, "grad_norm": 0.14035804569721222, "learning_rate": 0.00017501287995878414, "loss": 0.1801, "step": 5820 }, { "epoch": 0.37544475391624943, "grad_norm": 0.12943844497203827, "learning_rate": 0.00017496994676283704, "loss": 0.1872, "step": 5830 }, { "epoch": 0.37608874148728927, "grad_norm": 0.12410026788711548, "learning_rate": 0.00017492701356688992, "loss": 0.1803, "step": 5840 }, { "epoch": 0.37673272905832916, "grad_norm": 0.1253429800271988, "learning_rate": 0.00017488408037094283, "loss": 0.1775, "step": 5850 }, { "epoch": 0.37737671662936906, "grad_norm": 0.12102987617254257, "learning_rate": 0.0001748411471749957, "loss": 0.1777, "step": 5860 }, { "epoch": 0.37802070420040895, "grad_norm": 0.123225636780262, "learning_rate": 0.0001747982139790486, "loss": 0.1942, "step": 5870 }, { "epoch": 0.3786646917714488, "grad_norm": 0.14657333493232727, "learning_rate": 0.0001747552807831015, "loss": 0.1959, "step": 5880 }, { "epoch": 0.3793086793424887, "grad_norm": 0.11092278361320496, "learning_rate": 0.0001747123475871544, "loss": 0.1818, "step": 5890 }, { "epoch": 0.3799526669135286, "grad_norm": 0.12533557415008545, "learning_rate": 0.0001746694143912073, "loss": 0.1888, "step": 5900 }, { "epoch": 0.3805966544845685, "grad_norm": 0.12038221955299377, "learning_rate": 0.0001746264811952602, "loss": 0.1865, "step": 5910 }, { "epoch": 0.3812406420556083, "grad_norm": 0.12546831369400024, "learning_rate": 0.00017458354799931307, "loss": 0.1942, "step": 5920 }, { "epoch": 0.3818846296266482, "grad_norm": 0.12475251406431198, "learning_rate": 0.00017454061480336595, "loss": 0.1833, "step": 5930 }, { "epoch": 0.3825286171976881, "grad_norm": 0.12483241409063339, "learning_rate": 0.00017449768160741886, "loss": 0.1904, "step": 5940 }, { "epoch": 0.38317260476872794, "grad_norm": 0.1105113998055458, "learning_rate": 0.00017445474841147176, "loss": 0.1775, "step": 5950 }, { "epoch": 0.38381659233976784, "grad_norm": 0.16173279285430908, "learning_rate": 0.00017441181521552466, "loss": 0.1811, "step": 5960 }, { "epoch": 0.38446057991080773, "grad_norm": 0.10801669210195541, "learning_rate": 0.00017436888201957754, "loss": 0.1897, "step": 5970 }, { "epoch": 0.3851045674818476, "grad_norm": 0.1256638467311859, "learning_rate": 0.00017432594882363045, "loss": 0.1917, "step": 5980 }, { "epoch": 0.38574855505288747, "grad_norm": 0.12317980080842972, "learning_rate": 0.00017428301562768335, "loss": 0.1833, "step": 5990 }, { "epoch": 0.38639254262392736, "grad_norm": 0.11293383687734604, "learning_rate": 0.00017424008243173623, "loss": 0.1824, "step": 6000 }, { "epoch": 0.38703653019496725, "grad_norm": 0.12158932536840439, "learning_rate": 0.0001741971492357891, "loss": 0.1854, "step": 6010 }, { "epoch": 0.3876805177660071, "grad_norm": 0.13037924468517303, "learning_rate": 0.000174154216039842, "loss": 0.1895, "step": 6020 }, { "epoch": 0.388324505337047, "grad_norm": 0.14328646659851074, "learning_rate": 0.0001741112828438949, "loss": 0.1804, "step": 6030 }, { "epoch": 0.3889684929080869, "grad_norm": 0.11337558925151825, "learning_rate": 0.00017406834964794782, "loss": 0.1917, "step": 6040 }, { "epoch": 0.3896124804791268, "grad_norm": 0.12291785329580307, "learning_rate": 0.0001740254164520007, "loss": 0.1861, "step": 6050 }, { "epoch": 0.3902564680501666, "grad_norm": 0.14356815814971924, "learning_rate": 0.0001739824832560536, "loss": 0.191, "step": 6060 }, { "epoch": 0.3909004556212065, "grad_norm": 0.13168646395206451, "learning_rate": 0.0001739395500601065, "loss": 0.1839, "step": 6070 }, { "epoch": 0.3915444431922464, "grad_norm": 0.12031982839107513, "learning_rate": 0.00017389661686415938, "loss": 0.1792, "step": 6080 }, { "epoch": 0.39218843076328624, "grad_norm": 0.15754792094230652, "learning_rate": 0.00017385368366821226, "loss": 0.1859, "step": 6090 }, { "epoch": 0.39283241833432614, "grad_norm": 0.1354035586118698, "learning_rate": 0.00017381075047226516, "loss": 0.1943, "step": 6100 }, { "epoch": 0.39347640590536603, "grad_norm": 0.11748731881380081, "learning_rate": 0.00017376781727631806, "loss": 0.1825, "step": 6110 }, { "epoch": 0.39412039347640593, "grad_norm": 0.1247590035200119, "learning_rate": 0.00017372488408037094, "loss": 0.1843, "step": 6120 }, { "epoch": 0.39476438104744577, "grad_norm": 0.12259422242641449, "learning_rate": 0.00017368195088442385, "loss": 0.1866, "step": 6130 }, { "epoch": 0.39540836861848566, "grad_norm": 0.1200842335820198, "learning_rate": 0.00017363901768847675, "loss": 0.1862, "step": 6140 }, { "epoch": 0.39605235618952556, "grad_norm": 0.1369454711675644, "learning_rate": 0.00017359608449252965, "loss": 0.1754, "step": 6150 }, { "epoch": 0.3966963437605654, "grad_norm": 0.1157006099820137, "learning_rate": 0.0001735531512965825, "loss": 0.1911, "step": 6160 }, { "epoch": 0.3973403313316053, "grad_norm": 0.1234610304236412, "learning_rate": 0.0001735102181006354, "loss": 0.1749, "step": 6170 }, { "epoch": 0.3979843189026452, "grad_norm": 0.12509001791477203, "learning_rate": 0.0001734672849046883, "loss": 0.1931, "step": 6180 }, { "epoch": 0.3986283064736851, "grad_norm": 0.10893188416957855, "learning_rate": 0.00017342435170874122, "loss": 0.1837, "step": 6190 }, { "epoch": 0.3992722940447249, "grad_norm": 0.10293044149875641, "learning_rate": 0.0001733814185127941, "loss": 0.1881, "step": 6200 }, { "epoch": 0.3999162816157648, "grad_norm": 0.15649420022964478, "learning_rate": 0.000173338485316847, "loss": 0.1861, "step": 6210 }, { "epoch": 0.4005602691868047, "grad_norm": 0.14299647510051727, "learning_rate": 0.0001732955521208999, "loss": 0.1863, "step": 6220 }, { "epoch": 0.40120425675784455, "grad_norm": 0.11949121206998825, "learning_rate": 0.00017325261892495278, "loss": 0.1886, "step": 6230 }, { "epoch": 0.40184824432888444, "grad_norm": 0.12569968402385712, "learning_rate": 0.00017320968572900566, "loss": 0.1874, "step": 6240 }, { "epoch": 0.40249223189992434, "grad_norm": 0.11183993518352509, "learning_rate": 0.00017316675253305856, "loss": 0.1849, "step": 6250 }, { "epoch": 0.40313621947096423, "grad_norm": 0.12400779128074646, "learning_rate": 0.00017312381933711147, "loss": 0.1852, "step": 6260 }, { "epoch": 0.40378020704200407, "grad_norm": 0.12916992604732513, "learning_rate": 0.00017308088614116434, "loss": 0.1801, "step": 6270 }, { "epoch": 0.40442419461304396, "grad_norm": 0.1211041808128357, "learning_rate": 0.00017303795294521725, "loss": 0.1928, "step": 6280 }, { "epoch": 0.40506818218408386, "grad_norm": 0.1442290097475052, "learning_rate": 0.00017299501974927015, "loss": 0.1844, "step": 6290 }, { "epoch": 0.40571216975512375, "grad_norm": 0.10809933394193649, "learning_rate": 0.00017295208655332306, "loss": 0.1817, "step": 6300 }, { "epoch": 0.4063561573261636, "grad_norm": 0.12962569296360016, "learning_rate": 0.00017290915335737593, "loss": 0.1853, "step": 6310 }, { "epoch": 0.4070001448972035, "grad_norm": 0.12396442890167236, "learning_rate": 0.0001728662201614288, "loss": 0.184, "step": 6320 }, { "epoch": 0.4076441324682434, "grad_norm": 0.13077880442142487, "learning_rate": 0.00017282328696548171, "loss": 0.1798, "step": 6330 }, { "epoch": 0.4082881200392832, "grad_norm": 0.12326820939779282, "learning_rate": 0.00017278035376953462, "loss": 0.185, "step": 6340 }, { "epoch": 0.4089321076103231, "grad_norm": 0.10102490335702896, "learning_rate": 0.0001727374205735875, "loss": 0.1796, "step": 6350 }, { "epoch": 0.409576095181363, "grad_norm": 0.13022726774215698, "learning_rate": 0.0001726944873776404, "loss": 0.1807, "step": 6360 }, { "epoch": 0.4102200827524029, "grad_norm": 0.14529520273208618, "learning_rate": 0.0001726515541816933, "loss": 0.1878, "step": 6370 }, { "epoch": 0.41086407032344274, "grad_norm": 0.12823377549648285, "learning_rate": 0.0001726086209857462, "loss": 0.1838, "step": 6380 }, { "epoch": 0.41150805789448264, "grad_norm": 0.12896062433719635, "learning_rate": 0.00017256568778979908, "loss": 0.1873, "step": 6390 }, { "epoch": 0.41215204546552253, "grad_norm": 0.1477740854024887, "learning_rate": 0.00017252275459385196, "loss": 0.1903, "step": 6400 }, { "epoch": 0.41279603303656237, "grad_norm": 0.12457086890935898, "learning_rate": 0.00017247982139790487, "loss": 0.1842, "step": 6410 }, { "epoch": 0.41344002060760227, "grad_norm": 0.14959384500980377, "learning_rate": 0.00017243688820195777, "loss": 0.1779, "step": 6420 }, { "epoch": 0.41408400817864216, "grad_norm": 0.1105571836233139, "learning_rate": 0.00017239395500601065, "loss": 0.1842, "step": 6430 }, { "epoch": 0.41472799574968205, "grad_norm": 0.11926892399787903, "learning_rate": 0.00017235102181006355, "loss": 0.1904, "step": 6440 }, { "epoch": 0.4153719833207219, "grad_norm": 0.14861522614955902, "learning_rate": 0.00017230808861411646, "loss": 0.1774, "step": 6450 }, { "epoch": 0.4160159708917618, "grad_norm": 0.11130864173173904, "learning_rate": 0.00017226515541816933, "loss": 0.1837, "step": 6460 }, { "epoch": 0.4166599584628017, "grad_norm": 0.12166684865951538, "learning_rate": 0.00017222222222222224, "loss": 0.1799, "step": 6470 }, { "epoch": 0.4173039460338415, "grad_norm": 0.11071170121431351, "learning_rate": 0.00017217928902627511, "loss": 0.181, "step": 6480 }, { "epoch": 0.4179479336048814, "grad_norm": 0.11683431267738342, "learning_rate": 0.00017213635583032802, "loss": 0.1825, "step": 6490 }, { "epoch": 0.4185919211759213, "grad_norm": 0.16003072261810303, "learning_rate": 0.0001720934226343809, "loss": 0.1817, "step": 6500 }, { "epoch": 0.4192359087469612, "grad_norm": 0.1161658987402916, "learning_rate": 0.0001720504894384338, "loss": 0.1806, "step": 6510 }, { "epoch": 0.41987989631800104, "grad_norm": 0.13428394496440887, "learning_rate": 0.0001720075562424867, "loss": 0.173, "step": 6520 }, { "epoch": 0.42052388388904094, "grad_norm": 0.10949574410915375, "learning_rate": 0.0001719646230465396, "loss": 0.1761, "step": 6530 }, { "epoch": 0.42116787146008083, "grad_norm": 0.11603393405675888, "learning_rate": 0.00017192168985059249, "loss": 0.1847, "step": 6540 }, { "epoch": 0.4218118590311207, "grad_norm": 0.11262483149766922, "learning_rate": 0.0001718787566546454, "loss": 0.1849, "step": 6550 }, { "epoch": 0.42245584660216057, "grad_norm": 0.11427194625139236, "learning_rate": 0.00017183582345869827, "loss": 0.1911, "step": 6560 }, { "epoch": 0.42309983417320046, "grad_norm": 0.1256718784570694, "learning_rate": 0.00017179289026275117, "loss": 0.1804, "step": 6570 }, { "epoch": 0.42374382174424036, "grad_norm": 0.1412668526172638, "learning_rate": 0.00017174995706680405, "loss": 0.1853, "step": 6580 }, { "epoch": 0.4243878093152802, "grad_norm": 0.13718535006046295, "learning_rate": 0.00017170702387085695, "loss": 0.1873, "step": 6590 }, { "epoch": 0.4250317968863201, "grad_norm": 0.12157798558473587, "learning_rate": 0.00017166409067490986, "loss": 0.1812, "step": 6600 }, { "epoch": 0.42567578445736, "grad_norm": 0.13387221097946167, "learning_rate": 0.00017162115747896273, "loss": 0.1914, "step": 6610 }, { "epoch": 0.4263197720283999, "grad_norm": 0.12270978093147278, "learning_rate": 0.00017157822428301564, "loss": 0.1853, "step": 6620 }, { "epoch": 0.4269637595994397, "grad_norm": 0.11960122734308243, "learning_rate": 0.00017153529108706854, "loss": 0.1797, "step": 6630 }, { "epoch": 0.4276077471704796, "grad_norm": 0.1231238916516304, "learning_rate": 0.00017149235789112142, "loss": 0.1847, "step": 6640 }, { "epoch": 0.4282517347415195, "grad_norm": 0.11065858602523804, "learning_rate": 0.0001714494246951743, "loss": 0.1885, "step": 6650 }, { "epoch": 0.42889572231255935, "grad_norm": 0.12030089646577835, "learning_rate": 0.0001714064914992272, "loss": 0.1884, "step": 6660 }, { "epoch": 0.42953970988359924, "grad_norm": 0.11828857660293579, "learning_rate": 0.0001713635583032801, "loss": 0.1907, "step": 6670 }, { "epoch": 0.43018369745463914, "grad_norm": 0.107118159532547, "learning_rate": 0.000171320625107333, "loss": 0.1839, "step": 6680 }, { "epoch": 0.43082768502567903, "grad_norm": 0.11428102105855942, "learning_rate": 0.00017127769191138589, "loss": 0.1879, "step": 6690 }, { "epoch": 0.43147167259671887, "grad_norm": 0.12343620508909225, "learning_rate": 0.0001712347587154388, "loss": 0.1824, "step": 6700 }, { "epoch": 0.43211566016775876, "grad_norm": 0.11531835049390793, "learning_rate": 0.0001711918255194917, "loss": 0.1797, "step": 6710 }, { "epoch": 0.43275964773879866, "grad_norm": 0.11760362237691879, "learning_rate": 0.00017114889232354457, "loss": 0.1852, "step": 6720 }, { "epoch": 0.4334036353098385, "grad_norm": 0.13001690804958344, "learning_rate": 0.00017110595912759745, "loss": 0.1845, "step": 6730 }, { "epoch": 0.4340476228808784, "grad_norm": 0.12635758519172668, "learning_rate": 0.00017106302593165035, "loss": 0.1898, "step": 6740 }, { "epoch": 0.4346916104519183, "grad_norm": 0.12343205511569977, "learning_rate": 0.00017102009273570326, "loss": 0.1904, "step": 6750 }, { "epoch": 0.4353355980229582, "grad_norm": 3.880788564682007, "learning_rate": 0.00017097715953975616, "loss": 0.183, "step": 6760 }, { "epoch": 0.435979585593998, "grad_norm": 1.9436891078948975, "learning_rate": 0.00017093422634380904, "loss": 0.2042, "step": 6770 }, { "epoch": 0.4366235731650379, "grad_norm": 0.17680145800113678, "learning_rate": 0.00017089129314786194, "loss": 0.2021, "step": 6780 }, { "epoch": 0.4372675607360778, "grad_norm": 0.15394671261310577, "learning_rate": 0.00017084835995191485, "loss": 0.1802, "step": 6790 }, { "epoch": 0.43791154830711765, "grad_norm": 0.13112139701843262, "learning_rate": 0.00017080542675596772, "loss": 0.1895, "step": 6800 }, { "epoch": 0.43855553587815754, "grad_norm": 0.1358153223991394, "learning_rate": 0.0001707624935600206, "loss": 0.1806, "step": 6810 }, { "epoch": 0.43919952344919744, "grad_norm": 0.11334224045276642, "learning_rate": 0.0001707195603640735, "loss": 0.1834, "step": 6820 }, { "epoch": 0.43984351102023733, "grad_norm": 0.1346770077943802, "learning_rate": 0.0001706766271681264, "loss": 0.1846, "step": 6830 }, { "epoch": 0.44048749859127717, "grad_norm": 0.12383245676755905, "learning_rate": 0.0001706336939721793, "loss": 0.1896, "step": 6840 }, { "epoch": 0.44113148616231707, "grad_norm": 0.1430908739566803, "learning_rate": 0.0001705907607762322, "loss": 0.1876, "step": 6850 }, { "epoch": 0.44177547373335696, "grad_norm": 0.10586495697498322, "learning_rate": 0.0001705478275802851, "loss": 0.1828, "step": 6860 }, { "epoch": 0.4424194613043968, "grad_norm": 0.13818781077861786, "learning_rate": 0.000170504894384338, "loss": 0.1754, "step": 6870 }, { "epoch": 0.4430634488754367, "grad_norm": 0.10848012566566467, "learning_rate": 0.00017046196118839085, "loss": 0.1789, "step": 6880 }, { "epoch": 0.4437074364464766, "grad_norm": 0.13495562970638275, "learning_rate": 0.00017041902799244375, "loss": 0.1766, "step": 6890 }, { "epoch": 0.4443514240175165, "grad_norm": 0.1243242546916008, "learning_rate": 0.00017037609479649666, "loss": 0.1899, "step": 6900 }, { "epoch": 0.4449954115885563, "grad_norm": 0.133431538939476, "learning_rate": 0.00017033316160054956, "loss": 0.183, "step": 6910 }, { "epoch": 0.4456393991595962, "grad_norm": 0.12797188758850098, "learning_rate": 0.00017029022840460244, "loss": 0.1886, "step": 6920 }, { "epoch": 0.4462833867306361, "grad_norm": 0.12153611332178116, "learning_rate": 0.00017024729520865534, "loss": 0.1925, "step": 6930 }, { "epoch": 0.44692737430167595, "grad_norm": 0.14056751132011414, "learning_rate": 0.00017020436201270825, "loss": 0.1838, "step": 6940 }, { "epoch": 0.44757136187271584, "grad_norm": 0.1406780183315277, "learning_rate": 0.00017016142881676112, "loss": 0.1723, "step": 6950 }, { "epoch": 0.44821534944375574, "grad_norm": 0.12018056213855743, "learning_rate": 0.000170118495620814, "loss": 0.1914, "step": 6960 }, { "epoch": 0.44885933701479563, "grad_norm": 0.1493600606918335, "learning_rate": 0.0001700755624248669, "loss": 0.1833, "step": 6970 }, { "epoch": 0.4495033245858355, "grad_norm": 0.13097545504570007, "learning_rate": 0.0001700326292289198, "loss": 0.1798, "step": 6980 }, { "epoch": 0.45014731215687537, "grad_norm": 0.15075832605361938, "learning_rate": 0.0001699896960329727, "loss": 0.1818, "step": 6990 }, { "epoch": 0.45079129972791526, "grad_norm": 0.12191921472549438, "learning_rate": 0.0001699467628370256, "loss": 0.1796, "step": 7000 }, { "epoch": 0.45143528729895516, "grad_norm": 0.145933136343956, "learning_rate": 0.0001699038296410785, "loss": 0.1835, "step": 7010 }, { "epoch": 0.452079274869995, "grad_norm": 0.11861588805913925, "learning_rate": 0.0001698608964451314, "loss": 0.1842, "step": 7020 }, { "epoch": 0.4527232624410349, "grad_norm": 0.12310803681612015, "learning_rate": 0.00016981796324918428, "loss": 0.1868, "step": 7030 }, { "epoch": 0.4533672500120748, "grad_norm": 0.12434472143650055, "learning_rate": 0.00016977503005323715, "loss": 0.1782, "step": 7040 }, { "epoch": 0.4540112375831146, "grad_norm": 0.1333438903093338, "learning_rate": 0.00016973209685729006, "loss": 0.1825, "step": 7050 }, { "epoch": 0.4546552251541545, "grad_norm": 0.1253935843706131, "learning_rate": 0.00016968916366134296, "loss": 0.1807, "step": 7060 }, { "epoch": 0.4552992127251944, "grad_norm": 0.13624127209186554, "learning_rate": 0.00016964623046539584, "loss": 0.188, "step": 7070 }, { "epoch": 0.4559432002962343, "grad_norm": 0.14290674030780792, "learning_rate": 0.00016960329726944874, "loss": 0.1828, "step": 7080 }, { "epoch": 0.45658718786727415, "grad_norm": 0.11503592133522034, "learning_rate": 0.00016956036407350165, "loss": 0.1782, "step": 7090 }, { "epoch": 0.45723117543831404, "grad_norm": 0.14029444754123688, "learning_rate": 0.00016951743087755455, "loss": 0.1836, "step": 7100 }, { "epoch": 0.45787516300935394, "grad_norm": 0.12661705911159515, "learning_rate": 0.00016947449768160743, "loss": 0.1864, "step": 7110 }, { "epoch": 0.4585191505803938, "grad_norm": 0.14254356920719147, "learning_rate": 0.0001694315644856603, "loss": 0.1912, "step": 7120 }, { "epoch": 0.45916313815143367, "grad_norm": 0.1321607381105423, "learning_rate": 0.0001693886312897132, "loss": 0.1825, "step": 7130 }, { "epoch": 0.45980712572247356, "grad_norm": 0.1210755780339241, "learning_rate": 0.00016934569809376612, "loss": 0.1872, "step": 7140 }, { "epoch": 0.46045111329351346, "grad_norm": 0.1385163962841034, "learning_rate": 0.000169302764897819, "loss": 0.188, "step": 7150 }, { "epoch": 0.4610951008645533, "grad_norm": 0.13019827008247375, "learning_rate": 0.0001692598317018719, "loss": 0.1779, "step": 7160 }, { "epoch": 0.4617390884355932, "grad_norm": 0.13479545712471008, "learning_rate": 0.0001692168985059248, "loss": 0.1799, "step": 7170 }, { "epoch": 0.4623830760066331, "grad_norm": 0.12626811861991882, "learning_rate": 0.00016917396530997768, "loss": 0.186, "step": 7180 }, { "epoch": 0.4630270635776729, "grad_norm": 0.15637332201004028, "learning_rate": 0.00016913103211403058, "loss": 0.1737, "step": 7190 }, { "epoch": 0.4636710511487128, "grad_norm": 0.11737492680549622, "learning_rate": 0.00016908809891808346, "loss": 0.1823, "step": 7200 }, { "epoch": 0.4643150387197527, "grad_norm": 0.11406133323907852, "learning_rate": 0.00016904516572213636, "loss": 0.1755, "step": 7210 }, { "epoch": 0.4649590262907926, "grad_norm": 0.1428227573633194, "learning_rate": 0.00016900223252618924, "loss": 0.1826, "step": 7220 }, { "epoch": 0.46560301386183245, "grad_norm": 0.12444434314966202, "learning_rate": 0.00016895929933024214, "loss": 0.1837, "step": 7230 }, { "epoch": 0.46624700143287234, "grad_norm": 0.13280339539051056, "learning_rate": 0.00016891636613429505, "loss": 0.1822, "step": 7240 }, { "epoch": 0.46689098900391224, "grad_norm": 0.12793241441249847, "learning_rate": 0.00016887343293834795, "loss": 0.1845, "step": 7250 }, { "epoch": 0.4675349765749521, "grad_norm": 0.12555420398712158, "learning_rate": 0.00016883049974240083, "loss": 0.1791, "step": 7260 }, { "epoch": 0.46817896414599197, "grad_norm": 0.14576508104801178, "learning_rate": 0.00016878756654645373, "loss": 0.1812, "step": 7270 }, { "epoch": 0.46882295171703187, "grad_norm": 0.12378506362438202, "learning_rate": 0.0001687446333505066, "loss": 0.1851, "step": 7280 }, { "epoch": 0.46946693928807176, "grad_norm": 0.13608866930007935, "learning_rate": 0.00016870170015455952, "loss": 0.1817, "step": 7290 }, { "epoch": 0.4701109268591116, "grad_norm": 0.13100528717041016, "learning_rate": 0.0001686587669586124, "loss": 0.1845, "step": 7300 }, { "epoch": 0.4707549144301515, "grad_norm": 0.12835736572742462, "learning_rate": 0.0001686158337626653, "loss": 0.1876, "step": 7310 }, { "epoch": 0.4713989020011914, "grad_norm": 0.12743790447711945, "learning_rate": 0.0001685729005667182, "loss": 0.1865, "step": 7320 }, { "epoch": 0.4720428895722313, "grad_norm": 0.13506627082824707, "learning_rate": 0.00016852996737077108, "loss": 0.1804, "step": 7330 }, { "epoch": 0.4726868771432711, "grad_norm": 0.13791793584823608, "learning_rate": 0.00016848703417482398, "loss": 0.1899, "step": 7340 }, { "epoch": 0.473330864714311, "grad_norm": 0.12797726690769196, "learning_rate": 0.0001684441009788769, "loss": 0.1914, "step": 7350 }, { "epoch": 0.4739748522853509, "grad_norm": 0.11841007322072983, "learning_rate": 0.00016840116778292976, "loss": 0.1849, "step": 7360 }, { "epoch": 0.47461883985639075, "grad_norm": 0.12355693429708481, "learning_rate": 0.00016835823458698267, "loss": 0.1735, "step": 7370 }, { "epoch": 0.47526282742743065, "grad_norm": 0.13144242763519287, "learning_rate": 0.00016831530139103555, "loss": 0.18, "step": 7380 }, { "epoch": 0.47590681499847054, "grad_norm": 0.11185991764068604, "learning_rate": 0.00016827236819508845, "loss": 0.1834, "step": 7390 }, { "epoch": 0.47655080256951043, "grad_norm": 0.14802640676498413, "learning_rate": 0.00016822943499914135, "loss": 0.1833, "step": 7400 }, { "epoch": 0.4771947901405503, "grad_norm": 0.11195342987775803, "learning_rate": 0.00016818650180319423, "loss": 0.1958, "step": 7410 }, { "epoch": 0.47783877771159017, "grad_norm": 0.11430490761995316, "learning_rate": 0.00016814356860724714, "loss": 0.1827, "step": 7420 }, { "epoch": 0.47848276528263006, "grad_norm": 0.12103696167469025, "learning_rate": 0.00016810063541130004, "loss": 0.175, "step": 7430 }, { "epoch": 0.4791267528536699, "grad_norm": 0.1173710972070694, "learning_rate": 0.00016805770221535292, "loss": 0.175, "step": 7440 }, { "epoch": 0.4797707404247098, "grad_norm": 0.12967513501644135, "learning_rate": 0.0001680147690194058, "loss": 0.1837, "step": 7450 }, { "epoch": 0.4804147279957497, "grad_norm": 0.11063653230667114, "learning_rate": 0.0001679718358234587, "loss": 0.18, "step": 7460 }, { "epoch": 0.4810587155667896, "grad_norm": 0.1366579532623291, "learning_rate": 0.0001679289026275116, "loss": 0.1842, "step": 7470 }, { "epoch": 0.4817027031378294, "grad_norm": 0.13839580118656158, "learning_rate": 0.0001678859694315645, "loss": 0.1845, "step": 7480 }, { "epoch": 0.4823466907088693, "grad_norm": 0.11461193114519119, "learning_rate": 0.00016784303623561738, "loss": 0.1865, "step": 7490 }, { "epoch": 0.4829906782799092, "grad_norm": 0.14875398576259613, "learning_rate": 0.0001678001030396703, "loss": 0.1745, "step": 7500 }, { "epoch": 0.48363466585094905, "grad_norm": 0.117769755423069, "learning_rate": 0.0001677571698437232, "loss": 0.1868, "step": 7510 }, { "epoch": 0.48427865342198895, "grad_norm": 0.116947241127491, "learning_rate": 0.00016771423664777607, "loss": 0.1734, "step": 7520 }, { "epoch": 0.48492264099302884, "grad_norm": 0.12151771783828735, "learning_rate": 0.00016767130345182895, "loss": 0.1846, "step": 7530 }, { "epoch": 0.48556662856406874, "grad_norm": 0.13028797507286072, "learning_rate": 0.00016762837025588185, "loss": 0.1842, "step": 7540 }, { "epoch": 0.4862106161351086, "grad_norm": 0.12281551957130432, "learning_rate": 0.00016758543705993475, "loss": 0.1804, "step": 7550 }, { "epoch": 0.48685460370614847, "grad_norm": 0.1287756711244583, "learning_rate": 0.00016754250386398763, "loss": 0.1797, "step": 7560 }, { "epoch": 0.48749859127718836, "grad_norm": 0.11968358606100082, "learning_rate": 0.00016749957066804054, "loss": 0.1769, "step": 7570 }, { "epoch": 0.4881425788482282, "grad_norm": 0.13036473095417023, "learning_rate": 0.00016745663747209344, "loss": 0.1762, "step": 7580 }, { "epoch": 0.4887865664192681, "grad_norm": 0.12938518822193146, "learning_rate": 0.00016741370427614634, "loss": 0.1825, "step": 7590 }, { "epoch": 0.489430553990308, "grad_norm": 0.11266776919364929, "learning_rate": 0.00016737077108019922, "loss": 0.1822, "step": 7600 }, { "epoch": 0.4900745415613479, "grad_norm": 0.10801934450864792, "learning_rate": 0.0001673278378842521, "loss": 0.1829, "step": 7610 }, { "epoch": 0.4907185291323877, "grad_norm": 0.13294507563114166, "learning_rate": 0.000167284904688305, "loss": 0.1829, "step": 7620 }, { "epoch": 0.4913625167034276, "grad_norm": 0.13029983639717102, "learning_rate": 0.0001672419714923579, "loss": 0.1783, "step": 7630 }, { "epoch": 0.4920065042744675, "grad_norm": 0.11402368545532227, "learning_rate": 0.00016719903829641078, "loss": 0.1848, "step": 7640 }, { "epoch": 0.49265049184550735, "grad_norm": 0.1079135537147522, "learning_rate": 0.0001671561051004637, "loss": 0.182, "step": 7650 }, { "epoch": 0.49329447941654725, "grad_norm": 0.12867753207683563, "learning_rate": 0.0001671131719045166, "loss": 0.1806, "step": 7660 }, { "epoch": 0.49393846698758714, "grad_norm": 0.1278223693370819, "learning_rate": 0.00016707023870856947, "loss": 0.1821, "step": 7670 }, { "epoch": 0.49458245455862704, "grad_norm": 0.11122014373540878, "learning_rate": 0.00016702730551262237, "loss": 0.1966, "step": 7680 }, { "epoch": 0.4952264421296669, "grad_norm": 0.1150222048163414, "learning_rate": 0.00016698437231667525, "loss": 0.1876, "step": 7690 }, { "epoch": 0.49587042970070677, "grad_norm": 0.13284921646118164, "learning_rate": 0.00016694143912072816, "loss": 0.1809, "step": 7700 }, { "epoch": 0.49651441727174667, "grad_norm": 0.12164290249347687, "learning_rate": 0.00016689850592478103, "loss": 0.1871, "step": 7710 }, { "epoch": 0.49715840484278656, "grad_norm": 0.10903923958539963, "learning_rate": 0.00016685557272883394, "loss": 0.1835, "step": 7720 }, { "epoch": 0.4978023924138264, "grad_norm": 0.12705622613430023, "learning_rate": 0.00016681263953288684, "loss": 0.1785, "step": 7730 }, { "epoch": 0.4984463799848663, "grad_norm": 0.1095200926065445, "learning_rate": 0.00016676970633693975, "loss": 0.1834, "step": 7740 }, { "epoch": 0.4990903675559062, "grad_norm": 0.1274404078722, "learning_rate": 0.00016672677314099262, "loss": 0.1818, "step": 7750 }, { "epoch": 0.49973435512694603, "grad_norm": 0.12839485704898834, "learning_rate": 0.00016668383994504553, "loss": 0.1806, "step": 7760 }, { "epoch": 0.500378342697986, "grad_norm": 0.12063802033662796, "learning_rate": 0.0001666409067490984, "loss": 0.1836, "step": 7770 }, { "epoch": 0.5010223302690258, "grad_norm": 0.11873096227645874, "learning_rate": 0.0001665979735531513, "loss": 0.1848, "step": 7780 }, { "epoch": 0.5016663178400657, "grad_norm": 0.11839767545461655, "learning_rate": 0.00016655504035720418, "loss": 0.1885, "step": 7790 }, { "epoch": 0.5023103054111056, "grad_norm": 0.12136141210794449, "learning_rate": 0.0001665121071612571, "loss": 0.1859, "step": 7800 }, { "epoch": 0.5029542929821454, "grad_norm": 0.13005048036575317, "learning_rate": 0.00016646917396531, "loss": 0.1898, "step": 7810 }, { "epoch": 0.5035982805531853, "grad_norm": 0.12349523603916168, "learning_rate": 0.0001664262407693629, "loss": 0.1787, "step": 7820 }, { "epoch": 0.5042422681242252, "grad_norm": 0.1346592754125595, "learning_rate": 0.00016638330757341577, "loss": 0.178, "step": 7830 }, { "epoch": 0.5048862556952651, "grad_norm": 0.1269155740737915, "learning_rate": 0.00016634037437746868, "loss": 0.1791, "step": 7840 }, { "epoch": 0.5055302432663049, "grad_norm": 0.12999708950519562, "learning_rate": 0.00016629744118152156, "loss": 0.1874, "step": 7850 }, { "epoch": 0.5061742308373448, "grad_norm": 0.13957485556602478, "learning_rate": 0.00016625450798557446, "loss": 0.1875, "step": 7860 }, { "epoch": 0.5068182184083847, "grad_norm": 0.1256454735994339, "learning_rate": 0.00016621157478962734, "loss": 0.1861, "step": 7870 }, { "epoch": 0.5074622059794246, "grad_norm": 0.11940255016088486, "learning_rate": 0.00016616864159368024, "loss": 0.1788, "step": 7880 }, { "epoch": 0.5081061935504645, "grad_norm": 0.1238575354218483, "learning_rate": 0.00016612570839773315, "loss": 0.1854, "step": 7890 }, { "epoch": 0.5087501811215044, "grad_norm": 0.13928361237049103, "learning_rate": 0.00016608277520178602, "loss": 0.1784, "step": 7900 }, { "epoch": 0.5093941686925443, "grad_norm": 0.139267697930336, "learning_rate": 0.00016603984200583893, "loss": 0.1892, "step": 7910 }, { "epoch": 0.5100381562635841, "grad_norm": 0.1155412569642067, "learning_rate": 0.00016599690880989183, "loss": 0.1809, "step": 7920 }, { "epoch": 0.510682143834624, "grad_norm": 0.12263794988393784, "learning_rate": 0.0001659539756139447, "loss": 0.1887, "step": 7930 }, { "epoch": 0.5113261314056639, "grad_norm": 0.11682318150997162, "learning_rate": 0.00016591104241799759, "loss": 0.1871, "step": 7940 }, { "epoch": 0.5119701189767037, "grad_norm": 0.14106059074401855, "learning_rate": 0.0001658681092220505, "loss": 0.1844, "step": 7950 }, { "epoch": 0.5126141065477436, "grad_norm": 0.1269495189189911, "learning_rate": 0.0001658251760261034, "loss": 0.1815, "step": 7960 }, { "epoch": 0.5132580941187835, "grad_norm": 0.10596071183681488, "learning_rate": 0.0001657822428301563, "loss": 0.1806, "step": 7970 }, { "epoch": 0.5139020816898234, "grad_norm": 0.12032703310251236, "learning_rate": 0.00016573930963420918, "loss": 0.1903, "step": 7980 }, { "epoch": 0.5145460692608632, "grad_norm": 0.11095137149095535, "learning_rate": 0.00016569637643826208, "loss": 0.185, "step": 7990 }, { "epoch": 0.5151900568319031, "grad_norm": 0.11385227739810944, "learning_rate": 0.00016565344324231498, "loss": 0.1875, "step": 8000 }, { "epoch": 0.515834044402943, "grad_norm": 0.12675011157989502, "learning_rate": 0.00016561051004636786, "loss": 0.1802, "step": 8010 }, { "epoch": 0.5164780319739829, "grad_norm": 0.12833265960216522, "learning_rate": 0.00016556757685042074, "loss": 0.181, "step": 8020 }, { "epoch": 0.5171220195450228, "grad_norm": 0.11970249563455582, "learning_rate": 0.00016552464365447364, "loss": 0.1884, "step": 8030 }, { "epoch": 0.5177660071160627, "grad_norm": 0.1126246377825737, "learning_rate": 0.00016548171045852655, "loss": 0.1852, "step": 8040 }, { "epoch": 0.5184099946871026, "grad_norm": 0.1156294122338295, "learning_rate": 0.00016543877726257942, "loss": 0.1898, "step": 8050 }, { "epoch": 0.5190539822581424, "grad_norm": 0.12187884747982025, "learning_rate": 0.00016539584406663233, "loss": 0.1909, "step": 8060 }, { "epoch": 0.5196979698291823, "grad_norm": 0.12784461677074432, "learning_rate": 0.00016535291087068523, "loss": 0.1842, "step": 8070 }, { "epoch": 0.5203419574002222, "grad_norm": 0.11569996923208237, "learning_rate": 0.00016530997767473814, "loss": 0.1771, "step": 8080 }, { "epoch": 0.520985944971262, "grad_norm": 0.13162079453468323, "learning_rate": 0.000165267044478791, "loss": 0.1859, "step": 8090 }, { "epoch": 0.5216299325423019, "grad_norm": 0.1050688773393631, "learning_rate": 0.0001652241112828439, "loss": 0.1801, "step": 8100 }, { "epoch": 0.5222739201133418, "grad_norm": 0.1267021894454956, "learning_rate": 0.0001651811780868968, "loss": 0.1814, "step": 8110 }, { "epoch": 0.5229179076843817, "grad_norm": 0.1305539458990097, "learning_rate": 0.0001651382448909497, "loss": 0.1871, "step": 8120 }, { "epoch": 0.5235618952554215, "grad_norm": 0.12416972219944, "learning_rate": 0.00016509531169500258, "loss": 0.1836, "step": 8130 }, { "epoch": 0.5242058828264614, "grad_norm": 0.1237541139125824, "learning_rate": 0.00016505237849905548, "loss": 0.1765, "step": 8140 }, { "epoch": 0.5248498703975013, "grad_norm": 0.12477952241897583, "learning_rate": 0.00016500944530310838, "loss": 0.1796, "step": 8150 }, { "epoch": 0.5254938579685412, "grad_norm": 0.12212605774402618, "learning_rate": 0.00016496651210716126, "loss": 0.1775, "step": 8160 }, { "epoch": 0.5261378455395811, "grad_norm": 0.1279052048921585, "learning_rate": 0.00016492357891121414, "loss": 0.1816, "step": 8170 }, { "epoch": 0.526781833110621, "grad_norm": 0.12842564284801483, "learning_rate": 0.00016488064571526704, "loss": 0.1829, "step": 8180 }, { "epoch": 0.5274258206816609, "grad_norm": 0.13593339920043945, "learning_rate": 0.00016483771251931995, "loss": 0.1819, "step": 8190 }, { "epoch": 0.5280698082527007, "grad_norm": 0.11503425240516663, "learning_rate": 0.00016479477932337285, "loss": 0.1799, "step": 8200 }, { "epoch": 0.5287137958237406, "grad_norm": 0.13958676159381866, "learning_rate": 0.00016475184612742573, "loss": 0.175, "step": 8210 }, { "epoch": 0.5293577833947805, "grad_norm": 0.11291506886482239, "learning_rate": 0.00016470891293147863, "loss": 0.1816, "step": 8220 }, { "epoch": 0.5300017709658204, "grad_norm": 0.12286960333585739, "learning_rate": 0.00016466597973553154, "loss": 0.1807, "step": 8230 }, { "epoch": 0.5306457585368602, "grad_norm": 0.11908277124166489, "learning_rate": 0.00016462304653958441, "loss": 0.1858, "step": 8240 }, { "epoch": 0.5312897461079001, "grad_norm": 0.13042843341827393, "learning_rate": 0.0001645801133436373, "loss": 0.1857, "step": 8250 }, { "epoch": 0.53193373367894, "grad_norm": 0.12704038619995117, "learning_rate": 0.0001645371801476902, "loss": 0.1811, "step": 8260 }, { "epoch": 0.5325777212499799, "grad_norm": 0.12615877389907837, "learning_rate": 0.0001644942469517431, "loss": 0.1815, "step": 8270 }, { "epoch": 0.5332217088210197, "grad_norm": 0.13141842186450958, "learning_rate": 0.00016445131375579598, "loss": 0.1788, "step": 8280 }, { "epoch": 0.5338656963920596, "grad_norm": 0.11031504720449448, "learning_rate": 0.00016440838055984888, "loss": 0.188, "step": 8290 }, { "epoch": 0.5345096839630995, "grad_norm": 0.11535747349262238, "learning_rate": 0.00016436544736390179, "loss": 0.195, "step": 8300 }, { "epoch": 0.5351536715341394, "grad_norm": 0.1229562982916832, "learning_rate": 0.0001643225141679547, "loss": 0.1884, "step": 8310 }, { "epoch": 0.5357976591051793, "grad_norm": 0.11816052347421646, "learning_rate": 0.00016427958097200757, "loss": 0.1819, "step": 8320 }, { "epoch": 0.5364416466762192, "grad_norm": 0.1055048406124115, "learning_rate": 0.00016423664777606044, "loss": 0.1889, "step": 8330 }, { "epoch": 0.5370856342472591, "grad_norm": 0.10512948781251907, "learning_rate": 0.00016419371458011335, "loss": 0.1842, "step": 8340 }, { "epoch": 0.5377296218182989, "grad_norm": 0.10830094665288925, "learning_rate": 0.00016415078138416625, "loss": 0.1832, "step": 8350 }, { "epoch": 0.5383736093893388, "grad_norm": 0.1312839388847351, "learning_rate": 0.00016410784818821913, "loss": 0.1816, "step": 8360 }, { "epoch": 0.5390175969603787, "grad_norm": 0.11186930537223816, "learning_rate": 0.00016406491499227203, "loss": 0.1755, "step": 8370 }, { "epoch": 0.5396615845314185, "grad_norm": 0.1522005945444107, "learning_rate": 0.00016402198179632494, "loss": 0.1834, "step": 8380 }, { "epoch": 0.5403055721024584, "grad_norm": 0.12999819219112396, "learning_rate": 0.00016397904860037781, "loss": 0.1837, "step": 8390 }, { "epoch": 0.5409495596734983, "grad_norm": 0.11412648111581802, "learning_rate": 0.00016393611540443072, "loss": 0.183, "step": 8400 }, { "epoch": 0.5415935472445382, "grad_norm": 0.12179327011108398, "learning_rate": 0.0001638931822084836, "loss": 0.1856, "step": 8410 }, { "epoch": 0.542237534815578, "grad_norm": 0.10984175652265549, "learning_rate": 0.0001638502490125365, "loss": 0.1876, "step": 8420 }, { "epoch": 0.5428815223866179, "grad_norm": 0.12137898802757263, "learning_rate": 0.00016380731581658938, "loss": 0.1792, "step": 8430 }, { "epoch": 0.5435255099576578, "grad_norm": 0.130252406001091, "learning_rate": 0.00016376438262064228, "loss": 0.1872, "step": 8440 }, { "epoch": 0.5441694975286977, "grad_norm": 0.12374990433454514, "learning_rate": 0.00016372144942469519, "loss": 0.19, "step": 8450 }, { "epoch": 0.5448134850997376, "grad_norm": 0.12665484845638275, "learning_rate": 0.0001636785162287481, "loss": 0.1816, "step": 8460 }, { "epoch": 0.5454574726707775, "grad_norm": 0.10971850156784058, "learning_rate": 0.00016363558303280097, "loss": 0.1781, "step": 8470 }, { "epoch": 0.5461014602418174, "grad_norm": 0.12678050994873047, "learning_rate": 0.00016359264983685387, "loss": 0.1771, "step": 8480 }, { "epoch": 0.5467454478128572, "grad_norm": 0.13764366507530212, "learning_rate": 0.00016354971664090675, "loss": 0.1911, "step": 8490 }, { "epoch": 0.5473894353838971, "grad_norm": 0.11314430087804794, "learning_rate": 0.00016350678344495965, "loss": 0.1816, "step": 8500 }, { "epoch": 0.548033422954937, "grad_norm": 0.10905402898788452, "learning_rate": 0.00016346385024901253, "loss": 0.18, "step": 8510 }, { "epoch": 0.5486774105259768, "grad_norm": 0.1372346729040146, "learning_rate": 0.00016342091705306543, "loss": 0.1816, "step": 8520 }, { "epoch": 0.5493213980970167, "grad_norm": 0.12747551500797272, "learning_rate": 0.00016337798385711834, "loss": 0.1868, "step": 8530 }, { "epoch": 0.5499653856680566, "grad_norm": 0.12879876792430878, "learning_rate": 0.00016333505066117124, "loss": 0.1805, "step": 8540 }, { "epoch": 0.5506093732390965, "grad_norm": 0.11907552927732468, "learning_rate": 0.00016329211746522412, "loss": 0.1853, "step": 8550 }, { "epoch": 0.5512533608101363, "grad_norm": 0.13500593602657318, "learning_rate": 0.00016324918426927702, "loss": 0.1782, "step": 8560 }, { "epoch": 0.5518973483811762, "grad_norm": 0.12189771980047226, "learning_rate": 0.0001632062510733299, "loss": 0.1861, "step": 8570 }, { "epoch": 0.5525413359522161, "grad_norm": 0.12442110478878021, "learning_rate": 0.0001631633178773828, "loss": 0.1824, "step": 8580 }, { "epoch": 0.553185323523256, "grad_norm": 0.11819935590028763, "learning_rate": 0.00016312038468143568, "loss": 0.186, "step": 8590 }, { "epoch": 0.5538293110942959, "grad_norm": 0.11987268179655075, "learning_rate": 0.0001630774514854886, "loss": 0.1811, "step": 8600 }, { "epoch": 0.5544732986653358, "grad_norm": 0.11956378817558289, "learning_rate": 0.0001630345182895415, "loss": 0.182, "step": 8610 }, { "epoch": 0.5551172862363757, "grad_norm": 0.1328381448984146, "learning_rate": 0.00016299158509359437, "loss": 0.1817, "step": 8620 }, { "epoch": 0.5557612738074155, "grad_norm": 0.1296807825565338, "learning_rate": 0.00016294865189764727, "loss": 0.1813, "step": 8630 }, { "epoch": 0.5564052613784554, "grad_norm": 0.1535952091217041, "learning_rate": 0.00016290571870170018, "loss": 0.1862, "step": 8640 }, { "epoch": 0.5570492489494953, "grad_norm": 0.12192027270793915, "learning_rate": 0.00016286278550575305, "loss": 0.1861, "step": 8650 }, { "epoch": 0.5576932365205352, "grad_norm": 0.11528676748275757, "learning_rate": 0.00016281985230980593, "loss": 0.1831, "step": 8660 }, { "epoch": 0.558337224091575, "grad_norm": 0.13607464730739594, "learning_rate": 0.00016277691911385883, "loss": 0.1824, "step": 8670 }, { "epoch": 0.5589812116626149, "grad_norm": 0.1291610151529312, "learning_rate": 0.00016273398591791174, "loss": 0.196, "step": 8680 }, { "epoch": 0.5596251992336548, "grad_norm": 0.13937611877918243, "learning_rate": 0.00016269105272196464, "loss": 0.189, "step": 8690 }, { "epoch": 0.5602691868046946, "grad_norm": 0.15351496636867523, "learning_rate": 0.00016264811952601752, "loss": 0.1871, "step": 8700 }, { "epoch": 0.5609131743757345, "grad_norm": 0.1278526484966278, "learning_rate": 0.00016260518633007042, "loss": 0.19, "step": 8710 }, { "epoch": 0.5615571619467744, "grad_norm": 0.12124008685350418, "learning_rate": 0.00016256225313412333, "loss": 0.1854, "step": 8720 }, { "epoch": 0.5622011495178143, "grad_norm": 0.12968719005584717, "learning_rate": 0.0001625193199381762, "loss": 0.1838, "step": 8730 }, { "epoch": 0.5628451370888542, "grad_norm": 0.13571786880493164, "learning_rate": 0.00016247638674222908, "loss": 0.1869, "step": 8740 }, { "epoch": 0.5634891246598941, "grad_norm": 0.1203419417142868, "learning_rate": 0.000162433453546282, "loss": 0.1881, "step": 8750 }, { "epoch": 0.564133112230934, "grad_norm": 0.11846519261598587, "learning_rate": 0.0001623905203503349, "loss": 0.1883, "step": 8760 }, { "epoch": 0.5647770998019738, "grad_norm": 0.10362201184034348, "learning_rate": 0.00016234758715438777, "loss": 0.1906, "step": 8770 }, { "epoch": 0.5654210873730137, "grad_norm": 0.11542064696550369, "learning_rate": 0.00016230465395844067, "loss": 0.186, "step": 8780 }, { "epoch": 0.5660650749440536, "grad_norm": 0.13161948323249817, "learning_rate": 0.00016226172076249358, "loss": 0.1838, "step": 8790 }, { "epoch": 0.5667090625150935, "grad_norm": 0.1205764189362526, "learning_rate": 0.00016221878756654648, "loss": 0.1821, "step": 8800 }, { "epoch": 0.5673530500861333, "grad_norm": 0.11702017486095428, "learning_rate": 0.00016217585437059936, "loss": 0.1886, "step": 8810 }, { "epoch": 0.5679970376571732, "grad_norm": 0.12866714596748352, "learning_rate": 0.00016213292117465224, "loss": 0.18, "step": 8820 }, { "epoch": 0.5686410252282131, "grad_norm": 0.12747050821781158, "learning_rate": 0.00016208998797870514, "loss": 0.1784, "step": 8830 }, { "epoch": 0.5692850127992529, "grad_norm": 0.13773863017559052, "learning_rate": 0.00016204705478275804, "loss": 0.1942, "step": 8840 }, { "epoch": 0.5699290003702928, "grad_norm": 0.12796710431575775, "learning_rate": 0.00016200412158681092, "loss": 0.1921, "step": 8850 }, { "epoch": 0.5705729879413327, "grad_norm": 0.1294478327035904, "learning_rate": 0.00016196118839086383, "loss": 0.1764, "step": 8860 }, { "epoch": 0.5712169755123726, "grad_norm": 0.13999170064926147, "learning_rate": 0.00016191825519491673, "loss": 0.1823, "step": 8870 }, { "epoch": 0.5718609630834125, "grad_norm": 0.1256324052810669, "learning_rate": 0.0001618753219989696, "loss": 0.1868, "step": 8880 }, { "epoch": 0.5725049506544524, "grad_norm": 0.13044750690460205, "learning_rate": 0.00016183238880302248, "loss": 0.1869, "step": 8890 }, { "epoch": 0.5731489382254923, "grad_norm": 0.15160907804965973, "learning_rate": 0.0001617894556070754, "loss": 0.1823, "step": 8900 }, { "epoch": 0.5737929257965321, "grad_norm": 0.10839187353849411, "learning_rate": 0.0001617465224111283, "loss": 0.1785, "step": 8910 }, { "epoch": 0.574436913367572, "grad_norm": 0.12114312499761581, "learning_rate": 0.0001617035892151812, "loss": 0.1794, "step": 8920 }, { "epoch": 0.5750809009386119, "grad_norm": 0.11002717167139053, "learning_rate": 0.00016166065601923407, "loss": 0.1791, "step": 8930 }, { "epoch": 0.5757248885096518, "grad_norm": 0.13620615005493164, "learning_rate": 0.00016161772282328698, "loss": 0.1803, "step": 8940 }, { "epoch": 0.5763688760806917, "grad_norm": 0.11313425004482269, "learning_rate": 0.00016157478962733988, "loss": 0.1714, "step": 8950 }, { "epoch": 0.5770128636517315, "grad_norm": 0.11936705559492111, "learning_rate": 0.00016153185643139276, "loss": 0.1833, "step": 8960 }, { "epoch": 0.5776568512227714, "grad_norm": 0.12389236688613892, "learning_rate": 0.00016148892323544564, "loss": 0.189, "step": 8970 }, { "epoch": 0.5783008387938113, "grad_norm": 0.1215524673461914, "learning_rate": 0.00016144599003949854, "loss": 0.1898, "step": 8980 }, { "epoch": 0.5789448263648511, "grad_norm": 0.1352369487285614, "learning_rate": 0.00016140305684355144, "loss": 0.1904, "step": 8990 }, { "epoch": 0.579588813935891, "grad_norm": 0.1422119289636612, "learning_rate": 0.00016136012364760432, "loss": 0.1906, "step": 9000 }, { "epoch": 0.5802328015069309, "grad_norm": 0.11522985249757767, "learning_rate": 0.00016131719045165723, "loss": 0.1902, "step": 9010 }, { "epoch": 0.5808767890779708, "grad_norm": 0.125756174325943, "learning_rate": 0.00016127425725571013, "loss": 0.1828, "step": 9020 }, { "epoch": 0.5815207766490107, "grad_norm": 0.11505703628063202, "learning_rate": 0.00016123132405976303, "loss": 0.1816, "step": 9030 }, { "epoch": 0.5821647642200506, "grad_norm": 0.10732155293226242, "learning_rate": 0.0001611883908638159, "loss": 0.1829, "step": 9040 }, { "epoch": 0.5828087517910905, "grad_norm": 0.13009953498840332, "learning_rate": 0.0001611454576678688, "loss": 0.1829, "step": 9050 }, { "epoch": 0.5834527393621303, "grad_norm": 0.13308323919773102, "learning_rate": 0.0001611025244719217, "loss": 0.1847, "step": 9060 }, { "epoch": 0.5840967269331702, "grad_norm": 0.1178472712635994, "learning_rate": 0.0001610595912759746, "loss": 0.1884, "step": 9070 }, { "epoch": 0.5847407145042101, "grad_norm": 0.11928070336580276, "learning_rate": 0.00016101665808002747, "loss": 0.1813, "step": 9080 }, { "epoch": 0.58538470207525, "grad_norm": 0.13419072329998016, "learning_rate": 0.00016097372488408038, "loss": 0.1823, "step": 9090 }, { "epoch": 0.5860286896462898, "grad_norm": 0.1102556586265564, "learning_rate": 0.00016093079168813328, "loss": 0.1754, "step": 9100 }, { "epoch": 0.5866726772173297, "grad_norm": 0.11916135996580124, "learning_rate": 0.00016088785849218616, "loss": 0.1801, "step": 9110 }, { "epoch": 0.5873166647883696, "grad_norm": 0.12731367349624634, "learning_rate": 0.00016084492529623906, "loss": 0.1799, "step": 9120 }, { "epoch": 0.5879606523594094, "grad_norm": 0.11546481400728226, "learning_rate": 0.00016080199210029194, "loss": 0.1817, "step": 9130 }, { "epoch": 0.5886046399304493, "grad_norm": 0.11840604990720749, "learning_rate": 0.00016075905890434485, "loss": 0.1755, "step": 9140 }, { "epoch": 0.5892486275014892, "grad_norm": 0.13542455434799194, "learning_rate": 0.00016071612570839772, "loss": 0.1826, "step": 9150 }, { "epoch": 0.5898926150725291, "grad_norm": 0.09852705150842667, "learning_rate": 0.00016067319251245063, "loss": 0.1826, "step": 9160 }, { "epoch": 0.590536602643569, "grad_norm": 0.12975232303142548, "learning_rate": 0.00016063025931650353, "loss": 0.185, "step": 9170 }, { "epoch": 0.5911805902146089, "grad_norm": 0.1102743148803711, "learning_rate": 0.00016058732612055644, "loss": 0.1781, "step": 9180 }, { "epoch": 0.5918245777856488, "grad_norm": 0.13749462366104126, "learning_rate": 0.0001605443929246093, "loss": 0.1916, "step": 9190 }, { "epoch": 0.5924685653566886, "grad_norm": 0.13119053840637207, "learning_rate": 0.00016050145972866222, "loss": 0.1835, "step": 9200 }, { "epoch": 0.5931125529277285, "grad_norm": 0.11947789788246155, "learning_rate": 0.0001604585265327151, "loss": 0.181, "step": 9210 }, { "epoch": 0.5937565404987684, "grad_norm": 0.11008266359567642, "learning_rate": 0.000160415593336768, "loss": 0.1839, "step": 9220 }, { "epoch": 0.5944005280698083, "grad_norm": 0.13181959092617035, "learning_rate": 0.00016037266014082087, "loss": 0.1842, "step": 9230 }, { "epoch": 0.5950445156408481, "grad_norm": 0.1172255352139473, "learning_rate": 0.00016032972694487378, "loss": 0.1796, "step": 9240 }, { "epoch": 0.595688503211888, "grad_norm": 0.12933801114559174, "learning_rate": 0.00016028679374892668, "loss": 0.1815, "step": 9250 }, { "epoch": 0.5963324907829279, "grad_norm": 0.13041269779205322, "learning_rate": 0.0001602438605529796, "loss": 0.1823, "step": 9260 }, { "epoch": 0.5969764783539677, "grad_norm": 0.11017858237028122, "learning_rate": 0.00016020092735703246, "loss": 0.1812, "step": 9270 }, { "epoch": 0.5976204659250076, "grad_norm": 0.10970480740070343, "learning_rate": 0.00016015799416108537, "loss": 0.1886, "step": 9280 }, { "epoch": 0.5982644534960475, "grad_norm": 0.10907194018363953, "learning_rate": 0.00016011506096513825, "loss": 0.1856, "step": 9290 }, { "epoch": 0.5989084410670874, "grad_norm": 0.1436028629541397, "learning_rate": 0.00016007212776919115, "loss": 0.1788, "step": 9300 }, { "epoch": 0.5995524286381273, "grad_norm": 0.12361627072095871, "learning_rate": 0.00016002919457324403, "loss": 0.1837, "step": 9310 }, { "epoch": 0.6001964162091672, "grad_norm": 0.10720311105251312, "learning_rate": 0.00015998626137729693, "loss": 0.1846, "step": 9320 }, { "epoch": 0.6008404037802071, "grad_norm": 0.15615269541740417, "learning_rate": 0.00015994332818134984, "loss": 0.189, "step": 9330 }, { "epoch": 0.6014843913512469, "grad_norm": 0.11246367543935776, "learning_rate": 0.0001599003949854027, "loss": 0.1814, "step": 9340 }, { "epoch": 0.6021283789222868, "grad_norm": 0.11712147295475006, "learning_rate": 0.00015985746178945562, "loss": 0.1861, "step": 9350 }, { "epoch": 0.6027723664933267, "grad_norm": 0.14703349769115448, "learning_rate": 0.00015981452859350852, "loss": 0.1775, "step": 9360 }, { "epoch": 0.6034163540643666, "grad_norm": 0.13053101301193237, "learning_rate": 0.0001597715953975614, "loss": 0.1864, "step": 9370 }, { "epoch": 0.6040603416354065, "grad_norm": 0.12526027858257294, "learning_rate": 0.00015972866220161428, "loss": 0.1867, "step": 9380 }, { "epoch": 0.6047043292064463, "grad_norm": 0.12814810872077942, "learning_rate": 0.00015968572900566718, "loss": 0.1842, "step": 9390 }, { "epoch": 0.6053483167774862, "grad_norm": 0.1324543058872223, "learning_rate": 0.00015964279580972008, "loss": 0.1772, "step": 9400 }, { "epoch": 0.605992304348526, "grad_norm": 0.11403471231460571, "learning_rate": 0.000159599862613773, "loss": 0.1866, "step": 9410 }, { "epoch": 0.6066362919195659, "grad_norm": 0.11346079409122467, "learning_rate": 0.00015955692941782587, "loss": 0.1751, "step": 9420 }, { "epoch": 0.6072802794906058, "grad_norm": 0.10749000310897827, "learning_rate": 0.00015951399622187877, "loss": 0.1824, "step": 9430 }, { "epoch": 0.6079242670616457, "grad_norm": 0.11306248605251312, "learning_rate": 0.00015947106302593167, "loss": 0.1772, "step": 9440 }, { "epoch": 0.6085682546326856, "grad_norm": 0.11676988005638123, "learning_rate": 0.00015942812982998455, "loss": 0.1848, "step": 9450 }, { "epoch": 0.6092122422037255, "grad_norm": 0.11829253286123276, "learning_rate": 0.00015938519663403743, "loss": 0.1847, "step": 9460 }, { "epoch": 0.6098562297747654, "grad_norm": 0.12828205525875092, "learning_rate": 0.00015934226343809033, "loss": 0.1868, "step": 9470 }, { "epoch": 0.6105002173458052, "grad_norm": 0.12106934189796448, "learning_rate": 0.00015929933024214324, "loss": 0.179, "step": 9480 }, { "epoch": 0.6111442049168451, "grad_norm": 0.10699620842933655, "learning_rate": 0.00015925639704619611, "loss": 0.1817, "step": 9490 }, { "epoch": 0.611788192487885, "grad_norm": 0.14670446515083313, "learning_rate": 0.00015921346385024902, "loss": 0.1758, "step": 9500 }, { "epoch": 0.6124321800589249, "grad_norm": 0.1343141347169876, "learning_rate": 0.00015917053065430192, "loss": 0.1838, "step": 9510 }, { "epoch": 0.6130761676299648, "grad_norm": 0.12095517665147781, "learning_rate": 0.00015912759745835483, "loss": 0.1839, "step": 9520 }, { "epoch": 0.6137201552010046, "grad_norm": 0.11612652987241745, "learning_rate": 0.0001590846642624077, "loss": 0.1878, "step": 9530 }, { "epoch": 0.6143641427720445, "grad_norm": 0.12671925127506256, "learning_rate": 0.00015904173106646058, "loss": 0.1858, "step": 9540 }, { "epoch": 0.6150081303430843, "grad_norm": 0.12201143801212311, "learning_rate": 0.00015899879787051348, "loss": 0.1862, "step": 9550 }, { "epoch": 0.6156521179141242, "grad_norm": 0.13402535021305084, "learning_rate": 0.0001589558646745664, "loss": 0.1778, "step": 9560 }, { "epoch": 0.6162961054851641, "grad_norm": 0.13248373568058014, "learning_rate": 0.00015891293147861927, "loss": 0.1807, "step": 9570 }, { "epoch": 0.616940093056204, "grad_norm": 0.12801574170589447, "learning_rate": 0.00015886999828267217, "loss": 0.1798, "step": 9580 }, { "epoch": 0.6175840806272439, "grad_norm": 0.121160127222538, "learning_rate": 0.00015882706508672507, "loss": 0.1851, "step": 9590 }, { "epoch": 0.6182280681982838, "grad_norm": 0.11797921359539032, "learning_rate": 0.00015878413189077795, "loss": 0.1776, "step": 9600 }, { "epoch": 0.6188720557693237, "grad_norm": 0.12163789570331573, "learning_rate": 0.00015874119869483086, "loss": 0.1773, "step": 9610 }, { "epoch": 0.6195160433403635, "grad_norm": 0.12374745309352875, "learning_rate": 0.00015869826549888373, "loss": 0.1788, "step": 9620 }, { "epoch": 0.6201600309114034, "grad_norm": 0.12475545704364777, "learning_rate": 0.00015865533230293664, "loss": 0.185, "step": 9630 }, { "epoch": 0.6208040184824433, "grad_norm": 0.12008689343929291, "learning_rate": 0.00015861239910698954, "loss": 0.1828, "step": 9640 }, { "epoch": 0.6214480060534832, "grad_norm": 0.12422849982976913, "learning_rate": 0.00015856946591104242, "loss": 0.1873, "step": 9650 }, { "epoch": 0.622091993624523, "grad_norm": 0.11693509668111801, "learning_rate": 0.00015852653271509532, "loss": 0.1754, "step": 9660 }, { "epoch": 0.622735981195563, "grad_norm": 0.12975190579891205, "learning_rate": 0.00015848359951914823, "loss": 0.1835, "step": 9670 }, { "epoch": 0.6233799687666028, "grad_norm": 0.12920880317687988, "learning_rate": 0.0001584406663232011, "loss": 0.1877, "step": 9680 }, { "epoch": 0.6240239563376427, "grad_norm": 0.1136992797255516, "learning_rate": 0.000158397733127254, "loss": 0.1929, "step": 9690 }, { "epoch": 0.6246679439086825, "grad_norm": 0.12525349855422974, "learning_rate": 0.00015835479993130689, "loss": 0.1783, "step": 9700 }, { "epoch": 0.6253119314797224, "grad_norm": 0.13938850164413452, "learning_rate": 0.0001583118667353598, "loss": 0.1849, "step": 9710 }, { "epoch": 0.6259559190507623, "grad_norm": 0.11783743649721146, "learning_rate": 0.00015826893353941267, "loss": 0.1831, "step": 9720 }, { "epoch": 0.6265999066218022, "grad_norm": 0.12094628065824509, "learning_rate": 0.00015822600034346557, "loss": 0.1847, "step": 9730 }, { "epoch": 0.6272438941928421, "grad_norm": 0.1236024722456932, "learning_rate": 0.00015818306714751848, "loss": 0.1827, "step": 9740 }, { "epoch": 0.627887881763882, "grad_norm": 0.1303166002035141, "learning_rate": 0.00015814013395157138, "loss": 0.1836, "step": 9750 }, { "epoch": 0.6285318693349219, "grad_norm": 0.11904479563236237, "learning_rate": 0.00015809720075562426, "loss": 0.179, "step": 9760 }, { "epoch": 0.6291758569059617, "grad_norm": 0.13879507780075073, "learning_rate": 0.00015805426755967716, "loss": 0.179, "step": 9770 }, { "epoch": 0.6298198444770016, "grad_norm": 0.13292448222637177, "learning_rate": 0.00015801133436373004, "loss": 0.1847, "step": 9780 }, { "epoch": 0.6304638320480415, "grad_norm": 0.13970786333084106, "learning_rate": 0.00015796840116778294, "loss": 0.1772, "step": 9790 }, { "epoch": 0.6311078196190814, "grad_norm": 0.12745021283626556, "learning_rate": 0.00015792546797183582, "loss": 0.1881, "step": 9800 }, { "epoch": 0.6317518071901213, "grad_norm": 0.1306108683347702, "learning_rate": 0.00015788253477588872, "loss": 0.1792, "step": 9810 }, { "epoch": 0.6323957947611611, "grad_norm": 0.11028672009706497, "learning_rate": 0.00015783960157994163, "loss": 0.193, "step": 9820 }, { "epoch": 0.633039782332201, "grad_norm": 0.1265285164117813, "learning_rate": 0.0001577966683839945, "loss": 0.1829, "step": 9830 }, { "epoch": 0.6336837699032408, "grad_norm": 0.124021515250206, "learning_rate": 0.0001577537351880474, "loss": 0.1748, "step": 9840 }, { "epoch": 0.6343277574742807, "grad_norm": 0.11252953112125397, "learning_rate": 0.0001577108019921003, "loss": 0.1787, "step": 9850 }, { "epoch": 0.6349717450453206, "grad_norm": 0.12429779022932053, "learning_rate": 0.0001576678687961532, "loss": 0.1867, "step": 9860 }, { "epoch": 0.6356157326163605, "grad_norm": 0.12379419058561325, "learning_rate": 0.00015762493560020607, "loss": 0.184, "step": 9870 }, { "epoch": 0.6362597201874004, "grad_norm": 0.12012134492397308, "learning_rate": 0.00015758200240425897, "loss": 0.1871, "step": 9880 }, { "epoch": 0.6369037077584403, "grad_norm": 0.140142560005188, "learning_rate": 0.00015753906920831188, "loss": 0.1834, "step": 9890 }, { "epoch": 0.6375476953294802, "grad_norm": 0.13832300901412964, "learning_rate": 0.00015749613601236478, "loss": 0.1772, "step": 9900 }, { "epoch": 0.63819168290052, "grad_norm": 0.12235907465219498, "learning_rate": 0.00015745320281641766, "loss": 0.1864, "step": 9910 }, { "epoch": 0.6388356704715599, "grad_norm": 0.12046720832586288, "learning_rate": 0.00015741026962047056, "loss": 0.1924, "step": 9920 }, { "epoch": 0.6394796580425998, "grad_norm": 0.11409910768270493, "learning_rate": 0.00015736733642452347, "loss": 0.1849, "step": 9930 }, { "epoch": 0.6401236456136397, "grad_norm": 0.11979924142360687, "learning_rate": 0.00015732440322857634, "loss": 0.1731, "step": 9940 }, { "epoch": 0.6407676331846796, "grad_norm": 0.11986327916383743, "learning_rate": 0.00015728147003262922, "loss": 0.1808, "step": 9950 }, { "epoch": 0.6414116207557194, "grad_norm": 0.15545156598091125, "learning_rate": 0.00015723853683668212, "loss": 0.1854, "step": 9960 }, { "epoch": 0.6420556083267593, "grad_norm": 0.138249471783638, "learning_rate": 0.00015719560364073503, "loss": 0.1777, "step": 9970 }, { "epoch": 0.6426995958977991, "grad_norm": 0.11500449478626251, "learning_rate": 0.00015715267044478793, "loss": 0.1787, "step": 9980 }, { "epoch": 0.643343583468839, "grad_norm": 0.13021157681941986, "learning_rate": 0.0001571097372488408, "loss": 0.1763, "step": 9990 }, { "epoch": 0.6439875710398789, "grad_norm": 0.10585910826921463, "learning_rate": 0.00015706680405289371, "loss": 0.183, "step": 10000 }, { "epoch": 0.6446315586109188, "grad_norm": 0.12538816034793854, "learning_rate": 0.00015702387085694662, "loss": 0.1867, "step": 10010 }, { "epoch": 0.6452755461819587, "grad_norm": 0.1207873523235321, "learning_rate": 0.0001569809376609995, "loss": 0.1829, "step": 10020 }, { "epoch": 0.6459195337529986, "grad_norm": 0.10265393555164337, "learning_rate": 0.00015693800446505237, "loss": 0.187, "step": 10030 }, { "epoch": 0.6465635213240385, "grad_norm": 0.1322927623987198, "learning_rate": 0.00015689507126910528, "loss": 0.186, "step": 10040 }, { "epoch": 0.6472075088950783, "grad_norm": 0.11145605891942978, "learning_rate": 0.00015685213807315818, "loss": 0.1751, "step": 10050 }, { "epoch": 0.6478514964661182, "grad_norm": 0.12812308967113495, "learning_rate": 0.00015680920487721106, "loss": 0.1862, "step": 10060 }, { "epoch": 0.6484954840371581, "grad_norm": 0.11436449736356735, "learning_rate": 0.00015676627168126396, "loss": 0.1855, "step": 10070 }, { "epoch": 0.649139471608198, "grad_norm": 0.1036512553691864, "learning_rate": 0.00015672333848531687, "loss": 0.1715, "step": 10080 }, { "epoch": 0.6497834591792379, "grad_norm": 0.1483590304851532, "learning_rate": 0.00015668040528936977, "loss": 0.1814, "step": 10090 }, { "epoch": 0.6504274467502777, "grad_norm": 0.12994416058063507, "learning_rate": 0.00015663747209342262, "loss": 0.1885, "step": 10100 }, { "epoch": 0.6510714343213176, "grad_norm": 0.11573436856269836, "learning_rate": 0.00015659453889747552, "loss": 0.186, "step": 10110 }, { "epoch": 0.6517154218923574, "grad_norm": 0.11547239124774933, "learning_rate": 0.00015655160570152843, "loss": 0.1791, "step": 10120 }, { "epoch": 0.6523594094633973, "grad_norm": 0.12598951160907745, "learning_rate": 0.00015650867250558133, "loss": 0.1698, "step": 10130 }, { "epoch": 0.6530033970344372, "grad_norm": 0.11780860275030136, "learning_rate": 0.0001564657393096342, "loss": 0.1877, "step": 10140 }, { "epoch": 0.6536473846054771, "grad_norm": 0.12180910259485245, "learning_rate": 0.00015642280611368711, "loss": 0.1851, "step": 10150 }, { "epoch": 0.654291372176517, "grad_norm": 0.12876203656196594, "learning_rate": 0.00015637987291774002, "loss": 0.186, "step": 10160 }, { "epoch": 0.6549353597475569, "grad_norm": 0.12706637382507324, "learning_rate": 0.0001563369397217929, "loss": 0.1805, "step": 10170 }, { "epoch": 0.6555793473185968, "grad_norm": 0.10901609808206558, "learning_rate": 0.00015629400652584577, "loss": 0.1762, "step": 10180 }, { "epoch": 0.6562233348896366, "grad_norm": 0.13943059742450714, "learning_rate": 0.00015625107332989868, "loss": 0.1813, "step": 10190 }, { "epoch": 0.6568673224606765, "grad_norm": 0.1261075884103775, "learning_rate": 0.00015620814013395158, "loss": 0.1792, "step": 10200 }, { "epoch": 0.6575113100317164, "grad_norm": 0.12762604653835297, "learning_rate": 0.00015616520693800446, "loss": 0.1827, "step": 10210 }, { "epoch": 0.6581552976027563, "grad_norm": 0.13128679990768433, "learning_rate": 0.00015612227374205736, "loss": 0.1836, "step": 10220 }, { "epoch": 0.6587992851737962, "grad_norm": 0.11626247316598892, "learning_rate": 0.00015607934054611027, "loss": 0.1796, "step": 10230 }, { "epoch": 0.659443272744836, "grad_norm": 0.11508921533823013, "learning_rate": 0.00015603640735016317, "loss": 0.1828, "step": 10240 }, { "epoch": 0.660087260315876, "grad_norm": 0.11555810272693634, "learning_rate": 0.00015599347415421605, "loss": 0.1853, "step": 10250 }, { "epoch": 0.6607312478869157, "grad_norm": 0.15066371858119965, "learning_rate": 0.00015595054095826893, "loss": 0.1797, "step": 10260 }, { "epoch": 0.6613752354579556, "grad_norm": 0.12681140005588531, "learning_rate": 0.00015590760776232183, "loss": 0.1867, "step": 10270 }, { "epoch": 0.6620192230289955, "grad_norm": 0.1256832629442215, "learning_rate": 0.00015586467456637473, "loss": 0.1827, "step": 10280 }, { "epoch": 0.6626632106000354, "grad_norm": 0.1188385933637619, "learning_rate": 0.0001558217413704276, "loss": 0.1725, "step": 10290 }, { "epoch": 0.6633071981710753, "grad_norm": 0.11634189635515213, "learning_rate": 0.00015577880817448052, "loss": 0.1912, "step": 10300 }, { "epoch": 0.6639511857421152, "grad_norm": 0.1239052414894104, "learning_rate": 0.00015573587497853342, "loss": 0.1873, "step": 10310 }, { "epoch": 0.6645951733131551, "grad_norm": 0.11711441725492477, "learning_rate": 0.0001556929417825863, "loss": 0.1849, "step": 10320 }, { "epoch": 0.6652391608841949, "grad_norm": 0.12301543354988098, "learning_rate": 0.0001556500085866392, "loss": 0.1821, "step": 10330 }, { "epoch": 0.6658831484552348, "grad_norm": 0.11552150547504425, "learning_rate": 0.00015560707539069208, "loss": 0.1805, "step": 10340 }, { "epoch": 0.6665271360262747, "grad_norm": 0.1116911768913269, "learning_rate": 0.00015556414219474498, "loss": 0.1848, "step": 10350 }, { "epoch": 0.6671711235973146, "grad_norm": 0.1292039155960083, "learning_rate": 0.00015552120899879789, "loss": 0.1761, "step": 10360 }, { "epoch": 0.6678151111683545, "grad_norm": 0.1348753273487091, "learning_rate": 0.00015547827580285076, "loss": 0.1789, "step": 10370 }, { "epoch": 0.6684590987393944, "grad_norm": 0.13736863434314728, "learning_rate": 0.00015543534260690367, "loss": 0.1847, "step": 10380 }, { "epoch": 0.6691030863104342, "grad_norm": 0.12522286176681519, "learning_rate": 0.00015539240941095657, "loss": 0.1893, "step": 10390 }, { "epoch": 0.6697470738814741, "grad_norm": 0.13418623805046082, "learning_rate": 0.00015534947621500945, "loss": 0.1826, "step": 10400 }, { "epoch": 0.6703910614525139, "grad_norm": 0.13481448590755463, "learning_rate": 0.00015530654301906235, "loss": 0.1761, "step": 10410 }, { "epoch": 0.6710350490235538, "grad_norm": 0.12215957790613174, "learning_rate": 0.00015526360982311523, "loss": 0.1773, "step": 10420 }, { "epoch": 0.6716790365945937, "grad_norm": 0.10750186443328857, "learning_rate": 0.00015522067662716813, "loss": 0.1863, "step": 10430 }, { "epoch": 0.6723230241656336, "grad_norm": 0.12304190546274185, "learning_rate": 0.000155177743431221, "loss": 0.1861, "step": 10440 }, { "epoch": 0.6729670117366735, "grad_norm": 0.1363278478384018, "learning_rate": 0.00015513481023527392, "loss": 0.1876, "step": 10450 }, { "epoch": 0.6736109993077134, "grad_norm": 0.11273949593305588, "learning_rate": 0.00015509187703932682, "loss": 0.1868, "step": 10460 }, { "epoch": 0.6742549868787533, "grad_norm": 0.12765565514564514, "learning_rate": 0.00015504894384337972, "loss": 0.1828, "step": 10470 }, { "epoch": 0.6748989744497931, "grad_norm": 0.12336055934429169, "learning_rate": 0.0001550060106474326, "loss": 0.1924, "step": 10480 }, { "epoch": 0.675542962020833, "grad_norm": 0.11120251566171646, "learning_rate": 0.0001549630774514855, "loss": 0.1853, "step": 10490 }, { "epoch": 0.6761869495918729, "grad_norm": 0.11801023781299591, "learning_rate": 0.00015492014425553838, "loss": 0.1817, "step": 10500 }, { "epoch": 0.6768309371629128, "grad_norm": 0.11739988625049591, "learning_rate": 0.0001548772110595913, "loss": 0.1782, "step": 10510 }, { "epoch": 0.6774749247339527, "grad_norm": 0.13465306162834167, "learning_rate": 0.00015483427786364416, "loss": 0.1797, "step": 10520 }, { "epoch": 0.6781189123049925, "grad_norm": 0.12915705144405365, "learning_rate": 0.00015479134466769707, "loss": 0.1866, "step": 10530 }, { "epoch": 0.6787628998760324, "grad_norm": 0.11565142124891281, "learning_rate": 0.00015474841147174997, "loss": 0.1884, "step": 10540 }, { "epoch": 0.6794068874470722, "grad_norm": 0.10858917981386185, "learning_rate": 0.00015470547827580285, "loss": 0.1865, "step": 10550 }, { "epoch": 0.6800508750181121, "grad_norm": 0.12801474332809448, "learning_rate": 0.00015466254507985575, "loss": 0.1812, "step": 10560 }, { "epoch": 0.680694862589152, "grad_norm": 0.12707339227199554, "learning_rate": 0.00015461961188390866, "loss": 0.1809, "step": 10570 }, { "epoch": 0.6813388501601919, "grad_norm": 0.12840226292610168, "learning_rate": 0.00015457667868796154, "loss": 0.1854, "step": 10580 }, { "epoch": 0.6819828377312318, "grad_norm": 0.12207160890102386, "learning_rate": 0.0001545337454920144, "loss": 0.1823, "step": 10590 }, { "epoch": 0.6826268253022717, "grad_norm": 0.13766202330589294, "learning_rate": 0.00015449081229606732, "loss": 0.175, "step": 10600 }, { "epoch": 0.6832708128733116, "grad_norm": 0.1290808469057083, "learning_rate": 0.00015444787910012022, "loss": 0.1811, "step": 10610 }, { "epoch": 0.6839148004443514, "grad_norm": 0.13641753792762756, "learning_rate": 0.00015440494590417313, "loss": 0.1828, "step": 10620 }, { "epoch": 0.6845587880153913, "grad_norm": 0.13058632612228394, "learning_rate": 0.000154362012708226, "loss": 0.1779, "step": 10630 }, { "epoch": 0.6852027755864312, "grad_norm": 0.12730783224105835, "learning_rate": 0.0001543190795122789, "loss": 0.1893, "step": 10640 }, { "epoch": 0.6858467631574711, "grad_norm": 0.1335470825433731, "learning_rate": 0.0001542761463163318, "loss": 0.1776, "step": 10650 }, { "epoch": 0.686490750728511, "grad_norm": 0.15676145255565643, "learning_rate": 0.0001542332131203847, "loss": 0.1855, "step": 10660 }, { "epoch": 0.6871347382995509, "grad_norm": 0.12095948308706284, "learning_rate": 0.00015419027992443756, "loss": 0.1811, "step": 10670 }, { "epoch": 0.6877787258705907, "grad_norm": 0.11838562041521072, "learning_rate": 0.00015414734672849047, "loss": 0.1788, "step": 10680 }, { "epoch": 0.6884227134416305, "grad_norm": 0.11441204696893692, "learning_rate": 0.00015410441353254337, "loss": 0.1787, "step": 10690 }, { "epoch": 0.6890667010126704, "grad_norm": 0.12224394828081131, "learning_rate": 0.00015406148033659628, "loss": 0.1878, "step": 10700 }, { "epoch": 0.6897106885837103, "grad_norm": 0.11149287223815918, "learning_rate": 0.00015401854714064915, "loss": 0.1787, "step": 10710 }, { "epoch": 0.6903546761547502, "grad_norm": 0.13191431760787964, "learning_rate": 0.00015397561394470206, "loss": 0.1905, "step": 10720 }, { "epoch": 0.6909986637257901, "grad_norm": 0.1352940797805786, "learning_rate": 0.00015393268074875496, "loss": 0.1811, "step": 10730 }, { "epoch": 0.69164265129683, "grad_norm": 0.1318899244070053, "learning_rate": 0.00015388974755280784, "loss": 0.1795, "step": 10740 }, { "epoch": 0.6922866388678699, "grad_norm": 0.13032390177249908, "learning_rate": 0.00015384681435686072, "loss": 0.1855, "step": 10750 }, { "epoch": 0.6929306264389097, "grad_norm": 0.1378917396068573, "learning_rate": 0.00015380388116091362, "loss": 0.1865, "step": 10760 }, { "epoch": 0.6935746140099496, "grad_norm": 0.5948559641838074, "learning_rate": 0.00015376094796496653, "loss": 0.2238, "step": 10770 }, { "epoch": 0.6942186015809895, "grad_norm": 0.18947122991085052, "learning_rate": 0.0001537180147690194, "loss": 0.2117, "step": 10780 }, { "epoch": 0.6948625891520294, "grad_norm": 0.1646818220615387, "learning_rate": 0.0001536750815730723, "loss": 0.1823, "step": 10790 }, { "epoch": 0.6955065767230693, "grad_norm": 0.1616562008857727, "learning_rate": 0.0001536321483771252, "loss": 0.1896, "step": 10800 }, { "epoch": 0.6961505642941092, "grad_norm": 0.11661975085735321, "learning_rate": 0.00015358921518117812, "loss": 0.1794, "step": 10810 }, { "epoch": 0.696794551865149, "grad_norm": 0.1435999870300293, "learning_rate": 0.00015354628198523097, "loss": 0.188, "step": 10820 }, { "epoch": 0.6974385394361888, "grad_norm": 0.12602081894874573, "learning_rate": 0.00015350334878928387, "loss": 0.1831, "step": 10830 }, { "epoch": 0.6980825270072287, "grad_norm": 0.12404799461364746, "learning_rate": 0.00015346041559333677, "loss": 0.1707, "step": 10840 }, { "epoch": 0.6987265145782686, "grad_norm": 0.14377953112125397, "learning_rate": 0.00015341748239738968, "loss": 0.1844, "step": 10850 }, { "epoch": 0.6993705021493085, "grad_norm": 0.12876540422439575, "learning_rate": 0.00015337454920144256, "loss": 0.185, "step": 10860 }, { "epoch": 0.7000144897203484, "grad_norm": 0.1364852339029312, "learning_rate": 0.00015333161600549546, "loss": 0.1786, "step": 10870 }, { "epoch": 0.7006584772913883, "grad_norm": 0.11494998633861542, "learning_rate": 0.00015328868280954836, "loss": 0.1823, "step": 10880 }, { "epoch": 0.7013024648624282, "grad_norm": 0.14610570669174194, "learning_rate": 0.00015324574961360124, "loss": 0.1826, "step": 10890 }, { "epoch": 0.701946452433468, "grad_norm": 0.13191308081150055, "learning_rate": 0.00015320281641765412, "loss": 0.1817, "step": 10900 }, { "epoch": 0.7025904400045079, "grad_norm": 0.11126526445150375, "learning_rate": 0.00015315988322170702, "loss": 0.1797, "step": 10910 }, { "epoch": 0.7032344275755478, "grad_norm": 0.11863549798727036, "learning_rate": 0.00015311695002575993, "loss": 0.1777, "step": 10920 }, { "epoch": 0.7038784151465877, "grad_norm": 0.12357557564973831, "learning_rate": 0.0001530740168298128, "loss": 0.1822, "step": 10930 }, { "epoch": 0.7045224027176276, "grad_norm": 0.157414048910141, "learning_rate": 0.0001530310836338657, "loss": 0.1866, "step": 10940 }, { "epoch": 0.7051663902886675, "grad_norm": 0.13054810464382172, "learning_rate": 0.0001529881504379186, "loss": 0.1777, "step": 10950 }, { "epoch": 0.7058103778597073, "grad_norm": 0.12848138809204102, "learning_rate": 0.00015294521724197152, "loss": 0.1789, "step": 10960 }, { "epoch": 0.7064543654307471, "grad_norm": 0.11525880545377731, "learning_rate": 0.0001529022840460244, "loss": 0.1752, "step": 10970 }, { "epoch": 0.707098353001787, "grad_norm": 0.1236671656370163, "learning_rate": 0.00015285935085007727, "loss": 0.1824, "step": 10980 }, { "epoch": 0.7077423405728269, "grad_norm": 0.12144391983747482, "learning_rate": 0.00015281641765413017, "loss": 0.1765, "step": 10990 }, { "epoch": 0.7083863281438668, "grad_norm": 0.1258617788553238, "learning_rate": 0.00015277348445818308, "loss": 0.1829, "step": 11000 }, { "epoch": 0.7090303157149067, "grad_norm": 0.13977117836475372, "learning_rate": 0.00015273055126223596, "loss": 0.1846, "step": 11010 }, { "epoch": 0.7096743032859466, "grad_norm": 0.11495742946863174, "learning_rate": 0.00015268761806628886, "loss": 0.1858, "step": 11020 }, { "epoch": 0.7103182908569865, "grad_norm": 0.12212269008159637, "learning_rate": 0.00015264468487034176, "loss": 0.188, "step": 11030 }, { "epoch": 0.7109622784280263, "grad_norm": 0.13735106587409973, "learning_rate": 0.00015260175167439467, "loss": 0.1775, "step": 11040 }, { "epoch": 0.7116062659990662, "grad_norm": 0.1467486172914505, "learning_rate": 0.00015255881847844755, "loss": 0.1781, "step": 11050 }, { "epoch": 0.7122502535701061, "grad_norm": 0.1215139627456665, "learning_rate": 0.00015251588528250042, "loss": 0.1942, "step": 11060 }, { "epoch": 0.712894241141146, "grad_norm": 0.12549735605716705, "learning_rate": 0.00015247295208655333, "loss": 0.1859, "step": 11070 }, { "epoch": 0.7135382287121859, "grad_norm": 0.12103933095932007, "learning_rate": 0.00015243001889060623, "loss": 0.1804, "step": 11080 }, { "epoch": 0.7141822162832258, "grad_norm": 0.1233392059803009, "learning_rate": 0.0001523870856946591, "loss": 0.1878, "step": 11090 }, { "epoch": 0.7148262038542657, "grad_norm": 0.13037101924419403, "learning_rate": 0.000152344152498712, "loss": 0.1816, "step": 11100 }, { "epoch": 0.7154701914253055, "grad_norm": 0.1410302370786667, "learning_rate": 0.00015230121930276492, "loss": 0.1901, "step": 11110 }, { "epoch": 0.7161141789963453, "grad_norm": 0.1427489072084427, "learning_rate": 0.0001522582861068178, "loss": 0.1878, "step": 11120 }, { "epoch": 0.7167581665673852, "grad_norm": 0.1273730844259262, "learning_rate": 0.0001522153529108707, "loss": 0.1791, "step": 11130 }, { "epoch": 0.7174021541384251, "grad_norm": 0.11884095519781113, "learning_rate": 0.00015217241971492358, "loss": 0.176, "step": 11140 }, { "epoch": 0.718046141709465, "grad_norm": 0.1295333206653595, "learning_rate": 0.00015212948651897648, "loss": 0.1807, "step": 11150 }, { "epoch": 0.7186901292805049, "grad_norm": 0.1346159428358078, "learning_rate": 0.00015208655332302936, "loss": 0.1831, "step": 11160 }, { "epoch": 0.7193341168515448, "grad_norm": 0.11896652728319168, "learning_rate": 0.00015204362012708226, "loss": 0.1751, "step": 11170 }, { "epoch": 0.7199781044225847, "grad_norm": 0.13782118260860443, "learning_rate": 0.00015200068693113517, "loss": 0.1778, "step": 11180 }, { "epoch": 0.7206220919936245, "grad_norm": 0.14656130969524384, "learning_rate": 0.00015195775373518807, "loss": 0.1781, "step": 11190 }, { "epoch": 0.7212660795646644, "grad_norm": 0.12403392791748047, "learning_rate": 0.00015191482053924095, "loss": 0.1814, "step": 11200 }, { "epoch": 0.7219100671357043, "grad_norm": 0.10232749581336975, "learning_rate": 0.00015187188734329385, "loss": 0.1797, "step": 11210 }, { "epoch": 0.7225540547067442, "grad_norm": 0.14276835322380066, "learning_rate": 0.00015182895414734673, "loss": 0.1877, "step": 11220 }, { "epoch": 0.7231980422777841, "grad_norm": 0.13864664733409882, "learning_rate": 0.00015178602095139963, "loss": 0.1843, "step": 11230 }, { "epoch": 0.723842029848824, "grad_norm": 0.13407225906848907, "learning_rate": 0.0001517430877554525, "loss": 0.18, "step": 11240 }, { "epoch": 0.7244860174198638, "grad_norm": 0.12601064145565033, "learning_rate": 0.0001517001545595054, "loss": 0.1824, "step": 11250 }, { "epoch": 0.7251300049909036, "grad_norm": 0.12685860693454742, "learning_rate": 0.00015165722136355832, "loss": 0.1895, "step": 11260 }, { "epoch": 0.7257739925619435, "grad_norm": 0.12483492493629456, "learning_rate": 0.0001516142881676112, "loss": 0.1873, "step": 11270 }, { "epoch": 0.7264179801329834, "grad_norm": 0.12133167684078217, "learning_rate": 0.0001515713549716641, "loss": 0.184, "step": 11280 }, { "epoch": 0.7270619677040233, "grad_norm": 0.1263548582792282, "learning_rate": 0.000151528421775717, "loss": 0.1864, "step": 11290 }, { "epoch": 0.7277059552750632, "grad_norm": 0.22570818662643433, "learning_rate": 0.0001514854885797699, "loss": 0.1887, "step": 11300 }, { "epoch": 0.7283499428461031, "grad_norm": 0.13797996938228607, "learning_rate": 0.00015144255538382276, "loss": 0.1811, "step": 11310 }, { "epoch": 0.728993930417143, "grad_norm": 0.12818408012390137, "learning_rate": 0.00015139962218787566, "loss": 0.1767, "step": 11320 }, { "epoch": 0.7296379179881828, "grad_norm": 0.12317291647195816, "learning_rate": 0.00015135668899192857, "loss": 0.183, "step": 11330 }, { "epoch": 0.7302819055592227, "grad_norm": 0.12558196485042572, "learning_rate": 0.00015131375579598147, "loss": 0.1842, "step": 11340 }, { "epoch": 0.7309258931302626, "grad_norm": 0.13556988537311554, "learning_rate": 0.00015127082260003435, "loss": 0.1847, "step": 11350 }, { "epoch": 0.7315698807013025, "grad_norm": 0.126445472240448, "learning_rate": 0.00015122788940408725, "loss": 0.1903, "step": 11360 }, { "epoch": 0.7322138682723424, "grad_norm": 0.11116205900907516, "learning_rate": 0.00015118495620814016, "loss": 0.185, "step": 11370 }, { "epoch": 0.7328578558433823, "grad_norm": 0.1252608597278595, "learning_rate": 0.00015114202301219303, "loss": 0.1771, "step": 11380 }, { "epoch": 0.7335018434144221, "grad_norm": 0.115908183157444, "learning_rate": 0.0001510990898162459, "loss": 0.1868, "step": 11390 }, { "epoch": 0.7341458309854619, "grad_norm": 0.1336739957332611, "learning_rate": 0.00015105615662029881, "loss": 0.1807, "step": 11400 }, { "epoch": 0.7347898185565018, "grad_norm": 0.1070767492055893, "learning_rate": 0.00015101322342435172, "loss": 0.1823, "step": 11410 }, { "epoch": 0.7354338061275417, "grad_norm": 0.14094622433185577, "learning_rate": 0.00015097029022840462, "loss": 0.1805, "step": 11420 }, { "epoch": 0.7360777936985816, "grad_norm": 0.12399724870920181, "learning_rate": 0.0001509273570324575, "loss": 0.174, "step": 11430 }, { "epoch": 0.7367217812696215, "grad_norm": 0.12219887971878052, "learning_rate": 0.0001508844238365104, "loss": 0.1894, "step": 11440 }, { "epoch": 0.7373657688406614, "grad_norm": 0.10727710276842117, "learning_rate": 0.0001508414906405633, "loss": 0.1843, "step": 11450 }, { "epoch": 0.7380097564117013, "grad_norm": 0.12610334157943726, "learning_rate": 0.00015079855744461619, "loss": 0.1739, "step": 11460 }, { "epoch": 0.7386537439827411, "grad_norm": 0.13109715282917023, "learning_rate": 0.00015075562424866906, "loss": 0.1846, "step": 11470 }, { "epoch": 0.739297731553781, "grad_norm": 0.12219800055027008, "learning_rate": 0.00015071269105272197, "loss": 0.1804, "step": 11480 }, { "epoch": 0.7399417191248209, "grad_norm": 0.14216776192188263, "learning_rate": 0.00015066975785677487, "loss": 0.1832, "step": 11490 }, { "epoch": 0.7405857066958608, "grad_norm": 0.14004680514335632, "learning_rate": 0.00015062682466082775, "loss": 0.1856, "step": 11500 }, { "epoch": 0.7412296942669007, "grad_norm": 0.13661547005176544, "learning_rate": 0.00015058389146488065, "loss": 0.1902, "step": 11510 }, { "epoch": 0.7418736818379406, "grad_norm": 0.11802347749471664, "learning_rate": 0.00015054095826893356, "loss": 0.1804, "step": 11520 }, { "epoch": 0.7425176694089805, "grad_norm": 0.16261398792266846, "learning_rate": 0.00015049802507298646, "loss": 0.1805, "step": 11530 }, { "epoch": 0.7431616569800202, "grad_norm": 0.12560710310935974, "learning_rate": 0.00015045509187703934, "loss": 0.1803, "step": 11540 }, { "epoch": 0.7438056445510601, "grad_norm": 0.13283301889896393, "learning_rate": 0.00015041215868109221, "loss": 0.1823, "step": 11550 }, { "epoch": 0.7444496321221, "grad_norm": 0.12267062067985535, "learning_rate": 0.00015036922548514512, "loss": 0.1849, "step": 11560 }, { "epoch": 0.7450936196931399, "grad_norm": 0.13039268553256989, "learning_rate": 0.00015032629228919802, "loss": 0.1809, "step": 11570 }, { "epoch": 0.7457376072641798, "grad_norm": 0.11451885104179382, "learning_rate": 0.0001502833590932509, "loss": 0.1831, "step": 11580 }, { "epoch": 0.7463815948352197, "grad_norm": 0.13164785504341125, "learning_rate": 0.0001502404258973038, "loss": 0.1864, "step": 11590 }, { "epoch": 0.7470255824062596, "grad_norm": 0.11327072978019714, "learning_rate": 0.0001501974927013567, "loss": 0.1742, "step": 11600 }, { "epoch": 0.7476695699772994, "grad_norm": 0.11311496794223785, "learning_rate": 0.00015015455950540959, "loss": 0.1827, "step": 11610 }, { "epoch": 0.7483135575483393, "grad_norm": 0.12215474992990494, "learning_rate": 0.0001501116263094625, "loss": 0.1857, "step": 11620 }, { "epoch": 0.7489575451193792, "grad_norm": 0.12786240875720978, "learning_rate": 0.00015006869311351537, "loss": 0.1887, "step": 11630 }, { "epoch": 0.7496015326904191, "grad_norm": 0.11171654611825943, "learning_rate": 0.00015002575991756827, "loss": 0.1876, "step": 11640 }, { "epoch": 0.750245520261459, "grad_norm": 0.12272095680236816, "learning_rate": 0.00014998282672162115, "loss": 0.1866, "step": 11650 }, { "epoch": 0.7508895078324989, "grad_norm": 0.14321759343147278, "learning_rate": 0.00014993989352567405, "loss": 0.1804, "step": 11660 }, { "epoch": 0.7515334954035388, "grad_norm": 0.1250942051410675, "learning_rate": 0.00014989696032972696, "loss": 0.1783, "step": 11670 }, { "epoch": 0.7521774829745785, "grad_norm": 0.1259155571460724, "learning_rate": 0.00014985402713377986, "loss": 0.1914, "step": 11680 }, { "epoch": 0.7528214705456184, "grad_norm": 0.11394001543521881, "learning_rate": 0.00014981109393783274, "loss": 0.1773, "step": 11690 }, { "epoch": 0.7534654581166583, "grad_norm": 0.12453345209360123, "learning_rate": 0.00014976816074188564, "loss": 0.1834, "step": 11700 }, { "epoch": 0.7541094456876982, "grad_norm": 0.1192169040441513, "learning_rate": 0.00014972522754593852, "loss": 0.1801, "step": 11710 }, { "epoch": 0.7547534332587381, "grad_norm": 0.12484902143478394, "learning_rate": 0.00014968229434999142, "loss": 0.1821, "step": 11720 }, { "epoch": 0.755397420829778, "grad_norm": 0.12469808757305145, "learning_rate": 0.0001496393611540443, "loss": 0.1837, "step": 11730 }, { "epoch": 0.7560414084008179, "grad_norm": 0.1243097335100174, "learning_rate": 0.0001495964279580972, "loss": 0.1842, "step": 11740 }, { "epoch": 0.7566853959718577, "grad_norm": 0.113457590341568, "learning_rate": 0.0001495534947621501, "loss": 0.1828, "step": 11750 }, { "epoch": 0.7573293835428976, "grad_norm": 0.11249776184558868, "learning_rate": 0.00014951056156620301, "loss": 0.1792, "step": 11760 }, { "epoch": 0.7579733711139375, "grad_norm": 0.12997475266456604, "learning_rate": 0.0001494676283702559, "loss": 0.191, "step": 11770 }, { "epoch": 0.7586173586849774, "grad_norm": 0.11402156203985214, "learning_rate": 0.0001494246951743088, "loss": 0.1839, "step": 11780 }, { "epoch": 0.7592613462560173, "grad_norm": 0.13085803389549255, "learning_rate": 0.00014938176197836167, "loss": 0.1867, "step": 11790 }, { "epoch": 0.7599053338270572, "grad_norm": 0.11946132034063339, "learning_rate": 0.00014933882878241458, "loss": 0.1861, "step": 11800 }, { "epoch": 0.7605493213980971, "grad_norm": 0.10724080353975296, "learning_rate": 0.00014929589558646745, "loss": 0.1764, "step": 11810 }, { "epoch": 0.761193308969137, "grad_norm": 0.11313193291425705, "learning_rate": 0.00014925296239052036, "loss": 0.1802, "step": 11820 }, { "epoch": 0.7618372965401767, "grad_norm": 0.12636317312717438, "learning_rate": 0.00014921002919457326, "loss": 0.1786, "step": 11830 }, { "epoch": 0.7624812841112166, "grad_norm": 0.11290590465068817, "learning_rate": 0.00014916709599862614, "loss": 0.1784, "step": 11840 }, { "epoch": 0.7631252716822565, "grad_norm": 0.1347220540046692, "learning_rate": 0.00014912416280267904, "loss": 0.182, "step": 11850 }, { "epoch": 0.7637692592532964, "grad_norm": 0.13220171630382538, "learning_rate": 0.00014908122960673195, "loss": 0.1843, "step": 11860 }, { "epoch": 0.7644132468243363, "grad_norm": 0.12650667130947113, "learning_rate": 0.00014903829641078482, "loss": 0.1825, "step": 11870 }, { "epoch": 0.7650572343953762, "grad_norm": 0.12217368930578232, "learning_rate": 0.0001489953632148377, "loss": 0.1815, "step": 11880 }, { "epoch": 0.7657012219664161, "grad_norm": 0.12662792205810547, "learning_rate": 0.0001489524300188906, "loss": 0.1823, "step": 11890 }, { "epoch": 0.7663452095374559, "grad_norm": 0.12890563905239105, "learning_rate": 0.0001489094968229435, "loss": 0.1854, "step": 11900 }, { "epoch": 0.7669891971084958, "grad_norm": 0.10964226722717285, "learning_rate": 0.00014886656362699641, "loss": 0.1789, "step": 11910 }, { "epoch": 0.7676331846795357, "grad_norm": 0.11558173596858978, "learning_rate": 0.0001488236304310493, "loss": 0.182, "step": 11920 }, { "epoch": 0.7682771722505756, "grad_norm": 0.12948691844940186, "learning_rate": 0.0001487806972351022, "loss": 0.1814, "step": 11930 }, { "epoch": 0.7689211598216155, "grad_norm": 0.1096864566206932, "learning_rate": 0.0001487377640391551, "loss": 0.1791, "step": 11940 }, { "epoch": 0.7695651473926554, "grad_norm": 0.11392433196306229, "learning_rate": 0.00014869483084320798, "loss": 0.1843, "step": 11950 }, { "epoch": 0.7702091349636953, "grad_norm": 0.12159140408039093, "learning_rate": 0.00014865189764726085, "loss": 0.1812, "step": 11960 }, { "epoch": 0.770853122534735, "grad_norm": 0.12319859117269516, "learning_rate": 0.00014860896445131376, "loss": 0.1821, "step": 11970 }, { "epoch": 0.7714971101057749, "grad_norm": 0.11696973443031311, "learning_rate": 0.00014856603125536666, "loss": 0.1879, "step": 11980 }, { "epoch": 0.7721410976768148, "grad_norm": 0.11904364824295044, "learning_rate": 0.00014852309805941954, "loss": 0.1826, "step": 11990 }, { "epoch": 0.7727850852478547, "grad_norm": 0.12303508073091507, "learning_rate": 0.00014848016486347244, "loss": 0.1769, "step": 12000 }, { "epoch": 0.7734290728188946, "grad_norm": 0.12143351137638092, "learning_rate": 0.00014843723166752535, "loss": 0.1831, "step": 12010 }, { "epoch": 0.7740730603899345, "grad_norm": 0.12525080144405365, "learning_rate": 0.00014839429847157825, "loss": 0.1797, "step": 12020 }, { "epoch": 0.7747170479609744, "grad_norm": 0.13215422630310059, "learning_rate": 0.0001483513652756311, "loss": 0.1824, "step": 12030 }, { "epoch": 0.7753610355320142, "grad_norm": 0.1087992787361145, "learning_rate": 0.000148308432079684, "loss": 0.1766, "step": 12040 }, { "epoch": 0.7760050231030541, "grad_norm": 0.12900178134441376, "learning_rate": 0.0001482654988837369, "loss": 0.1836, "step": 12050 }, { "epoch": 0.776649010674094, "grad_norm": 0.1175537183880806, "learning_rate": 0.00014822256568778982, "loss": 0.1834, "step": 12060 }, { "epoch": 0.7772929982451339, "grad_norm": 0.13086000084877014, "learning_rate": 0.0001481796324918427, "loss": 0.1838, "step": 12070 }, { "epoch": 0.7779369858161738, "grad_norm": 0.11117366701364517, "learning_rate": 0.0001481366992958956, "loss": 0.1891, "step": 12080 }, { "epoch": 0.7785809733872137, "grad_norm": 0.11716551333665848, "learning_rate": 0.0001480937660999485, "loss": 0.182, "step": 12090 }, { "epoch": 0.7792249609582536, "grad_norm": 0.11998970061540604, "learning_rate": 0.00014805083290400138, "loss": 0.1789, "step": 12100 }, { "epoch": 0.7798689485292933, "grad_norm": 0.1246514618396759, "learning_rate": 0.00014800789970805425, "loss": 0.1803, "step": 12110 }, { "epoch": 0.7805129361003332, "grad_norm": 0.12811768054962158, "learning_rate": 0.00014796496651210716, "loss": 0.1844, "step": 12120 }, { "epoch": 0.7811569236713731, "grad_norm": 0.10684909671545029, "learning_rate": 0.00014792203331616006, "loss": 0.1798, "step": 12130 }, { "epoch": 0.781800911242413, "grad_norm": 0.12060215324163437, "learning_rate": 0.00014787910012021297, "loss": 0.1858, "step": 12140 }, { "epoch": 0.7824448988134529, "grad_norm": 0.11109094321727753, "learning_rate": 0.00014783616692426584, "loss": 0.1837, "step": 12150 }, { "epoch": 0.7830888863844928, "grad_norm": 0.12112022191286087, "learning_rate": 0.00014779323372831875, "loss": 0.1786, "step": 12160 }, { "epoch": 0.7837328739555327, "grad_norm": 0.11382956802845001, "learning_rate": 0.00014775030053237165, "loss": 0.1828, "step": 12170 }, { "epoch": 0.7843768615265725, "grad_norm": 0.11577167361974716, "learning_rate": 0.00014770736733642453, "loss": 0.1769, "step": 12180 }, { "epoch": 0.7850208490976124, "grad_norm": 0.1250063180923462, "learning_rate": 0.0001476644341404774, "loss": 0.1805, "step": 12190 }, { "epoch": 0.7856648366686523, "grad_norm": 0.11151536554098129, "learning_rate": 0.0001476215009445303, "loss": 0.1876, "step": 12200 }, { "epoch": 0.7863088242396922, "grad_norm": 0.14073039591312408, "learning_rate": 0.00014757856774858322, "loss": 0.1833, "step": 12210 }, { "epoch": 0.7869528118107321, "grad_norm": 0.12852172553539276, "learning_rate": 0.0001475356345526361, "loss": 0.1887, "step": 12220 }, { "epoch": 0.787596799381772, "grad_norm": 0.11804963648319244, "learning_rate": 0.000147492701356689, "loss": 0.1861, "step": 12230 }, { "epoch": 0.7882407869528119, "grad_norm": 0.11475301533937454, "learning_rate": 0.0001474497681607419, "loss": 0.184, "step": 12240 }, { "epoch": 0.7888847745238516, "grad_norm": 0.13396400213241577, "learning_rate": 0.0001474068349647948, "loss": 0.1797, "step": 12250 }, { "epoch": 0.7895287620948915, "grad_norm": 0.11754655838012695, "learning_rate": 0.00014736390176884768, "loss": 0.1749, "step": 12260 }, { "epoch": 0.7901727496659314, "grad_norm": 0.11359921097755432, "learning_rate": 0.00014732096857290056, "loss": 0.1856, "step": 12270 }, { "epoch": 0.7908167372369713, "grad_norm": 0.11906129866838455, "learning_rate": 0.00014727803537695346, "loss": 0.1873, "step": 12280 }, { "epoch": 0.7914607248080112, "grad_norm": 0.11552451550960541, "learning_rate": 0.00014723510218100637, "loss": 0.1772, "step": 12290 }, { "epoch": 0.7921047123790511, "grad_norm": 0.12693136930465698, "learning_rate": 0.00014719216898505925, "loss": 0.1893, "step": 12300 }, { "epoch": 0.792748699950091, "grad_norm": 0.1450146585702896, "learning_rate": 0.00014714923578911215, "loss": 0.1841, "step": 12310 }, { "epoch": 0.7933926875211308, "grad_norm": 0.11934928596019745, "learning_rate": 0.00014710630259316505, "loss": 0.1739, "step": 12320 }, { "epoch": 0.7940366750921707, "grad_norm": 0.12633949518203735, "learning_rate": 0.00014706336939721793, "loss": 0.1853, "step": 12330 }, { "epoch": 0.7946806626632106, "grad_norm": 0.12490373104810715, "learning_rate": 0.00014702043620127084, "loss": 0.1766, "step": 12340 }, { "epoch": 0.7953246502342505, "grad_norm": 0.11008262634277344, "learning_rate": 0.0001469775030053237, "loss": 0.1844, "step": 12350 }, { "epoch": 0.7959686378052904, "grad_norm": 0.12700529396533966, "learning_rate": 0.00014693456980937662, "loss": 0.1873, "step": 12360 }, { "epoch": 0.7966126253763303, "grad_norm": 0.12170003354549408, "learning_rate": 0.0001468916366134295, "loss": 0.1836, "step": 12370 }, { "epoch": 0.7972566129473702, "grad_norm": 0.12487339973449707, "learning_rate": 0.0001468487034174824, "loss": 0.1844, "step": 12380 }, { "epoch": 0.7979006005184099, "grad_norm": 0.12731269001960754, "learning_rate": 0.0001468057702215353, "loss": 0.1851, "step": 12390 }, { "epoch": 0.7985445880894498, "grad_norm": 0.13807149231433868, "learning_rate": 0.0001467628370255882, "loss": 0.1838, "step": 12400 }, { "epoch": 0.7991885756604897, "grad_norm": 0.13162845373153687, "learning_rate": 0.00014671990382964108, "loss": 0.1853, "step": 12410 }, { "epoch": 0.7998325632315296, "grad_norm": 0.13520629703998566, "learning_rate": 0.000146676970633694, "loss": 0.1857, "step": 12420 }, { "epoch": 0.8004765508025695, "grad_norm": 0.13284443318843842, "learning_rate": 0.00014663403743774686, "loss": 0.1863, "step": 12430 }, { "epoch": 0.8011205383736094, "grad_norm": 0.11475035548210144, "learning_rate": 0.00014659110424179977, "loss": 0.1797, "step": 12440 }, { "epoch": 0.8017645259446493, "grad_norm": 0.1497209370136261, "learning_rate": 0.00014654817104585265, "loss": 0.1889, "step": 12450 }, { "epoch": 0.8024085135156891, "grad_norm": 0.1286030262708664, "learning_rate": 0.00014650523784990555, "loss": 0.1791, "step": 12460 }, { "epoch": 0.803052501086729, "grad_norm": 0.12199150025844574, "learning_rate": 0.00014646230465395845, "loss": 0.1879, "step": 12470 }, { "epoch": 0.8036964886577689, "grad_norm": 0.122061587870121, "learning_rate": 0.00014641937145801136, "loss": 0.1812, "step": 12480 }, { "epoch": 0.8043404762288088, "grad_norm": 0.11645975708961487, "learning_rate": 0.00014637643826206424, "loss": 0.1864, "step": 12490 }, { "epoch": 0.8049844637998487, "grad_norm": 0.1504177749156952, "learning_rate": 0.00014633350506611714, "loss": 0.1818, "step": 12500 }, { "epoch": 0.8056284513708886, "grad_norm": 0.14166799187660217, "learning_rate": 0.00014629057187017002, "loss": 0.1757, "step": 12510 }, { "epoch": 0.8062724389419285, "grad_norm": 0.11501164734363556, "learning_rate": 0.00014624763867422292, "loss": 0.1909, "step": 12520 }, { "epoch": 0.8069164265129684, "grad_norm": 0.14242467284202576, "learning_rate": 0.0001462047054782758, "loss": 0.175, "step": 12530 }, { "epoch": 0.8075604140840081, "grad_norm": 0.1153632253408432, "learning_rate": 0.0001461617722823287, "loss": 0.1714, "step": 12540 }, { "epoch": 0.808204401655048, "grad_norm": 0.1463397741317749, "learning_rate": 0.0001461188390863816, "loss": 0.1777, "step": 12550 }, { "epoch": 0.8088483892260879, "grad_norm": 0.13638287782669067, "learning_rate": 0.00014607590589043448, "loss": 0.1818, "step": 12560 }, { "epoch": 0.8094923767971278, "grad_norm": 0.13076531887054443, "learning_rate": 0.0001460329726944874, "loss": 0.1851, "step": 12570 }, { "epoch": 0.8101363643681677, "grad_norm": 0.11168162524700165, "learning_rate": 0.0001459900394985403, "loss": 0.1825, "step": 12580 }, { "epoch": 0.8107803519392076, "grad_norm": 0.12313181906938553, "learning_rate": 0.00014594710630259317, "loss": 0.1811, "step": 12590 }, { "epoch": 0.8114243395102475, "grad_norm": 0.12508834898471832, "learning_rate": 0.00014590417310664605, "loss": 0.1882, "step": 12600 }, { "epoch": 0.8120683270812873, "grad_norm": 0.11763365566730499, "learning_rate": 0.00014586123991069895, "loss": 0.1807, "step": 12610 }, { "epoch": 0.8127123146523272, "grad_norm": 0.1382259875535965, "learning_rate": 0.00014581830671475186, "loss": 0.175, "step": 12620 }, { "epoch": 0.8133563022233671, "grad_norm": 0.11142219603061676, "learning_rate": 0.00014577537351880476, "loss": 0.1852, "step": 12630 }, { "epoch": 0.814000289794407, "grad_norm": 0.10776093602180481, "learning_rate": 0.00014573244032285764, "loss": 0.1783, "step": 12640 }, { "epoch": 0.8146442773654469, "grad_norm": 0.11073575913906097, "learning_rate": 0.00014568950712691054, "loss": 0.181, "step": 12650 }, { "epoch": 0.8152882649364868, "grad_norm": 0.12387504428625107, "learning_rate": 0.00014564657393096344, "loss": 0.182, "step": 12660 }, { "epoch": 0.8159322525075267, "grad_norm": 0.12543143332004547, "learning_rate": 0.00014560364073501632, "loss": 0.1916, "step": 12670 }, { "epoch": 0.8165762400785664, "grad_norm": 0.10985974222421646, "learning_rate": 0.0001455607075390692, "loss": 0.1824, "step": 12680 }, { "epoch": 0.8172202276496063, "grad_norm": 0.1275603026151657, "learning_rate": 0.0001455177743431221, "loss": 0.1896, "step": 12690 }, { "epoch": 0.8178642152206462, "grad_norm": 0.15082557499408722, "learning_rate": 0.000145474841147175, "loss": 0.1829, "step": 12700 }, { "epoch": 0.8185082027916861, "grad_norm": 0.14015859365463257, "learning_rate": 0.00014543190795122788, "loss": 0.1918, "step": 12710 }, { "epoch": 0.819152190362726, "grad_norm": 0.1255173236131668, "learning_rate": 0.0001453889747552808, "loss": 0.1806, "step": 12720 }, { "epoch": 0.8197961779337659, "grad_norm": 0.14821262657642365, "learning_rate": 0.0001453460415593337, "loss": 0.1835, "step": 12730 }, { "epoch": 0.8204401655048058, "grad_norm": 0.13924993574619293, "learning_rate": 0.0001453031083633866, "loss": 0.1855, "step": 12740 }, { "epoch": 0.8210841530758456, "grad_norm": 0.13781791925430298, "learning_rate": 0.00014526017516743945, "loss": 0.1801, "step": 12750 }, { "epoch": 0.8217281406468855, "grad_norm": 0.12160416692495346, "learning_rate": 0.00014521724197149235, "loss": 0.1827, "step": 12760 }, { "epoch": 0.8223721282179254, "grad_norm": 0.1437157541513443, "learning_rate": 0.00014517430877554526, "loss": 0.1849, "step": 12770 }, { "epoch": 0.8230161157889653, "grad_norm": 0.1350329965353012, "learning_rate": 0.00014513137557959816, "loss": 0.1797, "step": 12780 }, { "epoch": 0.8236601033600052, "grad_norm": 0.14214079082012177, "learning_rate": 0.00014508844238365104, "loss": 0.1791, "step": 12790 }, { "epoch": 0.8243040909310451, "grad_norm": 0.13051562011241913, "learning_rate": 0.00014504550918770394, "loss": 0.1821, "step": 12800 }, { "epoch": 0.824948078502085, "grad_norm": 0.12787611782550812, "learning_rate": 0.00014500257599175685, "loss": 0.1787, "step": 12810 }, { "epoch": 0.8255920660731247, "grad_norm": 0.11394061893224716, "learning_rate": 0.00014495964279580972, "loss": 0.1851, "step": 12820 }, { "epoch": 0.8262360536441646, "grad_norm": 0.11425438523292542, "learning_rate": 0.0001449167095998626, "loss": 0.1837, "step": 12830 }, { "epoch": 0.8268800412152045, "grad_norm": 0.13240812718868256, "learning_rate": 0.0001448737764039155, "loss": 0.1871, "step": 12840 }, { "epoch": 0.8275240287862444, "grad_norm": 0.10880971699953079, "learning_rate": 0.0001448308432079684, "loss": 0.1789, "step": 12850 }, { "epoch": 0.8281680163572843, "grad_norm": 0.13588646054267883, "learning_rate": 0.0001447879100120213, "loss": 0.1699, "step": 12860 }, { "epoch": 0.8288120039283242, "grad_norm": 0.13046608865261078, "learning_rate": 0.0001447449768160742, "loss": 0.1813, "step": 12870 }, { "epoch": 0.8294559914993641, "grad_norm": 0.11081693321466446, "learning_rate": 0.0001447020436201271, "loss": 0.1823, "step": 12880 }, { "epoch": 0.8300999790704039, "grad_norm": 0.11025272309780121, "learning_rate": 0.00014465911042418, "loss": 0.1855, "step": 12890 }, { "epoch": 0.8307439666414438, "grad_norm": 0.1346859633922577, "learning_rate": 0.00014461617722823288, "loss": 0.1878, "step": 12900 }, { "epoch": 0.8313879542124837, "grad_norm": 0.14070813357830048, "learning_rate": 0.00014457324403228575, "loss": 0.1816, "step": 12910 }, { "epoch": 0.8320319417835236, "grad_norm": 0.1064886823296547, "learning_rate": 0.00014453031083633866, "loss": 0.1752, "step": 12920 }, { "epoch": 0.8326759293545635, "grad_norm": 0.12828025221824646, "learning_rate": 0.00014448737764039156, "loss": 0.187, "step": 12930 }, { "epoch": 0.8333199169256034, "grad_norm": 0.12530896067619324, "learning_rate": 0.00014444444444444444, "loss": 0.1836, "step": 12940 }, { "epoch": 0.8339639044966433, "grad_norm": 0.13638778030872345, "learning_rate": 0.00014440151124849734, "loss": 0.184, "step": 12950 }, { "epoch": 0.834607892067683, "grad_norm": 0.14236915111541748, "learning_rate": 0.00014435857805255025, "loss": 0.1825, "step": 12960 }, { "epoch": 0.8352518796387229, "grad_norm": 0.11580362170934677, "learning_rate": 0.00014431564485660315, "loss": 0.186, "step": 12970 }, { "epoch": 0.8358958672097628, "grad_norm": 0.11853460967540741, "learning_rate": 0.00014427271166065603, "loss": 0.1778, "step": 12980 }, { "epoch": 0.8365398547808027, "grad_norm": 0.12490308284759521, "learning_rate": 0.0001442297784647089, "loss": 0.1814, "step": 12990 }, { "epoch": 0.8371838423518426, "grad_norm": 0.13447286188602448, "learning_rate": 0.0001441868452687618, "loss": 0.176, "step": 13000 }, { "epoch": 0.8378278299228825, "grad_norm": 0.1076495498418808, "learning_rate": 0.0001441439120728147, "loss": 0.1704, "step": 13010 }, { "epoch": 0.8384718174939224, "grad_norm": 0.135020911693573, "learning_rate": 0.0001441009788768676, "loss": 0.1827, "step": 13020 }, { "epoch": 0.8391158050649622, "grad_norm": 0.1167064756155014, "learning_rate": 0.0001440580456809205, "loss": 0.1806, "step": 13030 }, { "epoch": 0.8397597926360021, "grad_norm": 0.13164491951465607, "learning_rate": 0.0001440151124849734, "loss": 0.1788, "step": 13040 }, { "epoch": 0.840403780207042, "grad_norm": 0.13532397150993347, "learning_rate": 0.00014397217928902628, "loss": 0.1877, "step": 13050 }, { "epoch": 0.8410477677780819, "grad_norm": 0.13357649743556976, "learning_rate": 0.00014392924609307918, "loss": 0.1813, "step": 13060 }, { "epoch": 0.8416917553491218, "grad_norm": 0.11719059199094772, "learning_rate": 0.00014388631289713206, "loss": 0.1889, "step": 13070 }, { "epoch": 0.8423357429201617, "grad_norm": 0.11575347930192947, "learning_rate": 0.00014384337970118496, "loss": 0.1815, "step": 13080 }, { "epoch": 0.8429797304912016, "grad_norm": 0.1304808259010315, "learning_rate": 0.00014380044650523784, "loss": 0.1782, "step": 13090 }, { "epoch": 0.8436237180622413, "grad_norm": 0.11351712048053741, "learning_rate": 0.00014375751330929074, "loss": 0.1747, "step": 13100 }, { "epoch": 0.8442677056332812, "grad_norm": 0.12780362367630005, "learning_rate": 0.00014371458011334365, "loss": 0.1901, "step": 13110 }, { "epoch": 0.8449116932043211, "grad_norm": 0.1158604770898819, "learning_rate": 0.00014367164691739655, "loss": 0.1791, "step": 13120 }, { "epoch": 0.845555680775361, "grad_norm": 0.14435666799545288, "learning_rate": 0.00014362871372144943, "loss": 0.1778, "step": 13130 }, { "epoch": 0.8461996683464009, "grad_norm": 0.13074739277362823, "learning_rate": 0.00014358578052550233, "loss": 0.1796, "step": 13140 }, { "epoch": 0.8468436559174408, "grad_norm": 0.12018807232379913, "learning_rate": 0.00014354284732955524, "loss": 0.1771, "step": 13150 }, { "epoch": 0.8474876434884807, "grad_norm": 0.1246870905160904, "learning_rate": 0.00014349991413360811, "loss": 0.1802, "step": 13160 }, { "epoch": 0.8481316310595205, "grad_norm": 0.1188950166106224, "learning_rate": 0.000143456980937661, "loss": 0.1861, "step": 13170 }, { "epoch": 0.8487756186305604, "grad_norm": 0.12520647048950195, "learning_rate": 0.0001434140477417139, "loss": 0.1811, "step": 13180 }, { "epoch": 0.8494196062016003, "grad_norm": 0.12040659785270691, "learning_rate": 0.0001433711145457668, "loss": 0.1799, "step": 13190 }, { "epoch": 0.8500635937726402, "grad_norm": 0.13584616780281067, "learning_rate": 0.0001433281813498197, "loss": 0.1857, "step": 13200 }, { "epoch": 0.8507075813436801, "grad_norm": 0.1330033242702484, "learning_rate": 0.00014328524815387258, "loss": 0.1814, "step": 13210 }, { "epoch": 0.85135156891472, "grad_norm": 0.11682259291410446, "learning_rate": 0.00014324231495792548, "loss": 0.1763, "step": 13220 }, { "epoch": 0.8519955564857599, "grad_norm": 0.12810729444026947, "learning_rate": 0.0001431993817619784, "loss": 0.1809, "step": 13230 }, { "epoch": 0.8526395440567998, "grad_norm": 0.1281130164861679, "learning_rate": 0.00014315644856603127, "loss": 0.1783, "step": 13240 }, { "epoch": 0.8532835316278395, "grad_norm": 0.12134262919425964, "learning_rate": 0.00014311351537008414, "loss": 0.1835, "step": 13250 }, { "epoch": 0.8539275191988794, "grad_norm": 0.1369975507259369, "learning_rate": 0.00014307058217413705, "loss": 0.1766, "step": 13260 }, { "epoch": 0.8545715067699193, "grad_norm": 0.11421076208353043, "learning_rate": 0.00014302764897818995, "loss": 0.1784, "step": 13270 }, { "epoch": 0.8552154943409592, "grad_norm": 0.1390162855386734, "learning_rate": 0.00014298471578224283, "loss": 0.1934, "step": 13280 }, { "epoch": 0.8558594819119991, "grad_norm": 0.1386224627494812, "learning_rate": 0.00014294178258629573, "loss": 0.1831, "step": 13290 }, { "epoch": 0.856503469483039, "grad_norm": 0.1309196501970291, "learning_rate": 0.00014289884939034864, "loss": 0.1792, "step": 13300 }, { "epoch": 0.8571474570540789, "grad_norm": 0.12347520142793655, "learning_rate": 0.00014285591619440154, "loss": 0.1881, "step": 13310 }, { "epoch": 0.8577914446251187, "grad_norm": 0.12115474045276642, "learning_rate": 0.0001428129829984544, "loss": 0.1718, "step": 13320 }, { "epoch": 0.8584354321961586, "grad_norm": 0.12293334305286407, "learning_rate": 0.0001427700498025073, "loss": 0.1818, "step": 13330 }, { "epoch": 0.8590794197671985, "grad_norm": 0.1332903653383255, "learning_rate": 0.0001427271166065602, "loss": 0.1807, "step": 13340 }, { "epoch": 0.8597234073382384, "grad_norm": 0.11795137077569962, "learning_rate": 0.0001426841834106131, "loss": 0.1862, "step": 13350 }, { "epoch": 0.8603673949092783, "grad_norm": 0.15508773922920227, "learning_rate": 0.00014264125021466598, "loss": 0.186, "step": 13360 }, { "epoch": 0.8610113824803182, "grad_norm": 0.11503136157989502, "learning_rate": 0.00014259831701871889, "loss": 0.1872, "step": 13370 }, { "epoch": 0.8616553700513581, "grad_norm": 0.11584103852510452, "learning_rate": 0.0001425553838227718, "loss": 0.1871, "step": 13380 }, { "epoch": 0.8622993576223978, "grad_norm": 0.1298382431268692, "learning_rate": 0.00014251245062682467, "loss": 0.1858, "step": 13390 }, { "epoch": 0.8629433451934377, "grad_norm": 0.1265525221824646, "learning_rate": 0.00014246951743087754, "loss": 0.1886, "step": 13400 }, { "epoch": 0.8635873327644776, "grad_norm": 0.11862900108098984, "learning_rate": 0.00014242658423493045, "loss": 0.1867, "step": 13410 }, { "epoch": 0.8642313203355175, "grad_norm": 0.13275772333145142, "learning_rate": 0.00014238365103898335, "loss": 0.1715, "step": 13420 }, { "epoch": 0.8648753079065574, "grad_norm": 0.13148252665996552, "learning_rate": 0.00014234071784303623, "loss": 0.1819, "step": 13430 }, { "epoch": 0.8655192954775973, "grad_norm": 0.13335713744163513, "learning_rate": 0.00014229778464708913, "loss": 0.1763, "step": 13440 }, { "epoch": 0.8661632830486372, "grad_norm": 0.12313614785671234, "learning_rate": 0.00014225485145114204, "loss": 0.1817, "step": 13450 }, { "epoch": 0.866807270619677, "grad_norm": 0.11641430854797363, "learning_rate": 0.00014221191825519494, "loss": 0.1816, "step": 13460 }, { "epoch": 0.8674512581907169, "grad_norm": 0.1574096828699112, "learning_rate": 0.00014216898505924782, "loss": 0.189, "step": 13470 }, { "epoch": 0.8680952457617568, "grad_norm": 0.18329231441020966, "learning_rate": 0.0001421260518633007, "loss": 0.182, "step": 13480 }, { "epoch": 0.8687392333327967, "grad_norm": 0.13451464474201202, "learning_rate": 0.0001420831186673536, "loss": 0.1843, "step": 13490 }, { "epoch": 0.8693832209038366, "grad_norm": 0.13599875569343567, "learning_rate": 0.0001420401854714065, "loss": 0.1787, "step": 13500 }, { "epoch": 0.8700272084748765, "grad_norm": 0.11507534980773926, "learning_rate": 0.00014199725227545938, "loss": 0.177, "step": 13510 }, { "epoch": 0.8706711960459164, "grad_norm": 0.12031406164169312, "learning_rate": 0.00014195431907951229, "loss": 0.1884, "step": 13520 }, { "epoch": 0.8713151836169561, "grad_norm": 0.12062572687864304, "learning_rate": 0.0001419113858835652, "loss": 0.1735, "step": 13530 }, { "epoch": 0.871959171187996, "grad_norm": 0.1196262463927269, "learning_rate": 0.00014186845268761807, "loss": 0.1757, "step": 13540 }, { "epoch": 0.8726031587590359, "grad_norm": 0.13397535681724548, "learning_rate": 0.00014182551949167097, "loss": 0.1828, "step": 13550 }, { "epoch": 0.8732471463300758, "grad_norm": 0.13034608960151672, "learning_rate": 0.00014178258629572385, "loss": 0.1804, "step": 13560 }, { "epoch": 0.8738911339011157, "grad_norm": 0.12392203509807587, "learning_rate": 0.00014173965309977675, "loss": 0.1867, "step": 13570 }, { "epoch": 0.8745351214721556, "grad_norm": 0.10961595177650452, "learning_rate": 0.00014169671990382966, "loss": 0.1842, "step": 13580 }, { "epoch": 0.8751791090431955, "grad_norm": 0.1318638026714325, "learning_rate": 0.00014165378670788253, "loss": 0.1826, "step": 13590 }, { "epoch": 0.8758230966142353, "grad_norm": 0.13395333290100098, "learning_rate": 0.00014161085351193544, "loss": 0.1808, "step": 13600 }, { "epoch": 0.8764670841852752, "grad_norm": 0.11758317053318024, "learning_rate": 0.00014156792031598834, "loss": 0.1861, "step": 13610 }, { "epoch": 0.8771110717563151, "grad_norm": 0.13486051559448242, "learning_rate": 0.00014152498712004122, "loss": 0.1889, "step": 13620 }, { "epoch": 0.877755059327355, "grad_norm": 0.13333112001419067, "learning_rate": 0.00014148205392409412, "loss": 0.181, "step": 13630 }, { "epoch": 0.8783990468983949, "grad_norm": 0.12608037889003754, "learning_rate": 0.000141439120728147, "loss": 0.178, "step": 13640 }, { "epoch": 0.8790430344694348, "grad_norm": 0.13435441255569458, "learning_rate": 0.0001413961875321999, "loss": 0.1809, "step": 13650 }, { "epoch": 0.8796870220404747, "grad_norm": 0.12077762186527252, "learning_rate": 0.00014135325433625278, "loss": 0.1798, "step": 13660 }, { "epoch": 0.8803310096115144, "grad_norm": 0.13403920829296112, "learning_rate": 0.0001413103211403057, "loss": 0.1856, "step": 13670 }, { "epoch": 0.8809749971825543, "grad_norm": 0.12024153023958206, "learning_rate": 0.0001412673879443586, "loss": 0.1818, "step": 13680 }, { "epoch": 0.8816189847535942, "grad_norm": 0.1183406189084053, "learning_rate": 0.0001412244547484115, "loss": 0.1823, "step": 13690 }, { "epoch": 0.8822629723246341, "grad_norm": 0.12733426690101624, "learning_rate": 0.00014118152155246437, "loss": 0.186, "step": 13700 }, { "epoch": 0.882906959895674, "grad_norm": 0.13373027741909027, "learning_rate": 0.00014113858835651728, "loss": 0.1851, "step": 13710 }, { "epoch": 0.8835509474667139, "grad_norm": 0.12066039443016052, "learning_rate": 0.00014109565516057015, "loss": 0.177, "step": 13720 }, { "epoch": 0.8841949350377538, "grad_norm": 0.13114455342292786, "learning_rate": 0.00014105272196462306, "loss": 0.1759, "step": 13730 }, { "epoch": 0.8848389226087936, "grad_norm": 0.13710126280784607, "learning_rate": 0.00014100978876867594, "loss": 0.1818, "step": 13740 }, { "epoch": 0.8854829101798335, "grad_norm": 0.1373017430305481, "learning_rate": 0.00014096685557272884, "loss": 0.1868, "step": 13750 }, { "epoch": 0.8861268977508734, "grad_norm": 0.12344484031200409, "learning_rate": 0.00014092392237678174, "loss": 0.1845, "step": 13760 }, { "epoch": 0.8867708853219133, "grad_norm": 0.13528287410736084, "learning_rate": 0.00014088098918083462, "loss": 0.1821, "step": 13770 }, { "epoch": 0.8874148728929532, "grad_norm": 0.11782804876565933, "learning_rate": 0.00014083805598488753, "loss": 0.18, "step": 13780 }, { "epoch": 0.8880588604639931, "grad_norm": 0.12353253364562988, "learning_rate": 0.00014079512278894043, "loss": 0.1803, "step": 13790 }, { "epoch": 0.888702848035033, "grad_norm": 0.1233520358800888, "learning_rate": 0.0001407521895929933, "loss": 0.188, "step": 13800 }, { "epoch": 0.8893468356060727, "grad_norm": 0.13026055693626404, "learning_rate": 0.00014070925639704618, "loss": 0.1858, "step": 13810 }, { "epoch": 0.8899908231771126, "grad_norm": 0.12750910222530365, "learning_rate": 0.0001406663232010991, "loss": 0.1873, "step": 13820 }, { "epoch": 0.8906348107481525, "grad_norm": 0.12530504167079926, "learning_rate": 0.000140623390005152, "loss": 0.185, "step": 13830 }, { "epoch": 0.8912787983191924, "grad_norm": 0.12178435921669006, "learning_rate": 0.0001405804568092049, "loss": 0.176, "step": 13840 }, { "epoch": 0.8919227858902323, "grad_norm": 0.12537451088428497, "learning_rate": 0.00014053752361325777, "loss": 0.1837, "step": 13850 }, { "epoch": 0.8925667734612722, "grad_norm": 0.12672914564609528, "learning_rate": 0.00014049459041731068, "loss": 0.1815, "step": 13860 }, { "epoch": 0.8932107610323121, "grad_norm": 0.13283084332942963, "learning_rate": 0.00014045165722136358, "loss": 0.1824, "step": 13870 }, { "epoch": 0.8938547486033519, "grad_norm": 0.12152808159589767, "learning_rate": 0.00014040872402541646, "loss": 0.1909, "step": 13880 }, { "epoch": 0.8944987361743918, "grad_norm": 0.1140124723315239, "learning_rate": 0.00014036579082946934, "loss": 0.1811, "step": 13890 }, { "epoch": 0.8951427237454317, "grad_norm": 0.12149060517549515, "learning_rate": 0.00014032285763352224, "loss": 0.1907, "step": 13900 }, { "epoch": 0.8957867113164716, "grad_norm": 0.12946978211402893, "learning_rate": 0.00014027992443757514, "loss": 0.183, "step": 13910 }, { "epoch": 0.8964306988875115, "grad_norm": 0.13261859118938446, "learning_rate": 0.00014023699124162805, "loss": 0.1824, "step": 13920 }, { "epoch": 0.8970746864585514, "grad_norm": 0.11395319551229477, "learning_rate": 0.00014019405804568093, "loss": 0.1785, "step": 13930 }, { "epoch": 0.8977186740295913, "grad_norm": 0.12941443920135498, "learning_rate": 0.00014015112484973383, "loss": 0.1819, "step": 13940 }, { "epoch": 0.8983626616006312, "grad_norm": 0.11823008209466934, "learning_rate": 0.00014010819165378673, "loss": 0.1823, "step": 13950 }, { "epoch": 0.899006649171671, "grad_norm": 0.1175430417060852, "learning_rate": 0.0001400652584578396, "loss": 0.1805, "step": 13960 }, { "epoch": 0.8996506367427108, "grad_norm": 0.10893058031797409, "learning_rate": 0.0001400223252618925, "loss": 0.1796, "step": 13970 }, { "epoch": 0.9002946243137507, "grad_norm": 0.13851185142993927, "learning_rate": 0.0001399793920659454, "loss": 0.1789, "step": 13980 }, { "epoch": 0.9009386118847906, "grad_norm": 0.12162554264068604, "learning_rate": 0.0001399364588699983, "loss": 0.1789, "step": 13990 }, { "epoch": 0.9015825994558305, "grad_norm": 0.12030459195375443, "learning_rate": 0.00013989352567405117, "loss": 0.1857, "step": 14000 }, { "epoch": 0.9022265870268704, "grad_norm": 0.1371152698993683, "learning_rate": 0.00013985059247810408, "loss": 0.1779, "step": 14010 }, { "epoch": 0.9028705745979103, "grad_norm": 0.13825735449790955, "learning_rate": 0.00013980765928215698, "loss": 0.1861, "step": 14020 }, { "epoch": 0.9035145621689501, "grad_norm": 0.12374751269817352, "learning_rate": 0.0001397647260862099, "loss": 0.1818, "step": 14030 }, { "epoch": 0.90415854973999, "grad_norm": 0.10770852863788605, "learning_rate": 0.00013972179289026274, "loss": 0.1722, "step": 14040 }, { "epoch": 0.9048025373110299, "grad_norm": 0.11738148331642151, "learning_rate": 0.00013967885969431564, "loss": 0.1903, "step": 14050 }, { "epoch": 0.9054465248820698, "grad_norm": 0.12332795560359955, "learning_rate": 0.00013963592649836855, "loss": 0.1756, "step": 14060 }, { "epoch": 0.9060905124531097, "grad_norm": 0.1273031234741211, "learning_rate": 0.00013959299330242145, "loss": 0.1778, "step": 14070 }, { "epoch": 0.9067345000241496, "grad_norm": 0.12324702739715576, "learning_rate": 0.00013955006010647433, "loss": 0.1869, "step": 14080 }, { "epoch": 0.9073784875951895, "grad_norm": 0.12329238653182983, "learning_rate": 0.00013950712691052723, "loss": 0.1768, "step": 14090 }, { "epoch": 0.9080224751662292, "grad_norm": 0.11335393786430359, "learning_rate": 0.00013946419371458013, "loss": 0.181, "step": 14100 }, { "epoch": 0.9086664627372691, "grad_norm": 0.14185039699077606, "learning_rate": 0.000139421260518633, "loss": 0.179, "step": 14110 }, { "epoch": 0.909310450308309, "grad_norm": 0.11181429773569107, "learning_rate": 0.0001393783273226859, "loss": 0.181, "step": 14120 }, { "epoch": 0.9099544378793489, "grad_norm": 0.11291835457086563, "learning_rate": 0.0001393353941267388, "loss": 0.1803, "step": 14130 }, { "epoch": 0.9105984254503888, "grad_norm": 0.12753477692604065, "learning_rate": 0.0001392924609307917, "loss": 0.1852, "step": 14140 }, { "epoch": 0.9112424130214287, "grad_norm": 0.1281924843788147, "learning_rate": 0.00013924952773484457, "loss": 0.1748, "step": 14150 }, { "epoch": 0.9118864005924686, "grad_norm": 0.1139293685555458, "learning_rate": 0.00013920659453889748, "loss": 0.1772, "step": 14160 }, { "epoch": 0.9125303881635084, "grad_norm": 0.12394507229328156, "learning_rate": 0.00013916366134295038, "loss": 0.1727, "step": 14170 }, { "epoch": 0.9131743757345483, "grad_norm": 0.12688469886779785, "learning_rate": 0.0001391207281470033, "loss": 0.1795, "step": 14180 }, { "epoch": 0.9138183633055882, "grad_norm": 0.12201543152332306, "learning_rate": 0.00013907779495105616, "loss": 0.1894, "step": 14190 }, { "epoch": 0.9144623508766281, "grad_norm": 0.1172158271074295, "learning_rate": 0.00013903486175510904, "loss": 0.1889, "step": 14200 }, { "epoch": 0.915106338447668, "grad_norm": 0.14511990547180176, "learning_rate": 0.00013899192855916195, "loss": 0.1847, "step": 14210 }, { "epoch": 0.9157503260187079, "grad_norm": 0.12348190695047379, "learning_rate": 0.00013894899536321485, "loss": 0.1867, "step": 14220 }, { "epoch": 0.9163943135897478, "grad_norm": 0.11786377429962158, "learning_rate": 0.00013890606216726773, "loss": 0.1904, "step": 14230 }, { "epoch": 0.9170383011607876, "grad_norm": 0.15013204514980316, "learning_rate": 0.00013886312897132063, "loss": 0.1813, "step": 14240 }, { "epoch": 0.9176822887318274, "grad_norm": 0.13345468044281006, "learning_rate": 0.00013882019577537354, "loss": 0.1774, "step": 14250 }, { "epoch": 0.9183262763028673, "grad_norm": 0.11581270396709442, "learning_rate": 0.0001387772625794264, "loss": 0.1816, "step": 14260 }, { "epoch": 0.9189702638739072, "grad_norm": 0.12498687207698822, "learning_rate": 0.00013873432938347932, "loss": 0.182, "step": 14270 }, { "epoch": 0.9196142514449471, "grad_norm": 0.10015971213579178, "learning_rate": 0.0001386913961875322, "loss": 0.1842, "step": 14280 }, { "epoch": 0.920258239015987, "grad_norm": 0.11884758621454239, "learning_rate": 0.0001386484629915851, "loss": 0.1915, "step": 14290 }, { "epoch": 0.9209022265870269, "grad_norm": 0.12438521534204483, "learning_rate": 0.000138605529795638, "loss": 0.1814, "step": 14300 }, { "epoch": 0.9215462141580667, "grad_norm": 0.12370435148477554, "learning_rate": 0.00013856259659969088, "loss": 0.1755, "step": 14310 }, { "epoch": 0.9221902017291066, "grad_norm": 0.13018448650836945, "learning_rate": 0.00013851966340374378, "loss": 0.1776, "step": 14320 }, { "epoch": 0.9228341893001465, "grad_norm": 0.11898566037416458, "learning_rate": 0.0001384767302077967, "loss": 0.1796, "step": 14330 }, { "epoch": 0.9234781768711864, "grad_norm": 0.12294477224349976, "learning_rate": 0.00013843379701184957, "loss": 0.1793, "step": 14340 }, { "epoch": 0.9241221644422263, "grad_norm": 0.13554643094539642, "learning_rate": 0.00013839086381590247, "loss": 0.1878, "step": 14350 }, { "epoch": 0.9247661520132662, "grad_norm": 0.11805419623851776, "learning_rate": 0.00013834793061995535, "loss": 0.1842, "step": 14360 }, { "epoch": 0.9254101395843061, "grad_norm": 0.10838731378316879, "learning_rate": 0.00013830499742400825, "loss": 0.1815, "step": 14370 }, { "epoch": 0.9260541271553459, "grad_norm": 0.13037194311618805, "learning_rate": 0.00013826206422806113, "loss": 0.1849, "step": 14380 }, { "epoch": 0.9266981147263857, "grad_norm": 0.11299508064985275, "learning_rate": 0.00013821913103211403, "loss": 0.1783, "step": 14390 }, { "epoch": 0.9273421022974256, "grad_norm": 0.1580413579940796, "learning_rate": 0.00013817619783616694, "loss": 0.1898, "step": 14400 }, { "epoch": 0.9279860898684655, "grad_norm": 0.14113950729370117, "learning_rate": 0.00013813326464021984, "loss": 0.1786, "step": 14410 }, { "epoch": 0.9286300774395054, "grad_norm": 0.11687666177749634, "learning_rate": 0.00013809033144427272, "loss": 0.1729, "step": 14420 }, { "epoch": 0.9292740650105453, "grad_norm": 0.11692973226308823, "learning_rate": 0.00013804739824832562, "loss": 0.1787, "step": 14430 }, { "epoch": 0.9299180525815852, "grad_norm": 0.11922963708639145, "learning_rate": 0.0001380044650523785, "loss": 0.1832, "step": 14440 }, { "epoch": 0.930562040152625, "grad_norm": 0.11700713634490967, "learning_rate": 0.0001379615318564314, "loss": 0.1808, "step": 14450 }, { "epoch": 0.9312060277236649, "grad_norm": 0.12986619770526886, "learning_rate": 0.00013791859866048428, "loss": 0.1757, "step": 14460 }, { "epoch": 0.9318500152947048, "grad_norm": 0.14071808755397797, "learning_rate": 0.00013787566546453718, "loss": 0.187, "step": 14470 }, { "epoch": 0.9324940028657447, "grad_norm": 0.15334954857826233, "learning_rate": 0.0001378327322685901, "loss": 0.1783, "step": 14480 }, { "epoch": 0.9331379904367846, "grad_norm": 0.13762453198432922, "learning_rate": 0.00013778979907264297, "loss": 0.1896, "step": 14490 }, { "epoch": 0.9337819780078245, "grad_norm": 0.12008744478225708, "learning_rate": 0.00013774686587669587, "loss": 0.1843, "step": 14500 }, { "epoch": 0.9344259655788644, "grad_norm": 0.1171671524643898, "learning_rate": 0.00013770393268074877, "loss": 0.1741, "step": 14510 }, { "epoch": 0.9350699531499042, "grad_norm": 0.12015850841999054, "learning_rate": 0.00013766099948480165, "loss": 0.1883, "step": 14520 }, { "epoch": 0.935713940720944, "grad_norm": 0.10542617738246918, "learning_rate": 0.00013761806628885453, "loss": 0.1732, "step": 14530 }, { "epoch": 0.9363579282919839, "grad_norm": 0.14650556445121765, "learning_rate": 0.00013757513309290743, "loss": 0.1762, "step": 14540 }, { "epoch": 0.9370019158630238, "grad_norm": 0.12085564434528351, "learning_rate": 0.00013753219989696034, "loss": 0.1792, "step": 14550 }, { "epoch": 0.9376459034340637, "grad_norm": 0.10622498393058777, "learning_rate": 0.00013748926670101324, "loss": 0.1751, "step": 14560 }, { "epoch": 0.9382898910051036, "grad_norm": 0.1249416172504425, "learning_rate": 0.00013744633350506612, "loss": 0.1888, "step": 14570 }, { "epoch": 0.9389338785761435, "grad_norm": 0.12535668909549713, "learning_rate": 0.00013740340030911902, "loss": 0.1783, "step": 14580 }, { "epoch": 0.9395778661471833, "grad_norm": 0.12063421308994293, "learning_rate": 0.00013736046711317193, "loss": 0.1891, "step": 14590 }, { "epoch": 0.9402218537182232, "grad_norm": 0.11089546978473663, "learning_rate": 0.0001373175339172248, "loss": 0.1769, "step": 14600 }, { "epoch": 0.9408658412892631, "grad_norm": 0.12717542052268982, "learning_rate": 0.00013727460072127768, "loss": 0.1709, "step": 14610 }, { "epoch": 0.941509828860303, "grad_norm": 0.11859888583421707, "learning_rate": 0.00013723166752533059, "loss": 0.1779, "step": 14620 }, { "epoch": 0.9421538164313429, "grad_norm": 0.13095878064632416, "learning_rate": 0.0001371887343293835, "loss": 0.1823, "step": 14630 }, { "epoch": 0.9427978040023828, "grad_norm": 0.12000405043363571, "learning_rate": 0.0001371458011334364, "loss": 0.1889, "step": 14640 }, { "epoch": 0.9434417915734227, "grad_norm": 0.12077592313289642, "learning_rate": 0.00013710286793748927, "loss": 0.1863, "step": 14650 }, { "epoch": 0.9440857791444626, "grad_norm": 0.11917223781347275, "learning_rate": 0.00013705993474154217, "loss": 0.181, "step": 14660 }, { "epoch": 0.9447297667155024, "grad_norm": 0.1035728007555008, "learning_rate": 0.00013701700154559508, "loss": 0.1801, "step": 14670 }, { "epoch": 0.9453737542865422, "grad_norm": 0.1467197835445404, "learning_rate": 0.00013697406834964796, "loss": 0.1761, "step": 14680 }, { "epoch": 0.9460177418575821, "grad_norm": 0.12190321087837219, "learning_rate": 0.00013693113515370083, "loss": 0.1789, "step": 14690 }, { "epoch": 0.946661729428622, "grad_norm": 0.15911166369915009, "learning_rate": 0.00013688820195775374, "loss": 0.1862, "step": 14700 }, { "epoch": 0.9473057169996619, "grad_norm": 0.13140343129634857, "learning_rate": 0.00013684526876180664, "loss": 0.179, "step": 14710 }, { "epoch": 0.9479497045707018, "grad_norm": 0.11218202114105225, "learning_rate": 0.00013680233556585952, "loss": 0.1801, "step": 14720 }, { "epoch": 0.9485936921417417, "grad_norm": 0.1106395423412323, "learning_rate": 0.00013675940236991242, "loss": 0.1743, "step": 14730 }, { "epoch": 0.9492376797127815, "grad_norm": 0.13949383795261383, "learning_rate": 0.00013671646917396533, "loss": 0.181, "step": 14740 }, { "epoch": 0.9498816672838214, "grad_norm": 0.13340827822685242, "learning_rate": 0.00013667353597801823, "loss": 0.179, "step": 14750 }, { "epoch": 0.9505256548548613, "grad_norm": 0.1183142215013504, "learning_rate": 0.00013663060278207108, "loss": 0.1829, "step": 14760 }, { "epoch": 0.9511696424259012, "grad_norm": 0.11205948144197464, "learning_rate": 0.00013658766958612399, "loss": 0.1842, "step": 14770 }, { "epoch": 0.9518136299969411, "grad_norm": 0.13509351015090942, "learning_rate": 0.0001365447363901769, "loss": 0.1719, "step": 14780 }, { "epoch": 0.952457617567981, "grad_norm": 0.11884520947933197, "learning_rate": 0.0001365018031942298, "loss": 0.1799, "step": 14790 }, { "epoch": 0.9531016051390209, "grad_norm": 0.11746937781572342, "learning_rate": 0.00013645886999828267, "loss": 0.1752, "step": 14800 }, { "epoch": 0.9537455927100607, "grad_norm": 0.1535262167453766, "learning_rate": 0.00013641593680233558, "loss": 0.1877, "step": 14810 }, { "epoch": 0.9543895802811005, "grad_norm": 0.11958853155374527, "learning_rate": 0.00013637300360638848, "loss": 0.1784, "step": 14820 }, { "epoch": 0.9550335678521404, "grad_norm": 0.12269605696201324, "learning_rate": 0.00013633007041044136, "loss": 0.1754, "step": 14830 }, { "epoch": 0.9556775554231803, "grad_norm": 0.10928655415773392, "learning_rate": 0.00013628713721449423, "loss": 0.181, "step": 14840 }, { "epoch": 0.9563215429942202, "grad_norm": 0.11661892384290695, "learning_rate": 0.00013624420401854714, "loss": 0.1842, "step": 14850 }, { "epoch": 0.9569655305652601, "grad_norm": 0.11889955401420593, "learning_rate": 0.00013620127082260004, "loss": 0.1806, "step": 14860 }, { "epoch": 0.9576095181363, "grad_norm": 0.11229906231164932, "learning_rate": 0.00013615833762665292, "loss": 0.1778, "step": 14870 }, { "epoch": 0.9582535057073398, "grad_norm": 0.10054229944944382, "learning_rate": 0.00013611540443070582, "loss": 0.181, "step": 14880 }, { "epoch": 0.9588974932783797, "grad_norm": 0.12333395332098007, "learning_rate": 0.00013607247123475873, "loss": 0.1709, "step": 14890 }, { "epoch": 0.9595414808494196, "grad_norm": 0.12048995494842529, "learning_rate": 0.00013602953803881163, "loss": 0.1821, "step": 14900 }, { "epoch": 0.9601854684204595, "grad_norm": 0.12226367741823196, "learning_rate": 0.0001359866048428645, "loss": 0.1787, "step": 14910 }, { "epoch": 0.9608294559914994, "grad_norm": 0.10079090297222137, "learning_rate": 0.0001359436716469174, "loss": 0.1847, "step": 14920 }, { "epoch": 0.9614734435625393, "grad_norm": 0.11376500874757767, "learning_rate": 0.0001359007384509703, "loss": 0.179, "step": 14930 }, { "epoch": 0.9621174311335792, "grad_norm": 0.12101765722036362, "learning_rate": 0.0001358578052550232, "loss": 0.1836, "step": 14940 }, { "epoch": 0.962761418704619, "grad_norm": 0.13760899007320404, "learning_rate": 0.00013581487205907607, "loss": 0.1821, "step": 14950 }, { "epoch": 0.9634054062756588, "grad_norm": 0.11976280063390732, "learning_rate": 0.00013577193886312898, "loss": 0.1676, "step": 14960 }, { "epoch": 0.9640493938466987, "grad_norm": 0.11602098494768143, "learning_rate": 0.00013572900566718188, "loss": 0.1846, "step": 14970 }, { "epoch": 0.9646933814177386, "grad_norm": 0.12015461176633835, "learning_rate": 0.00013568607247123476, "loss": 0.1752, "step": 14980 }, { "epoch": 0.9653373689887785, "grad_norm": 0.12855705618858337, "learning_rate": 0.00013564313927528766, "loss": 0.181, "step": 14990 }, { "epoch": 0.9659813565598184, "grad_norm": 0.12646137177944183, "learning_rate": 0.00013560020607934054, "loss": 0.1825, "step": 15000 }, { "epoch": 0.9666253441308583, "grad_norm": 0.12618522346019745, "learning_rate": 0.00013555727288339344, "loss": 0.1835, "step": 15010 }, { "epoch": 0.9672693317018981, "grad_norm": 0.13129208981990814, "learning_rate": 0.00013551433968744635, "loss": 0.1852, "step": 15020 }, { "epoch": 0.967913319272938, "grad_norm": 0.12320291996002197, "learning_rate": 0.00013547140649149922, "loss": 0.18, "step": 15030 }, { "epoch": 0.9685573068439779, "grad_norm": 0.13250532746315002, "learning_rate": 0.00013542847329555213, "loss": 0.181, "step": 15040 }, { "epoch": 0.9692012944150178, "grad_norm": 0.11205331236124039, "learning_rate": 0.00013538554009960503, "loss": 0.1798, "step": 15050 }, { "epoch": 0.9698452819860577, "grad_norm": 0.11175742000341415, "learning_rate": 0.0001353426069036579, "loss": 0.1857, "step": 15060 }, { "epoch": 0.9704892695570976, "grad_norm": 0.11652005463838577, "learning_rate": 0.00013529967370771081, "loss": 0.1766, "step": 15070 }, { "epoch": 0.9711332571281375, "grad_norm": 0.13503310084342957, "learning_rate": 0.00013525674051176372, "loss": 0.1929, "step": 15080 }, { "epoch": 0.9717772446991773, "grad_norm": 0.12596091628074646, "learning_rate": 0.0001352138073158166, "loss": 0.1812, "step": 15090 }, { "epoch": 0.9724212322702172, "grad_norm": 0.11896634101867676, "learning_rate": 0.00013517087411986947, "loss": 0.1851, "step": 15100 }, { "epoch": 0.973065219841257, "grad_norm": 0.11475136876106262, "learning_rate": 0.00013512794092392238, "loss": 0.1729, "step": 15110 }, { "epoch": 0.9737092074122969, "grad_norm": 0.12506593763828278, "learning_rate": 0.00013508500772797528, "loss": 0.1838, "step": 15120 }, { "epoch": 0.9743531949833368, "grad_norm": 0.12476658076047897, "learning_rate": 0.00013504207453202819, "loss": 0.1898, "step": 15130 }, { "epoch": 0.9749971825543767, "grad_norm": 0.12200205028057098, "learning_rate": 0.00013499914133608106, "loss": 0.1792, "step": 15140 }, { "epoch": 0.9756411701254166, "grad_norm": 0.1235913410782814, "learning_rate": 0.00013495620814013397, "loss": 0.1822, "step": 15150 }, { "epoch": 0.9762851576964564, "grad_norm": 0.11723452806472778, "learning_rate": 0.00013491327494418687, "loss": 0.1777, "step": 15160 }, { "epoch": 0.9769291452674963, "grad_norm": 0.13057664036750793, "learning_rate": 0.00013487034174823975, "loss": 0.1793, "step": 15170 }, { "epoch": 0.9775731328385362, "grad_norm": 0.11589407175779343, "learning_rate": 0.00013482740855229263, "loss": 0.1864, "step": 15180 }, { "epoch": 0.9782171204095761, "grad_norm": 0.12778756022453308, "learning_rate": 0.00013478447535634553, "loss": 0.1814, "step": 15190 }, { "epoch": 0.978861107980616, "grad_norm": 0.124376580119133, "learning_rate": 0.00013474154216039843, "loss": 0.1773, "step": 15200 }, { "epoch": 0.9795050955516559, "grad_norm": 0.1082288846373558, "learning_rate": 0.0001346986089644513, "loss": 0.1836, "step": 15210 }, { "epoch": 0.9801490831226958, "grad_norm": 0.11699283868074417, "learning_rate": 0.00013465567576850421, "loss": 0.1906, "step": 15220 }, { "epoch": 0.9807930706937356, "grad_norm": 0.1370406150817871, "learning_rate": 0.00013461274257255712, "loss": 0.1763, "step": 15230 }, { "epoch": 0.9814370582647755, "grad_norm": 0.12472226470708847, "learning_rate": 0.00013456980937661002, "loss": 0.183, "step": 15240 }, { "epoch": 0.9820810458358153, "grad_norm": 0.15383067727088928, "learning_rate": 0.00013452687618066287, "loss": 0.1812, "step": 15250 }, { "epoch": 0.9827250334068552, "grad_norm": 0.11746356636285782, "learning_rate": 0.00013448394298471578, "loss": 0.1807, "step": 15260 }, { "epoch": 0.9833690209778951, "grad_norm": 0.14012116193771362, "learning_rate": 0.00013444100978876868, "loss": 0.1857, "step": 15270 }, { "epoch": 0.984013008548935, "grad_norm": 0.12021970748901367, "learning_rate": 0.00013439807659282159, "loss": 0.1868, "step": 15280 }, { "epoch": 0.9846569961199749, "grad_norm": 0.12157026678323746, "learning_rate": 0.00013435514339687446, "loss": 0.1853, "step": 15290 }, { "epoch": 0.9853009836910147, "grad_norm": 0.12922550737857819, "learning_rate": 0.00013431221020092737, "loss": 0.1859, "step": 15300 }, { "epoch": 0.9859449712620546, "grad_norm": 0.14082151651382446, "learning_rate": 0.00013426927700498027, "loss": 0.1878, "step": 15310 }, { "epoch": 0.9865889588330945, "grad_norm": 0.1307188868522644, "learning_rate": 0.00013422634380903315, "loss": 0.1742, "step": 15320 }, { "epoch": 0.9872329464041344, "grad_norm": 0.14745371043682098, "learning_rate": 0.00013418341061308603, "loss": 0.1734, "step": 15330 }, { "epoch": 0.9878769339751743, "grad_norm": 0.12152179330587387, "learning_rate": 0.00013414047741713893, "loss": 0.1762, "step": 15340 }, { "epoch": 0.9885209215462142, "grad_norm": 0.13630938529968262, "learning_rate": 0.00013409754422119183, "loss": 0.1718, "step": 15350 }, { "epoch": 0.9891649091172541, "grad_norm": 0.13219647109508514, "learning_rate": 0.00013405461102524474, "loss": 0.1818, "step": 15360 }, { "epoch": 0.989808896688294, "grad_norm": 0.1335272639989853, "learning_rate": 0.00013401167782929762, "loss": 0.1831, "step": 15370 }, { "epoch": 0.9904528842593338, "grad_norm": 0.12494009733200073, "learning_rate": 0.00013396874463335052, "loss": 0.1798, "step": 15380 }, { "epoch": 0.9910968718303736, "grad_norm": 0.12130030244588852, "learning_rate": 0.00013392581143740342, "loss": 0.1872, "step": 15390 }, { "epoch": 0.9917408594014135, "grad_norm": 0.1297132670879364, "learning_rate": 0.0001338828782414563, "loss": 0.1853, "step": 15400 }, { "epoch": 0.9923848469724534, "grad_norm": 0.1165451928973198, "learning_rate": 0.00013383994504550918, "loss": 0.1798, "step": 15410 }, { "epoch": 0.9930288345434933, "grad_norm": 0.12815703451633453, "learning_rate": 0.00013379701184956208, "loss": 0.184, "step": 15420 }, { "epoch": 0.9936728221145332, "grad_norm": 0.11647510528564453, "learning_rate": 0.000133754078653615, "loss": 0.1782, "step": 15430 }, { "epoch": 0.9943168096855731, "grad_norm": 0.11108002066612244, "learning_rate": 0.00013371114545766786, "loss": 0.1854, "step": 15440 }, { "epoch": 0.9949607972566129, "grad_norm": 0.12886935472488403, "learning_rate": 0.00013366821226172077, "loss": 0.1827, "step": 15450 }, { "epoch": 0.9956047848276528, "grad_norm": 0.2037731558084488, "learning_rate": 0.00013362527906577367, "loss": 0.1839, "step": 15460 }, { "epoch": 0.9962487723986927, "grad_norm": 0.12124759703874588, "learning_rate": 0.00013358234586982658, "loss": 0.1802, "step": 15470 }, { "epoch": 0.9968927599697326, "grad_norm": 0.11188387870788574, "learning_rate": 0.00013353941267387945, "loss": 0.1863, "step": 15480 }, { "epoch": 0.9975367475407725, "grad_norm": 0.1261894255876541, "learning_rate": 0.00013349647947793233, "loss": 0.1818, "step": 15490 }, { "epoch": 0.9981807351118124, "grad_norm": 0.12012599408626556, "learning_rate": 0.00013345354628198524, "loss": 0.1765, "step": 15500 }, { "epoch": 0.9988247226828523, "grad_norm": 0.12074902653694153, "learning_rate": 0.00013341061308603814, "loss": 0.1862, "step": 15510 }, { "epoch": 0.9994687102538921, "grad_norm": 0.12215737998485565, "learning_rate": 0.00013336767989009102, "loss": 0.1797, "step": 15520 }, { "epoch": 1.000112697824932, "grad_norm": 0.10930517315864563, "learning_rate": 0.00013332474669414392, "loss": 0.1798, "step": 15530 }, { "epoch": 1.000756685395972, "grad_norm": 0.12824746966362, "learning_rate": 0.00013328181349819682, "loss": 0.1764, "step": 15540 }, { "epoch": 1.0014006729670117, "grad_norm": 0.13124455511569977, "learning_rate": 0.0001332388803022497, "loss": 0.1744, "step": 15550 }, { "epoch": 1.0020446605380515, "grad_norm": 0.11920972913503647, "learning_rate": 0.0001331959471063026, "loss": 0.1739, "step": 15560 }, { "epoch": 1.0026886481090915, "grad_norm": 0.12937653064727783, "learning_rate": 0.00013315301391035548, "loss": 0.1785, "step": 15570 }, { "epoch": 1.0033326356801313, "grad_norm": 0.11444423347711563, "learning_rate": 0.0001331100807144084, "loss": 0.1798, "step": 15580 }, { "epoch": 1.0039766232511713, "grad_norm": 0.1398259848356247, "learning_rate": 0.00013306714751846126, "loss": 0.1777, "step": 15590 }, { "epoch": 1.004620610822211, "grad_norm": 0.12342202663421631, "learning_rate": 0.00013302421432251417, "loss": 0.1791, "step": 15600 }, { "epoch": 1.005264598393251, "grad_norm": 0.13104422390460968, "learning_rate": 0.00013298128112656707, "loss": 0.177, "step": 15610 }, { "epoch": 1.005908585964291, "grad_norm": 0.13560868799686432, "learning_rate": 0.00013293834793061998, "loss": 0.1775, "step": 15620 }, { "epoch": 1.0065525735353307, "grad_norm": 0.14058542251586914, "learning_rate": 0.00013289541473467285, "loss": 0.1754, "step": 15630 }, { "epoch": 1.0071965611063707, "grad_norm": 0.13253679871559143, "learning_rate": 0.00013285248153872576, "loss": 0.1694, "step": 15640 }, { "epoch": 1.0078405486774105, "grad_norm": 0.13676904141902924, "learning_rate": 0.00013280954834277864, "loss": 0.1695, "step": 15650 }, { "epoch": 1.0084845362484505, "grad_norm": 0.13095560669898987, "learning_rate": 0.00013276661514683154, "loss": 0.1779, "step": 15660 }, { "epoch": 1.0091285238194903, "grad_norm": 0.13252544403076172, "learning_rate": 0.00013272368195088442, "loss": 0.179, "step": 15670 }, { "epoch": 1.0097725113905303, "grad_norm": 0.12298309803009033, "learning_rate": 0.00013268074875493732, "loss": 0.177, "step": 15680 }, { "epoch": 1.01041649896157, "grad_norm": 0.11923740059137344, "learning_rate": 0.00013263781555899023, "loss": 0.1666, "step": 15690 }, { "epoch": 1.0110604865326098, "grad_norm": 0.12082485109567642, "learning_rate": 0.0001325948823630431, "loss": 0.1771, "step": 15700 }, { "epoch": 1.0117044741036498, "grad_norm": 0.12133154273033142, "learning_rate": 0.000132551949167096, "loss": 0.1778, "step": 15710 }, { "epoch": 1.0123484616746896, "grad_norm": 0.12418254464864731, "learning_rate": 0.0001325090159711489, "loss": 0.1839, "step": 15720 }, { "epoch": 1.0129924492457296, "grad_norm": 0.13333559036254883, "learning_rate": 0.0001324660827752018, "loss": 0.1734, "step": 15730 }, { "epoch": 1.0136364368167694, "grad_norm": 0.11779139190912247, "learning_rate": 0.0001324231495792547, "loss": 0.1709, "step": 15740 }, { "epoch": 1.0142804243878094, "grad_norm": 0.12488538026809692, "learning_rate": 0.00013238021638330757, "loss": 0.1686, "step": 15750 }, { "epoch": 1.0149244119588492, "grad_norm": 0.1405273973941803, "learning_rate": 0.00013233728318736047, "loss": 0.1664, "step": 15760 }, { "epoch": 1.015568399529889, "grad_norm": 0.1369633823633194, "learning_rate": 0.00013229434999141338, "loss": 0.1762, "step": 15770 }, { "epoch": 1.016212387100929, "grad_norm": 0.12681257724761963, "learning_rate": 0.00013225141679546626, "loss": 0.1695, "step": 15780 }, { "epoch": 1.0168563746719688, "grad_norm": 0.13001319766044617, "learning_rate": 0.00013220848359951916, "loss": 0.1822, "step": 15790 }, { "epoch": 1.0175003622430088, "grad_norm": 0.10550416260957718, "learning_rate": 0.00013216555040357206, "loss": 0.1721, "step": 15800 }, { "epoch": 1.0181443498140486, "grad_norm": 0.11950920522212982, "learning_rate": 0.00013212261720762494, "loss": 0.1756, "step": 15810 }, { "epoch": 1.0187883373850886, "grad_norm": 0.12675365805625916, "learning_rate": 0.00013207968401167782, "loss": 0.1711, "step": 15820 }, { "epoch": 1.0194323249561283, "grad_norm": 0.11480483412742615, "learning_rate": 0.00013203675081573072, "loss": 0.1771, "step": 15830 }, { "epoch": 1.0200763125271681, "grad_norm": 0.13444365561008453, "learning_rate": 0.00013199381761978363, "loss": 0.1705, "step": 15840 }, { "epoch": 1.0207203000982081, "grad_norm": 0.12708884477615356, "learning_rate": 0.00013195088442383653, "loss": 0.1807, "step": 15850 }, { "epoch": 1.021364287669248, "grad_norm": 0.13939440250396729, "learning_rate": 0.0001319079512278894, "loss": 0.1765, "step": 15860 }, { "epoch": 1.022008275240288, "grad_norm": 0.1337936520576477, "learning_rate": 0.0001318650180319423, "loss": 0.18, "step": 15870 }, { "epoch": 1.0226522628113277, "grad_norm": 0.132439523935318, "learning_rate": 0.00013182208483599522, "loss": 0.1748, "step": 15880 }, { "epoch": 1.0232962503823677, "grad_norm": 0.12954220175743103, "learning_rate": 0.0001317791516400481, "loss": 0.1732, "step": 15890 }, { "epoch": 1.0239402379534075, "grad_norm": 0.14357155561447144, "learning_rate": 0.00013173621844410097, "loss": 0.1741, "step": 15900 }, { "epoch": 1.0245842255244473, "grad_norm": 0.14137688279151917, "learning_rate": 0.00013169328524815387, "loss": 0.1734, "step": 15910 }, { "epoch": 1.0252282130954873, "grad_norm": 0.14108732342720032, "learning_rate": 0.00013165035205220678, "loss": 0.173, "step": 15920 }, { "epoch": 1.025872200666527, "grad_norm": 0.1330333650112152, "learning_rate": 0.00013160741885625966, "loss": 0.1745, "step": 15930 }, { "epoch": 1.026516188237567, "grad_norm": 0.13821035623550415, "learning_rate": 0.00013156448566031256, "loss": 0.1724, "step": 15940 }, { "epoch": 1.0271601758086069, "grad_norm": 0.1317996233701706, "learning_rate": 0.00013152155246436546, "loss": 0.1674, "step": 15950 }, { "epoch": 1.0278041633796469, "grad_norm": 0.13631917536258698, "learning_rate": 0.00013147861926841837, "loss": 0.1723, "step": 15960 }, { "epoch": 1.0284481509506866, "grad_norm": 0.11932393163442612, "learning_rate": 0.00013143568607247122, "loss": 0.1739, "step": 15970 }, { "epoch": 1.0290921385217264, "grad_norm": 0.13175125420093536, "learning_rate": 0.00013139275287652412, "loss": 0.1838, "step": 15980 }, { "epoch": 1.0297361260927664, "grad_norm": 0.148899644613266, "learning_rate": 0.00013134981968057703, "loss": 0.1735, "step": 15990 }, { "epoch": 1.0303801136638062, "grad_norm": 0.1302066594362259, "learning_rate": 0.00013130688648462993, "loss": 0.169, "step": 16000 }, { "epoch": 1.0310241012348462, "grad_norm": 0.13850553333759308, "learning_rate": 0.0001312639532886828, "loss": 0.1683, "step": 16010 }, { "epoch": 1.031668088805886, "grad_norm": 0.14178094267845154, "learning_rate": 0.0001312210200927357, "loss": 0.1738, "step": 16020 }, { "epoch": 1.032312076376926, "grad_norm": 0.12964002788066864, "learning_rate": 0.00013117808689678862, "loss": 0.1711, "step": 16030 }, { "epoch": 1.0329560639479658, "grad_norm": 0.13824696838855743, "learning_rate": 0.0001311351537008415, "loss": 0.1742, "step": 16040 }, { "epoch": 1.0336000515190056, "grad_norm": 0.13928435742855072, "learning_rate": 0.00013109222050489437, "loss": 0.173, "step": 16050 }, { "epoch": 1.0342440390900456, "grad_norm": 0.13306038081645966, "learning_rate": 0.00013104928730894728, "loss": 0.1757, "step": 16060 }, { "epoch": 1.0348880266610854, "grad_norm": 0.13998208940029144, "learning_rate": 0.00013100635411300018, "loss": 0.1716, "step": 16070 }, { "epoch": 1.0355320142321254, "grad_norm": 0.15958115458488464, "learning_rate": 0.00013096342091705308, "loss": 0.1722, "step": 16080 }, { "epoch": 1.0361760018031652, "grad_norm": 0.1364937722682953, "learning_rate": 0.00013092048772110596, "loss": 0.1852, "step": 16090 }, { "epoch": 1.0368199893742052, "grad_norm": 0.13165535032749176, "learning_rate": 0.00013087755452515886, "loss": 0.167, "step": 16100 }, { "epoch": 1.037463976945245, "grad_norm": 0.13012497127056122, "learning_rate": 0.00013083462132921177, "loss": 0.1731, "step": 16110 }, { "epoch": 1.0381079645162847, "grad_norm": 0.11635333299636841, "learning_rate": 0.00013079168813326465, "loss": 0.1774, "step": 16120 }, { "epoch": 1.0387519520873247, "grad_norm": 0.1622191071510315, "learning_rate": 0.00013074875493731752, "loss": 0.1691, "step": 16130 }, { "epoch": 1.0393959396583645, "grad_norm": 0.13356581330299377, "learning_rate": 0.00013070582174137043, "loss": 0.1772, "step": 16140 }, { "epoch": 1.0400399272294045, "grad_norm": 0.14244596660137177, "learning_rate": 0.00013066288854542333, "loss": 0.1785, "step": 16150 }, { "epoch": 1.0406839148004443, "grad_norm": 0.12603901326656342, "learning_rate": 0.0001306199553494762, "loss": 0.1788, "step": 16160 }, { "epoch": 1.0413279023714843, "grad_norm": 0.13126958906650543, "learning_rate": 0.0001305770221535291, "loss": 0.1721, "step": 16170 }, { "epoch": 1.041971889942524, "grad_norm": 0.12673409283161163, "learning_rate": 0.00013053408895758202, "loss": 0.1729, "step": 16180 }, { "epoch": 1.0426158775135639, "grad_norm": 0.11954262107610703, "learning_rate": 0.00013049115576163492, "loss": 0.1707, "step": 16190 }, { "epoch": 1.0432598650846039, "grad_norm": 0.12746989727020264, "learning_rate": 0.0001304482225656878, "loss": 0.1744, "step": 16200 }, { "epoch": 1.0439038526556437, "grad_norm": 0.12874457240104675, "learning_rate": 0.00013040528936974068, "loss": 0.1714, "step": 16210 }, { "epoch": 1.0445478402266837, "grad_norm": 0.11396441608667374, "learning_rate": 0.00013036235617379358, "loss": 0.1769, "step": 16220 }, { "epoch": 1.0451918277977235, "grad_norm": 0.12133173644542694, "learning_rate": 0.00013031942297784648, "loss": 0.173, "step": 16230 }, { "epoch": 1.0458358153687635, "grad_norm": 0.13111525774002075, "learning_rate": 0.00013027648978189936, "loss": 0.1714, "step": 16240 }, { "epoch": 1.0464798029398032, "grad_norm": 0.14384983479976654, "learning_rate": 0.00013023355658595227, "loss": 0.1839, "step": 16250 }, { "epoch": 1.047123790510843, "grad_norm": 0.16398075222969055, "learning_rate": 0.00013019062339000517, "loss": 0.1691, "step": 16260 }, { "epoch": 1.047767778081883, "grad_norm": 0.11512302607297897, "learning_rate": 0.00013014769019405805, "loss": 0.176, "step": 16270 }, { "epoch": 1.0484117656529228, "grad_norm": 0.11759896576404572, "learning_rate": 0.00013010475699811095, "loss": 0.1771, "step": 16280 }, { "epoch": 1.0490557532239628, "grad_norm": 0.13450947403907776, "learning_rate": 0.00013006182380216383, "loss": 0.1719, "step": 16290 }, { "epoch": 1.0496997407950026, "grad_norm": 0.15195810794830322, "learning_rate": 0.00013001889060621673, "loss": 0.1779, "step": 16300 }, { "epoch": 1.0503437283660426, "grad_norm": 0.13236702978610992, "learning_rate": 0.0001299759574102696, "loss": 0.1793, "step": 16310 }, { "epoch": 1.0509877159370824, "grad_norm": 0.16313360631465912, "learning_rate": 0.00012993302421432251, "loss": 0.1757, "step": 16320 }, { "epoch": 1.0516317035081224, "grad_norm": 0.1280718594789505, "learning_rate": 0.00012989009101837542, "loss": 0.1743, "step": 16330 }, { "epoch": 1.0522756910791622, "grad_norm": 0.12888510525226593, "learning_rate": 0.00012984715782242832, "loss": 0.1763, "step": 16340 }, { "epoch": 1.052919678650202, "grad_norm": 0.11371864378452301, "learning_rate": 0.0001298042246264812, "loss": 0.1662, "step": 16350 }, { "epoch": 1.053563666221242, "grad_norm": 0.1479182243347168, "learning_rate": 0.0001297612914305341, "loss": 0.17, "step": 16360 }, { "epoch": 1.0542076537922818, "grad_norm": 0.13329827785491943, "learning_rate": 0.00012971835823458698, "loss": 0.1747, "step": 16370 }, { "epoch": 1.0548516413633218, "grad_norm": 0.12777791917324066, "learning_rate": 0.00012967542503863988, "loss": 0.18, "step": 16380 }, { "epoch": 1.0554956289343616, "grad_norm": 0.1306074857711792, "learning_rate": 0.00012963249184269276, "loss": 0.1733, "step": 16390 }, { "epoch": 1.0561396165054013, "grad_norm": 0.15094999969005585, "learning_rate": 0.00012958955864674567, "loss": 0.1763, "step": 16400 }, { "epoch": 1.0567836040764413, "grad_norm": 0.1356460154056549, "learning_rate": 0.00012954662545079857, "loss": 0.1711, "step": 16410 }, { "epoch": 1.0574275916474811, "grad_norm": 0.1435125470161438, "learning_rate": 0.00012950369225485145, "loss": 0.1746, "step": 16420 }, { "epoch": 1.0580715792185211, "grad_norm": 0.15129783749580383, "learning_rate": 0.00012946075905890435, "loss": 0.1767, "step": 16430 }, { "epoch": 1.058715566789561, "grad_norm": 0.12802980840206146, "learning_rate": 0.00012941782586295726, "loss": 0.176, "step": 16440 }, { "epoch": 1.059359554360601, "grad_norm": 0.17059670388698578, "learning_rate": 0.00012937489266701013, "loss": 0.1831, "step": 16450 }, { "epoch": 1.0600035419316407, "grad_norm": 0.11709977686405182, "learning_rate": 0.00012933195947106304, "loss": 0.1671, "step": 16460 }, { "epoch": 1.0606475295026807, "grad_norm": 0.1333414614200592, "learning_rate": 0.00012928902627511591, "loss": 0.1738, "step": 16470 }, { "epoch": 1.0612915170737205, "grad_norm": 0.12795691192150116, "learning_rate": 0.00012924609307916882, "loss": 0.1756, "step": 16480 }, { "epoch": 1.0619355046447603, "grad_norm": 0.14788047969341278, "learning_rate": 0.00012920315988322172, "loss": 0.1747, "step": 16490 }, { "epoch": 1.0625794922158003, "grad_norm": 0.15883871912956238, "learning_rate": 0.0001291602266872746, "loss": 0.1726, "step": 16500 }, { "epoch": 1.06322347978684, "grad_norm": 0.12420821189880371, "learning_rate": 0.0001291172934913275, "loss": 0.1746, "step": 16510 }, { "epoch": 1.06386746735788, "grad_norm": 0.13075871765613556, "learning_rate": 0.0001290743602953804, "loss": 0.1762, "step": 16520 }, { "epoch": 1.0645114549289199, "grad_norm": 0.11601628363132477, "learning_rate": 0.00012903142709943329, "loss": 0.1728, "step": 16530 }, { "epoch": 1.0651554424999596, "grad_norm": 0.1301683783531189, "learning_rate": 0.00012898849390348616, "loss": 0.1832, "step": 16540 }, { "epoch": 1.0657994300709996, "grad_norm": 0.1354856789112091, "learning_rate": 0.00012894556070753907, "loss": 0.1717, "step": 16550 }, { "epoch": 1.0664434176420394, "grad_norm": 0.19930154085159302, "learning_rate": 0.00012890262751159197, "loss": 0.1773, "step": 16560 }, { "epoch": 1.0670874052130794, "grad_norm": 0.13913019001483917, "learning_rate": 0.00012885969431564488, "loss": 0.1731, "step": 16570 }, { "epoch": 1.0677313927841192, "grad_norm": 0.12298724800348282, "learning_rate": 0.00012881676111969775, "loss": 0.1727, "step": 16580 }, { "epoch": 1.0683753803551592, "grad_norm": 0.1638004332780838, "learning_rate": 0.00012877382792375066, "loss": 0.1804, "step": 16590 }, { "epoch": 1.069019367926199, "grad_norm": 0.12309394776821136, "learning_rate": 0.00012873089472780356, "loss": 0.1643, "step": 16600 }, { "epoch": 1.069663355497239, "grad_norm": 0.14139865338802338, "learning_rate": 0.00012868796153185644, "loss": 0.1753, "step": 16610 }, { "epoch": 1.0703073430682788, "grad_norm": 0.12702815234661102, "learning_rate": 0.00012864502833590932, "loss": 0.1786, "step": 16620 }, { "epoch": 1.0709513306393186, "grad_norm": 0.15113641321659088, "learning_rate": 0.00012860209513996222, "loss": 0.1735, "step": 16630 }, { "epoch": 1.0715953182103586, "grad_norm": 0.13782447576522827, "learning_rate": 0.00012855916194401512, "loss": 0.1741, "step": 16640 }, { "epoch": 1.0722393057813984, "grad_norm": 0.1382203847169876, "learning_rate": 0.000128516228748068, "loss": 0.1779, "step": 16650 }, { "epoch": 1.0728832933524384, "grad_norm": 0.11252477020025253, "learning_rate": 0.0001284732955521209, "loss": 0.1793, "step": 16660 }, { "epoch": 1.0735272809234782, "grad_norm": 0.15449810028076172, "learning_rate": 0.0001284303623561738, "loss": 0.1775, "step": 16670 }, { "epoch": 1.074171268494518, "grad_norm": 0.1619141399860382, "learning_rate": 0.0001283874291602267, "loss": 0.1693, "step": 16680 }, { "epoch": 1.074815256065558, "grad_norm": 0.14270204305648804, "learning_rate": 0.00012834449596427956, "loss": 0.167, "step": 16690 }, { "epoch": 1.0754592436365977, "grad_norm": 0.1449723243713379, "learning_rate": 0.00012830156276833247, "loss": 0.1717, "step": 16700 }, { "epoch": 1.0761032312076377, "grad_norm": 0.13803766667842865, "learning_rate": 0.00012825862957238537, "loss": 0.1743, "step": 16710 }, { "epoch": 1.0767472187786775, "grad_norm": 0.1357034146785736, "learning_rate": 0.00012821569637643828, "loss": 0.1677, "step": 16720 }, { "epoch": 1.0773912063497175, "grad_norm": 0.13444702327251434, "learning_rate": 0.00012817276318049115, "loss": 0.176, "step": 16730 }, { "epoch": 1.0780351939207573, "grad_norm": 0.13935059309005737, "learning_rate": 0.00012812982998454406, "loss": 0.1731, "step": 16740 }, { "epoch": 1.0786791814917973, "grad_norm": 0.13121509552001953, "learning_rate": 0.00012808689678859696, "loss": 0.1725, "step": 16750 }, { "epoch": 1.079323169062837, "grad_norm": 0.12144935876131058, "learning_rate": 0.00012804396359264984, "loss": 0.1766, "step": 16760 }, { "epoch": 1.0799671566338769, "grad_norm": 0.14479634165763855, "learning_rate": 0.00012800103039670272, "loss": 0.1755, "step": 16770 }, { "epoch": 1.0806111442049169, "grad_norm": 0.1435413807630539, "learning_rate": 0.00012795809720075562, "loss": 0.1743, "step": 16780 }, { "epoch": 1.0812551317759567, "grad_norm": 0.12993212044239044, "learning_rate": 0.00012791516400480852, "loss": 0.1769, "step": 16790 }, { "epoch": 1.0818991193469967, "grad_norm": 0.11959531903266907, "learning_rate": 0.00012787223080886143, "loss": 0.1719, "step": 16800 }, { "epoch": 1.0825431069180365, "grad_norm": 0.1347496509552002, "learning_rate": 0.0001278292976129143, "loss": 0.1739, "step": 16810 }, { "epoch": 1.0831870944890765, "grad_norm": 0.12023822963237762, "learning_rate": 0.0001277863644169672, "loss": 0.1788, "step": 16820 }, { "epoch": 1.0838310820601162, "grad_norm": 0.13553470373153687, "learning_rate": 0.00012774343122102011, "loss": 0.1823, "step": 16830 }, { "epoch": 1.084475069631156, "grad_norm": 0.14794106781482697, "learning_rate": 0.000127700498025073, "loss": 0.1749, "step": 16840 }, { "epoch": 1.085119057202196, "grad_norm": 0.15446247160434723, "learning_rate": 0.00012765756482912587, "loss": 0.1701, "step": 16850 }, { "epoch": 1.0857630447732358, "grad_norm": 0.15765096247196198, "learning_rate": 0.00012761463163317877, "loss": 0.1724, "step": 16860 }, { "epoch": 1.0864070323442758, "grad_norm": 0.14140544831752777, "learning_rate": 0.00012757169843723168, "loss": 0.1682, "step": 16870 }, { "epoch": 1.0870510199153156, "grad_norm": 0.13772918283939362, "learning_rate": 0.00012752876524128455, "loss": 0.1807, "step": 16880 }, { "epoch": 1.0876950074863556, "grad_norm": 0.12830355763435364, "learning_rate": 0.00012748583204533746, "loss": 0.1713, "step": 16890 }, { "epoch": 1.0883389950573954, "grad_norm": 0.12116768956184387, "learning_rate": 0.00012744289884939036, "loss": 0.1729, "step": 16900 }, { "epoch": 1.0889829826284352, "grad_norm": 0.12012067437171936, "learning_rate": 0.00012739996565344327, "loss": 0.1704, "step": 16910 }, { "epoch": 1.0896269701994752, "grad_norm": 0.13145123422145844, "learning_rate": 0.00012735703245749614, "loss": 0.169, "step": 16920 }, { "epoch": 1.090270957770515, "grad_norm": 0.13279803097248077, "learning_rate": 0.00012731409926154905, "loss": 0.1731, "step": 16930 }, { "epoch": 1.090914945341555, "grad_norm": 0.12806172668933868, "learning_rate": 0.00012727116606560192, "loss": 0.1759, "step": 16940 }, { "epoch": 1.0915589329125948, "grad_norm": 0.13587863743305206, "learning_rate": 0.00012722823286965483, "loss": 0.1813, "step": 16950 }, { "epoch": 1.0922029204836348, "grad_norm": 0.13647578656673431, "learning_rate": 0.0001271852996737077, "loss": 0.1763, "step": 16960 }, { "epoch": 1.0928469080546745, "grad_norm": 0.13365112245082855, "learning_rate": 0.0001271423664777606, "loss": 0.172, "step": 16970 }, { "epoch": 1.0934908956257143, "grad_norm": 0.14343833923339844, "learning_rate": 0.00012709943328181351, "loss": 0.1761, "step": 16980 }, { "epoch": 1.0941348831967543, "grad_norm": 0.14254385232925415, "learning_rate": 0.0001270565000858664, "loss": 0.1825, "step": 16990 }, { "epoch": 1.0947788707677941, "grad_norm": 0.1478775143623352, "learning_rate": 0.0001270135668899193, "loss": 0.1758, "step": 17000 }, { "epoch": 1.0954228583388341, "grad_norm": 0.13817040622234344, "learning_rate": 0.0001269706336939722, "loss": 0.173, "step": 17010 }, { "epoch": 1.096066845909874, "grad_norm": 0.1403549164533615, "learning_rate": 0.00012692770049802508, "loss": 0.1763, "step": 17020 }, { "epoch": 1.096710833480914, "grad_norm": 0.13607999682426453, "learning_rate": 0.00012688476730207795, "loss": 0.1774, "step": 17030 }, { "epoch": 1.0973548210519537, "grad_norm": 0.12927202880382538, "learning_rate": 0.00012684183410613086, "loss": 0.1726, "step": 17040 }, { "epoch": 1.0979988086229935, "grad_norm": 0.1538647562265396, "learning_rate": 0.00012679890091018376, "loss": 0.1714, "step": 17050 }, { "epoch": 1.0986427961940335, "grad_norm": 0.12869948148727417, "learning_rate": 0.00012675596771423667, "loss": 0.1689, "step": 17060 }, { "epoch": 1.0992867837650733, "grad_norm": 0.17684046924114227, "learning_rate": 0.00012671303451828954, "loss": 0.1852, "step": 17070 }, { "epoch": 1.0999307713361133, "grad_norm": 0.16763275861740112, "learning_rate": 0.00012667010132234245, "loss": 0.1766, "step": 17080 }, { "epoch": 1.100574758907153, "grad_norm": 0.13739065825939178, "learning_rate": 0.00012662716812639535, "loss": 0.1803, "step": 17090 }, { "epoch": 1.101218746478193, "grad_norm": 0.14123578369617462, "learning_rate": 0.00012658423493044823, "loss": 0.1752, "step": 17100 }, { "epoch": 1.1018627340492329, "grad_norm": 0.13280892372131348, "learning_rate": 0.0001265413017345011, "loss": 0.1672, "step": 17110 }, { "epoch": 1.1025067216202726, "grad_norm": 0.12539070844650269, "learning_rate": 0.000126498368538554, "loss": 0.1765, "step": 17120 }, { "epoch": 1.1031507091913126, "grad_norm": 0.14992517232894897, "learning_rate": 0.00012645543534260692, "loss": 0.1752, "step": 17130 }, { "epoch": 1.1037946967623524, "grad_norm": 0.12141266465187073, "learning_rate": 0.0001264125021466598, "loss": 0.1742, "step": 17140 }, { "epoch": 1.1044386843333924, "grad_norm": 0.13220304250717163, "learning_rate": 0.0001263695689507127, "loss": 0.1757, "step": 17150 }, { "epoch": 1.1050826719044322, "grad_norm": 0.11885107308626175, "learning_rate": 0.0001263266357547656, "loss": 0.1711, "step": 17160 }, { "epoch": 1.1057266594754722, "grad_norm": 0.14010374248027802, "learning_rate": 0.0001262837025588185, "loss": 0.1766, "step": 17170 }, { "epoch": 1.106370647046512, "grad_norm": 0.13666768372058868, "learning_rate": 0.00012624076936287138, "loss": 0.1661, "step": 17180 }, { "epoch": 1.1070146346175518, "grad_norm": 0.14479708671569824, "learning_rate": 0.00012619783616692426, "loss": 0.1749, "step": 17190 }, { "epoch": 1.1076586221885918, "grad_norm": 0.14918489754199982, "learning_rate": 0.00012615490297097716, "loss": 0.1679, "step": 17200 }, { "epoch": 1.1083026097596316, "grad_norm": 0.1371740698814392, "learning_rate": 0.00012611196977503007, "loss": 0.175, "step": 17210 }, { "epoch": 1.1089465973306716, "grad_norm": 0.13906264305114746, "learning_rate": 0.00012606903657908294, "loss": 0.1748, "step": 17220 }, { "epoch": 1.1095905849017114, "grad_norm": 0.15174366533756256, "learning_rate": 0.00012602610338313585, "loss": 0.1762, "step": 17230 }, { "epoch": 1.1102345724727514, "grad_norm": 0.13997900485992432, "learning_rate": 0.00012598317018718875, "loss": 0.1664, "step": 17240 }, { "epoch": 1.1108785600437912, "grad_norm": 0.1309305727481842, "learning_rate": 0.00012594023699124166, "loss": 0.1826, "step": 17250 }, { "epoch": 1.111522547614831, "grad_norm": 0.14092007279396057, "learning_rate": 0.0001258973037952945, "loss": 0.1743, "step": 17260 }, { "epoch": 1.112166535185871, "grad_norm": 0.1513521671295166, "learning_rate": 0.0001258543705993474, "loss": 0.1699, "step": 17270 }, { "epoch": 1.1128105227569107, "grad_norm": 0.15364302694797516, "learning_rate": 0.00012581143740340032, "loss": 0.1757, "step": 17280 }, { "epoch": 1.1134545103279507, "grad_norm": 0.12997905910015106, "learning_rate": 0.00012576850420745322, "loss": 0.1759, "step": 17290 }, { "epoch": 1.1140984978989905, "grad_norm": 0.13143028318881989, "learning_rate": 0.0001257255710115061, "loss": 0.1724, "step": 17300 }, { "epoch": 1.1147424854700305, "grad_norm": 0.1574828177690506, "learning_rate": 0.000125682637815559, "loss": 0.1745, "step": 17310 }, { "epoch": 1.1153864730410703, "grad_norm": 0.16728320717811584, "learning_rate": 0.0001256397046196119, "loss": 0.1828, "step": 17320 }, { "epoch": 1.11603046061211, "grad_norm": 0.13204282522201538, "learning_rate": 0.00012559677142366478, "loss": 0.1768, "step": 17330 }, { "epoch": 1.11667444818315, "grad_norm": 0.14306341111660004, "learning_rate": 0.00012555383822771766, "loss": 0.1717, "step": 17340 }, { "epoch": 1.1173184357541899, "grad_norm": 0.13577599823474884, "learning_rate": 0.00012551090503177056, "loss": 0.1725, "step": 17350 }, { "epoch": 1.1179624233252299, "grad_norm": 0.1332784742116928, "learning_rate": 0.00012546797183582347, "loss": 0.1769, "step": 17360 }, { "epoch": 1.1186064108962697, "grad_norm": 0.1388246864080429, "learning_rate": 0.00012542503863987635, "loss": 0.1757, "step": 17370 }, { "epoch": 1.1192503984673097, "grad_norm": 0.12589982151985168, "learning_rate": 0.00012538210544392925, "loss": 0.1739, "step": 17380 }, { "epoch": 1.1198943860383495, "grad_norm": 0.13973939418792725, "learning_rate": 0.00012533917224798215, "loss": 0.1847, "step": 17390 }, { "epoch": 1.1205383736093892, "grad_norm": 0.13390304148197174, "learning_rate": 0.00012529623905203506, "loss": 0.1741, "step": 17400 }, { "epoch": 1.1211823611804292, "grad_norm": 0.12520112097263336, "learning_rate": 0.00012525330585608794, "loss": 0.1768, "step": 17410 }, { "epoch": 1.121826348751469, "grad_norm": 0.11815532296895981, "learning_rate": 0.0001252103726601408, "loss": 0.1771, "step": 17420 }, { "epoch": 1.122470336322509, "grad_norm": 0.12474886327981949, "learning_rate": 0.00012516743946419372, "loss": 0.176, "step": 17430 }, { "epoch": 1.1231143238935488, "grad_norm": 0.1535138189792633, "learning_rate": 0.00012512450626824662, "loss": 0.1773, "step": 17440 }, { "epoch": 1.1237583114645888, "grad_norm": 0.1259736269712448, "learning_rate": 0.0001250815730722995, "loss": 0.1716, "step": 17450 }, { "epoch": 1.1244022990356286, "grad_norm": 0.13954590260982513, "learning_rate": 0.0001250386398763524, "loss": 0.1837, "step": 17460 }, { "epoch": 1.1250462866066684, "grad_norm": 0.1338525414466858, "learning_rate": 0.0001249957066804053, "loss": 0.1808, "step": 17470 }, { "epoch": 1.1256902741777084, "grad_norm": 0.12459764629602432, "learning_rate": 0.00012495277348445818, "loss": 0.1763, "step": 17480 }, { "epoch": 1.1263342617487482, "grad_norm": 0.13595038652420044, "learning_rate": 0.0001249098402885111, "loss": 0.173, "step": 17490 }, { "epoch": 1.1269782493197882, "grad_norm": 0.12246861308813095, "learning_rate": 0.00012486690709256397, "loss": 0.171, "step": 17500 }, { "epoch": 1.127622236890828, "grad_norm": 0.12515889108181, "learning_rate": 0.00012482397389661687, "loss": 0.1717, "step": 17510 }, { "epoch": 1.128266224461868, "grad_norm": 0.13808731734752655, "learning_rate": 0.00012478104070066977, "loss": 0.1801, "step": 17520 }, { "epoch": 1.1289102120329078, "grad_norm": 0.13265231251716614, "learning_rate": 0.00012473810750472265, "loss": 0.1784, "step": 17530 }, { "epoch": 1.1295541996039478, "grad_norm": 0.13685911893844604, "learning_rate": 0.00012469517430877555, "loss": 0.1717, "step": 17540 }, { "epoch": 1.1301981871749875, "grad_norm": 0.1410847008228302, "learning_rate": 0.00012465224111282846, "loss": 0.1776, "step": 17550 }, { "epoch": 1.1308421747460273, "grad_norm": 0.13079971075057983, "learning_rate": 0.00012460930791688134, "loss": 0.1767, "step": 17560 }, { "epoch": 1.1314861623170673, "grad_norm": 0.1451231837272644, "learning_rate": 0.00012456637472093424, "loss": 0.1738, "step": 17570 }, { "epoch": 1.1321301498881071, "grad_norm": 0.14119642972946167, "learning_rate": 0.00012452344152498712, "loss": 0.1715, "step": 17580 }, { "epoch": 1.1327741374591471, "grad_norm": 0.1358432173728943, "learning_rate": 0.00012448050832904002, "loss": 0.1727, "step": 17590 }, { "epoch": 1.133418125030187, "grad_norm": 0.17729364335536957, "learning_rate": 0.0001244375751330929, "loss": 0.1697, "step": 17600 }, { "epoch": 1.1340621126012267, "grad_norm": 0.16262751817703247, "learning_rate": 0.0001243946419371458, "loss": 0.1806, "step": 17610 }, { "epoch": 1.1347061001722667, "grad_norm": 0.1390761286020279, "learning_rate": 0.0001243517087411987, "loss": 0.1779, "step": 17620 }, { "epoch": 1.1353500877433065, "grad_norm": 0.1332385241985321, "learning_rate": 0.0001243087755452516, "loss": 0.1777, "step": 17630 }, { "epoch": 1.1359940753143465, "grad_norm": 0.13882066309452057, "learning_rate": 0.0001242658423493045, "loss": 0.1728, "step": 17640 }, { "epoch": 1.1366380628853863, "grad_norm": 0.14075975120067596, "learning_rate": 0.0001242229091533574, "loss": 0.178, "step": 17650 }, { "epoch": 1.1372820504564263, "grad_norm": 0.14671260118484497, "learning_rate": 0.00012417997595741027, "loss": 0.1734, "step": 17660 }, { "epoch": 1.137926038027466, "grad_norm": 0.13693131506443024, "learning_rate": 0.00012413704276146317, "loss": 0.1725, "step": 17670 }, { "epoch": 1.138570025598506, "grad_norm": 0.11977869272232056, "learning_rate": 0.00012409410956551605, "loss": 0.1752, "step": 17680 }, { "epoch": 1.1392140131695458, "grad_norm": 0.1313997358083725, "learning_rate": 0.00012405117636956896, "loss": 0.1739, "step": 17690 }, { "epoch": 1.1398580007405856, "grad_norm": 0.11377724260091782, "learning_rate": 0.00012400824317362186, "loss": 0.1767, "step": 17700 }, { "epoch": 1.1405019883116256, "grad_norm": 0.15086132287979126, "learning_rate": 0.00012396530997767474, "loss": 0.1697, "step": 17710 }, { "epoch": 1.1411459758826654, "grad_norm": 0.13887199759483337, "learning_rate": 0.00012392237678172764, "loss": 0.18, "step": 17720 }, { "epoch": 1.1417899634537054, "grad_norm": 0.1326393485069275, "learning_rate": 0.00012387944358578055, "loss": 0.1744, "step": 17730 }, { "epoch": 1.1424339510247452, "grad_norm": 0.1401253342628479, "learning_rate": 0.00012383651038983342, "loss": 0.174, "step": 17740 }, { "epoch": 1.143077938595785, "grad_norm": 0.1339322179555893, "learning_rate": 0.0001237935771938863, "loss": 0.1751, "step": 17750 }, { "epoch": 1.143721926166825, "grad_norm": 0.13989681005477905, "learning_rate": 0.0001237506439979392, "loss": 0.1716, "step": 17760 }, { "epoch": 1.1443659137378648, "grad_norm": 0.14241741597652435, "learning_rate": 0.0001237077108019921, "loss": 0.1783, "step": 17770 }, { "epoch": 1.1450099013089048, "grad_norm": 0.12968406081199646, "learning_rate": 0.000123664777606045, "loss": 0.1728, "step": 17780 }, { "epoch": 1.1456538888799446, "grad_norm": 0.13966558873653412, "learning_rate": 0.0001236218444100979, "loss": 0.1768, "step": 17790 }, { "epoch": 1.1462978764509846, "grad_norm": 0.1271013468503952, "learning_rate": 0.0001235789112141508, "loss": 0.1643, "step": 17800 }, { "epoch": 1.1469418640220244, "grad_norm": 0.13609689474105835, "learning_rate": 0.0001235359780182037, "loss": 0.1815, "step": 17810 }, { "epoch": 1.1475858515930644, "grad_norm": 0.1397160142660141, "learning_rate": 0.00012349304482225657, "loss": 0.1806, "step": 17820 }, { "epoch": 1.1482298391641041, "grad_norm": 0.12264504283666611, "learning_rate": 0.00012345011162630945, "loss": 0.1785, "step": 17830 }, { "epoch": 1.148873826735144, "grad_norm": 0.1274278610944748, "learning_rate": 0.00012340717843036236, "loss": 0.1737, "step": 17840 }, { "epoch": 1.149517814306184, "grad_norm": 0.12884369492530823, "learning_rate": 0.00012336424523441526, "loss": 0.1807, "step": 17850 }, { "epoch": 1.1501618018772237, "grad_norm": 0.1372179090976715, "learning_rate": 0.00012332131203846814, "loss": 0.1714, "step": 17860 }, { "epoch": 1.1508057894482637, "grad_norm": 0.13448357582092285, "learning_rate": 0.00012327837884252104, "loss": 0.167, "step": 17870 }, { "epoch": 1.1514497770193035, "grad_norm": 0.16355498135089874, "learning_rate": 0.00012323544564657395, "loss": 0.1707, "step": 17880 }, { "epoch": 1.1520937645903433, "grad_norm": 0.13585568964481354, "learning_rate": 0.00012319251245062685, "loss": 0.1742, "step": 17890 }, { "epoch": 1.1527377521613833, "grad_norm": 0.12321072816848755, "learning_rate": 0.00012314957925467973, "loss": 0.1689, "step": 17900 }, { "epoch": 1.153381739732423, "grad_norm": 0.12862180173397064, "learning_rate": 0.0001231066460587326, "loss": 0.1733, "step": 17910 }, { "epoch": 1.154025727303463, "grad_norm": 0.14244221150875092, "learning_rate": 0.0001230637128627855, "loss": 0.1788, "step": 17920 }, { "epoch": 1.1546697148745029, "grad_norm": 0.13171330094337463, "learning_rate": 0.0001230207796668384, "loss": 0.1762, "step": 17930 }, { "epoch": 1.1553137024455429, "grad_norm": 0.12739551067352295, "learning_rate": 0.0001229778464708913, "loss": 0.1759, "step": 17940 }, { "epoch": 1.1559576900165827, "grad_norm": 0.14368867874145508, "learning_rate": 0.0001229349132749442, "loss": 0.184, "step": 17950 }, { "epoch": 1.1566016775876227, "grad_norm": 0.14962226152420044, "learning_rate": 0.0001228919800789971, "loss": 0.1744, "step": 17960 }, { "epoch": 1.1572456651586625, "grad_norm": 0.1326979547739029, "learning_rate": 0.00012284904688305, "loss": 0.1741, "step": 17970 }, { "epoch": 1.1578896527297022, "grad_norm": 0.13111551105976105, "learning_rate": 0.00012280611368710285, "loss": 0.1812, "step": 17980 }, { "epoch": 1.1585336403007422, "grad_norm": 0.15750883519649506, "learning_rate": 0.00012276318049115576, "loss": 0.1755, "step": 17990 }, { "epoch": 1.159177627871782, "grad_norm": 0.13021984696388245, "learning_rate": 0.00012272024729520866, "loss": 0.1846, "step": 18000 }, { "epoch": 1.159821615442822, "grad_norm": 0.11910473555326462, "learning_rate": 0.00012267731409926157, "loss": 0.1796, "step": 18010 }, { "epoch": 1.1604656030138618, "grad_norm": 0.16056357324123383, "learning_rate": 0.00012263438090331444, "loss": 0.1733, "step": 18020 }, { "epoch": 1.1611095905849016, "grad_norm": 0.12919503450393677, "learning_rate": 0.00012259144770736735, "loss": 0.1772, "step": 18030 }, { "epoch": 1.1617535781559416, "grad_norm": 0.1305883377790451, "learning_rate": 0.00012254851451142025, "loss": 0.1795, "step": 18040 }, { "epoch": 1.1623975657269814, "grad_norm": 0.1376548409461975, "learning_rate": 0.00012250558131547313, "loss": 0.1747, "step": 18050 }, { "epoch": 1.1630415532980214, "grad_norm": 0.1369004249572754, "learning_rate": 0.000122462648119526, "loss": 0.1695, "step": 18060 }, { "epoch": 1.1636855408690612, "grad_norm": 0.12904928624629974, "learning_rate": 0.0001224197149235789, "loss": 0.1814, "step": 18070 }, { "epoch": 1.1643295284401012, "grad_norm": 0.15712399780750275, "learning_rate": 0.0001223767817276318, "loss": 0.1754, "step": 18080 }, { "epoch": 1.164973516011141, "grad_norm": 0.12907636165618896, "learning_rate": 0.0001223338485316847, "loss": 0.1745, "step": 18090 }, { "epoch": 1.165617503582181, "grad_norm": 0.13696327805519104, "learning_rate": 0.0001222909153357376, "loss": 0.1768, "step": 18100 }, { "epoch": 1.1662614911532208, "grad_norm": 0.14513014256954193, "learning_rate": 0.0001222479821397905, "loss": 0.1705, "step": 18110 }, { "epoch": 1.1669054787242605, "grad_norm": 0.13584080338478088, "learning_rate": 0.0001222050489438434, "loss": 0.1778, "step": 18120 }, { "epoch": 1.1675494662953005, "grad_norm": 0.15758898854255676, "learning_rate": 0.00012216211574789628, "loss": 0.1846, "step": 18130 }, { "epoch": 1.1681934538663403, "grad_norm": 0.1171552911400795, "learning_rate": 0.00012211918255194916, "loss": 0.1756, "step": 18140 }, { "epoch": 1.1688374414373803, "grad_norm": 0.12330713868141174, "learning_rate": 0.00012207624935600206, "loss": 0.181, "step": 18150 }, { "epoch": 1.1694814290084201, "grad_norm": 0.12696073949337006, "learning_rate": 0.00012203331616005495, "loss": 0.1715, "step": 18160 }, { "epoch": 1.17012541657946, "grad_norm": 0.12521158158779144, "learning_rate": 0.00012199038296410786, "loss": 0.1708, "step": 18170 }, { "epoch": 1.1707694041505, "grad_norm": 0.16008839011192322, "learning_rate": 0.00012194744976816075, "loss": 0.1728, "step": 18180 }, { "epoch": 1.1714133917215397, "grad_norm": 0.12988567352294922, "learning_rate": 0.00012190451657221365, "loss": 0.1792, "step": 18190 }, { "epoch": 1.1720573792925797, "grad_norm": 0.12192658334970474, "learning_rate": 0.00012186158337626654, "loss": 0.1708, "step": 18200 }, { "epoch": 1.1727013668636195, "grad_norm": 0.14921365678310394, "learning_rate": 0.00012181865018031943, "loss": 0.1737, "step": 18210 }, { "epoch": 1.1733453544346595, "grad_norm": 0.1405935138463974, "learning_rate": 0.00012177571698437231, "loss": 0.1724, "step": 18220 }, { "epoch": 1.1739893420056993, "grad_norm": 0.1433720737695694, "learning_rate": 0.00012173278378842521, "loss": 0.1818, "step": 18230 }, { "epoch": 1.1746333295767393, "grad_norm": 0.13713210821151733, "learning_rate": 0.0001216898505924781, "loss": 0.1762, "step": 18240 }, { "epoch": 1.175277317147779, "grad_norm": 0.13249745965003967, "learning_rate": 0.000121646917396531, "loss": 0.1723, "step": 18250 }, { "epoch": 1.1759213047188188, "grad_norm": 0.15293817222118378, "learning_rate": 0.0001216039842005839, "loss": 0.1814, "step": 18260 }, { "epoch": 1.1765652922898588, "grad_norm": 0.13614743947982788, "learning_rate": 0.00012156105100463679, "loss": 0.1736, "step": 18270 }, { "epoch": 1.1772092798608986, "grad_norm": 0.13039974868297577, "learning_rate": 0.0001215181178086897, "loss": 0.1696, "step": 18280 }, { "epoch": 1.1778532674319386, "grad_norm": 0.1354525089263916, "learning_rate": 0.00012147518461274259, "loss": 0.1746, "step": 18290 }, { "epoch": 1.1784972550029784, "grad_norm": 0.12620486319065094, "learning_rate": 0.00012143225141679546, "loss": 0.1695, "step": 18300 }, { "epoch": 1.1791412425740182, "grad_norm": 0.13223139941692352, "learning_rate": 0.00012138931822084835, "loss": 0.1739, "step": 18310 }, { "epoch": 1.1797852301450582, "grad_norm": 0.17300228774547577, "learning_rate": 0.00012134638502490126, "loss": 0.1749, "step": 18320 }, { "epoch": 1.180429217716098, "grad_norm": 0.1678594946861267, "learning_rate": 0.00012130345182895415, "loss": 0.1823, "step": 18330 }, { "epoch": 1.181073205287138, "grad_norm": 0.13785363733768463, "learning_rate": 0.00012126051863300705, "loss": 0.1703, "step": 18340 }, { "epoch": 1.1817171928581778, "grad_norm": 0.12978799641132355, "learning_rate": 0.00012121758543705994, "loss": 0.1694, "step": 18350 }, { "epoch": 1.1823611804292178, "grad_norm": 0.149604931473732, "learning_rate": 0.00012117465224111285, "loss": 0.1677, "step": 18360 }, { "epoch": 1.1830051680002576, "grad_norm": 0.1354008913040161, "learning_rate": 0.00012113171904516574, "loss": 0.1732, "step": 18370 }, { "epoch": 1.1836491555712976, "grad_norm": 0.12676864862442017, "learning_rate": 0.00012108878584921861, "loss": 0.1665, "step": 18380 }, { "epoch": 1.1842931431423374, "grad_norm": 0.14545513689517975, "learning_rate": 0.0001210458526532715, "loss": 0.1728, "step": 18390 }, { "epoch": 1.1849371307133771, "grad_norm": 0.11551347374916077, "learning_rate": 0.00012100291945732441, "loss": 0.1823, "step": 18400 }, { "epoch": 1.1855811182844171, "grad_norm": 0.1484372317790985, "learning_rate": 0.0001209599862613773, "loss": 0.175, "step": 18410 }, { "epoch": 1.186225105855457, "grad_norm": 0.11232521384954453, "learning_rate": 0.00012091705306543019, "loss": 0.1765, "step": 18420 }, { "epoch": 1.186869093426497, "grad_norm": 0.1450515240430832, "learning_rate": 0.0001208741198694831, "loss": 0.1787, "step": 18430 }, { "epoch": 1.1875130809975367, "grad_norm": 0.12430396676063538, "learning_rate": 0.00012083118667353599, "loss": 0.17, "step": 18440 }, { "epoch": 1.1881570685685765, "grad_norm": 0.17320412397384644, "learning_rate": 0.00012078825347758889, "loss": 0.1815, "step": 18450 }, { "epoch": 1.1888010561396165, "grad_norm": 0.12625102698802948, "learning_rate": 0.00012074532028164175, "loss": 0.176, "step": 18460 }, { "epoch": 1.1894450437106563, "grad_norm": 0.13749054074287415, "learning_rate": 0.00012070238708569466, "loss": 0.1719, "step": 18470 }, { "epoch": 1.1900890312816963, "grad_norm": 0.1201244592666626, "learning_rate": 0.00012065945388974755, "loss": 0.1757, "step": 18480 }, { "epoch": 1.190733018852736, "grad_norm": 0.14012746512889862, "learning_rate": 0.00012061652069380045, "loss": 0.1708, "step": 18490 }, { "epoch": 1.191377006423776, "grad_norm": 0.13591374456882477, "learning_rate": 0.00012057358749785334, "loss": 0.174, "step": 18500 }, { "epoch": 1.1920209939948159, "grad_norm": 0.14036902785301208, "learning_rate": 0.00012053065430190625, "loss": 0.1773, "step": 18510 }, { "epoch": 1.1926649815658559, "grad_norm": 0.13750538229942322, "learning_rate": 0.00012048772110595914, "loss": 0.1778, "step": 18520 }, { "epoch": 1.1933089691368957, "grad_norm": 0.1434217244386673, "learning_rate": 0.00012044478791001203, "loss": 0.1816, "step": 18530 }, { "epoch": 1.1939529567079354, "grad_norm": 0.12861737608909607, "learning_rate": 0.0001204018547140649, "loss": 0.1834, "step": 18540 }, { "epoch": 1.1945969442789754, "grad_norm": 0.1283174455165863, "learning_rate": 0.00012035892151811781, "loss": 0.1724, "step": 18550 }, { "epoch": 1.1952409318500152, "grad_norm": 0.1456732153892517, "learning_rate": 0.0001203159883221707, "loss": 0.177, "step": 18560 }, { "epoch": 1.1958849194210552, "grad_norm": 0.1259283721446991, "learning_rate": 0.0001202730551262236, "loss": 0.1662, "step": 18570 }, { "epoch": 1.196528906992095, "grad_norm": 0.1500595211982727, "learning_rate": 0.0001202301219302765, "loss": 0.1736, "step": 18580 }, { "epoch": 1.197172894563135, "grad_norm": 0.1349402815103531, "learning_rate": 0.00012018718873432939, "loss": 0.1757, "step": 18590 }, { "epoch": 1.1978168821341748, "grad_norm": 0.12113208323717117, "learning_rate": 0.00012014425553838229, "loss": 0.1754, "step": 18600 }, { "epoch": 1.1984608697052146, "grad_norm": 0.1310352236032486, "learning_rate": 0.00012010132234243518, "loss": 0.1735, "step": 18610 }, { "epoch": 1.1991048572762546, "grad_norm": 0.1531498283147812, "learning_rate": 0.00012005838914648806, "loss": 0.1749, "step": 18620 }, { "epoch": 1.1997488448472944, "grad_norm": 0.14077003300189972, "learning_rate": 0.00012001545595054095, "loss": 0.1767, "step": 18630 }, { "epoch": 1.2003928324183344, "grad_norm": 0.13434956967830658, "learning_rate": 0.00011997252275459385, "loss": 0.1762, "step": 18640 }, { "epoch": 1.2010368199893742, "grad_norm": 0.14463220536708832, "learning_rate": 0.00011992958955864674, "loss": 0.1873, "step": 18650 }, { "epoch": 1.2016808075604142, "grad_norm": 0.13636524975299835, "learning_rate": 0.00011988665636269965, "loss": 0.1705, "step": 18660 }, { "epoch": 1.202324795131454, "grad_norm": 0.11996162682771683, "learning_rate": 0.00011984372316675254, "loss": 0.1703, "step": 18670 }, { "epoch": 1.2029687827024937, "grad_norm": 0.11470887064933777, "learning_rate": 0.00011980078997080544, "loss": 0.1776, "step": 18680 }, { "epoch": 1.2036127702735337, "grad_norm": 0.14867368340492249, "learning_rate": 0.00011975785677485833, "loss": 0.1775, "step": 18690 }, { "epoch": 1.2042567578445735, "grad_norm": 0.11620430648326874, "learning_rate": 0.00011971492357891121, "loss": 0.1748, "step": 18700 }, { "epoch": 1.2049007454156135, "grad_norm": 0.15783710777759552, "learning_rate": 0.0001196719903829641, "loss": 0.173, "step": 18710 }, { "epoch": 1.2055447329866533, "grad_norm": 0.16559724509716034, "learning_rate": 0.000119629057187017, "loss": 0.1743, "step": 18720 }, { "epoch": 1.2061887205576933, "grad_norm": 0.1386536955833435, "learning_rate": 0.0001195861239910699, "loss": 0.1741, "step": 18730 }, { "epoch": 1.2068327081287331, "grad_norm": 0.1350770890712738, "learning_rate": 0.0001195431907951228, "loss": 0.1758, "step": 18740 }, { "epoch": 1.2074766956997731, "grad_norm": 0.14202623069286346, "learning_rate": 0.00011950025759917569, "loss": 0.1737, "step": 18750 }, { "epoch": 1.208120683270813, "grad_norm": 0.13398440182209015, "learning_rate": 0.00011945732440322858, "loss": 0.1711, "step": 18760 }, { "epoch": 1.2087646708418527, "grad_norm": 0.14396823942661285, "learning_rate": 0.00011941439120728149, "loss": 0.1797, "step": 18770 }, { "epoch": 1.2094086584128927, "grad_norm": 0.11817579716444016, "learning_rate": 0.00011937145801133436, "loss": 0.1698, "step": 18780 }, { "epoch": 1.2100526459839325, "grad_norm": 0.1413506120443344, "learning_rate": 0.00011932852481538725, "loss": 0.1749, "step": 18790 }, { "epoch": 1.2106966335549725, "grad_norm": 0.16915923357009888, "learning_rate": 0.00011928559161944014, "loss": 0.1785, "step": 18800 }, { "epoch": 1.2113406211260123, "grad_norm": 0.15304416418075562, "learning_rate": 0.00011924265842349305, "loss": 0.1829, "step": 18810 }, { "epoch": 1.211984608697052, "grad_norm": 0.17435166239738464, "learning_rate": 0.00011919972522754594, "loss": 0.1737, "step": 18820 }, { "epoch": 1.212628596268092, "grad_norm": 0.13925284147262573, "learning_rate": 0.00011915679203159884, "loss": 0.1864, "step": 18830 }, { "epoch": 1.2132725838391318, "grad_norm": 0.15061205625534058, "learning_rate": 0.00011911385883565173, "loss": 0.1722, "step": 18840 }, { "epoch": 1.2139165714101718, "grad_norm": 0.13127531111240387, "learning_rate": 0.00011907092563970464, "loss": 0.1799, "step": 18850 }, { "epoch": 1.2145605589812116, "grad_norm": 0.12919269502162933, "learning_rate": 0.00011902799244375753, "loss": 0.1807, "step": 18860 }, { "epoch": 1.2152045465522516, "grad_norm": 0.1328379213809967, "learning_rate": 0.0001189850592478104, "loss": 0.1713, "step": 18870 }, { "epoch": 1.2158485341232914, "grad_norm": 0.172357439994812, "learning_rate": 0.0001189421260518633, "loss": 0.1804, "step": 18880 }, { "epoch": 1.2164925216943314, "grad_norm": 0.1507343053817749, "learning_rate": 0.0001188991928559162, "loss": 0.1783, "step": 18890 }, { "epoch": 1.2171365092653712, "grad_norm": 0.1505764126777649, "learning_rate": 0.00011885625965996909, "loss": 0.1759, "step": 18900 }, { "epoch": 1.217780496836411, "grad_norm": 0.1518537849187851, "learning_rate": 0.000118813326464022, "loss": 0.1776, "step": 18910 }, { "epoch": 1.218424484407451, "grad_norm": 0.1252574324607849, "learning_rate": 0.00011877039326807489, "loss": 0.1748, "step": 18920 }, { "epoch": 1.2190684719784908, "grad_norm": 0.1359870433807373, "learning_rate": 0.00011872746007212778, "loss": 0.172, "step": 18930 }, { "epoch": 1.2197124595495308, "grad_norm": 0.1373443901538849, "learning_rate": 0.00011868452687618068, "loss": 0.1729, "step": 18940 }, { "epoch": 1.2203564471205706, "grad_norm": 0.13086824119091034, "learning_rate": 0.00011864159368023356, "loss": 0.1751, "step": 18950 }, { "epoch": 1.2210004346916103, "grad_norm": 0.1393962800502777, "learning_rate": 0.00011859866048428645, "loss": 0.1755, "step": 18960 }, { "epoch": 1.2216444222626504, "grad_norm": 0.15412099659442902, "learning_rate": 0.00011855572728833934, "loss": 0.1797, "step": 18970 }, { "epoch": 1.2222884098336901, "grad_norm": 0.1516958773136139, "learning_rate": 0.00011851279409239224, "loss": 0.1761, "step": 18980 }, { "epoch": 1.2229323974047301, "grad_norm": 0.14279986917972565, "learning_rate": 0.00011846986089644514, "loss": 0.1792, "step": 18990 }, { "epoch": 1.22357638497577, "grad_norm": 0.1379707008600235, "learning_rate": 0.00011842692770049804, "loss": 0.1795, "step": 19000 }, { "epoch": 1.22422037254681, "grad_norm": 0.1328478306531906, "learning_rate": 0.00011838399450455093, "loss": 0.1756, "step": 19010 }, { "epoch": 1.2248643601178497, "grad_norm": 0.1423920840024948, "learning_rate": 0.00011834106130860383, "loss": 0.1778, "step": 19020 }, { "epoch": 1.2255083476888897, "grad_norm": 0.12863004207611084, "learning_rate": 0.0001182981281126567, "loss": 0.1688, "step": 19030 }, { "epoch": 1.2261523352599295, "grad_norm": 0.13076473772525787, "learning_rate": 0.0001182551949167096, "loss": 0.1745, "step": 19040 }, { "epoch": 1.2267963228309693, "grad_norm": 0.12974318861961365, "learning_rate": 0.00011821226172076249, "loss": 0.17, "step": 19050 }, { "epoch": 1.2274403104020093, "grad_norm": 0.1283918172121048, "learning_rate": 0.0001181693285248154, "loss": 0.1668, "step": 19060 }, { "epoch": 1.228084297973049, "grad_norm": 0.14604367315769196, "learning_rate": 0.00011812639532886829, "loss": 0.1765, "step": 19070 }, { "epoch": 1.228728285544089, "grad_norm": 0.1321973204612732, "learning_rate": 0.00011808346213292119, "loss": 0.1747, "step": 19080 }, { "epoch": 1.2293722731151289, "grad_norm": 0.14522933959960938, "learning_rate": 0.00011804052893697408, "loss": 0.1726, "step": 19090 }, { "epoch": 1.2300162606861686, "grad_norm": 0.13042984902858734, "learning_rate": 0.00011799759574102697, "loss": 0.1752, "step": 19100 }, { "epoch": 1.2306602482572087, "grad_norm": 0.1359633356332779, "learning_rate": 0.00011795466254507985, "loss": 0.1798, "step": 19110 }, { "epoch": 1.2313042358282484, "grad_norm": 0.12583805620670319, "learning_rate": 0.00011791172934913275, "loss": 0.1674, "step": 19120 }, { "epoch": 1.2319482233992884, "grad_norm": 0.13043057918548584, "learning_rate": 0.00011786879615318565, "loss": 0.1769, "step": 19130 }, { "epoch": 1.2325922109703282, "grad_norm": 0.1189577504992485, "learning_rate": 0.00011782586295723854, "loss": 0.1699, "step": 19140 }, { "epoch": 1.2332361985413682, "grad_norm": 0.12823054194450378, "learning_rate": 0.00011778292976129144, "loss": 0.1707, "step": 19150 }, { "epoch": 1.233880186112408, "grad_norm": 0.12748244404792786, "learning_rate": 0.00011773999656534433, "loss": 0.17, "step": 19160 }, { "epoch": 1.234524173683448, "grad_norm": 0.14150062203407288, "learning_rate": 0.00011769706336939724, "loss": 0.1688, "step": 19170 }, { "epoch": 1.2351681612544878, "grad_norm": 0.1238512471318245, "learning_rate": 0.00011765413017345013, "loss": 0.1738, "step": 19180 }, { "epoch": 1.2358121488255276, "grad_norm": 0.12413612008094788, "learning_rate": 0.000117611196977503, "loss": 0.1793, "step": 19190 }, { "epoch": 1.2364561363965676, "grad_norm": 0.1327962577342987, "learning_rate": 0.0001175682637815559, "loss": 0.1759, "step": 19200 }, { "epoch": 1.2371001239676074, "grad_norm": 0.12205935269594193, "learning_rate": 0.0001175253305856088, "loss": 0.1809, "step": 19210 }, { "epoch": 1.2377441115386474, "grad_norm": 0.12673348188400269, "learning_rate": 0.00011748239738966169, "loss": 0.1669, "step": 19220 }, { "epoch": 1.2383880991096872, "grad_norm": 0.14270274341106415, "learning_rate": 0.00011743946419371459, "loss": 0.1738, "step": 19230 }, { "epoch": 1.239032086680727, "grad_norm": 0.1371132731437683, "learning_rate": 0.00011739653099776748, "loss": 0.1713, "step": 19240 }, { "epoch": 1.239676074251767, "grad_norm": 0.1353389322757721, "learning_rate": 0.00011735359780182037, "loss": 0.1757, "step": 19250 }, { "epoch": 1.2403200618228067, "grad_norm": 0.13322605192661285, "learning_rate": 0.00011731066460587328, "loss": 0.1782, "step": 19260 }, { "epoch": 1.2409640493938467, "grad_norm": 0.13779090344905853, "learning_rate": 0.00011726773140992616, "loss": 0.1774, "step": 19270 }, { "epoch": 1.2416080369648865, "grad_norm": 0.12287355959415436, "learning_rate": 0.00011722479821397905, "loss": 0.173, "step": 19280 }, { "epoch": 1.2422520245359265, "grad_norm": 0.14446334540843964, "learning_rate": 0.00011718186501803195, "loss": 0.171, "step": 19290 }, { "epoch": 1.2428960121069663, "grad_norm": 0.13746009767055511, "learning_rate": 0.00011713893182208484, "loss": 0.1769, "step": 19300 }, { "epoch": 1.2435399996780063, "grad_norm": 0.138585165143013, "learning_rate": 0.00011709599862613773, "loss": 0.1771, "step": 19310 }, { "epoch": 1.244183987249046, "grad_norm": 0.15418113768100739, "learning_rate": 0.00011705306543019064, "loss": 0.1722, "step": 19320 }, { "epoch": 1.244827974820086, "grad_norm": 0.1388901025056839, "learning_rate": 0.00011701013223424353, "loss": 0.1744, "step": 19330 }, { "epoch": 1.245471962391126, "grad_norm": 0.14209753274917603, "learning_rate": 0.00011696719903829643, "loss": 0.1706, "step": 19340 }, { "epoch": 1.2461159499621657, "grad_norm": 0.12071836739778519, "learning_rate": 0.0001169242658423493, "loss": 0.1729, "step": 19350 }, { "epoch": 1.2467599375332057, "grad_norm": 0.13934855163097382, "learning_rate": 0.0001168813326464022, "loss": 0.166, "step": 19360 }, { "epoch": 1.2474039251042455, "grad_norm": 0.13384824991226196, "learning_rate": 0.00011683839945045509, "loss": 0.1781, "step": 19370 }, { "epoch": 1.2480479126752853, "grad_norm": 0.16728487610816956, "learning_rate": 0.000116795466254508, "loss": 0.1784, "step": 19380 }, { "epoch": 1.2486919002463253, "grad_norm": 0.12510572373867035, "learning_rate": 0.00011675253305856088, "loss": 0.1721, "step": 19390 }, { "epoch": 1.249335887817365, "grad_norm": 0.1261984258890152, "learning_rate": 0.00011670959986261379, "loss": 0.1722, "step": 19400 }, { "epoch": 1.249979875388405, "grad_norm": 0.14676503837108612, "learning_rate": 0.00011666666666666668, "loss": 0.1822, "step": 19410 }, { "epoch": 1.2506238629594448, "grad_norm": 0.1360887587070465, "learning_rate": 0.00011662373347071957, "loss": 0.1797, "step": 19420 }, { "epoch": 1.2512678505304848, "grad_norm": 0.14170607924461365, "learning_rate": 0.00011658080027477245, "loss": 0.1745, "step": 19430 }, { "epoch": 1.2519118381015246, "grad_norm": 0.1404106318950653, "learning_rate": 0.00011653786707882535, "loss": 0.1786, "step": 19440 }, { "epoch": 1.2525558256725646, "grad_norm": 0.14076541364192963, "learning_rate": 0.00011649493388287824, "loss": 0.1764, "step": 19450 }, { "epoch": 1.2531998132436044, "grad_norm": 0.15078507363796234, "learning_rate": 0.00011645200068693115, "loss": 0.1804, "step": 19460 }, { "epoch": 1.2538438008146442, "grad_norm": 0.14529109001159668, "learning_rate": 0.00011640906749098404, "loss": 0.1648, "step": 19470 }, { "epoch": 1.2544877883856842, "grad_norm": 0.1322455257177353, "learning_rate": 0.00011636613429503693, "loss": 0.1744, "step": 19480 }, { "epoch": 1.255131775956724, "grad_norm": 0.16506916284561157, "learning_rate": 0.00011632320109908983, "loss": 0.1713, "step": 19490 }, { "epoch": 1.255775763527764, "grad_norm": 0.13652914762496948, "learning_rate": 0.00011628026790314272, "loss": 0.1667, "step": 19500 }, { "epoch": 1.2564197510988038, "grad_norm": 0.13270138204097748, "learning_rate": 0.0001162373347071956, "loss": 0.1767, "step": 19510 }, { "epoch": 1.2570637386698436, "grad_norm": 0.13244764506816864, "learning_rate": 0.00011619440151124849, "loss": 0.1766, "step": 19520 }, { "epoch": 1.2577077262408836, "grad_norm": 0.14681226015090942, "learning_rate": 0.0001161514683153014, "loss": 0.1785, "step": 19530 }, { "epoch": 1.2583517138119236, "grad_norm": 0.15295998752117157, "learning_rate": 0.00011610853511935428, "loss": 0.178, "step": 19540 }, { "epoch": 1.2589957013829634, "grad_norm": 0.1508578360080719, "learning_rate": 0.00011606560192340719, "loss": 0.1779, "step": 19550 }, { "epoch": 1.2596396889540031, "grad_norm": 0.1456872671842575, "learning_rate": 0.00011602266872746008, "loss": 0.1717, "step": 19560 }, { "epoch": 1.2602836765250431, "grad_norm": 0.13599295914173126, "learning_rate": 0.00011597973553151298, "loss": 0.1749, "step": 19570 }, { "epoch": 1.260927664096083, "grad_norm": 0.17579984664916992, "learning_rate": 0.00011593680233556587, "loss": 0.1715, "step": 19580 }, { "epoch": 1.261571651667123, "grad_norm": 0.1291302740573883, "learning_rate": 0.00011589386913961875, "loss": 0.1773, "step": 19590 }, { "epoch": 1.2622156392381627, "grad_norm": 0.14098261296749115, "learning_rate": 0.00011585093594367164, "loss": 0.178, "step": 19600 }, { "epoch": 1.2628596268092025, "grad_norm": 0.14389780163764954, "learning_rate": 0.00011580800274772455, "loss": 0.1677, "step": 19610 }, { "epoch": 1.2635036143802425, "grad_norm": 0.1299760788679123, "learning_rate": 0.00011576506955177744, "loss": 0.1761, "step": 19620 }, { "epoch": 1.2641476019512823, "grad_norm": 0.13717254996299744, "learning_rate": 0.00011572213635583034, "loss": 0.1703, "step": 19630 }, { "epoch": 1.2647915895223223, "grad_norm": 0.14157631993293762, "learning_rate": 0.00011567920315988323, "loss": 0.1695, "step": 19640 }, { "epoch": 1.265435577093362, "grad_norm": 0.1489230990409851, "learning_rate": 0.00011563626996393612, "loss": 0.1711, "step": 19650 }, { "epoch": 1.2660795646644019, "grad_norm": 0.15521685779094696, "learning_rate": 0.00011559333676798903, "loss": 0.1685, "step": 19660 }, { "epoch": 1.2667235522354419, "grad_norm": 0.13816308975219727, "learning_rate": 0.0001155504035720419, "loss": 0.1705, "step": 19670 }, { "epoch": 1.2673675398064819, "grad_norm": 0.14019477367401123, "learning_rate": 0.0001155074703760948, "loss": 0.1765, "step": 19680 }, { "epoch": 1.2680115273775217, "grad_norm": 0.134628027677536, "learning_rate": 0.00011546453718014769, "loss": 0.1701, "step": 19690 }, { "epoch": 1.2686555149485614, "grad_norm": 0.1226300448179245, "learning_rate": 0.00011542160398420059, "loss": 0.1738, "step": 19700 }, { "epoch": 1.2692995025196014, "grad_norm": 0.15332919359207153, "learning_rate": 0.00011537867078825348, "loss": 0.1677, "step": 19710 }, { "epoch": 1.2699434900906412, "grad_norm": 0.11456998437643051, "learning_rate": 0.00011533573759230638, "loss": 0.1755, "step": 19720 }, { "epoch": 1.2705874776616812, "grad_norm": 0.14564581215381622, "learning_rate": 0.00011529280439635928, "loss": 0.1859, "step": 19730 }, { "epoch": 1.271231465232721, "grad_norm": 0.13011327385902405, "learning_rate": 0.00011524987120041218, "loss": 0.1743, "step": 19740 }, { "epoch": 1.2718754528037608, "grad_norm": 0.12924900650978088, "learning_rate": 0.00011520693800446504, "loss": 0.1753, "step": 19750 }, { "epoch": 1.2725194403748008, "grad_norm": 0.15983444452285767, "learning_rate": 0.00011516400480851795, "loss": 0.1768, "step": 19760 }, { "epoch": 1.2731634279458406, "grad_norm": 0.11561333388090134, "learning_rate": 0.00011512107161257084, "loss": 0.1751, "step": 19770 }, { "epoch": 1.2738074155168806, "grad_norm": 0.13810089230537415, "learning_rate": 0.00011507813841662374, "loss": 0.1744, "step": 19780 }, { "epoch": 1.2744514030879204, "grad_norm": 0.14343495666980743, "learning_rate": 0.00011503520522067663, "loss": 0.1814, "step": 19790 }, { "epoch": 1.2750953906589602, "grad_norm": 0.11443517357110977, "learning_rate": 0.00011499227202472954, "loss": 0.1759, "step": 19800 }, { "epoch": 1.2757393782300002, "grad_norm": 0.1166982501745224, "learning_rate": 0.00011494933882878243, "loss": 0.1775, "step": 19810 }, { "epoch": 1.2763833658010402, "grad_norm": 0.14135268330574036, "learning_rate": 0.00011490640563283532, "loss": 0.1691, "step": 19820 }, { "epoch": 1.27702735337208, "grad_norm": 0.14957952499389648, "learning_rate": 0.0001148634724368882, "loss": 0.1816, "step": 19830 }, { "epoch": 1.2776713409431197, "grad_norm": 0.13865047693252563, "learning_rate": 0.0001148205392409411, "loss": 0.1733, "step": 19840 }, { "epoch": 1.2783153285141597, "grad_norm": 0.14534899592399597, "learning_rate": 0.00011477760604499399, "loss": 0.1752, "step": 19850 }, { "epoch": 1.2789593160851995, "grad_norm": 0.12605613470077515, "learning_rate": 0.00011473467284904688, "loss": 0.1759, "step": 19860 }, { "epoch": 1.2796033036562395, "grad_norm": 0.14184992015361786, "learning_rate": 0.00011469173965309979, "loss": 0.1723, "step": 19870 }, { "epoch": 1.2802472912272793, "grad_norm": 0.15434816479682922, "learning_rate": 0.00011464880645715268, "loss": 0.1665, "step": 19880 }, { "epoch": 1.280891278798319, "grad_norm": 0.12446150183677673, "learning_rate": 0.00011460587326120558, "loss": 0.1698, "step": 19890 }, { "epoch": 1.281535266369359, "grad_norm": 0.1330304890871048, "learning_rate": 0.00011456294006525847, "loss": 0.1732, "step": 19900 }, { "epoch": 1.2821792539403989, "grad_norm": 0.13070696592330933, "learning_rate": 0.00011452000686931135, "loss": 0.1721, "step": 19910 }, { "epoch": 1.282823241511439, "grad_norm": 0.1374344378709793, "learning_rate": 0.00011447707367336424, "loss": 0.1743, "step": 19920 }, { "epoch": 1.2834672290824787, "grad_norm": 0.12380386888980865, "learning_rate": 0.00011443414047741714, "loss": 0.1712, "step": 19930 }, { "epoch": 1.2841112166535185, "grad_norm": 0.14479920268058777, "learning_rate": 0.00011439120728147003, "loss": 0.1841, "step": 19940 }, { "epoch": 1.2847552042245585, "grad_norm": 0.16474056243896484, "learning_rate": 0.00011434827408552294, "loss": 0.1742, "step": 19950 }, { "epoch": 1.2853991917955985, "grad_norm": 0.15273736417293549, "learning_rate": 0.00011430534088957583, "loss": 0.1728, "step": 19960 }, { "epoch": 1.2860431793666383, "grad_norm": 0.1593133956193924, "learning_rate": 0.00011426240769362872, "loss": 0.1653, "step": 19970 }, { "epoch": 1.286687166937678, "grad_norm": 0.15814171731472015, "learning_rate": 0.00011421947449768162, "loss": 0.1736, "step": 19980 }, { "epoch": 1.287331154508718, "grad_norm": 0.16252541542053223, "learning_rate": 0.0001141765413017345, "loss": 0.1708, "step": 19990 }, { "epoch": 1.2879751420797578, "grad_norm": 0.13553769886493683, "learning_rate": 0.00011413360810578739, "loss": 0.1626, "step": 20000 }, { "epoch": 1.2886191296507978, "grad_norm": 0.14041325449943542, "learning_rate": 0.0001140906749098403, "loss": 0.17, "step": 20010 }, { "epoch": 1.2892631172218376, "grad_norm": 0.131260946393013, "learning_rate": 0.00011404774171389319, "loss": 0.1706, "step": 20020 }, { "epoch": 1.2899071047928774, "grad_norm": 0.1464378982782364, "learning_rate": 0.00011400480851794608, "loss": 0.18, "step": 20030 }, { "epoch": 1.2905510923639174, "grad_norm": 0.1453510820865631, "learning_rate": 0.00011396187532199898, "loss": 0.1743, "step": 20040 }, { "epoch": 1.2911950799349572, "grad_norm": 0.1607009321451187, "learning_rate": 0.00011391894212605187, "loss": 0.1718, "step": 20050 }, { "epoch": 1.2918390675059972, "grad_norm": 0.138259157538414, "learning_rate": 0.00011387600893010478, "loss": 0.1714, "step": 20060 }, { "epoch": 1.292483055077037, "grad_norm": 0.1474282145500183, "learning_rate": 0.00011383307573415764, "loss": 0.1711, "step": 20070 }, { "epoch": 1.2931270426480768, "grad_norm": 0.13766071200370789, "learning_rate": 0.00011379014253821054, "loss": 0.1717, "step": 20080 }, { "epoch": 1.2937710302191168, "grad_norm": 0.1450456976890564, "learning_rate": 0.00011374720934226343, "loss": 0.1747, "step": 20090 }, { "epoch": 1.2944150177901568, "grad_norm": 0.1463833749294281, "learning_rate": 0.00011370427614631634, "loss": 0.1815, "step": 20100 }, { "epoch": 1.2950590053611966, "grad_norm": 0.18156446516513824, "learning_rate": 0.00011366134295036923, "loss": 0.1708, "step": 20110 }, { "epoch": 1.2957029929322363, "grad_norm": 0.13029088079929352, "learning_rate": 0.00011361840975442213, "loss": 0.1719, "step": 20120 }, { "epoch": 1.2963469805032763, "grad_norm": 0.13691522181034088, "learning_rate": 0.00011357547655847502, "loss": 0.174, "step": 20130 }, { "epoch": 1.2969909680743161, "grad_norm": 0.12911418080329895, "learning_rate": 0.00011353254336252791, "loss": 0.1796, "step": 20140 }, { "epoch": 1.2976349556453561, "grad_norm": 0.1785157173871994, "learning_rate": 0.00011348961016658079, "loss": 0.1716, "step": 20150 }, { "epoch": 1.298278943216396, "grad_norm": 0.15025804936885834, "learning_rate": 0.0001134466769706337, "loss": 0.1652, "step": 20160 }, { "epoch": 1.2989229307874357, "grad_norm": 0.14496071636676788, "learning_rate": 0.00011340374377468659, "loss": 0.171, "step": 20170 }, { "epoch": 1.2995669183584757, "grad_norm": 0.13951030373573303, "learning_rate": 0.00011336081057873949, "loss": 0.1682, "step": 20180 }, { "epoch": 1.3002109059295155, "grad_norm": 0.12362170219421387, "learning_rate": 0.00011331787738279238, "loss": 0.1756, "step": 20190 }, { "epoch": 1.3008548935005555, "grad_norm": 0.1471814066171646, "learning_rate": 0.00011327494418684527, "loss": 0.1766, "step": 20200 }, { "epoch": 1.3014988810715953, "grad_norm": 0.1484769880771637, "learning_rate": 0.00011323201099089818, "loss": 0.1719, "step": 20210 }, { "epoch": 1.302142868642635, "grad_norm": 0.12908154726028442, "learning_rate": 0.00011318907779495107, "loss": 0.1699, "step": 20220 }, { "epoch": 1.302786856213675, "grad_norm": 0.13518309593200684, "learning_rate": 0.00011314614459900394, "loss": 0.1816, "step": 20230 }, { "epoch": 1.303430843784715, "grad_norm": 0.12933428585529327, "learning_rate": 0.00011310321140305683, "loss": 0.1716, "step": 20240 }, { "epoch": 1.3040748313557549, "grad_norm": 0.14329680800437927, "learning_rate": 0.00011306027820710974, "loss": 0.1794, "step": 20250 }, { "epoch": 1.3047188189267946, "grad_norm": 0.144343763589859, "learning_rate": 0.00011301734501116263, "loss": 0.1755, "step": 20260 }, { "epoch": 1.3053628064978346, "grad_norm": 0.14726324379444122, "learning_rate": 0.00011297441181521553, "loss": 0.1781, "step": 20270 }, { "epoch": 1.3060067940688744, "grad_norm": 0.16567404568195343, "learning_rate": 0.00011293147861926842, "loss": 0.179, "step": 20280 }, { "epoch": 1.3066507816399144, "grad_norm": 0.13769879937171936, "learning_rate": 0.00011288854542332133, "loss": 0.1718, "step": 20290 }, { "epoch": 1.3072947692109542, "grad_norm": 0.13980849087238312, "learning_rate": 0.00011284561222737422, "loss": 0.1742, "step": 20300 }, { "epoch": 1.307938756781994, "grad_norm": 0.12867051362991333, "learning_rate": 0.0001128026790314271, "loss": 0.1699, "step": 20310 }, { "epoch": 1.308582744353034, "grad_norm": 0.11891160160303116, "learning_rate": 0.00011275974583547999, "loss": 0.1758, "step": 20320 }, { "epoch": 1.3092267319240738, "grad_norm": 0.14424222707748413, "learning_rate": 0.00011271681263953289, "loss": 0.1753, "step": 20330 }, { "epoch": 1.3098707194951138, "grad_norm": 0.12690092623233795, "learning_rate": 0.00011267387944358578, "loss": 0.1674, "step": 20340 }, { "epoch": 1.3105147070661536, "grad_norm": 0.1361362189054489, "learning_rate": 0.00011263094624763869, "loss": 0.1612, "step": 20350 }, { "epoch": 1.3111586946371934, "grad_norm": 0.14021353423595428, "learning_rate": 0.00011258801305169158, "loss": 0.1737, "step": 20360 }, { "epoch": 1.3118026822082334, "grad_norm": 0.1517530232667923, "learning_rate": 0.00011254507985574447, "loss": 0.1757, "step": 20370 }, { "epoch": 1.3124466697792734, "grad_norm": 0.15329915285110474, "learning_rate": 0.00011250214665979737, "loss": 0.1724, "step": 20380 }, { "epoch": 1.3130906573503132, "grad_norm": 0.14984937012195587, "learning_rate": 0.00011245921346385025, "loss": 0.1759, "step": 20390 }, { "epoch": 1.313734644921353, "grad_norm": 0.17333190143108368, "learning_rate": 0.00011241628026790314, "loss": 0.172, "step": 20400 }, { "epoch": 1.314378632492393, "grad_norm": 0.12303344160318375, "learning_rate": 0.00011237334707195603, "loss": 0.1699, "step": 20410 }, { "epoch": 1.3150226200634327, "grad_norm": 0.14971397817134857, "learning_rate": 0.00011233041387600893, "loss": 0.1758, "step": 20420 }, { "epoch": 1.3156666076344727, "grad_norm": 0.11230733245611191, "learning_rate": 0.00011228748068006183, "loss": 0.1723, "step": 20430 }, { "epoch": 1.3163105952055125, "grad_norm": 0.13522161543369293, "learning_rate": 0.00011224454748411473, "loss": 0.1785, "step": 20440 }, { "epoch": 1.3169545827765523, "grad_norm": 0.14010478556156158, "learning_rate": 0.00011220161428816762, "loss": 0.172, "step": 20450 }, { "epoch": 1.3175985703475923, "grad_norm": 0.13418914377689362, "learning_rate": 0.00011215868109222052, "loss": 0.1704, "step": 20460 }, { "epoch": 1.318242557918632, "grad_norm": 0.15884064137935638, "learning_rate": 0.00011211574789627339, "loss": 0.1791, "step": 20470 }, { "epoch": 1.318886545489672, "grad_norm": 0.14091013371944427, "learning_rate": 0.00011207281470032629, "loss": 0.1803, "step": 20480 }, { "epoch": 1.3195305330607119, "grad_norm": 0.13499897718429565, "learning_rate": 0.00011202988150437918, "loss": 0.1825, "step": 20490 }, { "epoch": 1.3201745206317517, "grad_norm": 0.12211118638515472, "learning_rate": 0.00011198694830843209, "loss": 0.1716, "step": 20500 }, { "epoch": 1.3208185082027917, "grad_norm": 0.12358514219522476, "learning_rate": 0.00011194401511248498, "loss": 0.1637, "step": 20510 }, { "epoch": 1.3214624957738317, "grad_norm": 0.1475144773721695, "learning_rate": 0.00011190108191653788, "loss": 0.1718, "step": 20520 }, { "epoch": 1.3221064833448715, "grad_norm": 0.13161666691303253, "learning_rate": 0.00011185814872059077, "loss": 0.1763, "step": 20530 }, { "epoch": 1.3227504709159112, "grad_norm": 0.13805577158927917, "learning_rate": 0.00011181521552464366, "loss": 0.1787, "step": 20540 }, { "epoch": 1.3233944584869513, "grad_norm": 0.17987588047981262, "learning_rate": 0.00011177228232869654, "loss": 0.1691, "step": 20550 }, { "epoch": 1.324038446057991, "grad_norm": 0.1554185450077057, "learning_rate": 0.00011172934913274944, "loss": 0.174, "step": 20560 }, { "epoch": 1.324682433629031, "grad_norm": 0.1340741068124771, "learning_rate": 0.00011168641593680234, "loss": 0.1722, "step": 20570 }, { "epoch": 1.3253264212000708, "grad_norm": 0.14371734857559204, "learning_rate": 0.00011164348274085523, "loss": 0.1789, "step": 20580 }, { "epoch": 1.3259704087711106, "grad_norm": 0.127070814371109, "learning_rate": 0.00011160054954490813, "loss": 0.1692, "step": 20590 }, { "epoch": 1.3266143963421506, "grad_norm": 0.14094701409339905, "learning_rate": 0.00011155761634896102, "loss": 0.1725, "step": 20600 }, { "epoch": 1.3272583839131904, "grad_norm": 0.15470580756664276, "learning_rate": 0.00011151468315301393, "loss": 0.1749, "step": 20610 }, { "epoch": 1.3279023714842304, "grad_norm": 0.14113537967205048, "learning_rate": 0.00011147174995706682, "loss": 0.1789, "step": 20620 }, { "epoch": 1.3285463590552702, "grad_norm": 0.16336098313331604, "learning_rate": 0.00011142881676111969, "loss": 0.1747, "step": 20630 }, { "epoch": 1.3291903466263102, "grad_norm": 0.1453891098499298, "learning_rate": 0.00011138588356517258, "loss": 0.1758, "step": 20640 }, { "epoch": 1.32983433419735, "grad_norm": 0.12860417366027832, "learning_rate": 0.00011134295036922549, "loss": 0.1771, "step": 20650 }, { "epoch": 1.33047832176839, "grad_norm": 0.13660341501235962, "learning_rate": 0.00011130001717327838, "loss": 0.1762, "step": 20660 }, { "epoch": 1.3311223093394298, "grad_norm": 0.14824225008487701, "learning_rate": 0.00011125708397733128, "loss": 0.172, "step": 20670 }, { "epoch": 1.3317662969104695, "grad_norm": 0.12904125452041626, "learning_rate": 0.00011121415078138417, "loss": 0.1778, "step": 20680 }, { "epoch": 1.3324102844815096, "grad_norm": 0.12075378745794296, "learning_rate": 0.00011117121758543706, "loss": 0.1659, "step": 20690 }, { "epoch": 1.3330542720525493, "grad_norm": 0.1332232505083084, "learning_rate": 0.00011112828438948997, "loss": 0.178, "step": 20700 }, { "epoch": 1.3336982596235893, "grad_norm": 0.15102632343769073, "learning_rate": 0.00011108535119354286, "loss": 0.1698, "step": 20710 }, { "epoch": 1.3343422471946291, "grad_norm": 0.13376551866531372, "learning_rate": 0.00011104241799759574, "loss": 0.1739, "step": 20720 }, { "epoch": 1.334986234765669, "grad_norm": 0.12406788021326065, "learning_rate": 0.00011099948480164864, "loss": 0.182, "step": 20730 }, { "epoch": 1.335630222336709, "grad_norm": 0.15591196715831757, "learning_rate": 0.00011095655160570153, "loss": 0.1798, "step": 20740 }, { "epoch": 1.3362742099077487, "grad_norm": 0.1425260603427887, "learning_rate": 0.00011091361840975442, "loss": 0.1749, "step": 20750 }, { "epoch": 1.3369181974787887, "grad_norm": 0.13813915848731995, "learning_rate": 0.00011087068521380733, "loss": 0.1799, "step": 20760 }, { "epoch": 1.3375621850498285, "grad_norm": 0.14136448502540588, "learning_rate": 0.00011082775201786022, "loss": 0.1793, "step": 20770 }, { "epoch": 1.3382061726208685, "grad_norm": 0.13096201419830322, "learning_rate": 0.00011078481882191312, "loss": 0.171, "step": 20780 }, { "epoch": 1.3388501601919083, "grad_norm": 0.15143609046936035, "learning_rate": 0.00011074188562596601, "loss": 0.1705, "step": 20790 }, { "epoch": 1.3394941477629483, "grad_norm": 0.12729062139987946, "learning_rate": 0.00011069895243001889, "loss": 0.1861, "step": 20800 }, { "epoch": 1.340138135333988, "grad_norm": 0.1425873339176178, "learning_rate": 0.00011065601923407178, "loss": 0.1754, "step": 20810 }, { "epoch": 1.3407821229050279, "grad_norm": 0.1325571984052658, "learning_rate": 0.00011061308603812468, "loss": 0.1729, "step": 20820 }, { "epoch": 1.3414261104760679, "grad_norm": 0.14008310437202454, "learning_rate": 0.00011057015284217757, "loss": 0.173, "step": 20830 }, { "epoch": 1.3420700980471076, "grad_norm": 0.1534469723701477, "learning_rate": 0.00011052721964623048, "loss": 0.1717, "step": 20840 }, { "epoch": 1.3427140856181476, "grad_norm": 0.13398049771785736, "learning_rate": 0.00011048428645028337, "loss": 0.1811, "step": 20850 }, { "epoch": 1.3433580731891874, "grad_norm": 0.1551012545824051, "learning_rate": 0.00011044135325433626, "loss": 0.1763, "step": 20860 }, { "epoch": 1.3440020607602272, "grad_norm": 0.16058099269866943, "learning_rate": 0.00011039842005838916, "loss": 0.181, "step": 20870 }, { "epoch": 1.3446460483312672, "grad_norm": 0.1329493373632431, "learning_rate": 0.00011035548686244204, "loss": 0.186, "step": 20880 }, { "epoch": 1.3452900359023072, "grad_norm": 0.13138671219348907, "learning_rate": 0.00011031255366649493, "loss": 0.1686, "step": 20890 }, { "epoch": 1.345934023473347, "grad_norm": 0.16065897047519684, "learning_rate": 0.00011026962047054784, "loss": 0.176, "step": 20900 }, { "epoch": 1.3465780110443868, "grad_norm": 0.1283680498600006, "learning_rate": 0.00011022668727460073, "loss": 0.1731, "step": 20910 }, { "epoch": 1.3472219986154268, "grad_norm": 0.14649420976638794, "learning_rate": 0.00011018375407865362, "loss": 0.1797, "step": 20920 }, { "epoch": 1.3478659861864666, "grad_norm": 0.14636649191379547, "learning_rate": 0.00011014082088270652, "loss": 0.1846, "step": 20930 }, { "epoch": 1.3485099737575066, "grad_norm": 0.13960200548171997, "learning_rate": 0.00011009788768675941, "loss": 0.1701, "step": 20940 }, { "epoch": 1.3491539613285464, "grad_norm": 0.13083666563034058, "learning_rate": 0.00011005495449081232, "loss": 0.1756, "step": 20950 }, { "epoch": 1.3497979488995862, "grad_norm": 0.13722240924835205, "learning_rate": 0.00011001202129486518, "loss": 0.1685, "step": 20960 }, { "epoch": 1.3504419364706262, "grad_norm": 0.14851321280002594, "learning_rate": 0.00010996908809891808, "loss": 0.1755, "step": 20970 }, { "epoch": 1.351085924041666, "grad_norm": 0.14638429880142212, "learning_rate": 0.00010992615490297097, "loss": 0.1793, "step": 20980 }, { "epoch": 1.351729911612706, "grad_norm": 0.13116149604320526, "learning_rate": 0.00010988322170702388, "loss": 0.1712, "step": 20990 }, { "epoch": 1.3523738991837457, "grad_norm": 0.1468837410211563, "learning_rate": 0.00010984028851107677, "loss": 0.1753, "step": 21000 }, { "epoch": 1.3530178867547855, "grad_norm": 0.12397810071706772, "learning_rate": 0.00010979735531512967, "loss": 0.1761, "step": 21010 }, { "epoch": 1.3536618743258255, "grad_norm": 0.1404937356710434, "learning_rate": 0.00010975442211918256, "loss": 0.1729, "step": 21020 }, { "epoch": 1.3543058618968655, "grad_norm": 0.18299123644828796, "learning_rate": 0.00010971148892323546, "loss": 0.18, "step": 21030 }, { "epoch": 1.3549498494679053, "grad_norm": 0.15267451107501984, "learning_rate": 0.00010966855572728833, "loss": 0.1786, "step": 21040 }, { "epoch": 1.355593837038945, "grad_norm": 0.1483820527791977, "learning_rate": 0.00010962562253134124, "loss": 0.1776, "step": 21050 }, { "epoch": 1.356237824609985, "grad_norm": 0.14821314811706543, "learning_rate": 0.00010958268933539413, "loss": 0.173, "step": 21060 }, { "epoch": 1.3568818121810249, "grad_norm": 0.13402746617794037, "learning_rate": 0.00010953975613944703, "loss": 0.1714, "step": 21070 }, { "epoch": 1.3575257997520649, "grad_norm": 0.1293080449104309, "learning_rate": 0.00010949682294349992, "loss": 0.182, "step": 21080 }, { "epoch": 1.3581697873231047, "grad_norm": 0.14323876798152924, "learning_rate": 0.00010945388974755281, "loss": 0.1863, "step": 21090 }, { "epoch": 1.3588137748941445, "grad_norm": 0.13726995885372162, "learning_rate": 0.00010941095655160572, "loss": 0.1761, "step": 21100 }, { "epoch": 1.3594577624651845, "grad_norm": 0.15528661012649536, "learning_rate": 0.00010936802335565861, "loss": 0.1761, "step": 21110 }, { "epoch": 1.3601017500362242, "grad_norm": 0.14628401398658752, "learning_rate": 0.00010932509015971148, "loss": 0.1807, "step": 21120 }, { "epoch": 1.3607457376072642, "grad_norm": 0.14018264412879944, "learning_rate": 0.00010928215696376438, "loss": 0.174, "step": 21130 }, { "epoch": 1.361389725178304, "grad_norm": 0.1330811232328415, "learning_rate": 0.00010923922376781728, "loss": 0.1776, "step": 21140 }, { "epoch": 1.3620337127493438, "grad_norm": 0.13930338621139526, "learning_rate": 0.00010919629057187017, "loss": 0.1773, "step": 21150 }, { "epoch": 1.3626777003203838, "grad_norm": 0.12638823688030243, "learning_rate": 0.00010915335737592307, "loss": 0.167, "step": 21160 }, { "epoch": 1.3633216878914238, "grad_norm": 0.1317758709192276, "learning_rate": 0.00010911042417997597, "loss": 0.1768, "step": 21170 }, { "epoch": 1.3639656754624636, "grad_norm": 0.12704814970493317, "learning_rate": 0.00010906749098402887, "loss": 0.1788, "step": 21180 }, { "epoch": 1.3646096630335034, "grad_norm": 0.12508323788642883, "learning_rate": 0.00010902455778808176, "loss": 0.1685, "step": 21190 }, { "epoch": 1.3652536506045434, "grad_norm": 0.14901018142700195, "learning_rate": 0.00010898162459213464, "loss": 0.1714, "step": 21200 }, { "epoch": 1.3658976381755832, "grad_norm": 0.1575509011745453, "learning_rate": 0.00010893869139618753, "loss": 0.1786, "step": 21210 }, { "epoch": 1.3665416257466232, "grad_norm": 0.13878098130226135, "learning_rate": 0.00010889575820024043, "loss": 0.1759, "step": 21220 }, { "epoch": 1.367185613317663, "grad_norm": 0.13535167276859283, "learning_rate": 0.00010885282500429332, "loss": 0.1728, "step": 21230 }, { "epoch": 1.3678296008887028, "grad_norm": 0.14773808419704437, "learning_rate": 0.00010880989180834623, "loss": 0.1753, "step": 21240 }, { "epoch": 1.3684735884597428, "grad_norm": 0.17106880247592926, "learning_rate": 0.00010876695861239912, "loss": 0.1777, "step": 21250 }, { "epoch": 1.3691175760307825, "grad_norm": 0.12939783930778503, "learning_rate": 0.00010872402541645201, "loss": 0.1736, "step": 21260 }, { "epoch": 1.3697615636018226, "grad_norm": 0.13192783296108246, "learning_rate": 0.00010868109222050491, "loss": 0.1725, "step": 21270 }, { "epoch": 1.3704055511728623, "grad_norm": 0.12714265286922455, "learning_rate": 0.00010863815902455779, "loss": 0.1726, "step": 21280 }, { "epoch": 1.3710495387439021, "grad_norm": 0.13666413724422455, "learning_rate": 0.00010859522582861068, "loss": 0.1672, "step": 21290 }, { "epoch": 1.3716935263149421, "grad_norm": 0.13130523264408112, "learning_rate": 0.00010855229263266357, "loss": 0.1743, "step": 21300 }, { "epoch": 1.3723375138859821, "grad_norm": 0.11934766918420792, "learning_rate": 0.00010850935943671648, "loss": 0.1805, "step": 21310 }, { "epoch": 1.372981501457022, "grad_norm": 0.1306128352880478, "learning_rate": 0.00010846642624076937, "loss": 0.1816, "step": 21320 }, { "epoch": 1.3736254890280617, "grad_norm": 0.14955338835716248, "learning_rate": 0.00010842349304482227, "loss": 0.1758, "step": 21330 }, { "epoch": 1.3742694765991017, "grad_norm": 0.14985793828964233, "learning_rate": 0.00010838055984887516, "loss": 0.1712, "step": 21340 }, { "epoch": 1.3749134641701415, "grad_norm": 0.15089233219623566, "learning_rate": 0.00010833762665292807, "loss": 0.1805, "step": 21350 }, { "epoch": 1.3755574517411815, "grad_norm": 0.145599365234375, "learning_rate": 0.00010829469345698093, "loss": 0.1732, "step": 21360 }, { "epoch": 1.3762014393122213, "grad_norm": 0.12449701875448227, "learning_rate": 0.00010825176026103383, "loss": 0.1755, "step": 21370 }, { "epoch": 1.376845426883261, "grad_norm": 0.12971866130828857, "learning_rate": 0.00010820882706508672, "loss": 0.1749, "step": 21380 }, { "epoch": 1.377489414454301, "grad_norm": 0.132247656583786, "learning_rate": 0.00010816589386913963, "loss": 0.1733, "step": 21390 }, { "epoch": 1.3781334020253408, "grad_norm": 0.16557440161705017, "learning_rate": 0.00010812296067319252, "loss": 0.1694, "step": 21400 }, { "epoch": 1.3787773895963809, "grad_norm": 0.12731848657131195, "learning_rate": 0.00010808002747724541, "loss": 0.1715, "step": 21410 }, { "epoch": 1.3794213771674206, "grad_norm": 0.17688941955566406, "learning_rate": 0.00010803709428129831, "loss": 0.166, "step": 21420 }, { "epoch": 1.3800653647384604, "grad_norm": 0.15868067741394043, "learning_rate": 0.0001079941610853512, "loss": 0.1787, "step": 21430 }, { "epoch": 1.3807093523095004, "grad_norm": 0.1521763801574707, "learning_rate": 0.00010795122788940408, "loss": 0.1855, "step": 21440 }, { "epoch": 1.3813533398805404, "grad_norm": 0.129471555352211, "learning_rate": 0.00010790829469345699, "loss": 0.1754, "step": 21450 }, { "epoch": 1.3819973274515802, "grad_norm": 0.14299635589122772, "learning_rate": 0.00010786536149750988, "loss": 0.1705, "step": 21460 }, { "epoch": 1.38264131502262, "grad_norm": 0.13622267544269562, "learning_rate": 0.00010782242830156277, "loss": 0.1691, "step": 21470 }, { "epoch": 1.38328530259366, "grad_norm": 0.13742002844810486, "learning_rate": 0.00010777949510561567, "loss": 0.1843, "step": 21480 }, { "epoch": 1.3839292901646998, "grad_norm": 0.15910208225250244, "learning_rate": 0.00010773656190966856, "loss": 0.1741, "step": 21490 }, { "epoch": 1.3845732777357398, "grad_norm": 0.1547326296567917, "learning_rate": 0.00010769362871372147, "loss": 0.1759, "step": 21500 }, { "epoch": 1.3852172653067796, "grad_norm": 0.13524247705936432, "learning_rate": 0.00010765069551777436, "loss": 0.1783, "step": 21510 }, { "epoch": 1.3858612528778194, "grad_norm": 0.1506098508834839, "learning_rate": 0.00010760776232182723, "loss": 0.175, "step": 21520 }, { "epoch": 1.3865052404488594, "grad_norm": 0.1440984457731247, "learning_rate": 0.00010756482912588012, "loss": 0.1696, "step": 21530 }, { "epoch": 1.3871492280198991, "grad_norm": 0.14616551995277405, "learning_rate": 0.00010752189592993303, "loss": 0.1718, "step": 21540 }, { "epoch": 1.3877932155909392, "grad_norm": 0.15154846012592316, "learning_rate": 0.00010747896273398592, "loss": 0.1771, "step": 21550 }, { "epoch": 1.388437203161979, "grad_norm": 0.14470849931240082, "learning_rate": 0.00010743602953803882, "loss": 0.1674, "step": 21560 }, { "epoch": 1.3890811907330187, "grad_norm": 0.13451240956783295, "learning_rate": 0.00010739309634209171, "loss": 0.1732, "step": 21570 }, { "epoch": 1.3897251783040587, "grad_norm": 0.137560173869133, "learning_rate": 0.0001073501631461446, "loss": 0.1809, "step": 21580 }, { "epoch": 1.3903691658750987, "grad_norm": 0.15711739659309387, "learning_rate": 0.00010730722995019751, "loss": 0.1716, "step": 21590 }, { "epoch": 1.3910131534461385, "grad_norm": 0.14549840986728668, "learning_rate": 0.00010726429675425039, "loss": 0.1724, "step": 21600 }, { "epoch": 1.3916571410171783, "grad_norm": 0.15562918782234192, "learning_rate": 0.00010722136355830328, "loss": 0.1659, "step": 21610 }, { "epoch": 1.3923011285882183, "grad_norm": 0.13151396811008453, "learning_rate": 0.00010717843036235618, "loss": 0.1719, "step": 21620 }, { "epoch": 1.392945116159258, "grad_norm": 0.12914690375328064, "learning_rate": 0.00010713549716640907, "loss": 0.1746, "step": 21630 }, { "epoch": 1.393589103730298, "grad_norm": 0.13477469980716705, "learning_rate": 0.00010709256397046196, "loss": 0.1737, "step": 21640 }, { "epoch": 1.3942330913013379, "grad_norm": 0.13995036482810974, "learning_rate": 0.00010704963077451487, "loss": 0.1732, "step": 21650 }, { "epoch": 1.3948770788723777, "grad_norm": 0.13713234663009644, "learning_rate": 0.00010700669757856776, "loss": 0.1725, "step": 21660 }, { "epoch": 1.3955210664434177, "grad_norm": 0.12948839366436005, "learning_rate": 0.00010696376438262066, "loss": 0.1756, "step": 21670 }, { "epoch": 1.3961650540144575, "grad_norm": 0.1363738626241684, "learning_rate": 0.00010692083118667352, "loss": 0.1701, "step": 21680 }, { "epoch": 1.3968090415854975, "grad_norm": 0.1308399736881256, "learning_rate": 0.00010687789799072643, "loss": 0.1751, "step": 21690 }, { "epoch": 1.3974530291565372, "grad_norm": 0.12659184634685516, "learning_rate": 0.00010683496479477932, "loss": 0.1715, "step": 21700 }, { "epoch": 1.398097016727577, "grad_norm": 0.1450807899236679, "learning_rate": 0.00010679203159883222, "loss": 0.1792, "step": 21710 }, { "epoch": 1.398741004298617, "grad_norm": 0.12365124374628067, "learning_rate": 0.00010674909840288511, "loss": 0.1687, "step": 21720 }, { "epoch": 1.399384991869657, "grad_norm": 0.13264067471027374, "learning_rate": 0.00010670616520693802, "loss": 0.17, "step": 21730 }, { "epoch": 1.4000289794406968, "grad_norm": 0.13364511728286743, "learning_rate": 0.00010666323201099091, "loss": 0.1774, "step": 21740 }, { "epoch": 1.4006729670117366, "grad_norm": 0.12786754965782166, "learning_rate": 0.0001066202988150438, "loss": 0.1762, "step": 21750 }, { "epoch": 1.4013169545827766, "grad_norm": 0.12153439968824387, "learning_rate": 0.00010657736561909668, "loss": 0.1771, "step": 21760 }, { "epoch": 1.4019609421538164, "grad_norm": 0.1386013627052307, "learning_rate": 0.00010653443242314958, "loss": 0.1758, "step": 21770 }, { "epoch": 1.4026049297248564, "grad_norm": 0.14441531896591187, "learning_rate": 0.00010649149922720247, "loss": 0.179, "step": 21780 }, { "epoch": 1.4032489172958962, "grad_norm": 0.13344019651412964, "learning_rate": 0.00010644856603125538, "loss": 0.1731, "step": 21790 }, { "epoch": 1.403892904866936, "grad_norm": 0.128960981965065, "learning_rate": 0.00010640563283530827, "loss": 0.169, "step": 21800 }, { "epoch": 1.404536892437976, "grad_norm": 0.12239548563957214, "learning_rate": 0.00010636269963936116, "loss": 0.1665, "step": 21810 }, { "epoch": 1.4051808800090158, "grad_norm": 0.1389811635017395, "learning_rate": 0.00010631976644341406, "loss": 0.1758, "step": 21820 }, { "epoch": 1.4058248675800558, "grad_norm": 0.13720756769180298, "learning_rate": 0.00010627683324746695, "loss": 0.1734, "step": 21830 }, { "epoch": 1.4064688551510955, "grad_norm": 0.14404058456420898, "learning_rate": 0.00010623390005151983, "loss": 0.1764, "step": 21840 }, { "epoch": 1.4071128427221353, "grad_norm": 0.12227972596883774, "learning_rate": 0.00010619096685557272, "loss": 0.1846, "step": 21850 }, { "epoch": 1.4077568302931753, "grad_norm": 0.12404496967792511, "learning_rate": 0.00010614803365962562, "loss": 0.1682, "step": 21860 }, { "epoch": 1.4084008178642153, "grad_norm": 0.12280487269163132, "learning_rate": 0.00010610510046367852, "loss": 0.1759, "step": 21870 }, { "epoch": 1.4090448054352551, "grad_norm": 0.13871361315250397, "learning_rate": 0.00010606216726773142, "loss": 0.1691, "step": 21880 }, { "epoch": 1.409688793006295, "grad_norm": 0.1262495070695877, "learning_rate": 0.00010601923407178431, "loss": 0.1638, "step": 21890 }, { "epoch": 1.410332780577335, "grad_norm": 0.13554011285305023, "learning_rate": 0.00010597630087583721, "loss": 0.171, "step": 21900 }, { "epoch": 1.4109767681483747, "grad_norm": 0.1477193832397461, "learning_rate": 0.0001059333676798901, "loss": 0.1823, "step": 21910 }, { "epoch": 1.4116207557194147, "grad_norm": 0.1328149288892746, "learning_rate": 0.00010589043448394298, "loss": 0.1716, "step": 21920 }, { "epoch": 1.4122647432904545, "grad_norm": 0.15345995128154755, "learning_rate": 0.00010584750128799587, "loss": 0.1698, "step": 21930 }, { "epoch": 1.4129087308614943, "grad_norm": 0.14685405790805817, "learning_rate": 0.00010580456809204878, "loss": 0.1731, "step": 21940 }, { "epoch": 1.4135527184325343, "grad_norm": 0.14620162546634674, "learning_rate": 0.00010576163489610167, "loss": 0.1774, "step": 21950 }, { "epoch": 1.414196706003574, "grad_norm": 0.13909366726875305, "learning_rate": 0.00010571870170015457, "loss": 0.1788, "step": 21960 }, { "epoch": 1.414840693574614, "grad_norm": 0.14105547964572906, "learning_rate": 0.00010567576850420746, "loss": 0.1778, "step": 21970 }, { "epoch": 1.4154846811456538, "grad_norm": 0.1329772174358368, "learning_rate": 0.00010563283530826035, "loss": 0.1701, "step": 21980 }, { "epoch": 1.4161286687166939, "grad_norm": 0.12026146799325943, "learning_rate": 0.00010558990211231326, "loss": 0.1705, "step": 21990 }, { "epoch": 1.4167726562877336, "grad_norm": 0.13498681783676147, "learning_rate": 0.00010554696891636613, "loss": 0.1768, "step": 22000 }, { "epoch": 1.4174166438587736, "grad_norm": 0.14090515673160553, "learning_rate": 0.00010550403572041903, "loss": 0.1772, "step": 22010 }, { "epoch": 1.4180606314298134, "grad_norm": 0.14212819933891296, "learning_rate": 0.00010546110252447192, "loss": 0.1718, "step": 22020 }, { "epoch": 1.4187046190008532, "grad_norm": 0.20372627675533295, "learning_rate": 0.00010541816932852482, "loss": 0.1804, "step": 22030 }, { "epoch": 1.4193486065718932, "grad_norm": 0.14791348576545715, "learning_rate": 0.00010537523613257771, "loss": 0.1719, "step": 22040 }, { "epoch": 1.419992594142933, "grad_norm": 0.13724198937416077, "learning_rate": 0.00010533230293663062, "loss": 0.1774, "step": 22050 }, { "epoch": 1.420636581713973, "grad_norm": 0.1387111395597458, "learning_rate": 0.0001052893697406835, "loss": 0.1719, "step": 22060 }, { "epoch": 1.4212805692850128, "grad_norm": 0.13465355336666107, "learning_rate": 0.00010524643654473641, "loss": 0.1712, "step": 22070 }, { "epoch": 1.4219245568560526, "grad_norm": 0.12430483847856522, "learning_rate": 0.00010520350334878927, "loss": 0.18, "step": 22080 }, { "epoch": 1.4225685444270926, "grad_norm": 0.1450575888156891, "learning_rate": 0.00010516057015284218, "loss": 0.1757, "step": 22090 }, { "epoch": 1.4232125319981324, "grad_norm": 0.1507108062505722, "learning_rate": 0.00010511763695689507, "loss": 0.1799, "step": 22100 }, { "epoch": 1.4238565195691724, "grad_norm": 0.1213056892156601, "learning_rate": 0.00010507470376094797, "loss": 0.1713, "step": 22110 }, { "epoch": 1.4245005071402121, "grad_norm": 0.13139133155345917, "learning_rate": 0.00010503177056500086, "loss": 0.1743, "step": 22120 }, { "epoch": 1.4251444947112522, "grad_norm": 0.12878373265266418, "learning_rate": 0.00010498883736905375, "loss": 0.1691, "step": 22130 }, { "epoch": 1.425788482282292, "grad_norm": 0.14033865928649902, "learning_rate": 0.00010494590417310666, "loss": 0.1735, "step": 22140 }, { "epoch": 1.426432469853332, "grad_norm": 0.13297012448310852, "learning_rate": 0.00010490297097715955, "loss": 0.173, "step": 22150 }, { "epoch": 1.4270764574243717, "grad_norm": 0.13421505689620972, "learning_rate": 0.00010486003778121243, "loss": 0.167, "step": 22160 }, { "epoch": 1.4277204449954115, "grad_norm": 0.13928964734077454, "learning_rate": 0.00010481710458526533, "loss": 0.1746, "step": 22170 }, { "epoch": 1.4283644325664515, "grad_norm": 0.1402827352285385, "learning_rate": 0.00010477417138931822, "loss": 0.1764, "step": 22180 }, { "epoch": 1.4290084201374913, "grad_norm": 0.1363271176815033, "learning_rate": 0.00010473123819337111, "loss": 0.1766, "step": 22190 }, { "epoch": 1.4296524077085313, "grad_norm": 0.14554378390312195, "learning_rate": 0.00010468830499742402, "loss": 0.1676, "step": 22200 }, { "epoch": 1.430296395279571, "grad_norm": 0.13453231751918793, "learning_rate": 0.0001046453718014769, "loss": 0.1712, "step": 22210 }, { "epoch": 1.4309403828506109, "grad_norm": 0.13556571304798126, "learning_rate": 0.00010460243860552981, "loss": 0.1717, "step": 22220 }, { "epoch": 1.4315843704216509, "grad_norm": 0.12859566509723663, "learning_rate": 0.0001045595054095827, "loss": 0.172, "step": 22230 }, { "epoch": 1.4322283579926907, "grad_norm": 0.15196794271469116, "learning_rate": 0.00010451657221363558, "loss": 0.1715, "step": 22240 }, { "epoch": 1.4328723455637307, "grad_norm": 0.14925283193588257, "learning_rate": 0.00010447363901768847, "loss": 0.17, "step": 22250 }, { "epoch": 1.4335163331347704, "grad_norm": 0.14211736619472504, "learning_rate": 0.00010443070582174137, "loss": 0.174, "step": 22260 }, { "epoch": 1.4341603207058105, "grad_norm": 0.14383816719055176, "learning_rate": 0.00010438777262579426, "loss": 0.1752, "step": 22270 }, { "epoch": 1.4348043082768502, "grad_norm": 0.14473141729831696, "learning_rate": 0.00010434483942984717, "loss": 0.1788, "step": 22280 }, { "epoch": 1.4354482958478902, "grad_norm": 0.14754506945610046, "learning_rate": 0.00010430190623390006, "loss": 0.1769, "step": 22290 }, { "epoch": 1.43609228341893, "grad_norm": 0.14411430060863495, "learning_rate": 0.00010425897303795295, "loss": 0.1826, "step": 22300 }, { "epoch": 1.4367362709899698, "grad_norm": 0.14363664388656616, "learning_rate": 0.00010421603984200585, "loss": 0.1749, "step": 22310 }, { "epoch": 1.4373802585610098, "grad_norm": 0.13410556316375732, "learning_rate": 0.00010417310664605873, "loss": 0.1698, "step": 22320 }, { "epoch": 1.4380242461320496, "grad_norm": 0.15263986587524414, "learning_rate": 0.00010413017345011162, "loss": 0.1823, "step": 22330 }, { "epoch": 1.4386682337030896, "grad_norm": 0.1367017924785614, "learning_rate": 0.00010408724025416453, "loss": 0.1743, "step": 22340 }, { "epoch": 1.4393122212741294, "grad_norm": 0.1458776742219925, "learning_rate": 0.00010404430705821742, "loss": 0.166, "step": 22350 }, { "epoch": 1.4399562088451692, "grad_norm": 0.11343254894018173, "learning_rate": 0.00010400137386227031, "loss": 0.1642, "step": 22360 }, { "epoch": 1.4406001964162092, "grad_norm": 0.14436371624469757, "learning_rate": 0.00010395844066632321, "loss": 0.1817, "step": 22370 }, { "epoch": 1.4412441839872492, "grad_norm": 0.1352306455373764, "learning_rate": 0.0001039155074703761, "loss": 0.17, "step": 22380 }, { "epoch": 1.441888171558289, "grad_norm": 0.1213482916355133, "learning_rate": 0.000103872574274429, "loss": 0.174, "step": 22390 }, { "epoch": 1.4425321591293288, "grad_norm": 0.16524606943130493, "learning_rate": 0.00010382964107848187, "loss": 0.1822, "step": 22400 }, { "epoch": 1.4431761467003688, "grad_norm": 0.13580100238323212, "learning_rate": 0.00010378670788253477, "loss": 0.1798, "step": 22410 }, { "epoch": 1.4438201342714085, "grad_norm": 0.11998768895864487, "learning_rate": 0.00010374377468658766, "loss": 0.1708, "step": 22420 }, { "epoch": 1.4444641218424485, "grad_norm": 0.13616876304149628, "learning_rate": 0.00010370084149064057, "loss": 0.1782, "step": 22430 }, { "epoch": 1.4451081094134883, "grad_norm": 0.16703343391418457, "learning_rate": 0.00010365790829469346, "loss": 0.1814, "step": 22440 }, { "epoch": 1.4457520969845281, "grad_norm": 0.14493831992149353, "learning_rate": 0.00010361497509874636, "loss": 0.172, "step": 22450 }, { "epoch": 1.4463960845555681, "grad_norm": 0.12981119751930237, "learning_rate": 0.00010357204190279925, "loss": 0.1735, "step": 22460 }, { "epoch": 1.447040072126608, "grad_norm": 0.16380415856838226, "learning_rate": 0.00010352910870685215, "loss": 0.1765, "step": 22470 }, { "epoch": 1.447684059697648, "grad_norm": 0.14486274123191833, "learning_rate": 0.00010348617551090502, "loss": 0.1747, "step": 22480 }, { "epoch": 1.4483280472686877, "grad_norm": 0.15282420814037323, "learning_rate": 0.00010344324231495793, "loss": 0.1736, "step": 22490 }, { "epoch": 1.4489720348397275, "grad_norm": 0.13563449680805206, "learning_rate": 0.00010340030911901082, "loss": 0.1662, "step": 22500 }, { "epoch": 1.4496160224107675, "grad_norm": 0.13647682964801788, "learning_rate": 0.00010335737592306372, "loss": 0.1871, "step": 22510 }, { "epoch": 1.4502600099818075, "grad_norm": 0.1469939798116684, "learning_rate": 0.00010331444272711661, "loss": 0.1762, "step": 22520 }, { "epoch": 1.4509039975528473, "grad_norm": 0.12818343937397003, "learning_rate": 0.0001032715095311695, "loss": 0.1753, "step": 22530 }, { "epoch": 1.451547985123887, "grad_norm": 0.138153538107872, "learning_rate": 0.00010322857633522241, "loss": 0.1729, "step": 22540 }, { "epoch": 1.452191972694927, "grad_norm": 0.16232316195964813, "learning_rate": 0.0001031856431392753, "loss": 0.1754, "step": 22550 }, { "epoch": 1.4528359602659668, "grad_norm": 0.14305409789085388, "learning_rate": 0.0001031427099433282, "loss": 0.173, "step": 22560 }, { "epoch": 1.4534799478370068, "grad_norm": 0.11697295308113098, "learning_rate": 0.00010309977674738107, "loss": 0.1715, "step": 22570 }, { "epoch": 1.4541239354080466, "grad_norm": 0.1375856101512909, "learning_rate": 0.00010305684355143397, "loss": 0.1692, "step": 22580 }, { "epoch": 1.4547679229790864, "grad_norm": 0.1386508196592331, "learning_rate": 0.00010301391035548686, "loss": 0.1719, "step": 22590 }, { "epoch": 1.4554119105501264, "grad_norm": 0.12702950835227966, "learning_rate": 0.00010297097715953976, "loss": 0.177, "step": 22600 }, { "epoch": 1.4560558981211662, "grad_norm": 0.13662442564964294, "learning_rate": 0.00010292804396359266, "loss": 0.1727, "step": 22610 }, { "epoch": 1.4566998856922062, "grad_norm": 0.13229599595069885, "learning_rate": 0.00010288511076764556, "loss": 0.1733, "step": 22620 }, { "epoch": 1.457343873263246, "grad_norm": 0.14539258182048798, "learning_rate": 0.00010284217757169845, "loss": 0.1761, "step": 22630 }, { "epoch": 1.4579878608342858, "grad_norm": 0.14178335666656494, "learning_rate": 0.00010279924437575134, "loss": 0.1806, "step": 22640 }, { "epoch": 1.4586318484053258, "grad_norm": 0.129881352186203, "learning_rate": 0.00010275631117980422, "loss": 0.1729, "step": 22650 }, { "epoch": 1.4592758359763658, "grad_norm": 0.14153389632701874, "learning_rate": 0.00010271337798385712, "loss": 0.1694, "step": 22660 }, { "epoch": 1.4599198235474056, "grad_norm": 0.14133328199386597, "learning_rate": 0.00010267044478791001, "loss": 0.183, "step": 22670 }, { "epoch": 1.4605638111184454, "grad_norm": 0.12120213359594345, "learning_rate": 0.00010262751159196292, "loss": 0.1789, "step": 22680 }, { "epoch": 1.4612077986894854, "grad_norm": 0.1223507896065712, "learning_rate": 0.00010258457839601581, "loss": 0.1781, "step": 22690 }, { "epoch": 1.4618517862605251, "grad_norm": 0.13770470023155212, "learning_rate": 0.0001025416452000687, "loss": 0.1777, "step": 22700 }, { "epoch": 1.4624957738315651, "grad_norm": 0.1240755245089531, "learning_rate": 0.0001024987120041216, "loss": 0.1737, "step": 22710 }, { "epoch": 1.463139761402605, "grad_norm": 0.1395176649093628, "learning_rate": 0.00010245577880817449, "loss": 0.1767, "step": 22720 }, { "epoch": 1.4637837489736447, "grad_norm": 0.12383472919464111, "learning_rate": 0.00010241284561222737, "loss": 0.1761, "step": 22730 }, { "epoch": 1.4644277365446847, "grad_norm": 0.1677873283624649, "learning_rate": 0.00010236991241628026, "loss": 0.1771, "step": 22740 }, { "epoch": 1.4650717241157245, "grad_norm": 0.12909606099128723, "learning_rate": 0.00010232697922033317, "loss": 0.1703, "step": 22750 }, { "epoch": 1.4657157116867645, "grad_norm": 0.12894940376281738, "learning_rate": 0.00010228404602438606, "loss": 0.1774, "step": 22760 }, { "epoch": 1.4663596992578043, "grad_norm": 0.12143992632627487, "learning_rate": 0.00010224111282843896, "loss": 0.1732, "step": 22770 }, { "epoch": 1.467003686828844, "grad_norm": 0.1417064219713211, "learning_rate": 0.00010219817963249185, "loss": 0.1791, "step": 22780 }, { "epoch": 1.467647674399884, "grad_norm": 0.13785669207572937, "learning_rate": 0.00010215524643654475, "loss": 0.175, "step": 22790 }, { "epoch": 1.468291661970924, "grad_norm": 0.1396806836128235, "learning_rate": 0.00010211231324059765, "loss": 0.1738, "step": 22800 }, { "epoch": 1.4689356495419639, "grad_norm": 0.12814146280288696, "learning_rate": 0.00010206938004465052, "loss": 0.1728, "step": 22810 }, { "epoch": 1.4695796371130037, "grad_norm": 0.13997586071491241, "learning_rate": 0.00010202644684870341, "loss": 0.1732, "step": 22820 }, { "epoch": 1.4702236246840437, "grad_norm": 0.15324616432189941, "learning_rate": 0.00010198351365275632, "loss": 0.1799, "step": 22830 }, { "epoch": 1.4708676122550834, "grad_norm": 0.13739179074764252, "learning_rate": 0.00010194058045680921, "loss": 0.1678, "step": 22840 }, { "epoch": 1.4715115998261235, "grad_norm": 0.14430201053619385, "learning_rate": 0.0001018976472608621, "loss": 0.1692, "step": 22850 }, { "epoch": 1.4721555873971632, "grad_norm": 0.11934373527765274, "learning_rate": 0.000101854714064915, "loss": 0.1717, "step": 22860 }, { "epoch": 1.472799574968203, "grad_norm": 0.12972724437713623, "learning_rate": 0.0001018117808689679, "loss": 0.1695, "step": 22870 }, { "epoch": 1.473443562539243, "grad_norm": 0.14279679954051971, "learning_rate": 0.0001017688476730208, "loss": 0.1732, "step": 22880 }, { "epoch": 1.4740875501102828, "grad_norm": 0.14487171173095703, "learning_rate": 0.00010172591447707368, "loss": 0.1785, "step": 22890 }, { "epoch": 1.4747315376813228, "grad_norm": 0.14311790466308594, "learning_rate": 0.00010168298128112657, "loss": 0.1814, "step": 22900 }, { "epoch": 1.4753755252523626, "grad_norm": 0.15164868533611298, "learning_rate": 0.00010164004808517946, "loss": 0.1769, "step": 22910 }, { "epoch": 1.4760195128234024, "grad_norm": 0.13043679296970367, "learning_rate": 0.00010159711488923236, "loss": 0.1691, "step": 22920 }, { "epoch": 1.4766635003944424, "grad_norm": 0.15517914295196533, "learning_rate": 0.00010155418169328525, "loss": 0.1773, "step": 22930 }, { "epoch": 1.4773074879654824, "grad_norm": 0.13633696734905243, "learning_rate": 0.00010151124849733816, "loss": 0.1787, "step": 22940 }, { "epoch": 1.4779514755365222, "grad_norm": 0.13635365664958954, "learning_rate": 0.00010146831530139105, "loss": 0.1716, "step": 22950 }, { "epoch": 1.478595463107562, "grad_norm": 0.1505585014820099, "learning_rate": 0.00010142538210544395, "loss": 0.1742, "step": 22960 }, { "epoch": 1.479239450678602, "grad_norm": 0.14207395911216736, "learning_rate": 0.00010138244890949681, "loss": 0.1797, "step": 22970 }, { "epoch": 1.4798834382496417, "grad_norm": 0.1384740024805069, "learning_rate": 0.00010133951571354972, "loss": 0.1671, "step": 22980 }, { "epoch": 1.4805274258206818, "grad_norm": 0.1351637840270996, "learning_rate": 0.00010129658251760261, "loss": 0.1701, "step": 22990 }, { "epoch": 1.4811714133917215, "grad_norm": 0.1641162484884262, "learning_rate": 0.00010125364932165551, "loss": 0.1728, "step": 23000 }, { "epoch": 1.4818154009627613, "grad_norm": 0.14035004377365112, "learning_rate": 0.0001012107161257084, "loss": 0.1788, "step": 23010 }, { "epoch": 1.4824593885338013, "grad_norm": 0.1233641505241394, "learning_rate": 0.0001011677829297613, "loss": 0.1687, "step": 23020 }, { "epoch": 1.483103376104841, "grad_norm": 0.1472138613462448, "learning_rate": 0.0001011248497338142, "loss": 0.1761, "step": 23030 }, { "epoch": 1.4837473636758811, "grad_norm": 0.131879523396492, "learning_rate": 0.00010108191653786709, "loss": 0.1764, "step": 23040 }, { "epoch": 1.484391351246921, "grad_norm": 0.13675987720489502, "learning_rate": 0.00010103898334191997, "loss": 0.1672, "step": 23050 }, { "epoch": 1.4850353388179607, "grad_norm": 0.14564049243927002, "learning_rate": 0.00010099605014597287, "loss": 0.1715, "step": 23060 }, { "epoch": 1.4856793263890007, "grad_norm": 0.11543440073728561, "learning_rate": 0.00010095311695002576, "loss": 0.1655, "step": 23070 }, { "epoch": 1.4863233139600407, "grad_norm": 0.1426287442445755, "learning_rate": 0.00010091018375407865, "loss": 0.1759, "step": 23080 }, { "epoch": 1.4869673015310805, "grad_norm": 0.11886084079742432, "learning_rate": 0.00010086725055813156, "loss": 0.1661, "step": 23090 }, { "epoch": 1.4876112891021203, "grad_norm": 0.15631666779518127, "learning_rate": 0.00010082431736218445, "loss": 0.1695, "step": 23100 }, { "epoch": 1.4882552766731603, "grad_norm": 0.13729508221149445, "learning_rate": 0.00010078138416623735, "loss": 0.1793, "step": 23110 }, { "epoch": 1.4888992642442, "grad_norm": 0.15370039641857147, "learning_rate": 0.00010073845097029024, "loss": 0.1735, "step": 23120 }, { "epoch": 1.48954325181524, "grad_norm": 0.14361721277236938, "learning_rate": 0.00010069551777434312, "loss": 0.1758, "step": 23130 }, { "epoch": 1.4901872393862798, "grad_norm": 0.1274462789297104, "learning_rate": 0.00010065258457839601, "loss": 0.1788, "step": 23140 }, { "epoch": 1.4908312269573196, "grad_norm": 0.158486470580101, "learning_rate": 0.00010060965138244891, "loss": 0.1731, "step": 23150 }, { "epoch": 1.4914752145283596, "grad_norm": 0.14983774721622467, "learning_rate": 0.0001005667181865018, "loss": 0.182, "step": 23160 }, { "epoch": 1.4921192020993994, "grad_norm": 0.1497897058725357, "learning_rate": 0.00010052378499055471, "loss": 0.176, "step": 23170 }, { "epoch": 1.4927631896704394, "grad_norm": 0.1399778425693512, "learning_rate": 0.0001004808517946076, "loss": 0.1779, "step": 23180 }, { "epoch": 1.4934071772414792, "grad_norm": 0.1359182894229889, "learning_rate": 0.00010043791859866049, "loss": 0.174, "step": 23190 }, { "epoch": 1.494051164812519, "grad_norm": 0.1379256397485733, "learning_rate": 0.0001003949854027134, "loss": 0.1771, "step": 23200 }, { "epoch": 1.494695152383559, "grad_norm": 0.1430729776620865, "learning_rate": 0.00010035205220676627, "loss": 0.1619, "step": 23210 }, { "epoch": 1.495339139954599, "grad_norm": 0.13854825496673584, "learning_rate": 0.00010030911901081916, "loss": 0.1748, "step": 23220 }, { "epoch": 1.4959831275256388, "grad_norm": 0.14188005030155182, "learning_rate": 0.00010026618581487207, "loss": 0.1713, "step": 23230 }, { "epoch": 1.4966271150966786, "grad_norm": 0.12608836591243744, "learning_rate": 0.00010022325261892496, "loss": 0.169, "step": 23240 }, { "epoch": 1.4972711026677186, "grad_norm": 0.15133747458457947, "learning_rate": 0.00010018031942297785, "loss": 0.1804, "step": 23250 }, { "epoch": 1.4979150902387584, "grad_norm": 0.13021817803382874, "learning_rate": 0.00010013738622703075, "loss": 0.171, "step": 23260 }, { "epoch": 1.4985590778097984, "grad_norm": 0.12953883409500122, "learning_rate": 0.00010009445303108364, "loss": 0.1697, "step": 23270 }, { "epoch": 1.4992030653808381, "grad_norm": 0.1272529810667038, "learning_rate": 0.00010005151983513655, "loss": 0.1711, "step": 23280 }, { "epoch": 1.499847052951878, "grad_norm": 0.13837623596191406, "learning_rate": 0.00010000858663918941, "loss": 0.1786, "step": 23290 }, { "epoch": 1.500491040522918, "grad_norm": 0.13566821813583374, "learning_rate": 9.996565344324233e-05, "loss": 0.1734, "step": 23300 }, { "epoch": 1.501135028093958, "grad_norm": 0.13320405781269073, "learning_rate": 9.99227202472952e-05, "loss": 0.1719, "step": 23310 }, { "epoch": 1.5017790156649977, "grad_norm": 0.15837016701698303, "learning_rate": 9.987978705134811e-05, "loss": 0.1743, "step": 23320 }, { "epoch": 1.5024230032360375, "grad_norm": 0.1399882584810257, "learning_rate": 9.9836853855401e-05, "loss": 0.1634, "step": 23330 }, { "epoch": 1.5030669908070773, "grad_norm": 0.14658567309379578, "learning_rate": 9.97939206594539e-05, "loss": 0.183, "step": 23340 }, { "epoch": 1.5037109783781173, "grad_norm": 0.1293664276599884, "learning_rate": 9.975098746350678e-05, "loss": 0.1844, "step": 23350 }, { "epoch": 1.5043549659491573, "grad_norm": 0.13270005583763123, "learning_rate": 9.970805426755969e-05, "loss": 0.171, "step": 23360 }, { "epoch": 1.504998953520197, "grad_norm": 0.14518563449382782, "learning_rate": 9.966512107161258e-05, "loss": 0.1751, "step": 23370 }, { "epoch": 1.5056429410912369, "grad_norm": 0.14065948128700256, "learning_rate": 9.962218787566547e-05, "loss": 0.1791, "step": 23380 }, { "epoch": 1.5062869286622769, "grad_norm": 0.14226451516151428, "learning_rate": 9.957925467971836e-05, "loss": 0.1749, "step": 23390 }, { "epoch": 1.5069309162333167, "grad_norm": 0.13914074003696442, "learning_rate": 9.953632148377126e-05, "loss": 0.1804, "step": 23400 }, { "epoch": 1.5075749038043567, "grad_norm": 0.13767144083976746, "learning_rate": 9.949338828782415e-05, "loss": 0.1775, "step": 23410 }, { "epoch": 1.5082188913753964, "grad_norm": 0.13464537262916565, "learning_rate": 9.945045509187704e-05, "loss": 0.1731, "step": 23420 }, { "epoch": 1.5088628789464362, "grad_norm": 0.13118599355220795, "learning_rate": 9.940752189592993e-05, "loss": 0.1753, "step": 23430 }, { "epoch": 1.5095068665174762, "grad_norm": 0.1240231841802597, "learning_rate": 9.936458869998282e-05, "loss": 0.1762, "step": 23440 }, { "epoch": 1.5101508540885162, "grad_norm": 0.12464452534914017, "learning_rate": 9.932165550403573e-05, "loss": 0.171, "step": 23450 }, { "epoch": 1.510794841659556, "grad_norm": 0.12712042033672333, "learning_rate": 9.927872230808862e-05, "loss": 0.1749, "step": 23460 }, { "epoch": 1.5114388292305958, "grad_norm": 0.15891772508621216, "learning_rate": 9.923578911214151e-05, "loss": 0.176, "step": 23470 }, { "epoch": 1.5120828168016356, "grad_norm": 0.14309725165367126, "learning_rate": 9.91928559161944e-05, "loss": 0.1749, "step": 23480 }, { "epoch": 1.5127268043726756, "grad_norm": 0.13119685649871826, "learning_rate": 9.91499227202473e-05, "loss": 0.1771, "step": 23490 }, { "epoch": 1.5133707919437156, "grad_norm": 0.12234962731599808, "learning_rate": 9.91069895243002e-05, "loss": 0.1804, "step": 23500 }, { "epoch": 1.5140147795147554, "grad_norm": 0.12176007032394409, "learning_rate": 9.906405632835309e-05, "loss": 0.1667, "step": 23510 }, { "epoch": 1.5146587670857952, "grad_norm": 0.13353534042835236, "learning_rate": 9.902112313240598e-05, "loss": 0.171, "step": 23520 }, { "epoch": 1.5153027546568352, "grad_norm": 0.16203300654888153, "learning_rate": 9.897818993645888e-05, "loss": 0.1705, "step": 23530 }, { "epoch": 1.515946742227875, "grad_norm": 0.14306339621543884, "learning_rate": 9.893525674051177e-05, "loss": 0.176, "step": 23540 }, { "epoch": 1.516590729798915, "grad_norm": 0.137338787317276, "learning_rate": 9.889232354456466e-05, "loss": 0.1756, "step": 23550 }, { "epoch": 1.5172347173699547, "grad_norm": 0.1298137903213501, "learning_rate": 9.884939034861755e-05, "loss": 0.1711, "step": 23560 }, { "epoch": 1.5178787049409945, "grad_norm": 0.13738863170146942, "learning_rate": 9.880645715267046e-05, "loss": 0.181, "step": 23570 }, { "epoch": 1.5185226925120345, "grad_norm": 0.14890746772289276, "learning_rate": 9.876352395672335e-05, "loss": 0.1744, "step": 23580 }, { "epoch": 1.5191666800830745, "grad_norm": 0.12620671093463898, "learning_rate": 9.872059076077624e-05, "loss": 0.1764, "step": 23590 }, { "epoch": 1.5198106676541143, "grad_norm": 0.14443670213222504, "learning_rate": 9.867765756482913e-05, "loss": 0.1676, "step": 23600 }, { "epoch": 1.520454655225154, "grad_norm": 0.1531490534543991, "learning_rate": 9.863472436888202e-05, "loss": 0.1753, "step": 23610 }, { "epoch": 1.521098642796194, "grad_norm": 0.15075744688510895, "learning_rate": 9.859179117293492e-05, "loss": 0.1746, "step": 23620 }, { "epoch": 1.521742630367234, "grad_norm": 0.14011211693286896, "learning_rate": 9.85488579769878e-05, "loss": 0.1729, "step": 23630 }, { "epoch": 1.522386617938274, "grad_norm": 0.1406162828207016, "learning_rate": 9.85059247810407e-05, "loss": 0.1691, "step": 23640 }, { "epoch": 1.5230306055093137, "grad_norm": 0.13517339527606964, "learning_rate": 9.84629915850936e-05, "loss": 0.1695, "step": 23650 }, { "epoch": 1.5236745930803535, "grad_norm": 0.1317611038684845, "learning_rate": 9.84200583891465e-05, "loss": 0.1698, "step": 23660 }, { "epoch": 1.5243185806513935, "grad_norm": 0.14260493218898773, "learning_rate": 9.837712519319938e-05, "loss": 0.1817, "step": 23670 }, { "epoch": 1.5249625682224333, "grad_norm": 0.13010255992412567, "learning_rate": 9.833419199725228e-05, "loss": 0.1706, "step": 23680 }, { "epoch": 1.5256065557934733, "grad_norm": 0.13361193239688873, "learning_rate": 9.829125880130517e-05, "loss": 0.1753, "step": 23690 }, { "epoch": 1.526250543364513, "grad_norm": 0.14344431459903717, "learning_rate": 9.824832560535808e-05, "loss": 0.1741, "step": 23700 }, { "epoch": 1.5268945309355528, "grad_norm": 0.1373724341392517, "learning_rate": 9.820539240941095e-05, "loss": 0.1706, "step": 23710 }, { "epoch": 1.5275385185065928, "grad_norm": 0.1566663682460785, "learning_rate": 9.816245921346386e-05, "loss": 0.1673, "step": 23720 }, { "epoch": 1.5281825060776328, "grad_norm": 0.1438867598772049, "learning_rate": 9.811952601751675e-05, "loss": 0.1785, "step": 23730 }, { "epoch": 1.5288264936486726, "grad_norm": 0.1401561051607132, "learning_rate": 9.807659282156964e-05, "loss": 0.1786, "step": 23740 }, { "epoch": 1.5294704812197124, "grad_norm": 0.1584477424621582, "learning_rate": 9.803365962562253e-05, "loss": 0.1705, "step": 23750 }, { "epoch": 1.5301144687907522, "grad_norm": 0.14854225516319275, "learning_rate": 9.799072642967543e-05, "loss": 0.1808, "step": 23760 }, { "epoch": 1.5307584563617922, "grad_norm": 0.136137917637825, "learning_rate": 9.794779323372833e-05, "loss": 0.1751, "step": 23770 }, { "epoch": 1.5314024439328322, "grad_norm": 0.14462734758853912, "learning_rate": 9.790486003778122e-05, "loss": 0.1742, "step": 23780 }, { "epoch": 1.532046431503872, "grad_norm": 0.1328057497739792, "learning_rate": 9.78619268418341e-05, "loss": 0.1762, "step": 23790 }, { "epoch": 1.5326904190749118, "grad_norm": 0.15099993348121643, "learning_rate": 9.7818993645887e-05, "loss": 0.1768, "step": 23800 }, { "epoch": 1.5333344066459518, "grad_norm": 0.16073967516422272, "learning_rate": 9.77760604499399e-05, "loss": 0.1765, "step": 23810 }, { "epoch": 1.5339783942169916, "grad_norm": 0.13899800181388855, "learning_rate": 9.773312725399279e-05, "loss": 0.179, "step": 23820 }, { "epoch": 1.5346223817880316, "grad_norm": 0.15602579712867737, "learning_rate": 9.769019405804568e-05, "loss": 0.1794, "step": 23830 }, { "epoch": 1.5352663693590713, "grad_norm": 0.1276365965604782, "learning_rate": 9.764726086209857e-05, "loss": 0.1725, "step": 23840 }, { "epoch": 1.5359103569301111, "grad_norm": 0.12974216043949127, "learning_rate": 9.760432766615148e-05, "loss": 0.1782, "step": 23850 }, { "epoch": 1.5365543445011511, "grad_norm": 0.17007072269916534, "learning_rate": 9.756139447020437e-05, "loss": 0.181, "step": 23860 }, { "epoch": 1.5371983320721911, "grad_norm": 0.12568962574005127, "learning_rate": 9.751846127425726e-05, "loss": 0.1796, "step": 23870 }, { "epoch": 1.537842319643231, "grad_norm": 0.14846566319465637, "learning_rate": 9.747552807831015e-05, "loss": 0.1787, "step": 23880 }, { "epoch": 1.5384863072142707, "grad_norm": 0.14624261856079102, "learning_rate": 9.743259488236305e-05, "loss": 0.173, "step": 23890 }, { "epoch": 1.5391302947853105, "grad_norm": 0.14806202054023743, "learning_rate": 9.738966168641594e-05, "loss": 0.1779, "step": 23900 }, { "epoch": 1.5397742823563505, "grad_norm": 0.1271984726190567, "learning_rate": 9.734672849046884e-05, "loss": 0.1773, "step": 23910 }, { "epoch": 1.5404182699273905, "grad_norm": 0.12842737138271332, "learning_rate": 9.730379529452173e-05, "loss": 0.1789, "step": 23920 }, { "epoch": 1.5410622574984303, "grad_norm": 0.12323910742998123, "learning_rate": 9.726086209857463e-05, "loss": 0.167, "step": 23930 }, { "epoch": 1.54170624506947, "grad_norm": 0.1555841863155365, "learning_rate": 9.721792890262752e-05, "loss": 0.1722, "step": 23940 }, { "epoch": 1.54235023264051, "grad_norm": 0.15185348689556122, "learning_rate": 9.717499570668041e-05, "loss": 0.1818, "step": 23950 }, { "epoch": 1.54299422021155, "grad_norm": 0.13923625648021698, "learning_rate": 9.71320625107333e-05, "loss": 0.1761, "step": 23960 }, { "epoch": 1.5436382077825899, "grad_norm": 0.13536271452903748, "learning_rate": 9.708912931478619e-05, "loss": 0.1732, "step": 23970 }, { "epoch": 1.5442821953536296, "grad_norm": 0.1311531960964203, "learning_rate": 9.70461961188391e-05, "loss": 0.1738, "step": 23980 }, { "epoch": 1.5449261829246694, "grad_norm": 0.13090598583221436, "learning_rate": 9.700326292289197e-05, "loss": 0.1776, "step": 23990 }, { "epoch": 1.5455701704957094, "grad_norm": 0.1506389081478119, "learning_rate": 9.696032972694488e-05, "loss": 0.178, "step": 24000 }, { "epoch": 1.5462141580667494, "grad_norm": 0.1470354199409485, "learning_rate": 9.691739653099777e-05, "loss": 0.1732, "step": 24010 }, { "epoch": 1.5468581456377892, "grad_norm": 0.14873221516609192, "learning_rate": 9.687446333505067e-05, "loss": 0.1756, "step": 24020 }, { "epoch": 1.547502133208829, "grad_norm": 0.13900300860404968, "learning_rate": 9.683153013910355e-05, "loss": 0.1675, "step": 24030 }, { "epoch": 1.5481461207798688, "grad_norm": 0.12229190021753311, "learning_rate": 9.678859694315645e-05, "loss": 0.1668, "step": 24040 }, { "epoch": 1.5487901083509088, "grad_norm": 0.13206149637699127, "learning_rate": 9.674566374720935e-05, "loss": 0.1738, "step": 24050 }, { "epoch": 1.5494340959219488, "grad_norm": 0.14548097550868988, "learning_rate": 9.670273055126225e-05, "loss": 0.1752, "step": 24060 }, { "epoch": 1.5500780834929886, "grad_norm": 0.13438594341278076, "learning_rate": 9.665979735531513e-05, "loss": 0.1733, "step": 24070 }, { "epoch": 1.5507220710640284, "grad_norm": 0.1346234232187271, "learning_rate": 9.661686415936803e-05, "loss": 0.1778, "step": 24080 }, { "epoch": 1.5513660586350684, "grad_norm": 0.13288049399852753, "learning_rate": 9.657393096342092e-05, "loss": 0.1762, "step": 24090 }, { "epoch": 1.5520100462061084, "grad_norm": 0.15128441154956818, "learning_rate": 9.653099776747381e-05, "loss": 0.1749, "step": 24100 }, { "epoch": 1.5526540337771482, "grad_norm": 0.14222782850265503, "learning_rate": 9.64880645715267e-05, "loss": 0.1752, "step": 24110 }, { "epoch": 1.553298021348188, "grad_norm": 0.14389562606811523, "learning_rate": 9.644513137557961e-05, "loss": 0.1745, "step": 24120 }, { "epoch": 1.5539420089192277, "grad_norm": 0.1342764049768448, "learning_rate": 9.64021981796325e-05, "loss": 0.1725, "step": 24130 }, { "epoch": 1.5545859964902677, "grad_norm": 0.14309503138065338, "learning_rate": 9.635926498368539e-05, "loss": 0.1727, "step": 24140 }, { "epoch": 1.5552299840613077, "grad_norm": 0.14724642038345337, "learning_rate": 9.631633178773828e-05, "loss": 0.1758, "step": 24150 }, { "epoch": 1.5558739716323475, "grad_norm": 0.12938663363456726, "learning_rate": 9.627339859179117e-05, "loss": 0.1715, "step": 24160 }, { "epoch": 1.5565179592033873, "grad_norm": 0.14281851053237915, "learning_rate": 9.623046539584407e-05, "loss": 0.1717, "step": 24170 }, { "epoch": 1.557161946774427, "grad_norm": 0.12672406435012817, "learning_rate": 9.618753219989696e-05, "loss": 0.1672, "step": 24180 }, { "epoch": 1.557805934345467, "grad_norm": 0.1365630030632019, "learning_rate": 9.614459900394986e-05, "loss": 0.1682, "step": 24190 }, { "epoch": 1.558449921916507, "grad_norm": 0.14313936233520508, "learning_rate": 9.610166580800275e-05, "loss": 0.1657, "step": 24200 }, { "epoch": 1.559093909487547, "grad_norm": 0.15887096524238586, "learning_rate": 9.605873261205565e-05, "loss": 0.1693, "step": 24210 }, { "epoch": 1.5597378970585867, "grad_norm": 0.13917703926563263, "learning_rate": 9.601579941610854e-05, "loss": 0.1764, "step": 24220 }, { "epoch": 1.5603818846296267, "grad_norm": 0.12730702757835388, "learning_rate": 9.597286622016143e-05, "loss": 0.1701, "step": 24230 }, { "epoch": 1.5610258722006667, "grad_norm": 0.14495137333869934, "learning_rate": 9.592993302421432e-05, "loss": 0.1742, "step": 24240 }, { "epoch": 1.5616698597717065, "grad_norm": 0.13443416357040405, "learning_rate": 9.588699982826723e-05, "loss": 0.1684, "step": 24250 }, { "epoch": 1.5623138473427463, "grad_norm": 0.1189957782626152, "learning_rate": 9.584406663232012e-05, "loss": 0.1739, "step": 24260 }, { "epoch": 1.562957834913786, "grad_norm": 0.1223813071846962, "learning_rate": 9.580113343637301e-05, "loss": 0.1825, "step": 24270 }, { "epoch": 1.563601822484826, "grad_norm": 0.16276150941848755, "learning_rate": 9.57582002404259e-05, "loss": 0.1821, "step": 24280 }, { "epoch": 1.564245810055866, "grad_norm": 0.12724821269512177, "learning_rate": 9.57152670444788e-05, "loss": 0.1684, "step": 24290 }, { "epoch": 1.5648897976269058, "grad_norm": 0.13489790260791779, "learning_rate": 9.567233384853169e-05, "loss": 0.1767, "step": 24300 }, { "epoch": 1.5655337851979456, "grad_norm": 0.13408902287483215, "learning_rate": 9.562940065258458e-05, "loss": 0.1686, "step": 24310 }, { "epoch": 1.5661777727689854, "grad_norm": 0.11975857615470886, "learning_rate": 9.558646745663747e-05, "loss": 0.1711, "step": 24320 }, { "epoch": 1.5668217603400254, "grad_norm": 0.15315625071525574, "learning_rate": 9.554353426069037e-05, "loss": 0.1727, "step": 24330 }, { "epoch": 1.5674657479110654, "grad_norm": 0.1453060507774353, "learning_rate": 9.550060106474327e-05, "loss": 0.1684, "step": 24340 }, { "epoch": 1.5681097354821052, "grad_norm": 0.13665077090263367, "learning_rate": 9.545766786879615e-05, "loss": 0.1719, "step": 24350 }, { "epoch": 1.568753723053145, "grad_norm": 0.16198113560676575, "learning_rate": 9.541473467284905e-05, "loss": 0.1795, "step": 24360 }, { "epoch": 1.569397710624185, "grad_norm": 0.1483277529478073, "learning_rate": 9.537180147690194e-05, "loss": 0.1719, "step": 24370 }, { "epoch": 1.570041698195225, "grad_norm": 0.14429429173469543, "learning_rate": 9.532886828095485e-05, "loss": 0.1706, "step": 24380 }, { "epoch": 1.5706856857662648, "grad_norm": 0.15935474634170532, "learning_rate": 9.528593508500772e-05, "loss": 0.1777, "step": 24390 }, { "epoch": 1.5713296733373046, "grad_norm": 0.1431579887866974, "learning_rate": 9.524300188906063e-05, "loss": 0.1747, "step": 24400 }, { "epoch": 1.5719736609083443, "grad_norm": 0.14480963349342346, "learning_rate": 9.520006869311352e-05, "loss": 0.1746, "step": 24410 }, { "epoch": 1.5726176484793843, "grad_norm": 0.1561373621225357, "learning_rate": 9.515713549716642e-05, "loss": 0.1798, "step": 24420 }, { "epoch": 1.5732616360504244, "grad_norm": 0.1567126214504242, "learning_rate": 9.511420230121931e-05, "loss": 0.1775, "step": 24430 }, { "epoch": 1.5739056236214641, "grad_norm": 0.17338967323303223, "learning_rate": 9.50712691052722e-05, "loss": 0.1758, "step": 24440 }, { "epoch": 1.574549611192504, "grad_norm": 0.13669423758983612, "learning_rate": 9.50283359093251e-05, "loss": 0.1752, "step": 24450 }, { "epoch": 1.5751935987635437, "grad_norm": 0.12637609243392944, "learning_rate": 9.498540271337798e-05, "loss": 0.1733, "step": 24460 }, { "epoch": 1.5758375863345837, "grad_norm": 0.1432817429304123, "learning_rate": 9.494246951743089e-05, "loss": 0.1739, "step": 24470 }, { "epoch": 1.5764815739056237, "grad_norm": 0.15746468305587769, "learning_rate": 9.489953632148378e-05, "loss": 0.1699, "step": 24480 }, { "epoch": 1.5771255614766635, "grad_norm": 0.14533640444278717, "learning_rate": 9.485660312553667e-05, "loss": 0.177, "step": 24490 }, { "epoch": 1.5777695490477033, "grad_norm": 0.1451699286699295, "learning_rate": 9.481366992958956e-05, "loss": 0.182, "step": 24500 }, { "epoch": 1.5784135366187433, "grad_norm": 0.1323574036359787, "learning_rate": 9.477073673364246e-05, "loss": 0.1682, "step": 24510 }, { "epoch": 1.5790575241897833, "grad_norm": 0.16544455289840698, "learning_rate": 9.472780353769534e-05, "loss": 0.1675, "step": 24520 }, { "epoch": 1.579701511760823, "grad_norm": 0.12825648486614227, "learning_rate": 9.468487034174825e-05, "loss": 0.1705, "step": 24530 }, { "epoch": 1.5803454993318629, "grad_norm": 0.14414428174495697, "learning_rate": 9.464193714580114e-05, "loss": 0.1758, "step": 24540 }, { "epoch": 1.5809894869029026, "grad_norm": 0.16194657981395721, "learning_rate": 9.459900394985404e-05, "loss": 0.1724, "step": 24550 }, { "epoch": 1.5816334744739426, "grad_norm": 0.12831860780715942, "learning_rate": 9.455607075390692e-05, "loss": 0.1741, "step": 24560 }, { "epoch": 1.5822774620449827, "grad_norm": 0.13252554833889008, "learning_rate": 9.451313755795982e-05, "loss": 0.1756, "step": 24570 }, { "epoch": 1.5829214496160224, "grad_norm": 0.14744256436824799, "learning_rate": 9.447020436201271e-05, "loss": 0.1736, "step": 24580 }, { "epoch": 1.5835654371870622, "grad_norm": 0.14163726568222046, "learning_rate": 9.442727116606562e-05, "loss": 0.1765, "step": 24590 }, { "epoch": 1.584209424758102, "grad_norm": 0.1576281636953354, "learning_rate": 9.43843379701185e-05, "loss": 0.1812, "step": 24600 }, { "epoch": 1.584853412329142, "grad_norm": 0.14777906239032745, "learning_rate": 9.43414047741714e-05, "loss": 0.1734, "step": 24610 }, { "epoch": 1.585497399900182, "grad_norm": 0.1387128084897995, "learning_rate": 9.429847157822429e-05, "loss": 0.1659, "step": 24620 }, { "epoch": 1.5861413874712218, "grad_norm": 0.1422872394323349, "learning_rate": 9.425553838227718e-05, "loss": 0.1762, "step": 24630 }, { "epoch": 1.5867853750422616, "grad_norm": 0.11936570703983307, "learning_rate": 9.421260518633007e-05, "loss": 0.1709, "step": 24640 }, { "epoch": 1.5874293626133016, "grad_norm": 0.13631242513656616, "learning_rate": 9.416967199038297e-05, "loss": 0.1682, "step": 24650 }, { "epoch": 1.5880733501843416, "grad_norm": 0.1618947684764862, "learning_rate": 9.412673879443587e-05, "loss": 0.1773, "step": 24660 }, { "epoch": 1.5887173377553814, "grad_norm": 0.12810967862606049, "learning_rate": 9.408380559848876e-05, "loss": 0.1733, "step": 24670 }, { "epoch": 1.5893613253264212, "grad_norm": 0.1368444412946701, "learning_rate": 9.404087240254165e-05, "loss": 0.1699, "step": 24680 }, { "epoch": 1.590005312897461, "grad_norm": 0.13662631809711456, "learning_rate": 9.399793920659454e-05, "loss": 0.1779, "step": 24690 }, { "epoch": 1.590649300468501, "grad_norm": 0.12711723148822784, "learning_rate": 9.395500601064744e-05, "loss": 0.1816, "step": 24700 }, { "epoch": 1.591293288039541, "grad_norm": 0.14134404063224792, "learning_rate": 9.391207281470033e-05, "loss": 0.1752, "step": 24710 }, { "epoch": 1.5919372756105807, "grad_norm": 0.14565406739711761, "learning_rate": 9.386913961875322e-05, "loss": 0.1729, "step": 24720 }, { "epoch": 1.5925812631816205, "grad_norm": 0.14513768255710602, "learning_rate": 9.382620642280611e-05, "loss": 0.1752, "step": 24730 }, { "epoch": 1.5932252507526605, "grad_norm": 0.16621406376361847, "learning_rate": 9.378327322685902e-05, "loss": 0.1739, "step": 24740 }, { "epoch": 1.5938692383237003, "grad_norm": 0.13383883237838745, "learning_rate": 9.374034003091191e-05, "loss": 0.1726, "step": 24750 }, { "epoch": 1.5945132258947403, "grad_norm": 0.13869665563106537, "learning_rate": 9.36974068349648e-05, "loss": 0.18, "step": 24760 }, { "epoch": 1.59515721346578, "grad_norm": 0.11117953807115555, "learning_rate": 9.365447363901769e-05, "loss": 0.1723, "step": 24770 }, { "epoch": 1.5958012010368199, "grad_norm": 0.14321838319301605, "learning_rate": 9.36115404430706e-05, "loss": 0.1726, "step": 24780 }, { "epoch": 1.5964451886078599, "grad_norm": 0.1342436522245407, "learning_rate": 9.356860724712349e-05, "loss": 0.1734, "step": 24790 }, { "epoch": 1.5970891761789, "grad_norm": 0.13701611757278442, "learning_rate": 9.352567405117638e-05, "loss": 0.1689, "step": 24800 }, { "epoch": 1.5977331637499397, "grad_norm": 0.16644203662872314, "learning_rate": 9.348274085522927e-05, "loss": 0.1802, "step": 24810 }, { "epoch": 1.5983771513209795, "grad_norm": 0.12719132006168365, "learning_rate": 9.343980765928216e-05, "loss": 0.1809, "step": 24820 }, { "epoch": 1.5990211388920192, "grad_norm": 0.13969293236732483, "learning_rate": 9.339687446333506e-05, "loss": 0.1691, "step": 24830 }, { "epoch": 1.5996651264630593, "grad_norm": 0.1312289983034134, "learning_rate": 9.335394126738795e-05, "loss": 0.1715, "step": 24840 }, { "epoch": 1.6003091140340993, "grad_norm": 0.1408320516347885, "learning_rate": 9.331100807144084e-05, "loss": 0.1704, "step": 24850 }, { "epoch": 1.600953101605139, "grad_norm": 0.14238932728767395, "learning_rate": 9.326807487549373e-05, "loss": 0.1727, "step": 24860 }, { "epoch": 1.6015970891761788, "grad_norm": 0.1238614022731781, "learning_rate": 9.322514167954664e-05, "loss": 0.1703, "step": 24870 }, { "epoch": 1.6022410767472188, "grad_norm": 0.13177034258842468, "learning_rate": 9.318220848359951e-05, "loss": 0.173, "step": 24880 }, { "epoch": 1.6028850643182586, "grad_norm": 0.14353613555431366, "learning_rate": 9.313927528765242e-05, "loss": 0.1801, "step": 24890 }, { "epoch": 1.6035290518892986, "grad_norm": 0.15213216841220856, "learning_rate": 9.309634209170531e-05, "loss": 0.1749, "step": 24900 }, { "epoch": 1.6041730394603384, "grad_norm": 0.1225953996181488, "learning_rate": 9.305340889575821e-05, "loss": 0.1795, "step": 24910 }, { "epoch": 1.6048170270313782, "grad_norm": 0.13704775273799896, "learning_rate": 9.301047569981109e-05, "loss": 0.1714, "step": 24920 }, { "epoch": 1.6054610146024182, "grad_norm": 0.13781236112117767, "learning_rate": 9.2967542503864e-05, "loss": 0.1801, "step": 24930 }, { "epoch": 1.6061050021734582, "grad_norm": 0.1557421088218689, "learning_rate": 9.292460930791689e-05, "loss": 0.1696, "step": 24940 }, { "epoch": 1.606748989744498, "grad_norm": 0.1287761628627777, "learning_rate": 9.288167611196979e-05, "loss": 0.1719, "step": 24950 }, { "epoch": 1.6073929773155378, "grad_norm": 0.1517457664012909, "learning_rate": 9.283874291602267e-05, "loss": 0.1769, "step": 24960 }, { "epoch": 1.6080369648865775, "grad_norm": 0.14376088976860046, "learning_rate": 9.279580972007557e-05, "loss": 0.168, "step": 24970 }, { "epoch": 1.6086809524576176, "grad_norm": 0.1452200561761856, "learning_rate": 9.275287652412846e-05, "loss": 0.1718, "step": 24980 }, { "epoch": 1.6093249400286576, "grad_norm": 0.12455900758504868, "learning_rate": 9.270994332818135e-05, "loss": 0.1767, "step": 24990 }, { "epoch": 1.6099689275996973, "grad_norm": 0.12687011063098907, "learning_rate": 9.266701013223424e-05, "loss": 0.1712, "step": 25000 }, { "epoch": 1.6106129151707371, "grad_norm": 0.11801668256521225, "learning_rate": 9.262407693628715e-05, "loss": 0.1769, "step": 25010 }, { "epoch": 1.6112569027417771, "grad_norm": 0.13140341639518738, "learning_rate": 9.258114374034004e-05, "loss": 0.1707, "step": 25020 }, { "epoch": 1.611900890312817, "grad_norm": 0.14177222549915314, "learning_rate": 9.253821054439293e-05, "loss": 0.179, "step": 25030 }, { "epoch": 1.612544877883857, "grad_norm": 0.12678411602973938, "learning_rate": 9.249527734844582e-05, "loss": 0.172, "step": 25040 }, { "epoch": 1.6131888654548967, "grad_norm": 0.13517649471759796, "learning_rate": 9.245234415249871e-05, "loss": 0.1826, "step": 25050 }, { "epoch": 1.6138328530259365, "grad_norm": 0.14007531106472015, "learning_rate": 9.240941095655161e-05, "loss": 0.1681, "step": 25060 }, { "epoch": 1.6144768405969765, "grad_norm": 0.13419798016548157, "learning_rate": 9.23664777606045e-05, "loss": 0.173, "step": 25070 }, { "epoch": 1.6151208281680165, "grad_norm": 0.11934328079223633, "learning_rate": 9.23235445646574e-05, "loss": 0.1781, "step": 25080 }, { "epoch": 1.6157648157390563, "grad_norm": 0.11514256149530411, "learning_rate": 9.228061136871029e-05, "loss": 0.1721, "step": 25090 }, { "epoch": 1.616408803310096, "grad_norm": 0.15056142210960388, "learning_rate": 9.223767817276319e-05, "loss": 0.1649, "step": 25100 }, { "epoch": 1.6170527908811358, "grad_norm": 0.1698857545852661, "learning_rate": 9.219474497681608e-05, "loss": 0.1736, "step": 25110 }, { "epoch": 1.6176967784521759, "grad_norm": 0.17164689302444458, "learning_rate": 9.215181178086897e-05, "loss": 0.1715, "step": 25120 }, { "epoch": 1.6183407660232159, "grad_norm": 0.12577378749847412, "learning_rate": 9.210887858492186e-05, "loss": 0.173, "step": 25130 }, { "epoch": 1.6189847535942556, "grad_norm": 0.13315711915493011, "learning_rate": 9.206594538897477e-05, "loss": 0.1731, "step": 25140 }, { "epoch": 1.6196287411652954, "grad_norm": 0.141169011592865, "learning_rate": 9.202301219302766e-05, "loss": 0.176, "step": 25150 }, { "epoch": 1.6202727287363354, "grad_norm": 0.1441199630498886, "learning_rate": 9.198007899708055e-05, "loss": 0.1763, "step": 25160 }, { "epoch": 1.6209167163073752, "grad_norm": 0.12589514255523682, "learning_rate": 9.193714580113344e-05, "loss": 0.1742, "step": 25170 }, { "epoch": 1.6215607038784152, "grad_norm": 0.14445792138576508, "learning_rate": 9.189421260518633e-05, "loss": 0.1749, "step": 25180 }, { "epoch": 1.622204691449455, "grad_norm": 0.12652386724948883, "learning_rate": 9.185127940923923e-05, "loss": 0.1758, "step": 25190 }, { "epoch": 1.6228486790204948, "grad_norm": 0.11934319883584976, "learning_rate": 9.180834621329212e-05, "loss": 0.1772, "step": 25200 }, { "epoch": 1.6234926665915348, "grad_norm": 0.1353561282157898, "learning_rate": 9.176541301734502e-05, "loss": 0.1792, "step": 25210 }, { "epoch": 1.6241366541625748, "grad_norm": 0.11862833052873611, "learning_rate": 9.17224798213979e-05, "loss": 0.1753, "step": 25220 }, { "epoch": 1.6247806417336146, "grad_norm": 0.130097895860672, "learning_rate": 9.167954662545081e-05, "loss": 0.1738, "step": 25230 }, { "epoch": 1.6254246293046544, "grad_norm": 0.1354440450668335, "learning_rate": 9.163661342950369e-05, "loss": 0.175, "step": 25240 }, { "epoch": 1.6260686168756942, "grad_norm": 0.1360703557729721, "learning_rate": 9.159368023355659e-05, "loss": 0.1741, "step": 25250 }, { "epoch": 1.6267126044467342, "grad_norm": 0.14660800993442535, "learning_rate": 9.155074703760948e-05, "loss": 0.1724, "step": 25260 }, { "epoch": 1.6273565920177742, "grad_norm": 0.15254268050193787, "learning_rate": 9.150781384166239e-05, "loss": 0.1745, "step": 25270 }, { "epoch": 1.628000579588814, "grad_norm": 0.12499679625034332, "learning_rate": 9.146488064571526e-05, "loss": 0.1668, "step": 25280 }, { "epoch": 1.6286445671598537, "grad_norm": 0.13333860039710999, "learning_rate": 9.142194744976817e-05, "loss": 0.1719, "step": 25290 }, { "epoch": 1.6292885547308937, "grad_norm": 0.15900318324565887, "learning_rate": 9.137901425382106e-05, "loss": 0.1753, "step": 25300 }, { "epoch": 1.6299325423019335, "grad_norm": 0.13781817257404327, "learning_rate": 9.133608105787396e-05, "loss": 0.1707, "step": 25310 }, { "epoch": 1.6305765298729735, "grad_norm": 0.13162322342395782, "learning_rate": 9.129314786192684e-05, "loss": 0.1735, "step": 25320 }, { "epoch": 1.6312205174440133, "grad_norm": 0.14153742790222168, "learning_rate": 9.125021466597974e-05, "loss": 0.1712, "step": 25330 }, { "epoch": 1.631864505015053, "grad_norm": 0.14791743457317352, "learning_rate": 9.120728147003263e-05, "loss": 0.1793, "step": 25340 }, { "epoch": 1.632508492586093, "grad_norm": 0.15871931612491608, "learning_rate": 9.116434827408553e-05, "loss": 0.1807, "step": 25350 }, { "epoch": 1.633152480157133, "grad_norm": 0.16117985546588898, "learning_rate": 9.112141507813842e-05, "loss": 0.1715, "step": 25360 }, { "epoch": 1.6337964677281729, "grad_norm": 0.14310221374034882, "learning_rate": 9.107848188219132e-05, "loss": 0.1736, "step": 25370 }, { "epoch": 1.6344404552992127, "grad_norm": 0.13701625168323517, "learning_rate": 9.103554868624421e-05, "loss": 0.1781, "step": 25380 }, { "epoch": 1.6350844428702525, "grad_norm": 0.13362133502960205, "learning_rate": 9.09926154902971e-05, "loss": 0.1708, "step": 25390 }, { "epoch": 1.6357284304412925, "grad_norm": 0.12653009593486786, "learning_rate": 9.094968229434999e-05, "loss": 0.1645, "step": 25400 }, { "epoch": 1.6363724180123325, "grad_norm": 0.14177031815052032, "learning_rate": 9.090674909840288e-05, "loss": 0.1681, "step": 25410 }, { "epoch": 1.6370164055833722, "grad_norm": 0.15242381393909454, "learning_rate": 9.086381590245579e-05, "loss": 0.1736, "step": 25420 }, { "epoch": 1.637660393154412, "grad_norm": 0.1324288547039032, "learning_rate": 9.082088270650868e-05, "loss": 0.1777, "step": 25430 }, { "epoch": 1.638304380725452, "grad_norm": 0.14239196479320526, "learning_rate": 9.077794951056157e-05, "loss": 0.1695, "step": 25440 }, { "epoch": 1.638948368296492, "grad_norm": 0.13807961344718933, "learning_rate": 9.073501631461446e-05, "loss": 0.1743, "step": 25450 }, { "epoch": 1.6395923558675318, "grad_norm": 0.15577028691768646, "learning_rate": 9.069208311866736e-05, "loss": 0.1768, "step": 25460 }, { "epoch": 1.6402363434385716, "grad_norm": 0.11865486204624176, "learning_rate": 9.064914992272025e-05, "loss": 0.169, "step": 25470 }, { "epoch": 1.6408803310096114, "grad_norm": 0.13957737386226654, "learning_rate": 9.060621672677314e-05, "loss": 0.1781, "step": 25480 }, { "epoch": 1.6415243185806514, "grad_norm": 0.13507089018821716, "learning_rate": 9.056328353082604e-05, "loss": 0.1706, "step": 25490 }, { "epoch": 1.6421683061516914, "grad_norm": 0.15136320888996124, "learning_rate": 9.052035033487894e-05, "loss": 0.1683, "step": 25500 }, { "epoch": 1.6428122937227312, "grad_norm": 0.14034053683280945, "learning_rate": 9.047741713893183e-05, "loss": 0.1758, "step": 25510 }, { "epoch": 1.643456281293771, "grad_norm": 0.12731395661830902, "learning_rate": 9.043448394298472e-05, "loss": 0.1732, "step": 25520 }, { "epoch": 1.6441002688648108, "grad_norm": 0.1396988481283188, "learning_rate": 9.039155074703761e-05, "loss": 0.177, "step": 25530 }, { "epoch": 1.6447442564358508, "grad_norm": 0.1349281668663025, "learning_rate": 9.03486175510905e-05, "loss": 0.1731, "step": 25540 }, { "epoch": 1.6453882440068908, "grad_norm": 0.14376354217529297, "learning_rate": 9.03056843551434e-05, "loss": 0.1743, "step": 25550 }, { "epoch": 1.6460322315779305, "grad_norm": 0.12633219361305237, "learning_rate": 9.02627511591963e-05, "loss": 0.1724, "step": 25560 }, { "epoch": 1.6466762191489703, "grad_norm": 0.14616121351718903, "learning_rate": 9.021981796324919e-05, "loss": 0.1703, "step": 25570 }, { "epoch": 1.6473202067200103, "grad_norm": 0.1506553739309311, "learning_rate": 9.017688476730208e-05, "loss": 0.1701, "step": 25580 }, { "epoch": 1.6479641942910503, "grad_norm": 0.1446242481470108, "learning_rate": 9.013395157135498e-05, "loss": 0.176, "step": 25590 }, { "epoch": 1.6486081818620901, "grad_norm": 0.1589617133140564, "learning_rate": 9.009101837540786e-05, "loss": 0.1758, "step": 25600 }, { "epoch": 1.64925216943313, "grad_norm": 0.17172448337078094, "learning_rate": 9.004808517946076e-05, "loss": 0.171, "step": 25610 }, { "epoch": 1.6498961570041697, "grad_norm": 0.13308463990688324, "learning_rate": 9.000515198351365e-05, "loss": 0.1671, "step": 25620 }, { "epoch": 1.6505401445752097, "grad_norm": 0.14586487412452698, "learning_rate": 8.996221878756656e-05, "loss": 0.1736, "step": 25630 }, { "epoch": 1.6511841321462497, "grad_norm": 0.1685573309659958, "learning_rate": 8.991928559161944e-05, "loss": 0.1706, "step": 25640 }, { "epoch": 1.6518281197172895, "grad_norm": 0.1405738741159439, "learning_rate": 8.987635239567234e-05, "loss": 0.1699, "step": 25650 }, { "epoch": 1.6524721072883293, "grad_norm": 0.13485556840896606, "learning_rate": 8.983341919972523e-05, "loss": 0.1758, "step": 25660 }, { "epoch": 1.653116094859369, "grad_norm": 0.16581301391124725, "learning_rate": 8.979048600377813e-05, "loss": 0.1677, "step": 25670 }, { "epoch": 1.653760082430409, "grad_norm": 0.16240520775318146, "learning_rate": 8.974755280783101e-05, "loss": 0.176, "step": 25680 }, { "epoch": 1.654404070001449, "grad_norm": 0.14592918753623962, "learning_rate": 8.970461961188392e-05, "loss": 0.1765, "step": 25690 }, { "epoch": 1.6550480575724889, "grad_norm": 0.14719903469085693, "learning_rate": 8.966168641593681e-05, "loss": 0.1802, "step": 25700 }, { "epoch": 1.6556920451435286, "grad_norm": 0.14668048918247223, "learning_rate": 8.96187532199897e-05, "loss": 0.1758, "step": 25710 }, { "epoch": 1.6563360327145686, "grad_norm": 0.1301698088645935, "learning_rate": 8.957582002404259e-05, "loss": 0.1772, "step": 25720 }, { "epoch": 1.6569800202856086, "grad_norm": 0.13308779895305634, "learning_rate": 8.953288682809549e-05, "loss": 0.1748, "step": 25730 }, { "epoch": 1.6576240078566484, "grad_norm": 0.12847353518009186, "learning_rate": 8.948995363214838e-05, "loss": 0.174, "step": 25740 }, { "epoch": 1.6582679954276882, "grad_norm": 0.12305327504873276, "learning_rate": 8.944702043620127e-05, "loss": 0.1781, "step": 25750 }, { "epoch": 1.658911982998728, "grad_norm": 0.13173021376132965, "learning_rate": 8.940408724025416e-05, "loss": 0.1758, "step": 25760 }, { "epoch": 1.659555970569768, "grad_norm": 0.14201894402503967, "learning_rate": 8.936115404430706e-05, "loss": 0.1773, "step": 25770 }, { "epoch": 1.660199958140808, "grad_norm": 0.12854605913162231, "learning_rate": 8.931822084835996e-05, "loss": 0.1774, "step": 25780 }, { "epoch": 1.6608439457118478, "grad_norm": 0.12874937057495117, "learning_rate": 8.927528765241285e-05, "loss": 0.1756, "step": 25790 }, { "epoch": 1.6614879332828876, "grad_norm": 0.14415930211544037, "learning_rate": 8.923235445646574e-05, "loss": 0.1717, "step": 25800 }, { "epoch": 1.6621319208539274, "grad_norm": 0.16799712181091309, "learning_rate": 8.918942126051863e-05, "loss": 0.1773, "step": 25810 }, { "epoch": 1.6627759084249674, "grad_norm": 0.16746020317077637, "learning_rate": 8.914648806457154e-05, "loss": 0.1751, "step": 25820 }, { "epoch": 1.6634198959960074, "grad_norm": 0.14811836183071136, "learning_rate": 8.910355486862443e-05, "loss": 0.1771, "step": 25830 }, { "epoch": 1.6640638835670472, "grad_norm": 0.1440129280090332, "learning_rate": 8.906062167267732e-05, "loss": 0.1714, "step": 25840 }, { "epoch": 1.664707871138087, "grad_norm": 0.1448437124490738, "learning_rate": 8.901768847673021e-05, "loss": 0.1703, "step": 25850 }, { "epoch": 1.665351858709127, "grad_norm": 0.14581388235092163, "learning_rate": 8.897475528078311e-05, "loss": 0.1686, "step": 25860 }, { "epoch": 1.665995846280167, "grad_norm": 0.15425138175487518, "learning_rate": 8.8931822084836e-05, "loss": 0.1807, "step": 25870 }, { "epoch": 1.6666398338512067, "grad_norm": 0.13303609192371368, "learning_rate": 8.888888888888889e-05, "loss": 0.1695, "step": 25880 }, { "epoch": 1.6672838214222465, "grad_norm": 0.14644715189933777, "learning_rate": 8.884595569294178e-05, "loss": 0.1771, "step": 25890 }, { "epoch": 1.6679278089932863, "grad_norm": 0.1453896015882492, "learning_rate": 8.880302249699467e-05, "loss": 0.1771, "step": 25900 }, { "epoch": 1.6685717965643263, "grad_norm": 0.14639601111412048, "learning_rate": 8.876008930104758e-05, "loss": 0.1744, "step": 25910 }, { "epoch": 1.6692157841353663, "grad_norm": 0.1294928640127182, "learning_rate": 8.871715610510047e-05, "loss": 0.1741, "step": 25920 }, { "epoch": 1.669859771706406, "grad_norm": 0.13237431645393372, "learning_rate": 8.867422290915336e-05, "loss": 0.171, "step": 25930 }, { "epoch": 1.6705037592774459, "grad_norm": 0.15140637755393982, "learning_rate": 8.863128971320625e-05, "loss": 0.1757, "step": 25940 }, { "epoch": 1.6711477468484857, "grad_norm": 0.13309882581233978, "learning_rate": 8.858835651725915e-05, "loss": 0.1681, "step": 25950 }, { "epoch": 1.6717917344195257, "grad_norm": 0.1617574691772461, "learning_rate": 8.854542332131203e-05, "loss": 0.1764, "step": 25960 }, { "epoch": 1.6724357219905657, "grad_norm": 0.1672007441520691, "learning_rate": 8.850249012536494e-05, "loss": 0.1746, "step": 25970 }, { "epoch": 1.6730797095616055, "grad_norm": 0.13224352896213531, "learning_rate": 8.845955692941783e-05, "loss": 0.1738, "step": 25980 }, { "epoch": 1.6737236971326452, "grad_norm": 0.17420098185539246, "learning_rate": 8.841662373347073e-05, "loss": 0.1756, "step": 25990 }, { "epoch": 1.6743676847036852, "grad_norm": 0.1371115893125534, "learning_rate": 8.837369053752361e-05, "loss": 0.1745, "step": 26000 }, { "epoch": 1.6750116722747252, "grad_norm": 0.14010965824127197, "learning_rate": 8.833075734157651e-05, "loss": 0.1731, "step": 26010 }, { "epoch": 1.675655659845765, "grad_norm": 0.12283728271722794, "learning_rate": 8.82878241456294e-05, "loss": 0.1677, "step": 26020 }, { "epoch": 1.6762996474168048, "grad_norm": 0.13698247075080872, "learning_rate": 8.824489094968231e-05, "loss": 0.171, "step": 26030 }, { "epoch": 1.6769436349878446, "grad_norm": 0.12063325196504593, "learning_rate": 8.820195775373518e-05, "loss": 0.1746, "step": 26040 }, { "epoch": 1.6775876225588846, "grad_norm": 0.1542341411113739, "learning_rate": 8.815902455778809e-05, "loss": 0.1799, "step": 26050 }, { "epoch": 1.6782316101299246, "grad_norm": 0.13875941932201385, "learning_rate": 8.811609136184098e-05, "loss": 0.1763, "step": 26060 }, { "epoch": 1.6788755977009644, "grad_norm": 0.1376761943101883, "learning_rate": 8.807315816589387e-05, "loss": 0.1746, "step": 26070 }, { "epoch": 1.6795195852720042, "grad_norm": 0.16029466688632965, "learning_rate": 8.803022496994676e-05, "loss": 0.1645, "step": 26080 }, { "epoch": 1.6801635728430442, "grad_norm": 0.1386878788471222, "learning_rate": 8.798729177399966e-05, "loss": 0.1749, "step": 26090 }, { "epoch": 1.680807560414084, "grad_norm": 0.13761694729328156, "learning_rate": 8.794435857805256e-05, "loss": 0.1705, "step": 26100 }, { "epoch": 1.681451547985124, "grad_norm": 0.1548626869916916, "learning_rate": 8.790142538210545e-05, "loss": 0.1747, "step": 26110 }, { "epoch": 1.6820955355561638, "grad_norm": 0.1554076373577118, "learning_rate": 8.785849218615834e-05, "loss": 0.1714, "step": 26120 }, { "epoch": 1.6827395231272035, "grad_norm": 0.12811318039894104, "learning_rate": 8.781555899021123e-05, "loss": 0.1721, "step": 26130 }, { "epoch": 1.6833835106982435, "grad_norm": 0.12215601652860641, "learning_rate": 8.777262579426413e-05, "loss": 0.1723, "step": 26140 }, { "epoch": 1.6840274982692836, "grad_norm": 0.14734551310539246, "learning_rate": 8.772969259831702e-05, "loss": 0.1762, "step": 26150 }, { "epoch": 1.6846714858403233, "grad_norm": 0.13998882472515106, "learning_rate": 8.768675940236991e-05, "loss": 0.1799, "step": 26160 }, { "epoch": 1.6853154734113631, "grad_norm": 0.12805373966693878, "learning_rate": 8.76438262064228e-05, "loss": 0.1755, "step": 26170 }, { "epoch": 1.685959460982403, "grad_norm": 0.1469922512769699, "learning_rate": 8.760089301047571e-05, "loss": 0.172, "step": 26180 }, { "epoch": 1.686603448553443, "grad_norm": 0.14591528475284576, "learning_rate": 8.75579598145286e-05, "loss": 0.1765, "step": 26190 }, { "epoch": 1.687247436124483, "grad_norm": 0.1419384926557541, "learning_rate": 8.751502661858149e-05, "loss": 0.1707, "step": 26200 }, { "epoch": 1.6878914236955227, "grad_norm": 0.13799622654914856, "learning_rate": 8.747209342263438e-05, "loss": 0.1724, "step": 26210 }, { "epoch": 1.6885354112665625, "grad_norm": 0.15569385886192322, "learning_rate": 8.742916022668728e-05, "loss": 0.1769, "step": 26220 }, { "epoch": 1.6891793988376025, "grad_norm": 0.1691560298204422, "learning_rate": 8.738622703074017e-05, "loss": 0.1758, "step": 26230 }, { "epoch": 1.6898233864086423, "grad_norm": 0.13950303196907043, "learning_rate": 8.734329383479307e-05, "loss": 0.1741, "step": 26240 }, { "epoch": 1.6904673739796823, "grad_norm": 0.1397392898797989, "learning_rate": 8.730036063884596e-05, "loss": 0.1667, "step": 26250 }, { "epoch": 1.691111361550722, "grad_norm": 0.1380697339773178, "learning_rate": 8.725742744289885e-05, "loss": 0.1771, "step": 26260 }, { "epoch": 1.6917553491217618, "grad_norm": 0.12360910326242447, "learning_rate": 8.721449424695175e-05, "loss": 0.1684, "step": 26270 }, { "epoch": 1.6923993366928018, "grad_norm": 0.14069923758506775, "learning_rate": 8.717156105100464e-05, "loss": 0.1795, "step": 26280 }, { "epoch": 1.6930433242638419, "grad_norm": 0.13234394788742065, "learning_rate": 8.712862785505753e-05, "loss": 0.1725, "step": 26290 }, { "epoch": 1.6936873118348816, "grad_norm": 0.13105225563049316, "learning_rate": 8.708569465911042e-05, "loss": 0.1689, "step": 26300 }, { "epoch": 1.6943312994059214, "grad_norm": 0.14907869696617126, "learning_rate": 8.704276146316333e-05, "loss": 0.1717, "step": 26310 }, { "epoch": 1.6949752869769612, "grad_norm": 0.15877707302570343, "learning_rate": 8.699982826721622e-05, "loss": 0.1681, "step": 26320 }, { "epoch": 1.6956192745480012, "grad_norm": 0.13266617059707642, "learning_rate": 8.695689507126911e-05, "loss": 0.1754, "step": 26330 }, { "epoch": 1.6962632621190412, "grad_norm": 0.11985336244106293, "learning_rate": 8.6913961875322e-05, "loss": 0.1697, "step": 26340 }, { "epoch": 1.696907249690081, "grad_norm": 0.12996827065944672, "learning_rate": 8.68710286793749e-05, "loss": 0.1725, "step": 26350 }, { "epoch": 1.6975512372611208, "grad_norm": 0.15809763967990875, "learning_rate": 8.68280954834278e-05, "loss": 0.1745, "step": 26360 }, { "epoch": 1.6981952248321608, "grad_norm": 0.14453576505184174, "learning_rate": 8.678516228748068e-05, "loss": 0.1779, "step": 26370 }, { "epoch": 1.6988392124032006, "grad_norm": 0.135070338845253, "learning_rate": 8.674222909153358e-05, "loss": 0.1696, "step": 26380 }, { "epoch": 1.6994831999742406, "grad_norm": 0.12670910358428955, "learning_rate": 8.669929589558648e-05, "loss": 0.1746, "step": 26390 }, { "epoch": 1.7001271875452804, "grad_norm": 0.14517654478549957, "learning_rate": 8.665636269963937e-05, "loss": 0.1644, "step": 26400 }, { "epoch": 1.7007711751163201, "grad_norm": 0.16446036100387573, "learning_rate": 8.661342950369226e-05, "loss": 0.1761, "step": 26410 }, { "epoch": 1.7014151626873601, "grad_norm": 0.1315331906080246, "learning_rate": 8.657049630774515e-05, "loss": 0.1686, "step": 26420 }, { "epoch": 1.7020591502584002, "grad_norm": 0.14152279496192932, "learning_rate": 8.652756311179804e-05, "loss": 0.1704, "step": 26430 }, { "epoch": 1.70270313782944, "grad_norm": 0.14346948266029358, "learning_rate": 8.648462991585095e-05, "loss": 0.1753, "step": 26440 }, { "epoch": 1.7033471254004797, "grad_norm": 0.16415901482105255, "learning_rate": 8.644169671990384e-05, "loss": 0.1764, "step": 26450 }, { "epoch": 1.7039911129715195, "grad_norm": 0.11746125668287277, "learning_rate": 8.639876352395673e-05, "loss": 0.1735, "step": 26460 }, { "epoch": 1.7046351005425595, "grad_norm": 0.1299019157886505, "learning_rate": 8.635583032800962e-05, "loss": 0.1724, "step": 26470 }, { "epoch": 1.7052790881135995, "grad_norm": 0.13196437060832977, "learning_rate": 8.631289713206252e-05, "loss": 0.1774, "step": 26480 }, { "epoch": 1.7059230756846393, "grad_norm": 0.1398802101612091, "learning_rate": 8.62699639361154e-05, "loss": 0.171, "step": 26490 }, { "epoch": 1.706567063255679, "grad_norm": 0.13366679847240448, "learning_rate": 8.62270307401683e-05, "loss": 0.1775, "step": 26500 }, { "epoch": 1.707211050826719, "grad_norm": 0.12461135536432266, "learning_rate": 8.61840975442212e-05, "loss": 0.168, "step": 26510 }, { "epoch": 1.7078550383977589, "grad_norm": 0.14467884600162506, "learning_rate": 8.61411643482741e-05, "loss": 0.1714, "step": 26520 }, { "epoch": 1.7084990259687989, "grad_norm": 0.16126671433448792, "learning_rate": 8.609823115232698e-05, "loss": 0.1801, "step": 26530 }, { "epoch": 1.7091430135398387, "grad_norm": 0.16611574590206146, "learning_rate": 8.605529795637988e-05, "loss": 0.1783, "step": 26540 }, { "epoch": 1.7097870011108784, "grad_norm": 0.1282462179660797, "learning_rate": 8.601236476043277e-05, "loss": 0.1783, "step": 26550 }, { "epoch": 1.7104309886819185, "grad_norm": 0.13346879184246063, "learning_rate": 8.596943156448568e-05, "loss": 0.177, "step": 26560 }, { "epoch": 1.7110749762529585, "grad_norm": 0.14817017316818237, "learning_rate": 8.592649836853855e-05, "loss": 0.1781, "step": 26570 }, { "epoch": 1.7117189638239982, "grad_norm": 0.1527533084154129, "learning_rate": 8.588356517259146e-05, "loss": 0.1772, "step": 26580 }, { "epoch": 1.712362951395038, "grad_norm": 0.13649636507034302, "learning_rate": 8.584063197664435e-05, "loss": 0.1723, "step": 26590 }, { "epoch": 1.7130069389660778, "grad_norm": 0.12403944134712219, "learning_rate": 8.579769878069724e-05, "loss": 0.1751, "step": 26600 }, { "epoch": 1.7136509265371178, "grad_norm": 0.11507163941860199, "learning_rate": 8.575476558475013e-05, "loss": 0.1725, "step": 26610 }, { "epoch": 1.7142949141081578, "grad_norm": 0.14349287748336792, "learning_rate": 8.571183238880302e-05, "loss": 0.1729, "step": 26620 }, { "epoch": 1.7149389016791976, "grad_norm": 0.15874981880187988, "learning_rate": 8.566889919285592e-05, "loss": 0.1705, "step": 26630 }, { "epoch": 1.7155828892502374, "grad_norm": 0.12125656753778458, "learning_rate": 8.562596599690881e-05, "loss": 0.1673, "step": 26640 }, { "epoch": 1.7162268768212774, "grad_norm": 0.13971467316150665, "learning_rate": 8.55830328009617e-05, "loss": 0.1741, "step": 26650 }, { "epoch": 1.7168708643923172, "grad_norm": 0.1336052417755127, "learning_rate": 8.55400996050146e-05, "loss": 0.1723, "step": 26660 }, { "epoch": 1.7175148519633572, "grad_norm": 0.1382596790790558, "learning_rate": 8.54971664090675e-05, "loss": 0.1761, "step": 26670 }, { "epoch": 1.718158839534397, "grad_norm": 0.13040557503700256, "learning_rate": 8.545423321312039e-05, "loss": 0.1685, "step": 26680 }, { "epoch": 1.7188028271054367, "grad_norm": 0.1368090957403183, "learning_rate": 8.541130001717328e-05, "loss": 0.1789, "step": 26690 }, { "epoch": 1.7194468146764768, "grad_norm": 0.1509471982717514, "learning_rate": 8.536836682122617e-05, "loss": 0.1702, "step": 26700 }, { "epoch": 1.7200908022475168, "grad_norm": 0.14225375652313232, "learning_rate": 8.532543362527908e-05, "loss": 0.1746, "step": 26710 }, { "epoch": 1.7207347898185565, "grad_norm": 0.13869042694568634, "learning_rate": 8.528250042933197e-05, "loss": 0.1677, "step": 26720 }, { "epoch": 1.7213787773895963, "grad_norm": 0.13210201263427734, "learning_rate": 8.523956723338486e-05, "loss": 0.1737, "step": 26730 }, { "epoch": 1.722022764960636, "grad_norm": 0.13871227204799652, "learning_rate": 8.519663403743775e-05, "loss": 0.18, "step": 26740 }, { "epoch": 1.7226667525316761, "grad_norm": 0.14477701485157013, "learning_rate": 8.515370084149065e-05, "loss": 0.1693, "step": 26750 }, { "epoch": 1.7233107401027161, "grad_norm": 0.12651221454143524, "learning_rate": 8.511076764554354e-05, "loss": 0.1733, "step": 26760 }, { "epoch": 1.723954727673756, "grad_norm": 0.13489295542240143, "learning_rate": 8.506783444959643e-05, "loss": 0.176, "step": 26770 }, { "epoch": 1.7245987152447957, "grad_norm": 0.1302901655435562, "learning_rate": 8.502490125364932e-05, "loss": 0.1748, "step": 26780 }, { "epoch": 1.7252427028158357, "grad_norm": 0.12897253036499023, "learning_rate": 8.498196805770222e-05, "loss": 0.1645, "step": 26790 }, { "epoch": 1.7258866903868757, "grad_norm": 0.12625320255756378, "learning_rate": 8.493903486175512e-05, "loss": 0.1699, "step": 26800 }, { "epoch": 1.7265306779579155, "grad_norm": 0.1504066288471222, "learning_rate": 8.489610166580801e-05, "loss": 0.1766, "step": 26810 }, { "epoch": 1.7271746655289553, "grad_norm": 0.15124687552452087, "learning_rate": 8.48531684698609e-05, "loss": 0.1764, "step": 26820 }, { "epoch": 1.727818653099995, "grad_norm": 0.1376807540655136, "learning_rate": 8.481023527391379e-05, "loss": 0.1663, "step": 26830 }, { "epoch": 1.728462640671035, "grad_norm": 0.12301559746265411, "learning_rate": 8.47673020779667e-05, "loss": 0.1754, "step": 26840 }, { "epoch": 1.729106628242075, "grad_norm": 0.13695980608463287, "learning_rate": 8.472436888201957e-05, "loss": 0.1751, "step": 26850 }, { "epoch": 1.7297506158131148, "grad_norm": 0.16626006364822388, "learning_rate": 8.468143568607248e-05, "loss": 0.1785, "step": 26860 }, { "epoch": 1.7303946033841546, "grad_norm": 0.15622787177562714, "learning_rate": 8.463850249012537e-05, "loss": 0.1768, "step": 26870 }, { "epoch": 1.7310385909551944, "grad_norm": 0.14125317335128784, "learning_rate": 8.459556929417827e-05, "loss": 0.1834, "step": 26880 }, { "epoch": 1.7316825785262344, "grad_norm": 0.1494511365890503, "learning_rate": 8.455263609823115e-05, "loss": 0.1743, "step": 26890 }, { "epoch": 1.7323265660972744, "grad_norm": 0.13657602667808533, "learning_rate": 8.450970290228405e-05, "loss": 0.1645, "step": 26900 }, { "epoch": 1.7329705536683142, "grad_norm": 0.15256252884864807, "learning_rate": 8.446676970633694e-05, "loss": 0.1745, "step": 26910 }, { "epoch": 1.733614541239354, "grad_norm": 0.13517184555530548, "learning_rate": 8.442383651038985e-05, "loss": 0.1736, "step": 26920 }, { "epoch": 1.734258528810394, "grad_norm": 0.15684670209884644, "learning_rate": 8.438090331444273e-05, "loss": 0.1751, "step": 26930 }, { "epoch": 1.734902516381434, "grad_norm": 0.1407250165939331, "learning_rate": 8.433797011849563e-05, "loss": 0.1704, "step": 26940 }, { "epoch": 1.7355465039524738, "grad_norm": 0.15140779316425323, "learning_rate": 8.429503692254852e-05, "loss": 0.173, "step": 26950 }, { "epoch": 1.7361904915235136, "grad_norm": 0.13966155052185059, "learning_rate": 8.425210372660141e-05, "loss": 0.177, "step": 26960 }, { "epoch": 1.7368344790945534, "grad_norm": 0.13123895227909088, "learning_rate": 8.42091705306543e-05, "loss": 0.1676, "step": 26970 }, { "epoch": 1.7374784666655934, "grad_norm": 0.14587531983852386, "learning_rate": 8.416623733470719e-05, "loss": 0.1776, "step": 26980 }, { "epoch": 1.7381224542366334, "grad_norm": 0.142295703291893, "learning_rate": 8.41233041387601e-05, "loss": 0.179, "step": 26990 }, { "epoch": 1.7387664418076731, "grad_norm": 0.14573131501674652, "learning_rate": 8.408037094281299e-05, "loss": 0.1841, "step": 27000 }, { "epoch": 1.739410429378713, "grad_norm": 0.12355440855026245, "learning_rate": 8.403743774686588e-05, "loss": 0.1758, "step": 27010 }, { "epoch": 1.7400544169497527, "grad_norm": 0.13958214223384857, "learning_rate": 8.399450455091877e-05, "loss": 0.1834, "step": 27020 }, { "epoch": 1.7406984045207927, "grad_norm": 0.15159007906913757, "learning_rate": 8.395157135497167e-05, "loss": 0.176, "step": 27030 }, { "epoch": 1.7413423920918327, "grad_norm": 0.15101678669452667, "learning_rate": 8.390863815902456e-05, "loss": 0.1684, "step": 27040 }, { "epoch": 1.7419863796628725, "grad_norm": 0.1503848284482956, "learning_rate": 8.386570496307745e-05, "loss": 0.1695, "step": 27050 }, { "epoch": 1.7426303672339123, "grad_norm": 0.14533080160617828, "learning_rate": 8.382277176713034e-05, "loss": 0.1699, "step": 27060 }, { "epoch": 1.7432743548049523, "grad_norm": 0.16099688410758972, "learning_rate": 8.377983857118325e-05, "loss": 0.1713, "step": 27070 }, { "epoch": 1.7439183423759923, "grad_norm": 0.1589110642671585, "learning_rate": 8.373690537523614e-05, "loss": 0.1741, "step": 27080 }, { "epoch": 1.744562329947032, "grad_norm": 0.17142143845558167, "learning_rate": 8.369397217928903e-05, "loss": 0.1716, "step": 27090 }, { "epoch": 1.7452063175180719, "grad_norm": 0.15632444620132446, "learning_rate": 8.365103898334192e-05, "loss": 0.1855, "step": 27100 }, { "epoch": 1.7458503050891117, "grad_norm": 0.11749503761529922, "learning_rate": 8.360810578739482e-05, "loss": 0.1675, "step": 27110 }, { "epoch": 1.7464942926601517, "grad_norm": 0.14724747836589813, "learning_rate": 8.356517259144772e-05, "loss": 0.1694, "step": 27120 }, { "epoch": 1.7471382802311917, "grad_norm": 0.15165695548057556, "learning_rate": 8.35222393955006e-05, "loss": 0.1794, "step": 27130 }, { "epoch": 1.7477822678022314, "grad_norm": 0.14954186975955963, "learning_rate": 8.34793061995535e-05, "loss": 0.1719, "step": 27140 }, { "epoch": 1.7484262553732712, "grad_norm": 0.141738161444664, "learning_rate": 8.343637300360639e-05, "loss": 0.171, "step": 27150 }, { "epoch": 1.749070242944311, "grad_norm": 0.1379300355911255, "learning_rate": 8.339343980765929e-05, "loss": 0.1756, "step": 27160 }, { "epoch": 1.749714230515351, "grad_norm": 0.12602508068084717, "learning_rate": 8.335050661171218e-05, "loss": 0.1695, "step": 27170 }, { "epoch": 1.750358218086391, "grad_norm": 0.150459885597229, "learning_rate": 8.330757341576507e-05, "loss": 0.1758, "step": 27180 }, { "epoch": 1.7510022056574308, "grad_norm": 0.13164067268371582, "learning_rate": 8.326464021981796e-05, "loss": 0.1704, "step": 27190 }, { "epoch": 1.7516461932284706, "grad_norm": 0.16017013788223267, "learning_rate": 8.322170702387087e-05, "loss": 0.1767, "step": 27200 }, { "epoch": 1.7522901807995106, "grad_norm": 0.15443207323551178, "learning_rate": 8.317877382792375e-05, "loss": 0.1715, "step": 27210 }, { "epoch": 1.7529341683705506, "grad_norm": 0.11269912123680115, "learning_rate": 8.313584063197665e-05, "loss": 0.1702, "step": 27220 }, { "epoch": 1.7535781559415904, "grad_norm": 0.15137220919132233, "learning_rate": 8.309290743602954e-05, "loss": 0.1733, "step": 27230 }, { "epoch": 1.7542221435126302, "grad_norm": 0.1409047394990921, "learning_rate": 8.304997424008244e-05, "loss": 0.1719, "step": 27240 }, { "epoch": 1.75486613108367, "grad_norm": 0.15322378277778625, "learning_rate": 8.300704104413532e-05, "loss": 0.1814, "step": 27250 }, { "epoch": 1.75551011865471, "grad_norm": 0.15054874122142792, "learning_rate": 8.296410784818823e-05, "loss": 0.1717, "step": 27260 }, { "epoch": 1.75615410622575, "grad_norm": 0.14062544703483582, "learning_rate": 8.292117465224112e-05, "loss": 0.1821, "step": 27270 }, { "epoch": 1.7567980937967898, "grad_norm": 0.13365954160690308, "learning_rate": 8.287824145629402e-05, "loss": 0.1824, "step": 27280 }, { "epoch": 1.7574420813678295, "grad_norm": 0.13239042460918427, "learning_rate": 8.28353082603469e-05, "loss": 0.1696, "step": 27290 }, { "epoch": 1.7580860689388693, "grad_norm": 0.15561945736408234, "learning_rate": 8.27923750643998e-05, "loss": 0.1851, "step": 27300 }, { "epoch": 1.7587300565099093, "grad_norm": 0.14587517082691193, "learning_rate": 8.274944186845269e-05, "loss": 0.1759, "step": 27310 }, { "epoch": 1.7593740440809493, "grad_norm": 0.11934401839971542, "learning_rate": 8.270650867250558e-05, "loss": 0.1721, "step": 27320 }, { "epoch": 1.7600180316519891, "grad_norm": 0.12870942056179047, "learning_rate": 8.266357547655847e-05, "loss": 0.1671, "step": 27330 }, { "epoch": 1.760662019223029, "grad_norm": 0.189888134598732, "learning_rate": 8.262064228061136e-05, "loss": 0.173, "step": 27340 }, { "epoch": 1.761306006794069, "grad_norm": 0.14439885318279266, "learning_rate": 8.257770908466427e-05, "loss": 0.1771, "step": 27350 }, { "epoch": 1.761949994365109, "grad_norm": 0.1449151337146759, "learning_rate": 8.253477588871716e-05, "loss": 0.1732, "step": 27360 }, { "epoch": 1.7625939819361487, "grad_norm": 0.1381807178258896, "learning_rate": 8.249184269277005e-05, "loss": 0.1748, "step": 27370 }, { "epoch": 1.7632379695071885, "grad_norm": 0.147622212767601, "learning_rate": 8.244890949682294e-05, "loss": 0.1781, "step": 27380 }, { "epoch": 1.7638819570782283, "grad_norm": 0.14667773246765137, "learning_rate": 8.240597630087584e-05, "loss": 0.1792, "step": 27390 }, { "epoch": 1.7645259446492683, "grad_norm": 0.1376911848783493, "learning_rate": 8.236304310492874e-05, "loss": 0.1779, "step": 27400 }, { "epoch": 1.7651699322203083, "grad_norm": 0.1623169332742691, "learning_rate": 8.232010990898163e-05, "loss": 0.171, "step": 27410 }, { "epoch": 1.765813919791348, "grad_norm": 0.14092504978179932, "learning_rate": 8.227717671303452e-05, "loss": 0.1731, "step": 27420 }, { "epoch": 1.7664579073623878, "grad_norm": 0.13770288228988647, "learning_rate": 8.223424351708742e-05, "loss": 0.1705, "step": 27430 }, { "epoch": 1.7671018949334276, "grad_norm": 0.1255834847688675, "learning_rate": 8.219131032114031e-05, "loss": 0.1727, "step": 27440 }, { "epoch": 1.7677458825044676, "grad_norm": 0.1601857990026474, "learning_rate": 8.21483771251932e-05, "loss": 0.1715, "step": 27450 }, { "epoch": 1.7683898700755076, "grad_norm": 0.15125294029712677, "learning_rate": 8.210544392924609e-05, "loss": 0.173, "step": 27460 }, { "epoch": 1.7690338576465474, "grad_norm": 0.13442204892635345, "learning_rate": 8.2062510733299e-05, "loss": 0.1728, "step": 27470 }, { "epoch": 1.7696778452175872, "grad_norm": 0.13480907678604126, "learning_rate": 8.201957753735189e-05, "loss": 0.1777, "step": 27480 }, { "epoch": 1.7703218327886272, "grad_norm": 0.13061514496803284, "learning_rate": 8.197664434140478e-05, "loss": 0.1748, "step": 27490 }, { "epoch": 1.7709658203596672, "grad_norm": 0.13458208739757538, "learning_rate": 8.193371114545767e-05, "loss": 0.174, "step": 27500 }, { "epoch": 1.771609807930707, "grad_norm": 0.12845268845558167, "learning_rate": 8.189077794951056e-05, "loss": 0.172, "step": 27510 }, { "epoch": 1.7722537955017468, "grad_norm": 0.155989408493042, "learning_rate": 8.184784475356346e-05, "loss": 0.1782, "step": 27520 }, { "epoch": 1.7728977830727866, "grad_norm": 0.12354432791471481, "learning_rate": 8.180491155761635e-05, "loss": 0.1702, "step": 27530 }, { "epoch": 1.7735417706438266, "grad_norm": 0.1438465267419815, "learning_rate": 8.176197836166925e-05, "loss": 0.1861, "step": 27540 }, { "epoch": 1.7741857582148666, "grad_norm": 0.12637031078338623, "learning_rate": 8.171904516572214e-05, "loss": 0.162, "step": 27550 }, { "epoch": 1.7748297457859064, "grad_norm": 0.12601524591445923, "learning_rate": 8.167611196977504e-05, "loss": 0.1675, "step": 27560 }, { "epoch": 1.7754737333569461, "grad_norm": 0.14779946208000183, "learning_rate": 8.163317877382792e-05, "loss": 0.1706, "step": 27570 }, { "epoch": 1.7761177209279861, "grad_norm": 0.15499800443649292, "learning_rate": 8.159024557788082e-05, "loss": 0.1681, "step": 27580 }, { "epoch": 1.776761708499026, "grad_norm": 0.14287887513637543, "learning_rate": 8.154731238193371e-05, "loss": 0.1781, "step": 27590 }, { "epoch": 1.777405696070066, "grad_norm": 0.1513594537973404, "learning_rate": 8.150437918598662e-05, "loss": 0.1763, "step": 27600 }, { "epoch": 1.7780496836411057, "grad_norm": 0.15260964632034302, "learning_rate": 8.14614459900395e-05, "loss": 0.1839, "step": 27610 }, { "epoch": 1.7786936712121455, "grad_norm": 0.16069740056991577, "learning_rate": 8.14185127940924e-05, "loss": 0.1747, "step": 27620 }, { "epoch": 1.7793376587831855, "grad_norm": 0.11619509756565094, "learning_rate": 8.137557959814529e-05, "loss": 0.1705, "step": 27630 }, { "epoch": 1.7799816463542255, "grad_norm": 0.15190830826759338, "learning_rate": 8.133264640219819e-05, "loss": 0.1751, "step": 27640 }, { "epoch": 1.7806256339252653, "grad_norm": 0.1422068029642105, "learning_rate": 8.128971320625107e-05, "loss": 0.1739, "step": 27650 }, { "epoch": 1.781269621496305, "grad_norm": 0.15256354212760925, "learning_rate": 8.124678001030397e-05, "loss": 0.17, "step": 27660 }, { "epoch": 1.7819136090673449, "grad_norm": 0.1468254029750824, "learning_rate": 8.120384681435686e-05, "loss": 0.1807, "step": 27670 }, { "epoch": 1.7825575966383849, "grad_norm": 0.1409984976053238, "learning_rate": 8.116091361840976e-05, "loss": 0.1744, "step": 27680 }, { "epoch": 1.7832015842094249, "grad_norm": 0.1226859986782074, "learning_rate": 8.111798042246265e-05, "loss": 0.1719, "step": 27690 }, { "epoch": 1.7838455717804647, "grad_norm": 0.14353269338607788, "learning_rate": 8.107504722651554e-05, "loss": 0.1767, "step": 27700 }, { "epoch": 1.7844895593515044, "grad_norm": 0.15736716985702515, "learning_rate": 8.103211403056844e-05, "loss": 0.1728, "step": 27710 }, { "epoch": 1.7851335469225444, "grad_norm": 0.12161029875278473, "learning_rate": 8.098918083462133e-05, "loss": 0.1711, "step": 27720 }, { "epoch": 1.7857775344935842, "grad_norm": 0.11611779779195786, "learning_rate": 8.094624763867422e-05, "loss": 0.1724, "step": 27730 }, { "epoch": 1.7864215220646242, "grad_norm": 0.1371331661939621, "learning_rate": 8.090331444272711e-05, "loss": 0.1646, "step": 27740 }, { "epoch": 1.787065509635664, "grad_norm": 0.1346701830625534, "learning_rate": 8.086038124678002e-05, "loss": 0.175, "step": 27750 }, { "epoch": 1.7877094972067038, "grad_norm": 0.14365963637828827, "learning_rate": 8.081744805083291e-05, "loss": 0.1759, "step": 27760 }, { "epoch": 1.7883534847777438, "grad_norm": 0.15745925903320312, "learning_rate": 8.07745148548858e-05, "loss": 0.1743, "step": 27770 }, { "epoch": 1.7889974723487838, "grad_norm": 0.1485585868358612, "learning_rate": 8.073158165893869e-05, "loss": 0.1713, "step": 27780 }, { "epoch": 1.7896414599198236, "grad_norm": 0.12246234714984894, "learning_rate": 8.06886484629916e-05, "loss": 0.1738, "step": 27790 }, { "epoch": 1.7902854474908634, "grad_norm": 0.13190320134162903, "learning_rate": 8.064571526704448e-05, "loss": 0.1705, "step": 27800 }, { "epoch": 1.7909294350619032, "grad_norm": 0.13939206302165985, "learning_rate": 8.060278207109737e-05, "loss": 0.1712, "step": 27810 }, { "epoch": 1.7915734226329432, "grad_norm": 0.1513931155204773, "learning_rate": 8.055984887515027e-05, "loss": 0.1805, "step": 27820 }, { "epoch": 1.7922174102039832, "grad_norm": 0.14203304052352905, "learning_rate": 8.051691567920317e-05, "loss": 0.1727, "step": 27830 }, { "epoch": 1.792861397775023, "grad_norm": 0.15793469548225403, "learning_rate": 8.047398248325606e-05, "loss": 0.1726, "step": 27840 }, { "epoch": 1.7935053853460627, "grad_norm": 0.1514703780412674, "learning_rate": 8.043104928730895e-05, "loss": 0.1745, "step": 27850 }, { "epoch": 1.7941493729171027, "grad_norm": 0.13906928896903992, "learning_rate": 8.038811609136184e-05, "loss": 0.1856, "step": 27860 }, { "epoch": 1.7947933604881425, "grad_norm": 0.1510389745235443, "learning_rate": 8.034518289541473e-05, "loss": 0.1749, "step": 27870 }, { "epoch": 1.7954373480591825, "grad_norm": 0.13679440319538116, "learning_rate": 8.030224969946764e-05, "loss": 0.1702, "step": 27880 }, { "epoch": 1.7960813356302223, "grad_norm": 0.13800939917564392, "learning_rate": 8.025931650352053e-05, "loss": 0.1691, "step": 27890 }, { "epoch": 1.796725323201262, "grad_norm": 0.1452840268611908, "learning_rate": 8.021638330757342e-05, "loss": 0.1751, "step": 27900 }, { "epoch": 1.797369310772302, "grad_norm": 0.1338694840669632, "learning_rate": 8.017345011162631e-05, "loss": 0.1667, "step": 27910 }, { "epoch": 1.7980132983433421, "grad_norm": 0.13917142152786255, "learning_rate": 8.013051691567921e-05, "loss": 0.1711, "step": 27920 }, { "epoch": 1.798657285914382, "grad_norm": 0.13027797639369965, "learning_rate": 8.008758371973209e-05, "loss": 0.1797, "step": 27930 }, { "epoch": 1.7993012734854217, "grad_norm": 0.14449229836463928, "learning_rate": 8.0044650523785e-05, "loss": 0.1732, "step": 27940 }, { "epoch": 1.7999452610564615, "grad_norm": 0.14788897335529327, "learning_rate": 8.000171732783788e-05, "loss": 0.1742, "step": 27950 }, { "epoch": 1.8005892486275015, "grad_norm": 0.17377300560474396, "learning_rate": 7.995878413189079e-05, "loss": 0.1742, "step": 27960 }, { "epoch": 1.8012332361985415, "grad_norm": 0.13962319493293762, "learning_rate": 7.991585093594367e-05, "loss": 0.175, "step": 27970 }, { "epoch": 1.8018772237695813, "grad_norm": 0.13480345904827118, "learning_rate": 7.987291773999657e-05, "loss": 0.1775, "step": 27980 }, { "epoch": 1.802521211340621, "grad_norm": 0.14386780560016632, "learning_rate": 7.982998454404946e-05, "loss": 0.1715, "step": 27990 }, { "epoch": 1.803165198911661, "grad_norm": 0.12814190983772278, "learning_rate": 7.978705134810237e-05, "loss": 0.1689, "step": 28000 }, { "epoch": 1.8038091864827008, "grad_norm": 0.15154476463794708, "learning_rate": 7.974411815215524e-05, "loss": 0.1801, "step": 28010 }, { "epoch": 1.8044531740537408, "grad_norm": 0.14660762250423431, "learning_rate": 7.970118495620815e-05, "loss": 0.1774, "step": 28020 }, { "epoch": 1.8050971616247806, "grad_norm": 0.13252991437911987, "learning_rate": 7.965825176026104e-05, "loss": 0.1664, "step": 28030 }, { "epoch": 1.8057411491958204, "grad_norm": 0.14684712886810303, "learning_rate": 7.961531856431393e-05, "loss": 0.174, "step": 28040 }, { "epoch": 1.8063851367668604, "grad_norm": 0.13660942018032074, "learning_rate": 7.957238536836682e-05, "loss": 0.1775, "step": 28050 }, { "epoch": 1.8070291243379004, "grad_norm": 0.1234297975897789, "learning_rate": 7.952945217241971e-05, "loss": 0.1756, "step": 28060 }, { "epoch": 1.8076731119089402, "grad_norm": 0.1310359090566635, "learning_rate": 7.948651897647261e-05, "loss": 0.1776, "step": 28070 }, { "epoch": 1.80831709947998, "grad_norm": 0.12764568626880646, "learning_rate": 7.94435857805255e-05, "loss": 0.1732, "step": 28080 }, { "epoch": 1.8089610870510198, "grad_norm": 0.12051772326231003, "learning_rate": 7.94006525845784e-05, "loss": 0.1827, "step": 28090 }, { "epoch": 1.8096050746220598, "grad_norm": 0.1425718069076538, "learning_rate": 7.935771938863129e-05, "loss": 0.1684, "step": 28100 }, { "epoch": 1.8102490621930998, "grad_norm": 0.15996232628822327, "learning_rate": 7.931478619268419e-05, "loss": 0.1765, "step": 28110 }, { "epoch": 1.8108930497641396, "grad_norm": 0.1543685793876648, "learning_rate": 7.927185299673708e-05, "loss": 0.1792, "step": 28120 }, { "epoch": 1.8115370373351793, "grad_norm": 0.1343897581100464, "learning_rate": 7.922891980078997e-05, "loss": 0.1758, "step": 28130 }, { "epoch": 1.8121810249062194, "grad_norm": 0.16217264533042908, "learning_rate": 7.918598660484286e-05, "loss": 0.1802, "step": 28140 }, { "epoch": 1.8128250124772591, "grad_norm": 0.13860909640789032, "learning_rate": 7.914305340889577e-05, "loss": 0.1712, "step": 28150 }, { "epoch": 1.8134690000482991, "grad_norm": 0.14019563794136047, "learning_rate": 7.910012021294866e-05, "loss": 0.1661, "step": 28160 }, { "epoch": 1.814112987619339, "grad_norm": 0.13584604859352112, "learning_rate": 7.905718701700155e-05, "loss": 0.1785, "step": 28170 }, { "epoch": 1.8147569751903787, "grad_norm": 0.1396104395389557, "learning_rate": 7.901425382105444e-05, "loss": 0.176, "step": 28180 }, { "epoch": 1.8154009627614187, "grad_norm": 0.1253729611635208, "learning_rate": 7.897132062510734e-05, "loss": 0.1748, "step": 28190 }, { "epoch": 1.8160449503324587, "grad_norm": 0.1394306868314743, "learning_rate": 7.892838742916023e-05, "loss": 0.1696, "step": 28200 }, { "epoch": 1.8166889379034985, "grad_norm": 0.1356905996799469, "learning_rate": 7.888545423321312e-05, "loss": 0.1733, "step": 28210 }, { "epoch": 1.8173329254745383, "grad_norm": 0.1196146309375763, "learning_rate": 7.884252103726601e-05, "loss": 0.1661, "step": 28220 }, { "epoch": 1.817976913045578, "grad_norm": 0.1389116495847702, "learning_rate": 7.87995878413189e-05, "loss": 0.1652, "step": 28230 }, { "epoch": 1.818620900616618, "grad_norm": 0.14125637710094452, "learning_rate": 7.875665464537181e-05, "loss": 0.1662, "step": 28240 }, { "epoch": 1.819264888187658, "grad_norm": 0.1470976024866104, "learning_rate": 7.87137214494247e-05, "loss": 0.1768, "step": 28250 }, { "epoch": 1.8199088757586979, "grad_norm": 0.1525764912366867, "learning_rate": 7.867078825347759e-05, "loss": 0.1678, "step": 28260 }, { "epoch": 1.8205528633297376, "grad_norm": 0.1585046947002411, "learning_rate": 7.862785505753048e-05, "loss": 0.1807, "step": 28270 }, { "epoch": 1.8211968509007777, "grad_norm": 0.11248099058866501, "learning_rate": 7.858492186158339e-05, "loss": 0.1762, "step": 28280 }, { "epoch": 1.8218408384718177, "grad_norm": 0.1527692824602127, "learning_rate": 7.854198866563628e-05, "loss": 0.1752, "step": 28290 }, { "epoch": 1.8224848260428574, "grad_norm": 0.14011701941490173, "learning_rate": 7.849905546968917e-05, "loss": 0.1605, "step": 28300 }, { "epoch": 1.8231288136138972, "grad_norm": 0.1654020994901657, "learning_rate": 7.845612227374206e-05, "loss": 0.1714, "step": 28310 }, { "epoch": 1.823772801184937, "grad_norm": 0.13452249765396118, "learning_rate": 7.841318907779496e-05, "loss": 0.1685, "step": 28320 }, { "epoch": 1.824416788755977, "grad_norm": 0.15205393731594086, "learning_rate": 7.837025588184785e-05, "loss": 0.1762, "step": 28330 }, { "epoch": 1.825060776327017, "grad_norm": 0.12857292592525482, "learning_rate": 7.832732268590074e-05, "loss": 0.1724, "step": 28340 }, { "epoch": 1.8257047638980568, "grad_norm": 0.1467728614807129, "learning_rate": 7.828438948995363e-05, "loss": 0.1738, "step": 28350 }, { "epoch": 1.8263487514690966, "grad_norm": 0.159661203622818, "learning_rate": 7.824145629400654e-05, "loss": 0.1821, "step": 28360 }, { "epoch": 1.8269927390401364, "grad_norm": 0.12726902961730957, "learning_rate": 7.819852309805943e-05, "loss": 0.1757, "step": 28370 }, { "epoch": 1.8276367266111764, "grad_norm": 0.1560751497745514, "learning_rate": 7.815558990211232e-05, "loss": 0.1717, "step": 28380 }, { "epoch": 1.8282807141822164, "grad_norm": 0.13422530889511108, "learning_rate": 7.811265670616521e-05, "loss": 0.1766, "step": 28390 }, { "epoch": 1.8289247017532562, "grad_norm": 0.14504583179950714, "learning_rate": 7.80697235102181e-05, "loss": 0.1692, "step": 28400 }, { "epoch": 1.829568689324296, "grad_norm": 0.1310250163078308, "learning_rate": 7.8026790314271e-05, "loss": 0.1762, "step": 28410 }, { "epoch": 1.830212676895336, "grad_norm": 0.13842974603176117, "learning_rate": 7.798385711832388e-05, "loss": 0.1712, "step": 28420 }, { "epoch": 1.830856664466376, "grad_norm": 0.13774877786636353, "learning_rate": 7.794092392237679e-05, "loss": 0.1719, "step": 28430 }, { "epoch": 1.8315006520374157, "grad_norm": 0.13867054879665375, "learning_rate": 7.789799072642968e-05, "loss": 0.1747, "step": 28440 }, { "epoch": 1.8321446396084555, "grad_norm": 0.14097483456134796, "learning_rate": 7.785505753048258e-05, "loss": 0.1811, "step": 28450 }, { "epoch": 1.8327886271794953, "grad_norm": 0.13555563986301422, "learning_rate": 7.781212433453546e-05, "loss": 0.1707, "step": 28460 }, { "epoch": 1.8334326147505353, "grad_norm": 0.1352749913930893, "learning_rate": 7.776919113858836e-05, "loss": 0.1734, "step": 28470 }, { "epoch": 1.8340766023215753, "grad_norm": 0.13648007810115814, "learning_rate": 7.772625794264125e-05, "loss": 0.1702, "step": 28480 }, { "epoch": 1.834720589892615, "grad_norm": 0.16580192744731903, "learning_rate": 7.768332474669416e-05, "loss": 0.1715, "step": 28490 }, { "epoch": 1.835364577463655, "grad_norm": 0.12399222701787949, "learning_rate": 7.764039155074703e-05, "loss": 0.1731, "step": 28500 }, { "epoch": 1.8360085650346947, "grad_norm": 0.12590950727462769, "learning_rate": 7.759745835479994e-05, "loss": 0.1715, "step": 28510 }, { "epoch": 1.8366525526057347, "grad_norm": 0.15075910091400146, "learning_rate": 7.755452515885283e-05, "loss": 0.181, "step": 28520 }, { "epoch": 1.8372965401767747, "grad_norm": 0.15551236271858215, "learning_rate": 7.751159196290573e-05, "loss": 0.1789, "step": 28530 }, { "epoch": 1.8379405277478145, "grad_norm": 0.12851355969905853, "learning_rate": 7.746865876695861e-05, "loss": 0.1651, "step": 28540 }, { "epoch": 1.8385845153188543, "grad_norm": 0.12690792977809906, "learning_rate": 7.742572557101151e-05, "loss": 0.1699, "step": 28550 }, { "epoch": 1.8392285028898943, "grad_norm": 0.13973206281661987, "learning_rate": 7.73827923750644e-05, "loss": 0.179, "step": 28560 }, { "epoch": 1.8398724904609343, "grad_norm": 0.12941032648086548, "learning_rate": 7.73398591791173e-05, "loss": 0.1612, "step": 28570 }, { "epoch": 1.840516478031974, "grad_norm": 0.14006255567073822, "learning_rate": 7.729692598317019e-05, "loss": 0.1757, "step": 28580 }, { "epoch": 1.8411604656030138, "grad_norm": 0.15474648773670197, "learning_rate": 7.725399278722308e-05, "loss": 0.1715, "step": 28590 }, { "epoch": 1.8418044531740536, "grad_norm": 0.1548299938440323, "learning_rate": 7.721105959127598e-05, "loss": 0.1742, "step": 28600 }, { "epoch": 1.8424484407450936, "grad_norm": 0.1489746868610382, "learning_rate": 7.716812639532887e-05, "loss": 0.1735, "step": 28610 }, { "epoch": 1.8430924283161336, "grad_norm": 0.15837064385414124, "learning_rate": 7.712519319938176e-05, "loss": 0.1684, "step": 28620 }, { "epoch": 1.8437364158871734, "grad_norm": 0.1508493721485138, "learning_rate": 7.708226000343465e-05, "loss": 0.1771, "step": 28630 }, { "epoch": 1.8443804034582132, "grad_norm": 0.14458073675632477, "learning_rate": 7.703932680748756e-05, "loss": 0.166, "step": 28640 }, { "epoch": 1.845024391029253, "grad_norm": 0.14615000784397125, "learning_rate": 7.699639361154045e-05, "loss": 0.1755, "step": 28650 }, { "epoch": 1.845668378600293, "grad_norm": 0.14338116347789764, "learning_rate": 7.695346041559334e-05, "loss": 0.1812, "step": 28660 }, { "epoch": 1.846312366171333, "grad_norm": 0.1380125880241394, "learning_rate": 7.691052721964623e-05, "loss": 0.1707, "step": 28670 }, { "epoch": 1.8469563537423728, "grad_norm": 0.1364850252866745, "learning_rate": 7.686759402369913e-05, "loss": 0.1712, "step": 28680 }, { "epoch": 1.8476003413134126, "grad_norm": 0.1531829535961151, "learning_rate": 7.682466082775202e-05, "loss": 0.174, "step": 28690 }, { "epoch": 1.8482443288844526, "grad_norm": 0.1341627687215805, "learning_rate": 7.678172763180492e-05, "loss": 0.1723, "step": 28700 }, { "epoch": 1.8488883164554926, "grad_norm": 0.13202054798603058, "learning_rate": 7.67387944358578e-05, "loss": 0.1713, "step": 28710 }, { "epoch": 1.8495323040265323, "grad_norm": 0.13626904785633087, "learning_rate": 7.669586123991071e-05, "loss": 0.1727, "step": 28720 }, { "epoch": 1.8501762915975721, "grad_norm": 0.13765351474285126, "learning_rate": 7.66529280439636e-05, "loss": 0.1744, "step": 28730 }, { "epoch": 1.850820279168612, "grad_norm": 0.1376587599515915, "learning_rate": 7.660999484801649e-05, "loss": 0.1685, "step": 28740 }, { "epoch": 1.851464266739652, "grad_norm": 0.14048916101455688, "learning_rate": 7.656706165206938e-05, "loss": 0.1781, "step": 28750 }, { "epoch": 1.852108254310692, "grad_norm": 0.1463300585746765, "learning_rate": 7.652412845612227e-05, "loss": 0.1739, "step": 28760 }, { "epoch": 1.8527522418817317, "grad_norm": 0.15120843052864075, "learning_rate": 7.648119526017518e-05, "loss": 0.1757, "step": 28770 }, { "epoch": 1.8533962294527715, "grad_norm": 0.1313261240720749, "learning_rate": 7.643826206422805e-05, "loss": 0.1678, "step": 28780 }, { "epoch": 1.8540402170238113, "grad_norm": 0.1444675475358963, "learning_rate": 7.639532886828096e-05, "loss": 0.1734, "step": 28790 }, { "epoch": 1.8546842045948513, "grad_norm": 0.13993655145168304, "learning_rate": 7.635239567233385e-05, "loss": 0.1765, "step": 28800 }, { "epoch": 1.8553281921658913, "grad_norm": 0.14516256749629974, "learning_rate": 7.630946247638675e-05, "loss": 0.1708, "step": 28810 }, { "epoch": 1.855972179736931, "grad_norm": 0.1399213969707489, "learning_rate": 7.626652928043963e-05, "loss": 0.1747, "step": 28820 }, { "epoch": 1.8566161673079709, "grad_norm": 0.1487474888563156, "learning_rate": 7.622359608449253e-05, "loss": 0.1679, "step": 28830 }, { "epoch": 1.8572601548790109, "grad_norm": 0.15147721767425537, "learning_rate": 7.618066288854543e-05, "loss": 0.1785, "step": 28840 }, { "epoch": 1.8579041424500509, "grad_norm": 0.14529506862163544, "learning_rate": 7.613772969259833e-05, "loss": 0.1811, "step": 28850 }, { "epoch": 1.8585481300210906, "grad_norm": 0.12975050508975983, "learning_rate": 7.609479649665121e-05, "loss": 0.1695, "step": 28860 }, { "epoch": 1.8591921175921304, "grad_norm": 0.11818961799144745, "learning_rate": 7.605186330070411e-05, "loss": 0.1701, "step": 28870 }, { "epoch": 1.8598361051631702, "grad_norm": 0.1561373770236969, "learning_rate": 7.6008930104757e-05, "loss": 0.1685, "step": 28880 }, { "epoch": 1.8604800927342102, "grad_norm": 0.1367853283882141, "learning_rate": 7.59659969088099e-05, "loss": 0.1745, "step": 28890 }, { "epoch": 1.8611240803052502, "grad_norm": 0.12588512897491455, "learning_rate": 7.592306371286278e-05, "loss": 0.1735, "step": 28900 }, { "epoch": 1.86176806787629, "grad_norm": 0.13119561970233917, "learning_rate": 7.588013051691569e-05, "loss": 0.171, "step": 28910 }, { "epoch": 1.8624120554473298, "grad_norm": 0.14438503980636597, "learning_rate": 7.583719732096858e-05, "loss": 0.1823, "step": 28920 }, { "epoch": 1.8630560430183698, "grad_norm": 0.12435891479253769, "learning_rate": 7.579426412502147e-05, "loss": 0.1729, "step": 28930 }, { "epoch": 1.8637000305894096, "grad_norm": 0.13388927280902863, "learning_rate": 7.575133092907436e-05, "loss": 0.1724, "step": 28940 }, { "epoch": 1.8643440181604496, "grad_norm": 0.14422845840454102, "learning_rate": 7.570839773312725e-05, "loss": 0.1769, "step": 28950 }, { "epoch": 1.8649880057314894, "grad_norm": 0.1339499056339264, "learning_rate": 7.566546453718015e-05, "loss": 0.1842, "step": 28960 }, { "epoch": 1.8656319933025292, "grad_norm": 0.14512792229652405, "learning_rate": 7.562253134123304e-05, "loss": 0.1727, "step": 28970 }, { "epoch": 1.8662759808735692, "grad_norm": 0.12894681096076965, "learning_rate": 7.557959814528594e-05, "loss": 0.1731, "step": 28980 }, { "epoch": 1.8669199684446092, "grad_norm": 0.13647006452083588, "learning_rate": 7.553666494933883e-05, "loss": 0.1751, "step": 28990 }, { "epoch": 1.867563956015649, "grad_norm": 0.1229422464966774, "learning_rate": 7.549373175339173e-05, "loss": 0.1688, "step": 29000 }, { "epoch": 1.8682079435866887, "grad_norm": 0.13504715263843536, "learning_rate": 7.545079855744462e-05, "loss": 0.1755, "step": 29010 }, { "epoch": 1.8688519311577285, "grad_norm": 0.16331826150417328, "learning_rate": 7.540786536149751e-05, "loss": 0.1783, "step": 29020 }, { "epoch": 1.8694959187287685, "grad_norm": 0.146763414144516, "learning_rate": 7.53649321655504e-05, "loss": 0.1769, "step": 29030 }, { "epoch": 1.8701399062998085, "grad_norm": 0.14227962493896484, "learning_rate": 7.53219989696033e-05, "loss": 0.1737, "step": 29040 }, { "epoch": 1.8707838938708483, "grad_norm": 0.13682831823825836, "learning_rate": 7.52790657736562e-05, "loss": 0.1743, "step": 29050 }, { "epoch": 1.871427881441888, "grad_norm": 0.14679376780986786, "learning_rate": 7.523613257770909e-05, "loss": 0.169, "step": 29060 }, { "epoch": 1.872071869012928, "grad_norm": 0.1263737976551056, "learning_rate": 7.519319938176198e-05, "loss": 0.1748, "step": 29070 }, { "epoch": 1.8727158565839679, "grad_norm": 0.13921499252319336, "learning_rate": 7.515026618581488e-05, "loss": 0.1743, "step": 29080 }, { "epoch": 1.873359844155008, "grad_norm": 0.1409720927476883, "learning_rate": 7.510733298986777e-05, "loss": 0.1704, "step": 29090 }, { "epoch": 1.8740038317260477, "grad_norm": 0.12178916484117508, "learning_rate": 7.506439979392066e-05, "loss": 0.1676, "step": 29100 }, { "epoch": 1.8746478192970875, "grad_norm": 0.1351480334997177, "learning_rate": 7.502146659797355e-05, "loss": 0.1755, "step": 29110 }, { "epoch": 1.8752918068681275, "grad_norm": 0.13907843828201294, "learning_rate": 7.497853340202645e-05, "loss": 0.1679, "step": 29120 }, { "epoch": 1.8759357944391675, "grad_norm": 0.14930258691310883, "learning_rate": 7.493560020607935e-05, "loss": 0.1789, "step": 29130 }, { "epoch": 1.8765797820102073, "grad_norm": 0.11959561705589294, "learning_rate": 7.489266701013223e-05, "loss": 0.1768, "step": 29140 }, { "epoch": 1.877223769581247, "grad_norm": 0.1484522521495819, "learning_rate": 7.484973381418513e-05, "loss": 0.1721, "step": 29150 }, { "epoch": 1.8778677571522868, "grad_norm": 0.13643935322761536, "learning_rate": 7.480680061823802e-05, "loss": 0.1746, "step": 29160 }, { "epoch": 1.8785117447233268, "grad_norm": 0.1359250396490097, "learning_rate": 7.476386742229093e-05, "loss": 0.1802, "step": 29170 }, { "epoch": 1.8791557322943668, "grad_norm": 0.13562758266925812, "learning_rate": 7.47209342263438e-05, "loss": 0.1771, "step": 29180 }, { "epoch": 1.8797997198654066, "grad_norm": 0.12311853468418121, "learning_rate": 7.467800103039671e-05, "loss": 0.176, "step": 29190 }, { "epoch": 1.8804437074364464, "grad_norm": 0.14202481508255005, "learning_rate": 7.46350678344496e-05, "loss": 0.1775, "step": 29200 }, { "epoch": 1.8810876950074864, "grad_norm": 0.16902077198028564, "learning_rate": 7.45921346385025e-05, "loss": 0.1848, "step": 29210 }, { "epoch": 1.8817316825785262, "grad_norm": 0.14423537254333496, "learning_rate": 7.454920144255538e-05, "loss": 0.17, "step": 29220 }, { "epoch": 1.8823756701495662, "grad_norm": 0.1357463002204895, "learning_rate": 7.450626824660828e-05, "loss": 0.1688, "step": 29230 }, { "epoch": 1.883019657720606, "grad_norm": 0.15791074931621552, "learning_rate": 7.446333505066117e-05, "loss": 0.1704, "step": 29240 }, { "epoch": 1.8836636452916458, "grad_norm": 0.14574724435806274, "learning_rate": 7.442040185471408e-05, "loss": 0.1746, "step": 29250 }, { "epoch": 1.8843076328626858, "grad_norm": 0.15701572597026825, "learning_rate": 7.437746865876696e-05, "loss": 0.1766, "step": 29260 }, { "epoch": 1.8849516204337258, "grad_norm": 0.14984063804149628, "learning_rate": 7.433453546281986e-05, "loss": 0.1694, "step": 29270 }, { "epoch": 1.8855956080047656, "grad_norm": 0.12888966500759125, "learning_rate": 7.429160226687275e-05, "loss": 0.1744, "step": 29280 }, { "epoch": 1.8862395955758053, "grad_norm": 0.1248808428645134, "learning_rate": 7.424866907092564e-05, "loss": 0.1709, "step": 29290 }, { "epoch": 1.8868835831468451, "grad_norm": 0.15055043995380402, "learning_rate": 7.420573587497853e-05, "loss": 0.1711, "step": 29300 }, { "epoch": 1.8875275707178851, "grad_norm": 0.1304144561290741, "learning_rate": 7.416280267903142e-05, "loss": 0.1701, "step": 29310 }, { "epoch": 1.8881715582889251, "grad_norm": 0.1402372121810913, "learning_rate": 7.411986948308433e-05, "loss": 0.1696, "step": 29320 }, { "epoch": 1.888815545859965, "grad_norm": 0.1364368200302124, "learning_rate": 7.407693628713722e-05, "loss": 0.1737, "step": 29330 }, { "epoch": 1.8894595334310047, "grad_norm": 0.15484018623828888, "learning_rate": 7.403400309119011e-05, "loss": 0.1791, "step": 29340 }, { "epoch": 1.8901035210020447, "grad_norm": 0.1401561051607132, "learning_rate": 7.3991069895243e-05, "loss": 0.1716, "step": 29350 }, { "epoch": 1.8907475085730845, "grad_norm": 0.15336009860038757, "learning_rate": 7.39481366992959e-05, "loss": 0.1812, "step": 29360 }, { "epoch": 1.8913914961441245, "grad_norm": 0.13300305604934692, "learning_rate": 7.39052035033488e-05, "loss": 0.1742, "step": 29370 }, { "epoch": 1.8920354837151643, "grad_norm": 0.1283971220254898, "learning_rate": 7.386227030740168e-05, "loss": 0.1691, "step": 29380 }, { "epoch": 1.892679471286204, "grad_norm": 0.13798485696315765, "learning_rate": 7.381933711145457e-05, "loss": 0.1733, "step": 29390 }, { "epoch": 1.893323458857244, "grad_norm": 0.16888247430324554, "learning_rate": 7.377640391550748e-05, "loss": 0.1762, "step": 29400 }, { "epoch": 1.893967446428284, "grad_norm": 0.15654702484607697, "learning_rate": 7.373347071956037e-05, "loss": 0.1711, "step": 29410 }, { "epoch": 1.8946114339993239, "grad_norm": 0.15029816329479218, "learning_rate": 7.369053752361326e-05, "loss": 0.1772, "step": 29420 }, { "epoch": 1.8952554215703636, "grad_norm": 0.1308252364397049, "learning_rate": 7.364760432766615e-05, "loss": 0.1733, "step": 29430 }, { "epoch": 1.8958994091414034, "grad_norm": 0.1546105146408081, "learning_rate": 7.360467113171906e-05, "loss": 0.1763, "step": 29440 }, { "epoch": 1.8965433967124434, "grad_norm": 0.12471922487020493, "learning_rate": 7.356173793577195e-05, "loss": 0.1697, "step": 29450 }, { "epoch": 1.8971873842834834, "grad_norm": 0.12908469140529633, "learning_rate": 7.351880473982484e-05, "loss": 0.1686, "step": 29460 }, { "epoch": 1.8978313718545232, "grad_norm": 0.1453697681427002, "learning_rate": 7.347587154387773e-05, "loss": 0.1703, "step": 29470 }, { "epoch": 1.898475359425563, "grad_norm": 0.12698465585708618, "learning_rate": 7.343293834793062e-05, "loss": 0.1695, "step": 29480 }, { "epoch": 1.899119346996603, "grad_norm": 0.1388496607542038, "learning_rate": 7.339000515198352e-05, "loss": 0.1655, "step": 29490 }, { "epoch": 1.8997633345676428, "grad_norm": 0.15315058827400208, "learning_rate": 7.33470719560364e-05, "loss": 0.1769, "step": 29500 }, { "epoch": 1.9004073221386828, "grad_norm": 0.1413145661354065, "learning_rate": 7.33041387600893e-05, "loss": 0.1728, "step": 29510 }, { "epoch": 1.9010513097097226, "grad_norm": 0.12831275165081024, "learning_rate": 7.32612055641422e-05, "loss": 0.1725, "step": 29520 }, { "epoch": 1.9016952972807624, "grad_norm": 0.12890757620334625, "learning_rate": 7.32182723681951e-05, "loss": 0.1698, "step": 29530 }, { "epoch": 1.9023392848518024, "grad_norm": 0.1831369251012802, "learning_rate": 7.317533917224798e-05, "loss": 0.1749, "step": 29540 }, { "epoch": 1.9029832724228424, "grad_norm": 0.16081324219703674, "learning_rate": 7.313240597630088e-05, "loss": 0.1784, "step": 29550 }, { "epoch": 1.9036272599938822, "grad_norm": 0.15232475101947784, "learning_rate": 7.308947278035377e-05, "loss": 0.1789, "step": 29560 }, { "epoch": 1.904271247564922, "grad_norm": 0.15394048392772675, "learning_rate": 7.304653958440667e-05, "loss": 0.1659, "step": 29570 }, { "epoch": 1.9049152351359617, "grad_norm": 0.1611095517873764, "learning_rate": 7.300360638845955e-05, "loss": 0.1698, "step": 29580 }, { "epoch": 1.9055592227070017, "grad_norm": 0.14386500418186188, "learning_rate": 7.296067319251246e-05, "loss": 0.1764, "step": 29590 }, { "epoch": 1.9062032102780417, "grad_norm": 0.12953387200832367, "learning_rate": 7.291773999656535e-05, "loss": 0.1691, "step": 29600 }, { "epoch": 1.9068471978490815, "grad_norm": 0.14364264905452728, "learning_rate": 7.287480680061825e-05, "loss": 0.1747, "step": 29610 }, { "epoch": 1.9074911854201213, "grad_norm": 0.1497490406036377, "learning_rate": 7.283187360467113e-05, "loss": 0.1739, "step": 29620 }, { "epoch": 1.9081351729911613, "grad_norm": 0.12779629230499268, "learning_rate": 7.278894040872403e-05, "loss": 0.1834, "step": 29630 }, { "epoch": 1.9087791605622013, "grad_norm": 0.1299477517604828, "learning_rate": 7.274600721277692e-05, "loss": 0.1693, "step": 29640 }, { "epoch": 1.909423148133241, "grad_norm": 0.14683941006660461, "learning_rate": 7.270307401682981e-05, "loss": 0.1764, "step": 29650 }, { "epoch": 1.9100671357042809, "grad_norm": 0.137542724609375, "learning_rate": 7.26601408208827e-05, "loss": 0.1807, "step": 29660 }, { "epoch": 1.9107111232753207, "grad_norm": 0.13402092456817627, "learning_rate": 7.26172076249356e-05, "loss": 0.1737, "step": 29670 }, { "epoch": 1.9113551108463607, "grad_norm": 0.15907268226146698, "learning_rate": 7.25742744289885e-05, "loss": 0.1763, "step": 29680 }, { "epoch": 1.9119990984174007, "grad_norm": 0.17472286522388458, "learning_rate": 7.253134123304139e-05, "loss": 0.1752, "step": 29690 }, { "epoch": 1.9126430859884405, "grad_norm": 0.14482705295085907, "learning_rate": 7.248840803709428e-05, "loss": 0.1704, "step": 29700 }, { "epoch": 1.9132870735594802, "grad_norm": 0.15022392570972443, "learning_rate": 7.244547484114717e-05, "loss": 0.1646, "step": 29710 }, { "epoch": 1.91393106113052, "grad_norm": 0.140706405043602, "learning_rate": 7.240254164520008e-05, "loss": 0.1731, "step": 29720 }, { "epoch": 1.91457504870156, "grad_norm": 0.127040296792984, "learning_rate": 7.235960844925297e-05, "loss": 0.1787, "step": 29730 }, { "epoch": 1.9152190362726, "grad_norm": 0.14416636526584625, "learning_rate": 7.231667525330586e-05, "loss": 0.1731, "step": 29740 }, { "epoch": 1.9158630238436398, "grad_norm": 0.1609325259923935, "learning_rate": 7.227374205735875e-05, "loss": 0.1772, "step": 29750 }, { "epoch": 1.9165070114146796, "grad_norm": 0.13654175400733948, "learning_rate": 7.223080886141165e-05, "loss": 0.174, "step": 29760 }, { "epoch": 1.9171509989857196, "grad_norm": 0.13322624564170837, "learning_rate": 7.218787566546454e-05, "loss": 0.1732, "step": 29770 }, { "epoch": 1.9177949865567596, "grad_norm": 0.12348611652851105, "learning_rate": 7.214494246951743e-05, "loss": 0.1774, "step": 29780 }, { "epoch": 1.9184389741277994, "grad_norm": 0.1416795551776886, "learning_rate": 7.210200927357032e-05, "loss": 0.1781, "step": 29790 }, { "epoch": 1.9190829616988392, "grad_norm": 0.12655912339687347, "learning_rate": 7.205907607762323e-05, "loss": 0.1742, "step": 29800 }, { "epoch": 1.919726949269879, "grad_norm": 0.13115528225898743, "learning_rate": 7.201614288167612e-05, "loss": 0.1686, "step": 29810 }, { "epoch": 1.920370936840919, "grad_norm": 0.13831448554992676, "learning_rate": 7.197320968572901e-05, "loss": 0.1755, "step": 29820 }, { "epoch": 1.921014924411959, "grad_norm": 0.1401808112859726, "learning_rate": 7.19302764897819e-05, "loss": 0.1746, "step": 29830 }, { "epoch": 1.9216589119829988, "grad_norm": 0.14752234518527985, "learning_rate": 7.188734329383479e-05, "loss": 0.171, "step": 29840 }, { "epoch": 1.9223028995540385, "grad_norm": 0.13997787237167358, "learning_rate": 7.18444100978877e-05, "loss": 0.1699, "step": 29850 }, { "epoch": 1.9229468871250783, "grad_norm": 0.14786504209041595, "learning_rate": 7.180147690194059e-05, "loss": 0.1642, "step": 29860 }, { "epoch": 1.9235908746961183, "grad_norm": 0.13686510920524597, "learning_rate": 7.175854370599348e-05, "loss": 0.1754, "step": 29870 }, { "epoch": 1.9242348622671583, "grad_norm": 0.14148850739002228, "learning_rate": 7.171561051004637e-05, "loss": 0.1684, "step": 29880 }, { "epoch": 1.9248788498381981, "grad_norm": 0.1329798698425293, "learning_rate": 7.167267731409927e-05, "loss": 0.173, "step": 29890 }, { "epoch": 1.925522837409238, "grad_norm": 0.11547597497701645, "learning_rate": 7.162974411815215e-05, "loss": 0.1801, "step": 29900 }, { "epoch": 1.926166824980278, "grad_norm": 0.1380377858877182, "learning_rate": 7.158681092220505e-05, "loss": 0.1773, "step": 29910 }, { "epoch": 1.926810812551318, "grad_norm": 0.16275130212306976, "learning_rate": 7.154387772625794e-05, "loss": 0.1737, "step": 29920 }, { "epoch": 1.9274548001223577, "grad_norm": 0.1442023366689682, "learning_rate": 7.150094453031085e-05, "loss": 0.165, "step": 29930 }, { "epoch": 1.9280987876933975, "grad_norm": 0.1364905834197998, "learning_rate": 7.145801133436372e-05, "loss": 0.1745, "step": 29940 }, { "epoch": 1.9287427752644373, "grad_norm": 0.16091622412204742, "learning_rate": 7.141507813841663e-05, "loss": 0.1716, "step": 29950 }, { "epoch": 1.9293867628354773, "grad_norm": 0.15264220535755157, "learning_rate": 7.137214494246952e-05, "loss": 0.1718, "step": 29960 }, { "epoch": 1.9300307504065173, "grad_norm": 0.17298820614814758, "learning_rate": 7.132921174652242e-05, "loss": 0.177, "step": 29970 }, { "epoch": 1.930674737977557, "grad_norm": 0.14006014168262482, "learning_rate": 7.12862785505753e-05, "loss": 0.1728, "step": 29980 }, { "epoch": 1.9313187255485968, "grad_norm": 0.1402505338191986, "learning_rate": 7.12433453546282e-05, "loss": 0.1724, "step": 29990 }, { "epoch": 1.9319627131196366, "grad_norm": 0.1292792111635208, "learning_rate": 7.12004121586811e-05, "loss": 0.1706, "step": 30000 }, { "epoch": 1.9326067006906766, "grad_norm": 0.14788146317005157, "learning_rate": 7.115747896273399e-05, "loss": 0.1727, "step": 30010 }, { "epoch": 1.9332506882617166, "grad_norm": 0.1286647468805313, "learning_rate": 7.111454576678688e-05, "loss": 0.1763, "step": 30020 }, { "epoch": 1.9338946758327564, "grad_norm": 0.1484619677066803, "learning_rate": 7.107161257083977e-05, "loss": 0.1715, "step": 30030 }, { "epoch": 1.9345386634037962, "grad_norm": 0.1254682093858719, "learning_rate": 7.102867937489267e-05, "loss": 0.1693, "step": 30040 }, { "epoch": 1.9351826509748362, "grad_norm": 0.13584944605827332, "learning_rate": 7.098574617894556e-05, "loss": 0.1838, "step": 30050 }, { "epoch": 1.9358266385458762, "grad_norm": 0.13007330894470215, "learning_rate": 7.094281298299847e-05, "loss": 0.1752, "step": 30060 }, { "epoch": 1.936470626116916, "grad_norm": 0.13558557629585266, "learning_rate": 7.089987978705134e-05, "loss": 0.1722, "step": 30070 }, { "epoch": 1.9371146136879558, "grad_norm": 0.14261965453624725, "learning_rate": 7.085694659110425e-05, "loss": 0.1705, "step": 30080 }, { "epoch": 1.9377586012589956, "grad_norm": 0.14811775088310242, "learning_rate": 7.081401339515714e-05, "loss": 0.1765, "step": 30090 }, { "epoch": 1.9384025888300356, "grad_norm": 0.14318080246448517, "learning_rate": 7.077108019921004e-05, "loss": 0.1728, "step": 30100 }, { "epoch": 1.9390465764010756, "grad_norm": 0.15579307079315186, "learning_rate": 7.072814700326292e-05, "loss": 0.1789, "step": 30110 }, { "epoch": 1.9396905639721154, "grad_norm": 0.14616049826145172, "learning_rate": 7.068521380731582e-05, "loss": 0.1703, "step": 30120 }, { "epoch": 1.9403345515431552, "grad_norm": 0.13366341590881348, "learning_rate": 7.064228061136871e-05, "loss": 0.1729, "step": 30130 }, { "epoch": 1.940978539114195, "grad_norm": 0.13240034878253937, "learning_rate": 7.059934741542162e-05, "loss": 0.1692, "step": 30140 }, { "epoch": 1.941622526685235, "grad_norm": 0.12488800287246704, "learning_rate": 7.05564142194745e-05, "loss": 0.1663, "step": 30150 }, { "epoch": 1.942266514256275, "grad_norm": 0.15447363257408142, "learning_rate": 7.05134810235274e-05, "loss": 0.1712, "step": 30160 }, { "epoch": 1.9429105018273147, "grad_norm": 0.13131467998027802, "learning_rate": 7.047054782758029e-05, "loss": 0.1718, "step": 30170 }, { "epoch": 1.9435544893983545, "grad_norm": 0.13682492077350616, "learning_rate": 7.042761463163318e-05, "loss": 0.1689, "step": 30180 }, { "epoch": 1.9441984769693945, "grad_norm": 0.12813308835029602, "learning_rate": 7.038468143568607e-05, "loss": 0.17, "step": 30190 }, { "epoch": 1.9448424645404345, "grad_norm": 0.12922728061676025, "learning_rate": 7.034174823973896e-05, "loss": 0.1666, "step": 30200 }, { "epoch": 1.9454864521114743, "grad_norm": 0.12431295961141586, "learning_rate": 7.029881504379187e-05, "loss": 0.1664, "step": 30210 }, { "epoch": 1.946130439682514, "grad_norm": 0.15613502264022827, "learning_rate": 7.025588184784476e-05, "loss": 0.1684, "step": 30220 }, { "epoch": 1.9467744272535539, "grad_norm": 0.1495448648929596, "learning_rate": 7.021294865189765e-05, "loss": 0.1738, "step": 30230 }, { "epoch": 1.9474184148245939, "grad_norm": 0.14588463306427002, "learning_rate": 7.017001545595054e-05, "loss": 0.1791, "step": 30240 }, { "epoch": 1.9480624023956339, "grad_norm": 0.1421249955892563, "learning_rate": 7.012708226000344e-05, "loss": 0.1723, "step": 30250 }, { "epoch": 1.9487063899666737, "grad_norm": 0.14319872856140137, "learning_rate": 7.008414906405633e-05, "loss": 0.1719, "step": 30260 }, { "epoch": 1.9493503775377135, "grad_norm": 0.15905028581619263, "learning_rate": 7.004121586810922e-05, "loss": 0.1741, "step": 30270 }, { "epoch": 1.9499943651087532, "grad_norm": 0.13525596261024475, "learning_rate": 6.999828267216212e-05, "loss": 0.1708, "step": 30280 }, { "epoch": 1.9506383526797932, "grad_norm": 0.1429899036884308, "learning_rate": 6.995534947621502e-05, "loss": 0.1747, "step": 30290 }, { "epoch": 1.9512823402508332, "grad_norm": 0.14036262035369873, "learning_rate": 6.991241628026791e-05, "loss": 0.1755, "step": 30300 }, { "epoch": 1.951926327821873, "grad_norm": 0.11514687538146973, "learning_rate": 6.98694830843208e-05, "loss": 0.1703, "step": 30310 }, { "epoch": 1.9525703153929128, "grad_norm": 0.14569401741027832, "learning_rate": 6.982654988837369e-05, "loss": 0.1819, "step": 30320 }, { "epoch": 1.9532143029639528, "grad_norm": 0.13825321197509766, "learning_rate": 6.97836166924266e-05, "loss": 0.1764, "step": 30330 }, { "epoch": 1.9538582905349928, "grad_norm": 0.12494892627000809, "learning_rate": 6.974068349647949e-05, "loss": 0.1713, "step": 30340 }, { "epoch": 1.9545022781060326, "grad_norm": 0.12430020421743393, "learning_rate": 6.969775030053238e-05, "loss": 0.1709, "step": 30350 }, { "epoch": 1.9551462656770724, "grad_norm": 0.14766472578048706, "learning_rate": 6.965481710458527e-05, "loss": 0.1751, "step": 30360 }, { "epoch": 1.9557902532481122, "grad_norm": 0.15387292206287384, "learning_rate": 6.961188390863816e-05, "loss": 0.1709, "step": 30370 }, { "epoch": 1.9564342408191522, "grad_norm": 0.1463126540184021, "learning_rate": 6.956895071269106e-05, "loss": 0.1748, "step": 30380 }, { "epoch": 1.9570782283901922, "grad_norm": 0.14343871176242828, "learning_rate": 6.952601751674394e-05, "loss": 0.1691, "step": 30390 }, { "epoch": 1.957722215961232, "grad_norm": 0.14688515663146973, "learning_rate": 6.948308432079684e-05, "loss": 0.1728, "step": 30400 }, { "epoch": 1.9583662035322718, "grad_norm": 0.14019353687763214, "learning_rate": 6.944015112484973e-05, "loss": 0.1714, "step": 30410 }, { "epoch": 1.9590101911033118, "grad_norm": 0.1600102186203003, "learning_rate": 6.939721792890264e-05, "loss": 0.1702, "step": 30420 }, { "epoch": 1.9596541786743515, "grad_norm": 0.1473645120859146, "learning_rate": 6.935428473295552e-05, "loss": 0.1739, "step": 30430 }, { "epoch": 1.9602981662453915, "grad_norm": 0.15233278274536133, "learning_rate": 6.931135153700842e-05, "loss": 0.1689, "step": 30440 }, { "epoch": 1.9609421538164313, "grad_norm": 0.138855442404747, "learning_rate": 6.926841834106131e-05, "loss": 0.1636, "step": 30450 }, { "epoch": 1.9615861413874711, "grad_norm": 0.14371539652347565, "learning_rate": 6.922548514511422e-05, "loss": 0.1734, "step": 30460 }, { "epoch": 1.9622301289585111, "grad_norm": 0.1403135359287262, "learning_rate": 6.918255194916709e-05, "loss": 0.173, "step": 30470 }, { "epoch": 1.9628741165295511, "grad_norm": 0.1426779180765152, "learning_rate": 6.913961875322e-05, "loss": 0.1712, "step": 30480 }, { "epoch": 1.963518104100591, "grad_norm": 0.13455066084861755, "learning_rate": 6.909668555727289e-05, "loss": 0.1737, "step": 30490 }, { "epoch": 1.9641620916716307, "grad_norm": 0.12525193393230438, "learning_rate": 6.905375236132579e-05, "loss": 0.164, "step": 30500 }, { "epoch": 1.9648060792426705, "grad_norm": 0.13797560334205627, "learning_rate": 6.901081916537867e-05, "loss": 0.168, "step": 30510 }, { "epoch": 1.9654500668137105, "grad_norm": 0.13817663490772247, "learning_rate": 6.896788596943157e-05, "loss": 0.1708, "step": 30520 }, { "epoch": 1.9660940543847505, "grad_norm": 0.13920055329799652, "learning_rate": 6.892495277348446e-05, "loss": 0.1743, "step": 30530 }, { "epoch": 1.9667380419557903, "grad_norm": 0.1323019415140152, "learning_rate": 6.888201957753735e-05, "loss": 0.1747, "step": 30540 }, { "epoch": 1.96738202952683, "grad_norm": 0.1564551740884781, "learning_rate": 6.883908638159024e-05, "loss": 0.1765, "step": 30550 }, { "epoch": 1.96802601709787, "grad_norm": 0.14048965275287628, "learning_rate": 6.879615318564314e-05, "loss": 0.1707, "step": 30560 }, { "epoch": 1.9686700046689098, "grad_norm": 0.1322987675666809, "learning_rate": 6.875321998969604e-05, "loss": 0.1789, "step": 30570 }, { "epoch": 1.9693139922399499, "grad_norm": 0.13588497042655945, "learning_rate": 6.871028679374893e-05, "loss": 0.1693, "step": 30580 }, { "epoch": 1.9699579798109896, "grad_norm": 0.1316051483154297, "learning_rate": 6.866735359780182e-05, "loss": 0.1631, "step": 30590 }, { "epoch": 1.9706019673820294, "grad_norm": 0.16969089210033417, "learning_rate": 6.862442040185471e-05, "loss": 0.1766, "step": 30600 }, { "epoch": 1.9712459549530694, "grad_norm": 0.15734347701072693, "learning_rate": 6.858148720590762e-05, "loss": 0.1682, "step": 30610 }, { "epoch": 1.9718899425241094, "grad_norm": 0.1468954235315323, "learning_rate": 6.85385540099605e-05, "loss": 0.1749, "step": 30620 }, { "epoch": 1.9725339300951492, "grad_norm": 0.12978820502758026, "learning_rate": 6.84956208140134e-05, "loss": 0.1676, "step": 30630 }, { "epoch": 1.973177917666189, "grad_norm": 0.11942397803068161, "learning_rate": 6.845268761806629e-05, "loss": 0.1638, "step": 30640 }, { "epoch": 1.9738219052372288, "grad_norm": 0.14833444356918335, "learning_rate": 6.840975442211919e-05, "loss": 0.17, "step": 30650 }, { "epoch": 1.9744658928082688, "grad_norm": 0.1306144744157791, "learning_rate": 6.836682122617208e-05, "loss": 0.1742, "step": 30660 }, { "epoch": 1.9751098803793088, "grad_norm": 0.1389499008655548, "learning_rate": 6.832388803022497e-05, "loss": 0.1734, "step": 30670 }, { "epoch": 1.9757538679503486, "grad_norm": 0.14087645709514618, "learning_rate": 6.828095483427786e-05, "loss": 0.1751, "step": 30680 }, { "epoch": 1.9763978555213884, "grad_norm": 0.1508990228176117, "learning_rate": 6.823802163833077e-05, "loss": 0.1718, "step": 30690 }, { "epoch": 1.9770418430924284, "grad_norm": 0.16892698407173157, "learning_rate": 6.819508844238366e-05, "loss": 0.1815, "step": 30700 }, { "epoch": 1.9776858306634681, "grad_norm": 0.1466483622789383, "learning_rate": 6.815215524643655e-05, "loss": 0.1692, "step": 30710 }, { "epoch": 1.9783298182345082, "grad_norm": 0.15545962750911713, "learning_rate": 6.810922205048944e-05, "loss": 0.1754, "step": 30720 }, { "epoch": 1.978973805805548, "grad_norm": 0.1557532548904419, "learning_rate": 6.806628885454233e-05, "loss": 0.1725, "step": 30730 }, { "epoch": 1.9796177933765877, "grad_norm": 0.13781599700450897, "learning_rate": 6.802335565859524e-05, "loss": 0.1717, "step": 30740 }, { "epoch": 1.9802617809476277, "grad_norm": 0.13841724395751953, "learning_rate": 6.798042246264811e-05, "loss": 0.1722, "step": 30750 }, { "epoch": 1.9809057685186677, "grad_norm": 0.12537936866283417, "learning_rate": 6.793748926670102e-05, "loss": 0.1702, "step": 30760 }, { "epoch": 1.9815497560897075, "grad_norm": 0.13715116679668427, "learning_rate": 6.789455607075391e-05, "loss": 0.179, "step": 30770 }, { "epoch": 1.9821937436607473, "grad_norm": 0.1588275134563446, "learning_rate": 6.785162287480681e-05, "loss": 0.1777, "step": 30780 }, { "epoch": 1.982837731231787, "grad_norm": 0.14208568632602692, "learning_rate": 6.780868967885969e-05, "loss": 0.1698, "step": 30790 }, { "epoch": 1.983481718802827, "grad_norm": 0.1705465167760849, "learning_rate": 6.776575648291259e-05, "loss": 0.1739, "step": 30800 }, { "epoch": 1.984125706373867, "grad_norm": 0.13628390431404114, "learning_rate": 6.772282328696548e-05, "loss": 0.1727, "step": 30810 }, { "epoch": 1.9847696939449069, "grad_norm": 0.16478578746318817, "learning_rate": 6.767989009101839e-05, "loss": 0.1736, "step": 30820 }, { "epoch": 1.9854136815159467, "grad_norm": 0.15762051939964294, "learning_rate": 6.763695689507126e-05, "loss": 0.1776, "step": 30830 }, { "epoch": 1.9860576690869867, "grad_norm": 0.12776000797748566, "learning_rate": 6.759402369912417e-05, "loss": 0.1732, "step": 30840 }, { "epoch": 1.9867016566580264, "grad_norm": 0.16747087240219116, "learning_rate": 6.755109050317706e-05, "loss": 0.1778, "step": 30850 }, { "epoch": 1.9873456442290665, "grad_norm": 0.13072934746742249, "learning_rate": 6.750815730722996e-05, "loss": 0.167, "step": 30860 }, { "epoch": 1.9879896318001062, "grad_norm": 0.12367163598537445, "learning_rate": 6.746522411128284e-05, "loss": 0.1712, "step": 30870 }, { "epoch": 1.988633619371146, "grad_norm": 0.12200035154819489, "learning_rate": 6.742229091533575e-05, "loss": 0.1688, "step": 30880 }, { "epoch": 1.989277606942186, "grad_norm": 0.1460222750902176, "learning_rate": 6.737935771938864e-05, "loss": 0.173, "step": 30890 }, { "epoch": 1.989921594513226, "grad_norm": 0.13721442222595215, "learning_rate": 6.733642452344153e-05, "loss": 0.1708, "step": 30900 }, { "epoch": 1.9905655820842658, "grad_norm": 0.13630971312522888, "learning_rate": 6.729349132749442e-05, "loss": 0.1699, "step": 30910 }, { "epoch": 1.9912095696553056, "grad_norm": 0.13508020341396332, "learning_rate": 6.725055813154731e-05, "loss": 0.1713, "step": 30920 }, { "epoch": 1.9918535572263454, "grad_norm": 0.13882164657115936, "learning_rate": 6.720762493560021e-05, "loss": 0.1721, "step": 30930 }, { "epoch": 1.9924975447973854, "grad_norm": 0.13790647685527802, "learning_rate": 6.71646917396531e-05, "loss": 0.177, "step": 30940 }, { "epoch": 1.9931415323684254, "grad_norm": 0.15190787613391876, "learning_rate": 6.7121758543706e-05, "loss": 0.1804, "step": 30950 }, { "epoch": 1.9937855199394652, "grad_norm": 0.14287252724170685, "learning_rate": 6.707882534775888e-05, "loss": 0.1727, "step": 30960 }, { "epoch": 1.994429507510505, "grad_norm": 0.13138175010681152, "learning_rate": 6.703589215181179e-05, "loss": 0.165, "step": 30970 }, { "epoch": 1.995073495081545, "grad_norm": 0.14345134794712067, "learning_rate": 6.699295895586468e-05, "loss": 0.1715, "step": 30980 }, { "epoch": 1.9957174826525848, "grad_norm": 0.13775770366191864, "learning_rate": 6.695002575991757e-05, "loss": 0.1742, "step": 30990 }, { "epoch": 1.9963614702236248, "grad_norm": 0.13411925733089447, "learning_rate": 6.690709256397046e-05, "loss": 0.1766, "step": 31000 } ], "logging_steps": 10, "max_steps": 46584, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4.748236898162934e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }