{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 143758, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.993043865384883e-05, "loss": 3.1775, "step": 200 }, { "epoch": 0.01, "learning_rate": 4.986087730769766e-05, "loss": 2.9882, "step": 400 }, { "epoch": 0.01, "learning_rate": 4.979131596154649e-05, "loss": 2.932, "step": 600 }, { "epoch": 0.01, "learning_rate": 4.9721754615395325e-05, "loss": 2.8774, "step": 800 }, { "epoch": 0.01, "learning_rate": 4.965219326924415e-05, "loss": 2.9227, "step": 1000 }, { "epoch": 0.02, "learning_rate": 4.9582631923092976e-05, "loss": 2.9011, "step": 1200 }, { "epoch": 0.02, "learning_rate": 4.9513070576941804e-05, "loss": 2.8233, "step": 1400 }, { "epoch": 0.02, "learning_rate": 4.944350923079063e-05, "loss": 2.859, "step": 1600 }, { "epoch": 0.03, "learning_rate": 4.937394788463946e-05, "loss": 2.8269, "step": 1800 }, { "epoch": 0.03, "learning_rate": 4.93043865384883e-05, "loss": 2.82, "step": 2000 }, { "epoch": 0.03, "learning_rate": 4.9234825192337127e-05, "loss": 2.7556, "step": 2200 }, { "epoch": 0.03, "learning_rate": 4.9165263846185955e-05, "loss": 2.7388, "step": 2400 }, { "epoch": 0.04, "learning_rate": 4.9095702500034784e-05, "loss": 2.8441, "step": 2600 }, { "epoch": 0.04, "learning_rate": 4.902614115388361e-05, "loss": 2.7828, "step": 2800 }, { "epoch": 0.04, "learning_rate": 4.895657980773244e-05, "loss": 2.7528, "step": 3000 }, { "epoch": 0.04, "learning_rate": 4.888701846158127e-05, "loss": 2.7744, "step": 3200 }, { "epoch": 0.05, "learning_rate": 4.88174571154301e-05, "loss": 2.7752, "step": 3400 }, { "epoch": 0.05, "learning_rate": 4.874789576927893e-05, "loss": 2.7288, "step": 3600 }, { "epoch": 0.05, "learning_rate": 4.867833442312776e-05, "loss": 2.7043, "step": 3800 }, { "epoch": 0.06, "learning_rate": 4.8608773076976586e-05, "loss": 2.7296, "step": 4000 }, { "epoch": 0.06, "learning_rate": 4.8539211730825415e-05, "loss": 2.6831, "step": 4200 }, { "epoch": 0.06, "learning_rate": 4.846965038467425e-05, "loss": 2.7519, "step": 4400 }, { "epoch": 0.06, "learning_rate": 4.840008903852308e-05, "loss": 2.6923, "step": 4600 }, { "epoch": 0.07, "learning_rate": 4.833052769237191e-05, "loss": 2.6681, "step": 4800 }, { "epoch": 0.07, "learning_rate": 4.826096634622073e-05, "loss": 2.6918, "step": 5000 }, { "epoch": 0.07, "learning_rate": 4.819140500006956e-05, "loss": 2.6827, "step": 5200 }, { "epoch": 0.08, "learning_rate": 4.812184365391839e-05, "loss": 2.6477, "step": 5400 }, { "epoch": 0.08, "learning_rate": 4.8052282307767224e-05, "loss": 2.6638, "step": 5600 }, { "epoch": 0.08, "learning_rate": 4.798272096161605e-05, "loss": 2.7062, "step": 5800 }, { "epoch": 0.08, "learning_rate": 4.791315961546488e-05, "loss": 2.6954, "step": 6000 }, { "epoch": 0.09, "learning_rate": 4.784359826931371e-05, "loss": 2.7133, "step": 6200 }, { "epoch": 0.09, "learning_rate": 4.777403692316254e-05, "loss": 2.693, "step": 6400 }, { "epoch": 0.09, "learning_rate": 4.770447557701137e-05, "loss": 2.632, "step": 6600 }, { "epoch": 0.09, "learning_rate": 4.76349142308602e-05, "loss": 2.6715, "step": 6800 }, { "epoch": 0.1, "learning_rate": 4.7565352884709026e-05, "loss": 2.6947, "step": 7000 }, { "epoch": 0.1, "learning_rate": 4.7495791538557855e-05, "loss": 2.6396, "step": 7200 }, { "epoch": 0.1, "learning_rate": 4.7426230192406684e-05, "loss": 2.6506, "step": 7400 }, { "epoch": 0.11, "learning_rate": 4.735666884625551e-05, "loss": 2.6204, "step": 7600 }, { "epoch": 0.11, "learning_rate": 4.728710750010434e-05, "loss": 2.6298, "step": 7800 }, { "epoch": 0.11, "learning_rate": 4.721754615395318e-05, "loss": 2.5967, "step": 8000 }, { "epoch": 0.11, "learning_rate": 4.7147984807802006e-05, "loss": 2.6396, "step": 8200 }, { "epoch": 0.12, "learning_rate": 4.7078423461650835e-05, "loss": 2.601, "step": 8400 }, { "epoch": 0.12, "learning_rate": 4.7008862115499664e-05, "loss": 2.6289, "step": 8600 }, { "epoch": 0.12, "learning_rate": 4.693930076934849e-05, "loss": 2.6143, "step": 8800 }, { "epoch": 0.13, "learning_rate": 4.6869739423197315e-05, "loss": 2.6672, "step": 9000 }, { "epoch": 0.13, "learning_rate": 4.680017807704615e-05, "loss": 2.6145, "step": 9200 }, { "epoch": 0.13, "learning_rate": 4.673061673089498e-05, "loss": 2.626, "step": 9400 }, { "epoch": 0.13, "learning_rate": 4.666105538474381e-05, "loss": 2.6622, "step": 9600 }, { "epoch": 0.14, "learning_rate": 4.659149403859264e-05, "loss": 2.578, "step": 9800 }, { "epoch": 0.14, "learning_rate": 4.6521932692441466e-05, "loss": 2.6476, "step": 10000 }, { "epoch": 0.14, "learning_rate": 4.6452371346290294e-05, "loss": 2.6051, "step": 10200 }, { "epoch": 0.14, "learning_rate": 4.638281000013912e-05, "loss": 2.5568, "step": 10400 }, { "epoch": 0.15, "learning_rate": 4.631324865398796e-05, "loss": 2.5498, "step": 10600 }, { "epoch": 0.15, "learning_rate": 4.624368730783679e-05, "loss": 2.5283, "step": 10800 }, { "epoch": 0.15, "learning_rate": 4.617412596168561e-05, "loss": 2.615, "step": 11000 }, { "epoch": 0.16, "learning_rate": 4.610456461553444e-05, "loss": 2.6121, "step": 11200 }, { "epoch": 0.16, "learning_rate": 4.603500326938327e-05, "loss": 2.535, "step": 11400 }, { "epoch": 0.16, "learning_rate": 4.59654419232321e-05, "loss": 2.584, "step": 11600 }, { "epoch": 0.16, "learning_rate": 4.589588057708093e-05, "loss": 2.5619, "step": 11800 }, { "epoch": 0.17, "learning_rate": 4.582631923092976e-05, "loss": 2.581, "step": 12000 }, { "epoch": 0.17, "learning_rate": 4.575675788477859e-05, "loss": 2.5513, "step": 12200 }, { "epoch": 0.17, "learning_rate": 4.568719653862742e-05, "loss": 2.5844, "step": 12400 }, { "epoch": 0.18, "learning_rate": 4.561763519247625e-05, "loss": 2.6033, "step": 12600 }, { "epoch": 0.18, "learning_rate": 4.5548073846325076e-05, "loss": 2.4988, "step": 12800 }, { "epoch": 0.18, "learning_rate": 4.5478512500173905e-05, "loss": 2.5467, "step": 13000 }, { "epoch": 0.18, "learning_rate": 4.5408951154022734e-05, "loss": 2.5825, "step": 13200 }, { "epoch": 0.19, "learning_rate": 4.533938980787156e-05, "loss": 2.5732, "step": 13400 }, { "epoch": 0.19, "learning_rate": 4.526982846172039e-05, "loss": 2.572, "step": 13600 }, { "epoch": 0.19, "learning_rate": 4.520026711556922e-05, "loss": 2.4804, "step": 13800 }, { "epoch": 0.19, "learning_rate": 4.513070576941805e-05, "loss": 2.5576, "step": 14000 }, { "epoch": 0.2, "learning_rate": 4.5061144423266885e-05, "loss": 2.5474, "step": 14200 }, { "epoch": 0.2, "learning_rate": 4.4991583077115714e-05, "loss": 2.5262, "step": 14400 }, { "epoch": 0.2, "learning_rate": 4.492202173096454e-05, "loss": 2.5679, "step": 14600 }, { "epoch": 0.21, "learning_rate": 4.485246038481337e-05, "loss": 2.5541, "step": 14800 }, { "epoch": 0.21, "learning_rate": 4.4782899038662194e-05, "loss": 2.4937, "step": 15000 }, { "epoch": 0.21, "learning_rate": 4.471333769251102e-05, "loss": 2.5643, "step": 15200 }, { "epoch": 0.21, "learning_rate": 4.464377634635986e-05, "loss": 2.5978, "step": 15400 }, { "epoch": 0.22, "learning_rate": 4.457421500020869e-05, "loss": 2.4727, "step": 15600 }, { "epoch": 0.22, "learning_rate": 4.4504653654057516e-05, "loss": 2.613, "step": 15800 }, { "epoch": 0.22, "learning_rate": 4.4435092307906345e-05, "loss": 2.5453, "step": 16000 }, { "epoch": 0.23, "learning_rate": 4.4365530961755174e-05, "loss": 2.5362, "step": 16200 }, { "epoch": 0.23, "learning_rate": 4.4295969615604e-05, "loss": 2.5296, "step": 16400 }, { "epoch": 0.23, "learning_rate": 4.422640826945283e-05, "loss": 2.5282, "step": 16600 }, { "epoch": 0.23, "learning_rate": 4.415684692330167e-05, "loss": 2.4973, "step": 16800 }, { "epoch": 0.24, "learning_rate": 4.408728557715049e-05, "loss": 2.5563, "step": 17000 }, { "epoch": 0.24, "learning_rate": 4.401772423099932e-05, "loss": 2.5514, "step": 17200 }, { "epoch": 0.24, "learning_rate": 4.394816288484815e-05, "loss": 2.5327, "step": 17400 }, { "epoch": 0.24, "learning_rate": 4.3878601538696976e-05, "loss": 2.5251, "step": 17600 }, { "epoch": 0.25, "learning_rate": 4.380904019254581e-05, "loss": 2.5695, "step": 17800 }, { "epoch": 0.25, "learning_rate": 4.373947884639464e-05, "loss": 2.4936, "step": 18000 }, { "epoch": 0.25, "learning_rate": 4.366991750024347e-05, "loss": 2.516, "step": 18200 }, { "epoch": 0.26, "learning_rate": 4.36003561540923e-05, "loss": 2.5568, "step": 18400 }, { "epoch": 0.26, "learning_rate": 4.353079480794113e-05, "loss": 2.5253, "step": 18600 }, { "epoch": 0.26, "learning_rate": 4.346123346178995e-05, "loss": 2.4991, "step": 18800 }, { "epoch": 0.26, "learning_rate": 4.3391672115638785e-05, "loss": 2.5029, "step": 19000 }, { "epoch": 0.27, "learning_rate": 4.332211076948761e-05, "loss": 2.5178, "step": 19200 }, { "epoch": 0.27, "learning_rate": 4.325254942333644e-05, "loss": 2.4964, "step": 19400 }, { "epoch": 0.27, "learning_rate": 4.318298807718527e-05, "loss": 2.4339, "step": 19600 }, { "epoch": 0.28, "learning_rate": 4.31134267310341e-05, "loss": 2.4874, "step": 19800 }, { "epoch": 0.28, "learning_rate": 4.304386538488293e-05, "loss": 2.4894, "step": 20000 }, { "epoch": 0.28, "learning_rate": 4.297430403873176e-05, "loss": 2.5352, "step": 20200 }, { "epoch": 0.28, "learning_rate": 4.290474269258059e-05, "loss": 2.5251, "step": 20400 }, { "epoch": 0.29, "learning_rate": 4.283518134642942e-05, "loss": 2.4545, "step": 20600 }, { "epoch": 0.29, "learning_rate": 4.2765620000278244e-05, "loss": 2.4831, "step": 20800 }, { "epoch": 0.29, "learning_rate": 4.269605865412707e-05, "loss": 2.5145, "step": 21000 }, { "epoch": 0.29, "learning_rate": 4.26264973079759e-05, "loss": 2.4543, "step": 21200 }, { "epoch": 0.3, "learning_rate": 4.255693596182474e-05, "loss": 2.5056, "step": 21400 }, { "epoch": 0.3, "learning_rate": 4.2487374615673566e-05, "loss": 2.4933, "step": 21600 }, { "epoch": 0.3, "learning_rate": 4.2417813269522395e-05, "loss": 2.4786, "step": 21800 }, { "epoch": 0.31, "learning_rate": 4.2348251923371224e-05, "loss": 2.3971, "step": 22000 }, { "epoch": 0.31, "learning_rate": 4.227869057722005e-05, "loss": 2.4759, "step": 22200 }, { "epoch": 0.31, "learning_rate": 4.220912923106888e-05, "loss": 2.4637, "step": 22400 }, { "epoch": 0.31, "learning_rate": 4.213956788491771e-05, "loss": 2.5033, "step": 22600 }, { "epoch": 0.32, "learning_rate": 4.207000653876654e-05, "loss": 2.5021, "step": 22800 }, { "epoch": 0.32, "learning_rate": 4.200044519261537e-05, "loss": 2.4136, "step": 23000 }, { "epoch": 0.32, "learning_rate": 4.19308838464642e-05, "loss": 2.5604, "step": 23200 }, { "epoch": 0.33, "learning_rate": 4.1861322500313026e-05, "loss": 2.5054, "step": 23400 }, { "epoch": 0.33, "learning_rate": 4.1791761154161855e-05, "loss": 2.4761, "step": 23600 }, { "epoch": 0.33, "learning_rate": 4.1722199808010684e-05, "loss": 2.4802, "step": 23800 }, { "epoch": 0.33, "learning_rate": 4.165263846185952e-05, "loss": 2.5241, "step": 24000 }, { "epoch": 0.34, "learning_rate": 4.158307711570835e-05, "loss": 2.4519, "step": 24200 }, { "epoch": 0.34, "learning_rate": 4.151351576955718e-05, "loss": 2.487, "step": 24400 }, { "epoch": 0.34, "learning_rate": 4.1443954423406006e-05, "loss": 2.4719, "step": 24600 }, { "epoch": 0.35, "learning_rate": 4.137439307725483e-05, "loss": 2.5132, "step": 24800 }, { "epoch": 0.35, "learning_rate": 4.130483173110366e-05, "loss": 2.4432, "step": 25000 }, { "epoch": 0.35, "learning_rate": 4.123527038495249e-05, "loss": 2.448, "step": 25200 }, { "epoch": 0.35, "learning_rate": 4.116570903880132e-05, "loss": 2.4659, "step": 25400 }, { "epoch": 0.36, "learning_rate": 4.109614769265015e-05, "loss": 2.4596, "step": 25600 }, { "epoch": 0.36, "learning_rate": 4.102658634649898e-05, "loss": 2.473, "step": 25800 }, { "epoch": 0.36, "learning_rate": 4.095702500034781e-05, "loss": 2.4812, "step": 26000 }, { "epoch": 0.36, "learning_rate": 4.088746365419664e-05, "loss": 2.43, "step": 26200 }, { "epoch": 0.37, "learning_rate": 4.081790230804547e-05, "loss": 2.4743, "step": 26400 }, { "epoch": 0.37, "learning_rate": 4.07483409618943e-05, "loss": 2.4693, "step": 26600 }, { "epoch": 0.37, "learning_rate": 4.0678779615743124e-05, "loss": 2.4932, "step": 26800 }, { "epoch": 0.38, "learning_rate": 4.060921826959195e-05, "loss": 2.4459, "step": 27000 }, { "epoch": 0.38, "learning_rate": 4.053965692344078e-05, "loss": 2.4123, "step": 27200 }, { "epoch": 0.38, "learning_rate": 4.047009557728961e-05, "loss": 2.4364, "step": 27400 }, { "epoch": 0.38, "learning_rate": 4.0400534231138446e-05, "loss": 2.4428, "step": 27600 }, { "epoch": 0.39, "learning_rate": 4.0330972884987275e-05, "loss": 2.4157, "step": 27800 }, { "epoch": 0.39, "learning_rate": 4.0261411538836103e-05, "loss": 2.4145, "step": 28000 }, { "epoch": 0.39, "learning_rate": 4.019185019268493e-05, "loss": 2.5131, "step": 28200 }, { "epoch": 0.4, "learning_rate": 4.012228884653376e-05, "loss": 2.4509, "step": 28400 }, { "epoch": 0.4, "learning_rate": 4.005272750038259e-05, "loss": 2.4781, "step": 28600 }, { "epoch": 0.4, "learning_rate": 3.998316615423142e-05, "loss": 2.428, "step": 28800 }, { "epoch": 0.4, "learning_rate": 3.991360480808025e-05, "loss": 2.4402, "step": 29000 }, { "epoch": 0.41, "learning_rate": 3.9844043461929077e-05, "loss": 2.3886, "step": 29200 }, { "epoch": 0.41, "learning_rate": 3.9774482115777905e-05, "loss": 2.4329, "step": 29400 }, { "epoch": 0.41, "learning_rate": 3.9704920769626734e-05, "loss": 2.4038, "step": 29600 }, { "epoch": 0.41, "learning_rate": 3.963535942347556e-05, "loss": 2.3857, "step": 29800 }, { "epoch": 0.42, "learning_rate": 3.956579807732439e-05, "loss": 2.3806, "step": 30000 }, { "epoch": 0.42, "learning_rate": 3.949623673117323e-05, "loss": 2.4243, "step": 30200 }, { "epoch": 0.42, "learning_rate": 3.9426675385022057e-05, "loss": 2.4545, "step": 30400 }, { "epoch": 0.43, "learning_rate": 3.9357114038870885e-05, "loss": 2.5031, "step": 30600 }, { "epoch": 0.43, "learning_rate": 3.928755269271971e-05, "loss": 2.4046, "step": 30800 }, { "epoch": 0.43, "learning_rate": 3.9217991346568536e-05, "loss": 2.4326, "step": 31000 }, { "epoch": 0.43, "learning_rate": 3.914843000041737e-05, "loss": 2.4118, "step": 31200 }, { "epoch": 0.44, "learning_rate": 3.90788686542662e-05, "loss": 2.4556, "step": 31400 }, { "epoch": 0.44, "learning_rate": 3.900930730811503e-05, "loss": 2.4579, "step": 31600 }, { "epoch": 0.44, "learning_rate": 3.893974596196386e-05, "loss": 2.3932, "step": 31800 }, { "epoch": 0.45, "learning_rate": 3.887018461581269e-05, "loss": 2.4281, "step": 32000 }, { "epoch": 0.45, "learning_rate": 3.8800623269661516e-05, "loss": 2.4362, "step": 32200 }, { "epoch": 0.45, "learning_rate": 3.8731061923510345e-05, "loss": 2.4217, "step": 32400 }, { "epoch": 0.45, "learning_rate": 3.866150057735918e-05, "loss": 2.4305, "step": 32600 }, { "epoch": 0.46, "learning_rate": 3.8591939231208e-05, "loss": 2.441, "step": 32800 }, { "epoch": 0.46, "learning_rate": 3.852237788505683e-05, "loss": 2.422, "step": 33000 }, { "epoch": 0.46, "learning_rate": 3.845281653890566e-05, "loss": 2.3997, "step": 33200 }, { "epoch": 0.46, "learning_rate": 3.838325519275449e-05, "loss": 2.3749, "step": 33400 }, { "epoch": 0.47, "learning_rate": 3.831369384660332e-05, "loss": 2.4073, "step": 33600 }, { "epoch": 0.47, "learning_rate": 3.8244132500452154e-05, "loss": 2.4585, "step": 33800 }, { "epoch": 0.47, "learning_rate": 3.817457115430098e-05, "loss": 2.4215, "step": 34000 }, { "epoch": 0.48, "learning_rate": 3.810500980814981e-05, "loss": 2.414, "step": 34200 }, { "epoch": 0.48, "learning_rate": 3.803544846199864e-05, "loss": 2.4223, "step": 34400 }, { "epoch": 0.48, "learning_rate": 3.796588711584746e-05, "loss": 2.4012, "step": 34600 }, { "epoch": 0.48, "learning_rate": 3.78963257696963e-05, "loss": 2.422, "step": 34800 }, { "epoch": 0.49, "learning_rate": 3.782676442354513e-05, "loss": 2.3821, "step": 35000 }, { "epoch": 0.49, "learning_rate": 3.7757203077393956e-05, "loss": 2.4588, "step": 35200 }, { "epoch": 0.49, "learning_rate": 3.7687641731242785e-05, "loss": 2.4303, "step": 35400 }, { "epoch": 0.5, "learning_rate": 3.7618080385091614e-05, "loss": 2.4043, "step": 35600 }, { "epoch": 0.5, "learning_rate": 3.754851903894044e-05, "loss": 2.3885, "step": 35800 }, { "epoch": 0.5, "learning_rate": 3.747895769278927e-05, "loss": 2.452, "step": 36000 }, { "epoch": 0.5, "learning_rate": 3.740939634663811e-05, "loss": 2.4303, "step": 36200 }, { "epoch": 0.51, "learning_rate": 3.7339835000486936e-05, "loss": 2.4219, "step": 36400 }, { "epoch": 0.51, "learning_rate": 3.727027365433576e-05, "loss": 2.4514, "step": 36600 }, { "epoch": 0.51, "learning_rate": 3.720071230818459e-05, "loss": 2.3961, "step": 36800 }, { "epoch": 0.51, "learning_rate": 3.7131150962033416e-05, "loss": 2.3741, "step": 37000 }, { "epoch": 0.52, "learning_rate": 3.7061589615882244e-05, "loss": 2.4553, "step": 37200 }, { "epoch": 0.52, "learning_rate": 3.699202826973108e-05, "loss": 2.3806, "step": 37400 }, { "epoch": 0.52, "learning_rate": 3.692246692357991e-05, "loss": 2.416, "step": 37600 }, { "epoch": 0.53, "learning_rate": 3.685290557742874e-05, "loss": 2.3745, "step": 37800 }, { "epoch": 0.53, "learning_rate": 3.678334423127757e-05, "loss": 2.4195, "step": 38000 }, { "epoch": 0.53, "learning_rate": 3.6713782885126395e-05, "loss": 2.4216, "step": 38200 }, { "epoch": 0.53, "learning_rate": 3.6644221538975224e-05, "loss": 2.3769, "step": 38400 }, { "epoch": 0.54, "learning_rate": 3.657466019282405e-05, "loss": 2.3851, "step": 38600 }, { "epoch": 0.54, "learning_rate": 3.650509884667288e-05, "loss": 2.4112, "step": 38800 }, { "epoch": 0.54, "learning_rate": 3.643553750052171e-05, "loss": 2.4096, "step": 39000 }, { "epoch": 0.55, "learning_rate": 3.636597615437054e-05, "loss": 2.4354, "step": 39200 }, { "epoch": 0.55, "learning_rate": 3.629641480821937e-05, "loss": 2.422, "step": 39400 }, { "epoch": 0.55, "learning_rate": 3.62268534620682e-05, "loss": 2.4195, "step": 39600 }, { "epoch": 0.55, "learning_rate": 3.6157292115917026e-05, "loss": 2.4506, "step": 39800 }, { "epoch": 0.56, "learning_rate": 3.608773076976586e-05, "loss": 2.4016, "step": 40000 }, { "epoch": 0.56, "learning_rate": 3.601816942361469e-05, "loss": 2.3905, "step": 40200 }, { "epoch": 0.56, "learning_rate": 3.594860807746352e-05, "loss": 2.4293, "step": 40400 }, { "epoch": 0.56, "learning_rate": 3.587904673131234e-05, "loss": 2.4237, "step": 40600 }, { "epoch": 0.57, "learning_rate": 3.580948538516117e-05, "loss": 2.3867, "step": 40800 }, { "epoch": 0.57, "learning_rate": 3.5739924039010006e-05, "loss": 2.3885, "step": 41000 }, { "epoch": 0.57, "learning_rate": 3.5670362692858835e-05, "loss": 2.4321, "step": 41200 }, { "epoch": 0.58, "learning_rate": 3.5600801346707664e-05, "loss": 2.3701, "step": 41400 }, { "epoch": 0.58, "learning_rate": 3.553124000055649e-05, "loss": 2.4581, "step": 41600 }, { "epoch": 0.58, "learning_rate": 3.546167865440532e-05, "loss": 2.398, "step": 41800 }, { "epoch": 0.58, "learning_rate": 3.539211730825415e-05, "loss": 2.4155, "step": 42000 }, { "epoch": 0.59, "learning_rate": 3.532255596210298e-05, "loss": 2.4122, "step": 42200 }, { "epoch": 0.59, "learning_rate": 3.5252994615951815e-05, "loss": 2.3833, "step": 42400 }, { "epoch": 0.59, "learning_rate": 3.518343326980064e-05, "loss": 2.4614, "step": 42600 }, { "epoch": 0.6, "learning_rate": 3.5113871923649466e-05, "loss": 2.3859, "step": 42800 }, { "epoch": 0.6, "learning_rate": 3.5044310577498295e-05, "loss": 2.3674, "step": 43000 }, { "epoch": 0.6, "learning_rate": 3.4974749231347124e-05, "loss": 2.4227, "step": 43200 }, { "epoch": 0.6, "learning_rate": 3.490518788519595e-05, "loss": 2.4031, "step": 43400 }, { "epoch": 0.61, "learning_rate": 3.483562653904479e-05, "loss": 2.4271, "step": 43600 }, { "epoch": 0.61, "learning_rate": 3.476606519289362e-05, "loss": 2.4176, "step": 43800 }, { "epoch": 0.61, "learning_rate": 3.4696503846742446e-05, "loss": 2.4076, "step": 44000 }, { "epoch": 0.61, "learning_rate": 3.4626942500591275e-05, "loss": 2.3618, "step": 44200 }, { "epoch": 0.62, "learning_rate": 3.4557381154440104e-05, "loss": 2.3998, "step": 44400 }, { "epoch": 0.62, "learning_rate": 3.448781980828893e-05, "loss": 2.4145, "step": 44600 }, { "epoch": 0.62, "learning_rate": 3.441825846213776e-05, "loss": 2.3621, "step": 44800 }, { "epoch": 0.63, "learning_rate": 3.434869711598659e-05, "loss": 2.4247, "step": 45000 }, { "epoch": 0.63, "learning_rate": 3.427913576983542e-05, "loss": 2.3934, "step": 45200 }, { "epoch": 0.63, "learning_rate": 3.420957442368425e-05, "loss": 2.379, "step": 45400 }, { "epoch": 0.63, "learning_rate": 3.414001307753308e-05, "loss": 2.3932, "step": 45600 }, { "epoch": 0.64, "learning_rate": 3.4070451731381906e-05, "loss": 2.3922, "step": 45800 }, { "epoch": 0.64, "learning_rate": 3.400089038523074e-05, "loss": 2.4221, "step": 46000 }, { "epoch": 0.64, "learning_rate": 3.393132903907957e-05, "loss": 2.3921, "step": 46200 }, { "epoch": 0.65, "learning_rate": 3.38617676929284e-05, "loss": 2.3484, "step": 46400 }, { "epoch": 0.65, "learning_rate": 3.379220634677722e-05, "loss": 2.3812, "step": 46600 }, { "epoch": 0.65, "learning_rate": 3.372264500062605e-05, "loss": 2.3616, "step": 46800 }, { "epoch": 0.65, "learning_rate": 3.365308365447488e-05, "loss": 2.388, "step": 47000 }, { "epoch": 0.66, "learning_rate": 3.3583522308323714e-05, "loss": 2.3102, "step": 47200 }, { "epoch": 0.66, "learning_rate": 3.351396096217254e-05, "loss": 2.3746, "step": 47400 }, { "epoch": 0.66, "learning_rate": 3.344439961602137e-05, "loss": 2.4212, "step": 47600 }, { "epoch": 0.67, "learning_rate": 3.33748382698702e-05, "loss": 2.3558, "step": 47800 }, { "epoch": 0.67, "learning_rate": 3.330527692371903e-05, "loss": 2.393, "step": 48000 }, { "epoch": 0.67, "learning_rate": 3.323571557756786e-05, "loss": 2.3798, "step": 48200 }, { "epoch": 0.67, "learning_rate": 3.316615423141669e-05, "loss": 2.4259, "step": 48400 }, { "epoch": 0.68, "learning_rate": 3.3096592885265516e-05, "loss": 2.3816, "step": 48600 }, { "epoch": 0.68, "learning_rate": 3.3027031539114345e-05, "loss": 2.3699, "step": 48800 }, { "epoch": 0.68, "learning_rate": 3.2957470192963174e-05, "loss": 2.4018, "step": 49000 }, { "epoch": 0.68, "learning_rate": 3.2887908846812e-05, "loss": 2.3356, "step": 49200 }, { "epoch": 0.69, "learning_rate": 3.281834750066083e-05, "loss": 2.347, "step": 49400 }, { "epoch": 0.69, "learning_rate": 3.274878615450967e-05, "loss": 2.3598, "step": 49600 }, { "epoch": 0.69, "learning_rate": 3.2679224808358496e-05, "loss": 2.3773, "step": 49800 }, { "epoch": 0.7, "learning_rate": 3.2609663462207325e-05, "loss": 2.3704, "step": 50000 }, { "epoch": 0.7, "learning_rate": 3.2540102116056154e-05, "loss": 2.3317, "step": 50200 }, { "epoch": 0.7, "learning_rate": 3.247054076990498e-05, "loss": 2.3878, "step": 50400 }, { "epoch": 0.7, "learning_rate": 3.2400979423753805e-05, "loss": 2.3198, "step": 50600 }, { "epoch": 0.71, "learning_rate": 3.233141807760264e-05, "loss": 2.4416, "step": 50800 }, { "epoch": 0.71, "learning_rate": 3.226185673145147e-05, "loss": 2.3275, "step": 51000 }, { "epoch": 0.71, "learning_rate": 3.21922953853003e-05, "loss": 2.3858, "step": 51200 }, { "epoch": 0.72, "learning_rate": 3.212273403914913e-05, "loss": 2.3448, "step": 51400 }, { "epoch": 0.72, "learning_rate": 3.2053172692997956e-05, "loss": 2.3493, "step": 51600 }, { "epoch": 0.72, "learning_rate": 3.1983611346846785e-05, "loss": 2.3644, "step": 51800 }, { "epoch": 0.72, "learning_rate": 3.1914050000695614e-05, "loss": 2.339, "step": 52000 }, { "epoch": 0.73, "learning_rate": 3.184448865454445e-05, "loss": 2.3393, "step": 52200 }, { "epoch": 0.73, "learning_rate": 3.177492730839327e-05, "loss": 2.3754, "step": 52400 }, { "epoch": 0.73, "learning_rate": 3.17053659622421e-05, "loss": 2.3251, "step": 52600 }, { "epoch": 0.73, "learning_rate": 3.163580461609093e-05, "loss": 2.3407, "step": 52800 }, { "epoch": 0.74, "learning_rate": 3.156624326993976e-05, "loss": 2.3092, "step": 53000 }, { "epoch": 0.74, "learning_rate": 3.149668192378859e-05, "loss": 2.3626, "step": 53200 }, { "epoch": 0.74, "learning_rate": 3.142712057763742e-05, "loss": 2.3639, "step": 53400 }, { "epoch": 0.75, "learning_rate": 3.135755923148625e-05, "loss": 2.3443, "step": 53600 }, { "epoch": 0.75, "learning_rate": 3.128799788533508e-05, "loss": 2.3274, "step": 53800 }, { "epoch": 0.75, "learning_rate": 3.121843653918391e-05, "loss": 2.3728, "step": 54000 }, { "epoch": 0.75, "learning_rate": 3.114887519303274e-05, "loss": 2.3047, "step": 54200 }, { "epoch": 0.76, "learning_rate": 3.107931384688157e-05, "loss": 2.316, "step": 54400 }, { "epoch": 0.76, "learning_rate": 3.1009752500730396e-05, "loss": 2.3326, "step": 54600 }, { "epoch": 0.76, "learning_rate": 3.0940191154579225e-05, "loss": 2.357, "step": 54800 }, { "epoch": 0.77, "learning_rate": 3.0870629808428053e-05, "loss": 2.367, "step": 55000 }, { "epoch": 0.77, "learning_rate": 3.080106846227688e-05, "loss": 2.3529, "step": 55200 }, { "epoch": 0.77, "learning_rate": 3.073150711612571e-05, "loss": 2.3748, "step": 55400 }, { "epoch": 0.77, "learning_rate": 3.066194576997454e-05, "loss": 2.3208, "step": 55600 }, { "epoch": 0.78, "learning_rate": 3.0592384423823376e-05, "loss": 2.3327, "step": 55800 }, { "epoch": 0.78, "learning_rate": 3.0522823077672204e-05, "loss": 2.3327, "step": 56000 }, { "epoch": 0.78, "learning_rate": 3.0453261731521033e-05, "loss": 2.3588, "step": 56200 }, { "epoch": 0.78, "learning_rate": 3.0383700385369855e-05, "loss": 2.3343, "step": 56400 }, { "epoch": 0.79, "learning_rate": 3.0314139039218688e-05, "loss": 2.3095, "step": 56600 }, { "epoch": 0.79, "learning_rate": 3.0244577693067517e-05, "loss": 2.3547, "step": 56800 }, { "epoch": 0.79, "learning_rate": 3.0175016346916345e-05, "loss": 2.3346, "step": 57000 }, { "epoch": 0.8, "learning_rate": 3.0105455000765174e-05, "loss": 2.289, "step": 57200 }, { "epoch": 0.8, "learning_rate": 3.0035893654614006e-05, "loss": 2.3577, "step": 57400 }, { "epoch": 0.8, "learning_rate": 2.9966332308462835e-05, "loss": 2.3592, "step": 57600 }, { "epoch": 0.8, "learning_rate": 2.9896770962311664e-05, "loss": 2.3258, "step": 57800 }, { "epoch": 0.81, "learning_rate": 2.9827209616160496e-05, "loss": 2.336, "step": 58000 }, { "epoch": 0.81, "learning_rate": 2.9757648270009325e-05, "loss": 2.3663, "step": 58200 }, { "epoch": 0.81, "learning_rate": 2.968808692385815e-05, "loss": 2.3952, "step": 58400 }, { "epoch": 0.82, "learning_rate": 2.961852557770698e-05, "loss": 2.3366, "step": 58600 }, { "epoch": 0.82, "learning_rate": 2.954896423155581e-05, "loss": 2.316, "step": 58800 }, { "epoch": 0.82, "learning_rate": 2.9479402885404637e-05, "loss": 2.3167, "step": 59000 }, { "epoch": 0.82, "learning_rate": 2.940984153925347e-05, "loss": 2.3853, "step": 59200 }, { "epoch": 0.83, "learning_rate": 2.93402801931023e-05, "loss": 2.3824, "step": 59400 }, { "epoch": 0.83, "learning_rate": 2.9270718846951127e-05, "loss": 2.385, "step": 59600 }, { "epoch": 0.83, "learning_rate": 2.920115750079996e-05, "loss": 2.3916, "step": 59800 }, { "epoch": 0.83, "learning_rate": 2.913159615464879e-05, "loss": 2.3757, "step": 60000 }, { "epoch": 0.84, "learning_rate": 2.9062034808497617e-05, "loss": 2.3031, "step": 60200 }, { "epoch": 0.84, "learning_rate": 2.8992473462346443e-05, "loss": 2.3391, "step": 60400 }, { "epoch": 0.84, "learning_rate": 2.892291211619527e-05, "loss": 2.3327, "step": 60600 }, { "epoch": 0.85, "learning_rate": 2.88533507700441e-05, "loss": 2.3553, "step": 60800 }, { "epoch": 0.85, "learning_rate": 2.8783789423892933e-05, "loss": 2.4071, "step": 61000 }, { "epoch": 0.85, "learning_rate": 2.871422807774176e-05, "loss": 2.2922, "step": 61200 }, { "epoch": 0.85, "learning_rate": 2.864466673159059e-05, "loss": 2.2956, "step": 61400 }, { "epoch": 0.86, "learning_rate": 2.8575105385439423e-05, "loss": 2.3518, "step": 61600 }, { "epoch": 0.86, "learning_rate": 2.850554403928825e-05, "loss": 2.3378, "step": 61800 }, { "epoch": 0.86, "learning_rate": 2.843598269313708e-05, "loss": 2.3567, "step": 62000 }, { "epoch": 0.87, "learning_rate": 2.836642134698591e-05, "loss": 2.3835, "step": 62200 }, { "epoch": 0.87, "learning_rate": 2.8296860000834735e-05, "loss": 2.3527, "step": 62400 }, { "epoch": 0.87, "learning_rate": 2.8227298654683564e-05, "loss": 2.3609, "step": 62600 }, { "epoch": 0.87, "learning_rate": 2.8157737308532396e-05, "loss": 2.3263, "step": 62800 }, { "epoch": 0.88, "learning_rate": 2.8088175962381225e-05, "loss": 2.3604, "step": 63000 }, { "epoch": 0.88, "learning_rate": 2.8018614616230054e-05, "loss": 2.371, "step": 63200 }, { "epoch": 0.88, "learning_rate": 2.7949053270078886e-05, "loss": 2.3555, "step": 63400 }, { "epoch": 0.88, "learning_rate": 2.7879491923927715e-05, "loss": 2.3573, "step": 63600 }, { "epoch": 0.89, "learning_rate": 2.7809930577776543e-05, "loss": 2.3151, "step": 63800 }, { "epoch": 0.89, "learning_rate": 2.7740369231625372e-05, "loss": 2.342, "step": 64000 }, { "epoch": 0.89, "learning_rate": 2.7670807885474205e-05, "loss": 2.3264, "step": 64200 }, { "epoch": 0.9, "learning_rate": 2.7601246539323027e-05, "loss": 2.3124, "step": 64400 }, { "epoch": 0.9, "learning_rate": 2.753168519317186e-05, "loss": 2.3672, "step": 64600 }, { "epoch": 0.9, "learning_rate": 2.7462123847020688e-05, "loss": 2.3758, "step": 64800 }, { "epoch": 0.9, "learning_rate": 2.7392562500869517e-05, "loss": 2.3426, "step": 65000 }, { "epoch": 0.91, "learning_rate": 2.732300115471835e-05, "loss": 2.3127, "step": 65200 }, { "epoch": 0.91, "learning_rate": 2.7253439808567178e-05, "loss": 2.351, "step": 65400 }, { "epoch": 0.91, "learning_rate": 2.7183878462416007e-05, "loss": 2.3214, "step": 65600 }, { "epoch": 0.92, "learning_rate": 2.7114317116264835e-05, "loss": 2.3141, "step": 65800 }, { "epoch": 0.92, "learning_rate": 2.7044755770113668e-05, "loss": 2.3095, "step": 66000 }, { "epoch": 0.92, "learning_rate": 2.6975194423962497e-05, "loss": 2.3303, "step": 66200 }, { "epoch": 0.92, "learning_rate": 2.6905633077811322e-05, "loss": 2.3111, "step": 66400 }, { "epoch": 0.93, "learning_rate": 2.683607173166015e-05, "loss": 2.2752, "step": 66600 }, { "epoch": 0.93, "learning_rate": 2.676651038550898e-05, "loss": 2.3245, "step": 66800 }, { "epoch": 0.93, "learning_rate": 2.6696949039357812e-05, "loss": 2.3506, "step": 67000 }, { "epoch": 0.93, "learning_rate": 2.662738769320664e-05, "loss": 2.2878, "step": 67200 }, { "epoch": 0.94, "learning_rate": 2.655782634705547e-05, "loss": 2.2858, "step": 67400 }, { "epoch": 0.94, "learning_rate": 2.64882650009043e-05, "loss": 2.3803, "step": 67600 }, { "epoch": 0.94, "learning_rate": 2.641870365475313e-05, "loss": 2.334, "step": 67800 }, { "epoch": 0.95, "learning_rate": 2.634914230860196e-05, "loss": 2.3088, "step": 68000 }, { "epoch": 0.95, "learning_rate": 2.627958096245079e-05, "loss": 2.3356, "step": 68200 }, { "epoch": 0.95, "learning_rate": 2.6210019616299614e-05, "loss": 2.321, "step": 68400 }, { "epoch": 0.95, "learning_rate": 2.6140458270148443e-05, "loss": 2.3396, "step": 68600 }, { "epoch": 0.96, "learning_rate": 2.607089692399727e-05, "loss": 2.2976, "step": 68800 }, { "epoch": 0.96, "learning_rate": 2.6001335577846104e-05, "loss": 2.2884, "step": 69000 }, { "epoch": 0.96, "learning_rate": 2.5931774231694933e-05, "loss": 2.3099, "step": 69200 }, { "epoch": 0.97, "learning_rate": 2.586221288554376e-05, "loss": 2.3217, "step": 69400 }, { "epoch": 0.97, "learning_rate": 2.5792651539392594e-05, "loss": 2.2856, "step": 69600 }, { "epoch": 0.97, "learning_rate": 2.5723090193241423e-05, "loss": 2.3615, "step": 69800 }, { "epoch": 0.97, "learning_rate": 2.565352884709025e-05, "loss": 2.2836, "step": 70000 }, { "epoch": 0.98, "learning_rate": 2.5583967500939077e-05, "loss": 2.3666, "step": 70200 }, { "epoch": 0.98, "learning_rate": 2.5514406154787906e-05, "loss": 2.3123, "step": 70400 }, { "epoch": 0.98, "learning_rate": 2.5444844808636735e-05, "loss": 2.3703, "step": 70600 }, { "epoch": 0.98, "learning_rate": 2.5375283462485567e-05, "loss": 2.3, "step": 70800 }, { "epoch": 0.99, "learning_rate": 2.5305722116334396e-05, "loss": 2.3252, "step": 71000 }, { "epoch": 0.99, "learning_rate": 2.5236160770183225e-05, "loss": 2.2764, "step": 71200 }, { "epoch": 0.99, "learning_rate": 2.5166599424032057e-05, "loss": 2.3298, "step": 71400 }, { "epoch": 1.0, "learning_rate": 2.5097038077880886e-05, "loss": 2.2929, "step": 71600 }, { "epoch": 1.0, "learning_rate": 2.5027476731729715e-05, "loss": 2.2647, "step": 71800 }, { "epoch": 1.0, "learning_rate": 2.4957915385578544e-05, "loss": 2.2967, "step": 72000 }, { "epoch": 1.0, "learning_rate": 2.4888354039427372e-05, "loss": 2.2854, "step": 72200 }, { "epoch": 1.01, "learning_rate": 2.48187926932762e-05, "loss": 2.2653, "step": 72400 }, { "epoch": 1.01, "learning_rate": 2.4749231347125034e-05, "loss": 2.2476, "step": 72600 }, { "epoch": 1.01, "learning_rate": 2.467967000097386e-05, "loss": 2.2801, "step": 72800 }, { "epoch": 1.02, "learning_rate": 2.4610108654822688e-05, "loss": 2.2836, "step": 73000 }, { "epoch": 1.02, "learning_rate": 2.454054730867152e-05, "loss": 2.2847, "step": 73200 }, { "epoch": 1.02, "learning_rate": 2.447098596252035e-05, "loss": 2.3052, "step": 73400 }, { "epoch": 1.02, "learning_rate": 2.4401424616369178e-05, "loss": 2.285, "step": 73600 }, { "epoch": 1.03, "learning_rate": 2.4331863270218007e-05, "loss": 2.22, "step": 73800 }, { "epoch": 1.03, "learning_rate": 2.4262301924066836e-05, "loss": 2.2589, "step": 74000 }, { "epoch": 1.03, "learning_rate": 2.4192740577915664e-05, "loss": 2.2705, "step": 74200 }, { "epoch": 1.04, "learning_rate": 2.4123179231764497e-05, "loss": 2.304, "step": 74400 }, { "epoch": 1.04, "learning_rate": 2.4053617885613325e-05, "loss": 2.2499, "step": 74600 }, { "epoch": 1.04, "learning_rate": 2.398405653946215e-05, "loss": 2.3109, "step": 74800 }, { "epoch": 1.04, "learning_rate": 2.3914495193310983e-05, "loss": 2.2772, "step": 75000 }, { "epoch": 1.05, "learning_rate": 2.3844933847159812e-05, "loss": 2.2835, "step": 75200 }, { "epoch": 1.05, "learning_rate": 2.377537250100864e-05, "loss": 2.298, "step": 75400 }, { "epoch": 1.05, "learning_rate": 2.370581115485747e-05, "loss": 2.2486, "step": 75600 }, { "epoch": 1.05, "learning_rate": 2.36362498087063e-05, "loss": 2.304, "step": 75800 }, { "epoch": 1.06, "learning_rate": 2.3566688462555127e-05, "loss": 2.2604, "step": 76000 }, { "epoch": 1.06, "learning_rate": 2.3497127116403956e-05, "loss": 2.2368, "step": 76200 }, { "epoch": 1.06, "learning_rate": 2.342756577025279e-05, "loss": 2.2736, "step": 76400 }, { "epoch": 1.07, "learning_rate": 2.3358004424101617e-05, "loss": 2.3323, "step": 76600 }, { "epoch": 1.07, "learning_rate": 2.3288443077950446e-05, "loss": 2.2373, "step": 76800 }, { "epoch": 1.07, "learning_rate": 2.3218881731799275e-05, "loss": 2.3072, "step": 77000 }, { "epoch": 1.07, "learning_rate": 2.3149320385648104e-05, "loss": 2.2919, "step": 77200 }, { "epoch": 1.08, "learning_rate": 2.3079759039496933e-05, "loss": 2.2874, "step": 77400 }, { "epoch": 1.08, "learning_rate": 2.3010197693345762e-05, "loss": 2.2723, "step": 77600 }, { "epoch": 1.08, "learning_rate": 2.294063634719459e-05, "loss": 2.3047, "step": 77800 }, { "epoch": 1.09, "learning_rate": 2.287107500104342e-05, "loss": 2.2542, "step": 78000 }, { "epoch": 1.09, "learning_rate": 2.2801513654892252e-05, "loss": 2.2426, "step": 78200 }, { "epoch": 1.09, "learning_rate": 2.273195230874108e-05, "loss": 2.2554, "step": 78400 }, { "epoch": 1.09, "learning_rate": 2.266239096258991e-05, "loss": 2.3179, "step": 78600 }, { "epoch": 1.1, "learning_rate": 2.2592829616438738e-05, "loss": 2.2934, "step": 78800 }, { "epoch": 1.1, "learning_rate": 2.2523268270287567e-05, "loss": 2.2707, "step": 79000 }, { "epoch": 1.1, "learning_rate": 2.2453706924136396e-05, "loss": 2.311, "step": 79200 }, { "epoch": 1.1, "learning_rate": 2.2384145577985228e-05, "loss": 2.3381, "step": 79400 }, { "epoch": 1.11, "learning_rate": 2.2314584231834054e-05, "loss": 2.247, "step": 79600 }, { "epoch": 1.11, "learning_rate": 2.2245022885682883e-05, "loss": 2.3296, "step": 79800 }, { "epoch": 1.11, "learning_rate": 2.2175461539531715e-05, "loss": 2.2792, "step": 80000 }, { "epoch": 1.12, "learning_rate": 2.2105900193380544e-05, "loss": 2.3004, "step": 80200 }, { "epoch": 1.12, "learning_rate": 2.2036338847229373e-05, "loss": 2.2845, "step": 80400 }, { "epoch": 1.12, "learning_rate": 2.19667775010782e-05, "loss": 2.2623, "step": 80600 }, { "epoch": 1.12, "learning_rate": 2.189721615492703e-05, "loss": 2.2849, "step": 80800 }, { "epoch": 1.13, "learning_rate": 2.182765480877586e-05, "loss": 2.231, "step": 81000 }, { "epoch": 1.13, "learning_rate": 2.175809346262469e-05, "loss": 2.2894, "step": 81200 }, { "epoch": 1.13, "learning_rate": 2.168853211647352e-05, "loss": 2.2979, "step": 81400 }, { "epoch": 1.14, "learning_rate": 2.1618970770322346e-05, "loss": 2.2961, "step": 81600 }, { "epoch": 1.14, "learning_rate": 2.1549409424171178e-05, "loss": 2.2625, "step": 81800 }, { "epoch": 1.14, "learning_rate": 2.1479848078020007e-05, "loss": 2.2742, "step": 82000 }, { "epoch": 1.14, "learning_rate": 2.1410286731868836e-05, "loss": 2.2718, "step": 82200 }, { "epoch": 1.15, "learning_rate": 2.1340725385717668e-05, "loss": 2.2341, "step": 82400 }, { "epoch": 1.15, "learning_rate": 2.1271164039566493e-05, "loss": 2.2424, "step": 82600 }, { "epoch": 1.15, "learning_rate": 2.1201602693415322e-05, "loss": 2.2147, "step": 82800 }, { "epoch": 1.15, "learning_rate": 2.1132041347264154e-05, "loss": 2.3273, "step": 83000 }, { "epoch": 1.16, "learning_rate": 2.1062480001112983e-05, "loss": 2.2852, "step": 83200 }, { "epoch": 1.16, "learning_rate": 2.0992918654961812e-05, "loss": 2.301, "step": 83400 }, { "epoch": 1.16, "learning_rate": 2.092335730881064e-05, "loss": 2.2577, "step": 83600 }, { "epoch": 1.17, "learning_rate": 2.085379596265947e-05, "loss": 2.3224, "step": 83800 }, { "epoch": 1.17, "learning_rate": 2.07842346165083e-05, "loss": 2.2808, "step": 84000 }, { "epoch": 1.17, "learning_rate": 2.071467327035713e-05, "loss": 2.369, "step": 84200 }, { "epoch": 1.17, "learning_rate": 2.064511192420596e-05, "loss": 2.2545, "step": 84400 }, { "epoch": 1.18, "learning_rate": 2.0575550578054785e-05, "loss": 2.2866, "step": 84600 }, { "epoch": 1.18, "learning_rate": 2.0505989231903618e-05, "loss": 2.2575, "step": 84800 }, { "epoch": 1.18, "learning_rate": 2.0436427885752446e-05, "loss": 2.2734, "step": 85000 }, { "epoch": 1.19, "learning_rate": 2.0366866539601275e-05, "loss": 2.3246, "step": 85200 }, { "epoch": 1.19, "learning_rate": 2.0297305193450104e-05, "loss": 2.2856, "step": 85400 }, { "epoch": 1.19, "learning_rate": 2.0227743847298933e-05, "loss": 2.2153, "step": 85600 }, { "epoch": 1.19, "learning_rate": 2.0158182501147762e-05, "loss": 2.3101, "step": 85800 }, { "epoch": 1.2, "learning_rate": 2.0088621154996594e-05, "loss": 2.256, "step": 86000 }, { "epoch": 1.2, "learning_rate": 2.0019059808845423e-05, "loss": 2.2453, "step": 86200 }, { "epoch": 1.2, "learning_rate": 1.9949498462694252e-05, "loss": 2.2607, "step": 86400 }, { "epoch": 1.2, "learning_rate": 1.987993711654308e-05, "loss": 2.2636, "step": 86600 }, { "epoch": 1.21, "learning_rate": 1.981037577039191e-05, "loss": 2.2273, "step": 86800 }, { "epoch": 1.21, "learning_rate": 1.974081442424074e-05, "loss": 2.2384, "step": 87000 }, { "epoch": 1.21, "learning_rate": 1.9671253078089567e-05, "loss": 2.234, "step": 87200 }, { "epoch": 1.22, "learning_rate": 1.96016917319384e-05, "loss": 2.2819, "step": 87400 }, { "epoch": 1.22, "learning_rate": 1.9532130385787225e-05, "loss": 2.2695, "step": 87600 }, { "epoch": 1.22, "learning_rate": 1.9462569039636054e-05, "loss": 2.2836, "step": 87800 }, { "epoch": 1.22, "learning_rate": 1.9393007693484886e-05, "loss": 2.2486, "step": 88000 }, { "epoch": 1.23, "learning_rate": 1.9323446347333715e-05, "loss": 2.2448, "step": 88200 }, { "epoch": 1.23, "learning_rate": 1.9253885001182544e-05, "loss": 2.3028, "step": 88400 }, { "epoch": 1.23, "learning_rate": 1.9184323655031373e-05, "loss": 2.2855, "step": 88600 }, { "epoch": 1.24, "learning_rate": 1.91147623088802e-05, "loss": 2.2171, "step": 88800 }, { "epoch": 1.24, "learning_rate": 1.904520096272903e-05, "loss": 2.2834, "step": 89000 }, { "epoch": 1.24, "learning_rate": 1.8975639616577863e-05, "loss": 2.2743, "step": 89200 }, { "epoch": 1.24, "learning_rate": 1.890607827042669e-05, "loss": 2.2437, "step": 89400 }, { "epoch": 1.25, "learning_rate": 1.8836516924275517e-05, "loss": 2.2791, "step": 89600 }, { "epoch": 1.25, "learning_rate": 1.876695557812435e-05, "loss": 2.2572, "step": 89800 }, { "epoch": 1.25, "learning_rate": 1.8697394231973178e-05, "loss": 2.3147, "step": 90000 }, { "epoch": 1.25, "learning_rate": 1.8627832885822007e-05, "loss": 2.2677, "step": 90200 }, { "epoch": 1.26, "learning_rate": 1.855827153967084e-05, "loss": 2.2691, "step": 90400 }, { "epoch": 1.26, "learning_rate": 1.8488710193519665e-05, "loss": 2.2629, "step": 90600 }, { "epoch": 1.26, "learning_rate": 1.8419148847368493e-05, "loss": 2.2621, "step": 90800 }, { "epoch": 1.27, "learning_rate": 1.8349587501217326e-05, "loss": 2.2666, "step": 91000 }, { "epoch": 1.27, "learning_rate": 1.8280026155066155e-05, "loss": 2.2626, "step": 91200 }, { "epoch": 1.27, "learning_rate": 1.8210464808914983e-05, "loss": 2.2453, "step": 91400 }, { "epoch": 1.27, "learning_rate": 1.8140903462763812e-05, "loss": 2.2943, "step": 91600 }, { "epoch": 1.28, "learning_rate": 1.807134211661264e-05, "loss": 2.2731, "step": 91800 }, { "epoch": 1.28, "learning_rate": 1.800178077046147e-05, "loss": 2.2229, "step": 92000 }, { "epoch": 1.28, "learning_rate": 1.7932219424310302e-05, "loss": 2.2683, "step": 92200 }, { "epoch": 1.29, "learning_rate": 1.786265807815913e-05, "loss": 2.2642, "step": 92400 }, { "epoch": 1.29, "learning_rate": 1.7793096732007957e-05, "loss": 2.2761, "step": 92600 }, { "epoch": 1.29, "learning_rate": 1.772353538585679e-05, "loss": 2.2218, "step": 92800 }, { "epoch": 1.29, "learning_rate": 1.7653974039705618e-05, "loss": 2.2866, "step": 93000 }, { "epoch": 1.3, "learning_rate": 1.7584412693554447e-05, "loss": 2.267, "step": 93200 }, { "epoch": 1.3, "learning_rate": 1.751485134740328e-05, "loss": 2.2521, "step": 93400 }, { "epoch": 1.3, "learning_rate": 1.7445290001252104e-05, "loss": 2.2212, "step": 93600 }, { "epoch": 1.3, "learning_rate": 1.7375728655100933e-05, "loss": 2.2599, "step": 93800 }, { "epoch": 1.31, "learning_rate": 1.7306167308949765e-05, "loss": 2.2767, "step": 94000 }, { "epoch": 1.31, "learning_rate": 1.7236605962798594e-05, "loss": 2.2473, "step": 94200 }, { "epoch": 1.31, "learning_rate": 1.716704461664742e-05, "loss": 2.2549, "step": 94400 }, { "epoch": 1.32, "learning_rate": 1.7097483270496252e-05, "loss": 2.2694, "step": 94600 }, { "epoch": 1.32, "learning_rate": 1.702792192434508e-05, "loss": 2.2805, "step": 94800 }, { "epoch": 1.32, "learning_rate": 1.695836057819391e-05, "loss": 2.2728, "step": 95000 }, { "epoch": 1.32, "learning_rate": 1.688879923204274e-05, "loss": 2.2649, "step": 95200 }, { "epoch": 1.33, "learning_rate": 1.6819237885891567e-05, "loss": 2.2667, "step": 95400 }, { "epoch": 1.33, "learning_rate": 1.6749676539740396e-05, "loss": 2.2807, "step": 95600 }, { "epoch": 1.33, "learning_rate": 1.668011519358923e-05, "loss": 2.2694, "step": 95800 }, { "epoch": 1.34, "learning_rate": 1.6610553847438057e-05, "loss": 2.2407, "step": 96000 }, { "epoch": 1.34, "learning_rate": 1.6540992501286886e-05, "loss": 2.2147, "step": 96200 }, { "epoch": 1.34, "learning_rate": 1.6471431155135715e-05, "loss": 2.266, "step": 96400 }, { "epoch": 1.34, "learning_rate": 1.6401869808984544e-05, "loss": 2.2406, "step": 96600 }, { "epoch": 1.35, "learning_rate": 1.6332308462833373e-05, "loss": 2.2371, "step": 96800 }, { "epoch": 1.35, "learning_rate": 1.62627471166822e-05, "loss": 2.2532, "step": 97000 }, { "epoch": 1.35, "learning_rate": 1.6193185770531034e-05, "loss": 2.2243, "step": 97200 }, { "epoch": 1.36, "learning_rate": 1.612362442437986e-05, "loss": 2.2837, "step": 97400 }, { "epoch": 1.36, "learning_rate": 1.605406307822869e-05, "loss": 2.2991, "step": 97600 }, { "epoch": 1.36, "learning_rate": 1.598450173207752e-05, "loss": 2.2166, "step": 97800 }, { "epoch": 1.36, "learning_rate": 1.591494038592635e-05, "loss": 2.2738, "step": 98000 }, { "epoch": 1.37, "learning_rate": 1.5845379039775178e-05, "loss": 2.2654, "step": 98200 }, { "epoch": 1.37, "learning_rate": 1.5775817693624007e-05, "loss": 2.2906, "step": 98400 }, { "epoch": 1.37, "learning_rate": 1.5706256347472836e-05, "loss": 2.2701, "step": 98600 }, { "epoch": 1.37, "learning_rate": 1.5636695001321665e-05, "loss": 2.2599, "step": 98800 }, { "epoch": 1.38, "learning_rate": 1.5567133655170497e-05, "loss": 2.2211, "step": 99000 }, { "epoch": 1.38, "learning_rate": 1.5497572309019326e-05, "loss": 2.2448, "step": 99200 }, { "epoch": 1.38, "learning_rate": 1.542801096286815e-05, "loss": 2.2691, "step": 99400 }, { "epoch": 1.39, "learning_rate": 1.5358449616716984e-05, "loss": 2.2314, "step": 99600 }, { "epoch": 1.39, "learning_rate": 1.5288888270565812e-05, "loss": 2.2875, "step": 99800 }, { "epoch": 1.39, "learning_rate": 1.5219326924414643e-05, "loss": 2.2662, "step": 100000 }, { "epoch": 1.39, "learning_rate": 1.5149765578263472e-05, "loss": 2.2468, "step": 100200 }, { "epoch": 1.4, "learning_rate": 1.5080204232112299e-05, "loss": 2.2398, "step": 100400 }, { "epoch": 1.4, "learning_rate": 1.501064288596113e-05, "loss": 2.2485, "step": 100600 }, { "epoch": 1.4, "learning_rate": 1.4941081539809958e-05, "loss": 2.235, "step": 100800 }, { "epoch": 1.41, "learning_rate": 1.4871520193658789e-05, "loss": 2.2683, "step": 101000 }, { "epoch": 1.41, "learning_rate": 1.480195884750762e-05, "loss": 2.2748, "step": 101200 }, { "epoch": 1.41, "learning_rate": 1.4732397501356447e-05, "loss": 2.2722, "step": 101400 }, { "epoch": 1.41, "learning_rate": 1.4662836155205275e-05, "loss": 2.256, "step": 101600 }, { "epoch": 1.42, "learning_rate": 1.4593274809054106e-05, "loss": 2.2949, "step": 101800 }, { "epoch": 1.42, "learning_rate": 1.4523713462902935e-05, "loss": 2.2466, "step": 102000 }, { "epoch": 1.42, "learning_rate": 1.4454152116751765e-05, "loss": 2.2626, "step": 102200 }, { "epoch": 1.42, "learning_rate": 1.4384590770600593e-05, "loss": 2.2383, "step": 102400 }, { "epoch": 1.43, "learning_rate": 1.4315029424449421e-05, "loss": 2.2195, "step": 102600 }, { "epoch": 1.43, "learning_rate": 1.4245468078298252e-05, "loss": 2.2826, "step": 102800 }, { "epoch": 1.43, "learning_rate": 1.4175906732147083e-05, "loss": 2.2879, "step": 103000 }, { "epoch": 1.44, "learning_rate": 1.4106345385995911e-05, "loss": 2.2546, "step": 103200 }, { "epoch": 1.44, "learning_rate": 1.4036784039844739e-05, "loss": 2.3098, "step": 103400 }, { "epoch": 1.44, "learning_rate": 1.3967222693693569e-05, "loss": 2.2196, "step": 103600 }, { "epoch": 1.44, "learning_rate": 1.3897661347542398e-05, "loss": 2.2716, "step": 103800 }, { "epoch": 1.45, "learning_rate": 1.3828100001391229e-05, "loss": 2.283, "step": 104000 }, { "epoch": 1.45, "learning_rate": 1.3758538655240057e-05, "loss": 2.2332, "step": 104200 }, { "epoch": 1.45, "learning_rate": 1.3688977309088885e-05, "loss": 2.2702, "step": 104400 }, { "epoch": 1.46, "learning_rate": 1.3619415962937715e-05, "loss": 2.2292, "step": 104600 }, { "epoch": 1.46, "learning_rate": 1.3549854616786544e-05, "loss": 2.2587, "step": 104800 }, { "epoch": 1.46, "learning_rate": 1.3480293270635375e-05, "loss": 2.2908, "step": 105000 }, { "epoch": 1.46, "learning_rate": 1.3410731924484205e-05, "loss": 2.2374, "step": 105200 }, { "epoch": 1.47, "learning_rate": 1.3341170578333032e-05, "loss": 2.202, "step": 105400 }, { "epoch": 1.47, "learning_rate": 1.3271609232181861e-05, "loss": 2.2496, "step": 105600 }, { "epoch": 1.47, "learning_rate": 1.3202047886030692e-05, "loss": 2.2488, "step": 105800 }, { "epoch": 1.47, "learning_rate": 1.313248653987952e-05, "loss": 2.2002, "step": 106000 }, { "epoch": 1.48, "learning_rate": 1.3062925193728351e-05, "loss": 2.2189, "step": 106200 }, { "epoch": 1.48, "learning_rate": 1.2993363847577178e-05, "loss": 2.2373, "step": 106400 }, { "epoch": 1.48, "learning_rate": 1.2923802501426007e-05, "loss": 2.2146, "step": 106600 }, { "epoch": 1.49, "learning_rate": 1.2854241155274838e-05, "loss": 2.2442, "step": 106800 }, { "epoch": 1.49, "learning_rate": 1.2784679809123668e-05, "loss": 2.2113, "step": 107000 }, { "epoch": 1.49, "learning_rate": 1.2715118462972497e-05, "loss": 2.2127, "step": 107200 }, { "epoch": 1.49, "learning_rate": 1.2645557116821324e-05, "loss": 2.2608, "step": 107400 }, { "epoch": 1.5, "learning_rate": 1.2575995770670155e-05, "loss": 2.2913, "step": 107600 }, { "epoch": 1.5, "learning_rate": 1.2506434424518984e-05, "loss": 2.2128, "step": 107800 }, { "epoch": 1.5, "learning_rate": 1.2436873078367812e-05, "loss": 2.2995, "step": 108000 }, { "epoch": 1.51, "learning_rate": 1.2367311732216643e-05, "loss": 2.1982, "step": 108200 }, { "epoch": 1.51, "learning_rate": 1.2297750386065472e-05, "loss": 2.2296, "step": 108400 }, { "epoch": 1.51, "learning_rate": 1.22281890399143e-05, "loss": 2.2605, "step": 108600 }, { "epoch": 1.51, "learning_rate": 1.2158627693763131e-05, "loss": 2.2234, "step": 108800 }, { "epoch": 1.52, "learning_rate": 1.2089066347611958e-05, "loss": 2.2545, "step": 109000 }, { "epoch": 1.52, "learning_rate": 1.2019505001460789e-05, "loss": 2.2522, "step": 109200 }, { "epoch": 1.52, "learning_rate": 1.1949943655309618e-05, "loss": 2.2098, "step": 109400 }, { "epoch": 1.52, "learning_rate": 1.1880382309158447e-05, "loss": 2.2733, "step": 109600 }, { "epoch": 1.53, "learning_rate": 1.1810820963007277e-05, "loss": 2.2238, "step": 109800 }, { "epoch": 1.53, "learning_rate": 1.1741259616856106e-05, "loss": 2.2313, "step": 110000 }, { "epoch": 1.53, "learning_rate": 1.1671698270704935e-05, "loss": 2.2784, "step": 110200 }, { "epoch": 1.54, "learning_rate": 1.1602136924553766e-05, "loss": 2.2708, "step": 110400 }, { "epoch": 1.54, "learning_rate": 1.1532575578402593e-05, "loss": 2.2571, "step": 110600 }, { "epoch": 1.54, "learning_rate": 1.1463014232251423e-05, "loss": 2.2817, "step": 110800 }, { "epoch": 1.54, "learning_rate": 1.1393452886100252e-05, "loss": 2.2155, "step": 111000 }, { "epoch": 1.55, "learning_rate": 1.1323891539949081e-05, "loss": 2.2027, "step": 111200 }, { "epoch": 1.55, "learning_rate": 1.1254330193797912e-05, "loss": 2.2216, "step": 111400 }, { "epoch": 1.55, "learning_rate": 1.118476884764674e-05, "loss": 2.2222, "step": 111600 }, { "epoch": 1.56, "learning_rate": 1.111520750149557e-05, "loss": 2.2636, "step": 111800 }, { "epoch": 1.56, "learning_rate": 1.1045646155344398e-05, "loss": 2.2968, "step": 112000 }, { "epoch": 1.56, "learning_rate": 1.0976084809193229e-05, "loss": 2.3037, "step": 112200 }, { "epoch": 1.56, "learning_rate": 1.0906523463042057e-05, "loss": 2.2548, "step": 112400 }, { "epoch": 1.57, "learning_rate": 1.0836962116890886e-05, "loss": 2.2708, "step": 112600 }, { "epoch": 1.57, "learning_rate": 1.0767400770739717e-05, "loss": 2.2342, "step": 112800 }, { "epoch": 1.57, "learning_rate": 1.0697839424588544e-05, "loss": 2.2659, "step": 113000 }, { "epoch": 1.57, "learning_rate": 1.0628278078437375e-05, "loss": 2.2394, "step": 113200 }, { "epoch": 1.58, "learning_rate": 1.0558716732286205e-05, "loss": 2.29, "step": 113400 }, { "epoch": 1.58, "learning_rate": 1.0489155386135032e-05, "loss": 2.2658, "step": 113600 }, { "epoch": 1.58, "learning_rate": 1.0419594039983863e-05, "loss": 2.2412, "step": 113800 }, { "epoch": 1.59, "learning_rate": 1.0350032693832692e-05, "loss": 2.2438, "step": 114000 }, { "epoch": 1.59, "learning_rate": 1.028047134768152e-05, "loss": 2.2179, "step": 114200 }, { "epoch": 1.59, "learning_rate": 1.0210910001530351e-05, "loss": 2.2669, "step": 114400 }, { "epoch": 1.59, "learning_rate": 1.014134865537918e-05, "loss": 2.1845, "step": 114600 }, { "epoch": 1.6, "learning_rate": 1.0071787309228009e-05, "loss": 2.239, "step": 114800 }, { "epoch": 1.6, "learning_rate": 1.0002225963076838e-05, "loss": 2.2727, "step": 115000 }, { "epoch": 1.6, "learning_rate": 9.932664616925667e-06, "loss": 2.2217, "step": 115200 }, { "epoch": 1.61, "learning_rate": 9.863103270774495e-06, "loss": 2.2662, "step": 115400 }, { "epoch": 1.61, "learning_rate": 9.793541924623326e-06, "loss": 2.2163, "step": 115600 }, { "epoch": 1.61, "learning_rate": 9.723980578472155e-06, "loss": 2.242, "step": 115800 }, { "epoch": 1.61, "learning_rate": 9.654419232320984e-06, "loss": 2.2967, "step": 116000 }, { "epoch": 1.62, "learning_rate": 9.584857886169814e-06, "loss": 2.2005, "step": 116200 }, { "epoch": 1.62, "learning_rate": 9.515296540018641e-06, "loss": 2.244, "step": 116400 }, { "epoch": 1.62, "learning_rate": 9.445735193867472e-06, "loss": 2.2641, "step": 116600 }, { "epoch": 1.62, "learning_rate": 9.376173847716303e-06, "loss": 2.2491, "step": 116800 }, { "epoch": 1.63, "learning_rate": 9.30661250156513e-06, "loss": 2.1801, "step": 117000 }, { "epoch": 1.63, "learning_rate": 9.23705115541396e-06, "loss": 2.2559, "step": 117200 }, { "epoch": 1.63, "learning_rate": 9.167489809262789e-06, "loss": 2.2147, "step": 117400 }, { "epoch": 1.64, "learning_rate": 9.097928463111618e-06, "loss": 2.2392, "step": 117600 }, { "epoch": 1.64, "learning_rate": 9.028367116960449e-06, "loss": 2.2059, "step": 117800 }, { "epoch": 1.64, "learning_rate": 8.958805770809277e-06, "loss": 2.2391, "step": 118000 }, { "epoch": 1.64, "learning_rate": 8.889244424658106e-06, "loss": 2.2566, "step": 118200 }, { "epoch": 1.65, "learning_rate": 8.819683078506935e-06, "loss": 2.1766, "step": 118400 }, { "epoch": 1.65, "learning_rate": 8.750121732355766e-06, "loss": 2.2256, "step": 118600 }, { "epoch": 1.65, "learning_rate": 8.680560386204594e-06, "loss": 2.2554, "step": 118800 }, { "epoch": 1.66, "learning_rate": 8.610999040053423e-06, "loss": 2.2409, "step": 119000 }, { "epoch": 1.66, "learning_rate": 8.541437693902254e-06, "loss": 2.2004, "step": 119200 }, { "epoch": 1.66, "learning_rate": 8.471876347751081e-06, "loss": 2.2624, "step": 119400 }, { "epoch": 1.66, "learning_rate": 8.402315001599912e-06, "loss": 2.2336, "step": 119600 }, { "epoch": 1.67, "learning_rate": 8.33275365544874e-06, "loss": 2.2002, "step": 119800 }, { "epoch": 1.67, "learning_rate": 8.26319230929757e-06, "loss": 2.2472, "step": 120000 }, { "epoch": 1.67, "learning_rate": 8.1936309631464e-06, "loss": 2.2573, "step": 120200 }, { "epoch": 1.68, "learning_rate": 8.124069616995229e-06, "loss": 2.2176, "step": 120400 }, { "epoch": 1.68, "learning_rate": 8.054508270844058e-06, "loss": 2.2385, "step": 120600 }, { "epoch": 1.68, "learning_rate": 7.984946924692888e-06, "loss": 2.2575, "step": 120800 }, { "epoch": 1.68, "learning_rate": 7.915385578541715e-06, "loss": 2.2158, "step": 121000 }, { "epoch": 1.69, "learning_rate": 7.845824232390546e-06, "loss": 2.2307, "step": 121200 }, { "epoch": 1.69, "learning_rate": 7.776262886239375e-06, "loss": 2.2312, "step": 121400 }, { "epoch": 1.69, "learning_rate": 7.706701540088204e-06, "loss": 2.251, "step": 121600 }, { "epoch": 1.69, "learning_rate": 7.637140193937034e-06, "loss": 2.2864, "step": 121800 }, { "epoch": 1.7, "learning_rate": 7.567578847785862e-06, "loss": 2.2243, "step": 122000 }, { "epoch": 1.7, "learning_rate": 7.498017501634693e-06, "loss": 2.252, "step": 122200 }, { "epoch": 1.7, "learning_rate": 7.428456155483521e-06, "loss": 2.2009, "step": 122400 }, { "epoch": 1.71, "learning_rate": 7.35889480933235e-06, "loss": 2.2145, "step": 122600 }, { "epoch": 1.71, "learning_rate": 7.28933346318118e-06, "loss": 2.2335, "step": 122800 }, { "epoch": 1.71, "learning_rate": 7.219772117030009e-06, "loss": 2.2516, "step": 123000 }, { "epoch": 1.71, "learning_rate": 7.150210770878839e-06, "loss": 2.2146, "step": 123200 }, { "epoch": 1.72, "learning_rate": 7.0806494247276675e-06, "loss": 2.2304, "step": 123400 }, { "epoch": 1.72, "learning_rate": 7.011088078576497e-06, "loss": 2.1978, "step": 123600 }, { "epoch": 1.72, "learning_rate": 6.941526732425327e-06, "loss": 2.2488, "step": 123800 }, { "epoch": 1.73, "learning_rate": 6.871965386274155e-06, "loss": 2.2347, "step": 124000 }, { "epoch": 1.73, "learning_rate": 6.8024040401229855e-06, "loss": 2.2496, "step": 124200 }, { "epoch": 1.73, "learning_rate": 6.7328426939718135e-06, "loss": 2.2366, "step": 124400 }, { "epoch": 1.73, "learning_rate": 6.663281347820643e-06, "loss": 2.1691, "step": 124600 }, { "epoch": 1.74, "learning_rate": 6.593720001669472e-06, "loss": 2.205, "step": 124800 }, { "epoch": 1.74, "learning_rate": 6.524158655518302e-06, "loss": 2.2107, "step": 125000 }, { "epoch": 1.74, "learning_rate": 6.4545973093671315e-06, "loss": 2.2188, "step": 125200 }, { "epoch": 1.74, "learning_rate": 6.38503596321596e-06, "loss": 2.2381, "step": 125400 }, { "epoch": 1.75, "learning_rate": 6.31547461706479e-06, "loss": 2.2638, "step": 125600 }, { "epoch": 1.75, "learning_rate": 6.245913270913619e-06, "loss": 2.2688, "step": 125800 }, { "epoch": 1.75, "learning_rate": 6.176351924762449e-06, "loss": 2.1799, "step": 126000 }, { "epoch": 1.76, "learning_rate": 6.1067905786112775e-06, "loss": 2.2184, "step": 126200 }, { "epoch": 1.76, "learning_rate": 6.037229232460106e-06, "loss": 2.2297, "step": 126400 }, { "epoch": 1.76, "learning_rate": 5.967667886308936e-06, "loss": 2.2882, "step": 126600 }, { "epoch": 1.76, "learning_rate": 5.898106540157766e-06, "loss": 2.2388, "step": 126800 }, { "epoch": 1.77, "learning_rate": 5.828545194006595e-06, "loss": 2.2356, "step": 127000 }, { "epoch": 1.77, "learning_rate": 5.7589838478554234e-06, "loss": 2.2427, "step": 127200 }, { "epoch": 1.77, "learning_rate": 5.689422501704253e-06, "loss": 2.2371, "step": 127400 }, { "epoch": 1.78, "learning_rate": 5.619861155553083e-06, "loss": 2.2639, "step": 127600 }, { "epoch": 1.78, "learning_rate": 5.550299809401912e-06, "loss": 2.2532, "step": 127800 }, { "epoch": 1.78, "learning_rate": 5.480738463250741e-06, "loss": 2.2085, "step": 128000 }, { "epoch": 1.78, "learning_rate": 5.41117711709957e-06, "loss": 2.263, "step": 128200 }, { "epoch": 1.79, "learning_rate": 5.341615770948399e-06, "loss": 2.1695, "step": 128400 }, { "epoch": 1.79, "learning_rate": 5.272054424797229e-06, "loss": 2.2055, "step": 128600 }, { "epoch": 1.79, "learning_rate": 5.2024930786460585e-06, "loss": 2.2349, "step": 128800 }, { "epoch": 1.79, "learning_rate": 5.132931732494887e-06, "loss": 2.2024, "step": 129000 }, { "epoch": 1.8, "learning_rate": 5.063370386343716e-06, "loss": 2.2263, "step": 129200 }, { "epoch": 1.8, "learning_rate": 4.993809040192546e-06, "loss": 2.2084, "step": 129400 }, { "epoch": 1.8, "learning_rate": 4.924247694041376e-06, "loss": 2.2397, "step": 129600 }, { "epoch": 1.81, "learning_rate": 4.8546863478902045e-06, "loss": 2.2455, "step": 129800 }, { "epoch": 1.81, "learning_rate": 4.785125001739034e-06, "loss": 2.2159, "step": 130000 }, { "epoch": 1.81, "learning_rate": 4.715563655587863e-06, "loss": 2.1978, "step": 130200 }, { "epoch": 1.81, "learning_rate": 4.646002309436692e-06, "loss": 2.2635, "step": 130400 }, { "epoch": 1.82, "learning_rate": 4.576440963285522e-06, "loss": 2.2005, "step": 130600 }, { "epoch": 1.82, "learning_rate": 4.506879617134351e-06, "loss": 2.2388, "step": 130800 }, { "epoch": 1.82, "learning_rate": 4.43731827098318e-06, "loss": 2.2142, "step": 131000 }, { "epoch": 1.83, "learning_rate": 4.36775692483201e-06, "loss": 2.2437, "step": 131200 }, { "epoch": 1.83, "learning_rate": 4.298195578680839e-06, "loss": 2.2269, "step": 131400 }, { "epoch": 1.83, "learning_rate": 4.228634232529668e-06, "loss": 2.1651, "step": 131600 }, { "epoch": 1.83, "learning_rate": 4.159072886378497e-06, "loss": 2.2552, "step": 131800 }, { "epoch": 1.84, "learning_rate": 4.089511540227327e-06, "loss": 2.2328, "step": 132000 }, { "epoch": 1.84, "learning_rate": 4.019950194076156e-06, "loss": 2.2267, "step": 132200 }, { "epoch": 1.84, "learning_rate": 3.950388847924985e-06, "loss": 2.138, "step": 132400 }, { "epoch": 1.84, "learning_rate": 3.8808275017738145e-06, "loss": 2.1773, "step": 132600 }, { "epoch": 1.85, "learning_rate": 3.811266155622644e-06, "loss": 2.1719, "step": 132800 }, { "epoch": 1.85, "learning_rate": 3.741704809471473e-06, "loss": 2.2124, "step": 133000 }, { "epoch": 1.85, "learning_rate": 3.6721434633203023e-06, "loss": 2.2293, "step": 133200 }, { "epoch": 1.86, "learning_rate": 3.6025821171691316e-06, "loss": 2.1802, "step": 133400 }, { "epoch": 1.86, "learning_rate": 3.533020771017961e-06, "loss": 2.1823, "step": 133600 }, { "epoch": 1.86, "learning_rate": 3.4634594248667906e-06, "loss": 2.2114, "step": 133800 }, { "epoch": 1.86, "learning_rate": 3.39389807871562e-06, "loss": 2.2102, "step": 134000 }, { "epoch": 1.87, "learning_rate": 3.3243367325644487e-06, "loss": 2.2313, "step": 134200 }, { "epoch": 1.87, "learning_rate": 3.254775386413278e-06, "loss": 2.2214, "step": 134400 }, { "epoch": 1.87, "learning_rate": 3.1852140402621073e-06, "loss": 2.2491, "step": 134600 }, { "epoch": 1.88, "learning_rate": 3.1156526941109365e-06, "loss": 2.1957, "step": 134800 }, { "epoch": 1.88, "learning_rate": 3.046091347959766e-06, "loss": 2.251, "step": 135000 }, { "epoch": 1.88, "learning_rate": 2.976530001808595e-06, "loss": 2.1941, "step": 135200 }, { "epoch": 1.88, "learning_rate": 2.9069686556574244e-06, "loss": 2.231, "step": 135400 }, { "epoch": 1.89, "learning_rate": 2.8374073095062537e-06, "loss": 2.1893, "step": 135600 }, { "epoch": 1.89, "learning_rate": 2.767845963355083e-06, "loss": 2.2114, "step": 135800 }, { "epoch": 1.89, "learning_rate": 2.6982846172039122e-06, "loss": 2.2375, "step": 136000 }, { "epoch": 1.89, "learning_rate": 2.6287232710527415e-06, "loss": 2.1917, "step": 136200 }, { "epoch": 1.9, "learning_rate": 2.559161924901571e-06, "loss": 2.2227, "step": 136400 }, { "epoch": 1.9, "learning_rate": 2.4896005787504e-06, "loss": 2.2534, "step": 136600 }, { "epoch": 1.9, "learning_rate": 2.4200392325992294e-06, "loss": 2.2529, "step": 136800 }, { "epoch": 1.91, "learning_rate": 2.3504778864480586e-06, "loss": 2.2046, "step": 137000 }, { "epoch": 1.91, "learning_rate": 2.280916540296888e-06, "loss": 2.262, "step": 137200 }, { "epoch": 1.91, "learning_rate": 2.211355194145717e-06, "loss": 2.1998, "step": 137400 }, { "epoch": 1.91, "learning_rate": 2.1417938479945465e-06, "loss": 2.2093, "step": 137600 }, { "epoch": 1.92, "learning_rate": 2.0722325018433758e-06, "loss": 2.204, "step": 137800 }, { "epoch": 1.92, "learning_rate": 2.002671155692205e-06, "loss": 2.234, "step": 138000 }, { "epoch": 1.92, "learning_rate": 1.9331098095410343e-06, "loss": 2.2064, "step": 138200 }, { "epoch": 1.93, "learning_rate": 1.8635484633898636e-06, "loss": 2.2288, "step": 138400 }, { "epoch": 1.93, "learning_rate": 1.7939871172386929e-06, "loss": 2.1994, "step": 138600 }, { "epoch": 1.93, "learning_rate": 1.7244257710875224e-06, "loss": 2.2235, "step": 138800 }, { "epoch": 1.93, "learning_rate": 1.6548644249363515e-06, "loss": 2.2251, "step": 139000 }, { "epoch": 1.94, "learning_rate": 1.5853030787851805e-06, "loss": 2.2068, "step": 139200 }, { "epoch": 1.94, "learning_rate": 1.51574173263401e-06, "loss": 2.1716, "step": 139400 }, { "epoch": 1.94, "learning_rate": 1.4461803864828393e-06, "loss": 2.229, "step": 139600 }, { "epoch": 1.94, "learning_rate": 1.3766190403316686e-06, "loss": 2.2169, "step": 139800 }, { "epoch": 1.95, "learning_rate": 1.3070576941804979e-06, "loss": 2.1852, "step": 140000 }, { "epoch": 1.95, "learning_rate": 1.2374963480293271e-06, "loss": 2.2213, "step": 140200 }, { "epoch": 1.95, "learning_rate": 1.1679350018781564e-06, "loss": 2.2633, "step": 140400 }, { "epoch": 1.96, "learning_rate": 1.0983736557269857e-06, "loss": 2.1825, "step": 140600 }, { "epoch": 1.96, "learning_rate": 1.028812309575815e-06, "loss": 2.2269, "step": 140800 }, { "epoch": 1.96, "learning_rate": 9.592509634246443e-07, "loss": 2.2141, "step": 141000 }, { "epoch": 1.96, "learning_rate": 8.896896172734734e-07, "loss": 2.2351, "step": 141200 }, { "epoch": 1.97, "learning_rate": 8.201282711223028e-07, "loss": 2.2102, "step": 141400 }, { "epoch": 1.97, "learning_rate": 7.505669249711321e-07, "loss": 2.1856, "step": 141600 }, { "epoch": 1.97, "learning_rate": 6.810055788199613e-07, "loss": 2.2066, "step": 141800 }, { "epoch": 1.98, "learning_rate": 6.114442326687907e-07, "loss": 2.2295, "step": 142000 }, { "epoch": 1.98, "learning_rate": 5.418828865176198e-07, "loss": 2.1887, "step": 142200 }, { "epoch": 1.98, "learning_rate": 4.7232154036644923e-07, "loss": 2.1985, "step": 142400 }, { "epoch": 1.98, "learning_rate": 4.0276019421527846e-07, "loss": 2.2791, "step": 142600 }, { "epoch": 1.99, "learning_rate": 3.3319884806410774e-07, "loss": 2.2265, "step": 142800 }, { "epoch": 1.99, "learning_rate": 2.63637501912937e-07, "loss": 2.2137, "step": 143000 }, { "epoch": 1.99, "learning_rate": 1.940761557617663e-07, "loss": 2.253, "step": 143200 }, { "epoch": 2.0, "learning_rate": 1.2451480961059558e-07, "loss": 2.2167, "step": 143400 }, { "epoch": 2.0, "learning_rate": 5.495346345942487e-08, "loss": 2.2104, "step": 143600 }, { "epoch": 2.0, "step": 143758, "total_flos": 1.502524253059154e+17, "train_loss": 2.352252098808129, "train_runtime": 164976.46, "train_samples_per_second": 3.486, "train_steps_per_second": 0.871 } ], "logging_steps": 200, "max_steps": 143758, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "total_flos": 1.502524253059154e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }