{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 15.0, |
|
"eval_steps": 500, |
|
"global_step": 330, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.045454545454545456, |
|
"grad_norm": 257.23835842552523, |
|
"learning_rate": 2.9411764705882355e-06, |
|
"loss": 8.3183, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.09090909090909091, |
|
"grad_norm": 212.69538455789322, |
|
"learning_rate": 5.882352941176471e-06, |
|
"loss": 8.7163, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.13636363636363635, |
|
"grad_norm": 112.61346458118909, |
|
"learning_rate": 8.823529411764707e-06, |
|
"loss": 7.6494, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.18181818181818182, |
|
"grad_norm": 784.8951183910573, |
|
"learning_rate": 1.1764705882352942e-05, |
|
"loss": 8.0969, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.22727272727272727, |
|
"grad_norm": 185.45732108671288, |
|
"learning_rate": 1.4705882352941177e-05, |
|
"loss": 7.311, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.2727272727272727, |
|
"grad_norm": 195.28031105021546, |
|
"learning_rate": 1.7647058823529414e-05, |
|
"loss": 7.8658, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.3181818181818182, |
|
"grad_norm": 36.48221664801266, |
|
"learning_rate": 2.058823529411765e-05, |
|
"loss": 5.2603, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.36363636363636365, |
|
"grad_norm": 9.55998838923863, |
|
"learning_rate": 2.3529411764705884e-05, |
|
"loss": 4.8036, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.4090909090909091, |
|
"grad_norm": 9.615632721341097, |
|
"learning_rate": 2.647058823529412e-05, |
|
"loss": 4.8279, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.45454545454545453, |
|
"grad_norm": 7.669264353119744, |
|
"learning_rate": 2.9411764705882354e-05, |
|
"loss": 4.6123, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 6.9967605072507215, |
|
"learning_rate": 3.235294117647059e-05, |
|
"loss": 4.3857, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.5454545454545454, |
|
"grad_norm": 6.587199773697142, |
|
"learning_rate": 3.529411764705883e-05, |
|
"loss": 4.4648, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.5909090909090909, |
|
"grad_norm": 5.505554834249296, |
|
"learning_rate": 3.8235294117647055e-05, |
|
"loss": 4.1377, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.6363636363636364, |
|
"grad_norm": 4.9970053869195965, |
|
"learning_rate": 4.11764705882353e-05, |
|
"loss": 3.8989, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.6818181818181818, |
|
"grad_norm": 3.79904253928739, |
|
"learning_rate": 4.411764705882353e-05, |
|
"loss": 3.8294, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.7272727272727273, |
|
"grad_norm": 3.508407692792644, |
|
"learning_rate": 4.705882352941177e-05, |
|
"loss": 4.0649, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.7727272727272727, |
|
"grad_norm": 3.2871677206858054, |
|
"learning_rate": 5e-05, |
|
"loss": 3.7045, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.8181818181818182, |
|
"grad_norm": 3.3241161598281, |
|
"learning_rate": 4.999886666070519e-05, |
|
"loss": 3.7635, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.8636363636363636, |
|
"grad_norm": 3.1152646953619567, |
|
"learning_rate": 4.9995466756994795e-05, |
|
"loss": 3.6178, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.9090909090909091, |
|
"grad_norm": 3.273528675012558, |
|
"learning_rate": 4.9989800631379443e-05, |
|
"loss": 3.5688, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.9545454545454546, |
|
"grad_norm": 3.115834430389958, |
|
"learning_rate": 4.998186885467182e-05, |
|
"loss": 3.2943, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 2.661570245411782, |
|
"learning_rate": 4.99716722259292e-05, |
|
"loss": 3.0865, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 1.0454545454545454, |
|
"grad_norm": 2.849664680666923, |
|
"learning_rate": 4.99592117723729e-05, |
|
"loss": 3.1392, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 1.0909090909090908, |
|
"grad_norm": 2.4018072414305895, |
|
"learning_rate": 4.994448874928487e-05, |
|
"loss": 2.9298, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 1.1363636363636362, |
|
"grad_norm": 2.0206665673120896, |
|
"learning_rate": 4.992750463988114e-05, |
|
"loss": 2.8627, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 1.1818181818181819, |
|
"grad_norm": 1.9920698277851823, |
|
"learning_rate": 4.990826115516248e-05, |
|
"loss": 2.8083, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 1.2272727272727273, |
|
"grad_norm": 1.7918724410197877, |
|
"learning_rate": 4.9886760233742e-05, |
|
"loss": 2.6199, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 1.2727272727272727, |
|
"grad_norm": 1.5505702505545602, |
|
"learning_rate": 4.986300404164984e-05, |
|
"loss": 2.6521, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 1.3181818181818181, |
|
"grad_norm": 1.2616722287294364, |
|
"learning_rate": 4.9836994972114974e-05, |
|
"loss": 2.6015, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 1.3636363636363638, |
|
"grad_norm": 1.1638338779341943, |
|
"learning_rate": 4.9808735645324125e-05, |
|
"loss": 2.5141, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.4090909090909092, |
|
"grad_norm": 1.0794908268185501, |
|
"learning_rate": 4.9778228908157766e-05, |
|
"loss": 2.4714, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 1.4545454545454546, |
|
"grad_norm": 0.9412858812798977, |
|
"learning_rate": 4.9745477833903364e-05, |
|
"loss": 2.4086, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 0.9703710030327403, |
|
"learning_rate": 4.971048572194577e-05, |
|
"loss": 2.3233, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 1.5454545454545454, |
|
"grad_norm": 0.8923121257838885, |
|
"learning_rate": 4.9673256097434793e-05, |
|
"loss": 2.3252, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 1.5909090909090908, |
|
"grad_norm": 0.7408466397481557, |
|
"learning_rate": 4.963379271093012e-05, |
|
"loss": 2.3592, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 1.6363636363636362, |
|
"grad_norm": 0.8332937369940602, |
|
"learning_rate": 4.959209953802344e-05, |
|
"loss": 2.3153, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 1.6818181818181817, |
|
"grad_norm": 0.8198715006056391, |
|
"learning_rate": 4.954818077893798e-05, |
|
"loss": 2.14, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 1.7272727272727273, |
|
"grad_norm": 0.7030061091284978, |
|
"learning_rate": 4.950204085810533e-05, |
|
"loss": 2.1745, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 1.7727272727272727, |
|
"grad_norm": 0.7045320084558211, |
|
"learning_rate": 4.945368442371974e-05, |
|
"loss": 2.0868, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 1.8181818181818183, |
|
"grad_norm": 0.6475997413192687, |
|
"learning_rate": 4.9403116347269866e-05, |
|
"loss": 2.0927, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.8636363636363638, |
|
"grad_norm": 0.6818237942981833, |
|
"learning_rate": 4.935034172304797e-05, |
|
"loss": 2.1228, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 1.9090909090909092, |
|
"grad_norm": 0.6349632576540991, |
|
"learning_rate": 4.9295365867636766e-05, |
|
"loss": 2.1594, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 1.9545454545454546, |
|
"grad_norm": 0.6123700654773722, |
|
"learning_rate": 4.923819431937377e-05, |
|
"loss": 1.9419, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.628265874333403, |
|
"learning_rate": 4.9178832837793415e-05, |
|
"loss": 1.9591, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 2.0454545454545454, |
|
"grad_norm": 0.6079957574674165, |
|
"learning_rate": 4.9117287403046766e-05, |
|
"loss": 1.9066, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 2.090909090909091, |
|
"grad_norm": 0.5913194294689115, |
|
"learning_rate": 4.9053564215299135e-05, |
|
"loss": 1.9269, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 2.1363636363636362, |
|
"grad_norm": 0.524599692265706, |
|
"learning_rate": 4.898766969410542e-05, |
|
"loss": 1.8848, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 2.1818181818181817, |
|
"grad_norm": 0.5449852882597904, |
|
"learning_rate": 4.891961047776342e-05, |
|
"loss": 1.8835, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 2.227272727272727, |
|
"grad_norm": 0.5842912439698547, |
|
"learning_rate": 4.8849393422645054e-05, |
|
"loss": 1.8353, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 2.2727272727272725, |
|
"grad_norm": 0.5004654366250076, |
|
"learning_rate": 4.87770256025057e-05, |
|
"loss": 1.8698, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 2.3181818181818183, |
|
"grad_norm": 0.5000460896838382, |
|
"learning_rate": 4.870251430777148e-05, |
|
"loss": 1.8355, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 2.3636363636363638, |
|
"grad_norm": 0.47646909638568696, |
|
"learning_rate": 4.862586704480494e-05, |
|
"loss": 1.8062, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 2.409090909090909, |
|
"grad_norm": 0.4775044778243631, |
|
"learning_rate": 4.8547091535148725e-05, |
|
"loss": 1.7511, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 2.4545454545454546, |
|
"grad_norm": 0.4271462696410255, |
|
"learning_rate": 4.846619571474777e-05, |
|
"loss": 1.819, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 0.4687848016567625, |
|
"learning_rate": 4.8383187733149814e-05, |
|
"loss": 1.7687, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 2.5454545454545454, |
|
"grad_norm": 0.49290753360843537, |
|
"learning_rate": 4.8298075952684406e-05, |
|
"loss": 1.7602, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 2.590909090909091, |
|
"grad_norm": 0.4145717434905905, |
|
"learning_rate": 4.821086894762045e-05, |
|
"loss": 1.6849, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 2.6363636363636362, |
|
"grad_norm": 0.47758526019077047, |
|
"learning_rate": 4.812157550330246e-05, |
|
"loss": 1.8031, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 2.6818181818181817, |
|
"grad_norm": 0.43090399608171975, |
|
"learning_rate": 4.8030204615265445e-05, |
|
"loss": 1.6979, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 2.7272727272727275, |
|
"grad_norm": 0.42558498758567664, |
|
"learning_rate": 4.7936765488328794e-05, |
|
"loss": 1.6167, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 2.7727272727272725, |
|
"grad_norm": 0.42771373575336713, |
|
"learning_rate": 4.7841267535668876e-05, |
|
"loss": 1.6126, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 2.8181818181818183, |
|
"grad_norm": 0.45605982497696307, |
|
"learning_rate": 4.7743720377870786e-05, |
|
"loss": 1.7051, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 2.8636363636363638, |
|
"grad_norm": 0.36383290803582924, |
|
"learning_rate": 4.764413384195915e-05, |
|
"loss": 1.5355, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 2.909090909090909, |
|
"grad_norm": 0.35888633901842737, |
|
"learning_rate": 4.7542517960408125e-05, |
|
"loss": 1.6037, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 2.9545454545454546, |
|
"grad_norm": 0.35247547310658606, |
|
"learning_rate": 4.7438882970130756e-05, |
|
"loss": 1.6403, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 0.4123547479714445, |
|
"learning_rate": 4.7333239311447634e-05, |
|
"loss": 1.6687, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 3.0454545454545454, |
|
"grad_norm": 0.3575991218498864, |
|
"learning_rate": 4.7225597627035176e-05, |
|
"loss": 1.6531, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 3.090909090909091, |
|
"grad_norm": 0.3303944518163955, |
|
"learning_rate": 4.711596876085344e-05, |
|
"loss": 1.4537, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 3.1363636363636362, |
|
"grad_norm": 0.43041449387205766, |
|
"learning_rate": 4.70043637570537e-05, |
|
"loss": 1.6096, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 3.1818181818181817, |
|
"grad_norm": 0.3646696726581941, |
|
"learning_rate": 4.6890793858865865e-05, |
|
"loss": 1.586, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 3.227272727272727, |
|
"grad_norm": 0.3260419946678917, |
|
"learning_rate": 4.677527050746577e-05, |
|
"loss": 1.5422, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 3.2727272727272725, |
|
"grad_norm": 0.33570338878732947, |
|
"learning_rate": 4.665780534082264e-05, |
|
"loss": 1.5553, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 3.3181818181818183, |
|
"grad_norm": 0.3085499618355785, |
|
"learning_rate": 4.6538410192526613e-05, |
|
"loss": 1.5067, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 3.3636363636363638, |
|
"grad_norm": 0.30727638027509513, |
|
"learning_rate": 4.6417097090596637e-05, |
|
"loss": 1.5667, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 3.409090909090909, |
|
"grad_norm": 0.3021730689826977, |
|
"learning_rate": 4.629387825626875e-05, |
|
"loss": 1.5795, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 3.4545454545454546, |
|
"grad_norm": 0.32502917714553514, |
|
"learning_rate": 4.6168766102764874e-05, |
|
"loss": 1.5154, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"grad_norm": 0.3301265717537239, |
|
"learning_rate": 4.604177323404235e-05, |
|
"loss": 1.5048, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 3.5454545454545454, |
|
"grad_norm": 0.323165145487666, |
|
"learning_rate": 4.591291244352413e-05, |
|
"loss": 1.5117, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 3.590909090909091, |
|
"grad_norm": 0.3082703852698703, |
|
"learning_rate": 4.578219671280998e-05, |
|
"loss": 1.4521, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 3.6363636363636362, |
|
"grad_norm": 0.29479913403691677, |
|
"learning_rate": 4.5649639210368714e-05, |
|
"loss": 1.4487, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 3.6818181818181817, |
|
"grad_norm": 0.3092257716704811, |
|
"learning_rate": 4.551525329021155e-05, |
|
"loss": 1.447, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 3.7272727272727275, |
|
"grad_norm": 0.37229811196636947, |
|
"learning_rate": 4.5379052490546855e-05, |
|
"loss": 1.4919, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 3.7727272727272725, |
|
"grad_norm": 0.31138376308603494, |
|
"learning_rate": 4.524105053241625e-05, |
|
"loss": 1.4865, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 3.8181818181818183, |
|
"grad_norm": 0.3584821617778458, |
|
"learning_rate": 4.510126131831234e-05, |
|
"loss": 1.5092, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 3.8636363636363638, |
|
"grad_norm": 0.32342053983138597, |
|
"learning_rate": 4.4959698930778184e-05, |
|
"loss": 1.3528, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 3.909090909090909, |
|
"grad_norm": 0.3755258228072684, |
|
"learning_rate": 4.481637763098858e-05, |
|
"loss": 1.5071, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 3.9545454545454546, |
|
"grad_norm": 0.31577787863565876, |
|
"learning_rate": 4.4671311857313376e-05, |
|
"loss": 1.5149, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.3274660630382459, |
|
"learning_rate": 4.452451622386294e-05, |
|
"loss": 1.5101, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 4.045454545454546, |
|
"grad_norm": 0.295499868623689, |
|
"learning_rate": 4.437600551901591e-05, |
|
"loss": 1.4591, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 4.090909090909091, |
|
"grad_norm": 0.28617863379643455, |
|
"learning_rate": 4.422579470392941e-05, |
|
"loss": 1.4866, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 4.136363636363637, |
|
"grad_norm": 0.27613544331064876, |
|
"learning_rate": 4.40738989110318e-05, |
|
"loss": 1.4244, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 4.181818181818182, |
|
"grad_norm": 0.2969006794655833, |
|
"learning_rate": 4.392033344249827e-05, |
|
"loss": 1.3955, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 4.2272727272727275, |
|
"grad_norm": 0.2833654953485509, |
|
"learning_rate": 4.376511376870925e-05, |
|
"loss": 1.4366, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 4.2727272727272725, |
|
"grad_norm": 0.32286555195443095, |
|
"learning_rate": 4.36082555266919e-05, |
|
"loss": 1.3779, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 4.318181818181818, |
|
"grad_norm": 0.30339867592392933, |
|
"learning_rate": 4.3449774518544837e-05, |
|
"loss": 1.3523, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 4.363636363636363, |
|
"grad_norm": 0.3678773849209462, |
|
"learning_rate": 4.328968670984621e-05, |
|
"loss": 1.4055, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 4.409090909090909, |
|
"grad_norm": 0.2809671108473524, |
|
"learning_rate": 4.3128008228045264e-05, |
|
"loss": 1.3742, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 4.454545454545454, |
|
"grad_norm": 0.3264760569541278, |
|
"learning_rate": 4.296475536083769e-05, |
|
"loss": 1.3946, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"grad_norm": 0.258862105020995, |
|
"learning_rate": 4.279994455452478e-05, |
|
"loss": 1.3284, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 4.545454545454545, |
|
"grad_norm": 0.3134553840821824, |
|
"learning_rate": 4.263359241235657e-05, |
|
"loss": 1.3851, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 4.590909090909091, |
|
"grad_norm": 0.30170722376996695, |
|
"learning_rate": 4.246571569285925e-05, |
|
"loss": 1.371, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 4.636363636363637, |
|
"grad_norm": 0.2887395932931785, |
|
"learning_rate": 4.229633130814685e-05, |
|
"loss": 1.3292, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 4.681818181818182, |
|
"grad_norm": 0.3218428316599217, |
|
"learning_rate": 4.212545632221751e-05, |
|
"loss": 1.4558, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 4.7272727272727275, |
|
"grad_norm": 0.27631578638146514, |
|
"learning_rate": 4.1953107949234414e-05, |
|
"loss": 1.2989, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 4.7727272727272725, |
|
"grad_norm": 0.2688375043457644, |
|
"learning_rate": 4.1779303551791695e-05, |
|
"loss": 1.3677, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 4.818181818181818, |
|
"grad_norm": 0.28592384506961227, |
|
"learning_rate": 4.160406063916517e-05, |
|
"loss": 1.2749, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 4.863636363636363, |
|
"grad_norm": 0.30087535430968293, |
|
"learning_rate": 4.142739686554853e-05, |
|
"loss": 1.405, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 4.909090909090909, |
|
"grad_norm": 0.27354449180224716, |
|
"learning_rate": 4.124933002827481e-05, |
|
"loss": 1.3642, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 4.954545454545455, |
|
"grad_norm": 0.29727660205166156, |
|
"learning_rate": 4.106987806602345e-05, |
|
"loss": 1.3686, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 0.29032585014056134, |
|
"learning_rate": 4.088905905701316e-05, |
|
"loss": 1.3651, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 5.045454545454546, |
|
"grad_norm": 0.2900324279431226, |
|
"learning_rate": 4.070689121718066e-05, |
|
"loss": 1.3556, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 5.090909090909091, |
|
"grad_norm": 0.24863713928813203, |
|
"learning_rate": 4.0523392898345604e-05, |
|
"loss": 1.341, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 5.136363636363637, |
|
"grad_norm": 0.2790657225717466, |
|
"learning_rate": 4.03385825863618e-05, |
|
"loss": 1.3565, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 5.181818181818182, |
|
"grad_norm": 0.2665012607059084, |
|
"learning_rate": 4.0152478899254906e-05, |
|
"loss": 1.2776, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 5.2272727272727275, |
|
"grad_norm": 0.29328265354243366, |
|
"learning_rate": 3.996510058534682e-05, |
|
"loss": 1.3697, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 5.2727272727272725, |
|
"grad_norm": 0.2636658932396027, |
|
"learning_rate": 3.9776466521366995e-05, |
|
"loss": 1.3208, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 5.318181818181818, |
|
"grad_norm": 0.23509864156431806, |
|
"learning_rate": 3.958659571055071e-05, |
|
"loss": 1.2671, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 5.363636363636363, |
|
"grad_norm": 0.2621476324195016, |
|
"learning_rate": 3.939550728072473e-05, |
|
"loss": 1.3526, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 5.409090909090909, |
|
"grad_norm": 0.24664500701059844, |
|
"learning_rate": 3.920322048238024e-05, |
|
"loss": 1.2985, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 5.454545454545454, |
|
"grad_norm": 0.28092507999952776, |
|
"learning_rate": 3.900975468673368e-05, |
|
"loss": 1.2591, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 5.5, |
|
"grad_norm": 0.2604137388452376, |
|
"learning_rate": 3.8815129383775104e-05, |
|
"loss": 1.3022, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 5.545454545454545, |
|
"grad_norm": 0.25485436564236713, |
|
"learning_rate": 3.861936418030483e-05, |
|
"loss": 1.3511, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 5.590909090909091, |
|
"grad_norm": 0.22709249270142517, |
|
"learning_rate": 3.842247879795822e-05, |
|
"loss": 1.3479, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 5.636363636363637, |
|
"grad_norm": 0.23971939043085813, |
|
"learning_rate": 3.822449307121886e-05, |
|
"loss": 1.2734, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 5.681818181818182, |
|
"grad_norm": 0.27819081228375214, |
|
"learning_rate": 3.8025426945420426e-05, |
|
"loss": 1.2971, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 5.7272727272727275, |
|
"grad_norm": 0.25456906150670894, |
|
"learning_rate": 3.782530047473739e-05, |
|
"loss": 1.2765, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 5.7727272727272725, |
|
"grad_norm": 0.2235373692230836, |
|
"learning_rate": 3.762413382016467e-05, |
|
"loss": 1.2652, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 5.818181818181818, |
|
"grad_norm": 0.29051536045091336, |
|
"learning_rate": 3.742194724748668e-05, |
|
"loss": 1.2158, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 5.863636363636363, |
|
"grad_norm": 0.25505139923240183, |
|
"learning_rate": 3.721876112523566e-05, |
|
"loss": 1.2749, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 5.909090909090909, |
|
"grad_norm": 0.2297174194118098, |
|
"learning_rate": 3.701459592263974e-05, |
|
"loss": 1.2686, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 5.954545454545455, |
|
"grad_norm": 1.1819543960335286, |
|
"learning_rate": 3.680947220756086e-05, |
|
"loss": 1.2669, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 0.3596649944995116, |
|
"learning_rate": 3.6603410644422703e-05, |
|
"loss": 1.2553, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 6.045454545454546, |
|
"grad_norm": 0.2373357658133507, |
|
"learning_rate": 3.639643199212899e-05, |
|
"loss": 1.2475, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 6.090909090909091, |
|
"grad_norm": 0.22493113890489372, |
|
"learning_rate": 3.618855710197212e-05, |
|
"loss": 1.2343, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 6.136363636363637, |
|
"grad_norm": 0.2186099012077051, |
|
"learning_rate": 3.59798069155327e-05, |
|
"loss": 1.2583, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 6.181818181818182, |
|
"grad_norm": 0.2198359168498248, |
|
"learning_rate": 3.577020246256974e-05, |
|
"loss": 1.2124, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 6.2272727272727275, |
|
"grad_norm": 0.2418275850989451, |
|
"learning_rate": 3.555976485890216e-05, |
|
"loss": 1.2652, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 6.2727272727272725, |
|
"grad_norm": 0.24177652899109378, |
|
"learning_rate": 3.5348515304281567e-05, |
|
"loss": 1.2718, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 6.318181818181818, |
|
"grad_norm": 0.23176107126497403, |
|
"learning_rate": 3.5136475080256504e-05, |
|
"loss": 1.2815, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 6.363636363636363, |
|
"grad_norm": 0.2186114898802362, |
|
"learning_rate": 3.492366554802856e-05, |
|
"loss": 1.2278, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 6.409090909090909, |
|
"grad_norm": 0.24894955084370854, |
|
"learning_rate": 3.471010814630044e-05, |
|
"loss": 1.2528, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 6.454545454545454, |
|
"grad_norm": 0.26036666060205077, |
|
"learning_rate": 3.449582438911613e-05, |
|
"loss": 1.3011, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 6.5, |
|
"grad_norm": 0.21464875894128543, |
|
"learning_rate": 3.428083586369362e-05, |
|
"loss": 1.2153, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 6.545454545454545, |
|
"grad_norm": 0.24924104575584335, |
|
"learning_rate": 3.406516422825013e-05, |
|
"loss": 1.2149, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 6.590909090909091, |
|
"grad_norm": 0.22796512903628247, |
|
"learning_rate": 3.384883120982027e-05, |
|
"loss": 1.2057, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 6.636363636363637, |
|
"grad_norm": 0.22412816101017885, |
|
"learning_rate": 3.363185860206719e-05, |
|
"loss": 1.2879, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 6.681818181818182, |
|
"grad_norm": 0.22898070560796288, |
|
"learning_rate": 3.341426826308708e-05, |
|
"loss": 1.2407, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 6.7272727272727275, |
|
"grad_norm": 0.24599569608907884, |
|
"learning_rate": 3.319608211320719e-05, |
|
"loss": 1.193, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 6.7727272727272725, |
|
"grad_norm": 0.22837138512494187, |
|
"learning_rate": 3.29773221327775e-05, |
|
"loss": 1.2173, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 6.818181818181818, |
|
"grad_norm": 0.23242892820855598, |
|
"learning_rate": 3.2758010359956376e-05, |
|
"loss": 1.3222, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 6.863636363636363, |
|
"grad_norm": 0.2543864237654759, |
|
"learning_rate": 3.253816888849051e-05, |
|
"loss": 1.24, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 6.909090909090909, |
|
"grad_norm": 0.24092106206033628, |
|
"learning_rate": 3.2317819865489066e-05, |
|
"loss": 1.2964, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 6.954545454545455, |
|
"grad_norm": 0.24386711464365543, |
|
"learning_rate": 3.209698548919262e-05, |
|
"loss": 1.2041, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 0.2507114565585548, |
|
"learning_rate": 3.187568800673682e-05, |
|
"loss": 1.2057, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 7.045454545454546, |
|
"grad_norm": 0.21813780408934713, |
|
"learning_rate": 3.165394971191125e-05, |
|
"loss": 1.1822, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 7.090909090909091, |
|
"grad_norm": 0.22737082010889867, |
|
"learning_rate": 3.143179294291351e-05, |
|
"loss": 1.2516, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 7.136363636363637, |
|
"grad_norm": 0.2168145915465791, |
|
"learning_rate": 3.120924008009875e-05, |
|
"loss": 1.1801, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 7.181818181818182, |
|
"grad_norm": 0.19136333990689988, |
|
"learning_rate": 3.0986313543725174e-05, |
|
"loss": 1.1683, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 7.2272727272727275, |
|
"grad_norm": 0.2261635672146715, |
|
"learning_rate": 3.0763035791695335e-05, |
|
"loss": 1.2801, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 7.2727272727272725, |
|
"grad_norm": 0.1900863256471995, |
|
"learning_rate": 3.053942931729365e-05, |
|
"loss": 1.2395, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 7.318181818181818, |
|
"grad_norm": 0.20083348886939187, |
|
"learning_rate": 3.0315516646920494e-05, |
|
"loss": 1.1789, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 7.363636363636363, |
|
"grad_norm": 0.22527651747823768, |
|
"learning_rate": 3.0091320337822793e-05, |
|
"loss": 1.1912, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 7.409090909090909, |
|
"grad_norm": 0.22935986524053414, |
|
"learning_rate": 2.9866862975821596e-05, |
|
"loss": 1.2043, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 7.454545454545454, |
|
"grad_norm": 0.25053211749891174, |
|
"learning_rate": 2.9642167173036768e-05, |
|
"loss": 1.2245, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"grad_norm": 0.24058101769072435, |
|
"learning_rate": 2.9417255565608982e-05, |
|
"loss": 1.1887, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 7.545454545454545, |
|
"grad_norm": 0.2236093293557457, |
|
"learning_rate": 2.9192150811419343e-05, |
|
"loss": 1.1546, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 7.590909090909091, |
|
"grad_norm": 0.2536710657188106, |
|
"learning_rate": 2.8966875587806842e-05, |
|
"loss": 1.2302, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 7.636363636363637, |
|
"grad_norm": 0.23635026185440464, |
|
"learning_rate": 2.8741452589283747e-05, |
|
"loss": 1.2491, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 7.681818181818182, |
|
"grad_norm": 0.20829672573008348, |
|
"learning_rate": 2.8515904525249342e-05, |
|
"loss": 1.1821, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 7.7272727272727275, |
|
"grad_norm": 0.19887830172000012, |
|
"learning_rate": 2.8290254117702204e-05, |
|
"loss": 1.2327, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 7.7727272727272725, |
|
"grad_norm": 0.22301542864724377, |
|
"learning_rate": 2.8064524098951122e-05, |
|
"loss": 1.1883, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 7.818181818181818, |
|
"grad_norm": 0.20156843827561532, |
|
"learning_rate": 2.7838737209324995e-05, |
|
"loss": 1.2065, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 7.863636363636363, |
|
"grad_norm": 0.2196470118065061, |
|
"learning_rate": 2.761291619488198e-05, |
|
"loss": 1.2109, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 7.909090909090909, |
|
"grad_norm": 0.23170457369155842, |
|
"learning_rate": 2.738708380511803e-05, |
|
"loss": 1.211, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 7.954545454545455, |
|
"grad_norm": 0.20233640061480676, |
|
"learning_rate": 2.7161262790675013e-05, |
|
"loss": 1.1566, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 0.22428142795845282, |
|
"learning_rate": 2.6935475901048884e-05, |
|
"loss": 1.2359, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 8.045454545454545, |
|
"grad_norm": 0.21470110790573302, |
|
"learning_rate": 2.6709745882297805e-05, |
|
"loss": 1.2061, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 8.090909090909092, |
|
"grad_norm": 0.19724648690029864, |
|
"learning_rate": 2.6484095474750663e-05, |
|
"loss": 1.2481, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 8.136363636363637, |
|
"grad_norm": 0.20720915390036856, |
|
"learning_rate": 2.6258547410716272e-05, |
|
"loss": 1.1459, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 8.181818181818182, |
|
"grad_norm": 0.2279215538554465, |
|
"learning_rate": 2.6033124412193167e-05, |
|
"loss": 1.1456, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 8.227272727272727, |
|
"grad_norm": 0.1919565575948159, |
|
"learning_rate": 2.580784918858066e-05, |
|
"loss": 1.2178, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 8.272727272727273, |
|
"grad_norm": 0.20123788080080415, |
|
"learning_rate": 2.558274443439103e-05, |
|
"loss": 1.1572, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 8.318181818181818, |
|
"grad_norm": 0.18813695162235894, |
|
"learning_rate": 2.535783282696324e-05, |
|
"loss": 1.1923, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 8.363636363636363, |
|
"grad_norm": 0.20823032710382397, |
|
"learning_rate": 2.5133137024178406e-05, |
|
"loss": 1.1843, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 8.409090909090908, |
|
"grad_norm": 0.2099319254835431, |
|
"learning_rate": 2.4908679662177216e-05, |
|
"loss": 1.1993, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 8.454545454545455, |
|
"grad_norm": 0.19824053523947052, |
|
"learning_rate": 2.468448335307951e-05, |
|
"loss": 1.1826, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 8.5, |
|
"grad_norm": 0.19231852836204918, |
|
"learning_rate": 2.4460570682706362e-05, |
|
"loss": 1.1279, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 8.545454545454545, |
|
"grad_norm": 0.20741037413832103, |
|
"learning_rate": 2.4236964208304673e-05, |
|
"loss": 1.0622, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 8.590909090909092, |
|
"grad_norm": 0.3975271395657707, |
|
"learning_rate": 2.4013686456274824e-05, |
|
"loss": 1.1743, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 8.636363636363637, |
|
"grad_norm": 0.1911090472778124, |
|
"learning_rate": 2.379075991990126e-05, |
|
"loss": 1.1241, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 8.681818181818182, |
|
"grad_norm": 0.19428196767319525, |
|
"learning_rate": 2.35682070570865e-05, |
|
"loss": 1.2169, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 8.727272727272727, |
|
"grad_norm": 0.1929543238663036, |
|
"learning_rate": 2.3346050288088743e-05, |
|
"loss": 1.1213, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 8.772727272727273, |
|
"grad_norm": 0.1859295707214649, |
|
"learning_rate": 2.3124311993263192e-05, |
|
"loss": 1.2022, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 8.818181818181818, |
|
"grad_norm": 0.19215762964127076, |
|
"learning_rate": 2.2903014510807392e-05, |
|
"loss": 1.1756, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 8.863636363636363, |
|
"grad_norm": 0.19577206240809536, |
|
"learning_rate": 2.2682180134510943e-05, |
|
"loss": 1.1492, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 8.909090909090908, |
|
"grad_norm": 0.18354313338272446, |
|
"learning_rate": 2.2461831111509496e-05, |
|
"loss": 1.1474, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 8.954545454545455, |
|
"grad_norm": 0.1959877870143329, |
|
"learning_rate": 2.2241989640043633e-05, |
|
"loss": 1.2621, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 0.1948482879201238, |
|
"learning_rate": 2.202267786722252e-05, |
|
"loss": 1.2449, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 9.045454545454545, |
|
"grad_norm": 0.21120390875412814, |
|
"learning_rate": 2.1803917886792812e-05, |
|
"loss": 1.1333, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 9.090909090909092, |
|
"grad_norm": 0.18499690032381164, |
|
"learning_rate": 2.1585731736912922e-05, |
|
"loss": 1.2015, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 9.136363636363637, |
|
"grad_norm": 0.19403738840593351, |
|
"learning_rate": 2.136814139793282e-05, |
|
"loss": 1.1961, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 9.181818181818182, |
|
"grad_norm": 0.2116507051866016, |
|
"learning_rate": 2.1151168790179738e-05, |
|
"loss": 1.215, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 9.227272727272727, |
|
"grad_norm": 0.2091130488246949, |
|
"learning_rate": 2.0934835771749872e-05, |
|
"loss": 1.1493, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 9.272727272727273, |
|
"grad_norm": 0.21656692423046117, |
|
"learning_rate": 2.0719164136306386e-05, |
|
"loss": 1.1132, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 9.318181818181818, |
|
"grad_norm": 0.20613565196879666, |
|
"learning_rate": 2.0504175610883876e-05, |
|
"loss": 1.2056, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 9.363636363636363, |
|
"grad_norm": 0.18361410512148918, |
|
"learning_rate": 2.0289891853699573e-05, |
|
"loss": 1.2396, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 9.409090909090908, |
|
"grad_norm": 0.1970968023390117, |
|
"learning_rate": 2.0076334451971447e-05, |
|
"loss": 1.1505, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 9.454545454545455, |
|
"grad_norm": 0.2802489746470572, |
|
"learning_rate": 1.9863524919743505e-05, |
|
"loss": 1.0803, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 9.5, |
|
"grad_norm": 0.18361667226221806, |
|
"learning_rate": 1.9651484695718435e-05, |
|
"loss": 1.1293, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 9.545454545454545, |
|
"grad_norm": 0.1836624775996522, |
|
"learning_rate": 1.944023514109784e-05, |
|
"loss": 1.1646, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 9.590909090909092, |
|
"grad_norm": 0.7985028121765728, |
|
"learning_rate": 1.922979753743027e-05, |
|
"loss": 1.1631, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 9.636363636363637, |
|
"grad_norm": 0.18982874728183255, |
|
"learning_rate": 1.9020193084467303e-05, |
|
"loss": 1.0795, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 9.681818181818182, |
|
"grad_norm": 0.2738998625073182, |
|
"learning_rate": 1.881144289802788e-05, |
|
"loss": 1.0812, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 9.727272727272727, |
|
"grad_norm": 0.19287056890311244, |
|
"learning_rate": 1.8603568007871025e-05, |
|
"loss": 1.1318, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 9.772727272727273, |
|
"grad_norm": 0.19551935271275803, |
|
"learning_rate": 1.83965893555773e-05, |
|
"loss": 1.1903, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 9.818181818181818, |
|
"grad_norm": 0.20289464182378333, |
|
"learning_rate": 1.8190527792439145e-05, |
|
"loss": 1.1716, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 9.863636363636363, |
|
"grad_norm": 0.18427534137801654, |
|
"learning_rate": 1.7985404077360258e-05, |
|
"loss": 1.181, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 9.909090909090908, |
|
"grad_norm": 0.17269516793495363, |
|
"learning_rate": 1.7781238874764337e-05, |
|
"loss": 1.1443, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 9.954545454545455, |
|
"grad_norm": 0.20393617981346993, |
|
"learning_rate": 1.757805275251333e-05, |
|
"loss": 1.1991, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 0.19442135716601172, |
|
"learning_rate": 1.737586617983534e-05, |
|
"loss": 1.1121, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 10.045454545454545, |
|
"grad_norm": 0.17057632115064372, |
|
"learning_rate": 1.717469952526262e-05, |
|
"loss": 1.162, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 10.090909090909092, |
|
"grad_norm": 0.17688084966953885, |
|
"learning_rate": 1.6974573054579582e-05, |
|
"loss": 1.177, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 10.136363636363637, |
|
"grad_norm": 0.17485623392128088, |
|
"learning_rate": 1.6775506928781146e-05, |
|
"loss": 1.1594, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 10.181818181818182, |
|
"grad_norm": 0.1915709146213454, |
|
"learning_rate": 1.6577521202041775e-05, |
|
"loss": 1.1637, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 10.227272727272727, |
|
"grad_norm": 0.18779150088359975, |
|
"learning_rate": 1.6380635819695172e-05, |
|
"loss": 1.1325, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 10.272727272727273, |
|
"grad_norm": 0.17675767122639172, |
|
"learning_rate": 1.6184870616224905e-05, |
|
"loss": 1.1283, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 10.318181818181818, |
|
"grad_norm": 0.20187810956460173, |
|
"learning_rate": 1.599024531326632e-05, |
|
"loss": 1.1362, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 10.363636363636363, |
|
"grad_norm": 0.18429771837794573, |
|
"learning_rate": 1.5796779517619757e-05, |
|
"loss": 1.0782, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 10.409090909090908, |
|
"grad_norm": 0.18129745466430086, |
|
"learning_rate": 1.560449271927528e-05, |
|
"loss": 1.1556, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 10.454545454545455, |
|
"grad_norm": 0.18156111868915045, |
|
"learning_rate": 1.541340428944929e-05, |
|
"loss": 1.1472, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 10.5, |
|
"grad_norm": 0.2058178822209493, |
|
"learning_rate": 1.5223533478633012e-05, |
|
"loss": 1.1436, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 10.545454545454545, |
|
"grad_norm": 0.19420543695117135, |
|
"learning_rate": 1.5034899414653183e-05, |
|
"loss": 1.1632, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 10.590909090909092, |
|
"grad_norm": 0.1816721312324971, |
|
"learning_rate": 1.4847521100745101e-05, |
|
"loss": 1.0919, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 10.636363636363637, |
|
"grad_norm": 0.17503046181743187, |
|
"learning_rate": 1.4661417413638206e-05, |
|
"loss": 1.177, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 10.681818181818182, |
|
"grad_norm": 0.1996340286183738, |
|
"learning_rate": 1.44766071016544e-05, |
|
"loss": 1.1901, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 10.727272727272727, |
|
"grad_norm": 0.18843938813925634, |
|
"learning_rate": 1.4293108782819345e-05, |
|
"loss": 1.1081, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 10.772727272727273, |
|
"grad_norm": 0.18750865793808888, |
|
"learning_rate": 1.4110940942986844e-05, |
|
"loss": 1.0781, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 10.818181818181818, |
|
"grad_norm": 0.17785144309042253, |
|
"learning_rate": 1.3930121933976556e-05, |
|
"loss": 1.1961, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 10.863636363636363, |
|
"grad_norm": 0.17015756605905757, |
|
"learning_rate": 1.37506699717252e-05, |
|
"loss": 1.142, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 10.909090909090908, |
|
"grad_norm": 0.1976824831465818, |
|
"learning_rate": 1.3572603134451479e-05, |
|
"loss": 1.1024, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 10.954545454545455, |
|
"grad_norm": 0.17152795538031174, |
|
"learning_rate": 1.3395939360834845e-05, |
|
"loss": 1.136, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 0.1667419474833486, |
|
"learning_rate": 1.3220696448208308e-05, |
|
"loss": 1.1114, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 11.045454545454545, |
|
"grad_norm": 0.16490649981297714, |
|
"learning_rate": 1.304689205076558e-05, |
|
"loss": 1.1569, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 11.090909090909092, |
|
"grad_norm": 0.17142788488665067, |
|
"learning_rate": 1.2874543677782508e-05, |
|
"loss": 1.1667, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 11.136363636363637, |
|
"grad_norm": 0.1747032605597451, |
|
"learning_rate": 1.2703668691853155e-05, |
|
"loss": 1.1422, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 11.181818181818182, |
|
"grad_norm": 0.1680960418576758, |
|
"learning_rate": 1.253428430714076e-05, |
|
"loss": 1.1462, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 11.227272727272727, |
|
"grad_norm": 0.1893549688172363, |
|
"learning_rate": 1.2366407587643432e-05, |
|
"loss": 1.1496, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 11.272727272727273, |
|
"grad_norm": 0.2930049956267172, |
|
"learning_rate": 1.220005544547522e-05, |
|
"loss": 1.0953, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 11.318181818181818, |
|
"grad_norm": 0.16093892837183954, |
|
"learning_rate": 1.2035244639162319e-05, |
|
"loss": 1.1001, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 11.363636363636363, |
|
"grad_norm": 0.16689857435832806, |
|
"learning_rate": 1.1871991771954748e-05, |
|
"loss": 1.0471, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 11.409090909090908, |
|
"grad_norm": 0.17166817028161555, |
|
"learning_rate": 1.1710313290153795e-05, |
|
"loss": 1.0986, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 11.454545454545455, |
|
"grad_norm": 0.18190627031196485, |
|
"learning_rate": 1.1550225481455165e-05, |
|
"loss": 1.1788, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 11.5, |
|
"grad_norm": 0.17512791533622662, |
|
"learning_rate": 1.1391744473308106e-05, |
|
"loss": 1.1673, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 11.545454545454545, |
|
"grad_norm": 0.16757254457143245, |
|
"learning_rate": 1.1234886231290759e-05, |
|
"loss": 1.1746, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 11.590909090909092, |
|
"grad_norm": 0.1668004234347932, |
|
"learning_rate": 1.1079666557501736e-05, |
|
"loss": 1.1107, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 11.636363636363637, |
|
"grad_norm": 0.15359971441629242, |
|
"learning_rate": 1.0926101088968207e-05, |
|
"loss": 1.1658, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 11.681818181818182, |
|
"grad_norm": 0.16049551854978608, |
|
"learning_rate": 1.0774205296070597e-05, |
|
"loss": 1.0853, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 11.727272727272727, |
|
"grad_norm": 0.20099872095589202, |
|
"learning_rate": 1.062399448098409e-05, |
|
"loss": 1.0978, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 11.772727272727273, |
|
"grad_norm": 0.18578304835004, |
|
"learning_rate": 1.0475483776137062e-05, |
|
"loss": 1.1296, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 11.818181818181818, |
|
"grad_norm": 0.16704809777000973, |
|
"learning_rate": 1.0328688142686627e-05, |
|
"loss": 1.0695, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 11.863636363636363, |
|
"grad_norm": 0.17323425454468377, |
|
"learning_rate": 1.0183622369011422e-05, |
|
"loss": 1.077, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 11.909090909090908, |
|
"grad_norm": 0.17800221938786567, |
|
"learning_rate": 1.0040301069221823e-05, |
|
"loss": 1.122, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 11.954545454545455, |
|
"grad_norm": 0.17418177714949332, |
|
"learning_rate": 9.89873868168766e-06, |
|
"loss": 1.1375, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 0.16676746363627873, |
|
"learning_rate": 9.758949467583754e-06, |
|
"loss": 1.1737, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 12.045454545454545, |
|
"grad_norm": 0.1527614833387276, |
|
"learning_rate": 9.620947509453155e-06, |
|
"loss": 1.1136, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 12.090909090909092, |
|
"grad_norm": 0.17699034639073358, |
|
"learning_rate": 9.484746709788451e-06, |
|
"loss": 1.1231, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 12.136363636363637, |
|
"grad_norm": 0.17421157455050928, |
|
"learning_rate": 9.350360789631291e-06, |
|
"loss": 1.148, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 12.181818181818182, |
|
"grad_norm": 0.15462065956553298, |
|
"learning_rate": 9.217803287190029e-06, |
|
"loss": 1.1435, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 12.227272727272727, |
|
"grad_norm": 0.16543045040679552, |
|
"learning_rate": 9.087087556475873e-06, |
|
"loss": 1.1312, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 12.272727272727273, |
|
"grad_norm": 0.17092139127270728, |
|
"learning_rate": 8.958226765957655e-06, |
|
"loss": 1.1164, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 12.318181818181818, |
|
"grad_norm": 0.16435344514564526, |
|
"learning_rate": 8.831233897235128e-06, |
|
"loss": 1.075, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 12.363636363636363, |
|
"grad_norm": 0.16450679267899113, |
|
"learning_rate": 8.706121743731256e-06, |
|
"loss": 1.1508, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 12.409090909090908, |
|
"grad_norm": 0.15282580972945572, |
|
"learning_rate": 8.58290290940337e-06, |
|
"loss": 1.135, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 12.454545454545455, |
|
"grad_norm": 0.1555918103689812, |
|
"learning_rate": 8.461589807473392e-06, |
|
"loss": 1.121, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"grad_norm": 0.17353062952008638, |
|
"learning_rate": 8.342194659177358e-06, |
|
"loss": 1.1849, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 12.545454545454545, |
|
"grad_norm": 0.15700310300217593, |
|
"learning_rate": 8.224729492534231e-06, |
|
"loss": 1.1479, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 12.590909090909092, |
|
"grad_norm": 0.23929770557768484, |
|
"learning_rate": 8.109206141134142e-06, |
|
"loss": 1.0834, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 12.636363636363637, |
|
"grad_norm": 0.18493048427935435, |
|
"learning_rate": 7.995636242946305e-06, |
|
"loss": 1.1398, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 12.681818181818182, |
|
"grad_norm": 0.16961784748611264, |
|
"learning_rate": 7.884031239146569e-06, |
|
"loss": 1.0651, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 12.727272727272727, |
|
"grad_norm": 0.1597693846586007, |
|
"learning_rate": 7.774402372964833e-06, |
|
"loss": 1.0952, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 12.772727272727273, |
|
"grad_norm": 0.16022321839887022, |
|
"learning_rate": 7.666760688552371e-06, |
|
"loss": 1.1269, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 12.818181818181818, |
|
"grad_norm": 0.1661994031922762, |
|
"learning_rate": 7.5611170298692466e-06, |
|
"loss": 1.0682, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 12.863636363636363, |
|
"grad_norm": 0.16932158388771293, |
|
"learning_rate": 7.4574820395918735e-06, |
|
"loss": 1.163, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 12.909090909090908, |
|
"grad_norm": 0.15828697644374046, |
|
"learning_rate": 7.3558661580408545e-06, |
|
"loss": 1.1466, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 12.954545454545455, |
|
"grad_norm": 0.25782602954860684, |
|
"learning_rate": 7.256279622129215e-06, |
|
"loss": 0.9785, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 0.15598380461908062, |
|
"learning_rate": 7.15873246433113e-06, |
|
"loss": 1.1248, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 13.045454545454545, |
|
"grad_norm": 0.16456745649594778, |
|
"learning_rate": 7.063234511671206e-06, |
|
"loss": 1.1426, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 13.090909090909092, |
|
"grad_norm": 0.17833577442195056, |
|
"learning_rate": 6.969795384734556e-06, |
|
"loss": 1.1278, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 13.136363636363637, |
|
"grad_norm": 0.16618049623992226, |
|
"learning_rate": 6.878424496697554e-06, |
|
"loss": 1.0637, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 13.181818181818182, |
|
"grad_norm": 0.1548002033964038, |
|
"learning_rate": 6.789131052379549e-06, |
|
"loss": 1.1438, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 13.227272727272727, |
|
"grad_norm": 0.15277655748023802, |
|
"learning_rate": 6.7019240473155924e-06, |
|
"loss": 1.0657, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 13.272727272727273, |
|
"grad_norm": 0.24777282879928914, |
|
"learning_rate": 6.616812266850187e-06, |
|
"loss": 1.159, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 13.318181818181818, |
|
"grad_norm": 0.19128988519206985, |
|
"learning_rate": 6.5338042852522305e-06, |
|
"loss": 1.1772, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 13.363636363636363, |
|
"grad_norm": 0.17365339407446134, |
|
"learning_rate": 6.4529084648512815e-06, |
|
"loss": 1.0807, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 13.409090909090908, |
|
"grad_norm": 0.16321204192747293, |
|
"learning_rate": 6.374132955195062e-06, |
|
"loss": 1.1293, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 13.454545454545455, |
|
"grad_norm": 0.1754755861725117, |
|
"learning_rate": 6.297485692228512e-06, |
|
"loss": 1.1434, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 13.5, |
|
"grad_norm": 0.14452663905974894, |
|
"learning_rate": 6.222974397494309e-06, |
|
"loss": 1.0709, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 13.545454545454545, |
|
"grad_norm": 0.14708535357644448, |
|
"learning_rate": 6.150606577354948e-06, |
|
"loss": 1.0964, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 13.590909090909092, |
|
"grad_norm": 0.16471061622026806, |
|
"learning_rate": 6.080389522236585e-06, |
|
"loss": 1.114, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 13.636363636363637, |
|
"grad_norm": 0.175358096168744, |
|
"learning_rate": 6.012330305894584e-06, |
|
"loss": 1.0573, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 13.681818181818182, |
|
"grad_norm": 0.15744915474687687, |
|
"learning_rate": 5.946435784700869e-06, |
|
"loss": 1.1256, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 13.727272727272727, |
|
"grad_norm": 0.1594724312088306, |
|
"learning_rate": 5.8827125969532365e-06, |
|
"loss": 1.0757, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 13.772727272727273, |
|
"grad_norm": 0.16045998825705504, |
|
"learning_rate": 5.82116716220659e-06, |
|
"loss": 1.1302, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 13.818181818181818, |
|
"grad_norm": 0.15174228657227892, |
|
"learning_rate": 5.76180568062623e-06, |
|
"loss": 1.1555, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 13.863636363636363, |
|
"grad_norm": 0.16291888537269678, |
|
"learning_rate": 5.704634132363239e-06, |
|
"loss": 1.131, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 13.909090909090908, |
|
"grad_norm": 0.15722592315890294, |
|
"learning_rate": 5.649658276952029e-06, |
|
"loss": 1.0328, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 13.954545454545455, |
|
"grad_norm": 0.15455642700386937, |
|
"learning_rate": 5.596883652730137e-06, |
|
"loss": 1.0786, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 0.16018814750974167, |
|
"learning_rate": 5.546315576280258e-06, |
|
"loss": 1.0977, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 14.045454545454545, |
|
"grad_norm": 0.15691439028906481, |
|
"learning_rate": 5.497959141894671e-06, |
|
"loss": 1.1606, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 14.090909090909092, |
|
"grad_norm": 0.15808702196585686, |
|
"learning_rate": 5.451819221062024e-06, |
|
"loss": 1.1181, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 14.136363636363637, |
|
"grad_norm": 0.15115950620778784, |
|
"learning_rate": 5.4079004619765614e-06, |
|
"loss": 1.1576, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 14.181818181818182, |
|
"grad_norm": 0.15083892267087215, |
|
"learning_rate": 5.3662072890698845e-06, |
|
"loss": 1.1048, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 14.227272727272727, |
|
"grad_norm": 0.1481235847517611, |
|
"learning_rate": 5.326743902565208e-06, |
|
"loss": 1.0597, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 14.272727272727273, |
|
"grad_norm": 0.17487949113995144, |
|
"learning_rate": 5.289514278054232e-06, |
|
"loss": 1.1048, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 14.318181818181818, |
|
"grad_norm": 0.1565612302630643, |
|
"learning_rate": 5.254522166096635e-06, |
|
"loss": 1.1404, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 14.363636363636363, |
|
"grad_norm": 0.15463375307382266, |
|
"learning_rate": 5.221771091842242e-06, |
|
"loss": 1.0867, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 14.409090909090908, |
|
"grad_norm": 0.14777974752978296, |
|
"learning_rate": 5.191264354675882e-06, |
|
"loss": 1.1297, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 14.454545454545455, |
|
"grad_norm": 0.15705272728049427, |
|
"learning_rate": 5.1630050278850275e-06, |
|
"loss": 1.1302, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 14.5, |
|
"grad_norm": 0.15391726300293704, |
|
"learning_rate": 5.136995958350162e-06, |
|
"loss": 1.1421, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 14.545454545454545, |
|
"grad_norm": 0.15288254329961756, |
|
"learning_rate": 5.113239766257999e-06, |
|
"loss": 1.1455, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 14.590909090909092, |
|
"grad_norm": 0.14947122968152077, |
|
"learning_rate": 5.091738844837518e-06, |
|
"loss": 1.0706, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 14.636363636363637, |
|
"grad_norm": 0.16745909284503774, |
|
"learning_rate": 5.0724953601188635e-06, |
|
"loss": 1.0375, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 14.681818181818182, |
|
"grad_norm": 0.1515771191801389, |
|
"learning_rate": 5.0555112507151364e-06, |
|
"loss": 1.1166, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 14.727272727272727, |
|
"grad_norm": 0.15386111047539605, |
|
"learning_rate": 5.0407882276271015e-06, |
|
"loss": 1.0891, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 14.772727272727273, |
|
"grad_norm": 1.3735036578655186, |
|
"learning_rate": 5.028327774070807e-06, |
|
"loss": 1.0836, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 14.818181818181818, |
|
"grad_norm": 0.1570470593366731, |
|
"learning_rate": 5.018131145328181e-06, |
|
"loss": 1.0566, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 14.863636363636363, |
|
"grad_norm": 0.15950636214118602, |
|
"learning_rate": 5.0101993686205585e-06, |
|
"loss": 1.1019, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 14.909090909090908, |
|
"grad_norm": 0.15627633379629186, |
|
"learning_rate": 5.004533243005204e-06, |
|
"loss": 1.0666, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 14.954545454545455, |
|
"grad_norm": 0.157736145851946, |
|
"learning_rate": 5.0011333392948126e-06, |
|
"loss": 1.1023, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 0.15122659418177306, |
|
"learning_rate": 5e-06, |
|
"loss": 1.1027, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"step": 330, |
|
"total_flos": 103645451026432.0, |
|
"train_loss": 1.592864108988733, |
|
"train_runtime": 2075.5919, |
|
"train_samples_per_second": 2.544, |
|
"train_steps_per_second": 0.159 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 330, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 15, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 103645451026432.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |