|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.7122198120028923, |
|
"eval_steps": 692, |
|
"global_step": 985, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0007230657989877079, |
|
"grad_norm": 0.37217167019844055, |
|
"learning_rate": 2.0000000000000002e-07, |
|
"loss": 1.5113, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0007230657989877079, |
|
"eval_loss": 1.3881797790527344, |
|
"eval_runtime": 667.6567, |
|
"eval_samples_per_second": 3.49, |
|
"eval_steps_per_second": 0.873, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0014461315979754157, |
|
"grad_norm": 0.26884201169013977, |
|
"learning_rate": 4.0000000000000003e-07, |
|
"loss": 1.3991, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0021691973969631237, |
|
"grad_norm": 0.42644935846328735, |
|
"learning_rate": 6.000000000000001e-07, |
|
"loss": 1.5952, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0028922631959508315, |
|
"grad_norm": 0.29478296637535095, |
|
"learning_rate": 8.000000000000001e-07, |
|
"loss": 1.3243, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0036153289949385392, |
|
"grad_norm": 0.41465193033218384, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 1.4471, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.004338394793926247, |
|
"grad_norm": 0.25165989995002747, |
|
"learning_rate": 1.2000000000000002e-06, |
|
"loss": 1.3216, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.005061460592913955, |
|
"grad_norm": 0.2920430600643158, |
|
"learning_rate": 1.4000000000000001e-06, |
|
"loss": 1.4116, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.005784526391901663, |
|
"grad_norm": 0.3292546272277832, |
|
"learning_rate": 1.6000000000000001e-06, |
|
"loss": 1.4948, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.006507592190889371, |
|
"grad_norm": 0.2700996696949005, |
|
"learning_rate": 1.8000000000000001e-06, |
|
"loss": 1.2588, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0072306579898770785, |
|
"grad_norm": 0.268759161233902, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 1.4552, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.007953723788864787, |
|
"grad_norm": 0.3097935616970062, |
|
"learning_rate": 2.2e-06, |
|
"loss": 1.2995, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.008676789587852495, |
|
"grad_norm": 0.3226897120475769, |
|
"learning_rate": 2.4000000000000003e-06, |
|
"loss": 1.439, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.009399855386840203, |
|
"grad_norm": 0.2564990222454071, |
|
"learning_rate": 2.6e-06, |
|
"loss": 1.4056, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.01012292118582791, |
|
"grad_norm": 0.2508530020713806, |
|
"learning_rate": 2.8000000000000003e-06, |
|
"loss": 1.3216, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.010845986984815618, |
|
"grad_norm": 0.2531338632106781, |
|
"learning_rate": 3e-06, |
|
"loss": 1.4021, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.011569052783803326, |
|
"grad_norm": 0.45660316944122314, |
|
"learning_rate": 3.2000000000000003e-06, |
|
"loss": 1.8103, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.012292118582791034, |
|
"grad_norm": 0.2740483283996582, |
|
"learning_rate": 3.4000000000000005e-06, |
|
"loss": 1.2811, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.013015184381778741, |
|
"grad_norm": 0.262483686208725, |
|
"learning_rate": 3.6000000000000003e-06, |
|
"loss": 1.4296, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.01373825018076645, |
|
"grad_norm": 0.23837727308273315, |
|
"learning_rate": 3.8000000000000005e-06, |
|
"loss": 1.3429, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.014461315979754157, |
|
"grad_norm": 0.2985168993473053, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 1.53, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.015184381778741865, |
|
"grad_norm": 0.3057151138782501, |
|
"learning_rate": 4.2000000000000004e-06, |
|
"loss": 1.3555, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.015907447577729574, |
|
"grad_norm": 0.2920631766319275, |
|
"learning_rate": 4.4e-06, |
|
"loss": 1.3522, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.016630513376717282, |
|
"grad_norm": 0.2631171941757202, |
|
"learning_rate": 4.600000000000001e-06, |
|
"loss": 1.2649, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.01735357917570499, |
|
"grad_norm": 0.2993241250514984, |
|
"learning_rate": 4.800000000000001e-06, |
|
"loss": 1.4431, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.018076644974692697, |
|
"grad_norm": 0.2654544711112976, |
|
"learning_rate": 5e-06, |
|
"loss": 1.3019, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.018799710773680405, |
|
"grad_norm": 0.28488221764564514, |
|
"learning_rate": 5.2e-06, |
|
"loss": 1.3105, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.019522776572668113, |
|
"grad_norm": 0.28586897253990173, |
|
"learning_rate": 5.400000000000001e-06, |
|
"loss": 1.3502, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.02024584237165582, |
|
"grad_norm": 0.2833098769187927, |
|
"learning_rate": 5.600000000000001e-06, |
|
"loss": 1.3046, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.02096890817064353, |
|
"grad_norm": 0.24501250684261322, |
|
"learning_rate": 5.8e-06, |
|
"loss": 1.2376, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.021691973969631236, |
|
"grad_norm": 0.4207770824432373, |
|
"learning_rate": 6e-06, |
|
"loss": 1.5645, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.022415039768618944, |
|
"grad_norm": 0.5098739862442017, |
|
"learning_rate": 6.200000000000001e-06, |
|
"loss": 1.5241, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.023138105567606652, |
|
"grad_norm": 0.31374362111091614, |
|
"learning_rate": 6.4000000000000006e-06, |
|
"loss": 1.4496, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.02386117136659436, |
|
"grad_norm": 0.49290239810943604, |
|
"learning_rate": 6.600000000000001e-06, |
|
"loss": 1.626, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.024584237165582067, |
|
"grad_norm": 0.31210431456565857, |
|
"learning_rate": 6.800000000000001e-06, |
|
"loss": 1.3339, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.025307302964569775, |
|
"grad_norm": 0.3143630623817444, |
|
"learning_rate": 7e-06, |
|
"loss": 1.2261, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.026030368763557483, |
|
"grad_norm": 0.4381314218044281, |
|
"learning_rate": 7.2000000000000005e-06, |
|
"loss": 1.4051, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.02675343456254519, |
|
"grad_norm": 0.3636917769908905, |
|
"learning_rate": 7.4e-06, |
|
"loss": 1.3815, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.0274765003615329, |
|
"grad_norm": 0.3755267560482025, |
|
"learning_rate": 7.600000000000001e-06, |
|
"loss": 1.3677, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.028199566160520606, |
|
"grad_norm": 0.3734245300292969, |
|
"learning_rate": 7.800000000000002e-06, |
|
"loss": 1.4356, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.028922631959508314, |
|
"grad_norm": 0.3986704349517822, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 1.4567, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.02964569775849602, |
|
"grad_norm": 0.39432185888290405, |
|
"learning_rate": 8.2e-06, |
|
"loss": 1.5894, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.03036876355748373, |
|
"grad_norm": 0.5450723767280579, |
|
"learning_rate": 8.400000000000001e-06, |
|
"loss": 1.569, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.03109182935647144, |
|
"grad_norm": 0.38127774000167847, |
|
"learning_rate": 8.6e-06, |
|
"loss": 1.472, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.03181489515545915, |
|
"grad_norm": 0.4520113468170166, |
|
"learning_rate": 8.8e-06, |
|
"loss": 1.4103, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.03253796095444685, |
|
"grad_norm": 0.382798969745636, |
|
"learning_rate": 9e-06, |
|
"loss": 1.3647, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.033261026753434564, |
|
"grad_norm": 0.4572380483150482, |
|
"learning_rate": 9.200000000000002e-06, |
|
"loss": 1.4196, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.03398409255242227, |
|
"grad_norm": 0.3955709934234619, |
|
"learning_rate": 9.4e-06, |
|
"loss": 1.4654, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.03470715835140998, |
|
"grad_norm": 0.5052328109741211, |
|
"learning_rate": 9.600000000000001e-06, |
|
"loss": 1.6222, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.035430224150397684, |
|
"grad_norm": 0.3734697699546814, |
|
"learning_rate": 9.800000000000001e-06, |
|
"loss": 1.3473, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.036153289949385395, |
|
"grad_norm": 0.3865366280078888, |
|
"learning_rate": 1e-05, |
|
"loss": 1.4105, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0368763557483731, |
|
"grad_norm": 0.42371755838394165, |
|
"learning_rate": 1.02e-05, |
|
"loss": 1.4933, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.03759942154736081, |
|
"grad_norm": 0.34533318877220154, |
|
"learning_rate": 1.04e-05, |
|
"loss": 1.214, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.038322487346348515, |
|
"grad_norm": 0.3520753085613251, |
|
"learning_rate": 1.0600000000000002e-05, |
|
"loss": 1.2373, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.039045553145336226, |
|
"grad_norm": 0.6355977058410645, |
|
"learning_rate": 1.0800000000000002e-05, |
|
"loss": 1.5411, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.03976861894432393, |
|
"grad_norm": 0.46781396865844727, |
|
"learning_rate": 1.1000000000000001e-05, |
|
"loss": 1.4157, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.04049168474331164, |
|
"grad_norm": 0.3881015479564667, |
|
"learning_rate": 1.1200000000000001e-05, |
|
"loss": 1.3046, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.04121475054229935, |
|
"grad_norm": 0.5843562483787537, |
|
"learning_rate": 1.14e-05, |
|
"loss": 1.5255, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.04193781634128706, |
|
"grad_norm": 0.36714574694633484, |
|
"learning_rate": 1.16e-05, |
|
"loss": 1.356, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.04266088214027477, |
|
"grad_norm": 0.3784966468811035, |
|
"learning_rate": 1.18e-05, |
|
"loss": 1.1724, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.04338394793926247, |
|
"grad_norm": 0.421464204788208, |
|
"learning_rate": 1.2e-05, |
|
"loss": 1.4358, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.044107013738250184, |
|
"grad_norm": 0.3749872148036957, |
|
"learning_rate": 1.22e-05, |
|
"loss": 1.2683, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.04483007953723789, |
|
"grad_norm": 0.3953036665916443, |
|
"learning_rate": 1.2400000000000002e-05, |
|
"loss": 1.2323, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.0455531453362256, |
|
"grad_norm": 0.47576940059661865, |
|
"learning_rate": 1.2600000000000001e-05, |
|
"loss": 1.5067, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.046276211135213303, |
|
"grad_norm": 0.3674280643463135, |
|
"learning_rate": 1.2800000000000001e-05, |
|
"loss": 1.2926, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.046999276934201015, |
|
"grad_norm": 0.4777793288230896, |
|
"learning_rate": 1.3000000000000001e-05, |
|
"loss": 1.4084, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.04772234273318872, |
|
"grad_norm": 0.5235625505447388, |
|
"learning_rate": 1.3200000000000002e-05, |
|
"loss": 1.4148, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.04844540853217643, |
|
"grad_norm": 0.43167218565940857, |
|
"learning_rate": 1.3400000000000002e-05, |
|
"loss": 1.2986, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.049168474331164135, |
|
"grad_norm": 0.35833850502967834, |
|
"learning_rate": 1.3600000000000002e-05, |
|
"loss": 1.2428, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.049891540130151846, |
|
"grad_norm": 0.4315280616283417, |
|
"learning_rate": 1.38e-05, |
|
"loss": 1.2497, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.05061460592913955, |
|
"grad_norm": 0.5640541315078735, |
|
"learning_rate": 1.4e-05, |
|
"loss": 1.3195, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.05133767172812726, |
|
"grad_norm": 0.3889266848564148, |
|
"learning_rate": 1.4200000000000001e-05, |
|
"loss": 1.2181, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.052060737527114966, |
|
"grad_norm": 0.5410600900650024, |
|
"learning_rate": 1.4400000000000001e-05, |
|
"loss": 1.3649, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.05278380332610268, |
|
"grad_norm": 0.4043705463409424, |
|
"learning_rate": 1.46e-05, |
|
"loss": 1.3354, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.05350686912509038, |
|
"grad_norm": 0.2976624071598053, |
|
"learning_rate": 1.48e-05, |
|
"loss": 1.2689, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.05422993492407809, |
|
"grad_norm": 0.4049951136112213, |
|
"learning_rate": 1.5000000000000002e-05, |
|
"loss": 1.3255, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.0549530007230658, |
|
"grad_norm": 0.35892027616500854, |
|
"learning_rate": 1.5200000000000002e-05, |
|
"loss": 1.1894, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.05567606652205351, |
|
"grad_norm": 0.46196767687797546, |
|
"learning_rate": 1.54e-05, |
|
"loss": 1.3013, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.05639913232104121, |
|
"grad_norm": 0.46217429637908936, |
|
"learning_rate": 1.5600000000000003e-05, |
|
"loss": 1.1938, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.05712219812002892, |
|
"grad_norm": 1.1090481281280518, |
|
"learning_rate": 1.58e-05, |
|
"loss": 1.5665, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.05784526391901663, |
|
"grad_norm": 0.5215122699737549, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"loss": 1.3107, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.05856832971800434, |
|
"grad_norm": 0.31574738025665283, |
|
"learning_rate": 1.62e-05, |
|
"loss": 1.2218, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.05929139551699204, |
|
"grad_norm": 0.34580984711647034, |
|
"learning_rate": 1.64e-05, |
|
"loss": 1.1997, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.060014461315979754, |
|
"grad_norm": 0.4590187966823578, |
|
"learning_rate": 1.66e-05, |
|
"loss": 1.3401, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.06073752711496746, |
|
"grad_norm": 0.40272387862205505, |
|
"learning_rate": 1.6800000000000002e-05, |
|
"loss": 1.256, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.06146059291395517, |
|
"grad_norm": 0.4131518006324768, |
|
"learning_rate": 1.7e-05, |
|
"loss": 1.1237, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.06218365871294288, |
|
"grad_norm": 0.3303501605987549, |
|
"learning_rate": 1.72e-05, |
|
"loss": 1.2023, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.06290672451193059, |
|
"grad_norm": 0.314738929271698, |
|
"learning_rate": 1.7400000000000003e-05, |
|
"loss": 1.2153, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.0636297903109183, |
|
"grad_norm": 0.31625810265541077, |
|
"learning_rate": 1.76e-05, |
|
"loss": 1.2048, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.064352856109906, |
|
"grad_norm": 0.36682891845703125, |
|
"learning_rate": 1.7800000000000002e-05, |
|
"loss": 1.278, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.0650759219088937, |
|
"grad_norm": 0.3271387219429016, |
|
"learning_rate": 1.8e-05, |
|
"loss": 1.3014, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.06579898770788142, |
|
"grad_norm": 0.38205042481422424, |
|
"learning_rate": 1.8200000000000002e-05, |
|
"loss": 1.2683, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.06652205350686913, |
|
"grad_norm": 0.3368231952190399, |
|
"learning_rate": 1.8400000000000003e-05, |
|
"loss": 1.3805, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.06724511930585683, |
|
"grad_norm": 0.462415874004364, |
|
"learning_rate": 1.86e-05, |
|
"loss": 1.2521, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.06796818510484454, |
|
"grad_norm": 0.3378755450248718, |
|
"learning_rate": 1.88e-05, |
|
"loss": 1.1164, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.06869125090383225, |
|
"grad_norm": 0.3311493694782257, |
|
"learning_rate": 1.9e-05, |
|
"loss": 1.223, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.06941431670281996, |
|
"grad_norm": 0.3691946268081665, |
|
"learning_rate": 1.9200000000000003e-05, |
|
"loss": 1.1876, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.07013738250180766, |
|
"grad_norm": 0.40734970569610596, |
|
"learning_rate": 1.94e-05, |
|
"loss": 1.2732, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.07086044830079537, |
|
"grad_norm": 1.0996010303497314, |
|
"learning_rate": 1.9600000000000002e-05, |
|
"loss": 1.4193, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.07158351409978309, |
|
"grad_norm": 0.4469655752182007, |
|
"learning_rate": 1.98e-05, |
|
"loss": 1.1684, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.07230657989877079, |
|
"grad_norm": 0.3527953028678894, |
|
"learning_rate": 2e-05, |
|
"loss": 1.1232, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0730296456977585, |
|
"grad_norm": 0.33126530051231384, |
|
"learning_rate": 1.999999893747778e-05, |
|
"loss": 1.1679, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.0737527114967462, |
|
"grad_norm": 0.2917117476463318, |
|
"learning_rate": 1.999999574991134e-05, |
|
"loss": 1.1278, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.07447577729573392, |
|
"grad_norm": 0.3925279676914215, |
|
"learning_rate": 1.999999043730136e-05, |
|
"loss": 1.2538, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.07519884309472162, |
|
"grad_norm": 0.5860010981559753, |
|
"learning_rate": 1.999998299964897e-05, |
|
"loss": 1.0725, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.07592190889370933, |
|
"grad_norm": 0.37837302684783936, |
|
"learning_rate": 1.999997343695575e-05, |
|
"loss": 1.2131, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.07664497469269703, |
|
"grad_norm": 0.32298776507377625, |
|
"learning_rate": 1.999996174922373e-05, |
|
"loss": 1.1068, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.07736804049168475, |
|
"grad_norm": 0.2889445424079895, |
|
"learning_rate": 1.999994793645539e-05, |
|
"loss": 1.1648, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.07809110629067245, |
|
"grad_norm": 0.326594740152359, |
|
"learning_rate": 1.9999931998653677e-05, |
|
"loss": 1.1019, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.07881417208966016, |
|
"grad_norm": 0.37818118929862976, |
|
"learning_rate": 1.9999913935821973e-05, |
|
"loss": 1.1402, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.07953723788864786, |
|
"grad_norm": 0.3480512201786041, |
|
"learning_rate": 1.9999893747964108e-05, |
|
"loss": 1.1359, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.08026030368763558, |
|
"grad_norm": 0.2945075035095215, |
|
"learning_rate": 1.9999871435084384e-05, |
|
"loss": 1.1261, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.08098336948662328, |
|
"grad_norm": 0.3005722463130951, |
|
"learning_rate": 1.9999846997187535e-05, |
|
"loss": 1.1119, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.08170643528561099, |
|
"grad_norm": 0.38726744055747986, |
|
"learning_rate": 1.9999820434278755e-05, |
|
"loss": 1.231, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.0824295010845987, |
|
"grad_norm": 0.3677898943424225, |
|
"learning_rate": 1.999979174636369e-05, |
|
"loss": 1.0692, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.08315256688358641, |
|
"grad_norm": 0.45312801003456116, |
|
"learning_rate": 1.9999760933448443e-05, |
|
"loss": 1.1273, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.08387563268257411, |
|
"grad_norm": 0.33305323123931885, |
|
"learning_rate": 1.999972799553955e-05, |
|
"loss": 1.2399, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.08459869848156182, |
|
"grad_norm": 0.2953278124332428, |
|
"learning_rate": 1.9999692932644016e-05, |
|
"loss": 1.2297, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.08532176428054954, |
|
"grad_norm": 0.5426760315895081, |
|
"learning_rate": 1.9999655744769292e-05, |
|
"loss": 1.0915, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.08604483007953724, |
|
"grad_norm": 0.7710702419281006, |
|
"learning_rate": 1.999961643192328e-05, |
|
"loss": 1.1106, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.08676789587852494, |
|
"grad_norm": 0.4938242435455322, |
|
"learning_rate": 1.9999574994114336e-05, |
|
"loss": 1.1751, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.08749096167751265, |
|
"grad_norm": 0.2793751657009125, |
|
"learning_rate": 1.9999531431351263e-05, |
|
"loss": 1.0712, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.08821402747650037, |
|
"grad_norm": 0.37205490469932556, |
|
"learning_rate": 1.999948574364332e-05, |
|
"loss": 1.1263, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.08893709327548807, |
|
"grad_norm": 0.25429850816726685, |
|
"learning_rate": 1.9999437931000213e-05, |
|
"loss": 1.0495, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.08966015907447578, |
|
"grad_norm": 0.3983619213104248, |
|
"learning_rate": 1.9999387993432107e-05, |
|
"loss": 1.1422, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.09038322487346348, |
|
"grad_norm": 0.27335548400878906, |
|
"learning_rate": 1.9999335930949612e-05, |
|
"loss": 1.1263, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.0911062906724512, |
|
"grad_norm": 0.24053499102592468, |
|
"learning_rate": 1.9999281743563788e-05, |
|
"loss": 1.1613, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.0918293564714389, |
|
"grad_norm": 0.2247527539730072, |
|
"learning_rate": 1.999922543128616e-05, |
|
"loss": 1.0831, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.09255242227042661, |
|
"grad_norm": 0.3363755941390991, |
|
"learning_rate": 1.9999166994128686e-05, |
|
"loss": 1.2648, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.09327548806941431, |
|
"grad_norm": 0.31380873918533325, |
|
"learning_rate": 1.9999106432103785e-05, |
|
"loss": 1.1549, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.09399855386840203, |
|
"grad_norm": 0.40868130326271057, |
|
"learning_rate": 1.9999043745224324e-05, |
|
"loss": 1.0049, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.09472161966738973, |
|
"grad_norm": 0.46924683451652527, |
|
"learning_rate": 1.999897893350363e-05, |
|
"loss": 1.3451, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.09544468546637744, |
|
"grad_norm": 0.42218780517578125, |
|
"learning_rate": 1.9998911996955478e-05, |
|
"loss": 1.1618, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.09616775126536514, |
|
"grad_norm": 0.24168308079242706, |
|
"learning_rate": 1.999884293559408e-05, |
|
"loss": 1.1712, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.09689081706435286, |
|
"grad_norm": 0.41752415895462036, |
|
"learning_rate": 1.9998771749434128e-05, |
|
"loss": 1.2094, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.09761388286334056, |
|
"grad_norm": 0.4695405960083008, |
|
"learning_rate": 1.999869843849074e-05, |
|
"loss": 1.08, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.09833694866232827, |
|
"grad_norm": 0.36969393491744995, |
|
"learning_rate": 1.9998623002779493e-05, |
|
"loss": 1.1485, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.09906001446131597, |
|
"grad_norm": 0.31913572549819946, |
|
"learning_rate": 1.9998545442316422e-05, |
|
"loss": 1.1797, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.09978308026030369, |
|
"grad_norm": 0.3533536493778229, |
|
"learning_rate": 1.9998465757118007e-05, |
|
"loss": 1.0771, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.1005061460592914, |
|
"grad_norm": 0.31527888774871826, |
|
"learning_rate": 1.999838394720118e-05, |
|
"loss": 1.1364, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.1012292118582791, |
|
"grad_norm": 0.274924635887146, |
|
"learning_rate": 1.9998300012583333e-05, |
|
"loss": 1.0948, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.1019522776572668, |
|
"grad_norm": 0.6106323599815369, |
|
"learning_rate": 1.9998213953282297e-05, |
|
"loss": 1.2224, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.10267534345625452, |
|
"grad_norm": 0.4037460386753082, |
|
"learning_rate": 1.999812576931636e-05, |
|
"loss": 1.1291, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.10339840925524223, |
|
"grad_norm": 0.3101276457309723, |
|
"learning_rate": 1.999803546070426e-05, |
|
"loss": 1.2032, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.10412147505422993, |
|
"grad_norm": 0.2807331085205078, |
|
"learning_rate": 1.9997943027465187e-05, |
|
"loss": 1.0995, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.10484454085321765, |
|
"grad_norm": 0.2920645773410797, |
|
"learning_rate": 1.999784846961879e-05, |
|
"loss": 1.0696, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.10556760665220535, |
|
"grad_norm": 0.2744218409061432, |
|
"learning_rate": 1.9997751787185163e-05, |
|
"loss": 1.0342, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.10629067245119306, |
|
"grad_norm": 0.3686463534832001, |
|
"learning_rate": 1.999765298018484e-05, |
|
"loss": 1.0401, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.10701373825018076, |
|
"grad_norm": 0.3631075322628021, |
|
"learning_rate": 1.9997552048638833e-05, |
|
"loss": 1.083, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.10773680404916848, |
|
"grad_norm": 0.4366123080253601, |
|
"learning_rate": 1.999744899256858e-05, |
|
"loss": 1.077, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.10845986984815618, |
|
"grad_norm": 0.27256667613983154, |
|
"learning_rate": 1.9997343811995985e-05, |
|
"loss": 1.0411, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.10918293564714389, |
|
"grad_norm": 0.2851492464542389, |
|
"learning_rate": 1.99972365069434e-05, |
|
"loss": 1.033, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.1099060014461316, |
|
"grad_norm": 0.2830626368522644, |
|
"learning_rate": 1.999712707743362e-05, |
|
"loss": 1.0917, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.11062906724511931, |
|
"grad_norm": 0.30642372369766235, |
|
"learning_rate": 1.9997015523489912e-05, |
|
"loss": 1.0848, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.11135213304410702, |
|
"grad_norm": 0.29700708389282227, |
|
"learning_rate": 1.999690184513597e-05, |
|
"loss": 1.1311, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.11207519884309472, |
|
"grad_norm": 0.3125067949295044, |
|
"learning_rate": 1.999678604239596e-05, |
|
"loss": 1.0089, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.11279826464208242, |
|
"grad_norm": 0.26634660363197327, |
|
"learning_rate": 1.9996668115294486e-05, |
|
"loss": 0.8978, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.11352133044107014, |
|
"grad_norm": 0.3419530689716339, |
|
"learning_rate": 1.9996548063856606e-05, |
|
"loss": 1.1423, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.11424439624005785, |
|
"grad_norm": 0.3322874903678894, |
|
"learning_rate": 1.999642588810784e-05, |
|
"loss": 1.1393, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.11496746203904555, |
|
"grad_norm": 0.39157527685165405, |
|
"learning_rate": 1.999630158807414e-05, |
|
"loss": 1.1042, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.11569052783803326, |
|
"grad_norm": 0.2635056674480438, |
|
"learning_rate": 1.999617516378193e-05, |
|
"loss": 1.1925, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.11641359363702097, |
|
"grad_norm": 0.28288525342941284, |
|
"learning_rate": 1.999604661525807e-05, |
|
"loss": 1.1529, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.11713665943600868, |
|
"grad_norm": 0.2699298560619354, |
|
"learning_rate": 1.9995915942529875e-05, |
|
"loss": 1.1823, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.11785972523499638, |
|
"grad_norm": 0.2862537205219269, |
|
"learning_rate": 1.999578314562512e-05, |
|
"loss": 1.1446, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.11858279103398409, |
|
"grad_norm": 0.3110126852989197, |
|
"learning_rate": 1.9995648224572023e-05, |
|
"loss": 1.1324, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.1193058568329718, |
|
"grad_norm": 0.29105737805366516, |
|
"learning_rate": 1.9995511179399253e-05, |
|
"loss": 1.1826, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.12002892263195951, |
|
"grad_norm": 0.29570046067237854, |
|
"learning_rate": 1.999537201013593e-05, |
|
"loss": 1.1039, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.12075198843094721, |
|
"grad_norm": 0.3625316619873047, |
|
"learning_rate": 1.9995230716811637e-05, |
|
"loss": 1.22, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.12147505422993492, |
|
"grad_norm": 0.2676149606704712, |
|
"learning_rate": 1.9995087299456395e-05, |
|
"loss": 1.112, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.12219812002892264, |
|
"grad_norm": 0.2860702574253082, |
|
"learning_rate": 1.9994941758100677e-05, |
|
"loss": 1.1972, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.12292118582791034, |
|
"grad_norm": 0.22130216658115387, |
|
"learning_rate": 1.9994794092775418e-05, |
|
"loss": 1.0739, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.12364425162689804, |
|
"grad_norm": 0.28100821375846863, |
|
"learning_rate": 1.9994644303511994e-05, |
|
"loss": 1.0996, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.12436731742588576, |
|
"grad_norm": 0.2977873980998993, |
|
"learning_rate": 1.9994492390342233e-05, |
|
"loss": 1.0319, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.12509038322487345, |
|
"grad_norm": 0.28634506464004517, |
|
"learning_rate": 1.999433835329842e-05, |
|
"loss": 1.1696, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.12581344902386118, |
|
"grad_norm": 0.2574990689754486, |
|
"learning_rate": 1.999418219241329e-05, |
|
"loss": 1.1368, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.1265365148228489, |
|
"grad_norm": 0.48853209614753723, |
|
"learning_rate": 1.9994023907720027e-05, |
|
"loss": 1.15, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.1272595806218366, |
|
"grad_norm": 0.2706013023853302, |
|
"learning_rate": 1.9993863499252265e-05, |
|
"loss": 1.123, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.1279826464208243, |
|
"grad_norm": 0.3121446669101715, |
|
"learning_rate": 1.9993700967044097e-05, |
|
"loss": 1.1917, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.128705712219812, |
|
"grad_norm": 0.31216657161712646, |
|
"learning_rate": 1.9993536311130054e-05, |
|
"loss": 1.1167, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.1294287780187997, |
|
"grad_norm": 0.36718523502349854, |
|
"learning_rate": 1.9993369531545134e-05, |
|
"loss": 1.1922, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.1301518438177874, |
|
"grad_norm": 0.37485387921333313, |
|
"learning_rate": 1.999320062832477e-05, |
|
"loss": 1.1381, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.13087490961677511, |
|
"grad_norm": 0.2811019718647003, |
|
"learning_rate": 1.9993029601504865e-05, |
|
"loss": 1.0746, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.13159797541576285, |
|
"grad_norm": 0.30120980739593506, |
|
"learning_rate": 1.9992856451121754e-05, |
|
"loss": 1.1391, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.13232104121475055, |
|
"grad_norm": 0.3567376136779785, |
|
"learning_rate": 1.9992681177212236e-05, |
|
"loss": 1.0458, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.13304410701373826, |
|
"grad_norm": 0.2923922538757324, |
|
"learning_rate": 1.9992503779813558e-05, |
|
"loss": 1.1919, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.13376717281272596, |
|
"grad_norm": 0.4777401089668274, |
|
"learning_rate": 1.9992324258963414e-05, |
|
"loss": 1.1606, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.13449023861171366, |
|
"grad_norm": 0.4806600511074066, |
|
"learning_rate": 1.9992142614699958e-05, |
|
"loss": 1.1763, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.13521330441070137, |
|
"grad_norm": 0.3039063811302185, |
|
"learning_rate": 1.9991958847061786e-05, |
|
"loss": 1.0922, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.13593637020968907, |
|
"grad_norm": 0.27506738901138306, |
|
"learning_rate": 1.9991772956087952e-05, |
|
"loss": 1.0489, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.13665943600867678, |
|
"grad_norm": 0.2944995164871216, |
|
"learning_rate": 1.999158494181796e-05, |
|
"loss": 1.0292, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.1373825018076645, |
|
"grad_norm": 0.347499817609787, |
|
"learning_rate": 1.999139480429176e-05, |
|
"loss": 1.1791, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.1381055676066522, |
|
"grad_norm": 0.2530309855937958, |
|
"learning_rate": 1.9991202543549758e-05, |
|
"loss": 1.0919, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.13882863340563992, |
|
"grad_norm": 0.3032122850418091, |
|
"learning_rate": 1.9991008159632816e-05, |
|
"loss": 1.0343, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.13955169920462762, |
|
"grad_norm": 0.32035306096076965, |
|
"learning_rate": 1.999081165258223e-05, |
|
"loss": 1.1535, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.14027476500361533, |
|
"grad_norm": 0.3276461958885193, |
|
"learning_rate": 1.999061302243977e-05, |
|
"loss": 1.1237, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.14099783080260303, |
|
"grad_norm": 0.27263158559799194, |
|
"learning_rate": 1.9990412269247637e-05, |
|
"loss": 1.1925, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.14172089660159073, |
|
"grad_norm": 0.48212355375289917, |
|
"learning_rate": 1.9990209393048497e-05, |
|
"loss": 1.0973, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.14244396240057844, |
|
"grad_norm": 0.2263614982366562, |
|
"learning_rate": 1.9990004393885466e-05, |
|
"loss": 1.1126, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.14316702819956617, |
|
"grad_norm": 0.24680563807487488, |
|
"learning_rate": 1.99897972718021e-05, |
|
"loss": 1.1049, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.14389009399855388, |
|
"grad_norm": 0.2854582667350769, |
|
"learning_rate": 1.9989588026842416e-05, |
|
"loss": 1.2055, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.14461315979754158, |
|
"grad_norm": 0.2650730609893799, |
|
"learning_rate": 1.9989376659050878e-05, |
|
"loss": 1.1494, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.14533622559652928, |
|
"grad_norm": 0.4389187693595886, |
|
"learning_rate": 1.99891631684724e-05, |
|
"loss": 1.162, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.146059291395517, |
|
"grad_norm": 0.2606823444366455, |
|
"learning_rate": 1.998894755515236e-05, |
|
"loss": 1.2099, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.1467823571945047, |
|
"grad_norm": 0.3051895201206207, |
|
"learning_rate": 1.9988729819136568e-05, |
|
"loss": 1.055, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.1475054229934924, |
|
"grad_norm": 0.2886350452899933, |
|
"learning_rate": 1.9988509960471294e-05, |
|
"loss": 1.1345, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.14822848879248013, |
|
"grad_norm": 0.2689605951309204, |
|
"learning_rate": 1.9988287979203264e-05, |
|
"loss": 1.0322, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.14895155459146783, |
|
"grad_norm": 0.2789241075515747, |
|
"learning_rate": 1.9988063875379645e-05, |
|
"loss": 1.2015, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.14967462039045554, |
|
"grad_norm": 0.4138123095035553, |
|
"learning_rate": 1.9987837649048062e-05, |
|
"loss": 1.1172, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.15039768618944324, |
|
"grad_norm": 0.49634483456611633, |
|
"learning_rate": 1.998760930025659e-05, |
|
"loss": 1.1927, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.15112075198843095, |
|
"grad_norm": 0.3492712378501892, |
|
"learning_rate": 1.9987378829053756e-05, |
|
"loss": 1.0852, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.15184381778741865, |
|
"grad_norm": 0.42525535821914673, |
|
"learning_rate": 1.9987146235488532e-05, |
|
"loss": 1.1665, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.15256688358640635, |
|
"grad_norm": 0.23187308013439178, |
|
"learning_rate": 1.9986911519610346e-05, |
|
"loss": 1.0863, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.15328994938539406, |
|
"grad_norm": 0.23776406049728394, |
|
"learning_rate": 1.9986674681469074e-05, |
|
"loss": 0.9548, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.1540130151843818, |
|
"grad_norm": 0.23905649781227112, |
|
"learning_rate": 1.998643572111505e-05, |
|
"loss": 1.0704, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.1547360809833695, |
|
"grad_norm": 0.31379473209381104, |
|
"learning_rate": 1.9986194638599056e-05, |
|
"loss": 1.0357, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.1554591467823572, |
|
"grad_norm": 0.31585368514060974, |
|
"learning_rate": 1.9985951433972313e-05, |
|
"loss": 1.0999, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.1561822125813449, |
|
"grad_norm": 0.42276912927627563, |
|
"learning_rate": 1.9985706107286515e-05, |
|
"loss": 1.0196, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.1569052783803326, |
|
"grad_norm": 0.2652490437030792, |
|
"learning_rate": 1.9985458658593787e-05, |
|
"loss": 1.0087, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.1576283441793203, |
|
"grad_norm": 0.2674664855003357, |
|
"learning_rate": 1.9985209087946717e-05, |
|
"loss": 1.1576, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.15835140997830802, |
|
"grad_norm": 0.3311745524406433, |
|
"learning_rate": 1.9984957395398336e-05, |
|
"loss": 1.2496, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.15907447577729572, |
|
"grad_norm": 0.31602323055267334, |
|
"learning_rate": 1.998470358100213e-05, |
|
"loss": 1.0267, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.15979754157628345, |
|
"grad_norm": 0.42870277166366577, |
|
"learning_rate": 1.998444764481204e-05, |
|
"loss": 1.0689, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.16052060737527116, |
|
"grad_norm": 0.2735862731933594, |
|
"learning_rate": 1.9984189586882455e-05, |
|
"loss": 1.1395, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.16124367317425886, |
|
"grad_norm": 0.36028143763542175, |
|
"learning_rate": 1.9983929407268206e-05, |
|
"loss": 1.176, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.16196673897324657, |
|
"grad_norm": 0.35686901211738586, |
|
"learning_rate": 1.9983667106024584e-05, |
|
"loss": 1.165, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.16268980477223427, |
|
"grad_norm": 0.25849634408950806, |
|
"learning_rate": 1.9983402683207334e-05, |
|
"loss": 1.0527, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.16341287057122197, |
|
"grad_norm": 0.2960383892059326, |
|
"learning_rate": 1.9983136138872644e-05, |
|
"loss": 1.1526, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.16413593637020968, |
|
"grad_norm": 0.31200936436653137, |
|
"learning_rate": 1.9982867473077155e-05, |
|
"loss": 1.001, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.1648590021691974, |
|
"grad_norm": 0.2932310998439789, |
|
"learning_rate": 1.998259668587796e-05, |
|
"loss": 1.0264, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.16558206796818511, |
|
"grad_norm": 0.31865110993385315, |
|
"learning_rate": 1.9982323777332605e-05, |
|
"loss": 1.1389, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.16630513376717282, |
|
"grad_norm": 0.3649924397468567, |
|
"learning_rate": 1.9982048747499082e-05, |
|
"loss": 1.1133, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.16702819956616052, |
|
"grad_norm": 0.27349087595939636, |
|
"learning_rate": 1.9981771596435834e-05, |
|
"loss": 1.1829, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.16775126536514823, |
|
"grad_norm": 0.49940750002861023, |
|
"learning_rate": 1.9981492324201762e-05, |
|
"loss": 1.1312, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.16847433116413593, |
|
"grad_norm": 0.3599984049797058, |
|
"learning_rate": 1.998121093085621e-05, |
|
"loss": 1.0232, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.16919739696312364, |
|
"grad_norm": 0.3514828681945801, |
|
"learning_rate": 1.9980927416458976e-05, |
|
"loss": 1.1149, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.16992046276211134, |
|
"grad_norm": 0.29723837971687317, |
|
"learning_rate": 1.998064178107031e-05, |
|
"loss": 1.0841, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.17064352856109907, |
|
"grad_norm": 0.33873599767684937, |
|
"learning_rate": 1.9980354024750903e-05, |
|
"loss": 1.046, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.17136659436008678, |
|
"grad_norm": 0.32947036623954773, |
|
"learning_rate": 1.998006414756191e-05, |
|
"loss": 1.0069, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.17208966015907448, |
|
"grad_norm": 0.26573359966278076, |
|
"learning_rate": 1.9979772149564932e-05, |
|
"loss": 1.0213, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.17281272595806219, |
|
"grad_norm": 0.3644008934497833, |
|
"learning_rate": 1.9979478030822022e-05, |
|
"loss": 1.153, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.1735357917570499, |
|
"grad_norm": 0.337447851896286, |
|
"learning_rate": 1.997918179139567e-05, |
|
"loss": 1.0932, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.1742588575560376, |
|
"grad_norm": 0.2889169752597809, |
|
"learning_rate": 1.9978883431348845e-05, |
|
"loss": 1.1418, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.1749819233550253, |
|
"grad_norm": 0.3004131019115448, |
|
"learning_rate": 1.9978582950744938e-05, |
|
"loss": 1.1679, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.175704989154013, |
|
"grad_norm": 0.28067609667778015, |
|
"learning_rate": 1.9978280349647808e-05, |
|
"loss": 1.2233, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.17642805495300073, |
|
"grad_norm": 0.3912954330444336, |
|
"learning_rate": 1.9977975628121753e-05, |
|
"loss": 1.0767, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.17715112075198844, |
|
"grad_norm": 0.2701180875301361, |
|
"learning_rate": 1.9977668786231536e-05, |
|
"loss": 1.1795, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.17787418655097614, |
|
"grad_norm": 0.27517423033714294, |
|
"learning_rate": 1.9977359824042353e-05, |
|
"loss": 1.0639, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.17859725234996385, |
|
"grad_norm": 0.33523860573768616, |
|
"learning_rate": 1.9977048741619866e-05, |
|
"loss": 1.0979, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.17932031814895155, |
|
"grad_norm": 0.25108376145362854, |
|
"learning_rate": 1.9976735539030182e-05, |
|
"loss": 1.1019, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.18004338394793926, |
|
"grad_norm": 0.35542207956314087, |
|
"learning_rate": 1.9976420216339854e-05, |
|
"loss": 1.0838, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.18076644974692696, |
|
"grad_norm": 0.3071356415748596, |
|
"learning_rate": 1.9976102773615894e-05, |
|
"loss": 1.024, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.18148951554591466, |
|
"grad_norm": 0.2952810227870941, |
|
"learning_rate": 1.9975783210925752e-05, |
|
"loss": 0.9775, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.1822125813449024, |
|
"grad_norm": 0.29858988523483276, |
|
"learning_rate": 1.9975461528337345e-05, |
|
"loss": 0.9622, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.1829356471438901, |
|
"grad_norm": 0.26090505719184875, |
|
"learning_rate": 1.9975137725919032e-05, |
|
"loss": 1.061, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.1836587129428778, |
|
"grad_norm": 0.3304395079612732, |
|
"learning_rate": 1.9974811803739617e-05, |
|
"loss": 1.0657, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.1843817787418655, |
|
"grad_norm": 0.4547516405582428, |
|
"learning_rate": 1.997448376186836e-05, |
|
"loss": 1.1043, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.18510484454085321, |
|
"grad_norm": 0.28662002086639404, |
|
"learning_rate": 1.997415360037498e-05, |
|
"loss": 1.0743, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.18582791033984092, |
|
"grad_norm": 0.266493022441864, |
|
"learning_rate": 1.9973821319329625e-05, |
|
"loss": 0.9767, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.18655097613882862, |
|
"grad_norm": 0.34035131335258484, |
|
"learning_rate": 1.9973486918802912e-05, |
|
"loss": 1.2844, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.18727404193781635, |
|
"grad_norm": 0.34165364503860474, |
|
"learning_rate": 1.9973150398865908e-05, |
|
"loss": 1.2808, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.18799710773680406, |
|
"grad_norm": 0.3018459975719452, |
|
"learning_rate": 1.9972811759590117e-05, |
|
"loss": 0.9891, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.18872017353579176, |
|
"grad_norm": 0.3636222779750824, |
|
"learning_rate": 1.9972471001047505e-05, |
|
"loss": 1.1218, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.18944323933477947, |
|
"grad_norm": 0.3037750720977783, |
|
"learning_rate": 1.9972128123310485e-05, |
|
"loss": 1.0688, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.19016630513376717, |
|
"grad_norm": 0.3097022771835327, |
|
"learning_rate": 1.997178312645192e-05, |
|
"loss": 1.135, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.19088937093275488, |
|
"grad_norm": 0.29391196370124817, |
|
"learning_rate": 1.9971436010545125e-05, |
|
"loss": 1.0739, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.19161243673174258, |
|
"grad_norm": 0.2928149104118347, |
|
"learning_rate": 1.9971086775663856e-05, |
|
"loss": 1.0486, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.19233550253073028, |
|
"grad_norm": 0.32840797305107117, |
|
"learning_rate": 1.9970735421882334e-05, |
|
"loss": 1.0065, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.19305856832971802, |
|
"grad_norm": 0.2717532217502594, |
|
"learning_rate": 1.997038194927522e-05, |
|
"loss": 0.9868, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.19378163412870572, |
|
"grad_norm": 0.3612685799598694, |
|
"learning_rate": 1.9970026357917636e-05, |
|
"loss": 1.055, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.19450469992769343, |
|
"grad_norm": 0.31416481733322144, |
|
"learning_rate": 1.9969668647885136e-05, |
|
"loss": 1.0907, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.19522776572668113, |
|
"grad_norm": 0.31718918681144714, |
|
"learning_rate": 1.996930881925374e-05, |
|
"loss": 1.048, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.19595083152566883, |
|
"grad_norm": 0.41617873311042786, |
|
"learning_rate": 1.9968946872099915e-05, |
|
"loss": 1.1356, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.19667389732465654, |
|
"grad_norm": 0.6090404987335205, |
|
"learning_rate": 1.9968582806500572e-05, |
|
"loss": 1.0258, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.19739696312364424, |
|
"grad_norm": 0.3227296471595764, |
|
"learning_rate": 1.9968216622533082e-05, |
|
"loss": 1.0439, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.19812002892263195, |
|
"grad_norm": 0.5116894245147705, |
|
"learning_rate": 1.9967848320275253e-05, |
|
"loss": 1.2046, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.19884309472161968, |
|
"grad_norm": 0.30214396119117737, |
|
"learning_rate": 1.996747789980536e-05, |
|
"loss": 1.0646, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.19956616052060738, |
|
"grad_norm": 0.32545679807662964, |
|
"learning_rate": 1.996710536120211e-05, |
|
"loss": 1.0382, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.2002892263195951, |
|
"grad_norm": 0.3302004933357239, |
|
"learning_rate": 1.9966730704544677e-05, |
|
"loss": 1.3115, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.2010122921185828, |
|
"grad_norm": 0.29362550377845764, |
|
"learning_rate": 1.9966353929912672e-05, |
|
"loss": 1.1415, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.2017353579175705, |
|
"grad_norm": 0.41206830739974976, |
|
"learning_rate": 1.9965975037386164e-05, |
|
"loss": 1.1537, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.2024584237165582, |
|
"grad_norm": 0.28330183029174805, |
|
"learning_rate": 1.9965594027045668e-05, |
|
"loss": 0.9909, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.2031814895155459, |
|
"grad_norm": 0.3228115439414978, |
|
"learning_rate": 1.996521089897215e-05, |
|
"loss": 1.1051, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.2039045553145336, |
|
"grad_norm": 0.2800455689430237, |
|
"learning_rate": 1.9964825653247026e-05, |
|
"loss": 1.0582, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.20462762111352134, |
|
"grad_norm": 0.3681683838367462, |
|
"learning_rate": 1.9964438289952167e-05, |
|
"loss": 1.1653, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.20535068691250905, |
|
"grad_norm": 0.3052619695663452, |
|
"learning_rate": 1.9964048809169885e-05, |
|
"loss": 1.0226, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.20607375271149675, |
|
"grad_norm": 0.32279151678085327, |
|
"learning_rate": 1.9963657210982947e-05, |
|
"loss": 1.143, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.20679681851048445, |
|
"grad_norm": 0.3428622782230377, |
|
"learning_rate": 1.9963263495474573e-05, |
|
"loss": 1.0171, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.20751988430947216, |
|
"grad_norm": 0.2577044665813446, |
|
"learning_rate": 1.9962867662728422e-05, |
|
"loss": 0.9957, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.20824295010845986, |
|
"grad_norm": 0.4019884467124939, |
|
"learning_rate": 1.9962469712828613e-05, |
|
"loss": 1.2837, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.20896601590744757, |
|
"grad_norm": 0.3153854012489319, |
|
"learning_rate": 1.9962069645859717e-05, |
|
"loss": 1.1967, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.2096890817064353, |
|
"grad_norm": 0.3266783356666565, |
|
"learning_rate": 1.9961667461906743e-05, |
|
"loss": 1.015, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.210412147505423, |
|
"grad_norm": 0.5336325168609619, |
|
"learning_rate": 1.9961263161055163e-05, |
|
"loss": 1.1563, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.2111352133044107, |
|
"grad_norm": 0.4741445481777191, |
|
"learning_rate": 1.996085674339089e-05, |
|
"loss": 1.1913, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.2118582791033984, |
|
"grad_norm": 0.30581969022750854, |
|
"learning_rate": 1.996044820900029e-05, |
|
"loss": 1.0972, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.21258134490238612, |
|
"grad_norm": 0.31274256110191345, |
|
"learning_rate": 1.996003755797018e-05, |
|
"loss": 1.1664, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.21330441070137382, |
|
"grad_norm": 0.32022932171821594, |
|
"learning_rate": 1.995962479038782e-05, |
|
"loss": 1.0773, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.21402747650036152, |
|
"grad_norm": 0.3246869444847107, |
|
"learning_rate": 1.9959209906340925e-05, |
|
"loss": 1.1162, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.21475054229934923, |
|
"grad_norm": 0.44917920231819153, |
|
"learning_rate": 1.995879290591767e-05, |
|
"loss": 1.0324, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.21547360809833696, |
|
"grad_norm": 0.30781978368759155, |
|
"learning_rate": 1.9958373789206656e-05, |
|
"loss": 1.0437, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.21619667389732466, |
|
"grad_norm": 0.28506171703338623, |
|
"learning_rate": 1.995795255629696e-05, |
|
"loss": 1.0357, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.21691973969631237, |
|
"grad_norm": 0.3168564438819885, |
|
"learning_rate": 1.9957529207278082e-05, |
|
"loss": 1.0496, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.21764280549530007, |
|
"grad_norm": 0.4779270589351654, |
|
"learning_rate": 1.9957103742239997e-05, |
|
"loss": 1.0295, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.21836587129428778, |
|
"grad_norm": 0.2756384611129761, |
|
"learning_rate": 1.9956676161273114e-05, |
|
"loss": 0.9995, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.21908893709327548, |
|
"grad_norm": 0.28942012786865234, |
|
"learning_rate": 1.9956246464468294e-05, |
|
"loss": 1.0739, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.2198120028922632, |
|
"grad_norm": 0.3324378430843353, |
|
"learning_rate": 1.9955814651916853e-05, |
|
"loss": 1.061, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.2205350686912509, |
|
"grad_norm": 0.3170448839664459, |
|
"learning_rate": 1.995538072371055e-05, |
|
"loss": 1.2148, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.22125813449023862, |
|
"grad_norm": 0.4121737778186798, |
|
"learning_rate": 1.9954944679941602e-05, |
|
"loss": 0.9862, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.22198120028922633, |
|
"grad_norm": 0.3573335111141205, |
|
"learning_rate": 1.9954506520702662e-05, |
|
"loss": 1.0321, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.22270426608821403, |
|
"grad_norm": 0.36431220173835754, |
|
"learning_rate": 1.995406624608685e-05, |
|
"loss": 1.1277, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.22342733188720174, |
|
"grad_norm": 0.4707132577896118, |
|
"learning_rate": 1.9953623856187714e-05, |
|
"loss": 0.9929, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.22415039768618944, |
|
"grad_norm": 0.3705374598503113, |
|
"learning_rate": 1.9953179351099276e-05, |
|
"loss": 1.0452, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.22487346348517714, |
|
"grad_norm": 0.2698614299297333, |
|
"learning_rate": 1.9952732730915993e-05, |
|
"loss": 0.9796, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.22559652928416485, |
|
"grad_norm": 0.3234151303768158, |
|
"learning_rate": 1.9952283995732765e-05, |
|
"loss": 1.0508, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.22631959508315258, |
|
"grad_norm": 0.3262479603290558, |
|
"learning_rate": 1.9951833145644962e-05, |
|
"loss": 1.2089, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.22704266088214028, |
|
"grad_norm": 0.5179426670074463, |
|
"learning_rate": 1.9951380180748383e-05, |
|
"loss": 1.1464, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.227765726681128, |
|
"grad_norm": 0.4757941961288452, |
|
"learning_rate": 1.9950925101139292e-05, |
|
"loss": 1.0322, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.2284887924801157, |
|
"grad_norm": 0.40444672107696533, |
|
"learning_rate": 1.9950467906914387e-05, |
|
"loss": 1.0495, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.2292118582791034, |
|
"grad_norm": 0.3511693775653839, |
|
"learning_rate": 1.995000859817083e-05, |
|
"loss": 0.9481, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.2299349240780911, |
|
"grad_norm": 0.5550206303596497, |
|
"learning_rate": 1.9949547175006227e-05, |
|
"loss": 1.036, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.2306579898770788, |
|
"grad_norm": 0.31517210602760315, |
|
"learning_rate": 1.9949083637518628e-05, |
|
"loss": 1.0197, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.2313810556760665, |
|
"grad_norm": 0.3532278835773468, |
|
"learning_rate": 1.994861798580654e-05, |
|
"loss": 1.0144, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.23210412147505424, |
|
"grad_norm": 0.3524475693702698, |
|
"learning_rate": 1.9948150219968917e-05, |
|
"loss": 1.2165, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.23282718727404195, |
|
"grad_norm": 0.30520960688591003, |
|
"learning_rate": 1.9947680340105156e-05, |
|
"loss": 1.0552, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.23355025307302965, |
|
"grad_norm": 0.36823803186416626, |
|
"learning_rate": 1.9947208346315112e-05, |
|
"loss": 1.0136, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.23427331887201736, |
|
"grad_norm": 0.33798906207084656, |
|
"learning_rate": 1.994673423869909e-05, |
|
"loss": 1.0973, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.23499638467100506, |
|
"grad_norm": 0.4596942961215973, |
|
"learning_rate": 1.994625801735783e-05, |
|
"loss": 0.9646, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.23571945046999276, |
|
"grad_norm": 0.27387481927871704, |
|
"learning_rate": 1.9945779682392538e-05, |
|
"loss": 1.1533, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.23644251626898047, |
|
"grad_norm": 0.3908016085624695, |
|
"learning_rate": 1.994529923390486e-05, |
|
"loss": 1.1398, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.23716558206796817, |
|
"grad_norm": 0.5063049793243408, |
|
"learning_rate": 1.99448166719969e-05, |
|
"loss": 1.1009, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.2378886478669559, |
|
"grad_norm": 0.4367094337940216, |
|
"learning_rate": 1.9944331996771194e-05, |
|
"loss": 1.1103, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.2386117136659436, |
|
"grad_norm": 0.3248264193534851, |
|
"learning_rate": 1.9943845208330742e-05, |
|
"loss": 1.1392, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.2393347794649313, |
|
"grad_norm": 0.31271231174468994, |
|
"learning_rate": 1.9943356306778995e-05, |
|
"loss": 1.0572, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.24005784526391902, |
|
"grad_norm": 0.31461718678474426, |
|
"learning_rate": 1.9942865292219837e-05, |
|
"loss": 1.0907, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.24078091106290672, |
|
"grad_norm": 0.40921902656555176, |
|
"learning_rate": 1.9942372164757616e-05, |
|
"loss": 1.0401, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.24150397686189443, |
|
"grad_norm": 0.39852192997932434, |
|
"learning_rate": 1.994187692449712e-05, |
|
"loss": 1.1206, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.24222704266088213, |
|
"grad_norm": 0.30172234773635864, |
|
"learning_rate": 1.9941379571543597e-05, |
|
"loss": 1.0826, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.24295010845986983, |
|
"grad_norm": 0.32275334000587463, |
|
"learning_rate": 1.994088010600273e-05, |
|
"loss": 0.9221, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.24367317425885757, |
|
"grad_norm": 0.32760071754455566, |
|
"learning_rate": 1.994037852798066e-05, |
|
"loss": 1.1498, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.24439624005784527, |
|
"grad_norm": 0.33486208319664, |
|
"learning_rate": 1.9939874837583977e-05, |
|
"loss": 1.0462, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.24511930585683298, |
|
"grad_norm": 0.4280707836151123, |
|
"learning_rate": 1.9939369034919712e-05, |
|
"loss": 0.981, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.24584237165582068, |
|
"grad_norm": 0.3889998495578766, |
|
"learning_rate": 1.9938861120095353e-05, |
|
"loss": 1.1114, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.24656543745480838, |
|
"grad_norm": 0.2864742577075958, |
|
"learning_rate": 1.9938351093218833e-05, |
|
"loss": 1.0915, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.2472885032537961, |
|
"grad_norm": 0.390601247549057, |
|
"learning_rate": 1.9937838954398542e-05, |
|
"loss": 1.0737, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.2480115690527838, |
|
"grad_norm": 0.4023664593696594, |
|
"learning_rate": 1.99373247037433e-05, |
|
"loss": 1.1513, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.24873463485177152, |
|
"grad_norm": 0.2922250032424927, |
|
"learning_rate": 1.9936808341362396e-05, |
|
"loss": 1.1422, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.24945770065075923, |
|
"grad_norm": 0.5238683819770813, |
|
"learning_rate": 1.9936289867365557e-05, |
|
"loss": 1.1925, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.2501807664497469, |
|
"grad_norm": 0.2811175584793091, |
|
"learning_rate": 1.993576928186296e-05, |
|
"loss": 1.0295, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.25090383224873464, |
|
"grad_norm": 0.3582068383693695, |
|
"learning_rate": 1.9935246584965237e-05, |
|
"loss": 1.0085, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.25162689804772237, |
|
"grad_norm": 0.29945480823516846, |
|
"learning_rate": 1.993472177678345e-05, |
|
"loss": 0.9309, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.25234996384671005, |
|
"grad_norm": 0.2941429018974304, |
|
"learning_rate": 1.993419485742914e-05, |
|
"loss": 1.0875, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.2530730296456978, |
|
"grad_norm": 0.33406949043273926, |
|
"learning_rate": 1.9933665827014272e-05, |
|
"loss": 1.1204, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.25379609544468545, |
|
"grad_norm": 0.30616679787635803, |
|
"learning_rate": 1.9933134685651267e-05, |
|
"loss": 1.1959, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.2545191612436732, |
|
"grad_norm": 0.3512917459011078, |
|
"learning_rate": 1.993260143345299e-05, |
|
"loss": 1.1553, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.25524222704266086, |
|
"grad_norm": 0.40841343998908997, |
|
"learning_rate": 1.9932066070532768e-05, |
|
"loss": 1.0604, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.2559652928416486, |
|
"grad_norm": 0.4030756950378418, |
|
"learning_rate": 1.9931528597004363e-05, |
|
"loss": 1.2246, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.25668835864063627, |
|
"grad_norm": 0.3495555520057678, |
|
"learning_rate": 1.9930989012981992e-05, |
|
"loss": 1.1397, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.257411424439624, |
|
"grad_norm": 0.31172969937324524, |
|
"learning_rate": 1.9930447318580323e-05, |
|
"loss": 1.0977, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.25813449023861174, |
|
"grad_norm": 0.3468332886695862, |
|
"learning_rate": 1.992990351391446e-05, |
|
"loss": 1.2147, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.2588575560375994, |
|
"grad_norm": 0.4538349211215973, |
|
"learning_rate": 1.992935759909997e-05, |
|
"loss": 1.0623, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.25958062183658714, |
|
"grad_norm": 0.5491474270820618, |
|
"learning_rate": 1.9928809574252864e-05, |
|
"loss": 1.1133, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.2603036876355748, |
|
"grad_norm": 0.33087098598480225, |
|
"learning_rate": 1.992825943948959e-05, |
|
"loss": 1.0613, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.26102675343456255, |
|
"grad_norm": 0.4498991370201111, |
|
"learning_rate": 1.9927707194927067e-05, |
|
"loss": 1.1283, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.26174981923355023, |
|
"grad_norm": 0.3464474678039551, |
|
"learning_rate": 1.9927152840682636e-05, |
|
"loss": 1.1189, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.26247288503253796, |
|
"grad_norm": 0.32526206970214844, |
|
"learning_rate": 1.9926596376874112e-05, |
|
"loss": 0.9857, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.2631959508315257, |
|
"grad_norm": 0.36080119013786316, |
|
"learning_rate": 1.9926037803619744e-05, |
|
"loss": 1.0095, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.26391901663051337, |
|
"grad_norm": 0.4334959089756012, |
|
"learning_rate": 1.9925477121038218e-05, |
|
"loss": 1.1569, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.2646420824295011, |
|
"grad_norm": 0.33947449922561646, |
|
"learning_rate": 1.99249143292487e-05, |
|
"loss": 1.067, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.2653651482284888, |
|
"grad_norm": 0.3930415213108063, |
|
"learning_rate": 1.9924349428370774e-05, |
|
"loss": 1.1315, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.2660882140274765, |
|
"grad_norm": 0.47822701930999756, |
|
"learning_rate": 1.992378241852449e-05, |
|
"loss": 1.0941, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.2668112798264642, |
|
"grad_norm": 0.4171973764896393, |
|
"learning_rate": 1.9923213299830336e-05, |
|
"loss": 1.2023, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.2675343456254519, |
|
"grad_norm": 0.29545828700065613, |
|
"learning_rate": 1.992264207240925e-05, |
|
"loss": 1.1136, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.26825741142443965, |
|
"grad_norm": 0.3832903504371643, |
|
"learning_rate": 1.9922068736382627e-05, |
|
"loss": 1.0233, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.26898047722342733, |
|
"grad_norm": 0.5224931836128235, |
|
"learning_rate": 1.99214932918723e-05, |
|
"loss": 1.178, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.26970354302241506, |
|
"grad_norm": 0.3658877909183502, |
|
"learning_rate": 1.9920915739000555e-05, |
|
"loss": 1.1076, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.27042660882140274, |
|
"grad_norm": 0.325195848941803, |
|
"learning_rate": 1.9920336077890122e-05, |
|
"loss": 1.0595, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.27114967462039047, |
|
"grad_norm": 0.33365723490715027, |
|
"learning_rate": 1.991975430866419e-05, |
|
"loss": 1.0157, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.27187274041937814, |
|
"grad_norm": 0.3699803948402405, |
|
"learning_rate": 1.9919170431446374e-05, |
|
"loss": 0.9225, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.2725958062183659, |
|
"grad_norm": 0.4542098939418793, |
|
"learning_rate": 1.9918584446360755e-05, |
|
"loss": 1.0914, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.27331887201735355, |
|
"grad_norm": 0.2979832589626312, |
|
"learning_rate": 1.9917996353531864e-05, |
|
"loss": 1.1258, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.2740419378163413, |
|
"grad_norm": 0.3481557369232178, |
|
"learning_rate": 1.9917406153084668e-05, |
|
"loss": 1.1902, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.274765003615329, |
|
"grad_norm": 0.3932206928730011, |
|
"learning_rate": 1.9916813845144587e-05, |
|
"loss": 1.1773, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.2754880694143167, |
|
"grad_norm": 0.32080596685409546, |
|
"learning_rate": 1.991621942983749e-05, |
|
"loss": 1.0663, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.2762111352133044, |
|
"grad_norm": 0.38809677958488464, |
|
"learning_rate": 1.9915622907289695e-05, |
|
"loss": 0.9916, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.2769342010122921, |
|
"grad_norm": 0.2993149757385254, |
|
"learning_rate": 1.9915024277627965e-05, |
|
"loss": 1.0444, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.27765726681127983, |
|
"grad_norm": 0.39644211530685425, |
|
"learning_rate": 1.991442354097951e-05, |
|
"loss": 1.1248, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.2783803326102675, |
|
"grad_norm": 0.3163677752017975, |
|
"learning_rate": 1.9913820697471988e-05, |
|
"loss": 1.1467, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.27910339840925524, |
|
"grad_norm": 0.3997037410736084, |
|
"learning_rate": 1.9913215747233505e-05, |
|
"loss": 1.0009, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.279826464208243, |
|
"grad_norm": 0.43837687373161316, |
|
"learning_rate": 1.991260869039262e-05, |
|
"loss": 1.1285, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.28054953000723065, |
|
"grad_norm": 0.3039294481277466, |
|
"learning_rate": 1.9911999527078333e-05, |
|
"loss": 1.1063, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.2812725958062184, |
|
"grad_norm": 0.2840760052204132, |
|
"learning_rate": 1.9911388257420093e-05, |
|
"loss": 1.0023, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.28199566160520606, |
|
"grad_norm": 0.34338292479515076, |
|
"learning_rate": 1.9910774881547803e-05, |
|
"loss": 0.9788, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.2827187274041938, |
|
"grad_norm": 0.30882540345191956, |
|
"learning_rate": 1.99101593995918e-05, |
|
"loss": 0.949, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.28344179320318147, |
|
"grad_norm": 0.29385194182395935, |
|
"learning_rate": 1.9909541811682883e-05, |
|
"loss": 1.0488, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.2841648590021692, |
|
"grad_norm": 0.2834920585155487, |
|
"learning_rate": 1.9908922117952288e-05, |
|
"loss": 1.1649, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.2848879248011569, |
|
"grad_norm": 0.451275110244751, |
|
"learning_rate": 1.9908300318531707e-05, |
|
"loss": 0.9627, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.2856109906001446, |
|
"grad_norm": 0.3283119201660156, |
|
"learning_rate": 1.990767641355327e-05, |
|
"loss": 0.9928, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.28633405639913234, |
|
"grad_norm": 0.4345645308494568, |
|
"learning_rate": 1.990705040314956e-05, |
|
"loss": 1.121, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.28705712219812, |
|
"grad_norm": 0.4899539649486542, |
|
"learning_rate": 1.9906422287453614e-05, |
|
"loss": 1.1305, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.28778018799710775, |
|
"grad_norm": 0.3533375859260559, |
|
"learning_rate": 1.99057920665989e-05, |
|
"loss": 1.1405, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.2885032537960954, |
|
"grad_norm": 0.2978805899620056, |
|
"learning_rate": 1.990515974071935e-05, |
|
"loss": 1.0067, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.28922631959508316, |
|
"grad_norm": 0.39741653203964233, |
|
"learning_rate": 1.9904525309949332e-05, |
|
"loss": 1.1486, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.28994938539407084, |
|
"grad_norm": 0.3140316903591156, |
|
"learning_rate": 1.990388877442367e-05, |
|
"loss": 1.0824, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.29067245119305857, |
|
"grad_norm": 0.5698236227035522, |
|
"learning_rate": 1.9903250134277622e-05, |
|
"loss": 1.155, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.2913955169920463, |
|
"grad_norm": 0.3922751545906067, |
|
"learning_rate": 1.990260938964691e-05, |
|
"loss": 1.0638, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.292118582791034, |
|
"grad_norm": 0.39551931619644165, |
|
"learning_rate": 1.990196654066769e-05, |
|
"loss": 1.1221, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.2928416485900217, |
|
"grad_norm": 0.4097634255886078, |
|
"learning_rate": 1.9901321587476573e-05, |
|
"loss": 1.1902, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.2935647143890094, |
|
"grad_norm": 0.32898181676864624, |
|
"learning_rate": 1.9900674530210617e-05, |
|
"loss": 1.1063, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.2942877801879971, |
|
"grad_norm": 0.3113490045070648, |
|
"learning_rate": 1.9900025369007316e-05, |
|
"loss": 1.1066, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.2950108459869848, |
|
"grad_norm": 0.38681137561798096, |
|
"learning_rate": 1.9899374104004628e-05, |
|
"loss": 0.9615, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.2957339117859725, |
|
"grad_norm": 0.3377947509288788, |
|
"learning_rate": 1.9898720735340948e-05, |
|
"loss": 1.1625, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.29645697758496026, |
|
"grad_norm": 0.347249299287796, |
|
"learning_rate": 1.9898065263155118e-05, |
|
"loss": 1.0252, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.29718004338394793, |
|
"grad_norm": 0.301384299993515, |
|
"learning_rate": 1.989740768758643e-05, |
|
"loss": 1.0796, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.29790310918293567, |
|
"grad_norm": 0.5834956765174866, |
|
"learning_rate": 1.9896748008774618e-05, |
|
"loss": 1.0108, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.29862617498192334, |
|
"grad_norm": 0.4832312762737274, |
|
"learning_rate": 1.989608622685987e-05, |
|
"loss": 1.2985, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.2993492407809111, |
|
"grad_norm": 0.28051361441612244, |
|
"learning_rate": 1.989542234198282e-05, |
|
"loss": 1.0726, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.30007230657989875, |
|
"grad_norm": 0.38429728150367737, |
|
"learning_rate": 1.989475635428454e-05, |
|
"loss": 1.1316, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.3007953723788865, |
|
"grad_norm": 1.0346802473068237, |
|
"learning_rate": 1.9894088263906563e-05, |
|
"loss": 0.9848, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.30151843817787416, |
|
"grad_norm": 0.4266470670700073, |
|
"learning_rate": 1.9893418070990855e-05, |
|
"loss": 1.0715, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.3022415039768619, |
|
"grad_norm": 0.3880995810031891, |
|
"learning_rate": 1.9892745775679837e-05, |
|
"loss": 1.0393, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.3029645697758496, |
|
"grad_norm": 0.3417673408985138, |
|
"learning_rate": 1.9892071378116378e-05, |
|
"loss": 0.9673, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.3036876355748373, |
|
"grad_norm": 0.35822057723999023, |
|
"learning_rate": 1.9891394878443783e-05, |
|
"loss": 1.0562, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.30441070137382503, |
|
"grad_norm": 0.5743041038513184, |
|
"learning_rate": 1.989071627680582e-05, |
|
"loss": 1.0709, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.3051337671728127, |
|
"grad_norm": 0.3537566363811493, |
|
"learning_rate": 1.9890035573346685e-05, |
|
"loss": 1.0405, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.30585683297180044, |
|
"grad_norm": 0.3579690456390381, |
|
"learning_rate": 1.988935276821104e-05, |
|
"loss": 1.0664, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.3065798987707881, |
|
"grad_norm": 0.3571028709411621, |
|
"learning_rate": 1.988866786154398e-05, |
|
"loss": 1.1104, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.30730296456977585, |
|
"grad_norm": 0.3370681405067444, |
|
"learning_rate": 1.988798085349105e-05, |
|
"loss": 1.1617, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.3080260303687636, |
|
"grad_norm": 0.34891048073768616, |
|
"learning_rate": 1.9887291744198242e-05, |
|
"loss": 1.1739, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.30874909616775126, |
|
"grad_norm": 0.36562421917915344, |
|
"learning_rate": 1.9886600533812e-05, |
|
"loss": 0.9671, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.309472161966739, |
|
"grad_norm": 0.291559636592865, |
|
"learning_rate": 1.9885907222479202e-05, |
|
"loss": 1.0322, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.31019522776572667, |
|
"grad_norm": 0.35871514678001404, |
|
"learning_rate": 1.9885211810347185e-05, |
|
"loss": 0.9888, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.3109182935647144, |
|
"grad_norm": 0.38193368911743164, |
|
"learning_rate": 1.9884514297563722e-05, |
|
"loss": 1.048, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.3116413593637021, |
|
"grad_norm": 0.45247647166252136, |
|
"learning_rate": 1.9883814684277043e-05, |
|
"loss": 0.9847, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.3123644251626898, |
|
"grad_norm": 0.32414931058883667, |
|
"learning_rate": 1.9883112970635812e-05, |
|
"loss": 1.0522, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.31308749096167754, |
|
"grad_norm": 0.32052549719810486, |
|
"learning_rate": 1.988240915678916e-05, |
|
"loss": 1.2259, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.3138105567606652, |
|
"grad_norm": 0.5931859016418457, |
|
"learning_rate": 1.9881703242886635e-05, |
|
"loss": 1.1631, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.31453362255965295, |
|
"grad_norm": 0.34109485149383545, |
|
"learning_rate": 1.9880995229078253e-05, |
|
"loss": 1.1199, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.3152566883586406, |
|
"grad_norm": 0.5068467855453491, |
|
"learning_rate": 1.988028511551447e-05, |
|
"loss": 1.0753, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.31597975415762836, |
|
"grad_norm": 0.3728184103965759, |
|
"learning_rate": 1.987957290234619e-05, |
|
"loss": 1.075, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.31670281995661603, |
|
"grad_norm": 0.3531060814857483, |
|
"learning_rate": 1.987885858972476e-05, |
|
"loss": 1.0648, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.31742588575560376, |
|
"grad_norm": 0.39028334617614746, |
|
"learning_rate": 1.9878142177801977e-05, |
|
"loss": 1.1465, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.31814895155459144, |
|
"grad_norm": 0.4468533992767334, |
|
"learning_rate": 1.9877423666730075e-05, |
|
"loss": 1.0639, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.3188720173535792, |
|
"grad_norm": 0.4782250225543976, |
|
"learning_rate": 1.9876703056661748e-05, |
|
"loss": 1.0668, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.3195950831525669, |
|
"grad_norm": 0.321390837430954, |
|
"learning_rate": 1.9875980347750125e-05, |
|
"loss": 1.1202, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.3203181489515546, |
|
"grad_norm": 0.3330671191215515, |
|
"learning_rate": 1.9875255540148787e-05, |
|
"loss": 1.0998, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.3210412147505423, |
|
"grad_norm": 0.3717515170574188, |
|
"learning_rate": 1.9874528634011758e-05, |
|
"loss": 1.0561, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.32176428054953, |
|
"grad_norm": 0.42336907982826233, |
|
"learning_rate": 1.9873799629493507e-05, |
|
"loss": 1.0152, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.3224873463485177, |
|
"grad_norm": 0.3879169821739197, |
|
"learning_rate": 1.9873068526748957e-05, |
|
"loss": 1.1424, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.3232104121475054, |
|
"grad_norm": 0.3264402151107788, |
|
"learning_rate": 1.987233532593346e-05, |
|
"loss": 1.062, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.32393347794649313, |
|
"grad_norm": 0.45311230421066284, |
|
"learning_rate": 1.987160002720283e-05, |
|
"loss": 1.0058, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.32465654374548086, |
|
"grad_norm": 0.3314540982246399, |
|
"learning_rate": 1.9870862630713325e-05, |
|
"loss": 1.0831, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.32537960954446854, |
|
"grad_norm": 0.46991175413131714, |
|
"learning_rate": 1.9870123136621638e-05, |
|
"loss": 1.0709, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.32610267534345627, |
|
"grad_norm": 0.344123899936676, |
|
"learning_rate": 1.9869381545084924e-05, |
|
"loss": 1.1122, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.32682574114244395, |
|
"grad_norm": 0.34936752915382385, |
|
"learning_rate": 1.9868637856260764e-05, |
|
"loss": 1.1093, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.3275488069414317, |
|
"grad_norm": 0.3147508502006531, |
|
"learning_rate": 1.9867892070307202e-05, |
|
"loss": 0.9859, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.32827187274041936, |
|
"grad_norm": 0.3766098916530609, |
|
"learning_rate": 1.9867144187382718e-05, |
|
"loss": 1.0976, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.3289949385394071, |
|
"grad_norm": 0.3415137827396393, |
|
"learning_rate": 1.986639420764624e-05, |
|
"loss": 1.0321, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.3297180043383948, |
|
"grad_norm": 0.34528324007987976, |
|
"learning_rate": 1.9865642131257147e-05, |
|
"loss": 1.0696, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.3304410701373825, |
|
"grad_norm": 0.445751428604126, |
|
"learning_rate": 1.986488795837525e-05, |
|
"loss": 1.1925, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.33116413593637023, |
|
"grad_norm": 0.38328754901885986, |
|
"learning_rate": 1.9864131689160822e-05, |
|
"loss": 1.2289, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.3318872017353579, |
|
"grad_norm": 0.3823976218700409, |
|
"learning_rate": 1.986337332377457e-05, |
|
"loss": 1.0853, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.33261026753434564, |
|
"grad_norm": 0.467807412147522, |
|
"learning_rate": 1.9862612862377652e-05, |
|
"loss": 1.1639, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.3333333333333333, |
|
"grad_norm": 0.38709041476249695, |
|
"learning_rate": 1.9861850305131666e-05, |
|
"loss": 1.1848, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.33405639913232105, |
|
"grad_norm": 0.36389395594596863, |
|
"learning_rate": 1.986108565219866e-05, |
|
"loss": 1.0158, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.3347794649313087, |
|
"grad_norm": 0.31022781133651733, |
|
"learning_rate": 1.986031890374113e-05, |
|
"loss": 1.1638, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.33550253073029646, |
|
"grad_norm": 0.4525332748889923, |
|
"learning_rate": 1.985955005992201e-05, |
|
"loss": 1.0545, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.3362255965292842, |
|
"grad_norm": 0.3653198480606079, |
|
"learning_rate": 1.985877912090468e-05, |
|
"loss": 1.1992, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.33694866232827186, |
|
"grad_norm": 0.5307183265686035, |
|
"learning_rate": 1.985800608685297e-05, |
|
"loss": 1.0617, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.3376717281272596, |
|
"grad_norm": 0.32720625400543213, |
|
"learning_rate": 1.985723095793116e-05, |
|
"loss": 0.9739, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.3383947939262473, |
|
"grad_norm": 0.3342708945274353, |
|
"learning_rate": 1.9856453734303958e-05, |
|
"loss": 0.9406, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.339117859725235, |
|
"grad_norm": 0.5348480939865112, |
|
"learning_rate": 1.9855674416136536e-05, |
|
"loss": 1.123, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.3398409255242227, |
|
"grad_norm": 0.37697339057922363, |
|
"learning_rate": 1.9854893003594492e-05, |
|
"loss": 1.1665, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.3405639913232104, |
|
"grad_norm": 0.36113324761390686, |
|
"learning_rate": 1.985410949684389e-05, |
|
"loss": 1.0995, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.34128705712219815, |
|
"grad_norm": 0.39146167039871216, |
|
"learning_rate": 1.9853323896051226e-05, |
|
"loss": 1.0418, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.3420101229211858, |
|
"grad_norm": 0.36467745900154114, |
|
"learning_rate": 1.9852536201383444e-05, |
|
"loss": 1.0498, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.34273318872017355, |
|
"grad_norm": 0.34970226883888245, |
|
"learning_rate": 1.985174641300793e-05, |
|
"loss": 0.88, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.34345625451916123, |
|
"grad_norm": 0.5417302846908569, |
|
"learning_rate": 1.9850954531092515e-05, |
|
"loss": 1.0795, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.34417932031814896, |
|
"grad_norm": 0.39648255705833435, |
|
"learning_rate": 1.9850160555805485e-05, |
|
"loss": 1.006, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.34490238611713664, |
|
"grad_norm": 0.29076090455055237, |
|
"learning_rate": 1.984936448731556e-05, |
|
"loss": 1.0339, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.34562545191612437, |
|
"grad_norm": 0.32601386308670044, |
|
"learning_rate": 1.9848566325791906e-05, |
|
"loss": 1.1229, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.34634851771511205, |
|
"grad_norm": 0.3112106919288635, |
|
"learning_rate": 1.984776607140414e-05, |
|
"loss": 1.019, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.3470715835140998, |
|
"grad_norm": 0.46314576268196106, |
|
"learning_rate": 1.984696372432231e-05, |
|
"loss": 1.0548, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.3477946493130875, |
|
"grad_norm": 0.3397153615951538, |
|
"learning_rate": 1.9846159284716933e-05, |
|
"loss": 1.0716, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.3485177151120752, |
|
"grad_norm": 0.3064862787723541, |
|
"learning_rate": 1.9845352752758943e-05, |
|
"loss": 1.0731, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.3492407809110629, |
|
"grad_norm": 0.3357180953025818, |
|
"learning_rate": 1.984454412861974e-05, |
|
"loss": 1.1419, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.3499638467100506, |
|
"grad_norm": 0.34050750732421875, |
|
"learning_rate": 1.9843733412471155e-05, |
|
"loss": 0.9212, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.35068691250903833, |
|
"grad_norm": 0.3766930103302002, |
|
"learning_rate": 1.9842920604485474e-05, |
|
"loss": 1.0685, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.351409978308026, |
|
"grad_norm": 0.39459505677223206, |
|
"learning_rate": 1.9842105704835416e-05, |
|
"loss": 1.0002, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.35213304410701374, |
|
"grad_norm": 0.552499532699585, |
|
"learning_rate": 1.9841288713694155e-05, |
|
"loss": 0.9073, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.35285610990600147, |
|
"grad_norm": 0.34755146503448486, |
|
"learning_rate": 1.9840469631235305e-05, |
|
"loss": 1.0715, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.35357917570498915, |
|
"grad_norm": 0.33920028805732727, |
|
"learning_rate": 1.9839648457632928e-05, |
|
"loss": 1.0243, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.3543022415039769, |
|
"grad_norm": 0.3754862844944, |
|
"learning_rate": 1.9838825193061518e-05, |
|
"loss": 1.0662, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.35502530730296455, |
|
"grad_norm": 0.4731397330760956, |
|
"learning_rate": 1.9837999837696028e-05, |
|
"loss": 1.1542, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.3557483731019523, |
|
"grad_norm": 0.3418963551521301, |
|
"learning_rate": 1.983717239171185e-05, |
|
"loss": 1.1911, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.35647143890093996, |
|
"grad_norm": 0.40142807364463806, |
|
"learning_rate": 1.9836342855284817e-05, |
|
"loss": 1.0625, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.3571945046999277, |
|
"grad_norm": 0.856560468673706, |
|
"learning_rate": 1.9835511228591214e-05, |
|
"loss": 1.0711, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.3579175704989154, |
|
"grad_norm": 0.4791191816329956, |
|
"learning_rate": 1.983467751180776e-05, |
|
"loss": 1.0933, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.3586406362979031, |
|
"grad_norm": 0.41239994764328003, |
|
"learning_rate": 1.983384170511163e-05, |
|
"loss": 1.0406, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.35936370209689084, |
|
"grad_norm": 0.5610681772232056, |
|
"learning_rate": 1.983300380868043e-05, |
|
"loss": 1.1115, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.3600867678958785, |
|
"grad_norm": 0.46288248896598816, |
|
"learning_rate": 1.9832163822692217e-05, |
|
"loss": 0.975, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.36080983369486624, |
|
"grad_norm": 0.4447326958179474, |
|
"learning_rate": 1.98313217473255e-05, |
|
"loss": 1.1229, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.3615328994938539, |
|
"grad_norm": 0.3198843002319336, |
|
"learning_rate": 1.9830477582759213e-05, |
|
"loss": 1.1134, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.36225596529284165, |
|
"grad_norm": 0.5029221177101135, |
|
"learning_rate": 1.9829631329172754e-05, |
|
"loss": 1.0069, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.36297903109182933, |
|
"grad_norm": 0.3619605302810669, |
|
"learning_rate": 1.982878298674595e-05, |
|
"loss": 1.114, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.36370209689081706, |
|
"grad_norm": 0.9314847588539124, |
|
"learning_rate": 1.9827932555659076e-05, |
|
"loss": 1.1359, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.3644251626898048, |
|
"grad_norm": 0.47591856122016907, |
|
"learning_rate": 1.9827080036092857e-05, |
|
"loss": 0.9483, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.36514822848879247, |
|
"grad_norm": 0.4096466600894928, |
|
"learning_rate": 1.9826225428228455e-05, |
|
"loss": 1.1312, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.3658712942877802, |
|
"grad_norm": 0.4787607192993164, |
|
"learning_rate": 1.982536873224748e-05, |
|
"loss": 0.9659, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.3665943600867679, |
|
"grad_norm": 0.4249323308467865, |
|
"learning_rate": 1.9824509948331983e-05, |
|
"loss": 1.1238, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.3673174258857556, |
|
"grad_norm": 0.6485395431518555, |
|
"learning_rate": 1.9823649076664456e-05, |
|
"loss": 1.094, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.3680404916847433, |
|
"grad_norm": 0.38700076937675476, |
|
"learning_rate": 1.982278611742784e-05, |
|
"loss": 1.0641, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.368763557483731, |
|
"grad_norm": 0.33102619647979736, |
|
"learning_rate": 1.9821921070805522e-05, |
|
"loss": 1.0633, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.36948662328271875, |
|
"grad_norm": 0.36750614643096924, |
|
"learning_rate": 1.982105393698132e-05, |
|
"loss": 1.0691, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.37020968908170643, |
|
"grad_norm": 0.5264472365379333, |
|
"learning_rate": 1.9820184716139513e-05, |
|
"loss": 1.0395, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.37093275488069416, |
|
"grad_norm": 0.333700567483902, |
|
"learning_rate": 1.9819313408464804e-05, |
|
"loss": 1.0712, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.37165582067968184, |
|
"grad_norm": 0.3045554757118225, |
|
"learning_rate": 1.9818440014142363e-05, |
|
"loss": 1.0887, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.37237888647866957, |
|
"grad_norm": 0.4070206880569458, |
|
"learning_rate": 1.9817564533357775e-05, |
|
"loss": 0.9634, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.37310195227765725, |
|
"grad_norm": 0.3737180829048157, |
|
"learning_rate": 1.9816686966297095e-05, |
|
"loss": 1.0509, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.373825018076645, |
|
"grad_norm": 0.40559202432632446, |
|
"learning_rate": 1.9815807313146803e-05, |
|
"loss": 1.1261, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.3745480838756327, |
|
"grad_norm": 0.36162590980529785, |
|
"learning_rate": 1.9814925574093836e-05, |
|
"loss": 0.9911, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.3752711496746204, |
|
"grad_norm": 0.355801522731781, |
|
"learning_rate": 1.981404174932556e-05, |
|
"loss": 1.0928, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.3759942154736081, |
|
"grad_norm": 0.4543927311897278, |
|
"learning_rate": 1.98131558390298e-05, |
|
"loss": 1.0528, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.3767172812725958, |
|
"grad_norm": 0.3490901291370392, |
|
"learning_rate": 1.981226784339481e-05, |
|
"loss": 1.1732, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.3774403470715835, |
|
"grad_norm": 0.3499595820903778, |
|
"learning_rate": 1.981137776260929e-05, |
|
"loss": 1.0, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.3781634128705712, |
|
"grad_norm": 0.3718455731868744, |
|
"learning_rate": 1.981048559686239e-05, |
|
"loss": 1.0863, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.37888647866955893, |
|
"grad_norm": 0.5918817520141602, |
|
"learning_rate": 1.9809591346343705e-05, |
|
"loss": 1.0692, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.3796095444685466, |
|
"grad_norm": 0.32191401720046997, |
|
"learning_rate": 1.980869501124326e-05, |
|
"loss": 1.0125, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.38033261026753434, |
|
"grad_norm": 0.30539965629577637, |
|
"learning_rate": 1.9807796591751535e-05, |
|
"loss": 0.9828, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.3810556760665221, |
|
"grad_norm": 0.28572794795036316, |
|
"learning_rate": 1.980689608805944e-05, |
|
"loss": 1.0433, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.38177874186550975, |
|
"grad_norm": 0.4277295768260956, |
|
"learning_rate": 1.980599350035834e-05, |
|
"loss": 0.9482, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.3825018076644975, |
|
"grad_norm": 0.49444156885147095, |
|
"learning_rate": 1.9805088828840043e-05, |
|
"loss": 1.0394, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.38322487346348516, |
|
"grad_norm": 0.3143816888332367, |
|
"learning_rate": 1.9804182073696793e-05, |
|
"loss": 0.9569, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.3839479392624729, |
|
"grad_norm": 0.7650742530822754, |
|
"learning_rate": 1.980327323512128e-05, |
|
"loss": 0.8502, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.38467100506146057, |
|
"grad_norm": 0.4131964147090912, |
|
"learning_rate": 1.9802362313306633e-05, |
|
"loss": 1.0786, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.3853940708604483, |
|
"grad_norm": 0.4136810600757599, |
|
"learning_rate": 1.9801449308446428e-05, |
|
"loss": 1.1747, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.38611713665943603, |
|
"grad_norm": 0.7104756832122803, |
|
"learning_rate": 1.980053422073469e-05, |
|
"loss": 1.0974, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.3868402024584237, |
|
"grad_norm": 0.31901639699935913, |
|
"learning_rate": 1.979961705036587e-05, |
|
"loss": 1.0914, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.38756326825741144, |
|
"grad_norm": 0.4051487147808075, |
|
"learning_rate": 1.9798697797534875e-05, |
|
"loss": 0.9603, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.3882863340563991, |
|
"grad_norm": 0.45901933312416077, |
|
"learning_rate": 1.9797776462437048e-05, |
|
"loss": 1.02, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.38900939985538685, |
|
"grad_norm": 0.32152485847473145, |
|
"learning_rate": 1.9796853045268177e-05, |
|
"loss": 1.0367, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.3897324656543745, |
|
"grad_norm": 0.3409494161605835, |
|
"learning_rate": 1.9795927546224495e-05, |
|
"loss": 1.0584, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.39045553145336226, |
|
"grad_norm": 0.36208903789520264, |
|
"learning_rate": 1.979499996550267e-05, |
|
"loss": 1.1319, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.39117859725235, |
|
"grad_norm": 0.4814053177833557, |
|
"learning_rate": 1.9794070303299824e-05, |
|
"loss": 1.1161, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.39190166305133767, |
|
"grad_norm": 0.5428364276885986, |
|
"learning_rate": 1.979313855981351e-05, |
|
"loss": 1.0326, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.3926247288503254, |
|
"grad_norm": 0.336286336183548, |
|
"learning_rate": 1.9792204735241726e-05, |
|
"loss": 0.9927, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.3933477946493131, |
|
"grad_norm": 0.38685551285743713, |
|
"learning_rate": 1.979126882978292e-05, |
|
"loss": 1.0352, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.3940708604483008, |
|
"grad_norm": 0.4633883833885193, |
|
"learning_rate": 1.9790330843635967e-05, |
|
"loss": 1.0327, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.3947939262472885, |
|
"grad_norm": 0.35226261615753174, |
|
"learning_rate": 1.97893907770002e-05, |
|
"loss": 1.1259, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.3955169920462762, |
|
"grad_norm": 0.4218463897705078, |
|
"learning_rate": 1.9788448630075385e-05, |
|
"loss": 1.1796, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.3962400578452639, |
|
"grad_norm": 0.36684003472328186, |
|
"learning_rate": 1.9787504403061733e-05, |
|
"loss": 1.134, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.3969631236442516, |
|
"grad_norm": 0.3835614025592804, |
|
"learning_rate": 1.97865580961599e-05, |
|
"loss": 1.0669, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.39768618944323936, |
|
"grad_norm": 0.36046484112739563, |
|
"learning_rate": 1.9785609709570973e-05, |
|
"loss": 1.1683, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.39840925524222703, |
|
"grad_norm": 0.4382922649383545, |
|
"learning_rate": 1.9784659243496492e-05, |
|
"loss": 1.0883, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.39913232104121477, |
|
"grad_norm": 0.343426913022995, |
|
"learning_rate": 1.9783706698138438e-05, |
|
"loss": 1.1423, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.39985538684020244, |
|
"grad_norm": 0.4072953760623932, |
|
"learning_rate": 1.9782752073699224e-05, |
|
"loss": 1.1642, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.4005784526391902, |
|
"grad_norm": 0.3550209701061249, |
|
"learning_rate": 1.978179537038172e-05, |
|
"loss": 1.0956, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.40130151843817785, |
|
"grad_norm": 0.4303446412086487, |
|
"learning_rate": 1.9780836588389225e-05, |
|
"loss": 1.0257, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.4020245842371656, |
|
"grad_norm": 0.3410395681858063, |
|
"learning_rate": 1.9779875727925487e-05, |
|
"loss": 0.9585, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.4027476500361533, |
|
"grad_norm": 0.5033888816833496, |
|
"learning_rate": 1.9778912789194692e-05, |
|
"loss": 1.0376, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.403470715835141, |
|
"grad_norm": 0.4729475677013397, |
|
"learning_rate": 1.9777947772401468e-05, |
|
"loss": 1.1985, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.4041937816341287, |
|
"grad_norm": 0.36693134903907776, |
|
"learning_rate": 1.9776980677750884e-05, |
|
"loss": 1.1011, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.4049168474331164, |
|
"grad_norm": 0.49466729164123535, |
|
"learning_rate": 1.9776011505448455e-05, |
|
"loss": 0.9232, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.40563991323210413, |
|
"grad_norm": 0.3103843033313751, |
|
"learning_rate": 1.9775040255700137e-05, |
|
"loss": 0.9774, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.4063629790310918, |
|
"grad_norm": 0.3624059855937958, |
|
"learning_rate": 1.9774066928712315e-05, |
|
"loss": 1.0841, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.40708604483007954, |
|
"grad_norm": 0.4419246017932892, |
|
"learning_rate": 1.9773091524691833e-05, |
|
"loss": 1.1977, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.4078091106290672, |
|
"grad_norm": 0.43419817090034485, |
|
"learning_rate": 1.9772114043845968e-05, |
|
"loss": 1.0559, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.40853217642805495, |
|
"grad_norm": 0.5691271424293518, |
|
"learning_rate": 1.9771134486382436e-05, |
|
"loss": 1.1231, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.4092552422270427, |
|
"grad_norm": 0.366300493478775, |
|
"learning_rate": 1.9770152852509403e-05, |
|
"loss": 1.1711, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.40997830802603036, |
|
"grad_norm": 0.38072991371154785, |
|
"learning_rate": 1.9769169142435463e-05, |
|
"loss": 1.0633, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.4107013738250181, |
|
"grad_norm": 0.33428722620010376, |
|
"learning_rate": 1.9768183356369666e-05, |
|
"loss": 1.0056, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.41142443962400577, |
|
"grad_norm": 0.31342649459838867, |
|
"learning_rate": 1.9767195494521493e-05, |
|
"loss": 0.9913, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.4121475054229935, |
|
"grad_norm": 0.3302362561225891, |
|
"learning_rate": 1.976620555710087e-05, |
|
"loss": 1.0847, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.4128705712219812, |
|
"grad_norm": 0.41525211930274963, |
|
"learning_rate": 1.976521354431816e-05, |
|
"loss": 1.081, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.4135936370209689, |
|
"grad_norm": 0.39214402437210083, |
|
"learning_rate": 1.976421945638417e-05, |
|
"loss": 1.0983, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.41431670281995664, |
|
"grad_norm": 0.5267409086227417, |
|
"learning_rate": 1.976322329351015e-05, |
|
"loss": 1.0452, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.4150397686189443, |
|
"grad_norm": 0.40632006525993347, |
|
"learning_rate": 1.976222505590779e-05, |
|
"loss": 1.1461, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.41576283441793205, |
|
"grad_norm": 0.43246760964393616, |
|
"learning_rate": 1.976122474378922e-05, |
|
"loss": 1.1012, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.4164859002169197, |
|
"grad_norm": 0.6482414603233337, |
|
"learning_rate": 1.976022235736701e-05, |
|
"loss": 1.0559, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.41720896601590746, |
|
"grad_norm": 0.381939560174942, |
|
"learning_rate": 1.9759217896854167e-05, |
|
"loss": 1.0672, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.41793203181489513, |
|
"grad_norm": 0.32086628675460815, |
|
"learning_rate": 1.9758211362464155e-05, |
|
"loss": 1.1237, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.41865509761388287, |
|
"grad_norm": 0.5381978750228882, |
|
"learning_rate": 1.9757202754410857e-05, |
|
"loss": 1.1335, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.4193781634128706, |
|
"grad_norm": 0.3503972589969635, |
|
"learning_rate": 1.9756192072908605e-05, |
|
"loss": 1.0443, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.4201012292118583, |
|
"grad_norm": 0.3661491572856903, |
|
"learning_rate": 1.975517931817218e-05, |
|
"loss": 1.1533, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.420824295010846, |
|
"grad_norm": 0.36555972695350647, |
|
"learning_rate": 1.9754164490416796e-05, |
|
"loss": 1.1034, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.4215473608098337, |
|
"grad_norm": 0.7466145157814026, |
|
"learning_rate": 1.975314758985811e-05, |
|
"loss": 1.011, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.4222704266088214, |
|
"grad_norm": 0.4798685610294342, |
|
"learning_rate": 1.975212861671221e-05, |
|
"loss": 1.1509, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.4229934924078091, |
|
"grad_norm": 0.4475909471511841, |
|
"learning_rate": 1.975110757119564e-05, |
|
"loss": 1.0896, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.4237165582067968, |
|
"grad_norm": 0.6410030126571655, |
|
"learning_rate": 1.9750084453525372e-05, |
|
"loss": 1.0774, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.4244396240057845, |
|
"grad_norm": 0.34272611141204834, |
|
"learning_rate": 1.9749059263918825e-05, |
|
"loss": 1.1316, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.42516268980477223, |
|
"grad_norm": 0.3565758168697357, |
|
"learning_rate": 1.9748032002593854e-05, |
|
"loss": 1.0329, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.42588575560375996, |
|
"grad_norm": 0.40375301241874695, |
|
"learning_rate": 1.9747002669768763e-05, |
|
"loss": 0.9806, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.42660882140274764, |
|
"grad_norm": 0.2997436225414276, |
|
"learning_rate": 1.9745971265662286e-05, |
|
"loss": 1.0032, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.42733188720173537, |
|
"grad_norm": 0.36661413311958313, |
|
"learning_rate": 1.9744937790493595e-05, |
|
"loss": 1.0444, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.42805495300072305, |
|
"grad_norm": 0.5585591793060303, |
|
"learning_rate": 1.974390224448232e-05, |
|
"loss": 0.9029, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.4287780187997108, |
|
"grad_norm": 0.38664501905441284, |
|
"learning_rate": 1.974286462784851e-05, |
|
"loss": 1.0633, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.42950108459869846, |
|
"grad_norm": 0.6772089004516602, |
|
"learning_rate": 1.9741824940812664e-05, |
|
"loss": 1.1014, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.4302241503976862, |
|
"grad_norm": 0.42573368549346924, |
|
"learning_rate": 1.9740783183595726e-05, |
|
"loss": 0.8741, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.4309472161966739, |
|
"grad_norm": 0.6061074137687683, |
|
"learning_rate": 1.973973935641907e-05, |
|
"loss": 1.1427, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.4316702819956616, |
|
"grad_norm": 0.45339277386665344, |
|
"learning_rate": 1.9738693459504514e-05, |
|
"loss": 1.0578, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.43239334779464933, |
|
"grad_norm": 0.4401942193508148, |
|
"learning_rate": 1.9737645493074313e-05, |
|
"loss": 1.0383, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.433116413593637, |
|
"grad_norm": 0.37971097230911255, |
|
"learning_rate": 1.9736595457351167e-05, |
|
"loss": 1.1324, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.43383947939262474, |
|
"grad_norm": 0.5753974914550781, |
|
"learning_rate": 1.973554335255822e-05, |
|
"loss": 1.1619, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.4345625451916124, |
|
"grad_norm": 0.39130666851997375, |
|
"learning_rate": 1.973448917891904e-05, |
|
"loss": 1.1005, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.43528561099060015, |
|
"grad_norm": 0.48390281200408936, |
|
"learning_rate": 1.9733432936657643e-05, |
|
"loss": 1.0415, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.4360086767895879, |
|
"grad_norm": 0.5121054649353027, |
|
"learning_rate": 1.973237462599849e-05, |
|
"loss": 1.0392, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.43673174258857556, |
|
"grad_norm": 0.4659397602081299, |
|
"learning_rate": 1.9731314247166474e-05, |
|
"loss": 1.0161, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.4374548083875633, |
|
"grad_norm": 0.5566121339797974, |
|
"learning_rate": 1.973025180038693e-05, |
|
"loss": 1.2346, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.43817787418655096, |
|
"grad_norm": 0.4821203052997589, |
|
"learning_rate": 1.9729187285885636e-05, |
|
"loss": 1.0823, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.4389009399855387, |
|
"grad_norm": 0.47735702991485596, |
|
"learning_rate": 1.9728120703888804e-05, |
|
"loss": 1.0736, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.4396240057845264, |
|
"grad_norm": 0.34330862760543823, |
|
"learning_rate": 1.9727052054623086e-05, |
|
"loss": 1.1462, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.4403470715835141, |
|
"grad_norm": 0.36775317788124084, |
|
"learning_rate": 1.972598133831558e-05, |
|
"loss": 1.1986, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.4410701373825018, |
|
"grad_norm": 0.329261839389801, |
|
"learning_rate": 1.972490855519381e-05, |
|
"loss": 1.1152, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.4417932031814895, |
|
"grad_norm": 0.3638715445995331, |
|
"learning_rate": 1.9723833705485752e-05, |
|
"loss": 0.9042, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.44251626898047725, |
|
"grad_norm": 0.3313756585121155, |
|
"learning_rate": 1.9722756789419816e-05, |
|
"loss": 1.1223, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.4432393347794649, |
|
"grad_norm": 0.46972256898880005, |
|
"learning_rate": 1.9721677807224853e-05, |
|
"loss": 0.9796, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.44396240057845265, |
|
"grad_norm": 0.37405598163604736, |
|
"learning_rate": 1.9720596759130146e-05, |
|
"loss": 0.9614, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.44468546637744033, |
|
"grad_norm": 0.4029143154621124, |
|
"learning_rate": 1.9719513645365426e-05, |
|
"loss": 1.1846, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.44540853217642806, |
|
"grad_norm": 0.3397390842437744, |
|
"learning_rate": 1.9718428466160863e-05, |
|
"loss": 1.2796, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.44613159797541574, |
|
"grad_norm": 0.43354395031929016, |
|
"learning_rate": 1.9717341221747056e-05, |
|
"loss": 1.1272, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.44685466377440347, |
|
"grad_norm": 0.3265979290008545, |
|
"learning_rate": 1.9716251912355053e-05, |
|
"loss": 0.9609, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.4475777295733912, |
|
"grad_norm": 0.4178304374217987, |
|
"learning_rate": 1.9715160538216337e-05, |
|
"loss": 0.9845, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.4483007953723789, |
|
"grad_norm": 0.4773651957511902, |
|
"learning_rate": 1.971406709956283e-05, |
|
"loss": 1.1367, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.4490238611713666, |
|
"grad_norm": 0.5595555901527405, |
|
"learning_rate": 1.9712971596626894e-05, |
|
"loss": 1.1154, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.4497469269703543, |
|
"grad_norm": 0.5785884261131287, |
|
"learning_rate": 1.971187402964132e-05, |
|
"loss": 1.2254, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.450469992769342, |
|
"grad_norm": 0.38726550340652466, |
|
"learning_rate": 1.9710774398839354e-05, |
|
"loss": 1.0962, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.4511930585683297, |
|
"grad_norm": 0.3972480893135071, |
|
"learning_rate": 1.970967270445467e-05, |
|
"loss": 1.0462, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.45191612436731743, |
|
"grad_norm": 0.3542553186416626, |
|
"learning_rate": 1.970856894672139e-05, |
|
"loss": 1.139, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.45263919016630516, |
|
"grad_norm": 0.3438204526901245, |
|
"learning_rate": 1.9707463125874052e-05, |
|
"loss": 1.0925, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.45336225596529284, |
|
"grad_norm": 0.3725246489048004, |
|
"learning_rate": 1.9706355242147656e-05, |
|
"loss": 1.1189, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.45408532176428057, |
|
"grad_norm": 0.5289852619171143, |
|
"learning_rate": 1.9705245295777636e-05, |
|
"loss": 1.0969, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.45480838756326825, |
|
"grad_norm": 0.3476713001728058, |
|
"learning_rate": 1.970413328699986e-05, |
|
"loss": 1.1203, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.455531453362256, |
|
"grad_norm": 0.4541453719139099, |
|
"learning_rate": 1.9703019216050627e-05, |
|
"loss": 0.9669, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.45625451916124365, |
|
"grad_norm": 0.3815220296382904, |
|
"learning_rate": 1.9701903083166692e-05, |
|
"loss": 1.0995, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.4569775849602314, |
|
"grad_norm": 0.5542870759963989, |
|
"learning_rate": 1.970078488858523e-05, |
|
"loss": 1.069, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.45770065075921906, |
|
"grad_norm": 0.4734939634799957, |
|
"learning_rate": 1.9699664632543868e-05, |
|
"loss": 1.0801, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.4584237165582068, |
|
"grad_norm": 0.45855480432510376, |
|
"learning_rate": 1.9698542315280658e-05, |
|
"loss": 1.1013, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.4591467823571945, |
|
"grad_norm": 0.48604539036750793, |
|
"learning_rate": 1.9697417937034106e-05, |
|
"loss": 1.0505, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.4598698481561822, |
|
"grad_norm": 0.4221401810646057, |
|
"learning_rate": 1.9696291498043144e-05, |
|
"loss": 1.0743, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.46059291395516994, |
|
"grad_norm": 0.39816153049468994, |
|
"learning_rate": 1.9695162998547145e-05, |
|
"loss": 1.0368, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.4613159797541576, |
|
"grad_norm": 0.3210352957248688, |
|
"learning_rate": 1.969403243878592e-05, |
|
"loss": 0.9725, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.46203904555314534, |
|
"grad_norm": 0.332942932844162, |
|
"learning_rate": 1.969289981899972e-05, |
|
"loss": 1.0431, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.462762111352133, |
|
"grad_norm": 0.5648030638694763, |
|
"learning_rate": 1.9691765139429227e-05, |
|
"loss": 1.0316, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.46348517715112075, |
|
"grad_norm": 0.3773210048675537, |
|
"learning_rate": 1.969062840031557e-05, |
|
"loss": 0.9669, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.4642082429501085, |
|
"grad_norm": 0.37246161699295044, |
|
"learning_rate": 1.968948960190031e-05, |
|
"loss": 1.0267, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.46493130874909616, |
|
"grad_norm": 0.30961740016937256, |
|
"learning_rate": 1.9688348744425443e-05, |
|
"loss": 1.0467, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.4656543745480839, |
|
"grad_norm": 0.4040377736091614, |
|
"learning_rate": 1.968720582813341e-05, |
|
"loss": 1.0727, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.46637744034707157, |
|
"grad_norm": 0.37091973423957825, |
|
"learning_rate": 1.9686060853267088e-05, |
|
"loss": 1.1161, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.4671005061460593, |
|
"grad_norm": 0.33641329407691956, |
|
"learning_rate": 1.9684913820069785e-05, |
|
"loss": 0.9528, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.467823571945047, |
|
"grad_norm": 0.36881205439567566, |
|
"learning_rate": 1.9683764728785255e-05, |
|
"loss": 1.0615, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.4685466377440347, |
|
"grad_norm": 0.37982848286628723, |
|
"learning_rate": 1.968261357965768e-05, |
|
"loss": 1.0018, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.4692697035430224, |
|
"grad_norm": 0.4756353497505188, |
|
"learning_rate": 1.9681460372931688e-05, |
|
"loss": 1.0773, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.4699927693420101, |
|
"grad_norm": 0.4749247133731842, |
|
"learning_rate": 1.9680305108852335e-05, |
|
"loss": 0.9998, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.47071583514099785, |
|
"grad_norm": 0.309193879365921, |
|
"learning_rate": 1.9679147787665128e-05, |
|
"loss": 1.0227, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.47143890093998553, |
|
"grad_norm": 0.44455206394195557, |
|
"learning_rate": 1.9677988409615996e-05, |
|
"loss": 1.1575, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.47216196673897326, |
|
"grad_norm": 0.3658543527126312, |
|
"learning_rate": 1.9676826974951316e-05, |
|
"loss": 1.1213, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.47288503253796094, |
|
"grad_norm": 0.3084392249584198, |
|
"learning_rate": 1.9675663483917896e-05, |
|
"loss": 0.9861, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.47360809833694867, |
|
"grad_norm": 0.39952704310417175, |
|
"learning_rate": 1.9674497936762984e-05, |
|
"loss": 1.1173, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.47433116413593635, |
|
"grad_norm": 0.37585368752479553, |
|
"learning_rate": 1.9673330333734263e-05, |
|
"loss": 1.0684, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.4750542299349241, |
|
"grad_norm": 0.4608975946903229, |
|
"learning_rate": 1.9672160675079857e-05, |
|
"loss": 1.0324, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.4757772957339118, |
|
"grad_norm": 0.3381264805793762, |
|
"learning_rate": 1.9670988961048318e-05, |
|
"loss": 1.0594, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.4765003615328995, |
|
"grad_norm": 0.402926504611969, |
|
"learning_rate": 1.9669815191888647e-05, |
|
"loss": 1.0805, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.4772234273318872, |
|
"grad_norm": 0.4931265115737915, |
|
"learning_rate": 1.966863936785027e-05, |
|
"loss": 0.9502, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.4779464931308749, |
|
"grad_norm": 0.3112112879753113, |
|
"learning_rate": 1.9667461489183056e-05, |
|
"loss": 1.081, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.4786695589298626, |
|
"grad_norm": 0.4174824059009552, |
|
"learning_rate": 1.9666281556137313e-05, |
|
"loss": 0.9463, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.4793926247288503, |
|
"grad_norm": 0.45544588565826416, |
|
"learning_rate": 1.9665099568963777e-05, |
|
"loss": 0.9999, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 0.48011569052783803, |
|
"grad_norm": 0.3407338559627533, |
|
"learning_rate": 1.9663915527913628e-05, |
|
"loss": 1.0472, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.48083875632682577, |
|
"grad_norm": 0.4073576033115387, |
|
"learning_rate": 1.9662729433238477e-05, |
|
"loss": 1.1324, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.48156182212581344, |
|
"grad_norm": 0.3806203305721283, |
|
"learning_rate": 1.966154128519038e-05, |
|
"loss": 0.9075, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.4822848879248012, |
|
"grad_norm": 0.8510825634002686, |
|
"learning_rate": 1.966035108402182e-05, |
|
"loss": 1.0489, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 0.48300795372378885, |
|
"grad_norm": 0.36897632479667664, |
|
"learning_rate": 1.965915882998572e-05, |
|
"loss": 1.0699, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.4837310195227766, |
|
"grad_norm": 0.6465381979942322, |
|
"learning_rate": 1.9657964523335443e-05, |
|
"loss": 1.1132, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 0.48445408532176426, |
|
"grad_norm": 0.5258365273475647, |
|
"learning_rate": 1.965676816432478e-05, |
|
"loss": 1.0174, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.485177151120752, |
|
"grad_norm": 0.5828375816345215, |
|
"learning_rate": 1.9655569753207962e-05, |
|
"loss": 1.106, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 0.48590021691973967, |
|
"grad_norm": 0.32821497321128845, |
|
"learning_rate": 1.965436929023966e-05, |
|
"loss": 0.9611, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.4866232827187274, |
|
"grad_norm": 0.37569311261177063, |
|
"learning_rate": 1.9653166775674976e-05, |
|
"loss": 1.0434, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 0.48734634851771513, |
|
"grad_norm": 0.41483476758003235, |
|
"learning_rate": 1.965196220976945e-05, |
|
"loss": 1.1067, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.4880694143167028, |
|
"grad_norm": 0.5044158697128296, |
|
"learning_rate": 1.965075559277906e-05, |
|
"loss": 1.1482, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.48879248011569054, |
|
"grad_norm": 0.4230242371559143, |
|
"learning_rate": 1.9649546924960217e-05, |
|
"loss": 0.8987, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.4895155459146782, |
|
"grad_norm": 0.5350583791732788, |
|
"learning_rate": 1.964833620656976e-05, |
|
"loss": 1.0624, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 0.49023861171366595, |
|
"grad_norm": 0.3142975866794586, |
|
"learning_rate": 1.9647123437864985e-05, |
|
"loss": 1.1541, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 0.4909616775126536, |
|
"grad_norm": 0.44349947571754456, |
|
"learning_rate": 1.96459086191036e-05, |
|
"loss": 1.1184, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 0.49168474331164136, |
|
"grad_norm": 0.36088117957115173, |
|
"learning_rate": 1.964469175054377e-05, |
|
"loss": 1.0658, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.4924078091106291, |
|
"grad_norm": 0.37263569235801697, |
|
"learning_rate": 1.964347283244407e-05, |
|
"loss": 0.9996, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 0.49313087490961677, |
|
"grad_norm": 0.34794577956199646, |
|
"learning_rate": 1.964225186506354e-05, |
|
"loss": 1.119, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.4938539407086045, |
|
"grad_norm": 0.3667242228984833, |
|
"learning_rate": 1.9641028848661633e-05, |
|
"loss": 1.1527, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 0.4945770065075922, |
|
"grad_norm": 0.3178524076938629, |
|
"learning_rate": 1.963980378349825e-05, |
|
"loss": 1.1277, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.4953000723065799, |
|
"grad_norm": 0.669657289981842, |
|
"learning_rate": 1.963857666983372e-05, |
|
"loss": 1.026, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.4960231381055676, |
|
"grad_norm": 0.4365706145763397, |
|
"learning_rate": 1.963734750792881e-05, |
|
"loss": 1.0566, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 0.4967462039045553, |
|
"grad_norm": 0.3377775549888611, |
|
"learning_rate": 1.963611629804472e-05, |
|
"loss": 1.0243, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 0.49746926970354305, |
|
"grad_norm": 0.49133753776550293, |
|
"learning_rate": 1.9634883040443093e-05, |
|
"loss": 1.0347, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.4981923355025307, |
|
"grad_norm": 0.46029427647590637, |
|
"learning_rate": 1.9633647735386002e-05, |
|
"loss": 1.2015, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 0.49891540130151846, |
|
"grad_norm": 0.4478205442428589, |
|
"learning_rate": 1.9632410383135946e-05, |
|
"loss": 1.2785, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.49963846710050613, |
|
"grad_norm": 0.44710350036621094, |
|
"learning_rate": 1.9631170983955878e-05, |
|
"loss": 1.0269, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 0.5003615328994938, |
|
"grad_norm": 0.5935525298118591, |
|
"learning_rate": 1.9629929538109175e-05, |
|
"loss": 0.8842, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.5003615328994938, |
|
"eval_loss": 1.0690364837646484, |
|
"eval_runtime": 669.5491, |
|
"eval_samples_per_second": 3.48, |
|
"eval_steps_per_second": 0.871, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.5010845986984815, |
|
"grad_norm": 0.7159201502799988, |
|
"learning_rate": 1.962868604585964e-05, |
|
"loss": 1.0409, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 0.5018076644974693, |
|
"grad_norm": 0.5066662430763245, |
|
"learning_rate": 1.962744050747153e-05, |
|
"loss": 1.0734, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 0.502530730296457, |
|
"grad_norm": 0.44471684098243713, |
|
"learning_rate": 1.9626192923209524e-05, |
|
"loss": 1.1123, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.5032537960954447, |
|
"grad_norm": 0.5092771053314209, |
|
"learning_rate": 1.962494329333874e-05, |
|
"loss": 1.0229, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.5039768618944324, |
|
"grad_norm": 0.39915162324905396, |
|
"learning_rate": 1.962369161812473e-05, |
|
"loss": 1.0708, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 0.5046999276934201, |
|
"grad_norm": 0.4196009039878845, |
|
"learning_rate": 1.962243789783348e-05, |
|
"loss": 0.9515, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 0.5054229934924078, |
|
"grad_norm": 0.3307042717933655, |
|
"learning_rate": 1.962118213273141e-05, |
|
"loss": 1.021, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 0.5061460592913956, |
|
"grad_norm": 0.40860000252723694, |
|
"learning_rate": 1.961992432308538e-05, |
|
"loss": 1.098, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.5068691250903832, |
|
"grad_norm": 0.5225731134414673, |
|
"learning_rate": 1.9618664469162673e-05, |
|
"loss": 1.1183, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 0.5075921908893709, |
|
"grad_norm": 0.3705964684486389, |
|
"learning_rate": 1.9617402571231022e-05, |
|
"loss": 1.0388, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 0.5083152566883586, |
|
"grad_norm": 0.48947831988334656, |
|
"learning_rate": 1.9616138629558577e-05, |
|
"loss": 1.1357, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 0.5090383224873464, |
|
"grad_norm": 0.4139689803123474, |
|
"learning_rate": 1.9614872644413943e-05, |
|
"loss": 1.1402, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.5097613882863341, |
|
"grad_norm": 0.5978605151176453, |
|
"learning_rate": 1.9613604616066137e-05, |
|
"loss": 1.0413, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.5104844540853217, |
|
"grad_norm": 0.5595225095748901, |
|
"learning_rate": 1.961233454478462e-05, |
|
"loss": 0.9795, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 0.5112075198843095, |
|
"grad_norm": 0.5151244401931763, |
|
"learning_rate": 1.9611062430839296e-05, |
|
"loss": 0.9897, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 0.5119305856832972, |
|
"grad_norm": 0.43460798263549805, |
|
"learning_rate": 1.960978827450049e-05, |
|
"loss": 1.0279, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.5126536514822849, |
|
"grad_norm": 0.3416450023651123, |
|
"learning_rate": 1.9608512076038964e-05, |
|
"loss": 1.0831, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 0.5133767172812725, |
|
"grad_norm": 0.4662010371685028, |
|
"learning_rate": 1.960723383572592e-05, |
|
"loss": 1.0736, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.5140997830802603, |
|
"grad_norm": 0.3611615300178528, |
|
"learning_rate": 1.9605953553832987e-05, |
|
"loss": 0.9366, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 0.514822848879248, |
|
"grad_norm": 0.3886420726776123, |
|
"learning_rate": 1.9604671230632234e-05, |
|
"loss": 1.1591, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.5155459146782357, |
|
"grad_norm": 0.45299410820007324, |
|
"learning_rate": 1.9603386866396155e-05, |
|
"loss": 1.0887, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 0.5162689804772235, |
|
"grad_norm": 0.3800199627876282, |
|
"learning_rate": 1.960210046139769e-05, |
|
"loss": 1.1232, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 0.5169920462762111, |
|
"grad_norm": 0.5120952129364014, |
|
"learning_rate": 1.9600812015910203e-05, |
|
"loss": 1.1163, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.5177151120751988, |
|
"grad_norm": 0.5335928797721863, |
|
"learning_rate": 1.9599521530207492e-05, |
|
"loss": 0.8892, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.5184381778741866, |
|
"grad_norm": 0.5247392058372498, |
|
"learning_rate": 1.959822900456379e-05, |
|
"loss": 1.1459, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 0.5191612436731743, |
|
"grad_norm": 0.3226061165332794, |
|
"learning_rate": 1.9596934439253768e-05, |
|
"loss": 0.9743, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 0.519884309472162, |
|
"grad_norm": 0.616631031036377, |
|
"learning_rate": 1.9595637834552524e-05, |
|
"loss": 1.1154, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 0.5206073752711496, |
|
"grad_norm": 0.48514634370803833, |
|
"learning_rate": 1.9594339190735594e-05, |
|
"loss": 1.0391, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.5213304410701374, |
|
"grad_norm": 0.36247485876083374, |
|
"learning_rate": 1.959303850807895e-05, |
|
"loss": 0.85, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 0.5220535068691251, |
|
"grad_norm": 0.36446431279182434, |
|
"learning_rate": 1.9591735786858985e-05, |
|
"loss": 1.063, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 0.5227765726681128, |
|
"grad_norm": 0.44948527216911316, |
|
"learning_rate": 1.9590431027352533e-05, |
|
"loss": 1.032, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 0.5234996384671005, |
|
"grad_norm": 0.612940788269043, |
|
"learning_rate": 1.958912422983687e-05, |
|
"loss": 1.0337, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.5242227042660882, |
|
"grad_norm": 0.48580238223075867, |
|
"learning_rate": 1.958781539458969e-05, |
|
"loss": 1.0932, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.5249457700650759, |
|
"grad_norm": 0.4745420813560486, |
|
"learning_rate": 1.9586504521889122e-05, |
|
"loss": 1.2802, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 0.5256688358640637, |
|
"grad_norm": 0.35681384801864624, |
|
"learning_rate": 1.9585191612013745e-05, |
|
"loss": 1.1219, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 0.5263919016630514, |
|
"grad_norm": 0.4016458988189697, |
|
"learning_rate": 1.9583876665242548e-05, |
|
"loss": 1.0063, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.527114967462039, |
|
"grad_norm": 0.43212026357650757, |
|
"learning_rate": 1.9582559681854962e-05, |
|
"loss": 1.1472, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 0.5278380332610267, |
|
"grad_norm": 0.4073551595211029, |
|
"learning_rate": 1.958124066213086e-05, |
|
"loss": 0.9998, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.5285610990600145, |
|
"grad_norm": 0.4123099446296692, |
|
"learning_rate": 1.957991960635053e-05, |
|
"loss": 1.0065, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 0.5292841648590022, |
|
"grad_norm": 0.3612479567527771, |
|
"learning_rate": 1.9578596514794714e-05, |
|
"loss": 1.1218, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.5300072306579898, |
|
"grad_norm": 0.5187839865684509, |
|
"learning_rate": 1.957727138774456e-05, |
|
"loss": 1.0317, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 0.5307302964569776, |
|
"grad_norm": 0.3550787568092346, |
|
"learning_rate": 1.957594422548168e-05, |
|
"loss": 1.0982, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 0.5314533622559653, |
|
"grad_norm": 0.38324612379074097, |
|
"learning_rate": 1.957461502828809e-05, |
|
"loss": 0.9884, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.532176428054953, |
|
"grad_norm": 0.9438675045967102, |
|
"learning_rate": 1.957328379644625e-05, |
|
"loss": 1.1052, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.5328994938539408, |
|
"grad_norm": 0.35722121596336365, |
|
"learning_rate": 1.9571950530239062e-05, |
|
"loss": 0.9642, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 0.5336225596529284, |
|
"grad_norm": 0.42087095975875854, |
|
"learning_rate": 1.9570615229949844e-05, |
|
"loss": 1.1394, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 0.5343456254519161, |
|
"grad_norm": 0.4152733087539673, |
|
"learning_rate": 1.956927789586235e-05, |
|
"loss": 1.0182, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 0.5350686912509038, |
|
"grad_norm": 0.4146084785461426, |
|
"learning_rate": 1.9567938528260778e-05, |
|
"loss": 0.981, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.5357917570498916, |
|
"grad_norm": 0.32588517665863037, |
|
"learning_rate": 1.9566597127429746e-05, |
|
"loss": 1.0129, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 0.5365148228488793, |
|
"grad_norm": 0.3670799136161804, |
|
"learning_rate": 1.9565253693654307e-05, |
|
"loss": 1.0014, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 0.5372378886478669, |
|
"grad_norm": 0.41809889674186707, |
|
"learning_rate": 1.9563908227219945e-05, |
|
"loss": 0.9734, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 0.5379609544468547, |
|
"grad_norm": 0.37623125314712524, |
|
"learning_rate": 1.956256072841258e-05, |
|
"loss": 1.0281, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.5386840202458424, |
|
"grad_norm": 0.4615330100059509, |
|
"learning_rate": 1.9561211197518564e-05, |
|
"loss": 0.8803, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.5394070860448301, |
|
"grad_norm": 0.3422069847583771, |
|
"learning_rate": 1.9559859634824675e-05, |
|
"loss": 1.0829, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 0.5401301518438177, |
|
"grad_norm": 0.5453998446464539, |
|
"learning_rate": 1.9558506040618122e-05, |
|
"loss": 1.137, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 0.5408532176428055, |
|
"grad_norm": 0.5272181034088135, |
|
"learning_rate": 1.9557150415186558e-05, |
|
"loss": 1.1079, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.5415762834417932, |
|
"grad_norm": 0.5351109504699707, |
|
"learning_rate": 1.9555792758818052e-05, |
|
"loss": 1.0158, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 0.5422993492407809, |
|
"grad_norm": 0.45870500802993774, |
|
"learning_rate": 1.9554433071801117e-05, |
|
"loss": 1.1699, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.5430224150397687, |
|
"grad_norm": 0.42589834332466125, |
|
"learning_rate": 1.9553071354424692e-05, |
|
"loss": 1.0275, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 0.5437454808387563, |
|
"grad_norm": 0.43729254603385925, |
|
"learning_rate": 1.955170760697815e-05, |
|
"loss": 0.9456, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.544468546637744, |
|
"grad_norm": 0.39081788063049316, |
|
"learning_rate": 1.9550341829751283e-05, |
|
"loss": 1.0078, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 0.5451916124367318, |
|
"grad_norm": 0.47970667481422424, |
|
"learning_rate": 1.9548974023034337e-05, |
|
"loss": 1.2289, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 0.5459146782357195, |
|
"grad_norm": 0.3771878182888031, |
|
"learning_rate": 1.9547604187117974e-05, |
|
"loss": 0.9818, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.5466377440347071, |
|
"grad_norm": 0.3280777335166931, |
|
"learning_rate": 1.9546232322293285e-05, |
|
"loss": 1.0503, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 0.5473608098336948, |
|
"grad_norm": 0.5649731159210205, |
|
"learning_rate": 1.95448584288518e-05, |
|
"loss": 1.0285, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 0.5480838756326826, |
|
"grad_norm": 0.6320711374282837, |
|
"learning_rate": 1.9543482507085484e-05, |
|
"loss": 1.2219, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 0.5488069414316703, |
|
"grad_norm": 0.36972150206565857, |
|
"learning_rate": 1.9542104557286715e-05, |
|
"loss": 1.0396, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 0.549530007230658, |
|
"grad_norm": 0.6155106425285339, |
|
"learning_rate": 1.9540724579748323e-05, |
|
"loss": 0.9265, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.5502530730296457, |
|
"grad_norm": 0.3946453630924225, |
|
"learning_rate": 1.9539342574763554e-05, |
|
"loss": 1.2332, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 0.5509761388286334, |
|
"grad_norm": 0.31569650769233704, |
|
"learning_rate": 1.953795854262609e-05, |
|
"loss": 1.0735, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 0.5516992046276211, |
|
"grad_norm": 0.35076481103897095, |
|
"learning_rate": 1.9536572483630048e-05, |
|
"loss": 1.0889, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 0.5524222704266089, |
|
"grad_norm": 0.39931586384773254, |
|
"learning_rate": 1.953518439806997e-05, |
|
"loss": 0.904, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 0.5531453362255966, |
|
"grad_norm": 0.394999623298645, |
|
"learning_rate": 1.9533794286240828e-05, |
|
"loss": 1.0382, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.5538684020245842, |
|
"grad_norm": 0.3732694089412689, |
|
"learning_rate": 1.953240214843803e-05, |
|
"loss": 1.0024, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 0.5545914678235719, |
|
"grad_norm": 0.4705665409564972, |
|
"learning_rate": 1.9531007984957408e-05, |
|
"loss": 1.0037, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 0.5553145336225597, |
|
"grad_norm": 0.3279126286506653, |
|
"learning_rate": 1.9529611796095232e-05, |
|
"loss": 0.9385, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.5560375994215474, |
|
"grad_norm": 0.5398396849632263, |
|
"learning_rate": 1.95282135821482e-05, |
|
"loss": 1.0472, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 0.556760665220535, |
|
"grad_norm": 0.5406724810600281, |
|
"learning_rate": 1.952681334341343e-05, |
|
"loss": 1.0669, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.5574837310195228, |
|
"grad_norm": 0.4217100441455841, |
|
"learning_rate": 1.952541108018849e-05, |
|
"loss": 1.1648, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 0.5582067968185105, |
|
"grad_norm": 0.5078068375587463, |
|
"learning_rate": 1.9524006792771354e-05, |
|
"loss": 1.1129, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 0.5589298626174982, |
|
"grad_norm": 0.39175429940223694, |
|
"learning_rate": 1.952260048146045e-05, |
|
"loss": 1.1255, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 0.559652928416486, |
|
"grad_norm": 0.43805810809135437, |
|
"learning_rate": 1.9521192146554623e-05, |
|
"loss": 1.1986, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 0.5603759942154736, |
|
"grad_norm": 0.3366648554801941, |
|
"learning_rate": 1.9519781788353148e-05, |
|
"loss": 1.1253, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.5610990600144613, |
|
"grad_norm": 0.35892999172210693, |
|
"learning_rate": 1.9518369407155732e-05, |
|
"loss": 0.998, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 0.561822125813449, |
|
"grad_norm": 0.4056641757488251, |
|
"learning_rate": 1.9516955003262517e-05, |
|
"loss": 0.9725, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 0.5625451916124368, |
|
"grad_norm": 0.3325026333332062, |
|
"learning_rate": 1.9515538576974067e-05, |
|
"loss": 1.1256, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 0.5632682574114244, |
|
"grad_norm": 0.4020673930644989, |
|
"learning_rate": 1.951412012859138e-05, |
|
"loss": 1.0631, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 0.5639913232104121, |
|
"grad_norm": 0.3648395836353302, |
|
"learning_rate": 1.9512699658415882e-05, |
|
"loss": 1.1435, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.5647143890093999, |
|
"grad_norm": 0.35294806957244873, |
|
"learning_rate": 1.9511277166749425e-05, |
|
"loss": 0.9151, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 0.5654374548083876, |
|
"grad_norm": 0.4098382592201233, |
|
"learning_rate": 1.95098526538943e-05, |
|
"loss": 0.9072, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 0.5661605206073753, |
|
"grad_norm": 0.377547025680542, |
|
"learning_rate": 1.950842612015322e-05, |
|
"loss": 0.8726, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 0.5668835864063629, |
|
"grad_norm": 0.42288947105407715, |
|
"learning_rate": 1.9506997565829335e-05, |
|
"loss": 1.0385, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.5676066522053507, |
|
"grad_norm": 0.39082857966423035, |
|
"learning_rate": 1.9505566991226214e-05, |
|
"loss": 0.9935, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.5683297180043384, |
|
"grad_norm": 0.3439696729183197, |
|
"learning_rate": 1.950413439664786e-05, |
|
"loss": 1.1061, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 0.5690527838033261, |
|
"grad_norm": 0.4018382728099823, |
|
"learning_rate": 1.950269978239871e-05, |
|
"loss": 0.9143, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 0.5697758496023138, |
|
"grad_norm": 0.42380473017692566, |
|
"learning_rate": 1.950126314878362e-05, |
|
"loss": 1.1054, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 0.5704989154013015, |
|
"grad_norm": 0.4379919767379761, |
|
"learning_rate": 1.9499824496107883e-05, |
|
"loss": 1.0511, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 0.5712219812002892, |
|
"grad_norm": 0.4412190318107605, |
|
"learning_rate": 1.9498383824677223e-05, |
|
"loss": 1.153, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.571945046999277, |
|
"grad_norm": 0.4430684447288513, |
|
"learning_rate": 1.9496941134797784e-05, |
|
"loss": 0.9391, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 0.5726681127982647, |
|
"grad_norm": 0.3570266664028168, |
|
"learning_rate": 1.9495496426776147e-05, |
|
"loss": 1.1379, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 0.5733911785972523, |
|
"grad_norm": 0.4187745451927185, |
|
"learning_rate": 1.949404970091932e-05, |
|
"loss": 1.0523, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 0.57411424439624, |
|
"grad_norm": 0.4245019853115082, |
|
"learning_rate": 1.9492600957534735e-05, |
|
"loss": 1.0555, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 0.5748373101952278, |
|
"grad_norm": 0.5078554749488831, |
|
"learning_rate": 1.9491150196930258e-05, |
|
"loss": 1.1153, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.5755603759942155, |
|
"grad_norm": 0.4756588041782379, |
|
"learning_rate": 1.948969741941418e-05, |
|
"loss": 1.157, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 0.5762834417932032, |
|
"grad_norm": 0.43571797013282776, |
|
"learning_rate": 1.948824262529523e-05, |
|
"loss": 0.9788, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 0.5770065075921909, |
|
"grad_norm": 0.4313880205154419, |
|
"learning_rate": 1.948678581488255e-05, |
|
"loss": 0.9907, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 0.5777295733911786, |
|
"grad_norm": 0.4783801734447479, |
|
"learning_rate": 1.948532698848572e-05, |
|
"loss": 1.1045, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 0.5784526391901663, |
|
"grad_norm": 0.41679173707962036, |
|
"learning_rate": 1.9483866146414756e-05, |
|
"loss": 1.1111, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.579175704989154, |
|
"grad_norm": 0.3428262174129486, |
|
"learning_rate": 1.9482403288980082e-05, |
|
"loss": 1.1117, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 0.5798987707881417, |
|
"grad_norm": 0.4037555456161499, |
|
"learning_rate": 1.9480938416492564e-05, |
|
"loss": 1.0466, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 0.5806218365871294, |
|
"grad_norm": 0.423533171415329, |
|
"learning_rate": 1.9479471529263502e-05, |
|
"loss": 1.0328, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 0.5813449023861171, |
|
"grad_norm": 0.42857813835144043, |
|
"learning_rate": 1.9478002627604605e-05, |
|
"loss": 0.9655, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 0.5820679681851049, |
|
"grad_norm": 0.4149419963359833, |
|
"learning_rate": 1.9476531711828027e-05, |
|
"loss": 1.0997, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.5827910339840926, |
|
"grad_norm": 0.5268839001655579, |
|
"learning_rate": 1.9475058782246342e-05, |
|
"loss": 0.9518, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 0.5835140997830802, |
|
"grad_norm": 0.4031262695789337, |
|
"learning_rate": 1.947358383917256e-05, |
|
"loss": 1.0986, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 0.584237165582068, |
|
"grad_norm": 0.3794477581977844, |
|
"learning_rate": 1.9472106882920103e-05, |
|
"loss": 1.0462, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 0.5849602313810557, |
|
"grad_norm": 0.4023298919200897, |
|
"learning_rate": 1.947062791380284e-05, |
|
"loss": 0.972, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 0.5856832971800434, |
|
"grad_norm": 0.3912923038005829, |
|
"learning_rate": 1.946914693213505e-05, |
|
"loss": 1.0204, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.586406362979031, |
|
"grad_norm": 0.4873286187648773, |
|
"learning_rate": 1.946766393823146e-05, |
|
"loss": 1.1627, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 0.5871294287780188, |
|
"grad_norm": 0.4002951681613922, |
|
"learning_rate": 1.94661789324072e-05, |
|
"loss": 0.9713, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 0.5878524945770065, |
|
"grad_norm": 0.3794306516647339, |
|
"learning_rate": 1.946469191497785e-05, |
|
"loss": 1.0521, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 0.5885755603759942, |
|
"grad_norm": 0.5814877152442932, |
|
"learning_rate": 1.9463202886259398e-05, |
|
"loss": 1.0715, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 0.589298626174982, |
|
"grad_norm": 0.46600183844566345, |
|
"learning_rate": 1.946171184656828e-05, |
|
"loss": 1.0208, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.5900216919739696, |
|
"grad_norm": 0.6167263984680176, |
|
"learning_rate": 1.946021879622134e-05, |
|
"loss": 1.0131, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 0.5907447577729573, |
|
"grad_norm": 0.45610466599464417, |
|
"learning_rate": 1.9458723735535866e-05, |
|
"loss": 1.1696, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 0.591467823571945, |
|
"grad_norm": 0.3938460052013397, |
|
"learning_rate": 1.9457226664829555e-05, |
|
"loss": 0.9354, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 0.5921908893709328, |
|
"grad_norm": 0.7263491749763489, |
|
"learning_rate": 1.945572758442055e-05, |
|
"loss": 0.9243, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 0.5929139551699205, |
|
"grad_norm": 0.39691534638404846, |
|
"learning_rate": 1.945422649462741e-05, |
|
"loss": 1.0144, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.5936370209689081, |
|
"grad_norm": 0.4366442561149597, |
|
"learning_rate": 1.9452723395769118e-05, |
|
"loss": 1.1116, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 0.5943600867678959, |
|
"grad_norm": 0.5859283208847046, |
|
"learning_rate": 1.9451218288165098e-05, |
|
"loss": 0.8539, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 0.5950831525668836, |
|
"grad_norm": 0.4116322696208954, |
|
"learning_rate": 1.9449711172135185e-05, |
|
"loss": 1.0003, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 0.5958062183658713, |
|
"grad_norm": 0.3992029130458832, |
|
"learning_rate": 1.9448202047999653e-05, |
|
"loss": 1.0766, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 0.596529284164859, |
|
"grad_norm": 0.42713025212287903, |
|
"learning_rate": 1.944669091607919e-05, |
|
"loss": 1.0108, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.5972523499638467, |
|
"grad_norm": 0.36716726422309875, |
|
"learning_rate": 1.9445177776694923e-05, |
|
"loss": 0.9922, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 0.5979754157628344, |
|
"grad_norm": 0.39609575271606445, |
|
"learning_rate": 1.9443662630168404e-05, |
|
"loss": 1.2268, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 0.5986984815618221, |
|
"grad_norm": 0.4199231266975403, |
|
"learning_rate": 1.9442145476821607e-05, |
|
"loss": 1.0309, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 0.5994215473608099, |
|
"grad_norm": 0.3968781530857086, |
|
"learning_rate": 1.9440626316976926e-05, |
|
"loss": 1.068, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 0.6001446131597975, |
|
"grad_norm": 0.39676693081855774, |
|
"learning_rate": 1.94391051509572e-05, |
|
"loss": 1.0705, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.6008676789587852, |
|
"grad_norm": 0.4254015386104584, |
|
"learning_rate": 1.9437581979085678e-05, |
|
"loss": 1.1494, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 0.601590744757773, |
|
"grad_norm": 0.5715295672416687, |
|
"learning_rate": 1.943605680168604e-05, |
|
"loss": 1.1313, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.6023138105567607, |
|
"grad_norm": 0.44537991285324097, |
|
"learning_rate": 1.9434529619082396e-05, |
|
"loss": 1.1528, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 0.6030368763557483, |
|
"grad_norm": 0.4265975058078766, |
|
"learning_rate": 1.943300043159928e-05, |
|
"loss": 1.111, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 0.603759942154736, |
|
"grad_norm": 0.4071260988712311, |
|
"learning_rate": 1.9431469239561646e-05, |
|
"loss": 0.9698, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.6044830079537238, |
|
"grad_norm": 0.5448580384254456, |
|
"learning_rate": 1.942993604329488e-05, |
|
"loss": 1.0849, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 0.6052060737527115, |
|
"grad_norm": 0.40993860363960266, |
|
"learning_rate": 1.94284008431248e-05, |
|
"loss": 1.054, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 0.6059291395516992, |
|
"grad_norm": 0.418747216463089, |
|
"learning_rate": 1.9426863639377634e-05, |
|
"loss": 0.9819, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 0.6066522053506869, |
|
"grad_norm": 0.47144564986228943, |
|
"learning_rate": 1.942532443238005e-05, |
|
"loss": 1.1143, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 0.6073752711496746, |
|
"grad_norm": 0.38305413722991943, |
|
"learning_rate": 1.9423783222459135e-05, |
|
"loss": 1.0806, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.6080983369486623, |
|
"grad_norm": 0.3502989113330841, |
|
"learning_rate": 1.9422240009942403e-05, |
|
"loss": 1.0041, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 0.6088214027476501, |
|
"grad_norm": 0.3907722234725952, |
|
"learning_rate": 1.9420694795157792e-05, |
|
"loss": 1.051, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 0.6095444685466378, |
|
"grad_norm": 0.35976073145866394, |
|
"learning_rate": 1.9419147578433667e-05, |
|
"loss": 1.1117, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 0.6102675343456254, |
|
"grad_norm": 0.4920249879360199, |
|
"learning_rate": 1.9417598360098822e-05, |
|
"loss": 1.1318, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 0.6109906001446131, |
|
"grad_norm": 0.3940010666847229, |
|
"learning_rate": 1.941604714048247e-05, |
|
"loss": 0.8906, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.6117136659436009, |
|
"grad_norm": 0.38385289907455444, |
|
"learning_rate": 1.9414493919914253e-05, |
|
"loss": 1.0902, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 0.6124367317425886, |
|
"grad_norm": 0.375847190618515, |
|
"learning_rate": 1.9412938698724237e-05, |
|
"loss": 1.1212, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 0.6131597975415762, |
|
"grad_norm": 0.4347201883792877, |
|
"learning_rate": 1.9411381477242913e-05, |
|
"loss": 0.9429, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 0.613882863340564, |
|
"grad_norm": 0.42075204849243164, |
|
"learning_rate": 1.9409822255801197e-05, |
|
"loss": 1.2179, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 0.6146059291395517, |
|
"grad_norm": 0.9480845928192139, |
|
"learning_rate": 1.940826103473043e-05, |
|
"loss": 1.1344, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.6153289949385394, |
|
"grad_norm": 0.44934409856796265, |
|
"learning_rate": 1.9406697814362382e-05, |
|
"loss": 1.0579, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 0.6160520607375272, |
|
"grad_norm": 0.48897886276245117, |
|
"learning_rate": 1.940513259502924e-05, |
|
"loss": 1.0972, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 0.6167751265365148, |
|
"grad_norm": 0.35401520133018494, |
|
"learning_rate": 1.9403565377063624e-05, |
|
"loss": 0.9755, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 0.6174981923355025, |
|
"grad_norm": 0.3998658359050751, |
|
"learning_rate": 1.9401996160798574e-05, |
|
"loss": 0.9717, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 0.6182212581344902, |
|
"grad_norm": 0.3708108067512512, |
|
"learning_rate": 1.9400424946567552e-05, |
|
"loss": 0.9571, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.618944323933478, |
|
"grad_norm": 0.36125117540359497, |
|
"learning_rate": 1.939885173470445e-05, |
|
"loss": 1.0311, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 0.6196673897324656, |
|
"grad_norm": 0.35216933488845825, |
|
"learning_rate": 1.9397276525543583e-05, |
|
"loss": 1.0258, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 0.6203904555314533, |
|
"grad_norm": 0.3627132475376129, |
|
"learning_rate": 1.9395699319419687e-05, |
|
"loss": 1.0178, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 0.6211135213304411, |
|
"grad_norm": 0.5006598830223083, |
|
"learning_rate": 1.9394120116667932e-05, |
|
"loss": 1.138, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 0.6218365871294288, |
|
"grad_norm": 0.4495599865913391, |
|
"learning_rate": 1.93925389176239e-05, |
|
"loss": 1.2473, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.6225596529284165, |
|
"grad_norm": 0.7601077556610107, |
|
"learning_rate": 1.9390955722623602e-05, |
|
"loss": 1.0375, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 0.6232827187274042, |
|
"grad_norm": 0.38106614351272583, |
|
"learning_rate": 1.9389370532003483e-05, |
|
"loss": 1.1907, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 0.6240057845263919, |
|
"grad_norm": 0.4347703754901886, |
|
"learning_rate": 1.938778334610039e-05, |
|
"loss": 1.0626, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 0.6247288503253796, |
|
"grad_norm": 0.41734766960144043, |
|
"learning_rate": 1.9386194165251616e-05, |
|
"loss": 1.0065, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 0.6254519161243673, |
|
"grad_norm": 0.397935688495636, |
|
"learning_rate": 1.9384602989794868e-05, |
|
"loss": 1.0134, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.6261749819233551, |
|
"grad_norm": 0.3894132077693939, |
|
"learning_rate": 1.9383009820068275e-05, |
|
"loss": 1.0335, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 0.6268980477223427, |
|
"grad_norm": 0.4423835277557373, |
|
"learning_rate": 1.938141465641039e-05, |
|
"loss": 0.9506, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 0.6276211135213304, |
|
"grad_norm": 0.524779200553894, |
|
"learning_rate": 1.9379817499160202e-05, |
|
"loss": 0.9809, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 0.6283441793203182, |
|
"grad_norm": 0.8196138739585876, |
|
"learning_rate": 1.9378218348657104e-05, |
|
"loss": 1.0157, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 0.6290672451193059, |
|
"grad_norm": 0.4658561050891876, |
|
"learning_rate": 1.937661720524093e-05, |
|
"loss": 1.0556, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.6297903109182935, |
|
"grad_norm": 0.41377320885658264, |
|
"learning_rate": 1.9375014069251928e-05, |
|
"loss": 1.0779, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 0.6305133767172812, |
|
"grad_norm": 0.5276013016700745, |
|
"learning_rate": 1.937340894103077e-05, |
|
"loss": 1.1533, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 0.631236442516269, |
|
"grad_norm": 0.410815954208374, |
|
"learning_rate": 1.937180182091855e-05, |
|
"loss": 1.0898, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 0.6319595083152567, |
|
"grad_norm": 0.37035036087036133, |
|
"learning_rate": 1.9370192709256795e-05, |
|
"loss": 1.1058, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 0.6326825741142444, |
|
"grad_norm": 0.4849984049797058, |
|
"learning_rate": 1.9368581606387442e-05, |
|
"loss": 0.9201, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.6334056399132321, |
|
"grad_norm": 0.61427903175354, |
|
"learning_rate": 1.9366968512652863e-05, |
|
"loss": 1.0005, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 0.6341287057122198, |
|
"grad_norm": 0.4817984402179718, |
|
"learning_rate": 1.9365353428395845e-05, |
|
"loss": 1.1757, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 0.6348517715112075, |
|
"grad_norm": 0.44777020812034607, |
|
"learning_rate": 1.9363736353959603e-05, |
|
"loss": 1.0455, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 0.6355748373101953, |
|
"grad_norm": 0.41143718361854553, |
|
"learning_rate": 1.9362117289687764e-05, |
|
"loss": 0.9868, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 0.6362979031091829, |
|
"grad_norm": 0.8019953966140747, |
|
"learning_rate": 1.9360496235924396e-05, |
|
"loss": 1.0541, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.6370209689081706, |
|
"grad_norm": 0.5058273673057556, |
|
"learning_rate": 1.9358873193013975e-05, |
|
"loss": 1.1222, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 0.6377440347071583, |
|
"grad_norm": 0.4158300459384918, |
|
"learning_rate": 1.935724816130141e-05, |
|
"loss": 1.1177, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 0.6384671005061461, |
|
"grad_norm": 0.3587720990180969, |
|
"learning_rate": 1.9355621141132022e-05, |
|
"loss": 1.0659, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 0.6391901663051338, |
|
"grad_norm": 0.5225945711135864, |
|
"learning_rate": 1.935399213285156e-05, |
|
"loss": 1.1002, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 0.6399132321041214, |
|
"grad_norm": 0.3899383544921875, |
|
"learning_rate": 1.93523611368062e-05, |
|
"loss": 1.023, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.6406362979031092, |
|
"grad_norm": 0.3299097716808319, |
|
"learning_rate": 1.9350728153342533e-05, |
|
"loss": 0.9908, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 0.6413593637020969, |
|
"grad_norm": 0.4942092001438141, |
|
"learning_rate": 1.9349093182807574e-05, |
|
"loss": 1.0417, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 0.6420824295010846, |
|
"grad_norm": 0.6462168097496033, |
|
"learning_rate": 1.9347456225548767e-05, |
|
"loss": 1.0398, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 0.6428054953000724, |
|
"grad_norm": 0.47286465764045715, |
|
"learning_rate": 1.9345817281913964e-05, |
|
"loss": 1.0001, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 0.64352856109906, |
|
"grad_norm": 0.40762990713119507, |
|
"learning_rate": 1.9344176352251456e-05, |
|
"loss": 1.0202, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.6442516268980477, |
|
"grad_norm": 0.4899740517139435, |
|
"learning_rate": 1.9342533436909942e-05, |
|
"loss": 1.0916, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 0.6449746926970354, |
|
"grad_norm": 0.5071614384651184, |
|
"learning_rate": 1.9340888536238555e-05, |
|
"loss": 1.0278, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 0.6456977584960232, |
|
"grad_norm": 0.36371201276779175, |
|
"learning_rate": 1.9339241650586835e-05, |
|
"loss": 1.1233, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 0.6464208242950108, |
|
"grad_norm": 0.5467074513435364, |
|
"learning_rate": 1.933759278030476e-05, |
|
"loss": 1.0505, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 0.6471438900939985, |
|
"grad_norm": 0.3950304388999939, |
|
"learning_rate": 1.933594192574272e-05, |
|
"loss": 0.8757, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.6478669558929863, |
|
"grad_norm": 0.44331902265548706, |
|
"learning_rate": 1.933428908725153e-05, |
|
"loss": 1.0699, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 0.648590021691974, |
|
"grad_norm": 0.5385304093360901, |
|
"learning_rate": 1.9332634265182422e-05, |
|
"loss": 0.9539, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 0.6493130874909617, |
|
"grad_norm": 0.3947201371192932, |
|
"learning_rate": 1.9330977459887058e-05, |
|
"loss": 1.0645, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 0.6500361532899493, |
|
"grad_norm": 0.4047844707965851, |
|
"learning_rate": 1.932931867171751e-05, |
|
"loss": 1.0795, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 0.6507592190889371, |
|
"grad_norm": 0.45291417837142944, |
|
"learning_rate": 1.9327657901026284e-05, |
|
"loss": 0.8067, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.6514822848879248, |
|
"grad_norm": 0.45252764225006104, |
|
"learning_rate": 1.93259951481663e-05, |
|
"loss": 0.9772, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 0.6522053506869125, |
|
"grad_norm": 0.8900644183158875, |
|
"learning_rate": 1.9324330413490896e-05, |
|
"loss": 1.0711, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 0.6529284164859002, |
|
"grad_norm": 0.5582137107849121, |
|
"learning_rate": 1.932266369735384e-05, |
|
"loss": 1.1331, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 0.6536514822848879, |
|
"grad_norm": 0.488629549741745, |
|
"learning_rate": 1.9320995000109315e-05, |
|
"loss": 1.1425, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 0.6543745480838756, |
|
"grad_norm": 0.5531617403030396, |
|
"learning_rate": 1.9319324322111928e-05, |
|
"loss": 0.9534, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.6550976138828634, |
|
"grad_norm": 0.4221404790878296, |
|
"learning_rate": 1.9317651663716704e-05, |
|
"loss": 1.0654, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 0.6558206796818511, |
|
"grad_norm": 0.36095425486564636, |
|
"learning_rate": 1.9315977025279088e-05, |
|
"loss": 1.0173, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 0.6565437454808387, |
|
"grad_norm": 0.5640881061553955, |
|
"learning_rate": 1.9314300407154954e-05, |
|
"loss": 1.0946, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 0.6572668112798264, |
|
"grad_norm": 0.3728702962398529, |
|
"learning_rate": 1.9312621809700586e-05, |
|
"loss": 1.1767, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 0.6579898770788142, |
|
"grad_norm": 0.5009912252426147, |
|
"learning_rate": 1.9310941233272698e-05, |
|
"loss": 0.9627, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.6587129428778019, |
|
"grad_norm": 0.4499903619289398, |
|
"learning_rate": 1.9309258678228412e-05, |
|
"loss": 1.0449, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 0.6594360086767896, |
|
"grad_norm": 0.7598214745521545, |
|
"learning_rate": 1.9307574144925288e-05, |
|
"loss": 1.0257, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 0.6601590744757773, |
|
"grad_norm": 0.413637638092041, |
|
"learning_rate": 1.930588763372129e-05, |
|
"loss": 1.0121, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 0.660882140274765, |
|
"grad_norm": 0.4112358093261719, |
|
"learning_rate": 1.930419914497481e-05, |
|
"loss": 1.165, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 0.6616052060737527, |
|
"grad_norm": 0.41854724287986755, |
|
"learning_rate": 1.9302508679044662e-05, |
|
"loss": 1.1068, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.6623282718727405, |
|
"grad_norm": 0.4287382662296295, |
|
"learning_rate": 1.9300816236290077e-05, |
|
"loss": 0.9718, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 0.6630513376717281, |
|
"grad_norm": 0.3937189280986786, |
|
"learning_rate": 1.9299121817070706e-05, |
|
"loss": 0.9827, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 0.6637744034707158, |
|
"grad_norm": 0.6436665058135986, |
|
"learning_rate": 1.929742542174662e-05, |
|
"loss": 0.9858, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 0.6644974692697035, |
|
"grad_norm": 0.4006657600402832, |
|
"learning_rate": 1.929572705067831e-05, |
|
"loss": 0.9564, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 0.6652205350686913, |
|
"grad_norm": 0.6361526250839233, |
|
"learning_rate": 1.929402670422669e-05, |
|
"loss": 1.1058, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.665943600867679, |
|
"grad_norm": 0.4914890229701996, |
|
"learning_rate": 1.9292324382753088e-05, |
|
"loss": 1.0892, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 0.6666666666666666, |
|
"grad_norm": 0.4729480743408203, |
|
"learning_rate": 1.9290620086619255e-05, |
|
"loss": 1.2001, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 0.6673897324656544, |
|
"grad_norm": 0.37761080265045166, |
|
"learning_rate": 1.9288913816187365e-05, |
|
"loss": 1.1583, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 0.6681127982646421, |
|
"grad_norm": 0.4286913275718689, |
|
"learning_rate": 1.9287205571820007e-05, |
|
"loss": 1.0615, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 0.6688358640636298, |
|
"grad_norm": 0.3655116558074951, |
|
"learning_rate": 1.9285495353880187e-05, |
|
"loss": 1.1904, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.6695589298626174, |
|
"grad_norm": 0.5355531573295593, |
|
"learning_rate": 1.928378316273134e-05, |
|
"loss": 1.1509, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 0.6702819956616052, |
|
"grad_norm": 0.49476566910743713, |
|
"learning_rate": 1.9282068998737306e-05, |
|
"loss": 1.0475, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 0.6710050614605929, |
|
"grad_norm": 0.3970414102077484, |
|
"learning_rate": 1.928035286226236e-05, |
|
"loss": 1.1631, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 0.6717281272595806, |
|
"grad_norm": 0.4435936212539673, |
|
"learning_rate": 1.9278634753671185e-05, |
|
"loss": 1.13, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 0.6724511930585684, |
|
"grad_norm": 0.47896090149879456, |
|
"learning_rate": 1.927691467332889e-05, |
|
"loss": 1.0606, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.673174258857556, |
|
"grad_norm": 0.5069797039031982, |
|
"learning_rate": 1.9275192621600993e-05, |
|
"loss": 1.1685, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 0.6738973246565437, |
|
"grad_norm": 0.4008742868900299, |
|
"learning_rate": 1.9273468598853446e-05, |
|
"loss": 1.0611, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 0.6746203904555315, |
|
"grad_norm": 0.41504350304603577, |
|
"learning_rate": 1.9271742605452604e-05, |
|
"loss": 1.0769, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 0.6753434562545192, |
|
"grad_norm": 0.4604232609272003, |
|
"learning_rate": 1.927001464176525e-05, |
|
"loss": 1.0744, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 0.6760665220535069, |
|
"grad_norm": 0.4554314613342285, |
|
"learning_rate": 1.926828470815859e-05, |
|
"loss": 1.031, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.6767895878524945, |
|
"grad_norm": 0.4168878197669983, |
|
"learning_rate": 1.9266552805000236e-05, |
|
"loss": 1.1499, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 0.6775126536514823, |
|
"grad_norm": 0.37697312235832214, |
|
"learning_rate": 1.926481893265823e-05, |
|
"loss": 1.0173, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 0.67823571945047, |
|
"grad_norm": 0.48489493131637573, |
|
"learning_rate": 1.926308309150102e-05, |
|
"loss": 0.8716, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 0.6789587852494577, |
|
"grad_norm": 0.4797099530696869, |
|
"learning_rate": 1.926134528189749e-05, |
|
"loss": 0.9966, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 0.6796818510484454, |
|
"grad_norm": 0.4304414987564087, |
|
"learning_rate": 1.9259605504216922e-05, |
|
"loss": 1.05, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.6804049168474331, |
|
"grad_norm": 0.5568910837173462, |
|
"learning_rate": 1.9257863758829038e-05, |
|
"loss": 1.1633, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 0.6811279826464208, |
|
"grad_norm": 0.4955918788909912, |
|
"learning_rate": 1.9256120046103954e-05, |
|
"loss": 0.9605, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 0.6818510484454086, |
|
"grad_norm": 0.44979023933410645, |
|
"learning_rate": 1.9254374366412225e-05, |
|
"loss": 1.0132, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 0.6825741142443963, |
|
"grad_norm": 0.5083341598510742, |
|
"learning_rate": 1.9252626720124813e-05, |
|
"loss": 1.0155, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 0.6832971800433839, |
|
"grad_norm": 0.40737634897232056, |
|
"learning_rate": 1.92508771076131e-05, |
|
"loss": 1.1144, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.6840202458423716, |
|
"grad_norm": 0.43715277314186096, |
|
"learning_rate": 1.924912552924889e-05, |
|
"loss": 1.2293, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 0.6847433116413594, |
|
"grad_norm": 0.8877456188201904, |
|
"learning_rate": 1.9247371985404397e-05, |
|
"loss": 1.0002, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 0.6854663774403471, |
|
"grad_norm": 0.45003005862236023, |
|
"learning_rate": 1.9245616476452263e-05, |
|
"loss": 0.9714, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 0.6861894432393347, |
|
"grad_norm": 0.5420054197311401, |
|
"learning_rate": 1.924385900276553e-05, |
|
"loss": 0.854, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 0.6869125090383225, |
|
"grad_norm": 0.49163565039634705, |
|
"learning_rate": 1.9242099564717683e-05, |
|
"loss": 1.1079, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.6876355748373102, |
|
"grad_norm": 0.561029314994812, |
|
"learning_rate": 1.9240338162682598e-05, |
|
"loss": 1.1312, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 0.6883586406362979, |
|
"grad_norm": 0.43775081634521484, |
|
"learning_rate": 1.923857479703459e-05, |
|
"loss": 1.1497, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 0.6890817064352857, |
|
"grad_norm": 0.5425902009010315, |
|
"learning_rate": 1.9236809468148378e-05, |
|
"loss": 1.0263, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 0.6898047722342733, |
|
"grad_norm": 0.4741000235080719, |
|
"learning_rate": 1.92350421763991e-05, |
|
"loss": 1.108, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 0.690527838033261, |
|
"grad_norm": 0.36742356419563293, |
|
"learning_rate": 1.9233272922162318e-05, |
|
"loss": 0.9945, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.6912509038322487, |
|
"grad_norm": 0.6709416508674622, |
|
"learning_rate": 1.9231501705814005e-05, |
|
"loss": 1.1226, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 0.6919739696312365, |
|
"grad_norm": 0.39497923851013184, |
|
"learning_rate": 1.922972852773055e-05, |
|
"loss": 1.0677, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 0.6926970354302241, |
|
"grad_norm": 0.43766117095947266, |
|
"learning_rate": 1.922795338828876e-05, |
|
"loss": 1.1008, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 0.6934201012292118, |
|
"grad_norm": 0.6151456832885742, |
|
"learning_rate": 1.922617628786587e-05, |
|
"loss": 1.0524, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 0.6941431670281996, |
|
"grad_norm": 0.4668988883495331, |
|
"learning_rate": 1.922439722683951e-05, |
|
"loss": 0.9703, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.6948662328271873, |
|
"grad_norm": 0.43881484866142273, |
|
"learning_rate": 1.9222616205587742e-05, |
|
"loss": 1.1008, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 0.695589298626175, |
|
"grad_norm": 0.4098372161388397, |
|
"learning_rate": 1.9220833224489045e-05, |
|
"loss": 1.1125, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 0.6963123644251626, |
|
"grad_norm": 0.3672609329223633, |
|
"learning_rate": 1.9219048283922305e-05, |
|
"loss": 0.9593, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 0.6970354302241504, |
|
"grad_norm": 0.4422583281993866, |
|
"learning_rate": 1.9217261384266833e-05, |
|
"loss": 1.1713, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 0.6977584960231381, |
|
"grad_norm": 0.3935304284095764, |
|
"learning_rate": 1.921547252590235e-05, |
|
"loss": 1.1546, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.6984815618221258, |
|
"grad_norm": 0.3898194134235382, |
|
"learning_rate": 1.9213681709209e-05, |
|
"loss": 1.044, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 0.6992046276211136, |
|
"grad_norm": 0.5345346331596375, |
|
"learning_rate": 1.921188893456734e-05, |
|
"loss": 0.9389, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 0.6999276934201012, |
|
"grad_norm": 0.4857673645019531, |
|
"learning_rate": 1.921009420235834e-05, |
|
"loss": 1.0613, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 0.7006507592190889, |
|
"grad_norm": 0.4024747312068939, |
|
"learning_rate": 1.920829751296339e-05, |
|
"loss": 1.0423, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 0.7013738250180767, |
|
"grad_norm": 0.4439171850681305, |
|
"learning_rate": 1.920649886676429e-05, |
|
"loss": 1.0442, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.7020968908170644, |
|
"grad_norm": 0.41875940561294556, |
|
"learning_rate": 1.9204698264143268e-05, |
|
"loss": 1.0491, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 0.702819956616052, |
|
"grad_norm": 0.5009695887565613, |
|
"learning_rate": 1.9202895705482952e-05, |
|
"loss": 1.057, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 0.7035430224150397, |
|
"grad_norm": 0.6199245452880859, |
|
"learning_rate": 1.92010911911664e-05, |
|
"loss": 1.0034, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 0.7042660882140275, |
|
"grad_norm": 0.4618159830570221, |
|
"learning_rate": 1.919928472157708e-05, |
|
"loss": 1.0351, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 0.7049891540130152, |
|
"grad_norm": 0.4229826033115387, |
|
"learning_rate": 1.9197476297098868e-05, |
|
"loss": 1.2061, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.7057122198120029, |
|
"grad_norm": 0.49111953377723694, |
|
"learning_rate": 1.9195665918116068e-05, |
|
"loss": 1.0294, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 0.7064352856109906, |
|
"grad_norm": 0.4833580255508423, |
|
"learning_rate": 1.919385358501339e-05, |
|
"loss": 1.0781, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 0.7071583514099783, |
|
"grad_norm": 0.42100799083709717, |
|
"learning_rate": 1.9192039298175965e-05, |
|
"loss": 1.03, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 0.707881417208966, |
|
"grad_norm": 0.5994098782539368, |
|
"learning_rate": 1.9190223057989337e-05, |
|
"loss": 0.8802, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 0.7086044830079538, |
|
"grad_norm": 0.4303410053253174, |
|
"learning_rate": 1.9188404864839465e-05, |
|
"loss": 1.1212, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.7093275488069414, |
|
"grad_norm": 0.453218013048172, |
|
"learning_rate": 1.9186584719112724e-05, |
|
"loss": 0.9847, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 0.7100506146059291, |
|
"grad_norm": 0.43997496366500854, |
|
"learning_rate": 1.9184762621195897e-05, |
|
"loss": 0.9984, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 0.7107736804049168, |
|
"grad_norm": 0.5103498101234436, |
|
"learning_rate": 1.91829385714762e-05, |
|
"loss": 1.0727, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 0.7114967462039046, |
|
"grad_norm": 0.5352555513381958, |
|
"learning_rate": 1.918111257034124e-05, |
|
"loss": 1.02, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 0.7122198120028923, |
|
"grad_norm": 0.40594494342803955, |
|
"learning_rate": 1.917928461817906e-05, |
|
"loss": 1.0947, |
|
"step": 985 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 6915, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 5, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.1929190399934464e+18, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|