|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 18498, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.008108984755108661, |
|
"grad_norm": 1.6712403297424316, |
|
"learning_rate": 9.980942340442787e-06, |
|
"loss": 1.5976, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.016217969510217322, |
|
"grad_norm": 1.9675096273422241, |
|
"learning_rate": 9.960668234530858e-06, |
|
"loss": 1.4389, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.02432695426532598, |
|
"grad_norm": 4.630291938781738, |
|
"learning_rate": 9.940394128618928e-06, |
|
"loss": 1.387, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.032435939020434644, |
|
"grad_norm": 2.186373710632324, |
|
"learning_rate": 9.920120022707e-06, |
|
"loss": 1.4025, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.0405449237755433, |
|
"grad_norm": 1.8459932804107666, |
|
"learning_rate": 9.89984591679507e-06, |
|
"loss": 1.3795, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.04865390853065196, |
|
"grad_norm": 1.8652907609939575, |
|
"learning_rate": 9.879571810883141e-06, |
|
"loss": 1.3835, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.056762893285760625, |
|
"grad_norm": 1.97219979763031, |
|
"learning_rate": 9.859297704971211e-06, |
|
"loss": 1.3549, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.06487187804086929, |
|
"grad_norm": 1.764737844467163, |
|
"learning_rate": 9.839023599059281e-06, |
|
"loss": 1.3785, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.07298086279597794, |
|
"grad_norm": 1.37200927734375, |
|
"learning_rate": 9.818749493147353e-06, |
|
"loss": 1.397, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.0810898475510866, |
|
"grad_norm": 0.783941388130188, |
|
"learning_rate": 9.798475387235423e-06, |
|
"loss": 1.4055, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08919883230619527, |
|
"grad_norm": 2.4082376956939697, |
|
"learning_rate": 9.778201281323494e-06, |
|
"loss": 1.2851, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.09730781706130393, |
|
"grad_norm": 1.2580335140228271, |
|
"learning_rate": 9.757927175411566e-06, |
|
"loss": 1.3557, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.10541680181641258, |
|
"grad_norm": 1.688684105873108, |
|
"learning_rate": 9.737653069499636e-06, |
|
"loss": 1.3462, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.11352578657152125, |
|
"grad_norm": 1.7642041444778442, |
|
"learning_rate": 9.717378963587707e-06, |
|
"loss": 1.3122, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.1216347713266299, |
|
"grad_norm": 1.5700275897979736, |
|
"learning_rate": 9.697104857675777e-06, |
|
"loss": 1.3248, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.12974375608173858, |
|
"grad_norm": 1.8756022453308105, |
|
"learning_rate": 9.676830751763849e-06, |
|
"loss": 1.3364, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.13785274083684723, |
|
"grad_norm": 2.0151844024658203, |
|
"learning_rate": 9.656556645851919e-06, |
|
"loss": 1.3566, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.1459617255919559, |
|
"grad_norm": 1.8269509077072144, |
|
"learning_rate": 9.636282539939989e-06, |
|
"loss": 1.3291, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.15407071034706454, |
|
"grad_norm": 2.033515453338623, |
|
"learning_rate": 9.61600843402806e-06, |
|
"loss": 1.3239, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.1621796951021732, |
|
"grad_norm": 1.2932145595550537, |
|
"learning_rate": 9.59573432811613e-06, |
|
"loss": 1.3458, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.17028867985728185, |
|
"grad_norm": 1.7336113452911377, |
|
"learning_rate": 9.575460222204202e-06, |
|
"loss": 1.3227, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.17839766461239054, |
|
"grad_norm": 2.2469635009765625, |
|
"learning_rate": 9.555186116292272e-06, |
|
"loss": 1.3055, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.1865066493674992, |
|
"grad_norm": 1.9851100444793701, |
|
"learning_rate": 9.534912010380343e-06, |
|
"loss": 1.2713, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.19461563412260785, |
|
"grad_norm": 1.896209716796875, |
|
"learning_rate": 9.514637904468413e-06, |
|
"loss": 1.3926, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.2027246188777165, |
|
"grad_norm": 1.941974401473999, |
|
"learning_rate": 9.494363798556485e-06, |
|
"loss": 1.2845, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.21083360363282516, |
|
"grad_norm": 1.7393105030059814, |
|
"learning_rate": 9.474089692644555e-06, |
|
"loss": 1.3989, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.21894258838793382, |
|
"grad_norm": 2.3268821239471436, |
|
"learning_rate": 9.453815586732625e-06, |
|
"loss": 1.3939, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.2270515731430425, |
|
"grad_norm": 2.2540266513824463, |
|
"learning_rate": 9.433541480820698e-06, |
|
"loss": 1.3421, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.23516055789815116, |
|
"grad_norm": 1.5483736991882324, |
|
"learning_rate": 9.413267374908768e-06, |
|
"loss": 1.3236, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.2432695426532598, |
|
"grad_norm": 1.2663439512252808, |
|
"learning_rate": 9.392993268996838e-06, |
|
"loss": 1.3333, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.25137852740836847, |
|
"grad_norm": 0.978643536567688, |
|
"learning_rate": 9.37271916308491e-06, |
|
"loss": 1.3522, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.25948751216347715, |
|
"grad_norm": 2.664464235305786, |
|
"learning_rate": 9.35244505717298e-06, |
|
"loss": 1.3969, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.2675964969185858, |
|
"grad_norm": 0.9333709478378296, |
|
"learning_rate": 9.332170951261051e-06, |
|
"loss": 1.3423, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.27570548167369446, |
|
"grad_norm": 2.0592761039733887, |
|
"learning_rate": 9.31189684534912e-06, |
|
"loss": 1.3274, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.2838144664288031, |
|
"grad_norm": 1.6379183530807495, |
|
"learning_rate": 9.291622739437192e-06, |
|
"loss": 1.321, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.2919234511839118, |
|
"grad_norm": 1.659201979637146, |
|
"learning_rate": 9.271348633525262e-06, |
|
"loss": 1.3662, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.30003243593902046, |
|
"grad_norm": 2.4258599281311035, |
|
"learning_rate": 9.251074527613332e-06, |
|
"loss": 1.2489, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.3081414206941291, |
|
"grad_norm": 2.314232349395752, |
|
"learning_rate": 9.230800421701404e-06, |
|
"loss": 1.3549, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.31625040544923777, |
|
"grad_norm": 1.78170645236969, |
|
"learning_rate": 9.210526315789474e-06, |
|
"loss": 1.3405, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.3243593902043464, |
|
"grad_norm": 2.3508291244506836, |
|
"learning_rate": 9.190252209877545e-06, |
|
"loss": 1.335, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.3324683749594551, |
|
"grad_norm": 3.0449280738830566, |
|
"learning_rate": 9.169978103965615e-06, |
|
"loss": 1.3188, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.3405773597145637, |
|
"grad_norm": 2.0919177532196045, |
|
"learning_rate": 9.149703998053687e-06, |
|
"loss": 1.3369, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.3486863444696724, |
|
"grad_norm": 1.5090256929397583, |
|
"learning_rate": 9.129429892141757e-06, |
|
"loss": 1.3782, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.3567953292247811, |
|
"grad_norm": 1.5943760871887207, |
|
"learning_rate": 9.109155786229828e-06, |
|
"loss": 1.2826, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.3649043139798897, |
|
"grad_norm": 1.4445804357528687, |
|
"learning_rate": 9.0888816803179e-06, |
|
"loss": 1.2706, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.3730132987349984, |
|
"grad_norm": 1.366866111755371, |
|
"learning_rate": 9.06860757440597e-06, |
|
"loss": 1.3081, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.381122283490107, |
|
"grad_norm": 1.5148597955703735, |
|
"learning_rate": 9.04833346849404e-06, |
|
"loss": 1.3117, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.3892312682452157, |
|
"grad_norm": 1.9457615613937378, |
|
"learning_rate": 9.028059362582111e-06, |
|
"loss": 1.3385, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.3973402530003244, |
|
"grad_norm": 1.0214955806732178, |
|
"learning_rate": 9.007785256670181e-06, |
|
"loss": 1.4173, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.405449237755433, |
|
"grad_norm": 2.3479509353637695, |
|
"learning_rate": 8.987511150758253e-06, |
|
"loss": 1.338, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.4135582225105417, |
|
"grad_norm": 1.7548096179962158, |
|
"learning_rate": 8.967237044846323e-06, |
|
"loss": 1.3245, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.4216672072656503, |
|
"grad_norm": 1.3919732570648193, |
|
"learning_rate": 8.946962938934394e-06, |
|
"loss": 1.2934, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.429776192020759, |
|
"grad_norm": 2.419706344604492, |
|
"learning_rate": 8.926688833022464e-06, |
|
"loss": 1.2451, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.43788517677586763, |
|
"grad_norm": 0.7026298642158508, |
|
"learning_rate": 8.906414727110536e-06, |
|
"loss": 1.3075, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.4459941615309763, |
|
"grad_norm": 1.9528323411941528, |
|
"learning_rate": 8.886140621198606e-06, |
|
"loss": 1.3071, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.454103146286085, |
|
"grad_norm": 1.322129487991333, |
|
"learning_rate": 8.865866515286676e-06, |
|
"loss": 1.3234, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.46221213104119363, |
|
"grad_norm": 3.2143826484680176, |
|
"learning_rate": 8.845592409374747e-06, |
|
"loss": 1.3553, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.4703211157963023, |
|
"grad_norm": 2.3696305751800537, |
|
"learning_rate": 8.825318303462817e-06, |
|
"loss": 1.2905, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.47843010055141094, |
|
"grad_norm": 1.7011082172393799, |
|
"learning_rate": 8.805044197550889e-06, |
|
"loss": 1.2826, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.4865390853065196, |
|
"grad_norm": 1.8602455854415894, |
|
"learning_rate": 8.784770091638959e-06, |
|
"loss": 1.2119, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.4946480700616283, |
|
"grad_norm": 1.8766978979110718, |
|
"learning_rate": 8.76449598572703e-06, |
|
"loss": 1.25, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.5027570548167369, |
|
"grad_norm": 1.2416331768035889, |
|
"learning_rate": 8.744221879815102e-06, |
|
"loss": 1.3494, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.5108660395718456, |
|
"grad_norm": 2.160327434539795, |
|
"learning_rate": 8.723947773903172e-06, |
|
"loss": 1.2821, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.5189750243269543, |
|
"grad_norm": 1.5392768383026123, |
|
"learning_rate": 8.703673667991243e-06, |
|
"loss": 1.3716, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.5270840090820629, |
|
"grad_norm": 1.9121636152267456, |
|
"learning_rate": 8.683399562079313e-06, |
|
"loss": 1.2615, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.5351929938371716, |
|
"grad_norm": 1.8084639310836792, |
|
"learning_rate": 8.663125456167383e-06, |
|
"loss": 1.3391, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.5433019785922802, |
|
"grad_norm": 2.516788959503174, |
|
"learning_rate": 8.642851350255455e-06, |
|
"loss": 1.2747, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.5514109633473889, |
|
"grad_norm": 5.785072326660156, |
|
"learning_rate": 8.622577244343525e-06, |
|
"loss": 1.3414, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.5595199481024976, |
|
"grad_norm": 3.8145599365234375, |
|
"learning_rate": 8.602303138431596e-06, |
|
"loss": 1.2274, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.5676289328576062, |
|
"grad_norm": 0.36742010712623596, |
|
"learning_rate": 8.582029032519666e-06, |
|
"loss": 1.2184, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.5757379176127149, |
|
"grad_norm": 2.2382278442382812, |
|
"learning_rate": 8.561754926607738e-06, |
|
"loss": 1.2359, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.5838469023678236, |
|
"grad_norm": 3.001042127609253, |
|
"learning_rate": 8.541480820695808e-06, |
|
"loss": 1.2839, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.5919558871229322, |
|
"grad_norm": 1.2254408597946167, |
|
"learning_rate": 8.521206714783878e-06, |
|
"loss": 1.3025, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.6000648718780409, |
|
"grad_norm": 2.408430576324463, |
|
"learning_rate": 8.50093260887195e-06, |
|
"loss": 1.2959, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.6081738566331495, |
|
"grad_norm": 1.125710129737854, |
|
"learning_rate": 8.480658502960019e-06, |
|
"loss": 1.3165, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.6162828413882582, |
|
"grad_norm": 1.7280784845352173, |
|
"learning_rate": 8.46038439704809e-06, |
|
"loss": 1.3033, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.6243918261433669, |
|
"grad_norm": 1.7289584875106812, |
|
"learning_rate": 8.440110291136162e-06, |
|
"loss": 1.3818, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.6325008108984755, |
|
"grad_norm": 1.4574753046035767, |
|
"learning_rate": 8.419836185224232e-06, |
|
"loss": 1.2858, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.6406097956535842, |
|
"grad_norm": 1.9952958822250366, |
|
"learning_rate": 8.399562079312304e-06, |
|
"loss": 1.323, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.6487187804086928, |
|
"grad_norm": 2.3702497482299805, |
|
"learning_rate": 8.379287973400374e-06, |
|
"loss": 1.3362, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.6568277651638015, |
|
"grad_norm": 2.1261913776397705, |
|
"learning_rate": 8.359013867488445e-06, |
|
"loss": 1.2978, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.6649367499189102, |
|
"grad_norm": 0.6903754472732544, |
|
"learning_rate": 8.338739761576515e-06, |
|
"loss": 1.3273, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.6730457346740188, |
|
"grad_norm": 1.848915934562683, |
|
"learning_rate": 8.318465655664587e-06, |
|
"loss": 1.294, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.6811547194291274, |
|
"grad_norm": 2.0583913326263428, |
|
"learning_rate": 8.298191549752657e-06, |
|
"loss": 1.3353, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.6892637041842361, |
|
"grad_norm": 1.5055549144744873, |
|
"learning_rate": 8.277917443840727e-06, |
|
"loss": 1.3305, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.6973726889393448, |
|
"grad_norm": 1.6197587251663208, |
|
"learning_rate": 8.257643337928798e-06, |
|
"loss": 1.3765, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.7054816736944535, |
|
"grad_norm": 1.927742838859558, |
|
"learning_rate": 8.237369232016868e-06, |
|
"loss": 1.3372, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.7135906584495622, |
|
"grad_norm": 2.3434853553771973, |
|
"learning_rate": 8.21709512610494e-06, |
|
"loss": 1.3041, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.7216996432046707, |
|
"grad_norm": 2.09249210357666, |
|
"learning_rate": 8.19682102019301e-06, |
|
"loss": 1.2946, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.7298086279597794, |
|
"grad_norm": 0.19756704568862915, |
|
"learning_rate": 8.176546914281081e-06, |
|
"loss": 1.3014, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.7379176127148881, |
|
"grad_norm": 1.2464430332183838, |
|
"learning_rate": 8.156272808369151e-06, |
|
"loss": 1.2552, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.7460265974699968, |
|
"grad_norm": 1.7760344743728638, |
|
"learning_rate": 8.135998702457221e-06, |
|
"loss": 1.2963, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.7541355822251055, |
|
"grad_norm": 1.922969937324524, |
|
"learning_rate": 8.115724596545294e-06, |
|
"loss": 1.2849, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.762244566980214, |
|
"grad_norm": 3.757589340209961, |
|
"learning_rate": 8.095450490633364e-06, |
|
"loss": 1.2511, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.7703535517353227, |
|
"grad_norm": 2.4983339309692383, |
|
"learning_rate": 8.075176384721434e-06, |
|
"loss": 1.2169, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.7784625364904314, |
|
"grad_norm": 2.1676225662231445, |
|
"learning_rate": 8.054902278809506e-06, |
|
"loss": 1.2711, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.7865715212455401, |
|
"grad_norm": 2.8060524463653564, |
|
"learning_rate": 8.034628172897576e-06, |
|
"loss": 1.3246, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.7946805060006488, |
|
"grad_norm": 1.917485237121582, |
|
"learning_rate": 8.014354066985647e-06, |
|
"loss": 1.2669, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.8027894907557573, |
|
"grad_norm": 2.1371405124664307, |
|
"learning_rate": 7.994079961073717e-06, |
|
"loss": 1.3554, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.810898475510866, |
|
"grad_norm": 0.9637095928192139, |
|
"learning_rate": 7.973805855161789e-06, |
|
"loss": 1.2862, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.8190074602659747, |
|
"grad_norm": 1.3636858463287354, |
|
"learning_rate": 7.953531749249859e-06, |
|
"loss": 1.3134, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 0.8271164450210834, |
|
"grad_norm": 0.9230815768241882, |
|
"learning_rate": 7.933257643337929e-06, |
|
"loss": 1.2352, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.8352254297761921, |
|
"grad_norm": 1.7373725175857544, |
|
"learning_rate": 7.912983537426e-06, |
|
"loss": 1.318, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 0.8433344145313006, |
|
"grad_norm": 2.0280568599700928, |
|
"learning_rate": 7.89270943151407e-06, |
|
"loss": 1.2464, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.8514433992864093, |
|
"grad_norm": 1.1584951877593994, |
|
"learning_rate": 7.872435325602142e-06, |
|
"loss": 1.2292, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.859552384041518, |
|
"grad_norm": 1.7846872806549072, |
|
"learning_rate": 7.852161219690212e-06, |
|
"loss": 1.2976, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.8676613687966267, |
|
"grad_norm": 1.7553609609603882, |
|
"learning_rate": 7.831887113778283e-06, |
|
"loss": 1.2684, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 0.8757703535517353, |
|
"grad_norm": 2.1919806003570557, |
|
"learning_rate": 7.811613007866353e-06, |
|
"loss": 1.2956, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.883879338306844, |
|
"grad_norm": 0.717258870601654, |
|
"learning_rate": 7.791338901954425e-06, |
|
"loss": 1.2186, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 0.8919883230619526, |
|
"grad_norm": 1.4364169836044312, |
|
"learning_rate": 7.771064796042496e-06, |
|
"loss": 1.2596, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.9000973078170613, |
|
"grad_norm": 1.99948251247406, |
|
"learning_rate": 7.750790690130566e-06, |
|
"loss": 1.3616, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 0.90820629257217, |
|
"grad_norm": 1.5464040040969849, |
|
"learning_rate": 7.730516584218636e-06, |
|
"loss": 1.2595, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.9163152773272786, |
|
"grad_norm": 1.500651240348816, |
|
"learning_rate": 7.710242478306708e-06, |
|
"loss": 1.3223, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 0.9244242620823873, |
|
"grad_norm": 1.6872031688690186, |
|
"learning_rate": 7.689968372394778e-06, |
|
"loss": 1.2099, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.9325332468374959, |
|
"grad_norm": 1.5728824138641357, |
|
"learning_rate": 7.66969426648285e-06, |
|
"loss": 1.2975, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 0.9406422315926046, |
|
"grad_norm": 2.0520365238189697, |
|
"learning_rate": 7.649420160570919e-06, |
|
"loss": 1.2895, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.9487512163477133, |
|
"grad_norm": 1.6994986534118652, |
|
"learning_rate": 7.62914605465899e-06, |
|
"loss": 1.2421, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 0.9568602011028219, |
|
"grad_norm": 2.1418871879577637, |
|
"learning_rate": 7.608871948747061e-06, |
|
"loss": 1.2464, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.9649691858579306, |
|
"grad_norm": 3.202744960784912, |
|
"learning_rate": 7.588597842835131e-06, |
|
"loss": 1.2565, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 0.9730781706130393, |
|
"grad_norm": 2.3648183345794678, |
|
"learning_rate": 7.568323736923202e-06, |
|
"loss": 1.262, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.9811871553681479, |
|
"grad_norm": 1.500482439994812, |
|
"learning_rate": 7.548049631011273e-06, |
|
"loss": 1.2281, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 0.9892961401232566, |
|
"grad_norm": 2.1810970306396484, |
|
"learning_rate": 7.527775525099344e-06, |
|
"loss": 1.2318, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.9974051248783652, |
|
"grad_norm": 1.3932199478149414, |
|
"learning_rate": 7.507501419187414e-06, |
|
"loss": 1.2344, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 1.0055141096334739, |
|
"grad_norm": 2.48954701423645, |
|
"learning_rate": 7.487227313275484e-06, |
|
"loss": 1.3035, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 1.0136230943885824, |
|
"grad_norm": 1.4061833620071411, |
|
"learning_rate": 7.466953207363555e-06, |
|
"loss": 1.308, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 1.0217320791436912, |
|
"grad_norm": 1.1377198696136475, |
|
"learning_rate": 7.4466791014516275e-06, |
|
"loss": 1.2877, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 1.0298410638987998, |
|
"grad_norm": 1.90823233127594, |
|
"learning_rate": 7.426404995539697e-06, |
|
"loss": 1.2371, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 1.0379500486539086, |
|
"grad_norm": 1.5908371210098267, |
|
"learning_rate": 7.406130889627768e-06, |
|
"loss": 1.2861, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 1.0460590334090172, |
|
"grad_norm": 2.892045259475708, |
|
"learning_rate": 7.385856783715839e-06, |
|
"loss": 1.2856, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 1.0541680181641258, |
|
"grad_norm": 1.4647297859191895, |
|
"learning_rate": 7.36558267780391e-06, |
|
"loss": 1.2431, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.0622770029192345, |
|
"grad_norm": 1.2635786533355713, |
|
"learning_rate": 7.34530857189198e-06, |
|
"loss": 1.3269, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 1.0703859876743431, |
|
"grad_norm": 1.0140317678451538, |
|
"learning_rate": 7.325034465980051e-06, |
|
"loss": 1.2724, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 1.078494972429452, |
|
"grad_norm": 1.1980386972427368, |
|
"learning_rate": 7.304760360068122e-06, |
|
"loss": 1.2909, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 1.0866039571845605, |
|
"grad_norm": 2.5311107635498047, |
|
"learning_rate": 7.284486254156193e-06, |
|
"loss": 1.2296, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 1.094712941939669, |
|
"grad_norm": 1.5901424884796143, |
|
"learning_rate": 7.2642121482442626e-06, |
|
"loss": 1.2136, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 1.1028219266947779, |
|
"grad_norm": 1.0336142778396606, |
|
"learning_rate": 7.243938042332333e-06, |
|
"loss": 1.2908, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 1.1109309114498864, |
|
"grad_norm": 2.5491976737976074, |
|
"learning_rate": 7.223663936420404e-06, |
|
"loss": 1.2256, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 1.1190398962049952, |
|
"grad_norm": 2.592517137527466, |
|
"learning_rate": 7.203389830508475e-06, |
|
"loss": 1.2808, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 1.1271488809601038, |
|
"grad_norm": 1.5294160842895508, |
|
"learning_rate": 7.1831157245965456e-06, |
|
"loss": 1.2279, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 1.1352578657152124, |
|
"grad_norm": 2.665705680847168, |
|
"learning_rate": 7.162841618684616e-06, |
|
"loss": 1.2813, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.1433668504703212, |
|
"grad_norm": 5.743466854095459, |
|
"learning_rate": 7.142567512772687e-06, |
|
"loss": 1.2601, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 1.1514758352254297, |
|
"grad_norm": 1.979543924331665, |
|
"learning_rate": 7.122293406860759e-06, |
|
"loss": 1.2568, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 1.1595848199805385, |
|
"grad_norm": 0.2160961776971817, |
|
"learning_rate": 7.1020193009488294e-06, |
|
"loss": 1.2234, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 1.167693804735647, |
|
"grad_norm": 2.09420108795166, |
|
"learning_rate": 7.0817451950369e-06, |
|
"loss": 1.2727, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.1758027894907557, |
|
"grad_norm": 1.4437772035598755, |
|
"learning_rate": 7.06147108912497e-06, |
|
"loss": 1.2535, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 1.1839117742458645, |
|
"grad_norm": 2.6284403800964355, |
|
"learning_rate": 7.041196983213041e-06, |
|
"loss": 1.1969, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 1.192020759000973, |
|
"grad_norm": 1.897250771522522, |
|
"learning_rate": 7.020922877301112e-06, |
|
"loss": 1.2579, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 1.2001297437560818, |
|
"grad_norm": 1.591044545173645, |
|
"learning_rate": 7.000648771389182e-06, |
|
"loss": 1.2553, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 1.2082387285111904, |
|
"grad_norm": 2.323927402496338, |
|
"learning_rate": 6.980374665477253e-06, |
|
"loss": 1.2802, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 1.216347713266299, |
|
"grad_norm": 2.145848035812378, |
|
"learning_rate": 6.960100559565324e-06, |
|
"loss": 1.3271, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.2244566980214078, |
|
"grad_norm": 1.1199519634246826, |
|
"learning_rate": 6.939826453653395e-06, |
|
"loss": 1.2948, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 1.2325656827765163, |
|
"grad_norm": 1.5974798202514648, |
|
"learning_rate": 6.919552347741465e-06, |
|
"loss": 1.2925, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 1.240674667531625, |
|
"grad_norm": 1.1883361339569092, |
|
"learning_rate": 6.899278241829535e-06, |
|
"loss": 1.2288, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 1.2487836522867337, |
|
"grad_norm": 2.2361881732940674, |
|
"learning_rate": 6.879004135917606e-06, |
|
"loss": 1.2732, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 1.2568926370418423, |
|
"grad_norm": 0.9524820446968079, |
|
"learning_rate": 6.858730030005677e-06, |
|
"loss": 1.3427, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 1.265001621796951, |
|
"grad_norm": 2.439042091369629, |
|
"learning_rate": 6.8384559240937475e-06, |
|
"loss": 1.2915, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 1.2731106065520597, |
|
"grad_norm": 2.7487218379974365, |
|
"learning_rate": 6.818181818181818e-06, |
|
"loss": 1.3025, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 1.2812195913071682, |
|
"grad_norm": 1.3138405084609985, |
|
"learning_rate": 6.79790771226989e-06, |
|
"loss": 1.2678, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 1.289328576062277, |
|
"grad_norm": 2.2130439281463623, |
|
"learning_rate": 6.777633606357961e-06, |
|
"loss": 1.2955, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 1.2974375608173856, |
|
"grad_norm": 2.6008100509643555, |
|
"learning_rate": 6.757359500446031e-06, |
|
"loss": 1.2413, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.3055465455724944, |
|
"grad_norm": 2.495473861694336, |
|
"learning_rate": 6.737085394534102e-06, |
|
"loss": 1.3061, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 1.313655530327603, |
|
"grad_norm": 0.8150402307510376, |
|
"learning_rate": 6.716811288622173e-06, |
|
"loss": 1.3055, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 1.3217645150827115, |
|
"grad_norm": 1.7135632038116455, |
|
"learning_rate": 6.696537182710244e-06, |
|
"loss": 1.2723, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 1.3298734998378203, |
|
"grad_norm": 2.367229700088501, |
|
"learning_rate": 6.6762630767983135e-06, |
|
"loss": 1.2817, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 1.337982484592929, |
|
"grad_norm": 1.9759521484375, |
|
"learning_rate": 6.655988970886384e-06, |
|
"loss": 1.2453, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 1.3460914693480377, |
|
"grad_norm": 1.2894783020019531, |
|
"learning_rate": 6.635714864974455e-06, |
|
"loss": 1.338, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 1.3542004541031463, |
|
"grad_norm": 0.24668912589550018, |
|
"learning_rate": 6.615440759062526e-06, |
|
"loss": 1.255, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 1.3623094388582548, |
|
"grad_norm": 2.6564314365386963, |
|
"learning_rate": 6.5951666531505966e-06, |
|
"loss": 1.1793, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 1.3704184236133636, |
|
"grad_norm": 1.1451148986816406, |
|
"learning_rate": 6.574892547238667e-06, |
|
"loss": 1.2857, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 1.3785274083684722, |
|
"grad_norm": 2.693976879119873, |
|
"learning_rate": 6.554618441326738e-06, |
|
"loss": 1.2497, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.386636393123581, |
|
"grad_norm": 2.3329060077667236, |
|
"learning_rate": 6.534344335414808e-06, |
|
"loss": 1.2665, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 1.3947453778786896, |
|
"grad_norm": 2.251298427581787, |
|
"learning_rate": 6.514070229502879e-06, |
|
"loss": 1.33, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 1.4028543626337981, |
|
"grad_norm": 3.0650289058685303, |
|
"learning_rate": 6.4937961235909495e-06, |
|
"loss": 1.2996, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 1.410963347388907, |
|
"grad_norm": 2.1476645469665527, |
|
"learning_rate": 6.47352201767902e-06, |
|
"loss": 1.3232, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 1.4190723321440155, |
|
"grad_norm": 1.236338496208191, |
|
"learning_rate": 6.453247911767092e-06, |
|
"loss": 1.2547, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 1.4271813168991243, |
|
"grad_norm": 2.276726722717285, |
|
"learning_rate": 6.432973805855163e-06, |
|
"loss": 1.2067, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 1.4352903016542329, |
|
"grad_norm": 1.871100664138794, |
|
"learning_rate": 6.412699699943233e-06, |
|
"loss": 1.238, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 1.4433992864093415, |
|
"grad_norm": 1.9131008386611938, |
|
"learning_rate": 6.392425594031304e-06, |
|
"loss": 1.2436, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 1.4515082711644502, |
|
"grad_norm": 1.6729109287261963, |
|
"learning_rate": 6.372151488119375e-06, |
|
"loss": 1.2732, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 1.4596172559195588, |
|
"grad_norm": 1.4220985174179077, |
|
"learning_rate": 6.351877382207446e-06, |
|
"loss": 1.3043, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.4677262406746676, |
|
"grad_norm": 1.5014866590499878, |
|
"learning_rate": 6.331603276295516e-06, |
|
"loss": 1.2501, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 1.4758352254297762, |
|
"grad_norm": 1.6406453847885132, |
|
"learning_rate": 6.311329170383586e-06, |
|
"loss": 1.2457, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 1.4839442101848848, |
|
"grad_norm": 2.01636004447937, |
|
"learning_rate": 6.291055064471657e-06, |
|
"loss": 1.1894, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 1.4920531949399936, |
|
"grad_norm": 1.5200086832046509, |
|
"learning_rate": 6.270780958559728e-06, |
|
"loss": 1.1833, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 1.5001621796951021, |
|
"grad_norm": 1.9229196310043335, |
|
"learning_rate": 6.2505068526477985e-06, |
|
"loss": 1.2219, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 1.508271164450211, |
|
"grad_norm": 2.19616961479187, |
|
"learning_rate": 6.230232746735869e-06, |
|
"loss": 1.2788, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 1.5163801492053195, |
|
"grad_norm": 1.4525929689407349, |
|
"learning_rate": 6.20995864082394e-06, |
|
"loss": 1.2963, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 1.524489133960428, |
|
"grad_norm": 2.227957248687744, |
|
"learning_rate": 6.189684534912011e-06, |
|
"loss": 1.324, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 1.5325981187155369, |
|
"grad_norm": 1.158480167388916, |
|
"learning_rate": 6.1694104290000815e-06, |
|
"loss": 1.2185, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 1.5407071034706454, |
|
"grad_norm": 1.3199654817581177, |
|
"learning_rate": 6.1491363230881514e-06, |
|
"loss": 1.2714, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.5488160882257542, |
|
"grad_norm": 2.110074043273926, |
|
"learning_rate": 6.128862217176224e-06, |
|
"loss": 1.256, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 1.5569250729808628, |
|
"grad_norm": 1.322704553604126, |
|
"learning_rate": 6.108588111264294e-06, |
|
"loss": 1.1785, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 1.5650340577359714, |
|
"grad_norm": 2.572434425354004, |
|
"learning_rate": 6.0883140053523645e-06, |
|
"loss": 1.2349, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 1.5731430424910802, |
|
"grad_norm": 2.033780574798584, |
|
"learning_rate": 6.068039899440435e-06, |
|
"loss": 1.2203, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 1.5812520272461887, |
|
"grad_norm": 1.9049453735351562, |
|
"learning_rate": 6.047765793528506e-06, |
|
"loss": 1.2043, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 1.5893610120012975, |
|
"grad_norm": 2.882568597793579, |
|
"learning_rate": 6.027491687616577e-06, |
|
"loss": 1.217, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 1.597469996756406, |
|
"grad_norm": 2.430227279663086, |
|
"learning_rate": 6.0072175817046475e-06, |
|
"loss": 1.2795, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 1.6055789815115147, |
|
"grad_norm": 1.9852076768875122, |
|
"learning_rate": 5.986943475792718e-06, |
|
"loss": 1.2434, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 1.6136879662666233, |
|
"grad_norm": 2.1857733726501465, |
|
"learning_rate": 5.966669369880789e-06, |
|
"loss": 1.2756, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 1.621796951021732, |
|
"grad_norm": 2.7323496341705322, |
|
"learning_rate": 5.946395263968859e-06, |
|
"loss": 1.2921, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.6299059357768408, |
|
"grad_norm": 2.2632415294647217, |
|
"learning_rate": 5.92612115805693e-06, |
|
"loss": 1.2763, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 1.6380149205319494, |
|
"grad_norm": 2.0098931789398193, |
|
"learning_rate": 5.9058470521450005e-06, |
|
"loss": 1.292, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 1.646123905287058, |
|
"grad_norm": 2.09377384185791, |
|
"learning_rate": 5.885572946233071e-06, |
|
"loss": 1.2738, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 1.6542328900421666, |
|
"grad_norm": 2.292084217071533, |
|
"learning_rate": 5.865298840321142e-06, |
|
"loss": 1.2726, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 1.6623418747972754, |
|
"grad_norm": 2.7795863151550293, |
|
"learning_rate": 5.845024734409213e-06, |
|
"loss": 1.2523, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 1.6704508595523841, |
|
"grad_norm": 3.0921523571014404, |
|
"learning_rate": 5.8247506284972835e-06, |
|
"loss": 1.2684, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 1.6785598443074927, |
|
"grad_norm": 1.036535382270813, |
|
"learning_rate": 5.804476522585355e-06, |
|
"loss": 1.2404, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 1.6866688290626013, |
|
"grad_norm": 1.3650333881378174, |
|
"learning_rate": 5.784202416673426e-06, |
|
"loss": 1.247, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 1.6947778138177099, |
|
"grad_norm": 2.558058977127075, |
|
"learning_rate": 5.763928310761497e-06, |
|
"loss": 1.2938, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 1.7028867985728187, |
|
"grad_norm": 2.6575851440429688, |
|
"learning_rate": 5.743654204849567e-06, |
|
"loss": 1.2974, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.7109957833279275, |
|
"grad_norm": 1.657400369644165, |
|
"learning_rate": 5.723380098937637e-06, |
|
"loss": 1.3268, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 1.719104768083036, |
|
"grad_norm": 2.027801275253296, |
|
"learning_rate": 5.703105993025708e-06, |
|
"loss": 1.3029, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 1.7272137528381446, |
|
"grad_norm": 1.3793126344680786, |
|
"learning_rate": 5.682831887113779e-06, |
|
"loss": 1.1812, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 1.7353227375932532, |
|
"grad_norm": 1.5861361026763916, |
|
"learning_rate": 5.6625577812018495e-06, |
|
"loss": 1.2871, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 1.743431722348362, |
|
"grad_norm": 2.3719797134399414, |
|
"learning_rate": 5.64228367528992e-06, |
|
"loss": 1.1947, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 1.7515407071034708, |
|
"grad_norm": 2.675689697265625, |
|
"learning_rate": 5.622009569377991e-06, |
|
"loss": 1.2902, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 1.7596496918585793, |
|
"grad_norm": 2.018069267272949, |
|
"learning_rate": 5.601735463466062e-06, |
|
"loss": 1.1827, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 1.767758676613688, |
|
"grad_norm": 1.1704685688018799, |
|
"learning_rate": 5.5814613575541325e-06, |
|
"loss": 1.1498, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 1.7758676613687965, |
|
"grad_norm": 2.0111939907073975, |
|
"learning_rate": 5.561187251642202e-06, |
|
"loss": 1.2358, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 1.7839766461239053, |
|
"grad_norm": 3.043297529220581, |
|
"learning_rate": 5.540913145730273e-06, |
|
"loss": 1.26, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.792085630879014, |
|
"grad_norm": 3.0858490467071533, |
|
"learning_rate": 5.520639039818344e-06, |
|
"loss": 1.2519, |
|
"step": 11050 |
|
}, |
|
{ |
|
"epoch": 1.8001946156341226, |
|
"grad_norm": 2.669131278991699, |
|
"learning_rate": 5.500364933906415e-06, |
|
"loss": 1.3174, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 1.8083036003892312, |
|
"grad_norm": 1.4813052415847778, |
|
"learning_rate": 5.480090827994486e-06, |
|
"loss": 1.2528, |
|
"step": 11150 |
|
}, |
|
{ |
|
"epoch": 1.8164125851443398, |
|
"grad_norm": 2.2234039306640625, |
|
"learning_rate": 5.459816722082557e-06, |
|
"loss": 1.2984, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 1.8245215698994486, |
|
"grad_norm": 3.4711413383483887, |
|
"learning_rate": 5.439542616170628e-06, |
|
"loss": 1.2174, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 1.8326305546545574, |
|
"grad_norm": 1.9171231985092163, |
|
"learning_rate": 5.4192685102586985e-06, |
|
"loss": 1.2657, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 1.840739539409666, |
|
"grad_norm": 1.7139352560043335, |
|
"learning_rate": 5.398994404346769e-06, |
|
"loss": 1.274, |
|
"step": 11350 |
|
}, |
|
{ |
|
"epoch": 1.8488485241647745, |
|
"grad_norm": 2.9915032386779785, |
|
"learning_rate": 5.37872029843484e-06, |
|
"loss": 1.1745, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 1.856957508919883, |
|
"grad_norm": 3.0832788944244385, |
|
"learning_rate": 5.35844619252291e-06, |
|
"loss": 1.3089, |
|
"step": 11450 |
|
}, |
|
{ |
|
"epoch": 1.8650664936749919, |
|
"grad_norm": 2.364255905151367, |
|
"learning_rate": 5.338172086610981e-06, |
|
"loss": 1.2146, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.8731754784301007, |
|
"grad_norm": 3.9903273582458496, |
|
"learning_rate": 5.3178979806990514e-06, |
|
"loss": 1.3103, |
|
"step": 11550 |
|
}, |
|
{ |
|
"epoch": 1.8812844631852093, |
|
"grad_norm": 3.4954657554626465, |
|
"learning_rate": 5.297623874787122e-06, |
|
"loss": 1.1682, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 1.8893934479403178, |
|
"grad_norm": 1.927339792251587, |
|
"learning_rate": 5.277349768875193e-06, |
|
"loss": 1.224, |
|
"step": 11650 |
|
}, |
|
{ |
|
"epoch": 1.8975024326954264, |
|
"grad_norm": 1.446385145187378, |
|
"learning_rate": 5.257075662963264e-06, |
|
"loss": 1.3907, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 1.9056114174505352, |
|
"grad_norm": 2.112168312072754, |
|
"learning_rate": 5.2368015570513345e-06, |
|
"loss": 1.2041, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 1.913720402205644, |
|
"grad_norm": 2.601565361022949, |
|
"learning_rate": 5.216527451139405e-06, |
|
"loss": 1.2827, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 1.9218293869607526, |
|
"grad_norm": 1.9448506832122803, |
|
"learning_rate": 5.196253345227475e-06, |
|
"loss": 1.1899, |
|
"step": 11850 |
|
}, |
|
{ |
|
"epoch": 1.9299383717158611, |
|
"grad_norm": 1.7583825588226318, |
|
"learning_rate": 5.175979239315546e-06, |
|
"loss": 1.2681, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 1.9380473564709697, |
|
"grad_norm": 1.319887399673462, |
|
"learning_rate": 5.155705133403617e-06, |
|
"loss": 1.2311, |
|
"step": 11950 |
|
}, |
|
{ |
|
"epoch": 1.9461563412260785, |
|
"grad_norm": 1.5955997705459595, |
|
"learning_rate": 5.135431027491688e-06, |
|
"loss": 1.2043, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.9542653259811873, |
|
"grad_norm": 2.112924337387085, |
|
"learning_rate": 5.115156921579759e-06, |
|
"loss": 1.21, |
|
"step": 12050 |
|
}, |
|
{ |
|
"epoch": 1.9623743107362959, |
|
"grad_norm": 2.8387506008148193, |
|
"learning_rate": 5.09488281566783e-06, |
|
"loss": 1.253, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 1.9704832954914044, |
|
"grad_norm": 2.55635142326355, |
|
"learning_rate": 5.0746087097559005e-06, |
|
"loss": 1.2277, |
|
"step": 12150 |
|
}, |
|
{ |
|
"epoch": 1.978592280246513, |
|
"grad_norm": 2.2216525077819824, |
|
"learning_rate": 5.054334603843971e-06, |
|
"loss": 1.1755, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 1.9867012650016218, |
|
"grad_norm": 1.1123380661010742, |
|
"learning_rate": 5.034060497932042e-06, |
|
"loss": 1.229, |
|
"step": 12250 |
|
}, |
|
{ |
|
"epoch": 1.9948102497567306, |
|
"grad_norm": 2.5723652839660645, |
|
"learning_rate": 5.013786392020113e-06, |
|
"loss": 1.2028, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 2.002919234511839, |
|
"grad_norm": 2.5151309967041016, |
|
"learning_rate": 4.993512286108183e-06, |
|
"loss": 1.2314, |
|
"step": 12350 |
|
}, |
|
{ |
|
"epoch": 2.0110282192669477, |
|
"grad_norm": 1.5693440437316895, |
|
"learning_rate": 4.973238180196253e-06, |
|
"loss": 1.2771, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 2.0191372040220563, |
|
"grad_norm": 4.562756538391113, |
|
"learning_rate": 4.952964074284324e-06, |
|
"loss": 1.2614, |
|
"step": 12450 |
|
}, |
|
{ |
|
"epoch": 2.027246188777165, |
|
"grad_norm": 0.9000415802001953, |
|
"learning_rate": 4.932689968372395e-06, |
|
"loss": 1.2383, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 2.035355173532274, |
|
"grad_norm": 1.2384685277938843, |
|
"learning_rate": 4.912415862460466e-06, |
|
"loss": 1.2501, |
|
"step": 12550 |
|
}, |
|
{ |
|
"epoch": 2.0434641582873825, |
|
"grad_norm": 1.771003246307373, |
|
"learning_rate": 4.892141756548537e-06, |
|
"loss": 1.2603, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 2.051573143042491, |
|
"grad_norm": 2.1732709407806396, |
|
"learning_rate": 4.871867650636607e-06, |
|
"loss": 1.1763, |
|
"step": 12650 |
|
}, |
|
{ |
|
"epoch": 2.0596821277975996, |
|
"grad_norm": 1.5497926473617554, |
|
"learning_rate": 4.851593544724678e-06, |
|
"loss": 1.2558, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 2.067791112552708, |
|
"grad_norm": 1.6834354400634766, |
|
"learning_rate": 4.831319438812749e-06, |
|
"loss": 1.2592, |
|
"step": 12750 |
|
}, |
|
{ |
|
"epoch": 2.075900097307817, |
|
"grad_norm": 1.920637845993042, |
|
"learning_rate": 4.811045332900819e-06, |
|
"loss": 1.279, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 2.084009082062926, |
|
"grad_norm": 1.851440191268921, |
|
"learning_rate": 4.79077122698889e-06, |
|
"loss": 1.2904, |
|
"step": 12850 |
|
}, |
|
{ |
|
"epoch": 2.0921180668180344, |
|
"grad_norm": 3.262347936630249, |
|
"learning_rate": 4.770497121076961e-06, |
|
"loss": 1.1763, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 2.100227051573143, |
|
"grad_norm": 1.7690410614013672, |
|
"learning_rate": 4.750223015165032e-06, |
|
"loss": 1.246, |
|
"step": 12950 |
|
}, |
|
{ |
|
"epoch": 2.1083360363282515, |
|
"grad_norm": 2.3499839305877686, |
|
"learning_rate": 4.7299489092531024e-06, |
|
"loss": 1.1969, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.1164450210833605, |
|
"grad_norm": 2.815704822540283, |
|
"learning_rate": 4.709674803341173e-06, |
|
"loss": 1.2299, |
|
"step": 13050 |
|
}, |
|
{ |
|
"epoch": 2.124554005838469, |
|
"grad_norm": 1.6346577405929565, |
|
"learning_rate": 4.689400697429244e-06, |
|
"loss": 1.1941, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 2.1326629905935777, |
|
"grad_norm": 1.3435848951339722, |
|
"learning_rate": 4.669126591517315e-06, |
|
"loss": 1.2193, |
|
"step": 13150 |
|
}, |
|
{ |
|
"epoch": 2.1407719753486862, |
|
"grad_norm": 2.11517596244812, |
|
"learning_rate": 4.6488524856053854e-06, |
|
"loss": 1.2868, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 2.148880960103795, |
|
"grad_norm": 1.5125885009765625, |
|
"learning_rate": 4.628578379693456e-06, |
|
"loss": 1.2034, |
|
"step": 13250 |
|
}, |
|
{ |
|
"epoch": 2.156989944858904, |
|
"grad_norm": 2.0965402126312256, |
|
"learning_rate": 4.608304273781526e-06, |
|
"loss": 1.2484, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 2.1650989296140124, |
|
"grad_norm": 1.9137533903121948, |
|
"learning_rate": 4.588030167869597e-06, |
|
"loss": 1.2531, |
|
"step": 13350 |
|
}, |
|
{ |
|
"epoch": 2.173207914369121, |
|
"grad_norm": 2.630784749984741, |
|
"learning_rate": 4.5677560619576685e-06, |
|
"loss": 1.2074, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 2.1813168991242295, |
|
"grad_norm": 1.4360569715499878, |
|
"learning_rate": 4.547481956045739e-06, |
|
"loss": 1.22, |
|
"step": 13450 |
|
}, |
|
{ |
|
"epoch": 2.189425883879338, |
|
"grad_norm": 2.066938638687134, |
|
"learning_rate": 4.52720785013381e-06, |
|
"loss": 1.1438, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 2.197534868634447, |
|
"grad_norm": 1.7182413339614868, |
|
"learning_rate": 4.50693374422188e-06, |
|
"loss": 1.2681, |
|
"step": 13550 |
|
}, |
|
{ |
|
"epoch": 2.2056438533895557, |
|
"grad_norm": 1.706874132156372, |
|
"learning_rate": 4.486659638309951e-06, |
|
"loss": 1.1888, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 2.2137528381446643, |
|
"grad_norm": 1.8634135723114014, |
|
"learning_rate": 4.466385532398021e-06, |
|
"loss": 1.2953, |
|
"step": 13650 |
|
}, |
|
{ |
|
"epoch": 2.221861822899773, |
|
"grad_norm": 1.77712881565094, |
|
"learning_rate": 4.446111426486092e-06, |
|
"loss": 1.2461, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 2.2299708076548814, |
|
"grad_norm": 1.9737837314605713, |
|
"learning_rate": 4.425837320574163e-06, |
|
"loss": 1.2856, |
|
"step": 13750 |
|
}, |
|
{ |
|
"epoch": 2.2380797924099904, |
|
"grad_norm": 2.7296142578125, |
|
"learning_rate": 4.405563214662234e-06, |
|
"loss": 1.2124, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 2.246188777165099, |
|
"grad_norm": 3.5112380981445312, |
|
"learning_rate": 4.385289108750304e-06, |
|
"loss": 1.2143, |
|
"step": 13850 |
|
}, |
|
{ |
|
"epoch": 2.2542977619202076, |
|
"grad_norm": 3.4935994148254395, |
|
"learning_rate": 4.365015002838375e-06, |
|
"loss": 1.3156, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 2.262406746675316, |
|
"grad_norm": 2.2354025840759277, |
|
"learning_rate": 4.344740896926446e-06, |
|
"loss": 1.3025, |
|
"step": 13950 |
|
}, |
|
{ |
|
"epoch": 2.2705157314304247, |
|
"grad_norm": 2.089087724685669, |
|
"learning_rate": 4.324466791014517e-06, |
|
"loss": 1.2249, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 2.2786247161855337, |
|
"grad_norm": 2.6738228797912598, |
|
"learning_rate": 4.304192685102587e-06, |
|
"loss": 1.1766, |
|
"step": 14050 |
|
}, |
|
{ |
|
"epoch": 2.2867337009406423, |
|
"grad_norm": 1.6028488874435425, |
|
"learning_rate": 4.283918579190658e-06, |
|
"loss": 1.2806, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 2.294842685695751, |
|
"grad_norm": 2.742100954055786, |
|
"learning_rate": 4.263644473278729e-06, |
|
"loss": 1.2241, |
|
"step": 14150 |
|
}, |
|
{ |
|
"epoch": 2.3029516704508595, |
|
"grad_norm": 1.2533172369003296, |
|
"learning_rate": 4.2433703673668e-06, |
|
"loss": 1.2563, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 2.311060655205968, |
|
"grad_norm": 2.783311128616333, |
|
"learning_rate": 4.22309626145487e-06, |
|
"loss": 1.2382, |
|
"step": 14250 |
|
}, |
|
{ |
|
"epoch": 2.319169639961077, |
|
"grad_norm": 1.9947431087493896, |
|
"learning_rate": 4.202822155542941e-06, |
|
"loss": 1.2533, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 2.3272786247161856, |
|
"grad_norm": 2.8825254440307617, |
|
"learning_rate": 4.182548049631012e-06, |
|
"loss": 1.2332, |
|
"step": 14350 |
|
}, |
|
{ |
|
"epoch": 2.335387609471294, |
|
"grad_norm": 1.9132847785949707, |
|
"learning_rate": 4.162273943719083e-06, |
|
"loss": 1.225, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 2.3434965942264028, |
|
"grad_norm": 2.9740896224975586, |
|
"learning_rate": 4.1419998378071526e-06, |
|
"loss": 1.302, |
|
"step": 14450 |
|
}, |
|
{ |
|
"epoch": 2.3516055789815113, |
|
"grad_norm": 2.5434772968292236, |
|
"learning_rate": 4.121725731895223e-06, |
|
"loss": 1.2857, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 2.3597145637366204, |
|
"grad_norm": 0.8146458864212036, |
|
"learning_rate": 4.101451625983294e-06, |
|
"loss": 1.1667, |
|
"step": 14550 |
|
}, |
|
{ |
|
"epoch": 2.367823548491729, |
|
"grad_norm": 2.3713395595550537, |
|
"learning_rate": 4.081177520071365e-06, |
|
"loss": 1.1799, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 2.3759325332468375, |
|
"grad_norm": 2.5800139904022217, |
|
"learning_rate": 4.0609034141594364e-06, |
|
"loss": 1.2639, |
|
"step": 14650 |
|
}, |
|
{ |
|
"epoch": 2.384041518001946, |
|
"grad_norm": 8.0242338180542, |
|
"learning_rate": 4.040629308247507e-06, |
|
"loss": 1.2168, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 2.3921505027570547, |
|
"grad_norm": 0.9941585063934326, |
|
"learning_rate": 4.020355202335577e-06, |
|
"loss": 1.2966, |
|
"step": 14750 |
|
}, |
|
{ |
|
"epoch": 2.4002594875121637, |
|
"grad_norm": 0.7390837669372559, |
|
"learning_rate": 4.000081096423648e-06, |
|
"loss": 1.3237, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 2.4083684722672722, |
|
"grad_norm": 0.5450477004051208, |
|
"learning_rate": 3.979806990511719e-06, |
|
"loss": 1.284, |
|
"step": 14850 |
|
}, |
|
{ |
|
"epoch": 2.416477457022381, |
|
"grad_norm": 1.7690049409866333, |
|
"learning_rate": 3.959532884599789e-06, |
|
"loss": 1.286, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 2.4245864417774894, |
|
"grad_norm": 2.655095100402832, |
|
"learning_rate": 3.93925877868786e-06, |
|
"loss": 1.1513, |
|
"step": 14950 |
|
}, |
|
{ |
|
"epoch": 2.432695426532598, |
|
"grad_norm": 1.2846322059631348, |
|
"learning_rate": 3.918984672775931e-06, |
|
"loss": 1.2024, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 2.440804411287707, |
|
"grad_norm": 6.425904273986816, |
|
"learning_rate": 3.898710566864002e-06, |
|
"loss": 1.2462, |
|
"step": 15050 |
|
}, |
|
{ |
|
"epoch": 2.4489133960428155, |
|
"grad_norm": 2.2578928470611572, |
|
"learning_rate": 3.878436460952072e-06, |
|
"loss": 1.2289, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 2.457022380797924, |
|
"grad_norm": 3.1006276607513428, |
|
"learning_rate": 3.858162355040143e-06, |
|
"loss": 1.2839, |
|
"step": 15150 |
|
}, |
|
{ |
|
"epoch": 2.4651313655530327, |
|
"grad_norm": 2.598376512527466, |
|
"learning_rate": 3.837888249128214e-06, |
|
"loss": 1.2496, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 2.4732403503081413, |
|
"grad_norm": 0.7084365487098694, |
|
"learning_rate": 3.817614143216285e-06, |
|
"loss": 1.2325, |
|
"step": 15250 |
|
}, |
|
{ |
|
"epoch": 2.48134933506325, |
|
"grad_norm": 2.280824661254883, |
|
"learning_rate": 3.797340037304355e-06, |
|
"loss": 1.1541, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 2.489458319818359, |
|
"grad_norm": 10.274874687194824, |
|
"learning_rate": 3.7770659313924257e-06, |
|
"loss": 1.1489, |
|
"step": 15350 |
|
}, |
|
{ |
|
"epoch": 2.4975673045734674, |
|
"grad_norm": 2.2401506900787354, |
|
"learning_rate": 3.7567918254804964e-06, |
|
"loss": 1.1696, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 2.505676289328576, |
|
"grad_norm": 2.0671870708465576, |
|
"learning_rate": 3.7365177195685676e-06, |
|
"loss": 1.2512, |
|
"step": 15450 |
|
}, |
|
{ |
|
"epoch": 2.5137852740836846, |
|
"grad_norm": 2.2912776470184326, |
|
"learning_rate": 3.7162436136566384e-06, |
|
"loss": 1.2278, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 2.5218942588387936, |
|
"grad_norm": 1.7719197273254395, |
|
"learning_rate": 3.6959695077447087e-06, |
|
"loss": 1.2984, |
|
"step": 15550 |
|
}, |
|
{ |
|
"epoch": 2.530003243593902, |
|
"grad_norm": 1.9428201913833618, |
|
"learning_rate": 3.6756954018327795e-06, |
|
"loss": 1.2511, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 2.5381122283490107, |
|
"grad_norm": 2.804948091506958, |
|
"learning_rate": 3.6554212959208502e-06, |
|
"loss": 1.2599, |
|
"step": 15650 |
|
}, |
|
{ |
|
"epoch": 2.5462212131041193, |
|
"grad_norm": 2.306248188018799, |
|
"learning_rate": 3.635147190008921e-06, |
|
"loss": 1.2252, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 2.554330197859228, |
|
"grad_norm": 2.2706100940704346, |
|
"learning_rate": 3.6148730840969913e-06, |
|
"loss": 1.1874, |
|
"step": 15750 |
|
}, |
|
{ |
|
"epoch": 2.5624391826143365, |
|
"grad_norm": 1.6053426265716553, |
|
"learning_rate": 3.594598978185062e-06, |
|
"loss": 1.1885, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 2.5705481673694455, |
|
"grad_norm": 2.0183823108673096, |
|
"learning_rate": 3.5743248722731332e-06, |
|
"loss": 1.2176, |
|
"step": 15850 |
|
}, |
|
{ |
|
"epoch": 2.578657152124554, |
|
"grad_norm": 1.6887139081954956, |
|
"learning_rate": 3.554050766361204e-06, |
|
"loss": 1.1165, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 2.5867661368796626, |
|
"grad_norm": 1.6139768362045288, |
|
"learning_rate": 3.5337766604492747e-06, |
|
"loss": 1.2512, |
|
"step": 15950 |
|
}, |
|
{ |
|
"epoch": 2.594875121634771, |
|
"grad_norm": 1.026294231414795, |
|
"learning_rate": 3.513502554537345e-06, |
|
"loss": 1.2485, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 2.60298410638988, |
|
"grad_norm": 2.261563777923584, |
|
"learning_rate": 3.493228448625416e-06, |
|
"loss": 1.2493, |
|
"step": 16050 |
|
}, |
|
{ |
|
"epoch": 2.6110930911449888, |
|
"grad_norm": 2.9573357105255127, |
|
"learning_rate": 3.4729543427134866e-06, |
|
"loss": 1.2542, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 2.6192020759000973, |
|
"grad_norm": 2.836587905883789, |
|
"learning_rate": 3.4526802368015573e-06, |
|
"loss": 1.2571, |
|
"step": 16150 |
|
}, |
|
{ |
|
"epoch": 2.627311060655206, |
|
"grad_norm": 5.188553333282471, |
|
"learning_rate": 3.4324061308896276e-06, |
|
"loss": 1.3193, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 2.6354200454103145, |
|
"grad_norm": 1.9180452823638916, |
|
"learning_rate": 3.412132024977699e-06, |
|
"loss": 1.2382, |
|
"step": 16250 |
|
}, |
|
{ |
|
"epoch": 2.643529030165423, |
|
"grad_norm": 2.1819140911102295, |
|
"learning_rate": 3.3918579190657696e-06, |
|
"loss": 1.2826, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 2.651638014920532, |
|
"grad_norm": 2.264775037765503, |
|
"learning_rate": 3.3715838131538403e-06, |
|
"loss": 1.2507, |
|
"step": 16350 |
|
}, |
|
{ |
|
"epoch": 2.6597469996756407, |
|
"grad_norm": 1.7436145544052124, |
|
"learning_rate": 3.351309707241911e-06, |
|
"loss": 1.2057, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 2.6678559844307492, |
|
"grad_norm": 2.5168802738189697, |
|
"learning_rate": 3.3310356013299814e-06, |
|
"loss": 1.215, |
|
"step": 16450 |
|
}, |
|
{ |
|
"epoch": 2.675964969185858, |
|
"grad_norm": 1.952141284942627, |
|
"learning_rate": 3.310761495418052e-06, |
|
"loss": 1.255, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 2.684073953940967, |
|
"grad_norm": 1.5202018022537231, |
|
"learning_rate": 3.290487389506123e-06, |
|
"loss": 1.224, |
|
"step": 16550 |
|
}, |
|
{ |
|
"epoch": 2.6921829386960754, |
|
"grad_norm": 1.8313968181610107, |
|
"learning_rate": 3.2702132835941937e-06, |
|
"loss": 1.3158, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 2.700291923451184, |
|
"grad_norm": 2.5669538974761963, |
|
"learning_rate": 3.249939177682265e-06, |
|
"loss": 1.2905, |
|
"step": 16650 |
|
}, |
|
{ |
|
"epoch": 2.7084009082062925, |
|
"grad_norm": 2.714341878890991, |
|
"learning_rate": 3.2296650717703356e-06, |
|
"loss": 1.2508, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 2.716509892961401, |
|
"grad_norm": 2.8722524642944336, |
|
"learning_rate": 3.209390965858406e-06, |
|
"loss": 1.2905, |
|
"step": 16750 |
|
}, |
|
{ |
|
"epoch": 2.7246188777165097, |
|
"grad_norm": 2.822148323059082, |
|
"learning_rate": 3.1891168599464767e-06, |
|
"loss": 1.1907, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 2.7327278624716187, |
|
"grad_norm": 0.6179723143577576, |
|
"learning_rate": 3.1688427540345474e-06, |
|
"loss": 1.276, |
|
"step": 16850 |
|
}, |
|
{ |
|
"epoch": 2.7408368472267273, |
|
"grad_norm": 1.800058126449585, |
|
"learning_rate": 3.148568648122618e-06, |
|
"loss": 1.1746, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 2.748945831981836, |
|
"grad_norm": 2.2209925651550293, |
|
"learning_rate": 3.1282945422106885e-06, |
|
"loss": 1.245, |
|
"step": 16950 |
|
}, |
|
{ |
|
"epoch": 2.7570548167369444, |
|
"grad_norm": 2.067692995071411, |
|
"learning_rate": 3.1080204362987593e-06, |
|
"loss": 1.2182, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 2.7651638014920534, |
|
"grad_norm": 2.8044259548187256, |
|
"learning_rate": 3.0877463303868304e-06, |
|
"loss": 1.1653, |
|
"step": 17050 |
|
}, |
|
{ |
|
"epoch": 2.773272786247162, |
|
"grad_norm": 2.077935218811035, |
|
"learning_rate": 3.067472224474901e-06, |
|
"loss": 1.2223, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 2.7813817710022706, |
|
"grad_norm": 0.14890266954898834, |
|
"learning_rate": 3.047198118562972e-06, |
|
"loss": 1.2318, |
|
"step": 17150 |
|
}, |
|
{ |
|
"epoch": 2.789490755757379, |
|
"grad_norm": 1.6987643241882324, |
|
"learning_rate": 3.0269240126510423e-06, |
|
"loss": 1.2607, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 2.7975997405124877, |
|
"grad_norm": 2.667273759841919, |
|
"learning_rate": 3.006649906739113e-06, |
|
"loss": 1.2486, |
|
"step": 17250 |
|
}, |
|
{ |
|
"epoch": 2.8057087252675963, |
|
"grad_norm": 1.8006951808929443, |
|
"learning_rate": 2.9863758008271838e-06, |
|
"loss": 1.2674, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 2.8138177100227053, |
|
"grad_norm": 3.4938597679138184, |
|
"learning_rate": 2.9661016949152545e-06, |
|
"loss": 1.2782, |
|
"step": 17350 |
|
}, |
|
{ |
|
"epoch": 2.821926694777814, |
|
"grad_norm": 3.029115915298462, |
|
"learning_rate": 2.945827589003325e-06, |
|
"loss": 1.2397, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 2.8300356795329225, |
|
"grad_norm": 1.7525807619094849, |
|
"learning_rate": 2.9255534830913956e-06, |
|
"loss": 1.1576, |
|
"step": 17450 |
|
}, |
|
{ |
|
"epoch": 2.838144664288031, |
|
"grad_norm": 2.326188564300537, |
|
"learning_rate": 2.905279377179467e-06, |
|
"loss": 1.2581, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 2.84625364904314, |
|
"grad_norm": 2.2817983627319336, |
|
"learning_rate": 2.8850052712675375e-06, |
|
"loss": 1.1471, |
|
"step": 17550 |
|
}, |
|
{ |
|
"epoch": 2.8543626337982486, |
|
"grad_norm": 1.9472275972366333, |
|
"learning_rate": 2.8647311653556083e-06, |
|
"loss": 1.2784, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 2.862471618553357, |
|
"grad_norm": 2.6982924938201904, |
|
"learning_rate": 2.8444570594436786e-06, |
|
"loss": 1.181, |
|
"step": 17650 |
|
}, |
|
{ |
|
"epoch": 2.8705806033084658, |
|
"grad_norm": 2.204470634460449, |
|
"learning_rate": 2.8241829535317494e-06, |
|
"loss": 1.3165, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 2.8786895880635743, |
|
"grad_norm": 1.8621634244918823, |
|
"learning_rate": 2.80390884761982e-06, |
|
"loss": 1.1949, |
|
"step": 17750 |
|
}, |
|
{ |
|
"epoch": 2.886798572818683, |
|
"grad_norm": 2.0357561111450195, |
|
"learning_rate": 2.783634741707891e-06, |
|
"loss": 1.2433, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 2.894907557573792, |
|
"grad_norm": 1.873806118965149, |
|
"learning_rate": 2.7633606357959612e-06, |
|
"loss": 1.2737, |
|
"step": 17850 |
|
}, |
|
{ |
|
"epoch": 2.9030165423289005, |
|
"grad_norm": 2.299546241760254, |
|
"learning_rate": 2.7430865298840324e-06, |
|
"loss": 1.2296, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 2.911125527084009, |
|
"grad_norm": 2.267756938934326, |
|
"learning_rate": 2.722812423972103e-06, |
|
"loss": 1.2348, |
|
"step": 17950 |
|
}, |
|
{ |
|
"epoch": 2.9192345118391176, |
|
"grad_norm": 3.639319658279419, |
|
"learning_rate": 2.702538318060174e-06, |
|
"loss": 1.2168, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 2.9273434965942267, |
|
"grad_norm": 2.825929641723633, |
|
"learning_rate": 2.6822642121482447e-06, |
|
"loss": 1.2045, |
|
"step": 18050 |
|
}, |
|
{ |
|
"epoch": 2.9354524813493352, |
|
"grad_norm": 1.6339542865753174, |
|
"learning_rate": 2.661990106236315e-06, |
|
"loss": 1.2303, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 2.943561466104444, |
|
"grad_norm": 1.8570992946624756, |
|
"learning_rate": 2.6417160003243857e-06, |
|
"loss": 1.2268, |
|
"step": 18150 |
|
}, |
|
{ |
|
"epoch": 2.9516704508595524, |
|
"grad_norm": 2.2465381622314453, |
|
"learning_rate": 2.6214418944124565e-06, |
|
"loss": 1.1506, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 2.959779435614661, |
|
"grad_norm": 2.456606149673462, |
|
"learning_rate": 2.6011677885005272e-06, |
|
"loss": 1.2371, |
|
"step": 18250 |
|
}, |
|
{ |
|
"epoch": 2.9678884203697695, |
|
"grad_norm": 1.7189408540725708, |
|
"learning_rate": 2.5808936825885984e-06, |
|
"loss": 1.1677, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 2.975997405124878, |
|
"grad_norm": 1.5492075681686401, |
|
"learning_rate": 2.5606195766766687e-06, |
|
"loss": 1.2146, |
|
"step": 18350 |
|
}, |
|
{ |
|
"epoch": 2.984106389879987, |
|
"grad_norm": 2.5447022914886475, |
|
"learning_rate": 2.5403454707647395e-06, |
|
"loss": 1.1574, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 2.9922153746350957, |
|
"grad_norm": 1.8842716217041016, |
|
"learning_rate": 2.5200713648528103e-06, |
|
"loss": 1.2093, |
|
"step": 18450 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 24664, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.035905534827063e+19, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|