|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 1500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.002, |
|
"grad_norm": 6.779170989990234, |
|
"learning_rate": 6.666666666666668e-08, |
|
"loss": 1.2436, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.004, |
|
"grad_norm": 6.8514909744262695, |
|
"learning_rate": 1.3333333333333336e-07, |
|
"loss": 1.2722, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.006, |
|
"grad_norm": 6.65818977355957, |
|
"learning_rate": 2.0000000000000002e-07, |
|
"loss": 1.2199, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.008, |
|
"grad_norm": 6.83758020401001, |
|
"learning_rate": 2.666666666666667e-07, |
|
"loss": 1.2601, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 6.710981369018555, |
|
"learning_rate": 3.3333333333333335e-07, |
|
"loss": 1.279, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.012, |
|
"grad_norm": 6.6265668869018555, |
|
"learning_rate": 4.0000000000000003e-07, |
|
"loss": 1.2378, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.014, |
|
"grad_norm": 6.815594673156738, |
|
"learning_rate": 4.666666666666667e-07, |
|
"loss": 1.2796, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.016, |
|
"grad_norm": 6.420166969299316, |
|
"learning_rate": 5.333333333333335e-07, |
|
"loss": 1.2348, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.018, |
|
"grad_norm": 6.717895030975342, |
|
"learning_rate": 6.000000000000001e-07, |
|
"loss": 1.2454, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 6.134315490722656, |
|
"learning_rate": 6.666666666666667e-07, |
|
"loss": 1.2112, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.022, |
|
"grad_norm": 6.2358598709106445, |
|
"learning_rate": 7.333333333333334e-07, |
|
"loss": 1.2471, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.024, |
|
"grad_norm": 6.138259410858154, |
|
"learning_rate": 8.000000000000001e-07, |
|
"loss": 1.2422, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.026, |
|
"grad_norm": 4.941827297210693, |
|
"learning_rate": 8.666666666666668e-07, |
|
"loss": 1.2303, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.028, |
|
"grad_norm": 4.79518461227417, |
|
"learning_rate": 9.333333333333334e-07, |
|
"loss": 1.2304, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 4.52637243270874, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 1.2018, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.032, |
|
"grad_norm": 4.465169906616211, |
|
"learning_rate": 1.066666666666667e-06, |
|
"loss": 1.2221, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.034, |
|
"grad_norm": 2.7714080810546875, |
|
"learning_rate": 1.1333333333333334e-06, |
|
"loss": 1.139, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.036, |
|
"grad_norm": 2.8010947704315186, |
|
"learning_rate": 1.2000000000000002e-06, |
|
"loss": 1.1654, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.038, |
|
"grad_norm": 2.7349588871002197, |
|
"learning_rate": 1.2666666666666669e-06, |
|
"loss": 1.1389, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 2.573075294494629, |
|
"learning_rate": 1.3333333333333334e-06, |
|
"loss": 1.1583, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.042, |
|
"grad_norm": 2.594188928604126, |
|
"learning_rate": 1.4000000000000001e-06, |
|
"loss": 1.1342, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.044, |
|
"grad_norm": 2.4305062294006348, |
|
"learning_rate": 1.4666666666666669e-06, |
|
"loss": 1.159, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.046, |
|
"grad_norm": 2.1182713508605957, |
|
"learning_rate": 1.5333333333333334e-06, |
|
"loss": 1.1454, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.048, |
|
"grad_norm": 3.179542064666748, |
|
"learning_rate": 1.6000000000000001e-06, |
|
"loss": 1.1222, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 3.3224637508392334, |
|
"learning_rate": 1.6666666666666667e-06, |
|
"loss": 1.123, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.052, |
|
"grad_norm": 3.318582773208618, |
|
"learning_rate": 1.7333333333333336e-06, |
|
"loss": 1.1258, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.054, |
|
"grad_norm": 3.051558494567871, |
|
"learning_rate": 1.8000000000000001e-06, |
|
"loss": 1.0751, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.056, |
|
"grad_norm": 2.4479663372039795, |
|
"learning_rate": 1.8666666666666669e-06, |
|
"loss": 1.0648, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.058, |
|
"grad_norm": 2.288482904434204, |
|
"learning_rate": 1.9333333333333336e-06, |
|
"loss": 1.0898, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 2.026151657104492, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 1.0935, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.062, |
|
"grad_norm": 1.4403475522994995, |
|
"learning_rate": 2.0666666666666666e-06, |
|
"loss": 1.0833, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.064, |
|
"grad_norm": 1.496666431427002, |
|
"learning_rate": 2.133333333333334e-06, |
|
"loss": 1.0441, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.066, |
|
"grad_norm": 1.6388070583343506, |
|
"learning_rate": 2.2e-06, |
|
"loss": 1.0262, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.068, |
|
"grad_norm": 1.682080626487732, |
|
"learning_rate": 2.266666666666667e-06, |
|
"loss": 1.0388, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 1.5605063438415527, |
|
"learning_rate": 2.3333333333333336e-06, |
|
"loss": 1.037, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.072, |
|
"grad_norm": 1.287360429763794, |
|
"learning_rate": 2.4000000000000003e-06, |
|
"loss": 1.0476, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.074, |
|
"grad_norm": 1.0461649894714355, |
|
"learning_rate": 2.466666666666667e-06, |
|
"loss": 1.0496, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.076, |
|
"grad_norm": 1.0821465253829956, |
|
"learning_rate": 2.5333333333333338e-06, |
|
"loss": 1.0307, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.078, |
|
"grad_norm": 1.061495065689087, |
|
"learning_rate": 2.6e-06, |
|
"loss": 1.0279, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 1.0558407306671143, |
|
"learning_rate": 2.666666666666667e-06, |
|
"loss": 1.0201, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.082, |
|
"grad_norm": 0.923545777797699, |
|
"learning_rate": 2.7333333333333336e-06, |
|
"loss": 1.0028, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.084, |
|
"grad_norm": 0.8768528699874878, |
|
"learning_rate": 2.8000000000000003e-06, |
|
"loss": 1.0085, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.086, |
|
"grad_norm": 0.891363263130188, |
|
"learning_rate": 2.866666666666667e-06, |
|
"loss": 1.0024, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.088, |
|
"grad_norm": 0.8647627830505371, |
|
"learning_rate": 2.9333333333333338e-06, |
|
"loss": 0.9924, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.8449153304100037, |
|
"learning_rate": 3e-06, |
|
"loss": 1.0107, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.092, |
|
"grad_norm": 0.7374930381774902, |
|
"learning_rate": 3.066666666666667e-06, |
|
"loss": 0.9944, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.094, |
|
"grad_norm": 0.6910094022750854, |
|
"learning_rate": 3.133333333333334e-06, |
|
"loss": 0.9756, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.096, |
|
"grad_norm": 0.7899953722953796, |
|
"learning_rate": 3.2000000000000003e-06, |
|
"loss": 0.9564, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.098, |
|
"grad_norm": 0.7347140312194824, |
|
"learning_rate": 3.266666666666667e-06, |
|
"loss": 0.9787, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.71909499168396, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 0.9744, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.102, |
|
"grad_norm": 0.704480767250061, |
|
"learning_rate": 3.4000000000000005e-06, |
|
"loss": 0.9686, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.104, |
|
"grad_norm": 0.6947300434112549, |
|
"learning_rate": 3.4666666666666672e-06, |
|
"loss": 0.9644, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.106, |
|
"grad_norm": 0.66453617811203, |
|
"learning_rate": 3.5333333333333335e-06, |
|
"loss": 0.9486, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.108, |
|
"grad_norm": 0.6524918675422668, |
|
"learning_rate": 3.6000000000000003e-06, |
|
"loss": 0.9473, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.7257620096206665, |
|
"learning_rate": 3.6666666666666666e-06, |
|
"loss": 0.9851, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.112, |
|
"grad_norm": 0.6214912533760071, |
|
"learning_rate": 3.7333333333333337e-06, |
|
"loss": 0.9226, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.114, |
|
"grad_norm": 0.6151769161224365, |
|
"learning_rate": 3.8000000000000005e-06, |
|
"loss": 0.9252, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.116, |
|
"grad_norm": 0.6252180337905884, |
|
"learning_rate": 3.866666666666667e-06, |
|
"loss": 0.9495, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.118, |
|
"grad_norm": 0.6552236080169678, |
|
"learning_rate": 3.9333333333333335e-06, |
|
"loss": 0.9693, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.648305356502533, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 0.9473, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.122, |
|
"grad_norm": 0.6331591010093689, |
|
"learning_rate": 4.066666666666667e-06, |
|
"loss": 0.9563, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.124, |
|
"grad_norm": 0.6448110342025757, |
|
"learning_rate": 4.133333333333333e-06, |
|
"loss": 0.9355, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.126, |
|
"grad_norm": 0.6444178223609924, |
|
"learning_rate": 4.2000000000000004e-06, |
|
"loss": 0.9706, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.128, |
|
"grad_norm": 0.6279839873313904, |
|
"learning_rate": 4.266666666666668e-06, |
|
"loss": 0.955, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.6672361493110657, |
|
"learning_rate": 4.333333333333334e-06, |
|
"loss": 0.9396, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.132, |
|
"grad_norm": 0.6860214471817017, |
|
"learning_rate": 4.4e-06, |
|
"loss": 0.9023, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.134, |
|
"grad_norm": 0.5856081247329712, |
|
"learning_rate": 4.4666666666666665e-06, |
|
"loss": 0.914, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.136, |
|
"grad_norm": 0.5969718098640442, |
|
"learning_rate": 4.533333333333334e-06, |
|
"loss": 0.9292, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.138, |
|
"grad_norm": 0.6717826128005981, |
|
"learning_rate": 4.600000000000001e-06, |
|
"loss": 0.9057, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.6435735821723938, |
|
"learning_rate": 4.666666666666667e-06, |
|
"loss": 0.9358, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.142, |
|
"grad_norm": 0.6428582668304443, |
|
"learning_rate": 4.7333333333333335e-06, |
|
"loss": 0.9357, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.144, |
|
"grad_norm": 0.6402033567428589, |
|
"learning_rate": 4.800000000000001e-06, |
|
"loss": 0.9601, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.146, |
|
"grad_norm": 0.5693813562393188, |
|
"learning_rate": 4.866666666666667e-06, |
|
"loss": 0.9408, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.148, |
|
"grad_norm": 0.604894757270813, |
|
"learning_rate": 4.933333333333334e-06, |
|
"loss": 0.9143, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.5587258338928223, |
|
"learning_rate": 5e-06, |
|
"loss": 0.9461, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.152, |
|
"grad_norm": 0.5895867347717285, |
|
"learning_rate": 5.0666666666666676e-06, |
|
"loss": 0.9324, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.154, |
|
"grad_norm": 0.6396773457527161, |
|
"learning_rate": 5.133333333333334e-06, |
|
"loss": 0.9284, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.156, |
|
"grad_norm": 0.6042245030403137, |
|
"learning_rate": 5.2e-06, |
|
"loss": 0.9189, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.158, |
|
"grad_norm": 0.7052513957023621, |
|
"learning_rate": 5.2666666666666665e-06, |
|
"loss": 0.8917, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.5608431696891785, |
|
"learning_rate": 5.333333333333334e-06, |
|
"loss": 0.9101, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.162, |
|
"grad_norm": 0.6625503301620483, |
|
"learning_rate": 5.400000000000001e-06, |
|
"loss": 0.9442, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.164, |
|
"grad_norm": 0.7061005234718323, |
|
"learning_rate": 5.466666666666667e-06, |
|
"loss": 0.9173, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.166, |
|
"grad_norm": 0.6100620627403259, |
|
"learning_rate": 5.533333333333334e-06, |
|
"loss": 0.8942, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.168, |
|
"grad_norm": 0.5722076296806335, |
|
"learning_rate": 5.600000000000001e-06, |
|
"loss": 0.9025, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.5684266686439514, |
|
"learning_rate": 5.666666666666667e-06, |
|
"loss": 0.9191, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.172, |
|
"grad_norm": 0.5633739233016968, |
|
"learning_rate": 5.733333333333334e-06, |
|
"loss": 0.9195, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.174, |
|
"grad_norm": 0.5535778999328613, |
|
"learning_rate": 5.8e-06, |
|
"loss": 0.8992, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.176, |
|
"grad_norm": 0.5984314680099487, |
|
"learning_rate": 5.8666666666666675e-06, |
|
"loss": 0.9046, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.178, |
|
"grad_norm": 0.5641950964927673, |
|
"learning_rate": 5.933333333333335e-06, |
|
"loss": 0.9309, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.6352724432945251, |
|
"learning_rate": 6e-06, |
|
"loss": 0.9449, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.182, |
|
"grad_norm": 0.655491054058075, |
|
"learning_rate": 6.066666666666667e-06, |
|
"loss": 0.959, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.184, |
|
"grad_norm": 0.5627034902572632, |
|
"learning_rate": 6.133333333333334e-06, |
|
"loss": 0.9479, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.186, |
|
"grad_norm": 0.7245451807975769, |
|
"learning_rate": 6.200000000000001e-06, |
|
"loss": 0.8791, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.188, |
|
"grad_norm": 0.6565855145454407, |
|
"learning_rate": 6.266666666666668e-06, |
|
"loss": 0.9234, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.5744818449020386, |
|
"learning_rate": 6.333333333333333e-06, |
|
"loss": 0.9209, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.192, |
|
"grad_norm": 0.7991737127304077, |
|
"learning_rate": 6.4000000000000006e-06, |
|
"loss": 0.931, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.194, |
|
"grad_norm": 0.542374312877655, |
|
"learning_rate": 6.466666666666667e-06, |
|
"loss": 0.9381, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.196, |
|
"grad_norm": 0.6645175814628601, |
|
"learning_rate": 6.533333333333334e-06, |
|
"loss": 0.9074, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.198, |
|
"grad_norm": 0.6946215033531189, |
|
"learning_rate": 6.600000000000001e-06, |
|
"loss": 0.9359, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.6256315112113953, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 0.9097, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.202, |
|
"grad_norm": 0.6502851843833923, |
|
"learning_rate": 6.733333333333334e-06, |
|
"loss": 0.9355, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.204, |
|
"grad_norm": 0.6452805399894714, |
|
"learning_rate": 6.800000000000001e-06, |
|
"loss": 0.8883, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.206, |
|
"grad_norm": 0.6026374697685242, |
|
"learning_rate": 6.866666666666667e-06, |
|
"loss": 0.9054, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.208, |
|
"grad_norm": 0.5819752812385559, |
|
"learning_rate": 6.9333333333333344e-06, |
|
"loss": 0.9196, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.74315345287323, |
|
"learning_rate": 7e-06, |
|
"loss": 0.9175, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.212, |
|
"grad_norm": 0.6989079713821411, |
|
"learning_rate": 7.066666666666667e-06, |
|
"loss": 0.9186, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.214, |
|
"grad_norm": 0.6356122493743896, |
|
"learning_rate": 7.133333333333334e-06, |
|
"loss": 0.9102, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.216, |
|
"grad_norm": 0.7254390120506287, |
|
"learning_rate": 7.2000000000000005e-06, |
|
"loss": 0.9181, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.218, |
|
"grad_norm": 0.6062964200973511, |
|
"learning_rate": 7.266666666666668e-06, |
|
"loss": 0.8487, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.6720328330993652, |
|
"learning_rate": 7.333333333333333e-06, |
|
"loss": 0.8934, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.222, |
|
"grad_norm": 0.6350739002227783, |
|
"learning_rate": 7.4e-06, |
|
"loss": 0.927, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.224, |
|
"grad_norm": 0.5598141551017761, |
|
"learning_rate": 7.4666666666666675e-06, |
|
"loss": 0.9275, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.226, |
|
"grad_norm": 0.6055253744125366, |
|
"learning_rate": 7.533333333333334e-06, |
|
"loss": 0.9078, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.228, |
|
"grad_norm": 0.5682628154754639, |
|
"learning_rate": 7.600000000000001e-06, |
|
"loss": 0.9462, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.5697638988494873, |
|
"learning_rate": 7.666666666666667e-06, |
|
"loss": 0.9008, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.232, |
|
"grad_norm": 0.6082512736320496, |
|
"learning_rate": 7.733333333333334e-06, |
|
"loss": 0.8878, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.234, |
|
"grad_norm": 0.6491492390632629, |
|
"learning_rate": 7.800000000000002e-06, |
|
"loss": 0.9021, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.236, |
|
"grad_norm": 0.5754338502883911, |
|
"learning_rate": 7.866666666666667e-06, |
|
"loss": 0.9012, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.238, |
|
"grad_norm": 0.6297829151153564, |
|
"learning_rate": 7.933333333333334e-06, |
|
"loss": 0.936, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.6097427606582642, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 0.8952, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.242, |
|
"grad_norm": 0.5730277895927429, |
|
"learning_rate": 8.066666666666667e-06, |
|
"loss": 0.9103, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.244, |
|
"grad_norm": 0.48332902789115906, |
|
"learning_rate": 8.133333333333334e-06, |
|
"loss": 0.9291, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.246, |
|
"grad_norm": 0.554993212223053, |
|
"learning_rate": 8.2e-06, |
|
"loss": 0.905, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.248, |
|
"grad_norm": 0.5835518836975098, |
|
"learning_rate": 8.266666666666667e-06, |
|
"loss": 0.8933, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.5752037763595581, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 0.905, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.252, |
|
"grad_norm": 0.5807712078094482, |
|
"learning_rate": 8.400000000000001e-06, |
|
"loss": 0.8634, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.254, |
|
"grad_norm": 0.6821209788322449, |
|
"learning_rate": 8.466666666666668e-06, |
|
"loss": 0.8879, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.256, |
|
"grad_norm": 0.5632617473602295, |
|
"learning_rate": 8.533333333333335e-06, |
|
"loss": 0.8993, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.258, |
|
"grad_norm": 0.58171546459198, |
|
"learning_rate": 8.6e-06, |
|
"loss": 0.8596, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.6329283714294434, |
|
"learning_rate": 8.666666666666668e-06, |
|
"loss": 0.8848, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.262, |
|
"grad_norm": 0.5913123488426208, |
|
"learning_rate": 8.733333333333333e-06, |
|
"loss": 0.8715, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.264, |
|
"grad_norm": 0.6618683338165283, |
|
"learning_rate": 8.8e-06, |
|
"loss": 0.898, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.266, |
|
"grad_norm": 0.6601865887641907, |
|
"learning_rate": 8.866666666666668e-06, |
|
"loss": 0.8785, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.268, |
|
"grad_norm": 0.715740442276001, |
|
"learning_rate": 8.933333333333333e-06, |
|
"loss": 0.8904, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.7350261211395264, |
|
"learning_rate": 9e-06, |
|
"loss": 0.9241, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.272, |
|
"grad_norm": 0.6438612341880798, |
|
"learning_rate": 9.066666666666667e-06, |
|
"loss": 0.8931, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.274, |
|
"grad_norm": 0.8216118812561035, |
|
"learning_rate": 9.133333333333335e-06, |
|
"loss": 0.8773, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.276, |
|
"grad_norm": 0.6327617764472961, |
|
"learning_rate": 9.200000000000002e-06, |
|
"loss": 0.8808, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.278, |
|
"grad_norm": 0.6212260723114014, |
|
"learning_rate": 9.266666666666667e-06, |
|
"loss": 0.8811, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.627037525177002, |
|
"learning_rate": 9.333333333333334e-06, |
|
"loss": 0.9266, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.282, |
|
"grad_norm": 0.624682605266571, |
|
"learning_rate": 9.4e-06, |
|
"loss": 0.9108, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.284, |
|
"grad_norm": 0.6022452712059021, |
|
"learning_rate": 9.466666666666667e-06, |
|
"loss": 0.9115, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.286, |
|
"grad_norm": 0.5518873333930969, |
|
"learning_rate": 9.533333333333334e-06, |
|
"loss": 0.8877, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.288, |
|
"grad_norm": 0.7182208299636841, |
|
"learning_rate": 9.600000000000001e-06, |
|
"loss": 0.8841, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.6279104351997375, |
|
"learning_rate": 9.666666666666667e-06, |
|
"loss": 0.8631, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.292, |
|
"grad_norm": 0.5953511595726013, |
|
"learning_rate": 9.733333333333334e-06, |
|
"loss": 0.8906, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.294, |
|
"grad_norm": 0.9255452752113342, |
|
"learning_rate": 9.800000000000001e-06, |
|
"loss": 0.8899, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.296, |
|
"grad_norm": 0.6811776161193848, |
|
"learning_rate": 9.866666666666668e-06, |
|
"loss": 0.9043, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.298, |
|
"grad_norm": 0.8268923759460449, |
|
"learning_rate": 9.933333333333334e-06, |
|
"loss": 0.8981, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.8442743420600891, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8739, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.302, |
|
"grad_norm": 0.8098756670951843, |
|
"learning_rate": 9.99998646145412e-06, |
|
"loss": 0.8607, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.304, |
|
"grad_norm": 0.8181275129318237, |
|
"learning_rate": 9.999945845889795e-06, |
|
"loss": 0.8963, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.306, |
|
"grad_norm": 0.6322422623634338, |
|
"learning_rate": 9.999878153526974e-06, |
|
"loss": 0.8965, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.308, |
|
"grad_norm": 0.8551559448242188, |
|
"learning_rate": 9.999783384732242e-06, |
|
"loss": 0.902, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.6801779270172119, |
|
"learning_rate": 9.999661540018812e-06, |
|
"loss": 0.8598, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.312, |
|
"grad_norm": 0.7247231006622314, |
|
"learning_rate": 9.999512620046523e-06, |
|
"loss": 0.9204, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.314, |
|
"grad_norm": 0.8099005818367004, |
|
"learning_rate": 9.999336625621836e-06, |
|
"loss": 0.8934, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.316, |
|
"grad_norm": 0.6449592113494873, |
|
"learning_rate": 9.99913355769784e-06, |
|
"loss": 0.8633, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.318, |
|
"grad_norm": 0.7183147668838501, |
|
"learning_rate": 9.998903417374228e-06, |
|
"loss": 0.894, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.6774334907531738, |
|
"learning_rate": 9.99864620589731e-06, |
|
"loss": 0.9034, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.322, |
|
"grad_norm": 0.6847894787788391, |
|
"learning_rate": 9.998361924659989e-06, |
|
"loss": 0.9033, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.324, |
|
"grad_norm": 0.6603731513023376, |
|
"learning_rate": 9.998050575201772e-06, |
|
"loss": 0.8633, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.326, |
|
"grad_norm": 0.6944355368614197, |
|
"learning_rate": 9.997712159208745e-06, |
|
"loss": 0.8819, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.328, |
|
"grad_norm": 0.6257456541061401, |
|
"learning_rate": 9.99734667851357e-06, |
|
"loss": 0.8876, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.702239453792572, |
|
"learning_rate": 9.99695413509548e-06, |
|
"loss": 0.8782, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.332, |
|
"grad_norm": 0.5888884663581848, |
|
"learning_rate": 9.99653453108026e-06, |
|
"loss": 0.8554, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.334, |
|
"grad_norm": 0.6543298959732056, |
|
"learning_rate": 9.996087868740244e-06, |
|
"loss": 0.8662, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.336, |
|
"grad_norm": 0.8132950663566589, |
|
"learning_rate": 9.995614150494293e-06, |
|
"loss": 0.9093, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.338, |
|
"grad_norm": 0.5895329117774963, |
|
"learning_rate": 9.995113378907791e-06, |
|
"loss": 0.8916, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.8918687701225281, |
|
"learning_rate": 9.994585556692624e-06, |
|
"loss": 0.8586, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.342, |
|
"grad_norm": 0.8333307504653931, |
|
"learning_rate": 9.994030686707171e-06, |
|
"loss": 0.8919, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.344, |
|
"grad_norm": 0.737712025642395, |
|
"learning_rate": 9.993448771956285e-06, |
|
"loss": 0.9085, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.346, |
|
"grad_norm": 0.7404334545135498, |
|
"learning_rate": 9.99283981559128e-06, |
|
"loss": 0.8941, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.348, |
|
"grad_norm": 0.7040583491325378, |
|
"learning_rate": 9.992203820909906e-06, |
|
"loss": 0.8901, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.7103179097175598, |
|
"learning_rate": 9.991540791356342e-06, |
|
"loss": 0.8806, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.352, |
|
"grad_norm": 0.7652331590652466, |
|
"learning_rate": 9.99085073052117e-06, |
|
"loss": 0.8689, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.354, |
|
"grad_norm": 0.6264358758926392, |
|
"learning_rate": 9.990133642141359e-06, |
|
"loss": 0.9046, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.356, |
|
"grad_norm": 0.8190493583679199, |
|
"learning_rate": 9.989389530100242e-06, |
|
"loss": 0.9127, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.358, |
|
"grad_norm": 0.6480888724327087, |
|
"learning_rate": 9.988618398427495e-06, |
|
"loss": 0.8533, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.7520120739936829, |
|
"learning_rate": 9.987820251299121e-06, |
|
"loss": 0.8705, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.362, |
|
"grad_norm": 0.8041049242019653, |
|
"learning_rate": 9.986995093037422e-06, |
|
"loss": 0.8946, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.364, |
|
"grad_norm": 0.6484959125518799, |
|
"learning_rate": 9.986142928110972e-06, |
|
"loss": 0.8988, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.366, |
|
"grad_norm": 0.7548107504844666, |
|
"learning_rate": 9.985263761134602e-06, |
|
"loss": 0.875, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.368, |
|
"grad_norm": 0.6038651466369629, |
|
"learning_rate": 9.984357596869369e-06, |
|
"loss": 0.8696, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.761684000492096, |
|
"learning_rate": 9.98342444022253e-06, |
|
"loss": 0.9005, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.372, |
|
"grad_norm": 0.7544848322868347, |
|
"learning_rate": 9.982464296247523e-06, |
|
"loss": 0.8743, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.374, |
|
"grad_norm": 0.6661540865898132, |
|
"learning_rate": 9.981477170143924e-06, |
|
"loss": 0.8793, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.376, |
|
"grad_norm": 0.6984407305717468, |
|
"learning_rate": 9.980463067257437e-06, |
|
"loss": 0.8982, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.378, |
|
"grad_norm": 0.7096168994903564, |
|
"learning_rate": 9.979421993079853e-06, |
|
"loss": 0.8327, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.7101868391036987, |
|
"learning_rate": 9.978353953249023e-06, |
|
"loss": 0.8653, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.382, |
|
"grad_norm": 0.6241440773010254, |
|
"learning_rate": 9.977258953548831e-06, |
|
"loss": 0.8479, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.384, |
|
"grad_norm": 0.8079991936683655, |
|
"learning_rate": 9.976136999909156e-06, |
|
"loss": 0.8801, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.386, |
|
"grad_norm": 0.8028410077095032, |
|
"learning_rate": 9.97498809840585e-06, |
|
"loss": 0.8703, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.388, |
|
"grad_norm": 0.8198662400245667, |
|
"learning_rate": 9.973812255260692e-06, |
|
"loss": 0.8566, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.8669592142105103, |
|
"learning_rate": 9.972609476841368e-06, |
|
"loss": 0.9185, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.392, |
|
"grad_norm": 0.8332952857017517, |
|
"learning_rate": 9.971379769661422e-06, |
|
"loss": 0.878, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.394, |
|
"grad_norm": 0.7698492407798767, |
|
"learning_rate": 9.970123140380237e-06, |
|
"loss": 0.91, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.396, |
|
"grad_norm": 0.8548876047134399, |
|
"learning_rate": 9.968839595802982e-06, |
|
"loss": 0.8952, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.398, |
|
"grad_norm": 0.6185474991798401, |
|
"learning_rate": 9.967529142880592e-06, |
|
"loss": 0.9048, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.8127085566520691, |
|
"learning_rate": 9.966191788709716e-06, |
|
"loss": 0.9067, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.402, |
|
"grad_norm": 0.6378841400146484, |
|
"learning_rate": 9.964827540532685e-06, |
|
"loss": 0.8992, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.404, |
|
"grad_norm": 0.8299638032913208, |
|
"learning_rate": 9.963436405737476e-06, |
|
"loss": 0.8849, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.406, |
|
"grad_norm": 0.8502849340438843, |
|
"learning_rate": 9.962018391857665e-06, |
|
"loss": 0.88, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.408, |
|
"grad_norm": 0.8172805905342102, |
|
"learning_rate": 9.960573506572391e-06, |
|
"loss": 0.9101, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.7766000628471375, |
|
"learning_rate": 9.959101757706308e-06, |
|
"loss": 0.9005, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.412, |
|
"grad_norm": 0.8308401107788086, |
|
"learning_rate": 9.957603153229559e-06, |
|
"loss": 0.8975, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.414, |
|
"grad_norm": 0.7129584550857544, |
|
"learning_rate": 9.95607770125771e-06, |
|
"loss": 0.8636, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.416, |
|
"grad_norm": 0.7642067074775696, |
|
"learning_rate": 9.95452541005172e-06, |
|
"loss": 0.866, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.418, |
|
"grad_norm": 0.9380465745925903, |
|
"learning_rate": 9.952946288017899e-06, |
|
"loss": 0.8999, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.7058805823326111, |
|
"learning_rate": 9.951340343707852e-06, |
|
"loss": 0.9092, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.422, |
|
"grad_norm": 0.7754363417625427, |
|
"learning_rate": 9.94970758581844e-06, |
|
"loss": 0.8728, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.424, |
|
"grad_norm": 0.7104613184928894, |
|
"learning_rate": 9.948048023191728e-06, |
|
"loss": 0.8731, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.426, |
|
"grad_norm": 0.820107102394104, |
|
"learning_rate": 9.946361664814942e-06, |
|
"loss": 0.9037, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.428, |
|
"grad_norm": 0.611301600933075, |
|
"learning_rate": 9.94464851982042e-06, |
|
"loss": 0.8822, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.7367827296257019, |
|
"learning_rate": 9.942908597485558e-06, |
|
"loss": 0.8675, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.432, |
|
"grad_norm": 0.595421552658081, |
|
"learning_rate": 9.941141907232766e-06, |
|
"loss": 0.9004, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.434, |
|
"grad_norm": 0.8655984997749329, |
|
"learning_rate": 9.939348458629406e-06, |
|
"loss": 0.8943, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.436, |
|
"grad_norm": 0.687925398349762, |
|
"learning_rate": 9.937528261387753e-06, |
|
"loss": 0.902, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.438, |
|
"grad_norm": 0.7494369149208069, |
|
"learning_rate": 9.93568132536494e-06, |
|
"loss": 0.8586, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.72107994556427, |
|
"learning_rate": 9.933807660562898e-06, |
|
"loss": 0.8662, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.442, |
|
"grad_norm": 0.7555108666419983, |
|
"learning_rate": 9.9319072771283e-06, |
|
"loss": 0.8689, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.444, |
|
"grad_norm": 0.6739816665649414, |
|
"learning_rate": 9.929980185352525e-06, |
|
"loss": 0.8736, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.446, |
|
"grad_norm": 0.6736449003219604, |
|
"learning_rate": 9.928026395671577e-06, |
|
"loss": 0.8675, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.448, |
|
"grad_norm": 0.7635306119918823, |
|
"learning_rate": 9.926045918666045e-06, |
|
"loss": 0.8943, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.647369384765625, |
|
"learning_rate": 9.924038765061042e-06, |
|
"loss": 0.8695, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.452, |
|
"grad_norm": 0.8473477959632874, |
|
"learning_rate": 9.92200494572614e-06, |
|
"loss": 0.9111, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.454, |
|
"grad_norm": 0.6710712909698486, |
|
"learning_rate": 9.919944471675328e-06, |
|
"loss": 0.925, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.456, |
|
"grad_norm": 0.6937105655670166, |
|
"learning_rate": 9.91785735406693e-06, |
|
"loss": 0.8782, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.458, |
|
"grad_norm": 0.658285915851593, |
|
"learning_rate": 9.915743604203563e-06, |
|
"loss": 0.8742, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.6236475706100464, |
|
"learning_rate": 9.913603233532067e-06, |
|
"loss": 0.8671, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.462, |
|
"grad_norm": 0.689721941947937, |
|
"learning_rate": 9.911436253643445e-06, |
|
"loss": 0.8656, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.464, |
|
"grad_norm": 0.6586703062057495, |
|
"learning_rate": 9.909242676272797e-06, |
|
"loss": 0.8761, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.466, |
|
"grad_norm": 0.6571497917175293, |
|
"learning_rate": 9.907022513299264e-06, |
|
"loss": 0.8914, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.468, |
|
"grad_norm": 0.6405252814292908, |
|
"learning_rate": 9.904775776745959e-06, |
|
"loss": 0.8499, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.7265756726264954, |
|
"learning_rate": 9.902502478779897e-06, |
|
"loss": 0.849, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.472, |
|
"grad_norm": 0.7562282681465149, |
|
"learning_rate": 9.90020263171194e-06, |
|
"loss": 0.935, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.474, |
|
"grad_norm": 0.9941809773445129, |
|
"learning_rate": 9.89787624799672e-06, |
|
"loss": 0.8422, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.476, |
|
"grad_norm": 0.5920974016189575, |
|
"learning_rate": 9.89552334023258e-06, |
|
"loss": 0.8999, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.478, |
|
"grad_norm": 1.0161575078964233, |
|
"learning_rate": 9.893143921161501e-06, |
|
"loss": 0.849, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.5542423129081726, |
|
"learning_rate": 9.890738003669029e-06, |
|
"loss": 0.8743, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.482, |
|
"grad_norm": 0.8454483151435852, |
|
"learning_rate": 9.888305600784217e-06, |
|
"loss": 0.8796, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.484, |
|
"grad_norm": 0.6125505566596985, |
|
"learning_rate": 9.88584672567954e-06, |
|
"loss": 0.8423, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.486, |
|
"grad_norm": 0.6614047884941101, |
|
"learning_rate": 9.883361391670841e-06, |
|
"loss": 0.9044, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.488, |
|
"grad_norm": 0.6495211720466614, |
|
"learning_rate": 9.880849612217238e-06, |
|
"loss": 0.8751, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.6626394391059875, |
|
"learning_rate": 9.878311400921072e-06, |
|
"loss": 0.8748, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.492, |
|
"grad_norm": 0.780263364315033, |
|
"learning_rate": 9.875746771527817e-06, |
|
"loss": 0.8951, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.494, |
|
"grad_norm": 0.5627451539039612, |
|
"learning_rate": 9.873155737926014e-06, |
|
"loss": 0.8619, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.496, |
|
"grad_norm": 0.8236713409423828, |
|
"learning_rate": 9.870538314147194e-06, |
|
"loss": 0.8861, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.498, |
|
"grad_norm": 0.5814322829246521, |
|
"learning_rate": 9.867894514365802e-06, |
|
"loss": 0.8435, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.7435102462768555, |
|
"learning_rate": 9.86522435289912e-06, |
|
"loss": 0.847, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.502, |
|
"grad_norm": 0.6055560111999512, |
|
"learning_rate": 9.862527844207189e-06, |
|
"loss": 0.9026, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.504, |
|
"grad_norm": 0.7876644134521484, |
|
"learning_rate": 9.859805002892733e-06, |
|
"loss": 0.8963, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.506, |
|
"grad_norm": 0.7262117862701416, |
|
"learning_rate": 9.857055843701073e-06, |
|
"loss": 0.8749, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.508, |
|
"grad_norm": 0.6386765837669373, |
|
"learning_rate": 9.85428038152006e-06, |
|
"loss": 0.8652, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.6865687370300293, |
|
"learning_rate": 9.851478631379982e-06, |
|
"loss": 0.8486, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.512, |
|
"grad_norm": 0.5925199389457703, |
|
"learning_rate": 9.84865060845349e-06, |
|
"loss": 0.8874, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.514, |
|
"grad_norm": 0.7218337655067444, |
|
"learning_rate": 9.845796328055505e-06, |
|
"loss": 0.8516, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.516, |
|
"grad_norm": 0.5752958655357361, |
|
"learning_rate": 9.842915805643156e-06, |
|
"loss": 0.8895, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.518, |
|
"grad_norm": 0.7929096817970276, |
|
"learning_rate": 9.840009056815674e-06, |
|
"loss": 0.8975, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.6541514992713928, |
|
"learning_rate": 9.83707609731432e-06, |
|
"loss": 0.85, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.522, |
|
"grad_norm": 0.7700029015541077, |
|
"learning_rate": 9.834116943022299e-06, |
|
"loss": 0.8465, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.524, |
|
"grad_norm": 0.7397891283035278, |
|
"learning_rate": 9.831131609964664e-06, |
|
"loss": 0.858, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.526, |
|
"grad_norm": 0.7328517436981201, |
|
"learning_rate": 9.828120114308248e-06, |
|
"loss": 0.9133, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.528, |
|
"grad_norm": 0.6791554093360901, |
|
"learning_rate": 9.825082472361558e-06, |
|
"loss": 0.8991, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.633378267288208, |
|
"learning_rate": 9.822018700574696e-06, |
|
"loss": 0.875, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.532, |
|
"grad_norm": 0.6736602783203125, |
|
"learning_rate": 9.818928815539266e-06, |
|
"loss": 0.8935, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.534, |
|
"grad_norm": 0.6292229294776917, |
|
"learning_rate": 9.815812833988292e-06, |
|
"loss": 0.8636, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.536, |
|
"grad_norm": 0.5925595164299011, |
|
"learning_rate": 9.812670772796113e-06, |
|
"loss": 0.8796, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.538, |
|
"grad_norm": 0.6620059609413147, |
|
"learning_rate": 9.809502648978311e-06, |
|
"loss": 0.8507, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.590861976146698, |
|
"learning_rate": 9.806308479691595e-06, |
|
"loss": 0.8668, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.542, |
|
"grad_norm": 0.6953304409980774, |
|
"learning_rate": 9.803088282233733e-06, |
|
"loss": 0.8753, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.544, |
|
"grad_norm": 0.7721243500709534, |
|
"learning_rate": 9.799842074043438e-06, |
|
"loss": 0.888, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.546, |
|
"grad_norm": 0.6552896499633789, |
|
"learning_rate": 9.796569872700287e-06, |
|
"loss": 0.8614, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.548, |
|
"grad_norm": 0.7471222877502441, |
|
"learning_rate": 9.793271695924621e-06, |
|
"loss": 0.8587, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.6494088172912598, |
|
"learning_rate": 9.789947561577445e-06, |
|
"loss": 0.8382, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.552, |
|
"grad_norm": 0.6371944546699524, |
|
"learning_rate": 9.786597487660336e-06, |
|
"loss": 0.8866, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.554, |
|
"grad_norm": 0.6909160614013672, |
|
"learning_rate": 9.78322149231535e-06, |
|
"loss": 0.8749, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.556, |
|
"grad_norm": 0.544785737991333, |
|
"learning_rate": 9.779819593824909e-06, |
|
"loss": 0.8504, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.558, |
|
"grad_norm": 0.8029861450195312, |
|
"learning_rate": 9.776391810611719e-06, |
|
"loss": 0.8585, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.5448933243751526, |
|
"learning_rate": 9.77293816123866e-06, |
|
"loss": 0.8724, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.562, |
|
"grad_norm": 0.644865870475769, |
|
"learning_rate": 9.769458664408689e-06, |
|
"loss": 0.8534, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.564, |
|
"grad_norm": 0.6825750470161438, |
|
"learning_rate": 9.765953338964736e-06, |
|
"loss": 0.8765, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.566, |
|
"grad_norm": 0.5962495803833008, |
|
"learning_rate": 9.762422203889604e-06, |
|
"loss": 0.852, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.568, |
|
"grad_norm": 0.6786333918571472, |
|
"learning_rate": 9.75886527830587e-06, |
|
"loss": 0.8427, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.6863124370574951, |
|
"learning_rate": 9.755282581475769e-06, |
|
"loss": 0.8853, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.572, |
|
"grad_norm": 0.596169114112854, |
|
"learning_rate": 9.751674132801106e-06, |
|
"loss": 0.8583, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.574, |
|
"grad_norm": 0.6611921191215515, |
|
"learning_rate": 9.748039951823141e-06, |
|
"loss": 0.8584, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.576, |
|
"grad_norm": 0.5860627889633179, |
|
"learning_rate": 9.744380058222483e-06, |
|
"loss": 0.8574, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.578, |
|
"grad_norm": 0.5513158440589905, |
|
"learning_rate": 9.740694471818988e-06, |
|
"loss": 0.8507, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.6688372492790222, |
|
"learning_rate": 9.736983212571646e-06, |
|
"loss": 0.8638, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.582, |
|
"grad_norm": 0.6180363297462463, |
|
"learning_rate": 9.733246300578482e-06, |
|
"loss": 0.8778, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.584, |
|
"grad_norm": 0.7170926928520203, |
|
"learning_rate": 9.729483756076436e-06, |
|
"loss": 0.8586, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.586, |
|
"grad_norm": 0.6541260480880737, |
|
"learning_rate": 9.72569559944126e-06, |
|
"loss": 0.8799, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.588, |
|
"grad_norm": 0.5732930302619934, |
|
"learning_rate": 9.721881851187406e-06, |
|
"loss": 0.8669, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.6722111105918884, |
|
"learning_rate": 9.718042531967918e-06, |
|
"loss": 0.8898, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.592, |
|
"grad_norm": 0.5557838678359985, |
|
"learning_rate": 9.714177662574316e-06, |
|
"loss": 0.8883, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.594, |
|
"grad_norm": 0.7094323039054871, |
|
"learning_rate": 9.710287263936485e-06, |
|
"loss": 0.864, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.596, |
|
"grad_norm": 0.5642362236976624, |
|
"learning_rate": 9.70637135712256e-06, |
|
"loss": 0.8742, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.598, |
|
"grad_norm": 0.7420487403869629, |
|
"learning_rate": 9.702429963338812e-06, |
|
"loss": 0.8753, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.583194375038147, |
|
"learning_rate": 9.698463103929542e-06, |
|
"loss": 0.8638, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.602, |
|
"grad_norm": 0.714911162853241, |
|
"learning_rate": 9.694470800376951e-06, |
|
"loss": 0.8452, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.604, |
|
"grad_norm": 0.551218569278717, |
|
"learning_rate": 9.690453074301035e-06, |
|
"loss": 0.865, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.606, |
|
"grad_norm": 0.777683675289154, |
|
"learning_rate": 9.68640994745946e-06, |
|
"loss": 0.8554, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.608, |
|
"grad_norm": 0.6686451435089111, |
|
"learning_rate": 9.682341441747446e-06, |
|
"loss": 0.9091, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.847751259803772, |
|
"learning_rate": 9.678247579197658e-06, |
|
"loss": 0.8649, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.612, |
|
"grad_norm": 0.6995366215705872, |
|
"learning_rate": 9.674128381980073e-06, |
|
"loss": 0.8565, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.614, |
|
"grad_norm": 0.7599447965621948, |
|
"learning_rate": 9.669983872401868e-06, |
|
"loss": 0.8861, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.616, |
|
"grad_norm": 0.8379013538360596, |
|
"learning_rate": 9.665814072907293e-06, |
|
"loss": 0.8731, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.618, |
|
"grad_norm": 0.6999011039733887, |
|
"learning_rate": 9.661619006077562e-06, |
|
"loss": 0.8891, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.8724938035011292, |
|
"learning_rate": 9.657398694630713e-06, |
|
"loss": 0.8677, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.622, |
|
"grad_norm": 0.5456347465515137, |
|
"learning_rate": 9.653153161421497e-06, |
|
"loss": 0.8357, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.624, |
|
"grad_norm": 0.8137102127075195, |
|
"learning_rate": 9.648882429441258e-06, |
|
"loss": 0.864, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.626, |
|
"grad_norm": 0.6591514945030212, |
|
"learning_rate": 9.644586521817792e-06, |
|
"loss": 0.8651, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.628, |
|
"grad_norm": 0.6591551303863525, |
|
"learning_rate": 9.640265461815235e-06, |
|
"loss": 0.8742, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.8578521609306335, |
|
"learning_rate": 9.635919272833938e-06, |
|
"loss": 0.8796, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.632, |
|
"grad_norm": 0.7640730738639832, |
|
"learning_rate": 9.63154797841033e-06, |
|
"loss": 0.8631, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.634, |
|
"grad_norm": 0.5908384919166565, |
|
"learning_rate": 9.627151602216801e-06, |
|
"loss": 0.852, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.636, |
|
"grad_norm": 0.6175269484519958, |
|
"learning_rate": 9.622730168061568e-06, |
|
"loss": 0.8714, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.638, |
|
"grad_norm": 0.6520965695381165, |
|
"learning_rate": 9.618283699888543e-06, |
|
"loss": 0.9097, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.6406873464584351, |
|
"learning_rate": 9.613812221777212e-06, |
|
"loss": 0.864, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.642, |
|
"grad_norm": 0.8102174997329712, |
|
"learning_rate": 9.609315757942504e-06, |
|
"loss": 0.8618, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.644, |
|
"grad_norm": 0.5675696730613708, |
|
"learning_rate": 9.604794332734647e-06, |
|
"loss": 0.8578, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.646, |
|
"grad_norm": 0.6569576263427734, |
|
"learning_rate": 9.600247970639053e-06, |
|
"loss": 0.8508, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.648, |
|
"grad_norm": 0.6983848214149475, |
|
"learning_rate": 9.595676696276173e-06, |
|
"loss": 0.9102, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.6264669895172119, |
|
"learning_rate": 9.591080534401371e-06, |
|
"loss": 0.8651, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.652, |
|
"grad_norm": 0.7677618861198425, |
|
"learning_rate": 9.586459509904786e-06, |
|
"loss": 0.8864, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.654, |
|
"grad_norm": 0.6663860082626343, |
|
"learning_rate": 9.581813647811199e-06, |
|
"loss": 0.8762, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.656, |
|
"grad_norm": 0.6873608231544495, |
|
"learning_rate": 9.577142973279896e-06, |
|
"loss": 0.8404, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.658, |
|
"grad_norm": 0.7165527939796448, |
|
"learning_rate": 9.572447511604536e-06, |
|
"loss": 0.8472, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.5741456747055054, |
|
"learning_rate": 9.567727288213005e-06, |
|
"loss": 0.9202, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.662, |
|
"grad_norm": 0.6779457926750183, |
|
"learning_rate": 9.56298232866729e-06, |
|
"loss": 0.876, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.664, |
|
"grad_norm": 0.6113842129707336, |
|
"learning_rate": 9.55821265866333e-06, |
|
"loss": 0.8977, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.666, |
|
"grad_norm": 0.6604581475257874, |
|
"learning_rate": 9.553418304030886e-06, |
|
"loss": 0.8066, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.668, |
|
"grad_norm": 0.6155861616134644, |
|
"learning_rate": 9.548599290733393e-06, |
|
"loss": 0.8716, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.5810530781745911, |
|
"learning_rate": 9.543755644867823e-06, |
|
"loss": 0.8626, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.672, |
|
"grad_norm": 0.5998817682266235, |
|
"learning_rate": 9.538887392664544e-06, |
|
"loss": 0.8535, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.674, |
|
"grad_norm": 0.5710519552230835, |
|
"learning_rate": 9.53399456048718e-06, |
|
"loss": 0.858, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.676, |
|
"grad_norm": 0.5840133428573608, |
|
"learning_rate": 9.529077174832466e-06, |
|
"loss": 0.8794, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.678, |
|
"grad_norm": 0.6933937668800354, |
|
"learning_rate": 9.524135262330098e-06, |
|
"loss": 0.8652, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.538587749004364, |
|
"learning_rate": 9.519168849742603e-06, |
|
"loss": 0.8828, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.682, |
|
"grad_norm": 0.6397183537483215, |
|
"learning_rate": 9.514177963965181e-06, |
|
"loss": 0.85, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.684, |
|
"grad_norm": 0.655133068561554, |
|
"learning_rate": 9.50916263202557e-06, |
|
"loss": 0.8623, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.686, |
|
"grad_norm": 0.5885834693908691, |
|
"learning_rate": 9.504122881083886e-06, |
|
"loss": 0.8575, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.688, |
|
"grad_norm": 0.651634693145752, |
|
"learning_rate": 9.499058738432492e-06, |
|
"loss": 0.8873, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.6309255957603455, |
|
"learning_rate": 9.493970231495836e-06, |
|
"loss": 0.8831, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.692, |
|
"grad_norm": 0.7011136412620544, |
|
"learning_rate": 9.488857387830315e-06, |
|
"loss": 0.8665, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.694, |
|
"grad_norm": 0.6683589816093445, |
|
"learning_rate": 9.483720235124113e-06, |
|
"loss": 0.8505, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.696, |
|
"grad_norm": 0.7045072317123413, |
|
"learning_rate": 9.478558801197065e-06, |
|
"loss": 0.8524, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.698, |
|
"grad_norm": 0.7451434135437012, |
|
"learning_rate": 9.473373114000493e-06, |
|
"loss": 0.8424, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.6919768452644348, |
|
"learning_rate": 9.468163201617063e-06, |
|
"loss": 0.8715, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.702, |
|
"grad_norm": 0.7216975688934326, |
|
"learning_rate": 9.46292909226063e-06, |
|
"loss": 0.8191, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.704, |
|
"grad_norm": 0.7797873020172119, |
|
"learning_rate": 9.457670814276083e-06, |
|
"loss": 0.8458, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.706, |
|
"grad_norm": 0.6440073847770691, |
|
"learning_rate": 9.452388396139202e-06, |
|
"loss": 0.8391, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.708, |
|
"grad_norm": 0.7262352108955383, |
|
"learning_rate": 9.44708186645649e-06, |
|
"loss": 0.8578, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.7259324193000793, |
|
"learning_rate": 9.441751253965022e-06, |
|
"loss": 0.857, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.712, |
|
"grad_norm": 0.6144593358039856, |
|
"learning_rate": 9.436396587532297e-06, |
|
"loss": 0.86, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.714, |
|
"grad_norm": 0.8025081753730774, |
|
"learning_rate": 9.431017896156074e-06, |
|
"loss": 0.8748, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.716, |
|
"grad_norm": 0.5604358911514282, |
|
"learning_rate": 9.425615208964217e-06, |
|
"loss": 0.8504, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.718, |
|
"grad_norm": 0.785858690738678, |
|
"learning_rate": 9.420188555214537e-06, |
|
"loss": 0.886, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.5290353894233704, |
|
"learning_rate": 9.414737964294636e-06, |
|
"loss": 0.8224, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.722, |
|
"grad_norm": 0.6365246772766113, |
|
"learning_rate": 9.40926346572174e-06, |
|
"loss": 0.8578, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.724, |
|
"grad_norm": 0.7250834107398987, |
|
"learning_rate": 9.403765089142554e-06, |
|
"loss": 0.8292, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.726, |
|
"grad_norm": 0.51982581615448, |
|
"learning_rate": 9.398242864333084e-06, |
|
"loss": 0.8582, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.728, |
|
"grad_norm": 0.7608000040054321, |
|
"learning_rate": 9.392696821198488e-06, |
|
"loss": 0.8679, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.6473584771156311, |
|
"learning_rate": 9.38712698977291e-06, |
|
"loss": 0.8497, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.732, |
|
"grad_norm": 0.7003604769706726, |
|
"learning_rate": 9.381533400219319e-06, |
|
"loss": 0.8666, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.734, |
|
"grad_norm": 0.7169507145881653, |
|
"learning_rate": 9.375916082829341e-06, |
|
"loss": 0.8702, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.736, |
|
"grad_norm": 0.6383227109909058, |
|
"learning_rate": 9.370275068023097e-06, |
|
"loss": 0.8897, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.738, |
|
"grad_norm": 0.694448709487915, |
|
"learning_rate": 9.364610386349048e-06, |
|
"loss": 0.8448, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.6426830887794495, |
|
"learning_rate": 9.358922068483813e-06, |
|
"loss": 0.8737, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.742, |
|
"grad_norm": 0.7932099103927612, |
|
"learning_rate": 9.35321014523201e-06, |
|
"loss": 0.9002, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.744, |
|
"grad_norm": 0.6667394042015076, |
|
"learning_rate": 9.347474647526095e-06, |
|
"loss": 0.8579, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.746, |
|
"grad_norm": 0.7726197242736816, |
|
"learning_rate": 9.34171560642619e-06, |
|
"loss": 0.8494, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.748, |
|
"grad_norm": 0.780178427696228, |
|
"learning_rate": 9.335933053119906e-06, |
|
"loss": 0.8598, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.6369235515594482, |
|
"learning_rate": 9.330127018922195e-06, |
|
"loss": 0.8739, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.752, |
|
"grad_norm": 0.6134310364723206, |
|
"learning_rate": 9.324297535275156e-06, |
|
"loss": 0.8205, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.754, |
|
"grad_norm": 0.8698707818984985, |
|
"learning_rate": 9.318444633747884e-06, |
|
"loss": 0.858, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.756, |
|
"grad_norm": 0.5858055353164673, |
|
"learning_rate": 9.312568346036288e-06, |
|
"loss": 0.8341, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.758, |
|
"grad_norm": 0.7148376107215881, |
|
"learning_rate": 9.306668703962927e-06, |
|
"loss": 0.8241, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.9750962257385254, |
|
"learning_rate": 9.30074573947683e-06, |
|
"loss": 0.8986, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.762, |
|
"grad_norm": 0.7608172297477722, |
|
"learning_rate": 9.294799484653323e-06, |
|
"loss": 0.8645, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.764, |
|
"grad_norm": 0.9215735197067261, |
|
"learning_rate": 9.288829971693869e-06, |
|
"loss": 0.8283, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.766, |
|
"grad_norm": 0.8112582564353943, |
|
"learning_rate": 9.282837232925876e-06, |
|
"loss": 0.86, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.768, |
|
"grad_norm": 0.8226185441017151, |
|
"learning_rate": 9.276821300802535e-06, |
|
"loss": 0.8746, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.7064146399497986, |
|
"learning_rate": 9.27078220790263e-06, |
|
"loss": 0.8813, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.772, |
|
"grad_norm": 0.9706873297691345, |
|
"learning_rate": 9.264719986930376e-06, |
|
"loss": 0.8761, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.774, |
|
"grad_norm": 0.6123415231704712, |
|
"learning_rate": 9.25863467071524e-06, |
|
"loss": 0.8337, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.776, |
|
"grad_norm": 0.7688935399055481, |
|
"learning_rate": 9.25252629221175e-06, |
|
"loss": 0.8527, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.778, |
|
"grad_norm": 0.6873968243598938, |
|
"learning_rate": 9.246394884499334e-06, |
|
"loss": 0.8725, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.6200389862060547, |
|
"learning_rate": 9.24024048078213e-06, |
|
"loss": 0.8426, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.782, |
|
"grad_norm": 0.555893063545227, |
|
"learning_rate": 9.234063114388809e-06, |
|
"loss": 0.8545, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.784, |
|
"grad_norm": 0.677457869052887, |
|
"learning_rate": 9.227862818772392e-06, |
|
"loss": 0.8346, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.786, |
|
"grad_norm": 0.6086246371269226, |
|
"learning_rate": 9.221639627510076e-06, |
|
"loss": 0.8622, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.788, |
|
"grad_norm": 0.6295508742332458, |
|
"learning_rate": 9.215393574303043e-06, |
|
"loss": 0.8601, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.6351264119148254, |
|
"learning_rate": 9.209124692976287e-06, |
|
"loss": 0.8487, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.792, |
|
"grad_norm": 0.6365606784820557, |
|
"learning_rate": 9.202833017478421e-06, |
|
"loss": 0.844, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.794, |
|
"grad_norm": 0.6243758201599121, |
|
"learning_rate": 9.196518581881502e-06, |
|
"loss": 0.8448, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.796, |
|
"grad_norm": 0.6153333783149719, |
|
"learning_rate": 9.190181420380838e-06, |
|
"loss": 0.8535, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.798, |
|
"grad_norm": 0.6188914179801941, |
|
"learning_rate": 9.18382156729481e-06, |
|
"loss": 0.8507, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.6090688109397888, |
|
"learning_rate": 9.177439057064684e-06, |
|
"loss": 0.8617, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.802, |
|
"grad_norm": 0.6110431551933289, |
|
"learning_rate": 9.171033924254421e-06, |
|
"loss": 0.8766, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.804, |
|
"grad_norm": 0.6193629503250122, |
|
"learning_rate": 9.164606203550498e-06, |
|
"loss": 0.8283, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.806, |
|
"grad_norm": 0.6263427734375, |
|
"learning_rate": 9.15815592976171e-06, |
|
"loss": 0.8593, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.808, |
|
"grad_norm": 0.604211151599884, |
|
"learning_rate": 9.151683137818989e-06, |
|
"loss": 0.8423, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.6154013276100159, |
|
"learning_rate": 9.145187862775208e-06, |
|
"loss": 0.8409, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.812, |
|
"grad_norm": 0.534557044506073, |
|
"learning_rate": 9.138670139805004e-06, |
|
"loss": 0.8458, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.814, |
|
"grad_norm": 0.6704394817352295, |
|
"learning_rate": 9.132130004204569e-06, |
|
"loss": 0.8556, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.816, |
|
"grad_norm": 0.6993472576141357, |
|
"learning_rate": 9.125567491391476e-06, |
|
"loss": 0.8907, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.818, |
|
"grad_norm": 0.6247013807296753, |
|
"learning_rate": 9.118982636904476e-06, |
|
"loss": 0.8372, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.6774156093597412, |
|
"learning_rate": 9.112375476403313e-06, |
|
"loss": 0.8363, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.822, |
|
"grad_norm": 0.6017762422561646, |
|
"learning_rate": 9.10574604566852e-06, |
|
"loss": 0.8808, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.824, |
|
"grad_norm": 0.7543519139289856, |
|
"learning_rate": 9.099094380601244e-06, |
|
"loss": 0.8395, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.826, |
|
"grad_norm": 0.5303599834442139, |
|
"learning_rate": 9.09242051722303e-06, |
|
"loss": 0.8495, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.828, |
|
"grad_norm": 0.6581388711929321, |
|
"learning_rate": 9.085724491675642e-06, |
|
"loss": 0.8482, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.6038942337036133, |
|
"learning_rate": 9.079006340220862e-06, |
|
"loss": 0.8646, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.832, |
|
"grad_norm": 0.6721682548522949, |
|
"learning_rate": 9.072266099240286e-06, |
|
"loss": 0.8486, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.834, |
|
"grad_norm": 0.688840925693512, |
|
"learning_rate": 9.065503805235139e-06, |
|
"loss": 0.8797, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.836, |
|
"grad_norm": 0.9070942401885986, |
|
"learning_rate": 9.058719494826076e-06, |
|
"loss": 0.8313, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.838, |
|
"grad_norm": 0.5890297293663025, |
|
"learning_rate": 9.051913204752972e-06, |
|
"loss": 0.8577, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.9257925748825073, |
|
"learning_rate": 9.045084971874738e-06, |
|
"loss": 0.8476, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.842, |
|
"grad_norm": 0.6360816359519958, |
|
"learning_rate": 9.03823483316911e-06, |
|
"loss": 0.8365, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.844, |
|
"grad_norm": 0.7895241975784302, |
|
"learning_rate": 9.031362825732456e-06, |
|
"loss": 0.9092, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.846, |
|
"grad_norm": 0.8315562009811401, |
|
"learning_rate": 9.02446898677957e-06, |
|
"loss": 0.8825, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.848, |
|
"grad_norm": 0.6070747971534729, |
|
"learning_rate": 9.017553353643479e-06, |
|
"loss": 0.8157, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.8066498041152954, |
|
"learning_rate": 9.01061596377522e-06, |
|
"loss": 0.8614, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.852, |
|
"grad_norm": 0.6945021748542786, |
|
"learning_rate": 9.003656854743667e-06, |
|
"loss": 0.8652, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.854, |
|
"grad_norm": 0.7237082719802856, |
|
"learning_rate": 8.996676064235308e-06, |
|
"loss": 0.8033, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.856, |
|
"grad_norm": 0.7030566930770874, |
|
"learning_rate": 8.989673630054044e-06, |
|
"loss": 0.8576, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.858, |
|
"grad_norm": 0.6638972163200378, |
|
"learning_rate": 8.982649590120982e-06, |
|
"loss": 0.8649, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.6616825461387634, |
|
"learning_rate": 8.97560398247424e-06, |
|
"loss": 0.8683, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.862, |
|
"grad_norm": 0.6521521806716919, |
|
"learning_rate": 8.96853684526873e-06, |
|
"loss": 0.8796, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.864, |
|
"grad_norm": 0.7620105743408203, |
|
"learning_rate": 8.961448216775955e-06, |
|
"loss": 0.8421, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.866, |
|
"grad_norm": 0.5343947410583496, |
|
"learning_rate": 8.954338135383804e-06, |
|
"loss": 0.8413, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.868, |
|
"grad_norm": 0.8251727223396301, |
|
"learning_rate": 8.947206639596346e-06, |
|
"loss": 0.8446, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.6117093563079834, |
|
"learning_rate": 8.94005376803361e-06, |
|
"loss": 0.8391, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.872, |
|
"grad_norm": 0.7564120888710022, |
|
"learning_rate": 8.932879559431392e-06, |
|
"loss": 0.8555, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.874, |
|
"grad_norm": 0.7144505381584167, |
|
"learning_rate": 8.925684052641027e-06, |
|
"loss": 0.8485, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.876, |
|
"grad_norm": 0.6239364743232727, |
|
"learning_rate": 8.9184672866292e-06, |
|
"loss": 0.8347, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.878, |
|
"grad_norm": 0.6891036629676819, |
|
"learning_rate": 8.911229300477716e-06, |
|
"loss": 0.8764, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.5618371367454529, |
|
"learning_rate": 8.903970133383297e-06, |
|
"loss": 0.8486, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.882, |
|
"grad_norm": 0.6398274302482605, |
|
"learning_rate": 8.896689824657371e-06, |
|
"loss": 0.8586, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.884, |
|
"grad_norm": 0.6113273501396179, |
|
"learning_rate": 8.889388413725857e-06, |
|
"loss": 0.8255, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.886, |
|
"grad_norm": 0.6663311719894409, |
|
"learning_rate": 8.882065940128946e-06, |
|
"loss": 0.8686, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.888, |
|
"grad_norm": 0.561815083026886, |
|
"learning_rate": 8.874722443520898e-06, |
|
"loss": 0.8468, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.6316409111022949, |
|
"learning_rate": 8.867357963669821e-06, |
|
"loss": 0.8376, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.892, |
|
"grad_norm": 0.719174325466156, |
|
"learning_rate": 8.859972540457451e-06, |
|
"loss": 0.826, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.894, |
|
"grad_norm": 0.6321907639503479, |
|
"learning_rate": 8.852566213878947e-06, |
|
"loss": 0.852, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.896, |
|
"grad_norm": 0.6965914964675903, |
|
"learning_rate": 8.845139024042664e-06, |
|
"loss": 0.8513, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.898, |
|
"grad_norm": 0.6648813486099243, |
|
"learning_rate": 8.837691011169944e-06, |
|
"loss": 0.8531, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.6940134167671204, |
|
"learning_rate": 8.83022221559489e-06, |
|
"loss": 0.8919, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.902, |
|
"grad_norm": 0.5885754823684692, |
|
"learning_rate": 8.822732677764158e-06, |
|
"loss": 0.9006, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.904, |
|
"grad_norm": 0.6959089636802673, |
|
"learning_rate": 8.815222438236726e-06, |
|
"loss": 0.8686, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.906, |
|
"grad_norm": 0.6296737790107727, |
|
"learning_rate": 8.807691537683685e-06, |
|
"loss": 0.8542, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.908, |
|
"grad_norm": 0.6397737264633179, |
|
"learning_rate": 8.800140016888009e-06, |
|
"loss": 0.8584, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.714893639087677, |
|
"learning_rate": 8.792567916744346e-06, |
|
"loss": 0.8548, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.912, |
|
"grad_norm": 0.5381580591201782, |
|
"learning_rate": 8.784975278258783e-06, |
|
"loss": 0.8714, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.914, |
|
"grad_norm": 0.7903892397880554, |
|
"learning_rate": 8.777362142548636e-06, |
|
"loss": 0.8662, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.916, |
|
"grad_norm": 0.6871955394744873, |
|
"learning_rate": 8.769728550842217e-06, |
|
"loss": 0.8304, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.918, |
|
"grad_norm": 0.5707519054412842, |
|
"learning_rate": 8.762074544478622e-06, |
|
"loss": 0.8184, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.7742196321487427, |
|
"learning_rate": 8.754400164907496e-06, |
|
"loss": 0.854, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.922, |
|
"grad_norm": 0.7090582847595215, |
|
"learning_rate": 8.746705453688815e-06, |
|
"loss": 0.82, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.924, |
|
"grad_norm": 0.5473904609680176, |
|
"learning_rate": 8.73899045249266e-06, |
|
"loss": 0.8278, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.926, |
|
"grad_norm": 0.5831822752952576, |
|
"learning_rate": 8.73125520309899e-06, |
|
"loss": 0.8389, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.928, |
|
"grad_norm": 0.645477831363678, |
|
"learning_rate": 8.723499747397415e-06, |
|
"loss": 0.8692, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.5839529037475586, |
|
"learning_rate": 8.715724127386971e-06, |
|
"loss": 0.8682, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.932, |
|
"grad_norm": 0.5304246544837952, |
|
"learning_rate": 8.707928385175898e-06, |
|
"loss": 0.8257, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.934, |
|
"grad_norm": 0.5904834866523743, |
|
"learning_rate": 8.700112562981398e-06, |
|
"loss": 0.8548, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.936, |
|
"grad_norm": 0.6015157103538513, |
|
"learning_rate": 8.692276703129421e-06, |
|
"loss": 0.8589, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.938, |
|
"grad_norm": 0.5249965786933899, |
|
"learning_rate": 8.68442084805442e-06, |
|
"loss": 0.8387, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.6521996259689331, |
|
"learning_rate": 8.676545040299145e-06, |
|
"loss": 0.8649, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.942, |
|
"grad_norm": 0.6516804099082947, |
|
"learning_rate": 8.668649322514382e-06, |
|
"loss": 0.867, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.944, |
|
"grad_norm": 0.5435242652893066, |
|
"learning_rate": 8.660733737458751e-06, |
|
"loss": 0.8723, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.946, |
|
"grad_norm": 0.7011072635650635, |
|
"learning_rate": 8.652798327998458e-06, |
|
"loss": 0.8535, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.948, |
|
"grad_norm": 0.5955620408058167, |
|
"learning_rate": 8.644843137107058e-06, |
|
"loss": 0.8747, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.6632460355758667, |
|
"learning_rate": 8.636868207865244e-06, |
|
"loss": 0.8493, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.952, |
|
"grad_norm": 0.5909416675567627, |
|
"learning_rate": 8.628873583460593e-06, |
|
"loss": 0.8726, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.954, |
|
"grad_norm": 0.5914918184280396, |
|
"learning_rate": 8.620859307187339e-06, |
|
"loss": 0.8766, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.956, |
|
"grad_norm": 0.6765621304512024, |
|
"learning_rate": 8.61282542244614e-06, |
|
"loss": 0.8589, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.958, |
|
"grad_norm": 0.5578816533088684, |
|
"learning_rate": 8.604771972743848e-06, |
|
"loss": 0.8483, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.6967804431915283, |
|
"learning_rate": 8.596699001693257e-06, |
|
"loss": 0.8311, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.962, |
|
"grad_norm": 0.7719427347183228, |
|
"learning_rate": 8.588606553012884e-06, |
|
"loss": 0.8645, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.964, |
|
"grad_norm": 0.5344576835632324, |
|
"learning_rate": 8.580494670526725e-06, |
|
"loss": 0.8759, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.966, |
|
"grad_norm": 0.5716308951377869, |
|
"learning_rate": 8.572363398164017e-06, |
|
"loss": 0.8299, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.968, |
|
"grad_norm": 0.6527791619300842, |
|
"learning_rate": 8.564212779959003e-06, |
|
"loss": 0.8398, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.6142027974128723, |
|
"learning_rate": 8.556042860050686e-06, |
|
"loss": 0.8251, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.972, |
|
"grad_norm": 0.5965133309364319, |
|
"learning_rate": 8.547853682682605e-06, |
|
"loss": 0.8587, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.974, |
|
"grad_norm": 0.5377973318099976, |
|
"learning_rate": 8.539645292202579e-06, |
|
"loss": 0.8724, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.976, |
|
"grad_norm": 0.6018757820129395, |
|
"learning_rate": 8.531417733062476e-06, |
|
"loss": 0.8646, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.978, |
|
"grad_norm": 0.6402251720428467, |
|
"learning_rate": 8.523171049817974e-06, |
|
"loss": 0.8562, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.5891185402870178, |
|
"learning_rate": 8.51490528712831e-06, |
|
"loss": 0.8098, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.982, |
|
"grad_norm": 0.5361101627349854, |
|
"learning_rate": 8.506620489756045e-06, |
|
"loss": 0.8605, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.984, |
|
"grad_norm": 0.5156423449516296, |
|
"learning_rate": 8.498316702566828e-06, |
|
"loss": 0.8312, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.986, |
|
"grad_norm": 0.5701969265937805, |
|
"learning_rate": 8.489993970529137e-06, |
|
"loss": 0.857, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.988, |
|
"grad_norm": 0.6193264126777649, |
|
"learning_rate": 8.481652338714048e-06, |
|
"loss": 0.8571, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.5284368991851807, |
|
"learning_rate": 8.473291852294986e-06, |
|
"loss": 0.8774, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.992, |
|
"grad_norm": 0.644671618938446, |
|
"learning_rate": 8.464912556547486e-06, |
|
"loss": 0.8824, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.994, |
|
"grad_norm": 0.5708516836166382, |
|
"learning_rate": 8.456514496848938e-06, |
|
"loss": 0.8418, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.996, |
|
"grad_norm": 0.491625040769577, |
|
"learning_rate": 8.44809771867835e-06, |
|
"loss": 0.8599, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.998, |
|
"grad_norm": 0.6016339659690857, |
|
"learning_rate": 8.439662267616093e-06, |
|
"loss": 0.8589, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.6193352341651917, |
|
"learning_rate": 8.43120818934367e-06, |
|
"loss": 0.8247, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.002, |
|
"grad_norm": 0.5769928693771362, |
|
"learning_rate": 8.422735529643445e-06, |
|
"loss": 0.7995, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 1.004, |
|
"grad_norm": 0.5737547874450684, |
|
"learning_rate": 8.414244334398418e-06, |
|
"loss": 0.7948, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 1.006, |
|
"grad_norm": 0.6058068871498108, |
|
"learning_rate": 8.405734649591964e-06, |
|
"loss": 0.7863, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 1.008, |
|
"grad_norm": 0.5428916811943054, |
|
"learning_rate": 8.397206521307584e-06, |
|
"loss": 0.8049, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 0.7062215209007263, |
|
"learning_rate": 8.388659995728662e-06, |
|
"loss": 0.8319, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 1.012, |
|
"grad_norm": 0.4917738139629364, |
|
"learning_rate": 8.380095119138209e-06, |
|
"loss": 0.844, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 1.014, |
|
"grad_norm": 0.7528649568557739, |
|
"learning_rate": 8.371511937918616e-06, |
|
"loss": 0.8274, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 1.016, |
|
"grad_norm": 0.513393759727478, |
|
"learning_rate": 8.362910498551402e-06, |
|
"loss": 0.8135, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 1.018, |
|
"grad_norm": 0.6357613801956177, |
|
"learning_rate": 8.354290847616954e-06, |
|
"loss": 0.7991, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 0.6039820909500122, |
|
"learning_rate": 8.345653031794292e-06, |
|
"loss": 0.8176, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.022, |
|
"grad_norm": 0.5468531847000122, |
|
"learning_rate": 8.3369970978608e-06, |
|
"loss": 0.8629, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 1.024, |
|
"grad_norm": 0.5981908440589905, |
|
"learning_rate": 8.328323092691985e-06, |
|
"loss": 0.7772, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 1.026, |
|
"grad_norm": 0.509652853012085, |
|
"learning_rate": 8.319631063261209e-06, |
|
"loss": 0.842, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 1.028, |
|
"grad_norm": 0.5468859672546387, |
|
"learning_rate": 8.310921056639451e-06, |
|
"loss": 0.8299, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 0.6187793016433716, |
|
"learning_rate": 8.302193119995038e-06, |
|
"loss": 0.7958, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 1.032, |
|
"grad_norm": 0.5641921758651733, |
|
"learning_rate": 8.293447300593402e-06, |
|
"loss": 0.8341, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 1.034, |
|
"grad_norm": 0.6467854380607605, |
|
"learning_rate": 8.284683645796814e-06, |
|
"loss": 0.7682, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 1.036, |
|
"grad_norm": 0.4969216585159302, |
|
"learning_rate": 8.275902203064125e-06, |
|
"loss": 0.8247, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 1.038, |
|
"grad_norm": 0.5728968977928162, |
|
"learning_rate": 8.267103019950529e-06, |
|
"loss": 0.8065, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 0.5534486174583435, |
|
"learning_rate": 8.258286144107277e-06, |
|
"loss": 0.818, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.042, |
|
"grad_norm": 0.5607255101203918, |
|
"learning_rate": 8.249451623281444e-06, |
|
"loss": 0.7975, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 1.044, |
|
"grad_norm": 0.537211000919342, |
|
"learning_rate": 8.240599505315656e-06, |
|
"loss": 0.8345, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 1.046, |
|
"grad_norm": 0.5845485925674438, |
|
"learning_rate": 8.231729838147833e-06, |
|
"loss": 0.8191, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 1.048, |
|
"grad_norm": 0.5804502964019775, |
|
"learning_rate": 8.222842669810936e-06, |
|
"loss": 0.8313, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 0.5499438047409058, |
|
"learning_rate": 8.213938048432697e-06, |
|
"loss": 0.8242, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 1.052, |
|
"grad_norm": 0.6084002256393433, |
|
"learning_rate": 8.205016022235368e-06, |
|
"loss": 0.8272, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 1.054, |
|
"grad_norm": 0.5327991247177124, |
|
"learning_rate": 8.196076639535453e-06, |
|
"loss": 0.8309, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 1.056, |
|
"grad_norm": 0.5128622651100159, |
|
"learning_rate": 8.18711994874345e-06, |
|
"loss": 0.8158, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 1.058, |
|
"grad_norm": 0.6121509075164795, |
|
"learning_rate": 8.178145998363585e-06, |
|
"loss": 0.7969, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 0.5912569761276245, |
|
"learning_rate": 8.16915483699355e-06, |
|
"loss": 0.8165, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.062, |
|
"grad_norm": 0.6030197143554688, |
|
"learning_rate": 8.160146513324256e-06, |
|
"loss": 0.8328, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 1.064, |
|
"grad_norm": 0.5357882380485535, |
|
"learning_rate": 8.151121076139534e-06, |
|
"loss": 0.8209, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 1.066, |
|
"grad_norm": 0.5966700315475464, |
|
"learning_rate": 8.142078574315907e-06, |
|
"loss": 0.807, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 1.068, |
|
"grad_norm": 0.6120830774307251, |
|
"learning_rate": 8.133019056822303e-06, |
|
"loss": 0.7926, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 0.6333845257759094, |
|
"learning_rate": 8.123942572719801e-06, |
|
"loss": 0.8208, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 1.072, |
|
"grad_norm": 0.6417355537414551, |
|
"learning_rate": 8.11484917116136e-06, |
|
"loss": 0.822, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 1.074, |
|
"grad_norm": 0.498068243265152, |
|
"learning_rate": 8.105738901391553e-06, |
|
"loss": 0.8476, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 1.076, |
|
"grad_norm": 0.6133856177330017, |
|
"learning_rate": 8.096611812746302e-06, |
|
"loss": 0.7765, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 1.078, |
|
"grad_norm": 0.5497493147850037, |
|
"learning_rate": 8.087467954652608e-06, |
|
"loss": 0.8126, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 0.5176334381103516, |
|
"learning_rate": 8.078307376628292e-06, |
|
"loss": 0.8018, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.082, |
|
"grad_norm": 0.5719330906867981, |
|
"learning_rate": 8.069130128281714e-06, |
|
"loss": 0.8003, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 1.084, |
|
"grad_norm": 0.6029807329177856, |
|
"learning_rate": 8.059936259311514e-06, |
|
"loss": 0.8541, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 1.086, |
|
"grad_norm": 0.5657724738121033, |
|
"learning_rate": 8.05072581950634e-06, |
|
"loss": 0.7943, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 1.088, |
|
"grad_norm": 0.6055567860603333, |
|
"learning_rate": 8.041498858744572e-06, |
|
"loss": 0.7925, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 0.5769219994544983, |
|
"learning_rate": 8.032255426994069e-06, |
|
"loss": 0.8003, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 1.092, |
|
"grad_norm": 0.5590239763259888, |
|
"learning_rate": 8.022995574311876e-06, |
|
"loss": 0.8006, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 1.094, |
|
"grad_norm": 0.613633930683136, |
|
"learning_rate": 8.013719350843969e-06, |
|
"loss": 0.7964, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 1.096, |
|
"grad_norm": 0.6089486479759216, |
|
"learning_rate": 8.004426806824985e-06, |
|
"loss": 0.8035, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 1.098, |
|
"grad_norm": 0.5584562420845032, |
|
"learning_rate": 7.99511799257793e-06, |
|
"loss": 0.8435, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 0.6562801003456116, |
|
"learning_rate": 7.985792958513932e-06, |
|
"loss": 0.8297, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.102, |
|
"grad_norm": 0.573478639125824, |
|
"learning_rate": 7.97645175513195e-06, |
|
"loss": 0.839, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 1.104, |
|
"grad_norm": 0.5595790147781372, |
|
"learning_rate": 7.967094433018508e-06, |
|
"loss": 0.7936, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 1.106, |
|
"grad_norm": 0.5592019557952881, |
|
"learning_rate": 7.95772104284742e-06, |
|
"loss": 0.8311, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 1.108, |
|
"grad_norm": 0.5474761128425598, |
|
"learning_rate": 7.948331635379517e-06, |
|
"loss": 0.7777, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"grad_norm": 0.6115286946296692, |
|
"learning_rate": 7.938926261462366e-06, |
|
"loss": 0.821, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 1.112, |
|
"grad_norm": 0.4691702425479889, |
|
"learning_rate": 7.929504972030003e-06, |
|
"loss": 0.7774, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 1.114, |
|
"grad_norm": 0.6433555483818054, |
|
"learning_rate": 7.920067818102652e-06, |
|
"loss": 0.8085, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 1.116, |
|
"grad_norm": 0.5453904271125793, |
|
"learning_rate": 7.910614850786448e-06, |
|
"loss": 0.804, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 1.1179999999999999, |
|
"grad_norm": 0.5915471911430359, |
|
"learning_rate": 7.901146121273165e-06, |
|
"loss": 0.8135, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 0.519405722618103, |
|
"learning_rate": 7.891661680839932e-06, |
|
"loss": 0.8083, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.1219999999999999, |
|
"grad_norm": 0.7413570880889893, |
|
"learning_rate": 7.882161580848966e-06, |
|
"loss": 0.8363, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 1.124, |
|
"grad_norm": 0.6559600830078125, |
|
"learning_rate": 7.872645872747281e-06, |
|
"loss": 0.8312, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 1.126, |
|
"grad_norm": 0.5560040473937988, |
|
"learning_rate": 7.863114608066417e-06, |
|
"loss": 0.7881, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 1.1280000000000001, |
|
"grad_norm": 0.6661034226417542, |
|
"learning_rate": 7.85356783842216e-06, |
|
"loss": 0.8372, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"grad_norm": 0.5710970759391785, |
|
"learning_rate": 7.84400561551426e-06, |
|
"loss": 0.8015, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 1.1320000000000001, |
|
"grad_norm": 0.5782989263534546, |
|
"learning_rate": 7.834427991126155e-06, |
|
"loss": 0.7983, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 1.134, |
|
"grad_norm": 0.6261492371559143, |
|
"learning_rate": 7.82483501712469e-06, |
|
"loss": 0.8461, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 1.1360000000000001, |
|
"grad_norm": 0.513586163520813, |
|
"learning_rate": 7.815226745459831e-06, |
|
"loss": 0.7761, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 1.138, |
|
"grad_norm": 0.639958918094635, |
|
"learning_rate": 7.80560322816439e-06, |
|
"loss": 0.8502, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 1.1400000000000001, |
|
"grad_norm": 0.48928067088127136, |
|
"learning_rate": 7.795964517353734e-06, |
|
"loss": 0.8222, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.142, |
|
"grad_norm": 0.527836263179779, |
|
"learning_rate": 7.786310665225522e-06, |
|
"loss": 0.8261, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 1.144, |
|
"grad_norm": 0.6203815937042236, |
|
"learning_rate": 7.776641724059398e-06, |
|
"loss": 0.8052, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 1.146, |
|
"grad_norm": 0.5364488363265991, |
|
"learning_rate": 7.76695774621672e-06, |
|
"loss": 0.8178, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 1.148, |
|
"grad_norm": 0.5550497174263, |
|
"learning_rate": 7.757258784140286e-06, |
|
"loss": 0.7836, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 0.6141073107719421, |
|
"learning_rate": 7.747544890354031e-06, |
|
"loss": 0.7771, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 1.152, |
|
"grad_norm": 0.5875626802444458, |
|
"learning_rate": 7.737816117462752e-06, |
|
"loss": 0.8157, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 1.154, |
|
"grad_norm": 0.5942591428756714, |
|
"learning_rate": 7.728072518151826e-06, |
|
"loss": 0.8049, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 1.156, |
|
"grad_norm": 0.6154875755310059, |
|
"learning_rate": 7.718314145186918e-06, |
|
"loss": 0.8229, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 1.158, |
|
"grad_norm": 0.5936452150344849, |
|
"learning_rate": 7.7085410514137e-06, |
|
"loss": 0.8372, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"grad_norm": 0.5601768493652344, |
|
"learning_rate": 7.698753289757565e-06, |
|
"loss": 0.8246, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.162, |
|
"grad_norm": 0.5993269085884094, |
|
"learning_rate": 7.688950913223336e-06, |
|
"loss": 0.8146, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 1.164, |
|
"grad_norm": 0.5179044604301453, |
|
"learning_rate": 7.679133974894984e-06, |
|
"loss": 0.7709, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 1.166, |
|
"grad_norm": 0.5535378456115723, |
|
"learning_rate": 7.669302527935334e-06, |
|
"loss": 0.7898, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 1.168, |
|
"grad_norm": 0.611381471157074, |
|
"learning_rate": 7.65945662558579e-06, |
|
"loss": 0.8016, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"grad_norm": 0.6293729543685913, |
|
"learning_rate": 7.649596321166024e-06, |
|
"loss": 0.7995, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 1.172, |
|
"grad_norm": 0.5044072866439819, |
|
"learning_rate": 7.639721668073718e-06, |
|
"loss": 0.8043, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 1.174, |
|
"grad_norm": 0.5509117245674133, |
|
"learning_rate": 7.629832719784245e-06, |
|
"loss": 0.802, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 1.176, |
|
"grad_norm": 0.5565614104270935, |
|
"learning_rate": 7.619929529850397e-06, |
|
"loss": 0.8155, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 1.178, |
|
"grad_norm": 0.6130761504173279, |
|
"learning_rate": 7.610012151902091e-06, |
|
"loss": 0.8334, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"grad_norm": 0.5241647362709045, |
|
"learning_rate": 7.600080639646077e-06, |
|
"loss": 0.8129, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.182, |
|
"grad_norm": 0.6383605003356934, |
|
"learning_rate": 7.590135046865652e-06, |
|
"loss": 0.8431, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 1.184, |
|
"grad_norm": 0.6660565137863159, |
|
"learning_rate": 7.580175427420358e-06, |
|
"loss": 0.8099, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 1.186, |
|
"grad_norm": 0.5172692537307739, |
|
"learning_rate": 7.570201835245703e-06, |
|
"loss": 0.7888, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 1.188, |
|
"grad_norm": 0.6571612358093262, |
|
"learning_rate": 7.560214324352858e-06, |
|
"loss": 0.7891, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"grad_norm": 0.6135300993919373, |
|
"learning_rate": 7.550212948828377e-06, |
|
"loss": 0.8296, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 1.192, |
|
"grad_norm": 0.5668914318084717, |
|
"learning_rate": 7.54019776283389e-06, |
|
"loss": 0.7931, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 1.194, |
|
"grad_norm": 0.6143723726272583, |
|
"learning_rate": 7.530168820605819e-06, |
|
"loss": 0.8178, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 1.196, |
|
"grad_norm": 0.7302437424659729, |
|
"learning_rate": 7.520126176455084e-06, |
|
"loss": 0.8434, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 1.198, |
|
"grad_norm": 0.5691401362419128, |
|
"learning_rate": 7.510069884766802e-06, |
|
"loss": 0.8216, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 0.72467041015625, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 0.8043, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.202, |
|
"grad_norm": 0.5468252301216125, |
|
"learning_rate": 7.489916576687318e-06, |
|
"loss": 0.7883, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 1.204, |
|
"grad_norm": 0.6991376876831055, |
|
"learning_rate": 7.479819669434712e-06, |
|
"loss": 0.8545, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 1.206, |
|
"grad_norm": 0.5413354635238647, |
|
"learning_rate": 7.469709332921155e-06, |
|
"loss": 0.8286, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 1.208, |
|
"grad_norm": 0.6795758008956909, |
|
"learning_rate": 7.459585621898353e-06, |
|
"loss": 0.7882, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"grad_norm": 0.5337622165679932, |
|
"learning_rate": 7.449448591190436e-06, |
|
"loss": 0.8151, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 1.212, |
|
"grad_norm": 0.5041375160217285, |
|
"learning_rate": 7.4392982956936644e-06, |
|
"loss": 0.7572, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 1.214, |
|
"grad_norm": 0.5212530493736267, |
|
"learning_rate": 7.429134790376136e-06, |
|
"loss": 0.7994, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 1.216, |
|
"grad_norm": 0.5540004968643188, |
|
"learning_rate": 7.418958130277483e-06, |
|
"loss": 0.8277, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 1.218, |
|
"grad_norm": 0.5500084757804871, |
|
"learning_rate": 7.408768370508577e-06, |
|
"loss": 0.7894, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"grad_norm": 0.5433350205421448, |
|
"learning_rate": 7.398565566251232e-06, |
|
"loss": 0.8024, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.222, |
|
"grad_norm": 0.6301877498626709, |
|
"learning_rate": 7.3883497727579e-06, |
|
"loss": 0.8209, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 1.224, |
|
"grad_norm": 0.4610621929168701, |
|
"learning_rate": 7.378121045351378e-06, |
|
"loss": 0.8452, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 1.226, |
|
"grad_norm": 0.5440369248390198, |
|
"learning_rate": 7.3678794394245e-06, |
|
"loss": 0.8119, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 1.228, |
|
"grad_norm": 0.5543891191482544, |
|
"learning_rate": 7.357625010439853e-06, |
|
"loss": 0.8434, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"grad_norm": 0.6058164834976196, |
|
"learning_rate": 7.347357813929455e-06, |
|
"loss": 0.7724, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 1.232, |
|
"grad_norm": 0.5185509920120239, |
|
"learning_rate": 7.337077905494472e-06, |
|
"loss": 0.8108, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 1.234, |
|
"grad_norm": 0.6211835145950317, |
|
"learning_rate": 7.326785340804908e-06, |
|
"loss": 0.8424, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 1.236, |
|
"grad_norm": 0.6689534187316895, |
|
"learning_rate": 7.31648017559931e-06, |
|
"loss": 0.8203, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 1.238, |
|
"grad_norm": 0.6034107804298401, |
|
"learning_rate": 7.3061624656844544e-06, |
|
"loss": 0.8432, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"grad_norm": 0.6534163355827332, |
|
"learning_rate": 7.295832266935059e-06, |
|
"loss": 0.8214, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.242, |
|
"grad_norm": 0.6900277137756348, |
|
"learning_rate": 7.285489635293472e-06, |
|
"loss": 0.8246, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 1.244, |
|
"grad_norm": 0.49560463428497314, |
|
"learning_rate": 7.275134626769369e-06, |
|
"loss": 0.7885, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 1.246, |
|
"grad_norm": 0.7048647403717041, |
|
"learning_rate": 7.264767297439455e-06, |
|
"loss": 0.7972, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 1.248, |
|
"grad_norm": 0.6690518260002136, |
|
"learning_rate": 7.254387703447154e-06, |
|
"loss": 0.8183, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 0.5271294713020325, |
|
"learning_rate": 7.243995901002312e-06, |
|
"loss": 0.7815, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 1.252, |
|
"grad_norm": 0.5115328431129456, |
|
"learning_rate": 7.233591946380884e-06, |
|
"loss": 0.7985, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 1.254, |
|
"grad_norm": 0.9464442133903503, |
|
"learning_rate": 7.223175895924638e-06, |
|
"loss": 0.8168, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 1.256, |
|
"grad_norm": 0.5649638772010803, |
|
"learning_rate": 7.212747806040845e-06, |
|
"loss": 0.818, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 1.258, |
|
"grad_norm": 0.5858141183853149, |
|
"learning_rate": 7.2023077332019755e-06, |
|
"loss": 0.7964, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"grad_norm": 0.6316258907318115, |
|
"learning_rate": 7.191855733945388e-06, |
|
"loss": 0.819, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.262, |
|
"grad_norm": 0.6449371576309204, |
|
"learning_rate": 7.181391864873034e-06, |
|
"loss": 0.8433, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 1.264, |
|
"grad_norm": 0.5685151219367981, |
|
"learning_rate": 7.170916182651141e-06, |
|
"loss": 0.8091, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 1.266, |
|
"grad_norm": 0.6365333795547485, |
|
"learning_rate": 7.160428744009913e-06, |
|
"loss": 0.7851, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 1.268, |
|
"grad_norm": 0.6000338196754456, |
|
"learning_rate": 7.149929605743214e-06, |
|
"loss": 0.8176, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"grad_norm": 0.6080004572868347, |
|
"learning_rate": 7.1394188247082715e-06, |
|
"loss": 0.7953, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 1.272, |
|
"grad_norm": 0.6208333373069763, |
|
"learning_rate": 7.128896457825364e-06, |
|
"loss": 0.812, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 1.274, |
|
"grad_norm": 0.6057702302932739, |
|
"learning_rate": 7.118362562077508e-06, |
|
"loss": 0.8129, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 1.276, |
|
"grad_norm": 0.562838613986969, |
|
"learning_rate": 7.107817194510157e-06, |
|
"loss": 0.8127, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 1.278, |
|
"grad_norm": 0.641021728515625, |
|
"learning_rate": 7.0972604122308865e-06, |
|
"loss": 0.8113, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 0.6385996341705322, |
|
"learning_rate": 7.08669227240909e-06, |
|
"loss": 0.8378, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.282, |
|
"grad_norm": 0.6018445491790771, |
|
"learning_rate": 7.076112832275667e-06, |
|
"loss": 0.823, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 1.284, |
|
"grad_norm": 0.6377434134483337, |
|
"learning_rate": 7.06552214912271e-06, |
|
"loss": 0.8271, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 1.286, |
|
"grad_norm": 0.6069837212562561, |
|
"learning_rate": 7.054920280303199e-06, |
|
"loss": 0.806, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 1.288, |
|
"grad_norm": 0.5235119462013245, |
|
"learning_rate": 7.04430728323069e-06, |
|
"loss": 0.8077, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"grad_norm": 0.5952073335647583, |
|
"learning_rate": 7.033683215379002e-06, |
|
"loss": 0.7621, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 1.292, |
|
"grad_norm": 0.5681912899017334, |
|
"learning_rate": 7.023048134281907e-06, |
|
"loss": 0.7912, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 1.294, |
|
"grad_norm": 0.4946375787258148, |
|
"learning_rate": 7.012402097532815e-06, |
|
"loss": 0.8125, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 1.296, |
|
"grad_norm": 0.6761035919189453, |
|
"learning_rate": 7.0017451627844765e-06, |
|
"loss": 0.8038, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 1.298, |
|
"grad_norm": 0.6653770208358765, |
|
"learning_rate": 6.991077387748643e-06, |
|
"loss": 0.7879, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 0.510911762714386, |
|
"learning_rate": 6.980398830195785e-06, |
|
"loss": 0.7873, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.302, |
|
"grad_norm": 0.6981252431869507, |
|
"learning_rate": 6.9697095479547564e-06, |
|
"loss": 0.8002, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 1.304, |
|
"grad_norm": 0.6994757652282715, |
|
"learning_rate": 6.959009598912493e-06, |
|
"loss": 0.7766, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 1.306, |
|
"grad_norm": 0.5115617513656616, |
|
"learning_rate": 6.948299041013695e-06, |
|
"loss": 0.827, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 1.308, |
|
"grad_norm": 0.6113659739494324, |
|
"learning_rate": 6.9375779322605154e-06, |
|
"loss": 0.787, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"grad_norm": 0.5647181272506714, |
|
"learning_rate": 6.9268463307122425e-06, |
|
"loss": 0.7437, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 1.312, |
|
"grad_norm": 0.4827154278755188, |
|
"learning_rate": 6.916104294484988e-06, |
|
"loss": 0.8348, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 1.314, |
|
"grad_norm": 0.61137455701828, |
|
"learning_rate": 6.905351881751372e-06, |
|
"loss": 0.8054, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 1.316, |
|
"grad_norm": 0.606617271900177, |
|
"learning_rate": 6.8945891507402075e-06, |
|
"loss": 0.8113, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 1.318, |
|
"grad_norm": 0.7584879994392395, |
|
"learning_rate": 6.883816159736187e-06, |
|
"loss": 0.8542, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"grad_norm": 0.4678022563457489, |
|
"learning_rate": 6.873032967079562e-06, |
|
"loss": 0.7836, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.322, |
|
"grad_norm": 0.7454090118408203, |
|
"learning_rate": 6.862239631165831e-06, |
|
"loss": 0.8062, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 1.324, |
|
"grad_norm": 0.692216694355011, |
|
"learning_rate": 6.851436210445427e-06, |
|
"loss": 0.795, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 1.326, |
|
"grad_norm": 0.466247022151947, |
|
"learning_rate": 6.840622763423391e-06, |
|
"loss": 0.8105, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 1.328, |
|
"grad_norm": 0.6885058283805847, |
|
"learning_rate": 6.829799348659061e-06, |
|
"loss": 0.852, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"grad_norm": 0.6030435562133789, |
|
"learning_rate": 6.818966024765758e-06, |
|
"loss": 0.8173, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 1.332, |
|
"grad_norm": 0.580797553062439, |
|
"learning_rate": 6.808122850410461e-06, |
|
"loss": 0.8009, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 1.334, |
|
"grad_norm": 0.6403875350952148, |
|
"learning_rate": 6.7972698843135e-06, |
|
"loss": 0.8135, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 1.336, |
|
"grad_norm": 0.6278457045555115, |
|
"learning_rate": 6.7864071852482205e-06, |
|
"loss": 0.8025, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 1.338, |
|
"grad_norm": 0.6138670444488525, |
|
"learning_rate": 6.775534812040686e-06, |
|
"loss": 0.8516, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"grad_norm": 0.5964850783348083, |
|
"learning_rate": 6.7646528235693445e-06, |
|
"loss": 0.8037, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.342, |
|
"grad_norm": 0.4964216351509094, |
|
"learning_rate": 6.753761278764719e-06, |
|
"loss": 0.8162, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 1.3439999999999999, |
|
"grad_norm": 0.6246684789657593, |
|
"learning_rate": 6.7428602366090764e-06, |
|
"loss": 0.8007, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 1.346, |
|
"grad_norm": 0.5912864208221436, |
|
"learning_rate": 6.7319497561361245e-06, |
|
"loss": 0.8589, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 1.3479999999999999, |
|
"grad_norm": 0.634901762008667, |
|
"learning_rate": 6.721029896430678e-06, |
|
"loss": 0.8166, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"grad_norm": 0.5598165988922119, |
|
"learning_rate": 6.710100716628345e-06, |
|
"loss": 0.8003, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 1.3519999999999999, |
|
"grad_norm": 0.5302711725234985, |
|
"learning_rate": 6.699162275915208e-06, |
|
"loss": 0.7989, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 1.354, |
|
"grad_norm": 0.6884709596633911, |
|
"learning_rate": 6.6882146335274955e-06, |
|
"loss": 0.8377, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 1.3559999999999999, |
|
"grad_norm": 0.5577335357666016, |
|
"learning_rate": 6.677257848751276e-06, |
|
"loss": 0.8159, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 1.358, |
|
"grad_norm": 0.5639868974685669, |
|
"learning_rate": 6.666291980922122e-06, |
|
"loss": 0.8189, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 1.3599999999999999, |
|
"grad_norm": 0.6146543622016907, |
|
"learning_rate": 6.655317089424791e-06, |
|
"loss": 0.8428, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.362, |
|
"grad_norm": 0.4616728127002716, |
|
"learning_rate": 6.644333233692917e-06, |
|
"loss": 0.8361, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 1.3639999999999999, |
|
"grad_norm": 0.556716799736023, |
|
"learning_rate": 6.633340473208673e-06, |
|
"loss": 0.7989, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 1.366, |
|
"grad_norm": 0.5753201246261597, |
|
"learning_rate": 6.622338867502452e-06, |
|
"loss": 0.8213, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 1.3679999999999999, |
|
"grad_norm": 0.5656223893165588, |
|
"learning_rate": 6.611328476152557e-06, |
|
"loss": 0.8155, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"grad_norm": 0.5939602255821228, |
|
"learning_rate": 6.600309358784858e-06, |
|
"loss": 0.8176, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 1.3719999999999999, |
|
"grad_norm": 0.5385190844535828, |
|
"learning_rate": 6.58928157507249e-06, |
|
"loss": 0.8592, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 1.374, |
|
"grad_norm": 0.5945911407470703, |
|
"learning_rate": 6.578245184735513e-06, |
|
"loss": 0.8017, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 1.376, |
|
"grad_norm": 0.6518405079841614, |
|
"learning_rate": 6.567200247540599e-06, |
|
"loss": 0.841, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 1.3780000000000001, |
|
"grad_norm": 0.6033949255943298, |
|
"learning_rate": 6.556146823300701e-06, |
|
"loss": 0.8232, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"grad_norm": 0.570328414440155, |
|
"learning_rate": 6.545084971874738e-06, |
|
"loss": 0.805, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.3820000000000001, |
|
"grad_norm": 0.5114735960960388, |
|
"learning_rate": 6.534014753167263e-06, |
|
"loss": 0.8165, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 1.384, |
|
"grad_norm": 0.5306764245033264, |
|
"learning_rate": 6.522936227128139e-06, |
|
"loss": 0.7843, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 1.3860000000000001, |
|
"grad_norm": 0.5091618895530701, |
|
"learning_rate": 6.5118494537522235e-06, |
|
"loss": 0.7579, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 1.388, |
|
"grad_norm": 0.4887152910232544, |
|
"learning_rate": 6.500754493079029e-06, |
|
"loss": 0.7969, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 1.3900000000000001, |
|
"grad_norm": 0.49224093556404114, |
|
"learning_rate": 6.48965140519241e-06, |
|
"loss": 0.837, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 1.392, |
|
"grad_norm": 0.47571027278900146, |
|
"learning_rate": 6.4785402502202345e-06, |
|
"loss": 0.7802, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 1.3940000000000001, |
|
"grad_norm": 0.46153298020362854, |
|
"learning_rate": 6.467421088334052e-06, |
|
"loss": 0.7947, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 1.396, |
|
"grad_norm": 0.5026895403862, |
|
"learning_rate": 6.456293979748778e-06, |
|
"loss": 0.7754, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 1.3980000000000001, |
|
"grad_norm": 0.512188732624054, |
|
"learning_rate": 6.445158984722358e-06, |
|
"loss": 0.856, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 0.5626097917556763, |
|
"learning_rate": 6.434016163555452e-06, |
|
"loss": 0.8204, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.4020000000000001, |
|
"grad_norm": 0.49451103806495667, |
|
"learning_rate": 6.422865576591096e-06, |
|
"loss": 0.8005, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 1.404, |
|
"grad_norm": 0.47687214612960815, |
|
"learning_rate": 6.411707284214384e-06, |
|
"loss": 0.813, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 1.4060000000000001, |
|
"grad_norm": 0.5690708160400391, |
|
"learning_rate": 6.400541346852136e-06, |
|
"loss": 0.8292, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 1.408, |
|
"grad_norm": 0.4766281247138977, |
|
"learning_rate": 6.389367824972575e-06, |
|
"loss": 0.7606, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"grad_norm": 0.48364976048469543, |
|
"learning_rate": 6.378186779084996e-06, |
|
"loss": 0.7973, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 1.412, |
|
"grad_norm": 0.51153564453125, |
|
"learning_rate": 6.366998269739442e-06, |
|
"loss": 0.817, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 1.414, |
|
"grad_norm": 0.4661514461040497, |
|
"learning_rate": 6.35580235752637e-06, |
|
"loss": 0.8269, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 1.416, |
|
"grad_norm": 0.4984007477760315, |
|
"learning_rate": 6.344599103076329e-06, |
|
"loss": 0.8395, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 1.418, |
|
"grad_norm": 0.5143007636070251, |
|
"learning_rate": 6.3333885670596285e-06, |
|
"loss": 0.7778, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"grad_norm": 0.48550623655319214, |
|
"learning_rate": 6.322170810186013e-06, |
|
"loss": 0.8195, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.422, |
|
"grad_norm": 0.49492278695106506, |
|
"learning_rate": 6.310945893204324e-06, |
|
"loss": 0.8467, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 1.424, |
|
"grad_norm": 0.521795392036438, |
|
"learning_rate": 6.299713876902188e-06, |
|
"loss": 0.8383, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 1.426, |
|
"grad_norm": 0.544080913066864, |
|
"learning_rate": 6.28847482210567e-06, |
|
"loss": 0.8189, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 1.428, |
|
"grad_norm": 0.47585853934288025, |
|
"learning_rate": 6.277228789678953e-06, |
|
"loss": 0.8071, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"grad_norm": 0.5880405306816101, |
|
"learning_rate": 6.26597584052401e-06, |
|
"loss": 0.8156, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 1.432, |
|
"grad_norm": 0.5470208525657654, |
|
"learning_rate": 6.254716035580264e-06, |
|
"loss": 0.8118, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 1.434, |
|
"grad_norm": 0.5518249273300171, |
|
"learning_rate": 6.243449435824276e-06, |
|
"loss": 0.8103, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 1.436, |
|
"grad_norm": 0.5518653988838196, |
|
"learning_rate": 6.23217610226939e-06, |
|
"loss": 0.8304, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 1.438, |
|
"grad_norm": 0.5015103220939636, |
|
"learning_rate": 6.220896095965428e-06, |
|
"loss": 0.8015, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"grad_norm": 0.5413691997528076, |
|
"learning_rate": 6.209609477998339e-06, |
|
"loss": 0.8005, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.442, |
|
"grad_norm": 0.5215966105461121, |
|
"learning_rate": 6.198316309489886e-06, |
|
"loss": 0.8073, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 1.444, |
|
"grad_norm": 0.471635103225708, |
|
"learning_rate": 6.187016651597299e-06, |
|
"loss": 0.8202, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 1.446, |
|
"grad_norm": 0.6489139795303345, |
|
"learning_rate": 6.17571056551295e-06, |
|
"loss": 0.8254, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 1.448, |
|
"grad_norm": 0.44364041090011597, |
|
"learning_rate": 6.16439811246403e-06, |
|
"loss": 0.7725, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"grad_norm": 0.5453159809112549, |
|
"learning_rate": 6.153079353712201e-06, |
|
"loss": 0.798, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 1.452, |
|
"grad_norm": 0.5725481510162354, |
|
"learning_rate": 6.141754350553279e-06, |
|
"loss": 0.8243, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 1.454, |
|
"grad_norm": 0.5413415431976318, |
|
"learning_rate": 6.130423164316893e-06, |
|
"loss": 0.8279, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 1.456, |
|
"grad_norm": 0.4809170961380005, |
|
"learning_rate": 6.119085856366158e-06, |
|
"loss": 0.797, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 1.458, |
|
"grad_norm": 0.5237243175506592, |
|
"learning_rate": 6.107742488097338e-06, |
|
"loss": 0.8013, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"grad_norm": 0.5416289567947388, |
|
"learning_rate": 6.0963931209395165e-06, |
|
"loss": 0.809, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.462, |
|
"grad_norm": 0.47033780813217163, |
|
"learning_rate": 6.085037816354269e-06, |
|
"loss": 0.8291, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 1.464, |
|
"grad_norm": 0.5361016988754272, |
|
"learning_rate": 6.073676635835317e-06, |
|
"loss": 0.8432, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 1.466, |
|
"grad_norm": 0.5197705626487732, |
|
"learning_rate": 6.062309640908206e-06, |
|
"loss": 0.804, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 1.468, |
|
"grad_norm": 0.5188045501708984, |
|
"learning_rate": 6.05093689312997e-06, |
|
"loss": 0.8176, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"grad_norm": 0.5773131251335144, |
|
"learning_rate": 6.039558454088796e-06, |
|
"loss": 0.8276, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 1.472, |
|
"grad_norm": 0.5026968121528625, |
|
"learning_rate": 6.028174385403693e-06, |
|
"loss": 0.8054, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 1.474, |
|
"grad_norm": 0.481131911277771, |
|
"learning_rate": 6.016784748724153e-06, |
|
"loss": 0.7974, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 1.476, |
|
"grad_norm": 0.6374879479408264, |
|
"learning_rate": 6.005389605729824e-06, |
|
"loss": 0.7866, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 1.478, |
|
"grad_norm": 0.5750545263290405, |
|
"learning_rate": 5.993989018130173e-06, |
|
"loss": 0.799, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"grad_norm": 0.5512261986732483, |
|
"learning_rate": 5.982583047664151e-06, |
|
"loss": 0.7951, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.482, |
|
"grad_norm": 0.4982967674732208, |
|
"learning_rate": 5.97117175609986e-06, |
|
"loss": 0.8104, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 1.484, |
|
"grad_norm": 0.5609320402145386, |
|
"learning_rate": 5.9597552052342174e-06, |
|
"loss": 0.8109, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 1.486, |
|
"grad_norm": 0.5253490209579468, |
|
"learning_rate": 5.948333456892624e-06, |
|
"loss": 0.7986, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 1.488, |
|
"grad_norm": 0.6019419431686401, |
|
"learning_rate": 5.936906572928625e-06, |
|
"loss": 0.8111, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"grad_norm": 0.4798535108566284, |
|
"learning_rate": 5.925474615223573e-06, |
|
"loss": 0.8139, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 1.492, |
|
"grad_norm": 0.5777360200881958, |
|
"learning_rate": 5.914037645686308e-06, |
|
"loss": 0.7785, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 1.494, |
|
"grad_norm": 0.5345956087112427, |
|
"learning_rate": 5.902595726252801e-06, |
|
"loss": 0.8064, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 1.496, |
|
"grad_norm": 0.5394262671470642, |
|
"learning_rate": 5.891148918885834e-06, |
|
"loss": 0.7876, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 1.498, |
|
"grad_norm": 0.575792670249939, |
|
"learning_rate": 5.879697285574655e-06, |
|
"loss": 0.8266, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 0.4988614320755005, |
|
"learning_rate": 5.8682408883346535e-06, |
|
"loss": 0.8024, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.502, |
|
"grad_norm": 0.4781121015548706, |
|
"learning_rate": 5.85677978920701e-06, |
|
"loss": 0.8014, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 1.504, |
|
"grad_norm": 0.5493645668029785, |
|
"learning_rate": 5.84531405025837e-06, |
|
"loss": 0.7882, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 1.506, |
|
"grad_norm": 0.5231897234916687, |
|
"learning_rate": 5.8338437335805124e-06, |
|
"loss": 0.7806, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 1.508, |
|
"grad_norm": 0.4463392496109009, |
|
"learning_rate": 5.8223689012899945e-06, |
|
"loss": 0.7944, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"grad_norm": 0.4627637565135956, |
|
"learning_rate": 5.810889615527839e-06, |
|
"loss": 0.796, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 1.512, |
|
"grad_norm": 0.540871798992157, |
|
"learning_rate": 5.799405938459175e-06, |
|
"loss": 0.7873, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 1.514, |
|
"grad_norm": 0.5337440371513367, |
|
"learning_rate": 5.787917932272922e-06, |
|
"loss": 0.7991, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 1.516, |
|
"grad_norm": 0.5268739461898804, |
|
"learning_rate": 5.776425659181438e-06, |
|
"loss": 0.773, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 1.518, |
|
"grad_norm": 0.5249832272529602, |
|
"learning_rate": 5.764929181420191e-06, |
|
"loss": 0.7912, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"grad_norm": 0.7338144779205322, |
|
"learning_rate": 5.753428561247416e-06, |
|
"loss": 0.7758, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.522, |
|
"grad_norm": 0.603810727596283, |
|
"learning_rate": 5.741923860943783e-06, |
|
"loss": 0.8154, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 1.524, |
|
"grad_norm": 0.5017214417457581, |
|
"learning_rate": 5.730415142812059e-06, |
|
"loss": 0.8125, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 1.526, |
|
"grad_norm": 0.6532445549964905, |
|
"learning_rate": 5.718902469176765e-06, |
|
"loss": 0.8281, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 1.528, |
|
"grad_norm": 0.554637610912323, |
|
"learning_rate": 5.707385902383845e-06, |
|
"loss": 0.8564, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"grad_norm": 0.47505423426628113, |
|
"learning_rate": 5.695865504800328e-06, |
|
"loss": 0.818, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 1.532, |
|
"grad_norm": 0.5482528805732727, |
|
"learning_rate": 5.684341338813986e-06, |
|
"loss": 0.8216, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 1.534, |
|
"grad_norm": 0.49230730533599854, |
|
"learning_rate": 5.672813466832998e-06, |
|
"loss": 0.7835, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 1.536, |
|
"grad_norm": 0.5212811231613159, |
|
"learning_rate": 5.661281951285613e-06, |
|
"loss": 0.8284, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 1.538, |
|
"grad_norm": 0.4840611517429352, |
|
"learning_rate": 5.649746854619814e-06, |
|
"loss": 0.8013, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"grad_norm": 0.4953254163265228, |
|
"learning_rate": 5.638208239302975e-06, |
|
"loss": 0.782, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.542, |
|
"grad_norm": 0.570379376411438, |
|
"learning_rate": 5.626666167821522e-06, |
|
"loss": 0.8124, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 1.544, |
|
"grad_norm": 0.5032173991203308, |
|
"learning_rate": 5.615120702680604e-06, |
|
"loss": 0.8016, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 1.546, |
|
"grad_norm": 0.5023512840270996, |
|
"learning_rate": 5.6035719064037446e-06, |
|
"loss": 0.7978, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 1.548, |
|
"grad_norm": 0.5365172028541565, |
|
"learning_rate": 5.592019841532507e-06, |
|
"loss": 0.8113, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"grad_norm": 0.6160049438476562, |
|
"learning_rate": 5.5804645706261515e-06, |
|
"loss": 0.7946, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 1.552, |
|
"grad_norm": 0.46849778294563293, |
|
"learning_rate": 5.568906156261309e-06, |
|
"loss": 0.8342, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 1.554, |
|
"grad_norm": 0.5677435994148254, |
|
"learning_rate": 5.557344661031628e-06, |
|
"loss": 0.8129, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 1.556, |
|
"grad_norm": 0.5215854644775391, |
|
"learning_rate": 5.54578014754744e-06, |
|
"loss": 0.7856, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 1.558, |
|
"grad_norm": 0.4937414824962616, |
|
"learning_rate": 5.5342126784354265e-06, |
|
"loss": 0.8075, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"grad_norm": 0.4672093987464905, |
|
"learning_rate": 5.522642316338268e-06, |
|
"loss": 0.8288, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.562, |
|
"grad_norm": 0.5195022225379944, |
|
"learning_rate": 5.511069123914319e-06, |
|
"loss": 0.7994, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 1.564, |
|
"grad_norm": 0.4978856146335602, |
|
"learning_rate": 5.499493163837258e-06, |
|
"loss": 0.817, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 1.5659999999999998, |
|
"grad_norm": 0.5288148522377014, |
|
"learning_rate": 5.487914498795748e-06, |
|
"loss": 0.7984, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 1.568, |
|
"grad_norm": 0.47676193714141846, |
|
"learning_rate": 5.476333191493108e-06, |
|
"loss": 0.7923, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 1.5699999999999998, |
|
"grad_norm": 0.5122764706611633, |
|
"learning_rate": 5.464749304646963e-06, |
|
"loss": 0.7706, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 1.572, |
|
"grad_norm": 0.4978078007698059, |
|
"learning_rate": 5.453162900988902e-06, |
|
"loss": 0.7747, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 1.5739999999999998, |
|
"grad_norm": 0.4881911277770996, |
|
"learning_rate": 5.44157404326415e-06, |
|
"loss": 0.829, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 1.576, |
|
"grad_norm": 0.5487351417541504, |
|
"learning_rate": 5.429982794231221e-06, |
|
"loss": 0.8169, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 1.5779999999999998, |
|
"grad_norm": 0.5416744947433472, |
|
"learning_rate": 5.41838921666158e-06, |
|
"loss": 0.8234, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"grad_norm": 0.5604218244552612, |
|
"learning_rate": 5.406793373339292e-06, |
|
"loss": 0.809, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.5819999999999999, |
|
"grad_norm": 0.5446667075157166, |
|
"learning_rate": 5.395195327060707e-06, |
|
"loss": 0.8159, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 1.584, |
|
"grad_norm": 0.5692674517631531, |
|
"learning_rate": 5.383595140634093e-06, |
|
"loss": 0.8395, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 1.5859999999999999, |
|
"grad_norm": 0.5842452049255371, |
|
"learning_rate": 5.371992876879318e-06, |
|
"loss": 0.8206, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 1.588, |
|
"grad_norm": 0.5441771149635315, |
|
"learning_rate": 5.360388598627487e-06, |
|
"loss": 0.8254, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 1.5899999999999999, |
|
"grad_norm": 0.4671582579612732, |
|
"learning_rate": 5.348782368720627e-06, |
|
"loss": 0.7993, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 1.592, |
|
"grad_norm": 0.599138617515564, |
|
"learning_rate": 5.337174250011326e-06, |
|
"loss": 0.8183, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 1.5939999999999999, |
|
"grad_norm": 0.4990823268890381, |
|
"learning_rate": 5.325564305362404e-06, |
|
"loss": 0.8081, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 1.596, |
|
"grad_norm": 0.5241624116897583, |
|
"learning_rate": 5.3139525976465675e-06, |
|
"loss": 0.8425, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 1.5979999999999999, |
|
"grad_norm": 0.572844386100769, |
|
"learning_rate": 5.3023391897460715e-06, |
|
"loss": 0.8241, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 0.5502427220344543, |
|
"learning_rate": 5.290724144552379e-06, |
|
"loss": 0.7951, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.6019999999999999, |
|
"grad_norm": 0.4780225157737732, |
|
"learning_rate": 5.27910752496582e-06, |
|
"loss": 0.8158, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 1.604, |
|
"grad_norm": 0.4722861051559448, |
|
"learning_rate": 5.267489393895247e-06, |
|
"loss": 0.7858, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 1.6059999999999999, |
|
"grad_norm": 0.47437071800231934, |
|
"learning_rate": 5.255869814257701e-06, |
|
"loss": 0.8218, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 1.608, |
|
"grad_norm": 0.5584851503372192, |
|
"learning_rate": 5.244248848978067e-06, |
|
"loss": 0.8444, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 1.6099999999999999, |
|
"grad_norm": 0.5402180552482605, |
|
"learning_rate": 5.232626560988735e-06, |
|
"loss": 0.8223, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 1.612, |
|
"grad_norm": 0.5967413187026978, |
|
"learning_rate": 5.221003013229253e-06, |
|
"loss": 0.8017, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 1.6139999999999999, |
|
"grad_norm": 0.5010871887207031, |
|
"learning_rate": 5.209378268645998e-06, |
|
"loss": 0.8087, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 1.616, |
|
"grad_norm": 0.48053157329559326, |
|
"learning_rate": 5.197752390191827e-06, |
|
"loss": 0.7959, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 1.6179999999999999, |
|
"grad_norm": 0.5281156301498413, |
|
"learning_rate": 5.18612544082573e-06, |
|
"loss": 0.8326, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"grad_norm": 0.5991333723068237, |
|
"learning_rate": 5.174497483512506e-06, |
|
"loss": 0.792, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.6219999999999999, |
|
"grad_norm": 0.46182727813720703, |
|
"learning_rate": 5.162868581222407e-06, |
|
"loss": 0.8037, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 1.624, |
|
"grad_norm": 0.5693475604057312, |
|
"learning_rate": 5.151238796930804e-06, |
|
"loss": 0.8441, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 1.626, |
|
"grad_norm": 0.4842833876609802, |
|
"learning_rate": 5.139608193617846e-06, |
|
"loss": 0.7986, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 1.6280000000000001, |
|
"grad_norm": 0.4889780580997467, |
|
"learning_rate": 5.127976834268112e-06, |
|
"loss": 0.7985, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"grad_norm": 0.5266144871711731, |
|
"learning_rate": 5.116344781870282e-06, |
|
"loss": 0.7968, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 1.6320000000000001, |
|
"grad_norm": 0.4901430010795593, |
|
"learning_rate": 5.1047120994167855e-06, |
|
"loss": 0.7998, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 1.634, |
|
"grad_norm": 0.47501808404922485, |
|
"learning_rate": 5.093078849903464e-06, |
|
"loss": 0.7756, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 1.6360000000000001, |
|
"grad_norm": 0.5496917963027954, |
|
"learning_rate": 5.081445096329229e-06, |
|
"loss": 0.7817, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 1.638, |
|
"grad_norm": 0.43850964307785034, |
|
"learning_rate": 5.069810901695727e-06, |
|
"loss": 0.8252, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 1.6400000000000001, |
|
"grad_norm": 0.5552741289138794, |
|
"learning_rate": 5.0581763290069865e-06, |
|
"loss": 0.8149, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.642, |
|
"grad_norm": 0.5505797266960144, |
|
"learning_rate": 5.046541441269085e-06, |
|
"loss": 0.8131, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 1.6440000000000001, |
|
"grad_norm": 0.4630671441555023, |
|
"learning_rate": 5.034906301489808e-06, |
|
"loss": 0.7794, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 1.646, |
|
"grad_norm": 0.49438831210136414, |
|
"learning_rate": 5.0232709726783065e-06, |
|
"loss": 0.8188, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 1.6480000000000001, |
|
"grad_norm": 0.453652560710907, |
|
"learning_rate": 5.011635517844753e-06, |
|
"loss": 0.786, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"grad_norm": 0.49963581562042236, |
|
"learning_rate": 5e-06, |
|
"loss": 0.8272, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 1.6520000000000001, |
|
"grad_norm": 0.5175125598907471, |
|
"learning_rate": 4.988364482155249e-06, |
|
"loss": 0.7994, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 1.654, |
|
"grad_norm": 0.4657656252384186, |
|
"learning_rate": 4.976729027321694e-06, |
|
"loss": 0.7749, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 1.6560000000000001, |
|
"grad_norm": 0.4991082549095154, |
|
"learning_rate": 4.965093698510192e-06, |
|
"loss": 0.8031, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 1.658, |
|
"grad_norm": 0.47223085165023804, |
|
"learning_rate": 4.953458558730917e-06, |
|
"loss": 0.826, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 1.6600000000000001, |
|
"grad_norm": 0.5325574278831482, |
|
"learning_rate": 4.941823670993016e-06, |
|
"loss": 0.8018, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.662, |
|
"grad_norm": 0.5229061245918274, |
|
"learning_rate": 4.9301890983042744e-06, |
|
"loss": 0.8187, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 1.6640000000000001, |
|
"grad_norm": 0.47113412618637085, |
|
"learning_rate": 4.9185549036707715e-06, |
|
"loss": 0.8358, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 1.666, |
|
"grad_norm": 0.49168315529823303, |
|
"learning_rate": 4.906921150096538e-06, |
|
"loss": 0.8258, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 1.6680000000000001, |
|
"grad_norm": 0.5069917440414429, |
|
"learning_rate": 4.895287900583216e-06, |
|
"loss": 0.8118, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"grad_norm": 0.518430769443512, |
|
"learning_rate": 4.883655218129719e-06, |
|
"loss": 0.8007, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 1.6720000000000002, |
|
"grad_norm": 0.5682756900787354, |
|
"learning_rate": 4.87202316573189e-06, |
|
"loss": 0.7916, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 1.674, |
|
"grad_norm": 0.5430126190185547, |
|
"learning_rate": 4.860391806382157e-06, |
|
"loss": 0.841, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 1.6760000000000002, |
|
"grad_norm": 0.516560435295105, |
|
"learning_rate": 4.8487612030691975e-06, |
|
"loss": 0.8029, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 1.678, |
|
"grad_norm": 0.5646497011184692, |
|
"learning_rate": 4.837131418777595e-06, |
|
"loss": 0.8312, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 1.6800000000000002, |
|
"grad_norm": 0.5456799864768982, |
|
"learning_rate": 4.825502516487497e-06, |
|
"loss": 0.8234, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.682, |
|
"grad_norm": 0.5457205176353455, |
|
"learning_rate": 4.813874559174271e-06, |
|
"loss": 0.7864, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 1.6840000000000002, |
|
"grad_norm": 0.5338269472122192, |
|
"learning_rate": 4.802247609808175e-06, |
|
"loss": 0.8137, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 1.686, |
|
"grad_norm": 0.5644640922546387, |
|
"learning_rate": 4.7906217313540035e-06, |
|
"loss": 0.8368, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 1.688, |
|
"grad_norm": 0.5289748311042786, |
|
"learning_rate": 4.778996986770747e-06, |
|
"loss": 0.8174, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"grad_norm": 0.48800283670425415, |
|
"learning_rate": 4.767373439011267e-06, |
|
"loss": 0.8208, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 1.692, |
|
"grad_norm": 0.5060573816299438, |
|
"learning_rate": 4.755751151021934e-06, |
|
"loss": 0.7976, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 1.694, |
|
"grad_norm": 0.5901538133621216, |
|
"learning_rate": 4.744130185742301e-06, |
|
"loss": 0.7963, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 1.696, |
|
"grad_norm": 0.43792036175727844, |
|
"learning_rate": 4.732510606104754e-06, |
|
"loss": 0.7713, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 1.698, |
|
"grad_norm": 0.46001285314559937, |
|
"learning_rate": 4.720892475034181e-06, |
|
"loss": 0.7949, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"grad_norm": 0.5100943446159363, |
|
"learning_rate": 4.7092758554476215e-06, |
|
"loss": 0.867, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.702, |
|
"grad_norm": 0.4933598041534424, |
|
"learning_rate": 4.6976608102539285e-06, |
|
"loss": 0.8546, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 1.704, |
|
"grad_norm": 0.4918576776981354, |
|
"learning_rate": 4.686047402353433e-06, |
|
"loss": 0.8219, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 1.706, |
|
"grad_norm": 0.451834499835968, |
|
"learning_rate": 4.674435694637597e-06, |
|
"loss": 0.8474, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 1.708, |
|
"grad_norm": 0.47564661502838135, |
|
"learning_rate": 4.662825749988675e-06, |
|
"loss": 0.7999, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"grad_norm": 0.5556415319442749, |
|
"learning_rate": 4.651217631279374e-06, |
|
"loss": 0.7976, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 1.712, |
|
"grad_norm": 0.4383542537689209, |
|
"learning_rate": 4.639611401372514e-06, |
|
"loss": 0.8453, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 1.714, |
|
"grad_norm": 0.49957823753356934, |
|
"learning_rate": 4.6280071231206845e-06, |
|
"loss": 0.831, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 1.716, |
|
"grad_norm": 0.5337651968002319, |
|
"learning_rate": 4.6164048593659076e-06, |
|
"loss": 0.8025, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 1.718, |
|
"grad_norm": 0.5165109038352966, |
|
"learning_rate": 4.604804672939295e-06, |
|
"loss": 0.7944, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"grad_norm": 0.5279600024223328, |
|
"learning_rate": 4.59320662666071e-06, |
|
"loss": 0.7874, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.722, |
|
"grad_norm": 0.4770665168762207, |
|
"learning_rate": 4.581610783338424e-06, |
|
"loss": 0.8016, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 1.724, |
|
"grad_norm": 0.513299822807312, |
|
"learning_rate": 4.570017205768779e-06, |
|
"loss": 0.7832, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 1.726, |
|
"grad_norm": 0.5326491594314575, |
|
"learning_rate": 4.5584259567358505e-06, |
|
"loss": 0.8094, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 1.728, |
|
"grad_norm": 0.46006107330322266, |
|
"learning_rate": 4.546837099011101e-06, |
|
"loss": 0.8345, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"grad_norm": 0.5608815550804138, |
|
"learning_rate": 4.53525069535304e-06, |
|
"loss": 0.7947, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 1.732, |
|
"grad_norm": 0.5806840062141418, |
|
"learning_rate": 4.523666808506893e-06, |
|
"loss": 0.7849, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 1.734, |
|
"grad_norm": 0.47665658593177795, |
|
"learning_rate": 4.512085501204254e-06, |
|
"loss": 0.7925, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 1.736, |
|
"grad_norm": 0.4672011435031891, |
|
"learning_rate": 4.500506836162746e-06, |
|
"loss": 0.8218, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 1.738, |
|
"grad_norm": 0.505943238735199, |
|
"learning_rate": 4.4889308760856826e-06, |
|
"loss": 0.8182, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"grad_norm": 0.6454104781150818, |
|
"learning_rate": 4.477357683661734e-06, |
|
"loss": 0.8054, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.742, |
|
"grad_norm": 0.46112364530563354, |
|
"learning_rate": 4.465787321564576e-06, |
|
"loss": 0.8006, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 1.744, |
|
"grad_norm": 0.4697955250740051, |
|
"learning_rate": 4.45421985245256e-06, |
|
"loss": 0.775, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 1.746, |
|
"grad_norm": 0.5397466421127319, |
|
"learning_rate": 4.442655338968373e-06, |
|
"loss": 0.8024, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 1.748, |
|
"grad_norm": 0.5473257303237915, |
|
"learning_rate": 4.431093843738693e-06, |
|
"loss": 0.7931, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 0.5032723546028137, |
|
"learning_rate": 4.4195354293738484e-06, |
|
"loss": 0.8341, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 1.752, |
|
"grad_norm": 0.44628429412841797, |
|
"learning_rate": 4.4079801584674955e-06, |
|
"loss": 0.768, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 1.754, |
|
"grad_norm": 0.5417639017105103, |
|
"learning_rate": 4.396428093596258e-06, |
|
"loss": 0.7915, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 1.756, |
|
"grad_norm": 0.5060398578643799, |
|
"learning_rate": 4.384879297319398e-06, |
|
"loss": 0.8007, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 1.758, |
|
"grad_norm": 0.44769251346588135, |
|
"learning_rate": 4.373333832178478e-06, |
|
"loss": 0.8074, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"grad_norm": 0.5053737759590149, |
|
"learning_rate": 4.361791760697027e-06, |
|
"loss": 0.7963, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.762, |
|
"grad_norm": 0.4751681685447693, |
|
"learning_rate": 4.3502531453801885e-06, |
|
"loss": 0.8116, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 1.764, |
|
"grad_norm": 0.576187789440155, |
|
"learning_rate": 4.3387180487143875e-06, |
|
"loss": 0.8269, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 1.766, |
|
"grad_norm": 0.49040693044662476, |
|
"learning_rate": 4.3271865331670036e-06, |
|
"loss": 0.8172, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 1.768, |
|
"grad_norm": 0.44776564836502075, |
|
"learning_rate": 4.315658661186016e-06, |
|
"loss": 0.8085, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"grad_norm": 0.5188393592834473, |
|
"learning_rate": 4.304134495199675e-06, |
|
"loss": 0.7949, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 1.772, |
|
"grad_norm": 0.5137274861335754, |
|
"learning_rate": 4.2926140976161555e-06, |
|
"loss": 0.7843, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 1.774, |
|
"grad_norm": 0.5027738809585571, |
|
"learning_rate": 4.281097530823237e-06, |
|
"loss": 0.8109, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 1.776, |
|
"grad_norm": 0.5102717280387878, |
|
"learning_rate": 4.269584857187942e-06, |
|
"loss": 0.8051, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 1.778, |
|
"grad_norm": 0.49146613478660583, |
|
"learning_rate": 4.258076139056217e-06, |
|
"loss": 0.8164, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"grad_norm": 0.47425997257232666, |
|
"learning_rate": 4.246571438752585e-06, |
|
"loss": 0.7982, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.782, |
|
"grad_norm": 0.509017825126648, |
|
"learning_rate": 4.23507081857981e-06, |
|
"loss": 0.8061, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 1.784, |
|
"grad_norm": 0.5787748694419861, |
|
"learning_rate": 4.2235743408185635e-06, |
|
"loss": 0.7887, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 1.786, |
|
"grad_norm": 0.46629399061203003, |
|
"learning_rate": 4.212082067727079e-06, |
|
"loss": 0.8101, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 1.788, |
|
"grad_norm": 0.6112287044525146, |
|
"learning_rate": 4.200594061540827e-06, |
|
"loss": 0.7886, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"grad_norm": 0.510998547077179, |
|
"learning_rate": 4.189110384472164e-06, |
|
"loss": 0.7793, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 1.792, |
|
"grad_norm": 0.5151376128196716, |
|
"learning_rate": 4.1776310987100054e-06, |
|
"loss": 0.7949, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 1.794, |
|
"grad_norm": 0.4956475496292114, |
|
"learning_rate": 4.166156266419489e-06, |
|
"loss": 0.787, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 1.796, |
|
"grad_norm": 0.5557451248168945, |
|
"learning_rate": 4.154685949741631e-06, |
|
"loss": 0.8264, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 1.798, |
|
"grad_norm": 0.50986248254776, |
|
"learning_rate": 4.143220210792993e-06, |
|
"loss": 0.7918, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 0.47877979278564453, |
|
"learning_rate": 4.131759111665349e-06, |
|
"loss": 0.7743, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.802, |
|
"grad_norm": 0.4706796407699585, |
|
"learning_rate": 4.1203027144253466e-06, |
|
"loss": 0.8061, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 1.804, |
|
"grad_norm": 0.5295886397361755, |
|
"learning_rate": 4.108851081114169e-06, |
|
"loss": 0.8092, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 1.806, |
|
"grad_norm": 0.461213082075119, |
|
"learning_rate": 4.0974042737472005e-06, |
|
"loss": 0.7893, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 1.808, |
|
"grad_norm": 0.5076703429222107, |
|
"learning_rate": 4.0859623543136935e-06, |
|
"loss": 0.8037, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"grad_norm": 0.47249898314476013, |
|
"learning_rate": 4.074525384776428e-06, |
|
"loss": 0.8312, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 1.812, |
|
"grad_norm": 0.45104101300239563, |
|
"learning_rate": 4.063093427071376e-06, |
|
"loss": 0.7977, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 1.814, |
|
"grad_norm": 0.48447185754776, |
|
"learning_rate": 4.051666543107377e-06, |
|
"loss": 0.8073, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 1.8159999999999998, |
|
"grad_norm": 0.5335273146629333, |
|
"learning_rate": 4.040244794765783e-06, |
|
"loss": 0.7914, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 1.818, |
|
"grad_norm": 0.4864543676376343, |
|
"learning_rate": 4.028828243900141e-06, |
|
"loss": 0.822, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 1.8199999999999998, |
|
"grad_norm": 0.5080208778381348, |
|
"learning_rate": 4.017416952335849e-06, |
|
"loss": 0.8095, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.822, |
|
"grad_norm": 0.5187543630599976, |
|
"learning_rate": 4.006010981869829e-06, |
|
"loss": 0.7817, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 1.8239999999999998, |
|
"grad_norm": 0.4684050679206848, |
|
"learning_rate": 3.994610394270178e-06, |
|
"loss": 0.8149, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 1.826, |
|
"grad_norm": 0.4266953468322754, |
|
"learning_rate": 3.983215251275847e-06, |
|
"loss": 0.808, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 1.8279999999999998, |
|
"grad_norm": 0.41727814078330994, |
|
"learning_rate": 3.971825614596308e-06, |
|
"loss": 0.8322, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"grad_norm": 0.5039946436882019, |
|
"learning_rate": 3.960441545911205e-06, |
|
"loss": 0.8174, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 1.8319999999999999, |
|
"grad_norm": 0.48520591855049133, |
|
"learning_rate": 3.949063106870031e-06, |
|
"loss": 0.764, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 1.834, |
|
"grad_norm": 0.550553560256958, |
|
"learning_rate": 3.9376903590917945e-06, |
|
"loss": 0.8395, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 1.8359999999999999, |
|
"grad_norm": 0.5808132886886597, |
|
"learning_rate": 3.926323364164684e-06, |
|
"loss": 0.8217, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 1.838, |
|
"grad_norm": 0.5393190979957581, |
|
"learning_rate": 3.914962183645733e-06, |
|
"loss": 0.7786, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 1.8399999999999999, |
|
"grad_norm": 0.5174140334129333, |
|
"learning_rate": 3.903606879060483e-06, |
|
"loss": 0.8074, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.842, |
|
"grad_norm": 0.5107792615890503, |
|
"learning_rate": 3.892257511902664e-06, |
|
"loss": 0.7904, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 1.8439999999999999, |
|
"grad_norm": 0.5404635667800903, |
|
"learning_rate": 3.880914143633844e-06, |
|
"loss": 0.8241, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 1.846, |
|
"grad_norm": 0.5038130283355713, |
|
"learning_rate": 3.869576835683109e-06, |
|
"loss": 0.8188, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 1.8479999999999999, |
|
"grad_norm": 0.4418555796146393, |
|
"learning_rate": 3.8582456494467214e-06, |
|
"loss": 0.8185, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"grad_norm": 0.46331271529197693, |
|
"learning_rate": 3.8469206462878e-06, |
|
"loss": 0.8046, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 1.8519999999999999, |
|
"grad_norm": 0.48213040828704834, |
|
"learning_rate": 3.835601887535971e-06, |
|
"loss": 0.8279, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 1.854, |
|
"grad_norm": 0.48893412947654724, |
|
"learning_rate": 3.82428943448705e-06, |
|
"loss": 0.7719, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 1.8559999999999999, |
|
"grad_norm": 0.4834563136100769, |
|
"learning_rate": 3.812983348402703e-06, |
|
"loss": 0.7815, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 1.858, |
|
"grad_norm": 0.4763684570789337, |
|
"learning_rate": 3.8016836905101157e-06, |
|
"loss": 0.7872, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 1.8599999999999999, |
|
"grad_norm": 0.4794900417327881, |
|
"learning_rate": 3.790390522001662e-06, |
|
"loss": 0.7982, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.862, |
|
"grad_norm": 0.4603365659713745, |
|
"learning_rate": 3.7791039040345743e-06, |
|
"loss": 0.806, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 1.8639999999999999, |
|
"grad_norm": 0.5639286041259766, |
|
"learning_rate": 3.767823897730612e-06, |
|
"loss": 0.7946, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 1.866, |
|
"grad_norm": 0.4854590594768524, |
|
"learning_rate": 3.756550564175727e-06, |
|
"loss": 0.7826, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 1.8679999999999999, |
|
"grad_norm": 0.4922090768814087, |
|
"learning_rate": 3.745283964419736e-06, |
|
"loss": 0.8108, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"grad_norm": 0.46560823917388916, |
|
"learning_rate": 3.7340241594759917e-06, |
|
"loss": 0.8257, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 1.8719999999999999, |
|
"grad_norm": 0.45512527227401733, |
|
"learning_rate": 3.7227712103210485e-06, |
|
"loss": 0.8273, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 1.874, |
|
"grad_norm": 0.5078556537628174, |
|
"learning_rate": 3.7115251778943314e-06, |
|
"loss": 0.7712, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 1.876, |
|
"grad_norm": 0.483775794506073, |
|
"learning_rate": 3.700286123097814e-06, |
|
"loss": 0.7934, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 1.8780000000000001, |
|
"grad_norm": 0.4577796757221222, |
|
"learning_rate": 3.6890541067956775e-06, |
|
"loss": 0.7992, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"grad_norm": 0.4452671408653259, |
|
"learning_rate": 3.6778291898139907e-06, |
|
"loss": 0.7963, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.8820000000000001, |
|
"grad_norm": 0.48819735646247864, |
|
"learning_rate": 3.6666114329403723e-06, |
|
"loss": 0.8253, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 1.884, |
|
"grad_norm": 0.505906879901886, |
|
"learning_rate": 3.655400896923672e-06, |
|
"loss": 0.8247, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 1.8860000000000001, |
|
"grad_norm": 0.4865492582321167, |
|
"learning_rate": 3.6441976424736315e-06, |
|
"loss": 0.8308, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 1.888, |
|
"grad_norm": 0.49690744280815125, |
|
"learning_rate": 3.633001730260558e-06, |
|
"loss": 0.7937, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 1.8900000000000001, |
|
"grad_norm": 0.5015735626220703, |
|
"learning_rate": 3.6218132209150047e-06, |
|
"loss": 0.7736, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 1.892, |
|
"grad_norm": 0.4772734045982361, |
|
"learning_rate": 3.6106321750274275e-06, |
|
"loss": 0.8031, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 1.8940000000000001, |
|
"grad_norm": 0.4924619495868683, |
|
"learning_rate": 3.5994586531478672e-06, |
|
"loss": 0.8156, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 1.896, |
|
"grad_norm": 0.5400336980819702, |
|
"learning_rate": 3.5882927157856175e-06, |
|
"loss": 0.8079, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 1.8980000000000001, |
|
"grad_norm": 0.47806546092033386, |
|
"learning_rate": 3.577134423408906e-06, |
|
"loss": 0.7797, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"grad_norm": 0.4568106532096863, |
|
"learning_rate": 3.5659838364445505e-06, |
|
"loss": 0.7719, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.9020000000000001, |
|
"grad_norm": 0.4688374400138855, |
|
"learning_rate": 3.5548410152776414e-06, |
|
"loss": 0.7971, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 1.904, |
|
"grad_norm": 0.4213714003562927, |
|
"learning_rate": 3.543706020251223e-06, |
|
"loss": 0.8177, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 1.9060000000000001, |
|
"grad_norm": 0.5132880210876465, |
|
"learning_rate": 3.5325789116659493e-06, |
|
"loss": 0.8076, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 1.908, |
|
"grad_norm": 0.5077955722808838, |
|
"learning_rate": 3.521459749779769e-06, |
|
"loss": 0.7994, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 1.9100000000000001, |
|
"grad_norm": 0.5192594528198242, |
|
"learning_rate": 3.51034859480759e-06, |
|
"loss": 0.7992, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 1.912, |
|
"grad_norm": 0.5100328326225281, |
|
"learning_rate": 3.4992455069209717e-06, |
|
"loss": 0.8076, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 1.9140000000000001, |
|
"grad_norm": 0.42553383111953735, |
|
"learning_rate": 3.488150546247778e-06, |
|
"loss": 0.817, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 1.916, |
|
"grad_norm": 0.4890826642513275, |
|
"learning_rate": 3.4770637728718608e-06, |
|
"loss": 0.8088, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 1.9180000000000001, |
|
"grad_norm": 0.4364239573478699, |
|
"learning_rate": 3.465985246832739e-06, |
|
"loss": 0.8242, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 0.418891578912735, |
|
"learning_rate": 3.4549150281252635e-06, |
|
"loss": 0.7886, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.9220000000000002, |
|
"grad_norm": 0.4589402377605438, |
|
"learning_rate": 3.4438531766993012e-06, |
|
"loss": 0.7952, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 1.924, |
|
"grad_norm": 0.4044283926486969, |
|
"learning_rate": 3.4327997524594026e-06, |
|
"loss": 0.8409, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 1.9260000000000002, |
|
"grad_norm": 0.4343528151512146, |
|
"learning_rate": 3.4217548152644887e-06, |
|
"loss": 0.8252, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 1.928, |
|
"grad_norm": 0.46095573902130127, |
|
"learning_rate": 3.4107184249275114e-06, |
|
"loss": 0.8167, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 1.9300000000000002, |
|
"grad_norm": 0.4581632614135742, |
|
"learning_rate": 3.399690641215142e-06, |
|
"loss": 0.7869, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 1.932, |
|
"grad_norm": 0.48575082421302795, |
|
"learning_rate": 3.3886715238474454e-06, |
|
"loss": 0.7851, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 1.9340000000000002, |
|
"grad_norm": 0.47937479615211487, |
|
"learning_rate": 3.3776611324975496e-06, |
|
"loss": 0.7889, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 1.936, |
|
"grad_norm": 0.44034528732299805, |
|
"learning_rate": 3.3666595267913293e-06, |
|
"loss": 0.8248, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 1.938, |
|
"grad_norm": 0.47162750363349915, |
|
"learning_rate": 3.355666766307084e-06, |
|
"loss": 0.7813, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"grad_norm": 0.44156888127326965, |
|
"learning_rate": 3.3446829105752103e-06, |
|
"loss": 0.8, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.942, |
|
"grad_norm": 0.5223917365074158, |
|
"learning_rate": 3.3337080190778816e-06, |
|
"loss": 0.7972, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 1.944, |
|
"grad_norm": 0.46116048097610474, |
|
"learning_rate": 3.322742151248726e-06, |
|
"loss": 0.7999, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 1.946, |
|
"grad_norm": 0.4879921078681946, |
|
"learning_rate": 3.311785366472506e-06, |
|
"loss": 0.8009, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 1.948, |
|
"grad_norm": 0.4909932017326355, |
|
"learning_rate": 3.3008377240847955e-06, |
|
"loss": 0.8025, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"grad_norm": 0.5128923058509827, |
|
"learning_rate": 3.289899283371657e-06, |
|
"loss": 0.8343, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 1.952, |
|
"grad_norm": 0.47355395555496216, |
|
"learning_rate": 3.2789701035693242e-06, |
|
"loss": 0.8171, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 1.954, |
|
"grad_norm": 0.4412969648838043, |
|
"learning_rate": 3.268050243863877e-06, |
|
"loss": 0.8041, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 1.956, |
|
"grad_norm": 0.42818376421928406, |
|
"learning_rate": 3.2571397633909252e-06, |
|
"loss": 0.7968, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 1.958, |
|
"grad_norm": 0.4641779363155365, |
|
"learning_rate": 3.246238721235283e-06, |
|
"loss": 0.7914, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"grad_norm": 0.46108317375183105, |
|
"learning_rate": 3.2353471764306567e-06, |
|
"loss": 0.7926, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.962, |
|
"grad_norm": 0.4477689266204834, |
|
"learning_rate": 3.224465187959316e-06, |
|
"loss": 0.8085, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 1.964, |
|
"grad_norm": 0.43308204412460327, |
|
"learning_rate": 3.2135928147517803e-06, |
|
"loss": 0.792, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 1.966, |
|
"grad_norm": 0.43959012627601624, |
|
"learning_rate": 3.2027301156865015e-06, |
|
"loss": 0.7827, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 1.968, |
|
"grad_norm": 0.47771644592285156, |
|
"learning_rate": 3.1918771495895395e-06, |
|
"loss": 0.8044, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"grad_norm": 0.486515611410141, |
|
"learning_rate": 3.1810339752342446e-06, |
|
"loss": 0.7992, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 1.972, |
|
"grad_norm": 0.41616618633270264, |
|
"learning_rate": 3.1702006513409393e-06, |
|
"loss": 0.807, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 1.974, |
|
"grad_norm": 0.43619367480278015, |
|
"learning_rate": 3.1593772365766107e-06, |
|
"loss": 0.795, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 1.976, |
|
"grad_norm": 0.4389900863170624, |
|
"learning_rate": 3.148563789554575e-06, |
|
"loss": 0.8113, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 1.978, |
|
"grad_norm": 0.4348282516002655, |
|
"learning_rate": 3.137760368834169e-06, |
|
"loss": 0.7804, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"grad_norm": 0.4243306815624237, |
|
"learning_rate": 3.12696703292044e-06, |
|
"loss": 0.8005, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.982, |
|
"grad_norm": 0.4412820339202881, |
|
"learning_rate": 3.1161838402638158e-06, |
|
"loss": 0.8126, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 1.984, |
|
"grad_norm": 0.44826000928878784, |
|
"learning_rate": 3.105410849259796e-06, |
|
"loss": 0.8276, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 1.986, |
|
"grad_norm": 0.448371559381485, |
|
"learning_rate": 3.09464811824863e-06, |
|
"loss": 0.8171, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 1.988, |
|
"grad_norm": 0.4271306097507477, |
|
"learning_rate": 3.0838957055150136e-06, |
|
"loss": 0.8174, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"grad_norm": 0.455337792634964, |
|
"learning_rate": 3.0731536692877596e-06, |
|
"loss": 0.7809, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 1.992, |
|
"grad_norm": 0.4774368405342102, |
|
"learning_rate": 3.0624220677394854e-06, |
|
"loss": 0.8019, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 1.994, |
|
"grad_norm": 0.4929225444793701, |
|
"learning_rate": 3.0517009589863057e-06, |
|
"loss": 0.7836, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 1.996, |
|
"grad_norm": 0.42296701669692993, |
|
"learning_rate": 3.040990401087508e-06, |
|
"loss": 0.7909, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 1.998, |
|
"grad_norm": 0.43920400738716125, |
|
"learning_rate": 3.030290452045245e-06, |
|
"loss": 0.8158, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.4366856813430786, |
|
"learning_rate": 3.019601169804216e-06, |
|
"loss": 0.8151, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.002, |
|
"grad_norm": 0.4748608469963074, |
|
"learning_rate": 3.0089226122513583e-06, |
|
"loss": 0.7909, |
|
"step": 1001 |
|
}, |
|
{ |
|
"epoch": 2.004, |
|
"grad_norm": 0.44225451350212097, |
|
"learning_rate": 2.9982548372155264e-06, |
|
"loss": 0.7375, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 2.006, |
|
"grad_norm": 0.46516430377960205, |
|
"learning_rate": 2.9875979024671846e-06, |
|
"loss": 0.7774, |
|
"step": 1003 |
|
}, |
|
{ |
|
"epoch": 2.008, |
|
"grad_norm": 0.5143842697143555, |
|
"learning_rate": 2.9769518657180953e-06, |
|
"loss": 0.7638, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"grad_norm": 0.429679811000824, |
|
"learning_rate": 2.966316784621e-06, |
|
"loss": 0.7848, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 2.012, |
|
"grad_norm": 0.4154725670814514, |
|
"learning_rate": 2.9556927167693107e-06, |
|
"loss": 0.7841, |
|
"step": 1006 |
|
}, |
|
{ |
|
"epoch": 2.014, |
|
"grad_norm": 0.4369898736476898, |
|
"learning_rate": 2.945079719696802e-06, |
|
"loss": 0.7826, |
|
"step": 1007 |
|
}, |
|
{ |
|
"epoch": 2.016, |
|
"grad_norm": 0.4525117874145508, |
|
"learning_rate": 2.934477850877292e-06, |
|
"loss": 0.7601, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 2.018, |
|
"grad_norm": 0.494784951210022, |
|
"learning_rate": 2.9238871677243354e-06, |
|
"loss": 0.7692, |
|
"step": 1009 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"grad_norm": 0.43322470784187317, |
|
"learning_rate": 2.9133077275909112e-06, |
|
"loss": 0.7638, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 2.022, |
|
"grad_norm": 0.4489244818687439, |
|
"learning_rate": 2.9027395877691143e-06, |
|
"loss": 0.7872, |
|
"step": 1011 |
|
}, |
|
{ |
|
"epoch": 2.024, |
|
"grad_norm": 0.4334550201892853, |
|
"learning_rate": 2.892182805489846e-06, |
|
"loss": 0.7805, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 2.026, |
|
"grad_norm": 0.48196423053741455, |
|
"learning_rate": 2.8816374379224932e-06, |
|
"loss": 0.77, |
|
"step": 1013 |
|
}, |
|
{ |
|
"epoch": 2.028, |
|
"grad_norm": 0.46943002939224243, |
|
"learning_rate": 2.871103542174637e-06, |
|
"loss": 0.7638, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"grad_norm": 0.40652287006378174, |
|
"learning_rate": 2.86058117529173e-06, |
|
"loss": 0.7708, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 2.032, |
|
"grad_norm": 0.43071281909942627, |
|
"learning_rate": 2.8500703942567874e-06, |
|
"loss": 0.8108, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 2.034, |
|
"grad_norm": 0.46008026599884033, |
|
"learning_rate": 2.839571255990088e-06, |
|
"loss": 0.7641, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 2.036, |
|
"grad_norm": 0.409462571144104, |
|
"learning_rate": 2.82908381734886e-06, |
|
"loss": 0.7784, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 2.038, |
|
"grad_norm": 0.4535731077194214, |
|
"learning_rate": 2.818608135126967e-06, |
|
"loss": 0.7266, |
|
"step": 1019 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"grad_norm": 0.4551372528076172, |
|
"learning_rate": 2.8081442660546126e-06, |
|
"loss": 0.7685, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 2.042, |
|
"grad_norm": 0.43031415343284607, |
|
"learning_rate": 2.797692266798027e-06, |
|
"loss": 0.7547, |
|
"step": 1021 |
|
}, |
|
{ |
|
"epoch": 2.044, |
|
"grad_norm": 0.47990843653678894, |
|
"learning_rate": 2.7872521939591556e-06, |
|
"loss": 0.7547, |
|
"step": 1022 |
|
}, |
|
{ |
|
"epoch": 2.046, |
|
"grad_norm": 0.4494011104106903, |
|
"learning_rate": 2.776824104075364e-06, |
|
"loss": 0.7153, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 2.048, |
|
"grad_norm": 0.42083409428596497, |
|
"learning_rate": 2.7664080536191178e-06, |
|
"loss": 0.7896, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"grad_norm": 0.42462587356567383, |
|
"learning_rate": 2.7560040989976894e-06, |
|
"loss": 0.7692, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 2.052, |
|
"grad_norm": 0.44874322414398193, |
|
"learning_rate": 2.7456122965528475e-06, |
|
"loss": 0.7709, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 2.054, |
|
"grad_norm": 0.47028055787086487, |
|
"learning_rate": 2.7352327025605464e-06, |
|
"loss": 0.7434, |
|
"step": 1027 |
|
}, |
|
{ |
|
"epoch": 2.056, |
|
"grad_norm": 0.42561060190200806, |
|
"learning_rate": 2.724865373230632e-06, |
|
"loss": 0.7651, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 2.058, |
|
"grad_norm": 0.4689915180206299, |
|
"learning_rate": 2.714510364706531e-06, |
|
"loss": 0.7594, |
|
"step": 1029 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"grad_norm": 0.44147977232933044, |
|
"learning_rate": 2.7041677330649408e-06, |
|
"loss": 0.7366, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 2.062, |
|
"grad_norm": 0.493244469165802, |
|
"learning_rate": 2.6938375343155464e-06, |
|
"loss": 0.7734, |
|
"step": 1031 |
|
}, |
|
{ |
|
"epoch": 2.064, |
|
"grad_norm": 0.42558082938194275, |
|
"learning_rate": 2.683519824400693e-06, |
|
"loss": 0.7742, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 2.066, |
|
"grad_norm": 0.4560059607028961, |
|
"learning_rate": 2.6732146591950924e-06, |
|
"loss": 0.7595, |
|
"step": 1033 |
|
}, |
|
{ |
|
"epoch": 2.068, |
|
"grad_norm": 0.43729954957962036, |
|
"learning_rate": 2.662922094505529e-06, |
|
"loss": 0.7466, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"grad_norm": 0.46198272705078125, |
|
"learning_rate": 2.6526421860705474e-06, |
|
"loss": 0.7644, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 2.072, |
|
"grad_norm": 0.4170866310596466, |
|
"learning_rate": 2.6423749895601494e-06, |
|
"loss": 0.7849, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 2.074, |
|
"grad_norm": 0.4126523733139038, |
|
"learning_rate": 2.6321205605755002e-06, |
|
"loss": 0.7772, |
|
"step": 1037 |
|
}, |
|
{ |
|
"epoch": 2.076, |
|
"grad_norm": 0.4304735064506531, |
|
"learning_rate": 2.6218789546486235e-06, |
|
"loss": 0.7569, |
|
"step": 1038 |
|
}, |
|
{ |
|
"epoch": 2.078, |
|
"grad_norm": 0.42081594467163086, |
|
"learning_rate": 2.611650227242102e-06, |
|
"loss": 0.7815, |
|
"step": 1039 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"grad_norm": 0.4308110773563385, |
|
"learning_rate": 2.601434433748771e-06, |
|
"loss": 0.7791, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 2.082, |
|
"grad_norm": 0.42325472831726074, |
|
"learning_rate": 2.5912316294914232e-06, |
|
"loss": 0.7789, |
|
"step": 1041 |
|
}, |
|
{ |
|
"epoch": 2.084, |
|
"grad_norm": 0.44107508659362793, |
|
"learning_rate": 2.581041869722519e-06, |
|
"loss": 0.7685, |
|
"step": 1042 |
|
}, |
|
{ |
|
"epoch": 2.086, |
|
"grad_norm": 0.4687047004699707, |
|
"learning_rate": 2.5708652096238674e-06, |
|
"loss": 0.7712, |
|
"step": 1043 |
|
}, |
|
{ |
|
"epoch": 2.088, |
|
"grad_norm": 0.5534901022911072, |
|
"learning_rate": 2.560701704306336e-06, |
|
"loss": 0.7938, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"grad_norm": 0.46949270367622375, |
|
"learning_rate": 2.550551408809566e-06, |
|
"loss": 0.7937, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 2.092, |
|
"grad_norm": 0.43184739351272583, |
|
"learning_rate": 2.540414378101647e-06, |
|
"loss": 0.7584, |
|
"step": 1046 |
|
}, |
|
{ |
|
"epoch": 2.094, |
|
"grad_norm": 0.41342681646347046, |
|
"learning_rate": 2.5302906670788463e-06, |
|
"loss": 0.7393, |
|
"step": 1047 |
|
}, |
|
{ |
|
"epoch": 2.096, |
|
"grad_norm": 0.5106869339942932, |
|
"learning_rate": 2.52018033056529e-06, |
|
"loss": 0.7937, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 2.098, |
|
"grad_norm": 0.4273001551628113, |
|
"learning_rate": 2.5100834233126827e-06, |
|
"loss": 0.7461, |
|
"step": 1049 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"grad_norm": 0.43153709173202515, |
|
"learning_rate": 2.5000000000000015e-06, |
|
"loss": 0.7631, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 2.102, |
|
"grad_norm": 0.4402224123477936, |
|
"learning_rate": 2.489930115233199e-06, |
|
"loss": 0.8026, |
|
"step": 1051 |
|
}, |
|
{ |
|
"epoch": 2.104, |
|
"grad_norm": 0.45185586810112, |
|
"learning_rate": 2.4798738235449164e-06, |
|
"loss": 0.7683, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 2.106, |
|
"grad_norm": 0.4645299017429352, |
|
"learning_rate": 2.469831179394182e-06, |
|
"loss": 0.7985, |
|
"step": 1053 |
|
}, |
|
{ |
|
"epoch": 2.108, |
|
"grad_norm": 0.4175006151199341, |
|
"learning_rate": 2.4598022371661113e-06, |
|
"loss": 0.8002, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"grad_norm": 0.4740184545516968, |
|
"learning_rate": 2.4497870511716237e-06, |
|
"loss": 0.796, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 2.112, |
|
"grad_norm": 0.485872745513916, |
|
"learning_rate": 2.4397856756471435e-06, |
|
"loss": 0.7947, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 2.114, |
|
"grad_norm": 0.43455788493156433, |
|
"learning_rate": 2.429798164754299e-06, |
|
"loss": 0.7656, |
|
"step": 1057 |
|
}, |
|
{ |
|
"epoch": 2.116, |
|
"grad_norm": 0.44379016757011414, |
|
"learning_rate": 2.4198245725796427e-06, |
|
"loss": 0.7622, |
|
"step": 1058 |
|
}, |
|
{ |
|
"epoch": 2.118, |
|
"grad_norm": 0.4761369824409485, |
|
"learning_rate": 2.40986495313435e-06, |
|
"loss": 0.7647, |
|
"step": 1059 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"grad_norm": 0.4579893946647644, |
|
"learning_rate": 2.3999193603539234e-06, |
|
"loss": 0.7693, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 2.122, |
|
"grad_norm": 0.4474698007106781, |
|
"learning_rate": 2.3899878480979098e-06, |
|
"loss": 0.7826, |
|
"step": 1061 |
|
}, |
|
{ |
|
"epoch": 2.124, |
|
"grad_norm": 0.4562933146953583, |
|
"learning_rate": 2.380070470149605e-06, |
|
"loss": 0.8071, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 2.126, |
|
"grad_norm": 0.5011743903160095, |
|
"learning_rate": 2.3701672802157567e-06, |
|
"loss": 0.7657, |
|
"step": 1063 |
|
}, |
|
{ |
|
"epoch": 2.128, |
|
"grad_norm": 0.4564701020717621, |
|
"learning_rate": 2.3602783319262847e-06, |
|
"loss": 0.746, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"grad_norm": 0.4964149296283722, |
|
"learning_rate": 2.3504036788339763e-06, |
|
"loss": 0.7611, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 2.132, |
|
"grad_norm": 0.4345194101333618, |
|
"learning_rate": 2.340543374414212e-06, |
|
"loss": 0.7925, |
|
"step": 1066 |
|
}, |
|
{ |
|
"epoch": 2.134, |
|
"grad_norm": 0.40942201018333435, |
|
"learning_rate": 2.330697472064667e-06, |
|
"loss": 0.7567, |
|
"step": 1067 |
|
}, |
|
{ |
|
"epoch": 2.136, |
|
"grad_norm": 0.4680323302745819, |
|
"learning_rate": 2.320866025105016e-06, |
|
"loss": 0.7702, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 2.138, |
|
"grad_norm": 0.4870208501815796, |
|
"learning_rate": 2.3110490867766644e-06, |
|
"loss": 0.7438, |
|
"step": 1069 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"grad_norm": 0.4345625340938568, |
|
"learning_rate": 2.3012467102424373e-06, |
|
"loss": 0.754, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 2.142, |
|
"grad_norm": 0.4366339445114136, |
|
"learning_rate": 2.2914589485863015e-06, |
|
"loss": 0.7586, |
|
"step": 1071 |
|
}, |
|
{ |
|
"epoch": 2.144, |
|
"grad_norm": 0.4483181834220886, |
|
"learning_rate": 2.2816858548130837e-06, |
|
"loss": 0.8244, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 2.146, |
|
"grad_norm": 0.4304027259349823, |
|
"learning_rate": 2.2719274818481767e-06, |
|
"loss": 0.7521, |
|
"step": 1073 |
|
}, |
|
{ |
|
"epoch": 2.148, |
|
"grad_norm": 0.41920629143714905, |
|
"learning_rate": 2.2621838825372496e-06, |
|
"loss": 0.797, |
|
"step": 1074 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"grad_norm": 0.42902520298957825, |
|
"learning_rate": 2.2524551096459703e-06, |
|
"loss": 0.772, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 2.152, |
|
"grad_norm": 0.42061668634414673, |
|
"learning_rate": 2.2427412158597133e-06, |
|
"loss": 0.7737, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 2.154, |
|
"grad_norm": 0.4826355278491974, |
|
"learning_rate": 2.23304225378328e-06, |
|
"loss": 0.7701, |
|
"step": 1077 |
|
}, |
|
{ |
|
"epoch": 2.156, |
|
"grad_norm": 0.44024163484573364, |
|
"learning_rate": 2.2233582759406065e-06, |
|
"loss": 0.7849, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 2.158, |
|
"grad_norm": 0.4087906777858734, |
|
"learning_rate": 2.213689334774479e-06, |
|
"loss": 0.7615, |
|
"step": 1079 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"grad_norm": 0.4429933428764343, |
|
"learning_rate": 2.204035482646267e-06, |
|
"loss": 0.7765, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 2.162, |
|
"grad_norm": 0.4163159430027008, |
|
"learning_rate": 2.1943967718356123e-06, |
|
"loss": 0.7497, |
|
"step": 1081 |
|
}, |
|
{ |
|
"epoch": 2.164, |
|
"grad_norm": 0.4267112910747528, |
|
"learning_rate": 2.184773254540169e-06, |
|
"loss": 0.7756, |
|
"step": 1082 |
|
}, |
|
{ |
|
"epoch": 2.166, |
|
"grad_norm": 0.40076255798339844, |
|
"learning_rate": 2.175164982875311e-06, |
|
"loss": 0.7566, |
|
"step": 1083 |
|
}, |
|
{ |
|
"epoch": 2.168, |
|
"grad_norm": 0.4351121783256531, |
|
"learning_rate": 2.165572008873845e-06, |
|
"loss": 0.772, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"grad_norm": 0.40487784147262573, |
|
"learning_rate": 2.155994384485742e-06, |
|
"loss": 0.7754, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 2.172, |
|
"grad_norm": 0.4281260371208191, |
|
"learning_rate": 2.146432161577842e-06, |
|
"loss": 0.7542, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 2.174, |
|
"grad_norm": 0.44176384806632996, |
|
"learning_rate": 2.1368853919335835e-06, |
|
"loss": 0.7782, |
|
"step": 1087 |
|
}, |
|
{ |
|
"epoch": 2.176, |
|
"grad_norm": 0.4439259469509125, |
|
"learning_rate": 2.12735412725272e-06, |
|
"loss": 0.7806, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 2.178, |
|
"grad_norm": 0.4487484097480774, |
|
"learning_rate": 2.1178384191510344e-06, |
|
"loss": 0.7741, |
|
"step": 1089 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"grad_norm": 0.4279979467391968, |
|
"learning_rate": 2.1083383191600676e-06, |
|
"loss": 0.7632, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 2.182, |
|
"grad_norm": 0.43812668323516846, |
|
"learning_rate": 2.0988538787268374e-06, |
|
"loss": 0.7504, |
|
"step": 1091 |
|
}, |
|
{ |
|
"epoch": 2.184, |
|
"grad_norm": 0.4351823925971985, |
|
"learning_rate": 2.0893851492135536e-06, |
|
"loss": 0.7838, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 2.186, |
|
"grad_norm": 0.4904820919036865, |
|
"learning_rate": 2.0799321818973488e-06, |
|
"loss": 0.7421, |
|
"step": 1093 |
|
}, |
|
{ |
|
"epoch": 2.188, |
|
"grad_norm": 0.38253724575042725, |
|
"learning_rate": 2.0704950279699986e-06, |
|
"loss": 0.7744, |
|
"step": 1094 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"grad_norm": 0.4415190815925598, |
|
"learning_rate": 2.061073738537635e-06, |
|
"loss": 0.7871, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 2.192, |
|
"grad_norm": 0.4349212050437927, |
|
"learning_rate": 2.0516683646204836e-06, |
|
"loss": 0.7796, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 2.194, |
|
"grad_norm": 0.45319244265556335, |
|
"learning_rate": 2.0422789571525813e-06, |
|
"loss": 0.7536, |
|
"step": 1097 |
|
}, |
|
{ |
|
"epoch": 2.196, |
|
"grad_norm": 0.4984447658061981, |
|
"learning_rate": 2.0329055669814936e-06, |
|
"loss": 0.7683, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 2.198, |
|
"grad_norm": 0.4400745630264282, |
|
"learning_rate": 2.023548244868051e-06, |
|
"loss": 0.7293, |
|
"step": 1099 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"grad_norm": 0.40443891286849976, |
|
"learning_rate": 2.0142070414860704e-06, |
|
"loss": 0.7865, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.202, |
|
"grad_norm": 0.45138484239578247, |
|
"learning_rate": 2.0048820074220716e-06, |
|
"loss": 0.7632, |
|
"step": 1101 |
|
}, |
|
{ |
|
"epoch": 2.204, |
|
"grad_norm": 0.4614701569080353, |
|
"learning_rate": 1.9955731931750182e-06, |
|
"loss": 0.7997, |
|
"step": 1102 |
|
}, |
|
{ |
|
"epoch": 2.206, |
|
"grad_norm": 0.5170438289642334, |
|
"learning_rate": 1.9862806491560315e-06, |
|
"loss": 0.7392, |
|
"step": 1103 |
|
}, |
|
{ |
|
"epoch": 2.208, |
|
"grad_norm": 0.4604811668395996, |
|
"learning_rate": 1.977004425688126e-06, |
|
"loss": 0.7267, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"grad_norm": 0.4471680521965027, |
|
"learning_rate": 1.9677445730059348e-06, |
|
"loss": 0.7519, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 2.212, |
|
"grad_norm": 0.4620627760887146, |
|
"learning_rate": 1.958501141255427e-06, |
|
"loss": 0.7664, |
|
"step": 1106 |
|
}, |
|
{ |
|
"epoch": 2.214, |
|
"grad_norm": 0.4312162697315216, |
|
"learning_rate": 1.9492741804936623e-06, |
|
"loss": 0.7635, |
|
"step": 1107 |
|
}, |
|
{ |
|
"epoch": 2.216, |
|
"grad_norm": 0.48040759563446045, |
|
"learning_rate": 1.9400637406884875e-06, |
|
"loss": 0.7728, |
|
"step": 1108 |
|
}, |
|
{ |
|
"epoch": 2.218, |
|
"grad_norm": 0.48567670583724976, |
|
"learning_rate": 1.9308698717182874e-06, |
|
"loss": 0.7546, |
|
"step": 1109 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"grad_norm": 0.4145522117614746, |
|
"learning_rate": 1.9216926233717087e-06, |
|
"loss": 0.7375, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 2.222, |
|
"grad_norm": 0.3948729634284973, |
|
"learning_rate": 1.9125320453473923e-06, |
|
"loss": 0.7747, |
|
"step": 1111 |
|
}, |
|
{ |
|
"epoch": 2.224, |
|
"grad_norm": 0.40589264035224915, |
|
"learning_rate": 1.9033881872537009e-06, |
|
"loss": 0.7519, |
|
"step": 1112 |
|
}, |
|
{ |
|
"epoch": 2.226, |
|
"grad_norm": 0.42979031801223755, |
|
"learning_rate": 1.8942610986084487e-06, |
|
"loss": 0.8096, |
|
"step": 1113 |
|
}, |
|
{ |
|
"epoch": 2.228, |
|
"grad_norm": 0.43356558680534363, |
|
"learning_rate": 1.88515082883864e-06, |
|
"loss": 0.7853, |
|
"step": 1114 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"grad_norm": 0.4155976474285126, |
|
"learning_rate": 1.8760574272802002e-06, |
|
"loss": 0.7433, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 2.232, |
|
"grad_norm": 0.41546252369880676, |
|
"learning_rate": 1.8669809431776991e-06, |
|
"loss": 0.771, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 2.234, |
|
"grad_norm": 0.42242228984832764, |
|
"learning_rate": 1.8579214256840938e-06, |
|
"loss": 0.7931, |
|
"step": 1117 |
|
}, |
|
{ |
|
"epoch": 2.2359999999999998, |
|
"grad_norm": 0.3971981704235077, |
|
"learning_rate": 1.8488789238604676e-06, |
|
"loss": 0.7894, |
|
"step": 1118 |
|
}, |
|
{ |
|
"epoch": 2.238, |
|
"grad_norm": 0.4262888729572296, |
|
"learning_rate": 1.8398534866757455e-06, |
|
"loss": 0.7469, |
|
"step": 1119 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"grad_norm": 0.42837753891944885, |
|
"learning_rate": 1.8308451630064484e-06, |
|
"loss": 0.7523, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 2.242, |
|
"grad_norm": 0.4347648620605469, |
|
"learning_rate": 1.8218540016364178e-06, |
|
"loss": 0.7754, |
|
"step": 1121 |
|
}, |
|
{ |
|
"epoch": 2.2439999999999998, |
|
"grad_norm": 0.43818947672843933, |
|
"learning_rate": 1.8128800512565514e-06, |
|
"loss": 0.7487, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 2.246, |
|
"grad_norm": 0.3970399498939514, |
|
"learning_rate": 1.8039233604645468e-06, |
|
"loss": 0.7838, |
|
"step": 1123 |
|
}, |
|
{ |
|
"epoch": 2.248, |
|
"grad_norm": 0.40745770931243896, |
|
"learning_rate": 1.7949839777646327e-06, |
|
"loss": 0.7994, |
|
"step": 1124 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"grad_norm": 0.43025413155555725, |
|
"learning_rate": 1.7860619515673034e-06, |
|
"loss": 0.7662, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 2.252, |
|
"grad_norm": 0.46322372555732727, |
|
"learning_rate": 1.7771573301890666e-06, |
|
"loss": 0.8064, |
|
"step": 1126 |
|
}, |
|
{ |
|
"epoch": 2.254, |
|
"grad_norm": 0.47347068786621094, |
|
"learning_rate": 1.7682701618521687e-06, |
|
"loss": 0.7538, |
|
"step": 1127 |
|
}, |
|
{ |
|
"epoch": 2.2560000000000002, |
|
"grad_norm": 0.4192434847354889, |
|
"learning_rate": 1.7594004946843458e-06, |
|
"loss": 0.8046, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 2.258, |
|
"grad_norm": 0.4811704456806183, |
|
"learning_rate": 1.7505483767185583e-06, |
|
"loss": 0.7348, |
|
"step": 1129 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"grad_norm": 0.42036548256874084, |
|
"learning_rate": 1.7417138558927244e-06, |
|
"loss": 0.7466, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 2.262, |
|
"grad_norm": 0.46703797578811646, |
|
"learning_rate": 1.7328969800494727e-06, |
|
"loss": 0.8125, |
|
"step": 1131 |
|
}, |
|
{ |
|
"epoch": 2.2640000000000002, |
|
"grad_norm": 0.4545026421546936, |
|
"learning_rate": 1.7240977969358757e-06, |
|
"loss": 0.756, |
|
"step": 1132 |
|
}, |
|
{ |
|
"epoch": 2.266, |
|
"grad_norm": 0.39864465594291687, |
|
"learning_rate": 1.7153163542031881e-06, |
|
"loss": 0.7741, |
|
"step": 1133 |
|
}, |
|
{ |
|
"epoch": 2.268, |
|
"grad_norm": 0.4210382401943207, |
|
"learning_rate": 1.7065526994065973e-06, |
|
"loss": 0.7489, |
|
"step": 1134 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"grad_norm": 0.3919101357460022, |
|
"learning_rate": 1.6978068800049624e-06, |
|
"loss": 0.7977, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 2.2720000000000002, |
|
"grad_norm": 0.4045415222644806, |
|
"learning_rate": 1.6890789433605508e-06, |
|
"loss": 0.7482, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 2.274, |
|
"grad_norm": 0.42059701681137085, |
|
"learning_rate": 1.680368936738792e-06, |
|
"loss": 0.7717, |
|
"step": 1137 |
|
}, |
|
{ |
|
"epoch": 2.276, |
|
"grad_norm": 0.4480821490287781, |
|
"learning_rate": 1.671676907308018e-06, |
|
"loss": 0.7716, |
|
"step": 1138 |
|
}, |
|
{ |
|
"epoch": 2.278, |
|
"grad_norm": 0.4437813460826874, |
|
"learning_rate": 1.6630029021392007e-06, |
|
"loss": 0.7507, |
|
"step": 1139 |
|
}, |
|
{ |
|
"epoch": 2.2800000000000002, |
|
"grad_norm": 0.4174870252609253, |
|
"learning_rate": 1.6543469682057105e-06, |
|
"loss": 0.7388, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 2.282, |
|
"grad_norm": 0.4287874102592468, |
|
"learning_rate": 1.645709152383046e-06, |
|
"loss": 0.7615, |
|
"step": 1141 |
|
}, |
|
{ |
|
"epoch": 2.284, |
|
"grad_norm": 0.4152935743331909, |
|
"learning_rate": 1.6370895014486e-06, |
|
"loss": 0.758, |
|
"step": 1142 |
|
}, |
|
{ |
|
"epoch": 2.286, |
|
"grad_norm": 0.41198644042015076, |
|
"learning_rate": 1.6284880620813847e-06, |
|
"loss": 0.7776, |
|
"step": 1143 |
|
}, |
|
{ |
|
"epoch": 2.288, |
|
"grad_norm": 0.40575531125068665, |
|
"learning_rate": 1.6199048808617896e-06, |
|
"loss": 0.7964, |
|
"step": 1144 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"grad_norm": 0.3933468461036682, |
|
"learning_rate": 1.611340004271339e-06, |
|
"loss": 0.7683, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 2.292, |
|
"grad_norm": 0.42372068762779236, |
|
"learning_rate": 1.6027934786924187e-06, |
|
"loss": 0.7576, |
|
"step": 1146 |
|
}, |
|
{ |
|
"epoch": 2.294, |
|
"grad_norm": 0.40251022577285767, |
|
"learning_rate": 1.594265350408039e-06, |
|
"loss": 0.76, |
|
"step": 1147 |
|
}, |
|
{ |
|
"epoch": 2.296, |
|
"grad_norm": 0.40240368247032166, |
|
"learning_rate": 1.5857556656015837e-06, |
|
"loss": 0.7936, |
|
"step": 1148 |
|
}, |
|
{ |
|
"epoch": 2.298, |
|
"grad_norm": 0.5056098103523254, |
|
"learning_rate": 1.5772644703565564e-06, |
|
"loss": 0.7714, |
|
"step": 1149 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"grad_norm": 0.4141823351383209, |
|
"learning_rate": 1.5687918106563326e-06, |
|
"loss": 0.7508, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 2.302, |
|
"grad_norm": 0.4253833591938019, |
|
"learning_rate": 1.5603377323839069e-06, |
|
"loss": 0.7452, |
|
"step": 1151 |
|
}, |
|
{ |
|
"epoch": 2.304, |
|
"grad_norm": 0.4501397907733917, |
|
"learning_rate": 1.551902281321651e-06, |
|
"loss": 0.7613, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 2.306, |
|
"grad_norm": 0.4004981517791748, |
|
"learning_rate": 1.5434855031510626e-06, |
|
"loss": 0.7794, |
|
"step": 1153 |
|
}, |
|
{ |
|
"epoch": 2.308, |
|
"grad_norm": 0.44913139939308167, |
|
"learning_rate": 1.5350874434525142e-06, |
|
"loss": 0.7587, |
|
"step": 1154 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"grad_norm": 0.4194786548614502, |
|
"learning_rate": 1.5267081477050132e-06, |
|
"loss": 0.7882, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 2.312, |
|
"grad_norm": 0.43769264221191406, |
|
"learning_rate": 1.5183476612859538e-06, |
|
"loss": 0.7932, |
|
"step": 1156 |
|
}, |
|
{ |
|
"epoch": 2.314, |
|
"grad_norm": 0.43893754482269287, |
|
"learning_rate": 1.5100060294708647e-06, |
|
"loss": 0.7729, |
|
"step": 1157 |
|
}, |
|
{ |
|
"epoch": 2.316, |
|
"grad_norm": 0.4052613377571106, |
|
"learning_rate": 1.5016832974331725e-06, |
|
"loss": 0.7768, |
|
"step": 1158 |
|
}, |
|
{ |
|
"epoch": 2.318, |
|
"grad_norm": 0.41634050011634827, |
|
"learning_rate": 1.4933795102439558e-06, |
|
"loss": 0.7517, |
|
"step": 1159 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"grad_norm": 0.515920102596283, |
|
"learning_rate": 1.4850947128716914e-06, |
|
"loss": 0.7657, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 2.322, |
|
"grad_norm": 0.43313372135162354, |
|
"learning_rate": 1.4768289501820265e-06, |
|
"loss": 0.7392, |
|
"step": 1161 |
|
}, |
|
{ |
|
"epoch": 2.324, |
|
"grad_norm": 0.4506223797798157, |
|
"learning_rate": 1.4685822669375239e-06, |
|
"loss": 0.7826, |
|
"step": 1162 |
|
}, |
|
{ |
|
"epoch": 2.326, |
|
"grad_norm": 0.44990140199661255, |
|
"learning_rate": 1.4603547077974217e-06, |
|
"loss": 0.7833, |
|
"step": 1163 |
|
}, |
|
{ |
|
"epoch": 2.328, |
|
"grad_norm": 0.43483781814575195, |
|
"learning_rate": 1.4521463173173966e-06, |
|
"loss": 0.7771, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"grad_norm": 0.41871559619903564, |
|
"learning_rate": 1.4439571399493146e-06, |
|
"loss": 0.77, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 2.332, |
|
"grad_norm": 0.41554340720176697, |
|
"learning_rate": 1.4357872200409988e-06, |
|
"loss": 0.8184, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 2.334, |
|
"grad_norm": 0.41174760460853577, |
|
"learning_rate": 1.4276366018359845e-06, |
|
"loss": 0.7728, |
|
"step": 1167 |
|
}, |
|
{ |
|
"epoch": 2.336, |
|
"grad_norm": 0.426435649394989, |
|
"learning_rate": 1.4195053294732757e-06, |
|
"loss": 0.7597, |
|
"step": 1168 |
|
}, |
|
{ |
|
"epoch": 2.338, |
|
"grad_norm": 0.4220362603664398, |
|
"learning_rate": 1.4113934469871166e-06, |
|
"loss": 0.7222, |
|
"step": 1169 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"grad_norm": 0.4154855012893677, |
|
"learning_rate": 1.4033009983067454e-06, |
|
"loss": 0.8104, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 2.342, |
|
"grad_norm": 0.4327816069126129, |
|
"learning_rate": 1.3952280272561541e-06, |
|
"loss": 0.7503, |
|
"step": 1171 |
|
}, |
|
{ |
|
"epoch": 2.344, |
|
"grad_norm": 0.409800261259079, |
|
"learning_rate": 1.3871745775538598e-06, |
|
"loss": 0.7895, |
|
"step": 1172 |
|
}, |
|
{ |
|
"epoch": 2.346, |
|
"grad_norm": 0.4550241231918335, |
|
"learning_rate": 1.3791406928126638e-06, |
|
"loss": 0.7569, |
|
"step": 1173 |
|
}, |
|
{ |
|
"epoch": 2.348, |
|
"grad_norm": 0.41326087713241577, |
|
"learning_rate": 1.371126416539409e-06, |
|
"loss": 0.7821, |
|
"step": 1174 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"grad_norm": 0.42594876885414124, |
|
"learning_rate": 1.3631317921347564e-06, |
|
"loss": 0.754, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 2.352, |
|
"grad_norm": 0.428521066904068, |
|
"learning_rate": 1.3551568628929434e-06, |
|
"loss": 0.7867, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 2.354, |
|
"grad_norm": 0.39713218808174133, |
|
"learning_rate": 1.3472016720015447e-06, |
|
"loss": 0.7685, |
|
"step": 1177 |
|
}, |
|
{ |
|
"epoch": 2.356, |
|
"grad_norm": 0.4291415512561798, |
|
"learning_rate": 1.339266262541249e-06, |
|
"loss": 0.7723, |
|
"step": 1178 |
|
}, |
|
{ |
|
"epoch": 2.358, |
|
"grad_norm": 0.4253414273262024, |
|
"learning_rate": 1.3313506774856177e-06, |
|
"loss": 0.7748, |
|
"step": 1179 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"grad_norm": 0.409959077835083, |
|
"learning_rate": 1.3234549597008572e-06, |
|
"loss": 0.7861, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 2.362, |
|
"grad_norm": 0.4249674379825592, |
|
"learning_rate": 1.3155791519455812e-06, |
|
"loss": 0.8093, |
|
"step": 1181 |
|
}, |
|
{ |
|
"epoch": 2.364, |
|
"grad_norm": 0.4227546155452728, |
|
"learning_rate": 1.3077232968705805e-06, |
|
"loss": 0.7827, |
|
"step": 1182 |
|
}, |
|
{ |
|
"epoch": 2.366, |
|
"grad_norm": 0.4601157009601593, |
|
"learning_rate": 1.2998874370186026e-06, |
|
"loss": 0.7788, |
|
"step": 1183 |
|
}, |
|
{ |
|
"epoch": 2.368, |
|
"grad_norm": 0.4204160273075104, |
|
"learning_rate": 1.2920716148241036e-06, |
|
"loss": 0.752, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"grad_norm": 0.42416802048683167, |
|
"learning_rate": 1.2842758726130283e-06, |
|
"loss": 0.7587, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 2.372, |
|
"grad_norm": 0.42040911316871643, |
|
"learning_rate": 1.2765002526025871e-06, |
|
"loss": 0.7622, |
|
"step": 1186 |
|
}, |
|
{ |
|
"epoch": 2.374, |
|
"grad_norm": 0.38308432698249817, |
|
"learning_rate": 1.2687447969010113e-06, |
|
"loss": 0.7838, |
|
"step": 1187 |
|
}, |
|
{ |
|
"epoch": 2.376, |
|
"grad_norm": 0.40888527035713196, |
|
"learning_rate": 1.2610095475073415e-06, |
|
"loss": 0.783, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 2.378, |
|
"grad_norm": 0.40721824765205383, |
|
"learning_rate": 1.2532945463111856e-06, |
|
"loss": 0.7861, |
|
"step": 1189 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"grad_norm": 0.43069660663604736, |
|
"learning_rate": 1.2455998350925042e-06, |
|
"loss": 0.7398, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 2.382, |
|
"grad_norm": 0.36445775628089905, |
|
"learning_rate": 1.2379254555213788e-06, |
|
"loss": 0.7637, |
|
"step": 1191 |
|
}, |
|
{ |
|
"epoch": 2.384, |
|
"grad_norm": 0.40194427967071533, |
|
"learning_rate": 1.2302714491577834e-06, |
|
"loss": 0.7485, |
|
"step": 1192 |
|
}, |
|
{ |
|
"epoch": 2.386, |
|
"grad_norm": 0.40127691626548767, |
|
"learning_rate": 1.2226378574513654e-06, |
|
"loss": 0.7683, |
|
"step": 1193 |
|
}, |
|
{ |
|
"epoch": 2.388, |
|
"grad_norm": 0.4438623785972595, |
|
"learning_rate": 1.2150247217412186e-06, |
|
"loss": 0.7896, |
|
"step": 1194 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"grad_norm": 0.45208215713500977, |
|
"learning_rate": 1.2074320832556558e-06, |
|
"loss": 0.743, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 2.392, |
|
"grad_norm": 0.4029155373573303, |
|
"learning_rate": 1.1998599831119912e-06, |
|
"loss": 0.7622, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 2.394, |
|
"grad_norm": 0.41432464122772217, |
|
"learning_rate": 1.1923084623163172e-06, |
|
"loss": 0.7699, |
|
"step": 1197 |
|
}, |
|
{ |
|
"epoch": 2.396, |
|
"grad_norm": 0.42966488003730774, |
|
"learning_rate": 1.1847775617632746e-06, |
|
"loss": 0.7534, |
|
"step": 1198 |
|
}, |
|
{ |
|
"epoch": 2.398, |
|
"grad_norm": 0.4253801107406616, |
|
"learning_rate": 1.1772673222358421e-06, |
|
"loss": 0.7597, |
|
"step": 1199 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"grad_norm": 0.3783406913280487, |
|
"learning_rate": 1.1697777844051105e-06, |
|
"loss": 0.7738, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.402, |
|
"grad_norm": 0.44550472497940063, |
|
"learning_rate": 1.162308988830057e-06, |
|
"loss": 0.8131, |
|
"step": 1201 |
|
}, |
|
{ |
|
"epoch": 2.404, |
|
"grad_norm": 0.4549965560436249, |
|
"learning_rate": 1.1548609759573375e-06, |
|
"loss": 0.7755, |
|
"step": 1202 |
|
}, |
|
{ |
|
"epoch": 2.406, |
|
"grad_norm": 0.4233042001724243, |
|
"learning_rate": 1.1474337861210543e-06, |
|
"loss": 0.7592, |
|
"step": 1203 |
|
}, |
|
{ |
|
"epoch": 2.408, |
|
"grad_norm": 0.4179689884185791, |
|
"learning_rate": 1.1400274595425499e-06, |
|
"loss": 0.7675, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"grad_norm": 0.3878929615020752, |
|
"learning_rate": 1.132642036330181e-06, |
|
"loss": 0.7756, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 2.412, |
|
"grad_norm": 0.43287208676338196, |
|
"learning_rate": 1.1252775564791023e-06, |
|
"loss": 0.7551, |
|
"step": 1206 |
|
}, |
|
{ |
|
"epoch": 2.414, |
|
"grad_norm": 0.40028390288352966, |
|
"learning_rate": 1.1179340598710547e-06, |
|
"loss": 0.7572, |
|
"step": 1207 |
|
}, |
|
{ |
|
"epoch": 2.416, |
|
"grad_norm": 0.3733893930912018, |
|
"learning_rate": 1.1106115862741457e-06, |
|
"loss": 0.7952, |
|
"step": 1208 |
|
}, |
|
{ |
|
"epoch": 2.418, |
|
"grad_norm": 0.3904898464679718, |
|
"learning_rate": 1.1033101753426285e-06, |
|
"loss": 0.7745, |
|
"step": 1209 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"grad_norm": 0.4095996916294098, |
|
"learning_rate": 1.096029866616704e-06, |
|
"loss": 0.7647, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 2.422, |
|
"grad_norm": 0.4702613055706024, |
|
"learning_rate": 1.0887706995222864e-06, |
|
"loss": 0.768, |
|
"step": 1211 |
|
}, |
|
{ |
|
"epoch": 2.424, |
|
"grad_norm": 0.4261472225189209, |
|
"learning_rate": 1.0815327133708015e-06, |
|
"loss": 0.752, |
|
"step": 1212 |
|
}, |
|
{ |
|
"epoch": 2.426, |
|
"grad_norm": 0.38912856578826904, |
|
"learning_rate": 1.0743159473589738e-06, |
|
"loss": 0.7822, |
|
"step": 1213 |
|
}, |
|
{ |
|
"epoch": 2.428, |
|
"grad_norm": 0.42375972867012024, |
|
"learning_rate": 1.0671204405686108e-06, |
|
"loss": 0.7756, |
|
"step": 1214 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"grad_norm": 0.40271031856536865, |
|
"learning_rate": 1.0599462319663906e-06, |
|
"loss": 0.7919, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 2.432, |
|
"grad_norm": 0.39157113432884216, |
|
"learning_rate": 1.052793360403655e-06, |
|
"loss": 0.7749, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 2.434, |
|
"grad_norm": 0.4087899923324585, |
|
"learning_rate": 1.0456618646161954e-06, |
|
"loss": 0.8007, |
|
"step": 1217 |
|
}, |
|
{ |
|
"epoch": 2.436, |
|
"grad_norm": 0.4762036204338074, |
|
"learning_rate": 1.0385517832240472e-06, |
|
"loss": 0.7607, |
|
"step": 1218 |
|
}, |
|
{ |
|
"epoch": 2.438, |
|
"grad_norm": 0.41489630937576294, |
|
"learning_rate": 1.0314631547312738e-06, |
|
"loss": 0.779, |
|
"step": 1219 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"grad_norm": 0.3774562180042267, |
|
"learning_rate": 1.0243960175257605e-06, |
|
"loss": 0.8008, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 2.442, |
|
"grad_norm": 0.39354345202445984, |
|
"learning_rate": 1.0173504098790188e-06, |
|
"loss": 0.7714, |
|
"step": 1221 |
|
}, |
|
{ |
|
"epoch": 2.444, |
|
"grad_norm": 0.4119318425655365, |
|
"learning_rate": 1.010326369945957e-06, |
|
"loss": 0.7856, |
|
"step": 1222 |
|
}, |
|
{ |
|
"epoch": 2.446, |
|
"grad_norm": 0.3862462639808655, |
|
"learning_rate": 1.0033239357646913e-06, |
|
"loss": 0.7589, |
|
"step": 1223 |
|
}, |
|
{ |
|
"epoch": 2.448, |
|
"grad_norm": 0.44779011607170105, |
|
"learning_rate": 9.963431452563331e-07, |
|
"loss": 0.7884, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"grad_norm": 0.4043843746185303, |
|
"learning_rate": 9.893840362247809e-07, |
|
"loss": 0.7568, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 2.452, |
|
"grad_norm": 0.43136683106422424, |
|
"learning_rate": 9.824466463565246e-07, |
|
"loss": 0.7687, |
|
"step": 1226 |
|
}, |
|
{ |
|
"epoch": 2.454, |
|
"grad_norm": 0.40449437499046326, |
|
"learning_rate": 9.7553101322043e-07, |
|
"loss": 0.8022, |
|
"step": 1227 |
|
}, |
|
{ |
|
"epoch": 2.456, |
|
"grad_norm": 0.39529287815093994, |
|
"learning_rate": 9.686371742675443e-07, |
|
"loss": 0.7691, |
|
"step": 1228 |
|
}, |
|
{ |
|
"epoch": 2.458, |
|
"grad_norm": 0.3712971806526184, |
|
"learning_rate": 9.617651668308914e-07, |
|
"loss": 0.7897, |
|
"step": 1229 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"grad_norm": 0.41396647691726685, |
|
"learning_rate": 9.549150281252633e-07, |
|
"loss": 0.765, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 2.462, |
|
"grad_norm": 0.419058620929718, |
|
"learning_rate": 9.480867952470285e-07, |
|
"loss": 0.7615, |
|
"step": 1231 |
|
}, |
|
{ |
|
"epoch": 2.464, |
|
"grad_norm": 0.38833707571029663, |
|
"learning_rate": 9.412805051739266e-07, |
|
"loss": 0.796, |
|
"step": 1232 |
|
}, |
|
{ |
|
"epoch": 2.466, |
|
"grad_norm": 0.38123270869255066, |
|
"learning_rate": 9.344961947648624e-07, |
|
"loss": 0.7856, |
|
"step": 1233 |
|
}, |
|
{ |
|
"epoch": 2.468, |
|
"grad_norm": 0.39136457443237305, |
|
"learning_rate": 9.277339007597158e-07, |
|
"loss": 0.7702, |
|
"step": 1234 |
|
}, |
|
{ |
|
"epoch": 2.4699999999999998, |
|
"grad_norm": 0.424621045589447, |
|
"learning_rate": 9.209936597791407e-07, |
|
"loss": 0.7604, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 2.472, |
|
"grad_norm": 0.42278164625167847, |
|
"learning_rate": 9.142755083243577e-07, |
|
"loss": 0.8015, |
|
"step": 1236 |
|
}, |
|
{ |
|
"epoch": 2.474, |
|
"grad_norm": 0.39735937118530273, |
|
"learning_rate": 9.075794827769696e-07, |
|
"loss": 0.7122, |
|
"step": 1237 |
|
}, |
|
{ |
|
"epoch": 2.476, |
|
"grad_norm": 0.4141498804092407, |
|
"learning_rate": 9.009056193987569e-07, |
|
"loss": 0.8107, |
|
"step": 1238 |
|
}, |
|
{ |
|
"epoch": 2.4779999999999998, |
|
"grad_norm": 0.38823202252388, |
|
"learning_rate": 8.942539543314799e-07, |
|
"loss": 0.7794, |
|
"step": 1239 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"grad_norm": 0.40469056367874146, |
|
"learning_rate": 8.876245235966884e-07, |
|
"loss": 0.7747, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 2.482, |
|
"grad_norm": 0.4189411401748657, |
|
"learning_rate": 8.810173630955249e-07, |
|
"loss": 0.7616, |
|
"step": 1241 |
|
}, |
|
{ |
|
"epoch": 2.484, |
|
"grad_norm": 0.396389365196228, |
|
"learning_rate": 8.744325086085248e-07, |
|
"loss": 0.7603, |
|
"step": 1242 |
|
}, |
|
{ |
|
"epoch": 2.4859999999999998, |
|
"grad_norm": 0.4292638301849365, |
|
"learning_rate": 8.678699957954323e-07, |
|
"loss": 0.7578, |
|
"step": 1243 |
|
}, |
|
{ |
|
"epoch": 2.488, |
|
"grad_norm": 0.3797649145126343, |
|
"learning_rate": 8.613298601949971e-07, |
|
"loss": 0.7562, |
|
"step": 1244 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"grad_norm": 0.4183959662914276, |
|
"learning_rate": 8.54812137224792e-07, |
|
"loss": 0.7773, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 2.492, |
|
"grad_norm": 0.367262065410614, |
|
"learning_rate": 8.483168621810133e-07, |
|
"loss": 0.7601, |
|
"step": 1246 |
|
}, |
|
{ |
|
"epoch": 2.4939999999999998, |
|
"grad_norm": 0.39003264904022217, |
|
"learning_rate": 8.418440702382897e-07, |
|
"loss": 0.7667, |
|
"step": 1247 |
|
}, |
|
{ |
|
"epoch": 2.496, |
|
"grad_norm": 0.3786110579967499, |
|
"learning_rate": 8.353937964495029e-07, |
|
"loss": 0.8138, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 2.498, |
|
"grad_norm": 0.4133704900741577, |
|
"learning_rate": 8.289660757455803e-07, |
|
"loss": 0.7382, |
|
"step": 1249 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 0.39127856492996216, |
|
"learning_rate": 8.225609429353187e-07, |
|
"loss": 0.752, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 2.502, |
|
"grad_norm": 0.43218833208084106, |
|
"learning_rate": 8.161784327051919e-07, |
|
"loss": 0.7998, |
|
"step": 1251 |
|
}, |
|
{ |
|
"epoch": 2.504, |
|
"grad_norm": 0.3970191478729248, |
|
"learning_rate": 8.098185796191632e-07, |
|
"loss": 0.7362, |
|
"step": 1252 |
|
}, |
|
{ |
|
"epoch": 2.5060000000000002, |
|
"grad_norm": 0.4043598175048828, |
|
"learning_rate": 8.034814181184996e-07, |
|
"loss": 0.7921, |
|
"step": 1253 |
|
}, |
|
{ |
|
"epoch": 2.508, |
|
"grad_norm": 0.3990473747253418, |
|
"learning_rate": 7.971669825215789e-07, |
|
"loss": 0.7728, |
|
"step": 1254 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"grad_norm": 0.4073302745819092, |
|
"learning_rate": 7.908753070237124e-07, |
|
"loss": 0.7421, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 2.512, |
|
"grad_norm": 0.48361241817474365, |
|
"learning_rate": 7.846064256969571e-07, |
|
"loss": 0.7522, |
|
"step": 1256 |
|
}, |
|
{ |
|
"epoch": 2.5140000000000002, |
|
"grad_norm": 0.39428237080574036, |
|
"learning_rate": 7.783603724899258e-07, |
|
"loss": 0.76, |
|
"step": 1257 |
|
}, |
|
{ |
|
"epoch": 2.516, |
|
"grad_norm": 0.3941364586353302, |
|
"learning_rate": 7.72137181227608e-07, |
|
"loss": 0.7715, |
|
"step": 1258 |
|
}, |
|
{ |
|
"epoch": 2.518, |
|
"grad_norm": 0.4074542224407196, |
|
"learning_rate": 7.659368856111926e-07, |
|
"loss": 0.7611, |
|
"step": 1259 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"grad_norm": 0.4131048619747162, |
|
"learning_rate": 7.597595192178702e-07, |
|
"loss": 0.7384, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 2.5220000000000002, |
|
"grad_norm": 0.34917521476745605, |
|
"learning_rate": 7.536051155006657e-07, |
|
"loss": 0.7621, |
|
"step": 1261 |
|
}, |
|
{ |
|
"epoch": 2.524, |
|
"grad_norm": 0.40943166613578796, |
|
"learning_rate": 7.47473707788251e-07, |
|
"loss": 0.7669, |
|
"step": 1262 |
|
}, |
|
{ |
|
"epoch": 2.526, |
|
"grad_norm": 0.42000773549079895, |
|
"learning_rate": 7.413653292847617e-07, |
|
"loss": 0.7689, |
|
"step": 1263 |
|
}, |
|
{ |
|
"epoch": 2.528, |
|
"grad_norm": 0.432198166847229, |
|
"learning_rate": 7.352800130696253e-07, |
|
"loss": 0.7821, |
|
"step": 1264 |
|
}, |
|
{ |
|
"epoch": 2.5300000000000002, |
|
"grad_norm": 0.4601210653781891, |
|
"learning_rate": 7.292177920973726e-07, |
|
"loss": 0.7382, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 2.532, |
|
"grad_norm": 0.398960679769516, |
|
"learning_rate": 7.23178699197467e-07, |
|
"loss": 0.7778, |
|
"step": 1266 |
|
}, |
|
{ |
|
"epoch": 2.534, |
|
"grad_norm": 0.4066762924194336, |
|
"learning_rate": 7.171627670741243e-07, |
|
"loss": 0.7505, |
|
"step": 1267 |
|
}, |
|
{ |
|
"epoch": 2.536, |
|
"grad_norm": 0.4193468689918518, |
|
"learning_rate": 7.111700283061318e-07, |
|
"loss": 0.7472, |
|
"step": 1268 |
|
}, |
|
{ |
|
"epoch": 2.5380000000000003, |
|
"grad_norm": 0.4361197054386139, |
|
"learning_rate": 7.052005153466779e-07, |
|
"loss": 0.7514, |
|
"step": 1269 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"grad_norm": 0.433932900428772, |
|
"learning_rate": 6.992542605231739e-07, |
|
"loss": 0.7533, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 2.542, |
|
"grad_norm": 0.3849650025367737, |
|
"learning_rate": 6.933312960370748e-07, |
|
"loss": 0.7595, |
|
"step": 1271 |
|
}, |
|
{ |
|
"epoch": 2.544, |
|
"grad_norm": 0.4118126630783081, |
|
"learning_rate": 6.874316539637127e-07, |
|
"loss": 0.7549, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 2.5460000000000003, |
|
"grad_norm": 0.4175261855125427, |
|
"learning_rate": 6.815553662521185e-07, |
|
"loss": 0.7291, |
|
"step": 1273 |
|
}, |
|
{ |
|
"epoch": 2.548, |
|
"grad_norm": 0.40914928913116455, |
|
"learning_rate": 6.757024647248456e-07, |
|
"loss": 0.7868, |
|
"step": 1274 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"grad_norm": 0.37963372468948364, |
|
"learning_rate": 6.698729810778065e-07, |
|
"loss": 0.7663, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 2.552, |
|
"grad_norm": 0.44203123450279236, |
|
"learning_rate": 6.640669468800947e-07, |
|
"loss": 0.7593, |
|
"step": 1276 |
|
}, |
|
{ |
|
"epoch": 2.5540000000000003, |
|
"grad_norm": 0.38774022459983826, |
|
"learning_rate": 6.58284393573812e-07, |
|
"loss": 0.7571, |
|
"step": 1277 |
|
}, |
|
{ |
|
"epoch": 2.556, |
|
"grad_norm": 0.40861040353775024, |
|
"learning_rate": 6.52525352473905e-07, |
|
"loss": 0.7659, |
|
"step": 1278 |
|
}, |
|
{ |
|
"epoch": 2.558, |
|
"grad_norm": 0.4282105565071106, |
|
"learning_rate": 6.467898547679913e-07, |
|
"loss": 0.7602, |
|
"step": 1279 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"grad_norm": 0.39104947447776794, |
|
"learning_rate": 6.410779315161885e-07, |
|
"loss": 0.7473, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 2.5620000000000003, |
|
"grad_norm": 0.411494642496109, |
|
"learning_rate": 6.353896136509524e-07, |
|
"loss": 0.77, |
|
"step": 1281 |
|
}, |
|
{ |
|
"epoch": 2.564, |
|
"grad_norm": 0.386294960975647, |
|
"learning_rate": 6.297249319769016e-07, |
|
"loss": 0.7938, |
|
"step": 1282 |
|
}, |
|
{ |
|
"epoch": 2.566, |
|
"grad_norm": 0.41267791390419006, |
|
"learning_rate": 6.240839171706608e-07, |
|
"loss": 0.7528, |
|
"step": 1283 |
|
}, |
|
{ |
|
"epoch": 2.568, |
|
"grad_norm": 0.41963523626327515, |
|
"learning_rate": 6.184665997806832e-07, |
|
"loss": 0.7562, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"grad_norm": 0.401456356048584, |
|
"learning_rate": 6.128730102270897e-07, |
|
"loss": 0.7551, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 2.572, |
|
"grad_norm": 0.38480910658836365, |
|
"learning_rate": 6.073031788015133e-07, |
|
"loss": 0.7358, |
|
"step": 1286 |
|
}, |
|
{ |
|
"epoch": 2.574, |
|
"grad_norm": 0.36396434903144836, |
|
"learning_rate": 6.017571356669183e-07, |
|
"loss": 0.7695, |
|
"step": 1287 |
|
}, |
|
{ |
|
"epoch": 2.576, |
|
"grad_norm": 0.394317626953125, |
|
"learning_rate": 5.962349108574478e-07, |
|
"loss": 0.7786, |
|
"step": 1288 |
|
}, |
|
{ |
|
"epoch": 2.578, |
|
"grad_norm": 0.37718522548675537, |
|
"learning_rate": 5.9073653427826e-07, |
|
"loss": 0.762, |
|
"step": 1289 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"grad_norm": 0.3722103238105774, |
|
"learning_rate": 5.852620357053651e-07, |
|
"loss": 0.7687, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 2.582, |
|
"grad_norm": 0.4033386707305908, |
|
"learning_rate": 5.798114447854636e-07, |
|
"loss": 0.7757, |
|
"step": 1291 |
|
}, |
|
{ |
|
"epoch": 2.584, |
|
"grad_norm": 0.440335750579834, |
|
"learning_rate": 5.743847910357836e-07, |
|
"loss": 0.7752, |
|
"step": 1292 |
|
}, |
|
{ |
|
"epoch": 2.586, |
|
"grad_norm": 0.41635289788246155, |
|
"learning_rate": 5.689821038439264e-07, |
|
"loss": 0.7256, |
|
"step": 1293 |
|
}, |
|
{ |
|
"epoch": 2.588, |
|
"grad_norm": 0.41737210750579834, |
|
"learning_rate": 5.636034124677043e-07, |
|
"loss": 0.7684, |
|
"step": 1294 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"grad_norm": 0.37913477420806885, |
|
"learning_rate": 5.582487460349806e-07, |
|
"loss": 0.7918, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 2.592, |
|
"grad_norm": 0.40841925144195557, |
|
"learning_rate": 5.529181335435124e-07, |
|
"loss": 0.7521, |
|
"step": 1296 |
|
}, |
|
{ |
|
"epoch": 2.594, |
|
"grad_norm": 0.4041779339313507, |
|
"learning_rate": 5.476116038607993e-07, |
|
"loss": 0.7527, |
|
"step": 1297 |
|
}, |
|
{ |
|
"epoch": 2.596, |
|
"grad_norm": 0.4226526916027069, |
|
"learning_rate": 5.423291857239177e-07, |
|
"loss": 0.8115, |
|
"step": 1298 |
|
}, |
|
{ |
|
"epoch": 2.598, |
|
"grad_norm": 0.40755200386047363, |
|
"learning_rate": 5.370709077393721e-07, |
|
"loss": 0.7997, |
|
"step": 1299 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"grad_norm": 0.3873971402645111, |
|
"learning_rate": 5.318367983829393e-07, |
|
"loss": 0.7787, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.602, |
|
"grad_norm": 0.443805992603302, |
|
"learning_rate": 5.266268859995083e-07, |
|
"loss": 0.7616, |
|
"step": 1301 |
|
}, |
|
{ |
|
"epoch": 2.604, |
|
"grad_norm": 0.39908817410469055, |
|
"learning_rate": 5.214411988029355e-07, |
|
"loss": 0.7876, |
|
"step": 1302 |
|
}, |
|
{ |
|
"epoch": 2.606, |
|
"grad_norm": 0.43873775005340576, |
|
"learning_rate": 5.162797648758877e-07, |
|
"loss": 0.7772, |
|
"step": 1303 |
|
}, |
|
{ |
|
"epoch": 2.608, |
|
"grad_norm": 0.39780065417289734, |
|
"learning_rate": 5.111426121696866e-07, |
|
"loss": 0.7736, |
|
"step": 1304 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"grad_norm": 0.4444182515144348, |
|
"learning_rate": 5.06029768504166e-07, |
|
"loss": 0.7951, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 2.612, |
|
"grad_norm": 0.4113735854625702, |
|
"learning_rate": 5.009412615675102e-07, |
|
"loss": 0.7346, |
|
"step": 1306 |
|
}, |
|
{ |
|
"epoch": 2.614, |
|
"grad_norm": 0.39655396342277527, |
|
"learning_rate": 4.958771189161149e-07, |
|
"loss": 0.7395, |
|
"step": 1307 |
|
}, |
|
{ |
|
"epoch": 2.616, |
|
"grad_norm": 0.423323392868042, |
|
"learning_rate": 4.908373679744316e-07, |
|
"loss": 0.7711, |
|
"step": 1308 |
|
}, |
|
{ |
|
"epoch": 2.618, |
|
"grad_norm": 0.4427488446235657, |
|
"learning_rate": 4.858220360348187e-07, |
|
"loss": 0.7558, |
|
"step": 1309 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"grad_norm": 0.3920653760433197, |
|
"learning_rate": 4.808311502573976e-07, |
|
"loss": 0.7444, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 2.622, |
|
"grad_norm": 0.40778854489326477, |
|
"learning_rate": 4.758647376699033e-07, |
|
"loss": 0.7639, |
|
"step": 1311 |
|
}, |
|
{ |
|
"epoch": 2.624, |
|
"grad_norm": 0.4384738802909851, |
|
"learning_rate": 4.709228251675357e-07, |
|
"loss": 0.7281, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 2.626, |
|
"grad_norm": 0.3867555856704712, |
|
"learning_rate": 4.6600543951281995e-07, |
|
"loss": 0.7692, |
|
"step": 1313 |
|
}, |
|
{ |
|
"epoch": 2.628, |
|
"grad_norm": 0.3772483170032501, |
|
"learning_rate": 4.6111260733545714e-07, |
|
"loss": 0.7303, |
|
"step": 1314 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"grad_norm": 0.3986133933067322, |
|
"learning_rate": 4.562443551321788e-07, |
|
"loss": 0.7802, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 2.632, |
|
"grad_norm": 0.39893269538879395, |
|
"learning_rate": 4.514007092666084e-07, |
|
"loss": 0.7718, |
|
"step": 1316 |
|
}, |
|
{ |
|
"epoch": 2.634, |
|
"grad_norm": 0.4097014367580414, |
|
"learning_rate": 4.4658169596911493e-07, |
|
"loss": 0.7906, |
|
"step": 1317 |
|
}, |
|
{ |
|
"epoch": 2.636, |
|
"grad_norm": 0.3478214144706726, |
|
"learning_rate": 4.417873413366702e-07, |
|
"loss": 0.7696, |
|
"step": 1318 |
|
}, |
|
{ |
|
"epoch": 2.638, |
|
"grad_norm": 0.42011260986328125, |
|
"learning_rate": 4.370176713327118e-07, |
|
"loss": 0.7825, |
|
"step": 1319 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"grad_norm": 0.40823644399642944, |
|
"learning_rate": 4.322727117869951e-07, |
|
"loss": 0.7785, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 2.642, |
|
"grad_norm": 0.4038843512535095, |
|
"learning_rate": 4.275524883954657e-07, |
|
"loss": 0.7932, |
|
"step": 1321 |
|
}, |
|
{ |
|
"epoch": 2.644, |
|
"grad_norm": 0.417581170797348, |
|
"learning_rate": 4.228570267201049e-07, |
|
"loss": 0.7759, |
|
"step": 1322 |
|
}, |
|
{ |
|
"epoch": 2.646, |
|
"grad_norm": 0.4698950946331024, |
|
"learning_rate": 4.1818635218880186e-07, |
|
"loss": 0.7731, |
|
"step": 1323 |
|
}, |
|
{ |
|
"epoch": 2.648, |
|
"grad_norm": 0.4142085313796997, |
|
"learning_rate": 4.1354049009521504e-07, |
|
"loss": 0.773, |
|
"step": 1324 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"grad_norm": 0.4050668776035309, |
|
"learning_rate": 4.089194655986306e-07, |
|
"loss": 0.801, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 2.652, |
|
"grad_norm": 0.41010555624961853, |
|
"learning_rate": 4.043233037238281e-07, |
|
"loss": 0.7833, |
|
"step": 1326 |
|
}, |
|
{ |
|
"epoch": 2.654, |
|
"grad_norm": 0.3984374701976776, |
|
"learning_rate": 3.99752029360948e-07, |
|
"loss": 0.7807, |
|
"step": 1327 |
|
}, |
|
{ |
|
"epoch": 2.656, |
|
"grad_norm": 0.41552355885505676, |
|
"learning_rate": 3.9520566726535367e-07, |
|
"loss": 0.7502, |
|
"step": 1328 |
|
}, |
|
{ |
|
"epoch": 2.658, |
|
"grad_norm": 0.4388125240802765, |
|
"learning_rate": 3.90684242057498e-07, |
|
"loss": 0.7821, |
|
"step": 1329 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"grad_norm": 0.4021058976650238, |
|
"learning_rate": 3.8618777822278854e-07, |
|
"loss": 0.8142, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 2.662, |
|
"grad_norm": 0.38491585850715637, |
|
"learning_rate": 3.8171630011145877e-07, |
|
"loss": 0.7804, |
|
"step": 1331 |
|
}, |
|
{ |
|
"epoch": 2.664, |
|
"grad_norm": 0.43489962816238403, |
|
"learning_rate": 3.772698319384349e-07, |
|
"loss": 0.7255, |
|
"step": 1332 |
|
}, |
|
{ |
|
"epoch": 2.666, |
|
"grad_norm": 0.4907485544681549, |
|
"learning_rate": 3.728483977831998e-07, |
|
"loss": 0.7768, |
|
"step": 1333 |
|
}, |
|
{ |
|
"epoch": 2.668, |
|
"grad_norm": 0.4141354262828827, |
|
"learning_rate": 3.684520215896703e-07, |
|
"loss": 0.7552, |
|
"step": 1334 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"grad_norm": 0.40464743971824646, |
|
"learning_rate": 3.6408072716606346e-07, |
|
"loss": 0.75, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 2.672, |
|
"grad_norm": 0.4001968204975128, |
|
"learning_rate": 3.597345381847656e-07, |
|
"loss": 0.7509, |
|
"step": 1336 |
|
}, |
|
{ |
|
"epoch": 2.674, |
|
"grad_norm": 0.39966756105422974, |
|
"learning_rate": 3.554134781822094e-07, |
|
"loss": 0.7706, |
|
"step": 1337 |
|
}, |
|
{ |
|
"epoch": 2.676, |
|
"grad_norm": 0.4206313192844391, |
|
"learning_rate": 3.511175705587433e-07, |
|
"loss": 0.7745, |
|
"step": 1338 |
|
}, |
|
{ |
|
"epoch": 2.678, |
|
"grad_norm": 0.4104755222797394, |
|
"learning_rate": 3.468468385785023e-07, |
|
"loss": 0.7618, |
|
"step": 1339 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"grad_norm": 0.4081501364707947, |
|
"learning_rate": 3.426013053692878e-07, |
|
"loss": 0.7593, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 2.682, |
|
"grad_norm": 0.4143938720226288, |
|
"learning_rate": 3.3838099392243915e-07, |
|
"loss": 0.7515, |
|
"step": 1341 |
|
}, |
|
{ |
|
"epoch": 2.684, |
|
"grad_norm": 0.37549248337745667, |
|
"learning_rate": 3.341859270927067e-07, |
|
"loss": 0.8178, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 2.686, |
|
"grad_norm": 0.3817611038684845, |
|
"learning_rate": 3.30016127598134e-07, |
|
"loss": 0.799, |
|
"step": 1343 |
|
}, |
|
{ |
|
"epoch": 2.6879999999999997, |
|
"grad_norm": 0.38387176394462585, |
|
"learning_rate": 3.258716180199278e-07, |
|
"loss": 0.7526, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"grad_norm": 0.42001819610595703, |
|
"learning_rate": 3.2175242080234314e-07, |
|
"loss": 0.7845, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 2.692, |
|
"grad_norm": 0.4304758906364441, |
|
"learning_rate": 3.1765855825255543e-07, |
|
"loss": 0.7381, |
|
"step": 1346 |
|
}, |
|
{ |
|
"epoch": 2.694, |
|
"grad_norm": 0.3993408977985382, |
|
"learning_rate": 3.135900525405428e-07, |
|
"loss": 0.7858, |
|
"step": 1347 |
|
}, |
|
{ |
|
"epoch": 2.6959999999999997, |
|
"grad_norm": 0.3781108260154724, |
|
"learning_rate": 3.0954692569896585e-07, |
|
"loss": 0.7184, |
|
"step": 1348 |
|
}, |
|
{ |
|
"epoch": 2.698, |
|
"grad_norm": 0.41282615065574646, |
|
"learning_rate": 3.055291996230492e-07, |
|
"loss": 0.7445, |
|
"step": 1349 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"grad_norm": 0.39070644974708557, |
|
"learning_rate": 3.015368960704584e-07, |
|
"loss": 0.7616, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 2.702, |
|
"grad_norm": 0.3836466670036316, |
|
"learning_rate": 2.975700366611883e-07, |
|
"loss": 0.7625, |
|
"step": 1351 |
|
}, |
|
{ |
|
"epoch": 2.7039999999999997, |
|
"grad_norm": 0.36659085750579834, |
|
"learning_rate": 2.9362864287744266e-07, |
|
"loss": 0.8153, |
|
"step": 1352 |
|
}, |
|
{ |
|
"epoch": 2.706, |
|
"grad_norm": 0.37427932024002075, |
|
"learning_rate": 2.8971273606351656e-07, |
|
"loss": 0.7523, |
|
"step": 1353 |
|
}, |
|
{ |
|
"epoch": 2.708, |
|
"grad_norm": 0.4171502888202667, |
|
"learning_rate": 2.858223374256841e-07, |
|
"loss": 0.7399, |
|
"step": 1354 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"grad_norm": 0.38254213333129883, |
|
"learning_rate": 2.819574680320825e-07, |
|
"loss": 0.7741, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 2.7119999999999997, |
|
"grad_norm": 0.4144255816936493, |
|
"learning_rate": 2.7811814881259503e-07, |
|
"loss": 0.7869, |
|
"step": 1356 |
|
}, |
|
{ |
|
"epoch": 2.714, |
|
"grad_norm": 0.3962395191192627, |
|
"learning_rate": 2.743044005587425e-07, |
|
"loss": 0.7871, |
|
"step": 1357 |
|
}, |
|
{ |
|
"epoch": 2.716, |
|
"grad_norm": 0.36894917488098145, |
|
"learning_rate": 2.705162439235648e-07, |
|
"loss": 0.7705, |
|
"step": 1358 |
|
}, |
|
{ |
|
"epoch": 2.718, |
|
"grad_norm": 0.39567211270332336, |
|
"learning_rate": 2.6675369942151864e-07, |
|
"loss": 0.7581, |
|
"step": 1359 |
|
}, |
|
{ |
|
"epoch": 2.7199999999999998, |
|
"grad_norm": 0.4306613504886627, |
|
"learning_rate": 2.63016787428354e-07, |
|
"loss": 0.7602, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 2.722, |
|
"grad_norm": 0.40510469675064087, |
|
"learning_rate": 2.593055281810125e-07, |
|
"loss": 0.7311, |
|
"step": 1361 |
|
}, |
|
{ |
|
"epoch": 2.724, |
|
"grad_norm": 0.3800606429576874, |
|
"learning_rate": 2.556199417775174e-07, |
|
"loss": 0.7655, |
|
"step": 1362 |
|
}, |
|
{ |
|
"epoch": 2.726, |
|
"grad_norm": 0.4124450385570526, |
|
"learning_rate": 2.519600481768597e-07, |
|
"loss": 0.7687, |
|
"step": 1363 |
|
}, |
|
{ |
|
"epoch": 2.7279999999999998, |
|
"grad_norm": 0.3907739222049713, |
|
"learning_rate": 2.483258671988942e-07, |
|
"loss": 0.7697, |
|
"step": 1364 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"grad_norm": 0.3988780081272125, |
|
"learning_rate": 2.447174185242324e-07, |
|
"loss": 0.7653, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 2.732, |
|
"grad_norm": 0.43391841650009155, |
|
"learning_rate": 2.4113472169413176e-07, |
|
"loss": 0.6854, |
|
"step": 1366 |
|
}, |
|
{ |
|
"epoch": 2.734, |
|
"grad_norm": 0.43772202730178833, |
|
"learning_rate": 2.37577796110397e-07, |
|
"loss": 0.7842, |
|
"step": 1367 |
|
}, |
|
{ |
|
"epoch": 2.7359999999999998, |
|
"grad_norm": 0.37034618854522705, |
|
"learning_rate": 2.3404666103526542e-07, |
|
"loss": 0.7639, |
|
"step": 1368 |
|
}, |
|
{ |
|
"epoch": 2.738, |
|
"grad_norm": 0.35954827070236206, |
|
"learning_rate": 2.3054133559131163e-07, |
|
"loss": 0.7998, |
|
"step": 1369 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"grad_norm": 0.38162147998809814, |
|
"learning_rate": 2.2706183876134047e-07, |
|
"loss": 0.7764, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 2.742, |
|
"grad_norm": 0.3861159086227417, |
|
"learning_rate": 2.2360818938828189e-07, |
|
"loss": 0.792, |
|
"step": 1371 |
|
}, |
|
{ |
|
"epoch": 2.7439999999999998, |
|
"grad_norm": 0.37095239758491516, |
|
"learning_rate": 2.2018040617509174e-07, |
|
"loss": 0.7745, |
|
"step": 1372 |
|
}, |
|
{ |
|
"epoch": 2.746, |
|
"grad_norm": 0.3978652358055115, |
|
"learning_rate": 2.167785076846518e-07, |
|
"loss": 0.7546, |
|
"step": 1373 |
|
}, |
|
{ |
|
"epoch": 2.748, |
|
"grad_norm": 0.39223480224609375, |
|
"learning_rate": 2.134025123396638e-07, |
|
"loss": 0.7821, |
|
"step": 1374 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"grad_norm": 0.4191019535064697, |
|
"learning_rate": 2.1005243842255552e-07, |
|
"loss": 0.7574, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 2.752, |
|
"grad_norm": 0.4072646200656891, |
|
"learning_rate": 2.0672830407537925e-07, |
|
"loss": 0.7591, |
|
"step": 1376 |
|
}, |
|
{ |
|
"epoch": 2.754, |
|
"grad_norm": 0.4050624370574951, |
|
"learning_rate": 2.0343012729971244e-07, |
|
"loss": 0.8114, |
|
"step": 1377 |
|
}, |
|
{ |
|
"epoch": 2.7560000000000002, |
|
"grad_norm": 0.4051263630390167, |
|
"learning_rate": 2.0015792595656225e-07, |
|
"loss": 0.7642, |
|
"step": 1378 |
|
}, |
|
{ |
|
"epoch": 2.758, |
|
"grad_norm": 0.3808835446834564, |
|
"learning_rate": 1.9691171776626882e-07, |
|
"loss": 0.7824, |
|
"step": 1379 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"grad_norm": 0.4035777449607849, |
|
"learning_rate": 1.9369152030840553e-07, |
|
"loss": 0.7698, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 2.762, |
|
"grad_norm": 0.382522851228714, |
|
"learning_rate": 1.904973510216912e-07, |
|
"loss": 0.7742, |
|
"step": 1381 |
|
}, |
|
{ |
|
"epoch": 2.7640000000000002, |
|
"grad_norm": 0.3774438798427582, |
|
"learning_rate": 1.873292272038868e-07, |
|
"loss": 0.791, |
|
"step": 1382 |
|
}, |
|
{ |
|
"epoch": 2.766, |
|
"grad_norm": 0.417490154504776, |
|
"learning_rate": 1.841871660117095e-07, |
|
"loss": 0.762, |
|
"step": 1383 |
|
}, |
|
{ |
|
"epoch": 2.768, |
|
"grad_norm": 0.40647685527801514, |
|
"learning_rate": 1.8107118446073492e-07, |
|
"loss": 0.7638, |
|
"step": 1384 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"grad_norm": 0.40906310081481934, |
|
"learning_rate": 1.779812994253055e-07, |
|
"loss": 0.8037, |
|
"step": 1385 |
|
}, |
|
{ |
|
"epoch": 2.7720000000000002, |
|
"grad_norm": 0.418174684047699, |
|
"learning_rate": 1.7491752763844294e-07, |
|
"loss": 0.7855, |
|
"step": 1386 |
|
}, |
|
{ |
|
"epoch": 2.774, |
|
"grad_norm": 0.4123242497444153, |
|
"learning_rate": 1.7187988569175307e-07, |
|
"loss": 0.7552, |
|
"step": 1387 |
|
}, |
|
{ |
|
"epoch": 2.776, |
|
"grad_norm": 0.4758404493331909, |
|
"learning_rate": 1.688683900353366e-07, |
|
"loss": 0.7618, |
|
"step": 1388 |
|
}, |
|
{ |
|
"epoch": 2.778, |
|
"grad_norm": 0.392837256193161, |
|
"learning_rate": 1.6588305697770313e-07, |
|
"loss": 0.7906, |
|
"step": 1389 |
|
}, |
|
{ |
|
"epoch": 2.7800000000000002, |
|
"grad_norm": 0.3605124354362488, |
|
"learning_rate": 1.6292390268568103e-07, |
|
"loss": 0.7709, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 2.782, |
|
"grad_norm": 0.39685410261154175, |
|
"learning_rate": 1.5999094318432662e-07, |
|
"loss": 0.7712, |
|
"step": 1391 |
|
}, |
|
{ |
|
"epoch": 2.784, |
|
"grad_norm": 0.3873448073863983, |
|
"learning_rate": 1.5708419435684463e-07, |
|
"loss": 0.7559, |
|
"step": 1392 |
|
}, |
|
{ |
|
"epoch": 2.786, |
|
"grad_norm": 0.4224396347999573, |
|
"learning_rate": 1.5420367194449448e-07, |
|
"loss": 0.7599, |
|
"step": 1393 |
|
}, |
|
{ |
|
"epoch": 2.7880000000000003, |
|
"grad_norm": 0.36535367369651794, |
|
"learning_rate": 1.5134939154651196e-07, |
|
"loss": 0.7822, |
|
"step": 1394 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"grad_norm": 0.3644055724143982, |
|
"learning_rate": 1.4852136862001766e-07, |
|
"loss": 0.7514, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 2.792, |
|
"grad_norm": 0.423311322927475, |
|
"learning_rate": 1.4571961847993977e-07, |
|
"loss": 0.7406, |
|
"step": 1396 |
|
}, |
|
{ |
|
"epoch": 2.794, |
|
"grad_norm": 0.3825368881225586, |
|
"learning_rate": 1.4294415629892756e-07, |
|
"loss": 0.7779, |
|
"step": 1397 |
|
}, |
|
{ |
|
"epoch": 2.7960000000000003, |
|
"grad_norm": 0.4092647433280945, |
|
"learning_rate": 1.4019499710726913e-07, |
|
"loss": 0.7341, |
|
"step": 1398 |
|
}, |
|
{ |
|
"epoch": 2.798, |
|
"grad_norm": 0.38083040714263916, |
|
"learning_rate": 1.374721557928116e-07, |
|
"loss": 0.7631, |
|
"step": 1399 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"grad_norm": 0.45573127269744873, |
|
"learning_rate": 1.3477564710088097e-07, |
|
"loss": 0.7536, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.802, |
|
"grad_norm": 0.38706058263778687, |
|
"learning_rate": 1.3210548563419857e-07, |
|
"loss": 0.7856, |
|
"step": 1401 |
|
}, |
|
{ |
|
"epoch": 2.8040000000000003, |
|
"grad_norm": 0.3757847547531128, |
|
"learning_rate": 1.294616858528064e-07, |
|
"loss": 0.8257, |
|
"step": 1402 |
|
}, |
|
{ |
|
"epoch": 2.806, |
|
"grad_norm": 0.44547516107559204, |
|
"learning_rate": 1.268442620739868e-07, |
|
"loss": 0.7904, |
|
"step": 1403 |
|
}, |
|
{ |
|
"epoch": 2.808, |
|
"grad_norm": 0.3914513885974884, |
|
"learning_rate": 1.2425322847218368e-07, |
|
"loss": 0.7536, |
|
"step": 1404 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"grad_norm": 0.40451890230178833, |
|
"learning_rate": 1.2168859907892904e-07, |
|
"loss": 0.7698, |
|
"step": 1405 |
|
}, |
|
{ |
|
"epoch": 2.8120000000000003, |
|
"grad_norm": 0.3859497010707855, |
|
"learning_rate": 1.1915038778276212e-07, |
|
"loss": 0.7556, |
|
"step": 1406 |
|
}, |
|
{ |
|
"epoch": 2.814, |
|
"grad_norm": 0.4025975465774536, |
|
"learning_rate": 1.166386083291604e-07, |
|
"loss": 0.7818, |
|
"step": 1407 |
|
}, |
|
{ |
|
"epoch": 2.816, |
|
"grad_norm": 0.3883987367153168, |
|
"learning_rate": 1.1415327432046041e-07, |
|
"loss": 0.7682, |
|
"step": 1408 |
|
}, |
|
{ |
|
"epoch": 2.818, |
|
"grad_norm": 0.41288435459136963, |
|
"learning_rate": 1.1169439921578485e-07, |
|
"loss": 0.7435, |
|
"step": 1409 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"grad_norm": 0.39167869091033936, |
|
"learning_rate": 1.0926199633097156e-07, |
|
"loss": 0.7778, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 2.822, |
|
"grad_norm": 0.41951802372932434, |
|
"learning_rate": 1.0685607883850035e-07, |
|
"loss": 0.7935, |
|
"step": 1411 |
|
}, |
|
{ |
|
"epoch": 2.824, |
|
"grad_norm": 0.40632784366607666, |
|
"learning_rate": 1.044766597674196e-07, |
|
"loss": 0.7273, |
|
"step": 1412 |
|
}, |
|
{ |
|
"epoch": 2.826, |
|
"grad_norm": 0.379300594329834, |
|
"learning_rate": 1.0212375200327973e-07, |
|
"loss": 0.7752, |
|
"step": 1413 |
|
}, |
|
{ |
|
"epoch": 2.828, |
|
"grad_norm": 0.3707320988178253, |
|
"learning_rate": 9.979736828806096e-08, |
|
"loss": 0.725, |
|
"step": 1414 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"grad_norm": 0.371960312128067, |
|
"learning_rate": 9.749752122010347e-08, |
|
"loss": 0.7946, |
|
"step": 1415 |
|
}, |
|
{ |
|
"epoch": 2.832, |
|
"grad_norm": 0.3983508348464966, |
|
"learning_rate": 9.522422325404234e-08, |
|
"loss": 0.7541, |
|
"step": 1416 |
|
}, |
|
{ |
|
"epoch": 2.834, |
|
"grad_norm": 0.38345709443092346, |
|
"learning_rate": 9.297748670073658e-08, |
|
"loss": 0.7919, |
|
"step": 1417 |
|
}, |
|
{ |
|
"epoch": 2.836, |
|
"grad_norm": 0.36458224058151245, |
|
"learning_rate": 9.075732372720414e-08, |
|
"loss": 0.765, |
|
"step": 1418 |
|
}, |
|
{ |
|
"epoch": 2.838, |
|
"grad_norm": 0.37088239192962646, |
|
"learning_rate": 8.856374635655696e-08, |
|
"loss": 0.7838, |
|
"step": 1419 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"grad_norm": 0.41397547721862793, |
|
"learning_rate": 8.639676646793382e-08, |
|
"loss": 0.7823, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 2.842, |
|
"grad_norm": 0.3749987781047821, |
|
"learning_rate": 8.425639579643763e-08, |
|
"loss": 0.7627, |
|
"step": 1421 |
|
}, |
|
{ |
|
"epoch": 2.844, |
|
"grad_norm": 0.4029843807220459, |
|
"learning_rate": 8.214264593307097e-08, |
|
"loss": 0.7964, |
|
"step": 1422 |
|
}, |
|
{ |
|
"epoch": 2.846, |
|
"grad_norm": 0.4003535211086273, |
|
"learning_rate": 8.00555283246729e-08, |
|
"loss": 0.7721, |
|
"step": 1423 |
|
}, |
|
{ |
|
"epoch": 2.848, |
|
"grad_norm": 0.3872876465320587, |
|
"learning_rate": 7.799505427386001e-08, |
|
"loss": 0.7721, |
|
"step": 1424 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"grad_norm": 0.4099399447441101, |
|
"learning_rate": 7.59612349389599e-08, |
|
"loss": 0.742, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 2.852, |
|
"grad_norm": 0.3872721791267395, |
|
"learning_rate": 7.395408133395509e-08, |
|
"loss": 0.8037, |
|
"step": 1426 |
|
}, |
|
{ |
|
"epoch": 2.854, |
|
"grad_norm": 0.3637619614601135, |
|
"learning_rate": 7.197360432842359e-08, |
|
"loss": 0.7783, |
|
"step": 1427 |
|
}, |
|
{ |
|
"epoch": 2.856, |
|
"grad_norm": 0.3728599548339844, |
|
"learning_rate": 7.001981464747565e-08, |
|
"loss": 0.7498, |
|
"step": 1428 |
|
}, |
|
{ |
|
"epoch": 2.858, |
|
"grad_norm": 0.37154945731163025, |
|
"learning_rate": 6.809272287169988e-08, |
|
"loss": 0.7953, |
|
"step": 1429 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"grad_norm": 0.41290783882141113, |
|
"learning_rate": 6.61923394371039e-08, |
|
"loss": 0.7582, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 2.862, |
|
"grad_norm": 0.37080150842666626, |
|
"learning_rate": 6.431867463506047e-08, |
|
"loss": 0.7905, |
|
"step": 1431 |
|
}, |
|
{ |
|
"epoch": 2.864, |
|
"grad_norm": 0.3939642608165741, |
|
"learning_rate": 6.247173861224753e-08, |
|
"loss": 0.7539, |
|
"step": 1432 |
|
}, |
|
{ |
|
"epoch": 2.866, |
|
"grad_norm": 0.3907812833786011, |
|
"learning_rate": 6.065154137059603e-08, |
|
"loss": 0.769, |
|
"step": 1433 |
|
}, |
|
{ |
|
"epoch": 2.868, |
|
"grad_norm": 0.3818773329257965, |
|
"learning_rate": 5.8858092767236084e-08, |
|
"loss": 0.7335, |
|
"step": 1434 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"grad_norm": 0.4225902855396271, |
|
"learning_rate": 5.709140251444201e-08, |
|
"loss": 0.7649, |
|
"step": 1435 |
|
}, |
|
{ |
|
"epoch": 2.872, |
|
"grad_norm": 0.41155657172203064, |
|
"learning_rate": 5.535148017958014e-08, |
|
"loss": 0.7602, |
|
"step": 1436 |
|
}, |
|
{ |
|
"epoch": 2.874, |
|
"grad_norm": 0.4028669595718384, |
|
"learning_rate": 5.363833518505834e-08, |
|
"loss": 0.7674, |
|
"step": 1437 |
|
}, |
|
{ |
|
"epoch": 2.876, |
|
"grad_norm": 0.38350680470466614, |
|
"learning_rate": 5.19519768082738e-08, |
|
"loss": 0.7224, |
|
"step": 1438 |
|
}, |
|
{ |
|
"epoch": 2.878, |
|
"grad_norm": 0.3763676881790161, |
|
"learning_rate": 5.029241418156139e-08, |
|
"loss": 0.7941, |
|
"step": 1439 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"grad_norm": 0.3712924122810364, |
|
"learning_rate": 4.865965629214819e-08, |
|
"loss": 0.7631, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 2.882, |
|
"grad_norm": 0.3843233585357666, |
|
"learning_rate": 4.7053711982101294e-08, |
|
"loss": 0.8029, |
|
"step": 1441 |
|
}, |
|
{ |
|
"epoch": 2.884, |
|
"grad_norm": 0.3684174120426178, |
|
"learning_rate": 4.5474589948280026e-08, |
|
"loss": 0.7565, |
|
"step": 1442 |
|
}, |
|
{ |
|
"epoch": 2.886, |
|
"grad_norm": 0.3982797861099243, |
|
"learning_rate": 4.392229874229159e-08, |
|
"loss": 0.7471, |
|
"step": 1443 |
|
}, |
|
{ |
|
"epoch": 2.888, |
|
"grad_norm": 0.41067230701446533, |
|
"learning_rate": 4.2396846770441644e-08, |
|
"loss": 0.7696, |
|
"step": 1444 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"grad_norm": 0.38704684376716614, |
|
"learning_rate": 4.0898242293691546e-08, |
|
"loss": 0.7443, |
|
"step": 1445 |
|
}, |
|
{ |
|
"epoch": 2.892, |
|
"grad_norm": 0.38135451078414917, |
|
"learning_rate": 3.9426493427611177e-08, |
|
"loss": 0.7472, |
|
"step": 1446 |
|
}, |
|
{ |
|
"epoch": 2.894, |
|
"grad_norm": 0.3887581527233124, |
|
"learning_rate": 3.7981608142335644e-08, |
|
"loss": 0.7413, |
|
"step": 1447 |
|
}, |
|
{ |
|
"epoch": 2.896, |
|
"grad_norm": 0.3935358226299286, |
|
"learning_rate": 3.65635942625242e-08, |
|
"loss": 0.7873, |
|
"step": 1448 |
|
}, |
|
{ |
|
"epoch": 2.898, |
|
"grad_norm": 0.3650510311126709, |
|
"learning_rate": 3.517245946731529e-08, |
|
"loss": 0.7751, |
|
"step": 1449 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"grad_norm": 0.38925668597221375, |
|
"learning_rate": 3.3808211290284886e-08, |
|
"loss": 0.7693, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 2.902, |
|
"grad_norm": 0.4022022783756256, |
|
"learning_rate": 3.247085711940878e-08, |
|
"loss": 0.7388, |
|
"step": 1451 |
|
}, |
|
{ |
|
"epoch": 2.904, |
|
"grad_norm": 0.38158661127090454, |
|
"learning_rate": 3.1160404197018155e-08, |
|
"loss": 0.7582, |
|
"step": 1452 |
|
}, |
|
{ |
|
"epoch": 2.906, |
|
"grad_norm": 0.39832374453544617, |
|
"learning_rate": 2.9876859619764606e-08, |
|
"loss": 0.7544, |
|
"step": 1453 |
|
}, |
|
{ |
|
"epoch": 2.908, |
|
"grad_norm": 0.3519267439842224, |
|
"learning_rate": 2.8620230338578526e-08, |
|
"loss": 0.7685, |
|
"step": 1454 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"grad_norm": 0.40946221351623535, |
|
"learning_rate": 2.7390523158633552e-08, |
|
"loss": 0.7249, |
|
"step": 1455 |
|
}, |
|
{ |
|
"epoch": 2.912, |
|
"grad_norm": 0.41239914298057556, |
|
"learning_rate": 2.6187744739308297e-08, |
|
"loss": 0.7743, |
|
"step": 1456 |
|
}, |
|
{ |
|
"epoch": 2.914, |
|
"grad_norm": 0.3555036187171936, |
|
"learning_rate": 2.501190159415079e-08, |
|
"loss": 0.7681, |
|
"step": 1457 |
|
}, |
|
{ |
|
"epoch": 2.916, |
|
"grad_norm": 0.37814003229141235, |
|
"learning_rate": 2.386300009084408e-08, |
|
"loss": 0.7537, |
|
"step": 1458 |
|
}, |
|
{ |
|
"epoch": 2.918, |
|
"grad_norm": 0.37123093008995056, |
|
"learning_rate": 2.27410464511707e-08, |
|
"loss": 0.7865, |
|
"step": 1459 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"grad_norm": 0.35215526819229126, |
|
"learning_rate": 2.1646046750978255e-08, |
|
"loss": 0.7807, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 2.922, |
|
"grad_norm": 0.39182814955711365, |
|
"learning_rate": 2.057800692014833e-08, |
|
"loss": 0.7786, |
|
"step": 1461 |
|
}, |
|
{ |
|
"epoch": 2.924, |
|
"grad_norm": 0.3995373249053955, |
|
"learning_rate": 1.953693274256374e-08, |
|
"loss": 0.7739, |
|
"step": 1462 |
|
}, |
|
{ |
|
"epoch": 2.926, |
|
"grad_norm": 0.3778274357318878, |
|
"learning_rate": 1.8522829856076895e-08, |
|
"loss": 0.7992, |
|
"step": 1463 |
|
}, |
|
{ |
|
"epoch": 2.928, |
|
"grad_norm": 0.4012286365032196, |
|
"learning_rate": 1.753570375247815e-08, |
|
"loss": 0.7559, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"grad_norm": 0.37980130314826965, |
|
"learning_rate": 1.657555977746972e-08, |
|
"loss": 0.7885, |
|
"step": 1465 |
|
}, |
|
{ |
|
"epoch": 2.932, |
|
"grad_norm": 0.37036362290382385, |
|
"learning_rate": 1.5642403130632367e-08, |
|
"loss": 0.7941, |
|
"step": 1466 |
|
}, |
|
{ |
|
"epoch": 2.934, |
|
"grad_norm": 0.38239234685897827, |
|
"learning_rate": 1.4736238865398766e-08, |
|
"loss": 0.7658, |
|
"step": 1467 |
|
}, |
|
{ |
|
"epoch": 2.936, |
|
"grad_norm": 0.36525359749794006, |
|
"learning_rate": 1.3857071889029073e-08, |
|
"loss": 0.8153, |
|
"step": 1468 |
|
}, |
|
{ |
|
"epoch": 2.9379999999999997, |
|
"grad_norm": 0.41786691546440125, |
|
"learning_rate": 1.3004906962578723e-08, |
|
"loss": 0.7857, |
|
"step": 1469 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"grad_norm": 0.3932099938392639, |
|
"learning_rate": 1.2179748700879013e-08, |
|
"loss": 0.7324, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 2.942, |
|
"grad_norm": 0.4426632225513458, |
|
"learning_rate": 1.1381601572505452e-08, |
|
"loss": 0.7635, |
|
"step": 1471 |
|
}, |
|
{ |
|
"epoch": 2.944, |
|
"grad_norm": 0.3719755709171295, |
|
"learning_rate": 1.0610469899760001e-08, |
|
"loss": 0.8129, |
|
"step": 1472 |
|
}, |
|
{ |
|
"epoch": 2.9459999999999997, |
|
"grad_norm": 0.4014662206172943, |
|
"learning_rate": 9.866357858642206e-09, |
|
"loss": 0.7705, |
|
"step": 1473 |
|
}, |
|
{ |
|
"epoch": 2.948, |
|
"grad_norm": 0.35570037364959717, |
|
"learning_rate": 9.14926947883088e-09, |
|
"loss": 0.7696, |
|
"step": 1474 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"grad_norm": 0.3750092089176178, |
|
"learning_rate": 8.459208643659122e-09, |
|
"loss": 0.7757, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 2.952, |
|
"grad_norm": 0.41063806414604187, |
|
"learning_rate": 7.796179090094891e-09, |
|
"loss": 0.8003, |
|
"step": 1476 |
|
}, |
|
{ |
|
"epoch": 2.9539999999999997, |
|
"grad_norm": 0.4088057577610016, |
|
"learning_rate": 7.160184408721571e-09, |
|
"loss": 0.7465, |
|
"step": 1477 |
|
}, |
|
{ |
|
"epoch": 2.956, |
|
"grad_norm": 0.368656724691391, |
|
"learning_rate": 6.551228043715218e-09, |
|
"loss": 0.7716, |
|
"step": 1478 |
|
}, |
|
{ |
|
"epoch": 2.958, |
|
"grad_norm": 0.4089622497558594, |
|
"learning_rate": 5.969313292830126e-09, |
|
"loss": 0.7852, |
|
"step": 1479 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"grad_norm": 0.3801428973674774, |
|
"learning_rate": 5.414443307377171e-09, |
|
"loss": 0.772, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 2.9619999999999997, |
|
"grad_norm": 0.39099130034446716, |
|
"learning_rate": 4.8866210922110525e-09, |
|
"loss": 0.7901, |
|
"step": 1481 |
|
}, |
|
{ |
|
"epoch": 2.964, |
|
"grad_norm": 0.38790273666381836, |
|
"learning_rate": 4.385849505708084e-09, |
|
"loss": 0.7628, |
|
"step": 1482 |
|
}, |
|
{ |
|
"epoch": 2.966, |
|
"grad_norm": 0.3607439398765564, |
|
"learning_rate": 3.912131259757313e-09, |
|
"loss": 0.781, |
|
"step": 1483 |
|
}, |
|
{ |
|
"epoch": 2.968, |
|
"grad_norm": 0.3746965825557709, |
|
"learning_rate": 3.4654689197405335e-09, |
|
"loss": 0.7677, |
|
"step": 1484 |
|
}, |
|
{ |
|
"epoch": 2.9699999999999998, |
|
"grad_norm": 0.3762003779411316, |
|
"learning_rate": 3.0458649045211897e-09, |
|
"loss": 0.7549, |
|
"step": 1485 |
|
}, |
|
{ |
|
"epoch": 2.972, |
|
"grad_norm": 0.4223495423793793, |
|
"learning_rate": 2.6533214864310485e-09, |
|
"loss": 0.7428, |
|
"step": 1486 |
|
}, |
|
{ |
|
"epoch": 2.974, |
|
"grad_norm": 0.3829779326915741, |
|
"learning_rate": 2.287840791256324e-09, |
|
"loss": 0.7882, |
|
"step": 1487 |
|
}, |
|
{ |
|
"epoch": 2.976, |
|
"grad_norm": 0.3744191527366638, |
|
"learning_rate": 1.9494247982282386e-09, |
|
"loss": 0.7786, |
|
"step": 1488 |
|
}, |
|
{ |
|
"epoch": 2.9779999999999998, |
|
"grad_norm": 0.3640522062778473, |
|
"learning_rate": 1.638075340010814e-09, |
|
"loss": 0.7755, |
|
"step": 1489 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"grad_norm": 0.36369234323501587, |
|
"learning_rate": 1.3537941026914302e-09, |
|
"loss": 0.7661, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 2.982, |
|
"grad_norm": 0.3734087646007538, |
|
"learning_rate": 1.096582625772502e-09, |
|
"loss": 0.7361, |
|
"step": 1491 |
|
}, |
|
{ |
|
"epoch": 2.984, |
|
"grad_norm": 0.43970629572868347, |
|
"learning_rate": 8.664423021614854e-10, |
|
"loss": 0.752, |
|
"step": 1492 |
|
}, |
|
{ |
|
"epoch": 2.9859999999999998, |
|
"grad_norm": 0.3792881965637207, |
|
"learning_rate": 6.633743781642166e-10, |
|
"loss": 0.7787, |
|
"step": 1493 |
|
}, |
|
{ |
|
"epoch": 2.988, |
|
"grad_norm": 0.4296637177467346, |
|
"learning_rate": 4.87379953478806e-10, |
|
"loss": 0.7623, |
|
"step": 1494 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"grad_norm": 0.4059789180755615, |
|
"learning_rate": 3.384599811889766e-10, |
|
"loss": 0.7643, |
|
"step": 1495 |
|
}, |
|
{ |
|
"epoch": 2.992, |
|
"grad_norm": 0.35259121656417847, |
|
"learning_rate": 2.1661526775795804e-10, |
|
"loss": 0.7792, |
|
"step": 1496 |
|
}, |
|
{ |
|
"epoch": 2.9939999999999998, |
|
"grad_norm": 0.3793678283691406, |
|
"learning_rate": 1.2184647302626585e-10, |
|
"loss": 0.7959, |
|
"step": 1497 |
|
}, |
|
{ |
|
"epoch": 2.996, |
|
"grad_norm": 0.40443864464759827, |
|
"learning_rate": 5.4154110206150465e-11, |
|
"loss": 0.763, |
|
"step": 1498 |
|
}, |
|
{ |
|
"epoch": 2.998, |
|
"grad_norm": 0.35355669260025024, |
|
"learning_rate": 1.3538545881042198e-11, |
|
"loss": 0.8012, |
|
"step": 1499 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 0.3940981328487396, |
|
"learning_rate": 0.0, |
|
"loss": 0.7917, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 1500, |
|
"total_flos": 1507181614268416.0, |
|
"train_loss": 0.8263951101700465, |
|
"train_runtime": 83725.9772, |
|
"train_samples_per_second": 1.72, |
|
"train_steps_per_second": 0.018 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 1500, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1507181614268416.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|