|
{ |
|
"best_metric": 7.122989865404904, |
|
"best_model_checkpoint": "./whisper-medium-mix-pt/checkpoint-5000", |
|
"epoch": 1.0622, |
|
"eval_steps": 1000, |
|
"global_step": 5000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.005, |
|
"grad_norm": 5.954268455505371, |
|
"learning_rate": 4.6000000000000004e-07, |
|
"loss": 0.7195, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 6.020993232727051, |
|
"learning_rate": 9.600000000000001e-07, |
|
"loss": 0.5893, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.015, |
|
"grad_norm": 4.6267008781433105, |
|
"learning_rate": 1.46e-06, |
|
"loss": 0.351, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 5.107844829559326, |
|
"learning_rate": 1.9600000000000003e-06, |
|
"loss": 0.27, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.025, |
|
"grad_norm": 4.793674468994141, |
|
"learning_rate": 2.46e-06, |
|
"loss": 0.5947, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 5.168308734893799, |
|
"learning_rate": 2.96e-06, |
|
"loss": 0.634, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.035, |
|
"grad_norm": 5.14207124710083, |
|
"learning_rate": 3.46e-06, |
|
"loss": 0.4872, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 4.885250568389893, |
|
"learning_rate": 3.96e-06, |
|
"loss": 0.3443, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.045, |
|
"grad_norm": 4.922640323638916, |
|
"learning_rate": 4.4600000000000005e-06, |
|
"loss": 0.3077, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 3.2033262252807617, |
|
"learning_rate": 4.960000000000001e-06, |
|
"loss": 0.2798, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.055, |
|
"grad_norm": 4.390773773193359, |
|
"learning_rate": 5.460000000000001e-06, |
|
"loss": 0.237, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 4.187547206878662, |
|
"learning_rate": 5.9600000000000005e-06, |
|
"loss": 0.2354, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.065, |
|
"grad_norm": 4.3109869956970215, |
|
"learning_rate": 6.460000000000001e-06, |
|
"loss": 0.2123, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 3.2677078247070312, |
|
"learning_rate": 6.96e-06, |
|
"loss": 0.1923, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.075, |
|
"grad_norm": 4.168946743011475, |
|
"learning_rate": 7.4600000000000006e-06, |
|
"loss": 0.204, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 3.5255637168884277, |
|
"learning_rate": 7.960000000000002e-06, |
|
"loss": 0.1864, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.085, |
|
"grad_norm": 2.9705564975738525, |
|
"learning_rate": 8.46e-06, |
|
"loss": 0.1935, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 2.4820713996887207, |
|
"learning_rate": 8.96e-06, |
|
"loss": 0.1751, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.095, |
|
"grad_norm": 3.700296401977539, |
|
"learning_rate": 9.460000000000001e-06, |
|
"loss": 0.1594, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 3.0839247703552246, |
|
"learning_rate": 9.960000000000001e-06, |
|
"loss": 0.1709, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.105, |
|
"grad_norm": 3.175724983215332, |
|
"learning_rate": 9.94888888888889e-06, |
|
"loss": 0.1246, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 3.514580726623535, |
|
"learning_rate": 9.893333333333334e-06, |
|
"loss": 0.1323, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.115, |
|
"grad_norm": 4.5611724853515625, |
|
"learning_rate": 9.837777777777778e-06, |
|
"loss": 0.2125, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 3.8415772914886475, |
|
"learning_rate": 9.782222222222222e-06, |
|
"loss": 0.3451, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.125, |
|
"grad_norm": 3.0463509559631348, |
|
"learning_rate": 9.726666666666668e-06, |
|
"loss": 0.205, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 2.989981174468994, |
|
"learning_rate": 9.671111111111112e-06, |
|
"loss": 0.1433, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.135, |
|
"grad_norm": 2.3929381370544434, |
|
"learning_rate": 9.615555555555558e-06, |
|
"loss": 0.1383, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 2.7740297317504883, |
|
"learning_rate": 9.56e-06, |
|
"loss": 0.1288, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.145, |
|
"grad_norm": 3.0236458778381348, |
|
"learning_rate": 9.504444444444446e-06, |
|
"loss": 0.1278, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 4.738789081573486, |
|
"learning_rate": 9.44888888888889e-06, |
|
"loss": 0.1316, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.155, |
|
"grad_norm": 2.820563316345215, |
|
"learning_rate": 9.393333333333334e-06, |
|
"loss": 0.1298, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 3.1976284980773926, |
|
"learning_rate": 9.33777777777778e-06, |
|
"loss": 0.125, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.165, |
|
"grad_norm": 3.4737789630889893, |
|
"learning_rate": 9.282222222222222e-06, |
|
"loss": 0.1437, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 2.761700391769409, |
|
"learning_rate": 9.226666666666668e-06, |
|
"loss": 0.1285, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.175, |
|
"grad_norm": 2.7969601154327393, |
|
"learning_rate": 9.171111111111112e-06, |
|
"loss": 0.1091, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 3.8701090812683105, |
|
"learning_rate": 9.115555555555556e-06, |
|
"loss": 0.1383, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.185, |
|
"grad_norm": 2.473555088043213, |
|
"learning_rate": 9.060000000000001e-06, |
|
"loss": 0.1067, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 3.0442628860473633, |
|
"learning_rate": 9.004444444444445e-06, |
|
"loss": 0.1265, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.195, |
|
"grad_norm": 3.2474780082702637, |
|
"learning_rate": 8.94888888888889e-06, |
|
"loss": 0.1254, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 1.9548333883285522, |
|
"learning_rate": 8.893333333333333e-06, |
|
"loss": 0.1116, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 0.15698818862438202, |
|
"eval_runtime": 947.4383, |
|
"eval_samples_per_second": 9.992, |
|
"eval_steps_per_second": 1.25, |
|
"eval_wer": 8.582435499856134, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.205, |
|
"grad_norm": 2.7346816062927246, |
|
"learning_rate": 8.83777777777778e-06, |
|
"loss": 0.1005, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 3.2607483863830566, |
|
"learning_rate": 8.782222222222223e-06, |
|
"loss": 0.0931, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.215, |
|
"grad_norm": 2.8954336643218994, |
|
"learning_rate": 8.726666666666667e-06, |
|
"loss": 0.1111, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 3.1846327781677246, |
|
"learning_rate": 8.671111111111113e-06, |
|
"loss": 0.1003, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.225, |
|
"grad_norm": 2.284499168395996, |
|
"learning_rate": 8.615555555555555e-06, |
|
"loss": 0.1018, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 2.8060786724090576, |
|
"learning_rate": 8.560000000000001e-06, |
|
"loss": 0.1145, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.235, |
|
"grad_norm": 2.9943974018096924, |
|
"learning_rate": 8.504444444444445e-06, |
|
"loss": 0.1247, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 2.2265496253967285, |
|
"learning_rate": 8.448888888888889e-06, |
|
"loss": 0.1193, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.245, |
|
"grad_norm": 2.4557271003723145, |
|
"learning_rate": 8.393333333333335e-06, |
|
"loss": 0.1209, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 2.332017183303833, |
|
"learning_rate": 8.337777777777777e-06, |
|
"loss": 0.114, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.255, |
|
"grad_norm": 3.056497812271118, |
|
"learning_rate": 8.282222222222223e-06, |
|
"loss": 0.1043, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 3.024203062057495, |
|
"learning_rate": 8.226666666666667e-06, |
|
"loss": 0.1247, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.265, |
|
"grad_norm": 2.543893814086914, |
|
"learning_rate": 8.171111111111113e-06, |
|
"loss": 0.1347, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 2.2330307960510254, |
|
"learning_rate": 8.115555555555557e-06, |
|
"loss": 0.119, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.275, |
|
"grad_norm": 2.819495677947998, |
|
"learning_rate": 8.06e-06, |
|
"loss": 0.0998, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 2.6235690116882324, |
|
"learning_rate": 8.004444444444445e-06, |
|
"loss": 0.1037, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.285, |
|
"grad_norm": 3.0939559936523438, |
|
"learning_rate": 7.948888888888889e-06, |
|
"loss": 0.1027, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 2.9563097953796387, |
|
"learning_rate": 7.893333333333335e-06, |
|
"loss": 0.106, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.295, |
|
"grad_norm": 2.6161818504333496, |
|
"learning_rate": 7.837777777777779e-06, |
|
"loss": 0.1112, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 2.455970525741577, |
|
"learning_rate": 7.782222222222223e-06, |
|
"loss": 0.1138, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.305, |
|
"grad_norm": 2.8548126220703125, |
|
"learning_rate": 7.726666666666667e-06, |
|
"loss": 0.0962, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 2.414487838745117, |
|
"learning_rate": 7.67111111111111e-06, |
|
"loss": 0.105, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.315, |
|
"grad_norm": 2.747642993927002, |
|
"learning_rate": 7.6155555555555564e-06, |
|
"loss": 0.1111, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 2.5105972290039062, |
|
"learning_rate": 7.5600000000000005e-06, |
|
"loss": 0.1066, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.325, |
|
"grad_norm": 2.765288829803467, |
|
"learning_rate": 7.504444444444445e-06, |
|
"loss": 0.1423, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 3.616039991378784, |
|
"learning_rate": 7.44888888888889e-06, |
|
"loss": 0.1527, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.335, |
|
"grad_norm": 3.961867094039917, |
|
"learning_rate": 7.393333333333333e-06, |
|
"loss": 0.1409, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 3.5502121448516846, |
|
"learning_rate": 7.337777777777778e-06, |
|
"loss": 0.1375, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.345, |
|
"grad_norm": 2.9982211589813232, |
|
"learning_rate": 7.282222222222222e-06, |
|
"loss": 0.1208, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 3.1248340606689453, |
|
"learning_rate": 7.226666666666667e-06, |
|
"loss": 0.1375, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.355, |
|
"grad_norm": 2.1703038215637207, |
|
"learning_rate": 7.171111111111112e-06, |
|
"loss": 0.1513, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 2.672173500061035, |
|
"learning_rate": 7.115555555555557e-06, |
|
"loss": 0.1169, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.365, |
|
"grad_norm": 2.1961538791656494, |
|
"learning_rate": 7.06e-06, |
|
"loss": 0.1072, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 2.129289388656616, |
|
"learning_rate": 7.004444444444445e-06, |
|
"loss": 0.1022, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.375, |
|
"grad_norm": 2.4798481464385986, |
|
"learning_rate": 6.948888888888889e-06, |
|
"loss": 0.1129, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 2.9369380474090576, |
|
"learning_rate": 6.893333333333334e-06, |
|
"loss": 0.1149, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.385, |
|
"grad_norm": 2.7497618198394775, |
|
"learning_rate": 6.837777777777779e-06, |
|
"loss": 0.1181, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 2.4043569564819336, |
|
"learning_rate": 6.782222222222222e-06, |
|
"loss": 0.0998, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.395, |
|
"grad_norm": 2.9279298782348633, |
|
"learning_rate": 6.726666666666667e-06, |
|
"loss": 0.1035, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 2.7362992763519287, |
|
"learning_rate": 6.671111111111112e-06, |
|
"loss": 0.105, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 0.14844202995300293, |
|
"eval_runtime": 940.7954, |
|
"eval_samples_per_second": 10.063, |
|
"eval_steps_per_second": 1.259, |
|
"eval_wer": 7.93983183605614, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.405, |
|
"grad_norm": 2.0498945713043213, |
|
"learning_rate": 6.615555555555556e-06, |
|
"loss": 0.1086, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 2.7259154319763184, |
|
"learning_rate": 6.560000000000001e-06, |
|
"loss": 0.1176, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.415, |
|
"grad_norm": 2.4379186630249023, |
|
"learning_rate": 6.504444444444446e-06, |
|
"loss": 0.1157, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 2.397427558898926, |
|
"learning_rate": 6.448888888888889e-06, |
|
"loss": 0.1229, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.425, |
|
"grad_norm": 2.2779042720794678, |
|
"learning_rate": 6.393333333333334e-06, |
|
"loss": 0.0934, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 1.9669123888015747, |
|
"learning_rate": 6.3377777777777786e-06, |
|
"loss": 0.0968, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.435, |
|
"grad_norm": 2.051048755645752, |
|
"learning_rate": 6.282222222222223e-06, |
|
"loss": 0.0876, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 2.082129716873169, |
|
"learning_rate": 6.2266666666666675e-06, |
|
"loss": 0.0832, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.445, |
|
"grad_norm": 2.0666728019714355, |
|
"learning_rate": 6.171111111111112e-06, |
|
"loss": 0.081, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 2.1512069702148438, |
|
"learning_rate": 6.1155555555555555e-06, |
|
"loss": 0.0795, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.455, |
|
"grad_norm": 2.393537998199463, |
|
"learning_rate": 6.0600000000000004e-06, |
|
"loss": 0.0797, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 2.4059464931488037, |
|
"learning_rate": 6.004444444444445e-06, |
|
"loss": 0.086, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.465, |
|
"grad_norm": 2.3822548389434814, |
|
"learning_rate": 5.948888888888889e-06, |
|
"loss": 0.0883, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 2.063124656677246, |
|
"learning_rate": 5.893333333333334e-06, |
|
"loss": 0.082, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.475, |
|
"grad_norm": 2.988023281097412, |
|
"learning_rate": 5.837777777777777e-06, |
|
"loss": 0.194, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 3.959836959838867, |
|
"learning_rate": 5.782222222222222e-06, |
|
"loss": 0.2993, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.485, |
|
"grad_norm": 2.79730486869812, |
|
"learning_rate": 5.726666666666667e-06, |
|
"loss": 0.2067, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 1.7770684957504272, |
|
"learning_rate": 5.671111111111112e-06, |
|
"loss": 0.1769, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.495, |
|
"grad_norm": 2.2204787731170654, |
|
"learning_rate": 5.615555555555556e-06, |
|
"loss": 0.0911, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 3.053565263748169, |
|
"learning_rate": 5.560000000000001e-06, |
|
"loss": 0.1141, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.505, |
|
"grad_norm": 5.307312488555908, |
|
"learning_rate": 5.504444444444444e-06, |
|
"loss": 0.4682, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 4.99273157119751, |
|
"learning_rate": 5.448888888888889e-06, |
|
"loss": 0.4198, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.515, |
|
"grad_norm": 4.5542707443237305, |
|
"learning_rate": 5.393333333333334e-06, |
|
"loss": 0.3805, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 3.3522355556488037, |
|
"learning_rate": 5.337777777777779e-06, |
|
"loss": 0.3213, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.525, |
|
"grad_norm": 3.8808069229125977, |
|
"learning_rate": 5.282222222222223e-06, |
|
"loss": 0.2504, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 3.6517879962921143, |
|
"learning_rate": 5.226666666666667e-06, |
|
"loss": 0.2508, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.535, |
|
"grad_norm": 5.0575151443481445, |
|
"learning_rate": 5.171111111111111e-06, |
|
"loss": 0.288, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 4.049450874328613, |
|
"learning_rate": 5.115555555555556e-06, |
|
"loss": 0.2526, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.545, |
|
"grad_norm": 3.075061798095703, |
|
"learning_rate": 5.060000000000001e-06, |
|
"loss": 0.25, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 3.80968976020813, |
|
"learning_rate": 5.004444444444445e-06, |
|
"loss": 0.2388, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.555, |
|
"grad_norm": 3.044834852218628, |
|
"learning_rate": 4.94888888888889e-06, |
|
"loss": 0.1718, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 2.3228962421417236, |
|
"learning_rate": 4.893333333333334e-06, |
|
"loss": 0.148, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.565, |
|
"grad_norm": 2.9311017990112305, |
|
"learning_rate": 4.837777777777778e-06, |
|
"loss": 0.1308, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 2.485396146774292, |
|
"learning_rate": 4.7822222222222226e-06, |
|
"loss": 0.1197, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.575, |
|
"grad_norm": 3.5410890579223633, |
|
"learning_rate": 4.7266666666666674e-06, |
|
"loss": 0.0878, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 2.246903657913208, |
|
"learning_rate": 4.6711111111111115e-06, |
|
"loss": 0.0793, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.585, |
|
"grad_norm": 2.251746416091919, |
|
"learning_rate": 4.6155555555555555e-06, |
|
"loss": 0.0795, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1.791506052017212, |
|
"learning_rate": 4.56e-06, |
|
"loss": 0.0918, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.595, |
|
"grad_norm": 1.3495018482208252, |
|
"learning_rate": 4.504444444444444e-06, |
|
"loss": 0.077, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.8840142488479614, |
|
"learning_rate": 4.448888888888889e-06, |
|
"loss": 0.0783, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_loss": 0.13742496073246002, |
|
"eval_runtime": 944.2672, |
|
"eval_samples_per_second": 10.026, |
|
"eval_steps_per_second": 1.254, |
|
"eval_wer": 7.447488730458135, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.605, |
|
"grad_norm": 1.7588751316070557, |
|
"learning_rate": 4.393333333333334e-06, |
|
"loss": 0.0747, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 1.9886692762374878, |
|
"learning_rate": 4.337777777777778e-06, |
|
"loss": 0.076, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.615, |
|
"grad_norm": 2.5392279624938965, |
|
"learning_rate": 4.282222222222222e-06, |
|
"loss": 0.0796, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 2.3111472129821777, |
|
"learning_rate": 4.226666666666667e-06, |
|
"loss": 0.0898, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.625, |
|
"grad_norm": 1.8798396587371826, |
|
"learning_rate": 4.171111111111111e-06, |
|
"loss": 0.0742, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 1.8901393413543701, |
|
"learning_rate": 4.115555555555556e-06, |
|
"loss": 0.0815, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.635, |
|
"grad_norm": 3.506927490234375, |
|
"learning_rate": 4.060000000000001e-06, |
|
"loss": 0.1055, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 3.018944025039673, |
|
"learning_rate": 4.004444444444445e-06, |
|
"loss": 0.1541, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.645, |
|
"grad_norm": 2.304964303970337, |
|
"learning_rate": 3.948888888888889e-06, |
|
"loss": 0.1213, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 2.425555944442749, |
|
"learning_rate": 3.893333333333333e-06, |
|
"loss": 0.0935, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.655, |
|
"grad_norm": 1.5498875379562378, |
|
"learning_rate": 3.837777777777778e-06, |
|
"loss": 0.0838, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 2.360494613647461, |
|
"learning_rate": 3.782222222222223e-06, |
|
"loss": 0.0777, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.665, |
|
"grad_norm": 1.5479514598846436, |
|
"learning_rate": 3.726666666666667e-06, |
|
"loss": 0.0778, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 2.920868158340454, |
|
"learning_rate": 3.6711111111111113e-06, |
|
"loss": 0.0837, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.675, |
|
"grad_norm": 1.8797603845596313, |
|
"learning_rate": 3.615555555555556e-06, |
|
"loss": 0.0727, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 2.5125744342803955, |
|
"learning_rate": 3.5600000000000002e-06, |
|
"loss": 0.0798, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.685, |
|
"grad_norm": 2.6805381774902344, |
|
"learning_rate": 3.5044444444444447e-06, |
|
"loss": 0.0825, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 2.2335715293884277, |
|
"learning_rate": 3.4488888888888896e-06, |
|
"loss": 0.1145, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.695, |
|
"grad_norm": 3.3753836154937744, |
|
"learning_rate": 3.3933333333333336e-06, |
|
"loss": 0.1083, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 3.470496654510498, |
|
"learning_rate": 3.337777777777778e-06, |
|
"loss": 0.3921, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.705, |
|
"grad_norm": 1.5432311296463013, |
|
"learning_rate": 3.282222222222223e-06, |
|
"loss": 0.161, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 2.0430617332458496, |
|
"learning_rate": 3.226666666666667e-06, |
|
"loss": 0.1171, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.715, |
|
"grad_norm": 1.5512393712997437, |
|
"learning_rate": 3.1711111111111114e-06, |
|
"loss": 0.08, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 1.4717549085617065, |
|
"learning_rate": 3.1155555555555555e-06, |
|
"loss": 0.0726, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.725, |
|
"grad_norm": 1.5445371866226196, |
|
"learning_rate": 3.0600000000000003e-06, |
|
"loss": 0.0589, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 1.693588376045227, |
|
"learning_rate": 3.004444444444445e-06, |
|
"loss": 0.0606, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.735, |
|
"grad_norm": 1.875542163848877, |
|
"learning_rate": 2.948888888888889e-06, |
|
"loss": 0.0548, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 1.2488154172897339, |
|
"learning_rate": 2.8933333333333337e-06, |
|
"loss": 0.0517, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.745, |
|
"grad_norm": 1.5682026147842407, |
|
"learning_rate": 2.837777777777778e-06, |
|
"loss": 0.0571, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 1.1433475017547607, |
|
"learning_rate": 2.7822222222222222e-06, |
|
"loss": 0.0513, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.755, |
|
"grad_norm": 1.7756175994873047, |
|
"learning_rate": 2.726666666666667e-06, |
|
"loss": 0.047, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 1.6270946264266968, |
|
"learning_rate": 2.6711111111111116e-06, |
|
"loss": 0.0528, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.765, |
|
"grad_norm": 1.3709731101989746, |
|
"learning_rate": 2.6155555555555556e-06, |
|
"loss": 0.0507, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 1.4345024824142456, |
|
"learning_rate": 2.56e-06, |
|
"loss": 0.0455, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.775, |
|
"grad_norm": 2.159369707107544, |
|
"learning_rate": 2.504444444444445e-06, |
|
"loss": 0.0548, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 2.050881862640381, |
|
"learning_rate": 2.448888888888889e-06, |
|
"loss": 0.0833, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.785, |
|
"grad_norm": 2.428863763809204, |
|
"learning_rate": 2.3933333333333334e-06, |
|
"loss": 0.1491, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 2.655684471130371, |
|
"learning_rate": 2.337777777777778e-06, |
|
"loss": 0.1254, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.795, |
|
"grad_norm": 1.752144455909729, |
|
"learning_rate": 2.2822222222222223e-06, |
|
"loss": 0.1352, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 2.89431095123291, |
|
"learning_rate": 2.226666666666667e-06, |
|
"loss": 0.1703, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 0.13704591989517212, |
|
"eval_runtime": 942.2184, |
|
"eval_samples_per_second": 10.048, |
|
"eval_steps_per_second": 1.257, |
|
"eval_wer": 7.241280092074555, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.805, |
|
"grad_norm": 2.1112782955169678, |
|
"learning_rate": 2.1711111111111113e-06, |
|
"loss": 0.137, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 2.241307258605957, |
|
"learning_rate": 2.1155555555555557e-06, |
|
"loss": 0.0809, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.815, |
|
"grad_norm": 1.5932738780975342, |
|
"learning_rate": 2.06e-06, |
|
"loss": 0.0605, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 1.965742588043213, |
|
"learning_rate": 2.006666666666667e-06, |
|
"loss": 0.0549, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.825, |
|
"grad_norm": 3.738175868988037, |
|
"learning_rate": 1.9511111111111113e-06, |
|
"loss": 0.0557, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 1.9460804462432861, |
|
"learning_rate": 1.8955555555555557e-06, |
|
"loss": 0.0433, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.835, |
|
"grad_norm": 1.6488566398620605, |
|
"learning_rate": 1.8400000000000002e-06, |
|
"loss": 0.0655, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 1.511797308921814, |
|
"learning_rate": 1.7844444444444444e-06, |
|
"loss": 0.0787, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.845, |
|
"grad_norm": 2.361323833465576, |
|
"learning_rate": 1.728888888888889e-06, |
|
"loss": 0.0685, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 1.1696654558181763, |
|
"learning_rate": 1.6733333333333335e-06, |
|
"loss": 0.0518, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.855, |
|
"grad_norm": 1.6606045961380005, |
|
"learning_rate": 1.6177777777777778e-06, |
|
"loss": 0.0509, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 1.3564594984054565, |
|
"learning_rate": 1.5622222222222225e-06, |
|
"loss": 0.0545, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.865, |
|
"grad_norm": 2.545974016189575, |
|
"learning_rate": 1.506666666666667e-06, |
|
"loss": 0.0953, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 5.036618232727051, |
|
"learning_rate": 1.4511111111111112e-06, |
|
"loss": 0.1465, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.875, |
|
"grad_norm": 3.5834786891937256, |
|
"learning_rate": 1.3955555555555556e-06, |
|
"loss": 0.4963, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 4.646224498748779, |
|
"learning_rate": 1.34e-06, |
|
"loss": 0.5522, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.885, |
|
"grad_norm": 4.311796188354492, |
|
"learning_rate": 1.2844444444444445e-06, |
|
"loss": 0.585, |
|
"step": 4425 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 2.739590883255005, |
|
"learning_rate": 1.228888888888889e-06, |
|
"loss": 0.5151, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.895, |
|
"grad_norm": 2.0715537071228027, |
|
"learning_rate": 1.1733333333333335e-06, |
|
"loss": 0.2399, |
|
"step": 4475 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 1.7919174432754517, |
|
"learning_rate": 1.117777777777778e-06, |
|
"loss": 0.1013, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.905, |
|
"grad_norm": 1.8934131860733032, |
|
"learning_rate": 1.0622222222222222e-06, |
|
"loss": 0.0792, |
|
"step": 4525 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 1.463447093963623, |
|
"learning_rate": 1.0066666666666668e-06, |
|
"loss": 0.0594, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.915, |
|
"grad_norm": 2.0955190658569336, |
|
"learning_rate": 9.511111111111111e-07, |
|
"loss": 0.0641, |
|
"step": 4575 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 1.8596012592315674, |
|
"learning_rate": 8.955555555555557e-07, |
|
"loss": 0.065, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.925, |
|
"grad_norm": 2.5963196754455566, |
|
"learning_rate": 8.400000000000001e-07, |
|
"loss": 0.0604, |
|
"step": 4625 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 1.361854076385498, |
|
"learning_rate": 7.844444444444445e-07, |
|
"loss": 0.0695, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.935, |
|
"grad_norm": 1.8713892698287964, |
|
"learning_rate": 7.28888888888889e-07, |
|
"loss": 0.0501, |
|
"step": 4675 |
|
}, |
|
{ |
|
"epoch": 1.0022, |
|
"grad_norm": 1.7594069242477417, |
|
"learning_rate": 6.755555555555555e-07, |
|
"loss": 0.0811, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 1.0072, |
|
"grad_norm": 2.2666430473327637, |
|
"learning_rate": 6.200000000000001e-07, |
|
"loss": 0.0794, |
|
"step": 4725 |
|
}, |
|
{ |
|
"epoch": 1.0122, |
|
"grad_norm": 1.4609644412994385, |
|
"learning_rate": 5.644444444444445e-07, |
|
"loss": 0.0913, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 1.0172, |
|
"grad_norm": 2.5821077823638916, |
|
"learning_rate": 5.088888888888889e-07, |
|
"loss": 0.0743, |
|
"step": 4775 |
|
}, |
|
{ |
|
"epoch": 1.0222, |
|
"grad_norm": 3.746551036834717, |
|
"learning_rate": 4.533333333333334e-07, |
|
"loss": 0.2048, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 1.0272, |
|
"grad_norm": 4.535083293914795, |
|
"learning_rate": 3.9777777777777783e-07, |
|
"loss": 0.4846, |
|
"step": 4825 |
|
}, |
|
{ |
|
"epoch": 1.0322, |
|
"grad_norm": 4.492420196533203, |
|
"learning_rate": 3.422222222222223e-07, |
|
"loss": 0.3757, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 1.0372, |
|
"grad_norm": 3.43796706199646, |
|
"learning_rate": 2.866666666666667e-07, |
|
"loss": 0.2405, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 1.0422, |
|
"grad_norm": 3.234219551086426, |
|
"learning_rate": 2.3111111111111112e-07, |
|
"loss": 0.1906, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 1.0472, |
|
"grad_norm": 2.204688549041748, |
|
"learning_rate": 1.7555555555555558e-07, |
|
"loss": 0.1335, |
|
"step": 4925 |
|
}, |
|
{ |
|
"epoch": 1.0522, |
|
"grad_norm": 3.1717331409454346, |
|
"learning_rate": 1.2000000000000002e-07, |
|
"loss": 0.1198, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 1.0572, |
|
"grad_norm": 2.4640631675720215, |
|
"learning_rate": 6.444444444444445e-08, |
|
"loss": 0.106, |
|
"step": 4975 |
|
}, |
|
{ |
|
"epoch": 1.0622, |
|
"grad_norm": 2.141594648361206, |
|
"learning_rate": 8.88888888888889e-09, |
|
"loss": 0.0977, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.0622, |
|
"eval_loss": 0.13527874648571014, |
|
"eval_runtime": 940.5129, |
|
"eval_samples_per_second": 10.066, |
|
"eval_steps_per_second": 1.259, |
|
"eval_wer": 7.122989865404904, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.0622, |
|
"step": 5000, |
|
"total_flos": 1.63271270080512e+20, |
|
"train_loss": 0.15365941531658173, |
|
"train_runtime": 18539.6479, |
|
"train_samples_per_second": 8.63, |
|
"train_steps_per_second": 0.27 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 5000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9223372036854775807, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.63271270080512e+20, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|