{ "best_metric": 5.218643517767322, "best_model_checkpoint": "OUTCOMESAI/whisper-large-v3-common-n-medical-50-50/checkpoint-4750", "epoch": 1.937984496124031, "eval_steps": 250, "global_step": 5000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03875968992248062, "grad_norm": 4.327393531799316, "learning_rate": 4.123797088618779e-07, "loss": 6.6181, "step": 100 }, { "epoch": 0.07751937984496124, "grad_norm": 7.569775104522705, "learning_rate": 4.775003968157492e-07, "loss": 5.126, "step": 200 }, { "epoch": 0.09689922480620156, "eval_loss": 0.369384765625, "eval_runtime": 2703.3784, "eval_samples_per_second": 0.83, "eval_steps_per_second": 0.026, "eval_wer": 5.660122234842339, "step": 250 }, { "epoch": 0.11627906976744186, "grad_norm": 3.0063843727111816, "learning_rate": 4.953684210526315e-07, "loss": 4.8144, "step": 300 }, { "epoch": 0.15503875968992248, "grad_norm": 3.107106924057007, "learning_rate": 4.848421052631578e-07, "loss": 4.5831, "step": 400 }, { "epoch": 0.1937984496124031, "grad_norm": 2.855757236480713, "learning_rate": 4.7431578947368417e-07, "loss": 4.367, "step": 500 }, { "epoch": 0.1937984496124031, "eval_loss": 0.358642578125, "eval_runtime": 2810.867, "eval_samples_per_second": 0.798, "eval_steps_per_second": 0.025, "eval_wer": 5.815620098436908, "step": 500 }, { "epoch": 0.23255813953488372, "grad_norm": 2.5796642303466797, "learning_rate": 4.637894736842105e-07, "loss": 4.2067, "step": 600 }, { "epoch": 0.2713178294573643, "grad_norm": 2.537674903869629, "learning_rate": 4.532631578947368e-07, "loss": 4.1514, "step": 700 }, { "epoch": 0.29069767441860467, "eval_loss": 0.35107421875, "eval_runtime": 2945.7156, "eval_samples_per_second": 0.761, "eval_steps_per_second": 0.024, "eval_wer": 5.883903942884959, "step": 750 }, { "epoch": 0.31007751937984496, "grad_norm": 2.430060386657715, "learning_rate": 4.4273684210526315e-07, "loss": 4.0786, "step": 800 }, { "epoch": 0.3488372093023256, "grad_norm": 2.526545286178589, "learning_rate": 4.322105263157895e-07, "loss": 4.0004, "step": 900 }, { "epoch": 0.3875968992248062, "grad_norm": 2.5485153198242188, "learning_rate": 4.2168421052631575e-07, "loss": 3.962, "step": 1000 }, { "epoch": 0.3875968992248062, "eval_loss": 0.344970703125, "eval_runtime": 2929.618, "eval_samples_per_second": 0.766, "eval_steps_per_second": 0.024, "eval_wer": 5.780464059711179, "step": 1000 }, { "epoch": 0.4263565891472868, "grad_norm": 2.46012282371521, "learning_rate": 4.111578947368421e-07, "loss": 3.9314, "step": 1100 }, { "epoch": 0.46511627906976744, "grad_norm": 2.6299571990966797, "learning_rate": 4.0063157894736836e-07, "loss": 3.9038, "step": 1200 }, { "epoch": 0.4844961240310077, "eval_loss": 0.34033203125, "eval_runtime": 3171.0392, "eval_samples_per_second": 0.707, "eval_steps_per_second": 0.022, "eval_wer": 6.174617340040023, "step": 1250 }, { "epoch": 0.5038759689922481, "grad_norm": 2.334170341491699, "learning_rate": 3.9010526315789474e-07, "loss": 3.8762, "step": 1300 }, { "epoch": 0.5426356589147286, "grad_norm": 2.6218087673187256, "learning_rate": 3.7957894736842106e-07, "loss": 3.8491, "step": 1400 }, { "epoch": 0.5813953488372093, "grad_norm": 2.390178680419922, "learning_rate": 3.6905263157894734e-07, "loss": 3.8313, "step": 1500 }, { "epoch": 0.5813953488372093, "eval_loss": 0.3359375, "eval_runtime": 3066.5551, "eval_samples_per_second": 0.731, "eval_steps_per_second": 0.023, "eval_wer": 5.973822272702688, "step": 1500 }, { "epoch": 0.6201550387596899, "grad_norm": 2.461087465286255, "learning_rate": 3.5852631578947367e-07, "loss": 3.804, "step": 1600 }, { "epoch": 0.6589147286821705, "grad_norm": 2.4371871948242188, "learning_rate": 3.4799999999999994e-07, "loss": 3.7778, "step": 1700 }, { "epoch": 0.6782945736434108, "eval_loss": 0.333251953125, "eval_runtime": 3069.4158, "eval_samples_per_second": 0.731, "eval_steps_per_second": 0.023, "eval_wer": 5.921764292281898, "step": 1750 }, { "epoch": 0.6976744186046512, "grad_norm": 2.413174867630005, "learning_rate": 3.374736842105263e-07, "loss": 3.7737, "step": 1800 }, { "epoch": 0.7364341085271318, "grad_norm": 2.4858949184417725, "learning_rate": 3.2694736842105265e-07, "loss": 3.7538, "step": 1900 }, { "epoch": 0.7751937984496124, "grad_norm": 2.6870527267456055, "learning_rate": 3.164210526315789e-07, "loss": 3.7421, "step": 2000 }, { "epoch": 0.7751937984496124, "eval_loss": 0.33056640625, "eval_runtime": 3182.1093, "eval_samples_per_second": 0.705, "eval_steps_per_second": 0.022, "eval_wer": 6.13270052463627, "step": 2000 }, { "epoch": 0.813953488372093, "grad_norm": 2.504830837249756, "learning_rate": 3.0589473684210525e-07, "loss": 3.7371, "step": 2100 }, { "epoch": 0.8527131782945736, "grad_norm": 2.3637797832489014, "learning_rate": 2.953684210526315e-07, "loss": 3.7367, "step": 2200 }, { "epoch": 0.872093023255814, "eval_loss": 0.328125, "eval_runtime": 2953.0944, "eval_samples_per_second": 0.76, "eval_steps_per_second": 0.024, "eval_wer": 5.656065768835524, "step": 2250 }, { "epoch": 0.8914728682170543, "grad_norm": 2.3057029247283936, "learning_rate": 2.848421052631579e-07, "loss": 3.7176, "step": 2300 }, { "epoch": 0.9302325581395349, "grad_norm": 2.277510643005371, "learning_rate": 2.7431578947368423e-07, "loss": 3.7003, "step": 2400 }, { "epoch": 0.9689922480620154, "grad_norm": 2.3119587898254395, "learning_rate": 2.637894736842105e-07, "loss": 3.6878, "step": 2500 }, { "epoch": 0.9689922480620154, "eval_loss": 0.32568359375, "eval_runtime": 2931.2759, "eval_samples_per_second": 0.765, "eval_steps_per_second": 0.024, "eval_wer": 5.515441613932608, "step": 2500 }, { "epoch": 1.0077519379844961, "grad_norm": 2.32456636428833, "learning_rate": 2.5326315789473683e-07, "loss": 3.6732, "step": 2600 }, { "epoch": 1.0465116279069768, "grad_norm": 2.489800453186035, "learning_rate": 2.4273684210526316e-07, "loss": 3.6769, "step": 2700 }, { "epoch": 1.0658914728682172, "eval_loss": 0.32421875, "eval_runtime": 2928.9686, "eval_samples_per_second": 0.766, "eval_steps_per_second": 0.024, "eval_wer": 5.48028557520688, "step": 2750 }, { "epoch": 1.0852713178294573, "grad_norm": 2.307305097579956, "learning_rate": 2.3221052631578946e-07, "loss": 3.6646, "step": 2800 }, { "epoch": 1.124031007751938, "grad_norm": 2.3552660942077637, "learning_rate": 2.216842105263158e-07, "loss": 3.6604, "step": 2900 }, { "epoch": 1.1627906976744187, "grad_norm": 2.3546299934387207, "learning_rate": 2.111578947368421e-07, "loss": 3.6508, "step": 3000 }, { "epoch": 1.1627906976744187, "eval_loss": 0.323486328125, "eval_runtime": 2929.9225, "eval_samples_per_second": 0.766, "eval_steps_per_second": 0.024, "eval_wer": 5.463383633511818, "step": 3000 }, { "epoch": 1.2015503875968991, "grad_norm": 2.2800722122192383, "learning_rate": 2.006315789473684e-07, "loss": 3.6412, "step": 3100 }, { "epoch": 1.2403100775193798, "grad_norm": 2.368227005004883, "learning_rate": 1.9010526315789475e-07, "loss": 3.6292, "step": 3200 }, { "epoch": 1.2596899224806202, "eval_loss": 0.322021484375, "eval_runtime": 2816.4937, "eval_samples_per_second": 0.796, "eval_steps_per_second": 0.025, "eval_wer": 5.3511547406566065, "step": 3250 }, { "epoch": 1.2790697674418605, "grad_norm": 2.3508598804473877, "learning_rate": 1.7957894736842105e-07, "loss": 3.6196, "step": 3300 }, { "epoch": 1.3178294573643412, "grad_norm": 2.4686381816864014, "learning_rate": 1.6905263157894737e-07, "loss": 3.6282, "step": 3400 }, { "epoch": 1.3565891472868217, "grad_norm": 2.3237240314483643, "learning_rate": 1.5852631578947367e-07, "loss": 3.6179, "step": 3500 }, { "epoch": 1.3565891472868217, "eval_loss": 0.321044921875, "eval_runtime": 2685.856, "eval_samples_per_second": 0.835, "eval_steps_per_second": 0.026, "eval_wer": 5.225404294445346, "step": 3500 }, { "epoch": 1.3953488372093024, "grad_norm": 2.2395377159118652, "learning_rate": 1.4799999999999998e-07, "loss": 3.6078, "step": 3600 }, { "epoch": 1.4341085271317828, "grad_norm": 2.3762331008911133, "learning_rate": 1.374736842105263e-07, "loss": 3.6032, "step": 3700 }, { "epoch": 1.4534883720930232, "eval_loss": 0.320556640625, "eval_runtime": 2693.0163, "eval_samples_per_second": 0.833, "eval_steps_per_second": 0.026, "eval_wer": 5.2206717507707285, "step": 3750 }, { "epoch": 1.4728682170542635, "grad_norm": 2.3068864345550537, "learning_rate": 1.2694736842105263e-07, "loss": 3.5997, "step": 3800 }, { "epoch": 1.5116279069767442, "grad_norm": 2.40097713470459, "learning_rate": 1.1642105263157894e-07, "loss": 3.588, "step": 3900 }, { "epoch": 1.550387596899225, "grad_norm": 2.320568561553955, "learning_rate": 1.0589473684210526e-07, "loss": 3.5922, "step": 4000 }, { "epoch": 1.550387596899225, "eval_loss": 0.320068359375, "eval_runtime": 2803.374, "eval_samples_per_second": 0.8, "eval_steps_per_second": 0.025, "eval_wer": 5.303829303910433, "step": 4000 }, { "epoch": 1.5891472868217056, "grad_norm": 2.4094741344451904, "learning_rate": 9.557894736842105e-08, "loss": 3.5832, "step": 4100 }, { "epoch": 1.627906976744186, "grad_norm": 2.448002815246582, "learning_rate": 8.505263157894736e-08, "loss": 3.5743, "step": 4200 }, { "epoch": 1.6472868217054264, "eval_loss": 0.31982421875, "eval_runtime": 2778.7179, "eval_samples_per_second": 0.807, "eval_steps_per_second": 0.026, "eval_wer": 5.263264643842285, "step": 4250 }, { "epoch": 1.6666666666666665, "grad_norm": 2.362182855606079, "learning_rate": 7.452631578947369e-08, "loss": 3.5831, "step": 4300 }, { "epoch": 1.7054263565891472, "grad_norm": 2.3121325969696045, "learning_rate": 6.4e-08, "loss": 3.5738, "step": 4400 }, { "epoch": 1.744186046511628, "grad_norm": 2.3000595569610596, "learning_rate": 5.347368421052632e-08, "loss": 3.5882, "step": 4500 }, { "epoch": 1.744186046511628, "eval_loss": 0.31982421875, "eval_runtime": 2718.6235, "eval_samples_per_second": 0.825, "eval_steps_per_second": 0.026, "eval_wer": 5.225404294445346, "step": 4500 }, { "epoch": 1.7829457364341086, "grad_norm": 2.3451480865478516, "learning_rate": 4.2947368421052626e-08, "loss": 3.5873, "step": 4600 }, { "epoch": 1.8217054263565893, "grad_norm": 2.301790237426758, "learning_rate": 3.2421052631578947e-08, "loss": 3.6021, "step": 4700 }, { "epoch": 1.8410852713178296, "eval_loss": 0.319580078125, "eval_runtime": 2680.8066, "eval_samples_per_second": 0.837, "eval_steps_per_second": 0.026, "eval_wer": 5.218643517767322, "step": 4750 }, { "epoch": 1.8604651162790697, "grad_norm": 2.2558062076568604, "learning_rate": 2.189473684210526e-08, "loss": 3.5833, "step": 4800 }, { "epoch": 1.8992248062015504, "grad_norm": 2.2542638778686523, "learning_rate": 1.136842105263158e-08, "loss": 3.5732, "step": 4900 }, { "epoch": 1.937984496124031, "grad_norm": 2.378117322921753, "learning_rate": 8.421052631578947e-10, "loss": 3.5865, "step": 5000 }, { "epoch": 1.937984496124031, "eval_loss": 0.3193359375, "eval_runtime": 2696.1288, "eval_samples_per_second": 0.832, "eval_steps_per_second": 0.026, "eval_wer": 5.221347828438531, "step": 5000 }, { "epoch": 1.937984496124031, "step": 5000, "total_flos": 1.087083891827713e+21, "train_loss": 3.86036796875, "train_runtime": 85811.7084, "train_samples_per_second": 3.729, "train_steps_per_second": 0.058 } ], "logging_steps": 100, "max_steps": 5000, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 250, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.087083891827713e+21, "train_batch_size": 64, "trial_name": null, "trial_params": null }