{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9995835068721366, "eval_steps": 240, "global_step": 2400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00041649312786339027, "grad_norm": 7.0, "learning_rate": 2e-06, "loss": 0.7314, "step": 1 }, { "epoch": 0.04164931278633902, "grad_norm": 0.0712890625, "learning_rate": 0.0002, "loss": 0.377, "step": 100 }, { "epoch": 0.08329862557267805, "grad_norm": 0.12109375, "learning_rate": 0.0004, "loss": 0.2401, "step": 200 }, { "epoch": 0.09995835068721366, "eval_peoplespeech-clean-transcription_loss": 3.7064812183380127, "eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062, "eval_peoplespeech-clean-transcription_runtime": 14.4755, "eval_peoplespeech-clean-transcription_samples_per_second": 4.421, "eval_peoplespeech-clean-transcription_steps_per_second": 0.069, "step": 240 }, { "epoch": 0.12494793835901707, "grad_norm": 0.10791015625, "learning_rate": 0.0006, "loss": 0.209, "step": 300 }, { "epoch": 0.1665972511453561, "grad_norm": 0.08740234375, "learning_rate": 0.0008, "loss": 0.1586, "step": 400 }, { "epoch": 0.19991670137442732, "eval_peoplespeech-clean-transcription_loss": 2.0534801483154297, "eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062, "eval_peoplespeech-clean-transcription_runtime": 13.9793, "eval_peoplespeech-clean-transcription_samples_per_second": 4.578, "eval_peoplespeech-clean-transcription_steps_per_second": 0.072, "step": 480 }, { "epoch": 0.20824656393169513, "grad_norm": 0.059814453125, "learning_rate": 0.001, "loss": 0.1178, "step": 500 }, { "epoch": 0.24989587671803415, "grad_norm": 0.05126953125, "learning_rate": 0.0012, "loss": 0.1031, "step": 600 }, { "epoch": 0.2915451895043732, "grad_norm": 0.04443359375, "learning_rate": 0.0014, "loss": 0.0942, "step": 700 }, { "epoch": 0.299875052061641, "eval_peoplespeech-clean-transcription_loss": 1.931348443031311, "eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062, "eval_peoplespeech-clean-transcription_runtime": 14.1549, "eval_peoplespeech-clean-transcription_samples_per_second": 4.521, "eval_peoplespeech-clean-transcription_steps_per_second": 0.071, "step": 720 }, { "epoch": 0.3331945022907122, "grad_norm": 0.040283203125, "learning_rate": 0.0016, "loss": 0.0885, "step": 800 }, { "epoch": 0.3748438150770512, "grad_norm": 0.035888671875, "learning_rate": 0.0018000000000000002, "loss": 0.0853, "step": 900 }, { "epoch": 0.39983340274885465, "eval_peoplespeech-clean-transcription_loss": 1.8942928314208984, "eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062, "eval_peoplespeech-clean-transcription_runtime": 13.7985, "eval_peoplespeech-clean-transcription_samples_per_second": 4.638, "eval_peoplespeech-clean-transcription_steps_per_second": 0.072, "step": 960 }, { "epoch": 0.41649312786339027, "grad_norm": 0.033203125, "learning_rate": 0.002, "loss": 0.0817, "step": 1000 }, { "epoch": 0.45814244064972925, "grad_norm": 0.03173828125, "learning_rate": 0.001996926043706003, "loss": 0.0798, "step": 1100 }, { "epoch": 0.4997917534360683, "grad_norm": 0.029296875, "learning_rate": 0.0019877251730624503, "loss": 0.0777, "step": 1200 }, { "epoch": 0.4997917534360683, "eval_peoplespeech-clean-transcription_loss": 1.863105297088623, "eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062, "eval_peoplespeech-clean-transcription_runtime": 13.1079, "eval_peoplespeech-clean-transcription_samples_per_second": 4.883, "eval_peoplespeech-clean-transcription_steps_per_second": 0.076, "step": 1200 }, { "epoch": 0.5414410662224073, "grad_norm": 0.028076171875, "learning_rate": 0.0019724602393453973, "loss": 0.0761, "step": 1300 }, { "epoch": 0.5830903790087464, "grad_norm": 0.0277099609375, "learning_rate": 0.001951235517530571, "loss": 0.0744, "step": 1400 }, { "epoch": 0.599750104123282, "eval_peoplespeech-clean-transcription_loss": 1.8575688600540161, "eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062, "eval_peoplespeech-clean-transcription_runtime": 14.2046, "eval_peoplespeech-clean-transcription_samples_per_second": 4.506, "eval_peoplespeech-clean-transcription_steps_per_second": 0.07, "step": 1440 }, { "epoch": 0.6247396917950854, "grad_norm": 0.02490234375, "learning_rate": 0.0019241959939895518, "loss": 0.073, "step": 1500 }, { "epoch": 0.6663890045814244, "grad_norm": 0.0263671875, "learning_rate": 0.0018915263760858401, "loss": 0.071, "step": 1600 }, { "epoch": 0.6997084548104956, "eval_peoplespeech-clean-transcription_loss": 1.83821439743042, "eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062, "eval_peoplespeech-clean-transcription_runtime": 13.7329, "eval_peoplespeech-clean-transcription_samples_per_second": 4.66, "eval_peoplespeech-clean-transcription_steps_per_second": 0.073, "step": 1680 }, { "epoch": 0.7080383173677635, "grad_norm": 0.0252685546875, "learning_rate": 0.0018534498304362756, "loss": 0.0704, "step": 1700 }, { "epoch": 0.7496876301541024, "grad_norm": 0.027587890625, "learning_rate": 0.0018102264584567542, "loss": 0.0693, "step": 1800 }, { "epoch": 0.7913369429404414, "grad_norm": 0.0264892578125, "learning_rate": 0.0017621515196058187, "loss": 0.0679, "step": 1900 }, { "epoch": 0.7996668054977093, "eval_peoplespeech-clean-transcription_loss": 1.835157036781311, "eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062, "eval_peoplespeech-clean-transcription_runtime": 13.3808, "eval_peoplespeech-clean-transcription_samples_per_second": 4.783, "eval_peoplespeech-clean-transcription_steps_per_second": 0.075, "step": 1920 }, { "epoch": 0.8329862557267805, "grad_norm": 0.02294921875, "learning_rate": 0.001709553414463167, "loss": 0.0678, "step": 2000 }, { "epoch": 0.8746355685131195, "grad_norm": 0.0220947265625, "learning_rate": 0.0016527914414207012, "loss": 0.0669, "step": 2100 }, { "epoch": 0.899625156184923, "eval_peoplespeech-clean-transcription_loss": 1.822905421257019, "eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062, "eval_peoplespeech-clean-transcription_runtime": 13.1096, "eval_peoplespeech-clean-transcription_samples_per_second": 4.882, "eval_peoplespeech-clean-transcription_steps_per_second": 0.076, "step": 2160 }, { "epoch": 0.9162848812994585, "grad_norm": 0.0244140625, "learning_rate": 0.0015922533423101844, "loss": 0.0659, "step": 2200 }, { "epoch": 0.9579341940857976, "grad_norm": 0.0255126953125, "learning_rate": 0.0015283526537333662, "loss": 0.0658, "step": 2300 }, { "epoch": 0.9995835068721366, "grad_norm": 0.0225830078125, "learning_rate": 0.0014615258821876727, "loss": 0.0653, "step": 2400 }, { "epoch": 0.9995835068721366, "eval_peoplespeech-clean-transcription_loss": 1.8253135681152344, "eval_peoplespeech-clean-transcription_model_preparation_time": 0.0062, "eval_peoplespeech-clean-transcription_runtime": 13.4347, "eval_peoplespeech-clean-transcription_samples_per_second": 4.764, "eval_peoplespeech-clean-transcription_steps_per_second": 0.074, "step": 2400 } ], "logging_steps": 100, "max_steps": 4800, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 1200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.590876232926888e+18, "train_batch_size": 576, "trial_name": null, "trial_params": null }