{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.7399813606710158, "eval_steps": 318, "global_step": 3176, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00023299161230195712, "grad_norm": 6.78125, "learning_rate": 2e-06, "loss": 0.7236, "step": 1 }, { "epoch": 0.023299161230195712, "grad_norm": 0.09033203125, "learning_rate": 0.0002, "loss": 0.3799, "step": 100 }, { "epoch": 0.046598322460391424, "grad_norm": 0.11865234375, "learning_rate": 0.0004, "loss": 0.2452, "step": 200 }, { "epoch": 0.06989748369058714, "grad_norm": 0.1396484375, "learning_rate": 0.0006, "loss": 0.2131, "step": 300 }, { "epoch": 0.07409133271202237, "eval_peoplespeech-clean-transcription_loss": 3.0843491554260254, "eval_peoplespeech-clean-transcription_model_preparation_time": 0.0063, "eval_peoplespeech-clean-transcription_runtime": 14.6247, "eval_peoplespeech-clean-transcription_samples_per_second": 4.376, "eval_peoplespeech-clean-transcription_steps_per_second": 0.068, "step": 318 }, { "epoch": 0.09319664492078285, "grad_norm": 0.07421875, "learning_rate": 0.0008, "loss": 0.155, "step": 400 }, { "epoch": 0.11649580615097857, "grad_norm": 0.0576171875, "learning_rate": 0.001, "loss": 0.1083, "step": 500 }, { "epoch": 0.13979496738117428, "grad_norm": 0.04736328125, "learning_rate": 0.0012, "loss": 0.0926, "step": 600 }, { "epoch": 0.14818266542404473, "eval_peoplespeech-clean-transcription_loss": 1.7947263717651367, "eval_peoplespeech-clean-transcription_model_preparation_time": 0.0063, "eval_peoplespeech-clean-transcription_runtime": 13.9513, "eval_peoplespeech-clean-transcription_samples_per_second": 4.587, "eval_peoplespeech-clean-transcription_steps_per_second": 0.072, "step": 636 }, { "epoch": 0.16309412861137, "grad_norm": 0.04150390625, "learning_rate": 0.0014, "loss": 0.0844, "step": 700 }, { "epoch": 0.1863932898415657, "grad_norm": 0.034912109375, "learning_rate": 0.0016, "loss": 0.0793, "step": 800 }, { "epoch": 0.2096924510717614, "grad_norm": 0.032958984375, "learning_rate": 0.0018000000000000002, "loss": 0.0758, "step": 900 }, { "epoch": 0.2222739981360671, "eval_peoplespeech-clean-transcription_loss": 1.7458150386810303, "eval_peoplespeech-clean-transcription_model_preparation_time": 0.0063, "eval_peoplespeech-clean-transcription_runtime": 14.2202, "eval_peoplespeech-clean-transcription_samples_per_second": 4.501, "eval_peoplespeech-clean-transcription_steps_per_second": 0.07, "step": 954 }, { "epoch": 0.23299161230195714, "grad_norm": 0.03125, "learning_rate": 0.002, "loss": 0.0732, "step": 1000 }, { "epoch": 0.25629077353215285, "grad_norm": 0.02783203125, "learning_rate": 0.0019984487567773325, "loss": 0.0714, "step": 1100 }, { "epoch": 0.27958993476234856, "grad_norm": 0.0289306640625, "learning_rate": 0.0019938003745660765, "loss": 0.0691, "step": 1200 }, { "epoch": 0.29636533084808947, "eval_peoplespeech-clean-transcription_loss": 1.7118018865585327, "eval_peoplespeech-clean-transcription_model_preparation_time": 0.0063, "eval_peoplespeech-clean-transcription_runtime": 14.7608, "eval_peoplespeech-clean-transcription_samples_per_second": 4.336, "eval_peoplespeech-clean-transcription_steps_per_second": 0.068, "step": 1272 }, { "epoch": 0.30288909599254427, "grad_norm": 0.027587890625, "learning_rate": 0.0019860708773026797, "loss": 0.0676, "step": 1300 }, { "epoch": 0.32618825722274, "grad_norm": 0.0257568359375, "learning_rate": 0.001975286910165463, "loss": 0.0657, "step": 1400 }, { "epoch": 0.3494874184529357, "grad_norm": 0.0244140625, "learning_rate": 0.0019614856477231713, "loss": 0.0648, "step": 1500 }, { "epoch": 0.3704566635601118, "eval_peoplespeech-clean-transcription_loss": 1.6898815631866455, "eval_peoplespeech-clean-transcription_model_preparation_time": 0.0063, "eval_peoplespeech-clean-transcription_runtime": 14.5539, "eval_peoplespeech-clean-transcription_samples_per_second": 4.397, "eval_peoplespeech-clean-transcription_steps_per_second": 0.069, "step": 1590 }, { "epoch": 0.3727865796831314, "grad_norm": 0.0208740234375, "learning_rate": 0.0019447146657865335, "loss": 0.0641, "step": 1600 }, { "epoch": 0.3960857409133271, "grad_norm": 0.02587890625, "learning_rate": 0.001925031777404586, "loss": 0.0629, "step": 1700 }, { "epoch": 0.4193849021435228, "grad_norm": 0.0269775390625, "learning_rate": 0.0019025048335711181, "loss": 0.0623, "step": 1800 }, { "epoch": 0.4426840633737186, "grad_norm": 0.0208740234375, "learning_rate": 0.0018772114893282391, "loss": 0.0616, "step": 1900 }, { "epoch": 0.4445479962721342, "eval_peoplespeech-clean-transcription_loss": 1.6862739324569702, "eval_peoplespeech-clean-transcription_model_preparation_time": 0.0063, "eval_peoplespeech-clean-transcription_runtime": 14.4561, "eval_peoplespeech-clean-transcription_samples_per_second": 4.427, "eval_peoplespeech-clean-transcription_steps_per_second": 0.069, "step": 1908 }, { "epoch": 0.4659832246039143, "grad_norm": 0.02099609375, "learning_rate": 0.0018492389360733603, "loss": 0.0612, "step": 2000 }, { "epoch": 0.48928238583411, "grad_norm": 0.0245361328125, "learning_rate": 0.0018186836009923876, "loss": 0.0605, "step": 2100 }, { "epoch": 0.5125815470643057, "grad_norm": 0.0206298828125, "learning_rate": 0.001785650814655233, "loss": 0.0603, "step": 2200 }, { "epoch": 0.5186393289841565, "eval_peoplespeech-clean-transcription_loss": 1.6496751308441162, "eval_peoplespeech-clean-transcription_model_preparation_time": 0.0063, "eval_peoplespeech-clean-transcription_runtime": 14.7041, "eval_peoplespeech-clean-transcription_samples_per_second": 4.353, "eval_peoplespeech-clean-transcription_steps_per_second": 0.068, "step": 2226 }, { "epoch": 0.5358807082945014, "grad_norm": 0.022705078125, "learning_rate": 0.0017502544479195221, "loss": 0.0599, "step": 2300 }, { "epoch": 0.5591798695246971, "grad_norm": 0.0189208984375, "learning_rate": 0.0017126165193941567, "loss": 0.0591, "step": 2400 }, { "epoch": 0.5824790307548928, "grad_norm": 0.0208740234375, "learning_rate": 0.0016728667748158923, "loss": 0.0591, "step": 2500 }, { "epoch": 0.5927306616961789, "eval_peoplespeech-clean-transcription_loss": 1.6663990020751953, "eval_peoplespeech-clean-transcription_model_preparation_time": 0.0063, "eval_peoplespeech-clean-transcription_runtime": 14.7316, "eval_peoplespeech-clean-transcription_samples_per_second": 4.344, "eval_peoplespeech-clean-transcription_steps_per_second": 0.068, "step": 2544 }, { "epoch": 0.6057781919850885, "grad_norm": 0.021728515625, "learning_rate": 0.0016311422397889072, "loss": 0.0585, "step": 2600 }, { "epoch": 0.6290773532152842, "grad_norm": 0.022216796875, "learning_rate": 0.0015875867474291552, "loss": 0.0582, "step": 2700 }, { "epoch": 0.65237651444548, "grad_norm": 0.0189208984375, "learning_rate": 0.0015423504425418212, "loss": 0.0574, "step": 2800 }, { "epoch": 0.6668219944082013, "eval_peoplespeech-clean-transcription_loss": 1.619414210319519, "eval_peoplespeech-clean-transcription_model_preparation_time": 0.0063, "eval_peoplespeech-clean-transcription_runtime": 14.4777, "eval_peoplespeech-clean-transcription_samples_per_second": 4.421, "eval_peoplespeech-clean-transcription_steps_per_second": 0.069, "step": 2862 }, { "epoch": 0.6756756756756757, "grad_norm": 0.0191650390625, "learning_rate": 0.0014955892640410716, "loss": 0.057, "step": 2900 }, { "epoch": 0.6989748369058714, "grad_norm": 0.017822265625, "learning_rate": 0.0014474644073963134, "loss": 0.0573, "step": 3000 }, { "epoch": 0.7222739981360671, "grad_norm": 0.02001953125, "learning_rate": 0.0013981417689580185, "loss": 0.0572, "step": 3100 } ], "logging_steps": 100, "max_steps": 6350, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 1588, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 5.537292414638621e+18, "train_batch_size": 672, "trial_name": null, "trial_params": null }