|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.1097390493942219, |
|
"eval_steps": 318, |
|
"global_step": 4764, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00023299161230195712, |
|
"grad_norm": 6.78125, |
|
"learning_rate": 2e-06, |
|
"loss": 0.7236, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.023299161230195712, |
|
"grad_norm": 0.09033203125, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3799, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.046598322460391424, |
|
"grad_norm": 0.11865234375, |
|
"learning_rate": 0.0004, |
|
"loss": 0.2452, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.06989748369058714, |
|
"grad_norm": 0.1396484375, |
|
"learning_rate": 0.0006, |
|
"loss": 0.2131, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.07409133271202237, |
|
"eval_peoplespeech-clean-transcription_loss": 3.0843491554260254, |
|
"eval_peoplespeech-clean-transcription_model_preparation_time": 0.0063, |
|
"eval_peoplespeech-clean-transcription_runtime": 14.6247, |
|
"eval_peoplespeech-clean-transcription_samples_per_second": 4.376, |
|
"eval_peoplespeech-clean-transcription_steps_per_second": 0.068, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.09319664492078285, |
|
"grad_norm": 0.07421875, |
|
"learning_rate": 0.0008, |
|
"loss": 0.155, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.11649580615097857, |
|
"grad_norm": 0.0576171875, |
|
"learning_rate": 0.001, |
|
"loss": 0.1083, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.13979496738117428, |
|
"grad_norm": 0.04736328125, |
|
"learning_rate": 0.0012, |
|
"loss": 0.0926, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.14818266542404473, |
|
"eval_peoplespeech-clean-transcription_loss": 1.7947263717651367, |
|
"eval_peoplespeech-clean-transcription_model_preparation_time": 0.0063, |
|
"eval_peoplespeech-clean-transcription_runtime": 13.9513, |
|
"eval_peoplespeech-clean-transcription_samples_per_second": 4.587, |
|
"eval_peoplespeech-clean-transcription_steps_per_second": 0.072, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.16309412861137, |
|
"grad_norm": 0.04150390625, |
|
"learning_rate": 0.0014, |
|
"loss": 0.0844, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.1863932898415657, |
|
"grad_norm": 0.034912109375, |
|
"learning_rate": 0.0016, |
|
"loss": 0.0793, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.2096924510717614, |
|
"grad_norm": 0.032958984375, |
|
"learning_rate": 0.0018000000000000002, |
|
"loss": 0.0758, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.2222739981360671, |
|
"eval_peoplespeech-clean-transcription_loss": 1.7458150386810303, |
|
"eval_peoplespeech-clean-transcription_model_preparation_time": 0.0063, |
|
"eval_peoplespeech-clean-transcription_runtime": 14.2202, |
|
"eval_peoplespeech-clean-transcription_samples_per_second": 4.501, |
|
"eval_peoplespeech-clean-transcription_steps_per_second": 0.07, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 0.23299161230195714, |
|
"grad_norm": 0.03125, |
|
"learning_rate": 0.002, |
|
"loss": 0.0732, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.25629077353215285, |
|
"grad_norm": 0.02783203125, |
|
"learning_rate": 0.0019984487567773325, |
|
"loss": 0.0714, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.27958993476234856, |
|
"grad_norm": 0.0289306640625, |
|
"learning_rate": 0.0019938003745660765, |
|
"loss": 0.0691, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.29636533084808947, |
|
"eval_peoplespeech-clean-transcription_loss": 1.7118018865585327, |
|
"eval_peoplespeech-clean-transcription_model_preparation_time": 0.0063, |
|
"eval_peoplespeech-clean-transcription_runtime": 14.7608, |
|
"eval_peoplespeech-clean-transcription_samples_per_second": 4.336, |
|
"eval_peoplespeech-clean-transcription_steps_per_second": 0.068, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 0.30288909599254427, |
|
"grad_norm": 0.027587890625, |
|
"learning_rate": 0.0019860708773026797, |
|
"loss": 0.0676, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.32618825722274, |
|
"grad_norm": 0.0257568359375, |
|
"learning_rate": 0.001975286910165463, |
|
"loss": 0.0657, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.3494874184529357, |
|
"grad_norm": 0.0244140625, |
|
"learning_rate": 0.0019614856477231713, |
|
"loss": 0.0648, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.3704566635601118, |
|
"eval_peoplespeech-clean-transcription_loss": 1.6898815631866455, |
|
"eval_peoplespeech-clean-transcription_model_preparation_time": 0.0063, |
|
"eval_peoplespeech-clean-transcription_runtime": 14.5539, |
|
"eval_peoplespeech-clean-transcription_samples_per_second": 4.397, |
|
"eval_peoplespeech-clean-transcription_steps_per_second": 0.069, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.3727865796831314, |
|
"grad_norm": 0.0208740234375, |
|
"learning_rate": 0.0019447146657865335, |
|
"loss": 0.0641, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.3960857409133271, |
|
"grad_norm": 0.02587890625, |
|
"learning_rate": 0.001925031777404586, |
|
"loss": 0.0629, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.4193849021435228, |
|
"grad_norm": 0.0269775390625, |
|
"learning_rate": 0.0019025048335711181, |
|
"loss": 0.0623, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.4426840633737186, |
|
"grad_norm": 0.0208740234375, |
|
"learning_rate": 0.0018772114893282391, |
|
"loss": 0.0616, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.4445479962721342, |
|
"eval_peoplespeech-clean-transcription_loss": 1.6862739324569702, |
|
"eval_peoplespeech-clean-transcription_model_preparation_time": 0.0063, |
|
"eval_peoplespeech-clean-transcription_runtime": 14.4561, |
|
"eval_peoplespeech-clean-transcription_samples_per_second": 4.427, |
|
"eval_peoplespeech-clean-transcription_steps_per_second": 0.069, |
|
"step": 1908 |
|
}, |
|
{ |
|
"epoch": 0.4659832246039143, |
|
"grad_norm": 0.02099609375, |
|
"learning_rate": 0.0018492389360733603, |
|
"loss": 0.0612, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.48928238583411, |
|
"grad_norm": 0.0245361328125, |
|
"learning_rate": 0.0018186836009923876, |
|
"loss": 0.0605, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.5125815470643057, |
|
"grad_norm": 0.0206298828125, |
|
"learning_rate": 0.001785650814655233, |
|
"loss": 0.0603, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.5186393289841565, |
|
"eval_peoplespeech-clean-transcription_loss": 1.6496751308441162, |
|
"eval_peoplespeech-clean-transcription_model_preparation_time": 0.0063, |
|
"eval_peoplespeech-clean-transcription_runtime": 14.7041, |
|
"eval_peoplespeech-clean-transcription_samples_per_second": 4.353, |
|
"eval_peoplespeech-clean-transcription_steps_per_second": 0.068, |
|
"step": 2226 |
|
}, |
|
{ |
|
"epoch": 0.5358807082945014, |
|
"grad_norm": 0.022705078125, |
|
"learning_rate": 0.0017502544479195221, |
|
"loss": 0.0599, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.5591798695246971, |
|
"grad_norm": 0.0189208984375, |
|
"learning_rate": 0.0017126165193941567, |
|
"loss": 0.0591, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.5824790307548928, |
|
"grad_norm": 0.0208740234375, |
|
"learning_rate": 0.0016728667748158923, |
|
"loss": 0.0591, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.5927306616961789, |
|
"eval_peoplespeech-clean-transcription_loss": 1.6663990020751953, |
|
"eval_peoplespeech-clean-transcription_model_preparation_time": 0.0063, |
|
"eval_peoplespeech-clean-transcription_runtime": 14.7316, |
|
"eval_peoplespeech-clean-transcription_samples_per_second": 4.344, |
|
"eval_peoplespeech-clean-transcription_steps_per_second": 0.068, |
|
"step": 2544 |
|
}, |
|
{ |
|
"epoch": 0.6057781919850885, |
|
"grad_norm": 0.021728515625, |
|
"learning_rate": 0.0016311422397889072, |
|
"loss": 0.0585, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.6290773532152842, |
|
"grad_norm": 0.022216796875, |
|
"learning_rate": 0.0015875867474291552, |
|
"loss": 0.0582, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.65237651444548, |
|
"grad_norm": 0.0189208984375, |
|
"learning_rate": 0.0015423504425418212, |
|
"loss": 0.0574, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.6668219944082013, |
|
"eval_peoplespeech-clean-transcription_loss": 1.619414210319519, |
|
"eval_peoplespeech-clean-transcription_model_preparation_time": 0.0063, |
|
"eval_peoplespeech-clean-transcription_runtime": 14.4777, |
|
"eval_peoplespeech-clean-transcription_samples_per_second": 4.421, |
|
"eval_peoplespeech-clean-transcription_steps_per_second": 0.069, |
|
"step": 2862 |
|
}, |
|
{ |
|
"epoch": 0.6756756756756757, |
|
"grad_norm": 0.0191650390625, |
|
"learning_rate": 0.0014955892640410716, |
|
"loss": 0.057, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.6989748369058714, |
|
"grad_norm": 0.017822265625, |
|
"learning_rate": 0.0014474644073963134, |
|
"loss": 0.0573, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.7222739981360671, |
|
"grad_norm": 0.02001953125, |
|
"learning_rate": 0.0013981417689580185, |
|
"loss": 0.0572, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.7409133271202236, |
|
"eval_peoplespeech-clean-transcription_loss": 1.6249196529388428, |
|
"eval_peoplespeech-clean-transcription_model_preparation_time": 0.0063, |
|
"eval_peoplespeech-clean-transcription_runtime": 14.2587, |
|
"eval_peoplespeech-clean-transcription_samples_per_second": 4.488, |
|
"eval_peoplespeech-clean-transcription_steps_per_second": 0.07, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.7455731593662628, |
|
"grad_norm": 0.0196533203125, |
|
"learning_rate": 0.0013477913740786338, |
|
"loss": 0.0565, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.7688723205964585, |
|
"grad_norm": 0.0196533203125, |
|
"learning_rate": 0.0012965867909999716, |
|
"loss": 0.0559, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.7921714818266542, |
|
"grad_norm": 0.020263671875, |
|
"learning_rate": 0.0012447045325275216, |
|
"loss": 0.056, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.815004659832246, |
|
"eval_peoplespeech-clean-transcription_loss": 1.623028039932251, |
|
"eval_peoplespeech-clean-transcription_model_preparation_time": 0.0063, |
|
"eval_peoplespeech-clean-transcription_runtime": 14.4831, |
|
"eval_peoplespeech-clean-transcription_samples_per_second": 4.419, |
|
"eval_peoplespeech-clean-transcription_steps_per_second": 0.069, |
|
"step": 3498 |
|
}, |
|
{ |
|
"epoch": 0.8154706430568499, |
|
"grad_norm": 0.01806640625, |
|
"learning_rate": 0.0011923234475542434, |
|
"loss": 0.0556, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.8387698042870456, |
|
"grad_norm": 0.020751953125, |
|
"learning_rate": 0.001139624104531383, |
|
"loss": 0.056, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.8620689655172413, |
|
"grad_norm": 0.0164794921875, |
|
"learning_rate": 0.0010867881690116152, |
|
"loss": 0.0552, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.8853681267474371, |
|
"grad_norm": 0.0191650390625, |
|
"learning_rate": 0.0010339977774102467, |
|
"loss": 0.0548, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.8890959925442684, |
|
"eval_peoplespeech-clean-transcription_loss": 1.6235145330429077, |
|
"eval_peoplespeech-clean-transcription_model_preparation_time": 0.0063, |
|
"eval_peoplespeech-clean-transcription_runtime": 14.6909, |
|
"eval_peoplespeech-clean-transcription_samples_per_second": 4.356, |
|
"eval_peoplespeech-clean-transcription_steps_per_second": 0.068, |
|
"step": 3816 |
|
}, |
|
{ |
|
"epoch": 0.9086672879776329, |
|
"grad_norm": 0.0299072265625, |
|
"learning_rate": 0.0009814349091432633, |
|
"loss": 0.0546, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.9319664492078286, |
|
"grad_norm": 0.017578125, |
|
"learning_rate": 0.0009292807593065831, |
|
"loss": 0.0545, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.9552656104380243, |
|
"grad_norm": 0.0166015625, |
|
"learning_rate": 0.0008777151140590245, |
|
"loss": 0.0547, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.9631873252562908, |
|
"eval_peoplespeech-clean-transcription_loss": 1.598435640335083, |
|
"eval_peoplespeech-clean-transcription_model_preparation_time": 0.0063, |
|
"eval_peoplespeech-clean-transcription_runtime": 14.2932, |
|
"eval_peoplespeech-clean-transcription_samples_per_second": 4.478, |
|
"eval_peoplespeech-clean-transcription_steps_per_second": 0.07, |
|
"step": 4134 |
|
}, |
|
{ |
|
"epoch": 0.97856477166822, |
|
"grad_norm": 0.020751953125, |
|
"learning_rate": 0.0008269157308621709, |
|
"loss": 0.0546, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.0016309412861137, |
|
"grad_norm": 0.0174560546875, |
|
"learning_rate": 0.0007770577257135778, |
|
"loss": 0.0539, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 1.0249301025163093, |
|
"grad_norm": 0.02197265625, |
|
"learning_rate": 0.0007283129694856508, |
|
"loss": 0.0523, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.0370456663560113, |
|
"eval_peoplespeech-clean-transcription_loss": 1.6100133657455444, |
|
"eval_peoplespeech-clean-transcription_model_preparation_time": 0.0063, |
|
"eval_peoplespeech-clean-transcription_runtime": 14.1344, |
|
"eval_peoplespeech-clean-transcription_samples_per_second": 4.528, |
|
"eval_peoplespeech-clean-transcription_steps_per_second": 0.071, |
|
"step": 4452 |
|
}, |
|
{ |
|
"epoch": 1.0482292637465052, |
|
"grad_norm": 0.0174560546875, |
|
"learning_rate": 0.0006808494954511429, |
|
"loss": 0.0538, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.0715284249767008, |
|
"grad_norm": 0.0184326171875, |
|
"learning_rate": 0.0006348309200376554, |
|
"loss": 0.0538, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 1.0948275862068966, |
|
"grad_norm": 0.0177001953125, |
|
"learning_rate": 0.0005904158788079147, |
|
"loss": 0.0533, |
|
"step": 4700 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 6350, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 1588, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 8.306252788078215e+18, |
|
"train_batch_size": 672, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|