{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.957983193277311,
  "eval_steps": 500,
  "global_step": 177,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.16806722689075632,
      "grad_norm": 1.64210754465846,
      "learning_rate": 5.555555555555557e-06,
      "loss": 0.8834,
      "step": 10
    },
    {
      "epoch": 0.33613445378151263,
      "grad_norm": 1.6216633128454179,
      "learning_rate": 9.99609654676786e-06,
      "loss": 0.832,
      "step": 20
    },
    {
      "epoch": 0.5042016806722689,
      "grad_norm": 1.0890026008565015,
      "learning_rate": 9.860114570402055e-06,
      "loss": 0.7929,
      "step": 30
    },
    {
      "epoch": 0.6722689075630253,
      "grad_norm": 0.9852931548778409,
      "learning_rate": 9.535012074008688e-06,
      "loss": 0.7613,
      "step": 40
    },
    {
      "epoch": 0.8403361344537815,
      "grad_norm": 1.0010829198190254,
      "learning_rate": 9.033439696227966e-06,
      "loss": 0.7369,
      "step": 50
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.9912119135873784,
      "learning_rate": 8.374915007591053e-06,
      "loss": 0.6853,
      "step": 60
    },
    {
      "epoch": 1.1680672268907564,
      "grad_norm": 0.9598316653212201,
      "learning_rate": 7.58506302778873e-06,
      "loss": 0.6288,
      "step": 70
    },
    {
      "epoch": 1.3361344537815127,
      "grad_norm": 0.9969526888332303,
      "learning_rate": 6.694619085176159e-06,
      "loss": 0.5971,
      "step": 80
    },
    {
      "epoch": 1.504201680672269,
      "grad_norm": 5.712426354685314,
      "learning_rate": 5.738232820012407e-06,
      "loss": 0.5633,
      "step": 90
    },
    {
      "epoch": 1.6722689075630253,
      "grad_norm": 0.8053877541293872,
      "learning_rate": 4.753119870981486e-06,
      "loss": 0.5586,
      "step": 100
    },
    {
      "epoch": 1.8403361344537816,
      "grad_norm": 0.9085557253419428,
      "learning_rate": 3.777613711607087e-06,
      "loss": 0.5633,
      "step": 110
    },
    {
      "epoch": 2.0,
      "grad_norm": 0.9125623622985397,
      "learning_rate": 2.8496739886173994e-06,
      "loss": 0.5249,
      "step": 120
    },
    {
      "epoch": 2.168067226890756,
      "grad_norm": 0.9796403145730364,
      "learning_rate": 2.005409406946e-06,
      "loss": 0.4774,
      "step": 130
    },
    {
      "epoch": 2.3361344537815127,
      "grad_norm": 0.8379009664030831,
      "learning_rate": 1.277672640004936e-06,
      "loss": 0.4569,
      "step": 140
    },
    {
      "epoch": 2.504201680672269,
      "grad_norm": 0.912579146579012,
      "learning_rate": 6.947819411632223e-07,
      "loss": 0.4598,
      "step": 150
    },
    {
      "epoch": 2.6722689075630255,
      "grad_norm": 0.8570497984949336,
      "learning_rate": 2.7941920206915443e-07,
      "loss": 0.4533,
      "step": 160
    },
    {
      "epoch": 2.8403361344537816,
      "grad_norm": 0.913025927962694,
      "learning_rate": 4.774733741942206e-08,
      "loss": 0.4608,
      "step": 170
    },
    {
      "epoch": 2.957983193277311,
      "step": 177,
      "total_flos": 92447203917824.0,
      "train_loss": 0.6085002004763501,
      "train_runtime": 1135.371,
      "train_samples_per_second": 20.124,
      "train_steps_per_second": 0.156
    }
  ],
  "logging_steps": 10,
  "max_steps": 177,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 92447203917824.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}