|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9955555555555555, |
|
"eval_steps": 500, |
|
"global_step": 161, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.006183574879227053, |
|
"grad_norm": 6.14696354757804, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3449, |
|
"mean_token_accuracy": 0.915102468803525, |
|
"num_tokens": 536845.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.06183574879227053, |
|
"grad_norm": 0.25824490458409005, |
|
"learning_rate": 0.0001, |
|
"loss": 0.255, |
|
"mean_token_accuracy": 0.91935970261693, |
|
"num_tokens": 5420782.0, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.12367149758454106, |
|
"grad_norm": 0.12207035150262055, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1607, |
|
"mean_token_accuracy": 0.9383567951619625, |
|
"num_tokens": 10836749.0, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.1855072463768116, |
|
"grad_norm": 0.0931360663742762, |
|
"learning_rate": 0.0001, |
|
"loss": 0.14, |
|
"mean_token_accuracy": 0.945984673500061, |
|
"num_tokens": 16250383.0, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.24734299516908212, |
|
"grad_norm": 0.08260645299611312, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1294, |
|
"mean_token_accuracy": 0.9498019333928823, |
|
"num_tokens": 21694294.0, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.30917874396135264, |
|
"grad_norm": 0.09270504271704304, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1231, |
|
"mean_token_accuracy": 0.952541628666222, |
|
"num_tokens": 27089618.0, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.3710144927536232, |
|
"grad_norm": 0.09892330366085841, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1173, |
|
"mean_token_accuracy": 0.9548029892146588, |
|
"num_tokens": 32497984.0, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.43285024154589374, |
|
"grad_norm": 0.09081855122666112, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1129, |
|
"mean_token_accuracy": 0.9565541172400117, |
|
"num_tokens": 37913108.0, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.49468599033816424, |
|
"grad_norm": 0.08744357034359414, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1093, |
|
"mean_token_accuracy": 0.9580830905586482, |
|
"num_tokens": 43329106.0, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.5565217391304348, |
|
"grad_norm": 0.09900086305146383, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1064, |
|
"mean_token_accuracy": 0.9591556312516332, |
|
"num_tokens": 48742706.0, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.6183574879227053, |
|
"grad_norm": 0.10897858928578598, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1047, |
|
"mean_token_accuracy": 0.9600882643833757, |
|
"num_tokens": 54145465.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.6801932367149759, |
|
"grad_norm": 0.10086205886710188, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1012, |
|
"mean_token_accuracy": 0.9615297164767981, |
|
"num_tokens": 59564822.0, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.7420289855072464, |
|
"grad_norm": 0.09201974209736791, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0995, |
|
"mean_token_accuracy": 0.9620782939717174, |
|
"num_tokens": 64974349.0, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.8038647342995169, |
|
"grad_norm": 0.10194015491082806, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0981, |
|
"mean_token_accuracy": 0.9625154824927449, |
|
"num_tokens": 70366254.0, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.8657004830917875, |
|
"grad_norm": 0.09022409759364422, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0972, |
|
"mean_token_accuracy": 0.9628898801282049, |
|
"num_tokens": 75762074.0, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.927536231884058, |
|
"grad_norm": 0.08192921340517849, |
|
"learning_rate": 0.0001, |
|
"loss": 0.095, |
|
"mean_token_accuracy": 0.963659605011344, |
|
"num_tokens": 81167608.0, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.9893719806763285, |
|
"grad_norm": 0.08255379291969749, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0943, |
|
"mean_token_accuracy": 0.9640055214986205, |
|
"num_tokens": 86563355.0, |
|
"step": 160 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 644, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 233965155254272.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|