{ "best_metric": 4.152973175048828, "best_model_checkpoint": "/home/p318482/babyLM_controlled/models_trained/fr_clm/wikipedia_30/checkpoint-16000", "epoch": 17.176596886741816, "eval_steps": 2000, "global_step": 16000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 2.1470746108427265, "eval_loss": 7.138043403625488, "eval_runtime": 0.8415, "eval_samples_per_second": 1273.93, "eval_steps_per_second": 79.621, "step": 2000 }, { "epoch": 4.294149221685453, "grad_norm": 1.455039381980896, "learning_rate": 1e-05, "loss": 7.213, "step": 4000 }, { "epoch": 4.294149221685453, "eval_loss": 5.8541717529296875, "eval_runtime": 0.7753, "eval_samples_per_second": 1382.676, "eval_steps_per_second": 86.417, "step": 4000 }, { "epoch": 6.4412238325281805, "eval_loss": 5.403579235076904, "eval_runtime": 0.7542, "eval_samples_per_second": 1421.437, "eval_steps_per_second": 88.84, "step": 6000 }, { "epoch": 8.588298443370906, "grad_norm": 3.117489814758301, "learning_rate": 1.9997500000000003e-05, "loss": 5.4304, "step": 8000 }, { "epoch": 8.588298443370906, "eval_loss": 5.049880504608154, "eval_runtime": 0.7597, "eval_samples_per_second": 1411.127, "eval_steps_per_second": 88.195, "step": 8000 }, { "epoch": 10.735373054213634, "eval_loss": 4.760603427886963, "eval_runtime": 0.7624, "eval_samples_per_second": 1406.053, "eval_steps_per_second": 87.878, "step": 10000 }, { "epoch": 12.882447665056361, "grad_norm": 2.9511141777038574, "learning_rate": 2.9995e-05, "loss": 4.771, "step": 12000 }, { "epoch": 12.882447665056361, "eval_loss": 4.517208576202393, "eval_runtime": 0.764, "eval_samples_per_second": 1403.097, "eval_steps_per_second": 87.694, "step": 12000 }, { "epoch": 15.029522275899087, "eval_loss": 4.3206024169921875, "eval_runtime": 0.7595, "eval_samples_per_second": 1411.435, "eval_steps_per_second": 88.215, "step": 14000 }, { "epoch": 17.176596886741816, "grad_norm": 2.955515146255493, "learning_rate": 3.999e-05, "loss": 4.2888, "step": 16000 }, { "epoch": 17.176596886741816, "eval_loss": 4.152973175048828, "eval_runtime": 0.8247, "eval_samples_per_second": 1299.91, "eval_steps_per_second": 81.244, "step": 16000 } ], "logging_steps": 4000, "max_steps": 100000, "num_input_tokens_seen": 0, "num_train_epochs": 108, "save_steps": 4000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4135961232408576.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }