{ "best_metric": 0.5373075008392334, "best_model_checkpoint": "miner_id_24/checkpoint-200", "epoch": 0.15873015873015872, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0007936507936507937, "eval_loss": 6.810345649719238, "eval_runtime": 28.5873, "eval_samples_per_second": 13.922, "eval_steps_per_second": 3.498, "step": 1 }, { "epoch": 0.007936507936507936, "grad_norm": 12.51639175415039, "learning_rate": 5.095e-06, "loss": 4.7296, "step": 10 }, { "epoch": 0.015873015873015872, "grad_norm": 11.45109748840332, "learning_rate": 1.019e-05, "loss": 5.4391, "step": 20 }, { "epoch": 0.023809523809523808, "grad_norm": 6.5851359367370605, "learning_rate": 9.623888888888889e-06, "loss": 4.6711, "step": 30 }, { "epoch": 0.031746031746031744, "grad_norm": 9.905716896057129, "learning_rate": 9.057777777777777e-06, "loss": 5.4699, "step": 40 }, { "epoch": 0.03968253968253968, "grad_norm": 14.33770751953125, "learning_rate": 8.491666666666667e-06, "loss": 5.3696, "step": 50 }, { "epoch": 0.03968253968253968, "eval_loss": 4.328246593475342, "eval_runtime": 28.9921, "eval_samples_per_second": 13.728, "eval_steps_per_second": 3.449, "step": 50 }, { "epoch": 0.047619047619047616, "grad_norm": 5.936036109924316, "learning_rate": 7.925555555555557e-06, "loss": 2.5798, "step": 60 }, { "epoch": 0.05555555555555555, "grad_norm": 8.582554817199707, "learning_rate": 7.359444444444445e-06, "loss": 2.2367, "step": 70 }, { "epoch": 0.06349206349206349, "grad_norm": 12.786343574523926, "learning_rate": 6.793333333333333e-06, "loss": 1.8391, "step": 80 }, { "epoch": 0.07142857142857142, "grad_norm": 22.405052185058594, "learning_rate": 6.227222222222223e-06, "loss": 1.3989, "step": 90 }, { "epoch": 0.07936507936507936, "grad_norm": 16.31127166748047, "learning_rate": 5.661111111111112e-06, "loss": 1.4442, "step": 100 }, { "epoch": 0.07936507936507936, "eval_loss": 0.9730060696601868, "eval_runtime": 29.0537, "eval_samples_per_second": 13.699, "eval_steps_per_second": 3.442, "step": 100 }, { "epoch": 0.0873015873015873, "grad_norm": 11.517756462097168, "learning_rate": 5.095e-06, "loss": 0.7625, "step": 110 }, { "epoch": 0.09523809523809523, "grad_norm": 8.872756004333496, "learning_rate": 4.5288888888888885e-06, "loss": 0.5658, "step": 120 }, { "epoch": 0.10317460317460317, "grad_norm": 11.381665229797363, "learning_rate": 3.9627777777777784e-06, "loss": 0.5982, "step": 130 }, { "epoch": 0.1111111111111111, "grad_norm": 28.924938201904297, "learning_rate": 3.3966666666666666e-06, "loss": 0.4895, "step": 140 }, { "epoch": 0.11904761904761904, "grad_norm": 9.414434432983398, "learning_rate": 2.830555555555556e-06, "loss": 0.737, "step": 150 }, { "epoch": 0.11904761904761904, "eval_loss": 0.582314670085907, "eval_runtime": 29.0563, "eval_samples_per_second": 13.698, "eval_steps_per_second": 3.442, "step": 150 }, { "epoch": 0.12698412698412698, "grad_norm": 9.03739070892334, "learning_rate": 2.2644444444444443e-06, "loss": 0.4166, "step": 160 }, { "epoch": 0.1349206349206349, "grad_norm": 5.134159088134766, "learning_rate": 1.6983333333333333e-06, "loss": 0.3406, "step": 170 }, { "epoch": 0.14285714285714285, "grad_norm": 7.450616836547852, "learning_rate": 1.1322222222222221e-06, "loss": 0.4627, "step": 180 }, { "epoch": 0.15079365079365079, "grad_norm": 12.882534980773926, "learning_rate": 5.661111111111111e-07, "loss": 0.5223, "step": 190 }, { "epoch": 0.15873015873015872, "grad_norm": 17.285032272338867, "learning_rate": 0.0, "loss": 0.6207, "step": 200 }, { "epoch": 0.15873015873015872, "eval_loss": 0.5373075008392334, "eval_runtime": 29.0493, "eval_samples_per_second": 13.701, "eval_steps_per_second": 3.442, "step": 200 } ], "logging_steps": 10, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.27255432331264e+16, "train_batch_size": 6, "trial_name": null, "trial_params": null }