{ "best_metric": 0.893532776066872, "best_model_checkpoint": "vit-msn-small-wbc-classifier-0316-cleaned-dataset-10/checkpoint-288", "epoch": 25.0, "eval_steps": 500, "global_step": 400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.625, "grad_norm": 5.568078994750977, "learning_rate": 1.25e-05, "loss": 1.5193, "step": 10 }, { "epoch": 1.0, "eval_accuracy": 0.7945446546414431, "eval_loss": 0.6822578310966492, "eval_runtime": 7.3634, "eval_samples_per_second": 308.688, "eval_steps_per_second": 4.889, "step": 16 }, { "epoch": 1.25, "grad_norm": 8.283280372619629, "learning_rate": 2.5e-05, "loss": 0.7595, "step": 20 }, { "epoch": 1.875, "grad_norm": 17.42099952697754, "learning_rate": 3.7500000000000003e-05, "loss": 0.5339, "step": 30 }, { "epoch": 2.0, "eval_accuracy": 0.8438187417509899, "eval_loss": 0.45528778433799744, "eval_runtime": 7.4331, "eval_samples_per_second": 305.795, "eval_steps_per_second": 4.843, "step": 32 }, { "epoch": 2.5, "grad_norm": 13.5732421875, "learning_rate": 5e-05, "loss": 0.4778, "step": 40 }, { "epoch": 3.0, "eval_accuracy": 0.847778266608007, "eval_loss": 0.45250508189201355, "eval_runtime": 7.3291, "eval_samples_per_second": 310.135, "eval_steps_per_second": 4.912, "step": 48 }, { "epoch": 3.125, "grad_norm": 13.014479637145996, "learning_rate": 4.8611111111111115e-05, "loss": 0.4966, "step": 50 }, { "epoch": 3.75, "grad_norm": 3.9621634483337402, "learning_rate": 4.722222222222222e-05, "loss": 0.4253, "step": 60 }, { "epoch": 4.0, "eval_accuracy": 0.8473383194016718, "eval_loss": 0.40767061710357666, "eval_runtime": 7.3487, "eval_samples_per_second": 309.305, "eval_steps_per_second": 4.899, "step": 64 }, { "epoch": 4.375, "grad_norm": 6.071442127227783, "learning_rate": 4.5833333333333334e-05, "loss": 0.3969, "step": 70 }, { "epoch": 5.0, "grad_norm": 14.56364917755127, "learning_rate": 4.4444444444444447e-05, "loss": 0.4086, "step": 80 }, { "epoch": 5.0, "eval_accuracy": 0.8574571051473823, "eval_loss": 0.42175111174583435, "eval_runtime": 7.398, "eval_samples_per_second": 307.245, "eval_steps_per_second": 4.866, "step": 80 }, { "epoch": 5.625, "grad_norm": 6.646561145782471, "learning_rate": 4.305555555555556e-05, "loss": 0.3673, "step": 90 }, { "epoch": 6.0, "eval_accuracy": 0.8693356797184338, "eval_loss": 0.400217205286026, "eval_runtime": 7.3916, "eval_samples_per_second": 307.511, "eval_steps_per_second": 4.87, "step": 96 }, { "epoch": 6.25, "grad_norm": 5.845401287078857, "learning_rate": 4.166666666666667e-05, "loss": 0.3728, "step": 100 }, { "epoch": 6.875, "grad_norm": 5.991316318511963, "learning_rate": 4.027777777777778e-05, "loss": 0.3275, "step": 110 }, { "epoch": 7.0, "eval_accuracy": 0.8772547294324681, "eval_loss": 0.33016717433929443, "eval_runtime": 7.3331, "eval_samples_per_second": 309.966, "eval_steps_per_second": 4.909, "step": 112 }, { "epoch": 7.5, "grad_norm": 5.736546993255615, "learning_rate": 3.888888888888889e-05, "loss": 0.3231, "step": 120 }, { "epoch": 8.0, "eval_accuracy": 0.8803343598768147, "eval_loss": 0.36715179681777954, "eval_runtime": 7.3393, "eval_samples_per_second": 309.704, "eval_steps_per_second": 4.905, "step": 128 }, { "epoch": 8.125, "grad_norm": 6.285913467407227, "learning_rate": 3.7500000000000003e-05, "loss": 0.3186, "step": 130 }, { "epoch": 8.75, "grad_norm": 5.252928733825684, "learning_rate": 3.611111111111111e-05, "loss": 0.302, "step": 140 }, { "epoch": 9.0, "eval_accuracy": 0.8900131984161901, "eval_loss": 0.33626171946525574, "eval_runtime": 7.373, "eval_samples_per_second": 308.286, "eval_steps_per_second": 4.883, "step": 144 }, { "epoch": 9.375, "grad_norm": 6.006847381591797, "learning_rate": 3.472222222222222e-05, "loss": 0.3, "step": 150 }, { "epoch": 10.0, "grad_norm": 6.0960493087768555, "learning_rate": 3.3333333333333335e-05, "loss": 0.3122, "step": 160 }, { "epoch": 10.0, "eval_accuracy": 0.884293884733832, "eval_loss": 0.32835376262664795, "eval_runtime": 7.4834, "eval_samples_per_second": 303.741, "eval_steps_per_second": 4.811, "step": 160 }, { "epoch": 10.625, "grad_norm": 6.523850440979004, "learning_rate": 3.194444444444444e-05, "loss": 0.2686, "step": 170 }, { "epoch": 11.0, "eval_accuracy": 0.8873735151781786, "eval_loss": 0.3317248523235321, "eval_runtime": 7.541, "eval_samples_per_second": 301.418, "eval_steps_per_second": 4.774, "step": 176 }, { "epoch": 11.25, "grad_norm": 4.121973514556885, "learning_rate": 3.055555555555556e-05, "loss": 0.2776, "step": 180 }, { "epoch": 11.875, "grad_norm": 4.277699947357178, "learning_rate": 2.916666666666667e-05, "loss": 0.2786, "step": 190 }, { "epoch": 12.0, "eval_accuracy": 0.8882534095908491, "eval_loss": 0.3660268783569336, "eval_runtime": 7.455, "eval_samples_per_second": 304.896, "eval_steps_per_second": 4.829, "step": 192 }, { "epoch": 12.5, "grad_norm": 5.525660514831543, "learning_rate": 2.777777777777778e-05, "loss": 0.2338, "step": 200 }, { "epoch": 13.0, "eval_accuracy": 0.8834139903211614, "eval_loss": 0.35196566581726074, "eval_runtime": 7.4309, "eval_samples_per_second": 305.884, "eval_steps_per_second": 4.845, "step": 208 }, { "epoch": 13.125, "grad_norm": 3.3051507472991943, "learning_rate": 2.6388888888888892e-05, "loss": 0.2226, "step": 210 }, { "epoch": 13.75, "grad_norm": 5.123415946960449, "learning_rate": 2.5e-05, "loss": 0.2466, "step": 220 }, { "epoch": 14.0, "eval_accuracy": 0.8895732512098549, "eval_loss": 0.34136760234832764, "eval_runtime": 7.2795, "eval_samples_per_second": 312.245, "eval_steps_per_second": 4.945, "step": 224 }, { "epoch": 14.375, "grad_norm": 7.256677627563477, "learning_rate": 2.361111111111111e-05, "loss": 0.2279, "step": 230 }, { "epoch": 15.0, "grad_norm": 5.926828384399414, "learning_rate": 2.2222222222222223e-05, "loss": 0.2296, "step": 240 }, { "epoch": 15.0, "eval_accuracy": 0.8873735151781786, "eval_loss": 0.35307401418685913, "eval_runtime": 7.3954, "eval_samples_per_second": 307.352, "eval_steps_per_second": 4.868, "step": 240 }, { "epoch": 15.625, "grad_norm": 10.099089622497559, "learning_rate": 2.0833333333333336e-05, "loss": 0.1961, "step": 250 }, { "epoch": 16.0, "eval_accuracy": 0.8847338319401672, "eval_loss": 0.38436347246170044, "eval_runtime": 7.4905, "eval_samples_per_second": 303.453, "eval_steps_per_second": 4.806, "step": 256 }, { "epoch": 16.25, "grad_norm": 6.454414367675781, "learning_rate": 1.9444444444444445e-05, "loss": 0.2159, "step": 260 }, { "epoch": 16.875, "grad_norm": 7.058192729949951, "learning_rate": 1.8055555555555555e-05, "loss": 0.2056, "step": 270 }, { "epoch": 17.0, "eval_accuracy": 0.8900131984161901, "eval_loss": 0.3704770803451538, "eval_runtime": 7.5318, "eval_samples_per_second": 301.788, "eval_steps_per_second": 4.78, "step": 272 }, { "epoch": 17.5, "grad_norm": 8.302024841308594, "learning_rate": 1.6666666666666667e-05, "loss": 0.197, "step": 280 }, { "epoch": 18.0, "eval_accuracy": 0.893532776066872, "eval_loss": 0.35377147793769836, "eval_runtime": 7.2947, "eval_samples_per_second": 311.598, "eval_steps_per_second": 4.935, "step": 288 }, { "epoch": 18.125, "grad_norm": 5.322263240814209, "learning_rate": 1.527777777777778e-05, "loss": 0.2136, "step": 290 }, { "epoch": 18.75, "grad_norm": 5.138127326965332, "learning_rate": 1.388888888888889e-05, "loss": 0.1748, "step": 300 }, { "epoch": 19.0, "eval_accuracy": 0.8886933567971843, "eval_loss": 0.37168198823928833, "eval_runtime": 7.471, "eval_samples_per_second": 304.243, "eval_steps_per_second": 4.819, "step": 304 }, { "epoch": 19.375, "grad_norm": 5.1360249519348145, "learning_rate": 1.25e-05, "loss": 0.1676, "step": 310 }, { "epoch": 20.0, "grad_norm": 3.917973041534424, "learning_rate": 1.1111111111111112e-05, "loss": 0.1807, "step": 320 }, { "epoch": 20.0, "eval_accuracy": 0.884293884733832, "eval_loss": 0.40747764706611633, "eval_runtime": 7.4744, "eval_samples_per_second": 304.105, "eval_steps_per_second": 4.816, "step": 320 }, { "epoch": 20.625, "grad_norm": 4.329443454742432, "learning_rate": 9.722222222222223e-06, "loss": 0.177, "step": 330 }, { "epoch": 21.0, "eval_accuracy": 0.8829740431148262, "eval_loss": 0.38811179995536804, "eval_runtime": 7.4018, "eval_samples_per_second": 307.088, "eval_steps_per_second": 4.864, "step": 336 }, { "epoch": 21.25, "grad_norm": 5.614075183868408, "learning_rate": 8.333333333333334e-06, "loss": 0.17, "step": 340 }, { "epoch": 21.875, "grad_norm": 4.243983745574951, "learning_rate": 6.944444444444445e-06, "loss": 0.1433, "step": 350 }, { "epoch": 22.0, "eval_accuracy": 0.8856137263528376, "eval_loss": 0.40139684081077576, "eval_runtime": 7.4287, "eval_samples_per_second": 305.975, "eval_steps_per_second": 4.846, "step": 352 }, { "epoch": 22.5, "grad_norm": 3.7538909912109375, "learning_rate": 5.555555555555556e-06, "loss": 0.1522, "step": 360 }, { "epoch": 23.0, "eval_accuracy": 0.8873735151781786, "eval_loss": 0.3918473422527313, "eval_runtime": 7.5833, "eval_samples_per_second": 299.738, "eval_steps_per_second": 4.747, "step": 368 }, { "epoch": 23.125, "grad_norm": 5.43955659866333, "learning_rate": 4.166666666666667e-06, "loss": 0.1608, "step": 370 }, { "epoch": 23.75, "grad_norm": 3.6251299381256104, "learning_rate": 2.777777777777778e-06, "loss": 0.1322, "step": 380 }, { "epoch": 24.0, "eval_accuracy": 0.8904531456225253, "eval_loss": 0.4198566973209381, "eval_runtime": 7.4829, "eval_samples_per_second": 303.759, "eval_steps_per_second": 4.811, "step": 384 }, { "epoch": 24.375, "grad_norm": 4.068058013916016, "learning_rate": 1.388888888888889e-06, "loss": 0.1224, "step": 390 }, { "epoch": 25.0, "grad_norm": 4.5933332443237305, "learning_rate": 0.0, "loss": 0.1396, "step": 400 }, { "epoch": 25.0, "eval_accuracy": 0.8895732512098549, "eval_loss": 0.4142039120197296, "eval_runtime": 7.6009, "eval_samples_per_second": 299.044, "eval_steps_per_second": 4.736, "step": 400 }, { "epoch": 25.0, "step": 400, "total_flos": 2.0038784309526528e+18, "train_loss": 0.3093862909078598, "train_runtime": 987.5731, "train_samples_per_second": 103.689, "train_steps_per_second": 0.405 } ], "logging_steps": 10, "max_steps": 400, "num_input_tokens_seen": 0, "num_train_epochs": 25, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.0038784309526528e+18, "train_batch_size": 64, "trial_name": null, "trial_params": null }