|
{ |
|
"best_metric": 0.8032710280373832, |
|
"best_model_checkpoint": "resnet-152-finetuned-cassava-leaf-disease/checkpoint-703", |
|
"epoch": 29.690721649484537, |
|
"eval_steps": 500, |
|
"global_step": 720, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 6.944444444444445e-06, |
|
"loss": 7.3336, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.388888888888889e-05, |
|
"loss": 7.2158, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_accuracy": 0.0014018691588785046, |
|
"eval_loss": 6.815145015716553, |
|
"eval_runtime": 8.158, |
|
"eval_samples_per_second": 262.318, |
|
"eval_steps_per_second": 1.348, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 2.0833333333333336e-05, |
|
"loss": 6.9635, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 6.6136, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"eval_accuracy": 0.046261682242990654, |
|
"eval_loss": 5.699448108673096, |
|
"eval_runtime": 8.3972, |
|
"eval_samples_per_second": 254.846, |
|
"eval_steps_per_second": 1.31, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 3.472222222222222e-05, |
|
"loss": 6.1712, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 5.5789, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 4.8611111111111115e-05, |
|
"loss": 4.7064, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"eval_accuracy": 0.45560747663551404, |
|
"eval_loss": 3.352895975112915, |
|
"eval_runtime": 7.9592, |
|
"eval_samples_per_second": 268.872, |
|
"eval_steps_per_second": 1.382, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 4.938271604938271e-05, |
|
"loss": 3.8038, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 4.8611111111111115e-05, |
|
"loss": 3.0888, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.5920560747663551, |
|
"eval_loss": 2.1475517749786377, |
|
"eval_runtime": 8.0136, |
|
"eval_samples_per_second": 267.048, |
|
"eval_steps_per_second": 1.373, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 4.783950617283951e-05, |
|
"loss": 2.5659, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 4.70679012345679e-05, |
|
"loss": 2.1887, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 4.62962962962963e-05, |
|
"loss": 1.8458, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"eval_accuracy": 0.6285046728971962, |
|
"eval_loss": 1.588365077972412, |
|
"eval_runtime": 8.4217, |
|
"eval_samples_per_second": 254.105, |
|
"eval_steps_per_second": 1.306, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 5.36, |
|
"learning_rate": 4.5524691358024696e-05, |
|
"loss": 1.6147, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 5.77, |
|
"learning_rate": 4.4753086419753084e-05, |
|
"loss": 1.4168, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 5.98, |
|
"eval_accuracy": 0.6588785046728972, |
|
"eval_loss": 1.2460048198699951, |
|
"eval_runtime": 7.9049, |
|
"eval_samples_per_second": 270.718, |
|
"eval_steps_per_second": 1.392, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 6.19, |
|
"learning_rate": 4.3981481481481486e-05, |
|
"loss": 1.271, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 4.3209876543209875e-05, |
|
"loss": 1.1951, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 6.97, |
|
"eval_accuracy": 0.6869158878504673, |
|
"eval_loss": 1.0658469200134277, |
|
"eval_runtime": 7.9886, |
|
"eval_samples_per_second": 267.881, |
|
"eval_steps_per_second": 1.377, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 4.243827160493827e-05, |
|
"loss": 1.1222, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 7.42, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 1.0719, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 7.84, |
|
"learning_rate": 4.089506172839506e-05, |
|
"loss": 0.9871, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7037383177570093, |
|
"eval_loss": 0.9436482191085815, |
|
"eval_runtime": 8.7539, |
|
"eval_samples_per_second": 244.461, |
|
"eval_steps_per_second": 1.257, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 8.25, |
|
"learning_rate": 4.012345679012346e-05, |
|
"loss": 0.9461, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 8.66, |
|
"learning_rate": 3.935185185185186e-05, |
|
"loss": 0.9247, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"eval_accuracy": 0.7238317757009346, |
|
"eval_loss": 0.8639922738075256, |
|
"eval_runtime": 8.532, |
|
"eval_samples_per_second": 250.821, |
|
"eval_steps_per_second": 1.289, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 9.07, |
|
"learning_rate": 3.8580246913580246e-05, |
|
"loss": 0.8806, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 9.48, |
|
"learning_rate": 3.780864197530865e-05, |
|
"loss": 0.8467, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 9.9, |
|
"learning_rate": 3.7037037037037037e-05, |
|
"loss": 0.826, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 9.98, |
|
"eval_accuracy": 0.7317757009345794, |
|
"eval_loss": 0.7976669073104858, |
|
"eval_runtime": 7.7765, |
|
"eval_samples_per_second": 275.189, |
|
"eval_steps_per_second": 1.415, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 10.31, |
|
"learning_rate": 3.626543209876543e-05, |
|
"loss": 0.79, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 10.72, |
|
"learning_rate": 3.5493827160493834e-05, |
|
"loss": 0.7964, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 10.97, |
|
"eval_accuracy": 0.7509345794392523, |
|
"eval_loss": 0.7539175152778625, |
|
"eval_runtime": 7.6095, |
|
"eval_samples_per_second": 281.226, |
|
"eval_steps_per_second": 1.446, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 11.13, |
|
"learning_rate": 3.472222222222222e-05, |
|
"loss": 0.7405, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 11.55, |
|
"learning_rate": 3.395061728395062e-05, |
|
"loss": 0.7471, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 11.96, |
|
"learning_rate": 3.317901234567901e-05, |
|
"loss": 0.7168, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.7514018691588785, |
|
"eval_loss": 0.7179247140884399, |
|
"eval_runtime": 8.7034, |
|
"eval_samples_per_second": 245.882, |
|
"eval_steps_per_second": 1.264, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 12.37, |
|
"learning_rate": 3.240740740740741e-05, |
|
"loss": 0.7166, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 12.78, |
|
"learning_rate": 3.16358024691358e-05, |
|
"loss": 0.6893, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 12.99, |
|
"eval_accuracy": 0.7630841121495328, |
|
"eval_loss": 0.6952459812164307, |
|
"eval_runtime": 8.8433, |
|
"eval_samples_per_second": 241.991, |
|
"eval_steps_per_second": 1.244, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 13.2, |
|
"learning_rate": 3.08641975308642e-05, |
|
"loss": 0.6942, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 13.61, |
|
"learning_rate": 3.0092592592592593e-05, |
|
"loss": 0.687, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 13.98, |
|
"eval_accuracy": 0.764018691588785, |
|
"eval_loss": 0.6812998652458191, |
|
"eval_runtime": 8.1933, |
|
"eval_samples_per_second": 261.19, |
|
"eval_steps_per_second": 1.343, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 14.02, |
|
"learning_rate": 2.9320987654320992e-05, |
|
"loss": 0.6621, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 14.43, |
|
"learning_rate": 2.8549382716049384e-05, |
|
"loss": 0.6721, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 14.85, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.6496, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 14.97, |
|
"eval_accuracy": 0.7691588785046729, |
|
"eval_loss": 0.6577351689338684, |
|
"eval_runtime": 7.8611, |
|
"eval_samples_per_second": 272.227, |
|
"eval_steps_per_second": 1.399, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 15.26, |
|
"learning_rate": 2.700617283950617e-05, |
|
"loss": 0.6403, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 15.67, |
|
"learning_rate": 2.623456790123457e-05, |
|
"loss": 0.6477, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.7813084112149533, |
|
"eval_loss": 0.6433616280555725, |
|
"eval_runtime": 7.6827, |
|
"eval_samples_per_second": 278.548, |
|
"eval_steps_per_second": 1.432, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 16.08, |
|
"learning_rate": 2.5462962962962965e-05, |
|
"loss": 0.6452, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 16.49, |
|
"learning_rate": 2.4691358024691357e-05, |
|
"loss": 0.6371, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 16.91, |
|
"learning_rate": 2.3919753086419755e-05, |
|
"loss": 0.5992, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 16.99, |
|
"eval_accuracy": 0.7771028037383177, |
|
"eval_loss": 0.6388670802116394, |
|
"eval_runtime": 8.24, |
|
"eval_samples_per_second": 259.708, |
|
"eval_steps_per_second": 1.335, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 17.32, |
|
"learning_rate": 2.314814814814815e-05, |
|
"loss": 0.6016, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 17.73, |
|
"learning_rate": 2.2376543209876542e-05, |
|
"loss": 0.6059, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 17.98, |
|
"eval_accuracy": 0.7906542056074767, |
|
"eval_loss": 0.623566746711731, |
|
"eval_runtime": 8.516, |
|
"eval_samples_per_second": 251.293, |
|
"eval_steps_per_second": 1.292, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 18.14, |
|
"learning_rate": 2.1604938271604937e-05, |
|
"loss": 0.6031, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 18.56, |
|
"learning_rate": 2.0833333333333336e-05, |
|
"loss": 0.6126, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 18.97, |
|
"learning_rate": 2.006172839506173e-05, |
|
"loss": 0.5993, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 18.97, |
|
"eval_accuracy": 0.7934579439252336, |
|
"eval_loss": 0.6181445717811584, |
|
"eval_runtime": 7.8023, |
|
"eval_samples_per_second": 274.277, |
|
"eval_steps_per_second": 1.41, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 19.38, |
|
"learning_rate": 1.9290123456790123e-05, |
|
"loss": 0.5872, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 19.79, |
|
"learning_rate": 1.8518518518518518e-05, |
|
"loss": 0.5802, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.7911214953271029, |
|
"eval_loss": 0.611458957195282, |
|
"eval_runtime": 8.4234, |
|
"eval_samples_per_second": 254.054, |
|
"eval_steps_per_second": 1.306, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 20.21, |
|
"learning_rate": 1.7746913580246917e-05, |
|
"loss": 0.6008, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 20.62, |
|
"learning_rate": 1.697530864197531e-05, |
|
"loss": 0.5905, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 20.99, |
|
"eval_accuracy": 0.7929906542056074, |
|
"eval_loss": 0.6005003452301025, |
|
"eval_runtime": 7.7149, |
|
"eval_samples_per_second": 277.386, |
|
"eval_steps_per_second": 1.426, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 21.03, |
|
"learning_rate": 1.6203703703703704e-05, |
|
"loss": 0.5861, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 21.44, |
|
"learning_rate": 1.54320987654321e-05, |
|
"loss": 0.5865, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 21.86, |
|
"learning_rate": 1.4660493827160496e-05, |
|
"loss": 0.5825, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 21.98, |
|
"eval_accuracy": 0.7953271028037383, |
|
"eval_loss": 0.5945016145706177, |
|
"eval_runtime": 7.5452, |
|
"eval_samples_per_second": 283.623, |
|
"eval_steps_per_second": 1.458, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 22.27, |
|
"learning_rate": 1.388888888888889e-05, |
|
"loss": 0.5732, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 22.68, |
|
"learning_rate": 1.3117283950617285e-05, |
|
"loss": 0.5837, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 22.97, |
|
"eval_accuracy": 0.7981308411214953, |
|
"eval_loss": 0.5882120132446289, |
|
"eval_runtime": 8.1333, |
|
"eval_samples_per_second": 263.116, |
|
"eval_steps_per_second": 1.352, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 23.09, |
|
"learning_rate": 1.2345679012345678e-05, |
|
"loss": 0.5791, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 23.51, |
|
"learning_rate": 1.1574074074074075e-05, |
|
"loss": 0.5783, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 23.92, |
|
"learning_rate": 1.0802469135802469e-05, |
|
"loss": 0.5501, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.8023364485981308, |
|
"eval_loss": 0.5847517251968384, |
|
"eval_runtime": 8.0927, |
|
"eval_samples_per_second": 264.435, |
|
"eval_steps_per_second": 1.359, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 24.33, |
|
"learning_rate": 1.0030864197530866e-05, |
|
"loss": 0.5841, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 24.74, |
|
"learning_rate": 9.259259259259259e-06, |
|
"loss": 0.5593, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 24.99, |
|
"eval_accuracy": 0.7962616822429907, |
|
"eval_loss": 0.5861061811447144, |
|
"eval_runtime": 7.5867, |
|
"eval_samples_per_second": 282.074, |
|
"eval_steps_per_second": 1.45, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 25.15, |
|
"learning_rate": 8.487654320987654e-06, |
|
"loss": 0.5526, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 25.57, |
|
"learning_rate": 7.71604938271605e-06, |
|
"loss": 0.5792, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 25.98, |
|
"learning_rate": 6.944444444444445e-06, |
|
"loss": 0.5454, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 25.98, |
|
"eval_accuracy": 0.8014018691588785, |
|
"eval_loss": 0.5776079893112183, |
|
"eval_runtime": 7.665, |
|
"eval_samples_per_second": 279.191, |
|
"eval_steps_per_second": 1.435, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 26.39, |
|
"learning_rate": 6.172839506172839e-06, |
|
"loss": 0.5477, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 26.8, |
|
"learning_rate": 5.401234567901234e-06, |
|
"loss": 0.5656, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 26.97, |
|
"eval_accuracy": 0.7976635514018692, |
|
"eval_loss": 0.579258143901825, |
|
"eval_runtime": 7.9989, |
|
"eval_samples_per_second": 267.536, |
|
"eval_steps_per_second": 1.375, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 27.22, |
|
"learning_rate": 4.6296296296296296e-06, |
|
"loss": 0.5546, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 27.63, |
|
"learning_rate": 3.858024691358025e-06, |
|
"loss": 0.5708, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.8004672897196262, |
|
"eval_loss": 0.5814595818519592, |
|
"eval_runtime": 8.2466, |
|
"eval_samples_per_second": 259.502, |
|
"eval_steps_per_second": 1.334, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 28.04, |
|
"learning_rate": 3.0864197530864196e-06, |
|
"loss": 0.5505, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 28.45, |
|
"learning_rate": 2.3148148148148148e-06, |
|
"loss": 0.5565, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 28.87, |
|
"learning_rate": 1.5432098765432098e-06, |
|
"loss": 0.5541, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 28.99, |
|
"eval_accuracy": 0.8032710280373832, |
|
"eval_loss": 0.5749428868293762, |
|
"eval_runtime": 8.685, |
|
"eval_samples_per_second": 246.402, |
|
"eval_steps_per_second": 1.267, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 29.28, |
|
"learning_rate": 7.716049382716049e-07, |
|
"loss": 0.5511, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 29.69, |
|
"learning_rate": 0.0, |
|
"loss": 0.5439, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 29.69, |
|
"eval_accuracy": 0.7995327102803739, |
|
"eval_loss": 0.5757396221160889, |
|
"eval_runtime": 7.6093, |
|
"eval_samples_per_second": 281.236, |
|
"eval_steps_per_second": 1.446, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 29.69, |
|
"step": 720, |
|
"total_flos": 3.1088341164957254e+19, |
|
"train_loss": 1.3989773021803962, |
|
"train_runtime": 2686.7823, |
|
"train_samples_per_second": 215.019, |
|
"train_steps_per_second": 0.268 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 720, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 30, |
|
"save_steps": 500, |
|
"total_flos": 3.1088341164957254e+19, |
|
"train_batch_size": 200, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|