{ "best_metric": 0.9175407198197038, "best_model_checkpoint": "./saved_models/roberta_sbdh_gpt4_v2_0/checkpoint-792", "epoch": 33.0, "eval_steps": 500, "global_step": 792, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 1.3806931972503662, "learning_rate": 2.5e-06, "loss": 0.6134, "step": 24 }, { "epoch": 1.0, "eval_acc_macro": 0.10215572903385978, "eval_acc_micro": 0.17855121675155125, "eval_auc_macro": 0.6100041638713678, "eval_auc_micro": 0.6464031657743947, "eval_f1_at_5": 0.2718421152169764, "eval_f1_at_8": 0.22628042555556532, "eval_f1_macro": 0.1715749294146787, "eval_f1_micro": 0.30300120048017753, "eval_loss": 0.4604409337043762, "eval_prec_at_5": 0.17442922374429226, "eval_prec_at_8": 0.1329908675799087, "eval_prec_macro": 0.1401339887841245, "eval_prec_micro": 0.2132477188239198, "eval_rec_at_5": 0.6156773211567731, "eval_rec_at_8": 0.7579908675799086, "eval_rec_macro": 0.36147047478751165, "eval_rec_micro": 0.5232172470978007, "eval_runtime": 3.307, "eval_samples_per_second": 264.896, "eval_steps_per_second": 33.263, "step": 24 }, { "epoch": 2.0, "grad_norm": 0.42557960748672485, "learning_rate": 5e-06, "loss": 0.4178, "step": 48 }, { "epoch": 2.0, "eval_acc_macro": 0.054161895439312095, "eval_acc_micro": 0.18999494694289085, "eval_auc_macro": 0.777689574836493, "eval_auc_micro": 0.6837933158193559, "eval_f1_at_5": 0.2980163960623497, "eval_f1_at_8": 0.2355215334598575, "eval_f1_macro": 0.08160871746824805, "eval_f1_micro": 0.31932059447980304, "eval_loss": 0.35270196199417114, "eval_prec_at_5": 0.19109589041095887, "eval_prec_at_8": 0.13855593607305935, "eval_prec_macro": 0.057278901143779114, "eval_prec_micro": 0.32724107919927525, "eval_rec_at_5": 0.6765601217656013, "eval_rec_at_8": 0.784627092846271, "eval_rec_macro": 0.14846491228063083, "eval_rec_micro": 0.3117744610281665, "eval_runtime": 3.2254, "eval_samples_per_second": 271.594, "eval_steps_per_second": 34.104, "step": 48 }, { "epoch": 3.0, "grad_norm": 0.21710661053657532, "learning_rate": 7.500000000000001e-06, "loss": 0.3548, "step": 72 }, { "epoch": 3.0, "eval_acc_macro": 0.08172631395735021, "eval_acc_micro": 0.2394548994159481, "eval_auc_macro": 0.8898708262347639, "eval_auc_micro": 0.7531371635981566, "eval_f1_at_5": 0.3273134867709266, "eval_f1_at_8": 0.2656356996119607, "eval_f1_macro": 0.10962566844915878, "eval_f1_micro": 0.3863874345549333, "eval_loss": 0.3343456983566284, "eval_prec_at_5": 0.20958904109589044, "eval_prec_at_8": 0.15625, "eval_prec_macro": 0.08775419624473717, "eval_prec_micro": 0.5241477272726528, "eval_rec_at_5": 0.746765601217656, "eval_rec_at_8": 0.8856544901065448, "eval_rec_macro": 0.14795321637419723, "eval_rec_micro": 0.30597014925370597, "eval_runtime": 3.2799, "eval_samples_per_second": 267.084, "eval_steps_per_second": 33.538, "step": 72 }, { "epoch": 4.0, "grad_norm": 0.26009050011634827, "learning_rate": 1e-05, "loss": 0.3315, "step": 96 }, { "epoch": 4.0, "eval_acc_macro": 0.2205899892707659, "eval_acc_micro": 0.31238003838770095, "eval_auc_macro": 0.9150780627599414, "eval_auc_micro": 0.8412487048958944, "eval_f1_at_5": 0.36996556565347266, "eval_f1_at_8": 0.27616272202971137, "eval_f1_macro": 0.32075046434231563, "eval_f1_micro": 0.4760511882997823, "eval_loss": 0.30232226848602295, "eval_prec_at_5": 0.23675799086757995, "eval_prec_at_8": 0.16238584474885845, "eval_prec_macro": 0.534140737674402, "eval_prec_micro": 0.4257684761281605, "eval_rec_at_5": 0.8458904109589042, "eval_rec_at_8": 0.9225646879756468, "eval_rec_macro": 0.37458794170416176, "eval_rec_micro": 0.5398009950248308, "eval_runtime": 3.2394, "eval_samples_per_second": 270.421, "eval_steps_per_second": 33.957, "step": 96 }, { "epoch": 5.0, "grad_norm": 0.37228670716285706, "learning_rate": 9.722222222222223e-06, "loss": 0.2947, "step": 120 }, { "epoch": 5.0, "eval_acc_macro": 0.4283107268415045, "eval_acc_micro": 0.5237793278376333, "eval_auc_macro": 0.944198889889774, "eval_auc_micro": 0.9267578754982164, "eval_f1_at_5": 0.4096518568367693, "eval_f1_at_8": 0.29017455585500507, "eval_f1_macro": 0.539018158110013, "eval_f1_micro": 0.6874739908447202, "eval_loss": 0.25655001401901245, "eval_prec_at_5": 0.2618721461187215, "eval_prec_at_8": 0.17051940639269406, "eval_prec_macro": 0.5661692193127564, "eval_prec_micro": 0.6900584795321061, "eval_rec_at_5": 0.9402587519025875, "eval_rec_at_8": 0.9727929984779301, "eval_rec_macro": 0.564018051489352, "eval_rec_micro": 0.6849087893863445, "eval_runtime": 3.287, "eval_samples_per_second": 266.502, "eval_steps_per_second": 33.465, "step": 120 }, { "epoch": 6.0, "grad_norm": 0.2922113835811615, "learning_rate": 9.444444444444445e-06, "loss": 0.255, "step": 144 }, { "epoch": 6.0, "eval_acc_macro": 0.5136685674094098, "eval_acc_micro": 0.6013651877132695, "eval_auc_macro": 0.9633338119361814, "eval_auc_micro": 0.9605888281923002, "eval_f1_at_5": 0.42005389856494574, "eval_f1_at_8": 0.29155799132187266, "eval_f1_macro": 0.6233454431290059, "eval_f1_micro": 0.7510656436486999, "eval_loss": 0.21982233226299286, "eval_prec_at_5": 0.2687214611872146, "eval_prec_at_8": 0.1713755707762557, "eval_prec_macro": 0.7586900162484641, "eval_prec_micro": 0.7728070175437918, "eval_rec_at_5": 0.9615677321156773, "eval_rec_at_8": 0.976027397260274, "eval_rec_macro": 0.6250160961046504, "eval_rec_micro": 0.7305140961856774, "eval_runtime": 3.2407, "eval_samples_per_second": 270.308, "eval_steps_per_second": 33.943, "step": 144 }, { "epoch": 7.0, "grad_norm": 0.6698095202445984, "learning_rate": 9.166666666666666e-06, "loss": 0.2228, "step": 168 }, { "epoch": 7.0, "eval_acc_macro": 0.6303220555404457, "eval_acc_micro": 0.6847290640393606, "eval_auc_macro": 0.9734325874763358, "eval_auc_micro": 0.9748103810769206, "eval_f1_at_5": 0.42340560395061977, "eval_f1_at_8": 0.2920219218917755, "eval_f1_macro": 0.7458516061176264, "eval_f1_micro": 0.8128654970759555, "eval_loss": 0.19146637618541718, "eval_prec_at_5": 0.27100456621004565, "eval_prec_at_8": 0.1716609589041096, "eval_prec_macro": 0.86104613287547, "eval_prec_micro": 0.8190235690235, "eval_rec_at_5": 0.9674657534246576, "eval_rec_at_8": 0.9771689497716894, "eval_rec_macro": 0.7345526249297327, "eval_rec_micro": 0.806799336650016, "eval_runtime": 3.3221, "eval_samples_per_second": 263.692, "eval_steps_per_second": 33.112, "step": 168 }, { "epoch": 8.0, "grad_norm": 0.3862050771713257, "learning_rate": 8.888888888888888e-06, "loss": 0.1968, "step": 192 }, { "epoch": 8.0, "eval_acc_macro": 0.6911445121462139, "eval_acc_micro": 0.7442922374428657, "eval_auc_macro": 0.9794839519003108, "eval_auc_micro": 0.9822416144793619, "eval_f1_at_5": 0.4247570125938748, "eval_f1_at_8": 0.291789958878004, "eval_f1_macro": 0.7968110573167572, "eval_f1_micro": 0.853403141361182, "eval_loss": 0.16935397684574127, "eval_prec_at_5": 0.2719178082191781, "eval_prec_at_8": 0.17151826484018265, "eval_prec_macro": 0.9142914075642903, "eval_prec_micro": 0.9005524861877623, "eval_rec_at_5": 0.9699391171993911, "eval_rec_at_8": 0.9765981735159818, "eval_rec_macro": 0.7524753915565773, "eval_rec_micro": 0.8109452736317735, "eval_runtime": 3.3593, "eval_samples_per_second": 260.765, "eval_steps_per_second": 32.745, "step": 192 }, { "epoch": 9.0, "grad_norm": 0.3425958752632141, "learning_rate": 8.611111111111112e-06, "loss": 0.1756, "step": 216 }, { "epoch": 9.0, "eval_acc_macro": 0.7538851024227721, "eval_acc_micro": 0.7733711048158092, "eval_auc_macro": 0.984468975401199, "eval_auc_micro": 0.9869544212457306, "eval_f1_at_5": 0.42679308597884263, "eval_f1_at_8": 0.2922793659426448, "eval_f1_macro": 0.8539710926649278, "eval_f1_micro": 0.8722044728433809, "eval_loss": 0.15195928514003754, "eval_prec_at_5": 0.2732876712328767, "eval_prec_at_8": 0.17180365296803654, "eval_prec_macro": 0.8477325049832564, "eval_prec_micro": 0.8412942989213528, "eval_rec_at_5": 0.973744292237443, "eval_rec_at_8": 0.978310502283105, "eval_rec_macro": 0.8733258072194854, "eval_rec_micro": 0.9054726368158453, "eval_runtime": 3.3296, "eval_samples_per_second": 263.093, "eval_steps_per_second": 33.037, "step": 216 }, { "epoch": 10.0, "grad_norm": 0.3327239453792572, "learning_rate": 8.333333333333334e-06, "loss": 0.1578, "step": 240 }, { "epoch": 10.0, "eval_acc_macro": 0.7658814025380632, "eval_acc_micro": 0.7931292008961319, "eval_auc_macro": 0.9863575312994878, "eval_auc_micro": 0.9888696784007464, "eval_f1_at_5": 0.42745954575018524, "eval_f1_at_8": 0.2922793659426448, "eval_f1_macro": 0.8608080570922364, "eval_f1_micro": 0.8846314035817673, "eval_loss": 0.13785400986671448, "eval_prec_at_5": 0.27374429223744295, "eval_prec_at_8": 0.17180365296803654, "eval_prec_macro": 0.8916528884446233, "eval_prec_micro": 0.8887029288702185, "eval_rec_at_5": 0.9748858447488584, "eval_rec_at_8": 0.978310502283105, "eval_rec_macro": 0.8473101584524304, "eval_rec_micro": 0.8805970149253001, "eval_runtime": 3.3169, "eval_samples_per_second": 264.104, "eval_steps_per_second": 33.164, "step": 240 }, { "epoch": 11.0, "grad_norm": 0.4798758029937744, "learning_rate": 8.055555555555557e-06, "loss": 0.1437, "step": 264 }, { "epoch": 11.0, "eval_acc_macro": 0.7817093750541505, "eval_acc_micro": 0.8088347296267472, "eval_auc_macro": 0.987966835700279, "eval_auc_micro": 0.990288011194119, "eval_f1_at_5": 0.4281808547568474, "eval_f1_at_8": 0.2922793659426448, "eval_f1_macro": 0.8718351509550711, "eval_f1_micro": 0.8943157894736088, "eval_loss": 0.1261390894651413, "eval_prec_at_5": 0.2742009132420091, "eval_prec_at_8": 0.17180365296803654, "eval_prec_macro": 0.9059016261714952, "eval_prec_micro": 0.9084687767321721, "eval_rec_at_5": 0.9765981735159818, "eval_rec_at_8": 0.978310502283105, "eval_rec_macro": 0.852231468930321, "eval_rec_micro": 0.8805970149253001, "eval_runtime": 3.1998, "eval_samples_per_second": 273.768, "eval_steps_per_second": 34.377, "step": 264 }, { "epoch": 12.0, "grad_norm": 0.5915816426277161, "learning_rate": 7.77777777777778e-06, "loss": 0.1315, "step": 288 }, { "epoch": 12.0, "eval_acc_macro": 0.8067338258553937, "eval_acc_micro": 0.8225075528700285, "eval_auc_macro": 0.9890198593241079, "eval_auc_micro": 0.9912727714675422, "eval_f1_at_5": 0.42745954575018524, "eval_f1_at_8": 0.2925113305987028, "eval_f1_macro": 0.8889792204408117, "eval_f1_micro": 0.9026108578532198, "eval_loss": 0.11689846962690353, "eval_prec_at_5": 0.27374429223744295, "eval_prec_at_8": 0.17194634703196346, "eval_prec_macro": 0.8957261916294993, "eval_prec_micro": 0.9022369511184007, "eval_rec_at_5": 0.9748858447488584, "eval_rec_at_8": 0.9788812785388128, "eval_rec_macro": 0.8884178007276748, "eval_rec_micro": 0.9029850746267908, "eval_runtime": 3.2003, "eval_samples_per_second": 273.728, "eval_steps_per_second": 34.372, "step": 288 }, { "epoch": 13.0, "grad_norm": 0.492089182138443, "learning_rate": 7.500000000000001e-06, "loss": 0.1215, "step": 312 }, { "epoch": 13.0, "eval_acc_macro": 0.8126972127948302, "eval_acc_micro": 0.8267363704256291, "eval_auc_macro": 0.9893756572609064, "eval_auc_micro": 0.9916252607583189, "eval_f1_at_5": 0.42745954575018524, "eval_f1_at_8": 0.2925113305987028, "eval_f1_macro": 0.8933532206974574, "eval_f1_micro": 0.9051512673752325, "eval_loss": 0.10864967107772827, "eval_prec_at_5": 0.27374429223744295, "eval_prec_at_8": 0.17194634703196346, "eval_prec_macro": 0.8891547780046246, "eval_prec_micro": 0.892741935483799, "eval_rec_at_5": 0.9748858447488584, "eval_rec_at_8": 0.9788812785388128, "eval_rec_macro": 0.9014409945020224, "eval_rec_micro": 0.9179104477611179, "eval_runtime": 3.3389, "eval_samples_per_second": 262.36, "eval_steps_per_second": 32.945, "step": 312 }, { "epoch": 14.0, "grad_norm": 0.3663789927959442, "learning_rate": 7.222222222222223e-06, "loss": 0.1129, "step": 336 }, { "epoch": 14.0, "eval_acc_macro": 0.8228971770857018, "eval_acc_micro": 0.8329588014980649, "eval_auc_macro": 0.9899641824346502, "eval_auc_micro": 0.9921304716477787, "eval_f1_at_5": 0.4281991387591175, "eval_f1_at_8": 0.2922793659426448, "eval_f1_macro": 0.9004325486077462, "eval_f1_micro": 0.9088680016345804, "eval_loss": 0.10234559327363968, "eval_prec_at_5": 0.2742009132420091, "eval_prec_at_8": 0.17180365296803654, "eval_prec_macro": 0.890683762800314, "eval_prec_micro": 0.8960515713133846, "eval_rec_at_5": 0.9767884322678843, "eval_rec_at_8": 0.978310502283105, "eval_rec_macro": 0.9120840636750266, "eval_rec_micro": 0.9220563847428754, "eval_runtime": 3.2169, "eval_samples_per_second": 272.316, "eval_steps_per_second": 34.195, "step": 336 }, { "epoch": 15.0, "grad_norm": 0.37758299708366394, "learning_rate": 6.944444444444445e-06, "loss": 0.1046, "step": 360 }, { "epoch": 15.0, "eval_acc_macro": 0.8302977681001992, "eval_acc_micro": 0.8413059984813333, "eval_auc_macro": 0.9901671140838944, "eval_auc_micro": 0.9919789083809407, "eval_f1_at_5": 0.42784762619628447, "eval_f1_at_8": 0.2925113305987028, "eval_f1_macro": 0.9052032341883497, "eval_f1_micro": 0.9138144329896153, "eval_loss": 0.09653711318969727, "eval_prec_at_5": 0.273972602739726, "eval_prec_at_8": 0.17194634703196346, "eval_prec_macro": 0.9028285099698952, "eval_prec_micro": 0.9089417555372511, "eval_rec_at_5": 0.976027397260274, "eval_rec_at_8": 0.9788812785388128, "eval_rec_macro": 0.9099411755814296, "eval_rec_micro": 0.9187396351574694, "eval_runtime": 3.2472, "eval_samples_per_second": 269.767, "eval_steps_per_second": 33.875, "step": 360 }, { "epoch": 16.0, "grad_norm": 0.4621961712837219, "learning_rate": 6.666666666666667e-06, "loss": 0.0979, "step": 384 }, { "epoch": 16.0, "eval_acc_macro": 0.8273587016423724, "eval_acc_micro": 0.8396299151888327, "eval_auc_macro": 0.989802544326232, "eval_auc_micro": 0.9918006143792109, "eval_f1_at_5": 0.42751438858977425, "eval_f1_at_8": 0.2920473967500829, "eval_f1_macro": 0.9028525753108699, "eval_f1_micro": 0.9128248113997558, "eval_loss": 0.09215801954269409, "eval_prec_at_5": 0.27374429223744295, "eval_prec_at_8": 0.1716609589041096, "eval_prec_macro": 0.9158518703241821, "eval_prec_micro": 0.9228813559321252, "eval_rec_at_5": 0.9754566210045662, "eval_rec_at_8": 0.9777397260273972, "eval_rec_macro": 0.8926981936861532, "eval_rec_micro": 0.9029850746267908, "eval_runtime": 3.233, "eval_samples_per_second": 270.955, "eval_steps_per_second": 34.024, "step": 384 }, { "epoch": 17.0, "grad_norm": 0.4058220386505127, "learning_rate": 6.3888888888888885e-06, "loss": 0.092, "step": 408 }, { "epoch": 17.0, "eval_acc_macro": 0.8347539263761662, "eval_acc_micro": 0.8474708171205566, "eval_auc_macro": 0.9899894858470043, "eval_auc_micro": 0.9920498339308542, "eval_f1_at_5": 0.4275326584009282, "eval_f1_at_8": 0.2922793659426448, "eval_f1_macro": 0.9071161229466114, "eval_f1_micro": 0.9174389216511443, "eval_loss": 0.08766299486160278, "eval_prec_at_5": 0.27374429223744295, "eval_prec_at_8": 0.17180365296803654, "eval_prec_macro": 0.9264703324172373, "eval_prec_micro": 0.9323630136985502, "eval_rec_at_5": 0.9756468797564688, "eval_rec_at_8": 0.978310502283105, "eval_rec_macro": 0.8904830078050127, "eval_rec_micro": 0.9029850746267908, "eval_runtime": 3.2006, "eval_samples_per_second": 273.696, "eval_steps_per_second": 34.368, "step": 408 }, { "epoch": 18.0, "grad_norm": 0.4305408000946045, "learning_rate": 6.111111111111112e-06, "loss": 0.0863, "step": 432 }, { "epoch": 18.0, "eval_acc_macro": 0.8361202794579444, "eval_acc_micro": 0.8485316846985433, "eval_auc_macro": 0.9904755972939142, "eval_auc_micro": 0.9926707889023967, "eval_f1_at_5": 0.4278476261962846, "eval_f1_at_8": 0.2922793659426448, "eval_f1_macro": 0.9082261738031537, "eval_f1_micro": 0.9180602006688194, "eval_loss": 0.08403860032558441, "eval_prec_at_5": 0.27397260273972607, "eval_prec_at_8": 0.17180365296803654, "eval_prec_macro": 0.9194984843968411, "eval_prec_micro": 0.9258010118043064, "eval_rec_at_5": 0.976027397260274, "eval_rec_at_8": 0.978310502283105, "eval_rec_macro": 0.8989752524534329, "eval_rec_micro": 0.9104477611939543, "eval_runtime": 3.2293, "eval_samples_per_second": 271.27, "eval_steps_per_second": 34.064, "step": 432 }, { "epoch": 19.0, "grad_norm": 0.3943960964679718, "learning_rate": 5.833333333333334e-06, "loss": 0.0814, "step": 456 }, { "epoch": 19.0, "eval_acc_macro": 0.839106311750719, "eval_acc_micro": 0.8515684774291621, "eval_auc_macro": 0.990108145855512, "eval_auc_micro": 0.992045690666946, "eval_f1_at_5": 0.42751438858977425, "eval_f1_at_8": 0.2922793659426448, "eval_f1_macro": 0.9099447887675026, "eval_f1_micro": 0.9198347107437256, "eval_loss": 0.08095283061265945, "eval_prec_at_5": 0.27374429223744295, "eval_prec_at_8": 0.17180365296803654, "eval_prec_macro": 0.9089344385619288, "eval_prec_micro": 0.9168039538714237, "eval_rec_at_5": 0.9754566210045662, "eval_rec_at_8": 0.978310502283105, "eval_rec_macro": 0.912673576698828, "eval_rec_micro": 0.922885572139227, "eval_runtime": 3.2592, "eval_samples_per_second": 268.78, "eval_steps_per_second": 33.751, "step": 456 }, { "epoch": 20.0, "grad_norm": 0.40355971455574036, "learning_rate": 5.555555555555557e-06, "loss": 0.077, "step": 480 }, { "epoch": 20.0, "eval_acc_macro": 0.841293209326155, "eval_acc_micro": 0.8523335883702484, "eval_auc_macro": 0.9901090394728352, "eval_auc_micro": 0.9921565786655233, "eval_f1_at_5": 0.42712632039330145, "eval_f1_at_8": 0.2922793659426448, "eval_f1_macro": 0.9118247332403943, "eval_f1_micro": 0.9202808756711341, "eval_loss": 0.07820397615432739, "eval_prec_at_5": 0.27351598173515984, "eval_prec_at_8": 0.17180365296803654, "eval_prec_macro": 0.9128408681170823, "eval_prec_micro": 0.9168724279834636, "eval_rec_at_5": 0.9743150684931506, "eval_rec_at_8": 0.978310502283105, "eval_rec_macro": 0.913025214091804, "eval_rec_micro": 0.9237147595355785, "eval_runtime": 3.335, "eval_samples_per_second": 262.673, "eval_steps_per_second": 32.984, "step": 480 }, { "epoch": 21.0, "grad_norm": 0.4673054814338684, "learning_rate": 5.2777777777777785e-06, "loss": 0.0732, "step": 504 }, { "epoch": 21.0, "eval_acc_macro": 0.8435745126219377, "eval_acc_micro": 0.8546017014693847, "eval_auc_macro": 0.990212670611912, "eval_auc_micro": 0.9923085874446093, "eval_f1_at_5": 0.4278476261962846, "eval_f1_at_8": 0.2922793659426448, "eval_f1_macro": 0.9131099703856408, "eval_f1_micro": 0.9216013344452942, "eval_loss": 0.07635616511106491, "eval_prec_at_5": 0.27397260273972607, "eval_prec_at_8": 0.17180365296803654, "eval_prec_macro": 0.9220960220647866, "eval_prec_micro": 0.9270134228187141, "eval_rec_at_5": 0.976027397260274, "eval_rec_at_8": 0.978310502283105, "eval_rec_macro": 0.9063401498441861, "eval_rec_micro": 0.9162520729684148, "eval_runtime": 3.335, "eval_samples_per_second": 262.666, "eval_steps_per_second": 32.983, "step": 504 }, { "epoch": 22.0, "grad_norm": 0.4005274772644043, "learning_rate": 5e-06, "loss": 0.0693, "step": 528 }, { "epoch": 22.0, "eval_acc_macro": 0.8455313120389342, "eval_acc_micro": 0.8565950920244741, "eval_auc_macro": 0.9901732890859408, "eval_auc_micro": 0.9921753792823973, "eval_f1_at_5": 0.42745954575018524, "eval_f1_at_8": 0.2922793659426448, "eval_f1_macro": 0.9143994666596676, "eval_f1_micro": 0.9227591904171067, "eval_loss": 0.07342522591352463, "eval_prec_at_5": 0.27374429223744295, "eval_prec_at_8": 0.17180365296803654, "eval_prec_macro": 0.9139300975502155, "eval_prec_micro": 0.9193415637859326, "eval_rec_at_5": 0.9748858447488584, "eval_rec_at_8": 0.978310502283105, "eval_rec_macro": 0.9161172406072337, "eval_rec_micro": 0.9262023217246329, "eval_runtime": 3.2711, "eval_samples_per_second": 267.802, "eval_steps_per_second": 33.628, "step": 528 }, { "epoch": 23.0, "grad_norm": 0.45327135920524597, "learning_rate": 4.722222222222222e-06, "loss": 0.0661, "step": 552 }, { "epoch": 23.0, "eval_acc_macro": 0.8467210056386675, "eval_acc_micro": 0.8575827559660617, "eval_auc_macro": 0.990201788357871, "eval_auc_micro": 0.9922312465183218, "eval_f1_at_5": 0.42745954575018524, "eval_f1_at_8": 0.2922793659426448, "eval_f1_macro": 0.9148197613459009, "eval_f1_micro": 0.9233319519269851, "eval_loss": 0.07164816558361053, "eval_prec_at_5": 0.27374429223744295, "eval_prec_at_8": 0.17180365296803654, "eval_prec_macro": 0.9172647910903643, "eval_prec_micro": 0.9229494614746543, "eval_rec_at_5": 0.9748858447488584, "eval_rec_at_8": 0.978310502283105, "eval_rec_macro": 0.9139228248292297, "eval_rec_micro": 0.9237147595355785, "eval_runtime": 3.2794, "eval_samples_per_second": 267.12, "eval_steps_per_second": 33.542, "step": 552 }, { "epoch": 24.0, "grad_norm": 0.44867074489593506, "learning_rate": 4.444444444444444e-06, "loss": 0.0626, "step": 576 }, { "epoch": 24.0, "eval_acc_macro": 0.8443607544183838, "eval_acc_micro": 0.8557098765431438, "eval_auc_macro": 0.9902370599717555, "eval_auc_micro": 0.992274817616196, "eval_f1_at_5": 0.42712632039330145, "eval_f1_at_8": 0.2922793659426448, "eval_f1_macro": 0.9135513991863947, "eval_f1_micro": 0.9222453222452455, "eval_loss": 0.07073331624269485, "eval_prec_at_5": 0.27351598173515984, "eval_prec_at_8": 0.17180365296803654, "eval_prec_macro": 0.9185147069172412, "eval_prec_micro": 0.9249374478731505, "eval_rec_at_5": 0.9743150684931506, "eval_rec_at_8": 0.978310502283105, "eval_rec_macro": 0.9094868029221347, "eval_rec_micro": 0.9195688225538209, "eval_runtime": 3.2386, "eval_samples_per_second": 270.487, "eval_steps_per_second": 33.965, "step": 576 }, { "epoch": 25.0, "grad_norm": 0.509304940700531, "learning_rate": 4.166666666666667e-06, "loss": 0.06, "step": 600 }, { "epoch": 25.0, "eval_acc_macro": 0.8405381328154341, "eval_acc_micro": 0.8539238539237876, "eval_auc_macro": 0.9899142306185617, "eval_auc_micro": 0.9919983327149624, "eval_f1_at_5": 0.42751438858977425, "eval_f1_at_8": 0.2920473967500829, "eval_f1_macro": 0.91134605843459, "eval_f1_micro": 0.9212070410728481, "eval_loss": 0.06922697275876999, "eval_prec_at_5": 0.27374429223744295, "eval_prec_at_8": 0.1716609589041096, "eval_prec_macro": 0.9243865602122843, "eval_prec_micro": 0.9313559322033109, "eval_rec_at_5": 0.9754566210045662, "eval_rec_at_8": 0.9777397260273972, "eval_rec_macro": 0.8997359146591782, "eval_rec_micro": 0.9112769485903058, "eval_runtime": 3.3027, "eval_samples_per_second": 265.24, "eval_steps_per_second": 33.306, "step": 600 }, { "epoch": 26.0, "grad_norm": 0.5278392434120178, "learning_rate": 3.88888888888889e-06, "loss": 0.0575, "step": 624 }, { "epoch": 26.0, "eval_acc_macro": 0.8460307636339017, "eval_acc_micro": 0.8563664596272627, "eval_auc_macro": 0.9901957256905628, "eval_auc_micro": 0.9919254469111567, "eval_f1_at_5": 0.42718114191994605, "eval_f1_at_8": 0.2925113305987028, "eval_f1_macro": 0.9146326110180735, "eval_f1_micro": 0.9226265161019722, "eval_loss": 0.06812591105699539, "eval_prec_at_5": 0.27351598173515984, "eval_prec_at_8": 0.17194634703196346, "eval_prec_macro": 0.9259544901379119, "eval_prec_micro": 0.9308016877636345, "eval_rec_at_5": 0.9748858447488584, "eval_rec_at_8": 0.9788812785388128, "eval_rec_macro": 0.9052694584097175, "eval_rec_micro": 0.9145936981757119, "eval_runtime": 3.238, "eval_samples_per_second": 270.536, "eval_steps_per_second": 33.971, "step": 624 }, { "epoch": 27.0, "grad_norm": 0.46283113956451416, "learning_rate": 3.6111111111111115e-06, "loss": 0.0547, "step": 648 }, { "epoch": 27.0, "eval_acc_macro": 0.8437978381816325, "eval_acc_micro": 0.8569194683345694, "eval_auc_macro": 0.9900672235373739, "eval_auc_micro": 0.9922836387587103, "eval_f1_at_5": 0.42751438858977425, "eval_f1_at_8": 0.2922793659426448, "eval_f1_macro": 0.9133228707359765, "eval_f1_micro": 0.9229473684209749, "eval_loss": 0.06721309572458267, "eval_prec_at_5": 0.27374429223744295, "eval_prec_at_8": 0.17180365296803654, "eval_prec_macro": 0.9302491335296357, "eval_prec_micro": 0.9375534644994921, "eval_rec_at_5": 0.9754566210045662, "eval_rec_at_8": 0.978310502283105, "eval_rec_macro": 0.8981640840687014, "eval_rec_micro": 0.9087893864012513, "eval_runtime": 3.2966, "eval_samples_per_second": 265.728, "eval_steps_per_second": 33.368, "step": 648 }, { "epoch": 28.0, "grad_norm": 0.5683040022850037, "learning_rate": 3.3333333333333333e-06, "loss": 0.0523, "step": 672 }, { "epoch": 28.0, "eval_acc_macro": 0.847085539786285, "eval_acc_micro": 0.8558139534883057, "eval_auc_macro": 0.9896638187547525, "eval_auc_micro": 0.99160271783856, "eval_f1_at_5": 0.42751438858977425, "eval_f1_at_8": 0.2915834447216144, "eval_f1_macro": 0.9152884545582967, "eval_f1_micro": 0.9223057644109505, "eval_loss": 0.06634338945150375, "eval_prec_at_5": 0.27374429223744295, "eval_prec_at_8": 0.1713755707762557, "eval_prec_macro": 0.9276311524633641, "eval_prec_micro": 0.929292929292851, "eval_rec_at_5": 0.9754566210045662, "eval_rec_at_8": 0.9765981735159818, "eval_rec_macro": 0.9050852833749707, "eval_rec_micro": 0.9154228855720634, "eval_runtime": 3.3379, "eval_samples_per_second": 262.442, "eval_steps_per_second": 32.955, "step": 672 }, { "epoch": 29.0, "grad_norm": 0.5064497590065002, "learning_rate": 3.055555555555556e-06, "loss": 0.0507, "step": 696 }, { "epoch": 29.0, "eval_acc_macro": 0.8424595332709467, "eval_acc_micro": 0.8554687499999332, "eval_auc_macro": 0.9903800574812361, "eval_auc_micro": 0.9924352911279979, "eval_f1_at_5": 0.42751438858977425, "eval_f1_at_8": 0.2918154230125642, "eval_f1_macro": 0.9125139257396052, "eval_f1_micro": 0.9221052631578169, "eval_loss": 0.06477358192205429, "eval_prec_at_5": 0.27374429223744295, "eval_prec_at_8": 0.17151826484018265, "eval_prec_macro": 0.9271042210984995, "eval_prec_micro": 0.9366980325063355, "eval_rec_at_5": 0.9754566210045662, "eval_rec_at_8": 0.9771689497716894, "eval_rec_macro": 0.8991646180232234, "eval_rec_micro": 0.9079601990048998, "eval_runtime": 3.3033, "eval_samples_per_second": 265.185, "eval_steps_per_second": 33.3, "step": 696 }, { "epoch": 30.0, "grad_norm": 0.5206765532493591, "learning_rate": 2.7777777777777783e-06, "loss": 0.0484, "step": 720 }, { "epoch": 30.0, "eval_acc_macro": 0.8482879664337308, "eval_acc_micro": 0.8564920273347869, "eval_auc_macro": 0.9902796769975294, "eval_auc_micro": 0.9923823642729116, "eval_f1_at_5": 0.42712632039330145, "eval_f1_at_8": 0.2925113305987028, "eval_f1_macro": 0.9155214146126874, "eval_f1_micro": 0.9226993865029919, "eval_loss": 0.06509387493133545, "eval_prec_at_5": 0.27351598173515984, "eval_prec_at_8": 0.17194634703196346, "eval_prec_macro": 0.9025052330017584, "eval_prec_micro": 0.9104116222759555, "eval_rec_at_5": 0.9743150684931506, "eval_rec_at_8": 0.9788812785388128, "eval_rec_macro": 0.9295157618718927, "eval_rec_micro": 0.9353233830844995, "eval_runtime": 3.2722, "eval_samples_per_second": 267.709, "eval_steps_per_second": 33.616, "step": 720 }, { "epoch": 31.0, "grad_norm": 0.5268795490264893, "learning_rate": 2.5e-06, "loss": 0.0456, "step": 744 }, { "epoch": 31.0, "eval_acc_macro": 0.847691548444017, "eval_acc_micro": 0.8558421851289183, "eval_auc_macro": 0.9898163244235566, "eval_auc_micro": 0.9917076805242361, "eval_f1_at_5": 0.42751438858977425, "eval_f1_at_8": 0.2918154230125642, "eval_f1_macro": 0.9153671638721068, "eval_f1_micro": 0.9223221586262532, "eval_loss": 0.0645858645439148, "eval_prec_at_5": 0.27374429223744295, "eval_prec_at_8": 0.17151826484018265, "eval_prec_macro": 0.9015493618181559, "eval_prec_micro": 0.9096774193547653, "eval_rec_at_5": 0.9754566210045662, "eval_rec_at_8": 0.9771689497716894, "eval_rec_macro": 0.9300094432719982, "eval_rec_micro": 0.9353233830844995, "eval_runtime": 3.3402, "eval_samples_per_second": 262.263, "eval_steps_per_second": 32.933, "step": 744 }, { "epoch": 32.0, "grad_norm": 0.4488939046859741, "learning_rate": 2.222222222222222e-06, "loss": 0.0446, "step": 768 }, { "epoch": 32.0, "eval_acc_macro": 0.8474584233282965, "eval_acc_micro": 0.8549848942597541, "eval_auc_macro": 0.9896580840322796, "eval_auc_micro": 0.9914708462130924, "eval_f1_at_5": 0.42712632039330145, "eval_f1_at_8": 0.29135146186873845, "eval_f1_macro": 0.9153561345178298, "eval_f1_micro": 0.9218241042344526, "eval_loss": 0.06430496275424957, "eval_prec_at_5": 0.27351598173515984, "eval_prec_at_8": 0.17123287671232876, "eval_prec_macro": 0.8975121306620646, "eval_prec_micro": 0.9055999999999276, "eval_rec_at_5": 0.9743150684931506, "eval_rec_at_8": 0.976027397260274, "eval_rec_macro": 0.9345814562185115, "eval_rec_micro": 0.9386401326699055, "eval_runtime": 3.2443, "eval_samples_per_second": 270.009, "eval_steps_per_second": 33.905, "step": 768 }, { "epoch": 33.0, "grad_norm": 0.5452151298522949, "learning_rate": 1.944444444444445e-06, "loss": 0.0425, "step": 792 }, { "epoch": 33.0, "eval_acc_macro": 0.8512273558913187, "eval_acc_micro": 0.8594224924011504, "eval_auc_macro": 0.9896851270268344, "eval_auc_micro": 0.9917295997268475, "eval_f1_at_5": 0.42712632039330145, "eval_f1_at_8": 0.2918154230125642, "eval_f1_macro": 0.9175407198197038, "eval_f1_micro": 0.9243972210869698, "eval_loss": 0.06364509463310242, "eval_prec_at_5": 0.27351598173515984, "eval_prec_at_8": 0.17151826484018265, "eval_prec_macro": 0.9039169093915104, "eval_prec_micro": 0.9113618049958975, "eval_rec_at_5": 0.9743150684931506, "eval_rec_at_8": 0.9771689497716894, "eval_rec_macro": 0.9318495447404759, "eval_rec_micro": 0.937810945273554, "eval_runtime": 3.2885, "eval_samples_per_second": 266.382, "eval_steps_per_second": 33.45, "step": 792 } ], "logging_steps": 500, "max_steps": 960, "num_input_tokens_seen": 0, "num_train_epochs": 40, "save_steps": 500, "total_flos": 1.332040372236288e+16, "train_batch_size": 32, "trial_name": null, "trial_params": null }