craffel HF Staff commited on
Commit
c276076
·
verified ·
1 Parent(s): 217c3d8

Upload test_checkpoint/metrics.eval.jsonl with huggingface_hub

Browse files
Files changed (1) hide show
  1. test_checkpoint/metrics.eval.jsonl +5 -0
test_checkpoint/metrics.eval.jsonl ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {"created_at": "2025-08-11T20:22:14.465221", "global_step": 52000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.3216723549488055, "acc_stderr,none": 0.013650488084494166, "acc_norm,none": 0.3447098976109215, "acc_norm_stderr,none": 0.013888816286782112}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.6944444444444444, "acc_stderr,none": 0.009452181213593465, "acc_norm,none": 0.6241582491582491, "acc_norm_stderr,none": 0.009938436373170626}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.25666666666666665, "acc_stderr,none": 0.01456789134238004, "acc_norm,none": 0.25666666666666665, "acc_norm_stderr,none": 0.01456789134238004}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.2611111111111111, "acc_stderr,none": 0.014649486385262143, "acc_norm,none": 0.2611111111111111, "acc_norm_stderr,none": 0.014649486385262143}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.24, "acc_stderr,none": 0.014244019879792662, "acc_norm,none": 0.24, "acc_norm_stderr,none": 0.014244019879792662}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.2733333333333333, "acc_stderr,none": 0.014863944409417497, "acc_norm,none": 0.2733333333333333, "acc_norm_stderr,none": 0.014863944409417497}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2688888888888889, "acc_stderr,none": 0.014787619747567612, "acc_norm,none": 0.2688888888888889, "acc_norm_stderr,none": 0.014787619747567612}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.4250149372634933, "acc_stderr,none": 0.00493334962158933, "acc_norm,none": 0.5537741485759808, "acc_norm_stderr,none": 0.004960839986099525}, "humaneval": {"alias": "humaneval", "pass@1,create_test": 0.0, "pass@1_stderr,create_test": 0.0}, "include_base_44_chinese": {"acc,none": 0.25137614678899084, "acc_stderr,none": 0.018675762325601565, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115032}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.21839080459770116, "acc_stderr,none": 0.044551545932103705}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295698}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.3103448275862069, "acc_stderr,none": 0.049887188500387446}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.11180339887498948}, "include_base_44_italian": {"acc,none": 0.2737226277372263, "acc_stderr,none": 0.01901892719650816, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.2571428571428571, "acc_stderr,none": 0.07495496847387485}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.25748502994011974, "acc_stderr,none": 0.03393708648569706}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.32903225806451614, "acc_stderr,none": 0.03786253598588384}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.2275449101796407, "acc_stderr,none": 0.03253989433108519}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.5, "acc_stderr,none": 0.15075567228888181}, "include_base_44_turkish": {"acc,none": 0.2572992700729927, "acc_stderr,none": 0.018686657020585463, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.25903614457831325, "acc_stderr,none": 0.03410646614071856}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.21084337349397592, "acc_stderr,none": 0.031755547866299194}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.29518072289156627, "acc_stderr,none": 0.035509201856896294}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.28, "acc_stderr,none": 0.06414269805898185}, "piqa": {"alias": "piqa", "acc,none": 0.7247007616974973, "acc_stderr,none": 0.01042142927736953, "acc_norm,none": 0.7219804134929271, "acc_norm_stderr,none": 0.010453117358332813}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5273092369477912, "acc_stderr,none": 0.010007112889731988}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.43092369477911646, "acc_stderr,none": 0.009925970741520653}}
2
+ {"created_at": "2025-08-11T21:58:06.730485", "global_step": 54000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.3225255972696246, "acc_stderr,none": 0.013659980894277366, "acc_norm,none": 0.3438566552901024, "acc_norm_stderr,none": 0.013880644570156215}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.6961279461279462, "acc_stderr,none": 0.009437524848293738, "acc_norm,none": 0.6127946127946128, "acc_norm_stderr,none": 0.009995312065890348}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2577777777777778, "acc_stderr,none": 0.01458847408965166, "acc_norm,none": 0.2577777777777778, "acc_norm_stderr,none": 0.01458847408965166}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.24888888888888888, "acc_stderr,none": 0.014420323451642519, "acc_norm,none": 0.24888888888888888, "acc_norm_stderr,none": 0.014420323451642519}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.25333333333333335, "acc_stderr,none": 0.014505399844356818, "acc_norm,none": 0.25333333333333335, "acc_norm_stderr,none": 0.014505399844356818}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.2722222222222222, "acc_stderr,none": 0.014845038794433947, "acc_norm,none": 0.2722222222222222, "acc_norm_stderr,none": 0.014845038794433947}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2733333333333333, "acc_stderr,none": 0.014863944409417486, "acc_norm,none": 0.2733333333333333, "acc_norm_stderr,none": 0.014863944409417486}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.42939653455486954, "acc_stderr,none": 0.004939784311448987, "acc_norm,none": 0.5560645289782912, "acc_norm_stderr,none": 0.004958314114266492}, "humaneval": {"alias": "humaneval", "pass@1,create_test": 0.0, "pass@1_stderr,create_test": 0.0}, "include_base_44_chinese": {"acc,none": 0.24770642201834864, "acc_stderr,none": 0.01848764283633194, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.048790163593489484}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.21839080459770116, "acc_stderr,none": 0.044551545932103705}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.04879016359348951}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.048790163593489484}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.3563218390804598, "acc_stderr,none": 0.051642395833086094}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.28169014084507044, "acc_stderr,none": 0.053764141713832536}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_italian": {"acc,none": 0.2645985401459854, "acc_stderr,none": 0.01888503261332542, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.22857142857142856, "acc_stderr,none": 0.0720144043214405}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.281437125748503, "acc_stderr,none": 0.03490350467428358}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.11236664374387367}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2838709677419355, "acc_stderr,none": 0.036332540727054406}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.23353293413173654, "acc_stderr,none": 0.03283724952964296}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.4166666666666667, "acc_stderr,none": 0.1486470975026408}, "include_base_44_turkish": {"acc,none": 0.23722627737226276, "acc_stderr,none": 0.01821002396809858, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.25301204819277107, "acc_stderr,none": 0.033844291552331346}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.25903614457831325, "acc_stderr,none": 0.034106466140718564}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.21084337349397592, "acc_stderr,none": 0.031755547866299194}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.05714285714285715}, "piqa": {"alias": "piqa", "acc,none": 0.720892274211099, "acc_stderr,none": 0.010465657948498228, "acc_norm,none": 0.7132752992383025, "acc_norm_stderr,none": 0.01055131450310807}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5228915662650603, "acc_stderr,none": 0.010011563747774333}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.43453815261044176, "acc_stderr,none": 0.00993580735485683}}
3
+ {"created_at": "2025-08-11T23:48:14.549488", "global_step": 56000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.3242320819112628, "acc_stderr,none": 0.013678810399518824, "acc_norm,none": 0.3455631399317406, "acc_norm_stderr,none": 0.013896938461145678}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.6973905723905723, "acc_stderr,none": 0.009426434542371225, "acc_norm,none": 0.6254208754208754, "acc_norm_stderr,none": 0.00993175882041062}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2588888888888889, "acc_stderr,none": 0.014608933836168493, "acc_norm,none": 0.2588888888888889, "acc_norm_stderr,none": 0.014608933836168493}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.24666666666666667, "acc_stderr,none": 0.014377023375409359, "acc_norm,none": 0.24666666666666667, "acc_norm_stderr,none": 0.014377023375409359}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.25222222222222224, "acc_stderr,none": 0.0144843198114339, "acc_norm,none": 0.25222222222222224, "acc_norm_stderr,none": 0.0144843198114339}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.2777777777777778, "acc_stderr,none": 0.014938408363642788, "acc_norm,none": 0.2777777777777778, "acc_norm_stderr,none": 0.014938408363642788}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2733333333333333, "acc_stderr,none": 0.01486394440941749, "acc_norm,none": 0.2733333333333333, "acc_norm_stderr,none": 0.01486394440941749}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.4291973710416252, "acc_stderr,none": 0.004939500404882176, "acc_norm,none": 0.5561641107349133, "acc_norm_stderr,none": 0.004958201874334088}, "humaneval": {"alias": "humaneval", "pass@1,create_test": 0.0, "pass@1_stderr,create_test": 0.0}, "include_base_44_chinese": {"acc,none": 0.24403669724770644, "acc_stderr,none": 0.018525611404843975, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.051995626882956975}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.046143776682648914}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115031}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.049938324531150324}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.26436781609195403, "acc_stderr,none": 0.04755382188278444}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.11180339887498948}, "include_base_44_italian": {"acc,none": 0.2591240875912409, "acc_stderr,none": 0.018786033139634294, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.2571428571428571, "acc_stderr,none": 0.07495496847387485}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.25149700598802394, "acc_stderr,none": 0.033675118801687026}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.14213381090374033}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.25161290322580643, "acc_stderr,none": 0.034967874881680024}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.25748502994011974, "acc_stderr,none": 0.03393708648569705}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.4166666666666667, "acc_stderr,none": 0.1486470975026408}, "include_base_44_turkish": {"acc,none": 0.25, "acc_stderr,none": 0.018495790839801834, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.30120481927710846, "acc_stderr,none": 0.0357160923005348}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.24096385542168675, "acc_stderr,none": 0.0332939411907353}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.2289156626506024, "acc_stderr,none": 0.03270745277352477}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.18, "acc_stderr,none": 0.054883922035138706}, "piqa": {"alias": "piqa", "acc,none": 0.7219804134929271, "acc_stderr,none": 0.010453117358332804, "acc_norm,none": 0.7181719260065288, "acc_norm_stderr,none": 0.010496675231258171}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5236947791164659, "acc_stderr,none": 0.010010812905412057}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.4353413654618474, "acc_stderr,none": 0.009937920221480505}}
4
+ {"created_at": "2025-08-12T12:48:49.524049", "global_step": 58000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.3293515358361775, "acc_stderr,none": 0.013734057652635473, "acc_norm,none": 0.3447098976109215, "acc_norm_stderr,none": 0.013888816286782112}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.6990740740740741, "acc_stderr,none": 0.009411516193787188, "acc_norm,none": 0.6296296296296297, "acc_norm_stderr,none": 0.009908978578665757}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.25333333333333335, "acc_stderr,none": 0.014505399844356797, "acc_norm,none": 0.25333333333333335, "acc_norm_stderr,none": 0.014505399844356797}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945582, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945582}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.24666666666666667, "acc_stderr,none": 0.014377023375409402, "acc_norm,none": 0.24666666666666667, "acc_norm_stderr,none": 0.014377023375409402}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.27555555555555555, "acc_stderr,none": 0.014901407215241934, "acc_norm,none": 0.27555555555555555, "acc_norm_stderr,none": 0.014901407215241934}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2633333333333333, "acc_stderr,none": 0.014689553047342506, "acc_norm,none": 0.2633333333333333, "acc_norm_stderr,none": 0.014689553047342506}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.4303923521210914, "acc_stderr,none": 0.00494119160731791, "acc_norm,none": 0.5560645289782912, "acc_norm_stderr,none": 0.004958314114266491}, "humaneval": {"alias": "humaneval", "pass@1,create_test": 0.0, "pass@1_stderr,create_test": 0.0}, "include_base_44_chinese": {"acc,none": 0.24036697247706423, "acc_stderr,none": 0.018378379109904197, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.046143776682648914}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.048790163593489484}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.04879016359348949}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2988505747126437, "acc_stderr,none": 0.049360904959780114}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953275}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_italian": {"acc,none": 0.2664233576642336, "acc_stderr,none": 0.01885773540392922, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.22857142857142856, "acc_stderr,none": 0.0720144043214405}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.25748502994011974, "acc_stderr,none": 0.03393708648569707}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.14213381090374033}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.23870967741935484, "acc_stderr,none": 0.034351824402457654}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.281437125748503, "acc_stderr,none": 0.034903504674283575}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.5833333333333334, "acc_stderr,none": 0.1486470975026408}, "include_base_44_turkish": {"acc,none": 0.25364963503649635, "acc_stderr,none": 0.018562316478036214, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.3132530120481928, "acc_stderr,none": 0.036108050180310235}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.2469879518072289, "acc_stderr,none": 0.03357351982064536}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.22289156626506024, "acc_stderr,none": 0.032400048255946876}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.18, "acc_stderr,none": 0.054883922035138706}, "piqa": {"alias": "piqa", "acc,none": 0.7252448313384113, "acc_stderr,none": 0.010415033676676035, "acc_norm,none": 0.7225244831338411, "acc_norm_stderr,none": 0.01044681828103995}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5236947791164659, "acc_stderr,none": 0.010010812905412054}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.43614457831325304, "acc_stderr,none": 0.0099400065624986}}
5
+ {"created_at": "2025-08-12T12:52:03.117950", "global_step": 60000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.3319112627986348, "acc_stderr,none": 0.01376098820088054, "acc_norm,none": 0.3447098976109215, "acc_norm_stderr,none": 0.013888816286782112}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.6990740740740741, "acc_stderr,none": 0.009411516193787188, "acc_norm,none": 0.6279461279461279, "acc_norm_stderr,none": 0.009918187193096468}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2511111111111111, "acc_stderr,none": 0.014463114105170807, "acc_norm,none": 0.2511111111111111, "acc_norm_stderr,none": 0.014463114105170807}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.22777777777777777, "acc_stderr,none": 0.013987721523687942, "acc_norm,none": 0.22777777777777777, "acc_norm_stderr,none": 0.013987721523687942}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.2477777777777778, "acc_stderr,none": 0.014398737377336072, "acc_norm,none": 0.2477777777777778, "acc_norm_stderr,none": 0.014398737377336072}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.27555555555555555, "acc_stderr,none": 0.014901407215241932, "acc_norm,none": 0.27555555555555555, "acc_norm_stderr,none": 0.014901407215241932}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2588888888888889, "acc_stderr,none": 0.014608933836168509, "acc_norm,none": 0.2588888888888889, "acc_norm_stderr,none": 0.014608933836168509}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.4304919338777136, "acc_stderr,none": 0.004941331215598552, "acc_norm,none": 0.5571599283011353, "acc_norm_stderr,none": 0.00495706837751651}, "humaneval": {"alias": "humaneval", "pass@1,create_test": 0.0, "pass@1_stderr,create_test": 0.0}, "include_base_44_chinese": {"acc,none": 0.23486238532110093, "acc_stderr,none": 0.01818236439502987, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.21839080459770116, "acc_stderr,none": 0.044551545932103705}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.04879016359348947}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953275}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.3218390804597701, "acc_stderr,none": 0.05037749206122547}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.16901408450704225, "acc_stderr,none": 0.044792908199096614}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.11180339887498948}, "include_base_44_italian": {"acc,none": 0.26094890510948904, "acc_stderr,none": 0.018795304995459977, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.22857142857142856, "acc_stderr,none": 0.0720144043214405}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2694610778443114, "acc_stderr,none": 0.034436234538994775}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.4166666666666667, "acc_stderr,none": 0.1486470975026408}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.23225806451612904, "acc_stderr,none": 0.03402770605128516}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.2634730538922156, "acc_stderr,none": 0.034190730421806675}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.4166666666666667, "acc_stderr,none": 0.1486470975026408}, "include_base_44_turkish": {"acc,none": 0.25547445255474455, "acc_stderr,none": 0.018633181653151712, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.29518072289156627, "acc_stderr,none": 0.0355092018568963}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.2469879518072289, "acc_stderr,none": 0.03357351982064536}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.25301204819277107, "acc_stderr,none": 0.03384429155233135}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.16, "acc_stderr,none": 0.052372293656638154}, "piqa": {"alias": "piqa", "acc,none": 0.7263329706202394, "acc_stderr,none": 0.010402184206229207, "acc_norm,none": 0.7187159956474428, "acc_norm_stderr,none": 0.010490509832327423}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.529718875502008, "acc_stderr,none": 0.010004353982613843}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.44377510040160645, "acc_stderr,none": 0.009958506938896473}}