Update README.md
Browse files
README.md
CHANGED
@@ -31,7 +31,7 @@ The numbers reported in the table below are evaluated with our open-source tool
|
|
31 |
| | AIME24 | MATH500 | GPQA-Diamond | LCBv2 Easy | LCBv2 Medium | LCBv2 Hard | LCBv2 All |
|
32 |
| --------------------------- | -------- | ------- | ------------ | ----------- | ------------- | ----------- | ---------- |
|
33 |
| OpenThinker-7B | 31.3 | 83.0 | 42.4 | 75.3 | 28.6 | 6.5 | 39.9 |
|
34 |
-
| Bespoke-Stratos-7B | 22.
|
35 |
| DeepSeek-R1-Distill-Qwen-7B | 60 | 88.2 | 46.9 | 79.7 | 45.1 | 14.6 | 50.1 |
|
36 |
| gpt-4o-0513 | 8.7 | 75.8 | 46.5 | 87.4 | 42.7 | 8.9 | 50.5 |
|
37 |
| o1-mini | 64 | 85.6 | 60 | 92.8 | 74.7 | 39.8 | 72.8 |
|
|
|
31 |
| | AIME24 | MATH500 | GPQA-Diamond | LCBv2 Easy | LCBv2 Medium | LCBv2 Hard | LCBv2 All |
|
32 |
| --------------------------- | -------- | ------- | ------------ | ----------- | ------------- | ----------- | ---------- |
|
33 |
| OpenThinker-7B | 31.3 | 83.0 | 42.4 | 75.3 | 28.6 | 6.5 | 39.9 |
|
34 |
+
| Bespoke-Stratos-7B | 22.7 | 79.6 | 38.9 | 71.4 | 25.2 | 0.8 | 35.8 |
|
35 |
| DeepSeek-R1-Distill-Qwen-7B | 60 | 88.2 | 46.9 | 79.7 | 45.1 | 14.6 | 50.1 |
|
36 |
| gpt-4o-0513 | 8.7 | 75.8 | 46.5 | 87.4 | 42.7 | 8.9 | 50.5 |
|
37 |
| o1-mini | 64 | 85.6 | 60 | 92.8 | 74.7 | 39.8 | 72.8 |
|