Update README.md
Browse files
README.md
CHANGED
@@ -163,6 +163,26 @@ This version of the lm-evaluation-harness includes versions of MMLU, ARC-Challen
|
|
163 |
<td><strong>Recovery</strong>
|
164 |
</td>
|
165 |
</tr>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
166 |
<tr>
|
167 |
<td>GSM-8K (CoT, 8-shot, strict-match)
|
168 |
</td>
|
|
|
163 |
<td><strong>Recovery</strong>
|
164 |
</td>
|
165 |
</tr>
|
166 |
+
<tr>
|
167 |
+
<td>MMLU (CoT, 0-shot)
|
168 |
+
</td>
|
169 |
+
<td>88.11
|
170 |
+
</td>
|
171 |
+
<td>87.42
|
172 |
+
</td>
|
173 |
+
<td>99.2%
|
174 |
+
</td>
|
175 |
+
</tr>
|
176 |
+
<tr>
|
177 |
+
<td>ARC Challenge (0-shot)
|
178 |
+
</td>
|
179 |
+
<td>94.97
|
180 |
+
</td>
|
181 |
+
<td>94.62
|
182 |
+
</td>
|
183 |
+
<td>99.6%
|
184 |
+
</td>
|
185 |
+
</tr>
|
186 |
<tr>
|
187 |
<td>GSM-8K (CoT, 8-shot, strict-match)
|
188 |
</td>
|