alexmarques committed
Commit 83fb9f8 · verified · 1 Parent(s): 0f846ce

Update README.md

Files changed (1):
1. README.md (+23 -23)
README.md CHANGED
@@ -51,7 +51,7 @@ messages = [
     {"role": "user", "content": "Who are you?"},
 ]
 
-prompts = tokenizer.apply_chat_template(messages, tokenize=False)
+prompts = tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)
 
 llm = LLM(model=model_id, trust_remote_code=True, max_model_len=8196)
 
@@ -170,9 +170,9 @@ lm_eval \
  <tr>
   <td><strong>Benchmark</strong>
   </td>
-  <td><strong>Meta-Llama-3-8B-Instruct </strong>
+  <td><strong>Phi-3-mini-128k-instruct </strong>
   </td>
-  <td><strong>Meta-Llama-3-8B-Instruct-quantized.w8a16(this model)</strong>
+  <td><strong>Phi-3-mini-128k-instruct-quantized.w8a16(this model)</strong>
   </td>
   <td><strong>Recovery</strong>
   </td>
@@ -180,39 +180,39 @@ lm_eval \
  <tr>
   <td>MMLU (5-shot)
   </td>
-  <td>66.54
+  <td>69.44
   </td>
-  <td>66.55
+  <td>69.39
   </td>
-  <td>100.0%
+  <td>99.9%
   </td>
  </tr>
  <tr>
   <td>ARC Challenge (25-shot)
   </td>
-  <td>62.63
+  <td>63.23
   </td>
-  <td>61.52
+  <td>63.14
   </td>
-  <td>98.2%
+  <td>99.9%
   </td>
  </tr>
  <tr>
   <td>GSM-8K (5-shot, strict-match)
   </td>
-  <td>75.21
+  <td>77.03
   </td>
-  <td>75.89
+  <td>75.44
   </td>
-  <td>100.9%
+  <td>97.9%
   </td>
  </tr>
  <tr>
   <td>Hellaswag (10-shot)
   </td>
-  <td>78.81
+  <td>79.65
   </td>
-  <td>78.69
+  <td>79.51
   </td>
   <td>99.8%
   </td>
@@ -220,31 +220,31 @@ lm_eval \
  <tr>
   <td>Winogrande (5-shot)
   </td>
-  <td>76.48
+  <td>74.66
   </td>
-  <td>76.01
+  <td>74.82
   </td>
-  <td>98.2%
+  <td>100.2%
   </td>
  </tr>
  <tr>
   <td>TruthfulQA (0-shot)
   </td>
-  <td>52.49
+  <td>54.31
   </td>
-  <td>52.60
+  <td>54.36
   </td>
-  <td>100.2%
+  <td>100.1%
   </td>
  </tr>
  <tr>
   <td><strong>Average</strong>
   </td>
-  <td><strong>68.69</strong>
+  <td><strong>69.72</strong>
   </td>
-  <td><strong>68.54</strong>
+  <td><strong>69.44</strong>
   </td>
-  <td><strong>99.8%</strong>
+  <td><strong>99.6%</strong>
   </td>
  </tr>
 </table>
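
Note: the functional change in the first hunk is the addition of add_generation_prompt=True, which makes the chat template end with the assistant turn marker so the model starts generating the reply directly. A minimal sketch of the surrounding usage, assuming the usual transformers + vLLM flow for this kind of model card; the repository id and sampling settings below are illustrative assumptions, not part of the diff:

from transformers import AutoTokenizer
from vllm import LLM, SamplingParams

# Assumed repository id for this model card; adjust to the actual repo.
model_id = "neuralmagic/Phi-3-mini-128k-instruct-quantized.w8a16"

tokenizer = AutoTokenizer.from_pretrained(model_id)

messages = [
    {"role": "user", "content": "Who are you?"},
]

# add_generation_prompt=True (added in this commit) appends the assistant
# prompt tokens so generation begins with the model's answer.
prompts = tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)

llm = LLM(model=model_id, trust_remote_code=True, max_model_len=8196)
sampling_params = SamplingParams(temperature=0.6, top_p=0.9, max_tokens=256)  # illustrative values

outputs = llm.generate(prompts, sampling_params)
print(outputs[0].outputs[0].text)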
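
The Recovery column in the updated table reads as the quantized score expressed as a percentage of the unquantized baseline. A quick check against the new numbers, assuming that definition (the dictionaries below only restate the table values):

# Recovery assumed to be quantized / baseline, in percent.
baseline = {"MMLU": 69.44, "ARC Challenge": 63.23, "GSM-8K": 77.03,
            "Hellaswag": 79.65, "Winogrande": 74.66, "TruthfulQA": 54.31}
quantized = {"MMLU": 69.39, "ARC Challenge": 63.14, "GSM-8K": 75.44,
             "Hellaswag": 79.51, "Winogrande": 74.82, "TruthfulQA": 54.36}

for task in baseline:
    print(f"{task}: {100 * quantized[task] / baseline[task]:.1f}%")  # e.g. GSM-8K: 97.9%

avg_base = sum(baseline.values()) / len(baseline)      # 69.72
avg_quant = sum(quantized.values()) / len(quantized)   # ≈ 69.44
print(f"Average: {100 * avg_quant / avg_base:.1f}%")   # 99.6%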