nm-research committed
Commit 18dc839 · verified · 1 Parent(s): 277ac4a

Update README.md

Files changed (1)
  1. README.md +12 -11
README.md CHANGED
@@ -68,7 +68,7 @@ This model was created with [llm-compressor](https://github.com/vllm-project/llm
 
 
 ```bash
- python quantize.py --model_path ibm-granite/granite-3.1-8b-base --quant_path "output_dir/granite-3.1-8b-base-quantized.w4a16" --calib_size 3072 --dampening_frac 0.1 --observer mse
+ python quantize.py --model_path ibm-granite/granite-3.1-8b-base --quant_path "output_dir/granite-3.1-8b-base-quantized.w4a16" --calib_size 3072 --dampening_frac 0.1 --observer mse --actorder static
 ```
 
 
@@ -87,7 +87,7 @@ parser.add_argument('--quant_path', type=str)
 parser.add_argument('--calib_size', type=int, default=256)
 parser.add_argument('--dampening_frac', type=float, default=0.1)
 parser.add_argument('--observer', type=str, default="minmax")
- parser.add_argument('--actorder', type=str, default="None")
+ parser.add_argument('--actorder', type=str, default="dynamic")
 
 args = parser.parse_args()
 
@@ -131,6 +131,7 @@ recipe = [
         scheme="w4a16",
         dampening_frac=args.dampening_frac,
         observer=args.observer,
+        actorder=args.actorder,
     )
 ]
 oneshot(
@@ -195,14 +196,14 @@ Here is the updated table where the column for the quantized model is kept, but
 
 | Metric | ibm-granite/granite-3.1-8b-base | neuralmagic-ent/granite-3.1-8b-base-quantized.w4a16 |
 |-----------------------------------------|:---------------------------------:|:-------------------------------------------:|
- | ARC-Challenge (Acc-Norm, 25-shot) | 64.68 | 64.25 |
- | GSM8K (Strict-Match, 5-shot) | 60.88 | 60.50 |
- | HellaSwag (Acc-Norm, 10-shot) | 83.52 | 83.22 |
- | MMLU (Acc, 5-shot) | 63.33 | 63.16 |
- | TruthfulQA (MC2, 0-shot) | 51.33 | 52.59 |
- | Winogrande (Acc, 5-shot) | 80.90 | 80.11 |
- | **Average Score** | **67.44** | **67.30** |
- | **Recovery** | **100.00** | **99.80** |
+ | ARC-Challenge (Acc-Norm, 25-shot) | 64.68 | 62.37 |
+ | GSM8K (Strict-Match, 5-shot) | 60.88 | 54.89 |
+ | HellaSwag (Acc-Norm, 10-shot) | 83.52 | 82.53 |
+ | MMLU (Acc, 5-shot) | 63.33 | 62.78 |
+ | TruthfulQA (MC2, 0-shot) | 51.33 | 51.30 |
+ | Winogrande (Acc, 5-shot) | 80.90 | 79.24 |
+ | **Average Score** | **67.44** | **65.52** |
+ | **Recovery** | **100.00** | **97.15** |
 
 ---
 
@@ -225,7 +226,7 @@ Here is the updated table where the column for the quantized model is kept, but
 
 | Metric | ibm-granite/granite-3.1-8b-base | neuralmagic-ent/granite-3.1-8b-base-quantized.w4a16 |
 |-----------------------------------------|:---------------------------------:|:-------------------------------------------:|
- | HumanEval Pass@1 | 44.10 | 43.10 |
+ | HumanEval Pass@1 | 44.10 | 40.70 |
 
 ---
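
For context, here is how the pieces touched by this commit fit together in `quantize.py`. This is a minimal sketch assuming the llm-compressor `GPTQModifier` recipe API that the README already uses; the `targets` and `ignore` values are illustrative assumptions, not taken from this diff.

```python
# Sketch of the quantize.py flow after this commit: argparse wiring plus the
# GPTQ recipe. targets/ignore are assumptions for illustration; the remaining
# arguments mirror the lines touched by the diff above.
import argparse

from llmcompressor.modifiers.quantization import GPTQModifier

parser = argparse.ArgumentParser()
parser.add_argument('--model_path', type=str)
parser.add_argument('--quant_path', type=str)
parser.add_argument('--calib_size', type=int, default=256)
parser.add_argument('--dampening_frac', type=float, default=0.1)
parser.add_argument('--observer', type=str, default="minmax")
# New in this commit: activation-ordering strategy for GPTQ. The script
# default is "dynamic"; the example command overrides it with --actorder static.
parser.add_argument('--actorder', type=str, default="dynamic")
args = parser.parse_args()

recipe = [
    GPTQModifier(
        targets=["Linear"],          # assumption: quantize all Linear layers
        ignore=["lm_head"],          # assumption: keep the LM head in full precision
        scheme="w4a16",
        dampening_frac=args.dampening_frac,
        observer=args.observer,
        actorder=args.actorder,      # the new knob wired through by this commit
    )
]
```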
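
The Recovery row appears to be the quantized average expressed as a percentage of the baseline average; a quick check against the updated OpenLLM numbers:

```python
# Sanity check of the updated Average Score and Recovery rows.
baseline  = [64.68, 60.88, 83.52, 63.33, 51.33, 80.90]
quantized = [62.37, 54.89, 82.53, 62.78, 51.30, 79.24]

base_avg  = sum(baseline) / len(baseline)    # 67.44
quant_avg = sum(quantized) / len(quantized)  # 65.52
print(f"recovery = {100 * quant_avg / base_avg:.2f}%")  # 97.15%
```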