Update README.md
README.md CHANGED
@@ -68,7 +68,7 @@ This model was created with [llm-compressor](https://github.com/vllm-project/llm
 
 
 ```bash
-python quantize.py --model_path ibm-granite/granite-3.1-8b-base --quant_path "output_dir/granite-3.1-8b-base-quantized.w4a16" --calib_size 3072 --dampening_frac 0.1 --observer mse
+python quantize.py --model_path ibm-granite/granite-3.1-8b-base --quant_path "output_dir/granite-3.1-8b-base-quantized.w4a16" --calib_size 3072 --dampening_frac 0.1 --observer mse --actorder static
 ```
 
 
@@ -87,7 +87,7 @@ parser.add_argument('--quant_path', type=str)
 parser.add_argument('--calib_size', type=int, default=256)
 parser.add_argument('--dampening_frac', type=float, default=0.1)
 parser.add_argument('--observer', type=str, default="minmax")
-parser.add_argument('--actorder', type=str, default="
+parser.add_argument('--actorder', type=str, default="dynamic")
 
 args = parser.parse_args()
 
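The hunk above completes a line that was truncated in the published card (`default="` had no closing value or parenthesis), so the script as previously shown would not even parse. As a quick self-contained check that the fixed parser accepts the command line from the first hunk, here is a sketch; every `add_argument` line except `--model_path` appears in the diff, and `--model_path` is assumed by analogy with `--quant_path`:

```python
# Sketch: confirm the README's documented command parses under the fixed
# parser. All add_argument lines except --model_path are from the diff;
# --model_path is an assumption mirroring --quant_path.
import argparse
import shlex

parser = argparse.ArgumentParser()
parser.add_argument('--model_path', type=str)
parser.add_argument('--quant_path', type=str)
parser.add_argument('--calib_size', type=int, default=256)
parser.add_argument('--dampening_frac', type=float, default=0.1)
parser.add_argument('--observer', type=str, default="minmax")
parser.add_argument('--actorder', type=str, default="dynamic")

cmd = ('python quantize.py --model_path ibm-granite/granite-3.1-8b-base '
       '--quant_path "output_dir/granite-3.1-8b-base-quantized.w4a16" '
       '--calib_size 3072 --dampening_frac 0.1 --observer mse --actorder static')
args = parser.parse_args(shlex.split(cmd)[2:])  # drop the "python quantize.py" prefix
print(args.observer, args.actorder)             # -> mse static
```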
@@ -131,6 +131,7 @@ recipe = [
         scheme="w4a16",
         dampening_frac=args.dampening_frac,
         observer=args.observer,
+        actorder=args.actorder,
     )
 ]
 oneshot(
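Taken together, the three hunks thread a new `actorder` knob from the CLI into the GPTQ recipe. For readers who want the surrounding scaffolding, here is a minimal sketch of what `quantize.py` plausibly looks like; only the argparse flags and the `GPTQModifier` keyword arguments come from the diff, while the imports, model loading, calibration dataset (`open_platypus`), `targets`/`ignore`, and the save step are assumptions modeled on typical llm-compressor one-shot GPTQ examples:

```python
# Minimal sketch of quantize.py implied by the diff. Only the CLI flags and
# the GPTQModifier kwargs are from the card; the rest is an assumed scaffold
# in the style of llm-compressor's one-shot examples.
import argparse

from transformers import AutoModelForCausalLM, AutoTokenizer
from llmcompressor.modifiers.quantization import GPTQModifier
from llmcompressor.transformers import oneshot

parser = argparse.ArgumentParser()
parser.add_argument('--model_path', type=str)
parser.add_argument('--quant_path', type=str)
parser.add_argument('--calib_size', type=int, default=256)
parser.add_argument('--dampening_frac', type=float, default=0.1)
parser.add_argument('--observer', type=str, default="minmax")
parser.add_argument('--actorder', type=str, default="dynamic")
args = parser.parse_args()

model = AutoModelForCausalLM.from_pretrained(args.model_path, torch_dtype="auto")
tokenizer = AutoTokenizer.from_pretrained(args.model_path)

recipe = [
    GPTQModifier(
        targets="Linear",            # assumption: quantize all Linear modules
        ignore=["lm_head"],          # assumption: keep the LM head in full precision
        scheme="w4a16",
        dampening_frac=args.dampening_frac,
        observer=args.observer,
        actorder=args.actorder,      # the knob this commit threads through
    )
]

oneshot(
    model=model,
    dataset="open_platypus",         # assumption: the card does not name the calibration set
    recipe=recipe,
    max_seq_length=2048,
    num_calibration_samples=args.calib_size,
)

model.save_pretrained(args.quant_path, save_compressed=True)
tokenizer.save_pretrained(args.quant_path)
```

Activation ordering changes the column order in which GPTQ quantizes weights, which tends to preserve accuracy; the specific values the script accepts (`static` in the documented command, `dynamic` as the default) are whatever the installed `GPTQModifier` supports.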
@@ -195,14 +196,14 @@ Here is the updated table where the column for the quantized model is kept, but
 
 | Metric | ibm-granite/granite-3.1-8b-base | neuralmagic-ent/granite-3.1-8b-base-quantized.w4a16 |
 |-----------------------------------------|:---------------------------------:|:-------------------------------------------:|
-| ARC-Challenge (Acc-Norm, 25-shot) | 64.68 |
-| GSM8K (Strict-Match, 5-shot) | 60.88 |
-| HellaSwag (Acc-Norm, 10-shot) | 83.52 |
-| MMLU (Acc, 5-shot) | 63.33 |
-| TruthfulQA (MC2, 0-shot) | 51.33 |
-| Winogrande (Acc, 5-shot) | 80.90 |
-| **Average Score** | **67.44** | **
-| **Recovery** | **100.00** | **
+| ARC-Challenge (Acc-Norm, 25-shot) | 64.68 | 62.37 |
+| GSM8K (Strict-Match, 5-shot) | 60.88 | 54.89 |
+| HellaSwag (Acc-Norm, 10-shot) | 83.52 | 82.53 |
+| MMLU (Acc, 5-shot) | 63.33 | 62.78 |
+| TruthfulQA (MC2, 0-shot) | 51.33 | 51.30 |
+| Winogrande (Acc, 5-shot) | 80.90 | 79.24 |
+| **Average Score** | **67.44** | **65.52** |
+| **Recovery** | **100.00** | **97.15** |
 
 ---
 
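The row labels (task, metric, few-shot count) pin the evaluation setup down fairly well, though the card does not show the harness invocation in these hunks. Here is a sketch of how one row could be reproduced, assuming lm-evaluation-harness is the harness behind these numbers:

```python
# Sketch: reproduce the ARC-Challenge row, assuming the scores come from
# lm-evaluation-harness; task name and few-shot count follow the row label.
import lm_eval

results = lm_eval.simple_evaluate(
    model="hf",
    model_args="pretrained=neuralmagic-ent/granite-3.1-8b-base-quantized.w4a16,dtype=auto",
    tasks=["arc_challenge"],
    num_fewshot=25,
)
print(results["results"]["arc_challenge"])  # table reports acc_norm = 62.37
```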
@@ -225,7 +226,7 @@ Here is the updated table where the column for the quantized model is kept, but
 
 | Metric | ibm-granite/granite-3.1-8b-base | neuralmagic-ent/granite-3.1-8b-base-quantized.w4a16 |
 |-----------------------------------------|:---------------------------------:|:-------------------------------------------:|
-| HumanEval Pass@1 | 44.10 |
+| HumanEval Pass@1 | 44.10 | 40.70 |
 
 ---
 
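The first table's derived rows are consistent with the per-task scores, and the same recovery computation can be applied to the HumanEval row, which the card leaves without one. A quick verification, using only the table values above:

```python
# Verify the derived rows of the tables above.
base = [64.68, 60.88, 83.52, 63.33, 51.33, 80.90]
quant = [62.37, 54.89, 82.53, 62.78, 51.30, 79.24]

avg_base = sum(base) / len(base)      # 67.44, matching **Average Score**
avg_quant = sum(quant) / len(quant)   # 65.52
print(f"recovery: {100 * avg_quant / avg_base:.2f}%")     # 97.15%, matching **Recovery**

# The HumanEval row has no recovery column; the same formula gives:
print(f"HumanEval recovery: {100 * 40.70 / 44.10:.2f}%")  # 92.29%
```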