Update README.md
README.md CHANGED
@@ -68,7 +68,7 @@ This model was created with [llm-compressor](https://github.com/vllm-project/llm
 
 
 ```bash
-python quantize.py --model_path ibm-granite/granite-3.1-8b-base --quant_path "output_dir/granite-3.1-8b-base-quantized.w4a16" --calib_size 3072 --dampening_frac 0.1 --observer mse
+python quantize.py --model_path ibm-granite/granite-3.1-8b-base --quant_path "output_dir/granite-3.1-8b-base-quantized.w4a16" --calib_size 3072 --dampening_frac 0.1 --observer mse --actorder static
 ```
 
 
@@ -87,7 +87,7 @@ parser.add_argument('--quant_path', type=str)
 parser.add_argument('--calib_size', type=int, default=256)
 parser.add_argument('--dampening_frac', type=float, default=0.1)
 parser.add_argument('--observer', type=str, default="minmax")
-parser.add_argument('--actorder', type=str, default="
+parser.add_argument('--actorder', type=str, default="dynamic")
 
 args = parser.parse_args()
 
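The hunk above completes a line that was truncated in the published card (`default="` had no closing value or parenthesis), so the script as previously shown would not even parse. As a quick self-contained check that the fixed parser accepts the command line from the first hunk, here is a sketch; every `add_argument` line except `--model_path` appears in the diff, and `--model_path` is assumed by analogy with `--quant_path`:

```python
# Sketch: confirm the README's documented command parses under the fixed
# parser. All add_argument lines except --model_path are from the diff;
# --model_path is an assumption mirroring --quant_path.
import argparse
import shlex

parser = argparse.ArgumentParser()
parser.add_argument('--model_path', type=str)
parser.add_argument('--quant_path', type=str)
parser.add_argument('--calib_size', type=int, default=256)
parser.add_argument('--dampening_frac', type=float, default=0.1)
parser.add_argument('--observer', type=str, default="minmax")
parser.add_argument('--actorder', type=str, default="dynamic")

cmd = ('python quantize.py --model_path ibm-granite/granite-3.1-8b-base '
       '--quant_path "output_dir/granite-3.1-8b-base-quantized.w4a16" '
       '--calib_size 3072 --dampening_frac 0.1 --observer mse --actorder static')
args = parser.parse_args(shlex.split(cmd)[2:])  # drop the "python quantize.py" prefix
print(args.observer, args.actorder)             # -> mse static
```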
@@ -131,6 +131,7 @@ recipe = [
         scheme="w4a16",
         dampening_frac=args.dampening_frac,
         observer=args.observer,
+        actorder=args.actorder,
     )
 ]
 oneshot(
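Taken together, the three hunks thread a new `actorder` knob from the CLI into the GPTQ recipe. For readers who want the surrounding scaffolding, here is a minimal sketch of what `quantize.py` plausibly looks like; only the argparse flags and the `GPTQModifier` keyword arguments come from the diff, while the imports, model loading, calibration dataset (`open_platypus`), `targets`/`ignore`, and the save step are assumptions modeled on typical llm-compressor one-shot GPTQ examples:

```python
# Minimal sketch of quantize.py implied by the diff. Only the CLI flags and
# the GPTQModifier kwargs are from the card; the rest is an assumed scaffold
# in the style of llm-compressor's one-shot examples.
import argparse

from transformers import AutoModelForCausalLM, AutoTokenizer
from llmcompressor.modifiers.quantization import GPTQModifier
from llmcompressor.transformers import oneshot

parser = argparse.ArgumentParser()
parser.add_argument('--model_path', type=str)
parser.add_argument('--quant_path', type=str)
parser.add_argument('--calib_size', type=int, default=256)
parser.add_argument('--dampening_frac', type=float, default=0.1)
parser.add_argument('--observer', type=str, default="minmax")
parser.add_argument('--actorder', type=str, default="dynamic")
args = parser.parse_args()

model = AutoModelForCausalLM.from_pretrained(args.model_path, torch_dtype="auto")
tokenizer = AutoTokenizer.from_pretrained(args.model_path)

recipe = [
    GPTQModifier(
        targets="Linear",            # assumption: quantize all Linear modules
        ignore=["lm_head"],          # assumption: keep the LM head in full precision
        scheme="w4a16",
        dampening_frac=args.dampening_frac,
        observer=args.observer,
        actorder=args.actorder,      # the knob this commit threads through
    )
]

oneshot(
    model=model,
    dataset="open_platypus",         # assumption: the card does not name the calibration set
    recipe=recipe,
    max_seq_length=2048,
    num_calibration_samples=args.calib_size,
)

model.save_pretrained(args.quant_path, save_compressed=True)
tokenizer.save_pretrained(args.quant_path)
```

Activation ordering changes the column order in which GPTQ quantizes weights, which tends to preserve accuracy; the specific values the script accepts (`static` in the documented command, `dynamic` as the default) are whatever the installed `GPTQModifier` supports.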
@@ -195,14 +196,14 @@ Here is the updated table where the column for the quantized model is kept, but
 
 | Metric | ibm-granite/granite-3.1-8b-base | neuralmagic-ent/granite-3.1-8b-base-quantized.w4a16 |
 |-----------------------------------------|:---------------------------------:|:-------------------------------------------:|
-| ARC-Challenge (Acc-Norm, 25-shot) | 64.68 |
-| GSM8K (Strict-Match, 5-shot) | 60.88 |
-| HellaSwag (Acc-Norm, 10-shot) | 83.52 |
-| MMLU (Acc, 5-shot) | 63.33 |
-| TruthfulQA (MC2, 0-shot) | 51.33 |
-| Winogrande (Acc, 5-shot) | 80.90 |
-| **Average Score** | **67.44** | **
-| **Recovery** | **100.00** | **
+| ARC-Challenge (Acc-Norm, 25-shot) | 64.68 | 62.37 |
+| GSM8K (Strict-Match, 5-shot) | 60.88 | 54.89 |
+| HellaSwag (Acc-Norm, 10-shot) | 83.52 | 82.53 |
+| MMLU (Acc, 5-shot) | 63.33 | 62.78 |
+| TruthfulQA (MC2, 0-shot) | 51.33 | 51.30 |
+| Winogrande (Acc, 5-shot) | 80.90 | 79.24 |
+| **Average Score** | **67.44** | **65.52** |
+| **Recovery** | **100.00** | **97.15** |
 
 ---
 
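The row labels (task, metric, few-shot count) pin the evaluation setup down fairly well, though the card does not show the harness invocation in these hunks. Here is a sketch of how one row could be reproduced, assuming lm-evaluation-harness is the harness behind these numbers:

```python
# Sketch: reproduce the ARC-Challenge row, assuming the scores come from
# lm-evaluation-harness; task name and few-shot count follow the row label.
import lm_eval

results = lm_eval.simple_evaluate(
    model="hf",
    model_args="pretrained=neuralmagic-ent/granite-3.1-8b-base-quantized.w4a16,dtype=auto",
    tasks=["arc_challenge"],
    num_fewshot=25,
)
print(results["results"]["arc_challenge"])  # table reports acc_norm = 62.37
```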
@@ -225,7 +226,7 @@ Here is the updated table where the column for the quantized model is kept, but
 
 | Metric | ibm-granite/granite-3.1-8b-base | neuralmagic-ent/granite-3.1-8b-base-quantized.w4a16 |
 |-----------------------------------------|:---------------------------------:|:-------------------------------------------:|
-| HumanEval Pass@1 | 44.10 |
+| HumanEval Pass@1 | 44.10 | 40.70 |
 
 ---
 
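The first table's derived rows are consistent with the per-task scores, and the same recovery computation can be applied to the HumanEval row, which the card leaves without one. A quick verification, using only the table values above:

```python
# Verify the derived rows of the tables above.
base = [64.68, 60.88, 83.52, 63.33, 51.33, 80.90]
quant = [62.37, 54.89, 82.53, 62.78, 51.30, 79.24]

avg_base = sum(base) / len(base)      # 67.44, matching **Average Score**
avg_quant = sum(quant) / len(quant)   # 65.52
print(f"recovery: {100 * avg_quant / avg_base:.2f}%")     # 97.15%, matching **Recovery**

# The HumanEval row has no recovery column; the same formula gives:
print(f"HumanEval recovery: {100 * 40.70 / 44.10:.2f}%")  # 92.29%
```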