Update config.json

config.json  (+27 −23)
@@ -37,28 +37,32 @@
   "use_cache": true,
   "vocab_size": 128256,
   "quantization_config": {
-  … (23 removed lines; their contents are not preserved in the extracted diff)
+    "quantization": {
+      "config_groups": {
+        "group_0": {
+          "input_activations": {
+            "dynamic": false,
+            "num_bits": 8,
+            "type": "float"
+          },
+          "weights": {
+            "dynamic": false,
+            "num_bits": 8,
+            "type": "float"
+          }
+        }
+      },
+      "ignore": [
+        "lm_head"
+      ],
+      "quant_method": "modelopt",
+      "quant_algo": "FP8",
+      "kv_cache_scheme": "FP8",
+      "producer": {
+        "name": "modelopt",
+        "version": "0.31.0"
+      }
+    },
+    "quant_method": "modelopt"
   }
 }
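For reference, a minimal Python sketch of how one might load and inspect the updated file. It assumes the config.json from this commit sits in the working directory, uses only the standard library, and relies solely on the field names visible in the diff above; it is an illustration, not part of the commit.

```python
import json

# Load the updated config and pull out the quantization blocks added in this commit.
with open("config.json") as f:
    config = json.load(f)

qc = config["quantization_config"]   # outer block (key unchanged by the commit)
quant = qc["quantization"]           # nested block added in this commit

# Sanity checks against the values shown in the diff.
assert qc["quant_method"] == "modelopt"
assert quant["quant_algo"] == "FP8"
assert quant["kv_cache_scheme"] == "FP8"

group = quant["config_groups"]["group_0"]
print("weights:", group["weights"])                # {'dynamic': False, 'num_bits': 8, 'type': 'float'}
print("activations:", group["input_activations"])  # same static 8-bit float scheme as the weights
print("ignored modules:", quant["ignore"])         # ['lm_head'] is left unquantized
print("producer:", quant["producer"])              # {'name': 'modelopt', 'version': '0.31.0'}
```

As the diff shows, both weights and input activations use a static 8-bit float scheme, the KV cache is also marked FP8, and `lm_head` is excluded via the `ignore` list.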