Update config.json

config.json  (+27 −23)
@@ -37,28 +37,32 @@
   "use_cache": true,
   "vocab_size": 128256,
   "quantization_config": {
-  … (23 removed lines; their contents are not preserved in the extracted diff)
+    "quantization": {
+      "config_groups": {
+        "group_0": {
+          "input_activations": {
+            "dynamic": false,
+            "num_bits": 8,
+            "type": "float"
+          },
+          "weights": {
+            "dynamic": false,
+            "num_bits": 8,
+            "type": "float"
+          }
+        }
+      },
+      "ignore": [
+        "lm_head"
+      ],
+      "quant_method": "modelopt",
+      "quant_algo": "FP8",
+      "kv_cache_scheme": "FP8",
+      "producer": {
+        "name": "modelopt",
+        "version": "0.31.0"
+      }
+    },
+    "quant_method": "modelopt"
   }
 }
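For reference, a minimal Python sketch of how one might load and inspect the updated file. It assumes the config.json from this commit sits in the working directory, uses only the standard library, and relies solely on the field names visible in the diff above; it is an illustration, not part of the commit.

```python
import json

# Load the updated config and pull out the quantization blocks added in this commit.
with open("config.json") as f:
    config = json.load(f)

qc = config["quantization_config"]   # outer block (key unchanged by the commit)
quant = qc["quantization"]           # nested block added in this commit

# Sanity checks against the values shown in the diff.
assert qc["quant_method"] == "modelopt"
assert quant["quant_algo"] == "FP8"
assert quant["kv_cache_scheme"] == "FP8"

group = quant["config_groups"]["group_0"]
print("weights:", group["weights"])                # {'dynamic': False, 'num_bits': 8, 'type': 'float'}
print("activations:", group["input_activations"])  # same static 8-bit float scheme as the weights
print("ignored modules:", quant["ignore"])         # ['lm_head'] is left unquantized
print("producer:", quant["producer"])              # {'name': 'modelopt', 'version': '0.31.0'}
```

As the diff shows, both weights and input activations use a static 8-bit float scheme, the KV cache is also marked FP8, and `lm_head` is excluded via the `ignore` list.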