Update config.json
Browse files

The quantization_config wasn't added to the config.json, hence vLLM wasn't able to run this!
- config.json +9 -0
config.json
CHANGED
@@ -47,6 +47,15 @@
|
|
47 |
"num_local_experts": 32,
|
48 |
"output_router_logits": false,
|
49 |
"pad_token_id": 199999,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
"rms_norm_eps": 1e-05,
|
51 |
"rope_scaling": {
|
52 |
"beta_fast": 32.0,
|
|
|
47 |
"num_local_experts": 32,
|
48 |
"output_router_logits": false,
|
49 |
"pad_token_id": 199999,
|
50 |
+
"quantization_config": {
|
51 |
+
"modules_to_not_convert": [
|
52 |
+
"model.layers.*.self_attn",
|
53 |
+
"model.layers.*.mlp.router",
|
54 |
+
"model.embed_tokens",
|
55 |
+
"lm_head"
|
56 |
+
],
|
57 |
+
"quant_method": "mxfp4"
|
58 |
+
},
|
59 |
"rms_norm_eps": 1e-05,
|
60 |
"rope_scaling": {
|
61 |
"beta_fast": 32.0,
|