marcsun13 HF Staff committed on
Commit
dc28f1a
·
verified ·
1 Parent(s): 03feb5a

Update config.json

Browse files

The quantization_config wasn't included in the config.json, hence vLLM wasn't able to run this model!

Files changed (1) hide show
  1. config.json +9 -0
config.json CHANGED
@@ -47,6 +47,15 @@
47
  "num_local_experts": 32,
48
  "output_router_logits": false,
49
  "pad_token_id": 199999,
 
 
 
 
 
 
 
 
 
50
  "rms_norm_eps": 1e-05,
51
  "rope_scaling": {
52
  "beta_fast": 32.0,
 
47
  "num_local_experts": 32,
48
  "output_router_logits": false,
49
  "pad_token_id": 199999,
50
+ "quantization_config": {
51
+ "modules_to_not_convert": [
52
+ "model.layers.*.self_attn",
53
+ "model.layers.*.mlp.router",
54
+ "model.embed_tokens",
55
+ "lm_head"
56
+ ],
57
+ "quant_method": "mxfp4"
58
+ },
59
  "rms_norm_eps": 1e-05,
60
  "rope_scaling": {
61
  "beta_fast": 32.0,