async0x42 committed
Commit b09627f · verified · 1 Parent(s): 52b1842

Upload folder using huggingface_hub

README.md CHANGED
@@ -25,11 +25,11 @@ tags:
 
 <p><b>Context/instruct template</b>: ChatML. <s>Was definitely not tested with ChatML instruct and Mistral v7 template, nuh-uh.</s></p>
 
-<p><b>Samplers</b>: temperature at 0.9, min_p at 0.05, top_a at 0.3, TFS at 0.75, repetition_penalty at 1.03, DRY if you have access to it.</p>
+<p><b>Samplers</b>: temperature at 0.9, min_p at 0.05, top_a at 0.3, TFS at 0.75, repetition_penalty at 1.03, DRY if you have access to it. (or not, see below.)</p>
 
 A virt-io derivative prompt worked best during our testing, but feel free to use what you like.
 
-Master import for ST: [https://files.catbox.moe/b6nwbc.json](https://files.catbox.moe/b6nwbc.json)
+Master import for ST: https://files.catbox.moe/w812at.png
 
 ## Reasoning
 
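The sampler block above maps directly onto a request payload for most OpenAI-compatible backends. A minimal sketch, assuming text-generation-webui/TabbyAPI-style extension keys (the exact spellings, and the DRY value shown, are assumptions, not part of this repo; check your backend's docs):

```python
import json

# Recommended samplers from the README as a generation payload.
# Key names follow common backend extensions and are assumptions.
sampler_settings = {
    "temperature": 0.9,
    "min_p": 0.05,
    "top_a": 0.3,
    "tfs": 0.75,                 # tail-free sampling
    "repetition_penalty": 1.03,
    "dry_multiplier": 0.8,       # hypothetical value; enable DRY if supported
}

print(json.dumps(sampler_settings, indent=2))
```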
config.json CHANGED
@@ -5,7 +5,7 @@
 ],
 "attention_dropout": 0.0,
 "bos_token_id": 151643,
-"eos_token_id": 151643,
+"eos_token_id": 151645,
 "hidden_act": "silu",
 "hidden_size": 5120,
 "initializer_range": 0.02,
@@ -28,11 +28,13 @@
 "vocab_size": 151665,
 "quantization_config": {
 "quant_method": "exl3",
-"version": "0.0.1",
+"version": "0.0.4",
 "bits": 4.5,
+"head_bits": 6,
 "calibration": {
 "rows": 100,
 "cols": 2048
-}
+},
+"out_scales": "auto"
 }
 }
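The eos_token_id change from 151643 to 151645 is the functional fix here: in Qwen-style vocabularies 151643 is usually `<|endoftext|>` while 151645 is `<|im_end|>`, the ChatML turn terminator, so the old value could let generations run past the end of a turn. A quick check, as a sketch (the local path is a placeholder):

```python
from transformers import AutoTokenizer

# Point this at a local clone of the repo (placeholder path).
tok = AutoTokenizer.from_pretrained("./this-model")

# Verify the usual Qwen-style ids rather than assuming them.
print("<|endoftext|> ->", tok.convert_tokens_to_ids("<|endoftext|>"))
print("<|im_end|>    ->", tok.convert_tokens_to_ids("<|im_end|>"))

# config.json's eos_token_id should now match the ChatML terminator.
assert tok.convert_tokens_to_ids("<|im_end|>") == 151645
```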
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5eb874850762e60bd37e6db41c178f95e1d05a0ba9fab68ec57f27e3ef8de895
+oid sha256:04b1d3f2f4220a2ec286bd0995d310b60651c9fd50a2ee7ad3c822a4d000595f
 size 8413645240
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:58a176cc87b5a78c055ed177185853d9f8f0cd679d10b7943b74068aa7aa3895
-size 8515209112
+oid sha256:0dcf0e0f248fd776b6c64b082281916c36b78e2548c1359bf620b3408bfe7c7a
+size 8507344792
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c0c8afe4e8906057a43f04e77818904c9fc2af0d53b006787b15ac2314d2d942
-size 2779318448
+oid sha256:81e6935c2c3d8916b4ce7b5668e4940a34b505f5595b6bd88caaec7f388a794c
+size 2782595248
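All three LFS pointers changed, so a previously downloaded copy of any shard is stale. A sketch that streams each shard through SHA-256 and compares against the new pointers (oids and sizes copied from the diffs above):

```python
import hashlib
import os

# (oid, size) pairs from the updated LFS pointers.
expected = {
    "model-00001-of-00003.safetensors":
        ("04b1d3f2f4220a2ec286bd0995d310b60651c9fd50a2ee7ad3c822a4d000595f", 8413645240),
    "model-00002-of-00003.safetensors":
        ("0dcf0e0f248fd776b6c64b082281916c36b78e2548c1359bf620b3408bfe7c7a", 8507344792),
    "model-00003-of-00003.safetensors":
        ("81e6935c2c3d8916b4ce7b5668e4940a34b505f5595b6bd88caaec7f388a794c", 2782595248),
}

for name, (oid, size) in expected.items():
    assert os.path.getsize(name) == size, f"{name}: size mismatch"
    h = hashlib.sha256()
    with open(name, "rb") as f:
        # Stream in 1 MiB chunks; the shards are several GB each.
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    assert h.hexdigest() == oid, f"{name}: hash mismatch"
print("all shards match their LFS pointers")
```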
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
 {
 "metadata": {
-"total_size": 19707992320
+"total_size": 19703404800
 },
 "weight_map": {
 "model.embed_tokens.weight": "model-00001-of-00003.safetensors",
quantization_config.json CHANGED
@@ -1,11 +1,13 @@
 {
 "quant_method": "exl3",
-"version": "0.0.1",
+"version": "0.0.4",
 "bits": 4.5,
+"head_bits": 6,
 "calibration": {
 "rows": 100,
 "cols": 2048
 },
+"out_scales": "auto",
 "tensor_storage": {
 "model.embed_tokens": {
 "stored_tensors": {
@@ -585,14 +587,14 @@
585
  "shape": [
586
  320,
587
  64,
588
- 64
589
  ],
590
- "n_bytes": 2621440,
591
  "dtype": "torch.int16"
592
  }
593
  },
594
  "quant_format": "exl3",
595
- "bits_per_weight": 4
596
  },
597
  "model.layers.2.self_attn.v_proj": {
598
  "stored_tensors": {
@@ -621,14 +623,14 @@
621
  "shape": [
622
  320,
623
  64,
624
- 96
625
  ],
626
- "n_bytes": 3932160,
627
  "dtype": "torch.int16"
628
  }
629
  },
630
  "quant_format": "exl3",
631
- "bits_per_weight": 6
632
  },
633
  "model.layers.2.self_attn.o_proj": {
634
  "stored_tensors": {
@@ -1323,14 +1325,14 @@
1323
  "shape": [
1324
  320,
1325
  64,
1326
- 64
1327
  ],
1328
- "n_bytes": 2621440,
1329
  "dtype": "torch.int16"
1330
  }
1331
  },
1332
  "quant_format": "exl3",
1333
- "bits_per_weight": 4
1334
  },
1335
  "model.layers.5.self_attn.v_proj": {
1336
  "stored_tensors": {
@@ -1779,14 +1781,14 @@
1779
  "shape": [
1780
  320,
1781
  320,
1782
- 64
1783
  ],
1784
- "n_bytes": 13107200,
1785
  "dtype": "torch.int16"
1786
  }
1787
  },
1788
  "quant_format": "exl3",
1789
- "bits_per_weight": 4
1790
  },
1791
  "model.layers.7.self_attn.k_proj": {
1792
  "stored_tensors": {
@@ -1815,14 +1817,14 @@
1815
  "shape": [
1816
  320,
1817
  64,
1818
- 64
1819
  ],
1820
- "n_bytes": 2621440,
1821
  "dtype": "torch.int16"
1822
  }
1823
  },
1824
  "quant_format": "exl3",
1825
- "bits_per_weight": 4
1826
  },
1827
  "model.layers.7.self_attn.v_proj": {
1828
  "stored_tensors": {
@@ -1851,14 +1853,14 @@
1851
  "shape": [
1852
  320,
1853
  64,
1854
- 80
1855
  ],
1856
- "n_bytes": 3276800,
1857
  "dtype": "torch.int16"
1858
  }
1859
  },
1860
  "quant_format": "exl3",
1861
- "bits_per_weight": 5
1862
  },
1863
  "model.layers.7.self_attn.o_proj": {
1864
  "stored_tensors": {
@@ -1880,14 +1882,14 @@
1880
  "shape": [
1881
  320,
1882
  320,
1883
- 64
1884
  ],
1885
- "n_bytes": 13107200,
1886
  "dtype": "torch.int16"
1887
  }
1888
  },
1889
  "quant_format": "exl3",
1890
- "bits_per_weight": 4
1891
  },
1892
  "model.layers.7.post_attention_layernorm": {
1893
  "stored_tensors": {
@@ -1920,14 +1922,14 @@
1920
  "shape": [
1921
  320,
1922
  1728,
1923
- 80
1924
  ],
1925
- "n_bytes": 88473600,
1926
  "dtype": "torch.int16"
1927
  }
1928
  },
1929
  "quant_format": "exl3",
1930
- "bits_per_weight": 5
1931
  },
1932
  "model.layers.7.mlp.gate_proj": {
1933
  "stored_tensors": {
@@ -2025,14 +2027,14 @@
2025
  "shape": [
2026
  320,
2027
  320,
2028
- 80
2029
  ],
2030
- "n_bytes": 16384000,
2031
  "dtype": "torch.int16"
2032
  }
2033
  },
2034
  "quant_format": "exl3",
2035
- "bits_per_weight": 5
2036
  },
2037
  "model.layers.8.self_attn.k_proj": {
2038
  "stored_tensors": {
@@ -2126,14 +2128,14 @@
2126
  "shape": [
2127
  320,
2128
  320,
2129
- 80
2130
  ],
2131
- "n_bytes": 16384000,
2132
  "dtype": "torch.int16"
2133
  }
2134
  },
2135
  "quant_format": "exl3",
2136
- "bits_per_weight": 5
2137
  },
2138
  "model.layers.8.post_attention_layernorm": {
2139
  "stored_tensors": {
@@ -2166,14 +2168,14 @@
2166
  "shape": [
2167
  320,
2168
  1728,
2169
- 64
2170
  ],
2171
- "n_bytes": 70778880,
2172
  "dtype": "torch.int16"
2173
  }
2174
  },
2175
  "quant_format": "exl3",
2176
- "bits_per_weight": 4
2177
  },
2178
  "model.layers.8.mlp.gate_proj": {
2179
  "stored_tensors": {
@@ -2517,14 +2519,14 @@
2517
  "shape": [
2518
  320,
2519
  320,
2520
- 64
2521
  ],
2522
- "n_bytes": 13107200,
2523
  "dtype": "torch.int16"
2524
  }
2525
  },
2526
  "quant_format": "exl3",
2527
- "bits_per_weight": 4
2528
  },
2529
  "model.layers.10.self_attn.k_proj": {
2530
  "stored_tensors": {
@@ -2553,14 +2555,14 @@
2553
  "shape": [
2554
  320,
2555
  64,
2556
- 64
2557
  ],
2558
- "n_bytes": 2621440,
2559
  "dtype": "torch.int16"
2560
  }
2561
  },
2562
  "quant_format": "exl3",
2563
- "bits_per_weight": 4
2564
  },
2565
  "model.layers.10.self_attn.v_proj": {
2566
  "stored_tensors": {
@@ -2618,14 +2620,14 @@
2618
  "shape": [
2619
  320,
2620
  320,
2621
- 64
2622
  ],
2623
- "n_bytes": 13107200,
2624
  "dtype": "torch.int16"
2625
  }
2626
  },
2627
  "quant_format": "exl3",
2628
- "bits_per_weight": 4
2629
  },
2630
  "model.layers.10.post_attention_layernorm": {
2631
  "stored_tensors": {
@@ -2658,14 +2660,14 @@
2658
  "shape": [
2659
  320,
2660
  1728,
2661
- 80
2662
  ],
2663
- "n_bytes": 88473600,
2664
  "dtype": "torch.int16"
2665
  }
2666
  },
2667
  "quant_format": "exl3",
2668
- "bits_per_weight": 5
2669
  },
2670
  "model.layers.10.mlp.gate_proj": {
2671
  "stored_tensors": {
@@ -2763,14 +2765,14 @@
2763
  "shape": [
2764
  320,
2765
  320,
2766
- 80
2767
  ],
2768
- "n_bytes": 16384000,
2769
  "dtype": "torch.int16"
2770
  }
2771
  },
2772
  "quant_format": "exl3",
2773
- "bits_per_weight": 5
2774
  },
2775
  "model.layers.11.self_attn.k_proj": {
2776
  "stored_tensors": {
@@ -2864,14 +2866,14 @@
2864
  "shape": [
2865
  320,
2866
  320,
2867
- 80
2868
  ],
2869
- "n_bytes": 16384000,
2870
  "dtype": "torch.int16"
2871
  }
2872
  },
2873
  "quant_format": "exl3",
2874
- "bits_per_weight": 5
2875
  },
2876
  "model.layers.11.post_attention_layernorm": {
2877
  "stored_tensors": {
@@ -2904,14 +2906,14 @@
2904
  "shape": [
2905
  320,
2906
  1728,
2907
- 64
2908
  ],
2909
- "n_bytes": 70778880,
2910
  "dtype": "torch.int16"
2911
  }
2912
  },
2913
  "quant_format": "exl3",
2914
- "bits_per_weight": 4
2915
  },
2916
  "model.layers.11.mlp.gate_proj": {
2917
  "stored_tensors": {
@@ -3255,14 +3257,14 @@
3255
  "shape": [
3256
  320,
3257
  320,
3258
- 64
3259
  ],
3260
- "n_bytes": 13107200,
3261
  "dtype": "torch.int16"
3262
  }
3263
  },
3264
  "quant_format": "exl3",
3265
- "bits_per_weight": 4
3266
  },
3267
  "model.layers.13.self_attn.k_proj": {
3268
  "stored_tensors": {
@@ -3291,14 +3293,14 @@
3291
  "shape": [
3292
  320,
3293
  64,
3294
- 64
3295
  ],
3296
- "n_bytes": 2621440,
3297
  "dtype": "torch.int16"
3298
  }
3299
  },
3300
  "quant_format": "exl3",
3301
- "bits_per_weight": 4
3302
  },
3303
  "model.layers.13.self_attn.v_proj": {
3304
  "stored_tensors": {
@@ -3356,14 +3358,14 @@
3356
  "shape": [
3357
  320,
3358
  320,
3359
- 64
3360
  ],
3361
- "n_bytes": 13107200,
3362
  "dtype": "torch.int16"
3363
  }
3364
  },
3365
  "quant_format": "exl3",
3366
- "bits_per_weight": 4
3367
  },
3368
  "model.layers.13.post_attention_layernorm": {
3369
  "stored_tensors": {
@@ -3396,14 +3398,14 @@
3396
  "shape": [
3397
  320,
3398
  1728,
3399
- 80
3400
  ],
3401
- "n_bytes": 88473600,
3402
  "dtype": "torch.int16"
3403
  }
3404
  },
3405
  "quant_format": "exl3",
3406
- "bits_per_weight": 5
3407
  },
3408
  "model.layers.13.mlp.gate_proj": {
3409
  "stored_tensors": {
@@ -3501,14 +3503,14 @@
3501
  "shape": [
3502
  320,
3503
  320,
3504
- 80
3505
  ],
3506
- "n_bytes": 16384000,
3507
  "dtype": "torch.int16"
3508
  }
3509
  },
3510
  "quant_format": "exl3",
3511
- "bits_per_weight": 5
3512
  },
3513
  "model.layers.14.self_attn.k_proj": {
3514
  "stored_tensors": {
@@ -3537,14 +3539,14 @@
3537
  "shape": [
3538
  320,
3539
  64,
3540
- 96
3541
  ],
3542
- "n_bytes": 3932160,
3543
  "dtype": "torch.int16"
3544
  }
3545
  },
3546
  "quant_format": "exl3",
3547
- "bits_per_weight": 6
3548
  },
3549
  "model.layers.14.self_attn.v_proj": {
3550
  "stored_tensors": {
@@ -3573,14 +3575,14 @@
3573
  "shape": [
3574
  320,
3575
  64,
3576
- 96
3577
  ],
3578
- "n_bytes": 3932160,
3579
  "dtype": "torch.int16"
3580
  }
3581
  },
3582
  "quant_format": "exl3",
3583
- "bits_per_weight": 6
3584
  },
3585
  "model.layers.14.self_attn.o_proj": {
3586
  "stored_tensors": {
@@ -3602,14 +3604,14 @@
3602
  "shape": [
3603
  320,
3604
  320,
3605
- 80
3606
  ],
3607
- "n_bytes": 16384000,
3608
  "dtype": "torch.int16"
3609
  }
3610
  },
3611
  "quant_format": "exl3",
3612
- "bits_per_weight": 5
3613
  },
3614
  "model.layers.14.post_attention_layernorm": {
3615
  "stored_tensors": {
@@ -3642,14 +3644,14 @@
3642
  "shape": [
3643
  320,
3644
  1728,
3645
- 64
3646
  ],
3647
- "n_bytes": 70778880,
3648
  "dtype": "torch.int16"
3649
  }
3650
  },
3651
  "quant_format": "exl3",
3652
- "bits_per_weight": 4
3653
  },
3654
  "model.layers.14.mlp.gate_proj": {
3655
  "stored_tensors": {
@@ -3747,14 +3749,14 @@
3747
  "shape": [
3748
  320,
3749
  320,
3750
- 64
3751
  ],
3752
- "n_bytes": 13107200,
3753
  "dtype": "torch.int16"
3754
  }
3755
  },
3756
  "quant_format": "exl3",
3757
- "bits_per_weight": 4
3758
  },
3759
  "model.layers.15.self_attn.k_proj": {
3760
  "stored_tensors": {
@@ -3783,14 +3785,14 @@
3783
  "shape": [
3784
  320,
3785
  64,
3786
- 64
3787
  ],
3788
- "n_bytes": 2621440,
3789
  "dtype": "torch.int16"
3790
  }
3791
  },
3792
  "quant_format": "exl3",
3793
- "bits_per_weight": 4
3794
  },
3795
  "model.layers.15.self_attn.v_proj": {
3796
  "stored_tensors": {
@@ -3819,14 +3821,14 @@
3819
  "shape": [
3820
  320,
3821
  64,
3822
- 80
3823
  ],
3824
- "n_bytes": 3276800,
3825
  "dtype": "torch.int16"
3826
  }
3827
  },
3828
  "quant_format": "exl3",
3829
- "bits_per_weight": 5
3830
  },
3831
  "model.layers.15.self_attn.o_proj": {
3832
  "stored_tensors": {
@@ -3848,14 +3850,14 @@
3848
  "shape": [
3849
  320,
3850
  320,
3851
- 64
3852
  ],
3853
- "n_bytes": 13107200,
3854
  "dtype": "torch.int16"
3855
  }
3856
  },
3857
  "quant_format": "exl3",
3858
- "bits_per_weight": 4
3859
  },
3860
  "model.layers.15.post_attention_layernorm": {
3861
  "stored_tensors": {
@@ -3888,14 +3890,14 @@
3888
  "shape": [
3889
  320,
3890
  1728,
3891
- 80
3892
  ],
3893
- "n_bytes": 88473600,
3894
  "dtype": "torch.int16"
3895
  }
3896
  },
3897
  "quant_format": "exl3",
3898
- "bits_per_weight": 5
3899
  },
3900
  "model.layers.15.mlp.gate_proj": {
3901
  "stored_tensors": {
@@ -4239,14 +4241,14 @@
4239
  "shape": [
4240
  320,
4241
  320,
4242
- 80
4243
  ],
4244
- "n_bytes": 16384000,
4245
  "dtype": "torch.int16"
4246
  }
4247
  },
4248
  "quant_format": "exl3",
4249
- "bits_per_weight": 5
4250
  },
4251
  "model.layers.17.self_attn.k_proj": {
4252
  "stored_tensors": {
@@ -4340,14 +4342,14 @@
4340
  "shape": [
4341
  320,
4342
  320,
4343
- 80
4344
  ],
4345
- "n_bytes": 16384000,
4346
  "dtype": "torch.int16"
4347
  }
4348
  },
4349
  "quant_format": "exl3",
4350
- "bits_per_weight": 5
4351
  },
4352
  "model.layers.17.post_attention_layernorm": {
4353
  "stored_tensors": {
@@ -4380,14 +4382,14 @@
4380
  "shape": [
4381
  320,
4382
  1728,
4383
- 64
4384
  ],
4385
- "n_bytes": 70778880,
4386
  "dtype": "torch.int16"
4387
  }
4388
  },
4389
  "quant_format": "exl3",
4390
- "bits_per_weight": 4
4391
  },
4392
  "model.layers.17.mlp.gate_proj": {
4393
  "stored_tensors": {
@@ -4485,14 +4487,14 @@
4485
  "shape": [
4486
  320,
4487
  320,
4488
- 64
4489
  ],
4490
- "n_bytes": 13107200,
4491
  "dtype": "torch.int16"
4492
  }
4493
  },
4494
  "quant_format": "exl3",
4495
- "bits_per_weight": 4
4496
  },
4497
  "model.layers.18.self_attn.k_proj": {
4498
  "stored_tensors": {
@@ -4521,14 +4523,14 @@
4521
  "shape": [
4522
  320,
4523
  64,
4524
- 64
4525
  ],
4526
- "n_bytes": 2621440,
4527
  "dtype": "torch.int16"
4528
  }
4529
  },
4530
  "quant_format": "exl3",
4531
- "bits_per_weight": 4
4532
  },
4533
  "model.layers.18.self_attn.v_proj": {
4534
  "stored_tensors": {
@@ -4586,14 +4588,14 @@
4586
  "shape": [
4587
  320,
4588
  320,
4589
- 64
4590
  ],
4591
- "n_bytes": 13107200,
4592
  "dtype": "torch.int16"
4593
  }
4594
  },
4595
  "quant_format": "exl3",
4596
- "bits_per_weight": 4
4597
  },
4598
  "model.layers.18.post_attention_layernorm": {
4599
  "stored_tensors": {
@@ -4626,14 +4628,14 @@
4626
  "shape": [
4627
  320,
4628
  1728,
4629
- 80
4630
  ],
4631
- "n_bytes": 88473600,
4632
  "dtype": "torch.int16"
4633
  }
4634
  },
4635
  "quant_format": "exl3",
4636
- "bits_per_weight": 5
4637
  },
4638
  "model.layers.18.mlp.gate_proj": {
4639
  "stored_tensors": {
@@ -4977,14 +4979,14 @@
4977
  "shape": [
4978
  320,
4979
  320,
4980
- 80
4981
  ],
4982
- "n_bytes": 16384000,
4983
  "dtype": "torch.int16"
4984
  }
4985
  },
4986
  "quant_format": "exl3",
4987
- "bits_per_weight": 5
4988
  },
4989
  "model.layers.20.self_attn.k_proj": {
4990
  "stored_tensors": {
@@ -5078,14 +5080,14 @@
5078
  "shape": [
5079
  320,
5080
  320,
5081
- 80
5082
  ],
5083
- "n_bytes": 16384000,
5084
  "dtype": "torch.int16"
5085
  }
5086
  },
5087
  "quant_format": "exl3",
5088
- "bits_per_weight": 5
5089
  },
5090
  "model.layers.20.post_attention_layernorm": {
5091
  "stored_tensors": {
@@ -5118,14 +5120,14 @@
5118
  "shape": [
5119
  320,
5120
  1728,
5121
- 64
5122
  ],
5123
- "n_bytes": 70778880,
5124
  "dtype": "torch.int16"
5125
  }
5126
  },
5127
  "quant_format": "exl3",
5128
- "bits_per_weight": 4
5129
  },
5130
  "model.layers.20.mlp.gate_proj": {
5131
  "stored_tensors": {
@@ -5223,14 +5225,14 @@
5223
  "shape": [
5224
  320,
5225
  320,
5226
- 64
5227
  ],
5228
- "n_bytes": 13107200,
5229
  "dtype": "torch.int16"
5230
  }
5231
  },
5232
  "quant_format": "exl3",
5233
- "bits_per_weight": 4
5234
  },
5235
  "model.layers.21.self_attn.k_proj": {
5236
  "stored_tensors": {
@@ -5259,14 +5261,14 @@
5259
  "shape": [
5260
  320,
5261
  64,
5262
- 64
5263
  ],
5264
- "n_bytes": 2621440,
5265
  "dtype": "torch.int16"
5266
  }
5267
  },
5268
  "quant_format": "exl3",
5269
- "bits_per_weight": 4
5270
  },
5271
  "model.layers.21.self_attn.v_proj": {
5272
  "stored_tensors": {
@@ -5324,14 +5326,14 @@
5324
  "shape": [
5325
  320,
5326
  320,
5327
- 64
5328
  ],
5329
- "n_bytes": 13107200,
5330
  "dtype": "torch.int16"
5331
  }
5332
  },
5333
  "quant_format": "exl3",
5334
- "bits_per_weight": 4
5335
  },
5336
  "model.layers.21.post_attention_layernorm": {
5337
  "stored_tensors": {
@@ -5364,14 +5366,14 @@
5364
  "shape": [
5365
  320,
5366
  1728,
5367
- 80
5368
  ],
5369
- "n_bytes": 88473600,
5370
  "dtype": "torch.int16"
5371
  }
5372
  },
5373
  "quant_format": "exl3",
5374
- "bits_per_weight": 5
5375
  },
5376
  "model.layers.21.mlp.gate_proj": {
5377
  "stored_tensors": {
@@ -5751,14 +5753,14 @@
5751
  "shape": [
5752
  320,
5753
  64,
5754
- 64
5755
  ],
5756
- "n_bytes": 2621440,
5757
  "dtype": "torch.int16"
5758
  }
5759
  },
5760
  "quant_format": "exl3",
5761
- "bits_per_weight": 4
5762
  },
5763
  "model.layers.23.self_attn.v_proj": {
5764
  "stored_tensors": {
@@ -5787,14 +5789,14 @@
5787
  "shape": [
5788
  320,
5789
  64,
5790
- 80
5791
  ],
5792
- "n_bytes": 3276800,
5793
  "dtype": "torch.int16"
5794
  }
5795
  },
5796
  "quant_format": "exl3",
5797
- "bits_per_weight": 5
5798
  },
5799
  "model.layers.23.self_attn.o_proj": {
5800
  "stored_tensors": {
@@ -6489,14 +6491,14 @@
6489
  "shape": [
6490
  320,
6491
  64,
6492
- 64
6493
  ],
6494
- "n_bytes": 2621440,
6495
  "dtype": "torch.int16"
6496
  }
6497
  },
6498
  "quant_format": "exl3",
6499
- "bits_per_weight": 4
6500
  },
6501
  "model.layers.26.self_attn.v_proj": {
6502
  "stored_tensors": {
@@ -6525,14 +6527,14 @@
6525
  "shape": [
6526
  320,
6527
  64,
6528
- 96
6529
  ],
6530
- "n_bytes": 3932160,
6531
  "dtype": "torch.int16"
6532
  }
6533
  },
6534
  "quant_format": "exl3",
6535
- "bits_per_weight": 6
6536
  },
6537
  "model.layers.26.self_attn.o_proj": {
6538
  "stored_tensors": {
@@ -7227,14 +7229,14 @@
7227
  "shape": [
7228
  320,
7229
  64,
7230
- 64
7231
  ],
7232
- "n_bytes": 2621440,
7233
  "dtype": "torch.int16"
7234
  }
7235
  },
7236
  "quant_format": "exl3",
7237
- "bits_per_weight": 4
7238
  },
7239
  "model.layers.29.self_attn.v_proj": {
7240
  "stored_tensors": {
@@ -7683,14 +7685,14 @@
7683
  "shape": [
7684
  320,
7685
  320,
7686
- 64
7687
  ],
7688
- "n_bytes": 13107200,
7689
  "dtype": "torch.int16"
7690
  }
7691
  },
7692
  "quant_format": "exl3",
7693
- "bits_per_weight": 4
7694
  },
7695
  "model.layers.31.self_attn.k_proj": {
7696
  "stored_tensors": {
@@ -7719,14 +7721,14 @@
7719
  "shape": [
7720
  320,
7721
  64,
7722
- 64
7723
  ],
7724
- "n_bytes": 2621440,
7725
  "dtype": "torch.int16"
7726
  }
7727
  },
7728
  "quant_format": "exl3",
7729
- "bits_per_weight": 4
7730
  },
7731
  "model.layers.31.self_attn.v_proj": {
7732
  "stored_tensors": {
@@ -7755,14 +7757,14 @@
7755
  "shape": [
7756
  320,
7757
  64,
7758
- 80
7759
  ],
7760
- "n_bytes": 3276800,
7761
  "dtype": "torch.int16"
7762
  }
7763
  },
7764
  "quant_format": "exl3",
7765
- "bits_per_weight": 5
7766
  },
7767
  "model.layers.31.self_attn.o_proj": {
7768
  "stored_tensors": {
@@ -7784,14 +7786,14 @@
7784
  "shape": [
7785
  320,
7786
  320,
7787
- 64
7788
  ],
7789
- "n_bytes": 13107200,
7790
  "dtype": "torch.int16"
7791
  }
7792
  },
7793
  "quant_format": "exl3",
7794
- "bits_per_weight": 4
7795
  },
7796
  "model.layers.31.post_attention_layernorm": {
7797
  "stored_tensors": {
@@ -7824,14 +7826,14 @@
7824
  "shape": [
7825
  320,
7826
  1728,
7827
- 80
7828
  ],
7829
- "n_bytes": 88473600,
7830
  "dtype": "torch.int16"
7831
  }
7832
  },
7833
  "quant_format": "exl3",
7834
- "bits_per_weight": 5
7835
  },
7836
  "model.layers.31.mlp.gate_proj": {
7837
  "stored_tensors": {
@@ -7929,14 +7931,14 @@
7929
  "shape": [
7930
  320,
7931
  320,
7932
- 80
7933
  ],
7934
- "n_bytes": 16384000,
7935
  "dtype": "torch.int16"
7936
  }
7937
  },
7938
  "quant_format": "exl3",
7939
- "bits_per_weight": 5
7940
  },
7941
  "model.layers.32.self_attn.k_proj": {
7942
  "stored_tensors": {
@@ -8030,14 +8032,14 @@
8030
  "shape": [
8031
  320,
8032
  320,
8033
- 80
8034
  ],
8035
- "n_bytes": 16384000,
8036
  "dtype": "torch.int16"
8037
  }
8038
  },
8039
  "quant_format": "exl3",
8040
- "bits_per_weight": 5
8041
  },
8042
  "model.layers.32.post_attention_layernorm": {
8043
  "stored_tensors": {
@@ -8070,14 +8072,14 @@
8070
  "shape": [
8071
  320,
8072
  1728,
8073
- 64
8074
  ],
8075
- "n_bytes": 70778880,
8076
  "dtype": "torch.int16"
8077
  }
8078
  },
8079
  "quant_format": "exl3",
8080
- "bits_per_weight": 4
8081
  },
8082
  "model.layers.32.mlp.gate_proj": {
8083
  "stored_tensors": {
@@ -8421,14 +8423,14 @@
8421
  "shape": [
8422
  320,
8423
  320,
8424
- 64
8425
  ],
8426
- "n_bytes": 13107200,
8427
  "dtype": "torch.int16"
8428
  }
8429
  },
8430
  "quant_format": "exl3",
8431
- "bits_per_weight": 4
8432
  },
8433
  "model.layers.34.self_attn.k_proj": {
8434
  "stored_tensors": {
@@ -8457,14 +8459,14 @@
8457
  "shape": [
8458
  320,
8459
  64,
8460
- 64
8461
  ],
8462
- "n_bytes": 2621440,
8463
  "dtype": "torch.int16"
8464
  }
8465
  },
8466
  "quant_format": "exl3",
8467
- "bits_per_weight": 4
8468
  },
8469
  "model.layers.34.self_attn.v_proj": {
8470
  "stored_tensors": {
@@ -8522,14 +8524,14 @@
8522
  "shape": [
8523
  320,
8524
  320,
8525
- 64
8526
  ],
8527
- "n_bytes": 13107200,
8528
  "dtype": "torch.int16"
8529
  }
8530
  },
8531
  "quant_format": "exl3",
8532
- "bits_per_weight": 4
8533
  },
8534
  "model.layers.34.post_attention_layernorm": {
8535
  "stored_tensors": {
@@ -8562,14 +8564,14 @@
8562
  "shape": [
8563
  320,
8564
  1728,
8565
- 80
8566
  ],
8567
- "n_bytes": 88473600,
8568
  "dtype": "torch.int16"
8569
  }
8570
  },
8571
  "quant_format": "exl3",
8572
- "bits_per_weight": 5
8573
  },
8574
  "model.layers.34.mlp.gate_proj": {
8575
  "stored_tensors": {
@@ -8667,14 +8669,14 @@
8667
  "shape": [
8668
  320,
8669
  320,
8670
- 80
8671
  ],
8672
- "n_bytes": 16384000,
8673
  "dtype": "torch.int16"
8674
  }
8675
  },
8676
  "quant_format": "exl3",
8677
- "bits_per_weight": 5
8678
  },
8679
  "model.layers.35.self_attn.k_proj": {
8680
  "stored_tensors": {
@@ -8768,14 +8770,14 @@
8768
  "shape": [
8769
  320,
8770
  320,
8771
- 80
8772
  ],
8773
- "n_bytes": 16384000,
8774
  "dtype": "torch.int16"
8775
  }
8776
  },
8777
  "quant_format": "exl3",
8778
- "bits_per_weight": 5
8779
  },
8780
  "model.layers.35.post_attention_layernorm": {
8781
  "stored_tensors": {
@@ -8808,14 +8810,14 @@
8808
  "shape": [
8809
  320,
8810
  1728,
8811
- 64
8812
  ],
8813
- "n_bytes": 70778880,
8814
  "dtype": "torch.int16"
8815
  }
8816
  },
8817
  "quant_format": "exl3",
8818
- "bits_per_weight": 4
8819
  },
8820
  "model.layers.35.mlp.gate_proj": {
8821
  "stored_tensors": {
@@ -9159,14 +9161,14 @@
9159
  "shape": [
9160
  320,
9161
  320,
9162
- 64
9163
  ],
9164
- "n_bytes": 13107200,
9165
  "dtype": "torch.int16"
9166
  }
9167
  },
9168
  "quant_format": "exl3",
9169
- "bits_per_weight": 4
9170
  },
9171
  "model.layers.37.self_attn.k_proj": {
9172
  "stored_tensors": {
@@ -9195,14 +9197,14 @@
9195
  "shape": [
9196
  320,
9197
  64,
9198
- 64
9199
  ],
9200
- "n_bytes": 2621440,
9201
  "dtype": "torch.int16"
9202
  }
9203
  },
9204
  "quant_format": "exl3",
9205
- "bits_per_weight": 4
9206
  },
9207
  "model.layers.37.self_attn.v_proj": {
9208
  "stored_tensors": {
@@ -9260,14 +9262,14 @@
9260
  "shape": [
9261
  320,
9262
  320,
9263
- 64
9264
  ],
9265
- "n_bytes": 13107200,
9266
  "dtype": "torch.int16"
9267
  }
9268
  },
9269
  "quant_format": "exl3",
9270
- "bits_per_weight": 4
9271
  },
9272
  "model.layers.37.post_attention_layernorm": {
9273
  "stored_tensors": {
@@ -9300,14 +9302,14 @@
9300
  "shape": [
9301
  320,
9302
  1728,
9303
- 80
9304
  ],
9305
- "n_bytes": 88473600,
9306
  "dtype": "torch.int16"
9307
  }
9308
  },
9309
  "quant_format": "exl3",
9310
- "bits_per_weight": 5
9311
  },
9312
  "model.layers.37.mlp.gate_proj": {
9313
  "stored_tensors": {
@@ -9405,14 +9407,14 @@
9405
  "shape": [
9406
  320,
9407
  320,
9408
- 80
9409
  ],
9410
- "n_bytes": 16384000,
9411
  "dtype": "torch.int16"
9412
  }
9413
  },
9414
  "quant_format": "exl3",
9415
- "bits_per_weight": 5
9416
  },
9417
  "model.layers.38.self_attn.k_proj": {
9418
  "stored_tensors": {
@@ -9441,14 +9443,14 @@
9441
  "shape": [
9442
  320,
9443
  64,
9444
- 96
9445
  ],
9446
- "n_bytes": 3932160,
9447
  "dtype": "torch.int16"
9448
  }
9449
  },
9450
  "quant_format": "exl3",
9451
- "bits_per_weight": 6
9452
  },
9453
  "model.layers.38.self_attn.v_proj": {
9454
  "stored_tensors": {
@@ -9477,14 +9479,14 @@
9477
  "shape": [
9478
  320,
9479
  64,
9480
- 96
9481
  ],
9482
- "n_bytes": 3932160,
9483
  "dtype": "torch.int16"
9484
  }
9485
  },
9486
  "quant_format": "exl3",
9487
- "bits_per_weight": 6
9488
  },
9489
  "model.layers.38.self_attn.o_proj": {
9490
  "stored_tensors": {
@@ -9506,14 +9508,14 @@
9506
  "shape": [
9507
  320,
9508
  320,
9509
- 80
9510
  ],
9511
- "n_bytes": 16384000,
9512
  "dtype": "torch.int16"
9513
  }
9514
  },
9515
  "quant_format": "exl3",
9516
- "bits_per_weight": 5
9517
  },
9518
  "model.layers.38.post_attention_layernorm": {
9519
  "stored_tensors": {
@@ -9546,14 +9548,14 @@
9546
  "shape": [
9547
  320,
9548
  1728,
9549
- 64
9550
  ],
9551
- "n_bytes": 70778880,
9552
  "dtype": "torch.int16"
9553
  }
9554
  },
9555
  "quant_format": "exl3",
9556
- "bits_per_weight": 4
9557
  },
9558
  "model.layers.38.mlp.gate_proj": {
9559
  "stored_tensors": {
@@ -9651,14 +9653,14 @@
9651
  "shape": [
9652
  320,
9653
  320,
9654
- 64
9655
  ],
9656
- "n_bytes": 13107200,
9657
  "dtype": "torch.int16"
9658
  }
9659
  },
9660
  "quant_format": "exl3",
9661
- "bits_per_weight": 4
9662
  },
9663
  "model.layers.39.self_attn.k_proj": {
9664
  "stored_tensors": {
@@ -9687,14 +9689,14 @@
9687
  "shape": [
9688
  320,
9689
  64,
9690
- 64
9691
  ],
9692
- "n_bytes": 2621440,
9693
  "dtype": "torch.int16"
9694
  }
9695
  },
9696
  "quant_format": "exl3",
9697
- "bits_per_weight": 4
9698
  },
9699
  "model.layers.39.self_attn.v_proj": {
9700
  "stored_tensors": {
@@ -9723,14 +9725,14 @@
9723
  "shape": [
9724
  320,
9725
  64,
9726
- 80
9727
  ],
9728
- "n_bytes": 3276800,
9729
  "dtype": "torch.int16"
9730
  }
9731
  },
9732
  "quant_format": "exl3",
9733
- "bits_per_weight": 5
9734
  },
9735
  "model.layers.39.self_attn.o_proj": {
9736
  "stored_tensors": {
@@ -9752,14 +9754,14 @@
9752
  "shape": [
9753
  320,
9754
  320,
9755
- 64
9756
  ],
9757
- "n_bytes": 13107200,
9758
  "dtype": "torch.int16"
9759
  }
9760
  },
9761
  "quant_format": "exl3",
9762
- "bits_per_weight": 4
9763
  },
9764
  "model.layers.39.post_attention_layernorm": {
9765
  "stored_tensors": {
@@ -9792,14 +9794,14 @@
9792
  "shape": [
9793
  320,
9794
  1728,
9795
- 80
9796
  ],
9797
- "n_bytes": 88473600,
9798
  "dtype": "torch.int16"
9799
  }
9800
  },
9801
  "quant_format": "exl3",
9802
- "bits_per_weight": 5
9803
  },
9804
  "model.layers.39.mlp.gate_proj": {
9805
  "stored_tensors": {
@@ -10143,14 +10145,14 @@
10143
  "shape": [
10144
  320,
10145
  320,
10146
- 80
10147
  ],
10148
- "n_bytes": 16384000,
10149
  "dtype": "torch.int16"
10150
  }
10151
  },
10152
  "quant_format": "exl3",
10153
- "bits_per_weight": 5
10154
  },
10155
  "model.layers.41.self_attn.k_proj": {
10156
  "stored_tensors": {
@@ -10244,14 +10246,14 @@
10244
  "shape": [
10245
  320,
10246
  320,
10247
- 80
10248
  ],
10249
- "n_bytes": 16384000,
10250
  "dtype": "torch.int16"
10251
  }
10252
  },
10253
  "quant_format": "exl3",
10254
- "bits_per_weight": 5
10255
  },
10256
  "model.layers.41.post_attention_layernorm": {
10257
  "stored_tensors": {
@@ -10284,14 +10286,14 @@
10284
  "shape": [
10285
  320,
10286
  1728,
10287
- 64
10288
  ],
10289
- "n_bytes": 70778880,
10290
  "dtype": "torch.int16"
10291
  }
10292
  },
10293
  "quant_format": "exl3",
10294
- "bits_per_weight": 4
10295
  },
10296
  "model.layers.41.mlp.gate_proj": {
10297
  "stored_tensors": {
@@ -10389,14 +10391,14 @@
10389
  "shape": [
10390
  320,
10391
  320,
10392
- 64
10393
  ],
10394
- "n_bytes": 13107200,
10395
  "dtype": "torch.int16"
10396
  }
10397
  },
10398
  "quant_format": "exl3",
10399
- "bits_per_weight": 4
10400
  },
10401
  "model.layers.42.self_attn.k_proj": {
10402
  "stored_tensors": {
@@ -10425,14 +10427,14 @@
10425
  "shape": [
10426
  320,
10427
  64,
10428
- 64
10429
  ],
10430
- "n_bytes": 2621440,
10431
  "dtype": "torch.int16"
10432
  }
10433
  },
10434
  "quant_format": "exl3",
10435
- "bits_per_weight": 4
10436
  },
10437
  "model.layers.42.self_attn.v_proj": {
10438
  "stored_tensors": {
@@ -10490,14 +10492,14 @@
10490
  "shape": [
10491
  320,
10492
  320,
10493
- 64
10494
  ],
10495
- "n_bytes": 13107200,
10496
  "dtype": "torch.int16"
10497
  }
10498
  },
10499
  "quant_format": "exl3",
10500
- "bits_per_weight": 4
10501
  },
10502
  "model.layers.42.post_attention_layernorm": {
10503
  "stored_tensors": {
@@ -10530,14 +10532,14 @@
10530
  "shape": [
10531
  320,
10532
  1728,
10533
- 80
10534
  ],
10535
- "n_bytes": 88473600,
10536
  "dtype": "torch.int16"
10537
  }
10538
  },
10539
  "quant_format": "exl3",
10540
- "bits_per_weight": 5
10541
  },
10542
  "model.layers.42.mlp.gate_proj": {
10543
  "stored_tensors": {
@@ -10881,14 +10883,14 @@
10881
  "shape": [
10882
  320,
10883
  320,
10884
- 80
10885
  ],
10886
- "n_bytes": 16384000,
10887
  "dtype": "torch.int16"
10888
  }
10889
  },
10890
  "quant_format": "exl3",
10891
- "bits_per_weight": 5
10892
  },
10893
  "model.layers.44.self_attn.k_proj": {
10894
  "stored_tensors": {
@@ -10982,14 +10984,14 @@
10982
  "shape": [
10983
  320,
10984
  320,
10985
- 80
10986
  ],
10987
- "n_bytes": 16384000,
10988
  "dtype": "torch.int16"
10989
  }
10990
  },
10991
  "quant_format": "exl3",
10992
- "bits_per_weight": 5
10993
  },
10994
  "model.layers.44.post_attention_layernorm": {
10995
  "stored_tensors": {
@@ -11022,14 +11024,14 @@
11022
  "shape": [
11023
  320,
11024
  1728,
11025
- 64
11026
  ],
11027
- "n_bytes": 70778880,
11028
  "dtype": "torch.int16"
11029
  }
11030
  },
11031
  "quant_format": "exl3",
11032
- "bits_per_weight": 4
11033
  },
11034
  "model.layers.44.mlp.gate_proj": {
11035
  "stored_tensors": {
@@ -11127,14 +11129,14 @@
11127
  "shape": [
11128
  320,
11129
  320,
11130
- 64
11131
  ],
11132
- "n_bytes": 13107200,
11133
  "dtype": "torch.int16"
11134
  }
11135
  },
11136
  "quant_format": "exl3",
11137
- "bits_per_weight": 4
11138
  },
11139
  "model.layers.45.self_attn.k_proj": {
11140
  "stored_tensors": {
@@ -11163,14 +11165,14 @@
11163
  "shape": [
11164
  320,
11165
  64,
11166
- 64
11167
  ],
11168
- "n_bytes": 2621440,
11169
  "dtype": "torch.int16"
11170
  }
11171
  },
11172
  "quant_format": "exl3",
11173
- "bits_per_weight": 4
11174
  },
11175
  "model.layers.45.self_attn.v_proj": {
11176
  "stored_tensors": {
@@ -11228,14 +11230,14 @@
11228
  "shape": [
11229
  320,
11230
  320,
11231
- 64
11232
  ],
11233
- "n_bytes": 13107200,
11234
  "dtype": "torch.int16"
11235
  }
11236
  },
11237
  "quant_format": "exl3",
11238
- "bits_per_weight": 4
11239
  },
11240
  "model.layers.45.post_attention_layernorm": {
11241
  "stored_tensors": {
@@ -11268,14 +11270,14 @@
11268
  "shape": [
11269
  320,
11270
  1728,
11271
- 80
11272
  ],
11273
- "n_bytes": 88473600,
11274
  "dtype": "torch.int16"
11275
  }
11276
  },
11277
  "quant_format": "exl3",
11278
- "bits_per_weight": 5
11279
  },
11280
  "model.layers.45.mlp.gate_proj": {
11281
  "stored_tensors": {
@@ -11655,14 +11657,14 @@
11655
  "shape": [
11656
  320,
11657
  64,
11658
- 64
11659
  ],
11660
- "n_bytes": 2621440,
11661
  "dtype": "torch.int16"
11662
  }
11663
  },
11664
  "quant_format": "exl3",
11665
- "bits_per_weight": 4
11666
  },
11667
  "model.layers.47.self_attn.v_proj": {
11668
  "stored_tensors": {
@@ -11691,14 +11693,14 @@
11691
  "shape": [
11692
  320,
11693
  64,
11694
- 80
11695
  ],
11696
- "n_bytes": 3276800,
11697
  "dtype": "torch.int16"
11698
  }
11699
  },
11700
  "quant_format": "exl3",
11701
- "bits_per_weight": 5
11702
  },
11703
  "model.layers.47.self_attn.o_proj": {
11704
  "stored_tensors": {
@@ -12393,14 +12395,14 @@
12393
  "shape": [
12394
  320,
12395
  64,
12396
- 64
12397
  ],
12398
- "n_bytes": 2621440,
12399
  "dtype": "torch.int16"
12400
  }
12401
  },
12402
  "quant_format": "exl3",
12403
- "bits_per_weight": 4
12404
  },
12405
  "model.layers.50.self_attn.v_proj": {
12406
  "stored_tensors": {
@@ -12429,14 +12431,14 @@
12429
  "shape": [
12430
  320,
12431
  64,
12432
- 96
12433
  ],
12434
- "n_bytes": 3932160,
12435
  "dtype": "torch.int16"
12436
  }
12437
  },
12438
  "quant_format": "exl3",
12439
- "bits_per_weight": 6
12440
  },
12441
  "model.layers.50.self_attn.o_proj": {
12442
  "stored_tensors": {
@@ -13131,14 +13133,14 @@
13131
  "shape": [
13132
  320,
13133
  64,
13134
- 64
13135
  ],
13136
- "n_bytes": 2621440,
13137
  "dtype": "torch.int16"
13138
  }
13139
  },
13140
  "quant_format": "exl3",
13141
- "bits_per_weight": 4
13142
  },
13143
  "model.layers.53.self_attn.v_proj": {
13144
  "stored_tensors": {
@@ -13587,14 +13589,14 @@
13587
  "shape": [
13588
  320,
13589
  320,
13590
- 64
13591
  ],
13592
- "n_bytes": 13107200,
13593
  "dtype": "torch.int16"
13594
  }
13595
  },
13596
  "quant_format": "exl3",
13597
- "bits_per_weight": 4
13598
  },
13599
  "model.layers.55.self_attn.k_proj": {
13600
  "stored_tensors": {
@@ -13623,14 +13625,14 @@
13623
  "shape": [
13624
  320,
13625
  64,
13626
- 64
13627
  ],
13628
- "n_bytes": 2621440,
13629
  "dtype": "torch.int16"
13630
  }
13631
  },
13632
  "quant_format": "exl3",
13633
- "bits_per_weight": 4
13634
  },
13635
  "model.layers.55.self_attn.v_proj": {
13636
  "stored_tensors": {
@@ -13659,14 +13661,14 @@
13659
  "shape": [
13660
  320,
13661
  64,
13662
- 80
13663
  ],
13664
- "n_bytes": 3276800,
13665
  "dtype": "torch.int16"
13666
  }
13667
  },
13668
  "quant_format": "exl3",
13669
- "bits_per_weight": 5
13670
  },
13671
  "model.layers.55.self_attn.o_proj": {
13672
  "stored_tensors": {
@@ -13688,14 +13690,14 @@
13688
  "shape": [
13689
  320,
13690
  320,
13691
- 64
13692
  ],
13693
- "n_bytes": 13107200,
13694
  "dtype": "torch.int16"
13695
  }
13696
  },
13697
  "quant_format": "exl3",
13698
- "bits_per_weight": 4
13699
  },
13700
  "model.layers.55.post_attention_layernorm": {
13701
  "stored_tensors": {
@@ -13728,14 +13730,14 @@
13728
  "shape": [
13729
  320,
13730
  1728,
13731
- 80
13732
  ],
13733
- "n_bytes": 88473600,
13734
  "dtype": "torch.int16"
13735
  }
13736
  },
13737
  "quant_format": "exl3",
13738
- "bits_per_weight": 5
13739
  },
13740
  "model.layers.55.mlp.gate_proj": {
13741
  "stored_tensors": {
@@ -13833,14 +13835,14 @@
13833
  "shape": [
13834
  320,
13835
  320,
13836
- 80
13837
  ],
13838
- "n_bytes": 16384000,
13839
  "dtype": "torch.int16"
13840
  }
13841
  },
13842
  "quant_format": "exl3",
13843
- "bits_per_weight": 5
13844
  },
13845
  "model.layers.56.self_attn.k_proj": {
13846
  "stored_tensors": {
@@ -13934,14 +13936,14 @@
13934
  "shape": [
13935
  320,
13936
  320,
13937
- 80
13938
  ],
13939
- "n_bytes": 16384000,
13940
  "dtype": "torch.int16"
13941
  }
13942
  },
13943
  "quant_format": "exl3",
13944
- "bits_per_weight": 5
13945
  },
13946
  "model.layers.56.post_attention_layernorm": {
13947
  "stored_tensors": {
@@ -13974,14 +13976,14 @@
13974
  "shape": [
13975
  320,
13976
  1728,
13977
- 64
13978
  ],
13979
- "n_bytes": 70778880,
13980
  "dtype": "torch.int16"
13981
  }
13982
  },
13983
  "quant_format": "exl3",
13984
- "bits_per_weight": 4
13985
  },
13986
  "model.layers.56.mlp.gate_proj": {
13987
  "stored_tensors": {
@@ -14325,14 +14327,14 @@
14325
  "shape": [
14326
  320,
14327
  320,
14328
- 64
14329
  ],
14330
- "n_bytes": 13107200,
14331
  "dtype": "torch.int16"
14332
  }
14333
  },
14334
  "quant_format": "exl3",
14335
- "bits_per_weight": 4
14336
  },
14337
  "model.layers.58.self_attn.k_proj": {
14338
  "stored_tensors": {
@@ -14361,14 +14363,14 @@
14361
  "shape": [
14362
  320,
14363
  64,
14364
- 64
14365
  ],
14366
- "n_bytes": 2621440,
14367
  "dtype": "torch.int16"
14368
  }
14369
  },
14370
  "quant_format": "exl3",
14371
- "bits_per_weight": 4
14372
  },
14373
  "model.layers.58.self_attn.v_proj": {
14374
  "stored_tensors": {
@@ -14426,14 +14428,14 @@
14426
  "shape": [
14427
  320,
14428
  320,
14429
- 64
14430
  ],
14431
- "n_bytes": 13107200,
14432
  "dtype": "torch.int16"
14433
  }
14434
  },
14435
  "quant_format": "exl3",
14436
- "bits_per_weight": 4
14437
  },
14438
  "model.layers.58.post_attention_layernorm": {
14439
  "stored_tensors": {
@@ -14466,14 +14468,14 @@
14466
  "shape": [
14467
  320,
14468
  1728,
14469
- 80
14470
  ],
14471
- "n_bytes": 88473600,
14472
  "dtype": "torch.int16"
14473
  }
14474
  },
14475
  "quant_format": "exl3",
14476
- "bits_per_weight": 5
14477
  },
14478
  "model.layers.58.mlp.gate_proj": {
14479
  "stored_tensors": {
@@ -14571,14 +14573,14 @@
14571
  "shape": [
14572
  320,
14573
  320,
14574
- 80
14575
  ],
14576
- "n_bytes": 16384000,
14577
  "dtype": "torch.int16"
14578
  }
14579
  },
14580
  "quant_format": "exl3",
14581
- "bits_per_weight": 5
14582
  },
14583
  "model.layers.59.self_attn.k_proj": {
14584
  "stored_tensors": {
@@ -14672,14 +14674,14 @@
14672
  "shape": [
14673
  320,
14674
  320,
14675
- 80
14676
  ],
14677
- "n_bytes": 16384000,
14678
  "dtype": "torch.int16"
14679
  }
14680
  },
14681
  "quant_format": "exl3",
14682
- "bits_per_weight": 5
14683
  },
14684
  "model.layers.59.post_attention_layernorm": {
14685
  "stored_tensors": {
@@ -14712,14 +14714,14 @@
14712
  "shape": [
14713
  320,
14714
  1728,
14715
- 64
14716
  ],
14717
- "n_bytes": 70778880,
14718
  "dtype": "torch.int16"
14719
  }
14720
  },
14721
  "quant_format": "exl3",
14722
- "bits_per_weight": 4
14723
  },
14724
  "model.layers.59.mlp.gate_proj": {
14725
  "stored_tensors": {
@@ -15063,14 +15065,14 @@
15063
  "shape": [
15064
  320,
15065
  320,
15066
- 64
15067
  ],
15068
- "n_bytes": 13107200,
15069
  "dtype": "torch.int16"
15070
  }
15071
  },
15072
  "quant_format": "exl3",
15073
- "bits_per_weight": 4
15074
  },
15075
  "model.layers.61.self_attn.k_proj": {
15076
  "stored_tensors": {
@@ -15099,14 +15101,14 @@
15099
  "shape": [
15100
  320,
15101
  64,
15102
- 64
15103
  ],
15104
- "n_bytes": 2621440,
15105
  "dtype": "torch.int16"
15106
  }
15107
  },
15108
  "quant_format": "exl3",
15109
- "bits_per_weight": 4
15110
  },
15111
  "model.layers.61.self_attn.v_proj": {
15112
  "stored_tensors": {
@@ -15164,14 +15166,14 @@
15164
  "shape": [
15165
  320,
15166
  320,
15167
- 64
15168
  ],
15169
- "n_bytes": 13107200,
15170
  "dtype": "torch.int16"
15171
  }
15172
  },
15173
  "quant_format": "exl3",
15174
- "bits_per_weight": 4
15175
  },
15176
  "model.layers.61.post_attention_layernorm": {
15177
  "stored_tensors": {
@@ -15204,14 +15206,14 @@
15204
  "shape": [
15205
  320,
15206
  1728,
15207
- 80
15208
  ],
15209
- "n_bytes": 88473600,
15210
  "dtype": "torch.int16"
15211
  }
15212
  },
15213
  "quant_format": "exl3",
15214
- "bits_per_weight": 5
15215
  },
15216
  "model.layers.61.mlp.gate_proj": {
15217
  "stored_tensors": {
@@ -15309,14 +15311,14 @@
15309
  "shape": [
15310
  320,
15311
  320,
15312
- 80
15313
  ],
15314
- "n_bytes": 16384000,
15315
  "dtype": "torch.int16"
15316
  }
15317
  },
15318
  "quant_format": "exl3",
15319
- "bits_per_weight": 5
15320
  },
15321
  "model.layers.62.self_attn.k_proj": {
15322
  "stored_tensors": {
@@ -15345,14 +15347,14 @@
15345
  "shape": [
15346
  320,
15347
  64,
15348
- 96
15349
  ],
15350
- "n_bytes": 3932160,
15351
  "dtype": "torch.int16"
15352
  }
15353
  },
15354
  "quant_format": "exl3",
15355
- "bits_per_weight": 6
15356
  },
15357
  "model.layers.62.self_attn.v_proj": {
15358
  "stored_tensors": {
@@ -15381,14 +15383,14 @@
15381
  "shape": [
15382
  320,
15383
  64,
15384
- 96
15385
  ],
15386
- "n_bytes": 3932160,
15387
  "dtype": "torch.int16"
15388
  }
15389
  },
15390
  "quant_format": "exl3",
15391
- "bits_per_weight": 6
15392
  },
15393
  "model.layers.62.self_attn.o_proj": {
15394
  "stored_tensors": {
@@ -15410,14 +15412,14 @@
15410
  "shape": [
15411
  320,
15412
  320,
15413
- 80
15414
  ],
15415
- "n_bytes": 16384000,
15416
  "dtype": "torch.int16"
15417
  }
15418
  },
15419
  "quant_format": "exl3",
15420
- "bits_per_weight": 5
15421
  },
15422
  "model.layers.62.post_attention_layernorm": {
15423
  "stored_tensors": {
@@ -15450,14 +15452,14 @@
15450
  "shape": [
15451
  320,
15452
  1728,
15453
- 64
15454
  ],
15455
- "n_bytes": 70778880,
15456
  "dtype": "torch.int16"
15457
  }
15458
  },
15459
  "quant_format": "exl3",
15460
- "bits_per_weight": 4
15461
  },
15462
  "model.layers.62.mlp.gate_proj": {
15463
  "stored_tensors": {
@@ -15555,14 +15557,14 @@
15555
  "shape": [
15556
  320,
15557
  320,
15558
- 64
15559
  ],
15560
- "n_bytes": 13107200,
15561
  "dtype": "torch.int16"
15562
  }
15563
  },
15564
  "quant_format": "exl3",
15565
- "bits_per_weight": 4
15566
  },
15567
  "model.layers.63.self_attn.k_proj": {
15568
  "stored_tensors": {
@@ -15591,14 +15593,14 @@
15591
  "shape": [
15592
  320,
15593
  64,
15594
- 64
15595
  ],
15596
- "n_bytes": 2621440,
15597
  "dtype": "torch.int16"
15598
  }
15599
  },
15600
  "quant_format": "exl3",
15601
- "bits_per_weight": 4
15602
  },
15603
  "model.layers.63.self_attn.v_proj": {
15604
  "stored_tensors": {
@@ -15627,14 +15629,14 @@
15627
  "shape": [
15628
  320,
15629
  64,
15630
- 80
15631
  ],
15632
- "n_bytes": 3276800,
15633
  "dtype": "torch.int16"
15634
  }
15635
  },
15636
  "quant_format": "exl3",
15637
- "bits_per_weight": 5
15638
  },
15639
  "model.layers.63.self_attn.o_proj": {
15640
  "stored_tensors": {
@@ -15656,14 +15658,14 @@
15656
  "shape": [
15657
  320,
15658
  320,
15659
- 64
15660
  ],
15661
- "n_bytes": 13107200,
15662
  "dtype": "torch.int16"
15663
  }
15664
  },
15665
  "quant_format": "exl3",
15666
- "bits_per_weight": 4
15667
  },
15668
  "model.layers.63.post_attention_layernorm": {
15669
  "stored_tensors": {
@@ -15696,14 +15698,14 @@
15696
  "shape": [
15697
  320,
15698
  1728,
15699
- 80
15700
  ],
15701
- "n_bytes": 88473600,
15702
  "dtype": "torch.int16"
15703
  }
15704
  },
15705
  "quant_format": "exl3",
15706
- "bits_per_weight": 5
15707
  },
15708
  "model.layers.63.mlp.gate_proj": {
15709
  "stored_tensors": {
 
 
587
  "shape": [
588
  320,
589
  64,
590
+ 80
591
  ],
592
+ "n_bytes": 3276800,
593
  "dtype": "torch.int16"
594
  }
595
  },
596
  "quant_format": "exl3",
597
+ "bits_per_weight": 5
598
  },
599
  "model.layers.2.self_attn.v_proj": {
600
  "stored_tensors": {
 
623
  "shape": [
624
  320,
625
  64,
626
+ 80
627
  ],
628
+ "n_bytes": 3276800,
629
  "dtype": "torch.int16"
630
  }
631
  },
632
  "quant_format": "exl3",
633
+ "bits_per_weight": 5
634
  },
635
  "model.layers.2.self_attn.o_proj": {
636
  "stored_tensors": {
 
1325
  "shape": [
1326
  320,
1327
  64,
1328
+ 96
1329
  ],
1330
+ "n_bytes": 3932160,
1331
  "dtype": "torch.int16"
1332
  }
1333
  },
1334
  "quant_format": "exl3",
1335
+ "bits_per_weight": 6
1336
  },
1337
  "model.layers.5.self_attn.v_proj": {
1338
  "stored_tensors": {
 
1781
  "shape": [
1782
  320,
1783
  320,
1784
+ 80
1785
  ],
1786
+ "n_bytes": 16384000,
1787
  "dtype": "torch.int16"
1788
  }
1789
  },
1790
  "quant_format": "exl3",
1791
+ "bits_per_weight": 5
1792
  },
1793
  "model.layers.7.self_attn.k_proj": {
1794
  "stored_tensors": {
 
1817
  "shape": [
1818
  320,
1819
  64,
1820
+ 96
1821
  ],
1822
+ "n_bytes": 3932160,
1823
  "dtype": "torch.int16"
1824
  }
1825
  },
1826
  "quant_format": "exl3",
1827
+ "bits_per_weight": 6
1828
  },
1829
  "model.layers.7.self_attn.v_proj": {
1830
  "stored_tensors": {
 
1853
  "shape": [
1854
  320,
1855
  64,
1856
+ 96
1857
  ],
1858
+ "n_bytes": 3932160,
1859
  "dtype": "torch.int16"
1860
  }
1861
  },
1862
  "quant_format": "exl3",
1863
+ "bits_per_weight": 6
1864
  },
1865
  "model.layers.7.self_attn.o_proj": {
1866
  "stored_tensors": {
 
1882
  "shape": [
1883
  320,
1884
  320,
1885
+ 80
1886
  ],
1887
+ "n_bytes": 16384000,
1888
  "dtype": "torch.int16"
1889
  }
1890
  },
1891
  "quant_format": "exl3",
1892
+ "bits_per_weight": 5
1893
  },
1894
  "model.layers.7.post_attention_layernorm": {
1895
  "stored_tensors": {
 
1922
  "shape": [
1923
  320,
1924
  1728,
1925
+ 64
1926
  ],
1927
+ "n_bytes": 70778880,
1928
  "dtype": "torch.int16"
1929
  }
1930
  },
1931
  "quant_format": "exl3",
1932
+ "bits_per_weight": 4
1933
  },
1934
  "model.layers.7.mlp.gate_proj": {
1935
  "stored_tensors": {
 
2027
  "shape": [
2028
  320,
2029
  320,
2030
+ 64
2031
  ],
2032
+ "n_bytes": 13107200,
2033
  "dtype": "torch.int16"
2034
  }
2035
  },
2036
  "quant_format": "exl3",
2037
+ "bits_per_weight": 4
2038
  },
2039
  "model.layers.8.self_attn.k_proj": {
2040
  "stored_tensors": {
 
2128
  "shape": [
2129
  320,
2130
  320,
2131
+ 64
2132
  ],
2133
+ "n_bytes": 13107200,
2134
  "dtype": "torch.int16"
2135
  }
2136
  },
2137
  "quant_format": "exl3",
2138
+ "bits_per_weight": 4
2139
  },
2140
  "model.layers.8.post_attention_layernorm": {
2141
  "stored_tensors": {
 
2168
  "shape": [
2169
  320,
2170
  1728,
2171
+ 80
2172
  ],
2173
+ "n_bytes": 88473600,
2174
  "dtype": "torch.int16"
2175
  }
2176
  },
2177
  "quant_format": "exl3",
2178
+ "bits_per_weight": 5
2179
  },
2180
  "model.layers.8.mlp.gate_proj": {
2181
  "stored_tensors": {
 
2519
  "shape": [
2520
  320,
2521
  320,
2522
+ 80
2523
  ],
2524
+ "n_bytes": 16384000,
2525
  "dtype": "torch.int16"
2526
  }
2527
  },
2528
  "quant_format": "exl3",
2529
+ "bits_per_weight": 5
2530
  },
2531
  "model.layers.10.self_attn.k_proj": {
2532
  "stored_tensors": {
 
2555
  "shape": [
2556
  320,
2557
  64,
2558
+ 96
2559
  ],
2560
+ "n_bytes": 3932160,
2561
  "dtype": "torch.int16"
2562
  }
2563
  },
2564
  "quant_format": "exl3",
2565
+ "bits_per_weight": 6
2566
  },
2567
  "model.layers.10.self_attn.v_proj": {
2568
  "stored_tensors": {
 
2620
  "shape": [
2621
  320,
2622
  320,
2623
+ 80
2624
  ],
2625
+ "n_bytes": 16384000,
2626
  "dtype": "torch.int16"
2627
  }
2628
  },
2629
  "quant_format": "exl3",
2630
+ "bits_per_weight": 5
2631
  },
2632
  "model.layers.10.post_attention_layernorm": {
2633
  "stored_tensors": {
 
2660
  "shape": [
2661
  320,
2662
  1728,
2663
+ 64
2664
  ],
2665
+ "n_bytes": 70778880,
2666
  "dtype": "torch.int16"
2667
  }
2668
  },
2669
  "quant_format": "exl3",
2670
+ "bits_per_weight": 4
2671
  },
2672
  "model.layers.10.mlp.gate_proj": {
2673
  "stored_tensors": {
 
2765
  "shape": [
2766
  320,
2767
  320,
2768
+ 64
2769
  ],
2770
+ "n_bytes": 13107200,
2771
  "dtype": "torch.int16"
2772
  }
2773
  },
2774
  "quant_format": "exl3",
2775
+ "bits_per_weight": 4
2776
  },
2777
  "model.layers.11.self_attn.k_proj": {
2778
  "stored_tensors": {
 
2866
  "shape": [
2867
  320,
2868
  320,
2869
+ 64
2870
  ],
2871
+ "n_bytes": 13107200,
2872
  "dtype": "torch.int16"
2873
  }
2874
  },
2875
  "quant_format": "exl3",
2876
+ "bits_per_weight": 4
2877
  },
2878
  "model.layers.11.post_attention_layernorm": {
2879
  "stored_tensors": {
 
2906
  "shape": [
2907
  320,
2908
  1728,
2909
+ 80
2910
  ],
2911
+ "n_bytes": 88473600,
2912
  "dtype": "torch.int16"
2913
  }
2914
  },
2915
  "quant_format": "exl3",
2916
+ "bits_per_weight": 5
2917
  },
2918
  "model.layers.11.mlp.gate_proj": {
2919
  "stored_tensors": {
 
3257
  "shape": [
3258
  320,
3259
  320,
3260
+ 80
3261
  ],
3262
+ "n_bytes": 16384000,
3263
  "dtype": "torch.int16"
3264
  }
3265
  },
3266
  "quant_format": "exl3",
3267
+ "bits_per_weight": 5
3268
  },
3269
  "model.layers.13.self_attn.k_proj": {
3270
  "stored_tensors": {
 
3293
  "shape": [
3294
  320,
3295
  64,
3296
+ 96
3297
  ],
3298
+ "n_bytes": 3932160,
3299
  "dtype": "torch.int16"
3300
  }
3301
  },
3302
  "quant_format": "exl3",
3303
+ "bits_per_weight": 6
3304
  },
3305
  "model.layers.13.self_attn.v_proj": {
3306
  "stored_tensors": {
 
3358
  "shape": [
3359
  320,
3360
  320,
3361
+ 80
3362
  ],
3363
+ "n_bytes": 16384000,
3364
  "dtype": "torch.int16"
3365
  }
3366
  },
3367
  "quant_format": "exl3",
3368
+ "bits_per_weight": 5
3369
  },
3370
  "model.layers.13.post_attention_layernorm": {
3371
  "stored_tensors": {
 
3398
  "shape": [
3399
  320,
3400
  1728,
3401
+ 64
3402
  ],
3403
+ "n_bytes": 70778880,
3404
  "dtype": "torch.int16"
3405
  }
3406
  },
3407
  "quant_format": "exl3",
3408
+ "bits_per_weight": 4
3409
  },
3410
  "model.layers.13.mlp.gate_proj": {
3411
  "stored_tensors": {
 
3503
  "shape": [
3504
  320,
3505
  320,
3506
+ 64
3507
  ],
3508
+ "n_bytes": 13107200,
3509
  "dtype": "torch.int16"
3510
  }
3511
  },
3512
  "quant_format": "exl3",
3513
+ "bits_per_weight": 4
3514
  },
3515
  "model.layers.14.self_attn.k_proj": {
3516
  "stored_tensors": {
 
3539
  "shape": [
3540
  320,
3541
  64,
3542
+ 80
3543
  ],
3544
+ "n_bytes": 3276800,
3545
  "dtype": "torch.int16"
3546
  }
3547
  },
3548
  "quant_format": "exl3",
3549
+ "bits_per_weight": 5
3550
  },
3551
  "model.layers.14.self_attn.v_proj": {
3552
  "stored_tensors": {
 
3575
  "shape": [
3576
  320,
3577
  64,
3578
+ 80
3579
  ],
3580
+ "n_bytes": 3276800,
3581
  "dtype": "torch.int16"
3582
  }
3583
  },
3584
  "quant_format": "exl3",
3585
+ "bits_per_weight": 5
3586
  },
3587
  "model.layers.14.self_attn.o_proj": {
3588
  "stored_tensors": {
 
3604
  "shape": [
3605
  320,
3606
  320,
3607
+ 64
3608
  ],
3609
+ "n_bytes": 13107200,
3610
  "dtype": "torch.int16"
3611
  }
3612
  },
3613
  "quant_format": "exl3",
3614
+ "bits_per_weight": 4
3615
  },
3616
  "model.layers.14.post_attention_layernorm": {
3617
  "stored_tensors": {
 
3644
  "shape": [
3645
  320,
3646
  1728,
3647
+ 80
3648
  ],
3649
+ "n_bytes": 88473600,
3650
  "dtype": "torch.int16"
3651
  }
3652
  },
3653
  "quant_format": "exl3",
3654
+ "bits_per_weight": 5
3655
  },
3656
  "model.layers.14.mlp.gate_proj": {
3657
  "stored_tensors": {
 
3749
  "shape": [
3750
  320,
3751
  320,
3752
+ 80
3753
  ],
3754
+ "n_bytes": 16384000,
3755
  "dtype": "torch.int16"
3756
  }
3757
  },
3758
  "quant_format": "exl3",
3759
+ "bits_per_weight": 5
3760
  },
3761
  "model.layers.15.self_attn.k_proj": {
3762
  "stored_tensors": {
 
3785
  "shape": [
3786
  320,
3787
  64,
3788
+ 96
3789
  ],
3790
+ "n_bytes": 3932160,
3791
  "dtype": "torch.int16"
3792
  }
3793
  },
3794
  "quant_format": "exl3",
3795
+ "bits_per_weight": 6
3796
  },
3797
  "model.layers.15.self_attn.v_proj": {
3798
  "stored_tensors": {
 
3821
  "shape": [
3822
  320,
3823
  64,
3824
+ 96
3825
  ],
3826
+ "n_bytes": 3932160,
3827
  "dtype": "torch.int16"
3828
  }
3829
  },
3830
  "quant_format": "exl3",
3831
+ "bits_per_weight": 6
3832
  },
3833
  "model.layers.15.self_attn.o_proj": {
3834
  "stored_tensors": {
 
3850
  "shape": [
3851
  320,
3852
  320,
3853
+ 80
3854
  ],
3855
+ "n_bytes": 16384000,
3856
  "dtype": "torch.int16"
3857
  }
3858
  },
3859
  "quant_format": "exl3",
3860
+ "bits_per_weight": 5
3861
  },
3862
  "model.layers.15.post_attention_layernorm": {
3863
  "stored_tensors": {
 
3890
  "shape": [
3891
  320,
3892
  1728,
3893
+ 64
3894
  ],
3895
+ "n_bytes": 70778880,
3896
  "dtype": "torch.int16"
3897
  }
3898
  },
3899
  "quant_format": "exl3",
3900
+ "bits_per_weight": 4
3901
  },
3902
  "model.layers.15.mlp.gate_proj": {
3903
  "stored_tensors": {
 
4241
  "shape": [
4242
  320,
4243
  320,
4244
+ 64
4245
  ],
4246
+ "n_bytes": 13107200,
4247
  "dtype": "torch.int16"
4248
  }
4249
  },
4250
  "quant_format": "exl3",
4251
+ "bits_per_weight": 4
4252
  },
4253
  "model.layers.17.self_attn.k_proj": {
4254
  "stored_tensors": {
 
4342
  "shape": [
4343
  320,
4344
  320,
4345
+ 64
4346
  ],
4347
+ "n_bytes": 13107200,
4348
  "dtype": "torch.int16"
4349
  }
4350
  },
4351
  "quant_format": "exl3",
4352
+ "bits_per_weight": 4
4353
  },
4354
  "model.layers.17.post_attention_layernorm": {
4355
  "stored_tensors": {
 
4382
  "shape": [
4383
  320,
4384
  1728,
4385
+ 80
4386
  ],
4387
+ "n_bytes": 88473600,
4388
  "dtype": "torch.int16"
4389
  }
4390
  },
4391
  "quant_format": "exl3",
4392
+ "bits_per_weight": 5
4393
  },
4394
  "model.layers.17.mlp.gate_proj": {
4395
  "stored_tensors": {
 
4487
  "shape": [
4488
  320,
4489
  320,
4490
+ 80
4491
  ],
4492
+ "n_bytes": 16384000,
4493
  "dtype": "torch.int16"
4494
  }
4495
  },
4496
  "quant_format": "exl3",
4497
+ "bits_per_weight": 5
4498
  },
4499
  "model.layers.18.self_attn.k_proj": {
4500
  "stored_tensors": {
 
4523
  "shape": [
4524
  320,
4525
  64,
4526
+ 96
4527
  ],
4528
+ "n_bytes": 3932160,
4529
  "dtype": "torch.int16"
4530
  }
4531
  },
4532
  "quant_format": "exl3",
4533
+ "bits_per_weight": 6
4534
  },
4535
  "model.layers.18.self_attn.v_proj": {
4536
  "stored_tensors": {
 
4588
  "shape": [
4589
  320,
4590
  320,
4591
+ 80
4592
  ],
4593
+ "n_bytes": 16384000,
4594
  "dtype": "torch.int16"
4595
  }
4596
  },
4597
  "quant_format": "exl3",
4598
+ "bits_per_weight": 5
4599
  },
4600
  "model.layers.18.post_attention_layernorm": {
4601
  "stored_tensors": {
 
4628
  "shape": [
4629
  320,
4630
  1728,
4631
+ 64
4632
  ],
4633
+ "n_bytes": 70778880,
4634
  "dtype": "torch.int16"
4635
  }
4636
  },
4637
  "quant_format": "exl3",
4638
+ "bits_per_weight": 4
4639
  },
4640
  "model.layers.18.mlp.gate_proj": {
4641
  "stored_tensors": {
 
4979
  "shape": [
4980
  320,
4981
  320,
4982
+ 64
4983
  ],
4984
+ "n_bytes": 13107200,
4985
  "dtype": "torch.int16"
4986
  }
4987
  },
4988
  "quant_format": "exl3",
4989
+ "bits_per_weight": 4
4990
  },
4991
  "model.layers.20.self_attn.k_proj": {
4992
  "stored_tensors": {
 
5080
  "shape": [
5081
  320,
5082
  320,
5083
+ 64
5084
  ],
5085
+ "n_bytes": 13107200,
5086
  "dtype": "torch.int16"
5087
  }
5088
  },
5089
  "quant_format": "exl3",
5090
+ "bits_per_weight": 4
5091
  },
5092
  "model.layers.20.post_attention_layernorm": {
5093
  "stored_tensors": {
 
5120
  "shape": [
5121
  320,
5122
  1728,
5123
+ 80
5124
  ],
5125
+ "n_bytes": 88473600,
5126
  "dtype": "torch.int16"
5127
  }
5128
  },
5129
  "quant_format": "exl3",
5130
+ "bits_per_weight": 5
5131
  },
5132
  "model.layers.20.mlp.gate_proj": {
5133
  "stored_tensors": {
 
5225
  "shape": [
5226
  320,
5227
  320,
5228
+ 80
5229
  ],
5230
+ "n_bytes": 16384000,
5231
  "dtype": "torch.int16"
5232
  }
5233
  },
5234
  "quant_format": "exl3",
5235
+ "bits_per_weight": 5
5236
  },
5237
  "model.layers.21.self_attn.k_proj": {
5238
  "stored_tensors": {
 
5261
  "shape": [
5262
  320,
5263
  64,
5264
+ 96
5265
  ],
5266
+ "n_bytes": 3932160,
5267
  "dtype": "torch.int16"
5268
  }
5269
  },
5270
  "quant_format": "exl3",
5271
+ "bits_per_weight": 6
5272
  },
5273
  "model.layers.21.self_attn.v_proj": {
5274
  "stored_tensors": {
 
5326
  "shape": [
5327
  320,
5328
  320,
5329
+ 80
5330
  ],
5331
+ "n_bytes": 16384000,
5332
  "dtype": "torch.int16"
5333
  }
5334
  },
5335
  "quant_format": "exl3",
5336
+ "bits_per_weight": 5
5337
  },
5338
  "model.layers.21.post_attention_layernorm": {
5339
  "stored_tensors": {
 
5366
  "shape": [
5367
  320,
5368
  1728,
5369
+ 64
5370
  ],
5371
+ "n_bytes": 70778880,
5372
  "dtype": "torch.int16"
5373
  }
5374
  },
5375
  "quant_format": "exl3",
5376
+ "bits_per_weight": 4
5377
  },
5378
  "model.layers.21.mlp.gate_proj": {
5379
  "stored_tensors": {
 
5753
  "shape": [
5754
  320,
5755
  64,
5756
+ 96
5757
  ],
5758
+ "n_bytes": 3932160,
5759
  "dtype": "torch.int16"
5760
  }
5761
  },
5762
  "quant_format": "exl3",
5763
+ "bits_per_weight": 6
5764
  },
5765
  "model.layers.23.self_attn.v_proj": {
5766
  "stored_tensors": {
 
5789
  "shape": [
5790
  320,
5791
  64,
5792
+ 96
5793
  ],
5794
+ "n_bytes": 3932160,
5795
  "dtype": "torch.int16"
5796
  }
5797
  },
5798
  "quant_format": "exl3",
5799
+ "bits_per_weight": 6
5800
  },
5801
  "model.layers.23.self_attn.o_proj": {
5802
  "stored_tensors": {
 
6491
  "shape": [
6492
  320,
6493
  64,
6494
+ 80
6495
  ],
6496
+ "n_bytes": 3276800,
6497
  "dtype": "torch.int16"
6498
  }
6499
  },
6500
  "quant_format": "exl3",
6501
+ "bits_per_weight": 5
6502
  },
6503
  "model.layers.26.self_attn.v_proj": {
6504
  "stored_tensors": {
 
6527
  "shape": [
6528
  320,
6529
  64,
6530
+ 80
6531
  ],
6532
+ "n_bytes": 3276800,
6533
  "dtype": "torch.int16"
6534
  }
6535
  },
6536
  "quant_format": "exl3",
6537
+ "bits_per_weight": 5
6538
  },
6539
  "model.layers.26.self_attn.o_proj": {
6540
  "stored_tensors": {
 
7229
  "shape": [
7230
  320,
7231
  64,
7232
+ 96
7233
  ],
7234
+ "n_bytes": 3932160,
7235
  "dtype": "torch.int16"
7236
  }
7237
  },
7238
  "quant_format": "exl3",
7239
+ "bits_per_weight": 6
7240
  },
7241
  "model.layers.29.self_attn.v_proj": {
7242
  "stored_tensors": {
 
7685
  "shape": [
7686
  320,
7687
  320,
7688
+ 80
7689
  ],
7690
+ "n_bytes": 16384000,
7691
  "dtype": "torch.int16"
7692
  }
7693
  },
7694
  "quant_format": "exl3",
7695
+ "bits_per_weight": 5
7696
  },
7697
  "model.layers.31.self_attn.k_proj": {
7698
  "stored_tensors": {
 
7721
  "shape": [
7722
  320,
7723
  64,
7724
+ 96
7725
  ],
7726
+ "n_bytes": 3932160,
7727
  "dtype": "torch.int16"
7728
  }
7729
  },
7730
  "quant_format": "exl3",
7731
+ "bits_per_weight": 6
7732
  },
7733
  "model.layers.31.self_attn.v_proj": {
7734
  "stored_tensors": {
 
7757
  "shape": [
7758
  320,
7759
  64,
7760
+ 96
7761
  ],
7762
+ "n_bytes": 3932160,
7763
  "dtype": "torch.int16"
7764
  }
7765
  },
7766
  "quant_format": "exl3",
7767
+ "bits_per_weight": 6
7768
  },
7769
  "model.layers.31.self_attn.o_proj": {
7770
  "stored_tensors": {
 
7786
  "shape": [
7787
  320,
7788
  320,
7789
+ 80
7790
  ],
7791
+ "n_bytes": 16384000,
7792
  "dtype": "torch.int16"
7793
  }
7794
  },
7795
  "quant_format": "exl3",
7796
+ "bits_per_weight": 5
7797
  },
7798
  "model.layers.31.post_attention_layernorm": {
7799
  "stored_tensors": {
 
7826
  "shape": [
7827
  320,
7828
  1728,
7829
+ 64
7830
  ],
7831
+ "n_bytes": 70778880,
7832
  "dtype": "torch.int16"
7833
  }
7834
  },
7835
  "quant_format": "exl3",
7836
+ "bits_per_weight": 4
7837
  },
7838
  "model.layers.31.mlp.gate_proj": {
7839
  "stored_tensors": {
 
7931
  "shape": [
7932
  320,
7933
  320,
7934
+ 64
7935
  ],
7936
+ "n_bytes": 13107200,
7937
  "dtype": "torch.int16"
7938
  }
7939
  },
7940
  "quant_format": "exl3",
7941
+ "bits_per_weight": 4
7942
  },
7943
  "model.layers.32.self_attn.k_proj": {
7944
  "stored_tensors": {
 
8032
  "shape": [
8033
  320,
8034
  320,
8035
+ 64
8036
  ],
8037
+ "n_bytes": 13107200,
8038
  "dtype": "torch.int16"
8039
  }
8040
  },
8041
  "quant_format": "exl3",
8042
+ "bits_per_weight": 4
8043
  },
8044
  "model.layers.32.post_attention_layernorm": {
8045
  "stored_tensors": {
 
8072
  "shape": [
8073
  320,
8074
  1728,
8075
+ 80
8076
  ],
8077
+ "n_bytes": 88473600,
8078
  "dtype": "torch.int16"
8079
  }
8080
  },
8081
  "quant_format": "exl3",
8082
+ "bits_per_weight": 5
8083
  },
8084
  "model.layers.32.mlp.gate_proj": {
8085
  "stored_tensors": {
 
8423
  "shape": [
8424
  320,
8425
  320,
8426
+ 80
8427
  ],
8428
+ "n_bytes": 16384000,
8429
  "dtype": "torch.int16"
8430
  }
8431
  },
8432
  "quant_format": "exl3",
8433
+ "bits_per_weight": 5
8434
  },
8435
  "model.layers.34.self_attn.k_proj": {
8436
  "stored_tensors": {
 
8459
  "shape": [
8460
  320,
8461
  64,
8462
+ 96
8463
  ],
8464
+ "n_bytes": 3932160,
8465
  "dtype": "torch.int16"
8466
  }
8467
  },
8468
  "quant_format": "exl3",
8469
+ "bits_per_weight": 6
8470
  },
8471
  "model.layers.34.self_attn.v_proj": {
8472
  "stored_tensors": {
 
8524
  "shape": [
8525
  320,
8526
  320,
8527
+ 80
8528
  ],
8529
+ "n_bytes": 16384000,
8530
  "dtype": "torch.int16"
8531
  }
8532
  },
8533
  "quant_format": "exl3",
8534
+ "bits_per_weight": 5
8535
  },
8536
  "model.layers.34.post_attention_layernorm": {
8537
  "stored_tensors": {
 
8564
  "shape": [
8565
  320,
8566
  1728,
8567
+ 64
8568
  ],
8569
+ "n_bytes": 70778880,
8570
  "dtype": "torch.int16"
8571
  }
8572
  },
8573
  "quant_format": "exl3",
8574
+ "bits_per_weight": 4
8575
  },
8576
  "model.layers.34.mlp.gate_proj": {
8577
  "stored_tensors": {
 
8669
  "shape": [
8670
  320,
8671
  320,
8672
+ 64
8673
  ],
8674
+ "n_bytes": 13107200,
8675
  "dtype": "torch.int16"
8676
  }
8677
  },
8678
  "quant_format": "exl3",
8679
+ "bits_per_weight": 4
8680
  },
8681
  "model.layers.35.self_attn.k_proj": {
8682
  "stored_tensors": {
 
8770
  "shape": [
8771
  320,
8772
  320,
8773
+ 64
8774
  ],
8775
+ "n_bytes": 13107200,
8776
  "dtype": "torch.int16"
8777
  }
8778
  },
8779
  "quant_format": "exl3",
8780
+ "bits_per_weight": 4
8781
  },
8782
  "model.layers.35.post_attention_layernorm": {
8783
  "stored_tensors": {
 
8810
  "shape": [
8811
  320,
8812
  1728,
8813
+ 80
8814
  ],
8815
+ "n_bytes": 88473600,
8816
  "dtype": "torch.int16"
8817
  }
8818
  },
8819
  "quant_format": "exl3",
8820
+ "bits_per_weight": 5
8821
  },
8822
  "model.layers.35.mlp.gate_proj": {
8823
  "stored_tensors": {
 
9161
  "shape": [
9162
  320,
9163
  320,
9164
+ 80
9165
  ],
9166
+ "n_bytes": 16384000,
9167
  "dtype": "torch.int16"
9168
  }
9169
  },
9170
  "quant_format": "exl3",
9171
+ "bits_per_weight": 5
9172
  },
9173
  "model.layers.37.self_attn.k_proj": {
9174
  "stored_tensors": {
 
9197
  "shape": [
9198
  320,
9199
  64,
9200
+ 96
9201
  ],
9202
+ "n_bytes": 3932160,
9203
  "dtype": "torch.int16"
9204
  }
9205
  },
9206
  "quant_format": "exl3",
9207
+ "bits_per_weight": 6
9208
  },
9209
  "model.layers.37.self_attn.v_proj": {
9210
  "stored_tensors": {
 
9262
  "shape": [
9263
  320,
9264
  320,
9265
+ 80
9266
  ],
9267
+ "n_bytes": 16384000,
9268
  "dtype": "torch.int16"
9269
  }
9270
  },
9271
  "quant_format": "exl3",
9272
+ "bits_per_weight": 5
9273
  },
9274
  "model.layers.37.post_attention_layernorm": {
9275
  "stored_tensors": {
 
9302
  "shape": [
9303
  320,
9304
  1728,
9305
+ 64
9306
  ],
9307
+ "n_bytes": 70778880,
9308
  "dtype": "torch.int16"
9309
  }
9310
  },
9311
  "quant_format": "exl3",
9312
+ "bits_per_weight": 4
9313
  },
9314
  "model.layers.37.mlp.gate_proj": {
9315
  "stored_tensors": {
 
9407
  "shape": [
9408
  320,
9409
  320,
9410
+ 64
9411
  ],
9412
+ "n_bytes": 13107200,
9413
  "dtype": "torch.int16"
9414
  }
9415
  },
9416
  "quant_format": "exl3",
9417
+ "bits_per_weight": 4
9418
  },
9419
  "model.layers.38.self_attn.k_proj": {
9420
  "stored_tensors": {
 
9443
  "shape": [
9444
  320,
9445
  64,
9446
+ 80
9447
  ],
9448
+ "n_bytes": 3276800,
9449
  "dtype": "torch.int16"
9450
  }
9451
  },
9452
  "quant_format": "exl3",
9453
+ "bits_per_weight": 5
9454
  },
9455
  "model.layers.38.self_attn.v_proj": {
9456
  "stored_tensors": {
 
9479
  "shape": [
9480
  320,
9481
  64,
9482
+ 80
9483
  ],
9484
+ "n_bytes": 3276800,
9485
  "dtype": "torch.int16"
9486
  }
9487
  },
9488
  "quant_format": "exl3",
9489
+ "bits_per_weight": 5
9490
  },
9491
  "model.layers.38.self_attn.o_proj": {
9492
  "stored_tensors": {
 
9508
  "shape": [
9509
  320,
9510
  320,
9511
+ 64
9512
  ],
9513
+ "n_bytes": 13107200,
9514
  "dtype": "torch.int16"
9515
  }
9516
  },
9517
  "quant_format": "exl3",
9518
+ "bits_per_weight": 4
9519
  },
9520
  "model.layers.38.post_attention_layernorm": {
9521
  "stored_tensors": {
 
9548
  "shape": [
9549
  320,
9550
  1728,
9551
+ 80
9552
  ],
9553
+ "n_bytes": 88473600,
9554
  "dtype": "torch.int16"
9555
  }
9556
  },
9557
  "quant_format": "exl3",
9558
+ "bits_per_weight": 5
9559
  },
9560
  "model.layers.38.mlp.gate_proj": {
9561
  "stored_tensors": {
 
9653
  "shape": [
9654
  320,
9655
  320,
9656
+ 80
9657
  ],
9658
+ "n_bytes": 16384000,
9659
  "dtype": "torch.int16"
9660
  }
9661
  },
9662
  "quant_format": "exl3",
9663
+ "bits_per_weight": 5
9664
  },
9665
  "model.layers.39.self_attn.k_proj": {
9666
  "stored_tensors": {
 
9689
  "shape": [
9690
  320,
9691
  64,
9692
+ 96
9693
  ],
9694
+ "n_bytes": 3932160,
9695
  "dtype": "torch.int16"
9696
  }
9697
  },
9698
  "quant_format": "exl3",
9699
+ "bits_per_weight": 6
9700
  },
9701
  "model.layers.39.self_attn.v_proj": {
9702
  "stored_tensors": {
 
9725
  "shape": [
9726
  320,
9727
  64,
9728
+ 96
9729
  ],
9730
+ "n_bytes": 3932160,
9731
  "dtype": "torch.int16"
9732
  }
9733
  },
9734
  "quant_format": "exl3",
9735
+ "bits_per_weight": 6
9736
  },
9737
  "model.layers.39.self_attn.o_proj": {
9738
  "stored_tensors": {
 
9754
  "shape": [
9755
  320,
9756
  320,
9757
+ 80
9758
  ],
9759
+ "n_bytes": 16384000,
9760
  "dtype": "torch.int16"
9761
  }
9762
  },
9763
  "quant_format": "exl3",
9764
+ "bits_per_weight": 5
9765
  },
9766
  "model.layers.39.post_attention_layernorm": {
9767
  "stored_tensors": {
 
9794
  "shape": [
9795
  320,
9796
  1728,
9797
+ 64
9798
  ],
9799
+ "n_bytes": 70778880,
9800
  "dtype": "torch.int16"
9801
  }
9802
  },
9803
  "quant_format": "exl3",
9804
+ "bits_per_weight": 4
9805
  },
9806
  "model.layers.39.mlp.gate_proj": {
9807
  "stored_tensors": {
 
10145
  "shape": [
10146
  320,
10147
  320,
10148
+ 64
10149
  ],
10150
+ "n_bytes": 13107200,
10151
  "dtype": "torch.int16"
10152
  }
10153
  },
10154
  "quant_format": "exl3",
10155
+ "bits_per_weight": 4
10156
  },
10157
  "model.layers.41.self_attn.k_proj": {
10158
  "stored_tensors": {
 
10246
  "shape": [
10247
  320,
10248
  320,
10249
+ 64
10250
  ],
10251
+ "n_bytes": 13107200,
10252
  "dtype": "torch.int16"
10253
  }
10254
  },
10255
  "quant_format": "exl3",
10256
+ "bits_per_weight": 4
10257
  },
10258
  "model.layers.41.post_attention_layernorm": {
10259
  "stored_tensors": {
 
10286
  "shape": [
10287
  320,
10288
  1728,
10289
+ 80
10290
  ],
10291
+ "n_bytes": 88473600,
10292
  "dtype": "torch.int16"
10293
  }
10294
  },
10295
  "quant_format": "exl3",
10296
+ "bits_per_weight": 5
10297
  },
10298
  "model.layers.41.mlp.gate_proj": {
10299
  "stored_tensors": {
 
10391
  "shape": [
10392
  320,
10393
  320,
10394
+ 80
10395
  ],
10396
+ "n_bytes": 16384000,
10397
  "dtype": "torch.int16"
10398
  }
10399
  },
10400
  "quant_format": "exl3",
10401
+ "bits_per_weight": 5
10402
  },
10403
  "model.layers.42.self_attn.k_proj": {
10404
  "stored_tensors": {
 
10427
  "shape": [
10428
  320,
10429
  64,
10430
+ 96
10431
  ],
10432
+ "n_bytes": 3932160,
10433
  "dtype": "torch.int16"
10434
  }
10435
  },
10436
  "quant_format": "exl3",
10437
+ "bits_per_weight": 6
10438
  },
10439
  "model.layers.42.self_attn.v_proj": {
10440
  "stored_tensors": {
 
10492
  "shape": [
10493
  320,
10494
  320,
10495
+ 80
10496
  ],
10497
+ "n_bytes": 16384000,
10498
  "dtype": "torch.int16"
10499
  }
10500
  },
10501
  "quant_format": "exl3",
10502
+ "bits_per_weight": 5
10503
  },
10504
  "model.layers.42.post_attention_layernorm": {
10505
  "stored_tensors": {
 
10532
  "shape": [
10533
  320,
10534
  1728,
10535
+ 64
10536
  ],
10537
+ "n_bytes": 70778880,
10538
  "dtype": "torch.int16"
10539
  }
10540
  },
10541
  "quant_format": "exl3",
10542
+ "bits_per_weight": 4
10543
  },
10544
  "model.layers.42.mlp.gate_proj": {
10545
  "stored_tensors": {
 
10883
  "shape": [
10884
  320,
10885
  320,
10886
+ 64
10887
  ],
10888
+ "n_bytes": 13107200,
10889
  "dtype": "torch.int16"
10890
  }
10891
  },
10892
  "quant_format": "exl3",
10893
+ "bits_per_weight": 4
10894
  },
10895
  "model.layers.44.self_attn.k_proj": {
10896
  "stored_tensors": {
 
10984
  "shape": [
10985
  320,
10986
  320,
10987
+ 64
10988
  ],
10989
+ "n_bytes": 13107200,
10990
  "dtype": "torch.int16"
10991
  }
10992
  },
10993
  "quant_format": "exl3",
10994
+ "bits_per_weight": 4
10995
  },
10996
  "model.layers.44.post_attention_layernorm": {
10997
  "stored_tensors": {
 
11024
  "shape": [
11025
  320,
11026
  1728,
11027
+ 80
11028
  ],
11029
+ "n_bytes": 88473600,
11030
  "dtype": "torch.int16"
11031
  }
11032
  },
11033
  "quant_format": "exl3",
11034
+ "bits_per_weight": 5
11035
  },
11036
  "model.layers.44.mlp.gate_proj": {
11037
  "stored_tensors": {
 
11129
  "shape": [
11130
  320,
11131
  320,
11132
+ 80
11133
  ],
11134
+ "n_bytes": 16384000,
11135
  "dtype": "torch.int16"
11136
  }
11137
  },
11138
  "quant_format": "exl3",
11139
+ "bits_per_weight": 5
11140
  },
11141
  "model.layers.45.self_attn.k_proj": {
11142
  "stored_tensors": {
 
11165
  "shape": [
11166
  320,
11167
  64,
11168
+ 96
11169
  ],
11170
+ "n_bytes": 3932160,
11171
  "dtype": "torch.int16"
11172
  }
11173
  },
11174
  "quant_format": "exl3",
11175
+ "bits_per_weight": 6
11176
  },
11177
  "model.layers.45.self_attn.v_proj": {
11178
  "stored_tensors": {
 
11230
  "shape": [
11231
  320,
11232
  320,
11233
+ 80
11234
  ],
11235
+ "n_bytes": 16384000,
11236
  "dtype": "torch.int16"
11237
  }
11238
  },
11239
  "quant_format": "exl3",
11240
+ "bits_per_weight": 5
11241
  },
11242
  "model.layers.45.post_attention_layernorm": {
11243
  "stored_tensors": {
 
11270
  "shape": [
11271
  320,
11272
  1728,
11273
+ 64
11274
  ],
11275
+ "n_bytes": 70778880,
11276
  "dtype": "torch.int16"
11277
  }
11278
  },
11279
  "quant_format": "exl3",
11280
+ "bits_per_weight": 4
11281
  },
11282
  "model.layers.45.mlp.gate_proj": {
11283
  "stored_tensors": {
 
11657
  "shape": [
11658
  320,
11659
  64,
11660
+ 96
11661
  ],
11662
+ "n_bytes": 3932160,
11663
  "dtype": "torch.int16"
11664
  }
11665
  },
11666
  "quant_format": "exl3",
11667
+ "bits_per_weight": 6
11668
  },
11669
  "model.layers.47.self_attn.v_proj": {
11670
  "stored_tensors": {
 
11693
  "shape": [
11694
  320,
11695
  64,
11696
+ 96
11697
  ],
11698
+ "n_bytes": 3932160,
11699
  "dtype": "torch.int16"
11700
  }
11701
  },
11702
  "quant_format": "exl3",
11703
+ "bits_per_weight": 6
11704
  },
11705
  "model.layers.47.self_attn.o_proj": {
11706
  "stored_tensors": {
 
12395
  "shape": [
12396
  320,
12397
  64,
12398
+ 80
12399
  ],
12400
+ "n_bytes": 3276800,
12401
  "dtype": "torch.int16"
12402
  }
12403
  },
12404
  "quant_format": "exl3",
12405
+ "bits_per_weight": 5
12406
  },
12407
  "model.layers.50.self_attn.v_proj": {
12408
  "stored_tensors": {
 
12431
  "shape": [
12432
  320,
12433
  64,
12434
+ 80
12435
  ],
12436
+ "n_bytes": 3276800,
12437
  "dtype": "torch.int16"
12438
  }
12439
  },
12440
  "quant_format": "exl3",
12441
+ "bits_per_weight": 5
12442
  },
12443
  "model.layers.50.self_attn.o_proj": {
12444
  "stored_tensors": {
 
13133
  "shape": [
13134
  320,
13135
  64,
13136
+ 96
13137
  ],
13138
+ "n_bytes": 3932160,
13139
  "dtype": "torch.int16"
13140
  }
13141
  },
13142
  "quant_format": "exl3",
13143
+ "bits_per_weight": 6
13144
  },
13145
  "model.layers.53.self_attn.v_proj": {
13146
  "stored_tensors": {
 
13589
  "shape": [
13590
  320,
13591
  320,
13592
+ 80
13593
  ],
13594
+ "n_bytes": 16384000,
13595
  "dtype": "torch.int16"
13596
  }
13597
  },
13598
  "quant_format": "exl3",
13599
+ "bits_per_weight": 5
13600
  },
13601
  "model.layers.55.self_attn.k_proj": {
13602
  "stored_tensors": {
 
13625
  "shape": [
13626
  320,
13627
  64,
13628
+ 96
13629
  ],
13630
+ "n_bytes": 3932160,
13631
  "dtype": "torch.int16"
13632
  }
13633
  },
13634
  "quant_format": "exl3",
13635
+ "bits_per_weight": 6
13636
  },
13637
  "model.layers.55.self_attn.v_proj": {
13638
  "stored_tensors": {
 
13661
  "shape": [
13662
  320,
13663
  64,
13664
+ 96
13665
  ],
13666
+ "n_bytes": 3932160,
13667
  "dtype": "torch.int16"
13668
  }
13669
  },
13670
  "quant_format": "exl3",
13671
+ "bits_per_weight": 6
13672
  },
13673
  "model.layers.55.self_attn.o_proj": {
13674
  "stored_tensors": {
 
13690
  "shape": [
13691
  320,
13692
  320,
13693
+ 80
13694
  ],
13695
+ "n_bytes": 16384000,
13696
  "dtype": "torch.int16"
13697
  }
13698
  },
13699
  "quant_format": "exl3",
13700
+ "bits_per_weight": 5
13701
  },
13702
  "model.layers.55.post_attention_layernorm": {
13703
  "stored_tensors": {
 
13730
  "shape": [
13731
  320,
13732
  1728,
13733
+ 64
13734
  ],
13735
+ "n_bytes": 70778880,
13736
  "dtype": "torch.int16"
13737
  }
13738
  },
13739
  "quant_format": "exl3",
13740
+ "bits_per_weight": 4
13741
  },
13742
  "model.layers.55.mlp.gate_proj": {
13743
  "stored_tensors": {
 
13835
  "shape": [
13836
  320,
13837
  320,
13838
+ 64
13839
  ],
13840
+ "n_bytes": 13107200,
13841
  "dtype": "torch.int16"
13842
  }
13843
  },
13844
  "quant_format": "exl3",
13845
+ "bits_per_weight": 4
13846
  },
13847
  "model.layers.56.self_attn.k_proj": {
13848
  "stored_tensors": {
 
13936
  "shape": [
13937
  320,
13938
  320,
13939
+ 64
13940
  ],
13941
+ "n_bytes": 13107200,
13942
  "dtype": "torch.int16"
13943
  }
13944
  },
13945
  "quant_format": "exl3",
13946
+ "bits_per_weight": 4
13947
  },
13948
  "model.layers.56.post_attention_layernorm": {
13949
  "stored_tensors": {
 
13976
  "shape": [
13977
  320,
13978
  1728,
13979
+ 80
13980
  ],
13981
+ "n_bytes": 88473600,
13982
  "dtype": "torch.int16"
13983
  }
13984
  },
13985
  "quant_format": "exl3",
13986
+ "bits_per_weight": 5
13987
  },
13988
  "model.layers.56.mlp.gate_proj": {
13989
  "stored_tensors": {
 
14327
  "shape": [
14328
  320,
14329
  320,
14330
+ 80
14331
  ],
14332
+ "n_bytes": 16384000,
14333
  "dtype": "torch.int16"
14334
  }
14335
  },
14336
  "quant_format": "exl3",
14337
+ "bits_per_weight": 5
14338
  },
14339
  "model.layers.58.self_attn.k_proj": {
14340
  "stored_tensors": {
 
14363
  "shape": [
14364
  320,
14365
  64,
14366
+ 96
14367
  ],
14368
+ "n_bytes": 3932160,
14369
  "dtype": "torch.int16"
14370
  }
14371
  },
14372
  "quant_format": "exl3",
14373
+ "bits_per_weight": 6
14374
  },
14375
  "model.layers.58.self_attn.v_proj": {
14376
  "stored_tensors": {
 
14428
  "shape": [
14429
  320,
14430
  320,
14431
+ 80
14432
  ],
14433
+ "n_bytes": 16384000,
14434
  "dtype": "torch.int16"
14435
  }
14436
  },
14437
  "quant_format": "exl3",
14438
+ "bits_per_weight": 5
14439
  },
14440
  "model.layers.58.post_attention_layernorm": {
14441
  "stored_tensors": {
 
14468
  "shape": [
14469
  320,
14470
  1728,
14471
+ 64
14472
  ],
14473
+ "n_bytes": 70778880,
14474
  "dtype": "torch.int16"
14475
  }
14476
  },
14477
  "quant_format": "exl3",
14478
+ "bits_per_weight": 4
14479
  },
14480
  "model.layers.58.mlp.gate_proj": {
14481
  "stored_tensors": {
 
14573
  "shape": [
14574
  320,
14575
  320,
14576
+ 64
14577
  ],
14578
+ "n_bytes": 13107200,
14579
  "dtype": "torch.int16"
14580
  }
14581
  },
14582
  "quant_format": "exl3",
14583
+ "bits_per_weight": 4
14584
  },
14585
  "model.layers.59.self_attn.k_proj": {
14586
  "stored_tensors": {
 
14674
  "shape": [
14675
  320,
14676
  320,
14677
+ 64
14678
  ],
14679
+ "n_bytes": 13107200,
14680
  "dtype": "torch.int16"
14681
  }
14682
  },
14683
  "quant_format": "exl3",
14684
+ "bits_per_weight": 4
14685
  },
14686
  "model.layers.59.post_attention_layernorm": {
14687
  "stored_tensors": {
 
14714
  "shape": [
14715
  320,
14716
  1728,
14717
+ 80
14718
  ],
14719
+ "n_bytes": 88473600,
14720
  "dtype": "torch.int16"
14721
  }
14722
  },
14723
  "quant_format": "exl3",
14724
+ "bits_per_weight": 5
14725
  },
14726
  "model.layers.59.mlp.gate_proj": {
14727
  "stored_tensors": {
 
15065
  "shape": [
15066
  320,
15067
  320,
15068
+ 80
15069
  ],
15070
+ "n_bytes": 16384000,
15071
  "dtype": "torch.int16"
15072
  }
15073
  },
15074
  "quant_format": "exl3",
15075
+ "bits_per_weight": 5
15076
  },
15077
  "model.layers.61.self_attn.k_proj": {
15078
  "stored_tensors": {
 
15101
  "shape": [
15102
  320,
15103
  64,
15104
+ 96
15105
  ],
15106
+ "n_bytes": 3932160,
15107
  "dtype": "torch.int16"
15108
  }
15109
  },
15110
  "quant_format": "exl3",
15111
+ "bits_per_weight": 6
15112
  },
15113
  "model.layers.61.self_attn.v_proj": {
15114
  "stored_tensors": {
 
15166
  "shape": [
15167
  320,
15168
  320,
15169
+ 80
15170
  ],
15171
+ "n_bytes": 16384000,
15172
  "dtype": "torch.int16"
15173
  }
15174
  },
15175
  "quant_format": "exl3",
15176
+ "bits_per_weight": 5
15177
  },
15178
  "model.layers.61.post_attention_layernorm": {
15179
  "stored_tensors": {
 
15206
  "shape": [
15207
  320,
15208
  1728,
15209
+ 64
15210
  ],
15211
+ "n_bytes": 70778880,
15212
  "dtype": "torch.int16"
15213
  }
15214
  },
15215
  "quant_format": "exl3",
15216
+ "bits_per_weight": 4
15217
  },
15218
  "model.layers.61.mlp.gate_proj": {
15219
  "stored_tensors": {
 
15311
  "shape": [
15312
  320,
15313
  320,
15314
+ 64
15315
  ],
15316
+ "n_bytes": 13107200,
15317
  "dtype": "torch.int16"
15318
  }
15319
  },
15320
  "quant_format": "exl3",
15321
+ "bits_per_weight": 4
15322
  },
15323
  "model.layers.62.self_attn.k_proj": {
15324
  "stored_tensors": {
 
15347
  "shape": [
15348
  320,
15349
  64,
15350
+ 80
15351
  ],
15352
+ "n_bytes": 3276800,
15353
  "dtype": "torch.int16"
15354
  }
15355
  },
15356
  "quant_format": "exl3",
15357
+ "bits_per_weight": 5
15358
  },
15359
  "model.layers.62.self_attn.v_proj": {
15360
  "stored_tensors": {
 
15383
  "shape": [
15384
  320,
15385
  64,
15386
+ 80
15387
  ],
15388
+ "n_bytes": 3276800,
15389
  "dtype": "torch.int16"
15390
  }
15391
  },
15392
  "quant_format": "exl3",
15393
+ "bits_per_weight": 5
15394
  },
15395
  "model.layers.62.self_attn.o_proj": {
15396
  "stored_tensors": {
 
15412
  "shape": [
15413
  320,
15414
  320,
15415
+ 64
15416
  ],
15417
+ "n_bytes": 13107200,
15418
  "dtype": "torch.int16"
15419
  }
15420
  },
15421
  "quant_format": "exl3",
15422
+ "bits_per_weight": 4
15423
  },
15424
  "model.layers.62.post_attention_layernorm": {
15425
  "stored_tensors": {
 
15452
  "shape": [
15453
  320,
15454
  1728,
15455
+ 80
15456
  ],
15457
+ "n_bytes": 88473600,
15458
  "dtype": "torch.int16"
15459
  }
15460
  },
15461
  "quant_format": "exl3",
15462
+ "bits_per_weight": 5
15463
  },
15464
  "model.layers.62.mlp.gate_proj": {
15465
  "stored_tensors": {
 
15557
  "shape": [
15558
  320,
15559
  320,
15560
+ 80
15561
  ],
15562
+ "n_bytes": 16384000,
15563
  "dtype": "torch.int16"
15564
  }
15565
  },
15566
  "quant_format": "exl3",
15567
+ "bits_per_weight": 5
15568
  },
15569
  "model.layers.63.self_attn.k_proj": {
15570
  "stored_tensors": {
 
15593
  "shape": [
15594
  320,
15595
  64,
15596
+ 96
15597
  ],
15598
+ "n_bytes": 3932160,
15599
  "dtype": "torch.int16"
15600
  }
15601
  },
15602
  "quant_format": "exl3",
15603
+ "bits_per_weight": 6
15604
  },
15605
  "model.layers.63.self_attn.v_proj": {
15606
  "stored_tensors": {
 
15629
  "shape": [
15630
  320,
15631
  64,
15632
+ 96
15633
  ],
15634
+ "n_bytes": 3932160,
15635
  "dtype": "torch.int16"
15636
  }
15637
  },
15638
  "quant_format": "exl3",
15639
+ "bits_per_weight": 6
15640
  },
15641
  "model.layers.63.self_attn.o_proj": {
15642
  "stored_tensors": {
 
15658
  "shape": [
15659
  320,
15660
  320,
15661
+ 80
15662
  ],
15663
+ "n_bytes": 16384000,
15664
  "dtype": "torch.int16"
15665
  }
15666
  },
15667
  "quant_format": "exl3",
15668
+ "bits_per_weight": 5
15669
  },
15670
  "model.layers.63.post_attention_layernorm": {
15671
  "stored_tensors": {
 
15698
  "shape": [
15699
  320,
15700
  1728,
15701
+ 64
15702
  ],
15703
+ "n_bytes": 70778880,
15704
  "dtype": "torch.int16"
15705
  }
15706
  },
15707
  "quant_format": "exl3",
15708
+ "bits_per_weight": 4
15709
  },
15710
  "model.layers.63.mlp.gate_proj": {
15711
  "stored_tensors": {