Upload folder using huggingface_hub

Files changed:
- README.md +2 -2
- config.json +5 -3
- model-00001-of-00003.safetensors +1 -1
- model-00002-of-00003.safetensors +2 -2
- model-00003-of-00003.safetensors +2 -2
- model.safetensors.index.json +1 -1
- quantization_config.json +387 -385
README.md CHANGED
@@ -25,11 +25,11 @@ tags:
 
 <p><b>Context/instruct template</b>: ChatML. <s>Was definitely not tested with ChatML instruct and Mistral v7 template, nuh-uh.</s></p>
 
-<p><b>Samplers</b>: temperature at 0.9, min_p at 0.05, top_a at 0.3, TFS at 0.75, repetition_penalty at 1.03, DRY if you have access to it
+<p><b>Samplers</b>: temperature at 0.9, min_p at 0.05, top_a at 0.3, TFS at 0.75, repetition_penalty at 1.03, DRY if you have access to it. (or not, see below.)</p>
 
 A virt-io derivative prompt worked best during our testing, but feel free to use what you like.
 
-Master import for ST:
+Master import for ST: https://files.catbox.moe/w812at.png
 
 ## Reasoning
 
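For reference, a minimal sketch of those recommended samplers as a completion request for a text-generation-webui-style endpoint; the URL, field names, and the ChatML prompt string are illustrative assumptions, not part of this commit — only the numeric values come from the README.

```python
# Hedged sketch: the README's recommended samplers as a completion request.
# Endpoint URL and field names are assumptions (text-generation-webui-style API);
# only the sampler values come from the README above.
import requests

payload = {
    "prompt": "<|im_start|>user\nHello!<|im_end|>\n<|im_start|>assistant\n",  # ChatML, per the README
    "max_tokens": 512,
    "temperature": 0.9,
    "min_p": 0.05,
    "top_a": 0.3,
    "tfs": 0.75,
    "repetition_penalty": 1.03,
    # add DRY settings here if your backend exposes them ("or not, see below")
}

resp = requests.post("http://127.0.0.1:5000/v1/completions", json=payload, timeout=120)
print(resp.json())
```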
config.json CHANGED
@@ -5,7 +5,7 @@
   ],
   "attention_dropout": 0.0,
   "bos_token_id": 151643,
-  "eos_token_id":
+  "eos_token_id": 151645,
   "hidden_act": "silu",
   "hidden_size": 5120,
   "initializer_range": 0.02,
@@ -28,11 +28,13 @@
   "vocab_size": 151665,
   "quantization_config": {
     "quant_method": "exl3",
-    "version": "0.0.
+    "version": "0.0.4",
     "bits": 4.5,
+    "head_bits": 6,
     "calibration": {
       "rows": 100,
       "cols": 2048
-    }
+    },
+    "out_scales": "auto"
   }
 }
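A quick way to confirm the new fields landed is to read config.json and print the quantization block this commit completes; a minimal stdlib-only sketch, assuming the file is present locally:

```python
# Minimal sketch: read the updated config.json and report the EXL3 quantization
# settings this commit fills in (version, bits, head_bits, calibration, out_scales).
import json

with open("config.json") as f:
    cfg = json.load(f)

qc = cfg["quantization_config"]
print(f'quant_method={qc["quant_method"]}, version={qc["version"]}, '
      f'bits={qc["bits"]}, head_bits={qc["head_bits"]}, out_scales={qc["out_scales"]}')
print(f'calibration: rows={qc["calibration"]["rows"]}, cols={qc["calibration"]["cols"]}')
print(f'bos_token_id={cfg["bos_token_id"]}, eos_token_id={cfg["eos_token_id"]}')
```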
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:04b1d3f2f4220a2ec286bd0995d310b60651c9fd50a2ee7ad3c822a4d000595f
 size 8413645240
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:0dcf0e0f248fd776b6c64b082281916c36b78e2548c1359bf620b3408bfe7c7a
+size 8507344792
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:81e6935c2c3d8916b4ce7b5668e4940a34b505f5595b6bd88caaec7f388a794c
+size 2782595248
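The three .safetensors entries above are Git LFS pointers, so a downloaded shard can be checked against the pointer's sha256 and size. A minimal sketch using only the values from this commit; local file paths are assumed:

```python
# Sketch: verify downloaded shards against their Git LFS pointers (oid sha256 + size).
# Hashes and sizes are taken from this commit's pointer files; paths are illustrative.
import hashlib
import os

EXPECTED = {
    "model-00001-of-00003.safetensors": ("04b1d3f2f4220a2ec286bd0995d310b60651c9fd50a2ee7ad3c822a4d000595f", 8413645240),
    "model-00002-of-00003.safetensors": ("0dcf0e0f248fd776b6c64b082281916c36b78e2548c1359bf620b3408bfe7c7a", 8507344792),
    "model-00003-of-00003.safetensors": ("81e6935c2c3d8916b4ce7b5668e4940a34b505f5595b6bd88caaec7f388a794c", 2782595248),
}

def verify(path: str, expected_sha: str, expected_size: int) -> bool:
    # Cheap size check first, then a streaming sha256 over the file.
    if os.path.getsize(path) != expected_size:
        return False
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest() == expected_sha

for name, (sha, size) in EXPECTED.items():
    print(name, "OK" if verify(name, sha, size) else "MISMATCH")
```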
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
 {
   "metadata": {
-    "total_size":
+    "total_size": 19703404800
   },
   "weight_map": {
     "model.embed_tokens.weight": "model-00001-of-00003.safetensors",
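The index's weight_map assigns every tensor to one of the three shards. A small sketch that reads the updated index and summarizes it (total_size counts tensor data, so it can differ slightly from the shard file sizes above):

```python
# Sketch: inspect model.safetensors.index.json — report the declared total tensor
# size and how many tensors the weight_map assigns to each shard.
import json
from collections import Counter

with open("model.safetensors.index.json") as f:
    index = json.load(f)

total = index["metadata"]["total_size"]
print(f"total_size: {total} bytes (~{total / 1e9:.1f} GB)")

per_shard = Counter(index["weight_map"].values())
for shard, count in sorted(per_shard.items()):
    print(f"{shard}: {count} tensors")
```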
quantization_config.json CHANGED
@@ -1,11 +1,13 @@
 {
   "quant_method": "exl3",
-  "version": "0.0.
+  "version": "0.0.4",
   "bits": 4.5,
+  "head_bits": 6,
   "calibration": {
     "rows": 100,
     "cols": 2048
   },
+  "out_scales": "auto",
   "tensor_storage": {
     "model.embed_tokens": {
       "stored_tensors": {
@@ -585,14 +587,14 @@
         "shape": [
           320,
           64,
-
+          80
         ],
-        "n_bytes":
+        "n_bytes": 3276800,
         "dtype": "torch.int16"
       }
     },
     "quant_format": "exl3",
-    "bits_per_weight":
+    "bits_per_weight": 5
   },
   "model.layers.2.self_attn.v_proj": {
     "stored_tensors": {

[The remaining hunks of this diff repeat the same change for every other entry in "tensor_storage": the trailing dimension of each stored int16 tensor's "shape", its "n_bytes", and the module's "bits_per_weight" are filled in. In the entries visible here the trailing dimension is 64, 80, or 96 with "bits_per_weight" 4, 5, or 6 respectively, and "n_bytes" is set accordingly. The excerpt cuts off partway through the new side of the diff.]
5233 |
},
|
5234 |
"quant_format": "exl3",
|
5235 |
+
"bits_per_weight": 5
|
5236 |
},
|
5237 |
"model.layers.21.self_attn.k_proj": {
|
5238 |
"stored_tensors": {
|
|
|
5261 |
"shape": [
|
5262 |
320,
|
5263 |
64,
|
5264 |
+
96
|
5265 |
],
|
5266 |
+
"n_bytes": 3932160,
|
5267 |
"dtype": "torch.int16"
|
5268 |
}
|
5269 |
},
|
5270 |
"quant_format": "exl3",
|
5271 |
+
"bits_per_weight": 6
|
5272 |
},
|
5273 |
"model.layers.21.self_attn.v_proj": {
|
5274 |
"stored_tensors": {
|
|
|
5326 |
"shape": [
|
5327 |
320,
|
5328 |
320,
|
5329 |
+
80
|
5330 |
],
|
5331 |
+
"n_bytes": 16384000,
|
5332 |
"dtype": "torch.int16"
|
5333 |
}
|
5334 |
},
|
5335 |
"quant_format": "exl3",
|
5336 |
+
"bits_per_weight": 5
|
5337 |
},
|
5338 |
"model.layers.21.post_attention_layernorm": {
|
5339 |
"stored_tensors": {
|
|
|
5366 |
"shape": [
|
5367 |
320,
|
5368 |
1728,
|
5369 |
+
64
|
5370 |
],
|
5371 |
+
"n_bytes": 70778880,
|
5372 |
"dtype": "torch.int16"
|
5373 |
}
|
5374 |
},
|
5375 |
"quant_format": "exl3",
|
5376 |
+
"bits_per_weight": 4
|
5377 |
},
|
5378 |
"model.layers.21.mlp.gate_proj": {
|
5379 |
"stored_tensors": {
|
|
|
5753 |
"shape": [
|
5754 |
320,
|
5755 |
64,
|
5756 |
+
96
|
5757 |
],
|
5758 |
+
"n_bytes": 3932160,
|
5759 |
"dtype": "torch.int16"
|
5760 |
}
|
5761 |
},
|
5762 |
"quant_format": "exl3",
|
5763 |
+
"bits_per_weight": 6
|
5764 |
},
|
5765 |
"model.layers.23.self_attn.v_proj": {
|
5766 |
"stored_tensors": {
|
|
|
5789 |
"shape": [
|
5790 |
320,
|
5791 |
64,
|
5792 |
+
96
|
5793 |
],
|
5794 |
+
"n_bytes": 3932160,
|
5795 |
"dtype": "torch.int16"
|
5796 |
}
|
5797 |
},
|
5798 |
"quant_format": "exl3",
|
5799 |
+
"bits_per_weight": 6
|
5800 |
},
|
5801 |
"model.layers.23.self_attn.o_proj": {
|
5802 |
"stored_tensors": {
|
|
|
6491 |
"shape": [
|
6492 |
320,
|
6493 |
64,
|
6494 |
+
80
|
6495 |
],
|
6496 |
+
"n_bytes": 3276800,
|
6497 |
"dtype": "torch.int16"
|
6498 |
}
|
6499 |
},
|
6500 |
"quant_format": "exl3",
|
6501 |
+
"bits_per_weight": 5
|
6502 |
},
|
6503 |
"model.layers.26.self_attn.v_proj": {
|
6504 |
"stored_tensors": {
|
|
|
6527 |
"shape": [
|
6528 |
320,
|
6529 |
64,
|
6530 |
+
80
|
6531 |
],
|
6532 |
+
"n_bytes": 3276800,
|
6533 |
"dtype": "torch.int16"
|
6534 |
}
|
6535 |
},
|
6536 |
"quant_format": "exl3",
|
6537 |
+
"bits_per_weight": 5
|
6538 |
},
|
6539 |
"model.layers.26.self_attn.o_proj": {
|
6540 |
"stored_tensors": {
|
|
|
7229 |
"shape": [
|
7230 |
320,
|
7231 |
64,
|
7232 |
+
96
|
7233 |
],
|
7234 |
+
"n_bytes": 3932160,
|
7235 |
"dtype": "torch.int16"
|
7236 |
}
|
7237 |
},
|
7238 |
"quant_format": "exl3",
|
7239 |
+
"bits_per_weight": 6
|
7240 |
},
|
7241 |
"model.layers.29.self_attn.v_proj": {
|
7242 |
"stored_tensors": {
|
|
|
7685 |
"shape": [
|
7686 |
320,
|
7687 |
320,
|
7688 |
+
80
|
7689 |
],
|
7690 |
+
"n_bytes": 16384000,
|
7691 |
"dtype": "torch.int16"
|
7692 |
}
|
7693 |
},
|
7694 |
"quant_format": "exl3",
|
7695 |
+
"bits_per_weight": 5
|
7696 |
},
|
7697 |
"model.layers.31.self_attn.k_proj": {
|
7698 |
"stored_tensors": {
|
|
|
7721 |
"shape": [
|
7722 |
320,
|
7723 |
64,
|
7724 |
+
96
|
7725 |
],
|
7726 |
+
"n_bytes": 3932160,
|
7727 |
"dtype": "torch.int16"
|
7728 |
}
|
7729 |
},
|
7730 |
"quant_format": "exl3",
|
7731 |
+
"bits_per_weight": 6
|
7732 |
},
|
7733 |
"model.layers.31.self_attn.v_proj": {
|
7734 |
"stored_tensors": {
|
|
|
7757 |
"shape": [
|
7758 |
320,
|
7759 |
64,
|
7760 |
+
96
|
7761 |
],
|
7762 |
+
"n_bytes": 3932160,
|
7763 |
"dtype": "torch.int16"
|
7764 |
}
|
7765 |
},
|
7766 |
"quant_format": "exl3",
|
7767 |
+
"bits_per_weight": 6
|
7768 |
},
|
7769 |
"model.layers.31.self_attn.o_proj": {
|
7770 |
"stored_tensors": {
|
|
|
7786 |
"shape": [
|
7787 |
320,
|
7788 |
320,
|
7789 |
+
80
|
7790 |
],
|
7791 |
+
"n_bytes": 16384000,
|
7792 |
"dtype": "torch.int16"
|
7793 |
}
|
7794 |
},
|
7795 |
"quant_format": "exl3",
|
7796 |
+
"bits_per_weight": 5
|
7797 |
},
|
7798 |
"model.layers.31.post_attention_layernorm": {
|
7799 |
"stored_tensors": {
|
|
|
7826 |
"shape": [
|
7827 |
320,
|
7828 |
1728,
|
7829 |
+
64
|
7830 |
],
|
7831 |
+
"n_bytes": 70778880,
|
7832 |
"dtype": "torch.int16"
|
7833 |
}
|
7834 |
},
|
7835 |
"quant_format": "exl3",
|
7836 |
+
"bits_per_weight": 4
|
7837 |
},
|
7838 |
"model.layers.31.mlp.gate_proj": {
|
7839 |
"stored_tensors": {
|
|
|
7931 |
"shape": [
|
7932 |
320,
|
7933 |
320,
|
7934 |
+
64
|
7935 |
],
|
7936 |
+
"n_bytes": 13107200,
|
7937 |
"dtype": "torch.int16"
|
7938 |
}
|
7939 |
},
|
7940 |
"quant_format": "exl3",
|
7941 |
+
"bits_per_weight": 4
|
7942 |
},
|
7943 |
"model.layers.32.self_attn.k_proj": {
|
7944 |
"stored_tensors": {
|
|
|
8032 |
"shape": [
|
8033 |
320,
|
8034 |
320,
|
8035 |
+
64
|
8036 |
],
|
8037 |
+
"n_bytes": 13107200,
|
8038 |
"dtype": "torch.int16"
|
8039 |
}
|
8040 |
},
|
8041 |
"quant_format": "exl3",
|
8042 |
+
"bits_per_weight": 4
|
8043 |
},
|
8044 |
"model.layers.32.post_attention_layernorm": {
|
8045 |
"stored_tensors": {
|
|
|
8072 |
"shape": [
|
8073 |
320,
|
8074 |
1728,
|
8075 |
+
80
|
8076 |
],
|
8077 |
+
"n_bytes": 88473600,
|
8078 |
"dtype": "torch.int16"
|
8079 |
}
|
8080 |
},
|
8081 |
"quant_format": "exl3",
|
8082 |
+
"bits_per_weight": 5
|
8083 |
},
|
8084 |
"model.layers.32.mlp.gate_proj": {
|
8085 |
"stored_tensors": {
|
|
|
8423 |
"shape": [
|
8424 |
320,
|
8425 |
320,
|
8426 |
+
80
|
8427 |
],
|
8428 |
+
"n_bytes": 16384000,
|
8429 |
"dtype": "torch.int16"
|
8430 |
}
|
8431 |
},
|
8432 |
"quant_format": "exl3",
|
8433 |
+
"bits_per_weight": 5
|
8434 |
},
|
8435 |
"model.layers.34.self_attn.k_proj": {
|
8436 |
"stored_tensors": {
|
|
|
8459 |
"shape": [
|
8460 |
320,
|
8461 |
64,
|
8462 |
+
96
|
8463 |
],
|
8464 |
+
"n_bytes": 3932160,
|
8465 |
"dtype": "torch.int16"
|
8466 |
}
|
8467 |
},
|
8468 |
"quant_format": "exl3",
|
8469 |
+
"bits_per_weight": 6
|
8470 |
},
|
8471 |
"model.layers.34.self_attn.v_proj": {
|
8472 |
"stored_tensors": {
|
|
|
8524 |
"shape": [
|
8525 |
320,
|
8526 |
320,
|
8527 |
+
80
|
8528 |
],
|
8529 |
+
"n_bytes": 16384000,
|
8530 |
"dtype": "torch.int16"
|
8531 |
}
|
8532 |
},
|
8533 |
"quant_format": "exl3",
|
8534 |
+
"bits_per_weight": 5
|
8535 |
},
|
8536 |
"model.layers.34.post_attention_layernorm": {
|
8537 |
"stored_tensors": {
|
|
|
8564 |
"shape": [
|
8565 |
320,
|
8566 |
1728,
|
8567 |
+
64
|
8568 |
],
|
8569 |
+
"n_bytes": 70778880,
|
8570 |
"dtype": "torch.int16"
|
8571 |
}
|
8572 |
},
|
8573 |
"quant_format": "exl3",
|
8574 |
+
"bits_per_weight": 4
|
8575 |
},
|
8576 |
"model.layers.34.mlp.gate_proj": {
|
8577 |
"stored_tensors": {
|
|
|
8669 |
"shape": [
|
8670 |
320,
|
8671 |
320,
|
8672 |
+
64
|
8673 |
],
|
8674 |
+
"n_bytes": 13107200,
|
8675 |
"dtype": "torch.int16"
|
8676 |
}
|
8677 |
},
|
8678 |
"quant_format": "exl3",
|
8679 |
+
"bits_per_weight": 4
|
8680 |
},
|
8681 |
"model.layers.35.self_attn.k_proj": {
|
8682 |
"stored_tensors": {
|
|
|
8770 |
"shape": [
|
8771 |
320,
|
8772 |
320,
|
8773 |
+
64
|
8774 |
],
|
8775 |
+
"n_bytes": 13107200,
|
8776 |
"dtype": "torch.int16"
|
8777 |
}
|
8778 |
},
|
8779 |
"quant_format": "exl3",
|
8780 |
+
"bits_per_weight": 4
|
8781 |
},
|
8782 |
"model.layers.35.post_attention_layernorm": {
|
8783 |
"stored_tensors": {
|
|
|
8810 |
"shape": [
|
8811 |
320,
|
8812 |
1728,
|
8813 |
+
80
|
8814 |
],
|
8815 |
+
"n_bytes": 88473600,
|
8816 |
"dtype": "torch.int16"
|
8817 |
}
|
8818 |
},
|
8819 |
"quant_format": "exl3",
|
8820 |
+
"bits_per_weight": 5
|
8821 |
},
|
8822 |
"model.layers.35.mlp.gate_proj": {
|
8823 |
"stored_tensors": {
|
|
|
9161 |
"shape": [
|
9162 |
320,
|
9163 |
320,
|
9164 |
+
80
|
9165 |
],
|
9166 |
+
"n_bytes": 16384000,
|
9167 |
"dtype": "torch.int16"
|
9168 |
}
|
9169 |
},
|
9170 |
"quant_format": "exl3",
|
9171 |
+
"bits_per_weight": 5
|
9172 |
},
|
9173 |
"model.layers.37.self_attn.k_proj": {
|
9174 |
"stored_tensors": {
|
|
|
9197 |
"shape": [
|
9198 |
320,
|
9199 |
64,
|
9200 |
+
96
|
9201 |
],
|
9202 |
+
"n_bytes": 3932160,
|
9203 |
"dtype": "torch.int16"
|
9204 |
}
|
9205 |
},
|
9206 |
"quant_format": "exl3",
|
9207 |
+
"bits_per_weight": 6
|
9208 |
},
|
9209 |
"model.layers.37.self_attn.v_proj": {
|
9210 |
"stored_tensors": {
|
|
|
9262 |
"shape": [
|
9263 |
320,
|
9264 |
320,
|
9265 |
+
80
|
9266 |
],
|
9267 |
+
"n_bytes": 16384000,
|
9268 |
"dtype": "torch.int16"
|
9269 |
}
|
9270 |
},
|
9271 |
"quant_format": "exl3",
|
9272 |
+
"bits_per_weight": 5
|
9273 |
},
|
9274 |
"model.layers.37.post_attention_layernorm": {
|
9275 |
"stored_tensors": {
|
|
|
9302 |
"shape": [
|
9303 |
320,
|
9304 |
1728,
|
9305 |
+
64
|
9306 |
],
|
9307 |
+
"n_bytes": 70778880,
|
9308 |
"dtype": "torch.int16"
|
9309 |
}
|
9310 |
},
|
9311 |
"quant_format": "exl3",
|
9312 |
+
"bits_per_weight": 4
|
9313 |
},
|
9314 |
"model.layers.37.mlp.gate_proj": {
|
9315 |
"stored_tensors": {
|
|
|
9407 |
"shape": [
|
9408 |
320,
|
9409 |
320,
|
9410 |
+
64
|
9411 |
],
|
9412 |
+
"n_bytes": 13107200,
|
9413 |
"dtype": "torch.int16"
|
9414 |
}
|
9415 |
},
|
9416 |
"quant_format": "exl3",
|
9417 |
+
"bits_per_weight": 4
|
9418 |
},
|
9419 |
"model.layers.38.self_attn.k_proj": {
|
9420 |
"stored_tensors": {
|
|
|
9443 |
"shape": [
|
9444 |
320,
|
9445 |
64,
|
9446 |
+
80
|
9447 |
],
|
9448 |
+
"n_bytes": 3276800,
|
9449 |
"dtype": "torch.int16"
|
9450 |
}
|
9451 |
},
|
9452 |
"quant_format": "exl3",
|
9453 |
+
"bits_per_weight": 5
|
9454 |
},
|
9455 |
"model.layers.38.self_attn.v_proj": {
|
9456 |
"stored_tensors": {
|
|
|
9479 |
"shape": [
|
9480 |
320,
|
9481 |
64,
|
9482 |
+
80
|
9483 |
],
|
9484 |
+
"n_bytes": 3276800,
|
9485 |
"dtype": "torch.int16"
|
9486 |
}
|
9487 |
},
|
9488 |
"quant_format": "exl3",
|
9489 |
+
"bits_per_weight": 5
|
9490 |
},
|
9491 |
"model.layers.38.self_attn.o_proj": {
|
9492 |
"stored_tensors": {
|
|
|
9508 |
"shape": [
|
9509 |
320,
|
9510 |
320,
|
9511 |
+
64
|
9512 |
],
|
9513 |
+
"n_bytes": 13107200,
|
9514 |
"dtype": "torch.int16"
|
9515 |
}
|
9516 |
},
|
9517 |
"quant_format": "exl3",
|
9518 |
+
"bits_per_weight": 4
|
9519 |
},
|
9520 |
"model.layers.38.post_attention_layernorm": {
|
9521 |
"stored_tensors": {
|
|
|
9548 |
"shape": [
|
9549 |
320,
|
9550 |
1728,
|
9551 |
+
80
|
9552 |
],
|
9553 |
+
"n_bytes": 88473600,
|
9554 |
"dtype": "torch.int16"
|
9555 |
}
|
9556 |
},
|
9557 |
"quant_format": "exl3",
|
9558 |
+
"bits_per_weight": 5
|
9559 |
},
|
9560 |
"model.layers.38.mlp.gate_proj": {
|
9561 |
"stored_tensors": {
|
|
|
9653 |
"shape": [
|
9654 |
320,
|
9655 |
320,
|
9656 |
+
80
|
9657 |
],
|
9658 |
+
"n_bytes": 16384000,
|
9659 |
"dtype": "torch.int16"
|
9660 |
}
|
9661 |
},
|
9662 |
"quant_format": "exl3",
|
9663 |
+
"bits_per_weight": 5
|
9664 |
},
|
9665 |
"model.layers.39.self_attn.k_proj": {
|
9666 |
"stored_tensors": {
|
|
|
9689 |
"shape": [
|
9690 |
320,
|
9691 |
64,
|
9692 |
+
96
|
9693 |
],
|
9694 |
+
"n_bytes": 3932160,
|
9695 |
"dtype": "torch.int16"
|
9696 |
}
|
9697 |
},
|
9698 |
"quant_format": "exl3",
|
9699 |
+
"bits_per_weight": 6
|
9700 |
},
|
9701 |
"model.layers.39.self_attn.v_proj": {
|
9702 |
"stored_tensors": {
|
|
|
9725 |
"shape": [
|
9726 |
320,
|
9727 |
64,
|
9728 |
+
96
|
9729 |
],
|
9730 |
+
"n_bytes": 3932160,
|
9731 |
"dtype": "torch.int16"
|
9732 |
}
|
9733 |
},
|
9734 |
"quant_format": "exl3",
|
9735 |
+
"bits_per_weight": 6
|
9736 |
},
|
9737 |
"model.layers.39.self_attn.o_proj": {
|
9738 |
"stored_tensors": {
|
|
|
9754 |
"shape": [
|
9755 |
320,
|
9756 |
320,
|
9757 |
+
80
|
9758 |
],
|
9759 |
+
"n_bytes": 16384000,
|
9760 |
"dtype": "torch.int16"
|
9761 |
}
|
9762 |
},
|
9763 |
"quant_format": "exl3",
|
9764 |
+
"bits_per_weight": 5
|
9765 |
},
|
9766 |
"model.layers.39.post_attention_layernorm": {
|
9767 |
"stored_tensors": {
|
|
|
9794 |
"shape": [
|
9795 |
320,
|
9796 |
1728,
|
9797 |
+
64
|
9798 |
],
|
9799 |
+
"n_bytes": 70778880,
|
9800 |
"dtype": "torch.int16"
|
9801 |
}
|
9802 |
},
|
9803 |
"quant_format": "exl3",
|
9804 |
+
"bits_per_weight": 4
|
9805 |
},
|
9806 |
"model.layers.39.mlp.gate_proj": {
|
9807 |
"stored_tensors": {
|
|
|
10145 |
"shape": [
|
10146 |
320,
|
10147 |
320,
|
10148 |
+
64
|
10149 |
],
|
10150 |
+
"n_bytes": 13107200,
|
10151 |
"dtype": "torch.int16"
|
10152 |
}
|
10153 |
},
|
10154 |
"quant_format": "exl3",
|
10155 |
+
"bits_per_weight": 4
|
10156 |
},
|
10157 |
"model.layers.41.self_attn.k_proj": {
|
10158 |
"stored_tensors": {
|
|
|
10246 |
"shape": [
|
10247 |
320,
|
10248 |
320,
|
10249 |
+
64
|
10250 |
],
|
10251 |
+
"n_bytes": 13107200,
|
10252 |
"dtype": "torch.int16"
|
10253 |
}
|
10254 |
},
|
10255 |
"quant_format": "exl3",
|
10256 |
+
"bits_per_weight": 4
|
10257 |
},
|
10258 |
"model.layers.41.post_attention_layernorm": {
|
10259 |
"stored_tensors": {
|
|
|
10286 |
"shape": [
|
10287 |
320,
|
10288 |
1728,
|
10289 |
+
80
|
10290 |
],
|
10291 |
+
"n_bytes": 88473600,
|
10292 |
"dtype": "torch.int16"
|
10293 |
}
|
10294 |
},
|
10295 |
"quant_format": "exl3",
|
10296 |
+
"bits_per_weight": 5
|
10297 |
},
|
10298 |
"model.layers.41.mlp.gate_proj": {
|
10299 |
"stored_tensors": {
|
|
|
10391 |
"shape": [
|
10392 |
320,
|
10393 |
320,
|
10394 |
+
80
|
10395 |
],
|
10396 |
+
"n_bytes": 16384000,
|
10397 |
"dtype": "torch.int16"
|
10398 |
}
|
10399 |
},
|
10400 |
"quant_format": "exl3",
|
10401 |
+
"bits_per_weight": 5
|
10402 |
},
|
10403 |
"model.layers.42.self_attn.k_proj": {
|
10404 |
"stored_tensors": {
|
|
|
10427 |
"shape": [
|
10428 |
320,
|
10429 |
64,
|
10430 |
+
96
|
10431 |
],
|
10432 |
+
"n_bytes": 3932160,
|
10433 |
"dtype": "torch.int16"
|
10434 |
}
|
10435 |
},
|
10436 |
"quant_format": "exl3",
|
10437 |
+
"bits_per_weight": 6
|
10438 |
},
|
10439 |
"model.layers.42.self_attn.v_proj": {
|
10440 |
"stored_tensors": {
|
|
|
10492 |
"shape": [
|
10493 |
320,
|
10494 |
320,
|
10495 |
+
80
|
10496 |
],
|
10497 |
+
"n_bytes": 16384000,
|
10498 |
"dtype": "torch.int16"
|
10499 |
}
|
10500 |
},
|
10501 |
"quant_format": "exl3",
|
10502 |
+
"bits_per_weight": 5
|
10503 |
},
|
10504 |
"model.layers.42.post_attention_layernorm": {
|
10505 |
"stored_tensors": {
|
|
|
10532 |
"shape": [
|
10533 |
320,
|
10534 |
1728,
|
10535 |
+
64
|
10536 |
],
|
10537 |
+
"n_bytes": 70778880,
|
10538 |
"dtype": "torch.int16"
|
10539 |
}
|
10540 |
},
|
10541 |
"quant_format": "exl3",
|
10542 |
+
"bits_per_weight": 4
|
10543 |
},
|
10544 |
"model.layers.42.mlp.gate_proj": {
|
10545 |
"stored_tensors": {
|
|
|
10883 |
"shape": [
|
10884 |
320,
|
10885 |
320,
|
10886 |
+
64
|
10887 |
],
|
10888 |
+
"n_bytes": 13107200,
|
10889 |
"dtype": "torch.int16"
|
10890 |
}
|
10891 |
},
|
10892 |
"quant_format": "exl3",
|
10893 |
+
"bits_per_weight": 4
|
10894 |
},
|
10895 |
"model.layers.44.self_attn.k_proj": {
|
10896 |
"stored_tensors": {
|
|
|
10984 |
"shape": [
|
10985 |
320,
|
10986 |
320,
|
10987 |
+
64
|
10988 |
],
|
10989 |
+
"n_bytes": 13107200,
|
10990 |
"dtype": "torch.int16"
|
10991 |
}
|
10992 |
},
|
10993 |
"quant_format": "exl3",
|
10994 |
+
"bits_per_weight": 4
|
10995 |
},
|
10996 |
"model.layers.44.post_attention_layernorm": {
|
10997 |
"stored_tensors": {
|
|
|
11024 |
"shape": [
|
11025 |
320,
|
11026 |
1728,
|
11027 |
+
80
|
11028 |
],
|
11029 |
+
"n_bytes": 88473600,
|
11030 |
"dtype": "torch.int16"
|
11031 |
}
|
11032 |
},
|
11033 |
"quant_format": "exl3",
|
11034 |
+
"bits_per_weight": 5
|
11035 |
},
|
11036 |
"model.layers.44.mlp.gate_proj": {
|
11037 |
"stored_tensors": {
|
|
|
11129 |
"shape": [
|
11130 |
320,
|
11131 |
320,
|
11132 |
+
80
|
11133 |
],
|
11134 |
+
"n_bytes": 16384000,
|
11135 |
"dtype": "torch.int16"
|
11136 |
}
|
11137 |
},
|
11138 |
"quant_format": "exl3",
|
11139 |
+
"bits_per_weight": 5
|
11140 |
},
|
11141 |
"model.layers.45.self_attn.k_proj": {
|
11142 |
"stored_tensors": {
|
|
|
11165 |
"shape": [
|
11166 |
320,
|
11167 |
64,
|
11168 |
+
96
|
11169 |
],
|
11170 |
+
"n_bytes": 3932160,
|
11171 |
"dtype": "torch.int16"
|
11172 |
}
|
11173 |
},
|
11174 |
"quant_format": "exl3",
|
11175 |
+
"bits_per_weight": 6
|
11176 |
},
|
11177 |
"model.layers.45.self_attn.v_proj": {
|
11178 |
"stored_tensors": {
|
|
|
11230 |
"shape": [
|
11231 |
320,
|
11232 |
320,
|
11233 |
+
80
|
11234 |
],
|
11235 |
+
"n_bytes": 16384000,
|
11236 |
"dtype": "torch.int16"
|
11237 |
}
|
11238 |
},
|
11239 |
"quant_format": "exl3",
|
11240 |
+
"bits_per_weight": 5
|
11241 |
},
|
11242 |
"model.layers.45.post_attention_layernorm": {
|
11243 |
"stored_tensors": {
|
|
|
11270 |
"shape": [
|
11271 |
320,
|
11272 |
1728,
|
11273 |
+
64
|
11274 |
],
|
11275 |
+
"n_bytes": 70778880,
|
11276 |
"dtype": "torch.int16"
|
11277 |
}
|
11278 |
},
|
11279 |
"quant_format": "exl3",
|
11280 |
+
"bits_per_weight": 4
|
11281 |
},
|
11282 |
"model.layers.45.mlp.gate_proj": {
|
11283 |
"stored_tensors": {
|
|
|
11657 |
"shape": [
|
11658 |
320,
|
11659 |
64,
|
11660 |
+
96
|
11661 |
],
|
11662 |
+
"n_bytes": 3932160,
|
11663 |
"dtype": "torch.int16"
|
11664 |
}
|
11665 |
},
|
11666 |
"quant_format": "exl3",
|
11667 |
+
"bits_per_weight": 6
|
11668 |
},
|
11669 |
"model.layers.47.self_attn.v_proj": {
|
11670 |
"stored_tensors": {
|
|
|
11693 |
"shape": [
|
11694 |
320,
|
11695 |
64,
|
11696 |
+
96
|
11697 |
],
|
11698 |
+
"n_bytes": 3932160,
|
11699 |
"dtype": "torch.int16"
|
11700 |
}
|
11701 |
},
|
11702 |
"quant_format": "exl3",
|
11703 |
+
"bits_per_weight": 6
|
11704 |
},
|
11705 |
"model.layers.47.self_attn.o_proj": {
|
11706 |
"stored_tensors": {
|
|
|
12395 |
"shape": [
|
12396 |
320,
|
12397 |
64,
|
12398 |
+
80
|
12399 |
],
|
12400 |
+
"n_bytes": 3276800,
|
12401 |
"dtype": "torch.int16"
|
12402 |
}
|
12403 |
},
|
12404 |
"quant_format": "exl3",
|
12405 |
+
"bits_per_weight": 5
|
12406 |
},
|
12407 |
"model.layers.50.self_attn.v_proj": {
|
12408 |
"stored_tensors": {
|
|
|
12431 |
"shape": [
|
12432 |
320,
|
12433 |
64,
|
12434 |
+
80
|
12435 |
],
|
12436 |
+
"n_bytes": 3276800,
|
12437 |
"dtype": "torch.int16"
|
12438 |
}
|
12439 |
},
|
12440 |
"quant_format": "exl3",
|
12441 |
+
"bits_per_weight": 5
|
12442 |
},
|
12443 |
"model.layers.50.self_attn.o_proj": {
|
12444 |
"stored_tensors": {
|
|
|
13133 |
"shape": [
|
13134 |
320,
|
13135 |
64,
|
13136 |
+
96
|
13137 |
],
|
13138 |
+
"n_bytes": 3932160,
|
13139 |
"dtype": "torch.int16"
|
13140 |
}
|
13141 |
},
|
13142 |
"quant_format": "exl3",
|
13143 |
+
"bits_per_weight": 6
|
13144 |
},
|
13145 |
"model.layers.53.self_attn.v_proj": {
|
13146 |
"stored_tensors": {
|
|
|
13589 |
"shape": [
|
13590 |
320,
|
13591 |
320,
|
13592 |
+
80
|
13593 |
],
|
13594 |
+
"n_bytes": 16384000,
|
13595 |
"dtype": "torch.int16"
|
13596 |
}
|
13597 |
},
|
13598 |
"quant_format": "exl3",
|
13599 |
+
"bits_per_weight": 5
|
13600 |
},
|
13601 |
"model.layers.55.self_attn.k_proj": {
|
13602 |
"stored_tensors": {
|
|
|
13625 |
"shape": [
|
13626 |
320,
|
13627 |
64,
|
13628 |
+
96
|
13629 |
],
|
13630 |
+
"n_bytes": 3932160,
|
13631 |
"dtype": "torch.int16"
|
13632 |
}
|
13633 |
},
|
13634 |
"quant_format": "exl3",
|
13635 |
+
"bits_per_weight": 6
|
13636 |
},
|
13637 |
"model.layers.55.self_attn.v_proj": {
|
13638 |
"stored_tensors": {
|
|
|
13661 |
"shape": [
|
13662 |
320,
|
13663 |
64,
|
13664 |
+
96
|
13665 |
],
|
13666 |
+
"n_bytes": 3932160,
|
13667 |
"dtype": "torch.int16"
|
13668 |
}
|
13669 |
},
|
13670 |
"quant_format": "exl3",
|
13671 |
+
"bits_per_weight": 6
|
13672 |
},
|
13673 |
"model.layers.55.self_attn.o_proj": {
|
13674 |
"stored_tensors": {
|
|
|
13690 |
"shape": [
|
13691 |
320,
|
13692 |
320,
|
13693 |
+
80
|
13694 |
],
|
13695 |
+
"n_bytes": 16384000,
|
13696 |
"dtype": "torch.int16"
|
13697 |
}
|
13698 |
},
|
13699 |
"quant_format": "exl3",
|
13700 |
+
"bits_per_weight": 5
|
13701 |
},
|
13702 |
"model.layers.55.post_attention_layernorm": {
|
13703 |
"stored_tensors": {
|
|
|
13730 |
"shape": [
|
13731 |
320,
|
13732 |
1728,
|
13733 |
+
64
|
13734 |
],
|
13735 |
+
"n_bytes": 70778880,
|
13736 |
"dtype": "torch.int16"
|
13737 |
}
|
13738 |
},
|
13739 |
"quant_format": "exl3",
|
13740 |
+
"bits_per_weight": 4
|
13741 |
},
|
13742 |
"model.layers.55.mlp.gate_proj": {
|
13743 |
"stored_tensors": {
|
|
|
13835 |
"shape": [
|
13836 |
320,
|
13837 |
320,
|
13838 |
+
64
|
13839 |
],
|
13840 |
+
"n_bytes": 13107200,
|
13841 |
"dtype": "torch.int16"
|
13842 |
}
|
13843 |
},
|
13844 |
"quant_format": "exl3",
|
13845 |
+
"bits_per_weight": 4
|
13846 |
},
|
13847 |
"model.layers.56.self_attn.k_proj": {
|
13848 |
"stored_tensors": {
|
|
|
13936 |
"shape": [
|
13937 |
320,
|
13938 |
320,
|
13939 |
+
64
|
13940 |
],
|
13941 |
+
"n_bytes": 13107200,
|
13942 |
"dtype": "torch.int16"
|
13943 |
}
|
13944 |
},
|
13945 |
"quant_format": "exl3",
|
13946 |
+
"bits_per_weight": 4
|
13947 |
},
|
13948 |
"model.layers.56.post_attention_layernorm": {
|
13949 |
"stored_tensors": {
|
|
|
13976 |
"shape": [
|
13977 |
320,
|
13978 |
1728,
|
13979 |
+
80
|
13980 |
],
|
13981 |
+
"n_bytes": 88473600,
|
13982 |
"dtype": "torch.int16"
|
13983 |
}
|
13984 |
},
|
13985 |
"quant_format": "exl3",
|
13986 |
+
"bits_per_weight": 5
|
13987 |
},
|
13988 |
"model.layers.56.mlp.gate_proj": {
|
13989 |
"stored_tensors": {
|
|
|
14327 |
"shape": [
|
14328 |
320,
|
14329 |
320,
|
14330 |
+
80
|
14331 |
],
|
14332 |
+
"n_bytes": 16384000,
|
14333 |
"dtype": "torch.int16"
|
14334 |
}
|
14335 |
},
|
14336 |
"quant_format": "exl3",
|
14337 |
+
"bits_per_weight": 5
|
14338 |
},
|
14339 |
"model.layers.58.self_attn.k_proj": {
|
14340 |
"stored_tensors": {
|
|
|
14363 |
"shape": [
|
14364 |
320,
|
14365 |
64,
|
14366 |
+
96
|
14367 |
],
|
14368 |
+
"n_bytes": 3932160,
|
14369 |
"dtype": "torch.int16"
|
14370 |
}
|
14371 |
},
|
14372 |
"quant_format": "exl3",
|
14373 |
+
"bits_per_weight": 6
|
14374 |
},
|
14375 |
"model.layers.58.self_attn.v_proj": {
|
14376 |
"stored_tensors": {
|
|
|
14428 |
"shape": [
|
14429 |
320,
|
14430 |
320,
|
14431 |
+
80
|
14432 |
],
|
14433 |
+
"n_bytes": 16384000,
|
14434 |
"dtype": "torch.int16"
|
14435 |
}
|
14436 |
},
|
14437 |
"quant_format": "exl3",
|
14438 |
+
"bits_per_weight": 5
|
14439 |
},
|
14440 |
"model.layers.58.post_attention_layernorm": {
|
14441 |
"stored_tensors": {
|
|
|
14468 |
"shape": [
|
14469 |
320,
|
14470 |
1728,
|
14471 |
+
64
|
14472 |
],
|
14473 |
+
"n_bytes": 70778880,
|
14474 |
"dtype": "torch.int16"
|
14475 |
}
|
14476 |
},
|
14477 |
"quant_format": "exl3",
|
14478 |
+
"bits_per_weight": 4
|
14479 |
},
|
14480 |
"model.layers.58.mlp.gate_proj": {
|
14481 |
"stored_tensors": {
|
|
|
14573 |
"shape": [
|
14574 |
320,
|
14575 |
320,
|
14576 |
+
64
|
14577 |
],
|
14578 |
+
"n_bytes": 13107200,
|
14579 |
"dtype": "torch.int16"
|
14580 |
}
|
14581 |
},
|
14582 |
"quant_format": "exl3",
|
14583 |
+
"bits_per_weight": 4
|
14584 |
},
|
14585 |
"model.layers.59.self_attn.k_proj": {
|
14586 |
"stored_tensors": {
|
|
|
14674 |
"shape": [
|
14675 |
320,
|
14676 |
320,
|
14677 |
+
64
|
14678 |
],
|
14679 |
+
"n_bytes": 13107200,
|
14680 |
"dtype": "torch.int16"
|
14681 |
}
|
14682 |
},
|
14683 |
"quant_format": "exl3",
|
14684 |
+
"bits_per_weight": 4
|
14685 |
},
|
14686 |
"model.layers.59.post_attention_layernorm": {
|
14687 |
"stored_tensors": {
|
|
|
14714 |
"shape": [
|
14715 |
320,
|
14716 |
1728,
|
14717 |
+
80
|
14718 |
],
|
14719 |
+
"n_bytes": 88473600,
|
14720 |
"dtype": "torch.int16"
|
14721 |
}
|
14722 |
},
|
14723 |
"quant_format": "exl3",
|
14724 |
+
"bits_per_weight": 5
|
14725 |
},
|
14726 |
"model.layers.59.mlp.gate_proj": {
|
14727 |
"stored_tensors": {
|
|
|
15065 |
"shape": [
|
15066 |
320,
|
15067 |
320,
|
15068 |
+
80
|
15069 |
],
|
15070 |
+
"n_bytes": 16384000,
|
15071 |
"dtype": "torch.int16"
|
15072 |
}
|
15073 |
},
|
15074 |
"quant_format": "exl3",
|
15075 |
+
"bits_per_weight": 5
|
15076 |
},
|
15077 |
"model.layers.61.self_attn.k_proj": {
|
15078 |
"stored_tensors": {
|
|
|
15101 |
"shape": [
|
15102 |
320,
|
15103 |
64,
|
15104 |
+
96
|
15105 |
],
|
15106 |
+
"n_bytes": 3932160,
|
15107 |
"dtype": "torch.int16"
|
15108 |
}
|
15109 |
},
|
15110 |
"quant_format": "exl3",
|
15111 |
+
"bits_per_weight": 6
|
15112 |
},
|
15113 |
"model.layers.61.self_attn.v_proj": {
|
15114 |
"stored_tensors": {
|
|
|
15166 |
"shape": [
|
15167 |
320,
|
15168 |
320,
|
15169 |
+
80
|
15170 |
],
|
15171 |
+
"n_bytes": 16384000,
|
15172 |
"dtype": "torch.int16"
|
15173 |
}
|
15174 |
},
|
15175 |
"quant_format": "exl3",
|
15176 |
+
"bits_per_weight": 5
|
15177 |
},
|
15178 |
"model.layers.61.post_attention_layernorm": {
|
15179 |
"stored_tensors": {
|
|
|
15206 |
"shape": [
|
15207 |
320,
|
15208 |
1728,
|
15209 |
+
64
|
15210 |
],
|
15211 |
+
"n_bytes": 70778880,
|
15212 |
"dtype": "torch.int16"
|
15213 |
}
|
15214 |
},
|
15215 |
"quant_format": "exl3",
|
15216 |
+
"bits_per_weight": 4
|
15217 |
},
|
15218 |
"model.layers.61.mlp.gate_proj": {
|
15219 |
"stored_tensors": {
|
|
|
15311 |
"shape": [
|
15312 |
320,
|
15313 |
320,
|
15314 |
+
64
|
15315 |
],
|
15316 |
+
"n_bytes": 13107200,
|
15317 |
"dtype": "torch.int16"
|
15318 |
}
|
15319 |
},
|
15320 |
"quant_format": "exl3",
|
15321 |
+
"bits_per_weight": 4
|
15322 |
},
|
15323 |
"model.layers.62.self_attn.k_proj": {
|
15324 |
"stored_tensors": {
|
|
|
15347 |
"shape": [
|
15348 |
320,
|
15349 |
64,
|
15350 |
+
80
|
15351 |
],
|
15352 |
+
"n_bytes": 3276800,
|
15353 |
"dtype": "torch.int16"
|
15354 |
}
|
15355 |
},
|
15356 |
"quant_format": "exl3",
|
15357 |
+
"bits_per_weight": 5
|
15358 |
},
|
15359 |
"model.layers.62.self_attn.v_proj": {
|
15360 |
"stored_tensors": {
|
|
|
15383 |
"shape": [
|
15384 |
320,
|
15385 |
64,
|
15386 |
+
80
|
15387 |
],
|
15388 |
+
"n_bytes": 3276800,
|
15389 |
"dtype": "torch.int16"
|
15390 |
}
|
15391 |
},
|
15392 |
"quant_format": "exl3",
|
15393 |
+
"bits_per_weight": 5
|
15394 |
},
|
15395 |
"model.layers.62.self_attn.o_proj": {
|
15396 |
"stored_tensors": {
|
|
|
15412 |
"shape": [
|
15413 |
320,
|
15414 |
320,
|
15415 |
+
64
|
15416 |
],
|
15417 |
+
"n_bytes": 13107200,
|
15418 |
"dtype": "torch.int16"
|
15419 |
}
|
15420 |
},
|
15421 |
"quant_format": "exl3",
|
15422 |
+
"bits_per_weight": 4
|
15423 |
},
|
15424 |
"model.layers.62.post_attention_layernorm": {
|
15425 |
"stored_tensors": {
|
|
|
15452 |
"shape": [
|
15453 |
320,
|
15454 |
1728,
|
15455 |
+
80
|
15456 |
],
|
15457 |
+
"n_bytes": 88473600,
|
15458 |
"dtype": "torch.int16"
|
15459 |
}
|
15460 |
},
|
15461 |
"quant_format": "exl3",
|
15462 |
+
"bits_per_weight": 5
|
15463 |
},
|
15464 |
"model.layers.62.mlp.gate_proj": {
|
15465 |
"stored_tensors": {
|
|
|
15557 |
"shape": [
|
15558 |
320,
|
15559 |
320,
|
15560 |
+
80
|
15561 |
],
|
15562 |
+
"n_bytes": 16384000,
|
15563 |
"dtype": "torch.int16"
|
15564 |
}
|
15565 |
},
|
15566 |
"quant_format": "exl3",
|
15567 |
+
"bits_per_weight": 5
|
15568 |
},
|
15569 |
"model.layers.63.self_attn.k_proj": {
|
15570 |
"stored_tensors": {
|
|
|
15593 |
"shape": [
|
15594 |
320,
|
15595 |
64,
|
15596 |
+
96
|
15597 |
],
|
15598 |
+
"n_bytes": 3932160,
|
15599 |
"dtype": "torch.int16"
|
15600 |
}
|
15601 |
},
|
15602 |
"quant_format": "exl3",
|
15603 |
+
"bits_per_weight": 6
|
15604 |
},
|
15605 |
"model.layers.63.self_attn.v_proj": {
|
15606 |
"stored_tensors": {
|
|
|
15629 |
"shape": [
|
15630 |
320,
|
15631 |
64,
|
15632 |
+
96
|
15633 |
],
|
15634 |
+
"n_bytes": 3932160,
|
15635 |
"dtype": "torch.int16"
|
15636 |
}
|
15637 |
},
|
15638 |
"quant_format": "exl3",
|
15639 |
+
"bits_per_weight": 6
|
15640 |
},
|
15641 |
"model.layers.63.self_attn.o_proj": {
|
15642 |
"stored_tensors": {
|
|
|
15658 |
"shape": [
|
15659 |
320,
|
15660 |
320,
|
15661 |
+
80
|
15662 |
],
|
15663 |
+
"n_bytes": 16384000,
|
15664 |
"dtype": "torch.int16"
|
15665 |
}
|
15666 |
},
|
15667 |
"quant_format": "exl3",
|
15668 |
+
"bits_per_weight": 5
|
15669 |
},
|
15670 |
"model.layers.63.post_attention_layernorm": {
|
15671 |
"stored_tensors": {
|
|
|
15698 |
"shape": [
|
15699 |
320,
|
15700 |
1728,
|
15701 |
+
64
|
15702 |
],
|
15703 |
+
"n_bytes": 70778880,
|
15704 |
"dtype": "torch.int16"
|
15705 |
}
|
15706 |
},
|
15707 |
"quant_format": "exl3",
|
15708 |
+
"bits_per_weight": 4
|
15709 |
},
|
15710 |
"model.layers.63.mlp.gate_proj": {
|
15711 |
"stored_tensors": {
|
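
The arithmetic in these entries is internally consistent: n_bytes is the product of the shape times two bytes (torch.int16), and in every visible entry the trailing shape dimension is 16 × bits_per_weight (64 → 4 bpw, 80 → 5 bpw, 96 → 6 bpw). A minimal sketch that checks this and tallies the per-module bitrates; it assumes only what the fragments above show, and the local file path and the recursive scan are illustrative, not part of the repo:

```python
# Sanity-check the quantization_config.json numbers and summarize bitrates.
# Assumptions (not confirmed by the repo): stored tensors are torch.int16
# (2 bytes/element), and each per-module entry carries a "bits_per_weight"
# field as in the fragments above; the file's exact nesting is not assumed.
import json
from collections import Counter
from math import prod

def check_entry(shape, n_bytes, bits_per_weight):
    # n_bytes follows directly from the shape and the 2-byte int16 dtype.
    assert n_bytes == prod(shape) * 2
    # In every visible entry the trailing dim equals 16 * bits_per_weight.
    assert shape[-1] == 16 * bits_per_weight

check_entry([320, 1728, 64], 70778880, 4)
check_entry([320, 1728, 80], 88473600, 5)
check_entry([320, 64, 96], 3932160, 6)

def collect_bpw(node, found=None):
    """Recursively collect every 'bits_per_weight' value, whatever the nesting."""
    if found is None:
        found = []
    if isinstance(node, dict):
        if "bits_per_weight" in node:
            found.append(node["bits_per_weight"])
        for value in node.values():
            collect_bpw(value, found)
    return found

# Point this at a locally downloaded copy of quantization_config.json.
with open("quantization_config.json") as f:
    cfg = json.load(f)
print(Counter(collect_bpw(cfg)))  # how many modules landed at 4 / 5 / 6 bpw
```

Run next to the downloaded quantization_config.json, the Counter gives a quick view of how the mixed 4/5/6 bpw budget (averaging out to the 4.5 bits listed in config.json) is spread across modules.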