async0x42 committed
Commit b09627f · verified · 1 Parent(s): 52b1842

Upload folder using huggingface_hub

README.md CHANGED
@@ -25,11 +25,11 @@ tags:
 
 <p><b>Context/instruct template</b>: ChatML. <s>Was definitely not tested with ChatML instruct and Mistral v7 template, nuh-uh.</s></p>
 
-<p><b>Samplers</b>: temperature at 0.9, min_p at 0.05, top_a at 0.3, TFS at 0.75, repetition_penalty at 1.03, DRY if you have access to it.</p>
+<p><b>Samplers</b>: temperature at 0.9, min_p at 0.05, top_a at 0.3, TFS at 0.75, repetition_penalty at 1.03, DRY if you have access to it. (or not, see below.)</p>
 
 A virt-io derivative prompt worked best during our testing, but feel free to use what you like.
 
-Master import for ST: [https://files.catbox.moe/b6nwbc.json](https://files.catbox.moe/b6nwbc.json)
+Master import for ST: https://files.catbox.moe/w812at.png
 
 ## Reasoning
 
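The sampler block above maps directly onto a request payload for most OpenAI-compatible backends. A minimal sketch, assuming text-generation-webui/TabbyAPI-style extension keys (the exact spellings, and the DRY value shown, are assumptions, not part of this repo; check your backend's docs):

```python
import json

# Recommended samplers from the README as a generation payload.
# Key names follow common backend extensions and are assumptions.
sampler_settings = {
    "temperature": 0.9,
    "min_p": 0.05,
    "top_a": 0.3,
    "tfs": 0.75,                 # tail-free sampling
    "repetition_penalty": 1.03,
    "dry_multiplier": 0.8,       # hypothetical value; enable DRY if supported
}

print(json.dumps(sampler_settings, indent=2))
```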
config.json CHANGED
@@ -5,7 +5,7 @@
 ],
 "attention_dropout": 0.0,
 "bos_token_id": 151643,
-"eos_token_id": 151643,
+"eos_token_id": 151645,
 "hidden_act": "silu",
 "hidden_size": 5120,
 "initializer_range": 0.02,
@@ -28,11 +28,13 @@
 "vocab_size": 151665,
 "quantization_config": {
 "quant_method": "exl3",
-"version": "0.0.1",
+"version": "0.0.4",
 "bits": 4.5,
+"head_bits": 6,
 "calibration": {
 "rows": 100,
 "cols": 2048
-}
+},
+"out_scales": "auto"
 }
 }
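The eos_token_id change from 151643 to 151645 is the functional fix here: in Qwen-style vocabularies 151643 is usually `<|endoftext|>` while 151645 is `<|im_end|>`, the ChatML turn terminator, so the old value could let generations run past the end of a turn. A quick check, as a sketch (the local path is a placeholder):

```python
from transformers import AutoTokenizer

# Point this at a local clone of the repo (placeholder path).
tok = AutoTokenizer.from_pretrained("./this-model")

# Verify the usual Qwen-style ids rather than assuming them.
print("<|endoftext|> ->", tok.convert_tokens_to_ids("<|endoftext|>"))
print("<|im_end|>    ->", tok.convert_tokens_to_ids("<|im_end|>"))

# config.json's eos_token_id should now match the ChatML terminator.
assert tok.convert_tokens_to_ids("<|im_end|>") == 151645
```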
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5eb874850762e60bd37e6db41c178f95e1d05a0ba9fab68ec57f27e3ef8de895
+oid sha256:04b1d3f2f4220a2ec286bd0995d310b60651c9fd50a2ee7ad3c822a4d000595f
 size 8413645240
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:58a176cc87b5a78c055ed177185853d9f8f0cd679d10b7943b74068aa7aa3895
-size 8515209112
+oid sha256:0dcf0e0f248fd776b6c64b082281916c36b78e2548c1359bf620b3408bfe7c7a
+size 8507344792
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c0c8afe4e8906057a43f04e77818904c9fc2af0d53b006787b15ac2314d2d942
-size 2779318448
+oid sha256:81e6935c2c3d8916b4ce7b5668e4940a34b505f5595b6bd88caaec7f388a794c
+size 2782595248
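All three LFS pointers changed, so a previously downloaded copy of any shard is stale. A sketch that streams each shard through SHA-256 and compares against the new pointers (oids and sizes copied from the diffs above):

```python
import hashlib
import os

# (oid, size) pairs from the updated LFS pointers.
expected = {
    "model-00001-of-00003.safetensors":
        ("04b1d3f2f4220a2ec286bd0995d310b60651c9fd50a2ee7ad3c822a4d000595f", 8413645240),
    "model-00002-of-00003.safetensors":
        ("0dcf0e0f248fd776b6c64b082281916c36b78e2548c1359bf620b3408bfe7c7a", 8507344792),
    "model-00003-of-00003.safetensors":
        ("81e6935c2c3d8916b4ce7b5668e4940a34b505f5595b6bd88caaec7f388a794c", 2782595248),
}

for name, (oid, size) in expected.items():
    assert os.path.getsize(name) == size, f"{name}: size mismatch"
    h = hashlib.sha256()
    with open(name, "rb") as f:
        # Stream in 1 MiB chunks; the shards are several GB each.
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    assert h.hexdigest() == oid, f"{name}: hash mismatch"
print("all shards match their LFS pointers")
```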
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
 {
 "metadata": {
-"total_size": 19707992320
+"total_size": 19703404800
 },
 "weight_map": {
 "model.embed_tokens.weight": "model-00001-of-00003.safetensors",
quantization_config.json CHANGED
@@ -1,11 +1,13 @@
 {
 "quant_method": "exl3",
-"version": "0.0.1",
+"version": "0.0.4",
 "bits": 4.5,
+"head_bits": 6,
 "calibration": {
 "rows": 100,
 "cols": 2048
 },
+"out_scales": "auto",
 "tensor_storage": {
 "model.embed_tokens": {
 "stored_tensors": {
@@ -585,14 +587,14 @@
585
  "shape": [
586
  320,
587
  64,
588
- 64
589
  ],
590
- "n_bytes": 2621440,
591
  "dtype": "torch.int16"
592
  }
593
  },
594
  "quant_format": "exl3",
595
- "bits_per_weight": 4
596
  },
597
  "model.layers.2.self_attn.v_proj": {
598
  "stored_tensors": {
@@ -621,14 +623,14 @@
621
  "shape": [
622
  320,
623
  64,
624
- 96
625
  ],
626
- "n_bytes": 3932160,
627
  "dtype": "torch.int16"
628
  }
629
  },
630
  "quant_format": "exl3",
631
- "bits_per_weight": 6
632
  },
633
  "model.layers.2.self_attn.o_proj": {
634
  "stored_tensors": {
@@ -1323,14 +1325,14 @@
1323
  "shape": [
1324
  320,
1325
  64,
1326
- 64
1327
  ],
1328
- "n_bytes": 2621440,
1329
  "dtype": "torch.int16"
1330
  }
1331
  },
1332
  "quant_format": "exl3",
1333
- "bits_per_weight": 4
1334
  },
1335
  "model.layers.5.self_attn.v_proj": {
1336
  "stored_tensors": {
@@ -1779,14 +1781,14 @@
1779
  "shape": [
1780
  320,
1781
  320,
1782
- 64
1783
  ],
1784
- "n_bytes": 13107200,
1785
  "dtype": "torch.int16"
1786
  }
1787
  },
1788
  "quant_format": "exl3",
1789
- "bits_per_weight": 4
1790
  },
1791
  "model.layers.7.self_attn.k_proj": {
1792
  "stored_tensors": {
@@ -1815,14 +1817,14 @@
1815
  "shape": [
1816
  320,
1817
  64,
1818
- 64
1819
  ],
1820
- "n_bytes": 2621440,
1821
  "dtype": "torch.int16"
1822
  }
1823
  },
1824
  "quant_format": "exl3",
1825
- "bits_per_weight": 4
1826
  },
1827
  "model.layers.7.self_attn.v_proj": {
1828
  "stored_tensors": {
@@ -1851,14 +1853,14 @@
1851
  "shape": [
1852
  320,
1853
  64,
1854
- 80
1855
  ],
1856
- "n_bytes": 3276800,
1857
  "dtype": "torch.int16"
1858
  }
1859
  },
1860
  "quant_format": "exl3",
1861
- "bits_per_weight": 5
1862
  },
1863
  "model.layers.7.self_attn.o_proj": {
1864
  "stored_tensors": {
@@ -1880,14 +1882,14 @@
1880
  "shape": [
1881
  320,
1882
  320,
1883
- 64
1884
  ],
1885
- "n_bytes": 13107200,
1886
  "dtype": "torch.int16"
1887
  }
1888
  },
1889
  "quant_format": "exl3",
1890
- "bits_per_weight": 4
1891
  },
1892
  "model.layers.7.post_attention_layernorm": {
1893
  "stored_tensors": {
@@ -1920,14 +1922,14 @@
1920
  "shape": [
1921
  320,
1922
  1728,
1923
- 80
1924
  ],
1925
- "n_bytes": 88473600,
1926
  "dtype": "torch.int16"
1927
  }
1928
  },
1929
  "quant_format": "exl3",
1930
- "bits_per_weight": 5
1931
  },
1932
  "model.layers.7.mlp.gate_proj": {
1933
  "stored_tensors": {
@@ -2025,14 +2027,14 @@
2025
  "shape": [
2026
  320,
2027
  320,
2028
- 80
2029
  ],
2030
- "n_bytes": 16384000,
2031
  "dtype": "torch.int16"
2032
  }
2033
  },
2034
  "quant_format": "exl3",
2035
- "bits_per_weight": 5
2036
  },
2037
  "model.layers.8.self_attn.k_proj": {
2038
  "stored_tensors": {
@@ -2126,14 +2128,14 @@
2126
  "shape": [
2127
  320,
2128
  320,
2129
- 80
2130
  ],
2131
- "n_bytes": 16384000,
2132
  "dtype": "torch.int16"
2133
  }
2134
  },
2135
  "quant_format": "exl3",
2136
- "bits_per_weight": 5
2137
  },
2138
  "model.layers.8.post_attention_layernorm": {
2139
  "stored_tensors": {
@@ -2166,14 +2168,14 @@
2166
  "shape": [
2167
  320,
2168
  1728,
2169
- 64
2170
  ],
2171
- "n_bytes": 70778880,
2172
  "dtype": "torch.int16"
2173
  }
2174
  },
2175
  "quant_format": "exl3",
2176
- "bits_per_weight": 4
2177
  },
2178
  "model.layers.8.mlp.gate_proj": {
2179
  "stored_tensors": {
@@ -2517,14 +2519,14 @@
2517
  "shape": [
2518
  320,
2519
  320,
2520
- 64
2521
  ],
2522
- "n_bytes": 13107200,
2523
  "dtype": "torch.int16"
2524
  }
2525
  },
2526
  "quant_format": "exl3",
2527
- "bits_per_weight": 4
2528
  },
2529
  "model.layers.10.self_attn.k_proj": {
2530
  "stored_tensors": {
@@ -2553,14 +2555,14 @@
2553
  "shape": [
2554
  320,
2555
  64,
2556
- 64
2557
  ],
2558
- "n_bytes": 2621440,
2559
  "dtype": "torch.int16"
2560
  }
2561
  },
2562
  "quant_format": "exl3",
2563
- "bits_per_weight": 4
2564
  },
2565
  "model.layers.10.self_attn.v_proj": {
2566
  "stored_tensors": {
@@ -2618,14 +2620,14 @@
2618
  "shape": [
2619
  320,
2620
  320,
2621
- 64
2622
  ],
2623
- "n_bytes": 13107200,
2624
  "dtype": "torch.int16"
2625
  }
2626
  },
2627
  "quant_format": "exl3",
2628
- "bits_per_weight": 4
2629
  },
2630
  "model.layers.10.post_attention_layernorm": {
2631
  "stored_tensors": {
@@ -2658,14 +2660,14 @@
2658
  "shape": [
2659
  320,
2660
  1728,
2661
- 80
2662
  ],
2663
- "n_bytes": 88473600,
2664
  "dtype": "torch.int16"
2665
  }
2666
  },
2667
  "quant_format": "exl3",
2668
- "bits_per_weight": 5
2669
  },
2670
  "model.layers.10.mlp.gate_proj": {
2671
  "stored_tensors": {
@@ -2763,14 +2765,14 @@
2763
  "shape": [
2764
  320,
2765
  320,
2766
- 80
2767
  ],
2768
- "n_bytes": 16384000,
2769
  "dtype": "torch.int16"
2770
  }
2771
  },
2772
  "quant_format": "exl3",
2773
- "bits_per_weight": 5
2774
  },
2775
  "model.layers.11.self_attn.k_proj": {
2776
  "stored_tensors": {
@@ -2864,14 +2866,14 @@
2864
  "shape": [
2865
  320,
2866
  320,
2867
- 80
2868
  ],
2869
- "n_bytes": 16384000,
2870
  "dtype": "torch.int16"
2871
  }
2872
  },
2873
  "quant_format": "exl3",
2874
- "bits_per_weight": 5
2875
  },
2876
  "model.layers.11.post_attention_layernorm": {
2877
  "stored_tensors": {
@@ -2904,14 +2906,14 @@
2904
  "shape": [
2905
  320,
2906
  1728,
2907
- 64
2908
  ],
2909
- "n_bytes": 70778880,
2910
  "dtype": "torch.int16"
2911
  }
2912
  },
2913
  "quant_format": "exl3",
2914
- "bits_per_weight": 4
2915
  },
2916
  "model.layers.11.mlp.gate_proj": {
2917
  "stored_tensors": {
@@ -3255,14 +3257,14 @@
3255
  "shape": [
3256
  320,
3257
  320,
3258
- 64
3259
  ],
3260
- "n_bytes": 13107200,
3261
  "dtype": "torch.int16"
3262
  }
3263
  },
3264
  "quant_format": "exl3",
3265
- "bits_per_weight": 4
3266
  },
3267
  "model.layers.13.self_attn.k_proj": {
3268
  "stored_tensors": {
@@ -3291,14 +3293,14 @@
3291
  "shape": [
3292
  320,
3293
  64,
3294
- 64
3295
  ],
3296
- "n_bytes": 2621440,
3297
  "dtype": "torch.int16"
3298
  }
3299
  },
3300
  "quant_format": "exl3",
3301
- "bits_per_weight": 4
3302
  },
3303
  "model.layers.13.self_attn.v_proj": {
3304
  "stored_tensors": {
@@ -3356,14 +3358,14 @@
3356
  "shape": [
3357
  320,
3358
  320,
3359
- 64
3360
  ],
3361
- "n_bytes": 13107200,
3362
  "dtype": "torch.int16"
3363
  }
3364
  },
3365
  "quant_format": "exl3",
3366
- "bits_per_weight": 4
3367
  },
3368
  "model.layers.13.post_attention_layernorm": {
3369
  "stored_tensors": {
@@ -3396,14 +3398,14 @@
3396
  "shape": [
3397
  320,
3398
  1728,
3399
- 80
3400
  ],
3401
- "n_bytes": 88473600,
3402
  "dtype": "torch.int16"
3403
  }
3404
  },
3405
  "quant_format": "exl3",
3406
- "bits_per_weight": 5
3407
  },
3408
  "model.layers.13.mlp.gate_proj": {
3409
  "stored_tensors": {
@@ -3501,14 +3503,14 @@
3501
  "shape": [
3502
  320,
3503
  320,
3504
- 80
3505
  ],
3506
- "n_bytes": 16384000,
3507
  "dtype": "torch.int16"
3508
  }
3509
  },
3510
  "quant_format": "exl3",
3511
- "bits_per_weight": 5
3512
  },
3513
  "model.layers.14.self_attn.k_proj": {
3514
  "stored_tensors": {
@@ -3537,14 +3539,14 @@
3537
  "shape": [
3538
  320,
3539
  64,
3540
- 96
3541
  ],
3542
- "n_bytes": 3932160,
3543
  "dtype": "torch.int16"
3544
  }
3545
  },
3546
  "quant_format": "exl3",
3547
- "bits_per_weight": 6
3548
  },
3549
  "model.layers.14.self_attn.v_proj": {
3550
  "stored_tensors": {
@@ -3573,14 +3575,14 @@
3573
  "shape": [
3574
  320,
3575
  64,
3576
- 96
3577
  ],
3578
- "n_bytes": 3932160,
3579
  "dtype": "torch.int16"
3580
  }
3581
  },
3582
  "quant_format": "exl3",
3583
- "bits_per_weight": 6
3584
  },
3585
  "model.layers.14.self_attn.o_proj": {
3586
  "stored_tensors": {
@@ -3602,14 +3604,14 @@
3602
  "shape": [
3603
  320,
3604
  320,
3605
- 80
3606
  ],
3607
- "n_bytes": 16384000,
3608
  "dtype": "torch.int16"
3609
  }
3610
  },
3611
  "quant_format": "exl3",
3612
- "bits_per_weight": 5
3613
  },
3614
  "model.layers.14.post_attention_layernorm": {
3615
  "stored_tensors": {
@@ -3642,14 +3644,14 @@
3642
  "shape": [
3643
  320,
3644
  1728,
3645
- 64
3646
  ],
3647
- "n_bytes": 70778880,
3648
  "dtype": "torch.int16"
3649
  }
3650
  },
3651
  "quant_format": "exl3",
3652
- "bits_per_weight": 4
3653
  },
3654
  "model.layers.14.mlp.gate_proj": {
3655
  "stored_tensors": {
@@ -3747,14 +3749,14 @@
3747
  "shape": [
3748
  320,
3749
  320,
3750
- 64
3751
  ],
3752
- "n_bytes": 13107200,
3753
  "dtype": "torch.int16"
3754
  }
3755
  },
3756
  "quant_format": "exl3",
3757
- "bits_per_weight": 4
3758
  },
3759
  "model.layers.15.self_attn.k_proj": {
3760
  "stored_tensors": {
@@ -3783,14 +3785,14 @@
3783
  "shape": [
3784
  320,
3785
  64,
3786
- 64
3787
  ],
3788
- "n_bytes": 2621440,
3789
  "dtype": "torch.int16"
3790
  }
3791
  },
3792
  "quant_format": "exl3",
3793
- "bits_per_weight": 4
3794
  },
3795
  "model.layers.15.self_attn.v_proj": {
3796
  "stored_tensors": {
@@ -3819,14 +3821,14 @@
3819
  "shape": [
3820
  320,
3821
  64,
3822
- 80
3823
  ],
3824
- "n_bytes": 3276800,
3825
  "dtype": "torch.int16"
3826
  }
3827
  },
3828
  "quant_format": "exl3",
3829
- "bits_per_weight": 5
3830
  },
3831
  "model.layers.15.self_attn.o_proj": {
3832
  "stored_tensors": {
@@ -3848,14 +3850,14 @@
3848
  "shape": [
3849
  320,
3850
  320,
3851
- 64
3852
  ],
3853
- "n_bytes": 13107200,
3854
  "dtype": "torch.int16"
3855
  }
3856
  },
3857
  "quant_format": "exl3",
3858
- "bits_per_weight": 4
3859
  },
3860
  "model.layers.15.post_attention_layernorm": {
3861
  "stored_tensors": {
@@ -3888,14 +3890,14 @@
3888
  "shape": [
3889
  320,
3890
  1728,
3891
- 80
3892
  ],
3893
- "n_bytes": 88473600,
3894
  "dtype": "torch.int16"
3895
  }
3896
  },
3897
  "quant_format": "exl3",
3898
- "bits_per_weight": 5
3899
  },
3900
  "model.layers.15.mlp.gate_proj": {
3901
  "stored_tensors": {
@@ -4239,14 +4241,14 @@
4239
  "shape": [
4240
  320,
4241
  320,
4242
- 80
4243
  ],
4244
- "n_bytes": 16384000,
4245
  "dtype": "torch.int16"
4246
  }
4247
  },
4248
  "quant_format": "exl3",
4249
- "bits_per_weight": 5
4250
  },
4251
  "model.layers.17.self_attn.k_proj": {
4252
  "stored_tensors": {
@@ -4340,14 +4342,14 @@
4340
  "shape": [
4341
  320,
4342
  320,
4343
- 80
4344
  ],
4345
- "n_bytes": 16384000,
4346
  "dtype": "torch.int16"
4347
  }
4348
  },
4349
  "quant_format": "exl3",
4350
- "bits_per_weight": 5
4351
  },
4352
  "model.layers.17.post_attention_layernorm": {
4353
  "stored_tensors": {
@@ -4380,14 +4382,14 @@
4380
  "shape": [
4381
  320,
4382
  1728,
4383
- 64
4384
  ],
4385
- "n_bytes": 70778880,
4386
  "dtype": "torch.int16"
4387
  }
4388
  },
4389
  "quant_format": "exl3",
4390
- "bits_per_weight": 4
4391
  },
4392
  "model.layers.17.mlp.gate_proj": {
4393
  "stored_tensors": {
@@ -4485,14 +4487,14 @@
4485
  "shape": [
4486
  320,
4487
  320,
4488
- 64
4489
  ],
4490
- "n_bytes": 13107200,
4491
  "dtype": "torch.int16"
4492
  }
4493
  },
4494
  "quant_format": "exl3",
4495
- "bits_per_weight": 4
4496
  },
4497
  "model.layers.18.self_attn.k_proj": {
4498
  "stored_tensors": {
@@ -4521,14 +4523,14 @@
4521
  "shape": [
4522
  320,
4523
  64,
4524
- 64
4525
  ],
4526
- "n_bytes": 2621440,
4527
  "dtype": "torch.int16"
4528
  }
4529
  },
4530
  "quant_format": "exl3",
4531
- "bits_per_weight": 4
4532
  },
4533
  "model.layers.18.self_attn.v_proj": {
4534
  "stored_tensors": {
@@ -4586,14 +4588,14 @@
4586
  "shape": [
4587
  320,
4588
  320,
4589
- 64
4590
  ],
4591
- "n_bytes": 13107200,
4592
  "dtype": "torch.int16"
4593
  }
4594
  },
4595
  "quant_format": "exl3",
4596
- "bits_per_weight": 4
4597
  },
4598
  "model.layers.18.post_attention_layernorm": {
4599
  "stored_tensors": {
@@ -4626,14 +4628,14 @@
4626
  "shape": [
4627
  320,
4628
  1728,
4629
- 80
4630
  ],
4631
- "n_bytes": 88473600,
4632
  "dtype": "torch.int16"
4633
  }
4634
  },
4635
  "quant_format": "exl3",
4636
- "bits_per_weight": 5
4637
  },
4638
  "model.layers.18.mlp.gate_proj": {
4639
  "stored_tensors": {
@@ -4977,14 +4979,14 @@
4977
  "shape": [
4978
  320,
4979
  320,
4980
- 80
4981
  ],
4982
- "n_bytes": 16384000,
4983
  "dtype": "torch.int16"
4984
  }
4985
  },
4986
  "quant_format": "exl3",
4987
- "bits_per_weight": 5
4988
  },
4989
  "model.layers.20.self_attn.k_proj": {
4990
  "stored_tensors": {
@@ -5078,14 +5080,14 @@
5078
  "shape": [
5079
  320,
5080
  320,
5081
- 80
5082
  ],
5083
- "n_bytes": 16384000,
5084
  "dtype": "torch.int16"
5085
  }
5086
  },
5087
  "quant_format": "exl3",
5088
- "bits_per_weight": 5
5089
  },
5090
  "model.layers.20.post_attention_layernorm": {
5091
  "stored_tensors": {
@@ -5118,14 +5120,14 @@
5118
  "shape": [
5119
  320,
5120
  1728,
5121
- 64
5122
  ],
5123
- "n_bytes": 70778880,
5124
  "dtype": "torch.int16"
5125
  }
5126
  },
5127
  "quant_format": "exl3",
5128
- "bits_per_weight": 4
5129
  },
5130
  "model.layers.20.mlp.gate_proj": {
5131
  "stored_tensors": {
@@ -5223,14 +5225,14 @@
5223
  "shape": [
5224
  320,
5225
  320,
5226
- 64
5227
  ],
5228
- "n_bytes": 13107200,
5229
  "dtype": "torch.int16"
5230
  }
5231
  },
5232
  "quant_format": "exl3",
5233
- "bits_per_weight": 4
5234
  },
5235
  "model.layers.21.self_attn.k_proj": {
5236
  "stored_tensors": {
@@ -5259,14 +5261,14 @@
5259
  "shape": [
5260
  320,
5261
  64,
5262
- 64
5263
  ],
5264
- "n_bytes": 2621440,
5265
  "dtype": "torch.int16"
5266
  }
5267
  },
5268
  "quant_format": "exl3",
5269
- "bits_per_weight": 4
5270
  },
5271
  "model.layers.21.self_attn.v_proj": {
5272
  "stored_tensors": {
@@ -5324,14 +5326,14 @@
5324
  "shape": [
5325
  320,
5326
  320,
5327
- 64
5328
  ],
5329
- "n_bytes": 13107200,
5330
  "dtype": "torch.int16"
5331
  }
5332
  },
5333
  "quant_format": "exl3",
5334
- "bits_per_weight": 4
5335
  },
5336
  "model.layers.21.post_attention_layernorm": {
5337
  "stored_tensors": {
@@ -5364,14 +5366,14 @@
5364
  "shape": [
5365
  320,
5366
  1728,
5367
- 80
5368
  ],
5369
- "n_bytes": 88473600,
5370
  "dtype": "torch.int16"
5371
  }
5372
  },
5373
  "quant_format": "exl3",
5374
- "bits_per_weight": 5
5375
  },
5376
  "model.layers.21.mlp.gate_proj": {
5377
  "stored_tensors": {
@@ -5751,14 +5753,14 @@
5751
  "shape": [
5752
  320,
5753
  64,
5754
- 64
5755
  ],
5756
- "n_bytes": 2621440,
5757
  "dtype": "torch.int16"
5758
  }
5759
  },
5760
  "quant_format": "exl3",
5761
- "bits_per_weight": 4
5762
  },
5763
  "model.layers.23.self_attn.v_proj": {
5764
  "stored_tensors": {
@@ -5787,14 +5789,14 @@
5787
  "shape": [
5788
  320,
5789
  64,
5790
- 80
5791
  ],
5792
- "n_bytes": 3276800,
5793
  "dtype": "torch.int16"
5794
  }
5795
  },
5796
  "quant_format": "exl3",
5797
- "bits_per_weight": 5
5798
  },
5799
  "model.layers.23.self_attn.o_proj": {
5800
  "stored_tensors": {
@@ -6489,14 +6491,14 @@
6489
  "shape": [
6490
  320,
6491
  64,
6492
- 64
6493
  ],
6494
- "n_bytes": 2621440,
6495
  "dtype": "torch.int16"
6496
  }
6497
  },
6498
  "quant_format": "exl3",
6499
- "bits_per_weight": 4
6500
  },
6501
  "model.layers.26.self_attn.v_proj": {
6502
  "stored_tensors": {
@@ -6525,14 +6527,14 @@
6525
  "shape": [
6526
  320,
6527
  64,
6528
- 96
6529
  ],
6530
- "n_bytes": 3932160,
6531
  "dtype": "torch.int16"
6532
  }
6533
  },
6534
  "quant_format": "exl3",
6535
- "bits_per_weight": 6
6536
  },
6537
  "model.layers.26.self_attn.o_proj": {
6538
  "stored_tensors": {
@@ -7227,14 +7229,14 @@
7227
  "shape": [
7228
  320,
7229
  64,
7230
- 64
7231
  ],
7232
- "n_bytes": 2621440,
7233
  "dtype": "torch.int16"
7234
  }
7235
  },
7236
  "quant_format": "exl3",
7237
- "bits_per_weight": 4
7238
  },
7239
  "model.layers.29.self_attn.v_proj": {
7240
  "stored_tensors": {
@@ -7683,14 +7685,14 @@
7683
  "shape": [
7684
  320,
7685
  320,
7686
- 64
7687
  ],
7688
- "n_bytes": 13107200,
7689
  "dtype": "torch.int16"
7690
  }
7691
  },
7692
  "quant_format": "exl3",
7693
- "bits_per_weight": 4
7694
  },
7695
  "model.layers.31.self_attn.k_proj": {
7696
  "stored_tensors": {
@@ -7719,14 +7721,14 @@
7719
  "shape": [
7720
  320,
7721
  64,
7722
- 64
7723
  ],
7724
- "n_bytes": 2621440,
7725
  "dtype": "torch.int16"
7726
  }
7727
  },
7728
  "quant_format": "exl3",
7729
- "bits_per_weight": 4
7730
  },
7731
  "model.layers.31.self_attn.v_proj": {
7732
  "stored_tensors": {
@@ -7755,14 +7757,14 @@
7755
  "shape": [
7756
  320,
7757
  64,
7758
- 80
7759
  ],
7760
- "n_bytes": 3276800,
7761
  "dtype": "torch.int16"
7762
  }
7763
  },
7764
  "quant_format": "exl3",
7765
- "bits_per_weight": 5
7766
  },
7767
  "model.layers.31.self_attn.o_proj": {
7768
  "stored_tensors": {
@@ -7784,14 +7786,14 @@
7784
  "shape": [
7785
  320,
7786
  320,
7787
- 64
7788
  ],
7789
- "n_bytes": 13107200,
7790
  "dtype": "torch.int16"
7791
  }
7792
  },
7793
  "quant_format": "exl3",
7794
- "bits_per_weight": 4
7795
  },
7796
  "model.layers.31.post_attention_layernorm": {
7797
  "stored_tensors": {
@@ -7824,14 +7826,14 @@
7824
  "shape": [
7825
  320,
7826
  1728,
7827
- 80
7828
  ],
7829
- "n_bytes": 88473600,
7830
  "dtype": "torch.int16"
7831
  }
7832
  },
7833
  "quant_format": "exl3",
7834
- "bits_per_weight": 5
7835
  },
7836
  "model.layers.31.mlp.gate_proj": {
7837
  "stored_tensors": {
@@ -7929,14 +7931,14 @@
7929
  "shape": [
7930
  320,
7931
  320,
7932
- 80
7933
  ],
7934
- "n_bytes": 16384000,
7935
  "dtype": "torch.int16"
7936
  }
7937
  },
7938
  "quant_format": "exl3",
7939
- "bits_per_weight": 5
7940
  },
7941
  "model.layers.32.self_attn.k_proj": {
7942
  "stored_tensors": {
@@ -8030,14 +8032,14 @@
8030
  "shape": [
8031
  320,
8032
  320,
8033
- 80
8034
  ],
8035
- "n_bytes": 16384000,
8036
  "dtype": "torch.int16"
8037
  }
8038
  },
8039
  "quant_format": "exl3",
8040
- "bits_per_weight": 5
8041
  },
8042
  "model.layers.32.post_attention_layernorm": {
8043
  "stored_tensors": {
@@ -8070,14 +8072,14 @@
8070
  "shape": [
8071
  320,
8072
  1728,
8073
- 64
8074
  ],
8075
- "n_bytes": 70778880,
8076
  "dtype": "torch.int16"
8077
  }
8078
  },
8079
  "quant_format": "exl3",
8080
- "bits_per_weight": 4
8081
  },
8082
  "model.layers.32.mlp.gate_proj": {
8083
  "stored_tensors": {
@@ -8421,14 +8423,14 @@
8421
  "shape": [
8422
  320,
8423
  320,
8424
- 64
8425
  ],
8426
- "n_bytes": 13107200,
8427
  "dtype": "torch.int16"
8428
  }
8429
  },
8430
  "quant_format": "exl3",
8431
- "bits_per_weight": 4
8432
  },
8433
  "model.layers.34.self_attn.k_proj": {
8434
  "stored_tensors": {
@@ -8457,14 +8459,14 @@
8457
  "shape": [
8458
  320,
8459
  64,
8460
- 64
8461
  ],
8462
- "n_bytes": 2621440,
8463
  "dtype": "torch.int16"
8464
  }
8465
  },
8466
  "quant_format": "exl3",
8467
- "bits_per_weight": 4
8468
  },
8469
  "model.layers.34.self_attn.v_proj": {
8470
  "stored_tensors": {
@@ -8522,14 +8524,14 @@
8522
  "shape": [
8523
  320,
8524
  320,
8525
- 64
8526
  ],
8527
- "n_bytes": 13107200,
8528
  "dtype": "torch.int16"
8529
  }
8530
  },
8531
  "quant_format": "exl3",
8532
- "bits_per_weight": 4
8533
  },
8534
  "model.layers.34.post_attention_layernorm": {
8535
  "stored_tensors": {
@@ -8562,14 +8564,14 @@
8562
  "shape": [
8563
  320,
8564
  1728,
8565
- 80
8566
  ],
8567
- "n_bytes": 88473600,
8568
  "dtype": "torch.int16"
8569
  }
8570
  },
8571
  "quant_format": "exl3",
8572
- "bits_per_weight": 5
8573
  },
8574
  "model.layers.34.mlp.gate_proj": {
8575
  "stored_tensors": {
@@ -8667,14 +8669,14 @@
8667
  "shape": [
8668
  320,
8669
  320,
8670
- 80
8671
  ],
8672
- "n_bytes": 16384000,
8673
  "dtype": "torch.int16"
8674
  }
8675
  },
8676
  "quant_format": "exl3",
8677
- "bits_per_weight": 5
8678
  },
8679
  "model.layers.35.self_attn.k_proj": {
8680
  "stored_tensors": {
@@ -8768,14 +8770,14 @@
8768
  "shape": [
8769
  320,
8770
  320,
8771
- 80
8772
  ],
8773
- "n_bytes": 16384000,
8774
  "dtype": "torch.int16"
8775
  }
8776
  },
8777
  "quant_format": "exl3",
8778
- "bits_per_weight": 5
8779
  },
8780
  "model.layers.35.post_attention_layernorm": {
8781
  "stored_tensors": {
@@ -8808,14 +8810,14 @@
8808
  "shape": [
8809
  320,
8810
  1728,
8811
- 64
8812
  ],
8813
- "n_bytes": 70778880,
8814
  "dtype": "torch.int16"
8815
  }
8816
  },
8817
  "quant_format": "exl3",
8818
- "bits_per_weight": 4
8819
  },
8820
  "model.layers.35.mlp.gate_proj": {
8821
  "stored_tensors": {
@@ -9159,14 +9161,14 @@
9159
  "shape": [
9160
  320,
9161
  320,
9162
- 64
9163
  ],
9164
- "n_bytes": 13107200,
9165
  "dtype": "torch.int16"
9166
  }
9167
  },
9168
  "quant_format": "exl3",
9169
- "bits_per_weight": 4
9170
  },
9171
  "model.layers.37.self_attn.k_proj": {
9172
  "stored_tensors": {
@@ -9195,14 +9197,14 @@
9195
  "shape": [
9196
  320,
9197
  64,
9198
- 64
9199
  ],
9200
- "n_bytes": 2621440,
9201
  "dtype": "torch.int16"
9202
  }
9203
  },
9204
  "quant_format": "exl3",
9205
- "bits_per_weight": 4
9206
  },
9207
  "model.layers.37.self_attn.v_proj": {
9208
  "stored_tensors": {
@@ -9260,14 +9262,14 @@
9260
  "shape": [
9261
  320,
9262
  320,
9263
- 64
9264
  ],
9265
- "n_bytes": 13107200,
9266
  "dtype": "torch.int16"
9267
  }
9268
  },
9269
  "quant_format": "exl3",
9270
- "bits_per_weight": 4
9271
  },
9272
  "model.layers.37.post_attention_layernorm": {
9273
  "stored_tensors": {
@@ -9300,14 +9302,14 @@
9300
  "shape": [
9301
  320,
9302
  1728,
9303
- 80
9304
  ],
9305
- "n_bytes": 88473600,
9306
  "dtype": "torch.int16"
9307
  }
9308
  },
9309
  "quant_format": "exl3",
9310
- "bits_per_weight": 5
9311
  },
9312
  "model.layers.37.mlp.gate_proj": {
9313
  "stored_tensors": {
@@ -9405,14 +9407,14 @@
9405
  "shape": [
9406
  320,
9407
  320,
9408
- 80
9409
  ],
9410
- "n_bytes": 16384000,
9411
  "dtype": "torch.int16"
9412
  }
9413
  },
9414
  "quant_format": "exl3",
9415
- "bits_per_weight": 5
9416
  },
9417
  "model.layers.38.self_attn.k_proj": {
9418
  "stored_tensors": {
@@ -9441,14 +9443,14 @@
9441
  "shape": [
9442
  320,
9443
  64,
9444
- 96
9445
  ],
9446
- "n_bytes": 3932160,
9447
  "dtype": "torch.int16"
9448
  }
9449
  },
9450
  "quant_format": "exl3",
9451
- "bits_per_weight": 6
9452
  },
9453
  "model.layers.38.self_attn.v_proj": {
9454
  "stored_tensors": {
@@ -9477,14 +9479,14 @@
9477
  "shape": [
9478
  320,
9479
  64,
9480
- 96
9481
  ],
9482
- "n_bytes": 3932160,
9483
  "dtype": "torch.int16"
9484
  }
9485
  },
9486
  "quant_format": "exl3",
9487
- "bits_per_weight": 6
9488
  },
9489
  "model.layers.38.self_attn.o_proj": {
9490
  "stored_tensors": {
@@ -9506,14 +9508,14 @@
9506
  "shape": [
9507
  320,
9508
  320,
9509
- 80
9510
  ],
9511
- "n_bytes": 16384000,
9512
  "dtype": "torch.int16"
9513
  }
9514
  },
9515
  "quant_format": "exl3",
9516
- "bits_per_weight": 5
9517
  },
9518
  "model.layers.38.post_attention_layernorm": {
9519
  "stored_tensors": {
@@ -9546,14 +9548,14 @@
9546
  "shape": [
9547
  320,
9548
  1728,
9549
- 64
9550
  ],
9551
- "n_bytes": 70778880,
9552
  "dtype": "torch.int16"
9553
  }
9554
  },
9555
  "quant_format": "exl3",
9556
- "bits_per_weight": 4
9557
  },
9558
  "model.layers.38.mlp.gate_proj": {
9559
  "stored_tensors": {
@@ -9651,14 +9653,14 @@
9651
  "shape": [
9652
  320,
9653
  320,
9654
- 64
9655
  ],
9656
- "n_bytes": 13107200,
9657
  "dtype": "torch.int16"
9658
  }
9659
  },
9660
  "quant_format": "exl3",
9661
- "bits_per_weight": 4
9662
  },
9663
  "model.layers.39.self_attn.k_proj": {
9664
  "stored_tensors": {
@@ -9687,14 +9689,14 @@
9687
  "shape": [
9688
  320,
9689
  64,
9690
- 64
9691
  ],
9692
- "n_bytes": 2621440,
9693
  "dtype": "torch.int16"
9694
  }
9695
  },
9696
  "quant_format": "exl3",
9697
- "bits_per_weight": 4
9698
  },
9699
  "model.layers.39.self_attn.v_proj": {
9700
  "stored_tensors": {
@@ -9723,14 +9725,14 @@
9723
  "shape": [
9724
  320,
9725
  64,
9726
- 80
9727
  ],
9728
- "n_bytes": 3276800,
9729
  "dtype": "torch.int16"
9730
  }
9731
  },
9732
  "quant_format": "exl3",
9733
- "bits_per_weight": 5
9734
  },
9735
  "model.layers.39.self_attn.o_proj": {
9736
  "stored_tensors": {
@@ -9752,14 +9754,14 @@
9752
  "shape": [
9753
  320,
9754
  320,
9755
- 64
9756
  ],
9757
- "n_bytes": 13107200,
9758
  "dtype": "torch.int16"
9759
  }
9760
  },
9761
  "quant_format": "exl3",
9762
- "bits_per_weight": 4
9763
  },
9764
  "model.layers.39.post_attention_layernorm": {
9765
  "stored_tensors": {
@@ -9792,14 +9794,14 @@
9792
  "shape": [
9793
  320,
9794
  1728,
9795
- 80
9796
  ],
9797
- "n_bytes": 88473600,
9798
  "dtype": "torch.int16"
9799
  }
9800
  },
9801
  "quant_format": "exl3",
9802
- "bits_per_weight": 5
9803
  },
9804
  "model.layers.39.mlp.gate_proj": {
9805
  "stored_tensors": {
@@ -10143,14 +10145,14 @@
10143
  "shape": [
10144
  320,
10145
  320,
10146
- 80
10147
  ],
10148
- "n_bytes": 16384000,
10149
  "dtype": "torch.int16"
10150
  }
10151
  },
10152
  "quant_format": "exl3",
10153
- "bits_per_weight": 5
10154
  },
10155
  "model.layers.41.self_attn.k_proj": {
10156
  "stored_tensors": {
@@ -10244,14 +10246,14 @@
10244
  "shape": [
10245
  320,
10246
  320,
10247
- 80
10248
  ],
10249
- "n_bytes": 16384000,
10250
  "dtype": "torch.int16"
10251
  }
10252
  },
10253
  "quant_format": "exl3",
10254
- "bits_per_weight": 5
10255
  },
10256
  "model.layers.41.post_attention_layernorm": {
10257
  "stored_tensors": {
@@ -10284,14 +10286,14 @@
10284
  "shape": [
10285
  320,
10286
  1728,
10287
- 64
10288
  ],
10289
- "n_bytes": 70778880,
10290
  "dtype": "torch.int16"
10291
  }
10292
  },
10293
  "quant_format": "exl3",
10294
- "bits_per_weight": 4
10295
  },
10296
  "model.layers.41.mlp.gate_proj": {
10297
  "stored_tensors": {
@@ -10389,14 +10391,14 @@
10389
  "shape": [
10390
  320,
10391
  320,
10392
- 64
10393
  ],
10394
- "n_bytes": 13107200,
10395
  "dtype": "torch.int16"
10396
  }
10397
  },
10398
  "quant_format": "exl3",
10399
- "bits_per_weight": 4
10400
  },
10401
  "model.layers.42.self_attn.k_proj": {
10402
  "stored_tensors": {
@@ -10425,14 +10427,14 @@
10425
  "shape": [
10426
  320,
10427
  64,
10428
- 64
10429
  ],
10430
- "n_bytes": 2621440,
10431
  "dtype": "torch.int16"
10432
  }
10433
  },
10434
  "quant_format": "exl3",
10435
- "bits_per_weight": 4
10436
  },
10437
  "model.layers.42.self_attn.v_proj": {
10438
  "stored_tensors": {
@@ -10490,14 +10492,14 @@
10490
  "shape": [
10491
  320,
10492
  320,
10493
- 64
10494
  ],
10495
- "n_bytes": 13107200,
10496
  "dtype": "torch.int16"
10497
  }
10498
  },
10499
  "quant_format": "exl3",
10500
- "bits_per_weight": 4
10501
  },
10502
  "model.layers.42.post_attention_layernorm": {
10503
  "stored_tensors": {
@@ -10530,14 +10532,14 @@
10530
  "shape": [
10531
  320,
10532
  1728,
10533
- 80
10534
  ],
10535
- "n_bytes": 88473600,
10536
  "dtype": "torch.int16"
10537
  }
10538
  },
10539
  "quant_format": "exl3",
10540
- "bits_per_weight": 5
10541
  },
10542
  "model.layers.42.mlp.gate_proj": {
10543
  "stored_tensors": {
@@ -10881,14 +10883,14 @@
10881
  "shape": [
10882
  320,
10883
  320,
10884
- 80
10885
  ],
10886
- "n_bytes": 16384000,
10887
  "dtype": "torch.int16"
10888
  }
10889
  },
10890
  "quant_format": "exl3",
10891
- "bits_per_weight": 5
10892
  },
10893
  "model.layers.44.self_attn.k_proj": {
10894
  "stored_tensors": {
@@ -10982,14 +10984,14 @@
10982
  "shape": [
10983
  320,
10984
  320,
10985
- 80
10986
  ],
10987
- "n_bytes": 16384000,
10988
  "dtype": "torch.int16"
10989
  }
10990
  },
10991
  "quant_format": "exl3",
10992
- "bits_per_weight": 5
10993
  },
10994
  "model.layers.44.post_attention_layernorm": {
10995
  "stored_tensors": {
@@ -11022,14 +11024,14 @@
11022
  "shape": [
11023
  320,
11024
  1728,
11025
- 64
11026
  ],
11027
- "n_bytes": 70778880,
11028
  "dtype": "torch.int16"
11029
  }
11030
  },
11031
  "quant_format": "exl3",
11032
- "bits_per_weight": 4
11033
  },
11034
  "model.layers.44.mlp.gate_proj": {
11035
  "stored_tensors": {
@@ -11127,14 +11129,14 @@
11127
  "shape": [
11128
  320,
11129
  320,
11130
- 64
11131
  ],
11132
- "n_bytes": 13107200,
11133
  "dtype": "torch.int16"
11134
  }
11135
  },
11136
  "quant_format": "exl3",
11137
- "bits_per_weight": 4
11138
  },
11139
  "model.layers.45.self_attn.k_proj": {
11140
  "stored_tensors": {
@@ -11163,14 +11165,14 @@
11163
  "shape": [
11164
  320,
11165
  64,
11166
- 64
11167
  ],
11168
- "n_bytes": 2621440,
11169
  "dtype": "torch.int16"
11170
  }
11171
  },
11172
  "quant_format": "exl3",
11173
- "bits_per_weight": 4
11174
  },
11175
  "model.layers.45.self_attn.v_proj": {
11176
  "stored_tensors": {
@@ -11228,14 +11230,14 @@
11228
  "shape": [
11229
  320,
11230
  320,
11231
- 64
11232
  ],
11233
- "n_bytes": 13107200,
11234
  "dtype": "torch.int16"
11235
  }
11236
  },
11237
  "quant_format": "exl3",
11238
- "bits_per_weight": 4
11239
  },
11240
  "model.layers.45.post_attention_layernorm": {
11241
  "stored_tensors": {
@@ -11268,14 +11270,14 @@
11268
  "shape": [
11269
  320,
11270
  1728,
11271
- 80
11272
  ],
11273
- "n_bytes": 88473600,
11274
  "dtype": "torch.int16"
11275
  }
11276
  },
11277
  "quant_format": "exl3",
11278
- "bits_per_weight": 5
11279
  },
11280
  "model.layers.45.mlp.gate_proj": {
11281
  "stored_tensors": {
@@ -11655,14 +11657,14 @@
11655
  "shape": [
11656
  320,
11657
  64,
11658
- 64
11659
  ],
11660
- "n_bytes": 2621440,
11661
  "dtype": "torch.int16"
11662
  }
11663
  },
11664
  "quant_format": "exl3",
11665
- "bits_per_weight": 4
11666
  },
11667
  "model.layers.47.self_attn.v_proj": {
11668
  "stored_tensors": {
@@ -11691,14 +11693,14 @@
11691
  "shape": [
11692
  320,
11693
  64,
11694
- 80
11695
  ],
11696
- "n_bytes": 3276800,
11697
  "dtype": "torch.int16"
11698
  }
11699
  },
11700
  "quant_format": "exl3",
11701
- "bits_per_weight": 5
11702
  },
11703
  "model.layers.47.self_attn.o_proj": {
11704
  "stored_tensors": {
@@ -12393,14 +12395,14 @@
12393
  "shape": [
12394
  320,
12395
  64,
12396
- 64
12397
  ],
12398
- "n_bytes": 2621440,
12399
  "dtype": "torch.int16"
12400
  }
12401
  },
12402
  "quant_format": "exl3",
12403
- "bits_per_weight": 4
12404
  },
12405
  "model.layers.50.self_attn.v_proj": {
12406
  "stored_tensors": {
@@ -12429,14 +12431,14 @@
12429
  "shape": [
12430
  320,
12431
  64,
12432
- 96
12433
  ],
12434
- "n_bytes": 3932160,
12435
  "dtype": "torch.int16"
12436
  }
12437
  },
12438
  "quant_format": "exl3",
12439
- "bits_per_weight": 6
12440
  },
12441
  "model.layers.50.self_attn.o_proj": {
12442
  "stored_tensors": {
@@ -13131,14 +13133,14 @@
13131
  "shape": [
13132
  320,
13133
  64,
13134
- 64
13135
  ],
13136
- "n_bytes": 2621440,
13137
  "dtype": "torch.int16"
13138
  }
13139
  },
13140
  "quant_format": "exl3",
13141
- "bits_per_weight": 4
13142
  },
13143
  "model.layers.53.self_attn.v_proj": {
13144
  "stored_tensors": {
@@ -13587,14 +13589,14 @@
13587
  "shape": [
13588
  320,
13589
  320,
13590
- 64
13591
  ],
13592
- "n_bytes": 13107200,
13593
  "dtype": "torch.int16"
13594
  }
13595
  },
13596
  "quant_format": "exl3",
13597
- "bits_per_weight": 4
13598
  },
13599
  "model.layers.55.self_attn.k_proj": {
13600
  "stored_tensors": {
@@ -13623,14 +13625,14 @@
13623
  "shape": [
13624
  320,
13625
  64,
13626
- 64
13627
  ],
13628
- "n_bytes": 2621440,
13629
  "dtype": "torch.int16"
13630
  }
13631
  },
13632
  "quant_format": "exl3",
13633
- "bits_per_weight": 4
13634
  },
13635
  "model.layers.55.self_attn.v_proj": {
13636
  "stored_tensors": {
@@ -13659,14 +13661,14 @@
13659
  "shape": [
13660
  320,
13661
  64,
13662
- 80
13663
  ],
13664
- "n_bytes": 3276800,
13665
  "dtype": "torch.int16"
13666
  }
13667
  },
13668
  "quant_format": "exl3",
13669
- "bits_per_weight": 5
13670
  },
13671
  "model.layers.55.self_attn.o_proj": {
13672
  "stored_tensors": {
@@ -13688,14 +13690,14 @@
13688
  "shape": [
13689
  320,
13690
  320,
13691
- 64
13692
  ],
13693
- "n_bytes": 13107200,
13694
  "dtype": "torch.int16"
13695
  }
13696
  },
13697
  "quant_format": "exl3",
13698
- "bits_per_weight": 4
13699
  },
13700
  "model.layers.55.post_attention_layernorm": {
13701
  "stored_tensors": {
@@ -13728,14 +13730,14 @@
13728
  "shape": [
13729
  320,
13730
  1728,
13731
- 80
13732
  ],
13733
- "n_bytes": 88473600,
13734
  "dtype": "torch.int16"
13735
  }
13736
  },
13737
  "quant_format": "exl3",
13738
- "bits_per_weight": 5
13739
  },
13740
  "model.layers.55.mlp.gate_proj": {
13741
  "stored_tensors": {
@@ -13833,14 +13835,14 @@
13833
  "shape": [
13834
  320,
13835
  320,
13836
- 80
13837
  ],
13838
- "n_bytes": 16384000,
13839
  "dtype": "torch.int16"
13840
  }
13841
  },
13842
  "quant_format": "exl3",
13843
- "bits_per_weight": 5
13844
  },
13845
  "model.layers.56.self_attn.k_proj": {
13846
  "stored_tensors": {
@@ -13934,14 +13936,14 @@
13934
  "shape": [
13935
  320,
13936
  320,
13937
- 80
13938
  ],
13939
- "n_bytes": 16384000,
13940
  "dtype": "torch.int16"
13941
  }
13942
  },
13943
  "quant_format": "exl3",
13944
- "bits_per_weight": 5
13945
  },
13946
  "model.layers.56.post_attention_layernorm": {
13947
  "stored_tensors": {
@@ -13974,14 +13976,14 @@
13974
  "shape": [
13975
  320,
13976
  1728,
13977
- 64
13978
  ],
13979
- "n_bytes": 70778880,
13980
  "dtype": "torch.int16"
13981
  }
13982
  },
13983
  "quant_format": "exl3",
13984
- "bits_per_weight": 4
13985
  },
13986
  "model.layers.56.mlp.gate_proj": {
13987
  "stored_tensors": {
@@ -14325,14 +14327,14 @@
14325
  "shape": [
14326
  320,
14327
  320,
14328
- 64
14329
  ],
14330
- "n_bytes": 13107200,
14331
  "dtype": "torch.int16"
14332
  }
14333
  },
14334
  "quant_format": "exl3",
14335
- "bits_per_weight": 4
14336
  },
14337
  "model.layers.58.self_attn.k_proj": {
14338
  "stored_tensors": {
@@ -14361,14 +14363,14 @@
14361
  "shape": [
14362
  320,
14363
  64,
14364
- 64
14365
  ],
14366
- "n_bytes": 2621440,
14367
  "dtype": "torch.int16"
14368
  }
14369
  },
14370
  "quant_format": "exl3",
14371
- "bits_per_weight": 4
14372
  },
14373
  "model.layers.58.self_attn.v_proj": {
14374
  "stored_tensors": {
@@ -14426,14 +14428,14 @@
14426
  "shape": [
14427
  320,
14428
  320,
14429
- 64
14430
  ],
14431
- "n_bytes": 13107200,
14432
  "dtype": "torch.int16"
14433
  }
14434
  },
14435
  "quant_format": "exl3",
14436
- "bits_per_weight": 4
14437
  },
14438
  "model.layers.58.post_attention_layernorm": {
14439
  "stored_tensors": {
@@ -14466,14 +14468,14 @@
14466
  "shape": [
14467
  320,
14468
  1728,
14469
- 80
14470
  ],
14471
- "n_bytes": 88473600,
14472
  "dtype": "torch.int16"
14473
  }
14474
  },
14475
  "quant_format": "exl3",
14476
- "bits_per_weight": 5
14477
  },
14478
  "model.layers.58.mlp.gate_proj": {
14479
  "stored_tensors": {
@@ -14571,14 +14573,14 @@
14571
  "shape": [
14572
  320,
14573
  320,
14574
- 80
14575
  ],
14576
- "n_bytes": 16384000,
14577
  "dtype": "torch.int16"
14578
  }
14579
  },
14580
  "quant_format": "exl3",
14581
- "bits_per_weight": 5
14582
  },
14583
  "model.layers.59.self_attn.k_proj": {
14584
  "stored_tensors": {
@@ -14672,14 +14674,14 @@
14672
  "shape": [
14673
  320,
14674
  320,
14675
- 80
14676
  ],
14677
- "n_bytes": 16384000,
14678
  "dtype": "torch.int16"
14679
  }
14680
  },
14681
  "quant_format": "exl3",
14682
- "bits_per_weight": 5
14683
  },
14684
  "model.layers.59.post_attention_layernorm": {
14685
  "stored_tensors": {
@@ -14712,14 +14714,14 @@
14712
  "shape": [
14713
  320,
14714
  1728,
14715
- 64
14716
  ],
14717
- "n_bytes": 70778880,
14718
  "dtype": "torch.int16"
14719
  }
14720
  },
14721
  "quant_format": "exl3",
14722
- "bits_per_weight": 4
14723
  },
14724
  "model.layers.59.mlp.gate_proj": {
14725
  "stored_tensors": {
@@ -15063,14 +15065,14 @@
15063
  "shape": [
15064
  320,
15065
  320,
15066
- 64
15067
  ],
15068
- "n_bytes": 13107200,
15069
  "dtype": "torch.int16"
15070
  }
15071
  },
15072
  "quant_format": "exl3",
15073
- "bits_per_weight": 4
15074
  },
15075
  "model.layers.61.self_attn.k_proj": {
15076
  "stored_tensors": {
@@ -15099,14 +15101,14 @@
15099
  "shape": [
15100
  320,
15101
  64,
15102
- 64
15103
  ],
15104
- "n_bytes": 2621440,
15105
  "dtype": "torch.int16"
15106
  }
15107
  },
15108
  "quant_format": "exl3",
15109
- "bits_per_weight": 4
15110
  },
15111
  "model.layers.61.self_attn.v_proj": {
15112
  "stored_tensors": {
@@ -15164,14 +15166,14 @@
15164
  "shape": [
15165
  320,
15166
  320,
15167
- 64
15168
  ],
15169
- "n_bytes": 13107200,
15170
  "dtype": "torch.int16"
15171
  }
15172
  },
15173
  "quant_format": "exl3",
15174
- "bits_per_weight": 4
15175
  },
15176
  "model.layers.61.post_attention_layernorm": {
15177
  "stored_tensors": {
@@ -15204,14 +15206,14 @@
15204
  "shape": [
15205
  320,
15206
  1728,
15207
- 80
15208
  ],
15209
- "n_bytes": 88473600,
15210
  "dtype": "torch.int16"
15211
  }
15212
  },
15213
  "quant_format": "exl3",
15214
- "bits_per_weight": 5
15215
  },
15216
  "model.layers.61.mlp.gate_proj": {
15217
  "stored_tensors": {
@@ -15309,14 +15311,14 @@
15309
  "shape": [
15310
  320,
15311
  320,
15312
- 80
15313
  ],
15314
- "n_bytes": 16384000,
15315
  "dtype": "torch.int16"
15316
  }
15317
  },
15318
  "quant_format": "exl3",
15319
- "bits_per_weight": 5
15320
  },
15321
  "model.layers.62.self_attn.k_proj": {
15322
  "stored_tensors": {
@@ -15345,14 +15347,14 @@
15345
  "shape": [
15346
  320,
15347
  64,
15348
- 96
15349
  ],
15350
- "n_bytes": 3932160,
15351
  "dtype": "torch.int16"
15352
  }
15353
  },
15354
  "quant_format": "exl3",
15355
- "bits_per_weight": 6
15356
  },
15357
  "model.layers.62.self_attn.v_proj": {
15358
  "stored_tensors": {
@@ -15381,14 +15383,14 @@
15381
  "shape": [
15382
  320,
15383
  64,
15384
- 96
15385
  ],
15386
- "n_bytes": 3932160,
15387
  "dtype": "torch.int16"
15388
  }
15389
  },
15390
  "quant_format": "exl3",
15391
- "bits_per_weight": 6
15392
  },
15393
  "model.layers.62.self_attn.o_proj": {
15394
  "stored_tensors": {
@@ -15410,14 +15412,14 @@
15410
  "shape": [
15411
  320,
15412
  320,
15413
- 80
15414
  ],
15415
- "n_bytes": 16384000,
15416
  "dtype": "torch.int16"
15417
  }
15418
  },
15419
  "quant_format": "exl3",
15420
- "bits_per_weight": 5
15421
  },
15422
  "model.layers.62.post_attention_layernorm": {
15423
  "stored_tensors": {
@@ -15450,14 +15452,14 @@
15450
  "shape": [
15451
  320,
15452
  1728,
15453
- 64
15454
  ],
15455
- "n_bytes": 70778880,
15456
  "dtype": "torch.int16"
15457
  }
15458
  },
15459
  "quant_format": "exl3",
15460
- "bits_per_weight": 4
15461
  },
15462
  "model.layers.62.mlp.gate_proj": {
15463
  "stored_tensors": {
@@ -15555,14 +15557,14 @@
15555
  "shape": [
15556
  320,
15557
  320,
15558
- 64
15559
  ],
15560
- "n_bytes": 13107200,
15561
  "dtype": "torch.int16"
15562
  }
15563
  },
15564
  "quant_format": "exl3",
15565
- "bits_per_weight": 4
15566
  },
15567
  "model.layers.63.self_attn.k_proj": {
15568
  "stored_tensors": {
@@ -15591,14 +15593,14 @@
15591
  "shape": [
15592
  320,
15593
  64,
15594
- 64
15595
  ],
15596
- "n_bytes": 2621440,
15597
  "dtype": "torch.int16"
15598
  }
15599
  },
15600
  "quant_format": "exl3",
15601
- "bits_per_weight": 4
15602
  },
15603
  "model.layers.63.self_attn.v_proj": {
15604
  "stored_tensors": {
@@ -15627,14 +15629,14 @@
15627
  "shape": [
15628
  320,
15629
  64,
15630
- 80
15631
  ],
15632
- "n_bytes": 3276800,
15633
  "dtype": "torch.int16"
15634
  }
15635
  },
15636
  "quant_format": "exl3",
15637
- "bits_per_weight": 5
15638
  },
15639
  "model.layers.63.self_attn.o_proj": {
15640
  "stored_tensors": {
@@ -15656,14 +15658,14 @@
15656
  "shape": [
15657
  320,
15658
  320,
15659
- 64
15660
  ],
15661
- "n_bytes": 13107200,
15662
  "dtype": "torch.int16"
15663
  }
15664
  },
15665
  "quant_format": "exl3",
15666
- "bits_per_weight": 4
15667
  },
15668
  "model.layers.63.post_attention_layernorm": {
15669
  "stored_tensors": {
@@ -15696,14 +15698,14 @@
15696
  "shape": [
15697
  320,
15698
  1728,
15699
- 80
15700
  ],
15701
- "n_bytes": 88473600,
15702
  "dtype": "torch.int16"
15703
  }
15704
  },
15705
  "quant_format": "exl3",
15706
- "bits_per_weight": 5
15707
  },
15708
  "model.layers.63.mlp.gate_proj": {
15709
  "stored_tensors": {
 
 
587
  "shape": [
588
  320,
589
  64,
590
+ 80
591
  ],
592
+ "n_bytes": 3276800,
593
  "dtype": "torch.int16"
594
  }
595
  },
596
  "quant_format": "exl3",
597
+ "bits_per_weight": 5
598
  },
599
  "model.layers.2.self_attn.v_proj": {
600
  "stored_tensors": {
 
623
  "shape": [
624
  320,
625
  64,
626
+ 80
627
  ],
628
+ "n_bytes": 3276800,
629
  "dtype": "torch.int16"
630
  }
631
  },
632
  "quant_format": "exl3",
633
+ "bits_per_weight": 5
634
  },
635
  "model.layers.2.self_attn.o_proj": {
636
  "stored_tensors": {
 
1325
  "shape": [
1326
  320,
1327
  64,
1328
+ 96
1329
  ],
1330
+ "n_bytes": 3932160,
1331
  "dtype": "torch.int16"
1332
  }
1333
  },
1334
  "quant_format": "exl3",
1335
+ "bits_per_weight": 6
1336
  },
1337
  "model.layers.5.self_attn.v_proj": {
1338
  "stored_tensors": {
 
1781
  "shape": [
1782
  320,
1783
  320,
1784
+ 80
1785
  ],
1786
+ "n_bytes": 16384000,
1787
  "dtype": "torch.int16"
1788
  }
1789
  },
1790
  "quant_format": "exl3",
1791
+ "bits_per_weight": 5
1792
  },
1793
  "model.layers.7.self_attn.k_proj": {
1794
  "stored_tensors": {
 
1817
  "shape": [
1818
  320,
1819
  64,
1820
+ 96
1821
  ],
1822
+ "n_bytes": 3932160,
1823
  "dtype": "torch.int16"
1824
  }
1825
  },
1826
  "quant_format": "exl3",
1827
+ "bits_per_weight": 6
1828
  },
1829
  "model.layers.7.self_attn.v_proj": {
1830
  "stored_tensors": {
 
1853
  "shape": [
1854
  320,
1855
  64,
1856
+ 96
1857
  ],
1858
+ "n_bytes": 3932160,
1859
  "dtype": "torch.int16"
1860
  }
1861
  },
1862
  "quant_format": "exl3",
1863
+ "bits_per_weight": 6
1864
  },
1865
  "model.layers.7.self_attn.o_proj": {
1866
  "stored_tensors": {
 
1882
  "shape": [
1883
  320,
1884
  320,
1885
+ 80
1886
  ],
1887
+ "n_bytes": 16384000,
1888
  "dtype": "torch.int16"
1889
  }
1890
  },
1891
  "quant_format": "exl3",
1892
+ "bits_per_weight": 5
1893
  },
1894
  "model.layers.7.post_attention_layernorm": {
1895
  "stored_tensors": {
 
1922
  "shape": [
1923
  320,
1924
  1728,
1925
+ 64
1926
  ],
1927
+ "n_bytes": 70778880,
1928
  "dtype": "torch.int16"
1929
  }
1930
  },
1931
  "quant_format": "exl3",
1932
+ "bits_per_weight": 4
1933
  },
1934
  "model.layers.7.mlp.gate_proj": {
1935
  "stored_tensors": {
 
2027
  "shape": [
2028
  320,
2029
  320,
2030
+ 64
2031
  ],
2032
+ "n_bytes": 13107200,
2033
  "dtype": "torch.int16"
2034
  }
2035
  },
2036
  "quant_format": "exl3",
2037
+ "bits_per_weight": 4
2038
  },
2039
  "model.layers.8.self_attn.k_proj": {
2040
  "stored_tensors": {
 
2128
  "shape": [
2129
  320,
2130
  320,
2131
+ 64
2132
  ],
2133
+ "n_bytes": 13107200,
2134
  "dtype": "torch.int16"
2135
  }
2136
  },
2137
  "quant_format": "exl3",
2138
+ "bits_per_weight": 4
2139
  },
2140
  "model.layers.8.post_attention_layernorm": {
2141
  "stored_tensors": {
 
2168
  "shape": [
2169
  320,
2170
  1728,
2171
+ 80
2172
  ],
2173
+ "n_bytes": 88473600,
2174
  "dtype": "torch.int16"
2175
  }
2176
  },
2177
  "quant_format": "exl3",
2178
+ "bits_per_weight": 5
2179
  },
2180
  "model.layers.8.mlp.gate_proj": {
2181
  "stored_tensors": {
 
2519
  "shape": [
2520
  320,
2521
  320,
2522
+ 80
2523
  ],
2524
+ "n_bytes": 16384000,
2525
  "dtype": "torch.int16"
2526
  }
2527
  },
2528
  "quant_format": "exl3",
2529
+ "bits_per_weight": 5
2530
  },
2531
  "model.layers.10.self_attn.k_proj": {
2532
  "stored_tensors": {
 
2555
  "shape": [
2556
  320,
2557
  64,
2558
+ 96
2559
  ],
2560
+ "n_bytes": 3932160,
2561
  "dtype": "torch.int16"
2562
  }
2563
  },
2564
  "quant_format": "exl3",
2565
+ "bits_per_weight": 6
2566
  },
2567
  "model.layers.10.self_attn.v_proj": {
2568
  "stored_tensors": {
 
2620
  "shape": [
2621
  320,
2622
  320,
2623
+ 80
2624
  ],
2625
+ "n_bytes": 16384000,
2626
  "dtype": "torch.int16"
2627
  }
2628
  },
2629
  "quant_format": "exl3",
2630
+ "bits_per_weight": 5
2631
  },
2632
  "model.layers.10.post_attention_layernorm": {
2633
  "stored_tensors": {
 
2660
  "shape": [
2661
  320,
2662
  1728,
2663
+ 64
2664
  ],
2665
+ "n_bytes": 70778880,
2666
  "dtype": "torch.int16"
2667
  }
2668
  },
2669
  "quant_format": "exl3",
2670
+ "bits_per_weight": 4
2671
  },
2672
  "model.layers.10.mlp.gate_proj": {
2673
  "stored_tensors": {
 
2765
  "shape": [
2766
  320,
2767
  320,
2768
+ 64
2769
  ],
2770
+ "n_bytes": 13107200,
2771
  "dtype": "torch.int16"
2772
  }
2773
  },
2774
  "quant_format": "exl3",
2775
+ "bits_per_weight": 4
2776
  },
2777
  "model.layers.11.self_attn.k_proj": {
2778
  "stored_tensors": {
 
2866
  "shape": [
2867
  320,
2868
  320,
2869
+ 64
2870
  ],
2871
+ "n_bytes": 13107200,
2872
  "dtype": "torch.int16"
2873
  }
2874
  },
2875
  "quant_format": "exl3",
2876
+ "bits_per_weight": 4
2877
  },
2878
  "model.layers.11.post_attention_layernorm": {
2879
  "stored_tensors": {
 
2906
  "shape": [
2907
  320,
2908
  1728,
2909
+ 80
2910
  ],
2911
+ "n_bytes": 88473600,
2912
  "dtype": "torch.int16"
2913
  }
2914
  },
2915
  "quant_format": "exl3",
2916
+ "bits_per_weight": 5
2917
  },
2918
  "model.layers.11.mlp.gate_proj": {
2919
  "stored_tensors": {
 
3257
  "shape": [
3258
  320,
3259
  320,
3260
+ 80
3261
  ],
3262
+ "n_bytes": 16384000,
3263
  "dtype": "torch.int16"
3264
  }
3265
  },
3266
  "quant_format": "exl3",
3267
+ "bits_per_weight": 5
3268
  },
3269
  "model.layers.13.self_attn.k_proj": {
3270
  "stored_tensors": {
 
3293
  "shape": [
3294
  320,
3295
  64,
3296
+ 96
3297
  ],
3298
+ "n_bytes": 3932160,
3299
  "dtype": "torch.int16"
3300
  }
3301
  },
3302
  "quant_format": "exl3",
3303
+ "bits_per_weight": 6
3304
  },
3305
  "model.layers.13.self_attn.v_proj": {
3306
  "stored_tensors": {
 
3358
  "shape": [
3359
  320,
3360
  320,
3361
+ 80
3362
  ],
3363
+ "n_bytes": 16384000,
3364
  "dtype": "torch.int16"
3365
  }
3366
  },
3367
  "quant_format": "exl3",
3368
+ "bits_per_weight": 5
3369
  },
3370
  "model.layers.13.post_attention_layernorm": {
3371
  "stored_tensors": {
 
3398
  "shape": [
3399
  320,
3400
  1728,
3401
+ 64
3402
  ],
3403
+ "n_bytes": 70778880,
3404
  "dtype": "torch.int16"
3405
  }
3406
  },
3407
  "quant_format": "exl3",
3408
+ "bits_per_weight": 4
3409
  },
3410
  "model.layers.13.mlp.gate_proj": {
3411
  "stored_tensors": {
 
3503
  "shape": [
3504
  320,
3505
  320,
3506
+ 64
3507
  ],
3508
+ "n_bytes": 13107200,
3509
  "dtype": "torch.int16"
3510
  }
3511
  },
3512
  "quant_format": "exl3",
3513
+ "bits_per_weight": 4
3514
  },
3515
  "model.layers.14.self_attn.k_proj": {
3516
  "stored_tensors": {
 
3539
  "shape": [
3540
  320,
3541
  64,
3542
+ 80
3543
  ],
3544
+ "n_bytes": 3276800,
3545
  "dtype": "torch.int16"
3546
  }
3547
  },
3548
  "quant_format": "exl3",
3549
+ "bits_per_weight": 5
3550
  },
3551
  "model.layers.14.self_attn.v_proj": {
3552
  "stored_tensors": {
 
3575
  "shape": [
3576
  320,
3577
  64,
3578
+ 80
3579
  ],
3580
+ "n_bytes": 3276800,
3581
  "dtype": "torch.int16"
3582
  }
3583
  },
3584
  "quant_format": "exl3",
3585
+ "bits_per_weight": 5
3586
  },
3587
  "model.layers.14.self_attn.o_proj": {
3588
  "stored_tensors": {
 
3604
  "shape": [
3605
  320,
3606
  320,
3607
+ 64
3608
  ],
3609
+ "n_bytes": 13107200,
3610
  "dtype": "torch.int16"
3611
  }
3612
  },
3613
  "quant_format": "exl3",
3614
+ "bits_per_weight": 4
3615
  },
3616
  "model.layers.14.post_attention_layernorm": {
3617
  "stored_tensors": {
 
3644
  "shape": [
3645
  320,
3646
  1728,
3647
+ 80
3648
  ],
3649
+ "n_bytes": 88473600,
3650
  "dtype": "torch.int16"
3651
  }
3652
  },
3653
  "quant_format": "exl3",
3654
+ "bits_per_weight": 5
3655
  },
3656
  "model.layers.14.mlp.gate_proj": {
3657
  "stored_tensors": {
 
3749
  "shape": [
3750
  320,
3751
  320,
3752
+ 80
3753
  ],
3754
+ "n_bytes": 16384000,
3755
  "dtype": "torch.int16"
3756
  }
3757
  },
3758
  "quant_format": "exl3",
3759
+ "bits_per_weight": 5
3760
  },
3761
  "model.layers.15.self_attn.k_proj": {
3762
  "stored_tensors": {
 
3785
  "shape": [
3786
  320,
3787
  64,
3788
+ 96
3789
  ],
3790
+ "n_bytes": 3932160,
3791
  "dtype": "torch.int16"
3792
  }
3793
  },
3794
  "quant_format": "exl3",
3795
+ "bits_per_weight": 6
3796
  },
3797
  "model.layers.15.self_attn.v_proj": {
3798
  "stored_tensors": {
 
3821
  "shape": [
3822
  320,
3823
  64,
3824
+ 96
3825
  ],
3826
+ "n_bytes": 3932160,
3827
  "dtype": "torch.int16"
3828
  }
3829
  },
3830
  "quant_format": "exl3",
3831
+ "bits_per_weight": 6
3832
  },
3833
  "model.layers.15.self_attn.o_proj": {
3834
  "stored_tensors": {
 
3850
  "shape": [
3851
  320,
3852
  320,
3853
+ 80
3854
  ],
3855
+ "n_bytes": 16384000,
3856
  "dtype": "torch.int16"
3857
  }
3858
  },
3859
  "quant_format": "exl3",
3860
+ "bits_per_weight": 5
3861
  },
3862
  "model.layers.15.post_attention_layernorm": {
3863
  "stored_tensors": {
 
3890
  "shape": [
3891
  320,
3892
  1728,
3893
+ 64
3894
  ],
3895
+ "n_bytes": 70778880,
3896
  "dtype": "torch.int16"
3897
  }
3898
  },
3899
  "quant_format": "exl3",
3900
+ "bits_per_weight": 4
3901
  },
3902
  "model.layers.15.mlp.gate_proj": {
3903
  "stored_tensors": {
 
4241
  "shape": [
4242
  320,
4243
  320,
4244
+ 64
4245
  ],
4246
+ "n_bytes": 13107200,
4247
  "dtype": "torch.int16"
4248
  }
4249
  },
4250
  "quant_format": "exl3",
4251
+ "bits_per_weight": 4
4252
  },
4253
  "model.layers.17.self_attn.k_proj": {
4254
  "stored_tensors": {
 
4342
  "shape": [
4343
  320,
4344
  320,
4345
+ 64
4346
  ],
4347
+ "n_bytes": 13107200,
4348
  "dtype": "torch.int16"
4349
  }
4350
  },
4351
  "quant_format": "exl3",
4352
+ "bits_per_weight": 4
4353
  },
4354
  "model.layers.17.post_attention_layernorm": {
4355
  "stored_tensors": {
 
4382
  "shape": [
4383
  320,
4384
  1728,
4385
+ 80
4386
  ],
4387
+ "n_bytes": 88473600,
4388
  "dtype": "torch.int16"
4389
  }
4390
  },
4391
  "quant_format": "exl3",
4392
+ "bits_per_weight": 5
4393
  },
4394
  "model.layers.17.mlp.gate_proj": {
4395
  "stored_tensors": {
 
4487
  "shape": [
4488
  320,
4489
  320,
4490
+ 80
4491
  ],
4492
+ "n_bytes": 16384000,
4493
  "dtype": "torch.int16"
4494
  }
4495
  },
4496
  "quant_format": "exl3",
4497
+ "bits_per_weight": 5
4498
  },
4499
  "model.layers.18.self_attn.k_proj": {
4500
  "stored_tensors": {
 
4523
  "shape": [
4524
  320,
4525
  64,
4526
+ 96
4527
  ],
4528
+ "n_bytes": 3932160,
4529
  "dtype": "torch.int16"
4530
  }
4531
  },
4532
  "quant_format": "exl3",
4533
+ "bits_per_weight": 6
4534
  },
4535
  "model.layers.18.self_attn.v_proj": {
4536
  "stored_tensors": {
 
4588
  "shape": [
4589
  320,
4590
  320,
4591
+ 80
4592
  ],
4593
+ "n_bytes": 16384000,
4594
  "dtype": "torch.int16"
4595
  }
4596
  },
4597
  "quant_format": "exl3",
4598
+ "bits_per_weight": 5
4599
  },
4600
  "model.layers.18.post_attention_layernorm": {
4601
  "stored_tensors": {
 
4628
  "shape": [
4629
  320,
4630
  1728,
4631
+ 64
4632
  ],
4633
+ "n_bytes": 70778880,
4634
  "dtype": "torch.int16"
4635
  }
4636
  },
4637
  "quant_format": "exl3",
4638
+ "bits_per_weight": 4
4639
  },
4640
  "model.layers.18.mlp.gate_proj": {
4641
  "stored_tensors": {
 
4979
  "shape": [
4980
  320,
4981
  320,
4982
+ 64
4983
  ],
4984
+ "n_bytes": 13107200,
4985
  "dtype": "torch.int16"
4986
  }
4987
  },
4988
  "quant_format": "exl3",
4989
+ "bits_per_weight": 4
4990
  },
4991
  "model.layers.20.self_attn.k_proj": {
4992
  "stored_tensors": {
 
5080
  "shape": [
5081
  320,
5082
  320,
5083
+ 64
5084
  ],
5085
+ "n_bytes": 13107200,
5086
  "dtype": "torch.int16"
5087
  }
5088
  },
5089
  "quant_format": "exl3",
5090
+ "bits_per_weight": 4
5091
  },
5092
  "model.layers.20.post_attention_layernorm": {
5093
  "stored_tensors": {
 
5120
  "shape": [
5121
  320,
5122
  1728,
5123
+ 80
5124
  ],
5125
+ "n_bytes": 88473600,
5126
  "dtype": "torch.int16"
5127
  }
5128
  },
5129
  "quant_format": "exl3",
5130
+ "bits_per_weight": 5
5131
  },
5132
  "model.layers.20.mlp.gate_proj": {
5133
  "stored_tensors": {
 
5225
  "shape": [
5226
  320,
5227
  320,
5228
+ 80
5229
  ],
5230
+ "n_bytes": 16384000,
5231
  "dtype": "torch.int16"
5232
  }
5233
  },
5234
  "quant_format": "exl3",
5235
+ "bits_per_weight": 5
5236
  },
5237
  "model.layers.21.self_attn.k_proj": {
5238
  "stored_tensors": {
 
5261
  "shape": [
5262
  320,
5263
  64,
5264
+ 96
5265
  ],
5266
+ "n_bytes": 3932160,
5267
  "dtype": "torch.int16"
5268
  }
5269
  },
5270
  "quant_format": "exl3",
5271
+ "bits_per_weight": 6
5272
  },
5273
  "model.layers.21.self_attn.v_proj": {
5274
  "stored_tensors": {
 
5326
  "shape": [
5327
  320,
5328
  320,
5329
+ 80
5330
  ],
5331
+ "n_bytes": 16384000,
5332
  "dtype": "torch.int16"
5333
  }
5334
  },
5335
  "quant_format": "exl3",
5336
+ "bits_per_weight": 5
5337
  },
5338
  "model.layers.21.post_attention_layernorm": {
5339
  "stored_tensors": {
 
5366
  "shape": [
5367
  320,
5368
  1728,
5369
+ 64
5370
  ],
5371
+ "n_bytes": 70778880,
5372
  "dtype": "torch.int16"
5373
  }
5374
  },
5375
  "quant_format": "exl3",
5376
+ "bits_per_weight": 4
5377
  },
5378
  "model.layers.21.mlp.gate_proj": {
5379
  "stored_tensors": {
 
5753
  "shape": [
5754
  320,
5755
  64,
5756
+ 96
5757
  ],
5758
+ "n_bytes": 3932160,
5759
  "dtype": "torch.int16"
5760
  }
5761
  },
5762
  "quant_format": "exl3",
5763
+ "bits_per_weight": 6
5764
  },
5765
  "model.layers.23.self_attn.v_proj": {
5766
  "stored_tensors": {
 
5789
  "shape": [
5790
  320,
5791
  64,
5792
+ 96
5793
  ],
5794
+ "n_bytes": 3932160,
5795
  "dtype": "torch.int16"
5796
  }
5797
  },
5798
  "quant_format": "exl3",
5799
+ "bits_per_weight": 6
5800
  },
5801
  "model.layers.23.self_attn.o_proj": {
5802
  "stored_tensors": {
 
6491
  "shape": [
6492
  320,
6493
  64,
6494
+ 80
6495
  ],
6496
+ "n_bytes": 3276800,
6497
  "dtype": "torch.int16"
6498
  }
6499
  },
6500
  "quant_format": "exl3",
6501
+ "bits_per_weight": 5
6502
  },
6503
  "model.layers.26.self_attn.v_proj": {
6504
  "stored_tensors": {
 
6527
  "shape": [
6528
  320,
6529
  64,
6530
+ 80
6531
  ],
6532
+ "n_bytes": 3276800,
6533
  "dtype": "torch.int16"
6534
  }
6535
  },
6536
  "quant_format": "exl3",
6537
+ "bits_per_weight": 5
6538
  },
6539
  "model.layers.26.self_attn.o_proj": {
6540
  "stored_tensors": {
 
7229
  "shape": [
7230
  320,
7231
  64,
7232
+ 96
7233
  ],
7234
+ "n_bytes": 3932160,
7235
  "dtype": "torch.int16"
7236
  }
7237
  },
7238
  "quant_format": "exl3",
7239
+ "bits_per_weight": 6
7240
  },
7241
  "model.layers.29.self_attn.v_proj": {
7242
  "stored_tensors": {
 
7685
  "shape": [
7686
  320,
7687
  320,
7688
+ 80
7689
  ],
7690
+ "n_bytes": 16384000,
7691
  "dtype": "torch.int16"
7692
  }
7693
  },
7694
  "quant_format": "exl3",
7695
+ "bits_per_weight": 5
7696
  },
7697
  "model.layers.31.self_attn.k_proj": {
7698
  "stored_tensors": {
 
7721
  "shape": [
7722
  320,
7723
  64,
7724
+ 96
7725
  ],
7726
+ "n_bytes": 3932160,
7727
  "dtype": "torch.int16"
7728
  }
7729
  },
7730
  "quant_format": "exl3",
7731
+ "bits_per_weight": 6
7732
  },
7733
  "model.layers.31.self_attn.v_proj": {
7734
  "stored_tensors": {
 
7757
  "shape": [
7758
  320,
7759
  64,
7760
+ 96
7761
  ],
7762
+ "n_bytes": 3932160,
7763
  "dtype": "torch.int16"
7764
  }
7765
  },
7766
  "quant_format": "exl3",
7767
+ "bits_per_weight": 6
7768
  },
7769
  "model.layers.31.self_attn.o_proj": {
7770
  "stored_tensors": {
 
7786
  "shape": [
7787
  320,
7788
  320,
7789
+ 80
7790
  ],
7791
+ "n_bytes": 16384000,
7792
  "dtype": "torch.int16"
7793
  }
7794
  },
7795
  "quant_format": "exl3",
7796
+ "bits_per_weight": 5
7797
  },
7798
  "model.layers.31.post_attention_layernorm": {
7799
  "stored_tensors": {
 
7826
  "shape": [
7827
  320,
7828
  1728,
7829
+ 64
7830
  ],
7831
+ "n_bytes": 70778880,
7832
  "dtype": "torch.int16"
7833
  }
7834
  },
7835
  "quant_format": "exl3",
7836
+ "bits_per_weight": 4
7837
  },
7838
  "model.layers.31.mlp.gate_proj": {
7839
  "stored_tensors": {
 
7931
  "shape": [
7932
  320,
7933
  320,
7934
+ 64
7935
  ],
7936
+ "n_bytes": 13107200,
7937
  "dtype": "torch.int16"
7938
  }
7939
  },
7940
  "quant_format": "exl3",
7941
+ "bits_per_weight": 4
7942
  },
7943
  "model.layers.32.self_attn.k_proj": {
7944
  "stored_tensors": {
 
8032
  "shape": [
8033
  320,
8034
  320,
8035
+ 64
8036
  ],
8037
+ "n_bytes": 13107200,
8038
  "dtype": "torch.int16"
8039
  }
8040
  },
8041
  "quant_format": "exl3",
8042
+ "bits_per_weight": 4
8043
  },
8044
  "model.layers.32.post_attention_layernorm": {
8045
  "stored_tensors": {
 
8072
  "shape": [
8073
  320,
8074
  1728,
8075
+ 80
8076
  ],
8077
+ "n_bytes": 88473600,
8078
  "dtype": "torch.int16"
8079
  }
8080
  },
8081
  "quant_format": "exl3",
8082
+ "bits_per_weight": 5
8083
  },
8084
  "model.layers.32.mlp.gate_proj": {
8085
  "stored_tensors": {
 
8423
  "shape": [
8424
  320,
8425
  320,
8426
+ 80
8427
  ],
8428
+ "n_bytes": 16384000,
8429
  "dtype": "torch.int16"
8430
  }
8431
  },
8432
  "quant_format": "exl3",
8433
+ "bits_per_weight": 5
8434
  },
8435
  "model.layers.34.self_attn.k_proj": {
8436
  "stored_tensors": {
 
8459
  "shape": [
8460
  320,
8461
  64,
8462
+ 96
8463
  ],
8464
+ "n_bytes": 3932160,
8465
  "dtype": "torch.int16"
8466
  }
8467
  },
8468
  "quant_format": "exl3",
8469
+ "bits_per_weight": 6
8470
  },
8471
  "model.layers.34.self_attn.v_proj": {
8472
  "stored_tensors": {
 
8524
  "shape": [
8525
  320,
8526
  320,
8527
+ 80
8528
  ],
8529
+ "n_bytes": 16384000,
8530
  "dtype": "torch.int16"
8531
  }
8532
  },
8533
  "quant_format": "exl3",
8534
+ "bits_per_weight": 5
8535
  },
8536
  "model.layers.34.post_attention_layernorm": {
8537
  "stored_tensors": {
 
8564
  "shape": [
8565
  320,
8566
  1728,
8567
+ 64
8568
  ],
8569
+ "n_bytes": 70778880,
8570
  "dtype": "torch.int16"
8571
  }
8572
  },
8573
  "quant_format": "exl3",
8574
+ "bits_per_weight": 4
8575
  },
8576
  "model.layers.34.mlp.gate_proj": {
8577
  "stored_tensors": {
 
8669
  "shape": [
8670
  320,
8671
  320,
8672
+ 64
8673
  ],
8674
+ "n_bytes": 13107200,
8675
  "dtype": "torch.int16"
8676
  }
8677
  },
8678
  "quant_format": "exl3",
8679
+ "bits_per_weight": 4
8680
  },
8681
  "model.layers.35.self_attn.k_proj": {
8682
  "stored_tensors": {
 
8770
  "shape": [
8771
  320,
8772
  320,
8773
+ 64
8774
  ],
8775
+ "n_bytes": 13107200,
8776
  "dtype": "torch.int16"
8777
  }
8778
  },
8779
  "quant_format": "exl3",
8780
+ "bits_per_weight": 4
8781
  },
8782
  "model.layers.35.post_attention_layernorm": {
8783
  "stored_tensors": {
 
8810
  "shape": [
8811
  320,
8812
  1728,
8813
+ 80
8814
  ],
8815
+ "n_bytes": 88473600,
8816
  "dtype": "torch.int16"
8817
  }
8818
  },
8819
  "quant_format": "exl3",
8820
+ "bits_per_weight": 5
8821
  },
8822
  "model.layers.35.mlp.gate_proj": {
8823
  "stored_tensors": {
 
9161
  "shape": [
9162
  320,
9163
  320,
9164
+ 80
9165
  ],
9166
+ "n_bytes": 16384000,
9167
  "dtype": "torch.int16"
9168
  }
9169
  },
9170
  "quant_format": "exl3",
9171
+ "bits_per_weight": 5
9172
  },
9173
  "model.layers.37.self_attn.k_proj": {
9174
  "stored_tensors": {
 
9197
  "shape": [
9198
  320,
9199
  64,
9200
+ 96
9201
  ],
9202
+ "n_bytes": 3932160,
9203
  "dtype": "torch.int16"
9204
  }
9205
  },
9206
  "quant_format": "exl3",
9207
+ "bits_per_weight": 6
9208
  },
9209
  "model.layers.37.self_attn.v_proj": {
9210
  "stored_tensors": {
 
9262
  "shape": [
9263
  320,
9264
  320,
9265
+ 80
9266
  ],
9267
+ "n_bytes": 16384000,
9268
  "dtype": "torch.int16"
9269
  }
9270
  },
9271
  "quant_format": "exl3",
9272
+ "bits_per_weight": 5
9273
  },
9274
  "model.layers.37.post_attention_layernorm": {
9275
  "stored_tensors": {
 
9302
  "shape": [
9303
  320,
9304
  1728,
9305
+ 64
9306
  ],
9307
+ "n_bytes": 70778880,
9308
  "dtype": "torch.int16"
9309
  }
9310
  },
9311
  "quant_format": "exl3",
9312
+ "bits_per_weight": 4
9313
  },
9314
  "model.layers.37.mlp.gate_proj": {
9315
  "stored_tensors": {
 
9407
  "shape": [
9408
  320,
9409
  320,
9410
+ 64
9411
  ],
9412
+ "n_bytes": 13107200,
9413
  "dtype": "torch.int16"
9414
  }
9415
  },
9416
  "quant_format": "exl3",
9417
+ "bits_per_weight": 4
9418
  },
9419
  "model.layers.38.self_attn.k_proj": {
9420
  "stored_tensors": {
 
9443
  "shape": [
9444
  320,
9445
  64,
9446
+ 80
9447
  ],
9448
+ "n_bytes": 3276800,
9449
  "dtype": "torch.int16"
9450
  }
9451
  },
9452
  "quant_format": "exl3",
9453
+ "bits_per_weight": 5
9454
  },
9455
  "model.layers.38.self_attn.v_proj": {
9456
  "stored_tensors": {
 
9479
  "shape": [
9480
  320,
9481
  64,
9482
+ 80
9483
  ],
9484
+ "n_bytes": 3276800,
9485
  "dtype": "torch.int16"
9486
  }
9487
  },
9488
  "quant_format": "exl3",
9489
+ "bits_per_weight": 5
9490
  },
9491
  "model.layers.38.self_attn.o_proj": {
9492
  "stored_tensors": {
 
9508
  "shape": [
9509
  320,
9510
  320,
9511
+ 64
9512
  ],
9513
+ "n_bytes": 13107200,
9514
  "dtype": "torch.int16"
9515
  }
9516
  },
9517
  "quant_format": "exl3",
9518
+ "bits_per_weight": 4
9519
  },
9520
  "model.layers.38.post_attention_layernorm": {
9521
  "stored_tensors": {
 
9548
  "shape": [
9549
  320,
9550
  1728,
9551
+ 80
9552
  ],
9553
+ "n_bytes": 88473600,
9554
  "dtype": "torch.int16"
9555
  }
9556
  },
9557
  "quant_format": "exl3",
9558
+ "bits_per_weight": 5
9559
  },
9560
  "model.layers.38.mlp.gate_proj": {
9561
  "stored_tensors": {
 
9653
  "shape": [
9654
  320,
9655
  320,
9656
+ 80
9657
  ],
9658
+ "n_bytes": 16384000,
9659
  "dtype": "torch.int16"
9660
  }
9661
  },
9662
  "quant_format": "exl3",
9663
+ "bits_per_weight": 5
9664
  },
9665
  "model.layers.39.self_attn.k_proj": {
9666
  "stored_tensors": {
 
9689
  "shape": [
9690
  320,
9691
  64,
9692
+ 96
9693
  ],
9694
+ "n_bytes": 3932160,
9695
  "dtype": "torch.int16"
9696
  }
9697
  },
9698
  "quant_format": "exl3",
9699
+ "bits_per_weight": 6
9700
  },
9701
  "model.layers.39.self_attn.v_proj": {
9702
  "stored_tensors": {
 
9725
  "shape": [
9726
  320,
9727
  64,
9728
+ 96
9729
  ],
9730
+ "n_bytes": 3932160,
9731
  "dtype": "torch.int16"
9732
  }
9733
  },
9734
  "quant_format": "exl3",
9735
+ "bits_per_weight": 6
9736
  },
9737
  "model.layers.39.self_attn.o_proj": {
9738
  "stored_tensors": {
 
9754
  "shape": [
9755
  320,
9756
  320,
9757
+ 80
9758
  ],
9759
+ "n_bytes": 16384000,
9760
  "dtype": "torch.int16"
9761
  }
9762
  },
9763
  "quant_format": "exl3",
9764
+ "bits_per_weight": 5
9765
  },
9766
  "model.layers.39.post_attention_layernorm": {
9767
  "stored_tensors": {
 
9794
  "shape": [
9795
  320,
9796
  1728,
9797
+ 64
9798
  ],
9799
+ "n_bytes": 70778880,
9800
  "dtype": "torch.int16"
9801
  }
9802
  },
9803
  "quant_format": "exl3",
9804
+ "bits_per_weight": 4
9805
  },
9806
  "model.layers.39.mlp.gate_proj": {
9807
  "stored_tensors": {
 
10145
  "shape": [
10146
  320,
10147
  320,
10148
+ 64
10149
  ],
10150
+ "n_bytes": 13107200,
10151
  "dtype": "torch.int16"
10152
  }
10153
  },
10154
  "quant_format": "exl3",
10155
+ "bits_per_weight": 4
10156
  },
10157
  "model.layers.41.self_attn.k_proj": {
10158
  "stored_tensors": {
 
10246
  "shape": [
10247
  320,
10248
  320,
10249
+ 64
10250
  ],
10251
+ "n_bytes": 13107200,
10252
  "dtype": "torch.int16"
10253
  }
10254
  },
10255
  "quant_format": "exl3",
10256
+ "bits_per_weight": 4
10257
  },
10258
  "model.layers.41.post_attention_layernorm": {
10259
  "stored_tensors": {
 
10286
  "shape": [
10287
  320,
10288
  1728,
10289
+ 80
10290
  ],
10291
+ "n_bytes": 88473600,
10292
  "dtype": "torch.int16"
10293
  }
10294
  },
10295
  "quant_format": "exl3",
10296
+ "bits_per_weight": 5
10297
  },
10298
  "model.layers.41.mlp.gate_proj": {
10299
  "stored_tensors": {
 
10391
  "shape": [
10392
  320,
10393
  320,
10394
+ 80
10395
  ],
10396
+ "n_bytes": 16384000,
10397
  "dtype": "torch.int16"
10398
  }
10399
  },
10400
  "quant_format": "exl3",
10401
+ "bits_per_weight": 5
10402
  },
10403
  "model.layers.42.self_attn.k_proj": {
10404
  "stored_tensors": {
 
10427
  "shape": [
10428
  320,
10429
  64,
10430
+ 96
10431
  ],
10432
+ "n_bytes": 3932160,
10433
  "dtype": "torch.int16"
10434
  }
10435
  },
10436
  "quant_format": "exl3",
10437
+ "bits_per_weight": 6
10438
  },
10439
  "model.layers.42.self_attn.v_proj": {
10440
  "stored_tensors": {
 
10492
  "shape": [
10493
  320,
10494
  320,
10495
+ 80
10496
  ],
10497
+ "n_bytes": 16384000,
10498
  "dtype": "torch.int16"
10499
  }
10500
  },
10501
  "quant_format": "exl3",
10502
+ "bits_per_weight": 5
10503
  },
10504
  "model.layers.42.post_attention_layernorm": {
10505
  "stored_tensors": {
 
10532
  "shape": [
10533
  320,
10534
  1728,
10535
+ 64
10536
  ],
10537
+ "n_bytes": 70778880,
10538
  "dtype": "torch.int16"
10539
  }
10540
  },
10541
  "quant_format": "exl3",
10542
+ "bits_per_weight": 4
10543
  },
10544
  "model.layers.42.mlp.gate_proj": {
10545
  "stored_tensors": {
 
10883
  "shape": [
10884
  320,
10885
  320,
10886
+ 64
10887
  ],
10888
+ "n_bytes": 13107200,
10889
  "dtype": "torch.int16"
10890
  }
10891
  },
10892
  "quant_format": "exl3",
10893
+ "bits_per_weight": 4
10894
  },
10895
  "model.layers.44.self_attn.k_proj": {
10896
  "stored_tensors": {
 
10984
  "shape": [
10985
  320,
10986
  320,
10987
+ 64
10988
  ],
10989
+ "n_bytes": 13107200,
10990
  "dtype": "torch.int16"
10991
  }
10992
  },
10993
  "quant_format": "exl3",
10994
+ "bits_per_weight": 4
10995
  },
10996
  "model.layers.44.post_attention_layernorm": {
10997
  "stored_tensors": {
 
11024
  "shape": [
11025
  320,
11026
  1728,
11027
+ 80
11028
  ],
11029
+ "n_bytes": 88473600,
11030
  "dtype": "torch.int16"
11031
  }
11032
  },
11033
  "quant_format": "exl3",
11034
+ "bits_per_weight": 5
11035
  },
11036
  "model.layers.44.mlp.gate_proj": {
11037
  "stored_tensors": {
 
11129
  "shape": [
11130
  320,
11131
  320,
11132
+ 80
11133
  ],
11134
+ "n_bytes": 16384000,
11135
  "dtype": "torch.int16"
11136
  }
11137
  },
11138
  "quant_format": "exl3",
11139
+ "bits_per_weight": 5
11140
  },
11141
  "model.layers.45.self_attn.k_proj": {
11142
  "stored_tensors": {
 
11165
  "shape": [
11166
  320,
11167
  64,
11168
+ 96
11169
  ],
11170
+ "n_bytes": 3932160,
11171
  "dtype": "torch.int16"
11172
  }
11173
  },
11174
  "quant_format": "exl3",
11175
+ "bits_per_weight": 6
11176
  },
11177
  "model.layers.45.self_attn.v_proj": {
11178
  "stored_tensors": {
 
11230
  "shape": [
11231
  320,
11232
  320,
11233
+ 80
11234
  ],
11235
+ "n_bytes": 16384000,
11236
  "dtype": "torch.int16"
11237
  }
11238
  },
11239
  "quant_format": "exl3",
11240
+ "bits_per_weight": 5
11241
  },
11242
  "model.layers.45.post_attention_layernorm": {
11243
  "stored_tensors": {
 
11270
  "shape": [
11271
  320,
11272
  1728,
11273
+ 64
11274
  ],
11275
+ "n_bytes": 70778880,
11276
  "dtype": "torch.int16"
11277
  }
11278
  },
11279
  "quant_format": "exl3",
11280
+ "bits_per_weight": 4
11281
  },
11282
  "model.layers.45.mlp.gate_proj": {
11283
  "stored_tensors": {
 
11657
  "shape": [
11658
  320,
11659
  64,
11660
+ 96
11661
  ],
11662
+ "n_bytes": 3932160,
11663
  "dtype": "torch.int16"
11664
  }
11665
  },
11666
  "quant_format": "exl3",
11667
+ "bits_per_weight": 6
11668
  },
11669
  "model.layers.47.self_attn.v_proj": {
11670
  "stored_tensors": {
 
11693
  "shape": [
11694
  320,
11695
  64,
11696
+ 96
11697
  ],
11698
+ "n_bytes": 3932160,
11699
  "dtype": "torch.int16"
11700
  }
11701
  },
11702
  "quant_format": "exl3",
11703
+ "bits_per_weight": 6
11704
  },
11705
  "model.layers.47.self_attn.o_proj": {
11706
  "stored_tensors": {
 
12395
  "shape": [
12396
  320,
12397
  64,
12398
+ 80
12399
  ],
12400
+ "n_bytes": 3276800,
12401
  "dtype": "torch.int16"
12402
  }
12403
  },
12404
  "quant_format": "exl3",
12405
+ "bits_per_weight": 5
12406
  },
12407
  "model.layers.50.self_attn.v_proj": {
12408
  "stored_tensors": {
 
12431
  "shape": [
12432
  320,
12433
  64,
12434
+ 80
12435
  ],
12436
+ "n_bytes": 3276800,
12437
  "dtype": "torch.int16"
12438
  }
12439
  },
12440
  "quant_format": "exl3",
12441
+ "bits_per_weight": 5
12442
  },
12443
  "model.layers.50.self_attn.o_proj": {
12444
  "stored_tensors": {
 
13133
  "shape": [
13134
  320,
13135
  64,
13136
+ 96
13137
  ],
13138
+ "n_bytes": 3932160,
13139
  "dtype": "torch.int16"
13140
  }
13141
  },
13142
  "quant_format": "exl3",
13143
+ "bits_per_weight": 6
13144
  },
13145
  "model.layers.53.self_attn.v_proj": {
13146
  "stored_tensors": {
 
13589
  "shape": [
13590
  320,
13591
  320,
13592
+ 80
13593
  ],
13594
+ "n_bytes": 16384000,
13595
  "dtype": "torch.int16"
13596
  }
13597
  },
13598
  "quant_format": "exl3",
13599
+ "bits_per_weight": 5
13600
  },
13601
  "model.layers.55.self_attn.k_proj": {
13602
  "stored_tensors": {
 
13625
  "shape": [
13626
  320,
13627
  64,
13628
+ 96
13629
  ],
13630
+ "n_bytes": 3932160,
13631
  "dtype": "torch.int16"
13632
  }
13633
  },
13634
  "quant_format": "exl3",
13635
+ "bits_per_weight": 6
13636
  },
13637
  "model.layers.55.self_attn.v_proj": {
13638
  "stored_tensors": {
 
13661
  "shape": [
13662
  320,
13663
  64,
13664
+ 96
13665
  ],
13666
+ "n_bytes": 3932160,
13667
  "dtype": "torch.int16"
13668
  }
13669
  },
13670
  "quant_format": "exl3",
13671
+ "bits_per_weight": 6
13672
  },
13673
  "model.layers.55.self_attn.o_proj": {
13674
  "stored_tensors": {
 
13690
  "shape": [
13691
  320,
13692
  320,
13693
+ 80
13694
  ],
13695
+ "n_bytes": 16384000,
13696
  "dtype": "torch.int16"
13697
  }
13698
  },
13699
  "quant_format": "exl3",
13700
+ "bits_per_weight": 5
13701
  },
13702
  "model.layers.55.post_attention_layernorm": {
13703
  "stored_tensors": {
 
13730
  "shape": [
13731
  320,
13732
  1728,
13733
+ 64
13734
  ],
13735
+ "n_bytes": 70778880,
13736
  "dtype": "torch.int16"
13737
  }
13738
  },
13739
  "quant_format": "exl3",
13740
+ "bits_per_weight": 4
13741
  },
13742
  "model.layers.55.mlp.gate_proj": {
13743
  "stored_tensors": {
 
13835
  "shape": [
13836
  320,
13837
  320,
13838
+ 64
13839
  ],
13840
+ "n_bytes": 13107200,
13841
  "dtype": "torch.int16"
13842
  }
13843
  },
13844
  "quant_format": "exl3",
13845
+ "bits_per_weight": 4
13846
  },
13847
  "model.layers.56.self_attn.k_proj": {
13848
  "stored_tensors": {
 
13936
  "shape": [
13937
  320,
13938
  320,
13939
+ 64
13940
  ],
13941
+ "n_bytes": 13107200,
13942
  "dtype": "torch.int16"
13943
  }
13944
  },
13945
  "quant_format": "exl3",
13946
+ "bits_per_weight": 4
13947
  },
13948
  "model.layers.56.post_attention_layernorm": {
13949
  "stored_tensors": {
 
13976
  "shape": [
13977
  320,
13978
  1728,
13979
+ 80
13980
  ],
13981
+ "n_bytes": 88473600,
13982
  "dtype": "torch.int16"
13983
  }
13984
  },
13985
  "quant_format": "exl3",
13986
+ "bits_per_weight": 5
13987
  },
13988
  "model.layers.56.mlp.gate_proj": {
13989
  "stored_tensors": {
 
14327
  "shape": [
14328
  320,
14329
  320,
14330
+ 80
14331
  ],
14332
+ "n_bytes": 16384000,
14333
  "dtype": "torch.int16"
14334
  }
14335
  },
14336
  "quant_format": "exl3",
14337
+ "bits_per_weight": 5
14338
  },
14339
  "model.layers.58.self_attn.k_proj": {
14340
  "stored_tensors": {
 
14363
  "shape": [
14364
  320,
14365
  64,
14366
+ 96
14367
  ],
14368
+ "n_bytes": 3932160,
14369
  "dtype": "torch.int16"
14370
  }
14371
  },
14372
  "quant_format": "exl3",
14373
+ "bits_per_weight": 6
14374
  },
14375
  "model.layers.58.self_attn.v_proj": {
14376
  "stored_tensors": {
 
14428
  "shape": [
14429
  320,
14430
  320,
14431
+ 80
14432
  ],
14433
+ "n_bytes": 16384000,
14434
  "dtype": "torch.int16"
14435
  }
14436
  },
14437
  "quant_format": "exl3",
14438
+ "bits_per_weight": 5
14439
  },
14440
  "model.layers.58.post_attention_layernorm": {
14441
  "stored_tensors": {
 
14468
  "shape": [
14469
  320,
14470
  1728,
14471
+ 64
14472
  ],
14473
+ "n_bytes": 70778880,
14474
  "dtype": "torch.int16"
14475
  }
14476
  },
14477
  "quant_format": "exl3",
14478
+ "bits_per_weight": 4
14479
  },
14480
  "model.layers.58.mlp.gate_proj": {
14481
  "stored_tensors": {
 
14573
  "shape": [
14574
  320,
14575
  320,
14576
+ 64
14577
  ],
14578
+ "n_bytes": 13107200,
14579
  "dtype": "torch.int16"
14580
  }
14581
  },
14582
  "quant_format": "exl3",
14583
+ "bits_per_weight": 4
14584
  },
14585
  "model.layers.59.self_attn.k_proj": {
14586
  "stored_tensors": {
 
14674
  "shape": [
14675
  320,
14676
  320,
14677
+ 64
14678
  ],
14679
+ "n_bytes": 13107200,
14680
  "dtype": "torch.int16"
14681
  }
14682
  },
14683
  "quant_format": "exl3",
14684
+ "bits_per_weight": 4
14685
  },
14686
  "model.layers.59.post_attention_layernorm": {
14687
  "stored_tensors": {
 
14714
  "shape": [
14715
  320,
14716
  1728,
14717
+ 80
14718
  ],
14719
+ "n_bytes": 88473600,
14720
  "dtype": "torch.int16"
14721
  }
14722
  },
14723
  "quant_format": "exl3",
14724
+ "bits_per_weight": 5
14725
  },
14726
  "model.layers.59.mlp.gate_proj": {
14727
  "stored_tensors": {
 
15065
  "shape": [
15066
  320,
15067
  320,
15068
+ 80
15069
  ],
15070
+ "n_bytes": 16384000,
15071
  "dtype": "torch.int16"
15072
  }
15073
  },
15074
  "quant_format": "exl3",
15075
+ "bits_per_weight": 5
15076
  },
15077
  "model.layers.61.self_attn.k_proj": {
15078
  "stored_tensors": {
 
15101
  "shape": [
15102
  320,
15103
  64,
15104
+ 96
15105
  ],
15106
+ "n_bytes": 3932160,
15107
  "dtype": "torch.int16"
15108
  }
15109
  },
15110
  "quant_format": "exl3",
15111
+ "bits_per_weight": 6
15112
  },
15113
  "model.layers.61.self_attn.v_proj": {
15114
  "stored_tensors": {
 
15166
  "shape": [
15167
  320,
15168
  320,
15169
+ 80
15170
  ],
15171
+ "n_bytes": 16384000,
15172
  "dtype": "torch.int16"
15173
  }
15174
  },
15175
  "quant_format": "exl3",
15176
+ "bits_per_weight": 5
15177
  },
15178
  "model.layers.61.post_attention_layernorm": {
15179
  "stored_tensors": {
 
15206
  "shape": [
15207
  320,
15208
  1728,
15209
+ 64
15210
  ],
15211
+ "n_bytes": 70778880,
15212
  "dtype": "torch.int16"
15213
  }
15214
  },
15215
  "quant_format": "exl3",
15216
+ "bits_per_weight": 4
15217
  },
15218
  "model.layers.61.mlp.gate_proj": {
15219
  "stored_tensors": {
 
15311
  "shape": [
15312
  320,
15313
  320,
15314
+ 64
15315
  ],
15316
+ "n_bytes": 13107200,
15317
  "dtype": "torch.int16"
15318
  }
15319
  },
15320
  "quant_format": "exl3",
15321
+ "bits_per_weight": 4
15322
  },
15323
  "model.layers.62.self_attn.k_proj": {
15324
  "stored_tensors": {
 
15347
  "shape": [
15348
  320,
15349
  64,
15350
+ 80
15351
  ],
15352
+ "n_bytes": 3276800,
15353
  "dtype": "torch.int16"
15354
  }
15355
  },
15356
  "quant_format": "exl3",
15357
+ "bits_per_weight": 5
15358
  },
15359
  "model.layers.62.self_attn.v_proj": {
15360
  "stored_tensors": {
 
15383
  "shape": [
15384
  320,
15385
  64,
15386
+ 80
15387
  ],
15388
+ "n_bytes": 3276800,
15389
  "dtype": "torch.int16"
15390
  }
15391
  },
15392
  "quant_format": "exl3",
15393
+ "bits_per_weight": 5
15394
  },
15395
  "model.layers.62.self_attn.o_proj": {
15396
  "stored_tensors": {
 
15412
  "shape": [
15413
  320,
15414
  320,
15415
+ 64
15416
  ],
15417
+ "n_bytes": 13107200,
15418
  "dtype": "torch.int16"
15419
  }
15420
  },
15421
  "quant_format": "exl3",
15422
+ "bits_per_weight": 4
15423
  },
15424
  "model.layers.62.post_attention_layernorm": {
15425
  "stored_tensors": {
 
15452
  "shape": [
15453
  320,
15454
  1728,
15455
+ 80
15456
  ],
15457
+ "n_bytes": 88473600,
15458
  "dtype": "torch.int16"
15459
  }
15460
  },
15461
  "quant_format": "exl3",
15462
+ "bits_per_weight": 5
15463
  },
15464
  "model.layers.62.mlp.gate_proj": {
15465
  "stored_tensors": {
 
15557
  "shape": [
15558
  320,
15559
  320,
15560
+ 80
15561
  ],
15562
+ "n_bytes": 16384000,
15563
  "dtype": "torch.int16"
15564
  }
15565
  },
15566
  "quant_format": "exl3",
15567
+ "bits_per_weight": 5
15568
  },
15569
  "model.layers.63.self_attn.k_proj": {
15570
  "stored_tensors": {
 
15593
  "shape": [
15594
  320,
15595
  64,
15596
+ 96
15597
  ],
15598
+ "n_bytes": 3932160,
15599
  "dtype": "torch.int16"
15600
  }
15601
  },
15602
  "quant_format": "exl3",
15603
+ "bits_per_weight": 6
15604
  },
15605
  "model.layers.63.self_attn.v_proj": {
15606
  "stored_tensors": {
 
15629
  "shape": [
15630
  320,
15631
  64,
15632
+ 96
15633
  ],
15634
+ "n_bytes": 3932160,
15635
  "dtype": "torch.int16"
15636
  }
15637
  },
15638
  "quant_format": "exl3",
15639
+ "bits_per_weight": 6
15640
  },
15641
  "model.layers.63.self_attn.o_proj": {
15642
  "stored_tensors": {
 
15658
  "shape": [
15659
  320,
15660
  320,
15661
+ 80
15662
  ],
15663
+ "n_bytes": 16384000,
15664
  "dtype": "torch.int16"
15665
  }
15666
  },
15667
  "quant_format": "exl3",
15668
+ "bits_per_weight": 5
15669
  },
15670
  "model.layers.63.post_attention_layernorm": {
15671
  "stored_tensors": {
 
15698
  "shape": [
15699
  320,
15700
  1728,
15701
+ 64
15702
  ],
15703
+ "n_bytes": 70778880,
15704
  "dtype": "torch.int16"
15705
  }
15706
  },
15707
  "quant_format": "exl3",
15708
+ "bits_per_weight": 4
15709
  },
15710
  "model.layers.63.mlp.gate_proj": {
15711
  "stored_tensors": {