Add amazing new smol-IQ4_KSS
Browse files
- README.md +57 -0
- images/perplexity.png +2 -2
README.md
CHANGED
@@ -267,6 +267,63 @@ numactl -N "$SOCKET" -m "$SOCKET" \
|
|
267 |
|
268 |
</details>
|
269 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
270 |
## IQ3_K 293.177 GiB (3.753 BPW)
|
271 |
Final estimate: PPL = 3.4260 +/- 0.01995
|
272 |
|
|
|
267 |
|
268 |
</details>
|
269 |
|
270 |
+
## smol-IQ4_KSS 318.745 GiB (4.080 BPW)
|
271 |
+
Final estimate: PPL = 3.3898 +/- 0.01964
|
272 |
+
|
273 |
+
<details>
|
274 |
+
|
275 |
+
<summary>👈 Secret Recipe</summary>
|
276 |
+
|
277 |
+
```bash
|
278 |
+
#!/usr/bin/env bash
|
279 |
+
|
280 |
+
# Per-tensor quantization rules, one `tensor-name-regex=quant-type` entry per
# line. Lines that start with `#` are annotations; the grep further down
# removes them before the list is passed to llama-quantize via --custom-q.
custom="
|
281 |
+
## Attention [0-60] (GPU)
|
282 |
+
blk\..*\.attn_k_b\.weight=q8_0
|
283 |
+
blk\..*\.attn_v_b\.weight=q8_0
|
284 |
+
|
285 |
+
# Balance of attn tensors
|
286 |
+
blk\..*\.attn_kv_a_mqa\.weight=q8_0
|
287 |
+
blk\..*\.attn_q_a\.weight=q8_0
|
288 |
+
blk\..*\.attn_q_b\.weight=q8_0
|
289 |
+
blk\..*\.attn_output\.weight=iq6_k
|
290 |
+
|
291 |
+
## First Three Dense Layers [0-2] (GPU)
|
292 |
+
blk\..*\.ffn_down\.weight=iq5_ks
|
293 |
+
blk\..*\.ffn_(gate|up)\.weight=iq5_ks
|
294 |
+
|
295 |
+
## Shared Expert [3-60] (GPU)
|
296 |
+
blk\..*\.ffn_down_shexp\.weight=iq5_ks
|
297 |
+
blk\..*\.ffn_(gate|up)_shexp\.weight=iq5_ks
|
298 |
+
|
299 |
+
## Routed Experts [3-60] (CPU)
|
300 |
+
blk\..*\.ffn_down_exps\.weight=iq4_kss
|
301 |
+
blk\..*\.ffn_(gate|up)_exps\.weight=iq4_kss
|
302 |
+
|
303 |
+
## Token embedding and output tensors (GPU)
|
304 |
+
token_embd\.weight=iq4_k
|
305 |
+
output\.weight=iq6_k
|
306 |
+
"
|
307 |
+
|
308 |
+
# Flatten the multi-line rule list into the single comma-separated string that
# is later passed to --custom-q:
#   - grep -v '^#' drops every line that begins with '#'.
#   - sed -z reads the whole input as one record (GNU sed extension), so the
#     program can match newlines: each run of newlines becomes one comma, then
#     a trailing comma and a leading comma are trimmed.
custom=$(
|
309 |
+
echo "$custom" | grep -v '^#' | \
|
310 |
+
sed -Ez 's:\n+:,:g;s:,$::;s:^,::'
|
311 |
+
)
|
312 |
+
|
313 |
+
# NUMA node number handed to both numactl flags below.
SOCKET=1
|
314 |
+
|
315 |
+
# Run llama-quantize under numactl with -N (--cpunodebind) and -m (--membind)
# both set to $SOCKET. It receives the flattened rule string via --custom-q and
# an importance-matrix file via --imatrix, followed by two GGUF paths (by their
# names, the BF16 source and the smol-IQ4_KSS output), then IQ4_KSS and 192
# (presumably the default quant type and the thread count — confirm against
# llama-quantize's usage text).
numactl -N "$SOCKET" -m "$SOCKET" \
|
316 |
+
./build/bin/llama-quantize \
|
317 |
+
--custom-q "$custom" \
|
318 |
+
--imatrix /mnt/raid/models/ubergarm/DeepSeek-V3.1-GGUF/imatrix-DeepSeek-V3.1-Q8_0.dat \
|
319 |
+
/mnt/raid/models/ubergarm/DeepSeek-V3.1-GGUF/DeepSeek-V3.1-256x20B-safetensors-BF16-00001-of-00030.gguf \
|
320 |
+
/mnt/raid/models/ubergarm/DeepSeek-V3.1-GGUF/DeepSeek-V3.1-smol-IQ4_KSS.gguf \
|
321 |
+
IQ4_KSS \
|
322 |
+
192
|
323 |
+
```
|
324 |
+
|
325 |
+
</details>
|
326 |
+
|
327 |
## IQ3_K 293.177 GiB (3.753 BPW)
|
328 |
Final estimate: PPL = 3.4260 +/- 0.01995
|
329 |
|
images/perplexity.png
CHANGED
![]() |
Git LFS Details
|
![]() |
Git LFS Details
|