# File size: 3,006 Bytes | 811d18f
#!/usr/bin/env python3
from __future__ import annotations
import logging
import argparse
import os
import sys
import json
from pathlib import Path
from tqdm import tqdm
from typing import Any, Sequence, NamedTuple
# Necessary to load the local gguf package.
# Skipped when NO_LOCAL_GGUF is set in the environment. Otherwise, if a
# 'gguf-py' directory exists four levels above this file, the directory three
# levels above this file is put at the front of the import path.
if "NO_LOCAL_GGUF" not in os.environ:
    if (Path(__file__).parent.parent.parent.parent / 'gguf-py').exists():
        sys.path.insert(0, str(Path(__file__).parent.parent.parent))
import gguf
# Module-level logger used for every message this script emits.
logger = logging.getLogger("gguf-mmproj-merge")
class MetadataDetails(NamedTuple):
    """Immutable record describing one GGUF key/value metadata entry."""

    # GGUF value type of the entry.
    type: gguf.GGUFValueType
    # The entry's value.
    value: Any
    # Free-form text attached to the entry; empty unless provided.
    description: str = ""
    # Element type for array-typed entries; None when not provided.
    sub_type: gguf.GGUFValueType | None = None
def get_field_data(reader: gguf.GGUFReader, key: str) -> Any:
    """Return the contents of the metadata field named ``key``.

    Args:
        reader: Reader that is queried through ``reader.get_field``.
        key: Name of the metadata field to look up.

    Returns:
        The result of ``field.contents()`` for the matching field, or
        ``None`` when ``reader.get_field`` returns ``None``.
    """
    field = reader.get_field(key)
    # Compare against None explicitly so that a field which is present but
    # evaluates as falsy is never mistaken for a missing one.
    if field is None:
        return None
    return field.contents()
def merge_multiple_ggufs(
    readers: Sequence[gguf.GGUFReader],
    writer: gguf.GGUFWriter,
    *,
    vision_projector_type: str = "pixtral",
    audio_projector_type: str = "voxtral",
) -> None:
    """Merge the metadata and tensors of several GGUF readers into ``writer``.

    Readers are processed in order. Every metadata field is copied except:

    * the general architecture key, names starting with ``GGUF.`` and names
      containing ``projector_type`` (these are suppressed), and
    * fields whose name was already copied from an earlier reader (the first
      occurrence wins).

    The two ``clip.*.projector_type`` keys are then written explicitly,
    followed by the header, the key/value data, the tensor infos and finally
    the tensor data of every reader, in reader order.

    Args:
        readers: Source readers to merge, in priority order.
        writer: Destination writer. It is always closed before this function
            returns, including when an error occurs.
        vision_projector_type: Value written to ``clip.vision.projector_type``.
        audio_projector_type: Value written to ``clip.audio.projector_type``.

    Raises:
        ValueError: If a field that should be copied has no value.
    """
    total_bytes = 0
    seen_fields: set[str] = set()

    try:
        for reader in readers:
            for field in reader.fields.values():
                name = field.name

                # Suppress virtual fields and fields written by GGUFWriter.
                # Source projector types are dropped too because they are
                # written explicitly further down.
                if (
                    name == gguf.Keys.General.ARCHITECTURE
                    or name.startswith('GGUF.')
                    or "projector_type" in name
                ):
                    logger.debug('Suppressing %s', name)
                    continue

                if name in seen_fields:
                    logger.debug('Skipping duplicate field %s', name)
                    continue
                seen_fields.add(name)

                val_type = field.types[0]
                # Only array fields carry an element type (the last entry).
                sub_type = field.types[-1] if val_type == gguf.GGUFValueType.ARRAY else None

                value = field.contents()
                if value is None:
                    # Explicit check instead of `assert`, which is stripped
                    # when Python runs with -O.
                    raise ValueError(f'Field {name} has no value')

                logger.debug('Copying %s', name)
                writer.add_key_value(name, value, val_type, sub_type=sub_type)

            for tensor in reader.tensors:
                total_bytes += tensor.n_bytes
                writer.add_tensor_info(
                    tensor.name,
                    tensor.data.shape,
                    tensor.data.dtype,
                    tensor.data.nbytes,
                    tensor.tensor_type,
                )

        writer.add_string("clip.vision.projector_type", vision_projector_type)
        writer.add_string("clip.audio.projector_type", audio_projector_type)

        writer.write_header_to_file()
        writer.write_kv_data_to_file()
        writer.write_ti_data_to_file()

        # The context manager closes the progress bar even if a write fails.
        with tqdm(desc="Writing", total=total_bytes, unit="byte", unit_scale=True) as bar:
            for reader in readers:
                for tensor in reader.tensors:
                    writer.write_tensor_data(tensor.data)
                    bar.update(tensor.n_bytes)
    finally:
        # Release the output file on success and on failure alike.
        writer.close()
def main() -> None:
    """Merge an audio and a vision GGUF file into one mmproj GGUF file.

    All command-line options are optional; without any of them the script
    reads ``audio.gguf`` and ``vision.gguf`` and writes ``mmproj-model.gguf``.

    Options:
        --audio PATH    Audio GGUF input (default: audio.gguf).
        --vision PATH   Vision GGUF input (default: vision.gguf).
        --output PATH   Merged GGUF output (default: mmproj-model.gguf).
        --verbose       Log at DEBUG level instead of INFO.
    """
    parser = argparse.ArgumentParser(
        description="Merge an audio and a vision GGUF file into a single mmproj GGUF file")
    parser.add_argument("--audio", default='audio.gguf',
                        help="path of the audio GGUF input (default: %(default)s)")
    parser.add_argument("--vision", default='vision.gguf',
                        help="path of the vision GGUF input (default: %(default)s)")
    parser.add_argument("--output", default='mmproj-model.gguf',
                        help="path of the merged GGUF output (default: %(default)s)")
    parser.add_argument("--verbose", action="store_true",
                        help="enable debug logging")
    args = parser.parse_args()

    # Without a configured handler the INFO message below is never shown.
    logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)

    reader0 = gguf.GGUFReader(args.audio, 'r')
    reader1 = gguf.GGUFReader(args.vision, 'r')

    output_path = args.output
    logger.info(f'* Writing: {output_path}')

    # The output uses the endianness reported by the audio reader.
    # NOTE(review): presumably both inputs share the same endianness - confirm.
    writer = gguf.GGUFWriter(output_path, arch='clip', endianess=reader0.endianess)
    merge_multiple_ggufs([reader0, reader1], writer)
# Run the merge only when this file is executed as a script.
if __name__ == '__main__':
    main()
|