|
|
|
from __future__ import annotations |
|
|
|
import logging |
|
import argparse |
|
import os |
|
import sys |
|
import json |
|
from pathlib import Path |
|
|
|
from tqdm import tqdm |
|
from typing import Any, Sequence, NamedTuple |
|
|
|
|
|
# If NO_LOCAL_GGUF is not set in the environment and a 'gguf-py' directory
# exists four levels above this file, put the directory three levels above
# this file at the front of sys.path so that the `import gguf` below resolves
# there first (presumably a source checkout of the gguf package -- confirm
# against the repository layout). This must stay ahead of `import gguf`.
if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent.parent / 'gguf-py').exists():
    sys.path.insert(0, str(Path(__file__).parent.parent.parent))

import gguf

# Module-level logger shared by every function in this script.
logger = logging.getLogger("gguf-mmproj-merge")
|
|
|
|
|
class MetadataDetails(NamedTuple):
    """Type, value and optional extras describing one metadata key/value entry."""

    # GGUF value type of the entry.
    type: gguf.GGUFValueType
    # The entry's value; its Python type is not constrained here.
    value: Any
    # Optional free-form description; defaults to the empty string.
    description: str = ''
    # Secondary value type; defaults to None.
    # NOTE(review): presumably the element type when `type` is ARRAY -- confirm.
    sub_type: gguf.GGUFValueType | None = None
|
|
|
|
|
def get_field_data(reader: gguf.GGUFReader, key: str) -> Any:
    """Look up ``key`` on ``reader`` and return the field's ``contents()``.

    Args:
        reader: Reader whose ``get_field`` method is queried.
        key: Name of the metadata field to look up.

    Returns:
        Whatever ``contents()`` returns for the field, or ``None`` when
        ``reader.get_field(key)`` yields a falsy result.
    """
    entry = reader.get_field(key)
    if not entry:
        return None
    return entry.contents()
|
|
|
|
|
def merge_multiple_ggufs(
    readers: Sequence[gguf.GGUFReader],
    writer: gguf.GGUFWriter,
    *,
    vision_projector_type: str = "pixtral",
    audio_projector_type: str = "voxtral",
) -> None:
    """Merge the metadata and tensors of several GGUF files into ``writer``.

    Metadata fields are copied reader by reader; when the same key appears in
    more than one reader, the first occurrence wins. The architecture key,
    keys starting with ``GGUF.`` and any key containing ``projector_type`` are
    not copied; the two ``clip.*.projector_type`` keys are written from the
    arguments instead. Tensors from all readers are then written in reader
    order, and ``writer`` is closed before returning, even if writing fails.

    Args:
        readers: Source files, in priority order for duplicate metadata keys.
        writer: Destination writer; it is closed by this function.
        vision_projector_type: Value stored under ``clip.vision.projector_type``.
        audio_projector_type: Value stored under ``clip.audio.projector_type``.

    Raises:
        ValueError: If a field that should be copied has no readable value.
    """
    total_bytes = 0
    seen_fields: set[str] = set()

    for reader in readers:
        for field in reader.fields.values():
            name = field.name

            if (
                name == gguf.Keys.General.ARCHITECTURE
                or name.startswith('GGUF.')
                or "projector_type" in name
            ):
                logger.debug('Suppressing %s', name)
                continue

            if name in seen_fields:
                logger.debug('Skipping duplicate field %s', name)
                continue
            seen_fields.add(name)

            val_type = field.types[0]
            # For arrays the last entry of `types` is used as the sub type.
            sub_type = field.types[-1] if val_type == gguf.GGUFValueType.ARRAY else None
            details = MetadataDetails(val_type, field.contents(), sub_type=sub_type)

            # Explicit check rather than `assert`, which is stripped under -O.
            if details.value is None:
                raise ValueError(f'Field {name} has no readable value')

            logger.debug('Copying %s', name)
            writer.add_key_value(name, details.value, details.type, sub_type=details.sub_type)

        for tensor in reader.tensors:
            total_bytes += tensor.n_bytes
            writer.add_tensor_info(
                tensor.name,
                tensor.data.shape,
                tensor.data.dtype,
                tensor.data.nbytes,
                tensor.tensor_type,
            )

    writer.add_string("clip.vision.projector_type", vision_projector_type)
    writer.add_string("clip.audio.projector_type", audio_projector_type)

    try:
        # The context manager closes the progress bar on success and on error.
        with tqdm(desc="Writing", total=total_bytes, unit="byte", unit_scale=True) as bar:
            writer.write_header_to_file()
            writer.write_kv_data_to_file()
            writer.write_ti_data_to_file()

            for reader in readers:
                for tensor in reader.tensors:
                    writer.write_tensor_data(tensor.data)
                    bar.update(tensor.n_bytes)
    finally:
        # Release the output file handle even when a write step fails.
        writer.close()
|
|
|
|
|
def main() -> None:
    """Command-line entry point: merge an audio and a vision GGUF into one file.

    With no arguments the script reads ``audio.gguf`` and ``vision.gguf`` and
    writes ``mmproj-model.gguf``; each path can be overridden on the command
    line. The output uses the endianness reported by the audio reader.
    """
    parser = argparse.ArgumentParser(
        description="Merge an audio and a vision GGUF file into a single GGUF file",
    )
    parser.add_argument("--audio", type=str, default='audio.gguf',
                        help="input audio GGUF file (default: %(default)s)")
    parser.add_argument("--vision", type=str, default='vision.gguf',
                        help="input vision GGUF file (default: %(default)s)")
    parser.add_argument("--output", type=str, default='mmproj-model.gguf',
                        help="output GGUF file (default: %(default)s)")
    parser.add_argument("--verbose", action="store_true",
                        help="also show debug messages")
    args = parser.parse_args()

    # Without a configured handler the INFO message below is never shown.
    logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)

    reader0 = gguf.GGUFReader(args.audio, 'r')
    reader1 = gguf.GGUFReader(args.vision, 'r')

    logger.info('* Writing: %s', args.output)
    writer = gguf.GGUFWriter(args.output, arch='clip', endianess=reader0.endianess)

    # The audio reader comes first, so its metadata wins on duplicate keys.
    merge_multiple_ggufs([reader0, reader1], writer)


if __name__ == '__main__':
    main()
|
|