Brian Tang committed on
Commit 49ebb9c · 0 Parent(s)

Snapshot of current state 4a58ca57710c49f51896e4bc820e202fbf64904b
.gitattributes ADDED
@@ -0,0 +1,36 @@
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.json filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,73 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ build/
8
+ develop-eggs/
9
+ dist/
10
+ downloads/
11
+ eggs/
12
+ .eggs/
13
+ lib/
14
+ lib64/
15
+ parts/
16
+ sdist/
17
+ var/
18
+ wheels/
19
+ *.egg-info/
20
+ .installed.cfg
21
+ *.egg
22
+
23
+ # Virtual Environment
24
+ venv/
25
+ env/
26
+ ENV/
27
+ .env
28
+ .venv
29
+ env.bak/
30
+ venv.bak/
31
+
32
+ # IDE
33
+ .idea/
34
+ .vscode/
35
+ *.swp
36
+ *.swo
37
+ .project
38
+ .pydevproject
39
+ .settings/
40
+
41
+ # Jupyter Notebook
42
+ .ipynb_checkpoints
43
+ *.ipynb
44
+
45
+ # Distribution / packaging
46
+ .Python
47
+ *.manifest
48
+ *.spec
49
+
50
+ # Unit test / coverage reports
51
+ htmlcov/
52
+ .tox/
53
+ .coverage
54
+ .coverage.*
55
+ .cache
56
+ nosetests.xml
57
+ coverage.xml
58
+ *.cover
59
+ .hypothesis/
60
+
61
+ # Logs and databases
62
+ *.log
63
+ *.sqlite
64
+ *.db
65
+
66
+ # OS generated files
67
+ .DS_Store
68
+ .DS_Store?
69
+ ._*
70
+ .Spotlight-V100
71
+ .Trashes
72
+ ehthumbs.db
73
+ Thumbs.db
README.md ADDED
@@ -0,0 +1,366 @@
1
+ ---
2
+ license: cc-by-nc-4.0
3
+ tags:
4
+ - vidore
5
+ - colpali
6
+ - multimodal-embedding
7
+ - multilingual-embedding
8
+ - Text-to-Visual Document (T→VD) retrieval
9
+ - feature-extraction
10
+ - sentence-similarity
11
+ - mteb
12
+ - sentence-transformers
13
+ language:
14
+ - multilingual
15
+ inference: false
16
+ library_name: transformers
17
+ pipeline_tag: visual-document-retrieval
18
+ ---
19
+ <br><br>
20
+
21
+ <p align="center">
22
+ <img src="https://huggingface.co/datasets/jinaai/documentation-images/resolve/main/logo.webp" alt="Jina AI: Your Search Foundation, Supercharged!" width="150px">
23
+ </p>
24
+
25
+
26
+ <p align="center">
27
+ <b>The embedding model trained by <a href="https://jina.ai/"><b>Jina AI</b></a>.</b>
28
+ </p>
29
+
30
+ # Jina Embeddings v4: Universal Embeddings for Multimodal Multilingual Retrieval
31
+
32
+
33
+ [GGUF](https://github.com/jina-ai/jina-embeddings-v4-gguf) | [Blog](https://jina.ai/news/jina-embeddings-v4-universal-embeddings-for-multimodal-multilingual-retrieval) | [Technical Report](https://arxiv.org/abs/2506.18902) | [API](https://jina.ai/embeddings)
34
+
35
+
36
+ ## Intended Usage & Model Info
37
+ `jina-embeddings-v4` is a universal embedding model for multimodal and multilingual retrieval.
38
+ The model is specially designed for complex document retrieval, including visually rich documents with charts, tables, and illustrations.
39
+
40
+
41
+ Built on [Qwen/Qwen2.5-VL-3B-Instruct](https://huggingface.co/Qwen/Qwen2.5-VL-3B-Instruct), `jina-embeddings-v4` features:
42
+
43
+ - **Unified embeddings** for text, images, and visual documents, supporting both dense (single-vector) and late-interaction (multi-vector) retrieval.
44
+ - **Multilingual support** (30+ languages) and compatibility with a wide range of domains, including technical and visually complex documents.
45
+ - **Task-specific adapters** for retrieval, text matching, and code-related tasks, which can be selected at inference time.
46
+ - **Flexible embedding size**: dense embeddings are 2048 dimensions by default but can be truncated to as low as 128 with minimal performance loss.
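+
+ Because the dense vectors are trained Matryoshka-style, a shorter embedding is obtained by keeping the leading components. The `encode_*` methods shown below accept a `truncate_dim` argument; the snippet here is only a minimal client-side sketch (the helper name and the re-normalization step are illustrative assumptions, and re-normalization only matters for dot-product scoring):
+
+ ```python
+ import numpy as np
+
+ def truncate_embeddings(embeddings: np.ndarray, dim: int = 128) -> np.ndarray:
+     """Keep the first `dim` components of the 2048-d dense embeddings and re-normalize."""
+     truncated = embeddings[:, :dim]
+     norms = np.linalg.norm(truncated, axis=1, keepdims=True)
+     return truncated / np.clip(norms, 1e-12, None)
+ ```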
47
+
48
+
49
+ Summary of features:
50
+
51
+ | Feature | Jina Embeddings V4 |
52
+ |------------|------------|
53
+ | Base Model | Qwen2.5-VL-3B-Instruct |
54
+ | Supported Tasks | `retrieval`, `text-matching`, `code` |
55
+ | Model DType | BFloat16 |
56
+ | Max Sequence Length | 32768 |
57
+ | Single-Vector Dimension | 2048 |
58
+ | Multi-Vector Dimension | 128 |
59
+ | Matryoshka dimensions | 128, 256, 512, 1024, 2048 |
60
+ | Pooling Strategy | Mean pooling |
61
+ | Attention Mechanism | FlashAttention2 |
62
+
63
+
64
+
65
+ ## Training & Evaluation
66
+
67
+ Please refer to our [technical report of jina-embeddings-v4](https://arxiv.org/abs/2506.18902) for training details and benchmarks.
68
+
69
+
70
+ ## Usage
71
+
72
+ <details>
73
+ <summary>Requirements</summary>
74
+
75
+ The following Python packages are required:
76
+
77
+ - `transformers>=4.52.0`
78
+ - `torch>=2.6.0`
79
+ - `peft>=0.15.2`
80
+ - `torchvision`
81
+ - `pillow`
82
+
83
+ ### Optional / Recommended
84
+ - **flash-attention**: Installing [flash-attention](https://github.com/Dao-AILab/flash-attention) is recommended for improved inference speed and efficiency, but not mandatory.
85
+ - **sentence-transformers**: If you want to use the model via the `sentence-transformers` interface, install this package as well.
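+
+ A typical installation matching the pins above (a CUDA-capable environment is assumed for flash-attention; the extras are optional):
+
+ ```bash
+ pip install "transformers>=4.52.0" "torch>=2.6.0" "peft>=0.15.2" torchvision pillow
+ # optional
+ pip install flash-attn --no-build-isolation
+ pip install sentence-transformers
+ ```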
86
+
87
+ </details>
88
+
89
+
90
+ <details>
91
+ <summary>via <a href="https://jina.ai/embeddings/">Jina AI Embeddings API</a></summary>
92
+
93
+
94
+ ```bash
95
+ curl https://api.jina.ai/v1/embeddings \
96
+ -H "Content-Type: application/json" \
97
+ -H "Authorization: Bearer $JINA_AI_API_TOKEN" \
98
+ -d @- <<EOFEOF
99
+ {
100
+ "model": "jina-embeddings-v4",
101
+ "task": "text-matching",
102
+ "input": [
103
+ {
104
+ "text": "غروب جميل على الشاطئ"
105
+ },
106
+ {
107
+ "text": "海滩上美丽的日落"
108
+ },
109
+ {
110
+ "text": "A beautiful sunset over the beach"
111
+ },
112
+ {
113
+ "text": "Un beau coucher de soleil sur la plage"
114
+ },
115
+ {
116
+ "text": "Ein wunderschöner Sonnenuntergang am Strand"
117
+ },
118
+ {
119
+ "text": "Ένα όμορφο ηλιοβασίλεμα πάνω από την παραλία"
120
+ },
121
+ {
122
+ "text": "समुद्र तट पर एक खूबसूरत सूर्यास्त"
123
+ },
124
+ {
125
+ "text": "Un bellissimo tramonto sulla spiaggia"
126
+ },
127
+ {
128
+ "text": "浜辺に沈む美しい夕日"
129
+ },
130
+ {
131
+ "text": "해변 위로 아름다운 일몰"
132
+ },
133
+ {
134
+ "image": "https://i.ibb.co/nQNGqL0/beach1.jpg"
135
+ },
136
+ {
137
+ "image": "https://i.ibb.co/r5w8hG8/beach2.jpg"
138
+ }
139
+ ]
140
+ }
141
+ EOFEOF
142
+ ```
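+
+ The same request can be sent from Python. A minimal sketch using `requests` with a shortened input list (endpoint, headers, and payload fields exactly as in the cURL example above; response parsing is left generic):
+
+ ```python
+ import os
+ import requests
+
+ resp = requests.post(
+     "https://api.jina.ai/v1/embeddings",
+     headers={
+         "Content-Type": "application/json",
+         "Authorization": f"Bearer {os.environ['JINA_AI_API_TOKEN']}",
+     },
+     json={
+         "model": "jina-embeddings-v4",
+         "task": "text-matching",
+         "input": [
+             {"text": "A beautiful sunset over the beach"},
+             {"image": "https://i.ibb.co/nQNGqL0/beach1.jpg"},
+         ],
+     },
+ )
+ resp.raise_for_status()
+ result = resp.json()  # the embedding vectors are contained in the response body
+ ```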
143
+
144
+ </details>
145
+
146
+ <details>
147
+ <summary>via <a href="https://huggingface.co/docs/transformers/en/index">transformers</a></summary>
148
+
149
+ ```python
150
+ # !pip install "transformers>=4.52.0" "torch>=2.6.0" "peft>=0.15.2" torchvision pillow
152
+ from transformers import AutoModel
153
+ import torch
154
+
155
+ # Initialize the model
156
+ model = AutoModel.from_pretrained("jinaai/jina-embeddings-v4", trust_remote_code=True, torch_dtype=torch.float16)
157
+
158
+ model.to("cuda")
159
+
160
+ # ========================
161
+ # 1. Retrieval Task
162
+ # ========================
163
+ # Configure truncate_dim, max_length (for texts), max_pixels (for images), vector_type, batch_size in the encode function if needed
164
+
165
+ # Encode query
166
+ query_embeddings = model.encode_text(
167
+ texts=["Overview of climate change impacts on coastal cities"],
168
+ task="retrieval",
169
+ prompt_name="query",
170
+ )
171
+
172
+ # Encode passage (text)
173
+ passage_embeddings = model.encode_text(
174
+ texts=[
175
+ "Climate change has led to rising sea levels, increased frequency of extreme weather events..."
176
+ ],
177
+ task="retrieval",
178
+ prompt_name="passage",
179
+ )
180
+
181
+ # Encode image/document
182
+ image_embeddings = model.encode_image(
183
+ images=["https://i.ibb.co/nQNGqL0/beach1.jpg"],
184
+ task="retrieval",
185
+ )
186
+
187
+ # ========================
188
+ # 2. Text Matching Task
189
+ # ========================
190
+ texts = [
191
+ "غروب جميل على الشاطئ", # Arabic
192
+ "海滩上美丽的日落", # Chinese
193
+ "Un beau coucher de soleil sur la plage", # French
194
+ "Ein wunderschöner Sonnenuntergang am Strand", # German
195
+ "Ένα όμορφο ηλιοβασίλεμα πάνω από την παραλία", # Greek
196
+ "समुद्र तट पर एक खूबसूरत सूर्यास्त", # Hindi
197
+ "Un bellissimo tramonto sulla spiaggia", # Italian
198
+ "浜辺に沈む美しい夕日", # Japanese
199
+ "해변 위로 아름다운 일몰", # Korean
200
+ ]
201
+
202
+ text_embeddings = model.encode_text(texts=texts, task="text-matching")
203
+
204
+ # ========================
205
+ # 3. Code Understanding Task
206
+ # ========================
207
+
208
+ # Encode query
209
+ query_embedding = model.encode_text(
210
+ texts=["Find a function that prints a greeting message to the console"],
211
+ task="code",
212
+ prompt_name="query",
213
+ )
214
+
215
+ # Encode code
216
+ code_embeddings = model.encode_text(
217
+ texts=["def hello_world():\n print('Hello, World!')"],
218
+ task="code",
219
+ prompt_name="passage",
220
+ )
221
+
222
+ # ========================
223
+ # 4. Use multivectors
224
+ # ========================
225
+
226
+ multivector_embeddings = model.encode_text(
227
+ texts=texts,
228
+ task="retrieval",
229
+ prompt_name="query",
230
+ return_multivector=True,
231
+ )
232
+
233
+ images = ["https://i.ibb.co/nQNGqL0/beach1.jpg", "https://i.ibb.co/r5w8hG8/beach2.jpg"]
234
+ multivector_image_embeddings = model.encode_image(
235
+ images=images,
236
+ task="retrieval",
237
+ return_multivector=True,
238
+ )
239
+ ```
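+
+ Once embeddings are computed, single-vector outputs are usually compared with cosine similarity, while multi-vector outputs are scored with a late-interaction (MaxSim) sum. A minimal scoring sketch, assuming the `encode_*` calls above return 2-D torch tensors for the dense embeddings and one token-embedding matrix per input for the multi-vectors (the `maxsim` helper is illustrative, not part of the model API):
+
+ ```python
+ import torch
+ import torch.nn.functional as F
+
+ # Dense: cosine similarity matrix between queries and passages
+ q = F.normalize(query_embeddings, p=2, dim=-1)    # (n_queries, dim)
+ p = F.normalize(passage_embeddings, p=2, dim=-1)  # (n_passages, dim)
+ cosine_scores = q @ p.T                           # (n_queries, n_passages)
+
+ # Multi-vector: ColBERT-style MaxSim between one query and one document
+ def maxsim(query_tokens: torch.Tensor, doc_tokens: torch.Tensor) -> torch.Tensor:
+     sim = query_tokens @ doc_tokens.T             # (n_query_tokens, n_doc_tokens)
+     return sim.max(dim=-1).values.sum()
+
+ score = maxsim(multivector_embeddings[0], multivector_image_embeddings[0])
+ ```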
240
+ </details>
241
+
242
+ <details>
243
+ <summary>via <a href="https://sbert.net/">sentence-transformers</a></summary>
244
+
245
+ ```python
246
+ from sentence_transformers import SentenceTransformer
247
+
248
+ # Initialize the model
249
+ model = SentenceTransformer("jinaai/jina-embeddings-v4", trust_remote_code=True)
250
+ # ========================
251
+ # 1. Retrieval Task
252
+ # ========================
253
+ # Encode query
254
+ query_embeddings = model.encode(
255
+ sentences=["Overview of climate change impacts on coastal cities"],
256
+ task="retrieval",
257
+ prompt_name="query",
258
+ )
259
+
260
+ print(f"query_embeddings.shape = {query_embeddings.shape}")
261
+
262
+ # Encode passage (text)
263
+ passage_embeddings = model.encode(
264
+ sentences=[
265
+ "Climate change has led to rising sea levels, increased frequency of extreme weather events..."
266
+ ],
267
+ task="retrieval",
268
+ prompt_name="passage",
269
+ )
270
+
271
+ print(f"passage_embeddings.shape = {passage_embeddings.shape}")
272
+
273
+ # Encode image/document
274
+ image_embeddings = model.encode(
275
+ sentences=["https://i.ibb.co/nQNGqL0/beach1.jpg"],
276
+ task="retrieval",
277
+ )
278
+
279
+ print(f"image_embeddings.shape = {image_embeddings.shape}")
280
+
281
+ # ========================
282
+ # 2. Text Matching Task
283
+ # ========================
284
+ texts = [
285
+ "غروب جميل على الشاطئ", # Arabic
286
+ "海滩上美丽的日落", # Chinese
287
+ "Un beau coucher de soleil sur la plage", # French
288
+ "Ein wunderschöner Sonnenuntergang am Strand", # German
289
+ "Ένα όμορφο ηλιοβασίλεμα πάνω από την παραλία", # Greek
290
+ "समुद्र तट पर एक खूबसूरत सूर्यास्त", # Hindi
291
+ "Un bellissimo tramonto sulla spiaggia", # Italian
292
+ "浜辺に沈む美しい夕日", # Japanese
293
+ "해변 위로 아름다운 일몰", # Korean
294
+ ]
295
+
296
+ text_embeddings = model.encode(sentences=texts, task="text-matching")
297
+
298
+ # ========================
299
+ # 3. Code Understanding Task
300
+ # ========================
301
+
302
+ # Encode query
303
+ query_embeddings = model.encode(
304
+ sentences=["Find a function that prints a greeting message to the console"],
305
+ task="code",
306
+ prompt_name="query",
307
+ )
308
+
309
+ # Encode code
310
+ code_embeddings = model.encode(
311
+ sentences=["def hello_world():\n print('Hello, World!')"],
312
+ task="code",
313
+ prompt_name="passage",
314
+ )
315
+
316
+ # ========================
317
+ # 4. Use multivectors
318
+ # ========================
319
+ # If you want to use multi-vector embeddings, please use the Hugging Face model directly.
320
+ ```
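+
+ Recent `sentence-transformers` releases expose `model.similarity`, which applies the similarity function configured for the model (cosine here). For example, re-using the query and passage embeddings from the retrieval example:
+
+ ```python
+ # (n_queries, n_passages) cosine similarity matrix
+ scores = model.similarity(query_embeddings, passage_embeddings)
+ print(scores)
+ ```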
321
+ </details>
322
+
323
+ <details>
324
+ <summary>via <a href="https://github.com/vllm-project/vllm">vLLM</a></summary>
325
+
326
+ We provide separate model versions for each task (`retrieval`, `text-matching`, `code`), in which the task-specific adapter is merged into the base `Qwen2.5-VL` weights.
327
+ This modification enables native compatibility with vLLM.
328
+
329
+ Instructions and usage examples for each task are available in their respective directories:
330
+ - [jina-embeddings-v4-vllm-retrieval](https://huggingface.co/jinaai/jina-embeddings-v4-vllm-retrieval)
331
+ - [jina-embeddings-v4-vllm-text-matching](https://huggingface.co/jinaai/jina-embeddings-v4-vllm-text-matching)
332
+ - [jina-embeddings-v4-vllm-code](https://huggingface.co/jinaai/jina-embeddings-v4-vllm-code)
333
+
334
+ Please refer to the directory that matches your task for more details.
335
+
336
+ </details>
337
+
338
+
339
+ ## Jina-VDR
340
+ Alongside `jina-embeddings-v4`, we’re releasing [Jina VDR](https://github.com/jina-ai/jina-vdr), a multilingual, multi-domain benchmark for visual document retrieval. The task collection can be viewed [here](https://huggingface.co/collections/jinaai/jinavdr-visual-document-retrieval-684831c022c53b21c313b449), and evaluation instructions can be found [here](https://github.com/jina-ai/jina-vdr).
341
+
342
+
343
+ ## License
344
+
345
+ This model is available for download and use under the [CC BY-NC 4.0](https://creativecommons.org/licenses/by-nc/4.0/deed.en) license.
346
+
347
+
348
+ ## Contact
349
+
350
+ Join our [Discord community](https://discord.jina.ai) and chat with other community members about ideas.
351
+
352
+
353
+ ## Citation
354
+
355
+ If you find `jina-embeddings-v4` useful in your research, please cite the following paper:
356
+ ```
357
+ @misc{günther2025jinaembeddingsv4universalembeddingsmultimodal,
358
+ title={jina-embeddings-v4: Universal Embeddings for Multimodal Multilingual Retrieval},
359
+ author={Michael Günther and Saba Sturua and Mohammad Kalim Akram and Isabelle Mohr and Andrei Ungureanu and Sedigheh Eslami and Scott Martens and Bo Wang and Nan Wang and Han Xiao},
360
+ year={2025},
361
+ eprint={2506.18902},
362
+ archivePrefix={arXiv},
363
+ primaryClass={cs.AI},
364
+ url={https://arxiv.org/abs/2506.18902},
365
+ }
366
+ ```
adapters/adapter_config.json ADDED
@@ -0,0 +1,31 @@
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "jinaai/jina-embeddings-v4",
5
+ "bias": "none",
6
+ "corda_config": null,
7
+ "eva_config": null,
8
+ "exclude_modules": ".*visual.*",
9
+ "fan_in_fan_out": false,
10
+ "inference_mode": true,
11
+ "init_lora_weights": "gaussian",
12
+ "layer_replication": null,
13
+ "layers_pattern": null,
14
+ "layers_to_transform": null,
15
+ "loftq_config": {},
16
+ "lora_alpha": 32,
17
+ "lora_bias": false,
18
+ "lora_dropout": 0.1,
19
+ "megatron_config": null,
20
+ "megatron_core": "megatron.core",
21
+ "modules_to_save": null,
22
+ "peft_type": "LORA",
23
+ "r": 32,
24
+ "rank_pattern": {},
25
+ "revision": null,
26
+ "target_modules": "(.*(model).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$|.*(single_vector_projector|multi_vector_projector).*$)",
27
+ "task_type": "FEATURE_EXTRACTION",
28
+ "trainable_token_indices": null,
29
+ "use_dora": false,
30
+ "use_rslora": false
31
+ }
adapters/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6b7ab4a79daa3b4f3b5274500cc99d3dc89aa8c3419e9d79f89e366685e12e5
3
+ size 359863776
added_tokens.json ADDED
@@ -0,0 +1,24 @@
1
+ {
2
+ "</tool_call>": 151658,
3
+ "<tool_call>": 151657,
4
+ "<|box_end|>": 151649,
5
+ "<|box_start|>": 151648,
6
+ "<|endoftext|>": 151643,
7
+ "<|file_sep|>": 151664,
8
+ "<|fim_middle|>": 151660,
9
+ "<|fim_pad|>": 151662,
10
+ "<|fim_prefix|>": 151659,
11
+ "<|fim_suffix|>": 151661,
12
+ "<|im_end|>": 151645,
13
+ "<|im_start|>": 151644,
14
+ "<|image_pad|>": 151655,
15
+ "<|object_ref_end|>": 151647,
16
+ "<|object_ref_start|>": 151646,
17
+ "<|quad_end|>": 151651,
18
+ "<|quad_start|>": 151650,
19
+ "<|repo_name|>": 151663,
20
+ "<|video_pad|>": 151656,
21
+ "<|vision_end|>": 151653,
22
+ "<|vision_pad|>": 151654,
23
+ "<|vision_start|>": 151652
24
+ }
chat_template.json ADDED
@@ -0,0 +1,3 @@
1
+ {
2
+ "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"
3
+ }
config.json ADDED
@@ -0,0 +1,108 @@
1
+ {
2
+ "_name_or_path": "jinaai/jina-embeddings-v4",
3
+ "architectures": [
4
+ "JinaEmbeddingsV4Model"
5
+ ],
6
+ "auto_map": {
7
+ "AutoConfig": "configuration_jina_embeddings_v4.JinaEmbeddingsV4Config",
8
+ "AutoModel": "modeling_jina_embeddings_v4.JinaEmbeddingsV4Model"
9
+ },
10
+ "attention_dropout": 0.0,
11
+ "bos_token_id": 151643,
12
+ "eos_token_id": 151645,
13
+ "hidden_act": "silu",
14
+ "hidden_size": 2048,
15
+ "image_token_id": 151655,
16
+ "initializer_range": 0.02,
17
+ "intermediate_size": 11008,
18
+ "max_position_embeddings": 128000,
19
+ "max_window_layers": 70,
20
+ "multi_vector_projector_dim": 128,
21
+ "num_attention_heads": 16,
22
+ "num_hidden_layers": 36,
23
+ "num_key_value_heads": 2,
24
+ "rms_norm_eps": 1e-06,
25
+ "rope_scaling": {
26
+ "mrope_section": [
27
+ 16,
28
+ 24,
29
+ 24
30
+ ],
31
+ "rope_type": "default",
32
+ "type": "default"
33
+ },
34
+ "rope_theta": 1000000.0,
35
+ "single_vector_pool_strategy": "mean",
36
+ "sliding_window": 32768,
37
+ "tie_word_embeddings": true,
38
+ "text_config": {
39
+ "attention_dropout": 0.0,
40
+ "bos_token_id": 151643,
41
+ "eos_token_id": 151645,
42
+ "hidden_act": "silu",
43
+ "hidden_size": 2048,
44
+ "image_token_id": null,
45
+ "initializer_range": 0.02,
46
+ "intermediate_size": 11008,
47
+ "max_position_embeddings": 128000,
48
+ "max_window_layers": 70,
49
+ "model_type": "qwen2_5_vl_text",
50
+ "num_attention_heads": 16,
51
+ "num_hidden_layers": 36,
52
+ "num_key_value_heads": 2,
53
+ "rms_norm_eps": 1e-06,
54
+ "rope_scaling": {
55
+ "mrope_section": [
56
+ 16,
57
+ 24,
58
+ 24
59
+ ],
60
+ "rope_type": "default",
61
+ "type": "default"
62
+ },
63
+ "rope_theta": 1000000.0,
64
+ "sliding_window": null,
65
+ "tie_word_embeddings": true,
66
+ "torch_dtype": "bfloat16",
67
+ "use_cache": true,
68
+ "use_sliding_window": false,
69
+ "vocab_size": 151936
70
+ },
71
+ "torch_dtype": "bfloat16",
72
+ "transformers_version": "4.52.0",
73
+ "use_cache": true,
74
+ "use_sliding_window": false,
75
+ "video_token_id": 151656,
76
+ "vision_config": {
77
+ "depth": 32,
78
+ "fullatt_block_indexes": [
79
+ 7,
80
+ 15,
81
+ 23,
82
+ 31
83
+ ],
84
+ "hidden_act": "silu",
85
+ "hidden_size": 1280,
86
+ "in_channels": 3,
87
+ "in_chans": 3,
88
+ "initializer_range": 0.02,
89
+ "intermediate_size": 3420,
90
+ "model_type": "qwen2_5_vl",
91
+ "num_heads": 16,
92
+ "out_hidden_size": 2048,
93
+ "patch_size": 14,
94
+ "spatial_merge_size": 2,
95
+ "spatial_patch_size": 14,
96
+ "temporal_patch_size": 2,
97
+ "tokens_per_second": 2,
98
+ "torch_dtype": "bfloat16",
99
+ "window_size": 112
100
+ },
101
+ "task_names": ["retrieval", "text-matching", "code"],
102
+ "matryoshka_dims": [128, 256, 512, 1024, 2048],
103
+ "_attn_implementation": "flash_attention_2",
104
+ "truncate_dim": null,
105
+ "vision_end_token_id": 151653,
106
+ "vision_start_token_id": 151652,
107
+ "vision_token_id": 151654
108
+ }
config_sentence_transformers.json ADDED
@@ -0,0 +1,13 @@
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "4.1.0",
4
+ "transformers": "4.50.0",
5
+ "pytorch": "2.6.0"
6
+ },
7
+ "prompts":{
8
+ "query":"Query: ",
9
+ "passage":"Passage: "
10
+ },
11
+ "default_prompt_name": null,
12
+ "similarity_fn_name": "cosine"
13
+ }
configuration_jina_embeddings_v4.py ADDED
@@ -0,0 +1,23 @@
1
+ from transformers.models.qwen2_5_vl import Qwen2_5_VLConfig
2
+
3
+ from typing import Optional
4
+
5
+
6
+ class JinaEmbeddingsV4Config(Qwen2_5_VLConfig):
7
+ """
8
+ Configuration for the JinaEmbeddingsV4 model.
9
+ """
10
+
11
+ def __init__(
12
+ self,
13
+ single_vector_pool_strategy: str = "mean",
14
+ multi_vector_projector_dim: int = 128,
15
+ pretrained_peft_model_name_or_path: Optional[str] = None,
16
+ verbosity: int = 1,
17
+ **kwargs,
18
+ ):
19
+ super().__init__(**kwargs)
20
+ self.single_vector_pool_strategy = single_vector_pool_strategy
21
+ self.multi_vector_projector_dim = multi_vector_projector_dim
22
+ self.pretrained_peft_model_name_or_path = pretrained_peft_model_name_or_path
23
+ self.verbosity = verbosity
custom_lora_module.py ADDED
@@ -0,0 +1,193 @@
1
+ from __future__ import annotations
2
+
3
+ import math
4
+ import warnings
5
+ from typing import Any, Optional, Union, List
6
+
7
+ import torch
8
+ import torch.nn as nn
9
+
10
+ from peft.tuners.lora import LoraLayer
11
+
12
+ class MultiAdapterLinear(nn.Module, LoraLayer):
13
+ """
14
+ Custom LoRA module supporting multiple adapters for a linear layer.
15
+
16
+ This module extends the standard LoRA implementation to support multiple task-specific
17
+ adapters that can be dynamically selected during the forward pass. The task_label
18
+ parameter passed to the forward function determines which LoRA adapter(s) to use:
19
+ - If task_label is a string, all examples in the batch use the same adapter
20
+ - If task_label is a list of strings, each example can use a different adapter
21
+
22
+ This enables efficient multi-task inference where all task-specific LoRA adapters
23
+ are loaded in memory simultaneously and dynamically selected per example, eliminating
24
+ the need to switch adapter states between tasks and allowing optimal throughput
25
+ for mixed-task batches.
26
+
27
+ Derived from peft.tuners.lora.Linear.
28
+ """
29
+ def __init__(
30
+ self,
31
+ base_layer,
32
+ adapter_name: str,
33
+ task_names: List[str],
34
+ r: int = 0,
35
+ lora_alpha: int = 1,
36
+ lora_dropout: float = 0.0,
37
+ fan_in_fan_out: bool = False, # Set this to True if the layer to replace stores weight like (fan_in, fan_out)
38
+ is_target_conv_1d_layer: bool = False,
39
+ init_lora_weights: Union[bool, str] = True,
40
+ use_rslora: bool = False,
41
+ use_dora: bool = False,
42
+ lora_bias: bool = False,
43
+ **kwargs,
44
+ ) -> None:
45
+ super().__init__()
46
+ LoraLayer.__init__(self, base_layer, **kwargs)
47
+
48
+ self.fan_in_fan_out = fan_in_fan_out
49
+ self.task_names = task_names
50
+ self._active_adapter = adapter_name
51
+ self.update_layer(
52
+ adapter_name,
53
+ r,
54
+ lora_alpha=lora_alpha,
55
+ lora_dropout=lora_dropout,
56
+ init_lora_weights=init_lora_weights,
57
+ use_rslora=use_rslora,
58
+ use_dora=use_dora,
59
+ lora_bias=lora_bias,
60
+ )
61
+ self.is_target_conv_1d_layer = is_target_conv_1d_layer
62
+
63
+
64
+ def forward(self, x: torch.Tensor, task_label: Union[str, List[str]], *args: Any, **kwargs: Any) -> torch.Tensor:
65
+ self._check_forward_args(x, *args, **kwargs)
66
+
67
+ if self.disable_adapters:
68
+ if self.merged:
69
+ self.unmerge()
70
+ result = self.base_layer(x, *args, **kwargs)
71
+ elif self.merged:
72
+ result = self.base_layer(x, *args, **kwargs)
73
+ else:
74
+ result = self.base_layer(x, *args, **kwargs)
75
+ torch_result_dtype = result.dtype
76
+
77
+ lora_A_keys = self.lora_A.keys()
78
+ for active_adapter in self.active_adapters:
79
+ if active_adapter not in lora_A_keys:
80
+ continue
81
+
82
+ if isinstance(task_label, str):
83
+ lora_A = self.lora_A[active_adapter][task_label]
84
+ lora_B = self.lora_B[active_adapter][task_label]
85
+ dropout = self.lora_dropout[active_adapter]
86
+ scaling = self.scaling[active_adapter]
87
+ x = self._cast_input_dtype(x, lora_A.weight.dtype)
88
+ result = result + lora_B(lora_A(dropout(x))) * scaling
89
+ else:
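+ # task_label is a list here: route each example in the batch through the LoRA A/B pair of its own task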
90
+ unique_tasks = list(set(task_label))
91
+ lora_output = torch.zeros_like(result)
92
+
93
+ for task in unique_tasks:
94
+ task_indices = [i for i, t in enumerate(task_label) if t == task]
95
+ task_x = x[task_indices]
96
+
97
+ lora_A = self.lora_A[active_adapter][task]
98
+ lora_B = self.lora_B[active_adapter][task]
99
+ dropout = self.lora_dropout[active_adapter]
100
+ scaling = self.scaling[active_adapter]
101
+
102
+ task_x = self._cast_input_dtype(task_x, lora_A.weight.dtype)
103
+ task_lora_value = lora_B(lora_A(dropout(task_x))) * scaling
104
+
105
+ for i, idx in enumerate(task_indices):
106
+ lora_output[idx] = task_lora_value[i]
107
+
108
+ result = result + lora_output
109
+
110
+ result = result.to(torch_result_dtype)
111
+
112
+ return result
113
+
114
+ def __repr__(self) -> str:
115
+ rep = super().__repr__()
116
+ return "lora." + rep
117
+
118
+
119
+ def update_layer(
120
+ self,
121
+ adapter_name,
122
+ r,
123
+ lora_alpha,
124
+ lora_dropout,
125
+ init_lora_weights,
126
+ use_rslora,
127
+ use_dora: bool = False,
128
+ lora_bias: bool = False,
129
+ ):
130
+ # This code works for linear layers, override for other layer types
131
+ if r <= 0:
132
+ raise ValueError(f"`r` should be a positive integer value but the value passed is {r}")
133
+
134
+ self.r[adapter_name] = r
135
+ self.lora_alpha[adapter_name] = lora_alpha
136
+ if lora_dropout > 0.0:
137
+ lora_dropout_layer = nn.Dropout(p=lora_dropout)
138
+ else:
139
+ lora_dropout_layer = nn.Identity()
140
+
141
+ self.lora_dropout.update(nn.ModuleDict({adapter_name: lora_dropout_layer}))
142
+ # Actual trainable parameters
143
+ self.lora_A[adapter_name] = nn.ModuleDict({
144
+ task_name: nn.Linear(self.in_features, r, bias=False)
145
+ for task_name in self.task_names
146
+ })
147
+ self.lora_B[adapter_name] = nn.ModuleDict({
148
+ task_name: nn.Linear(r, self.out_features, bias=lora_bias)
149
+ for task_name in self.task_names
150
+ })
151
+ self.lora_bias[adapter_name] = lora_bias
152
+
153
+ if use_rslora:
154
+ self.scaling[adapter_name] = lora_alpha / math.sqrt(r)
155
+ else:
156
+ self.scaling[adapter_name] = lora_alpha / r
157
+
158
+ self.reset_lora_parameters(adapter_name, init_lora_weights)
159
+ self._move_adapter_to_device_of_base_layer(adapter_name)
160
+ self.use_dora[adapter_name] = False
161
+ self.set_adapter(self.active_adapters)
162
+
163
+ def reset_lora_parameters(self, adapter_name, init_lora_weights):
164
+ if init_lora_weights is False:
165
+ return
166
+ if init_lora_weights is True:
167
+ # initialize A the same way as the default for nn.Linear and B to zero
168
+ # https://github.com/microsoft/LoRA/blob/a0a92e0f26c067cf94747bdbf1ce73793fa44d19/loralib/layers.py#L124
169
+ for task_name in self.task_names:
170
+ nn.init.kaiming_uniform_(self.lora_A[adapter_name][task_name].weight, a=math.sqrt(5))
171
+ elif init_lora_weights.lower() == "gaussian":
172
+ for task_name in self.task_names:
173
+ nn.init.normal_(self.lora_A[adapter_name][task_name].weight, std=1 / self.r[adapter_name])
174
+ else:
175
+ raise ValueError(f"Unknown initialization {init_lora_weights=}")
176
+ for task_name in self.task_names:
177
+ nn.init.zeros_(self.lora_B[adapter_name][task_name].weight)
178
+ if self.lora_bias[adapter_name]:
179
+ for task_name in self.task_names:
180
+ nn.init.zeros_(self.lora_B[adapter_name][task_name].bias)
181
+
182
+
183
+ def merge(self, safe_merge: bool = False, adapter_names: Optional[list[str]] = None) -> None:
184
+ """
185
+ Merge the active adapter weights into the base weights
186
+ """
187
+ raise NotImplementedError("Merge operation is not supported")
188
+
189
+ def unmerge(self) -> None:
190
+ """
191
+ This method unmerges all merged adapter layers from the base weights.
192
+ """
193
+ raise NotImplementedError("Unmerge operation is not supported")
custom_st.py ADDED
@@ -0,0 +1,185 @@
1
+ import json
2
+ import os
3
+ from io import BytesIO
4
+ from pathlib import Path
5
+ from typing import Any, Dict, List, Literal, Optional, Union
6
+
7
+ import requests
8
+ import torch
9
+ from PIL import Image
10
+ from torch import nn
11
+ from transformers import AutoConfig, AutoModel, AutoProcessor
12
+
13
+
14
+ class Transformer(nn.Module):
15
+
16
+ save_in_root: bool = True
17
+
18
+ def __init__(
19
+ self,
20
+ model_name_or_path: str = "jinaai/jina-embeddings-v4",
21
+ max_seq_length: Optional[int] = None,
22
+ config_args: Optional[Dict[str, Any]] = None,
23
+ model_args: Optional[Dict[str, Any]] = None,
24
+ tokenizer_args: Optional[Dict[str, Any]] = None,
25
+ cache_dir: Optional[str] = None,
26
+ backend: Literal["torch", "onnx", "openvino"] = "torch",
27
+ **kwargs,
28
+ ) -> None:
29
+ super(Transformer, self).__init__()
30
+ if backend != "torch":
31
+ raise ValueError(
32
+ f"Backend '{backend}' is not supported, please use 'torch' instead"
33
+ )
34
+ config_kwargs = config_args or {}
35
+ model_kwargs = model_args or {}
36
+ tokenizer_kwargs = tokenizer_args or {}
37
+
38
+ self.config = AutoConfig.from_pretrained(
39
+ model_name_or_path, cache_dir=cache_dir, **config_kwargs
40
+ )
41
+ self.default_task = model_kwargs.pop("default_task", None)
42
+ if self.default_task and self.default_task not in self.config.task_names:
43
+ raise ValueError(
44
+ f"Invalid task: {self.default_task}. Must be one of {self.config.task_names}."
45
+ )
46
+
47
+ self.model = AutoModel.from_pretrained(
48
+ model_name_or_path, config=self.config, cache_dir=cache_dir, **model_kwargs
49
+ )
50
+ self.processor = AutoProcessor.from_pretrained(
51
+ model_name_or_path,
52
+ cache_dir=cache_dir,
53
+ use_fast=True,
54
+ **tokenizer_kwargs,
55
+ )
56
+ self.max_seq_length = max_seq_length or 8192
57
+
58
+ def tokenize(
59
+ self, texts: List[Union[str, Image.Image]], padding: Union[str, bool] = True
60
+ ) -> Dict[str, torch.Tensor]:
61
+ encoding = {}
62
+ text_indices = []
63
+ image_indices = []
64
+ for i, text in enumerate(texts):
65
+ if isinstance(text, str):
66
+ # Remove Query: or Passage: prefixes when checking for URLs or file paths
67
+ clean_text = text
68
+ if text.startswith("Query: "):
69
+ clean_text = text[len("Query: ") :]
70
+ elif text.startswith("Passage: "):
71
+ clean_text = text[len("Passage: ") :]
72
+
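+ # Strings that are URLs or existing file paths are loaded as images; everything else is kept as text.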
73
+ if clean_text.startswith("http"):
74
+ response = requests.get(clean_text)
75
+ texts[i] = Image.open(BytesIO(response.content)).convert("RGB")
76
+ image_indices.append(i)
77
+ else:
78
+ try:
79
+ if Path(clean_text).is_file():
80
+ texts[i] = Image.open(clean_text).convert("RGB")
81
+ image_indices.append(i)
82
+ else:
83
+ text_indices.append(i)
84
+ except Exception as e:
85
+ text_indices.append(i)
86
+ elif isinstance(text, Image.Image):
87
+ image_indices.append(i)
88
+ else:
89
+ raise ValueError(f"Invalid input type: {type(text)}")
90
+ if text_indices:
91
+ _texts = [texts[i] for i in text_indices]
92
+ text_features = self.processor.process_texts(
93
+ _texts, max_length=self.max_seq_length
94
+ )
95
+ for key, value in text_features.items():
96
+ encoding[f"text_{key}"] = value
97
+ encoding["text_indices"] = text_indices
98
+
99
+ if image_indices:
100
+ _images = [texts[i] for i in image_indices]
101
+ img_features = self.processor.process_images(_images)
102
+ for key, value in img_features.items():
103
+ encoding[f"image_{key}"] = value
104
+ encoding["image_indices"] = image_indices
105
+
106
+ return encoding
107
+
108
+ def forward(
109
+ self,
110
+ features: Dict[str, torch.Tensor],
111
+ task: Optional[str] = None,
112
+ truncate_dim: Optional[int] = None,
113
+ ) -> Dict[str, torch.Tensor]:
114
+ self.model.eval()
115
+
116
+ if task is None:
117
+ if self.default_task is None:
118
+ raise ValueError(
119
+ "Task must be specified before encoding data. You can set it either during "
120
+ "loading the model (e.g., model_kwargs={'default_task': 'retrieval'}) or "
121
+ "pass it as an argument to the encode method (e.g., model.encode(texts, task='retrieval'))."
122
+ )
123
+ task = self.default_task
124
+ else:
125
+ if task not in self.config.task_names:
126
+ raise ValueError(
127
+ f"Invalid task: {task}. Must be one of {self.config.task_names}."
128
+ )
129
+
130
+ device = self.model.device.type
131
+ all_embeddings = []
132
+
133
+ with torch.no_grad():
134
+ if any(k.startswith("text_") for k in features.keys()):
135
+ text_batch = {
136
+ k[len("text_") :]: v.to(device)
137
+ for k, v in features.items()
138
+ if k.startswith("text_") and k != "text_indices"
139
+ }
140
+ text_indices = features.get("text_indices", [])
141
+ with torch.autocast(device_type=device, dtype=torch.bfloat16):
142
+ text_embeddings = self.model(
143
+ **text_batch, task_label=task
144
+ ).single_vec_emb
145
+ if truncate_dim:
146
+ text_embeddings = text_embeddings[:, :truncate_dim]
147
+ text_embeddings = torch.nn.functional.normalize(
148
+ text_embeddings, p=2, dim=-1
149
+ )
150
+ for i, embedding in enumerate(text_embeddings):
151
+ all_embeddings.append((text_indices[i], embedding))
152
+
153
+ if any(k.startswith("image_") for k in features.keys()):
154
+ image_batch = {
155
+ k[len("image_") :]: v.to(device)
156
+ for k, v in features.items()
157
+ if k.startswith("image_") and k != "image_indices"
158
+ }
159
+ image_indices = features.get("image_indices", [])
160
+
161
+ with torch.autocast(device_type=device, dtype=torch.bfloat16):
162
+ img_embeddings = self.model(
163
+ **image_batch, task_label=task
164
+ ).single_vec_emb
165
+ if truncate_dim:
166
+ img_embeddings = img_embeddings[:, :truncate_dim]
167
+ img_embeddings = torch.nn.functional.normalize(
168
+ img_embeddings, p=2, dim=-1
169
+ )
170
+
171
+ for i, embedding in enumerate(img_embeddings):
172
+ all_embeddings.append((image_indices[i], embedding))
173
+
174
+ if not all_embeddings:
175
+ raise RuntimeError("No embeddings were generated")
176
+
177
+ all_embeddings.sort(key=lambda x: x[0]) # sort by original index
178
+ combined_embeddings = torch.stack([emb for _, emb in all_embeddings])
179
+ features["sentence_embedding"] = combined_embeddings
180
+
181
+ return features
182
+
183
+ @classmethod
184
+ def load(cls, input_path: str) -> "Transformer":
185
+ return cls(model_name_or_path=input_path)
generation_config.json ADDED
@@ -0,0 +1,6 @@
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 151643,
4
+ "eos_token_id": 151645,
5
+ "transformers_version": "4.50.0.dev0"
6
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model-00001-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:abb244162956ec2f26d944b6c10cbb96afe211d2aff908b8b2f498ec27a9100b
3
+ size 4997750728
model-00002-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d5252a7ede6469220b0e7386af53fea9a45fa299a1d2af6fe68cb29897de3e3
3
+ size 2512111904
model.safetensors.index.json ADDED
@@ -0,0 +1,833 @@
1
+ {
2
+ "metadata": {
3
+ "total_size": 7513966848
4
+ },
5
+ "weight_map": {
6
+ "model.embed_tokens.weight": "model-00001-of-00002.safetensors",
7
+ "model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
8
+ "model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
9
+ "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
10
+ "model.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
11
+ "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
12
+ "model.layers.0.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
13
+ "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
14
+ "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
15
+ "model.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
16
+ "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
17
+ "model.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
18
+ "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
19
+ "model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors",
20
+ "model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
21
+ "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
22
+ "model.layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
23
+ "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
24
+ "model.layers.1.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
25
+ "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
26
+ "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
27
+ "model.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
28
+ "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
29
+ "model.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
30
+ "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
31
+ "model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors",
32
+ "model.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
33
+ "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
34
+ "model.layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
35
+ "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
36
+ "model.layers.10.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
37
+ "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
38
+ "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
39
+ "model.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
40
+ "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
41
+ "model.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
42
+ "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
43
+ "model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors",
44
+ "model.layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
45
+ "model.layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
46
+ "model.layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
47
+ "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
48
+ "model.layers.11.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
49
+ "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
50
+ "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
51
+ "model.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
52
+ "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
53
+ "model.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
54
+ "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
55
+ "model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors",
56
+ "model.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
57
+ "model.layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
58
+ "model.layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
59
+ "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
60
+ "model.layers.12.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
61
+ "model.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
62
+ "model.layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
63
+ "model.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
64
+ "model.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
65
+ "model.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
66
+ "model.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
67
+ "model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors",
68
+ "model.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
69
+ "model.layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
70
+ "model.layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
71
+ "model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
72
+ "model.layers.13.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
73
+ "model.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
74
+ "model.layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
75
+ "model.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
76
+ "model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
77
+ "model.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
78
+ "model.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
79
+ "model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors",
80
+ "model.layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
81
+ "model.layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
82
+ "model.layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
83
+ "model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
84
+ "model.layers.14.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
85
+ "model.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
86
+ "model.layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
87
+ "model.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
88
+ "model.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
89
+ "model.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
90
+ "model.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
91
+ "model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors",
92
+ "model.layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
93
+ "model.layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
94
+ "model.layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
95
+ "model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
96
+ "model.layers.15.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
97
+ "model.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
98
+ "model.layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
99
+ "model.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
100
+ "model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
101
+ "model.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
102
+ "model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
103
+ "model.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors",
104
+ "model.layers.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
105
+ "model.layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
106
+ "model.layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
107
+ "model.layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
108
+ "model.layers.16.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
109
+ "model.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
110
+ "model.layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
111
+ "model.layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
112
+ "model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
113
+ "model.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
114
+ "model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
115
+ "model.layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors",
116
+ "model.layers.17.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
117
+ "model.layers.17.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
118
+ "model.layers.17.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
119
+ "model.layers.17.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
120
+ "model.layers.17.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
121
+ "model.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
122
+ "model.layers.17.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
123
+ "model.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
124
+ "model.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
125
+ "model.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
126
+ "model.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
127
+ "model.layers.18.input_layernorm.weight": "model-00001-of-00002.safetensors",
128
+ "model.layers.18.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
129
+ "model.layers.18.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
130
+ "model.layers.18.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
131
+ "model.layers.18.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
132
+ "model.layers.18.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
133
+ "model.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
134
+ "model.layers.18.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
135
+ "model.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
136
+ "model.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
137
+ "model.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
138
+ "model.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
139
+ "model.layers.19.input_layernorm.weight": "model-00002-of-00002.safetensors",
140
+ "model.layers.19.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
141
+ "model.layers.19.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
142
+ "model.layers.19.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
143
+ "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
144
+ "model.layers.19.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
145
+ "model.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
146
+ "model.layers.19.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
147
+ "model.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
148
+ "model.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
149
+ "model.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
150
+ "model.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
151
+ "model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
152
+ "model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
153
+ "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
154
+ "model.layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
155
+ "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
156
+ "model.layers.2.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
157
+ "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
158
+ "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
159
+ "model.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
160
+ "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
161
+ "model.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
162
+ "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
163
+ "model.layers.20.input_layernorm.weight": "model-00002-of-00002.safetensors",
164
+ "model.layers.20.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
165
+ "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
166
+ "model.layers.20.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
167
+ "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
168
+ "model.layers.20.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
169
+ "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
170
+ "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
171
+ "model.layers.20.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
172
+ "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
173
+ "model.layers.20.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
174
+ "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
175
+ "model.layers.21.input_layernorm.weight": "model-00002-of-00002.safetensors",
176
+ "model.layers.21.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
177
+ "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
178
+ "model.layers.21.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
179
+ "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
180
+ "model.layers.21.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
181
+ "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
182
+ "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
183
+ "model.layers.21.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
184
+ "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
185
+ "model.layers.21.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
186
+ "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
187
+ "model.layers.22.input_layernorm.weight": "model-00002-of-00002.safetensors",
188
+ "model.layers.22.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
189
+ "model.layers.22.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
190
+ "model.layers.22.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
191
+ "model.layers.22.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
192
+ "model.layers.22.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
193
+ "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
194
+ "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
195
+ "model.layers.22.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
196
+ "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
197
+ "model.layers.22.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
198
+ "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
199
+ "model.layers.23.input_layernorm.weight": "model-00002-of-00002.safetensors",
200
+ "model.layers.23.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
201
+ "model.layers.23.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
202
+ "model.layers.23.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
203
+ "model.layers.23.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
204
+ "model.layers.23.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
205
+ "model.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
206
+ "model.layers.23.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
207
+ "model.layers.23.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
208
+ "model.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
209
+ "model.layers.23.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
210
+ "model.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
211
+ "model.layers.24.input_layernorm.weight": "model-00002-of-00002.safetensors",
212
+ "model.layers.24.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
213
+ "model.layers.24.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
214
+ "model.layers.24.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
215
+ "model.layers.24.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
216
+ "model.layers.24.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
217
+ "model.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
218
+ "model.layers.24.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
219
+ "model.layers.24.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
220
+ "model.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
221
+ "model.layers.24.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
222
+ "model.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
223
+ "model.layers.25.input_layernorm.weight": "model-00002-of-00002.safetensors",
224
+ "model.layers.25.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
225
+ "model.layers.25.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
226
+ "model.layers.25.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
227
+ "model.layers.25.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
228
+ "model.layers.25.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
229
+ "model.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
230
+ "model.layers.25.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
231
+ "model.layers.25.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
232
+ "model.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
233
+ "model.layers.25.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
234
+ "model.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
235
+ "model.layers.26.input_layernorm.weight": "model-00002-of-00002.safetensors",
236
+ "model.layers.26.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
237
+ "model.layers.26.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
238
+ "model.layers.26.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
239
+ "model.layers.26.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
240
+ "model.layers.26.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
241
+ "model.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
242
+ "model.layers.26.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
243
+ "model.layers.26.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
244
+ "model.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
245
+ "model.layers.26.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
246
+ "model.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
247
+ "model.layers.27.input_layernorm.weight": "model-00002-of-00002.safetensors",
248
+ "model.layers.27.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
249
+ "model.layers.27.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
250
+ "model.layers.27.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
251
+ "model.layers.27.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
252
+ "model.layers.27.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
253
+ "model.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
254
+ "model.layers.27.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
255
+ "model.layers.27.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
256
+ "model.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
257
+ "model.layers.27.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
258
+ "model.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
259
+ "model.layers.28.input_layernorm.weight": "model-00002-of-00002.safetensors",
260
+ "model.layers.28.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
261
+ "model.layers.28.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
262
+ "model.layers.28.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
263
+ "model.layers.28.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
264
+ "model.layers.28.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
265
+ "model.layers.28.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
266
+ "model.layers.28.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
267
+ "model.layers.28.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
268
+ "model.layers.28.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
269
+ "model.layers.28.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
270
+ "model.layers.28.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
271
+ "model.layers.29.input_layernorm.weight": "model-00002-of-00002.safetensors",
272
+ "model.layers.29.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
273
+ "model.layers.29.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
274
+ "model.layers.29.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
275
+ "model.layers.29.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
276
+ "model.layers.29.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
277
+ "model.layers.29.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
278
+ "model.layers.29.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
279
+ "model.layers.29.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
280
+ "model.layers.29.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
281
+ "model.layers.29.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
282
+ "model.layers.29.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
283
+ "model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
284
+ "model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
285
+ "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
286
+ "model.layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
287
+ "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
288
+ "model.layers.3.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
289
+ "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
290
+ "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
291
+ "model.layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
292
+ "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
293
+ "model.layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
294
+ "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
295
+ "model.layers.30.input_layernorm.weight": "model-00002-of-00002.safetensors",
296
+ "model.layers.30.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
297
+ "model.layers.30.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
298
+ "model.layers.30.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
299
+ "model.layers.30.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
300
+ "model.layers.30.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
301
+ "model.layers.30.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
302
+ "model.layers.30.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
303
+ "model.layers.30.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
304
+ "model.layers.30.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
305
+ "model.layers.30.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
306
+ "model.layers.30.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
307
+ "model.layers.31.input_layernorm.weight": "model-00002-of-00002.safetensors",
308
+ "model.layers.31.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
309
+ "model.layers.31.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
310
+ "model.layers.31.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
311
+ "model.layers.31.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
312
+ "model.layers.31.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
313
+ "model.layers.31.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
314
+ "model.layers.31.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
315
+ "model.layers.31.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
316
+ "model.layers.31.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
317
+ "model.layers.31.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
318
+ "model.layers.31.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
319
+ "model.layers.32.input_layernorm.weight": "model-00002-of-00002.safetensors",
320
+ "model.layers.32.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
321
+ "model.layers.32.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
322
+ "model.layers.32.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
323
+ "model.layers.32.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
324
+ "model.layers.32.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
325
+ "model.layers.32.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
326
+ "model.layers.32.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
327
+ "model.layers.32.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
328
+ "model.layers.32.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
329
+ "model.layers.32.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
330
+ "model.layers.32.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
331
+ "model.layers.33.input_layernorm.weight": "model-00002-of-00002.safetensors",
332
+ "model.layers.33.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
333
+ "model.layers.33.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
334
+ "model.layers.33.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
335
+ "model.layers.33.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
336
+ "model.layers.33.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
337
+ "model.layers.33.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
338
+ "model.layers.33.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
339
+ "model.layers.33.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
340
+ "model.layers.33.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
341
+ "model.layers.33.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
342
+ "model.layers.33.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
343
+ "model.layers.34.input_layernorm.weight": "model-00002-of-00002.safetensors",
344
+ "model.layers.34.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
345
+ "model.layers.34.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
346
+ "model.layers.34.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
347
+ "model.layers.34.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
348
+ "model.layers.34.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
349
+ "model.layers.34.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
350
+ "model.layers.34.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
351
+ "model.layers.34.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
352
+ "model.layers.34.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
353
+ "model.layers.34.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
354
+ "model.layers.34.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
355
+ "model.layers.35.input_layernorm.weight": "model-00002-of-00002.safetensors",
356
+ "model.layers.35.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
357
+ "model.layers.35.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
358
+ "model.layers.35.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
359
+ "model.layers.35.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
360
+ "model.layers.35.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
361
+ "model.layers.35.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
362
+ "model.layers.35.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
363
+ "model.layers.35.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
364
+ "model.layers.35.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
365
+ "model.layers.35.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
366
+ "model.layers.35.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
367
+ "model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors",
368
+ "model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
369
+ "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
370
+ "model.layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
371
+ "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
372
+ "model.layers.4.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
373
+ "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
374
+ "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
375
+ "model.layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
376
+ "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
377
+ "model.layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
378
+ "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
379
+ "model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors",
380
+ "model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
381
+ "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
382
+ "model.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
383
+ "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
384
+ "model.layers.5.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
385
+ "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
386
+ "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
387
+ "model.layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
388
+ "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
389
+ "model.layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
390
+ "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
391
+ "model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors",
392
+ "model.layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
393
+ "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
394
+ "model.layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
395
+ "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
396
+ "model.layers.6.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
397
+ "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
398
+ "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
399
+ "model.layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
400
+ "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
401
+ "model.layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
402
+ "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
403
+ "model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors",
404
+ "model.layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
405
+ "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
406
+ "model.layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
407
+ "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
408
+ "model.layers.7.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
409
+ "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
410
+ "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
411
+ "model.layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
412
+ "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
413
+ "model.layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
414
+ "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
415
+ "model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors",
416
+ "model.layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
417
+ "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
418
+ "model.layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
419
+ "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
420
+ "model.layers.8.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
421
+ "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
422
+ "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
423
+ "model.layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
424
+ "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
425
+ "model.layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
426
+ "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
427
+ "model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors",
428
+ "model.layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
429
+ "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
430
+ "model.layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
431
+ "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
432
+ "model.layers.9.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
433
+ "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
434
+ "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
435
+ "model.layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
436
+ "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
437
+ "model.layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
438
+ "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
439
+ "model.norm.weight": "model-00002-of-00002.safetensors",
440
+ "multi_vector_projector.bias": "model-00002-of-00002.safetensors",
441
+ "multi_vector_projector.weight": "model-00002-of-00002.safetensors",
442
+ "visual.blocks.0.attn.proj.bias": "model-00001-of-00002.safetensors",
443
+ "visual.blocks.0.attn.proj.weight": "model-00001-of-00002.safetensors",
444
+ "visual.blocks.0.attn.qkv.bias": "model-00001-of-00002.safetensors",
445
+ "visual.blocks.0.attn.qkv.weight": "model-00001-of-00002.safetensors",
446
+ "visual.blocks.0.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
447
+ "visual.blocks.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
448
+ "visual.blocks.0.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
449
+ "visual.blocks.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
450
+ "visual.blocks.0.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
451
+ "visual.blocks.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
452
+ "visual.blocks.0.norm1.weight": "model-00001-of-00002.safetensors",
453
+ "visual.blocks.0.norm2.weight": "model-00001-of-00002.safetensors",
454
+ "visual.blocks.1.attn.proj.bias": "model-00001-of-00002.safetensors",
455
+ "visual.blocks.1.attn.proj.weight": "model-00001-of-00002.safetensors",
456
+ "visual.blocks.1.attn.qkv.bias": "model-00001-of-00002.safetensors",
457
+ "visual.blocks.1.attn.qkv.weight": "model-00001-of-00002.safetensors",
458
+ "visual.blocks.1.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
459
+ "visual.blocks.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
460
+ "visual.blocks.1.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
461
+ "visual.blocks.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
462
+ "visual.blocks.1.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
463
+ "visual.blocks.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
464
+ "visual.blocks.1.norm1.weight": "model-00001-of-00002.safetensors",
465
+ "visual.blocks.1.norm2.weight": "model-00001-of-00002.safetensors",
466
+ "visual.blocks.10.attn.proj.bias": "model-00001-of-00002.safetensors",
467
+ "visual.blocks.10.attn.proj.weight": "model-00001-of-00002.safetensors",
468
+ "visual.blocks.10.attn.qkv.bias": "model-00001-of-00002.safetensors",
469
+ "visual.blocks.10.attn.qkv.weight": "model-00001-of-00002.safetensors",
470
+ "visual.blocks.10.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
471
+ "visual.blocks.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
472
+ "visual.blocks.10.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
473
+ "visual.blocks.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
474
+ "visual.blocks.10.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
475
+ "visual.blocks.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
476
+ "visual.blocks.10.norm1.weight": "model-00001-of-00002.safetensors",
477
+ "visual.blocks.10.norm2.weight": "model-00001-of-00002.safetensors",
478
+ "visual.blocks.11.attn.proj.bias": "model-00001-of-00002.safetensors",
479
+ "visual.blocks.11.attn.proj.weight": "model-00001-of-00002.safetensors",
480
+ "visual.blocks.11.attn.qkv.bias": "model-00001-of-00002.safetensors",
481
+ "visual.blocks.11.attn.qkv.weight": "model-00001-of-00002.safetensors",
482
+ "visual.blocks.11.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
483
+ "visual.blocks.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
484
+ "visual.blocks.11.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
485
+ "visual.blocks.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
486
+ "visual.blocks.11.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
487
+ "visual.blocks.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
488
+ "visual.blocks.11.norm1.weight": "model-00001-of-00002.safetensors",
489
+ "visual.blocks.11.norm2.weight": "model-00001-of-00002.safetensors",
490
+ "visual.blocks.12.attn.proj.bias": "model-00001-of-00002.safetensors",
491
+ "visual.blocks.12.attn.proj.weight": "model-00001-of-00002.safetensors",
492
+ "visual.blocks.12.attn.qkv.bias": "model-00001-of-00002.safetensors",
493
+ "visual.blocks.12.attn.qkv.weight": "model-00001-of-00002.safetensors",
494
+ "visual.blocks.12.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
495
+ "visual.blocks.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
496
+ "visual.blocks.12.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
497
+ "visual.blocks.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
498
+ "visual.blocks.12.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
499
+ "visual.blocks.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
500
+ "visual.blocks.12.norm1.weight": "model-00001-of-00002.safetensors",
501
+ "visual.blocks.12.norm2.weight": "model-00001-of-00002.safetensors",
502
+ "visual.blocks.13.attn.proj.bias": "model-00001-of-00002.safetensors",
503
+ "visual.blocks.13.attn.proj.weight": "model-00001-of-00002.safetensors",
504
+ "visual.blocks.13.attn.qkv.bias": "model-00001-of-00002.safetensors",
505
+ "visual.blocks.13.attn.qkv.weight": "model-00001-of-00002.safetensors",
506
+ "visual.blocks.13.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
507
+ "visual.blocks.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
508
+ "visual.blocks.13.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
509
+ "visual.blocks.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
510
+ "visual.blocks.13.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
511
+ "visual.blocks.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
512
+ "visual.blocks.13.norm1.weight": "model-00001-of-00002.safetensors",
513
+ "visual.blocks.13.norm2.weight": "model-00001-of-00002.safetensors",
514
+ "visual.blocks.14.attn.proj.bias": "model-00001-of-00002.safetensors",
515
+ "visual.blocks.14.attn.proj.weight": "model-00001-of-00002.safetensors",
516
+ "visual.blocks.14.attn.qkv.bias": "model-00001-of-00002.safetensors",
517
+ "visual.blocks.14.attn.qkv.weight": "model-00001-of-00002.safetensors",
518
+ "visual.blocks.14.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
519
+ "visual.blocks.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
520
+ "visual.blocks.14.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
521
+ "visual.blocks.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
522
+ "visual.blocks.14.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
523
+ "visual.blocks.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
524
+ "visual.blocks.14.norm1.weight": "model-00001-of-00002.safetensors",
525
+ "visual.blocks.14.norm2.weight": "model-00001-of-00002.safetensors",
526
+ "visual.blocks.15.attn.proj.bias": "model-00001-of-00002.safetensors",
527
+ "visual.blocks.15.attn.proj.weight": "model-00001-of-00002.safetensors",
528
+ "visual.blocks.15.attn.qkv.bias": "model-00001-of-00002.safetensors",
529
+ "visual.blocks.15.attn.qkv.weight": "model-00001-of-00002.safetensors",
530
+ "visual.blocks.15.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
531
+ "visual.blocks.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
532
+ "visual.blocks.15.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
533
+ "visual.blocks.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
534
+ "visual.blocks.15.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
535
+ "visual.blocks.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
536
+ "visual.blocks.15.norm1.weight": "model-00001-of-00002.safetensors",
537
+ "visual.blocks.15.norm2.weight": "model-00001-of-00002.safetensors",
538
+ "visual.blocks.16.attn.proj.bias": "model-00001-of-00002.safetensors",
539
+ "visual.blocks.16.attn.proj.weight": "model-00001-of-00002.safetensors",
540
+ "visual.blocks.16.attn.qkv.bias": "model-00001-of-00002.safetensors",
541
+ "visual.blocks.16.attn.qkv.weight": "model-00001-of-00002.safetensors",
542
+ "visual.blocks.16.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
543
+ "visual.blocks.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
544
+ "visual.blocks.16.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
545
+ "visual.blocks.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
546
+ "visual.blocks.16.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
547
+ "visual.blocks.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
548
+ "visual.blocks.16.norm1.weight": "model-00001-of-00002.safetensors",
549
+ "visual.blocks.16.norm2.weight": "model-00001-of-00002.safetensors",
550
+ "visual.blocks.17.attn.proj.bias": "model-00001-of-00002.safetensors",
551
+ "visual.blocks.17.attn.proj.weight": "model-00001-of-00002.safetensors",
552
+ "visual.blocks.17.attn.qkv.bias": "model-00001-of-00002.safetensors",
553
+ "visual.blocks.17.attn.qkv.weight": "model-00001-of-00002.safetensors",
554
+ "visual.blocks.17.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
555
+ "visual.blocks.17.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
556
+ "visual.blocks.17.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
557
+ "visual.blocks.17.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
558
+ "visual.blocks.17.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
559
+ "visual.blocks.17.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
560
+ "visual.blocks.17.norm1.weight": "model-00001-of-00002.safetensors",
561
+ "visual.blocks.17.norm2.weight": "model-00001-of-00002.safetensors",
562
+ "visual.blocks.18.attn.proj.bias": "model-00001-of-00002.safetensors",
563
+ "visual.blocks.18.attn.proj.weight": "model-00001-of-00002.safetensors",
564
+ "visual.blocks.18.attn.qkv.bias": "model-00001-of-00002.safetensors",
565
+ "visual.blocks.18.attn.qkv.weight": "model-00001-of-00002.safetensors",
566
+ "visual.blocks.18.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
567
+ "visual.blocks.18.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
568
+ "visual.blocks.18.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
569
+ "visual.blocks.18.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
570
+ "visual.blocks.18.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
571
+ "visual.blocks.18.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
572
+ "visual.blocks.18.norm1.weight": "model-00001-of-00002.safetensors",
573
+ "visual.blocks.18.norm2.weight": "model-00001-of-00002.safetensors",
574
+ "visual.blocks.19.attn.proj.bias": "model-00001-of-00002.safetensors",
575
+ "visual.blocks.19.attn.proj.weight": "model-00001-of-00002.safetensors",
576
+ "visual.blocks.19.attn.qkv.bias": "model-00001-of-00002.safetensors",
577
+ "visual.blocks.19.attn.qkv.weight": "model-00001-of-00002.safetensors",
578
+ "visual.blocks.19.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
579
+ "visual.blocks.19.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
580
+ "visual.blocks.19.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
581
+ "visual.blocks.19.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
582
+ "visual.blocks.19.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
583
+ "visual.blocks.19.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
584
+ "visual.blocks.19.norm1.weight": "model-00001-of-00002.safetensors",
585
+ "visual.blocks.19.norm2.weight": "model-00001-of-00002.safetensors",
586
+ "visual.blocks.2.attn.proj.bias": "model-00001-of-00002.safetensors",
587
+ "visual.blocks.2.attn.proj.weight": "model-00001-of-00002.safetensors",
588
+ "visual.blocks.2.attn.qkv.bias": "model-00001-of-00002.safetensors",
589
+ "visual.blocks.2.attn.qkv.weight": "model-00001-of-00002.safetensors",
590
+ "visual.blocks.2.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
591
+ "visual.blocks.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
592
+ "visual.blocks.2.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
593
+ "visual.blocks.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
594
+ "visual.blocks.2.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
595
+ "visual.blocks.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
596
+ "visual.blocks.2.norm1.weight": "model-00001-of-00002.safetensors",
597
+ "visual.blocks.2.norm2.weight": "model-00001-of-00002.safetensors",
598
+ "visual.blocks.20.attn.proj.bias": "model-00001-of-00002.safetensors",
599
+ "visual.blocks.20.attn.proj.weight": "model-00001-of-00002.safetensors",
600
+ "visual.blocks.20.attn.qkv.bias": "model-00001-of-00002.safetensors",
601
+ "visual.blocks.20.attn.qkv.weight": "model-00001-of-00002.safetensors",
602
+ "visual.blocks.20.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
603
+ "visual.blocks.20.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
604
+ "visual.blocks.20.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
605
+ "visual.blocks.20.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
606
+ "visual.blocks.20.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
607
+ "visual.blocks.20.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
608
+ "visual.blocks.20.norm1.weight": "model-00001-of-00002.safetensors",
609
+ "visual.blocks.20.norm2.weight": "model-00001-of-00002.safetensors",
610
+ "visual.blocks.21.attn.proj.bias": "model-00001-of-00002.safetensors",
611
+ "visual.blocks.21.attn.proj.weight": "model-00001-of-00002.safetensors",
612
+ "visual.blocks.21.attn.qkv.bias": "model-00001-of-00002.safetensors",
613
+ "visual.blocks.21.attn.qkv.weight": "model-00001-of-00002.safetensors",
614
+ "visual.blocks.21.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
615
+ "visual.blocks.21.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
616
+ "visual.blocks.21.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
617
+ "visual.blocks.21.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
618
+ "visual.blocks.21.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
619
+ "visual.blocks.21.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
620
+ "visual.blocks.21.norm1.weight": "model-00001-of-00002.safetensors",
621
+ "visual.blocks.21.norm2.weight": "model-00001-of-00002.safetensors",
622
+ "visual.blocks.22.attn.proj.bias": "model-00001-of-00002.safetensors",
623
+ "visual.blocks.22.attn.proj.weight": "model-00001-of-00002.safetensors",
624
+ "visual.blocks.22.attn.qkv.bias": "model-00001-of-00002.safetensors",
625
+ "visual.blocks.22.attn.qkv.weight": "model-00001-of-00002.safetensors",
626
+ "visual.blocks.22.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
627
+ "visual.blocks.22.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
628
+ "visual.blocks.22.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
629
+ "visual.blocks.22.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
630
+ "visual.blocks.22.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
631
+ "visual.blocks.22.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
632
+ "visual.blocks.22.norm1.weight": "model-00001-of-00002.safetensors",
633
+ "visual.blocks.22.norm2.weight": "model-00001-of-00002.safetensors",
634
+ "visual.blocks.23.attn.proj.bias": "model-00001-of-00002.safetensors",
635
+ "visual.blocks.23.attn.proj.weight": "model-00001-of-00002.safetensors",
636
+ "visual.blocks.23.attn.qkv.bias": "model-00001-of-00002.safetensors",
637
+ "visual.blocks.23.attn.qkv.weight": "model-00001-of-00002.safetensors",
638
+ "visual.blocks.23.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
639
+ "visual.blocks.23.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
640
+ "visual.blocks.23.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
641
+ "visual.blocks.23.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
642
+ "visual.blocks.23.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
643
+ "visual.blocks.23.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
644
+ "visual.blocks.23.norm1.weight": "model-00001-of-00002.safetensors",
645
+ "visual.blocks.23.norm2.weight": "model-00001-of-00002.safetensors",
646
+ "visual.blocks.24.attn.proj.bias": "model-00001-of-00002.safetensors",
647
+ "visual.blocks.24.attn.proj.weight": "model-00001-of-00002.safetensors",
648
+ "visual.blocks.24.attn.qkv.bias": "model-00001-of-00002.safetensors",
649
+ "visual.blocks.24.attn.qkv.weight": "model-00001-of-00002.safetensors",
650
+ "visual.blocks.24.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
651
+ "visual.blocks.24.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
652
+ "visual.blocks.24.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
653
+ "visual.blocks.24.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
654
+ "visual.blocks.24.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
655
+ "visual.blocks.24.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
656
+ "visual.blocks.24.norm1.weight": "model-00001-of-00002.safetensors",
657
+ "visual.blocks.24.norm2.weight": "model-00001-of-00002.safetensors",
658
+ "visual.blocks.25.attn.proj.bias": "model-00001-of-00002.safetensors",
659
+ "visual.blocks.25.attn.proj.weight": "model-00001-of-00002.safetensors",
660
+ "visual.blocks.25.attn.qkv.bias": "model-00001-of-00002.safetensors",
661
+ "visual.blocks.25.attn.qkv.weight": "model-00001-of-00002.safetensors",
662
+ "visual.blocks.25.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
663
+ "visual.blocks.25.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
664
+ "visual.blocks.25.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
665
+ "visual.blocks.25.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
666
+ "visual.blocks.25.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
667
+ "visual.blocks.25.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
668
+ "visual.blocks.25.norm1.weight": "model-00001-of-00002.safetensors",
669
+ "visual.blocks.25.norm2.weight": "model-00001-of-00002.safetensors",
670
+ "visual.blocks.26.attn.proj.bias": "model-00001-of-00002.safetensors",
671
+ "visual.blocks.26.attn.proj.weight": "model-00001-of-00002.safetensors",
672
+ "visual.blocks.26.attn.qkv.bias": "model-00001-of-00002.safetensors",
673
+ "visual.blocks.26.attn.qkv.weight": "model-00001-of-00002.safetensors",
674
+ "visual.blocks.26.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
675
+ "visual.blocks.26.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
676
+ "visual.blocks.26.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
677
+ "visual.blocks.26.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
678
+ "visual.blocks.26.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
679
+ "visual.blocks.26.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
680
+ "visual.blocks.26.norm1.weight": "model-00001-of-00002.safetensors",
681
+ "visual.blocks.26.norm2.weight": "model-00001-of-00002.safetensors",
682
+ "visual.blocks.27.attn.proj.bias": "model-00001-of-00002.safetensors",
683
+ "visual.blocks.27.attn.proj.weight": "model-00001-of-00002.safetensors",
684
+ "visual.blocks.27.attn.qkv.bias": "model-00001-of-00002.safetensors",
685
+ "visual.blocks.27.attn.qkv.weight": "model-00001-of-00002.safetensors",
686
+ "visual.blocks.27.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
687
+ "visual.blocks.27.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
688
+ "visual.blocks.27.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
689
+ "visual.blocks.27.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
690
+ "visual.blocks.27.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
691
+ "visual.blocks.27.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
692
+ "visual.blocks.27.norm1.weight": "model-00001-of-00002.safetensors",
693
+ "visual.blocks.27.norm2.weight": "model-00001-of-00002.safetensors",
694
+ "visual.blocks.28.attn.proj.bias": "model-00001-of-00002.safetensors",
695
+ "visual.blocks.28.attn.proj.weight": "model-00001-of-00002.safetensors",
696
+ "visual.blocks.28.attn.qkv.bias": "model-00001-of-00002.safetensors",
697
+ "visual.blocks.28.attn.qkv.weight": "model-00001-of-00002.safetensors",
698
+ "visual.blocks.28.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
699
+ "visual.blocks.28.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
700
+ "visual.blocks.28.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
701
+ "visual.blocks.28.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
702
+ "visual.blocks.28.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
703
+ "visual.blocks.28.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
704
+ "visual.blocks.28.norm1.weight": "model-00001-of-00002.safetensors",
705
+ "visual.blocks.28.norm2.weight": "model-00001-of-00002.safetensors",
706
+ "visual.blocks.29.attn.proj.bias": "model-00001-of-00002.safetensors",
707
+ "visual.blocks.29.attn.proj.weight": "model-00001-of-00002.safetensors",
708
+ "visual.blocks.29.attn.qkv.bias": "model-00001-of-00002.safetensors",
709
+ "visual.blocks.29.attn.qkv.weight": "model-00001-of-00002.safetensors",
710
+ "visual.blocks.29.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
711
+ "visual.blocks.29.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
712
+ "visual.blocks.29.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
713
+ "visual.blocks.29.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
714
+ "visual.blocks.29.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
715
+ "visual.blocks.29.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
716
+ "visual.blocks.29.norm1.weight": "model-00001-of-00002.safetensors",
717
+ "visual.blocks.29.norm2.weight": "model-00001-of-00002.safetensors",
718
+ "visual.blocks.3.attn.proj.bias": "model-00001-of-00002.safetensors",
719
+ "visual.blocks.3.attn.proj.weight": "model-00001-of-00002.safetensors",
720
+ "visual.blocks.3.attn.qkv.bias": "model-00001-of-00002.safetensors",
721
+ "visual.blocks.3.attn.qkv.weight": "model-00001-of-00002.safetensors",
722
+ "visual.blocks.3.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
723
+ "visual.blocks.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
724
+ "visual.blocks.3.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
725
+ "visual.blocks.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
726
+ "visual.blocks.3.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
727
+ "visual.blocks.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
728
+ "visual.blocks.3.norm1.weight": "model-00001-of-00002.safetensors",
729
+ "visual.blocks.3.norm2.weight": "model-00001-of-00002.safetensors",
730
+ "visual.blocks.30.attn.proj.bias": "model-00001-of-00002.safetensors",
731
+ "visual.blocks.30.attn.proj.weight": "model-00001-of-00002.safetensors",
732
+ "visual.blocks.30.attn.qkv.bias": "model-00001-of-00002.safetensors",
733
+ "visual.blocks.30.attn.qkv.weight": "model-00001-of-00002.safetensors",
734
+ "visual.blocks.30.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
735
+ "visual.blocks.30.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
736
+ "visual.blocks.30.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
737
+ "visual.blocks.30.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
738
+ "visual.blocks.30.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
739
+ "visual.blocks.30.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
740
+ "visual.blocks.30.norm1.weight": "model-00001-of-00002.safetensors",
741
+ "visual.blocks.30.norm2.weight": "model-00001-of-00002.safetensors",
742
+ "visual.blocks.31.attn.proj.bias": "model-00001-of-00002.safetensors",
743
+ "visual.blocks.31.attn.proj.weight": "model-00001-of-00002.safetensors",
744
+ "visual.blocks.31.attn.qkv.bias": "model-00001-of-00002.safetensors",
745
+ "visual.blocks.31.attn.qkv.weight": "model-00001-of-00002.safetensors",
746
+ "visual.blocks.31.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
747
+ "visual.blocks.31.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
748
+ "visual.blocks.31.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
749
+ "visual.blocks.31.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
750
+ "visual.blocks.31.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
751
+ "visual.blocks.31.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
752
+ "visual.blocks.31.norm1.weight": "model-00001-of-00002.safetensors",
753
+ "visual.blocks.31.norm2.weight": "model-00001-of-00002.safetensors",
754
+ "visual.blocks.4.attn.proj.bias": "model-00001-of-00002.safetensors",
755
+ "visual.blocks.4.attn.proj.weight": "model-00001-of-00002.safetensors",
756
+ "visual.blocks.4.attn.qkv.bias": "model-00001-of-00002.safetensors",
757
+ "visual.blocks.4.attn.qkv.weight": "model-00001-of-00002.safetensors",
758
+ "visual.blocks.4.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
759
+ "visual.blocks.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
760
+ "visual.blocks.4.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
761
+ "visual.blocks.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
762
+ "visual.blocks.4.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
763
+ "visual.blocks.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
764
+ "visual.blocks.4.norm1.weight": "model-00001-of-00002.safetensors",
765
+ "visual.blocks.4.norm2.weight": "model-00001-of-00002.safetensors",
766
+ "visual.blocks.5.attn.proj.bias": "model-00001-of-00002.safetensors",
767
+ "visual.blocks.5.attn.proj.weight": "model-00001-of-00002.safetensors",
768
+ "visual.blocks.5.attn.qkv.bias": "model-00001-of-00002.safetensors",
769
+ "visual.blocks.5.attn.qkv.weight": "model-00001-of-00002.safetensors",
770
+ "visual.blocks.5.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
771
+ "visual.blocks.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
772
+ "visual.blocks.5.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
773
+ "visual.blocks.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
774
+ "visual.blocks.5.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
775
+ "visual.blocks.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
776
+ "visual.blocks.5.norm1.weight": "model-00001-of-00002.safetensors",
777
+ "visual.blocks.5.norm2.weight": "model-00001-of-00002.safetensors",
778
+ "visual.blocks.6.attn.proj.bias": "model-00001-of-00002.safetensors",
779
+ "visual.blocks.6.attn.proj.weight": "model-00001-of-00002.safetensors",
780
+ "visual.blocks.6.attn.qkv.bias": "model-00001-of-00002.safetensors",
781
+ "visual.blocks.6.attn.qkv.weight": "model-00001-of-00002.safetensors",
782
+ "visual.blocks.6.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
783
+ "visual.blocks.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
784
+ "visual.blocks.6.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
785
+ "visual.blocks.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
786
+ "visual.blocks.6.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
787
+ "visual.blocks.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
788
+ "visual.blocks.6.norm1.weight": "model-00001-of-00002.safetensors",
789
+ "visual.blocks.6.norm2.weight": "model-00001-of-00002.safetensors",
790
+ "visual.blocks.7.attn.proj.bias": "model-00001-of-00002.safetensors",
791
+ "visual.blocks.7.attn.proj.weight": "model-00001-of-00002.safetensors",
792
+ "visual.blocks.7.attn.qkv.bias": "model-00001-of-00002.safetensors",
793
+ "visual.blocks.7.attn.qkv.weight": "model-00001-of-00002.safetensors",
794
+ "visual.blocks.7.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
795
+ "visual.blocks.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
796
+ "visual.blocks.7.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
797
+ "visual.blocks.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
798
+ "visual.blocks.7.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
799
+ "visual.blocks.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
800
+ "visual.blocks.7.norm1.weight": "model-00001-of-00002.safetensors",
801
+ "visual.blocks.7.norm2.weight": "model-00001-of-00002.safetensors",
802
+ "visual.blocks.8.attn.proj.bias": "model-00001-of-00002.safetensors",
803
+ "visual.blocks.8.attn.proj.weight": "model-00001-of-00002.safetensors",
804
+ "visual.blocks.8.attn.qkv.bias": "model-00001-of-00002.safetensors",
805
+ "visual.blocks.8.attn.qkv.weight": "model-00001-of-00002.safetensors",
806
+ "visual.blocks.8.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
807
+ "visual.blocks.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
808
+ "visual.blocks.8.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
809
+ "visual.blocks.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
810
+ "visual.blocks.8.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
811
+ "visual.blocks.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
812
+ "visual.blocks.8.norm1.weight": "model-00001-of-00002.safetensors",
813
+ "visual.blocks.8.norm2.weight": "model-00001-of-00002.safetensors",
814
+ "visual.blocks.9.attn.proj.bias": "model-00001-of-00002.safetensors",
815
+ "visual.blocks.9.attn.proj.weight": "model-00001-of-00002.safetensors",
816
+ "visual.blocks.9.attn.qkv.bias": "model-00001-of-00002.safetensors",
817
+ "visual.blocks.9.attn.qkv.weight": "model-00001-of-00002.safetensors",
818
+ "visual.blocks.9.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
819
+ "visual.blocks.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
820
+ "visual.blocks.9.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
821
+ "visual.blocks.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
822
+ "visual.blocks.9.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
823
+ "visual.blocks.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
824
+ "visual.blocks.9.norm1.weight": "model-00001-of-00002.safetensors",
825
+ "visual.blocks.9.norm2.weight": "model-00001-of-00002.safetensors",
826
+ "visual.merger.ln_q.weight": "model-00001-of-00002.safetensors",
827
+ "visual.merger.mlp.0.bias": "model-00001-of-00002.safetensors",
828
+ "visual.merger.mlp.0.weight": "model-00001-of-00002.safetensors",
829
+ "visual.merger.mlp.2.bias": "model-00001-of-00002.safetensors",
830
+ "visual.merger.mlp.2.weight": "model-00001-of-00002.safetensors",
831
+ "visual.patch_embed.proj.weight": "model-00001-of-00002.safetensors"
832
+ }
833
+ }
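The weight map above follows the standard Hugging Face sharded-safetensors index layout: each parameter name is keyed to the shard file that stores it. A minimal consumption sketch, assuming the index and both shard files sit in the working directory (variable names are illustrative and not part of this repository):

    import json
    from collections import defaultdict
    from safetensors import safe_open

    with open("model.safetensors.index.json") as f:
        index = json.load(f)

    # Group parameter names by the shard that holds them
    shard_to_params = defaultdict(list)
    for name, shard_file in index["weight_map"].items():
        shard_to_params[shard_file].append(name)

    # Read each tensor from its shard (PyTorch tensors assumed)
    state_dict = {}
    for shard_file, names in shard_to_params.items():
        with safe_open(shard_file, framework="pt") as shard:
            for name in names:
                state_dict[name] = shard.get_tensor(name)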
modeling_jina_embeddings_v4.py ADDED
@@ -0,0 +1,609 @@
1
+ # Jina Embeddings V4 Model implementation was inspired by the ColPali codebase:
2
+ # https://github.com/illuin-tech/colpali
3
+
4
+ import os
5
+ from dataclasses import dataclass
6
+ from enum import Enum
7
+ from functools import partial
8
+ from io import BytesIO
9
+ from typing import Any, Callable, ClassVar, Dict, List, Optional, Union, cast
10
+
11
+ import numpy as np
12
+ import requests
13
+ import torch
14
+ from huggingface_hub import snapshot_download
15
+ from peft import LoraConfig, PeftModel
16
+ from PIL import Image
17
+ from torch import nn
18
+ from torch.utils.data import DataLoader
19
+ from tqdm import tqdm
20
+ from transformers import BatchFeature
21
+ from transformers.utils import is_flash_attn_2_available
22
+
23
+ from .configuration_jina_embeddings_v4 import JinaEmbeddingsV4Config
24
+ from .custom_lora_module import MultiAdapterLinear
25
+ from .qwen2_5_vl import Qwen2_5_VLForConditionalGeneration, Qwen2_5_VLProcessor
26
+
27
+
28
+ class PromptType(str, Enum):
29
+ query = "query"
30
+ passage = "passage"
31
+
32
+
33
+ PREFIX_DICT = {"query": "Query", "passage": "Passage"}
34
+
35
+
36
+ class JinaEmbeddingsV4Processor(Qwen2_5_VLProcessor):
37
+ def __init__(self, *args, **kwargs) -> None:
38
+ Qwen2_5_VLProcessor.__init__(self, *args, **kwargs)
39
+ self.assistant_prefix_len = 58
40
+ self.text_max_length = 32768
41
+
42
+ def process_images(
43
+ self,
44
+ images: Union[List[Image.Image], List[List[Image.Image]]],
45
+ ) -> BatchFeature:
46
+
47
+ if isinstance(images[0], list):
48
+ images = cast(List[List[Image.Image]], images)
49
+ text_doc = []
50
+ for i in range(len(images)):
51
+ conversation = [
52
+ {"role": "user", "content": [{"type": "image"}] * len(images[i])}
53
+ ]
54
+ template = self.apply_chat_template(
55
+ conversation, add_generation_prompt=False
56
+ )
57
+ text_doc.append(template[self.assistant_prefix_len :])
58
+
59
+ else:
60
+ images = cast(List[Image.Image], images)
61
+ text_doc = [
62
+ "<|im_start|>user\n<|vision_start|><|image_pad|><|vision_end|>Describe the image.<|im_end|>\n"
63
+ ] * len(images)
64
+
65
+ # The following code is a hack to make sure the scatter in DDP is done correctly when training on multiple GPUs
66
+ batch_doc = self(text=text_doc, images=images, padding="longest", return_tensors="pt") # type: ignore
67
+ # Separate pixel_values for each image
68
+ offsets = batch_doc["image_grid_thw"][:, 1] * batch_doc["image_grid_thw"][:, 2]
69
+ # Split pixel_values per image, then pad the chunks to the same length so they can be stacked into a single tensor
70
+ pixel_values = torch.split(batch_doc["pixel_values"], offsets.tolist())
71
+
72
+ max_length = max([len(pv) for pv in pixel_values])
73
+
74
+ pixel_values = [
75
+ torch.cat(
76
+ [
77
+ pv,
78
+ torch.zeros(
79
+ (max_length - len(pv), pv.shape[1]),
80
+ dtype=pv.dtype,
81
+ device=pv.device,
82
+ ),
83
+ ]
84
+ )
85
+ for pv in pixel_values
86
+ ]
87
+
88
+ batch_doc["pixel_values"] = torch.stack(pixel_values)
89
+ return batch_doc
90
+
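# Editorial sketch (not part of this file): the zero-padding above lets per-image
# patch tensors of unequal length be stacked into one batch tensor; the model's
# get_last_hidden_states later slices each row back to its true length using
# image_grid_thw. Illustrative shapes only:
#
#   image_grid_thw = [[1, 4, 6], [1, 8, 8]]  ->  offsets = [24, 64] patch rows
#   pixel_values is split into chunks of 24 and 64 rows, the first chunk is padded
#   with 40 zero rows, and the two are stacked into a (2, 64, patch_dim) tensor.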
91
+ def process_texts(
92
+ self,
93
+ texts: List[str],
94
+ max_length: Optional[int] = None,
95
+ prefix: Optional[str] = None,
96
+ padding: Optional[str] = None,
97
+ ) -> BatchFeature:
98
+
99
+ max_length = (
100
+ self.text_max_length
101
+ if max_length is None
102
+ else min(max_length, self.text_max_length)
103
+ )
104
+ padded_texts: List[str] = []
105
+
106
+ for text in texts:
107
+ if prefix:
108
+ text = f"{prefix}: {text}"
109
+ padded_texts.append(text)
110
+
111
+ text_batch = self(
112
+ text=padded_texts,
113
+ return_tensors="pt",
114
+ padding=padding or "longest",
115
+ max_length=max_length,
116
+ truncation=True,
117
+ )
118
+
119
+ return text_batch
120
+
121
+
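# Editorial usage sketch (not part of this file; the checkpoint id is a placeholder
# that mirrors the from_pretrained call further down in this module):
#
#   processor = JinaEmbeddingsV4Processor.from_pretrained(
#       "<model-repo-or-path>", trust_remote_code=True, use_fast=True
#   )
#   text_batch = processor.process_texts(["what is a sharded checkpoint?"], prefix="Query")
#   image_batch = processor.process_images([Image.open("page.png")])  # PIL image, as imported above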
122
+ @dataclass
123
+ class JinaEmbeddingsV4ModelOutput:
124
+ """
125
+ Base class for the Hybrid Model outputs.
126
+ Args:
127
+ vlm_last_hidden_states (torch.Tensor, optional): Last hidden states of the VLM.
128
+ single_vec_emb (torch.Tensor, optional): Single-vector embeddings.
129
+ multi_vec_emb (torch.Tensor, optional): Multi-vector embeddings.
130
+ """
131
+
132
+ vlm_last_hidden_states: Optional[torch.Tensor] = None
133
+ single_vec_emb: Optional[torch.Tensor] = None
134
+ multi_vec_emb: Optional[torch.Tensor] = None
135
+
136
+
137
+ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
138
+ config_class = JinaEmbeddingsV4Config
139
+ main_input_name: ClassVar[str] = "doc_input_ids"
140
+
141
+ def __init__(self, config: JinaEmbeddingsV4Config):
142
+ Qwen2_5_VLForConditionalGeneration.__init__(self, config)
143
+ self._init_projection_layer(config)
144
+ self.post_init()
145
+ self.processor = JinaEmbeddingsV4Processor.from_pretrained(
146
+ self.name_or_path, trust_remote_code=True, use_fast=True
147
+ )
148
+ self.multi_vector_projector_dim = config.multi_vector_projector_dim
149
+ self.verbosity = config.verbosity
150
+ self._task = None
151
+
152
+ @property
153
+ def task(self) -> Optional[str]:
154
+ """Get the current task set for the model."""
155
+ return self._task
156
+
157
+ @task.setter
158
+ def task(self, task: str):
159
+ """
160
+ Set the task for the model.
161
+
162
+ Args:
163
+ task (str): The task name. Must be one of ['retrieval', 'text-matching', 'code']
164
+ """
165
+ if task not in self.config.task_names:
166
+ raise ValueError(
167
+ f"Invalid task: {task}. Must be one of {self.config.task_names}."
168
+ )
169
+ self._task = task
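+ # Example: model.task = "retrieval" makes subsequent encode_text / encode_image calls use the retrieval adapter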
170
+
171
+ def get_last_hidden_states(
172
+ self,
173
+ task_label: Union[str, List[str]],
174
+ input_ids: torch.LongTensor,
175
+ attention_mask: torch.Tensor,
176
+ **kwargs,
177
+ ) -> torch.Tensor:
178
+ if "pixel_values" in kwargs:
179
+ offsets = kwargs["image_grid_thw"][:, 1] * kwargs["image_grid_thw"][:, 2]
180
+ kwargs["pixel_values"] = torch.cat(
181
+ [pv[:o] for pv, o in zip(kwargs["pixel_values"], offsets)], dim=0
182
+ )
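+ # Slicing pv[:o] drops the zero padding added in process_images, restoring the flat (total_patches, patch_dim) layout expected by the vision encoder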
183
+ position_ids, rope_deltas = self.model.get_rope_index(
184
+ input_ids=input_ids,
185
+ image_grid_thw=kwargs.get("image_grid_thw", None),
186
+ attention_mask=attention_mask,
187
+ )
188
+
189
+ kwargs["output_hidden_states"] = True
190
+ outputs = super().forward(
191
+ task_label=task_label,
192
+ input_ids=input_ids,
193
+ attention_mask=attention_mask,
194
+ **kwargs,
195
+ position_ids=position_ids,
196
+ rope_deltas=rope_deltas,
197
+ use_cache=False,
198
+ )
199
+
200
+ hidden_states = outputs.hidden_states
201
+ if not hidden_states:
202
+ raise ValueError("Hidden states not found in model output")
203
+
204
+ return hidden_states[-1]
205
+
206
+ def _init_projection_layer(self, config) -> None:
207
+ """
208
+ Initializes projection layers.
209
+ """
210
+ self.config.multi_vector_projector_dim = config.multi_vector_projector_dim
211
+
212
+ self.multi_vector_projector = nn.Linear(
213
+ in_features=self.config.text_config.hidden_size,
214
+ out_features=self.config.multi_vector_projector_dim,
215
+ )
216
+
217
+ def get_single_vector_embeddings(
218
+ self,
219
+ hidden_states: torch.Tensor,
220
+ attention_mask: torch.Tensor,
221
+ input_ids: Optional[torch.LongTensor] = None,
222
+ ) -> torch.Tensor:
223
+ """
224
+ Get the single-vector embeddings from the hidden states.
225
+ """
226
+ if self._input_has_image(input_ids[0]): # got document image
227
+ img_start_positions = torch.where(
228
+ input_ids == self.config.vision_start_token_id
229
+ )[1]
230
+ img_end_positions = torch.where(
231
+ input_ids == self.config.vision_end_token_id
232
+ )[1]
233
+
234
+ batch_size, seq_len = input_ids.shape
235
+ position_indices = torch.arange(seq_len, device=input_ids.device).expand(
236
+ batch_size, -1
237
+ )
238
+ image_mask = (position_indices >= img_start_positions.unsqueeze(1)) & (
239
+ position_indices <= img_end_positions.unsqueeze(1)
240
+ )
241
+
242
+ masked_hidden_states = hidden_states * image_mask.unsqueeze(-1)
243
+ pooled_output = masked_hidden_states.sum(dim=1) / image_mask.sum(
244
+ dim=1, keepdim=True
245
+ )
246
+ else: # got query text
247
+ pooled_output = torch.sum(
248
+ hidden_states * attention_mask.unsqueeze(-1), dim=1
249
+ ) / torch.sum(attention_mask, dim=1, keepdim=True)
250
+
251
+ return torch.nn.functional.normalize(pooled_output, dim=-1)
252
+
253
+ def get_multi_vector_embeddings(
254
+ self,
255
+ task_label: Union[str, List[str]],
256
+ hidden_states: torch.Tensor,
257
+ attention_mask: torch.Tensor,
258
+ ) -> torch.Tensor:
259
+ """
260
+ Project the hidden states to multi-vector embeddings.
261
+ """
262
+ multi_vec_emb = self.multi_vector_projector(
263
+ hidden_states, task_label=task_label
264
+ )
265
+ multi_vec_emb = torch.nn.functional.normalize(multi_vec_emb, dim=-1)
266
+ return multi_vec_emb * attention_mask.unsqueeze(-1)
267
+
268
+ def _input_has_image(self, input_ids):
269
+ return self.config.vision_start_token_id in input_ids
270
+
271
+ def forward(
272
+ self,
273
+ task_label: Union[str, List[str]],
274
+ input_ids: torch.LongTensor,
275
+ attention_mask: torch.Tensor,
276
+ output_vlm_last_hidden_states: bool = False,
277
+ **kwargs,
278
+ ) -> JinaEmbeddingsV4ModelOutput:
279
+ """
280
+ Forward pass through the model. Returns both single-vector and multi-vector embeddings.
281
+ Args:
282
+ input_ids (torch.Tensor): The input tokens tensor.
283
+ attention_mask (torch.Tensor): The attention mask tensor.
284
+ Returns:
285
+ JinaEmbeddingsV4ModelOutput:
286
+ vlm_last_hidden_states (torch.Tensor, optional): Last hidden states of the VLM.
287
+ single_vec_emb (torch.Tensor, optional): Single-vector embeddings.
288
+ multi_vec_emb (torch.Tensor, optional): Multi-vector embeddings.
289
+ """
290
+ # Forward pass through the VLM
291
+ hidden_states = self.get_last_hidden_states(
292
+ input_ids=input_ids,
293
+ attention_mask=attention_mask,
294
+ task_label=task_label,
295
+ **kwargs,
296
+ ) # (batch_size, seq_length, hidden_size)
297
+ # Compute the embeddings
298
+ single_vec_emb = self.get_single_vector_embeddings(
299
+ hidden_states=hidden_states,
300
+ attention_mask=attention_mask,
301
+ input_ids=input_ids,
302
+ )
303
+ multi_vec_emb = self.get_multi_vector_embeddings(
304
+ hidden_states=hidden_states,
305
+ attention_mask=attention_mask,
306
+ task_label=task_label,
307
+ )
308
+
309
+ return JinaEmbeddingsV4ModelOutput(
310
+ vlm_last_hidden_states=(
311
+ hidden_states if output_vlm_last_hidden_states else None
312
+ ),
313
+ single_vec_emb=single_vec_emb,
314
+ multi_vec_emb=multi_vec_emb,
315
+ )
316
+
317
+ def _process_batches(
318
+ self,
319
+ data: List[Union[str, Image.Image]],
320
+ task_label: Union[str, List[str]],
321
+ processor_fn: Callable,
322
+ desc: str,
323
+ return_multivector: bool = False,
324
+ return_numpy: bool = False,
325
+ batch_size: int = 32,
326
+ truncate_dim: Optional[int] = None,
327
+ ) -> Union[np.ndarray, List[torch.Tensor]]:
328
+ dataloader = DataLoader(
329
+ dataset=data,
330
+ batch_size=batch_size,
331
+ shuffle=False,
332
+ collate_fn=processor_fn,
333
+ )
334
+ if return_multivector and len(data) > 1:
335
+ assert (
336
+ not return_numpy
337
+ ), "`return_numpy` is not supported when `return_multivector=True` and more than one item is encoded"
338
+ results = []
339
+ self.eval()
340
+ for batch in tqdm(dataloader, desc=desc, disable=self.verbosity == 0):
341
+ with torch.no_grad():
342
+ batch = {k: v.to(self.device) for k, v in batch.items()}
343
+ with torch.autocast(
344
+ device_type=torch.device(self.device).type, dtype=torch.bfloat16
345
+ ):
346
+ embeddings = self(**batch, task_label=task_label)
347
+ if not return_multivector:
348
+ embeddings = embeddings.single_vec_emb
349
+ if truncate_dim is not None:
350
+ embeddings = embeddings[:, :truncate_dim]
351
+ embeddings = torch.nn.functional.normalize(
352
+ embeddings, p=2, dim=-1
353
+ )
354
+ else:
355
+ embeddings = embeddings.multi_vec_emb
356
+
357
+ if return_multivector and not return_numpy:
358
+ valid_tokens = batch["attention_mask"].bool()
359
+ embeddings = [
360
+ emb[mask] for emb, mask in zip(embeddings, valid_tokens)
361
+ ]
362
+ results.append(embeddings)
363
+ else:
364
+ results.append(
365
+ embeddings.cpu()
366
+ if return_numpy
367
+ else list(torch.unbind(embeddings))
368
+ )
369
+ if return_numpy:
370
+ return np.concatenate([result.numpy() for result in results], axis=0)
371
+ return [item for sublist in results for item in sublist]
372
+
373
+ def _validate_encoding_params(
374
+ self,
375
+ truncate_dim: Optional[int] = None,
376
+ prompt_name: Optional[str] = None,
377
+ ) -> Dict[str, Any]:
378
+ encode_kwargs = {}
379
+ if prompt_name is not None:
380
+ if prompt_name not in PREFIX_DICT:
381
+ raise ValueError(
382
+ f"Invalid prompt_name: {prompt_name}. Must be one of {list(PREFIX_DICT.keys())}."
383
+ )
384
+ else:
385
+ encode_kwargs["prefix"] = (
386
+ PREFIX_DICT[prompt_name]
387
+ if self.task != "text-matching"
388
+ else PREFIX_DICT["query"]
389
+ )
390
+
391
+ truncate_dim = truncate_dim or self.config.truncate_dim
392
+ if truncate_dim is not None and truncate_dim not in self.config.matryoshka_dims:
393
+ raise ValueError(
394
+ f"Invalid truncate_dim: {truncate_dim}. Must be one of {self.config.matryoshka_dims}."
395
+ )
396
+ else:
397
+ encode_kwargs["truncate_dim"] = truncate_dim
398
+
399
+ return encode_kwargs
400
+
401
+ def _validate_task(self, task: Optional[str] = None) -> str:
402
+ if task is None:
403
+ if self.task is None:
404
+ raise ValueError(
405
+ "Task must be specified before encoding data. You can set it either as a model property "
406
+ "(e.g., model.task = 'retrieval') or pass it as an argument to the encode method."
407
+ )
408
+ task = self.task
409
+ else:
410
+ if task not in self.config.task_names:
411
+ raise ValueError(
412
+ f"Invalid task: {task}. Must be one of {self.config.task_names}."
413
+ )
414
+ return task
415
+
416
+ def encode_text(
417
+ self,
418
+ texts: Union[str, List[str]],
419
+ task: Optional[str] = None,
420
+ max_length: int = 32768,
421
+ batch_size: int = 8,
422
+ return_multivector: bool = False,
423
+ return_numpy: bool = False,
424
+ truncate_dim: Optional[int] = None,
425
+ prompt_name: Optional[str] = None,
426
+ ) -> Union[List[torch.Tensor], torch.Tensor]:
427
+ """
428
+ Encodes a list of texts into embeddings.
429
+
430
+ Args:
431
+ texts: Text or list of text strings to encode
+ task: Task adapter to use ('retrieval', 'text-matching', or 'code'); defaults to the task already set on the model
432
+ max_length: Maximum token length for text processing
433
+ batch_size: Number of texts to process at once
434
+ return_multivector: Whether to return multi-vector embeddings instead of single-vector embeddings
435
+ return_numpy: Whether to return numpy arrays instead of torch tensors
436
+ truncate_dim: Dimension to truncate embeddings to (128, 256, 512, or 1024)
437
+ prompt_name: Type of text being encoded ('query' or 'passage')
438
+
439
+ Returns:
440
+ List of text embeddings as tensors or numpy arrays when encoding multiple texts, or single text embedding as tensor when encoding a single text
441
+ """
442
+ prompt_name = prompt_name or "query"
443
+ encode_kwargs = self._validate_encoding_params(
444
+ truncate_dim=truncate_dim, prompt_name=prompt_name
445
+ )
446
+
447
+ task = self._validate_task(task)
448
+
449
+ processor_fn = partial(
450
+ self.processor.process_texts,
451
+ max_length=max_length,
452
+ prefix=encode_kwargs.pop("prefix"),
453
+ )
454
+
455
+ return_list = isinstance(texts, list)
456
+
457
+ # If return_multivector is True and encoding multiple texts, ignore return_numpy
458
+ if return_multivector and return_list and len(texts) > 1:
459
+ if return_numpy:
460
+ print(
461
+ "Warning: `return_numpy` is ignored when `return_multivector=True` and `len(texts) > 1`"
462
+ )
463
+ return_numpy = False
464
+
465
+ if isinstance(texts, str):
466
+ texts = [texts]
467
+
468
+ embeddings = self._process_batches(
469
+ data=texts,
470
+ processor_fn=processor_fn,
471
+ desc="Encoding texts...",
472
+ task_label=task,
473
+ return_multivector=return_multivector,
474
+ return_numpy=return_numpy,
475
+ batch_size=batch_size,
476
+ **encode_kwargs,
477
+ )
478
+
479
+ return embeddings if return_list else embeddings[0]
480
+
481
+ def _load_images_if_needed(
482
+ self, images: List[Union[str, Image.Image]]
483
+ ) -> List[Image.Image]:
484
+ loaded_images = []
485
+ for image in images:
486
+ if isinstance(image, str):
487
+ if image.startswith("http"):
488
+ response = requests.get(image)
489
+ image = Image.open(BytesIO(response.content)).convert("RGB")
490
+ else:
491
+ image = Image.open(image).convert("RGB")
492
+ loaded_images.append(image)
493
+ return loaded_images
494
+
495
+ def encode_image(
496
+ self,
497
+ images: Union[str, Image.Image, List[Union[str, Image.Image]]],
498
+ task: Optional[str] = None,
499
+ batch_size: int = 8,
500
+ return_multivector: bool = False,
501
+ return_numpy: bool = False,
502
+ truncate_dim: Optional[int] = None,
503
+ max_pixels: Optional[int] = None,
504
+ ) -> Union[List[torch.Tensor], torch.Tensor]:
505
+ """
506
+ Encodes a list of images or a single image into embedding(s).
507
+
508
+ Args:
509
+ images: Image(s) to encode; can be PIL Image(s), URL(s), or local file path(s)
+ task: Task adapter to use ('retrieval', 'text-matching', or 'code'); defaults to the task already set on the model
510
+ batch_size: Number of images to process at once
511
+ return_multivector: Whether to return multi-vector embeddings instead of single-vector embeddings
512
+ return_numpy: Whether to return numpy arrays instead of torch tensors. If `return_multivector` is `True` and more than one image is encoded, this parameter is ignored.
513
+ truncate_dim: Dimension to truncate embeddings to (128, 256, 512, or 1024)
514
+ max_pixels: Maximum number of pixels to process per image
515
+
516
+ Returns:
517
+ List of image embeddings as tensors or numpy arrays when encoding multiple images, or single image embedding as tensor when encoding a single image
518
+ """
519
+ if max_pixels:
520
+ default_max_pixels = self.processor.image_processor.max_pixels
521
+ self.processor.image_processor.max_pixels = (
522
+ max_pixels # change during encoding
523
+ )
524
+ encode_kwargs = self._validate_encoding_params(truncate_dim=truncate_dim)
525
+ task = self._validate_task(task)
526
+
527
+ return_list = isinstance(images, list)
528
+
529
+ # If return_multivector is True and encoding multiple images, ignore return_numpy
530
+ if return_multivector and return_list and len(images) > 1:
531
+ if return_numpy:
532
+ print(
533
+ "Warning: `return_numpy` is ignored when `return_multivector=True` and `len(images) > 1`"
534
+ )
535
+ return_numpy = False
536
+
537
+ # Convert single image to list
538
+ if isinstance(images, (str, Image.Image)):
539
+ images = [images]
540
+
541
+ images = self._load_images_if_needed(images)
542
+ embeddings = self._process_batches(
543
+ data=images,
544
+ processor_fn=self.processor.process_images,
545
+ desc="Encoding images...",
546
+ task_label=task,
547
+ batch_size=batch_size,
548
+ return_multivector=return_multivector,
549
+ return_numpy=return_numpy,
550
+ **encode_kwargs,
551
+ )
552
+
553
+ if max_pixels:
554
+ self.processor.image_processor.max_pixels = default_max_pixels
555
+
556
+ return embeddings if return_list else embeddings[0]
557
+
558
+ @classmethod
559
+ def from_pretrained(
560
+ cls,
561
+ pretrained_model_name_or_path,
562
+ *args,
563
+ **kwargs,
564
+ ):
565
+ """
566
+ Loads a pretrained model and configures it with the appropriate task adapter (`retrieval` by default).
567
+ """
568
+ if "torch_dtype" not in kwargs:
569
+ kwargs["torch_dtype"] = "auto"
570
+
571
+ kwargs["key_mapping"] = super()._checkpoint_conversion_mapping
572
+ if not is_flash_attn_2_available():
573
+ kwargs["attn_implementation"] = "sdpa"
574
+
575
+ base_model = super().from_pretrained(
576
+ pretrained_model_name_or_path, *args, **kwargs
577
+ )
578
+
579
+ # Configure adapter directory
580
+ if os.path.isdir(base_model.name_or_path):
581
+ adapter_dir = os.path.join(base_model.name_or_path, "adapters")
582
+ else:
583
+ adapter_cache_path = snapshot_download(
584
+ repo_id=base_model.name_or_path, allow_patterns=["adapters/*"]
585
+ )
586
+ adapter_dir = os.path.join(adapter_cache_path, "adapters")
587
+
588
+ lora_config = LoraConfig.from_pretrained(adapter_dir)
589
+ lora_config._custom_modules = {
590
+ torch.nn.modules.linear.Linear: partial(
591
+ MultiAdapterLinear,
592
+ task_names=base_model.config.task_names,
593
+ )
594
+ }
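+ # nn.Linear layers targeted by the LoRA config are replaced with MultiAdapterLinear (partial-bound to the task names) so the forward pass can pick the adapter matching task_label; presumably one adapter per entry in config.task_names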
595
+ peft_model = PeftModel.from_pretrained(
596
+ model=base_model,
597
+ model_id=adapter_dir,
598
+ config=lora_config,
599
+ )
600
+
601
+ def task_getter(self):
602
+ return self.model.task
603
+
604
+ def task_setter(self, value):
605
+ self.model.task = value
606
+
607
+ peft_model.__class__.task = property(task_getter, task_setter)
608
+
609
+ return peft_model
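Minimal usage sketch for the API defined above (illustrative only: the checkpoint path and inputs are placeholders, and the module name follows the auto_map entry in preprocessor_config.json below):

from modeling_jina_embeddings_v4 import JinaEmbeddingsV4Model

model = JinaEmbeddingsV4Model.from_pretrained("path/to/checkpoint")  # placeholder path
model.task = "retrieval"  # or "text-matching" / "code"

# Single-vector embeddings, optionally truncated to a Matryoshka dimension (128, 256, 512, or 1024)
query_vecs = model.encode_text(["quarterly revenue table"], prompt_name="query", truncate_dim=512)

# Multi-vector embeddings (one vector per token) for document page images
page_vecs = model.encode_image(["page.png"], return_multivector=True)  # placeholder image path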
modules.json ADDED
@@ -0,0 +1,9 @@
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "transformer",
5
+ "path": "",
6
+ "type": "custom_st.Transformer",
7
+ "kwargs": ["task", "truncate_dim"]
8
+ }
9
+ ]
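modules.json above registers a custom sentence-transformers module (custom_st.Transformer) that forwards the task and truncate_dim kwargs. A hedged loading sketch; the exact keyword handling is defined inside custom_st and is assumed here:

from sentence_transformers import SentenceTransformer

# trust_remote_code is required because custom_st.Transformer is repo-provided code
st_model = SentenceTransformer("path/to/checkpoint", trust_remote_code=True)  # placeholder path
vecs = st_model.encode(["an example query"])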
preprocessor_config.json ADDED
@@ -0,0 +1,33 @@
1
+ {
2
+ "do_convert_rgb": true,
3
+ "do_normalize": true,
4
+ "do_rescale": true,
5
+ "do_resize": true,
6
+ "image_mean": [
7
+ 0.48145466,
8
+ 0.4578275,
9
+ 0.40821073
10
+ ],
11
+ "image_processor_type": "Qwen2VLImageProcessor",
12
+ "image_std": [
13
+ 0.26862954,
14
+ 0.26130258,
15
+ 0.27577711
16
+ ],
17
+ "max_pixels": 602112,
18
+ "merge_size": 2,
19
+ "min_pixels": 3136,
20
+ "patch_size": 14,
21
+ "processor_class": "JinaEmbeddingsV4Processor",
22
+ "resample": 3,
23
+ "rescale_factor": 0.00392156862745098,
24
+ "video_processor_type": "Qwen2VLVideoProcessor",
25
+ "size": {
26
+ "longest_edge": 602112,
27
+ "shortest_edge": 3136
28
+ },
29
+ "temporal_patch_size": 2,
30
+ "auto_map": {
31
+ "AutoProcessor": "modeling_jina_embeddings_v4.JinaEmbeddingsV4Processor"
32
+ }
33
+ }
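A quick reading of the pixel budget above, assuming the standard Qwen2-VL patch-merging scheme: with patch_size 14 and merge_size 2, one merged vision token covers a 28 x 28 = 784-pixel area, so max_pixels 602112 / 784 = 768 merged tokens per image at most, and min_pixels 3136 / 784 = 4 tokens at least.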
qwen2_5_vl.py ADDED
The diff for this file is too large to render. See raw diff
 
results.json ADDED
@@ -0,0 +1,582 @@
1
+ {
2
+ "arxivqa_test_subsampled": {
3
+ "ndcg_at_1": 0.844,
4
+ "ndcg_at_3": 0.88524,
5
+ "ndcg_at_5": 0.88954,
6
+ "ndcg_at_10": 0.89512,
7
+ "ndcg_at_20": 0.90085,
8
+ "ndcg_at_50": 0.90479,
9
+ "ndcg_at_100": 0.90578,
10
+ "map_at_1": 0.844,
11
+ "map_at_3": 0.87467,
12
+ "map_at_5": 0.87717,
13
+ "map_at_10": 0.87933,
14
+ "map_at_20": 0.88099,
15
+ "map_at_50": 0.88161,
16
+ "map_at_100": 0.8817,
17
+ "recall_at_1": 0.844,
18
+ "recall_at_3": 0.916,
19
+ "recall_at_5": 0.926,
20
+ "recall_at_10": 0.944,
21
+ "recall_at_20": 0.966,
22
+ "recall_at_50": 0.986,
23
+ "recall_at_100": 0.992,
24
+ "precision_at_1": 0.844,
25
+ "precision_at_3": 0.30533,
26
+ "precision_at_5": 0.1852,
27
+ "precision_at_10": 0.0944,
28
+ "precision_at_20": 0.0483,
29
+ "precision_at_50": 0.01972,
30
+ "precision_at_100": 0.00992,
31
+ "mrr_at_1": 0.844,
32
+ "mrr_at_3": 0.8746666666666665,
33
+ "mrr_at_5": 0.8771666666666665,
34
+ "mrr_at_10": 0.8793301587301586,
35
+ "mrr_at_20": 0.880986183261183,
36
+ "mrr_at_50": 0.8816066058267283,
37
+ "mrr_at_100": 0.8816959272950264,
38
+ "naucs_at_1_max": 0.7413901379085128,
39
+ "naucs_at_1_std": 0.3454872013866209,
40
+ "naucs_at_1_diff1": 0.9600906830113787,
41
+ "naucs_at_3_max": 0.7713307545240329,
42
+ "naucs_at_3_std": 0.4801698457160663,
43
+ "naucs_at_3_diff1": 0.9489240140500664,
44
+ "naucs_at_5_max": 0.7514699573523106,
45
+ "naucs_at_5_std": 0.4375552022610836,
46
+ "naucs_at_5_diff1": 0.9526206879148043,
47
+ "naucs_at_10_max": 0.8086901427237575,
48
+ "naucs_at_10_std": 0.5144891289849284,
49
+ "naucs_at_10_diff1": 0.9513972255568919,
50
+ "naucs_at_20_max": 0.907453177349375,
51
+ "naucs_at_20_std": 0.5683802932937894,
52
+ "naucs_at_20_diff1": 0.9692425990003846,
53
+ "naucs_at_50_max": 0.8709483793517359,
54
+ "naucs_at_50_std": 0.7055488862211612,
55
+ "naucs_at_50_diff1": 0.9626517273576126,
56
+ "naucs_at_100_max": 0.8068394024276366,
57
+ "naucs_at_100_std": 0.7076330532212914,
58
+ "naucs_at_100_diff1": 0.9673202614378978
59
+ },
60
+ "docvqa_test_subsampled": {
61
+ "ndcg_at_1": 0.52328,
62
+ "ndcg_at_3": 0.5841,
63
+ "ndcg_at_5": 0.59975,
64
+ "ndcg_at_10": 0.62669,
65
+ "ndcg_at_20": 0.64245,
66
+ "ndcg_at_50": 0.65661,
67
+ "ndcg_at_100": 0.66492,
68
+ "map_at_1": 0.52328,
69
+ "map_at_3": 0.56911,
70
+ "map_at_5": 0.57786,
71
+ "map_at_10": 0.58881,
72
+ "map_at_20": 0.59317,
73
+ "map_at_50": 0.59548,
74
+ "map_at_100": 0.59622,
75
+ "recall_at_1": 0.52328,
76
+ "recall_at_3": 0.62749,
77
+ "recall_at_5": 0.66519,
78
+ "recall_at_10": 0.74945,
79
+ "recall_at_20": 0.81153,
80
+ "recall_at_50": 0.88248,
81
+ "recall_at_100": 0.93348,
82
+ "precision_at_1": 0.52328,
83
+ "precision_at_3": 0.20916,
84
+ "precision_at_5": 0.13304,
85
+ "precision_at_10": 0.07494,
86
+ "precision_at_20": 0.04058,
87
+ "precision_at_50": 0.01765,
88
+ "precision_at_100": 0.00933,
89
+ "mrr_at_1": 0.5232815964523282,
90
+ "mrr_at_3": 0.5691056910569108,
91
+ "mrr_at_5": 0.5778640059127865,
92
+ "mrr_at_10": 0.5888132193010243,
93
+ "mrr_at_20": 0.5931663069177401,
94
+ "mrr_at_50": 0.5954783504735428,
95
+ "mrr_at_100": 0.5962169799244146,
96
+ "naucs_at_1_max": 0.46089368028029637,
97
+ "naucs_at_1_std": 0.19359243300005127,
98
+ "naucs_at_1_diff1": 0.8483527783001977,
99
+ "naucs_at_3_max": 0.4640279399849662,
100
+ "naucs_at_3_std": 0.1814509120980464,
101
+ "naucs_at_3_diff1": 0.7719022256243834,
102
+ "naucs_at_5_max": 0.45716016762761796,
103
+ "naucs_at_5_std": 0.16428980258139747,
104
+ "naucs_at_5_diff1": 0.750196647594659,
105
+ "naucs_at_10_max": 0.3956528364820721,
106
+ "naucs_at_10_std": 0.09973122080056422,
107
+ "naucs_at_10_diff1": 0.7237863238311393,
108
+ "naucs_at_20_max": 0.35927664451426317,
109
+ "naucs_at_20_std": 0.09080366240903168,
110
+ "naucs_at_20_diff1": 0.6946736504983693,
111
+ "naucs_at_50_max": 0.3626447370884348,
112
+ "naucs_at_50_std": 0.2775120087087966,
113
+ "naucs_at_50_diff1": 0.6534710933108262,
114
+ "naucs_at_100_max": 0.32155287639122004,
115
+ "naucs_at_100_std": 0.3495021025151782,
116
+ "naucs_at_100_diff1": 0.6165810885563539
117
+ },
118
+ "infovqa_test_subsampled": {
119
+ "ndcg_at_1": 0.90283,
120
+ "ndcg_at_3": 0.93062,
121
+ "ndcg_at_5": 0.93567,
122
+ "ndcg_at_10": 0.93969,
123
+ "ndcg_at_20": 0.94324,
124
+ "ndcg_at_50": 0.94401,
125
+ "ndcg_at_100": 0.945,
126
+ "map_at_1": 0.90283,
127
+ "map_at_3": 0.92409,
128
+ "map_at_5": 0.92692,
129
+ "map_at_10": 0.92863,
130
+ "map_at_20": 0.92959,
131
+ "map_at_50": 0.9297,
132
+ "map_at_100": 0.92979,
133
+ "recall_at_1": 0.90283,
134
+ "recall_at_3": 0.94939,
135
+ "recall_at_5": 0.96154,
136
+ "recall_at_10": 0.97368,
137
+ "recall_at_20": 0.98785,
138
+ "recall_at_50": 0.9919,
139
+ "recall_at_100": 0.99798,
140
+ "precision_at_1": 0.90283,
141
+ "precision_at_3": 0.31646,
142
+ "precision_at_5": 0.19231,
143
+ "precision_at_10": 0.09737,
144
+ "precision_at_20": 0.04939,
145
+ "precision_at_50": 0.01984,
146
+ "precision_at_100": 0.00998,
147
+ "mrr_at_1": 0.902834008097166,
148
+ "mrr_at_3": 0.9240890688259108,
149
+ "mrr_at_5": 0.9269230769230767,
150
+ "mrr_at_10": 0.9286316753422016,
151
+ "mrr_at_20": 0.9295898610333593,
152
+ "mrr_at_50": 0.929699602843506,
153
+ "mrr_at_100": 0.929788457049907,
154
+ "naucs_at_1_max": 0.6026903076230651,
155
+ "naucs_at_1_std": 0.261936050485784,
156
+ "naucs_at_1_diff1": 0.9396804875719484,
157
+ "naucs_at_3_max": 0.7565375225904929,
158
+ "naucs_at_3_std": 0.45980620999702715,
159
+ "naucs_at_3_diff1": 0.9534218386220948,
160
+ "naucs_at_5_max": 0.8235249494008307,
161
+ "naucs_at_5_std": 0.5316999544043512,
162
+ "naucs_at_5_diff1": 0.9524604670358964,
163
+ "naucs_at_10_max": 0.8684766575602219,
164
+ "naucs_at_10_std": 0.5944713216706646,
165
+ "naucs_at_10_diff1": 0.9405654098266761,
166
+ "naucs_at_20_max": 0.7830887900175995,
167
+ "naucs_at_20_std": 0.5643438299512757,
168
+ "naucs_at_20_diff1": 0.8929919636352566,
169
+ "naucs_at_50_max": 0.7072835485426375,
170
+ "naucs_at_50_std": 0.5764614839135555,
171
+ "naucs_at_50_diff1": 0.8394879454528887,
172
+ "naucs_at_100_max": 1.0,
173
+ "naucs_at_100_std": 1.0,
174
+ "naucs_at_100_diff1": 1.0
175
+ },
176
+ "tabfquad_test_subsampled": {
177
+ "ndcg_at_1": 0.9,
178
+ "ndcg_at_3": 0.94685,
179
+ "ndcg_at_5": 0.95131,
180
+ "ndcg_at_10": 0.95366,
181
+ "ndcg_at_20": 0.95455,
182
+ "ndcg_at_50": 0.9553,
183
+ "ndcg_at_100": 0.9553,
184
+ "map_at_1": 0.9,
185
+ "map_at_3": 0.9369,
186
+ "map_at_5": 0.9394,
187
+ "map_at_10": 0.9404,
188
+ "map_at_20": 0.94063,
189
+ "map_at_50": 0.94077,
190
+ "map_at_100": 0.94077,
191
+ "recall_at_1": 0.9,
192
+ "recall_at_3": 0.975,
193
+ "recall_at_5": 0.98571,
194
+ "recall_at_10": 0.99286,
195
+ "recall_at_20": 0.99643,
196
+ "recall_at_50": 1.0,
197
+ "recall_at_100": 1.0,
198
+ "precision_at_1": 0.9,
199
+ "precision_at_3": 0.325,
200
+ "precision_at_5": 0.19714,
201
+ "precision_at_10": 0.09929,
202
+ "precision_at_20": 0.04982,
203
+ "precision_at_50": 0.02,
204
+ "precision_at_100": 0.01,
205
+ "mrr_at_1": 0.9,
206
+ "mrr_at_3": 0.936904761904762,
207
+ "mrr_at_5": 0.9394047619047617,
208
+ "mrr_at_10": 0.9403968253968255,
209
+ "mrr_at_20": 0.9406349206349207,
210
+ "mrr_at_50": 0.9407722832722833,
211
+ "mrr_at_100": 0.9407722832722833,
212
+ "naucs_at_1_max": 0.39284046952114193,
213
+ "naucs_at_1_std": 0.06274176337201544,
214
+ "naucs_at_1_diff1": 0.9321395224756563,
215
+ "naucs_at_3_max": 0.98132586367881,
216
+ "naucs_at_3_std": 0.9042950513538718,
217
+ "naucs_at_3_diff1": 0.98132586367881,
218
+ "naucs_at_5_max": 0.967320261437913,
219
+ "naucs_at_5_std": 0.8978758169934754,
220
+ "naucs_at_5_diff1": 1.0,
221
+ "naucs_at_10_max": 1.0,
222
+ "naucs_at_10_std": 0.9346405228758269,
223
+ "naucs_at_10_diff1": 1.0,
224
+ "naucs_at_20_max": 1.0,
225
+ "naucs_at_20_std": 1.0,
226
+ "naucs_at_20_diff1": 1.0,
227
+ "naucs_at_50_max": 1.0,
228
+ "naucs_at_50_std": 1.0,
229
+ "naucs_at_50_diff1": 1.0,
230
+ "naucs_at_100_max": 1.0,
231
+ "naucs_at_100_std": 1.0,
232
+ "naucs_at_100_diff1": 1.0
233
+ },
234
+ "tatdqa_test": {
235
+ "ndcg_at_1": 0.68834,
236
+ "ndcg_at_3": 0.7834,
237
+ "ndcg_at_5": 0.80344,
238
+ "ndcg_at_10": 0.81851,
239
+ "ndcg_at_20": 0.82469,
240
+ "ndcg_at_50": 0.82852,
241
+ "ndcg_at_100": 0.82981,
242
+ "map_at_1": 0.68834,
243
+ "map_at_3": 0.76073,
244
+ "map_at_5": 0.772,
245
+ "map_at_10": 0.7783,
246
+ "map_at_20": 0.78002,
247
+ "map_at_50": 0.78067,
248
+ "map_at_100": 0.78079,
249
+ "recall_at_1": 0.68834,
250
+ "recall_at_3": 0.84872,
251
+ "recall_at_5": 0.89672,
252
+ "recall_at_10": 0.94289,
253
+ "recall_at_20": 0.96719,
254
+ "recall_at_50": 0.98603,
255
+ "recall_at_100": 0.99392,
256
+ "precision_at_1": 0.68834,
257
+ "precision_at_3": 0.28291,
258
+ "precision_at_5": 0.17934,
259
+ "precision_at_10": 0.09429,
260
+ "precision_at_20": 0.04836,
261
+ "precision_at_50": 0.01972,
262
+ "precision_at_100": 0.00994,
263
+ "mrr_at_1": 0.6865127582017011,
264
+ "mrr_at_3": 0.7598217901984609,
265
+ "mrr_at_5": 0.7710307816929933,
266
+ "mrr_at_10": 0.7773322532739296,
267
+ "mrr_at_20": 0.7790656715075932,
268
+ "mrr_at_50": 0.7797137179788176,
269
+ "mrr_at_100": 0.7798294471430899,
270
+ "naucs_at_1_max": 0.19289339347399329,
271
+ "naucs_at_1_std": -0.05373436574034402,
272
+ "naucs_at_1_diff1": 0.8118815353915732,
273
+ "naucs_at_3_max": 0.24444248974914928,
274
+ "naucs_at_3_std": 0.012951438245694854,
275
+ "naucs_at_3_diff1": 0.7252009696977523,
276
+ "naucs_at_5_max": 0.27477480629269946,
277
+ "naucs_at_5_std": 0.10687833140288663,
278
+ "naucs_at_5_diff1": 0.7019146338300569,
279
+ "naucs_at_10_max": 0.23474834180340118,
280
+ "naucs_at_10_std": 0.13375117651376378,
281
+ "naucs_at_10_diff1": 0.6766342016471449,
282
+ "naucs_at_20_max": 0.3762582961131715,
283
+ "naucs_at_20_std": 0.29216428469292166,
284
+ "naucs_at_20_diff1": 0.6564671335087516,
285
+ "naucs_at_50_max": 0.4691053847445,
286
+ "naucs_at_50_std": 0.4359718488363951,
287
+ "naucs_at_50_diff1": 0.7152604718494652,
288
+ "naucs_at_100_max": 0.5259975902909616,
289
+ "naucs_at_100_std": 0.651086653120611,
290
+ "naucs_at_100_diff1": 0.7663843453532901
291
+ },
292
+ "shiftproject_test": {
293
+ "ndcg_at_1": 0.85,
294
+ "ndcg_at_3": 0.91917,
295
+ "ndcg_at_5": 0.92347,
296
+ "ndcg_at_10": 0.92949,
297
+ "ndcg_at_20": 0.92949,
298
+ "ndcg_at_50": 0.92949,
299
+ "ndcg_at_100": 0.92949,
300
+ "map_at_1": 0.85,
301
+ "map_at_3": 0.90167,
302
+ "map_at_5": 0.90417,
303
+ "map_at_10": 0.90639,
304
+ "map_at_20": 0.90639,
305
+ "map_at_50": 0.90639,
306
+ "map_at_100": 0.90639,
307
+ "recall_at_1": 0.85,
308
+ "recall_at_3": 0.97,
309
+ "recall_at_5": 0.98,
310
+ "recall_at_10": 1.0,
311
+ "recall_at_20": 1.0,
312
+ "recall_at_50": 1.0,
313
+ "recall_at_100": 1.0,
314
+ "precision_at_1": 0.85,
315
+ "precision_at_3": 0.32333,
316
+ "precision_at_5": 0.196,
317
+ "precision_at_10": 0.1,
318
+ "precision_at_20": 0.05,
319
+ "precision_at_50": 0.02,
320
+ "precision_at_100": 0.01,
321
+ "mrr_at_1": 0.85,
322
+ "mrr_at_3": 0.9016666666666666,
323
+ "mrr_at_5": 0.9041666666666666,
324
+ "mrr_at_10": 0.9063888888888889,
325
+ "mrr_at_20": 0.9063888888888889,
326
+ "mrr_at_50": 0.9063888888888889,
327
+ "mrr_at_100": 0.9063888888888889,
328
+ "naucs_at_1_max": 0.029189716889034732,
329
+ "naucs_at_1_std": -0.37507321835340074,
330
+ "naucs_at_1_diff1": 0.7931012040351454,
331
+ "naucs_at_3_max": 0.5589791472144446,
332
+ "naucs_at_3_std": 0.09056956115779448,
333
+ "naucs_at_3_diff1": 0.9564270152505466,
334
+ "naucs_at_5_max": 0.3384687208216692,
335
+ "naucs_at_5_std": -0.2987861811391239,
336
+ "naucs_at_5_diff1": 1.0,
337
+ "naucs_at_10_max": 1.0,
338
+ "naucs_at_10_std": 1.0,
339
+ "naucs_at_10_diff1": 1.0,
340
+ "naucs_at_20_max": 1.0,
341
+ "naucs_at_20_std": 1.0,
342
+ "naucs_at_20_diff1": 1.0,
343
+ "naucs_at_50_max": null,
344
+ "naucs_at_50_std": null,
345
+ "naucs_at_50_diff1": null,
346
+ "naucs_at_100_max": null,
347
+ "naucs_at_100_std": null,
348
+ "naucs_at_100_diff1": null
349
+ },
350
+ "syntheticDocQA_artificial_intelligence_test": {
351
+ "ndcg_at_1": 0.98,
352
+ "ndcg_at_3": 0.99262,
353
+ "ndcg_at_5": 0.99262,
354
+ "ndcg_at_10": 0.99262,
355
+ "ndcg_at_20": 0.99262,
356
+ "ndcg_at_50": 0.99262,
357
+ "ndcg_at_100": 0.99262,
358
+ "map_at_1": 0.98,
359
+ "map_at_3": 0.99,
360
+ "map_at_5": 0.99,
361
+ "map_at_10": 0.99,
362
+ "map_at_20": 0.99,
363
+ "map_at_50": 0.99,
364
+ "map_at_100": 0.99,
365
+ "recall_at_1": 0.98,
366
+ "recall_at_3": 1.0,
367
+ "recall_at_5": 1.0,
368
+ "recall_at_10": 1.0,
369
+ "recall_at_20": 1.0,
370
+ "recall_at_50": 1.0,
371
+ "recall_at_100": 1.0,
372
+ "precision_at_1": 0.98,
373
+ "precision_at_3": 0.33333,
374
+ "precision_at_5": 0.2,
375
+ "precision_at_10": 0.1,
376
+ "precision_at_20": 0.05,
377
+ "precision_at_50": 0.02,
378
+ "precision_at_100": 0.01,
379
+ "mrr_at_1": 0.98,
380
+ "mrr_at_3": 0.99,
381
+ "mrr_at_5": 0.99,
382
+ "mrr_at_10": 0.99,
383
+ "mrr_at_20": 0.99,
384
+ "mrr_at_50": 0.99,
385
+ "mrr_at_100": 0.99,
386
+ "naucs_at_1_max": 0.540149393090569,
387
+ "naucs_at_1_std": 0.3384687208216605,
388
+ "naucs_at_1_diff1": 0.9346405228758133,
389
+ "naucs_at_3_max": 1.0,
390
+ "naucs_at_3_std": 1.0,
391
+ "naucs_at_3_diff1": 1.0,
392
+ "naucs_at_5_max": 1.0,
393
+ "naucs_at_5_std": 1.0,
394
+ "naucs_at_5_diff1": 1.0,
395
+ "naucs_at_10_max": 1.0,
396
+ "naucs_at_10_std": 1.0,
397
+ "naucs_at_10_diff1": 1.0,
398
+ "naucs_at_20_max": 1.0,
399
+ "naucs_at_20_std": 1.0,
400
+ "naucs_at_20_diff1": 1.0,
401
+ "naucs_at_50_max": null,
402
+ "naucs_at_50_std": null,
403
+ "naucs_at_50_diff1": null,
404
+ "naucs_at_100_max": null,
405
+ "naucs_at_100_std": null,
406
+ "naucs_at_100_diff1": null
407
+ },
408
+ "syntheticDocQA_energy_test": {
409
+ "ndcg_at_1": 0.95,
410
+ "ndcg_at_3": 0.96762,
411
+ "ndcg_at_5": 0.96762,
412
+ "ndcg_at_10": 0.97118,
413
+ "ndcg_at_20": 0.97118,
414
+ "ndcg_at_50": 0.973,
415
+ "ndcg_at_100": 0.973,
416
+ "map_at_1": 0.95,
417
+ "map_at_3": 0.96333,
418
+ "map_at_5": 0.96333,
419
+ "map_at_10": 0.965,
420
+ "map_at_20": 0.965,
421
+ "map_at_50": 0.96523,
422
+ "map_at_100": 0.96523,
423
+ "recall_at_1": 0.95,
424
+ "recall_at_3": 0.98,
425
+ "recall_at_5": 0.98,
426
+ "recall_at_10": 0.99,
427
+ "recall_at_20": 0.99,
428
+ "recall_at_50": 1.0,
429
+ "recall_at_100": 1.0,
430
+ "precision_at_1": 0.95,
431
+ "precision_at_3": 0.32667,
432
+ "precision_at_5": 0.196,
433
+ "precision_at_10": 0.099,
434
+ "precision_at_20": 0.0495,
435
+ "precision_at_50": 0.02,
436
+ "precision_at_100": 0.01,
437
+ "mrr_at_1": 0.95,
438
+ "mrr_at_3": 0.9633333333333333,
439
+ "mrr_at_5": 0.9633333333333333,
440
+ "mrr_at_10": 0.965,
441
+ "mrr_at_20": 0.965,
442
+ "mrr_at_50": 0.9652272727272727,
443
+ "mrr_at_100": 0.9652272727272727,
444
+ "naucs_at_1_max": 0.42726423902894384,
445
+ "naucs_at_1_std": -0.4889822595704953,
446
+ "naucs_at_1_diff1": 1.0,
447
+ "naucs_at_3_max": 0.6136788048552655,
448
+ "naucs_at_3_std": -0.6909430438842241,
449
+ "naucs_at_3_diff1": 1.0,
450
+ "naucs_at_5_max": 0.6136788048552745,
451
+ "naucs_at_5_std": -0.690943043884218,
452
+ "naucs_at_5_diff1": 1.0,
453
+ "naucs_at_10_max": 0.8692810457516413,
454
+ "naucs_at_10_std": 0.35807656395891135,
455
+ "naucs_at_10_diff1": 1.0,
456
+ "naucs_at_20_max": 0.8692810457516413,
457
+ "naucs_at_20_std": 0.35807656395891135,
458
+ "naucs_at_20_diff1": 1.0,
459
+ "naucs_at_50_max": null,
460
+ "naucs_at_50_std": null,
461
+ "naucs_at_50_diff1": null,
462
+ "naucs_at_100_max": null,
463
+ "naucs_at_100_std": null,
464
+ "naucs_at_100_diff1": null
465
+ },
466
+ "syntheticDocQA_government_reports_test": {
467
+ "ndcg_at_1": 0.93,
468
+ "ndcg_at_3": 0.96524,
469
+ "ndcg_at_5": 0.96954,
470
+ "ndcg_at_10": 0.96954,
471
+ "ndcg_at_20": 0.96954,
472
+ "ndcg_at_50": 0.96954,
473
+ "ndcg_at_100": 0.96954,
474
+ "map_at_1": 0.93,
475
+ "map_at_3": 0.95667,
476
+ "map_at_5": 0.95917,
477
+ "map_at_10": 0.95917,
478
+ "map_at_20": 0.95917,
479
+ "map_at_50": 0.95917,
480
+ "map_at_100": 0.95917,
481
+ "recall_at_1": 0.93,
482
+ "recall_at_3": 0.99,
483
+ "recall_at_5": 1.0,
484
+ "recall_at_10": 1.0,
485
+ "recall_at_20": 1.0,
486
+ "recall_at_50": 1.0,
487
+ "recall_at_100": 1.0,
488
+ "precision_at_1": 0.93,
489
+ "precision_at_3": 0.33,
490
+ "precision_at_5": 0.2,
491
+ "precision_at_10": 0.1,
492
+ "precision_at_20": 0.05,
493
+ "precision_at_50": 0.02,
494
+ "precision_at_100": 0.01,
495
+ "mrr_at_1": 0.93,
496
+ "mrr_at_3": 0.9566666666666667,
497
+ "mrr_at_5": 0.9591666666666667,
498
+ "mrr_at_10": 0.9591666666666667,
499
+ "mrr_at_20": 0.9591666666666667,
500
+ "mrr_at_50": 0.9591666666666667,
501
+ "mrr_at_100": 0.9591666666666667,
502
+ "naucs_at_1_max": 0.6809390422835813,
503
+ "naucs_at_1_std": 0.5458850206749362,
504
+ "naucs_at_1_diff1": 0.9229691876750709,
505
+ "naucs_at_3_max": 1.0,
506
+ "naucs_at_3_std": 1.0,
507
+ "naucs_at_3_diff1": 1.0,
508
+ "naucs_at_5_max": 1.0,
509
+ "naucs_at_5_std": 1.0,
510
+ "naucs_at_5_diff1": 1.0,
511
+ "naucs_at_10_max": 1.0,
512
+ "naucs_at_10_std": 1.0,
513
+ "naucs_at_10_diff1": 1.0,
514
+ "naucs_at_20_max": 1.0,
515
+ "naucs_at_20_std": 1.0,
516
+ "naucs_at_20_diff1": 1.0,
517
+ "naucs_at_50_max": null,
518
+ "naucs_at_50_std": null,
519
+ "naucs_at_50_diff1": null,
520
+ "naucs_at_100_max": null,
521
+ "naucs_at_100_std": null,
522
+ "naucs_at_100_diff1": null
523
+ },
524
+ "syntheticDocQA_healthcare_industry_test": {
525
+ "ndcg_at_1": 0.96,
526
+ "ndcg_at_3": 0.98393,
527
+ "ndcg_at_5": 0.98393,
528
+ "ndcg_at_10": 0.98393,
529
+ "ndcg_at_20": 0.98393,
530
+ "ndcg_at_50": 0.98393,
531
+ "ndcg_at_100": 0.98393,
532
+ "map_at_1": 0.96,
533
+ "map_at_3": 0.97833,
534
+ "map_at_5": 0.97833,
535
+ "map_at_10": 0.97833,
536
+ "map_at_20": 0.97833,
537
+ "map_at_50": 0.97833,
538
+ "map_at_100": 0.97833,
539
+ "recall_at_1": 0.96,
540
+ "recall_at_3": 1.0,
541
+ "recall_at_5": 1.0,
542
+ "recall_at_10": 1.0,
543
+ "recall_at_20": 1.0,
544
+ "recall_at_50": 1.0,
545
+ "recall_at_100": 1.0,
546
+ "precision_at_1": 0.96,
547
+ "precision_at_3": 0.33333,
548
+ "precision_at_5": 0.2,
549
+ "precision_at_10": 0.1,
550
+ "precision_at_20": 0.05,
551
+ "precision_at_50": 0.02,
552
+ "precision_at_100": 0.01,
553
+ "mrr_at_1": 0.96,
554
+ "mrr_at_3": 0.9783333333333333,
555
+ "mrr_at_5": 0.9783333333333333,
556
+ "mrr_at_10": 0.9783333333333333,
557
+ "mrr_at_20": 0.9783333333333333,
558
+ "mrr_at_50": 0.9783333333333333,
559
+ "mrr_at_100": 0.9783333333333333,
560
+ "naucs_at_1_max": 0.7047152194211012,
561
+ "naucs_at_1_std": 0.32037815126050734,
562
+ "naucs_at_1_diff1": 1.0,
563
+ "naucs_at_3_max": 1.0,
564
+ "naucs_at_3_std": 1.0,
565
+ "naucs_at_3_diff1": 1.0,
566
+ "naucs_at_5_max": 1.0,
567
+ "naucs_at_5_std": 1.0,
568
+ "naucs_at_5_diff1": 1.0,
569
+ "naucs_at_10_max": 1.0,
570
+ "naucs_at_10_std": 1.0,
571
+ "naucs_at_10_diff1": 1.0,
572
+ "naucs_at_20_max": 1.0,
573
+ "naucs_at_20_std": 1.0,
574
+ "naucs_at_20_diff1": 1.0,
575
+ "naucs_at_50_max": null,
576
+ "naucs_at_50_std": null,
577
+ "naucs_at_50_diff1": null,
578
+ "naucs_at_100_max": null,
579
+ "naucs_at_100_std": null,
580
+ "naucs_at_100_diff1": null
581
+ }
582
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "eos_token": {
18
+ "content": "<|im_end|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "pad_token": {
25
+ "content": "<|endoftext|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c5ae00e602b8860cbd784ba82a8aa14e8feecec692e7076590d014d7b7fdafa
3
+ size 11421896
tokenizer_config.json ADDED
@@ -0,0 +1,209 @@
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ }
181
+ },
182
+ "additional_special_tokens": [
183
+ "<|im_start|>",
184
+ "<|im_end|>",
185
+ "<|object_ref_start|>",
186
+ "<|object_ref_end|>",
187
+ "<|box_start|>",
188
+ "<|box_end|>",
189
+ "<|quad_start|>",
190
+ "<|quad_end|>",
191
+ "<|vision_start|>",
192
+ "<|vision_end|>",
193
+ "<|vision_pad|>",
194
+ "<|image_pad|>",
195
+ "<|video_pad|>"
196
+ ],
197
+ "bos_token": null,
198
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
199
+ "clean_up_tokenization_spaces": false,
200
+ "eos_token": "<|im_end|>",
201
+ "errors": "replace",
202
+ "extra_special_tokens": {},
203
+ "model_max_length": 131072,
204
+ "pad_token": "<|endoftext|>",
205
+ "processor_class": "JinaEmbeddingsV4Processor",
206
+ "split_special_tokens": false,
207
+ "tokenizer_class": "Qwen2Tokenizer",
208
+ "unk_token": null
209
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff