Brian Tang committed on
Commit 49ebb9c · 0 Parent(s)

Snapshot of current state 4a58ca57710c49f51896e4bc820e202fbf64904b
.gitattributes ADDED
@@ -0,0 +1,36 @@
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.json filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,73 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ build/
8
+ develop-eggs/
9
+ dist/
10
+ downloads/
11
+ eggs/
12
+ .eggs/
13
+ lib/
14
+ lib64/
15
+ parts/
16
+ sdist/
17
+ var/
18
+ wheels/
19
+ *.egg-info/
20
+ .installed.cfg
21
+ *.egg
22
+
23
+ # Virtual Environment
24
+ venv/
25
+ env/
26
+ ENV/
27
+ .env
28
+ .venv
29
+ env.bak/
30
+ venv.bak/
31
+
32
+ # IDE
33
+ .idea/
34
+ .vscode/
35
+ *.swp
36
+ *.swo
37
+ .project
38
+ .pydevproject
39
+ .settings/
40
+
41
+ # Jupyter Notebook
42
+ .ipynb_checkpoints
43
+ *.ipynb
44
+
45
+ # Distribution / packaging
46
+ .Python
47
+ *.manifest
48
+ *.spec
49
+
50
+ # Unit test / coverage reports
51
+ htmlcov/
52
+ .tox/
53
+ .coverage
54
+ .coverage.*
55
+ .cache
56
+ nosetests.xml
57
+ coverage.xml
58
+ *.cover
59
+ .hypothesis/
60
+
61
+ # Logs and databases
62
+ *.log
63
+ *.sqlite
64
+ *.db
65
+
66
+ # OS generated files
67
+ .DS_Store
68
+ .DS_Store?
69
+ ._*
70
+ .Spotlight-V100
71
+ .Trashes
72
+ ehthumbs.db
73
+ Thumbs.db
README.md ADDED
@@ -0,0 +1,366 @@
1
+ ---
2
+ license: cc-by-nc-4.0
3
+ tags:
4
+ - vidore
5
+ - colpali
6
+ - multimodal-embedding
7
+ - multilingual-embedding
8
+ - Text-to-Visual Document (T→VD) retrieval
9
+ - feature-extraction
10
+ - sentence-similarity
11
+ - mteb
12
+ - sentence-transformers
13
+ language:
14
+ - multilingual
15
+ inference: false
16
+ library_name: transformers
17
+ pipeline_tag: visual-document-retrieval
18
+ ---
19
+ <br><br>
20
+
21
+ <p align="center">
22
+ <img src="https://huggingface.co/datasets/jinaai/documentation-images/resolve/main/logo.webp" alt="Jina AI: Your Search Foundation, Supercharged!" width="150px">
23
+ </p>
24
+
25
+
26
+ <p align="center">
27
+ <b>The embedding model trained by <a href="https://jina.ai/"><b>Jina AI</b></a>.</b>
28
+ </p>
29
+
30
+ # Jina Embeddings v4: Universal Embeddings for Multimodal Multilingual Retrieval
31
+
32
+
33
+ [GGUF](https://github.com/jina-ai/jina-embeddings-v4-gguf) | [Blog](https://jina.ai/news/jina-embeddings-v4-universal-embeddings-for-multimodal-multilingual-retrieval) | [Technical Report](https://arxiv.org/abs/2506.18902) | [API](https://jina.ai/embeddings)
34
+
35
+
36
+ ## Intended Usage & Model Info
37
+ `jina-embeddings-v4` is a universal embedding model for multimodal and multilingual retrieval.
38
+ The model is specially designed for complex document retrieval, including visually rich documents with charts, tables, and illustrations.
39
+
40
+
41
+ Built on [Qwen/Qwen2.5-VL-3B-Instruct](https://huggingface.co/Qwen/Qwen2.5-VL-3B-Instruct), `jina-embeddings-v4` features:
42
+
43
+ - **Unified embeddings** for text, images, and visual documents, supporting both dense (single-vector) and late-interaction (multi-vector) retrieval.
44
+ - **Multilingual support** (30+ languages) and compatibility with a wide range of domains, including technical and visually complex documents.
45
+ - **Task-specific adapters** for retrieval, text matching, and code-related tasks, which can be selected at inference time.
46
+ - **Flexible embedding size**: dense embeddings are 2048 dimensions by default but can be truncated to as low as 128 with minimal performance loss.
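+
+ Because the dense vectors are trained Matryoshka-style, a shorter embedding is obtained by keeping the leading components. The `encode_*` methods shown below accept a `truncate_dim` argument; the snippet here is only a minimal client-side sketch (the helper name and the re-normalization step are illustrative assumptions, and re-normalization only matters for dot-product scoring):
+
+ ```python
+ import numpy as np
+
+ def truncate_embeddings(embeddings: np.ndarray, dim: int = 128) -> np.ndarray:
+     """Keep the first `dim` components of the 2048-d dense embeddings and re-normalize."""
+     truncated = embeddings[:, :dim]
+     norms = np.linalg.norm(truncated, axis=1, keepdims=True)
+     return truncated / np.clip(norms, 1e-12, None)
+ ```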
47
+
48
+
49
+ Summary of features:
50
+
51
+ | Feature | Jina Embeddings V4 |
52
+ |------------|------------|
53
+ | Base Model | Qwen2.5-VL-3B-Instruct |
54
+ | Supported Tasks | `retrieval`, `text-matching`, `code` |
55
+ | Model DType | BFloat16 |
56
+ | Max Sequence Length | 32768 |
57
+ | Single-Vector Dimension | 2048 |
58
+ | Multi-Vector Dimension | 128 |
59
+ | Matryoshka dimensions | 128, 256, 512, 1024, 2048 |
60
+ | Pooling Strategy | Mean pooling |
61
+ | Attention Mechanism | FlashAttention2 |
62
+
63
+
64
+
65
+ ## Training & Evaluation
66
+
67
+ Please refer to our [technical report of jina-embeddings-v4](https://arxiv.org/abs/2506.18902) for training details and benchmarks.
68
+
69
+
70
+ ## Usage
71
+
72
+ <details>
73
+ <summary>Requirements</summary>
74
+
75
+ The following Python packages are required:
76
+
77
+ - `transformers>=4.52.0`
78
+ - `torch>=2.6.0`
79
+ - `peft>=0.15.2`
80
+ - `torchvision`
81
+ - `pillow`
82
+
83
+ ### Optional / Recommended
84
+ - **flash-attention**: Installing [flash-attention](https://github.com/Dao-AILab/flash-attention) is recommended for improved inference speed and efficiency, but not mandatory.
85
+ - **sentence-transformers**: If you want to use the model via the `sentence-transformers` interface, install this package as well.
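+
+ A typical installation matching the pins above (a CUDA-capable environment is assumed for flash-attention; the extras are optional):
+
+ ```bash
+ pip install "transformers>=4.52.0" "torch>=2.6.0" "peft>=0.15.2" torchvision pillow
+ # optional
+ pip install flash-attn --no-build-isolation
+ pip install sentence-transformers
+ ```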
86
+
87
+ </details>
88
+
89
+
90
+ <details>
91
+ <summary>via <a href="https://jina.ai/embeddings/">Jina AI Embeddings API</a></summary>
92
+
93
+
94
+ ```bash
95
+ curl https://api.jina.ai/v1/embeddings \
96
+ -H "Content-Type: application/json" \
97
+ -H "Authorization: Bearer $JINA_AI_API_TOKEN" \
98
+ -d @- <<EOFEOF
99
+ {
100
+ "model": "jina-embeddings-v4",
101
+ "task": "text-matching",
102
+ "input": [
103
+ {
104
+ "text": "غروب جميل على الشاطئ"
105
+ },
106
+ {
107
+ "text": "海滩上美丽的日落"
108
+ },
109
+ {
110
+ "text": "A beautiful sunset over the beach"
111
+ },
112
+ {
113
+ "text": "Un beau coucher de soleil sur la plage"
114
+ },
115
+ {
116
+ "text": "Ein wunderschöner Sonnenuntergang am Strand"
117
+ },
118
+ {
119
+ "text": "Ένα όμορφο ηλιοβασίλεμα πάνω από την παραλία"
120
+ },
121
+ {
122
+ "text": "समुद्र तट पर एक खूबसूरत सूर्यास्त"
123
+ },
124
+ {
125
+ "text": "Un bellissimo tramonto sulla spiaggia"
126
+ },
127
+ {
128
+ "text": "浜辺に沈む美しい夕日"
129
+ },
130
+ {
131
+ "text": "해변 위로 아름다운 일몰"
132
+ },
133
+ {
134
+ "image": "https://i.ibb.co/nQNGqL0/beach1.jpg"
135
+ },
136
+ {
137
+ "image": "https://i.ibb.co/r5w8hG8/beach2.jpg"
138
+ }
139
+ ]
140
+ }
141
+ EOFEOF
142
+ ```
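+
+ The same request can be sent from Python. A minimal sketch using `requests` with a shortened input list (endpoint, headers, and payload fields exactly as in the cURL example above; response parsing is left generic):
+
+ ```python
+ import os
+ import requests
+
+ resp = requests.post(
+     "https://api.jina.ai/v1/embeddings",
+     headers={
+         "Content-Type": "application/json",
+         "Authorization": f"Bearer {os.environ['JINA_AI_API_TOKEN']}",
+     },
+     json={
+         "model": "jina-embeddings-v4",
+         "task": "text-matching",
+         "input": [
+             {"text": "A beautiful sunset over the beach"},
+             {"image": "https://i.ibb.co/nQNGqL0/beach1.jpg"},
+         ],
+     },
+ )
+ resp.raise_for_status()
+ result = resp.json()  # the embedding vectors are contained in the response body
+ ```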
143
+
144
+ </details>
145
+
146
+ <details>
147
+ <summary>via <a href="https://huggingface.co/docs/transformers/en/index">transformers</a></summary>
148
+
149
+ ```python
150
+ # !pip install "transformers>=4.52.0" "torch>=2.6.0" "peft>=0.15.2" torchvision pillow
152
+ from transformers import AutoModel
153
+ import torch
154
+
155
+ # Initialize the model
156
+ model = AutoModel.from_pretrained("jinaai/jina-embeddings-v4", trust_remote_code=True, torch_dtype=torch.float16)
157
+
158
+ model.to("cuda")
159
+
160
+ # ========================
161
+ # 1. Retrieval Task
162
+ # ========================
163
+ # Configure truncate_dim, max_length (for texts), max_pixels (for images), vector_type, batch_size in the encode function if needed
164
+
165
+ # Encode query
166
+ query_embeddings = model.encode_text(
167
+ texts=["Overview of climate change impacts on coastal cities"],
168
+ task="retrieval",
169
+ prompt_name="query",
170
+ )
171
+
172
+ # Encode passage (text)
173
+ passage_embeddings = model.encode_text(
174
+ texts=[
175
+ "Climate change has led to rising sea levels, increased frequency of extreme weather events..."
176
+ ],
177
+ task="retrieval",
178
+ prompt_name="passage",
179
+ )
180
+
181
+ # Encode image/document
182
+ image_embeddings = model.encode_image(
183
+ images=["https://i.ibb.co/nQNGqL0/beach1.jpg"],
184
+ task="retrieval",
185
+ )
186
+
187
+ # ========================
188
+ # 2. Text Matching Task
189
+ # ========================
190
+ texts = [
191
+ "غروب جميل على الشاطئ", # Arabic
192
+ "海滩上美丽的日落", # Chinese
193
+ "Un beau coucher de soleil sur la plage", # French
194
+ "Ein wunderschöner Sonnenuntergang am Strand", # German
195
+ "Ένα όμορφο ηλιοβασίλεμα πάνω από την παραλία", # Greek
196
+ "समुद्र तट पर एक खूबसूरत सूर्यास्त", # Hindi
197
+ "Un bellissimo tramonto sulla spiaggia", # Italian
198
+ "浜辺に沈む美しい夕日", # Japanese
199
+ "해변 위로 아름다운 일몰", # Korean
200
+ ]
201
+
202
+ text_embeddings = model.encode_text(texts=texts, task="text-matching")
203
+
204
+ # ========================
205
+ # 3. Code Understanding Task
206
+ # ========================
207
+
208
+ # Encode query
209
+ query_embedding = model.encode_text(
210
+ texts=["Find a function that prints a greeting message to the console"],
211
+ task="code",
212
+ prompt_name="query",
213
+ )
214
+
215
+ # Encode code
216
+ code_embeddings = model.encode_text(
217
+ texts=["def hello_world():\n print('Hello, World!')"],
218
+ task="code",
219
+ prompt_name="passage",
220
+ )
221
+
222
+ # ========================
223
+ # 4. Use multivectors
224
+ # ========================
225
+
226
+ multivector_embeddings = model.encode_text(
227
+ texts=texts,
228
+ task="retrieval",
229
+ prompt_name="query",
230
+ return_multivector=True,
231
+ )
232
+
233
+ images = ["https://i.ibb.co/nQNGqL0/beach1.jpg", "https://i.ibb.co/r5w8hG8/beach2.jpg"]
234
+ multivector_image_embeddings = model.encode_image(
235
+ images=images,
236
+ task="retrieval",
237
+ return_multivector=True,
238
+ )
239
+ ```
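+
+ Once embeddings are computed, single-vector outputs are usually compared with cosine similarity, while multi-vector outputs are scored with a late-interaction (MaxSim) sum. A minimal scoring sketch, assuming the `encode_*` calls above return 2-D torch tensors for the dense embeddings and one token-embedding matrix per input for the multi-vectors (the `maxsim` helper is illustrative, not part of the model API):
+
+ ```python
+ import torch
+ import torch.nn.functional as F
+
+ # Dense: cosine similarity matrix between queries and passages
+ q = F.normalize(query_embeddings, p=2, dim=-1)    # (n_queries, dim)
+ p = F.normalize(passage_embeddings, p=2, dim=-1)  # (n_passages, dim)
+ cosine_scores = q @ p.T                           # (n_queries, n_passages)
+
+ # Multi-vector: ColBERT-style MaxSim between one query and one document
+ def maxsim(query_tokens: torch.Tensor, doc_tokens: torch.Tensor) -> torch.Tensor:
+     sim = query_tokens @ doc_tokens.T             # (n_query_tokens, n_doc_tokens)
+     return sim.max(dim=-1).values.sum()
+
+ score = maxsim(multivector_embeddings[0], multivector_image_embeddings[0])
+ ```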
240
+ </details>
241
+
242
+ <details>
243
+ <summary>via <a href="https://sbert.net/">sentence-transformers</a></summary>
244
+
245
+ ```python
246
+ from sentence_transformers import SentenceTransformer
247
+
248
+ # Initialize the model
249
+ model = SentenceTransformer("jinaai/jina-embeddings-v4", trust_remote_code=True)
250
+ # ========================
251
+ # 1. Retrieval Task
252
+ # ========================
253
+ # Encode query
254
+ query_embeddings = model.encode(
255
+ sentences=["Overview of climate change impacts on coastal cities"],
256
+ task="retrieval",
257
+ prompt_name="query",
258
+ )
259
+
260
+ print(f"query_embeddings.shape = {query_embeddings.shape}")
261
+
262
+ # Encode passage (text)
263
+ passage_embeddings = model.encode(
264
+ sentences=[
265
+ "Climate change has led to rising sea levels, increased frequency of extreme weather events..."
266
+ ],
267
+ task="retrieval",
268
+ prompt_name="passage",
269
+ )
270
+
271
+ print(f"passage_embeddings.shape = {passage_embeddings.shape}")
272
+
273
+ # Encode image/document
274
+ image_embeddings = model.encode(
275
+ sentences=["https://i.ibb.co/nQNGqL0/beach1.jpg"],
276
+ task="retrieval",
277
+ )
278
+
279
+ print(f"image_embeddings.shape = {image_embeddings.shape}")
280
+
281
+ # ========================
282
+ # 2. Text Matching Task
283
+ # ========================
284
+ texts = [
285
+ "غروب جميل على الشاطئ", # Arabic
286
+ "海滩上美丽的日落", # Chinese
287
+ "Un beau coucher de soleil sur la plage", # French
288
+ "Ein wunderschöner Sonnenuntergang am Strand", # German
289
+ "Ένα όμορφο ηλιοβασίλεμα πάνω από την παραλία", # Greek
290
+ "समुद्र तट पर एक खूबसूरत सूर्यास्त", # Hindi
291
+ "Un bellissimo tramonto sulla spiaggia", # Italian
292
+ "浜辺に沈む美しい夕日", # Japanese
293
+ "해변 위로 아름다운 일몰", # Korean
294
+ ]
295
+
296
+ text_embeddings = model.encode(sentences=texts, task="text-matching")
297
+
298
+ # ========================
299
+ # 3. Code Understanding Task
300
+ # ========================
301
+
302
+ # Encode query
303
+ query_embeddings = model.encode(
304
+ sentences=["Find a function that prints a greeting message to the console"],
305
+ task="code",
306
+ prompt_name="query",
307
+ )
308
+
309
+ # Encode code
310
+ code_embeddings = model.encode(
311
+ sentences=["def hello_world():\n print('Hello, World!')"],
312
+ task="code",
313
+ prompt_name="passage",
314
+ )
315
+
316
+ # ========================
317
+ # 4. Use multivectors
318
+ # ========================
319
+ # If you want to use multi-vector embeddings, please use the Hugging Face model directly.
320
+ ```
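+
+ Recent `sentence-transformers` releases expose `model.similarity`, which applies the similarity function configured for the model (cosine here). For example, re-using the query and passage embeddings from the retrieval example:
+
+ ```python
+ # (n_queries, n_passages) cosine similarity matrix
+ scores = model.similarity(query_embeddings, passage_embeddings)
+ print(scores)
+ ```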
321
+ </details>
322
+
323
+ <details>
324
+ <summary>via <a href="https://github.com/vllm-project/vllm">vLLM</a></summary>
325
+
326
+ We provide separate model versions for each task (`retrieval`, `text-matching`, `code`), in which the task-specific adapter is merged into the base `Qwen2.5-VL` weights.
327
+ This modification enables native compatibility with vLLM.
328
+
329
+ Instructions and usage examples for each task are available in their respective directories:
330
+ - [jina-embeddings-v4-vllm-retrieval](https://huggingface.co/jinaai/jina-embeddings-v4-vllm-retrieval)
331
+ - [jina-embeddings-v4-vllm-text-matching](https://huggingface.co/jinaai/jina-embeddings-v4-vllm-text-matching)
332
+ - [jina-embeddings-v4-vllm-code](https://huggingface.co/jinaai/jina-embeddings-v4-vllm-code)
333
+
334
+ Please refer to the directory that matches your task for more details.
335
+
336
+ </details>
337
+
338
+
339
+ ## Jina-VDR
340
+ Alongside `jina-embeddings-v4`, we’re releasing [Jina VDR](https://github.com/jina-ai/jina-vdr), a multilingual, multi-domain benchmark for visual document retrieval. The task collection can be viewed [here](https://huggingface.co/collections/jinaai/jinavdr-visual-document-retrieval-684831c022c53b21c313b449), and evaluation instructions can be found [here](https://github.com/jina-ai/jina-vdr).
341
+
342
+
343
+ ## License
344
+
345
+ This model is available for download and use under the [CC BY-NC 4.0](https://creativecommons.org/licenses/by-nc/4.0/deed.en) license.
346
+
347
+
348
+ ## Contact
349
+
350
+ Join our [Discord community](https://discord.jina.ai) and chat with other community members about ideas.
351
+
352
+
353
+ ## Citation
354
+
355
+ If you find `jina-embeddings-v4` useful in your research, please cite the following paper:
356
+ ```
357
+ @misc{günther2025jinaembeddingsv4universalembeddingsmultimodal,
358
+ title={jina-embeddings-v4: Universal Embeddings for Multimodal Multilingual Retrieval},
359
+ author={Michael Günther and Saba Sturua and Mohammad Kalim Akram and Isabelle Mohr and Andrei Ungureanu and Sedigheh Eslami and Scott Martens and Bo Wang and Nan Wang and Han Xiao},
360
+ year={2025},
361
+ eprint={2506.18902},
362
+ archivePrefix={arXiv},
363
+ primaryClass={cs.AI},
364
+ url={https://arxiv.org/abs/2506.18902},
365
+ }
366
+ ```
adapters/adapter_config.json ADDED
@@ -0,0 +1,31 @@
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "jinaai/jina-embeddings-v4",
5
+ "bias": "none",
6
+ "corda_config": null,
7
+ "eva_config": null,
8
+ "exclude_modules": ".*visual.*",
9
+ "fan_in_fan_out": false,
10
+ "inference_mode": true,
11
+ "init_lora_weights": "gaussian",
12
+ "layer_replication": null,
13
+ "layers_pattern": null,
14
+ "layers_to_transform": null,
15
+ "loftq_config": {},
16
+ "lora_alpha": 32,
17
+ "lora_bias": false,
18
+ "lora_dropout": 0.1,
19
+ "megatron_config": null,
20
+ "megatron_core": "megatron.core",
21
+ "modules_to_save": null,
22
+ "peft_type": "LORA",
23
+ "r": 32,
24
+ "rank_pattern": {},
25
+ "revision": null,
26
+ "target_modules": "(.*(model).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$|.*(single_vector_projector|multi_vector_projector).*$)",
27
+ "task_type": "FEATURE_EXTRACTION",
28
+ "trainable_token_indices": null,
29
+ "use_dora": false,
30
+ "use_rslora": false
31
+ }
adapters/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6b7ab4a79daa3b4f3b5274500cc99d3dc89aa8c3419e9d79f89e366685e12e5
3
+ size 359863776
added_tokens.json ADDED
@@ -0,0 +1,24 @@
1
+ {
2
+ "</tool_call>": 151658,
3
+ "<tool_call>": 151657,
4
+ "<|box_end|>": 151649,
5
+ "<|box_start|>": 151648,
6
+ "<|endoftext|>": 151643,
7
+ "<|file_sep|>": 151664,
8
+ "<|fim_middle|>": 151660,
9
+ "<|fim_pad|>": 151662,
10
+ "<|fim_prefix|>": 151659,
11
+ "<|fim_suffix|>": 151661,
12
+ "<|im_end|>": 151645,
13
+ "<|im_start|>": 151644,
14
+ "<|image_pad|>": 151655,
15
+ "<|object_ref_end|>": 151647,
16
+ "<|object_ref_start|>": 151646,
17
+ "<|quad_end|>": 151651,
18
+ "<|quad_start|>": 151650,
19
+ "<|repo_name|>": 151663,
20
+ "<|video_pad|>": 151656,
21
+ "<|vision_end|>": 151653,
22
+ "<|vision_pad|>": 151654,
23
+ "<|vision_start|>": 151652
24
+ }
chat_template.json ADDED
@@ -0,0 +1,3 @@
1
+ {
2
+ "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"
3
+ }
config.json ADDED
@@ -0,0 +1,108 @@
1
+ {
2
+ "_name_or_path": "jinaai/jina-embeddings-v4",
3
+ "architectures": [
4
+ "JinaEmbeddingsV4Model"
5
+ ],
6
+ "auto_map": {
7
+ "AutoConfig": "configuration_jina_embeddings_v4.JinaEmbeddingsV4Config",
8
+ "AutoModel": "modeling_jina_embeddings_v4.JinaEmbeddingsV4Model"
9
+ },
10
+ "attention_dropout": 0.0,
11
+ "bos_token_id": 151643,
12
+ "eos_token_id": 151645,
13
+ "hidden_act": "silu",
14
+ "hidden_size": 2048,
15
+ "image_token_id": 151655,
16
+ "initializer_range": 0.02,
17
+ "intermediate_size": 11008,
18
+ "max_position_embeddings": 128000,
19
+ "max_window_layers": 70,
20
+ "multi_vector_projector_dim": 128,
21
+ "num_attention_heads": 16,
22
+ "num_hidden_layers": 36,
23
+ "num_key_value_heads": 2,
24
+ "rms_norm_eps": 1e-06,
25
+ "rope_scaling": {
26
+ "mrope_section": [
27
+ 16,
28
+ 24,
29
+ 24
30
+ ],
31
+ "rope_type": "default",
32
+ "type": "default"
33
+ },
34
+ "rope_theta": 1000000.0,
35
+ "single_vector_pool_strategy": "mean",
36
+ "sliding_window": 32768,
37
+ "tie_word_embeddings": true,
38
+ "text_config": {
39
+ "attention_dropout": 0.0,
40
+ "bos_token_id": 151643,
41
+ "eos_token_id": 151645,
42
+ "hidden_act": "silu",
43
+ "hidden_size": 2048,
44
+ "image_token_id": null,
45
+ "initializer_range": 0.02,
46
+ "intermediate_size": 11008,
47
+ "max_position_embeddings": 128000,
48
+ "max_window_layers": 70,
49
+ "model_type": "qwen2_5_vl_text",
50
+ "num_attention_heads": 16,
51
+ "num_hidden_layers": 36,
52
+ "num_key_value_heads": 2,
53
+ "rms_norm_eps": 1e-06,
54
+ "rope_scaling": {
55
+ "mrope_section": [
56
+ 16,
57
+ 24,
58
+ 24
59
+ ],
60
+ "rope_type": "default",
61
+ "type": "default"
62
+ },
63
+ "rope_theta": 1000000.0,
64
+ "sliding_window": null,
65
+ "tie_word_embeddings": true,
66
+ "torch_dtype": "bfloat16",
67
+ "use_cache": true,
68
+ "use_sliding_window": false,
69
+ "vocab_size": 151936
70
+ },
71
+ "torch_dtype": "bfloat16",
72
+ "transformers_version": "4.52.0",
73
+ "use_cache": true,
74
+ "use_sliding_window": false,
75
+ "video_token_id": 151656,
76
+ "vision_config": {
77
+ "depth": 32,
78
+ "fullatt_block_indexes": [
79
+ 7,
80
+ 15,
81
+ 23,
82
+ 31
83
+ ],
84
+ "hidden_act": "silu",
85
+ "hidden_size": 1280,
86
+ "in_channels": 3,
87
+ "in_chans": 3,
88
+ "initializer_range": 0.02,
89
+ "intermediate_size": 3420,
90
+ "model_type": "qwen2_5_vl",
91
+ "num_heads": 16,
92
+ "out_hidden_size": 2048,
93
+ "patch_size": 14,
94
+ "spatial_merge_size": 2,
95
+ "spatial_patch_size": 14,
96
+ "temporal_patch_size": 2,
97
+ "tokens_per_second": 2,
98
+ "torch_dtype": "bfloat16",
99
+ "window_size": 112
100
+ },
101
+ "task_names": ["retrieval", "text-matching", "code"],
102
+ "matryoshka_dims": [128, 256, 512, 1024, 2048],
103
+ "_attn_implementation": "flash_attention_2",
104
+ "truncate_dim": null,
105
+ "vision_end_token_id": 151653,
106
+ "vision_start_token_id": 151652,
107
+ "vision_token_id": 151654
108
+ }
config_sentence_transformers.json ADDED
@@ -0,0 +1,13 @@
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "4.1.0",
4
+ "transformers": "4.50.0",
5
+ "pytorch": "2.6.0"
6
+ },
7
+ "prompts":{
8
+ "query":"Query: ",
9
+ "passage":"Passage: "
10
+ },
11
+ "default_prompt_name": null,
12
+ "similarity_fn_name": "cosine"
13
+ }
configuration_jina_embeddings_v4.py ADDED
@@ -0,0 +1,23 @@
1
+ from transformers.models.qwen2_5_vl import Qwen2_5_VLConfig
2
+
3
+ from typing import Optional
4
+
5
+
6
+ class JinaEmbeddingsV4Config(Qwen2_5_VLConfig):
7
+ """
8
+ Configuration for the JinaEmbeddingsV4 model.
9
+ """
10
+
11
+ def __init__(
12
+ self,
13
+ single_vector_pool_strategy: str = "mean",
14
+ multi_vector_projector_dim: int = 128,
15
+ pretrained_peft_model_name_or_path: Optional[str] = None,
16
+ verbosity: int = 1,
17
+ **kwargs,
18
+ ):
19
+ super().__init__(**kwargs)
20
+ self.single_vector_pool_strategy = single_vector_pool_strategy
21
+ self.multi_vector_projector_dim = multi_vector_projector_dim
22
+ self.pretrained_peft_model_name_or_path = pretrained_peft_model_name_or_path
23
+ self.verbosity = verbosity
custom_lora_module.py ADDED
@@ -0,0 +1,193 @@
1
+ from __future__ import annotations
2
+
3
+ import math
4
+ import warnings
5
+ from typing import Any, Optional, Union, List
6
+
7
+ import torch
8
+ import torch.nn as nn
9
+
10
+ from peft.tuners.lora import LoraLayer
11
+
12
+ class MultiAdapterLinear(nn.Module, LoraLayer):
13
+ """
14
+ Custom LoRA module supporting multiple adapters for a linear layer.
15
+
16
+ This module extends the standard LoRA implementation to support multiple task-specific
17
+ adapters that can be dynamically selected during the forward pass. The task_label
18
+ parameter passed to the forward function determines which LoRA adapter(s) to use:
19
+ - If task_label is a string, all examples in the batch use the same adapter
20
+ - If task_label is a list of strings, each example can use a different adapter
21
+
22
+ This enables efficient multi-task inference where all task-specific LoRA adapters
23
+ are loaded in memory simultaneously and dynamically selected per example, eliminating
24
+ the need to switch adapter states between tasks and allowing optimal throughput
25
+ for mixed-task batches.
26
+
27
+ Derived from peft.tuners.lora.Linear.
28
+ """
29
+ def __init__(
30
+ self,
31
+ base_layer,
32
+ adapter_name: str,
33
+ task_names: List[str],
34
+ r: int = 0,
35
+ lora_alpha: int = 1,
36
+ lora_dropout: float = 0.0,
37
+ fan_in_fan_out: bool = False, # Set this to True if the layer to replace stores weight like (fan_in, fan_out)
38
+ is_target_conv_1d_layer: bool = False,
39
+ init_lora_weights: Union[bool, str] = True,
40
+ use_rslora: bool = False,
41
+ use_dora: bool = False,
42
+ lora_bias: bool = False,
43
+ **kwargs,
44
+ ) -> None:
45
+ super().__init__()
46
+ LoraLayer.__init__(self, base_layer, **kwargs)
47
+
48
+ self.fan_in_fan_out = fan_in_fan_out
49
+ self.task_names = task_names
50
+ self._active_adapter = adapter_name
51
+ self.update_layer(
52
+ adapter_name,
53
+ r,
54
+ lora_alpha=lora_alpha,
55
+ lora_dropout=lora_dropout,
56
+ init_lora_weights=init_lora_weights,
57
+ use_rslora=use_rslora,
58
+ use_dora=use_dora,
59
+ lora_bias=lora_bias,
60
+ )
61
+ self.is_target_conv_1d_layer = is_target_conv_1d_layer
62
+
63
+
64
+ def forward(self, x: torch.Tensor, task_label: Union[str, List[str]], *args: Any, **kwargs: Any) -> torch.Tensor:
65
+ self._check_forward_args(x, *args, **kwargs)
66
+
67
+ if self.disable_adapters:
68
+ if self.merged:
69
+ self.unmerge()
70
+ result = self.base_layer(x, *args, **kwargs)
71
+ elif self.merged:
72
+ result = self.base_layer(x, *args, **kwargs)
73
+ else:
74
+ result = self.base_layer(x, *args, **kwargs)
75
+ torch_result_dtype = result.dtype
76
+
77
+ lora_A_keys = self.lora_A.keys()
78
+ for active_adapter in self.active_adapters:
79
+ if active_adapter not in lora_A_keys:
80
+ continue
81
+
82
+ if isinstance(task_label, str):
83
+ lora_A = self.lora_A[active_adapter][task_label]
84
+ lora_B = self.lora_B[active_adapter][task_label]
85
+ dropout = self.lora_dropout[active_adapter]
86
+ scaling = self.scaling[active_adapter]
87
+ x = self._cast_input_dtype(x, lora_A.weight.dtype)
88
+ result = result + lora_B(lora_A(dropout(x))) * scaling
89
+ else:
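+ # task_label is a list here: route each example in the batch through the LoRA A/B pair of its own task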
90
+ unique_tasks = list(set(task_label))
91
+ lora_output = torch.zeros_like(result)
92
+
93
+ for task in unique_tasks:
94
+ task_indices = [i for i, t in enumerate(task_label) if t == task]
95
+ task_x = x[task_indices]
96
+
97
+ lora_A = self.lora_A[active_adapter][task]
98
+ lora_B = self.lora_B[active_adapter][task]
99
+ dropout = self.lora_dropout[active_adapter]
100
+ scaling = self.scaling[active_adapter]
101
+
102
+ task_x = self._cast_input_dtype(task_x, lora_A.weight.dtype)
103
+ task_lora_value = lora_B(lora_A(dropout(task_x))) * scaling
104
+
105
+ for i, idx in enumerate(task_indices):
106
+ lora_output[idx] = task_lora_value[i]
107
+
108
+ result = result + lora_output
109
+
110
+ result = result.to(torch_result_dtype)
111
+
112
+ return result
113
+
114
+ def __repr__(self) -> str:
115
+ rep = super().__repr__()
116
+ return "lora." + rep
117
+
118
+
119
+ def update_layer(
120
+ self,
121
+ adapter_name,
122
+ r,
123
+ lora_alpha,
124
+ lora_dropout,
125
+ init_lora_weights,
126
+ use_rslora,
127
+ use_dora: bool = False,
128
+ lora_bias: bool = False,
129
+ ):
130
+ # This code works for linear layers, override for other layer types
131
+ if r <= 0:
132
+ raise ValueError(f"`r` should be a positive integer value but the value passed is {r}")
133
+
134
+ self.r[adapter_name] = r
135
+ self.lora_alpha[adapter_name] = lora_alpha
136
+ if lora_dropout > 0.0:
137
+ lora_dropout_layer = nn.Dropout(p=lora_dropout)
138
+ else:
139
+ lora_dropout_layer = nn.Identity()
140
+
141
+ self.lora_dropout.update(nn.ModuleDict({adapter_name: lora_dropout_layer}))
142
+ # Actual trainable parameters
143
+ self.lora_A[adapter_name] = nn.ModuleDict({
144
+ task_name: nn.Linear(self.in_features, r, bias=False)
145
+ for task_name in self.task_names
146
+ })
147
+ self.lora_B[adapter_name] = nn.ModuleDict({
148
+ task_name: nn.Linear(r, self.out_features, bias=lora_bias)
149
+ for task_name in self.task_names
150
+ })
151
+ self.lora_bias[adapter_name] = lora_bias
152
+
153
+ if use_rslora:
154
+ self.scaling[adapter_name] = lora_alpha / math.sqrt(r)
155
+ else:
156
+ self.scaling[adapter_name] = lora_alpha / r
157
+
158
+ self.reset_lora_parameters(adapter_name, init_lora_weights)
159
+ self._move_adapter_to_device_of_base_layer(adapter_name)
160
+ self.use_dora[adapter_name] = False
161
+ self.set_adapter(self.active_adapters)
162
+
163
+ def reset_lora_parameters(self, adapter_name, init_lora_weights):
164
+ if init_lora_weights is False:
165
+ return
166
+ if init_lora_weights is True:
167
+ # initialize A the same way as the default for nn.Linear and B to zero
168
+ # https://github.com/microsoft/LoRA/blob/a0a92e0f26c067cf94747bdbf1ce73793fa44d19/loralib/layers.py#L124
169
+ for task_name in self.task_names:
170
+ nn.init.kaiming_uniform_(self.lora_A[adapter_name][task_name].weight, a=math.sqrt(5))
171
+ elif init_lora_weights.lower() == "gaussian":
172
+ for task_name in self.task_names:
173
+ nn.init.normal_(self.lora_A[adapter_name][task_name].weight, std=1 / self.r[adapter_name])
174
+ else:
175
+ raise ValueError(f"Unknown initialization {init_lora_weights=}")
176
+ for task_name in self.task_names:
177
+ nn.init.zeros_(self.lora_B[adapter_name][task_name].weight)
178
+ if self.lora_bias[adapter_name]:
179
+ for task_name in self.task_names:
180
+ nn.init.zeros_(self.lora_B[adapter_name][task_name].bias)
181
+
182
+
183
+ def merge(self, safe_merge: bool = False, adapter_names: Optional[list[str]] = None) -> None:
184
+ """
185
+ Merge the active adapter weights into the base weights
186
+ """
187
+ raise NotImplementedError("Merge operation is not supported")
188
+
189
+ def unmerge(self) -> None:
190
+ """
191
+ This method unmerges all merged adapter layers from the base weights.
192
+ """
193
+ raise NotImplementedError("Unmerge operation is not supported")
custom_st.py ADDED
@@ -0,0 +1,185 @@
1
+ import json
2
+ import os
3
+ from io import BytesIO
4
+ from pathlib import Path
5
+ from typing import Any, Dict, List, Literal, Optional, Union
6
+
7
+ import requests
8
+ import torch
9
+ from PIL import Image
10
+ from torch import nn
11
+ from transformers import AutoConfig, AutoModel, AutoProcessor
12
+
13
+
14
+ class Transformer(nn.Module):
15
+
16
+ save_in_root: bool = True
17
+
18
+ def __init__(
19
+ self,
20
+ model_name_or_path: str = "jinaai/jina-embeddings-v4",
21
+ max_seq_length: Optional[int] = None,
22
+ config_args: Optional[Dict[str, Any]] = None,
23
+ model_args: Optional[Dict[str, Any]] = None,
24
+ tokenizer_args: Optional[Dict[str, Any]] = None,
25
+ cache_dir: Optional[str] = None,
26
+ backend: Literal["torch", "onnx", "openvino"] = "torch",
27
+ **kwargs,
28
+ ) -> None:
29
+ super(Transformer, self).__init__()
30
+ if backend != "torch":
31
+ raise ValueError(
32
+ f"Backend '{backend}' is not supported, please use 'torch' instead"
33
+ )
34
+ config_kwargs = config_args or {}
35
+ model_kwargs = model_args or {}
36
+ tokenizer_kwargs = tokenizer_args or {}
37
+
38
+ self.config = AutoConfig.from_pretrained(
39
+ model_name_or_path, cache_dir=cache_dir, **config_kwargs
40
+ )
41
+ self.default_task = model_kwargs.pop("default_task", None)
42
+ if self.default_task and self.default_task not in self.config.task_names:
43
+ raise ValueError(
44
+ f"Invalid task: {self.default_task}. Must be one of {self.config.task_names}."
45
+ )
46
+
47
+ self.model = AutoModel.from_pretrained(
48
+ model_name_or_path, config=self.config, cache_dir=cache_dir, **model_kwargs
49
+ )
50
+ self.processor = AutoProcessor.from_pretrained(
51
+ model_name_or_path,
52
+ cache_dir=cache_dir,
53
+ use_fast=True,
54
+ **tokenizer_kwargs,
55
+ )
56
+ self.max_seq_length = max_seq_length or 8192
57
+
58
+ def tokenize(
59
+ self, texts: List[Union[str, Image.Image]], padding: Union[str, bool] = True
60
+ ) -> Dict[str, torch.Tensor]:
61
+ encoding = {}
62
+ text_indices = []
63
+ image_indices = []
64
+ for i, text in enumerate(texts):
65
+ if isinstance(text, str):
66
+ # Remove Query: or Passage: prefixes when checking for URLs or file paths
67
+ clean_text = text
68
+ if text.startswith("Query: "):
69
+ clean_text = text[len("Query: ") :]
70
+ elif text.startswith("Passage: "):
71
+ clean_text = text[len("Passage: ") :]
72
+
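+ # Strings that are URLs or existing file paths are loaded as images; everything else is kept as text.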
73
+ if clean_text.startswith("http"):
74
+ response = requests.get(clean_text)
75
+ texts[i] = Image.open(BytesIO(response.content)).convert("RGB")
76
+ image_indices.append(i)
77
+ else:
78
+ try:
79
+ if Path(clean_text).is_file():
80
+ texts[i] = Image.open(clean_text).convert("RGB")
81
+ image_indices.append(i)
82
+ else:
83
+ text_indices.append(i)
84
+ except Exception as e:
85
+ text_indices.append(i)
86
+ elif isinstance(text, Image.Image):
87
+ image_indices.append(i)
88
+ else:
89
+ raise ValueError(f"Invalid input type: {type(text)}")
90
+ if text_indices:
91
+ _texts = [texts[i] for i in text_indices]
92
+ text_features = self.processor.process_texts(
93
+ _texts, max_length=self.max_seq_length
94
+ )
95
+ for key, value in text_features.items():
96
+ encoding[f"text_{key}"] = value
97
+ encoding["text_indices"] = text_indices
98
+
99
+ if image_indices:
100
+ _images = [texts[i] for i in image_indices]
101
+ img_features = self.processor.process_images(_images)
102
+ for key, value in img_features.items():
103
+ encoding[f"image_{key}"] = value
104
+ encoding["image_indices"] = image_indices
105
+
106
+ return encoding
107
+
108
+ def forward(
109
+ self,
110
+ features: Dict[str, torch.Tensor],
111
+ task: Optional[str] = None,
112
+ truncate_dim: Optional[int] = None,
113
+ ) -> Dict[str, torch.Tensor]:
114
+ self.model.eval()
115
+
116
+ if task is None:
117
+ if self.default_task is None:
118
+ raise ValueError(
119
+ "Task must be specified before encoding data. You can set it either during "
120
+ "loading the model (e.g., model_kwargs={'default_task': 'retrieval'}) or "
121
+ "pass it as an argument to the encode method (e.g., model.encode(texts, task='retrieval'))."
122
+ )
123
+ task = self.default_task
124
+ else:
125
+ if task not in self.config.task_names:
126
+ raise ValueError(
127
+ f"Invalid task: {task}. Must be one of {self.config.task_names}."
128
+ )
129
+
130
+ device = self.model.device.type
131
+ all_embeddings = []
132
+
133
+ with torch.no_grad():
134
+ if any(k.startswith("text_") for k in features.keys()):
135
+ text_batch = {
136
+ k[len("text_") :]: v.to(device)
137
+ for k, v in features.items()
138
+ if k.startswith("text_") and k != "text_indices"
139
+ }
140
+ text_indices = features.get("text_indices", [])
141
+ with torch.autocast(device_type=device, dtype=torch.bfloat16):
142
+ text_embeddings = self.model(
143
+ **text_batch, task_label=task
144
+ ).single_vec_emb
145
+ if truncate_dim:
146
+ text_embeddings = text_embeddings[:, :truncate_dim]
147
+ text_embeddings = torch.nn.functional.normalize(
148
+ text_embeddings, p=2, dim=-1
149
+ )
150
+ for i, embedding in enumerate(text_embeddings):
151
+ all_embeddings.append((text_indices[i], embedding))
152
+
153
+ if any(k.startswith("image_") for k in features.keys()):
154
+ image_batch = {
155
+ k[len("image_") :]: v.to(device)
156
+ for k, v in features.items()
157
+ if k.startswith("image_") and k != "image_indices"
158
+ }
159
+ image_indices = features.get("image_indices", [])
160
+
161
+ with torch.autocast(device_type=device, dtype=torch.bfloat16):
162
+ img_embeddings = self.model(
163
+ **image_batch, task_label=task
164
+ ).single_vec_emb
165
+ if truncate_dim:
166
+ img_embeddings = img_embeddings[:, :truncate_dim]
167
+ img_embeddings = torch.nn.functional.normalize(
168
+ img_embeddings, p=2, dim=-1
169
+ )
170
+
171
+ for i, embedding in enumerate(img_embeddings):
172
+ all_embeddings.append((image_indices[i], embedding))
173
+
174
+ if not all_embeddings:
175
+ raise RuntimeError("No embeddings were generated")
176
+
177
+ all_embeddings.sort(key=lambda x: x[0]) # sort by original index
178
+ combined_embeddings = torch.stack([emb for _, emb in all_embeddings])
179
+ features["sentence_embedding"] = combined_embeddings
180
+
181
+ return features
182
+
183
+ @classmethod
184
+ def load(cls, input_path: str) -> "Transformer":
185
+ return cls(model_name_or_path=input_path)
generation_config.json ADDED
@@ -0,0 +1,6 @@
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 151643,
4
+ "eos_token_id": 151645,
5
+ "transformers_version": "4.50.0.dev0"
6
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model-00001-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:abb244162956ec2f26d944b6c10cbb96afe211d2aff908b8b2f498ec27a9100b
3
+ size 4997750728
model-00002-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d5252a7ede6469220b0e7386af53fea9a45fa299a1d2af6fe68cb29897de3e3
3
+ size 2512111904
model.safetensors.index.json ADDED
@@ -0,0 +1,833 @@
1
+ {
2
+ "metadata": {
3
+ "total_size": 7513966848
4
+ },
5
+ "weight_map": {
6
+ "model.embed_tokens.weight": "model-00001-of-00002.safetensors",
7
+ "model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
8
+ "model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
9
+ "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
10
+ "model.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
11
+ "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
12
+ "model.layers.0.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
13
+ "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
14
+ "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
15
+ "model.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
16
+ "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
17
+ "model.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
18
+ "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
19
+ "model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors",
20
+ "model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
21
+ "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
22
+ "model.layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
23
+ "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
24
+ "model.layers.1.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
25
+ "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
26
+ "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
27
+ "model.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
28
+ "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
29
+ "model.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
30
+ "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
31
+ "model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors",
32
+ "model.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
33
+ "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
34
+ "model.layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
35
+ "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
36
+ "model.layers.10.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
37
+ "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
38
+ "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
39
+ "model.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
40
+ "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
41
+ "model.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
42
+ "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
43
+ "model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors",
44
+ "model.layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
45
+ "model.layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
46
+ "model.layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
47
+ "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
48
+ "model.layers.11.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
49
+ "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
50
+ "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
51
+ "model.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
52
+ "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
53
+ "model.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
54
+ "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
55
+ "model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors",
56
+ "model.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
57
+ "model.layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
58
+ "model.layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
59
+ "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
60
+ "model.layers.12.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
61
+ "model.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
62
+ "model.layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
63
+ "model.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
64
+ "model.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
65
+ "model.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
66
+ "model.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
67
+ "model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors",
68
+ "model.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
69
+ "model.layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
70
+ "model.layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
71
+ "model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
72
+ "model.layers.13.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
73
+ "model.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
74
+ "model.layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
75
+ "model.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
76
+ "model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
77
+ "model.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
78
+ "model.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
79
+ "model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors",
80
+ "model.layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
81
+ "model.layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
82
+ "model.layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
83
+ "model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
84
+ "model.layers.14.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
85
+ "model.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
86
+ "model.layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
87
+ "model.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
88
+ "model.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
89
+ "model.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
90
+ "model.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
91
+ "model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors",
92
+ "model.layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
93
+ "model.layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
94
+ "model.layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
95
+ "model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
96
+ "model.layers.15.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
97
+ "model.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
98
+ "model.layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
99
+ "model.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
100
+ "model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
101
+ "model.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
102
+ "model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
103
+ "model.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors",
104
+ "model.layers.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
105
+ "model.layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
106
+ "model.layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
107
+ "model.layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
108
+ "model.layers.16.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
109
+ "model.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
110
+ "model.layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
111
+ "model.layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
112
+ "model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
113
+ "model.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
114
+ "model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
115
+ "model.layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors",
116
+ "model.layers.17.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
117
+ "model.layers.17.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
118
+ "model.layers.17.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
119
+ "model.layers.17.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
120
+ "model.layers.17.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
121
+ "model.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
122
+ "model.layers.17.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
123
+ "model.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
124
+ "model.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
125
+ "model.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
126
+ "model.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
127
+ "model.layers.18.input_layernorm.weight": "model-00001-of-00002.safetensors",
128
+ "model.layers.18.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
129
+ "model.layers.18.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
130
+ "model.layers.18.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
131
+ "model.layers.18.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
132
+ "model.layers.18.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
133
+ "model.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
134
+ "model.layers.18.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
135
+ "model.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
136
+ "model.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
137
+ "model.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
138
+ "model.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
139
+ "model.layers.19.input_layernorm.weight": "model-00002-of-00002.safetensors",
140
+ "model.layers.19.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
141
+ "model.layers.19.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
142
+ "model.layers.19.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
143
+ "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
144
+ "model.layers.19.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
145
+ "model.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
146
+ "model.layers.19.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
147
+ "model.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
148
+ "model.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
149
+ "model.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
150
+ "model.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
151
+ "model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
152
+ "model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
153
+ "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
154
+ "model.layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
155
+ "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
156
+ "model.layers.2.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
157
+ "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
158
+ "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
159
+ "model.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
160
+ "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
161
+ "model.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
162
+ "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
163
+ "model.layers.20.input_layernorm.weight": "model-00002-of-00002.safetensors",
164
+ "model.layers.20.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
165
+ "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
166
+ "model.layers.20.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
167
+ "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
168
+ "model.layers.20.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
169
+ "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
170
+ "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
171
+ "model.layers.20.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
172
+ "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
173
+ "model.layers.20.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
174
+ "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
175
+ "model.layers.21.input_layernorm.weight": "model-00002-of-00002.safetensors",
176
+ "model.layers.21.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
177
+ "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
178
+ "model.layers.21.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
179
+ "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
180
+ "model.layers.21.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
181
+ "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
182
+ "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
183
+ "model.layers.21.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
184
+ "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
185
+ "model.layers.21.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
186
+ "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
187
+ "model.layers.22.input_layernorm.weight": "model-00002-of-00002.safetensors",
188
+ "model.layers.22.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
189
+ "model.layers.22.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
190
+ "model.layers.22.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
191
+ "model.layers.22.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
192
+ "model.layers.22.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
193
+ "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
194
+ "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
195
+ "model.layers.22.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
196
+ "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
197
+ "model.layers.22.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
198
+ "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
199
+ "model.layers.23.input_layernorm.weight": "model-00002-of-00002.safetensors",
200
+ "model.layers.23.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
201
+ "model.layers.23.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
202
+ "model.layers.23.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
203
+ "model.layers.23.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
204
+ "model.layers.23.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
205
+ "model.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
206
+ "model.layers.23.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
207
+ "model.layers.23.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
208
+ "model.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
209
+ "model.layers.23.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
210
+ "model.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
211
+ "model.layers.24.input_layernorm.weight": "model-00002-of-00002.safetensors",
212
+ "model.layers.24.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
213
+ "model.layers.24.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
214
+ "model.layers.24.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
215
+ "model.layers.24.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
216
+ "model.layers.24.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
217
+ "model.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
218
+ "model.layers.24.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
219
+ "model.layers.24.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
220
+ "model.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
221
+ "model.layers.24.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
222
+ "model.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
223
+ "model.layers.25.input_layernorm.weight": "model-00002-of-00002.safetensors",
224
+ "model.layers.25.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
225
+ "model.layers.25.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
226
+ "model.layers.25.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
227
+ "model.layers.25.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
228
+ "model.layers.25.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
229
+ "model.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
230
+ "model.layers.25.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
231
+ "model.layers.25.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
232
+ "model.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
233
+ "model.layers.25.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
234
+ "model.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
235
+ "model.layers.26.input_layernorm.weight": "model-00002-of-00002.safetensors",
236
+ "model.layers.26.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
237
+ "model.layers.26.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
238
+ "model.layers.26.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
239
+ "model.layers.26.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
240
+ "model.layers.26.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
241
+ "model.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
242
+ "model.layers.26.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
243
+ "model.layers.26.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
244
+ "model.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
245
+ "model.layers.26.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
246
+ "model.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
247
+ "model.layers.27.input_layernorm.weight": "model-00002-of-00002.safetensors",
248
+ "model.layers.27.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
249
+ "model.layers.27.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
250
+ "model.layers.27.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
251
+ "model.layers.27.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
252
+ "model.layers.27.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
253
+ "model.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
254
+ "model.layers.27.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
255
+ "model.layers.27.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
256
+ "model.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
257
+ "model.layers.27.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
258
+ "model.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
259
+ "model.layers.28.input_layernorm.weight": "model-00002-of-00002.safetensors",
260
+ "model.layers.28.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
261
+ "model.layers.28.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
262
+ "model.layers.28.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
263
+ "model.layers.28.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
264
+ "model.layers.28.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
265
+ "model.layers.28.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
266
+ "model.layers.28.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
267
+ "model.layers.28.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
268
+ "model.layers.28.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
269
+ "model.layers.28.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
270
+ "model.layers.28.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
271
+ "model.layers.29.input_layernorm.weight": "model-00002-of-00002.safetensors",
272
+ "model.layers.29.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
273
+ "model.layers.29.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
274
+ "model.layers.29.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
275
+ "model.layers.29.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
276
+ "model.layers.29.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
277
+ "model.layers.29.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
278
+ "model.layers.29.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
279
+ "model.layers.29.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
280
+ "model.layers.29.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
281
+ "model.layers.29.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
282
+ "model.layers.29.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
283
+ "model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
284
+ "model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
285
+ "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
286
+ "model.layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
287
+ "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
288
+ "model.layers.3.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
289
+ "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
290
+ "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
291
+ "model.layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
292
+ "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
293
+ "model.layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
294
+ "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
295
+ "model.layers.30.input_layernorm.weight": "model-00002-of-00002.safetensors",
296
+ "model.layers.30.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
297
+ "model.layers.30.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
298
+ "model.layers.30.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
299
+ "model.layers.30.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
300
+ "model.layers.30.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
301
+ "model.layers.30.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
302
+ "model.layers.30.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
303
+ "model.layers.30.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
304
+ "model.layers.30.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
305
+ "model.layers.30.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
306
+ "model.layers.30.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
307
+ "model.layers.31.input_layernorm.weight": "model-00002-of-00002.safetensors",
308
+ "model.layers.31.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
309
+ "model.layers.31.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
310
+ "model.layers.31.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
311
+ "model.layers.31.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
312
+ "model.layers.31.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
313
+ "model.layers.31.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
314
+ "model.layers.31.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
315
+ "model.layers.31.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
316
+ "model.layers.31.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
317
+ "model.layers.31.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
318
+ "model.layers.31.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
319
+ "model.layers.32.input_layernorm.weight": "model-00002-of-00002.safetensors",
320
+ "model.layers.32.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
321
+ "model.layers.32.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
322
+ "model.layers.32.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
323
+ "model.layers.32.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
324
+ "model.layers.32.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
325
+ "model.layers.32.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
326
+ "model.layers.32.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
327
+ "model.layers.32.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
328
+ "model.layers.32.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
329
+ "model.layers.32.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
330
+ "model.layers.32.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
331
+ "model.layers.33.input_layernorm.weight": "model-00002-of-00002.safetensors",
332
+ "model.layers.33.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
333
+ "model.layers.33.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
334
+ "model.layers.33.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
335
+ "model.layers.33.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
336
+ "model.layers.33.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
337
+ "model.layers.33.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
338
+ "model.layers.33.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
339
+ "model.layers.33.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
340
+ "model.layers.33.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
341
+ "model.layers.33.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
342
+ "model.layers.33.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
343
+ "model.layers.34.input_layernorm.weight": "model-00002-of-00002.safetensors",
344
+ "model.layers.34.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
345
+ "model.layers.34.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
346
+ "model.layers.34.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
347
+ "model.layers.34.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
348
+ "model.layers.34.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
349
+ "model.layers.34.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
350
+ "model.layers.34.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
351
+ "model.layers.34.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
352
+ "model.layers.34.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
353
+ "model.layers.34.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
354
+ "model.layers.34.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
355
+ "model.layers.35.input_layernorm.weight": "model-00002-of-00002.safetensors",
356
+ "model.layers.35.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
357
+ "model.layers.35.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
358
+ "model.layers.35.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
359
+ "model.layers.35.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
360
+ "model.layers.35.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
361
+ "model.layers.35.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
362
+ "model.layers.35.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
363
+ "model.layers.35.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
364
+ "model.layers.35.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
365
+ "model.layers.35.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
366
+ "model.layers.35.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
367
+ "model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors",
368
+ "model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
369
+ "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
370
+ "model.layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
371
+ "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
372
+ "model.layers.4.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
373
+ "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
374
+ "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
375
+ "model.layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
376
+ "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
377
+ "model.layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
378
+ "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
379
+ "model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors",
380
+ "model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
381
+ "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
382
+ "model.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
383
+ "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
384
+ "model.layers.5.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
385
+ "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
386
+ "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
387
+ "model.layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
388
+ "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
389
+ "model.layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
390
+ "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
391
+ "model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors",
392
+ "model.layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
393
+ "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
394
+ "model.layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
395
+ "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
396
+ "model.layers.6.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
397
+ "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
398
+ "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
399
+ "model.layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
400
+ "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
401
+ "model.layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
402
+ "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
403
+ "model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors",
404
+ "model.layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
405
+ "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
406
+ "model.layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
407
+ "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
408
+ "model.layers.7.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
409
+ "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
410
+ "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
411
+ "model.layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
412
+ "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
413
+ "model.layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
414
+ "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
415
+ "model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors",
416
+ "model.layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
417
+ "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
418
+ "model.layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
419
+ "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
420
+ "model.layers.8.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
421
+ "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
422
+ "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
423
+ "model.layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
424
+ "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
425
+ "model.layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
426
+ "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
427
+ "model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors",
428
+ "model.layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
429
+ "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
430
+ "model.layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
431
+ "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
432
+ "model.layers.9.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
433
+ "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
434
+ "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
435
+ "model.layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
436
+ "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
437
+ "model.layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
438
+ "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
439
+ "model.norm.weight": "model-00002-of-00002.safetensors",
440
+ "multi_vector_projector.bias": "model-00002-of-00002.safetensors",
441
+ "multi_vector_projector.weight": "model-00002-of-00002.safetensors",
442
+ "visual.blocks.0.attn.proj.bias": "model-00001-of-00002.safetensors",
443
+ "visual.blocks.0.attn.proj.weight": "model-00001-of-00002.safetensors",
444
+ "visual.blocks.0.attn.qkv.bias": "model-00001-of-00002.safetensors",
445
+ "visual.blocks.0.attn.qkv.weight": "model-00001-of-00002.safetensors",
446
+ "visual.blocks.0.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
447
+ "visual.blocks.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
448
+ "visual.blocks.0.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
449
+ "visual.blocks.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
450
+ "visual.blocks.0.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
451
+ "visual.blocks.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
452
+ "visual.blocks.0.norm1.weight": "model-00001-of-00002.safetensors",
453
+ "visual.blocks.0.norm2.weight": "model-00001-of-00002.safetensors",
454
+ "visual.blocks.1.attn.proj.bias": "model-00001-of-00002.safetensors",
455
+ "visual.blocks.1.attn.proj.weight": "model-00001-of-00002.safetensors",
456
+ "visual.blocks.1.attn.qkv.bias": "model-00001-of-00002.safetensors",
457
+ "visual.blocks.1.attn.qkv.weight": "model-00001-of-00002.safetensors",
458
+ "visual.blocks.1.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
459
+ "visual.blocks.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
460
+ "visual.blocks.1.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
461
+ "visual.blocks.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
462
+ "visual.blocks.1.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
463
+ "visual.blocks.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
464
+ "visual.blocks.1.norm1.weight": "model-00001-of-00002.safetensors",
465
+ "visual.blocks.1.norm2.weight": "model-00001-of-00002.safetensors",
466
+ "visual.blocks.10.attn.proj.bias": "model-00001-of-00002.safetensors",
467
+ "visual.blocks.10.attn.proj.weight": "model-00001-of-00002.safetensors",
468
+ "visual.blocks.10.attn.qkv.bias": "model-00001-of-00002.safetensors",
469
+ "visual.blocks.10.attn.qkv.weight": "model-00001-of-00002.safetensors",
470
+ "visual.blocks.10.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
471
+ "visual.blocks.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
472
+ "visual.blocks.10.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
473
+ "visual.blocks.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
474
+ "visual.blocks.10.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
475
+ "visual.blocks.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
476
+ "visual.blocks.10.norm1.weight": "model-00001-of-00002.safetensors",
477
+ "visual.blocks.10.norm2.weight": "model-00001-of-00002.safetensors",
478
+ "visual.blocks.11.attn.proj.bias": "model-00001-of-00002.safetensors",
479
+ "visual.blocks.11.attn.proj.weight": "model-00001-of-00002.safetensors",
480
+ "visual.blocks.11.attn.qkv.bias": "model-00001-of-00002.safetensors",
481
+ "visual.blocks.11.attn.qkv.weight": "model-00001-of-00002.safetensors",
482
+ "visual.blocks.11.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
483
+ "visual.blocks.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
484
+ "visual.blocks.11.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
485
+ "visual.blocks.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
486
+ "visual.blocks.11.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
487
+ "visual.blocks.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
488
+ "visual.blocks.11.norm1.weight": "model-00001-of-00002.safetensors",
489
+ "visual.blocks.11.norm2.weight": "model-00001-of-00002.safetensors",
490
+ "visual.blocks.12.attn.proj.bias": "model-00001-of-00002.safetensors",
491
+ "visual.blocks.12.attn.proj.weight": "model-00001-of-00002.safetensors",
492
+ "visual.blocks.12.attn.qkv.bias": "model-00001-of-00002.safetensors",
493
+ "visual.blocks.12.attn.qkv.weight": "model-00001-of-00002.safetensors",
494
+ "visual.blocks.12.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
495
+ "visual.blocks.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
496
+ "visual.blocks.12.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
497
+ "visual.blocks.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
498
+ "visual.blocks.12.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
499
+ "visual.blocks.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
500
+ "visual.blocks.12.norm1.weight": "model-00001-of-00002.safetensors",
501
+ "visual.blocks.12.norm2.weight": "model-00001-of-00002.safetensors",
502
+ "visual.blocks.13.attn.proj.bias": "model-00001-of-00002.safetensors",
503
+ "visual.blocks.13.attn.proj.weight": "model-00001-of-00002.safetensors",
504
+ "visual.blocks.13.attn.qkv.bias": "model-00001-of-00002.safetensors",
505
+ "visual.blocks.13.attn.qkv.weight": "model-00001-of-00002.safetensors",
506
+ "visual.blocks.13.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
507
+ "visual.blocks.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
508
+ "visual.blocks.13.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
509
+ "visual.blocks.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
510
+ "visual.blocks.13.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
511
+ "visual.blocks.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
512
+ "visual.blocks.13.norm1.weight": "model-00001-of-00002.safetensors",
513
+ "visual.blocks.13.norm2.weight": "model-00001-of-00002.safetensors",
514
+ "visual.blocks.14.attn.proj.bias": "model-00001-of-00002.safetensors",
515
+ "visual.blocks.14.attn.proj.weight": "model-00001-of-00002.safetensors",
516
+ "visual.blocks.14.attn.qkv.bias": "model-00001-of-00002.safetensors",
517
+ "visual.blocks.14.attn.qkv.weight": "model-00001-of-00002.safetensors",
518
+ "visual.blocks.14.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
519
+ "visual.blocks.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
520
+ "visual.blocks.14.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
521
+ "visual.blocks.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
522
+ "visual.blocks.14.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
523
+ "visual.blocks.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
524
+ "visual.blocks.14.norm1.weight": "model-00001-of-00002.safetensors",
525
+ "visual.blocks.14.norm2.weight": "model-00001-of-00002.safetensors",
526
+ "visual.blocks.15.attn.proj.bias": "model-00001-of-00002.safetensors",
527
+ "visual.blocks.15.attn.proj.weight": "model-00001-of-00002.safetensors",
528
+ "visual.blocks.15.attn.qkv.bias": "model-00001-of-00002.safetensors",
529
+ "visual.blocks.15.attn.qkv.weight": "model-00001-of-00002.safetensors",
530
+ "visual.blocks.15.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
531
+ "visual.blocks.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
532
+ "visual.blocks.15.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
533
+ "visual.blocks.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
534
+ "visual.blocks.15.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
535
+ "visual.blocks.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
536
+ "visual.blocks.15.norm1.weight": "model-00001-of-00002.safetensors",
537
+ "visual.blocks.15.norm2.weight": "model-00001-of-00002.safetensors",
538
+ "visual.blocks.16.attn.proj.bias": "model-00001-of-00002.safetensors",
539
+ "visual.blocks.16.attn.proj.weight": "model-00001-of-00002.safetensors",
540
+ "visual.blocks.16.attn.qkv.bias": "model-00001-of-00002.safetensors",
541
+ "visual.blocks.16.attn.qkv.weight": "model-00001-of-00002.safetensors",
542
+ "visual.blocks.16.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
543
+ "visual.blocks.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
544
+ "visual.blocks.16.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
545
+ "visual.blocks.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
546
+ "visual.blocks.16.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
547
+ "visual.blocks.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
548
+ "visual.blocks.16.norm1.weight": "model-00001-of-00002.safetensors",
549
+ "visual.blocks.16.norm2.weight": "model-00001-of-00002.safetensors",
550
+ "visual.blocks.17.attn.proj.bias": "model-00001-of-00002.safetensors",
551
+ "visual.blocks.17.attn.proj.weight": "model-00001-of-00002.safetensors",
552
+ "visual.blocks.17.attn.qkv.bias": "model-00001-of-00002.safetensors",
553
+ "visual.blocks.17.attn.qkv.weight": "model-00001-of-00002.safetensors",
554
+ "visual.blocks.17.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
555
+ "visual.blocks.17.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
556
+ "visual.blocks.17.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
557
+ "visual.blocks.17.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
558
+ "visual.blocks.17.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
559
+ "visual.blocks.17.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
560
+ "visual.blocks.17.norm1.weight": "model-00001-of-00002.safetensors",
561
+ "visual.blocks.17.norm2.weight": "model-00001-of-00002.safetensors",
562
+ "visual.blocks.18.attn.proj.bias": "model-00001-of-00002.safetensors",
563
+ "visual.blocks.18.attn.proj.weight": "model-00001-of-00002.safetensors",
564
+ "visual.blocks.18.attn.qkv.bias": "model-00001-of-00002.safetensors",
565
+ "visual.blocks.18.attn.qkv.weight": "model-00001-of-00002.safetensors",
566
+ "visual.blocks.18.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
567
+ "visual.blocks.18.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
568
+ "visual.blocks.18.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
569
+ "visual.blocks.18.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
570
+ "visual.blocks.18.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
571
+ "visual.blocks.18.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
572
+ "visual.blocks.18.norm1.weight": "model-00001-of-00002.safetensors",
573
+ "visual.blocks.18.norm2.weight": "model-00001-of-00002.safetensors",
574
+ "visual.blocks.19.attn.proj.bias": "model-00001-of-00002.safetensors",
575
+ "visual.blocks.19.attn.proj.weight": "model-00001-of-00002.safetensors",
576
+ "visual.blocks.19.attn.qkv.bias": "model-00001-of-00002.safetensors",
577
+ "visual.blocks.19.attn.qkv.weight": "model-00001-of-00002.safetensors",
578
+ "visual.blocks.19.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
579
+ "visual.blocks.19.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
580
+ "visual.blocks.19.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
581
+ "visual.blocks.19.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
582
+ "visual.blocks.19.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
583
+ "visual.blocks.19.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
584
+ "visual.blocks.19.norm1.weight": "model-00001-of-00002.safetensors",
585
+ "visual.blocks.19.norm2.weight": "model-00001-of-00002.safetensors",
586
+ "visual.blocks.2.attn.proj.bias": "model-00001-of-00002.safetensors",
587
+ "visual.blocks.2.attn.proj.weight": "model-00001-of-00002.safetensors",
588
+ "visual.blocks.2.attn.qkv.bias": "model-00001-of-00002.safetensors",
589
+ "visual.blocks.2.attn.qkv.weight": "model-00001-of-00002.safetensors",
590
+ "visual.blocks.2.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
591
+ "visual.blocks.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
592
+ "visual.blocks.2.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
593
+ "visual.blocks.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
594
+ "visual.blocks.2.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
595
+ "visual.blocks.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
596
+ "visual.blocks.2.norm1.weight": "model-00001-of-00002.safetensors",
597
+ "visual.blocks.2.norm2.weight": "model-00001-of-00002.safetensors",
598
+ "visual.blocks.20.attn.proj.bias": "model-00001-of-00002.safetensors",
599
+ "visual.blocks.20.attn.proj.weight": "model-00001-of-00002.safetensors",
600
+ "visual.blocks.20.attn.qkv.bias": "model-00001-of-00002.safetensors",
601
+ "visual.blocks.20.attn.qkv.weight": "model-00001-of-00002.safetensors",
602
+ "visual.blocks.20.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
603
+ "visual.blocks.20.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
604
+ "visual.blocks.20.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
605
+ "visual.blocks.20.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
606
+ "visual.blocks.20.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
607
+ "visual.blocks.20.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
608
+ "visual.blocks.20.norm1.weight": "model-00001-of-00002.safetensors",
609
+ "visual.blocks.20.norm2.weight": "model-00001-of-00002.safetensors",
610
+ "visual.blocks.21.attn.proj.bias": "model-00001-of-00002.safetensors",
611
+ "visual.blocks.21.attn.proj.weight": "model-00001-of-00002.safetensors",
612
+ "visual.blocks.21.attn.qkv.bias": "model-00001-of-00002.safetensors",
613
+ "visual.blocks.21.attn.qkv.weight": "model-00001-of-00002.safetensors",
614
+ "visual.blocks.21.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
615
+ "visual.blocks.21.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
616
+ "visual.blocks.21.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
617
+ "visual.blocks.21.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
618
+ "visual.blocks.21.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
619
+ "visual.blocks.21.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
620
+ "visual.blocks.21.norm1.weight": "model-00001-of-00002.safetensors",
621
+ "visual.blocks.21.norm2.weight": "model-00001-of-00002.safetensors",
622
+ "visual.blocks.22.attn.proj.bias": "model-00001-of-00002.safetensors",
623
+ "visual.blocks.22.attn.proj.weight": "model-00001-of-00002.safetensors",
624
+ "visual.blocks.22.attn.qkv.bias": "model-00001-of-00002.safetensors",
625
+ "visual.blocks.22.attn.qkv.weight": "model-00001-of-00002.safetensors",
626
+ "visual.blocks.22.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
627
+ "visual.blocks.22.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
628
+ "visual.blocks.22.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
629
+ "visual.blocks.22.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
630
+ "visual.blocks.22.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
631
+ "visual.blocks.22.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
632
+ "visual.blocks.22.norm1.weight": "model-00001-of-00002.safetensors",
633
+ "visual.blocks.22.norm2.weight": "model-00001-of-00002.safetensors",
634
+ "visual.blocks.23.attn.proj.bias": "model-00001-of-00002.safetensors",
635
+ "visual.blocks.23.attn.proj.weight": "model-00001-of-00002.safetensors",
636
+ "visual.blocks.23.attn.qkv.bias": "model-00001-of-00002.safetensors",
637
+ "visual.blocks.23.attn.qkv.weight": "model-00001-of-00002.safetensors",
638
+ "visual.blocks.23.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
639
+ "visual.blocks.23.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
640
+ "visual.blocks.23.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
641
+ "visual.blocks.23.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
642
+ "visual.blocks.23.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
643
+ "visual.blocks.23.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
644
+ "visual.blocks.23.norm1.weight": "model-00001-of-00002.safetensors",
645
+ "visual.blocks.23.norm2.weight": "model-00001-of-00002.safetensors",
646
+ "visual.blocks.24.attn.proj.bias": "model-00001-of-00002.safetensors",
647
+ "visual.blocks.24.attn.proj.weight": "model-00001-of-00002.safetensors",
648
+ "visual.blocks.24.attn.qkv.bias": "model-00001-of-00002.safetensors",
649
+ "visual.blocks.24.attn.qkv.weight": "model-00001-of-00002.safetensors",
650
+ "visual.blocks.24.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
651
+ "visual.blocks.24.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
652
+ "visual.blocks.24.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
653
+ "visual.blocks.24.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
654
+ "visual.blocks.24.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
655
+ "visual.blocks.24.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
656
+ "visual.blocks.24.norm1.weight": "model-00001-of-00002.safetensors",
657
+ "visual.blocks.24.norm2.weight": "model-00001-of-00002.safetensors",
658
+ "visual.blocks.25.attn.proj.bias": "model-00001-of-00002.safetensors",
659
+ "visual.blocks.25.attn.proj.weight": "model-00001-of-00002.safetensors",
660
+ "visual.blocks.25.attn.qkv.bias": "model-00001-of-00002.safetensors",
661
+ "visual.blocks.25.attn.qkv.weight": "model-00001-of-00002.safetensors",
662
+ "visual.blocks.25.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
663
+ "visual.blocks.25.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
664
+ "visual.blocks.25.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
665
+ "visual.blocks.25.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
666
+ "visual.blocks.25.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
667
+ "visual.blocks.25.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
668
+ "visual.blocks.25.norm1.weight": "model-00001-of-00002.safetensors",
669
+ "visual.blocks.25.norm2.weight": "model-00001-of-00002.safetensors",
670
+ "visual.blocks.26.attn.proj.bias": "model-00001-of-00002.safetensors",
671
+ "visual.blocks.26.attn.proj.weight": "model-00001-of-00002.safetensors",
672
+ "visual.blocks.26.attn.qkv.bias": "model-00001-of-00002.safetensors",
673
+ "visual.blocks.26.attn.qkv.weight": "model-00001-of-00002.safetensors",
674
+ "visual.blocks.26.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
675
+ "visual.blocks.26.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
676
+ "visual.blocks.26.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
677
+ "visual.blocks.26.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
678
+ "visual.blocks.26.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
679
+ "visual.blocks.26.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
680
+ "visual.blocks.26.norm1.weight": "model-00001-of-00002.safetensors",
681
+ "visual.blocks.26.norm2.weight": "model-00001-of-00002.safetensors",
682
+ "visual.blocks.27.attn.proj.bias": "model-00001-of-00002.safetensors",
683
+ "visual.blocks.27.attn.proj.weight": "model-00001-of-00002.safetensors",
684
+ "visual.blocks.27.attn.qkv.bias": "model-00001-of-00002.safetensors",
685
+ "visual.blocks.27.attn.qkv.weight": "model-00001-of-00002.safetensors",
686
+ "visual.blocks.27.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
687
+ "visual.blocks.27.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
688
+ "visual.blocks.27.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
689
+ "visual.blocks.27.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
690
+ "visual.blocks.27.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
691
+ "visual.blocks.27.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
692
+ "visual.blocks.27.norm1.weight": "model-00001-of-00002.safetensors",
693
+ "visual.blocks.27.norm2.weight": "model-00001-of-00002.safetensors",
694
+ "visual.blocks.28.attn.proj.bias": "model-00001-of-00002.safetensors",
695
+ "visual.blocks.28.attn.proj.weight": "model-00001-of-00002.safetensors",
696
+ "visual.blocks.28.attn.qkv.bias": "model-00001-of-00002.safetensors",
697
+ "visual.blocks.28.attn.qkv.weight": "model-00001-of-00002.safetensors",
698
+ "visual.blocks.28.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
699
+ "visual.blocks.28.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
700
+ "visual.blocks.28.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
701
+ "visual.blocks.28.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
702
+ "visual.blocks.28.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
703
+ "visual.blocks.28.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
704
+ "visual.blocks.28.norm1.weight": "model-00001-of-00002.safetensors",
705
+ "visual.blocks.28.norm2.weight": "model-00001-of-00002.safetensors",
706
+ "visual.blocks.29.attn.proj.bias": "model-00001-of-00002.safetensors",
707
+ "visual.blocks.29.attn.proj.weight": "model-00001-of-00002.safetensors",
708
+ "visual.blocks.29.attn.qkv.bias": "model-00001-of-00002.safetensors",
709
+ "visual.blocks.29.attn.qkv.weight": "model-00001-of-00002.safetensors",
710
+ "visual.blocks.29.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
711
+ "visual.blocks.29.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
712
+ "visual.blocks.29.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
713
+ "visual.blocks.29.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
714
+ "visual.blocks.29.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
715
+ "visual.blocks.29.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
716
+ "visual.blocks.29.norm1.weight": "model-00001-of-00002.safetensors",
717
+ "visual.blocks.29.norm2.weight": "model-00001-of-00002.safetensors",
718
+ "visual.blocks.3.attn.proj.bias": "model-00001-of-00002.safetensors",
719
+ "visual.blocks.3.attn.proj.weight": "model-00001-of-00002.safetensors",
720
+ "visual.blocks.3.attn.qkv.bias": "model-00001-of-00002.safetensors",
721
+ "visual.blocks.3.attn.qkv.weight": "model-00001-of-00002.safetensors",
722
+ "visual.blocks.3.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
723
+ "visual.blocks.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
724
+ "visual.blocks.3.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
725
+ "visual.blocks.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
726
+ "visual.blocks.3.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
727
+ "visual.blocks.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
728
+ "visual.blocks.3.norm1.weight": "model-00001-of-00002.safetensors",
729
+ "visual.blocks.3.norm2.weight": "model-00001-of-00002.safetensors",
730
+ "visual.blocks.30.attn.proj.bias": "model-00001-of-00002.safetensors",
731
+ "visual.blocks.30.attn.proj.weight": "model-00001-of-00002.safetensors",
732
+ "visual.blocks.30.attn.qkv.bias": "model-00001-of-00002.safetensors",
733
+ "visual.blocks.30.attn.qkv.weight": "model-00001-of-00002.safetensors",
734
+ "visual.blocks.30.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
735
+ "visual.blocks.30.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
736
+ "visual.blocks.30.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
737
+ "visual.blocks.30.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
738
+ "visual.blocks.30.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
739
+ "visual.blocks.30.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
740
+ "visual.blocks.30.norm1.weight": "model-00001-of-00002.safetensors",
741
+ "visual.blocks.30.norm2.weight": "model-00001-of-00002.safetensors",
742
+ "visual.blocks.31.attn.proj.bias": "model-00001-of-00002.safetensors",
743
+ "visual.blocks.31.attn.proj.weight": "model-00001-of-00002.safetensors",
744
+ "visual.blocks.31.attn.qkv.bias": "model-00001-of-00002.safetensors",
745
+ "visual.blocks.31.attn.qkv.weight": "model-00001-of-00002.safetensors",
746
+ "visual.blocks.31.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
747
+ "visual.blocks.31.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
748
+ "visual.blocks.31.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
749
+ "visual.blocks.31.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
750
+ "visual.blocks.31.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
751
+ "visual.blocks.31.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
752
+ "visual.blocks.31.norm1.weight": "model-00001-of-00002.safetensors",
753
+ "visual.blocks.31.norm2.weight": "model-00001-of-00002.safetensors",
754
+ "visual.blocks.4.attn.proj.bias": "model-00001-of-00002.safetensors",
755
+ "visual.blocks.4.attn.proj.weight": "model-00001-of-00002.safetensors",
756
+ "visual.blocks.4.attn.qkv.bias": "model-00001-of-00002.safetensors",
757
+ "visual.blocks.4.attn.qkv.weight": "model-00001-of-00002.safetensors",
758
+ "visual.blocks.4.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
759
+ "visual.blocks.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
760
+ "visual.blocks.4.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
761
+ "visual.blocks.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
762
+ "visual.blocks.4.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
763
+ "visual.blocks.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
764
+ "visual.blocks.4.norm1.weight": "model-00001-of-00002.safetensors",
765
+ "visual.blocks.4.norm2.weight": "model-00001-of-00002.safetensors",
766
+ "visual.blocks.5.attn.proj.bias": "model-00001-of-00002.safetensors",
767
+ "visual.blocks.5.attn.proj.weight": "model-00001-of-00002.safetensors",
768
+ "visual.blocks.5.attn.qkv.bias": "model-00001-of-00002.safetensors",
769
+ "visual.blocks.5.attn.qkv.weight": "model-00001-of-00002.safetensors",
770
+ "visual.blocks.5.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
771
+ "visual.blocks.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
772
+ "visual.blocks.5.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
773
+ "visual.blocks.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
774
+ "visual.blocks.5.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
775
+ "visual.blocks.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
776
+ "visual.blocks.5.norm1.weight": "model-00001-of-00002.safetensors",
777
+ "visual.blocks.5.norm2.weight": "model-00001-of-00002.safetensors",
778
+ "visual.blocks.6.attn.proj.bias": "model-00001-of-00002.safetensors",
779
+ "visual.blocks.6.attn.proj.weight": "model-00001-of-00002.safetensors",
780
+ "visual.blocks.6.attn.qkv.bias": "model-00001-of-00002.safetensors",
781
+ "visual.blocks.6.attn.qkv.weight": "model-00001-of-00002.safetensors",
782
+ "visual.blocks.6.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
783
+ "visual.blocks.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
784
+ "visual.blocks.6.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
785
+ "visual.blocks.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
786
+ "visual.blocks.6.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
787
+ "visual.blocks.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
788
+ "visual.blocks.6.norm1.weight": "model-00001-of-00002.safetensors",
789
+ "visual.blocks.6.norm2.weight": "model-00001-of-00002.safetensors",
790
+ "visual.blocks.7.attn.proj.bias": "model-00001-of-00002.safetensors",
791
+ "visual.blocks.7.attn.proj.weight": "model-00001-of-00002.safetensors",
792
+ "visual.blocks.7.attn.qkv.bias": "model-00001-of-00002.safetensors",
793
+ "visual.blocks.7.attn.qkv.weight": "model-00001-of-00002.safetensors",
794
+ "visual.blocks.7.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
795
+ "visual.blocks.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
796
+ "visual.blocks.7.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
797
+ "visual.blocks.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
798
+ "visual.blocks.7.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
799
+ "visual.blocks.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
800
+ "visual.blocks.7.norm1.weight": "model-00001-of-00002.safetensors",
801
+ "visual.blocks.7.norm2.weight": "model-00001-of-00002.safetensors",
802
+ "visual.blocks.8.attn.proj.bias": "model-00001-of-00002.safetensors",
803
+ "visual.blocks.8.attn.proj.weight": "model-00001-of-00002.safetensors",
804
+ "visual.blocks.8.attn.qkv.bias": "model-00001-of-00002.safetensors",
805
+ "visual.blocks.8.attn.qkv.weight": "model-00001-of-00002.safetensors",
806
+ "visual.blocks.8.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
807
+ "visual.blocks.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
808
+ "visual.blocks.8.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
809
+ "visual.blocks.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
810
+ "visual.blocks.8.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
811
+ "visual.blocks.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
812
+ "visual.blocks.8.norm1.weight": "model-00001-of-00002.safetensors",
813
+ "visual.blocks.8.norm2.weight": "model-00001-of-00002.safetensors",
814
+ "visual.blocks.9.attn.proj.bias": "model-00001-of-00002.safetensors",
815
+ "visual.blocks.9.attn.proj.weight": "model-00001-of-00002.safetensors",
816
+ "visual.blocks.9.attn.qkv.bias": "model-00001-of-00002.safetensors",
817
+ "visual.blocks.9.attn.qkv.weight": "model-00001-of-00002.safetensors",
818
+ "visual.blocks.9.mlp.down_proj.bias": "model-00001-of-00002.safetensors",
819
+ "visual.blocks.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
820
+ "visual.blocks.9.mlp.gate_proj.bias": "model-00001-of-00002.safetensors",
821
+ "visual.blocks.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
822
+ "visual.blocks.9.mlp.up_proj.bias": "model-00001-of-00002.safetensors",
823
+ "visual.blocks.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
824
+ "visual.blocks.9.norm1.weight": "model-00001-of-00002.safetensors",
825
+ "visual.blocks.9.norm2.weight": "model-00001-of-00002.safetensors",
826
+ "visual.merger.ln_q.weight": "model-00001-of-00002.safetensors",
827
+ "visual.merger.mlp.0.bias": "model-00001-of-00002.safetensors",
828
+ "visual.merger.mlp.0.weight": "model-00001-of-00002.safetensors",
829
+ "visual.merger.mlp.2.bias": "model-00001-of-00002.safetensors",
830
+ "visual.merger.mlp.2.weight": "model-00001-of-00002.safetensors",
831
+ "visual.patch_embed.proj.weight": "model-00001-of-00002.safetensors"
832
+ }
833
+ }
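The weight map above follows the standard Hugging Face sharded-safetensors index layout: each parameter name is keyed to the shard file that stores it. A minimal consumption sketch, assuming the index and both shard files sit in the working directory (variable names are illustrative and not part of this repository):

    import json
    from collections import defaultdict
    from safetensors import safe_open

    with open("model.safetensors.index.json") as f:
        index = json.load(f)

    # Group parameter names by the shard that holds them
    shard_to_params = defaultdict(list)
    for name, shard_file in index["weight_map"].items():
        shard_to_params[shard_file].append(name)

    # Read each tensor from its shard (PyTorch tensors assumed)
    state_dict = {}
    for shard_file, names in shard_to_params.items():
        with safe_open(shard_file, framework="pt") as shard:
            for name in names:
                state_dict[name] = shard.get_tensor(name)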
modeling_jina_embeddings_v4.py ADDED
@@ -0,0 +1,609 @@
1
+ # Jina Embeddings V4 Model implementation was inspired by the ColPali codebase:
2
+ # https://github.com/illuin-tech/colpali
3
+
4
+ import os
5
+ from dataclasses import dataclass
6
+ from enum import Enum
7
+ from functools import partial
8
+ from io import BytesIO
9
+ from typing import Any, Callable, ClassVar, Dict, List, Optional, Union, cast
10
+
11
+ import numpy as np
12
+ import requests
13
+ import torch
14
+ from huggingface_hub import snapshot_download
15
+ from peft import LoraConfig, PeftModel
16
+ from PIL import Image
17
+ from torch import nn
18
+ from torch.utils.data import DataLoader
19
+ from tqdm import tqdm
20
+ from transformers import BatchFeature
21
+ from transformers.utils import is_flash_attn_2_available
22
+
23
+ from .configuration_jina_embeddings_v4 import JinaEmbeddingsV4Config
24
+ from .custom_lora_module import MultiAdapterLinear
25
+ from .qwen2_5_vl import Qwen2_5_VLForConditionalGeneration, Qwen2_5_VLProcessor
26
+
27
+
28
+ class PromptType(str, Enum):
29
+ query = "query"
30
+ passage = "passage"
31
+
32
+
33
+ PREFIX_DICT = {"query": "Query", "passage": "Passage"}
34
+
35
+
36
+ class JinaEmbeddingsV4Processor(Qwen2_5_VLProcessor):
37
+ def __init__(self, *args, **kwargs) -> None:
38
+ Qwen2_5_VLProcessor.__init__(self, *args, **kwargs)
39
+ self.assistant_prefix_len = 58
40
+ self.text_max_length = 32768
41
+
42
+ def process_images(
43
+ self,
44
+ images: Union[List[Image.Image], List[List[Image.Image]]],
45
+ ) -> BatchFeature:
46
+
47
+ if isinstance(images[0], list):
48
+ images = cast(List[List[Image.Image]], images)
49
+ text_doc = []
50
+ for i in range(len(images)):
51
+ conversation = [
52
+ {"role": "user", "content": [{"type": "image"}] * len(images[i])}
53
+ ]
54
+ template = self.apply_chat_template(
55
+ conversation, add_generation_prompt=False
56
+ )
57
+ text_doc.append(template[self.assistant_prefix_len :])
58
+
59
+ else:
60
+ images = cast(List[Image.Image], images)
61
+ text_doc = [
62
+ "<|im_start|>user\n<|vision_start|><|image_pad|><|vision_end|>Describe the image.<|im_end|>\n"
63
+ ] * len(images)
64
+
65
+ # The following code is a hack to make sure the scatter in DDP is done correctly when training on multiple GPUs
66
+ batch_doc = self(text=text_doc, images=images, padding="longest", return_tensors="pt") # type: ignore
67
+ # Separate pixel_values for each image
68
+ offsets = batch_doc["image_grid_thw"][:, 1] * batch_doc["image_grid_thw"][:, 2]
69
+ # Split pixel_values per image, then pad the chunks to the same length so they can be stacked into a single tensor
70
+ pixel_values = torch.split(batch_doc["pixel_values"], offsets.tolist())
71
+
72
+ max_length = max([len(pv) for pv in pixel_values])
73
+
74
+ pixel_values = [
75
+ torch.cat(
76
+ [
77
+ pv,
78
+ torch.zeros(
79
+ (max_length - len(pv), pv.shape[1]),
80
+ dtype=pv.dtype,
81
+ device=pv.device,
82
+ ),
83
+ ]
84
+ )
85
+ for pv in pixel_values
86
+ ]
87
+
88
+ batch_doc["pixel_values"] = torch.stack(pixel_values)
89
+ return batch_doc
90
+
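# Editorial sketch (not part of this file): the zero-padding above lets per-image
# patch tensors of unequal length be stacked into one batch tensor; the model's
# get_last_hidden_states later slices each row back to its true length using
# image_grid_thw. Illustrative shapes only:
#
#   image_grid_thw = [[1, 4, 6], [1, 8, 8]]  ->  offsets = [24, 64] patch rows
#   pixel_values is split into chunks of 24 and 64 rows, the first chunk is padded
#   with 40 zero rows, and the two are stacked into a (2, 64, patch_dim) tensor.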
91
+ def process_texts(
92
+ self,
93
+ texts: List[str],
94
+ max_length: Optional[int] = None,
95
+ prefix: Optional[str] = None,
96
+ padding: Optional[str] = None,
97
+ ) -> BatchFeature:
98
+
99
+ max_length = (
100
+ self.text_max_length
101
+ if max_length is None
102
+ else min(max_length, self.text_max_length)
103
+ )
104
+ padded_texts: List[str] = []
105
+
106
+ for text in texts:
107
+ if prefix:
108
+ text = f"{prefix}: {text}"
109
+ padded_texts.append(text)
110
+
111
+ text_batch = self(
112
+ text=padded_texts,
113
+ return_tensors="pt",
114
+ padding=padding or "longest",
115
+ max_length=max_length,
116
+ truncation=True,
117
+ )
118
+
119
+ return text_batch
120
+
121
+
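# Editorial usage sketch (not part of this file; the checkpoint id is a placeholder
# that mirrors the from_pretrained call further down in this module):
#
#   processor = JinaEmbeddingsV4Processor.from_pretrained(
#       "<model-repo-or-path>", trust_remote_code=True, use_fast=True
#   )
#   text_batch = processor.process_texts(["what is a sharded checkpoint?"], prefix="Query")
#   image_batch = processor.process_images([Image.open("page.png")])  # PIL image, as imported above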
122
+ @dataclass
123
+ class JinaEmbeddingsV4ModelOutput:
124
+ """
125
+ Base class for the Hybrid Model outputs.
126
+ Args:
127
+ vlm_last_hidden_states (torch.Tensor, optional): Last hidden states of the VLM.
128
+ single_vec_emb (torch.Tensor, optional): Single-vector embeddings.
129
+ multi_vec_emb (torch.Tensor, optional): Multi-vector embeddings.
130
+ """
131
+
132
+ vlm_last_hidden_states: Optional[torch.Tensor] = None
133
+ single_vec_emb: Optional[torch.Tensor] = None
134
+ multi_vec_emb: Optional[torch.Tensor] = None
135
+
136
+
137
+ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
138
+ config_class = JinaEmbeddingsV4Config
139
+ main_input_name: ClassVar[str] = "doc_input_ids"
140
+
141
+ def __init__(self, config: JinaEmbeddingsV4Config):
142
+ Qwen2_5_VLForConditionalGeneration.__init__(self, config)
143
+ self._init_projection_layer(config)
144
+ self.post_init()
145
+ self.processor = JinaEmbeddingsV4Processor.from_pretrained(
146
+ self.name_or_path, trust_remote_code=True, use_fast=True
147
+ )
148
+ self.multi_vector_projector_dim = config.multi_vector_projector_dim
149
+ self.verbosity = config.verbosity
150
+ self._task = None
151
+
152
+ @property
153
+ def task(self) -> Optional[str]:
154
+ """Get the current task set for the model."""
155
+ return self._task
156
+
157
+ @task.setter
158
+ def task(self, task: str):
159
+ """
160
+ Set the task for the model.
161
+
162
+ Args:
163
+ task (str): The task name. Must be one of ['retrieval', 'text-matching', 'code']
164
+ """
165
+ if task not in self.config.task_names:
166
+ raise ValueError(
167
+ f"Invalid task: {task}. Must be one of {self.config.task_names}."
168
+ )
169
+ self._task = task
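+ # Example: model.task = "retrieval" makes subsequent encode_text / encode_image calls use the retrieval adapter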
170
+
171
+ def get_last_hidden_states(
172
+ self,
173
+ task_label: Union[str, List[str]],
174
+ input_ids: torch.LongTensor,
175
+ attention_mask: torch.Tensor,
176
+ **kwargs,
177
+ ) -> torch.Tensor:
178
+ if "pixel_values" in kwargs:
179
+ offsets = kwargs["image_grid_thw"][:, 1] * kwargs["image_grid_thw"][:, 2]
180
+ kwargs["pixel_values"] = torch.cat(
181
+ [pv[:o] for pv, o in zip(kwargs["pixel_values"], offsets)], dim=0
182
+ )
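+ # Slicing pv[:o] drops the zero padding added in process_images, restoring the flat (total_patches, patch_dim) layout expected by the vision encoder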
183
+ position_ids, rope_deltas = self.model.get_rope_index(
184
+ input_ids=input_ids,
185
+ image_grid_thw=kwargs.get("image_grid_thw", None),
186
+ attention_mask=attention_mask,
187
+ )
188
+
189
+ kwargs["output_hidden_states"] = True
190
+ outputs = super().forward(
191
+ task_label=task_label,
192
+ input_ids=input_ids,
193
+ attention_mask=attention_mask,
194
+ **kwargs,
195
+ position_ids=position_ids,
196
+ rope_deltas=rope_deltas,
197
+ use_cache=False,
198
+ )
199
+
200
+ hidden_states = outputs.hidden_states
201
+ if not hidden_states:
202
+ raise ValueError("Hidden states not found in model output")
203
+
204
+ return hidden_states[-1]
205
+
206
+ def _init_projection_layer(self, config) -> None:
207
+ """
208
+ Initializes projection layers.
209
+ """
210
+ self.config.multi_vector_projector_dim = config.multi_vector_projector_dim
211
+
212
+ self.multi_vector_projector = nn.Linear(
213
+ in_features=self.config.text_config.hidden_size,
214
+ out_features=self.config.multi_vector_projector_dim,
215
+ )
216
+
217
+ def get_single_vector_embeddings(
218
+ self,
219
+ hidden_states: torch.Tensor,
220
+ attention_mask: torch.Tensor,
221
+ input_ids: Optional[torch.LongTensor] = None,
222
+ ) -> torch.Tensor:
223
+ """
224
+ Get the single-vector embeddings from the hidden states.
225
+ """
226
+ if self._input_has_image(input_ids[0]): # got document image
227
+ img_start_positions = torch.where(
228
+ input_ids == self.config.vision_start_token_id
229
+ )[1]
230
+ img_end_positions = torch.where(
231
+ input_ids == self.config.vision_end_token_id
232
+ )[1]
233
+
234
+ batch_size, seq_len = input_ids.shape
235
+ position_indices = torch.arange(seq_len, device=input_ids.device).expand(
236
+ batch_size, -1
237
+ )
238
+ image_mask = (position_indices >= img_start_positions.unsqueeze(1)) & (
239
+ position_indices <= img_end_positions.unsqueeze(1)
240
+ )
241
+
242
+ masked_hidden_states = hidden_states * image_mask.unsqueeze(-1)
243
+ pooled_output = masked_hidden_states.sum(dim=1) / image_mask.sum(
244
+ dim=1, keepdim=True
245
+ )
246
+ else: # got query text
247
+ pooled_output = torch.sum(
248
+ hidden_states * attention_mask.unsqueeze(-1), dim=1
249
+ ) / torch.sum(attention_mask, dim=1, keepdim=True)
250
+
251
+ return torch.nn.functional.normalize(pooled_output, dim=-1)
252
+
253
+ def get_multi_vector_embeddings(
254
+ self,
255
+ task_label: Union[str, List[str]],
256
+ hidden_states: torch.Tensor,
257
+ attention_mask: torch.Tensor,
258
+ ) -> torch.Tensor:
259
+ """
260
+ Project the hidden states to multi-vector embeddings.
261
+ """
262
+ multi_vec_emb = self.multi_vector_projector(
263
+ hidden_states, task_label=task_label
264
+ )
265
+ multi_vec_emb = torch.nn.functional.normalize(multi_vec_emb, dim=-1)
266
+ return multi_vec_emb * attention_mask.unsqueeze(-1)
267
+
268
+ def _input_has_image(self, input_ids):
269
+ return self.config.vision_start_token_id in input_ids
270
+
271
+ def forward(
272
+ self,
273
+ task_label: Union[str, List[str]],
274
+ input_ids: torch.LongTensor,
275
+ attention_mask: torch.Tensor,
276
+ output_vlm_last_hidden_states: bool = False,
277
+ **kwargs,
278
+ ) -> JinaEmbeddingsV4ModelOutput:
279
+ """
280
+ Forward pass through the model. Returns both single-vector and multi-vector embeddings.
281
+ Args:
282
+ input_ids (torch.Tensor): The input tokens tensor.
283
+ attention_mask (torch.Tensor): The attention mask tensor.
284
+ Returns:
285
+ JinaEmbeddingsV4ModelOutput:
286
+ vlm_last_hidden_states (torch.Tensor, optional): Last hidden states of the VLM.
287
+ single_vec_emb (torch.Tensor, optional): Single-vector embeddings.
288
+ multi_vec_emb (torch.Tensor, optional): Multi-vector embeddings.
289
+ """
290
+ # Forward pass through the VLM
291
+ hidden_states = self.get_last_hidden_states(
292
+ input_ids=input_ids,
293
+ attention_mask=attention_mask,
294
+ task_label=task_label,
295
+ **kwargs,
296
+ ) # (batch_size, seq_length, hidden_size)
297
+ # Compute the embeddings
298
+ single_vec_emb = self.get_single_vector_embeddings(
299
+ hidden_states=hidden_states,
300
+ attention_mask=attention_mask,
301
+ input_ids=input_ids,
302
+ )
303
+ multi_vec_emb = self.get_multi_vector_embeddings(
304
+ hidden_states=hidden_states,
305
+ attention_mask=attention_mask,
306
+ task_label=task_label,
307
+ )
308
+
309
+ return JinaEmbeddingsV4ModelOutput(
310
+ vlm_last_hidden_states=(
311
+ hidden_states if output_vlm_last_hidden_states else None
312
+ ),
313
+ single_vec_emb=single_vec_emb,
314
+ multi_vec_emb=multi_vec_emb,
315
+ )
316
+
317
+ def _process_batches(
318
+ self,
319
+ data: List[Union[str, Image.Image]],
320
+ task_label: Union[str, List[str]],
321
+ processor_fn: Callable,
322
+ desc: str,
323
+ return_multivector: bool = False,
324
+ return_numpy: bool = False,
325
+ batch_size: int = 32,
326
+ truncate_dim: Optional[int] = None,
327
+ ) -> Union[np.ndarray, List[torch.Tensor]]:
328
+ dataloader = DataLoader(
329
+ dataset=data,
330
+ batch_size=batch_size,
331
+ shuffle=False,
332
+ collate_fn=processor_fn,
333
+ )
334
+ if return_multivector and len(data) > 1:
335
+ assert (
336
+ not return_numpy
337
+ ), "`return_numpy` is not supported when `return_multivector=True` and more than one item is encoded"
338
+ results = []
339
+ self.eval()
340
+ for batch in tqdm(dataloader, desc=desc, disable=self.verbosity == 0):
341
+ with torch.no_grad():
342
+ batch = {k: v.to(self.device) for k, v in batch.items()}
343
+ with torch.autocast(
344
+ device_type=torch.device(self.device).type, dtype=torch.bfloat16
345
+ ):
346
+ embeddings = self(**batch, task_label=task_label)
347
+ if not return_multivector:
348
+ embeddings = embeddings.single_vec_emb
349
+ if truncate_dim is not None:
350
+ embeddings = embeddings[:, :truncate_dim]
351
+ embeddings = torch.nn.functional.normalize(
352
+ embeddings, p=2, dim=-1
353
+ )
354
+ else:
355
+ embeddings = embeddings.multi_vec_emb
356
+
357
+ if return_multivector and not return_numpy:
358
+ valid_tokens = batch["attention_mask"].bool()
359
+ embeddings = [
360
+ emb[mask] for emb, mask in zip(embeddings, valid_tokens)
361
+ ]
362
+ results.append(embeddings)
363
+ else:
364
+ results.append(
365
+ embeddings.cpu()
366
+ if return_numpy
367
+ else list(torch.unbind(embeddings))
368
+ )
369
+ if return_numpy:
370
+ return np.concatenate([result.numpy() for result in results], axis=0)
371
+ return [item for sublist in results for item in sublist]
372
+
373
+ def _validate_encoding_params(
374
+ self,
375
+ truncate_dim: Optional[int] = None,
376
+ prompt_name: Optional[str] = None,
377
+ ) -> Dict[str, Any]:
378
+ encode_kwargs = {}
379
+ if prompt_name is not None:
380
+ if prompt_name not in PREFIX_DICT:
381
+ raise ValueError(
382
+ f"Invalid prompt_name: {prompt_name}. Must be one of {list(PREFIX_DICT.keys())}."
383
+ )
384
+ else:
385
+ encode_kwargs["prefix"] = (
386
+ PREFIX_DICT[prompt_name]
387
+ if self.task != "text-matching"
388
+ else PREFIX_DICT["query"]
389
+ )
390
+
391
+ truncate_dim = truncate_dim or self.config.truncate_dim
392
+ if truncate_dim is not None and truncate_dim not in self.config.matryoshka_dims:
393
+ raise ValueError(
394
+ f"Invalid truncate_dim: {truncate_dim}. Must be one of {self.config.matryoshka_dims}."
395
+ )
396
+ else:
397
+ encode_kwargs["truncate_dim"] = truncate_dim
398
+
399
+ return encode_kwargs
400
+
401
+ def _validate_task(self, task: Optional[str] = None) -> str:
402
+ if task is None:
403
+ if self.task is None:
404
+ raise ValueError(
405
+ "Task must be specified before encoding data. You can set it either as a model property "
406
+ "(e.g., model.task = 'retrieval') or pass it as an argument to the encode method."
407
+ )
408
+ task = self.task
409
+ else:
410
+ if task not in self.config.task_names:
411
+ raise ValueError(
412
+ f"Invalid task: {task}. Must be one of {self.config.task_names}."
413
+ )
414
+ return task
415
+
416
+ def encode_text(
417
+ self,
418
+ texts: Union[str, List[str]],
419
+ task: Optional[str] = None,
420
+ max_length: int = 32768,
421
+ batch_size: int = 8,
422
+ return_multivector: bool = False,
423
+ return_numpy: bool = False,
424
+ truncate_dim: Optional[int] = None,
425
+ prompt_name: Optional[str] = None,
426
+ ) -> Union[List[torch.Tensor], torch.Tensor]:
427
+ """
428
+ Encodes a list of texts into embeddings.
429
+
430
+ Args:
431
+ texts: Text or list of text strings to encode
+ task: Task adapter to use ('retrieval', 'text-matching', or 'code'); defaults to the task already set on the model
432
+ max_length: Maximum token length for text processing
433
+ batch_size: Number of texts to process at once
434
+ return_multivector: Whether to return multi-vector embeddings instead of single-vector embeddings
435
+ return_numpy: Whether to return numpy arrays instead of torch tensors
436
+ truncate_dim: Dimension to truncate embeddings to (128, 256, 512, or 1024)
437
+ prompt_name: Type of text being encoded ('query' or 'passage')
438
+
439
+ Returns:
440
+ List of text embeddings as tensors or numpy arrays when encoding multiple texts, or single text embedding as tensor when encoding a single text
441
+ """
442
+ prompt_name = prompt_name or "query"
443
+ encode_kwargs = self._validate_encoding_params(
444
+ truncate_dim=truncate_dim, prompt_name=prompt_name
445
+ )
446
+
447
+ task = self._validate_task(task)
448
+
449
+ processor_fn = partial(
450
+ self.processor.process_texts,
451
+ max_length=max_length,
452
+ prefix=encode_kwargs.pop("prefix"),
453
+ )
454
+
455
+ return_list = isinstance(texts, list)
456
+
457
+ # If return_multivector is True and encoding multiple texts, ignore return_numpy
458
+ if return_multivector and return_list and len(texts) > 1:
459
+ if return_numpy:
460
+ print(
461
+ "Warning: `return_numpy` is ignored when `return_multivector=True` and `len(texts) > 1`"
462
+ )
463
+ return_numpy = False
464
+
465
+ if isinstance(texts, str):
466
+ texts = [texts]
467
+
468
+ embeddings = self._process_batches(
469
+ data=texts,
470
+ processor_fn=processor_fn,
471
+ desc="Encoding texts...",
472
+ task_label=task,
473
+ return_multivector=return_multivector,
474
+ return_numpy=return_numpy,
475
+ batch_size=batch_size,
476
+ **encode_kwargs,
477
+ )
478
+
479
+ return embeddings if return_list else embeddings[0]
480
+
481
+ def _load_images_if_needed(
482
+ self, images: List[Union[str, Image.Image]]
483
+ ) -> List[Image.Image]:
484
+ loaded_images = []
485
+ for image in images:
486
+ if isinstance(image, str):
487
+ if image.startswith("http"):
488
+ response = requests.get(image)
489
+ image = Image.open(BytesIO(response.content)).convert("RGB")
490
+ else:
491
+ image = Image.open(image).convert("RGB")
492
+ loaded_images.append(image)
493
+ return loaded_images
494
+
495
+ def encode_image(
496
+ self,
497
+ images: Union[str, Image.Image, List[Union[str, Image.Image]]],
498
+ task: Optional[str] = None,
499
+ batch_size: int = 8,
500
+ return_multivector: bool = False,
501
+ return_numpy: bool = False,
502
+ truncate_dim: Optional[int] = None,
503
+ max_pixels: Optional[int] = None,
504
+ ) -> Union[List[torch.Tensor], torch.Tensor]:
505
+ """
506
+ Encodes a list of images or a single image into embedding(s).
507
+
508
+ Args:
509
+ images: Image(s) to encode; can be PIL Image(s), URL(s), or local file path(s)
+ task: Task adapter to use ('retrieval', 'text-matching', or 'code'); defaults to the task already set on the model
510
+ batch_size: Number of images to process at once
511
+ return_multivector: Whether to return multi-vector embeddings instead of single-vector embeddings
512
+ return_numpy: Whether to return numpy arrays instead of torch tensors. If `return_multivector` is `True` and more than one image is encoded, this parameter is ignored.
513
+ truncate_dim: Dimension to truncate embeddings to (128, 256, 512, or 1024)
514
+ max_pixels: Maximum number of pixels to process per image
515
+
516
+ Returns:
517
+ List of image embeddings as tensors or numpy arrays when encoding multiple images, or single image embedding as tensor when encoding a single image
518
+ """
519
+ if max_pixels:
520
+ default_max_pixels = self.processor.image_processor.max_pixels
521
+ self.processor.image_processor.max_pixels = (
522
+ max_pixels # change during encoding
523
+ )
524
+ encode_kwargs = self._validate_encoding_params(truncate_dim=truncate_dim)
525
+ task = self._validate_task(task)
526
+
527
+ return_list = isinstance(images, list)
528
+
529
+ # If return_multivector is True and encoding multiple images, ignore return_numpy
530
+ if return_multivector and return_list and len(images) > 1:
531
+ if return_numpy:
532
+ print(
533
+ "Warning: `return_numpy` is ignored when `return_multivector=True` and `len(images) > 1`"
534
+ )
535
+ return_numpy = False
536
+
537
+ # Convert single image to list
538
+ if isinstance(images, (str, Image.Image)):
539
+ images = [images]
540
+
541
+ images = self._load_images_if_needed(images)
542
+ embeddings = self._process_batches(
543
+ data=images,
544
+ processor_fn=self.processor.process_images,
545
+ desc="Encoding images...",
546
+ task_label=task,
547
+ batch_size=batch_size,
548
+ return_multivector=return_multivector,
549
+ return_numpy=return_numpy,
550
+ **encode_kwargs,
551
+ )
552
+
553
+ if max_pixels:
554
+ self.processor.image_processor.max_pixels = default_max_pixels
555
+
556
+ return embeddings if return_list else embeddings[0]
557
+
558
+ @classmethod
559
+ def from_pretrained(
560
+ cls,
561
+ pretrained_model_name_or_path,
562
+ *args,
563
+ **kwargs,
564
+ ):
565
+ """
566
+ Loads a pretrained model and configures it with the appropriate task adapter (`retrieval` by default).
567
+ """
568
+ if "torch_dtype" not in kwargs:
569
+ kwargs["torch_dtype"] = "auto"
570
+
571
+ kwargs["key_mapping"] = super()._checkpoint_conversion_mapping
572
+ if not is_flash_attn_2_available():
573
+ kwargs["attn_implementation"] = "sdpa"
574
+
575
+ base_model = super().from_pretrained(
576
+ pretrained_model_name_or_path, *args, **kwargs
577
+ )
578
+
579
+ # Configure adapter directory
580
+ if os.path.isdir(base_model.name_or_path):
581
+ adapter_dir = os.path.join(base_model.name_or_path, "adapters")
582
+ else:
583
+ adapter_cache_path = snapshot_download(
584
+ repo_id=base_model.name_or_path, allow_patterns=["adapters/*"]
585
+ )
586
+ adapter_dir = os.path.join(adapter_cache_path, "adapters")
587
+
588
+ lora_config = LoraConfig.from_pretrained(adapter_dir)
589
+ lora_config._custom_modules = {
590
+ torch.nn.modules.linear.Linear: partial(
591
+ MultiAdapterLinear,
592
+ task_names=base_model.config.task_names,
593
+ )
594
+ }
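+ # nn.Linear layers targeted by the LoRA config are replaced with MultiAdapterLinear (partial-bound to the task names) so the forward pass can pick the adapter matching task_label; presumably one adapter per entry in config.task_names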
595
+ peft_model = PeftModel.from_pretrained(
596
+ model=base_model,
597
+ model_id=adapter_dir,
598
+ config=lora_config,
599
+ )
600
+
601
+ def task_getter(self):
602
+ return self.model.task
603
+
604
+ def task_setter(self, value):
605
+ self.model.task = value
606
+
607
+ peft_model.__class__.task = property(task_getter, task_setter)
608
+
609
+ return peft_model
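Minimal usage sketch for the API defined above (illustrative only: the checkpoint path and inputs are placeholders, and the module name follows the auto_map entry in preprocessor_config.json below):

from modeling_jina_embeddings_v4 import JinaEmbeddingsV4Model

model = JinaEmbeddingsV4Model.from_pretrained("path/to/checkpoint")  # placeholder path
model.task = "retrieval"  # or "text-matching" / "code"

# Single-vector embeddings, optionally truncated to a Matryoshka dimension (128, 256, 512, or 1024)
query_vecs = model.encode_text(["quarterly revenue table"], prompt_name="query", truncate_dim=512)

# Multi-vector embeddings (one vector per token) for document page images
page_vecs = model.encode_image(["page.png"], return_multivector=True)  # placeholder image path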
modules.json ADDED
@@ -0,0 +1,9 @@
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "transformer",
5
+ "path": "",
6
+ "type": "custom_st.Transformer",
7
+ "kwargs": ["task", "truncate_dim"]
8
+ }
9
+ ]
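modules.json above registers a custom sentence-transformers module (custom_st.Transformer) that forwards the task and truncate_dim kwargs. A hedged loading sketch; the exact keyword handling is defined inside custom_st and is assumed here:

from sentence_transformers import SentenceTransformer

# trust_remote_code is required because custom_st.Transformer is repo-provided code
st_model = SentenceTransformer("path/to/checkpoint", trust_remote_code=True)  # placeholder path
vecs = st_model.encode(["an example query"])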
preprocessor_config.json ADDED
@@ -0,0 +1,33 @@
1
+ {
2
+ "do_convert_rgb": true,
3
+ "do_normalize": true,
4
+ "do_rescale": true,
5
+ "do_resize": true,
6
+ "image_mean": [
7
+ 0.48145466,
8
+ 0.4578275,
9
+ 0.40821073
10
+ ],
11
+ "image_processor_type": "Qwen2VLImageProcessor",
12
+ "image_std": [
13
+ 0.26862954,
14
+ 0.26130258,
15
+ 0.27577711
16
+ ],
17
+ "max_pixels": 602112,
18
+ "merge_size": 2,
19
+ "min_pixels": 3136,
20
+ "patch_size": 14,
21
+ "processor_class": "JinaEmbeddingsV4Processor",
22
+ "resample": 3,
23
+ "rescale_factor": 0.00392156862745098,
24
+ "video_processor_type": "Qwen2VLVideoProcessor",
25
+ "size": {
26
+ "longest_edge": 602112,
27
+ "shortest_edge": 3136
28
+ },
29
+ "temporal_patch_size": 2,
30
+ "auto_map": {
31
+ "AutoProcessor": "modeling_jina_embeddings_v4.JinaEmbeddingsV4Processor"
32
+ }
33
+ }
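A quick reading of the pixel budget above, assuming the standard Qwen2-VL patch-merging scheme: with patch_size 14 and merge_size 2, one merged vision token covers a 28 x 28 = 784-pixel area, so max_pixels 602112 / 784 = 768 merged tokens per image at most, and min_pixels 3136 / 784 = 4 tokens at least.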
qwen2_5_vl.py ADDED
The diff for this file is too large to render. See raw diff
 
results.json ADDED
@@ -0,0 +1,582 @@
1
+ {
2
+ "arxivqa_test_subsampled": {
3
+ "ndcg_at_1": 0.844,
4
+ "ndcg_at_3": 0.88524,
5
+ "ndcg_at_5": 0.88954,
6
+ "ndcg_at_10": 0.89512,
7
+ "ndcg_at_20": 0.90085,
8
+ "ndcg_at_50": 0.90479,
9
+ "ndcg_at_100": 0.90578,
10
+ "map_at_1": 0.844,
11
+ "map_at_3": 0.87467,
12
+ "map_at_5": 0.87717,
13
+ "map_at_10": 0.87933,
14
+ "map_at_20": 0.88099,
15
+ "map_at_50": 0.88161,
16
+ "map_at_100": 0.8817,
17
+ "recall_at_1": 0.844,
18
+ "recall_at_3": 0.916,
19
+ "recall_at_5": 0.926,
20
+ "recall_at_10": 0.944,
21
+ "recall_at_20": 0.966,
22
+ "recall_at_50": 0.986,
23
+ "recall_at_100": 0.992,
24
+ "precision_at_1": 0.844,
25
+ "precision_at_3": 0.30533,
26
+ "precision_at_5": 0.1852,
27
+ "precision_at_10": 0.0944,
28
+ "precision_at_20": 0.0483,
29
+ "precision_at_50": 0.01972,
30
+ "precision_at_100": 0.00992,
31
+ "mrr_at_1": 0.844,
32
+ "mrr_at_3": 0.8746666666666665,
33
+ "mrr_at_5": 0.8771666666666665,
34
+ "mrr_at_10": 0.8793301587301586,
35
+ "mrr_at_20": 0.880986183261183,
36
+ "mrr_at_50": 0.8816066058267283,
37
+ "mrr_at_100": 0.8816959272950264,
38
+ "naucs_at_1_max": 0.7413901379085128,
39
+ "naucs_at_1_std": 0.3454872013866209,
40
+ "naucs_at_1_diff1": 0.9600906830113787,
41
+ "naucs_at_3_max": 0.7713307545240329,
42
+ "naucs_at_3_std": 0.4801698457160663,
43
+ "naucs_at_3_diff1": 0.9489240140500664,
44
+ "naucs_at_5_max": 0.7514699573523106,
45
+ "naucs_at_5_std": 0.4375552022610836,
46
+ "naucs_at_5_diff1": 0.9526206879148043,
47
+ "naucs_at_10_max": 0.8086901427237575,
48
+ "naucs_at_10_std": 0.5144891289849284,
49
+ "naucs_at_10_diff1": 0.9513972255568919,
50
+ "naucs_at_20_max": 0.907453177349375,
51
+ "naucs_at_20_std": 0.5683802932937894,
52
+ "naucs_at_20_diff1": 0.9692425990003846,
53
+ "naucs_at_50_max": 0.8709483793517359,
54
+ "naucs_at_50_std": 0.7055488862211612,
55
+ "naucs_at_50_diff1": 0.9626517273576126,
56
+ "naucs_at_100_max": 0.8068394024276366,
57
+ "naucs_at_100_std": 0.7076330532212914,
58
+ "naucs_at_100_diff1": 0.9673202614378978
59
+ },
60
+ "docvqa_test_subsampled": {
61
+ "ndcg_at_1": 0.52328,
62
+ "ndcg_at_3": 0.5841,
63
+ "ndcg_at_5": 0.59975,
64
+ "ndcg_at_10": 0.62669,
65
+ "ndcg_at_20": 0.64245,
66
+ "ndcg_at_50": 0.65661,
67
+ "ndcg_at_100": 0.66492,
68
+ "map_at_1": 0.52328,
69
+ "map_at_3": 0.56911,
70
+ "map_at_5": 0.57786,
71
+ "map_at_10": 0.58881,
72
+ "map_at_20": 0.59317,
73
+ "map_at_50": 0.59548,
74
+ "map_at_100": 0.59622,
75
+ "recall_at_1": 0.52328,
76
+ "recall_at_3": 0.62749,
77
+ "recall_at_5": 0.66519,
78
+ "recall_at_10": 0.74945,
79
+ "recall_at_20": 0.81153,
80
+ "recall_at_50": 0.88248,
81
+ "recall_at_100": 0.93348,
82
+ "precision_at_1": 0.52328,
83
+ "precision_at_3": 0.20916,
84
+ "precision_at_5": 0.13304,
85
+ "precision_at_10": 0.07494,
86
+ "precision_at_20": 0.04058,
87
+ "precision_at_50": 0.01765,
88
+ "precision_at_100": 0.00933,
89
+ "mrr_at_1": 0.5232815964523282,
90
+ "mrr_at_3": 0.5691056910569108,
91
+ "mrr_at_5": 0.5778640059127865,
92
+ "mrr_at_10": 0.5888132193010243,
93
+ "mrr_at_20": 0.5931663069177401,
94
+ "mrr_at_50": 0.5954783504735428,
95
+ "mrr_at_100": 0.5962169799244146,
96
+ "naucs_at_1_max": 0.46089368028029637,
97
+ "naucs_at_1_std": 0.19359243300005127,
98
+ "naucs_at_1_diff1": 0.8483527783001977,
99
+ "naucs_at_3_max": 0.4640279399849662,
100
+ "naucs_at_3_std": 0.1814509120980464,
101
+ "naucs_at_3_diff1": 0.7719022256243834,
102
+ "naucs_at_5_max": 0.45716016762761796,
103
+ "naucs_at_5_std": 0.16428980258139747,
104
+ "naucs_at_5_diff1": 0.750196647594659,
105
+ "naucs_at_10_max": 0.3956528364820721,
106
+ "naucs_at_10_std": 0.09973122080056422,
107
+ "naucs_at_10_diff1": 0.7237863238311393,
108
+ "naucs_at_20_max": 0.35927664451426317,
109
+ "naucs_at_20_std": 0.09080366240903168,
110
+ "naucs_at_20_diff1": 0.6946736504983693,
111
+ "naucs_at_50_max": 0.3626447370884348,
112
+ "naucs_at_50_std": 0.2775120087087966,
113
+ "naucs_at_50_diff1": 0.6534710933108262,
114
+ "naucs_at_100_max": 0.32155287639122004,
115
+ "naucs_at_100_std": 0.3495021025151782,
116
+ "naucs_at_100_diff1": 0.6165810885563539
117
+ },
118
+ "infovqa_test_subsampled": {
119
+ "ndcg_at_1": 0.90283,
120
+ "ndcg_at_3": 0.93062,
121
+ "ndcg_at_5": 0.93567,
122
+ "ndcg_at_10": 0.93969,
123
+ "ndcg_at_20": 0.94324,
124
+ "ndcg_at_50": 0.94401,
125
+ "ndcg_at_100": 0.945,
126
+ "map_at_1": 0.90283,
127
+ "map_at_3": 0.92409,
128
+ "map_at_5": 0.92692,
129
+ "map_at_10": 0.92863,
130
+ "map_at_20": 0.92959,
131
+ "map_at_50": 0.9297,
132
+ "map_at_100": 0.92979,
133
+ "recall_at_1": 0.90283,
134
+ "recall_at_3": 0.94939,
135
+ "recall_at_5": 0.96154,
136
+ "recall_at_10": 0.97368,
137
+ "recall_at_20": 0.98785,
138
+ "recall_at_50": 0.9919,
139
+ "recall_at_100": 0.99798,
140
+ "precision_at_1": 0.90283,
141
+ "precision_at_3": 0.31646,
142
+ "precision_at_5": 0.19231,
143
+ "precision_at_10": 0.09737,
144
+ "precision_at_20": 0.04939,
145
+ "precision_at_50": 0.01984,
146
+ "precision_at_100": 0.00998,
147
+ "mrr_at_1": 0.902834008097166,
148
+ "mrr_at_3": 0.9240890688259108,
149
+ "mrr_at_5": 0.9269230769230767,
150
+ "mrr_at_10": 0.9286316753422016,
151
+ "mrr_at_20": 0.9295898610333593,
152
+ "mrr_at_50": 0.929699602843506,
153
+ "mrr_at_100": 0.929788457049907,
154
+ "naucs_at_1_max": 0.6026903076230651,
155
+ "naucs_at_1_std": 0.261936050485784,
156
+ "naucs_at_1_diff1": 0.9396804875719484,
157
+ "naucs_at_3_max": 0.7565375225904929,
158
+ "naucs_at_3_std": 0.45980620999702715,
159
+ "naucs_at_3_diff1": 0.9534218386220948,
160
+ "naucs_at_5_max": 0.8235249494008307,
161
+ "naucs_at_5_std": 0.5316999544043512,
162
+ "naucs_at_5_diff1": 0.9524604670358964,
163
+ "naucs_at_10_max": 0.8684766575602219,
164
+ "naucs_at_10_std": 0.5944713216706646,
165
+ "naucs_at_10_diff1": 0.9405654098266761,
166
+ "naucs_at_20_max": 0.7830887900175995,
167
+ "naucs_at_20_std": 0.5643438299512757,
168
+ "naucs_at_20_diff1": 0.8929919636352566,
169
+ "naucs_at_50_max": 0.7072835485426375,
170
+ "naucs_at_50_std": 0.5764614839135555,
171
+ "naucs_at_50_diff1": 0.8394879454528887,
172
+ "naucs_at_100_max": 1.0,
173
+ "naucs_at_100_std": 1.0,
174
+ "naucs_at_100_diff1": 1.0
175
+ },
176
+ "tabfquad_test_subsampled": {
177
+ "ndcg_at_1": 0.9,
178
+ "ndcg_at_3": 0.94685,
179
+ "ndcg_at_5": 0.95131,
180
+ "ndcg_at_10": 0.95366,
181
+ "ndcg_at_20": 0.95455,
182
+ "ndcg_at_50": 0.9553,
183
+ "ndcg_at_100": 0.9553,
184
+ "map_at_1": 0.9,
185
+ "map_at_3": 0.9369,
186
+ "map_at_5": 0.9394,
187
+ "map_at_10": 0.9404,
188
+ "map_at_20": 0.94063,
189
+ "map_at_50": 0.94077,
190
+ "map_at_100": 0.94077,
191
+ "recall_at_1": 0.9,
192
+ "recall_at_3": 0.975,
193
+ "recall_at_5": 0.98571,
194
+ "recall_at_10": 0.99286,
195
+ "recall_at_20": 0.99643,
196
+ "recall_at_50": 1.0,
197
+ "recall_at_100": 1.0,
198
+ "precision_at_1": 0.9,
199
+ "precision_at_3": 0.325,
200
+ "precision_at_5": 0.19714,
201
+ "precision_at_10": 0.09929,
202
+ "precision_at_20": 0.04982,
203
+ "precision_at_50": 0.02,
204
+ "precision_at_100": 0.01,
205
+ "mrr_at_1": 0.9,
206
+ "mrr_at_3": 0.936904761904762,
207
+ "mrr_at_5": 0.9394047619047617,
208
+ "mrr_at_10": 0.9403968253968255,
209
+ "mrr_at_20": 0.9406349206349207,
210
+ "mrr_at_50": 0.9407722832722833,
211
+ "mrr_at_100": 0.9407722832722833,
212
+ "naucs_at_1_max": 0.39284046952114193,
213
+ "naucs_at_1_std": 0.06274176337201544,
214
+ "naucs_at_1_diff1": 0.9321395224756563,
215
+ "naucs_at_3_max": 0.98132586367881,
216
+ "naucs_at_3_std": 0.9042950513538718,
217
+ "naucs_at_3_diff1": 0.98132586367881,
218
+ "naucs_at_5_max": 0.967320261437913,
219
+ "naucs_at_5_std": 0.8978758169934754,
220
+ "naucs_at_5_diff1": 1.0,
221
+ "naucs_at_10_max": 1.0,
222
+ "naucs_at_10_std": 0.9346405228758269,
223
+ "naucs_at_10_diff1": 1.0,
224
+ "naucs_at_20_max": 1.0,
225
+ "naucs_at_20_std": 1.0,
226
+ "naucs_at_20_diff1": 1.0,
227
+ "naucs_at_50_max": 1.0,
228
+ "naucs_at_50_std": 1.0,
229
+ "naucs_at_50_diff1": 1.0,
230
+ "naucs_at_100_max": 1.0,
231
+ "naucs_at_100_std": 1.0,
232
+ "naucs_at_100_diff1": 1.0
233
+ },
234
+ "tatdqa_test": {
235
+ "ndcg_at_1": 0.68834,
236
+ "ndcg_at_3": 0.7834,
237
+ "ndcg_at_5": 0.80344,
238
+ "ndcg_at_10": 0.81851,
239
+ "ndcg_at_20": 0.82469,
240
+ "ndcg_at_50": 0.82852,
241
+ "ndcg_at_100": 0.82981,
242
+ "map_at_1": 0.68834,
243
+ "map_at_3": 0.76073,
244
+ "map_at_5": 0.772,
245
+ "map_at_10": 0.7783,
246
+ "map_at_20": 0.78002,
247
+ "map_at_50": 0.78067,
248
+ "map_at_100": 0.78079,
249
+ "recall_at_1": 0.68834,
250
+ "recall_at_3": 0.84872,
251
+ "recall_at_5": 0.89672,
252
+ "recall_at_10": 0.94289,
253
+ "recall_at_20": 0.96719,
254
+ "recall_at_50": 0.98603,
255
+ "recall_at_100": 0.99392,
256
+ "precision_at_1": 0.68834,
257
+ "precision_at_3": 0.28291,
258
+ "precision_at_5": 0.17934,
259
+ "precision_at_10": 0.09429,
260
+ "precision_at_20": 0.04836,
261
+ "precision_at_50": 0.01972,
262
+ "precision_at_100": 0.00994,
263
+ "mrr_at_1": 0.6865127582017011,
264
+ "mrr_at_3": 0.7598217901984609,
265
+ "mrr_at_5": 0.7710307816929933,
266
+ "mrr_at_10": 0.7773322532739296,
267
+ "mrr_at_20": 0.7790656715075932,
268
+ "mrr_at_50": 0.7797137179788176,
269
+ "mrr_at_100": 0.7798294471430899,
270
+ "naucs_at_1_max": 0.19289339347399329,
271
+ "naucs_at_1_std": -0.05373436574034402,
272
+ "naucs_at_1_diff1": 0.8118815353915732,
273
+ "naucs_at_3_max": 0.24444248974914928,
274
+ "naucs_at_3_std": 0.012951438245694854,
275
+ "naucs_at_3_diff1": 0.7252009696977523,
276
+ "naucs_at_5_max": 0.27477480629269946,
277
+ "naucs_at_5_std": 0.10687833140288663,
278
+ "naucs_at_5_diff1": 0.7019146338300569,
279
+ "naucs_at_10_max": 0.23474834180340118,
280
+ "naucs_at_10_std": 0.13375117651376378,
281
+ "naucs_at_10_diff1": 0.6766342016471449,
282
+ "naucs_at_20_max": 0.3762582961131715,
283
+ "naucs_at_20_std": 0.29216428469292166,
284
+ "naucs_at_20_diff1": 0.6564671335087516,
285
+ "naucs_at_50_max": 0.4691053847445,
286
+ "naucs_at_50_std": 0.4359718488363951,
287
+ "naucs_at_50_diff1": 0.7152604718494652,
288
+ "naucs_at_100_max": 0.5259975902909616,
289
+ "naucs_at_100_std": 0.651086653120611,
290
+ "naucs_at_100_diff1": 0.7663843453532901
291
+ },
292
+ "shiftproject_test": {
293
+ "ndcg_at_1": 0.85,
294
+ "ndcg_at_3": 0.91917,
295
+ "ndcg_at_5": 0.92347,
296
+ "ndcg_at_10": 0.92949,
297
+ "ndcg_at_20": 0.92949,
298
+ "ndcg_at_50": 0.92949,
299
+ "ndcg_at_100": 0.92949,
300
+ "map_at_1": 0.85,
301
+ "map_at_3": 0.90167,
302
+ "map_at_5": 0.90417,
303
+ "map_at_10": 0.90639,
304
+ "map_at_20": 0.90639,
305
+ "map_at_50": 0.90639,
306
+ "map_at_100": 0.90639,
307
+ "recall_at_1": 0.85,
308
+ "recall_at_3": 0.97,
309
+ "recall_at_5": 0.98,
310
+ "recall_at_10": 1.0,
311
+ "recall_at_20": 1.0,
312
+ "recall_at_50": 1.0,
313
+ "recall_at_100": 1.0,
314
+ "precision_at_1": 0.85,
315
+ "precision_at_3": 0.32333,
316
+ "precision_at_5": 0.196,
317
+ "precision_at_10": 0.1,
318
+ "precision_at_20": 0.05,
319
+ "precision_at_50": 0.02,
320
+ "precision_at_100": 0.01,
321
+ "mrr_at_1": 0.85,
322
+ "mrr_at_3": 0.9016666666666666,
323
+ "mrr_at_5": 0.9041666666666666,
324
+ "mrr_at_10": 0.9063888888888889,
325
+ "mrr_at_20": 0.9063888888888889,
326
+ "mrr_at_50": 0.9063888888888889,
327
+ "mrr_at_100": 0.9063888888888889,
328
+ "naucs_at_1_max": 0.029189716889034732,
329
+ "naucs_at_1_std": -0.37507321835340074,
330
+ "naucs_at_1_diff1": 0.7931012040351454,
331
+ "naucs_at_3_max": 0.5589791472144446,
332
+ "naucs_at_3_std": 0.09056956115779448,
333
+ "naucs_at_3_diff1": 0.9564270152505466,
334
+ "naucs_at_5_max": 0.3384687208216692,
335
+ "naucs_at_5_std": -0.2987861811391239,
336
+ "naucs_at_5_diff1": 1.0,
337
+ "naucs_at_10_max": 1.0,
338
+ "naucs_at_10_std": 1.0,
339
+ "naucs_at_10_diff1": 1.0,
340
+ "naucs_at_20_max": 1.0,
341
+ "naucs_at_20_std": 1.0,
342
+ "naucs_at_20_diff1": 1.0,
343
+ "naucs_at_50_max": null,
344
+ "naucs_at_50_std": null,
345
+ "naucs_at_50_diff1": null,
346
+ "naucs_at_100_max": null,
347
+ "naucs_at_100_std": null,
348
+ "naucs_at_100_diff1": null
349
+ },
350
+ "syntheticDocQA_artificial_intelligence_test": {
351
+ "ndcg_at_1": 0.98,
352
+ "ndcg_at_3": 0.99262,
353
+ "ndcg_at_5": 0.99262,
354
+ "ndcg_at_10": 0.99262,
355
+ "ndcg_at_20": 0.99262,
356
+ "ndcg_at_50": 0.99262,
357
+ "ndcg_at_100": 0.99262,
358
+ "map_at_1": 0.98,
359
+ "map_at_3": 0.99,
360
+ "map_at_5": 0.99,
361
+ "map_at_10": 0.99,
362
+ "map_at_20": 0.99,
363
+ "map_at_50": 0.99,
364
+ "map_at_100": 0.99,
365
+ "recall_at_1": 0.98,
366
+ "recall_at_3": 1.0,
367
+ "recall_at_5": 1.0,
368
+ "recall_at_10": 1.0,
369
+ "recall_at_20": 1.0,
370
+ "recall_at_50": 1.0,
371
+ "recall_at_100": 1.0,
372
+ "precision_at_1": 0.98,
373
+ "precision_at_3": 0.33333,
374
+ "precision_at_5": 0.2,
375
+ "precision_at_10": 0.1,
376
+ "precision_at_20": 0.05,
377
+ "precision_at_50": 0.02,
378
+ "precision_at_100": 0.01,
379
+ "mrr_at_1": 0.98,
380
+ "mrr_at_3": 0.99,
381
+ "mrr_at_5": 0.99,
382
+ "mrr_at_10": 0.99,
383
+ "mrr_at_20": 0.99,
384
+ "mrr_at_50": 0.99,
385
+ "mrr_at_100": 0.99,
386
+ "naucs_at_1_max": 0.540149393090569,
387
+ "naucs_at_1_std": 0.3384687208216605,
388
+ "naucs_at_1_diff1": 0.9346405228758133,
389
+ "naucs_at_3_max": 1.0,
390
+ "naucs_at_3_std": 1.0,
391
+ "naucs_at_3_diff1": 1.0,
392
+ "naucs_at_5_max": 1.0,
393
+ "naucs_at_5_std": 1.0,
394
+ "naucs_at_5_diff1": 1.0,
395
+ "naucs_at_10_max": 1.0,
396
+ "naucs_at_10_std": 1.0,
397
+ "naucs_at_10_diff1": 1.0,
398
+ "naucs_at_20_max": 1.0,
399
+ "naucs_at_20_std": 1.0,
400
+ "naucs_at_20_diff1": 1.0,
401
+ "naucs_at_50_max": null,
402
+ "naucs_at_50_std": null,
403
+ "naucs_at_50_diff1": null,
404
+ "naucs_at_100_max": null,
405
+ "naucs_at_100_std": null,
406
+ "naucs_at_100_diff1": null
407
+ },
408
+ "syntheticDocQA_energy_test": {
409
+ "ndcg_at_1": 0.95,
410
+ "ndcg_at_3": 0.96762,
411
+ "ndcg_at_5": 0.96762,
412
+ "ndcg_at_10": 0.97118,
413
+ "ndcg_at_20": 0.97118,
414
+ "ndcg_at_50": 0.973,
415
+ "ndcg_at_100": 0.973,
416
+ "map_at_1": 0.95,
417
+ "map_at_3": 0.96333,
418
+ "map_at_5": 0.96333,
419
+ "map_at_10": 0.965,
420
+ "map_at_20": 0.965,
421
+ "map_at_50": 0.96523,
422
+ "map_at_100": 0.96523,
423
+ "recall_at_1": 0.95,
424
+ "recall_at_3": 0.98,
425
+ "recall_at_5": 0.98,
426
+ "recall_at_10": 0.99,
427
+ "recall_at_20": 0.99,
428
+ "recall_at_50": 1.0,
429
+ "recall_at_100": 1.0,
430
+ "precision_at_1": 0.95,
431
+ "precision_at_3": 0.32667,
432
+ "precision_at_5": 0.196,
433
+ "precision_at_10": 0.099,
434
+ "precision_at_20": 0.0495,
435
+ "precision_at_50": 0.02,
436
+ "precision_at_100": 0.01,
437
+ "mrr_at_1": 0.95,
438
+ "mrr_at_3": 0.9633333333333333,
439
+ "mrr_at_5": 0.9633333333333333,
440
+ "mrr_at_10": 0.965,
441
+ "mrr_at_20": 0.965,
442
+ "mrr_at_50": 0.9652272727272727,
443
+ "mrr_at_100": 0.9652272727272727,
444
+ "naucs_at_1_max": 0.42726423902894384,
445
+ "naucs_at_1_std": -0.4889822595704953,
446
+ "naucs_at_1_diff1": 1.0,
447
+ "naucs_at_3_max": 0.6136788048552655,
448
+ "naucs_at_3_std": -0.6909430438842241,
449
+ "naucs_at_3_diff1": 1.0,
450
+ "naucs_at_5_max": 0.6136788048552745,
451
+ "naucs_at_5_std": -0.690943043884218,
452
+ "naucs_at_5_diff1": 1.0,
453
+ "naucs_at_10_max": 0.8692810457516413,
454
+ "naucs_at_10_std": 0.35807656395891135,
455
+ "naucs_at_10_diff1": 1.0,
456
+ "naucs_at_20_max": 0.8692810457516413,
457
+ "naucs_at_20_std": 0.35807656395891135,
458
+ "naucs_at_20_diff1": 1.0,
459
+ "naucs_at_50_max": null,
460
+ "naucs_at_50_std": null,
461
+ "naucs_at_50_diff1": null,
462
+ "naucs_at_100_max": null,
463
+ "naucs_at_100_std": null,
464
+ "naucs_at_100_diff1": null
465
+ },
466
+ "syntheticDocQA_government_reports_test": {
467
+ "ndcg_at_1": 0.93,
468
+ "ndcg_at_3": 0.96524,
469
+ "ndcg_at_5": 0.96954,
470
+ "ndcg_at_10": 0.96954,
471
+ "ndcg_at_20": 0.96954,
472
+ "ndcg_at_50": 0.96954,
473
+ "ndcg_at_100": 0.96954,
474
+ "map_at_1": 0.93,
475
+ "map_at_3": 0.95667,
476
+ "map_at_5": 0.95917,
477
+ "map_at_10": 0.95917,
478
+ "map_at_20": 0.95917,
479
+ "map_at_50": 0.95917,
480
+ "map_at_100": 0.95917,
481
+ "recall_at_1": 0.93,
482
+ "recall_at_3": 0.99,
483
+ "recall_at_5": 1.0,
484
+ "recall_at_10": 1.0,
485
+ "recall_at_20": 1.0,
486
+ "recall_at_50": 1.0,
487
+ "recall_at_100": 1.0,
488
+ "precision_at_1": 0.93,
489
+ "precision_at_3": 0.33,
490
+ "precision_at_5": 0.2,
491
+ "precision_at_10": 0.1,
492
+ "precision_at_20": 0.05,
493
+ "precision_at_50": 0.02,
494
+ "precision_at_100": 0.01,
495
+ "mrr_at_1": 0.93,
496
+ "mrr_at_3": 0.9566666666666667,
497
+ "mrr_at_5": 0.9591666666666667,
498
+ "mrr_at_10": 0.9591666666666667,
499
+ "mrr_at_20": 0.9591666666666667,
500
+ "mrr_at_50": 0.9591666666666667,
501
+ "mrr_at_100": 0.9591666666666667,
502
+ "naucs_at_1_max": 0.6809390422835813,
503
+ "naucs_at_1_std": 0.5458850206749362,
504
+ "naucs_at_1_diff1": 0.9229691876750709,
505
+ "naucs_at_3_max": 1.0,
506
+ "naucs_at_3_std": 1.0,
507
+ "naucs_at_3_diff1": 1.0,
508
+ "naucs_at_5_max": 1.0,
509
+ "naucs_at_5_std": 1.0,
510
+ "naucs_at_5_diff1": 1.0,
511
+ "naucs_at_10_max": 1.0,
512
+ "naucs_at_10_std": 1.0,
513
+ "naucs_at_10_diff1": 1.0,
514
+ "naucs_at_20_max": 1.0,
515
+ "naucs_at_20_std": 1.0,
516
+ "naucs_at_20_diff1": 1.0,
517
+ "naucs_at_50_max": null,
518
+ "naucs_at_50_std": null,
519
+ "naucs_at_50_diff1": null,
520
+ "naucs_at_100_max": null,
521
+ "naucs_at_100_std": null,
522
+ "naucs_at_100_diff1": null
523
+ },
524
+ "syntheticDocQA_healthcare_industry_test": {
525
+ "ndcg_at_1": 0.96,
526
+ "ndcg_at_3": 0.98393,
527
+ "ndcg_at_5": 0.98393,
528
+ "ndcg_at_10": 0.98393,
529
+ "ndcg_at_20": 0.98393,
530
+ "ndcg_at_50": 0.98393,
531
+ "ndcg_at_100": 0.98393,
532
+ "map_at_1": 0.96,
533
+ "map_at_3": 0.97833,
534
+ "map_at_5": 0.97833,
535
+ "map_at_10": 0.97833,
536
+ "map_at_20": 0.97833,
537
+ "map_at_50": 0.97833,
538
+ "map_at_100": 0.97833,
539
+ "recall_at_1": 0.96,
540
+ "recall_at_3": 1.0,
541
+ "recall_at_5": 1.0,
542
+ "recall_at_10": 1.0,
543
+ "recall_at_20": 1.0,
544
+ "recall_at_50": 1.0,
545
+ "recall_at_100": 1.0,
546
+ "precision_at_1": 0.96,
547
+ "precision_at_3": 0.33333,
548
+ "precision_at_5": 0.2,
549
+ "precision_at_10": 0.1,
550
+ "precision_at_20": 0.05,
551
+ "precision_at_50": 0.02,
552
+ "precision_at_100": 0.01,
553
+ "mrr_at_1": 0.96,
554
+ "mrr_at_3": 0.9783333333333333,
555
+ "mrr_at_5": 0.9783333333333333,
556
+ "mrr_at_10": 0.9783333333333333,
557
+ "mrr_at_20": 0.9783333333333333,
558
+ "mrr_at_50": 0.9783333333333333,
559
+ "mrr_at_100": 0.9783333333333333,
560
+ "naucs_at_1_max": 0.7047152194211012,
561
+ "naucs_at_1_std": 0.32037815126050734,
562
+ "naucs_at_1_diff1": 1.0,
563
+ "naucs_at_3_max": 1.0,
564
+ "naucs_at_3_std": 1.0,
565
+ "naucs_at_3_diff1": 1.0,
566
+ "naucs_at_5_max": 1.0,
567
+ "naucs_at_5_std": 1.0,
568
+ "naucs_at_5_diff1": 1.0,
569
+ "naucs_at_10_max": 1.0,
570
+ "naucs_at_10_std": 1.0,
571
+ "naucs_at_10_diff1": 1.0,
572
+ "naucs_at_20_max": 1.0,
573
+ "naucs_at_20_std": 1.0,
574
+ "naucs_at_20_diff1": 1.0,
575
+ "naucs_at_50_max": null,
576
+ "naucs_at_50_std": null,
577
+ "naucs_at_50_diff1": null,
578
+ "naucs_at_100_max": null,
579
+ "naucs_at_100_std": null,
580
+ "naucs_at_100_diff1": null
581
+ }
582
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "eos_token": {
18
+ "content": "<|im_end|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "pad_token": {
25
+ "content": "<|endoftext|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c5ae00e602b8860cbd784ba82a8aa14e8feecec692e7076590d014d7b7fdafa
3
+ size 11421896
tokenizer_config.json ADDED
@@ -0,0 +1,209 @@
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ }
181
+ },
182
+ "additional_special_tokens": [
183
+ "<|im_start|>",
184
+ "<|im_end|>",
185
+ "<|object_ref_start|>",
186
+ "<|object_ref_end|>",
187
+ "<|box_start|>",
188
+ "<|box_end|>",
189
+ "<|quad_start|>",
190
+ "<|quad_end|>",
191
+ "<|vision_start|>",
192
+ "<|vision_end|>",
193
+ "<|vision_pad|>",
194
+ "<|image_pad|>",
195
+ "<|video_pad|>"
196
+ ],
197
+ "bos_token": null,
198
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
199
+ "clean_up_tokenization_spaces": false,
200
+ "eos_token": "<|im_end|>",
201
+ "errors": "replace",
202
+ "extra_special_tokens": {},
203
+ "model_max_length": 131072,
204
+ "pad_token": "<|endoftext|>",
205
+ "processor_class": "JinaEmbeddingsV4Processor",
206
+ "split_special_tokens": false,
207
+ "tokenizer_class": "Qwen2Tokenizer",
208
+ "unk_token": null
209
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff