Update FlexRAG retriever
Browse files
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
database.lance/data/4b860054-e1c3-4e40-8f0e-7f2eeb399c54.lance filter=lfs diff=lfs merge=lfs -text
|
README.md
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
Test
|
config.yaml
ADDED
@@ -0,0 +1,201 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
index_type: faiss
|
2 |
+
annoy_config:
|
3 |
+
distance_function: IP
|
4 |
+
index_train_num: 1000000
|
5 |
+
log_interval: 10000
|
6 |
+
batch_size: 512
|
7 |
+
n_trees: -1
|
8 |
+
n_jobs: -1
|
9 |
+
search_k: -1
|
10 |
+
on_disk_build: false
|
11 |
+
faiss_config:
|
12 |
+
distance_function: IP
|
13 |
+
index_train_num: 1000000
|
14 |
+
log_interval: 10000
|
15 |
+
batch_size: 512
|
16 |
+
index_type: auto
|
17 |
+
n_subquantizers: 8
|
18 |
+
n_bits: 8
|
19 |
+
n_list: 1000
|
20 |
+
factory_str: null
|
21 |
+
n_probe: 32
|
22 |
+
device_id: []
|
23 |
+
k_factor: 10
|
24 |
+
polysemous_ht: 0
|
25 |
+
efSearch: 100
|
26 |
+
scann_config:
|
27 |
+
distance_function: IP
|
28 |
+
index_train_num: 1000000
|
29 |
+
log_interval: 10000
|
30 |
+
batch_size: 512
|
31 |
+
num_leaves: 2000
|
32 |
+
num_leaves_to_search: 500
|
33 |
+
num_neighbors: 10
|
34 |
+
anisotropic_quantization_threshold: 0.2
|
35 |
+
dimensions_per_block: 2
|
36 |
+
threads: 0
|
37 |
+
log_interval: 10000
|
38 |
+
top_k: 10
|
39 |
+
batch_size: 512
|
40 |
+
query_preprocess_pipeline:
|
41 |
+
processor_type: []
|
42 |
+
length_filter_config:
|
43 |
+
max_tokens: null
|
44 |
+
min_tokens: null
|
45 |
+
max_chars: null
|
46 |
+
min_chars: null
|
47 |
+
max_bytes: null
|
48 |
+
min_bytes: null
|
49 |
+
tokenizer_config:
|
50 |
+
tokenizer_type: moses
|
51 |
+
hf_tokenizer_path: null
|
52 |
+
tiktok_tokenizer_name: null
|
53 |
+
lang: null
|
54 |
+
token_normalize_config:
|
55 |
+
lang: en
|
56 |
+
penn: true
|
57 |
+
norm_quote_commas: true
|
58 |
+
norm_numbers: true
|
59 |
+
pre_replace_unicode_punct: false
|
60 |
+
post_remove_control_chars: false
|
61 |
+
perl_parity: false
|
62 |
+
truncate_config:
|
63 |
+
max_chars: null
|
64 |
+
max_bytes: null
|
65 |
+
max_tokens: null
|
66 |
+
tokenizer_config:
|
67 |
+
tokenizer_type: moses
|
68 |
+
hf_tokenizer_path: null
|
69 |
+
tiktok_tokenizer_name: null
|
70 |
+
lang: null
|
71 |
+
database_path: null
|
72 |
+
query_encoder_config:
|
73 |
+
encoder_type: hf
|
74 |
+
cohere_config:
|
75 |
+
model: embed-multilingual-v3.0
|
76 |
+
input_type: search_document
|
77 |
+
base_url: null
|
78 |
+
api_key: ???
|
79 |
+
proxy: null
|
80 |
+
hf_config:
|
81 |
+
model_path: sentence-transformers/all-MiniLM-L6-v2
|
82 |
+
tokenizer_path: null
|
83 |
+
trust_remote_code: false
|
84 |
+
device_id:
|
85 |
+
- 0
|
86 |
+
load_dtype: auto
|
87 |
+
max_encode_length: 512
|
88 |
+
encode_method: mean
|
89 |
+
normalize: false
|
90 |
+
prompt: ''
|
91 |
+
task: ''
|
92 |
+
hf_clip_config:
|
93 |
+
model_path: ???
|
94 |
+
tokenizer_path: null
|
95 |
+
trust_remote_code: false
|
96 |
+
device_id: []
|
97 |
+
load_dtype: auto
|
98 |
+
max_encode_length: 512
|
99 |
+
normalize: false
|
100 |
+
convert_to_rgb: false
|
101 |
+
jina_config:
|
102 |
+
model: jina-embeddings-v3
|
103 |
+
base_url: https://api.jina.ai/v1/embeddings
|
104 |
+
api_key: ???
|
105 |
+
dimensions: 1024
|
106 |
+
task: null
|
107 |
+
proxy: null
|
108 |
+
ollama_config:
|
109 |
+
model_name: ???
|
110 |
+
base_url: ???
|
111 |
+
prompt: null
|
112 |
+
verbose: false
|
113 |
+
embedding_size: 768
|
114 |
+
allow_parallel: true
|
115 |
+
openai_config:
|
116 |
+
is_azure: false
|
117 |
+
model_name: ???
|
118 |
+
base_url: null
|
119 |
+
api_key: EMPTY
|
120 |
+
api_version: 2024-07-01-preview
|
121 |
+
verbose: false
|
122 |
+
proxy: null
|
123 |
+
dimension: null
|
124 |
+
sentence_transformer_config:
|
125 |
+
model_path: ???
|
126 |
+
device_id: []
|
127 |
+
trust_remote_code: false
|
128 |
+
task: null
|
129 |
+
prompt_name: null
|
130 |
+
prompt: null
|
131 |
+
prompt_dict: null
|
132 |
+
normalize: false
|
133 |
+
model_kwargs: {}
|
134 |
+
passage_encoder_config:
|
135 |
+
encoder_type: hf
|
136 |
+
cohere_config:
|
137 |
+
model: embed-multilingual-v3.0
|
138 |
+
input_type: search_document
|
139 |
+
base_url: null
|
140 |
+
api_key: ???
|
141 |
+
proxy: null
|
142 |
+
hf_config:
|
143 |
+
model_path: sentence-transformers/all-MiniLM-L6-v2
|
144 |
+
tokenizer_path: null
|
145 |
+
trust_remote_code: false
|
146 |
+
device_id:
|
147 |
+
- 0
|
148 |
+
- 1
|
149 |
+
- 2
|
150 |
+
- 3
|
151 |
+
load_dtype: auto
|
152 |
+
max_encode_length: 512
|
153 |
+
encode_method: mean
|
154 |
+
normalize: false
|
155 |
+
prompt: ''
|
156 |
+
task: ''
|
157 |
+
hf_clip_config:
|
158 |
+
model_path: ???
|
159 |
+
tokenizer_path: null
|
160 |
+
trust_remote_code: false
|
161 |
+
device_id: []
|
162 |
+
load_dtype: auto
|
163 |
+
max_encode_length: 512
|
164 |
+
normalize: false
|
165 |
+
convert_to_rgb: false
|
166 |
+
jina_config:
|
167 |
+
model: jina-embeddings-v3
|
168 |
+
base_url: https://api.jina.ai/v1/embeddings
|
169 |
+
api_key: ???
|
170 |
+
dimensions: 1024
|
171 |
+
task: null
|
172 |
+
proxy: null
|
173 |
+
ollama_config:
|
174 |
+
model_name: ???
|
175 |
+
base_url: ???
|
176 |
+
prompt: null
|
177 |
+
verbose: false
|
178 |
+
embedding_size: 768
|
179 |
+
allow_parallel: true
|
180 |
+
openai_config:
|
181 |
+
is_azure: false
|
182 |
+
model_name: ???
|
183 |
+
base_url: null
|
184 |
+
api_key: EMPTY
|
185 |
+
api_version: 2024-07-01-preview
|
186 |
+
verbose: false
|
187 |
+
proxy: null
|
188 |
+
dimension: null
|
189 |
+
sentence_transformer_config:
|
190 |
+
model_path: ???
|
191 |
+
device_id: []
|
192 |
+
trust_remote_code: false
|
193 |
+
task: null
|
194 |
+
prompt_name: null
|
195 |
+
prompt: null
|
196 |
+
prompt_dict: null
|
197 |
+
normalize: false
|
198 |
+
model_kwargs: {}
|
199 |
+
refine_factor: 10
|
200 |
+
encode_fields:
|
201 |
+
- text
|
database.lance/_transactions/0-9f490a28-83ad-48b4-a4ca-abfd75153d5b.txn
ADDED
Binary file (393 Bytes). View file
|
|
database.lance/_versions/1.manifest
ADDED
Binary file (1.43 kB). View file
|
|
database.lance/data/4b860054-e1c3-4e40-8f0e-7f2eeb399c54.lance
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:93a1577787477721f0fe6ac3b19195c22e98370ecb665c3403daece49445c34c
|
3 |
+
size 230676568
|
retriever.id
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
DenseRetriever
|