zhuocheng committed on
Commit
61598b0
·
verified ·
1 Parent(s): 7d39dca

Update FlexRAG retriever

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ database.lance/data/4b860054-e1c3-4e40-8f0e-7f2eeb399c54.lance filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1 @@
 
 
1
+ Test
config.yaml ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ index_type: faiss
2
+ annoy_config:
3
+ distance_function: IP
4
+ index_train_num: 1000000
5
+ log_interval: 10000
6
+ batch_size: 512
7
+ n_trees: -1
8
+ n_jobs: -1
9
+ search_k: -1
10
+ on_disk_build: false
11
+ faiss_config:
12
+ distance_function: IP
13
+ index_train_num: 1000000
14
+ log_interval: 10000
15
+ batch_size: 512
16
+ index_type: auto
17
+ n_subquantizers: 8
18
+ n_bits: 8
19
+ n_list: 1000
20
+ factory_str: null
21
+ n_probe: 32
22
+ device_id: []
23
+ k_factor: 10
24
+ polysemous_ht: 0
25
+ efSearch: 100
26
+ scann_config:
27
+ distance_function: IP
28
+ index_train_num: 1000000
29
+ log_interval: 10000
30
+ batch_size: 512
31
+ num_leaves: 2000
32
+ num_leaves_to_search: 500
33
+ num_neighbors: 10
34
+ anisotropic_quantization_threshold: 0.2
35
+ dimensions_per_block: 2
36
+ threads: 0
37
+ log_interval: 10000
38
+ top_k: 10
39
+ batch_size: 512
40
+ query_preprocess_pipeline:
41
+ processor_type: []
42
+ length_filter_config:
43
+ max_tokens: null
44
+ min_tokens: null
45
+ max_chars: null
46
+ min_chars: null
47
+ max_bytes: null
48
+ min_bytes: null
49
+ tokenizer_config:
50
+ tokenizer_type: moses
51
+ hf_tokenizer_path: null
52
+ tiktok_tokenizer_name: null
53
+ lang: null
54
+ token_normalize_config:
55
+ lang: en
56
+ penn: true
57
+ norm_quote_commas: true
58
+ norm_numbers: true
59
+ pre_replace_unicode_punct: false
60
+ post_remove_control_chars: false
61
+ perl_parity: false
62
+ truncate_config:
63
+ max_chars: null
64
+ max_bytes: null
65
+ max_tokens: null
66
+ tokenizer_config:
67
+ tokenizer_type: moses
68
+ hf_tokenizer_path: null
69
+ tiktok_tokenizer_name: null
70
+ lang: null
71
+ database_path: null
72
+ query_encoder_config:
73
+ encoder_type: hf
74
+ cohere_config:
75
+ model: embed-multilingual-v3.0
76
+ input_type: search_document
77
+ base_url: null
78
+ api_key: ???
79
+ proxy: null
80
+ hf_config:
81
+ model_path: sentence-transformers/all-MiniLM-L6-v2
82
+ tokenizer_path: null
83
+ trust_remote_code: false
84
+ device_id:
85
+ - 0
86
+ load_dtype: auto
87
+ max_encode_length: 512
88
+ encode_method: mean
89
+ normalize: false
90
+ prompt: ''
91
+ task: ''
92
+ hf_clip_config:
93
+ model_path: ???
94
+ tokenizer_path: null
95
+ trust_remote_code: false
96
+ device_id: []
97
+ load_dtype: auto
98
+ max_encode_length: 512
99
+ normalize: false
100
+ convert_to_rgb: false
101
+ jina_config:
102
+ model: jina-embeddings-v3
103
+ base_url: https://api.jina.ai/v1/embeddings
104
+ api_key: ???
105
+ dimensions: 1024
106
+ task: null
107
+ proxy: null
108
+ ollama_config:
109
+ model_name: ???
110
+ base_url: ???
111
+ prompt: null
112
+ verbose: false
113
+ embedding_size: 768
114
+ allow_parallel: true
115
+ openai_config:
116
+ is_azure: false
117
+ model_name: ???
118
+ base_url: null
119
+ api_key: EMPTY
120
+ api_version: 2024-07-01-preview
121
+ verbose: false
122
+ proxy: null
123
+ dimension: null
124
+ sentence_transformer_config:
125
+ model_path: ???
126
+ device_id: []
127
+ trust_remote_code: false
128
+ task: null
129
+ prompt_name: null
130
+ prompt: null
131
+ prompt_dict: null
132
+ normalize: false
133
+ model_kwargs: {}
134
+ passage_encoder_config:
135
+ encoder_type: hf
136
+ cohere_config:
137
+ model: embed-multilingual-v3.0
138
+ input_type: search_document
139
+ base_url: null
140
+ api_key: ???
141
+ proxy: null
142
+ hf_config:
143
+ model_path: sentence-transformers/all-MiniLM-L6-v2
144
+ tokenizer_path: null
145
+ trust_remote_code: false
146
+ device_id:
147
+ - 0
148
+ - 1
149
+ - 2
150
+ - 3
151
+ load_dtype: auto
152
+ max_encode_length: 512
153
+ encode_method: mean
154
+ normalize: false
155
+ prompt: ''
156
+ task: ''
157
+ hf_clip_config:
158
+ model_path: ???
159
+ tokenizer_path: null
160
+ trust_remote_code: false
161
+ device_id: []
162
+ load_dtype: auto
163
+ max_encode_length: 512
164
+ normalize: false
165
+ convert_to_rgb: false
166
+ jina_config:
167
+ model: jina-embeddings-v3
168
+ base_url: https://api.jina.ai/v1/embeddings
169
+ api_key: ???
170
+ dimensions: 1024
171
+ task: null
172
+ proxy: null
173
+ ollama_config:
174
+ model_name: ???
175
+ base_url: ???
176
+ prompt: null
177
+ verbose: false
178
+ embedding_size: 768
179
+ allow_parallel: true
180
+ openai_config:
181
+ is_azure: false
182
+ model_name: ???
183
+ base_url: null
184
+ api_key: EMPTY
185
+ api_version: 2024-07-01-preview
186
+ verbose: false
187
+ proxy: null
188
+ dimension: null
189
+ sentence_transformer_config:
190
+ model_path: ???
191
+ device_id: []
192
+ trust_remote_code: false
193
+ task: null
194
+ prompt_name: null
195
+ prompt: null
196
+ prompt_dict: null
197
+ normalize: false
198
+ model_kwargs: {}
199
+ refine_factor: 10
200
+ encode_fields:
201
+ - text
database.lance/_transactions/0-9f490a28-83ad-48b4-a4ca-abfd75153d5b.txn ADDED
Binary file (393 Bytes). View file
 
database.lance/_versions/1.manifest ADDED
Binary file (1.43 kB). View file
 
database.lance/data/4b860054-e1c3-4e40-8f0e-7f2eeb399c54.lance ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93a1577787477721f0fe6ac3b19195c22e98370ecb665c3403daece49445c34c
3
+ size 230676568
retriever.id ADDED
@@ -0,0 +1 @@
 
 
1
+ DenseRetriever