Frinkleko
/

opensearch-doc-v3-gte-finetune-limit-samples-1700

Feature Extraction

sentence-transformers

Generated from Trainer

dataset_size:1700

loss:SpladeLoss

loss:SparseMultipleNegativesRankingLoss

Model card Files Files and versions Community

Frinkleko committed on 3 days ago

Commit

666751a

·

verified ·

1 Parent(s): 5884feb

update metrics and citations

Files changed (1) hide show

README.md +64 -0

README.md CHANGED Viewed

@@ -65,6 +65,36 @@ SparseEncoder(
 )
 ```
 ## Usage
 ### Direct Usage (Sentence Transformers)
@@ -324,6 +354,40 @@ You can finetune this model on your own dataset.
 }
 ```
 #### SpladeLoss
 ```bibtex
 @misc{formal2022distillationhardnegativesampling,

 )
 ```
+### Metrics
+```json
+{
+    "NDCG": {
+        "NDCG@2": 0.90484,
+        "NDCG@10": 0.91822,
+        "NDCG@20": 0.9204,
+        "NDCG@100": 0.92605
+    },
+    "MAP": {
+        "MAP@2": 0.90125,
+        "MAP@10": 0.91146,
+        "MAP@20": 0.91216,
+        "MAP@100": 0.91316
+    },
+    "Recall": {
+        "Recall@2": 0.9045,
+        "Recall@10": 0.931,
+        "Recall@20": 0.938,
+        "Recall@100": 0.963
+    },
+    "Precision": {
+        "P@2": 0.9045,
+        "P@10": 0.1862,
+        "P@20": 0.0938,
+        "P@100": 0.01926
+    }
+}
+```
 ## Usage
 ### Direct Usage (Sentence Transformers)
 }
 ```
+#### LIMIT
+```bibtex
+@misc{weller2025theoreticallimit,
+      title={On the Theoretical Limitations of Embedding-Based Retrieval},
+      author={Orion Weller and Michael Boratko and Iftekhar Naim and Jinhyuk Lee},
+      year={2025},
+      eprint={2508.21038},
+      archivePrefix={arXiv},
+      primaryClass={cs.IR},
+      url={https://arxiv.org/abs/2508.21038},
+}
+```
+#### OpenSearch Models
+```bibtex
+@inproceedings{Shen_2025, series={SIGIR ’25},
+   title={Exploring $\ell_0$ Sparsification for Inference-free Sparse Retrievers},
+   url={http://dx.doi.org/10.1145/3726302.3730192},
+   DOI={10.1145/3726302.3730192},
+   booktitle={Proceedings of the 48th International ACM SIGIR Conference on Research and Development in Information Retrieval},
+   publisher={ACM},
+   author={Shen, Xinjie and Geng, Zhichao and Yang, Yang},
+   year={2025},
+   month=jul, pages={2572–2576},
+   collection={SIGIR ’25}
+}
+@misc{geng2025competitivesearchrelevanceinferencefree,
+      title={Towards Competitive Search Relevance For Inference-Free Learned Sparse Retrievers},
+      author={Zhichao Geng and Yiwen Wang and Dongyu Ru and Yang Yang},
+      year={2025},
+      eprint={2411.04403},
+      archivePrefix={arXiv},
+      primaryClass={cs.IR},
+      url={https://arxiv.org/abs/2411.04403},
+}
+```
 #### SpladeLoss
 ```bibtex
 @misc{formal2022distillationhardnegativesampling,