rahulseetharaman committed (verified)
Commit ab1e882 · 1 parent: 3b1b232

Add new CrossEncoder model

README.md ADDED
---
language:
- en
tags:
- sentence-transformers
- cross-encoder
- reranker
- generated_from_trainer
- dataset_size:78704
- loss:ListNetLoss
base_model: bansalaman18/bert-uncased_L-10_H-256_A-4
datasets:
- microsoft/ms_marco
pipeline_tag: text-ranking
library_name: sentence-transformers
metrics:
- map
- mrr@10
- ndcg@10
model-index:
- name: CrossEncoder based on bansalaman18/bert-uncased_L-10_H-256_A-4
  results:
  - task:
      type: cross-encoder-reranking
      name: Cross Encoder Reranking
    dataset:
      name: NanoMSMARCO R100
      type: NanoMSMARCO_R100
    metrics:
    - type: map
      value: 0.0654
      name: Map
    - type: mrr@10
      value: 0.039
      name: Mrr@10
    - type: ndcg@10
      value: 0.0574
      name: Ndcg@10
  - task:
      type: cross-encoder-reranking
      name: Cross Encoder Reranking
    dataset:
      name: NanoNFCorpus R100
      type: NanoNFCorpus_R100
    metrics:
    - type: map
      value: 0.2752
      name: Map
    - type: mrr@10
      value: 0.3973
      name: Mrr@10
    - type: ndcg@10
      value: 0.2485
      name: Ndcg@10
  - task:
      type: cross-encoder-reranking
      name: Cross Encoder Reranking
    dataset:
      name: NanoNQ R100
      type: NanoNQ_R100
    metrics:
    - type: map
      value: 0.0653
      name: Map
    - type: mrr@10
      value: 0.0417
      name: Mrr@10
    - type: ndcg@10
      value: 0.0648
      name: Ndcg@10
  - task:
      type: cross-encoder-nano-beir
      name: Cross Encoder Nano BEIR
    dataset:
      name: NanoBEIR R100 mean
      type: NanoBEIR_R100_mean
    metrics:
    - type: map
      value: 0.1353
      name: Map
    - type: mrr@10
      value: 0.1593
      name: Mrr@10
    - type: ndcg@10
      value: 0.1235
      name: Ndcg@10
---

# CrossEncoder based on bansalaman18/bert-uncased_L-10_H-256_A-4

This is a [Cross Encoder](https://www.sbert.net/docs/cross_encoder/usage/usage.html) model finetuned from [bansalaman18/bert-uncased_L-10_H-256_A-4](https://huggingface.co/bansalaman18/bert-uncased_L-10_H-256_A-4) on the [ms_marco](https://huggingface.co/datasets/microsoft/ms_marco) dataset using the [sentence-transformers](https://www.SBERT.net) library. It computes scores for pairs of texts, which can be used for text reranking and semantic search.

## Model Details

### Model Description
- **Model Type:** Cross Encoder
- **Base model:** [bansalaman18/bert-uncased_L-10_H-256_A-4](https://huggingface.co/bansalaman18/bert-uncased_L-10_H-256_A-4) <!-- at revision 2c743a1678c7e2a9a2ba9cda4400b08cfa7054fc -->
- **Maximum Sequence Length:** 512 tokens
- **Number of Output Labels:** 1 label
- **Training Dataset:**
    - [ms_marco](https://huggingface.co/datasets/microsoft/ms_marco)
- **Language:** en
<!-- - **License:** Unknown -->

### Model Sources

- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
- **Documentation:** [Cross Encoder Documentation](https://www.sbert.net/docs/cross_encoder/usage/usage.html)
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
- **Hugging Face:** [Cross Encoders on Hugging Face](https://huggingface.co/models?library=sentence-transformers&other=cross-encoder)

## Usage

### Direct Usage (Sentence Transformers)

First install the Sentence Transformers library:

```bash
pip install -U sentence-transformers
```

Then you can load this model and run inference:

```python
from sentence_transformers import CrossEncoder

# Download from the 🤗 Hub
model = CrossEncoder("rahulseetharaman/reranker-msmarco-v1.1-bert-uncased_L-10_H-256_A-4-listnet")
# Get scores for pairs of texts
pairs = [
    ['what does sertraline treat', 'Sertraline is used to treat depression, obsessive compulsive disorder (OCD), panic disorder, premenstrual dysphoric disorder (PMDD), posttraumatic stress disorder (PTSD), and social anxiety disorder (SAD). Sertraline belongs to a group of medicines known as selective serotonin reuptake inhibitors (SSRIs). '],
    ['what does sertraline treat', 'Sertraline is used for a number of conditions including: major depression, obsessive-compulsive disorder (OCD), body dysmorphic disorder (BDD), posttraumatic stress disorder (PTSD), premenstrual dysphoric disorder (PMDD), panic disorder and social phobia (social anxiety disorder). It was introduced to the market by Pfizer in 1991. Sertraline is primarily prescribed for major depressive disorder in adult outpatients as well as obsessive-compulsive disorder, panic disorder, and social anxiety disorder, in both adults and children.'],
    ['what does sertraline treat', 'Zoloft is the brand name of sertraline, an antidepressant used to treat major depressive disorders. Zoloft is in a class of antidepressants known as selective serotonin reuptake inhibitors (SSRIs). They work by controlling levels of serotonin (a neurotransmitter) in the brain. A: Zoloft (sertraline) is a medication that is used to treat depression or anxiety. This medication is in the family of drugs called SSRIs and works by bringing a balance to serotonin in the brain that is causing your condition.'],
    ['what does sertraline treat', 'A: Zoloft (sertraline) is a type of antidepressant known as a selective serotonin reuptake inhibitor (SSRI). It is commonly used to treat depression, social anxiety disorder, posttraumatic stress disorder (PTSD), panic disorder and obsessive-compulsive disorder (OCD). A: Zoloft (sertraline) is a medication that is used to treat depression or anxiety. This medication is in the family of drugs called SSRIs and works by bringing a balance to serotonin in the brain that is causing your condition.'],
    ['what does sertraline treat', 'A Stacy Wiegman, PharmD, Pharmacy, answered. Sertraline is an antidepressant that treats the symptoms of different psychological disorders, including obsessive-compulsive disorder (OCD), by increasing serotonin and balancing chemicals in the brain. Sertraline is classified as a selective serotonin reuptake inhibitor (SSRI). '],
]
scores = model.predict(pairs)
print(scores.shape)
# (5,)

# Or rank different texts based on similarity to a single text
ranks = model.rank(
    'what does sertraline treat',
    [
        'Sertraline is used to treat depression, obsessive compulsive disorder (OCD), panic disorder, premenstrual dysphoric disorder (PMDD), posttraumatic stress disorder (PTSD), and social anxiety disorder (SAD). Sertraline belongs to a group of medicines known as selective serotonin reuptake inhibitors (SSRIs). ',
        'Sertraline is used for a number of conditions including: major depression, obsessive-compulsive disorder (OCD), body dysmorphic disorder (BDD), posttraumatic stress disorder (PTSD), premenstrual dysphoric disorder (PMDD), panic disorder and social phobia (social anxiety disorder). It was introduced to the market by Pfizer in 1991. Sertraline is primarily prescribed for major depressive disorder in adult outpatients as well as obsessive-compulsive disorder, panic disorder, and social anxiety disorder, in both adults and children.',
        'Zoloft is the brand name of sertraline, an antidepressant used to treat major depressive disorders. Zoloft is in a class of antidepressants known as selective serotonin reuptake inhibitors (SSRIs). They work by controlling levels of serotonin (a neurotransmitter) in the brain. A: Zoloft (sertraline) is a medication that is used to treat depression or anxiety. This medication is in the family of drugs called SSRIs and works by bringing a balance to serotonin in the brain that is causing your condition.',
        'A: Zoloft (sertraline) is a type of antidepressant known as a selective serotonin reuptake inhibitor (SSRI). It is commonly used to treat depression, social anxiety disorder, posttraumatic stress disorder (PTSD), panic disorder and obsessive-compulsive disorder (OCD). A: Zoloft (sertraline) is a medication that is used to treat depression or anxiety. This medication is in the family of drugs called SSRIs and works by bringing a balance to serotonin in the brain that is causing your condition.',
        'A Stacy Wiegman, PharmD, Pharmacy, answered. Sertraline is an antidepressant that treats the symptoms of different psychological disorders, including obsessive-compulsive disorder (OCD), by increasing serotonin and balancing chemicals in the brain. Sertraline is classified as a selective serotonin reuptake inhibitor (SSRI). ',
    ]
)
# [{'corpus_id': ..., 'score': ...}, {'corpus_id': ..., 'score': ...}, ...]
```
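`rank` returns results already sorted by score; the same reranking can be done by hand by sorting `predict` scores. A minimal sketch of that pattern, using toy scores in place of a real `model.predict` call so it runs without downloading the model:

```python
# Rerank candidate passages by cross-encoder score (highest first).
# The scores below are placeholders standing in for model.predict(pairs).
docs = ["passage a", "passage b", "passage c"]
scores = [0.12, 0.87, 0.45]  # hypothetical predict() output

reranked = sorted(zip(docs, scores), key=lambda pair: pair[1], reverse=True)
for doc, score in reranked:
    print(f"{score:.2f}  {doc}")
```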
<!--
### Direct Usage (Transformers)

<details><summary>Click to see the direct usage in Transformers</summary>

</details>
-->

<!--
### Downstream Usage (Sentence Transformers)

You can finetune this model on your own dataset.

<details><summary>Click to expand</summary>

</details>
-->

<!--
### Out-of-Scope Use

*List how the model may foreseeably be misused and address what users ought not to do with the model.*
-->

## Evaluation

### Metrics

#### Cross Encoder Reranking

* Datasets: `NanoMSMARCO_R100`, `NanoNFCorpus_R100` and `NanoNQ_R100`
* Evaluated with [<code>CrossEncoderRerankingEvaluator</code>](https://sbert.net/docs/package_reference/cross_encoder/evaluation.html#sentence_transformers.cross_encoder.evaluation.CrossEncoderRerankingEvaluator) with these parameters:
  ```json
  {
      "at_k": 10,
      "always_rerank_positives": true
  }
  ```

| Metric      | NanoMSMARCO_R100     | NanoNFCorpus_R100    | NanoNQ_R100          |
|:------------|:---------------------|:---------------------|:---------------------|
| map         | 0.0654 (-0.4242)     | 0.2752 (+0.0142)     | 0.0653 (-0.3543)     |
| mrr@10      | 0.0390 (-0.4385)     | 0.3973 (-0.1026)     | 0.0417 (-0.3850)     |
| **ndcg@10** | **0.0574 (-0.4831)** | **0.2485 (-0.0765)** | **0.0648 (-0.4359)** |

#### Cross Encoder Nano BEIR

* Dataset: `NanoBEIR_R100_mean`
* Evaluated with [<code>CrossEncoderNanoBEIREvaluator</code>](https://sbert.net/docs/package_reference/cross_encoder/evaluation.html#sentence_transformers.cross_encoder.evaluation.CrossEncoderNanoBEIREvaluator) with these parameters:
  ```json
  {
      "dataset_names": [
          "msmarco",
          "nfcorpus",
          "nq"
      ],
      "rerank_k": 100,
      "at_k": 10,
      "always_rerank_positives": true
  }
  ```

| Metric      | Value                |
|:------------|:---------------------|
| map         | 0.1353 (-0.2548)     |
| mrr@10      | 0.1593 (-0.3087)     |
| **ndcg@10** | **0.1235 (-0.3318)** |
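The `NanoBEIR_R100_mean` values are the unweighted averages of the three per-dataset results above. A quick arithmetic check for map (plain Python, not part of any evaluator API):

```python
# NanoBEIR_R100_mean averages the per-dataset scores reported above.
per_dataset_map = {"NanoMSMARCO": 0.0654, "NanoNFCorpus": 0.2752, "NanoNQ": 0.0653}
mean_map = sum(per_dataset_map.values()) / len(per_dataset_map)
print(round(mean_map, 4))  # 0.1353, matching the map row in the table
```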
<!--
## Bias, Risks and Limitations

*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
-->

<!--
### Recommendations

*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
-->

## Training Details

### Training Dataset

#### ms_marco

* Dataset: [ms_marco](https://huggingface.co/datasets/microsoft/ms_marco) at [a47ee7a](https://huggingface.co/datasets/microsoft/ms_marco/tree/a47ee7aae8d7d466ba15f9f0bfac3b3681087b3a)
* Size: 78,704 training samples
* Columns: <code>query</code>, <code>docs</code>, and <code>labels</code>
* Approximate statistics based on the first 1000 samples:
  |         | query                                                                                           | docs                                                                                    | labels                                                                                  |
  |:--------|:------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------|
  | type    | string                                                                                          | list                                                                                    | list                                                                                    |
  | details | <ul><li>min: 10 characters</li><li>mean: 34.03 characters</li><li>max: 109 characters</li></ul> | <ul><li>min: 1 elements</li><li>mean: 5.89 elements</li><li>max: 10 elements</li></ul> | <ul><li>min: 1 elements</li><li>mean: 5.89 elements</li><li>max: 10 elements</li></ul> |
* Samples:
  | query                                           | docs                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           | labels                            |
  |:------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------------------------------|
  | <code>what is vascular tissue in a plant</code> | <code>['Vascular tissue. vascular tissue. (botany) plant tissue that transports nutrients and water throughout a plant; as veins and arteries are to animals. The two kind of vascular tissue are xylem (used mainly for water) and phloem (used more for nutrients). ', 'The primary components of vascular tissue are the xylem and phloem. These two tissues transport fluid and nutrients internally. There are also two meristems associated with vascular tissue: the vascular cambium and the cork cambium. All the vascular tissues within a particular plant together constitute the vascular tissue system of that plant. The cells in vascular tissue are typically long and slender. Since the xylem and phloem function in the conduction of water, minerals, and nutrients throughout the plant, it is not surprising that their form should be similar to pipes. The vascular tissue in plants is arranged in long, discrete strands called vascular bundles. These bundles include both xylem and phloem, as well as supportin...</code> | <code>[1, 1, 0, 0, 0, ...]</code> |
  | <code>what is lithotripsy</code>                | <code>['Lithotripsy is the use of high-energy shock waves to fragment and disintegrate kidney stones. The shock wave, created by using a high-voltage spark or an electromagnetic impulse outside of the body, is focused on the stone. The shock wave shatters the stone, allowing the fragments to pass through the urinary system. Description. Lithotripsy uses the technique of focused shock waves to fragment a stone in the kidney or the ureter. The affected person is placed in a tub of water or in contact with a water-filled cushion.', 'Overview. Lithotripsy is a medical procedure used to treat kidney stones. It may also be used to treat stones in other organs, such as the gall bladder or the liver. Kidney stones are collections of solid minerals that sometimes form in the kidneys. Healthy kidneys do not have these stone-like formations', 'Lithotripsy is a procedure that uses shock waves to break up stones in the kidney, bladder, or ureter (tube that carries urine from your kidneys to your bladder)...</code> | <code>[1, 1, 0, 0, 0, ...]</code> |
  | <code>what is an eye</code>                     | <code>['A compound eye may consist of thousands of individual photoreceptor units or ommatidia (ommatidium, singular). The image perceived is a combination of inputs from the numerous ommatidia (individual eye units), which are located on a convex surface, thus pointing in slightly different directions. The eye of a red-tailed hawk. Visual acuity, or resolving power, is the ability to distinguish fine detail and is the property of cone cells. It is often measured in cycles per degree (CPD), which measures an angular resolution, or how much an eye can differentiate one object from another in terms of visual angles.', '1 The iris is the colored part of the eye (most often blue or brown). 2 It surrounds the pupil, the small opening that lets light enter the eyeball. 3 The choroid is a thin, pigmented layer lining the eyeball that nourishes the retina and the front of the eye with blood. Intraocular melanoma (melanoma of the eye). Intraocular melanoma is the most common type of cancer that dev...</code> | <code>[1, 0, 0, 0, 0, ...]</code> |
* Loss: [<code>ListNetLoss</code>](https://sbert.net/docs/package_reference/cross_encoder/losses.html#listnetloss) with these parameters:
  ```json
  {
      "activation_fn": "torch.nn.modules.linear.Identity",
      "mini_batch_size": 16
  }
  ```
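With the identity activation, ListNetLoss softmaxes the raw model scores and the relevance labels into top-one probability distributions and takes their cross-entropy. A minimal self-contained sketch of that computation (not the library implementation):

```python
import math

def softmax(xs):
    # Numerically stable softmax over a list of floats.
    m = max(xs)
    exps = [math.exp(x - m) for x in xs]
    total = sum(exps)
    return [e / total for e in exps]

def listnet_loss(scores, labels):
    """Cross-entropy between the label- and score-induced top-one distributions."""
    p_true = softmax(labels)   # target permutation probabilities
    p_pred = softmax(scores)   # model's permutation probabilities
    return -sum(t * math.log(p) for t, p in zip(p_true, p_pred))

# One query with three candidate passages; only the first is relevant.
labels = [1.0, 0.0, 0.0]
good = listnet_loss([5.0, -2.0, -3.0], labels)  # scores agree with labels
bad = listnet_loss([-3.0, 5.0, -2.0], labels)   # relevant passage ranked last
print(good < bad)  # a correct ranking yields the lower loss
```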
261
+
262
+ ### Evaluation Dataset
263
+
264
+ #### ms_marco
265
+
266
+ * Dataset: [ms_marco](https://huggingface.co/datasets/microsoft/ms_marco) at [a47ee7a](https://huggingface.co/datasets/microsoft/ms_marco/tree/a47ee7aae8d7d466ba15f9f0bfac3b3681087b3a)
267
+ * Size: 1,000 evaluation samples
268
+ * Columns: <code>query</code>, <code>docs</code>, and <code>labels</code>
269
+ * Approximate statistics based on the first 1000 samples:
270
+ | | query | docs | labels |
271
+ |:--------|:-----------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------|
272
+ | type | string | list | list |
273
+ | details | <ul><li>min: 8 characters</li><li>mean: 34.34 characters</li><li>max: 110 characters</li></ul> | <ul><li>min: 3 elements</li><li>mean: 6.50 elements</li><li>max: 10 elements</li></ul> | <ul><li>min: 3 elements</li><li>mean: 6.50 elements</li><li>max: 10 elements</li></ul> |
274
+ * Samples:
275
+ | query | docs | labels |
276
+ |:-----------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------------------------------|
277
+ | <code>what does sertraline treat</code> | <code>['Sertraline is used to treat depression, obsessive compulsive disorder (OCD), panic disorder, premenstrual dysphoric disorder (PMDD), posttraumatic stress disorder (PTSD), and social anxiety disorder (SAD). Sertraline belongs to a group of medicines known as selective serotonin reuptake inhibitors (SSRIs). ', 'Sertraline is used for a number of conditions including: major depression, obsessive-compulsive disorder (OCD), body dysmorphic disorder (BDD), posttraumatic stress disorder (PTSD), premenstrual dysphoric disorder (PMDD), panic disorder and social phobia (social anxiety disorder). It was introduced to the market by Pfizer in 1991. Sertraline is primarily prescribed for major depressive disorder in adult outpatients as well as obsessive-compulsive disorder, panic disorder, and social anxiety disorder, in both adults and children.', 'Zoloft is the brand name of sertraline, an antidepressant used to treat major depressive disorders. Zoloft is in a class of antidepressants known as ...</code> | <code>[1, 0, 0, 0, 0, ...]</code> |
278
+ | <code>can i take just hand luggage and a handbag on thomas cook</code> | <code>["With regard to hand luggage, you can take one bag each max 5kg per person and the bag must not be above a certain size. Your handbag will be counted as your hand luggage bag. The official rules for Thomas Cook Airlines are..... I have checked the paper work and it says we are flying Thomas Cook Airlines All passengers receive a complementary hand baggage allowance of 5kgs when travelling on a Thomas Cook Airline flights. This time it is about luggage. There are 4 of us (2 adults 2 children) when we booked the holiday with Thomas Cook we were told it was 15kg per person for hold luggage and 5kg per person for hand luggage. He rep said we could add the 15kg's together and have 2x 30kg suitcases.", "Advice on hold/hand luggage please! Just me again, with yet ANOTHER question LOL. This time it is about luggage. There are 4 of us (2 adults 2 children) when we booked the holiday with Thomas Cook we were told it was 15kg per person for hold luggage and 5kg per person for hand luggage. He re...</code> | <code>[1, 0, 0, 0, 0, ...]</code> |
279
+ | <code>what causes neisseria gonorrhoeae</code> | <code>['Neisseria gonorrhoeae. by Yen Lemire. Introduction. Neisseria gonorrhoeae is the obligate human pathogen that causes the sexually transmitted disease (STD) gonorrhea. This Gram-negative diplococci/gonococci does not infect other animals or experimental animals and does not survive freely in the environment', 'Neisseria gonorrhoeae, also known as gonococci (plural), or gonococcus (singular), is a species of Gram-negative coffee bean-shaped diplococci bacteria responsible for the sexually transmitted infection gonorrhea. ', 'Gonorrhea is a sexually transmitted disease (STD) that can infect both men and women. It can cause infections in the genitals, rectum, and throat. It is a very common infection, especially among young people ages 15-24 years. ', 'Background. Gonorrhea is a sexually transmitted disease caused by Neisseria gonorrhoeae, a bacterium that can infect areas of the reproductive tract, including the cervix, uterus, and fallopian tubes in women, and the urethra, mouth, throa...</code> | <code>[1, 0, 0, 0, 0, ...]</code> |
280
+ * Loss: [<code>ListNetLoss</code>](https://sbert.net/docs/package_reference/cross_encoder/losses.html#listnetloss) with these parameters:
281
+ ```json
282
+ {
283
+ "activation_fn": "torch.nn.modules.linear.Identity",
284
+ "mini_batch_size": 16
285
+ }
286
+ ```
287
+
288
+ ### Training Hyperparameters
289
+ #### Non-Default Hyperparameters
290
+
291
+ - `eval_strategy`: steps
292
+ - `per_device_train_batch_size`: 16
293
+ - `per_device_eval_batch_size`: 16
294
+ - `learning_rate`: 2e-05
295
+ - `num_train_epochs`: 1
296
+ - `warmup_ratio`: 0.1
297
+ - `seed`: 12
298
+ - `bf16`: True
299
+ - `load_best_model_at_end`: True
300
+
301
+ #### All Hyperparameters
302
+ <details><summary>Click to expand</summary>
303
+
304
+ - `overwrite_output_dir`: False
305
+ - `do_predict`: False
306
+ - `eval_strategy`: steps
307
+ - `prediction_loss_only`: True
308
+ - `per_device_train_batch_size`: 16
309
+ - `per_device_eval_batch_size`: 16
310
+ - `per_gpu_train_batch_size`: None
311
+ - `per_gpu_eval_batch_size`: None
312
+ - `gradient_accumulation_steps`: 1
313
+ - `eval_accumulation_steps`: None
314
+ - `torch_empty_cache_steps`: None
315
+ - `learning_rate`: 2e-05
316
+ - `weight_decay`: 0.0
317
+ - `adam_beta1`: 0.9
318
+ - `adam_beta2`: 0.999
319
+ - `adam_epsilon`: 1e-08
320
+ - `max_grad_norm`: 1.0
321
+ - `num_train_epochs`: 1
322
+ - `max_steps`: -1
323
+ - `lr_scheduler_type`: linear
324
+ - `lr_scheduler_kwargs`: {}
325
+ - `warmup_ratio`: 0.1
326
+ - `warmup_steps`: 0
327
+ - `log_level`: passive
328
+ - `log_level_replica`: warning
329
+ - `log_on_each_node`: True
330
+ - `logging_nan_inf_filter`: True
331
+ - `save_safetensors`: True
332
+ - `save_on_each_node`: False
333
+ - `save_only_model`: False
334
+ - `restore_callback_states_from_checkpoint`: False
335
+ - `no_cuda`: False
336
+ - `use_cpu`: False
337
+ - `use_mps_device`: False
338
+ - `seed`: 12
339
+ - `data_seed`: None
340
+ - `jit_mode_eval`: False
341
+ - `use_ipex`: False
342
+ - `bf16`: True
343
+ - `fp16`: False
344
+ - `fp16_opt_level`: O1
345
+ - `half_precision_backend`: auto
346
+ - `bf16_full_eval`: False
347
+ - `fp16_full_eval`: False
348
+ - `tf32`: None
349
+ - `local_rank`: 0
350
+ - `ddp_backend`: None
351
+ - `tpu_num_cores`: None
352
+ - `tpu_metrics_debug`: False
353
+ - `debug`: []
354
+ - `dataloader_drop_last`: False
355
+ - `dataloader_num_workers`: 0
356
+ - `dataloader_prefetch_factor`: None
357
+ - `past_index`: -1
358
+ - `disable_tqdm`: False
359
+ - `remove_unused_columns`: True
360
+ - `label_names`: None
361
+ - `load_best_model_at_end`: True
362
+ - `ignore_data_skip`: False
363
+ - `fsdp`: []
364
+ - `fsdp_min_num_params`: 0
365
+ - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
366
+ - `fsdp_transformer_layer_cls_to_wrap`: None
367
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
368
+ - `deepspeed`: None
369
+ - `label_smoothing_factor`: 0.0
370
+ - `optim`: adamw_torch
371
+ - `optim_args`: None
372
+ - `adafactor`: False
373
+ - `group_by_length`: False
374
+ - `length_column_name`: length
375
+ - `ddp_find_unused_parameters`: None
376
+ - `ddp_bucket_cap_mb`: None
377
+ - `ddp_broadcast_buffers`: False
378
+ - `dataloader_pin_memory`: True
379
+ - `dataloader_persistent_workers`: False
380
+ - `skip_memory_metrics`: True
381
+ - `use_legacy_prediction_loop`: False
382
+ - `push_to_hub`: False
383
+ - `resume_from_checkpoint`: None
384
+ - `hub_model_id`: None
385
+ - `hub_strategy`: every_save
386
+ - `hub_private_repo`: None
387
+ - `hub_always_push`: False
388
+ - `hub_revision`: None
389
+ - `gradient_checkpointing`: False
390
+ - `gradient_checkpointing_kwargs`: None
391
+ - `include_inputs_for_metrics`: False
392
+ - `include_for_metrics`: []
393
+ - `eval_do_concat_batches`: True
394
+ - `fp16_backend`: auto
395
+ - `push_to_hub_model_id`: None
396
+ - `push_to_hub_organization`: None
397
+ - `mp_parameters`:
398
+ - `auto_find_batch_size`: False
399
+ - `full_determinism`: False
400
+ - `torchdynamo`: None
401
+ - `ray_scope`: last
402
+ - `ddp_timeout`: 1800
403
+ - `torch_compile`: False
404
+ - `torch_compile_backend`: None
405
+ - `torch_compile_mode`: None
406
+ - `include_tokens_per_second`: False
407
+ - `include_num_input_tokens_seen`: False
408
+ - `neftune_noise_alpha`: None
409
+ - `optim_target_modules`: None
410
+ - `batch_eval_metrics`: False
411
+ - `eval_on_start`: False
412
+ - `use_liger_kernel`: False
413
+ - `liger_kernel_config`: None
414
+ - `eval_use_gather_object`: False
415
+ - `average_tokens_across_devices`: False
416
+ - `prompts`: None
417
+ - `batch_sampler`: batch_sampler
418
+ - `multi_dataset_batch_sampler`: proportional
419
+ - `router_mapping`: {}
420
+ - `learning_rate_mapping`: {}
421
+
422
+ </details>
423
+
424
+ ### Training Logs
425
+ | Epoch | Step | Training Loss | Validation Loss | NanoMSMARCO_R100_ndcg@10 | NanoNFCorpus_R100_ndcg@10 | NanoNQ_R100_ndcg@10 | NanoBEIR_R100_mean_ndcg@10 |
426
+ |:----------:|:--------:|:-------------:|:---------------:|:------------------------:|:-------------------------:|:--------------------:|:--------------------------:|
427
+ | -1 | -1 | - | - | 0.0797 (-0.4607) | 0.2817 (-0.0434) | 0.0302 (-0.4704) | 0.1305 (-0.3248) |
428
+ | 0.0002 | 1 | 2.0922 | - | - | - | - | - |
429
+ | 0.0508 | 250 | 2.0905 | - | - | - | - | - |
430
+ | 0.1016 | 500 | 2.0908 | 2.1004 | 0.0181 (-0.5223) | 0.2538 (-0.0713) | 0.0256 (-0.4751) | 0.0991 (-0.3562) |
431
+ | 0.1525 | 750 | 2.0904 | - | - | - | - | - |
432
+ | 0.2033 | 1000 | 2.0849 | 2.1000 | 0.0400 (-0.5004) | 0.2665 (-0.0586) | 0.0176 (-0.4830) | 0.1080 (-0.3474) |
433
+ | 0.2541 | 1250 | 2.0934 | - | - | - | - | - |
434
+ | 0.3049 | 1500 | 2.087 | 2.0992 | 0.0393 (-0.5011) | 0.2181 (-0.1069) | 0.0503 (-0.4504) | 0.1026 (-0.3528) |
435
+ | 0.3558 | 1750 | 2.0929 | - | - | - | - | - |
436
+ | 0.4066 | 2000 | 2.089 | 2.0989 | 0.0447 (-0.4957) | 0.2442 (-0.0808) | 0.0450 (-0.4557) | 0.1113 (-0.3441) |
437
+ | 0.4574 | 2250 | 2.0888 | - | - | - | - | - |
438
+ | 0.5082 | 2500 | 2.0865 | 2.0988 | 0.0393 (-0.5011) | 0.2211 (-0.1040) | 0.0424 (-0.4582) | 0.1009 (-0.3544) |
439
+ | 0.5591 | 2750 | 2.0858 | - | - | - | - | - |
440
+ | 0.6099 | 3000 | 2.0825 | 2.0985 | 0.0447 (-0.4957) | 0.2312 (-0.0938) | 0.0569 (-0.4438) | 0.1109 (-0.3444) |
441
+ | 0.6607 | 3250 | 2.0859 | - | - | - | - | - |
442
+ | 0.7115 | 3500 | 2.0905 | 2.0984 | 0.0447 (-0.4958) | 0.2419 (-0.0831) | 0.0593 (-0.4414) | 0.1153 (-0.3401) |
443
+ | 0.7624 | 3750 | 2.0838 | - | - | - | - | - |
444
+ | 0.8132 | 4000 | 2.0883 | 2.0984 | 0.0605 (-0.4799) | 0.2393 (-0.0858) | 0.0705 (-0.4302) | 0.1234 (-0.3320) |
445
+ | 0.8640 | 4250 | 2.0885 | - | - | - | - | - |
446
+ | **0.9148** | **4500** | **2.0832** | **2.0984** | **0.0574 (-0.4831)** | **0.2485 (-0.0765)** | **0.0648 (-0.4359)** | **0.1235 (-0.3318)** |
447
+ | 0.9656 | 4750 | 2.0815 | - | - | - | - | - |
448
+ | -1 | -1 | - | - | 0.0574 (-0.4831) | 0.2485 (-0.0765) | 0.0648 (-0.4359) | 0.1235 (-0.3318) |
449
+
450
+ * The bold row denotes the saved checkpoint.
451
+
452
+ ### Framework Versions
453
+ - Python: 3.10.18
454
+ - Sentence Transformers: 5.0.0
455
+ - Transformers: 4.56.0.dev0
456
+ - PyTorch: 2.7.1+cu126
457
+ - Accelerate: 1.9.0
458
+ - Datasets: 4.0.0
459
+ - Tokenizers: 0.21.4
460
+
461
+ ## Citation
+
+ ### BibTeX
+
+ #### Sentence Transformers
+ ```bibtex
+ @inproceedings{reimers-2019-sentence-bert,
+     title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
+     author = "Reimers, Nils and Gurevych, Iryna",
+     booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
+     month = "11",
+     year = "2019",
+     publisher = "Association for Computational Linguistics",
+     url = "https://arxiv.org/abs/1908.10084",
+ }
+ ```
+
+ #### ListNetLoss
+ ```bibtex
+ @inproceedings{cao2007learning,
+     title = {Learning to Rank: From Pairwise Approach to Listwise Approach},
+     author = {Cao, Zhe and Qin, Tao and Liu, Tie-Yan and Tsai, Ming-Feng and Li, Hang},
+     booktitle = {Proceedings of the 24th International Conference on Machine Learning},
+     pages = {129--136},
+     year = {2007}
+ }
+ ```
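The loss cited above is the ListNet top-one loss: the cross-entropy between the softmax of the relevance labels and the softmax of the predicted scores over one query's candidate list. A minimal plain-Python sketch of the idea (not the Sentence Transformers implementation):

```python
import math


def listnet_loss(scores, labels):
    """ListNet top-one loss (Cao et al., 2007): cross-entropy between the
    target top-one distribution softmax(labels) and the predicted
    distribution softmax(scores) for one query's candidate documents."""
    def softmax(xs):
        m = max(xs)  # subtract the max for numerical stability
        exps = [math.exp(x - m) for x in xs]
        s = sum(exps)
        return [e / s for e in exps]

    p = softmax(labels)  # target distribution from relevance labels
    q = softmax(scores)  # predicted distribution from model scores
    return -sum(pi * math.log(qi) for pi, qi in zip(p, q))
```

Because both distributions pass through a softmax, the loss is invariant to shifting all scores by a constant, and it is minimized when the model's score ordering (in fact, its softmax distribution) matches the labels'.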
+
+ <!--
+ ## Glossary
+
+ *Clearly define terms in order to be accessible across audiences.*
+ -->
+
+ <!--
+ ## Model Card Authors
+
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
+ -->
+
+ <!--
+ ## Model Card Contact
+
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
+ -->
config.json ADDED
@@ -0,0 +1,34 @@
+ {
+   "architectures": [
+     "BertForSequenceClassification"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "classifier_dropout": null,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 256,
+   "id2label": {
+     "0": "LABEL_0"
+   },
+   "initializer_range": 0.02,
+   "intermediate_size": 1024,
+   "label2id": {
+     "LABEL_0": 0
+   },
+   "layer_norm_eps": 1e-12,
+   "max_position_embeddings": 512,
+   "model_type": "bert",
+   "num_attention_heads": 4,
+   "num_hidden_layers": 10,
+   "pad_token_id": 0,
+   "position_embedding_type": "absolute",
+   "sentence_transformers": {
+     "activation_fn": "torch.nn.modules.activation.Sigmoid",
+     "version": "5.0.0"
+   },
+   "torch_dtype": "float32",
+   "transformers_version": "4.56.0.dev0",
+   "type_vocab_size": 2,
+   "use_cache": true,
+   "vocab_size": 30522
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:975b6dc833dcae94a9e51d134bbc667aeaa11628b07d51747dd56a7ad54f4cb4
+ size 63656924
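Note that the diff above is a Git LFS pointer, not the weights themselves; the actual tensor file is fetched from LFS storage at download time. A small stdlib-only sketch (hypothetical helper) of how the `oid` and `size` can be read from such a pointer:

```python
def parse_lfs_pointer(text: str) -> tuple[str, int]:
    # Git LFS pointers are three "key value" lines: version, oid, size.
    fields = dict(line.split(" ", 1) for line in text.strip().splitlines())
    assert fields["version"].startswith("https://git-lfs.github.com/spec/")
    algo, digest = fields["oid"].split(":", 1)  # e.g. "sha256", hex digest
    return digest, int(fields["size"])


digest, size = parse_lfs_pointer(
    "version https://git-lfs.github.com/spec/v1\n"
    "oid sha256:975b6dc833dcae94a9e51d134bbc667aeaa11628b07d51747dd56a7ad54f4cb4\n"
    "size 63656924\n"
)
print(digest, size)  # sha256 digest and a 63,656,924-byte payload
```

After downloading, the digest can be checked against `hashlib.sha256` of the file contents to verify integrity.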
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "cls_token": "[CLS]",
+   "mask_token": "[MASK]",
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "unk_token": "[UNK]"
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
+ {
+   "added_tokens_decoder": {
+     "0": {
+       "content": "[PAD]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "100": {
+       "content": "[UNK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "101": {
+       "content": "[CLS]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "102": {
+       "content": "[SEP]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "103": {
+       "content": "[MASK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "clean_up_tokenization_spaces": true,
+   "cls_token": "[CLS]",
+   "do_basic_tokenize": true,
+   "do_lower_case": true,
+   "extra_special_tokens": {},
+   "mask_token": "[MASK]",
+   "model_max_length": 512,
+   "never_split": null,
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "strip_accents": null,
+   "tokenize_chinese_chars": true,
+   "tokenizer_class": "BertTokenizer",
+   "unk_token": "[UNK]"
+ }
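The tokenizer above is a standard uncased `BertTokenizer` with `model_max_length` 512. For a cross-encoder, the query and passage are packed into one sequence as `[CLS] query [SEP] passage [SEP]`, with segment ids 0 for the query and 1 for the passage. A minimal pure-Python sketch of that layout using the special-token ids from `added_tokens_decoder` (the real encoding is done by the tokenizer itself, and its truncation strategy may differ from the passage-only truncation shown here):

```python
# Special-token ids from the tokenizer_config.json above.
SPECIAL_IDS = {"[PAD]": 0, "[UNK]": 100, "[CLS]": 101, "[SEP]": 102, "[MASK]": 103}


def pair_encoding(query_ids, passage_ids, max_len=512):
    """Lay out [CLS] query [SEP] passage [SEP] with 0/1 segment ids.
    Assumes the query fits; the passage is truncated to respect max_len."""
    budget = max_len - 3  # room left after the three special tokens
    passage_ids = list(passage_ids)[: max(0, budget - len(query_ids))]
    input_ids = (
        [SPECIAL_IDS["[CLS]"]] + list(query_ids) + [SPECIAL_IDS["[SEP]"]]
        + passage_ids + [SPECIAL_IDS["[SEP]"]]
    )
    token_type_ids = [0] * (len(query_ids) + 2) + [1] * (len(passage_ids) + 1)
    return input_ids, token_type_ids
```

For example, `pair_encoding([7, 8], [9, 10, 11])` yields `[101, 7, 8, 102, 9, 10, 11, 102]` with segment ids `[0, 0, 0, 0, 1, 1, 1, 1]`.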
vocab.txt ADDED
The diff for this file is too large to render. See raw diff