Add new SentenceTransformer model.
Browse files
- .gitattributes +1 -0
- 1_Pooling/config.json +10 -0
- README.md +883 -0
- added_tokens.json +3 -0
- config.json +35 -0
- config_sentence_transformers.json +10 -0
- model.safetensors +3 -0
- modules.json +14 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +15 -0
- spm.model +3 -0
- tokenizer.json +3 -0
- tokenizer_config.json +858 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"word_embedding_dimension": 768,
|
3 |
+
"pooling_mode_cls_token": false,
|
4 |
+
"pooling_mode_mean_tokens": true,
|
5 |
+
"pooling_mode_max_tokens": false,
|
6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
7 |
+
"pooling_mode_weightedmean_tokens": false,
|
8 |
+
"pooling_mode_lasttoken": false,
|
9 |
+
"include_prompt": true
|
10 |
+
}
|
README.md
ADDED
@@ -0,0 +1,883 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
base_model: microsoft/mdeberta-v3-base
|
3 |
+
library_name: sentence-transformers
|
4 |
+
metrics:
|
5 |
+
- cosine_accuracy@1
|
6 |
+
- cosine_accuracy@3
|
7 |
+
- cosine_accuracy@5
|
8 |
+
- cosine_accuracy@10
|
9 |
+
- cosine_precision@1
|
10 |
+
- cosine_precision@3
|
11 |
+
- cosine_precision@5
|
12 |
+
- cosine_precision@10
|
13 |
+
- cosine_recall@1
|
14 |
+
- cosine_recall@3
|
15 |
+
- cosine_recall@5
|
16 |
+
- cosine_recall@10
|
17 |
+
- cosine_ndcg@10
|
18 |
+
- cosine_mrr@10
|
19 |
+
- cosine_map@100
|
20 |
+
- dot_accuracy@1
|
21 |
+
- dot_accuracy@3
|
22 |
+
- dot_accuracy@5
|
23 |
+
- dot_accuracy@10
|
24 |
+
- dot_precision@1
|
25 |
+
- dot_precision@3
|
26 |
+
- dot_precision@5
|
27 |
+
- dot_precision@10
|
28 |
+
- dot_recall@1
|
29 |
+
- dot_recall@3
|
30 |
+
- dot_recall@5
|
31 |
+
- dot_recall@10
|
32 |
+
- dot_ndcg@10
|
33 |
+
- dot_mrr@10
|
34 |
+
- dot_map@100
|
35 |
+
pipeline_tag: sentence-similarity
|
36 |
+
tags:
|
37 |
+
- sentence-transformers
|
38 |
+
- sentence-similarity
|
39 |
+
- feature-extraction
|
40 |
+
- generated_from_trainer
|
41 |
+
- dataset_size:110575
|
42 |
+
- loss:MultipleNegativesRankingLoss
|
43 |
+
widget:
|
44 |
+
- source_sentence: plant-based diets
|
45 |
+
sentences:
|
46 |
+
- Effect of a low-fat high-carbohydrate diet on symptoms of cyclical mastopathy.
|
47 |
+
21 patients with severe persistent cyclical mastopathy of at least 5 years' duration
|
48 |
+
were randomised to a control group who received general dietary advice or to an
|
49 |
+
intervention group who were taught how to reduce the fat content of their diet
|
50 |
+
to 15% of calories while increasing complex carbohydrate consumption to maintain
|
51 |
+
caloric intake. Both groups were followed for 6 months with food records and measurement
|
52 |
+
of plasma hormone and lipid levels. Severity of symptoms was recorded with daily
|
53 |
+
diaries and patients were assessed at the beginning and end of the study by a
|
54 |
+
physician who was unaware of their dietary regimen. After 6 months there was a
|
55 |
+
significant reduction in the intervention group in the severity of premenstrual
|
56 |
+
breast tenderness and swelling. Physical examination showed reduced breast swelling,
|
57 |
+
tenderness, and nodularity in 6 of 10 patients in the intervention group and 2
|
58 |
+
of 9 patients in the control group.
|
59 |
+
- 'Cranberries and cranberry products: powerful in vitro, ex vivo, and in vivo sources
|
60 |
+
of antioxidants. Cranberry products and especially cranberry juice (CJ) have been
|
61 |
+
consumed for health reasons primarily due to their effect on urinary tract infections.
|
62 |
+
We investigated the quantity of both free and total (after hydrolysis) phenolic
|
63 |
+
antioxidants in cranberry products using the Folin assay. The order of amount
|
64 |
+
of total polyphenols in cranberry foods on a fresh weight basis was as follows:
|
65 |
+
dried > frozen > sauce > jellied sauce. On a serving size basis for all cranberry
|
66 |
+
products, the order was as follows: frozen > 100% juice > dried > 27% juice >
|
67 |
+
sauce > jellied sauce. High fructose corn syrup (HFCS) is a major source of sugar
|
68 |
+
consumption in the U.S. and contains both glucose and fructose, potential mediators
|
69 |
+
of oxidative stress. We investigated the effect of the consumption of HFCS and
|
70 |
+
ascorbate with CJ antioxidants or without CJ (control) given to 10 normal individuals
|
71 |
+
after an overnight fast. Plasma antioxidant capacity, glucose, triglycerides,
|
72 |
+
and ascorbate were measured 6 times over 7 h after the consumption of a single
|
73 |
+
240 mL serving of the two different beverages. The control HFCS caused a slight
|
74 |
+
decrease in plasma antioxidant capacity at all time points and thus an oxidative
|
75 |
+
stress in spite of the presence of ascorbate. CJ produced an increase in plasma
|
76 |
+
antioxidant capacity that was significantly greater than control HFCS at all time
|
77 |
+
points. Postprandial triglycerides, due to fructose in the beverages, were mainly
|
78 |
+
responsible for the oxidative stress and were significantly correlated with the
|
79 |
+
oxidative stress as measured by the antioxidant capacity. Cranberries are an excellent
|
80 |
+
source of high quality antioxidants and should be examined in human supplementation
|
81 |
+
studies.'
|
82 |
+
- Açai Palm Fruit (Euterpe oleracea Mart.) Pulp Improves Survival of Flies on a
|
83 |
+
High Fat Diet Reducing oxidative damage is thought to be an effective aging intervention.
|
84 |
+
Açai, a fruit indigenous to the Amazon, is rich in phytochemicals that possess
|
85 |
+
high anti-oxidant activities, and has anti-inflammatory, anti-cancer and anti-cardiovascular
|
86 |
+
disease properties. However, little is known about its potential anti-aging properties
|
87 |
+
especially at the organismal level. Here we evaluated the effect of açai pulp
|
88 |
+
on modulating lifespan in Drosophila melanogaster. We found that açai supplementation
|
89 |
+
at 2% in the food increased the lifespan of female flies fed a high fat diet compared
|
90 |
+
to the non-supplemented control. We measured transcript changes induced by açai
|
91 |
+
for age-related genes. Although transcript levels of most genes tested were not
|
92 |
+
altered, açai increased the transcript level of l(2)efl, a small heat-shock-related
|
93 |
+
protein, and two detoxification genes, gstD1 and mtnA, while decreasing the transcript
|
94 |
+
level of phosphoenolpyruvate carboxykinase (Pepck), a key gene involved in gluconeogenesis.
|
95 |
+
Furthermore, açai increased the lifespan of oxidative stressed females caused
|
96 |
+
by sod1 RNAi. This suggests that açai improves survival of flies fed a high fat
|
97 |
+
diet through activation of stress response pathways and suppression of Pepck expression.
|
98 |
+
Açai has the potential to antagonize the detrimental effect of fat in the diet
|
99 |
+
and alleviate oxidative stress in aging.
|
100 |
+
- source_sentence: reproductive health
|
101 |
+
sentences:
|
102 |
+
- 'Do national advisories serve local consumers: an assessment of mercury in economically
|
103 |
+
important North Carolina fish. Consumption of marine fish provides both benefits
|
104 |
+
(lean protein, omega-3 fatty acids and essential nutrients) and risks (main source
|
105 |
+
of mercury (Hg) exposure for humans). Mercury is a potent neurotoxin and the source
|
106 |
+
of more fish advisories nationwide than any other toxicant. Despite the widespread
|
107 |
+
nature of Hg, it is unknown whether local Hg contamination reflects national and
|
108 |
+
regional levels often used as bases to inform consumers of potential fish consumption
|
109 |
+
risk. Thus, the objectives of our study were to examine Hg levels of six commonly
|
110 |
+
consumed marine species harvested locally off the North Carolina coast and to
|
111 |
+
compare our results to published regional (Monterey Bay Aquarium''s Seafood Watch
|
112 |
+
List) and national (Environmental Protection Agency, EPA, and Food and Drug Administration,
|
113 |
+
FDA) Hg averages, action levels, and guidelines. We found significant differences
|
114 |
+
in Hg concentrations among collected species, and we identified correlations between
|
115 |
+
Hg concentration and fish length and trophic levels. Collected mahi mahi and triggerfish
|
116 |
+
were below the EPA fish tissue action level (0.3ppm). Wahoo and grouper exceeded
|
117 |
+
the EPA action level but were below the FDA action level (1.0ppm). King mackerel
|
118 |
+
had the highest Hg concentration among targeted species, exceeding both EPA and
|
119 |
+
FDA action levels. Further, our local results were not always consistent with
|
120 |
+
calculated averages from EPA and FDA databases for the same species, and although
|
121 |
+
many of our findings were consistent with Monterey Bay Aquarium''s Seafood Watch
|
122 |
+
List (southeast region), recommendations based on Hg levels would conflict with
|
123 |
+
recommendations they provide based on sustainability. We find regional and national
|
124 |
+
averages are not always reflective of local Hg contamination and suggest local
|
125 |
+
data may be needed to accurately assess consumer risk.'
|
126 |
+
- Purple rice (Oryza sativa L.) extract and its constituents inhibit VEGF-induced
|
127 |
+
angiogenesis. The study evaluated the protective effects of purple rice (Oryza
|
128 |
+
sativa L.) bran extract (PRE) and its constituents, cyanidin and peonidin, against
|
129 |
+
angiogenesis induced by vascular endothelial growth factor (VEGF). The effects
|
130 |
+
of VEGF and PRE were examined by in vitro tube formation assays and following
|
131 |
+
14-day co-culture of human umbilical vein endothelial cells (HUVECs) and fibroblasts.
|
132 |
+
The antiangiogenic mechanism of PRE was evaluated by VEGF-induced proliferation
|
133 |
+
and migration of HUVECs and/or human retinal microvascular endothelial cells (HRMECs)
|
134 |
+
and phosphorylation of extracellular signal-regulated kinase (ERK) and p38. The
|
135 |
+
PRE significantly suppressed VEGF-induced tube formation, proliferation and migration
|
136 |
+
in HUVECs and HRMECs as well as phosphorylation of ERK and p38. Cyanidin and peonidin
|
137 |
+
also suppressed the proliferation and migration induced by VEGF. These findings
|
138 |
+
indicate that PRE and anthocyanidins suppress VEGF-induced angiogenesis by inhibiting
|
139 |
+
proliferation and migration and suggest that the inhibition of phosphorylated-ERK
|
140 |
+
and -p38 may be involved in the underlying mechanism. Copyright © 2011 John Wiley
|
141 |
+
& Sons, Ltd.
|
142 |
+
- 'Endurance exercise results in DNA damage as detected by the comet assay. To determine
|
143 |
+
if 6 weeks of supplementation with antioxidants could alleviate exercise-induced
|
144 |
+
DNA damage, we studied 21 runners during a 50 km ultramarathon. Subjects were
|
145 |
+
randomly assigned to one of two groups: (1) placebos (PL) or (2) antioxidants
|
146 |
+
(AO) (1000 mg vitamin C and 400 IU RRR-alpha-tocopheryl acetate). The comet assay
|
147 |
+
was used to assess DNA damage in circulating leukocytes at selected time points:
|
148 |
+
pre-, mid-, and 2 h postrace and daily for 6 days postrace. All subjects completed
|
149 |
+
the race: run time 7.1 +/- 0.1 h, energy expenditure 5008 +/- 80 kcal for women
|
150 |
+
(n = 10) and 6932 +/- 206 kcal for men (n = 11). Overall, the percentage DNA damage
|
151 |
+
increased at midrace (p <.02), but returned to baseline by 2 h postrace, indicating
|
152 |
+
that the exercise bout induced nonpersistent DNA damage. There was a gender x
|
153 |
+
treatment x time interaction (p <.01). One day postrace, women taking AO had 62%
|
154 |
+
less DNA damage than women taking PL (p <.0008). In contrast, there were no statistically
|
155 |
+
significant differences between the two treatment groups of men at any time point.
|
156 |
+
Thus, endurance exercise resulted in DNA damage as shown by the comet assay and
|
157 |
+
AO seemed to enhance recovery in women but not in men.'
|
158 |
+
- source_sentence: immune function
|
159 |
+
sentences:
|
160 |
+
- 'Relationship between the prenatal exposure to low-level of mercury and the size
|
161 |
+
of a newborn''s cerebellum. Exposure to methylmercury at any stage of central
|
162 |
+
nervous system development could induce alterations and result in severe congenital
|
163 |
+
abnormalities. Total mercury level in maternal hair during pregnancy correlates
|
164 |
+
well with blood levels of methylmercury and with total mercury levels in fetal
|
165 |
+
brain. A prospective study has been conducted and a total of 137 childbearing
|
166 |
+
women living at the coastal region with term, normal pregnancies were included
|
167 |
+
and their newborns evaluated by ultrasonography. Mothers and their newborns are
|
168 |
+
divided in two groups according to their hair mercury levels; examined group with
|
169 |
+
high body levels of mercury (≥ 1 μg/g) and control group with low body levels
|
170 |
+
of mercury (<1 μg/g). Neurosonographic examination was conducted to all newborns.
|
171 |
+
Two dimensions of cerebellum in the sagittal-medial plane have been measured: maximum
|
172 |
+
height and width starting from the roof of the fourth chamber. Majority of mothers
|
173 |
+
had hair mercury levels lower than 1 μg/g (N = 107). Mean value was 0.88 μg/g
|
174 |
+
(SD 1.24), ranging from 0.02 to 8.71 μg/g. There was no significant difference
|
175 |
+
between the two groups when it comes to the width of cerebellum (Mann-Whitney
|
176 |
+
test: Z = 1471; p = 0.141). However, comparison related to the length of cerebellum
|
177 |
+
shows statistically significant smaller cerebellum in newborns whose mother had
|
178 |
+
hair mercury levels higher than 1 μg/g (Mann-Whitney test: Z = 2329; p = 0.019).
|
179 |
+
Our results lead to a conclusion that prenatal exposure to, what we consider to
|
180 |
+
be, low-levels of methylmercury does influence fetal brain development detected
|
181 |
+
as decreased size of newborn''s cerebellum. From a clinical point of view, a question
|
182 |
+
related to the influence of prenatal low-level methylmercury exposure on fetal
|
183 |
+
neurodevelopment remains open. Our further objectives are to direct the research
|
184 |
+
towards performing detailed neuropsychological tests on children at the age of
|
185 |
+
18 months. Such tests could indicate the presence of subtle neurological or neuropsychological
|
186 |
+
deficits. Copyright © 2010 Elsevier Ltd. All rights reserved.'
|
187 |
+
- 'Radioprotective effects of Zingiber officinale Roscoe (ginger): past, present
|
188 |
+
and future. Radiation is an important modality in treating people with cancer
|
189 |
+
especially when surgical intervention is impracticable or might debilitate the
|
190 |
+
patient. However, effective use of ionizing radiation is compromised by the side
|
191 |
+
effects that result from radiation-induced damage to normal tissue. The use of
|
192 |
+
radioprotective compounds, which can selectively protect normal tissues against
|
193 |
+
radiation injury is of immense use because in addition to association with protecting
|
194 |
+
the normal tissue, it will also permit use of higher doses of radiation to obtain
|
195 |
+
better cancer control and possible cure. However, till date no ideal radioprotectors
|
196 |
+
are available as most synthetic compounds are toxic at their optimal concentrations.
|
197 |
+
Plants commonly used as dietary and or therapeutic agents have recently been the
|
198 |
+
focus of attention since in most cases they are non-toxic and are easily accepted
|
199 |
+
for human use. Ginger, the rhizomes of Zingiber officinale Roscoe (Zingiberaceae),
|
200 |
+
has widely been used as both culinary and medicinal agent. Preclinical studies
|
201 |
+
carried out in the last decade have shown that ginger and its phytochemicals dehydrozingerone,
|
202 |
+
zingerone possess radioprotective effects in laboratory animals and in cultured
|
203 |
+
cells in vitro. The hydroalcoholic extract of ginger rhizome when administered
|
204 |
+
either through intraperitoneal or oral route was effective in protecting against
|
205 |
+
gamma radiation-induced sickness and mortality. The phytochemicals dehydrogingerone
|
206 |
+
and zingerone present in ginger are also shown to protect mice against radiation-induced
|
207 |
+
sickness and mortality. Mechanistic studies have indicated that the free radical
|
208 |
+
scavenging, antioxidant effects, anti-inflammatory and anti-clastogenic effects
|
209 |
+
may contribute towards the observed protection. Additionally, studies with tumor
|
210 |
+
bearing mice have also shown that zingerone selectively protects the normal tissues
|
211 |
+
against the tumoricidal effects of radiation. This review for the first time summarizes
|
212 |
+
the results related to the radioprotective properties and also emphasizes the
|
213 |
+
aspects that warrant future research to establish its activity and utility as
|
214 |
+
a radioprotective agent.'
|
215 |
+
- Traditional non-Western diets. In traditional cultures, balancing health with
|
216 |
+
a balanced lifestyle was a core belief. The diseases of modern civilization were
|
217 |
+
rare. Indigenous people have patterns of illness very different from Western civilization;
|
218 |
+
yet, they rapidly develop diseases once exposed to Western foods and lifestyles.
|
219 |
+
Food and medicine were interwoven. All cultures used special or functional foods
|
220 |
+
to prevent disease. Food could be used at different times either as food or medicine.
|
221 |
+
Foods, cultivation, and cooking methods maximized community health and well-being.
|
222 |
+
With methods passed down through generations, cooking processes were utilized
|
223 |
+
that enhanced mineral and nutrient bioavailability. This article focuses on what
|
224 |
+
researchers observed about the food traditions of indigenous people, their disease
|
225 |
+
patterns, the use of specific foods, and the environmental factors that affect
|
226 |
+
people who still eat traditional foods.
|
227 |
+
- source_sentence: ginger
|
228 |
+
sentences:
|
229 |
+
- Xeno-estrogenic compounds in precipitation. The exposure to some chemicals can
|
230 |
+
lead to hormone disrupting effects. Presently, much attention is focused on so-called
|
231 |
+
xeno-estrogens, synthetic compounds that interact with hormone receptors causing
|
232 |
+
a number of reactions that eventually lead to effects related to reproduction
|
233 |
+
and development. The current study was initiated to investigate the presence of
|
234 |
+
a number of such compounds in precipitation as a follow-up on a previous study
|
235 |
+
in which pesticide concentrations in air and precipitation were determined. Rainwater
|
236 |
+
samples were collected at about 50 locations in The Netherlands in a four week
|
237 |
+
period. The samples were analysed for bisphenol-A, alkylphenols and alkylphenol
|
238 |
+
ethoxylates, phthalates, flame retardants and synthetic musk compounds. The results
|
239 |
+
clearly indicated the presence of these compounds in precipitation. The concentrations
|
240 |
+
ranged from the low ng l(-1) range for flame retardants to several thousands of
|
241 |
+
ng l(-1) for the phthalates. Bisphenol-A was found in 30% of the samples in concentrations
|
242 |
+
up to 130 ng l(-1), while alkylphenols and alkylphenol ethoxylates were found
|
243 |
+
in virtually all locations in concentrations up to 920 ng l(-1) for the individual
|
244 |
+
compounds. Phthalates were by far the most abundant xeno-estrogens in the precipitation
|
245 |
+
samples and were found in every sample. Di-isodecyl phthalate was found in a surprisingly
|
246 |
+
high concentration of almost 100 000 ng l(-1). Polybrominated flame retardants
|
247 |
+
were found in the low ng l(-1) range and generally in less than 20% of the samples.
|
248 |
+
Noticeable was the finding of hexabromocyclododecane, a replacement for the polybrominated
|
249 |
+
diphenyl ethers at one location in a concentration of almost 2000 ng l(-1). Finally,
|
250 |
+
as expected, synthetic musk compounds were detected in almost all samples. This
|
251 |
+
is especially true for the polycyclic musks HHCB and AHTN. Nitro musks were found,
|
252 |
+
but only on a few locations. Kriging techniques were used to calculate precipitation
|
253 |
+
concentrations in between actual sampling locations to produce contour plots for
|
254 |
+
a number of compounds. These plots clearly show located emission sources for a
|
255 |
+
number of compounds such as bisphenol-A, nonylphenol ethoxylate, phthalates and
|
256 |
+
AHTN. On the contrary, the results for HHCB and some phthalates indicated diffuse
|
257 |
+
emission patterns, probably as the result of the use of consumer products containing
|
258 |
+
these compounds.
|
259 |
+
- 'The state of US health, 1990-2010: burden of diseases, injuries, and risk factors.
|
260 |
+
IMPORTANCE: Understanding the major health problems in the United States and how
|
261 |
+
they are changing over time is critical for informing national health policy.
|
262 |
+
OBJECTIVES: To measure the burden of diseases, injuries, and leading risk factors
|
263 |
+
in the United States from 1990 to 2010 and to compare these measurements with
|
264 |
+
those of the 34 countries in the Organisation for Economic Co-operation and Development
|
265 |
+
(OECD) countries. DESIGN: We used the systematic analysis of descriptive epidemiology
|
266 |
+
of 291 diseases and injuries, 1160 sequelae of these diseases and injuries, and
|
267 |
+
67 risk factors or clusters of risk factors from 1990 to 2010 for 187 countries
|
268 |
+
developed for the Global Burden of Disease 2010 Study to describe the health status
|
269 |
+
of the United States and to compare US health outcomes with those of 34 OECD countries.
|
270 |
+
Years of life lost due to premature mortality (YLLs) were computed by multiplying
|
271 |
+
the number of deaths at each age by a reference life expectancy at that age. Years
|
272 |
+
lived with disability (YLDs) were calculated by multiplying prevalence (based
|
273 |
+
on systematic reviews) by the disability weight (based on population-based surveys)
|
274 |
+
for each sequela; disability in this study refers to any short- or long-term loss
|
275 |
+
of health. Disability-adjusted life-years (DALYs) were estimated as the sum of
|
276 |
+
YLDs and YLLs. Deaths and DALYs related to risk factors were based on systematic
|
277 |
+
reviews and meta-analyses of exposure data and relative risks for risk-outcome
|
278 |
+
pairs. Healthy life expectancy (HALE) was used to summarize overall population
|
279 |
+
health, accounting for both length of life and levels of ill health experienced
|
280 |
+
at different ages. RESULTS: US life expectancy for both sexes combined increased
|
281 |
+
from 75.2 years in 1990 to 78.2 years in 2010; during the same period, HALE increased
|
282 |
+
from 65.8 years to 68.1 years. The diseases and injuries with the largest number
|
283 |
+
of YLLs in 2010 were ischemic heart disease, lung cancer, stroke, chronic obstructive
|
284 |
+
pulmonary disease, and road injury. Age-standardized YLL rates increased for Alzheimer
|
285 |
+
disease, drug use disorders, chronic kidney disease, kidney cancer, and falls.
|
286 |
+
The diseases with the largest number of YLDs in 2010 were low back pain, major
|
287 |
+
depressive disorder, other musculoskeletal disorders, neck pain, and anxiety disorders.
|
288 |
+
As the US population has aged, YLDs have comprised a larger share of DALYs than
|
289 |
+
have YLLs. The leading risk factors related to DALYs were dietary risks, tobacco
|
290 |
+
smoking, high body mass index, high blood pressure, high fasting plasma glucose,
|
291 |
+
physical inactivity, and alcohol use. Among 34 OECD countries between 1990 and
|
292 |
+
2010, the US rank for the age-standardized death rate changed from 18th to 27th,
|
293 |
+
for the age-standardized YLL rate from 23rd to 28th, for the age-standardized
|
294 |
+
YLD rate from 5th to 6th, for life expectancy at birth from 20th to 27th, and
|
295 |
+
for HALE from 14th to 26th. CONCLUSIONS AND RELEVANCE: From 1990 to 2010, the
|
296 |
+
United States made substantial progress in improving health. Life expectancy at
|
297 |
+
birth and HALE increased, all-cause death rates at all ages decreased, and age-specific
|
298 |
+
rates of years lived with disability remained stable. However, morbidity and chronic
|
299 |
+
disability now account for nearly half of the US health burden, and improvements
|
300 |
+
in population health in the United States have not kept pace with advances in
|
301 |
+
population health in other wealthy nations.'
|
302 |
+
- 'Curcumin as "Curecumin": from kitchen to clinic. Although turmeric (Curcuma longa;
|
303 |
+
an Indian spice) has been described in Ayurveda, as a treatment for inflammatory
|
304 |
+
diseases and is referred by different names in different cultures, the active
|
305 |
+
principle called curcumin or diferuloylmethane, a yellow pigment present in turmeric
|
306 |
+
(curry powder) has been shown to exhibit numerous activities. Extensive research
|
307 |
+
over the last half century has revealed several important functions of curcumin.
|
308 |
+
It binds to a variety of proteins and inhibits the activity of various kinases.
|
309 |
+
By modulating the activation of various transcription factors, curcumin regulates
|
310 |
+
the expression of inflammatory enzymes, cytokines, adhesion molecules, and cell
|
311 |
+
survival proteins. Curcumin also downregulates cyclin D1, cyclin E and MDM2; and
|
312 |
+
upregulates p21, p27, and p53. Various preclinical cell culture and animal studies
|
313 |
+
suggest that curcumin has potential as an antiproliferative, anti-invasive, and
|
314 |
+
antiangiogenic agent; as a mediator of chemoresistance and radioresistance; as
|
315 |
+
a chemopreventive agent; and as a therapeutic agent in wound healing, diabetes,
|
316 |
+
Alzheimer disease, Parkinson disease, cardiovascular disease, pulmonary disease,
|
317 |
+
and arthritis. Pilot phase I clinical trials have shown curcumin to be safe even
|
318 |
+
when consumed at a daily dose of 12g for 3 months. Other clinical trials suggest
|
319 |
+
a potential therapeutic role for curcumin in diseases such as familial adenomatous
|
320 |
+
polyposis, inflammatory bowel disease, ulcerative colitis, colon cancer, pancreatic
|
321 |
+
cancer, hypercholesteremia, atherosclerosis, pancreatitis, psoriasis, chronic
|
322 |
+
anterior uveitis and arthritis. Thus, curcumin, a spice once relegated to the
|
323 |
+
kitchen shelf, has moved into the clinic and may prove to be "Curecumin".'
|
324 |
+
- source_sentence: hot dogs
|
325 |
+
sentences:
|
326 |
+
- 'Occurrence of pharmaceuticals and personal care products in fish: results of
|
327 |
+
a national pilot study in the United States. Pharmaceuticals and personal care
|
328 |
+
products are being increasingly reported in a variety of biological matrices,
|
329 |
+
including fish tissue; however, screening studies have presently not encompassed
|
330 |
+
broad geographical areas. A national pilot study was initiated in the United States
|
331 |
+
to assess the accumulation of pharmaceuticals and personal care products in fish
|
332 |
+
sampled from five effluent-dominated rivers that receive direct discharge from
|
333 |
+
wastewater treatment facilities in Chicago, Illinois; Dallas, Texas; Orlando,
|
334 |
+
Florida; Phoenix, Arizona; and West Chester, Pennsylvania, USA. Fish were also
|
335 |
+
collected from the Gila River, New Mexico, USA, as a reference condition expected
|
336 |
+
to be minimally impacted by anthropogenic influence. High performance liquid chromatography-tandem
|
337 |
+
mass spectrometry analysis of pharmaceuticals revealed the presence of norfluoxetine,
|
338 |
+
sertraline, diphenhydramine, diltiazem, and carbamazepine at nanogram-per-gram
|
339 |
+
concentrations in fillet composites from effluent-dominated sampling locations;
|
340 |
+
the additional presence of fluoxetine and gemfibrozil was confirmed in liver tissue.
|
341 |
+
Sertraline was detected at concentrations as high as 19 and 545 ng/g in fillet
|
342 |
+
and liver tissue, respectively. Gas chromatography-tandem mass spectrometry analysis
|
343 |
+
of personal care products in fillet composites revealed the presence of galaxolide
|
344 |
+
and tonalide at maximum concentrations of 2,100 and 290 ng/g, respectively, and
|
345 |
+
trace levels of triclosan. In general, more pharmaceuticals were detected at higher
|
346 |
+
concentrations and with greater frequency in liver than in fillet tissues. Higher
|
347 |
+
lipid content in liver tissue could not account for this discrepancy as no significant
|
348 |
+
positive correlations were found between accumulated pharmaceutical concentrations
|
349 |
+
and lipid content for either tissue type from any sampling site. In contrast,
|
350 |
+
accumulation of the personal care products galaxolide and tonalide was significantly
|
351 |
+
related to lipid content. Results suggest that the detection of pharmaceuticals
|
352 |
+
and personal care products was dependent on the degree of wastewater treatment
|
353 |
+
employed.'
|
354 |
+
- 'Dietary modification of human macular pigment density. PURPOSE: The retinal carotenoids
|
355 |
+
lutein (L) and zeaxanthin (Z) that form the macular pigment (MP) may help to prevent
|
356 |
+
neovascular age-related macular degeneration. The purpose of this study was to
|
357 |
+
determine whether MP density in the retina could be raised by increasing dietary
|
358 |
+
intake of L and Z from foods. METHODS: Macular pigment was measured psychophysically
|
359 |
+
for 13 subjects. Serum concentrations of L, Z, and beta-carotene were measured
|
360 |
+
by high-performance liquid chromatography. Eleven subjects modified their usual
|
361 |
+
daily diets by adding 60 g of spinach (10.8 mg L, 0.3 mg Z, 5 mg beta-carotene)
|
362 |
+
and ten also added 150 g of corn (0.3 mg Z, 0.4 mg L); two other subjects were
|
363 |
+
given only corn. Dietary modification lasted up to 15 weeks. RESULTS: For the
|
364 |
+
subjects fed spinach or spinach and corn, three types of responses to dietary
|
365 |
+
modification were identified: Eight "retinal responders" had increases in serum
|
366 |
+
L (mean, 33%; SD, 22%) and in MP density (mean, 19%; SD, 11%); two "retinal nonresponders"
|
367 |
+
showed substantial increases in serum L (mean, 31%) but not in MP density (mean,
|
368 |
+
-11%); one "serum and retinal nonresponder" showed no changes in serum L, Z, or
|
369 |
+
beta-carotene and no change in MP density. For the two subjects given only corn,
|
370 |
+
serum L changed little (+11%, -6%), but in one subject serum Z increased (70%)
|
371 |
+
and MP density increased (25%). CONCLUSIONS: Increases in MP density were obtained
|
372 |
+
within 4 weeks of dietary modification for most, but not all, subjects. When MP
|
373 |
+
density increased with dietary modification, it remained elevated for at least
|
374 |
+
several months after resuming an unmodified diet. Augmentation of MP for both
|
375 |
+
experimental and clinical investigation appears to be feasible for many persons.'
|
376 |
+
- 'Habitual physical exercise has beneficial effects on telomere length in postmenopausal
|
377 |
+
women. OBJECTIVE: It has been reported that women benefit from the maintenance
|
378 |
+
of telomere length by estrogen. Exercise may favorably influence telomere length,
|
379 |
+
although results are inconsistent regarding the duration and type of exercise
|
380 |
+
and the cell type used to measure telomere length. The purpose of this study was
|
381 |
+
to investigate the relationship between habitual physical exercise and telomere
|
382 |
+
length in peripheral blood mononuclear cells (PBMCs) in postmenopausal women.
|
383 |
+
Postmenopausal women were chosen as study participants because they are typically
|
384 |
+
estrogen deficient. METHODS: This experimental-control, cross-sectional study
|
385 |
+
included 44 healthy, nondiabetic, nonsmoking, postmenopausal women. Habitual exercisers
|
386 |
+
and sedentary participants were matched for age and body mass index. Body weight,
|
387 |
+
height, blood pressure, and waist and hip circumference were measured. Mitochondrial
|
388 |
+
DNA copy number and telomere length in PBMCs were determined, and biochemical
|
389 |
+
tests were performed. Habitual physical exercise was defined as combined aerobic
|
390 |
+
and resistance exercise performed for at least 60 minutes per session more than
|
391 |
+
three times a week for more than 12 months. RESULTS: The mean age of all participants
|
392 |
+
was 58.11 ± 6.84 years, and participants in the habitual exercise group had been
|
393 |
+
exercising more than three times per week for an average of 19.23 ± 5.15 months.
|
394 |
+
Serum triglyceride levels (P = 0.01), fasting insulin concentrations (P < 0.01),
|
395 |
+
and homeostasis model assessment of insulin resistance (P < 0.01) were significantly
|
396 |
+
lower and high-density lipoprotein cholesterol levels (P < 0.01), circulating
|
397 |
+
adiponectin (P < 0.01), mitochondrial DNA copy number (P < 0.01), and telomere
|
398 |
+
length (P < 0.01) were significantly higher in the habitual exercise group than
|
399 |
+
in the sedentary group. In a stepwise multiple regression analysis, habitual exercise
|
400 |
+
(β = 0.522, P < 0.01) and adiponectin levels (β = 0.139, P = 0.03) were the independent
|
401 |
+
factors associated with the telomere length of PBMCs in postmenopausal women.
|
402 |
+
CONCLUSIONS: Habitual physical exercise is associated with greater telomere length
|
403 |
+
in postmenopausal women. This finding suggests that habitual physical exercise
|
404 |
+
in postmenopausal women may reduce telomere attrition.'
|
405 |
+
model-index:
|
406 |
+
- name: SentenceTransformer based on microsoft/mdeberta-v3-base
|
407 |
+
results:
|
408 |
+
- task:
|
409 |
+
type: information-retrieval
|
410 |
+
name: Information Retrieval
|
411 |
+
dataset:
|
412 |
+
name: eval
|
413 |
+
type: eval
|
414 |
+
metrics:
|
415 |
+
- type: cosine_accuracy@1
|
416 |
+
value: 0.19753086419753085
|
417 |
+
name: Cosine Accuracy@1
|
418 |
+
- type: cosine_accuracy@3
|
419 |
+
value: 0.3117283950617284
|
420 |
+
name: Cosine Accuracy@3
|
421 |
+
- type: cosine_accuracy@5
|
422 |
+
value: 0.36728395061728397
|
423 |
+
name: Cosine Accuracy@5
|
424 |
+
- type: cosine_accuracy@10
|
425 |
+
value: 0.4074074074074074
|
426 |
+
name: Cosine Accuracy@10
|
427 |
+
- type: cosine_precision@1
|
428 |
+
value: 0.19753086419753085
|
429 |
+
name: Cosine Precision@1
|
430 |
+
- type: cosine_precision@3
|
431 |
+
value: 0.17901234567901234
|
432 |
+
name: Cosine Precision@3
|
433 |
+
- type: cosine_precision@5
|
434 |
+
value: 0.18271604938271604
|
435 |
+
name: Cosine Precision@5
|
436 |
+
- type: cosine_precision@10
|
437 |
+
value: 0.15987654320987654
|
438 |
+
name: Cosine Precision@10
|
439 |
+
- type: cosine_recall@1
|
440 |
+
value: 0.012537229433577907
|
441 |
+
name: Cosine Recall@1
|
442 |
+
- type: cosine_recall@3
|
443 |
+
value: 0.028172357070419075
|
444 |
+
name: Cosine Recall@3
|
445 |
+
- type: cosine_recall@5
|
446 |
+
value: 0.04616926987049525
|
447 |
+
name: Cosine Recall@5
|
448 |
+
- type: cosine_recall@10
|
449 |
+
value: 0.07464352973292211
|
450 |
+
name: Cosine Recall@10
|
451 |
+
- type: cosine_ndcg@10
|
452 |
+
value: 0.1771093240394393
|
453 |
+
name: Cosine NDCG@10
|
454 |
+
- type: cosine_mrr@10
|
455 |
+
value: 0.2604485106799922
|
456 |
+
name: Cosine MRR@10
|
457 |
+
- type: cosine_map@100
|
458 |
+
value: 0.10892177775157068
|
459 |
+
name: Cosine MAP@100
|
460 |
+
- type: dot_accuracy@1
|
461 |
+
value: 0.1419753086419753
|
462 |
+
name: Dot Accuracy@1
|
463 |
+
- type: dot_accuracy@3
|
464 |
+
value: 0.2345679012345679
|
465 |
+
name: Dot Accuracy@3
|
466 |
+
- type: dot_accuracy@5
|
467 |
+
value: 0.26851851851851855
|
468 |
+
name: Dot Accuracy@5
|
469 |
+
- type: dot_accuracy@10
|
470 |
+
value: 0.33024691358024694
|
471 |
+
name: Dot Accuracy@10
|
472 |
+
- type: dot_precision@1
|
473 |
+
value: 0.1419753086419753
|
474 |
+
name: Dot Precision@1
|
475 |
+
- type: dot_precision@3
|
476 |
+
value: 0.13786008230452673
|
477 |
+
name: Dot Precision@3
|
478 |
+
- type: dot_precision@5
|
479 |
+
value: 0.13950617283950617
|
480 |
+
name: Dot Precision@5
|
481 |
+
- type: dot_precision@10
|
482 |
+
value: 0.1308641975308642
|
483 |
+
name: Dot Precision@10
|
484 |
+
- type: dot_recall@1
|
485 |
+
value: 0.005321414264515094
|
486 |
+
name: Dot Recall@1
|
487 |
+
- type: dot_recall@3
|
488 |
+
value: 0.019846033299858425
|
489 |
+
name: Dot Recall@3
|
490 |
+
- type: dot_recall@5
|
491 |
+
value: 0.029084171632431998
|
492 |
+
name: Dot Recall@5
|
493 |
+
- type: dot_recall@10
|
494 |
+
value: 0.049244765309017405
|
495 |
+
name: Dot Recall@10
|
496 |
+
- type: dot_ndcg@10
|
497 |
+
value: 0.13684384060235508
|
498 |
+
name: Dot NDCG@10
|
499 |
+
- type: dot_mrr@10
|
500 |
+
value: 0.19590192043895757
|
501 |
+
name: Dot MRR@10
|
502 |
+
- type: dot_map@100
|
503 |
+
value: 0.08815891364299847
|
504 |
+
name: Dot MAP@100
|
505 |
+
---
|
506 |
+
|
507 |
+
# SentenceTransformer based on microsoft/mdeberta-v3-base
|
508 |
+
|
509 |
+
This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [microsoft/mdeberta-v3-base](https://huggingface.co/microsoft/mdeberta-v3-base). It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
|
510 |
+
|
511 |
+
## Model Details
|
512 |
+
|
513 |
+
### Model Description
|
514 |
+
- **Model Type:** Sentence Transformer
|
515 |
+
- **Base model:** [microsoft/mdeberta-v3-base](https://huggingface.co/microsoft/mdeberta-v3-base) <!-- at revision a0484667b22365f84929a935b5e50a51f71f159d -->
|
516 |
+
- **Maximum Sequence Length:** 512 tokens
|
517 |
+
- **Output Dimensionality:** 768 dimensions
|
518 |
+
- **Similarity Function:** Cosine Similarity
|
519 |
+
<!-- - **Training Dataset:** Unknown -->
|
520 |
+
<!-- - **Language:** Unknown -->
|
521 |
+
<!-- - **License:** Unknown -->
|
522 |
+
|
523 |
+
### Model Sources
|
524 |
+
|
525 |
+
- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
|
526 |
+
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
|
527 |
+
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
|
528 |
+
|
529 |
+
### Full Model Architecture
|
530 |
+
|
531 |
+
```
|
532 |
+
SentenceTransformer(
|
533 |
+
(0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: DebertaV2Model
|
534 |
+
(1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
|
535 |
+
)
|
536 |
+
```
|
537 |
+
|
538 |
+
## Usage
|
539 |
+
|
540 |
+
### Direct Usage (Sentence Transformers)
|
541 |
+
|
542 |
+
First install the Sentence Transformers library:
|
543 |
+
|
544 |
+
```bash
|
545 |
+
pip install -U sentence-transformers
|
546 |
+
```
|
547 |
+
|
548 |
+
Then you can load this model and run inference.
|
549 |
+
```python
|
550 |
+
from sentence_transformers import SentenceTransformer
|
551 |
+
|
552 |
+
# Download from the 🤗 Hub
|
553 |
+
model = SentenceTransformer("BlackBeenie/mdeberta-v3-base-sbert")
|
554 |
+
# Run inference
|
555 |
+
sentences = [
|
556 |
+
'hot dogs',
|
557 |
+
'Habitual physical exercise has beneficial effects on telomere length in postmenopausal women. OBJECTIVE: It has been reported that women benefit from the maintenance of telomere length by estrogen. Exercise may favorably influence telomere length, although results are inconsistent regarding the duration and type of exercise and the cell type used to measure telomere length. The purpose of this study was to investigate the relationship between habitual physical exercise and telomere length in peripheral blood mononuclear cells (PBMCs) in postmenopausal women. Postmenopausal women were chosen as study participants because they are typically estrogen deficient. METHODS: This experimental-control, cross-sectional study included 44 healthy, nondiabetic, nonsmoking, postmenopausal women. Habitual exercisers and sedentary participants were matched for age and body mass index. Body weight, height, blood pressure, and waist and hip circumference were measured. Mitochondrial DNA copy number and telomere length in PBMCs were determined, and biochemical tests were performed. Habitual physical exercise was defined as combined aerobic and resistance exercise performed for at least 60 minutes per session more than three times a week for more than 12 months. RESULTS: The mean age of all participants was 58.11 ± 6.84 years, and participants in the habitual exercise group had been exercising more than three times per week for an average of 19.23 ± 5.15 months. Serum triglyceride levels (P = 0.01), fasting insulin concentrations (P < 0.01), and homeostasis model assessment of insulin resistance (P < 0.01) were significantly lower and high-density lipoprotein cholesterol levels (P < 0.01), circulating adiponectin (P < 0.01), mitochondrial DNA copy number (P < 0.01), and telomere length (P < 0.01) were significantly higher in the habitual exercise group than in the sedentary group. In a stepwise multiple regression analysis, habitual exercise (β = 0.522, P < 0.01) and adiponectin levels (β = 0.139, P = 0.03) were the independent factors associated with the telomere length of PBMCs in postmenopausal women. CONCLUSIONS: Habitual physical exercise is associated with greater telomere length in postmenopausal women. This finding suggests that habitual physical exercise in postmenopausal women may reduce telomere attrition.',
|
558 |
+
'Dietary modification of human macular pigment density. PURPOSE: The retinal carotenoids lutein (L) and zeaxanthin (Z) that form the macular pigment (MP) may help to prevent neovascular age-related macular degeneration. The purpose of this study was to determine whether MP density in the retina could be raised by increasing dietary intake of L and Z from foods. METHODS: Macular pigment was measured psychophysically for 13 subjects. Serum concentrations of L, Z, and beta-carotene were measured by high-performance liquid chromatography. Eleven subjects modified their usual daily diets by adding 60 g of spinach (10.8 mg L, 0.3 mg Z, 5 mg beta-carotene) and ten also added 150 g of corn (0.3 mg Z, 0.4 mg L); two other subjects were given only corn. Dietary modification lasted up to 15 weeks. RESULTS: For the subjects fed spinach or spinach and corn, three types of responses to dietary modification were identified: Eight "retinal responders" had increases in serum L (mean, 33%; SD, 22%) and in MP density (mean, 19%; SD, 11%); two "retinal nonresponders" showed substantial increases in serum L (mean, 31%) but not in MP density (mean, -11%); one "serum and retinal nonresponder" showed no changes in serum L, Z, or beta-carotene and no change in MP density. For the two subjects given only corn, serum L changed little (+11%, -6%), but in one subject serum Z increased (70%) and MP density increased (25%). CONCLUSIONS: Increases in MP density were obtained within 4 weeks of dietary modification for most, but not all, subjects. When MP density increased with dietary modification, it remained elevated for at least several months after resuming an unmodified diet. Augmentation of MP for both experimental and clinical investigation appears to be feasible for many persons.',
|
559 |
+
]
|
560 |
+
embeddings = model.encode(sentences)
|
561 |
+
print(embeddings.shape)
|
562 |
+
# [3, 768]
|
563 |
+
|
564 |
+
# Get the similarity scores for the embeddings
|
565 |
+
similarities = model.similarity(embeddings, embeddings)
|
566 |
+
print(similarities.shape)
|
567 |
+
# [3, 3]
|
568 |
+
```
|
569 |
+
|
570 |
+
<!--
|
571 |
+
### Direct Usage (Transformers)
|
572 |
+
|
573 |
+
<details><summary>Click to see the direct usage in Transformers</summary>
|
574 |
+
|
575 |
+
</details>
|
576 |
+
-->
|
577 |
+
|
578 |
+
<!--
|
579 |
+
### Downstream Usage (Sentence Transformers)
|
580 |
+
|
581 |
+
You can finetune this model on your own dataset.
|
582 |
+
|
583 |
+
<details><summary>Click to expand</summary>
|
584 |
+
|
585 |
+
</details>
|
586 |
+
-->
|
587 |
+
|
588 |
+
<!--
|
589 |
+
### Out-of-Scope Use
|
590 |
+
|
591 |
+
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
592 |
+
-->
|
593 |
+
|
594 |
+
## Evaluation
|
595 |
+
|
596 |
+
### Metrics
|
597 |
+
|
598 |
+
#### Information Retrieval
|
599 |
+
* Dataset: `eval`
|
600 |
+
* Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator)
|
601 |
+
|
602 |
+
| Metric | Value |
|
603 |
+
|:--------------------|:-----------|
|
604 |
+
| cosine_accuracy@1 | 0.1975 |
|
605 |
+
| cosine_accuracy@3 | 0.3117 |
|
606 |
+
| cosine_accuracy@5 | 0.3673 |
|
607 |
+
| cosine_accuracy@10 | 0.4074 |
|
608 |
+
| cosine_precision@1 | 0.1975 |
|
609 |
+
| cosine_precision@3 | 0.179 |
|
610 |
+
| cosine_precision@5 | 0.1827 |
|
611 |
+
| cosine_precision@10 | 0.1599 |
|
612 |
+
| cosine_recall@1 | 0.0125 |
|
613 |
+
| cosine_recall@3 | 0.0282 |
|
614 |
+
| cosine_recall@5 | 0.0462 |
|
615 |
+
| cosine_recall@10 | 0.0746 |
|
616 |
+
| cosine_ndcg@10 | 0.1771 |
|
617 |
+
| cosine_mrr@10 | 0.2604 |
|
618 |
+
| **cosine_map@100** | **0.1089** |
|
619 |
+
| dot_accuracy@1 | 0.142 |
|
620 |
+
| dot_accuracy@3 | 0.2346 |
|
621 |
+
| dot_accuracy@5 | 0.2685 |
|
622 |
+
| dot_accuracy@10 | 0.3302 |
|
623 |
+
| dot_precision@1 | 0.142 |
|
624 |
+
| dot_precision@3 | 0.1379 |
|
625 |
+
| dot_precision@5 | 0.1395 |
|
626 |
+
| dot_precision@10 | 0.1309 |
|
627 |
+
| dot_recall@1 | 0.0053 |
|
628 |
+
| dot_recall@3 | 0.0198 |
|
629 |
+
| dot_recall@5 | 0.0291 |
|
630 |
+
| dot_recall@10 | 0.0492 |
|
631 |
+
| dot_ndcg@10 | 0.1368 |
|
632 |
+
| dot_mrr@10 | 0.1959 |
|
633 |
+
| dot_map@100 | 0.0882 |
|
634 |
+
|
635 |
+
<!--
|
636 |
+
## Bias, Risks and Limitations
|
637 |
+
|
638 |
+
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
639 |
+
-->
|
640 |
+
|
641 |
+
<!--
|
642 |
+
### Recommendations
|
643 |
+
|
644 |
+
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
645 |
+
-->
|
646 |
+
|
647 |
+
## Training Details
|
648 |
+
|
649 |
+
### Training Dataset
|
650 |
+
|
651 |
+
#### Unnamed Dataset
|
652 |
+
|
653 |
+
|
654 |
+
* Size: 110,575 training samples
|
655 |
+
* Columns: <code>sentence_0</code>, <code>sentence_1</code>, and <code>label</code>
|
656 |
+
* Approximate statistics based on the first 1000 samples:
|
657 |
+
| | sentence_0 | sentence_1 | label |
|
658 |
+
|:--------|:---------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|:-----------------------------|
|
659 |
+
| type | string | string | int |
|
660 |
+
| details | <ul><li>min: 3 tokens</li><li>mean: 6.46 tokens</li><li>max: 19 tokens</li></ul> | <ul><li>min: 27 tokens</li><li>mean: 394.71 tokens</li><li>max: 512 tokens</li></ul> | <ul><li>1: 100.00%</li></ul> |
|
661 |
+
* Samples:
|
662 |
+
| sentence_0 | sentence_1 | label |
|
663 |
+
|:-------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------|
|
664 |
+
| <code>chronic diseases</code> | <code>Role of antioxidants in cancer therapy. Oxidative stress is a key component in linking environmental toxicity to the multistage carcinogenic process. Reactive oxygen species (ROS) are generated in response to both endogenous and exogenous stimuli. To counterbalance ROS-mediated injury, an endogenous antioxidants defense system exists; however, when oxidation exceeds the control mechanisms, oxidative stress arises. Chronic and cumulative oxidative stress induces deleterious modifications to a variety of macromolecular components, such as DNA, lipids, and proteins. A primary mechanism of many chemotherapy drugs against cancer cells is the formation of ROS, or free radicals. Radiotherapy is based on the fact that ionizing radiation destroys tumor cells. Radiotherapy induces direct lesions in the DNA or biological molecules, which eventually affect DNA. Free radicals produced by oncology therapy are often a source of serious side effects as well. The objective of this review is to provide information about the effects of antioxidants during oncology treatments and to discuss the possible events and efficacy. Much debate has arisen about whether antioxidant supplementation alters the efficacy of cancer chemotherapy. There is still limited evidence in both quality and sample size, suggesting that certain antioxidant supplements may reduce adverse reactions and toxicities. Significant reductions in toxicity may alleviate dose-limiting toxicities so that more patients are able to complete prescribed chemotherapy regimens and thus, in turn, improve the potential for success in terms of tumor response and survival. Copyright © 2013 Elsevier Inc. All rights reserved.</code> | <code>1</code> |
|
665 |
+
| <code>plant-based diets</code> | <code>Diet, infection and wheezy illness: lessons from adults. An increase in asthma and atopic disease has been recorded in many countries where society has become more prosperous. We have investigated two possible explanations: a reduction in childhood infections and a change in diet. In a cohort of people followed up since 1964, originally selected as a random sample of primary school children, we have investigated the relevance of family size and the common childhood infectious diseases to development of eczema, hay fever and asthma. Although membership of a large family reduced risks of hay fever and eczema (but not asthma), this was not explained by the infections the child had suffered. Indeed, the more infections the child had had, the greater the likelihood of asthma, although measles gave a modest measure of protection. We have investigated dietary factors in two separate studies. In the first, we have shown the risks of bronchial hyper-reactivity are increased seven-fold among those with the lowest intake of vitamin C, while the lowest intake of saturated fats gave a 10-fold protection. In the second, we have shown that the risk of adult-onset wheezy illness is increased five-fold by the lowest intake of vitamin E and doubled by the lowest intake of vitamin C. These results were supported by direct measurements of the vitamins and triglycerides in plasma. We have proposed that changes in the diet of pregnant women may have reflected those observed in the population as a whole and that these may have resulted in the birth of cohorts of children predisposed to atopy and asthma. The direct test of this is to study the diet and nutritional status of a large cohort of pregnant women and to follow their offspring forward. This is our current research.</code> | <code>1</code> |
|
666 |
+
| <code>liver health</code> | <code>Effect of a very-high-fiber vegetable, fruit, and nut diet on serum lipids and colonic function. We tested the effects of feeding a diet very high in fiber from fruit and vegetables. The levels fed were those, which had originally inspired the dietary fiber hypothesis related to colon cancer and heart disease prevention and also may have been eaten early in human evolution. Ten healthy volunteers each took 3 metabolic diets of 2 weeks duration. The diets were: high-vegetable, fruit, and nut (very-high-fiber, 55 g/1,000 kcal); starch-based containing cereals and legumes (early agricultural diet); or low-fat (contemporary therapeutic diet). All diets were intended to be weight-maintaining (mean intake, 2,577 kcal/d). Compared with the starch-based and low-fat diets, the high-fiber vegetable diet resulted in the largest reduction in low-density lipoprotein (LDL) cholesterol (33% +/- 4%, P <.001) and the greatest fecal bile acid output (1.13 +/- 0.30 g/d, P =.002), fecal bulk (906 +/- 130 g/d, P <.001), and fecal short-chain fatty acid outputs (78 +/- 13 mmol/d, P <.001). Nevertheless, due to the increase in fecal bulk, the actual concentrations of fecal bile acids were lowest on the vegetable diet (1.2 mg/g wet weight, P =.002). Maximum lipid reductions occurred within 1 week. Urinary mevalonic acid excretion increased (P =.036) on the high-vegetable diet reflecting large fecal steroid losses. We conclude that very high-vegetable fiber intakes reduce risk factors for cardiovascular disease and possibly colon cancer. Vegetable and fruit fibers therefore warrant further detailed investigation. Copyright 2001 by W.B. Saunders Company</code> | <code>1</code> |
|
667 |
+
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
|
668 |
+
```json
|
669 |
+
{
|
670 |
+
"scale": 20.0,
|
671 |
+
"similarity_fct": "cos_sim"
|
672 |
+
}
|
673 |
+
```
|
674 |
+
|
675 |
+
### Training Hyperparameters
|
676 |
+
#### Non-Default Hyperparameters
|
677 |
+
|
678 |
+
- `eval_strategy`: steps
|
679 |
+
- `per_device_train_batch_size`: 32
|
680 |
+
- `per_device_eval_batch_size`: 32
|
681 |
+
- `fp16`: True
|
682 |
+
- `multi_dataset_batch_sampler`: round_robin
|
683 |
+
|
684 |
+
#### All Hyperparameters
|
685 |
+
<details><summary>Click to expand</summary>
|
686 |
+
|
687 |
+
- `overwrite_output_dir`: False
|
688 |
+
- `do_predict`: False
|
689 |
+
- `eval_strategy`: steps
|
690 |
+
- `prediction_loss_only`: True
|
691 |
+
- `per_device_train_batch_size`: 32
|
692 |
+
- `per_device_eval_batch_size`: 32
|
693 |
+
- `per_gpu_train_batch_size`: None
|
694 |
+
- `per_gpu_eval_batch_size`: None
|
695 |
+
- `gradient_accumulation_steps`: 1
|
696 |
+
- `eval_accumulation_steps`: None
|
697 |
+
- `torch_empty_cache_steps`: None
|
698 |
+
- `learning_rate`: 5e-05
|
699 |
+
- `weight_decay`: 0.0
|
700 |
+
- `adam_beta1`: 0.9
|
701 |
+
- `adam_beta2`: 0.999
|
702 |
+
- `adam_epsilon`: 1e-08
|
703 |
+
- `max_grad_norm`: 1
|
704 |
+
- `num_train_epochs`: 3
|
705 |
+
- `max_steps`: -1
|
706 |
+
- `lr_scheduler_type`: linear
|
707 |
+
- `lr_scheduler_kwargs`: {}
|
708 |
+
- `warmup_ratio`: 0.0
|
709 |
+
- `warmup_steps`: 0
|
710 |
+
- `log_level`: passive
|
711 |
+
- `log_level_replica`: warning
|
712 |
+
- `log_on_each_node`: True
|
713 |
+
- `logging_nan_inf_filter`: True
|
714 |
+
- `save_safetensors`: True
|
715 |
+
- `save_on_each_node`: False
|
716 |
+
- `save_only_model`: False
|
717 |
+
- `restore_callback_states_from_checkpoint`: False
|
718 |
+
- `no_cuda`: False
|
719 |
+
- `use_cpu`: False
|
720 |
+
- `use_mps_device`: False
|
721 |
+
- `seed`: 42
|
722 |
+
- `data_seed`: None
|
723 |
+
- `jit_mode_eval`: False
|
724 |
+
- `use_ipex`: False
|
725 |
+
- `bf16`: False
|
726 |
+
- `fp16`: True
|
727 |
+
- `fp16_opt_level`: O1
|
728 |
+
- `half_precision_backend`: auto
|
729 |
+
- `bf16_full_eval`: False
|
730 |
+
- `fp16_full_eval`: False
|
731 |
+
- `tf32`: None
|
732 |
+
- `local_rank`: 0
|
733 |
+
- `ddp_backend`: None
|
734 |
+
- `tpu_num_cores`: None
|
735 |
+
- `tpu_metrics_debug`: False
|
736 |
+
- `debug`: []
|
737 |
+
- `dataloader_drop_last`: False
|
738 |
+
- `dataloader_num_workers`: 0
|
739 |
+
- `dataloader_prefetch_factor`: None
|
740 |
+
- `past_index`: -1
|
741 |
+
- `disable_tqdm`: False
|
742 |
+
- `remove_unused_columns`: True
|
743 |
+
- `label_names`: None
|
744 |
+
- `load_best_model_at_end`: False
|
745 |
+
- `ignore_data_skip`: False
|
746 |
+
- `fsdp`: []
|
747 |
+
- `fsdp_min_num_params`: 0
|
748 |
+
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
|
749 |
+
- `fsdp_transformer_layer_cls_to_wrap`: None
|
750 |
+
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
|
751 |
+
- `deepspeed`: None
|
752 |
+
- `label_smoothing_factor`: 0.0
|
753 |
+
- `optim`: adamw_torch
|
754 |
+
- `optim_args`: None
|
755 |
+
- `adafactor`: False
|
756 |
+
- `group_by_length`: False
|
757 |
+
- `length_column_name`: length
|
758 |
+
- `ddp_find_unused_parameters`: None
|
759 |
+
- `ddp_bucket_cap_mb`: None
|
760 |
+
- `ddp_broadcast_buffers`: False
|
761 |
+
- `dataloader_pin_memory`: True
|
762 |
+
- `dataloader_persistent_workers`: False
|
763 |
+
- `skip_memory_metrics`: True
|
764 |
+
- `use_legacy_prediction_loop`: False
|
765 |
+
- `push_to_hub`: False
|
766 |
+
- `resume_from_checkpoint`: None
|
767 |
+
- `hub_model_id`: None
|
768 |
+
- `hub_strategy`: every_save
|
769 |
+
- `hub_private_repo`: False
|
770 |
+
- `hub_always_push`: False
|
771 |
+
- `gradient_checkpointing`: False
|
772 |
+
- `gradient_checkpointing_kwargs`: None
|
773 |
+
- `include_inputs_for_metrics`: False
|
774 |
+
- `eval_do_concat_batches`: True
|
775 |
+
- `fp16_backend`: auto
|
776 |
+
- `push_to_hub_model_id`: None
|
777 |
+
- `push_to_hub_organization`: None
|
778 |
+
- `mp_parameters`:
|
779 |
+
- `auto_find_batch_size`: False
|
780 |
+
- `full_determinism`: False
|
781 |
+
- `torchdynamo`: None
|
782 |
+
- `ray_scope`: last
|
783 |
+
- `ddp_timeout`: 1800
|
784 |
+
- `torch_compile`: False
|
785 |
+
- `torch_compile_backend`: None
|
786 |
+
- `torch_compile_mode`: None
|
787 |
+
- `dispatch_batches`: None
|
788 |
+
- `split_batches`: None
|
789 |
+
- `include_tokens_per_second`: False
|
790 |
+
- `include_num_input_tokens_seen`: False
|
791 |
+
- `neftune_noise_alpha`: None
|
792 |
+
- `optim_target_modules`: None
|
793 |
+
- `batch_eval_metrics`: False
|
794 |
+
- `eval_on_start`: False
|
795 |
+
- `eval_use_gather_object`: False
|
796 |
+
- `batch_sampler`: batch_sampler
|
797 |
+
- `multi_dataset_batch_sampler`: round_robin
|
798 |
+
|
799 |
+
</details>
|
800 |
+
|
801 |
+
### Training Logs
|
802 |
+
| Epoch | Step | Training Loss | eval_cosine_map@100 |
|
803 |
+
|:------:|:-----:|:-------------:|:-------------------:|
|
804 |
+
| 0.1447 | 500 | 3.4744 | - |
|
805 |
+
| 0.2894 | 1000 | 3.3463 | - |
|
806 |
+
| 0.4340 | 1500 | 3.2119 | - |
|
807 |
+
| 0.5787 | 2000 | 3.0852 | - |
|
808 |
+
| 0.7234 | 2500 | 2.9736 | - |
|
809 |
+
| 0.8681 | 3000 | 2.8964 | - |
|
810 |
+
| 1.0 | 3456 | - | 0.0628 |
|
811 |
+
| 1.0127 | 3500 | 2.8117 | - |
|
812 |
+
| 1.1574 | 4000 | 2.7464 | - |
|
813 |
+
| 1.3021 | 4500 | 2.6987 | - |
|
814 |
+
| 1.4468 | 5000 | 2.6423 | 0.0795 |
|
815 |
+
| 1.5914 | 5500 | 2.584 | - |
|
816 |
+
| 1.7361 | 6000 | 2.5438 | - |
|
817 |
+
| 1.8808 | 6500 | 2.4891 | - |
|
818 |
+
| 2.0 | 6912 | - | 0.0948 |
|
819 |
+
| 2.0255 | 7000 | 2.4555 | - |
|
820 |
+
| 2.1701 | 7500 | 2.442 | - |
|
821 |
+
| 2.3148 | 8000 | 2.4161 | - |
|
822 |
+
| 2.4595 | 8500 | 2.3882 | - |
|
823 |
+
| 2.6042 | 9000 | 2.3545 | - |
|
824 |
+
| 2.7488 | 9500 | 2.3274 | - |
|
825 |
+
| 2.8935 | 10000 | 2.3134 | 0.1082 |
|
826 |
+
| 3.0 | 10368 | - | 0.1089 |
|
827 |
+
|
828 |
+
|
829 |
+
### Framework Versions
|
830 |
+
- Python: 3.10.12
|
831 |
+
- Sentence Transformers: 3.1.1
|
832 |
+
- Transformers: 4.44.2
|
833 |
+
- PyTorch: 2.4.1+cu121
|
834 |
+
- Accelerate: 0.34.2
|
835 |
+
- Datasets: 3.0.0
|
836 |
+
- Tokenizers: 0.19.1
|
837 |
+
|
838 |
+
## Citation
|
839 |
+
|
840 |
+
### BibTeX
|
841 |
+
|
842 |
+
#### Sentence Transformers
|
843 |
+
```bibtex
|
844 |
+
@inproceedings{reimers-2019-sentence-bert,
|
845 |
+
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
846 |
+
author = "Reimers, Nils and Gurevych, Iryna",
|
847 |
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
848 |
+
month = "11",
|
849 |
+
year = "2019",
|
850 |
+
publisher = "Association for Computational Linguistics",
|
851 |
+
url = "https://arxiv.org/abs/1908.10084",
|
852 |
+
}
|
853 |
+
```
|
854 |
+
|
855 |
+
#### MultipleNegativesRankingLoss
|
856 |
+
```bibtex
|
857 |
+
@misc{henderson2017efficient,
|
858 |
+
title={Efficient Natural Language Response Suggestion for Smart Reply},
|
859 |
+
author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
|
860 |
+
year={2017},
|
861 |
+
eprint={1705.00652},
|
862 |
+
archivePrefix={arXiv},
|
863 |
+
primaryClass={cs.CL}
|
864 |
+
}
|
865 |
+
```
|
866 |
+
|
867 |
+
<!--
|
868 |
+
## Glossary
|
869 |
+
|
870 |
+
*Clearly define terms in order to be accessible across audiences.*
|
871 |
+
-->
|
872 |
+
|
873 |
+
<!--
|
874 |
+
## Model Card Authors
|
875 |
+
|
876 |
+
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
|
877 |
+
-->
|
878 |
+
|
879 |
+
<!--
|
880 |
+
## Model Card Contact
|
881 |
+
|
882 |
+
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
883 |
+
-->
|
added_tokens.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"[MASK]": 250101
|
3 |
+
}
|
config.json
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "microsoft/mdeberta-v3-base",
|
3 |
+
"architectures": [
|
4 |
+
"DebertaV2Model"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"hidden_act": "gelu",
|
8 |
+
"hidden_dropout_prob": 0.1,
|
9 |
+
"hidden_size": 768,
|
10 |
+
"initializer_range": 0.02,
|
11 |
+
"intermediate_size": 3072,
|
12 |
+
"layer_norm_eps": 1e-07,
|
13 |
+
"max_position_embeddings": 512,
|
14 |
+
"max_relative_positions": -1,
|
15 |
+
"model_type": "deberta-v2",
|
16 |
+
"norm_rel_ebd": "layer_norm",
|
17 |
+
"num_attention_heads": 12,
|
18 |
+
"num_hidden_layers": 12,
|
19 |
+
"pad_token_id": 0,
|
20 |
+
"pooler_dropout": 0,
|
21 |
+
"pooler_hidden_act": "gelu",
|
22 |
+
"pooler_hidden_size": 768,
|
23 |
+
"pos_att_type": [
|
24 |
+
"p2c",
|
25 |
+
"c2p"
|
26 |
+
],
|
27 |
+
"position_biased_input": false,
|
28 |
+
"position_buckets": 256,
|
29 |
+
"relative_attention": true,
|
30 |
+
"share_att_key": true,
|
31 |
+
"torch_dtype": "float32",
|
32 |
+
"transformers_version": "4.44.2",
|
33 |
+
"type_vocab_size": 0,
|
34 |
+
"vocab_size": 251000
|
35 |
+
}
|
config_sentence_transformers.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"__version__": {
|
3 |
+
"sentence_transformers": "3.1.1",
|
4 |
+
"transformers": "4.44.2",
|
5 |
+
"pytorch": "2.4.1+cu121"
|
6 |
+
},
|
7 |
+
"prompts": {},
|
8 |
+
"default_prompt_name": null,
|
9 |
+
"similarity_fn_name": null
|
10 |
+
}
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cb8be1ce4fd98d1dfc4e55cb166540ffea2a273c950f89f3584bfdac47aaf1c8
|
3 |
+
size 1112897768
|
modules.json
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"idx": 0,
|
4 |
+
"name": "0",
|
5 |
+
"path": "",
|
6 |
+
"type": "sentence_transformers.models.Transformer"
|
7 |
+
},
|
8 |
+
{
|
9 |
+
"idx": 1,
|
10 |
+
"name": "1",
|
11 |
+
"path": "1_Pooling",
|
12 |
+
"type": "sentence_transformers.models.Pooling"
|
13 |
+
}
|
14 |
+
]
|
sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"max_seq_length": 512,
|
3 |
+
"do_lower_case": false
|
4 |
+
}
|
special_tokens_map.json
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": "[CLS]",
|
3 |
+
"cls_token": "[CLS]",
|
4 |
+
"eos_token": "[SEP]",
|
5 |
+
"mask_token": "[MASK]",
|
6 |
+
"pad_token": "[PAD]",
|
7 |
+
"sep_token": "[SEP]",
|
8 |
+
"unk_token": {
|
9 |
+
"content": "[UNK]",
|
10 |
+
"lstrip": false,
|
11 |
+
"normalized": true,
|
12 |
+
"rstrip": false,
|
13 |
+
"single_word": false
|
14 |
+
}
|
15 |
+
}
|
spm.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:13c8d666d62a7bc4ac8f040aab68e942c861f93303156cc28f5c7e885d86d6e3
|
3 |
+
size 4305025
|
tokenizer.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4dac20d1f8559442a3466e5eb12d1ec38c7ed3f302b08d0bb4385e52784741cf
|
3 |
+
size 16351017
|
tokenizer_config.json
ADDED
@@ -0,0 +1,858 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "[PAD]",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"1": {
|
12 |
+
"content": "[CLS]",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"2": {
|
20 |
+
"content": "[SEP]",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"3": {
|
28 |
+
"content": "[UNK]",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": true,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"250001": {
|
36 |
+
"content": "▁<extra_id_99>",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": false,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": false
|
42 |
+
},
|
43 |
+
"250002": {
|
44 |
+
"content": "▁<extra_id_98>",
|
45 |
+
"lstrip": false,
|
46 |
+
"normalized": false,
|
47 |
+
"rstrip": false,
|
48 |
+
"single_word": false,
|
49 |
+
"special": false
|
50 |
+
},
|
51 |
+
"250003": {
|
52 |
+
"content": "▁<extra_id_97>",
|
53 |
+
"lstrip": false,
|
54 |
+
"normalized": false,
|
55 |
+
"rstrip": false,
|
56 |
+
"single_word": false,
|
57 |
+
"special": false
|
58 |
+
},
|
59 |
+
"250004": {
|
60 |
+
"content": "▁<extra_id_96>",
|
61 |
+
"lstrip": false,
|
62 |
+
"normalized": false,
|
63 |
+
"rstrip": false,
|
64 |
+
"single_word": false,
|
65 |
+
"special": false
|
66 |
+
},
|
67 |
+
"250005": {
|
68 |
+
"content": "▁<extra_id_95>",
|
69 |
+
"lstrip": false,
|
70 |
+
"normalized": false,
|
71 |
+
"rstrip": false,
|
72 |
+
"single_word": false,
|
73 |
+
"special": false
|
74 |
+
},
|
75 |
+
"250006": {
|
76 |
+
"content": "▁<extra_id_94>",
|
77 |
+
"lstrip": false,
|
78 |
+
"normalized": false,
|
79 |
+
"rstrip": false,
|
80 |
+
"single_word": false,
|
81 |
+
"special": false
|
82 |
+
},
|
83 |
+
"250007": {
|
84 |
+
"content": "▁<extra_id_93>",
|
85 |
+
"lstrip": false,
|
86 |
+
"normalized": false,
|
87 |
+
"rstrip": false,
|
88 |
+
"single_word": false,
|
89 |
+
"special": false
|
90 |
+
},
|
91 |
+
"250008": {
|
92 |
+
"content": "▁<extra_id_92>",
|
93 |
+
"lstrip": false,
|
94 |
+
"normalized": false,
|
95 |
+
"rstrip": false,
|
96 |
+
"single_word": false,
|
97 |
+
"special": false
|
98 |
+
},
|
99 |
+
"250009": {
|
100 |
+
"content": "▁<extra_id_91>",
|
101 |
+
"lstrip": false,
|
102 |
+
"normalized": false,
|
103 |
+
"rstrip": false,
|
104 |
+
"single_word": false,
|
105 |
+
"special": false
|
106 |
+
},
|
107 |
+
"250010": {
|
108 |
+
"content": "▁<extra_id_90>",
|
109 |
+
"lstrip": false,
|
110 |
+
"normalized": false,
|
111 |
+
"rstrip": false,
|
112 |
+
"single_word": false,
|
113 |
+
"special": false
|
114 |
+
},
|
115 |
+
"250011": {
|
116 |
+
"content": "▁<extra_id_89>",
|
117 |
+
"lstrip": false,
|
118 |
+
"normalized": false,
|
119 |
+
"rstrip": false,
|
120 |
+
"single_word": false,
|
121 |
+
"special": false
|
122 |
+
},
|
123 |
+
"250012": {
|
124 |
+
"content": "▁<extra_id_88>",
|
125 |
+
"lstrip": false,
|
126 |
+
"normalized": false,
|
127 |
+
"rstrip": false,
|
128 |
+
"single_word": false,
|
129 |
+
"special": false
|
130 |
+
},
|
131 |
+
"250013": {
|
132 |
+
"content": "▁<extra_id_87>",
|
133 |
+
"lstrip": false,
|
134 |
+
"normalized": false,
|
135 |
+
"rstrip": false,
|
136 |
+
"single_word": false,
|
137 |
+
"special": false
|
138 |
+
},
|
139 |
+
"250014": {
|
140 |
+
"content": "▁<extra_id_86>",
|
141 |
+
"lstrip": false,
|
142 |
+
"normalized": false,
|
143 |
+
"rstrip": false,
|
144 |
+
"single_word": false,
|
145 |
+
"special": false
|
146 |
+
},
|
147 |
+
"250015": {
|
148 |
+
"content": "▁<extra_id_85>",
|
149 |
+
"lstrip": false,
|
150 |
+
"normalized": false,
|
151 |
+
"rstrip": false,
|
152 |
+
"single_word": false,
|
153 |
+
"special": false
|
154 |
+
},
|
155 |
+
"250016": {
|
156 |
+
"content": "▁<extra_id_84>",
|
157 |
+
"lstrip": false,
|
158 |
+
"normalized": false,
|
159 |
+
"rstrip": false,
|
160 |
+
"single_word": false,
|
161 |
+
"special": false
|
162 |
+
},
|
163 |
+
"250017": {
|
164 |
+
"content": "▁<extra_id_83>",
|
165 |
+
"lstrip": false,
|
166 |
+
"normalized": false,
|
167 |
+
"rstrip": false,
|
168 |
+
"single_word": false,
|
169 |
+
"special": false
|
170 |
+
},
|
171 |
+
"250018": {
|
172 |
+
"content": "▁<extra_id_82>",
|
173 |
+
"lstrip": false,
|
174 |
+
"normalized": false,
|
175 |
+
"rstrip": false,
|
176 |
+
"single_word": false,
|
177 |
+
"special": false
|
178 |
+
},
|
179 |
+
"250019": {
|
180 |
+
"content": "▁<extra_id_81>",
|
181 |
+
"lstrip": false,
|
182 |
+
"normalized": false,
|
183 |
+
"rstrip": false,
|
184 |
+
"single_word": false,
|
185 |
+
"special": false
|
186 |
+
},
|
187 |
+
"250020": {
|
188 |
+
"content": "▁<extra_id_80>",
|
189 |
+
"lstrip": false,
|
190 |
+
"normalized": false,
|
191 |
+
"rstrip": false,
|
192 |
+
"single_word": false,
|
193 |
+
"special": false
|
194 |
+
},
|
195 |
+
"250021": {
|
196 |
+
"content": "▁<extra_id_79>",
|
197 |
+
"lstrip": false,
|
198 |
+
"normalized": false,
|
199 |
+
"rstrip": false,
|
200 |
+
"single_word": false,
|
201 |
+
"special": false
|
202 |
+
},
|
203 |
+
"250022": {
|
204 |
+
"content": "▁<extra_id_78>",
|
205 |
+
"lstrip": false,
|
206 |
+
"normalized": false,
|
207 |
+
"rstrip": false,
|
208 |
+
"single_word": false,
|
209 |
+
"special": false
|
210 |
+
},
|
211 |
+
"250023": {
|
212 |
+
"content": "▁<extra_id_77>",
|
213 |
+
"lstrip": false,
|
214 |
+
"normalized": false,
|
215 |
+
"rstrip": false,
|
216 |
+
"single_word": false,
|
217 |
+
"special": false
|
218 |
+
},
|
219 |
+
"250024": {
|
220 |
+
"content": "▁<extra_id_76>",
|
221 |
+
"lstrip": false,
|
222 |
+
"normalized": false,
|
223 |
+
"rstrip": false,
|
224 |
+
"single_word": false,
|
225 |
+
"special": false
|
226 |
+
},
|
227 |
+
"250025": {
|
228 |
+
"content": "▁<extra_id_75>",
|
229 |
+
"lstrip": false,
|
230 |
+
"normalized": false,
|
231 |
+
"rstrip": false,
|
232 |
+
"single_word": false,
|
233 |
+
"special": false
|
234 |
+
},
|
235 |
+
"250026": {
|
236 |
+
"content": "▁<extra_id_74>",
|
237 |
+
"lstrip": false,
|
238 |
+
"normalized": false,
|
239 |
+
"rstrip": false,
|
240 |
+
"single_word": false,
|
241 |
+
"special": false
|
242 |
+
},
|
243 |
+
"250027": {
|
244 |
+
"content": "▁<extra_id_73>",
|
245 |
+
"lstrip": false,
|
246 |
+
"normalized": false,
|
247 |
+
"rstrip": false,
|
248 |
+
"single_word": false,
|
249 |
+
"special": false
|
250 |
+
},
|
251 |
+
"250028": {
|
252 |
+
"content": "▁<extra_id_72>",
|
253 |
+
"lstrip": false,
|
254 |
+
"normalized": false,
|
255 |
+
"rstrip": false,
|
256 |
+
"single_word": false,
|
257 |
+
"special": false
|
258 |
+
},
|
259 |
+
"250029": {
|
260 |
+
"content": "▁<extra_id_71>",
|
261 |
+
"lstrip": false,
|
262 |
+
"normalized": false,
|
263 |
+
"rstrip": false,
|
264 |
+
"single_word": false,
|
265 |
+
"special": false
|
266 |
+
},
|
267 |
+
"250030": {
|
268 |
+
"content": "▁<extra_id_70>",
|
269 |
+
"lstrip": false,
|
270 |
+
"normalized": false,
|
271 |
+
"rstrip": false,
|
272 |
+
"single_word": false,
|
273 |
+
"special": false
|
274 |
+
},
|
275 |
+
"250031": {
|
276 |
+
"content": "▁<extra_id_69>",
|
277 |
+
"lstrip": false,
|
278 |
+
"normalized": false,
|
279 |
+
"rstrip": false,
|
280 |
+
"single_word": false,
|
281 |
+
"special": false
|
282 |
+
},
|
283 |
+
"250032": {
|
284 |
+
"content": "▁<extra_id_68>",
|
285 |
+
"lstrip": false,
|
286 |
+
"normalized": false,
|
287 |
+
"rstrip": false,
|
288 |
+
"single_word": false,
|
289 |
+
"special": false
|
290 |
+
},
|
291 |
+
"250033": {
|
292 |
+
"content": "▁<extra_id_67>",
|
293 |
+
"lstrip": false,
|
294 |
+
"normalized": false,
|
295 |
+
"rstrip": false,
|
296 |
+
"single_word": false,
|
297 |
+
"special": false
|
298 |
+
},
|
299 |
+
"250034": {
|
300 |
+
"content": "▁<extra_id_66>",
|
301 |
+
"lstrip": false,
|
302 |
+
"normalized": false,
|
303 |
+
"rstrip": false,
|
304 |
+
"single_word": false,
|
305 |
+
"special": false
|
306 |
+
},
|
307 |
+
"250035": {
|
308 |
+
"content": "▁<extra_id_65>",
|
309 |
+
"lstrip": false,
|
310 |
+
"normalized": false,
|
311 |
+
"rstrip": false,
|
312 |
+
"single_word": false,
|
313 |
+
"special": false
|
314 |
+
},
|
315 |
+
"250036": {
|
316 |
+
"content": "▁<extra_id_64>",
|
317 |
+
"lstrip": false,
|
318 |
+
"normalized": false,
|
319 |
+
"rstrip": false,
|
320 |
+
"single_word": false,
|
321 |
+
"special": false
|
322 |
+
},
|
323 |
+
"250037": {
|
324 |
+
"content": "▁<extra_id_63>",
|
325 |
+
"lstrip": false,
|
326 |
+
"normalized": false,
|
327 |
+
"rstrip": false,
|
328 |
+
"single_word": false,
|
329 |
+
"special": false
|
330 |
+
},
|
331 |
+
"250038": {
|
332 |
+
"content": "▁<extra_id_62>",
|
333 |
+
"lstrip": false,
|
334 |
+
"normalized": false,
|
335 |
+
"rstrip": false,
|
336 |
+
"single_word": false,
|
337 |
+
"special": false
|
338 |
+
},
|
339 |
+
"250039": {
|
340 |
+
"content": "▁<extra_id_61>",
|
341 |
+
"lstrip": false,
|
342 |
+
"normalized": false,
|
343 |
+
"rstrip": false,
|
344 |
+
"single_word": false,
|
345 |
+
"special": false
|
346 |
+
},
|
347 |
+
"250040": {
|
348 |
+
"content": "▁<extra_id_60>",
|
349 |
+
"lstrip": false,
|
350 |
+
"normalized": false,
|
351 |
+
"rstrip": false,
|
352 |
+
"single_word": false,
|
353 |
+
"special": false
|
354 |
+
},
|
355 |
+
"250041": {
|
356 |
+
"content": "▁<extra_id_59>",
|
357 |
+
"lstrip": false,
|
358 |
+
"normalized": false,
|
359 |
+
"rstrip": false,
|
360 |
+
"single_word": false,
|
361 |
+
"special": false
|
362 |
+
},
|
363 |
+
"250042": {
|
364 |
+
"content": "▁<extra_id_58>",
|
365 |
+
"lstrip": false,
|
366 |
+
"normalized": false,
|
367 |
+
"rstrip": false,
|
368 |
+
"single_word": false,
|
369 |
+
"special": false
|
370 |
+
},
|
371 |
+
"250043": {
|
372 |
+
"content": "▁<extra_id_57>",
|
373 |
+
"lstrip": false,
|
374 |
+
"normalized": false,
|
375 |
+
"rstrip": false,
|
376 |
+
"single_word": false,
|
377 |
+
"special": false
|
378 |
+
},
|
379 |
+
"250044": {
|
380 |
+
"content": "▁<extra_id_56>",
|
381 |
+
"lstrip": false,
|
382 |
+
"normalized": false,
|
383 |
+
"rstrip": false,
|
384 |
+
"single_word": false,
|
385 |
+
"special": false
|
386 |
+
},
|
387 |
+
"250045": {
|
388 |
+
"content": "▁<extra_id_55>",
|
389 |
+
"lstrip": false,
|
390 |
+
"normalized": false,
|
391 |
+
"rstrip": false,
|
392 |
+
"single_word": false,
|
393 |
+
"special": false
|
394 |
+
},
|
395 |
+
"250046": {
|
396 |
+
"content": "▁<extra_id_54>",
|
397 |
+
"lstrip": false,
|
398 |
+
"normalized": false,
|
399 |
+
"rstrip": false,
|
400 |
+
"single_word": false,
|
401 |
+
"special": false
|
402 |
+
},
|
403 |
+
"250047": {
|
404 |
+
"content": "▁<extra_id_53>",
|
405 |
+
"lstrip": false,
|
406 |
+
"normalized": false,
|
407 |
+
"rstrip": false,
|
408 |
+
"single_word": false,
|
409 |
+
"special": false
|
410 |
+
},
|
411 |
+
"250048": {
|
412 |
+
"content": "▁<extra_id_52>",
|
413 |
+
"lstrip": false,
|
414 |
+
"normalized": false,
|
415 |
+
"rstrip": false,
|
416 |
+
"single_word": false,
|
417 |
+
"special": false
|
418 |
+
},
|
419 |
+
"250049": {
|
420 |
+
"content": "▁<extra_id_51>",
|
421 |
+
"lstrip": false,
|
422 |
+
"normalized": false,
|
423 |
+
"rstrip": false,
|
424 |
+
"single_word": false,
|
425 |
+
"special": false
|
426 |
+
},
|
427 |
+
"250050": {
|
428 |
+
"content": "▁<extra_id_50>",
|
429 |
+
"lstrip": false,
|
430 |
+
"normalized": false,
|
431 |
+
"rstrip": false,
|
432 |
+
"single_word": false,
|
433 |
+
"special": false
|
434 |
+
},
|
435 |
+
"250051": {
|
436 |
+
"content": "▁<extra_id_49>",
|
437 |
+
"lstrip": false,
|
438 |
+
"normalized": false,
|
439 |
+
"rstrip": false,
|
440 |
+
"single_word": false,
|
441 |
+
"special": false
|
442 |
+
},
|
443 |
+
"250052": {
|
444 |
+
"content": "▁<extra_id_48>",
|
445 |
+
"lstrip": false,
|
446 |
+
"normalized": false,
|
447 |
+
"rstrip": false,
|
448 |
+
"single_word": false,
|
449 |
+
"special": false
|
450 |
+
},
|
451 |
+
"250053": {
|
452 |
+
"content": "▁<extra_id_47>",
|
453 |
+
"lstrip": false,
|
454 |
+
"normalized": false,
|
455 |
+
"rstrip": false,
|
456 |
+
"single_word": false,
|
457 |
+
"special": false
|
458 |
+
},
|
459 |
+
"250054": {
|
460 |
+
"content": "▁<extra_id_46>",
|
461 |
+
"lstrip": false,
|
462 |
+
"normalized": false,
|
463 |
+
"rstrip": false,
|
464 |
+
"single_word": false,
|
465 |
+
"special": false
|
466 |
+
},
|
467 |
+
"250055": {
|
468 |
+
"content": "▁<extra_id_45>",
|
469 |
+
"lstrip": false,
|
470 |
+
"normalized": false,
|
471 |
+
"rstrip": false,
|
472 |
+
"single_word": false,
|
473 |
+
"special": false
|
474 |
+
},
|
475 |
+
"250056": {
|
476 |
+
"content": "▁<extra_id_44>",
|
477 |
+
"lstrip": false,
|
478 |
+
"normalized": false,
|
479 |
+
"rstrip": false,
|
480 |
+
"single_word": false,
|
481 |
+
"special": false
|
482 |
+
},
|
483 |
+
"250057": {
|
484 |
+
"content": "▁<extra_id_43>",
|
485 |
+
"lstrip": false,
|
486 |
+
"normalized": false,
|
487 |
+
"rstrip": false,
|
488 |
+
"single_word": false,
|
489 |
+
"special": false
|
490 |
+
},
|
491 |
+
"250058": {
|
492 |
+
"content": "▁<extra_id_42>",
|
493 |
+
"lstrip": false,
|
494 |
+
"normalized": false,
|
495 |
+
"rstrip": false,
|
496 |
+
"single_word": false,
|
497 |
+
"special": false
|
498 |
+
},
|
499 |
+
"250059": {
|
500 |
+
"content": "▁<extra_id_41>",
|
501 |
+
"lstrip": false,
|
502 |
+
"normalized": false,
|
503 |
+
"rstrip": false,
|
504 |
+
"single_word": false,
|
505 |
+
"special": false
|
506 |
+
},
|
507 |
+
"250060": {
|
508 |
+
"content": "▁<extra_id_40>",
|
509 |
+
"lstrip": false,
|
510 |
+
"normalized": false,
|
511 |
+
"rstrip": false,
|
512 |
+
"single_word": false,
|
513 |
+
"special": false
|
514 |
+
},
|
515 |
+
"250061": {
|
516 |
+
"content": "▁<extra_id_39>",
|
517 |
+
"lstrip": false,
|
518 |
+
"normalized": false,
|
519 |
+
"rstrip": false,
|
520 |
+
"single_word": false,
|
521 |
+
"special": false
|
522 |
+
},
|
523 |
+
"250062": {
|
524 |
+
"content": "▁<extra_id_38>",
|
525 |
+
"lstrip": false,
|
526 |
+
"normalized": false,
|
527 |
+
"rstrip": false,
|
528 |
+
"single_word": false,
|
529 |
+
"special": false
|
530 |
+
},
|
531 |
+
"250063": {
|
532 |
+
"content": "▁<extra_id_37>",
|
533 |
+
"lstrip": false,
|
534 |
+
"normalized": false,
|
535 |
+
"rstrip": false,
|
536 |
+
"single_word": false,
|
537 |
+
"special": false
|
538 |
+
},
|
539 |
+
"250064": {
|
540 |
+
"content": "▁<extra_id_36>",
|
541 |
+
"lstrip": false,
|
542 |
+
"normalized": false,
|
543 |
+
"rstrip": false,
|
544 |
+
"single_word": false,
|
545 |
+
"special": false
|
546 |
+
},
|
547 |
+
"250065": {
|
548 |
+
"content": "▁<extra_id_35>",
|
549 |
+
"lstrip": false,
|
550 |
+
"normalized": false,
|
551 |
+
"rstrip": false,
|
552 |
+
"single_word": false,
|
553 |
+
"special": false
|
554 |
+
},
|
555 |
+
"250066": {
|
556 |
+
"content": "▁<extra_id_34>",
|
557 |
+
"lstrip": false,
|
558 |
+
"normalized": false,
|
559 |
+
"rstrip": false,
|
560 |
+
"single_word": false,
|
561 |
+
"special": false
|
562 |
+
},
|
563 |
+
"250067": {
|
564 |
+
"content": "▁<extra_id_33>",
|
565 |
+
"lstrip": false,
|
566 |
+
"normalized": false,
|
567 |
+
"rstrip": false,
|
568 |
+
"single_word": false,
|
569 |
+
"special": false
|
570 |
+
},
|
571 |
+
"250068": {
|
572 |
+
"content": "▁<extra_id_32>",
|
573 |
+
"lstrip": false,
|
574 |
+
"normalized": false,
|
575 |
+
"rstrip": false,
|
576 |
+
"single_word": false,
|
577 |
+
"special": false
|
578 |
+
},
|
579 |
+
"250069": {
|
580 |
+
"content": "▁<extra_id_31>",
|
581 |
+
"lstrip": false,
|
582 |
+
"normalized": false,
|
583 |
+
"rstrip": false,
|
584 |
+
"single_word": false,
|
585 |
+
"special": false
|
586 |
+
},
|
587 |
+
"250070": {
|
588 |
+
"content": "▁<extra_id_30>",
|
589 |
+
"lstrip": false,
|
590 |
+
"normalized": false,
|
591 |
+
"rstrip": false,
|
592 |
+
"single_word": false,
|
593 |
+
"special": false
|
594 |
+
},
|
595 |
+
"250071": {
|
596 |
+
"content": "▁<extra_id_29>",
|
597 |
+
"lstrip": false,
|
598 |
+
"normalized": false,
|
599 |
+
"rstrip": false,
|
600 |
+
"single_word": false,
|
601 |
+
"special": false
|
602 |
+
},
|
603 |
+
"250072": {
|
604 |
+
"content": "▁<extra_id_28>",
|
605 |
+
"lstrip": false,
|
606 |
+
"normalized": false,
|
607 |
+
"rstrip": false,
|
608 |
+
"single_word": false,
|
609 |
+
"special": false
|
610 |
+
},
|
611 |
+
"250073": {
|
612 |
+
"content": "▁<extra_id_27>",
|
613 |
+
"lstrip": false,
|
614 |
+
"normalized": false,
|
615 |
+
"rstrip": false,
|
616 |
+
"single_word": false,
|
617 |
+
"special": false
|
618 |
+
},
|
619 |
+
"250074": {
|
620 |
+
"content": "▁<extra_id_26>",
|
621 |
+
"lstrip": false,
|
622 |
+
"normalized": false,
|
623 |
+
"rstrip": false,
|
624 |
+
"single_word": false,
|
625 |
+
"special": false
|
626 |
+
},
|
627 |
+
"250075": {
|
628 |
+
"content": "▁<extra_id_25>",
|
629 |
+
"lstrip": false,
|
630 |
+
"normalized": false,
|
631 |
+
"rstrip": false,
|
632 |
+
"single_word": false,
|
633 |
+
"special": false
|
634 |
+
},
|
635 |
+
"250076": {
|
636 |
+
"content": "▁<extra_id_24>",
|
637 |
+
"lstrip": false,
|
638 |
+
"normalized": false,
|
639 |
+
"rstrip": false,
|
640 |
+
"single_word": false,
|
641 |
+
"special": false
|
642 |
+
},
|
643 |
+
"250077": {
|
644 |
+
"content": "▁<extra_id_23>",
|
645 |
+
"lstrip": false,
|
646 |
+
"normalized": false,
|
647 |
+
"rstrip": false,
|
648 |
+
"single_word": false,
|
649 |
+
"special": false
|
650 |
+
},
|
651 |
+
"250078": {
|
652 |
+
"content": "▁<extra_id_22>",
|
653 |
+
"lstrip": false,
|
654 |
+
"normalized": false,
|
655 |
+
"rstrip": false,
|
656 |
+
"single_word": false,
|
657 |
+
"special": false
|
658 |
+
},
|
659 |
+
"250079": {
|
660 |
+
"content": "▁<extra_id_21>",
|
661 |
+
"lstrip": false,
|
662 |
+
"normalized": false,
|
663 |
+
"rstrip": false,
|
664 |
+
"single_word": false,
|
665 |
+
"special": false
|
666 |
+
},
|
667 |
+
"250080": {
|
668 |
+
"content": "▁<extra_id_20>",
|
669 |
+
"lstrip": false,
|
670 |
+
"normalized": false,
|
671 |
+
"rstrip": false,
|
672 |
+
"single_word": false,
|
673 |
+
"special": false
|
674 |
+
},
|
675 |
+
"250081": {
|
676 |
+
"content": "▁<extra_id_19>",
|
677 |
+
"lstrip": false,
|
678 |
+
"normalized": false,
|
679 |
+
"rstrip": false,
|
680 |
+
"single_word": false,
|
681 |
+
"special": false
|
682 |
+
},
|
683 |
+
"250082": {
|
684 |
+
"content": "▁<extra_id_18>",
|
685 |
+
"lstrip": false,
|
686 |
+
"normalized": false,
|
687 |
+
"rstrip": false,
|
688 |
+
"single_word": false,
|
689 |
+
"special": false
|
690 |
+
},
|
691 |
+
"250083": {
|
692 |
+
"content": "▁<extra_id_17>",
|
693 |
+
"lstrip": false,
|
694 |
+
"normalized": false,
|
695 |
+
"rstrip": false,
|
696 |
+
"single_word": false,
|
697 |
+
"special": false
|
698 |
+
},
|
699 |
+
"250084": {
|
700 |
+
"content": "▁<extra_id_16>",
|
701 |
+
"lstrip": false,
|
702 |
+
"normalized": false,
|
703 |
+
"rstrip": false,
|
704 |
+
"single_word": false,
|
705 |
+
"special": false
|
706 |
+
},
|
707 |
+
"250085": {
|
708 |
+
"content": "▁<extra_id_15>",
|
709 |
+
"lstrip": false,
|
710 |
+
"normalized": false,
|
711 |
+
"rstrip": false,
|
712 |
+
"single_word": false,
|
713 |
+
"special": false
|
714 |
+
},
|
715 |
+
"250086": {
|
716 |
+
"content": "▁<extra_id_14>",
|
717 |
+
"lstrip": false,
|
718 |
+
"normalized": false,
|
719 |
+
"rstrip": false,
|
720 |
+
"single_word": false,
|
721 |
+
"special": false
|
722 |
+
},
|
723 |
+
"250087": {
|
724 |
+
"content": "▁<extra_id_13>",
|
725 |
+
"lstrip": false,
|
726 |
+
"normalized": false,
|
727 |
+
"rstrip": false,
|
728 |
+
"single_word": false,
|
729 |
+
"special": false
|
730 |
+
},
|
731 |
+
"250088": {
|
732 |
+
"content": "▁<extra_id_12>",
|
733 |
+
"lstrip": false,
|
734 |
+
"normalized": false,
|
735 |
+
"rstrip": false,
|
736 |
+
"single_word": false,
|
737 |
+
"special": false
|
738 |
+
},
|
739 |
+
"250089": {
|
740 |
+
"content": "▁<extra_id_11>",
|
741 |
+
"lstrip": false,
|
742 |
+
"normalized": false,
|
743 |
+
"rstrip": false,
|
744 |
+
"single_word": false,
|
745 |
+
"special": false
|
746 |
+
},
|
747 |
+
"250090": {
|
748 |
+
"content": "▁<extra_id_10>",
|
749 |
+
"lstrip": false,
|
750 |
+
"normalized": false,
|
751 |
+
"rstrip": false,
|
752 |
+
"single_word": false,
|
753 |
+
"special": false
|
754 |
+
},
|
755 |
+
"250091": {
|
756 |
+
"content": "▁<extra_id_9>",
|
757 |
+
"lstrip": false,
|
758 |
+
"normalized": false,
|
759 |
+
"rstrip": false,
|
760 |
+
"single_word": false,
|
761 |
+
"special": false
|
762 |
+
},
|
763 |
+
"250092": {
|
764 |
+
"content": "▁<extra_id_8>",
|
765 |
+
"lstrip": false,
|
766 |
+
"normalized": false,
|
767 |
+
"rstrip": false,
|
768 |
+
"single_word": false,
|
769 |
+
"special": false
|
770 |
+
},
|
771 |
+
"250093": {
|
772 |
+
"content": "▁<extra_id_7>",
|
773 |
+
"lstrip": false,
|
774 |
+
"normalized": false,
|
775 |
+
"rstrip": false,
|
776 |
+
"single_word": false,
|
777 |
+
"special": false
|
778 |
+
},
|
779 |
+
"250094": {
|
780 |
+
"content": "▁<extra_id_6>",
|
781 |
+
"lstrip": false,
|
782 |
+
"normalized": false,
|
783 |
+
"rstrip": false,
|
784 |
+
"single_word": false,
|
785 |
+
"special": false
|
786 |
+
},
|
787 |
+
"250095": {
|
788 |
+
"content": "▁<extra_id_5>",
|
789 |
+
"lstrip": false,
|
790 |
+
"normalized": false,
|
791 |
+
"rstrip": false,
|
792 |
+
"single_word": false,
|
793 |
+
"special": false
|
794 |
+
},
|
795 |
+
"250096": {
|
796 |
+
"content": "▁<extra_id_4>",
|
797 |
+
"lstrip": false,
|
798 |
+
"normalized": false,
|
799 |
+
"rstrip": false,
|
800 |
+
"single_word": false,
|
801 |
+
"special": false
|
802 |
+
},
|
803 |
+
"250097": {
|
804 |
+
"content": "▁<extra_id_3>",
|
805 |
+
"lstrip": false,
|
806 |
+
"normalized": false,
|
807 |
+
"rstrip": false,
|
808 |
+
"single_word": false,
|
809 |
+
"special": false
|
810 |
+
},
|
811 |
+
"250098": {
|
812 |
+
"content": "▁<extra_id_2>",
|
813 |
+
"lstrip": false,
|
814 |
+
"normalized": false,
|
815 |
+
"rstrip": false,
|
816 |
+
"single_word": false,
|
817 |
+
"special": false
|
818 |
+
},
|
819 |
+
"250099": {
|
820 |
+
"content": "▁<extra_id_1>",
|
821 |
+
"lstrip": false,
|
822 |
+
"normalized": false,
|
823 |
+
"rstrip": false,
|
824 |
+
"single_word": false,
|
825 |
+
"special": false
|
826 |
+
},
|
827 |
+
"250100": {
|
828 |
+
"content": "▁<extra_id_0>",
|
829 |
+
"lstrip": false,
|
830 |
+
"normalized": false,
|
831 |
+
"rstrip": false,
|
832 |
+
"single_word": false,
|
833 |
+
"special": false
|
834 |
+
},
|
835 |
+
"250101": {
|
836 |
+
"content": "[MASK]",
|
837 |
+
"lstrip": false,
|
838 |
+
"normalized": false,
|
839 |
+
"rstrip": false,
|
840 |
+
"single_word": false,
|
841 |
+
"special": true
|
842 |
+
}
|
843 |
+
},
|
844 |
+
"bos_token": "[CLS]",
|
845 |
+
"clean_up_tokenization_spaces": true,
|
846 |
+
"cls_token": "[CLS]",
|
847 |
+
"do_lower_case": false,
|
848 |
+
"eos_token": "[SEP]",
|
849 |
+
"mask_token": "[MASK]",
|
850 |
+
"model_max_length": 512,
|
851 |
+
"pad_token": "[PAD]",
|
852 |
+
"sep_token": "[SEP]",
|
853 |
+
"sp_model_kwargs": {},
|
854 |
+
"split_by_punct": false,
|
855 |
+
"tokenizer_class": "DebertaV2Tokenizer",
|
856 |
+
"unk_token": "[UNK]",
|
857 |
+
"vocab_type": "spm"
|
858 |
+
}
|