Trained with Unsloth
Browse files
Upload model trained with Unsloth 2x faster
- added_tokens.json +4 -0
- tokenizer.json +18 -4
- tokenizer_config.json +16 -0
added_tokens.json
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"<|email|>": 32001,
|
3 |
+
"<|url|>": 32000
|
4 |
+
}
|
tokenizer.json
CHANGED
@@ -29,15 +29,29 @@
|
|
29 |
"rstrip": false,
|
30 |
"normalized": false,
|
31 |
"special": true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
}
|
33 |
],
|
34 |
"normalizer": {
|
35 |
"type": "Sequence",
|
36 |
"normalizers": [
|
37 |
-
{
|
38 |
-
"type": "Prepend",
|
39 |
-
"prepend": "▁"
|
40 |
-
},
|
41 |
{
|
42 |
"type": "Replace",
|
43 |
"pattern": {
|
|
|
29 |
"rstrip": false,
|
30 |
"normalized": false,
|
31 |
"special": true
|
32 |
+
},
|
33 |
+
{
|
34 |
+
"id": 32000,
|
35 |
+
"content": "<|url|>",
|
36 |
+
"single_word": false,
|
37 |
+
"lstrip": false,
|
38 |
+
"rstrip": false,
|
39 |
+
"normalized": true,
|
40 |
+
"special": false
|
41 |
+
},
|
42 |
+
{
|
43 |
+
"id": 32001,
|
44 |
+
"content": "<|email|>",
|
45 |
+
"single_word": false,
|
46 |
+
"lstrip": false,
|
47 |
+
"rstrip": false,
|
48 |
+
"normalized": true,
|
49 |
+
"special": false
|
50 |
}
|
51 |
],
|
52 |
"normalizer": {
|
53 |
"type": "Sequence",
|
54 |
"normalizers": [
|
|
|
|
|
|
|
|
|
55 |
{
|
56 |
"type": "Replace",
|
57 |
"pattern": {
|
tokenizer_config.json
CHANGED
@@ -25,6 +25,22 @@
|
|
25 |
"rstrip": false,
|
26 |
"single_word": false,
|
27 |
"special": true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
}
|
29 |
},
|
30 |
"additional_special_tokens": [],
|
|
|
25 |
"rstrip": false,
|
26 |
"single_word": false,
|
27 |
"special": true
|
28 |
+
},
|
29 |
+
"32000": {
|
30 |
+
"content": "<|url|>",
|
31 |
+
"lstrip": false,
|
32 |
+
"normalized": true,
|
33 |
+
"rstrip": false,
|
34 |
+
"single_word": false,
|
35 |
+
"special": false
|
36 |
+
},
|
37 |
+
"32001": {
|
38 |
+
"content": "<|email|>",
|
39 |
+
"lstrip": false,
|
40 |
+
"normalized": true,
|
41 |
+
"rstrip": false,
|
42 |
+
"single_word": false,
|
43 |
+
"special": false
|
44 |
}
|
45 |
},
|
46 |
"additional_special_tokens": [],
|