Locutusque committed on
Commit
c443b1b
·
verified ·
1 Parent(s): 6abe3e9

Upload tokenizer

Browse files
special_tokens_map.json CHANGED
@@ -13,13 +13,7 @@
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
- "pad_token": {
17
- "content": "</s>",
18
- "lstrip": false,
19
- "normalized": false,
20
- "rstrip": false,
21
- "single_word": false
22
- },
23
  "unk_token": {
24
  "content": "<unk>",
25
  "lstrip": false,
 
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
+ "pad_token": "<unk>",
 
 
 
 
 
 
17
  "unk_token": {
18
  "content": "<unk>",
19
  "lstrip": false,
tokenizer.json CHANGED
@@ -12,9 +12,9 @@
12
  },
13
  "direction": "Left",
14
  "pad_to_multiple_of": null,
15
- "pad_id": 2,
16
  "pad_type_id": 0,
17
- "pad_token": "</s>"
18
  },
19
  "added_tokens": [
20
  {
 
12
  },
13
  "direction": "Left",
14
  "pad_to_multiple_of": null,
15
+ "pad_id": 0,
16
  "pad_type_id": 0,
17
+ "pad_token": "<unk>"
18
  },
19
  "added_tokens": [
20
  {
tokenizer_config.json CHANGED
@@ -30,23 +30,15 @@
30
  },
31
  "additional_special_tokens": [],
32
  "bos_token": "<s>",
33
- "chat_template": "{%- set ns = namespace(found=false) -%}{%- for message in messages -%}{%- if message['role'] == 'system' -%}{%- set ns.found = true -%}{%- endif -%}{%- endfor -%}{%- for message in messages %}{%- if message['role'] == 'system' -%}{{- '<|im_start|>system\n' + message['content'].rstrip() + '<|im_end|>\n' -}}{%- else -%}{%- if message['role'] == 'user' -%}{{-'<|im_start|>user\n' + message['content'].rstrip() + '<|im_end|>\n'-}}{%- else -%}{{-'<|im_start|>assistant\n' + message['content'] + '<|im_end|>\n' -}}{%- endif -%}{%- endif -%}{%- endfor -%}{%- if add_generation_prompt -%}{{-'<|im_start|>assistant\n'-}}{%- endif -%}",
34
  "clean_up_tokenization_spaces": false,
35
  "eos_token": "</s>",
36
  "extra_special_tokens": {},
37
  "legacy": false,
38
- "max_length": 1024,
39
  "model_max_length": 1000000000000000019884624838656,
40
- "pad_to_multiple_of": null,
41
- "pad_token": "</s>",
42
- "pad_token_type_id": 0,
43
- "padding_side": "left",
44
  "sp_model_kwargs": {},
45
  "spaces_between_special_tokens": false,
46
- "stride": 0,
47
  "tokenizer_class": "LlamaTokenizer",
48
- "truncation_side": "right",
49
- "truncation_strategy": "longest_first",
50
  "unk_token": "<unk>",
51
  "use_default_system_prompt": false
52
  }
 
30
  },
31
  "additional_special_tokens": [],
32
  "bos_token": "<s>",
 
33
  "clean_up_tokenization_spaces": false,
34
  "eos_token": "</s>",
35
  "extra_special_tokens": {},
36
  "legacy": false,
 
37
  "model_max_length": 1000000000000000019884624838656,
38
+ "pad_token": "<unk>",
 
 
 
39
  "sp_model_kwargs": {},
40
  "spaces_between_special_tokens": false,
 
41
  "tokenizer_class": "LlamaTokenizer",
 
 
42
  "unk_token": "<unk>",
43
  "use_default_system_prompt": false
44
  }