{
  "added_tokens_decoder": {
    "0": {
      "content": "[UNK]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "1": {
      "content": "<|begin_of_text|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "2": {
      "content": "<|end_of_text|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "3": {
      "content": "[PAD]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "65520": {
      "content": "<|text_start|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "65521": {
      "content": "<|text_end|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "65522": {
      "content": "<|ocr_correction_start|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "65523": {
      "content": "<|ocr_correction_end|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "65524": {
      "content": "<|translation_start|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "65525": {
      "content": "<|translation_end|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "65526": {
      "content": "<|pii_preview_start|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "65527": {
      "content": "<|pii_preview_end|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "65528": {
      "content": "<|pii_analysis_start|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "65529": {
      "content": "<|pii_analysis_end|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "65530": {
      "content": "<|pii_result_start|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "65531": {
      "content": "<|pii_result_end|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "additional_special_tokens": [
    "<|text_start|>",
    "<|text_end|>",
    "<|ocr_correction_start|>",
    "<|ocr_correction_end|>",
    "<|translation_start|>",
    "<|translation_end|>",
    "<|pii_preview_start|>",
    "<|pii_preview_end|>",
    "<|pii_analysis_start|>",
    "<|pii_analysis_end|>",
    "<|pii_result_start|>",
    "<|pii_result_end|>"
  ],
  "clean_up_tokenization_spaces": true,
  "model_max_length": 1000000000000000019884624838656,
  "tokenizer_class": "PreTrainedTokenizerFast"
}