File size: 581 Bytes
206d8e1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
from pathlib import Path
from spacy.util import registry
from spacy.tokenizer import Tokenizer

# Absolute, symlink-resolved location of this module; both candidate model
# directories below are derived from it.
_THIS_FILE = Path(__file__).resolve()

# First choice: a 'model' directory alongside the directory containing this file.
_SIBLING_MODEL = _THIS_FILE.parents[1] / 'model'
# Fallback: we're in the wheel version of the model directory, where the
# pipeline data lives in a subdirectory next to this file.
# NOTE: the version number in this directory name is hardcoded.
_WHEEL_MODEL = _THIS_FILE.parents[0] / 'en_pipeline-0.0.0'

MODEL_PATH = _SIBLING_MODEL if _SIBLING_MODEL.exists() else _WHEEL_MODEL
# Location of the serialized tokenizer inside whichever model directory was chosen.
TOK_PATH = MODEL_PATH / 'tokenizer'

@registry.callbacks("custom_tok")
def get_custom():
    """Return a callback that builds the tokenizer stored at ``TOK_PATH``.

    The function is registered under the name ``"custom_tok"`` in spaCy's
    ``callbacks`` registry. It takes no arguments; the returned callable is
    the one that receives the ``nlp`` object.
    """

    def tokenizer_from_disk(nlp):
        """Create a ``Tokenizer`` on ``nlp.vocab`` and load its state from disk."""
        # Start from a bare tokenizer bound to the pipeline's vocab, then
        # load the serialized state from TOK_PATH into it.
        restored = Tokenizer(nlp.vocab)
        restored.from_disk(TOK_PATH)
        return restored

    return tokenizer_from_disk