NeoZ123 committed on
Commit
4dcf5f0
·
verified ·
1 Parent(s): c30dc0f

Update modeling_llama.py

Browse files
Files changed (1) hide show
  1. modeling_llama.py +1 -1
modeling_llama.py CHANGED
@@ -1261,7 +1261,7 @@ class LlamaForCausalLM(LlamaPreTrainedModel):
1261
  def text_split_by_punctuation(original_text, return_dict=False):
1262
  # text = re.sub(r'([a-z])\.([A-Z])', r'\1. \2', original_text) # separate period without space
1263
  text = original_text
1264
- custom_sent_tokenizer = PunktSentenceTokenizer(text)
1265
  punctuations = r"([。;!?])" # For Chinese support
1266
 
1267
  separated = custom_sent_tokenizer.tokenize(text)
 
1261
  def text_split_by_punctuation(original_text, return_dict=False):
1262
  # text = re.sub(r'([a-z])\.([A-Z])', r'\1. \2', original_text) # separate period without space
1263
  text = original_text
1264
+ custom_sent_tokenizer = PunktSentenceTokenizer()
1265
  punctuations = r"([。;!?])" # For Chinese support
1266
 
1267
  separated = custom_sent_tokenizer.tokenize(text)