THUDM
/

LongCite-llama3.1-8b

Text Generation

text-generation-inference

Inference Endpoints

Model card Files Files and versions Community

NeoZ123 committed on Dec 16, 2024

Commit

4dcf5f0

·

verified ·

1 Parent(s): c30dc0f

Update modeling_llama.py

Files changed (1) hide show

modeling_llama.py +1 -1

modeling_llama.py CHANGED Viewed

@@ -1261,7 +1261,7 @@ class LlamaForCausalLM(LlamaPreTrainedModel):
         def text_split_by_punctuation(original_text, return_dict=False):
             # text = re.sub(r'([a-z])\.([A-Z])', r'\1. \2', original_text)  # separate period without space
             text = original_text
-            custom_sent_tokenizer = PunktSentenceTokenizer(text)
             punctuations = r"([。；！？])"  # For Chinese support
             separated = custom_sent_tokenizer.tokenize(text)

         def text_split_by_punctuation(original_text, return_dict=False):
             # text = re.sub(r'([a-z])\.([A-Z])', r'\1. \2', original_text)  # separate period without space
             text = original_text
+            custom_sent_tokenizer = PunktSentenceTokenizer()
             punctuations = r"([。；！？])"  # For Chinese support
             separated = custom_sent_tokenizer.tokenize(text)