Recurv-Medical-Lllama / example.py
Recurv's picture
init
adf42a8 verified
raw
history blame contribute delete
758 Bytes
# pip install llama-cpp-python --prefer-binary --extra-index-url=https://jllllll.github.io/llama-cpp-python-cuBLAS-wheels/AVX2/cu118
from llama_cpp import Llama

# Load the GGUF model: 2048-token context window, 4 CPU threads.
model = Llama(
    model_path="recurv_llama_13B.gguf",
    n_ctx=2048,
    n_threads=4,
)

question = "What is Paracetamol?"

# Run a single completion: at most 256 new tokens, moderately focused
# sampling (temperature 0.5, nucleus top_p 0.95), stop on "###",
# and echo the prompt back as part of the output.
response = model(
    question,
    max_tokens=256,
    temperature=0.5,
    top_p=0.95,
    stop=["###"],
    echo=True,
)

# The completion text is in the first choice of the response dict.
print(response["choices"][0]["text"])