- COMMANDS.md +13 -0
- inference.py +9 -0
- requirements.txt +1 -0
- test-inference.py +15 -0
COMMANDS.md
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
```bash
source ~/miniconda3/bin/activate
conda create --name unsloth_env \
    python=3.11 \
    pytorch-cuda=12.1 \
    pytorch cudatoolkit xformers -c pytorch -c nvidia -c xformers \
    -y
conda activate unsloth_env
pip install -r requirements.txt
# Should be installed within an inference endpoint by default:
pip install --no-deps trl peft accelerate bitsandbytes
sudo apt-get update && sudo apt-get install -y build-essential
```
|
inference.py
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Load the fine-tuned Llama 3.3 70B code-agent model and prepare it for inference."""
from unsloth import FastLanguageModel

# Download (or load from cache) the fine-tuned checkpoint and its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "aidando73/llama-3.3-70b-instruct-code-agent-fine-tune-v1",
    max_seq_length = 2048,  # maximum context length for this session
    dtype = "float16",
    load_in_4bit = True,  # 4-bit quantization — presumably to fit the 70B model in GPU memory; confirm against target hardware
)
# Switch Unsloth into its optimized inference mode (vs. training mode).
FastLanguageModel.for_inference(model)
|
9 |
+
|
requirements.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git
|
test-inference.py
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Smoke-test inference for the fine-tuned code-agent model.

Loads the 4-bit quantized model via Unsloth, streams a short generation
to stdout, and prints the full decoded response at the end.
"""
from unsloth import FastLanguageModel

# Download (or load from cache) the fine-tuned checkpoint and its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "aidando73/llama-3.3-70b-instruct-code-agent-fine-tune-v1",
    max_seq_length = 2048,  # maximum context length for this session
    dtype = "float16",
    load_in_4bit = True,  # 4-bit quantization — presumably to fit the 70B model in GPU memory; confirm against target hardware
)
# Switch Unsloth into its optimized inference mode (vs. training mode).
FastLanguageModel.for_inference(model)

from transformers import TextStreamer

# skip_prompt=True: stream only newly generated tokens, not the echoed prompt.
# TextStreamer writes decoded tokens to stdout as they are produced.
text_streamer = TextStreamer(tokenizer, skip_prompt = True)
input_ids = tokenizer.encode("Hello, how are you?", return_tensors = "pt").to("cuda")
# Capture the generated ids so the full response can be decoded below.
output_ids = model.generate(input_ids, streamer = text_streamer, max_new_tokens = 128, pad_token_id = tokenizer.eos_token_id)

# BUG FIX: the original `print(text_streamer.stream_output)` raised
# AttributeError — TextStreamer has no `stream_output` attribute (it only
# streams to stdout). Decode the returned ids to print the complete output.
print(tokenizer.decode(output_ids[0], skip_special_tokens = True))
|