aidando73 committed
Commit 0f38ced · 1 Parent(s): 39c431c
Files changed (4)
  1. COMMANDS.md +13 -0
  2. inference.py +9 -0
  3. requirements.txt +1 -0
  4. test-inference.py +15 -0
COMMANDS.md ADDED
@@ -0,0 +1,13 @@
+ ```bash
+ source ~/miniconda3/bin/activate
+ conda create --name unsloth_env \
+     python=3.11 \
+     pytorch-cuda=12.1 \
+     pytorch cudatoolkit xformers -c pytorch -c nvidia -c xformers \
+     -y
+ conda activate unsloth_env
+ pip install -r requirements.txt
+ # Should be installed within an inference endpoint by default:
+ pip install --no-deps trl peft accelerate bitsandbytes
+ sudo apt-get update && sudo apt-get install -y build-essential
+ ```
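
Once the environment is created, a quick sanity check before loading the 70B model can save a failed run. A minimal sketch (not part of this commit; the exact checks are assumptions about a typical CUDA setup):

```python
# Environment sanity check: verify the unsloth install imports and a CUDA GPU is visible.
import unsloth  # noqa: F401  # fails here if the pip install above went wrong
import torch

assert torch.cuda.is_available(), "No CUDA device visible"
print(torch.cuda.get_device_name(0))  # which GPU we got
# A 4-bit 70B model still needs roughly 40+ GB of GPU memory for the weights alone.
print(torch.cuda.get_device_properties(0).total_memory / 1e9, "GB")
```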
inference.py ADDED
@@ -0,0 +1,9 @@
+ from unsloth import FastLanguageModel
+ model, tokenizer = FastLanguageModel.from_pretrained(
+     model_name = "aidando73/llama-3.3-70b-instruct-code-agent-fine-tune-v1",
+     max_seq_length = 2048,
+     dtype = "float16",
+     load_in_4bit = True,  # 4-bit quantization so the 70B model fits in GPU memory
+ )
+ FastLanguageModel.for_inference(model)  # enable Unsloth's faster inference path
+
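
Since inference.py only loads the model and stops, a typical next step would be a chat-style generation through the tokenizer's chat template. A sketch under the assumption that this fine-tune keeps the base Llama 3.3 chat template (the prompt and token budget are illustrative):

```python
# Sketch: chat-template generation, continuing from inference.py's model/tokenizer.
messages = [{"role": "user", "content": "Write a Python function that reverses a string."}]
input_ids = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt = True,  # append the assistant header so the model answers
    return_tensors = "pt",
).to("cuda")
output_ids = model.generate(input_ids, max_new_tokens = 256, pad_token_id = tokenizer.eos_token_id)
# Decode only the newly generated tokens, not the prompt.
print(tokenizer.decode(output_ids[0, input_ids.shape[-1]:], skip_special_tokens = True))
```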
requirements.txt ADDED
@@ -0,0 +1 @@
+ unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git
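
A note on this requirement: installing from the repository's default branch tip means the resolved Unsloth version can change between installs. If reproducibility matters, the spec can be pinned to a tag or commit, e.g. `unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git@<ref>` (where `<ref>` stands for a tag or commit SHA of your choosing).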
test-inference.py ADDED
@@ -0,0 +1,15 @@
+ from unsloth import FastLanguageModel
+ model, tokenizer = FastLanguageModel.from_pretrained(
+     model_name = "aidando73/llama-3.3-70b-instruct-code-agent-fine-tune-v1",
+     max_seq_length = 2048,
+     dtype = "float16",
+     load_in_4bit = True,  # 4-bit quantization so the 70B model fits in GPU memory
+ )
+ FastLanguageModel.for_inference(model)  # enable Unsloth's faster inference path
+
+ from transformers import TextStreamer
+ text_streamer = TextStreamer(tokenizer, skip_prompt = True)  # prints tokens to stdout as they are generated
+ input_ids = tokenizer.encode("Hello, how are you?", return_tensors = "pt").to("cuda")
+ output_ids = model.generate(input_ids, streamer = text_streamer, max_new_tokens = 128, pad_token_id = tokenizer.eos_token_id)
+
+ print(tokenizer.decode(output_ids[0], skip_special_tokens = True))  # print the full decoded output after streaming finishes
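
Running `python test-inference.py` should stream the completion to stdout token by token (TextStreamer prints as it receives tokens, and skip_prompt=True suppresses the echoed prompt), then print the full decoded output once generation finishes. The first run also downloads the 4-bit checkpoint, which for a 70B model is a sizable download.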