aidando73 committed
Commit 0f38ced · 1 Parent(s): 39c431c
Files changed (4)
  1. COMMANDS.md +13 -0
  2. inference.py +9 -0
  3. requirements.txt +1 -0
  4. test-inference.py +15 -0
COMMANDS.md ADDED
@@ -0,0 +1,13 @@
+ ```bash
+ source ~/miniconda3/bin/activate
+ conda create --name unsloth_env \
+     python=3.11 \
+     pytorch-cuda=12.1 \
+     pytorch cudatoolkit xformers -c pytorch -c nvidia -c xformers \
+     -y
+ conda activate unsloth_env
+ pip install -r requirements.txt
+ # Should be installed within an inference endpoint by default:
+ pip install --no-deps trl peft accelerate bitsandbytes
+ sudo apt-get update && sudo apt-get install -y build-essential
+ ```
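
Once the environment is created, a quick sanity check before loading the 70B model can save a failed run. A minimal sketch (not part of this commit; the exact checks are assumptions about a typical CUDA setup):

```python
# Environment sanity check: verify the unsloth install imports and a CUDA GPU is visible.
import unsloth  # noqa: F401  # fails here if the pip install above went wrong
import torch

assert torch.cuda.is_available(), "No CUDA device visible"
print(torch.cuda.get_device_name(0))  # which GPU we got
# A 4-bit 70B model still needs roughly 40+ GB of GPU memory for the weights alone.
print(torch.cuda.get_device_properties(0).total_memory / 1e9, "GB")
```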
inference.py ADDED
@@ -0,0 +1,9 @@
+ from unsloth import FastLanguageModel
+ model, tokenizer = FastLanguageModel.from_pretrained(
+     model_name = "aidando73/llama-3.3-70b-instruct-code-agent-fine-tune-v1",
+     max_seq_length = 2048,
+     dtype = "float16",
+     load_in_4bit = True,  # 4-bit quantization so the 70B model fits in GPU memory
+ )
+ FastLanguageModel.for_inference(model)  # enable Unsloth's faster inference path
+
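
Since inference.py only loads the model and stops, a typical next step would be a chat-style generation through the tokenizer's chat template. A sketch under the assumption that this fine-tune keeps the base Llama 3.3 chat template (the prompt and token budget are illustrative):

```python
# Sketch: chat-template generation, continuing from inference.py's model/tokenizer.
messages = [{"role": "user", "content": "Write a Python function that reverses a string."}]
input_ids = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt = True,  # append the assistant header so the model answers
    return_tensors = "pt",
).to("cuda")
output_ids = model.generate(input_ids, max_new_tokens = 256, pad_token_id = tokenizer.eos_token_id)
# Decode only the newly generated tokens, not the prompt.
print(tokenizer.decode(output_ids[0, input_ids.shape[-1]:], skip_special_tokens = True))
```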
requirements.txt ADDED
@@ -0,0 +1 @@
+ unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git
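
A note on this requirement: installing from the repository's default branch tip means the resolved Unsloth version can change between installs. If reproducibility matters, the spec can be pinned to a tag or commit, e.g. `unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git@<ref>` (where `<ref>` stands for a tag or commit SHA of your choosing).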
test-inference.py ADDED
@@ -0,0 +1,15 @@
+ from unsloth import FastLanguageModel
+ model, tokenizer = FastLanguageModel.from_pretrained(
+     model_name = "aidando73/llama-3.3-70b-instruct-code-agent-fine-tune-v1",
+     max_seq_length = 2048,
+     dtype = "float16",
+     load_in_4bit = True,  # 4-bit quantization so the 70B model fits in GPU memory
+ )
+ FastLanguageModel.for_inference(model)  # enable Unsloth's faster inference path
+
+ from transformers import TextStreamer
+ text_streamer = TextStreamer(tokenizer, skip_prompt = True)  # prints tokens to stdout as they are generated
+ input_ids = tokenizer.encode("Hello, how are you?", return_tensors = "pt").to("cuda")
+ output_ids = model.generate(input_ids, streamer = text_streamer, max_new_tokens = 128, pad_token_id = tokenizer.eos_token_id)
+
+ print(tokenizer.decode(output_ids[0], skip_special_tokens = True))  # print the full decoded output after streaming finishes
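
Running `python test-inference.py` should stream the completion to stdout token by token (TextStreamer prints as it receives tokens, and skip_prompt=True suppresses the echoed prompt), then print the full decoded output once generation finishes. The first run also downloads the 4-bit checkpoint, which for a 70B model is a sizable download.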