Jacob Trock committed
Commit: b485435
Parent(s): 90daf30
Add Osmosis Structure 0.6B model with Git LFS for large files
- .gitattributes +2 -0
- .gitignore +3 -0
- .gitmodules +3 -0
- Osmosis-Structure-0.6B-BF16.gguf +3 -0
- added_tokens.json +3 -0
- chat_template.jinja +89 -0
- config.json +3 -0
- generate_quantization.sh +102 -0
- generation_config.json +3 -0
- jsonsft-0.6b.Q3_K_L.gguf +3 -0
- jsonsft-0.6b.Q4_K_M.gguf +3 -0
- jsonsft-0.6b.Q6_K.gguf +3 -0
- jsonsft-0.6b.Q8_0.gguf +3 -0
- llama.cpp +1 -0
- merges.txt +0 -0
- model.safetensors +3 -0
- optimizer.pt +3 -0
- rng_state.pth +3 -0
- scheduler.pt +3 -0
- special_tokens_map.json +3 -0
- test/Makefile +11 -0
- test/README.md +136 -0
- test/__pycache__/test_mcp_ollama.cpython-312.pyc +0 -0
- test/docker-compose.ollama.yaml +45 -0
- test/init-ollama.sh +83 -0
- test/ollama.test.Dockerfile +14 -0
- test/requirements.txt +3 -0
- test/structured_output_dataset/dataset_dict.json +3 -0
- test/structured_output_dataset/train/data-00000-of-00001.arrow +3 -0
- test/structured_output_dataset/train/dataset_info.json +3 -0
- test/structured_output_dataset/train/state.json +3 -0
- test/test_mcp_ollama.py +365 -0
- test_llama_setup.sh +63 -0
- tokenizer.json +3 -0
- tokenizer_config.json +3 -0
- trainer_state.json +3 -0
- training_args.bin +3 -0
- vocab.json +3 -0
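Because the large artifacts in this commit (the GGUF files, model.safetensors, optimizer.pt, the tokenizer JSONs) are committed as Git LFS pointers, a fresh checkout needs LFS to materialize them. A minimal sketch, assuming Git LFS is installed; `<repo-url>` is a placeholder, not the repository's actual URL:

```bash
# Hypothetical clone; <repo-url> stands in for this repository's real URL.
git lfs install                  # one-time setup of the LFS smudge/clean filters
git clone <repo-url> osmosis-structure-0.6b
cd osmosis-structure-0.6b
git lfs pull                     # fetch the real GGUF/safetensors contents behind the pointers
git submodule update --init     # pull in the llama.cpp submodule added by this commit
ls -lah *.gguf                   # should show multi-hundred-MB files, not 3-line pointers
```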
.gitattributes
CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.gguf filter=lfs diff=lfs merge=lfs -text
+*.json filter=lfs diff=lfs merge=lfs -text
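The two new `.gitattributes` rules are the kind of entries `git lfs track` writes; a minimal sketch of how they were presumably produced (an assumption, not taken from the commit itself):

```bash
# Each `git lfs track` call appends a matching filter line to .gitattributes.
git lfs track "*.gguf"
git lfs track "*.json"
git add .gitattributes
```

Tracking `*.json` is also why even small files such as `config.json` and `generation_config.json` appear below as three-line LFS pointers rather than plain JSON.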
.gitignore
ADDED
@@ -0,0 +1,3 @@
+venv/
+test_results/
+.DS_Store
.gitmodules
ADDED
@@ -0,0 +1,3 @@
+[submodule "llama.cpp"]
+	path = llama.cpp
+	url = https://github.com/ggml-org/llama.cpp.git
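This `.gitmodules` entry, together with the gitlink added under `llama.cpp` further down, is what `git submodule add` produces; a sketch of the presumed setup and of how a consumer would fetch the pinned revision:

```bash
# Registers llama.cpp as a submodule and records the pinned commit as a gitlink.
git submodule add https://github.com/ggml-org/llama.cpp.git llama.cpp

# On a fresh clone, check out the pinned revision (aa6dff0...) with:
git submodule update --init --recursive
```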
Osmosis-Structure-0.6B-BF16.gguf
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:64b24b3def4b79621c0b9f05581e6f9c52da01523a8e025947003b8888593166
+size 1198177920
added_tokens.json
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c0284b582e14987fbd3d5a2cb2bd139084371ed9acbae488829a1c900833c680
+size 707
chat_template.jinja
ADDED
@@ -0,0 +1,89 @@
+{%- if tools %}
+{{- '<|im_start|>system\n' }}
+{%- if messages[0].role == 'system' %}
+{{- messages[0].content + '\n\n' }}
+{%- endif %}
+{{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
+{%- for tool in tools %}
+{{- "\n" }}
+{{- tool | tojson }}
+{%- endfor %}
+{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
+{%- else %}
+{%- if messages[0].role == 'system' %}
+{{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
+{%- endif %}
+{%- endif %}
+{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
+{%- for message in messages[::-1] %}
+{%- set index = (messages|length - 1) - loop.index0 %}
+{%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
+{%- set ns.multi_step_tool = false %}
+{%- set ns.last_query_index = index %}
+{%- endif %}
+{%- endfor %}
+{%- for message in messages %}
+{%- if message.content is string %}
+{%- set content = message.content %}
+{%- else %}
+{%- set content = '' %}
+{%- endif %}
+{%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
+{{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
+{%- elif message.role == "assistant" %}
+{%- set reasoning_content = '' %}
+{%- if message.reasoning_content is string %}
+{%- set reasoning_content = message.reasoning_content %}
+{%- else %}
+{%- if '</think>' in content %}
+{%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
+{%- set content = content.split('</think>')[-1].lstrip('\n') %}
+{%- endif %}
+{%- endif %}
+{%- if loop.index0 > ns.last_query_index %}
+{%- if loop.last or (not loop.last and reasoning_content) %}
+{{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
+{%- else %}
+{{- '<|im_start|>' + message.role + '\n' + content }}
+{%- endif %}
+{%- else %}
+{{- '<|im_start|>' + message.role + '\n' + content }}
+{%- endif %}
+{%- if message.tool_calls %}
+{%- for tool_call in message.tool_calls %}
+{%- if (loop.first and content) or (not loop.first) %}
+{{- '\n' }}
+{%- endif %}
+{%- if tool_call.function %}
+{%- set tool_call = tool_call.function %}
+{%- endif %}
+{{- '<tool_call>\n{"name": "' }}
+{{- tool_call.name }}
+{{- '", "arguments": ' }}
+{%- if tool_call.arguments is string %}
+{{- tool_call.arguments }}
+{%- else %}
+{{- tool_call.arguments | tojson }}
+{%- endif %}
+{{- '}\n</tool_call>' }}
+{%- endfor %}
+{%- endif %}
+{{- '<|im_end|>\n' }}
+{%- elif message.role == "tool" %}
+{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
+{{- '<|im_start|>user' }}
+{%- endif %}
+{{- '\n<tool_response>\n' }}
+{{- content }}
+{{- '\n</tool_response>' }}
+{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
+{{- '<|im_end|>\n' }}
+{%- endif %}
+{%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+{{- '<|im_start|>assistant\n' }}
+{%- if enable_thinking is defined and enable_thinking is false %}
+{{- '<think>\n\n</think>\n\n' }}
+{%- endif %}
+{%- endif %}
config.json
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d8fee10fe0e3175672f2ae63d15c659e07d7e958c7edca2f4376296707d12f7e
+size 726
generate_quantization.sh
ADDED
@@ -0,0 +1,102 @@
+#!/bin/bash
+
+# Activate virtual environment
+source venv/bin/activate
+
+# Input model directory (containing safetensors files)
+INPUT_MODEL_DIR="../Osmosis-Structure-0.6B"
+# Output GGUF model file
+INPUT_MODEL="Osmosis-Structure-0.6B-BF16.gguf"
+
+echo "======================================================"
+echo "Step 1: Converting safetensors to GGUF format"
+echo "======================================================"
+
+# Check if input model directory exists
+if [ ! -d "$INPUT_MODEL_DIR" ]; then
+    echo "Error: Input model directory $INPUT_MODEL_DIR not found."
+    exit 1
+fi
+
+# Check if GGUF model already exists
+if [ -f "$INPUT_MODEL" ]; then
+    echo "GGUF model $INPUT_MODEL already exists. Skipping conversion..."
+else
+    echo "Converting $INPUT_MODEL_DIR to GGUF format..."
+
+    # Convert safetensors to GGUF
+    cd llama.cpp
+    python3 convert_hf_to_gguf.py "../$INPUT_MODEL_DIR" --outtype bf16
+    cd ..
+
+    # Check if conversion was successful
+    if [ -f "$INPUT_MODEL" ]; then
+        echo "Successfully converted to $INPUT_MODEL"
+    else
+        echo "Error: Failed to convert model to GGUF format."
+        exit 1
+    fi
+fi
+
+echo ""
+echo "======================================================"
+echo "Step 2: Quantizing GGUF model"
+echo "======================================================"
+
+# Define quantization formats to generate
+QUANT_FORMATS=(
+    "Q8_0"
+    "Q6_K"
+    "Q3_K_L"
+    "Q4_K_M"
+)
+
+# Path to llama-quantize tool
+QUANTIZE_TOOL="llama.cpp/build/bin/llama-quantize"
+
+# Check if quantize tool exists
+if [ ! -f "$QUANTIZE_TOOL" ]; then
+    echo "Error: Quantize tool not found at $QUANTIZE_TOOL"
+    echo "Please make sure llama.cpp is built with: cd llama.cpp && make"
+    exit 1
+fi
+
+# Process each quantization format
+for format in "${QUANT_FORMATS[@]}"; do
+    echo "------------------------------------------------------"
+    echo "Starting quantization: $format"
+    echo "------------------------------------------------------"
+
+    # Define output filename with the exact format requested
+    OUTPUT_MODEL="jsonsft-0.6b.${format}.gguf"
+
+    # Check if output model already exists
+    if [ -f "$OUTPUT_MODEL" ]; then
+        echo "Model $OUTPUT_MODEL already exists. Skipping..."
+        continue
+    fi
+
+    # Run quantization
+    echo "Quantizing to $format..."
+    "$QUANTIZE_TOOL" "$INPUT_MODEL" "$OUTPUT_MODEL" "$format"
+
+    # Check if quantization was successful
+    if [ $? -eq 0 ]; then
+        echo "Successfully created $OUTPUT_MODEL"
+        # Show file size
+        ls -lah "$OUTPUT_MODEL"
+    else
+        echo "Failed to create $OUTPUT_MODEL"
+    fi
+
+    echo ""
+done
+
+echo "======================================================"
+echo "All operations completed!"
+echo "======================================================"
+echo "Base GGUF model:"
+ls -lah "$INPUT_MODEL"
+echo ""
+echo "Quantized models:"
+ls -lah jsonsft-0.6b.*.gguf 2>/dev/null || echo "No quantized models found."
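Running this script requires the `llama.cpp` submodule to be built first so that `build/bin/llama-quantize` exists at the path the script checks. A minimal usage sketch, assuming a CMake build of the submodule and a `venv/` that already has the converter's dependencies (`torch`, `transformers`, `gguf`, as checked by `test_llama_setup.sh` below):

```bash
# Build llama.cpp so build/bin/llama-quantize is available (the path the script checks).
cmake -S llama.cpp -B llama.cpp/build
cmake --build llama.cpp/build --config Release -j

# Run the conversion + quantization pipeline; it skips any output that already exists.
chmod +x generate_quantization.sh
./generate_quantization.sh

# Expected outputs: Osmosis-Structure-0.6B-BF16.gguf plus jsonsft-0.6b.{Q8_0,Q6_K,Q4_K_M,Q3_K_L}.gguf
ls -lah *.gguf
```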
generation_config.json
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8aa750602852f354add3c477ce8c816ccabcde6251ae48cc4c8cccf2610d63df
+size 214
jsonsft-0.6b.Q3_K_L.gguf
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:362b57e906c4dd91c528da7304e5f47a6b2d1155611bfdebe7fef1b67150cbdd
+size 368487040
jsonsft-0.6b.Q4_K_M.gguf
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:97dca22c1b92665f96fe020567f2e8646ebfb749ebc04b8e39602791e760ba51
+size 396700288
jsonsft-0.6b.Q6_K.gguf
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fb4ae78a3ce7f9c727e1e175cac19eed73ea63c24f278b69a2b822886b122c6c
+size 495102592
jsonsft-0.6b.Q8_0.gguf
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:696a6dcd7ba50cb3a82791d03c808c236fa0f23e24fd7c45224a004a2cbc9588
+size 639442560
llama.cpp
ADDED
@@ -0,0 +1 @@
+Subproject commit aa6dff05be25709bb218bf648951d690029c4b19
merges.txt
ADDED
(diff too large to render)
model.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6e2176c467116fbbd78f6310aa6c5d9c9d99668a061d4ea426b69d5af9db0077
+size 2384234968
optimizer.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:940690d5ce57a95f434455e6af4605c0d2dd04a0479315b9e698b04f86d4e93e
+size 4768667667
rng_state.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:61c19bab1174704a4a4441475683bf1270277af15d2e2c95e964789128e482c4
+size 14645
scheduler.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:998e35850da1b676e839a131aa498c1aac2743375b8d3eae04511922d30cf59f
+size 1465
special_tokens_map.json
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:76862e765266b85aa9459767e33cbaf13970f327a0e88d1c65846c2ddd3a1ecd
+size 613
test/Makefile
ADDED
@@ -0,0 +1,11 @@
+run-ollama:
+	docker compose -f docker-compose.ollama.yaml up --build --force-recreate --remove-orphans
+
+run-ollama-headless:
+	docker compose -f docker-compose.ollama.yaml up -d --build --force-recreate --remove-orphans
+
+stop-ollama:
+	docker compose -f docker-compose.ollama.yaml down
+
+clean:
+	docker compose -f docker-compose.ollama.yaml down --remove-orphans && docker volume rm test_ollama_data
test/README.md
ADDED
@@ -0,0 +1,136 @@
+# Structured Output Testing with Ollama
+
+This directory contains a test script that evaluates language models' ability to generate structured JSON output using a dataset of structured output examples.
+
+## Overview
+
+The `test_mcp_ollama.py` script loads a structured output dataset and tests various language models available in Ollama to see how well they can generate structured JSON responses based on schema-guided prompts.
+
+## Dataset
+
+The test uses the structured output dataset located in `./structured_output_dataset/` which contains:
+- **5,108 test cases** with conversation examples
+- Each test case includes:
+  - A system message with a JSON schema definition
+  - A user message with natural language description
+  - An assistant response with the expected structured JSON output
+
+## Requirements
+
+Install the required dependencies:
+
+```bash
+pip install -r requirements.txt
+```
+
+Required packages:
+- `ollama>=0.1.0` - For interacting with Ollama models
+- `datasets>=2.0.0` - For loading the structured output dataset
+- `pyarrow>=10.0.0` - For dataset serialization support
+
+## Usage
+
+### Basic Usage
+
+Test all available models in Ollama with default settings (5 test cases per model):
+
+```bash
+python3 test_mcp_ollama.py
+```
+
+### Test a Specific Model
+
+```bash
+python3 test_mcp_ollama.py --model llama2:latest
+```
+
+### Adjust Number of Test Cases
+
+```bash
+python3 test_mcp_ollama.py --max-test-cases 10
+```
+
+### Verbose Output
+
+```bash
+python3 test_mcp_ollama.py --verbose
+```
+
+### Combined Options
+
+```bash
+python3 test_mcp_ollama.py --model mistral:latest --max-test-cases 3 --verbose
+```
+
+## Output
+
+The script provides:
+
+1. **Real-time progress** showing success/failure for each test case
+2. **Summary statistics** including:
+   - Total tests run
+   - Success/failure counts
+   - Success rate per model
+3. **Detailed results** saved to `structured_output_ollama_results.json`
+
+### Example Output
+
+```
+Loaded 5 test cases from dataset
+✅ Found 2 model(s), proceeding with tests.
+
+Will test 2 model(s) from Ollama:
+  - llama2:latest
+  - mistral:latest
+
+✅ SUCCESS - llama2:latest (test case 0) (took 2.34s)
+✅ SUCCESS - llama2:latest (test case 1) (took 1.89s)
+❌ FAILED - llama2:latest (test case 2) (took 0.45s) - Error: Connection timeout
+
+=== SUMMARY ===
+Total tests run: 10
+Successful: 8
+Failed: 2
+
+Results by model:
+  llama2:latest: 4/5 successful (80.0%)
+  mistral:latest: 4/5 successful (80.0%)
+```
+
+## Test Evaluation
+
+The script evaluates models based on:
+- **Response generation**: Whether the model produces a non-empty response
+- **Error handling**: Catches and reports connection/generation errors
+- **Performance timing**: Measures response generation time
+
+## Dataset Structure
+
+Each test case in the dataset follows this structure:
+
+```python
+{
+    "id": 0,
+    "messages": [
+        {
+            "role": "system",
+            "content": "You are a helpful assistant that understands and translates text to JSON format according to the following schema. {...}"
+        },
+        {
+            "role": "user",
+            "content": "Natural language description of data to be structured..."
+        },
+        {
+            "role": "assistant",
+            "content": "{\"structured\": \"json\", \"response\": \"here\"}"
+        }
+    ]
+}
+```
+
+## Notes
+
+- The script waits for Ollama models to be loaded before starting tests
+- Results are saved to `structured_output_ollama_results.json` for further analysis
+- Use `--max-test-cases` to limit testing time for quick evaluations
+- The `--verbose` flag shows detailed response content for debugging
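The README's examples use generic model names; in this repository the quantized GGUFs are registered by `init-ollama.sh` (added below) under derived names such as `jsonsftq4km`. One way to target a single quantization once the compose stack is up is sketched here; the exact model name is an assumption based on that script's naming rule, not something recorded in this commit:

```bash
# Hypothetical one-off run against a single quantized model, executed from the test/ directory
# while the docker-compose.ollama.yaml stack is running.
docker compose -f docker-compose.ollama.yaml run --rm \
  ollama-test python /app/test_mcp_ollama.py --model jsonsftq4km --max-test-cases 10 -v
```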
test/__pycache__/test_mcp_ollama.cpython-312.pyc
ADDED
Binary file (13.4 kB)
test/docker-compose.ollama.yaml
ADDED
@@ -0,0 +1,45 @@
+services:
+  ollama:
+    image: ollama/ollama:0.7.1
+    container_name: ollama
+    volumes:
+      - ollama_data:/root/.ollama
+      - ../jsonsft-0.6b.Q3_K_L.gguf:/models/jsonsft-0.6b.Q3_K_L.gguf:ro
+      - ../jsonsft-0.6b.Q4_K_M.gguf:/models/jsonsft-0.6b.Q4_K_M.gguf:ro
+      - ../jsonsft-0.6b.Q6_K.gguf:/models/jsonsft-0.6b.Q6_K.gguf:ro
+      - ../jsonsft-0.6b.Q8_0.gguf:/models/jsonsft-0.6b.Q8_0.gguf:ro
+      - ./init-ollama.sh:/init-ollama.sh:ro
+    entrypoint: /init-ollama.sh
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              capabilities: [gpu]
+              count: all
+    restart: unless-stopped
+    healthcheck:
+      test: ["CMD", "ollama", "list"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+
+  ollama-test:
+    build:
+      context: .
+      dockerfile: ollama.test.Dockerfile
+    container_name: ollama-test
+    depends_on:
+      ollama:
+        condition: service_healthy
+    volumes:
+      - ../:/app/models:ro
+      - ./test_results/ollama:/app/test_results
+    working_dir: /app/test_results
+    environment:
+      - OLLAMA_HOST=http://ollama:11434
+    command: python /app/test_mcp_ollama.py -v
+    restart: "no"
+
+volumes:
+  ollama_data:
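Taken together with the `test/Makefile` above, a plausible end-to-end run of this compose file looks like the sketch below (assuming Docker Compose v2 and an NVIDIA container runtime, since the `ollama` service reserves a GPU):

```bash
cd test
make run-ollama-headless      # builds and starts the ollama and ollama-test services in the background
docker logs -f ollama-test    # follow per-test-case progress from test_mcp_ollama.py
# Results land on the host via the ./test_results/ollama bind mount:
cat test_results/ollama/structured_output_ollama_results.json
make stop-ollama              # tear the stack down when finished
```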
test/init-ollama.sh
ADDED
@@ -0,0 +1,83 @@
+#!/bin/bash
+set -e  # Exit immediately if a command exits with a non-zero status.
+
+MODELS_DIR="/models"
+echo "Ollama initialization script started."
+
+# listing models in models directory
+echo "Listing models in $MODELS_DIR:"
+ls -1 "$MODELS_DIR"/*.gguf
+
+# Start ollama serve in the background so 'ollama create' can connect
+ollama serve &
+OLLAMA_PID=$!
+echo "Ollama server started in background with PID $OLLAMA_PID."
+
+# Wait for Ollama server to be ready
+echo "Waiting for Ollama server to be up..."
+max_attempts=20  # Increased attempts for slower systems
+attempt=0
+while ! ollama list > /dev/null 2>&1; do
+    attempt=$((attempt + 1))
+    if [ "$attempt" -ge "$max_attempts" ]; then
+        echo "Ollama server did not start in time. Exiting."
+        # Try to kill the background server if it's still running
+        if kill -0 $OLLAMA_PID 2>/dev/null; then
+            kill $OLLAMA_PID
+            wait $OLLAMA_PID 2>/dev/null
+        fi
+        exit 1
+    fi
+    echo "Waiting... (attempt ${attempt}/${max_attempts})"
+    sleep 5  # Increased sleep time
+done
+echo "Ollama server is up and running."
+
+echo "Looking for GGUF models in $MODELS_DIR..."
+if [ -d "$MODELS_DIR" ]; then
+    find "$MODELS_DIR" -type f -name "*.gguf" -print0 | while IFS= read -r -d $'\0' model_path; do
+        filename=$(basename "$model_path")
+        # Derive model name: ultra-simple, just letters and numbers
+        # Try the absolute simplest naming possible
+        base_name="jsonsft"
+        quantization=$(echo "$filename" | sed 's/.*\.\([QK][0-9_KLM]*\)\.gguf$/\1/' | tr '[:upper:]' '[:lower:]' | sed 's/[^a-z0-9]//g')
+        model_name="${base_name}${quantization}"
+
+        echo "Found GGUF file: $model_path"
+        echo "Derived model name: $model_name"
+
+        # Check if model already exists
+        if ollama list | grep -q "^${model_name}[[:space:]]"; then
+            echo "Model ${model_name} already exists in Ollama. Skipping creation."
+        else
+            echo "Creating model ${model_name} from $model_path..."
+            modelfile_content="FROM $model_path"
+
+            # create modelfile in writable temp directory instead of read-only models directory
+            modelfile_path="/tmp/${model_name}.modelfile"
+            echo "$modelfile_content" > "$modelfile_path"
+
+            if ollama create "$model_name" -f "$modelfile_path"; then
+                echo "Successfully created model ${model_name}."
+            else
+                echo "Failed to create model ${model_name}. Continuing with next model..."
+            fi
+        fi
+    done
+else
+    echo "Warning: Models directory $MODELS_DIR not found. No models will be loaded."
+fi
+
+echo "Model initialization complete."
+echo "Stopping background Ollama server (PID $OLLAMA_PID) and starting it in foreground..."
+
+# Stop the background ollama serve
+if kill -0 $OLLAMA_PID 2>/dev/null; then
+    kill $OLLAMA_PID
+    wait $OLLAMA_PID 2>/dev/null  # Wait for it to actually terminate, ignore error if already stopped
+else
+    echo "Background Ollama server was not running or already stopped."
+fi
+
+echo "Starting Ollama server in foreground to keep container running."
+exec ollama serve  # Replace this script process with ollama serve
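The name-derivation pipeline above (strip everything up to the quantization suffix, lowercase it, drop punctuation) can be sanity-checked in isolation; a small sketch of what it should print for the four quantized files shipped in this commit:

```bash
# Reproduces the model-name derivation from init-ollama.sh outside the container.
for f in jsonsft-0.6b.Q3_K_L.gguf jsonsft-0.6b.Q4_K_M.gguf jsonsft-0.6b.Q6_K.gguf jsonsft-0.6b.Q8_0.gguf; do
  q=$(echo "$f" | sed 's/.*\.\([QK][0-9_KLM]*\)\.gguf$/\1/' | tr '[:upper:]' '[:lower:]' | sed 's/[^a-z0-9]//g')
  echo "$f -> jsonsft${q}"
done
# Expected Ollama model names: jsonsftq3kl, jsonsftq4km, jsonsftq6k and jsonsftq80
```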
test/ollama.test.Dockerfile
ADDED
@@ -0,0 +1,14 @@
+FROM python:3.12-slim
+
+WORKDIR /app
+
+COPY ./test_mcp_ollama.py /app/
+COPY ./structured_output_dataset /app/data/test/structured_output_dataset
+COPY ./requirements.txt /app/
+
+RUN pip install -r requirements.txt
+
+# Models are mounted from host to /app/models
+# Test results are written to /app/data, which is mounted from host
+# The script is run with /app/data as its working directory
+# OLLAMA_HOST will be set in docker-compose to point to the ollama service
test/requirements.txt
ADDED
@@ -0,0 +1,3 @@
+ollama>=0.4.8
+datasets>=3.6.0
+pyarrow>=20.0.0
test/structured_output_dataset/dataset_dict.json
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c172eebfc28c1400d6be4338ce7d00191507ffb4ae64c315f039585c894df5b7
+size 21
test/structured_output_dataset/train/data-00000-of-00001.arrow
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8bcb36bf09086f37e3b6d2c883d899cde0aa2fe57acc069c2e3cbbc69c013f33
+size 26475688
test/structured_output_dataset/train/dataset_info.json
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ecb26114cf6a9305f593ea81f8e68a2f56577d6d710884e45e478a7320fffe69
+size 309
test/structured_output_dataset/train/state.json
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4f6dd66bcca38325282206607387522ea7dc685f341914b81ffe6de59dd94490
+size 247
test/test_mcp_ollama.py
ADDED
@@ -0,0 +1,365 @@
+#!/usr/bin/env python3
+import os
+import sys
+import json
+import time
+import argparse
+from typing import List, Dict, Any
+
+try:
+    import ollama
+except ImportError:
+    print("Error: ollama package not found. Please install with: pip install ollama")
+    sys.exit(1)
+
+try:
+    from datasets import load_dataset, DatasetDict
+except ImportError:
+    print("Error: datasets package not found. Please install with: pip install datasets")
+    sys.exit(1)
+
+# Global variable for verbose flag
+verbose_global = False
+
+def load_structured_output_dataset(dataset_path: str = "/app/data/test/structured_output_dataset") -> List[Dict[str, Any]]:
+    """Load the structured output dataset and return test cases."""
+    try:
+        # Load the dataset instead of reading parquet
+        dataset = DatasetDict.load_from_disk(dataset_path)
+        combined = dataset["train"]
+
+        test_cases = []
+        for i, example in enumerate(combined):
+            # Extract messages from the dataset
+            messages = example.get("messages", [])
+            if messages:
+                test_cases.append({
+                    "id": i,
+                    "messages": messages
+                })
+
+        if verbose_global:
+            print(f"Loaded {len(test_cases)} test cases from dataset")
+
+        return test_cases
+    except Exception as e:
+        print(f"Error loading dataset: {str(e)}")
+        return []
+
+def list_ollama_models() -> List[str]:
+    """List available models in Ollama using the Python library."""
+    try:
+        models_data = ollama.list()
+
+        if verbose_global:
+            print(f"Response type: {type(models_data)}")
+
+        model_names = []
+
+        # Handle ollama._types.ListResponse object
+        if hasattr(models_data, 'models'):
+            models = models_data.models
+        # Handle dict response
+        elif isinstance(models_data, dict) and "models" in models_data:
+            models = models_data["models"]
+        elif isinstance(models_data, dict) and "model" in models_data:
+            models = [models_data]
+        # Handle list response
+        elif isinstance(models_data, list):
+            models = models_data
+        else:
+            if verbose_global:
+                print(f"Unexpected response type: {type(models_data)}")
+            return []
+
+        for model in models:
+            # Handle Model objects with .model attribute
+            if hasattr(model, 'model'):
+                model_names.append(model.model)
+            # Handle dict with various possible keys
+            elif isinstance(model, dict):
+                name = model.get("name") or model.get("model") or model.get("id")
+                if name:
+                    model_names.append(name)
+            # Handle string
+            elif isinstance(model, str):
+                model_names.append(model)
+
+        if verbose_global:
+            print(f"Extracted model names: {model_names}")
+
+        return model_names
+    except Exception as e:
+        if verbose_global:
+            print(f"Error listing models: {str(e)}")
+        return []
+
+def wait_for_models_ready(expected_min_models: int = 3, max_wait_time: int = 300, check_interval: int = 10) -> List[str]:
+    """Wait for models to be loaded in Ollama before proceeding with tests."""
+    print(f"Waiting for at least {expected_min_models} model(s) to be loaded in Ollama...")
+    start_time = time.time()
+
+    while time.time() - start_time < max_wait_time:
+        try:
+            models = list_ollama_models()
+
+            if verbose_global:
+                print(f"Found {len(models)} model(s): {models}")
+
+            if len(models) >= expected_min_models:
+                print(f"✅ Found {len(models)} model(s), proceeding with tests.")
+                return models
+            else:
+                elapsed = int(time.time() - start_time)
+                print(f"⏳ Found {len(models)} model(s), waiting for more... ({elapsed}s elapsed)")
+
+        except Exception as e:
+            elapsed = int(time.time() - start_time)
+            if verbose_global:
+                print(f"⏳ Ollama server not ready... ({elapsed}s elapsed) - {str(e)}")
+            else:
+                print(f"⏳ Waiting for Ollama server... ({elapsed}s elapsed)")
+
+        time.sleep(check_interval)
+
+    # Final attempt after timeout
+    final_models = list_ollama_models()
+    if final_models:
+        print(f"⚠️ Timeout reached, but found {len(final_models)} model(s). Proceeding anyway.")
+        return final_models
+    else:
+        print("❌ Timeout reached and no models found.")
+        return []
+
+def test_ollama_model(model_name: str, test_case: Dict[str, Any], verbose: bool = False) -> Dict[str, Any]:
+    """Test a model with a test case from the dataset and return the results."""
+    result = {
+        "model": model_name,
+        "test_case_id": test_case["id"],
+        "success": False,
+        "error": None,
+        "duration": 0,
+        "response": None,
+        "request": None
+    }
+
+    start_time = time.time()
+
+    try:
+        if verbose:
+            print(f"Testing model: {model_name} with test case {test_case['id']}")
+
+        # Use the messages from the dataset test case
+        messages = test_case["messages"]
+
+        # Extract JSON schema from system message for structured output
+        json_schema = None
+        for message in messages:
+            if message.get("role") == "system":
+                content = message.get("content", "")
+                # Look for JSON schema in the system message
+                if "schema" in content.lower() and "{" in content:
+                    # Extract the JSON schema part from the system message
+                    try:
+                        # Find the JSON part in the system message
+                        start_idx = content.find("{")
+                        if start_idx != -1:
+                            # Find the matching closing brace
+                            brace_count = 0
+                            end_idx = start_idx
+                            for i, char in enumerate(content[start_idx:], start_idx):
+                                if char == "{":
+                                    brace_count += 1
+                                elif char == "}":
+                                    brace_count -= 1
+                                    if brace_count == 0:
+                                        end_idx = i + 1
+                                        break
+
+                            schema_text = content[start_idx:end_idx]
+                            json_schema = json.loads(schema_text)
+                            if verbose:
+                                print(f"Extracted JSON schema for structured output")
+                    except (json.JSONDecodeError, ValueError) as e:
+                        if verbose:
+                            print(f"Could not parse JSON schema from system message: {e}")
+                break
+
+        # Prepare the chat request with format parameter for structured output
+        chat_params = {
+            "model": model_name,
+            "messages": messages,
+            "options": {
+                "temperature": 0.7,
+                "top_p": 0.95,
+                "num_predict": 512,
+            }
+        }
+
+        # Add format parameter to force JSON output
+        if json_schema:
+            # Use the extracted JSON schema for structured output
+            chat_params["format"] = json_schema
+            if verbose:
+                print(f"Using structured output with JSON schema")
+        else:
+            # Fallback to basic JSON mode if no schema found
+            chat_params["format"] = "json"
+            if verbose:
+                print(f"Using basic JSON format mode")
+
+        # Use the ollama library to send a chat request
+        response = ollama.chat(**chat_params)
+
+        # Extract the response content
+        generated_text = response.get("message", {}).get("content", "")
+
+        # Check if we got a valid response
+        has_valid_response = len(generated_text.strip()) > 0
+
+        result["success"] = has_valid_response
+        result["request"] = messages
+        result["response"] = generated_text[:500] + "..." if len(generated_text) > 500 else generated_text
+
+    except Exception as e:
+        result["error"] = str(e)
+
+    result["duration"] = time.time() - start_time
+    return result
+
+def main():
+    global verbose_global
+
+    parser = argparse.ArgumentParser(description="Test structured output functionality on models pre-loaded in Ollama")
+    parser.add_argument("--model", type=str, help="Test a specific model name (must exist in Ollama)")
+    parser.add_argument("--verbose", "-v", action="store_true", help="Enable verbose output")
+    parser.add_argument("--max-test-cases", type=int, default=5, help="Maximum number of test cases to run per model")
+    args = parser.parse_args()
+    verbose_global = args.verbose
+
+    if args.verbose:
+        print("Verbose mode enabled.")
+        print(f"OLLAMA_HOST is set to: {os.environ.get('OLLAMA_HOST')}")
+
+    # Load test cases from the structured output dataset
+    test_cases = load_structured_output_dataset()
+    if not test_cases:
+        print("No test cases found in the dataset.")
+        sys.exit(1)
+
+    # Limit the number of test cases if specified
+    if args.max_test_cases and len(test_cases) > args.max_test_cases:
+        test_cases = test_cases[:args.max_test_cases]
+        print(f"Limited to {args.max_test_cases} test cases")
+
+    print(f"Loaded {len(test_cases)} test cases from dataset")
+
+    # Wait for models to be properly loaded (expect most of the 4 GGUF files to be converted)
+    ollama_models_available = wait_for_models_ready(expected_min_models=4, max_wait_time=300, check_interval=10)
+    if not ollama_models_available:
+        print("No models found in Ollama after waiting. The init script may have failed.")
+        sys.exit(1)
+
+    if args.verbose:
+        print("Available models in Ollama:")
+        for m in ollama_models_available:
+            print(f"  - {m}")
+
+    models_to_test_names = []
+
+    if args.model:
+        # Test a specific model if provided
+        if args.model in ollama_models_available:
+            models_to_test_names.append(args.model)
+        else:
+            # Attempt to match if user provided a name without a tag (e.g., "llama2" instead of "llama2:latest")
+            # Ollama list returns names like "modelname:tag"
+            found_match = False
+            for om_name in ollama_models_available:
+                if om_name.startswith(args.model + ":"):
+                    models_to_test_names.append(om_name)
+                    if args.verbose:
+                        print(f"Found match for '{args.model}': testing '{om_name}'")
+                    found_match = True
+                    break
+            if not found_match:
+                print(f"Error: Specified model '{args.model}' not found in Ollama's list: {ollama_models_available}")
+                sys.exit(1)
+    else:
+        # Test all available models in Ollama
+        models_to_test_names = ollama_models_available
+
+    if not models_to_test_names:
+        print("No models selected to test.")
+        sys.exit(1)
+
+    print(f"\nWill test {len(models_to_test_names)} model(s) from Ollama:")
+    for model_name in models_to_test_names:
+        print(f"  - {model_name}")
+    print()
+
+    results = []
+
+    for model_name in models_to_test_names:
+        if args.verbose:
+            print(f"--- Starting tests for model: {model_name} ---")
+
+        for test_case in test_cases:
+            result = test_ollama_model(model_name, test_case, args.verbose)
+            result["path"] = model_name  # Use model_name as path for consistency in reporting
+            results.append(result)
+
+            status = "✅ SUCCESS" if result["success"] else "❌ FAILED"
+            error_msg = f" - Error: {result['error']}" if result["error"] else ""
+            print(f"{status} - {model_name} (test case {test_case['id']}) (took {result['duration']:.2f}s){error_msg}")
+            if args.verbose and result.get("response"):
+                print(f"  Response: {result['response']}")
+            if args.verbose:
+                print()
+
+    # Summary
+    print("\n=== SUMMARY ===")
+    successes = [r for r in results if r["success"]]
+    failures = [r for r in results if not r["success"]]
+
+    print(f"Total tests run: {len(results)}")
+    print(f"Successful: {len(successes)}")
+    print(f"Failed: {len(failures)}")
+
+    # Group results by model for summary
+    model_results = {}
+    for result in results:
+        model = result["model"]
+        if model not in model_results:
+            model_results[model] = {"success": 0, "failed": 0}
+        if result["success"]:
+            model_results[model]["success"] += 1
+        else:
+            model_results[model]["failed"] += 1
+
+    print(f"\nResults by model:")
+    for model, stats in model_results.items():
+        total = stats["success"] + stats["failed"]
+        success_rate = (stats["success"] / total * 100) if total > 0 else 0
+        print(f"  {model}: {stats['success']}/{total} successful ({success_rate:.1f}%)")
+
+    if failures:
+        print("\nFailed tests:")
+        for result in failures:
+            error = f": {result['error']}" if result["error"] else ""
+            print(f"  - {result['model']} (test case {result['test_case_id']}){error}")
+
+    # Save results to file
+    output_file = "structured_output_ollama_results.json"
+    with open(output_file, "w") as f:
+        json.dump({
+            "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
+            "total_test_cases": len(test_cases),
+            "models_tested": len(models_to_test_names),
+            "results": results
+        }, f, indent=2)
+
+    print(f"\nDetailed results saved to {output_file}")
+
+if __name__ == "__main__":
+    main()
test_llama_setup.sh
ADDED
@@ -0,0 +1,63 @@
+#!/bin/bash
+
+echo "Testing llama.cpp setup..."
+echo "================================"
+
+# Test 1: Check if quantize tool exists and is executable
+echo "1. Testing quantization tool..."
+if [ -x "llama.cpp/build/bin/llama-quantize" ]; then
+    echo "✓ llama-quantize tool found and executable"
+    echo "  Available quantization formats:"
+    ./llama.cpp/build/bin/llama-quantize --help | grep -A 20 "Allowed quantization types:" | head -10
+else
+    echo "✗ llama-quantize tool not found or not executable"
+    exit 1
+fi
+
+echo ""
+
+# Test 2: Check if conversion script exists
+echo "2. Testing conversion script..."
+if [ -f "llama.cpp/convert_hf_to_gguf.py" ]; then
+    echo "✓ convert_hf_to_gguf.py found"
+else
+    echo "✗ convert_hf_to_gguf.py not found"
+    exit 1
+fi
+
+echo ""
+
+# Test 3: Check if virtual environment and dependencies work
+echo "3. Testing Python dependencies..."
+if source venv/bin/activate && python3 -c "import torch; import transformers; import gguf" 2>/dev/null; then
+    echo "✓ Python dependencies (torch, transformers, gguf) are available"
+else
+    echo "✗ Python dependencies not available"
+    exit 1
+fi
+
+echo ""
+
+# Test 4: Check if quantization script is ready
+echo "4. Testing quantization script..."
+if [ -x "generate_quantization.sh" ]; then
+    echo "✓ generate_quantization.sh is executable and ready"
+else
+    echo "✗ generate_quantization.sh not found or not executable"
+    exit 1
+fi
+
+echo ""
+echo "================================"
+echo "✓ llama.cpp is fully initialized and ready for model quantization!"
+echo ""
+echo "To use it:"
+echo "1. Place your HuggingFace model in a directory (e.g., 'osmosis-mcp-4b')"
+echo "2. Run: ./generate_quantization.sh"
+echo "3. The script will convert the model to GGUF format and create quantized versions"
+echo ""
+echo "Available quantization formats in the script:"
+echo "- Q8_0 (8-bit quantization)"
+echo "- Q6_K (6-bit quantization)"
+echo "- Q3_K_L (3-bit quantization, large)"
+echo "- Q4_K_M (4-bit quantization, medium)"
tokenizer.json
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
+size 11422654
tokenizer_config.json
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:443bfa629eb16387a12edbf92a76f6a6f10b2af3b53d87ba1550adfcf45f7fa0
+size 5404
trainer_state.json
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c2932e0ba061682edcde45abf39c9fe6ea877d9bc6f364ed0be91a417f2d2db4
+size 16490
training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a6c2f8248a9c3cff0f046f5844295b8c64619a4cf49db88ea9335cef10e84ac0
+size 6033
vocab.json
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ca10d7e9fb3ed18575dd1e277a2579c16d108e32f27439684afa0e10b1440910
+size 2776833