Jacob Trock committed
Commit: b485435
Parent(s): 90daf30
Add Osmosis Structure 0.6B model with Git LFS for large files
- .gitattributes +2 -0
- .gitignore +3 -0
- .gitmodules +3 -0
- Osmosis-Structure-0.6B-BF16.gguf +3 -0
- added_tokens.json +3 -0
- chat_template.jinja +89 -0
- config.json +3 -0
- generate_quantization.sh +102 -0
- generation_config.json +3 -0
- jsonsft-0.6b.Q3_K_L.gguf +3 -0
- jsonsft-0.6b.Q4_K_M.gguf +3 -0
- jsonsft-0.6b.Q6_K.gguf +3 -0
- jsonsft-0.6b.Q8_0.gguf +3 -0
- llama.cpp +1 -0
- merges.txt +0 -0
- model.safetensors +3 -0
- optimizer.pt +3 -0
- rng_state.pth +3 -0
- scheduler.pt +3 -0
- special_tokens_map.json +3 -0
- test/Makefile +11 -0
- test/README.md +136 -0
- test/__pycache__/test_mcp_ollama.cpython-312.pyc +0 -0
- test/docker-compose.ollama.yaml +45 -0
- test/init-ollama.sh +83 -0
- test/ollama.test.Dockerfile +14 -0
- test/requirements.txt +3 -0
- test/structured_output_dataset/dataset_dict.json +3 -0
- test/structured_output_dataset/train/data-00000-of-00001.arrow +3 -0
- test/structured_output_dataset/train/dataset_info.json +3 -0
- test/structured_output_dataset/train/state.json +3 -0
- test/test_mcp_ollama.py +365 -0
- test_llama_setup.sh +63 -0
- tokenizer.json +3 -0
- tokenizer_config.json +3 -0
- trainer_state.json +3 -0
- training_args.bin +3 -0
- vocab.json +3 -0
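Because the large artifacts in this commit (the GGUF files, model.safetensors, optimizer.pt, the tokenizer JSONs) are committed as Git LFS pointers, a fresh checkout needs LFS to materialize them. A minimal sketch, assuming Git LFS is installed; `<repo-url>` is a placeholder, not the repository's actual URL:

```bash
# Hypothetical clone; <repo-url> stands in for this repository's real URL.
git lfs install                  # one-time setup of the LFS smudge/clean filters
git clone <repo-url> osmosis-structure-0.6b
cd osmosis-structure-0.6b
git lfs pull                     # fetch the real GGUF/safetensors contents behind the pointers
git submodule update --init     # pull in the llama.cpp submodule added by this commit
ls -lah *.gguf                   # should show multi-hundred-MB files, not 3-line pointers
```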
.gitattributes
CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.gguf filter=lfs diff=lfs merge=lfs -text
+*.json filter=lfs diff=lfs merge=lfs -text
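The two new `.gitattributes` rules are the kind of entries `git lfs track` writes; a minimal sketch of how they were presumably produced (an assumption, not taken from the commit itself):

```bash
# Each `git lfs track` call appends a matching filter line to .gitattributes.
git lfs track "*.gguf"
git lfs track "*.json"
git add .gitattributes
```

Tracking `*.json` is also why even small files such as `config.json` and `generation_config.json` appear below as three-line LFS pointers rather than plain JSON.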
.gitignore
ADDED
@@ -0,0 +1,3 @@
+venv/
+test_results/
+.DS_Store
.gitmodules
ADDED
@@ -0,0 +1,3 @@
+[submodule "llama.cpp"]
+	path = llama.cpp
+	url = https://github.com/ggml-org/llama.cpp.git
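This `.gitmodules` entry, together with the gitlink added under `llama.cpp` further down, is what `git submodule add` produces; a sketch of the presumed setup and of how a consumer would fetch the pinned revision:

```bash
# Registers llama.cpp as a submodule and records the pinned commit as a gitlink.
git submodule add https://github.com/ggml-org/llama.cpp.git llama.cpp

# On a fresh clone, check out the pinned revision (aa6dff0...) with:
git submodule update --init --recursive
```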
Osmosis-Structure-0.6B-BF16.gguf
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:64b24b3def4b79621c0b9f05581e6f9c52da01523a8e025947003b8888593166
+size 1198177920
added_tokens.json
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c0284b582e14987fbd3d5a2cb2bd139084371ed9acbae488829a1c900833c680
+size 707
chat_template.jinja
ADDED
@@ -0,0 +1,89 @@
+{%- if tools %}
+{{- '<|im_start|>system\n' }}
+{%- if messages[0].role == 'system' %}
+{{- messages[0].content + '\n\n' }}
+{%- endif %}
+{{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
+{%- for tool in tools %}
+{{- "\n" }}
+{{- tool | tojson }}
+{%- endfor %}
+{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
+{%- else %}
+{%- if messages[0].role == 'system' %}
+{{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
+{%- endif %}
+{%- endif %}
+{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
+{%- for message in messages[::-1] %}
+{%- set index = (messages|length - 1) - loop.index0 %}
+{%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
+{%- set ns.multi_step_tool = false %}
+{%- set ns.last_query_index = index %}
+{%- endif %}
+{%- endfor %}
+{%- for message in messages %}
+{%- if message.content is string %}
+{%- set content = message.content %}
+{%- else %}
+{%- set content = '' %}
+{%- endif %}
+{%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
+{{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
+{%- elif message.role == "assistant" %}
+{%- set reasoning_content = '' %}
+{%- if message.reasoning_content is string %}
+{%- set reasoning_content = message.reasoning_content %}
+{%- else %}
+{%- if '</think>' in content %}
+{%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
+{%- set content = content.split('</think>')[-1].lstrip('\n') %}
+{%- endif %}
+{%- endif %}
+{%- if loop.index0 > ns.last_query_index %}
+{%- if loop.last or (not loop.last and reasoning_content) %}
+{{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
+{%- else %}
+{{- '<|im_start|>' + message.role + '\n' + content }}
+{%- endif %}
+{%- else %}
+{{- '<|im_start|>' + message.role + '\n' + content }}
+{%- endif %}
+{%- if message.tool_calls %}
+{%- for tool_call in message.tool_calls %}
+{%- if (loop.first and content) or (not loop.first) %}
+{{- '\n' }}
+{%- endif %}
+{%- if tool_call.function %}
+{%- set tool_call = tool_call.function %}
+{%- endif %}
+{{- '<tool_call>\n{"name": "' }}
+{{- tool_call.name }}
+{{- '", "arguments": ' }}
+{%- if tool_call.arguments is string %}
+{{- tool_call.arguments }}
+{%- else %}
+{{- tool_call.arguments | tojson }}
+{%- endif %}
+{{- '}\n</tool_call>' }}
+{%- endfor %}
+{%- endif %}
+{{- '<|im_end|>\n' }}
+{%- elif message.role == "tool" %}
+{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
+{{- '<|im_start|>user' }}
+{%- endif %}
+{{- '\n<tool_response>\n' }}
+{{- content }}
+{{- '\n</tool_response>' }}
+{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
+{{- '<|im_end|>\n' }}
+{%- endif %}
+{%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+{{- '<|im_start|>assistant\n' }}
+{%- if enable_thinking is defined and enable_thinking is false %}
+{{- '<think>\n\n</think>\n\n' }}
+{%- endif %}
+{%- endif %}
config.json
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d8fee10fe0e3175672f2ae63d15c659e07d7e958c7edca2f4376296707d12f7e
+size 726
generate_quantization.sh
ADDED
@@ -0,0 +1,102 @@
+#!/bin/bash
+
+# Activate virtual environment
+source venv/bin/activate
+
+# Input model directory (containing safetensors files)
+INPUT_MODEL_DIR="../Osmosis-Structure-0.6B"
+# Output GGUF model file
+INPUT_MODEL="Osmosis-Structure-0.6B-BF16.gguf"
+
+echo "======================================================"
+echo "Step 1: Converting safetensors to GGUF format"
+echo "======================================================"
+
+# Check if input model directory exists
+if [ ! -d "$INPUT_MODEL_DIR" ]; then
+    echo "Error: Input model directory $INPUT_MODEL_DIR not found."
+    exit 1
+fi
+
+# Check if GGUF model already exists
+if [ -f "$INPUT_MODEL" ]; then
+    echo "GGUF model $INPUT_MODEL already exists. Skipping conversion..."
+else
+    echo "Converting $INPUT_MODEL_DIR to GGUF format..."
+
+    # Convert safetensors to GGUF
+    cd llama.cpp
+    python3 convert_hf_to_gguf.py "../$INPUT_MODEL_DIR" --outtype bf16
+    cd ..
+
+    # Check if conversion was successful
+    if [ -f "$INPUT_MODEL" ]; then
+        echo "Successfully converted to $INPUT_MODEL"
+    else
+        echo "Error: Failed to convert model to GGUF format."
+        exit 1
+    fi
+fi
+
+echo ""
+echo "======================================================"
+echo "Step 2: Quantizing GGUF model"
+echo "======================================================"
+
+# Define quantization formats to generate
+QUANT_FORMATS=(
+    "Q8_0"
+    "Q6_K"
+    "Q3_K_L"
+    "Q4_K_M"
+)
+
+# Path to llama-quantize tool
+QUANTIZE_TOOL="llama.cpp/build/bin/llama-quantize"
+
+# Check if quantize tool exists
+if [ ! -f "$QUANTIZE_TOOL" ]; then
+    echo "Error: Quantize tool not found at $QUANTIZE_TOOL"
+    echo "Please make sure llama.cpp is built with: cd llama.cpp && make"
+    exit 1
+fi
+
+# Process each quantization format
+for format in "${QUANT_FORMATS[@]}"; do
+    echo "------------------------------------------------------"
+    echo "Starting quantization: $format"
+    echo "------------------------------------------------------"
+
+    # Define output filename with the exact format requested
+    OUTPUT_MODEL="jsonsft-0.6b.${format}.gguf"
+
+    # Check if output model already exists
+    if [ -f "$OUTPUT_MODEL" ]; then
+        echo "Model $OUTPUT_MODEL already exists. Skipping..."
+        continue
+    fi
+
+    # Run quantization
+    echo "Quantizing to $format..."
+    "$QUANTIZE_TOOL" "$INPUT_MODEL" "$OUTPUT_MODEL" "$format"
+
+    # Check if quantization was successful
+    if [ $? -eq 0 ]; then
+        echo "Successfully created $OUTPUT_MODEL"
+        # Show file size
+        ls -lah "$OUTPUT_MODEL"
+    else
+        echo "Failed to create $OUTPUT_MODEL"
+    fi
+
+    echo ""
+done
+
+echo "======================================================"
+echo "All operations completed!"
+echo "======================================================"
+echo "Base GGUF model:"
+ls -lah "$INPUT_MODEL"
+echo ""
+echo "Quantized models:"
+ls -lah jsonsft-0.6b.*.gguf 2>/dev/null || echo "No quantized models found."
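Running this script requires the `llama.cpp` submodule to be built first so that `build/bin/llama-quantize` exists at the path the script checks. A minimal usage sketch, assuming a CMake build of the submodule and a `venv/` that already has the converter's dependencies (`torch`, `transformers`, `gguf`, as checked by `test_llama_setup.sh` below):

```bash
# Build llama.cpp so build/bin/llama-quantize is available (the path the script checks).
cmake -S llama.cpp -B llama.cpp/build
cmake --build llama.cpp/build --config Release -j

# Run the conversion + quantization pipeline; it skips any output that already exists.
chmod +x generate_quantization.sh
./generate_quantization.sh

# Expected outputs: Osmosis-Structure-0.6B-BF16.gguf plus jsonsft-0.6b.{Q8_0,Q6_K,Q4_K_M,Q3_K_L}.gguf
ls -lah *.gguf
```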
generation_config.json
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8aa750602852f354add3c477ce8c816ccabcde6251ae48cc4c8cccf2610d63df
+size 214
jsonsft-0.6b.Q3_K_L.gguf
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:362b57e906c4dd91c528da7304e5f47a6b2d1155611bfdebe7fef1b67150cbdd
+size 368487040
jsonsft-0.6b.Q4_K_M.gguf
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:97dca22c1b92665f96fe020567f2e8646ebfb749ebc04b8e39602791e760ba51
+size 396700288
jsonsft-0.6b.Q6_K.gguf
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fb4ae78a3ce7f9c727e1e175cac19eed73ea63c24f278b69a2b822886b122c6c
+size 495102592
jsonsft-0.6b.Q8_0.gguf
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:696a6dcd7ba50cb3a82791d03c808c236fa0f23e24fd7c45224a004a2cbc9588
+size 639442560
llama.cpp
ADDED
@@ -0,0 +1 @@
+Subproject commit aa6dff05be25709bb218bf648951d690029c4b19
merges.txt
ADDED
(diff too large to render)
model.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6e2176c467116fbbd78f6310aa6c5d9c9d99668a061d4ea426b69d5af9db0077
+size 2384234968
optimizer.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:940690d5ce57a95f434455e6af4605c0d2dd04a0479315b9e698b04f86d4e93e
+size 4768667667
rng_state.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:61c19bab1174704a4a4441475683bf1270277af15d2e2c95e964789128e482c4
+size 14645
scheduler.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:998e35850da1b676e839a131aa498c1aac2743375b8d3eae04511922d30cf59f
+size 1465
special_tokens_map.json
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:76862e765266b85aa9459767e33cbaf13970f327a0e88d1c65846c2ddd3a1ecd
+size 613
test/Makefile
ADDED
@@ -0,0 +1,11 @@
+run-ollama:
+	docker compose -f docker-compose.ollama.yaml up --build --force-recreate --remove-orphans
+
+run-ollama-headless:
+	docker compose -f docker-compose.ollama.yaml up -d --build --force-recreate --remove-orphans
+
+stop-ollama:
+	docker compose -f docker-compose.ollama.yaml down
+
+clean:
+	docker compose -f docker-compose.ollama.yaml down --remove-orphans && docker volume rm test_ollama_data
test/README.md
ADDED
@@ -0,0 +1,136 @@
+# Structured Output Testing with Ollama
+
+This directory contains a test script that evaluates language models' ability to generate structured JSON output using a dataset of structured output examples.
+
+## Overview
+
+The `test_mcp_ollama.py` script loads a structured output dataset and tests various language models available in Ollama to see how well they can generate structured JSON responses based on schema-guided prompts.
+
+## Dataset
+
+The test uses the structured output dataset located in `./structured_output_dataset/` which contains:
+- **5,108 test cases** with conversation examples
+- Each test case includes:
+  - A system message with a JSON schema definition
+  - A user message with natural language description
+  - An assistant response with the expected structured JSON output
+
+## Requirements
+
+Install the required dependencies:
+
+```bash
+pip install -r requirements.txt
+```
+
+Required packages:
+- `ollama>=0.1.0` - For interacting with Ollama models
+- `datasets>=2.0.0` - For loading the structured output dataset
+- `pyarrow>=10.0.0` - For dataset serialization support
+
+## Usage
+
+### Basic Usage
+
+Test all available models in Ollama with default settings (5 test cases per model):
+
+```bash
+python3 test_mcp_ollama.py
+```
+
+### Test a Specific Model
+
+```bash
+python3 test_mcp_ollama.py --model llama2:latest
+```
+
+### Adjust Number of Test Cases
+
+```bash
+python3 test_mcp_ollama.py --max-test-cases 10
+```
+
+### Verbose Output
+
+```bash
+python3 test_mcp_ollama.py --verbose
+```
+
+### Combined Options
+
+```bash
+python3 test_mcp_ollama.py --model mistral:latest --max-test-cases 3 --verbose
+```
+
+## Output
+
+The script provides:
+
+1. **Real-time progress** showing success/failure for each test case
+2. **Summary statistics** including:
+   - Total tests run
+   - Success/failure counts
+   - Success rate per model
+3. **Detailed results** saved to `structured_output_ollama_results.json`
+
+### Example Output
+
+```
+Loaded 5 test cases from dataset
+✅ Found 2 model(s), proceeding with tests.
+
+Will test 2 model(s) from Ollama:
+  - llama2:latest
+  - mistral:latest
+
+✅ SUCCESS - llama2:latest (test case 0) (took 2.34s)
+✅ SUCCESS - llama2:latest (test case 1) (took 1.89s)
+❌ FAILED - llama2:latest (test case 2) (took 0.45s) - Error: Connection timeout
+
+=== SUMMARY ===
+Total tests run: 10
+Successful: 8
+Failed: 2
+
+Results by model:
+  llama2:latest: 4/5 successful (80.0%)
+  mistral:latest: 4/5 successful (80.0%)
+```
+
+## Test Evaluation
+
+The script evaluates models based on:
+- **Response generation**: Whether the model produces a non-empty response
+- **Error handling**: Catches and reports connection/generation errors
+- **Performance timing**: Measures response generation time
+
+## Dataset Structure
+
+Each test case in the dataset follows this structure:
+
+```python
+{
+    "id": 0,
+    "messages": [
+        {
+            "role": "system",
+            "content": "You are a helpful assistant that understands and translates text to JSON format according to the following schema. {...}"
+        },
+        {
+            "role": "user",
+            "content": "Natural language description of data to be structured..."
+        },
+        {
+            "role": "assistant",
+            "content": "{\"structured\": \"json\", \"response\": \"here\"}"
+        }
+    ]
+}
+```
+
+## Notes
+
+- The script waits for Ollama models to be loaded before starting tests
+- Results are saved to `structured_output_ollama_results.json` for further analysis
+- Use `--max-test-cases` to limit testing time for quick evaluations
+- The `--verbose` flag shows detailed response content for debugging
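The README's examples use generic model names; in this repository the quantized GGUFs are registered by `init-ollama.sh` (added below) under derived names such as `jsonsftq4km`. One way to target a single quantization once the compose stack is up is sketched here; the exact model name is an assumption based on that script's naming rule, not something recorded in this commit:

```bash
# Hypothetical one-off run against a single quantized model, executed from the test/ directory
# while the docker-compose.ollama.yaml stack is running.
docker compose -f docker-compose.ollama.yaml run --rm \
  ollama-test python /app/test_mcp_ollama.py --model jsonsftq4km --max-test-cases 10 -v
```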
test/__pycache__/test_mcp_ollama.cpython-312.pyc
ADDED
Binary file (13.4 kB)
test/docker-compose.ollama.yaml
ADDED
@@ -0,0 +1,45 @@
+services:
+  ollama:
+    image: ollama/ollama:0.7.1
+    container_name: ollama
+    volumes:
+      - ollama_data:/root/.ollama
+      - ../jsonsft-0.6b.Q3_K_L.gguf:/models/jsonsft-0.6b.Q3_K_L.gguf:ro
+      - ../jsonsft-0.6b.Q4_K_M.gguf:/models/jsonsft-0.6b.Q4_K_M.gguf:ro
+      - ../jsonsft-0.6b.Q6_K.gguf:/models/jsonsft-0.6b.Q6_K.gguf:ro
+      - ../jsonsft-0.6b.Q8_0.gguf:/models/jsonsft-0.6b.Q8_0.gguf:ro
+      - ./init-ollama.sh:/init-ollama.sh:ro
+    entrypoint: /init-ollama.sh
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              capabilities: [gpu]
+              count: all
+    restart: unless-stopped
+    healthcheck:
+      test: ["CMD", "ollama", "list"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+
+  ollama-test:
+    build:
+      context: .
+      dockerfile: ollama.test.Dockerfile
+    container_name: ollama-test
+    depends_on:
+      ollama:
+        condition: service_healthy
+    volumes:
+      - ../:/app/models:ro
+      - ./test_results/ollama:/app/test_results
+    working_dir: /app/test_results
+    environment:
+      - OLLAMA_HOST=http://ollama:11434
+    command: python /app/test_mcp_ollama.py -v
+    restart: "no"
+
+volumes:
+  ollama_data:
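Taken together with the `test/Makefile` above, a plausible end-to-end run of this compose file looks like the sketch below (assuming Docker Compose v2 and an NVIDIA container runtime, since the `ollama` service reserves a GPU):

```bash
cd test
make run-ollama-headless      # builds and starts the ollama and ollama-test services in the background
docker logs -f ollama-test    # follow per-test-case progress from test_mcp_ollama.py
# Results land on the host via the ./test_results/ollama bind mount:
cat test_results/ollama/structured_output_ollama_results.json
make stop-ollama              # tear the stack down when finished
```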
test/init-ollama.sh
ADDED
@@ -0,0 +1,83 @@
+#!/bin/bash
+set -e  # Exit immediately if a command exits with a non-zero status.
+
+MODELS_DIR="/models"
+echo "Ollama initialization script started."
+
+# listing models in models directory
+echo "Listing models in $MODELS_DIR:"
+ls -1 "$MODELS_DIR"/*.gguf
+
+# Start ollama serve in the background so 'ollama create' can connect
+ollama serve &
+OLLAMA_PID=$!
+echo "Ollama server started in background with PID $OLLAMA_PID."
+
+# Wait for Ollama server to be ready
+echo "Waiting for Ollama server to be up..."
+max_attempts=20  # Increased attempts for slower systems
+attempt=0
+while ! ollama list > /dev/null 2>&1; do
+    attempt=$((attempt + 1))
+    if [ "$attempt" -ge "$max_attempts" ]; then
+        echo "Ollama server did not start in time. Exiting."
+        # Try to kill the background server if it's still running
+        if kill -0 $OLLAMA_PID 2>/dev/null; then
+            kill $OLLAMA_PID
+            wait $OLLAMA_PID 2>/dev/null
+        fi
+        exit 1
+    fi
+    echo "Waiting... (attempt ${attempt}/${max_attempts})"
+    sleep 5  # Increased sleep time
+done
+echo "Ollama server is up and running."
+
+echo "Looking for GGUF models in $MODELS_DIR..."
+if [ -d "$MODELS_DIR" ]; then
+    find "$MODELS_DIR" -type f -name "*.gguf" -print0 | while IFS= read -r -d $'\0' model_path; do
+        filename=$(basename "$model_path")
+        # Derive model name: ultra-simple, just letters and numbers
+        # Try the absolute simplest naming possible
+        base_name="jsonsft"
+        quantization=$(echo "$filename" | sed 's/.*\.\([QK][0-9_KLM]*\)\.gguf$/\1/' | tr '[:upper:]' '[:lower:]' | sed 's/[^a-z0-9]//g')
+        model_name="${base_name}${quantization}"
+
+        echo "Found GGUF file: $model_path"
+        echo "Derived model name: $model_name"
+
+        # Check if model already exists
+        if ollama list | grep -q "^${model_name}[[:space:]]"; then
+            echo "Model ${model_name} already exists in Ollama. Skipping creation."
+        else
+            echo "Creating model ${model_name} from $model_path..."
+            modelfile_content="FROM $model_path"
+
+            # create modelfile in writable temp directory instead of read-only models directory
+            modelfile_path="/tmp/${model_name}.modelfile"
+            echo "$modelfile_content" > "$modelfile_path"
+
+            if ollama create "$model_name" -f "$modelfile_path"; then
+                echo "Successfully created model ${model_name}."
+            else
+                echo "Failed to create model ${model_name}. Continuing with next model..."
+            fi
+        fi
+    done
+else
+    echo "Warning: Models directory $MODELS_DIR not found. No models will be loaded."
+fi
+
+echo "Model initialization complete."
+echo "Stopping background Ollama server (PID $OLLAMA_PID) and starting it in foreground..."
+
+# Stop the background ollama serve
+if kill -0 $OLLAMA_PID 2>/dev/null; then
+    kill $OLLAMA_PID
+    wait $OLLAMA_PID 2>/dev/null  # Wait for it to actually terminate, ignore error if already stopped
+else
+    echo "Background Ollama server was not running or already stopped."
+fi
+
+echo "Starting Ollama server in foreground to keep container running."
+exec ollama serve  # Replace this script process with ollama serve
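The name-derivation pipeline above (strip everything up to the quantization suffix, lowercase it, drop punctuation) can be sanity-checked in isolation; a small sketch of what it should print for the four quantized files shipped in this commit:

```bash
# Reproduces the model-name derivation from init-ollama.sh outside the container.
for f in jsonsft-0.6b.Q3_K_L.gguf jsonsft-0.6b.Q4_K_M.gguf jsonsft-0.6b.Q6_K.gguf jsonsft-0.6b.Q8_0.gguf; do
  q=$(echo "$f" | sed 's/.*\.\([QK][0-9_KLM]*\)\.gguf$/\1/' | tr '[:upper:]' '[:lower:]' | sed 's/[^a-z0-9]//g')
  echo "$f -> jsonsft${q}"
done
# Expected Ollama model names: jsonsftq3kl, jsonsftq4km, jsonsftq6k and jsonsftq80
```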
test/ollama.test.Dockerfile
ADDED
@@ -0,0 +1,14 @@
+FROM python:3.12-slim
+
+WORKDIR /app
+
+COPY ./test_mcp_ollama.py /app/
+COPY ./structured_output_dataset /app/data/test/structured_output_dataset
+COPY ./requirements.txt /app/
+
+RUN pip install -r requirements.txt
+
+# Models are mounted from host to /app/models
+# Test results are written to /app/data, which is mounted from host
+# The script is run with /app/data as its working directory
+# OLLAMA_HOST will be set in docker-compose to point to the ollama service
test/requirements.txt
ADDED
@@ -0,0 +1,3 @@
+ollama>=0.4.8
+datasets>=3.6.0
+pyarrow>=20.0.0
test/structured_output_dataset/dataset_dict.json
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c172eebfc28c1400d6be4338ce7d00191507ffb4ae64c315f039585c894df5b7
+size 21
test/structured_output_dataset/train/data-00000-of-00001.arrow
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8bcb36bf09086f37e3b6d2c883d899cde0aa2fe57acc069c2e3cbbc69c013f33
+size 26475688
test/structured_output_dataset/train/dataset_info.json
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ecb26114cf6a9305f593ea81f8e68a2f56577d6d710884e45e478a7320fffe69
+size 309
test/structured_output_dataset/train/state.json
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4f6dd66bcca38325282206607387522ea7dc685f341914b81ffe6de59dd94490
+size 247
test/test_mcp_ollama.py
ADDED
@@ -0,0 +1,365 @@
+#!/usr/bin/env python3
+import os
+import sys
+import json
+import time
+import argparse
+from typing import List, Dict, Any
+
+try:
+    import ollama
+except ImportError:
+    print("Error: ollama package not found. Please install with: pip install ollama")
+    sys.exit(1)
+
+try:
+    from datasets import load_dataset, DatasetDict
+except ImportError:
+    print("Error: datasets package not found. Please install with: pip install datasets")
+    sys.exit(1)
+
+# Global variable for verbose flag
+verbose_global = False
+
+def load_structured_output_dataset(dataset_path: str = "/app/data/test/structured_output_dataset") -> List[Dict[str, Any]]:
+    """Load the structured output dataset and return test cases."""
+    try:
+        # Load the dataset instead of reading parquet
+        dataset = DatasetDict.load_from_disk(dataset_path)
+        combined = dataset["train"]
+
+        test_cases = []
+        for i, example in enumerate(combined):
+            # Extract messages from the dataset
+            messages = example.get("messages", [])
+            if messages:
+                test_cases.append({
+                    "id": i,
+                    "messages": messages
+                })
+
+        if verbose_global:
+            print(f"Loaded {len(test_cases)} test cases from dataset")
+
+        return test_cases
+    except Exception as e:
+        print(f"Error loading dataset: {str(e)}")
+        return []
+
+def list_ollama_models() -> List[str]:
+    """List available models in Ollama using the Python library."""
+    try:
+        models_data = ollama.list()
+
+        if verbose_global:
+            print(f"Response type: {type(models_data)}")
+
+        model_names = []
+
+        # Handle ollama._types.ListResponse object
+        if hasattr(models_data, 'models'):
+            models = models_data.models
+        # Handle dict response
+        elif isinstance(models_data, dict) and "models" in models_data:
+            models = models_data["models"]
+        elif isinstance(models_data, dict) and "model" in models_data:
+            models = [models_data]
+        # Handle list response
+        elif isinstance(models_data, list):
+            models = models_data
+        else:
+            if verbose_global:
+                print(f"Unexpected response type: {type(models_data)}")
+            return []
+
+        for model in models:
+            # Handle Model objects with .model attribute
+            if hasattr(model, 'model'):
+                model_names.append(model.model)
+            # Handle dict with various possible keys
+            elif isinstance(model, dict):
+                name = model.get("name") or model.get("model") or model.get("id")
+                if name:
+                    model_names.append(name)
+            # Handle string
+            elif isinstance(model, str):
+                model_names.append(model)
+
+        if verbose_global:
+            print(f"Extracted model names: {model_names}")
+
+        return model_names
+    except Exception as e:
+        if verbose_global:
+            print(f"Error listing models: {str(e)}")
+        return []
+
+def wait_for_models_ready(expected_min_models: int = 3, max_wait_time: int = 300, check_interval: int = 10) -> List[str]:
+    """Wait for models to be loaded in Ollama before proceeding with tests."""
+    print(f"Waiting for at least {expected_min_models} model(s) to be loaded in Ollama...")
+    start_time = time.time()
+
+    while time.time() - start_time < max_wait_time:
+        try:
+            models = list_ollama_models()
+
+            if verbose_global:
+                print(f"Found {len(models)} model(s): {models}")
+
+            if len(models) >= expected_min_models:
+                print(f"✅ Found {len(models)} model(s), proceeding with tests.")
+                return models
+            else:
+                elapsed = int(time.time() - start_time)
+                print(f"⏳ Found {len(models)} model(s), waiting for more... ({elapsed}s elapsed)")
+
+        except Exception as e:
+            elapsed = int(time.time() - start_time)
+            if verbose_global:
+                print(f"⏳ Ollama server not ready... ({elapsed}s elapsed) - {str(e)}")
+            else:
+                print(f"⏳ Waiting for Ollama server... ({elapsed}s elapsed)")
+
+        time.sleep(check_interval)
+
+    # Final attempt after timeout
+    final_models = list_ollama_models()
+    if final_models:
+        print(f"⚠️ Timeout reached, but found {len(final_models)} model(s). Proceeding anyway.")
+        return final_models
+    else:
+        print("❌ Timeout reached and no models found.")
+        return []
+
+def test_ollama_model(model_name: str, test_case: Dict[str, Any], verbose: bool = False) -> Dict[str, Any]:
+    """Test a model with a test case from the dataset and return the results."""
+    result = {
+        "model": model_name,
+        "test_case_id": test_case["id"],
+        "success": False,
+        "error": None,
+        "duration": 0,
+        "response": None,
+        "request": None
+    }
+
+    start_time = time.time()
+
+    try:
+        if verbose:
+            print(f"Testing model: {model_name} with test case {test_case['id']}")
+
+        # Use the messages from the dataset test case
+        messages = test_case["messages"]
+
+        # Extract JSON schema from system message for structured output
+        json_schema = None
+        for message in messages:
+            if message.get("role") == "system":
+                content = message.get("content", "")
+                # Look for JSON schema in the system message
+                if "schema" in content.lower() and "{" in content:
+                    # Extract the JSON schema part from the system message
+                    try:
+                        # Find the JSON part in the system message
+                        start_idx = content.find("{")
+                        if start_idx != -1:
+                            # Find the matching closing brace
+                            brace_count = 0
+                            end_idx = start_idx
+                            for i, char in enumerate(content[start_idx:], start_idx):
+                                if char == "{":
+                                    brace_count += 1
+                                elif char == "}":
+                                    brace_count -= 1
+                                    if brace_count == 0:
+                                        end_idx = i + 1
+                                        break
+
+                            schema_text = content[start_idx:end_idx]
+                            json_schema = json.loads(schema_text)
+                            if verbose:
+                                print(f"Extracted JSON schema for structured output")
+                    except (json.JSONDecodeError, ValueError) as e:
+                        if verbose:
+                            print(f"Could not parse JSON schema from system message: {e}")
+                break
+
+        # Prepare the chat request with format parameter for structured output
+        chat_params = {
+            "model": model_name,
+            "messages": messages,
+            "options": {
+                "temperature": 0.7,
+                "top_p": 0.95,
+                "num_predict": 512,
+            }
+        }
+
+        # Add format parameter to force JSON output
+        if json_schema:
+            # Use the extracted JSON schema for structured output
+            chat_params["format"] = json_schema
+            if verbose:
+                print(f"Using structured output with JSON schema")
+        else:
+            # Fallback to basic JSON mode if no schema found
+            chat_params["format"] = "json"
+            if verbose:
+                print(f"Using basic JSON format mode")
+
+        # Use the ollama library to send a chat request
+        response = ollama.chat(**chat_params)
+
+        # Extract the response content
+        generated_text = response.get("message", {}).get("content", "")
+
+        # Check if we got a valid response
+        has_valid_response = len(generated_text.strip()) > 0
+
+        result["success"] = has_valid_response
+        result["request"] = messages
+        result["response"] = generated_text[:500] + "..." if len(generated_text) > 500 else generated_text
+
+    except Exception as e:
+        result["error"] = str(e)
+
+    result["duration"] = time.time() - start_time
+    return result
+
+def main():
+    global verbose_global
+
+    parser = argparse.ArgumentParser(description="Test structured output functionality on models pre-loaded in Ollama")
+    parser.add_argument("--model", type=str, help="Test a specific model name (must exist in Ollama)")
+    parser.add_argument("--verbose", "-v", action="store_true", help="Enable verbose output")
+    parser.add_argument("--max-test-cases", type=int, default=5, help="Maximum number of test cases to run per model")
+    args = parser.parse_args()
+    verbose_global = args.verbose
+
+    if args.verbose:
+        print("Verbose mode enabled.")
+        print(f"OLLAMA_HOST is set to: {os.environ.get('OLLAMA_HOST')}")
+
+    # Load test cases from the structured output dataset
+    test_cases = load_structured_output_dataset()
+    if not test_cases:
+        print("No test cases found in the dataset.")
+        sys.exit(1)
+
+    # Limit the number of test cases if specified
+    if args.max_test_cases and len(test_cases) > args.max_test_cases:
+        test_cases = test_cases[:args.max_test_cases]
+        print(f"Limited to {args.max_test_cases} test cases")
+
+    print(f"Loaded {len(test_cases)} test cases from dataset")
+
+    # Wait for models to be properly loaded (expect most of the 4 GGUF files to be converted)
+    ollama_models_available = wait_for_models_ready(expected_min_models=4, max_wait_time=300, check_interval=10)
+    if not ollama_models_available:
+        print("No models found in Ollama after waiting. The init script may have failed.")
+        sys.exit(1)
+
+    if args.verbose:
+        print("Available models in Ollama:")
+        for m in ollama_models_available:
+            print(f"  - {m}")
+
+    models_to_test_names = []
+
+    if args.model:
+        # Test a specific model if provided
+        if args.model in ollama_models_available:
+            models_to_test_names.append(args.model)
+        else:
+            # Attempt to match if user provided a name without a tag (e.g., "llama2" instead of "llama2:latest")
+            # Ollama list returns names like "modelname:tag"
+            found_match = False
+            for om_name in ollama_models_available:
+                if om_name.startswith(args.model + ":"):
+                    models_to_test_names.append(om_name)
+                    if args.verbose:
+                        print(f"Found match for '{args.model}': testing '{om_name}'")
+                    found_match = True
+                    break
+            if not found_match:
+                print(f"Error: Specified model '{args.model}' not found in Ollama's list: {ollama_models_available}")
+                sys.exit(1)
+    else:
+        # Test all available models in Ollama
+        models_to_test_names = ollama_models_available
+
+    if not models_to_test_names:
+        print("No models selected to test.")
+        sys.exit(1)
+
+    print(f"\nWill test {len(models_to_test_names)} model(s) from Ollama:")
+    for model_name in models_to_test_names:
+        print(f"  - {model_name}")
+    print()
+
+    results = []
+
+    for model_name in models_to_test_names:
+        if args.verbose:
+            print(f"--- Starting tests for model: {model_name} ---")
+
+        for test_case in test_cases:
+            result = test_ollama_model(model_name, test_case, args.verbose)
+            result["path"] = model_name  # Use model_name as path for consistency in reporting
+            results.append(result)
+
+            status = "✅ SUCCESS" if result["success"] else "❌ FAILED"
+            error_msg = f" - Error: {result['error']}" if result["error"] else ""
+            print(f"{status} - {model_name} (test case {test_case['id']}) (took {result['duration']:.2f}s){error_msg}")
+            if args.verbose and result.get("response"):
+                print(f"  Response: {result['response']}")
+            if args.verbose:
+                print()
+
+    # Summary
+    print("\n=== SUMMARY ===")
+    successes = [r for r in results if r["success"]]
+    failures = [r for r in results if not r["success"]]
+
+    print(f"Total tests run: {len(results)}")
+    print(f"Successful: {len(successes)}")
+    print(f"Failed: {len(failures)}")
+
+    # Group results by model for summary
+    model_results = {}
+    for result in results:
+        model = result["model"]
+        if model not in model_results:
+            model_results[model] = {"success": 0, "failed": 0}
+        if result["success"]:
+            model_results[model]["success"] += 1
+        else:
+            model_results[model]["failed"] += 1
+
+    print(f"\nResults by model:")
+    for model, stats in model_results.items():
+        total = stats["success"] + stats["failed"]
+        success_rate = (stats["success"] / total * 100) if total > 0 else 0
+        print(f"  {model}: {stats['success']}/{total} successful ({success_rate:.1f}%)")
+
+    if failures:
+        print("\nFailed tests:")
+        for result in failures:
+            error = f": {result['error']}" if result["error"] else ""
+            print(f"  - {result['model']} (test case {result['test_case_id']}){error}")
+
+    # Save results to file
+    output_file = "structured_output_ollama_results.json"
+    with open(output_file, "w") as f:
+        json.dump({
+            "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
+            "total_test_cases": len(test_cases),
+            "models_tested": len(models_to_test_names),
+            "results": results
+        }, f, indent=2)
+
+    print(f"\nDetailed results saved to {output_file}")
+
+if __name__ == "__main__":
+    main()
test_llama_setup.sh
ADDED
@@ -0,0 +1,63 @@
+#!/bin/bash
+
+echo "Testing llama.cpp setup..."
+echo "================================"
+
+# Test 1: Check if quantize tool exists and is executable
+echo "1. Testing quantization tool..."
+if [ -x "llama.cpp/build/bin/llama-quantize" ]; then
+    echo "✓ llama-quantize tool found and executable"
+    echo "  Available quantization formats:"
+    ./llama.cpp/build/bin/llama-quantize --help | grep -A 20 "Allowed quantization types:" | head -10
+else
+    echo "✗ llama-quantize tool not found or not executable"
+    exit 1
+fi
+
+echo ""
+
+# Test 2: Check if conversion script exists
+echo "2. Testing conversion script..."
+if [ -f "llama.cpp/convert_hf_to_gguf.py" ]; then
+    echo "✓ convert_hf_to_gguf.py found"
+else
+    echo "✗ convert_hf_to_gguf.py not found"
+    exit 1
+fi
+
+echo ""
+
+# Test 3: Check if virtual environment and dependencies work
+echo "3. Testing Python dependencies..."
+if source venv/bin/activate && python3 -c "import torch; import transformers; import gguf" 2>/dev/null; then
+    echo "✓ Python dependencies (torch, transformers, gguf) are available"
+else
+    echo "✗ Python dependencies not available"
+    exit 1
+fi
+
+echo ""
+
+# Test 4: Check if quantization script is ready
+echo "4. Testing quantization script..."
+if [ -x "generate_quantization.sh" ]; then
+    echo "✓ generate_quantization.sh is executable and ready"
+else
+    echo "✗ generate_quantization.sh not found or not executable"
+    exit 1
+fi
+
+echo ""
+echo "================================"
+echo "✓ llama.cpp is fully initialized and ready for model quantization!"
+echo ""
+echo "To use it:"
+echo "1. Place your HuggingFace model in a directory (e.g., 'osmosis-mcp-4b')"
+echo "2. Run: ./generate_quantization.sh"
+echo "3. The script will convert the model to GGUF format and create quantized versions"
+echo ""
+echo "Available quantization formats in the script:"
+echo "- Q8_0 (8-bit quantization)"
+echo "- Q6_K (6-bit quantization)"
+echo "- Q3_K_L (3-bit quantization, large)"
+echo "- Q4_K_M (4-bit quantization, medium)"
tokenizer.json
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
+size 11422654
tokenizer_config.json
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:443bfa629eb16387a12edbf92a76f6a6f10b2af3b53d87ba1550adfcf45f7fa0
+size 5404
trainer_state.json
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c2932e0ba061682edcde45abf39c9fe6ea877d9bc6f364ed0be91a417f2d2db4
+size 16490
training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a6c2f8248a9c3cff0f046f5844295b8c64619a4cf49db88ea9335cef10e84ac0
+size 6033
vocab.json
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ca10d7e9fb3ed18575dd1e277a2579c16d108e32f27439684afa0e10b1440910
+size 2776833