# Upload benchmark_config.yaml with huggingface_hub
# benchmark_config.yaml: added (+32 -0)
# Benchmark Configuration for Summarizer-Standard Model
# Standard summarization model for text summarization tasks

# Settings for the model being benchmarked.
model:
  name: "summarizer-standard"
  base_url: "http://127.0.0.1:8000"
  temperature: 0.3  # Lower temperature for consistent summaries
  max_tokens: 256  # Reasonable summary length
  # NOTE(review): the unit is not stated in this file; presumably seconds - confirm.
  timeout: 45  # Allow time for summarization

# Datasets to benchmark against. Each entry gives a sample file, how many
# samples to use, the instruction text, and the names of the input/expected
# fields (presumably keys of each JSONL record - confirm against the loader).
datasets:
  - name: "cnn_dailymail"
    file: "datasets/cnn_dailymail_sample.jsonl"
    sample_size: 100  # Reasonable sample size for benchmarking
    instruction: "Summarize the following article in 2-3 sentences."
    input_field: "article"
    expected_field: "highlights"

# Score thresholds and summary length-ratio bounds used for evaluation.
evaluation:
  rouge_threshold: 0.3  # Lenient ROUGE score threshold
  semantic_preservation_min: 0.4  # Lenient semantic similarity
  length_ratio_min: 0.1  # Minimum compression ratio
  length_ratio_max: 0.8  # Maximum compression ratio

# Result output settings.
# NOTE(review): indentation was lost in this copy of the file; model_size_gb is
# assumed to belong to `output` because no blank line separates it from the
# preceding keys - confirm against the consumer's schema.
output:
  results_dir: "results"
  include_raw_responses: false
  model_size_gb: 0.369  # From file size check (369MB)

# Source information for the cnn_dailymail dataset.
# NOTE(review): indentation was lost in this copy of the file; this mapping is
# assumed to be a top-level key with two nested entries - confirm.
cnn_dailymail:
  source_url: "https://huggingface.co/datasets/cnn_dailymail"
  max_samples: 2000