---
# Benchmark configuration for the "summarizer-standard" model.
# Standard summarization model for text summarization tasks.
#
# NOTE(review): nesting below was rebuilt from key order after the file lost
# its indentation; confirm it against the benchmark harness's config schema.

# Model under test and the request parameters used for it.
model:
  name: "summarizer-standard"
  base_url: "http://127.0.0.1:8000"
  temperature: 0.3  # Lower temperature for consistent summaries
  max_tokens: 256  # Reasonable summary length
  timeout: 45  # Allow time for summarization (presumably seconds -- confirm)

# Datasets to benchmark against; one list entry per dataset.
datasets:
  - name: "cnn_dailymail"
    file: "datasets/cnn_dailymail_sample.jsonl"
    sample_size: 100  # Reasonable sample size for benchmarking
    instruction: "Summarize the following article in 2-3 sentences."
    input_field: "article"
    expected_field: "highlights"

# Thresholds applied when scoring generated summaries.
evaluation:
  rouge_threshold: 0.3  # Lenient ROUGE score threshold
  semantic_preservation_min: 0.4  # Lenient semantic similarity
  length_ratio_min: 0.1  # Minimum compression ratio
  length_ratio_max: 0.8  # Maximum compression ratio

# Result output settings.
output:
  results_dir: "results"
  include_raw_responses: false

# NOTE(review): the parent of the two entries below could not be recovered
# from the flattened text. They are kept as top-level keys; move them under
# the correct section if the harness expects them nested.
model_size_gb: 0.369  # From file size check (369MB)
cnn_dailymail:
  source_url: "https://huggingface.co/datasets/cnn_dailymail"
  max_samples: 2000