File size: 6,569 Bytes
83a82e0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 |
#!/usr/bin/env python3
"""
Compute Average Metrics Script for LogSAD Results
This script automatically detects results MD files in the results/ directory,
calculates average metrics across all categories, and appends them to the files
if not already present.
"""
import os
import re
import glob
from pathlib import Path
def has_average_metrics(file_path):
    """Return True if the file already contains an average metrics row.

    Any read error (missing file, permissions, bad encoding) is reported
    and treated as "no average row present".
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as fh:
            text = fh.read()
    except Exception as err:
        print(f"Error reading {file_path}: {err}")
        return False
    return any(marker in text for marker in ("| **Average**", "| Average"))
def parse_results_table(file_path):
    """Parse the per-category metrics table from a results MD file.

    Expects markdown rows of the form:
        | Category | N | f1_img | auroc_img | f1_log | auroc_log | f1_str | auroc_str |
    (column 2, the count, is intentionally ignored).

    Args:
        file_path: path of the markdown results file.

    Returns:
        list[dict]: one dict per category with keys 'category', 'f1_image',
        'auroc_image', 'f1_logical', 'auroc_logical', 'f1_structural',
        'auroc_structural'. Empty list if the file cannot be read.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            lines = f.read().split('\n')
    except Exception as e:
        print(f"Error parsing {file_path}: {e}")
        return []

    metric_keys = ('f1_image', 'auroc_image', 'f1_logical',
                   'auroc_logical', 'f1_structural', 'auroc_structural')
    categories_data = []
    for line in lines:
        # Candidate table rows carry at least 8 pipe separators.
        if line.count('|') < 8:
            continue
        parts = [p.strip() for p in line.split('|')]
        category_name = parts[1] if len(parts) > 1 else ''
        # Skip empty cells and any previously-appended average row.
        if not category_name or category_name == 'Average' or category_name.startswith('**Average'):
            continue
        try:
            # Header ('Category'), separator ('-----'), and malformed rows
            # all fail the float conversion and are skipped here (EAFP) —
            # no separate exclusion list needed.
            values = [float(parts[i]) for i in range(3, 9)]
        except (ValueError, IndexError):
            continue
        row = dict(zip(metric_keys, values))
        row['category'] = category_name
        categories_data.append(row)
    return categories_data
def calculate_averages(categories_data):
    """Return the mean of each metric across all categories.

    Args:
        categories_data: list of per-category metric dicts as produced by
            parse_results_table().

    Returns:
        dict mapping each metric name to its mean, or None for empty input.
    """
    if not categories_data:
        return None
    metric_names = ('f1_image', 'auroc_image', 'f1_logical',
                    'auroc_logical', 'f1_structural', 'auroc_structural')
    count = len(categories_data)
    return {name: sum(row[name] for row in categories_data) / count
            for name in metric_names}
def append_averages_to_file(file_path, averages):
    """Insert an average metrics row after the last row of the table.

    A line counts as a table row when it has at least 8 '|' separators;
    the average row is inserted right after a table row whose following
    line is blank or not itself a table row (i.e. the table's final row).

    Args:
        file_path: path of the markdown results file, rewritten in place.
        averages: dict with keys 'f1_image', 'auroc_image', 'f1_logical',
            'auroc_logical', 'f1_structural', 'auroc_structural'.

    Returns:
        True on success, False if reading or writing failed.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            lines = f.read().split('\n')
        # NOTE(review): the category-count cell is hardcoded to 4 — confirm
        # this matches the number of categories in every results file.
        average_row = (
            f"| **Average** | 4 | {averages['f1_image']:.2f} | "
            f"{averages['auroc_image']:.2f} | {averages['f1_logical']:.2f} | "
            f"{averages['auroc_logical']:.2f} | {averages['f1_structural']:.2f} | "
            f"{averages['auroc_structural']:.2f} |"
        )
        new_lines = []
        for i, line in enumerate(lines):
            new_lines.append(line)
            if '|' in line and line.count('|') >= 8:
                # The next line continues the table only if it is also a
                # non-blank row with enough separators.
                next_is_table = (i + 1 < len(lines) and
                                 lines[i + 1].strip() != '' and
                                 '|' in lines[i + 1] and
                                 lines[i + 1].count('|') >= 8)
                if not next_is_table:
                    new_lines.append(average_row)
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write('\n'.join(new_lines))
        print(f"✓ Added average metrics row to {file_path}")
        return True
    except Exception as e:
        print(f"✗ Error appending to {file_path}: {e}")
        return False
def process_results_file(file_path):
    """Compute and append average metrics for a single results file.

    Skips the file if an average row already exists, no table data can be
    parsed, or the averages cannot be computed.
    """
    print(f"Processing: {file_path}")
    if has_average_metrics(file_path):
        # Already processed on a previous run — nothing to do.
        print(f"  → Average metrics already exist, skipping")
        return
    categories_data = parse_results_table(file_path)
    if not categories_data:
        print(f"  → No valid data found, skipping")
        return
    print(f"  → Found {len(categories_data)} categories")
    averages = calculate_averages(categories_data)
    if not averages:
        print(f"  → Failed to calculate averages, skipping")
        return
    append_averages_to_file(file_path, averages)
def main():
    """Locate all results MD files and append average metrics to each."""
    print("LogSAD Average Metrics Computation")
    print("=" * 50)
    pattern = "results/*_results.md"
    matched = glob.glob(pattern)
    if not matched:
        print("No results files found matching pattern:", pattern)
        return
    print(f"Found {len(matched)} results file(s):")
    for path in matched:
        print(f"  - {path}")
    print("\nProcessing files...")
    print("-" * 30)
    for path in matched:
        process_results_file(path)
        print()
    print("Average metrics computation completed!")
if __name__ == "__main__":
    main()