#!/usr/bin/env python3
"""
Compute Average Metrics Script for LogSAD Results
This script automatically detects results MD files in the results/ directory,
calculates average metrics across all categories, and appends them to the files
if not already present.
"""
import glob
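# Typical invocation (from the repository root, so the results/ glob below resolves):
#   python scripts/compute_averages.py
#
# Illustrative sketch of the table layout the parser below assumes. The column
# names and values are hypothetical; only the positions matter: column 1 is the
# category, column 2 is skipped, and columns 3-8 hold F1/AUROC for the image,
# logical, and structural settings.
#
# | Category     | Shots | F1 (image) | AUROC (image) | F1 (logical) | AUROC (logical) | F1 (structural) | AUROC (structural) |
# |--------------|-------|------------|---------------|--------------|-----------------|-----------------|--------------------|
# | juice_bottle | 4     | 85.00      | 90.00         | 80.00        | 88.00           | 75.00           | 86.00              |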
def has_average_metrics(file_path):
    """Check if the file already contains an average metrics row."""
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
        return "| **Average**" in content or "| Average" in content
    except Exception as e:
        print(f"Error reading {file_path}: {e}")
        return False
def parse_results_table(file_path):
    """Parse the results table from an MD file and extract per-category metrics."""
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
        lines = content.split('\n')
        categories_data = []
        # Find table data lines
        for line in lines:
            if '|' in line and line.count('|') >= 8:
                parts = [p.strip() for p in line.split('|')]
                # Skip header, separator, average, and empty rows; any remaining
                # non-numeric row is dropped by the except clause below.
                if (len(parts) >= 9 and
                        parts[1] not in ['Category', '----------', '-----', '', '**Average**', 'Average'] and
                        not parts[1].startswith('**Average')):
                    try:
                        category_name = parts[1]
                        f1_image = float(parts[3])
                        auroc_image = float(parts[4])
                        f1_logical = float(parts[5])
                        auroc_logical = float(parts[6])
                        f1_structural = float(parts[7])
                        auroc_structural = float(parts[8])
                        categories_data.append({
                            'category': category_name,
                            'f1_image': f1_image,
                            'auroc_image': auroc_image,
                            'f1_logical': f1_logical,
                            'auroc_logical': auroc_logical,
                            'f1_structural': f1_structural,
                            'auroc_structural': auroc_structural
                        })
                    except (ValueError, IndexError):
                        continue
        return categories_data
    except Exception as e:
        print(f"Error parsing {file_path}: {e}")
        return []
def calculate_averages(categories_data):
    """Calculate average metrics across all categories."""
    if not categories_data:
        return None
    n = len(categories_data)
    averages = {
        'f1_image': sum(cat['f1_image'] for cat in categories_data) / n,
        'auroc_image': sum(cat['auroc_image'] for cat in categories_data) / n,
        'f1_logical': sum(cat['f1_logical'] for cat in categories_data) / n,
        'auroc_logical': sum(cat['auroc_logical'] for cat in categories_data) / n,
        'f1_structural': sum(cat['f1_structural'] for cat in categories_data) / n,
        'auroc_structural': sum(cat['auroc_structural'] for cat in categories_data) / n
    }
    return averages
def append_averages_to_file(file_path, averages):
    """Append the average metrics as the last row of the existing table."""
    try:
        # Read the current file content
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
        # Walk the lines and insert the average row after the last table row
        lines = content.split('\n')
        new_lines = []
        for i, line in enumerate(lines):
            new_lines.append(line)
            # Check if this is a table row (has the expected number of '|')
            if '|' in line and line.count('|') >= 8:
                # Last data row if the next line is missing, empty, or not a table row
                if (i + 1 >= len(lines) or
                        lines[i + 1].strip() == '' or
                        '|' not in lines[i + 1] or
                        lines[i + 1].count('|') < 8):
                    # Add the average row after this line
                    # (the second column is hard-coded to '4' to match the existing table layout)
                    average_row = (
                        f"| **Average** | 4 | {averages['f1_image']:.2f} | {averages['auroc_image']:.2f} | "
                        f"{averages['f1_logical']:.2f} | {averages['auroc_logical']:.2f} | "
                        f"{averages['f1_structural']:.2f} | {averages['auroc_structural']:.2f} |"
                    )
                    new_lines.append(average_row)
        # Write the updated content back to the file
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write('\n'.join(new_lines))
        print(f"✓ Added average metrics row to {file_path}")
        return True
    except Exception as e:
        print(f"✗ Error appending to {file_path}: {e}")
        return False
def process_results_file(file_path):
    """Process a single results file."""
    print(f"Processing: {file_path}")
    # Check if averages already exist
    if has_average_metrics(file_path):
        print("  → Average metrics already exist, skipping")
        return
    # Parse the results table
    categories_data = parse_results_table(file_path)
    if not categories_data:
        print("  → No valid data found, skipping")
        return
    print(f"  → Found {len(categories_data)} categories")
    # Calculate averages
    averages = calculate_averages(categories_data)
    if not averages:
        print("  → Failed to calculate averages, skipping")
        return
    # Append averages to file
    append_averages_to_file(file_path, averages)
def main():
    """Main function to process all results files."""
    print("LogSAD Average Metrics Computation")
    print("=" * 50)
    # Find all results MD files (assumes the script is run from the repository root)
    results_pattern = "results/*_results.md"
    results_files = glob.glob(results_pattern)
    if not results_files:
        print("No results files found matching pattern:", results_pattern)
        return
    print(f"Found {len(results_files)} results file(s):")
    for file_path in results_files:
        print(f"  - {file_path}")
    print("\nProcessing files...")
    print("-" * 30)
    # Process each file
    for file_path in results_files:
        process_results_file(file_path)
        print()
    print("Average metrics computation completed!")
if __name__ == "__main__":
    main()