LogSAD / scripts /compute_averages.py

zhiqing0205

Add scripts, results, and visualization archive

83a82e0 3 days ago

6.57 kB

	#!/usr/bin/env python3
	"""
	Compute Average Metrics Script for LogSAD Results

	This script automatically detects results MD files in the results/ directory,
	calculates average metrics across all categories, and appends them to the files
	if not already present.
	"""

	import os
	import re
	import glob
	from pathlib import Path


	def has_average_metrics(file_path):
	    """Report whether a results file already contains an average row.

	    The check is a substring search for a table cell that begins with
	    ``Average`` or with bold ``**Average``.

	    Args:
	        file_path: Path of the Markdown results file to inspect.

	    Returns:
	        True if such a cell is found. False if none is found or if the
	        file could not be read (the error is printed, not raised).
	    """
	    try:
	        with open(file_path, 'r', encoding='utf-8') as f:
	            content = f.read()
	    except Exception as e:
	        # Deliberately best-effort: an unreadable file is reported and
	        # treated as "no averages" rather than aborting the run.
	        print(f"Error reading {file_path}: {e}")
	        return False

	    # A bare '|' is the Markdown column delimiter. The previous '\|'
	    # literal was an invalid escape sequence that searched for a
	    # backslash followed by a pipe, and the two alternatives were
	    # identical, so the bold form was never actually checked.
	    return "| Average" in content or "| **Average" in content


	def parse_results_table(file_path):
	    """Extract per-category metrics from the Markdown table in a results file.

	    A line is treated as a table row when splitting it on ``|`` yields at
	    least nine cells (i.e. it contains eight or more pipes). Cell 1 is the
	    category name and cells 3-8 are read as floats; cell 2 is ignored.
	    Header, separator, empty-name and average rows are skipped, as is any
	    row whose metric cells are not numeric.

	    Args:
	        file_path: Path of the Markdown results file to read.

	    Returns:
	        A list of dicts, one per data row, with the keys ``category``,
	        ``f1_image``, ``auroc_image``, ``f1_logical``, ``auroc_logical``,
	        ``f1_structural`` and ``auroc_structural``. An empty list is
	        returned when the file cannot be read (the error is printed).
	    """
	    metric_keys = (
	        'f1_image', 'auroc_image',
	        'f1_logical', 'auroc_logical',
	        'f1_structural', 'auroc_structural',
	    )

	    try:
	        with open(file_path, 'r', encoding='utf-8') as f:
	            content = f.read()
	    except Exception as e:
	        # Deliberately best-effort: report the problem and yield no rows.
	        print(f"Error parsing {file_path}: {e}")
	        return []

	    categories_data = []
	    for line in content.split('\n'):
	        # Split on the bare Markdown delimiter. The previous '\|' literal
	        # was an invalid escape that only matched backslash-pipe pairs,
	        # so ordinary table rows were never recognised.
	        cells = [cell.strip() for cell in line.split('|')]
	        if len(cells) < 9:
	            # Fewer than eight pipes: cells[8] would not exist.
	            continue

	        name = cells[1]
	        is_separator = set(name) <= set('-:')  # also true for an empty name
	        is_average = name == 'Average' or name.startswith('**Average')
	        if is_separator or is_average or name == 'Category':
	            continue

	        try:
	            values = [float(cell) for cell in cells[3:9]]
	        except ValueError:
	            # Non-numeric metric cell: not a data row.
	            continue

	        row = {'category': name}
	        row.update(zip(metric_keys, values))
	        categories_data.append(row)

	    return categories_data


	def calculate_averages(categories_data):
	    """Return the arithmetic mean of each metric over all categories.

	    Args:
	        categories_data: Sequence of per-category dicts, each holding the
	            six metric keys listed below.

	    Returns:
	        A dict mapping each metric key to its mean, or None when
	        ``categories_data`` is empty or None.
	    """
	    if not categories_data:
	        return None

	    count = len(categories_data)
	    metric_names = (
	        'f1_image',
	        'auroc_image',
	        'f1_logical',
	        'auroc_logical',
	        'f1_structural',
	        'auroc_structural',
	    )
	    # One pass per metric, in the same key order as the result dict.
	    return {
	        metric: sum(entry[metric] for entry in categories_data) / count
	        for metric in metric_names
	    }


	def append_averages_to_file(file_path, averages):
	    """Insert an ``Average`` row after the last row of each results table.

	    A line counts as a table row when it contains eight or more ``|``
	    delimiters. The average row is added directly after every run of such
	    lines and the file is rewritten in place. When the file holds no
	    table row at all, nothing is written and the call reports failure.

	    Args:
	        file_path: Path of the Markdown results file to update.
	        averages: Dict with the keys ``f1_image``, ``auroc_image``,
	            ``f1_logical``, ``auroc_logical``, ``f1_structural`` and
	            ``auroc_structural``; each value is formatted to two decimals.

	    Returns:
	        True if at least one average row was written, False if no table
	        was found or an error occurred (the error is printed, not raised).
	    """
	    min_pipes = 8
	    metric_order = (
	        'f1_image', 'auroc_image',
	        'f1_logical', 'auroc_logical',
	        'f1_structural', 'auroc_structural',
	    )

	    try:
	        with open(file_path, 'r', encoding='utf-8') as f:
	            content = f.read()

	        # NOTE(review): the second cell is hard-coded to "4", as before;
	        # presumably the shot count shared by all rows - confirm.
	        cells = ["Average", "4"]
	        cells.extend(f"{averages[key]:.2f}" for key in metric_order)
	        # Bare '|' delimiters: the previous '\|' literals were invalid
	        # escapes that wrote backslash-pipe pairs into the table.
	        average_row = "| " + " | ".join(cells) + " |"

	        lines = content.split('\n')
	        new_lines = []
	        inserted = False
	        for i, line in enumerate(lines):
	            new_lines.append(line)
	            if line.count('|') < min_pipes:
	                continue
	            # Last row of a table: end of file, or the next line is not a
	            # table row (blank, pipe-free or too few pipes).
	            at_table_end = (
	                i + 1 >= len(lines)
	                or lines[i + 1].count('|') < min_pipes
	            )
	            if at_table_end:
	                new_lines.append(average_row)
	                inserted = True

	        if not inserted:
	            # Previously this case still rewrote the file and claimed
	            # success although no row had been added.
	            print(f"✗ No results table found in {file_path}")
	            return False

	        with open(file_path, 'w', encoding='utf-8') as f:
	            f.write('\n'.join(new_lines))

	        print(f"✓ Added average metrics row to {file_path}")
	        return True
	    except Exception as e:
	        # Deliberately best-effort: report the failure instead of raising.
	        print(f"✗ Error appending to {file_path}: {e}")
	        return False


	def process_results_file(file_path):
	    """Add an average row to one results file unless it already has one.

	    Progress and skip reasons are printed. The file is left untouched
	    when it already contains averages, when no data rows can be parsed,
	    or when no averages could be computed.

	    Args:
	        file_path: Path of the Markdown results file to process.
	    """
	    print(f"Processing: {file_path}")

	    # Nothing to do when a previous run already wrote the row.
	    if has_average_metrics(file_path):
	        print(" → Average metrics already exist, skipping")
	        return

	    rows = parse_results_table(file_path)
	    if not rows:
	        print(" → No valid data found, skipping")
	        return
	    print(f" → Found {len(rows)} categories")

	    means = calculate_averages(rows)
	    if not means:
	        print(" → Failed to calculate averages, skipping")
	        return

	    # Write the computed row back into the file.
	    append_averages_to_file(file_path, means)


	def main():
	    """Find every ``results/*_results.md`` file and process each in turn.

	    The pattern is relative to the current working directory. A banner,
	    the list of matched files and per-file progress are printed; when
	    nothing matches, a notice is printed and the function returns.
	    """
	    print("LogSAD Average Metrics Computation")
	    print("=" * 50)

	    results_pattern = "results/*_results.md"
	    matches = glob.glob(results_pattern)
	    if not matches:
	        print("No results files found matching pattern:", results_pattern)
	        return

	    # Announce what was found before touching anything.
	    print(f"Found {len(matches)} results file(s):")
	    print("\n".join(f" - {path}" for path in matches))

	    print("\nProcessing files...")
	    print("-" * 30)

	    for path in matches:
	        process_results_file(path)
	        print()  # blank line between files

	    print("Average metrics computation completed!")


	# Run the batch only when this file is executed as a script, not when it
	# is imported as a module.
	if __name__ == "__main__":
	    main()