# NOTE(review): `generate` (used in the judging loop below) is not defined in
# this file, so it presumably comes from this star import -- confirm, and
# consider importing it by name.
from utils.gemini_eval import *
from mathruler.grader import extract_boxed_content
import json
from typing import List, Dict, Union, Any
from pathlib import Path
from tqdm import tqdm
import logging

# Only ERROR and above pass the root logger from here on. The call keeps its
# original position (after the imports above, before the ones below) so that
# any import-time logging configuration interacts with it exactly as before.
logging.getLogger().setLevel(logging.ERROR)

# NOTE(review): neither of these is referenced in the visible code; kept in
# case another part of the project relies on them.
import concurrent.futures
from datasets import load_dataset


def dump_json(
    data: List[Dict[str, Any]],
    path: Union[str, Path],
    *,
    indent: int | None = 2,
    ensure_ascii: bool = False
) -> None:
    """
    Serialize `data` as JSON and write it to `path`.

    Missing parent directories of `path` are created first. The file is
    written as UTF-8 and always ends with a single trailing newline.

    Parameters
    ----------
    data : list[dict]
        Records to serialize.
    path : str | pathlib.Path
        Destination file. An existing file is overwritten.
    indent : int | None, default=2
        Forwarded to `json.dump`; `None` produces a compact single-line
        document.
    ensure_ascii : bool, default=False
        Forwarded to `json.dump`; when True, non-ASCII characters are
        written as escape sequences instead of raw UTF-8.

    Raises
    ------
    TypeError
        If `data` contains an object that `json` cannot serialize.
    OSError
        If the directory or file cannot be created or written.
    """
    target = Path(path)
    target.parent.mkdir(parents=True, exist_ok=True)

    with target.open("w", encoding="utf-8") as fh:
        json.dump(data, fh, indent=indent, ensure_ascii=ensure_ascii)
        # Terminate the document with a newline (POSIX text-file convention).
        fh.write("\n")


# Input: JSON list of records, each read below via the keys 'problem',
# 'solution' and 'predictions'.
ONLY_FILE = Path('./gemini-pro/visnumbench.json')
# Output: the same records, each extended with a 'judgment' entry.
output_file = './eval_out/gemini-pro/visnumbench.json'

with ONLY_FILE.open("r", encoding="utf-8") as f:
    data = json.load(f)

outputs = []
for ele in tqdm(data):
    # NOTE(review): replacing '' with '' leaves the string unchanged; the
    # search token may have been lost at some point -- confirm what was meant
    # to be stripped from the problem text.
    problem = ele['problem'].replace('', '')
    reference = ele['solution']
    # Only the first prediction of each record is judged.
    candidate = extract_boxed_content(ele['predictions'][0])
    judgment = generate(problem, reference, candidate)

    # The record is annotated in place, so `outputs` holds the same dict
    # objects as `data`.
    ele['judgment'] = judgment
    outputs.append(ele)

# Results are written once, after every record has been judged.
dump_json(outputs, output_file)