|
from utils.gemini_eval import * |
|
from mathruler.grader import extract_boxed_content |
|
import json |
|
from typing import List, Dict, Union, Any |
|
from pathlib import Path |
|
from tqdm import tqdm |
|
import logging |
|
logging.getLogger().setLevel(logging.ERROR) |
|
import json |
|
from pathlib import Path |
|
from tqdm import tqdm |
|
import concurrent.futures |
|
from datasets import load_dataset |
|
|
|
|
|
def dump_json(
    data: List[Dict[str, Any]],
    path: Union[str, Path],
    *,
    indent: Union[int, None] = 2,
    ensure_ascii: bool = False,
) -> None:
    """
    Save `data` (a list of dictionaries) to `path` in JSON format.

    The payload is fully serialized in memory before the destination file is
    opened, so a serialization failure never truncates or partially
    overwrites a file that already exists at `path`.

    Parameters
    ----------
    data : list[dict]
        The objects you want to serialize.
    path : str | pathlib.Path
        Where to write the JSON file. Parent directories are created if needed.
    indent : int | None, default=2
        How many spaces to use for pretty-printing. Set to `None` for a
        single-line file.
    ensure_ascii : bool, default=False
        If False, non-ASCII characters are written as UTF-8; if True, they're
        escaped.

    Raises
    ------
    TypeError
        If `data` contains objects that `json` cannot serialize.
    ValueError
        If `data` contains a circular reference.
    OSError
        If the file cannot be created or written.
    """
    # Serialize first: opening the file in "w" mode truncates it immediately,
    # so serializing afterwards would wipe existing content whenever the
    # encoder raises part-way through.
    payload = json.dumps(data, indent=indent, ensure_ascii=ensure_ascii)

    path = Path(path)
    path.parent.mkdir(parents=True, exist_ok=True)

    with path.open("w", encoding="utf-8") as f:
        f.write(payload)
        # Trailing newline so the file ends like a regular text file.
        f.write("\n")
|
|
|
|
|
|
|
|
|
# Hard-coded locations: the predictions file to read and the file that will
# receive the same records with a 'judgment' field added.
ONLY_FILE = Path('./gemini-pro/visnumbench.json')
output_file = './eval_out/gemini-pro/visnumbench.json'

# The whole input is loaded up front and iterated below.
data = json.loads(ONLY_FILE.read_text(encoding="utf-8"))

outputs = []
for record in tqdm(data):
    # Arguments are evaluated left to right: the problem text with the
    # '<image>' placeholder removed, the reference solution, and the boxed
    # content extracted from the first prediction only.
    # NOTE(review): `generate` is not imported by name in this file;
    # presumably it comes from the star import of utils.gemini_eval — confirm.
    record['judgment'] = generate(
        record['problem'].replace('<image>', ''),
        record['solution'],
        extract_boxed_content(record['predictions'][0]),
    )
    # Records are annotated in place, so `outputs` holds the same dict
    # objects as `data`.
    outputs.append(record)

# Results are written once, after every record has been processed.
dump_json(outputs, output_file)
|
|