"""Convert the APIGen (xLAM function-calling) dataset into a compact form.

Each input sample is reduced to its query, the name/description of every
candidate tool, and the de-duplicated names of the tools actually called.
Samples whose answers call no tool are dropped.
"""

import json
from typing import Optional

INPUT_PATH = "./datasets/apigen/xlam_function_calling_60k.json"
OUTPUT_PATH = "./datasets/apigen/output.json"


def normalize_tool_name(name: str) -> str:
    """Return *name* with every "." replaced by "_"."""
    return name.replace(".", "_")


def convert_sample(sample: dict) -> Optional[dict]:
    """Convert one raw sample into the output record.

    Args:
        sample: Mapping with a "query" value plus "answers" and "tools"
            values that are JSON-encoded lists of objects. Every answer
            object must have a "name"; every tool object must have a
            "name" and a "description".

    Returns:
        A dict with keys "instruction", "tools" and "used_tools", or
        ``None`` when the sample's answers call no tool.

    Raises:
        KeyError: If a required key is missing.
        json.JSONDecodeError: If "answers" or "tools" is not valid JSON.
    """
    calls = json.loads(sample["answers"])
    # Normalise the called names the same way as the candidate tool names
    # below, so that every entry of "used_tools" can be looked up in "tools".
    # dict.fromkeys de-duplicates while keeping first-seen order, which makes
    # the output reproducible (a set's iteration order varies between runs).
    used_tools = list(
        dict.fromkeys(normalize_tool_name(call["name"]) for call in calls)
    )
    if not used_tools:
        return None

    tools = [
        {
            "name": normalize_tool_name(tool["name"]),
            "description": tool["description"],
        }
        for tool in json.loads(sample["tools"])
    ]
    return {
        "instruction": sample["query"],
        "tools": tools,
        "used_tools": used_tools,
    }


def main() -> None:
    """Read the raw dataset, convert every sample and write the result."""
    # Imported here so that convert_sample stays importable without the
    # third-party progress-bar package installed.
    from tqdm import tqdm

    with open(INPUT_PATH, "r", encoding="utf-8") as f:
        data = json.load(f)

    results = []
    for sample in tqdm(data, desc="Processing APIGen samples"):
        result = convert_sample(sample)
        if result is not None:
            results.append(result)

    with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
        json.dump(results, f, indent=4)


if __name__ == "__main__":
    main()