"""Convert a JSON instruction dataset into a prompt/response CSV file.

Reads ``alpaca_data.json``, merges each record's ``instruction`` and optional
``input`` fields into a single prompt, pairs it with the record's ``output``,
and writes the result to ``processed_dataset.csv``.
"""

import json

import pandas as pd

INPUT_PATH = "alpaca_data.json"
OUTPUT_PATH = "processed_dataset.csv"
# Fixed column order so the CSV header is written even when there are no rows.
COLUMNS = ["prompt", "response"]


def build_prompt(instruction, input_text=""):
    """Return the prompt text for one record.

    Args:
        instruction: The instruction text.
        input_text: Optional extra context. When empty, the prompt is the
            instruction alone.

    Returns:
        ``instruction`` followed by a newline and ``input_text`` when
        ``input_text`` is non-empty, otherwise ``instruction`` unchanged.
    """
    return f"{instruction}\n{input_text}" if input_text else instruction


def record_to_row(item):
    """Convert one dataset record into a ``{"prompt", "response"}`` row.

    Args:
        item: Mapping with string values under ``"instruction"`` and
            ``"output"``, and optionally ``"input"``.

    Returns:
        A dict with the whitespace-stripped prompt and response.

    Raises:
        KeyError: If ``"instruction"`` or ``"output"`` is missing.
    """
    instruction = item["instruction"].strip()
    # `or ""` also covers an explicit null: dict.get's default only applies
    # when the key is absent, so a present-but-None value would otherwise
    # crash on .strip().
    input_text = (item.get("input") or "").strip()
    output = item["output"].strip()
    return {"prompt": build_prompt(instruction, input_text), "response": output}


def records_to_rows(records):
    """Convert an iterable of dataset records into a list of CSV rows."""
    return [record_to_row(item) for item in records]


def main(input_path=INPUT_PATH, output_path=OUTPUT_PATH):
    """Load the JSON dataset, convert it, and save it as CSV.

    Args:
        input_path: Path of the JSON file to read. Assumes the top-level JSON
            value is a list of record objects.
        output_path: Path of the CSV file to write.
    """
    with open(input_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    df = pd.DataFrame(records_to_rows(data), columns=COLUMNS)
    df.to_csv(output_path, index=False)

    print(f"✅ Saved {len(df)} prompt-response pairs to {output_path}")


if __name__ == "__main__":
    main()