# my-local-chatbot / preprocess.py
# Jasleen05's picture
# Upload 12 files
# 418c329 verified
import json
import pandas as pd
def build_prompt(instruction: str, input_text: str = "") -> str:
    """Combine an instruction and its optional input into one prompt.

    Args:
        instruction: The already-stripped instruction text.
        input_text: The already-stripped supplementary input; may be empty.

    Returns:
        ``instruction`` alone when ``input_text`` is empty, otherwise the two
        joined by a single newline.
    """
    if input_text:
        return f"{instruction}\n{input_text}"
    return instruction


def build_rows(data):
    """Convert instruction/input/output records into prompt/response rows.

    Args:
        data: Iterable of dicts, each with string values under the
            ``"instruction"`` and ``"output"`` keys and, optionally, an
            ``"input"`` key.

    Returns:
        A list of ``{"prompt": ..., "response": ...}`` dicts, one per record,
        in the same order as ``data``.

    Raises:
        KeyError: If a record lacks ``"instruction"`` or ``"output"``.
    """
    rows = []
    for item in data:
        instruction = item["instruction"].strip()
        # ``.get("input", "")`` still yields None when the key is present with
        # a JSON null value, which would crash on ``.strip()``; ``or ""``
        # covers both the missing-key and the null case.
        input_text = (item.get("input") or "").strip()
        output = item["output"].strip()
        rows.append(
            {"prompt": build_prompt(instruction, input_text), "response": output}
        )
    return rows


def main(src: str = "alpaca_data.json", dst: str = "processed_dataset.csv") -> None:
    """Read the JSON dataset at ``src`` and write prompt/response pairs to ``dst``.

    Args:
        src: Path of the UTF-8 JSON file holding the list of records.
        dst: Path of the CSV file to create (overwritten if it exists).
    """
    with open(src, "r", encoding="utf-8") as f:
        data = json.load(f)

    rows = build_rows(data)

    # Save to CSV
    df = pd.DataFrame(rows)
    df.to_csv(dst, index=False)
    print(f"✅ Saved {len(df)} prompt-response pairs to {dst}")


if __name__ == "__main__":
    main()