Upload folder using huggingface_hub
Browse files
- .gitattributes +4 -0
- gen_ans.py +61 -0
- gen_ans2.py +60 -0
- load.py +13 -0
- qa_pairs_all-alpha1b-final.json +3 -0
- qa_pairs_all-alpha1b.json +3 -0
- qa_pairs_all-alpha1b_2.json +3 -0
- qa_pairs_all-qwen7b.json +3 -0
.gitattributes
CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
qa_pairs_all-alpha1b-final.json filter=lfs diff=lfs merge=lfs -text
|
37 |
+
qa_pairs_all-alpha1b.json filter=lfs diff=lfs merge=lfs -text
|
38 |
+
qa_pairs_all-alpha1b_2.json filter=lfs diff=lfs merge=lfs -text
|
39 |
+
qa_pairs_all-qwen7b.json filter=lfs diff=lfs merge=lfs -text
|
gen_ans.py
ADDED
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from datasets import load_dataset
|
2 |
+
from openai import AsyncOpenAI
|
3 |
+
from tqdm import tqdm
|
4 |
+
import asyncio
|
5 |
+
import json
|
6 |
+
import os
|
7 |
+
|
# Async OpenAI-compatible client pointed at a local inference server
# (e.g. vLLM serving on port 8000); the api_key is a placeholder because
# the local server presumably does not validate it — confirm.
client = AsyncOpenAI(api_key="no-need", base_url="http://localhost:8000/v1")
9 |
+
|
async def generate_answer(messages):
    """Request one chat completion from the local model server.

    Args:
        messages: a conversation — list of {'role', 'content'} dicts —
            used as the prompt.

    Returns:
        The assistant's reply text, or the literal string 'error' when the
        request fails, so batch processing keeps going (best-effort).
    """
    try:
        response = await client.chat.completions.create(
            model="outputs/out-alpha",
            messages=messages,
            max_tokens=2048,
        )
        return response.choices[0].message.content
    except Exception as e:
        # Report the failure instead of silently discarding it (the original
        # bound `e` but never used it), then return the sentinel unchanged.
        print(f"generate_answer failed: {e!r}")
        return 'error'
20 |
+
|
async def process_batch(questions, batch_num, all_qa_pairs):
    """Answer one batch of conversations concurrently and checkpoint results.

    Args:
        questions: list of conversations (each a list of message dicts).
            Each conversation is MUTATED in place: the generated assistant
            turn is appended to it.
        batch_num: zero-based batch index (informational).
        all_qa_pairs: running list of results; extended in place and
            re-serialized to disk after the batch completes.

    Returns:
        The list of generated answer strings for this batch (in input order).
    """
    # Fan out all requests concurrently; gather preserves input order.
    # The original wrapped task *creation* in tqdm, but building coroutines
    # is instantaneous, so that progress bar measured nothing — removed.
    tasks = [generate_answer(q) for q in questions]
    answers = await asyncio.gather(*tasks)

    # Pair each question with its answer and append to the shared results.
    for q, a in zip(questions, answers):
        q.append({'role': 'assistant', 'content': a})
        all_qa_pairs.append({"conversations": q})

    # Checkpoint: rewrite the whole result file so progress survives a crash.
    with open('qa_pairs_all-alpha1b_2.json', 'w') as f:
        json.dump(all_qa_pairs, f, indent=2)

    return answers
37 |
+
|
async def main(start_index=21600, batch_size=200):
    """Generate model answers for the tail of the sft-r1 train split.

    Args:
        start_index: skip dataset rows before this index. Defaults to the
            hard-coded resume point of the original run (21600).
        batch_size: number of concurrent requests per batch (default 200,
            as in the original).
    """
    dataset = load_dataset('qnguyen3/sft-r1')

    # Resume support: reload any previously checkpointed results so the
    # output file keeps earlier pairs.
    all_qa_pairs = []
    if os.path.exists('qa_pairs_all-alpha1b_2.json'):
        with open('qa_pairs_all-alpha1b_2.json', 'r') as f:
            all_qa_pairs = json.load(f)

    # Strip the final (assistant) turn from each remaining conversation so
    # the model regenerates it.
    question_list = []
    print("Preparing questions...")
    for i, item in tqdm(enumerate(dataset['train']), desc="Loading dataset"):
        if i >= start_index:
            question_list.append(item['messages'][:-1])

    # Process sequentially in fixed-size batches; each batch fans out its
    # requests concurrently inside process_batch.
    for i in tqdm(range(0, len(question_list), batch_size), desc="Processing batches"):
        batch_questions = question_list[i:i+batch_size]
        batch_num = i // batch_size
        await process_batch(batch_questions, batch_num, all_qa_pairs)
59 |
+
|
# Script entry point: run the async pipeline on a fresh event loop.
if __name__ == "__main__":
    asyncio.run(main())
|
gen_ans2.py
ADDED
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from datasets import load_dataset
|
2 |
+
from openai import AsyncOpenAI
|
3 |
+
from tqdm import tqdm
|
4 |
+
import asyncio
|
5 |
+
import json
|
6 |
+
import os
|
7 |
+
|
# Async OpenAI-compatible client pointed at a remote inference server via a
# hard-coded IP:port; the api_key is a placeholder — presumably the server
# does not validate it (confirm). NOTE(review): plain HTTP to a raw IP.
client = AsyncOpenAI(api_key="no-need", base_url="http://147.185.42.13:24585/v1")
9 |
+
|
async def generate_answer(messages):
    """Request one chat completion from the Qwen2.5-7B-Instruct server.

    Args:
        messages: a conversation — list of {'role', 'content'} dicts —
            used as the prompt.

    Returns:
        The assistant's reply text, or the literal string 'error' when the
        request fails, so batch processing keeps going (best-effort).
    """
    try:
        response = await client.chat.completions.create(
            model="Qwen/Qwen2.5-7B-Instruct",
            messages=messages,
            max_tokens=2048,
        )
        return response.choices[0].message.content
    except Exception as e:
        # Report the failure instead of silently discarding it (the original
        # bound `e` but never used it), then return the sentinel unchanged.
        print(f"generate_answer failed: {e!r}")
        return 'error'
20 |
+
|
async def process_batch(questions, batch_num, all_qa_pairs):
    """Answer one batch of conversations concurrently and checkpoint results.

    Args:
        questions: list of conversations (each a list of message dicts).
            Each conversation is MUTATED in place: the generated assistant
            turn is appended to it.
        batch_num: zero-based batch index (informational).
        all_qa_pairs: running list of results; extended in place and
            re-serialized to disk after the batch completes.

    Returns:
        The list of generated answer strings for this batch (in input order).
    """
    # Fan out all requests concurrently; gather preserves input order.
    # The original wrapped task *creation* in tqdm, but building coroutines
    # is instantaneous, so that progress bar measured nothing — removed.
    tasks = [generate_answer(q) for q in questions]
    answers = await asyncio.gather(*tasks)

    # Pair each question with its answer and append to the shared results.
    for q, a in zip(questions, answers):
        q.append({'role': 'assistant', 'content': a})
        all_qa_pairs.append({"conversations": q})

    # Checkpoint: rewrite the whole result file so progress survives a crash.
    with open('qa_pairs_all-qwen7b.json', 'w') as f:
        json.dump(all_qa_pairs, f, indent=2)

    return answers
37 |
+
|
async def main(batch_size=200):
    """Generate model answers for every row of the sft-r1 train split.

    Args:
        batch_size: number of concurrent requests per batch (default 200,
            as in the original).
    """
    dataset = load_dataset('qnguyen3/sft-r1')

    # Reload any previously checkpointed results so the output file keeps
    # earlier pairs.
    # NOTE(review): results are reloaded but the question list below is NOT
    # trimmed, so re-running this script appends duplicates — confirm intended.
    all_qa_pairs = []
    if os.path.exists('qa_pairs_all-qwen7b.json'):
        with open('qa_pairs_all-qwen7b.json', 'r') as f:
            all_qa_pairs = json.load(f)

    # Strip the final (assistant) turn from each conversation so the model
    # regenerates it. The original's enumerate index was unused — iterate
    # the split directly.
    question_list = []
    print("Preparing questions...")
    for item in tqdm(dataset['train'], desc="Loading dataset"):
        question_list.append(item['messages'][:-1])

    # Process sequentially in fixed-size batches; each batch fans out its
    # requests concurrently inside process_batch.
    for i in tqdm(range(0, len(question_list), batch_size), desc="Processing batches"):
        batch_questions = question_list[i:i+batch_size]
        batch_num = i // batch_size
        await process_batch(batch_questions, batch_num, all_qa_pairs)
|
58 |
+
|
# Script entry point: run the async pipeline on a fresh event loop.
if __name__ == "__main__":
    asyncio.run(main())
|
load.py
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Merge the two partial alpha-1b QA dumps into one final JSON file."""
import json

# Read the first generation run's results.
with open('qa_pairs_all-alpha1b.json', 'r') as f:
    data = json.load(f)

# Read the resumed second run's results.
with open('qa_pairs_all-alpha1b_2.json', 'r') as f:
    data2 = json.load(f)

# Concatenate and save as qa_pairs_all-alpha1b-final.json.
# (The original comment said "_3.json", which did not match the code.)
data.extend(data2)
with open('qa_pairs_all-alpha1b-final.json', 'w') as f:
    json.dump(data, f, indent=2, ensure_ascii=False)
qa_pairs_all-alpha1b-final.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b172c524b52bf65c665d3a80965138d96358478c954c3c7c9a5bd9c77ec92458
|
3 |
+
size 132102846
|
qa_pairs_all-alpha1b.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9daa5f71e42042eb06d7caa3a2b6d5fa216458700ef56f9759b1bb88eb9f9e7d
|
3 |
+
size 52337224
|
qa_pairs_all-alpha1b_2.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:55782571d4178a1f2817166820f271f7afef5a02370370103e5e7602afc77411
|
3 |
+
size 80234968
|
qa_pairs_all-qwen7b.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:356685dd6959ef2bc769a93ead26ce5e3f521d731c7e2835351177f1009c25e5
|
3 |
+
size 166879355
|