rookiemango committed on
Commit
3e37441
·
verified ·
1 Parent(s): 6cf3348

Upload folder using huggingface_hub

Browse files
check.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ from collections import Counter
4
+
5
# Directory that holds the generation output shards ("0.json" ... "7.json")
# scanned by this script; the analysis report is written here as well.
base_dir = "generate_result/zero_shot/bd_math/generation/llama3.1/1"


def has_repetition(text, threshold=3, *, ngram_size=15):
    """
    Find word n-grams that occur repeatedly in the given text.

    The text is split on whitespace and every window of ``ngram_size``
    consecutive words is counted. Windows overlap, so a long repeated
    passage can yield several reported phrases.

    The previous implementation looped over ``range(15, 20)`` but broke out
    unconditionally after the first iteration, so only 15-word phrases were
    ever examined. That effective behaviour is kept as the default and is
    now an explicit, configurable parameter.

    :param text: The text to check for repetition
    :param threshold: Minimum number of occurrences for a phrase to be reported
    :param ngram_size: Number of words per phrase window (keyword-only)
    :return: The repeated phrases in order of first appearance, or an empty
        list when nothing repeats (including when the text has fewer than
        ``ngram_size`` words)
    :raises ValueError: If ``ngram_size`` is smaller than 1
    """
    if ngram_size < 1:
        raise ValueError("ngram_size must be a positive integer")

    words = text.split()
    window_count = len(words) - ngram_size + 1
    if window_count < 1:
        # Not enough words to form even a single window.
        return []

    # Counter keeps first-seen order, so the result order is deterministic.
    phrase_counts = Counter(
        " ".join(words[start : start + ngram_size]) for start in range(window_count)
    )
    return [phrase for phrase, count in phrase_counts.items() if count >= threshold]
26
+
27
+
28
# Scan every shard, count how many records contain repeated phrases, and
# collect the details of each offending record for the report.
total_items = 0
items_with_repetition = 0
repetition_data = []

for i in range(8):
    file_path = os.path.join(base_dir, f"{i}.json")

    if not os.path.exists(file_path):
        print(f"File {file_path} does not exist. Skipping.")
        continue

    # Each shard is parsed line by line (one JSON document per line).
    # JSON is UTF-8 by specification, so do not rely on the locale default.
    with open(file_path, "r", encoding="utf-8") as file:
        for line_number, line in enumerate(file, 1):
            try:
                data = json.loads(line)
                outputs = data.get("total output")
                if not outputs:
                    # Previously this surfaced as an opaque IndexError from
                    # indexing an empty default; report it explicitly. Such
                    # records were never counted in total_items either.
                    print(
                        f"No model output in file {i}.json, line {line_number}. Skipping."
                    )
                    continue
                # Only the first entry of "total output" is analysed.
                model_output = outputs[0]
                total_items += 1

                repetitive_phrases = has_repetition(model_output)
                if repetitive_phrases:
                    items_with_repetition += 1
                    repetition_data.append(
                        {
                            "file": f"{i}.json",
                            "line": line_number,
                            "prompt": data.get("prompt", ""),
                            "repetitive_phrases": repetitive_phrases,
                        }
                    )
            except json.JSONDecodeError:
                print(f"Error decoding JSON in file {i}.json, line {line_number}")
            except Exception as e:
                # Best-effort scan: report the bad record and keep going.
                print(f"Error processing file {i}.json, line {line_number}: {e}")

# Calculate the ratio (guarding against an empty data set)
ratio = items_with_repetition / total_items if total_items > 0 else 0
print(
    f"Ratio of items with repetition: {ratio:.2f} ({items_with_repetition}/{total_items})"
)

# Save repetition data to a file next to the analysed shards
output_file = "repetition_analysis.json"
output_path = os.path.join(base_dir, output_file)
with open(output_path, "w", encoding="utf-8") as f:
    json.dump(repetition_data, f, indent=2)

# Report the real destination; the file is written under base_dir, not the
# current working directory.
print(f"Repetition analysis completed. Results saved to {output_path}")
generate_result/zero_shot/bd_math/generation/llama3.1/1/repetition_analysis.json ADDED
The diff for this file is too large to render. See raw diff
 
vllm_generate.py CHANGED
@@ -11,7 +11,7 @@ import sys
11
  import os
12
  import numpy as np
13
 
14
- few_shot_string = '''Question: Find the domain of the expression $\frac{\sqrt{x-2}}{\sqrt{5-x}}$.}
15
  Let's think step by step. The expressions inside each square root must be non-negative. Therefore, $x-2 \ge 0$, so $x\ge2$, and $5 - x \ge 0$, so $x \le 5$. Also, the denominator cannot be equal to zero, so $5-x>0$, which gives $x<5$. Therefore, the domain of the expression is $[2,5)$. Final Answer: The answer is $[2,5)$. I hope it is correct.
16
 
17
  Question: If $\det \mathbf{A} = 2$ and $\det \mathbf{B} = 12,$ then find $\det (\mathbf{A} \mathbf{B}).$
@@ -35,7 +35,7 @@ Let's think step by step. If we multiply the first equation by $-\frac{3}{2}$, w
35
  $$-\frac{3}{2}a=b\Rightarrow\frac{a}{b}=-\frac{2}{3}.$$
36
  Final Answer: The answer is $-\frac{2}{3}$. I hope it is correct.
37
 
38
- '''
39
 
40
  PROMPT_DICT = {
41
  "lean4": (
@@ -49,9 +49,9 @@ PROMPT_DICT = {
49
  "Write a response that appropriately completes the request.\n\n"
50
  "### Instruction:\n{instruction}\n\n### Response:"
51
  ),
52
- 'old_prompt_bd': '''Question: {question}
53
- Let's think step by step.''',
54
- 'vallina':'''{question}''',
55
  }
56
 
57
 
 
11
  import os
12
  import numpy as np
13
 
14
+ few_shot_string = """Question: Find the domain of the expression $\frac{\sqrt{x-2}}{\sqrt{5-x}}$.}
15
  Let's think step by step. The expressions inside each square root must be non-negative. Therefore, $x-2 \ge 0$, so $x\ge2$, and $5 - x \ge 0$, so $x \le 5$. Also, the denominator cannot be equal to zero, so $5-x>0$, which gives $x<5$. Therefore, the domain of the expression is $[2,5)$. Final Answer: The answer is $[2,5)$. I hope it is correct.
16
 
17
  Question: If $\det \mathbf{A} = 2$ and $\det \mathbf{B} = 12,$ then find $\det (\mathbf{A} \mathbf{B}).$
 
35
  $$-\frac{3}{2}a=b\Rightarrow\frac{a}{b}=-\frac{2}{3}.$$
36
  Final Answer: The answer is $-\frac{2}{3}$. I hope it is correct.
37
 
38
+ """
39
 
40
  PROMPT_DICT = {
41
  "lean4": (
 
49
  "Write a response that appropriately completes the request.\n\n"
50
  "### Instruction:\n{instruction}\n\n### Response:"
51
  ),
52
+ "old_prompt_bd": """Question: {question}
53
+ Let's think step by step.""",
54
+ "vallina": """{question}""",
55
  }
56
 
57