Henry65 committed on
Commit
fba413c
·
1 Parent(s): a6f930e

Update RepoPipeline.py

Browse files
Files changed (1) hide show
  1. RepoPipeline.py +22 -7
RepoPipeline.py CHANGED
@@ -34,19 +34,35 @@ def extract_code_and_docs(text: str):
34
  return code_set, docs_set
35
 
36
 
37
- def extract_requirements(lines):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  """
39
  The method for extracting requirements.
40
  :param lines: requirements.
41
  :return: requirement libraries.
42
  """
43
  requirements_set = set()
 
44
  for line in lines:
45
- line = line.replace('\n', '').strip()
46
  try:
47
  if " == " in line:
48
  splitLine = line.split(" == ")
49
- elif "==" in line:
50
  splitLine = line.split("==")
51
  requirements_set.add(splitLine[0])
52
  except:
@@ -132,8 +148,7 @@ def extract_information(repos, headers=None):
132
  try:
133
  file_content = tar.extractfile(member).read().decode("utf-8")
134
  # extract readme
135
- readmes_set = set()
136
- readmes_set.add(file_content)
137
  repo_info["readmes"].update(readmes_set)
138
  except UnicodeDecodeError as e:
139
  tqdm.write(
@@ -144,9 +159,9 @@ def extract_information(repos, headers=None):
144
  # 4. Extracting requirements.
145
  elif member.name.endswith("requirements.txt") and member.isfile():
146
  try:
147
- lines = tar.extractfile(member).readlines()
148
  # extract readme
149
- requirements_set = extract_requirements(lines)
150
  repo_info["requirements"].update(requirements_set)
151
  except UnicodeDecodeError as e:
152
  tqdm.write(
 
34
  return code_set, docs_set
35
 
36
 
37
+ def extract_readmes(file_content):
38
+ """
39
+ The method for extracting readmes.
40
+ :param file_content: readme text.
41
+ :return: readme sentences.
42
+ """
43
+ readmes_set = set()
44
+ lines = file_content.split('\n')
45
+ for line in lines:
46
+ line = line.replace("\n", "").strip()
47
+ readmes_set.add(line)
48
+
49
+ return readmes_set
50
+
51
+
52
+ def extract_requirements(file_content):
53
  """
54
  The method for extracting requirements.
55
  :param file_content: requirements file text.
56
  :return: requirement libraries.
57
  """
58
  requirements_set = set()
59
+ lines = file_content.split('\n')
60
  for line in lines:
61
+ line = line.replace("\n", "").strip()
62
  try:
63
  if " == " in line:
64
  splitLine = line.split(" == ")
65
+ else:
66
  splitLine = line.split("==")
67
  requirements_set.add(splitLine[0])
68
  except:
 
148
  try:
149
  file_content = tar.extractfile(member).read().decode("utf-8")
150
  # extract readme
151
+ readmes_set = extract_readmes(file_content)
 
152
  repo_info["readmes"].update(readmes_set)
153
  except UnicodeDecodeError as e:
154
  tqdm.write(
 
159
  # 4. Extracting requirements.
160
  elif member.name.endswith("requirements.txt") and member.isfile():
161
  try:
162
+ file_content = tar.extractfile(member).read().decode("utf-8")
163
  # extract requirements
164
+ requirements_set = extract_requirements(file_content)
165
  repo_info["requirements"].update(requirements_set)
166
  except UnicodeDecodeError as e:
167
  tqdm.write(