forked from muddyfish/PPCG-Life
-
Notifications
You must be signed in to change notification settings - Fork 0
/
get_answers.py
97 lines (88 loc) · 2.77 KB
/
get_answers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import requests, json
from bs4 import BeautifulSoup
question_id = 83988
def get_answers(q_id, page_id = 1):
url = 'http://api.stackexchange.com/2.2/questions/%d/answers'%question_id
values = {"page":str(page_id),
"order":"desc",
"sort":"activity",
"site":"codegolf",
"filter": "!SWJ_BpAceOT6L*G2Qa"}
response = requests.get(url, params=values)
response_dict = response.json()
answers = response_dict["items"]
print("QUOTA", response_dict["quota_remaining"], "OF", response_dict["quota_max"])
if response_dict["has_more"]:
answers.extend(get_answers(q_id, page_id+1))
return answers
def get_answer_body(q_id=question_id):
answers = get_answers(q_id)
return [answer["body"] for answer in answers]
languages = {
"python": "py",
"python 2": "py",
"python 3": "py",
"java": "java",
"ruby": "rb",
"awk": "awk",
"node.js": "js",
"coffeescript": "coffee",
"bash": "sh",
"sh": "sh",
"c++": "cpp",
"r": "r"
}
def get_extention(language):
language = language.lower()
if language in languages:
return languages[language]
return ""
def parse_answer(body):
soup = BeautifulSoup(body, "html5lib")
header = soup.get_text().split("\n")[0]
code_xml = soup.find("code")
parts = header.split(",")
if len(parts) != 2:
print("ANSWER DOESN'T HAVE VALID HEADER" , header)
return
if not code_xml:
print("ANSWER DOESN'T HAVE VALID CODE BLOCK", header)
return
language, bot_name = [part.strip() for part in parts]
code = code_xml.get_text()
extention = get_extention(language)
if extention == "":
try:
print("UNKNOWN LANGUAGE", language)
except UnicodeEncodeError:
print(repr(language))
return
bot_name = "".join(x for x in bot_name if x.isalnum())
file_name = bot_name+"."+extention
return bot_name, file_name, extention, language, code
def save_answer(entry):
#print entry
try:
f = open("bots/"+entry[1], "wb")
except UnicodeEncodeError:
f = open("bots/"+repr(entry[1]), "wb")
f.write(entry[4].encode("UTF-8"))
f.close()
return ['sh', "'./commands/%s.sh'"%entry[3].lower(), './bots/%s'%entry[1], entry[0]]
def add_one(new):
f = open("answers.json")
answers = json.load(f)
f.close()
answers.append(new)
f = open("answers.json", "w")
json.dump(answers, f)
f.close()
def pull():
answers = list(map(save_answer, filter(None,map(parse_answer, get_answer_body()))))
#print(answers)
f = open("answers.json", "w")
json.dump(answers, f)
f.close()
return answers
if __name__ == '__main__':
print(len(pull()))