From 6a71bf5af504fcbfeccc48e5932a5a516602f4f0 Mon Sep 17 00:00:00 2001 From: Manish Date: Sun, 29 Apr 2018 09:43:40 +0530 Subject: [PATCH 01/10] Fixing solution for CF HTML change, Updated README --- .gitignore | 2 ++ README.md | 20 ++++++++++++++++++-- main.py | 30 +++++++++++++++++++----------- 3 files changed, 39 insertions(+), 13 deletions(-) create mode 100644 .gitignore mode change 100644 => 100755 main.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..cbdad15 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +.DS_Store +Solutions \ No newline at end of file diff --git a/README.md b/README.md index 93d95bd..6669bad 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,22 @@ CodeforcesSolutionDownloader ============================ -a small script for downloading ACCEPTED solutions from codeforces.com +A small script for downloading *ACCEPTED* solutions from codeforces.com -Change the handle variable's value to your handle and then run main.py. It will create folders with the CF rounds and place your solutions there. +Install the dependency `bs4` +``` +pip install bs4 +``` + +Run main.py and pass the user handle as argument + +``` +python main.py +``` + +For example, +``` +python main.py Petr +``` + +It will create folders with the user handle and Codeforces rounds under `Solutions` folder and place the solutions there. diff --git a/main.py b/main.py old mode 100644 new mode 100755 index 8a4b987..d469a14 --- a/main.py +++ b/main.py @@ -1,14 +1,20 @@ import urllib import json +import sys import time, os +from bs4 import BeautifulSoup MAX_SUBS = 1000000 MAX_CF_CONTEST_ID = 600 MAGIC_START_POINT = 17000 -handle='tacklemore' +if (len(sys.argv) < 2): + print 'Usage: python main.py ' + exit(1) -SOURCE_CODE_BEGIN = '
'
+handle = sys.argv[1]
+
+DOWNLOAD_DIR = 'Solutions'
 SUBMISSION_URL = 'http://codeforces.com/contest/{ContestId}/submission/{SubmissionId}'
 USER_INFO_URL = 'http://codeforces.com/api/user.status?handle={handle}&from=1&count={count}'
 
@@ -31,8 +37,9 @@ def parse(source_code):
         source_code = source_code.replace(key, replacer[key])
     return source_code
 
-if not os.path.exists(handle):
-    os.makedirs(handle)
+base_dir = DOWNLOAD_DIR + '/' + handle
+if not os.path.exists(base_dir):
+    os.makedirs(base_dir)
 
 user_info = urllib.urlopen(USER_INFO_URL.format(handle=handle, count=MAX_SUBS)).read()
 dic = json.loads(user_info)
@@ -48,14 +55,15 @@ def parse(source_code):
         con_id, sub_id = submission['contestId'], submission['id'],
         prob_name, prob_id = submission['problem']['name'], submission['problem']['index']
         comp_lang = submission['programmingLanguage']
-        submission_info = urllib.urlopen(SUBMISSION_URL.format(ContestId=con_id, SubmissionId=sub_id)).read()
-        
-        start_pos = submission_info.find(SOURCE_CODE_BEGIN, MAGIC_START_POINT) + len(SOURCE_CODE_BEGIN)
-        end_pos = submission_info.find("
", start_pos) - result = parse(submission_info[start_pos:end_pos]).replace('\r', '') + submission_full_url = SUBMISSION_URL.format(ContestId=con_id, SubmissionId=sub_id) + print 'Fetching %s' % submission_full_url + submission_info = urllib.urlopen(submission_full_url).read() + soup = BeautifulSoup(submission_info, 'html.parser') + submission_text = soup.find('pre', id='program-source-text') + result = submission_text.encode_contents() ext = get_ext(comp_lang) - new_directory = handle + '/' + str(con_id) + new_directory = base_dir + '/' + str(con_id) if not os.path.exists(new_directory): os.makedirs(new_directory) file = open(new_directory + '/' + prob_id + '[ ' + prob_name + ' ]' + '.' + ext, 'w') @@ -63,4 +71,4 @@ def parse(source_code): file.close() end_time = time.time() -print 'Execution time %d seconds' % int(end_time - start_time) +print 'Finished in %d seconds' % int(end_time - start_time) From 2796c7a171ee597dc2716c43b52ae9c97dbaf1d7 Mon Sep 17 00:00:00 2001 From: Manish Date: Sun, 29 Apr 2018 10:09:10 +0530 Subject: [PATCH 02/10] Fixed get content text, removing \r --- main.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/main.py b/main.py index d469a14..e9b46a9 100755 --- a/main.py +++ b/main.py @@ -60,9 +60,8 @@ def parse(source_code): submission_info = urllib.urlopen(submission_full_url).read() soup = BeautifulSoup(submission_info, 'html.parser') submission_text = soup.find('pre', id='program-source-text') - result = submission_text.encode_contents() + result = submission_text.text.replace('\r', '') ext = get_ext(comp_lang) - new_directory = base_dir + '/' + str(con_id) if not os.path.exists(new_directory): os.makedirs(new_directory) From 6b7dae3b3f1cac82e7826f033eba9a5229a107da Mon Sep 17 00:00:00 2001 From: Manish Date: Sun, 29 Apr 2018 11:53:04 +0530 Subject: [PATCH 03/10] Showing number of submissions going to be fetched and time in minutes and seconds --- main.py | 55 +++++++++++++++++++++++++++++-------------------------- 1 
file changed, 29 insertions(+), 26 deletions(-) diff --git a/main.py b/main.py index e9b46a9..a9b73fd 100755 --- a/main.py +++ b/main.py @@ -5,8 +5,6 @@ from bs4 import BeautifulSoup MAX_SUBS = 1000000 -MAX_CF_CONTEST_ID = 600 -MAGIC_START_POINT = 17000 if (len(sys.argv) < 2): print 'Usage: python main.py ' @@ -41,33 +39,38 @@ def parse(source_code): if not os.path.exists(base_dir): os.makedirs(base_dir) -user_info = urllib.urlopen(USER_INFO_URL.format(handle=handle, count=MAX_SUBS)).read() -dic = json.loads(user_info) +user_info_full_url = USER_INFO_URL.format(handle=handle, count=MAX_SUBS) +print 'Fetching user status: %s' % user_info_full_url +dic = json.loads(urllib.urlopen(user_info_full_url).read()) if dic['status'] != u'OK': - print 'Oops.. Something went wrong...' - exit(0) + print 'Couldn\'t fetch user status' + exit(1) -submissions = dic['result'] -start_time = time.time() +submissions = [submission for submission in dic['result'] if submission['verdict'] == u'OK'] +print 'Fetching %d submissions' % len(submissions) +start_time = time.time() for submission in submissions: - if submission['verdict'] == u'OK' and submission['contestId'] < MAX_CF_CONTEST_ID: - con_id, sub_id = submission['contestId'], submission['id'], - prob_name, prob_id = submission['problem']['name'], submission['problem']['index'] - comp_lang = submission['programmingLanguage'] - submission_full_url = SUBMISSION_URL.format(ContestId=con_id, SubmissionId=sub_id) - print 'Fetching %s' % submission_full_url - submission_info = urllib.urlopen(submission_full_url).read() - soup = BeautifulSoup(submission_info, 'html.parser') - submission_text = soup.find('pre', id='program-source-text') - result = submission_text.text.replace('\r', '') - ext = get_ext(comp_lang) - new_directory = base_dir + '/' + str(con_id) - if not os.path.exists(new_directory): - os.makedirs(new_directory) - file = open(new_directory + '/' + prob_id + '[ ' + prob_name + ' ]' + '.' 
+ ext, 'w') - file.write(result) - file.close() + con_id, sub_id = submission['contestId'], submission['id'], + prob_name, prob_id = submission['problem']['name'], submission['problem']['index'] + comp_lang = submission['programmingLanguage'] + submission_full_url = SUBMISSION_URL.format(ContestId=con_id, SubmissionId=sub_id) + print 'Fetching submission: %s' % submission_full_url + submission_info = urllib.urlopen(submission_full_url).read() + soup = BeautifulSoup(submission_info, 'html.parser') + submission_text = soup.find('pre', id='program-source-text') + if submission_text is None: + print 'Could not fectch solution %d', sub_id + continue + result = submission_text.text.replace('\r', '') + ext = get_ext(comp_lang) + new_directory = base_dir + '/' + str(con_id) + if not os.path.exists(new_directory): + os.makedirs(new_directory) + file = open(new_directory + '/' + prob_id + ' [' + prob_name + ']' + '.' + ext, 'w') + file.write(result) + file.close() end_time = time.time() -print 'Finished in %d seconds' % int(end_time - start_time) +duration_secs = int(end_time - start_time) +print 'Finished in %d minutes %d seconds' % (duration_secs / 60, duration_secs % 60) From ce7893c70a878e6ceaabaad536ce25522ece2394 Mon Sep 17 00:00:00 2001 From: NouemanKHAL Date: Wed, 13 Mar 2019 16:57:58 +0100 Subject: [PATCH 04/10] Removed forbidden characters from problem names Errors occurred because of invalid filename characters in problem names like "Valid BFS ?"; used re.sub to fix the problem's name --- main.py | 1 + 1 file changed, 1 insertion(+) diff --git a/main.py b/main.py index a9b73fd..2027999 100755 --- a/main.py +++ b/main.py @@ -67,6 +67,7 @@ def parse(source_code): new_directory = base_dir + '/' + str(con_id) if not os.path.exists(new_directory): os.makedirs(new_directory) + prob_name = re.sub(r'[\\/*?:"<>|]',"", prob_name) file = open(new_directory + '/' + prob_id + ' [' + prob_name + ']' + '.' 
+ ext, 'w') file.write(result) file.close() From ab8cd760e20ec0e3051363a74774d2342621c9c4 Mon Sep 17 00:00:00 2001 From: NouemanKHAL Date: Wed, 13 Mar 2019 17:03:25 +0100 Subject: [PATCH 05/10] Added Unicode Encoding to prob_name Fixed 'ascii' error by : prob_name.encode('utf-8') --- main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.py b/main.py index 2027999..e02fdd7 100755 --- a/main.py +++ b/main.py @@ -68,7 +68,7 @@ def parse(source_code): if not os.path.exists(new_directory): os.makedirs(new_directory) prob_name = re.sub(r'[\\/*?:"<>|]',"", prob_name) - file = open(new_directory + '/' + prob_id + ' [' + prob_name + ']' + '.' + ext, 'w') + file = open(new_directory + '/' + prob_id + ' [' + prob_name.encode('utf-8') + ']' + '.' + ext, 'w') file.write(result) file.close() end_time = time.time() From c9608a19964f0bab73462efd9ba661ea486cf1bb Mon Sep 17 00:00:00 2001 From: NouemanKHAL Date: Wed, 13 Mar 2019 17:13:11 +0100 Subject: [PATCH 06/10] Fixed ASCII Error for result variable Encoding the result variable into UNICODE fixed the error : result.encode('utf-8') --- main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.py b/main.py index e02fdd7..b76558c 100755 --- a/main.py +++ b/main.py @@ -69,7 +69,7 @@ def parse(source_code): os.makedirs(new_directory) prob_name = re.sub(r'[\\/*?:"<>|]',"", prob_name) file = open(new_directory + '/' + prob_id + ' [' + prob_name.encode('utf-8') + ']' + '.' 
+ ext, 'w') - file.write(result) + file.write(result.encode('utf-8')) file.close() end_time = time.time() From 330401c4f1d316d5d4056271be1bfefcd883b553 Mon Sep 17 00:00:00 2001 From: sandy9999 Date: Mon, 8 Jul 2019 13:02:33 +0530 Subject: [PATCH 07/10] Adds support for non-ascii unicode characters --- main.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/main.py b/main.py index a9b73fd..53a84e5 100755 --- a/main.py +++ b/main.py @@ -3,6 +3,7 @@ import sys import time, os from bs4 import BeautifulSoup +import codecs MAX_SUBS = 1000000 @@ -60,14 +61,14 @@ def parse(source_code): soup = BeautifulSoup(submission_info, 'html.parser') submission_text = soup.find('pre', id='program-source-text') if submission_text is None: - print 'Could not fectch solution %d', sub_id + print 'Could not fetch solution %d', sub_id continue result = submission_text.text.replace('\r', '') ext = get_ext(comp_lang) new_directory = base_dir + '/' + str(con_id) if not os.path.exists(new_directory): os.makedirs(new_directory) - file = open(new_directory + '/' + prob_id + ' [' + prob_name + ']' + '.' + ext, 'w') + file = codecs.open(new_directory + '/' + prob_id + ' [' + prob_name + ']' + '.' 
+ ext, 'w', encoding='utf-8') file.write(result) file.close() end_time = time.time() From 9b22508927fdbad90bf3e7f6adfd0c4b19c05671 Mon Sep 17 00:00:00 2001 From: Manish Date: Mon, 8 Jul 2019 14:30:53 +0530 Subject: [PATCH 08/10] Revert "Adds support for non-ascii unicode characters" --- main.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/main.py b/main.py index 53a84e5..a9b73fd 100755 --- a/main.py +++ b/main.py @@ -3,7 +3,6 @@ import sys import time, os from bs4 import BeautifulSoup -import codecs MAX_SUBS = 1000000 @@ -61,14 +60,14 @@ def parse(source_code): soup = BeautifulSoup(submission_info, 'html.parser') submission_text = soup.find('pre', id='program-source-text') if submission_text is None: - print 'Could not fetch solution %d', sub_id + print 'Could not fectch solution %d', sub_id continue result = submission_text.text.replace('\r', '') ext = get_ext(comp_lang) new_directory = base_dir + '/' + str(con_id) if not os.path.exists(new_directory): os.makedirs(new_directory) - file = codecs.open(new_directory + '/' + prob_id + ' [' + prob_name + ']' + '.' + ext, 'w', encoding='utf-8') + file = open(new_directory + '/' + prob_id + ' [' + prob_name + ']' + '.' + ext, 'w') file.write(result) file.close() end_time = time.time() From 43537a636f01a30da36fe4011fe40809008d8173 Mon Sep 17 00:00:00 2001 From: zegabr Date: Mon, 29 Jul 2019 13:13:47 -0300 Subject: [PATCH 09/10] line 70 gives "re not defined error", but commenting it works fine --- main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.py b/main.py index b76558c..4370397 100755 --- a/main.py +++ b/main.py @@ -67,7 +67,7 @@ def parse(source_code): new_directory = base_dir + '/' + str(con_id) if not os.path.exists(new_directory): os.makedirs(new_directory) - prob_name = re.sub(r'[\\/*?:"<>|]',"", prob_name) + #prob_name = re.sub(r'[\\/*?:"<>|]',"", prob_name) file = open(new_directory + '/' + prob_id + ' [' + prob_name.encode('utf-8') + ']' + '.' 
+ ext, 'w') file.write(result.encode('utf-8')) file.close() From 3c5b6586705f7ec339bd1a43068e5d36e3b41e24 Mon Sep 17 00:00:00 2001 From: zegabr Date: Mon, 29 Jul 2019 13:18:45 -0300 Subject: [PATCH 10/10] excluding line 70 --- main.py | 1 - 1 file changed, 1 deletion(-) diff --git a/main.py b/main.py index 4370397..4f69cc1 100755 --- a/main.py +++ b/main.py @@ -67,7 +67,6 @@ def parse(source_code): new_directory = base_dir + '/' + str(con_id) if not os.path.exists(new_directory): os.makedirs(new_directory) - #prob_name = re.sub(r'[\\/*?:"<>|]',"", prob_name) file = open(new_directory + '/' + prob_id + ' [' + prob_name.encode('utf-8') + ']' + '.' + ext, 'w') file.write(result.encode('utf-8')) file.close()