Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixing solution for CF HTML change, Updated README #3

Open
wants to merge 14 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
.DS_Store
Solutions
20 changes: 18 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,22 @@
CodeforcesSolutionDownloader
============================

a small script for downloading ACCEPTED solutions from codeforces.com
A small script for downloading *ACCEPTED* solutions from codeforces.com

Change the handle variable's value to your handle and then run main.py. It will create folders with the CF rounds and place your solutions there.
Install the dependency `bs4`
```
pip install bs4
```

Run main.py and pass the user handle as an argument

```
python main.py <user_handle>
```

For example,
```
python main.py Petr
```

It will create a folder named after the user handle under the `Solutions` folder, with one subfolder per Codeforces round, and place the solutions there.
70 changes: 40 additions & 30 deletions main.py
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,14 +1,18 @@
import urllib
import json
import sys
import time, os
from bs4 import BeautifulSoup

MAX_SUBS = 1000000
MAX_CF_CONTEST_ID = 600
MAGIC_START_POINT = 17000

handle='tacklemore'
if (len(sys.argv) < 2):
print 'Usage: python main.py <handle>'
exit(1)

SOURCE_CODE_BEGIN = '<pre class="prettyprint program-source" style="padding: 0.5em;">'
handle = sys.argv[1]

DOWNLOAD_DIR = 'Solutions'
SUBMISSION_URL = 'http://codeforces.com/contest/{ContestId}/submission/{SubmissionId}'
USER_INFO_URL = 'http://codeforces.com/api/user.status?handle={handle}&from=1&count={count}'

Expand All @@ -31,36 +35,42 @@ def parse(source_code):
source_code = source_code.replace(key, replacer[key])
return source_code

if not os.path.exists(handle):
os.makedirs(handle)
base_dir = DOWNLOAD_DIR + '/' + handle
if not os.path.exists(base_dir):
os.makedirs(base_dir)

user_info = urllib.urlopen(USER_INFO_URL.format(handle=handle, count=MAX_SUBS)).read()
dic = json.loads(user_info)
user_info_full_url = USER_INFO_URL.format(handle=handle, count=MAX_SUBS)
print 'Fetching user status: %s' % user_info_full_url
dic = json.loads(urllib.urlopen(user_info_full_url).read())
if dic['status'] != u'OK':
print 'Oops.. Something went wrong...'
exit(0)
print 'Couldn\'t fetch user status'
exit(1)

submissions = dic['result']
start_time = time.time()
submissions = [submission for submission in dic['result'] if submission['verdict'] == u'OK']
print 'Fetching %d submissions' % len(submissions)

start_time = time.time()
for submission in submissions:
if submission['verdict'] == u'OK' and submission['contestId'] < MAX_CF_CONTEST_ID:
con_id, sub_id = submission['contestId'], submission['id'],
prob_name, prob_id = submission['problem']['name'], submission['problem']['index']
comp_lang = submission['programmingLanguage']
submission_info = urllib.urlopen(SUBMISSION_URL.format(ContestId=con_id, SubmissionId=sub_id)).read()

start_pos = submission_info.find(SOURCE_CODE_BEGIN, MAGIC_START_POINT) + len(SOURCE_CODE_BEGIN)
end_pos = submission_info.find("</pre>", start_pos)
result = parse(submission_info[start_pos:end_pos]).replace('\r', '')
ext = get_ext(comp_lang)

new_directory = handle + '/' + str(con_id)
if not os.path.exists(new_directory):
os.makedirs(new_directory)
file = open(new_directory + '/' + prob_id + '[ ' + prob_name + ' ]' + '.' + ext, 'w')
file.write(result)
file.close()
con_id, sub_id = submission['contestId'], submission['id'],
prob_name, prob_id = submission['problem']['name'], submission['problem']['index']
comp_lang = submission['programmingLanguage']
submission_full_url = SUBMISSION_URL.format(ContestId=con_id, SubmissionId=sub_id)
print 'Fetching submission: %s' % submission_full_url
submission_info = urllib.urlopen(submission_full_url).read()
soup = BeautifulSoup(submission_info, 'html.parser')
submission_text = soup.find('pre', id='program-source-text')
if submission_text is None:
print 'Could not fectch solution %d', sub_id
continue
result = submission_text.text.replace('\r', '')
ext = get_ext(comp_lang)
new_directory = base_dir + '/' + str(con_id)
if not os.path.exists(new_directory):
os.makedirs(new_directory)
file = open(new_directory + '/' + prob_id + ' [' + prob_name.encode('utf-8') + ']' + '.' + ext, 'w')
file.write(result.encode('utf-8'))
file.close()
end_time = time.time()

print 'Execution time %d seconds' % int(end_time - start_time)
duration_secs = int(end_time - start_time)
print 'Finished in %d minutes %d seconds' % (duration_secs / 60, duration_secs % 60)