-
Notifications
You must be signed in to change notification settings - Fork 1
/
main.py
145 lines (117 loc) · 4.25 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
#!/usr/bin/env python3
import logging, json, os, sys
from github import Github
from github.GithubException import UnknownObjectException
from github.GithubException import GithubException
# database
# TODO
# import sqlite3
# from sqlite3 import Error
# multithreading:
# TODO
# import threading
import time
from datetime import datetime, timedelta
# debugging:
import pdb
# Root logger level; change to DEBUG, WARNING, INFO, etc. for different levels.
# logger = logging.getLogger('dev')
logging.getLogger().setLevel(logging.INFO)

# GitHub API client, authenticated with the token taken from the
# GITHUB_TOKEN environment variable (a KeyError here means it is not set).
GITHUB_TOKEN = os.environ['GITHUB_TOKEN']
g = Github(GITHUB_TOKEN)

# NOTE(review): if this is ever re-enabled, parse the file with json or
# ast.literal_eval instead of eval -- eval executes arbitrary code.
# LICENSES = eval(open("licenses").read())

# Read the project list. The encoding is given explicitly because JSON is
# UTF-8 while open() otherwise falls back to the platform's locale encoding.
with open('github-projects-list.json', encoding='utf-8') as complete_list:
    real_data = json.load(complete_list)

# tmp or real data choice:
project_list = real_data['data']['allProjects']
# NOTE(review): same eval caveat as above applies to the test-data variant.
# json_data = eval(test_data)
# project_list = json_data['data']['allProjects']
def clean_up(list):
    """Drop every project that has no GitHub link from the project list.

    The list is filtered in place and also returned, so a caller that keeps
    a reference to the list it passed in sees the cleaned result.

    Args:
        list: Project dicts. A project is kept only when its 'githubLinks'
            value is truthy; a missing key counts as "no link". (The
            parameter name shadows the builtin but is kept so existing
            callers are unaffected.)

    Returns:
        The same list object, now holding only projects with a GitHub link.
    """
    # Slice-assign a filtered copy instead of calling remove() during
    # iteration: removing while iterating skips the element that follows
    # each removed one, so consecutive link-less projects used to survive.
    list[:] = [item for item in list if item.get('githubLinks')]
    return list
# print(project_list)
# Project list after it has been passed through clean_up().
PROJECT_LIST = clean_up(project_list)

# Running tallies of repositories with and without a license.
LICENSE_COUNT = 0
NO_LICENSE_COUNT = 0

# Every license_info tuple recorded so far.
ALL_LICENSES = []

# check_rate_limit() starts waiting once g.rate_limiting[0] drops below this.
RATE_LIMIT_MINIMUM = 30
def get_type(user):
    """Return the ``type`` attribute of the given user object."""
    account_type = user.type
    return account_type
def get_license_info(name, repo):
    """Look up the license of ``repo`` and return it as a 3-tuple.

    Args:
        name: Owner name; copied into the result and the log messages.
        repo: Repository object providing ``get_license()`` and ``name``.

    Returns:
        ``(name, repo.name, spdx_id)`` on success,
        ``(name, repo.name, 0)`` when UnknownObjectException is raised, or
        ``(name, repo.name, "UNACCESSIBLE")`` on any other GithubException.
    """
    try:
        license_details = repo.get_license().license
        # The tuple is built inside the try so that errors raised while
        # reading the repo/license attributes are handled below as well.
        return (name, repo.name, license_details.spdx_id)
    except UnknownObjectException:
        logging.warning(f'UnknownObjectException - {name}: {repo.name} no license')
        return (name, repo.name, 0)
    except GithubException:
        logging.warning(f'GithubException - {name}: {repo.name} UNACCESSIBLE!')
        return (name, repo.name, "UNACCESSIBLE")
def count_licenses(name, license_info, ALL_LICENSES, LICENSE_COUNT, NO_LICENSE_COUNT, repo):
    """Record one repository's license result and update the tallies.

    Args:
        name: Owner name, used only in the "no license" log line.
        license_info: Tuple as built by get_license_info(); its third
            element is 0 when the repository has no license.
        ALL_LICENSES: List the tuple is appended to (mutated in place).
        LICENSE_COUNT: Number of licensed repositories counted so far.
        NO_LICENSE_COUNT: Number of unlicensed repositories counted so far.
        repo: Repository object; only its ``name`` attribute is read.

    Returns:
        ``(LICENSE_COUNT, NO_LICENSE_COUNT)`` after counting this entry.
        Ints are immutable, so the caller has to store the returned values;
        incrementing the parameters alone never reaches the caller.
    """
    ALL_LICENSES.append(license_info)
    # Decide from the data itself. The previous check (`try: license`)
    # looked up the site builtin `license`, which normally exists, so the
    # "no license" branch could never run.
    # NOTE: any other marker (e.g. "UNACCESSIBLE") still lands in the
    # else-branch, as it did before.
    if license_info[2] == 0:
        NO_LICENSE_COUNT += 1
        logging.info(f'({name}, {repo.name}, 0)')
    else:
        LICENSE_COUNT += 1
        logging.info(license_info)
    return LICENSE_COUNT, NO_LICENSE_COUNT


def loop_through_repos(name, repo_list):
    """Inspect every repository of one owner and write out its license.

    For each repository this prints it, fetches its license info, updates
    the module-level tallies and ALL_LICENSES, logs the running licensed
    count, and appends the result to the results file.

    Args:
        name: Owner name passed through to the helpers.
        repo_list: Iterable of repository objects.
    """
    global NO_LICENSE_COUNT
    global LICENSE_COUNT
    for repo in repo_list:
        print(name, repo)
        license_info = get_license_info(name, repo)
        # Store the returned tallies back into the globals; passing the
        # ints in by value does not update them.
        LICENSE_COUNT, NO_LICENSE_COUNT = count_licenses(
            name, license_info, ALL_LICENSES, LICENSE_COUNT, NO_LICENSE_COUNT, repo)
        logging.info(LICENSE_COUNT)
        write_to_file(license_info)
def get_name(project):
    """Return the value stored under the 'name' key of ``project``."""
    project_name = project['name']
    return project_name
def write_to_file(item):
    """Append ``item`` as one comma-separated line to results.csv.

    Each element of ``item`` is converted with ``str`` and the elements are
    joined with ', '; the line ends with a space followed by a newline.
    """
    row = ', '.join(str(field) for field in item)
    with open('results.csv', 'a') as results:
        results.write(f'{row} \n')
def check_rate_limit():
    """Pause until the GitHub rate limit resets when few calls remain.

    Reads the rate-limit state from the module-level client ``g``. When
    ``g.rate_limiting[0]`` is below RATE_LIMIT_MINIMUM, the function sleeps
    in two-minute steps until ``g.rate_limiting_resettime`` has passed,
    logging the current state and the reset time (UTC) on every step.
    Otherwise it returns immediately.
    """
    # Function-scope import: the module only imports datetime and timedelta.
    from datetime import timezone

    # RATE_LIMIT_MINIMUM is only read here, so no `global` statement is needed.
    if g.rate_limiting[0] >= RATE_LIMIT_MINIMUM:
        return

    # wait until the reset time if remaining calls are few
    while time.time() < g.rate_limiting_resettime:
        logging.warning('Waiting 2m for Rate Limit to top off...')
        logging.warning(f'Current rate_limiting: {g.rate_limiting}')
        # fromtimestamp(..., tz=utc) replaces datetime.utcfromtimestamp,
        # which is deprecated since Python 3.12; the formatted text is the same.
        reset_at = datetime.fromtimestamp(g.rate_limiting_resettime, tz=timezone.utc)
        readable_time = reset_at.strftime('%Y-%m-%d %H:%M:%S')
        logging.warning(f'Next reset at: {readable_time}')
        time.sleep(120)
def main_loop():
    """Crawl every project in PROJECT_LIST and record its repo licenses.

    Empties results.csv, then for each project: waits for the rate limit if
    necessary, looks the project name up as a GitHub user, and hands that
    user's repositories to loop_through_repos(). Projects whose user lookup
    fails are logged and skipped.
    """
    # Start from an empty results file; the rows themselves are appended by
    # write_to_file(), so the handle does not need to stay open.
    with open('results.csv', 'w'):
        pass

    for project in PROJECT_LIST:
        # Checked per project rather than once up front, so a long crawl
        # pauses when the remaining call budget runs low.
        check_rate_limit()
        name = get_name(project)
        logging.info(f'Name: {name}')
        try:
            user = g.get_user(name)
        except Exception as err:
            # Still best-effort (skip and carry on), but the failure is now
            # logged, and KeyboardInterrupt/SystemExit are no longer
            # swallowed as they were by the bare `except:`.
            logging.warning(f'Skipping {name}: could not fetch user ({err})')
            continue
        # get repos and loop over them
        repos = user.get_repos()
        loop_through_repos(name, repos)
# Start the crawl only when this file is executed as a script.
if __name__ == '__main__':
    main_loop()