forked from funnelferry/GSoC-Scrapper
-
Notifications
You must be signed in to change notification settings - Fork 2
/
GSoC_orgs.py
52 lines (39 loc) · 1.61 KB
/
GSoC_orgs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import requests
from bs4 import BeautifulSoup
from pandas import DataFrame
# Archive index page listing the organizations of GSoC 2018.
url = 'https://summerofcode.withgoogle.com/archive/2018/organizations/'
# A timeout stops a stalled connection from blocking the script indefinitely.
r = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of parsing an error page.
r.raise_for_status()
soup = BeautifulSoup(r.content, 'html.parser')

# Every <li> matched here is treated as one organization entry whose first
# anchor holds the path of that organization's page (the host is prepended
# below, so the hrefs are presumably site-relative -- verify against the page).
rows = soup.select('section div ul li')
link_list = []
for row in rows:
    anchor = row.select_one('a')
    # Skip list items without a usable link rather than crashing on them.
    if anchor is None or not anchor.get('href'):
        continue
    link_list.append('https://summerofcode.withgoogle.com' + anchor['href'])
# Output columns, one entry per organization. Each pass through the loop below
# appends exactly one value to every list so the columns stay the same length.
OrgName = []
Contactlink = []
techlist = []
slots = []
ideas = []


def _href_or_blank(page, selector):
    """Return the ``href`` of the first element matching *selector*, or ''.

    A blank string is returned both when nothing matches and when the matched
    element has no ``href`` attribute.
    """
    node = page.select_one(selector)
    if node is None:
        return ''
    return node.get('href', '')


for org_url in link_list:
    # A timeout keeps one stalled connection from hanging the whole scrape;
    # an HTTP error aborts instead of silently producing a blank row.
    r = requests.get(org_url, timeout=30)
    r.raise_for_status()
    soup = BeautifulSoup(r.text, 'html.parser')

    # Organization name, written as a spreadsheet HYPERLINK formula that points
    # back at the organization page. Fall back to the URL if no name is found.
    org = soup.find('div', class_="banner__text")
    heading = org.h3 if org is not None else None
    org_title = heading.text if heading is not None else org_url
    # Inside a spreadsheet formula string a literal double quote is escaped by
    # doubling it; an unescaped quote in the name would break the formula.
    safe_title = org_title.replace('"', '""')
    OrgName.append(f'=HYPERLINK("{org_url}","{safe_title}")')

    # Technology tags, flattened into one comma-separated cell.
    technologies = soup.find_all('li', class_="organization__tag--technology")
    techlist.append(', '.join(tag.text for tag in technologies))

    # Link behind the first ".org__meta-button" element, used as the contact.
    Contactlink.append(_href_or_blank(soup, ".org__meta-button"))

    # Slot count = number of <li> elements under the project list (0 if the
    # page has no project list at all).
    projects = soup.find('ul', class_="project-list-container")
    slots.append(len(projects.find_all('li')) if projects is not None else 0)

    # Link behind the button in the organization's button container, used as
    # the ideas page.
    ideas.append(_href_or_blank(soup, ".org__button-container md-button"))
# Assemble the scraped columns into one table, one row per organization.
table = {
    'Org': OrgName,
    'Technologies': techlist,
    'Slots': slots,
    'Ideas Page': ideas,
    'Contact': Contactlink,
}
df = DataFrame(table)
# to_csv returns None when given a path, so its result is not kept. The
# default index column is still written, as before.
df.to_csv('GSoC-Orgs.csv')
print('Done!')