-
Notifications
You must be signed in to change notification settings - Fork 0
/
communities.py
41 lines (32 loc) · 1.51 KB
/
communities.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import requests
from bs4 import BeautifulSoup
import re
from query import Query
from sub_communities import sub_community
# Top-level forum link inside a ``trow1``/``trow2`` cell. Captures the relative
# href and the forum name in one pass, so the href never has to be recovered by
# interpolating the (unescaped) name into a second regex.
_COMMUNITY_LINK_RE = re.compile(
    r'<td class="trow[1-2]">\s*<strong>'
    r'<a href="(forumdisplay\.php\?fid=[0-9]+)">(.*?)</a></strong>'
)

# Sub-forum link (an anchor carrying an empty ``title`` attribute). Only the
# relative href is captured; the link text is not needed to build the URL.
_SUB_COMMUNITY_LINK_RE = re.compile(
    r'<a href="(forumdisplay\.php\?fid=[0-9]+)" title="">'
)

# Literal markup whose presence in a table triggers the sub-forum crawl.
_SUB_COMMUNITY_MARKER = '<div class="smalltext">'


def community(url, cookie):
    """Scrape the forum index at *url* and hand its contents to ``Query``.

    The page is fetched and parsed, then every ``<table>`` that has a
    ``<thead>`` is processed as follows:

    1. ``Query(table).tables()`` is called with the table element.
    2. Each forum link found in a ``trow1``/``trow2`` cell is appended to a
       running list as ``(name, url + href)`` and ``Query(list).community()``
       is called with the list accumulated so far (the list is shared across
       all tables of the page).
    3. If the table's markup contains ``<div class="smalltext">``, every
       sub-forum link in the table is passed to
       ``sub_community(url + href, cookie)``.

    Args:
        url: Address of the forum index. Relative ``forumdisplay.php?...``
            hrefs are appended to it by plain string concatenation, so it is
            presumably expected to end with ``/`` -- TODO confirm with callers.
        cookie: Forwarded unchanged to ``requests.get`` (as ``params``, see
            the note in the body) and to ``sub_community``.

    Returns:
        The constant string ``"communities and title tables"``.
    """
    # NOTE(review): the original call passed ``cookie`` as the second
    # positional argument, which ``requests.get`` binds to ``params`` (query
    # string), not ``cookies``. The keyword is spelled out here without
    # changing behaviour; if ``cookie`` is a dict/CookieJar this should most
    # likely be ``cookies=cookie``. ``verify=False`` also disables TLS
    # certificate verification.
    response = requests.get(url, params=cookie, verify=False)
    soup = BeautifulSoup(response.text, "html.parser")

    # NOTE(review): the nesting below was reconstructed from a copy of this
    # file whose indentation had been lost. ``Query(...).community()`` is kept
    # inside the per-community loop and the sub-forum crawl at table level;
    # confirm this against the intended flow.
    community_data = []
    for table in soup.find_all("table"):
        # Tables without a <thead> used to abort the whole crawl with an
        # AttributeError (``None.text``); skip them instead.
        if table.find("thead") is None:
            continue

        table_html = str(table)  # serialise once, reuse for every search
        Query(table).tables()

        for href, name in _COMMUNITY_LINK_RE.findall(table_html):
            community_data.append((name, url + href))
            Query(community_data).community()

        if _SUB_COMMUNITY_MARKER in table_html:
            for sub_href in _SUB_COMMUNITY_LINK_RE.findall(table_html):
                sub_community(url + sub_href, cookie)

    return "communities and title tables"