-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparse_anekdot.py
37 lines (32 loc) · 1.19 KB
/
parse_anekdot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import requests
from bs4 import BeautifulSoup
# Pages on anekdot.ru that parse_anekdots() downloads.
URL_ANEK = "https://www.anekdot.ru/random/anekdot/"
URL_STORY = "https://www.anekdot.ru/last/story/"

# Accepted values for the ``content_type`` argument of parse_anekdots().
ANEKDOT_TYPE = "anekdots"
STORY_TYPE = "stories"
def parse_anekdots(content_type=ANEKDOT_TYPE, timeout=10):
    """Download a page from anekdot.ru and return its entries as one string.

    Args:
        content_type: ``ANEKDOT_TYPE`` to read ``URL_ANEK`` or ``STORY_TYPE``
            to read ``URL_STORY``.
        timeout: Seconds passed to ``requests.get`` so that a stalled server
            cannot block the caller indefinitely.

    Returns:
        The concatenated entries, each followed by a blank line. For
        ``ANEKDOT_TYPE`` every entry is preceded by the text of its
        ``p.title`` element; for ``STORY_TYPE`` every entry is followed by a
        ``-------`` separator line. Returns ``'No content'`` for an unknown
        ``content_type`` and ``'Something went wrong'`` when the server
        answers with an error status or an entry cannot be processed.

    Raises:
        Exceptions raised by ``requests.get`` or by the ``BeautifulSoup``
        constructor are not caught here and propagate to the caller.
    """
    if content_type == ANEKDOT_TYPE:
        url = URL_ANEK
    elif content_type == STORY_TYPE:
        url = URL_STORY
    else:
        return 'No content'

    response = requests.get(url, timeout=timeout)
    if not response.ok:
        # An error page has no entries; report the failure instead of
        # silently returning an empty string.
        return 'Something went wrong'

    soup = BeautifulSoup(response.text, features="lxml")
    topic_divs = soup.find_all('div', class_='topicbox')

    parts = []
    try:
        for div in topic_divs:
            # Only boxes carrying a data-id attribute are treated as entries.
            if not div.has_attr('data-id'):
                continue

            text_div = div.find('div', class_='text')
            if text_div is None:
                # Skip boxes without a text body rather than emitting the
                # literal string "None".
                continue

            if content_type == ANEKDOT_TYPE:
                title = div.find('p', class_='title')
                # A missing title must not discard every other entry.
                if title is not None:
                    parts.append(title.text + '\n')

            # decode_contents() yields the inner markup without the wrapping
            # <div>, whatever attributes that wrapper carries.
            body = text_div.decode_contents().replace('<br/>', '\n')
            parts.append(body + '\n\n')

            if content_type == STORY_TYPE:
                parts.append('-------\n')
    except Exception:
        # Best-effort boundary: keep returning the sentinel string, but let
        # KeyboardInterrupt/SystemExit through (a bare except would not).
        return 'Something went wrong'

    return ''.join(parts)