-
Notifications
You must be signed in to change notification settings - Fork 0
/
scraper.py
53 lines (41 loc) · 1.76 KB
/
scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import re
from utils import time_fmt
# Matches any HTML/XML-like tag; used to strip markup from snippet text.
html_tag = re.compile(r'<.*?>')

# Text shown when a vacancy carries no usable salary information.
_NO_SALARY = 'Зарплата не указана'


def _format_salary(salary: dict) -> str:
    """Render a vacancy's salary mapping as one human-readable line.

    Reads the ``from``, ``to``, ``currency`` and ``gross`` keys. The parts
    that are present are joined with single spaces, so the result never has
    doubled or trailing whitespace. If neither bound is set, the
    "salary not specified" text is returned instead of a bare currency.

    Raises:
        KeyError: if any of the four keys is missing.
    """
    # Look up every key first so a missing one is reported regardless of
    # which bounds happen to be set.
    lower, upper = salary['from'], salary['to']
    currency, gross = salary['currency'], salary['gross']

    parts = []
    if lower:
        parts.append(f'от {lower}')
    if upper:
        parts.append(f'до {upper}')
    if not parts:
        return _NO_SALARY

    parts.append('р.' if currency == 'RUR' else currency)
    if gross:
        parts.append('до вычета налогов')
    return ' '.join(parts)


def parse(body: dict) -> 'list[tuple[int, str, str, str, str, str]]':
    """Extract vacancy records from a decoded API response body.

    Args:
        body: mapping with an ``items`` sequence of vacancy mappings. Each
            item is read for ``id``, ``name``, ``employer.name``,
            ``snippet``, ``alternate_url`` and ``salary``.

    Returns:
        A list of ``(id, title, employer, description, url, salary)``
        tuples. The description is the non-null snippet values joined by
        blank lines with HTML tags removed; it is empty when the snippet is
        missing or empty.

    A body without ``items`` is logged and yields an empty list. An item
    that is missing a field or has a malformed one is logged and skipped,
    so one bad record does not discard the vacancies that follow it.
    """
    try:
        items = body['items']
    except (KeyError, TypeError):
        print(f'[{time_fmt()}] Ill-formed response body: {body}')
        return []

    vacancies = []
    for vacancy in items:
        try:
            snippet = vacancy['snippet']
            description = (
                '\n\n'.join(text for text in snippet.values() if text is not None)
                if snippet else ''
            )
            salary = vacancy['salary']
            vacancies.append((
                int(vacancy['id']),
                vacancy['name'],
                vacancy['employer']['name'],
                html_tag.sub('', description),
                vacancy['alternate_url'],
                _format_salary(salary) if salary else _NO_SALARY,
            ))
        except (KeyError, TypeError, ValueError):
            print(f'[{time_fmt()}] Ill-formed vacancy skipped: {vacancy}')
    return vacancies
def main(timeout: float = 10.0):
    """Fetch one page of vacancies from the hh.ru API and print them.

    Sends a GET request to ``https://api.hh.ru/vacancies`` with the query
    ``text=java&area=1``, passes the decoded JSON to ``parse`` and prints
    each vacancy's id, title, employer and description followed by a
    separator line. Prints a "no new vacancies" line when nothing was
    parsed.

    Args:
        timeout: seconds to wait for the HTTP request before giving up.
            Without a timeout a stalled connection would block forever.

    A network failure or a non-JSON response is logged and the function
    returns without printing vacancies, so a caller polling in a loop keeps
    running.
    """
    import requests

    try:
        r = requests.get(
            'https://api.hh.ru/vacancies?text=java&area=1',
            headers={'User-Agent': 'TestBot/0.1'},
            timeout=timeout,
        )
        body = r.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers JSON decode errors on requests versions where
        # that error is not a RequestException subclass.
        print(f'[{time_fmt()}] Request failed: {e}')
        return

    vacancies = parse(body)
    for vacancy in vacancies:
        print(f'[{vacancy[0]}] {vacancy[1]} {{{vacancy[2]}}}\n{vacancy[3]}')
        print('==========================================')
    if not vacancies:
        print('[[Новых вакансий нет]]')
if __name__ == '__main__':
    import time

    # Pause, in seconds, between two consecutive runs of main().
    poll_interval_seconds = 30

    # Script entry point: run main() over and over until the process is
    # interrupted, sleeping between runs.
    while True:
        main()
        time.sleep(poll_interval_seconds)