-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcli.py
97 lines (79 loc) · 2.97 KB
/
cli.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import abc
import logging
import os
import pickle
import sys
import time
# importing date class from datetime module
from datetime import date
import requests
from lxml import etree
from selenium import webdriver
class Client(abc.ABC):
    """Base class for a client that polls an apartment listing page.

    An instance keeps a pickled set of already visited urls (``file``),
    loads the listing found at ``baseurl`` through a browser session and
    appends the urls collected in ``trgt_aparts`` to ``outfile``.

    ``find_new`` and ``parse_listings`` raise ``NotImplementedError`` here
    and have to be provided by concrete clients.
    """

    # filename to store/load already visited urls; defaults to None and is
    # read by __init__ (via _load_visited), so it must be set before that
    file: str = None
    # the url to load the listing of the apartments
    baseurl: str = None
    # the output filename (abs path) to save results
    outfile: str = None

    def __init__(self, **kwargs):
        """Set up the logger, the visited-url cache and the browser session.

        Args:
            **kwargs: accepted for compatibility; not used by this class.
        """
        # create a logger with the instance's class name
        self.logger = logging.getLogger(self.__class__.__name__ + ".log")
        # data structure holding the already visited urls
        self.visited: set[str] = self._load_visited()
        # a list of new urls from the baseurl search
        self.new_aparts: list[str] = []
        # a sub list of the above which fit our requirements
        self.trgt_aparts: list[str] = []
        # Set explicit HTMLParser
        self.parser = etree.HTMLParser()
        # create a persistent (keep alive) session
        self.client = self._open_session()

    def find_new(self):
        """Hook for concrete clients; not implemented in the base class.

        Raises:
            NotImplementedError: always.
        """
        raise NotImplementedError()

    def parse_listings(self) -> list[str]:
        """Hook returning the urls of the currently loaded listing.

        Its return value is stored in ``new_aparts`` by
        ``refresh_listings``.

        Raises:
            NotImplementedError: always, in the base class.
        """
        raise NotImplementedError()

    def refresh_listings(self, delay: float = 2.0):
        """Load ``baseurl`` and refresh ``new_aparts`` from the result.

        On a request or decoding failure the error is logged and
        ``new_aparts`` is left unchanged.

        Args:
            delay: seconds to sleep before contacting ``baseurl``.
        """
        time.sleep(delay)
        try:
            self.client.get(self.baseurl)
        except UnicodeDecodeError as e:
            # The exception has to be bound here: the message below uses it,
            # and without ``as e`` this handler itself raised NameError.
            self.logger.error(
                "Failed to decode the html from baseurl: %s", e
            )
        except requests.exceptions.RequestException as e:
            # NOTE(review): _open_session returns a Selenium driver, whose
            # failures are presumably not requests exceptions -- confirm
            # whether a driver-specific exception should be handled as well.
            self.logger.error("Failed to contact baseurl: %s", e)
        else:
            self.new_aparts = self.parse_listings()

    def exit(self, status: int):
        """Write the results, persist the visited urls and terminate.

        The browser session is closed exactly once, even when writing the
        results or the cache fails.

        Args:
            status: process exit status handed to ``sys.exit``.

        Raises:
            SystemExit: always, once the cleanup succeeded.
        """
        try:
            self.print_results()
            self._save_visited()
        finally:
            # NOTE(review): for a Selenium driver ``quit()`` may be the more
            # complete teardown; ``close()`` is kept as in the original.
            self.client.close()
        sys.exit(status)

    def print_results(self):
        """Append today's target urls to ``outfile`` between two banners."""
        # creating the date object of today's date
        todays_date = str(date.today())
        banner = "---===" * 5 + todays_date + "---===" * 5 + "\n"
        with open(self.outfile, "a") as fout:
            fout.write("\n\n")
            fout.write(banner)
            fout.writelines(f"{url}\n" for url in self.trgt_aparts)
            fout.write(banner)

    def _load_visited(self) -> set[str]:
        """Return the pickled set of visited urls stored in ``file``.

        An empty set is returned when the file does not exist or is not
        larger than 4 bytes.
        """
        ret: set[str] = set()
        try:
            # files of at most 4 bytes are skipped instead of unpickled
            if os.path.getsize(self.file) > 4:
                with open(self.file, 'rb') as fin:
                    # SECURITY: unpickling can execute arbitrary code, so
                    # ``file`` must only ever point to a trusted local cache.
                    ret = pickle.load(fin)
        except FileNotFoundError:
            # todo handle logging properly.
            print(f"Info: {self.file} was not found")
        return ret

    def _save_visited(self):
        """Pickle the set of visited urls into ``file``.

        This only writes the cache. Closing the browser session is left to
        ``exit`` so that the session is not closed twice on shutdown.
        """
        with open(self.file, 'wb') as fout:
            pickle.dump(self.visited, fout)

    @staticmethod
    def _open_session() -> "webdriver.Firefox":
        """Start and return a new Firefox webdriver instance."""
        return webdriver.Firefox()