-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest_all_sites.py
122 lines (101 loc) · 4.59 KB
/
test_all_sites.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import unittest
from app import *
from http.server import HTTPServer, SimpleHTTPRequestHandler
import threading
import time
# Only one site per loopback address can be tested automatically,
# because analysis is saved on a per-domain basis
# (multiple sites on localhost would all share the same "domain" - "localhost"),
# and Scrapy does not allow sequential runs (the Twisted reactor can't be restarted).
class TestSpider(unittest.TestCase):
    """End-to-end test of ScoringTool against locally served offline sites."""

    def test_scoring(self):
        """Crawl four offline site copies and check statuses and scores.

        Each offline copy under ``./tests/offline_sites`` is served by its own
        ``HTTPServer`` bound to a distinct 127.0.0.x address, and the resulting
        statistics are looked up by that address. The test then walks the
        crawler through start -> (rejected second start) -> crawling ->
        stopping -> ready and finally checks the reported scores.
        """
        # Imported explicitly instead of relying on ``from app import *``
        # happening to re-export the module.
        import configparser

        OFFLINE_DIR = r'./tests/offline_sites'
        PORT = 8000

        # (loopback address, directory of the offline copy served on it)
        sites = (
            ('127.0.0.3', 'www.tilde.lv'),
            ('127.0.0.5', 'www.norden.org'),
            ('127.0.0.6', 'europa.eu'),
            ('127.0.0.13', 'census.gov.uk'),
        )

        class QuietHandler(SimpleHTTPRequestHandler):
            # Don't flood the console with per-request log lines.
            def log_message(self, format, *args):
                pass

        def make_handler(directory):
            """Return a QuietHandler subclass serving files from *directory*."""
            class DirectoryHandler(QuietHandler):
                def __init__(self, *args, **kwargs):
                    super().__init__(*args, directory=directory, **kwargs)
            return DirectoryHandler

        for host, site in sites:
            httpd = HTTPServer((host, PORT), make_handler(OFFLINE_DIR + '/' + site))
            # Cleanups run in reverse order of registration, so on exit the
            # serve loop is shut down first and the socket is closed after it.
            # Registering them here (instead of shutting down on the last
            # lines of the test) releases the servers even when an assertion
            # or a later bind fails.
            self.addCleanup(httpd.server_close)
            # Serve on a separate daemon thread so the test itself is not blocked.
            thread = threading.Thread(target=httpd.serve_forever, daemon=True)
            thread.start()
            self.addCleanup(httpd.shutdown)

        urls = [f'http://{host}:{PORT}/' for host, _ in sites]

        config = configparser.ConfigParser(interpolation=None)
        config.read('settings.ini')
        # Override the configured download delay for this local run.
        config['crawler']['DOWNLOAD_DELAY'] = "0"

        report_config = configparser.ConfigParser(interpolation=None)
        report_config.read('report_settings.ini')

        from modules.scoring_tool import ScoringTool
        scorer = ScoringTool(config, report_config)

        # First start is accepted.
        response = scorer.start_crawl(urls, hops=1)
        self.assertEqual(response['status'], "crawling")
        self.assertEqual(response['message'], f"Started crawling of {len(urls)} urls.")

        # A second start while the crawl is running must be rejected.
        response = scorer.start_crawl(urls, hops=1)
        self.assertEqual(response['status'], "error")
        self.assertEqual(response['message'], "Can not start, already crawling.")

        time.sleep(10)
        response = scorer.get_crawl_progress_status()
        self.assertEqual(response['status'], "crawling")
        self.assertEqual(response['message'], "crawling")

        time.sleep(10)
        response = scorer.stop_crawl()
        self.assertEqual(response['status'], "stopping")
        self.assertEqual(response['message'], "stopping")

        time.sleep(10)  # Give the crawl time to stop normally.
        response = scorer.stop_crawl()
        if response['status'] != "ready":
            # Allow one more grace period before giving up.
            time.sleep(10)
            response = scorer.stop_crawl()
        self.assertEqual(response['status'], "ready")
        self.assertEqual(response['message'], "ready")

        response = scorer.get_current_stats()

        score, _stats = response['127.0.0.3']
        self.assertEqual(score, '4.17')  # monolingual - latvian

        # The remaining sites only need to produce a non-zero score.
        for host in ('127.0.0.5', '127.0.0.6', '127.0.0.13'):
            score, _stats = response[host]
            self.assertNotEqual(score, '0.00', msg=host)
# Run the tests in this module when the file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()