#!/usr/bin/env python
#
# Copyright (c) 2010 Samuel Tardieu <sam@rfc1149.net>
#
"""Request broker for 0MQ with timeout/retry handling."""

from __future__ import print_function

__author__ = "Samuel Tardieu <sam@rfc1149.net>"

import collections
import socket
import time
import uuid
from datetime import datetime

import yaml
import zmq

interrupted = False


# Compatibility between Python 2 and Python 3
def to_bytes(n):
    try:
        return bytes(str(n))
    except TypeError:
        return bytes(str(n), "ascii")
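
# e.g. to_bytes(42) == b"42" under both Python 2 (where bytes is str) and
# Python 3 (where the TypeError branch supplies an explicit "ascii" encoding).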


def extract_path(request):
    """Separate routing information and data from an incoming request."""
    i = request.index(b"")
    return request[:i+1], request[i+1:]
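
# e.g. a ROUTER-framed request [client_id, b"", cmd, arg] is split into the
# routing path [client_id, b""] and the payload [cmd, arg].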


class Request:
    """A request that has entered the system."""

    def __init__(self, request, timeout, tries):
        """Create a new request with a maximum number of tries.
        After that, the request will be considered a failure."""
        self.path, self.content = extract_path(request)
        self.deadline = None
        self.timeout = timeout
        self.tries = tries
        self.id = uuid.uuid1().bytes

    def update_deadline(self):
        """Update the request deadline if a timeout has been specified."""
        if self.timeout is not None:
            self.deadline = time.time() + self.timeout
        if self.tries is not None:
            self.tries -= 1

    def make_request(self):
        """Return a request prepended by its unique ID to be sent to a worker.
        This also updates the request deadline."""
        self.update_deadline()
        return [self.id] + self.content

    def expired(self):
        """True if the maximum number of tries has been performed."""
        # tries is None means "retry forever": such a request never expires.
        return self.tries is not None and self.tries <= 0

    def timed_out(self):
        """True if we are past the request execution deadline."""
        return self.deadline is not None and time.time() > self.deadline

    def __repr__(self):
        return 'Request({}, {}, {}, {})'.format(self.content, self.timeout,
                                                self.tries, self.id)

    def __str__(self):
        cmd = yaml.load(self.content[0])['=']
        return cmd
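
# A request moves through three states: queued in `waiting`, handed to a
# worker and tracked in `processing`/`bypath`, then either answered, requeued
# (on timeout or explicit failure) or given up on once its tries run out.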


class Requests:
    """Currently executing requests."""

    def __init__(self, timeout, tries):
        """Create a new empty requests set."""
        if timeout is not None and timeout < 0:
            raise Broker.Error("timeout must be non-negative or None")
        if tries is not None and tries <= 0:
            raise Broker.Error("tries must be positive or None")
        self.waiting = collections.deque()
        self.processing = collections.deque()
        self.bypath = {}
        self.timeout = timeout
        self.tries = tries

    def create_request(self, request, tries=None):
        """Create a new Request object and make it enter the system."""
        tries_ = self.tries if tries is None else tries
        return self.enqueue_request(Request(request, self.timeout, tries_))

    def enqueue_request(self, request):
        """Enqueue a Request object so that it can be picked up by a worker."""
        self.waiting.append(request)
        return request

    def process_request(self, request):
        """Mark a request as being processed and return the frames to send
        to a worker."""
        outgoing = request.make_request()
        self.processing.append(request)
        self.bypath[request.id] = request
        return outgoing

    def request_waiting(self):
        """Check if there is any request waiting for a worker."""
        return len(self.waiting) > 0

    def get_request(self):
        """Get the first request in the queue."""
        if self.request_waiting():
            request = self.waiting.popleft()
            return self.process_request(request)
        raise Broker.Error("internal error")

    def check_for_timeouts(self):
        """Check if any request has timed out. If this is the case,
        requeue it unless the maximum number of tries has been
        reached. Return a list of expired requests."""
        expired = []
        while self.processing and self.processing[0].timed_out():
            request = self.processing.popleft()
            del self.bypath[request.id]
            if request.expired():
                print('giving up on', request)
                expired.append(request)
            else:
                self.enqueue_request(request)
                print('requeued (due to timeout)', request)
        return expired

    def dequeue_request(self, answer, requeue=False):
        """Given an answer, remove the corresponding request from
        the queue. Return the answer to send if the request has been
        found in the system, None otherwise (late answer for example)."""
        id, content = answer[0], answer[1:]
        if id in self.bypath:
            request = self.bypath[id]
            self.processing.remove(request)
            del self.bypath[id]
            if content == [b""] and requeue and not request.expired():
                req = self.create_request(request.path + request.content,
                                          tries=request.tries)
                print('requeued (due to failure)', req, request.tries)
                return None
            else:
                return request.path + content

    def next_timeout_ms(self):
        """Return the number of milliseconds until the next timeout event,
        or None if there isn't any."""
        if self.processing:
            return 1000 * max(self.processing[0].deadline - time.time(), 0)
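
# Socket layout used by the Broker below:
#   requesters      ROUTER (XREP)  -- clients send requests and receive answers
#   workers_query   REP            -- idle workers ask for the next job
#   workers_answer  PULL           -- workers push back [request id] + result;
#                                     a single empty frame as the result marks
#                                     a failure and triggers a requeue.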


class Broker:
    """Broker object with timeout/retry handling."""

    class Error(Exception):
        """Exception raised when something went wrong."""
        pass

    def __init__(self, context, timeout, tries,
                 requesters_addr, workers_query_addr, workers_answer_addr):
        """Create a new broker object. The arguments are the 0MQ context
        to use to create the listening sockets, the timeout in seconds or
        None to wait indefinitely, the number of times a request should
        be attempted or None to retry forever, and the requester,
        worker query and worker answer addresses. If context is None, a new
        0MQ context with one I/O thread will be created."""
        self.context = context or zmq.Context(1)
        self.poller = zmq.Poller()
        self.requesters = self.context.socket(zmq.XREP)
        self.requesters.bind(requesters_addr)
        self.poller.register(self.requesters, zmq.POLLIN)
        self.workers_query = self.context.socket(zmq.REP)
        self.workers_query.bind(workers_query_addr)
        self.workers_answer = self.context.socket(zmq.PULL)
        self.workers_answer.bind(workers_answer_addr)
        self.poller.register(self.workers_answer, zmq.POLLIN)
        self.requests = Requests(timeout, tries)

    @property
    def client_information(self):
        """Addresses that requesters and workers should connect to, derived
        from the ports the sockets were actually bound to."""
        hostname = socket.gethostname()
        requesters_port = self.requesters.getsockopt(
            zmq.LAST_ENDPOINT).decode('ascii').split(':')[-1]
        workers_port = self.workers_query.getsockopt(
            zmq.LAST_ENDPOINT).decode('ascii').split(':')[-1]
        answers_port = self.workers_answer.getsockopt(
            zmq.LAST_ENDPOINT).decode('ascii').split(':')[-1]
        return {'requests_addr': 'tcp://%s:%s' % (hostname, requesters_port),
                'workers_addr': 'tcp://%s:%s' % (hostname, workers_port),
                'answers_addr': 'tcp://%s:%s' % (hostname, answers_port)}

    # TODO: generate some random secret string that agents connecting to us
    # could use as credentials.
    # OTOH: Python and "secure" can't be in the same sentence, except this
    # very sentence.
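
    # The dictionary above is what ends up in the ~/.zmq-broker cookie file,
    # e.g. (illustrative values):
    #   answers_addr: tcp://myhost:40003
    #   requests_addr: tcp://myhost:40001
    #   workers_addr: tcp://myhost:40002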

    def run(self):
        """Run forever."""
        # TODO: will only work correctly once we have the correct heartbeat
        # implementation and a shorter timeout
        while not interrupted:
            self.step()

    def step(self):
        """Handle one command (request, worker request, worker answer)
        and act on timeouts."""
        request_waiting = self.requests.request_waiting()
        if request_waiting:
            # Only let workers pick up work while something is actually queued.
            self.poller.register(self.workers_query, zmq.POLLIN)
        timeout_ms = self.requests.next_timeout_ms()
        for (fd, event) in self.poller.poll(timeout_ms):
            if fd == self.requesters:
                req = self.requests.create_request(
                    self.requesters.recv_multipart())
                print(datetime.now().strftime('%Y-%m-%d %H:%M:%S'), ':', req)
            if fd == self.workers_answer:
                # TODO: receive heartbeat here + keep a list of workers
                answer = self.workers_answer.recv_multipart()
                outgoing = self.requests.dequeue_request(answer, requeue=True)
                if outgoing is not None:
                    self.requesters.send_multipart(outgoing)
            if fd == self.workers_query:
                self.workers_query.recv()
                self.workers_query.send_multipart(self.requests.get_request())
        for failure in self.requests.check_for_timeouts():
            self.requesters.send_multipart(failure.path + [b""])  # failure message
        if request_waiting:
            self.poller.unregister(self.workers_query)
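
# A minimal worker sketch for this broker (illustrative only; `do_work` is a
# placeholder, not part of this script):
#
#     import os, yaml, zmq
#     conf = yaml.safe_load(open(os.path.expanduser('~/.zmq-broker')))
#     ctx = zmq.Context(1)
#     query = ctx.socket(zmq.REQ)        # ask the broker for the next job
#     query.connect(conf['workers_addr'])
#     push = ctx.socket(zmq.PUSH)        # push results back to the broker
#     push.connect(conf['answers_addr'])
#     while True:
#         query.send(b"ready")                    # content is ignored by the broker
#         job = query.recv_multipart()            # [request id, payload frames...]
#         job_id, payload = job[0], job[1:]
#         result = do_work(payload)               # placeholder: a list of byte frames
#         push.send_multipart([job_id] + result)  # [b""] as result means "failed, requeue"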


def stop(signum, frame):
    print('BROKER: shutting down')
    global interrupted
    interrupted = True


if __name__ == '__main__':
    import optparse
    import os
    import atexit
    import signal

    signal.signal(signal.SIGUSR1, stop)

    parser = optparse.OptionParser()
    parser.add_option("-t", "--tries", dest="tries",
                      help="Number of tries (default: 20)",
                      action="store", type="int", default=20)
    parser.add_option("-T", "--timeout", dest="timeout",
                      help="Timeout in seconds (default: 5 min)",
                      action="store", type="float", default=300)
    parser.add_option("-r", "--requests", dest="requests_addr",
                      help="requests address (default: tcp://*:*)",
                      action="store", default="tcp://*:*")
    parser.add_option("-w", "--workers", dest="workers_addr",
                      help="workers address (default: tcp://*:*)",
                      action="store", default="tcp://*:*")
    parser.add_option("-a", "--answers", dest="answers_addr",
                      help="answers address (default: tcp://*:*)",
                      action="store", default="tcp://*:*")
    parser.usage = "%prog [options]"
    (options, args) = parser.parse_args()

    cookie_fname = os.path.expanduser('~/.zmq-broker')
    if os.path.exists(cookie_fname):
        raise RuntimeError('Cookie file ~/.zmq-broker exists, presumably '
                           'another broker is running. If no other broker is '
                           'running, delete this file manually and retry.')

    context = zmq.Context(1)
    b = Broker(context, options.timeout, options.tries, options.requests_addr,
               options.workers_addr, options.answers_addr)

    with open(cookie_fname, 'w') as f:
        yaml.dump(b.client_information, f)
    os.chmod(cookie_fname, 0o600)  # owner read/write only

    def delete_cookie():
        if os.path.exists(cookie_fname):
            os.remove(cookie_fname)
    atexit.register(delete_cookie)

    b.run()
    # TODO: implement daemon mode where we terminate the main process upon
    # completed initialization
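
# A minimal requester sketch (illustrative; the broker only uses the first
# frame, a YAML document with an '=' key, for logging -- everything else is
# interpreted by the workers):
#
#     import os, yaml, zmq
#     conf = yaml.safe_load(open(os.path.expanduser('~/.zmq-broker')))
#     ctx = zmq.Context(1)
#     req = ctx.socket(zmq.REQ)
#     req.connect(conf['requests_addr'])
#     req.send_multipart([yaml.dump({'=': 'my-command'}).encode('ascii')])
#     reply = req.recv_multipart()    # worker's answer, or [b""] if the broker gave up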