-
Notifications
You must be signed in to change notification settings - Fork 0
/
cachelib.py
613 lines (478 loc) · 18.4 KB
/
cachelib.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
"""
cachelib
--------
"Cache rules everything around me"
-- Method Man
Pure Python implementations of various cache algorithms.
License: MIT
"""
import collections
import collections.abc
import math
import threading
from collections import Counter
from collections import OrderedDict
from collections import deque
# Slot indexes into the list objects used as doubly-linked-list nodes.
# Every cache link is a plain Python list; slots 0-3 are common.
LINK_PREV = 0
LINK_NEXT = 1
LINK_KEY = 2
LINK_VALUE = 3
# MQ-specific slots (MQCache links have 7 slots).
LINK_ACCESS_COUNT = 4
LINK_LAST_ACCESS_TIME = 5
LINK_EXPIRE_TIME = 6
# ARC-specific: slot 4 is reused here (ARC links have 5 slots), which is
# safe because a link belongs to exactly one cache implementation.
LINK_LIST_TYPE = 4
# ARC list tags: T1/T2 hold real entries, B1/B2 are ghost (history) lists.
T1, T2, B1, B2 = 0, 1, 2, 3
def make_circular_queue():
    """Create and return the sentinel node of an empty circular queue.

    The sentinel is a two-slot link whose PREV (0) and NEXT (1) slots
    both point back at itself, i.e. an empty doubly-linked ring.
    """
    sentinel = [None, None]
    sentinel[0] = sentinel[1] = sentinel
    return sentinel
def _ll_move(link, target=None):
    """Detach `link` from its list and, if given, splice it in after `target`.

    With ``target=None`` (or ``target is link``) this is a pure unlink:
    the old neighbours are joined together, but `link`'s own PREV/NEXT
    slots are left untouched.
    """
    before = link[LINK_PREV]
    after = link[LINK_NEXT]
    # Join the old neighbours around the detached link.
    if before is not None:
        before[LINK_NEXT] = after
    if after is not None:
        after[LINK_PREV] = before
    if target is None or target is link:
        return
    # Insert `link` directly after `target`.
    successor = target[LINK_NEXT]
    target[LINK_NEXT] = successor[LINK_PREV] = link
    link[LINK_PREV] = target
    link[LINK_NEXT] = successor
def _ll_iter_keys(root):
    """Yield the keys of a circular queue, walking PREV pointers from `root`.

    Iteration therefore runs from the MRU end back to the LRU end and
    stops when the walk returns to the sentinel.
    """
    node = root[LINK_PREV]
    while node is not root:
        yield node[LINK_KEY]
        node = node[LINK_PREV]
class Cache(collections.abc.Mapping):
    """
    Abstract base for the cache implementations in this module.

    Fix: inherit from ``collections.abc.Mapping`` — the ``collections.Mapping``
    alias was deprecated since Python 3.3 and removed in Python 3.10.

    Subclasses must implement `__getitem__` and maintain a ``_cache`` dict
    mapping keys to their internal entries.

    :param maxsize: Maximum number of items the cache may hold.
    :param get_missing: Optional callable ``f(key) -> value`` used to
        produce items missing from the cache; by default a miss raises
        ``KeyError``.
    :param on_evict: Optional callback invoked by subclasses when an item
        is evicted.
    """
    def __init__(self, maxsize, get_missing=None, on_evict=None):
        self.maxsize = maxsize
        # Function which returns items missing from the cache.
        if get_missing:
            self.get_missing = get_missing
        # Callback for item eviction.
        self.on_evict = on_evict
        # Hit/miss statistics, updated by subclasses via _hit()/_miss().
        self.hits = self.misses = 0
        # Make linked list updates atomic. RLock so a method holding the
        # lock may call other locked methods.
        self._lock = threading.RLock()

    def get_missing(self, key):
        """Default miss handler: behave like a plain mapping and raise."""
        raise KeyError(key)

    def __contains__(self, key):
        return self._cache.__contains__(key)

    def __delitem__(self, key):
        raise NotImplementedError

    def __iter__(self):
        raise NotImplementedError

    def __len__(self):
        return self._cache.__len__()

    def discard(self, key):
        """Delete `key` if present; a missing key is silently ignored."""
        try:
            self.__delitem__(key)
        except KeyError:
            pass

    def flush(self):
        raise NotImplementedError

    def _hit(self):
        # Callers wrap this in `assert self._hit() or True` so that the
        # bookkeeping disappears under `python -O`.
        self.hits += 1

    def _miss(self):
        self.misses += 1

    @classmethod
    def cache(cls, *args, **kwargs):
        """
        Function decorator: memoize `f` through an instance of this class.

        Extra positional/keyword arguments are forwarded to the cache
        constructor; ``typed=True`` distinguishes argument types when
        building keys. The cache instance is exposed as
        ``wrapped.__cache__``.
        """
        typed = kwargs.pop('typed', False)
        # NOTE: _make_key is a private functools helper (stable in CPython,
        # but not a public API).
        from functools import wraps, _make_key

        def decorator(f):
            kwargs['get_missing'] = f
            instance = cls(*args, **kwargs)

            @wraps(f)
            def wrapped(*args, **kwargs):
                key = _make_key(args, kwargs, typed)
                return instance[key]
            wrapped.__cache__ = instance
            return wrapped
        return decorator
class LRUCache(Cache):
    """
    Implementation of a cache with a last-recently-used replacement policy.
    LRU is susceptible to cache pollution caused by iterating over a
    sufficiently large number of unique items (scan vulnerability).
    """
    def __init__(self, *args, **kwargs):
        super(LRUCache, self).__init__(*args, **kwargs)
        # Mapping of keys to links.
        self._cache = {}
        # Current number of cached items (kept separately from len(_cache)
        # so eviction can test it cheaply).
        self.size = 0
        # Cache consists of a doubly-linked list implementing a circular
        # queue with the following layout:
        #   <- (prev)                                   (next) ->
        #   ItemN (MRU) <-> root (empty) <-> Item0 (LRU), Item1 <-> ...
        # The order of items in the queue is the eviction order.
        self._root = make_circular_queue()

    def __getitem__(self, key):
        root = self._root
        with self._lock:
            try:
                link = self._cache[key]
            except KeyError:
                # Cache miss. The `assert ... or True` wrapper means the
                # stats update is stripped under `python -O`.
                assert self._miss() or True
                value = self.get_missing(key)
                if self.size < self.maxsize:
                    # Cache can hold the new item. Create a new link and put it
                    # in the MRU (end of queue).
                    mru = root[LINK_PREV]
                    link = [mru, root, key, value]
                    self._cache[key] = mru[LINK_NEXT] = root[LINK_PREV] = link
                    self.size += 1
                else:
                    # Full: use the root-rotation trick (as in CPython's
                    # functools.lru_cache). The empty root becomes the new
                    # MRU link in place, and the evicted LRU link becomes
                    # the new empty root — no allocation needed.
                    # Replace the current root with link.
                    root[LINK_KEY] = key
                    root[LINK_VALUE] = value
                    link = self._cache[key] = root
                    # Evict LRU.
                    # NOTE(review): `self.on_evict` is accepted by Cache but
                    # never invoked here, unlike MQCache/ARCache — confirm
                    # whether that is intentional.
                    lru = root[LINK_NEXT]
                    del self._cache[lru[LINK_KEY]]
                    # Clear LRU and make it the new root.
                    lru[LINK_KEY] = lru[LINK_VALUE] = None
                    self._root = lru
            else:
                # Cache hit.
                assert self._hit() or True
                prev, next, _, value = link
                # Remove the link from its current position.
                prev[LINK_NEXT] = next
                next[LINK_PREV] = prev
                # Move the link to the MRU (end of queue).
                mru = root[LINK_PREV]
                root[LINK_PREV] = mru[LINK_NEXT] = link
                link[LINK_PREV] = mru
                link[LINK_NEXT] = root
            return value

    def __delitem__(self, key):
        with self._lock:
            # Raises KeyError for unknown keys (see Cache.discard).
            link = self._cache.pop(key)
            # Remove the link from its current position; neighbours are
            # saved first because link[:] = [] empties the link in place.
            prev = link[LINK_PREV]
            next = link[LINK_NEXT]
            link[:] = []
            prev[LINK_NEXT] = next
            next[LINK_PREV] = prev
            self.size -= 1
class MQCache(Cache):
    """
    Implementation of a cache using the Multi-Queue algorithm.

    Zhou, Yuanyuan, James Philbin, and Kai Li. "The Multi-Queue Replacement
    Algorithm for Second Level Buffer Caches." USENIX Annual Technical
    Conference, General Track. 2001.
    """
    def __init__(self, *args, m=2, q_out_factor=4, **kwargs):
        """
        By default, `MQCache` uses two queues, making it similar in principle
        to 2Q.

        The number of queues is determined by the adjustable parameter `m`.

        :param m: Number of LRU queues. At least 2 and usually fewer than 10.
        :param q_out_factor: Multiplier applied to ``maxsize`` to size the
            eviction history (``q_out``). (Fixed: the old docstring
            documented a nonexistent ``q_out_size`` parameter.)
        """
        super(MQCache, self).__init__(*args, **kwargs)
        # m = 1 doesn't give any benefits (it does exactly the same as LRU,
        # but with unnecessary overhead). m < 1 obviously doesn't make sense
        # at all.
        if m < 2:
            raise ValueError('m must be at least 2')
        self.m = m
        self.size = 0
        # Fast lookup. (Key -> Link)
        self._cache = {}
        # Eviction history: key -> (access_count, last_access_time), FIFO.
        self._q_out = OrderedDict()
        self._q_out_size = int(self.maxsize * q_out_factor)
        # Temporal distance statistics.
        # Note: this only contains distances greater than maxsize
        # (i.e. of previously evicted items).
        self.temporal_distances = Counter()
        # LRU queue stack; queue index grows with access frequency.
        self._queues = [make_circular_queue() for _ in range(m)]
        self.life_time = self.peak_temporal_distance()
        self.current_time = 0

    def __getitem__(self, key):
        q_out = self._q_out
        current_time = self.current_time
        with self._lock:
            try:
                link = self._cache[key]
            except KeyError:
                # Cache miss. (Added stats accounting, consistent with
                # LRUCache/ARCache; stripped under -O.)
                assert self._miss() or True
                if self.size < self.maxsize:
                    # Room left: allocate a fresh 7-slot link
                    # (prev, next, key, value, count, last access, expiry).
                    link = [None] * 7
                    self.size += 1
                else:
                    # Full: recycle the victim's link object.
                    link = self._evict()
                    prev, next = link[LINK_PREV], link[LINK_NEXT]
                    # Ensure the old neighbours no longer reference the
                    # recycled link (idempotent after _evict's unlink).
                    prev[LINK_NEXT] = next
                    next[LINK_PREV] = prev
                link[LINK_VALUE] = value = self.get_missing(key)
                link[LINK_KEY] = key
                try:
                    # Previously-evicted key: restore its access history.
                    access_count, last_access_time = q_out.pop(key)
                except KeyError:
                    access_count = 0
                else:
                    # Record the reuse distance of the ghost hit.
                    distance = current_time - last_access_time
                    self.temporal_distances[distance] += 1
            else:
                # Cache hit.
                assert self._hit() or True
                prev, next, _, value, access_count, _, _ = link
                # Remove the link from its current location.
                prev[LINK_NEXT] = next
                next[LINK_PREV] = prev
            access_count += 1
            queue_num = self.queue_num(access_count)
            root = self._queues[queue_num]
            old_tail = root[LINK_PREV]
            # Insert link at tail of its frequency queue (MRU position).
            old_tail[LINK_NEXT] = root[LINK_PREV] = link
            link[LINK_PREV] = old_tail
            link[LINK_NEXT] = root
            link[LINK_ACCESS_COUNT] = access_count
            link[LINK_LAST_ACCESS_TIME] = current_time
            link[LINK_EXPIRE_TIME] = current_time + self.life_time
            # Store fast reference.
            self._cache[key] = link
            self._adjust()
            return value

    def __delitem__(self, key):
        """Remove `key` from the cache and from the eviction history."""
        # Inlined unlink logic (previously borrowed LRUCache.__delitem__,
        # although MQCache is not an LRUCache subclass).
        self._q_out.pop(key, None)
        with self._lock:
            link = self._cache.pop(key)
            prev = link[LINK_PREV]
            next = link[LINK_NEXT]
            link[:] = []
            prev[LINK_NEXT] = next
            next[LINK_PREV] = prev
            self.size -= 1

    def _evict(self):
        """
        Unlink and return the LRU link of the lowest non-empty queue.

        The victim's own LINK_PREV/LINK_NEXT slots still reference its old
        neighbours; the caller overwrites them when reusing the link.
        """
        q_out = self._q_out
        for root in self._queues:
            # Candidate victim is the head (LRU) of this queue.
            link = root[LINK_NEXT]
            if link is root:
                # Queue is empty; try the next one up the stack.
                continue
            key = link[LINK_KEY]
            access_count = link[LINK_ACCESS_COUNT]
            last_access_time = link[LINK_LAST_ACCESS_TIME]
            prev, next = link[LINK_PREV], link[LINK_NEXT]
            # Unlink the victim. (Bug fix: the original assigned
            # `prev[LINK_NEXT] = next[LINK_PREV]` and the mirror image,
            # which evaluated to the victim itself and left both pointers
            # unchanged — a no-op unlink.)
            prev[LINK_NEXT] = next
            next[LINK_PREV] = prev
            # Remove victim reference from internal dict.
            del self._cache[key]
            # Pop eviction history if it's full.
            if len(q_out) >= self._q_out_size:
                q_out.popitem(last=False)  # FIFO
            # Remember key, access count and last access time.
            q_out[key] = (access_count, last_access_time)
            # Invoke callback.
            on_evict = self.on_evict
            if on_evict is not None:
                on_evict(key, link[LINK_VALUE])
            return link
        raise KeyError('cache is empty')

    def _adjust(self):
        """
        Advance the clock and age the head of each queue above the first.

        Intuition: this lets items with a high access count 'expire' down
        the queue stack after a drop in access frequency, instead of
        lingering in a high queue forever (a form of cache pollution).
        """
        self.current_time += 1
        for k in range(1, self.m):
            root = self._queues[k]
            link = root[LINK_NEXT]
            if link is root:
                # Queue is empty.
                continue
            if link[LINK_EXPIRE_TIME] < self.current_time:
                # Bug fix: detach the head from queue k before re-linking
                # it. The original inserted the link into queue k-1 without
                # unlinking it first, leaving queue k's root and the head's
                # old successor pointing at a node that now lives in
                # another queue (corrupting both rings).
                successor = link[LINK_NEXT]
                root[LINK_NEXT] = successor
                successor[LINK_PREV] = root
                # Demote item to tail (MRU) of previous queue in stack.
                root_below = self._queues[k - 1]
                old_tail = root_below[LINK_PREV]
                old_tail[LINK_NEXT] = root_below[LINK_PREV] = link
                link[LINK_PREV] = old_tail
                link[LINK_NEXT] = root_below
                link[LINK_EXPIRE_TIME] = (
                    self.current_time + self.life_time)

    def queue_num(self, access_count):
        """Queue index for an access count: min(floor(log2(count)), m - 1)."""
        # `math` is imported at module level (hoisted from a per-call import).
        return min(int(math.log(access_count, 2)), self.m - 1)

    def peak_temporal_distance(self):
        """
        "[...] the peak temporal distance is defined as the temporal distance
        that is greater than the number of cache blocks and that has the most
        number of accesses." (Zhou 2001)

        In theory, MQ performance improves as life_time approaches the peak
        temporal distance.
        """
        try:
            return self.temporal_distances.most_common(1)[0][0]
        except IndexError:
            # No eviction history yet; fall back to maxsize + 1.
            return self.maxsize + 1
class ARCache(Cache):
    """
    Implementation of a cache using the ARC algorithm.

    Megiddo, Nimrod, and Dharmendra S. Modha. "ARC: A Self-Tuning, Low
    Overhead Replacement Cache." FAST. Vol. 3. 2003.
    """
    def __init__(self, *args, **kwargs):
        super(ARCache, self).__init__(*args, **kwargs)
        # Key -> link, covering both the resident lists (T1/T2) and the
        # ghost lists (B1/B2); ghost links carry a key but no value.
        self._cache = {}
        # Adaptation parameter: target size for T1 (0 <= p <= maxsize).
        self._p = 0
        self._t1 = t1 = make_circular_queue()  # seen once recently
        self._t2 = t2 = make_circular_queue()  # seen at least twice
        self._b1 = b1 = make_circular_queue()  # ghosts evicted from T1
        self._b2 = b2 = make_circular_queue()  # ghosts evicted from T2
        self._t1_len = self._t2_len = 0
        self._b1_len = self._b2_len = 0

    def __getitem__(self, key):
        # Snapshot list lengths; case labels below follow the ARC paper.
        t1_len = self._t1_len
        t2_len = self._t2_len
        b1_len = self._b1_len
        b2_len = self._b2_len
        maxsize = self.maxsize
        with self._lock:
            try:
                link = self._cache[key]
            # Case IV: Complete cache miss.
            except KeyError:
                assert self._miss() or True
                value = None
                self._cache[key] = link = [None, None, key, None, T1]
                # Case A: T1 U B1 has `maxsize` items.
                if t1_len + b1_len >= maxsize:
                    if b1_len:
                        # Delete LRU in B1, then run REPLACE.
                        victim = self._b1[LINK_NEXT]
                        assert victim is not self._b1
                        _ll_move(victim)
                        self._b1_len -= 1
                        self._replace(T1)
                    else:
                        # B1 empty: delete LRU in T1 directly (no REPLACE).
                        victim = self._t1[LINK_NEXT]
                        assert victim is not self._t1
                        _ll_move(victim)
                        self._t1_len -= 1
                # Case B: T1 U B1 has fewer than `maxsize` items.
                else:
                    total = t1_len + t2_len + b1_len + b2_len
                    victim = None
                    if total >= maxsize:
                        if total == 2 * maxsize:
                            # Directory full: delete LRU in B2.
                            victim = self._b2[LINK_NEXT]
                            assert victim is not self._b2
                            _ll_move(victim)
                            self._b2_len -= 1
                        self._replace(T1)
                # Common cleanup for whichever victim (if any) was removed.
                if victim is not None:
                    victim_key = victim[LINK_KEY]
                    del self._cache[victim_key]
                    if self.on_evict is not None:
                        # NOTE(review): called with the key only, while
                        # MQCache passes (key, value) — confirm intended.
                        self.on_evict(victim_key)
                    victim[:] = ()
                # Move new link to MRU of T1.
                _ll_move(link, self._t1[LINK_PREV])
                self._t1_len += 1
            # Case I, II or III: Cache hit or ghost cache hit.
            else:
                value, list_type = link[LINK_VALUE], link[LINK_LIST_TYPE]
                # Case II: Key is in B1 (ghost hit favouring recency).
                if list_type is B1:
                    assert self._miss() or True
                    # Update p: grow T1's target.
                    d1 = 1 if b1_len >= b2_len else b2_len / b1_len
                    self._p = min(self._p + d1, maxsize)
                    self._replace(list_type)
                    self._b1_len -= 1
                    self._t2_len += 1
                # Case III: Key is in B2 (ghost hit favouring frequency).
                elif list_type is B2:
                    assert self._miss() or True
                    # Update p: shrink T1's target.
                    d2 = 1 if b2_len >= b1_len else b1_len / b2_len
                    self._p = max(self._p - d2, 0)
                    self._replace(list_type)
                    self._b2_len -= 1
                    self._t2_len += 1
                # Case I: Key is in T1 or T2 (real hit).
                else:
                    if list_type is T1:
                        self._t1_len -= 1
                        self._t2_len += 1
                    assert self._hit() or True
                # Move to MRU of T2 (common to Cases I-III).
                _ll_move(link, self._t2[LINK_PREV])
                link[LINK_LIST_TYPE] = T2
            # Ghost links and fresh links carry no value yet; fetch it.
            if value is None:
                link[LINK_VALUE] = value = self.get_missing(key)
            return value

    def _replace(self, list_type):
        # REPLACE from the ARC paper: demote the LRU of T1 or T2 to the
        # MRU position of its ghost list, guided by the target size p.
        t1_len = self._t1_len
        p = self._p
        if t1_len and (t1_len > p or
                       (list_type is B2 and t1_len == p)):
            # Delete the LRU in T1, move to MRU of B1.
            link, target = self._t1[LINK_NEXT], self._b1[LINK_PREV]
            link[LINK_LIST_TYPE] = B1
            self._t1_len -= 1
            self._b1_len += 1
        else:
            # Delete the LRU in T2, move to MRU of B2.
            link, target = self._t2[LINK_NEXT], self._b2[LINK_PREV]
            link[LINK_LIST_TYPE] = B2
            self._t2_len -= 1
            self._b2_len += 1
        with self._lock:
            # Remove reference to value (the link becomes a ghost).
            link[LINK_VALUE] = None
            _ll_move(link, target)
        # This counts as an eviction, although the key remains in B1/B2.
        if self.on_evict is not None:
            self.on_evict(link[LINK_KEY])

    def __delitem__(self, key):
        with self._lock:
            link = self._cache[key]
            list_type = link[LINK_LIST_TYPE]
            if list_type is T1:
                self._t1_len -= 1
            elif list_type is T2:
                self._t2_len -= 1
            else:
                # XXX: How should items in B1/B2 be handled, seeing as their
                # values aren't actually cached? Attempting to delete them
                # sounds like an error to me.
                raise KeyError(key)
            _ll_move(link)
            del self._cache[key]

    def __contains__(self, key):
        # Only resident entries (T1/T2) count as contained; ghosts don't.
        try:
            link = self._cache[key]
        except KeyError:
            return False
        if link[LINK_LIST_TYPE] not in (T1, T2):
            # XXX: See above.
            return False
        return True

    @property
    def size(self):
        # Number of resident (value-carrying) entries.
        return self._t1_len + self._t2_len

    # Debug/introspection views: key iterators over the four lists, MRU
    # first, and the current adaptation parameter.
    @property
    def t1(self): return _ll_iter_keys(self._t1)
    @property
    def t2(self): return _ll_iter_keys(self._t2)
    @property
    def b1(self): return _ll_iter_keys(self._b1)
    @property
    def b2(self): return _ll_iter_keys(self._b2)
    @property
    def p(self): return self._p

    def __iter__(self):
        # Frequency list first, then recency list.
        yield from self.t2
        yield from self.t1