-
Notifications
You must be signed in to change notification settings - Fork 0
/
stefankoegl_kdtree.py
718 lines (497 loc) · 20.2 KB
/
stefankoegl_kdtree.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
# -*- coding: utf-8 -*-
"""A Python implementation of a kd-tree
This package provides a simple implementation of a kd-tree in Python.
https://en.wikipedia.org/wiki/K-d_tree
"""
from __future__ import print_function
import operator
import math
from collections import deque
from functools import wraps
__author__ = u'Stefan Kögl <stefan@skoegl.net>'
__version__ = '0.12'
__website__ = 'https://github.com/stefankoegl/kdtree'
__license__ = 'ISC license'
# For each child position (0 = left, 1 = right): the comparison operator and
# the arithmetic operator that _search_node applies to the query coordinate
# and the current search distance when deciding whether to visit that child.
COMPARE_CHILD = {
    0: (operator.le, operator.sub),
    1: (operator.ge, operator.add),
}
class Node(object):
    """ A Node in a kd-tree

    A tree is represented by its root node, and every node represents
    its subtree.

    A node whose data is None is treated as empty: it evaluates to False,
    yields nothing from the traversal iterators and is not reported by
    the children property. """

    def __init__(self, data=None, left=None, right=None):
        """ Creates a node holding data with optional left/right subnodes """
        self.data = data
        self.left = left
        self.right = right

    @property
    def is_leaf(self):
        """ Returns True if a Node has no subnodes

        An empty node (data is None) is always a leaf.

        >>> Node().is_leaf
        True

        >>> Node( 1, left=Node(2) ).is_leaf
        False
        """
        # Emptiness is tested with "is None", exactly like __bool__ does, so
        # that falsy payloads (0, empty sequences) and payloads without a
        # plain truth value (e.g. arrays) are handled consistently.
        return (self.data is None) or \
            (all(not bool(c) for c, p in self.children))

    def preorder(self):
        """ iterator for nodes: root, left, right """

        if not self:
            return

        yield self

        if self.left:
            for x in self.left.preorder():
                yield x

        if self.right:
            for x in self.right.preorder():
                yield x

    def inorder(self):
        """ iterator for nodes: left, root, right """

        if not self:
            return

        if self.left:
            for x in self.left.inorder():
                yield x

        yield self

        if self.right:
            for x in self.right.inorder():
                yield x

    def postorder(self):
        """ iterator for nodes: left, right, root """

        if not self:
            return

        if self.left:
            for x in self.left.postorder():
                yield x

        if self.right:
            for x in self.right.postorder():
                yield x

        yield self

    @property
    def children(self):
        """
        Returns an iterator for the non-empty children of the Node

        The children are returned as (Node, pos) tuples where pos is 0 for the
        left subnode and 1 for the right.

        >>> len(list(create(dimensions=2).children))
        0

        >>> len(list(create([ (1, 2) ]).children))
        0

        >>> len(list(create([ (2, 2), (2, 1), (2, 3) ]).children))
        2
        """

        if self.left and self.left.data is not None:
            yield self.left, 0
        if self.right and self.right.data is not None:
            yield self.right, 1

    def set_child(self, index, child):
        """ Sets one of the node's children

        index 0 refers to the left, 1 to the right child """

        if index == 0:
            self.left = child
        else:
            self.right = child

    def height(self):
        """
        Returns height of the (sub)tree, without considering
        empty leaf-nodes

        >>> create(dimensions=2).height()
        0

        >>> create([ (1, 2) ]).height()
        1

        >>> create([ (1, 2), (2, 3) ]).height()
        2
        """

        # an empty node contributes 0, a node with data contributes 1
        min_height = int(bool(self))
        return max([min_height] + [c.height()+1 for c, p in self.children])

    def get_child_pos(self, child):
        """ Returns the position of the given child

        If the given node is the left child, 0 is returned. If it is the
        right child, 1 is returned. Otherwise None.

        child may be a node or a point (tuple). """

        # Prefer identity: both children may hold equal data (duplicate
        # points), and then equality alone would always report the left one.
        for c, pos in self.children:
            if child is c:
                return pos

        # Fall back to comparing by data, e.g. when a plain point is passed
        for c, pos in self.children:
            if child == c:
                return pos

        return None

    def __repr__(self):
        return '<%(cls)s - %(data)s>' % \
            dict(cls=self.__class__.__name__, data=repr(self.data))

    def __nonzero__(self):
        """ A node is truthy if (and only if) it holds data """
        return self.data is not None

    __bool__ = __nonzero__

    def __eq__(self, other):
        """ Compares the node's data to a point (tuple) or to the data of
        another node-like object """
        if isinstance(other, tuple):
            return self.data == other

        # Objects without a data attribute (None, numbers, ...) can not be
        # compared; let Python fall back to its default instead of raising
        # an AttributeError.
        if not hasattr(other, 'data'):
            return NotImplemented

        return self.data == other.data

    def __hash__(self):
        # identity-based: distinct nodes are distinct dict / set entries,
        # even if they hold equal data
        return id(self)
def require_axis(f):
    """ Check if the object of the function has axis and sel_axis members

    Decorator for methods: the wrapped method raises a ValueError if the
    instance's axis or sel_axis attribute is None, otherwise the call is
    passed through unchanged. """

    @wraps(f)
    def _wrapper(self, *args, **kwargs):
        if None in (self.axis, self.sel_axis):
            # Both placeholders need an explicit "s" conversion; without it
            # "%(func_name) r..." is parsed as a space flag plus %r and the
            # message comes out garbled.
            raise ValueError('%(func_name)s requires the node %(node)s '
                             'to have an axis and a sel_axis function' %
                             dict(func_name=f.__name__, node=repr(self)))

        return f(self, *args, **kwargs)

    return _wrapper
class KDNode(Node):
    """ A Node that contains kd-tree specific data and methods """

    def __init__(self, data=None, left=None, right=None, axis=None,
                 sel_axis=None, dimensions=None):
        """ Creates a new node for a kd-tree

        If the node will be used within a tree, the axis and the sel_axis
        function should be supplied.

        sel_axis(axis) is used when creating subnodes of the current node. It
        receives the axis of the parent node and returns the axis of the child
        node. """
        super(KDNode, self).__init__(data, left, right)
        self.axis = axis
        self.sel_axis = sel_axis
        self.dimensions = dimensions

    @require_axis
    def add(self, point):
        """
        Adds a point to the current node or iteratively
        descends to one of its children.

        Users should call add() only to the topmost tree.

        Returns the node that now holds the point. The point is passed to
        check_dimensionality() together with the dimensions of every node
        visited on the way down.
        """

        current = self
        while True:
            check_dimensionality([point], dimensions=current.dimensions)

            # Adding has hit an empty leaf-node, add here
            if current.data is None:
                current.data = point
                return current

            # split on the node's axis, descend either left or right
            if point[current.axis] < current.data[current.axis]:
                if current.left is None:
                    current.left = current.create_subnode(point)
                    return current.left
                else:
                    current = current.left
            else:
                if current.right is None:
                    current.right = current.create_subnode(point)
                    return current.right
                else:
                    current = current.right

    @require_axis
    def create_subnode(self, data):
        """ Creates a subnode for the current node

        The subnode is of the same class, splits on sel_axis(self.axis) and
        inherits sel_axis and dimensions. It is returned, not attached. """

        return self.__class__(data,
                              axis=self.sel_axis(self.axis),
                              sel_axis=self.sel_axis,
                              dimensions=self.dimensions)

    @require_axis
    def find_replacement(self):
        """ Finds a replacement for the current node

        The replacement is the minimum (along this node's axis) of the right
        subtree or, if there is no right subtree, the maximum of the left one.

        The replacement is returned as a
        (replacement-node, replacements-parent-node) tuple """

        if self.right:
            child, parent = self.right.extreme_child(min, self.axis)
        else:
            child, parent = self.left.extreme_child(max, self.axis)

        # extreme_child() reports None as parent for the subtree root it was
        # called on; that node's parent is this node
        return (child, parent if parent is not None else self)

    def should_remove(self, point, node):
        """ checks if self's point (and maybe identity) matches

        If node is given, self must also be that very node. """
        if not self.data == point:
            return False

        return (node is None) or (node is self)

    @require_axis
    def remove(self, point, node=None):
        """ Removes the node with the given point from the tree

        Returns the new root node of the (sub)tree, or None if this node is
        empty.

        If there are multiple points matching "point", only one is removed. The
        optional "node" parameter is used for checking the identity, once the
        removal candidate is decided."""

        # Recursion has reached an empty leaf node, nothing here to delete
        if not self:
            return

        # Recursion has reached the node to be deleted
        if self.should_remove(point, node):
            return self._remove(point)

        # Remove direct subnode
        if self.left and self.left.should_remove(point, node):
            self.left = self.left._remove(point)

        elif self.right and self.right.should_remove(point, node):
            self.right = self.right._remove(point)

        # Recurse to subtrees; a point lying exactly on the splitting plane
        # may be stored on either side, so both are visited in that case
        if point[self.axis] <= self.data[self.axis]:
            if self.left:
                self.left = self.left.remove(point, node)

        if point[self.axis] >= self.data[self.axis]:
            if self.right:
                self.right = self.right.remove(point, node)

        return self

    @require_axis
    def _remove(self, point):
        """ Removes this very node and returns the new root of its subtree """
        # we have reached the node to be deleted here

        # deleting a leaf node is trivial: it stays in place as an empty node
        if self.is_leaf:
            self.data = None
            return self

        # we have to delete a non-leaf node here

        # find a replacement for the node (will be the new subtree-root)
        root, max_p = self.find_replacement()

        # self and root swap positions; if root was a direct child of self,
        # self takes that child slot below root
        tmp_l, tmp_r = self.left, self.right
        self.left, self.right = root.left, root.right
        root.left = tmp_l if tmp_l is not root else self
        root.right = tmp_r if tmp_r is not root else self
        self.axis, root.axis = root.axis, self.axis

        # Special-case if we have not chosen a direct child as the replacement
        if max_p is not self:
            pos = max_p.get_child_pos(root)
            max_p.set_child(pos, self)
            max_p.remove(point, self)

        else:
            root.remove(point, self)

        return root

    @property
    def is_balanced(self):
        """ Returns True if the (sub)tree is balanced

        The tree is balanced if the heights of both subtrees differ at most by
        1 """

        left_height = self.left.height() if self.left else 0
        right_height = self.right.height() if self.right else 0

        if abs(left_height - right_height) > 1:
            return False

        return all(c.is_balanced for c, _ in self.children)

    def rebalance(self):
        """
        Returns the (possibly new) root of the rebalanced tree

        The tree is rebuilt from its points with create(). The axis selection
        function, the dimensions and the splitting axis of this node are
        handed over, so that the new tree is configured like the old one.
        """

        return create([x.data for x in self.inorder()],
                      dimensions=self.dimensions,
                      axis=self.axis if self.axis is not None else 0,
                      sel_axis=self.sel_axis)

    def axis_dist(self, point, axis):
        """
        Squared distance at the given axis between
        the current Node and the given point
        """
        return math.pow(self.data[axis] - point[axis], 2)

    def dist(self, point):
        """
        Squared distance between the current Node
        and the given point
        """
        r = range(len(self.data))
        return sum([self.axis_dist(point, i) for i in r])

    def search_knn(self, point, k, dist=None):
        """ Return the k nearest neighbors of point and their distances

        point must be an actual point, not a node.

        k is the number of results to return. The actual results can be less
        (if there aren't more nodes to return) or more in case of equal
        distances.

        dist is a distance function, expecting two points and returning a
        distance value. Distance values can be any comparable type. Its
        values are used directly as the search radius along an axis when
        subtrees are pruned. Without dist, the squared distance of
        KDNode.dist() is used (and reported in the result).

        The result is an ordered list of (node, distance) tuples.
        """

        prev = None
        current = self

        if dist is None:
            get_dist = lambda n: n.dist(point)
            # KDNode.dist() is a squared distance; the radius that may be
            # compared to coordinates is its square root
            get_radius = math.sqrt
        else:
            get_dist = lambda n: dist(n.data, point)
            get_radius = None

        # the nodes do not keep a reference to their parents
        parents = {current: None}

        # go down the tree as we would for inserting
        while current:
            if point[current.axis] < current.data[current.axis]:
                # left side
                parents[current.left] = current
                prev = current
                current = current.left
            else:
                # right side
                parents[current.right] = current
                prev = current
                current = current.right

        if not prev:
            return []

        examined = set()
        results = {}

        # Go up the tree, looking for better solutions
        current = prev
        while current:
            # search node and update results
            current._search_node(point, k, results, examined, get_dist,
                                 get_radius)
            current = parents[current]

        BY_VALUE = lambda kv: kv[1]
        return sorted(results.items(), key=BY_VALUE)

    def _search_node(self, point, k, results, examined, get_dist,
                     get_radius=None):
        """ Considers this node and, where necessary, its unexamined subtrees
        as neighbors of point

        results maps nodes to their distance and is updated in place;
        examined is the set of nodes that have already been handled.
        get_dist(node) returns the distance of a node to point.
        get_radius, if given, converts a distance value into a radius that
        can be added to / subtracted from a coordinate; without it the
        distance values are used as they are. """
        examined.add(self)

        # the result that would be dropped first: the farthest one so far
        if not results:
            bestNode = None
            bestDist = float('inf')

        else:
            bestNode, bestDist = max(results.items(),
                                     key=lambda n_d: n_d[1])

        # If the current node is closer than the farthest result, it takes
        # its place (or is simply added, while there are fewer than k)
        nodeDist = get_dist(self)
        if nodeDist < bestDist:
            if len(results) == k and bestNode:
                results.pop(bestNode)

            results[self] = nodeDist

        # if we're equal to the current best, add it, regardless of k
        elif nodeDist == bestDist:
            results[self] = nodeDist

        # if we don't have k results yet, add it anyway
        elif len(results) < k:
            results[self] = nodeDist

        # Check whether there could be any points on the other side of the
        # splitting plane that are closer to the search point than the
        # farthest result.
        for child, pos in self.children:
            if child in examined:
                continue

            examined.add(child)

            if len(results) < k or not results:
                # While results are missing every node is a candidate, no
                # matter how far away it is
                lineIntersects = True

            else:
                # Re-read the farthest result for every child: searching the
                # sibling may have improved it
                farthest = max(results.values())
                radius = farthest if get_radius is None \
                    else get_radius(farthest)

                compare, combine = COMPARE_CHILD[pos]

                # Since the hyperplanes are all axis-aligned this is a
                # simple comparison: does the interval of +/- radius around
                # the search point's splitting coordinate reach the child's
                # side of this node's splitting plane?
                nodePoint = self.data[self.axis]
                pointPlusDist = combine(point[self.axis], radius)
                lineIntersects = compare(pointPlusDist, nodePoint)

            # If the hypersphere crosses the plane, there could be nearer
            # points on the other side of the plane, so the algorithm must move
            # down the other branch of the tree from the current node looking
            # for closer points, following the same recursive process as the
            # entire search.
            if lineIntersects:
                child._search_node(point, k, results, examined, get_dist,
                                   get_radius)

    @require_axis
    def search_nn(self, point, dist=None):
        """
        Search the nearest node of the given point

        point must be an actual point, not a node. The nearest node to the
        point is returned. If a location of an actual node is used, the Node
        with this location will be returned (not its neighbor).

        dist is a distance function, expecting two points and returning a
        distance value. Distance values can be any comparable type.

        The result is a (node, distance) tuple, or None if nothing was found.
        """

        return next(iter(self.search_knn(point, 1, dist)), None)

    @require_axis
    def search_nn_dist(self, point, distance, best=None):
        """
        Search the nodes of the (sub)tree which are within the given distance
        of the given point

        point must be a location, not a node. distance is compared to the
        squared distance returned by dist(): a node is reported if
        node.dist(point) < distance.

        The matching nodes are appended to best (a new list if omitted),
        which is returned.
        """

        if best is None:
            best = []

        # an empty (sub)tree has nothing to report
        if not self:
            return best

        # consider the current node
        if self.dist(point) < distance:
            best.append(self)

        split = self.data[self.axis]
        coord = point[self.axis]

        # squared distance between the point and this node's splitting plane
        plane_dist = self.axis_dist(point, self.axis)

        # visit the nearer child first
        children = sorted(self.children, key=lambda c_p1: c_p1[0].dist(point))

        for child, pos in children:
            # The left subtree holds coordinates <= split, the right one
            # coordinates >= split. The subtree on the point's own side has
            # to be searched in any case; the other one only if the splitting
            # plane itself is within reach.
            if pos == 0:
                same_side = coord <= split
            else:
                same_side = coord >= split

            if same_side or plane_dist < distance:
                child.search_nn_dist(point, distance, best)

        return best

    @require_axis
    def is_valid(self):
        """ Checks recursively if the tree is valid

        It is valid if each node splits correctly, i.e. along the node's
        axis the left child is not greater and the right child is not
        smaller than the node itself """

        if not self:
            return True

        if self.left and self.data[self.axis] < self.left.data[self.axis]:
            return False

        if self.right and self.data[self.axis] > self.right.data[self.axis]:
            return False

        return all(c.is_valid() for c, _ in self.children) or self.is_leaf

    def extreme_child(self, sel_func, axis):
        """ Returns a child of the subtree and its parent

        The child is selected by sel_func which is either min or max
        (or a different function with similar semantics), applied to the
        nodes' coordinates on the given axis.

        The result is a (node, parent) tuple; parent is None if this node
        itself was selected, and (None, None) is returned for an empty
        subtree. """

        max_key = lambda child_parent: child_parent[0].data[axis]

        # we don't know our parent, so we include None
        me = [(self, None)] if self else []

        child_max = [c.extreme_child(sel_func, axis) for c, _ in self.children]
        # insert self for unknown parents
        child_max = [(c, p if p is not None else self) for c, p in child_max]

        candidates = me + child_max

        if not candidates:
            return None, None

        return sel_func(candidates, key=max_key)
def create(point_list=None, dimensions=None, axis=0, sel_axis=None):
    """ Creates a kd-tree from a list of points

    All points in the list must be of the same dimensionality.

    If no point_list is given, an empty tree is created. The number of
    dimensions has to be given instead.

    If both a point_list and dimensions are given, the numbers must agree.

    Axis is the axis on which the root-node should split.

    sel_axis(axis) is used when creating subnodes of a node. It receives the
    axis of the parent node and returns the axis of the child node.

    The given point_list is not modified. Returns the root node of the tree;
    a ValueError is raised if neither points nor dimensions are given or if
    the dimensionality check fails. """

    if not point_list and not dimensions:
        raise ValueError('either point_list or dimensions must be provided')

    elif point_list:
        dimensions = check_dimensionality(point_list, dimensions)

    # by default cycle through the axis
    sel_axis = sel_axis or (lambda prev_axis: (prev_axis+1) % dimensions)

    if not point_list:
        return KDNode(sel_axis=sel_axis, axis=axis, dimensions=dimensions)

    # Sort the points (into a new list, the caller's list is left alone)
    # and choose the median as pivot element
    points = sorted(point_list, key=lambda point: point[axis])
    median = len(points) // 2

    loc = points[median]

    # sel_axis is handed down explicitly, otherwise the subtrees would fall
    # back to the default axis selection
    left = create(points[:median], dimensions, sel_axis(axis), sel_axis)
    right = create(points[median + 1:], dimensions, sel_axis(axis), sel_axis)

    # the node has to know its dimensions to validate points added later
    return KDNode(loc, left, right, axis=axis, sel_axis=sel_axis,
                  dimensions=dimensions)
def check_dimensionality(point_list, dimensions=None):
    """ Verifies that all points have the same number of coordinates

    If dimensions is not given (or is 0), the length of the first point is
    taken as reference. Returns the number of dimensions; raises a
    ValueError as soon as a point of a different length is found. """
    expected = dimensions if dimensions else len(point_list[0])

    if any(len(point) != expected for point in point_list):
        raise ValueError('All Points in the point_list must have the same dimensionality')

    return expected
def level_order(tree, include_all=False):
    """ Returns an iterator over the tree in level-order

    If include_all is set to True, empty parts of the tree are filled
    with dummy entries (fresh instances of the node's class) and the
    iterator becomes infinite. """

    pending = deque([tree])

    while pending:
        node = pending.popleft()
        yield node

        # left before right, so that every level is emitted left to right
        for child in (node.left, node.right):
            if include_all or child:
                pending.append(child or node.__class__())
def visualize(tree, max_level=100, node_width=10, left_padding=5):
    """ Prints the tree to stdout

    One line is printed per level, down to the tree's last level or to
    max_level, whichever comes first. Every node's data is centered in a
    cell whose width is halved from one level to the next; node_width
    scales the cells and left_padding is the number of blanks printed in
    front of each level. """

    depth = min(max_level, tree.height()-1)
    widest = pow(2, depth)

    row = 0        # index of the level that is being printed
    row_size = 1   # number of cells in that level
    printed = 0    # cells of that level printed so far

    # include_all makes level_order() yield a dummy for every missing node,
    # so that each level consists of exactly row_size cells
    for node in level_order(tree, include_all=True):

        # a new level starts: separate it and indent
        if printed == 0:
            print()
            print()
            print(' '*left_padding, end=' ')

        cell = int(widest*node_width/row_size)

        label = str(node.data) if node else ''
        print(label.center(cell), end=' ')

        printed += 1

        # level complete, the next one has twice as many cells
        if printed == row_size:
            printed = 0
            row_size *= 2
            row += 1

        if row > depth:
            break

    print()
    print()