Skip to content

Commit

Permalink
fixup
Browse files Browse the repository at this point in the history
  • Loading branch information
timotheecour committed Mar 19, 2020
1 parent 62e8ea3 commit ae94905
Show file tree
Hide file tree
Showing 6 changed files with 115 additions and 60 deletions.
31 changes: 22 additions & 9 deletions lib/pure/collections/hashcommon.nim
Original file line number Diff line number Diff line change
Expand Up @@ -75,19 +75,33 @@ template getPerturb(t: typed, hc: Hash): UHash =
template findCell(t: typed, hc, mustNextTry): int =
let m = maxHash(t)
var index: Hash = hc and m
var perturb: UHash = getPerturb(t, hc)
var perturb = getPerturb(t, hc)
var depth = 0
const depthThres = 20 # PARAM

## PARAM: this param can affect performance and could be exposed to users whoe
## need to optimize for their specific key distribution. If clusters are to be
## expected, it's better to set it low; for really random data, it's better to
## set it high. We pick a sensible default that works across a range of key
## distributions.
##
## depthThres=0 will just use pseudorandom probing
## depthThres=int.high will just use linear probing
## depthThres in between will switch
const depthThres = 20

while mustNextTry(t.data[index], index):
depth.inc
if depth <= depthThres:
## linear probing, cache friendly
index = (index + 1) and m
else:
## pseudorandom probing, "bad" case was detected
index = nextTry(index, m, perturb)
index

template rawGetKnownHCImpl() {.dirty.} =
if t.dataLen == 0: return -1
if t.dataLen == 0:
return -1
var deletedIndex = -1
template mustNextTry(cell, index): bool =
if isFilledAndValid(cell.hcode):
Expand All @@ -97,15 +111,14 @@ template rawGetKnownHCImpl() {.dirty.} =
# just a few clock cycles, generally worth it for any non-integer-like A.
# performance: we optimize this: depending on type(key), skip hc comparison
if cell.hcode == hc and cell.key == key:
deletedIndex = -2
deletedIndex = -2 # found
false
else:
true
else: true # keep going
elif cell.hcode == deletedMarker:
if deletedIndex == -1:
deletedIndex = index
true
else: false
deletedIndex = index # remember 1st tombstone found before continuing
true # keep going
else: false # not found

let index = findCell(t, hc, mustNextTry)
if deletedIndex == -2:
Expand Down
6 changes: 2 additions & 4 deletions lib/pure/collections/sharedtables.nim
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,8 @@ proc enlarge[A, B](t: var SharedTable[A, B]) =
for i in 0..<oldSize:
let eh = n[i].hcode
if isFilled(eh):
var perturb = t.getPerturb(eh)
var j: Hash = eh and maxHash(t)
while isFilled(t.data[j].hcode):
j = nextTry(j, maxHash(t), perturb)
template mustNextTry(cell, index): bool = isFilled(cell.hcode)
let j = findCell(t, eh, mustNextTry)
rawInsert(t, t.data, n[i].key, n[i].val, eh, j)
deallocShared(n)

Expand Down
14 changes: 4 additions & 10 deletions lib/pure/collections/tableimpl.nim
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,9 @@ include hashcommon

template rawGetDeepImpl() {.dirty.} = # Search algo for unconditional add
genHashImpl(key, hc)
var h: Hash = hc and maxHash(t)
var perturb = t.getPerturb(hc)
while true:
let hcode = t.data[h].hcode
if hcode == deletedMarker or hcode == freeMarker:
break
else:
h = nextTry(h, maxHash(t), perturb)
result = h
template mustNextTry(cell, index): bool = cell.hcode != deletedMarker and cell.hcode != freeMarker
result = findCell(t, hc, mustNextTry)


template rawInsertImpl(t) {.dirty.} =
data[h].key = key
Expand Down Expand Up @@ -128,7 +122,7 @@ template initImpl(result: typed, size: int) =
template insertImpl() = # for CountTable
checkIfInitialized()
if mustRehash(t): enlarge(t)
ctRawInsert(t, t.data, key, val)
ctRawInsert(t, key, val)
inc(t.counter)

template getOrDefaultImpl(t, key): untyped =
Expand Down
76 changes: 39 additions & 37 deletions lib/pure/collections/tables.nim
Original file line number Diff line number Diff line change
Expand Up @@ -782,18 +782,17 @@ iterator allValues*[A, B](t: Table[A, B]; key: A): B =
doAssert sorted(toSeq(a.allValues('z'))) == @[10, 20, 30]

let hc = genHash(key)
var h: Hash = hc and high(t.data)
let L = len(t)
var perturb = t.getPerturb(hc)

var num = 0
var cache: seq[Hash]
while isFilled(t.data[h].hcode): # `isFilledAndValid` would be incorrect, see test for `allValues`
if t.data[h].hcode == hc and t.data[h].key == key:
if not hasKeyOrPutCache(cache, h):
yield t.data[h].val
assert(len(t) == L, "the length of the table changed while iterating over it")
h = nextTry(h, high(t.data), perturb)
template mustNextTry(cell, index): bool =
if isFilled(cell.hcode):
if cell.hcode == hc and cell.key == key:
if not hasKeyOrPutCache(cache, index):
yield cell.val
assert(len(t) == L, "the length of the table changed while iterating over it")
true
else: false
let index = findCell(t, hc, mustNextTry)



Expand Down Expand Up @@ -1268,10 +1267,8 @@ proc enlarge[A, B](t: var OrderedTable[A, B]) =
var nxt = n[h].next
let eh = n[h].hcode
if isFilledAndValid(eh):
var j: Hash = eh and maxHash(t)
var perturb = t.getPerturb(eh)
while isFilled(t.data[j].hcode):
j = nextTry(j, maxHash(t), perturb)
template mustNextTry(cell, index): bool = isFilled(cell.hcode)
let j = findCell(t, eh, mustNextTry)
rawInsert(t, t.data, move n[h].key, move n[h].val, n[h].hcode, j)
h = nxt

Expand Down Expand Up @@ -2228,6 +2225,10 @@ type
data: seq[tuple[key: A, val: int]]
counter: int
countDeleted: int
# using this and creating tombstones as in `Table` would make `remove`
# amortized O(1) instead of O(n); this requires updating remove,
# and changing `val != 0` to what's used in Table. Or simply make CountTable
# a thin wrapper around Table (which would also enable `dec`, etc)
isSorted: bool
CountTableRef*[A] = ref CountTable[A] ## Ref version of
## `CountTable<#CountTable>`_.
Expand All @@ -2238,45 +2239,46 @@ type

# ------------------------------ helpers ---------------------------------

proc ctRawInsert[A](t: CountTable[A], data: var seq[tuple[key: A, val: int]],
key: A, val: int) =
let hc = hash(key)
var perturb = t.getPerturb(hc)
var h: Hash = hc and high(data)
while data[h].val != 0: h = nextTry(h, high(data), perturb) # TODO: handle deletedMarker
data[h].key = key
data[h].val = val
proc ctRawInsert[A](t: var CountTable[A], key: A, val: int) =
let hc = genHash(key)
template mustNextTry(cell, index): bool = cell.val != 0
let h = findCell(t, hc, mustNextTry)
t.data[h].key = key
t.data[h].val = val

proc enlarge[A](t: var CountTable[A]) =
var n: seq[tuple[key: A, val: int]]
newSeq(n, len(t.data) * growthFactor)
for i in countup(0, high(t.data)):
if t.data[i].val != 0: ctRawInsert(t, n, move t.data[i].key, move t.data[i].val)
swap(t.data, n)
for i in countup(0, high(n)):
if n[i].val != 0: ctRawInsert(t, move n[i].key, move n[i].val)

proc remove[A](t: var CountTable[A], key: A) =
var n: seq[tuple[key: A, val: int]]
newSeq(n, len(t.data))
swap(t.data, n)
var removed: bool
for i in countup(0, high(t.data)):
if t.data[i].val != 0:
if t.data[i].key != key:
ctRawInsert(t, n, move t.data[i].key, move t.data[i].val)
for i in countup(0, high(n)):
if n[i].val != 0:
if n[i].key != key:
ctRawInsert(t, move n[i].key, move n[i].val)
else:
removed = true
swap(t.data, n)
if removed: dec(t.counter)

proc rawGet[A](t: CountTable[A], key: A): int =
if t.data.len == 0:
return -1
let hc = hash(key)
var perturb = t.getPerturb(hc)
var h: Hash = hc and high(t.data) # start with real hash value
while t.data[h].val != 0: # TODO: may need to handle t.data[h].hcode == deletedMarker?
if t.data[h].key == key: return h
h = nextTry(h, high(t.data), perturb)
result = -1 - h # < 0 => MISSING; insert idx = -1 - result
var found = false
template mustNextTry(cell, index): bool =
if cell.val == 0: false # not found
elif cell.key != key: true # keep going
else:
found = true
false
let hc = genHash(key)
let h = findCell(t, hc, mustNextTry)
result = if found: h else: -1 - h # < 0 => MISSING; insert idx = -1 - result

template ctget(t, key, default: untyped): untyped =
var index = rawGet(t, key)
Expand Down Expand Up @@ -2358,7 +2360,7 @@ proc inc*[A](t: var CountTable[A], key: A, val: Positive = 1) =
var index = rawGet(t, key)
if index >= 0:
inc(t.data[index].val, val)
if t.data[index].val == 0: dec(t.counter)
if t.data[index].val == 0: dec(t.counter) # how could this happen?
else:
insertImpl()

Expand Down
34 changes: 34 additions & 0 deletions tests/collections/ttables.nim
Original file line number Diff line number Diff line change
Expand Up @@ -436,3 +436,37 @@ block: # https://github.com/nim-lang/Nim/issues/13496
testDel(): (let t = newOrderedTable[int, int]())
testDel(): (var t: CountTable[int])
testDel(): (let t = newCountTable[int]())

block:
# check CountTable using Table as groundtruth
proc main() =
var t: CountTable[string]
var t0: Table[string, int]
let n = 1000
let n1 = n div 2
template inc2(key: string, val = 1) =
t.inc(key, val)
if key in t0: t0[key]+=val else: t0[key]=val

template del2(key) =
t.del(key)
t0.del(key)

template check() =
for k,v in t:
doAssert t0[k] == v
for k,v in t0:
doAssert t[k] == v

let m = 2
for j in 0..<m:
for i in 0..<n: inc2($i)
check()

for i in 0..<n1: del2($i)
check()

for i in 0..<n: inc2($i)
check()

main()
14 changes: 14 additions & 0 deletions tests/stdlib/tsharedtable.nim
Original file line number Diff line number Diff line change
Expand Up @@ -87,3 +87,17 @@ block: # we use Table as groundtruth, it's well tested elsewhere
var t0: Table[int, int]
testDel(t, t0)
deinitSharedTable(t)

block: # CHECKME:redundant w above?
let n = 100
let n2 = n * 2
for i in 0..<n:
table[i] = i
assert table.hasKeyOrPut(i, i)

for i in n..<n2:
assert not table.hasKeyOrPut(i, i)

for i in 0..<n2:
assert table.mgetOrPut(i, i) == i
assert table.mget(i) == i

0 comments on commit ae94905

Please sign in to comment.