Skip to content

Commit

Permalink
investigating cuckoo problems...
Browse files Browse the repository at this point in the history
  • Loading branch information
folkvir committed May 17, 2024
1 parent 937f80e commit 43e2f72
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 27 deletions.
33 changes: 17 additions & 16 deletions src/cuckoo-filter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,15 @@ import {
import { HashableInput } from './types.js'

/**
* Compute the optimal fingerprint length in bytes for a given bucket size
* Compute the optimal fingerprint length in bits for a given bucket size
* and a false positive rate.
* @param {int} size - The filter bucket size
* @param {int} rate - The error rate, i.e. 'false positive' rate, targetted by the filter
* @return {int} The optimal fingerprint length in bytes
* @return {int} The optimal fingerprint length in bits
* @private
*/
function computeFingerpintLength(size: number, rate: number): number {
const f = Math.ceil(Math.log2(1 / rate) + Math.log2(2 * size))
return Math.ceil(f / 8) // because we use 64-bits hashes
return Math.ceil(Math.log2(1 / rate) + Math.log2(2 * size))
}

export interface ExportedCuckooFilter {
Expand Down Expand Up @@ -95,18 +94,21 @@ export default class CuckooFilter extends BaseFilter implements WritableFilter<H
* Build a new optimal CuckooFilter from an iterable with a fixed error rate
* @param items - Iterable used to populate the filter
* @param errorRate - The error rate of the filter
* @param bucketSize - The number of buckets desired per cell
* @param maxKicks - The number of kicks done when a collision occurs
* @param bucketSize - The number of buckets desired per cell
* @param maxKicks - The number of kicks done when a collision occurs
* @param seed - (optional) the seed to use
* @return A new Cuckoo Filter filled with the iterable's elements
*/
public static from(
items: Iterable<HashableInput>,
errorRate: number,
bucketSize = 4,
maxKicks = 500,
seed?: bigint,
): CuckooFilter {
const array = Array.from(items)
const filter = CuckooFilter.create(array.length, errorRate, bucketSize, maxKicks)
if (seed) filter.seed = seed
array.forEach(item => filter.add(item))
return filter
}
Expand Down Expand Up @@ -298,21 +300,20 @@ export default class CuckooFilter extends BaseFilter implements WritableFilter<H
* @private
*/
public _locations(element: HashableInput) {
const hashes = this._hashing.hashIntAndString(element, this.seed)
const hash = hashes.int
if (this._fingerprintLength > hashes.string.length) {
const hash = this.hash(element)
const hashstr = hash.toString(2).padStart(64, '0')
if (this._fingerprintLength > hashstr.length) {
throw new Error(
`The fingerprint length (${this._fingerprintLength.toString()}) is higher than the hash length (${hashes.string.length.toString()}). Please reduce the fingerprint length or report if it is an unexpected behavior.`,
`The fingerprint length (${this._fingerprintLength.toString()}) is higher than the hash length (${hashstr.length.toString()}). Please reduce the fingerprint length or report if it is an unexpected behavior.`,
)
}
const fingerprint = hashes.string.substring(0, this._fingerprintLength)
const firstIndex = getBigIntAbs(hash)
const secondHash = getBigIntAbs(this.hash(fingerprint))
const secondIndex = firstIndex ^ secondHash
const fingerprint = hashstr.substring(63 - this._fingerprintLength)
const firstIndex = hash % BigInt(this._size)
const secondIndex = (firstIndex ^ this.hash(fingerprint)) % BigInt(this._size)
const res = {
fingerprint,
firstIndex: bigIntToNumber(firstIndex % BigInt(this._size)),
secondIndex: bigIntToNumber(secondIndex % BigInt(this._size)),
firstIndex: Number(BigInt.asUintN(32, firstIndex)),
secondIndex: Number(BigInt.asUintN(32, secondIndex)),
}
return res
}
Expand Down
45 changes: 34 additions & 11 deletions tests/cuckoo-filter.test.ts
Original file line number Diff line number Diff line change
@@ -1,24 +1,30 @@
import { expect, test } from '@jest/globals'
import { CuckooFilter, ExportedCuckooFilter, getBigIntAbs } from '../src/index'
import { CuckooFilter, ExportedCuckooFilter, getBigIntAbs, randomInt } from '../src/index'

Check failure on line 2 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (22)

'getBigIntAbs' is defined but never used

Check failure on line 2 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (22)

'randomInt' is defined but never used

Check failure on line 2 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (19)

'getBigIntAbs' is defined but never used

Check failure on line 2 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (19)

'randomInt' is defined but never used

Check failure on line 2 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (lts/*)

'getBigIntAbs' is defined but never used

Check failure on line 2 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (lts/*)

'randomInt' is defined but never used

Check failure on line 2 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (18)

'getBigIntAbs' is defined but never used

Check failure on line 2 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (18)

'randomInt' is defined but never used

Check failure on line 2 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (22)

'getBigIntAbs' is defined but never used

Check failure on line 2 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (22)

'randomInt' is defined but never used

Check failure on line 2 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (20)

'getBigIntAbs' is defined but never used

Check failure on line 2 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (20)

'randomInt' is defined but never used

Check failure on line 2 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (lts/*)

'getBigIntAbs' is defined but never used

Check failure on line 2 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (lts/*)

'randomInt' is defined but never used

Check failure on line 2 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (20)

'getBigIntAbs' is defined but never used

Check failure on line 2 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (20)

'randomInt' is defined but never used

Check failure on line 2 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (19)

'getBigIntAbs' is defined but never used

Check failure on line 2 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (19)

'randomInt' is defined but never used

Check failure on line 2 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (21)

'getBigIntAbs' is defined but never used

Check failure on line 2 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (21)

'randomInt' is defined but never used

Check failure on line 2 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (18)

'getBigIntAbs' is defined but never used

Check failure on line 2 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (18)

'randomInt' is defined but never used

Check failure on line 2 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (21)

'getBigIntAbs' is defined but never used

Check failure on line 2 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (21)

'randomInt' is defined but never used

// const seed = BigInt(randomInt(0, Number.MAX_SAFE_INTEGER))
// const seed = 8959374062914912n
const seed = 7409732628718466n
console.log(seed)

Check failure on line 7 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (22)

Unexpected console statement

Check failure on line 7 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (19)

Unexpected console statement

Check failure on line 7 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (lts/*)

Unexpected console statement

Check failure on line 7 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (18)

Unexpected console statement

Check failure on line 7 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (22)

Unexpected console statement

Check failure on line 7 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (20)

Unexpected console statement

Check failure on line 7 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (lts/*)

Unexpected console statement

Check failure on line 7 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (20)

Unexpected console statement

Check failure on line 7 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (19)

Unexpected console statement

Check failure on line 7 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (21)

Unexpected console statement

Check failure on line 7 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (18)

Unexpected console statement

Check failure on line 7 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (21)

Unexpected console statement

test('should compute the fingerprint and indexes for an element', () => {
const filter = new CuckooFilter(15, 3, 2, 1)
filter.seed = seed
const element = 'foo'
const hashes = filter._hashing.hashIntAndString(element, filter.seed)
const hash = hashes.int
const fingerprint = hashes.string.substring(0, 3)
const hash = filter._hashing._lib.xxh64(element, filter.seed)
const fingerprint = hash.toString(2).padStart(64, '0').substring(63 - 3)

const firstIndex = getBigIntAbs(hash)
const secondIndex = firstIndex ^ getBigIntAbs(filter.hash(fingerprint))
const firstIndex = hash % BigInt(filter.size)
const secondIndex = (firstIndex ^ filter.hash(fingerprint)) % BigInt(filter.size)

const locations = filter._locations(element)
expect(fingerprint).toEqual(locations.fingerprint)
expect(Number(firstIndex % BigInt(filter.size))).toEqual(locations.firstIndex)
expect(Number(secondIndex % BigInt(filter.size))).toEqual(locations.secondIndex)
expect(Number(firstIndex)).toEqual(locations.firstIndex)
expect(Number(secondIndex)).toEqual(locations.secondIndex)
})

test('should add element to the filter with #add', () => {
const filter = CuckooFilter.create(15, 0.01)
filter.seed = seed
let nbElements = 0
filter.add('alice')
filter.add('bob')
Expand All @@ -31,6 +37,7 @@ test('should add element to the filter with #add', () => {

test('should store ane element accross two different buckets', () => {
const filter = CuckooFilter.create(15, 0.01, 2)
filter.seed = seed
const element = 'foo'
let nbElements = 0

Expand All @@ -52,6 +59,7 @@ test('should store ane element accross two different buckets', () => {

test('should perform random kicks when both buckets are full', () => {
const filter = new CuckooFilter(15, 3, 1, 1)
filter.seed = seed
const element = 'foo'
let nbElements = 0
const locations = filter._locations(element)
Expand All @@ -73,13 +81,15 @@ test('should perform random kicks when both buckets are full', () => {

test("should reject elements that can't be inserted when filter is full", () => {
const filter = new CuckooFilter(1, 3, 1)
filter.seed = seed
const element = 'foo'
filter.add(element)
expect(filter.add(element, false, true)).toBe(false)
})

test('should not rollback to its initial state in case the filter is full with option add(x, false, true)', () => {
const filter = new CuckooFilter(10, 3, 1)
filter.seed = seed
expect(filter.add('a')).toBe(true)
expect(filter.add('b')).toBe(true)
expect(filter.add('c')).toBe(true)
Expand All @@ -103,6 +113,7 @@ test('should not rollback to its initial state in case the filter is full with o

test('should rollback to its initial state in case the filter is full', () => {
const filter = new CuckooFilter(10, 3, 1)
filter.seed = seed
expect(filter.add('a')).toBe(true)
expect(filter.add('b')).toBe(true)
expect(filter.add('c')).toBe(true)
Expand All @@ -121,6 +132,7 @@ test('should rollback to its initial state in case the filter is full', () => {

test('should remove exisiting elements from the filter', () => {
const filter = new CuckooFilter(15, 3, 1)
filter.seed = seed
const element = 'foo'
const locations = filter._locations(element)

Expand All @@ -131,45 +143,54 @@ test('should remove exisiting elements from the filter', () => {

test('should look inside every possible bucket', () => {
const filter = new CuckooFilter(15, 3, 1)
filter.seed = seed
const element = 'foo'
const locations = filter._locations(element)

filter.add(element)
console.log(filter)
filter.add(element)
console.log(filter)
expect(filter.remove(element)).toBe(true)

Check failure on line 154 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (22)

Unexpected console statement

Check failure on line 154 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (19)

Unexpected console statement

Check failure on line 154 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (lts/*)

Unexpected console statement

Check failure on line 154 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (18)

Unexpected console statement

Check failure on line 154 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (22)

Unexpected console statement

Check failure on line 154 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (20)

Unexpected console statement

Check failure on line 154 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (lts/*)

Unexpected console statement

Check failure on line 154 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (20)

Unexpected console statement

Check failure on line 154 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (19)

Unexpected console statement

Check failure on line 154 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (21)

Unexpected console statement

Check failure on line 154 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (18)

Unexpected console statement

Check failure on line 154 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (21)

Unexpected console statement
expect(filter._filter[locations.firstIndex].length).toEqual(0)
console.log(filter)

Check failure on line 156 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (22)

Unexpected console statement

Check failure on line 156 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (19)

Unexpected console statement

Check failure on line 156 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (lts/*)

Unexpected console statement

Check failure on line 156 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (18)

Unexpected console statement

Check failure on line 156 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (22)

Unexpected console statement

Check failure on line 156 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (20)

Unexpected console statement

Check failure on line 156 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (lts/*)

Unexpected console statement

Check failure on line 156 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (20)

Unexpected console statement

Check failure on line 156 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (19)

Unexpected console statement

Check failure on line 156 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (21)

Unexpected console statement

Check failure on line 156 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (18)

Unexpected console statement

Check failure on line 156 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (21)

Unexpected console statement
expect(filter.remove(element)).toBe(true)
expect(filter._filter[locations.secondIndex].length).toEqual(0)
})

Check failure on line 159 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (22)

Unexpected console statement

Check failure on line 159 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (19)

Unexpected console statement

Check failure on line 159 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (lts/*)

Unexpected console statement

Check failure on line 159 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (18)

Unexpected console statement

Check failure on line 159 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (22)

Unexpected console statement

Check failure on line 159 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (20)

Unexpected console statement

Check failure on line 159 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (lts/*)

Unexpected console statement

Check failure on line 159 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (20)

Unexpected console statement

Check failure on line 159 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (19)

Unexpected console statement

Check failure on line 159 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (21)

Unexpected console statement

Check failure on line 159 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (18)

Unexpected console statement

Check failure on line 159 in tests/cuckoo-filter.test.ts

View workflow job for this annotation

GitHub Actions / ubuntu_build (21)

Unexpected console statement

test('should fail to remove elements that are not in the filter', () => {
const filter = new CuckooFilter(15, 3, 1)
filter.seed = seed
filter.add('foo')
expect(filter.remove('moo')).toBe(false)
})

test('should return True when an element may be in the filter', () => {
const filter = new CuckooFilter(15, 3, 1)
filter.seed = seed
filter.add('foo')
expect(filter.has('foo')).toBe(true)
})

test('should return False when an element is definitively not in the filter', () => {
const filter = new CuckooFilter(15, 3, 1)
filter.seed = seed
filter.add('foo')
expect(filter.has('moo')).toBe(false)
})

test('should look inside every possible bucket', () => {
const filter = new CuckooFilter(15, 3, 1)
filter.add('foo')
filter.add('foo')
filter.seed = seed
expect(filter.add('foo')).toBe(true)
expect(filter.add('foo')).toBe(true)
filter.remove('foo')
expect(filter.has('foo')).toBe(true)
})

test('issue#(https://github.com/Callidon/bloom-filters/issues/9)', () => {
const filter = CuckooFilter.create(15, 0.01)
filter.seed = seed
filter.add('alice')
filter.add('andrew')
filter.add('bob')
Expand All @@ -194,6 +215,7 @@ test('issue#(https://github.com/Callidon/bloom-filters/issues/9)', () => {

function buildCuckooFilter() {
const filter = new CuckooFilter(15, 3, 2)
filter.seed = seed
filter.add('alice')
filter.add('bob')
return filter
Expand Down Expand Up @@ -226,6 +248,7 @@ const rate = 0.000000000000000001
const bucketSize = 1
test(`should not return an error when inserting and asking for ${max.toString()} elements, rate = ${rate.toString()}; bucketSize = ${bucketSize.toString()};`, () => {
const filter = CuckooFilter.create(max, rate, bucketSize, 500)
filter.seed = seed
for (let i = 0; i < max; i++) {
expect(filter.add(i.toString())).toBe(true)
}
Expand Down Expand Up @@ -268,7 +291,7 @@ test('issue#(https://github.com/Callidon/bloom-filters/issues/68)', () => {
const round = 100000
let c_false = 0

const filter = CuckooFilter.from(items, errorRate)
const filter = CuckooFilter.from(items, errorRate, undefined, undefined, 2141419401098886n)
for (let i = 0; i < round; i++) {
let val = filter.has('https://www.youtube.com/watch?v=HJjxN05ewEc')
if (!val) {
Expand Down

0 comments on commit 43e2f72

Please sign in to comment.