-
-
Notifications
You must be signed in to change notification settings - Fork 92
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fix: Improve word lookup performance for FastTrieBlob and TrieBlob (#…
- Loading branch information
Showing
43 changed files
with
1,282 additions
and
235 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
import assert from 'node:assert'; | ||
|
||
import { buildITrieFromWords } from 'cspell-trie-lib'; | ||
import { loremIpsum } from 'lorem-ipsum'; | ||
import { suite } from 'perf-insight'; | ||
|
||
import { createSpellingDictionary } from '../SpellingDictionary/createSpellingDictionary.js'; | ||
|
||
/**
 * Lookup benchmarks comparing a spelling dictionary with the ITrie built from
 * the same word list.
 *
 * The "100k" in the labels refers to the number of lookups: `checkWords`
 * performs its default number of checks, cycling through the 10_000 generated
 * words.
 */
suite('dictionary has', async (test) => {
    const words = genWords(10_000);

    const iTrie = buildITrieFromWords(words);
    const dict = createSpellingDictionary(words, 'test', import.meta.url);

    test('dictionary has 100k words', () => checkWords(dict, words));

    // Same lookups again against the same dictionary instance.
    test('dictionary has 100k words (2nd time)', () => checkWords(dict, words));

    test('iTrie has 100k words', () => checkWords(iTrie, words));

    test('iTrie.hasWord has 100k words', () => {
        // Adapt `hasWord` to the `{ has }` shape that checkWords expects.
        const viaHasWord = { has: (word: string) => iTrie.hasWord(word, true) };
        checkWords(viaHasWord, words);
    });

    // Query the trie's underlying data object directly.
    test('iTrie.data has 100k words', () => checkWords(iTrie.data, words));
});
|
||
/**
 * Look up words in `dict` and assert that every lookup succeeded.
 *
 * @param dict - anything exposing a `has(word)` lookup.
 * @param words - the words to look up; they are cycled through when
 *   `totalChecks` exceeds `words.length`.
 * @param totalChecks - how many lookups to perform in total.
 * @throws an assertion error if any lookup returned `false`.
 */
function checkWords(dict: { has: (word: string) => boolean }, words: string[], totalChecks = 100_000) {
    const wordCount = words.length;
    let allFound = true;
    let checked = 0;
    while (checked < totalChecks) {
        // Always perform the lookup, even after a miss, so the amount of work stays constant.
        const found = dict.has(words[checked % wordCount]);
        allFound = allFound && found;
        checked += 1;
    }
    assert(allFound, 'All words should be found in the dictionary');
}
|
||
/**
 * Build a list of unique "words" for benchmarking.
 *
 * Starts from the space-separated tokens of generated lorem ipsum text and,
 * while there are fewer than `count` unique entries, adds concatenations of
 * pairs of existing entries.
 *
 * @param count - passed to `loremIpsum` as its `count` option and used as the
 *   target number of unique entries.
 * @returns the unique entries in insertion order.
 */
function genWords(count: number): string[] {
    const unique = new Set(loremIpsum({ count }).split(' '));

    // One pass over a snapshot of the current entries, adding `a + b` for each
    // ordered pair of distinct entries. Stops as soon as the target is reached.
    const addPairs = (): void => {
        const snapshot = [...unique];
        for (const head of snapshot) {
            for (const tail of snapshot) {
                if (head !== tail) {
                    unique.add(head + tail);
                }
                if (unique.size >= count) {
                    return;
                }
            }
        }
    };

    while (unique.size < count) {
        addPairs();
    }

    return [...unique];
}
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,9 @@ | ||
{ | ||
"files": [], | ||
"references": [{ "path": "./tsconfig.esm.json" }] | ||
"extends": "../../tsconfig.esm.json", | ||
"compilerOptions": { | ||
"rootDir": "src", | ||
"outDir": "dist", | ||
"types": ["node"] | ||
}, | ||
"include": ["src"] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,6 +2,7 @@ | |
|
||
exports[`Pipe API > pipe api 1`] = ` | ||
[ | ||
"fork", | ||
"helpers", | ||
"interleave", | ||
"isAsyncIterable", | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,131 @@ | ||
import { describe, expect, test } from 'vitest'; | ||
|
||
import { opTakeSync } from '../operators/take.js'; | ||
import { generatorTestWrapper, makeIterableTestWrapperOptions } from '../test/iterableTestWrapper.js'; | ||
import { throwAfter } from '../test/throwAfter.js'; | ||
import { fork } from './fork.js'; | ||
import { interleave } from './interleave.js'; | ||
import { toArray } from './toArray.js'; | ||
|
||
describe('fork', () => {
    // Consuming the forks one after the other yields the full sequence twice.
    test('simple', () => {
        const f = fork([1, 2, 3, 4, 5]);
        expect([...f[0]]).toEqual([1, 2, 3, 4, 5]);
        expect([...f[1]]).toEqual([1, 2, 3, 4, 5]);
    });

    // Consuming the forks through `interleave` yields each value from both forks.
    test('test sync', () => {
        const f = fork([1, 2, 3, 4, 5]);
        expect([...interleave(f[0], f[1])]).toEqual([1, 1, 2, 2, 3, 3, 4, 4, 5, 5]);
    });

    // Both forks are fully consumed; the counters on `options` record how the
    // wrapped source was driven.
    test('from generator', () => {
        const options = makeIterableTestWrapperOptions();
        const f = fork(generatorTestWrapper([1, 2, 3, 4, 5], options));
        expect([...f[0]]).toEqual([1, 2, 3, 4, 5]);
        expect([...f[1]]).toEqual([1, 2, 3, 4, 5]);
        expect(options.nextCalled).toHaveBeenCalledTimes(6);
        expect(options.returnCalled).toHaveBeenCalledTimes(1);
        expect(options.throwCalled).toHaveBeenCalledTimes(0);
    });

    // Renamed: this title used to duplicate the previous test's title.
    // The source is infinite and each fork stops early; the source is only
    // closed once the second fork has stopped as well.
    test('from generator, partially consumed', () => {
        const options = makeIterableTestWrapperOptions();
        const f = fork(generatorTestWrapper(genNumbers(), options));
        expect([...take(f[0], 3)]).toEqual([1, 2, 3]);
        expect(options.nextCalled).toHaveBeenCalledTimes(4);
        expect(options.nextReturned).toHaveBeenCalledTimes(4);
        expect(options.returnCalled).toHaveBeenCalledTimes(0);
        expect(options.throwCalled).toHaveBeenCalledTimes(0);
        expect([...take(f[1], 6)]).toEqual([1, 2, 3, 4, 5, 6]);
        expect(options.nextCalled).toHaveBeenCalledTimes(7);
        expect(options.returnCalled).toHaveBeenCalledTimes(1);
        expect(options.throwCalled).toHaveBeenCalledTimes(0);
    });

    // The source is a plain array; its iterator is not wrapped by the test helpers.
    test('with errors, no .throw', () => {
        const f = fork([1, 2, 3, 4, 5]);
        expect(() => toArray(whenThrow(f[0], 3, 'my error'))).toThrow('my error');
    });

    // The first fork drains the source before the second fork fails.
    test('with errors late', () => {
        const options = makeIterableTestWrapperOptions();
        const f = fork(generatorTestWrapper([1, 2, 3, 4, 5, 6, 7], options));
        expect([...f[0]]).toEqual([1, 2, 3, 4, 5, 6, 7]);
        expect(options.nextCalled).toHaveBeenCalledTimes(8);
        expect(options.nextReturned).toHaveBeenCalledTimes(8);
        expect(() => [...throwAfter(f[1], 3, 'my error')]).toThrow('my error');
        expect(options.returnCalled).toHaveBeenCalledTimes(1);
        expect(options.throwCalled).toHaveBeenCalledTimes(0);
    });

    // The failure happens while the source still has values left.
    test('with errors early', () => {
        const options = makeIterableTestWrapperOptions();
        const f = fork(generatorTestWrapper([1, 2, 3, 4, 5, 6, 7], options));
        expect(() => [...generatorTestWrapper(throwAfter(f[1], 3, 'my error'))]).toThrow('my error');
        expect(options.nextCalled).toHaveBeenCalledTimes(3);
        expect(options.nextReturned).toHaveBeenCalledTimes(3);
        expect(options.returnCalled).toHaveBeenCalledTimes(1);
        expect(options.throwCalled).toHaveBeenCalledTimes(1);
    });

    // One fork stops after two values; the other still sees everything.
    test('asymmetric', () => {
        const f = fork([1, 2, 3, 4, 5]);
        expect(toArray(opTakeSync(2)(f[0]))).toEqual([1, 2]);
        expect([...f[1]]).toEqual([1, 2, 3, 4, 5]);
    });
});
|
||
// Pins down how the test wrapper's counters behave on their own, without
// `fork` in the picture.
describe('Generator Throw Assumptions', () => {
    test('No Throw - Empty', () => {
        const counters = makeIterableTestWrapperOptions();
        const wrapped = generatorTestWrapper([], counters);
        expect([...wrapped]).toEqual([]);
        expect(counters.nextCalled).toHaveBeenCalledTimes(1);
        expect(counters.returnCalled).toHaveBeenCalledTimes(1);
        expect(counters.throwCalled).toHaveBeenCalledTimes(0);
    });

    test('No Throw', () => {
        const counters = makeIterableTestWrapperOptions();
        const wrapped = generatorTestWrapper([1, 2, 3], counters);
        expect([...wrapped]).toEqual([1, 2, 3]);
        expect(counters.nextCalled).toHaveBeenCalledTimes(4);
        expect(counters.returnCalled).toHaveBeenCalledTimes(1);
        expect(counters.throwCalled).toHaveBeenCalledTimes(0);
    });

    test('Throw after', () => {
        const counters = makeIterableTestWrapperOptions();
        const wrapped = generatorTestWrapper([1, 2, 3, 4, 5], counters);
        const failing = throwAfter(wrapped, 2, 'my error');
        expect(() => [...take(failing, 20)]).toThrow('my error');
        expect(counters.nextCalled).toHaveBeenCalledTimes(2);
        expect(counters.returnCalled).toHaveBeenCalledTimes(1);
        expect(counters.throwCalled).toHaveBeenCalledTimes(1);
    });
});
|
||
/**
 * Yield at most `count` values from `iterable`.
 *
 * The limit is checked after a value has been pulled, so when the source has
 * more than `count` values, one extra value is pulled from it and discarded
 * before iteration stops.
 *
 * @param iterable - the source of values.
 * @param count - the maximum number of values to yield.
 */
function* take<T>(iterable: Iterable<T>, count: number) {
    let remaining = count;
    for (const item of iterable) {
        if (remaining <= 0) {
            return;
        }
        remaining -= 1;
        yield item;
    }
}
|
||
/** Endless generator yielding 1, 2, 3, ... */
function* genNumbers() {
    for (let value = 1; ; value++) {
        yield value;
    }
}
|
||
/**
 * Re-yield the values of `i` until one is strictly equal to `when`, then throw
 * `error` instead of yielding that value.
 *
 * @param i - the source of values.
 * @param when - the value that triggers the throw.
 * @param error - the value to throw.
 */
function* whenThrow<T>(i: Iterable<T>, when: T, error: unknown) {
    for (const item of i) {
        if (item !== when) {
            yield item;
            continue;
        }
        throw error;
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
/** The two iterables returned by {@link fork}; each one replays the values of the shared source. */
export type ForkedIterables<T> = [Iterable<T>, Iterable<T>];

// Shared, frozen placeholder installed in place of a fork's buffer once that fork has finished.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const emptyArray: Array<any> = [];

Object.freeze(emptyArray);

/**
 * Split a single iterable into two iterables that each see every value.
 *
 * The source is iterated only once. A value pulled by one fork is buffered for
 * the other fork for as long as that other fork has not finished. The forks can
 * be consumed one after the other, interleaved, or with either one running
 * ahead of the other.
 *
 * Once both forks have finished (by exhaustion, early return, or error), the
 * source iterator's `return()` is called if it has one.
 *
 * @param iterable - the source to split.
 * @returns a tuple of two iterables over the same values.
 */
export function fork<T>(iterable: Iterable<T>): ForkedIterables<T> {
    // Bit 0 is set while the first fork is live, bit 1 while the second is.
    let active = 3;

    interface BufClosure {
        buf: T[];
    }

    const bufA: BufClosure = { buf: [] };
    const bufB: BufClosure = { buf: [] };

    let iterator: Iterator<T> | undefined = undefined;

    // The source iterator is created lazily, the first time either fork is iterated.
    function getIterator(): Iterator<T> {
        if (iterator) {
            return iterator;
        }
        return (iterator = iterable[Symbol.iterator]());
    }

    /**
     * @param mask - every bit set except this fork's own bit in `active`.
     * @param a - holder of the buffer this fork reads from.
     * @param b - holder of the buffer the sibling fork reads from.
     */
    function* gen(mask: number, a: BufClosure, b: BufClosure): Iterable<T> {
        const cur = a.buf;
        const other = b.buf;
        const iter = getIterator();
        try {
            for (;;) {
                // Always drain our buffer before pulling from the source: the sibling
                // may have pulled values since our last yield (even after we had already
                // been ahead of it), and those values must not be skipped.
                // An index loop is used because the sibling can append between yields.
                for (let i = 0; i < cur.length; i++) {
                    yield cur[i];
                }
                cur.length = 0;
                const n = iter.next();
                if (n.done) {
                    break;
                }
                // Only buffer for the sibling while it is still live.
                if (active & mask) {
                    other.push(n.value);
                }
                yield n.value;
            }
        } catch (e) {
            // Forward the error to the source iterator when it supports `throw`.
            if (iter.throw) {
                return iter.throw(e);
            }
            throw e;
        } finally {
            // Mark this fork as finished and release its buffer.
            active &= mask;
            cur.length = 0;
            a.buf = emptyArray;
            // Close the source once neither fork is live.
            if (!active) {
                iterator?.return?.();
            }
        }
    }

    return [gen(~1, bufA, bufB), gen(~2, bufB, bufA)];
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.