Skip to content

Commit

Permalink
fix: Improve word lookup performance for FastTrieBlob and TrieBlob (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
Jason3S committed Jul 11, 2024
1 parent 12c5709 commit 6e5ad48
Show file tree
Hide file tree
Showing 43 changed files with 1,282 additions and 235 deletions.
10 changes: 8 additions & 2 deletions packages/cspell-dictionary/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,15 @@
],
"scripts": {
"clean": "shx rm -rf dist temp coverage \"*.tsbuildInfo\"",
"build": "tsc -b . -f",
"build": "tsc -p .",
"clean-build": "pnpm run clean && pnpm run build",
"coverage": "vitest run --coverage",
"test:watch": "vitest",
"test": "vitest run",
"watch": "tsc -b . -w -f"
"test:perf": "NODE_ENV=production insight --register ts-node/esm --file \"**/*.perf.{mts,ts}\" -t 500",
"test:perf:js": "NODE_ENV=production insight -t 500",
"test:perf:prof": "NODE_ENV=production node --cpu-prof ../../node_modules/perf-insight/bin.mjs -t 1000",
"watch": "tsc -p . -w "
},
"repository": {
"type": "git",
Expand All @@ -52,5 +55,8 @@
"cspell-trie-lib": "workspace:*",
"fast-equals": "^5.0.1",
"gensequence": "^7.0.0"
},
"devDependencies": {
"lorem-ipsum": "^2.0.8"
}
}
68 changes: 68 additions & 0 deletions packages/cspell-dictionary/src/perf/has.perf.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import assert from 'node:assert';

import { buildITrieFromWords } from 'cspell-trie-lib';
import { loremIpsum } from 'lorem-ipsum';
import { suite } from 'perf-insight';

import { createSpellingDictionary } from '../SpellingDictionary/createSpellingDictionary.js';

// Benchmark suite comparing the cost of word lookups through several entry points
// built from the same generated word list.
//
// Note: "100k" in the test titles refers to the number of lookups performed by
// `checkWords` (its `totalChecks` default is 100_000), not to the size of the word
// list, which is requested from `genWords` with a count of 10_000.
suite('dictionary has', async (test) => {
const words = genWords(10_000);

// Both structures are built once, outside of the timed callbacks.
const iTrie = buildITrieFromWords(words);
const dict = createSpellingDictionary(words, 'test', import.meta.url);

test('dictionary has 100k words', () => {
checkWords(dict, words);
});

// Same measurement repeated against the same dictionary instance.
// NOTE(review): presumably meant to expose warm-up / caching effects — confirm.
test('dictionary has 100k words (2nd time)', () => {
checkWords(dict, words);
});

test('iTrie has 100k words', () => {
checkWords(iTrie, words);
});

test('iTrie.hasWord has 100k words', () => {
// Adapter so `hasWord` can be driven through the `{ has }` shape expected by `checkWords`.
// NOTE(review): the meaning of the second argument (`true`) is defined by cspell-trie-lib — confirm.
const dict = { has: (word: string) => iTrie.hasWord(word, true) };
checkWords(dict, words);
});

// Looks words up directly on the `data` object exposed by the iTrie.
test('iTrie.data has 100k words', () => {
checkWords(iTrie.data, words);
});
});

/**
 * Perform `totalChecks` lookups against `dict`, cycling through `words` in order,
 * and assert that every lookup succeeded.
 *
 * Every lookup is always executed, even after a miss, so the amount of work is
 * independent of the results.
 *
 * @param dict - anything exposing a `has(word)` lookup.
 * @param words - the words to look up; reused round-robin when `totalChecks` exceeds its length.
 * @param totalChecks - the number of lookups to perform.
 * @throws an assertion error if any lookup returned a falsy value.
 */
function checkWords(dict: { has: (word: string) => boolean }, words: string[], totalChecks = 100_000) {
    const wordCount = words.length;
    let allFound = true;
    let idx = 0;
    while (idx < totalChecks) {
        const found = dict.has(words[idx % wordCount]);
        if (!found) {
            allFound = false;
        }
        idx += 1;
    }
    assert(allFound, 'All words should be found in the dictionary');
}

/**
 * Build a list of at least `count` unique words for the benchmarks.
 *
 * The set is seeded with the space-separated tokens of `loremIpsum({ count })`.
 * While it is still too small, every ordered pair of distinct words from a snapshot
 * of the set is concatenated and added, stopping as soon as the target size is reached.
 *
 * @param count - the minimum number of unique words wanted.
 * @returns the unique words in insertion order. The list can be longer than `count`
 *   if the seed text alone already contains more unique tokens.
 */
function genWords(count: number): string[] {
    const unique = new Set(loremIpsum({ count }).split(' '));

    fill: while (unique.size < count) {
        // Iterate over a snapshot so that words added in this round are not combined until the next one.
        const snapshot = [...unique];
        for (const left of snapshot) {
            for (const right of snapshot) {
                if (left !== right) {
                    unique.add(left + right);
                }
                if (unique.size >= count) {
                    break fill;
                }
            }
        }
    }

    return [...unique];
}
11 changes: 0 additions & 11 deletions packages/cspell-dictionary/tsconfig.esm.json

This file was deleted.

9 changes: 7 additions & 2 deletions packages/cspell-dictionary/tsconfig.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
{
"files": [],
"references": [{ "path": "./tsconfig.esm.json" }]
"extends": "../../tsconfig.esm.json",
"compilerOptions": {
"rootDir": "src",
"outDir": "dist",
"types": ["node"]
},
"include": ["src"]
}
1 change: 1 addition & 0 deletions packages/cspell-pipe/src/__snapshots__/index.test.ts.snap
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

exports[`Pipe API > pipe api 1`] = `
[
"fork",
"helpers",
"interleave",
"isAsyncIterable",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
exports[`Helpers > helpers 1`] = `
[
"asyncIteratorToAsyncIterable",
"fork",
"interleave",
"isAsyncIterable",
"iteratorToIterable",
Expand Down
131 changes: 131 additions & 0 deletions packages/cspell-pipe/src/helpers/fork.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
import { describe, expect, test } from 'vitest';

import { opTakeSync } from '../operators/take.js';
import { generatorTestWrapper, makeIterableTestWrapperOptions } from '../test/iterableTestWrapper.js';
import { throwAfter } from '../test/throwAfter.js';
import { fork } from './fork.js';
import { interleave } from './interleave.js';
import { toArray } from './toArray.js';

// Behavioural tests for `fork`: both branches must see every value of the source,
// and the source iterator must be pulled / closed / thrown into the expected number of times.
describe('fork', () => {
    test('simple', () => {
        const f = fork([1, 2, 3, 4, 5]);
        expect([...f[0]]).toEqual([1, 2, 3, 4, 5]);
        expect([...f[1]]).toEqual([1, 2, 3, 4, 5]);
    });

    test('test sync', () => {
        const f = fork([1, 2, 3, 4, 5]);
        expect([...interleave(f[0], f[1])]).toEqual([1, 1, 2, 2, 3, 3, 4, 4, 5, 5]);
    });

    test('from generator', () => {
        const options = makeIterableTestWrapperOptions();
        const f = fork(generatorTestWrapper([1, 2, 3, 4, 5], options));
        expect([...f[0]]).toEqual([1, 2, 3, 4, 5]);
        expect([...f[1]]).toEqual([1, 2, 3, 4, 5]);
        // The second branch is served from the buffer, so the source is only walked once.
        expect(options.nextCalled).toHaveBeenCalledTimes(6);
        expect(options.returnCalled).toHaveBeenCalledTimes(1);
        expect(options.throwCalled).toHaveBeenCalledTimes(0);
    });

    // Previously this test shared the title 'from generator' with the one above,
    // which made the two indistinguishable in reports and when filtering by name.
    test('from infinite generator, partially consumed', () => {
        const options = makeIterableTestWrapperOptions();
        const f = fork(generatorTestWrapper(genNumbers(), options));
        // `take` pulls one value more than it yields before it stops.
        expect([...take(f[0], 3)]).toEqual([1, 2, 3]);
        expect(options.nextCalled).toHaveBeenCalledTimes(4);
        expect(options.nextReturned).toHaveBeenCalledTimes(4);
        // The source must stay open while the second branch can still consume it.
        expect(options.returnCalled).toHaveBeenCalledTimes(0);
        expect(options.throwCalled).toHaveBeenCalledTimes(0);
        expect([...take(f[1], 6)]).toEqual([1, 2, 3, 4, 5, 6]);
        expect(options.nextCalled).toHaveBeenCalledTimes(7);
        // Both branches are finished now, so the source gets closed.
        expect(options.returnCalled).toHaveBeenCalledTimes(1);
        expect(options.throwCalled).toHaveBeenCalledTimes(0);
    });

    test('with errors, no .throw', () => {
        const f = fork([1, 2, 3, 4, 5]);
        expect(() => toArray(whenThrow(f[0], 3, 'my error'))).toThrow('my error');
    });

    test('with errors late', () => {
        const options = makeIterableTestWrapperOptions();
        const f = fork(generatorTestWrapper([1, 2, 3, 4, 5, 6, 7], options));
        expect([...f[0]]).toEqual([1, 2, 3, 4, 5, 6, 7]);
        expect(options.nextCalled).toHaveBeenCalledTimes(8);
        expect(options.nextReturned).toHaveBeenCalledTimes(8);
        // The source has already completed, so the error raised on the second branch never reaches it.
        expect(() => [...throwAfter(f[1], 3, 'my error')]).toThrow('my error');
        expect(options.returnCalled).toHaveBeenCalledTimes(1);
        expect(options.throwCalled).toHaveBeenCalledTimes(0);
    });

    test('with errors early', () => {
        const options = makeIterableTestWrapperOptions();
        const f = fork(generatorTestWrapper([1, 2, 3, 4, 5, 6, 7], options));
        expect(() => [...generatorTestWrapper(throwAfter(f[1], 3, 'my error'))]).toThrow('my error');
        // The source is still running here, so the error is forwarded to it.
        expect(options.nextCalled).toHaveBeenCalledTimes(3);
        expect(options.nextReturned).toHaveBeenCalledTimes(3);
        expect(options.returnCalled).toHaveBeenCalledTimes(1);
        expect(options.throwCalled).toHaveBeenCalledTimes(1);
    });

    test('asymmetric', () => {
        const f = fork([1, 2, 3, 4, 5]);
        expect(toArray(opTakeSync(2)(f[0]))).toEqual([1, 2]);
        expect([...f[1]]).toEqual([1, 2, 3, 4, 5]);
    });
});

// Sanity checks for the test helpers themselves: they pin down how many
// next / return / throw notifications `generatorTestWrapper` reports in the basic
// scenarios that the `fork` tests build upon.
describe('Generator Throw Assumptions', () => {
test('No Throw - Empty', () => {
const options = makeIterableTestWrapperOptions();
const g = generatorTestWrapper([], options);
expect([...g]).toEqual([]);
// Even an empty source is asked once for a value and is reported as returned once.
expect(options.nextCalled).toHaveBeenCalledTimes(1);
expect(options.returnCalled).toHaveBeenCalledTimes(1);
expect(options.throwCalled).toHaveBeenCalledTimes(0);
});

test('No Throw', () => {
const options = makeIterableTestWrapperOptions();
const g = generatorTestWrapper([1, 2, 3], options);
expect([...g]).toEqual([1, 2, 3]);
// One call per value plus the final call that reports completion.
expect(options.nextCalled).toHaveBeenCalledTimes(4);
expect(options.returnCalled).toHaveBeenCalledTimes(1);
expect(options.throwCalled).toHaveBeenCalledTimes(0);
});

test('Throw after', () => {
const options = makeIterableTestWrapperOptions();
// NOTE(review): `throwAfter` is defined elsewhere; judging by the counts below it
// raises the error on the wrapped iterator after two values — confirm.
const g = throwAfter(generatorTestWrapper([1, 2, 3, 4, 5], options), 2, 'my error');
expect(() => [...take(g, 20)]).toThrow('my error');
expect(options.nextCalled).toHaveBeenCalledTimes(2);
expect(options.returnCalled).toHaveBeenCalledTimes(1);
expect(options.throwCalled).toHaveBeenCalledTimes(1);
});
});

/**
 * Yield at most `count` values from `iterable`.
 *
 * The limit is only checked after the next value has been pulled, so when the
 * source has more values than requested it is advanced `count + 1` times before
 * it gets closed.
 *
 * @param iterable - the source of values.
 * @param count - the maximum number of values to yield.
 */
function* take<T>(iterable: Iterable<T>, count: number) {
    let remaining = count;
    for (const item of iterable) {
        if (remaining <= 0) {
            // Leaving the `for...of` early closes the source iterator.
            return;
        }
        remaining -= 1;
        yield item;
    }
}

/**
 * Endless generator of the positive integers: 1, 2, 3, …
 * It never completes on its own; consumers must stop pulling.
 */
function* genNumbers() {
    for (let value = 1; ; value++) {
        yield value;
    }
}

/**
 * Re-yield the values of `i` until a value strictly equal to `when` shows up,
 * then throw `error` instead of yielding that value.
 *
 * @param i - the source of values.
 * @param when - the value that triggers the error.
 * @param error - what to throw; it is thrown as-is.
 */
function* whenThrow<T>(i: Iterable<T>, when: T, error: unknown) {
    for (const value of i) {
        if (value !== when) {
            yield value;
            continue;
        }
        throw error;
    }
}
61 changes: 61 additions & 0 deletions packages/cspell-pipe/src/helpers/fork.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
/** The two independent iterables produced by {@link fork}. */
export type ForkedIterables<T> = [Iterable<T>, Iterable<T>];

/**
 * Split one iterable into two iterables that each yield every value of the source, in order.
 *
 * The source is iterated only once. A value pulled by one branch is queued for the other
 * branch for as long as that branch can still consume it. The branches may be consumed in
 * any interleaving: a branch always empties its own queue before it pulls a fresh value
 * from the source, so it can neither skip nor reorder values when the other branch gets ahead.
 *
 * Life cycle:
 * - The source iterator is created lazily, when the first branch starts.
 * - A branch that ends (exhausted, closed early or failed) stops being buffered for.
 * - Once both branches have ended, `return()` is called on the source iterator if it has one.
 * - An error thrown into a branch is forwarded to the source iterator's `throw()` when available.
 *
 * Each returned iterable is single-use. A branch that is never started is still considered
 * active, so values keep being queued for it and the source is not closed.
 *
 * @param iterable - the source to split.
 * @returns a pair of iterables over the same sequence of values.
 */
export function fork<T>(iterable: Iterable<T>): ForkedIterables<T> {
    // Bit set of the branches that can still consume values: bit 0 = first, bit 1 = second.
    let active = 3;

    /** Values waiting for one branch; `head` is the index of the next value to hand out. */
    interface Pending {
        items: T[];
        head: number;
    }

    const pendingA: Pending = { items: [], head: 0 };
    const pendingB: Pending = { items: [], head: 0 };

    let iterator: Iterator<T> | undefined = undefined;

    function getIterator(): Iterator<T> {
        if (iterator) {
            return iterator;
        }
        return (iterator = iterable[Symbol.iterator]());
    }

    /**
     * @param mask - `active` with this branch's own bit removed, i.e. `active & mask` tells
     *   whether the other branch is still active.
     * @param mine - queue of values the other branch pulled on behalf of this one.
     * @param theirs - queue this branch fills for the other one.
     */
    function* gen(mask: number, mine: Pending, theirs: Pending): Iterable<T> {
        const iter = getIterator();
        try {
            for (;;) {
                // Queued values always go first. The check is repeated on every step because
                // the other branch may add to the queue while this one is suspended.
                if (mine.head < mine.items.length) {
                    yield mine.items[mine.head++];
                    continue;
                }
                // Fully drained: release the consumed values before asking the source.
                mine.items.length = 0;
                mine.head = 0;
                const n = iter.next();
                if (n.done) break;
                if (active & mask) {
                    theirs.items.push(n.value);
                }
                yield n.value;
            }
        } catch (e) {
            if (iter.throw) {
                return iter.throw(e);
            }
            throw e;
        } finally {
            // This branch is done: stop the other branch from buffering for it.
            active &= mask;
            mine.items.length = 0;
            mine.head = 0;
            if (!active) {
                iterator?.return?.();
            }
        }
    }

    return [gen(~1, pendingA, pendingB), gen(~2, pendingB, pendingA)];
}
1 change: 1 addition & 0 deletions packages/cspell-pipe/src/helpers/index.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
export { toDistributableIterable } from './distribute.js';
export { fork } from './fork.js';
export { interleave } from './interleave.js';
export { asyncIteratorToAsyncIterable, iteratorToIterable } from './iteratorToIterable.js';
export { toArray } from './toArray.js';
Expand Down
Loading

0 comments on commit 6e5ad48

Please sign in to comment.