Skip to content

Commit

Permalink
fix (cspell-tools): Support splitting Hunspell words. (#4791)
Browse files Browse the repository at this point in the history
  • Loading branch information
Jason3S authored Aug 31, 2023
1 parent 176cce1 commit e1e777e
Show file tree
Hide file tree
Showing 14 changed files with 110 additions and 13 deletions.
3 changes: 3 additions & 0 deletions cspell-tools.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/usr/bin/env node

import './packages/cspell-tools/bin.mjs';
4 changes: 3 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@
"description": "cspell monorepo.",
"bin": {
"cspell-monorepo": "bin.mjs",
"cspell-monorepo-esm": "bin.mjs"
"cspell-monorepo-esm": "bin.mjs",
"cspell": "bin.mjs",
"cspell-tools": "cspell-tools.mjs"
},
"packageManager": "pnpm@8.6.1",
"private": true,
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# yaml-language-server: $schema=./../../cspell-tools.config.schema.json

targets:
- name: split-colors
targetDirectory: ../../temp/builds/build-split-source
sources:
- filename: src/words.txt
split: true
- filename: src/color-pairs.dic
split: true
format: plaintext
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
SET UTF-8
LANG en_EN
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
6
apple‌banana
apple‌mango
apple‌pear
apple‌strawberry
apple‌orange
mango‌banana
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
apple
banana
apple‌banana
grape
6 changes: 3 additions & 3 deletions packages/cspell-tools/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
},
"type": "module",
"bin": {
"cspell-tools-cli": "bin.js"
"cspell-tools-cli": "bin.mjs"
},
"scripts": {
"build": "pnpm run build-schema && pnpm run compile",
Expand All @@ -32,7 +32,7 @@
"Compiler"
],
"files": [
"bin.js",
"bin.mjs",
"dist",
"cspell-tools.config.schema.json",
"!**/*.tsbuildInfo",
Expand Down Expand Up @@ -67,5 +67,5 @@
"shelljs": "^0.8.5",
"ts-json-schema-generator": "^1.3.0"
},
"main": "bin.js"
"module": "bin.mjs"
}
14 changes: 14 additions & 0 deletions packages/cspell-tools/src/__snapshots__/build.test.ts.snap
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,20 @@ yellow
"
`;

exports[`build action > build 5 1`] = `
"
# cspell-tools: keep-case no-split
apple
banana
grape
mango
orange
pear
strawberry
"
`;

exports[`build action > build multi 0 1`] = `
"
# cspell-tools: keep-case no-split
Expand Down
1 change: 1 addition & 0 deletions packages/cspell-tools/src/build.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ describe('build action', () => {
${f('build-single-trie')} | ${undefined} | ${tBuilds('build-single-trie/cities.trie')}
${f('build-source-list')} | ${undefined} | ${tBuilds('build-source-list/source-list.txt')}
${'.'} | ${f('build-combo/cspell-tools.config.yaml')} | ${'color-cities-code.txt'}
${f('build-split-source')} | ${undefined} | ${tBuilds('build-split-source/split-colors.txt')}
`('build %#', async ({ sourceRoot, config, target }) => {
await expect(build(undefined, { config, root: t(sourceRoot), cwd: t() })).resolves.toBeUndefined();
const content = await readTextFile(t(target));
Expand Down
37 changes: 29 additions & 8 deletions packages/cspell-tools/src/compiler/SourceReader.test.ts
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
import * as path from 'path';
import { describe, expect, test } from 'vitest';

import { test_dirname } from '../test/TestHelper.js';
import { createTestHelper } from '../test/TestHelper.js';
import type { SourceReaderOptions } from './SourceReader.js';
import { createSourceReader } from './SourceReader.js';
import { defaultAllowedSplitWords } from './WordsCollection.js';

const _dirname = test_dirname(import.meta.url);
const helper = createTestHelper(import.meta.url);

const samples = path.join(_dirname, '../../../Samples/dicts');
const samples = helper.resolveSample('dicts');

const readerOptions: SourceReaderOptions = {
splitWords: false,
Expand All @@ -17,14 +17,27 @@ const readerOptions: SourceReaderOptions = {

describe('Validate the iterateWordsFromFile', () => {
test('streamWordsFromFile: hunspell', async () => {
const reader = await createSourceReader(path.join(samples, 'hunspell', 'example.aff'), readerOptions);
const reader = await createSourceReader(sample('hunspell/example.aff'), readerOptions);
const results = [...reader.words];
// this might break if the processing order of hunspell changes.
expect(results).toEqual(s('hello rework reworked tried try work worked', ' '));
});

// Verifies that a Hunspell source is broken into individual words when `splitWords` is enabled.
test('streamWordsFromFile: hunspell split', async () => {
// Read the Hunspell fixture with word splitting (and legacy mode) turned on.
const reader = await createSourceReader(fixture('build-split-source/src/color-pairs.dic'), {
...readerOptions,
splitWords: true,
legacy: true,
});
// Drain the reader's word iterable into an array so it can be compared as a whole.
const results = [...reader.words];
// this might break if the processing order of hunspell changes.
// NOTE(review): the fixture entries appear to join two words with a non-word separator
// (presumably a zero-width non-joiner, U+200C) — confirm against the .dic file.
expect(results).toEqual(
s('apple banana apple mango apple orange apple pear apple strawberry mango banana', ' '),
);
});

test('stream words from trie', async () => {
const reader = await createSourceReader(path.join(samples, 'cities.trie.gz'), readerOptions);
const reader = await createSourceReader(sample('cities.trie.gz'), readerOptions);
const results = [...reader.words];
expect(results.join('|')).toBe(
'amsterdam|angeles|city|delhi|francisco|london|los|los angeles' +
Expand All @@ -45,7 +58,7 @@ describe('Validate the iterateWordsFromFile', () => {
});

test('annotatedWords: trie', async () => {
const reader = await createSourceReader(path.join(samples, 'cities.trie.gz'), readerOptions);
const reader = await createSourceReader(sample('cities.trie.gz'), readerOptions);
const results = [...reader.words];
expect(results.join('|')).toBe(
'amsterdam|angeles|city|delhi|francisco|london|los|los angeles' +
Expand All @@ -54,7 +67,7 @@ describe('Validate the iterateWordsFromFile', () => {
});

test('annotatedWords: text - cities.txt', async () => {
const reader = await createSourceReader(path.join(samples, 'cities.txt'), readerOptions);
const reader = await createSourceReader(sample('cities.txt'), readerOptions);
const results = [...reader.words];
// the results are sorted
expect(results.join('|')).toBe(
Expand All @@ -63,7 +76,7 @@ describe('Validate the iterateWordsFromFile', () => {
});

test('annotatedWords: text - sampleCodeDic.txt', async () => {
const reader = await createSourceReader(path.join(samples, 'sampleCodeDic.txt'), readerOptions);
const reader = await createSourceReader(sample('sampleCodeDic.txt'), readerOptions);
const results = [...reader.words];
// cspell:ignore codecode errorerror codemsg
// the results are sorted
Expand All @@ -74,3 +87,11 @@ describe('Validate the iterateWordsFromFile', () => {
return a.split(on);
}
});

/**
 * Build the path to a file inside the `dicts` folder of the Samples directory.
 * @param parts - path segments relative to the `dicts` sample folder.
 * @returns the path produced by `helper.resolveSample`.
 */
function sample(...parts: string[]): string {
    const segments = ['dicts', ...parts];
    return helper.resolveSample(...segments);
}

/**
 * Build the path to a test fixture.
 * @param parts - path segments relative to the fixture directory.
 * @returns the path produced by `helper.resolveFixture`.
 */
function fixture(...parts: string[]): string {
    const resolved = helper.resolveFixture(...parts);
    return resolved;
}
17 changes: 16 additions & 1 deletion packages/cspell-tools/src/compiler/SourceReader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ export async function createSourceReader(filename: string, options: SourceReader

if (reader.type !== 'TextFile') {
return {
words: reader.lines,
words: splitLines(reader.lines, options),
get size() {
return reader.size;
},
Expand All @@ -46,6 +46,21 @@ export async function createSourceReader(filename: string, options: SourceReader
return textFileReader(reader, options);
}

/**
 * Optionally split each line from a non-text-file reader into individual words.
 *
 * When `options.splitWords` is falsy the `lines` iterable is returned untouched.
 * Otherwise each line is split on runs of characters that are not Unicode letters
 * (`\p{L}`), combining marks (`\p{M}`), apostrophes, hyphens, or `\w` characters.
 * Empty fragments are dropped so that an empty line, or a line that starts or ends
 * with a separator, never produces an empty "word".
 *
 * @param lines - the raw lines (entries) produced by the reader.
 * @param options - reader options; only `splitWords` is consulted here.
 * @returns the original iterable, or a lazy iterable of the non-empty words.
 */
function splitLines(lines: Iterable<string>, options: SourceReaderOptions): Iterable<string> {
    if (!options.splitWords) return lines;

    function* split(): Iterable<string> {
        const regNonWordOrDigit = /[^\p{L}\p{M}'\w-]+/giu;

        for (const line of lines) {
            for (const word of line.split(regNonWordOrDigit)) {
                // `split` yields '' for empty lines and for leading/trailing separators.
                if (word) yield word;
            }
        }
    }

    return split();
}

async function textFileReader(reader: Reader, options: SourceReaderOptions): Promise<SourceReader> {
const { legacy, splitWords: split, allowedSplitWords } = options;
const words = [...parseFileLines(reader, { legacy, split, allowedSplitWords })];
Expand Down
2 changes: 2 additions & 0 deletions packages/cspell-tools/src/compiler/wordListParser.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ describe('Validate the wordListCompiler', () => {
${s('Apple|~apple|Apple')} | ${pf({ legacy: true })} | ${['apple']}
${'ArrayObject::getFlags\nArrayObject::getIterator\nArrayObject::getIteratorClass\n'} | ${pf({ legacy: true })} | ${s('array|object|get|flags|iterator|class')}
${sampleContent} | ${pf()} | ${s('Tower of London|New|York')}
${'apple\u200cbanana'} | ${pf({ split: true })} | ${['apple', 'banana']}
${'apple\u200cbanana'} | ${pf({})} | ${['apple\u200cbanana']}
`('createSortAndFilterOperation $content $options', ({ content, options, expectedResult }) => {
const r = [...parseFileLines(content, options)];
expect(r).toEqual(expectedResult);
Expand Down
15 changes: 15 additions & 0 deletions packages/cspell-tools/src/test/TestHelper.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ const _dirname = test_dirname(import.meta.url);
const packageRoot = path.join(_dirname, '../..');
const repoRoot = path.join(packageRoot, '../..');
const tempDirBase = path.join(packageRoot, 'temp');
const repoSamples = path.join(repoRoot, 'packages/Samples');

export interface TestHelper {
readonly packageRoot: string;
Expand All @@ -29,8 +30,18 @@ export interface TestHelper {

createTempDir(...parts: string[]): void;

/**
* Resolves a fixture path to an absolute path
* @param parts - relative path to fixture
*/
resolveFixture(...parts: string[]): string;

/**
* Resolves a path to an absolute path in Samples
* @param parts - relative path to sample
*/
resolveSample(...parts: string[]): string;

/**
* Make the temp directory
* @param parts
Expand Down Expand Up @@ -141,6 +152,10 @@ class TestHelperImpl implements TestHelper {
return path.resolve(this.fixtureDir, ...parts);
}

/**
 * Resolve a path located under the repository Samples directory.
 * @param parts - path segments relative to the Samples directory.
 * @returns the absolute path computed by `path.resolve`.
 */
resolveSample(...parts: string[]): string {
    const segments = [repoSamples, ...parts];
    return path.resolve(...segments);
}

/**
* calc a path relative to the package temp directory.
* @param parts - optional path segments
Expand Down

0 comments on commit e1e777e

Please sign in to comment.