Skip to content

Commit

Permalink
Add dictionary parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
stscoundrel committed Feb 20, 2024
1 parent c975ec5 commit 699549d
Show file tree
Hide file tree
Showing 7 changed files with 78 additions and 18 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -103,3 +103,4 @@ dist
# TernJS port file
.tern-port
/reports
.DS_Store
18 changes: 14 additions & 4 deletions src/index.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
/** Returns the fixed placeholder string `'test string'`. */
export function example(): string {
  return 'test string';
}
import path from 'path';
import { DictionaryEntry } from './models';
import { read } from './reader';
import { parse } from './parser';

export default {
example,
};
// Location of the gzipped dictionary JSON, resolved relative to this module's directory.
// Segments are passed separately so path.join supplies the platform separator.
const COMPRESSED_DICTIONARY_PATH = path.join(__dirname, '..', 'resources', 'dictionary.json.gz');

/**
 * Loads the bundled dictionary.
 *
 * Reads the compressed file at `COMPRESSED_DICTIONARY_PATH` and hands its raw
 * bytes to `parse`. Every call reads and parses the file again; nothing is
 * cached here.
 *
 * @returns The entries produced by `parse`.
 */
export function getDictionary(): DictionaryEntry[] {
  const content = read(COMPRESSED_DICTIONARY_PATH);

  return parse(content);
}

export { DictionaryEntry } from './models';
4 changes: 4 additions & 0 deletions src/models.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
/** Shape of a single dictionary entry. */
export interface DictionaryEntry {
  /** Headword of the entry. */
  headword: string;
  /** Definition texts listed for the headword. */
  definitions: string[];
}
14 changes: 14 additions & 0 deletions src/parser.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import zlib from 'zlib';
import { DictionaryEntry } from './models';

/**
 * Decompresses gzipped JSON and returns it as dictionary entries.
 *
 * @param content - Gzip-compressed JSON document, decoded as UTF-8.
 * @returns The decoded top-level JSON array.
 * @throws TypeError when the decoded JSON is valid but not an array.
 * @throws SyntaxError when the decompressed text is not valid JSON.
 * @throws Error raised by `zlib.gunzipSync` when `content` is not valid gzip data.
 */
export const parse = (content: Buffer): DictionaryEntry[] => {
  // Decompress the gzipped data and decode it as text.
  const json = zlib.gunzipSync(content).toString('utf8');

  // Keep the result as `unknown` until its shape has been checked.
  const parsed: unknown = JSON.parse(json);

  if (!Array.isArray(parsed)) {
    throw new TypeError('Dictionary data must be a JSON array of entries.');
  }

  // Only the top-level shape is verified; individual entries are trusted as-is.
  return parsed;
};

export default {
parse,
};
7 changes: 7 additions & 0 deletions src/reader.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
import fs from 'fs';

/**
 * Synchronously reads the file at the given location.
 *
 * @param location - Path handed to `fs.readFileSync`.
 * @returns The file's contents as a Buffer.
 */
export function read(location: string): Buffer {
  return fs.readFileSync(location);
}

export default {
read,
};
14 changes: 0 additions & 14 deletions tests/index.test.ts

This file was deleted.

38 changes: 38 additions & 0 deletions tests/old-danish-dictionary.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import { getDictionary } from '../src';

describe('Old Danish Dictionary tests', () => {
  // Loading the dictionary is the costly step, so it is done once for the
  // whole suite; none of the tests below mutate the result.
  let dictionary: ReturnType<typeof getDictionary>;

  beforeAll(() => {
    dictionary = getDictionary();
  });

  // Looks up an entry by exact headword; yields undefined when nothing matches.
  const findEntry = (headword: string) => dictionary.find((entry) => entry.headword === headword);

  test('Dictionary contains expected amount of entries', () => {
    expect(dictionary.length).toBe(45408);
  });

  test('Dictionary entries are parsed in correct object format', () => {
    dictionary.forEach((entry) => {
      // Only expected keys.
      expect(Object.keys(entry)).toEqual(['headword', 'definitions']);

      // TODO: should assert headwords are not empty.
      // Current dataset contains 8 empties, so can't be done yet.
    });
  });

  test('Dictionary contains expected content', () => {
    const skrotte = findEntry('Skrotte');
    const ulovagtig = findEntry('Ulovagtig');
    const vandhagl = findEntry('Vandhagl');

    // Optional chaining turns a missing entry into a failed expectation
    // instead of a TypeError on property access.
    expect(skrotte?.headword).toBe('Skrotte');
    expect(skrotte?.definitions[0]).toBe('go. tvivle. M. Hele ordr\u00e6kken synes at hvile p\u00e5 en misforst\u00e5else eller forvanskning.');

    expect(ulovagtig?.headword).toBe('Ulovagtig');
    expect(ulovagtig?.definitions[0]).toBe('no. ulovlig,ikke gyldig efter loven; (1597) KhS VIL 495 (ovf. III 235b8); teste depr\u00e6hensi, drefine, wlouactige vidre TV 400. Jf logagtig (ovf. II 78122).');

    expect(vandhagl?.headword).toBe('Vandhagl');
    expect(vandhagl?.definitions[0]).toBe('no. et slags hagl; lade skyde med vandhagl 12 urh\u00f8ns (1642). AL. Efter M: mindste h.');
  });
});

0 comments on commit 699549d

Please sign in to comment.