Skip to content

Commit

Permalink
test(lyra): removes skips from stemming tests
Browse files Browse the repository at this point in the history
  • Loading branch information
micheleriva committed Jul 19, 2022
1 parent 87a634f commit 41069fa
Show file tree
Hide file tree
Showing 5 changed files with 70 additions and 20 deletions.
2 changes: 1 addition & 1 deletion packages/lyra/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
"dependencies": {
"fastq": "^1.13.0",
"nanoid": "3.3.4",
"natural": "^5.2.2"
"natural": "^5.2.3"
},
"devDependencies": {
"@types/jest": "^27.5.0",
Expand Down
33 changes: 22 additions & 11 deletions packages/lyra/src/stemmer.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,15 @@
import natural, { Stemmer } from "natural";
import {
PorterStemmer,
PorterStemmerNl,
PorterStemmerFr,
PorterStemmerEs,
PorterStemmerIt,
PorterStemmerPt,
PorterStemmerSv,
PorterStemmerRu,
PorterStemmerNo,
Stemmer,
} from "natural";

export type Language = typeof SUPPORTED_LANGUAGES[number];

Expand All @@ -19,34 +30,34 @@ export function stemArray(input: string[], language: Language): string[] {

switch (language) {
case "dutch":
stemmer = natural.PorterStemmerNl;
stemmer = PorterStemmerNl;
break;
case "english":
stemmer = natural.PorterStemmer;
stemmer = PorterStemmer;
break;
case "french":
stemmer = natural.PorterStemmerFr;
stemmer = PorterStemmerFr;
break;
case "italian":
stemmer = natural.PorterStemmerIt;
stemmer = PorterStemmerIt;
break;
case "norwegian":
stemmer = natural.PorterStemmerNo;
stemmer = PorterStemmerNo;
break;
case "portugese":
stemmer = natural.PorterStemmerPt;
stemmer = PorterStemmerPt;
break;
case "russian":
stemmer = natural.PorterStemmerRu;
stemmer = PorterStemmerRu;
break;
case "spanish":
stemmer = natural.PorterStemmerEs;
stemmer = PorterStemmerEs;
break;
case "swedish":
stemmer = natural.PorterStemmerSv;
stemmer = PorterStemmerSv;
break;
default:
stemmer = natural.PorterStemmer;
stemmer = PorterStemmer;
break;
}

Expand Down
39 changes: 39 additions & 0 deletions packages/lyra/tests/__snapshots__/tokenizer.test.ts.snap
Original file line number Diff line number Diff line change
@@ -1,5 +1,23 @@
// Jest Snapshot v1, https://goo.gl/fbAQLP

exports[`Should tokenize and stem correctly in dutch 1`] = `
Set {
"de",
"klein",
"koei",
}
`;

exports[`Should tokenize and stem correctly in dutch 2`] = `
Set {
"ik",
"heb",
"wat",
"taart",
"gemaakt",
}
`;

exports[`Should tokenize and stem correctly in english 1`] = `
Set {
"the",
Expand Down Expand Up @@ -132,6 +150,27 @@ Set {
}
`;

exports[`Should tokenize and stem correctly in spanish 1`] = `
Set {
"cocin",
"unos",
"pastel",
}
`;

exports[`Should tokenize and stem correctly in spanish 2`] = `
Set {
"dorm",
"es",
"algo",
"dificil",
"cuand",
"las",
"prueb",
"fall",
}
`;

exports[`Should tokenize and stem correctly in swedish 1`] = `
Set {
"jag",
Expand Down
4 changes: 2 additions & 2 deletions packages/lyra/tests/tokenizer.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ it("Should tokenize and stem correctly in swedish", () => {
expect(O2).toMatchSnapshot();
});

it.skip("Should tokenize and stem correctly in spanish", () => {
it("Should tokenize and stem correctly in spanish", () => {
const I1 = "cociné unos pasteles";
const I2 = "dormir es algo dificil cuando las pruebas fallan";

Expand All @@ -89,7 +89,7 @@ it.skip("Should tokenize and stem correctly in spanish", () => {
expect(O2).toMatchSnapshot();
});

it.skip("Should tokenize and stem correctly in dutch", () => {
it("Should tokenize and stem correctly in dutch", () => {
const I1 = "de kleine koeien";
const I2 = "Ik heb wat taarten gemaakt";

Expand Down
12 changes: 6 additions & 6 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 41069fa

Please sign in to comment.