From a594542a64ddcc46ea3e691cce26e60124261e4b Mon Sep 17 00:00:00 2001 From: Hanna Worku Date: Tue, 30 May 2023 11:23:22 -0700 Subject: [PATCH 01/19] add sentence tokenizer for Japanese --- .../languages/ja/Researcher.js | 3 +++ .../languages/ja/SplitIntoTokensJapanese.js | 22 +++++++++++++++++++ 2 files changed, 25 insertions(+) create mode 100644 packages/yoastseo/src/parse/structure/languages/ja/SplitIntoTokensJapanese.js diff --git a/packages/yoastseo/src/languageProcessing/languages/ja/Researcher.js b/packages/yoastseo/src/languageProcessing/languages/ja/Researcher.js index 4312b87d4bb..252b30dae0b 100644 --- a/packages/yoastseo/src/languageProcessing/languages/ja/Researcher.js +++ b/packages/yoastseo/src/languageProcessing/languages/ja/Researcher.js @@ -10,6 +10,7 @@ import customCountLength from "./helpers/countCharacters"; import matchTransitionWordsHelper from "./helpers/matchTransitionWords"; import getContentWords from "./helpers/getContentWords"; import memoizedTokenizer from "./helpers/memoizedSentenceTokenizer"; +import splitIntoTokensJapanese from "yoastseo/src/parse/structure/languages/ja/SplitIntoTokensJapanese"; // All config import firstWordExceptions from "./config/firstWordExceptions"; @@ -29,6 +30,7 @@ import morphology from "./customResearches/getWordForms"; import getKeyphraseLength from "./customResearches/getKeyphraseLength"; import textLengthResearch from "./customResearches/textLength"; import findKeyphraseInSEOTitle from "./customResearches/findKeyphraseInSEOTitle"; +import SplitIntoTokensJapanese from "../../../parse/structure/languages/ja/SplitIntoTokensJapanese"; /** * The researches contains all the researches @@ -72,6 +74,7 @@ export default class Researcher extends AbstractResearcher { customCountLength, matchTransitionWordsHelper, memoizedTokenizer, + splitIntoTokensJapanese, } ); Object.assign( this.defaultResearches, { diff --git a/packages/yoastseo/src/parse/structure/languages/ja/SplitIntoTokensJapanese.js 
b/packages/yoastseo/src/parse/structure/languages/ja/SplitIntoTokensJapanese.js new file mode 100644 index 00000000000..d5baaaa16ec --- /dev/null +++ b/packages/yoastseo/src/parse/structure/languages/ja/SplitIntoTokensJapanese.js @@ -0,0 +1,22 @@ +import { map } from "lodash-es"; +import TinySegmenter from "tiny-segmenter"; + +/** + * Split sentence into tokens. + * + * @param {Sentence} sentence The sentence to split. + * + * @returns {Token[]} The tokens. + */ +function splitIntoTokensJapanese( sentence ) { + // Retrieve sentence from sentence class + const sentenceText = sentence.text; + // Return empty string if sentence is empty + if ( sentenceText === "" ) { + return []; + } + // Split sentences into words that are also tokens + const words = TinySegmenter.segment( sentenceText ); + return map( words ); +} +export default splitIntoTokensJapanese; From 6c75c4a05931b1881e77baa058347fbddd96eb58 Mon Sep 17 00:00:00 2001 From: Hanna Worku Date: Tue, 30 May 2023 17:20:31 -0700 Subject: [PATCH 02/19] pass custom tokenization for Japanese in LanguageProcessor --- .../src/parse/language/LanguageProcessor.js | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/packages/yoastseo/src/parse/language/LanguageProcessor.js b/packages/yoastseo/src/parse/language/LanguageProcessor.js index edd24304170..2a735e64dff 100644 --- a/packages/yoastseo/src/parse/language/LanguageProcessor.js +++ b/packages/yoastseo/src/parse/language/LanguageProcessor.js @@ -1,5 +1,6 @@ import Sentence from "../structure/Sentence"; import Token from "../structure/Token"; +import countWords from "../../languageProcessing/helpers/word/countWords"; const whitespaceRegex = /^\s+$/; /** @@ -49,12 +50,22 @@ class LanguageProcessor { * Split sentence into tokens. * * @param {Sentence} sentence The sentence to split. + * @param {paper} paper The paper containing the keyword and text. + * @param {researcher} researcher The researcher. * * @returns {Token[]} The tokens. 
*/ - splitIntoTokens( sentence ) { + splitIntoTokens( sentence, paper, researcher ) { // Retrieve sentence from sentence class const sentenceText = sentence.text; + // If there is a custom getWords helper use its output for retrieving words/tokens. + const getWordsCustomHelper = researcher.getHelper( "getWordsCustomHelper" ); + let wordCount = countWords( paper.getText() ); + if ( getWordsCustomHelper ) { + wordCount = getWordsCustomHelper( paper.getText() ).length; + const tokenTextsCustom = wordCount; + return tokenTextsCustom.map( tokenText => new Token( tokenText ) ); + } // Split the sentence string into tokens const tokenTexts = sentenceText.split( /([\s,.!?;:([\]'"¡¿)/])/g ).filter( x => x !== "" ); return tokenTexts.map( tokenText => new Token( tokenText ) ); From 982a59a3183abae7fe9d400f83060d0df5d36465 Mon Sep 17 00:00:00 2001 From: Hanna Worku Date: Thu, 1 Jun 2023 14:22:32 -0700 Subject: [PATCH 03/19] add tests to check what errors show in github env --- .../yoastseo/spec/parse/build/buildSpec.js | 61 +++++++++++++++++++ .../parse/language/LanguageProcessorSpec.js | 13 ++++ 2 files changed, 74 insertions(+) diff --git a/packages/yoastseo/spec/parse/build/buildSpec.js b/packages/yoastseo/spec/parse/build/buildSpec.js index 949cc66a965..685ceef310a 100644 --- a/packages/yoastseo/spec/parse/build/buildSpec.js +++ b/packages/yoastseo/spec/parse/build/buildSpec.js @@ -2,6 +2,7 @@ import build from "../../../src/parse/build/build"; import LanguageProcessor from "../../../src/parse/language/LanguageProcessor"; import Factory from "../../specHelpers/factory"; import memoizedSentenceTokenizer from "../../../src/languageProcessing/helpers/sentence/memoizedSentenceTokenizer"; +import splitIntoTokensJapanese from "../../../src/parse/structure/languages/ja/SplitIntoTokensJapanese"; describe( "The parse function", () => { it( "parses a basic HTML text", () => { @@ -67,6 +68,66 @@ describe( "The parse function", () => { } ); } ); + it( "parses a basic Japanese HTML 
text", () => { + const html = "

こんにちは世界!

"; + + const researcher = Factory.buildMockResearcher( {}, true, false, false, + { splitIntoTokensJapanese: splitIntoTokensJapanese } ); + const languageProcessor = new LanguageProcessor( researcher ); + expect( build( html, languageProcessor ) ).toEqual( { + name: "#document-fragment", + attributes: {}, + childNodes: [ { + name: "div", + sourceCodeLocation: { + startOffset: 0, + endOffset: 45, + startTag: { + startOffset: 0, + endOffset: 5, + }, + endTag: { + startOffset: 39, + endOffset: 45, + }, + }, + attributes: {}, + childNodes: [ { + name: "p", + isImplicit: false, + attributes: { + "class": new Set( [ "yoast" ] ), + }, + sentences: [ { + text: "こんにちは世界!", + sourceCodeRange: { startOffset: 22, endOffset: 35 }, + tokens: [ + { text: "こんにちは", sourceCodeRange: { startOffset: 22, endOffset: 27 } }, + { text: "世界", sourceCodeRange: { startOffset: 29, endOffset: 34 } }, + { text: "!", sourceCodeRange: { startOffset: 34, endOffset: 35 } }, + ], + } ], + childNodes: [ { + name: "#text", + value: "こんにちは世界!", + } ], + sourceCodeLocation: { + startOffset: 5, + endOffset: 39, + startTag: { + startOffset: 5, + endOffset: 22, + }, + endTag: { + startOffset: 35, + endOffset: 39, + }, + }, + } ], + } ], + } ); + } ); + it( "adds implicit paragraphs around phrasing content outside of paragraphs and headings", () => { const html = "
Hello World!
"; diff --git a/packages/yoastseo/spec/parse/language/LanguageProcessorSpec.js b/packages/yoastseo/spec/parse/language/LanguageProcessorSpec.js index e555de25b2a..0134717945d 100644 --- a/packages/yoastseo/spec/parse/language/LanguageProcessorSpec.js +++ b/packages/yoastseo/spec/parse/language/LanguageProcessorSpec.js @@ -43,3 +43,16 @@ describe( "A test for the splitIntoTokens method", () => { ] ); } ); } ); + +describe( "A test for the splitIntoTokens method in Japanese", () => { + it( "should return an array of tokens", function() { + const languageProcessor = new LanguageProcessor( researcher ); + + const tokens = languageProcessor.splitIntoTokens( new Sentence( "こんにちは世界!" ) ); + expect( tokens ).toEqual( [ + { text: "こんにちは", sourceCodeRange: {} }, + { text: "世界", sourceCodeRange: {} }, + { text: "!", sourceCodeRange: {} }, + ] ); + } ); +} ); From 52edb3e8278d0f95a907297637db1de64222bf79 Mon Sep 17 00:00:00 2001 From: Mykola Shlyakhtun Date: Thu, 1 Jun 2023 16:17:51 +0300 Subject: [PATCH 04/19] Fix linting issues --- .../yoastseo/src/languageProcessing/languages/ja/Researcher.js | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/packages/yoastseo/src/languageProcessing/languages/ja/Researcher.js b/packages/yoastseo/src/languageProcessing/languages/ja/Researcher.js index 252b30dae0b..8ac2daef08d 100644 --- a/packages/yoastseo/src/languageProcessing/languages/ja/Researcher.js +++ b/packages/yoastseo/src/languageProcessing/languages/ja/Researcher.js @@ -10,7 +10,7 @@ import customCountLength from "./helpers/countCharacters"; import matchTransitionWordsHelper from "./helpers/matchTransitionWords"; import getContentWords from "./helpers/getContentWords"; import memoizedTokenizer from "./helpers/memoizedSentenceTokenizer"; -import splitIntoTokensJapanese from "yoastseo/src/parse/structure/languages/ja/SplitIntoTokensJapanese"; +import splitIntoTokensJapanese from "../../../parse/structure/languages/ja/SplitIntoTokensJapanese"; // All config import 
firstWordExceptions from "./config/firstWordExceptions"; @@ -30,7 +30,6 @@ import morphology from "./customResearches/getWordForms"; import getKeyphraseLength from "./customResearches/getKeyphraseLength"; import textLengthResearch from "./customResearches/textLength"; import findKeyphraseInSEOTitle from "./customResearches/findKeyphraseInSEOTitle"; -import SplitIntoTokensJapanese from "../../../parse/structure/languages/ja/SplitIntoTokensJapanese"; /** * The researches contains all the researches From 324896a527748ccd8e767ab611eb718eb430f300 Mon Sep 17 00:00:00 2001 From: Mykola Shlyakhtun Date: Thu, 1 Jun 2023 16:27:49 +0300 Subject: [PATCH 05/19] Check that researcher is present is added. --- packages/yoastseo/src/parse/language/LanguageProcessor.js | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/packages/yoastseo/src/parse/language/LanguageProcessor.js b/packages/yoastseo/src/parse/language/LanguageProcessor.js index 2a735e64dff..689d99b35c3 100644 --- a/packages/yoastseo/src/parse/language/LanguageProcessor.js +++ b/packages/yoastseo/src/parse/language/LanguageProcessor.js @@ -59,10 +59,9 @@ class LanguageProcessor { // Retrieve sentence from sentence class const sentenceText = sentence.text; // If there is a custom getWords helper use its output for retrieving words/tokens. 
- const getWordsCustomHelper = researcher.getHelper( "getWordsCustomHelper" ); - let wordCount = countWords( paper.getText() ); + const getWordsCustomHelper = researcher && researcher.getHelper( "getWordsCustomHelper" ); if ( getWordsCustomHelper ) { - wordCount = getWordsCustomHelper( paper.getText() ).length; + const wordCount = getWordsCustomHelper( paper.getText() ).length; const tokenTextsCustom = wordCount; return tokenTextsCustom.map( tokenText => new Token( tokenText ) ); } From a0fe59af27e1d1569637e3df77470067b5946359 Mon Sep 17 00:00:00 2001 From: Hanna Worku Date: Fri, 2 Jun 2023 11:01:10 -0700 Subject: [PATCH 06/19] retrieve research helper in language processor --- packages/yoastseo/src/parse/language/LanguageProcessor.js | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/packages/yoastseo/src/parse/language/LanguageProcessor.js b/packages/yoastseo/src/parse/language/LanguageProcessor.js index 689d99b35c3..a9b4a51d3c2 100644 --- a/packages/yoastseo/src/parse/language/LanguageProcessor.js +++ b/packages/yoastseo/src/parse/language/LanguageProcessor.js @@ -1,6 +1,5 @@ import Sentence from "../structure/Sentence"; import Token from "../structure/Token"; -import countWords from "../../languageProcessing/helpers/word/countWords"; const whitespaceRegex = /^\s+$/; /** @@ -61,8 +60,8 @@ class LanguageProcessor { // If there is a custom getWords helper use its output for retrieving words/tokens. 
const getWordsCustomHelper = researcher && researcher.getHelper( "getWordsCustomHelper" ); if ( getWordsCustomHelper ) { - const wordCount = getWordsCustomHelper( paper.getText() ).length; - const tokenTextsCustom = wordCount; + const tokenTextsCustom = researcher.getHelper( "splitIntoTokensJapanese" ); + return tokenTextsCustom.map( tokenText => new Token( tokenText ) ); } // Split the sentence string into tokens From 79711cea8bc5bd131626aa207a1091eee4d6b8a4 Mon Sep 17 00:00:00 2001 From: Hanna Worku Date: Fri, 2 Jun 2023 11:08:51 -0700 Subject: [PATCH 07/19] remove unnecessary function arguments --- .../yoastseo/src/parse/language/LanguageProcessor.js | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/packages/yoastseo/src/parse/language/LanguageProcessor.js b/packages/yoastseo/src/parse/language/LanguageProcessor.js index a9b4a51d3c2..827d0c1b984 100644 --- a/packages/yoastseo/src/parse/language/LanguageProcessor.js +++ b/packages/yoastseo/src/parse/language/LanguageProcessor.js @@ -49,19 +49,16 @@ class LanguageProcessor { * Split sentence into tokens. * * @param {Sentence} sentence The sentence to split. - * @param {paper} paper The paper containing the keyword and text. - * @param {researcher} researcher The researcher. * * @returns {Token[]} The tokens. */ - splitIntoTokens( sentence, paper, researcher ) { + splitIntoTokens( sentence ) { // Retrieve sentence from sentence class const sentenceText = sentence.text; // If there is a custom getWords helper use its output for retrieving words/tokens. 
- const getWordsCustomHelper = researcher && researcher.getHelper( "getWordsCustomHelper" ); + const getWordsCustomHelper = this.researcher.getHelper( "getWordsCustomHelper" ); if ( getWordsCustomHelper ) { - const tokenTextsCustom = researcher.getHelper( "splitIntoTokensJapanese" ); - + const tokenTextsCustom = this.researcher.getHelper( "splitIntoTokensJapanese" ); return tokenTextsCustom.map( tokenText => new Token( tokenText ) ); } // Split the sentence string into tokens From c985f3168489159b0e255c7cdc4e713f7e2c4a36 Mon Sep 17 00:00:00 2001 From: Hanna Worku Date: Fri, 2 Jun 2023 13:26:58 -0700 Subject: [PATCH 08/19] improve code --- .../languageProcessing/languages/ja/Researcher.js | 4 ++-- .../yoastseo/src/parse/language/LanguageProcessor.js | 12 ++++++------ .../languages/ja/SplitIntoTokensJapanese.js | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/packages/yoastseo/src/languageProcessing/languages/ja/Researcher.js b/packages/yoastseo/src/languageProcessing/languages/ja/Researcher.js index 8ac2daef08d..fa354b0e2a8 100644 --- a/packages/yoastseo/src/languageProcessing/languages/ja/Researcher.js +++ b/packages/yoastseo/src/languageProcessing/languages/ja/Researcher.js @@ -10,7 +10,7 @@ import customCountLength from "./helpers/countCharacters"; import matchTransitionWordsHelper from "./helpers/matchTransitionWords"; import getContentWords from "./helpers/getContentWords"; import memoizedTokenizer from "./helpers/memoizedSentenceTokenizer"; -import splitIntoTokensJapanese from "../../../parse/structure/languages/ja/SplitIntoTokensJapanese"; +import splitIntoTokensCustom from "../../../parse/structure/languages/ja/SplitIntoTokensJapanese"; // All config import firstWordExceptions from "./config/firstWordExceptions"; @@ -73,7 +73,7 @@ export default class Researcher extends AbstractResearcher { customCountLength, matchTransitionWordsHelper, memoizedTokenizer, - splitIntoTokensJapanese, + splitIntoTokensCustom, } ); Object.assign( 
this.defaultResearches, { diff --git a/packages/yoastseo/src/parse/language/LanguageProcessor.js b/packages/yoastseo/src/parse/language/LanguageProcessor.js index 827d0c1b984..b15214b3d64 100644 --- a/packages/yoastseo/src/parse/language/LanguageProcessor.js +++ b/packages/yoastseo/src/parse/language/LanguageProcessor.js @@ -56,14 +56,14 @@ class LanguageProcessor { // Retrieve sentence from sentence class const sentenceText = sentence.text; // If there is a custom getWords helper use its output for retrieving words/tokens. - const getWordsCustomHelper = this.researcher.getHelper( "getWordsCustomHelper" ); - if ( getWordsCustomHelper ) { - const tokenTextsCustom = this.researcher.getHelper( "splitIntoTokensJapanese" ); - return tokenTextsCustom.map( tokenText => new Token( tokenText ) ); + const tokenTextsCustom = this.researcher.getHelper( "splitIntoTokensCustom" ); + if ( tokenTextsCustom ) { + const tokensCustom = tokenTextsCustom( sentenceText ); + return tokensCustom.map( tokenText => new Token( tokenText ) ); } // Split the sentence string into tokens - const tokenTexts = sentenceText.split( /([\s,.!?;:([\]'"¡¿)/])/g ).filter( x => x !== "" ); - return tokenTexts.map( tokenText => new Token( tokenText ) ); + const tokens = sentenceText.split( /([\s,.!?;:([\]'"¡¿)/])/g ).filter( x => x !== "" ); + return tokens.map( tokenText => new Token( tokenText ) ); } } export default LanguageProcessor; diff --git a/packages/yoastseo/src/parse/structure/languages/ja/SplitIntoTokensJapanese.js b/packages/yoastseo/src/parse/structure/languages/ja/SplitIntoTokensJapanese.js index d5baaaa16ec..c89e322e67d 100644 --- a/packages/yoastseo/src/parse/structure/languages/ja/SplitIntoTokensJapanese.js +++ b/packages/yoastseo/src/parse/structure/languages/ja/SplitIntoTokensJapanese.js @@ -16,7 +16,7 @@ function splitIntoTokensJapanese( sentence ) { return []; } // Split sentences into words that are also tokens - const words = TinySegmenter.segment( sentenceText ); + const words = 
new TinySegmenter().segment( sentenceText ); return map( words ); } export default splitIntoTokensJapanese; From d3885adf5753df512840c14c1785083764f10983 Mon Sep 17 00:00:00 2001 From: Hanna Worku Date: Fri, 2 Jun 2023 13:40:38 -0700 Subject: [PATCH 09/19] pass sentence instead of sentence text --- packages/yoastseo/src/parse/language/LanguageProcessor.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/yoastseo/src/parse/language/LanguageProcessor.js b/packages/yoastseo/src/parse/language/LanguageProcessor.js index b15214b3d64..975f18dfea6 100644 --- a/packages/yoastseo/src/parse/language/LanguageProcessor.js +++ b/packages/yoastseo/src/parse/language/LanguageProcessor.js @@ -58,7 +58,7 @@ class LanguageProcessor { // If there is a custom getWords helper use its output for retrieving words/tokens. const tokenTextsCustom = this.researcher.getHelper( "splitIntoTokensCustom" ); if ( tokenTextsCustom ) { - const tokensCustom = tokenTextsCustom( sentenceText ); + const tokensCustom = tokenTextsCustom( sentence ); return tokensCustom.map( tokenText => new Token( tokenText ) ); } // Split the sentence string into tokens From 8eaf4f0762d2f409b795a771a22364acd89d8a92 Mon Sep 17 00:00:00 2001 From: Hanna Worku Date: Mon, 5 Jun 2023 15:46:41 -0700 Subject: [PATCH 10/19] relocate helper file and update specs --- packages/yoastseo/spec/parse/build/buildSpec.js | 4 ++-- .../spec/parse/language/LanguageProcessorSpec.js | 10 +++++++--- .../src/languageProcessing/languages/ja/Researcher.js | 2 +- .../languages/ja/helpers/splitIntoTokensJapanese.js} | 4 ++-- 4 files changed, 12 insertions(+), 8 deletions(-) rename packages/yoastseo/src/{parse/structure/languages/ja/SplitIntoTokensJapanese.js => languageProcessing/languages/ja/helpers/splitIntoTokensJapanese.js} (85%) diff --git a/packages/yoastseo/spec/parse/build/buildSpec.js b/packages/yoastseo/spec/parse/build/buildSpec.js index 685ceef310a..438505fd842 100644 --- 
a/packages/yoastseo/spec/parse/build/buildSpec.js +++ b/packages/yoastseo/spec/parse/build/buildSpec.js @@ -2,7 +2,7 @@ import build from "../../../src/parse/build/build"; import LanguageProcessor from "../../../src/parse/language/LanguageProcessor"; import Factory from "../../specHelpers/factory"; import memoizedSentenceTokenizer from "../../../src/languageProcessing/helpers/sentence/memoizedSentenceTokenizer"; -import splitIntoTokensJapanese from "../../../src/parse/structure/languages/ja/SplitIntoTokensJapanese"; +import splitIntoTokensCustom from "../../../src/languageProcessing/languages/ja/helpers/splitIntoTokensJapanese"; describe( "The parse function", () => { it( "parses a basic HTML text", () => { @@ -72,7 +72,7 @@ describe( "The parse function", () => { const html = "

こんにちは世界!

"; const researcher = Factory.buildMockResearcher( {}, true, false, false, - { splitIntoTokensJapanese: splitIntoTokensJapanese } ); + { splitIntoTokensCustom: splitIntoTokensCustom } ); const languageProcessor = new LanguageProcessor( researcher ); expect( build( html, languageProcessor ) ).toEqual( { name: "#document-fragment", diff --git a/packages/yoastseo/spec/parse/language/LanguageProcessorSpec.js b/packages/yoastseo/spec/parse/language/LanguageProcessorSpec.js index 0134717945d..4c033d86c3d 100644 --- a/packages/yoastseo/spec/parse/language/LanguageProcessorSpec.js +++ b/packages/yoastseo/spec/parse/language/LanguageProcessorSpec.js @@ -2,6 +2,7 @@ import LanguageProcessor from "../../../src/parse/language/LanguageProcessor"; import Factory from "../../specHelpers/factory"; import memoizedSentenceTokenizer from "../../../src/languageProcessing/helpers/sentence/memoizedSentenceTokenizer"; import Sentence from "../../../src/parse/structure/Sentence"; +import splitIntoTokensCustom from "../../../src/parse/structure/languages/ja/SplitIntoTokensJapanese"; const researcher = Factory.buildMockResearcher( {}, true, false, false, { memoizedTokenizer: memoizedSentenceTokenizer } ); @@ -46,13 +47,16 @@ describe( "A test for the splitIntoTokens method", () => { describe( "A test for the splitIntoTokens method in Japanese", () => { it( "should return an array of tokens", function() { + const researcher = Factory.buildMockResearcher( {}, true, false, false, + { splitIntoTokensCustom: splitIntoTokensCustom } ); const languageProcessor = new LanguageProcessor( researcher ); - const tokens = languageProcessor.splitIntoTokens( new Sentence( "こんにちは世界!" 
) ); expect( tokens ).toEqual( [ - { text: "こんにちは", sourceCodeRange: {} }, + { text: "こん", sourceCodeRange: {} }, + { text: "にち", sourceCodeRange: {} }, + { text: "は", sourceCodeRange: {} }, { text: "世界", sourceCodeRange: {} }, - { text: "!", sourceCodeRange: {} }, + { text: "!", sourceCodeRange: {} }, ] ); } ); } ); diff --git a/packages/yoastseo/src/languageProcessing/languages/ja/Researcher.js b/packages/yoastseo/src/languageProcessing/languages/ja/Researcher.js index fa354b0e2a8..fbd0e0cf813 100644 --- a/packages/yoastseo/src/languageProcessing/languages/ja/Researcher.js +++ b/packages/yoastseo/src/languageProcessing/languages/ja/Researcher.js @@ -10,7 +10,7 @@ import customCountLength from "./helpers/countCharacters"; import matchTransitionWordsHelper from "./helpers/matchTransitionWords"; import getContentWords from "./helpers/getContentWords"; import memoizedTokenizer from "./helpers/memoizedSentenceTokenizer"; -import splitIntoTokensCustom from "../../../parse/structure/languages/ja/SplitIntoTokensJapanese"; +import splitIntoTokensCustom from "./helpers/splitIntoTokensJapanese"; // All config import firstWordExceptions from "./config/firstWordExceptions"; diff --git a/packages/yoastseo/src/parse/structure/languages/ja/SplitIntoTokensJapanese.js b/packages/yoastseo/src/languageProcessing/languages/ja/helpers/splitIntoTokensJapanese.js similarity index 85% rename from packages/yoastseo/src/parse/structure/languages/ja/SplitIntoTokensJapanese.js rename to packages/yoastseo/src/languageProcessing/languages/ja/helpers/splitIntoTokensJapanese.js index c89e322e67d..959bd1fd99c 100644 --- a/packages/yoastseo/src/parse/structure/languages/ja/SplitIntoTokensJapanese.js +++ b/packages/yoastseo/src/languageProcessing/languages/ja/helpers/splitIntoTokensJapanese.js @@ -8,7 +8,7 @@ import TinySegmenter from "tiny-segmenter"; * * @returns {Token[]} The tokens. 
*/ -function splitIntoTokensJapanese( sentence ) { +function splitIntoTokensCustom( sentence ) { // Retrieve sentence from sentence class const sentenceText = sentence.text; // Return empty string if sentence is empty @@ -19,4 +19,4 @@ function splitIntoTokensJapanese( sentence ) { const words = new TinySegmenter().segment( sentenceText ); return map( words ); } -export default splitIntoTokensJapanese; +export default splitIntoTokensCustom; From 3367b13e14a86f0591344977d740021da79fd35c Mon Sep 17 00:00:00 2001 From: Hanna Worku Date: Mon, 5 Jun 2023 15:54:03 -0700 Subject: [PATCH 11/19] correct new path --- packages/yoastseo/spec/parse/language/LanguageProcessorSpec.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/yoastseo/spec/parse/language/LanguageProcessorSpec.js b/packages/yoastseo/spec/parse/language/LanguageProcessorSpec.js index 4c033d86c3d..2bf034058c9 100644 --- a/packages/yoastseo/spec/parse/language/LanguageProcessorSpec.js +++ b/packages/yoastseo/spec/parse/language/LanguageProcessorSpec.js @@ -2,7 +2,7 @@ import LanguageProcessor from "../../../src/parse/language/LanguageProcessor"; import Factory from "../../specHelpers/factory"; import memoizedSentenceTokenizer from "../../../src/languageProcessing/helpers/sentence/memoizedSentenceTokenizer"; import Sentence from "../../../src/parse/structure/Sentence"; -import splitIntoTokensCustom from "../../../src/parse/structure/languages/ja/SplitIntoTokensJapanese"; +import splitIntoTokensCustom from "../../../src/languageProcessing/languages/ja/helpers/splitIntoTokensJapanese"; const researcher = Factory.buildMockResearcher( {}, true, false, false, { memoizedTokenizer: memoizedSentenceTokenizer } ); From 841fa265b02355e5a90c9e708f3412d6dc36d581 Mon Sep 17 00:00:00 2001 From: hdvos Date: Wed, 7 Jun 2023 14:23:31 +0200 Subject: [PATCH 12/19] fix bug where splitIntoTokens could not access researcher --- packages/yoastseo/src/parse/build/private/tokenize.js | 8 ++++---- 1 file 
changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/yoastseo/src/parse/build/private/tokenize.js b/packages/yoastseo/src/parse/build/private/tokenize.js index 6f8933fd0ba..ede3adef6f8 100644 --- a/packages/yoastseo/src/parse/build/private/tokenize.js +++ b/packages/yoastseo/src/parse/build/private/tokenize.js @@ -6,12 +6,12 @@ import getTextElementPositions from "./getTextElementPositions"; * * @param {Paragraph|Heading} node The paragraph or heading node to split into sentences. * @param {Sentence} sentence The sentence. - * @param {function} splitIntoTokens The function to use to split the sentence into tokens. + * @param {function} LanguageProcessor The languageprocessor for the current language. * * @returns {Sentence} The sentence, with tokens. */ -function getTokens( node, sentence, splitIntoTokens ) { - sentence.tokens = splitIntoTokens( sentence ); +function getTokens( node, sentence, LanguageProcessor ) { + sentence.tokens = LanguageProcessor.splitIntoTokens( sentence ); sentence.tokens = getTextElementPositions( node, sentence.tokens, sentence.sourceCodeRange.startOffset ); return sentence; } @@ -31,7 +31,7 @@ function getSentences( node, languageProcessor ) { // Add position information to the sentences. sentences = getTextElementPositions( node, sentences ); // Tokenize sentences into tokens. 
- return sentences.map( sentence => getTokens( node, sentence, languageProcessor.splitIntoTokens ) ); + return sentences.map( sentence => getTokens( node, sentence, languageProcessor ) ); } /** From 6a6b30531ecb1f608831235cefd6e0a055e6da0f Mon Sep 17 00:00:00 2001 From: hdvos Date: Thu, 8 Jun 2023 16:31:22 +0200 Subject: [PATCH 13/19] change japanese example in buildSpec.js --- .../yoastseo/spec/parse/build/buildSpec.js | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/packages/yoastseo/spec/parse/build/buildSpec.js b/packages/yoastseo/spec/parse/build/buildSpec.js index 811a347d2f9..82774586362 100644 --- a/packages/yoastseo/spec/parse/build/buildSpec.js +++ b/packages/yoastseo/spec/parse/build/buildSpec.js @@ -69,10 +69,10 @@ describe( "The parse function", () => { } ); it( "parses a basic Japanese HTML text", () => { - const html = "

こんにちは世界!

"; + const html = "

犬が大好き

"; const researcher = Factory.buildMockResearcher( {}, true, false, false, - { splitIntoTokensCustom: splitIntoTokensCustom } ); + { splitIntoTokensCustom: splitIntoTokensCustom, memoizedTokenizer: memoizedSentenceTokenizer } ); const languageProcessor = new LanguageProcessor( researcher ); expect( build( html, languageProcessor ) ).toEqual( { name: "#document-fragment", @@ -81,14 +81,14 @@ describe( "The parse function", () => { name: "div", sourceCodeLocation: { startOffset: 0, - endOffset: 45, + endOffset: 37, startTag: { startOffset: 0, endOffset: 5, }, endTag: { - startOffset: 39, - endOffset: 45, + startOffset: 31, + endOffset: 37, }, }, attributes: {}, @@ -99,28 +99,28 @@ describe( "The parse function", () => { "class": new Set( [ "yoast" ] ), }, sentences: [ { - text: "こんにちは世界!", - sourceCodeRange: { startOffset: 22, endOffset: 35 }, + text: "犬が大好き", + sourceCodeRange: { startOffset: 22, endOffset: 27 }, tokens: [ - { text: "こんにちは", sourceCodeRange: { startOffset: 22, endOffset: 27 } }, - { text: "世界", sourceCodeRange: { startOffset: 29, endOffset: 34 } }, - { text: "!", sourceCodeRange: { startOffset: 34, endOffset: 35 } }, + { text: "犬", sourceCodeRange: { startOffset: 22, endOffset: 23 } }, + { text: "が", sourceCodeRange: { startOffset: 23, endOffset: 24 } }, + { text: "大好き", sourceCodeRange: { startOffset: 24, endOffset: 27 } }, ], } ], childNodes: [ { name: "#text", - value: "こんにちは世界!", + value: "犬が大好き", } ], sourceCodeLocation: { startOffset: 5, - endOffset: 39, + endOffset: 31, startTag: { startOffset: 5, endOffset: 22, }, endTag: { - startOffset: 35, - endOffset: 39, + startOffset: 27, + endOffset: 31, }, }, } ], From bc4c908b9dbdf601514fe1111cb8b64cdecf25d6 Mon Sep 17 00:00:00 2001 From: hdvos Date: Fri, 9 Jun 2023 10:04:14 +0200 Subject: [PATCH 14/19] add specs for splitIntoTokensJapanese --- .../ja/helpers/splitIntoTokensJapaneseSpec.js | 57 +++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 
packages/yoastseo/spec/languageProcessing/languages/ja/helpers/splitIntoTokensJapaneseSpec.js diff --git a/packages/yoastseo/spec/languageProcessing/languages/ja/helpers/splitIntoTokensJapaneseSpec.js b/packages/yoastseo/spec/languageProcessing/languages/ja/helpers/splitIntoTokensJapaneseSpec.js new file mode 100644 index 00000000000..62720611b51 --- /dev/null +++ b/packages/yoastseo/spec/languageProcessing/languages/ja/helpers/splitIntoTokensJapaneseSpec.js @@ -0,0 +1,57 @@ +import splitIntoTokensJapanese from "../../../../../src/languageProcessing/languages/ja/helpers/splitIntoTokensJapanese"; + +const testcases = [ + { + description: "should return an empty result sentence is empty", + sentence: { text: "", sourceCodeRange: { startOffset: 0, endOffset: 0 } }, + expected: [], + }, + { + description: "should correctly tokenize a simple Japanese sentence wouthout punctuations", + sentence: { + text: "犬が大好き", + sourceCodeRange: { startOffset: 0, endOffset: 5 }, + }, + expected: [ "犬", "が", "大好き" ], + }, + { + description: "should correctly tokenize a Japanese sentence with japanese punctuations", + sentence: { + text: "犬が大好き\u3002", + sourceCodeRange: { startOffset: 0, endOffset: 6 }, + }, + expected: [ "犬", "が", "大好き", "。" ], + + }, + { + description: "should correctly tokenize a Japanese sentence with english punctuations", + sentence: { + text: "犬が大好き.", + sourceCodeRange: { startOffset: 0, endOffset: 6 }, + }, + expected: [ "犬", "が", "大好き", "." 
], + }, + { + description: "should correctly tokenize a Japanese sentence with quotation marks inside", + sentence: { + text: "犬「が」大好き\u3002", + sourceCodeRange: { startOffset: 0, endOffset: 8 }, + }, + expected: [ "犬", "「", "が", "」", "大好き", "。" ], + }, + { + description: "should correctly tokenize a Japanese sentence with quotation marks around", + sentence: { + text: "『犬が大好き\u3002』", + sourceCodeRange: { startOffset: 0, endOffset: 8 }, + }, + expected: [ "『", "犬", "が", "大好き", "。", "』" ], + }, +]; + +describe.each( testcases )( "splitIntoTokensJapanese: %p", ( { description, sentence, expected } ) => { + it( description, () => { + const tokens = splitIntoTokensJapanese( sentence ); + expect( tokens ).toEqual( expected ); + } ); +} ); From d66192517edafd171a84e04122e7412af2603dc9 Mon Sep 17 00:00:00 2001 From: hdvos Date: Fri, 9 Jun 2023 10:04:39 +0200 Subject: [PATCH 15/19] add spec for japanese in tokenizeSpec.js --- .../spec/parse/build/private/tokenizeSpec.js | 79 +++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/packages/yoastseo/spec/parse/build/private/tokenizeSpec.js b/packages/yoastseo/spec/parse/build/private/tokenizeSpec.js index 12651e4c272..a08cab6160c 100644 --- a/packages/yoastseo/spec/parse/build/private/tokenizeSpec.js +++ b/packages/yoastseo/spec/parse/build/private/tokenizeSpec.js @@ -1,6 +1,7 @@ import tokenize from "../../../../src/parse/build/private/tokenize"; import Paper from "../../../../src/values/Paper"; import EnglishResearcher from "../../../../src/languageProcessing/languages/en/Researcher"; +import JapaneseResearcher from "../../../../src/languageProcessing/languages/ja/Researcher"; import { buildTreeNoTokenize } from "../../../specHelpers/parse/buildTree"; import LanguageProcessor from "../../../../src/parse/language/LanguageProcessor"; @@ -192,3 +193,81 @@ describe( "A test for the tokenize function", function() { } ); } ); } ); + +describe( "A test for tokenizing a japanese sentence", function() { + it( "should 
correctly tokenize a simple Japanese sentence.", function() { + const mockPaper = new Paper( "<p>犬が大好き\u3002</p>
", { locale: "ja_JP" } ); + const mockResearcher = new JapaneseResearcher( mockPaper ); + const languageProcessor = new LanguageProcessor( mockResearcher ); + buildTreeNoTokenize( mockPaper ); + // eslint-disable-next-line max-len + expect( tokenize( mockPaper.getTree(), languageProcessor ) ).toEqual( { + attributes: {}, + childNodes: [ + { + attributes: {}, + childNodes: [ + { + name: "#text", + value: "犬が大好き。", + }, + ], + isImplicit: false, + name: "p", + sentences: [ + { + sourceCodeRange: { + startOffset: 3, + endOffset: 9, + }, + text: "犬が大好き。", + tokens: [ + { + sourceCodeRange: { + startOffset: 3, + endOffset: 4, + }, + text: "犬", + }, + { + sourceCodeRange: { + startOffset: 4, + endOffset: 5, + }, + text: "が", + }, + { + sourceCodeRange: { + startOffset: 5, + endOffset: 8, + }, + text: "大好き", + }, + { + sourceCodeRange: { + startOffset: 8, + endOffset: 9, + }, + text: "。", + }, + ], + }, + ], + sourceCodeLocation: { + startOffset: 0, + endOffset: 13, + startTag: { + startOffset: 0, + endOffset: 3, + }, + endTag: { + startOffset: 9, + endOffset: 13, + }, + }, + }, + ], + name: "#document-fragment", + } ); + } ); +} ); From 254d268abb4a5b2b266b464d366b3a60ddb0bc54 Mon Sep 17 00:00:00 2001 From: hdvos Date: Fri, 9 Jun 2023 10:10:39 +0200 Subject: [PATCH 16/19] fix double declaration of researcher --- .../yoastseo/spec/parse/language/LanguageProcessorSpec.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/yoastseo/spec/parse/language/LanguageProcessorSpec.js b/packages/yoastseo/spec/parse/language/LanguageProcessorSpec.js index 2bf034058c9..88e94c842b3 100644 --- a/packages/yoastseo/spec/parse/language/LanguageProcessorSpec.js +++ b/packages/yoastseo/spec/parse/language/LanguageProcessorSpec.js @@ -47,9 +47,9 @@ describe( "A test for the splitIntoTokens method", () => { describe( "A test for the splitIntoTokens method in Japanese", () => { it( "should return an array of tokens", function() { - const researcher = 
Factory.buildMockResearcher( {}, true, false, false, + const japaneseResearcher = Factory.buildMockResearcher( {}, true, false, false, { splitIntoTokensCustom: splitIntoTokensCustom } ); - const languageProcessor = new LanguageProcessor( researcher ); + const languageProcessor = new LanguageProcessor( japaneseResearcher ); const tokens = languageProcessor.splitIntoTokens( new Sentence( "こんにちは世界!" ) ); expect( tokens ).toEqual( [ { text: "こん", sourceCodeRange: {} }, From dbc907fb9d8aeea35203d4c7a2ff383771e53835 Mon Sep 17 00:00:00 2001 From: Marina Koleva Date: Mon, 12 Jun 2023 10:10:20 +0200 Subject: [PATCH 17/19] adapting one unit test in LanguageProcessorSpec.js --- .../spec/parse/language/LanguageProcessorSpec.js | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/packages/yoastseo/spec/parse/language/LanguageProcessorSpec.js b/packages/yoastseo/spec/parse/language/LanguageProcessorSpec.js index 98097aa375a..4f023b01c39 100644 --- a/packages/yoastseo/spec/parse/language/LanguageProcessorSpec.js +++ b/packages/yoastseo/spec/parse/language/LanguageProcessorSpec.js @@ -439,13 +439,17 @@ describe( "A test for the splitIntoTokens method in Japanese", () => { const japaneseResearcher = Factory.buildMockResearcher( {}, true, false, false, { splitIntoTokensCustom: splitIntoTokensCustom } ); const languageProcessor = new LanguageProcessor( japaneseResearcher ); - const tokens = languageProcessor.splitIntoTokens( new Sentence( "こんにちは世界!" 
) ); + const tokens = languageProcessor.splitIntoTokens( new Sentence( "ウクライナは、東ヨーロッパに位置する国家。" ) ); expect( tokens ).toEqual( [ - { text: "こん", sourceCodeRange: {} }, - { text: "にち", sourceCodeRange: {} }, + { text: "ウクライナ", sourceCodeRange: {} }, { text: "は", sourceCodeRange: {} }, - { text: "世界", sourceCodeRange: {} }, - { text: "!", sourceCodeRange: {} }, + { text: "、", sourceCodeRange: {} }, + { text: "東ヨーロッパ", sourceCodeRange: {} }, + { text: "に", sourceCodeRange: {} }, + { text: "位置", sourceCodeRange: {} }, + { text: "する", sourceCodeRange: {} }, + { text: "国家", sourceCodeRange: {} }, + { text: "。", sourceCodeRange: {} }, ] ); } ); } ); From 8a22e4c33552398b32484883b55f0419e69372ae Mon Sep 17 00:00:00 2001 From: Mykola Shlyakhtun Date: Tue, 13 Jun 2023 15:06:15 +0300 Subject: [PATCH 18/19] Rename file to use unified name for function and file. --- ...IntoTokensJapaneseSpec.js => splitIntoTokensCustomSpec.js} | 4 ++-- packages/yoastseo/spec/parse/build/buildSpec.js | 2 +- .../yoastseo/spec/parse/language/LanguageProcessorSpec.js | 2 +- .../src/languageProcessing/languages/ja/Researcher.js | 2 +- .../{splitIntoTokensJapanese.js => splitIntoTokensCustom.js} | 0 5 files changed, 5 insertions(+), 5 deletions(-) rename packages/yoastseo/spec/languageProcessing/languages/ja/helpers/{splitIntoTokensJapaneseSpec.js => splitIntoTokensCustomSpec.js} (90%) rename packages/yoastseo/src/languageProcessing/languages/ja/helpers/{splitIntoTokensJapanese.js => splitIntoTokensCustom.js} (100%) diff --git a/packages/yoastseo/spec/languageProcessing/languages/ja/helpers/splitIntoTokensJapaneseSpec.js b/packages/yoastseo/spec/languageProcessing/languages/ja/helpers/splitIntoTokensCustomSpec.js similarity index 90% rename from packages/yoastseo/spec/languageProcessing/languages/ja/helpers/splitIntoTokensJapaneseSpec.js rename to packages/yoastseo/spec/languageProcessing/languages/ja/helpers/splitIntoTokensCustomSpec.js index 62720611b51..376f6c89dc1 100644 --- 
a/packages/yoastseo/spec/languageProcessing/languages/ja/helpers/splitIntoTokensJapaneseSpec.js +++ b/packages/yoastseo/spec/languageProcessing/languages/ja/helpers/splitIntoTokensCustomSpec.js @@ -1,4 +1,4 @@ -import splitIntoTokensJapanese from "../../../../../src/languageProcessing/languages/ja/helpers/splitIntoTokensJapanese"; +import splitIntoTokensCustom from "../../../../../src/languageProcessing/languages/ja/helpers/splitIntoTokensCustom"; const testcases = [ { @@ -51,7 +51,7 @@ const testcases = [ describe.each( testcases )( "splitIntoTokensJapanese: %p", ( { description, sentence, expected } ) => { it( description, () => { - const tokens = splitIntoTokensJapanese( sentence ); + const tokens = splitIntoTokensCustom( sentence ); expect( tokens ).toEqual( expected ); } ); } ); diff --git a/packages/yoastseo/spec/parse/build/buildSpec.js b/packages/yoastseo/spec/parse/build/buildSpec.js index 82774586362..1c08d2a5dce 100644 --- a/packages/yoastseo/spec/parse/build/buildSpec.js +++ b/packages/yoastseo/spec/parse/build/buildSpec.js @@ -2,7 +2,7 @@ import build from "../../../src/parse/build/build"; import LanguageProcessor from "../../../src/parse/language/LanguageProcessor"; import Factory from "../../specHelpers/factory"; import memoizedSentenceTokenizer from "../../../src/languageProcessing/helpers/sentence/memoizedSentenceTokenizer"; -import splitIntoTokensCustom from "../../../src/languageProcessing/languages/ja/helpers/splitIntoTokensJapanese"; +import splitIntoTokensCustom from "../../../src/languageProcessing/languages/ja/helpers/splitIntoTokensCustom"; describe( "The parse function", () => { it( "parses a basic HTML text", () => { diff --git a/packages/yoastseo/spec/parse/language/LanguageProcessorSpec.js b/packages/yoastseo/spec/parse/language/LanguageProcessorSpec.js index 4f023b01c39..fadd25f9ddf 100644 --- a/packages/yoastseo/spec/parse/language/LanguageProcessorSpec.js +++ b/packages/yoastseo/spec/parse/language/LanguageProcessorSpec.js @@ -2,7 
+2,7 @@ import LanguageProcessor from "../../../src/parse/language/LanguageProcessor"; import Factory from "../../specHelpers/factory"; import memoizedSentenceTokenizer from "../../../src/languageProcessing/helpers/sentence/memoizedSentenceTokenizer"; import Sentence from "../../../src/parse/structure/Sentence"; -import splitIntoTokensCustom from "../../../src/languageProcessing/languages/ja/helpers/splitIntoTokensJapanese"; +import splitIntoTokensCustom from "../../../src/languageProcessing/languages/ja/helpers/splitIntoTokensCustom"; const researcher = Factory.buildMockResearcher( {}, true, false, false, { memoizedTokenizer: memoizedSentenceTokenizer } ); diff --git a/packages/yoastseo/src/languageProcessing/languages/ja/Researcher.js b/packages/yoastseo/src/languageProcessing/languages/ja/Researcher.js index fbd0e0cf813..10fc57d734c 100644 --- a/packages/yoastseo/src/languageProcessing/languages/ja/Researcher.js +++ b/packages/yoastseo/src/languageProcessing/languages/ja/Researcher.js @@ -10,7 +10,7 @@ import customCountLength from "./helpers/countCharacters"; import matchTransitionWordsHelper from "./helpers/matchTransitionWords"; import getContentWords from "./helpers/getContentWords"; import memoizedTokenizer from "./helpers/memoizedSentenceTokenizer"; -import splitIntoTokensCustom from "./helpers/splitIntoTokensJapanese"; +import splitIntoTokensCustom from "./helpers/splitIntoTokensCustom"; // All config import firstWordExceptions from "./config/firstWordExceptions"; diff --git a/packages/yoastseo/src/languageProcessing/languages/ja/helpers/splitIntoTokensJapanese.js b/packages/yoastseo/src/languageProcessing/languages/ja/helpers/splitIntoTokensCustom.js similarity index 100% rename from packages/yoastseo/src/languageProcessing/languages/ja/helpers/splitIntoTokensJapanese.js rename to packages/yoastseo/src/languageProcessing/languages/ja/helpers/splitIntoTokensCustom.js From 61c4e98f611f667b3182a035e8125704cf9b3919 Mon Sep 17 00:00:00 2001 From: Mykola 
Shlyakhtun Date: Tue, 13 Jun 2023 15:12:53 +0300 Subject: [PATCH 19/19] Fix test name. --- .../languages/ja/helpers/splitIntoTokensCustomSpec.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/yoastseo/spec/languageProcessing/languages/ja/helpers/splitIntoTokensCustomSpec.js b/packages/yoastseo/spec/languageProcessing/languages/ja/helpers/splitIntoTokensCustomSpec.js index 376f6c89dc1..dd6f0f7cef7 100644 --- a/packages/yoastseo/spec/languageProcessing/languages/ja/helpers/splitIntoTokensCustomSpec.js +++ b/packages/yoastseo/spec/languageProcessing/languages/ja/helpers/splitIntoTokensCustomSpec.js @@ -49,7 +49,7 @@ const testcases = [ }, ]; -describe.each( testcases )( "splitIntoTokensJapanese: %p", ( { description, sentence, expected } ) => { +describe.each( testcases )( "splitIntoTokensCustom for Japanese: %p", ( { description, sentence, expected } ) => { it( description, () => { const tokens = splitIntoTokensCustom( sentence ); expect( tokens ).toEqual( expected );