Skip to content

Commit

Permalink
fix: Use tokenize instead of getTokenizer (#16)
Browse files Browse the repository at this point in the history
* Use tokenize instead of getTokenizer

* Bug fix
  • Loading branch information
k-tahiro authored Dec 12, 2021
1 parent 8730de0 commit 65dc1a4
Showing 1 changed file with 51 additions and 52 deletions.
103 changes: 51 additions & 52 deletions src/max-ten.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// LICENSE : MIT
"use strict";
import { RuleHelper } from "textlint-rule-helper";
import { getTokenizer } from "kuromojin";
import { tokenize } from "kuromojin";
import { splitAST, Syntax as SentenceSyntax } from "sentence-splitter";
import { StringSource } from "textlint-util-to-string";

Expand Down Expand Up @@ -103,59 +103,58 @@ module.exports = function (context, options = {}) {
2. sentence to tokens
3. check tokens
*/
return getTokenizer().then((tokenizer) => {
sentences.forEach((sentence) => {
const source = new StringSource(sentence);
const text = source.toString();
const tokens = tokenizer.tokenizeForSentence(text);
let currentTenCount = 0;
let lastToken = null;
tokens.forEach((token, index) => {
const surface = token.surface_form;
if (surface === touten) {
// 名詞に囲まれている場合は例外とする
const isSandwiched = isSandwichedMeishi({
before: findSiblingMeaningToken({
tokens,
currentIndex: index,
direction: "prev"
}),
token: token,
after: findSiblingMeaningToken({
tokens,
currentIndex: index,
direction: "next"
})
});
// strictなら例外を例外としない
if (!isStrict && isSandwiched) {
return;
}
currentTenCount++;
lastToken = token;
}
if (surface === kuten) {
// reset
currentTenCount = 0;
}
// report
if (currentTenCount > maxLen) {
const positionInSentence = source.originalIndexFromIndex(lastToken.word_position - 1);
// relative index from Paragraph Node
// Sentence start(relative) + word position(relative)
const index = sentence.range[0] - node.range[0] + positionInSentence;
const ruleError = new context.RuleError(
`一つの文で"${touten}"を${maxLen + 1}つ以上使用しています`,
{
index
}
);
report(node, ruleError);
currentTenCount = 0;
/**
 * Check a single sentence for too many touten (the `touten` character).
 *
 * The sentence is converted to plain text with StringSource, tokenized with
 * `tokenize`, and every token whose surface form equals `touten` is counted.
 * When the running count exceeds `maxLen`, an error is reported on `node`.
 *
 * `touten`, `kuten`, `maxLen`, `isStrict`, `node`, `report`, `context`,
 * `isSandwichedMeishi` and `findSiblingMeaningToken` are not defined here;
 * they come from the enclosing scope, which is outside this view.
 *
 * NOTE(review): this span appears to come from a diff view whose +/- markers
 * were stripped. The `});` right after the strict-mode guard and the `});`
 * right before the final `};` look like removed lines of the previous
 * implementation — confirm against the real file.
 *
 * @param sentence - sentence node; its `range[0]` is read to compute the report index
 * @returns Promise that settles once all tokens of the sentence have been checked
 */
const checkSentence = async (sentence) => {
const source = new StringSource(sentence);
const text = source.toString();
const tokens = await tokenize(text);
// Number of counted touten since the start of the sentence, the last kuten, or the last report.
let currentTenCount = 0;
// Most recently counted touten token; used to locate the report position.
let lastToken = null;
tokens.forEach((token, index) => {
const surface = token.surface_form;
if (surface === touten) {
// Exception: a touten sandwiched between nouns is not counted
const isSandwiched = isSandwichedMeishi({
before: findSiblingMeaningToken({
tokens,
currentIndex: index,
direction: "prev"
}),
token: token,
after: findSiblingMeaningToken({
tokens,
currentIndex: index,
direction: "next"
})
});
// In strict mode the exception above is not applied
if (!isStrict && isSandwiched) {
// Leave the callback early so this touten is neither counted nor reported
return;
}
});
currentTenCount++;
lastToken = token;
}
if (surface === kuten) {
// A kuten resets the running count
currentTenCount = 0;
}
// Report once the running count exceeds the allowed maximum
if (currentTenCount > maxLen) {
// presumably word_position is 1-based, hence the "- 1" — TODO confirm against kuromojin
const positionInSentence = source.originalIndexFromIndex(lastToken.word_position - 1);
// relative index from Paragraph Node
// Sentence start(relative) + word position(relative)
const index = sentence.range[0] - node.range[0] + positionInSentence;
const ruleError = new context.RuleError(
`一つの文で"${touten}"を${maxLen + 1}つ以上使用しています`,
{
index
}
);
report(node, ruleError);
// Start counting again so a further report needs another maxLen + 1 touten
currentTenCount = 0;
}
});
});
};
return Promise.all(sentences.map(checkSentence));
}
};
};

0 comments on commit 65dc1a4

Please sign in to comment.