Skip to content

Commit

Permalink
feat(options): add 読点 and 句点 as options
Browse files Browse the repository at this point in the history
  • Loading branch information
azu committed Apr 8, 2021
1 parent 2bc5335 commit d625187
Show file tree
Hide file tree
Showing 4 changed files with 572 additions and 46 deletions.
17 changes: 16 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,13 @@
"build": "textlint-scripts build",
"watch": "textlint-scripts build --watch",
"prepublish": "npm run --if-present build",
"test": "textlint-scripts test"
"test": "textlint-scripts test",
"prettier": "prettier --write \"**/*.{js,jsx,ts,tsx,css}\"",
"prepare": "git config --local core.hooksPath .githook"
},
"devDependencies": {
"lint-staged": "^10.5.4",
"prettier": "^2.2.1",
"textlint-scripts": "^3.0.0"
},
"dependencies": {
Expand All @@ -41,5 +45,16 @@
"structured-source": "^3.0.2",
"textlint-rule-helper": "^2.0.0",
"textlint-util-to-string": "^3.1.1"
},
"prettier": {
"singleQuote": false,
"printWidth": 120,
"tabWidth": 4,
"trailingComma": "none"
},
"lint-staged": {
"*.{js,jsx,ts,tsx,css}": [
"prettier --write"
]
}
}
65 changes: 42 additions & 23 deletions src/max-ten.js
Original file line number Diff line number Diff line change
@@ -1,20 +1,25 @@
// LICENSE : MIT
"use strict";
import { RuleHelper } from "textlint-rule-helper"
import { RuleHelper } from "textlint-rule-helper";
import { getTokenizer } from "kuromojin";
import { splitAST, Syntax as SentenceSyntax } from "sentence-splitter";
import { StringSource } from "textlint-util-to-string";
import { SeparatorParser } from "sentence-splitter/lib/parser/SeparatorParser";

/**
 * Default option values for the max-ten rule.
 * Each key can be overridden through the `options` argument of the rule.
 */
const defaultOptions = {
    // Number of touten allowed in one sentence (original note: "the maximum
    // number of 、 usable in one sentence")
    max: 3,
    // Strict-mode flag (original note: "whether to apply the exception rules")
    strict: false,
    // Character treated as the touten (Japanese comma)
    // https://ja.wikipedia.org/wiki/%E8%AA%AD%E7%82%B9
    touten: "、",
    // Character treated as the kuten (Japanese full stop)
    // https://ja.wikipedia.org/wiki/%E5%8F%A5%E7%82%B9
    kuten: "。"
};

function isSandwichedMeishi({
before,
token,
after
}) {
function isSandwichedMeishi({ before, token, after }) {
if (before === undefined || after === undefined || token === undefined) {
return false;
}
Expand All @@ -23,20 +28,31 @@ function isSandwichedMeishi({

/**
* @param {RuleContext} context
* @param {object} [options]
* @param {typeof defaultOptions} [options]
*/
module.exports = function (context, options = {}) {
const maxLen = options.max || defaultOptions.max;
const isStrict = options.strict || defaultOptions.strict;
const maxLen = options.max ?? defaultOptions.max;
const isStrict = options.strict ?? defaultOptions.strict;
const touten = options.touten ?? defaultOptions.touten;
const kuten = options.kuten ?? defaultOptions.kuten;
const helper = new RuleHelper(context);
const { Syntax, RuleError, report, getSource } = context;
return {
[Syntax.Paragraph](node) {
if (helper.isChildNode(node, [Syntax.BlockQuote])) {
return;
}
const resultNode = splitAST(node);
const sentences = resultNode.children.filter(childNode => childNode.type === SentenceSyntax.Sentence);
const resultNode = splitAST(node, {
SeparatorParser: {
separatorCharacters: [
"?", // question mark
"!", // exclamation mark
"?", // (ja) zenkaku question mark
"!" // (ja) zenkaku exclamation mark
].concat(kuten)
}
});
const sentences = resultNode.children.filter((childNode) => childNode.type === SentenceSyntax.Sentence);
/*
<p>
<str><code><img><str>
Expand All @@ -49,18 +65,18 @@ module.exports = function (context, options = {}) {
2. sentence to tokens
3. check tokens
*/
return getTokenizer().then(tokenizer => {
sentences.forEach(sentence => {
return getTokenizer().then((tokenizer) => {
sentences.forEach((sentence) => {
const source = new StringSource(sentence);
const text = source.toString();
const tokens = tokenizer.tokenizeForSentence(text);
let currentTenCount = 0;
let lastToken = null;
tokens.forEach((token, index) => {
let surface = token.surface_form;
if (surface === "、") {
const surface = token.surface_form;
if (surface === touten) {
// 名詞に囲まれている場合は例外とする
let isSandwiched = isSandwichedMeishi({
const isSandwiched = isSandwichedMeishi({
before: tokens[index - 1],
token: token,
after: tokens[index + 1]
Expand All @@ -80,15 +96,18 @@ module.exports = function (context, options = {}) {
if (currentTenCount >= maxLen) {
const positionInSentence = source.originalIndexFromIndex(lastToken.word_position - 1);
const index = sentence.range[0] + positionInSentence;
const ruleError = new context.RuleError(`一つの文で"、"を${maxLen}つ以上使用しています`, {
index
});
const ruleError = new context.RuleError(
`一つの文で"${touten}"を${maxLen}つ以上使用しています`,
{
index
}
);
report(node, ruleError);
currentTenCount = 0;
}
});
});
});
}
}
}
};
};
55 changes: 41 additions & 14 deletions test/max-ten-test.js
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
const rule = require("../src/max-ten");
import TextLintTester from "textlint-tester";
import rule from "../src/max-ten";

/**
 * Build a test sentence containing exactly `count` touten ("、").
 *
 * Joining an array of `count + 1` empty slots with the clause yields the
 * clause repeated `count` times; "です" is appended to close the sentence.
 *
 * @param {number} count - number of "、" the generated text should contain
 * @returns {string} the generated sentence
 */
function textIncludeTen(count) {
    const clause = "テスト文章において、";
    const repeatedClauses = new Array(count + 1).join(clause);
    return `${repeatedClauses}です`;
}
var TextLintTester = require("textlint-tester");
var tester = new TextLintTester();

const tester = new TextLintTester();
// ruleName, rule, expected[]
tester.run("max-ten", rule, {
// default max:3
Expand All @@ -17,19 +19,31 @@ tester.run("max-ten", rule, {
{
text: textIncludeTen(5 - 1),
options: {
"max": 5
max: 5
}
},
{
text: "これは、テストです。"
},
{
text: "これは、これは、これは、これは、オプションでカウントされないのでOK",
options: {
touten: ",",
kuten: "."
}
},
{
text: `これは,これは.これは,これは.`,
options: {
touten: ",",
kuten: "."
}
}

],
invalid: [
{
text: `これは、これは、これは
、d`
,
、d`,
errors: [
{
message: `一つの文で"、"を3つ以上使用しています`,
Expand All @@ -38,10 +52,23 @@ tester.run("max-ten", rule, {
}
]
},
{
text: `これは,これは,これは,これは。`,
errors: [
{
message: `一つの文で","を3つ以上使用しています`,
index: 11
}
],
options: {
touten: ",",
kuten: "."
}
},
{
text: textIncludeTen(5),
options: {
"max": 5
max: 5
},
errors: [
{
Expand All @@ -52,7 +79,7 @@ tester.run("max-ten", rule, {
{
text: `これは、長文の例ですが、columnがちゃんと計算、されてるはずです。`,
options: {
"max": 3
max: 3
},
errors: [
{
Expand All @@ -65,7 +92,7 @@ tester.run("max-ten", rule, {
{
text: "間に、Str以外の`code`Nodeが、あっても、OK",
options: {
"max": 3
max: 3
},
errors: [
{
Expand All @@ -78,7 +105,7 @@ tester.run("max-ten", rule, {
{
text: `複数のセンテンスがある場合。これでも、columnが、ちゃんと計算、されているはずです。`,
options: {
"max": 3
max: 3
},
errors: [
{
Expand All @@ -91,7 +118,7 @@ tester.run("max-ten", rule, {
{
text: `複数のセンテンスがあって、改行されている場合でも\n大丈夫です。これでも、lineとcolumnが、ちゃんと計算、されているはずです。`,
options: {
"max": 3
max: 3
},
errors: [
{
Expand All @@ -102,4 +129,4 @@ tester.run("max-ten", rule, {
]
}
]
});
});
Loading

0 comments on commit d625187

Please sign in to comment.