feat(options): add 読点 and 句点 as options

textlint-ja · Apr 8, 2021 · d625187 · d625187
1 parent 2bc5335
commit d625187
Show file tree

Hide file tree

Showing 4 changed files with 572 additions and 46 deletions.
diff --git a/package.json b/package.json
@@ -30,9 +30,13 @@
     "build": "textlint-scripts build",
     "watch": "textlint-scripts build --watch",
     "prepublish": "npm run --if-present build",
-    "test": "textlint-scripts test"
+    "test": "textlint-scripts test",
+    "prettier": "prettier --write \"**/*.{js,jsx,ts,tsx,css}\"",
+    "prepare": "git config --local core.hooksPath .githook"
   },
   "devDependencies": {
+    "lint-staged": "^10.5.4",
+    "prettier": "^2.2.1",
     "textlint-scripts": "^3.0.0"
   },
   "dependencies": {
@@ -41,5 +45,16 @@
     "structured-source": "^3.0.2",
     "textlint-rule-helper": "^2.0.0",
     "textlint-util-to-string": "^3.1.1"
+  },
+  "prettier": {
+    "singleQuote": false,
+    "printWidth": 120,
+    "tabWidth": 4,
+    "trailingComma": "none"
+  },
+  "lint-staged": {
+    "*.{js,jsx,ts,tsx,css}": [
+      "prettier --write"
+    ]
   }
 }
diff --git a/src/max-ten.js b/src/max-ten.js
@@ -1,20 +1,25 @@
 // LICENSE : MIT
 "use strict";
-import { RuleHelper } from "textlint-rule-helper"
+import { RuleHelper } from "textlint-rule-helper";
 import { getTokenizer } from "kuromojin";
 import { splitAST, Syntax as SentenceSyntax } from "sentence-splitter";
 import { StringSource } from "textlint-util-to-string";
+import { SeparatorParser } from "sentence-splitter/lib/parser/SeparatorParser";
 
 const defaultOptions = {
-    max: 3, // 1文に利用できる最大の、の数
-    strict: false // 例外ルールを適応するかどうか
+    // 1文に利用できる最大の、の数
+    max: 3,
+    // 例外ルールを適用するかどうか
+    strict: false,
+    // 読点として扱う文字
+    // https://ja.wikipedia.org/wiki/%E8%AA%AD%E7%82%B9
+    touten: "、",
+    // 句点として扱う文字
+    // https://ja.wikipedia.org/wiki/%E5%8F%A5%E7%82%B9
+    kuten: "。"
 };
 
-function isSandwichedMeishi({
-                                before,
-                                token,
-                                after
-                            }) {
+function isSandwichedMeishi({ before, token, after }) {
     if (before === undefined || after === undefined || token === undefined) {
         return false;
     }
@@ -23,20 +28,31 @@ function isSandwichedMeishi({
 
 /**
  * @param {RuleContext} context
- * @param {object} [options]
+ * @param {typeof defaultOptions} [options]
  */
 module.exports = function (context, options = {}) {
-    const maxLen = options.max || defaultOptions.max;
-    const isStrict = options.strict || defaultOptions.strict;
+    const maxLen = options.max ?? defaultOptions.max;
+    const isStrict = options.strict ?? defaultOptions.strict;
+    const touten = options.touten ?? defaultOptions.touten;
+    const kuten = options.kuten ?? defaultOptions.kuten;
     const helper = new RuleHelper(context);
     const { Syntax, RuleError, report, getSource } = context;
     return {
         [Syntax.Paragraph](node) {
             if (helper.isChildNode(node, [Syntax.BlockQuote])) {
                 return;
             }
-            const resultNode = splitAST(node);
-            const sentences = resultNode.children.filter(childNode => childNode.type === SentenceSyntax.Sentence);
+            const resultNode = splitAST(node, {
+                SeparatorParser: {
+                    separatorCharacters: [
+                        "?", // question mark
+                        "!", //  exclamation mark
+                        "？", // (ja) zenkaku question mark
+                        "！" // (ja) zenkaku exclamation mark
+                    ].concat(kuten)
+                }
+            });
+            const sentences = resultNode.children.filter((childNode) => childNode.type === SentenceSyntax.Sentence);
             /*
              <p>
              <str><code><img><str>
@@ -49,18 +65,18 @@ module.exports = function (context, options = {}) {
              2. sentence to tokens
              3. check tokens
              */
-            return getTokenizer().then(tokenizer => {
-                sentences.forEach(sentence => {
+            return getTokenizer().then((tokenizer) => {
+                sentences.forEach((sentence) => {
                     const source = new StringSource(sentence);
                     const text = source.toString();
                     const tokens = tokenizer.tokenizeForSentence(text);
                     let currentTenCount = 0;
                     let lastToken = null;
                     tokens.forEach((token, index) => {
-                        let surface = token.surface_form;
-                        if (surface === "、") {
+                        const surface = token.surface_form;
+                        if (surface === touten) {
                             // 名詞に囲まわれている場合は例外とする
-                            let isSandwiched = isSandwichedMeishi({
+                            const isSandwiched = isSandwichedMeishi({
                                 before: tokens[index - 1],
                                 token: token,
                                 after: tokens[index + 1]
@@ -80,15 +96,18 @@ module.exports = function (context, options = {}) {
                         if (currentTenCount >= maxLen) {
                             const positionInSentence = source.originalIndexFromIndex(lastToken.word_position - 1);
                             const index = sentence.range[0] + positionInSentence;
-                            const ruleError = new context.RuleError(`一つの文で"、"を${maxLen}つ以上使用しています`, {
-                                index
-                            });
+                            const ruleError = new context.RuleError(
+                                `一つの文で"${touten}"を${maxLen}つ以上使用しています`,
+                                {
+                                    index
+                                }
+                            );
                             report(node, ruleError);
                             currentTenCount = 0;
                         }
                     });
                 });
             });
         }
-    }
-}
+    };
+};
diff --git a/test/max-ten-test.js b/test/max-ten-test.js
@@ -1,9 +1,11 @@
-const rule = require("../src/max-ten");
+import TextLintTester from "textlint-tester";
+import rule from "../src/max-ten";
+
 function textIncludeTen(count) {
-    return (new Array(count + 1)).join("テスト文章において、") + "です";
+    return new Array(count + 1).join("テスト文章において、") + "です";
 }
-var TextLintTester = require("textlint-tester");
-var tester = new TextLintTester();
+
+const tester = new TextLintTester();
 // ruleName, rule, expected[]
 tester.run("max-ten", rule, {
     // default max:3
@@ -17,19 +19,31 @@ tester.run("max-ten", rule, {
         {
             text: textIncludeTen(5 - 1),
             options: {
-                "max": 5
+                max: 5
             }
         },
         {
             text: "これは、テストです。"
+        },
+        {
+            text: "これは、これは、これは、これは、オプションでカウントされないのでOK",
+            options: {
+                touten: "，",
+                kuten: "．"
+            }
+        },
+        {
+            text: `これは，これは．これは，これは．`,
+            options: {
+                touten: "，",
+                kuten: "．"
+            }
         }
-
     ],
     invalid: [
         {
             text: `これは、これは、これは
-、d`
-            ,
+、d`,
             errors: [
                 {
                     message: `一つの文で"、"を3つ以上使用しています`,
@@ -38,10 +52,23 @@ tester.run("max-ten", rule, {
                 }
             ]
         },
+        {
+            text: `これは，これは，これは，これは。`,
+            errors: [
+                {
+                    message: `一つの文で"，"を3つ以上使用しています`,
+                    index: 11
+                }
+            ],
+            options: {
+                touten: "，",
+                kuten: "．"
+            }
+        },
         {
             text: textIncludeTen(5),
             options: {
-                "max": 5
+                max: 5
             },
             errors: [
                 {
@@ -52,7 +79,7 @@ tester.run("max-ten", rule, {
         {
             text: `これは、長文の例ですが、columnがちゃんと計算、されてるはずです。`,
             options: {
-                "max": 3
+                max: 3
             },
             errors: [
                 {
@@ -65,7 +92,7 @@ tester.run("max-ten", rule, {
         {
             text: "間に、Str以外の`code`Nodeが、あっても、OK",
             options: {
-                "max": 3
+                max: 3
             },
             errors: [
                 {
@@ -78,7 +105,7 @@ tester.run("max-ten", rule, {
         {
             text: `複数のセンテンスがある場合。これでも、columnが、ちゃんと計算、されているはずです。`,
             options: {
-                "max": 3
+                max: 3
             },
             errors: [
                 {
@@ -91,7 +118,7 @@ tester.run("max-ten", rule, {
         {
             text: `複数のセンテンスがあって、改行されている場合でも\n大丈夫です。これでも、lineとcolumnが、ちゃんと計算、されているはずです。`,
             options: {
-                "max": 3
+                max: 3
             },
             errors: [
                 {
@@ -102,4 +129,4 @@ tester.run("max-ten", rule, {
             ]
         }
     ]
-});
+});