From 5ce5d207e3f3906722d1402212de13339074e567 Mon Sep 17 00:00:00 2001 From: Kuuuube <61125188+Kuuuube@users.noreply.github.com> Date: Mon, 29 Jul 2024 22:15:05 -0400 Subject: [PATCH] Expand "search non-Japanese characters" to Chinese and Cantonese (#1285) * Expand "search non-Japanese characters" to Chinese and Cantonese * Add isCodePointChinese --- ext/css/settings.css | 2 +- ext/js/language/translator.js | 12 +++++++----- ext/js/language/zh/chinese.js | 8 ++++++++ ext/settings.html | 6 +++--- 4 files changed, 19 insertions(+), 9 deletions(-) diff --git a/ext/css/settings.css b/ext/css/settings.css index 598673f43..2e97a6d07 100644 --- a/ext/css/settings.css +++ b/ext/css/settings.css @@ -647,7 +647,7 @@ a.heading-link-light { :root:not([data-advanced=false]) .basic-only { display: none; } -:root:not([data-language=ja]) .japanese-only { +:root:not([data-language=ja]):not([data-language=zh]):not([data-language=yue]) .jpzhyue-only { display: none; } .settings-item.settings-item-button, diff --git a/ext/js/language/translator.js b/ext/js/language/translator.js index ef9c3b541..6f641b80f 100644 --- a/ext/js/language/translator.js +++ b/ext/js/language/translator.js @@ -21,6 +21,7 @@ import {isCodePointJapanese} from './ja/japanese.js'; import {LanguageTransformer} from './language-transformer.js'; import {getAllLanguageReadingNormalizers, getAllLanguageTextProcessors} from './languages.js'; import {MultiLanguageTransformer} from './multi-language-transformer.js'; +import {isCodePointChinese} from './zh/chinese.js'; /** * Class which finds term and kanji dictionary entries for text. @@ -135,7 +136,7 @@ export class Translator { */ async findKanji(text, options) { if (options.removeNonJapaneseCharacters) { - text = this._getJapaneseOnlyText(text); + text = this._getJapaneseChineseOnlyText(text); } const {enabledDictionaryMap} = options; /** @type {Set} */ @@ -219,8 +220,8 @@ export class Translator { */ async _findTermsInternal(text, options, tagAggregator) { const {removeNonJapaneseCharacters, enabledDictionaryMap} = options; - if (removeNonJapaneseCharacters && options.language === 'ja') { - text = this._getJapaneseOnlyText(text); + if (removeNonJapaneseCharacters && (['ja', 'zh', 'yue'].includes(options.language))) { + text = this._getJapaneseChineseOnlyText(text); } if (text.length === 0) { return {dictionaryEntries: [], originalTextLength: 0}; @@ -636,10 +637,11 @@ export class Translator { * @param {string} text * @returns {string} */ - _getJapaneseOnlyText(text) { + _getJapaneseChineseOnlyText(text) { let length = 0; for (const c of text) { - if (!isCodePointJapanese(/** @type {number} */ (c.codePointAt(0)))) { + const codePoint = /** @type {number} */ (c.codePointAt(0)); + if (!isCodePointJapanese(codePoint) && !isCodePointChinese(codePoint)) { return text.substring(0, length); } length += c.length; diff --git a/ext/js/language/zh/chinese.js b/ext/js/language/zh/chinese.js index a17a82d1a..8c5dd206f 100644 --- a/ext/js/language/zh/chinese.js +++ b/ext/js/language/zh/chinese.js @@ -61,6 +61,14 @@ export function isStringPartiallyChinese(str) { return false; } +/** + * @param {number} codePoint + * @returns {boolean} + */ +export function isCodePointChinese(codePoint) { + return isCodePointInRanges(codePoint, CHINESE_RANGES); +} + /** @type {import('language').ReadingNormalizer} */ export function normalizePinyin(str) { return str.normalize('NFC').toLowerCase().replace(/[\s・:]|\/\//g, ''); diff --git a/ext/settings.html b/ext/settings.html index d552a1d2d..83f4d4874 100644 --- a/ext/settings.html +++ b/ext/settings.html @@ -413,10 +413,10 @@

Yomitan Settings

-
+
-
Search text with non-Japanese characters
-
Only applies when language is set to Japanese.
+
Search text with non-Japanese, Chinese, or Cantonese characters
+
Only applies when language is set to Japanese, Chinese, or Cantonese.