Skip to content

Commit

Permalink
Expand "search non-Japanese characters" to Chinese and Cantonese (#1285)
Browse files Browse the repository at this point in the history
* Expand "search non-Japanese characters" to Chinese and Cantonese

* Add isCodePointChinese
  • Loading branch information
Kuuuube authored Jul 30, 2024
1 parent 7dbced4 commit 5ce5d20
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 9 deletions.
2 changes: 1 addition & 1 deletion ext/css/settings.css
Original file line number Diff line number Diff line change
Expand Up @@ -647,7 +647,7 @@ a.heading-link-light {
:root:not([data-advanced=false]) .basic-only {
display: none;
}
:root:not([data-language=ja]) .japanese-only {
:root:not([data-language=ja]):not([data-language=zh]):not([data-language=yue]) .jpzhyue-only {
display: none;
}
.settings-item.settings-item-button,
Expand Down
12 changes: 7 additions & 5 deletions ext/js/language/translator.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import {isCodePointJapanese} from './ja/japanese.js';
import {LanguageTransformer} from './language-transformer.js';
import {getAllLanguageReadingNormalizers, getAllLanguageTextProcessors} from './languages.js';
import {MultiLanguageTransformer} from './multi-language-transformer.js';
import {isCodePointChinese} from './zh/chinese.js';

/**
* Class which finds term and kanji dictionary entries for text.
Expand Down Expand Up @@ -135,7 +136,7 @@ export class Translator {
*/
async findKanji(text, options) {
if (options.removeNonJapaneseCharacters) {
text = this._getJapaneseOnlyText(text);
text = this._getJapaneseChineseOnlyText(text);
}
const {enabledDictionaryMap} = options;
/** @type {Set<string>} */
Expand Down Expand Up @@ -219,8 +220,8 @@ export class Translator {
*/
async _findTermsInternal(text, options, tagAggregator) {
const {removeNonJapaneseCharacters, enabledDictionaryMap} = options;
if (removeNonJapaneseCharacters && options.language === 'ja') {
text = this._getJapaneseOnlyText(text);
if (removeNonJapaneseCharacters && (['ja', 'zh', 'yue'].includes(options.language))) {
text = this._getJapaneseChineseOnlyText(text);
}
if (text.length === 0) {
return {dictionaryEntries: [], originalTextLength: 0};
Expand Down Expand Up @@ -636,10 +637,11 @@ export class Translator {
* @param {string} text
* @returns {string}
*/
_getJapaneseOnlyText(text) {
_getJapaneseChineseOnlyText(text) {
let length = 0;
for (const c of text) {
if (!isCodePointJapanese(/** @type {number} */ (c.codePointAt(0)))) {
const codePoint = /** @type {number} */ (c.codePointAt(0));
if (!isCodePointJapanese(codePoint) && !isCodePointChinese(codePoint)) {
return text.substring(0, length);
}
length += c.length;
Expand Down
8 changes: 8 additions & 0 deletions ext/js/language/zh/chinese.js
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,14 @@ export function isStringPartiallyChinese(str) {
return false;
}

/**
 * Checks a single Unicode code point against the ranges listed in `CHINESE_RANGES`.
 * @param {number} codePoint The code point to test.
 * @returns {boolean} Whatever `isCodePointInRanges` reports for this code point and `CHINESE_RANGES`.
 */
export function isCodePointChinese(codePoint) {
    const isWithinChineseRanges = isCodePointInRanges(codePoint, CHINESE_RANGES);
    return isWithinChineseRanges;
}

/** @type {import('language').ReadingNormalizer} */
export function normalizePinyin(str) {
return str.normalize('NFC').toLowerCase().replace(/[\s・:]|\/\//g, '');
Expand Down
6 changes: 3 additions & 3 deletions ext/settings.html
Original file line number Diff line number Diff line change
Expand Up @@ -413,10 +413,10 @@ <h1>Yomitan Settings</h1>
<label class="toggle"><input type="checkbox" data-setting="scanning.selectText"><span class="toggle-body"><span class="toggle-track"></span><span class="toggle-knob"></span></span></label>
</div>
</div></div>
<div class="settings-item japanese-only"><div class="settings-item-inner">
<div class="settings-item jpzhyue-only"><div class="settings-item-inner">
<div class="settings-item-left">
<div class="settings-item-label">Search text with non-Japanese characters</div>
<div class="settings-item-description">Only applies when language is set to Japanese.</div>
<div class="settings-item-label">Search text with non-Japanese, Chinese, or Cantonese characters</div>
<div class="settings-item-description">Only applies when language is set to Japanese, Chinese, or Cantonese.</div>
</div>
<div class="settings-item-right">
<label class="toggle"><input type="checkbox" data-setting="scanning.alphanumeric"><span class="toggle-body"><span class="toggle-track"></span><span class="toggle-knob"></span></span></label>
Expand Down

0 comments on commit 5ce5d20

Please sign in to comment.