From 2e9ea19207a7410f929bb908759d48cb2340f29c Mon Sep 17 00:00:00 2001 From: toasted-nutbread Date: Sun, 25 Feb 2024 11:20:44 -0500 Subject: [PATCH] "isJapanese" check move (#730) * Move isStringPartiallyJapanese out of ClipboardMonitor * Create isStringPartiallyJapanese function * Add textMayBeTranslatable * Rename API function * Rename internal function * Add helper * Update translatable check * Pass language to TextScanner * Pass language explicitly * Use textMayBeTranslatable * No redundant translatable check * Update eslint * Remove double newline * Collapse * Rename --- .eslintrc.json | 1 + ext/js/app/frontend.js | 1 + ext/js/background/backend.js | 18 +++++++---- ext/js/comm/api.js | 9 +++--- ext/js/comm/clipboard-monitor.js | 3 +- ext/js/display/display-generator.js | 32 ++++++++++++------- ext/js/display/display.js | 2 ++ ext/js/display/query-parser.js | 3 +- .../sandbox/structured-content-generator.js | 9 ++++-- ext/js/display/search-display-controller.js | 21 ++++++++++-- ext/js/language/language-descriptors.js | 2 ++ ext/js/language/languages.js | 11 +++++++ ext/js/language/text-scanner.js | 12 +++++-- ext/js/language/text-utilities.js | 29 +++++++++++++++++ types/ext/api.d.ts | 3 +- types/ext/application.d.ts | 2 +- types/ext/display.d.ts | 1 + types/ext/language-descriptors.d.ts | 9 ++++++ 18 files changed, 132 insertions(+), 36 deletions(-) create mode 100644 ext/js/language/text-utilities.js diff --git a/.eslintrc.json b/.eslintrc.json index f2faff591d..63cc6b7e4b 100644 --- a/.eslintrc.json +++ b/.eslintrc.json @@ -568,6 +568,7 @@ "ext/js/display/sandbox/structured-content-generator.js", "ext/js/dom/sandbox/css-style-applier.js", "ext/js/language/ja/japanese.js", + "ext/js/language/text-utilities.js", "ext/js/templates/sandbox/anki-template-renderer-content-manager.js", "ext/js/templates/sandbox/anki-template-renderer.js", "ext/js/templates/sandbox/template-renderer-frame-api.js", diff --git a/ext/js/app/frontend.js b/ext/js/app/frontend.js index 
0b7393a05e..4c0faef10c 100644 --- a/ext/js/app/frontend.js +++ b/ext/js/app/frontend.js @@ -474,6 +474,7 @@ export class Frontend { await this._updatePopup(); const preventMiddleMouse = this._getPreventMiddleMouseValueForPageType(scanningOptions.preventMiddleMouse); + this._textScanner.language = options.general.language; this._textScanner.setOptions({ inputs: scanningOptions.inputs, deepContentScan: scanningOptions.deepDomScan, diff --git a/ext/js/background/backend.js b/ext/js/background/backend.js index 79023ac973..6340d02113 100644 --- a/ext/js/background/backend.js +++ b/ext/js/background/backend.js @@ -34,8 +34,8 @@ import {arrayBufferToBase64} from '../data/sandbox/array-buffer-util.js'; import {DictionaryDatabase} from '../dictionary/dictionary-database.js'; import {Environment} from '../extension/environment.js'; import {ObjectPropertyAccessor} from '../general/object-property-accessor.js'; -import {distributeFuriganaInflected, isCodePointJapanese, isStringPartiallyJapanese, convertKatakanaToHiragana as jpConvertKatakanaToHiragana} from '../language/ja/japanese.js'; -import {getLanguageSummaries} from '../language/languages.js'; +import {distributeFuriganaInflected, isCodePointJapanese, convertKatakanaToHiragana as jpConvertKatakanaToHiragana} from '../language/ja/japanese.js'; +import {getLanguageSummaries, isTextLookupWorthy} from '../language/languages.js'; import {Translator} from '../language/translator.js'; import {AudioDownloader} from '../media/audio-downloader.js'; import {getFileExtensionFromAudioMediaType, getFileExtensionFromImageMediaType} from '../media/media-util.js'; @@ -175,7 +175,7 @@ export class Backend { ['isTabSearchPopup', this._onApiIsTabSearchPopup.bind(this)], ['triggerDatabaseUpdated', this._onApiTriggerDatabaseUpdated.bind(this)], ['testMecab', this._onApiTestMecab.bind(this)], - ['textHasJapaneseCharacters', this._onApiTextHasJapaneseCharacters.bind(this)], + ['isTextLookupWorthy', this._onApiIsTextLookupWorthy.bind(this)], 
['getTermFrequencies', this._onApiGetTermFrequencies.bind(this)], ['findAnkiNotes', this._onApiFindAnkiNotes.bind(this)], ['openCrossFramePort', this._onApiOpenCrossFramePort.bind(this)], @@ -310,7 +310,11 @@ export class Backend { * @param {import('clipboard-monitor').EventArgument<'change'>} details */ async _onClipboardTextChange({text}) { - const {clipboard: {maximumSearchLength}} = this._getProfileOptions({current: true}, false); + const { + general: {language}, + clipboard: {maximumSearchLength} + } = this._getProfileOptions({current: true}, false); + if (!isTextLookupWorthy(text, language)) { return; } if (text.length > maximumSearchLength) { text = text.substring(0, maximumSearchLength); } @@ -839,9 +843,9 @@ export class Backend { return true; } - /** @type {import('api').ApiHandler<'textHasJapaneseCharacters'>} */ - _onApiTextHasJapaneseCharacters({text}) { - return isStringPartiallyJapanese(text); + /** @type {import('api').ApiHandler<'isTextLookupWorthy'>} */ + _onApiIsTextLookupWorthy({text, language}) { + return isTextLookupWorthy(text, language); } /** @type {import('api').ApiHandler<'getTermFrequencies'>} */ diff --git a/ext/js/comm/api.js b/ext/js/comm/api.js index 30fcfc2916..e8db784661 100644 --- a/ext/js/comm/api.js +++ b/ext/js/comm/api.js @@ -321,11 +321,12 @@ export class API { } /** - * @param {import('api').ApiParam<'textHasJapaneseCharacters', 'text'>} text - * @returns {Promise<import('api').ApiReturn<'textHasJapaneseCharacters'>>} + * @param {import('api').ApiParam<'isTextLookupWorthy', 'text'>} text + * @param {import('api').ApiParam<'isTextLookupWorthy', 'language'>} language + * @returns {Promise<import('api').ApiReturn<'isTextLookupWorthy'>>} */ - textHasJapaneseCharacters(text) { - return this._invoke('textHasJapaneseCharacters', {text}); + isTextLookupWorthy(text, language) { + return this._invoke('isTextLookupWorthy', {text, language}); } /** diff --git a/ext/js/comm/clipboard-monitor.js b/ext/js/comm/clipboard-monitor.js index d101b46743..067ecb6715 100644 --- a/ext/js/comm/clipboard-monitor.js +++ 
b/ext/js/comm/clipboard-monitor.js @@ -17,7 +17,6 @@ */ import {EventDispatcher} from '../core/event-dispatcher.js'; -import {isStringPartiallyJapanese} from '../language/ja/japanese.js'; /** * @augments EventDispatcher @@ -71,7 +70,7 @@ export class ClipboardMonitor extends EventDispatcher { text !== this._previousText ) { this._previousText = text; - if (canChange && isStringPartiallyJapanese(text)) { + if (canChange) { this.trigger('change', {text}); } } diff --git a/ext/js/display/display-generator.js b/ext/js/display/display-generator.js index 22912e9fb8..0b3236e96a 100644 --- a/ext/js/display/display-generator.js +++ b/ext/js/display/display-generator.js @@ -20,7 +20,8 @@ import {ExtensionError} from '../core/extension-error.js'; import {isObject} from '../core/utilities.js'; import {getDisambiguations, getGroupedPronunciations, getTermFrequency, groupKanjiFrequencies, groupTermFrequencies, groupTermTags, isNonNounVerbOrAdjective} from '../dictionary/dictionary-data-util.js'; import {HtmlTemplateCollection} from '../dom/html-template-collection.js'; -import {distributeFurigana, getKanaMorae, getPitchCategory, isCodePointKanji, isStringPartiallyJapanese} from '../language/ja/japanese.js'; +import {distributeFurigana, getKanaMorae, getPitchCategory, isCodePointKanji} from '../language/ja/japanese.js'; +import {getLanguageFromText} from '../language/text-utilities.js'; import {createPronunciationDownstepPosition, createPronunciationGraph, createPronunciationText} from './sandbox/pronunciation-generator.js'; import {StructuredContentGenerator} from './sandbox/structured-content-generator.js'; @@ -991,12 +992,7 @@ export class DisplayGenerator { * @param {string} [language] */ _setTextContent(node, value, language) { - if (typeof language === 'string') { - node.lang = language; - } else if (isStringPartiallyJapanese(value)) { - node.lang = 'ja'; - } - + this._setElementLanguage(node, language, value); node.textContent = value; } @@ -1008,11 +1004,7 @@ export class 
DisplayGenerator { _setMultilineTextContent(node, value, language) { // This can't just call _setTextContent because the lack of <br>
elements will // cause the text to not copy correctly. - if (typeof language === 'string') { - node.lang = language; - } else if (isStringPartiallyJapanese(value)) { - node.lang = 'ja'; - } + this._setElementLanguage(node, language, value); let start = 0; while (true) { @@ -1028,6 +1020,22 @@ export class DisplayGenerator { } } + /** + * @param {HTMLElement} element + * @param {string|undefined} language + * @param {string} content + */ + _setElementLanguage(element, language, content) { + if (typeof language === 'string') { + element.lang = language; + } else { + const language2 = getLanguageFromText(content); + if (language2 !== null) { + element.lang = language2; + } + } + } + /** * @param {string} reading * @param {import('dictionary').TermPronunciation[]} termPronunciations diff --git a/ext/js/display/display.js b/ext/js/display/display.js index f6efb5ac56..80f5e9ae80 100644 --- a/ext/js/display/display.js +++ b/ext/js/display/display.js @@ -425,6 +425,7 @@ export class Display extends EventDispatcher { readingMode: options.parsing.readingMode, useInternalParser: options.parsing.enableScanningParser, useMecabParser: options.parsing.enableMecabParser, + language: options.general.language, scanning: { inputs: scanningOptions.inputs, deepContentScan: scanningOptions.deepDomScan, @@ -1834,6 +1835,7 @@ export class Display extends EventDispatcher { } const {scanning: scanningOptions, sentenceParsing: sentenceParsingOptions} = options; + this._contentTextScanner.language = options.general.language; this._contentTextScanner.setOptions({ inputs: [{ include: 'mouse0', diff --git a/ext/js/display/query-parser.js b/ext/js/display/query-parser.js index d27b9394e4..f6c26ce787 100644 --- a/ext/js/display/query-parser.js +++ b/ext/js/display/query-parser.js @@ -92,7 +92,7 @@ export class QueryParser extends EventDispatcher { /** * @param {import('display').QueryParserOptions} display */ - setOptions({selectedParser, termSpacing, readingMode, useInternalParser, 
useMecabParser, scanning}) { + setOptions({selectedParser, termSpacing, readingMode, useInternalParser, useMecabParser, language, scanning}) { let selectedParserChanged = false; if (selectedParser === null || typeof selectedParser === 'string') { selectedParserChanged = (this._selectedParser !== selectedParser); @@ -115,6 +115,7 @@ export class QueryParser extends EventDispatcher { if (typeof scanLength === 'number') { this._scanLength = scanLength; } + this._textScanner.language = language; this._textScanner.setOptions(scanning); } this._textScanner.setEnabled(true); diff --git a/ext/js/display/sandbox/structured-content-generator.js b/ext/js/display/sandbox/structured-content-generator.js index 1dfde39bef..90a471580b 100644 --- a/ext/js/display/sandbox/structured-content-generator.js +++ b/ext/js/display/sandbox/structured-content-generator.js @@ -16,7 +16,7 @@ * along with this program. If not, see <https://www.gnu.org/licenses/>. */ -import {isStringPartiallyJapanese} from '../../language/ja/japanese.js'; +import {getLanguageFromText} from '../../language/text-utilities.js'; export class StructuredContentGenerator { /** @@ -163,8 +163,11 @@ export class StructuredContentGenerator { if (typeof content === 'string') { if (content.length > 0) { container.appendChild(this._createTextNode(content)); - if (language === null && isStringPartiallyJapanese(content)) { - container.lang = 'ja'; + if (language === null) { + const language2 = getLanguageFromText(content); + if (language2 !== null) { + container.lang = language2; + } } } return; diff --git a/ext/js/display/search-display-controller.js b/ext/js/display/search-display-controller.js index e23d5d5036..00f5efc625 100644 --- a/ext/js/display/search-display-controller.js +++ b/ext/js/display/search-display-controller.js @@ -103,7 +103,7 @@ export class SearchDisplayController { this._searchBackButton.addEventListener('click', this._onSearchBackButtonClick.bind(this), false); this._wanakanaEnableCheckbox.addEventListener('change', 
this._onWanakanaEnableChange.bind(this)); window.addEventListener('copy', this._onCopy.bind(this)); - this._clipboardMonitor.on('change', this._onExternalSearchUpdate.bind(this)); + this._clipboardMonitor.on('change', this._onClipboardMonitorChange.bind(this)); this._clipboardMonitorEnableCheckbox.addEventListener('change', this._onClipboardMonitorEnableChange.bind(this)); this._display.hotkeyHandler.on('keydownNonHotkey', this._onKeyDown.bind(this)); @@ -271,9 +271,26 @@ export class SearchDisplayController { } /** @type {import('application').ApiHandler<'searchDisplayControllerUpdateSearchQuery'>} */ - _onExternalSearchUpdate({text, animate = true}) { + _onExternalSearchUpdate({text, animate}) { + void this._updateSearchFromClipboard(text, animate, false); + } + + /** + * @param {import('clipboard-monitor').Events['change']} event + */ + _onClipboardMonitorChange({text}) { + void this._updateSearchFromClipboard(text, true, true); + } + + /** + * @param {string} text + * @param {boolean} animate + * @param {boolean} checkText + */ + async _updateSearchFromClipboard(text, animate, checkText) { const options = this._display.getOptions(); if (options === null) { return; } + if (checkText && !await this._display.application.api.isTextLookupWorthy(text, options.general.language)) { return; } const {clipboard: {autoSearchContent, maximumSearchLength}} = options; if (text.length > maximumSearchLength) { text = text.substring(0, maximumSearchLength); diff --git a/ext/js/language/language-descriptors.js b/ext/js/language/language-descriptors.js index beb1417e14..d78a96e5e4 100644 --- a/ext/js/language/language-descriptors.js +++ b/ext/js/language/language-descriptors.js @@ -18,6 +18,7 @@ import {removeArabicScriptDiacritics} from './ar/arabic-text-preprocessors.js'; import {eszettPreprocessor} from './de/german-text-preprocessors.js'; import {collapseEmphaticSequences, convertAlphabeticCharacters, convertHalfWidthCharacters, convertHiraganaToKatakana, 
convertNumericCharacters} from './ja/japanese-text-preprocessors.js'; +import {isStringPartiallyJapanese} from './ja/japanese.js'; import {removeLatinDiacritics} from './la/latin-text-preprocessors.js'; import {removeRussianDiacritics, yoToE} from './ru/russian-text-preprocessors.js'; import {capitalizeFirstLetter, decapitalize} from './text-preprocessors.js'; @@ -114,6 +115,7 @@ const languageDescriptors = [ iso: 'ja', name: 'Japanese', exampleText: '読め', + isTextLookupWorthy: isStringPartiallyJapanese, textPreprocessors: { convertHalfWidthCharacters, convertNumericCharacters, diff --git a/ext/js/language/languages.js b/ext/js/language/languages.js index f964dfec4b..fd58477d7f 100755 --- a/ext/js/language/languages.js +++ b/ext/js/language/languages.js @@ -47,3 +47,14 @@ export function getAllLanguageTextPreprocessors() { } return results; } + +/** + * @param {string} text + * @param {string} language + * @returns {boolean} + */ +export function isTextLookupWorthy(text, language) { + const descriptor = languageDescriptorMap.get(language); + if (typeof descriptor === 'undefined') { return false; } + return typeof descriptor.isTextLookupWorthy === 'undefined' || descriptor.isTextLookupWorthy(text); +} diff --git a/ext/js/language/text-scanner.js b/ext/js/language/text-scanner.js index 643330934f..ad5ba12b82 100644 --- a/ext/js/language/text-scanner.js +++ b/ext/js/language/text-scanner.js @@ -70,6 +70,8 @@ export class TextScanner extends EventDispatcher { this._includeSelector = null; /** @type {?string} */ this._excludeSelector = null; + /** @type {?string} */ + this._language = null; /** @type {?import('text-scanner').InputInfo} */ this._inputInfoCurrent = null; @@ -188,6 +190,10 @@ export class TextScanner extends EventDispatcher { this._excludeSelector = value; } + /** @type {?string} */ + get language() { return this._language; } + set language(value) { this._language = value; } + /** */ prepare() { this._isPrepared = true; @@ -449,7 +455,7 @@ export class 
TextScanner extends EventDispatcher { const result = await this._findDictionaryEntries(textSource, searchTerms, searchKanji, optionsContext); if (result !== null) { ({dictionaryEntries, sentence, type} = result); - } else if (textSource !== null && textSource instanceof TextSourceElement && await this._hasJapanese(textSource.fullContent)) { + } else if (textSource !== null && textSource instanceof TextSourceElement && await this._isTextLookupWorthy(textSource.fullContent)) { dictionaryEntries = []; sentence = {text: '', offset: 0}; } @@ -1549,9 +1555,9 @@ export class TextScanner extends EventDispatcher { * @param {string} text * @returns {Promise<boolean>} */ - async _hasJapanese(text) { + async _isTextLookupWorthy(text) { try { - return await this._api.textHasJapaneseCharacters(text); + return this._language !== null && await this._api.isTextLookupWorthy(text, this._language); } catch (e) { return false; } diff --git a/ext/js/language/text-utilities.js b/ext/js/language/text-utilities.js new file mode 100644 index 0000000000..ca8958aa38 --- /dev/null +++ b/ext/js/language/text-utilities.js @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2024 Yomitan Authors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +import {isStringPartiallyJapanese} from './ja/japanese.js'; + +/** + * Returns the language that the string might be by using some heuristic checks. + * Values returned are ISO codes. 
`null` is returned if no language can be determined. + * @param {string} text + * @returns {?string} + */ +export function getLanguageFromText(text) { + if (isStringPartiallyJapanese(text)) { return 'ja'; } + return null; +} diff --git a/types/ext/api.d.ts b/types/ext/api.d.ts index 16321722a3..9a922fb051 100644 --- a/types/ext/api.d.ts +++ b/types/ext/api.d.ts @@ -344,9 +344,10 @@ type ApiSurface = { params: void; return: true; }; - textHasJapaneseCharacters: { + isTextLookupWorthy: { params: { text: string; + language: string; }; return: boolean; }; diff --git a/types/ext/application.d.ts b/types/ext/application.d.ts index 96f76714b9..8d80894d8e 100644 --- a/types/ext/application.d.ts +++ b/types/ext/application.d.ts @@ -41,7 +41,7 @@ export type ApiSurface = { searchDisplayControllerUpdateSearchQuery: { params: { text: string; - animate?: boolean; + animate: boolean; }; return: void; }; diff --git a/types/ext/display.d.ts b/types/ext/display.d.ts index 61e1aac5be..7f4d89661c 100644 --- a/types/ext/display.d.ts +++ b/types/ext/display.d.ts @@ -129,6 +129,7 @@ export type QueryParserOptions = { readingMode: Settings.ParsingReadingMode; useInternalParser: boolean; useMecabParser: boolean; + language: string; scanning: TextScannerTypes.Options; }; diff --git a/types/ext/language-descriptors.d.ts b/types/ext/language-descriptors.d.ts index 319a3ca5c3..ca457721b9 100644 --- a/types/ext/language-descriptors.d.ts +++ b/types/ext/language-descriptors.d.ts @@ -18,10 +18,19 @@ import type {TextPreprocessor, BidirectionalConversionPreprocessor} from './language'; import type {SafeAny} from './core'; +export type IsTextLookupWorthyFunction = (text: string) => boolean; + type LanguageDescriptor = { iso: TIso; name: string; exampleText: string; + /** + * An optional function which returns whether or not a given string may be translatable. 
+ * This is used as a filter for several situations, such as whether the clipboard monitor + * window should activate when text is copied to the clipboard. + * If no value is provided, `true` is assumed for all inputs. + */ + isTextLookupWorthy?: IsTextLookupWorthyFunction; textPreprocessors: TTextPreprocessorDescriptor; };