From ddf5002782cf8cf8d1460a18c3de6705a3a5eef0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mert=20Can=20Alt=C4=B1n?=
Date: Sat, 18 Feb 2023 21:10:16 +0300
Subject: [PATCH] test_runner: parse non-ascii character correctly

The TAP lexer now classifies literal symbols by code point instead of
matching against a fixed list of ASCII letters and punctuation. Any
character above the 7-bit ASCII range is accepted as part of a literal.
Zero-width code points, whitespace, the ASCII digits 0-9 and the
characters `#`, `\`, `+` and `-` are rejected.

`isZeroWidthCodePoint` is moved to module scope in
`internal/util/inspect` and exported so that the lexer can reuse it.

PR-URL: https://github.com/nodejs/node/pull/45736
Reviewed-By: Yagiz Nizipli
Reviewed-By: Moshe Atlow
---
 lib/internal/test_runner/tap_lexer.js  | 38 +++++++++++++++++---------
 lib/internal/util/inspect.js           | 24 ++++++++--------
 test/parallel/test-runner-tap-lexer.js | 36 ++++++++++++++++++++++++
 3 files changed, 74 insertions(+), 24 deletions(-)

diff --git a/lib/internal/test_runner/tap_lexer.js b/lib/internal/test_runner/tap_lexer.js
index 79040e5f7bf99a..a27e6ab35ea1d1 100644
--- a/lib/internal/test_runner/tap_lexer.js
+++ b/lib/internal/test_runner/tap_lexer.js
@@ -5,13 +5,15 @@ const {
   ArrayPrototypePush,
   MathMax,
   SafeSet,
-  StringPrototypeIncludes,
+  StringPrototypeCodePointAt,
   StringPrototypeTrim,
 } = primordials;
 const {
   codes: { ERR_TAP_LEXER_ERROR },
 } = require('internal/errors');
 
+const { isZeroWidthCodePoint } = require('internal/util/inspect');
+
 const kEOL = '';
 const kEOF = '';
 
@@ -474,18 +476,28 @@ class TapLexer {
   }
 
   #isLiteralSymbol(char) {
-    return (
-      (char >= 'a' && char <= 'z') ||
-      (char >= 'A' && char <= 'Z') ||
-      this.#isSpecialCharacterSymbol(char)
-    );
-  }
-
-  #isSpecialCharacterSymbol(char) {
-    // We deliberately do not include "# \ + -"" in this list
-    // these are used for comments/reasons explanations, pragma and escape characters
-    // whitespace is not included because it is handled separately
-    return StringPrototypeIncludes('!"$%&\'()*,./:;<=>?@[]^_`{|}~', char);
+    if (typeof char !== 'string') return false;
+    const charCode = StringPrototypeCodePointAt(char);
+
+    if (isZeroWidthCodePoint(charCode)) return false;
+    if (this.#isWhitespaceSymbol(char)) return false;
+    const MAX_ASCII_CHAR_CODE = 0b111_1111; // ASCII is 7-bit long
+    // Allow all non-ASCII characters.
+    if (charCode > MAX_ASCII_CHAR_CODE) return true;
+    const ZERO = 48; // 0
+    const NINE = 57; // 9
+    // Disallow numeric values (compare code points, not the string itself)
+    if (charCode >= ZERO && charCode <= NINE) return false;
+
+    // Code points of the characters with special meaning in TAP
+    const HASH = 35; // #
+    const BACKSLASH = 92; // \
+    const PLUS = 43; // +
+    const DASH = 45; // -
+
+    // Any other ASCII character is a literal unless it is reserved by TAP
+    return charCode !== HASH && charCode !== BACKSLASH &&
+           charCode !== PLUS && charCode !== DASH;
   }
 
   #isWhitespaceSymbol(char) {
diff --git a/lib/internal/util/inspect.js b/lib/internal/util/inspect.js
index 61a6093ae293e0..eb9c74e9146892 100644
--- a/lib/internal/util/inspect.js
+++ b/lib/internal/util/inspect.js
@@ -2295,6 +2295,18 @@ function formatWithOptionsInternal(inspectOptions, args) {
   return str;
 }
 
+function isZeroWidthCodePoint(code) {
+  return code <= 0x1F || // C0 control codes
+    (code >= 0x7F && code <= 0x9F) || // C1 control codes
+    (code >= 0x300 && code <= 0x36F) || // Combining Diacritical Marks
+    (code >= 0x200B && code <= 0x200F) || // Modifying Invisible Characters
+    // Combining Diacritical Marks for Symbols
+    (code >= 0x20D0 && code <= 0x20FF) ||
+    (code >= 0xFE00 && code <= 0xFE0F) || // Variation Selectors
+    (code >= 0xFE20 && code <= 0xFE2F) || // Combining Half Marks
+    (code >= 0xE0100 && code <= 0xE01EF); // Variation Selectors
+}
+
 if (internalBinding('config').hasIntl) {
   const icu = internalBinding('icu');
   // icu.getStringWidth(string, ambiguousAsFullWidth, expandEmojiSequence)
@@ -2384,17 +2396,6 @@ if (internalBinding('config').hasIntl) {
     );
   };
 
-  const isZeroWidthCodePoint = (code) => {
-    return code <= 0x1F || // C0 control codes
-      (code >= 0x7F && code <= 0x9F) || // C1 control codes
-      (code >= 0x300 && code <= 0x36F) || // Combining Diacritical Marks
-      (code >= 0x200B && code <= 0x200F) || // Modifying Invisible Characters
-      // Combining Diacritical Marks for Symbols
-      (code >= 0x20D0 && code <= 0x20FF) ||
-      (code >= 0xFE00 && code <= 0xFE0F) || // Variation Selectors
-      (code >= 0xFE20 && code <= 0xFE2F) || // Combining Half Marks
-      (code >= 0xE0100 && code <= 0xE01EF); // Variation Selectors
-  };
 }
 
 /**
@@ -2414,4 +2415,5 @@ module.exports = {
   formatWithOptions,
   getStringWidth,
   stripVTControlCharacters,
+  isZeroWidthCodePoint,
 };
diff --git a/test/parallel/test-runner-tap-lexer.js b/test/parallel/test-runner-tap-lexer.js
index 7da0f7c617c121..afc6eea1d2c08b 100644
--- a/test/parallel/test-runner-tap-lexer.js
+++ b/test/parallel/test-runner-tap-lexer.js
@@ -444,3 +444,39 @@ ok 1
     assert.strictEqual(tokens[index].value, token.value);
   });
 }
+
+// Test isLiteralSymbol method
+{
+  const tokens = TAPLexer('ok 1 - description أتث讲演講👍🔥');
+
+  [
+    { kind: TokenKind.TAP_TEST_OK, value: 'ok' },
+    { kind: TokenKind.WHITESPACE, value: ' ' },
+    { kind: TokenKind.NUMERIC, value: '1' },
+    { kind: TokenKind.WHITESPACE, value: ' ' },
+    { kind: TokenKind.DASH, value: '-' },
+    { kind: TokenKind.WHITESPACE, value: ' ' },
+    { kind: TokenKind.LITERAL, value: 'description' },
+    { kind: TokenKind.WHITESPACE, value: ' ' },
+    { kind: TokenKind.LITERAL, value: 'أتث讲演講👍🔥' },
+    { kind: TokenKind.EOL, value: '' },
+  ].forEach((token, index) => {
+    assert.strictEqual(tokens[index].kind, token.kind);
+    assert.strictEqual(tokens[index].value, token.value);
+  });
+}
+
+{
+  const tokens = TAPLexer('# comment أتث讲演講👍🔥');
+  [
+    { kind: TokenKind.COMMENT, value: '#' },
+    { kind: TokenKind.WHITESPACE, value: ' ' },
+    { kind: TokenKind.LITERAL, value: 'comment' },
+    { kind: TokenKind.WHITESPACE, value: ' ' },
+    { kind: TokenKind.LITERAL, value: 'أتث讲演講👍🔥' },
+    { kind: TokenKind.EOL, value: '' },
+  ].forEach((token, index) => {
+    assert.strictEqual(tokens[index].kind, token.kind);
+    assert.strictEqual(tokens[index].value, token.value);
+  });
+}