From 26f5fc53f2b8e390a3c7cba481b79b3f3f897fcf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Avi=20=D7=93?= Date: Fri, 25 Jan 2019 21:15:06 -0500 Subject: [PATCH 1/7] readline: improve Unicode handling Fixes: https://github.com/nodejs/node/issues/25693 --- lib/readline.js | 45 ++++++++++--- test/parallel/test-readline-interface.js | 84 ++++++++++++++++++++++++ 2 files changed, 119 insertions(+), 10 deletions(-) diff --git a/lib/readline.js b/lib/readline.js index 9c7caff60a8ae8..de293eb161ec1f 100644 --- a/lib/readline.js +++ b/lib/readline.js @@ -459,11 +459,12 @@ Interface.prototype._insertString = function(c) { var beg = this.line.slice(0, this.cursor); var end = this.line.slice(this.cursor, this.line.length); this.line = beg + c + end; - this.cursor += c.length; + // Count Unicode code points + this.cursor += Array.from(c).length; this._refreshLine(); } else { this.line += c; - this.cursor += c.length; + this.cursor += Array.from(c).length; if (this._getCursorPos().cols === 0) { this._refreshLine(); @@ -580,7 +581,7 @@ Interface.prototype._wordLeft = function() { Interface.prototype._wordRight = function() { if (this.cursor < this.line.length) { var trailing = this.line.slice(this.cursor); - var match = trailing.match(/^(?:\s+|\W+|\w+)\s*/); + var match = trailing.match(/^(?:\s+|[^\w\s]+|\w+)\s*/); this._moveCursor(match[0].length); } }; @@ -588,19 +589,29 @@ Interface.prototype._wordRight = function() { Interface.prototype._deleteLeft = function() { if (this.cursor > 0 && this.line.length > 0) { - this.line = this.line.slice(0, this.cursor - 1) + + // The Unicode code points prior to the cursor + const leading = Array.from(this.line.slice(0, this.cursor)); + // The number of UTF-16 units comprising the character to the left + const charSize = leading[leading.length - 1].length; + this.line = this.line.slice(0, this.cursor - charSize) + this.line.slice(this.cursor, this.line.length); - this.cursor--; + this.cursor -= charSize; this._refreshLine(); } }; Interface.prototype._deleteRight = function() { - this.line = this.line.slice(0, this.cursor) + - this.line.slice(this.cursor + 1, this.line.length); - this._refreshLine(); + if (this.cursor < this.line.length) { + // The Unicode code points after the cursor + const ending = Array.from(this.line.slice(this.cursor, this.line.length)); + // The number of UTF-16 units comprising the character to the left + const charSize = ending[0].length; + this.line = this.line.slice(0, this.cursor) + + this.line.slice(this.cursor + charSize, this.line.length); + this._refreshLine(); + } }; @@ -952,11 +963,25 @@ Interface.prototype._ttyWrite = function(s, key) { break; case 'left': - this._moveCursor(-1); + if (this.cursor > 0) { + // The Unicode code points prior to the cursor + const leading = Array.from(this.line.slice(0, this.cursor)); + // The number of UTF-16 units comprising the character to the left + const charSize = leading[leading.length - 1].length; + this._moveCursor(-charSize); + } break; case 'right': - this._moveCursor(+1); + if (this.cursor < this.line.length) { + // The Unicode code points after the cursor + const ending = Array.from( + this.line.slice(this.cursor, this.line.length) + ); + // The number of UTF-16 units comprising the character to the left + const charSize = ending[0].length; + this._moveCursor(+charSize); + } break; case 'home': diff --git a/test/parallel/test-readline-interface.js b/test/parallel/test-readline-interface.js index 87547a9b6ac197..7f3b43cd6de7a0 100644 --- a/test/parallel/test-readline-interface.js +++ b/test/parallel/test-readline-interface.js @@ -650,6 +650,36 @@ function isWarned(emitter) { rli.close(); } + // Back and Forward one astral character + { + const fi = new FakeInput(); + const rli = new readline.Interface({ + input: fi, + output: fi, + prompt: '', + terminal: terminal + }); + fi.emit('data', '💻'); + + // move left one character/code point + fi.emit('keypress', '.', { name: 'left' }); + let cursorPos = rli._getCursorPos(); + assert.strictEqual(cursorPos.rows, 0); + assert.strictEqual(cursorPos.cols, 0); + + // move right one character/code point + fi.emit('keypress', '.', { name: 'right' }); + cursorPos = rli._getCursorPos(); + assert.strictEqual(cursorPos.rows, 0); + assert.strictEqual(cursorPos.cols, 2); + + rli.on('line', common.mustCall((line) => { + assert.strictEqual(line, '💻'); + })); + fi.emit('data', '\n'); + rli.close(); + } + { // `wordLeft` and `wordRight` const fi = new FakeInput(); @@ -791,6 +821,32 @@ function isWarned(emitter) { rli.close(); } + // deleteLeft astral character + { + const fi = new FakeInput(); + const rli = new readline.Interface({ + input: fi, + output: fi, + prompt: '', + terminal: terminal + }); + fi.emit('data', '💻'); + let cursorPos = rli._getCursorPos(); + assert.strictEqual(cursorPos.rows, 0); + assert.strictEqual(cursorPos.cols, 2); + + // Delete left character + fi.emit('keypress', '.', { ctrl: true, name: 'h' }); + cursorPos = rli._getCursorPos(); + assert.strictEqual(cursorPos.rows, 0); + assert.strictEqual(cursorPos.cols, 0); + rli.on('line', common.mustCall((line) => { + assert.strictEqual(line, ''); + })); + fi.emit('data', '\n'); + rli.close(); + } + // deleteRight { const fi = new FakeInput(); @@ -820,6 +876,34 @@ function isWarned(emitter) { rli.close(); } + // deleteRight astral character + { + const fi = new FakeInput(); + const rli = new readline.Interface({ + input: fi, + output: fi, + prompt: '', + terminal: terminal + }); + fi.emit('data', '💻'); + + // Go to the start of the line + fi.emit('keypress', '.', { ctrl: true, name: 'a' }); + let cursorPos = rli._getCursorPos(); + assert.strictEqual(cursorPos.rows, 0); + assert.strictEqual(cursorPos.cols, 0); + + // Delete right character + fi.emit('keypress', '.', { ctrl: true, name: 'd' }); + cursorPos = rli._getCursorPos(); + assert.strictEqual(cursorPos.rows, 0); + assert.strictEqual(cursorPos.cols, 0); + rli.on('line', common.mustCall((line) => { + assert.strictEqual(line, ''); + })); + fi.emit('data', '\n'); + rli.close(); + } // deleteLineLeft { From c0c7f585c06d2e879956207ed18d793e33f73abf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Avi=20=D7=93?= Date: Tue, 29 Jan 2019 04:56:44 -0500 Subject: [PATCH 2/7] readline: improve Unicode handling performance --- lib/readline.js | 61 ++++++++++++++++-------- test/parallel/test-readline-interface.js | 58 ++++++++++++++++++++++ 2 files changed, 99 insertions(+), 20 deletions(-) diff --git a/lib/readline.js b/lib/readline.js index de293eb161ec1f..6928167f8880c9 100644 --- a/lib/readline.js +++ b/lib/readline.js @@ -454,17 +454,27 @@ Interface.prototype._normalWrite = function(b) { } }; +// Count the Unicode code points in a string. +function codePointLen(str) { + let count = 0; + // eslint-disable-next-line no-unused-vars + for (const char of str) { + count++; + } + return count; +} + Interface.prototype._insertString = function(c) { if (this.cursor < this.line.length) { var beg = this.line.slice(0, this.cursor); var end = this.line.slice(this.cursor, this.line.length); this.line = beg + c + end; // Count Unicode code points - this.cursor += Array.from(c).length; + this.cursor += codePointLen(c); this._refreshLine(); } else { this.line += c; - this.cursor += Array.from(c).length; + this.cursor += codePointLen(c); if (this._getCursorPos().cols === 0) { this._refreshLine(); @@ -586,13 +596,35 @@ Interface.prototype._wordRight = function() { } }; +function codePointLeftOf(i, str) { + // obtain the code point to the left + let previousChar; + let len = 0; + for (const char of str) { + len += char.length; + previousChar = char; + if (len === i) { + return previousChar; + } + } +} + +function codePointRightOf(i, str) { + // obtain the code point to the right + let len = 0; + for (const char of str) { + len += char.length; + if (len > i) { + // UTF-16 units comprising the character + return char; + } + } +} Interface.prototype._deleteLeft = function() { if (this.cursor > 0 && this.line.length > 0) { - // The Unicode code points prior to the cursor - const leading = Array.from(this.line.slice(0, this.cursor)); // The number of UTF-16 units comprising the character to the left - const charSize = leading[leading.length - 1].length; + const charSize = codePointLeftOf(this.cursor, this.line).length; this.line = this.line.slice(0, this.cursor - charSize) + this.line.slice(this.cursor, this.line.length); @@ -604,10 +636,8 @@ Interface.prototype._deleteLeft = function() { Interface.prototype._deleteRight = function() { if (this.cursor < this.line.length) { - // The Unicode code points after the cursor - const ending = Array.from(this.line.slice(this.cursor, this.line.length)); // The number of UTF-16 units comprising the character to the left - const charSize = ending[0].length; + const charSize = codePointRightOf(this.cursor, this.line).length; this.line = this.line.slice(0, this.cursor) + this.line.slice(this.cursor + charSize, this.line.length); this._refreshLine(); @@ -964,23 +994,14 @@ Interface.prototype._ttyWrite = function(s, key) { case 'left': if (this.cursor > 0) { - // The Unicode code points prior to the cursor - const leading = Array.from(this.line.slice(0, this.cursor)); - // The number of UTF-16 units comprising the character to the left - const charSize = leading[leading.length - 1].length; - this._moveCursor(-charSize); + // obtain the code point to the left + this._moveCursor(-codePointLeftOf(this.cursor, this.line).length); } break; case 'right': if (this.cursor < this.line.length) { - // The Unicode code points after the cursor - const ending = Array.from( - this.line.slice(this.cursor, this.line.length) - ); - // The number of UTF-16 units comprising the character to the left - const charSize = ending[0].length; - this._moveCursor(+charSize); + this._moveCursor(+codePointRightOf(this.cursor, this.line).length); } break; diff --git a/test/parallel/test-readline-interface.js b/test/parallel/test-readline-interface.js index 7f3b43cd6de7a0..59bafccd1d4565 100644 --- a/test/parallel/test-readline-interface.js +++ b/test/parallel/test-readline-interface.js @@ -680,6 +680,64 @@ function isWarned(emitter) { rli.close(); } + // Two astral characters left + { + const fi = new FakeInput(); + const rli = new readline.Interface({ + input: fi, + output: fi, + prompt: '', + terminal: terminal + }); + fi.emit('data', '💻'); + + // move left one character/code point + fi.emit('keypress', '.', { name: 'left' }); + let cursorPos = rli._getCursorPos(); + assert.strictEqual(cursorPos.rows, 0); + assert.strictEqual(cursorPos.cols, 0); + + fi.emit('data', '🐕'); + cursorPos = rli._getCursorPos(); + assert.strictEqual(cursorPos.rows, 0); + assert.strictEqual(cursorPos.cols, 2); + + rli.on('line', common.mustCall((line) => { + assert.strictEqual(line, '🐕💻'); + })); + fi.emit('data', '\n'); + rli.close(); + } + + // Two astral characters right + { + const fi = new FakeInput(); + const rli = new readline.Interface({ + input: fi, + output: fi, + prompt: '', + terminal: terminal + }); + fi.emit('data', '💻'); + + // move left one character/code point + fi.emit('keypress', '.', { name: 'right' }); + let cursorPos = rli._getCursorPos(); + assert.strictEqual(cursorPos.rows, 0); + assert.strictEqual(cursorPos.cols, 2); + + fi.emit('data', '🐕'); + cursorPos = rli._getCursorPos(); + assert.strictEqual(cursorPos.rows, 0); + assert.strictEqual(cursorPos.cols, 4); + + rli.on('line', common.mustCall((line) => { + assert.strictEqual(line, '💻🐕'); + })); + fi.emit('data', '\n'); + rli.close(); + } + { // `wordLeft` and `wordRight` const fi = new FakeInput(); From 756ebea867bc551f61d0d35bff2827881625a426 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Avi=20=D7=93?= Date: Tue, 5 Feb 2019 23:15:43 -0500 Subject: [PATCH 3/7] readline: use built in codePointAt --- lib/readline.js | 45 ++++++++++++++++----------------------------- 1 file changed, 16 insertions(+), 29 deletions(-) diff --git a/lib/readline.js b/lib/readline.js index 6928167f8880c9..d6bebc8b469837 100644 --- a/lib/readline.js +++ b/lib/readline.js @@ -596,35 +596,26 @@ Interface.prototype._wordRight = function() { } }; -function codePointLeftOf(i, str) { - // obtain the code point to the left - let previousChar; - let len = 0; - for (const char of str) { - len += char.length; - previousChar = char; - if (len === i) { - return previousChar; - } +function charLengthLeft(str, i) { + if (i <= 0) + return 0; + if (i > 1 && str.codePointAt(i - 2) > 2 ** 16 || + str.codePointAt(i - 1) > 2 ** 16) { + return 2; } + return 1; } -function codePointRightOf(i, str) { - // obtain the code point to the right - let len = 0; - for (const char of str) { - len += char.length; - if (len > i) { - // UTF-16 units comprising the character - return char; - } - } +function charLengthAt(str, i) { + if (str.length <= i) + return 0; + return str.codePointAt(i) > 2 ** 16 ? 2 : 1; } Interface.prototype._deleteLeft = function() { if (this.cursor > 0 && this.line.length > 0) { // The number of UTF-16 units comprising the character to the left - const charSize = codePointLeftOf(this.cursor, this.line).length; + const charSize = charLengthLeft(this.line, this.cursor); this.line = this.line.slice(0, this.cursor - charSize) + this.line.slice(this.cursor, this.line.length); @@ -637,7 +628,7 @@ Interface.prototype._deleteLeft = function() { Interface.prototype._deleteRight = function() { if (this.cursor < this.line.length) { // The number of UTF-16 units comprising the character to the left - const charSize = codePointRightOf(this.cursor, this.line).length; + const charSize = charLengthAt(this.line, this.cursor); this.line = this.line.slice(0, this.cursor) + this.line.slice(this.cursor + charSize, this.line.length); this._refreshLine(); @@ -993,16 +984,12 @@ Interface.prototype._ttyWrite = function(s, key) { break; case 'left': - if (this.cursor > 0) { - // obtain the code point to the left - this._moveCursor(-codePointLeftOf(this.cursor, this.line).length); - } + // obtain the code point to the left + this._moveCursor(-charLengthLeft(this.line, this.cursor)); break; case 'right': - if (this.cursor < this.line.length) { - this._moveCursor(+codePointRightOf(this.cursor, this.line).length); - } + this._moveCursor(+charLengthAt(this.line, this.cursor)); break; case 'home': From 200560cc0decbec35d9188ac9e155ee5bb7381e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Avi=20=D7=93?= Date: Wed, 6 Feb 2019 23:58:09 -0500 Subject: [PATCH 4/7] readline: fix ctrl + f/b Unicode handling --- lib/readline.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/readline.js b/lib/readline.js index d6bebc8b469837..2168391be16728 100644 --- a/lib/readline.js +++ b/lib/readline.js @@ -866,11 +866,11 @@ Interface.prototype._ttyWrite = function(s, key) { break; case 'b': // back one character - this._moveCursor(-1); + this._moveCursor(-charLengthLeft(this.line, this.cursor)); break; case 'f': // forward one character - this._moveCursor(+1); + this._moveCursor(+charLengthAt(this.line, this.cursor)); break; case 'l': // clear the whole screen From 0639c1bb9c156d6ba78543ca54086e387ba0bffa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Avi=20=D7=93?= Date: Thu, 7 Feb 2019 12:40:14 -0500 Subject: [PATCH 5/7] readline: remove redundant codePointLen function --- lib/readline.js | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/lib/readline.js b/lib/readline.js index 2168391be16728..12fb97e1cbf134 100644 --- a/lib/readline.js +++ b/lib/readline.js @@ -454,27 +454,16 @@ Interface.prototype._normalWrite = function(b) { } }; -// Count the Unicode code points in a string. -function codePointLen(str) { - let count = 0; - // eslint-disable-next-line no-unused-vars - for (const char of str) { - count++; - } - return count; -} - Interface.prototype._insertString = function(c) { if (this.cursor < this.line.length) { var beg = this.line.slice(0, this.cursor); var end = this.line.slice(this.cursor, this.line.length); this.line = beg + c + end; - // Count Unicode code points - this.cursor += codePointLen(c); + this.cursor += c.length; this._refreshLine(); } else { this.line += c; - this.cursor += codePointLen(c); + this.cursor += c.length; if (this._getCursorPos().cols === 0) { this._refreshLine(); From 1b9ea9352ffdee24ccddd0f12ca1e18db4c8ddcc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Avi=20=D7=93?= Date: Sun, 17 Feb 2019 19:47:58 -0500 Subject: [PATCH 6/7] readline: fix tests failing without internationalization --- test/parallel/test-readline-interface.js | 36 ++++++++++++++++++++---- 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/test/parallel/test-readline-interface.js b/test/parallel/test-readline-interface.js index 59bafccd1d4565..037707e0410991 100644 --- a/test/parallel/test-readline-interface.js +++ b/test/parallel/test-readline-interface.js @@ -671,7 +671,11 @@ function isWarned(emitter) { fi.emit('keypress', '.', { name: 'right' }); cursorPos = rli._getCursorPos(); assert.strictEqual(cursorPos.rows, 0); - assert.strictEqual(cursorPos.cols, 2); + if (common.hasIntl) { + assert.strictEqual(cursorPos.cols, 2); + } else { + assert.strictEqual(cursorPos.cols, 1); + } rli.on('line', common.mustCall((line) => { assert.strictEqual(line, '💻'); @@ -700,7 +704,14 @@ function isWarned(emitter) { fi.emit('data', '🐕'); cursorPos = rli._getCursorPos(); assert.strictEqual(cursorPos.rows, 0); - assert.strictEqual(cursorPos.cols, 2); + + if (common.hasIntl) { + assert.strictEqual(cursorPos.cols, 2); + } else { + assert.strictEqual(cursorPos.cols, 1); + // fix cursor position without internationalization + fi.emit('keypress', '.', { name: 'left' }); + } rli.on('line', common.mustCall((line) => { assert.strictEqual(line, '🐕💻'); @@ -724,12 +735,22 @@ function isWarned(emitter) { fi.emit('keypress', '.', { name: 'right' }); let cursorPos = rli._getCursorPos(); assert.strictEqual(cursorPos.rows, 0); - assert.strictEqual(cursorPos.cols, 2); + if (common.hasIntl) { + assert.strictEqual(cursorPos.cols, 2); + } else { + assert.strictEqual(cursorPos.cols, 1); + // fix cursor position without internationalization + fi.emit('keypress', '.', { name: 'right' }); + } fi.emit('data', '🐕'); cursorPos = rli._getCursorPos(); assert.strictEqual(cursorPos.rows, 0); - assert.strictEqual(cursorPos.cols, 4); + if (common.hasIntl) { + assert.strictEqual(cursorPos.cols, 4); + } else { + assert.strictEqual(cursorPos.cols, 2); + } rli.on('line', common.mustCall((line) => { assert.strictEqual(line, '💻🐕'); @@ -891,8 +912,11 @@ function isWarned(emitter) { fi.emit('data', '💻'); let cursorPos = rli._getCursorPos(); assert.strictEqual(cursorPos.rows, 0); - assert.strictEqual(cursorPos.cols, 2); - + if (common.hasIntl) { + assert.strictEqual(cursorPos.cols, 2); + } else { + assert.strictEqual(cursorPos.cols, 1); + } // Delete left character fi.emit('keypress', '.', { ctrl: true, name: 'h' }); cursorPos = rli._getCursorPos(); From e649085f9caae093d45bf05c7af4c72d0815c5c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Avi=20=D7=93?= Date: Wed, 20 Feb 2019 13:47:56 -0500 Subject: [PATCH 7/7] =?UTF-8?q?readline:=20handle=20=F0=90=80=80=20charact?= =?UTF-8?q?er=20correctly?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lib/readline.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/readline.js b/lib/readline.js index 12fb97e1cbf134..d81d62c9230c91 100644 --- a/lib/readline.js +++ b/lib/readline.js @@ -588,8 +588,8 @@ Interface.prototype._wordRight = function() { function charLengthLeft(str, i) { if (i <= 0) return 0; - if (i > 1 && str.codePointAt(i - 2) > 2 ** 16 || - str.codePointAt(i - 1) > 2 ** 16) { + if (i > 1 && str.codePointAt(i - 2) >= 2 ** 16 || + str.codePointAt(i - 1) >= 2 ** 16) { return 2; } return 1; @@ -598,7 +598,7 @@ function charLengthLeft(str, i) { function charLengthAt(str, i) { if (str.length <= i) return 0; - return str.codePointAt(i) > 2 ** 16 ? 2 : 1; + return str.codePointAt(i) >= 2 ** 16 ? 2 : 1; } Interface.prototype._deleteLeft = function() {