Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

readline: turn emitKeys into a streaming parser #1601

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .eslintrc
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ ecmaFeatures:
templateStrings: true
octalLiterals: true
binaryLiterals: true
generators: true

rules:
# Possible Errors
Expand Down
261 changes: 167 additions & 94 deletions lib/readline.js
Original file line number Diff line number Diff line change
Expand Up @@ -893,15 +893,25 @@ exports.Interface = Interface;
* accepts a readable Stream instance and makes it emit "keypress" events
*/

const KEYPRESS_DECODER = Symbol('keypress-decoder');
const ESCAPE_DECODER = Symbol('escape-decoder');

function emitKeypressEvents(stream) {
if (stream._keypressDecoder) return;
if (stream[KEYPRESS_DECODER]) return;
var StringDecoder = require('string_decoder').StringDecoder; // lazy load
stream._keypressDecoder = new StringDecoder('utf8');
stream[KEYPRESS_DECODER] = new StringDecoder('utf8');

stream[ESCAPE_DECODER] = emitKeys(stream);
stream[ESCAPE_DECODER].next();

function onData(b) {
if (EventEmitter.listenerCount(stream, 'keypress') > 0) {
var r = stream._keypressDecoder.write(b);
if (r) emitKeys(stream, r);
var r = stream[KEYPRESS_DECODER].write(b);
if (r) {
for (var i = 0; i < r.length; i++) {
stream[ESCAPE_DECODER].next(r[i]);
}
}
} else {
// Nobody's watching anyway
stream.removeListener('data', onData);
Expand Down Expand Up @@ -954,102 +964,130 @@ exports.emitKeypressEvents = emitKeypressEvents;

// Regexes used for ansi escape code splitting
const metaKeyCodeReAnywhere = /(?:\x1b)([a-zA-Z0-9])/;
const metaKeyCodeRe = new RegExp('^' + metaKeyCodeReAnywhere.source + '$');
const functionKeyCodeReAnywhere = new RegExp('(?:\x1b+)(O|N|\\[|\\[\\[)(?:' + [
'(\\d+)(?:;(\\d+))?([~^$])',
'(?:M([@ #!a`])(.)(.))', // mouse
'(?:1;)?(\\d+)?([a-zA-Z])'
].join('|') + ')');
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

EDIT: sorry, didn't see the entire file wasn't here. This isn't used anywhere now?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, they are used in stripVTControlCharacters below.

I'm not sure whether it's better to move them where they are used, or keep them near the comment where they are explained.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If anything, I'd move stripVTControlCharacters below the regexes.

const functionKeyCodeRe = new RegExp('^' + functionKeyCodeReAnywhere.source);
const escapeCodeReAnywhere = new RegExp([
functionKeyCodeReAnywhere.source, metaKeyCodeReAnywhere.source, /\x1b./.source
].join('|'));

function emitKeys(stream, s) {
if (s instanceof Buffer) {
if (s[0] > 127 && s[1] === undefined) {
s[0] -= 128;
s = '\x1b' + s.toString(stream.encoding || 'utf-8');
} else {
s = s.toString(stream.encoding || 'utf-8');
}
}

var buffer = [];
var match;
while (match = escapeCodeReAnywhere.exec(s)) {
buffer = buffer.concat(s.slice(0, match.index).split(''));
buffer.push(match[0]);
s = s.slice(match.index + match[0].length);
}
buffer = buffer.concat(s.split(''));

buffer.forEach(function(s) {
var ch,
key = {
sequence: s,
name: undefined,
ctrl: false,
meta: false,
shift: false
},
parts;

if (s === '\r') {
// carriage return
key.name = 'return';

} else if (s === '\n') {
// enter, should have been called linefeed
key.name = 'enter';
function* emitKeys(stream) {
while (true) {
var ch = yield;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does all the existing Buffer functionality still work?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, it's not, and it didn't work before either.

I'd appreciate another look at this, but I think this Buffer handling was a dead code since this commit: 3c91a7a.

Function emitKeys never receives any buffers in the first place, because string_decoder.write() returns strings only.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah true. Makes sense.

var s = ch;
var escaped = false;
var key = {
sequence: null,
name: undefined,
ctrl: false,
meta: false,
shift: false
};

if (ch === '\x1b') {
escaped = true;
s += (ch = yield);

if (ch === '\x1b') {
s += (ch = yield);
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe use a while (ch === '\x1b'), so it will catch as many escapes as one could possibly feed into it?

}

} else if (s === '\t') {
// tab
key.name = 'tab';
if (escaped && (ch === 'O' || ch === '[')) {
// ansi escape sequence
var code = ch;
var modifier = 0;

} else if (s === '\b' || s === '\x7f' ||
s === '\x1b\x7f' || s === '\x1b\b') {
// backspace or ctrl+h
key.name = 'backspace';
key.meta = (s.charAt(0) === '\x1b');
if (ch === 'O') {
// ESC O letter
// ESC O modifier letter
s += (ch = yield);

} else if (s === '\x1b' || s === '\x1b\x1b') {
// escape key
key.name = 'escape';
key.meta = (s.length === 2);
if (ch >= '0' && ch <= '9') {
modifier = (ch >> 0) - 1;
s += (ch = yield);
}

} else if (s === ' ' || s === '\x1b ') {
key.name = 'space';
key.meta = (s.length === 2);
code += ch;

} else if (s.length === 1 && s <= '\x1a') {
// ctrl+letter
key.name = String.fromCharCode(s.charCodeAt(0) + 'a'.charCodeAt(0) - 1);
key.ctrl = true;
} else if (ch === '[') {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could probably also be while() (else while() works, but while() else ... does not)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know any escape sequences that have three brackets (like \x1b[[[1~), and io.js can't parse them (there is a whitelist of accepted codes in the code below).

If we add while here, you can press ESC, and every single [ character after that will be silently ignored, because parser will be still waiting thinking it's in the middle of an escape sequence. Remember that "WAT" effect when you press ESC in the bash for example, and several characters after that are getting ignored by the terminal?

So I prefer to be deterministic here. Right now, this parser is guaranteed to emit at least one keypress event every 9 characters (something like \x1b\x1b[[12;3~ is the longest sequence). That's why there is no while loop there.

Same thing with digits below.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fair enough.

// ESC [ letter
// ESC [ modifier letter
// ESC [ [ modifier letter
// ESC [ [ num char
s += (ch = yield);

} else if (s.length === 1 && s >= 'a' && s <= 'z') {
// lowercase letter
key.name = s;
if (ch === '[') {
// \x1b[[A
// ^--- escape codes might have a second bracket
code += ch;
s += (ch = yield);
}

} else if (s.length === 1 && s >= 'A' && s <= 'Z') {
// shift+letter
key.name = s.toLowerCase();
key.shift = true;
/*
* Here and later we try to buffer just enough data to get
* a complete ascii sequence.
*
* We have basically two classes of ascii characters to process:
*
*
* 1. `\x1b[24;5~` should be parsed as { code: '[24~', modifier: 5 }
*
* This particular example is featuring Ctrl+F12 in xterm.
*
* - `;5` part is optional, e.g. it could be `\x1b[24~`
* - first part can contain one or two digits
*
* So the generic regexp is like /^\d\d?(;\d)?[~^$]$/
*
*
* 2. `\x1b[1;5H` should be parsed as { code: '[H', modifier: 5 }
*
* This particular example is featuring Ctrl+Home in xterm.
*
* - `1;5` part is optional, e.g. it could be `\x1b[H`
* - `1;` part is optional, e.g. it could be `\x1b[5H`
*
* So the generic regexp is like /^((\d;)?\d)?[A-Za-z]$/
*
*/
const cmdStart = s.length - 1;

// skip one or two leading digits
if (ch >= '0' && ch <= '9') {
s += (ch = yield);

if (ch >= '0' && ch <= '9') {
s += (ch = yield);
}
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto, unless this should strictly be one or two.


} else if (parts = metaKeyCodeRe.exec(s)) {
// meta+character key
key.name = parts[1].toLowerCase();
key.meta = true;
key.shift = /^[A-Z]$/.test(parts[1]);
// skip modifier
if (ch === ';') {
s += (ch = yield);

} else if (parts = functionKeyCodeRe.exec(s)) {
// ansi escape sequence
if (ch >= '0' && ch <= '9') {
s += (ch = yield);
}
}

// reassemble the key code leaving out leading \x1b's,
// the modifier key bitflag and any meaningless "1;" sequence
var code = (parts[1] || '') + (parts[2] || '') +
(parts[4] || '') + (parts[9] || ''),
modifier = (parts[3] || parts[8] || 1) - 1;
/*
* We buffered enough data, now trying to extract code
* and modifier from it
*/
const cmd = s.slice(cmdStart);
var match;

if ((match = cmd.match(/^(\d\d?)(;(\d))?([~^$])$/))) {
code += match[1] + match[4];
modifier = (match[3] || 1) - 1;
} else if ((match = cmd.match(/^((\d;)?(\d))?([A-Za-z])$/))) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should we put these into consts then? Does it make a difference?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are you talking about regular expressions?

I think the only difference is readability, so if regexps are large enough, it'd make sense to move them out (or get rid of them since large regular expressions are a source of trouble usually).

But those are small, so they could be inlined.

V8 should create each regexp from regexp literal only once anyway.

code += match[4];
modifier = (match[3] || 1) - 1;
} else {
code += cmd;
}
}

// Parse the key modifier
key.ctrl = !!(modifier & 4);
Expand Down Expand Up @@ -1152,23 +1190,58 @@ function emitKeys(stream, s) {
/* misc. */
case '[Z': key.name = 'tab'; key.shift = true; break;
default: key.name = 'undefined'; break;

}
}

// Don't emit a key if no name was found
if (key.name === undefined) {
key = undefined;
}
} else if (ch === '\r') {
// carriage return
key.name = 'return';

} else if (ch === '\n') {
// enter, should have been called linefeed
key.name = 'enter';

} else if (ch === '\t') {
// tab
key.name = 'tab';

if (s.length === 1) {
ch = s;
} else if (ch === '\b' || ch === '\x7f') {
// backspace or ctrl+h
key.name = 'backspace';
key.meta = escaped;

} else if (ch === '\x1b') {
// escape key
key.name = 'escape';
key.meta = escaped;

} else if (ch === ' ') {
key.name = 'space';
key.meta = escaped;

} else if (!escaped && ch <= '\x1a') {
// ctrl+letter
key.name = String.fromCharCode(ch.charCodeAt(0) + 'a'.charCodeAt(0) - 1);
key.ctrl = true;

} else if (/^[0-9A-Za-z]$/.test(ch)) {
// letter, number, shift+letter
key.name = ch.toLowerCase();
key.shift = /^[A-Z]$/.test(ch);
key.meta = escaped;
}

if (key || ch) {
stream.emit('keypress', ch, key);
key.sequence = s;

if (key.name !== undefined) {
/* Named character or sequence */
stream.emit('keypress', escaped ? undefined : s, key);
} else if (s.length === 1) {
/* Single unnamed character, e.g. "." */
stream.emit('keypress', s);
} else {
/* Unrecognized or broken escape sequence, don't emit anything */
}
});
}
}


Expand Down
25 changes: 0 additions & 25 deletions test/parallel/test-readline-interface.js
Original file line number Diff line number Diff line change
Expand Up @@ -191,31 +191,6 @@ function isWarned(emitter) {
assert.equal(callCount, 1);
rli.close();

// keypress
[
['a'],
['\x1b'],
['\x1b[31m'],
['\x1b[31m', '\x1b[39m'],
['\x1b[31m', 'a', '\x1b[39m', 'a']
].forEach(function (keypresses) {
fi = new FakeInput();
callCount = 0;
var remainingKeypresses = keypresses.slice();
function keypressListener (ch, key) {
callCount++;
if (ch) assert(!key.code);
assert.equal(key.sequence, remainingKeypresses.shift());
};
readline.emitKeypressEvents(fi);
fi.on('keypress', keypressListener);
fi.emit('data', keypresses.join(''));
assert.equal(callCount, keypresses.length);
assert.equal(remainingKeypresses.length, 0);
fi.removeListener('keypress', keypressListener);
fi.emit('data', ''); // removes listener
});

// calling readline without `new`
fi = new FakeInput();
rli = readline.Interface({ input: fi, output: fi, terminal: terminal });
Expand Down
Loading