Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Don't crash when reading emoji input in utf8 #12342

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions src/host/directio.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,16 @@ void EventsToUnicode(_Inout_ std::deque<std::unique_ptr<IInputEvent>>& inEvents,
try
{
SplitToOem(readEvents);

// After SplitToOem returns, readEvents might be bigger than it
// was before. It might be more than 2x the size it was before,
// because a single codepoint might have been expanded into more
// that a single char.
//
// As of GH #8663, InputBuffer::Read will have preemptively
// checked how much space each key is about to take up, and will
// only return as many as will fit in readBuffer _after_ a call
// to SplitToOem.
}
CATCH_LOG();
}
Expand Down
46 changes: 35 additions & 11 deletions src/host/inputBuffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include "inputBuffer.hpp"
#include "dbcs.h"
#include "stream.h"
#include "../types/inc/convert.hpp"
#include "../types/inc/GlyphWidth.hpp"

#include <functional>
Expand Down Expand Up @@ -386,6 +387,8 @@ void InputBuffer::_ReadBuffer(_Out_ std::deque<std::unique_ptr<IInputEvent>>& ou
// of events actually put into outRecords.
size_t virtualReadCount = 0;

const UINT codepage = ServiceLocator::LocateGlobals().getConsoleInformation().CP;

while (!_storage.empty() && virtualReadCount < readCount)
{
bool performNormalRead = true;
Expand All @@ -407,24 +410,45 @@ void InputBuffer::_ReadBuffer(_Out_ std::deque<std::unique_ptr<IInputEvent>>& ou
}
}

if (performNormalRead)
{
readEvents.push_back(std::move(_storage.front()));
_storage.pop_front();
}

++virtualReadCount;
// GH #8663: Before we read this key from the buffer, check that there's
// space for it. If we're calling Read without unicode being set, then I
// believe we're also going to try and break this key event into one key
// for each OEM character. Problem is though, one unicode codepoint can
// be more than two chars long. So don't just use IsGlyphFullWidth,
// actually check with the codepoint how wide this key should be.
size_t sizeOfThisKey = 1;
const auto& keyToRead{ _storage.front() };
if (!unicode)
{
if (readEvents.back()->EventType() == InputEventType::KeyEvent)
if (keyToRead->EventType() == InputEventType::KeyEvent)
{
const KeyEvent* const pKeyEvent = static_cast<const KeyEvent* const>(readEvents.back().get());
if (IsGlyphFullWidth(pKeyEvent->GetCharData()))
const KeyEvent* const pKeyEvent = static_cast<const KeyEvent* const>(keyToRead.get());

// convert from wchar to char
std::wstring wstr{ pKeyEvent->GetCharData() };
const auto str = ConvertToA(codepage, wstr);
sizeOfThisKey = str.size();
if (virtualReadCount + sizeOfThisKey > readCount)
{
++virtualReadCount;
break;
}
Comment on lines +431 to 434
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This loop now has two bounds checks instead of one, which might be a bit confusing. We can simplify it by changing the loop to be while (!_storage.empty()) and replacing this if condition with the following:

if (!unicode)
{
    ...
}

virtualReadCount += sizeOfThisKey;
if (virtualReadCount > readCount)
{
    break;
}

if (performNormalRead)
{
    ...
}

}
}
// When we do this, then readers who come asking for 4 events might
// only get 3 back, under the implication that they know they need to
// expand the events themselves. There are only two callers:
// * DirectReadData::Notify
// * _DoGetConsoleInput
//
// and they both handle !unicode the same way.

if (performNormalRead)
{
readEvents.push_back(std::move(_storage.front()));
_storage.pop_front();
}

virtualReadCount += sizeOfThisKey;
}

// the amount of events that were actually read
Expand Down
9 changes: 9 additions & 0 deletions src/host/readDataDirect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,15 @@ bool DirectReadData::Notify(const WaitTerminationReason TerminationReason,
try
{
SplitToOem(readEvents);
// After SplitToOem returns, readEvents might be bigger than it
// was before. It might be more than 2x the size it was before,
// because a single codepoint might have been expanded into more
// that a single char.
//
// As of GH #8663, InputBuffer::Read will have preemptively
// checked how much space each key is about to take up, and will
// only return as many as will fit in readBuffer _after_ a call
// to SplitToOem.
}
CATCH_LOG();
}
Expand Down
69 changes: 68 additions & 1 deletion src/host/ut_host/InputBufferTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -434,17 +434,84 @@ class InputBufferTests
std::deque<std::unique_ptr<IInputEvent>> outEvents;
size_t eventsRead = 0;
bool resetWaitEvent = false;

// GH #8663: We only insert 4 events. but we need to ask for 6 here.
// The hiragana A is U+3042 in utf16, but it turns into two chars in 932.
Log::Comment(fmt::format(L"Codepage: {}", ServiceLocator::LocateGlobals().getConsoleInformation().CP).c_str());
ServiceLocator::LocateGlobals().getConsoleInformation().CP = 932;
Log::Comment(fmt::format(L"Changed to: {}", ServiceLocator::LocateGlobals().getConsoleInformation().CP).c_str());

// GH #8663: We only insert 4 events. When we ask for 4 here, we'll only get 3.
// InputBuffer::_ReadBuffer, when called with !unicode, will only return
// as many events as _will_ fit into readCount, assuming the caller
// expands them with SplitToOem.
inputBuffer._ReadBuffer(outEvents,
recordInsertCount,
eventsRead,
false,
resetWaitEvent,
false,
false);

// the dbcs record should have counted for two elements in
// the array, making it so that we get less events read
VERIFY_ARE_EQUAL(eventsRead, recordInsertCount - 1);
VERIFY_ARE_EQUAL(recordInsertCount - 1, eventsRead);
VERIFY_ARE_EQUAL(eventsRead, outEvents.size());
for (size_t i = 0; i < eventsRead; ++i)
{
VERIFY_ARE_EQUAL(outEvents[i]->ToInputRecord(), inRecords[i]);
}
}

TEST_METHOD(ReadingDbcsCharsPadsOutputArrayForEmoji)
{
// Basically the same test as ReadingDbcsCharsPadsOutputArray, but with
// emoji where 1 wchar can turn into 3 chars.
Log::Comment(L"During a utf-8 read, make sure the input buffer leaves "
L"enough room for keys that could be expanded into more than two chars.");

// write a mouse event, key event, emoji key event, mouse event
InputBuffer inputBuffer;
const unsigned int recordInsertCount = 4;
INPUT_RECORD inRecords[recordInsertCount];
inRecords[0].EventType = MOUSE_EVENT;
inRecords[1] = MakeKeyEvent(TRUE, 1, L'A', 0, L'A', 0);
inRecords[2] = MakeKeyEvent(TRUE, 1, 0x270B, 0, 0x270B, 0); // U+270B RAISED HAND
inRecords[3].EventType = MOUSE_EVENT;

std::deque<std::unique_ptr<IInputEvent>> inEvents;
for (size_t i = 0; i < recordInsertCount; ++i)
{
inEvents.push_back(IInputEvent::Create(inRecords[i]));
}

inputBuffer.Flush();
VERIFY_IS_GREATER_THAN(inputBuffer.Write(inEvents), 0u);

// read them out non-unicode style and compare
std::deque<std::unique_ptr<IInputEvent>> outEvents;
size_t eventsRead = 0;
bool resetWaitEvent = false;

// GH #8663: We only insert 4 events. but we need to ask for 5 here.
// The Raised Hand emoji is U+270B in utf16, but it's 0xE2 0x9C 0x8B in utf-8.
ServiceLocator::LocateGlobals().getConsoleInformation().CP = 65001;

inputBuffer._ReadBuffer(outEvents,
5,
eventsRead,
false,
resetWaitEvent,
false,
false);

// the emoji record should have counted for three elements in
// the array, making it so that we get less events read.
// We'll get the mouse, the key(A), and the key(U+270B)
VERIFY_ARE_EQUAL(3u, eventsRead);
VERIFY_ARE_EQUAL(eventsRead, outEvents.size());

// The events we read back here are _not_ pre-translated to the active codepage.
for (size_t i = 0; i < eventsRead; ++i)
{
VERIFY_ARE_EQUAL(outEvents[i]->ToInputRecord(), inRecords[i]);
Expand Down