Skip to content

Commit

Permalink
win32.c: make reading UTF-8 characters from the console possible
Browse files Browse the repository at this point in the history
Due to a bug in Windows, ReadFile() and ReadConsoleA() (and thus
_read()) return zeros instead of non-ASCII characters when the console
codepage is set to 65001. See this ticket for more details:
microsoft/terminal#4551

This commit works around that bug by using ReadConsoleW() inside
win32_read() when the passed fd points to the console and the console
codepage is set to 65001.

Fixes #18701
  • Loading branch information
xenu committed Apr 9, 2021
1 parent 1c48faa commit 03dbe99
Showing 1 changed file with 112 additions and 1 deletion.
113 changes: 112 additions & 1 deletion win32/win32.c
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,10 @@ static const SYSTEMTIME time_t_epoch_base_systemtime = {

#define FILETIME_CHUNKS_PER_SECOND (10000000UL)

#ifdef USE_ITHREADS
/* Serializes calls to win32_read_console(), which keeps leftover converted
* bytes in function-local static storage. Locked in win32_read() and
* initialized with MUTEX_INIT() in Perl_win32_init(). */
static perl_mutex win32_read_console_mutex;
#endif

#ifdef SET_INVALID_PARAMETER_HANDLER
static BOOL silent_invalid_parameter_handler = FALSE;

Expand Down Expand Up @@ -3743,10 +3747,115 @@ win32_dup2(int fd1,int fd2)
return dup2(fd1,fd2);
}

static int
win32_read_console(int fd, U8 *buf, unsigned int cnt)
{
/* This function is a workaround for a bug in Windows:
* https://github.com/microsoft/terminal/issues/4551
* tl;dr: ReadFile() and ReadConsoleA() return garbage when reading
* non-ASCII characters from the console with the 65001 codepage.
*
* Reads UTF-16 code units from the console with ReadConsoleW(), converts
* them to the current console codepage and copies the result into buf.
*
* fd  - CRT file descriptor; its OS handle is looked up with
*       _get_osfhandle() and must be accepted by GetConsoleMode().
* buf - destination for the converted bytes.
* cnt - maximum number of bytes to store in buf.
*
* Returns the number of bytes stored in buf (cnt - left_to_read), or -1 on
* failure. errno is set to EBADF for an invalid handle; every other failure
* path calls translate_to_errno() (presumably mapping the Win32 last error
* to errno -- defined elsewhere, confirm there).
*
* The function keeps state in static variables and does no locking of its
* own; the caller visible in this file, win32_read(), wraps the call in
* win32_read_console_mutex.
*
* NOTE(review): if ReadConsoleW() or the conversion fails after some bytes
* were already copied to buf, -1 is returned and that byte count is not
* reported to the caller.
*/
HANDLE h = (HANDLE)_get_osfhandle(fd);
size_t left_to_read = cnt;
DWORD mode;

if (h == INVALID_HANDLE_VALUE) {
errno = EBADF;
return -1;
}

/* The console mode is needed below to tell line-input mode apart from
* character-at-a-time input. */
if (!GetConsoleMode(h, &mode)) {
translate_to_errno();
return -1;
}

/* Each iteration first drains whatever is pending in converted_buf, then
* reads and converts one more character (one or two UTF-16 code units). */
while (left_to_read) {
/* The purpose of converted_buf is to preserve partial UTF-8 (or of any
* other multibyte encoding) code points between read() calls. Since
* there's only one console, the buffer is global. It's needed because
* ReadConsoleW() returns a string of UTF-16 code units and its result,
* after conversion to the current console codepage, may not fit in the
* return buffer.
*
* The buffer's size is 8 because it will contain at most two UTF-8 code
* points.
*/
static char converted_buf[8];
static size_t converted_buf_len = 0;
WCHAR wbuf[2];
DWORD wbuf_len = 0, chars_read;

if (converted_buf_len) {
bool newline = 0;
/* copy no more than the caller still has room for */
size_t to_write = MIN(converted_buf_len, left_to_read);

/* are we returning a newline? */
if (memchr(converted_buf, '\n', to_write))
newline = 1;

memcpy(buf, converted_buf, to_write);
buf += to_write;

/* keep the bytes that did not fit at the front of converted_buf so the
* next iteration or the next call returns them first */
converted_buf_len -= to_write;
if (converted_buf_len)
memmove(
converted_buf, converted_buf + to_write, converted_buf_len
);

left_to_read -= to_write;

/* stop once the caller's buffer is full, a newline has been delivered,
* or the console is not in line-input mode (in which case we return
* after the first converted character) */
if (!left_to_read || newline || (mode & ENABLE_LINE_INPUT) == 0)
break;
}

/* Reading one code unit at a time is inefficient, but since this code
* is used only for the interactive console, that shouldn't matter */
if (!ReadConsoleW(h, wbuf, 1, &chars_read, 0)) {
translate_to_errno();
return -1;
}
/* nothing was read: return what has been collected so far */
if (!chars_read)
break;

++wbuf_len;

if (wbuf[0] >= 0xD800 && wbuf[0] <= 0xDBFF) {
/* high surrogate, read one more code unit */
if (!ReadConsoleW(h, wbuf + 1, 1, &chars_read, 0)) {
translate_to_errno();
return -1;
}
/* if no second unit arrived, the lone high surrogate is converted
* on its own */
if (chars_read)
++wbuf_len;
}

/* Convert the one or two code units to the console codepage; the bytes
* are handed to the caller at the top of the next iteration. A zero
* result is treated as a conversion failure. */
converted_buf_len = WideCharToMultiByte(
GetConsoleCP(), 0, wbuf, wbuf_len, converted_buf,
sizeof(converted_buf), NULL, NULL
);
if (!converted_buf_len) {
translate_to_errno();
return -1;
}
}

return cnt - left_to_read;
}


DllExport int
win32_read(int fd, void *buf, unsigned int cnt)
{
    /* read() replacement that works around a Windows console bug.
     *
     * When fd refers to the console (win32_isatty()) and the console input
     * codepage is 65001 (UTF-8), the request is routed to
     * win32_read_console() instead of the CRT read(). That helper keeps
     * leftover bytes in static storage, so the call is made while holding
     * win32_read_console_mutex. Every other descriptor goes straight to
     * read().
     *
     * fd  - CRT file descriptor to read from.
     * buf - destination buffer of at least cnt bytes.
     * cnt - maximum number of bytes to read.
     *
     * Returns whatever the selected reader returns: the number of bytes
     * read, or -1 on error.
     *
     * The function must not begin with an unconditional
     * "return read(fd, buf, cnt);" -- that would make the console path
     * below unreachable.
     */
    int ret;

    if (UNLIKELY(win32_isatty(fd) && GetConsoleCP() == 65001)) {
        MUTEX_LOCK(&win32_read_console_mutex);
        ret = win32_read_console(fd, buf, cnt);
        MUTEX_UNLOCK(&win32_read_console_mutex);
    }
    else {
        ret = read(fd, buf, cnt);
    }

    return ret;
}

DllExport int
Expand Down Expand Up @@ -4907,6 +5016,8 @@ Perl_win32_init(int *argcp, char ***argvp)
time_t_epoch_base_filetime.LowPart = ft.dwLowDateTime;
time_t_epoch_base_filetime.HighPart = ft.dwHighDateTime;
}

MUTEX_INIT(&win32_read_console_mutex);
}

void
Expand Down

0 comments on commit 03dbe99

Please sign in to comment.