From 13286d116667d6ef3c1bc9a4dd7cfdeeb710b5ae Mon Sep 17 00:00:00 2001 From: Masayuki Moriyama Date: Fri, 3 Nov 2023 21:39:48 +0900 Subject: [PATCH] gh-101180: Fix a bug where iso2022_jp_3 and iso2022_jp_2004 codecs read out of bounds iso2022_jp_3 and iso2022_jp_2004 codecs read out of bounds when encoding a Unicode combining character sequence. This bug causes the following error: $ python3 -c "print('\u304b\u309a'.encode('iso2022_jp_2004'))" Traceback (most recent call last): File "<string>", line 1, in <module> UnicodeEncodeError: 'iso2022_jp_2004' codec can't encode character '\u309a' in position 1: illegal multibyte sequence This commit fixes the out-of-bounds read. --- Modules/cjkcodecs/_codecs_iso2022.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/Modules/cjkcodecs/_codecs_iso2022.c b/Modules/cjkcodecs/_codecs_iso2022.c index 86bb73b982a551..e8835ad0909633 100644 --- a/Modules/cjkcodecs/_codecs_iso2022.c +++ b/Modules/cjkcodecs/_codecs_iso2022.c @@ -207,8 +207,9 @@ ENCODER(iso2022) encoded = MAP_UNMAPPABLE; for (dsg = CONFIG_DESIGNATIONS; dsg->mark; dsg++) { + Py_UCS4 buf[2] = {c, 0}; Py_ssize_t length = 1; - encoded = dsg->encoder(codec, &c, &length); + encoded = dsg->encoder(codec, buf, &length); if (encoded == MAP_MULTIPLE_AVAIL) { /* this implementation won't work for pair * of non-bmp characters. */ @@ -217,9 +218,11 @@ ENCODER(iso2022) return MBERR_TOOFEW; length = -1; } - else + else { + buf[1] = INCHAR2; length = 2; - encoded = dsg->encoder(codec, &c, &length); + } + encoded = dsg->encoder(codec, buf, &length); if (encoded != MAP_UNMAPPABLE) { insize = length; break;