From 13286d116667d6ef3c1bc9a4dd7cfdeeb710b5ae Mon Sep 17 00:00:00 2001
From: Masayuki Moriyama <masayuki.moriyama@miraclelinux.com>
Date: Fri, 3 Nov 2023 21:39:48 +0900
Subject: [PATCH] gh-101180: Fix a bug where iso2022_jp_3 and iso2022_jp_2004
 codecs read out of bounds

The iso2022_jp_3 and iso2022_jp_2004 codecs read out of bounds when
encoding a Unicode combining character sequence.

This bug causes the following error:
$ python3 -c "print('\u304b\u309a'.encode('iso2022_jp_2004'))"
Traceback (most recent call last):
  File "<string>", line 1, in <module>
UnicodeEncodeError: 'iso2022_jp_2004' codec can't encode character '\u309a' in position 1: illegal multibyte sequence

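This commit fixes the out-of-bounds read by passing the designation
encoder a two-element local buffer (the current character and, when a
two-character sequence is tried, the character that follows it) instead
of a pointer to the single character variable.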
---
 Modules/cjkcodecs/_codecs_iso2022.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/Modules/cjkcodecs/_codecs_iso2022.c b/Modules/cjkcodecs/_codecs_iso2022.c
index 86bb73b982a551..e8835ad0909633 100644
--- a/Modules/cjkcodecs/_codecs_iso2022.c
+++ b/Modules/cjkcodecs/_codecs_iso2022.c
@@ -207,8 +207,9 @@ ENCODER(iso2022)
 
         encoded = MAP_UNMAPPABLE;
         for (dsg = CONFIG_DESIGNATIONS; dsg->mark; dsg++) {
+            Py_UCS4 buf[2] = {c, 0};
             Py_ssize_t length = 1;
-            encoded = dsg->encoder(codec, &c, &length);
+            encoded = dsg->encoder(codec, buf, &length);
             if (encoded == MAP_MULTIPLE_AVAIL) {
                 /* this implementation won't work for pair
                  * of non-bmp characters. */
@@ -217,9 +218,11 @@ ENCODER(iso2022)
                         return MBERR_TOOFEW;
                     length = -1;
                 }
-                else
+                else {
+                    buf[1] = INCHAR2;
                     length = 2;
-                encoded = dsg->encoder(codec, &c, &length);
+                }
+                encoded = dsg->encoder(codec, buf, &length);
                 if (encoded != MAP_UNMAPPABLE) {
                     insize = length;
                     break;