Skip to content

Commit

Permalink
Restore PUA range check, add manual tests
Browse files Browse the repository at this point in the history
  • Loading branch information
hsivonen committed Oct 24, 2024
1 parent 6a489c7 commit 471104f
Showing 1 changed file with 31 additions and 3 deletions.
34 changes: 31 additions & 3 deletions src/gb18030.rs
Original file line number Diff line number Diff line change
Expand Up @@ -388,8 +388,11 @@ fn gbk_encode_non_unified(bmp: u16) -> Option<(usize, usize)> {
let offset = if other_trail < 0x3F { 0x40 } else { 0x41 };
return Some((other_lead + (0x81 + 0x20), other_trail + offset));
}
// CJK Radicals Supplement and U+9FBx ideographs in GBK_BOTTOM
if in_inclusive_range16(bmp, 0x2E81, 0x2ECA) || in_inclusive_range16(bmp, 0x9FB4, 0x9FBB) {
// CJK Radicals Supplement, PUA, and U+9FBx ideographs in GBK_BOTTOM
if in_inclusive_range16(bmp, 0x2E81, 0x2ECA)
|| in_inclusive_range16(bmp, 0x9FB4, 0x9FBB)
|| in_inclusive_range16(bmp, 0xE816, 0xE855)
{
if let Some(pos) = position(&GBK_BOTTOM[21..], bmp) {
let trail = pos + 16;
let offset = if trail < 0x3F { 0x40 } else { 0x41 };
Expand Down Expand Up @@ -615,11 +618,18 @@ mod tests {
decode_gb18030(b"\x81\x80", "\u{4E90}");
decode_gb18030(b"\x81\xFE", "\u{4FA2}");
decode_gb18030(b"\xFE\x40", "\u{FA0C}");
decode_gb18030(b"\xFE\x7E", "\u{E843}");
decode_gb18030(b"\xFE\x7F", "\u{FFFD}\u{007F}");
decode_gb18030(b"\xFE\x80", "\u{4723}");
decode_gb18030(b"\xFE\xFE", "\u{E4C5}");

// Changes between GB18030-2005 and GB18030-2022
decode_gb18030(b"\xFE\x7E", "\u{9FB9}");
decode_gb18030(b"\xA6\xDD", "\u{FE14}");

// These mappings remain in place the GB18030-2005 way despite GB18030-2022
decode_gb18030(b"\x82\x35\x91\x32", "\u{9FB9}");
decode_gb18030(b"\x84\x31\x83\x30", "\u{FE14}");

// The difference from the original GB18030
decode_gb18030(b"\xA3\xA0", "\u{3000}");
decode_gb18030(b"\xA1\xA1", "\u{3000}");
Expand Down Expand Up @@ -687,6 +697,15 @@ mod tests {

// Edge cases
encode_gb18030("\u{00F7}", b"\xA1\xC2");

// GB18030-2022
encode_gb18030("\u{9FB9}", b"\xFE\x7E");
encode_gb18030("\u{FE14}", b"\xA6\xDD");
encode_gb18030("\u{E843}", b"\xFE\x7E");
encode_gb18030("\u{E791}", b"\xA6\xDD");

// Non-change in GB18030-2022
encode_gb18030("\u{E817}", b"\xFE\x52");
}

#[test]
Expand Down Expand Up @@ -729,6 +748,15 @@ mod tests {

// Edge cases
encode_gbk("\u{00F7}", b"\xA1\xC2");

// GB18030-2022
encode_gb18030("\u{9FB9}", b"\xFE\x7E");
encode_gb18030("\u{FE14}", b"\xA6\xDD");
encode_gb18030("\u{E843}", b"\xFE\x7E");
encode_gb18030("\u{E791}", b"\xA6\xDD");

// Non-change in GB18030-2022
encode_gb18030("\u{E817}", b"\xFE\x52");
}

#[test]
Expand Down

0 comments on commit 471104f

Please sign in to comment.