Skip to content

Commit

Permalink
encoding/korean: make the EUC-KR decoder replace invalid input with U+FFFD instead of returning an error
Browse files Browse the repository at this point in the history
Updates golang/go#18898

Change-Id: I9868004acb11abbfee8492c9de8ba374f6dcb2ac
Reviewed-on: https://go-review.googlesource.com/37319
Run-TryBot: Marcel van Lohuizen <mpvl@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Nigel Tao <nigeltao@golang.org>
  • Loading branch information
mpvl committed Feb 23, 2017
1 parent afcdff5 commit b1a461c
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 20 deletions.
14 changes: 14 additions & 0 deletions encoding/korean/all_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
package korean

import (
"strings"
"testing"

"golang.org/x/text/encoding"
Expand All @@ -21,6 +22,9 @@ func enc(e encoding.Encoding) (dir string, t transform.Transformer, err error) {
}

func TestNonRepertoire(t *testing.T) {
// Pick n large enough to cause an overflow in the destination buffer of
// transform.String.
const n = 10000
testCases := []struct {
init func(e encoding.Encoding) (string, transform.Transformer, error)
e encoding.Encoding
Expand All @@ -33,6 +37,16 @@ func TestNonRepertoire(t *testing.T) {
{enc, EUCKR, "aא", "a"},
{enc, EUCKR, "\uac00א", "\xb0\xa1"},
// TODO: should we also handle Jamo?

{dec, EUCKR, "\x80", "\ufffd"},
{dec, EUCKR, "\xff", "\ufffd"},
{dec, EUCKR, "\x81", "\ufffd"},
{dec, EUCKR, "\xb0\x40", "\ufffd@"},
{dec, EUCKR, "\xb0\xff", "\ufffd"},
{dec, EUCKR, "\xd0\x20", "\ufffd "},
{dec, EUCKR, "\xd0\xff", "\ufffd"},

{dec, EUCKR, strings.Repeat("\x81", n), strings.Repeat("걖", n/2)},
}
for _, tc := range testCases {
dir, tr, wantErr := tc.init(tc.e)
Expand Down
39 changes: 19 additions & 20 deletions encoding/korean/euckr.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
package korean

import (
"errors"
"unicode/utf8"

"golang.org/x/text/encoding"
Expand All @@ -26,8 +25,6 @@ var eucKR = internal.Encoding{
identifier.EUCKR,
}

var errInvalidEUCKR = errors.New("korean: invalid EUC-KR encoding")

type eucKRDecoder struct{ transform.NopResetter }

func (eucKRDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
Expand All @@ -40,10 +37,15 @@ loop:

case 0x81 <= c0 && c0 < 0xff:
if nSrc+1 >= len(src) {
err = transform.ErrShortSrc
break loop
if !atEOF {
err = transform.ErrShortSrc
break loop
}
r, size = utf8.RuneError, 1
break
}
c1 := src[nSrc+1]
size = 2
if c0 < 0xc7 {
r = 178 * rune(c0-0x81)
switch {
Expand All @@ -54,39 +56,36 @@ loop:
case 0x81 <= c1 && c1 < 0xff:
r += rune(c1) - (0x81 - 2*26)
default:
err = errInvalidEUCKR
break loop
goto decError
}
} else if 0xa1 <= c1 && c1 < 0xff {
r = 178*(0xc7-0x81) + rune(c0-0xc7)*94 + rune(c1-0xa1)
} else {
err = errInvalidEUCKR
break loop
goto decError
}
if int(r) < len(decode) {
r = rune(decode[r])
if r == 0 {
r = '\ufffd'
if r != 0 {
break
}
} else {
r = '\ufffd'
}
size = 2
decError:
r = utf8.RuneError
if c1 < utf8.RuneSelf {
size = 1
}

default:
err = errInvalidEUCKR
break loop
r, size = utf8.RuneError, 1
break
}

if nDst+utf8.RuneLen(r) > len(dst) {
err = transform.ErrShortDst
break loop
break
}
nDst += utf8.EncodeRune(dst[nDst:], r)
}
if atEOF && err == transform.ErrShortSrc {
err = errInvalidEUCKR
}
return nDst, nSrc, err
}

Expand Down

0 comments on commit b1a461c

Please sign in to comment.