Skip to content

Commit

Permalink
Support CJK Symbols and Punctuation
Browse files Browse the repository at this point in the history
This commit adds support for CJK Symbols and Punctuation to `func IsEastAsianWideRune`.
  • Loading branch information
henry0312 committed Aug 13, 2023
1 parent 254b9f8 commit 2f1b40d
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 1 deletion.
11 changes: 11 additions & 0 deletions extension/cjk_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -197,4 +197,15 @@ func TestEastAsianLineBreaks(t *testing.T) {
},
t,
)
no = 8
testutil.DoTestCase(
markdown,
testutil.MarkdownTestCase{
No: no,
Description: "Soft line breaks between east asian wide characters or punctuations are ignored",
Markdown: "太郎は\\ **「こんにちわ」**\\ と、\r\n言った\r\nんです",
Expected: "<p>太郎は\\ <strong>「こんにちわ」</strong>\\ と、言ったんです</p>",
},
t,
)
}
9 changes: 8 additions & 1 deletion util/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -836,11 +836,18 @@ func IsAlphaNumeric(c byte) bool {

// cjkSymbolsAndPunctuation covers the Unicode "CJK Symbols and Punctuation"
// block (U+3000..U+303F). It is built once at package level so that
// IsEastAsianWideRune does not reconstruct the table on every call.
// See https://en.wikipedia.org/wiki/CJK_Symbols_and_Punctuation
var cjkSymbolsAndPunctuation = &unicode.RangeTable{
	R16: []unicode.Range16{
		{Lo: 0x3000, Hi: 0x303F, Stride: 1},
	},
}

// IsEastAsianWideRune returns true if the given rune is an east asian wide character, otherwise false.
//
// A rune is reported as wide when it belongs to the Hiragana, Katakana, Han or
// Hangul scripts, to the modifier-letter category (Lm), or to the CJK Symbols
// and Punctuation block.
func IsEastAsianWideRune(r rune) bool {
	return unicode.Is(unicode.Hiragana, r) ||
		unicode.Is(unicode.Katakana, r) ||
		unicode.Is(unicode.Han, r) ||
		unicode.Is(unicode.Lm, r) ||
		unicode.Is(unicode.Hangul, r) ||
		unicode.Is(cjkSymbolsAndPunctuation, r)
}

// A BufWriter is a subset of the bufio.Writer .
Expand Down

0 comments on commit 2f1b40d

Please sign in to comment.