Skip to content

Commit

Permalink
✨ 新增 HTML 实体语法节点 #69
Browse files Browse the repository at this point in the history
  • Loading branch information
88250 committed Jun 4, 2020
1 parent 5e5d5f3 commit cb6c287
Show file tree
Hide file tree
Showing 10 changed files with 68 additions and 16 deletions.
4 changes: 4 additions & 0 deletions ast/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,10 @@ type Node struct {
FootnotesRefLabel []byte // 脚注引用 label,[^label]
FootnotesRefId string // 脚注 id
FootnotesRefs []*Node // 脚注引用

// HTML 实体

EntityTokens []byte // 原始输入的实体 tokens,&
}

// ListData 用于记录列表或列表项节点的附加信息。
Expand Down
11 changes: 10 additions & 1 deletion html/parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ type parser struct {
// context is the context element when parsing an HTML fragment
// (section 12.4).
context *Node
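// escapeHtmlEntity controls whether HTML entities in text content are left
// escaped instead of being unescaped (&amp; -> &); unescaping is on by default.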
escapeHtmlEntity bool
}

func (p *parser) top() *Node {
Expand Down Expand Up @@ -2325,6 +2327,13 @@ func ParseOptionEnableScripting(enable bool) ParseOption {
}
}

// ParseOptionEnableHtmlEntity configures whether the parser keeps HTML
// entities in text content in their escaped form.
//
// When enable is true, a tokenizer that holds a reference to this parser
// skips unescaping in Tokenizer.Text, so an input such as &amp; is returned
// as-is instead of being converted to &. When enable is false, text is
// unescaped as usual.
func ParseOptionEnableHtmlEntity(enable bool) ParseOption {
	return func(p *parser) {
		p.escapeHtmlEntity = enable
	}
}

// ParseWithOptions is like Parse, with options.
func ParseWithOptions(r io.Reader, opts ...ParseOption) (*Node, error) {
p := &parser{
Expand Down Expand Up @@ -2364,14 +2373,14 @@ func ParseFragmentWithOptions(r io.Reader, context *Node, opts ...ParseOption) (
contextTag = context.DataAtom.String()
}
p := &parser{
tokenizer: NewTokenizerFragment(r, contextTag),
doc: &Node{
Type: DocumentNode,
},
scripting: true,
fragment: true,
context: context,
}
p.tokenizer = NewTokenizerFragment(p, r, contextTag)

for _, f := range opts {
f(p)
Expand Down
19 changes: 14 additions & 5 deletions html/token.go
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,8 @@ type span struct {

// A Tokenizer returns a stream of HTML Tokens.
type Tokenizer struct {
// parser is the parser related with this tokenizer
parser *parser
// r is the source of the HTML text.
r io.Reader
// tt is the TokenType of the current token.
Expand Down Expand Up @@ -1121,7 +1123,13 @@ func (z *Tokenizer) Text() []byte {
s = bytes.Replace(s, nul, replacement, -1)
}
if !z.textIsRaw {
s = unescape(s, false)
if nil == z.parser {
s = unescape(s, false)
} else {
if !z.parser.escapeHtmlEntity {
s = unescape(s, false)
}
}
}
return s
}
Expand Down Expand Up @@ -1193,7 +1201,7 @@ func (z *Tokenizer) SetMaxBuf(n int) {
// NewTokenizer returns a new HTML Tokenizer for the given Reader.
// The input is assumed to be UTF-8 encoded.
//
// The tokenizer is created without an associated parser (nil) and with an
// empty context tag.
func NewTokenizer(r io.Reader) *Tokenizer {
	return NewTokenizerFragment(nil, r, "")
}

// NewTokenizerFragment returns a new HTML Tokenizer for the given Reader, for
Expand All @@ -1204,10 +1212,11 @@ func NewTokenizer(r io.Reader) *Tokenizer {
// for a <p> tag or a <script> tag.
//
// The input is assumed to be UTF-8 encoded.
func NewTokenizerFragment(r io.Reader, contextTag string) *Tokenizer {
func NewTokenizerFragment(parser *parser, r io.Reader, contextTag string) *Tokenizer {
z := &Tokenizer{
r: r,
buf: make([]byte, 0, 4096),
parser: parser,
r: r,
buf: make([]byte, 0, 4096),
}
if contextTag != "" {
switch s := strings.ToLower(contextTag); s {
Expand Down
10 changes: 5 additions & 5 deletions javascript/lute.min.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion javascript/lute.min.js.map

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion parse/inline.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ func (t *Tree) parseEntity(ctx *InlineContext) (ret *ast.Node) {
return &ast.Node{Type: ast.NodeText, Tokens: and}
}
ctx.pos += i - start
return &ast.Node{Type: ast.NodeHTMLEntity, Tokens: util.StrToBytes(v)}
return &ast.Node{Type: ast.NodeHTMLEntity, Tokens: util.StrToBytes(v), EntityTokens: util.StrToBytes(entityName)}
}

// Try to match close bracket against an opening in the delimiter stack. Add either a link or image, or a plain [ character,
Expand Down
25 changes: 25 additions & 0 deletions render/vditor_renderer.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ func NewVditorRenderer(tree *parse.Tree) *VditorRenderer {
ret.RendererFuncs[ast.NodeToC] = ret.renderToC
ret.RendererFuncs[ast.NodeBackslash] = ret.renderBackslash
ret.RendererFuncs[ast.NodeBackslashContent] = ret.renderBackslashContent
ret.RendererFuncs[ast.NodeHTMLEntity] = ret.renderHtmlEntity
return ret
}

Expand Down Expand Up @@ -148,6 +149,30 @@ func (r *VditorRenderer) RenderFootnotesDefs(context *parse.Context) []byte {
return r.Writer.Bytes()
}

// renderHtmlEntity writes an HTML entity node as a Vditor WYSIWYG inline
// block. The block is a wrapper span containing a <code> element with the
// entity tokens as originally typed, followed by a preview span that shows
// the node's tokens HTML-escaped. It always returns ast.WalkStop.
func (r *VditorRenderer) renderHtmlEntity(node *ast.Node, entering bool) ast.WalkStatus {
	// Emit a leading zero-width space when, ignoring the caret marker, no
	// text precedes this node.
	if before := strings.ReplaceAll(node.PreviousNodeText(), parse.Caret, ""); "" == before {
		r.WriteString(parse.Zwsp)
	}

	// Source part: the entity as entered, prefixed with a zero-width space.
	r.WriteString("<span class=\"vditor-wysiwyg__block\" data-type=\"html-entity\">")
	r.tag("code", [][]string{{"data-type", "html-entity"}}, false)
	r.Write(append([]byte(parse.Zwsp), node.EntityTokens...))
	r.WriteString("</code>")

	// Preview part: the node tokens with the caret marker removed, escaped
	// for safe embedding in HTML.
	r.tag("span", [][]string{{"class", "vditor-wysiwyg__preview"}, {"data-render", "2"}}, false)
	r.tag("code", nil, false)
	withoutCaret := bytes.ReplaceAll(node.Tokens, []byte(parse.Caret), nil)
	r.Write(util.EscapeHTML(withoutCaret))
	r.tag("/code", nil, false)
	r.tag("/span", nil, false)

	// Close the wrapper and add a trailing zero-width space.
	r.WriteString("</span>" + parse.Zwsp)
	return ast.WalkStop
}

func (r *VditorRenderer) renderBackslashContent(node *ast.Node, entering bool) ast.WalkStatus {
r.Write(util.EscapeHTML(node.Tokens))
return ast.WalkStop
Expand Down
3 changes: 2 additions & 1 deletion test/spinv_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (

var spinVditorDOMTests = []*parseTest{

{"121", "a&amp;b", "<p data-block=\"0\">a<span class=\"vditor-wysiwyg__block\" data-type=\"html-entity\"><code data-type=\"html-entity\">\u200b&amp;</code><span class=\"vditor-wysiwyg__preview\" data-render=\"2\"><code>&amp;</code></span></span>\u200bb\n</p>"},
{"120", "<ul data-tight=\"true\" data-marker=\"*\" data-block=\"0\"><li data-marker=\"*\" class=\"vditor-task\"><input type=\"checkbox\"> test<wbr></li></ul><ul data-tight=\"true\" data-marker=\"*\" data-block=\"0\"><li data-marker=\"*\" class=\"vditor-task\"><input type=\"checkbox\"> test</li></ul>", "<ul data-tight=\"true\" data-marker=\"*\" data-block=\"0\"><li data-marker=\"*\" class=\"vditor-task\"><input type=\"checkbox\" /> test<wbr></li><li data-marker=\"*\" class=\"vditor-task\"><input type=\"checkbox\" /> test</li></ul>"},
{"119", "&parx", "<p data-block=\"0\">&amp;parx\n</p>"},
{"118", "<ul data-tight=\"true\" data-marker=\"-\" data-block=\"0\"><li data-marker=\"-\"><p>[ ]<wbr></p></li></ul>", "<ul data-tight=\"true\" data-marker=\"-\" data-block=\"0\"><li data-marker=\"-\" class=\"vditor-task\"><input type=\"checkbox\" /> <wbr></li></ul>"},
Expand Down Expand Up @@ -62,7 +63,7 @@ var spinVditorDOMTests = []*parseTest{
{"82", "<ol data-tight=\"true\" data-block=\"0\"><li data-marker=\"2.\"><p>bar<wbr></p></li></ol>", "<ol data-tight=\"true\" data-marker=\"1.\" data-block=\"0\"><li data-marker=\"1.\">bar<wbr></li></ol>"},
{"81", "<p data-block=\"0\"><strong>\u200b<em>\u200b<s>\u200b1</s></em></strong><wbr></p>", "<p data-block=\"0\"><em data-marker=\"*\"><strong data-marker=\"**\"><s data-marker=\"~~\">1</s></strong></em><wbr>\n</p>"},
{"80", "<s><em>\u200b</em></s>", ""},
{"79", "<p data-block=\"0\"><b>&#8203;</b></p>", ""},
{"79", "<p data-block=\"0\"><b>&#8203;</b></p>", "<p data-block=\"0\"><strong data-marker=\"**\">\u200b<span class=\"vditor-wysiwyg__block\" data-type=\"html-entity\"><code data-type=\"html-entity\">\u200b&#8203;</code><span class=\"vditor-wysiwyg__preview\" data-render=\"2\"><code>\u200b</code></span></span>\u200b</strong>\n</p>"},
{"78", "<p data-block=\"0\">``foo``<wbr></p>", "<p data-block=\"0\">\u200b<code data-marker=\"``\">\u200bfoo</code>\u200b<wbr>\n</p>"},
{"77", `<p data-block="0">1<wbr><span style="background-color: var(--textarea-focus-background-color); color: var(--textarea-text-color);">​</span><span class="vditor-wysiwyg__block" data-type="math-inline" style="background-color: var(--textarea-focus-background-color); color: var(--textarea-text-color);"><code data-type="math-inline">foo</code><span class="vditor-wysiwyg__preview" data-render="false"><span class="vditor-math" data-math="foo"><span class="katex"><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.85396em;vertical-align:-0.19444em;"></span><span class="mord mathdefault" style="margin-right:0.05724em;">f</span><span class="mord mathdefault" style="margin-right:0.05724em;">o</span><span class="mord mathdefault" style="margin-right:0.05724em;">o</span></span></span></span></span></span></span><span style="background-color: var(--textarea-focus-background-color); color: var(--textarea-text-color);">​</span></p>`, "<p data-block=\"0\">1<wbr><span class=\"vditor-wysiwyg__block\" data-type=\"math-inline\"><code data-type=\"math-inline\">\u200bfoo</code><span class=\"vditor-wysiwyg__preview\" data-render=\"2\"><code class=\"language-math\">foo</code></span></span>\u200b\n</p>"},
{"76", "<ul><li data-marker=\"1.\"><p>12<wbr></p></li></ul>", "<ul data-tight=\"true\" data-marker=\"*\" data-block=\"0\"><li data-marker=\"*\">12<wbr></li></ul>"},
Expand Down
2 changes: 1 addition & 1 deletion test/v2m_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ var vditorDOM2MdTests = []parseTest{
{"84", `<table data-block="0"><thead><tr><th>col1</th></tr></thead><tbody><tr><td>foo<wbr><br></td></tr></tbody></table>`, "| col1 |\n| - |\n| foo |\n"},
{"83", "<ul data-tight=\"true\" data-marker=\"*\" data-block=\"0\"><li data-marker=\"*\"><p>foo</p><p data-block=\"0\">b<wbr></p></li></ul>", "* foo\n\n b\n"},
{"82", "<ol data-tight=\"true\" data-block=\"0\"><li data-marker=\"1.\"><p>[x] foo<wbr></p></li></ol>", "1. [x] foo\n"},
{"81", "<p data-block=\"0\">f&#8203;b</p>", "fb\n"},
{"81", "<p data-block=\"0\">f&#8203;b</p>", "f&#8203;b\n"},
{"80", "<p data-block=\"0\"><span class=\"vditor-wysiwyg__block\" data-type=\"html-inline\">\u200b<code data-type=\"html-inline\" style=\"display: none;\">&lt;foo&gt;</code></span>b<wbr>\n</p>", "<foo>b\n"},
{"79", "<p>\u200bfoo<wbr></p>", "foo\n"},
{"78", "<ul data-tight=\"true\" data-marker=\"*\" data-block=\"0\"><li data-marker=\"*\"><p>a​​​​</p><ul data-tight=\"true\" data-marker=\"*\" data-block=\"0\"><li data-marker=\"*\"><p><br></p></li><li data-marker=\"*\"><p><wbr>b</p></li></ul></li></ul>", "* a\n * \n * b\n"},
Expand Down
6 changes: 5 additions & 1 deletion vditor.go
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ func (lute *Lute) vditorDOM2Md(htmlStr string) (markdown string) {

reader := strings.NewReader(htmlStr)
htmlRoot := &html.Node{Type: html.ElementNode}
htmlNodes, err := html.ParseFragment(reader, htmlRoot)
htmlNodes, err := html.ParseFragmentWithOptions(reader, htmlRoot, html.ParseOptionEnableHtmlEntity(true))
if nil != err {
markdown = err.Error()
return
Expand Down Expand Up @@ -920,6 +920,10 @@ func (lute *Lute) genASTByVditorDOM(n *html.Node, tree *parse.Tree) {
} else if "code-inline" == dataType {
node.Tokens = codeTokens
tree.Context.Tip.AppendChild(node)
} else if "html-entity" == dataType {
node.Type = ast.NodeText
node.Tokens = codeTokens
tree.Context.Tip.AppendChild(node)
}
return
case atom.Font:
Expand Down

0 comments on commit cb6c287

Please sign in to comment.