Skip to content

Commit

Permalink
✨ 提供 HTML 转 AST/Text 的接口 Fix #90
Browse files Browse the repository at this point in the history
  • Loading branch information
88250 committed Sep 16, 2020
1 parent 3ded08f commit 59b2556
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 17 deletions.
34 changes: 19 additions & 15 deletions h2m.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,22 @@ import (
// HTML2Markdown converts HTML to Markdown.
//
// The HTML string is first turned into a Markdown AST by HTML2Tree. That AST
// is then rendered with the format renderer, after every renderer function
// registered in lute.HTML2MdRendererFuncs has been installed as an extension
// renderer. If HTML2Tree yields no tree, an empty string is returned.
//
// The err result is kept for interface compatibility; this function never
// sets it.
func (lute *Lute) HTML2Markdown(htmlStr string) (markdown string, err error) {
	// Parse the HTML string into a Markdown AST.
	tree := lute.HTML2Tree(htmlStr)
	if nil == tree {
		// HTML2Text guards against a nil tree from HTML2Tree as well; do the
		// same here instead of handing nil to the renderer.
		return
	}

	// Render the AST as formatted Markdown, letting caller-registered
	// renderer functions override the defaults per node type.
	renderer := render.NewFormatRenderer(tree)
	for nodeType, rendererFunc := range lute.HTML2MdRendererFuncs {
		renderer.ExtRendererFuncs[nodeType] = rendererFunc
	}
	markdown = util.BytesToStr(renderer.Render())
	return
}

// HTML2Tree 将 HTML 转换为 AST。
func (lute *Lute) HTML2Tree(dom string) (ret *parse.Tree) {
reader := strings.NewReader(dom)
htmlRoot := &html.Node{Type: html.ElementNode}
htmlNodes, err := html.ParseFragment(reader, htmlRoot)
if nil != err {
Expand All @@ -38,13 +52,13 @@ func (lute *Lute) HTML2Markdown(htmlStr string) (markdown string, err error) {

// 将 HTML 树转换为 Markdown AST

tree := &parse.Tree{Name: "", Root: &ast.Node{Type: ast.NodeDocument}, Context: &parse.Context{Option: lute.Options}}
tree.Context.Tip = tree.Root
ret = &parse.Tree{Name: "", Root: &ast.Node{Type: ast.NodeDocument}, Context: &parse.Context{Option: lute.Options}}
ret.Context.Tip = ret.Root
for _, htmlNode := range htmlNodes {
lute.genASTByDOM(htmlNode, tree)
lute.genASTByDOM(htmlNode, ret)
}

ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus {
ast.Walk(ret.Root, func(n *ast.Node, entering bool) ast.WalkStatus {
if entering {
if ast.NodeList == n.Type {
// ul.ul => ul.li.ul
Expand All @@ -59,16 +73,6 @@ func (lute *Lute) HTML2Markdown(htmlStr string) (markdown string, err error) {
}
return ast.WalkContinue
})

// 将 AST 进行 Markdown 格式化渲染

var formatted []byte
renderer := render.NewFormatRenderer(tree)
for nodeType, rendererFunc := range lute.HTML2MdRendererFuncs {
renderer.ExtRendererFuncs[nodeType] = rendererFunc
}
formatted = renderer.Render()
markdown = util.BytesToStr(formatted)
return
}

Expand Down
2 changes: 1 addition & 1 deletion javascript/lute.min.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion javascript/lute.min.js.map

Large diffs are not rendered by default.

9 changes: 9 additions & 0 deletions lute.go
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,15 @@ func (lute *Lute) TextBundleStr(name, markdown string, linkPrefixes []string) (t
return
}

// HTML2Text converts the given HTML DOM string to text.
//
// The DOM is converted to an AST via HTML2Tree and the text of the root node
// is returned. When HTML2Tree yields no tree, the result is the empty string.
func (lute *Lute) HTML2Text(dom string) string {
	if tree := lute.HTML2Tree(dom); nil != tree {
		return tree.Root.Text()
	}
	return ""
}

// Space 用于在 text 中的中西文之间插入空格。
func (lute *Lute) Space(text string) string {
return render.Space0(text)
Expand Down

0 comments on commit 59b2556

Please sign in to comment.