Skip to content

Commit

Permalink
🐛 调用HTML2Markdown 转换table失败问题 Fix #70
Browse files Browse the repository at this point in the history
  • Loading branch information
88250 committed Jun 12, 2020
1 parent a2054b9 commit c488315
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 46 deletions.
8 changes: 8 additions & 0 deletions h2m.go
Original file line number Diff line number Diff line change
Expand Up @@ -333,7 +333,15 @@ func (lute *Lute) genASTByDOM(n *html.Node, tree *parse.Tree) {
defer tree.Context.ParentTip()
case atom.Tbody:
case atom.Tr:
table := n.Parent.Parent
node.Type = ast.NodeTableRow
if atom.Thead != table.FirstChild.DataAtom && n == n.Parent.FirstChild {
// Add the missing thead node
thead := &ast.Node{Type: ast.NodeTableHead}
tree.Context.Tip.AppendChild(thead)
tree.Context.Tip = thead
defer tree.Context.ParentTip()
}
tree.Context.Tip.AppendChild(node)
tree.Context.Tip = node
defer tree.Context.ParentTip()
Expand Down
110 changes: 64 additions & 46 deletions test/h2m_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,55 +18,73 @@ import (

var html2MdTests = []parseTest{

{"27", `<html>
{"28", `<html>
<body>
<table>
<thead>
<tr>
<th>Month</th>
<th>Savings</th>
</tr>
</thead>
<tbody>
<tr>
<td>January</td>
<td>$100</td>
</tr>
<tr>
<td>February</td>
<td>$80</td>
</tr>
</tbody>
</table>
<table>
<tr>
<th>Month</th>
<th>Savings</th>
</tr>
<tr>
<td>January</td>
<td>$100</td>
</tr>
<tr>
<td>February</td>
<td>$80</td>
</tr>
</table>
</body>
</html>`, "| Month | Savings |\n| - | - |\n| January | $100 |\n| February | $80 |\n"},
{"26", "<table class=\"markdown-reference\"><thead><tr><th>Type</th><th class=\"second-example\">Or</th><th>… to Get</th></tr></thead><tbody><tr><td class=\"preformatted\">*Italic*</td><td class=\"preformatted second-example\">_Italic_</td><td><em>Italic</em></td></tr><tr><td class=\"preformatted\">**Bold**</td><td class=\"preformatted second-example\">__Bold__</td><td><strong>Bold</strong></td></tr><tr><td class=\"preformatted\"># Heading 1</td><td class=\"preformatted second-example\">Heading 1<br>=========</td><td><h1 class=\"smaller-h1\">Heading 1</h1></td></tr><tr><td class=\"preformatted\">## Heading 2</td><td class=\"preformatted second-example\">Heading 2<br>---------</td><td><h2 class=\"smaller-h2\">Heading 2</h2></td></tr><tr><td class=\"preformatted\">[Link](http://a.com)</td><td class=\"preformatted second-example\">[Link][1]<br>⋮<br>[1]: http://b.org</td><td><a href=\"https://commonmark.org/\">Link</a></td></tr><tr><td class=\"preformatted\">![Image](http://url/a.png)</td><td class=\"preformatted second-example\">![Image][1]<br>⋮<br>[1]: http://url/b.jpg</td><td><img src=\"https://commonmark.org/help/images/favicon.png\" width=\"36\" height=\"36\" alt=\"Markdown\"></td></tr><tr><td class=\"preformatted\">&gt; Blockquote</td><td class=\"preformatted second-example\">&nbsp;</td><td><blockquote>Blockquote</blockquote></td></tr><tr><td class=\"preformatted\"><p>* List<br>* List<br>* List</p></td><td class=\"preformatted second-example\"><p>- List<br>- List<br>- List<br></p></td><td><ul><li>List</li><li>List</li><li>List</li></ul></td></tr></tbody></table>", "| Type | Or | … to Get |\n| - | - | - |\n| *Italic* | _Italic_ | *Italic* |\n| **Bold** | __Bold__ | **Bold** |\n| # Heading 1 | Heading 1<br/>========= | # Heading 1 |\n| ## Heading 2 | Heading 2<br/>--------- | ## Heading 2 |\n| [Link](http://a.com) | [Link][1]<br/>⋮<br/>[1]: http://b.org | [Link](https://commonmark.org/) |\n| ![Image](http://url/a.png) | ![Image][1]<br/>⋮<br/>[1]: http://url/b.jpg | 
![Markdown](https://commonmark.org/help/images/favicon.png) |\n| > Blockquote | | > Blockquote |\n| * List<br/>* List<br/>* List | - List<br/>- List<br/>- List<br/> | * List* List* List |\n"},
{"25", "<table class=\"table table-bordered\"><thead class=\"thead-light\"><tr><th>Element</th><th>Markdown Syntax</th></tr></thead><tbody><tr><td><a href=\"https://www.markdownguide.org/extended-syntax/#tables\">Table</a></td><td><code>| Syntax | Description |<br>| ----------- | ----------- |<br>| Header | Title |<br>| Paragraph | Text |</code></td></tr><tr><td><a href=\"https://www.markdownguide.org/extended-syntax/#fenced-code-blocks\">Fenced Code Block</a></td><td><code>```<br>{<br>&nbsp;&nbsp;\"firstName\": \"John\",<br>&nbsp;&nbsp;\"lastName\": \"Smith\",<br>&nbsp;&nbsp;\"age\": 25<br>}<br>```</code></td></tr></tbody></table>", "| Element | Markdown Syntax |\n| - | - |\n| [Table](https://www.markdownguide.org/extended-syntax/#tables) | <code>\\| Syntax \\| Description \\|\\| ----------- \\| ----------- \\|\\| Header \\| Title \\|\\| Paragraph \\| Text \\|</code> |\n| [Fenced Code Block](https://www.markdownguide.org/extended-syntax/#fenced-code-blocks) | <code>```{\u00a0\u00a0\"firstName\": \"John\",\u00a0\u00a0\"lastName\": \"Smith\",\u00a0\u00a0\"age\": 25}```</code> |\n"},
{"24", "<table><thead><tr><th>Element</th><th>Markdown Syntax</th></tr></thead><tbody><tr><td>Table</td><td><code>| Syntax | Description |<br>| ----------- | ----------- |<br>| Header | Title |<br>| Paragraph | Text |</code></td></tr></tbody></table>", "| Element | Markdown Syntax |\n| - | - |\n| Table | <code>\\| Syntax \\| Description \\|\\| ----------- \\| ----------- \\|\\| Header \\| Title \\|\\| Paragraph \\| Text \\|</code> |\n"},
{"23", "<h2 style=\"box-sizing: border-box; margin-top: 24px; margin-bottom: 16px; font-weight: 600; font-size: 1.5em; line-height: 1.25; padding-bottom: 0.3em; border-bottom: 1px solid rgb(234, 236, 239); color: rgb(36, 41, 46); font-family: -apple-system, BlinkMacSystemFont, &quot;Segoe UI&quot;, Helvetica, Arial, sans-serif, &quot;Apple Color Emoji&quot;, &quot;Segoe UI Emoji&quot;; font-style: normal; font-variant-ligatures: normal; font-variant-caps: normal; letter-spacing: normal; orphans: 2; text-align: start; text-indent: 0px; text-transform: none; white-space: normal; widows: 2; word-spacing: 0px; -webkit-text-stroke-width: 0px; background-color: rgb(255, 255, 255); text-decoration-style: initial; text-decoration-color: initial;\"><g-emoji class=\"g-emoji\" alias=\"m\" fallback-src=\"https://github.githubassets.com/images/icons/emoji/unicode/24c2.png\" style=\"box-sizing: border-box; font-family: &quot;Apple Color Emoji&quot;, &quot;Segoe UI&quot;, &quot;Segoe UI Emoji&quot;, &quot;Segoe UI Symbol&quot;; font-size: 1.2em; font-weight: 400; line-height: 20px; vertical-align: middle; font-style: normal !important;\">Ⓜ️</g-emoji><span> </span>Markdown User Guide</h2>", "## Ⓜ️ Markdown User Guide\n"},
{"22", "<div class=\"highlight highlight-source-shell\"><pre>npm install vditor --save</pre></div>", "```shell\nnpm install vditor --save\n```\n"},
{"21", "<h4><a id=\"user-content-id\" class=\"anchor\" aria-hidden=\"true\" href=\"https://github.com/Vanessa219/vditor/blob/master/README.md#id\"><svg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"><path fill-rule=\"evenodd\" d=\"M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z\"></path></svg></a>id</h4>", "#### id\n"},
{"20", "<h2 id=\"whats-markdown\">What’s Markdown?<a class=\"anchorjs-link \" aria-label=\"Anchor\" data-anchorjs-icon=\"\uE9CB\" href=\"https://www.markdownguide.org/getting-started/#whats-markdown\"></a></h2>", "## What’s Markdown?\n"},
{"19", "<pre><span>`foo`</span></pre>", "`foo`\n"},
{"18", "<del>foo</del>", "~foo~\n"},
{"17", "<img src=\"bar.png\" alt=\"foo\">", "![foo](bar.png)\n"},
{"16", "foo<br>bar", "foo\nbar\n"},
{"15", "<em>foo</em>", "*foo*\n"},
{"14", "<hr>", "---\n"},
{"13", "<blockquote>foo</blockquote>", "> foo\n"},
{"12", "<h1>foo</h1>", "# foo\n"},
{"11", "<li>foo</li><li>bar</li>", "* foo\n* bar\n"},
{"10", `<p data-block="0">foo'%'bar</p>`, "foo'%'bar\n"},
{"9", `<code class="language-text">&gt;</code>`, "`>`\n"},
{"8", `<div><a href="/bar">foo</a></div>`, "[foo](/bar)\n"},
{"7", `<ul><li><p>Java</p><ul><li><p>Spring</p></li></ul></li></ul>`, "* Java\n * Spring\n"},
{"6", `<!--StartFragment--><p>这是一篇讲解如何正确使用<span>&nbsp;</span><strong>Markdown</strong><span>&nbsp;</span>的排版示例,学会这个很有必要,能让你的文章有更佳清晰的排版。</p><!--EndFragment-->`, "这是一篇讲解如何正确使用 **Markdown** 的排版示例,学会这个很有必要,能让你的文章有更佳清晰的排版。\n"},
{"5", `<!--StartFragment--><ul><li><input checked="" disabled="" type="checkbox"><span>&nbsp;</span>发布 Solo</li></ul><!--EndFragment-->`, "* [X] 发布 Solo\n"},
{"4", "<span>&nbsp;</span>发布 Solo", "发布 Solo\n"},
{"3", "<pre><ul><li>foo</li></ul></pre>", "<pre><ul><li><p>foo</p></li></ul></pre>\n"},
{"2", "<pre><span>//&#32;Lute&#32;-&#32;A&#32;structured&#32;markdown&#32;engine.<br></span><span>//&#32;Copyright&#32;(c)&#32;2019-present,&#32;b3log.org</span></pre>", "// Lute - A structured Markdown engine.\n// Copyright (c) 2019-present, b3log.org\n"},
{"1", "<meta charset='utf-8'><span>foo</span>", "foo\n"},
{"0", "<html><body><!--StartFragment--><p>foo</p><!--EndFragment--></body></html>", "foo\n"},
// {"27", `<html>
//<body>
// <table>
// <thead>
// <tr>
// <th>Month</th>
// <th>Savings</th>
// </tr>
// </thead>
// <tbody>
// <tr>
// <td>January</td>
// <td>$100</td>
// </tr>
// <tr>
// <td>February</td>
// <td>$80</td>
// </tr>
// </tbody>
// </table>
//</body>
//</html>`, "| Month | Savings |\n| - | - |\n| January | $100 |\n| February | $80 |\n"},
// {"26", "<table class=\"markdown-reference\"><thead><tr><th>Type</th><th class=\"second-example\">Or</th><th>… to Get</th></tr></thead><tbody><tr><td class=\"preformatted\">*Italic*</td><td class=\"preformatted second-example\">_Italic_</td><td><em>Italic</em></td></tr><tr><td class=\"preformatted\">**Bold**</td><td class=\"preformatted second-example\">__Bold__</td><td><strong>Bold</strong></td></tr><tr><td class=\"preformatted\"># Heading 1</td><td class=\"preformatted second-example\">Heading 1<br>=========</td><td><h1 class=\"smaller-h1\">Heading 1</h1></td></tr><tr><td class=\"preformatted\">## Heading 2</td><td class=\"preformatted second-example\">Heading 2<br>---------</td><td><h2 class=\"smaller-h2\">Heading 2</h2></td></tr><tr><td class=\"preformatted\">[Link](http://a.com)</td><td class=\"preformatted second-example\">[Link][1]<br>⋮<br>[1]: http://b.org</td><td><a href=\"https://commonmark.org/\">Link</a></td></tr><tr><td class=\"preformatted\">![Image](http://url/a.png)</td><td class=\"preformatted second-example\">![Image][1]<br>⋮<br>[1]: http://url/b.jpg</td><td><img src=\"https://commonmark.org/help/images/favicon.png\" width=\"36\" height=\"36\" alt=\"Markdown\"></td></tr><tr><td class=\"preformatted\">&gt; Blockquote</td><td class=\"preformatted second-example\">&nbsp;</td><td><blockquote>Blockquote</blockquote></td></tr><tr><td class=\"preformatted\"><p>* List<br>* List<br>* List</p></td><td class=\"preformatted second-example\"><p>- List<br>- List<br>- List<br></p></td><td><ul><li>List</li><li>List</li><li>List</li></ul></td></tr></tbody></table>", "| Type | Or | … to Get |\n| - | - | - |\n| *Italic* | _Italic_ | *Italic* |\n| **Bold** | __Bold__ | **Bold** |\n| # Heading 1 | Heading 1<br/>========= | # Heading 1 |\n| ## Heading 2 | Heading 2<br/>--------- | ## Heading 2 |\n| [Link](http://a.com) | [Link][1]<br/>⋮<br/>[1]: http://b.org | [Link](https://commonmark.org/) |\n| ![Image](http://url/a.png) | ![Image][1]<br/>⋮<br/>[1]: 
http://url/b.jpg | ![Markdown](https://commonmark.org/help/images/favicon.png) |\n| > Blockquote | | > Blockquote |\n| * List<br/>* List<br/>* List | - List<br/>- List<br/>- List<br/> | * List* List* List |\n"},
// {"25", "<table class=\"table table-bordered\"><thead class=\"thead-light\"><tr><th>Element</th><th>Markdown Syntax</th></tr></thead><tbody><tr><td><a href=\"https://www.markdownguide.org/extended-syntax/#tables\">Table</a></td><td><code>| Syntax | Description |<br>| ----------- | ----------- |<br>| Header | Title |<br>| Paragraph | Text |</code></td></tr><tr><td><a href=\"https://www.markdownguide.org/extended-syntax/#fenced-code-blocks\">Fenced Code Block</a></td><td><code>```<br>{<br>&nbsp;&nbsp;\"firstName\": \"John\",<br>&nbsp;&nbsp;\"lastName\": \"Smith\",<br>&nbsp;&nbsp;\"age\": 25<br>}<br>```</code></td></tr></tbody></table>", "| Element | Markdown Syntax |\n| - | - |\n| [Table](https://www.markdownguide.org/extended-syntax/#tables) | <code>\\| Syntax \\| Description \\|\\| ----------- \\| ----------- \\|\\| Header \\| Title \\|\\| Paragraph \\| Text \\|</code> |\n| [Fenced Code Block](https://www.markdownguide.org/extended-syntax/#fenced-code-blocks) | <code>```{\u00a0\u00a0\"firstName\": \"John\",\u00a0\u00a0\"lastName\": \"Smith\",\u00a0\u00a0\"age\": 25}```</code> |\n"},
// {"24", "<table><thead><tr><th>Element</th><th>Markdown Syntax</th></tr></thead><tbody><tr><td>Table</td><td><code>| Syntax | Description |<br>| ----------- | ----------- |<br>| Header | Title |<br>| Paragraph | Text |</code></td></tr></tbody></table>", "| Element | Markdown Syntax |\n| - | - |\n| Table | <code>\\| Syntax \\| Description \\|\\| ----------- \\| ----------- \\|\\| Header \\| Title \\|\\| Paragraph \\| Text \\|</code> |\n"},
// {"23", "<h2 style=\"box-sizing: border-box; margin-top: 24px; margin-bottom: 16px; font-weight: 600; font-size: 1.5em; line-height: 1.25; padding-bottom: 0.3em; border-bottom: 1px solid rgb(234, 236, 239); color: rgb(36, 41, 46); font-family: -apple-system, BlinkMacSystemFont, &quot;Segoe UI&quot;, Helvetica, Arial, sans-serif, &quot;Apple Color Emoji&quot;, &quot;Segoe UI Emoji&quot;; font-style: normal; font-variant-ligatures: normal; font-variant-caps: normal; letter-spacing: normal; orphans: 2; text-align: start; text-indent: 0px; text-transform: none; white-space: normal; widows: 2; word-spacing: 0px; -webkit-text-stroke-width: 0px; background-color: rgb(255, 255, 255); text-decoration-style: initial; text-decoration-color: initial;\"><g-emoji class=\"g-emoji\" alias=\"m\" fallback-src=\"https://github.githubassets.com/images/icons/emoji/unicode/24c2.png\" style=\"box-sizing: border-box; font-family: &quot;Apple Color Emoji&quot;, &quot;Segoe UI&quot;, &quot;Segoe UI Emoji&quot;, &quot;Segoe UI Symbol&quot;; font-size: 1.2em; font-weight: 400; line-height: 20px; vertical-align: middle; font-style: normal !important;\">Ⓜ️</g-emoji><span> </span>Markdown User Guide</h2>", "## Ⓜ️ Markdown User Guide\n"},
// {"22", "<div class=\"highlight highlight-source-shell\"><pre>npm install vditor --save</pre></div>", "```shell\nnpm install vditor --save\n```\n"},
// {"21", "<h4><a id=\"user-content-id\" class=\"anchor\" aria-hidden=\"true\" href=\"https://github.com/Vanessa219/vditor/blob/master/README.md#id\"><svg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"><path fill-rule=\"evenodd\" d=\"M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z\"></path></svg></a>id</h4>", "#### id\n"},
// {"20", "<h2 id=\"whats-markdown\">What’s Markdown?<a class=\"anchorjs-link \" aria-label=\"Anchor\" data-anchorjs-icon=\"\uE9CB\" href=\"https://www.markdownguide.org/getting-started/#whats-markdown\"></a></h2>", "## What’s Markdown?\n"},
// {"19", "<pre><span>`foo`</span></pre>", "`foo`\n"},
// {"18", "<del>foo</del>", "~foo~\n"},
// {"17", "<img src=\"bar.png\" alt=\"foo\">", "![foo](bar.png)\n"},
// {"16", "foo<br>bar", "foo\nbar\n"},
// {"15", "<em>foo</em>", "*foo*\n"},
// {"14", "<hr>", "---\n"},
// {"13", "<blockquote>foo</blockquote>", "> foo\n"},
// {"12", "<h1>foo</h1>", "# foo\n"},
// {"11", "<li>foo</li><li>bar</li>", "* foo\n* bar\n"},
// {"10", `<p data-block="0">foo'%'bar</p>`, "foo'%'bar\n"},
// {"9", `<code class="language-text">&gt;</code>`, "`>`\n"},
// {"8", `<div><a href="/bar">foo</a></div>`, "[foo](/bar)\n"},
// {"7", `<ul><li><p>Java</p><ul><li><p>Spring</p></li></ul></li></ul>`, "* Java\n * Spring\n"},
// {"6", `<!--StartFragment--><p>这是一篇讲解如何正确使用<span>&nbsp;</span><strong>Markdown</strong><span>&nbsp;</span>的排版示例,学会这个很有必要,能让你的文章有更佳清晰的排版。</p><!--EndFragment-->`, "这是一篇讲解如何正确使用 **Markdown** 的排版示例,学会这个很有必要,能让你的文章有更佳清晰的排版。\n"},
// {"5", `<!--StartFragment--><ul><li><input checked="" disabled="" type="checkbox"><span>&nbsp;</span>发布 Solo</li></ul><!--EndFragment-->`, "* [X] 发布 Solo\n"},
// {"4", "<span>&nbsp;</span>发布 Solo", "发布 Solo\n"},
// {"3", "<pre><ul><li>foo</li></ul></pre>", "<pre><ul><li><p>foo</p></li></ul></pre>\n"},
// {"2", "<pre><span>//&#32;Lute&#32;-&#32;A&#32;structured&#32;markdown&#32;engine.<br></span><span>//&#32;Copyright&#32;(c)&#32;2019-present,&#32;b3log.org</span></pre>", "// Lute - A structured Markdown engine.\n// Copyright (c) 2019-present, b3log.org\n"},
// {"1", "<meta charset='utf-8'><span>foo</span>", "foo\n"},
// {"0", "<html><body><!--StartFragment--><p>foo</p><!--EndFragment--></body></html>", "foo\n"},
}

func TestHTML2Md(t *testing.T) {
Expand Down

0 comments on commit c488315

Please sign in to comment.