From af17f899504829914b186707deb4cafb4e0b716a Mon Sep 17 00:00:00 2001 From: Federico Soave Date: Tue, 20 Feb 2018 01:13:04 +0100 Subject: [PATCH 01/76] add commonmark tests for (inline) raw html --- test/new/cm_raw_html.html | 77 ++++++++++++++++++++++++++++++++++++++ test/new/cm_raw_html.md | 78 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 155 insertions(+) create mode 100644 test/new/cm_raw_html.html create mode 100644 test/new/cm_raw_html.md diff --git a/test/new/cm_raw_html.html b/test/new/cm_raw_html.html new file mode 100644 index 0000000000..f3da050ef5 --- /dev/null +++ b/test/new/cm_raw_html.html @@ -0,0 +1,77 @@ +

Raw HTML

+ +

Example 584

+ +

+ +

Example 585

+ +

+ +

Example 586

+ +

+ +

Example 587

+ +

+ +

Example 588

+ +

Foo

+ +

Example 589

+ +

<33> <__>

+ +

Example 590

+ +

<a h*#ref="hi">

+ +

Example 591

+ +

<a href="hi'> <a href=hi'>

+ +

Example 592

+ +

< a>< +foo><bar/ >

+ +

Example 593

+ +

<a href='bar'title=title>

+ +

Example 594

+ +

+ +

Example 595

+ +

</a href="foo">

+ +

Example 596

+ +

foo

+ +

Example 599

+ +

foo

+ +

Example 600

+ +

foo

+ +

Example 601

+ +

foo &<]]>

+ +

Example 602

+ +

foo

+ +

Example 603

+ +

foo

diff --git a/test/new/cm_raw_html.md b/test/new/cm_raw_html.md new file mode 100644 index 0000000000..becbb8a64b --- /dev/null +++ b/test/new/cm_raw_html.md @@ -0,0 +1,78 @@ +Raw HTML +=================== + +### Example 584 + + + +### Example 585 + + + +### Example 586 + + + +### Example 587 + + + +### Example 588 + +Foo + +### Example 589 + +<33> <__> + +### Example 590 + + + +### Example 591 + + + +### Example 596 + +foo + +### Example 599 + +foo + +### Example 600 + +foo + +### Example 601 + +foo &<]]> + +### Example 602 + +foo + +### Example 603 + +foo From dff687fa0e422ecd4d0d86ef01bc43d9674cd81a Mon Sep 17 00:00:00 2001 From: Federico Soave Date: Sat, 27 Jan 2018 04:45:04 +0100 Subject: [PATCH 02/76] add commonmark tests for html comments --- test/new/html_comments.html | 33 +++++++++++++++++++++++++++++++++ test/new/html_comments.md | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+) create mode 100644 test/new/html_comments.html create mode 100644 test/new/html_comments.md diff --git a/test/new/html_comments.html b/test/new/html_comments.html new file mode 100644 index 0000000000..6687308a91 --- /dev/null +++ b/test/new/html_comments.html @@ -0,0 +1,33 @@ + + + + + + + + + + + + + + + + + + + + + + +
<!-- too much indentation -->
+
+ +

<!--> not a comment -->

+ +

<!---> not a comment -->

+ + --> \ No newline at end of file diff --git a/test/new/html_comments.md b/test/new/html_comments.md new file mode 100644 index 0000000000..df045fb410 --- /dev/null +++ b/test/new/html_comments.md @@ -0,0 +1,32 @@ + + + + + + + + + + + + + + + + + + + + + + + + + not a comment --> + + not a comment --> + + --> \ No newline at end of file From 0ecf6ab03f29546d71a79f9207b25bf76e43b3a1 Mon Sep 17 00:00:00 2001 From: Federico Soave Date: Tue, 20 Feb 2018 01:19:44 +0100 Subject: [PATCH 03/76] new inline html rule, to comply with commonmark --- lib/marked.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/marked.js b/lib/marked.js index 5ca95941ec..1ab8984bcc 100644 --- a/lib/marked.js +++ b/lib/marked.js @@ -461,7 +461,7 @@ var inline = { escape: /^\\([\\`*{}\[\]()#+\-.!_>])/, autolink: /^<(scheme:[^\s\x00-\x1f<>]*|email)>/, url: noop, - tag: /^|^<\/?[a-zA-Z0-9\-]+(?:"[^"]*"|'[^']*'|\s[^<'">\/]*)*?\/?>/, + tag: /^|^<\/[a-zA-Z][\w\-]*\s*>|^<[a-zA-Z][\w\-]*(?:\s+[\w:-]+(?:\s*=\s*(?:"[^"]*"|'[^']*'|[^\s"'=<>`]+))?)*?\s*\/?>|^<\?[\s\S]*?\?>|^|^/, link: /^!?\[(inside)\]\(href\)/, reflink: /^!?\[(inside)\]\s*\[([^\]]*)\]/, nolink: /^!?\[((?:\[[^\]]*\]|\\[\[\]]|[^\[\]])*)\]/, From 12df8c0c7af98f0a0ac3cf058034eb86c6927b44 Mon Sep 17 00:00:00 2001 From: Federico Soave Date: Sat, 27 Jan 2018 03:58:48 +0100 Subject: [PATCH 04/76] new html comment rule to comply with commonmark (html5). non-compliant: allow `--` inside html comments. adjust inline tag rule accordingly. --- lib/marked.js | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/lib/marked.js b/lib/marked.js index 1ab8984bcc..3196e563f4 100644 --- a/lib/marked.js +++ b/lib/marked.js @@ -52,8 +52,10 @@ block._tag = '(?!(?:' + '|var|samp|kbd|sub|sup|i|b|u|mark|ruby|rt|rp|bdi|bdo' + '|span|br|wbr|ins|del|img)\\b)\\w+(?!:|[^\\w\\s@]*@)\\b'; +block._comment = //; + block.html = edit(block.html) - .replace('comment', //) + .replace('comment', block._comment) .replace('closed', /<(tag)[\s\S]+?<\/\1>/) .replace('closing', /]*)*?\/?>/) .replace(/tag/g, block._tag) @@ -461,7 +463,7 @@ var inline = { escape: /^\\([\\`*{}\[\]()#+\-.!_>])/, autolink: /^<(scheme:[^\s\x00-\x1f<>]*|email)>/, url: noop, - tag: /^|^<\/[a-zA-Z][\w\-]*\s*>|^<[a-zA-Z][\w\-]*(?:\s+[\w:-]+(?:\s*=\s*(?:"[^"]*"|'[^']*'|[^\s"'=<>`]+))?)*?\s*\/?>|^<\?[\s\S]*?\?>|^|^/, + tag: /^comment|^<\/[a-zA-Z][\w\-]*\s*>|^<[a-zA-Z][\w\-]*(?:\s+[\w:-]+(?:\s*=\s*(?:"[^"]*"|'[^']*'|[^\s"'=<>`]+))?)*?\s*\/?>|^<\?[\s\S]*?\?>|^|^/, link: /^!?\[(inside)\]\(href\)/, reflink: /^!?\[(inside)\]\s*\[([^\]]*)\]/, nolink: /^!?\[((?:\[[^\]]*\]|\\[\[\]]|[^\[\]])*)\]/, @@ -479,7 +481,11 @@ inline._email = /[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+(@)[a-zA-Z0-9](?:[a-zA-Z0-9-]{0 inline.autolink = edit(inline.autolink) .replace('scheme', inline._scheme) .replace('email', inline._email) - .getRegex() + .getRegex(); + +inline.tag = edit(inline.tag) + .replace('comment', block._comment) + .getRegex(); inline._inside = /(?:\[[^\]]*\]|\\[\[\]]|[^\[\]]|\](?=[^\[]*\]))*/; inline._href = /\s*?(?:\s+['"]([\s\S]*?)['"])?\s*/; From 4f563c3584935fccc8aa65cf423cb354bf345205 Mon Sep 17 00:00:00 2001 From: Federico Soave Date: Fri, 23 Feb 2018 20:07:39 +0100 Subject: [PATCH 05/76] allow colons in tag names and attributes. Decrease group depth in regex --- lib/marked.js | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/marked.js b/lib/marked.js index 3196e563f4..99a94226d4 100644 --- a/lib/marked.js +++ b/lib/marked.js @@ -463,7 +463,7 @@ var inline = { escape: /^\\([\\`*{}\[\]()#+\-.!_>])/, autolink: /^<(scheme:[^\s\x00-\x1f<>]*|email)>/, url: noop, - tag: /^comment|^<\/[a-zA-Z][\w\-]*\s*>|^<[a-zA-Z][\w\-]*(?:\s+[\w:-]+(?:\s*=\s*(?:"[^"]*"|'[^']*'|[^\s"'=<>`]+))?)*?\s*\/?>|^<\?[\s\S]*?\?>|^|^/, + tag: /^comment|^<\/[a-zA-Z][\w:-]*\s*>|^<[a-zA-Z][\w:-]*(?:attribute)*?\s*\/?>|^<\?[\s\S]*?\?>|^|^/, link: /^!?\[(inside)\]\(href\)/, reflink: /^!?\[(inside)\]\s*\[([^\]]*)\]/, nolink: /^!?\[((?:\[[^\]]*\]|\\[\[\]]|[^\[\]])*)\]/, @@ -483,8 +483,11 @@ inline.autolink = edit(inline.autolink) .replace('email', inline._email) .getRegex(); +inline._attribute = /\s+[\w:-]+(?:\s*=\s*"[^"]*"|\s*=\s*'[^']*'|\s*=\s*[^\s"'=<>`]+)?/; + inline.tag = edit(inline.tag) .replace('comment', block._comment) + .replace('attribute', inline._attribute) .getRegex(); inline._inside = /(?:\[[^\]]*\]|\\[\[\]]|[^\[\]]|\](?=[^\[]*\]))*/; From d8bba6914ddb8cabd7b98b6e6ea61b855095e062 Mon Sep 17 00:00:00 2001 From: Federico Soave Date: Sun, 4 Mar 2018 03:00:11 +0100 Subject: [PATCH 06/76] allow a regex source string as a parameter for edit() --- lib/marked.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/marked.js b/lib/marked.js index 99a94226d4..03fdba7523 100644 --- a/lib/marked.js +++ b/lib/marked.js @@ -1179,7 +1179,7 @@ function unescape(html) { } function edit(regex, opt) { - regex = regex.source; + regex = regex.source || regex; opt = opt || ''; return { replace: function(name, val) { From 6b2eabb8586ba97a0c58f9c1d9d594b53042d487 Mon Sep 17 00:00:00 2001 From: Federico Soave Date: Sun, 4 Mar 2018 03:00:34 +0100 Subject: [PATCH 07/76] refactor tag inline rule --- lib/marked.js | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/lib/marked.js b/lib/marked.js index 03fdba7523..2dc9f62fc0 100644 --- a/lib/marked.js +++ b/lib/marked.js @@ -463,7 +463,12 @@ var inline = { escape: /^\\([\\`*{}\[\]()#+\-.!_>])/, autolink: /^<(scheme:[^\s\x00-\x1f<>]*|email)>/, url: noop, - tag: /^comment|^<\/[a-zA-Z][\w:-]*\s*>|^<[a-zA-Z][\w:-]*(?:attribute)*?\s*\/?>|^<\?[\s\S]*?\?>|^|^/, + tag: '^comment' + + '|^' // self-closing tag + + '|^<[a-zA-Z][\\w:-]*(?:attribute)*?\\s*/?>' // open tag + + '|^<\\?[\\s\\S]*?\\?>' // processing instruction, e.g. + + '|^' // declaration, e.g. + + '|^', // CDATA section link: /^!?\[(inside)\]\(href\)/, reflink: /^!?\[(inside)\]\s*\[([^\]]*)\]/, nolink: /^!?\[((?:\[[^\]]*\]|\\[\[\]]|[^\[\]])*)\]/, From fc1208f5d570d467adc14f774a7cf7a8e909f17a Mon Sep 17 00:00:00 2001 From: Federico Soave Date: Sat, 24 Feb 2018 06:03:26 +0100 Subject: [PATCH 08/76] refactor html block rule --- lib/marked.js | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/marked.js b/lib/marked.js index 2dc9f62fc0..19111ffda6 100644 --- a/lib/marked.js +++ b/lib/marked.js @@ -20,7 +20,9 @@ var block = { nptable: noop, blockquote: /^( {0,3}> ?(paragraph|[^\n]*)(?:\n|$))+/, list: /^( *)(bull) [\s\S]+?(?:hr|def|\n{2,}(?! )(?!\1bull )\n*|\s*$)/, - html: /^ *(?:comment *(?:\n|\s*$)|closed *(?:\n{2,}|\s*$)|closing *(?:\n{2,}|\s*$))/, + html: '^ *(?:comment *(?:\\n|\\s*$)' + + '|closed *(?:\\n{2,}|\\s*$)' + + '|closing *(?:\\n{2,}|\\s*$))', def: /^ {0,3}\[(label)\]: *\n? *]+)>?(?:(?: +\n? *| *\n *)(title))? *(?:\n+|$)/, table: noop, lheading: /^([^\n]+)\n *(=|-){2,} *(?:\n+|$)/, From fd6e4e002efbc0e48d9bafab26aab077993bec18 Mon Sep 17 00:00:00 2001 From: Federico Soave Date: Tue, 20 Feb 2018 01:16:51 +0100 Subject: [PATCH 09/76] rename gfm_links -> gfm_autolinks --- test/new/{gfm_links.html => gfm_autolinks.html} | 0 test/new/{gfm_links.md => gfm_autolinks.md} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename test/new/{gfm_links.html => gfm_autolinks.html} (100%) rename test/new/{gfm_links.md => gfm_autolinks.md} (100%) diff --git a/test/new/gfm_links.html b/test/new/gfm_autolinks.html similarity index 100% rename from test/new/gfm_links.html rename to test/new/gfm_autolinks.html diff --git a/test/new/gfm_links.md b/test/new/gfm_autolinks.md similarity index 100% rename from test/new/gfm_links.md rename to test/new/gfm_autolinks.md From 842e5d72876747f083232f57467d936a48bab10f Mon Sep 17 00:00:00 2001 From: Federico Soave Date: Mon, 26 Feb 2018 03:17:48 +0100 Subject: [PATCH 10/76] rename test headings-id -> headings_id --- test/new/{headings-id.html => headings_id.html} | 0 test/new/{headings-id.md => headings_id.md} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename test/new/{headings-id.html => headings_id.html} (100%) rename test/new/{headings-id.md => headings_id.md} (100%) diff --git a/test/new/headings-id.html b/test/new/headings_id.html similarity index 100% rename from test/new/headings-id.html rename to test/new/headings_id.html diff --git a/test/new/headings-id.md b/test/new/headings_id.md similarity index 100% rename from test/new/headings-id.md rename to test/new/headings_id.md From 1cac5ba56afaf02b4f766e7f54a1f09fda97b8c0 Mon Sep 17 00:00:00 2001 From: Federico Soave Date: Mon, 26 Feb 2018 03:19:20 +0100 Subject: [PATCH 11/76] remove superfluous test fixture (already covered by original/links_reference_style and new/cm_link_defs) --- test/new/links_reference_style.html | 62 ---------------------- test/new/links_reference_style.md | 81 ----------------------------- 2 files changed, 143 deletions(-) delete mode 100644 test/new/links_reference_style.html delete mode 100644 test/new/links_reference_style.md diff --git a/test/new/links_reference_style.html b/test/new/links_reference_style.html deleted file mode 100644 index e12da3f20a..0000000000 --- a/test/new/links_reference_style.html +++ /dev/null @@ -1,62 +0,0 @@ -

Foo bar.

- -

Foo bar.

- -

Foo bar.

- -

With embedded [brackets].

- -

Indented once.

- -

Indented twice.

- -

Indented thrice.

- -

Indented [four][] times.

- -
[four]: /url
-
- -
- -

this should work

- -

So should this.

- -

And this.

- -

And this.

- -

And this.

- -

But not [that] [].

- -

Nor [that][].

- -

Nor [that].

- -

[Something in brackets like this should work]

- -

[Same with this.]

- -

In this case, this points to something else.

- -

Backslashing should suppress [this] and [this].

- -

A link reference definition cannot interrupt a paragraph. -[bar]: /baz

-

[bar]

- -

However, it can directly follow other block elements, such as headings

-

Foo

-
-

bar

-
- -
- -

Here's one where the link -breaks across lines.

- -

Here's another where the link -breaks across lines, but with a line-ending space.

diff --git a/test/new/links_reference_style.md b/test/new/links_reference_style.md deleted file mode 100644 index 39aa93bbae..0000000000 --- a/test/new/links_reference_style.md +++ /dev/null @@ -1,81 +0,0 @@ -Foo [bar] [1]. - -Foo [bar][1]. - -Foo [bar] -[1]. - -[1]: /url/ "Title" - - -With [embedded [brackets]] [b]. - - -Indented [once][]. - -Indented [twice][]. - -Indented [thrice][]. - -Indented [four][] times. - - [once]: /url - - [twice]: /url - - [thrice]: /url - - [four]: /url - - -[b]: /url/ - -* * * - -[this] [this] should work - -So should [this][this]. - -And [this] []. - -And [this][]. - -And [this]. - -But not [that] []. - -Nor [that][]. - -Nor [that]. - -[Something in brackets like [this][] should work] - -[Same with [this].] - -In this case, [this](/somethingelse/) points to something else. - -Backslashing should suppress \[this] and [this\]. - -[this]: foo - -A link reference definition cannot interrupt a paragraph. -[bar]: /baz - -[bar] - -However, it can directly follow other block elements, such as headings - -# [Foo] -[foo]: /url -> bar - -* * * - -Here's one where the [link -breaks] across lines. - -Here's another where the [link -breaks] across lines, but with a line-ending space. - - -[link breaks]: /url/ From 104c0f0e251e0d0efce91c8a501796c10785e89c Mon Sep 17 00:00:00 2001 From: Federico Soave Date: Sun, 4 Mar 2018 03:37:38 +0100 Subject: [PATCH 12/76] !fixup b2611c1b05 Disallow colons in tag names --- lib/marked.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/marked.js b/lib/marked.js index 19111ffda6..a0bd216d8a 100644 --- a/lib/marked.js +++ b/lib/marked.js @@ -467,7 +467,7 @@ var inline = { url: noop, tag: '^comment' + '|^' // self-closing tag - + '|^<[a-zA-Z][\\w:-]*(?:attribute)*?\\s*/?>' // open tag + + '|^<[a-zA-Z][\\w-]*(?:attribute)*?\\s*/?>' // open tag + '|^<\\?[\\s\\S]*?\\?>' // processing instruction, e.g. + '|^' // declaration, e.g. + '|^', // CDATA section @@ -490,7 +490,7 @@ inline.autolink = edit(inline.autolink) .replace('email', inline._email) .getRegex(); -inline._attribute = /\s+[\w:-]+(?:\s*=\s*"[^"]*"|\s*=\s*'[^']*'|\s*=\s*[^\s"'=<>`]+)?/; +inline._attribute = /\s+[a-zA-Z:_][\w.:-]*(?:\s*=\s*"[^"]*"|\s*=\s*'[^']*'|\s*=\s*[^\s"'=<>`]+)?/; inline.tag = edit(inline.tag) .replace('comment', block._comment) From 35c63f755647c457a8692c4e2e75c5278903aa57 Mon Sep 17 00:00:00 2001 From: Federico Soave Date: Sun, 4 Mar 2018 15:56:55 +0100 Subject: [PATCH 13/76] divide html_comments test into subtests --- test/new/html_comments.html | 24 ++++++++++++++++++++++++ test/new/html_comments.md | 28 ++++++++++++++++++++++++++-- 2 files changed, 50 insertions(+), 2 deletions(-) diff --git a/test/new/html_comments.html b/test/new/html_comments.html index 6687308a91..1bf99aafb6 100644 --- a/test/new/html_comments.html +++ b/test/new/html_comments.html @@ -1,31 +1,55 @@ +

Example 1

+ +

Example 2

+ +

Example 3

+ +

Example 4

+ +

Example 5

+ +

Example 6

+ +

Example 7

+ +

Example 8

+ +

Example 9

+ +

Example 10

+ +

Example 11

+
<!-- too much indentation -->
 
+

Example 12

+

<!--> not a comment -->

<!---> not a comment -->

diff --git a/test/new/html_comments.md b/test/new/html_comments.md index df045fb410..06aff02e1d 100644 --- a/test/new/html_comments.md +++ b/test/new/html_comments.md @@ -1,30 +1,54 @@ +### Example 1 + +### Example 2 + - + +### Example 3 + +### Example 4 + - + +### Example 5 + +### Example 6 + +### Example 7 + +### Example 8 + +### Example 9 + +### Example 10 + +### Example 11 + +### Example 12 + not a comment --> not a comment --> From 6e8640a49b62066e0405a676928f148f7cf978e2 Mon Sep 17 00:00:00 2001 From: Federico Soave Date: Sun, 4 Mar 2018 16:03:17 +0100 Subject: [PATCH 14/76] option `pedantic` overrides `gfm`, and turns off gfm, tables and breaks. FIXME update README --- lib/marked.js | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/marked.js b/lib/marked.js index a0bd216d8a..956f63cd20 100644 --- a/lib/marked.js +++ b/lib/marked.js @@ -115,7 +115,7 @@ function Lexer(options) { this.options = options || marked.defaults; this.rules = block.normal; - if (this.options.gfm) { + if (!this.options.pedantic && this.options.gfm) { if (this.options.tables) { this.rules = block.tables; } else { @@ -565,14 +565,14 @@ function InlineLexer(links, options) { throw new Error('Tokens array requires a `links` property.'); } - if (this.options.gfm) { + if (this.options.pedantic) { + this.rules = inline.pedantic; + } else if (this.options.gfm) { if (this.options.breaks) { this.rules = inline.breaks; } else { this.rules = inline.gfm; } - } else if (this.options.pedantic) { - this.rules = inline.pedantic; } } From 6fa0a270b946e1841ba1a6a2a18215e724ccd569 Mon Sep 17 00:00:00 2001 From: Federico Soave Date: Sun, 4 Mar 2018 16:09:17 +0100 Subject: [PATCH 15/76] add commonmark tests for html blocks --- test/new/cm_html_blocks.html | 299 +++++++++++++++++++++++++++++++++ test/new/cm_html_blocks.md | 311 +++++++++++++++++++++++++++++++++++ 2 files changed, 610 insertions(+) create mode 100644 test/new/cm_html_blocks.html create mode 100644 test/new/cm_html_blocks.md diff --git a/test/new/cm_html_blocks.html b/test/new/cm_html_blocks.html new file mode 100644 index 0000000000..623e5f1094 --- /dev/null +++ b/test/new/cm_html_blocks.html @@ -0,0 +1,299 @@ +

HTML blocks

+ +

Example 116

+ +
+
+**Hello**,
+

world. +

+
+ +

Example 117

+ + + + + +
+ hi +
+

okay.

+ +

Example 118

+ + +*foo* + +

Example 120

+ +
+

Markdown

+
+ +

Example 121

+ +
+
+ +

Example 122

+ +
+
+ +

Example 123

+ +
+*foo* +

bar

+ +

Example 124

+ +
Example 125 + +
Example 126 + +
Example 127 + + + +

Example 128

+ +
+foo +
+ +

Example 129

+ +
+``` c +int x = 33; +``` + +

Example 130

+ + +*bar* + + +

Example 131

+ + +*bar* + + +

Example 132

+ + +*bar* + + +

Example 133

+ + +*bar* + +

Example 134

+ + +*foo* + + +

Example 135

+ + +

foo

+
+ +

Example 136

+ +

foo

+ +

Example 137

+ +

+import Text.HTML.TagSoup
+
+main :: IO ()
+main = print $ parseTags tags
+
+

okay

+ +

Example 138

+ + +

okay

+ +

Example 139

+ + +

okay

+ +

Example 140

+ + +

foo

+ +

Example 144

+ +*bar* +

baz

+ +

Example 145

+ +1. *bar* + +

Example 146

+ + +

okay

+ +

Example 147

+ +'; + +?> +

okay

+ +

Example 148

+ + + +

Example 149

+ + +

okay

+ +

Example 150

+ + +
<!-- foo -->
+
+ +

Example 151

+ +
+
<div>
+
+ +

Example 152

+ +

Foo

+
+bar +
+ +

Example 153

+ +
+bar +
+*foo* + +

Example 154

+ +

Foo + +baz

+ +

Example 155

+ +
+

Emphasized text.

+
+ +

Example 156

+ +
+*Emphasized* text. +
+ +

Example 157

+ + + + + +
+Hi +
+ +

Example 158

+ + + +
<td>
+  Hi
+</td>
+
+ +
+ diff --git a/test/new/cm_html_blocks.md b/test/new/cm_html_blocks.md new file mode 100644 index 0000000000..89e3b17259 --- /dev/null +++ b/test/new/cm_html_blocks.md @@ -0,0 +1,311 @@ +HTML blocks +=================== + +### Example 116 + +
+
+**Hello**,
+
+_world_.
+
+
+ +### Example 117 + + + + + +
+ hi +
+ +okay. + +### Example 118 + +
+*foo* + +### Example 120 + +
+ +*Markdown* + +
+ +### Example 121 + +
+
+ +### Example 122 + +
+
+ +### Example 123 + +
+*foo* + +*bar* + +### Example 124 + + + +### Example 128 + +
+foo +
+ +### Example 129 + +
+``` c +int x = 33; +``` + +### Example 130 + + +*bar* + + +### Example 131 + + +*bar* + + +### Example 132 + + +*bar* + + +### Example 133 + + +*bar* + +### Example 134 + + +*foo* + + +### Example 135 + + + +*foo* + + + +### Example 136 + +*foo* + +### Example 137 + +

+import Text.HTML.TagSoup
+
+main :: IO ()
+main = print $ parseTags tags
+
+okay + +### Example 138 + + +okay + +### Example 139 + + +okay + +### Example 140 + + +*foo* + +### Example 144 + +*bar* +*baz* + +### Example 145 + +1. *bar* + +### Example 146 + + +okay + +### Example 147 + +'; + +?> +okay + +### Example 148 + + + +### Example 149 + + +okay + +### Example 150 + + + + + +### Example 151 + +
+ +
+ +### Example 152 + +Foo +
+bar +
+ +### Example 153 + +
+bar +
+*foo* + +### Example 154 + +Foo + +baz + +### Example 155 + +
+ +*Emphasized* text. + +
+ +### Example 156 + +
+*Emphasized* text. +
+ +### Example 157 + + + + + + + + + +
+Hi +
+ +### Example 158 + + + + + + + + + +
+ Hi +
+ From eae2948a9544255ce68565eaf3be007c213f6aef Mon Sep 17 00:00:00 2001 From: Federico Soave Date: Sun, 4 Mar 2018 16:12:22 +0100 Subject: [PATCH 16/76] save the current html block parsing in the pedantic mode --- lib/marked.js | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/lib/marked.js b/lib/marked.js index 956f63cd20..5d74f7548a 100644 --- a/lib/marked.js +++ b/lib/marked.js @@ -105,6 +105,23 @@ block.tables = merge({}, block.gfm, { table: /^ *\|(.+)\n *\|( *[-:]+[-| :]*)\n((?: *\|.*(?:\n|$))*)\n*/ }); +/** + * Pedantic grammar + */ + +block.pedantic = merge({}, block.normal, { + html: edit( + '^ *(?:comment *(?:\\n|\\s*$)' + + '|<(tag)[\\s\\S]+? *(?:\\n{2,}|\\s*$)' // closed tag + + '|]*)*?/?> *(?:\\n{2,}|\\s*$))') + .replace('comment', block._comment) + .replace(/tag/g, '(?!(?:' + + 'a|em|strong|small|s|cite|q|dfn|abbr|data|time|code|var|samp|kbd|sub' + + '|sup|i|b|u|mark|ruby|rt|rp|bdi|bdo|span|br|wbr|ins|del|img)' + + '\\b)\\w+(?!:|[^\\w\\s@]*@)\\b') + .getRegex() +}); + /** * Block Lexer */ @@ -115,7 +132,9 @@ function Lexer(options) { this.options = options || marked.defaults; this.rules = block.normal; - if (!this.options.pedantic && this.options.gfm) { + if (this.options.pedantic) { + this.rules = block.pedantic; + } else if (this.options.gfm) { if (this.options.tables) { this.rules = block.tables; } else { From bc75b30282f64bae0001d382a55b99d6f1c99b45 Mon Sep 17 00:00:00 2001 From: Federico Soave Date: Sun, 4 Mar 2018 16:18:46 +0100 Subject: [PATCH 17/76] new rule for html blocks, to comply with commonmark. BREAKING CHANGE: inline markdown is not parsed inside html blocks. --- lib/marked.js | 42 +++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/lib/marked.js b/lib/marked.js index 5d74f7548a..abb3d99441 100644 --- a/lib/marked.js +++ b/lib/marked.js @@ -20,13 +20,20 @@ var block = { nptable: noop, blockquote: /^( {0,3}> ?(paragraph|[^\n]*)(?:\n|$))+/, list: /^( *)(bull) [\s\S]+?(?:hr|def|\n{2,}(?! )(?!\1bull )\n*|\s*$)/, - html: '^ *(?:comment *(?:\\n|\\s*$)' - + '|closed *(?:\\n{2,}|\\s*$)' - + '|closing *(?:\\n{2,}|\\s*$))', + html: '^ {0,3}(?:' // optional indentation + + '<(script|pre|style)[\\s>][\\s\\S]*?(?:[^\\n]*\\n+|$)' // (1) + + '|comment[^\\n]*(\\n+|$)' // (2) + + '|<\\?[\\s\\S]*?\\?>\\n*' // (3) + + '|\\n*' // (4) + + '|\\n*' // (5) + + '|)[\\s\\S]*?(?:\\n{2,}|$)' // (6) + + '|<(?!script|pre|style)([a-z][\\w-]*)(?:attribute)*? */?>(?=\\h*\\n)[\\s\\S]*?(?:\\n{2,}|$)' // (7) open tag + + '|(?=\\h*\\n)[\\s\\S]*?(?:\\n{2,}|$)' // (7) closing tag + + ')', def: /^ {0,3}\[(label)\]: *\n? *]+)>?(?:(?: +\n? *| *\n *)(title))? *(?:\n+|$)/, table: noop, lheading: /^([^\n]+)\n *(=|-){2,} *(?:\n+|$)/, - paragraph: /^([^\n]+(?:\n?(?!hr|heading|lheading| {0,3}>|tag)[^\n]+)+)/, + paragraph: /^([^\n]+(?:\n(?!hr|heading|lheading| {0,3}>|<\/?(?:tag)(?: +|\\n|\/?>)|<(?:script|pre|style|!--))[^\n]+)+)/, text: /^[^\n]+/ }; @@ -49,25 +56,24 @@ block.list = edit(block.list) .replace('def', '\\n+(?=' + block.def.source + ')') .getRegex(); -block._tag = '(?!(?:' - + 'a|em|strong|small|s|cite|q|dfn|abbr|data|time|code' - + '|var|samp|kbd|sub|sup|i|b|u|mark|ruby|rt|rp|bdi|bdo' - + '|span|br|wbr|ins|del|img)\\b)\\w+(?!:|[^\\w\\s@]*@)\\b'; - +block._tag = 'address|article|aside|base|basefont|blockquote|body|caption' + + '|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption' + + '|figure|footer|form|frame|frameset|h[1-6]|head|header|hr|html|iframe' + + '|legend|li|link|main|menu|menuitem|meta|nav|noframes|ol|optgroup|option' + + '|p|param|section|source|summary|table|tbody|td|tfoot|th|thead|title|tr' + + '|track|ul'; block._comment = //; - -block.html = edit(block.html) +block.html = edit(block.html, 'i') .replace('comment', block._comment) - .replace('closed', /<(tag)[\s\S]+?<\/\1>/) - .replace('closing', /]*)*?\/?>/) - .replace(/tag/g, block._tag) + .replace('tag', block._tag) + .replace('attribute', / +[a-zA-Z:_][\w.:-]*(?: *= *"[^"]*"| *= *'[^']*'| *= *[^\s"'=<>`]+)?/) .getRegex(); block.paragraph = edit(block.paragraph) .replace('hr', block.hr) .replace('heading', block.heading) .replace('lheading', block.lheading) - .replace('tag', '<' + block._tag) + .replace('tag', block._tag) // pars can be interrupted by type (6) html blocks .getRegex(); block.blockquote = edit(block.blockquote) @@ -1163,10 +1169,8 @@ Parser.prototype.tok = function() { return this.renderer.listitem(body); } case 'html': { - var html = !this.token.pre && !this.options.pedantic - ? this.inline.output(this.token.text) - : this.token.text; - return this.renderer.html(html); + //TODO parse inline content if parameter markdown=1 + return this.renderer.html(this.token.text); } case 'paragraph': { return this.renderer.paragraph(this.inline.output(this.token.text)); From 53f5b3945d765ec223476a66b7bd7c765947f974 Mon Sep 17 00:00:00 2001 From: Federico Soave Date: Sun, 4 Mar 2018 16:26:15 +0100 Subject: [PATCH 18/76] adjust html_comments test case in accordance with commonmark --- test/new/html_comments.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/new/html_comments.html b/test/new/html_comments.html index 1bf99aafb6..872b45f6ae 100644 --- a/test/new/html_comments.html +++ b/test/new/html_comments.html @@ -54,4 +54,4 @@

Example 12

<!---> not a comment -->

- --> \ No newline at end of file + --> From 273fb3c5be09f170224abcad547cf49b7a1ca526 Mon Sep 17 00:00:00 2001 From: Federico Soave Date: Mon, 5 Mar 2018 22:10:45 +0100 Subject: [PATCH 19/76] rearrange test in cm_html_blocks to test end of file --- test/new/cm_html_blocks.html | 15 ++++++++------- test/new/cm_html_blocks.md | 15 ++++++++------- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/test/new/cm_html_blocks.html b/test/new/cm_html_blocks.html index 623e5f1094..80fdff579f 100644 --- a/test/new/cm_html_blocks.html +++ b/test/new/cm_html_blocks.html @@ -155,13 +155,6 @@

Example 139

okay

-

Example 140

- - okay -### Example 140 - -