Skip to content

Commit

Permalink
Merge pull request markedjs#1135 from Feder1co5oave/cm_links
Browse files Browse the repository at this point in the history
[commonmark] make html comments, html inlines, html blocks and links compliant
  • Loading branch information
styfle authored Apr 10, 2018
2 parents f314396 + 50fd5f7 commit fb18030
Show file tree
Hide file tree
Showing 28 changed files with 1,955 additions and 209 deletions.
2 changes: 1 addition & 1 deletion docs/USING_ADVANCED.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ console.log(myMarked('I am using __markdown__.'));
|highlight |`function`|A function to highlight code blocks. See also: <a href="#highlight">Asynchronous highlighting</a>. |
|langPrefix |`??` |Default is `lang-`
|mangle |`boolean` |Default is `true`
|pedantic |`boolean` |Conform to obscure parts of `markdown.pl` as much as possible. Don't fix original markdown bugs or behavior. Default: `false`|
|pedantic |`boolean` |Conform to obscure parts of `markdown.pl` as much as possible. Don't fix original markdown bugs or behavior. Turns off and overrides `gfm`. Default: `false`|
|renderer |`object` |An object containing functions to render tokens to HTML. See [extensibility](https://github.com/markedjs/marked/blob/master/docs/USING_PRO.md) for more details. Default: `new Renderer()`|
|sanitize |`boolean` |Ignore HTML passed into `markdownString` (sanitize the input). Default: `false` |
|sanitizer |`??` |Default is `null` |
Expand Down
144 changes: 103 additions & 41 deletions lib/marked.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,25 @@ var block = {
nptable: noop,
blockquote: /^( {0,3}> ?(paragraph|[^\n]*)(?:\n|$))+/,
list: /^( *)(bull) [\s\S]+?(?:hr|def|\n{2,}(?! )(?!\1bull )\n*|\s*$)/,
html: /^ *(?:comment *(?:\n|\s*$)|closed *(?:\n{2,}|\s*$)|closing *(?:\n{2,}|\s*$))/,
html: '^ {0,3}(?:' // optional indentation
+ '<(script|pre|style)[\\s>][\\s\\S]*?(?:</\\1>[^\\n]*\\n+|$)' // (1)
+ '|comment[^\\n]*(\\n+|$)' // (2)
+ '|<\\?[\\s\\S]*?\\?>\\n*' // (3)
+ '|<![A-Z][\\s\\S]*?>\\n*' // (4)
+ '|<!\\[CDATA\\[[\\s\\S]*?\\]\\]>\\n*' // (5)
+ '|</?(tag)(?: +|\\n|/?>)[\\s\\S]*?(?:\\n{2,}|$)' // (6)
+ '|<(?!script|pre|style)([a-z][\\w-]*)(?:attribute)*? */?>(?=\\h*\\n)[\\s\\S]*?(?:\\n{2,}|$)' // (7) open tag
+ '|</(?!script|pre|style)[a-z][\\w-]*\\s*>(?=\\h*\\n)[\\s\\S]*?(?:\\n{2,}|$)' // (7) closing tag
+ ')',
def: /^ {0,3}\[(label)\]: *\n? *<?([^\s>]+)>?(?:(?: +\n? *| *\n *)(title))? *(?:\n+|$)/,
table: noop,
lheading: /^([^\n]+)\n *(=|-){2,} *(?:\n+|$)/,
paragraph: /^([^\n]+(?:\n?(?!hr|heading|lheading| {0,3}>|tag)[^\n]+)+)/,
paragraph: /^([^\n]+(?:\n(?!hr|heading|lheading| {0,3}>|<\/?(?:tag)(?: +|\\n|\/?>)|<(?:script|pre|style|!--))[^\n]+)+)/,
text: /^[^\n]+/
};

block._label = /(?:\\[\[\]]|[^\[\]])+/;
block._title = /(?:"(?:\\"|[^"]|"[^"\n]*")*"|'\n?(?:[^'\n]+\n?)*'|\([^()]*\))/;
block._label = /(?!\s*\])(?:\\[\[\]]|[^\[\]])+/;
block._title = /(?:"(?:\\"?|[^"\\])*"|'[^'\n]*(?:\n[^'\n]+)*\n?'|\([^()]*\))/;
block.def = edit(block.def)
.replace('label', block._label)
.replace('title', block._title)
Expand All @@ -47,23 +56,24 @@ block.list = edit(block.list)
.replace('def', '\\n+(?=' + block.def.source + ')')
.getRegex();

block._tag = '(?!(?:'
+ 'a|em|strong|small|s|cite|q|dfn|abbr|data|time|code'
+ '|var|samp|kbd|sub|sup|i|b|u|mark|ruby|rt|rp|bdi|bdo'
+ '|span|br|wbr|ins|del|img)\\b)\\w+(?!:|[^\\w\\s@]*@)\\b';

block.html = edit(block.html)
.replace('comment', /<!--[\s\S]*?-->/)
.replace('closed', /<(tag)[\s\S]+?<\/\1>/)
.replace('closing', /<tag(?:"[^"]*"|'[^']*'|\s[^'"\/>\s]*)*?\/?>/)
.replace(/tag/g, block._tag)
block._tag = 'address|article|aside|base|basefont|blockquote|body|caption'
+ '|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption'
+ '|figure|footer|form|frame|frameset|h[1-6]|head|header|hr|html|iframe'
+ '|legend|li|link|main|menu|menuitem|meta|nav|noframes|ol|optgroup|option'
+ '|p|param|section|source|summary|table|tbody|td|tfoot|th|thead|title|tr'
+ '|track|ul';
block._comment = /<!--(?!-?>)[\s\S]*?-->/;
block.html = edit(block.html, 'i')
.replace('comment', block._comment)
.replace('tag', block._tag)
.replace('attribute', / +[a-zA-Z:_][\w.:-]*(?: *= *"[^"\n]*"| *= *'[^'\n]*'| *= *[^\s"'=<>`]+)?/)
.getRegex();

block.paragraph = edit(block.paragraph)
.replace('hr', block.hr)
.replace('heading', block.heading)
.replace('lheading', block.lheading)
.replace('tag', '<' + block._tag)
.replace('tag', block._tag) // pars can be interrupted by type (6) html blocks
.getRegex();

block.blockquote = edit(block.blockquote)
Expand Down Expand Up @@ -101,6 +111,24 @@ block.tables = merge({}, block.gfm, {
table: /^ *\|(.+)\n *\|( *[-:]+[-| :]*)\n((?: *\|.*(?:\n|$))*)\n*/
});

/**
* Pedantic grammar
*/

block.pedantic = merge({}, block.normal, {
html: edit(
'^ *(?:comment *(?:\\n|\\s*$)'
+ '|<(tag)[\\s\\S]+?</\\1> *(?:\\n{2,}|\\s*$)' // closed tag
+ '|<tag(?:"[^"]*"|\'[^\']*\'|\\s[^\'"/>\\s]*)*?/?> *(?:\\n{2,}|\\s*$))')
.replace('comment', block._comment)
.replace(/tag/g, '(?!(?:'
+ 'a|em|strong|small|s|cite|q|dfn|abbr|data|time|code|var|samp|kbd|sub'
+ '|sup|i|b|u|mark|ruby|rt|rp|bdi|bdo|span|br|wbr|ins|del|img)'
+ '\\b)\\w+(?!:|[^\\w\\s@]*@)\\b')
.getRegex(),
def: /^ *\[([^\]]+)\]: *<?([^\s>]+)>?(?: +(["(][^\n]+[")]))? *(?:\n+|$)/
});

/**
* Block Lexer
*/
Expand All @@ -111,7 +139,9 @@ function Lexer(options) {
this.options = options || marked.defaults;
this.rules = block.normal;

if (this.options.gfm) {
if (this.options.pedantic) {
this.rules = block.pedantic;
} else if (this.options.gfm) {
if (this.options.tables) {
this.rules = block.tables;
} else {
Expand Down Expand Up @@ -370,7 +400,7 @@ Lexer.prototype.token = function(src, top) {
if (top && (cap = this.rules.def.exec(src))) {
src = src.substring(cap[0].length);
if (cap[3]) cap[3] = cap[3].substring(1, cap[3].length - 1);
tag = cap[1].toLowerCase();
tag = cap[1].toLowerCase().replace(/\s+/g, ' ');
if (!this.tokens.links[tag]) {
this.tokens.links[tag] = {
href: cap[2],
Expand Down Expand Up @@ -461,13 +491,18 @@ Lexer.prototype.token = function(src, top) {
*/

var inline = {
escape: /^\\([\\`*{}\[\]()#+\-.!_>])/,
escape: /^\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/,
autolink: /^<(scheme:[^\s\x00-\x1f<>]*|email)>/,
url: noop,
tag: /^<!--[\s\S]*?-->|^<\/?[a-zA-Z0-9\-]+(?:"[^"]*"|'[^']*'|\s[^<'">\/\s]*)*?\/?>/,
link: /^!?\[(inside)\]\(href\)/,
reflink: /^!?\[(inside)\]\s*\[([^\]]*)\]/,
nolink: /^!?\[((?:\[[^\[\]]*\]|\\[\[\]]|[^\[\]])*)\]/,
tag: '^comment'
+ '|^</[a-zA-Z][\\w:-]*\\s*>' // self-closing tag
+ '|^<[a-zA-Z][\\w-]*(?:attribute)*?\\s*/?>' // open tag
+ '|^<\\?[\\s\\S]*?\\?>' // processing instruction, e.g. <?php ?>
+ '|^<![a-zA-Z]+\\s[\\s\\S]*?>' // declaration, e.g. <!DOCTYPE html>
+ '|^<!\\[CDATA\\[[\\s\\S]*?\\]\\]>', // CDATA section
link: /^!?\[(label)\]\(href(?:\s+(title))?\s*\)/,
reflink: /^!?\[(label)\]\[(?!\s*\])((?:\\[\[\]]?|[^\[\]\\])+)\]/,
nolink: /^!?\[(?!\s*\])((?:\[[^\[\]]*\]|\\[\[\]]|[^\[\]])*)\](?:\[\])?/,
strong: /^__([^\s][\s\S]*?[^\s])__(?!_)|^\*\*([^\s][\s\S]*?[^\s])\*\*(?!\*)|^__([^\s])__(?!_)|^\*\*([^\s])\*\*(?!\*)/,
em: /^_([^\s][\s\S]*?[^\s_])_(?!_)|^_([^\s_][\s\S]*?[^\s])_(?!_)|^\*([^\s][\s\S]*?[^\s*])\*(?!\*)|^\*([^\s*][\s\S]*?[^\s])\*(?!\*)|^_([^\s_])_(?!_)|^\*([^\s*])\*(?!\*)/,
code: /^(`+)\s*([\s\S]*?[^`]?)\s*\1(?!`)/,
Expand All @@ -476,24 +511,34 @@ var inline = {
text: /^[\s\S]+?(?=[\\<!\[`*]|\b_| {2,}\n|$)/
};

inline._escapes = /\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/g;

inline._scheme = /[a-zA-Z][a-zA-Z0-9+.-]{1,31}/;
inline._email = /[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+(@)[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+(?![-_])/;

inline.autolink = edit(inline.autolink)
.replace('scheme', inline._scheme)
.replace('email', inline._email)
.getRegex()
.getRegex();

inline._attribute = /\s+[a-zA-Z:_][\w.:-]*(?:\s*=\s*"[^"]*"|\s*=\s*'[^']*'|\s*=\s*[^\s"'=<>`]+)?/;

inline._inside = /(?:\[[^\[\]]*\]|\\[\[\]]|[^\[\]]|\](?=[^\[]*\]))*/;
inline._href = /\s*<?([\s\S]*?)>?(?:\s+['"]([\s\S]*?)['"])?\s*/;
inline.tag = edit(inline.tag)
.replace('comment', block._comment)
.replace('attribute', inline._attribute)
.getRegex();

inline._label = /(?:\[[^\[\]]*\]|\\[\[\]]?|`[^`]*`|[^\[\]\\])*?/;
inline._href = /\s*(<(?:\\[<>]?|[^\s<>\\])*>|(?:\\[()]?|\([^\s\x00-\x1f()\\]*\)|[^\s\x00-\x1f()\\])*?)/;
inline._title = /"(?:\\"?|[^"\\])*"|'(?:\\'?|[^'\\])*'|\((?:\\\)?|[^)\\])*\)/;

inline.link = edit(inline.link)
.replace('inside', inline._inside)
.replace('label', inline._label)
.replace('href', inline._href)
.replace('title', inline._title)
.getRegex();

inline.reflink = edit(inline.reflink)
.replace('inside', inline._inside)
.replace('label', inline._label)
.getRegex();

/**
Expand All @@ -508,7 +553,13 @@ inline.normal = merge({}, inline);

inline.pedantic = merge({}, inline.normal, {
strong: /^__(?=\S)([\s\S]*?\S)__(?!_)|^\*\*(?=\S)([\s\S]*?\S)\*\*(?!\*)/,
em: /^_(?=\S)([\s\S]*?\S)_(?!_)|^\*(?=\S)([\s\S]*?\S)\*(?!\*)/
em: /^_(?=\S)([\s\S]*?\S)_(?!_)|^\*(?=\S)([\s\S]*?\S)\*(?!\*)/,
link: edit(/^!?\[(label)\]\(\s*<?([\s\S]*?)>?(?:\s+(['"][\s\S]*?['"]))?\s*\)/)
.replace('label', inline._label)
.getRegex(),
reflink: edit(/^!?\[(label)\]\s*\[([^\]]*)\]/)
.replace('label', inline._label)
.getRegex()
});

/**
Expand Down Expand Up @@ -552,14 +603,14 @@ function InlineLexer(links, options) {
throw new Error('Tokens array requires a `links` property.');
}

if (this.options.gfm) {
if (this.options.pedantic) {
this.rules = inline.pedantic;
} else if (this.options.gfm) {
if (this.options.breaks) {
this.rules = inline.breaks;
} else {
this.rules = inline.gfm;
}
} else if (this.options.pedantic) {
this.rules = inline.pedantic;
}
}

Expand Down Expand Up @@ -587,6 +638,7 @@ InlineLexer.prototype.output = function(src) {
link,
text,
href,
title,
cap;

while (src) {
Expand Down Expand Up @@ -650,9 +702,12 @@ InlineLexer.prototype.output = function(src) {
if (cap = this.rules.link.exec(src)) {
src = src.substring(cap[0].length);
this.inLink = true;
href = cap[2];
href = href[0] === '<' ? href.substring(1, href.length - 1) : href;
title = cap[3] ? cap[3].substring(1, cap[3].length - 1) : cap[3];
out += this.outputLink(cap, {
href: cap[2],
title: cap[3]
href: InlineLexer.escapes(href),
title: InlineLexer.escapes(title)
});
this.inLink = false;
continue;
Expand Down Expand Up @@ -725,12 +780,16 @@ InlineLexer.prototype.output = function(src) {
return out;
};

InlineLexer.escapes = function(text) {
return text ? text.replace(InlineLexer.rules._escapes, '$1') : text;
}

/**
* Compile Link
*/

InlineLexer.prototype.outputLink = function(cap, link) {
var href = escape(link.href),
var href = link.href,
title = link.title ? escape(link.title) : null;

return cap[0].charAt(0) !== '!'
Expand Down Expand Up @@ -917,7 +976,12 @@ Renderer.prototype.link = function(href, title, text) {
if (this.options.baseUrl && !originIndependentUrl.test(href)) {
href = resolveUrl(this.options.baseUrl, href);
}
var out = '<a href="' + href + '"';
try {
href = encodeURI(href).replace(/%25/g, '%');
} catch (e) {
return text;
}
var out = '<a href="' + escape(href) + '"';
if (title) {
out += ' title="' + title + '"';
}
Expand Down Expand Up @@ -1137,10 +1201,8 @@ Parser.prototype.tok = function() {
return this.renderer.listitem(body);
}
case 'html': {
var html = !this.token.pre && !this.options.pedantic
? this.inline.output(this.token.text)
: this.token.text;
return this.renderer.html(html);
// TODO parse inline content if parameter markdown=1
return this.renderer.html(this.token.text);
}
case 'paragraph': {
return this.renderer.paragraph(this.inline.output(this.token.text));
Expand Down Expand Up @@ -1179,7 +1241,7 @@ function unescape(html) {
}

function edit(regex, opt) {
regex = regex.source;
regex = regex.source || regex;
opt = opt || '';
return {
replace: function(name, val) {
Expand Down
9 changes: 7 additions & 2 deletions test/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -365,10 +365,15 @@ function fix() {

// cp -r original tests
fs.readdirSync(path.resolve(__dirname, 'original')).forEach(function(file) {
var text = fs.readFileSync(path.resolve(__dirname, 'original', file));
var text = fs.readFileSync(path.resolve(__dirname, 'original', file), 'utf8');

if (path.extname(file) === '.md') {
text = '---\ngfm: false\n---\n' + text;
if (fm.test(text)) {
text = fm(text);
text = '---\n' + text.frontmatter + '\ngfm: false\n---\n' + text.body;
} else {
text = '---\ngfm: false\n---\n' + text;
}
}

fs.writeFileSync(path.resolve(__dirname, 'compiled_tests', file), text);
Expand Down
File renamed without changes.
Loading

0 comments on commit fb18030

Please sign in to comment.