Skip to content

Commit

Permalink
Fix to match GH's algorithm
Browse files Browse the repository at this point in the history
  • Loading branch information
wooorm committed Feb 3, 2021
1 parent aa87cc2 commit a2fac79
Show file tree
Hide file tree
Showing 56 changed files with 2,663 additions and 12 deletions.
1 change: 1 addition & 0 deletions .prettierignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
coverage/
*.html
*.json
*.md
1 change: 1 addition & 0 deletions .remarkignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
test/
120 changes: 120 additions & 0 deletions from-markdown.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
var ccount = require('ccount')
var findAndReplace = require('mdast-util-find-and-replace')
var unicodePunctuation = require('micromark/dist/character/unicode-punctuation')
var unicodeWhitespace = require('micromark/dist/character/unicode-whitespace')

// Tree transforms exposed by this extension: a single transform,
// `transformGfmAutolinkLiterals` (defined below), which searches the tree for
// URLs and email addresses and replaces them with `link` nodes.
exports.transforms = [transformGfmAutolinkLiterals]
exports.enter = {
literalAutolink: enterLiteralAutolink,
literalAutolinkEmail: enterLiteralAutolinkValue,
Expand Down Expand Up @@ -35,3 +41,117 @@ function exitLiteralAutolinkEmail(token) {
// Handler for the end of a literal autolink: hands `token` to `this.exit`.
// NOTE(review): `this` is supplied by the caller and is expected to expose
// `exit`; presumably the compiler context of `mdast-util-from-markdown`, but
// the registration of this handler is outside the visible hunk, so confirm.
function exitLiteralAutolink(token) {
this.exit(token)
}

// Search `tree` for URLs and email addresses and replace them with `link`
// nodes (the work is delegated to `findAndReplace`, which changes `tree` in
// place). Existing `link` and `linkReference` nodes are ignored so that
// nothing inside a link is linked again.
//
// * URL pattern: `http://`, `https://`, or `www` directly followed by a dot
//   (case-insensitive), then the domain characters, then everything up to the
//   next space, tab, or line ending. Captures: protocol, domain, path.
// * Email pattern: the part before `@`, then two or more dot-separated labels.
//   Captures: atext, label.
//
// Both patterns carry the `g` flag. Per the `mdast-util-find-and-replace`
// docs, only *string* patterns are turned into global expressions; a regex
// without `g` is tried once per text node, so a second URL/email in the same
// text (or any candidate following one that `findUrl`/`findEmail` rejected)
// would never be linked.
function transformGfmAutolinkLiterals(tree) {
  findAndReplace(
    tree,
    [
      [/(https?:\/\/|www(?=\.))([-.\w]+)([^ \t\r\n]*)/gi, findUrl],
      [/([-.\w+]+)@([-\w]+(?:\.[-\w]+)+)/g, findEmail]
    ],
    {ignore: ['link', 'linkReference']}
  )
}

// Replacer for one URL match.
//
// Arguments are the full match (unused), the three capture groups (protocol,
// domain, path), and a `match` object with `index` and `input` describing
// where the match sits in the text.
//
// Returns `false` to leave the text alone, a `link` node, or a
// `[link, text]` pair when trailing characters were split off the URL.
function findUrl($0, protocol, domain, path, match) {
  // A `www` "protocol" is really the first label of the domain; such links
  // get an `http://` prefix in their URL only, not in their visible text.
  var startsWithWww = /^w/i.test(protocol)
  var scheme = startsWithWww ? '' : protocol
  var host = startsWithWww ? protocol + domain : domain
  var urlPrefix = startsWithWww ? 'http://' : ''
  var pieces
  var link

  // Reject when the character before the match is not an expected one, or
  // when the domain is not acceptable.
  if (!previous(match) || !isCorrectDomain(host)) {
    return false
  }

  // `pieces[0]` is the URL proper, `pieces[1]` what was trimmed off its end.
  pieces = splitUrl(host + path)

  // Nothing is left to link.
  if (!pieces[0]) {
    return false
  }

  link = {
    type: 'link',
    title: null,
    url: urlPrefix + scheme + pieces[0],
    children: [{type: 'text', value: scheme + pieces[0]}]
  }

  return pieces[1] ? [link, {type: 'text', value: pieces[1]}] : link
}

// Replacer for one email match.
//
// Arguments are the full match (unused), the part before `@` (`atext`), the
// part after it (`label`), and a `match` object with `index` and `input`.
//
// Returns `false` to leave the text alone, otherwise a `mailto:` `link` node.
function findEmail($0, atext, label, match) {
  var address = atext + '@' + label

  // Not an expected previous character.
  if (!previous(match, true)) {
    return false
  }

  // The part after `@` may not end in an underscore or a dash.
  if (/[_-]$/.test(label)) {
    return false
  }

  return {
    type: 'link',
    title: null,
    url: 'mailto:' + address,
    children: [{type: 'text', value: address}]
  }
}

// Check whether `domain` is acceptable for an autolink.
//
// The domain is split on dots and needs at least two parts. Each of the last
// two parts, when non-empty, must contain no underscore and at least one
// ASCII letter or digit. Earlier parts are not inspected.
function isCorrectDomain(domain) {
  var labels = domain.split('.')
  var total = labels.length
  var offset = 0
  var label

  if (total < 2) {
    return false
  }

  // Look at the last label, then the one before it.
  while (++offset <= 2) {
    label = labels[total - offset]

    // Empty labels (as in a trailing or doubled dot) are let through.
    if (label && (/_/.test(label) || !/[a-zA-Z\d]/.test(label))) {
      return false
    }
  }

  return true
}

// Split trailing punctuation off `url`.
//
// Returns `[url, trail]`: `trail` is the run of `! " & ' ) , . : ; < > ? ] }`
// characters at the end of the input (or `null` when there is none) and `url`
// is what precedes it. Closing parens in the trail are handed back to the URL
// while the URL has more `(` than `)`, so balanced parens stay in the link.
function splitUrl(url) {
  var found = /[!"&'),.:;<>?\]}]+$/.exec(url)
  var head
  var tail
  var closeAt
  var unclosed

  if (!found) {
    return [url, null]
  }

  head = url.slice(0, found.index)
  tail = found[0]
  closeAt = tail.indexOf(')')
  // How many opening parens in the URL still lack a closing one.
  unclosed = ccount(head, '(') - ccount(head, ')')

  while (closeAt !== -1 && unclosed > 0) {
    // Move everything up to and including this `)` back into the URL.
    head += tail.slice(0, closeAt + 1)
    tail = tail.slice(closeAt + 1)
    closeAt = tail.indexOf(')')
    unclosed--
  }

  return [head, tail]
}

// Check whether the character before a match allows an autolink to start.
//
// `match` has `input` (the searched text) and `index` (where the match
// starts). The previous character must be absent, Unicode whitespace, or
// Unicode punctuation; when `email` is truthy it additionally may not be a
// slash (character code 47).
function previous(match, email) {
  var before = match.input.charCodeAt(match.index - 1)
  // `charCodeAt` yields `NaN` when there is no such character, and `NaN` is
  // the only value that is not equal to itself.
  var atBoundary =
    before !== before || unicodeWhitespace(before) || unicodePunctuation(before)

  if (!atBoundary) {
    return atBoundary
  }

  return !email || before !== 47
}
22 changes: 17 additions & 5 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,17 @@
"index.js",
"to-markdown.js"
],
"dependencies": {},
"dependencies": {
"ccount": "^1.0.0",
"mdast-util-find-and-replace": "^1.1.0",
"micromark": "^2.11.3"
},
"devDependencies": {
"mdast-util-from-markdown": "^0.8.0",
"hast-util-to-html": "^7.0.0",
"mdast-util-from-markdown": "^0.8.5",
"mdast-util-to-hast": "^10.0.0",
"mdast-util-to-markdown": "^0.6.0",
"micromark-extension-gfm-autolink-literal": "^0.5.0",
"micromark-extension-gfm-autolink-literal": "^0.5.6",
"nyc": "^15.0.0",
"prettier": "^2.0.0",
"remark-cli": "^9.0.0",
Expand All @@ -49,7 +55,7 @@
"scripts": {
"format": "remark . -qfo && prettier . -w --loglevel warn && xo --fix",
"test-api": "node test",
"test-coverage": "nyc --reporter lcov tape test.js",
"test-coverage": "nyc --reporter lcov tape test/index.js",
"test": "npm run format && npm run test-coverage"
},
"nyc": {
Expand All @@ -68,7 +74,13 @@
},
"xo": {
"prettier": true,
"esnext": false
"esnext": false,
"rules": {
"max-params": "off",
"no-self-compare": "off",
"unicorn/prefer-includes": "off",
"unicorn/prefer-optional-catch-binding": "off"
}
},
"remarkConfig": {
"plugins": [
Expand Down
4 changes: 2 additions & 2 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ autolink literals in **[mdast][]**.
When parsing (`from-markdown`), must be combined with
[`micromark-extension-gfm-autolink-literal`][extension].

You probably shouldn’t use this package directly, but instead use
[`remark-gfm`][remark-gfm] with **[remark][]**.
You might want to use this package through [`remark-gfm`][remark-gfm] with
**[remark][]**.

## Install

Expand Down
86 changes: 86 additions & 0 deletions test/algorithm-2.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
<p>[https://</p>
<p>[https://a</p>
<p>[https://.</p>
<p>[<a href="https://a">https://a</a>.</p>
<p>[<a href="https://a">https://a</a>..</p>
<p>[<a href="https://a..b">https://a..b</a></p>
<p>[<a href="https://a.b">https://a.b</a></p>
<p>[<a href="https://a.b">https://a.b</a>.</p>
<p>[<a href="https://a.b">https://a.b</a>..</p>
<p>[<a href="https://a.b.c">https://a.b.c</a></p>
<p>[<a href="https://a.b..c">https://a.b..c</a></p>
<p>[https://a.b_.c</p>
<p>[<a href="https://a_.b.c">https://a_.b.c</a></p>
<p>[https://a_.b_.c</p>
<p>[<a href="https://a.b%C2%A9">https://a.b©</a></p>
<p>[http://點看.com</p>
<hr>
<p>[<a href="http://a.b/c">http://a.b/c</a> (space)</p>
<p>[<a href="http://a.b/c">http://a.b/c</a>!</p>
<p>[<a href="http://a.b/c">http://a.b/c</a>"</p>
<p>[<a href="http://a.b/c#">http://a.b/c#</a></p>
<p>[<a href="http://a.b/c$">http://a.b/c$</a></p>
<p>[<a href="http://a.b/c%25">http://a.b/c%</a></p>
<p>[<a href="http://a.b/c">http://a.b/c</a>&amp;</p>
<p>[<a href="http://a.b/c">http://a.b/c</a>'</p>
<p>[<a href="http://a.b/c(">http://a.b/c(</a></p>
<p>[<a href="http://a.b/c">http://a.b/c</a>)</p>
<p>[<a href="http://a.b/c*">http://a.b/c*</a></p>
<p>[<a href="http://a.b/c+">http://a.b/c+</a></p>
<p>[<a href="http://a.b/c">http://a.b/c</a>,</p>
<p>[<a href="http://a.b/c-">http://a.b/c-</a></p>
<p>[<a href="http://a.b/c">http://a.b/c</a></p>
<p>[<a href="http://a.b/c">http://a.b/c</a>.</p>
<p>[<a href="http://a.b/c/">http://a.b/c/</a></p>
<p>[<a href="http://a.b/c">http://a.b/c</a>:</p>
<p>[<a href="http://a.b/c">http://a.b/c</a>;</p>
<p>[<a href="http://a.b/c">http://a.b/c</a>&lt;</p>
<p>[<a href="http://a.b/c=">http://a.b/c=</a></p>
<p>[<a href="http://a.b/c">http://a.b/c</a>></p>
<p>[<a href="http://a.b/c">http://a.b/c</a>?</p>
<p>[<a href="http://a.b/c@">http://a.b/c@</a></p>
<p>[<a href="http://a.b/c%5B">http://a.b/c[</a></p>
<p>[<a href="http://a.b/c%5C">http://a.b/c\</a></p>
<p>[<a href="http://a.b/c">http://a.b/c</a>]</p>
<p>[<a href="http://a.b/c%5E">http://a.b/c^</a></p>
<p>[<a href="http://a.b/c_">http://a.b/c_</a></p>
<p>[<a href="http://a.b/c%60">http://a.b/c`</a></p>
<p>[<a href="http://a.b/c%7B">http://a.b/c{</a></p>
<p>[<a href="http://a.b/c%7C">http://a.b/c|</a></p>
<p>[<a href="http://a.b/c">http://a.b/c</a>}</p>
<p>[<a href="http://a.b/c~">http://a.b/c~</a></p>
<hr>
<p>[<a href="http://www">www</a>.</p>
<p>[<a href="http://www.a">www.a</a></p>
<p>[<a href="http://www">www</a>..</p>
<p>[<a href="http://www.a">www.a</a>.</p>
<p>[<a href="http://www.a">www.a</a>..</p>
<p>[<a href="http://www.a..b">www.a..b</a></p>
<p>[<a href="http://www.a.b">www.a.b</a></p>
<p>[<a href="http://www.a.b">www.a.b</a>.</p>
<p>[<a href="http://www.a.b">www.a.b</a>..</p>
<p>[<a href="http://www.a.b.c">www.a.b.c</a></p>
<p>[<a href="http://www.a.b..c">www.a.b..c</a></p>
<p>[www.a.b_.c</p>
<p>[<a href="http://www.a_.b.c">www.a_.b.c</a></p>
<p>[www.a_.b_.c</p>
<p>[<a href="http://www.a.b%C2%A9">www.a.b©</a></p>
<p>[<a href="http://www.%E9%BB%9E%E7%9C%8B.com">www.點看.com</a></p>
<hr>
<p>[<a href="mailto:a@b.c">a@b.c</a></p>
<p>[<a href="mailto:a@b.ca">a@b.ca</a></p>
<p>[<a href="mailto:a@b.c">a@b.c</a>.</p>
<p>[<a href="mailto:a@b.ca">a@b.ca</a>.</p>
<p>[<a href="mailto:a@b.ca">a@b.ca</a>..</p>
<p>[<a href="mailto:a@b.ca">a@b.ca</a>..b</p>
<p>[<a href="mailto:a@b.ca.b">a@b.ca.b</a></p>
<p>[<a href="mailto:a@b.ca.b">a@b.ca.b</a>.</p>
<p>[<a href="mailto:a@b.ca.b">a@b.ca.b</a>..</p>
<p>[<a href="mailto:a@b.ca.b.c">a@b.ca.b.c</a></p>
<p>[<a href="mailto:a@b.ca.b">a@b.ca.b</a>..c</p>
<p>[<a href="mailto:a@b.ca.b_.c">a@b.ca.b_.c</a></p>
<p>[<a href="mailto:a@b.ca_.b.c">a@b.ca_.b.c</a></p>
<p>[<a href="mailto:a@b.ca_.b_.c">a@b.ca_.b_.c</a></p>
<p>[<a href="mailto:a@b.ca.b">a@b.ca.b</a>©</p>
<p>[a@b點看.com</p>
<p>[點看@b.com</p>
Loading

0 comments on commit a2fac79

Please sign in to comment.