Skip to content

Commit

Permalink
Fix sending out too many requests
Browse files Browse the repository at this point in the history
Related-to: #53.
  • Loading branch information
wooorm committed Oct 8, 2024
1 parent c1365e1 commit 8655bbc
Show file tree
Hide file tree
Showing 2 changed files with 121 additions and 106 deletions.
225 changes: 119 additions & 106 deletions lib/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,13 @@
import {deadOrAlive} from 'dead-or-alive'
import {ok as assert} from 'devlop'
import isOnline from 'is-online'
import pAll from 'p-all'
import pLimit from 'p-limit'
import {lintRule} from 'unified-lint-rule'
import {visit} from 'unist-util-visit'

// Module-level limiter created with a concurrency of 1; the rule below
// passes its per-file work through it so that work runs one file at a time.
const limit = pLimit(1)

// Fallback settings object used when the rule is called without options.
/** @type {Readonly<Options>} */
const emptyOptions = {}
// Default skip list: the single pattern matches any URL that does not begin
// with http (case-insensitively), so such URLs are skipped by default.
const defaultSkipUrlPatterns = [/^(?!https?)/i]
Expand Down Expand Up @@ -55,124 +59,133 @@ export default remarkLintNoDeadUrls
* Nothing.
*/
async function rule(tree, file, options) {
// NOTE(review): this span comes from a rendered commit diff with no +/-
// markers, so two versions of this function body are interleaved and most
// statements appear twice. Presumably the copy nested inside the
// limit(...) callback is the post-change side; confirm against the
// repository before treating this listing as runnable code.
/** @type {Map<string, Array<Resources>>} */
const nodesByUrl = new Map()
const online = await isOnline()
const settings = options || emptyOptions
// String patterns are compiled to RegExp objects; with no user patterns a
// copy of the defaults is used, so the push below never mutates the
// module-level default array.
const skipUrlPatterns = settings.skipUrlPatterns
? settings.skipUrlPatterns.map(function (d) {
return typeof d === 'string' ? new RegExp(d) : d
})
: [...defaultSkipUrlPatterns]

if (settings.skipLocalhost) {
skipUrlPatterns.push(/^(https?:\/\/)(localhost|127\.0\.0\.1)(:\d+)?/)
}

/* c8 ignore next 9 -- difficult to test */
if (!online) {
if (!settings.skipOffline) {
file.info(
'Unexpected offline connection, expected either an online connection or `skipOffline: true`'
)
}
// Operate one file at a time.
// Otherwise we’d send out tons of requests at a time for say 10 files.
await limit(async function () {
/** @type {Map<string, Array<Resources>>} */
const nodesByUrl = new Map()
const online = await isOnline()
const settings = options || emptyOptions
const skipUrlPatterns = settings.skipUrlPatterns
? settings.skipUrlPatterns.map(function (d) {
return typeof d === 'string' ? new RegExp(d) : d
})
: [...defaultSkipUrlPatterns]

return
}

const meta = /** @type {Record<string, unknown> | undefined} */ (
file.data.meta
)

// Base for resolving relative URLs: a truthy from setting wins, otherwise
// it is built from file.data.meta when both origin and pathname are
// strings, else it stays undefined.
const from =
settings.from ||
(meta &&
typeof meta.origin === 'string' &&
typeof meta.pathname === 'string'
? new URL(meta.pathname, meta.origin).href
: undefined)

// findUrls comes after the spread, so it is always false here regardless
// of what the caller passed in deadOrAliveOptions.
const deadOrAliveOptions = {
...settings.deadOrAliveOptions,
findUrls: false
}

visit(tree, function (node) {
if ('url' in node && typeof node.url === 'string') {
const value = node.url
const colon = value.indexOf(':')
const questionMark = value.indexOf('?')
const numberSign = value.indexOf('#')
const slash = value.indexOf('/')
let relativeToSomething = false

if (
// If there is no protocol, it’s relative.
colon < 0 ||
// If the first colon is after a `?`, `#`, or `/`, it’s not a protocol.
(slash > -1 && colon > slash) ||
(questionMark > -1 && colon > questionMark) ||
(numberSign > -1 && colon > numberSign)
) {
relativeToSomething = true
}
if (settings.skipLocalhost) {
skipUrlPatterns.push(/^(https?:\/\/)(localhost|127\.0\.0\.1)(:\d+)?/)
}

// We can only check URLs relative to something if `from` is passed.
if (relativeToSomething && !from) {
return
/* c8 ignore next 9 -- difficult to test */
if (!online) {
if (!settings.skipOffline) {
file.info(
'Unexpected offline connection, expected either an online connection or `skipOffline: true`'
)
}

const url = new URL(value, from).href
return
}

if (
skipUrlPatterns.some(function (skipPattern) {
return skipPattern.test(url)
})
) {
return
}
const meta = /** @type {Record<string, unknown> | undefined} */ (
file.data.meta
)

const from =
settings.from ||
(meta &&
typeof meta.origin === 'string' &&
typeof meta.pathname === 'string'
? new URL(meta.pathname, meta.origin).href
: undefined)

const deadOrAliveOptions = {
...settings.deadOrAliveOptions,
findUrls: false
}

let list = nodesByUrl.get(url)
visit(tree, function (node) {
if ('url' in node && typeof node.url === 'string') {
const value = node.url
const colon = value.indexOf(':')
const questionMark = value.indexOf('?')
const numberSign = value.indexOf('#')
const slash = value.indexOf('/')
let relativeToSomething = false

if (
// If there is no protocol, it’s relative.
colon < 0 ||
// If the first colon is after a `?`, `#`, or `/`, it’s not a protocol.
(slash > -1 && colon > slash) ||
(questionMark > -1 && colon > questionMark) ||
(numberSign > -1 && colon > numberSign)
) {
relativeToSomething = true
}

if (!list) {
list = []
nodesByUrl.set(url, list)
}
// We can only check URLs relative to something if `from` is passed.
if (relativeToSomething && !from) {
return
}

list.push(node)
}
})
const url = new URL(value, from).href

const urls = [...nodesByUrl.keys()]

// This variant starts the check for every collected URL at once.
await Promise.all(
urls.map(async function (url) {
const nodes = nodesByUrl.get(url)
assert(nodes)
const result = await deadOrAlive(url, deadOrAliveOptions)

for (const node of nodes) {
for (const message of result.messages) {
const product = file.message(
'Unexpected dead URL `' + url + '`, expected live URL',
{ancestors: [node], cause: message, place: node.position}
)
product.fatal = message.fatal
if (
skipUrlPatterns.some(function (skipPattern) {
return skipPattern.test(url)
})
) {
return
}

if (result.status === 'alive' && new URL(url).href !== result.url) {
const message = file.message(
'Unexpected redirecting URL `' +
url +
'`, expected final URL `' +
result.url +
'`',
{ancestors: [node], place: node.position}
)
message.actual = url
message.expected = [result.url]
let list = nodesByUrl.get(url)

if (!list) {
list = []
nodesByUrl.set(url, list)
}

list.push(node)
}
})
)

const urls = [...nodesByUrl.keys()]

// This variant hands pAll one task function per URL; the concurrency
// option below caps how many of them run at the same time.
await pAll(
urls.map(function (url) {
return async function () {
// Every url here came from nodesByUrl.keys(), so a node list must exist.
const nodes = nodesByUrl.get(url)
assert(nodes)

const result = await deadOrAlive(url, deadOrAliveOptions)

for (const node of nodes) {
// One lint message per dead-or-alive message, for each node that used
// this URL; the fatal flag is copied over from the cause.
for (const message of result.messages) {
const product = file.message(
'Unexpected dead URL `' + url + '`, expected live URL',
{ancestors: [node], cause: message, place: node.position}
)
product.fatal = message.fatal
}

// A live result whose final URL differs from the checked one is reported
// as a redirect, with the final URL recorded as the expected value.
if (result.status === 'alive' && new URL(url).href !== result.url) {
const message = file.message(
'Unexpected redirecting URL `' +
url +
'`, expected final URL `' +
result.url +
'`',
{ancestors: [node], place: node.position}
)
message.actual = url
message.expected = [result.url]
}
}
}
}),
// Operate on 10 URLs at a time.
{concurrency: 10}
)
})
}
2 changes: 2 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@
"dead-or-alive": "^1.0.0",
"devlop": "^1.0.0",
"is-online": "^11.0.0",
"p-all": "^5.0.0",
"p-limit": "^6.0.0",
"unified-lint-rule": "^3.0.0",
"unist-util-visit": "^5.0.0",
"vfile": "^6.0.0",
Expand Down

0 comments on commit 8655bbc

Please sign in to comment.