Skip to content

Commit

Permalink
doc: improve markdown link checker
Browse files Browse the repository at this point in the history
- Test link definitions too
- Report all broken links in a file, not just the first one
- Avoid use of workers (launching ~90 workers has substantial
  overhead, and on my 4-core machine it is slower than not using
  them at all) and make other simplifications

Refs: #32359

PR-URL: #32586
Reviewed-By: Anna Henningsen <anna@addaleax.net>
Reviewed-By: Colin Ihrig <cjihrig@gmail.com>
Reviewed-By: James M Snell <jasnell@gmail.com>
  • Loading branch information
mildsunrise authored and BethGriggs committed Apr 7, 2020
1 parent 7d8f82d commit 18ca874
Showing 1 changed file with 28 additions and 48 deletions.
76 changes: 28 additions & 48 deletions tools/doc/checkLinks.js
Original file line number Diff line number Diff line change
@@ -1,60 +1,43 @@
'use strict';

const fs = require('fs');
const { Worker, isMainThread, workerData: path } = require('worker_threads');
const { extname, join, resolve } = require('path');
const unified = require('unified');
const { pathToFileURL } = require('url');
const DIR = resolve(process.argv[2]);

console.log('Running Markdown link checker...');
findMarkdownFilesRecursively(DIR);

function* getLinksRecursively(node) {
if (
(node.type === 'link' && !node.url.startsWith('#')) ||
node.type === 'image'
) {
if (node.url && !node.url.startsWith('#')) {
yield node;
}
for (const child of node.children || []) {
yield* getLinksRecursively(child);
}
}

if (isMainThread) {
const { extname, join, resolve } = require('path');
const DIR = resolve(process.argv[2]);

console.log('Running Markdown link checker...');

async function* findMarkdownFilesRecursively(dirPath) {
const fileNames = await fs.promises.readdir(dirPath);

for (const fileName of fileNames) {
const path = join(dirPath, fileName);

const stats = await fs.promises.stat(path);
if (
stats.isDirectory() &&
fileName !== 'api' &&
fileName !== 'deps' &&
fileName !== 'node_modules'
) {
yield* findMarkdownFilesRecursively(path);
} else if (extname(fileName) === '.md') {
yield path;
}
/**
 * Walks `dirPath` synchronously and runs `checkFile` on every regular file
 * whose extension is `.md`.
 *
 * Sub-directories are descended into, except those named `api`, `deps` or
 * `node_modules`, which are skipped entirely.
 *
 * @param {string} dirPath Directory to scan.
 */
function findMarkdownFilesRecursively(dirPath) {
  const skippedDirNames = ['api', 'deps', 'node_modules'];

  for (const dirent of fs.readdirSync(dirPath, { withFileTypes: true })) {
    const { name } = dirent;
    const fullPath = join(dirPath, name);

    if (dirent.isDirectory()) {
      // Excluded directories are neither traversed nor treated as files.
      if (!skippedDirNames.includes(name)) {
        findMarkdownFilesRecursively(fullPath);
      }
      continue;
    }

    if (dirent.isFile() && extname(name) === '.md') {
      checkFile(fullPath);
    }
  }
}

/**
 * Reports a failure without aborting the run: the error is printed via
 * `console.error` and the process exit code is set to 1, so the process
 * still finishes its remaining work but exits as failed.
 *
 * @param {*} error Value to report (passed straight to `console.error`).
 */
function errorHandler(error) {
  console.error(error);
  process.exitCode = 1;
}

setImmediate(async () => {
for await (const path of findMarkdownFilesRecursively(DIR)) {
new Worker(__filename, { workerData: path }).on('error', errorHandler);
}
});
} else {
const unified = require('unified');
const { pathToFileURL } = require('url');

function checkFile(path) {
const tree = unified()
.use(require('remark-parse'))
.parse(fs.readFileSync(path));
Expand All @@ -63,12 +46,9 @@ if (isMainThread) {
for (const node of getLinksRecursively(tree)) {
const targetURL = new URL(node.url, base);
if (targetURL.protocol === 'file:' && !fs.existsSync(targetURL)) {
const error = new Error('Broken link in a Markdown document.');
const { start } = node.position;
error.stack =
error.stack.substring(0, error.stack.indexOf('\n') + 5) +
`at ${node.type} (${path}:${start.line}:${start.column})`;
throw error;
const { line, column } = node.position.start;
console.error(`Broken link at ${path}:${line}:${column} (${node.url})`);
process.exitCode = 1;
}
}
}

0 comments on commit 18ca874

Please sign in to comment.