diff --git a/src/bootstrap/src/core/build_steps/check.rs b/src/bootstrap/src/core/build_steps/check.rs index cfe090b22dc32..b4232409ba83d 100644 --- a/src/bootstrap/src/core/build_steps/check.rs +++ b/src/bootstrap/src/core/build_steps/check.rs @@ -556,3 +556,9 @@ tool_check_step!(Compiletest { allow_features: COMPILETEST_ALLOW_FEATURES, default: false, }); + +tool_check_step!(Linkchecker { + path: "src/tools/linkchecker", + mode: |_builder| Mode::ToolBootstrap, + default: false +}); diff --git a/src/bootstrap/src/core/builder/mod.rs b/src/bootstrap/src/core/builder/mod.rs index 923c3a9a93501..020622d1c1219 100644 --- a/src/bootstrap/src/core/builder/mod.rs +++ b/src/bootstrap/src/core/builder/mod.rs @@ -1033,6 +1033,7 @@ impl<'a> Builder<'a> { check::Compiletest, check::FeaturesStatusDump, check::CoverageDump, + check::Linkchecker, // This has special staging logic, it may run on stage 1 while others run on stage 0. // It takes quite some time to build stage 1, so put this at the end. // diff --git a/src/tools/linkchecker/Cargo.toml b/src/tools/linkchecker/Cargo.toml index 7123d43eb564c..fb5bff3fe63ff 100644 --- a/src/tools/linkchecker/Cargo.toml +++ b/src/tools/linkchecker/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "linkchecker" version = "0.1.0" -edition = "2021" +edition = "2024" [[bin]] name = "linkchecker" diff --git a/src/tools/linkchecker/main.rs b/src/tools/linkchecker/main.rs index 84cba3f8c4473..1dc45728c90cd 100644 --- a/src/tools/linkchecker/main.rs +++ b/src/tools/linkchecker/main.rs @@ -17,12 +17,13 @@ //! should catch the majority of "broken link" cases. use std::cell::{Cell, RefCell}; +use std::collections::hash_map::Entry; use std::collections::{HashMap, HashSet}; -use std::io::ErrorKind; +use std::fs; +use std::iter::once; use std::path::{Component, Path, PathBuf}; use std::rc::Rc; use std::time::Instant; -use std::{env, fs}; use html5ever::tendril::ByteTendril; use html5ever::tokenizer::{ @@ -110,10 +111,25 @@ macro_rules! t { }; } +struct Cli { + docs: PathBuf, + link_targets_dirs: Vec, +} + fn main() { - let docs = env::args_os().nth(1).expect("doc path should be first argument"); - let docs = env::current_dir().unwrap().join(docs); - let mut checker = Checker { root: docs.clone(), cache: HashMap::new() }; + let cli = match parse_cli() { + Ok(cli) => cli, + Err(err) => { + eprintln!("error: {err}"); + usage_and_exit(1); + } + }; + + let mut checker = Checker { + root: cli.docs.clone(), + link_targets_dirs: cli.link_targets_dirs, + cache: HashMap::new(), + }; let mut report = Report { errors: 0, start: Instant::now(), @@ -125,7 +141,7 @@ fn main() { intra_doc_exceptions: 0, has_broken_urls: false, }; - checker.walk(&docs, &mut report); + checker.walk(&cli.docs, &mut report); report.report(); if report.errors != 0 { println!("found some broken links"); @@ -133,8 +149,50 @@ fn main() { } } +fn parse_cli() -> Result { + fn to_absolute_path(arg: &str) -> Result { + std::path::absolute(arg).map_err(|e| format!("could not convert to absolute {arg}: {e}")) + } + + let mut verbatim = false; + let mut docs = None; + let mut link_targets_dirs = Vec::new(); + + let mut args = std::env::args().skip(1); + while let Some(arg) = args.next() { + if !verbatim && arg == "--" { + verbatim = true; + } else if !verbatim && (arg == "-h" || arg == "--help") { + usage_and_exit(0) + } else if !verbatim && arg == "--link-targets-dir" { + link_targets_dirs.push(to_absolute_path( + &args.next().ok_or("missing value for --link-targets-dir")?, + )?); + } else if !verbatim && let Some(value) = arg.strip_prefix("--link-targets-dir=") { + link_targets_dirs.push(to_absolute_path(value)?); + } else if !verbatim && arg.starts_with('-') { + return Err(format!("unknown flag: {arg}")); + } else if docs.is_none() { + docs = Some(arg); + } else { + return Err("too many positional arguments".into()); + } + } + + Ok(Cli { + docs: to_absolute_path(&docs.ok_or("missing first positional argument")?)?, + link_targets_dirs, + }) +} + +fn usage_and_exit(code: i32) -> ! { + eprintln!("usage: linkchecker PATH [--link-targets-dir=PATH ...]"); + std::process::exit(code) +} + struct Checker { root: PathBuf, + link_targets_dirs: Vec, cache: Cache, } @@ -420,37 +478,34 @@ impl Checker { /// Load a file from disk, or from the cache if available. fn load_file(&mut self, file: &Path, report: &mut Report) -> (String, &FileEntry) { - // https://docs.microsoft.com/en-us/windows/win32/debug/system-error-codes--0-499- - #[cfg(windows)] - const ERROR_INVALID_NAME: i32 = 123; - let pretty_path = file.strip_prefix(&self.root).unwrap_or(file).to_str().unwrap().to_string(); - let entry = - self.cache.entry(pretty_path.clone()).or_insert_with(|| match fs::metadata(file) { + for base in once(&self.root).chain(self.link_targets_dirs.iter()) { + let entry = self.cache.entry(pretty_path.clone()); + if let Entry::Occupied(e) = &entry + && !matches!(e.get(), FileEntry::Missing) + { + break; + } + + let file = base.join(&pretty_path); + entry.insert_entry(match fs::metadata(&file) { Ok(metadata) if metadata.is_dir() => FileEntry::Dir, Ok(_) => { if file.extension().and_then(|s| s.to_str()) != Some("html") { FileEntry::OtherFile } else { report.html_files += 1; - load_html_file(file, report) + load_html_file(&file, report) } } - Err(e) if e.kind() == ErrorKind::NotFound => FileEntry::Missing, - Err(e) => { - // If a broken intra-doc link contains `::`, on windows, it will cause `ERROR_INVALID_NAME` rather than `NotFound`. - // Explicitly check for that so that the broken link can be allowed in `LINKCHECK_EXCEPTIONS`. - #[cfg(windows)] - if e.raw_os_error() == Some(ERROR_INVALID_NAME) - && file.as_os_str().to_str().map_or(false, |s| s.contains("::")) - { - return FileEntry::Missing; - } - panic!("unexpected read error for {}: {}", file.display(), e); - } + Err(e) if is_not_found_error(&file, &e) => FileEntry::Missing, + Err(e) => panic!("unexpected read error for {}: {}", file.display(), e), }); + } + + let entry = self.cache.get(&pretty_path).unwrap(); (pretty_path, entry) } } @@ -629,3 +684,16 @@ fn parse_ids(ids: &mut HashSet, file: &str, source: &str, report: &mut R ids.insert(encoded); } } + +fn is_not_found_error(path: &Path, error: &std::io::Error) -> bool { + // https://docs.microsoft.com/en-us/windows/win32/debug/system-error-codes--0-499- + const WINDOWS_ERROR_INVALID_NAME: i32 = 123; + + error.kind() == std::io::ErrorKind::NotFound + // If a broken intra-doc link contains `::`, on windows, it will cause `ERROR_INVALID_NAME` + // rather than `NotFound`. Explicitly check for that so that the broken link can be allowed + // in `LINKCHECK_EXCEPTIONS`. + || (cfg!(windows) + && error.raw_os_error() == Some(WINDOWS_ERROR_INVALID_NAME) + && path.as_os_str().to_str().map_or(false, |s| s.contains("::"))) +}