Rollup merge of #143883 - pietroalbini:pa-linkchecker-extra-target, r=ehuss
Add `--link-targets-dir` argument to linkchecker

In my release notes API list tool (rust-lang/rust#143053) I want to check whether all links generated by the tool are actually valid, and using linkchecker seems to be the most sensible choice. Linkchecker currently has a fairly big limitation, though: it can only check a single directory, it checks *all* of the files within it, and link targets must point inside that same directory. This works great when checking the whole documentation package, but in my case I only need to check that one file contains valid links to the standard library docs.

To solve that, this PR adds a new `--link-targets-dir` flag to linkchecker. Directories passed to it will be valid link targets (with lower priority than the root being checked), but links within them will not be checked.

I'm not that happy with the name of the flag, and I'm happy for it to be bikeshedded.
This commit is contained in:
@@ -556,3 +556,9 @@ tool_check_step!(Compiletest {
|
||||
allow_features: COMPILETEST_ALLOW_FEATURES,
|
||||
default: false,
|
||||
});
|
||||
|
||||
// Check step for the linkchecker tool, built in `Mode::ToolBootstrap`.
// NOTE(review): `default: false` presumably keeps this step opt-in, like the
// `Compiletest` step above — confirm against the `tool_check_step!` definition.
tool_check_step!(Linkchecker {
    path: "src/tools/linkchecker",
    mode: |_builder| Mode::ToolBootstrap,
    default: false,
});
|
||||
|
||||
@@ -1033,6 +1033,7 @@ impl<'a> Builder<'a> {
|
||||
check::Compiletest,
|
||||
check::FeaturesStatusDump,
|
||||
check::CoverageDump,
|
||||
check::Linkchecker,
|
||||
// This has special staging logic, it may run on stage 1 while others run on stage 0.
|
||||
// It takes quite some time to build stage 1, so put this at the end.
|
||||
//
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
[package]
|
||||
name = "linkchecker"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
edition = "2024"
|
||||
|
||||
[[bin]]
|
||||
name = "linkchecker"
|
||||
|
||||
@@ -17,12 +17,13 @@
|
||||
//! should catch the majority of "broken link" cases.
|
||||
|
||||
use std::cell::{Cell, RefCell};
|
||||
use std::collections::hash_map::Entry;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::io::ErrorKind;
|
||||
use std::fs;
|
||||
use std::iter::once;
|
||||
use std::path::{Component, Path, PathBuf};
|
||||
use std::rc::Rc;
|
||||
use std::time::Instant;
|
||||
use std::{env, fs};
|
||||
|
||||
use html5ever::tendril::ByteTendril;
|
||||
use html5ever::tokenizer::{
|
||||
@@ -110,10 +111,25 @@ macro_rules! t {
|
||||
};
|
||||
}
|
||||
|
||||
/// Command-line options, as produced by `parse_cli`.
struct Cli {
    /// Absolute form of the single positional argument; `main` uses it as the
    /// checker root and walks it.
    docs: PathBuf,
    /// Absolute form of every `--link-targets-dir` value, in command-line order.
    link_targets_dirs: Vec<PathBuf>,
}
|
||||
|
||||
fn main() {
|
||||
let docs = env::args_os().nth(1).expect("doc path should be first argument");
|
||||
let docs = env::current_dir().unwrap().join(docs);
|
||||
let mut checker = Checker { root: docs.clone(), cache: HashMap::new() };
|
||||
let cli = match parse_cli() {
|
||||
Ok(cli) => cli,
|
||||
Err(err) => {
|
||||
eprintln!("error: {err}");
|
||||
usage_and_exit(1);
|
||||
}
|
||||
};
|
||||
|
||||
let mut checker = Checker {
|
||||
root: cli.docs.clone(),
|
||||
link_targets_dirs: cli.link_targets_dirs,
|
||||
cache: HashMap::new(),
|
||||
};
|
||||
let mut report = Report {
|
||||
errors: 0,
|
||||
start: Instant::now(),
|
||||
@@ -125,7 +141,7 @@ fn main() {
|
||||
intra_doc_exceptions: 0,
|
||||
has_broken_urls: false,
|
||||
};
|
||||
checker.walk(&docs, &mut report);
|
||||
checker.walk(&cli.docs, &mut report);
|
||||
report.report();
|
||||
if report.errors != 0 {
|
||||
println!("found some broken links");
|
||||
@@ -133,8 +149,50 @@ fn main() {
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_cli() -> Result<Cli, String> {
|
||||
fn to_absolute_path(arg: &str) -> Result<PathBuf, String> {
|
||||
std::path::absolute(arg).map_err(|e| format!("could not convert to absolute {arg}: {e}"))
|
||||
}
|
||||
|
||||
let mut verbatim = false;
|
||||
let mut docs = None;
|
||||
let mut link_targets_dirs = Vec::new();
|
||||
|
||||
let mut args = std::env::args().skip(1);
|
||||
while let Some(arg) = args.next() {
|
||||
if !verbatim && arg == "--" {
|
||||
verbatim = true;
|
||||
} else if !verbatim && (arg == "-h" || arg == "--help") {
|
||||
usage_and_exit(0)
|
||||
} else if !verbatim && arg == "--link-targets-dir" {
|
||||
link_targets_dirs.push(to_absolute_path(
|
||||
&args.next().ok_or("missing value for --link-targets-dir")?,
|
||||
)?);
|
||||
} else if !verbatim && let Some(value) = arg.strip_prefix("--link-targets-dir=") {
|
||||
link_targets_dirs.push(to_absolute_path(value)?);
|
||||
} else if !verbatim && arg.starts_with('-') {
|
||||
return Err(format!("unknown flag: {arg}"));
|
||||
} else if docs.is_none() {
|
||||
docs = Some(arg);
|
||||
} else {
|
||||
return Err("too many positional arguments".into());
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Cli {
|
||||
docs: to_absolute_path(&docs.ok_or("missing first positional argument")?)?,
|
||||
link_targets_dirs,
|
||||
})
|
||||
}
|
||||
|
||||
/// Prints the one-line usage summary to standard error, then terminates the process
/// with exit status `code`. Never returns.
fn usage_and_exit(code: i32) -> ! {
    const USAGE: &str = "usage: linkchecker PATH [--link-targets-dir=PATH ...]";

    eprintln!("{USAGE}");
    std::process::exit(code)
}
|
||||
|
||||
struct Checker {
|
||||
root: PathBuf,
|
||||
link_targets_dirs: Vec<PathBuf>,
|
||||
cache: Cache,
|
||||
}
|
||||
|
||||
@@ -420,37 +478,34 @@ impl Checker {
|
||||
|
||||
/// Load a file from disk, or from the cache if available.
|
||||
fn load_file(&mut self, file: &Path, report: &mut Report) -> (String, &FileEntry) {
|
||||
// https://docs.microsoft.com/en-us/windows/win32/debug/system-error-codes--0-499-
|
||||
#[cfg(windows)]
|
||||
const ERROR_INVALID_NAME: i32 = 123;
|
||||
|
||||
let pretty_path =
|
||||
file.strip_prefix(&self.root).unwrap_or(file).to_str().unwrap().to_string();
|
||||
|
||||
let entry =
|
||||
self.cache.entry(pretty_path.clone()).or_insert_with(|| match fs::metadata(file) {
|
||||
for base in once(&self.root).chain(self.link_targets_dirs.iter()) {
|
||||
let entry = self.cache.entry(pretty_path.clone());
|
||||
if let Entry::Occupied(e) = &entry
|
||||
&& !matches!(e.get(), FileEntry::Missing)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
let file = base.join(&pretty_path);
|
||||
entry.insert_entry(match fs::metadata(&file) {
|
||||
Ok(metadata) if metadata.is_dir() => FileEntry::Dir,
|
||||
Ok(_) => {
|
||||
if file.extension().and_then(|s| s.to_str()) != Some("html") {
|
||||
FileEntry::OtherFile
|
||||
} else {
|
||||
report.html_files += 1;
|
||||
load_html_file(file, report)
|
||||
load_html_file(&file, report)
|
||||
}
|
||||
}
|
||||
Err(e) if e.kind() == ErrorKind::NotFound => FileEntry::Missing,
|
||||
Err(e) => {
|
||||
// If a broken intra-doc link contains `::`, on windows, it will cause `ERROR_INVALID_NAME` rather than `NotFound`.
|
||||
// Explicitly check for that so that the broken link can be allowed in `LINKCHECK_EXCEPTIONS`.
|
||||
#[cfg(windows)]
|
||||
if e.raw_os_error() == Some(ERROR_INVALID_NAME)
|
||||
&& file.as_os_str().to_str().map_or(false, |s| s.contains("::"))
|
||||
{
|
||||
return FileEntry::Missing;
|
||||
}
|
||||
panic!("unexpected read error for {}: {}", file.display(), e);
|
||||
}
|
||||
Err(e) if is_not_found_error(&file, &e) => FileEntry::Missing,
|
||||
Err(e) => panic!("unexpected read error for {}: {}", file.display(), e),
|
||||
});
|
||||
}
|
||||
|
||||
let entry = self.cache.get(&pretty_path).unwrap();
|
||||
(pretty_path, entry)
|
||||
}
|
||||
}
|
||||
@@ -629,3 +684,16 @@ fn parse_ids(ids: &mut HashSet<String>, file: &str, source: &str, report: &mut R
|
||||
ids.insert(encoded);
|
||||
}
|
||||
}
|
||||
|
||||
/// Reports whether `error`, raised while inspecting `path`, should be treated as
/// "this path does not exist".
///
/// Returns `true` for any error whose kind is `ErrorKind::NotFound`. When compiled for
/// Windows it also returns `true` for OS error 123 (`ERROR_INVALID_NAME`) if `path` is
/// valid Unicode and contains `::`. Every other error yields `false`.
fn is_not_found_error(path: &Path, error: &std::io::Error) -> bool {
    // https://docs.microsoft.com/en-us/windows/win32/debug/system-error-codes--0-499-
    const WINDOWS_ERROR_INVALID_NAME: i32 = 123;

    if error.kind() == std::io::ErrorKind::NotFound {
        return true;
    }

    // If a broken intra-doc link contains `::`, on windows, it will cause `ERROR_INVALID_NAME`
    // rather than `NotFound`. Explicitly check for that so that the broken link can be allowed
    // in `LINKCHECK_EXCEPTIONS`.
    cfg!(windows)
        && error.raw_os_error() == Some(WINDOWS_ERROR_INVALID_NAME)
        && path.to_str().is_some_and(|s| s.contains("::"))
}
|
||||
|
||||
Reference in New Issue
Block a user