Replace the nm symbol check with a Rust implementation

This should be less error-prone and adaptable than the `nm` version, and
have better cross-platform support without needing LLVM `nm` installed.
This commit is contained in:
Trevor Gross
2025-04-19 07:38:43 +00:00
parent 97c0bebdf7
commit a63f4826cf
4 changed files with 261 additions and 107 deletions

View File

@@ -6,6 +6,7 @@ members = [
"crates/libm-macros",
"crates/musl-math-sys",
"crates/panic-handler",
"crates/symbol-check",
"crates/util",
"libm",
"libm-test",

View File

@@ -47,87 +47,25 @@ else
fi
fi
# Ensure there are no duplicate symbols or references to `core` when
# `compiler-builtins` is built with various features. Symcheck invokes Cargo to
# build with the arguments we provide it, then validates the built artifacts.
symcheck=(cargo run -p symbol-check --release)
[[ "$target" = "wasm"* ]] && symcheck+=(--features wasm)
symcheck+=(-- build-and-check)
declare -a rlib_paths
# Set the `rlib_paths` global array to a list of all compiler-builtins rlibs
update_rlib_paths() {
if [ -d /builtins-target ]; then
rlib_paths=( /builtins-target/"${target}"/debug/deps/libcompiler_builtins-*.rlib )
else
rlib_paths=( target/"${target}"/debug/deps/libcompiler_builtins-*.rlib )
fi
}
# Remove any existing artifacts from previous tests that don't set #![compiler_builtins]
update_rlib_paths
rm -f "${rlib_paths[@]}"
cargo build -p compiler_builtins --target "$target"
cargo build -p compiler_builtins --target "$target" --release
cargo build -p compiler_builtins --target "$target" --features c
cargo build -p compiler_builtins --target "$target" --features c --release
cargo build -p compiler_builtins --target "$target" --features no-asm
cargo build -p compiler_builtins --target "$target" --features no-asm --release
cargo build -p compiler_builtins --target "$target" --features no-f16-f128
cargo build -p compiler_builtins --target "$target" --features no-f16-f128 --release
PREFIX=${target//unknown-/}-
case "$target" in
armv7-*)
PREFIX=arm-linux-gnueabihf-
;;
thumb*)
PREFIX=arm-none-eabi-
;;
*86*-*)
PREFIX=
;;
esac
NM=$(find "$(rustc --print sysroot)" \( -name llvm-nm -o -name llvm-nm.exe \) )
if [ "$NM" = "" ]; then
NM="${PREFIX}nm"
fi
# i686-pc-windows-gnu tools have a dependency on some DLLs, so run it with
# rustup run to ensure that those are in PATH.
TOOLCHAIN="$(rustup show active-toolchain | sed 's/ (default)//')"
if [[ "$TOOLCHAIN" == *i686-pc-windows-gnu ]]; then
NM="rustup run $TOOLCHAIN $NM"
fi
# Look out for duplicated symbols when we include the compiler-rt (C) implementation
update_rlib_paths
for rlib in "${rlib_paths[@]}"; do
set +x
echo "================================================================"
echo "checking $rlib for duplicate symbols"
echo "================================================================"
set -x
duplicates_found=0
# NOTE On i586, It's normal that the get_pc_thunk symbol appears several
# times so ignore it
$NM -g --defined-only "$rlib" 2>&1 |
sort |
uniq -d |
grep -v __x86.get_pc_thunk --quiet |
grep 'T __' && duplicates_found=1
if [ "$duplicates_found" != 0 ]; then
echo "error: found duplicate symbols"
exit 1
else
echo "success; no duplicate symbols found"
fi
done
rm -f "${rlib_paths[@]}"
"${symcheck[@]}" -p compiler_builtins --target "$target"
"${symcheck[@]}" -p compiler_builtins --target "$target" --release
"${symcheck[@]}" -p compiler_builtins --target "$target" --features c
"${symcheck[@]}" -p compiler_builtins --target "$target" --features c --release
"${symcheck[@]}" -p compiler_builtins --target "$target" --features no-asm
"${symcheck[@]}" -p compiler_builtins --target "$target" --features no-asm --release
"${symcheck[@]}" -p compiler_builtins --target "$target" --features no-f16-f128
"${symcheck[@]}" -p compiler_builtins --target "$target" --features no-f16-f128 --release
build_intrinsics_test() {
cargo build \
# symcheck also checks the results of builtins-test-intrinsics
"${symcheck[@]}" \
--target "$target" --verbose \
--manifest-path builtins-test-intrinsics/Cargo.toml "$@"
}
@@ -143,35 +81,6 @@ build_intrinsics_test --features c --release
CARGO_PROFILE_DEV_LTO=true build_intrinsics_test
CARGO_PROFILE_RELEASE_LTO=true build_intrinsics_test --release
# Ensure no references to any symbols from core
update_rlib_paths
for rlib in "${rlib_paths[@]}"; do
set +x
echo "================================================================"
echo "checking $rlib for references to core"
echo "================================================================"
set -x
tmpdir="${CARGO_TARGET_DIR:-target}/tmp"
test -d "$tmpdir" || mkdir "$tmpdir"
defined="$tmpdir/defined_symbols.txt"
undefined="$tmpdir/defined_symbols.txt"
$NM --quiet -U "$rlib" | grep 'T _ZN4core' | awk '{print $3}' | sort | uniq > "$defined"
$NM --quiet -u "$rlib" | grep 'U _ZN4core' | awk '{print $2}' | sort | uniq > "$undefined"
grep_has_results=0
grep -v -F -x -f "$defined" "$undefined" && grep_has_results=1
if [ "$target" = "powerpc64-unknown-linux-gnu" ]; then
echo "FIXME: powerpc64 fails these tests"
elif [ "$grep_has_results" != 0 ]; then
echo "error: found unexpected references to core"
exit 1
else
echo "success; no references to core found"
fi
done
# Test libm
# Make sure a simple build works

View File

@@ -0,0 +1,13 @@
[package]
name = "symbol-check"
version = "0.1.0"
edition = "2024"
publish = false
[dependencies]
# FIXME: used as a git dependency since the latest release does not support wasm
object = { git = "https://github.com/gimli-rs/object.git", rev = "013fac75da56a684377af4151b8164b78c1790e0" }
serde_json = "1.0.140"
[features]
wasm = ["object/wasm"]

View File

@@ -0,0 +1,231 @@
//! Tool used by CI to inspect compiler-builtins archives and help ensure we won't run into any
//! linking errors.
use std::collections::{BTreeMap, BTreeSet};
use std::fs;
use std::io::{BufRead, BufReader};
use std::path::{Path, PathBuf};
use std::process::{Command, Stdio};
use object::read::archive::{ArchiveFile, ArchiveMember};
use object::{Object, ObjectSymbol, Symbol, SymbolKind, SymbolScope, SymbolSection};
use serde_json::Value;
const CHECK_LIBRARIES: &[&str] = &["compiler_builtins", "builtins_test_intrinsics"];
const CHECK_EXTENSIONS: &[Option<&str>] = &[Some("rlib"), Some("a"), Some("exe"), None];
const USAGE: &str = "Usage:
symbol-check build-and-check CARGO_ARGS ...
Cargo will get invoked with `CARGO_ARGS` and all output
`compiler_builtins*.rlib` files will be checked.
";
fn main() {
// Create a `&str` vec so we can match on it.
let args = std::env::args().collect::<Vec<_>>();
let args_ref = args.iter().map(String::as_str).collect::<Vec<_>>();
match &args_ref[1..] {
["build-and-check", rest @ ..] if !rest.is_empty() => {
let paths = exec_cargo_with_args(rest);
for path in paths {
println!("Checking {}", path.display());
verify_no_duplicates(&path);
verify_core_symbols(&path);
}
}
_ => {
println!("{USAGE}");
std::process::exit(1);
}
}
}
/// Run `cargo build` with the provided additional arguments, collecting the list of created
/// libraries.
fn exec_cargo_with_args(args: &[&str]) -> Vec<PathBuf> {
let mut cmd = Command::new("cargo")
.arg("build")
.arg("--message-format=json")
.args(args)
.stdout(Stdio::piped())
.spawn()
.expect("failed to launch Cargo");
let stdout = cmd.stdout.take().unwrap();
let reader = BufReader::new(stdout);
let mut check_files = Vec::new();
for line in reader.lines() {
let line = line.expect("failed to read line");
println!("{line}"); // tee to stdout
// Select only steps that create files
let j: Value = serde_json::from_str(&line).expect("failed to deserialize");
if j["reason"] != "compiler-artifact" {
continue;
}
// Find rlibs in the created file list that match our expected library names and
// extensions.
for fpath in j["filenames"].as_array().expect("filenames not an array") {
let path = fpath.as_str().expect("file name not a string");
let path = PathBuf::from(path);
if CHECK_EXTENSIONS.contains(&path.extension().map(|ex| ex.to_str().unwrap())) {
let fname = path.file_name().unwrap().to_str().unwrap();
if CHECK_LIBRARIES.iter().any(|lib| fname.contains(lib)) {
check_files.push(path);
}
}
}
}
cmd.wait().expect("failed to wait on Cargo");
assert!(!check_files.is_empty(), "no compiler_builtins rlibs found");
println!("Collected the following rlibs to check: {check_files:#?}");
check_files
}
/// Information collected from `object`, for convenience.
#[expect(unused)] // only for printing
#[derive(Clone, Debug)]
struct SymInfo {
name: String,
kind: SymbolKind,
scope: SymbolScope,
section: SymbolSection,
is_undefined: bool,
is_global: bool,
is_local: bool,
is_weak: bool,
is_common: bool,
address: u64,
object: String,
}
impl SymInfo {
fn new(sym: &Symbol, member: &ArchiveMember) -> Self {
Self {
name: sym.name().expect("missing name").to_owned(),
kind: sym.kind(),
scope: sym.scope(),
section: sym.section(),
is_undefined: sym.is_undefined(),
is_global: sym.is_global(),
is_local: sym.is_local(),
is_weak: sym.is_weak(),
is_common: sym.is_common(),
address: sym.address(),
object: String::from_utf8_lossy(member.name()).into_owned(),
}
}
}
/// Ensure that the same global symbol isn't defined in multiple object files within an archive.
///
/// Note that this will also locate cases where a symbol is weakly defined in more than one place.
/// Technically there are no linker errors that will come from this, but it keeps our binary more
/// straightforward and saves some distribution size.
fn verify_no_duplicates(path: &Path) {
let mut syms = BTreeMap::<String, SymInfo>::new();
let mut dups = Vec::new();
let mut found_any = false;
for_each_symbol(path, |symbol, member| {
// Only check defined globals
if !symbol.is_global() || symbol.is_undefined() {
return;
}
let sym = SymInfo::new(&symbol, member);
// x86-32 includes multiple copies of thunk symbols
if sym.name.starts_with("__x86.get_pc_thunk") {
return;
}
// Windows has symbols for literal numeric constants, string literals, and MinGW pseudo-
// relocations. These are allowed to have repeated definitions.
let win_allowed_dup_pfx = ["__real@", "__xmm@", "??_C@_", ".refptr"];
if win_allowed_dup_pfx
.iter()
.any(|pfx| sym.name.starts_with(pfx))
{
return;
}
match syms.get(&sym.name) {
Some(existing) => {
dups.push(sym);
dups.push(existing.clone());
}
None => {
syms.insert(sym.name.clone(), sym);
}
}
found_any = true;
});
assert!(found_any, "no symbols found");
if !dups.is_empty() {
dups.sort_unstable_by(|a, b| a.name.cmp(&b.name));
panic!("found duplicate symbols: {dups:#?}");
}
println!(" success: no duplicate symbols found");
}
/// Ensure that there are no references to symbols from `core` that aren't also (somehow) defined.
fn verify_core_symbols(path: &Path) {
let mut defined = BTreeSet::new();
let mut undefined = Vec::new();
let mut has_symbols = false;
for_each_symbol(path, |symbol, member| {
has_symbols = true;
// Find only symbols from `core`
if !symbol.name().unwrap().contains("_ZN4core") {
return;
}
let sym = SymInfo::new(&symbol, member);
if sym.is_undefined {
undefined.push(sym);
} else {
defined.insert(sym.name);
}
});
assert!(has_symbols, "no symbols found");
// Discard any symbols that are defined somewhere in the archive
undefined.retain(|sym| !defined.contains(&sym.name));
if !undefined.is_empty() {
undefined.sort_unstable_by(|a, b| a.name.cmp(&b.name));
panic!("found undefined symbols from core: {undefined:#?}");
}
println!(" success: no undefined references to core found");
}
/// For a given archive path, do something with each symbol.
fn for_each_symbol(path: &Path, mut f: impl FnMut(Symbol, &ArchiveMember)) {
let data = fs::read(path).expect("reading file failed");
let archive = ArchiveFile::parse(data.as_slice()).expect("archive parse failed");
for member in archive.members() {
let member = member.expect("failed to access member");
let obj_data = member.data(&*data).expect("failed to access object");
let obj = object::File::parse(obj_data).expect("failed to parse object");
obj.symbols().for_each(|sym| f(sym, &member));
}
}