Files
rust/library/stdarch/crates/stdarch-test/src/disassembly.rs

204 lines
7.3 KiB
Rust
Raw Normal View History

//! Disassembly calling function for most targets.
2019-07-08 14:30:51 +02:00
use crate::Function;
use std::{collections::HashSet, env, str};
2019-07-08 14:30:51 +02:00
// Extracts the "shim" name from the `symbol`.
fn normalize(mut symbol: &str) -> String {
// Remove trailing colon:
if symbol.ends_with(':') {
symbol = &symbol[..symbol.len() - 1];
}
if symbol.ends_with('>') {
symbol = &symbol[..symbol.len() - 1];
}
if let Some(idx) = symbol.find('<') {
symbol = &symbol[idx + 1..];
}
let mut symbol = rustc_demangle::demangle(symbol).to_string();
symbol = match symbol.rfind("::h") {
Some(i) => symbol[..i].to_string(),
None => symbol.to_string(),
};
// Remove Rust paths
if let Some(last_colon) = symbol.rfind(':') {
symbol = symbol[last_colon + 1..].to_string();
2019-07-08 14:30:51 +02:00
}
// Normalize to no leading underscore to handle platforms that may
// inject extra ones in symbol names.
while symbol.starts_with('_') {
symbol.remove(0);
}
2020-10-25 09:32:27 +09:00
// Windows/x86 has a suffix such as @@4.
if let Some(idx) = symbol.find("@@") {
symbol = symbol[..idx].to_string();
2020-10-25 09:32:27 +09:00
}
2019-07-08 14:30:51 +02:00
symbol
}
/// Disassembles the currently running executable with `dumpbin.exe` and
/// parses the output into the set of shim functions it contains.
///
/// `dumpbin.exe` is located through `cc::windows_registry::find` for the
/// MSVC target triple matching the architecture this crate was compiled for,
/// and is invoked with `/DISASM:NOBYTES` on the current executable. The exit
/// status and the tool's stderr are printed to stdout.
///
/// # Panics
///
/// Panics if the path of the current executable cannot be determined, if the
/// target architecture is not one of `x86_64`, `x86` or `aarch64`, if
/// `dumpbin.exe` cannot be found or executed, or if it exits unsuccessfully.
#[cfg(target_env = "msvc")]
pub(crate) fn disassemble_myself() -> HashSet<Function> {
    let me = env::current_exe().expect("failed to get current exe");

    let target = if cfg!(target_arch = "x86_64") {
        "x86_64-pc-windows-msvc"
    } else if cfg!(target_arch = "x86") {
        "i686-pc-windows-msvc"
    } else if cfg!(target_arch = "aarch64") {
        "aarch64-pc-windows-msvc"
    } else {
        panic!("disassembly unimplemented")
    };
    let mut cmd =
        cc::windows_registry::find(target, "dumpbin.exe").expect("failed to find `dumpbin` tool");
    let output = cmd
        .arg("/DISASM:NOBYTES")
        .arg(&me)
        .output()
        .expect("failed to execute dumpbin");
    println!(
        "{}\n{}",
        output.status,
        String::from_utf8_lossy(&output.stderr)
    );
    assert!(output.status.success());

    // Windows does not return valid UTF-8 output, hence the lossy conversion.
    // `parse` only needs the text while it runs and returns owned data, so
    // the stdout buffer is borrowed here rather than leaked.
    parse(&String::from_utf8_lossy(&output.stdout))
}
#[cfg(not(target_env = "msvc"))]
pub(crate) fn disassemble_myself() -> HashSet<Function> {
let me = env::current_exe().expect("failed to get current exe");
let objdump = env::var("OBJDUMP").unwrap_or_else(|_| "objdump".to_string());
let add_args = if cfg!(target_vendor = "apple") && cfg!(target_arch = "aarch64") {
// Target features need to be enabled for LLVM objdump on Darwin ARM64
vec!["--mattr=+v8.6a,+crypto,+tme"]
2023-09-16 16:54:08 +02:00
} else if cfg!(target_arch = "riscv64") {
2023-09-16 16:57:57 +02:00
vec!["--mattr=+zk,+zks,+zbc,+zbb"]
} else {
vec![]
2020-10-25 09:32:27 +09:00
};
let output = std::process::Command::new(objdump.clone())
.arg("--disassemble")
.arg("--no-show-raw-insn")
.args(add_args)
.arg(&me)
.output()
.unwrap_or_else(|_| panic!("failed to execute objdump. OBJDUMP={objdump}"));
println!(
"{}\n{}",
output.status,
String::from_utf8_lossy(&output.stderr)
);
assert!(output.status.success());
let disassembly = String::from_utf8_lossy(Vec::leak(output.stdout));
2019-07-08 14:30:51 +02:00
parse(&disassembly)
}
2019-07-08 14:30:51 +02:00
fn parse(output: &str) -> HashSet<Function> {
let mut lines = output.lines();
2019-09-16 17:42:00 +02:00
println!(
"First 100 lines of the disassembly input containing {} lines:",
lines.clone().count()
);
for line in output.lines().take(100) {
println!("{line}");
}
2019-07-08 14:30:51 +02:00
let mut functions = HashSet::new();
let mut cached_header = None;
while let Some(header) = cached_header.take().or_else(|| lines.next()) {
2019-07-08 23:21:37 +02:00
if !header.ends_with(':') || !header.contains("stdarch_test_shim") {
2019-09-16 17:42:00 +02:00
continue;
}
eprintln!("header: {header}");
2019-07-08 14:30:51 +02:00
let symbol = normalize(header);
eprintln!("normalized symbol: {symbol}");
let mut instructions = Vec::new();
2023-10-30 13:25:33 +01:00
for instruction in lines.by_ref() {
if instruction.ends_with(':') {
cached_header = Some(instruction);
break;
}
2019-07-08 14:30:51 +02:00
if instruction.is_empty() {
cached_header = None;
break;
}
let mut parts = if cfg!(target_env = "msvc") {
2019-07-08 14:30:51 +02:00
// Each line looks like:
//
2024-04-15 16:20:31 -07:00
// > $addr: $instr..
2019-07-08 14:30:51 +02:00
instruction
2024-04-15 16:20:31 -07:00
.split(&[' ', ','])
.filter(|&x| !x.is_empty())
2019-07-08 14:30:51 +02:00
.skip(1)
2024-04-15 16:20:31 -07:00
.map(str::to_lowercase)
2024-12-21 13:54:12 +05:30
.skip_while(|s| matches!(&**s, "lock" | "vex")) // skip x86-specific prefix
2019-07-08 14:30:51 +02:00
.collect::<Vec<String>>()
} else {
// objdump with --no-show-raw-insn
2019-07-08 14:30:51 +02:00
// Each line of instructions should look like:
//
// $rel_offset: $instruction...
2019-07-08 14:30:51 +02:00
instruction
.split_whitespace()
.skip(1)
2024-12-21 13:54:12 +05:30
.skip_while(|s| matches!(*s, "lock" | "{evex}" | "{vex}")) // skip x86-specific prefix
.map(ToString::to_string)
2019-07-08 14:30:51 +02:00
.collect::<Vec<String>>()
};
2023-11-30 15:00:25 -08:00
if cfg!(any(target_arch = "aarch64", target_arch = "arm64ec")) {
// Normalize [us]shll.* ..., #0 instructions to the preferred form: [us]xtl.* ...
2024-04-15 16:20:31 -07:00
// as neither LLVM objdump nor dumpbin does that.
// See https://developer.arm.com/documentation/ddi0602/latest/SIMD-FP-Instructions/UXTL--UXTL2--Unsigned-extend-Long--an-alias-of-USHLL--USHLL2-
// and https://developer.arm.com/documentation/ddi0602/latest/SIMD-FP-Instructions/SXTL--SXTL2--Signed-extend-Long--an-alias-of-SSHLL--SSHLL2-
// for details.
2024-04-15 16:20:31 -07:00
fn is_shll(instr: &str) -> bool {
if cfg!(target_env = "msvc") {
instr.starts_with("ushll") || instr.starts_with("sshll")
} else {
instr.starts_with("ushll.") || instr.starts_with("sshll.")
}
}
match (parts.first(), parts.last()) {
2024-09-28 00:56:02 -04:00
(Some(instr), Some(last_arg)) if is_shll(instr) && last_arg == "#0" => {
assert_eq!(parts.len(), 4);
let mut new_parts = Vec::with_capacity(3);
let new_instr = format!("{}{}{}", &instr[..1], "xtl", &instr[5..]);
new_parts.push(new_instr);
new_parts.push(parts[1].clone());
new_parts.push(parts[2][0..parts[2].len() - 1].to_owned()); // strip trailing comma
parts = new_parts;
}
2024-04-15 16:20:31 -07:00
// dumpbin uses "ins" instead of "mov"
(Some(instr), _) if cfg!(target_env = "msvc") && instr == "ins" => {
parts[0] = "mov".to_string()
}
_ => {}
};
}
2019-07-08 14:30:51 +02:00
instructions.push(parts.join(" "));
}
2019-07-08 14:30:51 +02:00
let function = Function {
name: symbol,
2019-09-16 17:42:00 +02:00
instrs: instructions,
2019-07-08 14:30:51 +02:00
};
assert!(functions.insert(function));
}
2019-07-09 10:03:12 +02:00
eprintln!("all found functions dump:");
for k in &functions {
eprintln!(" f: {}", k.name);
}
2019-07-08 14:30:51 +02:00
functions
}