Refactor weak symbols in std::sys::unix

This makes a few changes to the weak symbol macros in `sys::unix`:

- `dlsym!` is added to keep the functionality for runtime `dlsym`
  lookups, like for `__pthread_get_minstack@GLIBC_PRIVATE` that we don't
  want to show up in ELF symbol tables.
- `weak!` now uses `#[linkage = "extern_weak"]` symbols, so its runtime
  behavior is just a simple null check. This is also used by `syscall!`.
  - On non-ELF targets (macos/ios) where that linkage is not known to
    behave, `weak!` is just an alias to `dlsym!` for the old behavior.
- `raw_syscall!` is added to always call `libc::syscall` on linux and
  android, for cases like `clone3` that have no known libc wrapper.

The new `weak!` linkage does mean that you'll get versioned symbols if
you build with a newer glibc, like `WEAK DEFAULT UND statx@GLIBC_2.28`.
This might seem problematic, but old non-weak symbols can tie the build
to new versions too, like `dlsym@GLIBC_2.34` from their recent library
unification. If you build with an old glibc like `dist-x86_64-linux`
does, you'll still get unversioned `WEAK DEFAULT UND statx`, which may
be resolved based on the runtime glibc.

I also found a few functions that don't need to be weak anymore:

- Android can directly use `ftruncate64`, `pread64`, and `pwrite64`, as
  these were added in API 12, and our baseline is API 14.
- Linux can directly use `splice`, added way back in glibc 2.5 and
  similarly old musl. Android only added it in API 21 though.
This commit is contained in:
Josh Stone
2021-11-12 12:58:38 -08:00
parent e90c5fbbc5
commit 5ff6ac4287
10 changed files with 136 additions and 193 deletions

View File

@@ -6,7 +6,7 @@
//! detection.
//!
//! One option to use here is weak linkage, but that is unfortunately only
//! really workable on Linux. Hence, use dlsym to get the symbol value at
//! really workable with ELF. Otherwise, use dlsym to get the symbol value at
//! runtime. This is also done for compatibility with older versions of glibc,
//! and to avoid creating dependencies on GLIBC_PRIVATE symbols. It assumes that
//! we've been dynamically linked to the library the symbol comes from, but that
@@ -14,7 +14,8 @@
//!
//! A long time ago this used weak linkage for the __pthread_get_minstack
//! symbol, but that caused Debian to detect an unnecessarily strict versioned
//! dependency on libc6 (#23628).
//! dependency on libc6 (#23628) because it is GLIBC_PRIVATE. We now use `dlsym`
//! for a runtime lookup of that symbol to avoid the ELF versioned dependency.
// There are a variety of `#[cfg]`s controlling which targets are involved in
// each instance of `weak!` and `syscall!`. Rather than trying to unify all of
@@ -22,31 +23,75 @@
#![allow(dead_code, unused_macros)]
use crate::ffi::CStr;
use crate::marker;
use crate::marker::PhantomData;
use crate::mem;
use crate::sync::atomic::{self, AtomicUsize, Ordering};
// We can use true weak linkage on ELF targets.
#[cfg(not(any(target_os = "macos", target_os = "ios")))]
pub(crate) macro weak {
(fn $name:ident($($t:ty),*) -> $ret:ty) => (
#[allow(non_upper_case_globals)]
static $name: crate::sys::weak::Weak<unsafe extern "C" fn($($t),*) -> $ret> =
crate::sys::weak::Weak::new(concat!(stringify!($name), '\0'));
let ref $name: ExternWeak<unsafe extern "C" fn($($t),*) -> $ret> = {
extern "C" {
#[linkage = "extern_weak"]
static $name: *const libc::c_void;
}
#[allow(unused_unsafe)]
ExternWeak::new(unsafe { $name })
};
)
}
pub struct Weak<F> {
name: &'static str,
addr: AtomicUsize,
_marker: marker::PhantomData<F>,
// On non-ELF targets, use the dlsym approximation of weak linkage.
#[cfg(any(target_os = "macos", target_os = "ios"))]
pub(crate) use self::dlsym as weak;
pub(crate) struct ExternWeak<F> {
weak_ptr: *const libc::c_void,
_marker: PhantomData<F>,
}
impl<F> Weak<F> {
pub const fn new(name: &'static str) -> Weak<F> {
Weak { name, addr: AtomicUsize::new(1), _marker: marker::PhantomData }
impl<F> ExternWeak<F> {
#[inline]
pub(crate) fn new(weak_ptr: *const libc::c_void) -> Self {
ExternWeak { weak_ptr, _marker: PhantomData }
}
}
impl<F> ExternWeak<F> {
#[inline]
pub(crate) fn get(&self) -> Option<F> {
unsafe {
if self.weak_ptr.is_null() {
None
} else {
Some(mem::transmute_copy::<*const libc::c_void, F>(&self.weak_ptr))
}
}
}
}
pub(crate) macro dlsym {
(fn $name:ident($($t:ty),*) -> $ret:ty) => (
static DLSYM: DlsymWeak<unsafe extern "C" fn($($t),*) -> $ret> =
DlsymWeak::new(concat!(stringify!($name), '\0'));
let $name = &DLSYM;
)
}
pub(crate) struct DlsymWeak<F> {
name: &'static str,
addr: AtomicUsize,
_marker: PhantomData<F>,
}
impl<F> DlsymWeak<F> {
pub(crate) const fn new(name: &'static str) -> Self {
DlsymWeak { name, addr: AtomicUsize::new(1), _marker: PhantomData }
}
pub fn get(&self) -> Option<F> {
assert_eq!(mem::size_of::<F>(), mem::size_of::<usize>());
#[inline]
pub(crate) fn get(&self) -> Option<F> {
unsafe {
// Relaxed is fine here because we fence before reading through the
// pointer (see the comment below).
@@ -82,6 +127,8 @@ impl<F> Weak<F> {
// Cold because it should only happen during first-time initalization.
#[cold]
unsafe fn initialize(&self) -> Option<F> {
assert_eq!(mem::size_of::<F>(), mem::size_of::<usize>());
let val = fetch(self.name);
// This synchronizes with the acquire fence in `get`.
self.addr.store(val, Ordering::Release);
@@ -105,14 +152,12 @@ unsafe fn fetch(name: &str) -> usize {
pub(crate) macro syscall {
(fn $name:ident($($arg_name:ident: $t:ty),*) -> $ret:ty) => (
unsafe fn $name($($arg_name: $t),*) -> $ret {
use super::os;
weak! { fn $name($($t),*) -> $ret }
if let Some(fun) = $name.get() {
fun($($arg_name),*)
} else {
os::set_errno(libc::ENOSYS);
super::os::set_errno(libc::ENOSYS);
-1
}
}
@@ -123,11 +168,6 @@ pub(crate) macro syscall {
pub(crate) macro syscall {
(fn $name:ident($($arg_name:ident: $t:ty),*) -> $ret:ty) => (
unsafe fn $name($($arg_name:$t),*) -> $ret {
use weak;
// This looks like a hack, but concat_idents only accepts idents
// (not paths).
use libc::*;
weak! { fn $name($($t),*) -> $ret }
// Use a weak symbol from libc when possible, allowing `LD_PRELOAD`
@@ -135,6 +175,10 @@ pub(crate) macro syscall {
if let Some(fun) = $name.get() {
fun($($arg_name),*)
} else {
// This looks like a hack, but concat_idents only accepts idents
// (not paths).
use libc::*;
syscall(
concat_idents!(SYS_, $name),
$($arg_name),*
@@ -143,3 +187,19 @@ pub(crate) macro syscall {
}
)
}
#[cfg(any(target_os = "linux", target_os = "android"))]
pub(crate) macro raw_syscall {
(fn $name:ident($($arg_name:ident: $t:ty),*) -> $ret:ty) => (
unsafe fn $name($($arg_name:$t),*) -> $ret {
// This looks like a hack, but concat_idents only accepts idents
// (not paths).
use libc::*;
syscall(
concat_idents!(SYS_, $name),
$($arg_name),*
) as $ret
}
)
}