x86 (32/64): go back to passing SIMD vectors by-ptr

This commit is contained in:
Ralf Jung
2025-05-20 21:53:57 +02:00
parent 792fc2b033
commit 321db85fb4
4 changed files with 22 additions and 46 deletions

View File

@@ -7,7 +7,7 @@ use rustc_abi::{
use rustc_macros::HashStable_Generic;
pub use crate::spec::AbiMap;
use crate::spec::{HasTargetSpec, HasWasmCAbiOpt, HasX86AbiOpt, RustcAbi, WasmCAbi};
use crate::spec::{HasTargetSpec, HasWasmCAbiOpt, HasX86AbiOpt, WasmCAbi};
mod aarch64;
mod amdgpu;
@@ -696,24 +696,6 @@ impl<'a, Ty> FnAbi<'a, Ty> {
_ => {}
};
// Decides whether we can pass the given SIMD argument via `PassMode::Direct`.
// May only return `true` if the target will always pass those arguments the same way,
// no matter what the user does with `-Ctarget-feature`! In other words, whatever
// target features are required to pass a SIMD value in registers must be listed in
// the `abi_required_features` for the current target and ABI.
let can_pass_simd_directly = |arg: &ArgAbi<'_, Ty>| match &*spec.arch {
// On x86, if we have SSE2 (which we have by default for x86_64), we can always pass up
// to 128-bit-sized vectors.
"x86" if spec.rustc_abi == Some(RustcAbi::X86Sse2) => arg.layout.size.bits() <= 128,
"x86_64" if spec.rustc_abi != Some(RustcAbi::X86Softfloat) => {
// FIXME: once https://github.com/bytecodealliance/wasmtime/issues/10254 is fixed,
// accept vectors of up to 128 bits rather than only vectors of exactly 128 bits.
arg.layout.size.bits() == 128
}
// So far, we haven't implemented this logic for any other target.
_ => false,
};
for (arg_idx, arg) in self
.args
.iter_mut()
@@ -813,9 +795,10 @@ impl<'a, Ty> FnAbi<'a, Ty> {
// target feature sets. Some more information about this
// issue can be found in #44367.
//
// Note that the intrinsic ABI is exempt here as those are not
// real functions anyway, and the backend expects very specific types.
if spec.simd_types_indirect && !can_pass_simd_directly(arg) {
// We *could* do better in some cases, e.g. on x86_64 targets where SSE2 is
// required. However, doing so turns out to make LLVM worse at optimizing this
// code, so we pass things indirectly even there. See #139029 for more on that.
if spec.simd_types_indirect {
arg.make_indirect();
}
}