Implement floor and ceil in assembly on i586

Fixes: https://github.com/rust-lang/compiler-builtins/issues/837

The assembly is based on the following NetBSD libm sources:

- 2043392793/lib/libm/arch/i387/s_floor.S
- 2043392793/lib/libm/arch/i387/s_ceil.S

Both files state:

    /*
     * Written by J.T. Conklin <jtc@NetBSD.org>.
     * Public domain.
     */

I believe this means we're good in terms of licensing.
This commit is contained in:
Folkert de Vries
2025-07-27 23:27:40 +02:00
committed by GitHub
parent c061e73d9f
commit 9c683d3487
2 changed files with 55 additions and 52 deletions

View File

@@ -271,18 +271,6 @@ impl MaybeOverride<(f32,)> for SpecialCase {
impl MaybeOverride<(f64,)> for SpecialCase { impl MaybeOverride<(f64,)> for SpecialCase {
fn check_float<F: Float>(input: (f64,), actual: F, expected: F, ctx: &CheckCtx) -> CheckAction { fn check_float<F: Float>(input: (f64,), actual: F, expected: F, ctx: &CheckCtx) -> CheckAction {
if cfg!(x86_no_sse)
&& ctx.base_name == BaseName::Ceil
&& ctx.basis == CheckBasis::Musl
&& input.0 < 0.0
&& input.0 > -1.0
&& expected == F::ZERO
&& actual == F::ZERO
{
// musl returns -0.0, we return +0.0
return XFAIL("i586 ceil signed zero");
}
if cfg!(x86_no_sse) if cfg!(x86_no_sse)
&& (ctx.base_name == BaseName::Rint || ctx.base_name == BaseName::Roundeven) && (ctx.base_name == BaseName::Rint || ctx.base_name == BaseName::Roundeven)
&& (expected - actual).abs() <= F::ONE && (expected - actual).abs() <= F::ONE
@@ -292,16 +280,6 @@ impl MaybeOverride<(f64,)> for SpecialCase {
return XFAIL("i586 rint rounding mode"); return XFAIL("i586 rint rounding mode");
} }
if cfg!(x86_no_sse)
&& (ctx.fn_ident == Identifier::Ceil || ctx.fn_ident == Identifier::Floor)
&& expected.eq_repr(F::NEG_ZERO)
&& actual.eq_repr(F::ZERO)
{
// FIXME: the x87 implementations do not keep the distinction between -0.0 and 0.0.
// See https://github.com/rust-lang/libm/pull/404#issuecomment-2572399955
return XFAIL("i586 ceil/floor signed zero");
}
if cfg!(x86_no_sse) if cfg!(x86_no_sse)
&& (ctx.fn_ident == Identifier::Exp10 || ctx.fn_ident == Identifier::Exp2) && (ctx.fn_ident == Identifier::Exp10 || ctx.fn_ident == Identifier::Exp2)
{ {

View File

@@ -1,37 +1,62 @@
//! Architecture-specific support for x86-32 without SSE2 //! Architecture-specific support for x86-32 without SSE2
//!
//! We use an alternative implementation on x86, because the
//! main implementation fails with the x87 FPU used by
//! debian i386, probably due to excess precision issues.
//!
//! See https://github.com/rust-lang/compiler-builtins/pull/976 for discussion on why these
//! functions are implemented in this way.
use super::super::fabs; pub fn ceil(mut x: f64) -> f64 {
unsafe {
/// Use an alternative implementation on x86, because the core::arch::asm!(
/// main implementation fails with the x87 FPU used by "fld qword ptr [{x}]",
/// debian i386, probably due to excess precision issues. // Save the FPU control word, using `x` as scratch space.
/// Basic implementation taken from https://github.com/rust-lang/libm/issues/219. "fstcw [{x}]",
pub fn ceil(x: f64) -> f64 { // Set rounding control to 0b10 (+∞).
if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() { "mov word ptr [{x} + 2], 0x0b7f",
let truncated = x as i64 as f64; "fldcw [{x} + 2]",
if truncated < x { // Round.
return truncated + 1.0; "frndint",
} else { // Restore FPU control word.
return truncated; "fldcw [{x}]",
} // Save rounded value to memory.
} else { "fstp qword ptr [{x}]",
return x; x = in(reg) &mut x,
// All the x87 FPU stack is used, all registers must be clobbered
out("st(0)") _, out("st(1)") _,
out("st(2)") _, out("st(3)") _,
out("st(4)") _, out("st(5)") _,
out("st(6)") _, out("st(7)") _,
options(nostack),
);
} }
x
} }
/// Use an alternative implementation on x86, because the pub fn floor(mut x: f64) -> f64 {
/// main implementation fails with the x87 FPU used by unsafe {
/// debian i386, probably due to excess precision issues. core::arch::asm!(
/// Basic implementation taken from https://github.com/rust-lang/libm/issues/219. "fld qword ptr [{x}]",
pub fn floor(x: f64) -> f64 { // Save the FPU control word, using `x` as scratch space.
if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() { "fstcw [{x}]",
let truncated = x as i64 as f64; // Set rounding control to 0b01 (-∞).
if truncated > x { "mov word ptr [{x} + 2], 0x077f",
return truncated - 1.0; "fldcw [{x} + 2]",
} else { // Round.
return truncated; "frndint",
} // Restore FPU control word.
} else { "fldcw [{x}]",
return x; // Save rounded value to memory.
"fstp qword ptr [{x}]",
x = in(reg) &mut x,
// All the x87 FPU stack is used, all registers must be clobbered
out("st(0)") _, out("st(1)") _,
out("st(2)") _, out("st(3)") _,
out("st(4)") _, out("st(5)") _,
out("st(6)") _, out("st(7)") _,
options(nostack),
);
} }
x
} }