Implement floor and ceil in assembly on i586

Fixes: https://github.com/rust-lang/compiler-builtins/issues/837

The assembly is based on:

- 2043392793/lib/libm/arch/i387/s_floor.S
- 2043392793/lib/libm/arch/i387/s_ceil.S

Both files state:

    /*
     * Written by J.T. Conklin <jtc@NetBSD.org>.
     * Public domain.
     */

I believe this means we're good in terms of licensing.
This commit is contained in:
Folkert de Vries
2025-07-27 23:27:40 +02:00
committed by GitHub
parent c061e73d9f
commit 9c683d3487
2 changed files with 55 additions and 52 deletions

View File

@@ -271,18 +271,6 @@ impl MaybeOverride<(f32,)> for SpecialCase {
impl MaybeOverride<(f64,)> for SpecialCase {
fn check_float<F: Float>(input: (f64,), actual: F, expected: F, ctx: &CheckCtx) -> CheckAction {
if cfg!(x86_no_sse)
&& ctx.base_name == BaseName::Ceil
&& ctx.basis == CheckBasis::Musl
&& input.0 < 0.0
&& input.0 > -1.0
&& expected == F::ZERO
&& actual == F::ZERO
{
// musl returns -0.0, we return +0.0
return XFAIL("i586 ceil signed zero");
}
if cfg!(x86_no_sse)
&& (ctx.base_name == BaseName::Rint || ctx.base_name == BaseName::Roundeven)
&& (expected - actual).abs() <= F::ONE
@@ -292,16 +280,6 @@ impl MaybeOverride<(f64,)> for SpecialCase {
return XFAIL("i586 rint rounding mode");
}
if cfg!(x86_no_sse)
&& (ctx.fn_ident == Identifier::Ceil || ctx.fn_ident == Identifier::Floor)
&& expected.eq_repr(F::NEG_ZERO)
&& actual.eq_repr(F::ZERO)
{
// FIXME: the x87 implementations do not keep the distinction between -0.0 and 0.0.
// See https://github.com/rust-lang/libm/pull/404#issuecomment-2572399955
return XFAIL("i586 ceil/floor signed zero");
}
if cfg!(x86_no_sse)
&& (ctx.fn_ident == Identifier::Exp10 || ctx.fn_ident == Identifier::Exp2)
{

View File

@@ -1,37 +1,62 @@
//! Architecture-specific support for x86-32 without SSE2
//!
//! We use an alternative implementation on x86, because the
//! main implementation fails with the x87 FPU used by
//! debian i386, probably due to excess precision issues.
//!
//! See <https://github.com/rust-lang/compiler-builtins/pull/976> for discussion on why these
//! functions are implemented in this way.
use super::super::fabs;
/// Use an alternative implementation on x86, because the
/// main implementation fails with the x87 FPU used by
/// debian i386, probably due to excess precision issues.
/// Basic implementation taken from https://github.com/rust-lang/libm/issues/219.
pub fn ceil(x: f64) -> f64 {
if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() {
let truncated = x as i64 as f64;
if truncated < x {
return truncated + 1.0;
} else {
return truncated;
}
} else {
return x;
/// Rounds `x` up to an integral value (toward +∞) using the x87 `frndint` instruction.
///
/// `frndint` rounds according to the rounding-control field of the FPU control word, so the
/// control word is temporarily replaced with one that selects "round toward +∞" and the
/// caller's control word is reloaded before the assembly block ends. The rounded value is
/// stored back into `x`, which is then returned.
pub fn ceil(mut x: f64) -> f64 {
// SAFETY: `{x}` is a pointer to the local `x`, which is valid for reads and writes of its
// 8 bytes. The assembly only accesses memory inside those 8 bytes (a word at `[x]`, a word
// at `[x + 2]`, and a qword at `[x]`). The saved control word is reloaded before the block
// ends, and the one value pushed by `fld` is popped by `fstp`, so the x87 stack has the
// same depth on exit as on entry. No instruction in the block pushes to the machine stack,
// matching `options(nostack)`.
unsafe {
core::arch::asm!(
// Push the input onto the x87 stack. After this the 8 bytes of `x` in memory are free
// to be reused as scratch until the final `fstp` overwrites them with the result.
"fld qword ptr [{x}]",
// Save the FPU control word, using `x` as scratch space.
"fstcw [{x}]",
// Set rounding control to 0b10 (+∞).
// 0x0b7f is 0x037f (the x87 default control word: all exceptions masked, extended
// precision) with the rounding-control field, bits 10-11, set to 0b10. It is written
// next to the saved control word, at byte offset 2, so the saved copy is preserved.
"mov word ptr [{x} + 2], 0x0b7f",
"fldcw [{x} + 2]",
// Round.
"frndint",
// Restore FPU control word.
"fldcw [{x}]",
// Save rounded value to memory.
"fstp qword ptr [{x}]",
x = in(reg) &mut x,
// All the x87 FPU stack is used, all registers must be clobbered
// (inline asm may only modify the x87 register stack when every `st(N)` is marked as
// clobbered; in return the stack is guaranteed to be empty on entry).
out("st(0)") _, out("st(1)") _,
out("st(2)") _, out("st(3)") _,
out("st(4)") _, out("st(5)") _,
out("st(6)") _, out("st(7)") _,
options(nostack),
);
}
x
}
/// Use an alternative implementation on x86, because the
/// main implementation fails with the x87 FPU used by
/// debian i386, probably due to excess precision issues.
/// Basic implementation taken from https://github.com/rust-lang/libm/issues/219.
pub fn floor(x: f64) -> f64 {
if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() {
let truncated = x as i64 as f64;
if truncated > x {
return truncated - 1.0;
} else {
return truncated;
}
} else {
return x;
/// Rounds `x` down to an integral value (toward -∞) using the x87 `frndint` instruction.
///
/// `frndint` rounds according to the rounding-control field of the FPU control word, so the
/// control word is temporarily replaced with one that selects "round toward -∞" and the
/// caller's control word is reloaded before the assembly block ends. The rounded value is
/// stored back into `x`, which is then returned.
pub fn floor(mut x: f64) -> f64 {
// SAFETY: `{x}` is a pointer to the local `x`, which is valid for reads and writes of its
// 8 bytes. The assembly only accesses memory inside those 8 bytes (a word at `[x]`, a word
// at `[x + 2]`, and a qword at `[x]`). The saved control word is reloaded before the block
// ends, and the one value pushed by `fld` is popped by `fstp`, so the x87 stack has the
// same depth on exit as on entry. No instruction in the block pushes to the machine stack,
// matching `options(nostack)`.
unsafe {
core::arch::asm!(
// Push the input onto the x87 stack. After this the 8 bytes of `x` in memory are free
// to be reused as scratch until the final `fstp` overwrites them with the result.
"fld qword ptr [{x}]",
// Save the FPU control word, using `x` as scratch space.
"fstcw [{x}]",
// Set rounding control to 0b01 (-∞).
// 0x077f is 0x037f (the x87 default control word: all exceptions masked, extended
// precision) with the rounding-control field, bits 10-11, set to 0b01. It is written
// next to the saved control word, at byte offset 2, so the saved copy is preserved.
"mov word ptr [{x} + 2], 0x077f",
"fldcw [{x} + 2]",
// Round.
"frndint",
// Restore FPU control word.
"fldcw [{x}]",
// Save rounded value to memory.
"fstp qword ptr [{x}]",
x = in(reg) &mut x,
// All the x87 FPU stack is used, all registers must be clobbered
// (inline asm may only modify the x87 register stack when every `st(N)` is marked as
// clobbered; in return the stack is guaranteed to be empty on entry).
out("st(0)") _, out("st(1)") _,
out("st(2)") _, out("st(3)") _,
out("st(4)") _, out("st(5)") _,
out("st(6)") _, out("st(7)") _,
options(nostack),
);
}
x
}