Implement floor and ceil in assembly on i586
Fixes: https://github.com/rust-lang/compiler-builtins/issues/837. The assembly is based on the NetBSD libm sources 2043392793/lib/libm/arch/i387/s_floor.S and 2043392793/lib/libm/arch/i387/s_ceil.S, which both state /* * Written by J.T. Conklin <jtc@NetBSD.org>. * Public domain. */ which I believe means we're good in terms of licensing.
This commit is contained in:
@@ -271,18 +271,6 @@ impl MaybeOverride<(f32,)> for SpecialCase {
|
|||||||
|
|
||||||
impl MaybeOverride<(f64,)> for SpecialCase {
|
impl MaybeOverride<(f64,)> for SpecialCase {
|
||||||
fn check_float<F: Float>(input: (f64,), actual: F, expected: F, ctx: &CheckCtx) -> CheckAction {
|
fn check_float<F: Float>(input: (f64,), actual: F, expected: F, ctx: &CheckCtx) -> CheckAction {
|
||||||
if cfg!(x86_no_sse)
|
|
||||||
&& ctx.base_name == BaseName::Ceil
|
|
||||||
&& ctx.basis == CheckBasis::Musl
|
|
||||||
&& input.0 < 0.0
|
|
||||||
&& input.0 > -1.0
|
|
||||||
&& expected == F::ZERO
|
|
||||||
&& actual == F::ZERO
|
|
||||||
{
|
|
||||||
// musl returns -0.0, we return +0.0
|
|
||||||
return XFAIL("i586 ceil signed zero");
|
|
||||||
}
|
|
||||||
|
|
||||||
if cfg!(x86_no_sse)
|
if cfg!(x86_no_sse)
|
||||||
&& (ctx.base_name == BaseName::Rint || ctx.base_name == BaseName::Roundeven)
|
&& (ctx.base_name == BaseName::Rint || ctx.base_name == BaseName::Roundeven)
|
||||||
&& (expected - actual).abs() <= F::ONE
|
&& (expected - actual).abs() <= F::ONE
|
||||||
@@ -292,16 +280,6 @@ impl MaybeOverride<(f64,)> for SpecialCase {
|
|||||||
return XFAIL("i586 rint rounding mode");
|
return XFAIL("i586 rint rounding mode");
|
||||||
}
|
}
|
||||||
|
|
||||||
if cfg!(x86_no_sse)
|
|
||||||
&& (ctx.fn_ident == Identifier::Ceil || ctx.fn_ident == Identifier::Floor)
|
|
||||||
&& expected.eq_repr(F::NEG_ZERO)
|
|
||||||
&& actual.eq_repr(F::ZERO)
|
|
||||||
{
|
|
||||||
// FIXME: the x87 implementations do not keep the distinction between -0.0 and 0.0.
|
|
||||||
// See https://github.com/rust-lang/libm/pull/404#issuecomment-2572399955
|
|
||||||
return XFAIL("i586 ceil/floor signed zero");
|
|
||||||
}
|
|
||||||
|
|
||||||
if cfg!(x86_no_sse)
|
if cfg!(x86_no_sse)
|
||||||
&& (ctx.fn_ident == Identifier::Exp10 || ctx.fn_ident == Identifier::Exp2)
|
&& (ctx.fn_ident == Identifier::Exp10 || ctx.fn_ident == Identifier::Exp2)
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -1,37 +1,62 @@
|
|||||||
//! Architecture-specific support for x86-32 without SSE2
|
//! Architecture-specific support for x86-32 without SSE2
|
||||||
|
//!
|
||||||
|
//! We use an alternative implementation on x86, because the
|
||||||
|
//! main implementation fails with the x87 FPU used by
|
||||||
|
//! debian i386, probably due to excess precision issues.
|
||||||
|
//!
|
||||||
|
//! See https://github.com/rust-lang/compiler-builtins/pull/976 for discussion on why these
|
||||||
|
//! functions are implemented in this way.
|
||||||
|
|
||||||
use super::super::fabs;
|
pub fn ceil(mut x: f64) -> f64 {
|
||||||
|
unsafe {
|
||||||
/// Use an alternative implementation on x86, because the
|
core::arch::asm!(
|
||||||
/// main implementation fails with the x87 FPU used by
|
"fld qword ptr [{x}]",
|
||||||
/// debian i386, probably due to excess precision issues.
|
// Save the FPU control word, using `x` as scratch space.
|
||||||
/// Basic implementation taken from https://github.com/rust-lang/libm/issues/219.
|
"fstcw [{x}]",
|
||||||
pub fn ceil(x: f64) -> f64 {
|
// Set rounding control to 0b10 (+∞).
|
||||||
if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() {
|
"mov word ptr [{x} + 2], 0x0b7f",
|
||||||
let truncated = x as i64 as f64;
|
"fldcw [{x} + 2]",
|
||||||
if truncated < x {
|
// Round.
|
||||||
return truncated + 1.0;
|
"frndint",
|
||||||
} else {
|
// Restore FPU control word.
|
||||||
return truncated;
|
"fldcw [{x}]",
|
||||||
}
|
// Save rounded value to memory.
|
||||||
} else {
|
"fstp qword ptr [{x}]",
|
||||||
return x;
|
x = in(reg) &mut x,
|
||||||
|
// All the x87 FPU stack is used, all registers must be clobbered
|
||||||
|
out("st(0)") _, out("st(1)") _,
|
||||||
|
out("st(2)") _, out("st(3)") _,
|
||||||
|
out("st(4)") _, out("st(5)") _,
|
||||||
|
out("st(6)") _, out("st(7)") _,
|
||||||
|
options(nostack),
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
x
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Use an alternative implementation on x86, because the
|
pub fn floor(mut x: f64) -> f64 {
|
||||||
/// main implementation fails with the x87 FPU used by
|
unsafe {
|
||||||
/// debian i386, probably due to excess precision issues.
|
core::arch::asm!(
|
||||||
/// Basic implementation taken from https://github.com/rust-lang/libm/issues/219.
|
"fld qword ptr [{x}]",
|
||||||
pub fn floor(x: f64) -> f64 {
|
// Save the FPU control word, using `x` as scratch space.
|
||||||
if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() {
|
"fstcw [{x}]",
|
||||||
let truncated = x as i64 as f64;
|
// Set rounding control to 0b01 (-∞).
|
||||||
if truncated > x {
|
"mov word ptr [{x} + 2], 0x077f",
|
||||||
return truncated - 1.0;
|
"fldcw [{x} + 2]",
|
||||||
} else {
|
// Round.
|
||||||
return truncated;
|
"frndint",
|
||||||
}
|
// Restore FPU control word.
|
||||||
} else {
|
"fldcw [{x}]",
|
||||||
return x;
|
// Save rounded value to memory.
|
||||||
|
"fstp qword ptr [{x}]",
|
||||||
|
x = in(reg) &mut x,
|
||||||
|
// All the x87 FPU stack is used, all registers must be clobbered
|
||||||
|
out("st(0)") _, out("st(1)") _,
|
||||||
|
out("st(2)") _, out("st(3)") _,
|
||||||
|
out("st(4)") _, out("st(5)") _,
|
||||||
|
out("st(6)") _, out("st(7)") _,
|
||||||
|
options(nostack),
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
x
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user