Refactor the fma modules
Move implementations to `generic/` like the other functions. This also allows us to combine the `fma` and `fma_wide` modules.
This commit is contained in:
committed by
Trevor Gross
parent
6d83a3226f
commit
caf337d467
165
library/compiler-builtins/libm/src/math/fma.rs
Normal file
165
library/compiler-builtins/libm/src/math/fma.rs
Normal file
@@ -0,0 +1,165 @@
|
||||
/* SPDX-License-Identifier: MIT */
|
||||
/* origin: musl src/math/fma.c, fmaf.c Ported to generic Rust algorithm in 2025, TG. */
|
||||
|
||||
use super::generic;
|
||||
use crate::support::Round;
|
||||
|
||||
// Placeholder so we can have `fmaf16` in the `Float` trait.
|
||||
#[allow(unused)]
|
||||
#[cfg(f16_enabled)]
|
||||
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
|
||||
pub(crate) fn fmaf16(_x: f16, _y: f16, _z: f16) -> f16 {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
/// Floating multiply add (f32)
|
||||
///
|
||||
/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision).
|
||||
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
|
||||
pub fn fmaf(x: f32, y: f32, z: f32) -> f32 {
|
||||
select_implementation! {
|
||||
name: fmaf,
|
||||
use_arch: all(target_arch = "aarch64", target_feature = "neon"),
|
||||
args: x, y, z,
|
||||
}
|
||||
|
||||
generic::fma_wide_round(x, y, z, Round::Nearest).val
|
||||
}
|
||||
|
||||
/// Fused multiply add (f64)
|
||||
///
|
||||
/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision).
|
||||
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
|
||||
pub fn fma(x: f64, y: f64, z: f64) -> f64 {
|
||||
select_implementation! {
|
||||
name: fma,
|
||||
use_arch: all(target_arch = "aarch64", target_feature = "neon"),
|
||||
args: x, y, z,
|
||||
}
|
||||
|
||||
generic::fma_round(x, y, z, Round::Nearest).val
|
||||
}
|
||||
|
||||
/// Fused multiply add (f128)
|
||||
///
|
||||
/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision).
|
||||
#[cfg(f128_enabled)]
|
||||
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
|
||||
pub fn fmaf128(x: f128, y: f128, z: f128) -> f128 {
|
||||
generic::fma_round(x, y, z, Round::Nearest).val
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::support::{CastFrom, CastInto, Float, FpResult, HInt, MinInt, Round, Status};
|
||||
|
||||
/// Test the generic `fma_round` algorithm for a given float.
|
||||
fn spec_test<F>(f: impl Fn(F, F, F) -> F)
|
||||
where
|
||||
F: Float,
|
||||
F: CastFrom<F::SignedInt>,
|
||||
F: CastFrom<i8>,
|
||||
F::Int: HInt,
|
||||
u32: CastInto<F::Int>,
|
||||
{
|
||||
let x = F::from_bits(F::Int::ONE);
|
||||
let y = F::from_bits(F::Int::ONE);
|
||||
let z = F::ZERO;
|
||||
|
||||
// 754-2020 says "When the exact result of (a × b) + c is non-zero yet the result of
|
||||
// fusedMultiplyAdd is zero because of rounding, the zero result takes the sign of the
|
||||
// exact result"
|
||||
assert_biteq!(f(x, y, z), F::ZERO);
|
||||
assert_biteq!(f(x, -y, z), F::NEG_ZERO);
|
||||
assert_biteq!(f(-x, y, z), F::NEG_ZERO);
|
||||
assert_biteq!(f(-x, -y, z), F::ZERO);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn spec_test_f32() {
|
||||
spec_test::<f32>(fmaf);
|
||||
|
||||
// Also do a small check that the non-widening version works for f32 (this should ideally
|
||||
// get tested some more).
|
||||
spec_test::<f32>(|x, y, z| generic::fma_round(x, y, z, Round::Nearest).val);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn spec_test_f64() {
|
||||
spec_test::<f64>(fma);
|
||||
|
||||
let expect_underflow = [
|
||||
(
|
||||
hf64!("0x1.0p-1070"),
|
||||
hf64!("0x1.0p-1070"),
|
||||
hf64!("0x1.ffffffffffffp-1023"),
|
||||
hf64!("0x0.ffffffffffff8p-1022"),
|
||||
),
|
||||
(
|
||||
// FIXME: we raise underflow but this should only be inexact (based on C and
|
||||
// `rustc_apfloat`).
|
||||
hf64!("0x1.0p-1070"),
|
||||
hf64!("0x1.0p-1070"),
|
||||
hf64!("-0x1.0p-1022"),
|
||||
hf64!("-0x1.0p-1022"),
|
||||
),
|
||||
];
|
||||
|
||||
for (x, y, z, res) in expect_underflow {
|
||||
let FpResult { val, status } = generic::fma_round(x, y, z, Round::Nearest);
|
||||
assert_biteq!(val, res);
|
||||
assert_eq!(status, Status::UNDERFLOW);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg(f128_enabled)]
|
||||
fn spec_test_f128() {
|
||||
spec_test::<f128>(fmaf128);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn issue_263() {
|
||||
let a = f32::from_bits(1266679807);
|
||||
let b = f32::from_bits(1300234242);
|
||||
let c = f32::from_bits(1115553792);
|
||||
let expected = f32::from_bits(1501560833);
|
||||
assert_eq!(fmaf(a, b, c), expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fma_segfault() {
|
||||
// These two inputs cause fma to segfault on release due to overflow:
|
||||
assert_eq!(
|
||||
fma(
|
||||
-0.0000000000000002220446049250313,
|
||||
-0.0000000000000002220446049250313,
|
||||
-0.0000000000000002220446049250313
|
||||
),
|
||||
-0.00000000000000022204460492503126,
|
||||
);
|
||||
|
||||
let result = fma(-0.992, -0.992, -0.992);
|
||||
//force rounding to storage format on x87 to prevent superious errors.
|
||||
#[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
|
||||
let result = force_eval!(result);
|
||||
assert_eq!(result, -0.007936000000000007,);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fma_sbb() {
|
||||
assert_eq!(
|
||||
fma(-(1.0 - f64::EPSILON), f64::MIN, f64::MIN),
|
||||
-3991680619069439e277
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fma_underflow() {
|
||||
assert_eq!(
|
||||
fma(1.1102230246251565e-16, -9.812526705433188e-305, 1.0894e-320),
|
||||
0.0,
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -1,31 +1,9 @@
|
||||
/* SPDX-License-Identifier: MIT */
|
||||
/* origin: musl src/math/fma.c. Ported to generic Rust algorithm in 2025, TG. */
|
||||
|
||||
use super::support::{DInt, FpResult, HInt, IntTy, Round, Status};
|
||||
use super::{CastFrom, CastInto, Float, Int, MinInt};
|
||||
|
||||
/// Fused multiply add (f64)
|
||||
///
|
||||
/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision).
|
||||
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
|
||||
pub fn fma(x: f64, y: f64, z: f64) -> f64 {
|
||||
select_implementation! {
|
||||
name: fma,
|
||||
use_arch: all(target_arch = "aarch64", target_feature = "neon"),
|
||||
args: x, y, z,
|
||||
}
|
||||
|
||||
fma_round(x, y, z, Round::Nearest).val
|
||||
}
|
||||
|
||||
/// Fused multiply add (f128)
|
||||
///
|
||||
/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision).
|
||||
#[cfg(f128_enabled)]
|
||||
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
|
||||
pub fn fmaf128(x: f128, y: f128, z: f128) -> f128 {
|
||||
fma_round(x, y, z, Round::Nearest).val
|
||||
}
|
||||
use crate::support::{
|
||||
CastFrom, CastInto, DInt, Float, FpResult, HInt, Int, IntTy, MinInt, Round, Status,
|
||||
};
|
||||
|
||||
/// Fused multiply-add that works when there is not a larger float size available. Computes
|
||||
/// `(x * y) + z`.
|
||||
@@ -234,7 +212,7 @@ where
|
||||
}
|
||||
|
||||
// Use our exponent to scale the final value.
|
||||
FpResult::new(super::generic::scalbn(r, e), status)
|
||||
FpResult::new(super::scalbn(r, e), status)
|
||||
}
|
||||
|
||||
/// Representation of `F` that has handled subnormals.
|
||||
@@ -298,106 +276,3 @@ impl<F: Float> Norm<F> {
|
||||
self.e > Self::ZERO_INF_NAN as i32
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
/// Test the generic `fma_round` algorithm for a given float.
|
||||
fn spec_test<F>()
|
||||
where
|
||||
F: Float,
|
||||
F: CastFrom<F::SignedInt>,
|
||||
F: CastFrom<i8>,
|
||||
F::Int: HInt,
|
||||
u32: CastInto<F::Int>,
|
||||
{
|
||||
let x = F::from_bits(F::Int::ONE);
|
||||
let y = F::from_bits(F::Int::ONE);
|
||||
let z = F::ZERO;
|
||||
|
||||
let fma = |x, y, z| fma_round(x, y, z, Round::Nearest).val;
|
||||
|
||||
// 754-2020 says "When the exact result of (a × b) + c is non-zero yet the result of
|
||||
// fusedMultiplyAdd is zero because of rounding, the zero result takes the sign of the
|
||||
// exact result"
|
||||
assert_biteq!(fma(x, y, z), F::ZERO);
|
||||
assert_biteq!(fma(x, -y, z), F::NEG_ZERO);
|
||||
assert_biteq!(fma(-x, y, z), F::NEG_ZERO);
|
||||
assert_biteq!(fma(-x, -y, z), F::ZERO);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn spec_test_f32() {
|
||||
spec_test::<f32>();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn spec_test_f64() {
|
||||
spec_test::<f64>();
|
||||
|
||||
let expect_underflow = [
|
||||
(
|
||||
hf64!("0x1.0p-1070"),
|
||||
hf64!("0x1.0p-1070"),
|
||||
hf64!("0x1.ffffffffffffp-1023"),
|
||||
hf64!("0x0.ffffffffffff8p-1022"),
|
||||
),
|
||||
(
|
||||
// FIXME: we raise underflow but this should only be inexact (based on C and
|
||||
// `rustc_apfloat`).
|
||||
hf64!("0x1.0p-1070"),
|
||||
hf64!("0x1.0p-1070"),
|
||||
hf64!("-0x1.0p-1022"),
|
||||
hf64!("-0x1.0p-1022"),
|
||||
),
|
||||
];
|
||||
|
||||
for (x, y, z, res) in expect_underflow {
|
||||
let FpResult { val, status } = fma_round(x, y, z, Round::Nearest);
|
||||
assert_biteq!(val, res);
|
||||
assert_eq!(status, Status::UNDERFLOW);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg(f128_enabled)]
|
||||
fn spec_test_f128() {
|
||||
spec_test::<f128>();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fma_segfault() {
|
||||
// These two inputs cause fma to segfault on release due to overflow:
|
||||
assert_eq!(
|
||||
fma(
|
||||
-0.0000000000000002220446049250313,
|
||||
-0.0000000000000002220446049250313,
|
||||
-0.0000000000000002220446049250313
|
||||
),
|
||||
-0.00000000000000022204460492503126,
|
||||
);
|
||||
|
||||
let result = fma(-0.992, -0.992, -0.992);
|
||||
//force rounding to storage format on x87 to prevent superious errors.
|
||||
#[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
|
||||
let result = force_eval!(result);
|
||||
assert_eq!(result, -0.007936000000000007,);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fma_sbb() {
|
||||
assert_eq!(
|
||||
fma(-(1.0 - f64::EPSILON), f64::MIN, f64::MIN),
|
||||
-3991680619069439e277
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fma_underflow() {
|
||||
assert_eq!(
|
||||
fma(1.1102230246251565e-16, -9.812526705433188e-305, 1.0894e-320),
|
||||
0.0,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,30 +1,6 @@
|
||||
/* SPDX-License-Identifier: MIT */
|
||||
/* origin: musl src/math/fmaf.c. Ported to generic Rust algorithm in 2025, TG. */
|
||||
|
||||
use super::support::{FpResult, IntTy, Round, Status};
|
||||
use super::{CastFrom, CastInto, DFloat, Float, HFloat, MinInt};
|
||||
|
||||
// Placeholder so we can have `fmaf16` in the `Float` trait.
|
||||
#[allow(unused)]
|
||||
#[cfg(f16_enabled)]
|
||||
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
|
||||
pub(crate) fn fmaf16(_x: f16, _y: f16, _z: f16) -> f16 {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
/// Floating multiply add (f32)
|
||||
///
|
||||
/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision).
|
||||
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
|
||||
pub fn fmaf(x: f32, y: f32, z: f32) -> f32 {
|
||||
select_implementation! {
|
||||
name: fmaf,
|
||||
use_arch: all(target_arch = "aarch64", target_feature = "neon"),
|
||||
args: x, y, z,
|
||||
}
|
||||
|
||||
fma_wide_round(x, y, z, Round::Nearest).val
|
||||
}
|
||||
use crate::support::{
|
||||
CastFrom, CastInto, DFloat, Float, FpResult, HFloat, IntTy, MinInt, Round, Status,
|
||||
};
|
||||
|
||||
/// Fma implementation when a hardware-backed larger float type is available. For `f32` and `f64`,
|
||||
/// `f64` has enough precision to represent the `f32` in its entirety, except for double rounding.
|
||||
@@ -95,17 +71,3 @@ where
|
||||
|
||||
FpResult::ok(B::from_bits(ui).narrow())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn issue_263() {
|
||||
let a = f32::from_bits(1266679807);
|
||||
let b = f32::from_bits(1300234242);
|
||||
let c = f32::from_bits(1115553792);
|
||||
let expected = f32::from_bits(1501560833);
|
||||
assert_eq!(fmaf(a, b, c), expected);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6,6 +6,8 @@ mod copysign;
|
||||
mod fabs;
|
||||
mod fdim;
|
||||
mod floor;
|
||||
mod fma;
|
||||
mod fma_wide;
|
||||
mod fmax;
|
||||
mod fmaximum;
|
||||
mod fmaximum_num;
|
||||
@@ -24,6 +26,8 @@ pub use copysign::copysign;
|
||||
pub use fabs::fabs;
|
||||
pub use fdim::fdim;
|
||||
pub use floor::floor;
|
||||
pub use fma::fma_round;
|
||||
pub use fma_wide::fma_wide_round;
|
||||
pub use fmax::fmax;
|
||||
pub use fmaximum::fmaximum;
|
||||
pub use fmaximum_num::fmaximum_num;
|
||||
|
||||
@@ -159,7 +159,6 @@ mod fabs;
|
||||
mod fdim;
|
||||
mod floor;
|
||||
mod fma;
|
||||
mod fma_wide;
|
||||
mod fmin_fmax;
|
||||
mod fminimum_fmaximum;
|
||||
mod fminimum_fmaximum_num;
|
||||
@@ -254,8 +253,7 @@ pub use self::expm1f::expm1f;
|
||||
pub use self::fabs::{fabs, fabsf};
|
||||
pub use self::fdim::{fdim, fdimf};
|
||||
pub use self::floor::{floor, floorf};
|
||||
pub use self::fma::fma;
|
||||
pub use self::fma_wide::fmaf;
|
||||
pub use self::fma::{fma, fmaf};
|
||||
pub use self::fmin_fmax::{fmax, fmaxf, fmin, fminf};
|
||||
pub use self::fminimum_fmaximum::{fmaximum, fmaximumf, fminimum, fminimumf};
|
||||
pub use self::fminimum_fmaximum_num::{fmaximum_num, fmaximum_numf, fminimum_num, fminimum_numf};
|
||||
@@ -336,7 +334,7 @@ cfg_if! {
|
||||
// verify-sorted-end
|
||||
|
||||
#[allow(unused_imports)]
|
||||
pub(crate) use self::fma_wide::fmaf16;
|
||||
pub(crate) use self::fma::fmaf16;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user