Refactor the fma modules

Move implementations to `generic/` like the other functions. This also
allows us to combine the `fma` and `fma_wide` modules.
This commit is contained in:
Trevor Gross
2025-04-29 20:52:54 +00:00
committed by Trevor Gross
parent 6d83a3226f
commit caf337d467
6 changed files with 179 additions and 175 deletions

View File

@@ -0,0 +1,165 @@
/* SPDX-License-Identifier: MIT */
/* origin: musl src/math/fma.c, fmaf.c Ported to generic Rust algorithm in 2025, TG. */
use super::generic;
use crate::support::Round;
// Placeholder so we can have `fmaf16` in the `Float` trait.
#[allow(unused)]
#[cfg(f16_enabled)]
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
pub(crate) fn fmaf16(_x: f16, _y: f16, _z: f16) -> f16 {
unimplemented!()
}
/// Floating multiply add (f32)
///
/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision).
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
pub fn fmaf(x: f32, y: f32, z: f32) -> f32 {
select_implementation! {
name: fmaf,
use_arch: all(target_arch = "aarch64", target_feature = "neon"),
args: x, y, z,
}
generic::fma_wide_round(x, y, z, Round::Nearest).val
}
/// Fused multiply add (f64)
///
/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision).
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
pub fn fma(x: f64, y: f64, z: f64) -> f64 {
select_implementation! {
name: fma,
use_arch: all(target_arch = "aarch64", target_feature = "neon"),
args: x, y, z,
}
generic::fma_round(x, y, z, Round::Nearest).val
}
/// Fused multiply add (f128)
///
/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision).
#[cfg(f128_enabled)]
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
pub fn fmaf128(x: f128, y: f128, z: f128) -> f128 {
generic::fma_round(x, y, z, Round::Nearest).val
}
#[cfg(test)]
mod tests {
use super::*;
use crate::support::{CastFrom, CastInto, Float, FpResult, HInt, MinInt, Round, Status};
/// Test the generic `fma_round` algorithm for a given float.
fn spec_test<F>(f: impl Fn(F, F, F) -> F)
where
F: Float,
F: CastFrom<F::SignedInt>,
F: CastFrom<i8>,
F::Int: HInt,
u32: CastInto<F::Int>,
{
let x = F::from_bits(F::Int::ONE);
let y = F::from_bits(F::Int::ONE);
let z = F::ZERO;
// 754-2020 says "When the exact result of (a × b) + c is non-zero yet the result of
// fusedMultiplyAdd is zero because of rounding, the zero result takes the sign of the
// exact result"
assert_biteq!(f(x, y, z), F::ZERO);
assert_biteq!(f(x, -y, z), F::NEG_ZERO);
assert_biteq!(f(-x, y, z), F::NEG_ZERO);
assert_biteq!(f(-x, -y, z), F::ZERO);
}
#[test]
fn spec_test_f32() {
spec_test::<f32>(fmaf);
// Also do a small check that the non-widening version works for f32 (this should ideally
// get tested some more).
spec_test::<f32>(|x, y, z| generic::fma_round(x, y, z, Round::Nearest).val);
}
#[test]
fn spec_test_f64() {
spec_test::<f64>(fma);
let expect_underflow = [
(
hf64!("0x1.0p-1070"),
hf64!("0x1.0p-1070"),
hf64!("0x1.ffffffffffffp-1023"),
hf64!("0x0.ffffffffffff8p-1022"),
),
(
// FIXME: we raise underflow but this should only be inexact (based on C and
// `rustc_apfloat`).
hf64!("0x1.0p-1070"),
hf64!("0x1.0p-1070"),
hf64!("-0x1.0p-1022"),
hf64!("-0x1.0p-1022"),
),
];
for (x, y, z, res) in expect_underflow {
let FpResult { val, status } = generic::fma_round(x, y, z, Round::Nearest);
assert_biteq!(val, res);
assert_eq!(status, Status::UNDERFLOW);
}
}
#[test]
#[cfg(f128_enabled)]
fn spec_test_f128() {
spec_test::<f128>(fmaf128);
}
#[test]
fn issue_263() {
let a = f32::from_bits(1266679807);
let b = f32::from_bits(1300234242);
let c = f32::from_bits(1115553792);
let expected = f32::from_bits(1501560833);
assert_eq!(fmaf(a, b, c), expected);
}
#[test]
fn fma_segfault() {
// These two inputs cause fma to segfault on release due to overflow:
assert_eq!(
fma(
-0.0000000000000002220446049250313,
-0.0000000000000002220446049250313,
-0.0000000000000002220446049250313
),
-0.00000000000000022204460492503126,
);
let result = fma(-0.992, -0.992, -0.992);
//force rounding to storage format on x87 to prevent superious errors.
#[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
let result = force_eval!(result);
assert_eq!(result, -0.007936000000000007,);
}
#[test]
fn fma_sbb() {
assert_eq!(
fma(-(1.0 - f64::EPSILON), f64::MIN, f64::MIN),
-3991680619069439e277
);
}
#[test]
fn fma_underflow() {
assert_eq!(
fma(1.1102230246251565e-16, -9.812526705433188e-305, 1.0894e-320),
0.0,
);
}
}

View File

@@ -1,31 +1,9 @@
/* SPDX-License-Identifier: MIT */
/* origin: musl src/math/fma.c. Ported to generic Rust algorithm in 2025, TG. */
use super::support::{DInt, FpResult, HInt, IntTy, Round, Status};
use super::{CastFrom, CastInto, Float, Int, MinInt};
/// Fused multiply add (f64)
///
/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision).
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
pub fn fma(x: f64, y: f64, z: f64) -> f64 {
select_implementation! {
name: fma,
use_arch: all(target_arch = "aarch64", target_feature = "neon"),
args: x, y, z,
}
fma_round(x, y, z, Round::Nearest).val
}
/// Fused multiply add (f128)
///
/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision).
#[cfg(f128_enabled)]
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
pub fn fmaf128(x: f128, y: f128, z: f128) -> f128 {
fma_round(x, y, z, Round::Nearest).val
}
use crate::support::{
CastFrom, CastInto, DInt, Float, FpResult, HInt, Int, IntTy, MinInt, Round, Status,
};
/// Fused multiply-add that works when there is not a larger float size available. Computes
/// `(x * y) + z`.
@@ -234,7 +212,7 @@ where
}
// Use our exponent to scale the final value.
FpResult::new(super::generic::scalbn(r, e), status)
FpResult::new(super::scalbn(r, e), status)
}
/// Representation of `F` that has handled subnormals.
@@ -298,106 +276,3 @@ impl<F: Float> Norm<F> {
self.e > Self::ZERO_INF_NAN as i32
}
}
#[cfg(test)]
mod tests {
use super::*;
/// Test the generic `fma_round` algorithm for a given float.
fn spec_test<F>()
where
F: Float,
F: CastFrom<F::SignedInt>,
F: CastFrom<i8>,
F::Int: HInt,
u32: CastInto<F::Int>,
{
let x = F::from_bits(F::Int::ONE);
let y = F::from_bits(F::Int::ONE);
let z = F::ZERO;
let fma = |x, y, z| fma_round(x, y, z, Round::Nearest).val;
// 754-2020 says "When the exact result of (a × b) + c is non-zero yet the result of
// fusedMultiplyAdd is zero because of rounding, the zero result takes the sign of the
// exact result"
assert_biteq!(fma(x, y, z), F::ZERO);
assert_biteq!(fma(x, -y, z), F::NEG_ZERO);
assert_biteq!(fma(-x, y, z), F::NEG_ZERO);
assert_biteq!(fma(-x, -y, z), F::ZERO);
}
#[test]
fn spec_test_f32() {
spec_test::<f32>();
}
#[test]
fn spec_test_f64() {
spec_test::<f64>();
let expect_underflow = [
(
hf64!("0x1.0p-1070"),
hf64!("0x1.0p-1070"),
hf64!("0x1.ffffffffffffp-1023"),
hf64!("0x0.ffffffffffff8p-1022"),
),
(
// FIXME: we raise underflow but this should only be inexact (based on C and
// `rustc_apfloat`).
hf64!("0x1.0p-1070"),
hf64!("0x1.0p-1070"),
hf64!("-0x1.0p-1022"),
hf64!("-0x1.0p-1022"),
),
];
for (x, y, z, res) in expect_underflow {
let FpResult { val, status } = fma_round(x, y, z, Round::Nearest);
assert_biteq!(val, res);
assert_eq!(status, Status::UNDERFLOW);
}
}
#[test]
#[cfg(f128_enabled)]
fn spec_test_f128() {
spec_test::<f128>();
}
#[test]
fn fma_segfault() {
// These two inputs cause fma to segfault on release due to overflow:
assert_eq!(
fma(
-0.0000000000000002220446049250313,
-0.0000000000000002220446049250313,
-0.0000000000000002220446049250313
),
-0.00000000000000022204460492503126,
);
let result = fma(-0.992, -0.992, -0.992);
//force rounding to storage format on x87 to prevent superious errors.
#[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
let result = force_eval!(result);
assert_eq!(result, -0.007936000000000007,);
}
#[test]
fn fma_sbb() {
assert_eq!(
fma(-(1.0 - f64::EPSILON), f64::MIN, f64::MIN),
-3991680619069439e277
);
}
#[test]
fn fma_underflow() {
assert_eq!(
fma(1.1102230246251565e-16, -9.812526705433188e-305, 1.0894e-320),
0.0,
);
}
}

View File

@@ -1,30 +1,6 @@
/* SPDX-License-Identifier: MIT */
/* origin: musl src/math/fmaf.c. Ported to generic Rust algorithm in 2025, TG. */
use super::support::{FpResult, IntTy, Round, Status};
use super::{CastFrom, CastInto, DFloat, Float, HFloat, MinInt};
// Placeholder so we can have `fmaf16` in the `Float` trait.
#[allow(unused)]
#[cfg(f16_enabled)]
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
pub(crate) fn fmaf16(_x: f16, _y: f16, _z: f16) -> f16 {
unimplemented!()
}
/// Floating multiply add (f32)
///
/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision).
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
pub fn fmaf(x: f32, y: f32, z: f32) -> f32 {
select_implementation! {
name: fmaf,
use_arch: all(target_arch = "aarch64", target_feature = "neon"),
args: x, y, z,
}
fma_wide_round(x, y, z, Round::Nearest).val
}
use crate::support::{
CastFrom, CastInto, DFloat, Float, FpResult, HFloat, IntTy, MinInt, Round, Status,
};
/// Fma implementation when a hardware-backed larger float type is available. For `f32` and `f64`,
/// `f64` has enough precision to represent the `f32` in its entirety, except for double rounding.
@@ -95,17 +71,3 @@ where
FpResult::ok(B::from_bits(ui).narrow())
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn issue_263() {
let a = f32::from_bits(1266679807);
let b = f32::from_bits(1300234242);
let c = f32::from_bits(1115553792);
let expected = f32::from_bits(1501560833);
assert_eq!(fmaf(a, b, c), expected);
}
}

View File

@@ -6,6 +6,8 @@ mod copysign;
mod fabs;
mod fdim;
mod floor;
mod fma;
mod fma_wide;
mod fmax;
mod fmaximum;
mod fmaximum_num;
@@ -24,6 +26,8 @@ pub use copysign::copysign;
pub use fabs::fabs;
pub use fdim::fdim;
pub use floor::floor;
pub use fma::fma_round;
pub use fma_wide::fma_wide_round;
pub use fmax::fmax;
pub use fmaximum::fmaximum;
pub use fmaximum_num::fmaximum_num;

View File

@@ -159,7 +159,6 @@ mod fabs;
mod fdim;
mod floor;
mod fma;
mod fma_wide;
mod fmin_fmax;
mod fminimum_fmaximum;
mod fminimum_fmaximum_num;
@@ -254,8 +253,7 @@ pub use self::expm1f::expm1f;
pub use self::fabs::{fabs, fabsf};
pub use self::fdim::{fdim, fdimf};
pub use self::floor::{floor, floorf};
pub use self::fma::fma;
pub use self::fma_wide::fmaf;
pub use self::fma::{fma, fmaf};
pub use self::fmin_fmax::{fmax, fmaxf, fmin, fminf};
pub use self::fminimum_fmaximum::{fmaximum, fmaximumf, fminimum, fminimumf};
pub use self::fminimum_fmaximum_num::{fmaximum_num, fmaximum_numf, fminimum_num, fminimum_numf};
@@ -336,7 +334,7 @@ cfg_if! {
// verify-sorted-end
#[allow(unused_imports)]
pub(crate) use self::fma_wide::fmaf16;
pub(crate) use self::fma::fmaf16;
}
}