Add __addhf3, __subhf3, __mulhf3, __{eq,ge,gt,le,lt,ne,unord}hf2

LLVM does not currently emit calls to these functions, but doing so is
being discussed as an option on platforms where `f32` is not supported in
hardware. Glibc/libgcc also already provide the comparison functions [1].

The generic implementations for addition, subtraction, and
multiplication work for f16 without any complications, as do
comparisons, so add them here.

[1]: https://sourceware.org/git/?p=glibc.git;a=commit;h=6ec6c77867af4ddfec7323e0ac6ede89effca852
This commit is contained in:
Trevor Gross
2025-08-09 06:29:01 -05:00
parent 531d5aa593
commit ba5def8a85
7 changed files with 85 additions and 14 deletions

View File

@@ -1,4 +1,5 @@
#![allow(unused_macros)]
#![cfg_attr(f16_enabled, feature(f16))]
#![cfg_attr(f128_enabled, feature(f128))]
use builtins_test::*;
@@ -115,28 +116,25 @@ macro_rules! float_sum {
// Addition/subtraction test cases, generated by invoking `float_sum!` once per
// group of float types.
mod float_addsub {
use super::*;
// The f16 row is only compiled when the `f16_enabled` cfg is set.
#[cfg(f16_enabled)]
float_sum! {
// Row layout: float type, add builtin, sub builtin, then two further fields.
// NOTE(review): the last two fields are presumably a reference-float type name
// and a cfg predicate consumed by `float_sum!` -- confirm against the macro.
f16, __addhf3, __subhf3, Half, all();
}
// f32 and f64 rows are always compiled.
float_sum! {
f32, __addsf3, __subsf3, Single, all();
f64, __adddf3, __subdf3, Double, all();
}
}
// f128 addition/subtraction test cases using `__addtf3` / `__subtf3`.
// Gated on `f128_enabled`, on `x86_no_sse` being unset, and on the target not
// being powerpc / powerpc64.
// NOTE(review): the same three cfg attributes appear both on the module and on
// the `float_sum!` invocation inside it. In this diff view that is likely the
// old (module-level) and new (invocation-level) lines shown together -- confirm
// which set is meant to remain.
#[cfg(f128_enabled)]
#[cfg(not(x86_no_sse))]
#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
mod float_addsub_f128 {
use super::*;
#[cfg(f128_enabled)]
#[cfg(not(x86_no_sse))]
#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
float_sum! {
// The final field is a cfg predicate on the `no-sys-f128` feature being off.
f128, __addtf3, __subtf3, Quad, not(feature = "no-sys-f128");
}
}
#[cfg(f128_enabled)]
#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
mod float_addsub_f128_ppc {
use super::*;
#[cfg(f128_enabled)]
#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
float_sum! {
f128, __addkf3, __subkf3, Quad, not(feature = "no-sys-f128");
}

View File

@@ -1,5 +1,6 @@
#![allow(unused_macros)]
#![allow(unreachable_code)]
#![cfg_attr(f16_enabled, feature(f16))]
#![cfg_attr(f128_enabled, feature(f128))]
use builtins_test::*;
@@ -51,6 +52,26 @@ mod float_comparisons {
};
}
// Fuzz test for the f16 comparison builtins. Only compiled when the
// `f16_enabled` cfg is set.
#[test]
#[cfg(f16_enabled)]
fn cmp_f16() {
use compiler_builtins::float::cmp::{
__eqhf2, __gehf2, __gthf2, __lehf2, __lthf2, __nehf2, __unordhf2,
};
// `fuzz_float_2` receives `N` and a closure taking a pair of f16 values.
fuzz_float_2(N, |x: f16, y: f16| {
// `__unordhf2` must return nonzero exactly when at least one operand is NaN.
assert_eq!(__unordhf2(x, y) != 0, x.is_nan() || y.is_nan());
// Each row pairs an integer with one comparison builtin to check.
// NOTE(review): the integer is presumably the value the builtin is expected
// to return for unordered (NaN) operands -- confirm against the `cmp!` macro.
cmp!(f16, x, y, Half, all(),
1, __lthf2;
1, __lehf2;
1, __eqhf2;
-1, __gehf2;
-1, __gthf2;
1, __nehf2;
);
});
}
#[test]
fn cmp_f32() {
use compiler_builtins::float::cmp::{

View File

@@ -1,5 +1,6 @@
#![allow(unused_macros)]
#![cfg_attr(f16_enabled, feature(f16))]
#![cfg_attr(f128_enabled, feature(f128))]
#![allow(unused_macros)]
use builtins_test::*;
@@ -117,6 +118,11 @@ macro_rules! float_mul {
mod float_mul {
use super::*;
#[cfg(f16_enabled)]
float_mul! {
f16, __mulhf3, Half, all();
}
// FIXME(#616): Stop ignoring arches that don't have native support once fix for builtins is in
// nightly.
float_mul! {

View File

@@ -191,6 +191,11 @@ where
}
intrinsics! {
// f16 addition builtin; forwards both operands to the generic `add` routine.
// Only compiled when the `f16_enabled` cfg is set.
#[cfg(f16_enabled)]
pub extern "C" fn __addhf3(a: f16, b: f16) -> f16 {
add(a, b)
}
#[aapcs_on_arm]
#[arm_aeabi_alias = __aeabi_fadd]
pub extern "C" fn __addsf3(a: f32, b: f32) -> f32 {

View File

@@ -115,6 +115,37 @@ fn unord<F: Float>(a: F, b: F) -> bool {
a_abs > inf_rep || b_abs > inf_rep
}
// f16 comparison builtins. The whole block is only compiled when the
// `f16_enabled` cfg is set.
//
// Every function except `__unordhf2` calls the generic `cmp` and converts its
// result with one of two helpers:
//   * `to_le_abi()` -- used by `__lehf2`, `__eqhf2`, `__lthf2`, `__nehf2`
//   * `to_ge_abi()` -- used by `__gehf2`, `__gthf2`
// NOTE(review): presumably the two helpers differ only in the value produced
// for unordered (NaN) operands -- confirm against the `to_le_abi` /
// `to_ge_abi` definitions, which are not visible here.
#[cfg(f16_enabled)]
intrinsics! {
pub extern "C" fn __lehf2(a: f16, b: f16) -> crate::float::cmp::CmpResult {
cmp(a, b).to_le_abi()
}
pub extern "C" fn __gehf2(a: f16, b: f16) -> crate::float::cmp::CmpResult {
cmp(a, b).to_ge_abi()
}
// Converts the boolean result of the generic `unord` to `CmpResult`
// (nonzero when `unord` reports true).
pub extern "C" fn __unordhf2(a: f16, b: f16) -> crate::float::cmp::CmpResult {
unord(a, b) as crate::float::cmp::CmpResult
}
pub extern "C" fn __eqhf2(a: f16, b: f16) -> crate::float::cmp::CmpResult {
cmp(a, b).to_le_abi()
}
pub extern "C" fn __lthf2(a: f16, b: f16) -> crate::float::cmp::CmpResult {
cmp(a, b).to_le_abi()
}
pub extern "C" fn __nehf2(a: f16, b: f16) -> crate::float::cmp::CmpResult {
cmp(a, b).to_le_abi()
}
pub extern "C" fn __gthf2(a: f16, b: f16) -> crate::float::cmp::CmpResult {
cmp(a, b).to_ge_abi()
}
}
intrinsics! {
pub extern "C" fn __lesf2(a: f32, b: f32) -> crate::float::cmp::CmpResult {
cmp(a, b).to_le_abi()

View File

@@ -180,6 +180,11 @@ where
}
intrinsics! {
// f16 multiplication builtin; forwards both operands to the generic `mul`
// routine. Only compiled when the `f16_enabled` cfg is set.
#[cfg(f16_enabled)]
pub extern "C" fn __mulhf3(a: f16, b: f16) -> f16 {
mul(a, b)
}
#[aapcs_on_arm]
#[arm_aeabi_alias = __aeabi_fmul]
pub extern "C" fn __mulsf3(a: f32, b: f32) -> f32 {

View File

@@ -1,6 +1,11 @@
use crate::float::Float;
intrinsics! {
// f16 subtraction builtin, implemented as `a + (-b)`.
// Only compiled when the `f16_enabled` cfg is set.
#[cfg(f16_enabled)]
pub extern "C" fn __subhf3(a: f16, b: f16) -> f16 {
// Negate `b` by XOR-ing its bit pattern with `f16::SIGN_MASK`, then reuse the
// f16 addition builtin.
crate::float::add::__addhf3(a, f16::from_bits(b.to_bits() ^ f16::SIGN_MASK))
}
#[arm_aeabi_alias = __aeabi_fsub]
pub extern "C" fn __subsf3(a: f32, b: f32) -> f32 {
crate::float::add::__addsf3(a, f32::from_bits(b.to_bits() ^ f32::SIGN_MASK))