//! Test the u256 implementation. The ops already get exercised reasonably well through the `f128`
//! routines, so this only does a few million fuzz iterations against GMP.

#![cfg(feature = "build-mpfr")]

use std::sync::LazyLock;

use libm::support::{HInt, u256};
use libm_test::bigint_fuzz_iteration_count;
use libm_test::generate::random::SEED;
use rand::{Rng, SeedableRng};
use rand_chacha::ChaCha8Rng;
use rug::Assign;
use rug::integer::Order;
use rug::ops::NotAssign;

type BigInt = rug::Integer;

static BIGINT_U256_MAX: LazyLock<BigInt> =
    LazyLock::new(|| BigInt::from_digits(&[u128::MAX, u128::MAX], Order::Lsf));

/// Copied from the test module.
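/// Formats `hi` first and `lo` second, each as 32 hex digits, so the output reads as a single
/// 256-bit hexadecimal value.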
fn hexu(v: u256) -> String {
    format!("0x{:032x}{:032x}", v.hi, v.lo)
}

fn random_u256(rng: &mut ChaCha8Rng) -> u256 {
    let lo: u128 = rng.random();
    let hi: u128 = rng.random();
    u256 { lo, hi }
}
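
/// Assign a `u256` to the GMP integer `bx`, passing the two limbs least-significant first.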
fn assign_bigint(bx: &mut BigInt, x: u256) {
    bx.assign_digits(&[x.lo, x.hi], Order::Lsf);
}

fn from_bigint(bx: &mut BigInt) -> u256 {
    // Truncate so the result fits into `[u128; 2]`; this reduces the value mod 2^256, so every
    // operation behaves as wrapping (overflowing) arithmetic, matching `u256`.
    *bx &= &*BIGINT_U256_MAX;
    let mut bres = [0u128, 0];
    bx.write_digits(&mut bres, Order::Lsf);
    bx.assign(0);
    u256 {
        lo: bres[0],
        hi: bres[1],
    }
}
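
// A minimal self-consistency sketch for the helpers above (an illustrative extra test with a
// hypothetical name, separate from the GMP comparisons below): converting a `u256` to a
// `BigInt` and back should round-trip unchanged, since the value already fits in 256 bits.
#[test]
fn bigint_helper_roundtrip() {
    let mut rng = ChaCha8Rng::from_seed(*SEED);
    let mut bx = BigInt::new();

    for _ in 0..1000 {
        let x = random_u256(&mut rng);
        assign_bigint(&mut bx, x);
        let back = from_bigint(&mut bx);
        assert!(back == x, "roundtrip mismatch: got {}, expected {}", hexu(back), hexu(x));
    }
}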
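
/// Compare a `u256` result against the (truncated) GMP result, panicking with hex-formatted
/// operands if they do not match.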
fn check_one(
    x: impl FnOnce() -> String,
    y: impl FnOnce() -> Option<String>,
    actual: u256,
    expected: &mut BigInt,
) {
    let expected = from_bigint(expected);
    if actual != expected {
        let xmsg = x();
        let ymsg = y().map(|y| format!("y: {y}\n")).unwrap_or_default();
        panic!(
            "Results do not match\n\
             input: {xmsg}\n\
             {ymsg}\
             actual: {}\n\
             expected: {}\
             ",
            hexu(actual),
            hexu(expected),
        )
    }
}

#[test]
fn mp_u256_bitor() {
    let mut rng = ChaCha8Rng::from_seed(*SEED);
    let mut bx = BigInt::new();
    let mut by = BigInt::new();

    for _ in 0..bigint_fuzz_iteration_count() {
        let x = random_u256(&mut rng);
        let y = random_u256(&mut rng);
        assign_bigint(&mut bx, x);
        assign_bigint(&mut by, y);
        let actual = x | y;
        bx |= &by;
        check_one(|| hexu(x), || Some(hexu(y)), actual, &mut bx);
    }
}

#[test]
fn mp_u256_not() {
    let mut rng = ChaCha8Rng::from_seed(*SEED);
    let mut bx = BigInt::new();

    for _ in 0..bigint_fuzz_iteration_count() {
        let x = random_u256(&mut rng);
        assign_bigint(&mut bx, x);
        let actual = !x;
        bx.not_assign();
        check_one(|| hexu(x), || None, actual, &mut bx);
    }
}

#[test]
fn mp_u256_add() {
    let mut rng = ChaCha8Rng::from_seed(*SEED);
    let mut bx = BigInt::new();
    let mut by = BigInt::new();

    for _ in 0..bigint_fuzz_iteration_count() {
        let x = random_u256(&mut rng);
        let y = random_u256(&mut rng);
        assign_bigint(&mut bx, x);
        assign_bigint(&mut by, y);
        let actual = if u256::MAX - x >= y {
            x + y
        } else {
            // otherwise (u256::MAX - x) < y, so the wrapped result is
            // (x + y) - (u256::MAX + 1) == y - (u256::MAX - x) - 1
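            // For intuition, the same identity with 8-bit values: x = 250, y = 10 gives
            // u8::MAX - x = 5 < y, and the wrapped sum (250 + 10) % 256 = 4 equals
            // y - (u8::MAX - x) - 1 = 10 - 5 - 1 = 4.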
            y - (u256::MAX - x) - 1_u128.widen()
        };
        bx += &by;
        check_one(|| hexu(x), || Some(hexu(y)), actual, &mut bx);
    }
}

#[test]
fn mp_u256_sub() {
    let mut rng = ChaCha8Rng::from_seed(*SEED);
    let mut bx = BigInt::new();
    let mut by = BigInt::new();

    for _ in 0..bigint_fuzz_iteration_count() {
        let x = random_u256(&mut rng);
        let y = random_u256(&mut rng);
        assign_bigint(&mut bx, x);
        assign_bigint(&mut by, y);

        // The subtraction operator may panic on overflow, so only exercise the direction that
        // cannot overflow; `abs_mut` below makes the GMP side match `|x - y|` as well.
        let actual = if x >= y { x - y } else { y - x };
        bx -= &by;
        bx.abs_mut();
        check_one(|| hexu(x), || Some(hexu(y)), actual, &mut bx);
    }
}

#[test]
fn mp_u256_shl() {
    let mut rng = ChaCha8Rng::from_seed(*SEED);
    let mut bx = BigInt::new();

    for _ in 0..bigint_fuzz_iteration_count() {
        let x = random_u256(&mut rng);
        let shift: u32 = rng.random_range(0..256);
        assign_bigint(&mut bx, x);
        let actual = x << shift;
        bx <<= shift;
        check_one(|| hexu(x), || Some(shift.to_string()), actual, &mut bx);
    }
}

#[test]
fn mp_u256_shr() {
    let mut rng = ChaCha8Rng::from_seed(*SEED);
    let mut bx = BigInt::new();

    for _ in 0..bigint_fuzz_iteration_count() {
        let x = random_u256(&mut rng);
        let shift: u32 = rng.random_range(0..256);
        assign_bigint(&mut bx, x);
        let actual = x >> shift;
        bx >>= shift;
        check_one(|| hexu(x), || Some(shift.to_string()), actual, &mut bx);
    }
}

#[test]
fn mp_u256_widen_mul() {
    let mut rng = ChaCha8Rng::from_seed(*SEED);
    let mut bx = BigInt::new();
    let mut by = BigInt::new();

    for _ in 0..bigint_fuzz_iteration_count() {
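        // `widen_mul` (from the `HInt` trait) multiplies two `u128`s into a full `u256`
        // product, so the operands here are `u128` and nothing is truncated before the
        // comparison against GMP.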
        let x: u128 = rng.random();
        let y: u128 = rng.random();
        bx.assign(x);
        by.assign(y);
        let actual = x.widen_mul(y);
        bx *= &by;
        check_one(
            || format!("{x:#034x}"),
            || Some(format!("{y:#034x}")),
            actual,
            &mut bx,
        );
    }
}