Rollup merge of #122884 - mzabaluev:pow-remove-exit-branch, r=Amanieu
Optimize integer `pow` by removing the exit branch

The branch at the end of the `pow` implementations is redundant with the multiplication code already present in the loop. By rotating the exit check, this branch can be largely removed, improving code size and reducing instruction cache misses.

Testing on my machine (`x86_64`, 11th Gen Intel Core i5-1135G7 @ 2.40GHz), the `num::int_pow` benchmarks improve by roughly 40% for the unchecked operations and show a slight improvement for the checked operations as well.
This commit is contained in:
@@ -170,6 +170,7 @@
|
|||||||
#![feature(internal_impls_macro)]
|
#![feature(internal_impls_macro)]
|
||||||
#![feature(ip)]
|
#![feature(ip)]
|
||||||
#![feature(is_ascii_octdigit)]
|
#![feature(is_ascii_octdigit)]
|
||||||
|
#![feature(is_val_statically_known)]
|
||||||
#![feature(isqrt)]
|
#![feature(isqrt)]
|
||||||
#![feature(link_cfg)]
|
#![feature(link_cfg)]
|
||||||
#![feature(offset_of_enum)]
|
#![feature(offset_of_enum)]
|
||||||
|
|||||||
@@ -1496,18 +1496,17 @@ macro_rules! int_impl {
|
|||||||
let mut base = self;
|
let mut base = self;
|
||||||
let mut acc: Self = 1;
|
let mut acc: Self = 1;
|
||||||
|
|
||||||
while exp > 1 {
|
loop {
|
||||||
if (exp & 1) == 1 {
|
if (exp & 1) == 1 {
|
||||||
acc = try_opt!(acc.checked_mul(base));
|
acc = try_opt!(acc.checked_mul(base));
|
||||||
|
// since exp!=0, finally the exp must be 1.
|
||||||
|
if exp == 1 {
|
||||||
|
return Some(acc);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
exp /= 2;
|
exp /= 2;
|
||||||
base = try_opt!(base.checked_mul(base));
|
base = try_opt!(base.checked_mul(base));
|
||||||
}
|
}
|
||||||
// since exp!=0, finally the exp must be 1.
|
|
||||||
// Deal with the final bit of the exponent separately, since
|
|
||||||
// squaring the base afterwards is not necessary and may cause a
|
|
||||||
// needless overflow.
|
|
||||||
acc.checked_mul(base)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Strict exponentiation. Computes `self.pow(exp)`, panicking if
|
/// Strict exponentiation. Computes `self.pow(exp)`, panicking if
|
||||||
@@ -1547,18 +1546,17 @@ macro_rules! int_impl {
|
|||||||
let mut base = self;
|
let mut base = self;
|
||||||
let mut acc: Self = 1;
|
let mut acc: Self = 1;
|
||||||
|
|
||||||
while exp > 1 {
|
loop {
|
||||||
if (exp & 1) == 1 {
|
if (exp & 1) == 1 {
|
||||||
acc = acc.strict_mul(base);
|
acc = acc.strict_mul(base);
|
||||||
|
// since exp!=0, finally the exp must be 1.
|
||||||
|
if exp == 1 {
|
||||||
|
return acc;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
exp /= 2;
|
exp /= 2;
|
||||||
base = base.strict_mul(base);
|
base = base.strict_mul(base);
|
||||||
}
|
}
|
||||||
// since exp!=0, finally the exp must be 1.
|
|
||||||
// Deal with the final bit of the exponent separately, since
|
|
||||||
// squaring the base afterwards is not necessary and may cause a
|
|
||||||
// needless overflow.
|
|
||||||
acc.strict_mul(base)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the square root of the number, rounded down.
|
/// Returns the square root of the number, rounded down.
|
||||||
@@ -2175,6 +2173,7 @@ macro_rules! int_impl {
|
|||||||
#[must_use = "this returns the result of the operation, \
|
#[must_use = "this returns the result of the operation, \
|
||||||
without modifying the original"]
|
without modifying the original"]
|
||||||
#[inline]
|
#[inline]
|
||||||
|
#[rustc_allow_const_fn_unstable(is_val_statically_known)]
|
||||||
pub const fn wrapping_pow(self, mut exp: u32) -> Self {
|
pub const fn wrapping_pow(self, mut exp: u32) -> Self {
|
||||||
if exp == 0 {
|
if exp == 0 {
|
||||||
return 1;
|
return 1;
|
||||||
@@ -2182,19 +2181,36 @@ macro_rules! int_impl {
|
|||||||
let mut base = self;
|
let mut base = self;
|
||||||
let mut acc: Self = 1;
|
let mut acc: Self = 1;
|
||||||
|
|
||||||
while exp > 1 {
|
if intrinsics::is_val_statically_known(exp) {
|
||||||
if (exp & 1) == 1 {
|
while exp > 1 {
|
||||||
acc = acc.wrapping_mul(base);
|
if (exp & 1) == 1 {
|
||||||
|
acc = acc.wrapping_mul(base);
|
||||||
|
}
|
||||||
|
exp /= 2;
|
||||||
|
base = base.wrapping_mul(base);
|
||||||
}
|
}
|
||||||
exp /= 2;
|
|
||||||
base = base.wrapping_mul(base);
|
|
||||||
}
|
|
||||||
|
|
||||||
// since exp!=0, finally the exp must be 1.
|
// since exp!=0, finally the exp must be 1.
|
||||||
// Deal with the final bit of the exponent separately, since
|
// Deal with the final bit of the exponent separately, since
|
||||||
// squaring the base afterwards is not necessary and may cause a
|
// squaring the base afterwards is not necessary.
|
||||||
// needless overflow.
|
acc.wrapping_mul(base)
|
||||||
acc.wrapping_mul(base)
|
} else {
|
||||||
|
// This is faster than the above when the exponent is not known
|
||||||
|
// at compile time. We can't use the same code for the constant
|
||||||
|
// exponent case because LLVM is currently unable to unroll
|
||||||
|
// this loop.
|
||||||
|
loop {
|
||||||
|
if (exp & 1) == 1 {
|
||||||
|
acc = acc.wrapping_mul(base);
|
||||||
|
// since exp!=0, finally the exp must be 1.
|
||||||
|
if exp == 1 {
|
||||||
|
return acc;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
exp /= 2;
|
||||||
|
base = base.wrapping_mul(base);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Calculates `self` + `rhs`.
|
/// Calculates `self` + `rhs`.
|
||||||
@@ -2690,9 +2706,14 @@ macro_rules! int_impl {
|
|||||||
// Scratch space for storing results of overflowing_mul.
|
// Scratch space for storing results of overflowing_mul.
|
||||||
let mut r;
|
let mut r;
|
||||||
|
|
||||||
while exp > 1 {
|
loop {
|
||||||
if (exp & 1) == 1 {
|
if (exp & 1) == 1 {
|
||||||
r = acc.overflowing_mul(base);
|
r = acc.overflowing_mul(base);
|
||||||
|
// since exp!=0, finally the exp must be 1.
|
||||||
|
if exp == 1 {
|
||||||
|
r.1 |= overflown;
|
||||||
|
return r;
|
||||||
|
}
|
||||||
acc = r.0;
|
acc = r.0;
|
||||||
overflown |= r.1;
|
overflown |= r.1;
|
||||||
}
|
}
|
||||||
@@ -2701,14 +2722,6 @@ macro_rules! int_impl {
|
|||||||
base = r.0;
|
base = r.0;
|
||||||
overflown |= r.1;
|
overflown |= r.1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// since exp!=0, finally the exp must be 1.
|
|
||||||
// Deal with the final bit of the exponent separately, since
|
|
||||||
// squaring the base afterwards is not necessary and may cause a
|
|
||||||
// needless overflow.
|
|
||||||
r = acc.overflowing_mul(base);
|
|
||||||
r.1 |= overflown;
|
|
||||||
r
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Raises self to the power of `exp`, using exponentiation by squaring.
|
/// Raises self to the power of `exp`, using exponentiation by squaring.
|
||||||
@@ -2728,6 +2741,7 @@ macro_rules! int_impl {
|
|||||||
without modifying the original"]
|
without modifying the original"]
|
||||||
#[inline]
|
#[inline]
|
||||||
#[rustc_inherit_overflow_checks]
|
#[rustc_inherit_overflow_checks]
|
||||||
|
#[rustc_allow_const_fn_unstable(is_val_statically_known)]
|
||||||
pub const fn pow(self, mut exp: u32) -> Self {
|
pub const fn pow(self, mut exp: u32) -> Self {
|
||||||
if exp == 0 {
|
if exp == 0 {
|
||||||
return 1;
|
return 1;
|
||||||
@@ -2735,19 +2749,37 @@ macro_rules! int_impl {
|
|||||||
let mut base = self;
|
let mut base = self;
|
||||||
let mut acc = 1;
|
let mut acc = 1;
|
||||||
|
|
||||||
while exp > 1 {
|
if intrinsics::is_val_statically_known(exp) {
|
||||||
if (exp & 1) == 1 {
|
while exp > 1 {
|
||||||
acc = acc * base;
|
if (exp & 1) == 1 {
|
||||||
|
acc = acc * base;
|
||||||
|
}
|
||||||
|
exp /= 2;
|
||||||
|
base = base * base;
|
||||||
}
|
}
|
||||||
exp /= 2;
|
|
||||||
base = base * base;
|
|
||||||
}
|
|
||||||
|
|
||||||
// since exp!=0, finally the exp must be 1.
|
// since exp!=0, finally the exp must be 1.
|
||||||
// Deal with the final bit of the exponent separately, since
|
// Deal with the final bit of the exponent separately, since
|
||||||
// squaring the base afterwards is not necessary and may cause a
|
// squaring the base afterwards is not necessary and may cause a
|
||||||
// needless overflow.
|
// needless overflow.
|
||||||
acc * base
|
acc * base
|
||||||
|
} else {
|
||||||
|
// This is faster than the above when the exponent is not known
|
||||||
|
// at compile time. We can't use the same code for the constant
|
||||||
|
// exponent case because LLVM is currently unable to unroll
|
||||||
|
// this loop.
|
||||||
|
loop {
|
||||||
|
if (exp & 1) == 1 {
|
||||||
|
acc = acc * base;
|
||||||
|
// since exp!=0, finally the exp must be 1.
|
||||||
|
if exp == 1 {
|
||||||
|
return acc;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
exp /= 2;
|
||||||
|
base = base * base;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the square root of the number, rounded down.
|
/// Returns the square root of the number, rounded down.
|
||||||
|
|||||||
@@ -1622,20 +1622,17 @@ macro_rules! uint_impl {
|
|||||||
let mut base = self;
|
let mut base = self;
|
||||||
let mut acc: Self = 1;
|
let mut acc: Self = 1;
|
||||||
|
|
||||||
while exp > 1 {
|
loop {
|
||||||
if (exp & 1) == 1 {
|
if (exp & 1) == 1 {
|
||||||
acc = try_opt!(acc.checked_mul(base));
|
acc = try_opt!(acc.checked_mul(base));
|
||||||
|
// since exp!=0, finally the exp must be 1.
|
||||||
|
if exp == 1 {
|
||||||
|
return Some(acc);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
exp /= 2;
|
exp /= 2;
|
||||||
base = try_opt!(base.checked_mul(base));
|
base = try_opt!(base.checked_mul(base));
|
||||||
}
|
}
|
||||||
|
|
||||||
// since exp!=0, finally the exp must be 1.
|
|
||||||
// Deal with the final bit of the exponent separately, since
|
|
||||||
// squaring the base afterwards is not necessary and may cause a
|
|
||||||
// needless overflow.
|
|
||||||
|
|
||||||
acc.checked_mul(base)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Strict exponentiation. Computes `self.pow(exp)`, panicking if
|
/// Strict exponentiation. Computes `self.pow(exp)`, panicking if
|
||||||
@@ -1675,18 +1672,17 @@ macro_rules! uint_impl {
|
|||||||
let mut base = self;
|
let mut base = self;
|
||||||
let mut acc: Self = 1;
|
let mut acc: Self = 1;
|
||||||
|
|
||||||
while exp > 1 {
|
loop {
|
||||||
if (exp & 1) == 1 {
|
if (exp & 1) == 1 {
|
||||||
acc = acc.strict_mul(base);
|
acc = acc.strict_mul(base);
|
||||||
|
// since exp!=0, finally the exp must be 1.
|
||||||
|
if exp == 1 {
|
||||||
|
return acc;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
exp /= 2;
|
exp /= 2;
|
||||||
base = base.strict_mul(base);
|
base = base.strict_mul(base);
|
||||||
}
|
}
|
||||||
// since exp!=0, finally the exp must be 1.
|
|
||||||
// Deal with the final bit of the exponent separately, since
|
|
||||||
// squaring the base afterwards is not necessary and may cause a
|
|
||||||
// needless overflow.
|
|
||||||
acc.strict_mul(base)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Saturating integer addition. Computes `self + rhs`, saturating at
|
/// Saturating integer addition. Computes `self + rhs`, saturating at
|
||||||
@@ -2138,6 +2134,7 @@ macro_rules! uint_impl {
|
|||||||
#[must_use = "this returns the result of the operation, \
|
#[must_use = "this returns the result of the operation, \
|
||||||
without modifying the original"]
|
without modifying the original"]
|
||||||
#[inline]
|
#[inline]
|
||||||
|
#[rustc_allow_const_fn_unstable(is_val_statically_known)]
|
||||||
pub const fn wrapping_pow(self, mut exp: u32) -> Self {
|
pub const fn wrapping_pow(self, mut exp: u32) -> Self {
|
||||||
if exp == 0 {
|
if exp == 0 {
|
||||||
return 1;
|
return 1;
|
||||||
@@ -2145,19 +2142,36 @@ macro_rules! uint_impl {
|
|||||||
let mut base = self;
|
let mut base = self;
|
||||||
let mut acc: Self = 1;
|
let mut acc: Self = 1;
|
||||||
|
|
||||||
while exp > 1 {
|
if intrinsics::is_val_statically_known(exp) {
|
||||||
if (exp & 1) == 1 {
|
while exp > 1 {
|
||||||
acc = acc.wrapping_mul(base);
|
if (exp & 1) == 1 {
|
||||||
|
acc = acc.wrapping_mul(base);
|
||||||
|
}
|
||||||
|
exp /= 2;
|
||||||
|
base = base.wrapping_mul(base);
|
||||||
}
|
}
|
||||||
exp /= 2;
|
|
||||||
base = base.wrapping_mul(base);
|
|
||||||
}
|
|
||||||
|
|
||||||
// since exp!=0, finally the exp must be 1.
|
// since exp!=0, finally the exp must be 1.
|
||||||
// Deal with the final bit of the exponent separately, since
|
// Deal with the final bit of the exponent separately, since
|
||||||
// squaring the base afterwards is not necessary and may cause a
|
// squaring the base afterwards is not necessary.
|
||||||
// needless overflow.
|
acc.wrapping_mul(base)
|
||||||
acc.wrapping_mul(base)
|
} else {
|
||||||
|
// This is faster than the above when the exponent is not known
|
||||||
|
// at compile time. We can't use the same code for the constant
|
||||||
|
// exponent case because LLVM is currently unable to unroll
|
||||||
|
// this loop.
|
||||||
|
loop {
|
||||||
|
if (exp & 1) == 1 {
|
||||||
|
acc = acc.wrapping_mul(base);
|
||||||
|
// since exp!=0, finally the exp must be 1.
|
||||||
|
if exp == 1 {
|
||||||
|
return acc;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
exp /= 2;
|
||||||
|
base = base.wrapping_mul(base);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Calculates `self` + `rhs`.
|
/// Calculates `self` + `rhs`.
|
||||||
@@ -2603,9 +2617,14 @@ macro_rules! uint_impl {
|
|||||||
// Scratch space for storing results of overflowing_mul.
|
// Scratch space for storing results of overflowing_mul.
|
||||||
let mut r;
|
let mut r;
|
||||||
|
|
||||||
while exp > 1 {
|
loop {
|
||||||
if (exp & 1) == 1 {
|
if (exp & 1) == 1 {
|
||||||
r = acc.overflowing_mul(base);
|
r = acc.overflowing_mul(base);
|
||||||
|
// since exp!=0, finally the exp must be 1.
|
||||||
|
if exp == 1 {
|
||||||
|
r.1 |= overflown;
|
||||||
|
return r;
|
||||||
|
}
|
||||||
acc = r.0;
|
acc = r.0;
|
||||||
overflown |= r.1;
|
overflown |= r.1;
|
||||||
}
|
}
|
||||||
@@ -2614,15 +2633,6 @@ macro_rules! uint_impl {
|
|||||||
base = r.0;
|
base = r.0;
|
||||||
overflown |= r.1;
|
overflown |= r.1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// since exp!=0, finally the exp must be 1.
|
|
||||||
// Deal with the final bit of the exponent separately, since
|
|
||||||
// squaring the base afterwards is not necessary and may cause a
|
|
||||||
// needless overflow.
|
|
||||||
r = acc.overflowing_mul(base);
|
|
||||||
r.1 |= overflown;
|
|
||||||
|
|
||||||
r
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Raises self to the power of `exp`, using exponentiation by squaring.
|
/// Raises self to the power of `exp`, using exponentiation by squaring.
|
||||||
@@ -2640,6 +2650,7 @@ macro_rules! uint_impl {
|
|||||||
without modifying the original"]
|
without modifying the original"]
|
||||||
#[inline]
|
#[inline]
|
||||||
#[rustc_inherit_overflow_checks]
|
#[rustc_inherit_overflow_checks]
|
||||||
|
#[rustc_allow_const_fn_unstable(is_val_statically_known)]
|
||||||
pub const fn pow(self, mut exp: u32) -> Self {
|
pub const fn pow(self, mut exp: u32) -> Self {
|
||||||
if exp == 0 {
|
if exp == 0 {
|
||||||
return 1;
|
return 1;
|
||||||
@@ -2647,19 +2658,37 @@ macro_rules! uint_impl {
|
|||||||
let mut base = self;
|
let mut base = self;
|
||||||
let mut acc = 1;
|
let mut acc = 1;
|
||||||
|
|
||||||
while exp > 1 {
|
if intrinsics::is_val_statically_known(exp) {
|
||||||
if (exp & 1) == 1 {
|
while exp > 1 {
|
||||||
acc = acc * base;
|
if (exp & 1) == 1 {
|
||||||
|
acc = acc * base;
|
||||||
|
}
|
||||||
|
exp /= 2;
|
||||||
|
base = base * base;
|
||||||
}
|
}
|
||||||
exp /= 2;
|
|
||||||
base = base * base;
|
|
||||||
}
|
|
||||||
|
|
||||||
// since exp!=0, finally the exp must be 1.
|
// since exp!=0, finally the exp must be 1.
|
||||||
// Deal with the final bit of the exponent separately, since
|
// Deal with the final bit of the exponent separately, since
|
||||||
// squaring the base afterwards is not necessary and may cause a
|
// squaring the base afterwards is not necessary and may cause a
|
||||||
// needless overflow.
|
// needless overflow.
|
||||||
acc * base
|
acc * base
|
||||||
|
} else {
|
||||||
|
// This is faster than the above when the exponent is not known
|
||||||
|
// at compile time. We can't use the same code for the constant
|
||||||
|
// exponent case because LLVM is currently unable to unroll
|
||||||
|
// this loop.
|
||||||
|
loop {
|
||||||
|
if (exp & 1) == 1 {
|
||||||
|
acc = acc * base;
|
||||||
|
// since exp!=0, finally the exp must be 1.
|
||||||
|
if exp == 1 {
|
||||||
|
return acc;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
exp /= 2;
|
||||||
|
base = base * base;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the square root of the number, rounded down.
|
/// Returns the square root of the number, rounded down.
|
||||||
|
|||||||
Reference in New Issue
Block a user