Initial conversion to const generics (#1018)

Amanieu d'Antras
2021-02-27 14:25:19 +00:00
committed by GitHub
parent 5728faf0f7
commit b4023b1ffe
5 changed files with 78 additions and 70 deletions


@@ -53,7 +53,8 @@
     clippy::shadow_reuse,
     clippy::cognitive_complexity,
     clippy::similar_names,
-    clippy::many_single_char_names
+    clippy::many_single_char_names,
+    non_upper_case_globals
 )]
 #![cfg_attr(test, allow(unused_imports))]
 #![no_std]


@@ -22694,7 +22694,7 @@ pub unsafe fn _mm_mask_shuffle_ps(
 ) -> __m128 {
     macro_rules! call {
         ($imm8:expr) => {
-            _mm_shuffle_ps(a, b, $imm8)
+            _mm_shuffle_ps::<$imm8>(a, b)
         };
     }
     let r = constify_imm8_sae!(imm8, call);
@@ -22711,7 +22711,7 @@ pub unsafe fn _mm_mask_shuffle_ps(
 pub unsafe fn _mm_maskz_shuffle_ps(k: __mmask8, a: __m128, b: __m128, imm8: i32) -> __m128 {
     macro_rules! call {
         ($imm8:expr) => {
-            _mm_shuffle_ps(a, b, $imm8)
+            _mm_shuffle_ps::<$imm8>(a, b)
         };
     }
     let r = constify_imm8_sae!(imm8, call);
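In these AVX-512 wrappers the immediate is still a runtime `imm8`, so a constify-style macro (here `constify_imm8_sae!`) matches it against each allowed value, and every match arm hands a literal to `call!`, which now splices it into the const-generic turbofish `_mm_shuffle_ps::<$imm8>(a, b)`. A minimal sketch of that pattern, using a hypothetical two-bit macro rather than the real stdarch helper:

// Illustrative only: a scaled-down constify macro. The real stdarch helper
// covers the full 8-bit immediate range.
macro_rules! constify_imm2 {
    ($imm2:expr, $expand:ident) => {
        match $imm2 & 0b11 {
            0 => $expand!(0),
            1 => $expand!(1),
            2 => $expand!(2),
            _ => $expand!(3),
        }
    };
}

fn main() {
    // Stand-in for a const-generic intrinsic such as _mm_shuffle_ps.
    fn pick<const IMM2: i32>() -> i32 {
        IMM2 * 10
    }
    let imm2 = 2; // runtime value, like `imm8` in the wrappers above
    macro_rules! call {
        ($imm2:expr) => {
            pick::<$imm2>()
        };
    }
    assert_eq!(constify_imm2!(imm2, call), 20);
}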


@@ -1007,52 +1007,20 @@ pub const fn _MM_SHUFFLE(z: u32, y: u32, x: u32, w: u32) -> i32 {
 #[inline]
 #[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(shufps, mask = 3))]
-#[rustc_args_required_const(2)]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_shuffle_ps(a: __m128, b: __m128, mask: i32) -> __m128 {
-    let mask = (mask & 0xFF) as u8;
-    macro_rules! shuffle_done {
-        ($x01:expr, $x23:expr, $x45:expr, $x67:expr) => {
-            simd_shuffle4(a, b, [$x01, $x23, $x45, $x67])
-        };
-    }
-    macro_rules! shuffle_x67 {
-        ($x01:expr, $x23:expr, $x45:expr) => {
-            match (mask >> 6) & 0b11 {
-                0b00 => shuffle_done!($x01, $x23, $x45, 4),
-                0b01 => shuffle_done!($x01, $x23, $x45, 5),
-                0b10 => shuffle_done!($x01, $x23, $x45, 6),
-                _ => shuffle_done!($x01, $x23, $x45, 7),
-            }
-        };
-    }
-    macro_rules! shuffle_x45 {
-        ($x01:expr, $x23:expr) => {
-            match (mask >> 4) & 0b11 {
-                0b00 => shuffle_x67!($x01, $x23, 4),
-                0b01 => shuffle_x67!($x01, $x23, 5),
-                0b10 => shuffle_x67!($x01, $x23, 6),
-                _ => shuffle_x67!($x01, $x23, 7),
-            }
-        };
-    }
-    macro_rules! shuffle_x23 {
-        ($x01:expr) => {
-            match (mask >> 2) & 0b11 {
-                0b00 => shuffle_x45!($x01, 0),
-                0b01 => shuffle_x45!($x01, 1),
-                0b10 => shuffle_x45!($x01, 2),
-                _ => shuffle_x45!($x01, 3),
-            }
-        };
-    }
-    match mask & 0b11 {
-        0b00 => shuffle_x23!(0),
-        0b01 => shuffle_x23!(1),
-        0b10 => shuffle_x23!(2),
-        _ => shuffle_x23!(3),
-    }
+pub unsafe fn _mm_shuffle_ps<const mask: i32>(a: __m128, b: __m128) -> __m128 {
+    assert!(mask >= 0 && mask <= 255);
+    simd_shuffle4(
+        a,
+        b,
+        [
+            mask as u32 & 0b11,
+            (mask as u32 >> 2) & 0b11,
+            ((mask as u32 >> 4) & 0b11) + 4,
+            ((mask as u32 >> 6) & 0b11) + 4,
+        ],
+    )
 }
 /// Unpacks and interleave single-precision (32-bit) floating-point elements
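The const-generic version replaces the four nested macro matches, which decoded the runtime mask two bits at a time, with a single `simd_shuffle4` whose lane indices are computed directly from the `mask` parameter: indices 0..=3 pick lanes of `a` and 4..=7 pick lanes of `b`. The internal `#[rustc_legacy_const_generics(2)]` attribute keeps the old call form `_mm_shuffle_ps(a, b, MASK)` compiling by moving the third argument into the const-generic slot. A small sketch of the index computation follows; the `shuffle_indices` helper is hypothetical, not part of stdarch:

// Mirrors the arithmetic in the new function body; indices 0..=3 select
// lanes of `a`, indices 4..=7 select lanes of `b`.
const fn shuffle_indices(mask: i32) -> [u32; 4] {
    let m = mask as u32;
    [
        m & 0b11,              // result[0] <- a[mask bits 1:0]
        (m >> 2) & 0b11,       // result[1] <- a[mask bits 3:2]
        ((m >> 4) & 0b11) + 4, // result[2] <- b[mask bits 5:4]
        ((m >> 6) & 0b11) + 4, // result[3] <- b[mask bits 7:6]
    ]
}

fn main() {
    // Assuming the usual _MM_SHUFFLE(z, y, x, w) packing of
    // (z << 6) | (y << 4) | (x << 2) | w, _MM_SHUFFLE(1, 0, 3, 2) is
    // 0b01_00_11_10 and selects a[2], a[3], b[0], b[1].
    assert_eq!(shuffle_indices(0b01_00_11_10), [2, 3, 4, 5]);
}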
@@ -1725,6 +1693,14 @@ pub const _MM_HINT_T2: i32 = 1;
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub const _MM_HINT_NTA: i32 = 0;
+/// See [`_mm_prefetch`](fn._mm_prefetch.html).
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub const _MM_HINT_ET0: i32 = 7;
+/// See [`_mm_prefetch`](fn._mm_prefetch.html).
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub const _MM_HINT_ET1: i32 = 6;
 /// Fetch the cache line that contains address `p` using the given `strategy`.
 ///
 /// The `strategy` must be one of:
@@ -1742,6 +1718,10 @@ pub const _MM_HINT_NTA: i32 = 0;
 /// but outside of the cache hierarchy. This is used to reduce access latency
 /// without polluting the cache.
 ///
+/// * [`_MM_HINT_ET0`](constant._MM_HINT_ET0.html) and
+///   [`_MM_HINT_ET1`](constant._MM_HINT_ET1.html) are similar to `_MM_HINT_T0`
+///   and `_MM_HINT_T1` but indicate an anticipation to write to the address.
+///
 /// The actual implementation depends on the particular CPU. This instruction
 /// is considered a hint, so the CPU is also free to simply ignore the request.
 ///
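The new hint constants fit the encoding that the rewritten `_mm_prefetch` below relies on: the low two bits carry the cache-locality level and bit 2 the write intent, so `_MM_HINT_ET0` (7) and `_MM_HINT_ET1` (6) are `_MM_HINT_T0` (3) and `_MM_HINT_T1` (2) with the write bit set. A sketch of that decoding, with an illustrative `decode_hint` helper that does not exist in stdarch:

// Splits a strategy constant the same way the new _mm_prefetch body does:
// prefetch(p, (strategy >> 2) & 1, strategy & 3, 1).
const fn decode_hint(strategy: i32) -> (i32, i32) {
    let rw = (strategy >> 2) & 1; // 0 = read, 1 = write intent
    let locality = strategy & 3; // 0 = NTA .. 3 = T0
    (rw, locality)
}

fn main() {
    assert_eq!(decode_hint(3), (0, 3)); // _MM_HINT_T0
    assert_eq!(decode_hint(0), (0, 0)); // _MM_HINT_NTA
    assert_eq!(decode_hint(7), (1, 3)); // _MM_HINT_ET0 = T0 + write intent
    assert_eq!(decode_hint(6), (1, 2)); // _MM_HINT_ET1 = T1 + write intent
}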
@@ -1769,24 +1749,12 @@ pub const _MM_HINT_NTA: i32 = 0;
 #[cfg_attr(test, assert_instr(prefetcht1, strategy = _MM_HINT_T1))]
 #[cfg_attr(test, assert_instr(prefetcht2, strategy = _MM_HINT_T2))]
 #[cfg_attr(test, assert_instr(prefetchnta, strategy = _MM_HINT_NTA))]
-#[rustc_args_required_const(1)]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_prefetch(p: *const i8, strategy: i32) {
-    // The `strategy` must be a compile-time constant, so we use a short form
-    // of `constify_imm8!` for now.
-    // We use the `llvm.prefetch` intrinsic with `rw` = 0 (read), and
-    // `cache type` = 1 (data cache). `locality` is based on our `strategy`.
-    macro_rules! pref {
-        ($imm8:expr) => {
-            match $imm8 {
-                0 => prefetch(p, 0, 0, 1),
-                1 => prefetch(p, 0, 1, 1),
-                2 => prefetch(p, 0, 2, 1),
-                _ => prefetch(p, 0, 3, 1),
-            }
-        };
-    }
-    pref!(strategy)
+pub unsafe fn _mm_prefetch<const strategy: i32>(p: *const i8) {
+    // We use the `llvm.prefetch` intrinsic with `cache type` = 1 (data cache).
+    // `locality` and `rw` are based on our `strategy`.
+    prefetch(p, (strategy >> 2) & 1, strategy & 3, 1);
 }
 /// Returns vector of type __m128 with undefined elements.
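Because the function keeps `#[rustc_legacy_const_generics(1)]`, callers can use either the new turbofish form or the pre-existing two-argument form; both resolve to the same const-generic instantiation. A hypothetical usage sketch (not taken from the diff), assuming an x86_64 target where SSE is always available:

#[cfg(target_arch = "x86_64")]
fn warm_cache(data: &[f32]) {
    use core::arch::x86_64::{_mm_prefetch, _MM_HINT_T0};
    let p = data.as_ptr() as *const i8;
    unsafe {
        _mm_prefetch::<_MM_HINT_T0>(p); // const-generic form
        _mm_prefetch(p, _MM_HINT_T0); // legacy form, rewritten by the attribute
    }
}

fn main() {
    #[cfg(target_arch = "x86_64")]
    warm_cache(&[1.0, 2.0, 3.0, 4.0]);
}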
@@ -2976,7 +2944,7 @@ mod tests {
     unsafe fn test_mm_shuffle_ps() {
         let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
         let b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
-        let r = _mm_shuffle_ps(a, b, 0b00_01_01_11);
+        let r = _mm_shuffle_ps::<0b00_01_01_11>(a, b);
         assert_eq_m128(r, _mm_setr_ps(4.0, 2.0, 6.0, 5.0));
     }
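For the test, mask `0b00_01_01_11` decodes as the fields 3, 1, 1, 0 (low bits first), so the result takes `a[3]`, `a[1]`, `b[1]`, `b[0]`, which is exactly the expected `(4.0, 2.0, 6.0, 5.0)`. A plain-array sanity check of that selection (no SIMD, purely illustrative):

fn main() {
    let a = [1.0f32, 2.0, 3.0, 4.0];
    let b = [5.0f32, 6.0, 7.0, 8.0];
    let mask = 0b00_01_01_11u32;
    let r = [
        a[(mask & 0b11) as usize],        // a[3] = 4.0
        a[((mask >> 2) & 0b11) as usize], // a[1] = 2.0
        b[((mask >> 4) & 0b11) as usize], // b[1] = 6.0
        b[((mask >> 6) & 0b11) as usize], // b[0] = 5.0
    ];
    assert_eq!(r, [4.0, 2.0, 6.0, 5.0]);
}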