Convert shuffle_ps and shuffle_pd to const generics (#1037)
This commit is contained in:
@@ -113,44 +113,21 @@ pub unsafe fn _mm256_or_ps(a: __m256, b: __m256) -> __m256 {
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shuffle_pd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx")]
|
||||
#[cfg_attr(test, assert_instr(vshufpd, imm8 = 0x1))]
|
||||
#[rustc_args_required_const(2)]
|
||||
#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_shuffle_pd(a: __m256d, b: __m256d, imm8: i32) -> __m256d {
|
||||
let imm8 = (imm8 & 0xFF) as u8;
|
||||
macro_rules! shuffle4 {
|
||||
($a:expr, $b:expr, $c:expr, $d:expr) => {
|
||||
simd_shuffle4(a, b, [$a, $b, $c, $d])
|
||||
};
|
||||
}
|
||||
macro_rules! shuffle3 {
|
||||
($a:expr, $b:expr, $c:expr) => {
|
||||
match (imm8 >> 3) & 0x1 {
|
||||
0 => shuffle4!($a, $b, $c, 6),
|
||||
_ => shuffle4!($a, $b, $c, 7),
|
||||
}
|
||||
};
|
||||
}
|
||||
macro_rules! shuffle2 {
|
||||
($a:expr, $b:expr) => {
|
||||
match (imm8 >> 2) & 0x1 {
|
||||
0 => shuffle3!($a, $b, 2),
|
||||
_ => shuffle3!($a, $b, 3),
|
||||
}
|
||||
};
|
||||
}
|
||||
macro_rules! shuffle1 {
|
||||
($a:expr) => {
|
||||
match (imm8 >> 1) & 0x1 {
|
||||
0 => shuffle2!($a, 4),
|
||||
_ => shuffle2!($a, 5),
|
||||
}
|
||||
};
|
||||
}
|
||||
match imm8 & 0x1 {
|
||||
0 => shuffle1!(0),
|
||||
_ => shuffle1!(1),
|
||||
}
|
||||
pub unsafe fn _mm256_shuffle_pd<const MASK: i32>(a: __m256d, b: __m256d) -> __m256d {
|
||||
static_assert_imm8!(MASK);
|
||||
simd_shuffle4(
|
||||
a,
|
||||
b,
|
||||
[
|
||||
MASK as u32 & 0b1,
|
||||
((MASK as u32 >> 1) & 0b1) + 4,
|
||||
((MASK as u32 >> 2) & 0b1) + 2,
|
||||
((MASK as u32 >> 3) & 0b1) + 6,
|
||||
],
|
||||
)
|
||||
}
|
||||
|
||||
/// Shuffles single-precision (32-bit) floating-point elements in `a` within
|
||||
@@ -159,61 +136,25 @@ pub unsafe fn _mm256_shuffle_pd(a: __m256d, b: __m256d, imm8: i32) -> __m256d {
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shuffle_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx")]
|
||||
#[cfg_attr(test, assert_instr(vshufps, imm8 = 0x0))]
|
||||
#[rustc_args_required_const(2)]
|
||||
#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_shuffle_ps(a: __m256, b: __m256, imm8: i32) -> __m256 {
|
||||
let imm8 = (imm8 & 0xFF) as u8;
|
||||
macro_rules! shuffle4 {
|
||||
(
|
||||
$a:expr,
|
||||
$b:expr,
|
||||
$c:expr,
|
||||
$d:expr,
|
||||
$e:expr,
|
||||
$f:expr,
|
||||
$g:expr,
|
||||
$h:expr
|
||||
) => {
|
||||
simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h])
|
||||
};
|
||||
}
|
||||
macro_rules! shuffle3 {
|
||||
($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr) => {
|
||||
match (imm8 >> 6) & 0x3 {
|
||||
0 => shuffle4!($a, $b, $c, 8, $e, $f, $g, 12),
|
||||
1 => shuffle4!($a, $b, $c, 9, $e, $f, $g, 13),
|
||||
2 => shuffle4!($a, $b, $c, 10, $e, $f, $g, 14),
|
||||
_ => shuffle4!($a, $b, $c, 11, $e, $f, $g, 15),
|
||||
}
|
||||
};
|
||||
}
|
||||
macro_rules! shuffle2 {
|
||||
($a:expr, $b:expr, $e:expr, $f:expr) => {
|
||||
match (imm8 >> 4) & 0x3 {
|
||||
0 => shuffle3!($a, $b, 8, $e, $f, 12),
|
||||
1 => shuffle3!($a, $b, 9, $e, $f, 13),
|
||||
2 => shuffle3!($a, $b, 10, $e, $f, 14),
|
||||
_ => shuffle3!($a, $b, 11, $e, $f, 15),
|
||||
}
|
||||
};
|
||||
}
|
||||
macro_rules! shuffle1 {
|
||||
($a:expr, $e:expr) => {
|
||||
match (imm8 >> 2) & 0x3 {
|
||||
0 => shuffle2!($a, 0, $e, 4),
|
||||
1 => shuffle2!($a, 1, $e, 5),
|
||||
2 => shuffle2!($a, 2, $e, 6),
|
||||
_ => shuffle2!($a, 3, $e, 7),
|
||||
}
|
||||
};
|
||||
}
|
||||
match imm8 & 0x3 {
|
||||
0 => shuffle1!(0, 4),
|
||||
1 => shuffle1!(1, 5),
|
||||
2 => shuffle1!(2, 6),
|
||||
_ => shuffle1!(3, 7),
|
||||
}
|
||||
pub unsafe fn _mm256_shuffle_ps<const MASK: i32>(a: __m256, b: __m256) -> __m256 {
|
||||
static_assert_imm8!(MASK);
|
||||
simd_shuffle8(
|
||||
a,
|
||||
b,
|
||||
[
|
||||
MASK as u32 & 0b11,
|
||||
(MASK as u32 >> 2) & 0b11,
|
||||
((MASK as u32 >> 4) & 0b11) + 8,
|
||||
((MASK as u32 >> 6) & 0b11) + 8,
|
||||
(MASK as u32 & 0b11) + 4,
|
||||
((MASK as u32 >> 2) & 0b11) + 4,
|
||||
((MASK as u32 >> 4) & 0b11) + 12,
|
||||
((MASK as u32 >> 6) & 0b11) + 12,
|
||||
],
|
||||
)
|
||||
}
|
||||
|
||||
/// Computes the bitwise NOT of packed double-precision (64-bit) floating-point
|
||||
@@ -3381,7 +3322,7 @@ mod tests {
|
||||
unsafe fn test_mm256_shuffle_pd() {
|
||||
let a = _mm256_setr_pd(1., 4., 5., 8.);
|
||||
let b = _mm256_setr_pd(2., 3., 6., 7.);
|
||||
let r = _mm256_shuffle_pd(a, b, 0xF);
|
||||
let r = _mm256_shuffle_pd::<0b11_11_11_11>(a, b);
|
||||
let e = _mm256_setr_pd(4., 3., 8., 7.);
|
||||
assert_eq_m256d(r, e);
|
||||
}
|
||||
@@ -3390,7 +3331,7 @@ mod tests {
|
||||
unsafe fn test_mm256_shuffle_ps() {
|
||||
let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.);
|
||||
let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.);
|
||||
let r = _mm256_shuffle_ps(a, b, 0x0F);
|
||||
let r = _mm256_shuffle_ps::<0b00_00_11_11>(a, b);
|
||||
let e = _mm256_setr_ps(8., 8., 2., 2., 16., 16., 10., 10.);
|
||||
assert_eq_m256(r, e);
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,4 +1,20 @@
|
||||
//! Utility macros.
|
||||
//!
|
||||
// Helper struct used to trigger a const-eval error when the const generic immediate value `IMM`
|
||||
// is not one of the values accepted by the check in `VALID` (4, 8, 9, 10 or 11).
// NOTE(review): presumably these are the permitted `_MM_FROUND_*` rounding immediates — confirm.
|
||||
pub(crate) struct ValidateConstRound<const IMM: i32>;
|
||||
impl<const IMM: i32> ValidateConstRound<IMM> {
|
||||
// Unit-valued associated const whose initializer performs the check for this `IMM`.
pub(crate) const VALID: () = {
|
||||
// The `bool as usize` cast is 0 when `IMM` is not in the accepted set, so the
// expression becomes `1 / 0`; that division by zero is the const-eval error.
let _ = 1 / ((IMM == 4 || IMM == 8 || IMM == 9 || IMM == 10 || IMM == 11) as usize);
|
||||
};
|
||||
}
|
||||
|
||||
// Statically checks a const generic rounding immediate: expands to a read of
// `ValidateConstRound::<$imm>::VALID`, so an `$imm` rejected by that const's check
// fails during const evaluation instead of at run time.
// `$imm` must be an identifier (the name of the const generic parameter), not an expression.
// NOTE(review): `#[allow(unused)]` presumably covers builds where no caller uses this macro — confirm.
#[allow(unused)]
|
||||
macro_rules! static_assert_rounding {
|
||||
($imm:ident) => {
|
||||
// The value is `()`; binding it to `_` exists only to force evaluation of `VALID`.
let _ = $crate::core_arch::x86::macros::ValidateConstRound::<$imm>::VALID;
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! constify_imm6 {
|
||||
($imm8:expr, $expand:ident) => {
|
||||
|
||||
@@ -2653,21 +2653,17 @@ pub unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> __m128d {
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[cfg_attr(
|
||||
all(test, any(not(target_os = "windows"), target_arch = "x86")),
|
||||
assert_instr(shufps, imm8 = 1)
|
||||
cfg_attr(test, assert_instr(shufps, MASK = 2)) // FIXME shufpd expected
|
||||
)]
|
||||
#[cfg_attr(
|
||||
all(test, all(target_os = "windows", target_arch = "x86_64")),
|
||||
assert_instr(shufpd, imm8 = 1)
|
||||
cfg_attr(test, assert_instr(shufpd, MASK = 1))
|
||||
)]
|
||||
#[rustc_args_required_const(2)]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_shuffle_pd(a: __m128d, b: __m128d, imm8: i32) -> __m128d {
|
||||
match imm8 & 0b11 {
|
||||
0b00 => simd_shuffle2(a, b, [0, 2]),
|
||||
0b01 => simd_shuffle2(a, b, [1, 2]),
|
||||
0b10 => simd_shuffle2(a, b, [0, 3]),
|
||||
_ => simd_shuffle2(a, b, [1, 3]),
|
||||
}
|
||||
pub unsafe fn _mm_shuffle_pd<const MASK: i32>(a: __m128d, b: __m128d) -> __m128d {
|
||||
static_assert_imm8!(MASK);
|
||||
simd_shuffle2(a, b, [MASK as u32 & 0b1, ((MASK as u32 >> 1) & 0b1) + 2])
|
||||
}
|
||||
|
||||
/// Constructs a 128-bit floating-point vector of `[2 x double]`. The lower
|
||||
@@ -4852,7 +4848,7 @@ mod tests {
|
||||
let a = _mm_setr_pd(1., 2.);
|
||||
let b = _mm_setr_pd(3., 4.);
|
||||
let expected = _mm_setr_pd(1., 3.);
|
||||
let r = _mm_shuffle_pd(a, b, 0);
|
||||
let r = _mm_shuffle_pd::<0b00_00_00_00>(a, b);
|
||||
assert_eq_m128d(r, expected);
|
||||
}
|
||||
|
||||
|
||||
@@ -2920,7 +2920,7 @@ mod tests {
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_roundscale_pd() {
|
||||
let a = _mm512_set1_pd(1.1);
|
||||
let r = _mm512_roundscale_pd(a, 0);
|
||||
let r = _mm512_roundscale_pd::<0b00_00_00_00>(a);
|
||||
let e = _mm512_set1_pd(1.0);
|
||||
assert_eq_m512d(r, e);
|
||||
}
|
||||
@@ -2928,10 +2928,10 @@ mod tests {
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_mask_roundscale_pd() {
|
||||
let a = _mm512_set1_pd(1.1);
|
||||
let r = _mm512_mask_roundscale_pd(a, 0, a, 0);
|
||||
let r = _mm512_mask_roundscale_pd::<0b00_00_00_00>(a, 0, a);
|
||||
let e = _mm512_set1_pd(1.1);
|
||||
assert_eq_m512d(r, e);
|
||||
let r = _mm512_mask_roundscale_pd(a, 0b11111111, a, 0);
|
||||
let r = _mm512_mask_roundscale_pd::<0b00_00_00_00>(a, 0b11111111, a);
|
||||
let e = _mm512_set1_pd(1.0);
|
||||
assert_eq_m512d(r, e);
|
||||
}
|
||||
@@ -2939,9 +2939,9 @@ mod tests {
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_maskz_roundscale_pd() {
|
||||
let a = _mm512_set1_pd(1.1);
|
||||
let r = _mm512_maskz_roundscale_pd(0, a, 0);
|
||||
let r = _mm512_maskz_roundscale_pd::<0b00_00_00_00>(0, a);
|
||||
assert_eq_m512d(r, _mm512_setzero_pd());
|
||||
let r = _mm512_maskz_roundscale_pd(0b11111111, a, 0);
|
||||
let r = _mm512_maskz_roundscale_pd::<0b00_00_00_00>(0b11111111, a);
|
||||
let e = _mm512_set1_pd(1.0);
|
||||
assert_eq_m512d(r, e);
|
||||
}
|
||||
@@ -2949,7 +2949,7 @@ mod tests {
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
unsafe fn test_mm256_roundscale_pd() {
|
||||
let a = _mm256_set1_pd(1.1);
|
||||
let r = _mm256_roundscale_pd(a, 0);
|
||||
let r = _mm256_roundscale_pd::<0b00_00_00_00>(a);
|
||||
let e = _mm256_set1_pd(1.0);
|
||||
assert_eq_m256d(r, e);
|
||||
}
|
||||
@@ -2957,10 +2957,9 @@ mod tests {
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
unsafe fn test_mm256_mask_roundscale_pd() {
|
||||
let a = _mm256_set1_pd(1.1);
|
||||
let r = _mm256_mask_roundscale_pd(a, 0, a, 0);
|
||||
let e = _mm256_set1_pd(1.1);
|
||||
assert_eq_m256d(r, e);
|
||||
let r = _mm256_mask_roundscale_pd(a, 0b00001111, a, 0);
|
||||
let r = _mm256_mask_roundscale_pd::<0b00_00_00_00>(a, 0, a);
|
||||
assert_eq_m256d(r, a);
|
||||
let r = _mm256_mask_roundscale_pd::<0b00_00_00_00>(a, 0b00001111, a);
|
||||
let e = _mm256_set1_pd(1.0);
|
||||
assert_eq_m256d(r, e);
|
||||
}
|
||||
@@ -2968,9 +2967,9 @@ mod tests {
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
unsafe fn test_mm256_maskz_roundscale_pd() {
|
||||
let a = _mm256_set1_pd(1.1);
|
||||
let r = _mm256_maskz_roundscale_pd(0, a, 0);
|
||||
let r = _mm256_maskz_roundscale_pd::<0b00_00_00_00>(0, a);
|
||||
assert_eq_m256d(r, _mm256_setzero_pd());
|
||||
let r = _mm256_maskz_roundscale_pd(0b00001111, a, 0);
|
||||
let r = _mm256_maskz_roundscale_pd::<0b00_00_00_00>(0b00001111, a);
|
||||
let e = _mm256_set1_pd(1.0);
|
||||
assert_eq_m256d(r, e);
|
||||
}
|
||||
@@ -2978,7 +2977,7 @@ mod tests {
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
unsafe fn test_mm_roundscale_pd() {
|
||||
let a = _mm_set1_pd(1.1);
|
||||
let r = _mm_roundscale_pd(a, 0);
|
||||
let r = _mm_roundscale_pd::<0b00_00_00_00>(a);
|
||||
let e = _mm_set1_pd(1.0);
|
||||
assert_eq_m128d(r, e);
|
||||
}
|
||||
@@ -2986,10 +2985,10 @@ mod tests {
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
unsafe fn test_mm_mask_roundscale_pd() {
|
||||
let a = _mm_set1_pd(1.1);
|
||||
let r = _mm_mask_roundscale_pd(a, 0, a, 0);
|
||||
let r = _mm_mask_roundscale_pd::<0b00_00_00_00>(a, 0, a);
|
||||
let e = _mm_set1_pd(1.1);
|
||||
assert_eq_m128d(r, e);
|
||||
let r = _mm_mask_roundscale_pd(a, 0b00000011, a, 0);
|
||||
let r = _mm_mask_roundscale_pd::<0b00_00_00_00>(a, 0b00000011, a);
|
||||
let e = _mm_set1_pd(1.0);
|
||||
assert_eq_m128d(r, e);
|
||||
}
|
||||
@@ -2997,9 +2996,9 @@ mod tests {
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
unsafe fn test_mm_maskz_roundscale_pd() {
|
||||
let a = _mm_set1_pd(1.1);
|
||||
let r = _mm_maskz_roundscale_pd(0, a, 0);
|
||||
let r = _mm_maskz_roundscale_pd::<0b00_00_00_00>(0, a);
|
||||
assert_eq_m128d(r, _mm_setzero_pd());
|
||||
let r = _mm_maskz_roundscale_pd(0b00000011, a, 0);
|
||||
let r = _mm_maskz_roundscale_pd::<0b00_00_00_00>(0b00000011, a);
|
||||
let e = _mm_set1_pd(1.0);
|
||||
assert_eq_m128d(r, e);
|
||||
}
|
||||
@@ -3102,7 +3101,7 @@ mod tests {
|
||||
let a = _mm512_set1_pd(f64::NAN);
|
||||
let b = _mm512_set1_pd(f64::MAX);
|
||||
let c = _mm512_set1_epi64(i32::MAX as i64);
|
||||
let r = _mm512_fixupimm_pd(a, b, c, 5);
|
||||
let r = _mm512_fixupimm_pd::<5>(a, b, c);
|
||||
let e = _mm512_set1_pd(0.0);
|
||||
assert_eq_m512d(r, e);
|
||||
}
|
||||
@@ -3112,7 +3111,7 @@ mod tests {
|
||||
let a = _mm512_set_pd(f64::NAN, f64::NAN, f64::NAN, f64::NAN, 1., 1., 1., 1.);
|
||||
let b = _mm512_set1_pd(f64::MAX);
|
||||
let c = _mm512_set1_epi64(i32::MAX as i64);
|
||||
let r = _mm512_mask_fixupimm_pd(a, 0b11110000, b, c, 5);
|
||||
let r = _mm512_mask_fixupimm_pd::<5>(a, 0b11110000, b, c);
|
||||
let e = _mm512_set_pd(0., 0., 0., 0., 1., 1., 1., 1.);
|
||||
assert_eq_m512d(r, e);
|
||||
}
|
||||
@@ -3122,7 +3121,7 @@ mod tests {
|
||||
let a = _mm512_set_pd(f64::NAN, f64::NAN, f64::NAN, f64::NAN, 1., 1., 1., 1.);
|
||||
let b = _mm512_set1_pd(f64::MAX);
|
||||
let c = _mm512_set1_epi64(i32::MAX as i64);
|
||||
let r = _mm512_maskz_fixupimm_pd(0b11110000, a, b, c, 5);
|
||||
let r = _mm512_maskz_fixupimm_pd::<5>(0b11110000, a, b, c);
|
||||
let e = _mm512_set_pd(0., 0., 0., 0., 0., 0., 0., 0.);
|
||||
assert_eq_m512d(r, e);
|
||||
}
|
||||
@@ -3132,7 +3131,7 @@ mod tests {
|
||||
let a = _mm256_set1_pd(f64::NAN);
|
||||
let b = _mm256_set1_pd(f64::MAX);
|
||||
let c = _mm256_set1_epi64x(i32::MAX as i64);
|
||||
let r = _mm256_fixupimm_pd(a, b, c, 5);
|
||||
let r = _mm256_fixupimm_pd::<5>(a, b, c);
|
||||
let e = _mm256_set1_pd(0.0);
|
||||
assert_eq_m256d(r, e);
|
||||
}
|
||||
@@ -3142,7 +3141,7 @@ mod tests {
|
||||
let a = _mm256_set1_pd(f64::NAN);
|
||||
let b = _mm256_set1_pd(f64::MAX);
|
||||
let c = _mm256_set1_epi64x(i32::MAX as i64);
|
||||
let r = _mm256_mask_fixupimm_pd(a, 0b00001111, b, c, 5);
|
||||
let r = _mm256_mask_fixupimm_pd::<5>(a, 0b00001111, b, c);
|
||||
let e = _mm256_set1_pd(0.0);
|
||||
assert_eq_m256d(r, e);
|
||||
}
|
||||
@@ -3152,7 +3151,7 @@ mod tests {
|
||||
let a = _mm256_set1_pd(f64::NAN);
|
||||
let b = _mm256_set1_pd(f64::MAX);
|
||||
let c = _mm256_set1_epi64x(i32::MAX as i64);
|
||||
let r = _mm256_maskz_fixupimm_pd(0b00001111, a, b, c, 5);
|
||||
let r = _mm256_maskz_fixupimm_pd::<5>(0b00001111, a, b, c);
|
||||
let e = _mm256_set1_pd(0.0);
|
||||
assert_eq_m256d(r, e);
|
||||
}
|
||||
@@ -3162,7 +3161,7 @@ mod tests {
|
||||
let a = _mm_set1_pd(f64::NAN);
|
||||
let b = _mm_set1_pd(f64::MAX);
|
||||
let c = _mm_set1_epi64x(i32::MAX as i64);
|
||||
let r = _mm_fixupimm_pd(a, b, c, 5);
|
||||
let r = _mm_fixupimm_pd::<5>(a, b, c);
|
||||
let e = _mm_set1_pd(0.0);
|
||||
assert_eq_m128d(r, e);
|
||||
}
|
||||
@@ -3172,7 +3171,7 @@ mod tests {
|
||||
let a = _mm_set1_pd(f64::NAN);
|
||||
let b = _mm_set1_pd(f64::MAX);
|
||||
let c = _mm_set1_epi64x(i32::MAX as i64);
|
||||
let r = _mm_mask_fixupimm_pd(a, 0b00000011, b, c, 5);
|
||||
let r = _mm_mask_fixupimm_pd::<5>(a, 0b00000011, b, c);
|
||||
let e = _mm_set1_pd(0.0);
|
||||
assert_eq_m128d(r, e);
|
||||
}
|
||||
@@ -3182,7 +3181,7 @@ mod tests {
|
||||
let a = _mm_set1_pd(f64::NAN);
|
||||
let b = _mm_set1_pd(f64::MAX);
|
||||
let c = _mm_set1_epi64x(i32::MAX as i64);
|
||||
let r = _mm_maskz_fixupimm_pd(0b00000011, a, b, c, 5);
|
||||
let r = _mm_maskz_fixupimm_pd::<5>(0b00000011, a, b, c);
|
||||
let e = _mm_set1_pd(0.0);
|
||||
assert_eq_m128d(r, e);
|
||||
}
|
||||
@@ -3192,7 +3191,7 @@ mod tests {
|
||||
let a = _mm512_set1_epi64(1 << 2);
|
||||
let b = _mm512_set1_epi64(1 << 1);
|
||||
let c = _mm512_set1_epi64(1 << 0);
|
||||
let r = _mm512_ternarylogic_epi64(a, b, c, 8);
|
||||
let r = _mm512_ternarylogic_epi64::<8>(a, b, c);
|
||||
let e = _mm512_set1_epi64(0);
|
||||
assert_eq_m512i(r, e);
|
||||
}
|
||||
@@ -3202,9 +3201,9 @@ mod tests {
|
||||
let src = _mm512_set1_epi64(1 << 2);
|
||||
let a = _mm512_set1_epi64(1 << 1);
|
||||
let b = _mm512_set1_epi64(1 << 0);
|
||||
let r = _mm512_mask_ternarylogic_epi64(src, 0, a, b, 8);
|
||||
let r = _mm512_mask_ternarylogic_epi64::<8>(src, 0, a, b);
|
||||
assert_eq_m512i(r, src);
|
||||
let r = _mm512_mask_ternarylogic_epi64(src, 0b11111111, a, b, 8);
|
||||
let r = _mm512_mask_ternarylogic_epi64::<8>(src, 0b11111111, a, b);
|
||||
let e = _mm512_set1_epi64(0);
|
||||
assert_eq_m512i(r, e);
|
||||
}
|
||||
@@ -3214,9 +3213,9 @@ mod tests {
|
||||
let a = _mm512_set1_epi64(1 << 2);
|
||||
let b = _mm512_set1_epi64(1 << 1);
|
||||
let c = _mm512_set1_epi64(1 << 0);
|
||||
let r = _mm512_maskz_ternarylogic_epi64(0, a, b, c, 9);
|
||||
let r = _mm512_maskz_ternarylogic_epi64::<8>(0, a, b, c);
|
||||
assert_eq_m512i(r, _mm512_setzero_si512());
|
||||
let r = _mm512_maskz_ternarylogic_epi64(0b11111111, a, b, c, 8);
|
||||
let r = _mm512_maskz_ternarylogic_epi64::<8>(0b11111111, a, b, c);
|
||||
let e = _mm512_set1_epi64(0);
|
||||
assert_eq_m512i(r, e);
|
||||
}
|
||||
@@ -3226,7 +3225,7 @@ mod tests {
|
||||
let a = _mm256_set1_epi64x(1 << 2);
|
||||
let b = _mm256_set1_epi64x(1 << 1);
|
||||
let c = _mm256_set1_epi64x(1 << 0);
|
||||
let r = _mm256_ternarylogic_epi64(a, b, c, 8);
|
||||
let r = _mm256_ternarylogic_epi64::<8>(a, b, c);
|
||||
let e = _mm256_set1_epi64x(0);
|
||||
assert_eq_m256i(r, e);
|
||||
}
|
||||
@@ -3236,9 +3235,9 @@ mod tests {
|
||||
let src = _mm256_set1_epi64x(1 << 2);
|
||||
let a = _mm256_set1_epi64x(1 << 1);
|
||||
let b = _mm256_set1_epi64x(1 << 0);
|
||||
let r = _mm256_mask_ternarylogic_epi64(src, 0, a, b, 8);
|
||||
let r = _mm256_mask_ternarylogic_epi64::<8>(src, 0, a, b);
|
||||
assert_eq_m256i(r, src);
|
||||
let r = _mm256_mask_ternarylogic_epi64(src, 0b00001111, a, b, 8);
|
||||
let r = _mm256_mask_ternarylogic_epi64::<8>(src, 0b00001111, a, b);
|
||||
let e = _mm256_set1_epi64x(0);
|
||||
assert_eq_m256i(r, e);
|
||||
}
|
||||
@@ -3248,9 +3247,9 @@ mod tests {
|
||||
let a = _mm256_set1_epi64x(1 << 2);
|
||||
let b = _mm256_set1_epi64x(1 << 1);
|
||||
let c = _mm256_set1_epi64x(1 << 0);
|
||||
let r = _mm256_maskz_ternarylogic_epi64(0, a, b, c, 9);
|
||||
let r = _mm256_maskz_ternarylogic_epi64::<9>(0, a, b, c);
|
||||
assert_eq_m256i(r, _mm256_setzero_si256());
|
||||
let r = _mm256_maskz_ternarylogic_epi64(0b00001111, a, b, c, 8);
|
||||
let r = _mm256_maskz_ternarylogic_epi64::<8>(0b00001111, a, b, c);
|
||||
let e = _mm256_set1_epi64x(0);
|
||||
assert_eq_m256i(r, e);
|
||||
}
|
||||
@@ -3260,7 +3259,7 @@ mod tests {
|
||||
let a = _mm_set1_epi64x(1 << 2);
|
||||
let b = _mm_set1_epi64x(1 << 1);
|
||||
let c = _mm_set1_epi64x(1 << 0);
|
||||
let r = _mm_ternarylogic_epi64(a, b, c, 8);
|
||||
let r = _mm_ternarylogic_epi64::<8>(a, b, c);
|
||||
let e = _mm_set1_epi64x(0);
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
@@ -3270,9 +3269,9 @@ mod tests {
|
||||
let src = _mm_set1_epi64x(1 << 2);
|
||||
let a = _mm_set1_epi64x(1 << 1);
|
||||
let b = _mm_set1_epi64x(1 << 0);
|
||||
let r = _mm_mask_ternarylogic_epi64(src, 0, a, b, 8);
|
||||
let r = _mm_mask_ternarylogic_epi64::<8>(src, 0, a, b);
|
||||
assert_eq_m128i(r, src);
|
||||
let r = _mm_mask_ternarylogic_epi64(src, 0b00000011, a, b, 8);
|
||||
let r = _mm_mask_ternarylogic_epi64::<8>(src, 0b00000011, a, b);
|
||||
let e = _mm_set1_epi64x(0);
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
@@ -3282,9 +3281,9 @@ mod tests {
|
||||
let a = _mm_set1_epi64x(1 << 2);
|
||||
let b = _mm_set1_epi64x(1 << 1);
|
||||
let c = _mm_set1_epi64x(1 << 0);
|
||||
let r = _mm_maskz_ternarylogic_epi64(0, a, b, c, 9);
|
||||
let r = _mm_maskz_ternarylogic_epi64::<9>(0, a, b, c);
|
||||
assert_eq_m128i(r, _mm_setzero_si128());
|
||||
let r = _mm_maskz_ternarylogic_epi64(0b00000011, a, b, c, 8);
|
||||
let r = _mm_maskz_ternarylogic_epi64::<8>(0b00000011, a, b, c);
|
||||
let e = _mm_set1_epi64x(0);
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
@@ -5308,10 +5307,10 @@ mod tests {
|
||||
unsafe fn test_mm512_add_round_pd() {
|
||||
let a = _mm512_setr_pd(8., 9.5, 10., 11.5, 12., 13.5, 14., 0.000000000000000007);
|
||||
let b = _mm512_set1_pd(-1.);
|
||||
let r = _mm512_add_round_pd(a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
|
||||
let r = _mm512_add_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
|
||||
let e = _mm512_setr_pd(7., 8.5, 9., 10.5, 11., 12.5, 13., -1.0);
|
||||
assert_eq_m512d(r, e);
|
||||
let r = _mm512_add_round_pd(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
|
||||
let r = _mm512_add_round_pd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
|
||||
let e = _mm512_setr_pd(7., 8.5, 9., 10.5, 11., 12.5, 13., -0.9999999999999999);
|
||||
assert_eq_m512d(r, e);
|
||||
}
|
||||
@@ -5320,14 +5319,12 @@ mod tests {
|
||||
unsafe fn test_mm512_mask_add_round_pd() {
|
||||
let a = _mm512_setr_pd(8., 9.5, 10., 11.5, 12., 13.5, 14., 0.000000000000000007);
|
||||
let b = _mm512_set1_pd(-1.);
|
||||
let r = _mm512_mask_add_round_pd(a, 0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
|
||||
let r = _mm512_mask_add_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
|
||||
a, 0, a, b,
|
||||
);
|
||||
assert_eq_m512d(r, a);
|
||||
let r = _mm512_mask_add_round_pd(
|
||||
a,
|
||||
0b11110000,
|
||||
a,
|
||||
b,
|
||||
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
|
||||
let r = _mm512_mask_add_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
|
||||
a, 0b11110000, a, b,
|
||||
);
|
||||
let e = _mm512_setr_pd(8., 9.5, 10., 11.5, 11., 12.5, 13., -1.0);
|
||||
assert_eq_m512d(r, e);
|
||||
@@ -5337,13 +5334,11 @@ mod tests {
|
||||
unsafe fn test_mm512_maskz_add_round_pd() {
|
||||
let a = _mm512_setr_pd(8., 9.5, 10., 11.5, 12., 13.5, 14., 0.000000000000000007);
|
||||
let b = _mm512_set1_pd(-1.);
|
||||
let r = _mm512_maskz_add_round_pd(0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
|
||||
let r =
|
||||
_mm512_maskz_add_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
|
||||
assert_eq_m512d(r, _mm512_setzero_pd());
|
||||
let r = _mm512_maskz_add_round_pd(
|
||||
0b11110000,
|
||||
a,
|
||||
b,
|
||||
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
|
||||
let r = _mm512_maskz_add_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
|
||||
0b11110000, a, b,
|
||||
);
|
||||
let e = _mm512_setr_pd(0., 0., 0., 0., 11., 12.5, 13., -1.0);
|
||||
assert_eq_m512d(r, e);
|
||||
@@ -9715,70 +9710,13 @@ mod tests {
|
||||
assert_eq_m128d(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_shuffle_pd() {
|
||||
let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.);
|
||||
let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.);
|
||||
let r = _mm512_shuffle_pd(
|
||||
a,
|
||||
b,
|
||||
1 << 0 | 1 << 1 | 1 << 2 | 1 << 3 | 1 << 4 | 1 << 5 | 1 << 6 | 1 << 7,
|
||||
);
|
||||
let e = _mm512_setr_pd(4., 3., 8., 7., 4., 3., 8., 7.);
|
||||
assert_eq_m512d(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_mask_shuffle_pd() {
|
||||
let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.);
|
||||
let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.);
|
||||
let r = _mm512_mask_shuffle_pd(
|
||||
a,
|
||||
0,
|
||||
a,
|
||||
b,
|
||||
1 << 0 | 1 << 1 | 1 << 2 | 1 << 3 | 1 << 4 | 1 << 5 | 1 << 6 | 1 << 7,
|
||||
);
|
||||
assert_eq_m512d(r, a);
|
||||
let r = _mm512_mask_shuffle_pd(
|
||||
a,
|
||||
0b11111111,
|
||||
a,
|
||||
b,
|
||||
1 << 0 | 1 << 1 | 1 << 2 | 1 << 3 | 1 << 4 | 1 << 5 | 1 << 6 | 1 << 7,
|
||||
);
|
||||
let e = _mm512_setr_pd(4., 3., 8., 7., 4., 3., 8., 7.);
|
||||
assert_eq_m512d(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_maskz_shuffle_pd() {
|
||||
let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.);
|
||||
let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.);
|
||||
let r = _mm512_maskz_shuffle_pd(
|
||||
0,
|
||||
a,
|
||||
b,
|
||||
1 << 0 | 1 << 1 | 1 << 2 | 1 << 3 | 1 << 4 | 1 << 5 | 1 << 6 | 1 << 7,
|
||||
);
|
||||
assert_eq_m512d(r, _mm512_setzero_pd());
|
||||
let r = _mm512_maskz_shuffle_pd(
|
||||
0b00001111,
|
||||
a,
|
||||
b,
|
||||
1 << 0 | 1 << 1 | 1 << 2 | 1 << 3 | 1 << 4 | 1 << 5 | 1 << 6 | 1 << 7,
|
||||
);
|
||||
let e = _mm512_setr_pd(4., 3., 8., 7., 0., 0., 0., 0.);
|
||||
assert_eq_m512d(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
unsafe fn test_mm256_mask_shuffle_pd() {
|
||||
let a = _mm256_set_pd(1., 4., 5., 8.);
|
||||
let b = _mm256_set_pd(2., 3., 6., 7.);
|
||||
let r = _mm256_mask_shuffle_pd(a, 0, a, b, 1 << 0 | 1 << 1 | 1 << 2 | 1 << 3);
|
||||
let r = _mm256_mask_shuffle_pd::<0b11_11_11_11>(a, 0, a, b);
|
||||
assert_eq_m256d(r, a);
|
||||
let r = _mm256_mask_shuffle_pd(a, 0b00001111, a, b, 1 << 0 | 1 << 1 | 1 << 2 | 1 << 3);
|
||||
let r = _mm256_mask_shuffle_pd::<0b11_11_11_11>(a, 0b00001111, a, b);
|
||||
let e = _mm256_set_pd(2., 1., 6., 5.);
|
||||
assert_eq_m256d(r, e);
|
||||
}
|
||||
@@ -9787,9 +9725,9 @@ mod tests {
|
||||
unsafe fn test_mm256_maskz_shuffle_pd() {
|
||||
let a = _mm256_set_pd(1., 4., 5., 8.);
|
||||
let b = _mm256_set_pd(2., 3., 6., 7.);
|
||||
let r = _mm256_maskz_shuffle_pd(0, a, b, 1 << 0 | 1 << 1 | 1 << 2 | 1 << 3);
|
||||
let r = _mm256_maskz_shuffle_pd::<0b11_11_11_11>(0, a, b);
|
||||
assert_eq_m256d(r, _mm256_setzero_pd());
|
||||
let r = _mm256_maskz_shuffle_pd(0b00001111, a, b, 1 << 0 | 1 << 1 | 1 << 2 | 1 << 3);
|
||||
let r = _mm256_maskz_shuffle_pd::<0b11_11_11_11>(0b00001111, a, b);
|
||||
let e = _mm256_set_pd(2., 1., 6., 5.);
|
||||
assert_eq_m256d(r, e);
|
||||
}
|
||||
@@ -9798,9 +9736,9 @@ mod tests {
|
||||
unsafe fn test_mm_mask_shuffle_pd() {
|
||||
let a = _mm_set_pd(1., 4.);
|
||||
let b = _mm_set_pd(2., 3.);
|
||||
let r = _mm_mask_shuffle_pd(a, 0, a, b, 1 << 0 | 1 << 1);
|
||||
let r = _mm_mask_shuffle_pd::<0b11_11_11_11>(a, 0, a, b);
|
||||
assert_eq_m128d(r, a);
|
||||
let r = _mm_mask_shuffle_pd(a, 0b00000011, a, b, 1 << 0 | 1 << 1);
|
||||
let r = _mm_mask_shuffle_pd::<0b11_11_11_11>(a, 0b00000011, a, b);
|
||||
let e = _mm_set_pd(2., 1.);
|
||||
assert_eq_m128d(r, e);
|
||||
}
|
||||
@@ -9809,9 +9747,9 @@ mod tests {
|
||||
unsafe fn test_mm_maskz_shuffle_pd() {
|
||||
let a = _mm_set_pd(1., 4.);
|
||||
let b = _mm_set_pd(2., 3.);
|
||||
let r = _mm_maskz_shuffle_pd(0, a, b, 1 << 0 | 1 << 1);
|
||||
let r = _mm_maskz_shuffle_pd::<0b11_11_11_11>(0, a, b);
|
||||
assert_eq_m128d(r, _mm_setzero_pd());
|
||||
let r = _mm_maskz_shuffle_pd(0b00000011, a, b, 1 << 0 | 1 << 1);
|
||||
let r = _mm_maskz_shuffle_pd::<0b11_11_11_11>(0b00000011, a, b);
|
||||
let e = _mm_set_pd(2., 1.);
|
||||
assert_eq_m128d(r, e);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user