Convert shuffle_ps and shuffle_pd to const generics (#1037)

This commit is contained in:
minybot
2021-03-02 00:36:01 -05:00
committed by GitHub
parent f626d43780
commit 2a9f6349d8
5 changed files with 674 additions and 1137 deletions

View File

@@ -113,44 +113,21 @@ pub unsafe fn _mm256_or_ps(a: __m256, b: __m256) -> __m256 {
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shuffle_pd)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vshufpd, imm8 = 0x1))]
#[rustc_args_required_const(2)]
#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_shuffle_pd(a: __m256d, b: __m256d, imm8: i32) -> __m256d {
let imm8 = (imm8 & 0xFF) as u8;
macro_rules! shuffle4 {
($a:expr, $b:expr, $c:expr, $d:expr) => {
simd_shuffle4(a, b, [$a, $b, $c, $d])
};
}
macro_rules! shuffle3 {
($a:expr, $b:expr, $c:expr) => {
match (imm8 >> 3) & 0x1 {
0 => shuffle4!($a, $b, $c, 6),
_ => shuffle4!($a, $b, $c, 7),
}
};
}
macro_rules! shuffle2 {
($a:expr, $b:expr) => {
match (imm8 >> 2) & 0x1 {
0 => shuffle3!($a, $b, 2),
_ => shuffle3!($a, $b, 3),
}
};
}
macro_rules! shuffle1 {
($a:expr) => {
match (imm8 >> 1) & 0x1 {
0 => shuffle2!($a, 4),
_ => shuffle2!($a, 5),
}
};
}
match imm8 & 0x1 {
0 => shuffle1!(0),
_ => shuffle1!(1),
}
// Const-generic replacement for the old `imm8: i32` runtime parameter
// (exposed to stable callers via #[rustc_legacy_const_generics(2)]).
// Only the low 4 bits of MASK select elements; one bit per output lane.
pub unsafe fn _mm256_shuffle_pd<const MASK: i32>(a: __m256d, b: __m256d) -> __m256d {
    // Compile-time check that MASK fits in 8 bits (the legacy `imm8` contract).
    static_assert_imm8!(MASK);
    // simd_shuffle4 indexes the concatenation [a, b]: a = 0..=3, b = 4..=7.
    // Each output element picks within its own 128-bit lane, alternating a/b.
    simd_shuffle4(
        a,
        b,
        [
            MASK as u32 & 0b1,              // r[0] = a[bit 0]       (low lane)
            ((MASK as u32 >> 1) & 0b1) + 4, // r[1] = b[bit 1]       (low lane; b base = 4)
            ((MASK as u32 >> 2) & 0b1) + 2, // r[2] = a[2 + bit 2]   (high lane)
            ((MASK as u32 >> 3) & 0b1) + 6, // r[3] = b[2 + bit 3]   (high lane)
        ],
    )
}
/// Shuffles single-precision (32-bit) floating-point elements in `a` within
@@ -159,61 +136,25 @@ pub unsafe fn _mm256_shuffle_pd(a: __m256d, b: __m256d, imm8: i32) -> __m256d {
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shuffle_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vshufps, imm8 = 0x0))]
#[rustc_args_required_const(2)]
#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_shuffle_ps(a: __m256, b: __m256, imm8: i32) -> __m256 {
let imm8 = (imm8 & 0xFF) as u8;
macro_rules! shuffle4 {
(
$a:expr,
$b:expr,
$c:expr,
$d:expr,
$e:expr,
$f:expr,
$g:expr,
$h:expr
) => {
simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h])
};
}
macro_rules! shuffle3 {
($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr) => {
match (imm8 >> 6) & 0x3 {
0 => shuffle4!($a, $b, $c, 8, $e, $f, $g, 12),
1 => shuffle4!($a, $b, $c, 9, $e, $f, $g, 13),
2 => shuffle4!($a, $b, $c, 10, $e, $f, $g, 14),
_ => shuffle4!($a, $b, $c, 11, $e, $f, $g, 15),
}
};
}
macro_rules! shuffle2 {
($a:expr, $b:expr, $e:expr, $f:expr) => {
match (imm8 >> 4) & 0x3 {
0 => shuffle3!($a, $b, 8, $e, $f, 12),
1 => shuffle3!($a, $b, 9, $e, $f, 13),
2 => shuffle3!($a, $b, 10, $e, $f, 14),
_ => shuffle3!($a, $b, 11, $e, $f, 15),
}
};
}
macro_rules! shuffle1 {
($a:expr, $e:expr) => {
match (imm8 >> 2) & 0x3 {
0 => shuffle2!($a, 0, $e, 4),
1 => shuffle2!($a, 1, $e, 5),
2 => shuffle2!($a, 2, $e, 6),
_ => shuffle2!($a, 3, $e, 7),
}
};
}
match imm8 & 0x3 {
0 => shuffle1!(0, 4),
1 => shuffle1!(1, 5),
2 => shuffle1!(2, 6),
_ => shuffle1!(3, 7),
}
// Const-generic replacement for the old `imm8: i32` runtime parameter
// (exposed to stable callers via #[rustc_legacy_const_generics(2)]).
// MASK is interpreted as four 2-bit selectors, reused in both 128-bit lanes.
pub unsafe fn _mm256_shuffle_ps<const MASK: i32>(a: __m256, b: __m256) -> __m256 {
    // Compile-time check that MASK fits in 8 bits (the legacy `imm8` contract).
    static_assert_imm8!(MASK);
    // simd_shuffle8 indexes the concatenation [a, b]: a = 0..=7, b = 8..=15.
    // Low 128-bit lane: two picks from a's low half, two from b's low half.
    // High 128-bit lane: same selectors, offset by +4 (a) / +4 past b's base (b).
    simd_shuffle8(
        a,
        b,
        [
            MASK as u32 & 0b11,               // r[0] = a[sel0]
            (MASK as u32 >> 2) & 0b11,        // r[1] = a[sel1]
            ((MASK as u32 >> 4) & 0b11) + 8,  // r[2] = b[sel2]      (b base = 8)
            ((MASK as u32 >> 6) & 0b11) + 8,  // r[3] = b[sel3]
            (MASK as u32 & 0b11) + 4,         // r[4] = a[4 + sel0]  (high lane)
            ((MASK as u32 >> 2) & 0b11) + 4,  // r[5] = a[4 + sel1]
            ((MASK as u32 >> 4) & 0b11) + 12, // r[6] = b[4 + sel2]
            ((MASK as u32 >> 6) & 0b11) + 12, // r[7] = b[4 + sel3]
        ],
    )
}
/// Computes the bitwise NOT of packed double-precision (64-bit) floating-point
@@ -3381,7 +3322,7 @@ mod tests {
unsafe fn test_mm256_shuffle_pd() {
let a = _mm256_setr_pd(1., 4., 5., 8.);
let b = _mm256_setr_pd(2., 3., 6., 7.);
let r = _mm256_shuffle_pd(a, b, 0xF);
let r = _mm256_shuffle_pd::<0b11_11_11_11>(a, b);
let e = _mm256_setr_pd(4., 3., 8., 7.);
assert_eq_m256d(r, e);
}
@@ -3390,7 +3331,7 @@ mod tests {
unsafe fn test_mm256_shuffle_ps() {
let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.);
let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.);
let r = _mm256_shuffle_ps(a, b, 0x0F);
let r = _mm256_shuffle_ps::<0b00_00_11_11>(a, b);
let e = _mm256_setr_ps(8., 8., 2., 2., 16., 16., 10., 10.);
assert_eq_m256(r, e);
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,4 +1,20 @@
//! Utility macros.
//!
// Helper struct used to trigger const eval errors when the const generic immediate value `imm` is
// not a round number.
pub(crate) struct ValidateConstRound<const IMM: i32>;
impl<const IMM: i32> ValidateConstRound<IMM> {
    pub(crate) const VALID: () = {
        // Division by zero in a const initializer is a compile-time error, so
        // evaluating VALID fails to compile unless IMM is one of the accepted
        // values (4, 8, 9, 10, or 11). The cast `bool as usize` yields 0 for
        // any other IMM, making the divisor zero.
        let _ = 1 / ((IMM == 4 || IMM == 8 || IMM == 9 || IMM == 10 || IMM == 11) as usize);
    };
}

// Statement-position compile-time assertion: referencing VALID forces const
// evaluation of the check above for the caller's `$imm`.
#[allow(unused)]
macro_rules! static_assert_rounding {
    ($imm:ident) => {
        let _ = $crate::core_arch::x86::macros::ValidateConstRound::<$imm>::VALID;
    };
}
macro_rules! constify_imm6 {
($imm8:expr, $expand:ident) => {

View File

@@ -2653,21 +2653,17 @@ pub unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> __m128d {
#[target_feature(enable = "sse2")]
#[cfg_attr(
all(test, any(not(target_os = "windows"), target_arch = "x86")),
assert_instr(shufps, imm8 = 1)
cfg_attr(test, assert_instr(shufps, MASK = 2)) // FIXME shufpd expected
)]
#[cfg_attr(
all(test, all(target_os = "windows", target_arch = "x86_64")),
assert_instr(shufpd, imm8 = 1)
cfg_attr(test, assert_instr(shufpd, MASK = 1))
)]
#[rustc_args_required_const(2)]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_shuffle_pd(a: __m128d, b: __m128d, imm8: i32) -> __m128d {
match imm8 & 0b11 {
0b00 => simd_shuffle2(a, b, [0, 2]),
0b01 => simd_shuffle2(a, b, [1, 2]),
0b10 => simd_shuffle2(a, b, [0, 3]),
_ => simd_shuffle2(a, b, [1, 3]),
}
// Const-generic replacement for the old `imm8: i32` runtime parameter
// (exposed to stable callers via #[rustc_legacy_const_generics(2)]).
// Only the low 2 bits of MASK are used: one selector bit per output lane.
pub unsafe fn _mm_shuffle_pd<const MASK: i32>(a: __m128d, b: __m128d) -> __m128d {
    // Compile-time check that MASK fits in 8 bits (the legacy `imm8` contract).
    static_assert_imm8!(MASK);
    // simd_shuffle2 indexes the concatenation [a, b]: a = 0..=1, b = 2..=3.
    // r[0] = a[bit 0], r[1] = b[bit 1] (b base = 2) — replaces the old
    // four-way match over `imm8 & 0b11`.
    simd_shuffle2(a, b, [MASK as u32 & 0b1, ((MASK as u32 >> 1) & 0b1) + 2])
}
/// Constructs a 128-bit floating-point vector of `[2 x double]`. The lower
@@ -4852,7 +4848,7 @@ mod tests {
let a = _mm_setr_pd(1., 2.);
let b = _mm_setr_pd(3., 4.);
let expected = _mm_setr_pd(1., 3.);
let r = _mm_shuffle_pd(a, b, 0);
let r = _mm_shuffle_pd::<0b00_00_00_00>(a, b);
assert_eq_m128d(r, expected);
}

View File

@@ -2920,7 +2920,7 @@ mod tests {
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_roundscale_pd() {
let a = _mm512_set1_pd(1.1);
let r = _mm512_roundscale_pd(a, 0);
let r = _mm512_roundscale_pd::<0b00_00_00_00>(a);
let e = _mm512_set1_pd(1.0);
assert_eq_m512d(r, e);
}
@@ -2928,10 +2928,10 @@ mod tests {
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_roundscale_pd() {
let a = _mm512_set1_pd(1.1);
let r = _mm512_mask_roundscale_pd(a, 0, a, 0);
let r = _mm512_mask_roundscale_pd::<0b00_00_00_00>(a, 0, a);
let e = _mm512_set1_pd(1.1);
assert_eq_m512d(r, e);
let r = _mm512_mask_roundscale_pd(a, 0b11111111, a, 0);
let r = _mm512_mask_roundscale_pd::<0b00_00_00_00>(a, 0b11111111, a);
let e = _mm512_set1_pd(1.0);
assert_eq_m512d(r, e);
}
@@ -2939,9 +2939,9 @@ mod tests {
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_roundscale_pd() {
let a = _mm512_set1_pd(1.1);
let r = _mm512_maskz_roundscale_pd(0, a, 0);
let r = _mm512_maskz_roundscale_pd::<0b00_00_00_00>(0, a);
assert_eq_m512d(r, _mm512_setzero_pd());
let r = _mm512_maskz_roundscale_pd(0b11111111, a, 0);
let r = _mm512_maskz_roundscale_pd::<0b00_00_00_00>(0b11111111, a);
let e = _mm512_set1_pd(1.0);
assert_eq_m512d(r, e);
}
@@ -2949,7 +2949,7 @@ mod tests {
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_roundscale_pd() {
let a = _mm256_set1_pd(1.1);
let r = _mm256_roundscale_pd(a, 0);
let r = _mm256_roundscale_pd::<0b00_00_00_00>(a);
let e = _mm256_set1_pd(1.0);
assert_eq_m256d(r, e);
}
@@ -2957,10 +2957,9 @@ mod tests {
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_roundscale_pd() {
let a = _mm256_set1_pd(1.1);
let r = _mm256_mask_roundscale_pd(a, 0, a, 0);
let e = _mm256_set1_pd(1.1);
assert_eq_m256d(r, e);
let r = _mm256_mask_roundscale_pd(a, 0b00001111, a, 0);
let r = _mm256_mask_roundscale_pd::<0b00_00_00_00>(a, 0, a);
assert_eq_m256d(r, a);
let r = _mm256_mask_roundscale_pd::<0b00_00_00_00>(a, 0b00001111, a);
let e = _mm256_set1_pd(1.0);
assert_eq_m256d(r, e);
}
@@ -2968,9 +2967,9 @@ mod tests {
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_roundscale_pd() {
let a = _mm256_set1_pd(1.1);
let r = _mm256_maskz_roundscale_pd(0, a, 0);
let r = _mm256_maskz_roundscale_pd::<0b00_00_00_00>(0, a);
assert_eq_m256d(r, _mm256_setzero_pd());
let r = _mm256_maskz_roundscale_pd(0b00001111, a, 0);
let r = _mm256_maskz_roundscale_pd::<0b00_00_00_00>(0b00001111, a);
let e = _mm256_set1_pd(1.0);
assert_eq_m256d(r, e);
}
@@ -2978,7 +2977,7 @@ mod tests {
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_roundscale_pd() {
let a = _mm_set1_pd(1.1);
let r = _mm_roundscale_pd(a, 0);
let r = _mm_roundscale_pd::<0b00_00_00_00>(a);
let e = _mm_set1_pd(1.0);
assert_eq_m128d(r, e);
}
@@ -2986,10 +2985,10 @@ mod tests {
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_roundscale_pd() {
let a = _mm_set1_pd(1.1);
let r = _mm_mask_roundscale_pd(a, 0, a, 0);
let r = _mm_mask_roundscale_pd::<0b00_00_00_00>(a, 0, a);
let e = _mm_set1_pd(1.1);
assert_eq_m128d(r, e);
let r = _mm_mask_roundscale_pd(a, 0b00000011, a, 0);
let r = _mm_mask_roundscale_pd::<0b00_00_00_00>(a, 0b00000011, a);
let e = _mm_set1_pd(1.0);
assert_eq_m128d(r, e);
}
@@ -2997,9 +2996,9 @@ mod tests {
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_roundscale_pd() {
let a = _mm_set1_pd(1.1);
let r = _mm_maskz_roundscale_pd(0, a, 0);
let r = _mm_maskz_roundscale_pd::<0b00_00_00_00>(0, a);
assert_eq_m128d(r, _mm_setzero_pd());
let r = _mm_maskz_roundscale_pd(0b00000011, a, 0);
let r = _mm_maskz_roundscale_pd::<0b00_00_00_00>(0b00000011, a);
let e = _mm_set1_pd(1.0);
assert_eq_m128d(r, e);
}
@@ -3102,7 +3101,7 @@ mod tests {
let a = _mm512_set1_pd(f64::NAN);
let b = _mm512_set1_pd(f64::MAX);
let c = _mm512_set1_epi64(i32::MAX as i64);
let r = _mm512_fixupimm_pd(a, b, c, 5);
let r = _mm512_fixupimm_pd::<5>(a, b, c);
let e = _mm512_set1_pd(0.0);
assert_eq_m512d(r, e);
}
@@ -3112,7 +3111,7 @@ mod tests {
let a = _mm512_set_pd(f64::NAN, f64::NAN, f64::NAN, f64::NAN, 1., 1., 1., 1.);
let b = _mm512_set1_pd(f64::MAX);
let c = _mm512_set1_epi64(i32::MAX as i64);
let r = _mm512_mask_fixupimm_pd(a, 0b11110000, b, c, 5);
let r = _mm512_mask_fixupimm_pd::<5>(a, 0b11110000, b, c);
let e = _mm512_set_pd(0., 0., 0., 0., 1., 1., 1., 1.);
assert_eq_m512d(r, e);
}
@@ -3122,7 +3121,7 @@ mod tests {
let a = _mm512_set_pd(f64::NAN, f64::NAN, f64::NAN, f64::NAN, 1., 1., 1., 1.);
let b = _mm512_set1_pd(f64::MAX);
let c = _mm512_set1_epi64(i32::MAX as i64);
let r = _mm512_maskz_fixupimm_pd(0b11110000, a, b, c, 5);
let r = _mm512_maskz_fixupimm_pd::<5>(0b11110000, a, b, c);
let e = _mm512_set_pd(0., 0., 0., 0., 0., 0., 0., 0.);
assert_eq_m512d(r, e);
}
@@ -3132,7 +3131,7 @@ mod tests {
let a = _mm256_set1_pd(f64::NAN);
let b = _mm256_set1_pd(f64::MAX);
let c = _mm256_set1_epi64x(i32::MAX as i64);
let r = _mm256_fixupimm_pd(a, b, c, 5);
let r = _mm256_fixupimm_pd::<5>(a, b, c);
let e = _mm256_set1_pd(0.0);
assert_eq_m256d(r, e);
}
@@ -3142,7 +3141,7 @@ mod tests {
let a = _mm256_set1_pd(f64::NAN);
let b = _mm256_set1_pd(f64::MAX);
let c = _mm256_set1_epi64x(i32::MAX as i64);
let r = _mm256_mask_fixupimm_pd(a, 0b00001111, b, c, 5);
let r = _mm256_mask_fixupimm_pd::<5>(a, 0b00001111, b, c);
let e = _mm256_set1_pd(0.0);
assert_eq_m256d(r, e);
}
@@ -3152,7 +3151,7 @@ mod tests {
let a = _mm256_set1_pd(f64::NAN);
let b = _mm256_set1_pd(f64::MAX);
let c = _mm256_set1_epi64x(i32::MAX as i64);
let r = _mm256_maskz_fixupimm_pd(0b00001111, a, b, c, 5);
let r = _mm256_maskz_fixupimm_pd::<5>(0b00001111, a, b, c);
let e = _mm256_set1_pd(0.0);
assert_eq_m256d(r, e);
}
@@ -3162,7 +3161,7 @@ mod tests {
let a = _mm_set1_pd(f64::NAN);
let b = _mm_set1_pd(f64::MAX);
let c = _mm_set1_epi64x(i32::MAX as i64);
let r = _mm_fixupimm_pd(a, b, c, 5);
let r = _mm_fixupimm_pd::<5>(a, b, c);
let e = _mm_set1_pd(0.0);
assert_eq_m128d(r, e);
}
@@ -3172,7 +3171,7 @@ mod tests {
let a = _mm_set1_pd(f64::NAN);
let b = _mm_set1_pd(f64::MAX);
let c = _mm_set1_epi64x(i32::MAX as i64);
let r = _mm_mask_fixupimm_pd(a, 0b00000011, b, c, 5);
let r = _mm_mask_fixupimm_pd::<5>(a, 0b00000011, b, c);
let e = _mm_set1_pd(0.0);
assert_eq_m128d(r, e);
}
@@ -3182,7 +3181,7 @@ mod tests {
let a = _mm_set1_pd(f64::NAN);
let b = _mm_set1_pd(f64::MAX);
let c = _mm_set1_epi64x(i32::MAX as i64);
let r = _mm_maskz_fixupimm_pd(0b00000011, a, b, c, 5);
let r = _mm_maskz_fixupimm_pd::<5>(0b00000011, a, b, c);
let e = _mm_set1_pd(0.0);
assert_eq_m128d(r, e);
}
@@ -3192,7 +3191,7 @@ mod tests {
let a = _mm512_set1_epi64(1 << 2);
let b = _mm512_set1_epi64(1 << 1);
let c = _mm512_set1_epi64(1 << 0);
let r = _mm512_ternarylogic_epi64(a, b, c, 8);
let r = _mm512_ternarylogic_epi64::<8>(a, b, c);
let e = _mm512_set1_epi64(0);
assert_eq_m512i(r, e);
}
@@ -3202,9 +3201,9 @@ mod tests {
let src = _mm512_set1_epi64(1 << 2);
let a = _mm512_set1_epi64(1 << 1);
let b = _mm512_set1_epi64(1 << 0);
let r = _mm512_mask_ternarylogic_epi64(src, 0, a, b, 8);
let r = _mm512_mask_ternarylogic_epi64::<8>(src, 0, a, b);
assert_eq_m512i(r, src);
let r = _mm512_mask_ternarylogic_epi64(src, 0b11111111, a, b, 8);
let r = _mm512_mask_ternarylogic_epi64::<8>(src, 0b11111111, a, b);
let e = _mm512_set1_epi64(0);
assert_eq_m512i(r, e);
}
@@ -3214,9 +3213,9 @@ mod tests {
let a = _mm512_set1_epi64(1 << 2);
let b = _mm512_set1_epi64(1 << 1);
let c = _mm512_set1_epi64(1 << 0);
let r = _mm512_maskz_ternarylogic_epi64(0, a, b, c, 9);
let r = _mm512_maskz_ternarylogic_epi64::<8>(0, a, b, c);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_ternarylogic_epi64(0b11111111, a, b, c, 8);
let r = _mm512_maskz_ternarylogic_epi64::<8>(0b11111111, a, b, c);
let e = _mm512_set1_epi64(0);
assert_eq_m512i(r, e);
}
@@ -3226,7 +3225,7 @@ mod tests {
let a = _mm256_set1_epi64x(1 << 2);
let b = _mm256_set1_epi64x(1 << 1);
let c = _mm256_set1_epi64x(1 << 0);
let r = _mm256_ternarylogic_epi64(a, b, c, 8);
let r = _mm256_ternarylogic_epi64::<8>(a, b, c);
let e = _mm256_set1_epi64x(0);
assert_eq_m256i(r, e);
}
@@ -3236,9 +3235,9 @@ mod tests {
let src = _mm256_set1_epi64x(1 << 2);
let a = _mm256_set1_epi64x(1 << 1);
let b = _mm256_set1_epi64x(1 << 0);
let r = _mm256_mask_ternarylogic_epi64(src, 0, a, b, 8);
let r = _mm256_mask_ternarylogic_epi64::<8>(src, 0, a, b);
assert_eq_m256i(r, src);
let r = _mm256_mask_ternarylogic_epi64(src, 0b00001111, a, b, 8);
let r = _mm256_mask_ternarylogic_epi64::<8>(src, 0b00001111, a, b);
let e = _mm256_set1_epi64x(0);
assert_eq_m256i(r, e);
}
@@ -3248,9 +3247,9 @@ mod tests {
let a = _mm256_set1_epi64x(1 << 2);
let b = _mm256_set1_epi64x(1 << 1);
let c = _mm256_set1_epi64x(1 << 0);
let r = _mm256_maskz_ternarylogic_epi64(0, a, b, c, 9);
let r = _mm256_maskz_ternarylogic_epi64::<9>(0, a, b, c);
assert_eq_m256i(r, _mm256_setzero_si256());
let r = _mm256_maskz_ternarylogic_epi64(0b00001111, a, b, c, 8);
let r = _mm256_maskz_ternarylogic_epi64::<8>(0b00001111, a, b, c);
let e = _mm256_set1_epi64x(0);
assert_eq_m256i(r, e);
}
@@ -3260,7 +3259,7 @@ mod tests {
let a = _mm_set1_epi64x(1 << 2);
let b = _mm_set1_epi64x(1 << 1);
let c = _mm_set1_epi64x(1 << 0);
let r = _mm_ternarylogic_epi64(a, b, c, 8);
let r = _mm_ternarylogic_epi64::<8>(a, b, c);
let e = _mm_set1_epi64x(0);
assert_eq_m128i(r, e);
}
@@ -3270,9 +3269,9 @@ mod tests {
let src = _mm_set1_epi64x(1 << 2);
let a = _mm_set1_epi64x(1 << 1);
let b = _mm_set1_epi64x(1 << 0);
let r = _mm_mask_ternarylogic_epi64(src, 0, a, b, 8);
let r = _mm_mask_ternarylogic_epi64::<8>(src, 0, a, b);
assert_eq_m128i(r, src);
let r = _mm_mask_ternarylogic_epi64(src, 0b00000011, a, b, 8);
let r = _mm_mask_ternarylogic_epi64::<8>(src, 0b00000011, a, b);
let e = _mm_set1_epi64x(0);
assert_eq_m128i(r, e);
}
@@ -3282,9 +3281,9 @@ mod tests {
let a = _mm_set1_epi64x(1 << 2);
let b = _mm_set1_epi64x(1 << 1);
let c = _mm_set1_epi64x(1 << 0);
let r = _mm_maskz_ternarylogic_epi64(0, a, b, c, 9);
let r = _mm_maskz_ternarylogic_epi64::<9>(0, a, b, c);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm_maskz_ternarylogic_epi64(0b00000011, a, b, c, 8);
let r = _mm_maskz_ternarylogic_epi64::<8>(0b00000011, a, b, c);
let e = _mm_set1_epi64x(0);
assert_eq_m128i(r, e);
}
@@ -5308,10 +5307,10 @@ mod tests {
unsafe fn test_mm512_add_round_pd() {
let a = _mm512_setr_pd(8., 9.5, 10., 11.5, 12., 13.5, 14., 0.000000000000000007);
let b = _mm512_set1_pd(-1.);
let r = _mm512_add_round_pd(a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let r = _mm512_add_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
let e = _mm512_setr_pd(7., 8.5, 9., 10.5, 11., 12.5, 13., -1.0);
assert_eq_m512d(r, e);
let r = _mm512_add_round_pd(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let r = _mm512_add_round_pd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
let e = _mm512_setr_pd(7., 8.5, 9., 10.5, 11., 12.5, 13., -0.9999999999999999);
assert_eq_m512d(r, e);
}
@@ -5320,14 +5319,12 @@ mod tests {
unsafe fn test_mm512_mask_add_round_pd() {
let a = _mm512_setr_pd(8., 9.5, 10., 11.5, 12., 13.5, 14., 0.000000000000000007);
let b = _mm512_set1_pd(-1.);
let r = _mm512_mask_add_round_pd(a, 0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let r = _mm512_mask_add_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a, 0, a, b,
);
assert_eq_m512d(r, a);
let r = _mm512_mask_add_round_pd(
a,
0b11110000,
a,
b,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
let r = _mm512_mask_add_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
a, 0b11110000, a, b,
);
let e = _mm512_setr_pd(8., 9.5, 10., 11.5, 11., 12.5, 13., -1.0);
assert_eq_m512d(r, e);
@@ -5337,13 +5334,11 @@ mod tests {
unsafe fn test_mm512_maskz_add_round_pd() {
let a = _mm512_setr_pd(8., 9.5, 10., 11.5, 12., 13.5, 14., 0.000000000000000007);
let b = _mm512_set1_pd(-1.);
let r = _mm512_maskz_add_round_pd(0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let r =
_mm512_maskz_add_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
assert_eq_m512d(r, _mm512_setzero_pd());
let r = _mm512_maskz_add_round_pd(
0b11110000,
a,
b,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
let r = _mm512_maskz_add_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
0b11110000, a, b,
);
let e = _mm512_setr_pd(0., 0., 0., 0., 11., 12.5, 13., -1.0);
assert_eq_m512d(r, e);
@@ -9715,70 +9710,13 @@ mod tests {
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_shuffle_pd() {
let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.);
let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.);
let r = _mm512_shuffle_pd(
a,
b,
1 << 0 | 1 << 1 | 1 << 2 | 1 << 3 | 1 << 4 | 1 << 5 | 1 << 6 | 1 << 7,
);
let e = _mm512_setr_pd(4., 3., 8., 7., 4., 3., 8., 7.);
assert_eq_m512d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_shuffle_pd() {
let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.);
let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.);
let r = _mm512_mask_shuffle_pd(
a,
0,
a,
b,
1 << 0 | 1 << 1 | 1 << 2 | 1 << 3 | 1 << 4 | 1 << 5 | 1 << 6 | 1 << 7,
);
assert_eq_m512d(r, a);
let r = _mm512_mask_shuffle_pd(
a,
0b11111111,
a,
b,
1 << 0 | 1 << 1 | 1 << 2 | 1 << 3 | 1 << 4 | 1 << 5 | 1 << 6 | 1 << 7,
);
let e = _mm512_setr_pd(4., 3., 8., 7., 4., 3., 8., 7.);
assert_eq_m512d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_shuffle_pd() {
let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.);
let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.);
let r = _mm512_maskz_shuffle_pd(
0,
a,
b,
1 << 0 | 1 << 1 | 1 << 2 | 1 << 3 | 1 << 4 | 1 << 5 | 1 << 6 | 1 << 7,
);
assert_eq_m512d(r, _mm512_setzero_pd());
let r = _mm512_maskz_shuffle_pd(
0b00001111,
a,
b,
1 << 0 | 1 << 1 | 1 << 2 | 1 << 3 | 1 << 4 | 1 << 5 | 1 << 6 | 1 << 7,
);
let e = _mm512_setr_pd(4., 3., 8., 7., 0., 0., 0., 0.);
assert_eq_m512d(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_shuffle_pd() {
let a = _mm256_set_pd(1., 4., 5., 8.);
let b = _mm256_set_pd(2., 3., 6., 7.);
let r = _mm256_mask_shuffle_pd(a, 0, a, b, 1 << 0 | 1 << 1 | 1 << 2 | 1 << 3);
let r = _mm256_mask_shuffle_pd::<0b11_11_11_11>(a, 0, a, b);
assert_eq_m256d(r, a);
let r = _mm256_mask_shuffle_pd(a, 0b00001111, a, b, 1 << 0 | 1 << 1 | 1 << 2 | 1 << 3);
let r = _mm256_mask_shuffle_pd::<0b11_11_11_11>(a, 0b00001111, a, b);
let e = _mm256_set_pd(2., 1., 6., 5.);
assert_eq_m256d(r, e);
}
@@ -9787,9 +9725,9 @@ mod tests {
unsafe fn test_mm256_maskz_shuffle_pd() {
let a = _mm256_set_pd(1., 4., 5., 8.);
let b = _mm256_set_pd(2., 3., 6., 7.);
let r = _mm256_maskz_shuffle_pd(0, a, b, 1 << 0 | 1 << 1 | 1 << 2 | 1 << 3);
let r = _mm256_maskz_shuffle_pd::<0b11_11_11_11>(0, a, b);
assert_eq_m256d(r, _mm256_setzero_pd());
let r = _mm256_maskz_shuffle_pd(0b00001111, a, b, 1 << 0 | 1 << 1 | 1 << 2 | 1 << 3);
let r = _mm256_maskz_shuffle_pd::<0b11_11_11_11>(0b00001111, a, b);
let e = _mm256_set_pd(2., 1., 6., 5.);
assert_eq_m256d(r, e);
}
@@ -9798,9 +9736,9 @@ mod tests {
unsafe fn test_mm_mask_shuffle_pd() {
let a = _mm_set_pd(1., 4.);
let b = _mm_set_pd(2., 3.);
let r = _mm_mask_shuffle_pd(a, 0, a, b, 1 << 0 | 1 << 1);
let r = _mm_mask_shuffle_pd::<0b11_11_11_11>(a, 0, a, b);
assert_eq_m128d(r, a);
let r = _mm_mask_shuffle_pd(a, 0b00000011, a, b, 1 << 0 | 1 << 1);
let r = _mm_mask_shuffle_pd::<0b11_11_11_11>(a, 0b00000011, a, b);
let e = _mm_set_pd(2., 1.);
assert_eq_m128d(r, e);
}
@@ -9809,9 +9747,9 @@ mod tests {
unsafe fn test_mm_maskz_shuffle_pd() {
let a = _mm_set_pd(1., 4.);
let b = _mm_set_pd(2., 3.);
let r = _mm_maskz_shuffle_pd(0, a, b, 1 << 0 | 1 << 1);
let r = _mm_maskz_shuffle_pd::<0b11_11_11_11>(0, a, b);
assert_eq_m128d(r, _mm_setzero_pd());
let r = _mm_maskz_shuffle_pd(0b00000011, a, b, 1 << 0 | 1 << 1);
let r = _mm_maskz_shuffle_pd::<0b11_11_11_11>(0b00000011, a, b);
let e = _mm_set_pd(2., 1.);
assert_eq_m128d(r, e);
}