reformat with latest rustfmt

gnzlbg
2018-06-06 00:17:14 +02:00
committed by gnzlbg
parent c2d60b18e4
commit c3d273c980
58 changed files with 1038 additions and 1710 deletions
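The diff below is mechanical: the newer rustfmt adds a trailing comma to the last argument of multi-line parameter lists, terminates macro arms with `};`, breaks long `#[cfg(...)]`/`#[cfg_attr(...)]` attributes across lines, and packs short literal lists onto as few lines as possible. A minimal before/after sketch of the dominant change (illustrative only, not a line from this commit):

    // Old rustfmt: no trailing comma after the last multi-line parameter.
    pub unsafe fn example(
        a: uint32x4_t, b: u32, c: uint32x4_t
    ) -> uint32x4_t {
        unimplemented!()
    }

    // New rustfmt: trailing comma added.
    pub unsafe fn example(
        a: uint32x4_t, b: u32, c: uint32x4_t,
    ) -> uint32x4_t {
        unimplemented!()
    }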

View File

@@ -16,36 +16,36 @@ extern "C" {
     fn vsha1h_u32_(hash_e: u32) -> u32;
     #[link_name = "llvm.aarch64.crypto.sha1su0"]
     fn vsha1su0q_u32_(
-        w0_3: uint32x4_t, w4_7: uint32x4_t, w8_11: uint32x4_t
+        w0_3: uint32x4_t, w4_7: uint32x4_t, w8_11: uint32x4_t,
     ) -> uint32x4_t;
     #[link_name = "llvm.aarch64.crypto.sha1su1"]
     fn vsha1su1q_u32_(tw0_3: uint32x4_t, w12_15: uint32x4_t) -> uint32x4_t;
     #[link_name = "llvm.aarch64.crypto.sha1c"]
     fn vsha1cq_u32_(
-        hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t
+        hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t,
     ) -> uint32x4_t;
     #[link_name = "llvm.aarch64.crypto.sha1p"]
     fn vsha1pq_u32_(
-        hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t
+        hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t,
     ) -> uint32x4_t;
     #[link_name = "llvm.aarch64.crypto.sha1m"]
     fn vsha1mq_u32_(
-        hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t
+        hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t,
     ) -> uint32x4_t;
     #[link_name = "llvm.aarch64.crypto.sha256h"]
     fn vsha256hq_u32_(
-        hash_abcd: uint32x4_t, hash_efgh: uint32x4_t, wk: uint32x4_t
+        hash_abcd: uint32x4_t, hash_efgh: uint32x4_t, wk: uint32x4_t,
     ) -> uint32x4_t;
     #[link_name = "llvm.aarch64.crypto.sha256h2"]
     fn vsha256h2q_u32_(
-        hash_efgh: uint32x4_t, hash_abcd: uint32x4_t, wk: uint32x4_t
+        hash_efgh: uint32x4_t, hash_abcd: uint32x4_t, wk: uint32x4_t,
     ) -> uint32x4_t;
     #[link_name = "llvm.aarch64.crypto.sha256su0"]
     fn vsha256su0q_u32_(w0_3: uint32x4_t, w4_7: uint32x4_t) -> uint32x4_t;
     #[link_name = "llvm.aarch64.crypto.sha256su1"]
     fn vsha256su1q_u32_(
-        tw0_3: uint32x4_t, w8_11: uint32x4_t, w12_15: uint32x4_t
+        tw0_3: uint32x4_t, w8_11: uint32x4_t, w12_15: uint32x4_t,
     ) -> uint32x4_t;
 }
@@ -97,7 +97,7 @@ pub unsafe fn vsha1h_u32(hash_e: u32) -> u32 {
 #[target_feature(enable = "crypto")]
 #[cfg_attr(test, assert_instr(sha1c))]
 pub unsafe fn vsha1cq_u32(
-    hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t
+    hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t,
 ) -> uint32x4_t {
     vsha1cq_u32_(hash_abcd, hash_e, wk)
 }
@@ -107,7 +107,7 @@ pub unsafe fn vsha1cq_u32(
 #[target_feature(enable = "crypto")]
 #[cfg_attr(test, assert_instr(sha1m))]
 pub unsafe fn vsha1mq_u32(
-    hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t
+    hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t,
 ) -> uint32x4_t {
     vsha1mq_u32_(hash_abcd, hash_e, wk)
 }
@@ -117,7 +117,7 @@ pub unsafe fn vsha1mq_u32(
 #[target_feature(enable = "crypto")]
 #[cfg_attr(test, assert_instr(sha1p))]
 pub unsafe fn vsha1pq_u32(
-    hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t
+    hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t,
 ) -> uint32x4_t {
     vsha1pq_u32_(hash_abcd, hash_e, wk)
 }
@@ -127,7 +127,7 @@ pub unsafe fn vsha1pq_u32(
 #[target_feature(enable = "crypto")]
 #[cfg_attr(test, assert_instr(sha1su0))]
 pub unsafe fn vsha1su0q_u32(
-    w0_3: uint32x4_t, w4_7: uint32x4_t, w8_11: uint32x4_t
+    w0_3: uint32x4_t, w4_7: uint32x4_t, w8_11: uint32x4_t,
 ) -> uint32x4_t {
     vsha1su0q_u32_(w0_3, w4_7, w8_11)
 }
@@ -137,7 +137,7 @@ pub unsafe fn vsha1su0q_u32(
 #[target_feature(enable = "crypto")]
 #[cfg_attr(test, assert_instr(sha1su1))]
 pub unsafe fn vsha1su1q_u32(
-    tw0_3: uint32x4_t, w12_15: uint32x4_t
+    tw0_3: uint32x4_t, w12_15: uint32x4_t,
 ) -> uint32x4_t {
     vsha1su1q_u32_(tw0_3, w12_15)
 }
@@ -147,7 +147,7 @@ pub unsafe fn vsha1su1q_u32(
 #[target_feature(enable = "crypto")]
 #[cfg_attr(test, assert_instr(sha256h))]
 pub unsafe fn vsha256hq_u32(
-    hash_abcd: uint32x4_t, hash_efgh: uint32x4_t, wk: uint32x4_t
+    hash_abcd: uint32x4_t, hash_efgh: uint32x4_t, wk: uint32x4_t,
 ) -> uint32x4_t {
     vsha256hq_u32_(hash_abcd, hash_efgh, wk)
 }
@@ -157,7 +157,7 @@ pub unsafe fn vsha256hq_u32(
 #[target_feature(enable = "crypto")]
 #[cfg_attr(test, assert_instr(sha256h2))]
 pub unsafe fn vsha256h2q_u32(
-    hash_efgh: uint32x4_t, hash_abcd: uint32x4_t, wk: uint32x4_t
+    hash_efgh: uint32x4_t, hash_abcd: uint32x4_t, wk: uint32x4_t,
 ) -> uint32x4_t {
     vsha256h2q_u32_(hash_efgh, hash_abcd, wk)
 }
@@ -167,7 +167,7 @@ pub unsafe fn vsha256h2q_u32(
 #[target_feature(enable = "crypto")]
 #[cfg_attr(test, assert_instr(sha256su0))]
 pub unsafe fn vsha256su0q_u32(
-    w0_3: uint32x4_t, w4_7: uint32x4_t
+    w0_3: uint32x4_t, w4_7: uint32x4_t,
 ) -> uint32x4_t {
     vsha256su0q_u32_(w0_3, w4_7)
 }
@@ -177,7 +177,7 @@ pub unsafe fn vsha256su0q_u32(
 #[target_feature(enable = "crypto")]
 #[cfg_attr(test, assert_instr(sha256su1))]
 pub unsafe fn vsha256su1q_u32(
-    tw0_3: uint32x4_t, w8_11: uint32x4_t, w12_15: uint32x4_t
+    tw0_3: uint32x4_t, w8_11: uint32x4_t, w12_15: uint32x4_t,
 ) -> uint32x4_t {
     vsha256su1q_u32_(tw0_3, w8_11, w12_15)
 }
@@ -199,22 +199,8 @@ mod tests {
         assert_eq!(
             r,
             u8x16::new(
-                124,
-                123,
-                124,
-                118,
-                124,
-                123,
-                124,
-                197,
-                124,
-                123,
-                124,
-                118,
-                124,
-                123,
-                124,
-                197
+                124, 123, 124, 118, 124, 123, 124, 197, 124, 123, 124, 118,
+                124, 123, 124, 197
             )
         );
     }
@@ -229,22 +215,7 @@ mod tests {
         assert_eq!(
             r,
             u8x16::new(
-                9,
-                213,
-                9,
-                251,
-                9,
-                213,
-                9,
-                56,
-                9,
-                213,
-                9,
-                251,
-                9,
-                213,
-                9,
-                56
+                9, 213, 9, 251, 9, 213, 9, 56, 9, 213, 9, 251, 9, 213, 9, 56
             )
         );
     }
@@ -256,24 +227,7 @@ mod tests {
         let r: u8x16 = vaesmcq_u8(data).into_bits();
         assert_eq!(
             r,
-            u8x16::new(
-                3,
-                4,
-                9,
-                10,
-                15,
-                8,
-                21,
-                30,
-                3,
-                4,
-                9,
-                10,
-                15,
-                8,
-                21,
-                30
-            )
+            u8x16::new(3, 4, 9, 10, 15, 8, 21, 30, 3, 4, 9, 10, 15, 8, 21, 30)
         );
     }
@@ -285,22 +239,8 @@ mod tests {
         assert_eq!(
             r,
             u8x16::new(
-                43,
-                60,
-                33,
-                50,
-                103,
-                80,
-                125,
-                70,
-                43,
-                60,
-                33,
-                50,
-                103,
-                80,
-                125,
-                70
+                43, 60, 33, 50, 103, 80, 125, 70, 43, 60, 33, 50, 103, 80,
+                125, 70
             )
         );
     }
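For orientation, a hedged usage sketch of the SHA-1 intrinsics whose signatures are reformatted above; the wrapper name `sha1_choose_rounds` is hypothetical and not part of this commit:

    // Hypothetical helper (not in this commit): wraps the SHA1C instruction,
    // which applies four SHA-1 rounds with the 'choose' function. Assumes an
    // AArch64 target with the `crypto` feature and the coresimd types above
    // in scope.
    #[cfg(target_arch = "aarch64")]
    #[target_feature(enable = "crypto")]
    unsafe fn sha1_choose_rounds(
        hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t,
    ) -> uint32x4_t {
        vsha1cq_u32(hash_abcd, hash_e, wk)
    }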

View File

@@ -546,7 +546,6 @@ pub unsafe fn vpmaxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
     vpmaxq_f64_(a, b)
 }
-
 #[cfg(test)]
 mod tests {
     use coresimd::aarch64::*;
@@ -800,20 +799,11 @@ mod tests {
     #[simd_test(enable = "neon")]
     unsafe fn test_vpminq_s8() {
         #[cfg_attr(rustfmt, skip)]
-        let a = i8x16::new(
-            1, -2, 3, -4, 5, 6, 7, 8,
-            1, 2, 3, 4, 5, 6, 7, 8
-        );
+        let a = i8x16::new(1, -2, 3, -4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
         #[cfg_attr(rustfmt, skip)]
-        let b = i8x16::new(
-            0, 3, 2, 5, 4, 7, 6, 9,
-            0, 3, 2, 5, 4, 7, 6, 9
-        );
+        let b = i8x16::new(0, 3, 2, 5, 4, 7, 6, 9, 0, 3, 2, 5, 4, 7, 6, 9);
         #[cfg_attr(rustfmt, skip)]
-        let e = i8x16::new(
-            -2, -4, 5, 7, 1, 3, 5, 7,
-            0, 2, 4, 6, 0, 2, 4, 6,
-        );
+        let e = i8x16::new(-2, -4, 5, 7, 1, 3, 5, 7, 0, 2, 4, 6, 0, 2, 4, 6);
         let r: i8x16 = vpminq_s8(a.into_bits(), b.into_bits()).into_bits();
         assert_eq!(r, e);
     }
@@ -839,20 +829,11 @@ mod tests {
     #[simd_test(enable = "neon")]
     unsafe fn test_vpminq_u8() {
         #[cfg_attr(rustfmt, skip)]
-        let a = u8x16::new(
-            1, 2, 3, 4, 5, 6, 7, 8,
-            1, 2, 3, 4, 5, 6, 7, 8
-        );
+        let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
         #[cfg_attr(rustfmt, skip)]
-        let b = u8x16::new(
-            0, 3, 2, 5, 4, 7, 6, 9,
-            0, 3, 2, 5, 4, 7, 6, 9
-        );
+        let b = u8x16::new(0, 3, 2, 5, 4, 7, 6, 9, 0, 3, 2, 5, 4, 7, 6, 9);
         #[cfg_attr(rustfmt, skip)]
-        let e = u8x16::new(
-            1, 3, 5, 7, 1, 3, 5, 7,
-            0, 2, 4, 6, 0, 2, 4, 6,
-        );
+        let e = u8x16::new(1, 3, 5, 7, 1, 3, 5, 7, 0, 2, 4, 6, 0, 2, 4, 6);
         let r: u8x16 = vpminq_u8(a.into_bits(), b.into_bits()).into_bits();
         assert_eq!(r, e);
     }
@@ -896,20 +877,11 @@ mod tests {
     #[simd_test(enable = "neon")]
     unsafe fn test_vpmaxq_s8() {
         #[cfg_attr(rustfmt, skip)]
-        let a = i8x16::new(
-            1, -2, 3, -4, 5, 6, 7, 8,
-            1, 2, 3, 4, 5, 6, 7, 8
-        );
+        let a = i8x16::new(1, -2, 3, -4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
         #[cfg_attr(rustfmt, skip)]
-        let b = i8x16::new(
-            0, 3, 2, 5, 4, 7, 6, 9,
-            0, 3, 2, 5, 4, 7, 6, 9
-        );
+        let b = i8x16::new(0, 3, 2, 5, 4, 7, 6, 9, 0, 3, 2, 5, 4, 7, 6, 9);
         #[cfg_attr(rustfmt, skip)]
-        let e = i8x16::new(
-            1, 3, 6, 8, 2, 4, 6, 8,
-            3, 5, 7, 9, 3, 5, 7, 9,
-        );
+        let e = i8x16::new(1, 3, 6, 8, 2, 4, 6, 8, 3, 5, 7, 9, 3, 5, 7, 9);
         let r: i8x16 = vpmaxq_s8(a.into_bits(), b.into_bits()).into_bits();
         assert_eq!(r, e);
     }
@@ -935,20 +907,11 @@ mod tests {
     #[simd_test(enable = "neon")]
     unsafe fn test_vpmaxq_u8() {
         #[cfg_attr(rustfmt, skip)]
-        let a = u8x16::new(
-            1, 2, 3, 4, 5, 6, 7, 8,
-            1, 2, 3, 4, 5, 6, 7, 8
-        );
+        let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
         #[cfg_attr(rustfmt, skip)]
-        let b = u8x16::new(
-            0, 3, 2, 5, 4, 7, 6, 9,
-            0, 3, 2, 5, 4, 7, 6, 9
-        );
+        let b = u8x16::new(0, 3, 2, 5, 4, 7, 6, 9, 0, 3, 2, 5, 4, 7, 6, 9);
         #[cfg_attr(rustfmt, skip)]
-        let e = u8x16::new(
-            2, 4, 6, 8, 2, 4, 6, 8,
-            3, 5, 7, 9, 3, 5, 7, 9,
-        );
+        let e = u8x16::new(2, 4, 6, 8, 2, 4, 6, 8, 3, 5, 7, 9, 3, 5, 7, 9);
         let r: u8x16 = vpmaxq_u8(a.into_bits(), b.into_bits()).into_bits();
         assert_eq!(r, e);
     }
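A note on the expected vectors in these tests: vpminq/vpmaxq fold adjacent pairs, so the low half of the result holds the min/max of neighbouring lanes of `a` and the high half the same for `b`. A scalar model of the minimum case (sketch only, not part of the commit):

    // Scalar model of the pairwise minimum computed by vpminq_s8.
    fn pairwise_min_i8x16(a: [i8; 16], b: [i8; 16]) -> [i8; 16] {
        let mut r = [0i8; 16];
        for i in 0..8 {
            r[i] = core::cmp::min(a[2 * i], a[2 * i + 1]); // low half from `a`
            r[i + 8] = core::cmp::min(b[2 * i], b[2 * i + 1]); // high half from `b`
        }
        r
    }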

View File

@@ -19,11 +19,19 @@ pub use self::v7::*;
 // NEON is supported on AArch64, and on ARM when built with the v7 and neon
 // features. Building ARM without neon produces incorrect codegen.
-#[cfg(any(target_arch = "aarch64",
+#[cfg(
+    any(
+        target_arch = "aarch64",
         all(target_feature = "v7", target_feature = "neon"),
-          dox))]
+        dox
+    )
+)]
 mod neon;
-#[cfg(any(target_arch = "aarch64",
+#[cfg(
+    any(
+        target_arch = "aarch64",
         all(target_feature = "v7", target_feature = "neon"),
-          dox))]
+        dox
+    )
+)]
 pub use self::neon::*;

View File

@@ -366,52 +366,82 @@ impl_from_bits_!(
 #[allow(improper_ctypes)]
 extern "C" {
-    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frsqrte.v2f32")]
+    #[cfg_attr(
+        target_arch = "aarch64", link_name = "llvm.aarch64.neon.frsqrte.v2f32"
+    )]
     #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v2f32")]
     fn frsqrte_v2f32(a: float32x2_t) -> float32x2_t;
     #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v8i8")]
-    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sminp.v8i8")]
+    #[cfg_attr(
+        target_arch = "aarch64", link_name = "llvm.aarch64.neon.sminp.v8i8"
+    )]
     fn vpmins_v8i8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
     #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v4i16")]
-    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sminp.v4i16")]
+    #[cfg_attr(
+        target_arch = "aarch64", link_name = "llvm.aarch64.neon.sminp.v4i16"
+    )]
     fn vpmins_v4i16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
     #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v2i32")]
-    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sminp.v2i32")]
+    #[cfg_attr(
+        target_arch = "aarch64", link_name = "llvm.aarch64.neon.sminp.v2i32"
+    )]
     fn vpmins_v2i32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
     #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpminu.v8i8")]
-    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uminp.v8i8")]
+    #[cfg_attr(
+        target_arch = "aarch64", link_name = "llvm.aarch64.neon.uminp.v8i8"
+    )]
     fn vpminu_v8i8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t;
     #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpminu.v4i16")]
-    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uminp.v4i16")]
+    #[cfg_attr(
+        target_arch = "aarch64", link_name = "llvm.aarch64.neon.uminp.v4i16"
+    )]
     fn vpminu_v4i16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t;
     #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpminu.v2i32")]
-    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uminp.v2i32")]
+    #[cfg_attr(
+        target_arch = "aarch64", link_name = "llvm.aarch64.neon.uminp.v2i32"
+    )]
     fn vpminu_v2i32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t;
     #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v2f32")]
-    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminp.v2f32")]
+    #[cfg_attr(
+        target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminp.v2f32"
+    )]
     fn vpminf_v2f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
     #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v8i8")]
-    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smaxp.v8i8")]
+    #[cfg_attr(
+        target_arch = "aarch64", link_name = "llvm.aarch64.neon.smaxp.v8i8"
+    )]
     fn vpmaxs_v8i8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
     #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v4i16")]
-    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smaxp.v4i16")]
+    #[cfg_attr(
+        target_arch = "aarch64", link_name = "llvm.aarch64.neon.smaxp.v4i16"
+    )]
     fn vpmaxs_v4i16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
     #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v2i32")]
-    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smaxp.v2i32")]
+    #[cfg_attr(
+        target_arch = "aarch64", link_name = "llvm.aarch64.neon.smaxp.v2i32"
+    )]
     fn vpmaxs_v2i32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
     #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v8i8")]
-    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umaxp.v8i8")]
+    #[cfg_attr(
+        target_arch = "aarch64", link_name = "llvm.aarch64.neon.umaxp.v8i8"
+    )]
     fn vpmaxu_v8i8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t;
     #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v4i16")]
-    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umaxp.v4i16")]
+    #[cfg_attr(
+        target_arch = "aarch64", link_name = "llvm.aarch64.neon.umaxp.v4i16"
+    )]
     fn vpmaxu_v4i16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t;
     #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v2i32")]
-    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umaxp.v2i32")]
+    #[cfg_attr(
+        target_arch = "aarch64", link_name = "llvm.aarch64.neon.umaxp.v2i32"
+    )]
     fn vpmaxu_v2i32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t;
     #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v2f32")]
-    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxp.v2f32")]
+    #[cfg_attr(
+        target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxp.v2f32"
+    )]
     fn vpmaxf_v2f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
 }
@@ -916,7 +946,6 @@ pub unsafe fn vpmax_f32 (a: float32x2_t, b: float32x2_t) -> float32x2_t {
     vpmaxf_v2f32(a, b)
 }
-
 #[cfg(test)]
 mod tests {
     use coresimd::arm::*;

View File

@@ -75,8 +75,7 @@ impl_from_bits_!(
     vector_bool_int
 );
 impl_from_bits_!(
-    i8x16:
-    vector_signed_char,
+    i8x16: vector_signed_char,
     vector_unsigned_char,
     vector_bool_char,
     vector_signed_short,
@@ -114,8 +113,7 @@ impl_from_bits_!(
     vector_bool_int
 );
 impl_from_bits_!(
-    u8x16:
-    vector_signed_char,
+    u8x16: vector_signed_char,
     vector_unsigned_char,
     vector_bool_char,
     vector_signed_short,
@@ -135,11 +133,7 @@ impl_from_bits_!(
     vector_bool_short,
     vector_bool_int
 );
-impl_from_bits_!(
-    m8x16: vector_bool_char,
-    vector_bool_short,
-    vector_bool_int
-);
+impl_from_bits_!(m8x16: vector_bool_char, vector_bool_short, vector_bool_int);

 impl_from_bits_!(
     vector_signed_short: u64x2,
@@ -166,8 +160,7 @@ impl_from_bits_!(
     vector_bool_int
 );
 impl_from_bits_!(
-    i16x8:
-    vector_signed_char,
+    i16x8: vector_signed_char,
     vector_unsigned_char,
     vector_bool_char,
     vector_signed_short,
@@ -204,8 +197,7 @@ impl_from_bits_!(
     vector_bool_int
 );
 impl_from_bits_!(
-    u16x8:
-    vector_signed_char,
+    u16x8: vector_signed_char,
     vector_unsigned_char,
     vector_bool_char,
     vector_signed_short,
@@ -251,8 +243,7 @@ impl_from_bits_!(
     vector_bool_int
 );
 impl_from_bits_!(
-    i32x4:
-    vector_signed_char,
+    i32x4: vector_signed_char,
     vector_unsigned_char,
     vector_bool_char,
     vector_signed_short,
@@ -289,8 +280,7 @@ impl_from_bits_!(
     vector_bool_int
 );
 impl_from_bits_!(
-    u32x4:
-    vector_signed_char,
+    u32x4: vector_signed_char,
     vector_unsigned_char,
     vector_bool_char,
     vector_signed_short,
@@ -345,8 +335,7 @@ impl_from_bits_!(
     vector_bool_int
 );
 impl_from_bits_!(
-    f32x4:
-    vector_signed_char,
+    f32x4: vector_signed_char,
     vector_unsigned_char,
     vector_bool_char,
     vector_signed_short,
@@ -361,9 +350,13 @@ impl_from_bits_!(
 #[allow(improper_ctypes)]
 extern "C" {
     #[link_name = "llvm.ppc.altivec.vperm"]
-    fn vperm(a: vector_signed_int, b: vector_signed_int, c: vector_unsigned_char) -> vector_signed_int;
+    fn vperm(
+        a: vector_signed_int, b: vector_signed_int, c: vector_unsigned_char,
+    ) -> vector_signed_int;
     #[link_name = "llvm.ppc.altivec.vmhaddshs"]
-    fn vmhaddshs(a: vector_signed_short, b: vector_signed_short, c: vector_signed_short) -> vector_signed_short;
+    fn vmhaddshs(
+        a: vector_signed_short, b: vector_signed_short, c: vector_signed_short,
+    ) -> vector_signed_short;
 }

 mod sealed {
@@ -373,7 +366,9 @@ mod sealed {
     #[inline]
     #[target_feature(enable = "altivec")]
     #[cfg_attr(test, assert_instr(vperm))]
-    unsafe fn vec_vperm(a: vector_signed_int, b: vector_signed_int, c: vector_unsigned_char) -> vector_signed_int {
+    unsafe fn vec_vperm(
+        a: vector_signed_int, b: vector_signed_int, c: vector_unsigned_char,
+    ) -> vector_signed_int {
         vperm(a, b, c)
     }
@@ -703,7 +698,6 @@ where
     a.vec_add(b)
 }
-
 /// Endian-biased intrinsics
 #[cfg(target_endian = "little")]
 mod endian {
@@ -718,8 +712,10 @@ mod endian {
         // vperm has big-endian bias
         //
         // Xor the mask and flip the arguments
-        let d = u8x16::new(255, 255, 255, 255, 255, 255, 255, 255,
-                           255, 255, 255, 255, 255, 255, 255, 255).into_bits();
+        let d = u8x16::new(
+            255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+            255, 255, 255,
+        ).into_bits();
         let c = simd_xor(c, d);

         b.vec_vperm(a, c)
@@ -730,7 +726,9 @@ mod endian {
 #[inline]
 #[target_feature(enable = "altivec")]
 #[cfg_attr(test, assert_instr(vmhaddshs))]
-pub unsafe fn vec_madds(a: vector_signed_short, b: vector_signed_short, c: vector_signed_short) -> vector_signed_short {
+pub unsafe fn vec_madds(
+    a: vector_signed_short, b: vector_signed_short, c: vector_signed_short,
+) -> vector_signed_short {
     vmhaddshs(a, b, c)
 }
@@ -850,9 +848,20 @@ mod tests {
     #[simd_test(enable = "altivec")]
     unsafe fn test_vec_madds() {
-        let a: vector_signed_short = i16x8::new(0 * 256, 1 * 256, 2 * 256, 3 * 256, 4 * 256, 5 * 256, 6 * 256, 7 * 256).into_bits();
-        let b: vector_signed_short = i16x8::new(256, 256, 256, 256, 256, 256, 256, 256).into_bits();
-        let c: vector_signed_short = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7).into_bits();
+        let a: vector_signed_short = i16x8::new(
+            0 * 256,
+            1 * 256,
+            2 * 256,
+            3 * 256,
+            4 * 256,
+            5 * 256,
+            6 * 256,
+            7 * 256,
+        ).into_bits();
+        let b: vector_signed_short =
+            i16x8::new(256, 256, 256, 256, 256, 256, 256, 256).into_bits();
+        let c: vector_signed_short =
+            i16x8::new(0, 1, 2, 3, 4, 5, 6, 7).into_bits();
         let d = i16x8::new(0, 3, 6, 9, 12, 15, 18, 21);
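The expected vector `d` follows from the vmhaddshs semantics, roughly `((a * b) >> 15) + c` with signed saturation (a rounding term, if present, does not change these inputs): with a[i] = i * 256 and b[i] = 256 the high product is (i * 65536) >> 15 = 2 * i, and adding c[i] = i gives 3 * i, i.e. 0, 3, 6, ..., 21. A scalar model of one lane (sketch; saturation omitted because these inputs cannot overflow):

    fn madds_lane(a: i16, b: i16, c: i16) -> i16 {
        ((((a as i32) * (b as i32)) >> 15) as i16).wrapping_add(c)
    }
    // madds_lane(3 * 256, 256, 3) == 9, matching the fourth lane of `d`.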

View File

@@ -1,6 +1,7 @@
 //! PowerPC 64
 //!
-//! The reference is the [64-Bit ELF V2 ABI Specification - Power Architecture].
+//! The reference is the [64-Bit ELF V2 ABI Specification - Power
+//! Architecture].
 //!
 //! [64-Bit ELF V2 ABI Specification - Power Architecture]: http://openpowerfoundation.org/wp-content/uploads/resources/leabi/leabi-20170510.pdf

View File

@@ -63,8 +63,7 @@ impl_from_bits_!(
     vector_double
 );
 impl_from_bits_!(
-    i64x2:
-    vector_signed_char,
+    i64x2: vector_signed_char,
     vector_unsigned_char,
     vector_bool_char,
     vector_signed_short,
@@ -109,8 +108,7 @@ impl_from_bits_!(
     vector_double
 );
 impl_from_bits_!(
-    u64x2:
-    vector_signed_char,
+    u64x2: vector_signed_char,
     vector_unsigned_char,
     vector_bool_char,
     vector_signed_short,
@@ -155,8 +153,7 @@ impl_from_bits_!(
     vector_bool_long
 );
 impl_from_bits_!(
-    f64x2:
-    vector_signed_char,
+    f64x2: vector_signed_char,
     vector_unsigned_char,
     vector_bool_char,
     vector_signed_short,
@@ -234,8 +231,12 @@ mod sealed {
     // xxpermdi has an big-endian bias and extended mnemonics
     #[inline]
     #[target_feature(enable = "vsx")]
-    #[cfg_attr(all(test, target_endian="little"), assert_instr(xxmrgld, dm = 0x0))]
-    #[cfg_attr(all(test, target_endian="big"), assert_instr(xxspltd, dm = 0x0))]
+    #[cfg_attr(
+        all(test, target_endian = "little"), assert_instr(xxmrgld, dm = 0x0)
+    )]
+    #[cfg_attr(
+        all(test, target_endian = "big"), assert_instr(xxspltd, dm = 0x0)
+    )]
     unsafe fn xxpermdi(a: i64x2, b: i64x2, dm: u8) -> i64x2 {
         match dm & 0b11 {
             0 => simd_shuffle2(a, b, [0b00, 0b10]),

View File

@@ -165,7 +165,6 @@ macro_rules! impl_float_arithmetic_reductions {
     };
 }
-
 #[cfg(test)]
 macro_rules! test_int_arithmetic_reductions {
     ($id:ident, $elem_ty:ident) => {
@@ -237,10 +236,7 @@ macro_rules! test_float_arithmetic_reductions {
             let v = $id::splat(1 as $elem_ty);
             assert_eq!(v.sum(), $id::lanes() as $elem_ty);
             let v = alternating(2);
-            assert_eq!(
-                v.sum(),
-                ($id::lanes() / 2 + $id::lanes()) as $elem_ty
-            );
+            assert_eq!(v.sum(), ($id::lanes() / 2 + $id::lanes()) as $elem_ty);
         }
         #[test]
         fn product() {

View File

@@ -59,7 +59,6 @@ macro_rules! impl_float_math {
 macro_rules! test_float_math {
     ($id:ident, $elem_ty:ident) => {
-
         fn sqrt2() -> $elem_ty {
             match ::mem::size_of::<$elem_ty>() {
                 4 => 1.4142135 as $elem_ty,

View File

@@ -46,7 +46,7 @@ macro_rules! impl_load_store {
         /// undefined.
         #[inline]
         pub unsafe fn store_aligned_unchecked(
-            self, slice: &mut [$elem_ty]
+            self, slice: &mut [$elem_ty],
         ) {
             *(slice.get_unchecked_mut(0) as *mut $elem_ty as *mut Self) =
                 self;
@@ -59,7 +59,7 @@ macro_rules! impl_load_store {
         /// If `slice.len() < Self::lanes()` the behavior is undefined.
         #[inline]
         pub unsafe fn store_unaligned_unchecked(
-            self, slice: &mut [$elem_ty]
+            self, slice: &mut [$elem_ty],
         ) {
             let target_ptr =
                 slice.get_unchecked_mut(0) as *mut $elem_ty as *mut u8;
@@ -121,7 +121,7 @@ macro_rules! impl_load_store {
         /// If `slice.len() < Self::lanes()` the behavior is undefined.
         #[inline]
         pub unsafe fn load_unaligned_unchecked(
-            slice: &[$elem_ty]
+            slice: &[$elem_ty],
         ) -> Self {
             use mem::size_of;
             let target_ptr =
@@ -238,7 +238,8 @@ macro_rules! test_load_store {
                 data: [0 as $elem_ty; 2 * $id::lanes()],
             };
             // offset the aligned data by one byte:
-            let s: &mut [u8; 2 * $id::lanes()
+            let s: &mut [u8; 2
+                * $id::lanes()
                 * mem::size_of::<$elem_ty>()] =
                 mem::transmute(&mut aligned.data);
             let s: &mut [$elem_ty] = slice::from_raw_parts_mut(
@@ -296,7 +297,8 @@ macro_rules! test_load_store {
                 data: [0 as $elem_ty; 2 * $id::lanes()],
             };
             // offset the aligned data by one byte:
-            let s: &[u8; 2 * $id::lanes()
+            let s: &[u8; 2
+                * $id::lanes()
                 * mem::size_of::<$elem_ty>()] =
                 mem::transmute(&aligned.data);
             let s: &[$elem_ty] = slice::from_raw_parts(
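For reference, a hedged usage sketch of the unchecked load/store API reformatted above (vector type and values illustrative; the caller must uphold the `slice.len() >= Self::lanes()` contract or behavior is undefined):

    let mut buf = [0.0f32; 4];
    let v = f32x4::splat(1.0);
    unsafe {
        // Store the four lanes into the buffer, then read them back.
        v.store_unaligned_unchecked(&mut buf);
        let w = f32x4::load_unaligned_unchecked(&buf);
        assert_eq!(v, w);
    }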

View File

@@ -41,18 +41,7 @@ macro_rules! impl_shifts {
 macro_rules! impl_all_scalar_shifts {
     ($id:ident, $elem_ty:ident) => {
         impl_shifts!(
-            $id,
-            $elem_ty,
-            u8,
-            u16,
-            u32,
-            u64,
-            usize,
-            i8,
-            i16,
-            i32,
-            i64,
-            isize
+            $id, $elem_ty, u8, u16, u32, u64, usize, i8, i16, i32, i64, isize
         );
     };
 }
@@ -125,18 +114,7 @@ macro_rules! test_shift_ops {
 macro_rules! test_all_scalar_shift_ops {
     ($id:ident, $elem_ty:ident) => {
         test_shift_ops!(
-            $id,
-            $elem_ty,
-            u8,
-            u16,
-            u32,
-            u64,
-            usize,
-            i8,
-            i16,
-            i32,
-            i64,
-            isize
+            $id, $elem_ty, u8, u16, u32, u64, usize, i8, i16, i32, i64, isize
         );
     };
 }

View File

@@ -31,7 +31,7 @@ macro_rules! impl_fabs {
                 unsafe { $fn(self) }
             }
         }
-    }
+    };
 }
 impl_fabs!(f32x2: abs_v2f32);

View File

@@ -31,7 +31,7 @@ macro_rules! impl_fcos {
                 unsafe { $fn(self) }
             }
         }
-    }
+    };
 }
 impl_fcos!(f32x2: cos_v2f32);

View File

@@ -31,7 +31,7 @@ macro_rules! impl_fma {
                 unsafe { $fn(self, y, z) }
             }
         }
-    }
+    };
 }
 impl_fma!(f32x2: fma_v2f32);

View File

@@ -25,11 +25,13 @@ macro_rules! default_impl {
         impl All for $id {
             #[inline]
             unsafe fn all(self) -> bool {
-                #[cfg(not(target_arch = "aarch64"))] {
+                #[cfg(not(target_arch = "aarch64"))]
+                {
                     use coresimd::simd_llvm::simd_reduce_all;
                     simd_reduce_all(self)
                 }
-                #[cfg(target_arch = "aarch64")] {
+                #[cfg(target_arch = "aarch64")]
+                {
                     // FIXME: Broken on AArch64
                     // https://bugs.llvm.org/show_bug.cgi?id=36796
                     self.and()
@@ -40,11 +42,13 @@ macro_rules! default_impl {
         impl Any for $id {
             #[inline]
             unsafe fn any(self) -> bool {
-                #[cfg(not(target_arch = "aarch64"))] {
+                #[cfg(not(target_arch = "aarch64"))]
+                {
                     use coresimd::simd_llvm::simd_reduce_any;
                     simd_reduce_any(self)
                 }
-                #[cfg(target_arch = "aarch64")] {
+                #[cfg(target_arch = "aarch64")]
+                {
                     // FIXME: Broken on AArch64
                     // https://bugs.llvm.org/show_bug.cgi?id=36796
                     self.or()
@@ -63,7 +67,12 @@ macro_rules! default_impl {
 // or floating point vectors, we can't currently work around this yet. The
 // performance impact for this shouldn't be large, but this is filled as:
 // https://bugs.llvm.org/show_bug.cgi?id=37087
-#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse2"))]
+#[cfg(
+    all(
+        any(target_arch = "x86", target_arch = "x86_64"),
+        target_feature = "sse2"
+    )
+)]
 macro_rules! x86_128_sse2_movemask_impl {
     ($id:ident) => {
         impl All for $id {
@@ -71,13 +80,15 @@ macro_rules! x86_128_sse2_movemask_impl {
             #[target_feature(enable = "sse2")]
             unsafe fn all(self) -> bool {
                 #[cfg(target_arch = "x86")]
-                use ::coresimd::arch::x86::_mm_movemask_epi8;
+                use coresimd::arch::x86::_mm_movemask_epi8;
                 #[cfg(target_arch = "x86_64")]
-                use ::coresimd::arch::x86_64::_mm_movemask_epi8;
-                // _mm_movemask_epi8(a) creates a 16bit mask containing the most
-                // significant bit of each byte of `a`. If all bits are set,
-                // then all 16 lanes of the mask are true.
-                _mm_movemask_epi8(::mem::transmute(self)) == u16::max_value() as i32
+                use coresimd::arch::x86_64::_mm_movemask_epi8;
+                // _mm_movemask_epi8(a) creates a 16bit mask containing the
+                // most significant bit of each byte of `a`. If all
+                // bits are set, then all 16 lanes of the mask are
+                // true.
+                _mm_movemask_epi8(::mem::transmute(self))
+                    == u16::max_value() as i32
             }
         }
         impl Any for $id {
@@ -85,14 +96,14 @@ macro_rules! x86_128_sse2_movemask_impl {
             #[target_feature(enable = "sse2")]
             unsafe fn any(self) -> bool {
                 #[cfg(target_arch = "x86")]
-                use ::coresimd::arch::x86::_mm_movemask_epi8;
+                use coresimd::arch::x86::_mm_movemask_epi8;
                 #[cfg(target_arch = "x86_64")]
-                use ::coresimd::arch::x86_64::_mm_movemask_epi8;
+                use coresimd::arch::x86_64::_mm_movemask_epi8;
                 _mm_movemask_epi8(::mem::transmute(self)) != 0
             }
         }
-    }
+    };
 }

 // On x86 with AVX we use _mm256_testc_si256 and _mm256_testz_si256.
@@ -103,7 +114,12 @@ macro_rules! x86_128_sse2_movemask_impl {
 // integer or floating point vectors, we can't currently work around this yet.
 //
 // TODO: investigate perf impact and fill LLVM bugs as necessary.
-#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "avx"))]
+#[cfg(
+    all(
+        any(target_arch = "x86", target_arch = "x86_64"),
+        target_feature = "avx"
+    )
+)]
 macro_rules! x86_256_avx_test_impl {
     ($id:ident) => {
         impl All for $id {
@@ -111,11 +127,13 @@ macro_rules! x86_256_avx_test_impl {
             #[target_feature(enable = "avx")]
             unsafe fn all(self) -> bool {
                 #[cfg(target_arch = "x86")]
-                use ::coresimd::arch::x86::_mm256_testc_si256;
+                use coresimd::arch::x86::_mm256_testc_si256;
                 #[cfg(target_arch = "x86_64")]
-                use ::coresimd::arch::x86_64::_mm256_testc_si256;
-                _mm256_testc_si256(::mem::transmute(self),
-                                   ::mem::transmute($id::splat(true))) != 0
+                use coresimd::arch::x86_64::_mm256_testc_si256;
+                _mm256_testc_si256(
+                    ::mem::transmute(self),
+                    ::mem::transmute($id::splat(true)),
+                ) != 0
             }
         }
         impl Any for $id {
@@ -123,20 +141,27 @@ macro_rules! x86_256_avx_test_impl {
             #[target_feature(enable = "avx")]
             unsafe fn any(self) -> bool {
                 #[cfg(target_arch = "x86")]
-                use ::coresimd::arch::x86::_mm256_testz_si256;
+                use coresimd::arch::x86::_mm256_testz_si256;
                 #[cfg(target_arch = "x86_64")]
-                use ::coresimd::arch::x86_64::_mm256_testz_si256;
-                _mm256_testz_si256(::mem::transmute(self),
-                                   ::mem::transmute(self)) == 0
+                use coresimd::arch::x86_64::_mm256_testz_si256;
+                _mm256_testz_si256(
+                    ::mem::transmute(self),
+                    ::mem::transmute(self),
+                ) == 0
             }
         }
-    }
+    };
 }

-// On x86 with SSE2 all/any for 256-bit wide vectors is implemented by executing
-// the algorithm for 128-bit on the higher and lower elements of the vector
-// independently.
-#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse2"))]
+// On x86 with SSE2 all/any for 256-bit wide vectors is implemented by
+// executing the algorithm for 128-bit on the higher and lower elements of the
+// vector independently.
+#[cfg(
+    all(
+        any(target_arch = "x86", target_arch = "x86_64"),
+        target_feature = "sse2"
+    )
+)]
 macro_rules! x86_256_sse2_impl {
     ($id:ident, $v128:ident) => {
         impl All for $id {
@@ -146,7 +171,7 @@ macro_rules! x86_256_sse2_impl {
             unsafe {
                 union U {
                     halves: ($v128, $v128),
-                    vec: $id
+                    vec: $id,
                 }
                 let halves = U { vec: self }.halves;
                 halves.0.all() && halves.1.all()
@@ -160,14 +185,14 @@ macro_rules! x86_256_sse2_impl {
             unsafe {
                 union U {
                     halves: ($v128, $v128),
-                    vec: $id
+                    vec: $id,
                 }
                 let halves = U { vec: self }.halves;
                 halves.0.any() || halves.1.any()
             }
         }
     }
-    }
+    };
 }

 // Implementation for 64-bit wide masks on x86.
@@ -179,13 +204,14 @@ macro_rules! x86_64_mmx_movemask_impl {
             #[target_feature(enable = "mmx")]
             unsafe fn all(self) -> bool {
                 #[cfg(target_arch = "x86")]
-                use ::coresimd::arch::x86::_mm_movemask_pi8;
+                use coresimd::arch::x86::_mm_movemask_pi8;
                 #[cfg(target_arch = "x86_64")]
-                use ::coresimd::arch::x86_64::_mm_movemask_pi8;
+                use coresimd::arch::x86_64::_mm_movemask_pi8;
                 // _mm_movemask_pi8(a) creates an 8bit mask containing the most
                 // significant bit of each byte of `a`. If all bits are set,
                 // then all 8 lanes of the mask are true.
-                _mm_movemask_pi8(::mem::transmute(self)) == u8::max_value() as i32
+                _mm_movemask_pi8(::mem::transmute(self))
+                    == u8::max_value() as i32
             }
         }
         impl Any for $id {
@@ -193,14 +219,14 @@ macro_rules! x86_64_mmx_movemask_impl {
             #[target_feature(enable = "mmx")]
             unsafe fn any(self) -> bool {
                 #[cfg(target_arch = "x86")]
-                use ::coresimd::arch::x86::_mm_movemask_pi8;
+                use coresimd::arch::x86::_mm_movemask_pi8;
                 #[cfg(target_arch = "x86_64")]
-                use ::coresimd::arch::x86_64::_mm_movemask_pi8;
+                use coresimd::arch::x86_64::_mm_movemask_pi8;
                 _mm_movemask_pi8(::mem::transmute(self)) != 0
             }
         }
-    }
+    };
 }

 // Implementation for 128-bit wide masks on x86
@@ -214,7 +240,7 @@ macro_rules! x86_128_impl {
             default_impl!($id);
         }
     }
-    }
+    };
 }

 // Implementation for 256-bit wide masks on x86
@@ -230,22 +256,25 @@ macro_rules! x86_256_impl {
             default_impl!($id);
         }
     }
-    }
+    };
 }

 // Implementation for ARM + v7 + NEON using vpmin and vpmax (folding
 // minimum/maximum of adjacent pairs) for 64-bit wide two-element vectors.
-#[cfg(all(target_arch = "arm", target_feature = "v7", target_feature = "neon"))]
+#[cfg(
+    all(target_arch = "arm", target_feature = "v7", target_feature = "neon")
+)]
 macro_rules! arm_64_x2_v7_neon_impl {
     ($id:ident, $vpmin:ident, $vpmax:ident) => {
         impl All for $id {
             #[inline]
             #[target_feature(enable = "v7,neon")]
             unsafe fn all(self) -> bool {
-                use ::coresimd::arch::arm::$vpmin;
-                use ::mem::transmute;
+                use coresimd::arch::arm::$vpmin;
+                use mem::transmute;
                 // pmin((a, b), (-,-)) => (b, -).0 => b
-                let tmp: $id = transmute($vpmin(transmute(self), ::mem::uninitialized()));
+                let tmp: $id =
+                    transmute($vpmin(transmute(self), ::mem::uninitialized()));
                 tmp.extract(0)
             }
         }
@@ -253,27 +282,30 @@ macro_rules! arm_64_x2_v7_neon_impl {
             #[inline]
             #[target_feature(enable = "v7,neon")]
             unsafe fn any(self) -> bool {
-                use ::coresimd::arch::arm::$vpmax;
-                use ::mem::transmute;
+                use coresimd::arch::arm::$vpmax;
+                use mem::transmute;
                 // pmax((a, b), (-,-)) => (b, -).0 => b
-                let tmp: $id = transmute($vpmax(transmute(self), ::mem::uninitialized()));
+                let tmp: $id =
+                    transmute($vpmax(transmute(self), ::mem::uninitialized()));
                 tmp.extract(0)
             }
         }
-    }
+    };
 }

 // Implementation for ARM + v7 + NEON using vpmin and vpmax (folding
 // minimum/maximum of adjacent pairs) for 64-bit wide four-element vectors.
-#[cfg(all(target_arch = "arm", target_feature = "v7", target_feature = "neon"))]
+#[cfg(
+    all(target_arch = "arm", target_feature = "v7", target_feature = "neon")
+)]
 macro_rules! arm_64_x4_v7_neon_impl {
     ($id:ident, $vpmin:ident, $vpmax:ident) => {
         impl All for $id {
             #[inline]
             #[target_feature(enable = "v7,neon")]
             unsafe fn all(self) -> bool {
-                use ::coresimd::arch::arm::$vpmin;
-                use ::mem::transmute;
+                use coresimd::arch::arm::$vpmin;
+                use mem::transmute;
                 // tmp = pmin((a, b, c, d), (-,-,-,-)) => (a, c, -, -)
                 let tmp = $vpmin(transmute(self), ::mem::uninitialized());
                 // tmp = pmin((a, b, -, -), (-,-,-,-)) => (c, -, -, -).0 => c
@@ -285,8 +317,8 @@ macro_rules! arm_64_x4_v7_neon_impl {
             #[inline]
             #[target_feature(enable = "v7,neon")]
             unsafe fn any(self) -> bool {
-                use ::coresimd::arch::arm::$vpmax;
-                use ::mem::transmute;
+                use coresimd::arch::arm::$vpmax;
+                use mem::transmute;
                 // tmp = pmax((a, b, c, d), (-,-,-,-)) => (a, c, -, -)
                 let tmp = $vpmax(transmute(self), ::mem::uninitialized());
                 // tmp = pmax((a, b, -, -), (-,-,-,-)) => (c, -, -, -).0 => c
@@ -294,20 +326,22 @@ macro_rules! arm_64_x4_v7_neon_impl {
                 tmp.extract(0)
             }
         }
-    }
+    };
 }

 // Implementation for ARM + v7 + NEON using vpmin and vpmax (folding
 // minimum/maximum of adjacent pairs) for 64-bit wide eight-element vectors.
-#[cfg(all(target_arch = "arm", target_feature = "v7", target_feature = "neon"))]
+#[cfg(
+    all(target_arch = "arm", target_feature = "v7", target_feature = "neon")
+)]
 macro_rules! arm_64_x8_v7_neon_impl {
     ($id:ident, $vpmin:ident, $vpmax:ident) => {
         impl All for $id {
             #[inline]
             #[target_feature(enable = "v7,neon")]
             unsafe fn all(self) -> bool {
-                use ::coresimd::arch::arm::$vpmin;
-                use ::mem::transmute;
+                use coresimd::arch::arm::$vpmin;
+                use mem::transmute;
                 // tmp = pmin(
                 //     (a, b, c, d, e, f, g, h),
                 //     (-, -, -, -, -, -, -, -)
@@ -330,8 +364,8 @@ macro_rules! arm_64_x8_v7_neon_impl {
             #[inline]
             #[target_feature(enable = "v7,neon")]
             unsafe fn any(self) -> bool {
-                use ::coresimd::arch::arm::$vpmax;
-                use ::mem::transmute;
+                use coresimd::arch::arm::$vpmax;
+                use mem::transmute;
                 // tmp = pmax(
                 //     (a, b, c, d, e, f, g, h),
                 //     (-, -, -, -, -, -, -, -)
@@ -350,28 +384,32 @@ macro_rules! arm_64_x8_v7_neon_impl {
                 tmp.extract(0)
             }
         }
-    }
+    };
 }

 // Implementation for ARM + v7 + NEON using vpmin and vpmax (folding
 // minimum/maximum of adjacent pairs) for 64-bit or 128-bit wide vectors with
 // more than two elements.
-#[cfg(all(target_arch = "arm", target_feature = "v7", target_feature = "neon"))]
+#[cfg(
+    all(target_arch = "arm", target_feature = "v7", target_feature = "neon")
+)]
 macro_rules! arm_128_v7_neon_impl {
     ($id:ident, $half:ident, $vpmin:ident, $vpmax:ident) => {
         impl All for $id {
             #[inline]
             #[target_feature(enable = "v7,neon")]
             unsafe fn all(self) -> bool {
-                use ::coresimd::arch::arm::$vpmin;
-                use ::mem::transmute;
+                use coresimd::arch::arm::$vpmin;
+                use mem::transmute;
                 union U {
                     halves: ($half, $half),
-                    vec: $id
+                    vec: $id,
                 }
                 let halves = U { vec: self }.halves;
-                let h: $half = transmute($vpmin(transmute(halves.0), transmute(halves.1)));
+                let h: $half = transmute($vpmin(
+                    transmute(halves.0),
+                    transmute(halves.1),
+                ));
                 h.all()
             }
         }
@@ -379,18 +417,21 @@ macro_rules! arm_128_v7_neon_impl {
             #[inline]
             #[target_feature(enable = "v7,neon")]
             unsafe fn any(self) -> bool {
-                use ::coresimd::arch::arm::$vpmax;
-                use ::mem::transmute;
+                use coresimd::arch::arm::$vpmax;
+                use mem::transmute;
                 union U {
                     halves: ($half, $half),
-                    vec: $id
+                    vec: $id,
                 }
                 let halves = U { vec: self }.halves;
-                let h: $half = transmute($vpmax(transmute(halves.0), transmute(halves.1)));
+                let h: $half = transmute($vpmax(
+                    transmute(halves.0),
+                    transmute(halves.1),
+                ));
                 h.any()
             }
         }
-    }
+    };
 }

 // Implementation for AArch64 + NEON using vmin and vmax (horizontal vector
@@ -402,7 +443,7 @@ macro_rules! aarch64_128_neon_impl {
             #[inline]
             #[target_feature(enable = "neon")]
             unsafe fn all(self) -> bool {
-                use ::coresimd::arch::aarch64::$vmin;
+                use coresimd::arch::aarch64::$vmin;
                 $vmin(::mem::transmute(self)) != 0
             }
         }
@@ -410,11 +451,11 @@ macro_rules! aarch64_128_neon_impl {
             #[inline]
             #[target_feature(enable = "neon")]
             unsafe fn any(self) -> bool {
-                use ::coresimd::arch::aarch64::$vmax;
+                use coresimd::arch::aarch64::$vmax;
                 $vmax(::mem::transmute(self)) != 0
             }
         }
-    }
+    };
 }

 // Implementation for AArch64 + NEON using vmin and vmax (horizontal vector
@@ -431,9 +472,12 @@ macro_rules! aarch64_64_neon_impl {
             unsafe fn all(self) -> bool {
                 union U {
                     halves: ($id, $id),
-                    vec: $vec128
+                    vec: $vec128,
                 }
-                U { halves: (self, self) }.vec.all()
+                U {
+                    halves: (self, self),
+                }.vec
+                    .all()
             }
         }
         impl Any for $id {
@@ -442,12 +486,15 @@ macro_rules! aarch64_64_neon_impl {
             unsafe fn any(self) -> bool {
                 union U {
                     halves: ($id, $id),
-                    vec: $vec128
+                    vec: $vec128,
                 }
-                U { halves: (self, self) }.vec.any()
+                U {
+                    halves: (self, self),
+                }.vec
+                    .any()
             }
         }
-    }
+    };
 }

 macro_rules! impl_mask_all_any {
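The comments inside the macros above describe the strategy; as a standalone illustration of the SSE2 movemask trick (hypothetical free function, not from this commit):

    // Sketch: `all` for a 16-lane 8-bit mask via SSE2, assuming x86_64 and
    // that every mask lane is either all-ones (true) or all-zeros (false).
    #[cfg(target_arch = "x86_64")]
    #[target_feature(enable = "sse2")]
    unsafe fn mask8x16_all(m: m8x16) -> bool {
        use coresimd::arch::x86_64::_mm_movemask_epi8;
        // movemask gathers the MSB of each byte; 16 true lanes => 0xFFFF.
        _mm_movemask_epi8(::mem::transmute(m)) == u16::max_value() as i32
    }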

View File

@@ -5,8 +5,8 @@ pub mod wrapping;
 pub mod masks_reductions;

-pub mod sqrt;
 pub mod abs;
+pub mod cos;
 pub mod fma;
 pub mod sin;
-pub mod cos;
+pub mod sqrt;

View File

@@ -31,7 +31,7 @@ macro_rules! impl_fsin {
                 unsafe { $fn(self) }
             }
         }
-    }
+    };
 }
 impl_fsin!(f32x2: sin_v2f32);

View File

@@ -31,7 +31,7 @@ macro_rules! impl_fsqrt {
                 unsafe { $fn(self) }
             }
         }
-    }
+    };
 }
 impl_fsqrt!(f32x2: sqrt_v2f32);

View File

@@ -66,8 +66,12 @@ where
     U: FromBits<T>,
 {
     // FIXME: https://github.com/rust-lang-nursery/stdsimd/issues/449
-    #[cfg_attr(any(target_arch = "powerpc", target_arch = "powerpc64"), inline(always))]
-    #[cfg_attr(not(any(target_arch = "powerpc", target_arch = "powerpc64")), inline)]
+    #[cfg_attr(
+        any(target_arch = "powerpc", target_arch = "powerpc64"), inline(always)
+    )]
+    #[cfg_attr(
+        not(any(target_arch = "powerpc", target_arch = "powerpc64")), inline
+    )]
     fn into_bits(self) -> U {
         debug_assert!(::mem::size_of::<Self>() == ::mem::size_of::<U>());
         U::from_bits(self)
@@ -77,8 +81,12 @@ where
 // FromBits (and thus IntoBits) is reflexive.
 impl<T> FromBits<T> for T {
     // FIXME: https://github.com/rust-lang-nursery/stdsimd/issues/449
-    #[cfg_attr(any(target_arch = "powerpc", target_arch = "powerpc64"), inline(always))]
-    #[cfg_attr(not(any(target_arch = "powerpc", target_arch = "powerpc64")), inline)]
+    #[cfg_attr(
+        any(target_arch = "powerpc", target_arch = "powerpc64"), inline(always)
+    )]
+    #[cfg_attr(
+        not(any(target_arch = "powerpc", target_arch = "powerpc64")), inline
+    )]
     fn from_bits(t: Self) -> Self {
         t
     }

View File

@@ -110,9 +110,11 @@ macro_rules! from_bits_x86 {
     };
 }

-#[cfg(all(target_arch = "arm", target_feature = "neon",
-          target_feature = "v7"))]
-use coresimd::arch::arm::{// FIXME: float16x8_t,
+#[cfg(
+    all(target_arch = "arm", target_feature = "neon", target_feature = "v7")
+)]
+use coresimd::arch::arm::{
+    // FIXME: float16x8_t,
     float32x4_t,
     int16x8_t,
     int32x4_t,
@@ -123,10 +125,12 @@ use coresimd::arch::arm::{// FIXME: float16x8_t,
     uint16x8_t,
     uint32x4_t,
     uint64x2_t,
-    uint8x16_t};
+    uint8x16_t,
+};
 #[cfg(target_arch = "aarch64")]
-use coresimd::arch::aarch64::{// FIXME: float16x8_t,
+use coresimd::arch::aarch64::{
+    // FIXME: float16x8_t,
     float32x4_t,
     float64x2_t,
     int16x8_t,
@@ -138,13 +142,21 @@ use coresimd::arch::aarch64::{// FIXME: float16x8_t,
     uint16x8_t,
     uint32x4_t,
     uint64x2_t,
-    uint8x16_t};
+    uint8x16_t,
+};

 macro_rules! from_bits_arm {
     ($id:ident, $elem_ty:ident, $test_mod_arm:ident, $test_mod_a64:ident) => {
-        #[cfg(any(all(target_arch = "arm", target_feature = "neon",
-                      target_feature = "v7"),
-                  target_arch = "aarch64"))]
+        #[cfg(
+            any(
+                all(
+                    target_arch = "arm",
+                    target_feature = "neon",
+                    target_feature = "v7"
+                ),
+                target_arch = "aarch64"
+            )
+        )]
         impl_from_bits_!(
             $id: int8x16_t,
             uint8x16_t,
@@ -182,12 +194,7 @@ impl_from_bits!(
     m8x16
 );
 from_bits_x86!(u64x2, u64, u64x2_from_bits_x86);
-from_bits_arm!(
-    u64x2,
-    u64,
-    u64x2_from_bits_arm,
-    u64x2_from_bits_aarch64
-);
+from_bits_arm!(u64x2, u64, u64x2_from_bits_arm, u64x2_from_bits_aarch64);

 impl_from_bits!(
     i64x2: i64,
@@ -207,12 +214,7 @@ impl_from_bits!(
     m8x16
 );
 from_bits_x86!(i64x2, i64, i64x2_from_bits_x86);
-from_bits_arm!(
-    i64x2,
-    i64,
-    i64x2_from_bits_arm,
-    i64x2_from_bits_aarch64
-);
+from_bits_arm!(i64x2, i64, i64x2_from_bits_arm, i64x2_from_bits_aarch64);

 impl_from_bits!(
     f64x2: f64,
@@ -232,12 +234,7 @@ impl_from_bits!(
     m8x16
 );
 from_bits_x86!(f64x2, f64, f64x2_from_bits_x86);
-from_bits_arm!(
-    f64x2,
-    f64,
-    f64x2_from_bits_arm,
-    f64x2_from_bits_aarch64
-);
+from_bits_arm!(f64x2, f64, f64x2_from_bits_arm, f64x2_from_bits_aarch64);

 impl_from_bits!(
     u32x4: u32,
@@ -257,12 +254,7 @@ impl_from_bits!(
     m8x16
 );
 from_bits_x86!(u32x4, u32, u32x4_from_bits_x86);
-from_bits_arm!(
-    u32x4,
-    u32,
-    u32x4_from_bits_arm,
-    u32x4_from_bits_aarch64
-);
+from_bits_arm!(u32x4, u32, u32x4_from_bits_arm, u32x4_from_bits_aarch64);

 impl_from_bits!(
     i32x4: i32,
@@ -282,12 +274,7 @@ impl_from_bits!(
     m8x16
 );
 from_bits_x86!(i32x4, i32, i32x4_from_bits_x86);
-from_bits_arm!(
-    i32x4,
-    i32,
-    i32x4_from_bits_arm,
-    i32x4_from_bits_aarch64
-);
+from_bits_arm!(i32x4, i32, i32x4_from_bits_arm, i32x4_from_bits_aarch64);

 impl_from_bits!(
     f32x4: f32,
@@ -307,12 +294,7 @@ impl_from_bits!(
     m8x16
 );
 from_bits_x86!(f32x4, f32, f32x4_from_bits_x86);
-from_bits_arm!(
-    f32x4,
-    f32,
-    f32x4_from_bits_arm,
-    f32x4_from_bits_aarch64
-);
+from_bits_arm!(f32x4, f32, f32x4_from_bits_arm, f32x4_from_bits_aarch64);

 impl_from_bits!(
     u16x8: u16,
@@ -332,12 +314,7 @@ impl_from_bits!(
     m8x16
 );
 from_bits_x86!(u16x8, u16, u16x8_from_bits_x86);
-from_bits_arm!(
-    u16x8,
-    u16,
-    u16x8_from_bits_arm,
-    u16x8_from_bits_aarch64
-);
+from_bits_arm!(u16x8, u16, u16x8_from_bits_arm, u16x8_from_bits_aarch64);

 impl_from_bits!(
     i16x8: i16,
@@ -357,12 +334,7 @@ impl_from_bits!(
     m8x16
 );
 from_bits_x86!(i16x8, i16, i16x8_from_bits_x86);
-from_bits_arm!(
-    i16x8,
-    i16,
-    i16x8_from_bits_arm,
-    i16x8_from_bits_aarch64
-);
+from_bits_arm!(i16x8, i16, i16x8_from_bits_arm, i16x8_from_bits_aarch64);

 impl_from_bits!(
     u8x16: u8,
@@ -382,12 +354,7 @@ impl_from_bits!(
     m8x16
 );
 from_bits_x86!(u8x16, u8, u8x16_from_bits_x86);
-from_bits_arm!(
-    u8x16,
-    u8,
-    u8x16_from_bits_arm,
-    u8x16_from_bits_aarch64
-);
+from_bits_arm!(u8x16, u8, u8x16_from_bits_arm, u8x16_from_bits_aarch64);

 impl_from_bits!(
     i8x16: i8,
@@ -407,12 +374,7 @@ impl_from_bits!(
     m8x16
 );
 from_bits_x86!(i8x16, i8, i8x16_from_bits_x86);
-from_bits_arm!(
-    i8x16,
-    i8,
-    i8x16_from_bits_arm,
-    i8x16_from_bits_aarch64
-);
+from_bits_arm!(i8x16, i8, i8x16_from_bits_arm, i8x16_from_bits_aarch64);

 impl_from!(
     f64x2: f64,
@@ -552,31 +514,37 @@ impl_from!(
m8x8 m8x8
); );
impl_from!(u8x16: u8, u8x16_from, test_v128 | i32x16, u32x16, f32x16, m1x16, i16x16, u16x16, m16x16, i8x16, m8x16); impl_from!(
impl_from!(i8x16: i8, i8x16_from, test_v128 | i32x16, u32x16, f32x16, m1x16, i16x16, u16x16, m16x16, u8x16, m8x16); u8x16: u8,
u8x16_from,
test_v128 | i32x16,
u32x16,
f32x16,
m1x16,
i16x16,
u16x16,
m16x16,
i8x16,
m8x16
);
impl_from!(
i8x16: i8,
i8x16_from,
test_v128 | i32x16,
u32x16,
f32x16,
m1x16,
i16x16,
u16x16,
m16x16,
u8x16,
m8x16
);
impl_from!(m8x16: i8, m8x16_from, test_v128 | m1x16, m16x16); impl_from!(m8x16: i8, m8x16_from, test_v128 | m1x16, m16x16);
impl_from!( impl_from!(m16x8: i16, m16x8_from, test_v128 | m1x8, m32x8, m8x8);
m16x8: i16,
m16x8_from,
test_v128 | m1x8,
m32x8,
m8x8
);
impl_from!( impl_from!(m32x4: i32, m32x4_from, test_v128 | m64x4, m16x4, m8x4);
m32x4: i32,
m32x4_from,
test_v128 | m64x4,
m16x4,
m8x4
);
impl_from!( impl_from!(m64x2: i64, m64x2_from, test_v128 | m32x2, m16x2, m8x2);
m64x2: i64,
m64x2_from,
test_v128 | m32x2,
m16x2,
m8x2
);
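For readers skimming the macro soup above: every `from_bits_arm!`/`from_bits_x86!` invocation wires up a bitwise `From` conversion between same-width vector types, i.e. the bits are reinterpreted, not converted lane by lane. A minimal sketch of the underlying idea, using a plain `core::mem::transmute` on arrays rather than the actual generated impls (the helper name is hypothetical, for illustration only):

use core::mem;

// Hypothetical helper; the real code generates `impl From<uint32x4_t>
// for u64x2` and friends via `impl_from_bits_!`.
fn u32x4_bits_as_u64x2(x: [u32; 4]) -> [u64; 2] {
    // Both types are 16 bytes wide, so the bit pattern carries over
    // verbatim; only the lane boundaries move.
    unsafe { mem::transmute(x) }
}

fn main() {
    // On a little-endian target, adjacent u32 lanes pair up into u64 lanes.
    assert_eq!(u32x4_bits_as_u64x2([1, 0, 2, 0]), [1_u64, 2_u64]);
}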
@@ -57,10 +57,4 @@ impl_from!(
     m8x2
 );
-impl_from!(
-    m8x2: i8,
-    m8x2_from,
-    test_v16 | m64x2,
-    m32x2,
-    m16x2
-);
+impl_from!(m8x2: i8, m8x2_from, test_v16 | m64x2, m32x2, m16x2);
@@ -465,25 +465,8 @@ impl_from!(
 impl_from!(m8x32: i8, m8x32_from, test_v256 | m1x32);
-impl_from!(
-    m16x16: i16,
-    m16x16_from,
-    test_v256 | m1x16,
-    m8x16
-);
+impl_from!(m16x16: i16, m16x16_from, test_v256 | m1x16, m8x16);
-impl_from!(
-    m32x8: i32,
-    m32x8_from,
-    test_v256 | m1x8,
-    m16x8,
-    m8x8
-);
+impl_from!(m32x8: i32, m32x8_from, test_v256 | m1x8, m16x8, m8x8);
-impl_from!(
-    m64x4: i64,
-    m64x4_from,
-    test_v256 | m32x4,
-    m16x4,
-    m8x4
-);
+impl_from!(m64x4: i64, m64x4_from, test_v256 | m32x4, m16x4, m8x4);
@@ -151,18 +151,6 @@ impl_from!(
     m8x4
 );
-impl_from!(
-    m8x4: i8,
-    m8x4_from,
-    test_v32 | m64x4,
-    m32x4,
-    m16x4
-);
+impl_from!(m8x4: i8, m8x4_from, test_v32 | m64x4, m32x4, m16x4);
-impl_from!(
-    m16x2: i16,
-    m16x2_from,
-    test_v32 | m64x2,
-    m32x2,
-    m8x2
-);
+impl_from!(m16x2: i16, m16x2_from, test_v32 | m64x2, m32x2, m8x2);
@@ -446,17 +446,6 @@ impl_from!(u8x64: u8, u8x64_from, test_v512 | i8x64, m1x64);
 impl_from!(m1x32: i16, m1x32_from, test_v512 | m8x32);
-impl_from!(
-    m1x16: i32,
-    m1x16_from,
-    test_v512 | m16x16,
-    m8x16
-);
+impl_from!(m1x16: i32, m1x16_from, test_v512 | m16x16, m8x16);
-impl_from!(
-    m1x8: i64,
-    m1x8_from,
-    test_v512 | m32x8,
-    m16x8,
-    m8x8
-);
+impl_from!(m1x8: i64, m1x8_from, test_v512 | m32x8, m16x8, m8x8);
@@ -83,9 +83,11 @@ macro_rules! from_bits_x86 {
     };
 }
-#[cfg(all(target_arch = "arm", target_feature = "neon",
-          target_feature = "v7"))]
-use coresimd::arch::arm::{// FIXME: float16x4_t,
+#[cfg(
+    all(target_arch = "arm", target_feature = "neon", target_feature = "v7")
+)]
+use coresimd::arch::arm::{
+    // FIXME: float16x4_t,
     float32x2_t,
     int16x4_t,
     int32x2_t,
@@ -96,10 +98,12 @@ use coresimd::arch::arm::{// FIXME: float16x4_t,
     uint16x4_t,
     uint32x2_t,
     uint64x1_t,
-    uint8x8_t};
+    uint8x8_t,
+};
 #[cfg(target_arch = "aarch64")]
-use coresimd::arch::aarch64::{// FIXME: float16x4_t,
+use coresimd::arch::aarch64::{
+    // FIXME: float16x4_t,
     float32x2_t,
     float64x1_t,
     int16x4_t,
@@ -111,13 +115,21 @@ use coresimd::arch::aarch64::{// FIXME: float16x4_t,
     uint16x4_t,
     uint32x2_t,
     uint64x1_t,
-    uint8x8_t};
+    uint8x8_t,
+};
 macro_rules! from_bits_arm {
     ($id:ident, $elem_ty:ident, $test_mod_arm:ident, $test_mod_a64:ident) => {
-        #[cfg(any(all(target_arch = "arm", target_feature = "neon",
-                      target_feature = "v7"),
-                  target_arch = "aarch64"))]
+        #[cfg(
+            any(
+                all(
+                    target_arch = "arm",
+                    target_feature = "neon",
+                    target_feature = "v7"
+                ),
+                target_arch = "aarch64"
+            )
+        )]
         impl_from_bits_!(
             $id: int64x1_t,
             uint64x1_t,
@@ -151,12 +163,7 @@ impl_from_bits!(
     m8x8
 );
 from_bits_x86!(u32x2, u32, u32x2_from_bits_x86);
-from_bits_arm!(
-    u32x2,
-    u32,
-    u32x2_from_bits_arm,
-    u32x2_from_bits_aarch64
-);
+from_bits_arm!(u32x2, u32, u32x2_from_bits_arm, u32x2_from_bits_aarch64);
 impl_from_bits!(
     i32x2: i32,
@@ -172,12 +179,7 @@ impl_from_bits!(
     m8x8
 );
 from_bits_x86!(i32x2, i32, i32x2_from_bits_x86);
-from_bits_arm!(
-    i32x2,
-    i32,
-    i32x2_from_bits_arm,
-    i32x2_from_bits_aarch64
-);
+from_bits_arm!(i32x2, i32, i32x2_from_bits_arm, i32x2_from_bits_aarch64);
 impl_from_bits!(
     f32x2: f32,
@@ -193,12 +195,7 @@ impl_from_bits!(
     m8x8
 );
 from_bits_x86!(f32x2, f32, f32x2_from_bits_x86);
-from_bits_arm!(
-    f32x2,
-    f32,
-    f32x2_from_bits_arm,
-    f32x2_from_bits_aarch64
-);
+from_bits_arm!(f32x2, f32, f32x2_from_bits_arm, f32x2_from_bits_aarch64);
 impl_from_bits!(
     u16x4: u16,
@@ -213,12 +210,7 @@ impl_from_bits!(
     m8x8
 );
 from_bits_x86!(u16x4, u16, u16x4_from_bits_x86);
-from_bits_arm!(
-    u16x4,
-    u16,
-    u16x4_from_bits_arm,
-    u16x4_from_bits_aarch64
-);
+from_bits_arm!(u16x4, u16, u16x4_from_bits_arm, u16x4_from_bits_aarch64);
 impl_from_bits!(
     i16x4: i16,
@@ -233,12 +225,7 @@ impl_from_bits!(
     m8x8
 );
 from_bits_x86!(i16x4, i16, i16x4_from_bits_x86);
-from_bits_arm!(
-    i16x4,
-    i16,
-    i16x4_from_bits_arm,
-    i16x4_from_bits_aarch64
-);
+from_bits_arm!(i16x4, i16, i16x4_from_bits_arm, i16x4_from_bits_aarch64);
 impl_from_bits!(
     u8x8: u8,
@@ -253,12 +240,7 @@ impl_from_bits!(
     m8x8
 );
 from_bits_x86!(u8x8, u8, u8x8_from_bits_x86);
-from_bits_arm!(
-    u8x8,
-    u8,
-    u8x8_from_bits_arm,
-    u8x8_from_bits_aarch64
-);
+from_bits_arm!(u8x8, u8, u8x8_from_bits_arm, u8x8_from_bits_aarch64);
 impl_from_bits!(
     i8x8: i8,
@@ -273,12 +255,7 @@ impl_from_bits!(
     m8x8
 );
 from_bits_x86!(i8x8, i8, i8x8_from_bits_x86);
-from_bits_arm!(
-    i8x8,
-    i8,
-    i8x8_from_bits_arm,
-    i8x8_from_bits_aarch64
-);
+from_bits_arm!(i8x8, i8, i8x8_from_bits_arm, i8x8_from_bits_aarch64);
 impl_from!(
     f32x2: f32,
@@ -404,26 +381,8 @@ impl_from!(
     m8x8
 );
-impl_from!(
-    m8x8: i8,
-    m8x8_from,
-    test_v64 | m1x8,
-    m32x8,
-    m16x8
-);
+impl_from!(m8x8: i8, m8x8_from, test_v64 | m1x8, m32x8, m16x8);
-impl_from!(
-    m16x4: i16,
-    m16x4_from,
-    test_v64 | m64x4,
-    m32x4,
-    m8x4
-);
+impl_from!(m16x4: i16, m16x4_from, test_v64 | m64x4, m32x4, m8x4);
-impl_from!(
-    m32x2: i32,
-    m32x2_from,
-    test_v64 | m64x2,
-    m16x2,
-    m8x2
-);
+impl_from!(m32x2: i32, m32x2_from, test_v64 | m64x2, m16x2, m8x2);
@@ -1387,7 +1387,7 @@ pub unsafe fn _mm_permute_pd(a: __m128d, imm8: i32) -> __m128d {
 #[rustc_args_required_const(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_permute2f128_ps(
-    a: __m256, b: __m256, imm8: i32
+    a: __m256, b: __m256, imm8: i32,
 ) -> __m256 {
     macro_rules! call {
         ($imm8:expr) => {
@@ -1407,7 +1407,7 @@ pub unsafe fn _mm256_permute2f128_ps(
 #[rustc_args_required_const(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_permute2f128_pd(
-    a: __m256d, b: __m256d, imm8: i32
+    a: __m256d, b: __m256d, imm8: i32,
 ) -> __m256d {
     macro_rules! call {
         ($imm8:expr) => {
@@ -1427,7 +1427,7 @@ pub unsafe fn _mm256_permute2f128_pd(
 #[rustc_args_required_const(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_permute2f128_si256(
-    a: __m256i, b: __m256i, imm8: i32
+    a: __m256i, b: __m256i, imm8: i32,
 ) -> __m256i {
     let a = a.as_i32x8();
     let b = b.as_i32x8();
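The `imm8` of the three `permute2f128` intrinsics selects whole 128-bit halves rather than individual elements: bits 1:0 pick the source of the result's low half and bits 5:4 the high half (0/1 = halves of `a`, 2/3 = halves of `b`; bits 3 and 7 can zero a half outright). A hedged usage sketch, following the `i32` signatures shown in this hunk:

use std::arch::x86_64::*;

// Sketch only: swap the two 128-bit halves of `a`.
unsafe fn swap_halves(a: __m256) -> __m256 {
    // Selector 1 routes a's high half to the low lane; selector 0 in
    // bits 5:4 routes a's low half to the high lane.
    _mm256_permute2f128_ps(a, a, 0x01)
}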
@@ -1529,7 +1529,7 @@ pub unsafe fn _mm256_insertf128_ps(a: __m256, b: __m128, imm8: i32) -> __m256 {
 #[rustc_args_required_const(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_insertf128_pd(
-    a: __m256d, b: __m128d, imm8: i32
+    a: __m256d, b: __m128d, imm8: i32,
 ) -> __m256d {
     match imm8 & 1 {
         0 => simd_shuffle4(a, _mm256_castpd128_pd256(b), [4, 5, 2, 3]),
@@ -1547,7 +1547,7 @@ pub unsafe fn _mm256_insertf128_pd(
 #[rustc_args_required_const(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_insertf128_si256(
-    a: __m256i, b: __m128i, imm8: i32
+    a: __m256i, b: __m128i, imm8: i32,
 ) -> __m256i {
     let b = _mm256_castsi128_si256(b).as_i64x4();
     let dst: i64x4 = match imm8 & 1 {
@@ -1567,11 +1567,7 @@ pub unsafe fn _mm256_insertf128_si256(
 #[rustc_args_required_const(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_insert_epi8(a: __m256i, i: i8, index: i32) -> __m256i {
-    mem::transmute(simd_insert(
-        a.as_i8x32(),
-        (index as u32) & 31,
-        i,
-    ))
+    mem::transmute(simd_insert(a.as_i8x32(), (index as u32) & 31, i))
 }
 /// Copy `a` to result, and insert the 16-bit integer `i` into result
@@ -1584,11 +1580,7 @@ pub unsafe fn _mm256_insert_epi8(a: __m256i, i: i8, index: i32) -> __m256i {
 #[rustc_args_required_const(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_insert_epi16(a: __m256i, i: i16, index: i32) -> __m256i {
-    mem::transmute(simd_insert(
-        a.as_i16x16(),
-        (index as u32) & 15,
-        i,
-    ))
+    mem::transmute(simd_insert(a.as_i16x16(), (index as u32) & 15, i))
 }
 /// Copy `a` to result, and insert the 32-bit integer `i` into result
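The collapsed bodies make the semantics easy to read off: `_mm256_insert_epi8`/`_mm256_insert_epi16` return a copy of `a` with the single lane selected by the masked `index` replaced by `i`, and nothing else changes. A small sketch:

use std::arch::x86_64::*;

unsafe fn tag_lane() -> __m256i {
    // All sixteen 16-bit lanes start at zero; only lane 3 becomes 42.
    _mm256_insert_epi16(_mm256_set1_epi16(0), 42, 3)
}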
@@ -1790,7 +1782,7 @@ pub unsafe fn _mm256_storeu_si256(mem_addr: *mut __m256i, a: __m256i) {
 #[cfg_attr(test, assert_instr(vmaskmovpd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_maskload_pd(
-    mem_addr: *const f64, mask: __m256i
+    mem_addr: *const f64, mask: __m256i,
 ) -> __m256d {
     maskloadpd256(mem_addr as *const i8, mask.as_i64x4())
 }
@@ -1804,7 +1796,7 @@ pub unsafe fn _mm256_maskload_pd(
 #[cfg_attr(test, assert_instr(vmaskmovpd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_maskstore_pd(
-    mem_addr: *mut f64, mask: __m256i, a: __m256d
+    mem_addr: *mut f64, mask: __m256i, a: __m256d,
 ) {
     maskstorepd256(mem_addr as *mut i8, mask.as_i64x4(), a);
 }
@@ -1844,7 +1836,7 @@ pub unsafe fn _mm_maskstore_pd(mem_addr: *mut f64, mask: __m128i, a: __m128d) {
 #[cfg_attr(test, assert_instr(vmaskmovps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_maskload_ps(
-    mem_addr: *const f32, mask: __m256i
+    mem_addr: *const f32, mask: __m256i,
 ) -> __m256 {
     maskloadps256(mem_addr as *const i8, mask.as_i32x8())
 }
@@ -1858,7 +1850,7 @@ pub unsafe fn _mm256_maskload_ps(
 #[cfg_attr(test, assert_instr(vmaskmovps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_maskstore_ps(
-    mem_addr: *mut f32, mask: __m256i, a: __m256
+    mem_addr: *mut f32, mask: __m256i, a: __m256,
 ) {
     maskstoreps256(mem_addr as *mut i8, mask.as_i32x8(), a);
 }
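For the maskload/maskstore family, a lane participates only when the most significant bit of the corresponding mask element is set; masked-out lanes read as zero and are skipped on store. A hedged round-trip sketch:

use std::arch::x86_64::*;

unsafe fn masked_copy(src: &[f64; 4], dst: &mut [f64; 4]) {
    // Lanes 0 and 2 have the MSB set (-1); lanes 1 and 3 are left
    // untouched in `dst` and come back as 0.0 from the load.
    let mask = _mm256_setr_epi64x(-1, 0, -1, 0);
    let v = _mm256_maskload_pd(src.as_ptr(), mask);
    _mm256_maskstore_pd(dst.as_mut_ptr(), mask, v);
}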
@@ -2383,7 +2375,7 @@ pub unsafe fn _mm256_set_pd(a: f64, b: f64, c: f64, d: f64) -> __m256d {
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_set_ps(
-    a: f32, b: f32, c: f32, d: f32, e: f32, f: f32, g: f32, h: f32
+    a: f32, b: f32, c: f32, d: f32, e: f32, f: f32, g: f32, h: f32,
 ) -> __m256 {
     _mm256_setr_ps(h, g, f, e, d, c, b, a)
 }
@@ -2440,7 +2432,7 @@ pub unsafe fn _mm256_set_epi16(
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_set_epi32(
-    e0: i32, e1: i32, e2: i32, e3: i32, e4: i32, e5: i32, e6: i32, e7: i32
+    e0: i32, e1: i32, e2: i32, e3: i32, e4: i32, e5: i32, e6: i32, e7: i32,
 ) -> __m256i {
     _mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0)
 }
@@ -2477,7 +2469,7 @@ pub unsafe fn _mm256_setr_pd(a: f64, b: f64, c: f64, d: f64) -> __m256d {
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_setr_ps(
-    a: f32, b: f32, c: f32, d: f32, e: f32, f: f32, g: f32, h: f32
+    a: f32, b: f32, c: f32, d: f32, e: f32, f: f32, g: f32, h: f32,
 ) -> __m256 {
     __m256(a, b, c, d, e, f, g, h)
 }
@@ -2536,7 +2528,7 @@ pub unsafe fn _mm256_setr_epi16(
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_setr_epi32(
-    e0: i32, e1: i32, e2: i32, e3: i32, e4: i32, e5: i32, e6: i32, e7: i32
+    e0: i32, e1: i32, e2: i32, e3: i32, e4: i32, e5: i32, e6: i32, e7: i32,
 ) -> __m256i {
     mem::transmute(i32x8::new(e0, e1, e2, e3, e4, e5, e6, e7))
 }
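The bodies above also document the `set` vs `setr` convention: `_mm256_set_*` takes arguments from the highest lane down and simply reverses into `_mm256_setr_*`, which takes them in memory (lowest-lane-first) order. So these two calls build the same vector:

use std::arch::x86_64::*;

unsafe fn equivalent() -> (__m256, __m256) {
    let a = _mm256_set_ps(8., 7., 6., 5., 4., 3., 2., 1.);
    let b = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
    (a, b) // identical lane contents
}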
@@ -2950,7 +2942,7 @@ pub unsafe fn _mm256_setr_m128i(lo: __m128i, hi: __m128i) -> __m256i {
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_loadu2_m128(
-    hiaddr: *const f32, loaddr: *const f32
+    hiaddr: *const f32, loaddr: *const f32,
 ) -> __m256 {
     let a = _mm256_castps128_ps256(_mm_loadu_ps(loaddr));
     _mm256_insertf128_ps(a, _mm_loadu_ps(hiaddr), 1)
@@ -2967,7 +2959,7 @@ pub unsafe fn _mm256_loadu2_m128(
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_loadu2_m128d(
-    hiaddr: *const f64, loaddr: *const f64
+    hiaddr: *const f64, loaddr: *const f64,
 ) -> __m256d {
     let a = _mm256_castpd128_pd256(_mm_loadu_pd(loaddr));
     _mm256_insertf128_pd(a, _mm_loadu_pd(hiaddr), 1)
@@ -2983,7 +2975,7 @@ pub unsafe fn _mm256_loadu2_m128d(
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_loadu2_m128i(
-    hiaddr: *const __m128i, loaddr: *const __m128i
+    hiaddr: *const __m128i, loaddr: *const __m128i,
 ) -> __m256i {
     let a = _mm256_castsi128_si256(_mm_loadu_si128(loaddr));
     _mm256_insertf128_si256(a, _mm_loadu_si128(hiaddr), 1)
@@ -3000,7 +2992,7 @@ pub unsafe fn _mm256_loadu2_m128i(
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_storeu2_m128(
-    hiaddr: *mut f32, loaddr: *mut f32, a: __m256
+    hiaddr: *mut f32, loaddr: *mut f32, a: __m256,
 ) {
     let lo = _mm256_castps256_ps128(a);
     _mm_storeu_ps(loaddr, lo);
@@ -3019,7 +3011,7 @@ pub unsafe fn _mm256_storeu2_m128(
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_storeu2_m128d(
-    hiaddr: *mut f64, loaddr: *mut f64, a: __m256d
+    hiaddr: *mut f64, loaddr: *mut f64, a: __m256d,
 ) {
     let lo = _mm256_castpd256_pd128(a);
     _mm_storeu_pd(loaddr, lo);
@@ -3037,7 +3029,7 @@ pub unsafe fn _mm256_storeu2_m128d(
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_storeu2_m128i(
-    hiaddr: *mut __m128i, loaddr: *mut __m128i, a: __m256i
+    hiaddr: *mut __m128i, loaddr: *mut __m128i, a: __m256i,
 ) {
     let lo = _mm256_castsi256_si128(a);
     _mm_storeu_si128(loaddr, lo);
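The `loadu2`/`storeu2` helpers stitch a 256-bit vector out of two independent, unaligned 128-bit halves (note the cast-then-insertf128 bodies); neither pointer needs 32-byte alignment. A usage sketch:

use std::arch::x86_64::*;

unsafe fn from_two_slices(lo: &[f32; 4], hi: &[f32; 4]) -> __m256 {
    // The high-half pointer comes first in the argument list.
    _mm256_loadu2_m128(hi.as_ptr(), lo.as_ptr())
}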
@@ -3500,20 +3492,11 @@ mod tests {
         let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.);
         let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.);
         let r = _mm256_blend_ps(a, b, 0x0);
-        assert_eq_m256(
-            r,
-            _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.),
-        );
+        assert_eq_m256(r, _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.));
         let r = _mm256_blend_ps(a, b, 0x3);
-        assert_eq_m256(
-            r,
-            _mm256_setr_ps(2., 3., 5., 8., 9., 12., 13., 16.),
-        );
+        assert_eq_m256(r, _mm256_setr_ps(2., 3., 5., 8., 9., 12., 13., 16.));
         let r = _mm256_blend_ps(a, b, 0xF);
-        assert_eq_m256(
-            r,
-            _mm256_setr_ps(2., 3., 6., 7., 9., 12., 13., 16.),
-        );
+        assert_eq_m256(r, _mm256_setr_ps(2., 3., 6., 7., 9., 12., 13., 16.));
     }
     #[simd_test(enable = "avx")]
@@ -3544,16 +3527,8 @@ mod tests {
         let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
         let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
         let r = _mm256_dp_ps(a, b, 0xFF);
-        let e = _mm256_setr_ps(
-            200.,
-            200.,
-            200.,
-            200.,
-            2387.,
-            2387.,
-            2387.,
-            2387.,
-        );
+        let e =
+            _mm256_setr_ps(200., 200., 200., 200., 2387., 2387., 2387., 2387.);
         assert_eq_m256(r, e);
     }
@@ -4234,9 +4209,7 @@ mod tests {
             pub data: [f64; 4],
         }
         let a = _mm256_set1_pd(7.0);
-        let mut mem = Memory {
-            data: [-1.0; 4],
-        };
+        let mut mem = Memory { data: [-1.0; 4] };
         _mm256_stream_pd(&mut mem.data[0] as *mut f64, a);
         for i in 0..4 {
@@ -4251,9 +4224,7 @@ mod tests {
             pub data: [f32; 8],
         }
         let a = _mm256_set1_ps(7.0);
-        let mut mem = Memory {
-            data: [-1.0; 8],
-        };
+        let mut mem = Memory { data: [-1.0; 8] };
         _mm256_stream_ps(&mut mem.data[0] as *mut f32, a);
         for i in 0..8 {
@@ -4534,10 +4505,7 @@ mod tests {
     #[simd_test(enable = "avx")]
     unsafe fn test_mm256_set_ps() {
         let r = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
-        assert_eq_m256(
-            r,
-            _mm256_setr_ps(8., 7., 6., 5., 4., 3., 2., 1.),
-        );
+        assert_eq_m256(r, _mm256_setr_ps(8., 7., 6., 5., 4., 3., 2., 1.));
     }
     #[simd_test(enable = "avx")]
@@ -4595,10 +4563,7 @@ mod tests {
     #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setr_ps() {
         let r = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
-        assert_eq_m256(
-            r,
-            _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.),
-        );
+        assert_eq_m256(r, _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.));
     }
     #[simd_test(enable = "avx")]
@@ -413,7 +413,7 @@ pub unsafe fn _mm_blend_epi32(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
 #[rustc_args_required_const(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_blend_epi32(
-    a: __m256i, b: __m256i, imm8: i32
+    a: __m256i, b: __m256i, imm8: i32,
 ) -> __m256i {
     let imm8 = (imm8 & 0xFF) as u8;
     let a = a.as_i32x8();
@@ -480,7 +480,7 @@ pub unsafe fn _mm256_blend_epi32(
 #[rustc_args_required_const(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_blend_epi16(
-    a: __m256i, b: __m256i, imm8: i32
+    a: __m256i, b: __m256i, imm8: i32,
 ) -> __m256i {
     let imm8 = (imm8 & 0xFF) as u8;
     let a = a.as_i16x16();
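In both blend intrinsics `imm8` is a per-lane selector: bit i set takes lane i of the result from `b`, clear takes it from `a` (for `_mm256_blend_epi16` the 8-bit mask is reused for each 128-bit half, which is what the blend3/blend4 machinery below encodes). A sketch with the signatures shown here:

use std::arch::x86_64::*;

unsafe fn low_from_b() -> __m256i {
    let a = _mm256_set1_epi32(0);
    let b = _mm256_set1_epi32(1);
    // 0x0F: lanes 0..=3 from `b`, lanes 4..=7 from `a`.
    _mm256_blend_epi32(a, b, 0x0F)
}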
@@ -531,76 +531,20 @@ pub unsafe fn _mm256_blend_epi16(
         ) => {
             match (imm8 >> 6) & 0b11 {
                 0b00 => blend4!(
-                    $a,
-                    $b,
-                    $c,
-                    $d,
-                    $e,
-                    $f,
-                    6,
-                    7,
-                    $a2,
-                    $b2,
-                    $c2,
-                    $d2,
-                    $e2,
-                    $f2,
-                    14,
-                    15
+                    $a, $b, $c, $d, $e, $f, 6, 7, $a2, $b2, $c2, $d2, $e2,
+                    $f2, 14, 15
                 ),
                 0b01 => blend4!(
-                    $a,
-                    $b,
-                    $c,
-                    $d,
-                    $e,
-                    $f,
-                    22,
-                    7,
-                    $a2,
-                    $b2,
-                    $c2,
-                    $d2,
-                    $e2,
-                    $f2,
-                    30,
-                    15
+                    $a, $b, $c, $d, $e, $f, 22, 7, $a2, $b2, $c2, $d2,
+                    $e2, $f2, 30, 15
                 ),
                 0b10 => blend4!(
-                    $a,
-                    $b,
-                    $c,
-                    $d,
-                    $e,
-                    $f,
-                    6,
-                    23,
-                    $a2,
-                    $b2,
-                    $c2,
-                    $d2,
-                    $e2,
-                    $f2,
-                    14,
-                    31
+                    $a, $b, $c, $d, $e, $f, 6, 23, $a2, $b2, $c2, $d2,
+                    $e2, $f2, 14, 31
                 ),
                 _ => blend4!(
-                    $a,
-                    $b,
-                    $c,
-                    $d,
-                    $e,
-                    $f,
-                    22,
-                    23,
-                    $a2,
-                    $b2,
-                    $c2,
-                    $d2,
-                    $e2,
-                    $f2,
-                    30,
-                    31
+                    $a, $b, $c, $d, $e, $f, 22, 23, $a2, $b2, $c2, $d2,
+                    $e2, $f2, 30, 31
                 ),
             }
         };
@@ -618,60 +562,16 @@ pub unsafe fn _mm256_blend_epi16(
         ) => {
             match (imm8 >> 4) & 0b11 {
                 0b00 => blend3!(
-                    $a,
-                    $b,
-                    $c,
-                    $d,
-                    4,
-                    5,
-                    $a2,
-                    $b2,
-                    $c2,
-                    $d2,
-                    12,
-                    13
+                    $a, $b, $c, $d, 4, 5, $a2, $b2, $c2, $d2, 12, 13
                 ),
                 0b01 => blend3!(
-                    $a,
-                    $b,
-                    $c,
-                    $d,
-                    20,
-                    5,
-                    $a2,
-                    $b2,
-                    $c2,
-                    $d2,
-                    28,
-                    13
+                    $a, $b, $c, $d, 20, 5, $a2, $b2, $c2, $d2, 28, 13
                 ),
                 0b10 => blend3!(
-                    $a,
-                    $b,
-                    $c,
-                    $d,
-                    4,
-                    21,
-                    $a2,
-                    $b2,
-                    $c2,
-                    $d2,
-                    12,
-                    29
+                    $a, $b, $c, $d, 4, 21, $a2, $b2, $c2, $d2, 12, 29
                 ),
                 _ => blend3!(
-                    $a,
-                    $b,
-                    $c,
-                    $d,
-                    20,
-                    21,
-                    $a2,
-                    $b2,
-                    $c2,
-                    $d2,
-                    28,
-                    29
+                    $a, $b, $c, $d, 20, 21, $a2, $b2, $c2, $d2, 28, 29
                 ),
             }
         };
@@ -703,13 +603,9 @@ pub unsafe fn _mm256_blend_epi16(
 #[cfg_attr(test, assert_instr(vpblendvb))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_blendv_epi8(
-    a: __m256i, b: __m256i, mask: __m256i
+    a: __m256i, b: __m256i, mask: __m256i,
 ) -> __m256i {
-    mem::transmute(pblendvb(
-        a.as_i8x32(),
-        b.as_i8x32(),
-        mask.as_i8x32(),
-    ))
+    mem::transmute(pblendvb(a.as_i8x32(), b.as_i8x32(), mask.as_i8x32()))
 }
 /// Broadcast the low packed 8-bit integer from `a` to all elements of
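Unlike the immediate blends above, `_mm256_blendv_epi8` takes its selector as a runtime vector: for each of the 32 bytes, a set MSB in `mask` picks the byte from `b`, otherwise from `a`. A sketch:

use std::arch::x86_64::*;

unsafe fn interleave_choice(a: __m256i, b: __m256i) -> __m256i {
    // Alternating selector: even bytes from `b` (MSB set), odd from `a`.
    let mask = _mm256_setr_epi8(
        -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0,
        -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0,
    );
    _mm256_blendv_epi8(a, b, mask)
}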
@@ -1226,7 +1122,7 @@ pub unsafe fn _mm256_hsubs_epi16(a: __m256i, b: __m256i) -> __m256i {
 #[rustc_args_required_const(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm_i32gather_epi32(
-    slice: *const i32, offsets: __m128i, scale: i32
+    slice: *const i32, offsets: __m128i, scale: i32,
 ) -> __m128i {
     let zero = _mm_setzero_si128().as_i32x4();
     let neg_one = _mm_set1_epi32(-1).as_i32x4();
@@ -1280,7 +1176,7 @@ pub unsafe fn _mm_mask_i32gather_epi32(
 #[rustc_args_required_const(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_i32gather_epi32(
-    slice: *const i32, offsets: __m256i, scale: i32
+    slice: *const i32, offsets: __m256i, scale: i32,
 ) -> __m256i {
     let zero = _mm256_setzero_si256().as_i32x8();
     let neg_one = _mm256_set1_epi32(-1).as_i32x8();
@@ -1334,7 +1230,7 @@ pub unsafe fn _mm256_mask_i32gather_epi32(
 #[rustc_args_required_const(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm_i32gather_ps(
-    slice: *const f32, offsets: __m128i, scale: i32
+    slice: *const f32, offsets: __m128i, scale: i32,
 ) -> __m128 {
     let zero = _mm_setzero_ps();
     let neg_one = _mm_set1_ps(-1.0);
@@ -1360,7 +1256,7 @@ pub unsafe fn _mm_i32gather_ps(
 #[rustc_args_required_const(4)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm_mask_i32gather_ps(
-    src: __m128, slice: *const f32, offsets: __m128i, mask: __m128, scale: i32
+    src: __m128, slice: *const f32, offsets: __m128i, mask: __m128, scale: i32,
 ) -> __m128 {
     let offsets = offsets.as_i32x4();
     let slice = slice as *const i8;
@@ -1383,7 +1279,7 @@ pub unsafe fn _mm_mask_i32gather_ps(
 #[rustc_args_required_const(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_i32gather_ps(
-    slice: *const f32, offsets: __m256i, scale: i32
+    slice: *const f32, offsets: __m256i, scale: i32,
 ) -> __m256 {
     let zero = _mm256_setzero_ps();
     let neg_one = _mm256_set1_ps(-1.0);
@@ -1409,7 +1305,7 @@ pub unsafe fn _mm256_i32gather_ps(
 #[rustc_args_required_const(4)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_mask_i32gather_ps(
-    src: __m256, slice: *const f32, offsets: __m256i, mask: __m256, scale: i32
+    src: __m256, slice: *const f32, offsets: __m256i, mask: __m256, scale: i32,
 ) -> __m256 {
     let offsets = offsets.as_i32x8();
     let slice = slice as *const i8;
@@ -1432,7 +1328,7 @@ pub unsafe fn _mm256_mask_i32gather_ps(
 #[rustc_args_required_const(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm_i32gather_epi64(
-    slice: *const i64, offsets: __m128i, scale: i32
+    slice: *const i64, offsets: __m128i, scale: i32,
 ) -> __m128i {
     let zero = _mm_setzero_si128().as_i64x2();
     let neg_one = _mm_set1_epi64x(-1).as_i64x2();
@@ -1486,7 +1382,7 @@ pub unsafe fn _mm_mask_i32gather_epi64(
 #[rustc_args_required_const(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_i32gather_epi64(
-    slice: *const i64, offsets: __m128i, scale: i32
+    slice: *const i64, offsets: __m128i, scale: i32,
 ) -> __m256i {
     let zero = _mm256_setzero_si256().as_i64x4();
     let neg_one = _mm256_set1_epi64x(-1).as_i64x4();
@@ -1540,7 +1436,7 @@ pub unsafe fn _mm256_mask_i32gather_epi64(
 #[rustc_args_required_const(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm_i32gather_pd(
-    slice: *const f64, offsets: __m128i, scale: i32
+    slice: *const f64, offsets: __m128i, scale: i32,
 ) -> __m128d {
     let zero = _mm_setzero_pd();
     let neg_one = _mm_set1_pd(-1.0);
@@ -1590,7 +1486,7 @@ pub unsafe fn _mm_mask_i32gather_pd(
 #[rustc_args_required_const(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_i32gather_pd(
-    slice: *const f64, offsets: __m128i, scale: i32
+    slice: *const f64, offsets: __m128i, scale: i32,
 ) -> __m256d {
     let zero = _mm256_setzero_pd();
     let neg_one = _mm256_set1_pd(-1.0);
@@ -1640,7 +1536,7 @@ pub unsafe fn _mm256_mask_i32gather_pd(
 #[rustc_args_required_const(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm_i64gather_epi32(
-    slice: *const i32, offsets: __m128i, scale: i32
+    slice: *const i32, offsets: __m128i, scale: i32,
 ) -> __m128i {
     let zero = _mm_setzero_si128().as_i32x4();
     let neg_one = _mm_set1_epi64x(-1).as_i32x4();
@@ -1694,7 +1590,7 @@ pub unsafe fn _mm_mask_i64gather_epi32(
 #[rustc_args_required_const(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_i64gather_epi32(
-    slice: *const i32, offsets: __m256i, scale: i32
+    slice: *const i32, offsets: __m256i, scale: i32,
 ) -> __m128i {
     let zero = _mm_setzero_si128().as_i32x4();
     let neg_one = _mm_set1_epi64x(-1).as_i32x4();
@@ -1748,7 +1644,7 @@ pub unsafe fn _mm256_mask_i64gather_epi32(
 #[rustc_args_required_const(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm_i64gather_ps(
-    slice: *const f32, offsets: __m128i, scale: i32
+    slice: *const f32, offsets: __m128i, scale: i32,
 ) -> __m128 {
     let zero = _mm_setzero_ps();
     let neg_one = _mm_set1_ps(-1.0);
@@ -1774,7 +1670,7 @@ pub unsafe fn _mm_i64gather_ps(
 #[rustc_args_required_const(4)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm_mask_i64gather_ps(
-    src: __m128, slice: *const f32, offsets: __m128i, mask: __m128, scale: i32
+    src: __m128, slice: *const f32, offsets: __m128i, mask: __m128, scale: i32,
 ) -> __m128 {
     let offsets = offsets.as_i64x2();
     let slice = slice as *const i8;
@@ -1797,7 +1693,7 @@ pub unsafe fn _mm_mask_i64gather_ps(
 #[rustc_args_required_const(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_i64gather_ps(
-    slice: *const f32, offsets: __m256i, scale: i32
+    slice: *const f32, offsets: __m256i, scale: i32,
 ) -> __m128 {
     let zero = _mm_setzero_ps();
     let neg_one = _mm_set1_ps(-1.0);
@@ -1823,7 +1719,7 @@ pub unsafe fn _mm256_i64gather_ps(
 #[rustc_args_required_const(4)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_mask_i64gather_ps(
-    src: __m128, slice: *const f32, offsets: __m256i, mask: __m128, scale: i32
+    src: __m128, slice: *const f32, offsets: __m256i, mask: __m128, scale: i32,
 ) -> __m128 {
     let offsets = offsets.as_i64x4();
     let slice = slice as *const i8;
@@ -1846,7 +1742,7 @@ pub unsafe fn _mm256_mask_i64gather_ps(
 #[rustc_args_required_const(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm_i64gather_epi64(
-    slice: *const i64, offsets: __m128i, scale: i32
+    slice: *const i64, offsets: __m128i, scale: i32,
 ) -> __m128i {
     let zero = _mm_setzero_si128().as_i64x2();
     let neg_one = _mm_set1_epi64x(-1).as_i64x2();
@@ -1900,7 +1796,7 @@ pub unsafe fn _mm_mask_i64gather_epi64(
 #[rustc_args_required_const(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_i64gather_epi64(
-    slice: *const i64, offsets: __m256i, scale: i32
+    slice: *const i64, offsets: __m256i, scale: i32,
 ) -> __m256i {
     let zero = _mm256_setzero_si256().as_i64x4();
     let neg_one = _mm256_set1_epi64x(-1).as_i64x4();
@@ -1954,7 +1850,7 @@ pub unsafe fn _mm256_mask_i64gather_epi64(
 #[rustc_args_required_const(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm_i64gather_pd(
-    slice: *const f64, offsets: __m128i, scale: i32
+    slice: *const f64, offsets: __m128i, scale: i32,
 ) -> __m128d {
     let zero = _mm_setzero_pd();
     let neg_one = _mm_set1_pd(-1.0);
@@ -2004,7 +1900,7 @@ pub unsafe fn _mm_mask_i64gather_pd(
 #[rustc_args_required_const(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_i64gather_pd(
-    slice: *const f64, offsets: __m256i, scale: i32
+    slice: *const f64, offsets: __m256i, scale: i32,
 ) -> __m256d {
     let zero = _mm256_setzero_pd();
     let neg_one = _mm256_set1_pd(-1.0);
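All of these gathers share one shape: lane i of the result is loaded from `slice_ptr + offsets[i] * scale` (with `scale` in bytes and, per the `rustc_args_required_const` attributes, required to be a constant), and the `mask` variants substitute `src` lanes where the mask MSB is clear. A hedged sketch with the plain variant:

use std::arch::x86_64::*;

unsafe fn every_other(table: &[i32]) -> __m128i {
    // Gathers table[0], table[2], table[4], table[6]: scale 4 converts
    // the element offsets into byte addresses.
    let offsets = _mm_setr_epi32(0, 2, 4, 6);
    _mm_i32gather_epi32(table.as_ptr(), offsets, 4)
}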
@@ -2053,7 +1949,7 @@ pub unsafe fn _mm256_mask_i64gather_pd(
 #[rustc_args_required_const(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_inserti128_si256(
-    a: __m256i, b: __m128i, imm8: i32
+    a: __m256i, b: __m128i, imm8: i32,
 ) -> __m256i {
     let a = a.as_i64x4();
     let b = _mm256_castsi128_si256(b).as_i64x4();
@@ -2101,12 +1997,9 @@ pub unsafe fn _mm256_maddubs_epi16(a: __m256i, b: __m256i) -> __m256i {
 #[cfg_attr(test, assert_instr(vpmaskmovd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm_maskload_epi32(
-    mem_addr: *const i32, mask: __m128i
+    mem_addr: *const i32, mask: __m128i,
 ) -> __m128i {
-    mem::transmute(maskloadd(
-        mem_addr as *const i8,
-        mask.as_i32x4(),
-    ))
+    mem::transmute(maskloadd(mem_addr as *const i8, mask.as_i32x4()))
 }
 /// Load packed 32-bit integers from memory pointed by `mem_addr` using `mask`
@@ -2119,12 +2012,9 @@ pub unsafe fn _mm_maskload_epi32(
 #[cfg_attr(test, assert_instr(vpmaskmovd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_maskload_epi32(
-    mem_addr: *const i32, mask: __m256i
+    mem_addr: *const i32, mask: __m256i,
 ) -> __m256i {
-    mem::transmute(maskloadd256(
-        mem_addr as *const i8,
-        mask.as_i32x8(),
-    ))
+    mem::transmute(maskloadd256(mem_addr as *const i8, mask.as_i32x8()))
 }
 /// Load packed 64-bit integers from memory pointed by `mem_addr` using `mask`
@@ -2137,12 +2027,9 @@ pub unsafe fn _mm256_maskload_epi32(
 #[cfg_attr(test, assert_instr(vpmaskmovq))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm_maskload_epi64(
-    mem_addr: *const i64, mask: __m128i
+    mem_addr: *const i64, mask: __m128i,
 ) -> __m128i {
-    mem::transmute(maskloadq(
-        mem_addr as *const i8,
-        mask.as_i64x2(),
-    ))
+    mem::transmute(maskloadq(mem_addr as *const i8, mask.as_i64x2()))
 }
 /// Load packed 64-bit integers from memory pointed by `mem_addr` using `mask`
@@ -2155,12 +2042,9 @@ pub unsafe fn _mm_maskload_epi64(
 #[cfg_attr(test, assert_instr(vpmaskmovq))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_maskload_epi64(
-    mem_addr: *const i64, mask: __m256i
+    mem_addr: *const i64, mask: __m256i,
 ) -> __m256i {
-    mem::transmute(maskloadq256(
-        mem_addr as *const i8,
-        mask.as_i64x4(),
-    ))
+    mem::transmute(maskloadq256(mem_addr as *const i8, mask.as_i64x4()))
 }
 /// Store packed 32-bit integers from `a` into memory pointed by `mem_addr`
@@ -2173,13 +2057,9 @@ pub unsafe fn _mm256_maskload_epi64(
 #[cfg_attr(test, assert_instr(vpmaskmovd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm_maskstore_epi32(
-    mem_addr: *mut i32, mask: __m128i, a: __m128i
+    mem_addr: *mut i32, mask: __m128i, a: __m128i,
 ) {
-    maskstored(
-        mem_addr as *mut i8,
-        mask.as_i32x4(),
-        a.as_i32x4(),
-    )
+    maskstored(mem_addr as *mut i8, mask.as_i32x4(), a.as_i32x4())
 }
 /// Store packed 32-bit integers from `a` into memory pointed by `mem_addr`
@@ -2192,13 +2072,9 @@ pub unsafe fn _mm_maskstore_epi32(
 #[cfg_attr(test, assert_instr(vpmaskmovd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_maskstore_epi32(
-    mem_addr: *mut i32, mask: __m256i, a: __m256i
+    mem_addr: *mut i32, mask: __m256i, a: __m256i,
 ) {
-    maskstored256(
-        mem_addr as *mut i8,
-        mask.as_i32x8(),
-        a.as_i32x8(),
-    )
+    maskstored256(mem_addr as *mut i8, mask.as_i32x8(), a.as_i32x8())
 }
 /// Store packed 64-bit integers from `a` into memory pointed by `mem_addr`
@@ -2211,13 +2087,9 @@ pub unsafe fn _mm256_maskstore_epi32(
 #[cfg_attr(test, assert_instr(vpmaskmovq))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm_maskstore_epi64(
-    mem_addr: *mut i64, mask: __m128i, a: __m128i
+    mem_addr: *mut i64, mask: __m128i, a: __m128i,
 ) {
-    maskstoreq(
-        mem_addr as *mut i8,
-        mask.as_i64x2(),
-        a.as_i64x2(),
-    )
+    maskstoreq(mem_addr as *mut i8, mask.as_i64x2(), a.as_i64x2())
 }
 /// Store packed 64-bit integers from `a` into memory pointed by `mem_addr`
@@ -2230,13 +2102,9 @@ pub unsafe fn _mm_maskstore_epi64(
 #[cfg_attr(test, assert_instr(vpmaskmovq))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_maskstore_epi64(
-    mem_addr: *mut i64, mask: __m256i, a: __m256i
+    mem_addr: *mut i64, mask: __m256i, a: __m256i,
 ) {
-    maskstoreq256(
-        mem_addr as *mut i8,
-        mask.as_i64x4(),
-        a.as_i64x4(),
-    )
+    maskstoreq256(mem_addr as *mut i8, mask.as_i64x4(), a.as_i64x4())
 }
 /// Compare packed 16-bit integers in `a` and `b`, and return the packed
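The integer maskload/maskstore pairs follow the same MSB-per-lane convention as the AVX floating-point ones earlier in this commit. A sketch:

use std::arch::x86_64::*;

unsafe fn load_first_three(src: &[i32; 4]) -> __m128i {
    // Lane 3's mask MSB is clear, so that element is never read and the
    // lane comes back as zero.
    let mask = _mm_setr_epi32(-1, -1, -1, 0);
    _mm_maskload_epi32(src.as_ptr(), mask)
}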
@@ -2410,7 +2278,7 @@ pub unsafe fn _mm256_movemask_epi8(a: __m256i) -> i32 {
 #[rustc_args_required_const(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_mpsadbw_epu8(
-    a: __m256i, b: __m256i, imm8: i32
+    a: __m256i, b: __m256i, imm8: i32,
 ) -> __m256i {
     let a = a.as_u8x32();
     let b = b.as_u8x32();
@@ -2656,7 +2524,7 @@ pub unsafe fn _mm256_permute4x64_epi64(a: __m256i, imm8: i32) -> __m256i {
 #[rustc_args_required_const(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_permute2x128_si256(
-    a: __m256i, b: __m256i, imm8: i32
+    a: __m256i, b: __m256i, imm8: i32,
 ) -> __m256i {
     let a = a.as_i64x4();
     let b = b.as_i64x4();
@@ -3559,16 +3427,23 @@ pub unsafe fn _mm256_subs_epu8(a: __m256i, b: __m256i) -> __m256i {
 /// # if is_x86_feature_detected!("avx2") {
 /// # #[target_feature(enable = "avx2")]
 /// # unsafe fn worker() {
-/// let a = _mm256_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
-///     16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
-/// let b = _mm256_setr_epi8(0,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,
-///     -16,-17,-18,-19,-20,-21,-22,-23,-24,-25,-26,-27,-28,-29,-30,-31);
+/// let a = _mm256_setr_epi8(
+///     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+///     20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+/// );
+/// let b = _mm256_setr_epi8(
+///     0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15,
+///     -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
+///     -30, -31,
+/// );
 ///
 /// let c = _mm256_unpackhi_epi8(a, b);
 ///
-/// let expected = _mm256_setr_epi8(8,-8, 9,-9, 10,-10, 11,-11, 12,-12, 13,-13,
-///     14,-14, 15,-15, 24,-24, 25,-25, 26,-26, 27,-27, 28,-28, 29,-29, 30,-30,
-///     31,-31);
+/// let expected = _mm256_setr_epi8(
+///     8, -8, 9, -9, 10, -10, 11, -11, 12, -12, 13, -13, 14, -14, 15, -15,
+///     24, -24, 25, -25, 26, -26, 27, -27, 28, -28, 29, -29, 30, -30, 31,
+///     -31,
+/// );
 /// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
 ///
 /// # }
@@ -3612,15 +3487,22 @@ pub unsafe fn _mm256_unpackhi_epi8(a: __m256i, b: __m256i) -> __m256i {
 /// # if is_x86_feature_detected!("avx2") {
 /// # #[target_feature(enable = "avx2")]
 /// # unsafe fn worker() {
-/// let a = _mm256_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
-///     16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
-/// let b = _mm256_setr_epi8(0,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,
-///     -16,-17,-18,-19,-20,-21,-22,-23,-24,-25,-26,-27,-28,-29,-30,-31);
+/// let a = _mm256_setr_epi8(
+///     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+///     20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+/// );
+/// let b = _mm256_setr_epi8(
+///     0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15,
+///     -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
+///     -30, -31,
+/// );
 ///
 /// let c = _mm256_unpacklo_epi8(a, b);
 ///
-/// let expected = _mm256_setr_epi8(0, 0, 1,-1, 2,-2, 3,-3, 4,-4, 5,-5, 6,-6, 7,-7,
-///     16,-16, 17,-17, 18,-18, 19,-19, 20,-20, 21,-21, 22,-22, 23,-23);
+/// let expected = _mm256_setr_epi8(
+///     0, 0, 1, -1, 2, -2, 3, -3, 4, -4, 5, -5, 6, -6, 7, -7, 16, -16, 17,
+///     -17, 18, -18, 19, -19, 20, -20, 21, -21, 22, -22, 23, -23,
+/// );
 /// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
 ///
 /// # }
@@ -3664,13 +3546,18 @@ pub unsafe fn _mm256_unpacklo_epi8(a: __m256i, b: __m256i) -> __m256i {
 /// # if is_x86_feature_detected!("avx2") {
 /// # #[target_feature(enable = "avx2")]
 /// # unsafe fn worker() {
-/// let a = _mm256_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
-/// let b = _mm256_setr_epi16(0,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15);
+/// let a = _mm256_setr_epi16(
+///     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+/// );
+/// let b = _mm256_setr_epi16(
+///     0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15,
+/// );
 ///
 /// let c = _mm256_unpackhi_epi16(a, b);
 ///
-/// let expected = _mm256_setr_epi16(4,-4, 5,-5, 6,-6, 7,-7, 12,-12, 13,-13, 14,-14,
-///     15,-15);
+/// let expected = _mm256_setr_epi16(
+///     4, -4, 5, -5, 6, -6, 7, -7, 12, -12, 13, -13, 14, -14, 15, -15,
+/// );
 /// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
 ///
 /// # }
@@ -3688,9 +3575,7 @@ pub unsafe fn _mm256_unpackhi_epi16(a: __m256i, b: __m256i) -> __m256i {
     let r: i16x16 = simd_shuffle16(
         a.as_i16x16(),
         b.as_i16x16(),
-        [
-            4, 20, 5, 21, 6, 22, 7, 23, 12, 28, 13, 29, 14, 30, 15, 31
-        ],
+        [4, 20, 5, 21, 6, 22, 7, 23, 12, 28, 13, 29, 14, 30, 15, 31],
     );
     mem::transmute(r)
 }
@@ -3715,13 +3600,18 @@ pub unsafe fn _mm256_unpackhi_epi16(a: __m256i, b: __m256i) -> __m256i {
 /// # #[target_feature(enable = "avx2")]
 /// # unsafe fn worker() {
 ///
-/// let a = _mm256_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
-/// let b = _mm256_setr_epi16(0,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15);
+/// let a = _mm256_setr_epi16(
+///     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+/// );
+/// let b = _mm256_setr_epi16(
+///     0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15,
+/// );
 ///
 /// let c = _mm256_unpacklo_epi16(a, b);
 ///
-/// let expected = _mm256_setr_epi16(0, 0, 1,-1, 2,-2, 3,-3, 8,-8, 9,-9, 10,-10,
-///     11,-11);
+/// let expected = _mm256_setr_epi16(
+///     0, 0, 1, -1, 2, -2, 3, -3, 8, -8, 9, -9, 10, -10, 11, -11,
+/// );
 /// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
 ///
 /// # }
@@ -3739,9 +3629,7 @@ pub unsafe fn _mm256_unpacklo_epi16(a: __m256i, b: __m256i) -> __m256i {
     let r: i16x16 = simd_shuffle16(
         a.as_i16x16(),
         b.as_i16x16(),
-        [
-            0, 16, 1, 17, 2, 18, 3, 19, 8, 24, 9, 25, 10, 26, 11, 27
-        ],
+        [0, 16, 1, 17, 2, 18, 3, 19, 8, 24, 9, 25, 10, 26, 11, 27],
     );
     mem::transmute(r)
 }
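The shuffle indices made visible by this reformat (4, 20, 5, 21, ..., 12, 28, ...) show why the doc examples above look "split": the unpack intrinsics interleave within each 128-bit lane independently and never mix the low and high halves of the 256-bit register. A sketch of the consequence:

use std::arch::x86_64::*;

unsafe fn lanewise() -> __m256i {
    let a = _mm256_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    let b = _mm256_set1_epi16(-1);
    // Result lanes: 4, -1, 5, -1, 6, -1, 7, -1, 12, -1, 13, -1, 14, -1, 15, -1.
    _mm256_unpackhi_epi16(a, b)
}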
@@ -3832,11 +3720,8 @@ pub unsafe fn _mm256_unpackhi_epi32(a: __m256i, b: __m256i) -> __m256i {
 #[cfg_attr(test, assert_instr(vunpcklps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_unpacklo_epi32(a: __m256i, b: __m256i) -> __m256i {
-    let r: i32x8 = simd_shuffle8(
-        a.as_i32x8(),
-        b.as_i32x8(),
-        [0, 8, 1, 9, 4, 12, 5, 13],
-    );
+    let r: i32x8 =
+        simd_shuffle8(a.as_i32x8(), b.as_i32x8(), [0, 8, 1, 9, 4, 12, 5, 13]);
     mem::transmute(r)
 }
@@ -4183,35 +4068,35 @@ extern "C" {
     fn vperm2i128(a: i64x4, b: i64x4, imm8: i8) -> i64x4;
     #[link_name = "llvm.x86.avx2.gather.d.d"]
     fn pgatherdd(
-        src: i32x4, slice: *const i8, offsets: i32x4, mask: i32x4, scale: i8
+        src: i32x4, slice: *const i8, offsets: i32x4, mask: i32x4, scale: i8,
     ) -> i32x4;
     #[link_name = "llvm.x86.avx2.gather.d.d.256"]
     fn vpgatherdd(
-        src: i32x8, slice: *const i8, offsets: i32x8, mask: i32x8, scale: i8
+        src: i32x8, slice: *const i8, offsets: i32x8, mask: i32x8, scale: i8,
     ) -> i32x8;
     #[link_name = "llvm.x86.avx2.gather.d.q"]
     fn pgatherdq(
-        src: i64x2, slice: *const i8, offsets: i32x4, mask: i64x2, scale: i8
+        src: i64x2, slice: *const i8, offsets: i32x4, mask: i64x2, scale: i8,
     ) -> i64x2;
     #[link_name = "llvm.x86.avx2.gather.d.q.256"]
     fn vpgatherdq(
-        src: i64x4, slice: *const i8, offsets: i32x4, mask: i64x4, scale: i8
+        src: i64x4, slice: *const i8, offsets: i32x4, mask: i64x4, scale: i8,
     ) -> i64x4;
     #[link_name = "llvm.x86.avx2.gather.q.d"]
     fn pgatherqd(
-        src: i32x4, slice: *const i8, offsets: i64x2, mask: i32x4, scale: i8
+        src: i32x4, slice: *const i8, offsets: i64x2, mask: i32x4, scale: i8,
     ) -> i32x4;
     #[link_name = "llvm.x86.avx2.gather.q.d.256"]
     fn vpgatherqd(
-        src: i32x4, slice: *const i8, offsets: i64x4, mask: i32x4, scale: i8
+        src: i32x4, slice: *const i8, offsets: i64x4, mask: i32x4, scale: i8,
     ) -> i32x4;
     #[link_name = "llvm.x86.avx2.gather.q.q"]
     fn pgatherqq(
-        src: i64x2, slice: *const i8, offsets: i64x2, mask: i64x2, scale: i8
+        src: i64x2, slice: *const i8, offsets: i64x2, mask: i64x2, scale: i8,
     ) -> i64x2;
     #[link_name = "llvm.x86.avx2.gather.q.q.256"]
     fn vpgatherqq(
-        src: i64x4, slice: *const i8, offsets: i64x4, mask: i64x4, scale: i8
+        src: i64x4, slice: *const i8, offsets: i64x4, mask: i64x4, scale: i8,
     ) -> i64x4;
     #[link_name = "llvm.x86.avx2.gather.d.pd"]
     fn pgatherdpd(
@@ -4235,19 +4120,19 @@ extern "C" {
     ) -> __m256d;
     #[link_name = "llvm.x86.avx2.gather.d.ps"]
     fn pgatherdps(
-        src: __m128, slice: *const i8, offsets: i32x4, mask: __m128, scale: i8
+        src: __m128, slice: *const i8, offsets: i32x4, mask: __m128, scale: i8,
     ) -> __m128;
     #[link_name = "llvm.x86.avx2.gather.d.ps.256"]
     fn vpgatherdps(
-        src: __m256, slice: *const i8, offsets: i32x8, mask: __m256, scale: i8
+        src: __m256, slice: *const i8, offsets: i32x8, mask: __m256, scale: i8,
     ) -> __m256;
     #[link_name = "llvm.x86.avx2.gather.q.ps"]
     fn pgatherqps(
-        src: __m128, slice: *const i8, offsets: i64x2, mask: __m128, scale: i8
+        src: __m128, slice: *const i8, offsets: i64x2, mask: __m128, scale: i8,
     ) -> __m128;
     #[link_name = "llvm.x86.avx2.gather.q.ps.256"]
     fn vpgatherqps(
-        src: __m128, slice: *const i8, offsets: i64x4, mask: __m128, scale: i8
+        src: __m128, slice: *const i8, offsets: i64x4, mask: __m128, scale: i8,
     ) -> __m128;
     #[link_name = "llvm.x86.avx2.psll.dq"]
     fn vpslldq(a: i64x4, b: i32) -> i64x4;
@@ -4718,10 +4603,7 @@ mod tests {
             7, 6, 5, 4, 3, 2, 1, 0,
         );
         let r = _mm256_cmpeq_epi8(a, b);
-        assert_eq_m256i(
-            r,
-            _mm256_insert_epi8(_mm256_set1_epi8(0), !0, 2),
-        );
+        assert_eq_m256i(r, _mm256_insert_epi8(_mm256_set1_epi8(0), !0, 2));
     }
     #[simd_test(enable = "avx2")]
@@ -4737,10 +4619,7 @@ mod tests {
             7, 6, 5, 4, 3, 2, 1, 0,
         );
         let r = _mm256_cmpeq_epi16(a, b);
-        assert_eq_m256i(
-            r,
-            _mm256_insert_epi16(_mm256_set1_epi16(0), !0, 2),
-        );
+        assert_eq_m256i(r, _mm256_insert_epi16(_mm256_set1_epi16(0), !0, 2));
     }
     #[simd_test(enable = "avx2")]
@@ -4758,10 +4637,7 @@ mod tests {
         let a = _mm256_setr_epi64x(0, 1, 2, 3);
         let b = _mm256_setr_epi64x(3, 2, 2, 0);
         let r = _mm256_cmpeq_epi64(a, b);
-        assert_eq_m256i(
-            r,
-            _mm256_insert_epi64(_mm256_set1_epi64x(0), !0, 2),
-        );
+        assert_eq_m256i(r, _mm256_insert_epi64(_mm256_set1_epi64x(0), !0, 2));
     }
     #[simd_test(enable = "avx2")]
@@ -4769,10 +4645,7 @@ mod tests {
         let a = _mm256_insert_epi8(_mm256_set1_epi8(0), 5, 0);
         let b = _mm256_set1_epi8(0);
         let r = _mm256_cmpgt_epi8(a, b);
-        assert_eq_m256i(
-            r,
-            _mm256_insert_epi8(_mm256_set1_epi8(0), !0, 0),
-        );
+        assert_eq_m256i(r, _mm256_insert_epi8(_mm256_set1_epi8(0), !0, 0));
     }
     #[simd_test(enable = "avx2")]
@@ -4780,10 +4653,7 @@ mod tests {
         let a = _mm256_insert_epi16(_mm256_set1_epi16(0), 5, 0);
         let b = _mm256_set1_epi16(0);
         let r = _mm256_cmpgt_epi16(a, b);
-        assert_eq_m256i(
-            r,
-            _mm256_insert_epi16(_mm256_set1_epi16(0), !0, 0),
-        );
+        assert_eq_m256i(r, _mm256_insert_epi16(_mm256_set1_epi16(0), !0, 0));
     }
     #[simd_test(enable = "avx2")]
@@ -4791,10 +4661,7 @@ mod tests {
         let a = _mm256_insert_epi32(_mm256_set1_epi32(0), 5, 0);
         let b = _mm256_set1_epi32(0);
         let r = _mm256_cmpgt_epi32(a, b);
-        assert_eq_m256i(
-            r,
-            _mm256_insert_epi32(_mm256_set1_epi32(0), !0, 0),
-        );
+        assert_eq_m256i(r, _mm256_insert_epi32(_mm256_set1_epi32(0), !0, 0));
     }
     #[simd_test(enable = "avx2")]
@@ -4802,10 +4669,7 @@ mod tests {
         let a = _mm256_insert_epi64(_mm256_set1_epi64x(0), 5, 0);
         let b = _mm256_set1_epi64x(0);
         let r = _mm256_cmpgt_epi64(a, b);
-        assert_eq_m256i(
-            r,
-            _mm256_insert_epi64(_mm256_set1_epi64x(0), !0, 0),
-        );
+        assert_eq_m256i(r, _mm256_insert_epi64(_mm256_set1_epi64x(0), !0, 0));
     }
     #[simd_test(enable = "avx2")]
@@ -5997,16 +5861,7 @@ mod tests {
         );
         assert_eq_m256(
             r,
-            _mm256_setr_ps(
-                0.0,
-                16.0,
-                64.0,
-                256.0,
-                256.0,
-                256.0,
-                256.0,
-                256.0,
-            ),
+            _mm256_setr_ps(0.0, 16.0, 64.0, 256.0, 256.0, 256.0, 256.0, 256.0),
         );
     }
@@ -21,17 +21,14 @@ use stdsimd_test::assert_instr;
#[cfg_attr(test, assert_instr(bextr))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _bextr_u32(a: u32, start: u32, len: u32) -> u32 {
    _bextr2_u32(a, (start & 0xff_u32) | ((len & 0xff_u32) << 8_u32))
}
/// Extracts bits of `a` specified by `control` into
/// the least significant bits of the result.
///
/// Bits `[7,0]` of `control` specify the index to the first bit in the range
/// to be extracted, and bits `[15,8]` specify the length of the range.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bextr2_u32)
#[inline]
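// Editor's note: an illustrative sketch, not part of this commit, showing
// how the packed control word built above behaves in plain Rust.
fn bextr_control_demo() {
    let (a, start, len) = (0xABCD_u32, 8u32, 4u32);
    let control = (start & 0xff) | ((len & 0xff) << 8);
    assert_eq!(control, 0x0408);
    // Software equivalent of BEXTR for len < 32:
    let extracted = (a >> start) & ((1u32 << len) - 1);
    assert_eq!(extracted, 0xB); // bits [11:8] of 0xABCD
}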
@@ -58,7 +58,7 @@ pub unsafe fn _fxrstor(mem_addr: *const u8) {
#[cfg(test)]
mod tests {
use coresimd::x86::*;
use std::{cmp::PartialEq, fmt};
use stdsimd_test::simd_test;
#[repr(align(16))]
@@ -380,7 +380,7 @@ pub unsafe fn _mm_set_pi32(e1: i32, e0: i32) -> __m64 {
#[inline]
#[target_feature(enable = "mmx")]
pub unsafe fn _mm_set_pi8(
    e7: i8, e6: i8, e5: i8, e4: i8, e3: i8, e2: i8, e1: i8, e0: i8,
) -> __m64 {
    _mm_setr_pi8(e0, e1, e2, e3, e4, e5, e6, e7)
}
@@ -426,7 +426,7 @@ pub unsafe fn _mm_setr_pi32(e0: i32, e1: i32) -> __m64 {
#[inline]
#[target_feature(enable = "mmx")]
pub unsafe fn _mm_setr_pi8(
    e0: i8, e1: i8, e2: i8, e3: i8, e4: i8, e5: i8, e6: i8, e7: i8,
) -> __m64 {
    mem::transmute(i8x8::new(e0, e1, e2, e3, e4, e5, e6, e7))
}
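// Editor's note: illustration only, not part of this commit. `_mm_set_pi8`
// takes lanes high-to-low and `_mm_setr_pi8` low-to-high, which is why the
// bodies above simply reverse their arguments (assert helper assumed from
// this crate's own tests).
#[target_feature(enable = "mmx")]
unsafe fn set_vs_setr_demo() {
    let x = _mm_set_pi8(7, 6, 5, 4, 3, 2, 1, 0);
    let y = _mm_setr_pi8(0, 1, 2, 3, 4, 5, 6, 7);
    assert_eq_m64(x, y); // same vector, two argument orders
}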
@@ -514,12 +514,8 @@ mod tests {
    -30001,
    i16::max_value() - 1,
);
let e =
    _mm_setr_pi16(i16::min_value(), 30000, -30000, i16::max_value());
assert_eq_m64(e, _mm_add_pi16(a, b));
assert_eq_m64(e, _m_paddw(a, b));
}
@@ -537,16 +533,8 @@ mod tests {
unsafe fn test_mm_adds_pi8() {
let a = _mm_setr_pi8(-100, -1, 1, 100, -1, 0, 1, 0);
let b = _mm_setr_pi8(-100, 1, -1, 100, 0, -1, 0, 1);
let e =
    _mm_setr_pi8(i8::min_value(), 0, 0, i8::max_value(), -1, -1, 1, 1);
assert_eq_m64(e, _mm_adds_pi8(a, b));
assert_eq_m64(e, _m_paddsb(a, b));
}
@@ -444,11 +444,12 @@ impl m256iExt for __m256i {
}
}
use coresimd::simd::{
    f32x2, f32x4, f32x8, f64x2, f64x4, i16x16, i16x4, i16x8, i32x2, i32x4,
    i32x8, i64x2, i64x4, i8x16, i8x32, i8x8, m16x16, m16x4, m16x8, m32x2,
    m32x4, m32x8, m64x2, m64x4, m8x16, m8x32, m8x8, u16x16, u16x4, u16x8,
    u32x2, u32x4, u32x8, u64x2, u64x4, u8x16, u8x32, u8x8,
};
impl_from_bits_!(
    __m64: u32x2,
@@ -25,20 +25,25 @@ extern "C" {
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_clmulepi64_si128)
#[inline]
#[target_feature(enable = "pclmulqdq")]
#[cfg_attr(
    all(test, not(target_os = "linux")), assert_instr(pclmulqdq, imm8 = 0)
)]
#[cfg_attr(
    all(test, target_os = "linux"), assert_instr(pclmullqlqdq, imm8 = 0)
)]
#[cfg_attr(
    all(test, target_os = "linux"), assert_instr(pclmulhqlqdq, imm8 = 1)
)]
#[cfg_attr(
    all(test, target_os = "linux"), assert_instr(pclmullqhqdq, imm8 = 16)
)]
#[cfg_attr(
    all(test, target_os = "linux"), assert_instr(pclmulhqhqdq, imm8 = 17)
)]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_clmulepi64_si128(
    a: __m128i, b: __m128i, imm8: i32,
) -> __m128i {
    macro_rules! call {
        ($imm8:expr) => {
@@ -1,4 +1,3 @@
//! RDRAND and RDSEED instructions for returning random numbers from an Intel
//! on-chip hardware random number generator which has been seeded by an
//! on-chip entropy source.
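// Editor's note: a hedged usage sketch, not part of this commit. RDRAND
// can transiently fail, so callers usually retry; this assumes the
// `_rdrand32_step(&mut u32) -> i32` signature stabilized in 1.27
// (returns 1 on success).
#[target_feature(enable = "rdrand")]
unsafe fn next_random_u32() -> Option<u32> {
    let mut v = 0u32;
    for _ in 0..10 {
        if _rdrand32_step(&mut v) == 1 {
            return Some(v);
        }
    }
    None // persistent failure; fall back to another entropy source
}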
@@ -75,7 +75,7 @@ pub unsafe fn _mm_sha1nexte_epu32(a: __m128i, b: __m128i) -> __m128i {
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sha1rnds4_epu32(
    a: __m128i, b: __m128i, func: i32,
) -> __m128i {
    let a = a.as_i32x4();
    let b = b.as_i32x4();
@@ -126,13 +126,9 @@ pub unsafe fn _mm_sha256msg2_epu32(a: __m128i, b: __m128i) -> __m128i {
#[cfg_attr(test, assert_instr(sha256rnds2))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sha256rnds2_epu32(
    a: __m128i, b: __m128i, k: __m128i,
) -> __m128i {
    mem::transmute(sha256rnds2(a.as_i32x4(), b.as_i32x4(), k.as_i32x4()))
}
#[cfg(test)]
@@ -230,8 +230,10 @@ pub unsafe fn _mm_max_ps(a: __m128, b: __m128) -> __m128 {
#[inline]
#[target_feature(enable = "sse")]
// i586 only seems to generate plain `and` instructions, so ignore it.
#[cfg_attr(
    all(test, any(target_arch = "x86_64", target_feature = "sse2")),
    assert_instr(andps)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_and_ps(a: __m128, b: __m128) -> __m128 {
    let a: __m128i = mem::transmute(a);
@@ -249,8 +251,10 @@ pub unsafe fn _mm_and_ps(a: __m128, b: __m128) -> __m128 {
#[target_feature(enable = "sse")]
// i586 only seems to generate plain `not` and `and` instructions, so ignore
// it.
#[cfg_attr(
    all(test, any(target_arch = "x86_64", target_feature = "sse2")),
    assert_instr(andnps)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_andnot_ps(a: __m128, b: __m128) -> __m128 {
    let a: __m128i = mem::transmute(a);
@@ -265,8 +269,10 @@ pub unsafe fn _mm_andnot_ps(a: __m128, b: __m128) -> __m128 {
#[inline]
#[target_feature(enable = "sse")]
// i586 only seems to generate plain `or` instructions, so we ignore it.
#[cfg_attr(
    all(test, any(target_arch = "x86_64", target_feature = "sse2")),
    assert_instr(orps)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_or_ps(a: __m128, b: __m128) -> __m128 {
    let a: __m128i = mem::transmute(a);
@@ -281,8 +287,10 @@ pub unsafe fn _mm_or_ps(a: __m128, b: __m128) -> __m128 {
#[inline]
#[target_feature(enable = "sse")]
// i586 only seems to generate plain `xor` instructions, so we ignore it.
#[cfg_attr(
    all(test, any(target_arch = "x86_64", target_feature = "sse2")),
    assert_instr(xorps)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_xor_ps(a: __m128, b: __m128) -> __m128 {
    let a: __m128i = mem::transmute(a);
@@ -1132,10 +1140,14 @@ pub unsafe fn _mm_movemask_ps(a: __m128) -> i32 {
#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(movhpd))]
// 32-bit codegen does not generate `movhps` or `movhpd`, but instead
// `movsd` followed by `unpcklpd` (or `movss`/`unpcklps` if there's no SSE2).
#[cfg_attr(
    all(test, target_arch = "x86", target_feature = "sse2"),
    assert_instr(movlhps)
)]
#[cfg_attr(
    all(test, target_arch = "x86", not(target_feature = "sse2")),
    assert_instr(unpcklps)
)]
// TODO: This function is actually not limited to floats, but that's what
// matches the C type most closely: (__m128, *const __m64) -> __m128
pub unsafe fn _mm_loadh_pi(a: __m128, p: *const __m64) -> __m128 {
@@ -1185,11 +1197,15 @@ pub unsafe fn _mm_loadh_pi(a: __m128, p: *const __m64) -> __m128 {
// #[cfg_attr(test, assert_instr(movlps))]
#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(movlpd))]
// On 32-bit targets with SSE2, it just generates two `movsd`.
#[cfg_attr(
    all(test, target_arch = "x86", target_feature = "sse2"),
    assert_instr(movsd)
)]
// It should really generate "movlps", but oh well...
#[cfg_attr(
    all(test, target_arch = "x86", not(target_feature = "sse2")),
    assert_instr(movss)
)]
// TODO: Like _mm_loadh_pi, this also isn't limited to floats.
pub unsafe fn _mm_loadl_pi(a: __m128, p: *const __m64) -> __m128 {
    let q = p as *const f32x2;
@@ -1321,8 +1337,10 @@ pub unsafe fn _mm_loadr_ps(p: *const f32) -> __m128 {
// On i686 and up LLVM actually generates MOVHPD instead of MOVHPS, that's
// fine.
// On i586 (no SSE2) it just generates plain MOV instructions.
#[cfg_attr(
    all(test, any(target_arch = "x86_64", target_feature = "sse2")),
    assert_instr(movhpd)
)]
pub unsafe fn _mm_storeh_pi(p: *mut __m64, a: __m128) {
    #[cfg(target_arch = "x86")]
    {
@@ -1349,8 +1367,10 @@ pub unsafe fn _mm_storeh_pi(p: *mut __m64, a: __m128) {
#[inline]
#[target_feature(enable = "sse")]
// On i586 the codegen just generates plain MOVs. No need to test for that.
#[cfg_attr(
    all(test, any(target_arch = "x86_64", target_feature = "sse2")),
    assert_instr(movlps)
)]
pub unsafe fn _mm_storel_pi(p: *mut __m64, a: __m128) {
    #[cfg(target_arch = "x86")]
    {
@@ -1929,7 +1949,7 @@ pub unsafe fn _mm_undefined_ps() -> __m128 {
#[target_feature(enable = "sse")] #[target_feature(enable = "sse")]
#[stable(feature = "simd_x86", since = "1.27.0")] #[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _MM_TRANSPOSE4_PS( pub unsafe fn _MM_TRANSPOSE4_PS(
row0: &mut __m128, row1: &mut __m128, row2: &mut __m128, row3: &mut __m128 row0: &mut __m128, row1: &mut __m128, row2: &mut __m128, row3: &mut __m128,
) { ) {
let tmp0 = _mm_unpacklo_ps(*row0, *row1); let tmp0 = _mm_unpacklo_ps(*row0, *row1);
let tmp2 = _mm_unpacklo_ps(*row2, *row3); let tmp2 = _mm_unpacklo_ps(*row2, *row3);
@@ -2734,12 +2754,8 @@ mod tests {
let b2 = _mm_setr_ps(1.0, 5.0, 6.0, 7.0); let b2 = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
let r2: u32x4 = transmute(_mm_cmpeq_ss(a, b2)); let r2: u32x4 = transmute(_mm_cmpeq_ss(a, b2));
let e2: u32x4 = transmute(_mm_setr_ps( let e2: u32x4 =
transmute(0xffffffffu32), transmute(_mm_setr_ps(transmute(0xffffffffu32), 2.0, 3.0, 4.0));
2.0,
3.0,
4.0,
));
assert_eq!(r2, e2); assert_eq!(r2, e2);
} }
@@ -3453,22 +3469,9 @@ mod tests {
#[simd_test(enable = "sse")] #[simd_test(enable = "sse")]
unsafe fn test_mm_cvtss_si32() { unsafe fn test_mm_cvtss_si32() {
let inputs = &[ let inputs = &[42.0f32, -3.1, 4.0e10, 4.0e-20, NAN, 2147483500.1];
42.0f32, let result =
-3.1, &[42i32, -3, i32::min_value(), 0, i32::min_value(), 2147483520];
4.0e10,
4.0e-20,
NAN,
2147483500.1,
];
let result = &[
42i32,
-3,
i32::min_value(),
0,
i32::min_value(),
2147483520,
];
for i in 0..inputs.len() { for i in 0..inputs.len() {
let x = _mm_setr_ps(inputs[i], 1.0, 3.0, 4.0); let x = _mm_setr_ps(inputs[i], 1.0, 3.0, 4.0);
let e = result[i]; let e = result[i];
@@ -3672,10 +3675,8 @@ mod tests {
}
let r = _mm_load_ps(p);
let e =
    _mm_add_ps(_mm_setr_ps(1.0, 2.0, 3.0, 4.0), _mm_set1_ps(fixup));
assert_eq_m128(r, e);
}
@@ -3705,10 +3706,8 @@ mod tests {
}
let r = _mm_loadr_ps(p);
let e =
    _mm_add_ps(_mm_setr_ps(4.0, 3.0, 2.0, 1.0), _mm_set1_ps(fixup));
assert_eq_m128(r, e);
}
@@ -3947,9 +3946,7 @@ mod tests {
#[simd_test(enable = "sse")] #[simd_test(enable = "sse")]
unsafe fn test_mm_stream_ps() { unsafe fn test_mm_stream_ps() {
let a = _mm_set1_ps(7.0); let a = _mm_set1_ps(7.0);
let mut mem = Memory { let mut mem = Memory { data: [-1.0; 4] };
data: [-1.0; 4],
};
_mm_stream_ps(&mut mem.data[0] as *mut f32, a); _mm_stream_ps(&mut mem.data[0] as *mut f32, a);
for i in 0..4 { for i in 0..4 {
@@ -4157,12 +4154,8 @@ mod tests {
#[simd_test(enable = "sse,mmx")] #[simd_test(enable = "sse,mmx")]
unsafe fn test_mm_movemask_pi8() { unsafe fn test_mm_movemask_pi8() {
let a = _mm_setr_pi16( let a =
0b1000_0000, _mm_setr_pi16(0b1000_0000, 0b0100_0000, 0b1000_0000, 0b0100_0000);
0b0100_0000,
0b1000_0000,
0b0100_0000,
);
let r = _mm_movemask_pi8(a); let r = _mm_movemask_pi8(a);
assert_eq!(r, 0b10001); assert_eq!(r, 0b10001);
@@ -1010,7 +1010,7 @@ pub unsafe fn _mm_set_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set_epi16(
    e7: i16, e6: i16, e5: i16, e4: i16, e3: i16, e2: i16, e1: i16, e0: i16,
) -> __m128i {
    mem::transmute(i16x8::new(e0, e1, e2, e3, e4, e5, e6, e7))
}
@@ -1095,7 +1095,7 @@ pub unsafe fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_setr_epi16(
    e7: i16, e6: i16, e5: i16, e4: i16, e3: i16, e2: i16, e1: i16, e0: i16,
) -> __m128i {
    _mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7)
}
@@ -1134,10 +1134,15 @@ pub unsafe fn _mm_setzero_si128() -> __m128i {
#[inline]
#[target_feature(enable = "sse2")]
// FIXME movsd on windows
#[cfg_attr(
    all(
        test,
        not(windows),
        not(all(target_os = "linux", target_arch = "x86_64")),
        target_arch = "x86_64"
    ),
    assert_instr(movq)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadl_epi64(mem_addr: *const __m128i) -> __m128i {
    _mm_set_epi64x(0, simd_extract((*mem_addr).as_i64x2(), 0))
@@ -1190,7 +1195,7 @@ pub unsafe fn _mm_loadu_si128(mem_addr: *const __m128i) -> __m128i {
#[cfg_attr(test, assert_instr(maskmovdqu))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskmoveu_si128(
    a: __m128i, mask: __m128i, mem_addr: *mut i8,
) {
    maskmovdqu(a.as_i8x16(), mask.as_i8x16(), mem_addr)
}
@@ -1229,10 +1234,15 @@ pub unsafe fn _mm_storeu_si128(mem_addr: *mut __m128i, a: __m128i) {
#[inline]
#[target_feature(enable = "sse2")]
// FIXME mov on windows, movlps on i686
#[cfg_attr(
    all(
        test,
        not(windows),
        not(all(target_os = "linux", target_arch = "x86_64")),
        target_arch = "x86_64"
    ),
    assert_instr(movq)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_storel_epi64(mem_addr: *mut __m128i, a: __m128i) {
    ptr::copy_nonoverlapping(
@@ -1275,8 +1285,9 @@ pub unsafe fn _mm_stream_si32(mem_addr: *mut i32, a: i32) {
#[inline]
#[target_feature(enable = "sse2")]
// FIXME movd on windows, movd on i686
#[cfg_attr(
    all(test, not(windows), target_arch = "x86_64"), assert_instr(movq)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_move_epi64(a: __m128i) -> __m128i {
    let zero = _mm_setzero_si128();
@@ -1341,11 +1352,7 @@ pub unsafe fn _mm_extract_epi16(a: __m128i, imm8: i32) -> i32 {
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_insert_epi16(a: __m128i, i: i32, imm8: i32) -> __m128i {
    mem::transmute(simd_insert(a.as_i16x8(), (imm8 & 7) as u32, i as i16))
}
/// Return a mask of the most significant bit of each element in `a`.
@@ -1443,16 +1450,7 @@ pub unsafe fn _mm_shufflehi_epi16(a: __m128i, imm8: i32) -> __m128i {
simd_shuffle8(
    a,
    a,
    [0, 1, 2, 3, $x01 + 4, $x23 + 4, $x45 + 4, $x67 + 4],
)
};
}
@@ -1567,9 +1565,7 @@ pub unsafe fn _mm_unpackhi_epi8(a: __m128i, b: __m128i) -> __m128i {
mem::transmute::<i8x16, _>(simd_shuffle16(
    a.as_i8x16(),
    b.as_i8x16(),
    [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31],
))
}
@@ -1630,9 +1626,7 @@ pub unsafe fn _mm_unpacklo_epi8(a: __m128i, b: __m128i) -> __m128i {
mem::transmute::<i8x16, _>(simd_shuffle16(
    a.as_i8x16(),
    b.as_i8x16(),
    [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23],
))
}
@@ -1644,11 +1638,8 @@ pub unsafe fn _mm_unpacklo_epi8(a: __m128i, b: __m128i) -> __m128i {
#[cfg_attr(test, assert_instr(punpcklwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_unpacklo_epi16(a: __m128i, b: __m128i) -> __m128i {
    let x =
        simd_shuffle8(a.as_i16x8(), b.as_i16x8(), [0, 8, 1, 9, 2, 10, 3, 11]);
    mem::transmute::<i16x8, _>(x)
}
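// Editor's note: illustration only, not part of this commit. The shuffle
// indices [0, 8, 1, 9, 2, 10, 3, 11] interleave the low halves of `a` and
// `b`:
#[target_feature(enable = "sse2")]
unsafe fn unpacklo_epi16_demo() {
    let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
    let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
    let r = _mm_unpacklo_epi16(a, b);
    assert_eq_m128i(r, _mm_setr_epi16(0, 8, 1, 9, 2, 10, 3, 11));
}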
@@ -1947,11 +1938,7 @@ pub unsafe fn _mm_cmple_sd(a: __m128d, b: __m128d) -> __m128d {
#[cfg_attr(test, assert_instr(cmpltsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpgt_sd(a: __m128d, b: __m128d) -> __m128d {
    simd_insert(_mm_cmplt_sd(b, a), 1, simd_extract::<_, f64>(a, 1))
}
/// Return a new vector with the low element of `a` replaced by the
@@ -1963,11 +1950,7 @@ pub unsafe fn _mm_cmpgt_sd(a: __m128d, b: __m128d) -> __m128d {
#[cfg_attr(test, assert_instr(cmplesd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpge_sd(a: __m128d, b: __m128d) -> __m128d {
    simd_insert(_mm_cmple_sd(b, a), 1, simd_extract::<_, f64>(a, 1))
}
/// Return a new vector with the low element of `a` replaced by the result
@@ -2042,11 +2025,7 @@ pub unsafe fn _mm_cmpnle_sd(a: __m128d, b: __m128d) -> __m128d {
#[cfg_attr(test, assert_instr(cmpnltsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpngt_sd(a: __m128d, b: __m128d) -> __m128d {
    simd_insert(_mm_cmpnlt_sd(b, a), 1, simd_extract::<_, f64>(a, 1))
}
/// Return a new vector with the low element of `a` replaced by the
@@ -2058,11 +2037,7 @@ pub unsafe fn _mm_cmpngt_sd(a: __m128d, b: __m128d) -> __m128d {
#[cfg_attr(test, assert_instr(cmpnlesd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpnge_sd(a: __m128d, b: __m128d) -> __m128d {
    simd_insert(_mm_cmpnle_sd(b, a), 1, simd_extract::<_, f64>(a, 1))
}
/// Compare corresponding elements in `a` and `b` for equality.
@@ -2881,8 +2856,9 @@ pub unsafe fn _mm_undefined_si128() -> __m128i {
/// The resulting `__m128d` element is composed by the low-order values of
/// the two `__m128d` interleaved input elements, i.e.:
///
/// * The `[127:64]` bits are copied from the `[127:64]` bits of the second
///   input
/// * The `[63:0]` bits are copied from the `[127:64]` bits of the first
///   input
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_pd)
#[inline]
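// Editor's note: illustration only, not part of this commit. The lane
// selection described above yields [a.high, b.high]:
#[target_feature(enable = "sse2")]
unsafe fn unpackhi_pd_demo() {
    let a = _mm_setr_pd(1.0, 2.0); // low = 1.0, high = 2.0
    let b = _mm_setr_pd(3.0, 4.0); // low = 3.0, high = 4.0
    let r = _mm_unpackhi_pd(a, b);
    assert_eq_m128d(r, _mm_setr_pd(2.0, 4.0));
}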
@@ -3223,22 +3199,7 @@ mod tests {
#[simd_test(enable = "sse2")] #[simd_test(enable = "sse2")]
unsafe fn test_mm_add_epi8() { unsafe fn test_mm_add_epi8() {
let a = _mm_setr_epi8( let a = _mm_setr_epi8(
0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
); );
#[cfg_attr(rustfmt, rustfmt_skip)] #[cfg_attr(rustfmt, rustfmt_skip)]
let b = _mm_setr_epi8( let b = _mm_setr_epi8(
@@ -3290,22 +3251,7 @@ mod tests {
#[simd_test(enable = "sse2")] #[simd_test(enable = "sse2")]
unsafe fn test_mm_adds_epi8() { unsafe fn test_mm_adds_epi8() {
let a = _mm_setr_epi8( let a = _mm_setr_epi8(
0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
); );
#[cfg_attr(rustfmt, rustfmt_skip)] #[cfg_attr(rustfmt, rustfmt_skip)]
let b = _mm_setr_epi8( let b = _mm_setr_epi8(
@@ -3363,22 +3309,7 @@ mod tests {
#[simd_test(enable = "sse2")] #[simd_test(enable = "sse2")]
unsafe fn test_mm_adds_epu8() { unsafe fn test_mm_adds_epu8() {
let a = _mm_setr_epi8( let a = _mm_setr_epi8(
0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
); );
#[cfg_attr(rustfmt, rustfmt_skip)] #[cfg_attr(rustfmt, rustfmt_skip)]
let b = _mm_setr_epi8( let b = _mm_setr_epi8(
@@ -3629,22 +3560,7 @@ mod tests {
); );
let r = _mm_slli_si128(a, 1); let r = _mm_slli_si128(a, 1);
let e = _mm_setr_epi8( let e = _mm_setr_epi8(
0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
); );
assert_eq_m128i(r, e); assert_eq_m128i(r, e);
@@ -3888,41 +3804,10 @@ mod tests {
#[simd_test(enable = "sse2")] #[simd_test(enable = "sse2")]
unsafe fn test_mm_cmpeq_epi8() { unsafe fn test_mm_cmpeq_epi8() {
let a = _mm_setr_epi8( let a = _mm_setr_epi8(
0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
);
let b = _mm_setr_epi8(
15,
14,
2,
12,
11,
10,
9,
8,
7,
6,
5,
4,
3,
2,
1,
0,
); );
let b =
_mm_setr_epi8(15, 14, 2, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
let r = _mm_cmpeq_epi8(a, b); let r = _mm_cmpeq_epi8(a, b);
#[cfg_attr(rustfmt, rustfmt_skip)] #[cfg_attr(rustfmt, rustfmt_skip)]
assert_eq_m128i( assert_eq_m128i(
@@ -4869,9 +4754,7 @@ mod tests {
pub data: [f64; 2],
}
let a = _mm_set1_pd(7.0);
let mut mem = Memory { data: [-1.0; 2] };
_mm_stream_pd(&mut mem.data[0] as *mut f64, a);
for i in 0..2 {
@@ -4889,9 +4772,7 @@ mod tests {
#[simd_test(enable = "sse2")]
unsafe fn test_mm_store_pd() {
let mut mem = Memory { data: [0.0f64; 4] };
let vals = &mut mem.data;
let a = _mm_setr_pd(1.0, 2.0);
let d = vals.as_mut_ptr();
@@ -4903,9 +4784,7 @@ mod tests {
#[simd_test(enable = "sse")]
unsafe fn test_mm_storeu_pd() {
let mut mem = Memory { data: [0.0f64; 4] };
let vals = &mut mem.data;
let a = _mm_setr_pd(1.0, 2.0);
@@ -4929,9 +4808,7 @@ mod tests {
#[simd_test(enable = "sse2")]
unsafe fn test_mm_store1_pd() {
let mut mem = Memory { data: [0.0f64; 4] };
let vals = &mut mem.data;
let a = _mm_setr_pd(1.0, 2.0);
let d = vals.as_mut_ptr();
@@ -4943,9 +4820,7 @@ mod tests {
#[simd_test(enable = "sse2")]
unsafe fn test_mm_store_pd1() {
let mut mem = Memory { data: [0.0f64; 4] };
let vals = &mut mem.data;
let a = _mm_setr_pd(1.0, 2.0);
let d = vals.as_mut_ptr();
@@ -4957,9 +4832,7 @@ mod tests {
#[simd_test(enable = "sse2")]
unsafe fn test_mm_storer_pd() {
let mut mem = Memory { data: [0.0f64; 4] };
let vals = &mut mem.data;
let a = _mm_setr_pd(1.0, 2.0);
let d = vals.as_mut_ptr();
@@ -5013,10 +4886,7 @@ mod tests {
}
let r = _mm_loadu_pd(d);
let e = _mm_add_pd(_mm_setr_pd(1.0, 2.0), _mm_set1_pd(offset as f64));
assert_eq_m128d(r, e);
}
@@ -5091,12 +4961,8 @@ mod tests {
assert_eq_m128(r, _mm_setr_ps(2.0, -2.2, 3.3, 4.4));
let a =
    _mm_setr_ps(-1.1, f32::NEG_INFINITY, f32::MAX, f32::NEG_INFINITY);
let b = _mm_setr_pd(f64::INFINITY, -5.0);
let r = _mm_cvtsd_ss(a, b);
@@ -5161,12 +5027,8 @@ mod tests {
let r = _mm_cvttps_epi32(a);
assert_eq_m128i(r, _mm_setr_epi32(-1, 2, -3, 6));
let a =
    _mm_setr_ps(f32::NEG_INFINITY, f32::INFINITY, f32::MIN, f32::MAX);
let r = _mm_cvttps_epi32(a);
assert_eq_m128i(
    r,
@@ -66,13 +66,9 @@ pub const _MM_FROUND_NEARBYINT: i32 =
#[cfg_attr(test, assert_instr(pblendvb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_blendv_epi8(
    a: __m128i, b: __m128i, mask: __m128i,
) -> __m128i {
    mem::transmute(pblendvb(a.as_i8x16(), b.as_i8x16(), mask.as_i8x16()))
}
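// Editor's note: illustration only, not part of this commit. Bytes of
// `mask` with the top bit set pick from `b`, the rest from `a`:
#[target_feature(enable = "sse4.1")]
unsafe fn blendv_epi8_demo() {
    let a = _mm_set1_epi8(1);
    let b = _mm_set1_epi8(2);
    let m = _mm_setr_epi8(0, -1, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1);
    let r = _mm_blendv_epi8(a, b, m);
    let e = _mm_setr_epi8(1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2);
    assert_eq_m128i(r, e);
}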
/// Blend packed 16-bit integers from `a` and `b` using the mask `imm8`.
@@ -250,11 +246,7 @@ pub unsafe fn _mm_insert_ps(a: __m128, b: __m128, imm8: i32) -> __m128 {
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_insert_epi8(a: __m128i, i: i32, imm8: i32) -> __m128i {
    mem::transmute(simd_insert(a.as_i8x16(), (imm8 & 0b1111) as u32, i as i8))
}
/// Return a copy of `a` with the 32-bit integer from `i` inserted at a
@@ -267,11 +259,7 @@ pub unsafe fn _mm_insert_epi8(a: __m128i, i: i32, imm8: i32) -> __m128i {
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_insert_epi32(a: __m128i, i: i32, imm8: i32) -> __m128i {
    mem::transmute(simd_insert(a.as_i32x4(), (imm8 & 0b11) as u32, i))
}
/// Compare packed 8-bit integers in `a` and `b` and return packed maximum
@@ -1778,16 +1766,12 @@ mod tests {
}
{
let a = _mm_setr_epi32(
    15, 2, /* ignored */
    1234567, 4, /* ignored */
);
let b = _mm_setr_epi32(
    -20, -256, /* ignored */
    666666, 666666, /* ignored */
);
let r = _mm_mul_epi32(a, b);
let e = _mm_setr_epi64x(-300, 823043843622);
@@ -439,7 +439,7 @@ pub unsafe fn _mm_cmpistra(a: __m128i, b: __m128i, imm8: i32) -> i32 {
#[rustc_args_required_const(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpestrm(
    a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32,
) -> __m128i {
    let a = a.as_i8x16();
    let b = b.as_i8x16();
@@ -544,7 +544,7 @@ pub unsafe fn _mm_cmpestrm(
#[rustc_args_required_const(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpestri(
    a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32,
) -> i32 {
    let a = a.as_i8x16();
    let b = b.as_i8x16();
@@ -567,7 +567,7 @@ pub unsafe fn _mm_cmpestri(
#[rustc_args_required_const(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpestrz(
    a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32,
) -> i32 {
    let a = a.as_i8x16();
    let b = b.as_i8x16();
@@ -590,7 +590,7 @@ pub unsafe fn _mm_cmpestrz(
#[rustc_args_required_const(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpestrc(
    a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32,
) -> i32 {
    let a = a.as_i8x16();
    let b = b.as_i8x16();
@@ -613,7 +613,7 @@ pub unsafe fn _mm_cmpestrc(
#[rustc_args_required_const(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpestrs(
    a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32,
) -> i32 {
    let a = a.as_i8x16();
    let b = b.as_i8x16();
@@ -636,7 +636,7 @@ pub unsafe fn _mm_cmpestrs(
#[rustc_args_required_const(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpestro(
    a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32,
) -> i32 {
    let a = a.as_i8x16();
    let b = b.as_i8x16();
@@ -660,7 +660,7 @@ pub unsafe fn _mm_cmpestro(
#[rustc_args_required_const(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpestra(
    a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32,
) -> i32 {
    let a = a.as_i8x16();
    let b = b.as_i8x16();
@@ -917,13 +917,8 @@ mod tests {
unsafe fn test_mm_cmpestra() {
let a = str_to_m128i(b"Cannot match a");
let b = str_to_m128i(b"Null after 14");
let i =
    _mm_cmpestra(a, 14, b, 16, _SIDD_CMP_EQUAL_EACH | _SIDD_UNIT_MASK);
assert_eq!(1, i);
}
@@ -25,8 +25,8 @@ extern "C" {
/// Extracts the bit range specified by `y` from the lower 64 bits of `x`.
///
/// The `[13:8]` bits of `y` specify the index of the bit-range to extract. The
/// `[5:0]` bits of `y` specify the length of the bit-range to extract. All
/// other bits are ignored.
///
/// If the length is zero, it is interpreted as `64`. If the length and index
/// are zero, the lower 64 bits of `x` are extracted.
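// Editor's note: a plain-Rust model of the selection rule above, added for
// illustration only (not part of this commit; hardware uses the TBM
// `bextri` instruction).
fn bextri_model(x: u64, y: u64) -> u64 {
    let start = (y >> 8) & 0x3f; // bits [13:8]: start index
    let len = y & 0x3f; // bits [5:0]: length, zero meaning 64
    if len == 0 {
        return x >> start; // length 64: everything from `start` up
    }
    (x >> start) & ((1u64 << len) - 1)
}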
@@ -596,24 +596,8 @@ mod tests {
    12, 5, 5, 10,
    4, 1, 8, 0,
);
let expected =
    _mm_setr_epi8(5, 0, 5, 4, 9, 13, 7, 4, 13, 6, 6, 11, 5, 2, 9, 1);
let r = _mm_shuffle_epi8(a, b);
assert_eq_m128i(r, expected);
}
@@ -121,7 +121,7 @@ mod x86_polyfill {
#[target_feature(enable = "avx2")] #[target_feature(enable = "avx2")]
pub unsafe fn _mm256_insert_epi64( pub unsafe fn _mm256_insert_epi64(
a: __m256i, val: i64, idx: i32 a: __m256i, val: i64, idx: i32,
) -> __m256i { ) -> __m256i {
union A { union A {
a: __m256i, a: __m256i,
@@ -38,11 +38,7 @@ extern "C" {
#[cfg_attr(test, assert_instr(xsave))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _xsave(mem_addr: *mut u8, save_mask: u64) {
    xsave(mem_addr, (save_mask >> 32) as u32, save_mask as u32);
}
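// Editor's note: illustration only, not part of this commit. Every wrapper
// in this family splits the 64-bit mask into the EDX:EAX pair that the
// XSAVE-family instructions expect:
fn xsave_mask_split_demo() {
    let save_mask: u64 = 0x0000_0003_0000_0007;
    let hi = (save_mask >> 32) as u32; // EDX
    let lo = save_mask as u32; // EAX
    assert_eq!((hi, lo), (3, 7));
}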
/// Perform a full or partial restore of the enabled processor states using
@@ -110,11 +106,7 @@ pub unsafe fn _xgetbv(xcr_no: u32) -> u64 {
#[cfg_attr(test, assert_instr(xsaveopt))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _xsaveopt(mem_addr: *mut u8, save_mask: u64) {
    xsaveopt(mem_addr, (save_mask >> 32) as u32, save_mask as u32);
}
/// Perform a full or partial save of the enabled processor states to memory
@@ -130,11 +122,7 @@ pub unsafe fn _xsaveopt(mem_addr: *mut u8, save_mask: u64) {
#[cfg_attr(test, assert_instr(xsavec))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _xsavec(mem_addr: *mut u8, save_mask: u64) {
    xsavec(mem_addr, (save_mask >> 32) as u32, save_mask as u32);
}
/// Perform a full or partial save of the enabled processor states to memory at
@@ -151,11 +139,7 @@ pub unsafe fn _xsavec(mem_addr: *mut u8, save_mask: u64) {
#[cfg_attr(test, assert_instr(xsaves))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _xsaves(mem_addr: *mut u8, save_mask: u64) {
    xsaves(mem_addr, (save_mask >> 32) as u32, save_mask as u32);
}
/// Perform a full or partial restore of the enabled processor states using the
@@ -196,9 +180,7 @@ mod tests {
impl XsaveArea {
fn new() -> XsaveArea {
    XsaveArea { data: [0; 2560] }
}
fn ptr(&mut self) -> *mut u8 {
    &mut self.data[0] as *mut _ as *mut u8
@@ -28,8 +28,8 @@ pub unsafe fn _bextr_u64(a: u64, start: u32, len: u32) -> u64 {
/// Extracts bits of `a` specified by `control` into
/// the least significant bits of the result.
///
/// Bits `[7,0]` of `control` specify the index to the first bit in the range
/// to be extracted, and bits `[15,8]` specify the length of the range.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bextr2_u64)
#[inline]
@@ -58,7 +58,7 @@ pub unsafe fn _fxrstor64(mem_addr: *const u8) {
#[cfg(test)]
mod tests {
use coresimd::x86_64::*;
use std::{cmp::PartialEq, fmt};
use stdsimd_test::simd_test;
#[repr(align(16))]
@@ -36,11 +36,7 @@ extern "C" {
#[cfg_attr(test, assert_instr(xsave64))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _xsave64(mem_addr: *mut u8, save_mask: u64) {
    xsave64(mem_addr, (save_mask >> 32) as u32, save_mask as u32);
}
/// Perform a full or partial restore of the enabled processor states using
@@ -73,11 +69,7 @@ pub unsafe fn _xrstor64(mem_addr: *const u8, rs_mask: u64) {
#[cfg_attr(test, assert_instr(xsaveopt64))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _xsaveopt64(mem_addr: *mut u8, save_mask: u64) {
    xsaveopt64(mem_addr, (save_mask >> 32) as u32, save_mask as u32);
}
/// Perform a full or partial save of the enabled processor states to memory
@@ -93,11 +85,7 @@ pub unsafe fn _xsaveopt64(mem_addr: *mut u8, save_mask: u64) {
#[cfg_attr(test, assert_instr(xsavec64))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _xsavec64(mem_addr: *mut u8, save_mask: u64) {
    xsavec64(mem_addr, (save_mask >> 32) as u32, save_mask as u32);
}
/// Perform a full or partial save of the enabled processor states to memory at
@@ -114,11 +102,7 @@ pub unsafe fn _xsavec64(mem_addr: *mut u8, save_mask: u64) {
#[cfg_attr(test, assert_instr(xsaves64))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _xsaves64(mem_addr: *mut u8, save_mask: u64) {
    xsaves64(mem_addr, (save_mask >> 32) as u32, save_mask as u32);
}
/// Perform a full or partial restore of the enabled processor states using the
@@ -21,7 +21,7 @@ use proc_macro2::TokenStream;
#[proc_macro_attribute]
pub fn assert_instr(
    attr: proc_macro::TokenStream, item: proc_macro::TokenStream,
) -> proc_macro::TokenStream {
    let invoc = syn::parse::<Invoc>(attr)
        .expect("expected #[assert_instr(instr, a = b, ...)]");
@@ -36,9 +36,10 @@ pub fn assert_instr(
let name = &func.ident;
// Disable assert_instr for x86 targets compiled with avx enabled, which
// causes LLVM to generate different instructions than the ones we are
// testing for.
let disable_assert_instr =
    std::env::var("STDSIMD_DISABLE_ASSERT_INSTR").is_ok();
let maybe_ignore = if cfg!(optimized) && !disable_assert_instr {
    TokenStream::new()
} else {
@@ -72,11 +73,7 @@ pub fn assert_instr(
syn::Pat::Ident(ref i) => &i.ident,
_ => panic!("must have bare arguments"),
};
match invoc.args.iter().find(|a| *ident == a.0) {
    Some(&(_, ref tts)) => {
        input_vals.push(quote! { #tts });
    }
@@ -87,7 +84,8 @@ pub fn assert_instr(
};
}
let attrs = func
    .attrs
    .iter()
    .filter(|attr| {
        attr.path
@@ -142,9 +140,8 @@ pub fn assert_instr(
}
}.into();
// why? necessary now to get tests to work?
let tts: TokenStream =
    tts.to_string().parse().expect("cannot parse tokenstream");
let tts: TokenStream = quote! {
    #item
@@ -1,8 +1,5 @@
use std::env;
fn main() {
    println!("cargo:rustc-env=TARGET={}", env::var("TARGET").unwrap());
}
@@ -9,29 +9,35 @@
#![cfg_attr(stdsimd_strict, deny(warnings))]
#![allow(dead_code)]
#![allow(unused_features)]
#![feature(
    const_fn, link_llvm_intrinsics, platform_intrinsics, repr_simd, simd_ffi,
    asm, proc_macro_gen, integer_atomics, stmt_expr_attributes,
    core_intrinsics, crate_in_paths, no_core, attr_literals, rustc_attrs,
    stdsimd, staged_api, core_float, core_slice_ext, align_offset, doc_cfg,
    mmx_target_feature, tbm_target_feature, sse4a_target_feature,
    arm_target_feature, aarch64_target_feature, mips_target_feature,
    powerpc_target_feature
)]
#![cfg_attr(
    test,
    feature(proc_macro, test, attr_literals, abi_vectorcall, untagged_unions)
)]
#![cfg_attr(
    feature = "cargo-clippy",
    allow(
        inline_always, too_many_arguments, cast_sign_loss, cast_lossless,
        cast_possible_wrap, cast_possible_truncation, cast_precision_loss,
        shadow_reuse, cyclomatic_complexity, similar_names,
        many_single_char_names
    )
)]
#![cfg_attr(test, allow(unused_imports))]
#![no_core]
#![unstable(feature = "stdsimd", issue = "27731")]
#![doc(
    test(attr(deny(warnings))),
    test(attr(allow(dead_code, deprecated, unused_variables, unused_mut)))
)]
#[cfg_attr(not(test), macro_use)]
extern crate core as _core;
@@ -1,7 +1,9 @@
#![feature(stdsimd)]
#![cfg_attr(stdsimd_strict, deny(warnings))]
#![cfg_attr(
    feature = "cargo-clippy",
    allow(option_unwrap_used, print_stdout, use_debug)
)]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[macro_use]
@@ -14,53 +16,20 @@ fn x86_all() {
println!("sse2: {:?}", is_x86_feature_detected!("sse2")); println!("sse2: {:?}", is_x86_feature_detected!("sse2"));
println!("sse3: {:?}", is_x86_feature_detected!("sse3")); println!("sse3: {:?}", is_x86_feature_detected!("sse3"));
println!("ssse3: {:?}", is_x86_feature_detected!("ssse3")); println!("ssse3: {:?}", is_x86_feature_detected!("ssse3"));
println!( println!("sse4.1: {:?}", is_x86_feature_detected!("sse4.1"));
"sse4.1: {:?}", println!("sse4.2: {:?}", is_x86_feature_detected!("sse4.2"));
is_x86_feature_detected!("sse4.1")
);
println!(
"sse4.2: {:?}",
is_x86_feature_detected!("sse4.2")
);
println!("sse4a: {:?}", is_x86_feature_detected!("sse4a")); println!("sse4a: {:?}", is_x86_feature_detected!("sse4a"));
println!("avx: {:?}", is_x86_feature_detected!("avx")); println!("avx: {:?}", is_x86_feature_detected!("avx"));
println!("avx2: {:?}", is_x86_feature_detected!("avx2")); println!("avx2: {:?}", is_x86_feature_detected!("avx2"));
println!( println!("avx512f {:?}", is_x86_feature_detected!("avx512f"));
"avx512f {:?}", println!("avx512cd {:?}", is_x86_feature_detected!("avx512cd"));
is_x86_feature_detected!("avx512f") println!("avx512er {:?}", is_x86_feature_detected!("avx512er"));
); println!("avx512pf {:?}", is_x86_feature_detected!("avx512pf"));
println!( println!("avx512bw {:?}", is_x86_feature_detected!("avx512bw"));
"avx512cd {:?}", println!("avx512dq {:?}", is_x86_feature_detected!("avx512dq"));
is_x86_feature_detected!("avx512cd") println!("avx512vl {:?}", is_x86_feature_detected!("avx512vl"));
); println!("avx512_ifma {:?}", is_x86_feature_detected!("avx512ifma"));
println!( println!("avx512_vbmi {:?}", is_x86_feature_detected!("avx512vbmi"));
"avx512er {:?}",
is_x86_feature_detected!("avx512er")
);
println!(
"avx512pf {:?}",
is_x86_feature_detected!("avx512pf")
);
println!(
"avx512bw {:?}",
is_x86_feature_detected!("avx512bw")
);
println!(
"avx512dq {:?}",
is_x86_feature_detected!("avx512dq")
);
println!(
"avx512vl {:?}",
is_x86_feature_detected!("avx512vl")
);
println!(
"avx512_ifma {:?}",
is_x86_feature_detected!("avx512ifma")
);
println!(
"avx512_vbmi {:?}",
is_x86_feature_detected!("avx512vbmi")
);
println!( println!(
"avx512_vpopcntdq {:?}", "avx512_vpopcntdq {:?}",
is_x86_feature_detected!("avx512vpopcntdq") is_x86_feature_detected!("avx512vpopcntdq")
@@ -70,23 +39,11 @@ fn x86_all() {
println!("bmi: {:?}", is_x86_feature_detected!("bmi1")); println!("bmi: {:?}", is_x86_feature_detected!("bmi1"));
println!("bmi2: {:?}", is_x86_feature_detected!("bmi2")); println!("bmi2: {:?}", is_x86_feature_detected!("bmi2"));
println!("tbm: {:?}", is_x86_feature_detected!("tbm")); println!("tbm: {:?}", is_x86_feature_detected!("tbm"));
println!( println!("popcnt: {:?}", is_x86_feature_detected!("popcnt"));
"popcnt: {:?}",
is_x86_feature_detected!("popcnt")
);
println!("lzcnt: {:?}", is_x86_feature_detected!("lzcnt")); println!("lzcnt: {:?}", is_x86_feature_detected!("lzcnt"));
println!("fxsr: {:?}", is_x86_feature_detected!("fxsr")); println!("fxsr: {:?}", is_x86_feature_detected!("fxsr"));
println!("xsave: {:?}", is_x86_feature_detected!("xsave")); println!("xsave: {:?}", is_x86_feature_detected!("xsave"));
println!( println!("xsaveopt: {:?}", is_x86_feature_detected!("xsaveopt"));
"xsaveopt: {:?}", println!("xsaves: {:?}", is_x86_feature_detected!("xsaves"));
is_x86_feature_detected!("xsaveopt") println!("xsavec: {:?}", is_x86_feature_detected!("xsavec"));
);
println!(
"xsaves: {:?}",
is_x86_feature_detected!("xsaves")
);
println!(
"xsavec: {:?}",
is_x86_feature_detected!("xsavec")
);
} }
View File
@@ -253,11 +253,7 @@ macro_rules! product_nan_test {
             }
         }
         let v = $id::splat(n0);
-        assert!(
-            v.product().is_nan(),
-            "all nans | {:?}",
-            v
-        );
+        assert!(v.product().is_nan(), "all nans | {:?}", v);
     }
     unsafe { test_fn() };
 }
@@ -355,8 +351,7 @@ mod offset {
                 // tolerate 1 ULP difference:
                 if vsum.as_int() > tsum.as_int() {
                     assert!(
-                        vsum.as_int() - tsum.as_int()
-                            < 2,
+                        vsum.as_int() - tsum.as_int() < 2,
                         "v: {:?} | vsum: {} | tsum: {}",
                         v,
                         vsum,
@@ -364,8 +359,7 @@ mod offset {
                     );
                 } else {
                     assert!(
-                        tsum.as_int() - vsum.as_int()
-                            < 2,
+                        tsum.as_int() - vsum.as_int() < 2,
                         "v: {:?} | vsum: {} | tsum: {}",
                         v,
                         vsum,
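Context for the two hunks above: the test tolerates a 1-ULP difference by comparing the integer (bit) representations of the two sums. A self-contained sketch of the same idea, assuming the test's `as_int` helper is equivalent to `f64::to_bits`:

    // For finite floats of the same sign, adjacent representable values
    // have adjacent bit patterns, so a bit-distance below 2 means the
    // values are at most 1 ULP apart.
    fn within_one_ulp(a: f64, b: f64) -> bool {
        let (ai, bi) = (a.to_bits() as i64, b.to_bits() as i64);
        (ai - bi).abs() < 2
    }

    fn main() {
        assert!(within_one_ulp(0.1 + 0.2, 0.3)); // differ by exactly 1 ULP
    }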
View File
@@ -12,7 +12,7 @@ extern crate quote;
 use std::env;

-use proc_macro2::{Literal, Span, Ident, TokenStream, TokenTree};
+use proc_macro2::{Ident, Literal, Span, TokenStream, TokenTree};

 fn string(s: &str) -> TokenTree {
     Literal::string(s).into()
@@ -20,11 +20,9 @@ fn string(s: &str) -> TokenTree {
 #[proc_macro_attribute]
 pub fn simd_test(
-    attr: proc_macro::TokenStream, item: proc_macro::TokenStream
+    attr: proc_macro::TokenStream, item: proc_macro::TokenStream,
 ) -> proc_macro::TokenStream {
-    let tokens = TokenStream::from(attr)
-        .into_iter()
-        .collect::<Vec<_>>();
+    let tokens = TokenStream::from(attr).into_iter().collect::<Vec<_>>();
     if tokens.len() != 3 {
         panic!("expected #[simd_test(enable = \"feature\")]");
     }
@@ -53,18 +51,19 @@ pub fn simd_test(
     let item = TokenStream::from(item);
     let name = find_name(item.clone());

-    let name: TokenStream = name.to_string().parse().expect(&format!(
-        "failed to parse name: {}",
-        name.to_string()
-    ));
+    let name: TokenStream = name
+        .to_string()
+        .parse()
+        .expect(&format!("failed to parse name: {}", name.to_string()));

     let target = env::var("TARGET")
         .expect("TARGET environment variable should be set for rustc");
     let mut force_test = false;
-    let macro_test = match target.split('-').next().expect(&format!(
-        "target triple contained no \"-\": {}",
-        target
-    )) {
+    let macro_test = match target
+        .split('-')
+        .next()
+        .expect(&format!("target triple contained no \"-\": {}", target))
+    {
         "i686" | "x86_64" | "i586" => "is_x86_feature_detected",
         "arm" | "armv7" => "is_arm_feature_detected",
         "aarch64" => "is_aarch64_feature_detected",
View File
@@ -5,8 +5,10 @@
 //! assertions about the disassembly of a function.

 #![feature(proc_macro)]
-#![cfg_attr(feature = "cargo-clippy",
-            allow(missing_docs_in_private_items, print_stdout))]
+#![cfg_attr(
+    feature = "cargo-clippy",
+    allow(missing_docs_in_private_items, print_stdout)
+)]

 extern crate assert_instr_macro;
 extern crate backtrace;
@@ -25,7 +27,8 @@ pub use assert_instr_macro::*;
 pub use simd_test_macro::*;

 lazy_static! {
-    static ref DISASSEMBLY: HashMap<String, Vec<Function>> = disassemble_myself();
+    static ref DISASSEMBLY: HashMap<String, Vec<Function>> =
+        disassemble_myself();
 }

 struct Function {
@@ -39,14 +42,16 @@ struct Instruction {
 fn disassemble_myself() -> HashMap<String, Vec<Function>> {
     let me = env::current_exe().expect("failed to get current exe");

-    if cfg!(target_arch = "x86_64") && cfg!(target_os = "windows")
+    if cfg!(target_arch = "x86_64")
+        && cfg!(target_os = "windows")
         && cfg!(target_env = "msvc")
     {
         let mut cmd = cc::windows_registry::find(
             "x86_64-pc-windows-msvc",
             "dumpbin.exe",
         ).expect("failed to find `dumpbin` tool");
-        let output = cmd.arg("/DISASM")
+        let output = cmd
+            .arg("/DISASM")
             .arg(&me)
             .output()
             .expect("failed to execute dumpbin");
@@ -257,9 +262,7 @@ pub fn assert(fnptr: usize, fnname: &str, expected: &str) {
     // in the disassembly.
     let mut sym = None;
     backtrace::resolve(fnptr as *mut _, |name| {
-        sym = name.name()
-            .and_then(|s| s.as_str())
-            .map(normalize);
+        sym = name.name().and_then(|s| s.as_str()).map(normalize);
     });

     let functions =
@@ -270,26 +273,17 @@ pub fn assert(fnptr: usize, fnname: &str, expected: &str) {
println!("assumed symbol name: `{}`", sym); println!("assumed symbol name: `{}`", sym);
} }
println!("maybe related functions"); println!("maybe related functions");
for f in DISASSEMBLY for f in DISASSEMBLY.keys().filter(|k| k.contains(fnname)) {
.keys()
.filter(|k| k.contains(fnname))
{
println!("\t- {}", f); println!("\t- {}", f);
} }
panic!( panic!("failed to find disassembly of {:#x} ({})", fnptr, fnname);
"failed to find disassembly of {:#x} ({})",
fnptr, fnname
);
}; };
assert_eq!(functions.len(), 1); assert_eq!(functions.len(), 1);
let function = &functions[0]; let function = &functions[0];
let mut instrs = &function.instrs[..]; let mut instrs = &function.instrs[..];
while instrs while instrs.last().map_or(false, |s| s.parts == ["nop"]) {
.last()
.map_or(false, |s| s.parts == ["nop"])
{
instrs = &instrs[..instrs.len() - 1]; instrs = &instrs[..instrs.len() - 1];
} }
@@ -400,10 +394,7 @@ pub fn assert_skip_test_ok(name: &str) {
     if env::var("STDSIMD_TEST_EVERYTHING").is_err() {
         return;
     }
-    panic!(
-        "skipped test `{}` when it shouldn't be skipped",
-        name
-    );
+    panic!("skipped test `{}` when it shouldn't be skipped", name);
 }

 // See comment in `assert-instr-macro` crate for why this exists
View File
@@ -1,9 +1,12 @@
 #![feature(proc_macro)]
 #![allow(bad_style)]
-#![cfg_attr(feature = "cargo-clippy",
-            allow(shadow_reuse, cast_lossless, match_same_arms,
-                  nonminimal_bool, print_stdout, use_debug, eq_op,
-                  useless_format))]
+#![cfg_attr(
+    feature = "cargo-clippy",
+    allow(
+        shadow_reuse, cast_lossless, match_same_arms, nonminimal_bool,
+        print_stdout, use_debug, eq_op, useless_format
+    )
+)]

 #[macro_use]
 extern crate serde_derive;
@@ -249,10 +252,9 @@ fn matches(rust: &Function, intel: &Intrinsic) -> Result<(), String> {
                 .flat_map(|c| c.to_lowercase())
                 .collect::<String>();

-            let rust_feature = rust.target_feature.expect(&format!(
-                "no target feature listed for {}",
-                rust.name
-            ));
+            let rust_feature = rust
+                .target_feature
+                .expect(&format!("no target feature listed for {}", rust.name));

             if rust_feature.contains(&cpuid) {
                 continue;
             }
@@ -314,24 +316,19 @@ fn matches(rust: &Function, intel: &Intrinsic) -> Result<(), String> {
         if rust.arguments.len() != intel.parameters.len() {
             bail!("wrong number of arguments on {}", rust.name)
         }
-        for (i, (a, b)) in intel
-            .parameters
-            .iter()
-            .zip(rust.arguments)
-            .enumerate()
-        {
+        for (i, (a, b)) in
+            intel.parameters.iter().zip(rust.arguments).enumerate()
+        {
             let is_const = rust.required_const.contains(&i);
             equate(b, &a.type_, &intel.name, is_const)?;
         }
     }

-    let any_i64 = rust.arguments
-        .iter()
-        .cloned()
-        .chain(rust.ret)
-        .any(|arg| match *arg {
-            Type::PrimSigned(64) | Type::PrimUnsigned(64) => true,
-            _ => false,
-        });
+    let any_i64 = rust.arguments.iter().cloned().chain(rust.ret).any(|arg| {
+        match *arg {
+            Type::PrimSigned(64) | Type::PrimUnsigned(64) => true,
+            _ => false,
+        }
+    });

     let any_i64_exempt = match rust.name {
         // These intrinsics have all been manually verified against Clang's
@@ -363,7 +360,7 @@ fn matches(rust: &Function, intel: &Intrinsic) -> Result<(), String> {
 }

 fn equate(
-    t: &Type, intel: &str, intrinsic: &str, is_const: bool
+    t: &Type, intel: &str, intrinsic: &str, is_const: bool,
 ) -> Result<(), String> {
     let intel = intel.replace(" *", "*");
     let intel = intel.replace(" const*", "*");
@@ -371,9 +368,7 @@ fn equate(
         if is_const {
             return Ok(());
         }
-        Err(format!(
-            "argument required to be const but isn't"
-        ))
+        Err(format!("argument required to be const but isn't"))
     };
     match (t, &intel[..]) {
         (&Type::PrimFloat(32), "float") => {}
View File
@@ -1,11 +1,20 @@
 #![feature(stdsimd)]
 #![cfg_attr(stdsimd_strict, deny(warnings))]
-#![cfg_attr(feature = "cargo-clippy",
-            allow(option_unwrap_used, use_debug, print_stdout))]
+#![cfg_attr(
+    feature = "cargo-clippy",
+    allow(option_unwrap_used, use_debug, print_stdout)
+)]

-#[cfg(any(target_arch = "arm", target_arch = "aarch64",
-          target_arch = "x86", target_arch = "x86_64",
-          target_arch = "powerpc", target_arch = "powerpc64"))]
+#[cfg(
+    any(
+        target_arch = "arm",
+        target_arch = "aarch64",
+        target_arch = "x86",
+        target_arch = "x86_64",
+        target_arch = "powerpc",
+        target_arch = "powerpc64"
+    )
+)]
 #[macro_use]
 extern crate stdsimd;
@@ -22,51 +31,30 @@ fn aarch64_linux() {
println!("fp: {}", is_aarch64_feature_detected!("fp")); println!("fp: {}", is_aarch64_feature_detected!("fp"));
println!("fp16: {}", is_aarch64_feature_detected!("fp16")); println!("fp16: {}", is_aarch64_feature_detected!("fp16"));
println!("neon: {}", is_aarch64_feature_detected!("neon")); println!("neon: {}", is_aarch64_feature_detected!("neon"));
println!( println!("asimd: {}", is_aarch64_feature_detected!("asimd"));
"asimd: {}",
is_aarch64_feature_detected!("asimd")
);
println!("sve: {}", is_aarch64_feature_detected!("sve")); println!("sve: {}", is_aarch64_feature_detected!("sve"));
println!("crc: {}", is_aarch64_feature_detected!("crc")); println!("crc: {}", is_aarch64_feature_detected!("crc"));
println!( println!("crypto: {}", is_aarch64_feature_detected!("crypto"));
"crypto: {}",
is_aarch64_feature_detected!("crypto")
);
println!("lse: {}", is_aarch64_feature_detected!("lse")); println!("lse: {}", is_aarch64_feature_detected!("lse"));
println!("rdm: {}", is_aarch64_feature_detected!("rdm")); println!("rdm: {}", is_aarch64_feature_detected!("rdm"));
println!("rcpc: {}", is_aarch64_feature_detected!("rcpc")); println!("rcpc: {}", is_aarch64_feature_detected!("rcpc"));
println!( println!("dotprod: {}", is_aarch64_feature_detected!("dotprod"));
"dotprod: {}",
is_aarch64_feature_detected!("dotprod")
);
} }
#[test] #[test]
#[cfg(all(target_arch = "powerpc", target_os = "linux"))] #[cfg(all(target_arch = "powerpc", target_os = "linux"))]
fn powerpc_linux() { fn powerpc_linux() {
println!( println!("altivec: {}", is_powerpc_feature_detected!("altivec"));
"altivec: {}",
is_powerpc_feature_detected!("altivec")
);
println!("vsx: {}", is_powerpc_feature_detected!("vsx")); println!("vsx: {}", is_powerpc_feature_detected!("vsx"));
println!( println!("power8: {}", is_powerpc_feature_detected!("power8"));
"power8: {}",
is_powerpc_feature_detected!("power8")
);
} }
#[test] #[test]
#[cfg(all(target_arch = "powerpc64", target_os = "linux"))] #[cfg(all(target_arch = "powerpc64", target_os = "linux"))]
fn powerpc64_linux() { fn powerpc64_linux() {
println!( println!("altivec: {}", is_powerpc64_feature_detected!("altivec"));
"altivec: {}",
is_powerpc64_feature_detected!("altivec")
);
println!("vsx: {}", is_powerpc64_feature_detected!("vsx")); println!("vsx: {}", is_powerpc64_feature_detected!("vsx"));
println!( println!("power8: {}", is_powerpc64_feature_detected!("power8"));
"power8: {}",
is_powerpc64_feature_detected!("power8")
);
} }
#[test] #[test]
@@ -76,54 +64,21 @@ fn x86_all() {
println!("sse2: {:?}", is_x86_feature_detected!("sse2")); println!("sse2: {:?}", is_x86_feature_detected!("sse2"));
println!("sse3: {:?}", is_x86_feature_detected!("sse3")); println!("sse3: {:?}", is_x86_feature_detected!("sse3"));
println!("ssse3: {:?}", is_x86_feature_detected!("ssse3")); println!("ssse3: {:?}", is_x86_feature_detected!("ssse3"));
println!( println!("sse4.1: {:?}", is_x86_feature_detected!("sse4.1"));
"sse4.1: {:?}", println!("sse4.2: {:?}", is_x86_feature_detected!("sse4.2"));
is_x86_feature_detected!("sse4.1")
);
println!(
"sse4.2: {:?}",
is_x86_feature_detected!("sse4.2")
);
println!("sse4a: {:?}", is_x86_feature_detected!("sse4a")); println!("sse4a: {:?}", is_x86_feature_detected!("sse4a"));
println!("sha: {:?}", is_x86_feature_detected!("sha")); println!("sha: {:?}", is_x86_feature_detected!("sha"));
println!("avx: {:?}", is_x86_feature_detected!("avx")); println!("avx: {:?}", is_x86_feature_detected!("avx"));
println!("avx2: {:?}", is_x86_feature_detected!("avx2")); println!("avx2: {:?}", is_x86_feature_detected!("avx2"));
println!( println!("avx512f {:?}", is_x86_feature_detected!("avx512f"));
"avx512f {:?}", println!("avx512cd {:?}", is_x86_feature_detected!("avx512cd"));
is_x86_feature_detected!("avx512f") println!("avx512er {:?}", is_x86_feature_detected!("avx512er"));
); println!("avx512pf {:?}", is_x86_feature_detected!("avx512pf"));
println!( println!("avx512bw {:?}", is_x86_feature_detected!("avx512bw"));
"avx512cd {:?}", println!("avx512dq {:?}", is_x86_feature_detected!("avx512dq"));
is_x86_feature_detected!("avx512cd") println!("avx512vl {:?}", is_x86_feature_detected!("avx512vl"));
); println!("avx512_ifma {:?}", is_x86_feature_detected!("avx512ifma"));
println!( println!("avx512_vbmi {:?}", is_x86_feature_detected!("avx512vbmi"));
"avx512er {:?}",
is_x86_feature_detected!("avx512er")
);
println!(
"avx512pf {:?}",
is_x86_feature_detected!("avx512pf")
);
println!(
"avx512bw {:?}",
is_x86_feature_detected!("avx512bw")
);
println!(
"avx512dq {:?}",
is_x86_feature_detected!("avx512dq")
);
println!(
"avx512vl {:?}",
is_x86_feature_detected!("avx512vl")
);
println!(
"avx512_ifma {:?}",
is_x86_feature_detected!("avx512ifma")
);
println!(
"avx512_vbmi {:?}",
is_x86_feature_detected!("avx512vbmi")
);
println!( println!(
"avx512_vpopcntdq {:?}", "avx512_vpopcntdq {:?}",
is_x86_feature_detected!("avx512vpopcntdq") is_x86_feature_detected!("avx512vpopcntdq")
@@ -133,23 +88,11 @@ fn x86_all() {
println!("bmi: {:?}", is_x86_feature_detected!("bmi1")); println!("bmi: {:?}", is_x86_feature_detected!("bmi1"));
println!("bmi2: {:?}", is_x86_feature_detected!("bmi2")); println!("bmi2: {:?}", is_x86_feature_detected!("bmi2"));
println!("tbm: {:?}", is_x86_feature_detected!("tbm")); println!("tbm: {:?}", is_x86_feature_detected!("tbm"));
println!( println!("popcnt: {:?}", is_x86_feature_detected!("popcnt"));
"popcnt: {:?}",
is_x86_feature_detected!("popcnt")
);
println!("lzcnt: {:?}", is_x86_feature_detected!("lzcnt")); println!("lzcnt: {:?}", is_x86_feature_detected!("lzcnt"));
println!("fxsr: {:?}", is_x86_feature_detected!("fxsr")); println!("fxsr: {:?}", is_x86_feature_detected!("fxsr"));
println!("xsave: {:?}", is_x86_feature_detected!("xsave")); println!("xsave: {:?}", is_x86_feature_detected!("xsave"));
println!( println!("xsaveopt: {:?}", is_x86_feature_detected!("xsaveopt"));
"xsaveopt: {:?}", println!("xsaves: {:?}", is_x86_feature_detected!("xsaves"));
is_x86_feature_detected!("xsaveopt") println!("xsavec: {:?}", is_x86_feature_detected!("xsavec"));
);
println!(
"xsaves: {:?}",
is_x86_feature_detected!("xsaves")
);
println!(
"xsavec: {:?}",
is_x86_feature_detected!("xsavec")
);
} }
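These tests only print detection results; for context, a minimal sketch of how the same macro supports run-time dispatch in user code (illustrative only, assuming the macro is in scope; not from this commit):

    fn add_one(xs: &mut [f32]) {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        {
            if is_x86_feature_detected!("avx2") {
                // a #[target_feature(enable = "avx2")] implementation
                // would be selected here
            }
        }
        // scalar fallback
        for x in xs.iter_mut() {
            *x += 1.0;
        }
    }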
View File
@@ -14,10 +14,13 @@
 #![feature(stdsimd)]
 #![cfg_attr(test, feature(test))]
-#![cfg_attr(feature = "cargo-clippy",
-            allow(result_unwrap_used, print_stdout, option_unwrap_used,
-                  shadow_reuse, cast_possible_wrap, cast_sign_loss,
-                  missing_docs_in_private_items))]
+#![cfg_attr(
+    feature = "cargo-clippy",
+    allow(
+        result_unwrap_used, print_stdout, option_unwrap_used, shadow_reuse,
+        cast_possible_wrap, cast_sign_loss, missing_docs_in_private_items
+    )
+)]

 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
 #[macro_use]
@@ -68,7 +71,7 @@ fn hex_encode<'a>(src: &[u8], dst: &'a mut [u8]) -> Result<&'a str, usize> {
 #[target_feature(enable = "avx2")]
 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
 unsafe fn hex_encode_avx2<'a>(
-    mut src: &[u8], dst: &'a mut [u8]
+    mut src: &[u8], dst: &'a mut [u8],
 ) -> Result<&'a str, usize> {
     let ascii_zero = _mm256_set1_epi8(b'0' as i8);
     let nines = _mm256_set1_epi8(9);
@@ -115,16 +118,14 @@ unsafe fn hex_encode_avx2<'a>(
     let i = i as usize;
     let _ = hex_encode_sse41(src, &mut dst[i * 2..]);

-    Ok(str::from_utf8_unchecked(
-        &dst[..src.len() * 2 + i * 2],
-    ))
+    Ok(str::from_utf8_unchecked(&dst[..src.len() * 2 + i * 2]))
 }

 // copied from https://github.com/Matherunner/bin2hex-sse/blob/master/base16_sse4.cpp
 #[target_feature(enable = "sse4.1")]
 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
 unsafe fn hex_encode_sse41<'a>(
-    mut src: &[u8], dst: &'a mut [u8]
+    mut src: &[u8], dst: &'a mut [u8],
 ) -> Result<&'a str, usize> {
     let ascii_zero = _mm_set1_epi8(b'0' as i8);
     let nines = _mm_set1_epi8(9);
@@ -157,10 +158,7 @@ unsafe fn hex_encode_sse41<'a>(
         let res2 = _mm_unpackhi_epi8(masked2, masked1);

         _mm_storeu_si128(dst.as_mut_ptr().offset(i * 2) as *mut _, res1);
-        _mm_storeu_si128(
-            dst.as_mut_ptr().offset(i * 2 + 16) as *mut _,
-            res2,
-        );
+        _mm_storeu_si128(dst.as_mut_ptr().offset(i * 2 + 16) as *mut _, res2);
         src = &src[16..];
         i += 16;
     }
@@ -168,13 +166,11 @@ unsafe fn hex_encode_sse41<'a>(
     let i = i as usize;
     let _ = hex_encode_fallback(src, &mut dst[i * 2..]);

-    Ok(str::from_utf8_unchecked(
-        &dst[..src.len() * 2 + i * 2],
-    ))
+    Ok(str::from_utf8_unchecked(&dst[..src.len() * 2 + i * 2]))
 }

 fn hex_encode_fallback<'a>(
-    src: &[u8], dst: &'a mut [u8]
+    src: &[u8], dst: &'a mut [u8],
 ) -> Result<&'a str, usize> {
     fn hex(byte: u8) -> u8 {
         static TABLE: &[u8] = b"0123456789abcdef";
@@ -199,10 +195,7 @@ mod tests {
     fn test(input: &[u8], output: &str) {
         let tmp = || vec![0; input.len() * 2];

-        assert_eq!(
-            hex_encode_fallback(input, &mut tmp()).unwrap(),
-            output
-        );
+        assert_eq!(hex_encode_fallback(input, &mut tmp()).unwrap(), output);
         assert_eq!(hex_encode(input, &mut tmp()).unwrap(), output);

         #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
@@ -239,9 +232,7 @@ mod tests {
     fn odd() {
         test(
             &[0; 313],
-            &iter::repeat('0')
-                .take(313 * 2)
-                .collect::<String>(),
+            &iter::repeat('0').take(313 * 2).collect::<String>(),
         );
     }
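For orientation, the example's entry point encodes `src` into `dst` and returns the filled prefix as `&str`; the `Err(usize)` presumably reports a size problem when `dst` is too small. A hypothetical caller under those assumptions:

    let src = [0xde, 0xad, 0xbe, 0xef];
    let mut dst = [0u8; 8]; // two output bytes per input byte
    match hex_encode(&src, &mut dst) {
        Ok(s) => assert_eq!(s, "deadbeef"),
        Err(n) => panic!("destination buffer too small: {}", n),
    }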
View File
@@ -5,9 +5,13 @@
 #![cfg_attr(stdsimd_strict, deny(warnings))]
 #![feature(stdsimd)]
-#![cfg_attr(feature = "cargo-clippy",
-            allow(similar_names, missing_docs_in_private_items,
-                  shadow_reuse, print_stdout))]
+#![cfg_attr(
+    feature = "cargo-clippy",
+    allow(
+        similar_names, missing_docs_in_private_items, shadow_reuse,
+        print_stdout
+    )
+)]

 extern crate stdsimd;
 #[macro_use]
@@ -15,8 +19,6 @@ extern crate cfg_if;
 use stdsimd::simd::*;

 const PI: f64 = std::f64::consts::PI;
 const SOLAR_MASS: f64 = 4.0 * PI * PI;
 const DAYS_PER_YEAR: f64 = 365.24;
@@ -81,7 +83,7 @@ struct Body {
 impl Body {
     fn new(
-        x0: f64, x1: f64, x2: f64, v0: f64, v1: f64, v2: f64, mass: f64
+        x0: f64, x1: f64, x2: f64, v0: f64, v1: f64, v2: f64, mass: f64,
     ) -> Self {
         Self {
             x: [x0, x1, x2],
View File
@@ -64,7 +64,8 @@ macro_rules! is_aarch64_feature_detected {
 #[unstable(feature = "stdsimd", issue = "27731")]
 macro_rules! is_powerpc_feature_detected {
     ($t:tt) => {
-        compile_error!(r#"
+        compile_error!(
+            r#"
 is_powerpc_feature_detected can only be used on PowerPC targets.
 You can prevent it from being used in other architectures by
 guarding it behind a cfg(target_arch) as follows:
@@ -72,7 +73,8 @@ guarding it behind a cfg(target_arch) as follows:
#[cfg(target_arch = "powerpc")] { #[cfg(target_arch = "powerpc")] {
if is_powerpc_feature_detected(...) { ... } if is_powerpc_feature_detected(...) { ... }
} }
"#) "#
)
}; };
} }
@@ -81,7 +83,8 @@ guarding it behind a cfg(target_arch) as follows:
 #[unstable(feature = "stdsimd", issue = "27731")]
 macro_rules! is_powerpc64_feature_detected {
     ($t:tt) => {
-        compile_error!(r#"
+        compile_error!(
+            r#"
 is_powerpc64_feature_detected can only be used on PowerPC64 targets.
 You can prevent it from being used in other architectures by
 guarding it behind a cfg(target_arch) as follows:
@@ -89,7 +92,8 @@ guarding it behind a cfg(target_arch) as follows:
#[cfg(target_arch = "powerpc64")] { #[cfg(target_arch = "powerpc64")] {
if is_powerpc64_feature_detected(...) { ... } if is_powerpc64_feature_detected(...) { ... }
} }
"#) "#
)
}; };
} }
View File
@@ -60,8 +60,8 @@ cfg_if! {
 }
 pub use self::arch::Feature;

-mod cache;
 mod bit;
+mod cache;

 cfg_if! {
     if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] {