reformat with latest rustfmt

gnzlbg
2018-06-06 00:17:14 +02:00
committed by gnzlbg
parent c2d60b18e4
commit c3d273c980
58 changed files with 1038 additions and 1710 deletions
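Most of the changes below are mechanical; each hunk shows the old formatting immediately followed by the reformatted replacement. The newer rustfmt appends a trailing comma to the last parameter of multi-line signatures, re-wraps long #[cfg]/#[cfg_attr] attribute bodies across lines, and packs short literal lists onto as few lines as fit. A minimal sketch of the dominant pattern, using hypothetical function names not taken from this diff:

// Before: no trailing comma after the last multi-line parameter.
fn example_before(
    first: u32, second: u32, third: u32
) -> u32 {
    first + second + third
}

// After: the newer rustfmt appends a trailing comma.
fn example_after(
    first: u32, second: u32, third: u32,
) -> u32 {
    first + second + third
}

fn main() {
    assert_eq!(example_before(1, 2, 3), example_after(1, 2, 3));
}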

View File

@@ -16,36 +16,36 @@ extern "C" {
fn vsha1h_u32_(hash_e: u32) -> u32;
#[link_name = "llvm.aarch64.crypto.sha1su0"]
fn vsha1su0q_u32_(
w0_3: uint32x4_t, w4_7: uint32x4_t, w8_11: uint32x4_t
w0_3: uint32x4_t, w4_7: uint32x4_t, w8_11: uint32x4_t,
) -> uint32x4_t;
#[link_name = "llvm.aarch64.crypto.sha1su1"]
fn vsha1su1q_u32_(tw0_3: uint32x4_t, w12_15: uint32x4_t) -> uint32x4_t;
#[link_name = "llvm.aarch64.crypto.sha1c"]
fn vsha1cq_u32_(
hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t
hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t,
) -> uint32x4_t;
#[link_name = "llvm.aarch64.crypto.sha1p"]
fn vsha1pq_u32_(
hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t
hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t,
) -> uint32x4_t;
#[link_name = "llvm.aarch64.crypto.sha1m"]
fn vsha1mq_u32_(
hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t
hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t,
) -> uint32x4_t;
#[link_name = "llvm.aarch64.crypto.sha256h"]
fn vsha256hq_u32_(
hash_abcd: uint32x4_t, hash_efgh: uint32x4_t, wk: uint32x4_t
hash_abcd: uint32x4_t, hash_efgh: uint32x4_t, wk: uint32x4_t,
) -> uint32x4_t;
#[link_name = "llvm.aarch64.crypto.sha256h2"]
fn vsha256h2q_u32_(
hash_efgh: uint32x4_t, hash_abcd: uint32x4_t, wk: uint32x4_t
hash_efgh: uint32x4_t, hash_abcd: uint32x4_t, wk: uint32x4_t,
) -> uint32x4_t;
#[link_name = "llvm.aarch64.crypto.sha256su0"]
fn vsha256su0q_u32_(w0_3: uint32x4_t, w4_7: uint32x4_t) -> uint32x4_t;
#[link_name = "llvm.aarch64.crypto.sha256su1"]
fn vsha256su1q_u32_(
tw0_3: uint32x4_t, w8_11: uint32x4_t, w12_15: uint32x4_t
tw0_3: uint32x4_t, w8_11: uint32x4_t, w12_15: uint32x4_t,
) -> uint32x4_t;
}
@@ -97,7 +97,7 @@ pub unsafe fn vsha1h_u32(hash_e: u32) -> u32 {
#[target_feature(enable = "crypto")]
#[cfg_attr(test, assert_instr(sha1c))]
pub unsafe fn vsha1cq_u32(
hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t
hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t,
) -> uint32x4_t {
vsha1cq_u32_(hash_abcd, hash_e, wk)
}
@@ -107,7 +107,7 @@ pub unsafe fn vsha1cq_u32(
#[target_feature(enable = "crypto")]
#[cfg_attr(test, assert_instr(sha1m))]
pub unsafe fn vsha1mq_u32(
hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t
hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t,
) -> uint32x4_t {
vsha1mq_u32_(hash_abcd, hash_e, wk)
}
@@ -117,7 +117,7 @@ pub unsafe fn vsha1mq_u32(
#[target_feature(enable = "crypto")]
#[cfg_attr(test, assert_instr(sha1p))]
pub unsafe fn vsha1pq_u32(
hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t
hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t,
) -> uint32x4_t {
vsha1pq_u32_(hash_abcd, hash_e, wk)
}
@@ -127,7 +127,7 @@ pub unsafe fn vsha1pq_u32(
#[target_feature(enable = "crypto")]
#[cfg_attr(test, assert_instr(sha1su0))]
pub unsafe fn vsha1su0q_u32(
w0_3: uint32x4_t, w4_7: uint32x4_t, w8_11: uint32x4_t
w0_3: uint32x4_t, w4_7: uint32x4_t, w8_11: uint32x4_t,
) -> uint32x4_t {
vsha1su0q_u32_(w0_3, w4_7, w8_11)
}
@@ -137,7 +137,7 @@ pub unsafe fn vsha1su0q_u32(
#[target_feature(enable = "crypto")]
#[cfg_attr(test, assert_instr(sha1su1))]
pub unsafe fn vsha1su1q_u32(
tw0_3: uint32x4_t, w12_15: uint32x4_t
tw0_3: uint32x4_t, w12_15: uint32x4_t,
) -> uint32x4_t {
vsha1su1q_u32_(tw0_3, w12_15)
}
@@ -147,7 +147,7 @@ pub unsafe fn vsha1su1q_u32(
#[target_feature(enable = "crypto")]
#[cfg_attr(test, assert_instr(sha256h))]
pub unsafe fn vsha256hq_u32(
hash_abcd: uint32x4_t, hash_efgh: uint32x4_t, wk: uint32x4_t
hash_abcd: uint32x4_t, hash_efgh: uint32x4_t, wk: uint32x4_t,
) -> uint32x4_t {
vsha256hq_u32_(hash_abcd, hash_efgh, wk)
}
@@ -157,7 +157,7 @@ pub unsafe fn vsha256hq_u32(
#[target_feature(enable = "crypto")]
#[cfg_attr(test, assert_instr(sha256h2))]
pub unsafe fn vsha256h2q_u32(
hash_efgh: uint32x4_t, hash_abcd: uint32x4_t, wk: uint32x4_t
hash_efgh: uint32x4_t, hash_abcd: uint32x4_t, wk: uint32x4_t,
) -> uint32x4_t {
vsha256h2q_u32_(hash_efgh, hash_abcd, wk)
}
@@ -167,7 +167,7 @@ pub unsafe fn vsha256h2q_u32(
#[target_feature(enable = "crypto")]
#[cfg_attr(test, assert_instr(sha256su0))]
pub unsafe fn vsha256su0q_u32(
w0_3: uint32x4_t, w4_7: uint32x4_t
w0_3: uint32x4_t, w4_7: uint32x4_t,
) -> uint32x4_t {
vsha256su0q_u32_(w0_3, w4_7)
}
@@ -177,7 +177,7 @@ pub unsafe fn vsha256su0q_u32(
#[target_feature(enable = "crypto")]
#[cfg_attr(test, assert_instr(sha256su1))]
pub unsafe fn vsha256su1q_u32(
tw0_3: uint32x4_t, w8_11: uint32x4_t, w12_15: uint32x4_t
tw0_3: uint32x4_t, w8_11: uint32x4_t, w12_15: uint32x4_t,
) -> uint32x4_t {
vsha256su1q_u32_(tw0_3, w8_11, w12_15)
}
@@ -199,22 +199,8 @@ mod tests {
assert_eq!(
r,
u8x16::new(
124,
123,
124,
118,
124,
123,
124,
197,
124,
123,
124,
118,
124,
123,
124,
197
124, 123, 124, 118, 124, 123, 124, 197, 124, 123, 124, 118,
124, 123, 124, 197
)
);
}
@@ -229,22 +215,7 @@ mod tests {
assert_eq!(
r,
u8x16::new(
9,
213,
9,
251,
9,
213,
9,
56,
9,
213,
9,
251,
9,
213,
9,
56
9, 213, 9, 251, 9, 213, 9, 56, 9, 213, 9, 251, 9, 213, 9, 56
)
);
}
@@ -256,24 +227,7 @@ mod tests {
let r: u8x16 = vaesmcq_u8(data).into_bits();
assert_eq!(
r,
u8x16::new(
3,
4,
9,
10,
15,
8,
21,
30,
3,
4,
9,
10,
15,
8,
21,
30
)
u8x16::new(3, 4, 9, 10, 15, 8, 21, 30, 3, 4, 9, 10, 15, 8, 21, 30)
);
}
@@ -285,22 +239,8 @@ mod tests {
assert_eq!(
r,
u8x16::new(
43,
60,
33,
50,
103,
80,
125,
70,
43,
60,
33,
50,
103,
80,
125,
70
43, 60, 33, 50, 103, 80, 125, 70, 43, 60, 33, 50, 103, 80,
125, 70
)
);
}

View File

@@ -546,7 +546,6 @@ pub unsafe fn vpmaxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
vpmaxq_f64_(a, b)
}
#[cfg(test)]
mod tests {
use coresimd::aarch64::*;
@@ -800,20 +799,11 @@ mod tests {
#[simd_test(enable = "neon")]
unsafe fn test_vpminq_s8() {
#[cfg_attr(rustfmt, skip)]
let a = i8x16::new(
1, -2, 3, -4, 5, 6, 7, 8,
1, 2, 3, 4, 5, 6, 7, 8
);
let a = i8x16::new(1, -2, 3, -4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
#[cfg_attr(rustfmt, skip)]
let b = i8x16::new(
0, 3, 2, 5, 4, 7, 6, 9,
0, 3, 2, 5, 4, 7, 6, 9
);
let b = i8x16::new(0, 3, 2, 5, 4, 7, 6, 9, 0, 3, 2, 5, 4, 7, 6, 9);
#[cfg_attr(rustfmt, skip)]
let e = i8x16::new(
-2, -4, 5, 7, 1, 3, 5, 7,
0, 2, 4, 6, 0, 2, 4, 6,
);
let e = i8x16::new(-2, -4, 5, 7, 1, 3, 5, 7, 0, 2, 4, 6, 0, 2, 4, 6);
let r: i8x16 = vpminq_s8(a.into_bits(), b.into_bits()).into_bits();
assert_eq!(r, e);
}
@@ -839,20 +829,11 @@ mod tests {
#[simd_test(enable = "neon")]
unsafe fn test_vpminq_u8() {
#[cfg_attr(rustfmt, skip)]
let a = u8x16::new(
1, 2, 3, 4, 5, 6, 7, 8,
1, 2, 3, 4, 5, 6, 7, 8
);
let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
#[cfg_attr(rustfmt, skip)]
let b = u8x16::new(
0, 3, 2, 5, 4, 7, 6, 9,
0, 3, 2, 5, 4, 7, 6, 9
);
let b = u8x16::new(0, 3, 2, 5, 4, 7, 6, 9, 0, 3, 2, 5, 4, 7, 6, 9);
#[cfg_attr(rustfmt, skip)]
let e = u8x16::new(
1, 3, 5, 7, 1, 3, 5, 7,
0, 2, 4, 6, 0, 2, 4, 6,
);
let e = u8x16::new(1, 3, 5, 7, 1, 3, 5, 7, 0, 2, 4, 6, 0, 2, 4, 6);
let r: u8x16 = vpminq_u8(a.into_bits(), b.into_bits()).into_bits();
assert_eq!(r, e);
}
@@ -896,20 +877,11 @@ mod tests {
#[simd_test(enable = "neon")]
unsafe fn test_vpmaxq_s8() {
#[cfg_attr(rustfmt, skip)]
let a = i8x16::new(
1, -2, 3, -4, 5, 6, 7, 8,
1, 2, 3, 4, 5, 6, 7, 8
);
let a = i8x16::new(1, -2, 3, -4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
#[cfg_attr(rustfmt, skip)]
let b = i8x16::new(
0, 3, 2, 5, 4, 7, 6, 9,
0, 3, 2, 5, 4, 7, 6, 9
);
let b = i8x16::new(0, 3, 2, 5, 4, 7, 6, 9, 0, 3, 2, 5, 4, 7, 6, 9);
#[cfg_attr(rustfmt, skip)]
let e = i8x16::new(
1, 3, 6, 8, 2, 4, 6, 8,
3, 5, 7, 9, 3, 5, 7, 9,
);
let e = i8x16::new(1, 3, 6, 8, 2, 4, 6, 8, 3, 5, 7, 9, 3, 5, 7, 9);
let r: i8x16 = vpmaxq_s8(a.into_bits(), b.into_bits()).into_bits();
assert_eq!(r, e);
}
@@ -935,20 +907,11 @@ mod tests {
#[simd_test(enable = "neon")]
unsafe fn test_vpmaxq_u8() {
#[cfg_attr(rustfmt, skip)]
let a = u8x16::new(
1, 2, 3, 4, 5, 6, 7, 8,
1, 2, 3, 4, 5, 6, 7, 8
);
let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
#[cfg_attr(rustfmt, skip)]
let b = u8x16::new(
0, 3, 2, 5, 4, 7, 6, 9,
0, 3, 2, 5, 4, 7, 6, 9
);
let b = u8x16::new(0, 3, 2, 5, 4, 7, 6, 9, 0, 3, 2, 5, 4, 7, 6, 9);
#[cfg_attr(rustfmt, skip)]
let e = u8x16::new(
2, 4, 6, 8, 2, 4, 6, 8,
3, 5, 7, 9, 3, 5, 7, 9,
);
let e = u8x16::new(2, 4, 6, 8, 2, 4, 6, 8, 3, 5, 7, 9, 3, 5, 7, 9);
let r: u8x16 = vpmaxq_u8(a.into_bits(), b.into_bits()).into_bits();
assert_eq!(r, e);
}

View File

@@ -19,11 +19,19 @@ pub use self::v7::*;
// NEON is supported on AArch64, and on ARM when built with the v7 and neon
// features. Building ARM without neon produces incorrect codegen.
#[cfg(any(target_arch = "aarch64",
#[cfg(
any(
target_arch = "aarch64",
all(target_feature = "v7", target_feature = "neon"),
dox))]
dox
)
)]
mod neon;
#[cfg(any(target_arch = "aarch64",
#[cfg(
any(
target_arch = "aarch64",
all(target_feature = "v7", target_feature = "neon"),
dox))]
dox
)
)]
pub use self::neon::*;

View File

@@ -366,52 +366,82 @@ impl_from_bits_!(
#[allow(improper_ctypes)]
extern "C" {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frsqrte.v2f32")]
#[cfg_attr(
target_arch = "aarch64", link_name = "llvm.aarch64.neon.frsqrte.v2f32"
)]
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v2f32")]
fn frsqrte_v2f32(a: float32x2_t) -> float32x2_t;
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v8i8")]
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sminp.v8i8")]
#[cfg_attr(
target_arch = "aarch64", link_name = "llvm.aarch64.neon.sminp.v8i8"
)]
fn vpmins_v8i8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v4i16")]
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sminp.v4i16")]
#[cfg_attr(
target_arch = "aarch64", link_name = "llvm.aarch64.neon.sminp.v4i16"
)]
fn vpmins_v4i16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v2i32")]
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sminp.v2i32")]
#[cfg_attr(
target_arch = "aarch64", link_name = "llvm.aarch64.neon.sminp.v2i32"
)]
fn vpmins_v2i32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpminu.v8i8")]
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uminp.v8i8")]
#[cfg_attr(
target_arch = "aarch64", link_name = "llvm.aarch64.neon.uminp.v8i8"
)]
fn vpminu_v8i8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t;
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpminu.v4i16")]
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uminp.v4i16")]
#[cfg_attr(
target_arch = "aarch64", link_name = "llvm.aarch64.neon.uminp.v4i16"
)]
fn vpminu_v4i16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t;
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpminu.v2i32")]
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uminp.v2i32")]
#[cfg_attr(
target_arch = "aarch64", link_name = "llvm.aarch64.neon.uminp.v2i32"
)]
fn vpminu_v2i32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t;
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v2f32")]
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminp.v2f32")]
#[cfg_attr(
target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminp.v2f32"
)]
fn vpminf_v2f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v8i8")]
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smaxp.v8i8")]
#[cfg_attr(
target_arch = "aarch64", link_name = "llvm.aarch64.neon.smaxp.v8i8"
)]
fn vpmaxs_v8i8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v4i16")]
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smaxp.v4i16")]
#[cfg_attr(
target_arch = "aarch64", link_name = "llvm.aarch64.neon.smaxp.v4i16"
)]
fn vpmaxs_v4i16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v2i32")]
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smaxp.v2i32")]
#[cfg_attr(
target_arch = "aarch64", link_name = "llvm.aarch64.neon.smaxp.v2i32"
)]
fn vpmaxs_v2i32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v8i8")]
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umaxp.v8i8")]
#[cfg_attr(
target_arch = "aarch64", link_name = "llvm.aarch64.neon.umaxp.v8i8"
)]
fn vpmaxu_v8i8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t;
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v4i16")]
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umaxp.v4i16")]
#[cfg_attr(
target_arch = "aarch64", link_name = "llvm.aarch64.neon.umaxp.v4i16"
)]
fn vpmaxu_v4i16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t;
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v2i32")]
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umaxp.v2i32")]
#[cfg_attr(
target_arch = "aarch64", link_name = "llvm.aarch64.neon.umaxp.v2i32"
)]
fn vpmaxu_v2i32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t;
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v2f32")]
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxp.v2f32")]
#[cfg_attr(
target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxp.v2f32"
)]
fn vpmaxf_v2f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
}
@@ -916,7 +946,6 @@ pub unsafe fn vpmax_f32 (a: float32x2_t, b: float32x2_t) -> float32x2_t {
vpmaxf_v2f32(a, b)
}
#[cfg(test)]
mod tests {
use coresimd::arm::*;

View File

@@ -75,8 +75,7 @@ impl_from_bits_!(
vector_bool_int
);
impl_from_bits_!(
i8x16:
vector_signed_char,
i8x16: vector_signed_char,
vector_unsigned_char,
vector_bool_char,
vector_signed_short,
@@ -114,8 +113,7 @@ impl_from_bits_!(
vector_bool_int
);
impl_from_bits_!(
u8x16:
vector_signed_char,
u8x16: vector_signed_char,
vector_unsigned_char,
vector_bool_char,
vector_signed_short,
@@ -135,11 +133,7 @@ impl_from_bits_!(
vector_bool_short,
vector_bool_int
);
impl_from_bits_!(
m8x16: vector_bool_char,
vector_bool_short,
vector_bool_int
);
impl_from_bits_!(m8x16: vector_bool_char, vector_bool_short, vector_bool_int);
impl_from_bits_!(
vector_signed_short: u64x2,
@@ -166,8 +160,7 @@ impl_from_bits_!(
vector_bool_int
);
impl_from_bits_!(
i16x8:
vector_signed_char,
i16x8: vector_signed_char,
vector_unsigned_char,
vector_bool_char,
vector_signed_short,
@@ -204,8 +197,7 @@ impl_from_bits_!(
vector_bool_int
);
impl_from_bits_!(
u16x8:
vector_signed_char,
u16x8: vector_signed_char,
vector_unsigned_char,
vector_bool_char,
vector_signed_short,
@@ -251,8 +243,7 @@ impl_from_bits_!(
vector_bool_int
);
impl_from_bits_!(
i32x4:
vector_signed_char,
i32x4: vector_signed_char,
vector_unsigned_char,
vector_bool_char,
vector_signed_short,
@@ -289,8 +280,7 @@ impl_from_bits_!(
vector_bool_int
);
impl_from_bits_!(
u32x4:
vector_signed_char,
u32x4: vector_signed_char,
vector_unsigned_char,
vector_bool_char,
vector_signed_short,
@@ -345,8 +335,7 @@ impl_from_bits_!(
vector_bool_int
);
impl_from_bits_!(
f32x4:
vector_signed_char,
f32x4: vector_signed_char,
vector_unsigned_char,
vector_bool_char,
vector_signed_short,
@@ -361,9 +350,13 @@ impl_from_bits_!(
#[allow(improper_ctypes)]
extern "C" {
#[link_name = "llvm.ppc.altivec.vperm"]
fn vperm(a: vector_signed_int, b: vector_signed_int, c: vector_unsigned_char) -> vector_signed_int;
fn vperm(
a: vector_signed_int, b: vector_signed_int, c: vector_unsigned_char,
) -> vector_signed_int;
#[link_name = "llvm.ppc.altivec.vmhaddshs"]
fn vmhaddshs(a: vector_signed_short, b: vector_signed_short, c: vector_signed_short) -> vector_signed_short;
fn vmhaddshs(
a: vector_signed_short, b: vector_signed_short, c: vector_signed_short,
) -> vector_signed_short;
}
mod sealed {
@@ -373,7 +366,9 @@ mod sealed {
#[inline]
#[target_feature(enable = "altivec")]
#[cfg_attr(test, assert_instr(vperm))]
unsafe fn vec_vperm(a: vector_signed_int, b: vector_signed_int, c: vector_unsigned_char) -> vector_signed_int {
unsafe fn vec_vperm(
a: vector_signed_int, b: vector_signed_int, c: vector_unsigned_char,
) -> vector_signed_int {
vperm(a, b, c)
}
@@ -703,7 +698,6 @@ where
a.vec_add(b)
}
/// Endian-biased intrinsics
#[cfg(target_endian = "little")]
mod endian {
@@ -718,8 +712,10 @@ mod endian {
// vperm has big-endian bias
//
// Xor the mask and flip the arguments
let d = u8x16::new(255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255).into_bits();
let d = u8x16::new(
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255,
).into_bits();
let c = simd_xor(c, d);
b.vec_vperm(a, c)
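A note on why the workaround above is correct, with a runnable check of the selector arithmetic: vperm indexes the 32-byte concatenation of its operands by the low five bits of each mask byte, and for any five-bit index i, i ^ 0x1f == 31 - i, so XOR-ing the mask with all-ones reflects every selected position; swapping a and b then puts each selected byte back in the half the little-endian caller intended. This is a sketch of the reasoning, not the intrinsic's full specification.

// Runnable check: XOR with 0x1f (the five selector bits of 0xff) maps a
// vperm byte index i to 31 - i, reflecting it across the concatenation.
fn main() {
    for i in 0u8..32 {
        assert_eq!(i ^ 0x1f, 31 - i);
    }
}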
@@ -730,7 +726,9 @@ mod endian {
#[inline]
#[target_feature(enable = "altivec")]
#[cfg_attr(test, assert_instr(vmhaddshs))]
pub unsafe fn vec_madds(a: vector_signed_short, b: vector_signed_short, c: vector_signed_short) -> vector_signed_short {
pub unsafe fn vec_madds(
a: vector_signed_short, b: vector_signed_short, c: vector_signed_short,
) -> vector_signed_short {
vmhaddshs(a, b, c)
}
@@ -850,9 +848,20 @@ mod tests {
#[simd_test(enable = "altivec")]
unsafe fn test_vec_madds() {
let a: vector_signed_short = i16x8::new(0 * 256, 1 * 256, 2 * 256, 3 * 256, 4 * 256, 5 * 256, 6 * 256, 7 * 256).into_bits();
let b: vector_signed_short = i16x8::new(256, 256, 256, 256, 256, 256, 256, 256).into_bits();
let c: vector_signed_short = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7).into_bits();
let a: vector_signed_short = i16x8::new(
0 * 256,
1 * 256,
2 * 256,
3 * 256,
4 * 256,
5 * 256,
6 * 256,
7 * 256,
).into_bits();
let b: vector_signed_short =
i16x8::new(256, 256, 256, 256, 256, 256, 256, 256).into_bits();
let c: vector_signed_short =
i16x8::new(0, 1, 2, 3, 4, 5, 6, 7).into_bits();
let d = i16x8::new(0, 3, 6, 9, 12, 15, 18, 21);
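The expected d follows from the assumed per-lane semantics of vmhaddshs, saturate(((a * b) >> 15) + c): with a[i] = i * 256 and b[i] = 256 the product is i << 16, the shift leaves 2 * i, and adding c[i] = i gives 3 * i. A scalar check of that arithmetic:

// Scalar check of d = (0, 3, 6, 9, 12, 15, 18, 21), assuming vmhaddshs
// computes saturate(((a * b) >> 15) + c) per lane (saturation never
// triggers for these inputs).
fn main() {
    for i in 0i32..8 {
        let d = ((i * 256 * 256) >> 15) + i; // (i << 16) >> 15 == 2 * i
        assert_eq!(d, 3 * i);
    }
}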

View File

@@ -1,6 +1,7 @@
//! PowerPC 64
//!
//! The reference is the [64-Bit ELF V2 ABI Specification - Power Architecture].
//! The reference is the [64-Bit ELF V2 ABI Specification - Power
//! Architecture].
//!
//! [64-Bit ELF V2 ABI Specification - Power Architecture]: http://openpowerfoundation.org/wp-content/uploads/resources/leabi/leabi-20170510.pdf

View File

@@ -63,8 +63,7 @@ impl_from_bits_!(
vector_double
);
impl_from_bits_!(
i64x2:
vector_signed_char,
i64x2: vector_signed_char,
vector_unsigned_char,
vector_bool_char,
vector_signed_short,
@@ -109,8 +108,7 @@ impl_from_bits_!(
vector_double
);
impl_from_bits_!(
u64x2:
vector_signed_char,
u64x2: vector_signed_char,
vector_unsigned_char,
vector_bool_char,
vector_signed_short,
@@ -155,8 +153,7 @@ impl_from_bits_!(
vector_bool_long
);
impl_from_bits_!(
f64x2:
vector_signed_char,
f64x2: vector_signed_char,
vector_unsigned_char,
vector_bool_char,
vector_signed_short,
@@ -234,8 +231,12 @@ mod sealed {
// xxpermdi has a big-endian bias and extended mnemonics
#[inline]
#[target_feature(enable = "vsx")]
#[cfg_attr(all(test, target_endian="little"), assert_instr(xxmrgld, dm = 0x0))]
#[cfg_attr(all(test, target_endian="big"), assert_instr(xxspltd, dm = 0x0))]
#[cfg_attr(
all(test, target_endian = "little"), assert_instr(xxmrgld, dm = 0x0)
)]
#[cfg_attr(
all(test, target_endian = "big"), assert_instr(xxspltd, dm = 0x0)
)]
unsafe fn xxpermdi(a: i64x2, b: i64x2, dm: u8) -> i64x2 {
match dm & 0b11 {
0 => simd_shuffle2(a, b, [0b00, 0b10]),
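Only the dm = 0 arm is visible in this hunk. In simd_shuffle2, indices 0..1 address a and 2..3 address b, so [0b00, 0b10] takes doubleword 0 of each operand. A scalar model of the selection, assuming the elided arms let bit 0 of dm pick the doubleword of a and bit 1 pick that of b (an assumption; only the first arm is shown):

// Hypothetical scalar model; only dm & 0b11 == 0 is confirmed above.
fn xxpermdi_model(a: [i64; 2], b: [i64; 2], dm: u8) -> [i64; 2] {
    [a[(dm & 0b01) as usize], b[((dm & 0b10) >> 1) as usize]]
}

fn main() {
    assert_eq!(xxpermdi_model([10, 11], [20, 21], 0), [10, 20]);
}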

View File

@@ -165,7 +165,6 @@ macro_rules! impl_float_arithmetic_reductions {
};
}
#[cfg(test)]
macro_rules! test_int_arithmetic_reductions {
($id:ident, $elem_ty:ident) => {
@@ -237,10 +236,7 @@ macro_rules! test_float_arithmetic_reductions {
let v = $id::splat(1 as $elem_ty);
assert_eq!(v.sum(), $id::lanes() as $elem_ty);
let v = alternating(2);
assert_eq!(
v.sum(),
($id::lanes() / 2 + $id::lanes()) as $elem_ty
);
assert_eq!(v.sum(), ($id::lanes() / 2 + $id::lanes()) as $elem_ty);
}
#[test]
fn product() {

View File

@@ -59,7 +59,6 @@ macro_rules! impl_float_math {
macro_rules! test_float_math {
($id:ident, $elem_ty:ident) => {
fn sqrt2() -> $elem_ty {
match ::mem::size_of::<$elem_ty>() {
4 => 1.4142135 as $elem_ty,

View File

@@ -46,7 +46,7 @@ macro_rules! impl_load_store {
/// undefined.
#[inline]
pub unsafe fn store_aligned_unchecked(
self, slice: &mut [$elem_ty]
self, slice: &mut [$elem_ty],
) {
*(slice.get_unchecked_mut(0) as *mut $elem_ty as *mut Self) =
self;
@@ -59,7 +59,7 @@ macro_rules! impl_load_store {
/// If `slice.len() < Self::lanes()` the behavior is undefined.
#[inline]
pub unsafe fn store_unaligned_unchecked(
self, slice: &mut [$elem_ty]
self, slice: &mut [$elem_ty],
) {
let target_ptr =
slice.get_unchecked_mut(0) as *mut $elem_ty as *mut u8;
@@ -121,7 +121,7 @@ macro_rules! impl_load_store {
/// If `slice.len() < Self::lanes()` the behavior is undefined.
#[inline]
pub unsafe fn load_unaligned_unchecked(
slice: &[$elem_ty]
slice: &[$elem_ty],
) -> Self {
use mem::size_of;
let target_ptr =
@@ -238,7 +238,8 @@ macro_rules! test_load_store {
data: [0 as $elem_ty; 2 * $id::lanes()],
};
// offset the aligned data by one byte:
let s: &mut [u8; 2 * $id::lanes()
let s: &mut [u8; 2
* $id::lanes()
* mem::size_of::<$elem_ty>()] =
mem::transmute(&mut aligned.data);
let s: &mut [$elem_ty] = slice::from_raw_parts_mut(
@@ -296,7 +297,8 @@ macro_rules! test_load_store {
data: [0 as $elem_ty; 2 * $id::lanes()],
};
// offset the aligned data by one byte:
let s: &[u8; 2 * $id::lanes()
let s: &[u8; 2
* $id::lanes()
* mem::size_of::<$elem_ty>()] =
mem::transmute(&aligned.data);
let s: &[$elem_ty] = slice::from_raw_parts(

View File

@@ -41,18 +41,7 @@ macro_rules! impl_shifts {
macro_rules! impl_all_scalar_shifts {
($id:ident, $elem_ty:ident) => {
impl_shifts!(
$id,
$elem_ty,
u8,
u16,
u32,
u64,
usize,
i8,
i16,
i32,
i64,
isize
$id, $elem_ty, u8, u16, u32, u64, usize, i8, i16, i32, i64, isize
);
};
}
@@ -125,18 +114,7 @@ macro_rules! test_shift_ops {
macro_rules! test_all_scalar_shift_ops {
($id:ident, $elem_ty:ident) => {
test_shift_ops!(
$id,
$elem_ty,
u8,
u16,
u32,
u64,
usize,
i8,
i16,
i32,
i64,
isize
$id, $elem_ty, u8, u16, u32, u64, usize, i8, i16, i32, i64, isize
);
};
}

View File

@@ -31,7 +31,7 @@ macro_rules! impl_fabs {
unsafe { $fn(self) }
}
}
}
};
}
impl_fabs!(f32x2: abs_v2f32);

View File

@@ -31,7 +31,7 @@ macro_rules! impl_fcos {
unsafe { $fn(self) }
}
}
}
};
}
impl_fcos!(f32x2: cos_v2f32);

View File

@@ -31,7 +31,7 @@ macro_rules! impl_fma {
unsafe { $fn(self, y, z) }
}
}
}
};
}
impl_fma!(f32x2: fma_v2f32);

View File

@@ -25,11 +25,13 @@ macro_rules! default_impl {
impl All for $id {
#[inline]
unsafe fn all(self) -> bool {
#[cfg(not(target_arch = "aarch64"))] {
#[cfg(not(target_arch = "aarch64"))]
{
use coresimd::simd_llvm::simd_reduce_all;
simd_reduce_all(self)
}
#[cfg(target_arch = "aarch64")] {
#[cfg(target_arch = "aarch64")]
{
// FIXME: Broken on AArch64
// https://bugs.llvm.org/show_bug.cgi?id=36796
self.and()
@@ -40,11 +42,13 @@ macro_rules! default_impl {
impl Any for $id {
#[inline]
unsafe fn any(self) -> bool {
#[cfg(not(target_arch = "aarch64"))] {
#[cfg(not(target_arch = "aarch64"))]
{
use coresimd::simd_llvm::simd_reduce_any;
simd_reduce_any(self)
}
#[cfg(target_arch = "aarch64")] {
#[cfg(target_arch = "aarch64")]
{
// FIXME: Broken on AArch64
// https://bugs.llvm.org/show_bug.cgi?id=36796
self.or()
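The fallback works because each lane of a mask is either all-zeros or all-ones: the bitwise AND across lanes stays nonzero only if every lane is true, and the OR is nonzero if any lane is. A scalar sketch:

// Scalar sketch of the and()/or() fallback over mask lanes (0 or 0xff each).
fn main() {
    let lanes = [0xffu8, 0xff, 0x00, 0xff];
    let and = lanes.iter().fold(0xffu8, |acc, &x| acc & x);
    let or = lanes.iter().fold(0x00u8, |acc, &x| acc | x);
    assert!(and == 0); // all(): the single false lane clears the AND
    assert!(or != 0); // any(): one true lane sets the OR
}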
@@ -63,7 +67,12 @@ macro_rules! default_impl {
// or floating point vectors, we can't currently work around this yet. The
// performance impact for this shouldn't be large, but this is filed as:
// https://bugs.llvm.org/show_bug.cgi?id=37087
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse2"))]
#[cfg(
all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "sse2"
)
)]
macro_rules! x86_128_sse2_movemask_impl {
($id:ident) => {
impl All for $id {
@@ -71,13 +80,15 @@ macro_rules! x86_128_sse2_movemask_impl {
#[target_feature(enable = "sse2")]
unsafe fn all(self) -> bool {
#[cfg(target_arch = "x86")]
use ::coresimd::arch::x86::_mm_movemask_epi8;
use coresimd::arch::x86::_mm_movemask_epi8;
#[cfg(target_arch = "x86_64")]
use ::coresimd::arch::x86_64::_mm_movemask_epi8;
// _mm_movemask_epi8(a) creates a 16bit mask containing the most
// significant bit of each byte of `a`. If all bits are set,
// then all 16 lanes of the mask are true.
_mm_movemask_epi8(::mem::transmute(self)) == u16::max_value() as i32
use coresimd::arch::x86_64::_mm_movemask_epi8;
// _mm_movemask_epi8(a) creates a 16bit mask containing the
// most significant bit of each byte of `a`. If all
// bits are set, then all 16 lanes of the mask are
// true.
_mm_movemask_epi8(::mem::transmute(self))
== u16::max_value() as i32
}
}
impl Any for $id {
@@ -85,14 +96,14 @@ macro_rules! x86_128_sse2_movemask_impl {
#[target_feature(enable = "sse2")]
unsafe fn any(self) -> bool {
#[cfg(target_arch = "x86")]
use ::coresimd::arch::x86::_mm_movemask_epi8;
use coresimd::arch::x86::_mm_movemask_epi8;
#[cfg(target_arch = "x86_64")]
use ::coresimd::arch::x86_64::_mm_movemask_epi8;
use coresimd::arch::x86_64::_mm_movemask_epi8;
_mm_movemask_epi8(::mem::transmute(self)) != 0
}
}
}
};
}
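A scalar model of the movemask approach the comments describe: gather the most significant bit of each byte into an integer mask, then compare against all-ones for all and against zero for any. A sketch, not the intrinsic itself:

// Scalar model: collect each byte's sign bit into a 16-bit mask.
fn movemask(bytes: [u8; 16]) -> u32 {
    let mut m = 0u32;
    for (i, &b) in bytes.iter().enumerate() {
        m |= ((b >> 7) as u32) << i;
    }
    m
}

fn main() {
    let mask = [0xffu8; 16];
    assert_eq!(movemask(mask), u16::max_value() as u32); // all lanes true
    assert!(movemask(mask) != 0); // at least one lane true
}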
// On x86 with AVX we use _mm256_testc_si256 and _mm256_testz_si256.
@@ -103,7 +114,12 @@ macro_rules! x86_128_sse2_movemask_impl {
// integer or floating point vectors, we can't currently work around this yet.
//
// TODO: investigate perf impact and file LLVM bugs as necessary.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "avx"))]
#[cfg(
all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "avx"
)
)]
macro_rules! x86_256_avx_test_impl {
($id:ident) => {
impl All for $id {
@@ -111,11 +127,13 @@ macro_rules! x86_256_avx_test_impl {
#[target_feature(enable = "avx")]
unsafe fn all(self) -> bool {
#[cfg(target_arch = "x86")]
use ::coresimd::arch::x86::_mm256_testc_si256;
use coresimd::arch::x86::_mm256_testc_si256;
#[cfg(target_arch = "x86_64")]
use ::coresimd::arch::x86_64::_mm256_testc_si256;
_mm256_testc_si256(::mem::transmute(self),
::mem::transmute($id::splat(true))) != 0
use coresimd::arch::x86_64::_mm256_testc_si256;
_mm256_testc_si256(
::mem::transmute(self),
::mem::transmute($id::splat(true)),
) != 0
}
}
impl Any for $id {
@@ -123,20 +141,27 @@ macro_rules! x86_256_avx_test_impl {
#[target_feature(enable = "avx")]
unsafe fn any(self) -> bool {
#[cfg(target_arch = "x86")]
use ::coresimd::arch::x86::_mm256_testz_si256;
use coresimd::arch::x86::_mm256_testz_si256;
#[cfg(target_arch = "x86_64")]
use ::coresimd::arch::x86_64::_mm256_testz_si256;
_mm256_testz_si256(::mem::transmute(self),
::mem::transmute(self)) == 0
}
use coresimd::arch::x86_64::_mm256_testz_si256;
_mm256_testz_si256(
::mem::transmute(self),
::mem::transmute(self),
) == 0
}
}
};
}
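Why the two AVX test intrinsics fit here, sketched with scalar bit logic (u128 standing in for a 256-bit register): testc(a, b) is nonzero iff !a & b == 0, so testing a against an all-ones b succeeds only when every bit of a is set; testz(a, a) is nonzero iff a & a == 0, so a zero return means some bit of a is set.

// Scalar sketch; u128 stands in for the 256-bit register.
fn testc(a: u128, b: u128) -> i32 {
    ((!a & b) == 0) as i32
}
fn testz(a: u128, b: u128) -> i32 {
    ((a & b) == 0) as i32
}

fn main() {
    let ones = u128::max_value();
    assert!(testc(ones, ones) != 0); // all: every bit of self is set
    assert!(testz(0b1010, 0b1010) == 0); // any: some bit of self is set
}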
// On x86 with SSE2 all/any for 256-bit wide vectors is implemented by executing
// the algorithm for 128-bit on the higher and lower elements of the vector
// independently.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse2"))]
// On x86 with SSE2 all/any for 256-bit wide vectors is implemented by
// executing the algorithm for 128-bit on the higher and lower elements of the
// vector independently.
#[cfg(
all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "sse2"
)
)]
macro_rules! x86_256_sse2_impl {
($id:ident, $v128:ident) => {
impl All for $id {
@@ -146,7 +171,7 @@ macro_rules! x86_256_sse2_impl {
unsafe {
union U {
halves: ($v128, $v128),
vec: $id
vec: $id,
}
let halves = U { vec: self }.halves;
halves.0.all() && halves.1.all()
@@ -160,14 +185,14 @@ macro_rules! x86_256_sse2_impl {
unsafe {
union U {
halves: ($v128, $v128),
vec: $id
vec: $id,
}
let halves = U { vec: self }.halves;
halves.0.any() || halves.1.any()
}
}
}
}
};
}
// Implementation for 64-bit wide masks on x86.
@@ -179,13 +204,14 @@ macro_rules! x86_64_mmx_movemask_impl {
#[target_feature(enable = "mmx")]
unsafe fn all(self) -> bool {
#[cfg(target_arch = "x86")]
use ::coresimd::arch::x86::_mm_movemask_pi8;
use coresimd::arch::x86::_mm_movemask_pi8;
#[cfg(target_arch = "x86_64")]
use ::coresimd::arch::x86_64::_mm_movemask_pi8;
use coresimd::arch::x86_64::_mm_movemask_pi8;
// _mm_movemask_pi8(a) creates an 8bit mask containing the most
// significant bit of each byte of `a`. If all bits are set,
// then all 8 lanes of the mask are true.
_mm_movemask_pi8(::mem::transmute(self)) == u8::max_value() as i32
_mm_movemask_pi8(::mem::transmute(self))
== u8::max_value() as i32
}
}
impl Any for $id {
@@ -193,14 +219,14 @@ macro_rules! x86_64_mmx_movemask_impl {
#[target_feature(enable = "mmx")]
unsafe fn any(self) -> bool {
#[cfg(target_arch = "x86")]
use ::coresimd::arch::x86::_mm_movemask_pi8;
use coresimd::arch::x86::_mm_movemask_pi8;
#[cfg(target_arch = "x86_64")]
use ::coresimd::arch::x86_64::_mm_movemask_pi8;
use coresimd::arch::x86_64::_mm_movemask_pi8;
_mm_movemask_pi8(::mem::transmute(self)) != 0
}
}
}
};
}
// Implementation for 128-bit wide masks on x86
@@ -214,7 +240,7 @@ macro_rules! x86_128_impl {
default_impl!($id);
}
}
}
};
}
// Implementation for 256-bit wide masks on x86
@@ -230,22 +256,25 @@ macro_rules! x86_256_impl {
default_impl!($id);
}
}
}
};
}
// Implementation for ARM + v7 + NEON using vpmin and vpmax (folding
// minimum/maximum of adjacent pairs) for 64-bit wide two-element vectors.
#[cfg(all(target_arch = "arm", target_feature = "v7", target_feature = "neon"))]
#[cfg(
all(target_arch = "arm", target_feature = "v7", target_feature = "neon")
)]
macro_rules! arm_64_x2_v7_neon_impl {
($id:ident, $vpmin:ident, $vpmax:ident) => {
impl All for $id {
#[inline]
#[target_feature(enable = "v7,neon")]
unsafe fn all(self) -> bool {
use ::coresimd::arch::arm::$vpmin;
use ::mem::transmute;
use coresimd::arch::arm::$vpmin;
use mem::transmute;
// pmin((a, b), (-,-)) => (b, -).0 => b
let tmp: $id = transmute($vpmin(transmute(self), ::mem::uninitialized()));
let tmp: $id =
transmute($vpmin(transmute(self), ::mem::uninitialized()));
tmp.extract(0)
}
}
@@ -253,27 +282,30 @@ macro_rules! arm_64_x2_v7_neon_impl {
#[inline]
#[target_feature(enable = "v7,neon")]
unsafe fn any(self) -> bool {
use ::coresimd::arch::arm::$vpmax;
use ::mem::transmute;
use coresimd::arch::arm::$vpmax;
use mem::transmute;
// pmax((a, b), (-,-)) => (b, -).0 => b
let tmp: $id = transmute($vpmax(transmute(self), ::mem::uninitialized()));
let tmp: $id =
transmute($vpmax(transmute(self), ::mem::uninitialized()));
tmp.extract(0)
}
}
}
};
}
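The fold sketched as scalar arithmetic: ARM's pairwise min takes adjacent pairs across the concatenated operands, so vpmin((a, b), (-, -)) leaves min(a, b) in lane 0. For mask lanes (0 or all-ones) that minimum is nonzero iff both lanes are true; the vpmax dual is nonzero iff either is.

// Scalar sketch of the two-lane fold; the second vpmin operand does not
// matter, which is why uninitialized() is passed above.
fn main() {
    let (a, b) = (0xffu8, 0x00u8);
    assert!(a.min(b) == 0); // all: the false lane wins the min
    assert!(a.max(b) != 0); // any: the true lane wins the max
}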
// Implementation for ARM + v7 + NEON using vpmin and vpmax (folding
// minimum/maximum of adjacent pairs) for 64-bit wide four-element vectors.
#[cfg(all(target_arch = "arm", target_feature = "v7", target_feature = "neon"))]
#[cfg(
all(target_arch = "arm", target_feature = "v7", target_feature = "neon")
)]
macro_rules! arm_64_x4_v7_neon_impl {
($id:ident, $vpmin:ident, $vpmax:ident) => {
impl All for $id {
#[inline]
#[target_feature(enable = "v7,neon")]
unsafe fn all(self) -> bool {
use ::coresimd::arch::arm::$vpmin;
use ::mem::transmute;
use coresimd::arch::arm::$vpmin;
use mem::transmute;
// tmp = pmin((a, b, c, d), (-,-,-,-)) => (a, c, -, -)
let tmp = $vpmin(transmute(self), ::mem::uninitialized());
// tmp = pmin((a, b, -, -), (-,-,-,-)) => (c, -, -, -).0 => c
@@ -285,8 +317,8 @@ macro_rules! arm_64_x4_v7_neon_impl {
#[inline]
#[target_feature(enable = "v7,neon")]
unsafe fn any(self) -> bool {
use ::coresimd::arch::arm::$vpmax;
use ::mem::transmute;
use coresimd::arch::arm::$vpmax;
use mem::transmute;
// tmp = pmax((a, b, c, d), (-,-,-,-)) => (a, c, -, -)
let tmp = $vpmax(transmute(self), ::mem::uninitialized());
// tmp = pmax((a, b, -, -), (-,-,-,-)) => (c, -, -, -).0 => c
@@ -294,20 +326,22 @@ macro_rules! arm_64_x4_v7_neon_impl {
tmp.extract(0)
}
}
}
};
}
// Implementation for ARM + v7 + NEON using vpmin and vpmax (folding
// minimum/maximum of adjacent pairs) for 64-bit wide eight-element vectors.
#[cfg(all(target_arch = "arm", target_feature = "v7", target_feature = "neon"))]
#[cfg(
all(target_arch = "arm", target_feature = "v7", target_feature = "neon")
)]
macro_rules! arm_64_x8_v7_neon_impl {
($id:ident, $vpmin:ident, $vpmax:ident) => {
impl All for $id {
#[inline]
#[target_feature(enable = "v7,neon")]
unsafe fn all(self) -> bool {
use ::coresimd::arch::arm::$vpmin;
use ::mem::transmute;
use coresimd::arch::arm::$vpmin;
use mem::transmute;
// tmp = pmin(
// (a, b, c, d, e, f, g, h),
// (-, -, -, -, -, -, -, -)
@@ -330,8 +364,8 @@ macro_rules! arm_64_x8_v7_neon_impl {
#[inline]
#[target_feature(enable = "v7,neon")]
unsafe fn any(self) -> bool {
use ::coresimd::arch::arm::$vpmax;
use ::mem::transmute;
use coresimd::arch::arm::$vpmax;
use mem::transmute;
// tmp = pmax(
// (a, b, c, d, e, f, g, h),
// (-, -, -, -, -, -, -, -)
@@ -350,28 +384,32 @@ macro_rules! arm_64_x8_v7_neon_impl {
tmp.extract(0)
}
}
};
}
}
// Implementation for ARM + v7 + NEON using vpmin and vpmax (folding
// minimum/maximum of adjacent pairs) for 64-bit or 128-bit wide vectors with
// more than two elements.
#[cfg(all(target_arch = "arm", target_feature = "v7", target_feature = "neon"))]
#[cfg(
all(target_arch = "arm", target_feature = "v7", target_feature = "neon")
)]
macro_rules! arm_128_v7_neon_impl {
($id:ident, $half:ident, $vpmin:ident, $vpmax:ident) => {
impl All for $id {
#[inline]
#[target_feature(enable = "v7,neon")]
unsafe fn all(self) -> bool {
use ::coresimd::arch::arm::$vpmin;
use ::mem::transmute;
use coresimd::arch::arm::$vpmin;
use mem::transmute;
union U {
halves: ($half, $half),
vec: $id
vec: $id,
}
let halves = U { vec: self }.halves;
let h: $half = transmute($vpmin(transmute(halves.0), transmute(halves.1)));
let h: $half = transmute($vpmin(
transmute(halves.0),
transmute(halves.1),
));
h.all()
}
}
@@ -379,18 +417,21 @@ macro_rules! arm_128_v7_neon_impl {
#[inline]
#[target_feature(enable = "v7,neon")]
unsafe fn any(self) -> bool {
use ::coresimd::arch::arm::$vpmax;
use ::mem::transmute;
use coresimd::arch::arm::$vpmax;
use mem::transmute;
union U {
halves: ($half, $half),
vec: $id
vec: $id,
}
let halves = U { vec: self }.halves;
let h: $half = transmute($vpmax(transmute(halves.0), transmute(halves.1)));
let h: $half = transmute($vpmax(
transmute(halves.0),
transmute(halves.1),
));
h.any()
}
}
}
};
}
// Implementation for AArch64 + NEON using vmin and vmax (horizontal vector
@@ -402,7 +443,7 @@ macro_rules! aarch64_128_neon_impl {
#[inline]
#[target_feature(enable = "neon")]
unsafe fn all(self) -> bool {
use ::coresimd::arch::aarch64::$vmin;
use coresimd::arch::aarch64::$vmin;
$vmin(::mem::transmute(self)) != 0
}
}
@@ -410,11 +451,11 @@ macro_rules! aarch64_128_neon_impl {
#[inline]
#[target_feature(enable = "neon")]
unsafe fn any(self) -> bool {
use ::coresimd::arch::aarch64::$vmax;
use coresimd::arch::aarch64::$vmax;
$vmax(::mem::transmute(self)) != 0
}
}
}
};
}
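On AArch64 the same question is answered with one horizontal reduction instead of repeated pairwise folds: the minimum over all mask lanes is nonzero iff every lane is true, and the maximum is nonzero iff at least one is. A scalar sketch:

// Scalar sketch of the horizontal min/max reduction over mask lanes.
fn all(lanes: &[u8]) -> bool {
    lanes.iter().cloned().min().unwrap_or(0) != 0
}
fn any(lanes: &[u8]) -> bool {
    lanes.iter().cloned().max().unwrap_or(0) != 0
}

fn main() {
    assert!(all(&[0xff, 0xff]) && !all(&[0xff, 0x00]));
    assert!(any(&[0xff, 0x00]) && !any(&[0x00, 0x00]));
}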
// Implementation for AArch64 + NEON using vmin and vmax (horizontal vector
@@ -431,9 +472,12 @@ macro_rules! aarch64_64_neon_impl {
unsafe fn all(self) -> bool {
union U {
halves: ($id, $id),
vec: $vec128
vec: $vec128,
}
U { halves: (self, self) }.vec.all()
U {
halves: (self, self),
}.vec
.all()
}
}
impl Any for $id {
@@ -442,12 +486,15 @@ macro_rules! aarch64_64_neon_impl {
unsafe fn any(self) -> bool {
union U {
halves: ($id, $id),
vec: $vec128
}
U { halves: (self, self) }.vec.any()
vec: $vec128,
}
U {
halves: (self, self),
}.vec
.any()
}
}
};
}
macro_rules! impl_mask_all_any {

View File

@@ -5,8 +5,8 @@ pub mod wrapping;
pub mod masks_reductions;
pub mod sqrt;
pub mod abs;
pub mod cos;
pub mod fma;
pub mod sin;
pub mod cos;
pub mod sqrt;

View File

@@ -31,7 +31,7 @@ macro_rules! impl_fsin {
unsafe { $fn(self) }
}
}
}
};
}
impl_fsin!(f32x2: sin_v2f32);

View File

@@ -31,7 +31,7 @@ macro_rules! impl_fsqrt {
unsafe { $fn(self) }
}
}
}
};
}
impl_fsqrt!(f32x2: sqrt_v2f32);

View File

@@ -66,8 +66,12 @@ where
U: FromBits<T>,
{
// FIXME: https://github.com/rust-lang-nursery/stdsimd/issues/449
#[cfg_attr(any(target_arch = "powerpc", target_arch = "powerpc64"), inline(always))]
#[cfg_attr(not(any(target_arch = "powerpc", target_arch = "powerpc64")), inline)]
#[cfg_attr(
any(target_arch = "powerpc", target_arch = "powerpc64"), inline(always)
)]
#[cfg_attr(
not(any(target_arch = "powerpc", target_arch = "powerpc64")), inline
)]
fn into_bits(self) -> U {
debug_assert!(::mem::size_of::<Self>() == ::mem::size_of::<U>());
U::from_bits(self)
@@ -77,8 +81,12 @@ where
// FromBits (and thus IntoBits) is reflexive.
impl<T> FromBits<T> for T {
// FIXME: https://github.com/rust-lang-nursery/stdsimd/issues/449
#[cfg_attr(any(target_arch = "powerpc", target_arch = "powerpc64"), inline(always))]
#[cfg_attr(not(any(target_arch = "powerpc", target_arch = "powerpc64")), inline)]
#[cfg_attr(
any(target_arch = "powerpc", target_arch = "powerpc64"), inline(always)
)]
#[cfg_attr(
not(any(target_arch = "powerpc", target_arch = "powerpc64")), inline
)]
fn from_bits(t: Self) -> Self {
t
}

View File

@@ -110,9 +110,11 @@ macro_rules! from_bits_x86 {
};
}
#[cfg(all(target_arch = "arm", target_feature = "neon",
target_feature = "v7"))]
use coresimd::arch::arm::{// FIXME: float16x8_t,
#[cfg(
all(target_arch = "arm", target_feature = "neon", target_feature = "v7")
)]
use coresimd::arch::arm::{
// FIXME: float16x8_t,
float32x4_t,
int16x8_t,
int32x4_t,
@@ -123,10 +125,12 @@ use coresimd::arch::arm::{// FIXME: float16x8_t,
uint16x8_t,
uint32x4_t,
uint64x2_t,
uint8x16_t};
uint8x16_t,
};
#[cfg(target_arch = "aarch64")]
use coresimd::arch::aarch64::{// FIXME: float16x8_t,
use coresimd::arch::aarch64::{
// FIXME: float16x8_t,
float32x4_t,
float64x2_t,
int16x8_t,
@@ -138,13 +142,21 @@ use coresimd::arch::aarch64::{// FIXME: float16x8_t,
uint16x8_t,
uint32x4_t,
uint64x2_t,
uint8x16_t};
uint8x16_t,
};
macro_rules! from_bits_arm {
($id:ident, $elem_ty:ident, $test_mod_arm:ident, $test_mod_a64:ident) => {
#[cfg(any(all(target_arch = "arm", target_feature = "neon",
target_feature = "v7"),
target_arch = "aarch64"))]
#[cfg(
any(
all(
target_arch = "arm",
target_feature = "neon",
target_feature = "v7"
),
target_arch = "aarch64"
)
)]
impl_from_bits_!(
$id: int8x16_t,
uint8x16_t,
@@ -182,12 +194,7 @@ impl_from_bits!(
m8x16
);
from_bits_x86!(u64x2, u64, u64x2_from_bits_x86);
from_bits_arm!(
u64x2,
u64,
u64x2_from_bits_arm,
u64x2_from_bits_aarch64
);
from_bits_arm!(u64x2, u64, u64x2_from_bits_arm, u64x2_from_bits_aarch64);
impl_from_bits!(
i64x2: i64,
@@ -207,12 +214,7 @@ impl_from_bits!(
m8x16
);
from_bits_x86!(i64x2, i64, i64x2_from_bits_x86);
from_bits_arm!(
i64x2,
i64,
i64x2_from_bits_arm,
i64x2_from_bits_aarch64
);
from_bits_arm!(i64x2, i64, i64x2_from_bits_arm, i64x2_from_bits_aarch64);
impl_from_bits!(
f64x2: f64,
@@ -232,12 +234,7 @@ impl_from_bits!(
m8x16
);
from_bits_x86!(f64x2, f64, f64x2_from_bits_x86);
from_bits_arm!(
f64x2,
f64,
f64x2_from_bits_arm,
f64x2_from_bits_aarch64
);
from_bits_arm!(f64x2, f64, f64x2_from_bits_arm, f64x2_from_bits_aarch64);
impl_from_bits!(
u32x4: u32,
@@ -257,12 +254,7 @@ impl_from_bits!(
m8x16
);
from_bits_x86!(u32x4, u32, u32x4_from_bits_x86);
from_bits_arm!(
u32x4,
u32,
u32x4_from_bits_arm,
u32x4_from_bits_aarch64
);
from_bits_arm!(u32x4, u32, u32x4_from_bits_arm, u32x4_from_bits_aarch64);
impl_from_bits!(
i32x4: i32,
@@ -282,12 +274,7 @@ impl_from_bits!(
m8x16
);
from_bits_x86!(i32x4, i32, i32x4_from_bits_x86);
from_bits_arm!(
i32x4,
i32,
i32x4_from_bits_arm,
i32x4_from_bits_aarch64
);
from_bits_arm!(i32x4, i32, i32x4_from_bits_arm, i32x4_from_bits_aarch64);
impl_from_bits!(
f32x4: f32,
@@ -307,12 +294,7 @@ impl_from_bits!(
m8x16
);
from_bits_x86!(f32x4, f32, f32x4_from_bits_x86);
from_bits_arm!(
f32x4,
f32,
f32x4_from_bits_arm,
f32x4_from_bits_aarch64
);
from_bits_arm!(f32x4, f32, f32x4_from_bits_arm, f32x4_from_bits_aarch64);
impl_from_bits!(
u16x8: u16,
@@ -332,12 +314,7 @@ impl_from_bits!(
m8x16
);
from_bits_x86!(u16x8, u16, u16x8_from_bits_x86);
from_bits_arm!(
u16x8,
u16,
u16x8_from_bits_arm,
u16x8_from_bits_aarch64
);
from_bits_arm!(u16x8, u16, u16x8_from_bits_arm, u16x8_from_bits_aarch64);
impl_from_bits!(
i16x8: i16,
@@ -357,12 +334,7 @@ impl_from_bits!(
m8x16
);
from_bits_x86!(i16x8, i16, i16x8_from_bits_x86);
from_bits_arm!(
i16x8,
i16,
i16x8_from_bits_arm,
i16x8_from_bits_aarch64
);
from_bits_arm!(i16x8, i16, i16x8_from_bits_arm, i16x8_from_bits_aarch64);
impl_from_bits!(
u8x16: u8,
@@ -382,12 +354,7 @@ impl_from_bits!(
m8x16
);
from_bits_x86!(u8x16, u8, u8x16_from_bits_x86);
from_bits_arm!(
u8x16,
u8,
u8x16_from_bits_arm,
u8x16_from_bits_aarch64
);
from_bits_arm!(u8x16, u8, u8x16_from_bits_arm, u8x16_from_bits_aarch64);
impl_from_bits!(
i8x16: i8,
@@ -407,12 +374,7 @@ impl_from_bits!(
m8x16
);
from_bits_x86!(i8x16, i8, i8x16_from_bits_x86);
from_bits_arm!(
i8x16,
i8,
i8x16_from_bits_arm,
i8x16_from_bits_aarch64
);
from_bits_arm!(i8x16, i8, i8x16_from_bits_arm, i8x16_from_bits_aarch64);
impl_from!(
f64x2: f64,
@@ -552,31 +514,37 @@ impl_from!(
m8x8
);
impl_from!(u8x16: u8, u8x16_from, test_v128 | i32x16, u32x16, f32x16, m1x16, i16x16, u16x16, m16x16, i8x16, m8x16);
impl_from!(i8x16: i8, i8x16_from, test_v128 | i32x16, u32x16, f32x16, m1x16, i16x16, u16x16, m16x16, u8x16, m8x16);
impl_from!(
u8x16: u8,
u8x16_from,
test_v128 | i32x16,
u32x16,
f32x16,
m1x16,
i16x16,
u16x16,
m16x16,
i8x16,
m8x16
);
impl_from!(
i8x16: i8,
i8x16_from,
test_v128 | i32x16,
u32x16,
f32x16,
m1x16,
i16x16,
u16x16,
m16x16,
u8x16,
m8x16
);
impl_from!(m8x16: i8, m8x16_from, test_v128 | m1x16, m16x16);
impl_from!(
m16x8: i16,
m16x8_from,
test_v128 | m1x8,
m32x8,
m8x8
);
impl_from!(m16x8: i16, m16x8_from, test_v128 | m1x8, m32x8, m8x8);
impl_from!(
m32x4: i32,
m32x4_from,
test_v128 | m64x4,
m16x4,
m8x4
);
impl_from!(m32x4: i32, m32x4_from, test_v128 | m64x4, m16x4, m8x4);
impl_from!(
m64x2: i64,
m64x2_from,
test_v128 | m32x2,
m16x2,
m8x2
);
impl_from!(m64x2: i64, m64x2_from, test_v128 | m32x2, m16x2, m8x2);

View File

@@ -57,10 +57,4 @@ impl_from!(
m8x2
);
impl_from!(
m8x2: i8,
m8x2_from,
test_v16 | m64x2,
m32x2,
m16x2
);
impl_from!(m8x2: i8, m8x2_from, test_v16 | m64x2, m32x2, m16x2);

View File

@@ -465,25 +465,8 @@ impl_from!(
impl_from!(m8x32: i8, m8x32_from, test_v256 | m1x32);
impl_from!(
m16x16: i16,
m16x16_from,
test_v256 | m1x16,
m8x16
);
impl_from!(m16x16: i16, m16x16_from, test_v256 | m1x16, m8x16);
impl_from!(
m32x8: i32,
m32x8_from,
test_v256 | m1x8,
m16x8,
m8x8
);
impl_from!(m32x8: i32, m32x8_from, test_v256 | m1x8, m16x8, m8x8);
impl_from!(
m64x4: i64,
m64x4_from,
test_v256 | m32x4,
m16x4,
m8x4
);
impl_from!(m64x4: i64, m64x4_from, test_v256 | m32x4, m16x4, m8x4);

View File

@@ -151,18 +151,6 @@ impl_from!(
m8x4
);
impl_from!(
m8x4: i8,
m8x4_from,
test_v32 | m64x4,
m32x4,
m16x4
);
impl_from!(m8x4: i8, m8x4_from, test_v32 | m64x4, m32x4, m16x4);
impl_from!(
m16x2: i16,
m16x2_from,
test_v32 | m64x2,
m32x2,
m8x2
);
impl_from!(m16x2: i16, m16x2_from, test_v32 | m64x2, m32x2, m8x2);

View File

@@ -446,17 +446,6 @@ impl_from!(u8x64: u8, u8x64_from, test_v512 | i8x64, m1x64);
impl_from!(m1x32: i16, m1x32_from, test_v512 | m8x32);
impl_from!(
m1x16: i32,
m1x16_from,
test_v512 | m16x16,
m8x16
);
impl_from!(m1x16: i32, m1x16_from, test_v512 | m16x16, m8x16);
impl_from!(
m1x8: i64,
m1x8_from,
test_v512 | m32x8,
m16x8,
m8x8
);
impl_from!(m1x8: i64, m1x8_from, test_v512 | m32x8, m16x8, m8x8);

View File

@@ -83,9 +83,11 @@ macro_rules! from_bits_x86 {
};
}
#[cfg(all(target_arch = "arm", target_feature = "neon",
target_feature = "v7"))]
use coresimd::arch::arm::{// FIXME: float16x4_t,
#[cfg(
all(target_arch = "arm", target_feature = "neon", target_feature = "v7")
)]
use coresimd::arch::arm::{
// FIXME: float16x4_t,
float32x2_t,
int16x4_t,
int32x2_t,
@@ -96,10 +98,12 @@ use coresimd::arch::arm::{// FIXME: float16x4_t,
uint16x4_t,
uint32x2_t,
uint64x1_t,
uint8x8_t};
uint8x8_t,
};
#[cfg(target_arch = "aarch64")]
use coresimd::arch::aarch64::{// FIXME: float16x4_t,
use coresimd::arch::aarch64::{
// FIXME: float16x4_t,
float32x2_t,
float64x1_t,
int16x4_t,
@@ -111,13 +115,21 @@ use coresimd::arch::aarch64::{// FIXME: float16x4_t,
uint16x4_t,
uint32x2_t,
uint64x1_t,
uint8x8_t};
uint8x8_t,
};
macro_rules! from_bits_arm {
($id:ident, $elem_ty:ident, $test_mod_arm:ident, $test_mod_a64:ident) => {
#[cfg(any(all(target_arch = "arm", target_feature = "neon",
target_feature = "v7"),
target_arch = "aarch64"))]
#[cfg(
any(
all(
target_arch = "arm",
target_feature = "neon",
target_feature = "v7"
),
target_arch = "aarch64"
)
)]
impl_from_bits_!(
$id: int64x1_t,
uint64x1_t,
@@ -151,12 +163,7 @@ impl_from_bits!(
m8x8
);
from_bits_x86!(u32x2, u32, u32x2_from_bits_x86);
from_bits_arm!(
u32x2,
u32,
u32x2_from_bits_arm,
u32x2_from_bits_aarch64
);
from_bits_arm!(u32x2, u32, u32x2_from_bits_arm, u32x2_from_bits_aarch64);
impl_from_bits!(
i32x2: i32,
@@ -172,12 +179,7 @@ impl_from_bits!(
m8x8
);
from_bits_x86!(i32x2, i32, i32x2_from_bits_x86);
from_bits_arm!(
i32x2,
i32,
i32x2_from_bits_arm,
i32x2_from_bits_aarch64
);
from_bits_arm!(i32x2, i32, i32x2_from_bits_arm, i32x2_from_bits_aarch64);
impl_from_bits!(
f32x2: f32,
@@ -193,12 +195,7 @@ impl_from_bits!(
m8x8
);
from_bits_x86!(f32x2, f32, f32x2_from_bits_x86);
from_bits_arm!(
f32x2,
f32,
f32x2_from_bits_arm,
f32x2_from_bits_aarch64
);
from_bits_arm!(f32x2, f32, f32x2_from_bits_arm, f32x2_from_bits_aarch64);
impl_from_bits!(
u16x4: u16,
@@ -213,12 +210,7 @@ impl_from_bits!(
m8x8
);
from_bits_x86!(u16x4, u16, u16x4_from_bits_x86);
from_bits_arm!(
u16x4,
u16,
u16x4_from_bits_arm,
u16x4_from_bits_aarch64
);
from_bits_arm!(u16x4, u16, u16x4_from_bits_arm, u16x4_from_bits_aarch64);
impl_from_bits!(
i16x4: i16,
@@ -233,12 +225,7 @@ impl_from_bits!(
m8x8
);
from_bits_x86!(i16x4, i16, i16x4_from_bits_x86);
from_bits_arm!(
i16x4,
i16,
i16x4_from_bits_arm,
i16x4_from_bits_aarch64
);
from_bits_arm!(i16x4, i16, i16x4_from_bits_arm, i16x4_from_bits_aarch64);
impl_from_bits!(
u8x8: u8,
@@ -253,12 +240,7 @@ impl_from_bits!(
m8x8
);
from_bits_x86!(u8x8, u8, u8x8_from_bits_x86);
from_bits_arm!(
u8x8,
u8,
u8x8_from_bits_arm,
u8x8_from_bits_aarch64
);
from_bits_arm!(u8x8, u8, u8x8_from_bits_arm, u8x8_from_bits_aarch64);
impl_from_bits!(
i8x8: i8,
@@ -273,12 +255,7 @@ impl_from_bits!(
m8x8
);
from_bits_x86!(i8x8, i8, i8x8_from_bits_x86);
from_bits_arm!(
i8x8,
i8,
i8x8_from_bits_arm,
i8x8_from_bits_aarch64
);
from_bits_arm!(i8x8, i8, i8x8_from_bits_arm, i8x8_from_bits_aarch64);
impl_from!(
f32x2: f32,
@@ -404,26 +381,8 @@ impl_from!(
m8x8
);
impl_from!(
m8x8: i8,
m8x8_from,
test_v64 | m1x8,
m32x8,
m16x8
);
impl_from!(m8x8: i8, m8x8_from, test_v64 | m1x8, m32x8, m16x8);
impl_from!(
m16x4: i16,
m16x4_from,
test_v64 | m64x4,
m32x4,
m8x4
);
impl_from!(m16x4: i16, m16x4_from, test_v64 | m64x4, m32x4, m8x4);
impl_from!(
m32x2: i32,
m32x2_from,
test_v64 | m64x2,
m16x2,
m8x2
);
impl_from!(m32x2: i32, m32x2_from, test_v64 | m64x2, m16x2, m8x2);

View File

@@ -1387,7 +1387,7 @@ pub unsafe fn _mm_permute_pd(a: __m128d, imm8: i32) -> __m128d {
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_permute2f128_ps(
a: __m256, b: __m256, imm8: i32
a: __m256, b: __m256, imm8: i32,
) -> __m256 {
macro_rules! call {
($imm8:expr) => {
@@ -1407,7 +1407,7 @@ pub unsafe fn _mm256_permute2f128_ps(
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_permute2f128_pd(
a: __m256d, b: __m256d, imm8: i32
a: __m256d, b: __m256d, imm8: i32,
) -> __m256d {
macro_rules! call {
($imm8:expr) => {
@@ -1427,7 +1427,7 @@ pub unsafe fn _mm256_permute2f128_pd(
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_permute2f128_si256(
a: __m256i, b: __m256i, imm8: i32
a: __m256i, b: __m256i, imm8: i32,
) -> __m256i {
let a = a.as_i32x8();
let b = b.as_i32x8();
@@ -1529,7 +1529,7 @@ pub unsafe fn _mm256_insertf128_ps(a: __m256, b: __m128, imm8: i32) -> __m256 {
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_insertf128_pd(
a: __m256d, b: __m128d, imm8: i32
a: __m256d, b: __m128d, imm8: i32,
) -> __m256d {
match imm8 & 1 {
0 => simd_shuffle4(a, _mm256_castpd128_pd256(b), [4, 5, 2, 3]),
@@ -1547,7 +1547,7 @@ pub unsafe fn _mm256_insertf128_pd(
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_insertf128_si256(
a: __m256i, b: __m128i, imm8: i32
a: __m256i, b: __m128i, imm8: i32,
) -> __m256i {
let b = _mm256_castsi128_si256(b).as_i64x4();
let dst: i64x4 = match imm8 & 1 {
@@ -1567,11 +1567,7 @@ pub unsafe fn _mm256_insertf128_si256(
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_insert_epi8(a: __m256i, i: i8, index: i32) -> __m256i {
mem::transmute(simd_insert(
a.as_i8x32(),
(index as u32) & 31,
i,
))
mem::transmute(simd_insert(a.as_i8x32(), (index as u32) & 31, i))
}
/// Copy `a` to result, and insert the 16-bit integer `i` into result
@@ -1584,11 +1580,7 @@ pub unsafe fn _mm256_insert_epi8(a: __m256i, i: i8, index: i32) -> __m256i {
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_insert_epi16(a: __m256i, i: i16, index: i32) -> __m256i {
mem::transmute(simd_insert(
a.as_i16x16(),
(index as u32) & 15,
i,
))
mem::transmute(simd_insert(a.as_i16x16(), (index as u32) & 15, i))
}
/// Copy `a` to result, and insert the 32-bit integer `i` into result
@@ -1790,7 +1782,7 @@ pub unsafe fn _mm256_storeu_si256(mem_addr: *mut __m256i, a: __m256i) {
#[cfg_attr(test, assert_instr(vmaskmovpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskload_pd(
mem_addr: *const f64, mask: __m256i
mem_addr: *const f64, mask: __m256i,
) -> __m256d {
maskloadpd256(mem_addr as *const i8, mask.as_i64x4())
}
@@ -1804,7 +1796,7 @@ pub unsafe fn _mm256_maskload_pd(
#[cfg_attr(test, assert_instr(vmaskmovpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskstore_pd(
mem_addr: *mut f64, mask: __m256i, a: __m256d
mem_addr: *mut f64, mask: __m256i, a: __m256d,
) {
maskstorepd256(mem_addr as *mut i8, mask.as_i64x4(), a);
}
@@ -1844,7 +1836,7 @@ pub unsafe fn _mm_maskstore_pd(mem_addr: *mut f64, mask: __m128i, a: __m128d) {
#[cfg_attr(test, assert_instr(vmaskmovps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskload_ps(
mem_addr: *const f32, mask: __m256i
mem_addr: *const f32, mask: __m256i,
) -> __m256 {
maskloadps256(mem_addr as *const i8, mask.as_i32x8())
}
@@ -1858,7 +1850,7 @@ pub unsafe fn _mm256_maskload_ps(
#[cfg_attr(test, assert_instr(vmaskmovps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskstore_ps(
mem_addr: *mut f32, mask: __m256i, a: __m256
mem_addr: *mut f32, mask: __m256i, a: __m256,
) {
maskstoreps256(mem_addr as *mut i8, mask.as_i32x8(), a);
}
@@ -2383,7 +2375,7 @@ pub unsafe fn _mm256_set_pd(a: f64, b: f64, c: f64, d: f64) -> __m256d {
// This intrinsic has no corresponding instruction.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_set_ps(
a: f32, b: f32, c: f32, d: f32, e: f32, f: f32, g: f32, h: f32
a: f32, b: f32, c: f32, d: f32, e: f32, f: f32, g: f32, h: f32,
) -> __m256 {
_mm256_setr_ps(h, g, f, e, d, c, b, a)
}
@@ -2440,7 +2432,7 @@ pub unsafe fn _mm256_set_epi16(
// This intrinsic has no corresponding instruction.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_set_epi32(
e0: i32, e1: i32, e2: i32, e3: i32, e4: i32, e5: i32, e6: i32, e7: i32
e0: i32, e1: i32, e2: i32, e3: i32, e4: i32, e5: i32, e6: i32, e7: i32,
) -> __m256i {
_mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0)
}
@@ -2477,7 +2469,7 @@ pub unsafe fn _mm256_setr_pd(a: f64, b: f64, c: f64, d: f64) -> __m256d {
// This intrinsic has no corresponding instruction.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_setr_ps(
a: f32, b: f32, c: f32, d: f32, e: f32, f: f32, g: f32, h: f32
a: f32, b: f32, c: f32, d: f32, e: f32, f: f32, g: f32, h: f32,
) -> __m256 {
__m256(a, b, c, d, e, f, g, h)
}
@@ -2536,7 +2528,7 @@ pub unsafe fn _mm256_setr_epi16(
// This intrinsic has no corresponding instruction.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_setr_epi32(
e0: i32, e1: i32, e2: i32, e3: i32, e4: i32, e5: i32, e6: i32, e7: i32
e0: i32, e1: i32, e2: i32, e3: i32, e4: i32, e5: i32, e6: i32, e7: i32,
) -> __m256i {
mem::transmute(i32x8::new(e0, e1, e2, e3, e4, e5, e6, e7))
}
@@ -2950,7 +2942,7 @@ pub unsafe fn _mm256_setr_m128i(lo: __m128i, hi: __m128i) -> __m256i {
// This intrinsic has no corresponding instruction.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_loadu2_m128(
hiaddr: *const f32, loaddr: *const f32
hiaddr: *const f32, loaddr: *const f32,
) -> __m256 {
let a = _mm256_castps128_ps256(_mm_loadu_ps(loaddr));
_mm256_insertf128_ps(a, _mm_loadu_ps(hiaddr), 1)
@@ -2967,7 +2959,7 @@ pub unsafe fn _mm256_loadu2_m128(
// This intrinsic has no corresponding instruction.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_loadu2_m128d(
hiaddr: *const f64, loaddr: *const f64
hiaddr: *const f64, loaddr: *const f64,
) -> __m256d {
let a = _mm256_castpd128_pd256(_mm_loadu_pd(loaddr));
_mm256_insertf128_pd(a, _mm_loadu_pd(hiaddr), 1)
@@ -2983,7 +2975,7 @@ pub unsafe fn _mm256_loadu2_m128d(
// This intrinsic has no corresponding instruction.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_loadu2_m128i(
hiaddr: *const __m128i, loaddr: *const __m128i
hiaddr: *const __m128i, loaddr: *const __m128i,
) -> __m256i {
let a = _mm256_castsi128_si256(_mm_loadu_si128(loaddr));
_mm256_insertf128_si256(a, _mm_loadu_si128(hiaddr), 1)
@@ -3000,7 +2992,7 @@ pub unsafe fn _mm256_loadu2_m128i(
// This intrinsic has no corresponding instruction.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_storeu2_m128(
hiaddr: *mut f32, loaddr: *mut f32, a: __m256
hiaddr: *mut f32, loaddr: *mut f32, a: __m256,
) {
let lo = _mm256_castps256_ps128(a);
_mm_storeu_ps(loaddr, lo);
@@ -3019,7 +3011,7 @@ pub unsafe fn _mm256_storeu2_m128(
// This intrinsic has no corresponding instruction.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_storeu2_m128d(
hiaddr: *mut f64, loaddr: *mut f64, a: __m256d
hiaddr: *mut f64, loaddr: *mut f64, a: __m256d,
) {
let lo = _mm256_castpd256_pd128(a);
_mm_storeu_pd(loaddr, lo);
@@ -3037,7 +3029,7 @@ pub unsafe fn _mm256_storeu2_m128d(
// This intrinsic has no corresponding instruction.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_storeu2_m128i(
hiaddr: *mut __m128i, loaddr: *mut __m128i, a: __m256i
hiaddr: *mut __m128i, loaddr: *mut __m128i, a: __m256i,
) {
let lo = _mm256_castsi256_si128(a);
_mm_storeu_si128(loaddr, lo);
@@ -3500,20 +3492,11 @@ mod tests {
let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.);
let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.);
let r = _mm256_blend_ps(a, b, 0x0);
assert_eq_m256(
r,
_mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.),
);
assert_eq_m256(r, _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.));
let r = _mm256_blend_ps(a, b, 0x3);
assert_eq_m256(
r,
_mm256_setr_ps(2., 3., 5., 8., 9., 12., 13., 16.),
);
assert_eq_m256(r, _mm256_setr_ps(2., 3., 5., 8., 9., 12., 13., 16.));
let r = _mm256_blend_ps(a, b, 0xF);
assert_eq_m256(
r,
_mm256_setr_ps(2., 3., 6., 7., 9., 12., 13., 16.),
);
assert_eq_m256(r, _mm256_setr_ps(2., 3., 6., 7., 9., 12., 13., 16.));
}
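The three expectations follow from the imm8 semantics (bit i of the low eight bits selects lane i from b, otherwise from a): 0x0 keeps a unchanged, 0x3 takes lanes 0 and 1 from b, and 0xF takes lanes 0 through 3. A scalar check:

// Scalar model of the 8-lane blend: bit i of imm8 picks b[i] over a[i].
fn blend8(a: [f32; 8], b: [f32; 8], imm8: u8) -> [f32; 8] {
    let mut r = a;
    for i in 0..8 {
        if imm8 & (1u8 << i) != 0 {
            r[i] = b[i];
        }
    }
    r
}

fn main() {
    let a = [1., 4., 5., 8., 9., 12., 13., 16.];
    let b = [2., 3., 6., 7., 10., 11., 14., 15.];
    assert_eq!(blend8(a, b, 0x3), [2., 3., 5., 8., 9., 12., 13., 16.]);
    assert_eq!(blend8(a, b, 0xF), [2., 3., 6., 7., 9., 12., 13., 16.]);
}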
#[simd_test(enable = "avx")]
@@ -3544,16 +3527,8 @@ mod tests {
let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
let r = _mm256_dp_ps(a, b, 0xFF);
let e = _mm256_setr_ps(
200.,
200.,
200.,
200.,
2387.,
2387.,
2387.,
2387.,
);
let e =
_mm256_setr_ps(200., 200., 200., 200., 2387., 2387., 2387., 2387.);
assert_eq_m256(r, e);
}
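The expected e is the per-half dot product: with imm8 = 0xFF all four products in each 128-bit half are summed and the sum is broadcast to that half's lanes. Worked arithmetic:

// The two half-sums behind e above.
fn main() {
    let lo = 4. * 4. + 9. * 3. + 16. * 2. + 25. * 5.; // 16 + 27 + 32 + 125
    let hi = 4. * 8. + 9. * 9. + 16. * 64. + 25. * 50.; // 32 + 81 + 1024 + 1250
    assert_eq!((lo, hi), (200.0, 2387.0));
}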
@@ -4234,9 +4209,7 @@ mod tests {
pub data: [f64; 4],
}
let a = _mm256_set1_pd(7.0);
let mut mem = Memory {
data: [-1.0; 4],
};
let mut mem = Memory { data: [-1.0; 4] };
_mm256_stream_pd(&mut mem.data[0] as *mut f64, a);
for i in 0..4 {
@@ -4251,9 +4224,7 @@ mod tests {
pub data: [f32; 8],
}
let a = _mm256_set1_ps(7.0);
let mut mem = Memory {
data: [-1.0; 8],
};
let mut mem = Memory { data: [-1.0; 8] };
_mm256_stream_ps(&mut mem.data[0] as *mut f32, a);
for i in 0..8 {
@@ -4534,10 +4505,7 @@ mod tests {
#[simd_test(enable = "avx")]
unsafe fn test_mm256_set_ps() {
let r = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
assert_eq_m256(
r,
_mm256_setr_ps(8., 7., 6., 5., 4., 3., 2., 1.),
);
assert_eq_m256(r, _mm256_setr_ps(8., 7., 6., 5., 4., 3., 2., 1.));
}
#[simd_test(enable = "avx")]
@@ -4595,10 +4563,7 @@ mod tests {
#[simd_test(enable = "avx")]
unsafe fn test_mm256_setr_ps() {
let r = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
assert_eq_m256(
r,
_mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.),
);
assert_eq_m256(r, _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.));
}
#[simd_test(enable = "avx")]

View File

@@ -413,7 +413,7 @@ pub unsafe fn _mm_blend_epi32(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_blend_epi32(
a: __m256i, b: __m256i, imm8: i32
a: __m256i, b: __m256i, imm8: i32,
) -> __m256i {
let imm8 = (imm8 & 0xFF) as u8;
let a = a.as_i32x8();
@@ -480,7 +480,7 @@ pub unsafe fn _mm256_blend_epi32(
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_blend_epi16(
a: __m256i, b: __m256i, imm8: i32
a: __m256i, b: __m256i, imm8: i32,
) -> __m256i {
let imm8 = (imm8 & 0xFF) as u8;
let a = a.as_i16x16();
@@ -531,76 +531,20 @@ pub unsafe fn _mm256_blend_epi16(
) => {
match (imm8 >> 6) & 0b11 {
0b00 => blend4!(
$a,
$b,
$c,
$d,
$e,
$f,
6,
7,
$a2,
$b2,
$c2,
$d2,
$e2,
$f2,
14,
15
$a, $b, $c, $d, $e, $f, 6, 7, $a2, $b2, $c2, $d2, $e2,
$f2, 14, 15
),
0b01 => blend4!(
$a,
$b,
$c,
$d,
$e,
$f,
22,
7,
$a2,
$b2,
$c2,
$d2,
$e2,
$f2,
30,
15
$a, $b, $c, $d, $e, $f, 22, 7, $a2, $b2, $c2, $d2,
$e2, $f2, 30, 15
),
0b10 => blend4!(
$a,
$b,
$c,
$d,
$e,
$f,
6,
23,
$a2,
$b2,
$c2,
$d2,
$e2,
$f2,
14,
31
$a, $b, $c, $d, $e, $f, 6, 23, $a2, $b2, $c2, $d2,
$e2, $f2, 14, 31
),
_ => blend4!(
$a,
$b,
$c,
$d,
$e,
$f,
22,
23,
$a2,
$b2,
$c2,
$d2,
$e2,
$f2,
30,
31
$a, $b, $c, $d, $e, $f, 22, 23, $a2, $b2, $c2, $d2,
$e2, $f2, 30, 31
),
}
};
@@ -618,60 +562,16 @@ pub unsafe fn _mm256_blend_epi16(
) => {
match (imm8 >> 4) & 0b11 {
0b00 => blend3!(
$a,
$b,
$c,
$d,
4,
5,
$a2,
$b2,
$c2,
$d2,
12,
13
$a, $b, $c, $d, 4, 5, $a2, $b2, $c2, $d2, 12, 13
),
0b01 => blend3!(
$a,
$b,
$c,
$d,
20,
5,
$a2,
$b2,
$c2,
$d2,
28,
13
$a, $b, $c, $d, 20, 5, $a2, $b2, $c2, $d2, 28, 13
),
0b10 => blend3!(
$a,
$b,
$c,
$d,
4,
21,
$a2,
$b2,
$c2,
$d2,
12,
29
$a, $b, $c, $d, 4, 21, $a2, $b2, $c2, $d2, 12, 29
),
_ => blend3!(
$a,
$b,
$c,
$d,
20,
21,
$a2,
$b2,
$c2,
$d2,
28,
29
$a, $b, $c, $d, 20, 21, $a2, $b2, $c2, $d2, 28, 29
),
}
};
@@ -703,13 +603,9 @@ pub unsafe fn _mm256_blend_epi16(
#[cfg_attr(test, assert_instr(vpblendvb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_blendv_epi8(
a: __m256i, b: __m256i, mask: __m256i
a: __m256i, b: __m256i, mask: __m256i,
) -> __m256i {
mem::transmute(pblendvb(
a.as_i8x32(),
b.as_i8x32(),
mask.as_i8x32(),
))
mem::transmute(pblendvb(a.as_i8x32(), b.as_i8x32(), mask.as_i8x32()))
}
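// A minimal usage sketch (not part of this diff): keep bytes of `a`
// except where the corresponding byte of `b` is negative; the comparison
// result supplies the high mask bits that drive the blend.
#[target_feature(enable = "avx2")]
unsafe fn take_negatives(a: __m256i, b: __m256i) -> __m256i {
    // bytes of `b` below zero compare as -1, i.e. all mask bits set
    let mask = _mm256_cmpgt_epi8(_mm256_setzero_si256(), b);
    _mm256_blendv_epi8(a, b, mask)
}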
/// Broadcast the low packed 8-bit integer from `a` to all elements of
@@ -1226,7 +1122,7 @@ pub unsafe fn _mm256_hsubs_epi16(a: __m256i, b: __m256i) -> __m256i {
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i32gather_epi32(
slice: *const i32, offsets: __m128i, scale: i32
slice: *const i32, offsets: __m128i, scale: i32,
) -> __m128i {
let zero = _mm_setzero_si128().as_i32x4();
let neg_one = _mm_set1_epi32(-1).as_i32x4();
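// A hedged usage sketch for the gather family: fetch table[3], table[0],
// table[1], table[2] in one shot; `scale` is the element stride in bytes
// (4 for i32) and must be a constant.
#[target_feature(enable = "avx2")]
unsafe fn gather4(table: &[i32; 8]) -> __m128i {
    let offsets = _mm_setr_epi32(3, 0, 1, 2);
    _mm_i32gather_epi32(table.as_ptr(), offsets, 4)
}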
@@ -1280,7 +1176,7 @@ pub unsafe fn _mm_mask_i32gather_epi32(
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i32gather_epi32(
slice: *const i32, offsets: __m256i, scale: i32
slice: *const i32, offsets: __m256i, scale: i32,
) -> __m256i {
let zero = _mm256_setzero_si256().as_i32x8();
let neg_one = _mm256_set1_epi32(-1).as_i32x8();
@@ -1334,7 +1230,7 @@ pub unsafe fn _mm256_mask_i32gather_epi32(
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i32gather_ps(
slice: *const f32, offsets: __m128i, scale: i32
slice: *const f32, offsets: __m128i, scale: i32,
) -> __m128 {
let zero = _mm_setzero_ps();
let neg_one = _mm_set1_ps(-1.0);
@@ -1360,7 +1256,7 @@ pub unsafe fn _mm_i32gather_ps(
#[rustc_args_required_const(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i32gather_ps(
src: __m128, slice: *const f32, offsets: __m128i, mask: __m128, scale: i32
src: __m128, slice: *const f32, offsets: __m128i, mask: __m128, scale: i32,
) -> __m128 {
let offsets = offsets.as_i32x4();
let slice = slice as *const i8;
@@ -1383,7 +1279,7 @@ pub unsafe fn _mm_mask_i32gather_ps(
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i32gather_ps(
slice: *const f32, offsets: __m256i, scale: i32
slice: *const f32, offsets: __m256i, scale: i32,
) -> __m256 {
let zero = _mm256_setzero_ps();
let neg_one = _mm256_set1_ps(-1.0);
@@ -1409,7 +1305,7 @@ pub unsafe fn _mm256_i32gather_ps(
#[rustc_args_required_const(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i32gather_ps(
src: __m256, slice: *const f32, offsets: __m256i, mask: __m256, scale: i32
src: __m256, slice: *const f32, offsets: __m256i, mask: __m256, scale: i32,
) -> __m256 {
let offsets = offsets.as_i32x8();
let slice = slice as *const i8;
@@ -1432,7 +1328,7 @@ pub unsafe fn _mm256_mask_i32gather_ps(
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i32gather_epi64(
slice: *const i64, offsets: __m128i, scale: i32
slice: *const i64, offsets: __m128i, scale: i32,
) -> __m128i {
let zero = _mm_setzero_si128().as_i64x2();
let neg_one = _mm_set1_epi64x(-1).as_i64x2();
@@ -1486,7 +1382,7 @@ pub unsafe fn _mm_mask_i32gather_epi64(
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i32gather_epi64(
slice: *const i64, offsets: __m128i, scale: i32
slice: *const i64, offsets: __m128i, scale: i32,
) -> __m256i {
let zero = _mm256_setzero_si256().as_i64x4();
let neg_one = _mm256_set1_epi64x(-1).as_i64x4();
@@ -1540,7 +1436,7 @@ pub unsafe fn _mm256_mask_i32gather_epi64(
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i32gather_pd(
slice: *const f64, offsets: __m128i, scale: i32
slice: *const f64, offsets: __m128i, scale: i32,
) -> __m128d {
let zero = _mm_setzero_pd();
let neg_one = _mm_set1_pd(-1.0);
@@ -1590,7 +1486,7 @@ pub unsafe fn _mm_mask_i32gather_pd(
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i32gather_pd(
slice: *const f64, offsets: __m128i, scale: i32
slice: *const f64, offsets: __m128i, scale: i32,
) -> __m256d {
let zero = _mm256_setzero_pd();
let neg_one = _mm256_set1_pd(-1.0);
@@ -1640,7 +1536,7 @@ pub unsafe fn _mm256_mask_i32gather_pd(
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i64gather_epi32(
slice: *const i32, offsets: __m128i, scale: i32
slice: *const i32, offsets: __m128i, scale: i32,
) -> __m128i {
let zero = _mm_setzero_si128().as_i32x4();
let neg_one = _mm_set1_epi64x(-1).as_i32x4();
@@ -1694,7 +1590,7 @@ pub unsafe fn _mm_mask_i64gather_epi32(
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i64gather_epi32(
slice: *const i32, offsets: __m256i, scale: i32
slice: *const i32, offsets: __m256i, scale: i32,
) -> __m128i {
let zero = _mm_setzero_si128().as_i32x4();
let neg_one = _mm_set1_epi64x(-1).as_i32x4();
@@ -1748,7 +1644,7 @@ pub unsafe fn _mm256_mask_i64gather_epi32(
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i64gather_ps(
slice: *const f32, offsets: __m128i, scale: i32
slice: *const f32, offsets: __m128i, scale: i32,
) -> __m128 {
let zero = _mm_setzero_ps();
let neg_one = _mm_set1_ps(-1.0);
@@ -1774,7 +1670,7 @@ pub unsafe fn _mm_i64gather_ps(
#[rustc_args_required_const(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i64gather_ps(
src: __m128, slice: *const f32, offsets: __m128i, mask: __m128, scale: i32
src: __m128, slice: *const f32, offsets: __m128i, mask: __m128, scale: i32,
) -> __m128 {
let offsets = offsets.as_i64x2();
let slice = slice as *const i8;
@@ -1797,7 +1693,7 @@ pub unsafe fn _mm_mask_i64gather_ps(
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i64gather_ps(
slice: *const f32, offsets: __m256i, scale: i32
slice: *const f32, offsets: __m256i, scale: i32,
) -> __m128 {
let zero = _mm_setzero_ps();
let neg_one = _mm_set1_ps(-1.0);
@@ -1823,7 +1719,7 @@ pub unsafe fn _mm256_i64gather_ps(
#[rustc_args_required_const(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i64gather_ps(
src: __m128, slice: *const f32, offsets: __m256i, mask: __m128, scale: i32
src: __m128, slice: *const f32, offsets: __m256i, mask: __m128, scale: i32,
) -> __m128 {
let offsets = offsets.as_i64x4();
let slice = slice as *const i8;
@@ -1846,7 +1742,7 @@ pub unsafe fn _mm256_mask_i64gather_ps(
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i64gather_epi64(
slice: *const i64, offsets: __m128i, scale: i32
slice: *const i64, offsets: __m128i, scale: i32,
) -> __m128i {
let zero = _mm_setzero_si128().as_i64x2();
let neg_one = _mm_set1_epi64x(-1).as_i64x2();
@@ -1900,7 +1796,7 @@ pub unsafe fn _mm_mask_i64gather_epi64(
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i64gather_epi64(
slice: *const i64, offsets: __m256i, scale: i32
slice: *const i64, offsets: __m256i, scale: i32,
) -> __m256i {
let zero = _mm256_setzero_si256().as_i64x4();
let neg_one = _mm256_set1_epi64x(-1).as_i64x4();
@@ -1954,7 +1850,7 @@ pub unsafe fn _mm256_mask_i64gather_epi64(
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i64gather_pd(
slice: *const f64, offsets: __m128i, scale: i32
slice: *const f64, offsets: __m128i, scale: i32,
) -> __m128d {
let zero = _mm_setzero_pd();
let neg_one = _mm_set1_pd(-1.0);
@@ -2004,7 +1900,7 @@ pub unsafe fn _mm_mask_i64gather_pd(
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i64gather_pd(
slice: *const f64, offsets: __m256i, scale: i32
slice: *const f64, offsets: __m256i, scale: i32,
) -> __m256d {
let zero = _mm256_setzero_pd();
let neg_one = _mm256_set1_pd(-1.0);
@@ -2053,7 +1949,7 @@ pub unsafe fn _mm256_mask_i64gather_pd(
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_inserti128_si256(
a: __m256i, b: __m128i, imm8: i32
a: __m256i, b: __m128i, imm8: i32,
) -> __m256i {
let a = a.as_i64x4();
let b = _mm256_castsi128_si256(b).as_i64x4();
@@ -2101,12 +1997,9 @@ pub unsafe fn _mm256_maddubs_epi16(a: __m256i, b: __m256i) -> __m256i {
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskload_epi32(
mem_addr: *const i32, mask: __m128i
mem_addr: *const i32, mask: __m128i,
) -> __m128i {
mem::transmute(maskloadd(
mem_addr as *const i8,
mask.as_i32x4(),
))
mem::transmute(maskloadd(mem_addr as *const i8, mask.as_i32x4()))
}
/// Load packed 32-bit integers from memory pointed to by `mem_addr` using `mask`
@@ -2119,12 +2012,9 @@ pub unsafe fn _mm_maskload_epi32(
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskload_epi32(
mem_addr: *const i32, mask: __m256i
mem_addr: *const i32, mask: __m256i,
) -> __m256i {
mem::transmute(maskloadd256(
mem_addr as *const i8,
mask.as_i32x8(),
))
mem::transmute(maskloadd256(mem_addr as *const i8, mask.as_i32x8()))
}
/// Load packed 64-bit integers from memory pointed to by `mem_addr` using `mask`
@@ -2137,12 +2027,9 @@ pub unsafe fn _mm256_maskload_epi32(
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskload_epi64(
mem_addr: *const i64, mask: __m128i
mem_addr: *const i64, mask: __m128i,
) -> __m128i {
mem::transmute(maskloadq(
mem_addr as *const i8,
mask.as_i64x2(),
))
mem::transmute(maskloadq(mem_addr as *const i8, mask.as_i64x2()))
}
/// Load packed 64-bit integers from memory pointed to by `mem_addr` using `mask`
@@ -2155,12 +2042,9 @@ pub unsafe fn _mm_maskload_epi64(
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskload_epi64(
mem_addr: *const i64, mask: __m256i
mem_addr: *const i64, mask: __m256i,
) -> __m256i {
mem::transmute(maskloadq256(
mem_addr as *const i8,
mask.as_i64x4(),
))
mem::transmute(maskloadq256(mem_addr as *const i8, mask.as_i64x4()))
}
/// Store packed 32-bit integers from `a` into memory pointed to by `mem_addr`
@@ -2173,13 +2057,9 @@ pub unsafe fn _mm256_maskload_epi64(
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskstore_epi32(
mem_addr: *mut i32, mask: __m128i, a: __m128i
mem_addr: *mut i32, mask: __m128i, a: __m128i,
) {
maskstored(
mem_addr as *mut i8,
mask.as_i32x4(),
a.as_i32x4(),
)
maskstored(mem_addr as *mut i8, mask.as_i32x4(), a.as_i32x4())
}
/// Store packed 32-bit integers from `a` into memory pointed to by `mem_addr`
@@ -2192,13 +2072,9 @@ pub unsafe fn _mm_maskstore_epi32(
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskstore_epi32(
mem_addr: *mut i32, mask: __m256i, a: __m256i
mem_addr: *mut i32, mask: __m256i, a: __m256i,
) {
maskstored256(
mem_addr as *mut i8,
mask.as_i32x8(),
a.as_i32x8(),
)
maskstored256(mem_addr as *mut i8, mask.as_i32x8(), a.as_i32x8())
}
/// Store packed 64-bit integers from `a` into memory pointed to by `mem_addr`
@@ -2211,13 +2087,9 @@ pub unsafe fn _mm256_maskstore_epi32(
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskstore_epi64(
mem_addr: *mut i64, mask: __m128i, a: __m128i
mem_addr: *mut i64, mask: __m128i, a: __m128i,
) {
maskstoreq(
mem_addr as *mut i8,
mask.as_i64x2(),
a.as_i64x2(),
)
maskstoreq(mem_addr as *mut i8, mask.as_i64x2(), a.as_i64x2())
}
/// Store packed 64-bit integers from `a` into memory pointed to by `mem_addr`
@@ -2230,13 +2102,9 @@ pub unsafe fn _mm_maskstore_epi64(
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskstore_epi64(
mem_addr: *mut i64, mask: __m256i, a: __m256i
mem_addr: *mut i64, mask: __m256i, a: __m256i,
) {
maskstoreq256(
mem_addr as *mut i8,
mask.as_i64x4(),
a.as_i64x4(),
)
maskstoreq256(mem_addr as *mut i8, mask.as_i64x4(), a.as_i64x4())
}
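// A minimal sketch of how the maskload/maskstore pairs above are typically
// used: process a tail of fewer than four i32s without touching memory past
// the end (pointers and lane choice here are illustrative only).
#[target_feature(enable = "avx2")]
unsafe fn copy_first3(src: *const i32, dst: *mut i32) {
    // only lanes whose mask element has its most significant bit set are
    // loaded/stored; lane 3 is neither read nor written
    let mask = _mm_setr_epi32(-1, -1, -1, 0);
    let v = _mm_maskload_epi32(src, mask);
    _mm_maskstore_epi32(dst, mask, v);
}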
/// Compare packed 16-bit integers in `a` and `b`, and return the packed
@@ -2410,7 +2278,7 @@ pub unsafe fn _mm256_movemask_epi8(a: __m256i) -> i32 {
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mpsadbw_epu8(
a: __m256i, b: __m256i, imm8: i32
a: __m256i, b: __m256i, imm8: i32,
) -> __m256i {
let a = a.as_u8x32();
let b = b.as_u8x32();
@@ -2656,7 +2524,7 @@ pub unsafe fn _mm256_permute4x64_epi64(a: __m256i, imm8: i32) -> __m256i {
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_permute2x128_si256(
a: __m256i, b: __m256i, imm8: i32
a: __m256i, b: __m256i, imm8: i32,
) -> __m256i {
let a = a.as_i64x4();
let b = b.as_i64x4();
@@ -3559,16 +3427,23 @@ pub unsafe fn _mm256_subs_epu8(a: __m256i, b: __m256i) -> __m256i {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
/// 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
/// let b = _mm256_setr_epi8(0,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,
/// -16,-17,-18,-19,-20,-21,-22,-23,-24,-25,-26,-27,-28,-29,-30,-31);
/// let a = _mm256_setr_epi8(
/// 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
/// 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
/// );
/// let b = _mm256_setr_epi8(
/// 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15,
/// -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
/// -30, -31,
/// );
///
/// let c = _mm256_unpackhi_epi8(a, b);
///
/// let expected = _mm256_setr_epi8(8,-8, 9,-9, 10,-10, 11,-11, 12,-12, 13,-13,
/// 14,-14, 15,-15, 24,-24, 25,-25, 26,-26, 27,-27, 28,-28, 29,-29, 30,-30,
/// 31,-31);
/// let expected = _mm256_setr_epi8(
/// 8, -8, 9, -9, 10, -10, 11, -11, 12, -12, 13, -13, 14, -14, 15, -15,
/// 24, -24, 25, -25, 26, -26, 27, -27, 28, -28, 29, -29, 30, -30, 31,
/// -31,
/// );
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
///
/// # }
@@ -3612,15 +3487,22 @@ pub unsafe fn _mm256_unpackhi_epi8(a: __m256i, b: __m256i) -> __m256i {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
/// 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
/// let b = _mm256_setr_epi8(0,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,
/// -16,-17,-18,-19,-20,-21,-22,-23,-24,-25,-26,-27,-28,-29,-30,-31);
/// let a = _mm256_setr_epi8(
/// 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
/// 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
/// );
/// let b = _mm256_setr_epi8(
/// 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15,
/// -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
/// -30, -31,
/// );
///
/// let c = _mm256_unpacklo_epi8(a, b);
///
/// let expected = _mm256_setr_epi8(0, 0, 1,-1, 2,-2, 3,-3, 4,-4, 5,-5, 6,-6, 7,-7,
/// 16,-16, 17,-17, 18,-18, 19,-19, 20,-20, 21,-21, 22,-22, 23,-23);
/// let expected = _mm256_setr_epi8(
/// 0, 0, 1, -1, 2, -2, 3, -3, 4, -4, 5, -5, 6, -6, 7, -7, 16, -16, 17,
/// -17, 18, -18, 19, -19, 20, -20, 21, -21, 22, -22, 23, -23,
/// );
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
///
/// # }
@@ -3664,13 +3546,18 @@ pub unsafe fn _mm256_unpacklo_epi8(a: __m256i, b: __m256i) -> __m256i {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
/// let b = _mm256_setr_epi16(0,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15);
/// let a = _mm256_setr_epi16(
/// 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
/// );
/// let b = _mm256_setr_epi16(
/// 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15,
/// );
///
/// let c = _mm256_unpackhi_epi16(a, b);
///
/// let expected = _mm256_setr_epi16(4,-4, 5,-5, 6,-6, 7,-7, 12,-12, 13,-13, 14,-14,
/// 15,-15);
/// let expected = _mm256_setr_epi16(
/// 4, -4, 5, -5, 6, -6, 7, -7, 12, -12, 13, -13, 14, -14, 15, -15,
/// );
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
///
/// # }
@@ -3688,9 +3575,7 @@ pub unsafe fn _mm256_unpackhi_epi16(a: __m256i, b: __m256i) -> __m256i {
let r: i16x16 = simd_shuffle16(
a.as_i16x16(),
b.as_i16x16(),
[
4, 20, 5, 21, 6, 22, 7, 23, 12, 28, 13, 29, 14, 30, 15, 31
],
[4, 20, 5, 21, 6, 22, 7, 23, 12, 28, 13, 29, 14, 30, 15, 31],
);
mem::transmute(r)
}
@@ -3715,13 +3600,18 @@ pub unsafe fn _mm256_unpackhi_epi16(a: __m256i, b: __m256i) -> __m256i {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
///
/// let a = _mm256_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
/// let b = _mm256_setr_epi16(0,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15);
/// let a = _mm256_setr_epi16(
/// 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
/// );
/// let b = _mm256_setr_epi16(
/// 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15,
/// );
///
/// let c = _mm256_unpacklo_epi16(a, b);
///
/// let expected = _mm256_setr_epi16(0, 0, 1,-1, 2,-2, 3,-3, 8,-8, 9,-9, 10,-10,
/// 11,-11);
/// let expected = _mm256_setr_epi16(
/// 0, 0, 1, -1, 2, -2, 3, -3, 8, -8, 9, -9, 10, -10, 11, -11,
/// );
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
///
/// # }
@@ -3739,9 +3629,7 @@ pub unsafe fn _mm256_unpacklo_epi16(a: __m256i, b: __m256i) -> __m256i {
let r: i16x16 = simd_shuffle16(
a.as_i16x16(),
b.as_i16x16(),
[
0, 16, 1, 17, 2, 18, 3, 19, 8, 24, 9, 25, 10, 26, 11, 27
],
[0, 16, 1, 17, 2, 18, 3, 19, 8, 24, 9, 25, 10, 26, 11, 27],
);
mem::transmute(r)
}
@@ -3832,11 +3720,8 @@ pub unsafe fn _mm256_unpackhi_epi32(a: __m256i, b: __m256i) -> __m256i {
#[cfg_attr(test, assert_instr(vunpcklps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_unpacklo_epi32(a: __m256i, b: __m256i) -> __m256i {
let r: i32x8 = simd_shuffle8(
a.as_i32x8(),
b.as_i32x8(),
[0, 8, 1, 9, 4, 12, 5, 13],
);
let r: i32x8 =
simd_shuffle8(a.as_i32x8(), b.as_i32x8(), [0, 8, 1, 9, 4, 12, 5, 13]);
mem::transmute(r)
}
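// The shuffle indices above interleave within each 128-bit half
// independently: lanes 0..3 take the low halves of `a` and `b`
// (0, 8, 1, 9) and lanes 4..7 the high halves (4, 12, 5, 13), matching
// the in-lane behavior of the AVX2 unpack instructions.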
@@ -4183,35 +4068,35 @@ extern "C" {
fn vperm2i128(a: i64x4, b: i64x4, imm8: i8) -> i64x4;
#[link_name = "llvm.x86.avx2.gather.d.d"]
fn pgatherdd(
src: i32x4, slice: *const i8, offsets: i32x4, mask: i32x4, scale: i8
src: i32x4, slice: *const i8, offsets: i32x4, mask: i32x4, scale: i8,
) -> i32x4;
#[link_name = "llvm.x86.avx2.gather.d.d.256"]
fn vpgatherdd(
src: i32x8, slice: *const i8, offsets: i32x8, mask: i32x8, scale: i8
src: i32x8, slice: *const i8, offsets: i32x8, mask: i32x8, scale: i8,
) -> i32x8;
#[link_name = "llvm.x86.avx2.gather.d.q"]
fn pgatherdq(
src: i64x2, slice: *const i8, offsets: i32x4, mask: i64x2, scale: i8
src: i64x2, slice: *const i8, offsets: i32x4, mask: i64x2, scale: i8,
) -> i64x2;
#[link_name = "llvm.x86.avx2.gather.d.q.256"]
fn vpgatherdq(
src: i64x4, slice: *const i8, offsets: i32x4, mask: i64x4, scale: i8
src: i64x4, slice: *const i8, offsets: i32x4, mask: i64x4, scale: i8,
) -> i64x4;
#[link_name = "llvm.x86.avx2.gather.q.d"]
fn pgatherqd(
src: i32x4, slice: *const i8, offsets: i64x2, mask: i32x4, scale: i8
src: i32x4, slice: *const i8, offsets: i64x2, mask: i32x4, scale: i8,
) -> i32x4;
#[link_name = "llvm.x86.avx2.gather.q.d.256"]
fn vpgatherqd(
src: i32x4, slice: *const i8, offsets: i64x4, mask: i32x4, scale: i8
src: i32x4, slice: *const i8, offsets: i64x4, mask: i32x4, scale: i8,
) -> i32x4;
#[link_name = "llvm.x86.avx2.gather.q.q"]
fn pgatherqq(
src: i64x2, slice: *const i8, offsets: i64x2, mask: i64x2, scale: i8
src: i64x2, slice: *const i8, offsets: i64x2, mask: i64x2, scale: i8,
) -> i64x2;
#[link_name = "llvm.x86.avx2.gather.q.q.256"]
fn vpgatherqq(
src: i64x4, slice: *const i8, offsets: i64x4, mask: i64x4, scale: i8
src: i64x4, slice: *const i8, offsets: i64x4, mask: i64x4, scale: i8,
) -> i64x4;
#[link_name = "llvm.x86.avx2.gather.d.pd"]
fn pgatherdpd(
@@ -4235,19 +4120,19 @@ extern "C" {
) -> __m256d;
#[link_name = "llvm.x86.avx2.gather.d.ps"]
fn pgatherdps(
src: __m128, slice: *const i8, offsets: i32x4, mask: __m128, scale: i8
src: __m128, slice: *const i8, offsets: i32x4, mask: __m128, scale: i8,
) -> __m128;
#[link_name = "llvm.x86.avx2.gather.d.ps.256"]
fn vpgatherdps(
src: __m256, slice: *const i8, offsets: i32x8, mask: __m256, scale: i8
src: __m256, slice: *const i8, offsets: i32x8, mask: __m256, scale: i8,
) -> __m256;
#[link_name = "llvm.x86.avx2.gather.q.ps"]
fn pgatherqps(
src: __m128, slice: *const i8, offsets: i64x2, mask: __m128, scale: i8
src: __m128, slice: *const i8, offsets: i64x2, mask: __m128, scale: i8,
) -> __m128;
#[link_name = "llvm.x86.avx2.gather.q.ps.256"]
fn vpgatherqps(
src: __m128, slice: *const i8, offsets: i64x4, mask: __m128, scale: i8
src: __m128, slice: *const i8, offsets: i64x4, mask: __m128, scale: i8,
) -> __m128;
#[link_name = "llvm.x86.avx2.psll.dq"]
fn vpslldq(a: i64x4, b: i32) -> i64x4;
@@ -4718,10 +4603,7 @@ mod tests {
7, 6, 5, 4, 3, 2, 1, 0,
);
let r = _mm256_cmpeq_epi8(a, b);
assert_eq_m256i(
r,
_mm256_insert_epi8(_mm256_set1_epi8(0), !0, 2),
);
assert_eq_m256i(r, _mm256_insert_epi8(_mm256_set1_epi8(0), !0, 2));
}
#[simd_test(enable = "avx2")]
@@ -4737,10 +4619,7 @@ mod tests {
7, 6, 5, 4, 3, 2, 1, 0,
);
let r = _mm256_cmpeq_epi16(a, b);
assert_eq_m256i(
r,
_mm256_insert_epi16(_mm256_set1_epi16(0), !0, 2),
);
assert_eq_m256i(r, _mm256_insert_epi16(_mm256_set1_epi16(0), !0, 2));
}
#[simd_test(enable = "avx2")]
@@ -4758,10 +4637,7 @@ mod tests {
let a = _mm256_setr_epi64x(0, 1, 2, 3);
let b = _mm256_setr_epi64x(3, 2, 2, 0);
let r = _mm256_cmpeq_epi64(a, b);
assert_eq_m256i(
r,
_mm256_insert_epi64(_mm256_set1_epi64x(0), !0, 2),
);
assert_eq_m256i(r, _mm256_insert_epi64(_mm256_set1_epi64x(0), !0, 2));
}
#[simd_test(enable = "avx2")]
@@ -4769,10 +4645,7 @@ mod tests {
let a = _mm256_insert_epi8(_mm256_set1_epi8(0), 5, 0);
let b = _mm256_set1_epi8(0);
let r = _mm256_cmpgt_epi8(a, b);
assert_eq_m256i(
r,
_mm256_insert_epi8(_mm256_set1_epi8(0), !0, 0),
);
assert_eq_m256i(r, _mm256_insert_epi8(_mm256_set1_epi8(0), !0, 0));
}
#[simd_test(enable = "avx2")]
@@ -4780,10 +4653,7 @@ mod tests {
let a = _mm256_insert_epi16(_mm256_set1_epi16(0), 5, 0);
let b = _mm256_set1_epi16(0);
let r = _mm256_cmpgt_epi16(a, b);
assert_eq_m256i(
r,
_mm256_insert_epi16(_mm256_set1_epi16(0), !0, 0),
);
assert_eq_m256i(r, _mm256_insert_epi16(_mm256_set1_epi16(0), !0, 0));
}
#[simd_test(enable = "avx2")]
@@ -4791,10 +4661,7 @@ mod tests {
let a = _mm256_insert_epi32(_mm256_set1_epi32(0), 5, 0);
let b = _mm256_set1_epi32(0);
let r = _mm256_cmpgt_epi32(a, b);
assert_eq_m256i(
r,
_mm256_insert_epi32(_mm256_set1_epi32(0), !0, 0),
);
assert_eq_m256i(r, _mm256_insert_epi32(_mm256_set1_epi32(0), !0, 0));
}
#[simd_test(enable = "avx2")]
@@ -4802,10 +4669,7 @@ mod tests {
let a = _mm256_insert_epi64(_mm256_set1_epi64x(0), 5, 0);
let b = _mm256_set1_epi64x(0);
let r = _mm256_cmpgt_epi64(a, b);
assert_eq_m256i(
r,
_mm256_insert_epi64(_mm256_set1_epi64x(0), !0, 0),
);
assert_eq_m256i(r, _mm256_insert_epi64(_mm256_set1_epi64x(0), !0, 0));
}
#[simd_test(enable = "avx2")]
@@ -5997,16 +5861,7 @@ mod tests {
);
assert_eq_m256(
r,
_mm256_setr_ps(
0.0,
16.0,
64.0,
256.0,
256.0,
256.0,
256.0,
256.0,
),
_mm256_setr_ps(0.0, 16.0, 64.0, 256.0, 256.0, 256.0, 256.0, 256.0),
);
}

View File

@@ -21,17 +21,14 @@ use stdsimd_test::assert_instr;
#[cfg_attr(test, assert_instr(bextr))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _bextr_u32(a: u32, start: u32, len: u32) -> u32 {
_bextr2_u32(
a,
(start & 0xff_u32) | ((len & 0xff_u32) << 8_u32),
)
_bextr2_u32(a, (start & 0xff_u32) | ((len & 0xff_u32) << 8_u32))
}
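// A minimal scalar sketch of the control encoding described below for
// _bextr2_u32: bits [7:0] carry the start index, bits [15:8] the length
// (assumes 0 < len < 32 so the mask shift stays in range).
fn bextr_reference(a: u32, start: u32, len: u32) -> u32 {
    let control = (start & 0xff) | ((len & 0xff) << 8);
    let (s, l) = (control & 0xff, (control >> 8) & 0xff);
    (a >> s) & ((1u32 << l) - 1)
}
// e.g. bextr_reference(0x1234, 4, 8) == 0x23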
/// Extracts bits of `a` specified by `control` into
/// the least significant bits of the result.
///
/// Bits `[7,0]` of `control` specify the index to the first bit in the range to
/// be extracted, and bits `[15,8]` specify the length of the range.
/// Bits `[7,0]` of `control` specify the index to the first bit in the range
/// to be extracted, and bits `[15,8]` specify the length of the range.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bextr2_u32)
#[inline]

View File

@@ -58,7 +58,7 @@ pub unsafe fn _fxrstor(mem_addr: *const u8) {
#[cfg(test)]
mod tests {
use coresimd::x86::*;
use std::{fmt, cmp::PartialEq};
use std::{cmp::PartialEq, fmt};
use stdsimd_test::simd_test;
#[repr(align(16))]

View File

@@ -380,7 +380,7 @@ pub unsafe fn _mm_set_pi32(e1: i32, e0: i32) -> __m64 {
#[inline]
#[target_feature(enable = "mmx")]
pub unsafe fn _mm_set_pi8(
e7: i8, e6: i8, e5: i8, e4: i8, e3: i8, e2: i8, e1: i8, e0: i8
e7: i8, e6: i8, e5: i8, e4: i8, e3: i8, e2: i8, e1: i8, e0: i8,
) -> __m64 {
_mm_setr_pi8(e0, e1, e2, e3, e4, e5, e6, e7)
}
@@ -426,7 +426,7 @@ pub unsafe fn _mm_setr_pi32(e0: i32, e1: i32) -> __m64 {
#[inline]
#[target_feature(enable = "mmx")]
pub unsafe fn _mm_setr_pi8(
e0: i8, e1: i8, e2: i8, e3: i8, e4: i8, e5: i8, e6: i8, e7: i8
e0: i8, e1: i8, e2: i8, e3: i8, e4: i8, e5: i8, e6: i8, e7: i8,
) -> __m64 {
mem::transmute(i8x8::new(e0, e1, e2, e3, e4, e5, e6, e7))
}
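// The argument-order flip above means `set` takes elements from the
// highest lane down while `setr` ("reversed") takes them from lane 0 up;
// e.g. _mm_set_pi8(7, 6, 5, 4, 3, 2, 1, 0) and
// _mm_setr_pi8(0, 1, 2, 3, 4, 5, 6, 7) build the same vector.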
@@ -514,12 +514,8 @@ mod tests {
-30001,
i16::max_value() - 1,
);
let e = _mm_setr_pi16(
i16::min_value(),
30000,
-30000,
i16::max_value(),
);
let e =
_mm_setr_pi16(i16::min_value(), 30000, -30000, i16::max_value());
assert_eq_m64(e, _mm_add_pi16(a, b));
assert_eq_m64(e, _m_paddw(a, b));
}
@@ -537,16 +533,8 @@ mod tests {
unsafe fn test_mm_adds_pi8() {
let a = _mm_setr_pi8(-100, -1, 1, 100, -1, 0, 1, 0);
let b = _mm_setr_pi8(-100, 1, -1, 100, 0, -1, 0, 1);
let e = _mm_setr_pi8(
i8::min_value(),
0,
0,
i8::max_value(),
-1,
-1,
1,
1,
);
let e =
_mm_setr_pi8(i8::min_value(), 0, 0, i8::max_value(), -1, -1, 1, 1);
assert_eq_m64(e, _mm_adds_pi8(a, b));
assert_eq_m64(e, _m_paddsb(a, b));
}

View File

@@ -444,11 +444,12 @@ impl m256iExt for __m256i {
}
}
use coresimd::simd::{f32x2, f32x4, f32x8, f64x2, f64x4, i16x16, i16x4, i16x8,
i32x2, i32x4, i32x8, i64x2, i64x4, i8x16, i8x32, i8x8,
m16x16, m16x4, m16x8, m32x2, m32x4, m32x8, m64x2, m64x4,
m8x16, m8x32, m8x8, u16x16, u16x4, u16x8, u32x2, u32x4,
u32x8, u64x2, u64x4, u8x16, u8x32, u8x8};
use coresimd::simd::{
f32x2, f32x4, f32x8, f64x2, f64x4, i16x16, i16x4, i16x8, i32x2, i32x4,
i32x8, i64x2, i64x4, i8x16, i8x32, i8x8, m16x16, m16x4, m16x8, m32x2,
m32x4, m32x8, m64x2, m64x4, m8x16, m8x32, m8x8, u16x16, u16x4, u16x8,
u32x2, u32x4, u32x8, u64x2, u64x4, u8x16, u8x32, u8x8,
};
impl_from_bits_!(
__m64: u32x2,

View File

@@ -25,20 +25,25 @@ extern "C" {
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_clmulepi64_si128)
#[inline]
#[target_feature(enable = "pclmulqdq")]
#[cfg_attr(all(test, not(target_os = "linux")),
assert_instr(pclmulqdq, imm8 = 0))]
#[cfg_attr(all(test, target_os = "linux"),
assert_instr(pclmullqlqdq, imm8 = 0))]
#[cfg_attr(all(test, target_os = "linux"),
assert_instr(pclmulhqlqdq, imm8 = 1))]
#[cfg_attr(all(test, target_os = "linux"),
assert_instr(pclmullqhqdq, imm8 = 16))]
#[cfg_attr(all(test, target_os = "linux"),
assert_instr(pclmulhqhqdq, imm8 = 17))]
#[cfg_attr(
all(test, not(target_os = "linux")), assert_instr(pclmulqdq, imm8 = 0)
)]
#[cfg_attr(
all(test, target_os = "linux"), assert_instr(pclmullqlqdq, imm8 = 0)
)]
#[cfg_attr(
all(test, target_os = "linux"), assert_instr(pclmulhqlqdq, imm8 = 1)
)]
#[cfg_attr(
all(test, target_os = "linux"), assert_instr(pclmullqhqdq, imm8 = 16)
)]
#[cfg_attr(
all(test, target_os = "linux"), assert_instr(pclmulhqhqdq, imm8 = 17)
)]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_clmulepi64_si128(
a: __m128i, b: __m128i, imm8: i32
a: __m128i, b: __m128i, imm8: i32,
) -> __m128i {
macro_rules! call {
($imm8:expr) => {

View File

@@ -1,4 +1,3 @@
//! RDRAND and RDSEED instructions for returning random numbers from an Intel
//! on-chip hardware random number generator which has been seeded by an
//! on-chip entropy source.

View File

@@ -75,7 +75,7 @@ pub unsafe fn _mm_sha1nexte_epu32(a: __m128i, b: __m128i) -> __m128i {
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sha1rnds4_epu32(
a: __m128i, b: __m128i, func: i32
a: __m128i, b: __m128i, func: i32,
) -> __m128i {
let a = a.as_i32x4();
let b = b.as_i32x4();
@@ -126,13 +126,9 @@ pub unsafe fn _mm_sha256msg2_epu32(a: __m128i, b: __m128i) -> __m128i {
#[cfg_attr(test, assert_instr(sha256rnds2))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sha256rnds2_epu32(
a: __m128i, b: __m128i, k: __m128i
a: __m128i, b: __m128i, k: __m128i,
) -> __m128i {
mem::transmute(sha256rnds2(
a.as_i32x4(),
b.as_i32x4(),
k.as_i32x4(),
))
mem::transmute(sha256rnds2(a.as_i32x4(), b.as_i32x4(), k.as_i32x4()))
}
#[cfg(test)]

View File

@@ -230,8 +230,10 @@ pub unsafe fn _mm_max_ps(a: __m128, b: __m128) -> __m128 {
#[inline]
#[target_feature(enable = "sse")]
// i586 only seems to generate plain `and` instructions, so ignore it.
#[cfg_attr(all(test, any(target_arch = "x86_64", target_feature = "sse2")),
assert_instr(andps))]
#[cfg_attr(
all(test, any(target_arch = "x86_64", target_feature = "sse2")),
assert_instr(andps)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_and_ps(a: __m128, b: __m128) -> __m128 {
let a: __m128i = mem::transmute(a);
@@ -249,8 +251,10 @@ pub unsafe fn _mm_and_ps(a: __m128, b: __m128) -> __m128 {
#[target_feature(enable = "sse")]
// i586 only seems to generate plain `not` and `and` instructions, so ignore
// it.
#[cfg_attr(all(test, any(target_arch = "x86_64", target_feature = "sse2")),
assert_instr(andnps))]
#[cfg_attr(
all(test, any(target_arch = "x86_64", target_feature = "sse2")),
assert_instr(andnps)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_andnot_ps(a: __m128, b: __m128) -> __m128 {
let a: __m128i = mem::transmute(a);
@@ -265,8 +269,10 @@ pub unsafe fn _mm_andnot_ps(a: __m128, b: __m128) -> __m128 {
#[inline]
#[target_feature(enable = "sse")]
// i586 only seems to generate plain `or` instructions, so we ignore it.
#[cfg_attr(all(test, any(target_arch = "x86_64", target_feature = "sse2")),
assert_instr(orps))]
#[cfg_attr(
all(test, any(target_arch = "x86_64", target_feature = "sse2")),
assert_instr(orps)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_or_ps(a: __m128, b: __m128) -> __m128 {
let a: __m128i = mem::transmute(a);
@@ -281,8 +287,10 @@ pub unsafe fn _mm_or_ps(a: __m128, b: __m128) -> __m128 {
#[inline]
#[target_feature(enable = "sse")]
// i586 only seems to generate plain `xor` instructions, so we ignore it.
#[cfg_attr(all(test, any(target_arch = "x86_64", target_feature = "sse2")),
assert_instr(xorps))]
#[cfg_attr(
all(test, any(target_arch = "x86_64", target_feature = "sse2")),
assert_instr(xorps)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_xor_ps(a: __m128, b: __m128) -> __m128 {
let a: __m128i = mem::transmute(a);
@@ -1132,10 +1140,14 @@ pub unsafe fn _mm_movemask_ps(a: __m128) -> i32 {
#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(movhpd))]
// 32-bit codegen does not generate `movhps` or `movhpd`, but instead
// `movsd` followed by `unpcklpd` (or `movss`/`unpcklps` if there's no SSE2).
#[cfg_attr(all(test, target_arch = "x86", target_feature = "sse2"),
assert_instr(movlhps))]
#[cfg_attr(all(test, target_arch = "x86", not(target_feature = "sse2")),
assert_instr(unpcklps))]
#[cfg_attr(
all(test, target_arch = "x86", target_feature = "sse2"),
assert_instr(movlhps)
)]
#[cfg_attr(
all(test, target_arch = "x86", not(target_feature = "sse2")),
assert_instr(unpcklps)
)]
// TODO: This function is actually not limited to floats, but that's what
// matches the C type most closely: (__m128, *const __m64) -> __m128
pub unsafe fn _mm_loadh_pi(a: __m128, p: *const __m64) -> __m128 {
@@ -1185,11 +1197,15 @@ pub unsafe fn _mm_loadh_pi(a: __m128, p: *const __m64) -> __m128 {
// #[cfg_attr(test, assert_instr(movlps))]
#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(movlpd))]
// On 32-bit targets with SSE2, it just generates two `movsd`.
#[cfg_attr(all(test, target_arch = "x86", target_feature = "sse2"),
assert_instr(movsd))]
#[cfg_attr(
all(test, target_arch = "x86", target_feature = "sse2"),
assert_instr(movsd)
)]
// It should really generate "movlps", but oh well...
#[cfg_attr(all(test, target_arch = "x86", not(target_feature = "sse2")),
assert_instr(movss))]
#[cfg_attr(
all(test, target_arch = "x86", not(target_feature = "sse2")),
assert_instr(movss)
)]
// TODO: Like _mm_loadh_pi, this also isn't limited to floats.
pub unsafe fn _mm_loadl_pi(a: __m128, p: *const __m64) -> __m128 {
let q = p as *const f32x2;
@@ -1321,8 +1337,10 @@ pub unsafe fn _mm_loadr_ps(p: *const f32) -> __m128 {
// On i686 and up LLVM actually generates MOVHPD instead of MOVHPS, that's
// fine.
// On i586 (no SSE2) it just generates plain MOV instructions.
#[cfg_attr(all(test, any(target_arch = "x86_64", target_feature = "sse2")),
assert_instr(movhpd))]
#[cfg_attr(
all(test, any(target_arch = "x86_64", target_feature = "sse2")),
assert_instr(movhpd)
)]
pub unsafe fn _mm_storeh_pi(p: *mut __m64, a: __m128) {
#[cfg(target_arch = "x86")]
{
@@ -1349,8 +1367,10 @@ pub unsafe fn _mm_storeh_pi(p: *mut __m64, a: __m128) {
#[inline]
#[target_feature(enable = "sse")]
// On i586 the codegen just generates plain MOVs. No need to test for that.
#[cfg_attr(all(test, any(target_arch = "x86_64", target_feature = "sse2")),
assert_instr(movlps))]
#[cfg_attr(
all(test, any(target_arch = "x86_64", target_feature = "sse2")),
assert_instr(movlps)
)]
pub unsafe fn _mm_storel_pi(p: *mut __m64, a: __m128) {
#[cfg(target_arch = "x86")]
{
@@ -1929,7 +1949,7 @@ pub unsafe fn _mm_undefined_ps() -> __m128 {
#[target_feature(enable = "sse")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _MM_TRANSPOSE4_PS(
row0: &mut __m128, row1: &mut __m128, row2: &mut __m128, row3: &mut __m128
row0: &mut __m128, row1: &mut __m128, row2: &mut __m128, row3: &mut __m128,
) {
let tmp0 = _mm_unpacklo_ps(*row0, *row1);
let tmp2 = _mm_unpacklo_ps(*row2, *row3);
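// Sketch of the remaining steps, assuming the classic 4x4 transpose
// algorithm: the unpacklo/unpackhi pairs interleave row pairs, then
// movelh/movehl-style shuffles recombine matching 64-bit halves into the
// transposed rows.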
@@ -2734,12 +2754,8 @@ mod tests {
let b2 = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
let r2: u32x4 = transmute(_mm_cmpeq_ss(a, b2));
let e2: u32x4 = transmute(_mm_setr_ps(
transmute(0xffffffffu32),
2.0,
3.0,
4.0,
));
let e2: u32x4 =
transmute(_mm_setr_ps(transmute(0xffffffffu32), 2.0, 3.0, 4.0));
assert_eq!(r2, e2);
}
@@ -3453,22 +3469,9 @@ mod tests {
#[simd_test(enable = "sse")]
unsafe fn test_mm_cvtss_si32() {
let inputs = &[
42.0f32,
-3.1,
4.0e10,
4.0e-20,
NAN,
2147483500.1,
];
let result = &[
42i32,
-3,
i32::min_value(),
0,
i32::min_value(),
2147483520,
];
let inputs = &[42.0f32, -3.1, 4.0e10, 4.0e-20, NAN, 2147483500.1];
let result =
&[42i32, -3, i32::min_value(), 0, i32::min_value(), 2147483520];
for i in 0..inputs.len() {
let x = _mm_setr_ps(inputs[i], 1.0, 3.0, 4.0);
let e = result[i];
@@ -3672,10 +3675,8 @@ mod tests {
}
let r = _mm_load_ps(p);
let e = _mm_add_ps(
_mm_setr_ps(1.0, 2.0, 3.0, 4.0),
_mm_set1_ps(fixup),
);
let e =
_mm_add_ps(_mm_setr_ps(1.0, 2.0, 3.0, 4.0), _mm_set1_ps(fixup));
assert_eq_m128(r, e);
}
@@ -3705,10 +3706,8 @@ mod tests {
}
let r = _mm_loadr_ps(p);
let e = _mm_add_ps(
_mm_setr_ps(4.0, 3.0, 2.0, 1.0),
_mm_set1_ps(fixup),
);
let e =
_mm_add_ps(_mm_setr_ps(4.0, 3.0, 2.0, 1.0), _mm_set1_ps(fixup));
assert_eq_m128(r, e);
}
@@ -3947,9 +3946,7 @@ mod tests {
#[simd_test(enable = "sse")]
unsafe fn test_mm_stream_ps() {
let a = _mm_set1_ps(7.0);
let mut mem = Memory {
data: [-1.0; 4],
};
let mut mem = Memory { data: [-1.0; 4] };
_mm_stream_ps(&mut mem.data[0] as *mut f32, a);
for i in 0..4 {
@@ -4157,12 +4154,8 @@ mod tests {
#[simd_test(enable = "sse,mmx")]
unsafe fn test_mm_movemask_pi8() {
let a = _mm_setr_pi16(
0b1000_0000,
0b0100_0000,
0b1000_0000,
0b0100_0000,
);
let a =
_mm_setr_pi16(0b1000_0000, 0b0100_0000, 0b1000_0000, 0b0100_0000);
let r = _mm_movemask_pi8(a);
assert_eq!(r, 0b10001);
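// Worked expectation: each 0b1000_0000 element sets the high bit only of
// its low byte, so bytes 0 and 4 contribute mask bits 0 and 4, giving
// 0b1_0001; the 0b0100_0000 elements set no byte-high bits at all.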

View File

@@ -1010,7 +1010,7 @@ pub unsafe fn _mm_set_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set_epi16(
e7: i16, e6: i16, e5: i16, e4: i16, e3: i16, e2: i16, e1: i16, e0: i16
e7: i16, e6: i16, e5: i16, e4: i16, e3: i16, e2: i16, e1: i16, e0: i16,
) -> __m128i {
mem::transmute(i16x8::new(e0, e1, e2, e3, e4, e5, e6, e7))
}
@@ -1095,7 +1095,7 @@ pub unsafe fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_setr_epi16(
e7: i16, e6: i16, e5: i16, e4: i16, e3: i16, e2: i16, e1: i16, e0: i16
e7: i16, e6: i16, e5: i16, e4: i16, e3: i16, e2: i16, e1: i16, e0: i16,
) -> __m128i {
_mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7)
}
@@ -1134,10 +1134,15 @@ pub unsafe fn _mm_setzero_si128() -> __m128i {
#[inline]
#[target_feature(enable = "sse2")]
// FIXME movsd on windows
#[cfg_attr(all(test, not(windows),
#[cfg_attr(
all(
test,
not(windows),
not(all(target_os = "linux", target_arch = "x86_64")),
target_arch = "x86_64"),
assert_instr(movq))]
target_arch = "x86_64"
),
assert_instr(movq)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadl_epi64(mem_addr: *const __m128i) -> __m128i {
_mm_set_epi64x(0, simd_extract((*mem_addr).as_i64x2(), 0))
@@ -1190,7 +1195,7 @@ pub unsafe fn _mm_loadu_si128(mem_addr: *const __m128i) -> __m128i {
#[cfg_attr(test, assert_instr(maskmovdqu))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskmoveu_si128(
a: __m128i, mask: __m128i, mem_addr: *mut i8
a: __m128i, mask: __m128i, mem_addr: *mut i8,
) {
maskmovdqu(a.as_i8x16(), mask.as_i8x16(), mem_addr)
}
@@ -1229,10 +1234,15 @@ pub unsafe fn _mm_storeu_si128(mem_addr: *mut __m128i, a: __m128i) {
#[inline]
#[target_feature(enable = "sse2")]
// FIXME mov on windows, movlps on i686
#[cfg_attr(all(test, not(windows),
#[cfg_attr(
all(
test,
not(windows),
not(all(target_os = "linux", target_arch = "x86_64")),
target_arch = "x86_64"),
assert_instr(movq))]
target_arch = "x86_64"
),
assert_instr(movq)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_storel_epi64(mem_addr: *mut __m128i, a: __m128i) {
ptr::copy_nonoverlapping(
@@ -1275,8 +1285,9 @@ pub unsafe fn _mm_stream_si32(mem_addr: *mut i32, a: i32) {
#[inline]
#[target_feature(enable = "sse2")]
// FIXME movd on windows, movd on i686
#[cfg_attr(all(test, not(windows), target_arch = "x86_64"),
assert_instr(movq))]
#[cfg_attr(
all(test, not(windows), target_arch = "x86_64"), assert_instr(movq)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_move_epi64(a: __m128i) -> __m128i {
let zero = _mm_setzero_si128();
@@ -1341,11 +1352,7 @@ pub unsafe fn _mm_extract_epi16(a: __m128i, imm8: i32) -> i32 {
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_insert_epi16(a: __m128i, i: i32, imm8: i32) -> __m128i {
mem::transmute(simd_insert(
a.as_i16x8(),
(imm8 & 7) as u32,
i as i16,
))
mem::transmute(simd_insert(a.as_i16x8(), (imm8 & 7) as u32, i as i16))
}
/// Return a mask of the most significant bit of each element in `a`.
@@ -1443,16 +1450,7 @@ pub unsafe fn _mm_shufflehi_epi16(a: __m128i, imm8: i32) -> __m128i {
simd_shuffle8(
a,
a,
[
0,
1,
2,
3,
$x01 + 4,
$x23 + 4,
$x45 + 4,
$x67 + 4,
],
[0, 1, 2, 3, $x01 + 4, $x23 + 4, $x45 + 4, $x67 + 4],
)
};
}
@@ -1567,9 +1565,7 @@ pub unsafe fn _mm_unpackhi_epi8(a: __m128i, b: __m128i) -> __m128i {
mem::transmute::<i8x16, _>(simd_shuffle16(
a.as_i8x16(),
b.as_i8x16(),
[
8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31
],
[8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31],
))
}
@@ -1630,9 +1626,7 @@ pub unsafe fn _mm_unpacklo_epi8(a: __m128i, b: __m128i) -> __m128i {
mem::transmute::<i8x16, _>(simd_shuffle16(
a.as_i8x16(),
b.as_i8x16(),
[
0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23
],
[0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23],
))
}
@@ -1644,11 +1638,8 @@ pub unsafe fn _mm_unpacklo_epi8(a: __m128i, b: __m128i) -> __m128i {
#[cfg_attr(test, assert_instr(punpcklwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_unpacklo_epi16(a: __m128i, b: __m128i) -> __m128i {
let x = simd_shuffle8(
a.as_i16x8(),
b.as_i16x8(),
[0, 8, 1, 9, 2, 10, 3, 11],
);
let x =
simd_shuffle8(a.as_i16x8(), b.as_i16x8(), [0, 8, 1, 9, 2, 10, 3, 11]);
mem::transmute::<i16x8, _>(x)
}
@@ -1947,11 +1938,7 @@ pub unsafe fn _mm_cmple_sd(a: __m128d, b: __m128d) -> __m128d {
#[cfg_attr(test, assert_instr(cmpltsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpgt_sd(a: __m128d, b: __m128d) -> __m128d {
simd_insert(
_mm_cmplt_sd(b, a),
1,
simd_extract::<_, f64>(a, 1),
)
simd_insert(_mm_cmplt_sd(b, a), 1, simd_extract::<_, f64>(a, 1))
}
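// Derivation note: `a > b` reuses the existing `b < a` intrinsic, but
// _mm_cmplt_sd(b, a) passes `b`'s upper 64-bit element through, so the
// simd_insert restores lane 1 from `a` to satisfy the cmpgt_sd contract.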
/// Return a new vector with the low element of `a` replaced by the
@@ -1963,11 +1950,7 @@ pub unsafe fn _mm_cmpgt_sd(a: __m128d, b: __m128d) -> __m128d {
#[cfg_attr(test, assert_instr(cmplesd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpge_sd(a: __m128d, b: __m128d) -> __m128d {
simd_insert(
_mm_cmple_sd(b, a),
1,
simd_extract::<_, f64>(a, 1),
)
simd_insert(_mm_cmple_sd(b, a), 1, simd_extract::<_, f64>(a, 1))
}
/// Return a new vector with the low element of `a` replaced by the result
@@ -2042,11 +2025,7 @@ pub unsafe fn _mm_cmpnle_sd(a: __m128d, b: __m128d) -> __m128d {
#[cfg_attr(test, assert_instr(cmpnltsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpngt_sd(a: __m128d, b: __m128d) -> __m128d {
simd_insert(
_mm_cmpnlt_sd(b, a),
1,
simd_extract::<_, f64>(a, 1),
)
simd_insert(_mm_cmpnlt_sd(b, a), 1, simd_extract::<_, f64>(a, 1))
}
/// Return a new vector with the low element of `a` replaced by the
@@ -2058,11 +2037,7 @@ pub unsafe fn _mm_cmpngt_sd(a: __m128d, b: __m128d) -> __m128d {
#[cfg_attr(test, assert_instr(cmpnlesd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpnge_sd(a: __m128d, b: __m128d) -> __m128d {
simd_insert(
_mm_cmpnle_sd(b, a),
1,
simd_extract::<_, f64>(a, 1),
)
simd_insert(_mm_cmpnle_sd(b, a), 1, simd_extract::<_, f64>(a, 1))
}
/// Compare corresponding elements in `a` and `b` for equality.
@@ -2881,8 +2856,9 @@ pub unsafe fn _mm_undefined_si128() -> __m128i {
/// The resulting `__m128d` element is composed of the low-order values of
/// the two `__m128d` interleaved input elements, i.e.:
///
/// * The `[127:64]` bits are copied from the `[127:64]` bits of the second input
/// * The `[63:0]` bits are copied from the `[127:64]` bits of the first input
/// * The `[127:64]` bits are copied from the `[127:64]` bits of the second
///   input
/// * The `[63:0]` bits are copied from the `[127:64]` bits of the first
///   input
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_pd)
#[inline]
@@ -3223,22 +3199,7 @@ mod tests {
#[simd_test(enable = "sse2")]
unsafe fn test_mm_add_epi8() {
let a = _mm_setr_epi8(
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
);
#[cfg_attr(rustfmt, rustfmt_skip)]
let b = _mm_setr_epi8(
@@ -3290,22 +3251,7 @@ mod tests {
#[simd_test(enable = "sse2")]
unsafe fn test_mm_adds_epi8() {
let a = _mm_setr_epi8(
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
);
#[cfg_attr(rustfmt, rustfmt_skip)]
let b = _mm_setr_epi8(
@@ -3363,22 +3309,7 @@ mod tests {
#[simd_test(enable = "sse2")]
unsafe fn test_mm_adds_epu8() {
let a = _mm_setr_epi8(
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
);
#[cfg_attr(rustfmt, rustfmt_skip)]
let b = _mm_setr_epi8(
@@ -3629,22 +3560,7 @@ mod tests {
);
let r = _mm_slli_si128(a, 1);
let e = _mm_setr_epi8(
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
);
assert_eq_m128i(r, e);
@@ -3888,41 +3804,10 @@ mod tests {
#[simd_test(enable = "sse2")]
unsafe fn test_mm_cmpeq_epi8() {
let a = _mm_setr_epi8(
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
);
let b = _mm_setr_epi8(
15,
14,
2,
12,
11,
10,
9,
8,
7,
6,
5,
4,
3,
2,
1,
0,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
);
let b =
_mm_setr_epi8(15, 14, 2, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
let r = _mm_cmpeq_epi8(a, b);
#[cfg_attr(rustfmt, rustfmt_skip)]
assert_eq_m128i(
@@ -4869,9 +4754,7 @@ mod tests {
pub data: [f64; 2],
}
let a = _mm_set1_pd(7.0);
let mut mem = Memory {
data: [-1.0; 2],
};
let mut mem = Memory { data: [-1.0; 2] };
_mm_stream_pd(&mut mem.data[0] as *mut f64, a);
for i in 0..2 {
@@ -4889,9 +4772,7 @@ mod tests {
#[simd_test(enable = "sse2")]
unsafe fn test_mm_store_pd() {
let mut mem = Memory {
data: [0.0f64; 4],
};
let mut mem = Memory { data: [0.0f64; 4] };
let vals = &mut mem.data;
let a = _mm_setr_pd(1.0, 2.0);
let d = vals.as_mut_ptr();
@@ -4903,9 +4784,7 @@ mod tests {
#[simd_test(enable = "sse")]
unsafe fn test_mm_storeu_pd() {
let mut mem = Memory {
data: [0.0f64; 4],
};
let mut mem = Memory { data: [0.0f64; 4] };
let vals = &mut mem.data;
let a = _mm_setr_pd(1.0, 2.0);
@@ -4929,9 +4808,7 @@ mod tests {
#[simd_test(enable = "sse2")]
unsafe fn test_mm_store1_pd() {
let mut mem = Memory {
data: [0.0f64; 4],
};
let mut mem = Memory { data: [0.0f64; 4] };
let vals = &mut mem.data;
let a = _mm_setr_pd(1.0, 2.0);
let d = vals.as_mut_ptr();
@@ -4943,9 +4820,7 @@ mod tests {
#[simd_test(enable = "sse2")]
unsafe fn test_mm_store_pd1() {
let mut mem = Memory {
data: [0.0f64; 4],
};
let mut mem = Memory { data: [0.0f64; 4] };
let vals = &mut mem.data;
let a = _mm_setr_pd(1.0, 2.0);
let d = vals.as_mut_ptr();
@@ -4957,9 +4832,7 @@ mod tests {
#[simd_test(enable = "sse2")]
unsafe fn test_mm_storer_pd() {
let mut mem = Memory {
data: [0.0f64; 4],
};
let mut mem = Memory { data: [0.0f64; 4] };
let vals = &mut mem.data;
let a = _mm_setr_pd(1.0, 2.0);
let d = vals.as_mut_ptr();
@@ -5013,10 +4886,7 @@ mod tests {
}
let r = _mm_loadu_pd(d);
let e = _mm_add_pd(
_mm_setr_pd(1.0, 2.0),
_mm_set1_pd(offset as f64),
);
let e = _mm_add_pd(_mm_setr_pd(1.0, 2.0), _mm_set1_pd(offset as f64));
assert_eq_m128d(r, e);
}
@@ -5091,12 +4961,8 @@ mod tests {
assert_eq_m128(r, _mm_setr_ps(2.0, -2.2, 3.3, 4.4));
let a = _mm_setr_ps(
-1.1,
f32::NEG_INFINITY,
f32::MAX,
f32::NEG_INFINITY,
);
let a =
_mm_setr_ps(-1.1, f32::NEG_INFINITY, f32::MAX, f32::NEG_INFINITY);
let b = _mm_setr_pd(f64::INFINITY, -5.0);
let r = _mm_cvtsd_ss(a, b);
@@ -5161,12 +5027,8 @@ mod tests {
let r = _mm_cvttps_epi32(a);
assert_eq_m128i(r, _mm_setr_epi32(-1, 2, -3, 6));
let a = _mm_setr_ps(
f32::NEG_INFINITY,
f32::INFINITY,
f32::MIN,
f32::MAX,
);
let a =
_mm_setr_ps(f32::NEG_INFINITY, f32::INFINITY, f32::MIN, f32::MAX);
let r = _mm_cvttps_epi32(a);
assert_eq_m128i(
r,

View File

@@ -66,13 +66,9 @@ pub const _MM_FROUND_NEARBYINT: i32 =
#[cfg_attr(test, assert_instr(pblendvb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_blendv_epi8(
a: __m128i, b: __m128i, mask: __m128i
a: __m128i, b: __m128i, mask: __m128i,
) -> __m128i {
mem::transmute(pblendvb(
a.as_i8x16(),
b.as_i8x16(),
mask.as_i8x16(),
))
mem::transmute(pblendvb(a.as_i8x16(), b.as_i8x16(), mask.as_i8x16()))
}
/// Blend packed 16-bit integers from `a` and `b` using the mask `imm8`.
@@ -250,11 +246,7 @@ pub unsafe fn _mm_insert_ps(a: __m128, b: __m128, imm8: i32) -> __m128 {
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_insert_epi8(a: __m128i, i: i32, imm8: i32) -> __m128i {
mem::transmute(simd_insert(
a.as_i8x16(),
(imm8 & 0b1111) as u32,
i as i8,
))
mem::transmute(simd_insert(a.as_i8x16(), (imm8 & 0b1111) as u32, i as i8))
}
/// Return a copy of `a` with the 32-bit integer from `i` inserted at a
@@ -267,11 +259,7 @@ pub unsafe fn _mm_insert_epi8(a: __m128i, i: i32, imm8: i32) -> __m128i {
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_insert_epi32(a: __m128i, i: i32, imm8: i32) -> __m128i {
mem::transmute(simd_insert(
a.as_i32x4(),
(imm8 & 0b11) as u32,
i,
))
mem::transmute(simd_insert(a.as_i32x4(), (imm8 & 0b11) as u32, i))
}
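// A minimal sketch of the index masking above: `imm8 & 0b11` wraps
// out-of-range lane indices instead of faulting, so index 5 targets
// lane 1 (values here are illustrative).
#[target_feature(enable = "sse4.1")]
unsafe fn wrap_demo() -> __m128i {
    _mm_insert_epi32(_mm_set1_epi32(0), 7, 5)
}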
/// Compare packed 8-bit integers in `a` and `b` and return packed maximum
@@ -1778,16 +1766,12 @@ mod tests {
}
{
let a = _mm_setr_epi32(
15,
2, /* ignored */
1234567,
4, /* ignored */
15, 2, /* ignored */
1234567, 4, /* ignored */
);
let b = _mm_setr_epi32(
-20,
-256, /* ignored */
666666,
666666, /* ignored */
-20, -256, /* ignored */
666666, 666666, /* ignored */
);
let r = _mm_mul_epi32(a, b);
let e = _mm_setr_epi64x(-300, 823043843622);

View File

@@ -439,7 +439,7 @@ pub unsafe fn _mm_cmpistra(a: __m128i, b: __m128i, imm8: i32) -> i32 {
#[rustc_args_required_const(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpestrm(
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32,
) -> __m128i {
let a = a.as_i8x16();
let b = b.as_i8x16();
@@ -544,7 +544,7 @@ pub unsafe fn _mm_cmpestrm(
#[rustc_args_required_const(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpestri(
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32,
) -> i32 {
let a = a.as_i8x16();
let b = b.as_i8x16();
@@ -567,7 +567,7 @@ pub unsafe fn _mm_cmpestri(
#[rustc_args_required_const(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpestrz(
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32,
) -> i32 {
let a = a.as_i8x16();
let b = b.as_i8x16();
@@ -590,7 +590,7 @@ pub unsafe fn _mm_cmpestrz(
#[rustc_args_required_const(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpestrc(
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32,
) -> i32 {
let a = a.as_i8x16();
let b = b.as_i8x16();
@@ -613,7 +613,7 @@ pub unsafe fn _mm_cmpestrc(
#[rustc_args_required_const(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpestrs(
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32,
) -> i32 {
let a = a.as_i8x16();
let b = b.as_i8x16();
@@ -636,7 +636,7 @@ pub unsafe fn _mm_cmpestrs(
#[rustc_args_required_const(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpestro(
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32,
) -> i32 {
let a = a.as_i8x16();
let b = b.as_i8x16();
@@ -660,7 +660,7 @@ pub unsafe fn _mm_cmpestro(
#[rustc_args_required_const(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpestra(
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32,
) -> i32 {
let a = a.as_i8x16();
let b = b.as_i8x16();
@@ -917,13 +917,8 @@ mod tests {
unsafe fn test_mm_cmpestra() {
let a = str_to_m128i(b"Cannot match a");
let b = str_to_m128i(b"Null after 14");
let i = _mm_cmpestra(
a,
14,
b,
16,
_SIDD_CMP_EQUAL_EACH | _SIDD_UNIT_MASK,
);
let i =
_mm_cmpestra(a, 14, b, 16, _SIDD_CMP_EQUAL_EACH | _SIDD_UNIT_MASK);
assert_eq!(1, i);
}

View File

@@ -25,8 +25,8 @@ extern "C" {
/// Extracts the bit range specified by `y` from the lower 64 bits of `x`.
///
/// The `[13:8]` bits of `y` specify the index of the bit-range to extract. The
/// `[5:0]` bits of `y` specify the length of the bit-range to extract. All other
/// bits are ignored.
/// `[5:0]` bits of `y` specify the length of the bit-range to extract. All
/// other bits are ignored.
///
/// If the length is zero, it is interpreted as `64`. If the length and index
/// are zero, the lower 64 bits of `x` are extracted.

View File

@@ -596,24 +596,8 @@ mod tests {
12, 5, 5, 10,
4, 1, 8, 0,
);
let expected = _mm_setr_epi8(
5,
0,
5,
4,
9,
13,
7,
4,
13,
6,
6,
11,
5,
2,
9,
1,
);
let expected =
_mm_setr_epi8(5, 0, 5, 4, 9, 13, 7, 4, 13, 6, 6, 11, 5, 2, 9, 1);
let r = _mm_shuffle_epi8(a, b);
assert_eq_m128i(r, expected);
}
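// Selection rule behind the expectation above: each byte of `b` indexes
// into `a` via `b & 0x0f`; a byte of `b` with its high bit set would
// zero that output lane instead.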

View File

@@ -121,7 +121,7 @@ mod x86_polyfill {
#[target_feature(enable = "avx2")]
pub unsafe fn _mm256_insert_epi64(
a: __m256i, val: i64, idx: i32
a: __m256i, val: i64, idx: i32,
) -> __m256i {
union A {
a: __m256i,

View File

@@ -38,11 +38,7 @@ extern "C" {
#[cfg_attr(test, assert_instr(xsave))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _xsave(mem_addr: *mut u8, save_mask: u64) {
xsave(
mem_addr,
(save_mask >> 32) as u32,
save_mask as u32,
);
xsave(mem_addr, (save_mask >> 32) as u32, save_mask as u32);
}
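// Encoding note: XSAVE takes the 64-bit component bitmap in EDX:EAX,
// hence the split above; e.g. save_mask = 0b111 requests the x87, SSE,
// and AVX state components with edx = 0 and eax = 0b111.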
/// Perform a full or partial restore of the enabled processor states using
@@ -110,11 +106,7 @@ pub unsafe fn _xgetbv(xcr_no: u32) -> u64 {
#[cfg_attr(test, assert_instr(xsaveopt))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _xsaveopt(mem_addr: *mut u8, save_mask: u64) {
xsaveopt(
mem_addr,
(save_mask >> 32) as u32,
save_mask as u32,
);
xsaveopt(mem_addr, (save_mask >> 32) as u32, save_mask as u32);
}
/// Perform a full or partial save of the enabled processor states to memory
@@ -130,11 +122,7 @@ pub unsafe fn _xsaveopt(mem_addr: *mut u8, save_mask: u64) {
#[cfg_attr(test, assert_instr(xsavec))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _xsavec(mem_addr: *mut u8, save_mask: u64) {
xsavec(
mem_addr,
(save_mask >> 32) as u32,
save_mask as u32,
);
xsavec(mem_addr, (save_mask >> 32) as u32, save_mask as u32);
}
/// Perform a full or partial save of the enabled processor states to memory at
@@ -151,11 +139,7 @@ pub unsafe fn _xsavec(mem_addr: *mut u8, save_mask: u64) {
#[cfg_attr(test, assert_instr(xsaves))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _xsaves(mem_addr: *mut u8, save_mask: u64) {
xsaves(
mem_addr,
(save_mask >> 32) as u32,
save_mask as u32,
);
xsaves(mem_addr, (save_mask >> 32) as u32, save_mask as u32);
}
/// Perform a full or partial restore of the enabled processor states using the
@@ -196,9 +180,7 @@ mod tests {
impl XsaveArea {
fn new() -> XsaveArea {
XsaveArea {
data: [0; 2560],
}
XsaveArea { data: [0; 2560] }
}
fn ptr(&mut self) -> *mut u8 {
&mut self.data[0] as *mut _ as *mut u8

View File

@@ -28,8 +28,8 @@ pub unsafe fn _bextr_u64(a: u64, start: u32, len: u32) -> u64 {
/// Extracts bits of `a` specified by `control` into
/// the least significant bits of the result.
///
/// Bits `[7,0]` of `control` specify the index to the first bit in the range to
/// be extracted, and bits `[15,8]` specify the length of the range.
/// Bits `[7,0]` of `control` specify the index to the first bit in the range
/// to be extracted, and bits `[15,8]` specify the length of the range.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bextr2_u64)
#[inline]
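
Assuming standard BEXTR semantics (a start index past bit 63 yields zero,
and an over-long length is clamped to the available bits), a scalar model
of this control decoding might look like the following sketch:

fn bextr2_model(a: u64, control: u64) -> u64 {
    let start = control & 0xff; // bits [7:0]: index of the first bit
    let len = (control >> 8) & 0xff; // bits [15:8]: length of the range
    if start >= 64 || len == 0 {
        return 0;
    }
    let bits = a >> start;
    if len >= 64 {
        bits // range runs past bit 63; keep everything above `start`
    } else {
        bits & ((1u64 << len) - 1)
    }
}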

View File

@@ -58,7 +58,7 @@ pub unsafe fn _fxrstor64(mem_addr: *const u8) {
#[cfg(test)]
mod tests {
use coresimd::x86_64::*;
use std::{fmt, cmp::PartialEq};
use std::{cmp::PartialEq, fmt};
use stdsimd_test::simd_test;
#[repr(align(16))]

View File

@@ -36,11 +36,7 @@ extern "C" {
#[cfg_attr(test, assert_instr(xsave64))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _xsave64(mem_addr: *mut u8, save_mask: u64) {
xsave64(
mem_addr,
(save_mask >> 32) as u32,
save_mask as u32,
);
xsave64(mem_addr, (save_mask >> 32) as u32, save_mask as u32);
}
/// Perform a full or partial restore of the enabled processor states using
@@ -73,11 +69,7 @@ pub unsafe fn _xrstor64(mem_addr: *const u8, rs_mask: u64) {
#[cfg_attr(test, assert_instr(xsaveopt64))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _xsaveopt64(mem_addr: *mut u8, save_mask: u64) {
xsaveopt64(
mem_addr,
(save_mask >> 32) as u32,
save_mask as u32,
);
xsaveopt64(mem_addr, (save_mask >> 32) as u32, save_mask as u32);
}
/// Perform a full or partial save of the enabled processor states to memory
@@ -93,11 +85,7 @@ pub unsafe fn _xsaveopt64(mem_addr: *mut u8, save_mask: u64) {
#[cfg_attr(test, assert_instr(xsavec64))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _xsavec64(mem_addr: *mut u8, save_mask: u64) {
xsavec64(
mem_addr,
(save_mask >> 32) as u32,
save_mask as u32,
);
xsavec64(mem_addr, (save_mask >> 32) as u32, save_mask as u32);
}
/// Perform a full or partial save of the enabled processor states to memory at
@@ -114,11 +102,7 @@ pub unsafe fn _xsavec64(mem_addr: *mut u8, save_mask: u64) {
#[cfg_attr(test, assert_instr(xsaves64))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _xsaves64(mem_addr: *mut u8, save_mask: u64) {
xsaves64(
mem_addr,
(save_mask >> 32) as u32,
save_mask as u32,
);
xsaves64(mem_addr, (save_mask >> 32) as u32, save_mask as u32);
}
/// Perform a full or partial restore of the enabled processor states using the

View File

@@ -21,7 +21,7 @@ use proc_macro2::TokenStream;
#[proc_macro_attribute]
pub fn assert_instr(
attr: proc_macro::TokenStream, item: proc_macro::TokenStream
attr: proc_macro::TokenStream, item: proc_macro::TokenStream,
) -> proc_macro::TokenStream {
let invoc = syn::parse::<Invoc>(attr)
.expect("expected #[assert_instr(instr, a = b, ...)]");
@@ -36,9 +36,10 @@ pub fn assert_instr(
let name = &func.ident;
// Disable assert_instr for x86 targets compiled with avx enabled, which
// causes LLVM to generate different intrinsics than the ones we are testing
// for.
let disable_assert_instr = std::env::var("STDSIMD_DISABLE_ASSERT_INSTR").is_ok();
// causes LLVM to generate different intrinsics than the ones we are
// testing for.
let disable_assert_instr =
std::env::var("STDSIMD_DISABLE_ASSERT_INSTR").is_ok();
let maybe_ignore = if cfg!(optimized) && !disable_assert_instr {
TokenStream::new()
} else {
@@ -72,11 +73,7 @@ pub fn assert_instr(
syn::Pat::Ident(ref i) => &i.ident,
_ => panic!("must have bare arguments"),
};
match invoc
.args
.iter()
.find(|a| *ident == a.0)
{
match invoc.args.iter().find(|a| *ident == a.0) {
Some(&(_, ref tts)) => {
input_vals.push(quote! { #tts });
}
@@ -87,7 +84,8 @@ pub fn assert_instr(
};
}
let attrs = func.attrs
let attrs = func
.attrs
.iter()
.filter(|attr| {
attr.path
@@ -142,9 +140,8 @@ pub fn assert_instr(
}
}.into();
// why? necessary now to get tests to work?
let tts: TokenStream = tts.to_string()
.parse()
.expect("cannot parse tokenstream");
let tts: TokenStream =
tts.to_string().parse().expect("cannot parse tokenstream");
let tts: TokenStream = quote! {
#item

View File

@@ -1,8 +1,5 @@
use std::env;
fn main() {
println!(
"cargo:rustc-env=TARGET={}",
env::var("TARGET").unwrap()
);
println!("cargo:rustc-env=TARGET={}", env::var("TARGET").unwrap());
}

View File

@@ -9,29 +9,35 @@
#![cfg_attr(stdsimd_strict, deny(warnings))]
#![allow(dead_code)]
#![allow(unused_features)]
#![feature(const_fn, link_llvm_intrinsics, platform_intrinsics, repr_simd,
simd_ffi, asm, proc_macro_gen,
integer_atomics, stmt_expr_attributes, core_intrinsics,
crate_in_paths, no_core, attr_literals, rustc_attrs, stdsimd,
staged_api, core_float, core_slice_ext, align_offset,
doc_cfg, mmx_target_feature, tbm_target_feature,
sse4a_target_feature, arm_target_feature, aarch64_target_feature,
mips_target_feature, powerpc_target_feature)]
#![cfg_attr(test,
feature(proc_macro, test, attr_literals, abi_vectorcall,
untagged_unions))]
#![cfg_attr(feature = "cargo-clippy",
allow(inline_always, too_many_arguments, cast_sign_loss,
cast_lossless, cast_possible_wrap,
cast_possible_truncation, cast_precision_loss,
#![feature(
const_fn, link_llvm_intrinsics, platform_intrinsics, repr_simd, simd_ffi,
asm, proc_macro_gen, integer_atomics, stmt_expr_attributes,
core_intrinsics, crate_in_paths, no_core, attr_literals, rustc_attrs,
stdsimd, staged_api, core_float, core_slice_ext, align_offset, doc_cfg,
mmx_target_feature, tbm_target_feature, sse4a_target_feature,
arm_target_feature, aarch64_target_feature, mips_target_feature,
powerpc_target_feature
)]
#![cfg_attr(
test,
feature(proc_macro, test, attr_literals, abi_vectorcall, untagged_unions)
)]
#![cfg_attr(
feature = "cargo-clippy",
allow(
inline_always, too_many_arguments, cast_sign_loss, cast_lossless,
cast_possible_wrap, cast_possible_truncation, cast_precision_loss,
shadow_reuse, cyclomatic_complexity, similar_names,
many_single_char_names))]
many_single_char_names
)
)]
#![cfg_attr(test, allow(unused_imports))]
#![no_core]
#![unstable(feature = "stdsimd", issue = "27731")]
#![doc(test(attr(deny(warnings))),
test(attr(allow(dead_code, deprecated, unused_variables,
unused_mut))))]
#![doc(
test(attr(deny(warnings))),
test(attr(allow(dead_code, deprecated, unused_variables, unused_mut)))
)]
#[cfg_attr(not(test), macro_use)]
extern crate core as _core;

View File

@@ -1,7 +1,9 @@
#![feature(stdsimd)]
#![cfg_attr(stdsimd_strict, deny(warnings))]
#![cfg_attr(feature = "cargo-clippy",
allow(option_unwrap_used, print_stdout, use_debug))]
#![cfg_attr(
feature = "cargo-clippy",
allow(option_unwrap_used, print_stdout, use_debug)
)]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[macro_use]
@@ -14,53 +16,20 @@ fn x86_all() {
println!("sse2: {:?}", is_x86_feature_detected!("sse2"));
println!("sse3: {:?}", is_x86_feature_detected!("sse3"));
println!("ssse3: {:?}", is_x86_feature_detected!("ssse3"));
println!(
"sse4.1: {:?}",
is_x86_feature_detected!("sse4.1")
);
println!(
"sse4.2: {:?}",
is_x86_feature_detected!("sse4.2")
);
println!("sse4.1: {:?}", is_x86_feature_detected!("sse4.1"));
println!("sse4.2: {:?}", is_x86_feature_detected!("sse4.2"));
println!("sse4a: {:?}", is_x86_feature_detected!("sse4a"));
println!("avx: {:?}", is_x86_feature_detected!("avx"));
println!("avx2: {:?}", is_x86_feature_detected!("avx2"));
println!(
"avx512f {:?}",
is_x86_feature_detected!("avx512f")
);
println!(
"avx512cd {:?}",
is_x86_feature_detected!("avx512cd")
);
println!(
"avx512er {:?}",
is_x86_feature_detected!("avx512er")
);
println!(
"avx512pf {:?}",
is_x86_feature_detected!("avx512pf")
);
println!(
"avx512bw {:?}",
is_x86_feature_detected!("avx512bw")
);
println!(
"avx512dq {:?}",
is_x86_feature_detected!("avx512dq")
);
println!(
"avx512vl {:?}",
is_x86_feature_detected!("avx512vl")
);
println!(
"avx512_ifma {:?}",
is_x86_feature_detected!("avx512ifma")
);
println!(
"avx512_vbmi {:?}",
is_x86_feature_detected!("avx512vbmi")
);
println!("avx512f {:?}", is_x86_feature_detected!("avx512f"));
println!("avx512cd {:?}", is_x86_feature_detected!("avx512cd"));
println!("avx512er {:?}", is_x86_feature_detected!("avx512er"));
println!("avx512pf {:?}", is_x86_feature_detected!("avx512pf"));
println!("avx512bw {:?}", is_x86_feature_detected!("avx512bw"));
println!("avx512dq {:?}", is_x86_feature_detected!("avx512dq"));
println!("avx512vl {:?}", is_x86_feature_detected!("avx512vl"));
println!("avx512_ifma {:?}", is_x86_feature_detected!("avx512ifma"));
println!("avx512_vbmi {:?}", is_x86_feature_detected!("avx512vbmi"));
println!(
"avx512_vpopcntdq {:?}",
is_x86_feature_detected!("avx512vpopcntdq")
@@ -70,23 +39,11 @@ fn x86_all() {
println!("bmi: {:?}", is_x86_feature_detected!("bmi1"));
println!("bmi2: {:?}", is_x86_feature_detected!("bmi2"));
println!("tbm: {:?}", is_x86_feature_detected!("tbm"));
println!(
"popcnt: {:?}",
is_x86_feature_detected!("popcnt")
);
println!("popcnt: {:?}", is_x86_feature_detected!("popcnt"));
println!("lzcnt: {:?}", is_x86_feature_detected!("lzcnt"));
println!("fxsr: {:?}", is_x86_feature_detected!("fxsr"));
println!("xsave: {:?}", is_x86_feature_detected!("xsave"));
println!(
"xsaveopt: {:?}",
is_x86_feature_detected!("xsaveopt")
);
println!(
"xsaves: {:?}",
is_x86_feature_detected!("xsaves")
);
println!(
"xsavec: {:?}",
is_x86_feature_detected!("xsavec")
);
println!("xsaveopt: {:?}", is_x86_feature_detected!("xsaveopt"));
println!("xsaves: {:?}", is_x86_feature_detected!("xsaves"));
println!("xsavec: {:?}", is_x86_feature_detected!("xsavec"));
}

View File

@@ -253,11 +253,7 @@ macro_rules! product_nan_test {
}
}
let v = $id::splat(n0);
assert!(
v.product().is_nan(),
"all nans | {:?}",
v
);
assert!(v.product().is_nan(), "all nans | {:?}", v);
}
unsafe { test_fn() };
}
@@ -355,8 +351,7 @@ mod offset {
// tolerate 1 ULP difference:
if vsum.as_int() > tsum.as_int() {
assert!(
vsum.as_int() - tsum.as_int()
< 2,
vsum.as_int() - tsum.as_int() < 2,
"v: {:?} | vsum: {} | tsum: {}",
v,
vsum,
@@ -364,8 +359,7 @@ mod offset {
);
} else {
assert!(
tsum.as_int() - vsum.as_int()
< 2,
tsum.as_int() - vsum.as_int() < 2,
"v: {:?} | vsum: {} | tsum: {}",
v,
vsum,
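
The "1 ULP" tolerance above works because adjacent finite floats of the
same sign differ by exactly 1 in their bit patterns. A minimal sketch of
the same idea (assuming both values are finite, non-NaN, and share a sign):

fn within_one_ulp(a: f64, b: f64) -> bool {
    let (ai, bi) = (a.to_bits() as i64, b.to_bits() as i64);
    (ai - bi).abs() < 2
}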

View File

@@ -12,7 +12,7 @@ extern crate quote;
use std::env;
use proc_macro2::{Literal, Span, Ident, TokenStream, TokenTree};
use proc_macro2::{Ident, Literal, Span, TokenStream, TokenTree};
fn string(s: &str) -> TokenTree {
Literal::string(s).into()
@@ -20,11 +20,9 @@ fn string(s: &str) -> TokenTree {
#[proc_macro_attribute]
pub fn simd_test(
attr: proc_macro::TokenStream, item: proc_macro::TokenStream
attr: proc_macro::TokenStream, item: proc_macro::TokenStream,
) -> proc_macro::TokenStream {
let tokens = TokenStream::from(attr)
.into_iter()
.collect::<Vec<_>>();
let tokens = TokenStream::from(attr).into_iter().collect::<Vec<_>>();
if tokens.len() != 3 {
panic!("expected #[simd_test(enable = \"feature\")]");
}
@@ -53,18 +51,19 @@ pub fn simd_test(
let item = TokenStream::from(item);
let name = find_name(item.clone());
let name: TokenStream = name.to_string().parse().expect(&format!(
"failed to parse name: {}",
name.to_string()
));
let name: TokenStream = name
.to_string()
.parse()
.expect(&format!("failed to parse name: {}", name.to_string()));
let target = env::var("TARGET")
.expect("TARGET environment variable should be set for rustc");
let mut force_test = false;
let macro_test = match target.split('-').next().expect(&format!(
"target triple contained no \"-\": {}",
target
)) {
let macro_test = match target
.split('-')
.next()
.expect(&format!("target triple contained no \"-\": {}", target))
{
"i686" | "x86_64" | "i586" => "is_x86_feature_detected",
"arm" | "armv7" => "is_arm_feature_detected",
"aarch64" => "is_aarch64_feature_detected",

View File

@@ -5,8 +5,10 @@
//! assertions about the disassembly of a function.
#![feature(proc_macro)]
#![cfg_attr(feature = "cargo-clippy",
allow(missing_docs_in_private_items, print_stdout))]
#![cfg_attr(
feature = "cargo-clippy",
allow(missing_docs_in_private_items, print_stdout)
)]
extern crate assert_instr_macro;
extern crate backtrace;
@@ -25,7 +27,8 @@ pub use assert_instr_macro::*;
pub use simd_test_macro::*;
lazy_static! {
static ref DISASSEMBLY: HashMap<String, Vec<Function>> = disassemble_myself();
static ref DISASSEMBLY: HashMap<String, Vec<Function>> =
disassemble_myself();
}
struct Function {
@@ -39,14 +42,16 @@ struct Instruction {
fn disassemble_myself() -> HashMap<String, Vec<Function>> {
let me = env::current_exe().expect("failed to get current exe");
if cfg!(target_arch = "x86_64") && cfg!(target_os = "windows")
if cfg!(target_arch = "x86_64")
&& cfg!(target_os = "windows")
&& cfg!(target_env = "msvc")
{
let mut cmd = cc::windows_registry::find(
"x86_64-pc-windows-msvc",
"dumpbin.exe",
).expect("failed to find `dumpbin` tool");
let output = cmd.arg("/DISASM")
let output = cmd
.arg("/DISASM")
.arg(&me)
.output()
.expect("failed to execute dumpbin");
@@ -257,9 +262,7 @@ pub fn assert(fnptr: usize, fnname: &str, expected: &str) {
// in the disassembly.
let mut sym = None;
backtrace::resolve(fnptr as *mut _, |name| {
sym = name.name()
.and_then(|s| s.as_str())
.map(normalize);
sym = name.name().and_then(|s| s.as_str()).map(normalize);
});
let functions =
@@ -270,26 +273,17 @@ pub fn assert(fnptr: usize, fnname: &str, expected: &str) {
println!("assumed symbol name: `{}`", sym);
}
println!("maybe related functions");
for f in DISASSEMBLY
.keys()
.filter(|k| k.contains(fnname))
{
for f in DISASSEMBLY.keys().filter(|k| k.contains(fnname)) {
println!("\t- {}", f);
}
panic!(
"failed to find disassembly of {:#x} ({})",
fnptr, fnname
);
panic!("failed to find disassembly of {:#x} ({})", fnptr, fnname);
};
assert_eq!(functions.len(), 1);
let function = &functions[0];
let mut instrs = &function.instrs[..];
while instrs
.last()
.map_or(false, |s| s.parts == ["nop"])
{
while instrs.last().map_or(false, |s| s.parts == ["nop"]) {
instrs = &instrs[..instrs.len() - 1];
}
@@ -400,10 +394,7 @@ pub fn assert_skip_test_ok(name: &str) {
if env::var("STDSIMD_TEST_EVERYTHING").is_err() {
return;
}
panic!(
"skipped test `{}` when it shouldn't be skipped",
name
);
panic!("skipped test `{}` when it shouldn't be skipped", name);
}
// See comment in `assert-instr-macro` crate for why this exists

View File

@@ -1,9 +1,12 @@
#![feature(proc_macro)]
#![allow(bad_style)]
#![cfg_attr(feature = "cargo-clippy",
allow(shadow_reuse, cast_lossless, match_same_arms,
nonminimal_bool, print_stdout, use_debug, eq_op,
useless_format))]
#![cfg_attr(
feature = "cargo-clippy",
allow(
shadow_reuse, cast_lossless, match_same_arms, nonminimal_bool,
print_stdout, use_debug, eq_op, useless_format
)
)]
#[macro_use]
extern crate serde_derive;
@@ -249,10 +252,9 @@ fn matches(rust: &Function, intel: &Intrinsic) -> Result<(), String> {
.flat_map(|c| c.to_lowercase())
.collect::<String>();
let rust_feature = rust.target_feature.expect(&format!(
"no target feature listed for {}",
rust.name
));
let rust_feature = rust
.target_feature
.expect(&format!("no target feature listed for {}", rust.name));
if rust_feature.contains(&cpuid) {
continue;
}
@@ -314,24 +316,19 @@ fn matches(rust: &Function, intel: &Intrinsic) -> Result<(), String> {
if rust.arguments.len() != intel.parameters.len() {
bail!("wrong number of arguments on {}", rust.name)
}
for (i, (a, b)) in intel
.parameters
.iter()
.zip(rust.arguments)
.enumerate()
for (i, (a, b)) in
intel.parameters.iter().zip(rust.arguments).enumerate()
{
let is_const = rust.required_const.contains(&i);
equate(b, &a.type_, &intel.name, is_const)?;
}
}
let any_i64 = rust.arguments
.iter()
.cloned()
.chain(rust.ret)
.any(|arg| match *arg {
let any_i64 = rust.arguments.iter().cloned().chain(rust.ret).any(|arg| {
match *arg {
Type::PrimSigned(64) | Type::PrimUnsigned(64) => true,
_ => false,
}
});
let any_i64_exempt = match rust.name {
// These intrinsics have all been manually verified against Clang's
@@ -363,7 +360,7 @@ fn matches(rust: &Function, intel: &Intrinsic) -> Result<(), String> {
}
fn equate(
t: &Type, intel: &str, intrinsic: &str, is_const: bool
t: &Type, intel: &str, intrinsic: &str, is_const: bool,
) -> Result<(), String> {
let intel = intel.replace(" *", "*");
let intel = intel.replace(" const*", "*");
@@ -371,9 +368,7 @@ fn equate(
if is_const {
return Ok(());
}
Err(format!(
"argument required to be const but isn't"
))
Err(format!("argument required to be const but isn't"))
};
match (t, &intel[..]) {
(&Type::PrimFloat(32), "float") => {}

View File

@@ -1,11 +1,20 @@
#![feature(stdsimd)]
#![cfg_attr(stdsimd_strict, deny(warnings))]
#![cfg_attr(feature = "cargo-clippy",
allow(option_unwrap_used, use_debug, print_stdout))]
#![cfg_attr(
feature = "cargo-clippy",
allow(option_unwrap_used, use_debug, print_stdout)
)]
#[cfg(any(target_arch = "arm", target_arch = "aarch64",
target_arch = "x86", target_arch = "x86_64",
target_arch = "powerpc", target_arch = "powerpc64"))]
#[cfg(
any(
target_arch = "arm",
target_arch = "aarch64",
target_arch = "x86",
target_arch = "x86_64",
target_arch = "powerpc",
target_arch = "powerpc64"
)
)]
#[macro_use]
extern crate stdsimd;
@@ -22,51 +31,30 @@ fn aarch64_linux() {
println!("fp: {}", is_aarch64_feature_detected!("fp"));
println!("fp16: {}", is_aarch64_feature_detected!("fp16"));
println!("neon: {}", is_aarch64_feature_detected!("neon"));
println!(
"asimd: {}",
is_aarch64_feature_detected!("asimd")
);
println!("asimd: {}", is_aarch64_feature_detected!("asimd"));
println!("sve: {}", is_aarch64_feature_detected!("sve"));
println!("crc: {}", is_aarch64_feature_detected!("crc"));
println!(
"crypto: {}",
is_aarch64_feature_detected!("crypto")
);
println!("crypto: {}", is_aarch64_feature_detected!("crypto"));
println!("lse: {}", is_aarch64_feature_detected!("lse"));
println!("rdm: {}", is_aarch64_feature_detected!("rdm"));
println!("rcpc: {}", is_aarch64_feature_detected!("rcpc"));
println!(
"dotprod: {}",
is_aarch64_feature_detected!("dotprod")
);
println!("dotprod: {}", is_aarch64_feature_detected!("dotprod"));
}
#[test]
#[cfg(all(target_arch = "powerpc", target_os = "linux"))]
fn powerpc_linux() {
println!(
"altivec: {}",
is_powerpc_feature_detected!("altivec")
);
println!("altivec: {}", is_powerpc_feature_detected!("altivec"));
println!("vsx: {}", is_powerpc_feature_detected!("vsx"));
println!(
"power8: {}",
is_powerpc_feature_detected!("power8")
);
println!("power8: {}", is_powerpc_feature_detected!("power8"));
}
#[test]
#[cfg(all(target_arch = "powerpc64", target_os = "linux"))]
fn powerpc64_linux() {
println!(
"altivec: {}",
is_powerpc64_feature_detected!("altivec")
);
println!("altivec: {}", is_powerpc64_feature_detected!("altivec"));
println!("vsx: {}", is_powerpc64_feature_detected!("vsx"));
println!(
"power8: {}",
is_powerpc64_feature_detected!("power8")
);
println!("power8: {}", is_powerpc64_feature_detected!("power8"));
}
#[test]
@@ -76,54 +64,21 @@ fn x86_all() {
println!("sse2: {:?}", is_x86_feature_detected!("sse2"));
println!("sse3: {:?}", is_x86_feature_detected!("sse3"));
println!("ssse3: {:?}", is_x86_feature_detected!("ssse3"));
println!(
"sse4.1: {:?}",
is_x86_feature_detected!("sse4.1")
);
println!(
"sse4.2: {:?}",
is_x86_feature_detected!("sse4.2")
);
println!("sse4.1: {:?}", is_x86_feature_detected!("sse4.1"));
println!("sse4.2: {:?}", is_x86_feature_detected!("sse4.2"));
println!("sse4a: {:?}", is_x86_feature_detected!("sse4a"));
println!("sha: {:?}", is_x86_feature_detected!("sha"));
println!("avx: {:?}", is_x86_feature_detected!("avx"));
println!("avx2: {:?}", is_x86_feature_detected!("avx2"));
println!(
"avx512f {:?}",
is_x86_feature_detected!("avx512f")
);
println!(
"avx512cd {:?}",
is_x86_feature_detected!("avx512cd")
);
println!(
"avx512er {:?}",
is_x86_feature_detected!("avx512er")
);
println!(
"avx512pf {:?}",
is_x86_feature_detected!("avx512pf")
);
println!(
"avx512bw {:?}",
is_x86_feature_detected!("avx512bw")
);
println!(
"avx512dq {:?}",
is_x86_feature_detected!("avx512dq")
);
println!(
"avx512vl {:?}",
is_x86_feature_detected!("avx512vl")
);
println!(
"avx512_ifma {:?}",
is_x86_feature_detected!("avx512ifma")
);
println!(
"avx512_vbmi {:?}",
is_x86_feature_detected!("avx512vbmi")
);
println!("avx512f {:?}", is_x86_feature_detected!("avx512f"));
println!("avx512cd {:?}", is_x86_feature_detected!("avx512cd"));
println!("avx512er {:?}", is_x86_feature_detected!("avx512er"));
println!("avx512pf {:?}", is_x86_feature_detected!("avx512pf"));
println!("avx512bw {:?}", is_x86_feature_detected!("avx512bw"));
println!("avx512dq {:?}", is_x86_feature_detected!("avx512dq"));
println!("avx512vl {:?}", is_x86_feature_detected!("avx512vl"));
println!("avx512_ifma {:?}", is_x86_feature_detected!("avx512ifma"));
println!("avx512_vbmi {:?}", is_x86_feature_detected!("avx512vbmi"));
println!(
"avx512_vpopcntdq {:?}",
is_x86_feature_detected!("avx512vpopcntdq")
@@ -133,23 +88,11 @@ fn x86_all() {
println!("bmi: {:?}", is_x86_feature_detected!("bmi1"));
println!("bmi2: {:?}", is_x86_feature_detected!("bmi2"));
println!("tbm: {:?}", is_x86_feature_detected!("tbm"));
println!(
"popcnt: {:?}",
is_x86_feature_detected!("popcnt")
);
println!("popcnt: {:?}", is_x86_feature_detected!("popcnt"));
println!("lzcnt: {:?}", is_x86_feature_detected!("lzcnt"));
println!("fxsr: {:?}", is_x86_feature_detected!("fxsr"));
println!("xsave: {:?}", is_x86_feature_detected!("xsave"));
println!(
"xsaveopt: {:?}",
is_x86_feature_detected!("xsaveopt")
);
println!(
"xsaves: {:?}",
is_x86_feature_detected!("xsaves")
);
println!(
"xsavec: {:?}",
is_x86_feature_detected!("xsavec")
);
println!("xsaveopt: {:?}", is_x86_feature_detected!("xsaveopt"));
println!("xsaves: {:?}", is_x86_feature_detected!("xsaves"));
println!("xsavec: {:?}", is_x86_feature_detected!("xsavec"));
}

View File

@@ -14,10 +14,13 @@
#![feature(stdsimd)]
#![cfg_attr(test, feature(test))]
#![cfg_attr(feature = "cargo-clippy",
allow(result_unwrap_used, print_stdout, option_unwrap_used,
shadow_reuse, cast_possible_wrap, cast_sign_loss,
missing_docs_in_private_items))]
#![cfg_attr(
feature = "cargo-clippy",
allow(
result_unwrap_used, print_stdout, option_unwrap_used, shadow_reuse,
cast_possible_wrap, cast_sign_loss, missing_docs_in_private_items
)
)]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[macro_use]
@@ -68,7 +71,7 @@ fn hex_encode<'a>(src: &[u8], dst: &'a mut [u8]) -> Result<&'a str, usize> {
#[target_feature(enable = "avx2")]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
unsafe fn hex_encode_avx2<'a>(
mut src: &[u8], dst: &'a mut [u8]
mut src: &[u8], dst: &'a mut [u8],
) -> Result<&'a str, usize> {
let ascii_zero = _mm256_set1_epi8(b'0' as i8);
let nines = _mm256_set1_epi8(9);
@@ -115,16 +118,14 @@ unsafe fn hex_encode_avx2<'a>(
let i = i as usize;
let _ = hex_encode_sse41(src, &mut dst[i * 2..]);
Ok(str::from_utf8_unchecked(
&dst[..src.len() * 2 + i * 2],
))
Ok(str::from_utf8_unchecked(&dst[..src.len() * 2 + i * 2]))
}
// copied from https://github.com/Matherunner/bin2hex-sse/blob/master/base16_sse4.cpp
#[target_feature(enable = "sse4.1")]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
unsafe fn hex_encode_sse41<'a>(
mut src: &[u8], dst: &'a mut [u8]
mut src: &[u8], dst: &'a mut [u8],
) -> Result<&'a str, usize> {
let ascii_zero = _mm_set1_epi8(b'0' as i8);
let nines = _mm_set1_epi8(9);
@@ -157,10 +158,7 @@ unsafe fn hex_encode_sse41<'a>(
let res2 = _mm_unpackhi_epi8(masked2, masked1);
_mm_storeu_si128(dst.as_mut_ptr().offset(i * 2) as *mut _, res1);
_mm_storeu_si128(
dst.as_mut_ptr().offset(i * 2 + 16) as *mut _,
res2,
);
_mm_storeu_si128(dst.as_mut_ptr().offset(i * 2 + 16) as *mut _, res2);
src = &src[16..];
i += 16;
}
@@ -168,13 +166,11 @@ unsafe fn hex_encode_sse41<'a>(
let i = i as usize;
let _ = hex_encode_fallback(src, &mut dst[i * 2..]);
Ok(str::from_utf8_unchecked(
&dst[..src.len() * 2 + i * 2],
))
Ok(str::from_utf8_unchecked(&dst[..src.len() * 2 + i * 2]))
}
fn hex_encode_fallback<'a>(
src: &[u8], dst: &'a mut [u8]
src: &[u8], dst: &'a mut [u8],
) -> Result<&'a str, usize> {
fn hex(byte: u8) -> u8 {
static TABLE: &[u8] = b"0123456789abcdef";
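
The hunk above cuts off inside the helper; for context, a complete scalar
fallback along these lines (a sketch under the same signature; the crate's
actual body may differ, and the Err value shown is illustrative):

fn hex_encode_fallback<'a>(
    src: &[u8], dst: &'a mut [u8],
) -> Result<&'a str, usize> {
    fn hex(byte: u8) -> u8 {
        static TABLE: &[u8] = b"0123456789abcdef";
        TABLE[byte as usize]
    }
    if dst.len() < src.len() * 2 {
        return Err(src.len() * 2); // illustrative: report required capacity
    }
    for (&byte, slots) in src.iter().zip(dst.chunks_mut(2)) {
        slots[0] = hex(byte >> 4); // high nibble first
        slots[1] = hex(byte & 0xf);
    }
    Ok(unsafe { std::str::from_utf8_unchecked(&dst[..src.len() * 2]) })
}
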
@@ -199,10 +195,7 @@ mod tests {
fn test(input: &[u8], output: &str) {
let tmp = || vec![0; input.len() * 2];
assert_eq!(
hex_encode_fallback(input, &mut tmp()).unwrap(),
output
);
assert_eq!(hex_encode_fallback(input, &mut tmp()).unwrap(), output);
assert_eq!(hex_encode(input, &mut tmp()).unwrap(), output);
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
@@ -239,9 +232,7 @@ mod tests {
fn odd() {
test(
&[0; 313],
&iter::repeat('0')
.take(313 * 2)
.collect::<String>(),
&iter::repeat('0').take(313 * 2).collect::<String>(),
);
}

View File

@@ -5,9 +5,13 @@
#![cfg_attr(stdsimd_strict, deny(warnings))]
#![feature(stdsimd)]
#![cfg_attr(feature = "cargo-clippy",
allow(similar_names, missing_docs_in_private_items,
shadow_reuse, print_stdout))]
#![cfg_attr(
feature = "cargo-clippy",
allow(
similar_names, missing_docs_in_private_items, shadow_reuse,
print_stdout
)
)]
extern crate stdsimd;
#[macro_use]
@@ -15,8 +19,6 @@ extern crate cfg_if;
use stdsimd::simd::*;
const PI: f64 = std::f64::consts::PI;
const SOLAR_MASS: f64 = 4.0 * PI * PI;
const DAYS_PER_YEAR: f64 = 365.24;
@@ -81,7 +83,7 @@ struct Body {
impl Body {
fn new(
x0: f64, x1: f64, x2: f64, v0: f64, v1: f64, v2: f64, mass: f64
x0: f64, x1: f64, x2: f64, v0: f64, v1: f64, v2: f64, mass: f64,
) -> Self {
Self {
x: [x0, x1, x2],

View File

@@ -64,7 +64,8 @@ macro_rules! is_aarch64_feature_detected {
#[unstable(feature = "stdsimd", issue = "27731")]
macro_rules! is_powerpc_feature_detected {
($t:tt) => {
compile_error!(r#"
compile_error!(
r#"
is_powerpc_feature_detected can only be used on PowerPC targets.
You can prevent it from being used in other architectures by
guarding it behind a cfg(target_arch) as follows:
@@ -72,7 +73,8 @@ guarding it behind a cfg(target_arch) as follows:
#[cfg(target_arch = "powerpc")] {
if is_powerpc_feature_detected(...) { ... }
}
"#)
"#
)
};
}
@@ -81,7 +83,8 @@ guarding it behind a cfg(target_arch) as follows:
#[unstable(feature = "stdsimd", issue = "27731")]
macro_rules! is_powerpc64_feature_detected {
($t:tt) => {
compile_error!(r#"
compile_error!(
r#"
is_powerpc64_feature_detected can only be used on PowerPC64 targets.
You can prevent it from being used in other architectures by
guarding it behind a cfg(target_arch) as follows:
@@ -89,7 +92,8 @@ guarding it behind a cfg(target_arch) as follows:
#[cfg(target_arch = "powerpc64")] {
if is_powerpc64_feature_detected(...) { ... }
}
"#)
"#
)
};
}

View File

@@ -60,8 +60,8 @@ cfg_if! {
}
pub use self::arch::Feature;
mod cache;
mod bit;
mod cache;
cfg_if! {
if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] {