Reorganize and refactor source tree (#324)

With RFC 2325 looking close to being accepted, I took a crack at
reorganizing this repository to being more amenable for inclusion in
libstd/libcore. My current plan is to add stdsimd as a submodule in
rust-lang/rust and then use `#[path]` to include the modules directly
into libstd/libcore.

Before this commit, however, the source code of coresimd/stdsimd
themselves were not quite ready for this. Imports wouldn't compile for
one reason or another, and the organization was also different than the
RFC itself!

In addition to moving a lot of files around, this commit has the
following major changes:

* The `cfg_feature_enabled!` macro is now renamed to
  `is_target_feature_detected!`
* The `vendor` module is now called `arch`.
* Under the `arch` module is a suite of modules like `x86`, `x86_64`,
  etc. One per `cfg!(target_arch)`.
* The `is_target_feature_detected!` macro was removed from coresimd.
  Unfortunately libcore has no ability to export unstable macros, so for
  now all feature detection is canonicalized in stdsimd.

The `coresimd` and `stdsimd` crates have been updated to the planned
organization in RFC 2325 as well. The runtime bits saw the largest
amount of refactoring, seeing a good deal of simplification without the
core/std split.
This commit is contained in:
Alex Crichton
2018-02-18 10:07:35 +09:00
committed by GitHub
parent d097221faf
commit 39b5ec91ae
123 changed files with 1852 additions and 2051 deletions

View File

@@ -24,7 +24,7 @@ matrix:
- env: DOCUMENTATION
install: true
script: ci/dox.sh
- script: cargo test --manifest-path stdsimd-verify/Cargo.toml
- script: cargo test --manifest-path crates/stdsimd-verify/Cargo.toml
install: true
- env: RUSTFMT=On TARGET=x86_64-unknown-linux-gnu NO_ADD=1
script: |

View File

@@ -1,33 +1,8 @@
[package]
name = "stdsimd"
version = "0.0.3"
authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = "SIMD support in Rust's standard library."
documentation = "https://docs.rs/stdsimd"
homepage = "https://github.com/rust-lang-nursery/stdsimd"
repository = "https://github.com/rust-lang-nursery/stdsimd"
readme = "README.md"
keywords = ["std", "simd", "intrinsics"]
categories = ["hardware-support"]
license = "MIT/Apache-2.0"
[workspace]
members = ["stdsimd-verify"]
[badges]
travis-ci = { repository = "rust-lang-nursery/stdsimd" }
appveyor = { repository = "rust-lang-nursery/stdsimd" }
is-it-maintained-issue-resolution = { repository = "rust-lang-nursery/stdsimd" }
is-it-maintained-open-issues = { repository = "rust-lang-nursery/stdsimd" }
maintenance = { status = "experimental" }
[dependencies]
coresimd = { version = "0.0.3", path = "coresimd/" }
[dev-dependencies]
auxv = "0.3.3"
quickcheck = "0.6"
rand = "0.4"
members = [
"crates/stdsimd-verify",
"crates/stdsimd",
]
[profile.release]
debug = true
@@ -36,10 +11,3 @@ opt-level = 3
[profile.bench]
debug = 1
opt-level = 3
[features]
# Internal-usage only: denies all warnings.
strict = [ "coresimd/strict" ]
# Internal-usage only: enables only those intrinsics supported by Intel's
# Software Development Environment (SDE).
intel_sde = [ "coresimd/intel_sde" ]

View File

@@ -22,16 +22,18 @@ dox() {
rm -rf target/doc/$arch
mkdir target/doc/$arch
cargo build --target $target
cargo build --target $target --manifest-path crates/stdsimd/Cargo.toml
rustdoc --target $target \
-o target/doc/$arch coresimd/src/lib.rs \
-o target/doc/$arch crates/coresimd/src/lib.rs \
--crate-name coresimd \
--library-path target/$target/debug/deps
rustdoc --target $target \
-o target/doc/$arch src/lib.rs \
-o target/doc/$arch crates/stdsimd/src/lib.rs \
--crate-name stdsimd \
--library-path target/$target/debug/deps
--library-path target/$target/debug/deps \
--extern cfg_if=`ls target/$target/debug/deps/libcfg_if-*.rlib` \
--extern libc=`ls target/$target/debug/deps/liblibc-*.rlib`
}
dox i686 i686-unknown-linux-gnu

View File

@@ -23,7 +23,7 @@ echo "OBJDUMP=${OBJDUMP}"
cargo_test() {
cmd="cargo test --target=$TARGET --features $FEATURES $1"
cmd="$cmd -p coresimd -p stdsimd"
cmd="$cmd -p coresimd -p stdsimd --manifest-path crates/stdsimd/Cargo.toml"
cmd="$cmd -- $2"
$cmd
}

View File

@@ -1 +0,0 @@
../LICENSE-APACHE

View File

@@ -1 +0,0 @@
../LICENSE-MIT

View File

@@ -1 +0,0 @@
../README.md

View File

@@ -4,8 +4,8 @@
#[cfg(test)]
use stdsimd_test::assert_instr;
use simd_llvm::simd_add;
use v128::f64x2;
use coresimd::simd_llvm::simd_add;
use coresimd::v128::f64x2;
/// Vector add.
#[inline]
@@ -41,8 +41,8 @@ pub unsafe fn vaddd_u64(a: u64, b: u64) -> u64 {
#[cfg(test)]
mod tests {
use super::f64x2;
use aarch64::neon;
use simd::f64x2;
use coresimd::aarch64::neon;
use stdsimd_test::simd_test;
#[simd_test = "neon"]

View File

@@ -57,7 +57,7 @@ pub unsafe fn _cls_u64(x: u64) -> u64 {
#[cfg(test)]
mod tests {
use aarch64::v8;
use coresimd::aarch64::v8;
#[test]
fn _rev_u64() {

View File

@@ -3,10 +3,9 @@
#[cfg(test)]
use stdsimd_test::assert_instr;
use simd_llvm::simd_add;
use v64::{f32x2, i16x4, i32x2, i8x8, u16x4, u32x2, u8x8};
use v128::{f32x4, i16x8, i32x4, i64x2, i8x16, u16x8, u32x4, u64x2, u8x16};
use coresimd::simd_llvm::simd_add;
use coresimd::v64::*;
use coresimd::v128::*;
/// Vector add.
#[inline]
@@ -216,7 +215,7 @@ pub unsafe fn vrsqrte_f32(a: f32x2) -> f32x2 {
mod tests {
use stdsimd_test::simd_test;
use simd::*;
use arm::neon;
use coresimd::arm::neon;
#[simd_test = "neon"]
unsafe fn vadd_s8() {

View File

@@ -25,7 +25,7 @@ pub unsafe fn _rev_u32(x: u32) -> u32 {
#[cfg(test)]
mod tests {
use arm::v6;
use coresimd::arm::v6;
#[test]
fn _rev_u16() {

View File

@@ -50,7 +50,7 @@ extern "C" {
#[cfg(test)]
mod tests {
use arm::v7;
use coresimd::arm::v7;
#[test]
fn _clz_u8() {

View File

@@ -82,8 +82,8 @@ macro_rules! define_impl {
slice: &mut [$elemty],
offset: usize,
) {
use core::mem::size_of;
use core::ptr;
use mem::size_of;
use ptr;
ptr::copy_nonoverlapping(
&self as *const $name as *const u8,
@@ -102,8 +102,8 @@ macro_rules! define_impl {
slice: &[$elemty],
offset: usize,
) -> $name {
use core::mem::size_of;
use core::ptr;
use mem::size_of;
use ptr;
let mut x = $name::splat(0 as $elemty);
ptr::copy_nonoverlapping(
@@ -152,7 +152,7 @@ macro_rules! define_from {
impl From<$from> for $to {
#[inline(always)]
fn from(f: $from) -> $to {
unsafe { ::core::mem::transmute(f) }
unsafe { ::mem::transmute(f) }
}
}
)+
@@ -162,7 +162,7 @@ macro_rules! define_from {
macro_rules! define_common_ops {
($($ty:ident),+) => {
$(
impl ::core::ops::Add for $ty {
impl ::ops::Add for $ty {
type Output = Self;
#[inline(always)]
fn add(self, other: Self) -> Self {
@@ -170,7 +170,7 @@ macro_rules! define_common_ops {
}
}
impl ::core::ops::Sub for $ty {
impl ::ops::Sub for $ty {
type Output = Self;
#[inline(always)]
fn sub(self, other: Self) -> Self {
@@ -178,7 +178,7 @@ macro_rules! define_common_ops {
}
}
impl ::core::ops::Mul for $ty {
impl ::ops::Mul for $ty {
type Output = Self;
#[inline(always)]
fn mul(self, other: Self) -> Self {
@@ -186,7 +186,7 @@ macro_rules! define_common_ops {
}
}
impl ::core::ops::Div for $ty {
impl ::ops::Div for $ty {
type Output = Self;
#[inline(always)]
fn div(self, other: Self) -> Self {
@@ -194,7 +194,7 @@ macro_rules! define_common_ops {
}
}
impl ::core::ops::Rem for $ty {
impl ::ops::Rem for $ty {
type Output = Self;
#[inline(always)]
fn rem(self, other: Self) -> Self {
@@ -202,35 +202,35 @@ macro_rules! define_common_ops {
}
}
impl ::core::ops::AddAssign for $ty {
impl ::ops::AddAssign for $ty {
#[inline(always)]
fn add_assign(&mut self, other: Self) {
*self = *self + other;
}
}
impl ::core::ops::SubAssign for $ty {
impl ::ops::SubAssign for $ty {
#[inline(always)]
fn sub_assign(&mut self, other: Self) {
*self = *self - other;
}
}
impl ::core::ops::MulAssign for $ty {
impl ::ops::MulAssign for $ty {
#[inline(always)]
fn mul_assign(&mut self, other: Self) {
*self = *self * other;
}
}
impl ::core::ops::DivAssign for $ty {
impl ::ops::DivAssign for $ty {
#[inline(always)]
fn div_assign(&mut self, other: Self) {
*self = *self / other;
}
}
impl ::core::ops::RemAssign for $ty {
impl ::ops::RemAssign for $ty {
#[inline(always)]
fn rem_assign(&mut self, other: Self) {
*self = *self % other;
@@ -244,14 +244,14 @@ macro_rules! define_common_ops {
macro_rules! define_shifts {
($ty:ident, $elem:ident, $($by:ident),+) => {
$(
impl ::core::ops::Shl<$by> for $ty {
impl ::ops::Shl<$by> for $ty {
type Output = Self;
#[inline(always)]
fn shl(self, other: $by) -> Self {
unsafe { simd_shl(self, $ty::splat(other as $elem)) }
}
}
impl ::core::ops::Shr<$by> for $ty {
impl ::ops::Shr<$by> for $ty {
type Output = Self;
#[inline(always)]
fn shr(self, other: $by) -> Self {
@@ -259,13 +259,13 @@ macro_rules! define_shifts {
}
}
impl ::core::ops::ShlAssign<$by> for $ty {
impl ::ops::ShlAssign<$by> for $ty {
#[inline(always)]
fn shl_assign(&mut self, other: $by) {
*self = *self << other;
}
}
impl ::core::ops::ShrAssign<$by> for $ty {
impl ::ops::ShrAssign<$by> for $ty {
#[inline(always)]
fn shr_assign(&mut self, other: $by) {
*self = *self >> other;
@@ -279,7 +279,7 @@ macro_rules! define_shifts {
macro_rules! define_float_ops {
($($ty:ident),+) => {
$(
impl ::core::ops::Neg for $ty {
impl ::ops::Neg for $ty {
type Output = Self;
#[inline(always)]
fn neg(self) -> Self {
@@ -293,7 +293,7 @@ macro_rules! define_float_ops {
macro_rules! define_signed_integer_ops {
($($ty:ident),+) => {
$(
impl ::core::ops::Neg for $ty {
impl ::ops::Neg for $ty {
type Output = Self;
#[inline(always)]
fn neg(self) -> Self {
@@ -307,7 +307,7 @@ macro_rules! define_signed_integer_ops {
macro_rules! define_integer_ops {
($(($ty:ident, $elem:ident)),+) => {
$(
impl ::core::ops::Not for $ty {
impl ::ops::Not for $ty {
type Output = Self;
#[inline(always)]
fn not(self) -> Self {
@@ -315,40 +315,40 @@ macro_rules! define_integer_ops {
}
}
impl ::core::ops::BitAnd for $ty {
impl ::ops::BitAnd for $ty {
type Output = Self;
#[inline(always)]
fn bitand(self, other: Self) -> Self {
unsafe { simd_and(self, other) }
}
}
impl ::core::ops::BitOr for $ty {
impl ::ops::BitOr for $ty {
type Output = Self;
#[inline(always)]
fn bitor(self, other: Self) -> Self {
unsafe { simd_or(self, other) }
}
}
impl ::core::ops::BitXor for $ty {
impl ::ops::BitXor for $ty {
type Output = Self;
#[inline(always)]
fn bitxor(self, other: Self) -> Self {
unsafe { simd_xor(self, other) }
}
}
impl ::core::ops::BitAndAssign for $ty {
impl ::ops::BitAndAssign for $ty {
#[inline(always)]
fn bitand_assign(&mut self, other: Self) {
*self = *self & other;
}
}
impl ::core::ops::BitOrAssign for $ty {
impl ::ops::BitOrAssign for $ty {
#[inline(always)]
fn bitor_assign(&mut self, other: Self) {
*self = *self | other;
}
}
impl ::core::ops::BitXorAssign for $ty {
impl ::ops::BitXorAssign for $ty {
#[inline(always)]
fn bitxor_assign(&mut self, other: Self) {
*self = *self ^ other;
@@ -360,12 +360,12 @@ macro_rules! define_integer_ops {
u8, u16, u32, u64, usize,
i8, i16, i32, i64, isize);
impl ::core::fmt::LowerHex for $ty {
fn fmt(&self, f: &mut ::core::fmt::Formatter)
-> ::core::fmt::Result {
impl ::fmt::LowerHex for $ty {
fn fmt(&self, f: &mut ::fmt::Formatter)
-> ::fmt::Result {
write!(f, "{}(", stringify!($ty))?;
let n = ::core::mem::size_of_val(self)
/ ::core::mem::size_of::<$elem>();
let n = ::mem::size_of_val(self)
/ ::mem::size_of::<$elem>();
for i in 0..n {
if i > 0 {
write!(f, ", ")?;
@@ -384,7 +384,7 @@ macro_rules! define_casts {
$(
impl $fromty {
#[inline(always)]
pub fn $cast(self) -> ::simd::$toty {
pub fn $cast(self) -> ::coresimd::simd::$toty {
unsafe { simd_cast(self) }
}
}

View File

@@ -0,0 +1,80 @@
/// Platform independent SIMD vector types and operations.
pub mod simd {
pub use coresimd::v128::*;
pub use coresimd::v256::*;
pub use coresimd::v512::*;
pub use coresimd::v64::*;
}
/// Platform dependent vendor intrinsics.
pub mod vendor {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
pub use coresimd::x86::*;
#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
pub use coresimd::arm::*;
#[cfg(target_arch = "aarch64")]
pub use coresimd::aarch64::*;
// FIXME: rust does not expose the nvptx and nvptx64 targets yet
#[cfg(not(any(target_arch = "x86", target_arch = "x86_64",
target_arch = "arm", target_arch = "aarch64")))]
pub use coresimd::nvptx::*;
}
#[macro_use]
mod macros;
mod simd_llvm;
mod v128;
mod v256;
mod v512;
mod v64;
/// 32-bit wide vector tpyes
mod v32 {
#[cfg(not(test))]
use prelude::v1::*;
use coresimd::simd_llvm::*;
define_ty! { i16x2, i16, i16 }
define_impl! { i16x2, i16, 2, i16x2, x0, x1 }
define_ty! { u16x2, u16, u16 }
define_impl! { u16x2, u16, 2, i16x2, x0, x1 }
define_ty! { i8x4, i8, i8, i8, i8 }
define_impl! { i8x4, i8, 4, i8x4, x0, x1, x2, x3 }
define_ty! { u8x4, u8, u8, u8, u8 }
define_impl! { u8x4, u8, 4, i8x4, x0, x1, x2, x3 }
define_casts!(
(i16x2, i64x2, as_i64x2),
(u16x2, i64x2, as_i64x2),
(i8x4, i32x4, as_i32x4),
(u8x4, i32x4, as_i32x4)
);
}
/// 16-bit wide vector tpyes
mod v16 {
#[cfg(not(test))]
use prelude::v1::*;
use coresimd::simd_llvm::*;
define_ty! { i8x2, i8, i8 }
define_impl! { i8x2, i8, 2, i8x2, x0, x1 }
define_ty! { u8x2, u8, u8 }
define_impl! { u8x2, u8, 2, i8x2, x0, x1 }
define_casts!((i8x2, i64x2, as_i64x2), (u8x2, i64x2, as_i64x2));
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
mod x86;
#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
mod arm;
#[cfg(target_arch = "aarch64")]
mod aarch64;
mod nvptx;

View File

@@ -1 +0,0 @@
../rustfmt.toml

View File

@@ -1,139 +0,0 @@
//! SIMD and vendor intrinsics support library.
//!
//! This documentation is only for one particular architecture, you can find
//! others at:
//!
//! * [i686](https://rust-lang-nursery.github.io/stdsimd/i686/stdsimd/)
//! * [`x86_64`](https://rust-lang-nursery.github.io/stdsimd/x86_64/stdsimd/)
//! * [arm](https://rust-lang-nursery.github.io/stdsimd/arm/stdsimd/)
//! * [aarch64](https://rust-lang-nursery.github.io/stdsimd/aarch64/stdsimd/)
#![cfg_attr(feature = "strict", deny(warnings))]
#![allow(dead_code)]
#![allow(unused_features)]
#![feature(const_fn, link_llvm_intrinsics, platform_intrinsics, repr_simd,
simd_ffi, target_feature, cfg_target_feature, i128_type, asm,
integer_atomics, stmt_expr_attributes, core_intrinsics,
crate_in_paths, attr_literals, rustc_attrs)]
#![cfg_attr(test, feature(proc_macro, test, abi_vectorcall))]
#![cfg_attr(feature = "cargo-clippy",
allow(inline_always, too_many_arguments, cast_sign_loss,
cast_lossless, cast_possible_wrap,
cast_possible_truncation, cast_precision_loss,
shadow_reuse, cyclomatic_complexity, similar_names,
many_single_char_names))]
#![no_std]
#[cfg(test)]
#[macro_use]
extern crate std;
#[cfg(test)]
extern crate stdsimd_test;
#[cfg(test)]
extern crate test;
/// Platform independent SIMD vector types and operations.
pub mod simd {
pub use v128::*;
pub use v256::*;
pub use v512::*;
pub use v64::*;
}
/// Platform dependent vendor intrinsics.
pub mod vendor {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
pub use x86::*;
#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
pub use arm::*;
#[cfg(target_arch = "aarch64")]
pub use aarch64::*;
// FIXME: rust does not expose the nvptx and nvptx64 targets yet
#[cfg(not(any(target_arch = "x86", target_arch = "x86_64",
target_arch = "arm", target_arch = "aarch64")))]
pub use nvptx::*;
}
/// Run-time feature detection.
#[doc(hidden)]
pub mod __vendor_runtime {
#[cfg(any(target_arch = "x86", target_arch = "x86_64",
all(target_os = "linux",
any(target_arch = "arm", target_arch = "aarch64",
target_arch = "powerpc64"))))]
pub use runtime::core::*;
// Re-exports `coresimd` run-time building blocks for usage in the
// `stdsimd` run-time.
#[cfg(all(target_os = "linux",
any(target_arch = "arm", target_arch = "aarch64",
target_arch = "powerpc64")))]
#[doc(hidden)]
pub mod __runtime {
pub use runtime::*;
}
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64",
all(target_os = "linux",
any(target_arch = "arm", target_arch = "aarch64",
target_arch = "powerpc64"))))]
#[macro_use]
mod runtime;
#[macro_use]
mod macros;
mod simd_llvm;
mod v128;
mod v256;
mod v512;
mod v64;
/// 32-bit wide vector tpyes
mod v32 {
use simd_llvm::*;
define_ty! { i16x2, i16, i16 }
define_impl! { i16x2, i16, 2, i16x2, x0, x1 }
define_ty! { u16x2, u16, u16 }
define_impl! { u16x2, u16, 2, i16x2, x0, x1 }
define_ty! { i8x4, i8, i8, i8, i8 }
define_impl! { i8x4, i8, 4, i8x4, x0, x1, x2, x3 }
define_ty! { u8x4, u8, u8, u8, u8 }
define_impl! { u8x4, u8, 4, i8x4, x0, x1, x2, x3 }
define_casts!(
(i16x2, i64x2, as_i64x2),
(u16x2, i64x2, as_i64x2),
(i8x4, i32x4, as_i32x4),
(u8x4, i32x4, as_i32x4)
);
}
/// 16-bit wide vector tpyes
mod v16 {
use simd_llvm::*;
define_ty! { i8x2, i8, i8 }
define_impl! { i8x2, i8, 2, i8x2, x0, x1 }
define_ty! { u8x2, u8, u8 }
define_impl! { u8x2, u8, 2, i8x2, x0, x1 }
define_casts!((i8x2, i64x2, as_i64x2), (u8x2, i64x2, as_i64x2));
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
mod x86;
#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
mod arm;
#[cfg(target_arch = "aarch64")]
mod aarch64;
mod nvptx;

View File

@@ -1,47 +0,0 @@
//! Run-time feature detection on ARM Aarch64.
use runtime::cache;
use runtime::arch::HasFeature;
#[macro_export]
#[doc(hidden)]
macro_rules! __unstable_detect_feature {
("neon", $unstable_detect_feature:path) => {
// FIXME: this should be removed once we rename Aarch64 neon to asimd
$unstable_detect_feature($crate::__vendor_runtime::_Feature::asimd{})
};
("asimd", $unstable_detect_feature:path) => {
$unstable_detect_feature($crate::__vendor_runtime::__Feature::asimd{})
};
("pmull", $unstable_detect_feature:path) => {
$unstable_detect_feature($crate::__vendor_runtime::__Feature::pmull{})
};
($t:tt, $unstable_detect_feature:path) => { compile_error!(concat!("unknown arm target feature: ", $t)) };
}
/// ARM Aarch64 CPU Feature enum. Each variant denotes a position in a bitset
/// for a particular feature.
///
/// PLEASE: do not use this, it is an implementation detail subject to change.
#[doc(hidden)]
#[allow(non_camel_case_types)]
#[repr(u8)]
pub enum __Feature {
/// ARM Advanced SIMD (ASIMD) - Aarch64
asimd,
/// Polynomial Multiply
pmull,
}
pub fn detect_features<T: HasFeature>(mut x: T) -> cache::Initializer {
let mut value = cache::Initializer::default();
{
let mut enable_feature = |f| {
if x.has_feature(&f) {
value.set(f as u32);
}
};
enable_feature(__Feature::asimd);
enable_feature(__Feature::pmull);
}
value
}

View File

@@ -1,43 +0,0 @@
//! Run-time feature detection on ARM Aarch32.
use runtime::cache;
use runtime::arch::HasFeature;
#[macro_export]
#[doc(hidden)]
macro_rules! __unstable_detect_feature {
("neon", $unstable_detect_feature:path) => {
$unstable_detect_feature($crate::__vendor_runtime::__Feature::neon{})
};
("pmull", $unstable_detect_feature:path) => {
$unstable_detect_feature($crate::__vendor_runtime::__Feature::pmull{})
};
($t:tt, $unstable_detect_feature:path) => { compile_error!(concat!("unknown arm target feature: ", $t)) };
}
/// ARM CPU Feature enum. Each variant denotes a position in a bitset for a
/// particular feature.
///
/// PLEASE: do not use this, it is an implementation detail subject to change.
#[doc(hidden)]
#[allow(non_camel_case_types)]
#[repr(u8)]
pub enum __Feature {
/// ARM Advanced SIMD (NEON) - Aarch32
neon,
/// Polynomial Multiply
pmull,
}
pub fn detect_features<T: HasFeature>(mut x: T) -> cache::Initializer {
let mut value = cache::Initializer::default();
{
let mut enable_feature = |f| {
if x.has_feature(&f) {
value.set(f as u32);
}
};
enable_feature(__Feature::neon);
enable_feature(__Feature::pmull);
}
value
}

View File

@@ -1,20 +0,0 @@
//! Run-time feature detection for Aarch64 on Linux and `core`.
use runtime::bit;
use runtime::linux::auxv::AuxVec;
use runtime::arch::{HasFeature, __Feature};
/// Probe the ELF Auxiliary vector for hardware capabilities
///
/// The values are part of the platform-specific [asm/hwcap.h][hwcap]
///
/// [hwcap]: https://github.com/torvalds/linux/blob/master/arch/arm64/include/uapi/asm/hwcap.h
impl HasFeature for AuxVec {
fn has_feature(&mut self, x: &__Feature) -> bool {
use self::__Feature::*;
match *x {
asimd => bit::test(self.hwcap, 1),
pmull => bit::test(self.hwcap, 4),
}
}
}

View File

@@ -1,20 +0,0 @@
//! Run-time feature detection for ARM32 on Linux and `core`.
use runtime::bit;
use runtime::linux::auxv::AuxVec;
use runtime::arch::{HasFeature, __Feature};
/// Probe the ELF Auxiliary vector for hardware capabilities
///
/// The values are part of the platform-specific [asm/hwcap.h][hwcap]
///
/// [hwcap]: https://github.com/torvalds/linux/blob/master/arch/arm64/include/uapi/asm/hwcap.h
impl HasFeature for AuxVec {
fn has_feature(&mut self, x: &__Feature) -> bool {
use self::__Feature::*;
match *x {
neon => bit::test(self.hwcap, 12),
pmull => bit::test(self.hwcap2, 1),
}
}
}

View File

@@ -1,46 +0,0 @@
//! ELF Auxiliary Vector
//!
//! The auxiliary vector is a memory region in a running ELF program's stack
//! composed of (key: usize, value: usize) pairs.
//!
//! The keys used in the aux vector are platform dependent. For Linux, they are
//! defined in [linux/auxvec.h][auxvec_h]. The hardware capabilities of a given
//! CPU can be queried with the `AT_HWCAP` and `AT_HWCAP2` keys.
//!
//! There is no perfect way of reading the auxiliary vector.
//!
//! - `coresimd`: if `getauxval` is available, `coresimd` will try to use it.
//! - `stdsimd`: if `getauxval` is not available, it will try to read
//! `/proc/self/auxv`, and if that fails it will try to read `/proc/cpuinfo`.
//!
//! For more information about when `getauxval` is available check the great
//! [`auxv` crate documentation][auxv_docs].
//!
//! [auxvec_h]: https://github.com/torvalds/linux/blob/master/include/uapi/linux/auxvec.h
//! [auxv_docs]: https://docs.rs/auxv/0.3.3/auxv/
/// Key to access the CPU Hardware capabilities bitfield.
pub const AT_HWCAP: usize = 16;
/// Key to access the CPU Hardware capabilities 2 bitfield.
pub const AT_HWCAP2: usize = 26;
/// Cache HWCAP bitfields of the ELF Auxiliary Vector.
///
/// If an entry cannot be read all the bits in the bitfield
/// are set to zero.
#[cfg(any(target_arch = "arm", target_arch = "powerpc64"))]
#[derive(Debug, Copy, Clone)]
pub struct AuxVec {
pub hwcap: usize,
pub hwcap2: usize,
}
/// Cache HWCAP bitfields of the ELF Auxiliary Vector.
///
/// If an entry cannot be read all the bits in the bitfield
/// are set to zero.
#[cfg(target_arch = "aarch64")]
#[derive(Debug, Copy, Clone)]
pub struct AuxVec {
pub hwcap: usize,
}

View File

@@ -1,12 +0,0 @@
//! Run-time feature detection for ARM and PowerPC64 on Linux.
#[cfg(target_arch = "arm")]
mod arm;
#[cfg(target_arch = "aarch64")]
mod aarch64;
#[cfg(target_arch = "powerpc64")]
mod powerpc64;
pub mod auxv;

View File

@@ -1,22 +0,0 @@
//! Run-time feature detection for PowerPC64 on Linux and `core`.
use runtime::linux::auxv::AuxVec;
use runtime::arch::{HasFeature, __Feature};
/// Probe the ELF Auxiliary vector for hardware capabilities
///
/// The values are part of the platform-specific [asm/cputable.h][cputable]
///
/// [cputable]: https://github.com/torvalds/linux/blob/master/arch/powerpc/include/uapi/asm/cputable.h
impl HasFeature for AuxVec {
fn has_feature(&mut self, x: &__Feature) -> bool {
use self::__Feature::*;
// note: the PowerPC values are the mask to do the test (instead of the
// index of the bit to test like in ARM and Aarch64)
match *x {
altivec => self.hwcap & 0x10000000 != 0,
vsx => self.hwcap & 0x00000080 != 0,
power8 => self.hwcap2 & 0x80000000 != 0,
}
}
}

View File

@@ -1,30 +0,0 @@
//! Run-time feature detection macros.
/// Is a feature supported by the host CPU?
///
/// This macro performs run-time feature detection in `coresimd`. It returns
/// true if the host CPU in which the binary is running on supports a
/// particular feature.
#[macro_export]
macro_rules! cfg_feature_enabled {
($name:tt) => (
{
#[cfg(target_feature = $name)]
{
true
}
#[cfg(not(target_feature = $name))]
{
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
{
__unstable_detect_feature!($name,
$crate::__vendor_runtime::__unstable_detect_feature)
}
#[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
{
compile_error!("cfg_target_feature! is not supported in this architecture")
}
}
}
)
}

View File

@@ -1,64 +0,0 @@
//! Run-time feature detection
pub mod cache;
pub mod bit;
#[macro_use]
pub mod macros;
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[macro_use]
pub mod x86;
#[cfg(target_arch = "arm")]
#[macro_use]
pub mod arm;
#[cfg(target_arch = "aarch64")]
#[macro_use]
pub mod aarch64;
#[cfg(target_arch = "powerpc64")]
#[macro_use]
pub mod powerpc64;
#[cfg(all(target_os = "linux",
any(target_arch = "arm", target_arch = "aarch64",
target_arch = "powerpc64")))]
pub mod linux;
/// Exports architecture specific functionality for
/// reuse in `stdsimd`.
pub mod arch {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
pub use super::x86::{detect_features, __Feature};
#[cfg(target_arch = "arm")]
pub use runtime::arm::{detect_features, __Feature};
#[cfg(target_arch = "aarch64")]
pub use runtime::aarch64::{detect_features, __Feature};
#[cfg(target_arch = "powerpc64")]
pub use runtime::powerpc64::{detect_features, __Feature};
/// Interface for querying whether a feature is enabled.
pub trait HasFeature {
/// Is the feature `x` enabled at run-time?
fn has_feature(&mut self, x: &__Feature) -> bool;
}
}
/// Run-time feature detection exposed by `coresimd`.
pub mod core {
pub use super::arch::__Feature;
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
pub use super::arch::detect_features;
/// Performs run-time feature detection.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[doc(hidden)]
pub fn __unstable_detect_feature(x: __Feature) -> bool {
super::cache::test(x as u32, detect_features)
}
}

View File

@@ -1,49 +0,0 @@
//! Run-time feature detection on PowerPC64.
use runtime::cache;
use runtime::arch::HasFeature;
#[macro_export]
#[doc(hidden)]
macro_rules! __unstable_detect_feature {
("altivec", $unstable_detect_feature:path) => {
$unstable_detect_feature($crate::__vendor_runtime::__Feature::altivec{})
};
("vsx", $unstable_detect_feature:path) => {
$unstable_detect_feature($crate::__vendor_runtime::__Feature::vsx{})
};
("power8", $unstable_detect_feature:path) => {
$unstable_detect_feature($crate::__vendor_runtime::__Feature::power8{})
};
($t:tt, $unstable_detect_feature:path) => { compile_error!(concat!("unknown PowerPC target feature: ", $t)) };
}
/// PowerPC CPU Feature enum. Each variant denotes a position in a bitset
/// for a particular feature.
///
/// PLEASE: do not use this, it is an implementation detail subject to change.
#[doc(hidden)]
#[allow(non_camel_case_types)]
#[repr(u8)]
pub enum __Feature {
/// Altivec
altivec,
/// VSX
vsx,
/// Power8
power8,
}
pub fn detect_features<T: HasFeature>(mut x: T) -> cache::Initializer {
let mut value = cache::Initializer::default();
{
let mut enable_feature = |f| {
if x.has_feature(&f) {
value.set(f as u32);
}
};
enable_feature(__Feature::altivec);
enable_feature(__Feature::vsx);
enable_feature(__Feature::power8);
}
value
}

View File

@@ -1,554 +0,0 @@
//! This module implements minimal run-time feature detection for x86.
//!
//! The features are detected using the `detect_features` function below.
//! This function uses the CPUID instruction to read the feature flags from the
//! CPU and encodes them in an `usize` where each bit position represents
//! whether a feature is available (bit is set) or unavaiable (bit is cleared).
//!
//! The enum `__Feature` is used to map bit positions to feature names, and the
//! the `__unstable_detect_feature!` macro is used to map string literals (e.g.
//! "avx") to these bit positions (e.g. `__Feature::avx`).
//!
//!
//! The run-time feature detection is performed by the
//! `__unstable_detect_feature(__Feature) -> bool` function. On its first call,
//! this functions queries the CPU for the available features and stores them
//! in a global `AtomicUsize` variable. The query is performed by just checking
//! whether the feature bit in this global variable is set or cleared.
use core::mem;
use super::{bit, cache};
/// This macro maps the string-literal feature names to values of the
/// `__Feature` enum at compile-time. The feature names used are the same as
/// those of rustc `target_feature` and `cfg_target_feature` features.
///
/// PLESE: do not use this, it is an implementation detail subjected to change.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[macro_export]
#[doc(hidden)]
macro_rules! __unstable_detect_feature {
("aes", $unstable_detect_feature:path) => {
$unstable_detect_feature(
$crate::__vendor_runtime::__Feature::aes{}) };
("tsc", $unstable_detect_feature:path) => {
$unstable_detect_feature(
$crate::__vendor_runtime::__Feature::tsc{}) };
("mmx", $unstable_detect_feature:path) => {
$unstable_detect_feature(
$crate::__vendor_runtime::__Feature::mmx{}) };
("sse", $unstable_detect_feature:path) => {
$unstable_detect_feature(
$crate::__vendor_runtime::__Feature::sse{}) };
("sse2", $unstable_detect_feature:path) => {
$unstable_detect_feature(
$crate::__vendor_runtime::__Feature::sse2{})
};
("sse3", $unstable_detect_feature:path) => {
$unstable_detect_feature(
$crate::__vendor_runtime::__Feature::sse3{})
};
("ssse3", $unstable_detect_feature:path) => {
$unstable_detect_feature(
$crate::__vendor_runtime::__Feature::ssse3{})
};
("sse4.1", $unstable_detect_feature:path) => {
$unstable_detect_feature(
$crate::__vendor_runtime::__Feature::sse4_1{})
};
("sse4.2", $unstable_detect_feature:path) => {
$unstable_detect_feature(
$crate::__vendor_runtime::__Feature::sse4_2{})
};
("sse4a", $unstable_detect_feature:path) => {
$unstable_detect_feature(
$crate::__vendor_runtime::__Feature::sse4a{})
};
("avx", $unstable_detect_feature:path) => {
$unstable_detect_feature(
$crate::__vendor_runtime::__Feature::avx{})
};
("avx2", $unstable_detect_feature:path) => {
$unstable_detect_feature(
$crate::__vendor_runtime::__Feature::avx2{})
};
("avx512f", $unstable_detect_feature:path) => {
$unstable_detect_feature(
$crate::__vendor_runtime::__Feature::avx512f{})
};
("avx512cd", $unstable_detect_feature:path) => {
$unstable_detect_feature(
$crate::__vendor_runtime::__Feature::avx512cd{})
};
("avx512er", $unstable_detect_feature:path) => {
$unstable_detect_feature(
$crate::__vendor_runtime::__Feature::avx512er{})
};
("avx512pf", $unstable_detect_feature:path) => {
$unstable_detect_feature(
$crate::__vendor_runtime::__Feature::avx512pf{})
};
("avx512bw", $unstable_detect_feature:path) => {
$unstable_detect_feature(
$crate::__vendor_runtime::__Feature::avx512bw{})
};
("avx512dq", $unstable_detect_feature:path) => {
$unstable_detect_feature(
$crate::__vendor_runtime::__Feature::avx512dq{})
};
("avx512vl", $unstable_detect_feature:path) => {
$unstable_detect_feature(
$crate::__vendor_runtime::__Feature::avx512vl{})
};
("avx512ifma", $unstable_detect_feature:path) => {
$unstable_detect_feature(
$crate::__vendor_runtime::__Feature::avx512_ifma{})
};
("avx512vbmi", $unstable_detect_feature:path) => {
$unstable_detect_feature(
$crate::__vendor_runtime::__Feature::avx512_vbmi{})
};
("avx512vpopcntdq", $unstable_detect_feature:path) => {
$unstable_detect_feature(
$crate::__vendor_runtime::__Feature::avx512_vpopcntdq{})
};
("fma", $unstable_detect_feature:path) => {
$unstable_detect_feature(
$crate::__vendor_runtime::__Feature::fma{})
};
("bmi", $unstable_detect_feature:path) => {
$unstable_detect_feature(
$crate::__vendor_runtime::__Feature::bmi{})
};
("bmi2", $unstable_detect_feature:path) => {
$unstable_detect_feature(
$crate::__vendor_runtime::__Feature::bmi2{})
};
("abm", $unstable_detect_feature:path) => {
$unstable_detect_feature(
$crate::__vendor_runtime::__Feature::abm{})
};
("lzcnt", $unstable_detect_feature:path) => {
$unstable_detect_feature(
$crate::__vendor_runtime::__Feature::abm{})
};
("tbm", $unstable_detect_feature:path) => {
$unstable_detect_feature(
$crate::__vendor_runtime::__Feature::tbm{})
};
("popcnt", $unstable_detect_feature:path) => {
$unstable_detect_feature(
$crate::__vendor_runtime::__Feature::popcnt{})
};
("fxsr", $unstable_detect_feature:path) => {
$unstable_detect_feature(
$crate::__vendor_runtime::__Feature::fxsr{})
};
("xsave", $unstable_detect_feature:path) => {
$unstable_detect_feature(
$crate::__vendor_runtime::__Feature::xsave{})
};
("xsaveopt", $unstable_detect_feature:path) => {
$unstable_detect_feature(
$crate::__vendor_runtime::__Feature::xsaveopt{})
};
("xsaves", $unstable_detect_feature:path) => {
$unstable_detect_feature(
$crate::__vendor_runtime::__Feature::xsaves{})
};
("xsavec", $unstable_detect_feature:path) => {
$unstable_detect_feature(
$crate::__vendor_runtime::__Feature::xsavec{})
};
($t:tt, $unstable_detect_feature:path) => {
compile_error!(concat!("unknown target feature: ", $t))
};
}
/// X86 CPU Feature enum. Each variant denotes a position in a bitset for a
/// particular feature.
///
/// PLEASE: do not use this, it is an implementation detail subject to change.
#[doc(hidden)]
#[allow(non_camel_case_types)]
#[repr(u8)]
pub enum __Feature {
/// AES (Advanced Encryption Standard New Instructions AES-NI)
aes,
/// TSC (Time Stamp Counter)
tsc,
/// MMX
mmx,
/// SSE (Streaming SIMD Extensions)
sse,
/// SSE2 (Streaming SIMD Extensions 2)
sse2,
/// SSE3 (Streaming SIMD Extensions 3)
sse3,
/// SSSE3 (Supplemental Streaming SIMD Extensions 3)
ssse3,
/// SSE4.1 (Streaming SIMD Extensions 4.1)
sse4_1,
/// SSE4.2 (Streaming SIMD Extensions 4.2)
sse4_2,
/// SSE4a (Streaming SIMD Extensions 4a)
sse4a,
/// AVX (Advanced Vector Extensions)
avx,
/// AVX2 (Advanced Vector Extensions 2)
avx2,
/// AVX-512 F (Foundation)
avx512f,
/// AVX-512 CD (Conflict Detection Instructions)
avx512cd,
/// AVX-512 ER (Exponential and Reciprocal Instructions)
avx512er,
/// AVX-512 PF (Prefetch Instructions)
avx512pf,
/// AVX-512 BW (Byte and Word Instructions)
avx512bw,
/// AVX-512 DQ (Doubleword and Quadword)
avx512dq,
/// AVX-512 VL (Vector Length Extensions)
avx512vl,
/// AVX-512 IFMA (Integer Fused Multiply Add)
avx512_ifma,
/// AVX-512 VBMI (Vector Byte Manipulation Instructions)
avx512_vbmi,
/// AVX-512 VPOPCNTDQ (Vector Population Count Doubleword and
/// Quadword)
avx512_vpopcntdq,
/// FMA (Fused Multiply Add)
fma,
/// BMI1 (Bit Manipulation Instructions 1)
bmi,
/// BMI1 (Bit Manipulation Instructions 2)
bmi2,
/// ABM (Advanced Bit Manipulation) on AMD / LZCNT (Leading Zero
/// Count) on Intel
abm,
/// TBM (Trailing Bit Manipulation)
tbm,
/// POPCNT (Population Count)
popcnt,
/// FXSR (Floating-point context fast save and restor)
fxsr,
/// XSAVE (Save Processor Extended States)
xsave,
/// XSAVEOPT (Save Processor Extended States Optimized)
xsaveopt,
/// XSAVES (Save Processor Extended States Supervisor)
xsaves,
/// XSAVEC (Save Processor Extended States Compacted)
xsavec,
#[doc(hidden)]
__NonExhaustive,
}
/// Run-time feature detection on x86 works by using the CPUID instruction.
///
/// The [CPUID Wikipedia page][wiki_cpuid] contains
/// all the information about which flags to set to query which values, and in
/// which registers these are reported.
///
/// The definitive references are:
/// - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2:
/// Instruction Set Reference, A-Z][intel64_ref].
/// - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and
/// System Instructions][amd64_ref].
///
/// [wiki_cpuid]: https://en.wikipedia.org/wiki/CPUID
/// [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
/// [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
pub fn detect_features() -> cache::Initializer {
use vendor::{__cpuid, __cpuid_count, has_cpuid, CpuidResult};
use vendor::_xgetbv;
let mut value = cache::Initializer::default();
// If the x86 CPU does not support the CPUID instruction then it is too
// old to support any of the currently-detectable features.
if !has_cpuid() {
return value;
}
// Calling `__cpuid`/`__cpuid_count` from here on is safe because the CPU
// has `cpuid` support.
// 0. EAX = 0: Basic Information:
// - EAX returns the "Highest Function Parameter", that is, the maximum
// leaf value for subsequent calls of `cpuinfo` in range [0,
// 0x8000_0000]. - The vendor ID is stored in 12 u8 ascii chars,
// returned in EBX, EDX, and ECX (in that order):
let (max_basic_leaf, vendor_id) = unsafe {
let CpuidResult {
eax: max_basic_leaf,
ebx,
ecx,
edx,
} = __cpuid(0);
let vendor_id: [[u8; 4]; 3] = [
mem::transmute(ebx),
mem::transmute(edx),
mem::transmute(ecx),
];
let vendor_id: [u8; 12] = mem::transmute(vendor_id);
(max_basic_leaf, vendor_id)
};
if max_basic_leaf < 1 {
// Earlier Intel 486, CPUID not implemented
return value;
}
// EAX = 1, ECX = 0: Queries "Processor Info and Feature Bits";
// Contains information about most x86 features.
let CpuidResult {
ecx: proc_info_ecx,
edx: proc_info_edx,
..
} = unsafe { __cpuid(0x0000_0001_u32) };
// EAX = 7, ECX = 0: Queries "Extended Features";
// Contains information about bmi,bmi2, and avx2 support.
let (extended_features_ebx, extended_features_ecx) = if max_basic_leaf >= 7
{
let CpuidResult { ebx, ecx, .. } = unsafe { __cpuid(0x0000_0007_u32) };
(ebx, ecx)
} else {
(0, 0) // CPUID does not support "Extended Features"
};
// EAX = 0x8000_0000, ECX = 0: Get Highest Extended Function Supported
// - EAX returns the max leaf value for extended information, that is,
// `cpuid` calls in range [0x8000_0000; u32::MAX]:
let CpuidResult {
eax: extended_max_basic_leaf,
..
} = unsafe { __cpuid(0x8000_0000_u32) };
// EAX = 0x8000_0001, ECX=0: Queries "Extended Processor Info and Feature
// Bits"
let extended_proc_info_ecx = if extended_max_basic_leaf >= 1 {
let CpuidResult { ecx, .. } = unsafe { __cpuid(0x8000_0001_u32) };
ecx
} else {
0
};
{
// borrows value till the end of this scope:
let mut enable = |r, rb, f| {
if bit::test(r as usize, rb) {
value.set(f as u32);
}
};
enable(proc_info_ecx, 0, __Feature::sse3);
enable(proc_info_ecx, 9, __Feature::ssse3);
enable(proc_info_ecx, 12, __Feature::fma);
enable(proc_info_ecx, 19, __Feature::sse4_1);
enable(proc_info_ecx, 20, __Feature::sse4_2);
enable(proc_info_ecx, 23, __Feature::popcnt);
enable(proc_info_ecx, 25, __Feature::aes);
enable(proc_info_edx, 4, __Feature::tsc);
enable(proc_info_edx, 23, __Feature::mmx);
enable(proc_info_edx, 24, __Feature::fxsr);
enable(proc_info_edx, 25, __Feature::sse);
enable(proc_info_edx, 26, __Feature::sse2);
enable(extended_features_ebx, 3, __Feature::bmi);
enable(extended_features_ebx, 8, __Feature::bmi2);
// `XSAVE` and `AVX` support:
let cpu_xsave = bit::test(proc_info_ecx as usize, 26);
if cpu_xsave {
// 0. Here the CPU supports `XSAVE`.
// 1. Detect `OSXSAVE`, that is, whether the OS is AVX enabled and
// supports saving the state of the AVX/AVX2 vector registers on
// context-switches, see:
//
// - [intel: is avx enabled?][is_avx_enabled],
// - [mozilla: sse.cpp][mozilla_sse_cpp].
//
// [is_avx_enabled]: https://software.intel.com/en-us/blogs/2011/04/14/is-avx-enabled
// [mozilla_sse_cpp]: https://hg.mozilla.org/mozilla-central/file/64bab5cbb9b6/mozglue/build/SSE.cpp#l190
let cpu_osxsave = bit::test(proc_info_ecx as usize, 27);
// 2. The OS must have signaled the CPU that it supports saving and
// restoring the SSE and AVX registers by setting `XCR0.SSE[1]` and
// `XCR0.AVX[2]` to `1`.
//
// This is safe because the CPU supports `xsave`
let xcr0 = unsafe { _xgetbv(0) };
let os_avx_support = xcr0 & 6 == 6;
let os_avx512_support = xcr0 & 224 == 224;
// Only if the OS and the CPU support saving/restoring the AVX
// registers we enable `xsave` support:
if cpu_osxsave && os_avx_support {
// See "13.3 ENABLING THE XSAVE FEATURE SET AND XSAVE-ENABLED
// FEATURES" in the "Intel® 64 and IA-32 Architectures Software
// Developers Manual, Volume 1: Basic Architecture":
//
// "Software enables the XSAVE feature set by setting
// CR4.OSXSAVE[bit 18] to 1 (e.g., with the MOV to CR4
// instruction). If this bit is 0, execution of any of XGETBV,
// XRSTOR, XRSTORS, XSAVE, XSAVEC, XSAVEOPT, XSAVES, and XSETBV
// causes an invalid-opcode exception (#UD)"
//
enable(proc_info_ecx, 26, __Feature::xsave);
// For `xsaveopt`, `xsavec`, and `xsaves` we need to query:
// Processor Extended State Enumeration Sub-leaf (EAX = 0DH,
// ECX = 1):
if max_basic_leaf >= 0xd {
let CpuidResult {
eax: proc_extended_state1_eax,
..
} = unsafe { __cpuid_count(0xd_u32, 1) };
enable(proc_extended_state1_eax, 0, __Feature::xsaveopt);
enable(proc_extended_state1_eax, 1, __Feature::xsavec);
enable(proc_extended_state1_eax, 3, __Feature::xsaves);
}
// And AVX/AVX2:
enable(proc_info_ecx, 28, __Feature::avx);
enable(extended_features_ebx, 5, __Feature::avx2);
// For AVX-512 the OS also needs to support saving/restoring
// the extended state, only then we enable AVX-512 support:
if os_avx512_support {
enable(extended_features_ebx, 16, __Feature::avx512f);
enable(extended_features_ebx, 17, __Feature::avx512dq);
enable(extended_features_ebx, 21, __Feature::avx512_ifma);
enable(extended_features_ebx, 26, __Feature::avx512pf);
enable(extended_features_ebx, 27, __Feature::avx512er);
enable(extended_features_ebx, 28, __Feature::avx512cd);
enable(extended_features_ebx, 30, __Feature::avx512bw);
enable(extended_features_ebx, 31, __Feature::avx512vl);
enable(extended_features_ecx, 1, __Feature::avx512_vbmi);
enable(
extended_features_ecx,
14,
__Feature::avx512_vpopcntdq,
);
}
}
}
// This detects ABM on AMD CPUs and LZCNT on Intel CPUs.
// On intel CPUs with popcnt, lzcnt implements the
// "missing part" of ABM, so we map both to the same
// internal feature.
//
// The `cfg_feature_enabled!("lzcnt")` macro then
// internally maps to __Feature::abm.
enable(extended_proc_info_ecx, 5, __Feature::abm);
if vendor_id == *b"AuthenticAMD" {
// These features are only available on AMD CPUs:
enable(extended_proc_info_ecx, 6, __Feature::sse4a);
enable(extended_proc_info_ecx, 21, __Feature::tbm);
}
}
value
}
#[cfg(test)]
mod tests {
extern crate cupid;
#[test]
fn dump() {
println!("aes: {:?}", cfg_feature_enabled!("aes"));
println!("tsc: {:?}", cfg_feature_enabled!("tsc"));
println!("sse: {:?}", cfg_feature_enabled!("sse"));
println!("sse2: {:?}", cfg_feature_enabled!("sse2"));
println!("sse3: {:?}", cfg_feature_enabled!("sse3"));
println!("ssse3: {:?}", cfg_feature_enabled!("ssse3"));
println!("sse4.1: {:?}", cfg_feature_enabled!("sse4.1"));
println!("sse4.2: {:?}", cfg_feature_enabled!("sse4.2"));
println!("sse4a: {:?}", cfg_feature_enabled!("sse4a"));
println!("avx: {:?}", cfg_feature_enabled!("avx"));
println!("avx2: {:?}", cfg_feature_enabled!("avx2"));
println!("avx512f {:?}", cfg_feature_enabled!("avx512f"));
println!("avx512cd {:?}", cfg_feature_enabled!("avx512cd"));
println!("avx512er {:?}", cfg_feature_enabled!("avx512er"));
println!("avx512pf {:?}", cfg_feature_enabled!("avx512pf"));
println!("avx512bw {:?}", cfg_feature_enabled!("avx512bw"));
println!("avx512dq {:?}", cfg_feature_enabled!("avx512dq"));
println!("avx512vl {:?}", cfg_feature_enabled!("avx512vl"));
println!("avx512_ifma {:?}", cfg_feature_enabled!("avx512ifma"));
println!("avx512_vbmi {:?}", cfg_feature_enabled!("avx512vbmi"));
println!(
"avx512_vpopcntdq {:?}",
cfg_feature_enabled!("avx512vpopcntdq")
);
println!("fma: {:?}", cfg_feature_enabled!("fma"));
println!("abm: {:?}", cfg_feature_enabled!("abm"));
println!("bmi: {:?}", cfg_feature_enabled!("bmi"));
println!("bmi2: {:?}", cfg_feature_enabled!("bmi2"));
println!("tbm: {:?}", cfg_feature_enabled!("tbm"));
println!("popcnt: {:?}", cfg_feature_enabled!("popcnt"));
println!("lzcnt: {:?}", cfg_feature_enabled!("lzcnt"));
println!("fxsr: {:?}", cfg_feature_enabled!("fxsr"));
println!("xsave: {:?}", cfg_feature_enabled!("xsave"));
println!("xsaveopt: {:?}", cfg_feature_enabled!("xsaveopt"));
println!("xsaves: {:?}", cfg_feature_enabled!("xsaves"));
println!("xsavec: {:?}", cfg_feature_enabled!("xsavec"));
}
#[test]
fn compare_with_cupid() {
let information = cupid::master().unwrap();
assert_eq!(cfg_feature_enabled!("aes"), information.aesni());
assert_eq!(cfg_feature_enabled!("tsc"), information.tsc());
assert_eq!(cfg_feature_enabled!("sse"), information.sse());
assert_eq!(cfg_feature_enabled!("sse2"), information.sse2());
assert_eq!(cfg_feature_enabled!("sse3"), information.sse3());
assert_eq!(cfg_feature_enabled!("ssse3"), information.ssse3());
assert_eq!(cfg_feature_enabled!("sse4.1"), information.sse4_1());
assert_eq!(cfg_feature_enabled!("sse4.2"), information.sse4_2());
assert_eq!(cfg_feature_enabled!("sse4a"), information.sse4a());
assert_eq!(cfg_feature_enabled!("avx"), information.avx());
assert_eq!(cfg_feature_enabled!("avx2"), information.avx2());
assert_eq!(cfg_feature_enabled!("avx512f"), information.avx512f());
assert_eq!(cfg_feature_enabled!("avx512cd"), information.avx512cd());
assert_eq!(cfg_feature_enabled!("avx512er"), information.avx512er());
assert_eq!(cfg_feature_enabled!("avx512pf"), information.avx512pf());
assert_eq!(cfg_feature_enabled!("avx512bw"), information.avx512bw());
assert_eq!(cfg_feature_enabled!("avx512dq"), information.avx512dq());
assert_eq!(cfg_feature_enabled!("avx512vl"), information.avx512vl());
assert_eq!(
cfg_feature_enabled!("avx512ifma"),
information.avx512_ifma()
);
assert_eq!(
cfg_feature_enabled!("avx512vbmi"),
information.avx512_vbmi()
);
assert_eq!(
cfg_feature_enabled!("avx512vpopcntdq"),
information.avx512_vpopcntdq()
);
assert_eq!(cfg_feature_enabled!("fma"), information.fma());
assert_eq!(cfg_feature_enabled!("bmi"), information.bmi1());
assert_eq!(cfg_feature_enabled!("bmi2"), information.bmi2());
assert_eq!(cfg_feature_enabled!("popcnt"), information.popcnt());
assert_eq!(cfg_feature_enabled!("abm"), information.lzcnt());
assert_eq!(cfg_feature_enabled!("tbm"), information.tbm());
assert_eq!(cfg_feature_enabled!("lzcnt"), information.lzcnt());
assert_eq!(cfg_feature_enabled!("xsave"), information.xsave());
assert_eq!(cfg_feature_enabled!("xsaveopt"), information.xsaveopt());
assert_eq!(
cfg_feature_enabled!("xsavec"),
information.xsavec_and_xrstor()
);
assert_eq!(
cfg_feature_enabled!("xsaves"),
information.xsaves_xrstors_and_ia32_xss()
);
}
}

View File

@@ -1,47 +0,0 @@
#![feature(cfg_target_feature)]
#![cfg_attr(feature = "strict", deny(warnings))]
#![cfg_attr(feature = "cargo-clippy",
allow(option_unwrap_used, print_stdout, use_debug))]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[macro_use]
extern crate coresimd;
#[test]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
fn x86_all() {
println!("sse: {:?}", cfg_feature_enabled!("sse"));
println!("sse2: {:?}", cfg_feature_enabled!("sse2"));
println!("sse3: {:?}", cfg_feature_enabled!("sse3"));
println!("ssse3: {:?}", cfg_feature_enabled!("ssse3"));
println!("sse4.1: {:?}", cfg_feature_enabled!("sse4.1"));
println!("sse4.2: {:?}", cfg_feature_enabled!("sse4.2"));
println!("sse4a: {:?}", cfg_feature_enabled!("sse4a"));
println!("avx: {:?}", cfg_feature_enabled!("avx"));
println!("avx2: {:?}", cfg_feature_enabled!("avx2"));
println!("avx512f {:?}", cfg_feature_enabled!("avx512f"));
println!("avx512cd {:?}", cfg_feature_enabled!("avx512cd"));
println!("avx512er {:?}", cfg_feature_enabled!("avx512er"));
println!("avx512pf {:?}", cfg_feature_enabled!("avx512pf"));
println!("avx512bw {:?}", cfg_feature_enabled!("avx512bw"));
println!("avx512dq {:?}", cfg_feature_enabled!("avx512dq"));
println!("avx512vl {:?}", cfg_feature_enabled!("avx512vl"));
println!("avx512_ifma {:?}", cfg_feature_enabled!("avx512ifma"));
println!("avx512_vbmi {:?}", cfg_feature_enabled!("avx512vbmi"));
println!(
"avx512_vpopcntdq {:?}",
cfg_feature_enabled!("avx512vpopcntdq")
);
println!("fma: {:?}", cfg_feature_enabled!("fma"));
println!("abm: {:?}", cfg_feature_enabled!("abm"));
println!("bmi: {:?}", cfg_feature_enabled!("bmi"));
println!("bmi2: {:?}", cfg_feature_enabled!("bmi2"));
println!("tbm: {:?}", cfg_feature_enabled!("tbm"));
println!("popcnt: {:?}", cfg_feature_enabled!("popcnt"));
println!("lzcnt: {:?}", cfg_feature_enabled!("lzcnt"));
println!("fxsr: {:?}", cfg_feature_enabled!("fxsr"));
println!("xsave: {:?}", cfg_feature_enabled!("xsave"));
println!("xsaveopt: {:?}", cfg_feature_enabled!("xsaveopt"));
println!("xsaves: {:?}", cfg_feature_enabled!("xsaves"));
println!("xsavec: {:?}", cfg_feature_enabled!("xsavec"));
}

View File

@@ -1,6 +1,8 @@
//! 128-bit wide vector types
use simd_llvm::*;
use prelude::v1::*;
use coresimd::simd_llvm::*;
define_ty! { f64x2, f64, f64 }
define_impl! { f64x2, f64, 2, i64x2, x0, x1 }

View File

@@ -1,6 +1,8 @@
//! 256-bit wide vector types
use simd_llvm::*;
use prelude::v1::*;
use coresimd::simd_llvm::*;
define_ty! { f64x4, f64, f64, f64, f64 }
define_impl! { f64x4, f64, 4, i64x4, x0, x1, x2, x3 }

View File

@@ -1,6 +1,8 @@
//! 512-bit wide vector types
use simd_llvm::*;
use prelude::v1::*;
use coresimd::simd_llvm::*;
define_ty! { f64x8, f64, f64, f64, f64, f64, f64, f64, f64 }
define_impl! { f64x8, f64, 8, i64x8, x0, x1, x2, x3, x4, x5, x6, x7 }

View File

@@ -1,6 +1,8 @@
//! 64-bit wide vector types
use simd_llvm::*;
use prelude::v1::*;
use coresimd::simd_llvm::*;
define_ty_doc! {
f32x2, f32, f32 |

View File

@@ -34,7 +34,7 @@ pub unsafe fn __writeeflags(eflags: u64) {
#[cfg(test)]
mod tests {
use x86::i386::*;
use coresimd::x86::i386::*;
#[test]
fn test_eflags() {

View File

@@ -51,7 +51,7 @@ pub unsafe fn _fxrstor(mem_addr: *const u8) {
#[cfg(test)]
mod tests {
use x86::i386::fxsr;
use coresimd::x86::i386::fxsr;
use stdsimd_test::simd_test;
use std::fmt;

View File

@@ -54,7 +54,7 @@ extern "C" {
#[cfg(test)]
mod tests {
use stdsimd_test::simd_test;
use x86::i386::rdtsc;
use coresimd::x86::i386::rdtsc;
#[simd_test = "sse2"]
unsafe fn _rdtsc() {

View File

@@ -42,7 +42,7 @@ pub unsafe fn _popcnt32(x: i32) -> i32 {
mod tests {
use stdsimd_test::simd_test;
use x86::i586::abm;
use coresimd::x86::i586::abm;
#[simd_test = "lzcnt"]
unsafe fn _lzcnt_u32() {

View File

@@ -13,17 +13,17 @@
//! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
//! [wiki]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions
use core::mem;
use core::ptr;
use coresimd::simd_llvm::*;
use coresimd::v128::*;
use coresimd::v256::*;
use coresimd::x86::*;
use intrinsics;
use mem;
use ptr;
#[cfg(test)]
use stdsimd_test::assert_instr;
use simd_llvm::*;
use v128::*;
use v256::*;
use x86::*;
/// Add packed double-precision (64-bit) floating-point elements
/// in `a` and `b`.
#[inline]
@@ -1547,7 +1547,7 @@ pub unsafe fn _mm256_lddqu_si256(mem_addr: *const __m256i) -> __m256i {
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovntps))] // FIXME vmovntdq
pub unsafe fn _mm256_stream_si256(mem_addr: *const __m256i, a: __m256i) {
::core::intrinsics::nontemporal_store(mem::transmute(mem_addr), a);
intrinsics::nontemporal_store(mem::transmute(mem_addr), a);
}
/// Moves double-precision values from a 256-bit vector of [4 x double]
@@ -1557,7 +1557,7 @@ pub unsafe fn _mm256_stream_si256(mem_addr: *const __m256i, a: __m256i) {
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovntps))] // FIXME vmovntpd
pub unsafe fn _mm256_stream_pd(mem_addr: *const f64, a: __m256d) {
::core::intrinsics::nontemporal_store(mem::transmute(mem_addr), a);
intrinsics::nontemporal_store(mem::transmute(mem_addr), a);
}
/// Moves single-precision floating point values from a 256-bit vector
@@ -1568,7 +1568,7 @@ pub unsafe fn _mm256_stream_pd(mem_addr: *const f64, a: __m256d) {
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovntps))]
pub unsafe fn _mm256_stream_ps(mem_addr: *const f32, a: __m256) {
::core::intrinsics::nontemporal_store(mem::transmute(mem_addr), a);
intrinsics::nontemporal_store(mem::transmute(mem_addr), a);
}
/// Compute the approximate reciprocal of packed single-precision (32-bit)
@@ -2366,7 +2366,6 @@ pub unsafe fn _mm256_loadu2_m128d(
pub unsafe fn _mm256_loadu2_m128i(
hiaddr: *const __m128i, loaddr: *const __m128i
) -> __m256i {
use x86::i586::sse2::_mm_loadu_si128;
let a = _mm256_castsi128_si256(_mm_loadu_si128(loaddr));
_mm256_insertf128_si256(a, _mm_loadu_si128(hiaddr), 1)
}
@@ -2412,7 +2411,6 @@ pub unsafe fn _mm256_storeu2_m128d(
pub unsafe fn _mm256_storeu2_m128i(
hiaddr: *mut __m128i, loaddr: *mut __m128i, a: __m256i
) {
use x86::i586::sse2::_mm_storeu_si128;
let lo = _mm256_castsi256_si128(a);
_mm_storeu_si128(loaddr, lo);
let hi = _mm256_extractf128_si256(a, 1);
@@ -2579,7 +2577,7 @@ mod tests {
use stdsimd_test::simd_test;
use test::black_box; // Used to inhibit constant-folding.
use x86::*;
use coresimd::x86::*;
#[simd_test = "avx"]
unsafe fn test_mm256_add_pd() {

View File

@@ -18,14 +18,13 @@
//! [wiki_avx]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions
//! [wiki_fma]: https://en.wikipedia.org/wiki/Fused_multiply-accumulate
use core::mem;
use simd_llvm::*;
use v256::*;
use v128::*;
use v64::*;
use v32::*;
use x86::*;
use coresimd::simd_llvm::*;
use coresimd::v256::*;
use coresimd::v128::*;
use coresimd::v64::*;
use coresimd::v32::*;
use coresimd::x86::*;
use mem;
#[cfg(test)]
use stdsimd_test::assert_instr;
@@ -1912,7 +1911,6 @@ pub unsafe fn _mm256_permute2x128_si256(
#[cfg_attr(test, assert_instr(vpermpd, imm8 = 1))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm256_permute4x64_pd(a: __m256d, imm8: i32) -> __m256d {
use x86::i586::avx::_mm256_undefined_pd;
let imm8 = (imm8 & 0xFF) as u8;
let undef = _mm256_undefined_pd();
macro_rules! shuffle_done {
@@ -2024,10 +2022,13 @@ pub unsafe fn _mm256_shuffle_epi8(a: __m256i, b: __m256i) -> __m256i {
/// # #[macro_use] extern crate stdsimd;
/// #
/// # fn main() {
/// # if cfg_feature_enabled!("avx2") {
/// # if is_target_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// use stdsimd::vendor::*;
/// #[cfg(target_arch = "x86")]
/// use stdsimd::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use stdsimd::arch::x86_64::*;
///
/// let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
///
@@ -2627,10 +2628,13 @@ pub unsafe fn _mm256_subs_epu8(a: __m256i, b: __m256i) -> __m256i {
/// # #[macro_use] extern crate stdsimd;
/// #
/// # fn main() {
/// # if cfg_feature_enabled!("avx2") {
/// # if is_target_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// use stdsimd::vendor::*;
/// #[cfg(target_arch = "x86")]
/// use stdsimd::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use stdsimd::arch::x86_64::*;
///
/// let a = _mm256_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
/// 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
@@ -2673,10 +2677,13 @@ pub unsafe fn _mm256_unpackhi_epi8(a: __m256i, b: __m256i) -> __m256i {
/// # #[macro_use] extern crate stdsimd;
/// #
/// # fn main() {
/// # if cfg_feature_enabled!("avx2") {
/// # if is_target_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// use stdsimd::vendor::*;
/// #[cfg(target_arch = "x86")]
/// use stdsimd::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use stdsimd::arch::x86_64::*;
///
/// let a = _mm256_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
/// 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
@@ -2718,10 +2725,13 @@ pub unsafe fn _mm256_unpacklo_epi8(a: __m256i, b: __m256i) -> __m256i {
/// # #[macro_use] extern crate stdsimd;
/// #
/// # fn main() {
/// # if cfg_feature_enabled!("avx2") {
/// # if is_target_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// use stdsimd::vendor::*;
/// #[cfg(target_arch = "x86")]
/// use stdsimd::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use stdsimd::arch::x86_64::*;
///
/// let a = _mm256_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
/// let b = _mm256_setr_epi16(0,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15);
@@ -2759,10 +2769,13 @@ pub unsafe fn _mm256_unpackhi_epi16(a: __m256i, b: __m256i) -> __m256i {
/// # #[macro_use] extern crate stdsimd;
/// #
/// # fn main() {
/// # if cfg_feature_enabled!("avx2") {
/// # if is_target_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// use stdsimd::vendor::*;
/// #[cfg(target_arch = "x86")]
/// use stdsimd::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use stdsimd::arch::x86_64::*;
///
/// let a = _mm256_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
/// let b = _mm256_setr_epi16(0,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15);
@@ -2800,10 +2813,13 @@ pub unsafe fn _mm256_unpacklo_epi16(a: __m256i, b: __m256i) -> __m256i {
/// # #[macro_use] extern crate stdsimd;
/// #
/// # fn main() {
/// # if cfg_feature_enabled!("avx2") {
/// # if is_target_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// use stdsimd::vendor::*;
/// #[cfg(target_arch = "x86")]
/// use stdsimd::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use stdsimd::arch::x86_64::*;
///
/// let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
/// let b = _mm256_setr_epi32(0,-1,-2,-3,-4,-5,-6,-7);
@@ -2840,10 +2856,13 @@ pub unsafe fn _mm256_unpackhi_epi32(a: __m256i, b: __m256i) -> __m256i {
/// # #[macro_use] extern crate stdsimd;
/// #
/// # fn main() {
/// # if cfg_feature_enabled!("avx2") {
/// # if is_target_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// use stdsimd::vendor::*;
/// #[cfg(target_arch = "x86")]
/// use stdsimd::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use stdsimd::arch::x86_64::*;
///
/// let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
/// let b = _mm256_setr_epi32(0,-1,-2,-3,-4,-5,-6,-7);
@@ -2877,10 +2896,13 @@ pub unsafe fn _mm256_unpacklo_epi32(a: __m256i, b: __m256i) -> __m256i {
/// # #[macro_use] extern crate stdsimd;
/// #
/// # fn main() {
/// # if cfg_feature_enabled!("avx2") {
/// # if is_target_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// use stdsimd::vendor::*;
/// #[cfg(target_arch = "x86")]
/// use stdsimd::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use stdsimd::arch::x86_64::*;
///
/// let a = _mm256_setr_epi64x(0, 1, 2, 3);
/// let b = _mm256_setr_epi64x(0,-1,-2,-3);
@@ -2913,10 +2935,13 @@ pub unsafe fn _mm256_unpackhi_epi64(a: __m256i, b: __m256i) -> __m256i {
/// # #[macro_use] extern crate stdsimd;
/// #
/// # fn main() {
/// # if cfg_feature_enabled!("avx2") {
/// # if is_target_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// use stdsimd::vendor::*;
/// #[cfg(target_arch = "x86")]
/// use stdsimd::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use stdsimd::arch::x86_64::*;
///
/// let a = _mm256_setr_epi64x(0, 1, 2, 3);
/// let b = _mm256_setr_epi64x(0,-1,-2,-3);
@@ -3253,10 +3278,10 @@ extern "C" {
#[cfg(test)]
mod tests {
use stdsimd_test::simd_test;
use x86::*;
use std;
use coresimd::x86::*;
#[simd_test = "avx2"]
unsafe fn test_mm256_abs_epi32() {
#[cfg_attr(rustfmt, rustfmt_skip)]

View File

@@ -96,7 +96,7 @@ extern "C" {
mod tests {
use stdsimd_test::simd_test;
use x86::i586::bmi;
use coresimd::x86::i586::bmi;
#[simd_test = "bmi"]
unsafe fn _bextr_u32() {

View File

@@ -67,7 +67,7 @@ extern "C" {
mod tests {
use stdsimd_test::simd_test;
use x86::i586::bmi2;
use coresimd::x86::i586::bmi2;
#[simd_test = "bmi2"]
unsafe fn _pext_u32() {

View File

@@ -2,6 +2,8 @@
#![cfg_attr(feature = "cargo-clippy", allow(stutter))]
use mem;
#[cfg(test)]
use stdsimd_test::assert_instr;
@@ -45,7 +47,7 @@ pub struct CpuidResult {
#[inline]
#[cfg_attr(test, assert_instr(cpuid))]
pub unsafe fn __cpuid_count(leaf: u32, sub_leaf: u32) -> CpuidResult {
let mut r = ::core::mem::uninitialized::<CpuidResult>();
let mut r = mem::uninitialized::<CpuidResult>();
if cfg!(target_arch = "x86") {
asm!("cpuid"
: "={eax}"(r.eax), "={ebx}"(r.ebx), "={ecx}"(r.ecx), "={edx}"(r.edx)
@@ -77,7 +79,7 @@ pub fn has_cpuid() -> bool {
}
#[cfg(target_arch = "x86")]
{
use x86::i386::{__readeflags, __writeeflags};
use coresimd::x86::i386::{__readeflags, __writeeflags};
// On `x86` the `cpuid` instruction is not always available.
// This follows the approach indicated in:
@@ -119,7 +121,7 @@ pub unsafe fn __get_cpuid_max(leaf: u32) -> (u32, u32) {
#[cfg(test)]
mod tests {
use x86::i586::cpuid;
use coresimd::x86::i586::cpuid;
#[test]
fn test_always_has_cpuid() {
@@ -131,7 +133,7 @@ mod tests {
#[cfg(target_arch = "x86")]
#[test]
fn test_has_cpuid() {
use x86::i386::__readeflags;
use coresimd::x86::i386::__readeflags;
unsafe {
let before = __readeflags();

View File

@@ -1,12 +1,12 @@
//! Streaming SIMD Extensions (SSE)
use core::mem;
use core::ptr;
use simd_llvm::*;
use v128::*;
use v64::*;
use x86::*;
use coresimd::simd_llvm::*;
use coresimd::v128::*;
use coresimd::v64::*;
use coresimd::x86::*;
use intrinsics;
use mem;
use ptr;
#[cfg(test)]
use stdsimd_test::assert_instr;
@@ -873,12 +873,15 @@ pub unsafe fn _mm_movemask_ps(a: __m128) -> i32 {
/// #
/// # // The real main function
/// # fn main() {
/// # if cfg_feature_enabled!("sse") {
/// # if is_target_feature_detected!("sse") {
/// # #[target_feature(enable = "sse")]
/// # unsafe fn worker() {
/// #
/// # use stdsimd::vendor::*;
/// #
/// #[cfg(target_arch = "x86")]
/// use stdsimd::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use stdsimd::arch::x86_64::*;
///
/// unsafe {
/// let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
/// let data: [f32; 4] = [5.0, 6.0, 7.0, 8.0];
@@ -924,12 +927,15 @@ pub unsafe fn _mm_loadh_pi(a: __m128, p: *const __m64) -> __m128 {
/// #
/// # // The real main function
/// # fn main() {
/// # if cfg_feature_enabled!("sse") {
/// # if is_target_feature_detected!("sse") {
/// # #[target_feature(enable = "sse")]
/// # unsafe fn worker() {
/// #
/// # use stdsimd::vendor::*;
/// #
/// #[cfg(target_arch = "x86")]
/// use stdsimd::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use stdsimd::arch::x86_64::*;
///
/// unsafe {
/// let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
/// let data: [f32; 4] = [5.0, 6.0, 7.0, 8.0];
@@ -1684,7 +1690,7 @@ extern "C" {
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movntps))]
pub unsafe fn _mm_stream_ps(mem_addr: *mut f32, a: __m128) {
::core::intrinsics::nontemporal_store(mem::transmute(mem_addr), a);
intrinsics::nontemporal_store(mem::transmute(mem_addr), a);
}
/// Store 64-bits of integer data from a into memory using a non-temporal
@@ -1701,12 +1707,13 @@ mod tests {
use std::mem::transmute;
use std::f32::NAN;
use v128::*;
use v64::*;
use x86::*;
use stdsimd_test::simd_test;
use test::black_box; // Used to inhibit constant-folding.
use coresimd::v128::*;
use coresimd::v64::*;
use coresimd::x86::*;
#[simd_test = "sse"]
unsafe fn test_mm_add_ps() {
let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);

View File

@@ -3,13 +3,13 @@
#[cfg(test)]
use stdsimd_test::assert_instr;
use core::mem;
use core::ptr;
use simd_llvm::*;
use v128::*;
use v64::*;
use x86::*;
use coresimd::simd_llvm::*;
use coresimd::v128::*;
use coresimd::v64::*;
use coresimd::x86::*;
use intrinsics;
use mem;
use ptr;
/// Provide a hint to the processor that the code sequence is a spin-wait loop.
///
@@ -952,7 +952,7 @@ pub unsafe fn _mm_storel_epi64(mem_addr: *mut __m128i, a: __m128i) {
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movntps))] // FIXME movntdq
pub unsafe fn _mm_stream_si128(mem_addr: *mut __m128i, a: __m128i) {
::core::intrinsics::nontemporal_store(mem_addr, a);
::intrinsics::nontemporal_store(mem_addr, a);
}
/// Stores a 32-bit integer value in the specified memory location.
@@ -962,7 +962,7 @@ pub unsafe fn _mm_stream_si128(mem_addr: *mut __m128i, a: __m128i) {
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movnti))]
pub unsafe fn _mm_stream_si32(mem_addr: *mut i32, a: i32) {
::core::intrinsics::nontemporal_store(mem_addr, a);
::intrinsics::nontemporal_store(mem_addr, a);
}
/// Return a vector where the low element is extracted from `a` and its upper
@@ -1974,7 +1974,7 @@ pub unsafe fn _mm_loadl_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movntps))] // FIXME movntpd
pub unsafe fn _mm_stream_pd(mem_addr: *mut f64, a: __m128d) {
::core::intrinsics::nontemporal_store(mem::transmute(mem_addr), a);
intrinsics::nontemporal_store(mem::transmute(mem_addr), a);
}
/// Stores the lower 64 bits of a 128-bit vector of [2 x double] to a
@@ -2382,8 +2382,8 @@ mod tests {
use stdsimd_test::simd_test;
use test::black_box; // Used to inhibit constant-folding.
use x86::*;
use v128::*;
use coresimd::x86::*;
use coresimd::v128::*;
#[simd_test = "sse2"]
unsafe fn test_mm_pause() {

View File

@@ -1,8 +1,8 @@
//! Streaming SIMD Extensions 3 (SSE3)
use simd_llvm::{simd_shuffle2, simd_shuffle4};
use v128::*;
use x86::*;
use coresimd::simd_llvm::{simd_shuffle2, simd_shuffle4};
use coresimd::v128::*;
use coresimd::x86::*;
#[cfg(test)]
use stdsimd_test::assert_instr;
@@ -129,7 +129,7 @@ extern "C" {
mod tests {
use stdsimd_test::simd_test;
use x86::*;
use coresimd::x86::*;
#[simd_test = "sse3"]
unsafe fn test_mm_addsub_ps() {

View File

@@ -1,14 +1,16 @@
//! Streaming SIMD Extensions 4.1 (SSE4.1)
use core::mem;
use coresimd::simd_llvm::*;
use coresimd::v128::*;
use coresimd::v64::*;
use coresimd::v32::*;
use coresimd::v16::*;
use coresimd::x86::*;
use mem;
#[cfg(test)]
use stdsimd_test::assert_instr;
use simd_llvm::*;
use v128::*;
use x86::*;
// SSE4 rounding constans
/// round to nearest
pub const _MM_FROUND_TO_NEAREST_INT: i32 = 0x00;
@@ -301,7 +303,7 @@ pub unsafe fn _mm_cmpeq_epi64(a: __m128i, b: __m128i) -> __m128i {
#[cfg_attr(test, assert_instr(pmovsxbw))]
pub unsafe fn _mm_cvtepi8_epi16(a: __m128i) -> __m128i {
let a = a.as_i8x16();
let a = simd_shuffle8::<_, ::v64::i8x8>(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
let a = simd_shuffle8::<_, i8x8>(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
mem::transmute(simd_cast::<_, i16x8>(a))
}
@@ -311,7 +313,7 @@ pub unsafe fn _mm_cvtepi8_epi16(a: __m128i) -> __m128i {
#[cfg_attr(test, assert_instr(pmovsxbd))]
pub unsafe fn _mm_cvtepi8_epi32(a: __m128i) -> __m128i {
let a = a.as_i8x16();
let a = simd_shuffle4::<_, ::v32::i8x4>(a, a, [0, 1, 2, 3]);
let a = simd_shuffle4::<_, i8x4>(a, a, [0, 1, 2, 3]);
mem::transmute(simd_cast::<_, i32x4>(a))
}
@@ -322,7 +324,7 @@ pub unsafe fn _mm_cvtepi8_epi32(a: __m128i) -> __m128i {
#[cfg_attr(test, assert_instr(pmovsxbq))]
pub unsafe fn _mm_cvtepi8_epi64(a: __m128i) -> __m128i {
let a = a.as_i8x16();
let a = simd_shuffle2::<_, ::v16::i8x2>(a, a, [0, 1]);
let a = simd_shuffle2::<_, i8x2>(a, a, [0, 1]);
mem::transmute(simd_cast::<_, i64x2>(a))
}
@@ -332,7 +334,7 @@ pub unsafe fn _mm_cvtepi8_epi64(a: __m128i) -> __m128i {
#[cfg_attr(test, assert_instr(pmovsxwd))]
pub unsafe fn _mm_cvtepi16_epi32(a: __m128i) -> __m128i {
let a = a.as_i16x8();
let a = simd_shuffle4::<_, ::v64::i16x4>(a, a, [0, 1, 2, 3]);
let a = simd_shuffle4::<_, i16x4>(a, a, [0, 1, 2, 3]);
mem::transmute(simd_cast::<_, i32x4>(a))
}
@@ -342,7 +344,7 @@ pub unsafe fn _mm_cvtepi16_epi32(a: __m128i) -> __m128i {
#[cfg_attr(test, assert_instr(pmovsxwq))]
pub unsafe fn _mm_cvtepi16_epi64(a: __m128i) -> __m128i {
let a = a.as_i16x8();
let a = simd_shuffle2::<_, ::v32::i16x2>(a, a, [0, 1]);
let a = simd_shuffle2::<_, i16x2>(a, a, [0, 1]);
mem::transmute(simd_cast::<_, i64x2>(a))
}
@@ -352,7 +354,7 @@ pub unsafe fn _mm_cvtepi16_epi64(a: __m128i) -> __m128i {
#[cfg_attr(test, assert_instr(pmovsxdq))]
pub unsafe fn _mm_cvtepi32_epi64(a: __m128i) -> __m128i {
let a = a.as_i32x4();
let a = simd_shuffle2::<_, ::v64::i32x2>(a, a, [0, 1]);
let a = simd_shuffle2::<_, i32x2>(a, a, [0, 1]);
mem::transmute(simd_cast::<_, i64x2>(a))
}
@@ -362,7 +364,7 @@ pub unsafe fn _mm_cvtepi32_epi64(a: __m128i) -> __m128i {
#[cfg_attr(test, assert_instr(pmovzxbw))]
pub unsafe fn _mm_cvtepu8_epi16(a: __m128i) -> __m128i {
let a = a.as_u8x16();
let a = simd_shuffle8::<_, ::v64::u8x8>(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
let a = simd_shuffle8::<_, u8x8>(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
mem::transmute(simd_cast::<_, i16x8>(a))
}
@@ -372,7 +374,7 @@ pub unsafe fn _mm_cvtepu8_epi16(a: __m128i) -> __m128i {
#[cfg_attr(test, assert_instr(pmovzxbd))]
pub unsafe fn _mm_cvtepu8_epi32(a: __m128i) -> __m128i {
let a = a.as_u8x16();
let a = simd_shuffle4::<_, ::v32::u8x4>(a, a, [0, 1, 2, 3]);
let a = simd_shuffle4::<_, u8x4>(a, a, [0, 1, 2, 3]);
mem::transmute(simd_cast::<_, i32x4>(a))
}
@@ -382,7 +384,7 @@ pub unsafe fn _mm_cvtepu8_epi32(a: __m128i) -> __m128i {
#[cfg_attr(test, assert_instr(pmovzxbq))]
pub unsafe fn _mm_cvtepu8_epi64(a: __m128i) -> __m128i {
let a = a.as_u8x16();
let a = simd_shuffle2::<_, ::v16::u8x2>(a, a, [0, 1]);
let a = simd_shuffle2::<_, u8x2>(a, a, [0, 1]);
mem::transmute(simd_cast::<_, i64x2>(a))
}
@@ -393,7 +395,7 @@ pub unsafe fn _mm_cvtepu8_epi64(a: __m128i) -> __m128i {
#[cfg_attr(test, assert_instr(pmovzxwd))]
pub unsafe fn _mm_cvtepu16_epi32(a: __m128i) -> __m128i {
let a = a.as_u16x8();
let a = simd_shuffle4::<_, ::v64::u16x4>(a, a, [0, 1, 2, 3]);
let a = simd_shuffle4::<_, u16x4>(a, a, [0, 1, 2, 3]);
mem::transmute(simd_cast::<_, i32x4>(a))
}
@@ -404,7 +406,7 @@ pub unsafe fn _mm_cvtepu16_epi32(a: __m128i) -> __m128i {
#[cfg_attr(test, assert_instr(pmovzxwq))]
pub unsafe fn _mm_cvtepu16_epi64(a: __m128i) -> __m128i {
let a = a.as_u16x8();
let a = simd_shuffle2::<_, ::v32::u16x2>(a, a, [0, 1]);
let a = simd_shuffle2::<_, u16x2>(a, a, [0, 1]);
mem::transmute(simd_cast::<_, i64x2>(a))
}
@@ -415,7 +417,7 @@ pub unsafe fn _mm_cvtepu16_epi64(a: __m128i) -> __m128i {
#[cfg_attr(test, assert_instr(pmovzxdq))]
pub unsafe fn _mm_cvtepu32_epi64(a: __m128i) -> __m128i {
let a = a.as_u32x4();
let a = simd_shuffle2::<_, ::v64::u32x2>(a, a, [0, 1]);
let a = simd_shuffle2::<_, u32x2>(a, a, [0, 1]);
mem::transmute(simd_cast::<_, i64x2>(a))
}
@@ -549,18 +551,25 @@ pub unsafe fn _mm_ceil_ss(a: __m128, b: __m128) -> __m128 {
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// ```
/// use coresimd::vendor;
/// extern crate stdsimd;
///
/// #[cfg(target_arch = "x86")]
/// use stdsimd::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use stdsimd::arch::x86_64::*;
///
/// # fn main() {
/// // round to nearest, and suppress exceptions:
/// (vendor::_MM_FROUND_TO_NEAREST_INT | vendor::_MM_FROUND_NO_EXC);
/// (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
/// // round down, and suppress exceptions:
/// (vendor::_MM_FROUND_TO_NEG_INF | vendor::_MM_FROUND_NO_EXC);
/// (_MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
/// // round up, and suppress exceptions:
/// (vendor::_MM_FROUND_TO_POS_INF | vendor::_MM_FROUND_NO_EXC);
/// (_MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
/// // truncate, and suppress exceptions:
/// (vendor::_MM_FROUND_TO_ZERO | vendor::_MM_FROUND_NO_EXC);
/// // use MXCSR.RC; see `vendor::_MM_SET_ROUNDING_MODE`:
/// vendor::_MM_FROUND_CUR_DIRECTION;
/// (_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
/// // use MXCSR.RC; see `_MM_SET_ROUNDING_MODE`:
/// _MM_FROUND_CUR_DIRECTION;
/// # }
/// ```
#[inline]
#[target_feature(enable = "sse4.1")]
@@ -579,18 +588,25 @@ pub unsafe fn _mm_round_pd(a: __m128d, rounding: i32) -> __m128d {
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// ```
/// use coresimd::vendor;
/// extern crate stdsimd;
///
/// #[cfg(target_arch = "x86")]
/// use stdsimd::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use stdsimd::arch::x86_64::*;
///
/// # fn main() {
/// // round to nearest, and suppress exceptions:
/// (vendor::_MM_FROUND_TO_NEAREST_INT | vendor::_MM_FROUND_NO_EXC);
/// (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
/// // round down, and suppress exceptions:
/// (vendor::_MM_FROUND_TO_NEG_INF | vendor::_MM_FROUND_NO_EXC);
/// (_MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
/// // round up, and suppress exceptions:
/// (vendor::_MM_FROUND_TO_POS_INF | vendor::_MM_FROUND_NO_EXC);
/// (_MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
/// // truncate, and suppress exceptions:
/// (vendor::_MM_FROUND_TO_ZERO | vendor::_MM_FROUND_NO_EXC);
/// // use MXCSR.RC; see `vendor::_MM_SET_ROUNDING_MODE`:
/// vendor::_MM_FROUND_CUR_DIRECTION;
/// (_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
/// // use MXCSR.RC; see `_MM_SET_ROUNDING_MODE`:
/// _MM_FROUND_CUR_DIRECTION;
/// # }
/// ```
#[inline]
#[target_feature(enable = "sse4.1")]
@@ -611,18 +627,25 @@ pub unsafe fn _mm_round_ps(a: __m128, rounding: i32) -> __m128 {
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// ```
/// use coresimd::vendor;
/// extern crate stdsimd;
///
/// #[cfg(target_arch = "x86")]
/// use stdsimd::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use stdsimd::arch::x86_64::*;
///
/// # fn main() {
/// // round to nearest, and suppress exceptions:
/// (vendor::_MM_FROUND_TO_NEAREST_INT | vendor::_MM_FROUND_NO_EXC);
/// (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
/// // round down, and suppress exceptions:
/// (vendor::_MM_FROUND_TO_NEG_INF | vendor::_MM_FROUND_NO_EXC);
/// (_MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
/// // round up, and suppress exceptions:
/// (vendor::_MM_FROUND_TO_POS_INF | vendor::_MM_FROUND_NO_EXC);
/// (_MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
/// // truncate, and suppress exceptions:
/// (vendor::_MM_FROUND_TO_ZERO | vendor::_MM_FROUND_NO_EXC);
/// // use MXCSR.RC; see `vendor::_MM_SET_ROUNDING_MODE`:
/// vendor::_MM_FROUND_CUR_DIRECTION;
/// (_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
/// // use MXCSR.RC; see `_MM_SET_ROUNDING_MODE`:
/// _MM_FROUND_CUR_DIRECTION;
/// # }
/// ```
#[inline]
#[target_feature(enable = "sse4.1")]
@@ -643,18 +666,25 @@ pub unsafe fn _mm_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d {
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// ```
/// use coresimd::vendor;
/// extern crate stdsimd;
///
/// #[cfg(target_arch = "x86")]
/// use stdsimd::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use stdsimd::arch::x86_64::*;
///
/// # fn main() {
/// // round to nearest, and suppress exceptions:
/// (vendor::_MM_FROUND_TO_NEAREST_INT | vendor::_MM_FROUND_NO_EXC);
/// (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
/// // round down, and suppress exceptions:
/// (vendor::_MM_FROUND_TO_NEG_INF | vendor::_MM_FROUND_NO_EXC);
/// (_MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
/// // round up, and suppress exceptions:
/// (vendor::_MM_FROUND_TO_POS_INF | vendor::_MM_FROUND_NO_EXC);
/// (_MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
/// // truncate, and suppress exceptions:
/// (vendor::_MM_FROUND_TO_ZERO | vendor::_MM_FROUND_NO_EXC);
/// // use MXCSR.RC; see `vendor::_MM_SET_ROUNDING_MODE`:
/// vendor::_MM_FROUND_CUR_DIRECTION;
/// (_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
/// // use MXCSR.RC; see `_MM_SET_ROUNDING_MODE`:
/// _MM_FROUND_CUR_DIRECTION;
/// # }
/// ```
#[inline]
#[target_feature(enable = "sse4.1")]
@@ -817,7 +847,7 @@ extern "C" {
mod tests {
use std::mem;
use stdsimd_test::simd_test;
use x86::*;
use coresimd::x86::*;
#[simd_test = "sse4.1"]
unsafe fn test_mm_blendv_epi8() {

View File

@@ -5,8 +5,8 @@
#[cfg(test)]
use stdsimd_test::assert_instr;
use v128::*;
use x86::*;
use coresimd::v128::*;
use coresimd::x86::*;
/// String contains unsigned 8-bit characters *(Default)*
pub const _SIDD_UBYTE_OPS: i32 = 0b0000_0000;
@@ -102,11 +102,14 @@ pub unsafe fn _mm_cmpistrm(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
/// # #[macro_use] extern crate stdsimd;
/// #
/// # fn main() {
/// # if cfg_feature_enabled!("sse4.2") {
/// # if is_target_feature_detected!("sse4.2") {
/// # #[target_feature(enable = "sse4.2")]
/// # unsafe fn worker() {
///
/// use stdsimd::vendor::*;
/// #[cfg(target_arch = "x86")]
/// use stdsimd::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use stdsimd::arch::x86_64::*;
///
/// let haystack = b"This is a long string of text data\r\n\tthat extends
/// multiple lines";
@@ -142,10 +145,13 @@ pub unsafe fn _mm_cmpistrm(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
/// # #[macro_use] extern crate stdsimd;
/// #
/// # fn main() {
/// # if cfg_feature_enabled!("sse4.2") {
/// # if is_target_feature_detected!("sse4.2") {
/// # #[target_feature(enable = "sse4.2")]
/// # unsafe fn worker() {
/// use stdsimd::vendor::*;
/// #[cfg(target_arch = "x86")]
/// use stdsimd::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use stdsimd::arch::x86_64::*;
///
/// // Ensure your input is 16 byte aligned
/// let password = b"hunter2\0\0\0\0\0\0\0\0\0";
@@ -180,10 +186,14 @@ pub unsafe fn _mm_cmpistrm(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
/// # #[macro_use] extern crate stdsimd;
/// #
/// # fn main() {
/// # if cfg_feature_enabled!("sse4.2") {
/// # if is_target_feature_detected!("sse4.2") {
/// # #[target_feature(enable = "sse4.2")]
/// # unsafe fn worker() {
/// use stdsimd::vendor::*;
/// #[cfg(target_arch = "x86")]
/// use stdsimd::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use stdsimd::arch::x86_64::*;
///
/// # let b = b":;<=>?@[\\]^_`abc";
/// # let b = _mm_loadu_si128(b.as_ptr() as *const _);
///
@@ -217,10 +227,13 @@ pub unsafe fn _mm_cmpistrm(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
/// # #[macro_use] extern crate stdsimd;
/// #
/// # fn main() {
/// # if cfg_feature_enabled!("sse4.2") {
/// # if is_target_feature_detected!("sse4.2") {
/// # #[target_feature(enable = "sse4.2")]
/// # unsafe fn worker() {
/// use stdsimd::vendor::*;
/// #[cfg(target_arch = "x86")]
/// use stdsimd::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use stdsimd::arch::x86_64::*;
///
/// # let mut some_utf16_words = [0u16; 8];
/// # let mut more_utf16_words = [0u16; 8];
@@ -407,11 +420,14 @@ pub unsafe fn _mm_cmpestrm(
/// # #[macro_use] extern crate stdsimd;
/// #
/// # fn main() {
/// # if cfg_feature_enabled!("sse4.2") {
/// # if is_target_feature_detected!("sse4.2") {
/// # #[target_feature(enable = "sse4.2")]
/// # unsafe fn worker() {
///
/// use stdsimd::vendor::*;
/// #[cfg(target_arch = "x86")]
/// use stdsimd::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use stdsimd::arch::x86_64::*;
///
/// // The string we want to find a substring in
/// let haystack = b"Split \r\n\t line ";
@@ -625,7 +641,7 @@ mod tests {
use stdsimd_test::simd_test;
use std::ptr;
use x86::*;
use coresimd::x86::*;
// Currently one cannot `load` a &[u8] that is is less than 16
// in length. This makes loading strings less than 16 in length

View File

@@ -1,14 +1,13 @@
//! Supplemental Streaming SIMD Extensions 3 (SSSE3)
use core::mem;
use coresimd::simd_llvm::simd_shuffle16;
use coresimd::v128::*;
use coresimd::x86::*;
use mem;
#[cfg(test)]
use stdsimd_test::assert_instr;
use simd_llvm::simd_shuffle16;
use v128::*;
use x86::*;
/// Compute the absolute value of packed 8-bit signed integers in `a` and
/// return the unsigned results.
#[inline]
@@ -292,7 +291,7 @@ extern "C" {
mod tests {
use stdsimd_test::simd_test;
use x86::*;
use coresimd::x86::*;
#[simd_test = "ssse3"]
unsafe fn test_mm_abs_epi8() {

View File

@@ -263,7 +263,7 @@ pub unsafe fn _tzmsk_u64(x: u64) -> u64 {
mod tests {
use stdsimd_test::simd_test;
use x86::i586::tbm;
use coresimd::x86::i586::tbm;
/*
#[simd_test = "tbm"]

View File

@@ -137,9 +137,11 @@ pub unsafe fn _xrstors(mem_addr: *const u8, rs_mask: u64) {
#[cfg(test)]
mod tests {
use x86::i586::xsave;
use stdsimd_test::simd_test;
use std::fmt;
use std::prelude::v1::*;
use coresimd::x86::i586::xsave;
use stdsimd_test::simd_test;
#[repr(align(64))]
struct XsaveArea {

View File

@@ -6,7 +6,8 @@
//! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref].
//!
//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
use x86::__m128i;
use coresimd::x86::__m128i;
#[cfg(test)]
use stdsimd_test::assert_instr;
@@ -92,7 +93,7 @@ mod tests {
use stdsimd_test::simd_test;
use x86::*;
use coresimd::x86::*;
#[simd_test = "aes"]
unsafe fn test_mm_aesdec_si128() {

View File

@@ -8,9 +8,9 @@
//!
//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
use v64::*;
use x86::*;
use core::mem;
use coresimd::v64::*;
use coresimd::x86::*;
use mem;
#[cfg(test)]
use stdsimd_test::assert_instr;
@@ -487,7 +487,7 @@ extern "C" {
#[cfg(test)]
mod tests {
use x86::*;
use coresimd::x86::*;
use stdsimd_test::simd_test;
#[simd_test = "mmx"]

View File

@@ -1,6 +1,6 @@
//! `i686` Streaming SIMD Extensions (SSE)
use x86::*;
use coresimd::x86::*;
#[cfg(test)]
use stdsimd_test::assert_instr;
@@ -469,7 +469,7 @@ pub unsafe fn _mm_cvtps_pi8(a: __m128) -> __m64 {
#[cfg(test)]
mod tests {
use x86::*;
use coresimd::x86::*;
use stdsimd_test::simd_test;
#[simd_test = "sse,mmx"]

View File

@@ -1,9 +1,8 @@
//! `i686`'s Streaming SIMD Extensions 2 (SSE2)
use core::mem;
use simd_llvm::simd_extract;
use x86::*;
use coresimd::simd_llvm::simd_extract;
use coresimd::x86::*;
use mem;
#[cfg(test)]
use stdsimd_test::assert_instr;
@@ -137,7 +136,7 @@ mod tests {
use stdsimd_test::simd_test;
use x86::*;
use coresimd::x86::*;
#[simd_test = "sse2,mmx"]
unsafe fn test_mm_add_si64() {

View File

@@ -1,7 +1,7 @@
//! `i686`'s Streaming SIMD Extensions 4.1 (SSE4.1)
use v128::*;
use x86::*;
use coresimd::v128::*;
use coresimd::x86::*;
#[cfg(test)]
use stdsimd_test::assert_instr;
@@ -138,7 +138,7 @@ pub unsafe fn _mm_test_mix_ones_zeros(a: __m128i, mask: __m128i) -> i32 {
#[cfg(test)]
mod tests {
use stdsimd_test::simd_test;
use x86::*;
use coresimd::x86::*;
#[simd_test = "sse4.1"]
unsafe fn test_mm_testz_si128() {

View File

@@ -1,8 +1,8 @@
//! `i686`'s Streaming SIMD Extensions 4.2 (SSE4.2)
use simd_llvm::*;
use v128::*;
use x86::*;
use coresimd::simd_llvm::*;
use coresimd::v128::*;
use coresimd::x86::*;
#[cfg(test)]
use stdsimd_test::assert_instr;
@@ -18,7 +18,7 @@ pub unsafe fn _mm_cmpgt_epi64(a: __m128i, b: __m128i) -> __m128i {
#[cfg(test)]
mod tests {
use x86::*;
use coresimd::x86::*;
use stdsimd_test::simd_test;

View File

@@ -1,8 +1,8 @@
//! `i686`'s Streaming SIMD Extensions 4a (`SSE4a`)
use core::mem;
use v128::*;
use x86::*;
use coresimd::v128::*;
use coresimd::x86::*;
use mem;
#[cfg(test)]
use stdsimd_test::assert_instr;
@@ -75,7 +75,7 @@ pub unsafe fn _mm_stream_ss(p: *mut f32, a: __m128) {
#[cfg(test)]
mod tests {
use stdsimd_test::simd_test;
use x86::*;
use coresimd::x86::*;
#[simd_test = "sse4a"]
unsafe fn test_mm_extract_si64() {

View File

@@ -3,7 +3,7 @@
#[cfg(test)]
use stdsimd_test::assert_instr;
use x86::*;
use coresimd::x86::*;
/// Compute the absolute value of packed 8-bit integers in `a` and
/// return the unsigned results.
@@ -223,7 +223,7 @@ extern "C" {
mod tests {
use stdsimd_test::simd_test;
use x86::*;
use coresimd::x86::*;
#[simd_test = "ssse3,mmx"]
unsafe fn test_mm_abs_pi8() {

View File

@@ -1,6 +1,7 @@
//! `x86` and `x86_64` intrinsics.
use core::mem;
use prelude::v1::*;
use mem;
#[macro_use]
mod macros;
@@ -59,13 +60,16 @@ types! {
/// # fn main() {
/// # #[target_feature(enable = "mmx")]
/// # unsafe fn foo() {
/// use stdsimd::vendor::*;
/// #[cfg(target_arch = "x86")]
/// use stdsimd::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use stdsimd::arch::x86_64::*;
///
/// let all_bytes_zero = _mm_setzero_si64();
/// let all_bytes_one = _mm_set1_pi8(1);
/// let two_i32 = _mm_set_pi32(1, 2);
/// # }
/// # if cfg_feature_enabled!("mmx") { unsafe { foo() } }
/// # if is_target_feature_detected!("mmx") { unsafe { foo() } }
/// # }
/// ```
pub struct __m64(i64);
@@ -102,13 +106,16 @@ types! {
/// # fn main() {
/// # #[target_feature(enable = "sse2")]
/// # unsafe fn foo() {
/// use stdsimd::vendor::*;
/// #[cfg(target_arch = "x86")]
/// use stdsimd::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use stdsimd::arch::x86_64::*;
///
/// let all_bytes_zero = _mm_setzero_si128();
/// let all_bytes_one = _mm_set1_epi8(1);
/// let four_i32 = _mm_set_epi32(1, 2, 3, 4);
/// # }
/// # if cfg_feature_enabled!("sse2") { unsafe { foo() } }
/// # if is_target_feature_detected!("sse2") { unsafe { foo() } }
/// # }
/// ```
pub struct __m128i(i64, i64);
@@ -138,13 +145,16 @@ types! {
/// # fn main() {
/// # #[target_feature(enable = "sse")]
/// # unsafe fn foo() {
/// use stdsimd::vendor::*;
/// #[cfg(target_arch = "x86")]
/// use stdsimd::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use stdsimd::arch::x86_64::*;
///
/// let four_zeros = _mm_setzero_ps();
/// let four_ones = _mm_set1_ps(1.0);
/// let four_floats = _mm_set_ps(1.0, 2.0, 3.0, 4.0);
/// # }
/// # if cfg_feature_enabled!("sse") { unsafe { foo() } }
/// # if is_target_feature_detected!("sse") { unsafe { foo() } }
/// # }
/// ```
pub struct __m128(f32, f32, f32, f32);
@@ -174,13 +184,16 @@ types! {
/// # fn main() {
/// # #[target_feature(enable = "sse")]
/// # unsafe fn foo() {
/// use stdsimd::vendor::*;
/// #[cfg(target_arch = "x86")]
/// use stdsimd::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use stdsimd::arch::x86_64::*;
///
/// let two_zeros = _mm_setzero_pd();
/// let two_ones = _mm_set1_pd(1.0);
/// let two_floats = _mm_set_pd(1.0, 2.0);
/// # }
/// # if cfg_feature_enabled!("sse") { unsafe { foo() } }
/// # if is_target_feature_detected!("sse") { unsafe { foo() } }
/// # }
/// ```
pub struct __m128d(f64, f64);
@@ -214,13 +227,16 @@ types! {
/// # fn main() {
/// # #[target_feature(enable = "avx")]
/// # unsafe fn foo() {
/// use stdsimd::vendor::*;
/// #[cfg(target_arch = "x86")]
/// use stdsimd::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use stdsimd::arch::x86_64::*;
///
/// let all_bytes_zero = _mm256_setzero_si256();
/// let all_bytes_one = _mm256_set1_epi8(1);
/// let eight_i32 = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
/// # }
/// # if cfg_feature_enabled!("avx") { unsafe { foo() } }
/// # if is_target_feature_detected!("avx") { unsafe { foo() } }
/// # }
/// ```
pub struct __m256i(i64, i64, i64, i64);
@@ -250,13 +266,16 @@ types! {
/// # fn main() {
/// # #[target_feature(enable = "sse")]
/// # unsafe fn foo() {
/// use stdsimd::vendor::*;
/// #[cfg(target_arch = "x86")]
/// use stdsimd::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use stdsimd::arch::x86_64::*;
///
/// let eight_zeros = _mm256_setzero_ps();
/// let eight_ones = _mm256_set1_ps(1.0);
/// let eight_floats = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
/// # }
/// # if cfg_feature_enabled!("sse") { unsafe { foo() } }
/// # if is_target_feature_detected!("sse") { unsafe { foo() } }
/// # }
/// ```
pub struct __m256(f32, f32, f32, f32, f32, f32, f32, f32);
@@ -286,13 +305,16 @@ types! {
/// # fn main() {
/// # #[target_feature(enable = "avx")]
/// # unsafe fn foo() {
/// use stdsimd::vendor::*;
/// #[cfg(target_arch = "x86")]
/// use stdsimd::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use stdsimd::arch::x86_64::*;
///
/// let four_zeros = _mm256_setzero_pd();
/// let four_ones = _mm256_set1_pd(1.0);
/// let four_floats = _mm256_set_pd(1.0, 2.0, 3.0, 4.0);
/// # }
/// # if cfg_feature_enabled!("avx") { unsafe { foo() } }
/// # if is_target_feature_detected!("avx") { unsafe { foo() } }
/// # }
/// ```
pub struct __m256d(f64, f64, f64, f64);
@@ -309,42 +331,42 @@ trait m128iExt: Sized {
fn as_m128i(self) -> __m128i;
#[inline]
fn as_u8x16(self) -> ::v128::u8x16 {
fn as_u8x16(self) -> ::coresimd::v128::u8x16 {
unsafe { mem::transmute(self.as_m128i()) }
}
#[inline]
fn as_u16x8(self) -> ::v128::u16x8 {
fn as_u16x8(self) -> ::coresimd::v128::u16x8 {
unsafe { mem::transmute(self.as_m128i()) }
}
#[inline]
fn as_u32x4(self) -> ::v128::u32x4 {
fn as_u32x4(self) -> ::coresimd::v128::u32x4 {
unsafe { mem::transmute(self.as_m128i()) }
}
#[inline]
fn as_u64x2(self) -> ::v128::u64x2 {
fn as_u64x2(self) -> ::coresimd::v128::u64x2 {
unsafe { mem::transmute(self.as_m128i()) }
}
#[inline]
fn as_i8x16(self) -> ::v128::i8x16 {
fn as_i8x16(self) -> ::coresimd::v128::i8x16 {
unsafe { mem::transmute(self.as_m128i()) }
}
#[inline]
fn as_i16x8(self) -> ::v128::i16x8 {
fn as_i16x8(self) -> ::coresimd::v128::i16x8 {
unsafe { mem::transmute(self.as_m128i()) }
}
#[inline]
fn as_i32x4(self) -> ::v128::i32x4 {
fn as_i32x4(self) -> ::coresimd::v128::i32x4 {
unsafe { mem::transmute(self.as_m128i()) }
}
#[inline]
fn as_i64x2(self) -> ::v128::i64x2 {
fn as_i64x2(self) -> ::coresimd::v128::i64x2 {
unsafe { mem::transmute(self.as_m128i()) }
}
}
@@ -362,42 +384,42 @@ trait m256iExt: Sized {
fn as_m256i(self) -> __m256i;
#[inline]
fn as_u8x32(self) -> ::v256::u8x32 {
fn as_u8x32(self) -> ::coresimd::v256::u8x32 {
unsafe { mem::transmute(self.as_m256i()) }
}
#[inline]
fn as_u16x16(self) -> ::v256::u16x16 {
fn as_u16x16(self) -> ::coresimd::v256::u16x16 {
unsafe { mem::transmute(self.as_m256i()) }
}
#[inline]
fn as_u32x8(self) -> ::v256::u32x8 {
fn as_u32x8(self) -> ::coresimd::v256::u32x8 {
unsafe { mem::transmute(self.as_m256i()) }
}
#[inline]
fn as_u64x4(self) -> ::v256::u64x4 {
fn as_u64x4(self) -> ::coresimd::v256::u64x4 {
unsafe { mem::transmute(self.as_m256i()) }
}
#[inline]
fn as_i8x32(self) -> ::v256::i8x32 {
fn as_i8x32(self) -> ::coresimd::v256::i8x32 {
unsafe { mem::transmute(self.as_m256i()) }
}
#[inline]
fn as_i16x16(self) -> ::v256::i16x16 {
fn as_i16x16(self) -> ::coresimd::v256::i16x16 {
unsafe { mem::transmute(self.as_m256i()) }
}
#[inline]
fn as_i32x8(self) -> ::v256::i32x8 {
fn as_i32x8(self) -> ::coresimd::v256::i32x8 {
unsafe { mem::transmute(self.as_m256i()) }
}
#[inline]
fn as_i64x4(self) -> ::v256::i64x4 {
fn as_i64x4(self) -> ::coresimd::v256::i64x4 {
unsafe { mem::transmute(self.as_m256i()) }
}
}

View File

@@ -1,6 +1,6 @@
//! Utilities used in testing the x86 intrinsics
use x86::*;
use coresimd::x86::*;
#[target_feature(enable = "mmx")]
pub unsafe fn assert_eq_m64(a: __m64, b: __m64) {
@@ -107,7 +107,7 @@ pub unsafe fn get_m256(a: __m256, idx: usize) -> f32 {
// which doesn't exist on x86!
#[cfg(target_arch = "x86")]
mod x86_polyfill {
use x86::*;
use coresimd::x86::*;
pub unsafe fn _mm_insert_epi64(a: __m128i, val: i64, idx: i32) -> __m128i {
union A {

View File

@@ -42,7 +42,7 @@ pub unsafe fn _popcnt64(x: i64) -> i32 {
mod tests {
use stdsimd_test::simd_test;
use x86::*;
use coresimd::x86::*;
#[simd_test = "lzcnt"]
unsafe fn test_lzcnt_u64() {

View File

@@ -13,10 +13,9 @@
//! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
//! [wiki]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions
use core::mem;
use simd_llvm::*;
use x86::*;
use coresimd::simd_llvm::*;
use coresimd::x86::*;
use mem;
/// Copy `a` to result, and insert the 64-bit integer `i` into result
/// at the location specified by `index`.
@@ -32,7 +31,7 @@ pub unsafe fn _mm256_insert_epi64(a: __m256i, i: i64, index: i32) -> __m256i {
mod tests {
use stdsimd_test::simd_test;
use x86::*;
use coresimd::x86::*;
#[simd_test = "avx"]
unsafe fn test_mm256_insert_epi64() {

View File

@@ -18,8 +18,8 @@
//! [wiki_avx]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions
//! [wiki_fma]: https://en.wikipedia.org/wiki/Fused_multiply-accumulate
use simd_llvm::*;
use x86::*;
use coresimd::simd_llvm::*;
use coresimd::x86::*;
/// Extract a 64-bit integer from `a`, selected with `imm8`.
#[inline]
@@ -35,7 +35,7 @@ pub unsafe fn _mm256_extract_epi64(a: __m256i, imm8: i32) -> i64 {
mod tests {
use stdsimd_test::simd_test;
use x86::*;
use coresimd::x86::*;
#[simd_test = "avx2"]
unsafe fn test_mm256_extract_epi64() {

View File

@@ -101,7 +101,7 @@ extern "C" {
mod tests {
use stdsimd_test::simd_test;
use x86::*;
use coresimd::x86::*;
#[simd_test = "bmi"]
unsafe fn test_bextr_u64() {

View File

@@ -69,7 +69,7 @@ extern "C" {
mod tests {
use stdsimd_test::simd_test;
use x86::*;
use coresimd::x86::*;
#[simd_test = "bmi2"]
unsafe fn test_pext_u64() {

View File

@@ -51,7 +51,7 @@ pub unsafe fn _fxrstor64(mem_addr: *const u8) {
#[cfg(test)]
mod tests {
use x86::x86_64::fxsr;
use coresimd::x86::x86_64::fxsr;
use stdsimd_test::simd_test;
use std::fmt;

View File

@@ -1,6 +1,6 @@
//! `x86_64` Streaming SIMD Extensions (SSE)
use x86::*;
use coresimd::x86::*;
#[cfg(test)]
use stdsimd_test::assert_instr;
@@ -66,7 +66,7 @@ mod tests {
use stdsimd_test::simd_test;
use x86::*;
use coresimd::x86::*;
#[simd_test = "sse"]
unsafe fn test_mm_cvtss_si64() {

View File

@@ -1,7 +1,8 @@
//! `x86_64`'s Streaming SIMD Extensions 2 (SSE2)
use x86::*;
use simd_llvm::*;
use coresimd::x86::*;
use coresimd::simd_llvm::*;
use intrinsics;
#[cfg(test)]
use stdsimd_test::assert_instr;
@@ -55,7 +56,7 @@ pub unsafe fn _mm_cvttsd_si64x(a: __m128d) -> i64 {
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movnti))]
pub unsafe fn _mm_stream_si64(mem_addr: *mut i64, a: i64) {
::core::intrinsics::nontemporal_store(mem_addr, a);
intrinsics::nontemporal_store(mem_addr, a);
}
/// Return a vector whose lowest element is `a` and all higher elements are
@@ -116,7 +117,7 @@ mod tests {
use stdsimd_test::simd_test;
use x86::*;
use coresimd::x86::*;
#[simd_test = "sse2"]
unsafe fn test_mm_cvtsd_si64() {

View File

@@ -1,9 +1,8 @@
//! `i686`'s Streaming SIMD Extensions 4.1 (SSE4.1)
use core::mem;
use x86::*;
use simd_llvm::*;
use coresimd::x86::*;
use coresimd::simd_llvm::*;
use mem;
#[cfg(test)]
use stdsimd_test::assert_instr;
@@ -32,7 +31,7 @@ pub unsafe fn _mm_insert_epi64(a: __m128i, i: i64, imm8: i32) -> __m128i {
#[cfg(test)]
mod tests {
use stdsimd_test::simd_test;
use x86::*;
use coresimd::x86::*;
#[simd_test = "sse4.1"]
unsafe fn test_mm_extract_epi64() {

View File

@@ -20,7 +20,7 @@ pub unsafe fn _mm_crc32_u64(crc: u64, v: u64) -> u64 {
#[cfg(test)]
mod tests {
use x86::*;
use coresimd::x86::*;
use stdsimd_test::simd_test;

View File

@@ -111,7 +111,7 @@ pub unsafe fn _xrstors64(mem_addr: *const u8, rs_mask: u64) {
/*
#[cfg(test)]
mod tests {
use x86::x86_64::xsave;
use coresimd::x86::x86_64::xsave;
use stdsimd_test::simd_test;
use std::fmt;

View File

@@ -19,9 +19,8 @@ is-it-maintained-open-issues = { repository = "rust-lang-nursery/stdsimd" }
maintenance = { status = "experimental" }
[dev-dependencies]
cupid = "0.5.0"
stdsimd-test = { version = "0.*", path = "../stdsimd-test" }
stdsimd = { version = "0.0.3", path = ".." }
stdsimd = { version = "0.0.3", path = "../stdsimd" }
[features]
# Internal-usage only: denies all warnings.

View File

@@ -0,0 +1,82 @@
//! SIMD and vendor intrinsics support library.
//!
//! This documentation is only for one particular architecture, you can find
//! others at:
//!
//! * [i686](https://rust-lang-nursery.github.io/stdsimd/i686/stdsimd/)
//! * [`x86_64`](https://rust-lang-nursery.github.io/stdsimd/x86_64/stdsimd/)
//! * [arm](https://rust-lang-nursery.github.io/stdsimd/arm/stdsimd/)
//! * [aarch64](https://rust-lang-nursery.github.io/stdsimd/aarch64/stdsimd/)
#![cfg_attr(feature = "strict", deny(warnings))]
#![allow(dead_code)]
#![allow(unused_features)]
#![feature(const_fn, link_llvm_intrinsics, platform_intrinsics, repr_simd,
simd_ffi, target_feature, cfg_target_feature, i128_type, asm,
integer_atomics, stmt_expr_attributes, core_intrinsics,
crate_in_paths, no_core, attr_literals, rustc_attrs)]
#![cfg_attr(test, feature(proc_macro, test, attr_literals, abi_vectorcall))]
#![cfg_attr(feature = "cargo-clippy",
allow(inline_always, too_many_arguments, cast_sign_loss,
cast_lossless, cast_possible_wrap,
cast_possible_truncation, cast_precision_loss,
shadow_reuse, cyclomatic_complexity, similar_names,
many_single_char_names))]
#![cfg_attr(test, allow(unused_imports))]
#![no_core]
#[cfg_attr(not(test), macro_use)]
extern crate core as _core;
#[cfg(test)]
#[macro_use]
extern crate std;
#[cfg(test)]
extern crate stdsimd_test;
#[cfg(test)]
extern crate test;
#[cfg(test)]
#[macro_use]
extern crate stdsimd;
#[path = "../../../coresimd/mod.rs"]
mod coresimd;
pub use coresimd::simd;
pub mod arch {
#[cfg(target_arch = "x86")]
pub mod x86 { pub use coresimd::vendor::*; }
#[cfg(target_arch = "x86_64")]
pub mod x86_64 { pub use coresimd::vendor::*; }
#[cfg(target_arch = "arm")]
pub mod arm { pub use coresimd::vendor::*; }
#[cfg(target_arch = "aarch64")]
pub mod aarch64 { pub use coresimd::vendor::*; }
}
#[allow(unused_imports)]
use _core::clone;
#[allow(unused_imports)]
use _core::cmp;
#[allow(unused_imports)]
use _core::convert;
#[allow(unused_imports)]
use _core::fmt;
#[allow(unused_imports)]
use _core::intrinsics;
#[allow(unused_imports)]
use _core::iter;
#[allow(unused_imports)]
use _core::marker;
#[allow(unused_imports)]
use _core::mem;
#[allow(unused_imports)]
use _core::ops;
#[allow(unused_imports)]
use _core::option;
#[allow(unused_imports)]
use _core::prelude;
#[allow(unused_imports)]
use _core::ptr;
#[allow(unused_imports)]
use _core::result;

View File

@@ -0,0 +1,47 @@
#![feature(cfg_target_feature)]
#![cfg_attr(feature = "strict", deny(warnings))]
#![cfg_attr(feature = "cargo-clippy",
allow(option_unwrap_used, print_stdout, use_debug))]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[macro_use]
extern crate stdsimd;
#[test]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
fn x86_all() {
println!("sse: {:?}", is_target_feature_detected!("sse"));
println!("sse2: {:?}", is_target_feature_detected!("sse2"));
println!("sse3: {:?}", is_target_feature_detected!("sse3"));
println!("ssse3: {:?}", is_target_feature_detected!("ssse3"));
println!("sse4.1: {:?}", is_target_feature_detected!("sse4.1"));
println!("sse4.2: {:?}", is_target_feature_detected!("sse4.2"));
println!("sse4a: {:?}", is_target_feature_detected!("sse4a"));
println!("avx: {:?}", is_target_feature_detected!("avx"));
println!("avx2: {:?}", is_target_feature_detected!("avx2"));
println!("avx512f {:?}", is_target_feature_detected!("avx512f"));
println!("avx512cd {:?}", is_target_feature_detected!("avx512cd"));
println!("avx512er {:?}", is_target_feature_detected!("avx512er"));
println!("avx512pf {:?}", is_target_feature_detected!("avx512pf"));
println!("avx512bw {:?}", is_target_feature_detected!("avx512bw"));
println!("avx512dq {:?}", is_target_feature_detected!("avx512dq"));
println!("avx512vl {:?}", is_target_feature_detected!("avx512vl"));
println!("avx512_ifma {:?}", is_target_feature_detected!("avx512ifma"));
println!("avx512_vbmi {:?}", is_target_feature_detected!("avx512vbmi"));
println!(
"avx512_vpopcntdq {:?}",
is_target_feature_detected!("avx512vpopcntdq")
);
println!("fma: {:?}", is_target_feature_detected!("fma"));
println!("abm: {:?}", is_target_feature_detected!("abm"));
println!("bmi: {:?}", is_target_feature_detected!("bmi"));
println!("bmi2: {:?}", is_target_feature_detected!("bmi2"));
println!("tbm: {:?}", is_target_feature_detected!("tbm"));
println!("popcnt: {:?}", is_target_feature_detected!("popcnt"));
println!("lzcnt: {:?}", is_target_feature_detected!("lzcnt"));
println!("fxsr: {:?}", is_target_feature_detected!("fxsr"));
println!("xsave: {:?}", is_target_feature_detected!("xsave"));
println!("xsaveopt: {:?}", is_target_feature_detected!("xsaveopt"));
println!("xsaves: {:?}", is_target_feature_detected!("xsaves"));
println!("xsavec: {:?}", is_target_feature_detected!("xsavec"));
}

View File

@@ -58,7 +58,7 @@ pub fn simd_test(
for feature in target_features {
let q = quote_spanned! {
proc_macro2::Span::call_site() =>
cfg_feature_enabled!(#feature) &&
is_target_feature_detected!(#feature) &&
};
q.to_tokens(&mut cfg_target_features);
}

View File

@@ -4,8 +4,8 @@ version = "0.1.0"
authors = ["Alex Crichton <alex@alexcrichton.com>"]
[dependencies]
assert-instr-macro = { path = "assert-instr-macro" }
simd-test-macro = { path = "simd-test-macro" }
assert-instr-macro = { path = "../assert-instr-macro" }
simd-test-macro = { path = "../simd-test-macro" }
backtrace = "0.3"
cc = "1.0"
lazy_static = "0.2"

View File

@@ -3,7 +3,7 @@ use std::path::Path;
fn main() {
let dir = Path::new(env!("CARGO_MANIFEST_DIR"));
let root = dir.parent().unwrap();
let root = root.join("coresimd/src/x86");
let root = root.join("../coresimd/x86");
walk(&root);
}

View File

@@ -22,7 +22,7 @@ macro_rules! my_quote {
pub fn x86_functions(input: TokenStream) -> TokenStream {
let dir = Path::new(env!("CARGO_MANIFEST_DIR"));
let root = dir.parent().unwrap();
let root = root.join("coresimd/src/x86");
let root = root.join("../coresimd/x86");
let mut files = Vec::new();
walk(&root, &mut files);

View File

@@ -0,0 +1,37 @@
[package]
name = "stdsimd"
version = "0.0.3"
authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = "SIMD support in Rust's standard library."
documentation = "https://docs.rs/stdsimd"
homepage = "https://github.com/rust-lang-nursery/stdsimd"
repository = "https://github.com/rust-lang-nursery/stdsimd"
readme = "README.md"
keywords = ["std", "simd", "intrinsics"]
categories = ["hardware-support"]
license = "MIT/Apache-2.0"
[badges]
travis-ci = { repository = "rust-lang-nursery/stdsimd" }
appveyor = { repository = "rust-lang-nursery/stdsimd" }
is-it-maintained-issue-resolution = { repository = "rust-lang-nursery/stdsimd" }
is-it-maintained-open-issues = { repository = "rust-lang-nursery/stdsimd" }
maintenance = { status = "experimental" }
[dependencies]
coresimd = { version = "0.0.3", path = "../coresimd" }
libc = "0.2"
cfg-if = "0.1"
[dev-dependencies]
auxv = "0.3.3"
quickcheck = "0.6"
rand = "0.4"
cupid = "0.5.0"
[features]
# Internal-usage only: denies all warnings.
strict = [ "coresimd/strict" ]
# Internal-usage only: enables only those intrinsics supported by Intel's
# Software Development Environment (SDE).
intel_sde = [ "coresimd/intel_sde" ]

View File

@@ -25,7 +25,7 @@
//!
//! * `cfg!(target_feature = "feature")`: returns `true` if the `feature` is
//! enabled in all CPUs that the binary will run on (at compile-time)
//! * `cfg_feature_enabled!("feature")`: returns `true` if the `feature` is
//! * `is_target_feature_detected!("feature")`: returns `true` if the `feature` is
//! enabled in the CPU in which the binary is currently running on (at
//! run-time, unless the result is known at compile time)
//!
@@ -36,7 +36,6 @@
//!
//! #[macro_use]
//! extern crate stdsimd;
//! use stdsimd::vendor;
//! use stdsimd::simd::i32x4;
//!
//! fn main() {
@@ -65,11 +64,16 @@
//! #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
//! #[target_feature(enable = "sse2")]
//! unsafe fn sum_sse2(x: i32x4) -> i32 {
//! #[cfg(target_arch = "x86")]
//! use stdsimd::arch::x86::*;;
//! #[cfg(target_arch = "x86_64")]
//! use stdsimd::arch::x86_64::*;;
//! use std::mem;
//! let x: vendor::__m128i = mem::transmute(x);
//! let x = vendor::_mm_add_epi32(x, vendor::_mm_srli_si128(x, 8));
//! let x = vendor::_mm_add_epi32(x, vendor::_mm_srli_si128(x, 4));
//! let ret = vendor::_mm_cvtsi128_si32(x);
//!
//! let x: __m128i = mem::transmute(x);
//! let x = _mm_add_epi32(x, _mm_srli_si128(x, 8));
//! let x = _mm_add_epi32(x, _mm_srli_si128(x, 4));
//! let ret = _mm_cvtsi128_si32(x);
//! mem::transmute(ret)
//! }
//!
@@ -97,7 +101,7 @@
//! {
//! // If SSE2 is not enabled at compile-time, this
//! // detects whether SSE2 is available at run-time:
//! if cfg_feature_enabled!("sse2") {
//! if is_target_feature_detected!("sse2") {
//! return unsafe { sum_sse2(x) };
//! }
//! }
@@ -128,56 +132,25 @@
//! [simd_soundness_bug]: https://github.com/rust-lang/rust/issues/44367
//! [target_feature_impr]: https://github.com/rust-lang/rust/issues/44839
#![feature(const_fn, const_size_of, use_extern_macros, cfg_target_feature)]
#![feature(const_fn, integer_atomics)]
#![cfg_attr(target_os = "linux", feature(linkage))]
#![no_std]
extern crate std as _std;
extern crate coresimd;
/// Re-export run-time feature detection macros.
#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "arm",
target_arch = "aarch64", target_arch = "powerpc64"))]
pub use coresimd::__unstable_detect_feature;
/// Platform dependent vendor intrinsics.
pub mod vendor {
#[doc(inline)]
pub use coresimd::vendor::*;
}
/// Run-time feature detection.
#[doc(hidden)]
pub mod __vendor_runtime {
#[cfg(any(target_arch = "x86", target_arch = "x86_64",
all(target_os = "linux",
any(target_arch = "arm", target_arch = "aarch64",
target_arch = "powerpc64"))))]
pub use runtime::std::*;
}
/// Platform independent SIMD vector types and operations.
pub mod simd {
#[doc(inline)]
pub use coresimd::simd::*;
}
/// The `stdsimd` run-time.
extern crate libc;
#[macro_use]
#[cfg(any(target_arch = "x86", target_arch = "x86_64",
all(target_os = "linux",
any(target_arch = "arm", target_arch = "aarch64",
target_arch = "powerpc64"))))]
mod runtime;
extern crate cfg_if;
/// Error gracefully in architectures without run-time detection support.
#[cfg(not(any(target_arch = "x86", target_arch = "x86_64",
all(target_os = "linux",
any(target_arch = "arm", target_arch = "aarch64",
target_arch = "powerpc64")))))]
#[doc(hidden)]
#[macro_export]
macro_rules! cfg_feature_enabled {
($name:tt) => (
{
compile_error!("cfg_target_feature! is not supported in this architecture")
}
)
}
#[cfg(test)]
#[macro_use]
extern crate std;
#[path = "../../../stdsimd/mod.rs"]
mod stdsimd;
pub use stdsimd::*;
pub use _std::prelude;
pub use _std::fs;
pub use _std::io;

View File

@@ -0,0 +1,72 @@
#![feature(cfg_target_feature)]
#![cfg_attr(feature = "strict", deny(warnings))]
#![cfg_attr(feature = "cargo-clippy",
allow(option_unwrap_used, use_debug, print_stdout))]
#[cfg(any(target_arch = "arm", target_arch = "aarch64",
target_arch = "x86", target_arch = "x86_64",
target_arch = "powerpc64"))]
#[macro_use]
extern crate stdsimd;
#[test]
#[cfg(all(target_arch = "arm", target_os = "linux"))]
fn arm_linux() {
println!("neon: {}", is_target_feature_detected!("neon"));
println!("pmull: {}", is_target_feature_detected!("pmull"));
}
#[test]
#[cfg(all(target_arch = "aarch64", target_os = "linux"))]
fn aarch64_linux() {
println!("neon: {}", is_target_feature_detected!("neon"));
println!("asimd: {}", is_target_feature_detected!("asimd"));
println!("pmull: {}", is_target_feature_detected!("pmull"));
}
#[test]
#[cfg(all(target_arch = "powerpc64", target_os = "linux"))]
fn powerpc64_linux() {
println!("altivec: {}", is_target_feature_detected!("altivec"));
println!("vsx: {}", is_target_feature_detected!("vsx"));
println!("power8: {}", is_target_feature_detected!("power8"));
}
#[test]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
fn x86_all() {
println!("sse: {:?}", is_target_feature_detected!("sse"));
println!("sse2: {:?}", is_target_feature_detected!("sse2"));
println!("sse3: {:?}", is_target_feature_detected!("sse3"));
println!("ssse3: {:?}", is_target_feature_detected!("ssse3"));
println!("sse4.1: {:?}", is_target_feature_detected!("sse4.1"));
println!("sse4.2: {:?}", is_target_feature_detected!("sse4.2"));
println!("sse4a: {:?}", is_target_feature_detected!("sse4a"));
println!("avx: {:?}", is_target_feature_detected!("avx"));
println!("avx2: {:?}", is_target_feature_detected!("avx2"));
println!("avx512f {:?}", is_target_feature_detected!("avx512f"));
println!("avx512cd {:?}", is_target_feature_detected!("avx512cd"));
println!("avx512er {:?}", is_target_feature_detected!("avx512er"));
println!("avx512pf {:?}", is_target_feature_detected!("avx512pf"));
println!("avx512bw {:?}", is_target_feature_detected!("avx512bw"));
println!("avx512dq {:?}", is_target_feature_detected!("avx512dq"));
println!("avx512vl {:?}", is_target_feature_detected!("avx512vl"));
println!("avx512_ifma {:?}", is_target_feature_detected!("avx512ifma"));
println!("avx512_vbmi {:?}", is_target_feature_detected!("avx512vbmi"));
println!(
"avx512_vpopcntdq {:?}",
is_target_feature_detected!("avx512vpopcntdq")
);
println!("fma: {:?}", is_target_feature_detected!("fma"));
println!("abm: {:?}", is_target_feature_detected!("abm"));
println!("bmi: {:?}", is_target_feature_detected!("bmi"));
println!("bmi2: {:?}", is_target_feature_detected!("bmi2"));
println!("tbm: {:?}", is_target_feature_detected!("tbm"));
println!("popcnt: {:?}", is_target_feature_detected!("popcnt"));
println!("lzcnt: {:?}", is_target_feature_detected!("lzcnt"));
println!("fxsr: {:?}", is_target_feature_detected!("fxsr"));
println!("xsave: {:?}", is_target_feature_detected!("xsave"));
println!("xsaveopt: {:?}", is_target_feature_detected!("xsaveopt"));
println!("xsaves: {:?}", is_target_feature_detected!("xsaves"));
println!("xsavec: {:?}", is_target_feature_detected!("xsavec"));
}

Some files were not shown because too many files have changed in this diff Show More