Reorganize and refactor source tree (#324)
With RFC 2325 looking close to being accepted, I took a crack at reorganizing this repository to being more amenable for inclusion in libstd/libcore. My current plan is to add stdsimd as a submodule in rust-lang/rust and then use `#[path]` to include the modules directly into libstd/libcore. Before this commit, however, the source code of coresimd/stdsimd themselves were not quite ready for this. Imports wouldn't compile for one reason or another, and the organization was also different than the RFC itself! In addition to moving a lot of files around, this commit has the following major changes: * The `cfg_feature_enabled!` macro is now renamed to `is_target_feature_detected!` * The `vendor` module is now called `arch`. * Under the `arch` module is a suite of modules like `x86`, `x86_64`, etc. One per `cfg!(target_arch)`. * The `is_target_feature_detected!` macro was removed from coresimd. Unfortunately libcore has no ability to export unstable macros, so for now all feature detection is canonicalized in stdsimd. The `coresimd` and `stdsimd` crates have been updated to the planned organization in RFC 2325 as well. The runtime bits saw the largest amount of refactoring, seeing a good deal of simplification without the core/std split.
This commit is contained in:
@@ -24,7 +24,7 @@ matrix:
|
||||
- env: DOCUMENTATION
|
||||
install: true
|
||||
script: ci/dox.sh
|
||||
- script: cargo test --manifest-path stdsimd-verify/Cargo.toml
|
||||
- script: cargo test --manifest-path crates/stdsimd-verify/Cargo.toml
|
||||
install: true
|
||||
- env: RUSTFMT=On TARGET=x86_64-unknown-linux-gnu NO_ADD=1
|
||||
script: |
|
||||
|
||||
@@ -1,33 +1,8 @@
|
||||
[package]
|
||||
name = "stdsimd"
|
||||
version = "0.0.3"
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
description = "SIMD support in Rust's standard library."
|
||||
documentation = "https://docs.rs/stdsimd"
|
||||
homepage = "https://github.com/rust-lang-nursery/stdsimd"
|
||||
repository = "https://github.com/rust-lang-nursery/stdsimd"
|
||||
readme = "README.md"
|
||||
keywords = ["std", "simd", "intrinsics"]
|
||||
categories = ["hardware-support"]
|
||||
license = "MIT/Apache-2.0"
|
||||
|
||||
[workspace]
|
||||
members = ["stdsimd-verify"]
|
||||
|
||||
[badges]
|
||||
travis-ci = { repository = "rust-lang-nursery/stdsimd" }
|
||||
appveyor = { repository = "rust-lang-nursery/stdsimd" }
|
||||
is-it-maintained-issue-resolution = { repository = "rust-lang-nursery/stdsimd" }
|
||||
is-it-maintained-open-issues = { repository = "rust-lang-nursery/stdsimd" }
|
||||
maintenance = { status = "experimental" }
|
||||
|
||||
[dependencies]
|
||||
coresimd = { version = "0.0.3", path = "coresimd/" }
|
||||
|
||||
[dev-dependencies]
|
||||
auxv = "0.3.3"
|
||||
quickcheck = "0.6"
|
||||
rand = "0.4"
|
||||
members = [
|
||||
"crates/stdsimd-verify",
|
||||
"crates/stdsimd",
|
||||
]
|
||||
|
||||
[profile.release]
|
||||
debug = true
|
||||
@@ -36,10 +11,3 @@ opt-level = 3
|
||||
[profile.bench]
|
||||
debug = 1
|
||||
opt-level = 3
|
||||
|
||||
[features]
|
||||
# Internal-usage only: denies all warnings.
|
||||
strict = [ "coresimd/strict" ]
|
||||
# Internal-usage only: enables only those intrinsics supported by Intel's
|
||||
# Software Development Environment (SDE).
|
||||
intel_sde = [ "coresimd/intel_sde" ]
|
||||
|
||||
@@ -22,16 +22,18 @@ dox() {
|
||||
rm -rf target/doc/$arch
|
||||
mkdir target/doc/$arch
|
||||
|
||||
cargo build --target $target
|
||||
cargo build --target $target --manifest-path crates/stdsimd/Cargo.toml
|
||||
|
||||
rustdoc --target $target \
|
||||
-o target/doc/$arch coresimd/src/lib.rs \
|
||||
-o target/doc/$arch crates/coresimd/src/lib.rs \
|
||||
--crate-name coresimd \
|
||||
--library-path target/$target/debug/deps
|
||||
rustdoc --target $target \
|
||||
-o target/doc/$arch src/lib.rs \
|
||||
-o target/doc/$arch crates/stdsimd/src/lib.rs \
|
||||
--crate-name stdsimd \
|
||||
--library-path target/$target/debug/deps
|
||||
--library-path target/$target/debug/deps \
|
||||
--extern cfg_if=`ls target/$target/debug/deps/libcfg_if-*.rlib` \
|
||||
--extern libc=`ls target/$target/debug/deps/liblibc-*.rlib`
|
||||
}
|
||||
|
||||
dox i686 i686-unknown-linux-gnu
|
||||
|
||||
@@ -23,7 +23,7 @@ echo "OBJDUMP=${OBJDUMP}"
|
||||
|
||||
cargo_test() {
|
||||
cmd="cargo test --target=$TARGET --features $FEATURES $1"
|
||||
cmd="$cmd -p coresimd -p stdsimd"
|
||||
cmd="$cmd -p coresimd -p stdsimd --manifest-path crates/stdsimd/Cargo.toml"
|
||||
cmd="$cmd -- $2"
|
||||
$cmd
|
||||
}
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
../LICENSE-APACHE
|
||||
@@ -1 +0,0 @@
|
||||
../LICENSE-MIT
|
||||
@@ -1 +0,0 @@
|
||||
../README.md
|
||||
@@ -4,8 +4,8 @@
|
||||
|
||||
#[cfg(test)]
|
||||
use stdsimd_test::assert_instr;
|
||||
use simd_llvm::simd_add;
|
||||
use v128::f64x2;
|
||||
use coresimd::simd_llvm::simd_add;
|
||||
use coresimd::v128::f64x2;
|
||||
|
||||
/// Vector add.
|
||||
#[inline]
|
||||
@@ -41,8 +41,8 @@ pub unsafe fn vaddd_u64(a: u64, b: u64) -> u64 {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::f64x2;
|
||||
use aarch64::neon;
|
||||
use simd::f64x2;
|
||||
use coresimd::aarch64::neon;
|
||||
use stdsimd_test::simd_test;
|
||||
|
||||
#[simd_test = "neon"]
|
||||
@@ -57,7 +57,7 @@ pub unsafe fn _cls_u64(x: u64) -> u64 {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use aarch64::v8;
|
||||
use coresimd::aarch64::v8;
|
||||
|
||||
#[test]
|
||||
fn _rev_u64() {
|
||||
@@ -3,10 +3,9 @@
|
||||
#[cfg(test)]
|
||||
use stdsimd_test::assert_instr;
|
||||
|
||||
use simd_llvm::simd_add;
|
||||
|
||||
use v64::{f32x2, i16x4, i32x2, i8x8, u16x4, u32x2, u8x8};
|
||||
use v128::{f32x4, i16x8, i32x4, i64x2, i8x16, u16x8, u32x4, u64x2, u8x16};
|
||||
use coresimd::simd_llvm::simd_add;
|
||||
use coresimd::v64::*;
|
||||
use coresimd::v128::*;
|
||||
|
||||
/// Vector add.
|
||||
#[inline]
|
||||
@@ -216,7 +215,7 @@ pub unsafe fn vrsqrte_f32(a: f32x2) -> f32x2 {
|
||||
mod tests {
|
||||
use stdsimd_test::simd_test;
|
||||
use simd::*;
|
||||
use arm::neon;
|
||||
use coresimd::arm::neon;
|
||||
|
||||
#[simd_test = "neon"]
|
||||
unsafe fn vadd_s8() {
|
||||
@@ -25,7 +25,7 @@ pub unsafe fn _rev_u32(x: u32) -> u32 {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use arm::v6;
|
||||
use coresimd::arm::v6;
|
||||
|
||||
#[test]
|
||||
fn _rev_u16() {
|
||||
@@ -50,7 +50,7 @@ extern "C" {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use arm::v7;
|
||||
use coresimd::arm::v7;
|
||||
|
||||
#[test]
|
||||
fn _clz_u8() {
|
||||
@@ -82,8 +82,8 @@ macro_rules! define_impl {
|
||||
slice: &mut [$elemty],
|
||||
offset: usize,
|
||||
) {
|
||||
use core::mem::size_of;
|
||||
use core::ptr;
|
||||
use mem::size_of;
|
||||
use ptr;
|
||||
|
||||
ptr::copy_nonoverlapping(
|
||||
&self as *const $name as *const u8,
|
||||
@@ -102,8 +102,8 @@ macro_rules! define_impl {
|
||||
slice: &[$elemty],
|
||||
offset: usize,
|
||||
) -> $name {
|
||||
use core::mem::size_of;
|
||||
use core::ptr;
|
||||
use mem::size_of;
|
||||
use ptr;
|
||||
|
||||
let mut x = $name::splat(0 as $elemty);
|
||||
ptr::copy_nonoverlapping(
|
||||
@@ -152,7 +152,7 @@ macro_rules! define_from {
|
||||
impl From<$from> for $to {
|
||||
#[inline(always)]
|
||||
fn from(f: $from) -> $to {
|
||||
unsafe { ::core::mem::transmute(f) }
|
||||
unsafe { ::mem::transmute(f) }
|
||||
}
|
||||
}
|
||||
)+
|
||||
@@ -162,7 +162,7 @@ macro_rules! define_from {
|
||||
macro_rules! define_common_ops {
|
||||
($($ty:ident),+) => {
|
||||
$(
|
||||
impl ::core::ops::Add for $ty {
|
||||
impl ::ops::Add for $ty {
|
||||
type Output = Self;
|
||||
#[inline(always)]
|
||||
fn add(self, other: Self) -> Self {
|
||||
@@ -170,7 +170,7 @@ macro_rules! define_common_ops {
|
||||
}
|
||||
}
|
||||
|
||||
impl ::core::ops::Sub for $ty {
|
||||
impl ::ops::Sub for $ty {
|
||||
type Output = Self;
|
||||
#[inline(always)]
|
||||
fn sub(self, other: Self) -> Self {
|
||||
@@ -178,7 +178,7 @@ macro_rules! define_common_ops {
|
||||
}
|
||||
}
|
||||
|
||||
impl ::core::ops::Mul for $ty {
|
||||
impl ::ops::Mul for $ty {
|
||||
type Output = Self;
|
||||
#[inline(always)]
|
||||
fn mul(self, other: Self) -> Self {
|
||||
@@ -186,7 +186,7 @@ macro_rules! define_common_ops {
|
||||
}
|
||||
}
|
||||
|
||||
impl ::core::ops::Div for $ty {
|
||||
impl ::ops::Div for $ty {
|
||||
type Output = Self;
|
||||
#[inline(always)]
|
||||
fn div(self, other: Self) -> Self {
|
||||
@@ -194,7 +194,7 @@ macro_rules! define_common_ops {
|
||||
}
|
||||
}
|
||||
|
||||
impl ::core::ops::Rem for $ty {
|
||||
impl ::ops::Rem for $ty {
|
||||
type Output = Self;
|
||||
#[inline(always)]
|
||||
fn rem(self, other: Self) -> Self {
|
||||
@@ -202,35 +202,35 @@ macro_rules! define_common_ops {
|
||||
}
|
||||
}
|
||||
|
||||
impl ::core::ops::AddAssign for $ty {
|
||||
impl ::ops::AddAssign for $ty {
|
||||
#[inline(always)]
|
||||
fn add_assign(&mut self, other: Self) {
|
||||
*self = *self + other;
|
||||
}
|
||||
}
|
||||
|
||||
impl ::core::ops::SubAssign for $ty {
|
||||
impl ::ops::SubAssign for $ty {
|
||||
#[inline(always)]
|
||||
fn sub_assign(&mut self, other: Self) {
|
||||
*self = *self - other;
|
||||
}
|
||||
}
|
||||
|
||||
impl ::core::ops::MulAssign for $ty {
|
||||
impl ::ops::MulAssign for $ty {
|
||||
#[inline(always)]
|
||||
fn mul_assign(&mut self, other: Self) {
|
||||
*self = *self * other;
|
||||
}
|
||||
}
|
||||
|
||||
impl ::core::ops::DivAssign for $ty {
|
||||
impl ::ops::DivAssign for $ty {
|
||||
#[inline(always)]
|
||||
fn div_assign(&mut self, other: Self) {
|
||||
*self = *self / other;
|
||||
}
|
||||
}
|
||||
|
||||
impl ::core::ops::RemAssign for $ty {
|
||||
impl ::ops::RemAssign for $ty {
|
||||
#[inline(always)]
|
||||
fn rem_assign(&mut self, other: Self) {
|
||||
*self = *self % other;
|
||||
@@ -244,14 +244,14 @@ macro_rules! define_common_ops {
|
||||
macro_rules! define_shifts {
|
||||
($ty:ident, $elem:ident, $($by:ident),+) => {
|
||||
$(
|
||||
impl ::core::ops::Shl<$by> for $ty {
|
||||
impl ::ops::Shl<$by> for $ty {
|
||||
type Output = Self;
|
||||
#[inline(always)]
|
||||
fn shl(self, other: $by) -> Self {
|
||||
unsafe { simd_shl(self, $ty::splat(other as $elem)) }
|
||||
}
|
||||
}
|
||||
impl ::core::ops::Shr<$by> for $ty {
|
||||
impl ::ops::Shr<$by> for $ty {
|
||||
type Output = Self;
|
||||
#[inline(always)]
|
||||
fn shr(self, other: $by) -> Self {
|
||||
@@ -259,13 +259,13 @@ macro_rules! define_shifts {
|
||||
}
|
||||
}
|
||||
|
||||
impl ::core::ops::ShlAssign<$by> for $ty {
|
||||
impl ::ops::ShlAssign<$by> for $ty {
|
||||
#[inline(always)]
|
||||
fn shl_assign(&mut self, other: $by) {
|
||||
*self = *self << other;
|
||||
}
|
||||
}
|
||||
impl ::core::ops::ShrAssign<$by> for $ty {
|
||||
impl ::ops::ShrAssign<$by> for $ty {
|
||||
#[inline(always)]
|
||||
fn shr_assign(&mut self, other: $by) {
|
||||
*self = *self >> other;
|
||||
@@ -279,7 +279,7 @@ macro_rules! define_shifts {
|
||||
macro_rules! define_float_ops {
|
||||
($($ty:ident),+) => {
|
||||
$(
|
||||
impl ::core::ops::Neg for $ty {
|
||||
impl ::ops::Neg for $ty {
|
||||
type Output = Self;
|
||||
#[inline(always)]
|
||||
fn neg(self) -> Self {
|
||||
@@ -293,7 +293,7 @@ macro_rules! define_float_ops {
|
||||
macro_rules! define_signed_integer_ops {
|
||||
($($ty:ident),+) => {
|
||||
$(
|
||||
impl ::core::ops::Neg for $ty {
|
||||
impl ::ops::Neg for $ty {
|
||||
type Output = Self;
|
||||
#[inline(always)]
|
||||
fn neg(self) -> Self {
|
||||
@@ -307,7 +307,7 @@ macro_rules! define_signed_integer_ops {
|
||||
macro_rules! define_integer_ops {
|
||||
($(($ty:ident, $elem:ident)),+) => {
|
||||
$(
|
||||
impl ::core::ops::Not for $ty {
|
||||
impl ::ops::Not for $ty {
|
||||
type Output = Self;
|
||||
#[inline(always)]
|
||||
fn not(self) -> Self {
|
||||
@@ -315,40 +315,40 @@ macro_rules! define_integer_ops {
|
||||
}
|
||||
}
|
||||
|
||||
impl ::core::ops::BitAnd for $ty {
|
||||
impl ::ops::BitAnd for $ty {
|
||||
type Output = Self;
|
||||
#[inline(always)]
|
||||
fn bitand(self, other: Self) -> Self {
|
||||
unsafe { simd_and(self, other) }
|
||||
}
|
||||
}
|
||||
impl ::core::ops::BitOr for $ty {
|
||||
impl ::ops::BitOr for $ty {
|
||||
type Output = Self;
|
||||
#[inline(always)]
|
||||
fn bitor(self, other: Self) -> Self {
|
||||
unsafe { simd_or(self, other) }
|
||||
}
|
||||
}
|
||||
impl ::core::ops::BitXor for $ty {
|
||||
impl ::ops::BitXor for $ty {
|
||||
type Output = Self;
|
||||
#[inline(always)]
|
||||
fn bitxor(self, other: Self) -> Self {
|
||||
unsafe { simd_xor(self, other) }
|
||||
}
|
||||
}
|
||||
impl ::core::ops::BitAndAssign for $ty {
|
||||
impl ::ops::BitAndAssign for $ty {
|
||||
#[inline(always)]
|
||||
fn bitand_assign(&mut self, other: Self) {
|
||||
*self = *self & other;
|
||||
}
|
||||
}
|
||||
impl ::core::ops::BitOrAssign for $ty {
|
||||
impl ::ops::BitOrAssign for $ty {
|
||||
#[inline(always)]
|
||||
fn bitor_assign(&mut self, other: Self) {
|
||||
*self = *self | other;
|
||||
}
|
||||
}
|
||||
impl ::core::ops::BitXorAssign for $ty {
|
||||
impl ::ops::BitXorAssign for $ty {
|
||||
#[inline(always)]
|
||||
fn bitxor_assign(&mut self, other: Self) {
|
||||
*self = *self ^ other;
|
||||
@@ -360,12 +360,12 @@ macro_rules! define_integer_ops {
|
||||
u8, u16, u32, u64, usize,
|
||||
i8, i16, i32, i64, isize);
|
||||
|
||||
impl ::core::fmt::LowerHex for $ty {
|
||||
fn fmt(&self, f: &mut ::core::fmt::Formatter)
|
||||
-> ::core::fmt::Result {
|
||||
impl ::fmt::LowerHex for $ty {
|
||||
fn fmt(&self, f: &mut ::fmt::Formatter)
|
||||
-> ::fmt::Result {
|
||||
write!(f, "{}(", stringify!($ty))?;
|
||||
let n = ::core::mem::size_of_val(self)
|
||||
/ ::core::mem::size_of::<$elem>();
|
||||
let n = ::mem::size_of_val(self)
|
||||
/ ::mem::size_of::<$elem>();
|
||||
for i in 0..n {
|
||||
if i > 0 {
|
||||
write!(f, ", ")?;
|
||||
@@ -384,7 +384,7 @@ macro_rules! define_casts {
|
||||
$(
|
||||
impl $fromty {
|
||||
#[inline(always)]
|
||||
pub fn $cast(self) -> ::simd::$toty {
|
||||
pub fn $cast(self) -> ::coresimd::simd::$toty {
|
||||
unsafe { simd_cast(self) }
|
||||
}
|
||||
}
|
||||
80
library/stdarch/coresimd/mod.rs
Normal file
80
library/stdarch/coresimd/mod.rs
Normal file
@@ -0,0 +1,80 @@
|
||||
/// Platform independent SIMD vector types and operations.
|
||||
pub mod simd {
|
||||
pub use coresimd::v128::*;
|
||||
pub use coresimd::v256::*;
|
||||
pub use coresimd::v512::*;
|
||||
pub use coresimd::v64::*;
|
||||
}
|
||||
|
||||
/// Platform dependent vendor intrinsics.
|
||||
pub mod vendor {
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
pub use coresimd::x86::*;
|
||||
|
||||
#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
|
||||
pub use coresimd::arm::*;
|
||||
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
pub use coresimd::aarch64::*;
|
||||
|
||||
// FIXME: rust does not expose the nvptx and nvptx64 targets yet
|
||||
#[cfg(not(any(target_arch = "x86", target_arch = "x86_64",
|
||||
target_arch = "arm", target_arch = "aarch64")))]
|
||||
pub use coresimd::nvptx::*;
|
||||
}
|
||||
|
||||
#[macro_use]
|
||||
mod macros;
|
||||
mod simd_llvm;
|
||||
mod v128;
|
||||
mod v256;
|
||||
mod v512;
|
||||
mod v64;
|
||||
|
||||
/// 32-bit wide vector tpyes
|
||||
mod v32 {
|
||||
#[cfg(not(test))]
|
||||
use prelude::v1::*;
|
||||
use coresimd::simd_llvm::*;
|
||||
|
||||
define_ty! { i16x2, i16, i16 }
|
||||
define_impl! { i16x2, i16, 2, i16x2, x0, x1 }
|
||||
define_ty! { u16x2, u16, u16 }
|
||||
define_impl! { u16x2, u16, 2, i16x2, x0, x1 }
|
||||
|
||||
define_ty! { i8x4, i8, i8, i8, i8 }
|
||||
define_impl! { i8x4, i8, 4, i8x4, x0, x1, x2, x3 }
|
||||
define_ty! { u8x4, u8, u8, u8, u8 }
|
||||
define_impl! { u8x4, u8, 4, i8x4, x0, x1, x2, x3 }
|
||||
|
||||
define_casts!(
|
||||
(i16x2, i64x2, as_i64x2),
|
||||
(u16x2, i64x2, as_i64x2),
|
||||
(i8x4, i32x4, as_i32x4),
|
||||
(u8x4, i32x4, as_i32x4)
|
||||
);
|
||||
}
|
||||
|
||||
/// 16-bit wide vector tpyes
|
||||
mod v16 {
|
||||
#[cfg(not(test))]
|
||||
use prelude::v1::*;
|
||||
use coresimd::simd_llvm::*;
|
||||
|
||||
define_ty! { i8x2, i8, i8 }
|
||||
define_impl! { i8x2, i8, 2, i8x2, x0, x1 }
|
||||
define_ty! { u8x2, u8, u8 }
|
||||
define_impl! { u8x2, u8, 2, i8x2, x0, x1 }
|
||||
|
||||
define_casts!((i8x2, i64x2, as_i64x2), (u8x2, i64x2, as_i64x2));
|
||||
}
|
||||
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
mod x86;
|
||||
|
||||
#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
|
||||
mod arm;
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
mod aarch64;
|
||||
|
||||
mod nvptx;
|
||||
@@ -1 +0,0 @@
|
||||
../rustfmt.toml
|
||||
@@ -1,139 +0,0 @@
|
||||
//! SIMD and vendor intrinsics support library.
|
||||
//!
|
||||
//! This documentation is only for one particular architecture, you can find
|
||||
//! others at:
|
||||
//!
|
||||
//! * [i686](https://rust-lang-nursery.github.io/stdsimd/i686/stdsimd/)
|
||||
//! * [`x86_64`](https://rust-lang-nursery.github.io/stdsimd/x86_64/stdsimd/)
|
||||
//! * [arm](https://rust-lang-nursery.github.io/stdsimd/arm/stdsimd/)
|
||||
//! * [aarch64](https://rust-lang-nursery.github.io/stdsimd/aarch64/stdsimd/)
|
||||
|
||||
#![cfg_attr(feature = "strict", deny(warnings))]
|
||||
#![allow(dead_code)]
|
||||
#![allow(unused_features)]
|
||||
#![feature(const_fn, link_llvm_intrinsics, platform_intrinsics, repr_simd,
|
||||
simd_ffi, target_feature, cfg_target_feature, i128_type, asm,
|
||||
integer_atomics, stmt_expr_attributes, core_intrinsics,
|
||||
crate_in_paths, attr_literals, rustc_attrs)]
|
||||
#![cfg_attr(test, feature(proc_macro, test, abi_vectorcall))]
|
||||
#![cfg_attr(feature = "cargo-clippy",
|
||||
allow(inline_always, too_many_arguments, cast_sign_loss,
|
||||
cast_lossless, cast_possible_wrap,
|
||||
cast_possible_truncation, cast_precision_loss,
|
||||
shadow_reuse, cyclomatic_complexity, similar_names,
|
||||
many_single_char_names))]
|
||||
#![no_std]
|
||||
|
||||
#[cfg(test)]
|
||||
#[macro_use]
|
||||
extern crate std;
|
||||
|
||||
#[cfg(test)]
|
||||
extern crate stdsimd_test;
|
||||
|
||||
#[cfg(test)]
|
||||
extern crate test;
|
||||
|
||||
/// Platform independent SIMD vector types and operations.
|
||||
pub mod simd {
|
||||
pub use v128::*;
|
||||
pub use v256::*;
|
||||
pub use v512::*;
|
||||
pub use v64::*;
|
||||
}
|
||||
|
||||
/// Platform dependent vendor intrinsics.
|
||||
pub mod vendor {
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
pub use x86::*;
|
||||
|
||||
#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
|
||||
pub use arm::*;
|
||||
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
pub use aarch64::*;
|
||||
|
||||
// FIXME: rust does not expose the nvptx and nvptx64 targets yet
|
||||
#[cfg(not(any(target_arch = "x86", target_arch = "x86_64",
|
||||
target_arch = "arm", target_arch = "aarch64")))]
|
||||
pub use nvptx::*;
|
||||
}
|
||||
|
||||
/// Run-time feature detection.
|
||||
#[doc(hidden)]
|
||||
pub mod __vendor_runtime {
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64",
|
||||
all(target_os = "linux",
|
||||
any(target_arch = "arm", target_arch = "aarch64",
|
||||
target_arch = "powerpc64"))))]
|
||||
pub use runtime::core::*;
|
||||
|
||||
// Re-exports `coresimd` run-time building blocks for usage in the
|
||||
// `stdsimd` run-time.
|
||||
#[cfg(all(target_os = "linux",
|
||||
any(target_arch = "arm", target_arch = "aarch64",
|
||||
target_arch = "powerpc64")))]
|
||||
#[doc(hidden)]
|
||||
pub mod __runtime {
|
||||
pub use runtime::*;
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64",
|
||||
all(target_os = "linux",
|
||||
any(target_arch = "arm", target_arch = "aarch64",
|
||||
target_arch = "powerpc64"))))]
|
||||
#[macro_use]
|
||||
mod runtime;
|
||||
|
||||
#[macro_use]
|
||||
mod macros;
|
||||
mod simd_llvm;
|
||||
mod v128;
|
||||
mod v256;
|
||||
mod v512;
|
||||
mod v64;
|
||||
|
||||
/// 32-bit wide vector tpyes
|
||||
mod v32 {
|
||||
use simd_llvm::*;
|
||||
|
||||
define_ty! { i16x2, i16, i16 }
|
||||
define_impl! { i16x2, i16, 2, i16x2, x0, x1 }
|
||||
define_ty! { u16x2, u16, u16 }
|
||||
define_impl! { u16x2, u16, 2, i16x2, x0, x1 }
|
||||
|
||||
define_ty! { i8x4, i8, i8, i8, i8 }
|
||||
define_impl! { i8x4, i8, 4, i8x4, x0, x1, x2, x3 }
|
||||
define_ty! { u8x4, u8, u8, u8, u8 }
|
||||
define_impl! { u8x4, u8, 4, i8x4, x0, x1, x2, x3 }
|
||||
|
||||
define_casts!(
|
||||
(i16x2, i64x2, as_i64x2),
|
||||
(u16x2, i64x2, as_i64x2),
|
||||
(i8x4, i32x4, as_i32x4),
|
||||
(u8x4, i32x4, as_i32x4)
|
||||
);
|
||||
}
|
||||
|
||||
/// 16-bit wide vector tpyes
|
||||
mod v16 {
|
||||
use simd_llvm::*;
|
||||
|
||||
define_ty! { i8x2, i8, i8 }
|
||||
define_impl! { i8x2, i8, 2, i8x2, x0, x1 }
|
||||
define_ty! { u8x2, u8, u8 }
|
||||
define_impl! { u8x2, u8, 2, i8x2, x0, x1 }
|
||||
|
||||
define_casts!((i8x2, i64x2, as_i64x2), (u8x2, i64x2, as_i64x2));
|
||||
}
|
||||
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
mod x86;
|
||||
|
||||
#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
|
||||
mod arm;
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
mod aarch64;
|
||||
|
||||
mod nvptx;
|
||||
@@ -1,47 +0,0 @@
|
||||
//! Run-time feature detection on ARM Aarch64.
|
||||
use runtime::cache;
|
||||
use runtime::arch::HasFeature;
|
||||
|
||||
#[macro_export]
|
||||
#[doc(hidden)]
|
||||
macro_rules! __unstable_detect_feature {
|
||||
("neon", $unstable_detect_feature:path) => {
|
||||
// FIXME: this should be removed once we rename Aarch64 neon to asimd
|
||||
$unstable_detect_feature($crate::__vendor_runtime::_Feature::asimd{})
|
||||
};
|
||||
("asimd", $unstable_detect_feature:path) => {
|
||||
$unstable_detect_feature($crate::__vendor_runtime::__Feature::asimd{})
|
||||
};
|
||||
("pmull", $unstable_detect_feature:path) => {
|
||||
$unstable_detect_feature($crate::__vendor_runtime::__Feature::pmull{})
|
||||
};
|
||||
($t:tt, $unstable_detect_feature:path) => { compile_error!(concat!("unknown arm target feature: ", $t)) };
|
||||
}
|
||||
|
||||
/// ARM Aarch64 CPU Feature enum. Each variant denotes a position in a bitset
|
||||
/// for a particular feature.
|
||||
///
|
||||
/// PLEASE: do not use this, it is an implementation detail subject to change.
|
||||
#[doc(hidden)]
|
||||
#[allow(non_camel_case_types)]
|
||||
#[repr(u8)]
|
||||
pub enum __Feature {
|
||||
/// ARM Advanced SIMD (ASIMD) - Aarch64
|
||||
asimd,
|
||||
/// Polynomial Multiply
|
||||
pmull,
|
||||
}
|
||||
|
||||
pub fn detect_features<T: HasFeature>(mut x: T) -> cache::Initializer {
|
||||
let mut value = cache::Initializer::default();
|
||||
{
|
||||
let mut enable_feature = |f| {
|
||||
if x.has_feature(&f) {
|
||||
value.set(f as u32);
|
||||
}
|
||||
};
|
||||
enable_feature(__Feature::asimd);
|
||||
enable_feature(__Feature::pmull);
|
||||
}
|
||||
value
|
||||
}
|
||||
@@ -1,43 +0,0 @@
|
||||
//! Run-time feature detection on ARM Aarch32.
|
||||
use runtime::cache;
|
||||
use runtime::arch::HasFeature;
|
||||
|
||||
#[macro_export]
|
||||
#[doc(hidden)]
|
||||
macro_rules! __unstable_detect_feature {
|
||||
("neon", $unstable_detect_feature:path) => {
|
||||
$unstable_detect_feature($crate::__vendor_runtime::__Feature::neon{})
|
||||
};
|
||||
("pmull", $unstable_detect_feature:path) => {
|
||||
$unstable_detect_feature($crate::__vendor_runtime::__Feature::pmull{})
|
||||
};
|
||||
($t:tt, $unstable_detect_feature:path) => { compile_error!(concat!("unknown arm target feature: ", $t)) };
|
||||
}
|
||||
|
||||
/// ARM CPU Feature enum. Each variant denotes a position in a bitset for a
|
||||
/// particular feature.
|
||||
///
|
||||
/// PLEASE: do not use this, it is an implementation detail subject to change.
|
||||
#[doc(hidden)]
|
||||
#[allow(non_camel_case_types)]
|
||||
#[repr(u8)]
|
||||
pub enum __Feature {
|
||||
/// ARM Advanced SIMD (NEON) - Aarch32
|
||||
neon,
|
||||
/// Polynomial Multiply
|
||||
pmull,
|
||||
}
|
||||
|
||||
pub fn detect_features<T: HasFeature>(mut x: T) -> cache::Initializer {
|
||||
let mut value = cache::Initializer::default();
|
||||
{
|
||||
let mut enable_feature = |f| {
|
||||
if x.has_feature(&f) {
|
||||
value.set(f as u32);
|
||||
}
|
||||
};
|
||||
enable_feature(__Feature::neon);
|
||||
enable_feature(__Feature::pmull);
|
||||
}
|
||||
value
|
||||
}
|
||||
@@ -1,20 +0,0 @@
|
||||
//! Run-time feature detection for Aarch64 on Linux and `core`.
|
||||
|
||||
use runtime::bit;
|
||||
use runtime::linux::auxv::AuxVec;
|
||||
use runtime::arch::{HasFeature, __Feature};
|
||||
|
||||
/// Probe the ELF Auxiliary vector for hardware capabilities
|
||||
///
|
||||
/// The values are part of the platform-specific [asm/hwcap.h][hwcap]
|
||||
///
|
||||
/// [hwcap]: https://github.com/torvalds/linux/blob/master/arch/arm64/include/uapi/asm/hwcap.h
|
||||
impl HasFeature for AuxVec {
|
||||
fn has_feature(&mut self, x: &__Feature) -> bool {
|
||||
use self::__Feature::*;
|
||||
match *x {
|
||||
asimd => bit::test(self.hwcap, 1),
|
||||
pmull => bit::test(self.hwcap, 4),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,20 +0,0 @@
|
||||
//! Run-time feature detection for ARM32 on Linux and `core`.
|
||||
|
||||
use runtime::bit;
|
||||
use runtime::linux::auxv::AuxVec;
|
||||
use runtime::arch::{HasFeature, __Feature};
|
||||
|
||||
/// Probe the ELF Auxiliary vector for hardware capabilities
|
||||
///
|
||||
/// The values are part of the platform-specific [asm/hwcap.h][hwcap]
|
||||
///
|
||||
/// [hwcap]: https://github.com/torvalds/linux/blob/master/arch/arm64/include/uapi/asm/hwcap.h
|
||||
impl HasFeature for AuxVec {
|
||||
fn has_feature(&mut self, x: &__Feature) -> bool {
|
||||
use self::__Feature::*;
|
||||
match *x {
|
||||
neon => bit::test(self.hwcap, 12),
|
||||
pmull => bit::test(self.hwcap2, 1),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,46 +0,0 @@
|
||||
//! ELF Auxiliary Vector
|
||||
//!
|
||||
//! The auxiliary vector is a memory region in a running ELF program's stack
|
||||
//! composed of (key: usize, value: usize) pairs.
|
||||
//!
|
||||
//! The keys used in the aux vector are platform dependent. For Linux, they are
|
||||
//! defined in [linux/auxvec.h][auxvec_h]. The hardware capabilities of a given
|
||||
//! CPU can be queried with the `AT_HWCAP` and `AT_HWCAP2` keys.
|
||||
//!
|
||||
//! There is no perfect way of reading the auxiliary vector.
|
||||
//!
|
||||
//! - `coresimd`: if `getauxval` is available, `coresimd` will try to use it.
|
||||
//! - `stdsimd`: if `getauxval` is not available, it will try to read
|
||||
//! `/proc/self/auxv`, and if that fails it will try to read `/proc/cpuinfo`.
|
||||
//!
|
||||
//! For more information about when `getauxval` is available check the great
|
||||
//! [`auxv` crate documentation][auxv_docs].
|
||||
//!
|
||||
//! [auxvec_h]: https://github.com/torvalds/linux/blob/master/include/uapi/linux/auxvec.h
|
||||
//! [auxv_docs]: https://docs.rs/auxv/0.3.3/auxv/
|
||||
|
||||
/// Key to access the CPU Hardware capabilities bitfield.
|
||||
pub const AT_HWCAP: usize = 16;
|
||||
/// Key to access the CPU Hardware capabilities 2 bitfield.
|
||||
pub const AT_HWCAP2: usize = 26;
|
||||
|
||||
/// Cache HWCAP bitfields of the ELF Auxiliary Vector.
|
||||
///
|
||||
/// If an entry cannot be read all the bits in the bitfield
|
||||
/// are set to zero.
|
||||
#[cfg(any(target_arch = "arm", target_arch = "powerpc64"))]
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
pub struct AuxVec {
|
||||
pub hwcap: usize,
|
||||
pub hwcap2: usize,
|
||||
}
|
||||
|
||||
/// Cache HWCAP bitfields of the ELF Auxiliary Vector.
|
||||
///
|
||||
/// If an entry cannot be read all the bits in the bitfield
|
||||
/// are set to zero.
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
pub struct AuxVec {
|
||||
pub hwcap: usize,
|
||||
}
|
||||
@@ -1,12 +0,0 @@
|
||||
//! Run-time feature detection for ARM and PowerPC64 on Linux.
|
||||
|
||||
#[cfg(target_arch = "arm")]
|
||||
mod arm;
|
||||
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
mod aarch64;
|
||||
|
||||
#[cfg(target_arch = "powerpc64")]
|
||||
mod powerpc64;
|
||||
|
||||
pub mod auxv;
|
||||
@@ -1,22 +0,0 @@
|
||||
//! Run-time feature detection for PowerPC64 on Linux and `core`.
|
||||
|
||||
use runtime::linux::auxv::AuxVec;
|
||||
use runtime::arch::{HasFeature, __Feature};
|
||||
|
||||
/// Probe the ELF Auxiliary vector for hardware capabilities
|
||||
///
|
||||
/// The values are part of the platform-specific [asm/cputable.h][cputable]
|
||||
///
|
||||
/// [cputable]: https://github.com/torvalds/linux/blob/master/arch/powerpc/include/uapi/asm/cputable.h
|
||||
impl HasFeature for AuxVec {
|
||||
fn has_feature(&mut self, x: &__Feature) -> bool {
|
||||
use self::__Feature::*;
|
||||
// note: the PowerPC values are the mask to do the test (instead of the
|
||||
// index of the bit to test like in ARM and Aarch64)
|
||||
match *x {
|
||||
altivec => self.hwcap & 0x10000000 != 0,
|
||||
vsx => self.hwcap & 0x00000080 != 0,
|
||||
power8 => self.hwcap2 & 0x80000000 != 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,30 +0,0 @@
|
||||
//! Run-time feature detection macros.
|
||||
|
||||
/// Is a feature supported by the host CPU?
|
||||
///
|
||||
/// This macro performs run-time feature detection in `coresimd`. It returns
|
||||
/// true if the host CPU in which the binary is running on supports a
|
||||
/// particular feature.
|
||||
#[macro_export]
|
||||
macro_rules! cfg_feature_enabled {
|
||||
($name:tt) => (
|
||||
{
|
||||
#[cfg(target_feature = $name)]
|
||||
{
|
||||
true
|
||||
}
|
||||
#[cfg(not(target_feature = $name))]
|
||||
{
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
{
|
||||
__unstable_detect_feature!($name,
|
||||
$crate::__vendor_runtime::__unstable_detect_feature)
|
||||
}
|
||||
#[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
|
||||
{
|
||||
compile_error!("cfg_target_feature! is not supported in this architecture")
|
||||
}
|
||||
}
|
||||
}
|
||||
)
|
||||
}
|
||||
@@ -1,64 +0,0 @@
|
||||
//! Run-time feature detection
|
||||
pub mod cache;
|
||||
pub mod bit;
|
||||
|
||||
#[macro_use]
|
||||
pub mod macros;
|
||||
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
#[macro_use]
|
||||
pub mod x86;
|
||||
|
||||
#[cfg(target_arch = "arm")]
|
||||
#[macro_use]
|
||||
pub mod arm;
|
||||
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
#[macro_use]
|
||||
pub mod aarch64;
|
||||
|
||||
#[cfg(target_arch = "powerpc64")]
|
||||
#[macro_use]
|
||||
pub mod powerpc64;
|
||||
|
||||
#[cfg(all(target_os = "linux",
|
||||
any(target_arch = "arm", target_arch = "aarch64",
|
||||
target_arch = "powerpc64")))]
|
||||
pub mod linux;
|
||||
|
||||
/// Exports architecture specific functionality for
|
||||
/// reuse in `stdsimd`.
|
||||
pub mod arch {
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
pub use super::x86::{detect_features, __Feature};
|
||||
|
||||
#[cfg(target_arch = "arm")]
|
||||
pub use runtime::arm::{detect_features, __Feature};
|
||||
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
pub use runtime::aarch64::{detect_features, __Feature};
|
||||
|
||||
#[cfg(target_arch = "powerpc64")]
|
||||
pub use runtime::powerpc64::{detect_features, __Feature};
|
||||
|
||||
/// Interface for querying whether a feature is enabled.
|
||||
pub trait HasFeature {
|
||||
/// Is the feature `x` enabled at run-time?
|
||||
fn has_feature(&mut self, x: &__Feature) -> bool;
|
||||
}
|
||||
}
|
||||
|
||||
/// Run-time feature detection exposed by `coresimd`.
|
||||
pub mod core {
|
||||
pub use super::arch::__Feature;
|
||||
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
pub use super::arch::detect_features;
|
||||
|
||||
/// Performs run-time feature detection.
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
#[doc(hidden)]
|
||||
pub fn __unstable_detect_feature(x: __Feature) -> bool {
|
||||
super::cache::test(x as u32, detect_features)
|
||||
}
|
||||
}
|
||||
@@ -1,49 +0,0 @@
|
||||
//! Run-time feature detection on PowerPC64.
|
||||
use runtime::cache;
|
||||
use runtime::arch::HasFeature;
|
||||
|
||||
#[macro_export]
|
||||
#[doc(hidden)]
|
||||
macro_rules! __unstable_detect_feature {
|
||||
("altivec", $unstable_detect_feature:path) => {
|
||||
$unstable_detect_feature($crate::__vendor_runtime::__Feature::altivec{})
|
||||
};
|
||||
("vsx", $unstable_detect_feature:path) => {
|
||||
$unstable_detect_feature($crate::__vendor_runtime::__Feature::vsx{})
|
||||
};
|
||||
("power8", $unstable_detect_feature:path) => {
|
||||
$unstable_detect_feature($crate::__vendor_runtime::__Feature::power8{})
|
||||
};
|
||||
($t:tt, $unstable_detect_feature:path) => { compile_error!(concat!("unknown PowerPC target feature: ", $t)) };
|
||||
}
|
||||
|
||||
/// PowerPC CPU Feature enum. Each variant denotes a position in a bitset
|
||||
/// for a particular feature.
|
||||
///
|
||||
/// PLEASE: do not use this, it is an implementation detail subject to change.
|
||||
#[doc(hidden)]
|
||||
#[allow(non_camel_case_types)]
|
||||
#[repr(u8)]
|
||||
pub enum __Feature {
|
||||
/// Altivec
|
||||
altivec,
|
||||
/// VSX
|
||||
vsx,
|
||||
/// Power8
|
||||
power8,
|
||||
}
|
||||
|
||||
pub fn detect_features<T: HasFeature>(mut x: T) -> cache::Initializer {
|
||||
let mut value = cache::Initializer::default();
|
||||
{
|
||||
let mut enable_feature = |f| {
|
||||
if x.has_feature(&f) {
|
||||
value.set(f as u32);
|
||||
}
|
||||
};
|
||||
enable_feature(__Feature::altivec);
|
||||
enable_feature(__Feature::vsx);
|
||||
enable_feature(__Feature::power8);
|
||||
}
|
||||
value
|
||||
}
|
||||
@@ -1,554 +0,0 @@
|
||||
//! This module implements minimal run-time feature detection for x86.
|
||||
//!
|
||||
//! The features are detected using the `detect_features` function below.
|
||||
//! This function uses the CPUID instruction to read the feature flags from the
|
||||
//! CPU and encodes them in an `usize` where each bit position represents
|
||||
//! whether a feature is available (bit is set) or unavaiable (bit is cleared).
|
||||
//!
|
||||
//! The enum `__Feature` is used to map bit positions to feature names, and the
|
||||
//! the `__unstable_detect_feature!` macro is used to map string literals (e.g.
|
||||
//! "avx") to these bit positions (e.g. `__Feature::avx`).
|
||||
//!
|
||||
//!
|
||||
//! The run-time feature detection is performed by the
|
||||
//! `__unstable_detect_feature(__Feature) -> bool` function. On its first call,
|
||||
//! this functions queries the CPU for the available features and stores them
|
||||
//! in a global `AtomicUsize` variable. The query is performed by just checking
|
||||
//! whether the feature bit in this global variable is set or cleared.
|
||||
|
||||
use core::mem;
|
||||
|
||||
use super::{bit, cache};
|
||||
|
||||
/// This macro maps the string-literal feature names to values of the
|
||||
/// `__Feature` enum at compile-time. The feature names used are the same as
|
||||
/// those of rustc `target_feature` and `cfg_target_feature` features.
|
||||
///
|
||||
/// PLESE: do not use this, it is an implementation detail subjected to change.
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
#[macro_export]
|
||||
#[doc(hidden)]
|
||||
macro_rules! __unstable_detect_feature {
|
||||
("aes", $unstable_detect_feature:path) => {
|
||||
$unstable_detect_feature(
|
||||
$crate::__vendor_runtime::__Feature::aes{}) };
|
||||
("tsc", $unstable_detect_feature:path) => {
|
||||
$unstable_detect_feature(
|
||||
$crate::__vendor_runtime::__Feature::tsc{}) };
|
||||
("mmx", $unstable_detect_feature:path) => {
|
||||
$unstable_detect_feature(
|
||||
$crate::__vendor_runtime::__Feature::mmx{}) };
|
||||
("sse", $unstable_detect_feature:path) => {
|
||||
$unstable_detect_feature(
|
||||
$crate::__vendor_runtime::__Feature::sse{}) };
|
||||
("sse2", $unstable_detect_feature:path) => {
|
||||
$unstable_detect_feature(
|
||||
$crate::__vendor_runtime::__Feature::sse2{})
|
||||
};
|
||||
("sse3", $unstable_detect_feature:path) => {
|
||||
$unstable_detect_feature(
|
||||
$crate::__vendor_runtime::__Feature::sse3{})
|
||||
};
|
||||
("ssse3", $unstable_detect_feature:path) => {
|
||||
$unstable_detect_feature(
|
||||
$crate::__vendor_runtime::__Feature::ssse3{})
|
||||
};
|
||||
("sse4.1", $unstable_detect_feature:path) => {
|
||||
$unstable_detect_feature(
|
||||
$crate::__vendor_runtime::__Feature::sse4_1{})
|
||||
};
|
||||
("sse4.2", $unstable_detect_feature:path) => {
|
||||
$unstable_detect_feature(
|
||||
$crate::__vendor_runtime::__Feature::sse4_2{})
|
||||
};
|
||||
("sse4a", $unstable_detect_feature:path) => {
|
||||
$unstable_detect_feature(
|
||||
$crate::__vendor_runtime::__Feature::sse4a{})
|
||||
};
|
||||
("avx", $unstable_detect_feature:path) => {
|
||||
$unstable_detect_feature(
|
||||
$crate::__vendor_runtime::__Feature::avx{})
|
||||
};
|
||||
("avx2", $unstable_detect_feature:path) => {
|
||||
$unstable_detect_feature(
|
||||
$crate::__vendor_runtime::__Feature::avx2{})
|
||||
};
|
||||
("avx512f", $unstable_detect_feature:path) => {
|
||||
$unstable_detect_feature(
|
||||
$crate::__vendor_runtime::__Feature::avx512f{})
|
||||
};
|
||||
("avx512cd", $unstable_detect_feature:path) => {
|
||||
$unstable_detect_feature(
|
||||
$crate::__vendor_runtime::__Feature::avx512cd{})
|
||||
};
|
||||
("avx512er", $unstable_detect_feature:path) => {
|
||||
$unstable_detect_feature(
|
||||
$crate::__vendor_runtime::__Feature::avx512er{})
|
||||
};
|
||||
("avx512pf", $unstable_detect_feature:path) => {
|
||||
$unstable_detect_feature(
|
||||
$crate::__vendor_runtime::__Feature::avx512pf{})
|
||||
};
|
||||
("avx512bw", $unstable_detect_feature:path) => {
|
||||
$unstable_detect_feature(
|
||||
$crate::__vendor_runtime::__Feature::avx512bw{})
|
||||
};
|
||||
("avx512dq", $unstable_detect_feature:path) => {
|
||||
$unstable_detect_feature(
|
||||
$crate::__vendor_runtime::__Feature::avx512dq{})
|
||||
};
|
||||
("avx512vl", $unstable_detect_feature:path) => {
|
||||
$unstable_detect_feature(
|
||||
$crate::__vendor_runtime::__Feature::avx512vl{})
|
||||
};
|
||||
("avx512ifma", $unstable_detect_feature:path) => {
|
||||
$unstable_detect_feature(
|
||||
$crate::__vendor_runtime::__Feature::avx512_ifma{})
|
||||
};
|
||||
("avx512vbmi", $unstable_detect_feature:path) => {
|
||||
$unstable_detect_feature(
|
||||
$crate::__vendor_runtime::__Feature::avx512_vbmi{})
|
||||
};
|
||||
("avx512vpopcntdq", $unstable_detect_feature:path) => {
|
||||
$unstable_detect_feature(
|
||||
$crate::__vendor_runtime::__Feature::avx512_vpopcntdq{})
|
||||
};
|
||||
("fma", $unstable_detect_feature:path) => {
|
||||
$unstable_detect_feature(
|
||||
$crate::__vendor_runtime::__Feature::fma{})
|
||||
};
|
||||
("bmi", $unstable_detect_feature:path) => {
|
||||
$unstable_detect_feature(
|
||||
$crate::__vendor_runtime::__Feature::bmi{})
|
||||
};
|
||||
("bmi2", $unstable_detect_feature:path) => {
|
||||
$unstable_detect_feature(
|
||||
$crate::__vendor_runtime::__Feature::bmi2{})
|
||||
};
|
||||
("abm", $unstable_detect_feature:path) => {
|
||||
$unstable_detect_feature(
|
||||
$crate::__vendor_runtime::__Feature::abm{})
|
||||
};
|
||||
("lzcnt", $unstable_detect_feature:path) => {
|
||||
$unstable_detect_feature(
|
||||
$crate::__vendor_runtime::__Feature::abm{})
|
||||
};
|
||||
("tbm", $unstable_detect_feature:path) => {
|
||||
$unstable_detect_feature(
|
||||
$crate::__vendor_runtime::__Feature::tbm{})
|
||||
};
|
||||
("popcnt", $unstable_detect_feature:path) => {
|
||||
$unstable_detect_feature(
|
||||
$crate::__vendor_runtime::__Feature::popcnt{})
|
||||
};
|
||||
("fxsr", $unstable_detect_feature:path) => {
|
||||
$unstable_detect_feature(
|
||||
$crate::__vendor_runtime::__Feature::fxsr{})
|
||||
};
|
||||
("xsave", $unstable_detect_feature:path) => {
|
||||
$unstable_detect_feature(
|
||||
$crate::__vendor_runtime::__Feature::xsave{})
|
||||
};
|
||||
("xsaveopt", $unstable_detect_feature:path) => {
|
||||
$unstable_detect_feature(
|
||||
$crate::__vendor_runtime::__Feature::xsaveopt{})
|
||||
};
|
||||
("xsaves", $unstable_detect_feature:path) => {
|
||||
$unstable_detect_feature(
|
||||
$crate::__vendor_runtime::__Feature::xsaves{})
|
||||
};
|
||||
("xsavec", $unstable_detect_feature:path) => {
|
||||
$unstable_detect_feature(
|
||||
$crate::__vendor_runtime::__Feature::xsavec{})
|
||||
};
|
||||
($t:tt, $unstable_detect_feature:path) => {
|
||||
compile_error!(concat!("unknown target feature: ", $t))
|
||||
};
|
||||
}
|
||||
|
||||
/// X86 CPU Feature enum. Each variant denotes a position in a bitset for a
|
||||
/// particular feature.
|
||||
///
|
||||
/// PLEASE: do not use this, it is an implementation detail subject to change.
|
||||
#[doc(hidden)]
|
||||
#[allow(non_camel_case_types)]
|
||||
#[repr(u8)]
|
||||
pub enum __Feature {
|
||||
/// AES (Advanced Encryption Standard New Instructions AES-NI)
|
||||
aes,
|
||||
/// TSC (Time Stamp Counter)
|
||||
tsc,
|
||||
/// MMX
|
||||
mmx,
|
||||
/// SSE (Streaming SIMD Extensions)
|
||||
sse,
|
||||
/// SSE2 (Streaming SIMD Extensions 2)
|
||||
sse2,
|
||||
/// SSE3 (Streaming SIMD Extensions 3)
|
||||
sse3,
|
||||
/// SSSE3 (Supplemental Streaming SIMD Extensions 3)
|
||||
ssse3,
|
||||
/// SSE4.1 (Streaming SIMD Extensions 4.1)
|
||||
sse4_1,
|
||||
/// SSE4.2 (Streaming SIMD Extensions 4.2)
|
||||
sse4_2,
|
||||
/// SSE4a (Streaming SIMD Extensions 4a)
|
||||
sse4a,
|
||||
/// AVX (Advanced Vector Extensions)
|
||||
avx,
|
||||
/// AVX2 (Advanced Vector Extensions 2)
|
||||
avx2,
|
||||
/// AVX-512 F (Foundation)
|
||||
avx512f,
|
||||
/// AVX-512 CD (Conflict Detection Instructions)
|
||||
avx512cd,
|
||||
/// AVX-512 ER (Exponential and Reciprocal Instructions)
|
||||
avx512er,
|
||||
/// AVX-512 PF (Prefetch Instructions)
|
||||
avx512pf,
|
||||
/// AVX-512 BW (Byte and Word Instructions)
|
||||
avx512bw,
|
||||
/// AVX-512 DQ (Doubleword and Quadword)
|
||||
avx512dq,
|
||||
/// AVX-512 VL (Vector Length Extensions)
|
||||
avx512vl,
|
||||
/// AVX-512 IFMA (Integer Fused Multiply Add)
|
||||
avx512_ifma,
|
||||
/// AVX-512 VBMI (Vector Byte Manipulation Instructions)
|
||||
avx512_vbmi,
|
||||
/// AVX-512 VPOPCNTDQ (Vector Population Count Doubleword and
|
||||
/// Quadword)
|
||||
avx512_vpopcntdq,
|
||||
/// FMA (Fused Multiply Add)
|
||||
fma,
|
||||
/// BMI1 (Bit Manipulation Instructions 1)
|
||||
bmi,
|
||||
/// BMI1 (Bit Manipulation Instructions 2)
|
||||
bmi2,
|
||||
/// ABM (Advanced Bit Manipulation) on AMD / LZCNT (Leading Zero
|
||||
/// Count) on Intel
|
||||
abm,
|
||||
/// TBM (Trailing Bit Manipulation)
|
||||
tbm,
|
||||
/// POPCNT (Population Count)
|
||||
popcnt,
|
||||
/// FXSR (Floating-point context fast save and restor)
|
||||
fxsr,
|
||||
/// XSAVE (Save Processor Extended States)
|
||||
xsave,
|
||||
/// XSAVEOPT (Save Processor Extended States Optimized)
|
||||
xsaveopt,
|
||||
/// XSAVES (Save Processor Extended States Supervisor)
|
||||
xsaves,
|
||||
/// XSAVEC (Save Processor Extended States Compacted)
|
||||
xsavec,
|
||||
#[doc(hidden)]
|
||||
__NonExhaustive,
|
||||
}
|
||||
|
||||
/// Run-time feature detection on x86 works by using the CPUID instruction.
|
||||
///
|
||||
/// The [CPUID Wikipedia page][wiki_cpuid] contains
|
||||
/// all the information about which flags to set to query which values, and in
|
||||
/// which registers these are reported.
|
||||
///
|
||||
/// The definitive references are:
|
||||
/// - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2:
|
||||
/// Instruction Set Reference, A-Z][intel64_ref].
|
||||
/// - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and
|
||||
/// System Instructions][amd64_ref].
|
||||
///
|
||||
/// [wiki_cpuid]: https://en.wikipedia.org/wiki/CPUID
|
||||
/// [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
|
||||
/// [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
|
||||
pub fn detect_features() -> cache::Initializer {
|
||||
use vendor::{__cpuid, __cpuid_count, has_cpuid, CpuidResult};
|
||||
use vendor::_xgetbv;
|
||||
let mut value = cache::Initializer::default();
|
||||
|
||||
// If the x86 CPU does not support the CPUID instruction then it is too
|
||||
// old to support any of the currently-detectable features.
|
||||
if !has_cpuid() {
|
||||
return value;
|
||||
}
|
||||
|
||||
// Calling `__cpuid`/`__cpuid_count` from here on is safe because the CPU
|
||||
// has `cpuid` support.
|
||||
|
||||
// 0. EAX = 0: Basic Information:
|
||||
// - EAX returns the "Highest Function Parameter", that is, the maximum
|
||||
// leaf value for subsequent calls of `cpuinfo` in range [0,
|
||||
// 0x8000_0000]. - The vendor ID is stored in 12 u8 ascii chars,
|
||||
// returned in EBX, EDX, and ECX (in that order):
|
||||
let (max_basic_leaf, vendor_id) = unsafe {
|
||||
let CpuidResult {
|
||||
eax: max_basic_leaf,
|
||||
ebx,
|
||||
ecx,
|
||||
edx,
|
||||
} = __cpuid(0);
|
||||
let vendor_id: [[u8; 4]; 3] = [
|
||||
mem::transmute(ebx),
|
||||
mem::transmute(edx),
|
||||
mem::transmute(ecx),
|
||||
];
|
||||
let vendor_id: [u8; 12] = mem::transmute(vendor_id);
|
||||
(max_basic_leaf, vendor_id)
|
||||
};
|
||||
|
||||
if max_basic_leaf < 1 {
|
||||
// Earlier Intel 486, CPUID not implemented
|
||||
return value;
|
||||
}
|
||||
|
||||
// EAX = 1, ECX = 0: Queries "Processor Info and Feature Bits";
|
||||
// Contains information about most x86 features.
|
||||
let CpuidResult {
|
||||
ecx: proc_info_ecx,
|
||||
edx: proc_info_edx,
|
||||
..
|
||||
} = unsafe { __cpuid(0x0000_0001_u32) };
|
||||
|
||||
// EAX = 7, ECX = 0: Queries "Extended Features";
|
||||
// Contains information about bmi,bmi2, and avx2 support.
|
||||
let (extended_features_ebx, extended_features_ecx) = if max_basic_leaf >= 7
|
||||
{
|
||||
let CpuidResult { ebx, ecx, .. } = unsafe { __cpuid(0x0000_0007_u32) };
|
||||
(ebx, ecx)
|
||||
} else {
|
||||
(0, 0) // CPUID does not support "Extended Features"
|
||||
};
|
||||
|
||||
// EAX = 0x8000_0000, ECX = 0: Get Highest Extended Function Supported
|
||||
// - EAX returns the max leaf value for extended information, that is,
|
||||
// `cpuid` calls in range [0x8000_0000; u32::MAX]:
|
||||
let CpuidResult {
|
||||
eax: extended_max_basic_leaf,
|
||||
..
|
||||
} = unsafe { __cpuid(0x8000_0000_u32) };
|
||||
|
||||
// EAX = 0x8000_0001, ECX=0: Queries "Extended Processor Info and Feature
|
||||
// Bits"
|
||||
let extended_proc_info_ecx = if extended_max_basic_leaf >= 1 {
|
||||
let CpuidResult { ecx, .. } = unsafe { __cpuid(0x8000_0001_u32) };
|
||||
ecx
|
||||
} else {
|
||||
0
|
||||
};
|
||||
|
||||
{
|
||||
// borrows value till the end of this scope:
|
||||
let mut enable = |r, rb, f| {
|
||||
if bit::test(r as usize, rb) {
|
||||
value.set(f as u32);
|
||||
}
|
||||
};
|
||||
|
||||
enable(proc_info_ecx, 0, __Feature::sse3);
|
||||
enable(proc_info_ecx, 9, __Feature::ssse3);
|
||||
enable(proc_info_ecx, 12, __Feature::fma);
|
||||
enable(proc_info_ecx, 19, __Feature::sse4_1);
|
||||
enable(proc_info_ecx, 20, __Feature::sse4_2);
|
||||
enable(proc_info_ecx, 23, __Feature::popcnt);
|
||||
enable(proc_info_ecx, 25, __Feature::aes);
|
||||
enable(proc_info_edx, 4, __Feature::tsc);
|
||||
enable(proc_info_edx, 23, __Feature::mmx);
|
||||
enable(proc_info_edx, 24, __Feature::fxsr);
|
||||
enable(proc_info_edx, 25, __Feature::sse);
|
||||
enable(proc_info_edx, 26, __Feature::sse2);
|
||||
|
||||
enable(extended_features_ebx, 3, __Feature::bmi);
|
||||
enable(extended_features_ebx, 8, __Feature::bmi2);
|
||||
|
||||
// `XSAVE` and `AVX` support:
|
||||
let cpu_xsave = bit::test(proc_info_ecx as usize, 26);
|
||||
if cpu_xsave {
|
||||
// 0. Here the CPU supports `XSAVE`.
|
||||
|
||||
// 1. Detect `OSXSAVE`, that is, whether the OS is AVX enabled and
|
||||
// supports saving the state of the AVX/AVX2 vector registers on
|
||||
// context-switches, see:
|
||||
//
|
||||
// - [intel: is avx enabled?][is_avx_enabled],
|
||||
// - [mozilla: sse.cpp][mozilla_sse_cpp].
|
||||
//
|
||||
// [is_avx_enabled]: https://software.intel.com/en-us/blogs/2011/04/14/is-avx-enabled
|
||||
// [mozilla_sse_cpp]: https://hg.mozilla.org/mozilla-central/file/64bab5cbb9b6/mozglue/build/SSE.cpp#l190
|
||||
let cpu_osxsave = bit::test(proc_info_ecx as usize, 27);
|
||||
|
||||
// 2. The OS must have signaled the CPU that it supports saving and
|
||||
// restoring the SSE and AVX registers by setting `XCR0.SSE[1]` and
|
||||
// `XCR0.AVX[2]` to `1`.
|
||||
//
|
||||
// This is safe because the CPU supports `xsave`
|
||||
let xcr0 = unsafe { _xgetbv(0) };
|
||||
let os_avx_support = xcr0 & 6 == 6;
|
||||
let os_avx512_support = xcr0 & 224 == 224;
|
||||
|
||||
// Only if the OS and the CPU support saving/restoring the AVX
|
||||
// registers we enable `xsave` support:
|
||||
if cpu_osxsave && os_avx_support {
|
||||
// See "13.3 ENABLING THE XSAVE FEATURE SET AND XSAVE-ENABLED
|
||||
// FEATURES" in the "Intel® 64 and IA-32 Architectures Software
|
||||
// Developer’s Manual, Volume 1: Basic Architecture":
|
||||
//
|
||||
// "Software enables the XSAVE feature set by setting
|
||||
// CR4.OSXSAVE[bit 18] to 1 (e.g., with the MOV to CR4
|
||||
// instruction). If this bit is 0, execution of any of XGETBV,
|
||||
// XRSTOR, XRSTORS, XSAVE, XSAVEC, XSAVEOPT, XSAVES, and XSETBV
|
||||
// causes an invalid-opcode exception (#UD)"
|
||||
//
|
||||
enable(proc_info_ecx, 26, __Feature::xsave);
|
||||
|
||||
// For `xsaveopt`, `xsavec`, and `xsaves` we need to query:
|
||||
// Processor Extended State Enumeration Sub-leaf (EAX = 0DH,
|
||||
// ECX = 1):
|
||||
if max_basic_leaf >= 0xd {
|
||||
let CpuidResult {
|
||||
eax: proc_extended_state1_eax,
|
||||
..
|
||||
} = unsafe { __cpuid_count(0xd_u32, 1) };
|
||||
enable(proc_extended_state1_eax, 0, __Feature::xsaveopt);
|
||||
enable(proc_extended_state1_eax, 1, __Feature::xsavec);
|
||||
enable(proc_extended_state1_eax, 3, __Feature::xsaves);
|
||||
}
|
||||
|
||||
// And AVX/AVX2:
|
||||
enable(proc_info_ecx, 28, __Feature::avx);
|
||||
enable(extended_features_ebx, 5, __Feature::avx2);
|
||||
|
||||
// For AVX-512 the OS also needs to support saving/restoring
|
||||
// the extended state, only then we enable AVX-512 support:
|
||||
if os_avx512_support {
|
||||
enable(extended_features_ebx, 16, __Feature::avx512f);
|
||||
enable(extended_features_ebx, 17, __Feature::avx512dq);
|
||||
enable(extended_features_ebx, 21, __Feature::avx512_ifma);
|
||||
enable(extended_features_ebx, 26, __Feature::avx512pf);
|
||||
enable(extended_features_ebx, 27, __Feature::avx512er);
|
||||
enable(extended_features_ebx, 28, __Feature::avx512cd);
|
||||
enable(extended_features_ebx, 30, __Feature::avx512bw);
|
||||
enable(extended_features_ebx, 31, __Feature::avx512vl);
|
||||
enable(extended_features_ecx, 1, __Feature::avx512_vbmi);
|
||||
enable(
|
||||
extended_features_ecx,
|
||||
14,
|
||||
__Feature::avx512_vpopcntdq,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// This detects ABM on AMD CPUs and LZCNT on Intel CPUs.
|
||||
// On intel CPUs with popcnt, lzcnt implements the
|
||||
// "missing part" of ABM, so we map both to the same
|
||||
// internal feature.
|
||||
//
|
||||
// The `cfg_feature_enabled!("lzcnt")` macro then
|
||||
// internally maps to __Feature::abm.
|
||||
enable(extended_proc_info_ecx, 5, __Feature::abm);
|
||||
if vendor_id == *b"AuthenticAMD" {
|
||||
// These features are only available on AMD CPUs:
|
||||
enable(extended_proc_info_ecx, 6, __Feature::sse4a);
|
||||
enable(extended_proc_info_ecx, 21, __Feature::tbm);
|
||||
}
|
||||
}
|
||||
|
||||
value
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
extern crate cupid;
|
||||
|
||||
#[test]
|
||||
fn dump() {
|
||||
println!("aes: {:?}", cfg_feature_enabled!("aes"));
|
||||
println!("tsc: {:?}", cfg_feature_enabled!("tsc"));
|
||||
println!("sse: {:?}", cfg_feature_enabled!("sse"));
|
||||
println!("sse2: {:?}", cfg_feature_enabled!("sse2"));
|
||||
println!("sse3: {:?}", cfg_feature_enabled!("sse3"));
|
||||
println!("ssse3: {:?}", cfg_feature_enabled!("ssse3"));
|
||||
println!("sse4.1: {:?}", cfg_feature_enabled!("sse4.1"));
|
||||
println!("sse4.2: {:?}", cfg_feature_enabled!("sse4.2"));
|
||||
println!("sse4a: {:?}", cfg_feature_enabled!("sse4a"));
|
||||
println!("avx: {:?}", cfg_feature_enabled!("avx"));
|
||||
println!("avx2: {:?}", cfg_feature_enabled!("avx2"));
|
||||
println!("avx512f {:?}", cfg_feature_enabled!("avx512f"));
|
||||
println!("avx512cd {:?}", cfg_feature_enabled!("avx512cd"));
|
||||
println!("avx512er {:?}", cfg_feature_enabled!("avx512er"));
|
||||
println!("avx512pf {:?}", cfg_feature_enabled!("avx512pf"));
|
||||
println!("avx512bw {:?}", cfg_feature_enabled!("avx512bw"));
|
||||
println!("avx512dq {:?}", cfg_feature_enabled!("avx512dq"));
|
||||
println!("avx512vl {:?}", cfg_feature_enabled!("avx512vl"));
|
||||
println!("avx512_ifma {:?}", cfg_feature_enabled!("avx512ifma"));
|
||||
println!("avx512_vbmi {:?}", cfg_feature_enabled!("avx512vbmi"));
|
||||
println!(
|
||||
"avx512_vpopcntdq {:?}",
|
||||
cfg_feature_enabled!("avx512vpopcntdq")
|
||||
);
|
||||
println!("fma: {:?}", cfg_feature_enabled!("fma"));
|
||||
println!("abm: {:?}", cfg_feature_enabled!("abm"));
|
||||
println!("bmi: {:?}", cfg_feature_enabled!("bmi"));
|
||||
println!("bmi2: {:?}", cfg_feature_enabled!("bmi2"));
|
||||
println!("tbm: {:?}", cfg_feature_enabled!("tbm"));
|
||||
println!("popcnt: {:?}", cfg_feature_enabled!("popcnt"));
|
||||
println!("lzcnt: {:?}", cfg_feature_enabled!("lzcnt"));
|
||||
println!("fxsr: {:?}", cfg_feature_enabled!("fxsr"));
|
||||
println!("xsave: {:?}", cfg_feature_enabled!("xsave"));
|
||||
println!("xsaveopt: {:?}", cfg_feature_enabled!("xsaveopt"));
|
||||
println!("xsaves: {:?}", cfg_feature_enabled!("xsaves"));
|
||||
println!("xsavec: {:?}", cfg_feature_enabled!("xsavec"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn compare_with_cupid() {
|
||||
let information = cupid::master().unwrap();
|
||||
assert_eq!(cfg_feature_enabled!("aes"), information.aesni());
|
||||
assert_eq!(cfg_feature_enabled!("tsc"), information.tsc());
|
||||
assert_eq!(cfg_feature_enabled!("sse"), information.sse());
|
||||
assert_eq!(cfg_feature_enabled!("sse2"), information.sse2());
|
||||
assert_eq!(cfg_feature_enabled!("sse3"), information.sse3());
|
||||
assert_eq!(cfg_feature_enabled!("ssse3"), information.ssse3());
|
||||
assert_eq!(cfg_feature_enabled!("sse4.1"), information.sse4_1());
|
||||
assert_eq!(cfg_feature_enabled!("sse4.2"), information.sse4_2());
|
||||
assert_eq!(cfg_feature_enabled!("sse4a"), information.sse4a());
|
||||
assert_eq!(cfg_feature_enabled!("avx"), information.avx());
|
||||
assert_eq!(cfg_feature_enabled!("avx2"), information.avx2());
|
||||
assert_eq!(cfg_feature_enabled!("avx512f"), information.avx512f());
|
||||
assert_eq!(cfg_feature_enabled!("avx512cd"), information.avx512cd());
|
||||
assert_eq!(cfg_feature_enabled!("avx512er"), information.avx512er());
|
||||
assert_eq!(cfg_feature_enabled!("avx512pf"), information.avx512pf());
|
||||
assert_eq!(cfg_feature_enabled!("avx512bw"), information.avx512bw());
|
||||
assert_eq!(cfg_feature_enabled!("avx512dq"), information.avx512dq());
|
||||
assert_eq!(cfg_feature_enabled!("avx512vl"), information.avx512vl());
|
||||
assert_eq!(
|
||||
cfg_feature_enabled!("avx512ifma"),
|
||||
information.avx512_ifma()
|
||||
);
|
||||
assert_eq!(
|
||||
cfg_feature_enabled!("avx512vbmi"),
|
||||
information.avx512_vbmi()
|
||||
);
|
||||
assert_eq!(
|
||||
cfg_feature_enabled!("avx512vpopcntdq"),
|
||||
information.avx512_vpopcntdq()
|
||||
);
|
||||
assert_eq!(cfg_feature_enabled!("fma"), information.fma());
|
||||
assert_eq!(cfg_feature_enabled!("bmi"), information.bmi1());
|
||||
assert_eq!(cfg_feature_enabled!("bmi2"), information.bmi2());
|
||||
assert_eq!(cfg_feature_enabled!("popcnt"), information.popcnt());
|
||||
assert_eq!(cfg_feature_enabled!("abm"), information.lzcnt());
|
||||
assert_eq!(cfg_feature_enabled!("tbm"), information.tbm());
|
||||
assert_eq!(cfg_feature_enabled!("lzcnt"), information.lzcnt());
|
||||
assert_eq!(cfg_feature_enabled!("xsave"), information.xsave());
|
||||
assert_eq!(cfg_feature_enabled!("xsaveopt"), information.xsaveopt());
|
||||
assert_eq!(
|
||||
cfg_feature_enabled!("xsavec"),
|
||||
information.xsavec_and_xrstor()
|
||||
);
|
||||
assert_eq!(
|
||||
cfg_feature_enabled!("xsaves"),
|
||||
information.xsaves_xrstors_and_ia32_xss()
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -1,47 +0,0 @@
|
||||
#![feature(cfg_target_feature)]
|
||||
#![cfg_attr(feature = "strict", deny(warnings))]
|
||||
#![cfg_attr(feature = "cargo-clippy",
|
||||
allow(option_unwrap_used, print_stdout, use_debug))]
|
||||
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
#[macro_use]
|
||||
extern crate coresimd;
|
||||
|
||||
#[test]
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
fn x86_all() {
|
||||
println!("sse: {:?}", cfg_feature_enabled!("sse"));
|
||||
println!("sse2: {:?}", cfg_feature_enabled!("sse2"));
|
||||
println!("sse3: {:?}", cfg_feature_enabled!("sse3"));
|
||||
println!("ssse3: {:?}", cfg_feature_enabled!("ssse3"));
|
||||
println!("sse4.1: {:?}", cfg_feature_enabled!("sse4.1"));
|
||||
println!("sse4.2: {:?}", cfg_feature_enabled!("sse4.2"));
|
||||
println!("sse4a: {:?}", cfg_feature_enabled!("sse4a"));
|
||||
println!("avx: {:?}", cfg_feature_enabled!("avx"));
|
||||
println!("avx2: {:?}", cfg_feature_enabled!("avx2"));
|
||||
println!("avx512f {:?}", cfg_feature_enabled!("avx512f"));
|
||||
println!("avx512cd {:?}", cfg_feature_enabled!("avx512cd"));
|
||||
println!("avx512er {:?}", cfg_feature_enabled!("avx512er"));
|
||||
println!("avx512pf {:?}", cfg_feature_enabled!("avx512pf"));
|
||||
println!("avx512bw {:?}", cfg_feature_enabled!("avx512bw"));
|
||||
println!("avx512dq {:?}", cfg_feature_enabled!("avx512dq"));
|
||||
println!("avx512vl {:?}", cfg_feature_enabled!("avx512vl"));
|
||||
println!("avx512_ifma {:?}", cfg_feature_enabled!("avx512ifma"));
|
||||
println!("avx512_vbmi {:?}", cfg_feature_enabled!("avx512vbmi"));
|
||||
println!(
|
||||
"avx512_vpopcntdq {:?}",
|
||||
cfg_feature_enabled!("avx512vpopcntdq")
|
||||
);
|
||||
println!("fma: {:?}", cfg_feature_enabled!("fma"));
|
||||
println!("abm: {:?}", cfg_feature_enabled!("abm"));
|
||||
println!("bmi: {:?}", cfg_feature_enabled!("bmi"));
|
||||
println!("bmi2: {:?}", cfg_feature_enabled!("bmi2"));
|
||||
println!("tbm: {:?}", cfg_feature_enabled!("tbm"));
|
||||
println!("popcnt: {:?}", cfg_feature_enabled!("popcnt"));
|
||||
println!("lzcnt: {:?}", cfg_feature_enabled!("lzcnt"));
|
||||
println!("fxsr: {:?}", cfg_feature_enabled!("fxsr"));
|
||||
println!("xsave: {:?}", cfg_feature_enabled!("xsave"));
|
||||
println!("xsaveopt: {:?}", cfg_feature_enabled!("xsaveopt"));
|
||||
println!("xsaves: {:?}", cfg_feature_enabled!("xsaves"));
|
||||
println!("xsavec: {:?}", cfg_feature_enabled!("xsavec"));
|
||||
}
|
||||
@@ -1,6 +1,8 @@
|
||||
//! 128-bit wide vector types
|
||||
|
||||
use simd_llvm::*;
|
||||
use prelude::v1::*;
|
||||
|
||||
use coresimd::simd_llvm::*;
|
||||
|
||||
define_ty! { f64x2, f64, f64 }
|
||||
define_impl! { f64x2, f64, 2, i64x2, x0, x1 }
|
||||
@@ -1,6 +1,8 @@
|
||||
//! 256-bit wide vector types
|
||||
|
||||
use simd_llvm::*;
|
||||
use prelude::v1::*;
|
||||
|
||||
use coresimd::simd_llvm::*;
|
||||
|
||||
define_ty! { f64x4, f64, f64, f64, f64 }
|
||||
define_impl! { f64x4, f64, 4, i64x4, x0, x1, x2, x3 }
|
||||
@@ -1,6 +1,8 @@
|
||||
//! 512-bit wide vector types
|
||||
|
||||
use simd_llvm::*;
|
||||
use prelude::v1::*;
|
||||
|
||||
use coresimd::simd_llvm::*;
|
||||
|
||||
define_ty! { f64x8, f64, f64, f64, f64, f64, f64, f64, f64 }
|
||||
define_impl! { f64x8, f64, 8, i64x8, x0, x1, x2, x3, x4, x5, x6, x7 }
|
||||
@@ -1,6 +1,8 @@
|
||||
//! 64-bit wide vector types
|
||||
|
||||
use simd_llvm::*;
|
||||
use prelude::v1::*;
|
||||
|
||||
use coresimd::simd_llvm::*;
|
||||
|
||||
define_ty_doc! {
|
||||
f32x2, f32, f32 |
|
||||
@@ -34,7 +34,7 @@ pub unsafe fn __writeeflags(eflags: u64) {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use x86::i386::*;
|
||||
use coresimd::x86::i386::*;
|
||||
|
||||
#[test]
|
||||
fn test_eflags() {
|
||||
@@ -51,7 +51,7 @@ pub unsafe fn _fxrstor(mem_addr: *const u8) {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use x86::i386::fxsr;
|
||||
use coresimd::x86::i386::fxsr;
|
||||
use stdsimd_test::simd_test;
|
||||
use std::fmt;
|
||||
|
||||
@@ -54,7 +54,7 @@ extern "C" {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use stdsimd_test::simd_test;
|
||||
use x86::i386::rdtsc;
|
||||
use coresimd::x86::i386::rdtsc;
|
||||
|
||||
#[simd_test = "sse2"]
|
||||
unsafe fn _rdtsc() {
|
||||
@@ -42,7 +42,7 @@ pub unsafe fn _popcnt32(x: i32) -> i32 {
|
||||
mod tests {
|
||||
use stdsimd_test::simd_test;
|
||||
|
||||
use x86::i586::abm;
|
||||
use coresimd::x86::i586::abm;
|
||||
|
||||
#[simd_test = "lzcnt"]
|
||||
unsafe fn _lzcnt_u32() {
|
||||
@@ -13,17 +13,17 @@
|
||||
//! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
|
||||
//! [wiki]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions
|
||||
|
||||
use core::mem;
|
||||
use core::ptr;
|
||||
use coresimd::simd_llvm::*;
|
||||
use coresimd::v128::*;
|
||||
use coresimd::v256::*;
|
||||
use coresimd::x86::*;
|
||||
use intrinsics;
|
||||
use mem;
|
||||
use ptr;
|
||||
|
||||
#[cfg(test)]
|
||||
use stdsimd_test::assert_instr;
|
||||
|
||||
use simd_llvm::*;
|
||||
use v128::*;
|
||||
use v256::*;
|
||||
use x86::*;
|
||||
|
||||
/// Add packed double-precision (64-bit) floating-point elements
|
||||
/// in `a` and `b`.
|
||||
#[inline]
|
||||
@@ -1547,7 +1547,7 @@ pub unsafe fn _mm256_lddqu_si256(mem_addr: *const __m256i) -> __m256i {
|
||||
#[target_feature(enable = "avx")]
|
||||
#[cfg_attr(test, assert_instr(vmovntps))] // FIXME vmovntdq
|
||||
pub unsafe fn _mm256_stream_si256(mem_addr: *const __m256i, a: __m256i) {
|
||||
::core::intrinsics::nontemporal_store(mem::transmute(mem_addr), a);
|
||||
intrinsics::nontemporal_store(mem::transmute(mem_addr), a);
|
||||
}
|
||||
|
||||
/// Moves double-precision values from a 256-bit vector of [4 x double]
|
||||
@@ -1557,7 +1557,7 @@ pub unsafe fn _mm256_stream_si256(mem_addr: *const __m256i, a: __m256i) {
|
||||
#[target_feature(enable = "avx")]
|
||||
#[cfg_attr(test, assert_instr(vmovntps))] // FIXME vmovntpd
|
||||
pub unsafe fn _mm256_stream_pd(mem_addr: *const f64, a: __m256d) {
|
||||
::core::intrinsics::nontemporal_store(mem::transmute(mem_addr), a);
|
||||
intrinsics::nontemporal_store(mem::transmute(mem_addr), a);
|
||||
}
|
||||
|
||||
/// Moves single-precision floating point values from a 256-bit vector
|
||||
@@ -1568,7 +1568,7 @@ pub unsafe fn _mm256_stream_pd(mem_addr: *const f64, a: __m256d) {
|
||||
#[target_feature(enable = "avx")]
|
||||
#[cfg_attr(test, assert_instr(vmovntps))]
|
||||
pub unsafe fn _mm256_stream_ps(mem_addr: *const f32, a: __m256) {
|
||||
::core::intrinsics::nontemporal_store(mem::transmute(mem_addr), a);
|
||||
intrinsics::nontemporal_store(mem::transmute(mem_addr), a);
|
||||
}
|
||||
|
||||
/// Compute the approximate reciprocal of packed single-precision (32-bit)
|
||||
@@ -2366,7 +2366,6 @@ pub unsafe fn _mm256_loadu2_m128d(
|
||||
pub unsafe fn _mm256_loadu2_m128i(
|
||||
hiaddr: *const __m128i, loaddr: *const __m128i
|
||||
) -> __m256i {
|
||||
use x86::i586::sse2::_mm_loadu_si128;
|
||||
let a = _mm256_castsi128_si256(_mm_loadu_si128(loaddr));
|
||||
_mm256_insertf128_si256(a, _mm_loadu_si128(hiaddr), 1)
|
||||
}
|
||||
@@ -2412,7 +2411,6 @@ pub unsafe fn _mm256_storeu2_m128d(
|
||||
pub unsafe fn _mm256_storeu2_m128i(
|
||||
hiaddr: *mut __m128i, loaddr: *mut __m128i, a: __m256i
|
||||
) {
|
||||
use x86::i586::sse2::_mm_storeu_si128;
|
||||
let lo = _mm256_castsi256_si128(a);
|
||||
_mm_storeu_si128(loaddr, lo);
|
||||
let hi = _mm256_extractf128_si256(a, 1);
|
||||
@@ -2579,7 +2577,7 @@ mod tests {
|
||||
use stdsimd_test::simd_test;
|
||||
use test::black_box; // Used to inhibit constant-folding.
|
||||
|
||||
use x86::*;
|
||||
use coresimd::x86::*;
|
||||
|
||||
#[simd_test = "avx"]
|
||||
unsafe fn test_mm256_add_pd() {
|
||||
@@ -18,14 +18,13 @@
|
||||
//! [wiki_avx]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions
|
||||
//! [wiki_fma]: https://en.wikipedia.org/wiki/Fused_multiply-accumulate
|
||||
|
||||
use core::mem;
|
||||
|
||||
use simd_llvm::*;
|
||||
use v256::*;
|
||||
use v128::*;
|
||||
use v64::*;
|
||||
use v32::*;
|
||||
use x86::*;
|
||||
use coresimd::simd_llvm::*;
|
||||
use coresimd::v256::*;
|
||||
use coresimd::v128::*;
|
||||
use coresimd::v64::*;
|
||||
use coresimd::v32::*;
|
||||
use coresimd::x86::*;
|
||||
use mem;
|
||||
|
||||
#[cfg(test)]
|
||||
use stdsimd_test::assert_instr;
|
||||
@@ -1912,7 +1911,6 @@ pub unsafe fn _mm256_permute2x128_si256(
|
||||
#[cfg_attr(test, assert_instr(vpermpd, imm8 = 1))]
|
||||
#[rustc_args_required_const(1)]
|
||||
pub unsafe fn _mm256_permute4x64_pd(a: __m256d, imm8: i32) -> __m256d {
|
||||
use x86::i586::avx::_mm256_undefined_pd;
|
||||
let imm8 = (imm8 & 0xFF) as u8;
|
||||
let undef = _mm256_undefined_pd();
|
||||
macro_rules! shuffle_done {
|
||||
@@ -2024,10 +2022,13 @@ pub unsafe fn _mm256_shuffle_epi8(a: __m256i, b: __m256i) -> __m256i {
|
||||
/// # #[macro_use] extern crate stdsimd;
|
||||
/// #
|
||||
/// # fn main() {
|
||||
/// # if cfg_feature_enabled!("avx2") {
|
||||
/// # if is_target_feature_detected!("avx2") {
|
||||
/// # #[target_feature(enable = "avx2")]
|
||||
/// # unsafe fn worker() {
|
||||
/// use stdsimd::vendor::*;
|
||||
/// #[cfg(target_arch = "x86")]
|
||||
/// use stdsimd::arch::x86::*;
|
||||
/// #[cfg(target_arch = "x86_64")]
|
||||
/// use stdsimd::arch::x86_64::*;
|
||||
///
|
||||
/// let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
|
||||
///
|
||||
@@ -2627,10 +2628,13 @@ pub unsafe fn _mm256_subs_epu8(a: __m256i, b: __m256i) -> __m256i {
|
||||
/// # #[macro_use] extern crate stdsimd;
|
||||
/// #
|
||||
/// # fn main() {
|
||||
/// # if cfg_feature_enabled!("avx2") {
|
||||
/// # if is_target_feature_detected!("avx2") {
|
||||
/// # #[target_feature(enable = "avx2")]
|
||||
/// # unsafe fn worker() {
|
||||
/// use stdsimd::vendor::*;
|
||||
/// #[cfg(target_arch = "x86")]
|
||||
/// use stdsimd::arch::x86::*;
|
||||
/// #[cfg(target_arch = "x86_64")]
|
||||
/// use stdsimd::arch::x86_64::*;
|
||||
///
|
||||
/// let a = _mm256_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
||||
/// 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
|
||||
@@ -2673,10 +2677,13 @@ pub unsafe fn _mm256_unpackhi_epi8(a: __m256i, b: __m256i) -> __m256i {
|
||||
/// # #[macro_use] extern crate stdsimd;
|
||||
/// #
|
||||
/// # fn main() {
|
||||
/// # if cfg_feature_enabled!("avx2") {
|
||||
/// # if is_target_feature_detected!("avx2") {
|
||||
/// # #[target_feature(enable = "avx2")]
|
||||
/// # unsafe fn worker() {
|
||||
/// use stdsimd::vendor::*;
|
||||
/// #[cfg(target_arch = "x86")]
|
||||
/// use stdsimd::arch::x86::*;
|
||||
/// #[cfg(target_arch = "x86_64")]
|
||||
/// use stdsimd::arch::x86_64::*;
|
||||
///
|
||||
/// let a = _mm256_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
||||
/// 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
|
||||
@@ -2718,10 +2725,13 @@ pub unsafe fn _mm256_unpacklo_epi8(a: __m256i, b: __m256i) -> __m256i {
|
||||
/// # #[macro_use] extern crate stdsimd;
|
||||
/// #
|
||||
/// # fn main() {
|
||||
/// # if cfg_feature_enabled!("avx2") {
|
||||
/// # if is_target_feature_detected!("avx2") {
|
||||
/// # #[target_feature(enable = "avx2")]
|
||||
/// # unsafe fn worker() {
|
||||
/// use stdsimd::vendor::*;
|
||||
/// #[cfg(target_arch = "x86")]
|
||||
/// use stdsimd::arch::x86::*;
|
||||
/// #[cfg(target_arch = "x86_64")]
|
||||
/// use stdsimd::arch::x86_64::*;
|
||||
///
|
||||
/// let a = _mm256_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
|
||||
/// let b = _mm256_setr_epi16(0,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15);
|
||||
@@ -2759,10 +2769,13 @@ pub unsafe fn _mm256_unpackhi_epi16(a: __m256i, b: __m256i) -> __m256i {
|
||||
/// # #[macro_use] extern crate stdsimd;
|
||||
/// #
|
||||
/// # fn main() {
|
||||
/// # if cfg_feature_enabled!("avx2") {
|
||||
/// # if is_target_feature_detected!("avx2") {
|
||||
/// # #[target_feature(enable = "avx2")]
|
||||
/// # unsafe fn worker() {
|
||||
/// use stdsimd::vendor::*;
|
||||
/// #[cfg(target_arch = "x86")]
|
||||
/// use stdsimd::arch::x86::*;
|
||||
/// #[cfg(target_arch = "x86_64")]
|
||||
/// use stdsimd::arch::x86_64::*;
|
||||
///
|
||||
/// let a = _mm256_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
|
||||
/// let b = _mm256_setr_epi16(0,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15);
|
||||
@@ -2800,10 +2813,13 @@ pub unsafe fn _mm256_unpacklo_epi16(a: __m256i, b: __m256i) -> __m256i {
|
||||
/// # #[macro_use] extern crate stdsimd;
|
||||
/// #
|
||||
/// # fn main() {
|
||||
/// # if cfg_feature_enabled!("avx2") {
|
||||
/// # if is_target_feature_detected!("avx2") {
|
||||
/// # #[target_feature(enable = "avx2")]
|
||||
/// # unsafe fn worker() {
|
||||
/// use stdsimd::vendor::*;
|
||||
/// #[cfg(target_arch = "x86")]
|
||||
/// use stdsimd::arch::x86::*;
|
||||
/// #[cfg(target_arch = "x86_64")]
|
||||
/// use stdsimd::arch::x86_64::*;
|
||||
///
|
||||
/// let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
|
||||
/// let b = _mm256_setr_epi32(0,-1,-2,-3,-4,-5,-6,-7);
|
||||
@@ -2840,10 +2856,13 @@ pub unsafe fn _mm256_unpackhi_epi32(a: __m256i, b: __m256i) -> __m256i {
|
||||
/// # #[macro_use] extern crate stdsimd;
|
||||
/// #
|
||||
/// # fn main() {
|
||||
/// # if cfg_feature_enabled!("avx2") {
|
||||
/// # if is_target_feature_detected!("avx2") {
|
||||
/// # #[target_feature(enable = "avx2")]
|
||||
/// # unsafe fn worker() {
|
||||
/// use stdsimd::vendor::*;
|
||||
/// #[cfg(target_arch = "x86")]
|
||||
/// use stdsimd::arch::x86::*;
|
||||
/// #[cfg(target_arch = "x86_64")]
|
||||
/// use stdsimd::arch::x86_64::*;
|
||||
///
|
||||
/// let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
|
||||
/// let b = _mm256_setr_epi32(0,-1,-2,-3,-4,-5,-6,-7);
|
||||
@@ -2877,10 +2896,13 @@ pub unsafe fn _mm256_unpacklo_epi32(a: __m256i, b: __m256i) -> __m256i {
|
||||
/// # #[macro_use] extern crate stdsimd;
|
||||
/// #
|
||||
/// # fn main() {
|
||||
/// # if cfg_feature_enabled!("avx2") {
|
||||
/// # if is_target_feature_detected!("avx2") {
|
||||
/// # #[target_feature(enable = "avx2")]
|
||||
/// # unsafe fn worker() {
|
||||
/// use stdsimd::vendor::*;
|
||||
/// #[cfg(target_arch = "x86")]
|
||||
/// use stdsimd::arch::x86::*;
|
||||
/// #[cfg(target_arch = "x86_64")]
|
||||
/// use stdsimd::arch::x86_64::*;
|
||||
///
|
||||
/// let a = _mm256_setr_epi64x(0, 1, 2, 3);
|
||||
/// let b = _mm256_setr_epi64x(0,-1,-2,-3);
|
||||
@@ -2913,10 +2935,13 @@ pub unsafe fn _mm256_unpackhi_epi64(a: __m256i, b: __m256i) -> __m256i {
|
||||
/// # #[macro_use] extern crate stdsimd;
|
||||
/// #
|
||||
/// # fn main() {
|
||||
/// # if cfg_feature_enabled!("avx2") {
|
||||
/// # if is_target_feature_detected!("avx2") {
|
||||
/// # #[target_feature(enable = "avx2")]
|
||||
/// # unsafe fn worker() {
|
||||
/// use stdsimd::vendor::*;
|
||||
/// #[cfg(target_arch = "x86")]
|
||||
/// use stdsimd::arch::x86::*;
|
||||
/// #[cfg(target_arch = "x86_64")]
|
||||
/// use stdsimd::arch::x86_64::*;
|
||||
///
|
||||
/// let a = _mm256_setr_epi64x(0, 1, 2, 3);
|
||||
/// let b = _mm256_setr_epi64x(0,-1,-2,-3);
|
||||
@@ -3253,10 +3278,10 @@ extern "C" {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use stdsimd_test::simd_test;
|
||||
|
||||
use x86::*;
|
||||
use std;
|
||||
|
||||
use coresimd::x86::*;
|
||||
|
||||
#[simd_test = "avx2"]
|
||||
unsafe fn test_mm256_abs_epi32() {
|
||||
#[cfg_attr(rustfmt, rustfmt_skip)]
|
||||
@@ -96,7 +96,7 @@ extern "C" {
|
||||
mod tests {
|
||||
use stdsimd_test::simd_test;
|
||||
|
||||
use x86::i586::bmi;
|
||||
use coresimd::x86::i586::bmi;
|
||||
|
||||
#[simd_test = "bmi"]
|
||||
unsafe fn _bextr_u32() {
|
||||
@@ -67,7 +67,7 @@ extern "C" {
|
||||
mod tests {
|
||||
use stdsimd_test::simd_test;
|
||||
|
||||
use x86::i586::bmi2;
|
||||
use coresimd::x86::i586::bmi2;
|
||||
|
||||
#[simd_test = "bmi2"]
|
||||
unsafe fn _pext_u32() {
|
||||
@@ -2,6 +2,8 @@
|
||||
|
||||
#![cfg_attr(feature = "cargo-clippy", allow(stutter))]
|
||||
|
||||
use mem;
|
||||
|
||||
#[cfg(test)]
|
||||
use stdsimd_test::assert_instr;
|
||||
|
||||
@@ -45,7 +47,7 @@ pub struct CpuidResult {
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(cpuid))]
|
||||
pub unsafe fn __cpuid_count(leaf: u32, sub_leaf: u32) -> CpuidResult {
|
||||
let mut r = ::core::mem::uninitialized::<CpuidResult>();
|
||||
let mut r = mem::uninitialized::<CpuidResult>();
|
||||
if cfg!(target_arch = "x86") {
|
||||
asm!("cpuid"
|
||||
: "={eax}"(r.eax), "={ebx}"(r.ebx), "={ecx}"(r.ecx), "={edx}"(r.edx)
|
||||
@@ -77,7 +79,7 @@ pub fn has_cpuid() -> bool {
|
||||
}
|
||||
#[cfg(target_arch = "x86")]
|
||||
{
|
||||
use x86::i386::{__readeflags, __writeeflags};
|
||||
use coresimd::x86::i386::{__readeflags, __writeeflags};
|
||||
|
||||
// On `x86` the `cpuid` instruction is not always available.
|
||||
// This follows the approach indicated in:
|
||||
@@ -119,7 +121,7 @@ pub unsafe fn __get_cpuid_max(leaf: u32) -> (u32, u32) {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use x86::i586::cpuid;
|
||||
use coresimd::x86::i586::cpuid;
|
||||
|
||||
#[test]
|
||||
fn test_always_has_cpuid() {
|
||||
@@ -131,7 +133,7 @@ mod tests {
|
||||
#[cfg(target_arch = "x86")]
|
||||
#[test]
|
||||
fn test_has_cpuid() {
|
||||
use x86::i386::__readeflags;
|
||||
use coresimd::x86::i386::__readeflags;
|
||||
unsafe {
|
||||
let before = __readeflags();
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
//! Streaming SIMD Extensions (SSE)
|
||||
|
||||
use core::mem;
|
||||
use core::ptr;
|
||||
|
||||
use simd_llvm::*;
|
||||
use v128::*;
|
||||
use v64::*;
|
||||
use x86::*;
|
||||
use coresimd::simd_llvm::*;
|
||||
use coresimd::v128::*;
|
||||
use coresimd::v64::*;
|
||||
use coresimd::x86::*;
|
||||
use intrinsics;
|
||||
use mem;
|
||||
use ptr;
|
||||
|
||||
#[cfg(test)]
|
||||
use stdsimd_test::assert_instr;
|
||||
@@ -873,12 +873,15 @@ pub unsafe fn _mm_movemask_ps(a: __m128) -> i32 {
|
||||
/// #
|
||||
/// # // The real main function
|
||||
/// # fn main() {
|
||||
/// # if cfg_feature_enabled!("sse") {
|
||||
/// # if is_target_feature_detected!("sse") {
|
||||
/// # #[target_feature(enable = "sse")]
|
||||
/// # unsafe fn worker() {
|
||||
/// #
|
||||
/// # use stdsimd::vendor::*;
|
||||
/// #
|
||||
/// #[cfg(target_arch = "x86")]
|
||||
/// use stdsimd::arch::x86::*;
|
||||
/// #[cfg(target_arch = "x86_64")]
|
||||
/// use stdsimd::arch::x86_64::*;
|
||||
///
|
||||
/// unsafe {
|
||||
/// let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
|
||||
/// let data: [f32; 4] = [5.0, 6.0, 7.0, 8.0];
|
||||
@@ -924,12 +927,15 @@ pub unsafe fn _mm_loadh_pi(a: __m128, p: *const __m64) -> __m128 {
|
||||
/// #
|
||||
/// # // The real main function
|
||||
/// # fn main() {
|
||||
/// # if cfg_feature_enabled!("sse") {
|
||||
/// # if is_target_feature_detected!("sse") {
|
||||
/// # #[target_feature(enable = "sse")]
|
||||
/// # unsafe fn worker() {
|
||||
/// #
|
||||
/// # use stdsimd::vendor::*;
|
||||
/// #
|
||||
/// #[cfg(target_arch = "x86")]
|
||||
/// use stdsimd::arch::x86::*;
|
||||
/// #[cfg(target_arch = "x86_64")]
|
||||
/// use stdsimd::arch::x86_64::*;
|
||||
///
|
||||
/// unsafe {
|
||||
/// let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
|
||||
/// let data: [f32; 4] = [5.0, 6.0, 7.0, 8.0];
|
||||
@@ -1684,7 +1690,7 @@ extern "C" {
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(movntps))]
|
||||
pub unsafe fn _mm_stream_ps(mem_addr: *mut f32, a: __m128) {
|
||||
::core::intrinsics::nontemporal_store(mem::transmute(mem_addr), a);
|
||||
intrinsics::nontemporal_store(mem::transmute(mem_addr), a);
|
||||
}
|
||||
|
||||
/// Store 64-bits of integer data from a into memory using a non-temporal
|
||||
@@ -1701,12 +1707,13 @@ mod tests {
|
||||
use std::mem::transmute;
|
||||
use std::f32::NAN;
|
||||
|
||||
use v128::*;
|
||||
use v64::*;
|
||||
use x86::*;
|
||||
use stdsimd_test::simd_test;
|
||||
use test::black_box; // Used to inhibit constant-folding.
|
||||
|
||||
use coresimd::v128::*;
|
||||
use coresimd::v64::*;
|
||||
use coresimd::x86::*;
|
||||
|
||||
#[simd_test = "sse"]
|
||||
unsafe fn test_mm_add_ps() {
|
||||
let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
|
||||
@@ -3,13 +3,13 @@
|
||||
#[cfg(test)]
|
||||
use stdsimd_test::assert_instr;
|
||||
|
||||
use core::mem;
|
||||
use core::ptr;
|
||||
|
||||
use simd_llvm::*;
|
||||
use v128::*;
|
||||
use v64::*;
|
||||
use x86::*;
|
||||
use coresimd::simd_llvm::*;
|
||||
use coresimd::v128::*;
|
||||
use coresimd::v64::*;
|
||||
use coresimd::x86::*;
|
||||
use intrinsics;
|
||||
use mem;
|
||||
use ptr;
|
||||
|
||||
/// Provide a hint to the processor that the code sequence is a spin-wait loop.
|
||||
///
|
||||
@@ -952,7 +952,7 @@ pub unsafe fn _mm_storel_epi64(mem_addr: *mut __m128i, a: __m128i) {
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[cfg_attr(test, assert_instr(movntps))] // FIXME movntdq
|
||||
pub unsafe fn _mm_stream_si128(mem_addr: *mut __m128i, a: __m128i) {
|
||||
::core::intrinsics::nontemporal_store(mem_addr, a);
|
||||
::intrinsics::nontemporal_store(mem_addr, a);
|
||||
}
|
||||
|
||||
/// Stores a 32-bit integer value in the specified memory location.
|
||||
@@ -962,7 +962,7 @@ pub unsafe fn _mm_stream_si128(mem_addr: *mut __m128i, a: __m128i) {
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[cfg_attr(test, assert_instr(movnti))]
|
||||
pub unsafe fn _mm_stream_si32(mem_addr: *mut i32, a: i32) {
|
||||
::core::intrinsics::nontemporal_store(mem_addr, a);
|
||||
::intrinsics::nontemporal_store(mem_addr, a);
|
||||
}
|
||||
|
||||
/// Return a vector where the low element is extracted from `a` and its upper
|
||||
@@ -1974,7 +1974,7 @@ pub unsafe fn _mm_loadl_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[cfg_attr(test, assert_instr(movntps))] // FIXME movntpd
|
||||
pub unsafe fn _mm_stream_pd(mem_addr: *mut f64, a: __m128d) {
|
||||
::core::intrinsics::nontemporal_store(mem::transmute(mem_addr), a);
|
||||
intrinsics::nontemporal_store(mem::transmute(mem_addr), a);
|
||||
}
|
||||
|
||||
/// Stores the lower 64 bits of a 128-bit vector of [2 x double] to a
|
||||
@@ -2382,8 +2382,8 @@ mod tests {
|
||||
|
||||
use stdsimd_test::simd_test;
|
||||
use test::black_box; // Used to inhibit constant-folding.
|
||||
use x86::*;
|
||||
use v128::*;
|
||||
use coresimd::x86::*;
|
||||
use coresimd::v128::*;
|
||||
|
||||
#[simd_test = "sse2"]
|
||||
unsafe fn test_mm_pause() {
|
||||
@@ -1,8 +1,8 @@
|
||||
//! Streaming SIMD Extensions 3 (SSE3)
|
||||
|
||||
use simd_llvm::{simd_shuffle2, simd_shuffle4};
|
||||
use v128::*;
|
||||
use x86::*;
|
||||
use coresimd::simd_llvm::{simd_shuffle2, simd_shuffle4};
|
||||
use coresimd::v128::*;
|
||||
use coresimd::x86::*;
|
||||
|
||||
#[cfg(test)]
|
||||
use stdsimd_test::assert_instr;
|
||||
@@ -129,7 +129,7 @@ extern "C" {
|
||||
mod tests {
|
||||
use stdsimd_test::simd_test;
|
||||
|
||||
use x86::*;
|
||||
use coresimd::x86::*;
|
||||
|
||||
#[simd_test = "sse3"]
|
||||
unsafe fn test_mm_addsub_ps() {
|
||||
@@ -1,14 +1,16 @@
|
||||
//! Streaming SIMD Extensions 4.1 (SSE4.1)
|
||||
|
||||
use core::mem;
|
||||
use coresimd::simd_llvm::*;
|
||||
use coresimd::v128::*;
|
||||
use coresimd::v64::*;
|
||||
use coresimd::v32::*;
|
||||
use coresimd::v16::*;
|
||||
use coresimd::x86::*;
|
||||
use mem;
|
||||
|
||||
#[cfg(test)]
|
||||
use stdsimd_test::assert_instr;
|
||||
|
||||
use simd_llvm::*;
|
||||
use v128::*;
|
||||
use x86::*;
|
||||
|
||||
// SSE4 rounding constans
|
||||
/// round to nearest
|
||||
pub const _MM_FROUND_TO_NEAREST_INT: i32 = 0x00;
|
||||
@@ -301,7 +303,7 @@ pub unsafe fn _mm_cmpeq_epi64(a: __m128i, b: __m128i) -> __m128i {
|
||||
#[cfg_attr(test, assert_instr(pmovsxbw))]
|
||||
pub unsafe fn _mm_cvtepi8_epi16(a: __m128i) -> __m128i {
|
||||
let a = a.as_i8x16();
|
||||
let a = simd_shuffle8::<_, ::v64::i8x8>(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
|
||||
let a = simd_shuffle8::<_, i8x8>(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
|
||||
mem::transmute(simd_cast::<_, i16x8>(a))
|
||||
}
|
||||
|
||||
@@ -311,7 +313,7 @@ pub unsafe fn _mm_cvtepi8_epi16(a: __m128i) -> __m128i {
|
||||
#[cfg_attr(test, assert_instr(pmovsxbd))]
|
||||
pub unsafe fn _mm_cvtepi8_epi32(a: __m128i) -> __m128i {
|
||||
let a = a.as_i8x16();
|
||||
let a = simd_shuffle4::<_, ::v32::i8x4>(a, a, [0, 1, 2, 3]);
|
||||
let a = simd_shuffle4::<_, i8x4>(a, a, [0, 1, 2, 3]);
|
||||
mem::transmute(simd_cast::<_, i32x4>(a))
|
||||
}
|
||||
|
||||
@@ -322,7 +324,7 @@ pub unsafe fn _mm_cvtepi8_epi32(a: __m128i) -> __m128i {
|
||||
#[cfg_attr(test, assert_instr(pmovsxbq))]
|
||||
pub unsafe fn _mm_cvtepi8_epi64(a: __m128i) -> __m128i {
|
||||
let a = a.as_i8x16();
|
||||
let a = simd_shuffle2::<_, ::v16::i8x2>(a, a, [0, 1]);
|
||||
let a = simd_shuffle2::<_, i8x2>(a, a, [0, 1]);
|
||||
mem::transmute(simd_cast::<_, i64x2>(a))
|
||||
}
|
||||
|
||||
@@ -332,7 +334,7 @@ pub unsafe fn _mm_cvtepi8_epi64(a: __m128i) -> __m128i {
|
||||
#[cfg_attr(test, assert_instr(pmovsxwd))]
|
||||
pub unsafe fn _mm_cvtepi16_epi32(a: __m128i) -> __m128i {
|
||||
let a = a.as_i16x8();
|
||||
let a = simd_shuffle4::<_, ::v64::i16x4>(a, a, [0, 1, 2, 3]);
|
||||
let a = simd_shuffle4::<_, i16x4>(a, a, [0, 1, 2, 3]);
|
||||
mem::transmute(simd_cast::<_, i32x4>(a))
|
||||
}
|
||||
|
||||
@@ -342,7 +344,7 @@ pub unsafe fn _mm_cvtepi16_epi32(a: __m128i) -> __m128i {
|
||||
#[cfg_attr(test, assert_instr(pmovsxwq))]
|
||||
pub unsafe fn _mm_cvtepi16_epi64(a: __m128i) -> __m128i {
|
||||
let a = a.as_i16x8();
|
||||
let a = simd_shuffle2::<_, ::v32::i16x2>(a, a, [0, 1]);
|
||||
let a = simd_shuffle2::<_, i16x2>(a, a, [0, 1]);
|
||||
mem::transmute(simd_cast::<_, i64x2>(a))
|
||||
}
|
||||
|
||||
@@ -352,7 +354,7 @@ pub unsafe fn _mm_cvtepi16_epi64(a: __m128i) -> __m128i {
|
||||
#[cfg_attr(test, assert_instr(pmovsxdq))]
|
||||
pub unsafe fn _mm_cvtepi32_epi64(a: __m128i) -> __m128i {
|
||||
let a = a.as_i32x4();
|
||||
let a = simd_shuffle2::<_, ::v64::i32x2>(a, a, [0, 1]);
|
||||
let a = simd_shuffle2::<_, i32x2>(a, a, [0, 1]);
|
||||
mem::transmute(simd_cast::<_, i64x2>(a))
|
||||
}
|
||||
|
||||
@@ -362,7 +364,7 @@ pub unsafe fn _mm_cvtepi32_epi64(a: __m128i) -> __m128i {
|
||||
#[cfg_attr(test, assert_instr(pmovzxbw))]
|
||||
pub unsafe fn _mm_cvtepu8_epi16(a: __m128i) -> __m128i {
|
||||
let a = a.as_u8x16();
|
||||
let a = simd_shuffle8::<_, ::v64::u8x8>(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
|
||||
let a = simd_shuffle8::<_, u8x8>(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
|
||||
mem::transmute(simd_cast::<_, i16x8>(a))
|
||||
}
|
||||
|
||||
@@ -372,7 +374,7 @@ pub unsafe fn _mm_cvtepu8_epi16(a: __m128i) -> __m128i {
|
||||
#[cfg_attr(test, assert_instr(pmovzxbd))]
|
||||
pub unsafe fn _mm_cvtepu8_epi32(a: __m128i) -> __m128i {
|
||||
let a = a.as_u8x16();
|
||||
let a = simd_shuffle4::<_, ::v32::u8x4>(a, a, [0, 1, 2, 3]);
|
||||
let a = simd_shuffle4::<_, u8x4>(a, a, [0, 1, 2, 3]);
|
||||
mem::transmute(simd_cast::<_, i32x4>(a))
|
||||
}
|
||||
|
||||
@@ -382,7 +384,7 @@ pub unsafe fn _mm_cvtepu8_epi32(a: __m128i) -> __m128i {
|
||||
#[cfg_attr(test, assert_instr(pmovzxbq))]
|
||||
pub unsafe fn _mm_cvtepu8_epi64(a: __m128i) -> __m128i {
|
||||
let a = a.as_u8x16();
|
||||
let a = simd_shuffle2::<_, ::v16::u8x2>(a, a, [0, 1]);
|
||||
let a = simd_shuffle2::<_, u8x2>(a, a, [0, 1]);
|
||||
mem::transmute(simd_cast::<_, i64x2>(a))
|
||||
}
|
||||
|
||||
@@ -393,7 +395,7 @@ pub unsafe fn _mm_cvtepu8_epi64(a: __m128i) -> __m128i {
|
||||
#[cfg_attr(test, assert_instr(pmovzxwd))]
|
||||
pub unsafe fn _mm_cvtepu16_epi32(a: __m128i) -> __m128i {
|
||||
let a = a.as_u16x8();
|
||||
let a = simd_shuffle4::<_, ::v64::u16x4>(a, a, [0, 1, 2, 3]);
|
||||
let a = simd_shuffle4::<_, u16x4>(a, a, [0, 1, 2, 3]);
|
||||
mem::transmute(simd_cast::<_, i32x4>(a))
|
||||
}
|
||||
|
||||
@@ -404,7 +406,7 @@ pub unsafe fn _mm_cvtepu16_epi32(a: __m128i) -> __m128i {
|
||||
#[cfg_attr(test, assert_instr(pmovzxwq))]
|
||||
pub unsafe fn _mm_cvtepu16_epi64(a: __m128i) -> __m128i {
|
||||
let a = a.as_u16x8();
|
||||
let a = simd_shuffle2::<_, ::v32::u16x2>(a, a, [0, 1]);
|
||||
let a = simd_shuffle2::<_, u16x2>(a, a, [0, 1]);
|
||||
mem::transmute(simd_cast::<_, i64x2>(a))
|
||||
}
|
||||
|
||||
@@ -415,7 +417,7 @@ pub unsafe fn _mm_cvtepu16_epi64(a: __m128i) -> __m128i {
|
||||
#[cfg_attr(test, assert_instr(pmovzxdq))]
|
||||
pub unsafe fn _mm_cvtepu32_epi64(a: __m128i) -> __m128i {
|
||||
let a = a.as_u32x4();
|
||||
let a = simd_shuffle2::<_, ::v64::u32x2>(a, a, [0, 1]);
|
||||
let a = simd_shuffle2::<_, u32x2>(a, a, [0, 1]);
|
||||
mem::transmute(simd_cast::<_, i64x2>(a))
|
||||
}
|
||||
|
||||
@@ -549,18 +551,25 @@ pub unsafe fn _mm_ceil_ss(a: __m128, b: __m128) -> __m128 {
|
||||
/// Rounding is done according to the rounding parameter, which can be one of:
|
||||
///
|
||||
/// ```
|
||||
/// use coresimd::vendor;
|
||||
/// extern crate stdsimd;
|
||||
///
|
||||
/// #[cfg(target_arch = "x86")]
|
||||
/// use stdsimd::arch::x86::*;
|
||||
/// #[cfg(target_arch = "x86_64")]
|
||||
/// use stdsimd::arch::x86_64::*;
|
||||
///
|
||||
/// # fn main() {
|
||||
/// // round to nearest, and suppress exceptions:
|
||||
/// (vendor::_MM_FROUND_TO_NEAREST_INT | vendor::_MM_FROUND_NO_EXC);
|
||||
/// (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
|
||||
/// // round down, and suppress exceptions:
|
||||
/// (vendor::_MM_FROUND_TO_NEG_INF | vendor::_MM_FROUND_NO_EXC);
|
||||
/// (_MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
|
||||
/// // round up, and suppress exceptions:
|
||||
/// (vendor::_MM_FROUND_TO_POS_INF | vendor::_MM_FROUND_NO_EXC);
|
||||
/// (_MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
|
||||
/// // truncate, and suppress exceptions:
|
||||
/// (vendor::_MM_FROUND_TO_ZERO | vendor::_MM_FROUND_NO_EXC);
|
||||
/// // use MXCSR.RC; see `vendor::_MM_SET_ROUNDING_MODE`:
|
||||
/// vendor::_MM_FROUND_CUR_DIRECTION;
|
||||
/// (_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
|
||||
/// // use MXCSR.RC; see `_MM_SET_ROUNDING_MODE`:
|
||||
/// _MM_FROUND_CUR_DIRECTION;
|
||||
/// # }
|
||||
/// ```
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
@@ -579,18 +588,25 @@ pub unsafe fn _mm_round_pd(a: __m128d, rounding: i32) -> __m128d {
|
||||
/// Rounding is done according to the rounding parameter, which can be one of:
|
||||
///
|
||||
/// ```
|
||||
/// use coresimd::vendor;
|
||||
/// extern crate stdsimd;
|
||||
///
|
||||
/// #[cfg(target_arch = "x86")]
|
||||
/// use stdsimd::arch::x86::*;
|
||||
/// #[cfg(target_arch = "x86_64")]
|
||||
/// use stdsimd::arch::x86_64::*;
|
||||
///
|
||||
/// # fn main() {
|
||||
/// // round to nearest, and suppress exceptions:
|
||||
/// (vendor::_MM_FROUND_TO_NEAREST_INT | vendor::_MM_FROUND_NO_EXC);
|
||||
/// (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
|
||||
/// // round down, and suppress exceptions:
|
||||
/// (vendor::_MM_FROUND_TO_NEG_INF | vendor::_MM_FROUND_NO_EXC);
|
||||
/// (_MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
|
||||
/// // round up, and suppress exceptions:
|
||||
/// (vendor::_MM_FROUND_TO_POS_INF | vendor::_MM_FROUND_NO_EXC);
|
||||
/// (_MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
|
||||
/// // truncate, and suppress exceptions:
|
||||
/// (vendor::_MM_FROUND_TO_ZERO | vendor::_MM_FROUND_NO_EXC);
|
||||
/// // use MXCSR.RC; see `vendor::_MM_SET_ROUNDING_MODE`:
|
||||
/// vendor::_MM_FROUND_CUR_DIRECTION;
|
||||
/// (_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
|
||||
/// // use MXCSR.RC; see `_MM_SET_ROUNDING_MODE`:
|
||||
/// _MM_FROUND_CUR_DIRECTION;
|
||||
/// # }
|
||||
/// ```
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
@@ -611,18 +627,25 @@ pub unsafe fn _mm_round_ps(a: __m128, rounding: i32) -> __m128 {
|
||||
/// Rounding is done according to the rounding parameter, which can be one of:
|
||||
///
|
||||
/// ```
|
||||
/// use coresimd::vendor;
|
||||
/// extern crate stdsimd;
|
||||
///
|
||||
/// #[cfg(target_arch = "x86")]
|
||||
/// use stdsimd::arch::x86::*;
|
||||
/// #[cfg(target_arch = "x86_64")]
|
||||
/// use stdsimd::arch::x86_64::*;
|
||||
///
|
||||
/// # fn main() {
|
||||
/// // round to nearest, and suppress exceptions:
|
||||
/// (vendor::_MM_FROUND_TO_NEAREST_INT | vendor::_MM_FROUND_NO_EXC);
|
||||
/// (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
|
||||
/// // round down, and suppress exceptions:
|
||||
/// (vendor::_MM_FROUND_TO_NEG_INF | vendor::_MM_FROUND_NO_EXC);
|
||||
/// (_MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
|
||||
/// // round up, and suppress exceptions:
|
||||
/// (vendor::_MM_FROUND_TO_POS_INF | vendor::_MM_FROUND_NO_EXC);
|
||||
/// (_MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
|
||||
/// // truncate, and suppress exceptions:
|
||||
/// (vendor::_MM_FROUND_TO_ZERO | vendor::_MM_FROUND_NO_EXC);
|
||||
/// // use MXCSR.RC; see `vendor::_MM_SET_ROUNDING_MODE`:
|
||||
/// vendor::_MM_FROUND_CUR_DIRECTION;
|
||||
/// (_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
|
||||
/// // use MXCSR.RC; see `_MM_SET_ROUNDING_MODE`:
|
||||
/// _MM_FROUND_CUR_DIRECTION;
|
||||
/// # }
|
||||
/// ```
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
@@ -643,18 +666,25 @@ pub unsafe fn _mm_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d {
|
||||
/// Rounding is done according to the rounding parameter, which can be one of:
|
||||
///
|
||||
/// ```
|
||||
/// use coresimd::vendor;
|
||||
/// extern crate stdsimd;
|
||||
///
|
||||
/// #[cfg(target_arch = "x86")]
|
||||
/// use stdsimd::arch::x86::*;
|
||||
/// #[cfg(target_arch = "x86_64")]
|
||||
/// use stdsimd::arch::x86_64::*;
|
||||
///
|
||||
/// # fn main() {
|
||||
/// // round to nearest, and suppress exceptions:
|
||||
/// (vendor::_MM_FROUND_TO_NEAREST_INT | vendor::_MM_FROUND_NO_EXC);
|
||||
/// (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
|
||||
/// // round down, and suppress exceptions:
|
||||
/// (vendor::_MM_FROUND_TO_NEG_INF | vendor::_MM_FROUND_NO_EXC);
|
||||
/// (_MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
|
||||
/// // round up, and suppress exceptions:
|
||||
/// (vendor::_MM_FROUND_TO_POS_INF | vendor::_MM_FROUND_NO_EXC);
|
||||
/// (_MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
|
||||
/// // truncate, and suppress exceptions:
|
||||
/// (vendor::_MM_FROUND_TO_ZERO | vendor::_MM_FROUND_NO_EXC);
|
||||
/// // use MXCSR.RC; see `vendor::_MM_SET_ROUNDING_MODE`:
|
||||
/// vendor::_MM_FROUND_CUR_DIRECTION;
|
||||
/// (_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
|
||||
/// // use MXCSR.RC; see `_MM_SET_ROUNDING_MODE`:
|
||||
/// _MM_FROUND_CUR_DIRECTION;
|
||||
/// # }
|
||||
/// ```
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
@@ -817,7 +847,7 @@ extern "C" {
|
||||
mod tests {
|
||||
use std::mem;
|
||||
use stdsimd_test::simd_test;
|
||||
use x86::*;
|
||||
use coresimd::x86::*;
|
||||
|
||||
#[simd_test = "sse4.1"]
|
||||
unsafe fn test_mm_blendv_epi8() {
|
||||
@@ -5,8 +5,8 @@
|
||||
#[cfg(test)]
|
||||
use stdsimd_test::assert_instr;
|
||||
|
||||
use v128::*;
|
||||
use x86::*;
|
||||
use coresimd::v128::*;
|
||||
use coresimd::x86::*;
|
||||
|
||||
/// String contains unsigned 8-bit characters *(Default)*
|
||||
pub const _SIDD_UBYTE_OPS: i32 = 0b0000_0000;
|
||||
@@ -102,11 +102,14 @@ pub unsafe fn _mm_cmpistrm(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
|
||||
/// # #[macro_use] extern crate stdsimd;
|
||||
/// #
|
||||
/// # fn main() {
|
||||
/// # if cfg_feature_enabled!("sse4.2") {
|
||||
/// # if is_target_feature_detected!("sse4.2") {
|
||||
/// # #[target_feature(enable = "sse4.2")]
|
||||
/// # unsafe fn worker() {
|
||||
///
|
||||
/// use stdsimd::vendor::*;
|
||||
/// #[cfg(target_arch = "x86")]
|
||||
/// use stdsimd::arch::x86::*;
|
||||
/// #[cfg(target_arch = "x86_64")]
|
||||
/// use stdsimd::arch::x86_64::*;
|
||||
///
|
||||
/// let haystack = b"This is a long string of text data\r\n\tthat extends
|
||||
/// multiple lines";
|
||||
@@ -142,10 +145,13 @@ pub unsafe fn _mm_cmpistrm(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
|
||||
/// # #[macro_use] extern crate stdsimd;
|
||||
/// #
|
||||
/// # fn main() {
|
||||
/// # if cfg_feature_enabled!("sse4.2") {
|
||||
/// # if is_target_feature_detected!("sse4.2") {
|
||||
/// # #[target_feature(enable = "sse4.2")]
|
||||
/// # unsafe fn worker() {
|
||||
/// use stdsimd::vendor::*;
|
||||
/// #[cfg(target_arch = "x86")]
|
||||
/// use stdsimd::arch::x86::*;
|
||||
/// #[cfg(target_arch = "x86_64")]
|
||||
/// use stdsimd::arch::x86_64::*;
|
||||
///
|
||||
/// // Ensure your input is 16 byte aligned
|
||||
/// let password = b"hunter2\0\0\0\0\0\0\0\0\0";
|
||||
@@ -180,10 +186,14 @@ pub unsafe fn _mm_cmpistrm(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
|
||||
/// # #[macro_use] extern crate stdsimd;
|
||||
/// #
|
||||
/// # fn main() {
|
||||
/// # if cfg_feature_enabled!("sse4.2") {
|
||||
/// # if is_target_feature_detected!("sse4.2") {
|
||||
/// # #[target_feature(enable = "sse4.2")]
|
||||
/// # unsafe fn worker() {
|
||||
/// use stdsimd::vendor::*;
|
||||
/// #[cfg(target_arch = "x86")]
|
||||
/// use stdsimd::arch::x86::*;
|
||||
/// #[cfg(target_arch = "x86_64")]
|
||||
/// use stdsimd::arch::x86_64::*;
|
||||
///
|
||||
/// # let b = b":;<=>?@[\\]^_`abc";
|
||||
/// # let b = _mm_loadu_si128(b.as_ptr() as *const _);
|
||||
///
|
||||
@@ -217,10 +227,13 @@ pub unsafe fn _mm_cmpistrm(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
|
||||
/// # #[macro_use] extern crate stdsimd;
|
||||
/// #
|
||||
/// # fn main() {
|
||||
/// # if cfg_feature_enabled!("sse4.2") {
|
||||
/// # if is_target_feature_detected!("sse4.2") {
|
||||
/// # #[target_feature(enable = "sse4.2")]
|
||||
/// # unsafe fn worker() {
|
||||
/// use stdsimd::vendor::*;
|
||||
/// #[cfg(target_arch = "x86")]
|
||||
/// use stdsimd::arch::x86::*;
|
||||
/// #[cfg(target_arch = "x86_64")]
|
||||
/// use stdsimd::arch::x86_64::*;
|
||||
///
|
||||
/// # let mut some_utf16_words = [0u16; 8];
|
||||
/// # let mut more_utf16_words = [0u16; 8];
|
||||
@@ -407,11 +420,14 @@ pub unsafe fn _mm_cmpestrm(
|
||||
/// # #[macro_use] extern crate stdsimd;
|
||||
/// #
|
||||
/// # fn main() {
|
||||
/// # if cfg_feature_enabled!("sse4.2") {
|
||||
/// # if is_target_feature_detected!("sse4.2") {
|
||||
/// # #[target_feature(enable = "sse4.2")]
|
||||
/// # unsafe fn worker() {
|
||||
///
|
||||
/// use stdsimd::vendor::*;
|
||||
/// #[cfg(target_arch = "x86")]
|
||||
/// use stdsimd::arch::x86::*;
|
||||
/// #[cfg(target_arch = "x86_64")]
|
||||
/// use stdsimd::arch::x86_64::*;
|
||||
///
|
||||
/// // The string we want to find a substring in
|
||||
/// let haystack = b"Split \r\n\t line ";
|
||||
@@ -625,7 +641,7 @@ mod tests {
|
||||
use stdsimd_test::simd_test;
|
||||
|
||||
use std::ptr;
|
||||
use x86::*;
|
||||
use coresimd::x86::*;
|
||||
|
||||
// Currently one cannot `load` a &[u8] that is is less than 16
|
||||
// in length. This makes loading strings less than 16 in length
|
||||
@@ -1,14 +1,13 @@
|
||||
//! Supplemental Streaming SIMD Extensions 3 (SSSE3)
|
||||
|
||||
use core::mem;
|
||||
use coresimd::simd_llvm::simd_shuffle16;
|
||||
use coresimd::v128::*;
|
||||
use coresimd::x86::*;
|
||||
use mem;
|
||||
|
||||
#[cfg(test)]
|
||||
use stdsimd_test::assert_instr;
|
||||
|
||||
use simd_llvm::simd_shuffle16;
|
||||
use v128::*;
|
||||
use x86::*;
|
||||
|
||||
/// Compute the absolute value of packed 8-bit signed integers in `a` and
|
||||
/// return the unsigned results.
|
||||
#[inline]
|
||||
@@ -292,7 +291,7 @@ extern "C" {
|
||||
mod tests {
|
||||
use stdsimd_test::simd_test;
|
||||
|
||||
use x86::*;
|
||||
use coresimd::x86::*;
|
||||
|
||||
#[simd_test = "ssse3"]
|
||||
unsafe fn test_mm_abs_epi8() {
|
||||
@@ -263,7 +263,7 @@ pub unsafe fn _tzmsk_u64(x: u64) -> u64 {
|
||||
mod tests {
|
||||
use stdsimd_test::simd_test;
|
||||
|
||||
use x86::i586::tbm;
|
||||
use coresimd::x86::i586::tbm;
|
||||
|
||||
/*
|
||||
#[simd_test = "tbm"]
|
||||
@@ -137,9 +137,11 @@ pub unsafe fn _xrstors(mem_addr: *const u8, rs_mask: u64) {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use x86::i586::xsave;
|
||||
use stdsimd_test::simd_test;
|
||||
use std::fmt;
|
||||
use std::prelude::v1::*;
|
||||
|
||||
use coresimd::x86::i586::xsave;
|
||||
use stdsimd_test::simd_test;
|
||||
|
||||
#[repr(align(64))]
|
||||
struct XsaveArea {
|
||||
@@ -6,7 +6,8 @@
|
||||
//! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref].
|
||||
//!
|
||||
//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
|
||||
use x86::__m128i;
|
||||
|
||||
use coresimd::x86::__m128i;
|
||||
|
||||
#[cfg(test)]
|
||||
use stdsimd_test::assert_instr;
|
||||
@@ -92,7 +93,7 @@ mod tests {
|
||||
|
||||
use stdsimd_test::simd_test;
|
||||
|
||||
use x86::*;
|
||||
use coresimd::x86::*;
|
||||
|
||||
#[simd_test = "aes"]
|
||||
unsafe fn test_mm_aesdec_si128() {
|
||||
@@ -8,9 +8,9 @@
|
||||
//!
|
||||
//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
|
||||
|
||||
use v64::*;
|
||||
use x86::*;
|
||||
use core::mem;
|
||||
use coresimd::v64::*;
|
||||
use coresimd::x86::*;
|
||||
use mem;
|
||||
|
||||
#[cfg(test)]
|
||||
use stdsimd_test::assert_instr;
|
||||
@@ -487,7 +487,7 @@ extern "C" {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use x86::*;
|
||||
use coresimd::x86::*;
|
||||
use stdsimd_test::simd_test;
|
||||
|
||||
#[simd_test = "mmx"]
|
||||
@@ -1,6 +1,6 @@
|
||||
//! `i686` Streaming SIMD Extensions (SSE)
|
||||
|
||||
use x86::*;
|
||||
use coresimd::x86::*;
|
||||
|
||||
#[cfg(test)]
|
||||
use stdsimd_test::assert_instr;
|
||||
@@ -469,7 +469,7 @@ pub unsafe fn _mm_cvtps_pi8(a: __m128) -> __m64 {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use x86::*;
|
||||
use coresimd::x86::*;
|
||||
use stdsimd_test::simd_test;
|
||||
|
||||
#[simd_test = "sse,mmx"]
|
||||
@@ -1,9 +1,8 @@
|
||||
//! `i686`'s Streaming SIMD Extensions 2 (SSE2)
|
||||
|
||||
use core::mem;
|
||||
|
||||
use simd_llvm::simd_extract;
|
||||
use x86::*;
|
||||
use coresimd::simd_llvm::simd_extract;
|
||||
use coresimd::x86::*;
|
||||
use mem;
|
||||
|
||||
#[cfg(test)]
|
||||
use stdsimd_test::assert_instr;
|
||||
@@ -137,7 +136,7 @@ mod tests {
|
||||
|
||||
use stdsimd_test::simd_test;
|
||||
|
||||
use x86::*;
|
||||
use coresimd::x86::*;
|
||||
|
||||
#[simd_test = "sse2,mmx"]
|
||||
unsafe fn test_mm_add_si64() {
|
||||
@@ -1,7 +1,7 @@
|
||||
//! `i686`'s Streaming SIMD Extensions 4.1 (SSE4.1)
|
||||
|
||||
use v128::*;
|
||||
use x86::*;
|
||||
use coresimd::v128::*;
|
||||
use coresimd::x86::*;
|
||||
|
||||
#[cfg(test)]
|
||||
use stdsimd_test::assert_instr;
|
||||
@@ -138,7 +138,7 @@ pub unsafe fn _mm_test_mix_ones_zeros(a: __m128i, mask: __m128i) -> i32 {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use stdsimd_test::simd_test;
|
||||
use x86::*;
|
||||
use coresimd::x86::*;
|
||||
|
||||
#[simd_test = "sse4.1"]
|
||||
unsafe fn test_mm_testz_si128() {
|
||||
@@ -1,8 +1,8 @@
|
||||
//! `i686`'s Streaming SIMD Extensions 4.2 (SSE4.2)
|
||||
|
||||
use simd_llvm::*;
|
||||
use v128::*;
|
||||
use x86::*;
|
||||
use coresimd::simd_llvm::*;
|
||||
use coresimd::v128::*;
|
||||
use coresimd::x86::*;
|
||||
|
||||
#[cfg(test)]
|
||||
use stdsimd_test::assert_instr;
|
||||
@@ -18,7 +18,7 @@ pub unsafe fn _mm_cmpgt_epi64(a: __m128i, b: __m128i) -> __m128i {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use x86::*;
|
||||
use coresimd::x86::*;
|
||||
|
||||
use stdsimd_test::simd_test;
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
//! `i686`'s Streaming SIMD Extensions 4a (`SSE4a`)
|
||||
|
||||
use core::mem;
|
||||
use v128::*;
|
||||
use x86::*;
|
||||
use coresimd::v128::*;
|
||||
use coresimd::x86::*;
|
||||
use mem;
|
||||
|
||||
#[cfg(test)]
|
||||
use stdsimd_test::assert_instr;
|
||||
@@ -75,7 +75,7 @@ pub unsafe fn _mm_stream_ss(p: *mut f32, a: __m128) {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use stdsimd_test::simd_test;
|
||||
use x86::*;
|
||||
use coresimd::x86::*;
|
||||
|
||||
#[simd_test = "sse4a"]
|
||||
unsafe fn test_mm_extract_si64() {
|
||||
@@ -3,7 +3,7 @@
|
||||
#[cfg(test)]
|
||||
use stdsimd_test::assert_instr;
|
||||
|
||||
use x86::*;
|
||||
use coresimd::x86::*;
|
||||
|
||||
/// Compute the absolute value of packed 8-bit integers in `a` and
|
||||
/// return the unsigned results.
|
||||
@@ -223,7 +223,7 @@ extern "C" {
|
||||
mod tests {
|
||||
use stdsimd_test::simd_test;
|
||||
|
||||
use x86::*;
|
||||
use coresimd::x86::*;
|
||||
|
||||
#[simd_test = "ssse3,mmx"]
|
||||
unsafe fn test_mm_abs_pi8() {
|
||||
@@ -1,6 +1,7 @@
|
||||
//! `x86` and `x86_64` intrinsics.
|
||||
|
||||
use core::mem;
|
||||
use prelude::v1::*;
|
||||
use mem;
|
||||
|
||||
#[macro_use]
|
||||
mod macros;
|
||||
@@ -59,13 +60,16 @@ types! {
|
||||
/// # fn main() {
|
||||
/// # #[target_feature(enable = "mmx")]
|
||||
/// # unsafe fn foo() {
|
||||
/// use stdsimd::vendor::*;
|
||||
/// #[cfg(target_arch = "x86")]
|
||||
/// use stdsimd::arch::x86::*;
|
||||
/// #[cfg(target_arch = "x86_64")]
|
||||
/// use stdsimd::arch::x86_64::*;
|
||||
///
|
||||
/// let all_bytes_zero = _mm_setzero_si64();
|
||||
/// let all_bytes_one = _mm_set1_pi8(1);
|
||||
/// let two_i32 = _mm_set_pi32(1, 2);
|
||||
/// # }
|
||||
/// # if cfg_feature_enabled!("mmx") { unsafe { foo() } }
|
||||
/// # if is_target_feature_detected!("mmx") { unsafe { foo() } }
|
||||
/// # }
|
||||
/// ```
|
||||
pub struct __m64(i64);
|
||||
@@ -102,13 +106,16 @@ types! {
|
||||
/// # fn main() {
|
||||
/// # #[target_feature(enable = "sse2")]
|
||||
/// # unsafe fn foo() {
|
||||
/// use stdsimd::vendor::*;
|
||||
/// #[cfg(target_arch = "x86")]
|
||||
/// use stdsimd::arch::x86::*;
|
||||
/// #[cfg(target_arch = "x86_64")]
|
||||
/// use stdsimd::arch::x86_64::*;
|
||||
///
|
||||
/// let all_bytes_zero = _mm_setzero_si128();
|
||||
/// let all_bytes_one = _mm_set1_epi8(1);
|
||||
/// let four_i32 = _mm_set_epi32(1, 2, 3, 4);
|
||||
/// # }
|
||||
/// # if cfg_feature_enabled!("sse2") { unsafe { foo() } }
|
||||
/// # if is_target_feature_detected!("sse2") { unsafe { foo() } }
|
||||
/// # }
|
||||
/// ```
|
||||
pub struct __m128i(i64, i64);
|
||||
@@ -138,13 +145,16 @@ types! {
|
||||
/// # fn main() {
|
||||
/// # #[target_feature(enable = "sse")]
|
||||
/// # unsafe fn foo() {
|
||||
/// use stdsimd::vendor::*;
|
||||
/// #[cfg(target_arch = "x86")]
|
||||
/// use stdsimd::arch::x86::*;
|
||||
/// #[cfg(target_arch = "x86_64")]
|
||||
/// use stdsimd::arch::x86_64::*;
|
||||
///
|
||||
/// let four_zeros = _mm_setzero_ps();
|
||||
/// let four_ones = _mm_set1_ps(1.0);
|
||||
/// let four_floats = _mm_set_ps(1.0, 2.0, 3.0, 4.0);
|
||||
/// # }
|
||||
/// # if cfg_feature_enabled!("sse") { unsafe { foo() } }
|
||||
/// # if is_target_feature_detected!("sse") { unsafe { foo() } }
|
||||
/// # }
|
||||
/// ```
|
||||
pub struct __m128(f32, f32, f32, f32);
|
||||
@@ -174,13 +184,16 @@ types! {
|
||||
/// # fn main() {
|
||||
/// # #[target_feature(enable = "sse")]
|
||||
/// # unsafe fn foo() {
|
||||
/// use stdsimd::vendor::*;
|
||||
/// #[cfg(target_arch = "x86")]
|
||||
/// use stdsimd::arch::x86::*;
|
||||
/// #[cfg(target_arch = "x86_64")]
|
||||
/// use stdsimd::arch::x86_64::*;
|
||||
///
|
||||
/// let two_zeros = _mm_setzero_pd();
|
||||
/// let two_ones = _mm_set1_pd(1.0);
|
||||
/// let two_floats = _mm_set_pd(1.0, 2.0);
|
||||
/// # }
|
||||
/// # if cfg_feature_enabled!("sse") { unsafe { foo() } }
|
||||
/// # if is_target_feature_detected!("sse") { unsafe { foo() } }
|
||||
/// # }
|
||||
/// ```
|
||||
pub struct __m128d(f64, f64);
|
||||
@@ -214,13 +227,16 @@ types! {
|
||||
/// # fn main() {
|
||||
/// # #[target_feature(enable = "avx")]
|
||||
/// # unsafe fn foo() {
|
||||
/// use stdsimd::vendor::*;
|
||||
/// #[cfg(target_arch = "x86")]
|
||||
/// use stdsimd::arch::x86::*;
|
||||
/// #[cfg(target_arch = "x86_64")]
|
||||
/// use stdsimd::arch::x86_64::*;
|
||||
///
|
||||
/// let all_bytes_zero = _mm256_setzero_si256();
|
||||
/// let all_bytes_one = _mm256_set1_epi8(1);
|
||||
/// let eight_i32 = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
/// # }
|
||||
/// # if cfg_feature_enabled!("avx") { unsafe { foo() } }
|
||||
/// # if is_target_feature_detected!("avx") { unsafe { foo() } }
|
||||
/// # }
|
||||
/// ```
|
||||
pub struct __m256i(i64, i64, i64, i64);
|
||||
@@ -250,13 +266,16 @@ types! {
|
||||
/// # fn main() {
|
||||
/// # #[target_feature(enable = "sse")]
|
||||
/// # unsafe fn foo() {
|
||||
/// use stdsimd::vendor::*;
|
||||
/// #[cfg(target_arch = "x86")]
|
||||
/// use stdsimd::arch::x86::*;
|
||||
/// #[cfg(target_arch = "x86_64")]
|
||||
/// use stdsimd::arch::x86_64::*;
|
||||
///
|
||||
/// let eight_zeros = _mm256_setzero_ps();
|
||||
/// let eight_ones = _mm256_set1_ps(1.0);
|
||||
/// let eight_floats = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
|
||||
/// # }
|
||||
/// # if cfg_feature_enabled!("sse") { unsafe { foo() } }
|
||||
/// # if is_target_feature_detected!("sse") { unsafe { foo() } }
|
||||
/// # }
|
||||
/// ```
|
||||
pub struct __m256(f32, f32, f32, f32, f32, f32, f32, f32);
|
||||
@@ -286,13 +305,16 @@ types! {
|
||||
/// # fn main() {
|
||||
/// # #[target_feature(enable = "avx")]
|
||||
/// # unsafe fn foo() {
|
||||
/// use stdsimd::vendor::*;
|
||||
/// #[cfg(target_arch = "x86")]
|
||||
/// use stdsimd::arch::x86::*;
|
||||
/// #[cfg(target_arch = "x86_64")]
|
||||
/// use stdsimd::arch::x86_64::*;
|
||||
///
|
||||
/// let four_zeros = _mm256_setzero_pd();
|
||||
/// let four_ones = _mm256_set1_pd(1.0);
|
||||
/// let four_floats = _mm256_set_pd(1.0, 2.0, 3.0, 4.0);
|
||||
/// # }
|
||||
/// # if cfg_feature_enabled!("avx") { unsafe { foo() } }
|
||||
/// # if is_target_feature_detected!("avx") { unsafe { foo() } }
|
||||
/// # }
|
||||
/// ```
|
||||
pub struct __m256d(f64, f64, f64, f64);
|
||||
@@ -309,42 +331,42 @@ trait m128iExt: Sized {
|
||||
fn as_m128i(self) -> __m128i;
|
||||
|
||||
#[inline]
|
||||
fn as_u8x16(self) -> ::v128::u8x16 {
|
||||
fn as_u8x16(self) -> ::coresimd::v128::u8x16 {
|
||||
unsafe { mem::transmute(self.as_m128i()) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn as_u16x8(self) -> ::v128::u16x8 {
|
||||
fn as_u16x8(self) -> ::coresimd::v128::u16x8 {
|
||||
unsafe { mem::transmute(self.as_m128i()) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn as_u32x4(self) -> ::v128::u32x4 {
|
||||
fn as_u32x4(self) -> ::coresimd::v128::u32x4 {
|
||||
unsafe { mem::transmute(self.as_m128i()) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn as_u64x2(self) -> ::v128::u64x2 {
|
||||
fn as_u64x2(self) -> ::coresimd::v128::u64x2 {
|
||||
unsafe { mem::transmute(self.as_m128i()) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn as_i8x16(self) -> ::v128::i8x16 {
|
||||
fn as_i8x16(self) -> ::coresimd::v128::i8x16 {
|
||||
unsafe { mem::transmute(self.as_m128i()) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn as_i16x8(self) -> ::v128::i16x8 {
|
||||
fn as_i16x8(self) -> ::coresimd::v128::i16x8 {
|
||||
unsafe { mem::transmute(self.as_m128i()) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn as_i32x4(self) -> ::v128::i32x4 {
|
||||
fn as_i32x4(self) -> ::coresimd::v128::i32x4 {
|
||||
unsafe { mem::transmute(self.as_m128i()) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn as_i64x2(self) -> ::v128::i64x2 {
|
||||
fn as_i64x2(self) -> ::coresimd::v128::i64x2 {
|
||||
unsafe { mem::transmute(self.as_m128i()) }
|
||||
}
|
||||
}
|
||||
@@ -362,42 +384,42 @@ trait m256iExt: Sized {
|
||||
fn as_m256i(self) -> __m256i;
|
||||
|
||||
#[inline]
|
||||
fn as_u8x32(self) -> ::v256::u8x32 {
|
||||
fn as_u8x32(self) -> ::coresimd::v256::u8x32 {
|
||||
unsafe { mem::transmute(self.as_m256i()) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn as_u16x16(self) -> ::v256::u16x16 {
|
||||
fn as_u16x16(self) -> ::coresimd::v256::u16x16 {
|
||||
unsafe { mem::transmute(self.as_m256i()) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn as_u32x8(self) -> ::v256::u32x8 {
|
||||
fn as_u32x8(self) -> ::coresimd::v256::u32x8 {
|
||||
unsafe { mem::transmute(self.as_m256i()) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn as_u64x4(self) -> ::v256::u64x4 {
|
||||
fn as_u64x4(self) -> ::coresimd::v256::u64x4 {
|
||||
unsafe { mem::transmute(self.as_m256i()) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn as_i8x32(self) -> ::v256::i8x32 {
|
||||
fn as_i8x32(self) -> ::coresimd::v256::i8x32 {
|
||||
unsafe { mem::transmute(self.as_m256i()) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn as_i16x16(self) -> ::v256::i16x16 {
|
||||
fn as_i16x16(self) -> ::coresimd::v256::i16x16 {
|
||||
unsafe { mem::transmute(self.as_m256i()) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn as_i32x8(self) -> ::v256::i32x8 {
|
||||
fn as_i32x8(self) -> ::coresimd::v256::i32x8 {
|
||||
unsafe { mem::transmute(self.as_m256i()) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn as_i64x4(self) -> ::v256::i64x4 {
|
||||
fn as_i64x4(self) -> ::coresimd::v256::i64x4 {
|
||||
unsafe { mem::transmute(self.as_m256i()) }
|
||||
}
|
||||
}
|
||||
@@ -1,6 +1,6 @@
|
||||
//! Utilities used in testing the x86 intrinsics
|
||||
|
||||
use x86::*;
|
||||
use coresimd::x86::*;
|
||||
|
||||
#[target_feature(enable = "mmx")]
|
||||
pub unsafe fn assert_eq_m64(a: __m64, b: __m64) {
|
||||
@@ -107,7 +107,7 @@ pub unsafe fn get_m256(a: __m256, idx: usize) -> f32 {
|
||||
// which doesn't exist on x86!
|
||||
#[cfg(target_arch = "x86")]
|
||||
mod x86_polyfill {
|
||||
use x86::*;
|
||||
use coresimd::x86::*;
|
||||
|
||||
pub unsafe fn _mm_insert_epi64(a: __m128i, val: i64, idx: i32) -> __m128i {
|
||||
union A {
|
||||
@@ -42,7 +42,7 @@ pub unsafe fn _popcnt64(x: i64) -> i32 {
|
||||
mod tests {
|
||||
use stdsimd_test::simd_test;
|
||||
|
||||
use x86::*;
|
||||
use coresimd::x86::*;
|
||||
|
||||
#[simd_test = "lzcnt"]
|
||||
unsafe fn test_lzcnt_u64() {
|
||||
@@ -13,10 +13,9 @@
|
||||
//! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
|
||||
//! [wiki]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions
|
||||
|
||||
use core::mem;
|
||||
|
||||
use simd_llvm::*;
|
||||
use x86::*;
|
||||
use coresimd::simd_llvm::*;
|
||||
use coresimd::x86::*;
|
||||
use mem;
|
||||
|
||||
/// Copy `a` to result, and insert the 64-bit integer `i` into result
|
||||
/// at the location specified by `index`.
|
||||
@@ -32,7 +31,7 @@ pub unsafe fn _mm256_insert_epi64(a: __m256i, i: i64, index: i32) -> __m256i {
|
||||
mod tests {
|
||||
use stdsimd_test::simd_test;
|
||||
|
||||
use x86::*;
|
||||
use coresimd::x86::*;
|
||||
|
||||
#[simd_test = "avx"]
|
||||
unsafe fn test_mm256_insert_epi64() {
|
||||
@@ -18,8 +18,8 @@
|
||||
//! [wiki_avx]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions
|
||||
//! [wiki_fma]: https://en.wikipedia.org/wiki/Fused_multiply-accumulate
|
||||
|
||||
use simd_llvm::*;
|
||||
use x86::*;
|
||||
use coresimd::simd_llvm::*;
|
||||
use coresimd::x86::*;
|
||||
|
||||
/// Extract a 64-bit integer from `a`, selected with `imm8`.
|
||||
#[inline]
|
||||
@@ -35,7 +35,7 @@ pub unsafe fn _mm256_extract_epi64(a: __m256i, imm8: i32) -> i64 {
|
||||
mod tests {
|
||||
use stdsimd_test::simd_test;
|
||||
|
||||
use x86::*;
|
||||
use coresimd::x86::*;
|
||||
|
||||
#[simd_test = "avx2"]
|
||||
unsafe fn test_mm256_extract_epi64() {
|
||||
@@ -101,7 +101,7 @@ extern "C" {
|
||||
mod tests {
|
||||
use stdsimd_test::simd_test;
|
||||
|
||||
use x86::*;
|
||||
use coresimd::x86::*;
|
||||
|
||||
#[simd_test = "bmi"]
|
||||
unsafe fn test_bextr_u64() {
|
||||
@@ -69,7 +69,7 @@ extern "C" {
|
||||
mod tests {
|
||||
use stdsimd_test::simd_test;
|
||||
|
||||
use x86::*;
|
||||
use coresimd::x86::*;
|
||||
|
||||
#[simd_test = "bmi2"]
|
||||
unsafe fn test_pext_u64() {
|
||||
@@ -51,7 +51,7 @@ pub unsafe fn _fxrstor64(mem_addr: *const u8) {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use x86::x86_64::fxsr;
|
||||
use coresimd::x86::x86_64::fxsr;
|
||||
use stdsimd_test::simd_test;
|
||||
use std::fmt;
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
//! `x86_64` Streaming SIMD Extensions (SSE)
|
||||
|
||||
use x86::*;
|
||||
use coresimd::x86::*;
|
||||
|
||||
#[cfg(test)]
|
||||
use stdsimd_test::assert_instr;
|
||||
@@ -66,7 +66,7 @@ mod tests {
|
||||
|
||||
use stdsimd_test::simd_test;
|
||||
|
||||
use x86::*;
|
||||
use coresimd::x86::*;
|
||||
|
||||
#[simd_test = "sse"]
|
||||
unsafe fn test_mm_cvtss_si64() {
|
||||
@@ -1,7 +1,8 @@
|
||||
//! `x86_64`'s Streaming SIMD Extensions 2 (SSE2)
|
||||
|
||||
use x86::*;
|
||||
use simd_llvm::*;
|
||||
use coresimd::x86::*;
|
||||
use coresimd::simd_llvm::*;
|
||||
use intrinsics;
|
||||
|
||||
#[cfg(test)]
|
||||
use stdsimd_test::assert_instr;
|
||||
@@ -55,7 +56,7 @@ pub unsafe fn _mm_cvttsd_si64x(a: __m128d) -> i64 {
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[cfg_attr(test, assert_instr(movnti))]
|
||||
pub unsafe fn _mm_stream_si64(mem_addr: *mut i64, a: i64) {
|
||||
::core::intrinsics::nontemporal_store(mem_addr, a);
|
||||
intrinsics::nontemporal_store(mem_addr, a);
|
||||
}
|
||||
|
||||
/// Return a vector whose lowest element is `a` and all higher elements are
|
||||
@@ -116,7 +117,7 @@ mod tests {
|
||||
|
||||
use stdsimd_test::simd_test;
|
||||
|
||||
use x86::*;
|
||||
use coresimd::x86::*;
|
||||
|
||||
#[simd_test = "sse2"]
|
||||
unsafe fn test_mm_cvtsd_si64() {
|
||||
@@ -1,9 +1,8 @@
|
||||
//! `i686`'s Streaming SIMD Extensions 4.1 (SSE4.1)
|
||||
|
||||
use core::mem;
|
||||
|
||||
use x86::*;
|
||||
use simd_llvm::*;
|
||||
use coresimd::x86::*;
|
||||
use coresimd::simd_llvm::*;
|
||||
use mem;
|
||||
|
||||
#[cfg(test)]
|
||||
use stdsimd_test::assert_instr;
|
||||
@@ -32,7 +31,7 @@ pub unsafe fn _mm_insert_epi64(a: __m128i, i: i64, imm8: i32) -> __m128i {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use stdsimd_test::simd_test;
|
||||
use x86::*;
|
||||
use coresimd::x86::*;
|
||||
|
||||
#[simd_test = "sse4.1"]
|
||||
unsafe fn test_mm_extract_epi64() {
|
||||
@@ -20,7 +20,7 @@ pub unsafe fn _mm_crc32_u64(crc: u64, v: u64) -> u64 {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use x86::*;
|
||||
use coresimd::x86::*;
|
||||
|
||||
use stdsimd_test::simd_test;
|
||||
|
||||
@@ -111,7 +111,7 @@ pub unsafe fn _xrstors64(mem_addr: *const u8, rs_mask: u64) {
|
||||
/*
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use x86::x86_64::xsave;
|
||||
use coresimd::x86::x86_64::xsave;
|
||||
use stdsimd_test::simd_test;
|
||||
use std::fmt;
|
||||
|
||||
@@ -19,9 +19,8 @@ is-it-maintained-open-issues = { repository = "rust-lang-nursery/stdsimd" }
|
||||
maintenance = { status = "experimental" }
|
||||
|
||||
[dev-dependencies]
|
||||
cupid = "0.5.0"
|
||||
stdsimd-test = { version = "0.*", path = "../stdsimd-test" }
|
||||
stdsimd = { version = "0.0.3", path = ".." }
|
||||
stdsimd = { version = "0.0.3", path = "../stdsimd" }
|
||||
|
||||
[features]
|
||||
# Internal-usage only: denies all warnings.
|
||||
82
library/stdarch/crates/coresimd/src/lib.rs
Normal file
82
library/stdarch/crates/coresimd/src/lib.rs
Normal file
@@ -0,0 +1,82 @@
|
||||
//! SIMD and vendor intrinsics support library.
|
||||
//!
|
||||
//! This documentation is only for one particular architecture, you can find
|
||||
//! others at:
|
||||
//!
|
||||
//! * [i686](https://rust-lang-nursery.github.io/stdsimd/i686/stdsimd/)
|
||||
//! * [`x86_64`](https://rust-lang-nursery.github.io/stdsimd/x86_64/stdsimd/)
|
||||
//! * [arm](https://rust-lang-nursery.github.io/stdsimd/arm/stdsimd/)
|
||||
//! * [aarch64](https://rust-lang-nursery.github.io/stdsimd/aarch64/stdsimd/)
|
||||
|
||||
#![cfg_attr(feature = "strict", deny(warnings))]
|
||||
#![allow(dead_code)]
|
||||
#![allow(unused_features)]
|
||||
#![feature(const_fn, link_llvm_intrinsics, platform_intrinsics, repr_simd,
|
||||
simd_ffi, target_feature, cfg_target_feature, i128_type, asm,
|
||||
integer_atomics, stmt_expr_attributes, core_intrinsics,
|
||||
crate_in_paths, no_core, attr_literals, rustc_attrs)]
|
||||
#![cfg_attr(test, feature(proc_macro, test, attr_literals, abi_vectorcall))]
|
||||
#![cfg_attr(feature = "cargo-clippy",
|
||||
allow(inline_always, too_many_arguments, cast_sign_loss,
|
||||
cast_lossless, cast_possible_wrap,
|
||||
cast_possible_truncation, cast_precision_loss,
|
||||
shadow_reuse, cyclomatic_complexity, similar_names,
|
||||
many_single_char_names))]
|
||||
#![cfg_attr(test, allow(unused_imports))]
|
||||
#![no_core]
|
||||
|
||||
#[cfg_attr(not(test), macro_use)]
|
||||
extern crate core as _core;
|
||||
#[cfg(test)]
|
||||
#[macro_use]
|
||||
extern crate std;
|
||||
#[cfg(test)]
|
||||
extern crate stdsimd_test;
|
||||
#[cfg(test)]
|
||||
extern crate test;
|
||||
#[cfg(test)]
|
||||
#[macro_use]
|
||||
extern crate stdsimd;
|
||||
|
||||
#[path = "../../../coresimd/mod.rs"]
|
||||
mod coresimd;
|
||||
|
||||
pub use coresimd::simd;
|
||||
|
||||
pub mod arch {
|
||||
#[cfg(target_arch = "x86")]
|
||||
pub mod x86 { pub use coresimd::vendor::*; }
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
pub mod x86_64 { pub use coresimd::vendor::*; }
|
||||
#[cfg(target_arch = "arm")]
|
||||
pub mod arm { pub use coresimd::vendor::*; }
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
pub mod aarch64 { pub use coresimd::vendor::*; }
|
||||
}
|
||||
|
||||
#[allow(unused_imports)]
|
||||
use _core::clone;
|
||||
#[allow(unused_imports)]
|
||||
use _core::cmp;
|
||||
#[allow(unused_imports)]
|
||||
use _core::convert;
|
||||
#[allow(unused_imports)]
|
||||
use _core::fmt;
|
||||
#[allow(unused_imports)]
|
||||
use _core::intrinsics;
|
||||
#[allow(unused_imports)]
|
||||
use _core::iter;
|
||||
#[allow(unused_imports)]
|
||||
use _core::marker;
|
||||
#[allow(unused_imports)]
|
||||
use _core::mem;
|
||||
#[allow(unused_imports)]
|
||||
use _core::ops;
|
||||
#[allow(unused_imports)]
|
||||
use _core::option;
|
||||
#[allow(unused_imports)]
|
||||
use _core::prelude;
|
||||
#[allow(unused_imports)]
|
||||
use _core::ptr;
|
||||
#[allow(unused_imports)]
|
||||
use _core::result;
|
||||
47
library/stdarch/crates/coresimd/tests/cpu-detection.rs
Normal file
47
library/stdarch/crates/coresimd/tests/cpu-detection.rs
Normal file
@@ -0,0 +1,47 @@
|
||||
#![feature(cfg_target_feature)]
|
||||
#![cfg_attr(feature = "strict", deny(warnings))]
|
||||
#![cfg_attr(feature = "cargo-clippy",
|
||||
allow(option_unwrap_used, print_stdout, use_debug))]
|
||||
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
#[macro_use]
|
||||
extern crate stdsimd;
|
||||
|
||||
#[test]
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
fn x86_all() {
|
||||
println!("sse: {:?}", is_target_feature_detected!("sse"));
|
||||
println!("sse2: {:?}", is_target_feature_detected!("sse2"));
|
||||
println!("sse3: {:?}", is_target_feature_detected!("sse3"));
|
||||
println!("ssse3: {:?}", is_target_feature_detected!("ssse3"));
|
||||
println!("sse4.1: {:?}", is_target_feature_detected!("sse4.1"));
|
||||
println!("sse4.2: {:?}", is_target_feature_detected!("sse4.2"));
|
||||
println!("sse4a: {:?}", is_target_feature_detected!("sse4a"));
|
||||
println!("avx: {:?}", is_target_feature_detected!("avx"));
|
||||
println!("avx2: {:?}", is_target_feature_detected!("avx2"));
|
||||
println!("avx512f {:?}", is_target_feature_detected!("avx512f"));
|
||||
println!("avx512cd {:?}", is_target_feature_detected!("avx512cd"));
|
||||
println!("avx512er {:?}", is_target_feature_detected!("avx512er"));
|
||||
println!("avx512pf {:?}", is_target_feature_detected!("avx512pf"));
|
||||
println!("avx512bw {:?}", is_target_feature_detected!("avx512bw"));
|
||||
println!("avx512dq {:?}", is_target_feature_detected!("avx512dq"));
|
||||
println!("avx512vl {:?}", is_target_feature_detected!("avx512vl"));
|
||||
println!("avx512_ifma {:?}", is_target_feature_detected!("avx512ifma"));
|
||||
println!("avx512_vbmi {:?}", is_target_feature_detected!("avx512vbmi"));
|
||||
println!(
|
||||
"avx512_vpopcntdq {:?}",
|
||||
is_target_feature_detected!("avx512vpopcntdq")
|
||||
);
|
||||
println!("fma: {:?}", is_target_feature_detected!("fma"));
|
||||
println!("abm: {:?}", is_target_feature_detected!("abm"));
|
||||
println!("bmi: {:?}", is_target_feature_detected!("bmi"));
|
||||
println!("bmi2: {:?}", is_target_feature_detected!("bmi2"));
|
||||
println!("tbm: {:?}", is_target_feature_detected!("tbm"));
|
||||
println!("popcnt: {:?}", is_target_feature_detected!("popcnt"));
|
||||
println!("lzcnt: {:?}", is_target_feature_detected!("lzcnt"));
|
||||
println!("fxsr: {:?}", is_target_feature_detected!("fxsr"));
|
||||
println!("xsave: {:?}", is_target_feature_detected!("xsave"));
|
||||
println!("xsaveopt: {:?}", is_target_feature_detected!("xsaveopt"));
|
||||
println!("xsaves: {:?}", is_target_feature_detected!("xsaves"));
|
||||
println!("xsavec: {:?}", is_target_feature_detected!("xsavec"));
|
||||
}
|
||||
@@ -58,7 +58,7 @@ pub fn simd_test(
|
||||
for feature in target_features {
|
||||
let q = quote_spanned! {
|
||||
proc_macro2::Span::call_site() =>
|
||||
cfg_feature_enabled!(#feature) &&
|
||||
is_target_feature_detected!(#feature) &&
|
||||
};
|
||||
q.to_tokens(&mut cfg_target_features);
|
||||
}
|
||||
@@ -4,8 +4,8 @@ version = "0.1.0"
|
||||
authors = ["Alex Crichton <alex@alexcrichton.com>"]
|
||||
|
||||
[dependencies]
|
||||
assert-instr-macro = { path = "assert-instr-macro" }
|
||||
simd-test-macro = { path = "simd-test-macro" }
|
||||
assert-instr-macro = { path = "../assert-instr-macro" }
|
||||
simd-test-macro = { path = "../simd-test-macro" }
|
||||
backtrace = "0.3"
|
||||
cc = "1.0"
|
||||
lazy_static = "0.2"
|
||||
@@ -3,7 +3,7 @@ use std::path::Path;
|
||||
fn main() {
|
||||
let dir = Path::new(env!("CARGO_MANIFEST_DIR"));
|
||||
let root = dir.parent().unwrap();
|
||||
let root = root.join("coresimd/src/x86");
|
||||
let root = root.join("../coresimd/x86");
|
||||
walk(&root);
|
||||
}
|
||||
|
||||
@@ -22,7 +22,7 @@ macro_rules! my_quote {
|
||||
pub fn x86_functions(input: TokenStream) -> TokenStream {
|
||||
let dir = Path::new(env!("CARGO_MANIFEST_DIR"));
|
||||
let root = dir.parent().unwrap();
|
||||
let root = root.join("coresimd/src/x86");
|
||||
let root = root.join("../coresimd/x86");
|
||||
|
||||
let mut files = Vec::new();
|
||||
walk(&root, &mut files);
|
||||
37
library/stdarch/crates/stdsimd/Cargo.toml
Normal file
37
library/stdarch/crates/stdsimd/Cargo.toml
Normal file
@@ -0,0 +1,37 @@
|
||||
[package]
|
||||
name = "stdsimd"
|
||||
version = "0.0.3"
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
description = "SIMD support in Rust's standard library."
|
||||
documentation = "https://docs.rs/stdsimd"
|
||||
homepage = "https://github.com/rust-lang-nursery/stdsimd"
|
||||
repository = "https://github.com/rust-lang-nursery/stdsimd"
|
||||
readme = "README.md"
|
||||
keywords = ["std", "simd", "intrinsics"]
|
||||
categories = ["hardware-support"]
|
||||
license = "MIT/Apache-2.0"
|
||||
|
||||
[badges]
|
||||
travis-ci = { repository = "rust-lang-nursery/stdsimd" }
|
||||
appveyor = { repository = "rust-lang-nursery/stdsimd" }
|
||||
is-it-maintained-issue-resolution = { repository = "rust-lang-nursery/stdsimd" }
|
||||
is-it-maintained-open-issues = { repository = "rust-lang-nursery/stdsimd" }
|
||||
maintenance = { status = "experimental" }
|
||||
|
||||
[dependencies]
|
||||
coresimd = { version = "0.0.3", path = "../coresimd" }
|
||||
libc = "0.2"
|
||||
cfg-if = "0.1"
|
||||
|
||||
[dev-dependencies]
|
||||
auxv = "0.3.3"
|
||||
quickcheck = "0.6"
|
||||
rand = "0.4"
|
||||
cupid = "0.5.0"
|
||||
|
||||
[features]
|
||||
# Internal-usage only: denies all warnings.
|
||||
strict = [ "coresimd/strict" ]
|
||||
# Internal-usage only: enables only those intrinsics supported by Intel's
|
||||
# Software Development Environment (SDE).
|
||||
intel_sde = [ "coresimd/intel_sde" ]
|
||||
@@ -25,7 +25,7 @@
|
||||
//!
|
||||
//! * `cfg!(target_feature = "feature")`: returns `true` if the `feature` is
|
||||
//! enabled in all CPUs that the binary will run on (at compile-time)
|
||||
//! * `cfg_feature_enabled!("feature")`: returns `true` if the `feature` is
|
||||
//! * `is_target_feature_detected!("feature")`: returns `true` if the `feature` is
|
||||
//! enabled in the CPU in which the binary is currently running on (at
|
||||
//! run-time, unless the result is known at compile time)
|
||||
//!
|
||||
@@ -36,7 +36,6 @@
|
||||
//!
|
||||
//! #[macro_use]
|
||||
//! extern crate stdsimd;
|
||||
//! use stdsimd::vendor;
|
||||
//! use stdsimd::simd::i32x4;
|
||||
//!
|
||||
//! fn main() {
|
||||
@@ -65,11 +64,16 @@
|
||||
//! #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
//! #[target_feature(enable = "sse2")]
|
||||
//! unsafe fn sum_sse2(x: i32x4) -> i32 {
|
||||
//! #[cfg(target_arch = "x86")]
|
||||
//! use stdsimd::arch::x86::*;;
|
||||
//! #[cfg(target_arch = "x86_64")]
|
||||
//! use stdsimd::arch::x86_64::*;;
|
||||
//! use std::mem;
|
||||
//! let x: vendor::__m128i = mem::transmute(x);
|
||||
//! let x = vendor::_mm_add_epi32(x, vendor::_mm_srli_si128(x, 8));
|
||||
//! let x = vendor::_mm_add_epi32(x, vendor::_mm_srli_si128(x, 4));
|
||||
//! let ret = vendor::_mm_cvtsi128_si32(x);
|
||||
//!
|
||||
//! let x: __m128i = mem::transmute(x);
|
||||
//! let x = _mm_add_epi32(x, _mm_srli_si128(x, 8));
|
||||
//! let x = _mm_add_epi32(x, _mm_srli_si128(x, 4));
|
||||
//! let ret = _mm_cvtsi128_si32(x);
|
||||
//! mem::transmute(ret)
|
||||
//! }
|
||||
//!
|
||||
@@ -97,7 +101,7 @@
|
||||
//! {
|
||||
//! // If SSE2 is not enabled at compile-time, this
|
||||
//! // detects whether SSE2 is available at run-time:
|
||||
//! if cfg_feature_enabled!("sse2") {
|
||||
//! if is_target_feature_detected!("sse2") {
|
||||
//! return unsafe { sum_sse2(x) };
|
||||
//! }
|
||||
//! }
|
||||
@@ -128,56 +132,25 @@
|
||||
//! [simd_soundness_bug]: https://github.com/rust-lang/rust/issues/44367
|
||||
//! [target_feature_impr]: https://github.com/rust-lang/rust/issues/44839
|
||||
|
||||
#![feature(const_fn, const_size_of, use_extern_macros, cfg_target_feature)]
|
||||
#![feature(const_fn, integer_atomics)]
|
||||
#![cfg_attr(target_os = "linux", feature(linkage))]
|
||||
#![no_std]
|
||||
|
||||
extern crate std as _std;
|
||||
extern crate coresimd;
|
||||
|
||||
/// Re-export run-time feature detection macros.
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "arm",
|
||||
target_arch = "aarch64", target_arch = "powerpc64"))]
|
||||
pub use coresimd::__unstable_detect_feature;
|
||||
|
||||
/// Platform dependent vendor intrinsics.
|
||||
pub mod vendor {
|
||||
#[doc(inline)]
|
||||
pub use coresimd::vendor::*;
|
||||
}
|
||||
|
||||
/// Run-time feature detection.
|
||||
#[doc(hidden)]
|
||||
pub mod __vendor_runtime {
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64",
|
||||
all(target_os = "linux",
|
||||
any(target_arch = "arm", target_arch = "aarch64",
|
||||
target_arch = "powerpc64"))))]
|
||||
pub use runtime::std::*;
|
||||
}
|
||||
|
||||
/// Platform independent SIMD vector types and operations.
|
||||
pub mod simd {
|
||||
#[doc(inline)]
|
||||
pub use coresimd::simd::*;
|
||||
}
|
||||
|
||||
/// The `stdsimd` run-time.
|
||||
extern crate libc;
|
||||
#[macro_use]
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64",
|
||||
all(target_os = "linux",
|
||||
any(target_arch = "arm", target_arch = "aarch64",
|
||||
target_arch = "powerpc64"))))]
|
||||
mod runtime;
|
||||
extern crate cfg_if;
|
||||
|
||||
/// Error gracefully in architectures without run-time detection support.
|
||||
#[cfg(not(any(target_arch = "x86", target_arch = "x86_64",
|
||||
all(target_os = "linux",
|
||||
any(target_arch = "arm", target_arch = "aarch64",
|
||||
target_arch = "powerpc64")))))]
|
||||
#[doc(hidden)]
|
||||
#[macro_export]
|
||||
macro_rules! cfg_feature_enabled {
|
||||
($name:tt) => (
|
||||
{
|
||||
compile_error!("cfg_target_feature! is not supported in this architecture")
|
||||
}
|
||||
)
|
||||
}
|
||||
#[cfg(test)]
|
||||
#[macro_use]
|
||||
extern crate std;
|
||||
|
||||
#[path = "../../../stdsimd/mod.rs"]
|
||||
mod stdsimd;
|
||||
|
||||
pub use stdsimd::*;
|
||||
|
||||
pub use _std::prelude;
|
||||
pub use _std::fs;
|
||||
pub use _std::io;
|
||||
72
library/stdarch/crates/stdsimd/tests/cpu-detection.rs
Normal file
72
library/stdarch/crates/stdsimd/tests/cpu-detection.rs
Normal file
@@ -0,0 +1,72 @@
|
||||
#![feature(cfg_target_feature)]
|
||||
#![cfg_attr(feature = "strict", deny(warnings))]
|
||||
#![cfg_attr(feature = "cargo-clippy",
|
||||
allow(option_unwrap_used, use_debug, print_stdout))]
|
||||
|
||||
#[cfg(any(target_arch = "arm", target_arch = "aarch64",
|
||||
target_arch = "x86", target_arch = "x86_64",
|
||||
target_arch = "powerpc64"))]
|
||||
#[macro_use]
|
||||
extern crate stdsimd;
|
||||
|
||||
#[test]
|
||||
#[cfg(all(target_arch = "arm", target_os = "linux"))]
|
||||
fn arm_linux() {
|
||||
println!("neon: {}", is_target_feature_detected!("neon"));
|
||||
println!("pmull: {}", is_target_feature_detected!("pmull"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg(all(target_arch = "aarch64", target_os = "linux"))]
|
||||
fn aarch64_linux() {
|
||||
println!("neon: {}", is_target_feature_detected!("neon"));
|
||||
println!("asimd: {}", is_target_feature_detected!("asimd"));
|
||||
println!("pmull: {}", is_target_feature_detected!("pmull"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg(all(target_arch = "powerpc64", target_os = "linux"))]
|
||||
fn powerpc64_linux() {
|
||||
println!("altivec: {}", is_target_feature_detected!("altivec"));
|
||||
println!("vsx: {}", is_target_feature_detected!("vsx"));
|
||||
println!("power8: {}", is_target_feature_detected!("power8"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
fn x86_all() {
|
||||
println!("sse: {:?}", is_target_feature_detected!("sse"));
|
||||
println!("sse2: {:?}", is_target_feature_detected!("sse2"));
|
||||
println!("sse3: {:?}", is_target_feature_detected!("sse3"));
|
||||
println!("ssse3: {:?}", is_target_feature_detected!("ssse3"));
|
||||
println!("sse4.1: {:?}", is_target_feature_detected!("sse4.1"));
|
||||
println!("sse4.2: {:?}", is_target_feature_detected!("sse4.2"));
|
||||
println!("sse4a: {:?}", is_target_feature_detected!("sse4a"));
|
||||
println!("avx: {:?}", is_target_feature_detected!("avx"));
|
||||
println!("avx2: {:?}", is_target_feature_detected!("avx2"));
|
||||
println!("avx512f {:?}", is_target_feature_detected!("avx512f"));
|
||||
println!("avx512cd {:?}", is_target_feature_detected!("avx512cd"));
|
||||
println!("avx512er {:?}", is_target_feature_detected!("avx512er"));
|
||||
println!("avx512pf {:?}", is_target_feature_detected!("avx512pf"));
|
||||
println!("avx512bw {:?}", is_target_feature_detected!("avx512bw"));
|
||||
println!("avx512dq {:?}", is_target_feature_detected!("avx512dq"));
|
||||
println!("avx512vl {:?}", is_target_feature_detected!("avx512vl"));
|
||||
println!("avx512_ifma {:?}", is_target_feature_detected!("avx512ifma"));
|
||||
println!("avx512_vbmi {:?}", is_target_feature_detected!("avx512vbmi"));
|
||||
println!(
|
||||
"avx512_vpopcntdq {:?}",
|
||||
is_target_feature_detected!("avx512vpopcntdq")
|
||||
);
|
||||
println!("fma: {:?}", is_target_feature_detected!("fma"));
|
||||
println!("abm: {:?}", is_target_feature_detected!("abm"));
|
||||
println!("bmi: {:?}", is_target_feature_detected!("bmi"));
|
||||
println!("bmi2: {:?}", is_target_feature_detected!("bmi2"));
|
||||
println!("tbm: {:?}", is_target_feature_detected!("tbm"));
|
||||
println!("popcnt: {:?}", is_target_feature_detected!("popcnt"));
|
||||
println!("lzcnt: {:?}", is_target_feature_detected!("lzcnt"));
|
||||
println!("fxsr: {:?}", is_target_feature_detected!("fxsr"));
|
||||
println!("xsave: {:?}", is_target_feature_detected!("xsave"));
|
||||
println!("xsaveopt: {:?}", is_target_feature_detected!("xsaveopt"));
|
||||
println!("xsaves: {:?}", is_target_feature_detected!("xsaves"));
|
||||
println!("xsavec: {:?}", is_target_feature_detected!("xsavec"));
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user