Reorganize and refactor source tree (#324)
With RFC 2325 looking close to being accepted, I took a crack at reorganizing this repository to be more amenable to inclusion in libstd/libcore. My current plan is to add stdsimd as a submodule in rust-lang/rust and then use `#[path]` to include the modules directly into libstd/libcore. Before this commit, however, the source code of coresimd/stdsimd itself was not quite ready for this. Imports wouldn't compile for one reason or another, and the organization was also different from the one in the RFC itself! In addition to moving a lot of files around, this commit has the following major changes: * The `cfg_feature_enabled!` macro is now renamed to `is_target_feature_detected!`. * The `vendor` module is now called `arch`. * Under the `arch` module is a suite of modules like `x86`, `x86_64`, etc., one per `cfg!(target_arch)`. * The `is_target_feature_detected!` macro was removed from coresimd. Unfortunately libcore has no ability to export unstable macros, so for now all feature detection is canonicalized in stdsimd. The `coresimd` and `stdsimd` crates have been updated to the planned organization in RFC 2325 as well. The runtime bits saw the largest amount of refactoring, seeing a good deal of simplification without the core/std split.
This commit is contained in:
@@ -24,7 +24,7 @@ matrix:
|
|||||||
- env: DOCUMENTATION
|
- env: DOCUMENTATION
|
||||||
install: true
|
install: true
|
||||||
script: ci/dox.sh
|
script: ci/dox.sh
|
||||||
- script: cargo test --manifest-path stdsimd-verify/Cargo.toml
|
- script: cargo test --manifest-path crates/stdsimd-verify/Cargo.toml
|
||||||
install: true
|
install: true
|
||||||
- env: RUSTFMT=On TARGET=x86_64-unknown-linux-gnu NO_ADD=1
|
- env: RUSTFMT=On TARGET=x86_64-unknown-linux-gnu NO_ADD=1
|
||||||
script: |
|
script: |
|
||||||
|
|||||||
@@ -1,33 +1,8 @@
|
|||||||
[package]
|
|
||||||
name = "stdsimd"
|
|
||||||
version = "0.0.3"
|
|
||||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
|
||||||
description = "SIMD support in Rust's standard library."
|
|
||||||
documentation = "https://docs.rs/stdsimd"
|
|
||||||
homepage = "https://github.com/rust-lang-nursery/stdsimd"
|
|
||||||
repository = "https://github.com/rust-lang-nursery/stdsimd"
|
|
||||||
readme = "README.md"
|
|
||||||
keywords = ["std", "simd", "intrinsics"]
|
|
||||||
categories = ["hardware-support"]
|
|
||||||
license = "MIT/Apache-2.0"
|
|
||||||
|
|
||||||
[workspace]
|
[workspace]
|
||||||
members = ["stdsimd-verify"]
|
members = [
|
||||||
|
"crates/stdsimd-verify",
|
||||||
[badges]
|
"crates/stdsimd",
|
||||||
travis-ci = { repository = "rust-lang-nursery/stdsimd" }
|
]
|
||||||
appveyor = { repository = "rust-lang-nursery/stdsimd" }
|
|
||||||
is-it-maintained-issue-resolution = { repository = "rust-lang-nursery/stdsimd" }
|
|
||||||
is-it-maintained-open-issues = { repository = "rust-lang-nursery/stdsimd" }
|
|
||||||
maintenance = { status = "experimental" }
|
|
||||||
|
|
||||||
[dependencies]
|
|
||||||
coresimd = { version = "0.0.3", path = "coresimd/" }
|
|
||||||
|
|
||||||
[dev-dependencies]
|
|
||||||
auxv = "0.3.3"
|
|
||||||
quickcheck = "0.6"
|
|
||||||
rand = "0.4"
|
|
||||||
|
|
||||||
[profile.release]
|
[profile.release]
|
||||||
debug = true
|
debug = true
|
||||||
@@ -36,10 +11,3 @@ opt-level = 3
|
|||||||
[profile.bench]
|
[profile.bench]
|
||||||
debug = 1
|
debug = 1
|
||||||
opt-level = 3
|
opt-level = 3
|
||||||
|
|
||||||
[features]
|
|
||||||
# Internal-usage only: denies all warnings.
|
|
||||||
strict = [ "coresimd/strict" ]
|
|
||||||
# Internal-usage only: enables only those intrinsics supported by Intel's
|
|
||||||
# Software Development Environment (SDE).
|
|
||||||
intel_sde = [ "coresimd/intel_sde" ]
|
|
||||||
|
|||||||
@@ -22,16 +22,18 @@ dox() {
|
|||||||
rm -rf target/doc/$arch
|
rm -rf target/doc/$arch
|
||||||
mkdir target/doc/$arch
|
mkdir target/doc/$arch
|
||||||
|
|
||||||
cargo build --target $target
|
cargo build --target $target --manifest-path crates/stdsimd/Cargo.toml
|
||||||
|
|
||||||
rustdoc --target $target \
|
rustdoc --target $target \
|
||||||
-o target/doc/$arch coresimd/src/lib.rs \
|
-o target/doc/$arch crates/coresimd/src/lib.rs \
|
||||||
--crate-name coresimd \
|
--crate-name coresimd \
|
||||||
--library-path target/$target/debug/deps
|
--library-path target/$target/debug/deps
|
||||||
rustdoc --target $target \
|
rustdoc --target $target \
|
||||||
-o target/doc/$arch src/lib.rs \
|
-o target/doc/$arch crates/stdsimd/src/lib.rs \
|
||||||
--crate-name stdsimd \
|
--crate-name stdsimd \
|
||||||
--library-path target/$target/debug/deps
|
--library-path target/$target/debug/deps \
|
||||||
|
--extern cfg_if=`ls target/$target/debug/deps/libcfg_if-*.rlib` \
|
||||||
|
--extern libc=`ls target/$target/debug/deps/liblibc-*.rlib`
|
||||||
}
|
}
|
||||||
|
|
||||||
dox i686 i686-unknown-linux-gnu
|
dox i686 i686-unknown-linux-gnu
|
||||||
|
|||||||
@@ -23,7 +23,7 @@ echo "OBJDUMP=${OBJDUMP}"
|
|||||||
|
|
||||||
cargo_test() {
|
cargo_test() {
|
||||||
cmd="cargo test --target=$TARGET --features $FEATURES $1"
|
cmd="cargo test --target=$TARGET --features $FEATURES $1"
|
||||||
cmd="$cmd -p coresimd -p stdsimd"
|
cmd="$cmd -p coresimd -p stdsimd --manifest-path crates/stdsimd/Cargo.toml"
|
||||||
cmd="$cmd -- $2"
|
cmd="$cmd -- $2"
|
||||||
$cmd
|
$cmd
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1 +0,0 @@
|
|||||||
../LICENSE-APACHE
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
../LICENSE-MIT
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
../README.md
|
|
||||||
@@ -4,8 +4,8 @@
|
|||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
use stdsimd_test::assert_instr;
|
use stdsimd_test::assert_instr;
|
||||||
use simd_llvm::simd_add;
|
use coresimd::simd_llvm::simd_add;
|
||||||
use v128::f64x2;
|
use coresimd::v128::f64x2;
|
||||||
|
|
||||||
/// Vector add.
|
/// Vector add.
|
||||||
#[inline]
|
#[inline]
|
||||||
@@ -41,8 +41,8 @@ pub unsafe fn vaddd_u64(a: u64, b: u64) -> u64 {
|
|||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::f64x2;
|
use simd::f64x2;
|
||||||
use aarch64::neon;
|
use coresimd::aarch64::neon;
|
||||||
use stdsimd_test::simd_test;
|
use stdsimd_test::simd_test;
|
||||||
|
|
||||||
#[simd_test = "neon"]
|
#[simd_test = "neon"]
|
||||||
@@ -57,7 +57,7 @@ pub unsafe fn _cls_u64(x: u64) -> u64 {
|
|||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use aarch64::v8;
|
use coresimd::aarch64::v8;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn _rev_u64() {
|
fn _rev_u64() {
|
||||||
@@ -3,10 +3,9 @@
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
use stdsimd_test::assert_instr;
|
use stdsimd_test::assert_instr;
|
||||||
|
|
||||||
use simd_llvm::simd_add;
|
use coresimd::simd_llvm::simd_add;
|
||||||
|
use coresimd::v64::*;
|
||||||
use v64::{f32x2, i16x4, i32x2, i8x8, u16x4, u32x2, u8x8};
|
use coresimd::v128::*;
|
||||||
use v128::{f32x4, i16x8, i32x4, i64x2, i8x16, u16x8, u32x4, u64x2, u8x16};
|
|
||||||
|
|
||||||
/// Vector add.
|
/// Vector add.
|
||||||
#[inline]
|
#[inline]
|
||||||
@@ -216,7 +215,7 @@ pub unsafe fn vrsqrte_f32(a: f32x2) -> f32x2 {
|
|||||||
mod tests {
|
mod tests {
|
||||||
use stdsimd_test::simd_test;
|
use stdsimd_test::simd_test;
|
||||||
use simd::*;
|
use simd::*;
|
||||||
use arm::neon;
|
use coresimd::arm::neon;
|
||||||
|
|
||||||
#[simd_test = "neon"]
|
#[simd_test = "neon"]
|
||||||
unsafe fn vadd_s8() {
|
unsafe fn vadd_s8() {
|
||||||
@@ -25,7 +25,7 @@ pub unsafe fn _rev_u32(x: u32) -> u32 {
|
|||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use arm::v6;
|
use coresimd::arm::v6;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn _rev_u16() {
|
fn _rev_u16() {
|
||||||
@@ -50,7 +50,7 @@ extern "C" {
|
|||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use arm::v7;
|
use coresimd::arm::v7;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn _clz_u8() {
|
fn _clz_u8() {
|
||||||
@@ -82,8 +82,8 @@ macro_rules! define_impl {
|
|||||||
slice: &mut [$elemty],
|
slice: &mut [$elemty],
|
||||||
offset: usize,
|
offset: usize,
|
||||||
) {
|
) {
|
||||||
use core::mem::size_of;
|
use mem::size_of;
|
||||||
use core::ptr;
|
use ptr;
|
||||||
|
|
||||||
ptr::copy_nonoverlapping(
|
ptr::copy_nonoverlapping(
|
||||||
&self as *const $name as *const u8,
|
&self as *const $name as *const u8,
|
||||||
@@ -102,8 +102,8 @@ macro_rules! define_impl {
|
|||||||
slice: &[$elemty],
|
slice: &[$elemty],
|
||||||
offset: usize,
|
offset: usize,
|
||||||
) -> $name {
|
) -> $name {
|
||||||
use core::mem::size_of;
|
use mem::size_of;
|
||||||
use core::ptr;
|
use ptr;
|
||||||
|
|
||||||
let mut x = $name::splat(0 as $elemty);
|
let mut x = $name::splat(0 as $elemty);
|
||||||
ptr::copy_nonoverlapping(
|
ptr::copy_nonoverlapping(
|
||||||
@@ -152,7 +152,7 @@ macro_rules! define_from {
|
|||||||
impl From<$from> for $to {
|
impl From<$from> for $to {
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn from(f: $from) -> $to {
|
fn from(f: $from) -> $to {
|
||||||
unsafe { ::core::mem::transmute(f) }
|
unsafe { ::mem::transmute(f) }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
)+
|
)+
|
||||||
@@ -162,7 +162,7 @@ macro_rules! define_from {
|
|||||||
macro_rules! define_common_ops {
|
macro_rules! define_common_ops {
|
||||||
($($ty:ident),+) => {
|
($($ty:ident),+) => {
|
||||||
$(
|
$(
|
||||||
impl ::core::ops::Add for $ty {
|
impl ::ops::Add for $ty {
|
||||||
type Output = Self;
|
type Output = Self;
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn add(self, other: Self) -> Self {
|
fn add(self, other: Self) -> Self {
|
||||||
@@ -170,7 +170,7 @@ macro_rules! define_common_ops {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ::core::ops::Sub for $ty {
|
impl ::ops::Sub for $ty {
|
||||||
type Output = Self;
|
type Output = Self;
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn sub(self, other: Self) -> Self {
|
fn sub(self, other: Self) -> Self {
|
||||||
@@ -178,7 +178,7 @@ macro_rules! define_common_ops {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ::core::ops::Mul for $ty {
|
impl ::ops::Mul for $ty {
|
||||||
type Output = Self;
|
type Output = Self;
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn mul(self, other: Self) -> Self {
|
fn mul(self, other: Self) -> Self {
|
||||||
@@ -186,7 +186,7 @@ macro_rules! define_common_ops {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ::core::ops::Div for $ty {
|
impl ::ops::Div for $ty {
|
||||||
type Output = Self;
|
type Output = Self;
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn div(self, other: Self) -> Self {
|
fn div(self, other: Self) -> Self {
|
||||||
@@ -194,7 +194,7 @@ macro_rules! define_common_ops {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ::core::ops::Rem for $ty {
|
impl ::ops::Rem for $ty {
|
||||||
type Output = Self;
|
type Output = Self;
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn rem(self, other: Self) -> Self {
|
fn rem(self, other: Self) -> Self {
|
||||||
@@ -202,35 +202,35 @@ macro_rules! define_common_ops {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ::core::ops::AddAssign for $ty {
|
impl ::ops::AddAssign for $ty {
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn add_assign(&mut self, other: Self) {
|
fn add_assign(&mut self, other: Self) {
|
||||||
*self = *self + other;
|
*self = *self + other;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ::core::ops::SubAssign for $ty {
|
impl ::ops::SubAssign for $ty {
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn sub_assign(&mut self, other: Self) {
|
fn sub_assign(&mut self, other: Self) {
|
||||||
*self = *self - other;
|
*self = *self - other;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ::core::ops::MulAssign for $ty {
|
impl ::ops::MulAssign for $ty {
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn mul_assign(&mut self, other: Self) {
|
fn mul_assign(&mut self, other: Self) {
|
||||||
*self = *self * other;
|
*self = *self * other;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ::core::ops::DivAssign for $ty {
|
impl ::ops::DivAssign for $ty {
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn div_assign(&mut self, other: Self) {
|
fn div_assign(&mut self, other: Self) {
|
||||||
*self = *self / other;
|
*self = *self / other;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ::core::ops::RemAssign for $ty {
|
impl ::ops::RemAssign for $ty {
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn rem_assign(&mut self, other: Self) {
|
fn rem_assign(&mut self, other: Self) {
|
||||||
*self = *self % other;
|
*self = *self % other;
|
||||||
@@ -244,14 +244,14 @@ macro_rules! define_common_ops {
|
|||||||
macro_rules! define_shifts {
|
macro_rules! define_shifts {
|
||||||
($ty:ident, $elem:ident, $($by:ident),+) => {
|
($ty:ident, $elem:ident, $($by:ident),+) => {
|
||||||
$(
|
$(
|
||||||
impl ::core::ops::Shl<$by> for $ty {
|
impl ::ops::Shl<$by> for $ty {
|
||||||
type Output = Self;
|
type Output = Self;
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn shl(self, other: $by) -> Self {
|
fn shl(self, other: $by) -> Self {
|
||||||
unsafe { simd_shl(self, $ty::splat(other as $elem)) }
|
unsafe { simd_shl(self, $ty::splat(other as $elem)) }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
impl ::core::ops::Shr<$by> for $ty {
|
impl ::ops::Shr<$by> for $ty {
|
||||||
type Output = Self;
|
type Output = Self;
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn shr(self, other: $by) -> Self {
|
fn shr(self, other: $by) -> Self {
|
||||||
@@ -259,13 +259,13 @@ macro_rules! define_shifts {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ::core::ops::ShlAssign<$by> for $ty {
|
impl ::ops::ShlAssign<$by> for $ty {
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn shl_assign(&mut self, other: $by) {
|
fn shl_assign(&mut self, other: $by) {
|
||||||
*self = *self << other;
|
*self = *self << other;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
impl ::core::ops::ShrAssign<$by> for $ty {
|
impl ::ops::ShrAssign<$by> for $ty {
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn shr_assign(&mut self, other: $by) {
|
fn shr_assign(&mut self, other: $by) {
|
||||||
*self = *self >> other;
|
*self = *self >> other;
|
||||||
@@ -279,7 +279,7 @@ macro_rules! define_shifts {
|
|||||||
macro_rules! define_float_ops {
|
macro_rules! define_float_ops {
|
||||||
($($ty:ident),+) => {
|
($($ty:ident),+) => {
|
||||||
$(
|
$(
|
||||||
impl ::core::ops::Neg for $ty {
|
impl ::ops::Neg for $ty {
|
||||||
type Output = Self;
|
type Output = Self;
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn neg(self) -> Self {
|
fn neg(self) -> Self {
|
||||||
@@ -293,7 +293,7 @@ macro_rules! define_float_ops {
|
|||||||
macro_rules! define_signed_integer_ops {
|
macro_rules! define_signed_integer_ops {
|
||||||
($($ty:ident),+) => {
|
($($ty:ident),+) => {
|
||||||
$(
|
$(
|
||||||
impl ::core::ops::Neg for $ty {
|
impl ::ops::Neg for $ty {
|
||||||
type Output = Self;
|
type Output = Self;
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn neg(self) -> Self {
|
fn neg(self) -> Self {
|
||||||
@@ -307,7 +307,7 @@ macro_rules! define_signed_integer_ops {
|
|||||||
macro_rules! define_integer_ops {
|
macro_rules! define_integer_ops {
|
||||||
($(($ty:ident, $elem:ident)),+) => {
|
($(($ty:ident, $elem:ident)),+) => {
|
||||||
$(
|
$(
|
||||||
impl ::core::ops::Not for $ty {
|
impl ::ops::Not for $ty {
|
||||||
type Output = Self;
|
type Output = Self;
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn not(self) -> Self {
|
fn not(self) -> Self {
|
||||||
@@ -315,40 +315,40 @@ macro_rules! define_integer_ops {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ::core::ops::BitAnd for $ty {
|
impl ::ops::BitAnd for $ty {
|
||||||
type Output = Self;
|
type Output = Self;
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn bitand(self, other: Self) -> Self {
|
fn bitand(self, other: Self) -> Self {
|
||||||
unsafe { simd_and(self, other) }
|
unsafe { simd_and(self, other) }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
impl ::core::ops::BitOr for $ty {
|
impl ::ops::BitOr for $ty {
|
||||||
type Output = Self;
|
type Output = Self;
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn bitor(self, other: Self) -> Self {
|
fn bitor(self, other: Self) -> Self {
|
||||||
unsafe { simd_or(self, other) }
|
unsafe { simd_or(self, other) }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
impl ::core::ops::BitXor for $ty {
|
impl ::ops::BitXor for $ty {
|
||||||
type Output = Self;
|
type Output = Self;
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn bitxor(self, other: Self) -> Self {
|
fn bitxor(self, other: Self) -> Self {
|
||||||
unsafe { simd_xor(self, other) }
|
unsafe { simd_xor(self, other) }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
impl ::core::ops::BitAndAssign for $ty {
|
impl ::ops::BitAndAssign for $ty {
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn bitand_assign(&mut self, other: Self) {
|
fn bitand_assign(&mut self, other: Self) {
|
||||||
*self = *self & other;
|
*self = *self & other;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
impl ::core::ops::BitOrAssign for $ty {
|
impl ::ops::BitOrAssign for $ty {
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn bitor_assign(&mut self, other: Self) {
|
fn bitor_assign(&mut self, other: Self) {
|
||||||
*self = *self | other;
|
*self = *self | other;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
impl ::core::ops::BitXorAssign for $ty {
|
impl ::ops::BitXorAssign for $ty {
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn bitxor_assign(&mut self, other: Self) {
|
fn bitxor_assign(&mut self, other: Self) {
|
||||||
*self = *self ^ other;
|
*self = *self ^ other;
|
||||||
@@ -360,12 +360,12 @@ macro_rules! define_integer_ops {
|
|||||||
u8, u16, u32, u64, usize,
|
u8, u16, u32, u64, usize,
|
||||||
i8, i16, i32, i64, isize);
|
i8, i16, i32, i64, isize);
|
||||||
|
|
||||||
impl ::core::fmt::LowerHex for $ty {
|
impl ::fmt::LowerHex for $ty {
|
||||||
fn fmt(&self, f: &mut ::core::fmt::Formatter)
|
fn fmt(&self, f: &mut ::fmt::Formatter)
|
||||||
-> ::core::fmt::Result {
|
-> ::fmt::Result {
|
||||||
write!(f, "{}(", stringify!($ty))?;
|
write!(f, "{}(", stringify!($ty))?;
|
||||||
let n = ::core::mem::size_of_val(self)
|
let n = ::mem::size_of_val(self)
|
||||||
/ ::core::mem::size_of::<$elem>();
|
/ ::mem::size_of::<$elem>();
|
||||||
for i in 0..n {
|
for i in 0..n {
|
||||||
if i > 0 {
|
if i > 0 {
|
||||||
write!(f, ", ")?;
|
write!(f, ", ")?;
|
||||||
@@ -384,7 +384,7 @@ macro_rules! define_casts {
|
|||||||
$(
|
$(
|
||||||
impl $fromty {
|
impl $fromty {
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
pub fn $cast(self) -> ::simd::$toty {
|
pub fn $cast(self) -> ::coresimd::simd::$toty {
|
||||||
unsafe { simd_cast(self) }
|
unsafe { simd_cast(self) }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
80
library/stdarch/coresimd/mod.rs
Normal file
80
library/stdarch/coresimd/mod.rs
Normal file
@@ -0,0 +1,80 @@
|
|||||||
|
/// Platform independent SIMD vector types and operations.
|
||||||
|
pub mod simd {
|
||||||
|
pub use coresimd::v128::*;
|
||||||
|
pub use coresimd::v256::*;
|
||||||
|
pub use coresimd::v512::*;
|
||||||
|
pub use coresimd::v64::*;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Platform dependent vendor intrinsics.
|
||||||
|
pub mod vendor {
|
||||||
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||||
|
pub use coresimd::x86::*;
|
||||||
|
|
||||||
|
#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
|
||||||
|
pub use coresimd::arm::*;
|
||||||
|
|
||||||
|
#[cfg(target_arch = "aarch64")]
|
||||||
|
pub use coresimd::aarch64::*;
|
||||||
|
|
||||||
|
// FIXME: rust does not expose the nvptx and nvptx64 targets yet
|
||||||
|
#[cfg(not(any(target_arch = "x86", target_arch = "x86_64",
|
||||||
|
target_arch = "arm", target_arch = "aarch64")))]
|
||||||
|
pub use coresimd::nvptx::*;
|
||||||
|
}
|
||||||
|
|
||||||
|
#[macro_use]
|
||||||
|
mod macros;
|
||||||
|
mod simd_llvm;
|
||||||
|
mod v128;
|
||||||
|
mod v256;
|
||||||
|
mod v512;
|
||||||
|
mod v64;
|
||||||
|
|
||||||
|
/// 32-bit wide vector types
|
||||||
|
mod v32 {
|
||||||
|
#[cfg(not(test))]
|
||||||
|
use prelude::v1::*;
|
||||||
|
use coresimd::simd_llvm::*;
|
||||||
|
|
||||||
|
define_ty! { i16x2, i16, i16 }
|
||||||
|
define_impl! { i16x2, i16, 2, i16x2, x0, x1 }
|
||||||
|
define_ty! { u16x2, u16, u16 }
|
||||||
|
define_impl! { u16x2, u16, 2, i16x2, x0, x1 }
|
||||||
|
|
||||||
|
define_ty! { i8x4, i8, i8, i8, i8 }
|
||||||
|
define_impl! { i8x4, i8, 4, i8x4, x0, x1, x2, x3 }
|
||||||
|
define_ty! { u8x4, u8, u8, u8, u8 }
|
||||||
|
define_impl! { u8x4, u8, 4, i8x4, x0, x1, x2, x3 }
|
||||||
|
|
||||||
|
define_casts!(
|
||||||
|
(i16x2, i64x2, as_i64x2),
|
||||||
|
(u16x2, i64x2, as_i64x2),
|
||||||
|
(i8x4, i32x4, as_i32x4),
|
||||||
|
(u8x4, i32x4, as_i32x4)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// 16-bit wide vector types
|
||||||
|
mod v16 {
|
||||||
|
#[cfg(not(test))]
|
||||||
|
use prelude::v1::*;
|
||||||
|
use coresimd::simd_llvm::*;
|
||||||
|
|
||||||
|
define_ty! { i8x2, i8, i8 }
|
||||||
|
define_impl! { i8x2, i8, 2, i8x2, x0, x1 }
|
||||||
|
define_ty! { u8x2, u8, u8 }
|
||||||
|
define_impl! { u8x2, u8, 2, i8x2, x0, x1 }
|
||||||
|
|
||||||
|
define_casts!((i8x2, i64x2, as_i64x2), (u8x2, i64x2, as_i64x2));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||||
|
mod x86;
|
||||||
|
|
||||||
|
#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
|
||||||
|
mod arm;
|
||||||
|
#[cfg(target_arch = "aarch64")]
|
||||||
|
mod aarch64;
|
||||||
|
|
||||||
|
mod nvptx;
|
||||||
@@ -1 +0,0 @@
|
|||||||
../rustfmt.toml
|
|
||||||
@@ -1,139 +0,0 @@
|
|||||||
//! SIMD and vendor intrinsics support library.
|
|
||||||
//!
|
|
||||||
//! This documentation is only for one particular architecture, you can find
|
|
||||||
//! others at:
|
|
||||||
//!
|
|
||||||
//! * [i686](https://rust-lang-nursery.github.io/stdsimd/i686/stdsimd/)
|
|
||||||
//! * [`x86_64`](https://rust-lang-nursery.github.io/stdsimd/x86_64/stdsimd/)
|
|
||||||
//! * [arm](https://rust-lang-nursery.github.io/stdsimd/arm/stdsimd/)
|
|
||||||
//! * [aarch64](https://rust-lang-nursery.github.io/stdsimd/aarch64/stdsimd/)
|
|
||||||
|
|
||||||
#![cfg_attr(feature = "strict", deny(warnings))]
|
|
||||||
#![allow(dead_code)]
|
|
||||||
#![allow(unused_features)]
|
|
||||||
#![feature(const_fn, link_llvm_intrinsics, platform_intrinsics, repr_simd,
|
|
||||||
simd_ffi, target_feature, cfg_target_feature, i128_type, asm,
|
|
||||||
integer_atomics, stmt_expr_attributes, core_intrinsics,
|
|
||||||
crate_in_paths, attr_literals, rustc_attrs)]
|
|
||||||
#![cfg_attr(test, feature(proc_macro, test, abi_vectorcall))]
|
|
||||||
#![cfg_attr(feature = "cargo-clippy",
|
|
||||||
allow(inline_always, too_many_arguments, cast_sign_loss,
|
|
||||||
cast_lossless, cast_possible_wrap,
|
|
||||||
cast_possible_truncation, cast_precision_loss,
|
|
||||||
shadow_reuse, cyclomatic_complexity, similar_names,
|
|
||||||
many_single_char_names))]
|
|
||||||
#![no_std]
|
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
#[macro_use]
|
|
||||||
extern crate std;
|
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
extern crate stdsimd_test;
|
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
extern crate test;
|
|
||||||
|
|
||||||
/// Platform independent SIMD vector types and operations.
|
|
||||||
pub mod simd {
|
|
||||||
pub use v128::*;
|
|
||||||
pub use v256::*;
|
|
||||||
pub use v512::*;
|
|
||||||
pub use v64::*;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Platform dependent vendor intrinsics.
|
|
||||||
pub mod vendor {
|
|
||||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
|
||||||
pub use x86::*;
|
|
||||||
|
|
||||||
#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
|
|
||||||
pub use arm::*;
|
|
||||||
|
|
||||||
#[cfg(target_arch = "aarch64")]
|
|
||||||
pub use aarch64::*;
|
|
||||||
|
|
||||||
// FIXME: rust does not expose the nvptx and nvptx64 targets yet
|
|
||||||
#[cfg(not(any(target_arch = "x86", target_arch = "x86_64",
|
|
||||||
target_arch = "arm", target_arch = "aarch64")))]
|
|
||||||
pub use nvptx::*;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Run-time feature detection.
|
|
||||||
#[doc(hidden)]
|
|
||||||
pub mod __vendor_runtime {
|
|
||||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64",
|
|
||||||
all(target_os = "linux",
|
|
||||||
any(target_arch = "arm", target_arch = "aarch64",
|
|
||||||
target_arch = "powerpc64"))))]
|
|
||||||
pub use runtime::core::*;
|
|
||||||
|
|
||||||
// Re-exports `coresimd` run-time building blocks for usage in the
|
|
||||||
// `stdsimd` run-time.
|
|
||||||
#[cfg(all(target_os = "linux",
|
|
||||||
any(target_arch = "arm", target_arch = "aarch64",
|
|
||||||
target_arch = "powerpc64")))]
|
|
||||||
#[doc(hidden)]
|
|
||||||
pub mod __runtime {
|
|
||||||
pub use runtime::*;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64",
|
|
||||||
all(target_os = "linux",
|
|
||||||
any(target_arch = "arm", target_arch = "aarch64",
|
|
||||||
target_arch = "powerpc64"))))]
|
|
||||||
#[macro_use]
|
|
||||||
mod runtime;
|
|
||||||
|
|
||||||
#[macro_use]
|
|
||||||
mod macros;
|
|
||||||
mod simd_llvm;
|
|
||||||
mod v128;
|
|
||||||
mod v256;
|
|
||||||
mod v512;
|
|
||||||
mod v64;
|
|
||||||
|
|
||||||
/// 32-bit wide vector types
|
|
||||||
mod v32 {
|
|
||||||
use simd_llvm::*;
|
|
||||||
|
|
||||||
define_ty! { i16x2, i16, i16 }
|
|
||||||
define_impl! { i16x2, i16, 2, i16x2, x0, x1 }
|
|
||||||
define_ty! { u16x2, u16, u16 }
|
|
||||||
define_impl! { u16x2, u16, 2, i16x2, x0, x1 }
|
|
||||||
|
|
||||||
define_ty! { i8x4, i8, i8, i8, i8 }
|
|
||||||
define_impl! { i8x4, i8, 4, i8x4, x0, x1, x2, x3 }
|
|
||||||
define_ty! { u8x4, u8, u8, u8, u8 }
|
|
||||||
define_impl! { u8x4, u8, 4, i8x4, x0, x1, x2, x3 }
|
|
||||||
|
|
||||||
define_casts!(
|
|
||||||
(i16x2, i64x2, as_i64x2),
|
|
||||||
(u16x2, i64x2, as_i64x2),
|
|
||||||
(i8x4, i32x4, as_i32x4),
|
|
||||||
(u8x4, i32x4, as_i32x4)
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// 16-bit wide vector types
|
|
||||||
mod v16 {
|
|
||||||
use simd_llvm::*;
|
|
||||||
|
|
||||||
define_ty! { i8x2, i8, i8 }
|
|
||||||
define_impl! { i8x2, i8, 2, i8x2, x0, x1 }
|
|
||||||
define_ty! { u8x2, u8, u8 }
|
|
||||||
define_impl! { u8x2, u8, 2, i8x2, x0, x1 }
|
|
||||||
|
|
||||||
define_casts!((i8x2, i64x2, as_i64x2), (u8x2, i64x2, as_i64x2));
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
|
||||||
mod x86;
|
|
||||||
|
|
||||||
#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
|
|
||||||
mod arm;
|
|
||||||
#[cfg(target_arch = "aarch64")]
|
|
||||||
mod aarch64;
|
|
||||||
|
|
||||||
mod nvptx;
|
|
||||||
@@ -1,47 +0,0 @@
|
|||||||
//! Run-time feature detection on ARM Aarch64.
|
|
||||||
use runtime::cache;
|
|
||||||
use runtime::arch::HasFeature;
|
|
||||||
|
|
||||||
#[macro_export]
|
|
||||||
#[doc(hidden)]
|
|
||||||
macro_rules! __unstable_detect_feature {
|
|
||||||
("neon", $unstable_detect_feature:path) => {
|
|
||||||
// FIXME: this should be removed once we rename Aarch64 neon to asimd
|
|
||||||
$unstable_detect_feature($crate::__vendor_runtime::_Feature::asimd{})
|
|
||||||
};
|
|
||||||
("asimd", $unstable_detect_feature:path) => {
|
|
||||||
$unstable_detect_feature($crate::__vendor_runtime::__Feature::asimd{})
|
|
||||||
};
|
|
||||||
("pmull", $unstable_detect_feature:path) => {
|
|
||||||
$unstable_detect_feature($crate::__vendor_runtime::__Feature::pmull{})
|
|
||||||
};
|
|
||||||
($t:tt, $unstable_detect_feature:path) => { compile_error!(concat!("unknown arm target feature: ", $t)) };
|
|
||||||
}
|
|
||||||
|
|
||||||
/// ARM Aarch64 CPU Feature enum. Each variant denotes a position in a bitset
|
|
||||||
/// for a particular feature.
|
|
||||||
///
|
|
||||||
/// PLEASE: do not use this, it is an implementation detail subject to change.
|
|
||||||
#[doc(hidden)]
|
|
||||||
#[allow(non_camel_case_types)]
|
|
||||||
#[repr(u8)]
|
|
||||||
pub enum __Feature {
|
|
||||||
/// ARM Advanced SIMD (ASIMD) - Aarch64
|
|
||||||
asimd,
|
|
||||||
/// Polynomial Multiply
|
|
||||||
pmull,
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn detect_features<T: HasFeature>(mut x: T) -> cache::Initializer {
|
|
||||||
let mut value = cache::Initializer::default();
|
|
||||||
{
|
|
||||||
let mut enable_feature = |f| {
|
|
||||||
if x.has_feature(&f) {
|
|
||||||
value.set(f as u32);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
enable_feature(__Feature::asimd);
|
|
||||||
enable_feature(__Feature::pmull);
|
|
||||||
}
|
|
||||||
value
|
|
||||||
}
|
|
||||||
@@ -1,43 +0,0 @@
|
|||||||
//! Run-time feature detection on ARM Aarch32.
|
|
||||||
use runtime::cache;
|
|
||||||
use runtime::arch::HasFeature;
|
|
||||||
|
|
||||||
#[macro_export]
|
|
||||||
#[doc(hidden)]
|
|
||||||
macro_rules! __unstable_detect_feature {
|
|
||||||
("neon", $unstable_detect_feature:path) => {
|
|
||||||
$unstable_detect_feature($crate::__vendor_runtime::__Feature::neon{})
|
|
||||||
};
|
|
||||||
("pmull", $unstable_detect_feature:path) => {
|
|
||||||
$unstable_detect_feature($crate::__vendor_runtime::__Feature::pmull{})
|
|
||||||
};
|
|
||||||
($t:tt, $unstable_detect_feature:path) => { compile_error!(concat!("unknown arm target feature: ", $t)) };
|
|
||||||
}
|
|
||||||
|
|
||||||
/// ARM CPU Feature enum. Each variant denotes a position in a bitset for a
|
|
||||||
/// particular feature.
|
|
||||||
///
|
|
||||||
/// PLEASE: do not use this, it is an implementation detail subject to change.
|
|
||||||
#[doc(hidden)]
|
|
||||||
#[allow(non_camel_case_types)]
|
|
||||||
#[repr(u8)]
|
|
||||||
pub enum __Feature {
|
|
||||||
/// ARM Advanced SIMD (NEON) - Aarch32
|
|
||||||
neon,
|
|
||||||
/// Polynomial Multiply
|
|
||||||
pmull,
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn detect_features<T: HasFeature>(mut x: T) -> cache::Initializer {
|
|
||||||
let mut value = cache::Initializer::default();
|
|
||||||
{
|
|
||||||
let mut enable_feature = |f| {
|
|
||||||
if x.has_feature(&f) {
|
|
||||||
value.set(f as u32);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
enable_feature(__Feature::neon);
|
|
||||||
enable_feature(__Feature::pmull);
|
|
||||||
}
|
|
||||||
value
|
|
||||||
}
|
|
||||||
@@ -1,20 +0,0 @@
|
|||||||
//! Run-time feature detection for Aarch64 on Linux and `core`.
|
|
||||||
|
|
||||||
use runtime::bit;
|
|
||||||
use runtime::linux::auxv::AuxVec;
|
|
||||||
use runtime::arch::{HasFeature, __Feature};
|
|
||||||
|
|
||||||
/// Probe the ELF Auxiliary vector for hardware capabilities
|
|
||||||
///
|
|
||||||
/// The values are part of the platform-specific [asm/hwcap.h][hwcap]
|
|
||||||
///
|
|
||||||
/// [hwcap]: https://github.com/torvalds/linux/blob/master/arch/arm64/include/uapi/asm/hwcap.h
|
|
||||||
impl HasFeature for AuxVec {
|
|
||||||
fn has_feature(&mut self, x: &__Feature) -> bool {
|
|
||||||
use self::__Feature::*;
|
|
||||||
match *x {
|
|
||||||
asimd => bit::test(self.hwcap, 1),
|
|
||||||
pmull => bit::test(self.hwcap, 4),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,20 +0,0 @@
|
|||||||
//! Run-time feature detection for ARM32 on Linux and `core`.
|
|
||||||
|
|
||||||
use runtime::bit;
|
|
||||||
use runtime::linux::auxv::AuxVec;
|
|
||||||
use runtime::arch::{HasFeature, __Feature};
|
|
||||||
|
|
||||||
/// Probe the ELF Auxiliary vector for hardware capabilities
|
|
||||||
///
|
|
||||||
/// The values are part of the platform-specific [asm/hwcap.h][hwcap]
|
|
||||||
///
|
|
||||||
/// [hwcap]: https://github.com/torvalds/linux/blob/master/arch/arm/include/uapi/asm/hwcap.h
|
|
||||||
impl HasFeature for AuxVec {
|
|
||||||
fn has_feature(&mut self, x: &__Feature) -> bool {
|
|
||||||
use self::__Feature::*;
|
|
||||||
match *x {
|
|
||||||
neon => bit::test(self.hwcap, 12),
|
|
||||||
pmull => bit::test(self.hwcap2, 1),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,46 +0,0 @@
|
|||||||
//! ELF Auxiliary Vector
|
|
||||||
//!
|
|
||||||
//! The auxiliary vector is a memory region in a running ELF program's stack
|
|
||||||
//! composed of (key: usize, value: usize) pairs.
|
|
||||||
//!
|
|
||||||
//! The keys used in the aux vector are platform dependent. For Linux, they are
|
|
||||||
//! defined in [linux/auxvec.h][auxvec_h]. The hardware capabilities of a given
|
|
||||||
//! CPU can be queried with the `AT_HWCAP` and `AT_HWCAP2` keys.
|
|
||||||
//!
|
|
||||||
//! There is no perfect way of reading the auxiliary vector.
|
|
||||||
//!
|
|
||||||
//! - `coresimd`: if `getauxval` is available, `coresimd` will try to use it.
|
|
||||||
//! - `stdsimd`: if `getauxval` is not available, it will try to read
|
|
||||||
//! `/proc/self/auxv`, and if that fails it will try to read `/proc/cpuinfo`.
|
|
||||||
//!
|
|
||||||
//! For more information about when `getauxval` is available check the great
|
|
||||||
//! [`auxv` crate documentation][auxv_docs].
|
|
||||||
//!
|
|
||||||
//! [auxvec_h]: https://github.com/torvalds/linux/blob/master/include/uapi/linux/auxvec.h
|
|
||||||
//! [auxv_docs]: https://docs.rs/auxv/0.3.3/auxv/
|
|
||||||
|
|
||||||
/// Auxiliary-vector key under which the CPU hardware capabilities bitfield
/// is stored.
pub const AT_HWCAP: usize = 16;

/// Auxiliary-vector key under which the second CPU hardware capabilities
/// bitfield is stored.
pub const AT_HWCAP2: usize = 26;
|
|
||||||
|
|
||||||
/// Cached HWCAP bitfields of the ELF Auxiliary Vector.
///
/// When an entry cannot be read, every bit of the corresponding bitfield
/// is zero.
#[cfg(any(target_arch = "arm", target_arch = "powerpc64"))]
#[derive(Clone, Copy, Debug)]
pub struct AuxVec {
    /// First hardware capabilities bitfield (presumably the value stored
    /// under `AT_HWCAP`; confirm against the code that fills this struct).
    pub hwcap: usize,
    /// Second hardware capabilities bitfield (presumably the value stored
    /// under `AT_HWCAP2`; confirm against the code that fills this struct).
    pub hwcap2: usize,
}
|
|
||||||
|
|
||||||
/// Cached HWCAP bitfield of the ELF Auxiliary Vector.
///
/// When the entry cannot be read, every bit of the bitfield is zero.
#[cfg(target_arch = "aarch64")]
#[derive(Clone, Copy, Debug)]
pub struct AuxVec {
    /// Hardware capabilities bitfield (presumably the value stored under
    /// `AT_HWCAP`; confirm against the code that fills this struct).
    pub hwcap: usize,
}
|
|
||||||
@@ -1,12 +0,0 @@
|
|||||||
//! Run-time feature detection for ARM, Aarch64, and PowerPC64 on Linux.
|
|
||||||
|
|
||||||
#[cfg(target_arch = "arm")]
|
|
||||||
mod arm;
|
|
||||||
|
|
||||||
#[cfg(target_arch = "aarch64")]
|
|
||||||
mod aarch64;
|
|
||||||
|
|
||||||
#[cfg(target_arch = "powerpc64")]
|
|
||||||
mod powerpc64;
|
|
||||||
|
|
||||||
pub mod auxv;
|
|
||||||
@@ -1,22 +0,0 @@
|
|||||||
//! Run-time feature detection for PowerPC64 on Linux and `core`.
|
|
||||||
|
|
||||||
use runtime::linux::auxv::AuxVec;
|
|
||||||
use runtime::arch::{HasFeature, __Feature};
|
|
||||||
|
|
||||||
/// Probe the ELF Auxiliary vector for hardware capabilities
|
|
||||||
///
|
|
||||||
/// The values are part of the platform-specific [asm/cputable.h][cputable]
|
|
||||||
///
|
|
||||||
/// [cputable]: https://github.com/torvalds/linux/blob/master/arch/powerpc/include/uapi/asm/cputable.h
|
|
||||||
impl HasFeature for AuxVec {
|
|
||||||
fn has_feature(&mut self, x: &__Feature) -> bool {
|
|
||||||
use self::__Feature::*;
|
|
||||||
// note: the PowerPC values are the mask to do the test (instead of the
|
|
||||||
// index of the bit to test like in ARM and Aarch64)
|
|
||||||
match *x {
|
|
||||||
altivec => self.hwcap & 0x10000000 != 0,
|
|
||||||
vsx => self.hwcap & 0x00000080 != 0,
|
|
||||||
power8 => self.hwcap2 & 0x80000000 != 0,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,30 +0,0 @@
|
|||||||
//! Run-time feature detection macros.
|
|
||||||
|
|
||||||
/// Is a feature supported by the host CPU?
///
/// This macro performs run-time feature detection in `coresimd`. It
/// evaluates to `true` if the host CPU on which the binary is running
/// supports a particular feature.
///
/// When the feature is already enabled at compile time
/// (`#[cfg(target_feature = ...)]`) the macro expands to the constant
/// `true`. Otherwise, on x86/x86_64 it defers to
/// `__unstable_detect_feature!`, and on every other architecture it is a
/// compile-time error.
#[macro_export]
macro_rules! cfg_feature_enabled {
    ($name:tt) => (
        {
            #[cfg(target_feature = $name)]
            {
                true
            }
            #[cfg(not(target_feature = $name))]
            {
                #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
                {
                    __unstable_detect_feature!($name,
                        $crate::__vendor_runtime::__unstable_detect_feature)
                }
                #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
                {
                    // The message names this macro so that the diagnostic
                    // points users at the invocation they actually wrote.
                    compile_error!("cfg_feature_enabled! is not supported in this architecture")
                }
            }
        }
    )
}
|
|
||||||
@@ -1,64 +0,0 @@
|
|||||||
//! Run-time feature detection
|
|
||||||
pub mod cache;
|
|
||||||
pub mod bit;
|
|
||||||
|
|
||||||
#[macro_use]
|
|
||||||
pub mod macros;
|
|
||||||
|
|
||||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
|
||||||
#[macro_use]
|
|
||||||
pub mod x86;
|
|
||||||
|
|
||||||
#[cfg(target_arch = "arm")]
|
|
||||||
#[macro_use]
|
|
||||||
pub mod arm;
|
|
||||||
|
|
||||||
#[cfg(target_arch = "aarch64")]
|
|
||||||
#[macro_use]
|
|
||||||
pub mod aarch64;
|
|
||||||
|
|
||||||
#[cfg(target_arch = "powerpc64")]
|
|
||||||
#[macro_use]
|
|
||||||
pub mod powerpc64;
|
|
||||||
|
|
||||||
#[cfg(all(target_os = "linux",
|
|
||||||
any(target_arch = "arm", target_arch = "aarch64",
|
|
||||||
target_arch = "powerpc64")))]
|
|
||||||
pub mod linux;
|
|
||||||
|
|
||||||
/// Exports architecture specific functionality for
|
|
||||||
/// reuse in `stdsimd`.
|
|
||||||
pub mod arch {
|
|
||||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
|
||||||
pub use super::x86::{detect_features, __Feature};
|
|
||||||
|
|
||||||
#[cfg(target_arch = "arm")]
|
|
||||||
pub use runtime::arm::{detect_features, __Feature};
|
|
||||||
|
|
||||||
#[cfg(target_arch = "aarch64")]
|
|
||||||
pub use runtime::aarch64::{detect_features, __Feature};
|
|
||||||
|
|
||||||
#[cfg(target_arch = "powerpc64")]
|
|
||||||
pub use runtime::powerpc64::{detect_features, __Feature};
|
|
||||||
|
|
||||||
/// Interface for querying whether a feature is enabled.
|
|
||||||
pub trait HasFeature {
|
|
||||||
/// Is the feature `x` enabled at run-time?
|
|
||||||
fn has_feature(&mut self, x: &__Feature) -> bool;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Run-time feature detection exposed by `coresimd`.
|
|
||||||
pub mod core {
|
|
||||||
pub use super::arch::__Feature;
|
|
||||||
|
|
||||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
|
||||||
pub use super::arch::detect_features;
|
|
||||||
|
|
||||||
/// Performs run-time feature detection.
|
|
||||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
|
||||||
#[doc(hidden)]
|
|
||||||
pub fn __unstable_detect_feature(x: __Feature) -> bool {
|
|
||||||
super::cache::test(x as u32, detect_features)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,49 +0,0 @@
|
|||||||
//! Run-time feature detection on PowerPC64.
|
|
||||||
use runtime::cache;
|
|
||||||
use runtime::arch::HasFeature;
|
|
||||||
|
|
||||||
/// Maps a string-literal PowerPC feature name to the matching `__Feature`
/// variant and passes it to the detection function given as second
/// argument. Unknown names are rejected with a compile-time error.
#[macro_export]
#[doc(hidden)]
macro_rules! __unstable_detect_feature {
    ("altivec", $detect:path) => {
        $detect($crate::__vendor_runtime::__Feature::altivec {})
    };
    ("vsx", $detect:path) => {
        $detect($crate::__vendor_runtime::__Feature::vsx {})
    };
    ("power8", $detect:path) => {
        $detect($crate::__vendor_runtime::__Feature::power8 {})
    };
    ($t:tt, $detect:path) => {
        compile_error!(concat!("unknown PowerPC target feature: ", $t))
    };
}
|
|
||||||
|
|
||||||
/// Bit positions of the PowerPC CPU features in the feature bitset: each
/// variant's discriminant is the position of its feature.
///
/// PLEASE: do not use this, it is an implementation detail subject to change.
#[doc(hidden)]
#[allow(non_camel_case_types)]
#[repr(u8)]
pub enum __Feature {
    /// Altivec
    altivec,
    /// VSX
    vsx,
    /// Power8
    power8,
}
|
|
||||||
|
|
||||||
pub fn detect_features<T: HasFeature>(mut x: T) -> cache::Initializer {
|
|
||||||
let mut value = cache::Initializer::default();
|
|
||||||
{
|
|
||||||
let mut enable_feature = |f| {
|
|
||||||
if x.has_feature(&f) {
|
|
||||||
value.set(f as u32);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
enable_feature(__Feature::altivec);
|
|
||||||
enable_feature(__Feature::vsx);
|
|
||||||
enable_feature(__Feature::power8);
|
|
||||||
}
|
|
||||||
value
|
|
||||||
}
|
|
||||||
@@ -1,554 +0,0 @@
|
|||||||
//! This module implements minimal run-time feature detection for x86.
|
|
||||||
//!
|
|
||||||
//! The features are detected using the `detect_features` function below.
|
|
||||||
//! This function uses the CPUID instruction to read the feature flags from the
|
|
||||||
//! CPU and encodes them in an `usize` where each bit position represents
|
|
||||||
//! whether a feature is available (bit is set) or unavailable (bit is cleared).
|
|
||||||
//!
|
|
||||||
//! The enum `__Feature` is used to map bit positions to feature names, and the
|
|
||||||
//! `__unstable_detect_feature!` macro is used to map string literals (e.g.
|
|
||||||
//! "avx") to these bit positions (e.g. `__Feature::avx`).
|
|
||||||
//!
|
|
||||||
//!
|
|
||||||
//! The run-time feature detection is performed by the
|
|
||||||
//! `__unstable_detect_feature(__Feature) -> bool` function. On its first call,
|
|
||||||
//! this function queries the CPU for the available features and stores them
|
|
||||||
//! in a global `AtomicUsize` variable. The query is performed by just checking
|
|
||||||
//! whether the feature bit in this global variable is set or cleared.
|
|
||||||
|
|
||||||
use core::mem;
|
|
||||||
|
|
||||||
use super::{bit, cache};
|
|
||||||
|
|
||||||
/// Maps string-literal feature names to values of the `__Feature` enum at
/// compile-time and passes the result to the detection function given as
/// second argument. The feature names used are the same as those of rustc
/// `target_feature` and `cfg_target_feature` features.
///
/// Note that both `"abm"` and `"lzcnt"` map to `__Feature::abm`. Unknown
/// names are rejected with a compile-time error.
///
/// PLEASE: do not use this, it is an implementation detail subject to change.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[macro_export]
#[doc(hidden)]
macro_rules! __unstable_detect_feature {
    ("aes", $detect:path) => {
        $detect($crate::__vendor_runtime::__Feature::aes {})
    };
    ("tsc", $detect:path) => {
        $detect($crate::__vendor_runtime::__Feature::tsc {})
    };
    ("mmx", $detect:path) => {
        $detect($crate::__vendor_runtime::__Feature::mmx {})
    };
    ("sse", $detect:path) => {
        $detect($crate::__vendor_runtime::__Feature::sse {})
    };
    ("sse2", $detect:path) => {
        $detect($crate::__vendor_runtime::__Feature::sse2 {})
    };
    ("sse3", $detect:path) => {
        $detect($crate::__vendor_runtime::__Feature::sse3 {})
    };
    ("ssse3", $detect:path) => {
        $detect($crate::__vendor_runtime::__Feature::ssse3 {})
    };
    ("sse4.1", $detect:path) => {
        $detect($crate::__vendor_runtime::__Feature::sse4_1 {})
    };
    ("sse4.2", $detect:path) => {
        $detect($crate::__vendor_runtime::__Feature::sse4_2 {})
    };
    ("sse4a", $detect:path) => {
        $detect($crate::__vendor_runtime::__Feature::sse4a {})
    };
    ("avx", $detect:path) => {
        $detect($crate::__vendor_runtime::__Feature::avx {})
    };
    ("avx2", $detect:path) => {
        $detect($crate::__vendor_runtime::__Feature::avx2 {})
    };
    ("avx512f", $detect:path) => {
        $detect($crate::__vendor_runtime::__Feature::avx512f {})
    };
    ("avx512cd", $detect:path) => {
        $detect($crate::__vendor_runtime::__Feature::avx512cd {})
    };
    ("avx512er", $detect:path) => {
        $detect($crate::__vendor_runtime::__Feature::avx512er {})
    };
    ("avx512pf", $detect:path) => {
        $detect($crate::__vendor_runtime::__Feature::avx512pf {})
    };
    ("avx512bw", $detect:path) => {
        $detect($crate::__vendor_runtime::__Feature::avx512bw {})
    };
    ("avx512dq", $detect:path) => {
        $detect($crate::__vendor_runtime::__Feature::avx512dq {})
    };
    ("avx512vl", $detect:path) => {
        $detect($crate::__vendor_runtime::__Feature::avx512vl {})
    };
    ("avx512ifma", $detect:path) => {
        $detect($crate::__vendor_runtime::__Feature::avx512_ifma {})
    };
    ("avx512vbmi", $detect:path) => {
        $detect($crate::__vendor_runtime::__Feature::avx512_vbmi {})
    };
    ("avx512vpopcntdq", $detect:path) => {
        $detect($crate::__vendor_runtime::__Feature::avx512_vpopcntdq {})
    };
    ("fma", $detect:path) => {
        $detect($crate::__vendor_runtime::__Feature::fma {})
    };
    ("bmi", $detect:path) => {
        $detect($crate::__vendor_runtime::__Feature::bmi {})
    };
    ("bmi2", $detect:path) => {
        $detect($crate::__vendor_runtime::__Feature::bmi2 {})
    };
    ("abm", $detect:path) => {
        $detect($crate::__vendor_runtime::__Feature::abm {})
    };
    // `lzcnt` shares the `abm` bit.
    ("lzcnt", $detect:path) => {
        $detect($crate::__vendor_runtime::__Feature::abm {})
    };
    ("tbm", $detect:path) => {
        $detect($crate::__vendor_runtime::__Feature::tbm {})
    };
    ("popcnt", $detect:path) => {
        $detect($crate::__vendor_runtime::__Feature::popcnt {})
    };
    ("fxsr", $detect:path) => {
        $detect($crate::__vendor_runtime::__Feature::fxsr {})
    };
    ("xsave", $detect:path) => {
        $detect($crate::__vendor_runtime::__Feature::xsave {})
    };
    ("xsaveopt", $detect:path) => {
        $detect($crate::__vendor_runtime::__Feature::xsaveopt {})
    };
    ("xsaves", $detect:path) => {
        $detect($crate::__vendor_runtime::__Feature::xsaves {})
    };
    ("xsavec", $detect:path) => {
        $detect($crate::__vendor_runtime::__Feature::xsavec {})
    };
    ($t:tt, $detect:path) => {
        compile_error!(concat!("unknown target feature: ", $t))
    };
}
|
|
||||||
|
|
||||||
/// X86 CPU Feature enum. Each variant denotes a position in a bitset for a
/// particular feature.
///
/// The declaration order fixes each variant's discriminant and therefore its
/// bit position, so variants must not be reordered.
///
/// PLEASE: do not use this, it is an implementation detail subject to change.
#[doc(hidden)]
#[allow(non_camel_case_types)]
#[repr(u8)]
pub enum __Feature {
    /// AES (Advanced Encryption Standard New Instructions AES-NI)
    aes,
    /// TSC (Time Stamp Counter)
    tsc,
    /// MMX
    mmx,
    /// SSE (Streaming SIMD Extensions)
    sse,
    /// SSE2 (Streaming SIMD Extensions 2)
    sse2,
    /// SSE3 (Streaming SIMD Extensions 3)
    sse3,
    /// SSSE3 (Supplemental Streaming SIMD Extensions 3)
    ssse3,
    /// SSE4.1 (Streaming SIMD Extensions 4.1)
    sse4_1,
    /// SSE4.2 (Streaming SIMD Extensions 4.2)
    sse4_2,
    /// SSE4a (Streaming SIMD Extensions 4a)
    sse4a,
    /// AVX (Advanced Vector Extensions)
    avx,
    /// AVX2 (Advanced Vector Extensions 2)
    avx2,
    /// AVX-512 F (Foundation)
    avx512f,
    /// AVX-512 CD (Conflict Detection Instructions)
    avx512cd,
    /// AVX-512 ER (Exponential and Reciprocal Instructions)
    avx512er,
    /// AVX-512 PF (Prefetch Instructions)
    avx512pf,
    /// AVX-512 BW (Byte and Word Instructions)
    avx512bw,
    /// AVX-512 DQ (Doubleword and Quadword)
    avx512dq,
    /// AVX-512 VL (Vector Length Extensions)
    avx512vl,
    /// AVX-512 IFMA (Integer Fused Multiply Add)
    avx512_ifma,
    /// AVX-512 VBMI (Vector Byte Manipulation Instructions)
    avx512_vbmi,
    /// AVX-512 VPOPCNTDQ (Vector Population Count Doubleword and
    /// Quadword)
    avx512_vpopcntdq,
    /// FMA (Fused Multiply Add)
    fma,
    /// BMI1 (Bit Manipulation Instructions 1)
    bmi,
    /// BMI2 (Bit Manipulation Instructions 2)
    bmi2,
    /// ABM (Advanced Bit Manipulation) on AMD / LZCNT (Leading Zero
    /// Count) on Intel
    abm,
    /// TBM (Trailing Bit Manipulation)
    tbm,
    /// POPCNT (Population Count)
    popcnt,
    /// FXSR (Floating-point context fast save and restore)
    fxsr,
    /// XSAVE (Save Processor Extended States)
    xsave,
    /// XSAVEOPT (Save Processor Extended States Optimized)
    xsaveopt,
    /// XSAVES (Save Processor Extended States Supervisor)
    xsaves,
    /// XSAVEC (Save Processor Extended States Compacted)
    xsavec,
    #[doc(hidden)]
    __NonExhaustive,
}
|
|
||||||
|
|
||||||
/// Run-time feature detection on x86 works by using the CPUID instruction.
|
|
||||||
///
|
|
||||||
/// The [CPUID Wikipedia page][wiki_cpuid] contains
|
|
||||||
/// all the information about which flags to set to query which values, and in
|
|
||||||
/// which registers these are reported.
|
|
||||||
///
|
|
||||||
/// The definitive references are:
|
|
||||||
/// - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2:
|
|
||||||
/// Instruction Set Reference, A-Z][intel64_ref].
|
|
||||||
/// - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and
|
|
||||||
/// System Instructions][amd64_ref].
|
|
||||||
///
|
|
||||||
/// [wiki_cpuid]: https://en.wikipedia.org/wiki/CPUID
|
|
||||||
/// [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
|
|
||||||
/// [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
|
|
||||||
pub fn detect_features() -> cache::Initializer {
|
|
||||||
use vendor::{__cpuid, __cpuid_count, has_cpuid, CpuidResult};
|
|
||||||
use vendor::_xgetbv;
|
|
||||||
let mut value = cache::Initializer::default();
|
|
||||||
|
|
||||||
// If the x86 CPU does not support the CPUID instruction then it is too
|
|
||||||
// old to support any of the currently-detectable features.
|
|
||||||
if !has_cpuid() {
|
|
||||||
return value;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Calling `__cpuid`/`__cpuid_count` from here on is safe because the CPU
|
|
||||||
// has `cpuid` support.
|
|
||||||
|
|
||||||
// 0. EAX = 0: Basic Information:
|
|
||||||
// - EAX returns the "Highest Function Parameter", that is, the maximum
|
|
||||||
// leaf value for subsequent calls of `cpuinfo` in range [0,
|
|
||||||
// 0x8000_0000]. - The vendor ID is stored in 12 u8 ascii chars,
|
|
||||||
// returned in EBX, EDX, and ECX (in that order):
|
|
||||||
let (max_basic_leaf, vendor_id) = unsafe {
|
|
||||||
let CpuidResult {
|
|
||||||
eax: max_basic_leaf,
|
|
||||||
ebx,
|
|
||||||
ecx,
|
|
||||||
edx,
|
|
||||||
} = __cpuid(0);
|
|
||||||
let vendor_id: [[u8; 4]; 3] = [
|
|
||||||
mem::transmute(ebx),
|
|
||||||
mem::transmute(edx),
|
|
||||||
mem::transmute(ecx),
|
|
||||||
];
|
|
||||||
let vendor_id: [u8; 12] = mem::transmute(vendor_id);
|
|
||||||
(max_basic_leaf, vendor_id)
|
|
||||||
};
|
|
||||||
|
|
||||||
if max_basic_leaf < 1 {
|
|
||||||
// Earlier Intel 486, CPUID not implemented
|
|
||||||
return value;
|
|
||||||
}
|
|
||||||
|
|
||||||
// EAX = 1, ECX = 0: Queries "Processor Info and Feature Bits";
|
|
||||||
// Contains information about most x86 features.
|
|
||||||
let CpuidResult {
|
|
||||||
ecx: proc_info_ecx,
|
|
||||||
edx: proc_info_edx,
|
|
||||||
..
|
|
||||||
} = unsafe { __cpuid(0x0000_0001_u32) };
|
|
||||||
|
|
||||||
// EAX = 7, ECX = 0: Queries "Extended Features";
|
|
||||||
// Contains information about bmi,bmi2, and avx2 support.
|
|
||||||
let (extended_features_ebx, extended_features_ecx) = if max_basic_leaf >= 7
|
|
||||||
{
|
|
||||||
let CpuidResult { ebx, ecx, .. } = unsafe { __cpuid(0x0000_0007_u32) };
|
|
||||||
(ebx, ecx)
|
|
||||||
} else {
|
|
||||||
(0, 0) // CPUID does not support "Extended Features"
|
|
||||||
};
|
|
||||||
|
|
||||||
// EAX = 0x8000_0000, ECX = 0: Get Highest Extended Function Supported
|
|
||||||
// - EAX returns the max leaf value for extended information, that is,
|
|
||||||
// `cpuid` calls in range [0x8000_0000; u32::MAX]:
|
|
||||||
let CpuidResult {
|
|
||||||
eax: extended_max_basic_leaf,
|
|
||||||
..
|
|
||||||
} = unsafe { __cpuid(0x8000_0000_u32) };
|
|
||||||
|
|
||||||
// EAX = 0x8000_0001, ECX=0: Queries "Extended Processor Info and Feature
|
|
||||||
// Bits"
|
|
||||||
let extended_proc_info_ecx = if extended_max_basic_leaf >= 1 {
|
|
||||||
let CpuidResult { ecx, .. } = unsafe { __cpuid(0x8000_0001_u32) };
|
|
||||||
ecx
|
|
||||||
} else {
|
|
||||||
0
|
|
||||||
};
|
|
||||||
|
|
||||||
{
|
|
||||||
// borrows value till the end of this scope:
|
|
||||||
let mut enable = |r, rb, f| {
|
|
||||||
if bit::test(r as usize, rb) {
|
|
||||||
value.set(f as u32);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
enable(proc_info_ecx, 0, __Feature::sse3);
|
|
||||||
enable(proc_info_ecx, 9, __Feature::ssse3);
|
|
||||||
enable(proc_info_ecx, 12, __Feature::fma);
|
|
||||||
enable(proc_info_ecx, 19, __Feature::sse4_1);
|
|
||||||
enable(proc_info_ecx, 20, __Feature::sse4_2);
|
|
||||||
enable(proc_info_ecx, 23, __Feature::popcnt);
|
|
||||||
enable(proc_info_ecx, 25, __Feature::aes);
|
|
||||||
enable(proc_info_edx, 4, __Feature::tsc);
|
|
||||||
enable(proc_info_edx, 23, __Feature::mmx);
|
|
||||||
enable(proc_info_edx, 24, __Feature::fxsr);
|
|
||||||
enable(proc_info_edx, 25, __Feature::sse);
|
|
||||||
enable(proc_info_edx, 26, __Feature::sse2);
|
|
||||||
|
|
||||||
enable(extended_features_ebx, 3, __Feature::bmi);
|
|
||||||
enable(extended_features_ebx, 8, __Feature::bmi2);
|
|
||||||
|
|
||||||
// `XSAVE` and `AVX` support:
|
|
||||||
let cpu_xsave = bit::test(proc_info_ecx as usize, 26);
|
|
||||||
if cpu_xsave {
|
|
||||||
// 0. Here the CPU supports `XSAVE`.
|
|
||||||
|
|
||||||
// 1. Detect `OSXSAVE`, that is, whether the OS is AVX enabled and
|
|
||||||
// supports saving the state of the AVX/AVX2 vector registers on
|
|
||||||
// context-switches, see:
|
|
||||||
//
|
|
||||||
// - [intel: is avx enabled?][is_avx_enabled],
|
|
||||||
// - [mozilla: sse.cpp][mozilla_sse_cpp].
|
|
||||||
//
|
|
||||||
// [is_avx_enabled]: https://software.intel.com/en-us/blogs/2011/04/14/is-avx-enabled
|
|
||||||
// [mozilla_sse_cpp]: https://hg.mozilla.org/mozilla-central/file/64bab5cbb9b6/mozglue/build/SSE.cpp#l190
|
|
||||||
let cpu_osxsave = bit::test(proc_info_ecx as usize, 27);
|
|
||||||
|
|
||||||
// 2. The OS must have signaled the CPU that it supports saving and
|
|
||||||
// restoring the SSE and AVX registers by setting `XCR0.SSE[1]` and
|
|
||||||
// `XCR0.AVX[2]` to `1`.
|
|
||||||
//
|
|
||||||
// This is safe because the CPU supports `xsave`
|
|
||||||
let xcr0 = unsafe { _xgetbv(0) };
|
|
||||||
let os_avx_support = xcr0 & 6 == 6;
|
|
||||||
let os_avx512_support = xcr0 & 224 == 224;
|
|
||||||
|
|
||||||
// Only if the OS and the CPU support saving/restoring the AVX
|
|
||||||
// registers we enable `xsave` support:
|
|
||||||
if cpu_osxsave && os_avx_support {
|
|
||||||
// See "13.3 ENABLING THE XSAVE FEATURE SET AND XSAVE-ENABLED
|
|
||||||
// FEATURES" in the "Intel® 64 and IA-32 Architectures Software
|
|
||||||
// Developer’s Manual, Volume 1: Basic Architecture":
|
|
||||||
//
|
|
||||||
// "Software enables the XSAVE feature set by setting
|
|
||||||
// CR4.OSXSAVE[bit 18] to 1 (e.g., with the MOV to CR4
|
|
||||||
// instruction). If this bit is 0, execution of any of XGETBV,
|
|
||||||
// XRSTOR, XRSTORS, XSAVE, XSAVEC, XSAVEOPT, XSAVES, and XSETBV
|
|
||||||
// causes an invalid-opcode exception (#UD)"
|
|
||||||
//
|
|
||||||
enable(proc_info_ecx, 26, __Feature::xsave);
|
|
||||||
|
|
||||||
// For `xsaveopt`, `xsavec`, and `xsaves` we need to query:
|
|
||||||
// Processor Extended State Enumeration Sub-leaf (EAX = 0DH,
|
|
||||||
// ECX = 1):
|
|
||||||
if max_basic_leaf >= 0xd {
|
|
||||||
let CpuidResult {
|
|
||||||
eax: proc_extended_state1_eax,
|
|
||||||
..
|
|
||||||
} = unsafe { __cpuid_count(0xd_u32, 1) };
|
|
||||||
enable(proc_extended_state1_eax, 0, __Feature::xsaveopt);
|
|
||||||
enable(proc_extended_state1_eax, 1, __Feature::xsavec);
|
|
||||||
enable(proc_extended_state1_eax, 3, __Feature::xsaves);
|
|
||||||
}
|
|
||||||
|
|
||||||
// And AVX/AVX2:
|
|
||||||
enable(proc_info_ecx, 28, __Feature::avx);
|
|
||||||
enable(extended_features_ebx, 5, __Feature::avx2);
|
|
||||||
|
|
||||||
// For AVX-512 the OS also needs to support saving/restoring
|
|
||||||
// the extended state, only then we enable AVX-512 support:
|
|
||||||
if os_avx512_support {
|
|
||||||
enable(extended_features_ebx, 16, __Feature::avx512f);
|
|
||||||
enable(extended_features_ebx, 17, __Feature::avx512dq);
|
|
||||||
enable(extended_features_ebx, 21, __Feature::avx512_ifma);
|
|
||||||
enable(extended_features_ebx, 26, __Feature::avx512pf);
|
|
||||||
enable(extended_features_ebx, 27, __Feature::avx512er);
|
|
||||||
enable(extended_features_ebx, 28, __Feature::avx512cd);
|
|
||||||
enable(extended_features_ebx, 30, __Feature::avx512bw);
|
|
||||||
enable(extended_features_ebx, 31, __Feature::avx512vl);
|
|
||||||
enable(extended_features_ecx, 1, __Feature::avx512_vbmi);
|
|
||||||
enable(
|
|
||||||
extended_features_ecx,
|
|
||||||
14,
|
|
||||||
__Feature::avx512_vpopcntdq,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// This detects ABM on AMD CPUs and LZCNT on Intel CPUs.
|
|
||||||
// On intel CPUs with popcnt, lzcnt implements the
|
|
||||||
// "missing part" of ABM, so we map both to the same
|
|
||||||
// internal feature.
|
|
||||||
//
|
|
||||||
// The `cfg_feature_enabled!("lzcnt")` macro then
|
|
||||||
// internally maps to __Feature::abm.
|
|
||||||
enable(extended_proc_info_ecx, 5, __Feature::abm);
|
|
||||||
if vendor_id == *b"AuthenticAMD" {
|
|
||||||
// These features are only available on AMD CPUs:
|
|
||||||
enable(extended_proc_info_ecx, 6, __Feature::sse4a);
|
|
||||||
enable(extended_proc_info_ecx, 21, __Feature::tbm);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
value
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
mod tests {
|
|
||||||
extern crate cupid;
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn dump() {
|
|
||||||
println!("aes: {:?}", cfg_feature_enabled!("aes"));
|
|
||||||
println!("tsc: {:?}", cfg_feature_enabled!("tsc"));
|
|
||||||
println!("sse: {:?}", cfg_feature_enabled!("sse"));
|
|
||||||
println!("sse2: {:?}", cfg_feature_enabled!("sse2"));
|
|
||||||
println!("sse3: {:?}", cfg_feature_enabled!("sse3"));
|
|
||||||
println!("ssse3: {:?}", cfg_feature_enabled!("ssse3"));
|
|
||||||
println!("sse4.1: {:?}", cfg_feature_enabled!("sse4.1"));
|
|
||||||
println!("sse4.2: {:?}", cfg_feature_enabled!("sse4.2"));
|
|
||||||
println!("sse4a: {:?}", cfg_feature_enabled!("sse4a"));
|
|
||||||
println!("avx: {:?}", cfg_feature_enabled!("avx"));
|
|
||||||
println!("avx2: {:?}", cfg_feature_enabled!("avx2"));
|
|
||||||
println!("avx512f {:?}", cfg_feature_enabled!("avx512f"));
|
|
||||||
println!("avx512cd {:?}", cfg_feature_enabled!("avx512cd"));
|
|
||||||
println!("avx512er {:?}", cfg_feature_enabled!("avx512er"));
|
|
||||||
println!("avx512pf {:?}", cfg_feature_enabled!("avx512pf"));
|
|
||||||
println!("avx512bw {:?}", cfg_feature_enabled!("avx512bw"));
|
|
||||||
println!("avx512dq {:?}", cfg_feature_enabled!("avx512dq"));
|
|
||||||
println!("avx512vl {:?}", cfg_feature_enabled!("avx512vl"));
|
|
||||||
println!("avx512_ifma {:?}", cfg_feature_enabled!("avx512ifma"));
|
|
||||||
println!("avx512_vbmi {:?}", cfg_feature_enabled!("avx512vbmi"));
|
|
||||||
println!(
|
|
||||||
"avx512_vpopcntdq {:?}",
|
|
||||||
cfg_feature_enabled!("avx512vpopcntdq")
|
|
||||||
);
|
|
||||||
println!("fma: {:?}", cfg_feature_enabled!("fma"));
|
|
||||||
println!("abm: {:?}", cfg_feature_enabled!("abm"));
|
|
||||||
println!("bmi: {:?}", cfg_feature_enabled!("bmi"));
|
|
||||||
println!("bmi2: {:?}", cfg_feature_enabled!("bmi2"));
|
|
||||||
println!("tbm: {:?}", cfg_feature_enabled!("tbm"));
|
|
||||||
println!("popcnt: {:?}", cfg_feature_enabled!("popcnt"));
|
|
||||||
println!("lzcnt: {:?}", cfg_feature_enabled!("lzcnt"));
|
|
||||||
println!("fxsr: {:?}", cfg_feature_enabled!("fxsr"));
|
|
||||||
println!("xsave: {:?}", cfg_feature_enabled!("xsave"));
|
|
||||||
println!("xsaveopt: {:?}", cfg_feature_enabled!("xsaveopt"));
|
|
||||||
println!("xsaves: {:?}", cfg_feature_enabled!("xsaves"));
|
|
||||||
println!("xsavec: {:?}", cfg_feature_enabled!("xsavec"));
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn compare_with_cupid() {
|
|
||||||
let information = cupid::master().unwrap();
|
|
||||||
assert_eq!(cfg_feature_enabled!("aes"), information.aesni());
|
|
||||||
assert_eq!(cfg_feature_enabled!("tsc"), information.tsc());
|
|
||||||
assert_eq!(cfg_feature_enabled!("sse"), information.sse());
|
|
||||||
assert_eq!(cfg_feature_enabled!("sse2"), information.sse2());
|
|
||||||
assert_eq!(cfg_feature_enabled!("sse3"), information.sse3());
|
|
||||||
assert_eq!(cfg_feature_enabled!("ssse3"), information.ssse3());
|
|
||||||
assert_eq!(cfg_feature_enabled!("sse4.1"), information.sse4_1());
|
|
||||||
assert_eq!(cfg_feature_enabled!("sse4.2"), information.sse4_2());
|
|
||||||
assert_eq!(cfg_feature_enabled!("sse4a"), information.sse4a());
|
|
||||||
assert_eq!(cfg_feature_enabled!("avx"), information.avx());
|
|
||||||
assert_eq!(cfg_feature_enabled!("avx2"), information.avx2());
|
|
||||||
assert_eq!(cfg_feature_enabled!("avx512f"), information.avx512f());
|
|
||||||
assert_eq!(cfg_feature_enabled!("avx512cd"), information.avx512cd());
|
|
||||||
assert_eq!(cfg_feature_enabled!("avx512er"), information.avx512er());
|
|
||||||
assert_eq!(cfg_feature_enabled!("avx512pf"), information.avx512pf());
|
|
||||||
assert_eq!(cfg_feature_enabled!("avx512bw"), information.avx512bw());
|
|
||||||
assert_eq!(cfg_feature_enabled!("avx512dq"), information.avx512dq());
|
|
||||||
assert_eq!(cfg_feature_enabled!("avx512vl"), information.avx512vl());
|
|
||||||
assert_eq!(
|
|
||||||
cfg_feature_enabled!("avx512ifma"),
|
|
||||||
information.avx512_ifma()
|
|
||||||
);
|
|
||||||
assert_eq!(
|
|
||||||
cfg_feature_enabled!("avx512vbmi"),
|
|
||||||
information.avx512_vbmi()
|
|
||||||
);
|
|
||||||
assert_eq!(
|
|
||||||
cfg_feature_enabled!("avx512vpopcntdq"),
|
|
||||||
information.avx512_vpopcntdq()
|
|
||||||
);
|
|
||||||
assert_eq!(cfg_feature_enabled!("fma"), information.fma());
|
|
||||||
assert_eq!(cfg_feature_enabled!("bmi"), information.bmi1());
|
|
||||||
assert_eq!(cfg_feature_enabled!("bmi2"), information.bmi2());
|
|
||||||
assert_eq!(cfg_feature_enabled!("popcnt"), information.popcnt());
|
|
||||||
assert_eq!(cfg_feature_enabled!("abm"), information.lzcnt());
|
|
||||||
assert_eq!(cfg_feature_enabled!("tbm"), information.tbm());
|
|
||||||
assert_eq!(cfg_feature_enabled!("lzcnt"), information.lzcnt());
|
|
||||||
assert_eq!(cfg_feature_enabled!("xsave"), information.xsave());
|
|
||||||
assert_eq!(cfg_feature_enabled!("xsaveopt"), information.xsaveopt());
|
|
||||||
assert_eq!(
|
|
||||||
cfg_feature_enabled!("xsavec"),
|
|
||||||
information.xsavec_and_xrstor()
|
|
||||||
);
|
|
||||||
assert_eq!(
|
|
||||||
cfg_feature_enabled!("xsaves"),
|
|
||||||
information.xsaves_xrstors_and_ia32_xss()
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,47 +0,0 @@
|
|||||||
#![feature(cfg_target_feature)]
|
|
||||||
#![cfg_attr(feature = "strict", deny(warnings))]
|
|
||||||
#![cfg_attr(feature = "cargo-clippy",
|
|
||||||
allow(option_unwrap_used, print_stdout, use_debug))]
|
|
||||||
|
|
||||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
|
||||||
#[macro_use]
|
|
||||||
extern crate coresimd;
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
|
||||||
fn x86_all() {
|
|
||||||
println!("sse: {:?}", cfg_feature_enabled!("sse"));
|
|
||||||
println!("sse2: {:?}", cfg_feature_enabled!("sse2"));
|
|
||||||
println!("sse3: {:?}", cfg_feature_enabled!("sse3"));
|
|
||||||
println!("ssse3: {:?}", cfg_feature_enabled!("ssse3"));
|
|
||||||
println!("sse4.1: {:?}", cfg_feature_enabled!("sse4.1"));
|
|
||||||
println!("sse4.2: {:?}", cfg_feature_enabled!("sse4.2"));
|
|
||||||
println!("sse4a: {:?}", cfg_feature_enabled!("sse4a"));
|
|
||||||
println!("avx: {:?}", cfg_feature_enabled!("avx"));
|
|
||||||
println!("avx2: {:?}", cfg_feature_enabled!("avx2"));
|
|
||||||
println!("avx512f {:?}", cfg_feature_enabled!("avx512f"));
|
|
||||||
println!("avx512cd {:?}", cfg_feature_enabled!("avx512cd"));
|
|
||||||
println!("avx512er {:?}", cfg_feature_enabled!("avx512er"));
|
|
||||||
println!("avx512pf {:?}", cfg_feature_enabled!("avx512pf"));
|
|
||||||
println!("avx512bw {:?}", cfg_feature_enabled!("avx512bw"));
|
|
||||||
println!("avx512dq {:?}", cfg_feature_enabled!("avx512dq"));
|
|
||||||
println!("avx512vl {:?}", cfg_feature_enabled!("avx512vl"));
|
|
||||||
println!("avx512_ifma {:?}", cfg_feature_enabled!("avx512ifma"));
|
|
||||||
println!("avx512_vbmi {:?}", cfg_feature_enabled!("avx512vbmi"));
|
|
||||||
println!(
|
|
||||||
"avx512_vpopcntdq {:?}",
|
|
||||||
cfg_feature_enabled!("avx512vpopcntdq")
|
|
||||||
);
|
|
||||||
println!("fma: {:?}", cfg_feature_enabled!("fma"));
|
|
||||||
println!("abm: {:?}", cfg_feature_enabled!("abm"));
|
|
||||||
println!("bmi: {:?}", cfg_feature_enabled!("bmi"));
|
|
||||||
println!("bmi2: {:?}", cfg_feature_enabled!("bmi2"));
|
|
||||||
println!("tbm: {:?}", cfg_feature_enabled!("tbm"));
|
|
||||||
println!("popcnt: {:?}", cfg_feature_enabled!("popcnt"));
|
|
||||||
println!("lzcnt: {:?}", cfg_feature_enabled!("lzcnt"));
|
|
||||||
println!("fxsr: {:?}", cfg_feature_enabled!("fxsr"));
|
|
||||||
println!("xsave: {:?}", cfg_feature_enabled!("xsave"));
|
|
||||||
println!("xsaveopt: {:?}", cfg_feature_enabled!("xsaveopt"));
|
|
||||||
println!("xsaves: {:?}", cfg_feature_enabled!("xsaves"));
|
|
||||||
println!("xsavec: {:?}", cfg_feature_enabled!("xsavec"));
|
|
||||||
}
|
|
||||||
@@ -1,6 +1,8 @@
|
|||||||
//! 128-bit wide vector types
|
//! 128-bit wide vector types
|
||||||
|
|
||||||
use simd_llvm::*;
|
use prelude::v1::*;
|
||||||
|
|
||||||
|
use coresimd::simd_llvm::*;
|
||||||
|
|
||||||
define_ty! { f64x2, f64, f64 }
|
define_ty! { f64x2, f64, f64 }
|
||||||
define_impl! { f64x2, f64, 2, i64x2, x0, x1 }
|
define_impl! { f64x2, f64, 2, i64x2, x0, x1 }
|
||||||
@@ -1,6 +1,8 @@
|
|||||||
//! 256-bit wide vector types
|
//! 256-bit wide vector types
|
||||||
|
|
||||||
use simd_llvm::*;
|
use prelude::v1::*;
|
||||||
|
|
||||||
|
use coresimd::simd_llvm::*;
|
||||||
|
|
||||||
define_ty! { f64x4, f64, f64, f64, f64 }
|
define_ty! { f64x4, f64, f64, f64, f64 }
|
||||||
define_impl! { f64x4, f64, 4, i64x4, x0, x1, x2, x3 }
|
define_impl! { f64x4, f64, 4, i64x4, x0, x1, x2, x3 }
|
||||||
@@ -1,6 +1,8 @@
|
|||||||
//! 512-bit wide vector types
|
//! 512-bit wide vector types
|
||||||
|
|
||||||
use simd_llvm::*;
|
use prelude::v1::*;
|
||||||
|
|
||||||
|
use coresimd::simd_llvm::*;
|
||||||
|
|
||||||
define_ty! { f64x8, f64, f64, f64, f64, f64, f64, f64, f64 }
|
define_ty! { f64x8, f64, f64, f64, f64, f64, f64, f64, f64 }
|
||||||
define_impl! { f64x8, f64, 8, i64x8, x0, x1, x2, x3, x4, x5, x6, x7 }
|
define_impl! { f64x8, f64, 8, i64x8, x0, x1, x2, x3, x4, x5, x6, x7 }
|
||||||
@@ -1,6 +1,8 @@
|
|||||||
//! 64-bit wide vector types
|
//! 64-bit wide vector types
|
||||||
|
|
||||||
use simd_llvm::*;
|
use prelude::v1::*;
|
||||||
|
|
||||||
|
use coresimd::simd_llvm::*;
|
||||||
|
|
||||||
define_ty_doc! {
|
define_ty_doc! {
|
||||||
f32x2, f32, f32 |
|
f32x2, f32, f32 |
|
||||||
@@ -34,7 +34,7 @@ pub unsafe fn __writeeflags(eflags: u64) {
|
|||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use x86::i386::*;
|
use coresimd::x86::i386::*;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_eflags() {
|
fn test_eflags() {
|
||||||
@@ -51,7 +51,7 @@ pub unsafe fn _fxrstor(mem_addr: *const u8) {
|
|||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use x86::i386::fxsr;
|
use coresimd::x86::i386::fxsr;
|
||||||
use stdsimd_test::simd_test;
|
use stdsimd_test::simd_test;
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
|
|
||||||
@@ -54,7 +54,7 @@ extern "C" {
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use stdsimd_test::simd_test;
|
use stdsimd_test::simd_test;
|
||||||
use x86::i386::rdtsc;
|
use coresimd::x86::i386::rdtsc;
|
||||||
|
|
||||||
#[simd_test = "sse2"]
|
#[simd_test = "sse2"]
|
||||||
unsafe fn _rdtsc() {
|
unsafe fn _rdtsc() {
|
||||||
@@ -42,7 +42,7 @@ pub unsafe fn _popcnt32(x: i32) -> i32 {
|
|||||||
mod tests {
|
mod tests {
|
||||||
use stdsimd_test::simd_test;
|
use stdsimd_test::simd_test;
|
||||||
|
|
||||||
use x86::i586::abm;
|
use coresimd::x86::i586::abm;
|
||||||
|
|
||||||
#[simd_test = "lzcnt"]
|
#[simd_test = "lzcnt"]
|
||||||
unsafe fn _lzcnt_u32() {
|
unsafe fn _lzcnt_u32() {
|
||||||
@@ -13,17 +13,17 @@
|
|||||||
//! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
|
//! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
|
||||||
//! [wiki]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions
|
//! [wiki]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions
|
||||||
|
|
||||||
use core::mem;
|
use coresimd::simd_llvm::*;
|
||||||
use core::ptr;
|
use coresimd::v128::*;
|
||||||
|
use coresimd::v256::*;
|
||||||
|
use coresimd::x86::*;
|
||||||
|
use intrinsics;
|
||||||
|
use mem;
|
||||||
|
use ptr;
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
use stdsimd_test::assert_instr;
|
use stdsimd_test::assert_instr;
|
||||||
|
|
||||||
use simd_llvm::*;
|
|
||||||
use v128::*;
|
|
||||||
use v256::*;
|
|
||||||
use x86::*;
|
|
||||||
|
|
||||||
/// Add packed double-precision (64-bit) floating-point elements
|
/// Add packed double-precision (64-bit) floating-point elements
|
||||||
/// in `a` and `b`.
|
/// in `a` and `b`.
|
||||||
#[inline]
|
#[inline]
|
||||||
@@ -1547,7 +1547,7 @@ pub unsafe fn _mm256_lddqu_si256(mem_addr: *const __m256i) -> __m256i {
|
|||||||
#[target_feature(enable = "avx")]
|
#[target_feature(enable = "avx")]
|
||||||
#[cfg_attr(test, assert_instr(vmovntps))] // FIXME vmovntdq
|
#[cfg_attr(test, assert_instr(vmovntps))] // FIXME vmovntdq
|
||||||
pub unsafe fn _mm256_stream_si256(mem_addr: *const __m256i, a: __m256i) {
|
pub unsafe fn _mm256_stream_si256(mem_addr: *const __m256i, a: __m256i) {
|
||||||
::core::intrinsics::nontemporal_store(mem::transmute(mem_addr), a);
|
intrinsics::nontemporal_store(mem::transmute(mem_addr), a);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Moves double-precision values from a 256-bit vector of [4 x double]
|
/// Moves double-precision values from a 256-bit vector of [4 x double]
|
||||||
@@ -1557,7 +1557,7 @@ pub unsafe fn _mm256_stream_si256(mem_addr: *const __m256i, a: __m256i) {
|
|||||||
#[target_feature(enable = "avx")]
|
#[target_feature(enable = "avx")]
|
||||||
#[cfg_attr(test, assert_instr(vmovntps))] // FIXME vmovntpd
|
#[cfg_attr(test, assert_instr(vmovntps))] // FIXME vmovntpd
|
||||||
pub unsafe fn _mm256_stream_pd(mem_addr: *const f64, a: __m256d) {
|
pub unsafe fn _mm256_stream_pd(mem_addr: *const f64, a: __m256d) {
|
||||||
::core::intrinsics::nontemporal_store(mem::transmute(mem_addr), a);
|
intrinsics::nontemporal_store(mem::transmute(mem_addr), a);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Moves single-precision floating point values from a 256-bit vector
|
/// Moves single-precision floating point values from a 256-bit vector
|
||||||
@@ -1568,7 +1568,7 @@ pub unsafe fn _mm256_stream_pd(mem_addr: *const f64, a: __m256d) {
|
|||||||
#[target_feature(enable = "avx")]
|
#[target_feature(enable = "avx")]
|
||||||
#[cfg_attr(test, assert_instr(vmovntps))]
|
#[cfg_attr(test, assert_instr(vmovntps))]
|
||||||
pub unsafe fn _mm256_stream_ps(mem_addr: *const f32, a: __m256) {
|
pub unsafe fn _mm256_stream_ps(mem_addr: *const f32, a: __m256) {
|
||||||
::core::intrinsics::nontemporal_store(mem::transmute(mem_addr), a);
|
intrinsics::nontemporal_store(mem::transmute(mem_addr), a);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Compute the approximate reciprocal of packed single-precision (32-bit)
|
/// Compute the approximate reciprocal of packed single-precision (32-bit)
|
||||||
@@ -2366,7 +2366,6 @@ pub unsafe fn _mm256_loadu2_m128d(
|
|||||||
pub unsafe fn _mm256_loadu2_m128i(
|
pub unsafe fn _mm256_loadu2_m128i(
|
||||||
hiaddr: *const __m128i, loaddr: *const __m128i
|
hiaddr: *const __m128i, loaddr: *const __m128i
|
||||||
) -> __m256i {
|
) -> __m256i {
|
||||||
use x86::i586::sse2::_mm_loadu_si128;
|
|
||||||
let a = _mm256_castsi128_si256(_mm_loadu_si128(loaddr));
|
let a = _mm256_castsi128_si256(_mm_loadu_si128(loaddr));
|
||||||
_mm256_insertf128_si256(a, _mm_loadu_si128(hiaddr), 1)
|
_mm256_insertf128_si256(a, _mm_loadu_si128(hiaddr), 1)
|
||||||
}
|
}
|
||||||
@@ -2412,7 +2411,6 @@ pub unsafe fn _mm256_storeu2_m128d(
|
|||||||
pub unsafe fn _mm256_storeu2_m128i(
|
pub unsafe fn _mm256_storeu2_m128i(
|
||||||
hiaddr: *mut __m128i, loaddr: *mut __m128i, a: __m256i
|
hiaddr: *mut __m128i, loaddr: *mut __m128i, a: __m256i
|
||||||
) {
|
) {
|
||||||
use x86::i586::sse2::_mm_storeu_si128;
|
|
||||||
let lo = _mm256_castsi256_si128(a);
|
let lo = _mm256_castsi256_si128(a);
|
||||||
_mm_storeu_si128(loaddr, lo);
|
_mm_storeu_si128(loaddr, lo);
|
||||||
let hi = _mm256_extractf128_si256(a, 1);
|
let hi = _mm256_extractf128_si256(a, 1);
|
||||||
@@ -2579,7 +2577,7 @@ mod tests {
|
|||||||
use stdsimd_test::simd_test;
|
use stdsimd_test::simd_test;
|
||||||
use test::black_box; // Used to inhibit constant-folding.
|
use test::black_box; // Used to inhibit constant-folding.
|
||||||
|
|
||||||
use x86::*;
|
use coresimd::x86::*;
|
||||||
|
|
||||||
#[simd_test = "avx"]
|
#[simd_test = "avx"]
|
||||||
unsafe fn test_mm256_add_pd() {
|
unsafe fn test_mm256_add_pd() {
|
||||||
@@ -18,14 +18,13 @@
|
|||||||
//! [wiki_avx]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions
|
//! [wiki_avx]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions
|
||||||
//! [wiki_fma]: https://en.wikipedia.org/wiki/Fused_multiply-accumulate
|
//! [wiki_fma]: https://en.wikipedia.org/wiki/Fused_multiply-accumulate
|
||||||
|
|
||||||
use core::mem;
|
use coresimd::simd_llvm::*;
|
||||||
|
use coresimd::v256::*;
|
||||||
use simd_llvm::*;
|
use coresimd::v128::*;
|
||||||
use v256::*;
|
use coresimd::v64::*;
|
||||||
use v128::*;
|
use coresimd::v32::*;
|
||||||
use v64::*;
|
use coresimd::x86::*;
|
||||||
use v32::*;
|
use mem;
|
||||||
use x86::*;
|
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
use stdsimd_test::assert_instr;
|
use stdsimd_test::assert_instr;
|
||||||
@@ -1912,7 +1911,6 @@ pub unsafe fn _mm256_permute2x128_si256(
|
|||||||
#[cfg_attr(test, assert_instr(vpermpd, imm8 = 1))]
|
#[cfg_attr(test, assert_instr(vpermpd, imm8 = 1))]
|
||||||
#[rustc_args_required_const(1)]
|
#[rustc_args_required_const(1)]
|
||||||
pub unsafe fn _mm256_permute4x64_pd(a: __m256d, imm8: i32) -> __m256d {
|
pub unsafe fn _mm256_permute4x64_pd(a: __m256d, imm8: i32) -> __m256d {
|
||||||
use x86::i586::avx::_mm256_undefined_pd;
|
|
||||||
let imm8 = (imm8 & 0xFF) as u8;
|
let imm8 = (imm8 & 0xFF) as u8;
|
||||||
let undef = _mm256_undefined_pd();
|
let undef = _mm256_undefined_pd();
|
||||||
macro_rules! shuffle_done {
|
macro_rules! shuffle_done {
|
||||||
@@ -2024,10 +2022,13 @@ pub unsafe fn _mm256_shuffle_epi8(a: __m256i, b: __m256i) -> __m256i {
|
|||||||
/// # #[macro_use] extern crate stdsimd;
|
/// # #[macro_use] extern crate stdsimd;
|
||||||
/// #
|
/// #
|
||||||
/// # fn main() {
|
/// # fn main() {
|
||||||
/// # if cfg_feature_enabled!("avx2") {
|
/// # if is_target_feature_detected!("avx2") {
|
||||||
/// # #[target_feature(enable = "avx2")]
|
/// # #[target_feature(enable = "avx2")]
|
||||||
/// # unsafe fn worker() {
|
/// # unsafe fn worker() {
|
||||||
/// use stdsimd::vendor::*;
|
/// #[cfg(target_arch = "x86")]
|
||||||
|
/// use stdsimd::arch::x86::*;
|
||||||
|
/// #[cfg(target_arch = "x86_64")]
|
||||||
|
/// use stdsimd::arch::x86_64::*;
|
||||||
///
|
///
|
||||||
/// let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
|
/// let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
|
||||||
///
|
///
|
||||||
@@ -2627,10 +2628,13 @@ pub unsafe fn _mm256_subs_epu8(a: __m256i, b: __m256i) -> __m256i {
|
|||||||
/// # #[macro_use] extern crate stdsimd;
|
/// # #[macro_use] extern crate stdsimd;
|
||||||
/// #
|
/// #
|
||||||
/// # fn main() {
|
/// # fn main() {
|
||||||
/// # if cfg_feature_enabled!("avx2") {
|
/// # if is_target_feature_detected!("avx2") {
|
||||||
/// # #[target_feature(enable = "avx2")]
|
/// # #[target_feature(enable = "avx2")]
|
||||||
/// # unsafe fn worker() {
|
/// # unsafe fn worker() {
|
||||||
/// use stdsimd::vendor::*;
|
/// #[cfg(target_arch = "x86")]
|
||||||
|
/// use stdsimd::arch::x86::*;
|
||||||
|
/// #[cfg(target_arch = "x86_64")]
|
||||||
|
/// use stdsimd::arch::x86_64::*;
|
||||||
///
|
///
|
||||||
/// let a = _mm256_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
/// let a = _mm256_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
||||||
/// 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
|
/// 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
|
||||||
@@ -2673,10 +2677,13 @@ pub unsafe fn _mm256_unpackhi_epi8(a: __m256i, b: __m256i) -> __m256i {
|
|||||||
/// # #[macro_use] extern crate stdsimd;
|
/// # #[macro_use] extern crate stdsimd;
|
||||||
/// #
|
/// #
|
||||||
/// # fn main() {
|
/// # fn main() {
|
||||||
/// # if cfg_feature_enabled!("avx2") {
|
/// # if is_target_feature_detected!("avx2") {
|
||||||
/// # #[target_feature(enable = "avx2")]
|
/// # #[target_feature(enable = "avx2")]
|
||||||
/// # unsafe fn worker() {
|
/// # unsafe fn worker() {
|
||||||
/// use stdsimd::vendor::*;
|
/// #[cfg(target_arch = "x86")]
|
||||||
|
/// use stdsimd::arch::x86::*;
|
||||||
|
/// #[cfg(target_arch = "x86_64")]
|
||||||
|
/// use stdsimd::arch::x86_64::*;
|
||||||
///
|
///
|
||||||
/// let a = _mm256_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
/// let a = _mm256_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
||||||
/// 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
|
/// 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
|
||||||
@@ -2718,10 +2725,13 @@ pub unsafe fn _mm256_unpacklo_epi8(a: __m256i, b: __m256i) -> __m256i {
|
|||||||
/// # #[macro_use] extern crate stdsimd;
|
/// # #[macro_use] extern crate stdsimd;
|
||||||
/// #
|
/// #
|
||||||
/// # fn main() {
|
/// # fn main() {
|
||||||
/// # if cfg_feature_enabled!("avx2") {
|
/// # if is_target_feature_detected!("avx2") {
|
||||||
/// # #[target_feature(enable = "avx2")]
|
/// # #[target_feature(enable = "avx2")]
|
||||||
/// # unsafe fn worker() {
|
/// # unsafe fn worker() {
|
||||||
/// use stdsimd::vendor::*;
|
/// #[cfg(target_arch = "x86")]
|
||||||
|
/// use stdsimd::arch::x86::*;
|
||||||
|
/// #[cfg(target_arch = "x86_64")]
|
||||||
|
/// use stdsimd::arch::x86_64::*;
|
||||||
///
|
///
|
||||||
/// let a = _mm256_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
|
/// let a = _mm256_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
|
||||||
/// let b = _mm256_setr_epi16(0,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15);
|
/// let b = _mm256_setr_epi16(0,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15);
|
||||||
@@ -2759,10 +2769,13 @@ pub unsafe fn _mm256_unpackhi_epi16(a: __m256i, b: __m256i) -> __m256i {
|
|||||||
/// # #[macro_use] extern crate stdsimd;
|
/// # #[macro_use] extern crate stdsimd;
|
||||||
/// #
|
/// #
|
||||||
/// # fn main() {
|
/// # fn main() {
|
||||||
/// # if cfg_feature_enabled!("avx2") {
|
/// # if is_target_feature_detected!("avx2") {
|
||||||
/// # #[target_feature(enable = "avx2")]
|
/// # #[target_feature(enable = "avx2")]
|
||||||
/// # unsafe fn worker() {
|
/// # unsafe fn worker() {
|
||||||
/// use stdsimd::vendor::*;
|
/// #[cfg(target_arch = "x86")]
|
||||||
|
/// use stdsimd::arch::x86::*;
|
||||||
|
/// #[cfg(target_arch = "x86_64")]
|
||||||
|
/// use stdsimd::arch::x86_64::*;
|
||||||
///
|
///
|
||||||
/// let a = _mm256_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
|
/// let a = _mm256_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
|
||||||
/// let b = _mm256_setr_epi16(0,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15);
|
/// let b = _mm256_setr_epi16(0,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15);
|
||||||
@@ -2800,10 +2813,13 @@ pub unsafe fn _mm256_unpacklo_epi16(a: __m256i, b: __m256i) -> __m256i {
|
|||||||
/// # #[macro_use] extern crate stdsimd;
|
/// # #[macro_use] extern crate stdsimd;
|
||||||
/// #
|
/// #
|
||||||
/// # fn main() {
|
/// # fn main() {
|
||||||
/// # if cfg_feature_enabled!("avx2") {
|
/// # if is_target_feature_detected!("avx2") {
|
||||||
/// # #[target_feature(enable = "avx2")]
|
/// # #[target_feature(enable = "avx2")]
|
||||||
/// # unsafe fn worker() {
|
/// # unsafe fn worker() {
|
||||||
/// use stdsimd::vendor::*;
|
/// #[cfg(target_arch = "x86")]
|
||||||
|
/// use stdsimd::arch::x86::*;
|
||||||
|
/// #[cfg(target_arch = "x86_64")]
|
||||||
|
/// use stdsimd::arch::x86_64::*;
|
||||||
///
|
///
|
||||||
/// let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
|
/// let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
|
||||||
/// let b = _mm256_setr_epi32(0,-1,-2,-3,-4,-5,-6,-7);
|
/// let b = _mm256_setr_epi32(0,-1,-2,-3,-4,-5,-6,-7);
|
||||||
@@ -2840,10 +2856,13 @@ pub unsafe fn _mm256_unpackhi_epi32(a: __m256i, b: __m256i) -> __m256i {
|
|||||||
/// # #[macro_use] extern crate stdsimd;
|
/// # #[macro_use] extern crate stdsimd;
|
||||||
/// #
|
/// #
|
||||||
/// # fn main() {
|
/// # fn main() {
|
||||||
/// # if cfg_feature_enabled!("avx2") {
|
/// # if is_target_feature_detected!("avx2") {
|
||||||
/// # #[target_feature(enable = "avx2")]
|
/// # #[target_feature(enable = "avx2")]
|
||||||
/// # unsafe fn worker() {
|
/// # unsafe fn worker() {
|
||||||
/// use stdsimd::vendor::*;
|
/// #[cfg(target_arch = "x86")]
|
||||||
|
/// use stdsimd::arch::x86::*;
|
||||||
|
/// #[cfg(target_arch = "x86_64")]
|
||||||
|
/// use stdsimd::arch::x86_64::*;
|
||||||
///
|
///
|
||||||
/// let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
|
/// let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
|
||||||
/// let b = _mm256_setr_epi32(0,-1,-2,-3,-4,-5,-6,-7);
|
/// let b = _mm256_setr_epi32(0,-1,-2,-3,-4,-5,-6,-7);
|
||||||
@@ -2877,10 +2896,13 @@ pub unsafe fn _mm256_unpacklo_epi32(a: __m256i, b: __m256i) -> __m256i {
|
|||||||
/// # #[macro_use] extern crate stdsimd;
|
/// # #[macro_use] extern crate stdsimd;
|
||||||
/// #
|
/// #
|
||||||
/// # fn main() {
|
/// # fn main() {
|
||||||
/// # if cfg_feature_enabled!("avx2") {
|
/// # if is_target_feature_detected!("avx2") {
|
||||||
/// # #[target_feature(enable = "avx2")]
|
/// # #[target_feature(enable = "avx2")]
|
||||||
/// # unsafe fn worker() {
|
/// # unsafe fn worker() {
|
||||||
/// use stdsimd::vendor::*;
|
/// #[cfg(target_arch = "x86")]
|
||||||
|
/// use stdsimd::arch::x86::*;
|
||||||
|
/// #[cfg(target_arch = "x86_64")]
|
||||||
|
/// use stdsimd::arch::x86_64::*;
|
||||||
///
|
///
|
||||||
/// let a = _mm256_setr_epi64x(0, 1, 2, 3);
|
/// let a = _mm256_setr_epi64x(0, 1, 2, 3);
|
||||||
/// let b = _mm256_setr_epi64x(0,-1,-2,-3);
|
/// let b = _mm256_setr_epi64x(0,-1,-2,-3);
|
||||||
@@ -2913,10 +2935,13 @@ pub unsafe fn _mm256_unpackhi_epi64(a: __m256i, b: __m256i) -> __m256i {
|
|||||||
/// # #[macro_use] extern crate stdsimd;
|
/// # #[macro_use] extern crate stdsimd;
|
||||||
/// #
|
/// #
|
||||||
/// # fn main() {
|
/// # fn main() {
|
||||||
/// # if cfg_feature_enabled!("avx2") {
|
/// # if is_target_feature_detected!("avx2") {
|
||||||
/// # #[target_feature(enable = "avx2")]
|
/// # #[target_feature(enable = "avx2")]
|
||||||
/// # unsafe fn worker() {
|
/// # unsafe fn worker() {
|
||||||
/// use stdsimd::vendor::*;
|
/// #[cfg(target_arch = "x86")]
|
||||||
|
/// use stdsimd::arch::x86::*;
|
||||||
|
/// #[cfg(target_arch = "x86_64")]
|
||||||
|
/// use stdsimd::arch::x86_64::*;
|
||||||
///
|
///
|
||||||
/// let a = _mm256_setr_epi64x(0, 1, 2, 3);
|
/// let a = _mm256_setr_epi64x(0, 1, 2, 3);
|
||||||
/// let b = _mm256_setr_epi64x(0,-1,-2,-3);
|
/// let b = _mm256_setr_epi64x(0,-1,-2,-3);
|
||||||
@@ -3253,10 +3278,10 @@ extern "C" {
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use stdsimd_test::simd_test;
|
use stdsimd_test::simd_test;
|
||||||
|
|
||||||
use x86::*;
|
|
||||||
use std;
|
use std;
|
||||||
|
|
||||||
|
use coresimd::x86::*;
|
||||||
|
|
||||||
#[simd_test = "avx2"]
|
#[simd_test = "avx2"]
|
||||||
unsafe fn test_mm256_abs_epi32() {
|
unsafe fn test_mm256_abs_epi32() {
|
||||||
#[cfg_attr(rustfmt, rustfmt_skip)]
|
#[cfg_attr(rustfmt, rustfmt_skip)]
|
||||||
@@ -96,7 +96,7 @@ extern "C" {
|
|||||||
mod tests {
|
mod tests {
|
||||||
use stdsimd_test::simd_test;
|
use stdsimd_test::simd_test;
|
||||||
|
|
||||||
use x86::i586::bmi;
|
use coresimd::x86::i586::bmi;
|
||||||
|
|
||||||
#[simd_test = "bmi"]
|
#[simd_test = "bmi"]
|
||||||
unsafe fn _bextr_u32() {
|
unsafe fn _bextr_u32() {
|
||||||
@@ -67,7 +67,7 @@ extern "C" {
|
|||||||
mod tests {
|
mod tests {
|
||||||
use stdsimd_test::simd_test;
|
use stdsimd_test::simd_test;
|
||||||
|
|
||||||
use x86::i586::bmi2;
|
use coresimd::x86::i586::bmi2;
|
||||||
|
|
||||||
#[simd_test = "bmi2"]
|
#[simd_test = "bmi2"]
|
||||||
unsafe fn _pext_u32() {
|
unsafe fn _pext_u32() {
|
||||||
@@ -2,6 +2,8 @@
|
|||||||
|
|
||||||
#![cfg_attr(feature = "cargo-clippy", allow(stutter))]
|
#![cfg_attr(feature = "cargo-clippy", allow(stutter))]
|
||||||
|
|
||||||
|
use mem;
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
use stdsimd_test::assert_instr;
|
use stdsimd_test::assert_instr;
|
||||||
|
|
||||||
@@ -45,7 +47,7 @@ pub struct CpuidResult {
|
|||||||
#[inline]
|
#[inline]
|
||||||
#[cfg_attr(test, assert_instr(cpuid))]
|
#[cfg_attr(test, assert_instr(cpuid))]
|
||||||
pub unsafe fn __cpuid_count(leaf: u32, sub_leaf: u32) -> CpuidResult {
|
pub unsafe fn __cpuid_count(leaf: u32, sub_leaf: u32) -> CpuidResult {
|
||||||
let mut r = ::core::mem::uninitialized::<CpuidResult>();
|
let mut r = mem::uninitialized::<CpuidResult>();
|
||||||
if cfg!(target_arch = "x86") {
|
if cfg!(target_arch = "x86") {
|
||||||
asm!("cpuid"
|
asm!("cpuid"
|
||||||
: "={eax}"(r.eax), "={ebx}"(r.ebx), "={ecx}"(r.ecx), "={edx}"(r.edx)
|
: "={eax}"(r.eax), "={ebx}"(r.ebx), "={ecx}"(r.ecx), "={edx}"(r.edx)
|
||||||
@@ -77,7 +79,7 @@ pub fn has_cpuid() -> bool {
|
|||||||
}
|
}
|
||||||
#[cfg(target_arch = "x86")]
|
#[cfg(target_arch = "x86")]
|
||||||
{
|
{
|
||||||
use x86::i386::{__readeflags, __writeeflags};
|
use coresimd::x86::i386::{__readeflags, __writeeflags};
|
||||||
|
|
||||||
// On `x86` the `cpuid` instruction is not always available.
|
// On `x86` the `cpuid` instruction is not always available.
|
||||||
// This follows the approach indicated in:
|
// This follows the approach indicated in:
|
||||||
@@ -119,7 +121,7 @@ pub unsafe fn __get_cpuid_max(leaf: u32) -> (u32, u32) {
|
|||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use x86::i586::cpuid;
|
use coresimd::x86::i586::cpuid;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_always_has_cpuid() {
|
fn test_always_has_cpuid() {
|
||||||
@@ -131,7 +133,7 @@ mod tests {
|
|||||||
#[cfg(target_arch = "x86")]
|
#[cfg(target_arch = "x86")]
|
||||||
#[test]
|
#[test]
|
||||||
fn test_has_cpuid() {
|
fn test_has_cpuid() {
|
||||||
use x86::i386::__readeflags;
|
use coresimd::x86::i386::__readeflags;
|
||||||
unsafe {
|
unsafe {
|
||||||
let before = __readeflags();
|
let before = __readeflags();
|
||||||
|
|
||||||
@@ -1,12 +1,12 @@
|
|||||||
//! Streaming SIMD Extensions (SSE)
|
//! Streaming SIMD Extensions (SSE)
|
||||||
|
|
||||||
use core::mem;
|
use coresimd::simd_llvm::*;
|
||||||
use core::ptr;
|
use coresimd::v128::*;
|
||||||
|
use coresimd::v64::*;
|
||||||
use simd_llvm::*;
|
use coresimd::x86::*;
|
||||||
use v128::*;
|
use intrinsics;
|
||||||
use v64::*;
|
use mem;
|
||||||
use x86::*;
|
use ptr;
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
use stdsimd_test::assert_instr;
|
use stdsimd_test::assert_instr;
|
||||||
@@ -873,12 +873,15 @@ pub unsafe fn _mm_movemask_ps(a: __m128) -> i32 {
|
|||||||
/// #
|
/// #
|
||||||
/// # // The real main function
|
/// # // The real main function
|
||||||
/// # fn main() {
|
/// # fn main() {
|
||||||
/// # if cfg_feature_enabled!("sse") {
|
/// # if is_target_feature_detected!("sse") {
|
||||||
/// # #[target_feature(enable = "sse")]
|
/// # #[target_feature(enable = "sse")]
|
||||||
/// # unsafe fn worker() {
|
/// # unsafe fn worker() {
|
||||||
/// #
|
/// #
|
||||||
/// # use stdsimd::vendor::*;
|
/// #[cfg(target_arch = "x86")]
|
||||||
/// #
|
/// use stdsimd::arch::x86::*;
|
||||||
|
/// #[cfg(target_arch = "x86_64")]
|
||||||
|
/// use stdsimd::arch::x86_64::*;
|
||||||
|
///
|
||||||
/// unsafe {
|
/// unsafe {
|
||||||
/// let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
|
/// let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
|
||||||
/// let data: [f32; 4] = [5.0, 6.0, 7.0, 8.0];
|
/// let data: [f32; 4] = [5.0, 6.0, 7.0, 8.0];
|
||||||
@@ -924,12 +927,15 @@ pub unsafe fn _mm_loadh_pi(a: __m128, p: *const __m64) -> __m128 {
|
|||||||
/// #
|
/// #
|
||||||
/// # // The real main function
|
/// # // The real main function
|
||||||
/// # fn main() {
|
/// # fn main() {
|
||||||
/// # if cfg_feature_enabled!("sse") {
|
/// # if is_target_feature_detected!("sse") {
|
||||||
/// # #[target_feature(enable = "sse")]
|
/// # #[target_feature(enable = "sse")]
|
||||||
/// # unsafe fn worker() {
|
/// # unsafe fn worker() {
|
||||||
/// #
|
/// #
|
||||||
/// # use stdsimd::vendor::*;
|
/// #[cfg(target_arch = "x86")]
|
||||||
/// #
|
/// use stdsimd::arch::x86::*;
|
||||||
|
/// #[cfg(target_arch = "x86_64")]
|
||||||
|
/// use stdsimd::arch::x86_64::*;
|
||||||
|
///
|
||||||
/// unsafe {
|
/// unsafe {
|
||||||
/// let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
|
/// let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
|
||||||
/// let data: [f32; 4] = [5.0, 6.0, 7.0, 8.0];
|
/// let data: [f32; 4] = [5.0, 6.0, 7.0, 8.0];
|
||||||
@@ -1684,7 +1690,7 @@ extern "C" {
|
|||||||
#[target_feature(enable = "sse")]
|
#[target_feature(enable = "sse")]
|
||||||
#[cfg_attr(test, assert_instr(movntps))]
|
#[cfg_attr(test, assert_instr(movntps))]
|
||||||
pub unsafe fn _mm_stream_ps(mem_addr: *mut f32, a: __m128) {
|
pub unsafe fn _mm_stream_ps(mem_addr: *mut f32, a: __m128) {
|
||||||
::core::intrinsics::nontemporal_store(mem::transmute(mem_addr), a);
|
intrinsics::nontemporal_store(mem::transmute(mem_addr), a);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Store 64-bits of integer data from a into memory using a non-temporal
|
/// Store 64-bits of integer data from a into memory using a non-temporal
|
||||||
@@ -1701,12 +1707,13 @@ mod tests {
|
|||||||
use std::mem::transmute;
|
use std::mem::transmute;
|
||||||
use std::f32::NAN;
|
use std::f32::NAN;
|
||||||
|
|
||||||
use v128::*;
|
|
||||||
use v64::*;
|
|
||||||
use x86::*;
|
|
||||||
use stdsimd_test::simd_test;
|
use stdsimd_test::simd_test;
|
||||||
use test::black_box; // Used to inhibit constant-folding.
|
use test::black_box; // Used to inhibit constant-folding.
|
||||||
|
|
||||||
|
use coresimd::v128::*;
|
||||||
|
use coresimd::v64::*;
|
||||||
|
use coresimd::x86::*;
|
||||||
|
|
||||||
#[simd_test = "sse"]
|
#[simd_test = "sse"]
|
||||||
unsafe fn test_mm_add_ps() {
|
unsafe fn test_mm_add_ps() {
|
||||||
let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
|
let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
|
||||||
@@ -3,13 +3,13 @@
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
use stdsimd_test::assert_instr;
|
use stdsimd_test::assert_instr;
|
||||||
|
|
||||||
use core::mem;
|
use coresimd::simd_llvm::*;
|
||||||
use core::ptr;
|
use coresimd::v128::*;
|
||||||
|
use coresimd::v64::*;
|
||||||
use simd_llvm::*;
|
use coresimd::x86::*;
|
||||||
use v128::*;
|
use intrinsics;
|
||||||
use v64::*;
|
use mem;
|
||||||
use x86::*;
|
use ptr;
|
||||||
|
|
||||||
/// Provide a hint to the processor that the code sequence is a spin-wait loop.
|
/// Provide a hint to the processor that the code sequence is a spin-wait loop.
|
||||||
///
|
///
|
||||||
@@ -952,7 +952,7 @@ pub unsafe fn _mm_storel_epi64(mem_addr: *mut __m128i, a: __m128i) {
|
|||||||
#[target_feature(enable = "sse2")]
|
#[target_feature(enable = "sse2")]
|
||||||
#[cfg_attr(test, assert_instr(movntps))] // FIXME movntdq
|
#[cfg_attr(test, assert_instr(movntps))] // FIXME movntdq
|
||||||
pub unsafe fn _mm_stream_si128(mem_addr: *mut __m128i, a: __m128i) {
|
pub unsafe fn _mm_stream_si128(mem_addr: *mut __m128i, a: __m128i) {
|
||||||
::core::intrinsics::nontemporal_store(mem_addr, a);
|
::intrinsics::nontemporal_store(mem_addr, a);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Stores a 32-bit integer value in the specified memory location.
|
/// Stores a 32-bit integer value in the specified memory location.
|
||||||
@@ -962,7 +962,7 @@ pub unsafe fn _mm_stream_si128(mem_addr: *mut __m128i, a: __m128i) {
|
|||||||
#[target_feature(enable = "sse2")]
|
#[target_feature(enable = "sse2")]
|
||||||
#[cfg_attr(test, assert_instr(movnti))]
|
#[cfg_attr(test, assert_instr(movnti))]
|
||||||
pub unsafe fn _mm_stream_si32(mem_addr: *mut i32, a: i32) {
|
pub unsafe fn _mm_stream_si32(mem_addr: *mut i32, a: i32) {
|
||||||
::core::intrinsics::nontemporal_store(mem_addr, a);
|
::intrinsics::nontemporal_store(mem_addr, a);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return a vector where the low element is extracted from `a` and its upper
|
/// Return a vector where the low element is extracted from `a` and its upper
|
||||||
@@ -1974,7 +1974,7 @@ pub unsafe fn _mm_loadl_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
|
|||||||
#[target_feature(enable = "sse2")]
|
#[target_feature(enable = "sse2")]
|
||||||
#[cfg_attr(test, assert_instr(movntps))] // FIXME movntpd
|
#[cfg_attr(test, assert_instr(movntps))] // FIXME movntpd
|
||||||
pub unsafe fn _mm_stream_pd(mem_addr: *mut f64, a: __m128d) {
|
pub unsafe fn _mm_stream_pd(mem_addr: *mut f64, a: __m128d) {
|
||||||
::core::intrinsics::nontemporal_store(mem::transmute(mem_addr), a);
|
intrinsics::nontemporal_store(mem::transmute(mem_addr), a);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Stores the lower 64 bits of a 128-bit vector of [2 x double] to a
|
/// Stores the lower 64 bits of a 128-bit vector of [2 x double] to a
|
||||||
@@ -2382,8 +2382,8 @@ mod tests {
|
|||||||
|
|
||||||
use stdsimd_test::simd_test;
|
use stdsimd_test::simd_test;
|
||||||
use test::black_box; // Used to inhibit constant-folding.
|
use test::black_box; // Used to inhibit constant-folding.
|
||||||
use x86::*;
|
use coresimd::x86::*;
|
||||||
use v128::*;
|
use coresimd::v128::*;
|
||||||
|
|
||||||
#[simd_test = "sse2"]
|
#[simd_test = "sse2"]
|
||||||
unsafe fn test_mm_pause() {
|
unsafe fn test_mm_pause() {
|
||||||
@@ -1,8 +1,8 @@
|
|||||||
//! Streaming SIMD Extensions 3 (SSE3)
|
//! Streaming SIMD Extensions 3 (SSE3)
|
||||||
|
|
||||||
use simd_llvm::{simd_shuffle2, simd_shuffle4};
|
use coresimd::simd_llvm::{simd_shuffle2, simd_shuffle4};
|
||||||
use v128::*;
|
use coresimd::v128::*;
|
||||||
use x86::*;
|
use coresimd::x86::*;
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
use stdsimd_test::assert_instr;
|
use stdsimd_test::assert_instr;
|
||||||
@@ -129,7 +129,7 @@ extern "C" {
|
|||||||
mod tests {
|
mod tests {
|
||||||
use stdsimd_test::simd_test;
|
use stdsimd_test::simd_test;
|
||||||
|
|
||||||
use x86::*;
|
use coresimd::x86::*;
|
||||||
|
|
||||||
#[simd_test = "sse3"]
|
#[simd_test = "sse3"]
|
||||||
unsafe fn test_mm_addsub_ps() {
|
unsafe fn test_mm_addsub_ps() {
|
||||||
@@ -1,14 +1,16 @@
|
|||||||
//! Streaming SIMD Extensions 4.1 (SSE4.1)
|
//! Streaming SIMD Extensions 4.1 (SSE4.1)
|
||||||
|
|
||||||
use core::mem;
|
use coresimd::simd_llvm::*;
|
||||||
|
use coresimd::v128::*;
|
||||||
|
use coresimd::v64::*;
|
||||||
|
use coresimd::v32::*;
|
||||||
|
use coresimd::v16::*;
|
||||||
|
use coresimd::x86::*;
|
||||||
|
use mem;
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
use stdsimd_test::assert_instr;
|
use stdsimd_test::assert_instr;
|
||||||
|
|
||||||
use simd_llvm::*;
|
|
||||||
use v128::*;
|
|
||||||
use x86::*;
|
|
||||||
|
|
||||||
// SSE4 rounding constants
|
// SSE4 rounding constants
|
||||||
/// round to nearest
|
/// round to nearest
|
||||||
pub const _MM_FROUND_TO_NEAREST_INT: i32 = 0x00;
|
pub const _MM_FROUND_TO_NEAREST_INT: i32 = 0x00;
|
||||||
@@ -301,7 +303,7 @@ pub unsafe fn _mm_cmpeq_epi64(a: __m128i, b: __m128i) -> __m128i {
|
|||||||
#[cfg_attr(test, assert_instr(pmovsxbw))]
|
#[cfg_attr(test, assert_instr(pmovsxbw))]
|
||||||
pub unsafe fn _mm_cvtepi8_epi16(a: __m128i) -> __m128i {
|
pub unsafe fn _mm_cvtepi8_epi16(a: __m128i) -> __m128i {
|
||||||
let a = a.as_i8x16();
|
let a = a.as_i8x16();
|
||||||
let a = simd_shuffle8::<_, ::v64::i8x8>(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
|
let a = simd_shuffle8::<_, i8x8>(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
|
||||||
mem::transmute(simd_cast::<_, i16x8>(a))
|
mem::transmute(simd_cast::<_, i16x8>(a))
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -311,7 +313,7 @@ pub unsafe fn _mm_cvtepi8_epi16(a: __m128i) -> __m128i {
|
|||||||
#[cfg_attr(test, assert_instr(pmovsxbd))]
|
#[cfg_attr(test, assert_instr(pmovsxbd))]
|
||||||
pub unsafe fn _mm_cvtepi8_epi32(a: __m128i) -> __m128i {
|
pub unsafe fn _mm_cvtepi8_epi32(a: __m128i) -> __m128i {
|
||||||
let a = a.as_i8x16();
|
let a = a.as_i8x16();
|
||||||
let a = simd_shuffle4::<_, ::v32::i8x4>(a, a, [0, 1, 2, 3]);
|
let a = simd_shuffle4::<_, i8x4>(a, a, [0, 1, 2, 3]);
|
||||||
mem::transmute(simd_cast::<_, i32x4>(a))
|
mem::transmute(simd_cast::<_, i32x4>(a))
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -322,7 +324,7 @@ pub unsafe fn _mm_cvtepi8_epi32(a: __m128i) -> __m128i {
|
|||||||
#[cfg_attr(test, assert_instr(pmovsxbq))]
|
#[cfg_attr(test, assert_instr(pmovsxbq))]
|
||||||
pub unsafe fn _mm_cvtepi8_epi64(a: __m128i) -> __m128i {
|
pub unsafe fn _mm_cvtepi8_epi64(a: __m128i) -> __m128i {
|
||||||
let a = a.as_i8x16();
|
let a = a.as_i8x16();
|
||||||
let a = simd_shuffle2::<_, ::v16::i8x2>(a, a, [0, 1]);
|
let a = simd_shuffle2::<_, i8x2>(a, a, [0, 1]);
|
||||||
mem::transmute(simd_cast::<_, i64x2>(a))
|
mem::transmute(simd_cast::<_, i64x2>(a))
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -332,7 +334,7 @@ pub unsafe fn _mm_cvtepi8_epi64(a: __m128i) -> __m128i {
|
|||||||
#[cfg_attr(test, assert_instr(pmovsxwd))]
|
#[cfg_attr(test, assert_instr(pmovsxwd))]
|
||||||
pub unsafe fn _mm_cvtepi16_epi32(a: __m128i) -> __m128i {
|
pub unsafe fn _mm_cvtepi16_epi32(a: __m128i) -> __m128i {
|
||||||
let a = a.as_i16x8();
|
let a = a.as_i16x8();
|
||||||
let a = simd_shuffle4::<_, ::v64::i16x4>(a, a, [0, 1, 2, 3]);
|
let a = simd_shuffle4::<_, i16x4>(a, a, [0, 1, 2, 3]);
|
||||||
mem::transmute(simd_cast::<_, i32x4>(a))
|
mem::transmute(simd_cast::<_, i32x4>(a))
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -342,7 +344,7 @@ pub unsafe fn _mm_cvtepi16_epi32(a: __m128i) -> __m128i {
|
|||||||
#[cfg_attr(test, assert_instr(pmovsxwq))]
|
#[cfg_attr(test, assert_instr(pmovsxwq))]
|
||||||
pub unsafe fn _mm_cvtepi16_epi64(a: __m128i) -> __m128i {
|
pub unsafe fn _mm_cvtepi16_epi64(a: __m128i) -> __m128i {
|
||||||
let a = a.as_i16x8();
|
let a = a.as_i16x8();
|
||||||
let a = simd_shuffle2::<_, ::v32::i16x2>(a, a, [0, 1]);
|
let a = simd_shuffle2::<_, i16x2>(a, a, [0, 1]);
|
||||||
mem::transmute(simd_cast::<_, i64x2>(a))
|
mem::transmute(simd_cast::<_, i64x2>(a))
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -352,7 +354,7 @@ pub unsafe fn _mm_cvtepi16_epi64(a: __m128i) -> __m128i {
|
|||||||
#[cfg_attr(test, assert_instr(pmovsxdq))]
|
#[cfg_attr(test, assert_instr(pmovsxdq))]
|
||||||
pub unsafe fn _mm_cvtepi32_epi64(a: __m128i) -> __m128i {
|
pub unsafe fn _mm_cvtepi32_epi64(a: __m128i) -> __m128i {
|
||||||
let a = a.as_i32x4();
|
let a = a.as_i32x4();
|
||||||
let a = simd_shuffle2::<_, ::v64::i32x2>(a, a, [0, 1]);
|
let a = simd_shuffle2::<_, i32x2>(a, a, [0, 1]);
|
||||||
mem::transmute(simd_cast::<_, i64x2>(a))
|
mem::transmute(simd_cast::<_, i64x2>(a))
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -362,7 +364,7 @@ pub unsafe fn _mm_cvtepi32_epi64(a: __m128i) -> __m128i {
|
|||||||
#[cfg_attr(test, assert_instr(pmovzxbw))]
|
#[cfg_attr(test, assert_instr(pmovzxbw))]
|
||||||
pub unsafe fn _mm_cvtepu8_epi16(a: __m128i) -> __m128i {
|
pub unsafe fn _mm_cvtepu8_epi16(a: __m128i) -> __m128i {
|
||||||
let a = a.as_u8x16();
|
let a = a.as_u8x16();
|
||||||
let a = simd_shuffle8::<_, ::v64::u8x8>(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
|
let a = simd_shuffle8::<_, u8x8>(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
|
||||||
mem::transmute(simd_cast::<_, i16x8>(a))
|
mem::transmute(simd_cast::<_, i16x8>(a))
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -372,7 +374,7 @@ pub unsafe fn _mm_cvtepu8_epi16(a: __m128i) -> __m128i {
|
|||||||
#[cfg_attr(test, assert_instr(pmovzxbd))]
|
#[cfg_attr(test, assert_instr(pmovzxbd))]
|
||||||
pub unsafe fn _mm_cvtepu8_epi32(a: __m128i) -> __m128i {
|
pub unsafe fn _mm_cvtepu8_epi32(a: __m128i) -> __m128i {
|
||||||
let a = a.as_u8x16();
|
let a = a.as_u8x16();
|
||||||
let a = simd_shuffle4::<_, ::v32::u8x4>(a, a, [0, 1, 2, 3]);
|
let a = simd_shuffle4::<_, u8x4>(a, a, [0, 1, 2, 3]);
|
||||||
mem::transmute(simd_cast::<_, i32x4>(a))
|
mem::transmute(simd_cast::<_, i32x4>(a))
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -382,7 +384,7 @@ pub unsafe fn _mm_cvtepu8_epi32(a: __m128i) -> __m128i {
|
|||||||
#[cfg_attr(test, assert_instr(pmovzxbq))]
|
#[cfg_attr(test, assert_instr(pmovzxbq))]
|
||||||
pub unsafe fn _mm_cvtepu8_epi64(a: __m128i) -> __m128i {
|
pub unsafe fn _mm_cvtepu8_epi64(a: __m128i) -> __m128i {
|
||||||
let a = a.as_u8x16();
|
let a = a.as_u8x16();
|
||||||
let a = simd_shuffle2::<_, ::v16::u8x2>(a, a, [0, 1]);
|
let a = simd_shuffle2::<_, u8x2>(a, a, [0, 1]);
|
||||||
mem::transmute(simd_cast::<_, i64x2>(a))
|
mem::transmute(simd_cast::<_, i64x2>(a))
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -393,7 +395,7 @@ pub unsafe fn _mm_cvtepu8_epi64(a: __m128i) -> __m128i {
|
|||||||
#[cfg_attr(test, assert_instr(pmovzxwd))]
|
#[cfg_attr(test, assert_instr(pmovzxwd))]
|
||||||
pub unsafe fn _mm_cvtepu16_epi32(a: __m128i) -> __m128i {
|
pub unsafe fn _mm_cvtepu16_epi32(a: __m128i) -> __m128i {
|
||||||
let a = a.as_u16x8();
|
let a = a.as_u16x8();
|
||||||
let a = simd_shuffle4::<_, ::v64::u16x4>(a, a, [0, 1, 2, 3]);
|
let a = simd_shuffle4::<_, u16x4>(a, a, [0, 1, 2, 3]);
|
||||||
mem::transmute(simd_cast::<_, i32x4>(a))
|
mem::transmute(simd_cast::<_, i32x4>(a))
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -404,7 +406,7 @@ pub unsafe fn _mm_cvtepu16_epi32(a: __m128i) -> __m128i {
|
|||||||
#[cfg_attr(test, assert_instr(pmovzxwq))]
|
#[cfg_attr(test, assert_instr(pmovzxwq))]
|
||||||
pub unsafe fn _mm_cvtepu16_epi64(a: __m128i) -> __m128i {
|
pub unsafe fn _mm_cvtepu16_epi64(a: __m128i) -> __m128i {
|
||||||
let a = a.as_u16x8();
|
let a = a.as_u16x8();
|
||||||
let a = simd_shuffle2::<_, ::v32::u16x2>(a, a, [0, 1]);
|
let a = simd_shuffle2::<_, u16x2>(a, a, [0, 1]);
|
||||||
mem::transmute(simd_cast::<_, i64x2>(a))
|
mem::transmute(simd_cast::<_, i64x2>(a))
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -415,7 +417,7 @@ pub unsafe fn _mm_cvtepu16_epi64(a: __m128i) -> __m128i {
|
|||||||
#[cfg_attr(test, assert_instr(pmovzxdq))]
|
#[cfg_attr(test, assert_instr(pmovzxdq))]
|
||||||
pub unsafe fn _mm_cvtepu32_epi64(a: __m128i) -> __m128i {
|
pub unsafe fn _mm_cvtepu32_epi64(a: __m128i) -> __m128i {
|
||||||
let a = a.as_u32x4();
|
let a = a.as_u32x4();
|
||||||
let a = simd_shuffle2::<_, ::v64::u32x2>(a, a, [0, 1]);
|
let a = simd_shuffle2::<_, u32x2>(a, a, [0, 1]);
|
||||||
mem::transmute(simd_cast::<_, i64x2>(a))
|
mem::transmute(simd_cast::<_, i64x2>(a))
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -549,18 +551,25 @@ pub unsafe fn _mm_ceil_ss(a: __m128, b: __m128) -> __m128 {
|
|||||||
/// Rounding is done according to the rounding parameter, which can be one of:
|
/// Rounding is done according to the rounding parameter, which can be one of:
|
||||||
///
|
///
|
||||||
/// ```
|
/// ```
|
||||||
/// use coresimd::vendor;
|
/// extern crate stdsimd;
|
||||||
///
|
///
|
||||||
|
/// #[cfg(target_arch = "x86")]
|
||||||
|
/// use stdsimd::arch::x86::*;
|
||||||
|
/// #[cfg(target_arch = "x86_64")]
|
||||||
|
/// use stdsimd::arch::x86_64::*;
|
||||||
|
///
|
||||||
|
/// # fn main() {
|
||||||
/// // round to nearest, and suppress exceptions:
|
/// // round to nearest, and suppress exceptions:
|
||||||
/// (vendor::_MM_FROUND_TO_NEAREST_INT | vendor::_MM_FROUND_NO_EXC);
|
/// (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
|
||||||
/// // round down, and suppress exceptions:
|
/// // round down, and suppress exceptions:
|
||||||
/// (vendor::_MM_FROUND_TO_NEG_INF | vendor::_MM_FROUND_NO_EXC);
|
/// (_MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
|
||||||
/// // round up, and suppress exceptions:
|
/// // round up, and suppress exceptions:
|
||||||
/// (vendor::_MM_FROUND_TO_POS_INF | vendor::_MM_FROUND_NO_EXC);
|
/// (_MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
|
||||||
/// // truncate, and suppress exceptions:
|
/// // truncate, and suppress exceptions:
|
||||||
/// (vendor::_MM_FROUND_TO_ZERO | vendor::_MM_FROUND_NO_EXC);
|
/// (_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
|
||||||
/// // use MXCSR.RC; see `vendor::_MM_SET_ROUNDING_MODE`:
|
/// // use MXCSR.RC; see `_MM_SET_ROUNDING_MODE`:
|
||||||
/// vendor::_MM_FROUND_CUR_DIRECTION;
|
/// _MM_FROUND_CUR_DIRECTION;
|
||||||
|
/// # }
|
||||||
/// ```
|
/// ```
|
||||||
#[inline]
|
#[inline]
|
||||||
#[target_feature(enable = "sse4.1")]
|
#[target_feature(enable = "sse4.1")]
|
||||||
@@ -579,18 +588,25 @@ pub unsafe fn _mm_round_pd(a: __m128d, rounding: i32) -> __m128d {
|
|||||||
/// Rounding is done according to the rounding parameter, which can be one of:
|
/// Rounding is done according to the rounding parameter, which can be one of:
|
||||||
///
|
///
|
||||||
/// ```
|
/// ```
|
||||||
/// use coresimd::vendor;
|
/// extern crate stdsimd;
|
||||||
///
|
///
|
||||||
|
/// #[cfg(target_arch = "x86")]
|
||||||
|
/// use stdsimd::arch::x86::*;
|
||||||
|
/// #[cfg(target_arch = "x86_64")]
|
||||||
|
/// use stdsimd::arch::x86_64::*;
|
||||||
|
///
|
||||||
|
/// # fn main() {
|
||||||
/// // round to nearest, and suppress exceptions:
|
/// // round to nearest, and suppress exceptions:
|
||||||
/// (vendor::_MM_FROUND_TO_NEAREST_INT | vendor::_MM_FROUND_NO_EXC);
|
/// (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
|
||||||
/// // round down, and suppress exceptions:
|
/// // round down, and suppress exceptions:
|
||||||
/// (vendor::_MM_FROUND_TO_NEG_INF | vendor::_MM_FROUND_NO_EXC);
|
/// (_MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
|
||||||
/// // round up, and suppress exceptions:
|
/// // round up, and suppress exceptions:
|
||||||
/// (vendor::_MM_FROUND_TO_POS_INF | vendor::_MM_FROUND_NO_EXC);
|
/// (_MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
|
||||||
/// // truncate, and suppress exceptions:
|
/// // truncate, and suppress exceptions:
|
||||||
/// (vendor::_MM_FROUND_TO_ZERO | vendor::_MM_FROUND_NO_EXC);
|
/// (_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
|
||||||
/// // use MXCSR.RC; see `vendor::_MM_SET_ROUNDING_MODE`:
|
/// // use MXCSR.RC; see `_MM_SET_ROUNDING_MODE`:
|
||||||
/// vendor::_MM_FROUND_CUR_DIRECTION;
|
/// _MM_FROUND_CUR_DIRECTION;
|
||||||
|
/// # }
|
||||||
/// ```
|
/// ```
|
||||||
#[inline]
|
#[inline]
|
||||||
#[target_feature(enable = "sse4.1")]
|
#[target_feature(enable = "sse4.1")]
|
||||||
@@ -611,18 +627,25 @@ pub unsafe fn _mm_round_ps(a: __m128, rounding: i32) -> __m128 {
|
|||||||
/// Rounding is done according to the rounding parameter, which can be one of:
|
/// Rounding is done according to the rounding parameter, which can be one of:
|
||||||
///
|
///
|
||||||
/// ```
|
/// ```
|
||||||
/// use coresimd::vendor;
|
/// extern crate stdsimd;
|
||||||
///
|
///
|
||||||
|
/// #[cfg(target_arch = "x86")]
|
||||||
|
/// use stdsimd::arch::x86::*;
|
||||||
|
/// #[cfg(target_arch = "x86_64")]
|
||||||
|
/// use stdsimd::arch::x86_64::*;
|
||||||
|
///
|
||||||
|
/// # fn main() {
|
||||||
/// // round to nearest, and suppress exceptions:
|
/// // round to nearest, and suppress exceptions:
|
||||||
/// (vendor::_MM_FROUND_TO_NEAREST_INT | vendor::_MM_FROUND_NO_EXC);
|
/// (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
|
||||||
/// // round down, and suppress exceptions:
|
/// // round down, and suppress exceptions:
|
||||||
/// (vendor::_MM_FROUND_TO_NEG_INF | vendor::_MM_FROUND_NO_EXC);
|
/// (_MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
|
||||||
/// // round up, and suppress exceptions:
|
/// // round up, and suppress exceptions:
|
||||||
/// (vendor::_MM_FROUND_TO_POS_INF | vendor::_MM_FROUND_NO_EXC);
|
/// (_MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
|
||||||
/// // truncate, and suppress exceptions:
|
/// // truncate, and suppress exceptions:
|
||||||
/// (vendor::_MM_FROUND_TO_ZERO | vendor::_MM_FROUND_NO_EXC);
|
/// (_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
|
||||||
/// // use MXCSR.RC; see `vendor::_MM_SET_ROUNDING_MODE`:
|
/// // use MXCSR.RC; see `_MM_SET_ROUNDING_MODE`:
|
||||||
/// vendor::_MM_FROUND_CUR_DIRECTION;
|
/// _MM_FROUND_CUR_DIRECTION;
|
||||||
|
/// # }
|
||||||
/// ```
|
/// ```
|
||||||
#[inline]
|
#[inline]
|
||||||
#[target_feature(enable = "sse4.1")]
|
#[target_feature(enable = "sse4.1")]
|
||||||
@@ -643,18 +666,25 @@ pub unsafe fn _mm_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d {
|
|||||||
/// Rounding is done according to the rounding parameter, which can be one of:
|
/// Rounding is done according to the rounding parameter, which can be one of:
|
||||||
///
|
///
|
||||||
/// ```
|
/// ```
|
||||||
/// use coresimd::vendor;
|
/// extern crate stdsimd;
|
||||||
///
|
///
|
||||||
|
/// #[cfg(target_arch = "x86")]
|
||||||
|
/// use stdsimd::arch::x86::*;
|
||||||
|
/// #[cfg(target_arch = "x86_64")]
|
||||||
|
/// use stdsimd::arch::x86_64::*;
|
||||||
|
///
|
||||||
|
/// # fn main() {
|
||||||
/// // round to nearest, and suppress exceptions:
|
/// // round to nearest, and suppress exceptions:
|
||||||
/// (vendor::_MM_FROUND_TO_NEAREST_INT | vendor::_MM_FROUND_NO_EXC);
|
/// (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
|
||||||
/// // round down, and suppress exceptions:
|
/// // round down, and suppress exceptions:
|
||||||
/// (vendor::_MM_FROUND_TO_NEG_INF | vendor::_MM_FROUND_NO_EXC);
|
/// (_MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
|
||||||
/// // round up, and suppress exceptions:
|
/// // round up, and suppress exceptions:
|
||||||
/// (vendor::_MM_FROUND_TO_POS_INF | vendor::_MM_FROUND_NO_EXC);
|
/// (_MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
|
||||||
/// // truncate, and suppress exceptions:
|
/// // truncate, and suppress exceptions:
|
||||||
/// (vendor::_MM_FROUND_TO_ZERO | vendor::_MM_FROUND_NO_EXC);
|
/// (_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
|
||||||
/// // use MXCSR.RC; see `vendor::_MM_SET_ROUNDING_MODE`:
|
/// // use MXCSR.RC; see `_MM_SET_ROUNDING_MODE`:
|
||||||
/// vendor::_MM_FROUND_CUR_DIRECTION;
|
/// _MM_FROUND_CUR_DIRECTION;
|
||||||
|
/// # }
|
||||||
/// ```
|
/// ```
|
||||||
#[inline]
|
#[inline]
|
||||||
#[target_feature(enable = "sse4.1")]
|
#[target_feature(enable = "sse4.1")]
|
||||||
@@ -817,7 +847,7 @@ extern "C" {
|
|||||||
mod tests {
|
mod tests {
|
||||||
use std::mem;
|
use std::mem;
|
||||||
use stdsimd_test::simd_test;
|
use stdsimd_test::simd_test;
|
||||||
use x86::*;
|
use coresimd::x86::*;
|
||||||
|
|
||||||
#[simd_test = "sse4.1"]
|
#[simd_test = "sse4.1"]
|
||||||
unsafe fn test_mm_blendv_epi8() {
|
unsafe fn test_mm_blendv_epi8() {
|
||||||
@@ -5,8 +5,8 @@
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
use stdsimd_test::assert_instr;
|
use stdsimd_test::assert_instr;
|
||||||
|
|
||||||
use v128::*;
|
use coresimd::v128::*;
|
||||||
use x86::*;
|
use coresimd::x86::*;
|
||||||
|
|
||||||
/// String contains unsigned 8-bit characters *(Default)*
|
/// String contains unsigned 8-bit characters *(Default)*
|
||||||
pub const _SIDD_UBYTE_OPS: i32 = 0b0000_0000;
|
pub const _SIDD_UBYTE_OPS: i32 = 0b0000_0000;
|
||||||
@@ -102,11 +102,14 @@ pub unsafe fn _mm_cmpistrm(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
|
|||||||
/// # #[macro_use] extern crate stdsimd;
|
/// # #[macro_use] extern crate stdsimd;
|
||||||
/// #
|
/// #
|
||||||
/// # fn main() {
|
/// # fn main() {
|
||||||
/// # if cfg_feature_enabled!("sse4.2") {
|
/// # if is_target_feature_detected!("sse4.2") {
|
||||||
/// # #[target_feature(enable = "sse4.2")]
|
/// # #[target_feature(enable = "sse4.2")]
|
||||||
/// # unsafe fn worker() {
|
/// # unsafe fn worker() {
|
||||||
///
|
///
|
||||||
/// use stdsimd::vendor::*;
|
/// #[cfg(target_arch = "x86")]
|
||||||
|
/// use stdsimd::arch::x86::*;
|
||||||
|
/// #[cfg(target_arch = "x86_64")]
|
||||||
|
/// use stdsimd::arch::x86_64::*;
|
||||||
///
|
///
|
||||||
/// let haystack = b"This is a long string of text data\r\n\tthat extends
|
/// let haystack = b"This is a long string of text data\r\n\tthat extends
|
||||||
/// multiple lines";
|
/// multiple lines";
|
||||||
@@ -142,10 +145,13 @@ pub unsafe fn _mm_cmpistrm(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
|
|||||||
/// # #[macro_use] extern crate stdsimd;
|
/// # #[macro_use] extern crate stdsimd;
|
||||||
/// #
|
/// #
|
||||||
/// # fn main() {
|
/// # fn main() {
|
||||||
/// # if cfg_feature_enabled!("sse4.2") {
|
/// # if is_target_feature_detected!("sse4.2") {
|
||||||
/// # #[target_feature(enable = "sse4.2")]
|
/// # #[target_feature(enable = "sse4.2")]
|
||||||
/// # unsafe fn worker() {
|
/// # unsafe fn worker() {
|
||||||
/// use stdsimd::vendor::*;
|
/// #[cfg(target_arch = "x86")]
|
||||||
|
/// use stdsimd::arch::x86::*;
|
||||||
|
/// #[cfg(target_arch = "x86_64")]
|
||||||
|
/// use stdsimd::arch::x86_64::*;
|
||||||
///
|
///
|
||||||
/// // Ensure your input is 16 byte aligned
|
/// // Ensure your input is 16 byte aligned
|
||||||
/// let password = b"hunter2\0\0\0\0\0\0\0\0\0";
|
/// let password = b"hunter2\0\0\0\0\0\0\0\0\0";
|
||||||
@@ -180,10 +186,14 @@ pub unsafe fn _mm_cmpistrm(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
|
|||||||
/// # #[macro_use] extern crate stdsimd;
|
/// # #[macro_use] extern crate stdsimd;
|
||||||
/// #
|
/// #
|
||||||
/// # fn main() {
|
/// # fn main() {
|
||||||
/// # if cfg_feature_enabled!("sse4.2") {
|
/// # if is_target_feature_detected!("sse4.2") {
|
||||||
/// # #[target_feature(enable = "sse4.2")]
|
/// # #[target_feature(enable = "sse4.2")]
|
||||||
/// # unsafe fn worker() {
|
/// # unsafe fn worker() {
|
||||||
/// use stdsimd::vendor::*;
|
/// #[cfg(target_arch = "x86")]
|
||||||
|
/// use stdsimd::arch::x86::*;
|
||||||
|
/// #[cfg(target_arch = "x86_64")]
|
||||||
|
/// use stdsimd::arch::x86_64::*;
|
||||||
|
///
|
||||||
/// # let b = b":;<=>?@[\\]^_`abc";
|
/// # let b = b":;<=>?@[\\]^_`abc";
|
||||||
/// # let b = _mm_loadu_si128(b.as_ptr() as *const _);
|
/// # let b = _mm_loadu_si128(b.as_ptr() as *const _);
|
||||||
///
|
///
|
||||||
@@ -217,10 +227,13 @@ pub unsafe fn _mm_cmpistrm(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
|
|||||||
/// # #[macro_use] extern crate stdsimd;
|
/// # #[macro_use] extern crate stdsimd;
|
||||||
/// #
|
/// #
|
||||||
/// # fn main() {
|
/// # fn main() {
|
||||||
/// # if cfg_feature_enabled!("sse4.2") {
|
/// # if is_target_feature_detected!("sse4.2") {
|
||||||
/// # #[target_feature(enable = "sse4.2")]
|
/// # #[target_feature(enable = "sse4.2")]
|
||||||
/// # unsafe fn worker() {
|
/// # unsafe fn worker() {
|
||||||
/// use stdsimd::vendor::*;
|
/// #[cfg(target_arch = "x86")]
|
||||||
|
/// use stdsimd::arch::x86::*;
|
||||||
|
/// #[cfg(target_arch = "x86_64")]
|
||||||
|
/// use stdsimd::arch::x86_64::*;
|
||||||
///
|
///
|
||||||
/// # let mut some_utf16_words = [0u16; 8];
|
/// # let mut some_utf16_words = [0u16; 8];
|
||||||
/// # let mut more_utf16_words = [0u16; 8];
|
/// # let mut more_utf16_words = [0u16; 8];
|
||||||
@@ -407,11 +420,14 @@ pub unsafe fn _mm_cmpestrm(
|
|||||||
/// # #[macro_use] extern crate stdsimd;
|
/// # #[macro_use] extern crate stdsimd;
|
||||||
/// #
|
/// #
|
||||||
/// # fn main() {
|
/// # fn main() {
|
||||||
/// # if cfg_feature_enabled!("sse4.2") {
|
/// # if is_target_feature_detected!("sse4.2") {
|
||||||
/// # #[target_feature(enable = "sse4.2")]
|
/// # #[target_feature(enable = "sse4.2")]
|
||||||
/// # unsafe fn worker() {
|
/// # unsafe fn worker() {
|
||||||
///
|
///
|
||||||
/// use stdsimd::vendor::*;
|
/// #[cfg(target_arch = "x86")]
|
||||||
|
/// use stdsimd::arch::x86::*;
|
||||||
|
/// #[cfg(target_arch = "x86_64")]
|
||||||
|
/// use stdsimd::arch::x86_64::*;
|
||||||
///
|
///
|
||||||
/// // The string we want to find a substring in
|
/// // The string we want to find a substring in
|
||||||
/// let haystack = b"Split \r\n\t line ";
|
/// let haystack = b"Split \r\n\t line ";
|
||||||
@@ -625,7 +641,7 @@ mod tests {
|
|||||||
use stdsimd_test::simd_test;
|
use stdsimd_test::simd_test;
|
||||||
|
|
||||||
use std::ptr;
|
use std::ptr;
|
||||||
use x86::*;
|
use coresimd::x86::*;
|
||||||
|
|
||||||
// Currently one cannot `load` a &[u8] that is less than 16
|
// Currently one cannot `load` a &[u8] that is less than 16
|
||||||
// in length. This makes loading strings less than 16 in length
|
// in length. This makes loading strings less than 16 in length
|
||||||
@@ -1,14 +1,13 @@
|
|||||||
//! Supplemental Streaming SIMD Extensions 3 (SSSE3)
|
//! Supplemental Streaming SIMD Extensions 3 (SSSE3)
|
||||||
|
|
||||||
use core::mem;
|
use coresimd::simd_llvm::simd_shuffle16;
|
||||||
|
use coresimd::v128::*;
|
||||||
|
use coresimd::x86::*;
|
||||||
|
use mem;
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
use stdsimd_test::assert_instr;
|
use stdsimd_test::assert_instr;
|
||||||
|
|
||||||
use simd_llvm::simd_shuffle16;
|
|
||||||
use v128::*;
|
|
||||||
use x86::*;
|
|
||||||
|
|
||||||
/// Compute the absolute value of packed 8-bit signed integers in `a` and
|
/// Compute the absolute value of packed 8-bit signed integers in `a` and
|
||||||
/// return the unsigned results.
|
/// return the unsigned results.
|
||||||
#[inline]
|
#[inline]
|
||||||
@@ -292,7 +291,7 @@ extern "C" {
|
|||||||
mod tests {
|
mod tests {
|
||||||
use stdsimd_test::simd_test;
|
use stdsimd_test::simd_test;
|
||||||
|
|
||||||
use x86::*;
|
use coresimd::x86::*;
|
||||||
|
|
||||||
#[simd_test = "ssse3"]
|
#[simd_test = "ssse3"]
|
||||||
unsafe fn test_mm_abs_epi8() {
|
unsafe fn test_mm_abs_epi8() {
|
||||||
@@ -263,7 +263,7 @@ pub unsafe fn _tzmsk_u64(x: u64) -> u64 {
|
|||||||
mod tests {
|
mod tests {
|
||||||
use stdsimd_test::simd_test;
|
use stdsimd_test::simd_test;
|
||||||
|
|
||||||
use x86::i586::tbm;
|
use coresimd::x86::i586::tbm;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
#[simd_test = "tbm"]
|
#[simd_test = "tbm"]
|
||||||
@@ -137,9 +137,11 @@ pub unsafe fn _xrstors(mem_addr: *const u8, rs_mask: u64) {
|
|||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use x86::i586::xsave;
|
|
||||||
use stdsimd_test::simd_test;
|
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
|
use std::prelude::v1::*;
|
||||||
|
|
||||||
|
use coresimd::x86::i586::xsave;
|
||||||
|
use stdsimd_test::simd_test;
|
||||||
|
|
||||||
#[repr(align(64))]
|
#[repr(align(64))]
|
||||||
struct XsaveArea {
|
struct XsaveArea {
|
||||||
@@ -6,7 +6,8 @@
|
|||||||
//! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref].
|
//! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref].
|
||||||
//!
|
//!
|
||||||
//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
|
//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
|
||||||
use x86::__m128i;
|
|
||||||
|
use coresimd::x86::__m128i;
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
use stdsimd_test::assert_instr;
|
use stdsimd_test::assert_instr;
|
||||||
@@ -92,7 +93,7 @@ mod tests {
|
|||||||
|
|
||||||
use stdsimd_test::simd_test;
|
use stdsimd_test::simd_test;
|
||||||
|
|
||||||
use x86::*;
|
use coresimd::x86::*;
|
||||||
|
|
||||||
#[simd_test = "aes"]
|
#[simd_test = "aes"]
|
||||||
unsafe fn test_mm_aesdec_si128() {
|
unsafe fn test_mm_aesdec_si128() {
|
||||||
@@ -8,9 +8,9 @@
|
|||||||
//!
|
//!
|
||||||
//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
|
//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
|
||||||
|
|
||||||
use v64::*;
|
use coresimd::v64::*;
|
||||||
use x86::*;
|
use coresimd::x86::*;
|
||||||
use core::mem;
|
use mem;
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
use stdsimd_test::assert_instr;
|
use stdsimd_test::assert_instr;
|
||||||
@@ -487,7 +487,7 @@ extern "C" {
|
|||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use x86::*;
|
use coresimd::x86::*;
|
||||||
use stdsimd_test::simd_test;
|
use stdsimd_test::simd_test;
|
||||||
|
|
||||||
#[simd_test = "mmx"]
|
#[simd_test = "mmx"]
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
//! `i686` Streaming SIMD Extensions (SSE)
|
//! `i686` Streaming SIMD Extensions (SSE)
|
||||||
|
|
||||||
use x86::*;
|
use coresimd::x86::*;
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
use stdsimd_test::assert_instr;
|
use stdsimd_test::assert_instr;
|
||||||
@@ -469,7 +469,7 @@ pub unsafe fn _mm_cvtps_pi8(a: __m128) -> __m64 {
|
|||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use x86::*;
|
use coresimd::x86::*;
|
||||||
use stdsimd_test::simd_test;
|
use stdsimd_test::simd_test;
|
||||||
|
|
||||||
#[simd_test = "sse,mmx"]
|
#[simd_test = "sse,mmx"]
|
||||||
@@ -1,9 +1,8 @@
|
|||||||
//! `i686`'s Streaming SIMD Extensions 2 (SSE2)
|
//! `i686`'s Streaming SIMD Extensions 2 (SSE2)
|
||||||
|
|
||||||
use core::mem;
|
use coresimd::simd_llvm::simd_extract;
|
||||||
|
use coresimd::x86::*;
|
||||||
use simd_llvm::simd_extract;
|
use mem;
|
||||||
use x86::*;
|
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
use stdsimd_test::assert_instr;
|
use stdsimd_test::assert_instr;
|
||||||
@@ -137,7 +136,7 @@ mod tests {
|
|||||||
|
|
||||||
use stdsimd_test::simd_test;
|
use stdsimd_test::simd_test;
|
||||||
|
|
||||||
use x86::*;
|
use coresimd::x86::*;
|
||||||
|
|
||||||
#[simd_test = "sse2,mmx"]
|
#[simd_test = "sse2,mmx"]
|
||||||
unsafe fn test_mm_add_si64() {
|
unsafe fn test_mm_add_si64() {
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
//! `i686`'s Streaming SIMD Extensions 4.1 (SSE4.1)
|
//! `i686`'s Streaming SIMD Extensions 4.1 (SSE4.1)
|
||||||
|
|
||||||
use v128::*;
|
use coresimd::v128::*;
|
||||||
use x86::*;
|
use coresimd::x86::*;
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
use stdsimd_test::assert_instr;
|
use stdsimd_test::assert_instr;
|
||||||
@@ -138,7 +138,7 @@ pub unsafe fn _mm_test_mix_ones_zeros(a: __m128i, mask: __m128i) -> i32 {
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use stdsimd_test::simd_test;
|
use stdsimd_test::simd_test;
|
||||||
use x86::*;
|
use coresimd::x86::*;
|
||||||
|
|
||||||
#[simd_test = "sse4.1"]
|
#[simd_test = "sse4.1"]
|
||||||
unsafe fn test_mm_testz_si128() {
|
unsafe fn test_mm_testz_si128() {
|
||||||
@@ -1,8 +1,8 @@
|
|||||||
//! `i686`'s Streaming SIMD Extensions 4.2 (SSE4.2)
|
//! `i686`'s Streaming SIMD Extensions 4.2 (SSE4.2)
|
||||||
|
|
||||||
use simd_llvm::*;
|
use coresimd::simd_llvm::*;
|
||||||
use v128::*;
|
use coresimd::v128::*;
|
||||||
use x86::*;
|
use coresimd::x86::*;
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
use stdsimd_test::assert_instr;
|
use stdsimd_test::assert_instr;
|
||||||
@@ -18,7 +18,7 @@ pub unsafe fn _mm_cmpgt_epi64(a: __m128i, b: __m128i) -> __m128i {
|
|||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use x86::*;
|
use coresimd::x86::*;
|
||||||
|
|
||||||
use stdsimd_test::simd_test;
|
use stdsimd_test::simd_test;
|
||||||
|
|
||||||
@@ -1,8 +1,8 @@
|
|||||||
//! `i686`'s Streaming SIMD Extensions 4a (`SSE4a`)
|
//! `i686`'s Streaming SIMD Extensions 4a (`SSE4a`)
|
||||||
|
|
||||||
use core::mem;
|
use coresimd::v128::*;
|
||||||
use v128::*;
|
use coresimd::x86::*;
|
||||||
use x86::*;
|
use mem;
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
use stdsimd_test::assert_instr;
|
use stdsimd_test::assert_instr;
|
||||||
@@ -75,7 +75,7 @@ pub unsafe fn _mm_stream_ss(p: *mut f32, a: __m128) {
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use stdsimd_test::simd_test;
|
use stdsimd_test::simd_test;
|
||||||
use x86::*;
|
use coresimd::x86::*;
|
||||||
|
|
||||||
#[simd_test = "sse4a"]
|
#[simd_test = "sse4a"]
|
||||||
unsafe fn test_mm_extract_si64() {
|
unsafe fn test_mm_extract_si64() {
|
||||||
@@ -3,7 +3,7 @@
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
use stdsimd_test::assert_instr;
|
use stdsimd_test::assert_instr;
|
||||||
|
|
||||||
use x86::*;
|
use coresimd::x86::*;
|
||||||
|
|
||||||
/// Compute the absolute value of packed 8-bit integers in `a` and
|
/// Compute the absolute value of packed 8-bit integers in `a` and
|
||||||
/// return the unsigned results.
|
/// return the unsigned results.
|
||||||
@@ -223,7 +223,7 @@ extern "C" {
|
|||||||
mod tests {
|
mod tests {
|
||||||
use stdsimd_test::simd_test;
|
use stdsimd_test::simd_test;
|
||||||
|
|
||||||
use x86::*;
|
use coresimd::x86::*;
|
||||||
|
|
||||||
#[simd_test = "ssse3,mmx"]
|
#[simd_test = "ssse3,mmx"]
|
||||||
unsafe fn test_mm_abs_pi8() {
|
unsafe fn test_mm_abs_pi8() {
|
||||||
@@ -1,6 +1,7 @@
|
|||||||
//! `x86` and `x86_64` intrinsics.
|
//! `x86` and `x86_64` intrinsics.
|
||||||
|
|
||||||
use core::mem;
|
use prelude::v1::*;
|
||||||
|
use mem;
|
||||||
|
|
||||||
#[macro_use]
|
#[macro_use]
|
||||||
mod macros;
|
mod macros;
|
||||||
@@ -59,13 +60,16 @@ types! {
|
|||||||
/// # fn main() {
|
/// # fn main() {
|
||||||
/// # #[target_feature(enable = "mmx")]
|
/// # #[target_feature(enable = "mmx")]
|
||||||
/// # unsafe fn foo() {
|
/// # unsafe fn foo() {
|
||||||
/// use stdsimd::vendor::*;
|
/// #[cfg(target_arch = "x86")]
|
||||||
|
/// use stdsimd::arch::x86::*;
|
||||||
|
/// #[cfg(target_arch = "x86_64")]
|
||||||
|
/// use stdsimd::arch::x86_64::*;
|
||||||
///
|
///
|
||||||
/// let all_bytes_zero = _mm_setzero_si64();
|
/// let all_bytes_zero = _mm_setzero_si64();
|
||||||
/// let all_bytes_one = _mm_set1_pi8(1);
|
/// let all_bytes_one = _mm_set1_pi8(1);
|
||||||
/// let two_i32 = _mm_set_pi32(1, 2);
|
/// let two_i32 = _mm_set_pi32(1, 2);
|
||||||
/// # }
|
/// # }
|
||||||
/// # if cfg_feature_enabled!("mmx") { unsafe { foo() } }
|
/// # if is_target_feature_detected!("mmx") { unsafe { foo() } }
|
||||||
/// # }
|
/// # }
|
||||||
/// ```
|
/// ```
|
||||||
pub struct __m64(i64);
|
pub struct __m64(i64);
|
||||||
@@ -102,13 +106,16 @@ types! {
|
|||||||
/// # fn main() {
|
/// # fn main() {
|
||||||
/// # #[target_feature(enable = "sse2")]
|
/// # #[target_feature(enable = "sse2")]
|
||||||
/// # unsafe fn foo() {
|
/// # unsafe fn foo() {
|
||||||
/// use stdsimd::vendor::*;
|
/// #[cfg(target_arch = "x86")]
|
||||||
|
/// use stdsimd::arch::x86::*;
|
||||||
|
/// #[cfg(target_arch = "x86_64")]
|
||||||
|
/// use stdsimd::arch::x86_64::*;
|
||||||
///
|
///
|
||||||
/// let all_bytes_zero = _mm_setzero_si128();
|
/// let all_bytes_zero = _mm_setzero_si128();
|
||||||
/// let all_bytes_one = _mm_set1_epi8(1);
|
/// let all_bytes_one = _mm_set1_epi8(1);
|
||||||
/// let four_i32 = _mm_set_epi32(1, 2, 3, 4);
|
/// let four_i32 = _mm_set_epi32(1, 2, 3, 4);
|
||||||
/// # }
|
/// # }
|
||||||
/// # if cfg_feature_enabled!("sse2") { unsafe { foo() } }
|
/// # if is_target_feature_detected!("sse2") { unsafe { foo() } }
|
||||||
/// # }
|
/// # }
|
||||||
/// ```
|
/// ```
|
||||||
pub struct __m128i(i64, i64);
|
pub struct __m128i(i64, i64);
|
||||||
@@ -138,13 +145,16 @@ types! {
|
|||||||
/// # fn main() {
|
/// # fn main() {
|
||||||
/// # #[target_feature(enable = "sse")]
|
/// # #[target_feature(enable = "sse")]
|
||||||
/// # unsafe fn foo() {
|
/// # unsafe fn foo() {
|
||||||
/// use stdsimd::vendor::*;
|
/// #[cfg(target_arch = "x86")]
|
||||||
|
/// use stdsimd::arch::x86::*;
|
||||||
|
/// #[cfg(target_arch = "x86_64")]
|
||||||
|
/// use stdsimd::arch::x86_64::*;
|
||||||
///
|
///
|
||||||
/// let four_zeros = _mm_setzero_ps();
|
/// let four_zeros = _mm_setzero_ps();
|
||||||
/// let four_ones = _mm_set1_ps(1.0);
|
/// let four_ones = _mm_set1_ps(1.0);
|
||||||
/// let four_floats = _mm_set_ps(1.0, 2.0, 3.0, 4.0);
|
/// let four_floats = _mm_set_ps(1.0, 2.0, 3.0, 4.0);
|
||||||
/// # }
|
/// # }
|
||||||
/// # if cfg_feature_enabled!("sse") { unsafe { foo() } }
|
/// # if is_target_feature_detected!("sse") { unsafe { foo() } }
|
||||||
/// # }
|
/// # }
|
||||||
/// ```
|
/// ```
|
||||||
pub struct __m128(f32, f32, f32, f32);
|
pub struct __m128(f32, f32, f32, f32);
|
||||||
@@ -174,13 +184,16 @@ types! {
|
|||||||
/// # fn main() {
|
/// # fn main() {
|
||||||
/// # #[target_feature(enable = "sse")]
|
/// # #[target_feature(enable = "sse")]
|
||||||
/// # unsafe fn foo() {
|
/// # unsafe fn foo() {
|
||||||
/// use stdsimd::vendor::*;
|
/// #[cfg(target_arch = "x86")]
|
||||||
|
/// use stdsimd::arch::x86::*;
|
||||||
|
/// #[cfg(target_arch = "x86_64")]
|
||||||
|
/// use stdsimd::arch::x86_64::*;
|
||||||
///
|
///
|
||||||
/// let two_zeros = _mm_setzero_pd();
|
/// let two_zeros = _mm_setzero_pd();
|
||||||
/// let two_ones = _mm_set1_pd(1.0);
|
/// let two_ones = _mm_set1_pd(1.0);
|
||||||
/// let two_floats = _mm_set_pd(1.0, 2.0);
|
/// let two_floats = _mm_set_pd(1.0, 2.0);
|
||||||
/// # }
|
/// # }
|
||||||
/// # if cfg_feature_enabled!("sse") { unsafe { foo() } }
|
/// # if is_target_feature_detected!("sse") { unsafe { foo() } }
|
||||||
/// # }
|
/// # }
|
||||||
/// ```
|
/// ```
|
||||||
pub struct __m128d(f64, f64);
|
pub struct __m128d(f64, f64);
|
||||||
@@ -214,13 +227,16 @@ types! {
|
|||||||
/// # fn main() {
|
/// # fn main() {
|
||||||
/// # #[target_feature(enable = "avx")]
|
/// # #[target_feature(enable = "avx")]
|
||||||
/// # unsafe fn foo() {
|
/// # unsafe fn foo() {
|
||||||
/// use stdsimd::vendor::*;
|
/// #[cfg(target_arch = "x86")]
|
||||||
|
/// use stdsimd::arch::x86::*;
|
||||||
|
/// #[cfg(target_arch = "x86_64")]
|
||||||
|
/// use stdsimd::arch::x86_64::*;
|
||||||
///
|
///
|
||||||
/// let all_bytes_zero = _mm256_setzero_si256();
|
/// let all_bytes_zero = _mm256_setzero_si256();
|
||||||
/// let all_bytes_one = _mm256_set1_epi8(1);
|
/// let all_bytes_one = _mm256_set1_epi8(1);
|
||||||
/// let eight_i32 = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
|
/// let eight_i32 = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
|
||||||
/// # }
|
/// # }
|
||||||
/// # if cfg_feature_enabled!("avx") { unsafe { foo() } }
|
/// # if is_target_feature_detected!("avx") { unsafe { foo() } }
|
||||||
/// # }
|
/// # }
|
||||||
/// ```
|
/// ```
|
||||||
pub struct __m256i(i64, i64, i64, i64);
|
pub struct __m256i(i64, i64, i64, i64);
|
||||||
@@ -250,13 +266,16 @@ types! {
|
|||||||
/// # fn main() {
|
/// # fn main() {
|
||||||
/// # #[target_feature(enable = "sse")]
|
/// # #[target_feature(enable = "sse")]
|
||||||
/// # unsafe fn foo() {
|
/// # unsafe fn foo() {
|
||||||
/// use stdsimd::vendor::*;
|
/// #[cfg(target_arch = "x86")]
|
||||||
|
/// use stdsimd::arch::x86::*;
|
||||||
|
/// #[cfg(target_arch = "x86_64")]
|
||||||
|
/// use stdsimd::arch::x86_64::*;
|
||||||
///
|
///
|
||||||
/// let eight_zeros = _mm256_setzero_ps();
|
/// let eight_zeros = _mm256_setzero_ps();
|
||||||
/// let eight_ones = _mm256_set1_ps(1.0);
|
/// let eight_ones = _mm256_set1_ps(1.0);
|
||||||
/// let eight_floats = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
|
/// let eight_floats = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
|
||||||
/// # }
|
/// # }
|
||||||
/// # if cfg_feature_enabled!("sse") { unsafe { foo() } }
|
/// # if is_target_feature_detected!("sse") { unsafe { foo() } }
|
||||||
/// # }
|
/// # }
|
||||||
/// ```
|
/// ```
|
||||||
pub struct __m256(f32, f32, f32, f32, f32, f32, f32, f32);
|
pub struct __m256(f32, f32, f32, f32, f32, f32, f32, f32);
|
||||||
@@ -286,13 +305,16 @@ types! {
|
|||||||
/// # fn main() {
|
/// # fn main() {
|
||||||
/// # #[target_feature(enable = "avx")]
|
/// # #[target_feature(enable = "avx")]
|
||||||
/// # unsafe fn foo() {
|
/// # unsafe fn foo() {
|
||||||
/// use stdsimd::vendor::*;
|
/// #[cfg(target_arch = "x86")]
|
||||||
|
/// use stdsimd::arch::x86::*;
|
||||||
|
/// #[cfg(target_arch = "x86_64")]
|
||||||
|
/// use stdsimd::arch::x86_64::*;
|
||||||
///
|
///
|
||||||
/// let four_zeros = _mm256_setzero_pd();
|
/// let four_zeros = _mm256_setzero_pd();
|
||||||
/// let four_ones = _mm256_set1_pd(1.0);
|
/// let four_ones = _mm256_set1_pd(1.0);
|
||||||
/// let four_floats = _mm256_set_pd(1.0, 2.0, 3.0, 4.0);
|
/// let four_floats = _mm256_set_pd(1.0, 2.0, 3.0, 4.0);
|
||||||
/// # }
|
/// # }
|
||||||
/// # if cfg_feature_enabled!("avx") { unsafe { foo() } }
|
/// # if is_target_feature_detected!("avx") { unsafe { foo() } }
|
||||||
/// # }
|
/// # }
|
||||||
/// ```
|
/// ```
|
||||||
pub struct __m256d(f64, f64, f64, f64);
|
pub struct __m256d(f64, f64, f64, f64);
|
||||||
@@ -309,42 +331,42 @@ trait m128iExt: Sized {
|
|||||||
fn as_m128i(self) -> __m128i;
|
fn as_m128i(self) -> __m128i;
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn as_u8x16(self) -> ::v128::u8x16 {
|
fn as_u8x16(self) -> ::coresimd::v128::u8x16 {
|
||||||
unsafe { mem::transmute(self.as_m128i()) }
|
unsafe { mem::transmute(self.as_m128i()) }
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn as_u16x8(self) -> ::v128::u16x8 {
|
fn as_u16x8(self) -> ::coresimd::v128::u16x8 {
|
||||||
unsafe { mem::transmute(self.as_m128i()) }
|
unsafe { mem::transmute(self.as_m128i()) }
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn as_u32x4(self) -> ::v128::u32x4 {
|
fn as_u32x4(self) -> ::coresimd::v128::u32x4 {
|
||||||
unsafe { mem::transmute(self.as_m128i()) }
|
unsafe { mem::transmute(self.as_m128i()) }
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn as_u64x2(self) -> ::v128::u64x2 {
|
fn as_u64x2(self) -> ::coresimd::v128::u64x2 {
|
||||||
unsafe { mem::transmute(self.as_m128i()) }
|
unsafe { mem::transmute(self.as_m128i()) }
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn as_i8x16(self) -> ::v128::i8x16 {
|
fn as_i8x16(self) -> ::coresimd::v128::i8x16 {
|
||||||
unsafe { mem::transmute(self.as_m128i()) }
|
unsafe { mem::transmute(self.as_m128i()) }
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn as_i16x8(self) -> ::v128::i16x8 {
|
fn as_i16x8(self) -> ::coresimd::v128::i16x8 {
|
||||||
unsafe { mem::transmute(self.as_m128i()) }
|
unsafe { mem::transmute(self.as_m128i()) }
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn as_i32x4(self) -> ::v128::i32x4 {
|
fn as_i32x4(self) -> ::coresimd::v128::i32x4 {
|
||||||
unsafe { mem::transmute(self.as_m128i()) }
|
unsafe { mem::transmute(self.as_m128i()) }
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn as_i64x2(self) -> ::v128::i64x2 {
|
fn as_i64x2(self) -> ::coresimd::v128::i64x2 {
|
||||||
unsafe { mem::transmute(self.as_m128i()) }
|
unsafe { mem::transmute(self.as_m128i()) }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -362,42 +384,42 @@ trait m256iExt: Sized {
|
|||||||
fn as_m256i(self) -> __m256i;
|
fn as_m256i(self) -> __m256i;
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn as_u8x32(self) -> ::v256::u8x32 {
|
fn as_u8x32(self) -> ::coresimd::v256::u8x32 {
|
||||||
unsafe { mem::transmute(self.as_m256i()) }
|
unsafe { mem::transmute(self.as_m256i()) }
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn as_u16x16(self) -> ::v256::u16x16 {
|
fn as_u16x16(self) -> ::coresimd::v256::u16x16 {
|
||||||
unsafe { mem::transmute(self.as_m256i()) }
|
unsafe { mem::transmute(self.as_m256i()) }
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn as_u32x8(self) -> ::v256::u32x8 {
|
fn as_u32x8(self) -> ::coresimd::v256::u32x8 {
|
||||||
unsafe { mem::transmute(self.as_m256i()) }
|
unsafe { mem::transmute(self.as_m256i()) }
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn as_u64x4(self) -> ::v256::u64x4 {
|
fn as_u64x4(self) -> ::coresimd::v256::u64x4 {
|
||||||
unsafe { mem::transmute(self.as_m256i()) }
|
unsafe { mem::transmute(self.as_m256i()) }
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn as_i8x32(self) -> ::v256::i8x32 {
|
fn as_i8x32(self) -> ::coresimd::v256::i8x32 {
|
||||||
unsafe { mem::transmute(self.as_m256i()) }
|
unsafe { mem::transmute(self.as_m256i()) }
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn as_i16x16(self) -> ::v256::i16x16 {
|
fn as_i16x16(self) -> ::coresimd::v256::i16x16 {
|
||||||
unsafe { mem::transmute(self.as_m256i()) }
|
unsafe { mem::transmute(self.as_m256i()) }
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn as_i32x8(self) -> ::v256::i32x8 {
|
fn as_i32x8(self) -> ::coresimd::v256::i32x8 {
|
||||||
unsafe { mem::transmute(self.as_m256i()) }
|
unsafe { mem::transmute(self.as_m256i()) }
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn as_i64x4(self) -> ::v256::i64x4 {
|
fn as_i64x4(self) -> ::coresimd::v256::i64x4 {
|
||||||
unsafe { mem::transmute(self.as_m256i()) }
|
unsafe { mem::transmute(self.as_m256i()) }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
//! Utilities used in testing the x86 intrinsics
|
//! Utilities used in testing the x86 intrinsics
|
||||||
|
|
||||||
use x86::*;
|
use coresimd::x86::*;
|
||||||
|
|
||||||
#[target_feature(enable = "mmx")]
|
#[target_feature(enable = "mmx")]
|
||||||
pub unsafe fn assert_eq_m64(a: __m64, b: __m64) {
|
pub unsafe fn assert_eq_m64(a: __m64, b: __m64) {
|
||||||
@@ -107,7 +107,7 @@ pub unsafe fn get_m256(a: __m256, idx: usize) -> f32 {
|
|||||||
// which doesn't exist on x86!
|
// which doesn't exist on x86!
|
||||||
#[cfg(target_arch = "x86")]
|
#[cfg(target_arch = "x86")]
|
||||||
mod x86_polyfill {
|
mod x86_polyfill {
|
||||||
use x86::*;
|
use coresimd::x86::*;
|
||||||
|
|
||||||
pub unsafe fn _mm_insert_epi64(a: __m128i, val: i64, idx: i32) -> __m128i {
|
pub unsafe fn _mm_insert_epi64(a: __m128i, val: i64, idx: i32) -> __m128i {
|
||||||
union A {
|
union A {
|
||||||
@@ -42,7 +42,7 @@ pub unsafe fn _popcnt64(x: i64) -> i32 {
|
|||||||
mod tests {
|
mod tests {
|
||||||
use stdsimd_test::simd_test;
|
use stdsimd_test::simd_test;
|
||||||
|
|
||||||
use x86::*;
|
use coresimd::x86::*;
|
||||||
|
|
||||||
#[simd_test = "lzcnt"]
|
#[simd_test = "lzcnt"]
|
||||||
unsafe fn test_lzcnt_u64() {
|
unsafe fn test_lzcnt_u64() {
|
||||||
@@ -13,10 +13,9 @@
|
|||||||
//! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
|
//! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
|
||||||
//! [wiki]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions
|
//! [wiki]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions
|
||||||
|
|
||||||
use core::mem;
|
use coresimd::simd_llvm::*;
|
||||||
|
use coresimd::x86::*;
|
||||||
use simd_llvm::*;
|
use mem;
|
||||||
use x86::*;
|
|
||||||
|
|
||||||
/// Copy `a` to result, and insert the 64-bit integer `i` into result
|
/// Copy `a` to result, and insert the 64-bit integer `i` into result
|
||||||
/// at the location specified by `index`.
|
/// at the location specified by `index`.
|
||||||
@@ -32,7 +31,7 @@ pub unsafe fn _mm256_insert_epi64(a: __m256i, i: i64, index: i32) -> __m256i {
|
|||||||
mod tests {
|
mod tests {
|
||||||
use stdsimd_test::simd_test;
|
use stdsimd_test::simd_test;
|
||||||
|
|
||||||
use x86::*;
|
use coresimd::x86::*;
|
||||||
|
|
||||||
#[simd_test = "avx"]
|
#[simd_test = "avx"]
|
||||||
unsafe fn test_mm256_insert_epi64() {
|
unsafe fn test_mm256_insert_epi64() {
|
||||||
@@ -18,8 +18,8 @@
|
|||||||
//! [wiki_avx]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions
|
//! [wiki_avx]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions
|
||||||
//! [wiki_fma]: https://en.wikipedia.org/wiki/Fused_multiply-accumulate
|
//! [wiki_fma]: https://en.wikipedia.org/wiki/Fused_multiply-accumulate
|
||||||
|
|
||||||
use simd_llvm::*;
|
use coresimd::simd_llvm::*;
|
||||||
use x86::*;
|
use coresimd::x86::*;
|
||||||
|
|
||||||
/// Extract a 64-bit integer from `a`, selected with `imm8`.
|
/// Extract a 64-bit integer from `a`, selected with `imm8`.
|
||||||
#[inline]
|
#[inline]
|
||||||
@@ -35,7 +35,7 @@ pub unsafe fn _mm256_extract_epi64(a: __m256i, imm8: i32) -> i64 {
|
|||||||
mod tests {
|
mod tests {
|
||||||
use stdsimd_test::simd_test;
|
use stdsimd_test::simd_test;
|
||||||
|
|
||||||
use x86::*;
|
use coresimd::x86::*;
|
||||||
|
|
||||||
#[simd_test = "avx2"]
|
#[simd_test = "avx2"]
|
||||||
unsafe fn test_mm256_extract_epi64() {
|
unsafe fn test_mm256_extract_epi64() {
|
||||||
@@ -101,7 +101,7 @@ extern "C" {
|
|||||||
mod tests {
|
mod tests {
|
||||||
use stdsimd_test::simd_test;
|
use stdsimd_test::simd_test;
|
||||||
|
|
||||||
use x86::*;
|
use coresimd::x86::*;
|
||||||
|
|
||||||
#[simd_test = "bmi"]
|
#[simd_test = "bmi"]
|
||||||
unsafe fn test_bextr_u64() {
|
unsafe fn test_bextr_u64() {
|
||||||
@@ -69,7 +69,7 @@ extern "C" {
|
|||||||
mod tests {
|
mod tests {
|
||||||
use stdsimd_test::simd_test;
|
use stdsimd_test::simd_test;
|
||||||
|
|
||||||
use x86::*;
|
use coresimd::x86::*;
|
||||||
|
|
||||||
#[simd_test = "bmi2"]
|
#[simd_test = "bmi2"]
|
||||||
unsafe fn test_pext_u64() {
|
unsafe fn test_pext_u64() {
|
||||||
@@ -51,7 +51,7 @@ pub unsafe fn _fxrstor64(mem_addr: *const u8) {
|
|||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use x86::x86_64::fxsr;
|
use coresimd::x86::x86_64::fxsr;
|
||||||
use stdsimd_test::simd_test;
|
use stdsimd_test::simd_test;
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
|
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
//! `x86_64` Streaming SIMD Extensions (SSE)
|
//! `x86_64` Streaming SIMD Extensions (SSE)
|
||||||
|
|
||||||
use x86::*;
|
use coresimd::x86::*;
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
use stdsimd_test::assert_instr;
|
use stdsimd_test::assert_instr;
|
||||||
@@ -66,7 +66,7 @@ mod tests {
|
|||||||
|
|
||||||
use stdsimd_test::simd_test;
|
use stdsimd_test::simd_test;
|
||||||
|
|
||||||
use x86::*;
|
use coresimd::x86::*;
|
||||||
|
|
||||||
#[simd_test = "sse"]
|
#[simd_test = "sse"]
|
||||||
unsafe fn test_mm_cvtss_si64() {
|
unsafe fn test_mm_cvtss_si64() {
|
||||||
@@ -1,7 +1,8 @@
|
|||||||
//! `x86_64`'s Streaming SIMD Extensions 2 (SSE2)
|
//! `x86_64`'s Streaming SIMD Extensions 2 (SSE2)
|
||||||
|
|
||||||
use x86::*;
|
use coresimd::x86::*;
|
||||||
use simd_llvm::*;
|
use coresimd::simd_llvm::*;
|
||||||
|
use intrinsics;
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
use stdsimd_test::assert_instr;
|
use stdsimd_test::assert_instr;
|
||||||
@@ -55,7 +56,7 @@ pub unsafe fn _mm_cvttsd_si64x(a: __m128d) -> i64 {
|
|||||||
#[target_feature(enable = "sse2")]
|
#[target_feature(enable = "sse2")]
|
||||||
#[cfg_attr(test, assert_instr(movnti))]
|
#[cfg_attr(test, assert_instr(movnti))]
|
||||||
pub unsafe fn _mm_stream_si64(mem_addr: *mut i64, a: i64) {
|
pub unsafe fn _mm_stream_si64(mem_addr: *mut i64, a: i64) {
|
||||||
::core::intrinsics::nontemporal_store(mem_addr, a);
|
intrinsics::nontemporal_store(mem_addr, a);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return a vector whose lowest element is `a` and all higher elements are
|
/// Return a vector whose lowest element is `a` and all higher elements are
|
||||||
@@ -116,7 +117,7 @@ mod tests {
|
|||||||
|
|
||||||
use stdsimd_test::simd_test;
|
use stdsimd_test::simd_test;
|
||||||
|
|
||||||
use x86::*;
|
use coresimd::x86::*;
|
||||||
|
|
||||||
#[simd_test = "sse2"]
|
#[simd_test = "sse2"]
|
||||||
unsafe fn test_mm_cvtsd_si64() {
|
unsafe fn test_mm_cvtsd_si64() {
|
||||||
@@ -1,9 +1,8 @@
|
|||||||
//! `i686`'s Streaming SIMD Extensions 4.1 (SSE4.1)
|
//! `i686`'s Streaming SIMD Extensions 4.1 (SSE4.1)
|
||||||
|
|
||||||
use core::mem;
|
use coresimd::x86::*;
|
||||||
|
use coresimd::simd_llvm::*;
|
||||||
use x86::*;
|
use mem;
|
||||||
use simd_llvm::*;
|
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
use stdsimd_test::assert_instr;
|
use stdsimd_test::assert_instr;
|
||||||
@@ -32,7 +31,7 @@ pub unsafe fn _mm_insert_epi64(a: __m128i, i: i64, imm8: i32) -> __m128i {
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use stdsimd_test::simd_test;
|
use stdsimd_test::simd_test;
|
||||||
use x86::*;
|
use coresimd::x86::*;
|
||||||
|
|
||||||
#[simd_test = "sse4.1"]
|
#[simd_test = "sse4.1"]
|
||||||
unsafe fn test_mm_extract_epi64() {
|
unsafe fn test_mm_extract_epi64() {
|
||||||
@@ -20,7 +20,7 @@ pub unsafe fn _mm_crc32_u64(crc: u64, v: u64) -> u64 {
|
|||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use x86::*;
|
use coresimd::x86::*;
|
||||||
|
|
||||||
use stdsimd_test::simd_test;
|
use stdsimd_test::simd_test;
|
||||||
|
|
||||||
@@ -111,7 +111,7 @@ pub unsafe fn _xrstors64(mem_addr: *const u8, rs_mask: u64) {
|
|||||||
/*
|
/*
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use x86::x86_64::xsave;
|
use coresimd::x86::x86_64::xsave;
|
||||||
use stdsimd_test::simd_test;
|
use stdsimd_test::simd_test;
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
|
|
||||||
@@ -19,9 +19,8 @@ is-it-maintained-open-issues = { repository = "rust-lang-nursery/stdsimd" }
|
|||||||
maintenance = { status = "experimental" }
|
maintenance = { status = "experimental" }
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
cupid = "0.5.0"
|
|
||||||
stdsimd-test = { version = "0.*", path = "../stdsimd-test" }
|
stdsimd-test = { version = "0.*", path = "../stdsimd-test" }
|
||||||
stdsimd = { version = "0.0.3", path = ".." }
|
stdsimd = { version = "0.0.3", path = "../stdsimd" }
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
# Internal-usage only: denies all warnings.
|
# Internal-usage only: denies all warnings.
|
||||||
82
library/stdarch/crates/coresimd/src/lib.rs
Normal file
82
library/stdarch/crates/coresimd/src/lib.rs
Normal file
@@ -0,0 +1,82 @@
|
|||||||
|
//! SIMD and vendor intrinsics support library.
|
||||||
|
//!
|
||||||
|
//! This documentation is only for one particular architecture, you can find
|
||||||
|
//! others at:
|
||||||
|
//!
|
||||||
|
//! * [i686](https://rust-lang-nursery.github.io/stdsimd/i686/stdsimd/)
|
||||||
|
//! * [`x86_64`](https://rust-lang-nursery.github.io/stdsimd/x86_64/stdsimd/)
|
||||||
|
//! * [arm](https://rust-lang-nursery.github.io/stdsimd/arm/stdsimd/)
|
||||||
|
//! * [aarch64](https://rust-lang-nursery.github.io/stdsimd/aarch64/stdsimd/)
|
||||||
|
|
||||||
|
#![cfg_attr(feature = "strict", deny(warnings))]
|
||||||
|
#![allow(dead_code)]
|
||||||
|
#![allow(unused_features)]
|
||||||
|
#![feature(const_fn, link_llvm_intrinsics, platform_intrinsics, repr_simd,
|
||||||
|
simd_ffi, target_feature, cfg_target_feature, i128_type, asm,
|
||||||
|
integer_atomics, stmt_expr_attributes, core_intrinsics,
|
||||||
|
crate_in_paths, no_core, attr_literals, rustc_attrs)]
|
||||||
|
#![cfg_attr(test, feature(proc_macro, test, attr_literals, abi_vectorcall))]
|
||||||
|
#![cfg_attr(feature = "cargo-clippy",
|
||||||
|
allow(inline_always, too_many_arguments, cast_sign_loss,
|
||||||
|
cast_lossless, cast_possible_wrap,
|
||||||
|
cast_possible_truncation, cast_precision_loss,
|
||||||
|
shadow_reuse, cyclomatic_complexity, similar_names,
|
||||||
|
many_single_char_names))]
|
||||||
|
#![cfg_attr(test, allow(unused_imports))]
|
||||||
|
#![no_core]
|
||||||
|
|
||||||
|
#[cfg_attr(not(test), macro_use)]
|
||||||
|
extern crate core as _core;
|
||||||
|
#[cfg(test)]
|
||||||
|
#[macro_use]
|
||||||
|
extern crate std;
|
||||||
|
#[cfg(test)]
|
||||||
|
extern crate stdsimd_test;
|
||||||
|
#[cfg(test)]
|
||||||
|
extern crate test;
|
||||||
|
#[cfg(test)]
|
||||||
|
#[macro_use]
|
||||||
|
extern crate stdsimd;
|
||||||
|
|
||||||
|
#[path = "../../../coresimd/mod.rs"]
|
||||||
|
mod coresimd;
|
||||||
|
|
||||||
|
pub use coresimd::simd;
|
||||||
|
|
||||||
|
pub mod arch {
|
||||||
|
#[cfg(target_arch = "x86")]
|
||||||
|
pub mod x86 { pub use coresimd::vendor::*; }
|
||||||
|
#[cfg(target_arch = "x86_64")]
|
||||||
|
pub mod x86_64 { pub use coresimd::vendor::*; }
|
||||||
|
#[cfg(target_arch = "arm")]
|
||||||
|
pub mod arm { pub use coresimd::vendor::*; }
|
||||||
|
#[cfg(target_arch = "aarch64")]
|
||||||
|
pub mod aarch64 { pub use coresimd::vendor::*; }
|
||||||
|
}
|
||||||
|
|
||||||
|
#[allow(unused_imports)]
|
||||||
|
use _core::clone;
|
||||||
|
#[allow(unused_imports)]
|
||||||
|
use _core::cmp;
|
||||||
|
#[allow(unused_imports)]
|
||||||
|
use _core::convert;
|
||||||
|
#[allow(unused_imports)]
|
||||||
|
use _core::fmt;
|
||||||
|
#[allow(unused_imports)]
|
||||||
|
use _core::intrinsics;
|
||||||
|
#[allow(unused_imports)]
|
||||||
|
use _core::iter;
|
||||||
|
#[allow(unused_imports)]
|
||||||
|
use _core::marker;
|
||||||
|
#[allow(unused_imports)]
|
||||||
|
use _core::mem;
|
||||||
|
#[allow(unused_imports)]
|
||||||
|
use _core::ops;
|
||||||
|
#[allow(unused_imports)]
|
||||||
|
use _core::option;
|
||||||
|
#[allow(unused_imports)]
|
||||||
|
use _core::prelude;
|
||||||
|
#[allow(unused_imports)]
|
||||||
|
use _core::ptr;
|
||||||
|
#[allow(unused_imports)]
|
||||||
|
use _core::result;
|
||||||
47
library/stdarch/crates/coresimd/tests/cpu-detection.rs
Normal file
47
library/stdarch/crates/coresimd/tests/cpu-detection.rs
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
#![feature(cfg_target_feature)]
|
||||||
|
#![cfg_attr(feature = "strict", deny(warnings))]
|
||||||
|
#![cfg_attr(feature = "cargo-clippy",
|
||||||
|
allow(option_unwrap_used, print_stdout, use_debug))]
|
||||||
|
|
||||||
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||||
|
#[macro_use]
|
||||||
|
extern crate stdsimd;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||||
|
fn x86_all() {
|
||||||
|
println!("sse: {:?}", is_target_feature_detected!("sse"));
|
||||||
|
println!("sse2: {:?}", is_target_feature_detected!("sse2"));
|
||||||
|
println!("sse3: {:?}", is_target_feature_detected!("sse3"));
|
||||||
|
println!("ssse3: {:?}", is_target_feature_detected!("ssse3"));
|
||||||
|
println!("sse4.1: {:?}", is_target_feature_detected!("sse4.1"));
|
||||||
|
println!("sse4.2: {:?}", is_target_feature_detected!("sse4.2"));
|
||||||
|
println!("sse4a: {:?}", is_target_feature_detected!("sse4a"));
|
||||||
|
println!("avx: {:?}", is_target_feature_detected!("avx"));
|
||||||
|
println!("avx2: {:?}", is_target_feature_detected!("avx2"));
|
||||||
|
println!("avx512f {:?}", is_target_feature_detected!("avx512f"));
|
||||||
|
println!("avx512cd {:?}", is_target_feature_detected!("avx512cd"));
|
||||||
|
println!("avx512er {:?}", is_target_feature_detected!("avx512er"));
|
||||||
|
println!("avx512pf {:?}", is_target_feature_detected!("avx512pf"));
|
||||||
|
println!("avx512bw {:?}", is_target_feature_detected!("avx512bw"));
|
||||||
|
println!("avx512dq {:?}", is_target_feature_detected!("avx512dq"));
|
||||||
|
println!("avx512vl {:?}", is_target_feature_detected!("avx512vl"));
|
||||||
|
println!("avx512_ifma {:?}", is_target_feature_detected!("avx512ifma"));
|
||||||
|
println!("avx512_vbmi {:?}", is_target_feature_detected!("avx512vbmi"));
|
||||||
|
println!(
|
||||||
|
"avx512_vpopcntdq {:?}",
|
||||||
|
is_target_feature_detected!("avx512vpopcntdq")
|
||||||
|
);
|
||||||
|
println!("fma: {:?}", is_target_feature_detected!("fma"));
|
||||||
|
println!("abm: {:?}", is_target_feature_detected!("abm"));
|
||||||
|
println!("bmi: {:?}", is_target_feature_detected!("bmi"));
|
||||||
|
println!("bmi2: {:?}", is_target_feature_detected!("bmi2"));
|
||||||
|
println!("tbm: {:?}", is_target_feature_detected!("tbm"));
|
||||||
|
println!("popcnt: {:?}", is_target_feature_detected!("popcnt"));
|
||||||
|
println!("lzcnt: {:?}", is_target_feature_detected!("lzcnt"));
|
||||||
|
println!("fxsr: {:?}", is_target_feature_detected!("fxsr"));
|
||||||
|
println!("xsave: {:?}", is_target_feature_detected!("xsave"));
|
||||||
|
println!("xsaveopt: {:?}", is_target_feature_detected!("xsaveopt"));
|
||||||
|
println!("xsaves: {:?}", is_target_feature_detected!("xsaves"));
|
||||||
|
println!("xsavec: {:?}", is_target_feature_detected!("xsavec"));
|
||||||
|
}
|
||||||
@@ -58,7 +58,7 @@ pub fn simd_test(
|
|||||||
for feature in target_features {
|
for feature in target_features {
|
||||||
let q = quote_spanned! {
|
let q = quote_spanned! {
|
||||||
proc_macro2::Span::call_site() =>
|
proc_macro2::Span::call_site() =>
|
||||||
cfg_feature_enabled!(#feature) &&
|
is_target_feature_detected!(#feature) &&
|
||||||
};
|
};
|
||||||
q.to_tokens(&mut cfg_target_features);
|
q.to_tokens(&mut cfg_target_features);
|
||||||
}
|
}
|
||||||
@@ -4,8 +4,8 @@ version = "0.1.0"
|
|||||||
authors = ["Alex Crichton <alex@alexcrichton.com>"]
|
authors = ["Alex Crichton <alex@alexcrichton.com>"]
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
assert-instr-macro = { path = "assert-instr-macro" }
|
assert-instr-macro = { path = "../assert-instr-macro" }
|
||||||
simd-test-macro = { path = "simd-test-macro" }
|
simd-test-macro = { path = "../simd-test-macro" }
|
||||||
backtrace = "0.3"
|
backtrace = "0.3"
|
||||||
cc = "1.0"
|
cc = "1.0"
|
||||||
lazy_static = "0.2"
|
lazy_static = "0.2"
|
||||||
@@ -3,7 +3,7 @@ use std::path::Path;
|
|||||||
fn main() {
|
fn main() {
|
||||||
let dir = Path::new(env!("CARGO_MANIFEST_DIR"));
|
let dir = Path::new(env!("CARGO_MANIFEST_DIR"));
|
||||||
let root = dir.parent().unwrap();
|
let root = dir.parent().unwrap();
|
||||||
let root = root.join("coresimd/src/x86");
|
let root = root.join("../coresimd/x86");
|
||||||
walk(&root);
|
walk(&root);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -22,7 +22,7 @@ macro_rules! my_quote {
|
|||||||
pub fn x86_functions(input: TokenStream) -> TokenStream {
|
pub fn x86_functions(input: TokenStream) -> TokenStream {
|
||||||
let dir = Path::new(env!("CARGO_MANIFEST_DIR"));
|
let dir = Path::new(env!("CARGO_MANIFEST_DIR"));
|
||||||
let root = dir.parent().unwrap();
|
let root = dir.parent().unwrap();
|
||||||
let root = root.join("coresimd/src/x86");
|
let root = root.join("../coresimd/x86");
|
||||||
|
|
||||||
let mut files = Vec::new();
|
let mut files = Vec::new();
|
||||||
walk(&root, &mut files);
|
walk(&root, &mut files);
|
||||||
37
library/stdarch/crates/stdsimd/Cargo.toml
Normal file
37
library/stdarch/crates/stdsimd/Cargo.toml
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
[package]
|
||||||
|
name = "stdsimd"
|
||||||
|
version = "0.0.3"
|
||||||
|
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||||
|
description = "SIMD support in Rust's standard library."
|
||||||
|
documentation = "https://docs.rs/stdsimd"
|
||||||
|
homepage = "https://github.com/rust-lang-nursery/stdsimd"
|
||||||
|
repository = "https://github.com/rust-lang-nursery/stdsimd"
|
||||||
|
readme = "README.md"
|
||||||
|
keywords = ["std", "simd", "intrinsics"]
|
||||||
|
categories = ["hardware-support"]
|
||||||
|
license = "MIT/Apache-2.0"
|
||||||
|
|
||||||
|
[badges]
|
||||||
|
travis-ci = { repository = "rust-lang-nursery/stdsimd" }
|
||||||
|
appveyor = { repository = "rust-lang-nursery/stdsimd" }
|
||||||
|
is-it-maintained-issue-resolution = { repository = "rust-lang-nursery/stdsimd" }
|
||||||
|
is-it-maintained-open-issues = { repository = "rust-lang-nursery/stdsimd" }
|
||||||
|
maintenance = { status = "experimental" }
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
coresimd = { version = "0.0.3", path = "../coresimd" }
|
||||||
|
libc = "0.2"
|
||||||
|
cfg-if = "0.1"
|
||||||
|
|
||||||
|
[dev-dependencies]
|
||||||
|
auxv = "0.3.3"
|
||||||
|
quickcheck = "0.6"
|
||||||
|
rand = "0.4"
|
||||||
|
cupid = "0.5.0"
|
||||||
|
|
||||||
|
[features]
|
||||||
|
# Internal-usage only: denies all warnings.
|
||||||
|
strict = [ "coresimd/strict" ]
|
||||||
|
# Internal-usage only: enables only those intrinsics supported by Intel's
|
||||||
|
# Software Development Environment (SDE).
|
||||||
|
intel_sde = [ "coresimd/intel_sde" ]
|
||||||
@@ -25,7 +25,7 @@
|
|||||||
//!
|
//!
|
||||||
//! * `cfg!(target_feature = "feature")`: returns `true` if the `feature` is
|
//! * `cfg!(target_feature = "feature")`: returns `true` if the `feature` is
|
||||||
//! enabled in all CPUs that the binary will run on (at compile-time)
|
//! enabled in all CPUs that the binary will run on (at compile-time)
|
||||||
//! * `cfg_feature_enabled!("feature")`: returns `true` if the `feature` is
|
//! * `is_target_feature_detected!("feature")`: returns `true` if the `feature` is
|
||||||
//! enabled in the CPU in which the binary is currently running on (at
|
//! enabled in the CPU in which the binary is currently running on (at
|
||||||
//! run-time, unless the result is known at compile time)
|
//! run-time, unless the result is known at compile time)
|
||||||
//!
|
//!
|
||||||
@@ -36,7 +36,6 @@
|
|||||||
//!
|
//!
|
||||||
//! #[macro_use]
|
//! #[macro_use]
|
||||||
//! extern crate stdsimd;
|
//! extern crate stdsimd;
|
||||||
//! use stdsimd::vendor;
|
|
||||||
//! use stdsimd::simd::i32x4;
|
//! use stdsimd::simd::i32x4;
|
||||||
//!
|
//!
|
||||||
//! fn main() {
|
//! fn main() {
|
||||||
@@ -65,11 +64,16 @@
|
|||||||
//! #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
//! #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||||
//! #[target_feature(enable = "sse2")]
|
//! #[target_feature(enable = "sse2")]
|
||||||
//! unsafe fn sum_sse2(x: i32x4) -> i32 {
|
//! unsafe fn sum_sse2(x: i32x4) -> i32 {
|
||||||
|
//! #[cfg(target_arch = "x86")]
|
||||||
|
//! use stdsimd::arch::x86::*;
|
||||||
|
//! #[cfg(target_arch = "x86_64")]
|
||||||
|
//! use stdsimd::arch::x86_64::*;
|
||||||
//! use std::mem;
|
//! use std::mem;
|
||||||
//! let x: vendor::__m128i = mem::transmute(x);
|
//!
|
||||||
//! let x = vendor::_mm_add_epi32(x, vendor::_mm_srli_si128(x, 8));
|
//! let x: __m128i = mem::transmute(x);
|
||||||
//! let x = vendor::_mm_add_epi32(x, vendor::_mm_srli_si128(x, 4));
|
//! let x = _mm_add_epi32(x, _mm_srli_si128(x, 8));
|
||||||
//! let ret = vendor::_mm_cvtsi128_si32(x);
|
//! let x = _mm_add_epi32(x, _mm_srli_si128(x, 4));
|
||||||
|
//! let ret = _mm_cvtsi128_si32(x);
|
||||||
//! mem::transmute(ret)
|
//! mem::transmute(ret)
|
||||||
//! }
|
//! }
|
||||||
//!
|
//!
|
||||||
@@ -97,7 +101,7 @@
|
|||||||
//! {
|
//! {
|
||||||
//! // If SSE2 is not enabled at compile-time, this
|
//! // If SSE2 is not enabled at compile-time, this
|
||||||
//! // detects whether SSE2 is available at run-time:
|
//! // detects whether SSE2 is available at run-time:
|
||||||
//! if cfg_feature_enabled!("sse2") {
|
//! if is_target_feature_detected!("sse2") {
|
||||||
//! return unsafe { sum_sse2(x) };
|
//! return unsafe { sum_sse2(x) };
|
||||||
//! }
|
//! }
|
||||||
//! }
|
//! }
|
||||||
@@ -128,56 +132,25 @@
|
|||||||
//! [simd_soundness_bug]: https://github.com/rust-lang/rust/issues/44367
|
//! [simd_soundness_bug]: https://github.com/rust-lang/rust/issues/44367
|
||||||
//! [target_feature_impr]: https://github.com/rust-lang/rust/issues/44839
|
//! [target_feature_impr]: https://github.com/rust-lang/rust/issues/44839
|
||||||
|
|
||||||
#![feature(const_fn, const_size_of, use_extern_macros, cfg_target_feature)]
|
#![feature(const_fn, integer_atomics)]
|
||||||
#![cfg_attr(target_os = "linux", feature(linkage))]
|
#![cfg_attr(target_os = "linux", feature(linkage))]
|
||||||
|
#![no_std]
|
||||||
|
|
||||||
|
extern crate std as _std;
|
||||||
extern crate coresimd;
|
extern crate coresimd;
|
||||||
|
extern crate libc;
|
||||||
/// Re-export run-time feature detection macros.
|
|
||||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "arm",
|
|
||||||
target_arch = "aarch64", target_arch = "powerpc64"))]
|
|
||||||
pub use coresimd::__unstable_detect_feature;
|
|
||||||
|
|
||||||
/// Platform dependent vendor intrinsics.
|
|
||||||
pub mod vendor {
|
|
||||||
#[doc(inline)]
|
|
||||||
pub use coresimd::vendor::*;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Run-time feature detection.
|
|
||||||
#[doc(hidden)]
|
|
||||||
pub mod __vendor_runtime {
|
|
||||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64",
|
|
||||||
all(target_os = "linux",
|
|
||||||
any(target_arch = "arm", target_arch = "aarch64",
|
|
||||||
target_arch = "powerpc64"))))]
|
|
||||||
pub use runtime::std::*;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Platform independent SIMD vector types and operations.
|
|
||||||
pub mod simd {
|
|
||||||
#[doc(inline)]
|
|
||||||
pub use coresimd::simd::*;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// The `stdsimd` run-time.
|
|
||||||
#[macro_use]
|
#[macro_use]
|
||||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64",
|
extern crate cfg_if;
|
||||||
all(target_os = "linux",
|
|
||||||
any(target_arch = "arm", target_arch = "aarch64",
|
|
||||||
target_arch = "powerpc64"))))]
|
|
||||||
mod runtime;
|
|
||||||
|
|
||||||
/// Error gracefully in architectures without run-time detection support.
|
#[cfg(test)]
|
||||||
#[cfg(not(any(target_arch = "x86", target_arch = "x86_64",
|
#[macro_use]
|
||||||
all(target_os = "linux",
|
extern crate std;
|
||||||
any(target_arch = "arm", target_arch = "aarch64",
|
|
||||||
target_arch = "powerpc64")))))]
|
#[path = "../../../stdsimd/mod.rs"]
|
||||||
#[doc(hidden)]
|
mod stdsimd;
|
||||||
#[macro_export]
|
|
||||||
macro_rules! cfg_feature_enabled {
|
pub use stdsimd::*;
|
||||||
($name:tt) => (
|
|
||||||
{
|
pub use _std::prelude;
|
||||||
compile_error!("cfg_target_feature! is not supported in this architecture")
|
pub use _std::fs;
|
||||||
}
|
pub use _std::io;
|
||||||
)
|
|
||||||
}
|
|
||||||
72
library/stdarch/crates/stdsimd/tests/cpu-detection.rs
Normal file
72
library/stdarch/crates/stdsimd/tests/cpu-detection.rs
Normal file
@@ -0,0 +1,72 @@
|
|||||||
|
#![feature(cfg_target_feature)]
|
||||||
|
#![cfg_attr(feature = "strict", deny(warnings))]
|
||||||
|
#![cfg_attr(feature = "cargo-clippy",
|
||||||
|
allow(option_unwrap_used, use_debug, print_stdout))]
|
||||||
|
|
||||||
|
#[cfg(any(target_arch = "arm", target_arch = "aarch64",
|
||||||
|
target_arch = "x86", target_arch = "x86_64",
|
||||||
|
target_arch = "powerpc64"))]
|
||||||
|
#[macro_use]
|
||||||
|
extern crate stdsimd;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
#[cfg(all(target_arch = "arm", target_os = "linux"))]
|
||||||
|
fn arm_linux() {
|
||||||
|
println!("neon: {}", is_target_feature_detected!("neon"));
|
||||||
|
println!("pmull: {}", is_target_feature_detected!("pmull"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
#[cfg(all(target_arch = "aarch64", target_os = "linux"))]
|
||||||
|
fn aarch64_linux() {
|
||||||
|
println!("neon: {}", is_target_feature_detected!("neon"));
|
||||||
|
println!("asimd: {}", is_target_feature_detected!("asimd"));
|
||||||
|
println!("pmull: {}", is_target_feature_detected!("pmull"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
#[cfg(all(target_arch = "powerpc64", target_os = "linux"))]
|
||||||
|
fn powerpc64_linux() {
|
||||||
|
println!("altivec: {}", is_target_feature_detected!("altivec"));
|
||||||
|
println!("vsx: {}", is_target_feature_detected!("vsx"));
|
||||||
|
println!("power8: {}", is_target_feature_detected!("power8"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||||
|
fn x86_all() {
|
||||||
|
println!("sse: {:?}", is_target_feature_detected!("sse"));
|
||||||
|
println!("sse2: {:?}", is_target_feature_detected!("sse2"));
|
||||||
|
println!("sse3: {:?}", is_target_feature_detected!("sse3"));
|
||||||
|
println!("ssse3: {:?}", is_target_feature_detected!("ssse3"));
|
||||||
|
println!("sse4.1: {:?}", is_target_feature_detected!("sse4.1"));
|
||||||
|
println!("sse4.2: {:?}", is_target_feature_detected!("sse4.2"));
|
||||||
|
println!("sse4a: {:?}", is_target_feature_detected!("sse4a"));
|
||||||
|
println!("avx: {:?}", is_target_feature_detected!("avx"));
|
||||||
|
println!("avx2: {:?}", is_target_feature_detected!("avx2"));
|
||||||
|
println!("avx512f {:?}", is_target_feature_detected!("avx512f"));
|
||||||
|
println!("avx512cd {:?}", is_target_feature_detected!("avx512cd"));
|
||||||
|
println!("avx512er {:?}", is_target_feature_detected!("avx512er"));
|
||||||
|
println!("avx512pf {:?}", is_target_feature_detected!("avx512pf"));
|
||||||
|
println!("avx512bw {:?}", is_target_feature_detected!("avx512bw"));
|
||||||
|
println!("avx512dq {:?}", is_target_feature_detected!("avx512dq"));
|
||||||
|
println!("avx512vl {:?}", is_target_feature_detected!("avx512vl"));
|
||||||
|
println!("avx512_ifma {:?}", is_target_feature_detected!("avx512ifma"));
|
||||||
|
println!("avx512_vbmi {:?}", is_target_feature_detected!("avx512vbmi"));
|
||||||
|
println!(
|
||||||
|
"avx512_vpopcntdq {:?}",
|
||||||
|
is_target_feature_detected!("avx512vpopcntdq")
|
||||||
|
);
|
||||||
|
println!("fma: {:?}", is_target_feature_detected!("fma"));
|
||||||
|
println!("abm: {:?}", is_target_feature_detected!("abm"));
|
||||||
|
println!("bmi: {:?}", is_target_feature_detected!("bmi"));
|
||||||
|
println!("bmi2: {:?}", is_target_feature_detected!("bmi2"));
|
||||||
|
println!("tbm: {:?}", is_target_feature_detected!("tbm"));
|
||||||
|
println!("popcnt: {:?}", is_target_feature_detected!("popcnt"));
|
||||||
|
println!("lzcnt: {:?}", is_target_feature_detected!("lzcnt"));
|
||||||
|
println!("fxsr: {:?}", is_target_feature_detected!("fxsr"));
|
||||||
|
println!("xsave: {:?}", is_target_feature_detected!("xsave"));
|
||||||
|
println!("xsaveopt: {:?}", is_target_feature_detected!("xsaveopt"));
|
||||||
|
println!("xsaves: {:?}", is_target_feature_detected!("xsaves"));
|
||||||
|
println!("xsavec: {:?}", is_target_feature_detected!("xsavec"));
|
||||||
|
}
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user