Reorganize and refactor source tree (#324)

With RFC 2325 looking close to being accepted, I took a crack at reorganizing this repository to being more amenable for inclusion in libstd/libcore. My current plan is to add stdsimd as a submodule in rust-lang/rust and then use `#[path]` to include the modules directly into libstd/libcore. Before this commit, however, the source code of coresimd/stdsimd themselves were not quite ready for this. Imports wouldn't compile for one reason or another, and the organization was also different than the RFC itself! In addition to moving a lot of files around, this commit has the following major changes: * The `cfg_feature_enabled!` macro is now renamed to `is_target_feature_detected!` * The `vendor` module is now called `arch`. * Under the `arch` module is a suite of modules like `x86`, `x86_64`, etc. One per `cfg!(target_arch)`. * The `is_target_feature_detected!` macro was removed from coresimd. Unfortunately libcore has no ability to export unstable macros, so for now all feature detection is canonicalized in stdsimd. The `coresimd` and `stdsimd` crates have been updated to the planned organization in RFC 2325 as well. The runtime bits saw the largest amount of refactoring, seeing a good deal of simplification without the core/std split.
2018-02-18 10:07:35 +09:00
parent d097221faf
commit 39b5ec91ae
123 changed files with 1852 additions and 2051 deletions
--- a/library/stdarch/.travis.yml
+++ b/library/stdarch/.travis.yml
@@ -24,7 +24,7 @@ matrix:
    - env: DOCUMENTATION
      install: true
      script: ci/dox.sh
-    - script: cargo test --manifest-path stdsimd-verify/Cargo.toml
+    - script: cargo test --manifest-path crates/stdsimd-verify/Cargo.toml
      install: true
    - env: RUSTFMT=On TARGET=x86_64-unknown-linux-gnu NO_ADD=1
      script: |
--- a/library/stdarch/Cargo.toml
+++ b/library/stdarch/Cargo.toml
@@ -1,33 +1,8 @@
-[package]
-name = "stdsimd"
-version = "0.0.3"
-authors = ["Andrew Gallant <jamslam@gmail.com>"]
-description = "SIMD support in Rust's standard library."
-documentation = "https://docs.rs/stdsimd"
-homepage = "https://github.com/rust-lang-nursery/stdsimd"
-repository = "https://github.com/rust-lang-nursery/stdsimd"
-readme = "README.md"
-keywords = ["std", "simd", "intrinsics"]
-categories = ["hardware-support"]
-license = "MIT/Apache-2.0"
-
 [workspace]
-members = ["stdsimd-verify"]
-
-[badges]
-travis-ci = { repository = "rust-lang-nursery/stdsimd" }
-appveyor = { repository = "rust-lang-nursery/stdsimd"  }
-is-it-maintained-issue-resolution = { repository = "rust-lang-nursery/stdsimd" }
-is-it-maintained-open-issues = { repository = "rust-lang-nursery/stdsimd" }
-maintenance = { status = "experimental" }
-
-[dependencies]
-coresimd = { version = "0.0.3", path = "coresimd/" }
-
-[dev-dependencies]
-auxv = "0.3.3"
-quickcheck = "0.6"
-rand = "0.4"
+members = [
+  "crates/stdsimd-verify",
+  "crates/stdsimd",
+]

 [profile.release]
 debug = true
@@ -36,10 +11,3 @@ opt-level = 3
 [profile.bench]
 debug = 1
 opt-level = 3
-
-[features]
-# Internal-usage only: denies all warnings.
-strict = [ "coresimd/strict" ]
-# Internal-usage only: enables only those intrinsics supported by Intel's
-# Software Development Environment (SDE).
-intel_sde = [ "coresimd/intel_sde" ]
--- a/library/stdarch/ci/dox.sh
+++ b/library/stdarch/ci/dox.sh
@@ -22,16 +22,18 @@ dox() {
  rm -rf target/doc/$arch
  mkdir target/doc/$arch

-  cargo build --target $target
+  cargo build --target $target --manifest-path crates/stdsimd/Cargo.toml

  rustdoc --target $target \
-          -o target/doc/$arch coresimd/src/lib.rs \
+          -o target/doc/$arch crates/coresimd/src/lib.rs \
          --crate-name coresimd \
          --library-path target/$target/debug/deps
  rustdoc --target $target \
-          -o target/doc/$arch src/lib.rs \
+          -o target/doc/$arch crates/stdsimd/src/lib.rs \
          --crate-name stdsimd \
-          --library-path target/$target/debug/deps
+          --library-path target/$target/debug/deps \
+          --extern cfg_if=`ls target/$target/debug/deps/libcfg_if-*.rlib` \
+          --extern libc=`ls target/$target/debug/deps/liblibc-*.rlib`
 }

 dox i686 i686-unknown-linux-gnu
--- a/library/stdarch/ci/run.sh
+++ b/library/stdarch/ci/run.sh
@@ -23,7 +23,7 @@ echo "OBJDUMP=${OBJDUMP}"

 cargo_test() {
    cmd="cargo test --target=$TARGET --features $FEATURES $1"
-    cmd="$cmd -p coresimd -p stdsimd"
+    cmd="$cmd -p coresimd -p stdsimd --manifest-path crates/stdsimd/Cargo.toml"
    cmd="$cmd -- $2"
    $cmd
 }
--- a/library/stdarch/coresimd/LICENSE-APACHE
+++ b/library/stdarch/coresimd/LICENSE-APACHE
@@ -1 +0,0 @@
-../LICENSE-APACHE
--- a/library/stdarch/coresimd/LICENSE-MIT
+++ b/library/stdarch/coresimd/LICENSE-MIT
@@ -1 +0,0 @@
-../LICENSE-MIT
--- a/library/stdarch/coresimd/README.md
+++ b/library/stdarch/coresimd/README.md
@@ -1 +0,0 @@
-../README.md
--- a/library/stdarch/coresimd/src/aarch64/mod.rs
+++ b/library/stdarch/coresimd/src/aarch64/mod.rs
--- a/library/stdarch/coresimd/src/aarch64/neon.rs
+++ b/library/stdarch/coresimd/src/aarch64/neon.rs
@@ -4,8 +4,8 @@

 #[cfg(test)]
 use stdsimd_test::assert_instr;
-use simd_llvm::simd_add;
-use v128::f64x2;
+use coresimd::simd_llvm::simd_add;
+use coresimd::v128::f64x2;

 /// Vector add.
 #[inline]
@@ -41,8 +41,8 @@ pub unsafe fn vaddd_u64(a: u64, b: u64) -> u64 {

 #[cfg(test)]
 mod tests {
-    use super::f64x2;
-    use aarch64::neon;
+    use simd::f64x2;
+    use coresimd::aarch64::neon;
    use stdsimd_test::simd_test;

    #[simd_test = "neon"]
--- a/library/stdarch/coresimd/src/aarch64/v8.rs
+++ b/library/stdarch/coresimd/src/aarch64/v8.rs
@@ -57,7 +57,7 @@ pub unsafe fn _cls_u64(x: u64) -> u64 {

 #[cfg(test)]
 mod tests {
-    use aarch64::v8;
+    use coresimd::aarch64::v8;

    #[test]
    fn _rev_u64() {
--- a/library/stdarch/coresimd/src/arm/mod.rs
+++ b/library/stdarch/coresimd/src/arm/mod.rs
--- a/library/stdarch/coresimd/src/arm/neon.rs
+++ b/library/stdarch/coresimd/src/arm/neon.rs
@@ -3,10 +3,9 @@
 #[cfg(test)]
 use stdsimd_test::assert_instr;

-use simd_llvm::simd_add;
-
-use v64::{f32x2, i16x4, i32x2, i8x8, u16x4, u32x2, u8x8};
-use v128::{f32x4, i16x8, i32x4, i64x2, i8x16, u16x8, u32x4, u64x2, u8x16};
+use coresimd::simd_llvm::simd_add;
+use coresimd::v64::*;
+use coresimd::v128::*;

 /// Vector add.
 #[inline]
@@ -216,7 +215,7 @@ pub unsafe fn vrsqrte_f32(a: f32x2) -> f32x2 {
 mod tests {
    use stdsimd_test::simd_test;
    use simd::*;
-    use arm::neon;
+    use coresimd::arm::neon;

    #[simd_test = "neon"]
    unsafe fn vadd_s8() {
--- a/library/stdarch/coresimd/src/arm/v6.rs
+++ b/library/stdarch/coresimd/src/arm/v6.rs
@@ -25,7 +25,7 @@ pub unsafe fn _rev_u32(x: u32) -> u32 {

 #[cfg(test)]
 mod tests {
-    use arm::v6;
+    use coresimd::arm::v6;

    #[test]
    fn _rev_u16() {
--- a/library/stdarch/coresimd/src/arm/v7.rs
+++ b/library/stdarch/coresimd/src/arm/v7.rs
@@ -50,7 +50,7 @@ extern "C" {

 #[cfg(test)]
 mod tests {
-    use arm::v7;
+    use coresimd::arm::v7;

    #[test]
    fn _clz_u8() {
--- a/library/stdarch/coresimd/src/macros.rs
+++ b/library/stdarch/coresimd/src/macros.rs
@@ -82,8 +82,8 @@ macro_rules! define_impl {
                slice: &mut [$elemty],
                offset: usize,
            ) {
-                use core::mem::size_of;
-                use core::ptr;
+                use mem::size_of;
+                use ptr;

                ptr::copy_nonoverlapping(
                    &self as *const $name as *const u8,
@@ -102,8 +102,8 @@ macro_rules! define_impl {
                slice: &[$elemty],
                offset: usize,
            ) -> $name {
-                use core::mem::size_of;
-                use core::ptr;
+                use mem::size_of;
+                use ptr;

                let mut x = $name::splat(0 as $elemty);
                ptr::copy_nonoverlapping(
@@ -152,7 +152,7 @@ macro_rules! define_from {
            impl From<$from> for $to {
                #[inline(always)]
                fn from(f: $from) -> $to {
-                    unsafe { ::core::mem::transmute(f) }
+                    unsafe { ::mem::transmute(f) }
                }
            }
        )+
@@ -162,7 +162,7 @@ macro_rules! define_from {
 macro_rules! define_common_ops {
    ($($ty:ident),+) => {
        $(
-            impl ::core::ops::Add for $ty {
+            impl ::ops::Add for $ty {
                type Output = Self;
                #[inline(always)]
                fn add(self, other: Self) -> Self {
@@ -170,7 +170,7 @@ macro_rules! define_common_ops {
                }
            }

-            impl ::core::ops::Sub for $ty {
+            impl ::ops::Sub for $ty {
                type Output = Self;
                #[inline(always)]
                fn sub(self, other: Self) -> Self {
@@ -178,7 +178,7 @@ macro_rules! define_common_ops {
                }
            }

-            impl ::core::ops::Mul for $ty {
+            impl ::ops::Mul for $ty {
                type Output = Self;
                #[inline(always)]
                fn mul(self, other: Self) -> Self {
@@ -186,7 +186,7 @@ macro_rules! define_common_ops {
                }
            }

-            impl ::core::ops::Div for $ty {
+            impl ::ops::Div for $ty {
                type Output = Self;
                #[inline(always)]
                fn div(self, other: Self) -> Self {
@@ -194,7 +194,7 @@ macro_rules! define_common_ops {
                }
            }

-            impl ::core::ops::Rem for $ty {
+            impl ::ops::Rem for $ty {
                type Output = Self;
                #[inline(always)]
                fn rem(self, other: Self) -> Self {
@@ -202,35 +202,35 @@ macro_rules! define_common_ops {
                }
            }

-            impl ::core::ops::AddAssign for $ty {
+            impl ::ops::AddAssign for $ty {
                #[inline(always)]
                fn add_assign(&mut self, other: Self) {
                    *self = *self + other;
                }
            }

-            impl ::core::ops::SubAssign for $ty {
+            impl ::ops::SubAssign for $ty {
                #[inline(always)]
                fn sub_assign(&mut self, other: Self) {
                    *self = *self - other;
                }
            }

-            impl ::core::ops::MulAssign for $ty {
+            impl ::ops::MulAssign for $ty {
                #[inline(always)]
                fn mul_assign(&mut self, other: Self) {
                    *self = *self * other;
                }
            }

-            impl ::core::ops::DivAssign for $ty {
+            impl ::ops::DivAssign for $ty {
                #[inline(always)]
                fn div_assign(&mut self, other: Self) {
                    *self = *self / other;
                }
            }

-            impl ::core::ops::RemAssign for $ty {
+            impl ::ops::RemAssign for $ty {
                #[inline(always)]
                fn rem_assign(&mut self, other: Self) {
                    *self = *self % other;
@@ -244,14 +244,14 @@ macro_rules! define_common_ops {
 macro_rules! define_shifts {
    ($ty:ident, $elem:ident, $($by:ident),+) => {
        $(
-            impl ::core::ops::Shl<$by> for $ty {
+            impl ::ops::Shl<$by> for $ty {
                type Output = Self;
                #[inline(always)]
                fn shl(self, other: $by) -> Self {
                    unsafe { simd_shl(self, $ty::splat(other as $elem)) }
                }
            }
-            impl ::core::ops::Shr<$by> for $ty {
+            impl ::ops::Shr<$by> for $ty {
                type Output = Self;
                #[inline(always)]
                fn shr(self, other: $by) -> Self {
@@ -259,13 +259,13 @@ macro_rules! define_shifts {
                }
            }

-            impl ::core::ops::ShlAssign<$by> for $ty {
+            impl ::ops::ShlAssign<$by> for $ty {
                #[inline(always)]
                fn shl_assign(&mut self, other: $by) {
                    *self = *self << other;
                }
            }
-            impl ::core::ops::ShrAssign<$by> for $ty {
+            impl ::ops::ShrAssign<$by> for $ty {
                #[inline(always)]
                fn shr_assign(&mut self, other: $by) {
                    *self = *self >> other;
@@ -279,7 +279,7 @@ macro_rules! define_shifts {
 macro_rules! define_float_ops {
    ($($ty:ident),+) => {
        $(
-            impl ::core::ops::Neg for $ty {
+            impl ::ops::Neg for $ty {
                type Output = Self;
                #[inline(always)]
                fn neg(self) -> Self {
@@ -293,7 +293,7 @@ macro_rules! define_float_ops {
 macro_rules! define_signed_integer_ops {
    ($($ty:ident),+) => {
        $(
-            impl ::core::ops::Neg for $ty {
+            impl ::ops::Neg for $ty {
                type Output = Self;
                #[inline(always)]
                fn neg(self) -> Self {
@@ -307,7 +307,7 @@ macro_rules! define_signed_integer_ops {
 macro_rules! define_integer_ops {
    ($(($ty:ident, $elem:ident)),+) => {
        $(
-            impl ::core::ops::Not for $ty {
+            impl ::ops::Not for $ty {
                type Output = Self;
                #[inline(always)]
                fn not(self) -> Self {
@@ -315,40 +315,40 @@ macro_rules! define_integer_ops {
                }
            }

-            impl ::core::ops::BitAnd for $ty {
+            impl ::ops::BitAnd for $ty {
                type Output = Self;
                #[inline(always)]
                fn bitand(self, other: Self) -> Self {
                    unsafe { simd_and(self, other) }
                }
            }
-            impl ::core::ops::BitOr for $ty {
+            impl ::ops::BitOr for $ty {
                type Output = Self;
                #[inline(always)]
                fn bitor(self, other: Self) -> Self {
                    unsafe { simd_or(self, other) }
                }
            }
-            impl ::core::ops::BitXor for $ty {
+            impl ::ops::BitXor for $ty {
                type Output = Self;
                #[inline(always)]
                fn bitxor(self, other: Self) -> Self {
                    unsafe { simd_xor(self, other) }
                }
            }
-            impl ::core::ops::BitAndAssign for $ty {
+            impl ::ops::BitAndAssign for $ty {
                #[inline(always)]
                fn bitand_assign(&mut self, other: Self) {
                    *self = *self & other;
                }
            }
-            impl ::core::ops::BitOrAssign for $ty {
+            impl ::ops::BitOrAssign for $ty {
                #[inline(always)]
                fn bitor_assign(&mut self, other: Self) {
                    *self = *self | other;
                }
            }
-            impl ::core::ops::BitXorAssign for $ty {
+            impl ::ops::BitXorAssign for $ty {
                #[inline(always)]
                fn bitxor_assign(&mut self, other: Self) {
                    *self = *self ^ other;
@@ -360,12 +360,12 @@ macro_rules! define_integer_ops {
                u8, u16, u32, u64, usize,
                i8, i16, i32, i64, isize);

-            impl ::core::fmt::LowerHex for $ty {
-                fn fmt(&self, f: &mut ::core::fmt::Formatter)
-                       -> ::core::fmt::Result {
+            impl ::fmt::LowerHex for $ty {
+                fn fmt(&self, f: &mut ::fmt::Formatter)
+                       -> ::fmt::Result {
                    write!(f, "{}(", stringify!($ty))?;
-                    let n = ::core::mem::size_of_val(self)
-                        / ::core::mem::size_of::<$elem>();
+                    let n = ::mem::size_of_val(self)
+                        / ::mem::size_of::<$elem>();
                    for i in 0..n {
                        if i > 0 {
                            write!(f, ", ")?;
@@ -384,7 +384,7 @@ macro_rules! define_casts {
        $(
            impl $fromty {
                #[inline(always)]
-                pub fn $cast(self) -> ::simd::$toty {
+                pub fn $cast(self) -> ::coresimd::simd::$toty {
                    unsafe { simd_cast(self) }
                }
            }
--- a/library/stdarch/coresimd/mod.rs
+++ b/library/stdarch/coresimd/mod.rs
@@ -0,0 +1,80 @@
+/// Platform independent SIMD vector types and operations.
+pub mod simd {
+    pub use coresimd::v128::*;
+    pub use coresimd::v256::*;
+    pub use coresimd::v512::*;
+    pub use coresimd::v64::*;
+}
+
+/// Platform dependent vendor intrinsics.
+pub mod vendor {
+    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+    pub use coresimd::x86::*;
+
+    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
+    pub use coresimd::arm::*;
+
+    #[cfg(target_arch = "aarch64")]
+    pub use coresimd::aarch64::*;
+
+    // FIXME: rust does not expose the nvptx and nvptx64 targets yet
+    #[cfg(not(any(target_arch = "x86", target_arch = "x86_64",
+                  target_arch = "arm", target_arch = "aarch64")))]
+    pub use coresimd::nvptx::*;
+}
+
+#[macro_use]
+mod macros;
+mod simd_llvm;
+mod v128;
+mod v256;
+mod v512;
+mod v64;
+
+/// 32-bit wide vector tpyes
+mod v32 {
+    #[cfg(not(test))]
+    use prelude::v1::*;
+    use coresimd::simd_llvm::*;
+
+    define_ty! { i16x2, i16, i16 }
+    define_impl! { i16x2, i16, 2, i16x2, x0, x1 }
+    define_ty! { u16x2, u16, u16 }
+    define_impl! { u16x2, u16, 2, i16x2, x0, x1 }
+
+    define_ty! { i8x4, i8, i8, i8, i8 }
+    define_impl! { i8x4, i8, 4, i8x4, x0, x1, x2, x3 }
+    define_ty! { u8x4, u8, u8, u8, u8 }
+    define_impl! { u8x4, u8, 4, i8x4, x0, x1, x2, x3 }
+
+    define_casts!(
+        (i16x2, i64x2, as_i64x2),
+        (u16x2, i64x2, as_i64x2),
+        (i8x4, i32x4, as_i32x4),
+        (u8x4, i32x4, as_i32x4)
+    );
+}
+
+/// 16-bit wide vector tpyes
+mod v16 {
+    #[cfg(not(test))]
+    use prelude::v1::*;
+    use coresimd::simd_llvm::*;
+
+    define_ty! { i8x2, i8, i8 }
+    define_impl! { i8x2, i8, 2, i8x2, x0, x1 }
+    define_ty! { u8x2, u8, u8 }
+    define_impl! { u8x2, u8, 2, i8x2, x0, x1 }
+
+    define_casts!((i8x2, i64x2, as_i64x2), (u8x2, i64x2, as_i64x2));
+}
+
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+mod x86;
+
+#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
+mod arm;
+#[cfg(target_arch = "aarch64")]
+mod aarch64;
+
+mod nvptx;
--- a/library/stdarch/coresimd/src/nvptx/mod.rs
+++ b/library/stdarch/coresimd/src/nvptx/mod.rs
--- a/library/stdarch/coresimd/rustfmt.toml
+++ b/library/stdarch/coresimd/rustfmt.toml
@@ -1 +0,0 @@
-../rustfmt.toml
--- a/library/stdarch/coresimd/src/simd_llvm.rs
+++ b/library/stdarch/coresimd/src/simd_llvm.rs
--- a/library/stdarch/coresimd/src/lib.rs
+++ b/library/stdarch/coresimd/src/lib.rs
@@ -1,139 +0,0 @@
-//! SIMD and vendor intrinsics support library.
-//!
-//! This documentation is only for one particular architecture, you can find
-//! others at:
-//!
-//! * [i686](https://rust-lang-nursery.github.io/stdsimd/i686/stdsimd/)
-//! * [`x86_64`](https://rust-lang-nursery.github.io/stdsimd/x86_64/stdsimd/)
-//! * [arm](https://rust-lang-nursery.github.io/stdsimd/arm/stdsimd/)
-//! * [aarch64](https://rust-lang-nursery.github.io/stdsimd/aarch64/stdsimd/)
-
-#![cfg_attr(feature = "strict", deny(warnings))]
-#![allow(dead_code)]
-#![allow(unused_features)]
-#![feature(const_fn, link_llvm_intrinsics, platform_intrinsics, repr_simd,
-           simd_ffi, target_feature, cfg_target_feature, i128_type, asm,
-           integer_atomics, stmt_expr_attributes, core_intrinsics,
-           crate_in_paths, attr_literals, rustc_attrs)]
-#![cfg_attr(test, feature(proc_macro, test, abi_vectorcall))]
-#![cfg_attr(feature = "cargo-clippy",
-            allow(inline_always, too_many_arguments, cast_sign_loss,
-                  cast_lossless, cast_possible_wrap,
-                  cast_possible_truncation, cast_precision_loss,
-                  shadow_reuse, cyclomatic_complexity, similar_names,
-                  many_single_char_names))]
-#![no_std]
-
-#[cfg(test)]
-#[macro_use]
-extern crate std;
-
-#[cfg(test)]
-extern crate stdsimd_test;
-
-#[cfg(test)]
-extern crate test;
-
-/// Platform independent SIMD vector types and operations.
-pub mod simd {
-    pub use v128::*;
-    pub use v256::*;
-    pub use v512::*;
-    pub use v64::*;
-}
-
-/// Platform dependent vendor intrinsics.
-pub mod vendor {
-    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-    pub use x86::*;
-
-    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
-    pub use arm::*;
-
-    #[cfg(target_arch = "aarch64")]
-    pub use aarch64::*;
-
-    // FIXME: rust does not expose the nvptx and nvptx64 targets yet
-    #[cfg(not(any(target_arch = "x86", target_arch = "x86_64",
-                  target_arch = "arm", target_arch = "aarch64")))]
-    pub use nvptx::*;
-}
-
-/// Run-time feature detection.
-#[doc(hidden)]
-pub mod __vendor_runtime {
-    #[cfg(any(target_arch = "x86", target_arch = "x86_64",
-              all(target_os = "linux",
-                  any(target_arch = "arm", target_arch = "aarch64",
-                      target_arch = "powerpc64"))))]
-    pub use runtime::core::*;
-
-    // Re-exports `coresimd` run-time building blocks for usage in the
-    // `stdsimd` run-time.
-    #[cfg(all(target_os = "linux",
-              any(target_arch = "arm", target_arch = "aarch64",
-                  target_arch = "powerpc64")))]
-    #[doc(hidden)]
-    pub mod __runtime {
-        pub use runtime::*;
-    }
-}
-
-#[cfg(any(target_arch = "x86", target_arch = "x86_64",
-          all(target_os = "linux",
-              any(target_arch = "arm", target_arch = "aarch64",
-                  target_arch = "powerpc64"))))]
-#[macro_use]
-mod runtime;
-
-#[macro_use]
-mod macros;
-mod simd_llvm;
-mod v128;
-mod v256;
-mod v512;
-mod v64;
-
-/// 32-bit wide vector tpyes
-mod v32 {
-    use simd_llvm::*;
-
-    define_ty! { i16x2, i16, i16 }
-    define_impl! { i16x2, i16, 2, i16x2, x0, x1 }
-    define_ty! { u16x2, u16, u16 }
-    define_impl! { u16x2, u16, 2, i16x2, x0, x1 }
-
-    define_ty! { i8x4, i8, i8, i8, i8 }
-    define_impl! { i8x4, i8, 4, i8x4, x0, x1, x2, x3 }
-    define_ty! { u8x4, u8, u8, u8, u8 }
-    define_impl! { u8x4, u8, 4, i8x4, x0, x1, x2, x3 }
-
-    define_casts!(
-        (i16x2, i64x2, as_i64x2),
-        (u16x2, i64x2, as_i64x2),
-        (i8x4, i32x4, as_i32x4),
-        (u8x4, i32x4, as_i32x4)
-    );
-}
-
-/// 16-bit wide vector tpyes
-mod v16 {
-    use simd_llvm::*;
-
-    define_ty! { i8x2, i8, i8 }
-    define_impl! { i8x2, i8, 2, i8x2, x0, x1 }
-    define_ty! { u8x2, u8, u8 }
-    define_impl! { u8x2, u8, 2, i8x2, x0, x1 }
-
-    define_casts!((i8x2, i64x2, as_i64x2), (u8x2, i64x2, as_i64x2));
-}
-
-#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-mod x86;
-
-#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
-mod arm;
-#[cfg(target_arch = "aarch64")]
-mod aarch64;
-
-mod nvptx;
--- a/library/stdarch/coresimd/src/runtime/aarch64.rs
+++ b/library/stdarch/coresimd/src/runtime/aarch64.rs
@@ -1,47 +0,0 @@
-//! Run-time feature detection on ARM Aarch64.
-use runtime::cache;
-use runtime::arch::HasFeature;
-
-#[macro_export]
-#[doc(hidden)]
-macro_rules! __unstable_detect_feature {
-    ("neon", $unstable_detect_feature:path) => {
-        // FIXME: this should be removed once we rename Aarch64 neon to asimd
-        $unstable_detect_feature($crate::__vendor_runtime::_Feature::asimd{})
-    };
-    ("asimd", $unstable_detect_feature:path) => {
-        $unstable_detect_feature($crate::__vendor_runtime::__Feature::asimd{})
-    };
-    ("pmull", $unstable_detect_feature:path) => {
-        $unstable_detect_feature($crate::__vendor_runtime::__Feature::pmull{})
-    };
-    ($t:tt, $unstable_detect_feature:path) => { compile_error!(concat!("unknown arm target feature: ", $t)) };
-}
-
-/// ARM Aarch64 CPU Feature enum. Each variant denotes a position in a bitset
-/// for a particular feature.
-///
-/// PLEASE: do not use this, it is an implementation detail subject to change.
-#[doc(hidden)]
-#[allow(non_camel_case_types)]
-#[repr(u8)]
-pub enum __Feature {
-    /// ARM Advanced SIMD (ASIMD) - Aarch64
-    asimd,
-    /// Polynomial Multiply
-    pmull,
-}
-
-pub fn detect_features<T: HasFeature>(mut x: T) -> cache::Initializer {
-    let mut value = cache::Initializer::default();
-    {
-        let mut enable_feature = |f| {
-            if x.has_feature(&f) {
-                value.set(f as u32);
-            }
-        };
-        enable_feature(__Feature::asimd);
-        enable_feature(__Feature::pmull);
-    }
-    value
-}
--- a/library/stdarch/coresimd/src/runtime/arm.rs
+++ b/library/stdarch/coresimd/src/runtime/arm.rs
@@ -1,43 +0,0 @@
-//! Run-time feature detection on ARM Aarch32.
-use runtime::cache;
-use runtime::arch::HasFeature;
-
-#[macro_export]
-#[doc(hidden)]
-macro_rules! __unstable_detect_feature {
-    ("neon", $unstable_detect_feature:path) => {
-        $unstable_detect_feature($crate::__vendor_runtime::__Feature::neon{})
-    };
-    ("pmull", $unstable_detect_feature:path) => {
-        $unstable_detect_feature($crate::__vendor_runtime::__Feature::pmull{})
-    };
-    ($t:tt, $unstable_detect_feature:path) => { compile_error!(concat!("unknown arm target feature: ", $t)) };
-}
-
-/// ARM CPU Feature enum. Each variant denotes a position in a bitset for a
-/// particular feature.
-///
-/// PLEASE: do not use this, it is an implementation detail subject to change.
-#[doc(hidden)]
-#[allow(non_camel_case_types)]
-#[repr(u8)]
-pub enum __Feature {
-    /// ARM Advanced SIMD (NEON) - Aarch32
-    neon,
-    /// Polynomial Multiply
-    pmull,
-}
-
-pub fn detect_features<T: HasFeature>(mut x: T) -> cache::Initializer {
-    let mut value = cache::Initializer::default();
-    {
-        let mut enable_feature = |f| {
-            if x.has_feature(&f) {
-                value.set(f as u32);
-            }
-        };
-        enable_feature(__Feature::neon);
-        enable_feature(__Feature::pmull);
-    }
-    value
-}
--- a/library/stdarch/coresimd/src/runtime/linux/aarch64.rs
+++ b/library/stdarch/coresimd/src/runtime/linux/aarch64.rs
@@ -1,20 +0,0 @@
-//! Run-time feature detection for Aarch64 on Linux and `core`.
-
-use runtime::bit;
-use runtime::linux::auxv::AuxVec;
-use runtime::arch::{HasFeature, __Feature};
-
-/// Probe the ELF Auxiliary vector for hardware capabilities
-///
-/// The values are part of the platform-specific [asm/hwcap.h][hwcap]
-///
-/// [hwcap]: https://github.com/torvalds/linux/blob/master/arch/arm64/include/uapi/asm/hwcap.h
-impl HasFeature for AuxVec {
-    fn has_feature(&mut self, x: &__Feature) -> bool {
-        use self::__Feature::*;
-        match *x {
-            asimd => bit::test(self.hwcap, 1),
-            pmull => bit::test(self.hwcap, 4),
-        }
-    }
-}
--- a/library/stdarch/coresimd/src/runtime/linux/arm.rs
+++ b/library/stdarch/coresimd/src/runtime/linux/arm.rs
@@ -1,20 +0,0 @@
-//! Run-time feature detection for ARM32 on Linux and `core`.
-
-use runtime::bit;
-use runtime::linux::auxv::AuxVec;
-use runtime::arch::{HasFeature, __Feature};
-
-/// Probe the ELF Auxiliary vector for hardware capabilities
-///
-/// The values are part of the platform-specific [asm/hwcap.h][hwcap]
-///
-/// [hwcap]: https://github.com/torvalds/linux/blob/master/arch/arm64/include/uapi/asm/hwcap.h
-impl HasFeature for AuxVec {
-    fn has_feature(&mut self, x: &__Feature) -> bool {
-        use self::__Feature::*;
-        match *x {
-            neon => bit::test(self.hwcap, 12),
-            pmull => bit::test(self.hwcap2, 1),
-        }
-    }
-}
--- a/library/stdarch/coresimd/src/runtime/linux/auxv.rs
+++ b/library/stdarch/coresimd/src/runtime/linux/auxv.rs
@@ -1,46 +0,0 @@
-//! ELF Auxiliary Vector
-//!
-//! The auxiliary vector is a memory region in a running ELF program's stack
-//! composed of (key: usize, value: usize) pairs.
-//!
-//! The keys used in the aux vector are platform dependent. For Linux, they are
-//! defined in [linux/auxvec.h][auxvec_h]. The hardware capabilities of a given
-//! CPU can be queried with the  `AT_HWCAP` and `AT_HWCAP2` keys.
-//!
-//! There is no perfect way of reading the auxiliary vector.
-//!
-//! - `coresimd`: if `getauxval` is available, `coresimd` will try to use it.
-//! - `stdsimd`: if `getauxval` is not available, it will try to read
-//! `/proc/self/auxv`, and if that fails it will try to read `/proc/cpuinfo`.
-//!
-//! For more information about when `getauxval` is available check the great
-//! [`auxv` crate documentation][auxv_docs].
-//!
-//! [auxvec_h]: https://github.com/torvalds/linux/blob/master/include/uapi/linux/auxvec.h
-//! [auxv_docs]: https://docs.rs/auxv/0.3.3/auxv/
-
-/// Key to access the CPU Hardware capabilities bitfield.
-pub const AT_HWCAP: usize = 16;
-/// Key to access the CPU Hardware capabilities 2 bitfield.
-pub const AT_HWCAP2: usize = 26;
-
-/// Cache HWCAP bitfields of the ELF Auxiliary Vector.
-///
-/// If an entry cannot be read all the bits in the bitfield
-/// are set to zero.
-#[cfg(any(target_arch = "arm", target_arch = "powerpc64"))]
-#[derive(Debug, Copy, Clone)]
-pub struct AuxVec {
-    pub hwcap: usize,
-    pub hwcap2: usize,
-}
-
-/// Cache HWCAP bitfields of the ELF Auxiliary Vector.
-///
-/// If an entry cannot be read all the bits in the bitfield
-/// are set to zero.
-#[cfg(target_arch = "aarch64")]
-#[derive(Debug, Copy, Clone)]
-pub struct AuxVec {
-    pub hwcap: usize,
-}
--- a/library/stdarch/coresimd/src/runtime/linux/mod.rs
+++ b/library/stdarch/coresimd/src/runtime/linux/mod.rs
@@ -1,12 +0,0 @@
-//! Run-time feature detection for ARM and PowerPC64 on Linux.
-
-#[cfg(target_arch = "arm")]
-mod arm;
-
-#[cfg(target_arch = "aarch64")]
-mod aarch64;
-
-#[cfg(target_arch = "powerpc64")]
-mod powerpc64;
-
-pub mod auxv;
--- a/library/stdarch/coresimd/src/runtime/linux/powerpc64.rs
+++ b/library/stdarch/coresimd/src/runtime/linux/powerpc64.rs
@@ -1,22 +0,0 @@
-//! Run-time feature detection for PowerPC64 on Linux and `core`.
-
-use runtime::linux::auxv::AuxVec;
-use runtime::arch::{HasFeature, __Feature};
-
-/// Probe the ELF Auxiliary vector for hardware capabilities
-///
-/// The values are part of the platform-specific [asm/cputable.h][cputable]
-///
-/// [cputable]: https://github.com/torvalds/linux/blob/master/arch/powerpc/include/uapi/asm/cputable.h
-impl HasFeature for AuxVec {
-    fn has_feature(&mut self, x: &__Feature) -> bool {
-        use self::__Feature::*;
-        // note: the PowerPC values are the mask to do the test (instead of the
-        // index of the bit to test like in ARM and Aarch64)
-        match *x {
-            altivec => self.hwcap & 0x10000000 != 0,
-            vsx => self.hwcap & 0x00000080 != 0,
-            power8 => self.hwcap2 & 0x80000000 != 0,
-        }
-    }
-}
--- a/library/stdarch/coresimd/src/runtime/macros.rs
+++ b/library/stdarch/coresimd/src/runtime/macros.rs
@@ -1,30 +0,0 @@
-//! Run-time feature detection macros.
-
-/// Is a feature supported by the host CPU?
-///
-/// This macro performs run-time feature detection in `coresimd`. It returns
-/// true if the host CPU in which the binary is running on supports a
-/// particular feature.
-#[macro_export]
-macro_rules! cfg_feature_enabled {
-    ($name:tt) => (
-        {
-            #[cfg(target_feature = $name)]
-            {
-                true
-            }
-            #[cfg(not(target_feature = $name))]
-            {
-                #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-                {
-                    __unstable_detect_feature!($name,
-                                               $crate::__vendor_runtime::__unstable_detect_feature)
-                }
-                #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
-                {
-                    compile_error!("cfg_target_feature! is not supported in this architecture")
-                }
-            }
-        }
-    )
-}
--- a/library/stdarch/coresimd/src/runtime/mod.rs
+++ b/library/stdarch/coresimd/src/runtime/mod.rs
@@ -1,64 +0,0 @@
-//! Run-time feature detection
-pub mod cache;
-pub mod bit;
-
-#[macro_use]
-pub mod macros;
-
-#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-#[macro_use]
-pub mod x86;
-
-#[cfg(target_arch = "arm")]
-#[macro_use]
-pub mod arm;
-
-#[cfg(target_arch = "aarch64")]
-#[macro_use]
-pub mod aarch64;
-
-#[cfg(target_arch = "powerpc64")]
-#[macro_use]
-pub mod powerpc64;
-
-#[cfg(all(target_os = "linux",
-          any(target_arch = "arm", target_arch = "aarch64",
-              target_arch = "powerpc64")))]
-pub mod linux;
-
-/// Exports architecture specific functionality for
-/// reuse in `stdsimd`.
-pub mod arch {
-    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-    pub use super::x86::{detect_features, __Feature};
-
-    #[cfg(target_arch = "arm")]
-    pub use runtime::arm::{detect_features, __Feature};
-
-    #[cfg(target_arch = "aarch64")]
-    pub use runtime::aarch64::{detect_features, __Feature};
-
-    #[cfg(target_arch = "powerpc64")]
-    pub use runtime::powerpc64::{detect_features, __Feature};
-
-    /// Interface for querying whether a feature is enabled.
-    pub trait HasFeature {
-        /// Is the feature `x` enabled at run-time?
-        fn has_feature(&mut self, x: &__Feature) -> bool;
-    }
-}
-
-/// Run-time feature detection exposed by `coresimd`.
-pub mod core {
-    pub use super::arch::__Feature;
-
-    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-    pub use super::arch::detect_features;
-
-    /// Performs run-time feature detection.
-    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-    #[doc(hidden)]
-    pub fn __unstable_detect_feature(x: __Feature) -> bool {
-        super::cache::test(x as u32, detect_features)
-    }
-}
--- a/library/stdarch/coresimd/src/runtime/powerpc64.rs
+++ b/library/stdarch/coresimd/src/runtime/powerpc64.rs
@@ -1,49 +0,0 @@
-//! Run-time feature detection on PowerPC64.
-use runtime::cache;
-use runtime::arch::HasFeature;
-
-#[macro_export]
-#[doc(hidden)]
-macro_rules! __unstable_detect_feature {
-    ("altivec", $unstable_detect_feature:path) => {
-        $unstable_detect_feature($crate::__vendor_runtime::__Feature::altivec{})
-    };
-    ("vsx", $unstable_detect_feature:path) => {
-        $unstable_detect_feature($crate::__vendor_runtime::__Feature::vsx{})
-    };
-    ("power8", $unstable_detect_feature:path) => {
-        $unstable_detect_feature($crate::__vendor_runtime::__Feature::power8{})
-    };
-    ($t:tt, $unstable_detect_feature:path) => { compile_error!(concat!("unknown PowerPC target feature: ", $t)) };
-}
-
-/// PowerPC CPU Feature enum. Each variant denotes a position in a bitset
-/// for a particular feature.
-///
-/// PLEASE: do not use this, it is an implementation detail subject to change.
-#[doc(hidden)]
-#[allow(non_camel_case_types)]
-#[repr(u8)]
-pub enum __Feature {
-    /// Altivec
-    altivec,
-    /// VSX
-    vsx,
-    /// Power8
-    power8,
-}
-
-pub fn detect_features<T: HasFeature>(mut x: T) -> cache::Initializer {
-    let mut value = cache::Initializer::default();
-    {
-        let mut enable_feature = |f| {
-            if x.has_feature(&f) {
-                value.set(f as u32);
-            }
-        };
-        enable_feature(__Feature::altivec);
-        enable_feature(__Feature::vsx);
-        enable_feature(__Feature::power8);
-    }
-    value
-}
--- a/library/stdarch/coresimd/src/runtime/x86.rs
+++ b/library/stdarch/coresimd/src/runtime/x86.rs
@@ -1,554 +0,0 @@
-//! This module implements minimal run-time feature detection for x86.
-//!
-//! The features are detected using the `detect_features` function below.
-//! This function uses the CPUID instruction to read the feature flags from the
-//! CPU and encodes them in an `usize` where each bit position represents
-//! whether a feature is available (bit is set) or unavaiable (bit is cleared).
-//!
-//! The enum `__Feature` is used to map bit positions to feature names, and the
-//! the `__unstable_detect_feature!` macro is used to map string literals (e.g.
-//! "avx") to these bit positions (e.g. `__Feature::avx`).
-//!
-//!
-//! The run-time feature detection is performed by the
-//! `__unstable_detect_feature(__Feature) -> bool` function. On its first call,
-//! this functions queries the CPU for the available features and stores them
-//! in a global `AtomicUsize` variable. The query is performed by just checking
-//! whether the feature bit in this global variable is set or cleared.
-
-use core::mem;
-
-use super::{bit, cache};
-
-/// This macro maps the string-literal feature names to values of the
-/// `__Feature` enum at compile-time. The feature names used are the same as
-/// those of rustc `target_feature` and `cfg_target_feature` features.
-///
-/// PLESE: do not use this, it is an implementation detail subjected to change.
-#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-#[macro_export]
-#[doc(hidden)]
-macro_rules! __unstable_detect_feature {
-    ("aes", $unstable_detect_feature:path) => {
-        $unstable_detect_feature(
-            $crate::__vendor_runtime::__Feature::aes{})  };
-    ("tsc", $unstable_detect_feature:path) => {
-        $unstable_detect_feature(
-            $crate::__vendor_runtime::__Feature::tsc{})  };
-    ("mmx", $unstable_detect_feature:path) => {
-        $unstable_detect_feature(
-            $crate::__vendor_runtime::__Feature::mmx{})  };
-    ("sse", $unstable_detect_feature:path) => {
-        $unstable_detect_feature(
-            $crate::__vendor_runtime::__Feature::sse{})  };
-    ("sse2", $unstable_detect_feature:path) => {
-        $unstable_detect_feature(
-            $crate::__vendor_runtime::__Feature::sse2{})
-    };
-    ("sse3", $unstable_detect_feature:path) => {
-        $unstable_detect_feature(
-            $crate::__vendor_runtime::__Feature::sse3{})
-    };
-    ("ssse3", $unstable_detect_feature:path) => {
-        $unstable_detect_feature(
-            $crate::__vendor_runtime::__Feature::ssse3{})
-    };
-    ("sse4.1", $unstable_detect_feature:path) => {
-        $unstable_detect_feature(
-            $crate::__vendor_runtime::__Feature::sse4_1{})
-    };
-    ("sse4.2", $unstable_detect_feature:path) => {
-        $unstable_detect_feature(
-            $crate::__vendor_runtime::__Feature::sse4_2{})
-    };
-    ("sse4a", $unstable_detect_feature:path) => {
-        $unstable_detect_feature(
-            $crate::__vendor_runtime::__Feature::sse4a{})
-    };
-    ("avx", $unstable_detect_feature:path) => {
-        $unstable_detect_feature(
-            $crate::__vendor_runtime::__Feature::avx{})
-    };
-    ("avx2", $unstable_detect_feature:path) => {
-        $unstable_detect_feature(
-            $crate::__vendor_runtime::__Feature::avx2{})
-    };
-    ("avx512f", $unstable_detect_feature:path) => {
-        $unstable_detect_feature(
-            $crate::__vendor_runtime::__Feature::avx512f{})
-    };
-    ("avx512cd", $unstable_detect_feature:path) => {
-        $unstable_detect_feature(
-            $crate::__vendor_runtime::__Feature::avx512cd{})
-    };
-    ("avx512er", $unstable_detect_feature:path) => {
-        $unstable_detect_feature(
-            $crate::__vendor_runtime::__Feature::avx512er{})
-    };
-    ("avx512pf", $unstable_detect_feature:path) => {
-        $unstable_detect_feature(
-            $crate::__vendor_runtime::__Feature::avx512pf{})
-    };
-    ("avx512bw", $unstable_detect_feature:path) => {
-        $unstable_detect_feature(
-            $crate::__vendor_runtime::__Feature::avx512bw{})
-    };
-    ("avx512dq", $unstable_detect_feature:path) => {
-        $unstable_detect_feature(
-            $crate::__vendor_runtime::__Feature::avx512dq{})
-    };
-    ("avx512vl", $unstable_detect_feature:path) => {
-        $unstable_detect_feature(
-            $crate::__vendor_runtime::__Feature::avx512vl{})
-    };
-    ("avx512ifma", $unstable_detect_feature:path) => {
-        $unstable_detect_feature(
-            $crate::__vendor_runtime::__Feature::avx512_ifma{})
-    };
-    ("avx512vbmi", $unstable_detect_feature:path) => {
-        $unstable_detect_feature(
-            $crate::__vendor_runtime::__Feature::avx512_vbmi{})
-    };
-    ("avx512vpopcntdq", $unstable_detect_feature:path) => {
-        $unstable_detect_feature(
-            $crate::__vendor_runtime::__Feature::avx512_vpopcntdq{})
-    };
-    ("fma", $unstable_detect_feature:path) => {
-        $unstable_detect_feature(
-            $crate::__vendor_runtime::__Feature::fma{})
-    };
-    ("bmi", $unstable_detect_feature:path) => {
-        $unstable_detect_feature(
-            $crate::__vendor_runtime::__Feature::bmi{})
-    };
-    ("bmi2", $unstable_detect_feature:path) => {
-        $unstable_detect_feature(
-            $crate::__vendor_runtime::__Feature::bmi2{})
-    };
-    ("abm", $unstable_detect_feature:path) => {
-        $unstable_detect_feature(
-            $crate::__vendor_runtime::__Feature::abm{})
-    };
-    ("lzcnt", $unstable_detect_feature:path) => {
-        $unstable_detect_feature(
-            $crate::__vendor_runtime::__Feature::abm{})
-    };
-    ("tbm", $unstable_detect_feature:path) => {
-        $unstable_detect_feature(
-            $crate::__vendor_runtime::__Feature::tbm{})
-    };
-    ("popcnt", $unstable_detect_feature:path) => {
-        $unstable_detect_feature(
-            $crate::__vendor_runtime::__Feature::popcnt{})
-    };
-    ("fxsr", $unstable_detect_feature:path) => {
-        $unstable_detect_feature(
-            $crate::__vendor_runtime::__Feature::fxsr{})
-    };
-    ("xsave", $unstable_detect_feature:path) => {
-        $unstable_detect_feature(
-            $crate::__vendor_runtime::__Feature::xsave{})
-    };
-    ("xsaveopt", $unstable_detect_feature:path) => {
-        $unstable_detect_feature(
-            $crate::__vendor_runtime::__Feature::xsaveopt{})
-    };
-    ("xsaves", $unstable_detect_feature:path) => {
-        $unstable_detect_feature(
-            $crate::__vendor_runtime::__Feature::xsaves{})
-    };
-    ("xsavec", $unstable_detect_feature:path) => {
-        $unstable_detect_feature(
-            $crate::__vendor_runtime::__Feature::xsavec{})
-    };
-    ($t:tt, $unstable_detect_feature:path) => {
-        compile_error!(concat!("unknown target feature: ", $t))
-    };
-}
-
-/// X86 CPU Feature enum. Each variant denotes a position in a bitset for a
-/// particular feature.
-///
-/// PLEASE: do not use this, it is an implementation detail subject to change.
-#[doc(hidden)]
-#[allow(non_camel_case_types)]
-#[repr(u8)]
-pub enum __Feature {
-    /// AES (Advanced Encryption Standard New Instructions AES-NI)
-    aes,
-    /// TSC (Time Stamp Counter)
-    tsc,
-    /// MMX
-    mmx,
-    /// SSE (Streaming SIMD Extensions)
-    sse,
-    /// SSE2 (Streaming SIMD Extensions 2)
-    sse2,
-    /// SSE3 (Streaming SIMD Extensions 3)
-    sse3,
-    /// SSSE3 (Supplemental Streaming SIMD Extensions 3)
-    ssse3,
-    /// SSE4.1 (Streaming SIMD Extensions 4.1)
-    sse4_1,
-    /// SSE4.2 (Streaming SIMD Extensions 4.2)
-    sse4_2,
-    /// SSE4a (Streaming SIMD Extensions 4a)
-    sse4a,
-    /// AVX (Advanced Vector Extensions)
-    avx,
-    /// AVX2 (Advanced Vector Extensions 2)
-    avx2,
-    /// AVX-512 F (Foundation)
-    avx512f,
-    /// AVX-512 CD (Conflict Detection Instructions)
-    avx512cd,
-    /// AVX-512 ER (Exponential and Reciprocal Instructions)
-    avx512er,
-    /// AVX-512 PF (Prefetch Instructions)
-    avx512pf,
-    /// AVX-512 BW (Byte and Word Instructions)
-    avx512bw,
-    /// AVX-512 DQ (Doubleword and Quadword)
-    avx512dq,
-    /// AVX-512 VL (Vector Length Extensions)
-    avx512vl,
-    /// AVX-512 IFMA (Integer Fused Multiply Add)
-    avx512_ifma,
-    /// AVX-512 VBMI (Vector Byte Manipulation Instructions)
-    avx512_vbmi,
-    /// AVX-512 VPOPCNTDQ (Vector Population Count Doubleword and
-    /// Quadword)
-    avx512_vpopcntdq,
-    /// FMA (Fused Multiply Add)
-    fma,
-    /// BMI1 (Bit Manipulation Instructions 1)
-    bmi,
-    /// BMI1 (Bit Manipulation Instructions 2)
-    bmi2,
-    /// ABM (Advanced Bit Manipulation) on AMD / LZCNT (Leading Zero
-    /// Count) on Intel
-    abm,
-    /// TBM (Trailing Bit Manipulation)
-    tbm,
-    /// POPCNT (Population Count)
-    popcnt,
-    /// FXSR (Floating-point context fast save and restor)
-    fxsr,
-    /// XSAVE (Save Processor Extended States)
-    xsave,
-    /// XSAVEOPT (Save Processor Extended States Optimized)
-    xsaveopt,
-    /// XSAVES (Save Processor Extended States Supervisor)
-    xsaves,
-    /// XSAVEC (Save Processor Extended States Compacted)
-    xsavec,
-    #[doc(hidden)]
-    __NonExhaustive,
-}
-
-/// Run-time feature detection on x86 works by using the CPUID instruction.
-///
-/// The [CPUID Wikipedia page][wiki_cpuid] contains
-/// all the information about which flags to set to query which values, and in
-/// which registers these are reported.
-///
-/// The definitive references are:
-/// - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2:
-///   Instruction Set Reference, A-Z][intel64_ref].
-/// - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and
-///   System Instructions][amd64_ref].
-///
-/// [wiki_cpuid]: https://en.wikipedia.org/wiki/CPUID
-/// [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
-/// [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
-pub fn detect_features() -> cache::Initializer {
-    use vendor::{__cpuid, __cpuid_count, has_cpuid, CpuidResult};
-    use vendor::_xgetbv;
-    let mut value = cache::Initializer::default();
-
-    // If the x86 CPU does not support the CPUID instruction then it is too
-    // old to support any of the currently-detectable features.
-    if !has_cpuid() {
-        return value;
-    }
-
-    // Calling `__cpuid`/`__cpuid_count` from here on is safe because the CPU
-    // has `cpuid` support.
-
-    // 0. EAX = 0: Basic Information:
-    // - EAX returns the "Highest Function Parameter", that is, the maximum
-    // leaf value for subsequent calls of `cpuinfo` in range [0,
-    // 0x8000_0000]. - The vendor ID is stored in 12 u8 ascii chars,
-    // returned in EBX, EDX, and   ECX (in that order):
-    let (max_basic_leaf, vendor_id) = unsafe {
-        let CpuidResult {
-            eax: max_basic_leaf,
-            ebx,
-            ecx,
-            edx,
-        } = __cpuid(0);
-        let vendor_id: [[u8; 4]; 3] = [
-            mem::transmute(ebx),
-            mem::transmute(edx),
-            mem::transmute(ecx),
-        ];
-        let vendor_id: [u8; 12] = mem::transmute(vendor_id);
-        (max_basic_leaf, vendor_id)
-    };
-
-    if max_basic_leaf < 1 {
-        // Earlier Intel 486, CPUID not implemented
-        return value;
-    }
-
-    // EAX = 1, ECX = 0: Queries "Processor Info and Feature Bits";
-    // Contains information about most x86 features.
-    let CpuidResult {
-        ecx: proc_info_ecx,
-        edx: proc_info_edx,
-        ..
-    } = unsafe { __cpuid(0x0000_0001_u32) };
-
-    // EAX = 7, ECX = 0: Queries "Extended Features";
-    // Contains information about bmi,bmi2, and avx2 support.
-    let (extended_features_ebx, extended_features_ecx) = if max_basic_leaf >= 7
-    {
-        let CpuidResult { ebx, ecx, .. } = unsafe { __cpuid(0x0000_0007_u32) };
-        (ebx, ecx)
-    } else {
-        (0, 0) // CPUID does not support "Extended Features"
-    };
-
-    // EAX = 0x8000_0000, ECX = 0: Get Highest Extended Function Supported
-    // - EAX returns the max leaf value for extended information, that is,
-    // `cpuid` calls in range [0x8000_0000; u32::MAX]:
-    let CpuidResult {
-        eax: extended_max_basic_leaf,
-        ..
-    } = unsafe { __cpuid(0x8000_0000_u32) };
-
-    // EAX = 0x8000_0001, ECX=0: Queries "Extended Processor Info and Feature
-    // Bits"
-    let extended_proc_info_ecx = if extended_max_basic_leaf >= 1 {
-        let CpuidResult { ecx, .. } = unsafe { __cpuid(0x8000_0001_u32) };
-        ecx
-    } else {
-        0
-    };
-
-    {
-        // borrows value till the end of this scope:
-        let mut enable = |r, rb, f| {
-            if bit::test(r as usize, rb) {
-                value.set(f as u32);
-            }
-        };
-
-        enable(proc_info_ecx, 0, __Feature::sse3);
-        enable(proc_info_ecx, 9, __Feature::ssse3);
-        enable(proc_info_ecx, 12, __Feature::fma);
-        enable(proc_info_ecx, 19, __Feature::sse4_1);
-        enable(proc_info_ecx, 20, __Feature::sse4_2);
-        enable(proc_info_ecx, 23, __Feature::popcnt);
-        enable(proc_info_ecx, 25, __Feature::aes);
-        enable(proc_info_edx, 4, __Feature::tsc);
-        enable(proc_info_edx, 23, __Feature::mmx);
-        enable(proc_info_edx, 24, __Feature::fxsr);
-        enable(proc_info_edx, 25, __Feature::sse);
-        enable(proc_info_edx, 26, __Feature::sse2);
-
-        enable(extended_features_ebx, 3, __Feature::bmi);
-        enable(extended_features_ebx, 8, __Feature::bmi2);
-
-        // `XSAVE` and `AVX` support:
-        let cpu_xsave = bit::test(proc_info_ecx as usize, 26);
-        if cpu_xsave {
-            // 0. Here the CPU supports `XSAVE`.
-
-            // 1. Detect `OSXSAVE`, that is, whether the OS is AVX enabled and
-            // supports saving the state of the AVX/AVX2 vector registers on
-            // context-switches, see:
-            //
-            // - [intel: is avx enabled?][is_avx_enabled],
-            // - [mozilla: sse.cpp][mozilla_sse_cpp].
-            //
-            // [is_avx_enabled]: https://software.intel.com/en-us/blogs/2011/04/14/is-avx-enabled
-            // [mozilla_sse_cpp]: https://hg.mozilla.org/mozilla-central/file/64bab5cbb9b6/mozglue/build/SSE.cpp#l190
-            let cpu_osxsave = bit::test(proc_info_ecx as usize, 27);
-
-            // 2. The OS must have signaled the CPU that it supports saving and
-            // restoring the SSE and AVX registers by setting `XCR0.SSE[1]` and
-            // `XCR0.AVX[2]` to `1`.
-            //
-            // This is safe because the CPU supports `xsave`
-            let xcr0 = unsafe { _xgetbv(0) };
-            let os_avx_support = xcr0 & 6 == 6;
-            let os_avx512_support = xcr0 & 224 == 224;
-
-            // Only if the OS and the CPU support saving/restoring the AVX
-            // registers we enable `xsave` support:
-            if cpu_osxsave && os_avx_support {
-                // See "13.3 ENABLING THE XSAVE FEATURE SET AND XSAVE-ENABLED
-                // FEATURES" in the "Intel® 64 and IA-32 Architectures Software
-                // Developer’s Manual, Volume 1: Basic Architecture":
-                //
-                // "Software enables the XSAVE feature set by setting
-                // CR4.OSXSAVE[bit 18] to 1 (e.g., with the MOV to CR4
-                // instruction). If this bit is 0, execution of any of XGETBV,
-                // XRSTOR, XRSTORS, XSAVE, XSAVEC, XSAVEOPT, XSAVES, and XSETBV
-                // causes an invalid-opcode exception (#UD)"
-                //
-                enable(proc_info_ecx, 26, __Feature::xsave);
-
-                // For `xsaveopt`, `xsavec`, and `xsaves` we need to query:
-                // Processor Extended State Enumeration Sub-leaf (EAX = 0DH,
-                // ECX = 1):
-                if max_basic_leaf >= 0xd {
-                    let CpuidResult {
-                        eax: proc_extended_state1_eax,
-                        ..
-                    } = unsafe { __cpuid_count(0xd_u32, 1) };
-                    enable(proc_extended_state1_eax, 0, __Feature::xsaveopt);
-                    enable(proc_extended_state1_eax, 1, __Feature::xsavec);
-                    enable(proc_extended_state1_eax, 3, __Feature::xsaves);
-                }
-
-                // And AVX/AVX2:
-                enable(proc_info_ecx, 28, __Feature::avx);
-                enable(extended_features_ebx, 5, __Feature::avx2);
-
-                // For AVX-512 the OS also needs to support saving/restoring
-                // the extended state, only then we enable AVX-512 support:
-                if os_avx512_support {
-                    enable(extended_features_ebx, 16, __Feature::avx512f);
-                    enable(extended_features_ebx, 17, __Feature::avx512dq);
-                    enable(extended_features_ebx, 21, __Feature::avx512_ifma);
-                    enable(extended_features_ebx, 26, __Feature::avx512pf);
-                    enable(extended_features_ebx, 27, __Feature::avx512er);
-                    enable(extended_features_ebx, 28, __Feature::avx512cd);
-                    enable(extended_features_ebx, 30, __Feature::avx512bw);
-                    enable(extended_features_ebx, 31, __Feature::avx512vl);
-                    enable(extended_features_ecx, 1, __Feature::avx512_vbmi);
-                    enable(
-                        extended_features_ecx,
-                        14,
-                        __Feature::avx512_vpopcntdq,
-                    );
-                }
-            }
-        }
-
-        // This detects ABM on AMD CPUs and LZCNT on Intel CPUs.
-        // On intel CPUs with popcnt, lzcnt implements the
-        // "missing part" of ABM, so we map both to the same
-        // internal feature.
-        //
-        // The `cfg_feature_enabled!("lzcnt")` macro then
-        // internally maps to __Feature::abm.
-        enable(extended_proc_info_ecx, 5, __Feature::abm);
-        if vendor_id == *b"AuthenticAMD" {
-            // These features are only available on AMD CPUs:
-            enable(extended_proc_info_ecx, 6, __Feature::sse4a);
-            enable(extended_proc_info_ecx, 21, __Feature::tbm);
-        }
-    }
-
-    value
-}
-
-#[cfg(test)]
-mod tests {
-    extern crate cupid;
-
-    #[test]
-    fn dump() {
-        println!("aes: {:?}", cfg_feature_enabled!("aes"));
-        println!("tsc: {:?}", cfg_feature_enabled!("tsc"));
-        println!("sse: {:?}", cfg_feature_enabled!("sse"));
-        println!("sse2: {:?}", cfg_feature_enabled!("sse2"));
-        println!("sse3: {:?}", cfg_feature_enabled!("sse3"));
-        println!("ssse3: {:?}", cfg_feature_enabled!("ssse3"));
-        println!("sse4.1: {:?}", cfg_feature_enabled!("sse4.1"));
-        println!("sse4.2: {:?}", cfg_feature_enabled!("sse4.2"));
-        println!("sse4a: {:?}", cfg_feature_enabled!("sse4a"));
-        println!("avx: {:?}", cfg_feature_enabled!("avx"));
-        println!("avx2: {:?}", cfg_feature_enabled!("avx2"));
-        println!("avx512f {:?}", cfg_feature_enabled!("avx512f"));
-        println!("avx512cd {:?}", cfg_feature_enabled!("avx512cd"));
-        println!("avx512er {:?}", cfg_feature_enabled!("avx512er"));
-        println!("avx512pf {:?}", cfg_feature_enabled!("avx512pf"));
-        println!("avx512bw {:?}", cfg_feature_enabled!("avx512bw"));
-        println!("avx512dq {:?}", cfg_feature_enabled!("avx512dq"));
-        println!("avx512vl {:?}", cfg_feature_enabled!("avx512vl"));
-        println!("avx512_ifma {:?}", cfg_feature_enabled!("avx512ifma"));
-        println!("avx512_vbmi {:?}", cfg_feature_enabled!("avx512vbmi"));
-        println!(
-            "avx512_vpopcntdq {:?}",
-            cfg_feature_enabled!("avx512vpopcntdq")
-        );
-        println!("fma: {:?}", cfg_feature_enabled!("fma"));
-        println!("abm: {:?}", cfg_feature_enabled!("abm"));
-        println!("bmi: {:?}", cfg_feature_enabled!("bmi"));
-        println!("bmi2: {:?}", cfg_feature_enabled!("bmi2"));
-        println!("tbm: {:?}", cfg_feature_enabled!("tbm"));
-        println!("popcnt: {:?}", cfg_feature_enabled!("popcnt"));
-        println!("lzcnt: {:?}", cfg_feature_enabled!("lzcnt"));
-        println!("fxsr: {:?}", cfg_feature_enabled!("fxsr"));
-        println!("xsave: {:?}", cfg_feature_enabled!("xsave"));
-        println!("xsaveopt: {:?}", cfg_feature_enabled!("xsaveopt"));
-        println!("xsaves: {:?}", cfg_feature_enabled!("xsaves"));
-        println!("xsavec: {:?}", cfg_feature_enabled!("xsavec"));
-    }
-
-    #[test]
-    fn compare_with_cupid() {
-        let information = cupid::master().unwrap();
-        assert_eq!(cfg_feature_enabled!("aes"), information.aesni());
-        assert_eq!(cfg_feature_enabled!("tsc"), information.tsc());
-        assert_eq!(cfg_feature_enabled!("sse"), information.sse());
-        assert_eq!(cfg_feature_enabled!("sse2"), information.sse2());
-        assert_eq!(cfg_feature_enabled!("sse3"), information.sse3());
-        assert_eq!(cfg_feature_enabled!("ssse3"), information.ssse3());
-        assert_eq!(cfg_feature_enabled!("sse4.1"), information.sse4_1());
-        assert_eq!(cfg_feature_enabled!("sse4.2"), information.sse4_2());
-        assert_eq!(cfg_feature_enabled!("sse4a"), information.sse4a());
-        assert_eq!(cfg_feature_enabled!("avx"), information.avx());
-        assert_eq!(cfg_feature_enabled!("avx2"), information.avx2());
-        assert_eq!(cfg_feature_enabled!("avx512f"), information.avx512f());
-        assert_eq!(cfg_feature_enabled!("avx512cd"), information.avx512cd());
-        assert_eq!(cfg_feature_enabled!("avx512er"), information.avx512er());
-        assert_eq!(cfg_feature_enabled!("avx512pf"), information.avx512pf());
-        assert_eq!(cfg_feature_enabled!("avx512bw"), information.avx512bw());
-        assert_eq!(cfg_feature_enabled!("avx512dq"), information.avx512dq());
-        assert_eq!(cfg_feature_enabled!("avx512vl"), information.avx512vl());
-        assert_eq!(
-            cfg_feature_enabled!("avx512ifma"),
-            information.avx512_ifma()
-        );
-        assert_eq!(
-            cfg_feature_enabled!("avx512vbmi"),
-            information.avx512_vbmi()
-        );
-        assert_eq!(
-            cfg_feature_enabled!("avx512vpopcntdq"),
-            information.avx512_vpopcntdq()
-        );
-        assert_eq!(cfg_feature_enabled!("fma"), information.fma());
-        assert_eq!(cfg_feature_enabled!("bmi"), information.bmi1());
-        assert_eq!(cfg_feature_enabled!("bmi2"), information.bmi2());
-        assert_eq!(cfg_feature_enabled!("popcnt"), information.popcnt());
-        assert_eq!(cfg_feature_enabled!("abm"), information.lzcnt());
-        assert_eq!(cfg_feature_enabled!("tbm"), information.tbm());
-        assert_eq!(cfg_feature_enabled!("lzcnt"), information.lzcnt());
-        assert_eq!(cfg_feature_enabled!("xsave"), information.xsave());
-        assert_eq!(cfg_feature_enabled!("xsaveopt"), information.xsaveopt());
-        assert_eq!(
-            cfg_feature_enabled!("xsavec"),
-            information.xsavec_and_xrstor()
-        );
-        assert_eq!(
-            cfg_feature_enabled!("xsaves"),
-            information.xsaves_xrstors_and_ia32_xss()
-        );
-    }
-}
--- a/library/stdarch/coresimd/tests/cpu-detection.rs
+++ b/library/stdarch/coresimd/tests/cpu-detection.rs
@@ -1,47 +0,0 @@
-#![feature(cfg_target_feature)]
-#![cfg_attr(feature = "strict", deny(warnings))]
-#![cfg_attr(feature = "cargo-clippy",
-            allow(option_unwrap_used, print_stdout, use_debug))]
-
-#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-#[macro_use]
-extern crate coresimd;
-
-#[test]
-#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-fn x86_all() {
-    println!("sse: {:?}", cfg_feature_enabled!("sse"));
-    println!("sse2: {:?}", cfg_feature_enabled!("sse2"));
-    println!("sse3: {:?}", cfg_feature_enabled!("sse3"));
-    println!("ssse3: {:?}", cfg_feature_enabled!("ssse3"));
-    println!("sse4.1: {:?}", cfg_feature_enabled!("sse4.1"));
-    println!("sse4.2: {:?}", cfg_feature_enabled!("sse4.2"));
-    println!("sse4a: {:?}", cfg_feature_enabled!("sse4a"));
-    println!("avx: {:?}", cfg_feature_enabled!("avx"));
-    println!("avx2: {:?}", cfg_feature_enabled!("avx2"));
-    println!("avx512f {:?}", cfg_feature_enabled!("avx512f"));
-    println!("avx512cd {:?}", cfg_feature_enabled!("avx512cd"));
-    println!("avx512er {:?}", cfg_feature_enabled!("avx512er"));
-    println!("avx512pf {:?}", cfg_feature_enabled!("avx512pf"));
-    println!("avx512bw {:?}", cfg_feature_enabled!("avx512bw"));
-    println!("avx512dq {:?}", cfg_feature_enabled!("avx512dq"));
-    println!("avx512vl {:?}", cfg_feature_enabled!("avx512vl"));
-    println!("avx512_ifma {:?}", cfg_feature_enabled!("avx512ifma"));
-    println!("avx512_vbmi {:?}", cfg_feature_enabled!("avx512vbmi"));
-    println!(
-        "avx512_vpopcntdq {:?}",
-        cfg_feature_enabled!("avx512vpopcntdq")
-    );
-    println!("fma: {:?}", cfg_feature_enabled!("fma"));
-    println!("abm: {:?}", cfg_feature_enabled!("abm"));
-    println!("bmi: {:?}", cfg_feature_enabled!("bmi"));
-    println!("bmi2: {:?}", cfg_feature_enabled!("bmi2"));
-    println!("tbm: {:?}", cfg_feature_enabled!("tbm"));
-    println!("popcnt: {:?}", cfg_feature_enabled!("popcnt"));
-    println!("lzcnt: {:?}", cfg_feature_enabled!("lzcnt"));
-    println!("fxsr: {:?}", cfg_feature_enabled!("fxsr"));
-    println!("xsave: {:?}", cfg_feature_enabled!("xsave"));
-    println!("xsaveopt: {:?}", cfg_feature_enabled!("xsaveopt"));
-    println!("xsaves: {:?}", cfg_feature_enabled!("xsaves"));
-    println!("xsavec: {:?}", cfg_feature_enabled!("xsavec"));
-}
--- a/library/stdarch/coresimd/src/v128.rs
+++ b/library/stdarch/coresimd/src/v128.rs
@@ -1,6 +1,8 @@
 //! 128-bit wide vector types

-use simd_llvm::*;
+use prelude::v1::*;
+
+use coresimd::simd_llvm::*;

 define_ty! { f64x2, f64, f64 }
 define_impl! { f64x2, f64, 2, i64x2, x0, x1 }
--- a/library/stdarch/coresimd/src/v256.rs
+++ b/library/stdarch/coresimd/src/v256.rs
@@ -1,6 +1,8 @@
 //! 256-bit wide vector types

-use simd_llvm::*;
+use prelude::v1::*;
+
+use coresimd::simd_llvm::*;

 define_ty! { f64x4, f64, f64, f64, f64 }
 define_impl! { f64x4, f64, 4, i64x4, x0, x1, x2, x3 }
--- a/library/stdarch/coresimd/src/v512.rs
+++ b/library/stdarch/coresimd/src/v512.rs
@@ -1,6 +1,8 @@
 //! 512-bit wide vector types

-use simd_llvm::*;
+use prelude::v1::*;
+
+use coresimd::simd_llvm::*;

 define_ty! { f64x8, f64, f64, f64, f64, f64, f64, f64, f64 }
 define_impl! { f64x8, f64, 8, i64x8, x0, x1, x2, x3, x4, x5, x6, x7 }
--- a/library/stdarch/coresimd/src/v64.rs
+++ b/library/stdarch/coresimd/src/v64.rs
@@ -1,6 +1,8 @@
 //! 64-bit wide vector types

-use simd_llvm::*;
+use prelude::v1::*;
+
+use coresimd::simd_llvm::*;

 define_ty_doc! {
    f32x2, f32, f32 |
--- a/library/stdarch/coresimd/src/x86/i386/bswap.rs
+++ b/library/stdarch/coresimd/src/x86/i386/bswap.rs
--- a/library/stdarch/coresimd/src/x86/i386/eflags.rs
+++ b/library/stdarch/coresimd/src/x86/i386/eflags.rs
@@ -34,7 +34,7 @@ pub unsafe fn __writeeflags(eflags: u64) {

 #[cfg(test)]
 mod tests {
-    use x86::i386::*;
+    use coresimd::x86::i386::*;

    #[test]
    fn test_eflags() {
--- a/library/stdarch/coresimd/src/x86/i386/fxsr.rs
+++ b/library/stdarch/coresimd/src/x86/i386/fxsr.rs
@@ -51,7 +51,7 @@ pub unsafe fn _fxrstor(mem_addr: *const u8) {

 #[cfg(test)]
 mod tests {
-    use x86::i386::fxsr;
+    use coresimd::x86::i386::fxsr;
    use stdsimd_test::simd_test;
    use std::fmt;

--- a/library/stdarch/coresimd/src/x86/i386/mod.rs
+++ b/library/stdarch/coresimd/src/x86/i386/mod.rs
--- a/library/stdarch/coresimd/src/x86/i386/rdtsc.rs
+++ b/library/stdarch/coresimd/src/x86/i386/rdtsc.rs
@@ -54,7 +54,7 @@ extern "C" {
 #[cfg(test)]
 mod tests {
    use stdsimd_test::simd_test;
-    use x86::i386::rdtsc;
+    use coresimd::x86::i386::rdtsc;

    #[simd_test = "sse2"]
    unsafe fn _rdtsc() {
--- a/library/stdarch/coresimd/src/x86/i586/abm.rs
+++ b/library/stdarch/coresimd/src/x86/i586/abm.rs
@@ -42,7 +42,7 @@ pub unsafe fn _popcnt32(x: i32) -> i32 {
 mod tests {
    use stdsimd_test::simd_test;

-    use x86::i586::abm;
+    use coresimd::x86::i586::abm;

    #[simd_test = "lzcnt"]
    unsafe fn _lzcnt_u32() {
--- a/library/stdarch/coresimd/src/x86/i586/avx.rs
+++ b/library/stdarch/coresimd/src/x86/i586/avx.rs
@@ -13,17 +13,17 @@
 //! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
 //! [wiki]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions

-use core::mem;
-use core::ptr;
+use coresimd::simd_llvm::*;
+use coresimd::v128::*;
+use coresimd::v256::*;
+use coresimd::x86::*;
+use intrinsics;
+use mem;
+use ptr;

 #[cfg(test)]
 use stdsimd_test::assert_instr;

-use simd_llvm::*;
-use v128::*;
-use v256::*;
-use x86::*;
-
 /// Add packed double-precision (64-bit) floating-point elements
 /// in `a` and `b`.
 #[inline]
@@ -1547,7 +1547,7 @@ pub unsafe fn _mm256_lddqu_si256(mem_addr: *const __m256i) -> __m256i {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmovntps))] // FIXME vmovntdq
 pub unsafe fn _mm256_stream_si256(mem_addr: *const __m256i, a: __m256i) {
-    ::core::intrinsics::nontemporal_store(mem::transmute(mem_addr), a);
+    intrinsics::nontemporal_store(mem::transmute(mem_addr), a);
 }

 /// Moves double-precision values from a 256-bit vector of [4 x double]
@@ -1557,7 +1557,7 @@ pub unsafe fn _mm256_stream_si256(mem_addr: *const __m256i, a: __m256i) {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmovntps))] // FIXME vmovntpd
 pub unsafe fn _mm256_stream_pd(mem_addr: *const f64, a: __m256d) {
-    ::core::intrinsics::nontemporal_store(mem::transmute(mem_addr), a);
+    intrinsics::nontemporal_store(mem::transmute(mem_addr), a);
 }

 /// Moves single-precision floating point values from a 256-bit vector
@@ -1568,7 +1568,7 @@ pub unsafe fn _mm256_stream_pd(mem_addr: *const f64, a: __m256d) {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmovntps))]
 pub unsafe fn _mm256_stream_ps(mem_addr: *const f32, a: __m256) {
-    ::core::intrinsics::nontemporal_store(mem::transmute(mem_addr), a);
+    intrinsics::nontemporal_store(mem::transmute(mem_addr), a);
 }

 /// Compute the approximate reciprocal of packed single-precision (32-bit)
@@ -2366,7 +2366,6 @@ pub unsafe fn _mm256_loadu2_m128d(
 pub unsafe fn _mm256_loadu2_m128i(
    hiaddr: *const __m128i, loaddr: *const __m128i
 ) -> __m256i {
-    use x86::i586::sse2::_mm_loadu_si128;
    let a = _mm256_castsi128_si256(_mm_loadu_si128(loaddr));
    _mm256_insertf128_si256(a, _mm_loadu_si128(hiaddr), 1)
 }
@@ -2412,7 +2411,6 @@ pub unsafe fn _mm256_storeu2_m128d(
 pub unsafe fn _mm256_storeu2_m128i(
    hiaddr: *mut __m128i, loaddr: *mut __m128i, a: __m256i
 ) {
-    use x86::i586::sse2::_mm_storeu_si128;
    let lo = _mm256_castsi256_si128(a);
    _mm_storeu_si128(loaddr, lo);
    let hi = _mm256_extractf128_si256(a, 1);
@@ -2579,7 +2577,7 @@ mod tests {
    use stdsimd_test::simd_test;
    use test::black_box; // Used to inhibit constant-folding.

-    use x86::*;
+    use coresimd::x86::*;

    #[simd_test = "avx"]
    unsafe fn test_mm256_add_pd() {
--- a/library/stdarch/coresimd/src/x86/i586/avx2.rs
+++ b/library/stdarch/coresimd/src/x86/i586/avx2.rs
@@ -18,14 +18,13 @@
 //! [wiki_avx]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions
 //! [wiki_fma]: https://en.wikipedia.org/wiki/Fused_multiply-accumulate

-use core::mem;
-
-use simd_llvm::*;
-use v256::*;
-use v128::*;
-use v64::*;
-use v32::*;
-use x86::*;
+use coresimd::simd_llvm::*;
+use coresimd::v256::*;
+use coresimd::v128::*;
+use coresimd::v64::*;
+use coresimd::v32::*;
+use coresimd::x86::*;
+use mem;

 #[cfg(test)]
 use stdsimd_test::assert_instr;
@@ -1912,7 +1911,6 @@ pub unsafe fn _mm256_permute2x128_si256(
 #[cfg_attr(test, assert_instr(vpermpd, imm8 = 1))]
 #[rustc_args_required_const(1)]
 pub unsafe fn _mm256_permute4x64_pd(a: __m256d, imm8: i32) -> __m256d {
-    use x86::i586::avx::_mm256_undefined_pd;
    let imm8 = (imm8 & 0xFF) as u8;
    let undef = _mm256_undefined_pd();
    macro_rules! shuffle_done {
@@ -2024,10 +2022,13 @@ pub unsafe fn _mm256_shuffle_epi8(a: __m256i, b: __m256i) -> __m256i {
 /// # #[macro_use] extern crate stdsimd;
 /// #
 /// # fn main() {
-/// #     if cfg_feature_enabled!("avx2") {
+/// #     if is_target_feature_detected!("avx2") {
 /// #         #[target_feature(enable = "avx2")]
 /// #         unsafe fn worker() {
-/// use stdsimd::vendor::*;
+/// #[cfg(target_arch = "x86")]
+/// use stdsimd::arch::x86::*;
+/// #[cfg(target_arch = "x86_64")]
+/// use stdsimd::arch::x86_64::*;
 ///
 /// let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
 ///
@@ -2627,10 +2628,13 @@ pub unsafe fn _mm256_subs_epu8(a: __m256i, b: __m256i) -> __m256i {
 /// # #[macro_use] extern crate stdsimd;
 /// #
 /// # fn main() {
-/// #     if cfg_feature_enabled!("avx2") {
+/// #     if is_target_feature_detected!("avx2") {
 /// #         #[target_feature(enable = "avx2")]
 /// #         unsafe fn worker() {
-/// use stdsimd::vendor::*;
+/// #[cfg(target_arch = "x86")]
+/// use stdsimd::arch::x86::*;
+/// #[cfg(target_arch = "x86_64")]
+/// use stdsimd::arch::x86_64::*;
 ///
 /// let a = _mm256_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
 /// 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
@@ -2673,10 +2677,13 @@ pub unsafe fn _mm256_unpackhi_epi8(a: __m256i, b: __m256i) -> __m256i {
 /// # #[macro_use] extern crate stdsimd;
 /// #
 /// # fn main() {
-/// #     if cfg_feature_enabled!("avx2") {
+/// #     if is_target_feature_detected!("avx2") {
 /// #         #[target_feature(enable = "avx2")]
 /// #         unsafe fn worker() {
-/// use stdsimd::vendor::*;
+/// #[cfg(target_arch = "x86")]
+/// use stdsimd::arch::x86::*;
+/// #[cfg(target_arch = "x86_64")]
+/// use stdsimd::arch::x86_64::*;
 ///
 /// let a = _mm256_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
 /// 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
@@ -2718,10 +2725,13 @@ pub unsafe fn _mm256_unpacklo_epi8(a: __m256i, b: __m256i) -> __m256i {
 /// # #[macro_use] extern crate stdsimd;
 /// #
 /// # fn main() {
-/// #     if cfg_feature_enabled!("avx2") {
+/// #     if is_target_feature_detected!("avx2") {
 /// #         #[target_feature(enable = "avx2")]
 /// #         unsafe fn worker() {
-/// use stdsimd::vendor::*;
+/// #[cfg(target_arch = "x86")]
+/// use stdsimd::arch::x86::*;
+/// #[cfg(target_arch = "x86_64")]
+/// use stdsimd::arch::x86_64::*;
 ///
 /// let a = _mm256_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
 /// let b = _mm256_setr_epi16(0,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15);
@@ -2759,10 +2769,13 @@ pub unsafe fn _mm256_unpackhi_epi16(a: __m256i, b: __m256i) -> __m256i {
 /// # #[macro_use] extern crate stdsimd;
 /// #
 /// # fn main() {
-/// #     if cfg_feature_enabled!("avx2") {
+/// #     if is_target_feature_detected!("avx2") {
 /// #         #[target_feature(enable = "avx2")]
 /// #         unsafe fn worker() {
-/// use stdsimd::vendor::*;
+/// #[cfg(target_arch = "x86")]
+/// use stdsimd::arch::x86::*;
+/// #[cfg(target_arch = "x86_64")]
+/// use stdsimd::arch::x86_64::*;
 ///
 /// let a = _mm256_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
 /// let b = _mm256_setr_epi16(0,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15);
@@ -2800,10 +2813,13 @@ pub unsafe fn _mm256_unpacklo_epi16(a: __m256i, b: __m256i) -> __m256i {
 /// # #[macro_use] extern crate stdsimd;
 /// #
 /// # fn main() {
-/// #     if cfg_feature_enabled!("avx2") {
+/// #     if is_target_feature_detected!("avx2") {
 /// #         #[target_feature(enable = "avx2")]
 /// #         unsafe fn worker() {
-/// use stdsimd::vendor::*;
+/// #[cfg(target_arch = "x86")]
+/// use stdsimd::arch::x86::*;
+/// #[cfg(target_arch = "x86_64")]
+/// use stdsimd::arch::x86_64::*;
 ///
 /// let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
 /// let b = _mm256_setr_epi32(0,-1,-2,-3,-4,-5,-6,-7);
@@ -2840,10 +2856,13 @@ pub unsafe fn _mm256_unpackhi_epi32(a: __m256i, b: __m256i) -> __m256i {
 /// # #[macro_use] extern crate stdsimd;
 /// #
 /// # fn main() {
-/// #     if cfg_feature_enabled!("avx2") {
+/// #     if is_target_feature_detected!("avx2") {
 /// #         #[target_feature(enable = "avx2")]
 /// #         unsafe fn worker() {
-/// use stdsimd::vendor::*;
+/// #[cfg(target_arch = "x86")]
+/// use stdsimd::arch::x86::*;
+/// #[cfg(target_arch = "x86_64")]
+/// use stdsimd::arch::x86_64::*;
 ///
 /// let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
 /// let b = _mm256_setr_epi32(0,-1,-2,-3,-4,-5,-6,-7);
@@ -2877,10 +2896,13 @@ pub unsafe fn _mm256_unpacklo_epi32(a: __m256i, b: __m256i) -> __m256i {
 /// # #[macro_use] extern crate stdsimd;
 /// #
 /// # fn main() {
-/// #     if cfg_feature_enabled!("avx2") {
+/// #     if is_target_feature_detected!("avx2") {
 /// #         #[target_feature(enable = "avx2")]
 /// #         unsafe fn worker() {
-/// use stdsimd::vendor::*;
+/// #[cfg(target_arch = "x86")]
+/// use stdsimd::arch::x86::*;
+/// #[cfg(target_arch = "x86_64")]
+/// use stdsimd::arch::x86_64::*;
 ///
 /// let a = _mm256_setr_epi64x(0, 1, 2, 3);
 /// let b = _mm256_setr_epi64x(0,-1,-2,-3);
@@ -2913,10 +2935,13 @@ pub unsafe fn _mm256_unpackhi_epi64(a: __m256i, b: __m256i) -> __m256i {
 /// # #[macro_use] extern crate stdsimd;
 /// #
 /// # fn main() {
-/// #     if cfg_feature_enabled!("avx2") {
+/// #     if is_target_feature_detected!("avx2") {
 /// #         #[target_feature(enable = "avx2")]
 /// #         unsafe fn worker() {
-/// use stdsimd::vendor::*;
+/// #[cfg(target_arch = "x86")]
+/// use stdsimd::arch::x86::*;
+/// #[cfg(target_arch = "x86_64")]
+/// use stdsimd::arch::x86_64::*;
 ///
 /// let a = _mm256_setr_epi64x(0, 1, 2, 3);
 /// let b = _mm256_setr_epi64x(0,-1,-2,-3);
@@ -3253,10 +3278,10 @@ extern "C" {
 #[cfg(test)]
 mod tests {
    use stdsimd_test::simd_test;
-
-    use x86::*;
    use std;

+    use coresimd::x86::*;
+
    #[simd_test = "avx2"]
    unsafe fn test_mm256_abs_epi32() {
        #[cfg_attr(rustfmt, rustfmt_skip)]
--- a/library/stdarch/coresimd/src/x86/i586/bmi.rs
+++ b/library/stdarch/coresimd/src/x86/i586/bmi.rs
@@ -96,7 +96,7 @@ extern "C" {
 mod tests {
    use stdsimd_test::simd_test;

-    use x86::i586::bmi;
+    use coresimd::x86::i586::bmi;

    #[simd_test = "bmi"]
    unsafe fn _bextr_u32() {
--- a/library/stdarch/coresimd/src/x86/i586/bmi2.rs
+++ b/library/stdarch/coresimd/src/x86/i586/bmi2.rs
@@ -67,7 +67,7 @@ extern "C" {
 mod tests {
    use stdsimd_test::simd_test;

-    use x86::i586::bmi2;
+    use coresimd::x86::i586::bmi2;

    #[simd_test = "bmi2"]
    unsafe fn _pext_u32() {
--- a/library/stdarch/coresimd/src/x86/i586/cpuid.rs
+++ b/library/stdarch/coresimd/src/x86/i586/cpuid.rs
@@ -2,6 +2,8 @@

 #![cfg_attr(feature = "cargo-clippy", allow(stutter))]

+use mem;
+
 #[cfg(test)]
 use stdsimd_test::assert_instr;

@@ -45,7 +47,7 @@ pub struct CpuidResult {
 #[inline]
 #[cfg_attr(test, assert_instr(cpuid))]
 pub unsafe fn __cpuid_count(leaf: u32, sub_leaf: u32) -> CpuidResult {
-    let mut r = ::core::mem::uninitialized::<CpuidResult>();
+    let mut r = mem::uninitialized::<CpuidResult>();
    if cfg!(target_arch = "x86") {
        asm!("cpuid"
             : "={eax}"(r.eax), "={ebx}"(r.ebx), "={ecx}"(r.ecx), "={edx}"(r.edx)
@@ -77,7 +79,7 @@ pub fn has_cpuid() -> bool {
    }
    #[cfg(target_arch = "x86")]
    {
-        use x86::i386::{__readeflags, __writeeflags};
+        use coresimd::x86::i386::{__readeflags, __writeeflags};

        // On `x86` the `cpuid` instruction is not always available.
        // This follows the approach indicated in:
@@ -119,7 +121,7 @@ pub unsafe fn __get_cpuid_max(leaf: u32) -> (u32, u32) {

 #[cfg(test)]
 mod tests {
-    use x86::i586::cpuid;
+    use coresimd::x86::i586::cpuid;

    #[test]
    fn test_always_has_cpuid() {
@@ -131,7 +133,7 @@ mod tests {
    #[cfg(target_arch = "x86")]
    #[test]
    fn test_has_cpuid() {
-        use x86::i386::__readeflags;
+        use coresimd::x86::i386::__readeflags;
        unsafe {
            let before = __readeflags();

--- a/library/stdarch/coresimd/src/x86/i586/mod.rs
+++ b/library/stdarch/coresimd/src/x86/i586/mod.rs
--- a/library/stdarch/coresimd/src/x86/i586/sse.rs
+++ b/library/stdarch/coresimd/src/x86/i586/sse.rs
@@ -1,12 +1,12 @@
 //! Streaming SIMD Extensions (SSE)

-use core::mem;
-use core::ptr;
-
-use simd_llvm::*;
-use v128::*;
-use v64::*;
-use x86::*;
+use coresimd::simd_llvm::*;
+use coresimd::v128::*;
+use coresimd::v64::*;
+use coresimd::x86::*;
+use intrinsics;
+use mem;
+use ptr;

 #[cfg(test)]
 use stdsimd_test::assert_instr;
@@ -873,12 +873,15 @@ pub unsafe fn _mm_movemask_ps(a: __m128) -> i32 {
 /// #
 /// # // The real main function
 /// # fn main() {
-/// #     if cfg_feature_enabled!("sse") {
+/// #     if is_target_feature_detected!("sse") {
 /// #         #[target_feature(enable = "sse")]
 /// #         unsafe fn worker() {
 /// #
-/// #   use stdsimd::vendor::*;
-/// #
+/// #[cfg(target_arch = "x86")]
+/// use stdsimd::arch::x86::*;
+/// #[cfg(target_arch = "x86_64")]
+/// use stdsimd::arch::x86_64::*;
+///
 /// unsafe {
 ///     let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
 ///     let data: [f32; 4] = [5.0, 6.0, 7.0, 8.0];
@@ -924,12 +927,15 @@ pub unsafe fn _mm_loadh_pi(a: __m128, p: *const __m64) -> __m128 {
 /// #
 /// # // The real main function
 /// # fn main() {
-/// #     if cfg_feature_enabled!("sse") {
+/// #     if is_target_feature_detected!("sse") {
 /// #         #[target_feature(enable = "sse")]
 /// #         unsafe fn worker() {
 /// #
-/// #   use stdsimd::vendor::*;
-/// #
+/// #[cfg(target_arch = "x86")]
+/// use stdsimd::arch::x86::*;
+/// #[cfg(target_arch = "x86_64")]
+/// use stdsimd::arch::x86_64::*;
+///
 /// unsafe {
 ///     let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
 ///     let data: [f32; 4] = [5.0, 6.0, 7.0, 8.0];
@@ -1684,7 +1690,7 @@ extern "C" {
 #[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(movntps))]
 pub unsafe fn _mm_stream_ps(mem_addr: *mut f32, a: __m128) {
-    ::core::intrinsics::nontemporal_store(mem::transmute(mem_addr), a);
+    intrinsics::nontemporal_store(mem::transmute(mem_addr), a);
 }

 /// Store 64-bits of integer data from a into memory using a non-temporal
@@ -1701,12 +1707,13 @@ mod tests {
    use std::mem::transmute;
    use std::f32::NAN;

-    use v128::*;
-    use v64::*;
-    use x86::*;
    use stdsimd_test::simd_test;
    use test::black_box; // Used to inhibit constant-folding.

+    use coresimd::v128::*;
+    use coresimd::v64::*;
+    use coresimd::x86::*;
+
    #[simd_test = "sse"]
    unsafe fn test_mm_add_ps() {
        let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
--- a/library/stdarch/coresimd/src/x86/i586/sse2.rs
+++ b/library/stdarch/coresimd/src/x86/i586/sse2.rs
@@ -3,13 +3,13 @@
 #[cfg(test)]
 use stdsimd_test::assert_instr;

-use core::mem;
-use core::ptr;
-
-use simd_llvm::*;
-use v128::*;
-use v64::*;
-use x86::*;
+use coresimd::simd_llvm::*;
+use coresimd::v128::*;
+use coresimd::v64::*;
+use coresimd::x86::*;
+use intrinsics;
+use mem;
+use ptr;

 /// Provide a hint to the processor that the code sequence is a spin-wait loop.
 ///
@@ -952,7 +952,7 @@ pub unsafe fn _mm_storel_epi64(mem_addr: *mut __m128i, a: __m128i) {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(movntps))] // FIXME movntdq
 pub unsafe fn _mm_stream_si128(mem_addr: *mut __m128i, a: __m128i) {
-    ::core::intrinsics::nontemporal_store(mem_addr, a);
+    ::intrinsics::nontemporal_store(mem_addr, a);
 }

 /// Stores a 32-bit integer value in the specified memory location.
@@ -962,7 +962,7 @@ pub unsafe fn _mm_stream_si128(mem_addr: *mut __m128i, a: __m128i) {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(movnti))]
 pub unsafe fn _mm_stream_si32(mem_addr: *mut i32, a: i32) {
-    ::core::intrinsics::nontemporal_store(mem_addr, a);
+    ::intrinsics::nontemporal_store(mem_addr, a);
 }

 /// Return a vector where the low element is extracted from `a` and its upper
@@ -1974,7 +1974,7 @@ pub unsafe fn _mm_loadl_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(movntps))] // FIXME movntpd
 pub unsafe fn _mm_stream_pd(mem_addr: *mut f64, a: __m128d) {
-    ::core::intrinsics::nontemporal_store(mem::transmute(mem_addr), a);
+    intrinsics::nontemporal_store(mem::transmute(mem_addr), a);
 }

 /// Stores the lower 64 bits of a 128-bit vector of [2 x double] to a
@@ -2382,8 +2382,8 @@ mod tests {

    use stdsimd_test::simd_test;
    use test::black_box; // Used to inhibit constant-folding.
-    use x86::*;
-    use v128::*;
+    use coresimd::x86::*;
+    use coresimd::v128::*;

    #[simd_test = "sse2"]
    unsafe fn test_mm_pause() {
--- a/library/stdarch/coresimd/src/x86/i586/sse3.rs
+++ b/library/stdarch/coresimd/src/x86/i586/sse3.rs
@@ -1,8 +1,8 @@
 //! Streaming SIMD Extensions 3 (SSE3)

-use simd_llvm::{simd_shuffle2, simd_shuffle4};
-use v128::*;
-use x86::*;
+use coresimd::simd_llvm::{simd_shuffle2, simd_shuffle4};
+use coresimd::v128::*;
+use coresimd::x86::*;

 #[cfg(test)]
 use stdsimd_test::assert_instr;
@@ -129,7 +129,7 @@ extern "C" {
 mod tests {
    use stdsimd_test::simd_test;

-    use x86::*;
+    use coresimd::x86::*;

    #[simd_test = "sse3"]
    unsafe fn test_mm_addsub_ps() {
--- a/library/stdarch/coresimd/src/x86/i586/sse41.rs
+++ b/library/stdarch/coresimd/src/x86/i586/sse41.rs
@@ -1,14 +1,16 @@
 //! Streaming SIMD Extensions 4.1 (SSE4.1)

-use core::mem;
+use coresimd::simd_llvm::*;
+use coresimd::v128::*;
+use coresimd::v64::*;
+use coresimd::v32::*;
+use coresimd::v16::*;
+use coresimd::x86::*;
+use mem;

 #[cfg(test)]
 use stdsimd_test::assert_instr;

-use simd_llvm::*;
-use v128::*;
-use x86::*;
-
 // SSE4 rounding constans
 /// round to nearest
 pub const _MM_FROUND_TO_NEAREST_INT: i32 = 0x00;
@@ -301,7 +303,7 @@ pub unsafe fn _mm_cmpeq_epi64(a: __m128i, b: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(pmovsxbw))]
 pub unsafe fn _mm_cvtepi8_epi16(a: __m128i) -> __m128i {
    let a = a.as_i8x16();
-    let a = simd_shuffle8::<_, ::v64::i8x8>(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
+    let a = simd_shuffle8::<_, i8x8>(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
    mem::transmute(simd_cast::<_, i16x8>(a))
 }

@@ -311,7 +313,7 @@ pub unsafe fn _mm_cvtepi8_epi16(a: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(pmovsxbd))]
 pub unsafe fn _mm_cvtepi8_epi32(a: __m128i) -> __m128i {
    let a = a.as_i8x16();
-    let a = simd_shuffle4::<_, ::v32::i8x4>(a, a, [0, 1, 2, 3]);
+    let a = simd_shuffle4::<_, i8x4>(a, a, [0, 1, 2, 3]);
    mem::transmute(simd_cast::<_, i32x4>(a))
 }

@@ -322,7 +324,7 @@ pub unsafe fn _mm_cvtepi8_epi32(a: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(pmovsxbq))]
 pub unsafe fn _mm_cvtepi8_epi64(a: __m128i) -> __m128i {
    let a = a.as_i8x16();
-    let a = simd_shuffle2::<_, ::v16::i8x2>(a, a, [0, 1]);
+    let a = simd_shuffle2::<_, i8x2>(a, a, [0, 1]);
    mem::transmute(simd_cast::<_, i64x2>(a))
 }

@@ -332,7 +334,7 @@ pub unsafe fn _mm_cvtepi8_epi64(a: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(pmovsxwd))]
 pub unsafe fn _mm_cvtepi16_epi32(a: __m128i) -> __m128i {
    let a = a.as_i16x8();
-    let a = simd_shuffle4::<_, ::v64::i16x4>(a, a, [0, 1, 2, 3]);
+    let a = simd_shuffle4::<_, i16x4>(a, a, [0, 1, 2, 3]);
    mem::transmute(simd_cast::<_, i32x4>(a))
 }

@@ -342,7 +344,7 @@ pub unsafe fn _mm_cvtepi16_epi32(a: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(pmovsxwq))]
 pub unsafe fn _mm_cvtepi16_epi64(a: __m128i) -> __m128i {
    let a = a.as_i16x8();
-    let a = simd_shuffle2::<_, ::v32::i16x2>(a, a, [0, 1]);
+    let a = simd_shuffle2::<_, i16x2>(a, a, [0, 1]);
    mem::transmute(simd_cast::<_, i64x2>(a))
 }

@@ -352,7 +354,7 @@ pub unsafe fn _mm_cvtepi16_epi64(a: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(pmovsxdq))]
 pub unsafe fn _mm_cvtepi32_epi64(a: __m128i) -> __m128i {
    let a = a.as_i32x4();
-    let a = simd_shuffle2::<_, ::v64::i32x2>(a, a, [0, 1]);
+    let a = simd_shuffle2::<_, i32x2>(a, a, [0, 1]);
    mem::transmute(simd_cast::<_, i64x2>(a))
 }

@@ -362,7 +364,7 @@ pub unsafe fn _mm_cvtepi32_epi64(a: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(pmovzxbw))]
 pub unsafe fn _mm_cvtepu8_epi16(a: __m128i) -> __m128i {
    let a = a.as_u8x16();
-    let a = simd_shuffle8::<_, ::v64::u8x8>(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
+    let a = simd_shuffle8::<_, u8x8>(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
    mem::transmute(simd_cast::<_, i16x8>(a))
 }

@@ -372,7 +374,7 @@ pub unsafe fn _mm_cvtepu8_epi16(a: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(pmovzxbd))]
 pub unsafe fn _mm_cvtepu8_epi32(a: __m128i) -> __m128i {
    let a = a.as_u8x16();
-    let a = simd_shuffle4::<_, ::v32::u8x4>(a, a, [0, 1, 2, 3]);
+    let a = simd_shuffle4::<_, u8x4>(a, a, [0, 1, 2, 3]);
    mem::transmute(simd_cast::<_, i32x4>(a))
 }

@@ -382,7 +384,7 @@ pub unsafe fn _mm_cvtepu8_epi32(a: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(pmovzxbq))]
 pub unsafe fn _mm_cvtepu8_epi64(a: __m128i) -> __m128i {
    let a = a.as_u8x16();
-    let a = simd_shuffle2::<_, ::v16::u8x2>(a, a, [0, 1]);
+    let a = simd_shuffle2::<_, u8x2>(a, a, [0, 1]);
    mem::transmute(simd_cast::<_, i64x2>(a))
 }

@@ -393,7 +395,7 @@ pub unsafe fn _mm_cvtepu8_epi64(a: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(pmovzxwd))]
 pub unsafe fn _mm_cvtepu16_epi32(a: __m128i) -> __m128i {
    let a = a.as_u16x8();
-    let a = simd_shuffle4::<_, ::v64::u16x4>(a, a, [0, 1, 2, 3]);
+    let a = simd_shuffle4::<_, u16x4>(a, a, [0, 1, 2, 3]);
    mem::transmute(simd_cast::<_, i32x4>(a))
 }

@@ -404,7 +406,7 @@ pub unsafe fn _mm_cvtepu16_epi32(a: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(pmovzxwq))]
 pub unsafe fn _mm_cvtepu16_epi64(a: __m128i) -> __m128i {
    let a = a.as_u16x8();
-    let a = simd_shuffle2::<_, ::v32::u16x2>(a, a, [0, 1]);
+    let a = simd_shuffle2::<_, u16x2>(a, a, [0, 1]);
    mem::transmute(simd_cast::<_, i64x2>(a))
 }

@@ -415,7 +417,7 @@ pub unsafe fn _mm_cvtepu16_epi64(a: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(pmovzxdq))]
 pub unsafe fn _mm_cvtepu32_epi64(a: __m128i) -> __m128i {
    let a = a.as_u32x4();
-    let a = simd_shuffle2::<_, ::v64::u32x2>(a, a, [0, 1]);
+    let a = simd_shuffle2::<_, u32x2>(a, a, [0, 1]);
    mem::transmute(simd_cast::<_, i64x2>(a))
 }

@@ -549,18 +551,25 @@ pub unsafe fn _mm_ceil_ss(a: __m128, b: __m128) -> __m128 {
 /// Rounding is done according to the rounding parameter, which can be one of:
 ///
 /// ```
-/// use coresimd::vendor;
+/// extern crate stdsimd;
 ///
+/// #[cfg(target_arch = "x86")]
+/// use stdsimd::arch::x86::*;
+/// #[cfg(target_arch = "x86_64")]
+/// use stdsimd::arch::x86_64::*;
+///
+/// # fn main() {
 /// // round to nearest, and suppress exceptions:
-/// (vendor::_MM_FROUND_TO_NEAREST_INT | vendor::_MM_FROUND_NO_EXC);
+/// (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
 /// // round down, and suppress exceptions:
-/// (vendor::_MM_FROUND_TO_NEG_INF | vendor::_MM_FROUND_NO_EXC);
+/// (_MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
 /// // round up, and suppress exceptions:
-/// (vendor::_MM_FROUND_TO_POS_INF | vendor::_MM_FROUND_NO_EXC);
+/// (_MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
 /// // truncate, and suppress exceptions:
-/// (vendor::_MM_FROUND_TO_ZERO | vendor::_MM_FROUND_NO_EXC);
-/// // use MXCSR.RC; see `vendor::_MM_SET_ROUNDING_MODE`:
-/// vendor::_MM_FROUND_CUR_DIRECTION;
+/// (_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+/// // use MXCSR.RC; see `_MM_SET_ROUNDING_MODE`:
+/// _MM_FROUND_CUR_DIRECTION;
+/// # }
 /// ```
 #[inline]
 #[target_feature(enable = "sse4.1")]
@@ -579,18 +588,25 @@ pub unsafe fn _mm_round_pd(a: __m128d, rounding: i32) -> __m128d {
 /// Rounding is done according to the rounding parameter, which can be one of:
 ///
 /// ```
-/// use coresimd::vendor;
+/// extern crate stdsimd;
 ///
+/// #[cfg(target_arch = "x86")]
+/// use stdsimd::arch::x86::*;
+/// #[cfg(target_arch = "x86_64")]
+/// use stdsimd::arch::x86_64::*;
+///
+/// # fn main() {
 /// // round to nearest, and suppress exceptions:
-/// (vendor::_MM_FROUND_TO_NEAREST_INT | vendor::_MM_FROUND_NO_EXC);
+/// (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
 /// // round down, and suppress exceptions:
-/// (vendor::_MM_FROUND_TO_NEG_INF | vendor::_MM_FROUND_NO_EXC);
+/// (_MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
 /// // round up, and suppress exceptions:
-/// (vendor::_MM_FROUND_TO_POS_INF | vendor::_MM_FROUND_NO_EXC);
+/// (_MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
 /// // truncate, and suppress exceptions:
-/// (vendor::_MM_FROUND_TO_ZERO | vendor::_MM_FROUND_NO_EXC);
-/// // use MXCSR.RC; see `vendor::_MM_SET_ROUNDING_MODE`:
-/// vendor::_MM_FROUND_CUR_DIRECTION;
+/// (_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+/// // use MXCSR.RC; see `_MM_SET_ROUNDING_MODE`:
+/// _MM_FROUND_CUR_DIRECTION;
+/// # }
 /// ```
 #[inline]
 #[target_feature(enable = "sse4.1")]
@@ -611,18 +627,25 @@ pub unsafe fn _mm_round_ps(a: __m128, rounding: i32) -> __m128 {
 /// Rounding is done according to the rounding parameter, which can be one of:
 ///
 /// ```
-/// use coresimd::vendor;
+/// extern crate stdsimd;
 ///
+/// #[cfg(target_arch = "x86")]
+/// use stdsimd::arch::x86::*;
+/// #[cfg(target_arch = "x86_64")]
+/// use stdsimd::arch::x86_64::*;
+///
+/// # fn main() {
 /// // round to nearest, and suppress exceptions:
-/// (vendor::_MM_FROUND_TO_NEAREST_INT | vendor::_MM_FROUND_NO_EXC);
+/// (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
 /// // round down, and suppress exceptions:
-/// (vendor::_MM_FROUND_TO_NEG_INF | vendor::_MM_FROUND_NO_EXC);
+/// (_MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
 /// // round up, and suppress exceptions:
-/// (vendor::_MM_FROUND_TO_POS_INF | vendor::_MM_FROUND_NO_EXC);
+/// (_MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
 /// // truncate, and suppress exceptions:
-/// (vendor::_MM_FROUND_TO_ZERO | vendor::_MM_FROUND_NO_EXC);
-/// // use MXCSR.RC; see `vendor::_MM_SET_ROUNDING_MODE`:
-/// vendor::_MM_FROUND_CUR_DIRECTION;
+/// (_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+/// // use MXCSR.RC; see `_MM_SET_ROUNDING_MODE`:
+/// _MM_FROUND_CUR_DIRECTION;
+/// # }
 /// ```
 #[inline]
 #[target_feature(enable = "sse4.1")]
@@ -643,18 +666,25 @@ pub unsafe fn _mm_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d {
 /// Rounding is done according to the rounding parameter, which can be one of:
 ///
 /// ```
-/// use coresimd::vendor;
+/// extern crate stdsimd;
 ///
+/// #[cfg(target_arch = "x86")]
+/// use stdsimd::arch::x86::*;
+/// #[cfg(target_arch = "x86_64")]
+/// use stdsimd::arch::x86_64::*;
+///
+/// # fn main() {
 /// // round to nearest, and suppress exceptions:
-/// (vendor::_MM_FROUND_TO_NEAREST_INT | vendor::_MM_FROUND_NO_EXC);
+/// (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
 /// // round down, and suppress exceptions:
-/// (vendor::_MM_FROUND_TO_NEG_INF | vendor::_MM_FROUND_NO_EXC);
+/// (_MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
 /// // round up, and suppress exceptions:
-/// (vendor::_MM_FROUND_TO_POS_INF | vendor::_MM_FROUND_NO_EXC);
+/// (_MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
 /// // truncate, and suppress exceptions:
-/// (vendor::_MM_FROUND_TO_ZERO | vendor::_MM_FROUND_NO_EXC);
-/// // use MXCSR.RC; see `vendor::_MM_SET_ROUNDING_MODE`:
-/// vendor::_MM_FROUND_CUR_DIRECTION;
+/// (_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+/// // use MXCSR.RC; see `_MM_SET_ROUNDING_MODE`:
+/// _MM_FROUND_CUR_DIRECTION;
+/// # }
 /// ```
 #[inline]
 #[target_feature(enable = "sse4.1")]
@@ -817,7 +847,7 @@ extern "C" {
 mod tests {
    use std::mem;
    use stdsimd_test::simd_test;
-    use x86::*;
+    use coresimd::x86::*;

    #[simd_test = "sse4.1"]
    unsafe fn test_mm_blendv_epi8() {
--- a/library/stdarch/coresimd/src/x86/i586/sse42.rs
+++ b/library/stdarch/coresimd/src/x86/i586/sse42.rs
@@ -5,8 +5,8 @@
 #[cfg(test)]
 use stdsimd_test::assert_instr;

-use v128::*;
-use x86::*;
+use coresimd::v128::*;
+use coresimd::x86::*;

 /// String contains unsigned 8-bit characters *(Default)*
 pub const _SIDD_UBYTE_OPS: i32 = 0b0000_0000;
@@ -102,11 +102,14 @@ pub unsafe fn _mm_cmpistrm(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
 /// # #[macro_use] extern crate stdsimd;
 /// #
 /// # fn main() {
-/// #     if cfg_feature_enabled!("sse4.2") {
+/// #     if is_target_feature_detected!("sse4.2") {
 /// #         #[target_feature(enable = "sse4.2")]
 /// #         unsafe fn worker() {
 ///
-/// use stdsimd::vendor::*;
+/// #[cfg(target_arch = "x86")]
+/// use stdsimd::arch::x86::*;
+/// #[cfg(target_arch = "x86_64")]
+/// use stdsimd::arch::x86_64::*;
 ///
 /// let haystack = b"This is a long string of text data\r\n\tthat extends
 /// multiple lines";
@@ -142,10 +145,13 @@ pub unsafe fn _mm_cmpistrm(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
 /// # #[macro_use] extern crate stdsimd;
 /// #
 /// # fn main() {
-/// #     if cfg_feature_enabled!("sse4.2") {
+/// #     if is_target_feature_detected!("sse4.2") {
 /// #         #[target_feature(enable = "sse4.2")]
 /// #         unsafe fn worker() {
-/// use stdsimd::vendor::*;
+/// #[cfg(target_arch = "x86")]
+/// use stdsimd::arch::x86::*;
+/// #[cfg(target_arch = "x86_64")]
+/// use stdsimd::arch::x86_64::*;
 ///
 /// // Ensure your input is 16 byte aligned
 /// let password = b"hunter2\0\0\0\0\0\0\0\0\0";
@@ -180,10 +186,14 @@ pub unsafe fn _mm_cmpistrm(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
 /// # #[macro_use] extern crate stdsimd;
 /// #
 /// # fn main() {
-/// #     if cfg_feature_enabled!("sse4.2") {
+/// #     if is_target_feature_detected!("sse4.2") {
 /// #         #[target_feature(enable = "sse4.2")]
 /// #         unsafe fn worker() {
-/// use stdsimd::vendor::*;
+/// #[cfg(target_arch = "x86")]
+/// use stdsimd::arch::x86::*;
+/// #[cfg(target_arch = "x86_64")]
+/// use stdsimd::arch::x86_64::*;
+///
 /// # let b = b":;<=>?@[\\]^_`abc";
 /// # let b = _mm_loadu_si128(b.as_ptr() as *const _);
 ///
@@ -217,10 +227,13 @@ pub unsafe fn _mm_cmpistrm(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
 /// # #[macro_use] extern crate stdsimd;
 /// #
 /// # fn main() {
-/// #     if cfg_feature_enabled!("sse4.2") {
+/// #     if is_target_feature_detected!("sse4.2") {
 /// #         #[target_feature(enable = "sse4.2")]
 /// #         unsafe fn worker() {
-/// use stdsimd::vendor::*;
+/// #[cfg(target_arch = "x86")]
+/// use stdsimd::arch::x86::*;
+/// #[cfg(target_arch = "x86_64")]
+/// use stdsimd::arch::x86_64::*;
 ///
 /// # let mut some_utf16_words = [0u16; 8];
 /// # let mut more_utf16_words = [0u16; 8];
@@ -407,11 +420,14 @@ pub unsafe fn _mm_cmpestrm(
 /// # #[macro_use] extern crate stdsimd;
 /// #
 /// # fn main() {
-/// #     if cfg_feature_enabled!("sse4.2") {
+/// #     if is_target_feature_detected!("sse4.2") {
 /// #         #[target_feature(enable = "sse4.2")]
 /// #         unsafe fn worker() {
 ///
-/// use stdsimd::vendor::*;
+/// #[cfg(target_arch = "x86")]
+/// use stdsimd::arch::x86::*;
+/// #[cfg(target_arch = "x86_64")]
+/// use stdsimd::arch::x86_64::*;
 ///
 /// // The string we want to find a substring in
 /// let haystack = b"Split \r\n\t line  ";
@@ -625,7 +641,7 @@ mod tests {
    use stdsimd_test::simd_test;

    use std::ptr;
-    use x86::*;
+    use coresimd::x86::*;

    // Currently one cannot `load` a &[u8] that is is less than 16
    // in length. This makes loading strings less than 16 in length
--- a/library/stdarch/coresimd/src/x86/i586/ssse3.rs
+++ b/library/stdarch/coresimd/src/x86/i586/ssse3.rs
@@ -1,14 +1,13 @@
 //! Supplemental Streaming SIMD Extensions 3 (SSSE3)

-use core::mem;
+use coresimd::simd_llvm::simd_shuffle16;
+use coresimd::v128::*;
+use coresimd::x86::*;
+use mem;

 #[cfg(test)]
 use stdsimd_test::assert_instr;

-use simd_llvm::simd_shuffle16;
-use v128::*;
-use x86::*;
-
 /// Compute the absolute value of packed 8-bit signed integers in `a` and
 /// return the unsigned results.
 #[inline]
@@ -292,7 +291,7 @@ extern "C" {
 mod tests {
    use stdsimd_test::simd_test;

-    use x86::*;
+    use coresimd::x86::*;

    #[simd_test = "ssse3"]
    unsafe fn test_mm_abs_epi8() {
--- a/library/stdarch/coresimd/src/x86/i586/tbm.rs
+++ b/library/stdarch/coresimd/src/x86/i586/tbm.rs
@@ -263,7 +263,7 @@ pub unsafe fn _tzmsk_u64(x: u64) -> u64 {
 mod tests {
    use stdsimd_test::simd_test;

-    use x86::i586::tbm;
+    use coresimd::x86::i586::tbm;

    /*
    #[simd_test = "tbm"]
--- a/library/stdarch/coresimd/src/x86/i586/xsave.rs
+++ b/library/stdarch/coresimd/src/x86/i586/xsave.rs
@@ -137,9 +137,11 @@ pub unsafe fn _xrstors(mem_addr: *const u8, rs_mask: u64) {

 #[cfg(test)]
 mod tests {
-    use x86::i586::xsave;
-    use stdsimd_test::simd_test;
    use std::fmt;
+    use std::prelude::v1::*;
+
+    use coresimd::x86::i586::xsave;
+    use stdsimd_test::simd_test;

    #[repr(align(64))]
    struct XsaveArea {
--- a/library/stdarch/coresimd/src/x86/i686/aes.rs
+++ b/library/stdarch/coresimd/src/x86/i686/aes.rs
@@ -6,7 +6,8 @@
 //! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref].
 //!
 //! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
-use x86::__m128i;
+
+use coresimd::x86::__m128i;

 #[cfg(test)]
 use stdsimd_test::assert_instr;
@@ -92,7 +93,7 @@ mod tests {

    use stdsimd_test::simd_test;

-    use x86::*;
+    use coresimd::x86::*;

    #[simd_test = "aes"]
    unsafe fn test_mm_aesdec_si128() {
--- a/library/stdarch/coresimd/src/x86/i686/mmx.rs
+++ b/library/stdarch/coresimd/src/x86/i686/mmx.rs
@@ -8,9 +8,9 @@
 //!
 //! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf

-use v64::*;
-use x86::*;
-use core::mem;
+use coresimd::v64::*;
+use coresimd::x86::*;
+use mem;

 #[cfg(test)]
 use stdsimd_test::assert_instr;
@@ -487,7 +487,7 @@ extern "C" {

 #[cfg(test)]
 mod tests {
-    use x86::*;
+    use coresimd::x86::*;
    use stdsimd_test::simd_test;

    #[simd_test = "mmx"]
--- a/library/stdarch/coresimd/src/x86/i686/mod.rs
+++ b/library/stdarch/coresimd/src/x86/i686/mod.rs
--- a/library/stdarch/coresimd/src/x86/i686/sse.rs
+++ b/library/stdarch/coresimd/src/x86/i686/sse.rs
@@ -1,6 +1,6 @@
 //! `i686` Streaming SIMD Extensions (SSE)

-use x86::*;
+use coresimd::x86::*;

 #[cfg(test)]
 use stdsimd_test::assert_instr;
@@ -469,7 +469,7 @@ pub unsafe fn _mm_cvtps_pi8(a: __m128) -> __m64 {

 #[cfg(test)]
 mod tests {
-    use x86::*;
+    use coresimd::x86::*;
    use stdsimd_test::simd_test;

    #[simd_test = "sse,mmx"]
--- a/library/stdarch/coresimd/src/x86/i686/sse2.rs
+++ b/library/stdarch/coresimd/src/x86/i686/sse2.rs
@@ -1,9 +1,8 @@
 //! `i686`'s Streaming SIMD Extensions 2 (SSE2)

-use core::mem;
-
-use simd_llvm::simd_extract;
-use x86::*;
+use coresimd::simd_llvm::simd_extract;
+use coresimd::x86::*;
+use mem;

 #[cfg(test)]
 use stdsimd_test::assert_instr;
@@ -137,7 +136,7 @@ mod tests {

    use stdsimd_test::simd_test;

-    use x86::*;
+    use coresimd::x86::*;

    #[simd_test = "sse2,mmx"]
    unsafe fn test_mm_add_si64() {
--- a/library/stdarch/coresimd/src/x86/i686/sse41.rs
+++ b/library/stdarch/coresimd/src/x86/i686/sse41.rs
@@ -1,7 +1,7 @@
 //! `i686`'s Streaming SIMD Extensions 4.1 (SSE4.1)

-use v128::*;
-use x86::*;
+use coresimd::v128::*;
+use coresimd::x86::*;

 #[cfg(test)]
 use stdsimd_test::assert_instr;
@@ -138,7 +138,7 @@ pub unsafe fn _mm_test_mix_ones_zeros(a: __m128i, mask: __m128i) -> i32 {
 #[cfg(test)]
 mod tests {
    use stdsimd_test::simd_test;
-    use x86::*;
+    use coresimd::x86::*;

    #[simd_test = "sse4.1"]
    unsafe fn test_mm_testz_si128() {
--- a/library/stdarch/coresimd/src/x86/i686/sse42.rs
+++ b/library/stdarch/coresimd/src/x86/i686/sse42.rs
@@ -1,8 +1,8 @@
 //! `i686`'s Streaming SIMD Extensions 4.2 (SSE4.2)

-use simd_llvm::*;
-use v128::*;
-use x86::*;
+use coresimd::simd_llvm::*;
+use coresimd::v128::*;
+use coresimd::x86::*;

 #[cfg(test)]
 use stdsimd_test::assert_instr;
@@ -18,7 +18,7 @@ pub unsafe fn _mm_cmpgt_epi64(a: __m128i, b: __m128i) -> __m128i {

 #[cfg(test)]
 mod tests {
-    use x86::*;
+    use coresimd::x86::*;

    use stdsimd_test::simd_test;

--- a/library/stdarch/coresimd/src/x86/i686/sse4a.rs
+++ b/library/stdarch/coresimd/src/x86/i686/sse4a.rs
@@ -1,8 +1,8 @@
 //! `i686`'s Streaming SIMD Extensions 4a (`SSE4a`)

-use core::mem;
-use v128::*;
-use x86::*;
+use coresimd::v128::*;
+use coresimd::x86::*;
+use mem;

 #[cfg(test)]
 use stdsimd_test::assert_instr;
@@ -75,7 +75,7 @@ pub unsafe fn _mm_stream_ss(p: *mut f32, a: __m128) {
 #[cfg(test)]
 mod tests {
    use stdsimd_test::simd_test;
-    use x86::*;
+    use coresimd::x86::*;

    #[simd_test = "sse4a"]
    unsafe fn test_mm_extract_si64() {
--- a/library/stdarch/coresimd/src/x86/i686/ssse3.rs
+++ b/library/stdarch/coresimd/src/x86/i686/ssse3.rs
@@ -3,7 +3,7 @@
 #[cfg(test)]
 use stdsimd_test::assert_instr;

-use x86::*;
+use coresimd::x86::*;

 /// Compute the absolute value of packed 8-bit integers in `a` and
 /// return the unsigned results.
@@ -223,7 +223,7 @@ extern "C" {
 mod tests {
    use stdsimd_test::simd_test;

-    use x86::*;
+    use coresimd::x86::*;

    #[simd_test = "ssse3,mmx"]
    unsafe fn test_mm_abs_pi8() {
--- a/library/stdarch/coresimd/src/x86/macros.rs
+++ b/library/stdarch/coresimd/src/x86/macros.rs
--- a/library/stdarch/coresimd/src/x86/mod.rs
+++ b/library/stdarch/coresimd/src/x86/mod.rs
@@ -1,6 +1,7 @@
 //! `x86` and `x86_64` intrinsics.

-use core::mem;
+use prelude::v1::*;
+use mem;

 #[macro_use]
 mod macros;
@@ -59,13 +60,16 @@ types! {
    /// # fn main() {
    /// # #[target_feature(enable = "mmx")]
    /// # unsafe fn foo() {
-    /// use stdsimd::vendor::*;
+    /// #[cfg(target_arch = "x86")]
+    /// use stdsimd::arch::x86::*;
+    /// #[cfg(target_arch = "x86_64")]
+    /// use stdsimd::arch::x86_64::*;
    ///
    /// let all_bytes_zero = _mm_setzero_si64();
    /// let all_bytes_one = _mm_set1_pi8(1);
    /// let two_i32 = _mm_set_pi32(1, 2);
    /// # }
-    /// # if cfg_feature_enabled!("mmx") { unsafe { foo() } }
+    /// # if is_target_feature_detected!("mmx") { unsafe { foo() } }
    /// # }
    /// ```
    pub struct __m64(i64);
@@ -102,13 +106,16 @@ types! {
    /// # fn main() {
    /// # #[target_feature(enable = "sse2")]
    /// # unsafe fn foo() {
-    /// use stdsimd::vendor::*;
+    /// #[cfg(target_arch = "x86")]
+    /// use stdsimd::arch::x86::*;
+    /// #[cfg(target_arch = "x86_64")]
+    /// use stdsimd::arch::x86_64::*;
    ///
    /// let all_bytes_zero = _mm_setzero_si128();
    /// let all_bytes_one = _mm_set1_epi8(1);
    /// let four_i32 = _mm_set_epi32(1, 2, 3, 4);
    /// # }
-    /// # if cfg_feature_enabled!("sse2") { unsafe { foo() } }
+    /// # if is_target_feature_detected!("sse2") { unsafe { foo() } }
    /// # }
    /// ```
    pub struct __m128i(i64, i64);
@@ -138,13 +145,16 @@ types! {
    /// # fn main() {
    /// # #[target_feature(enable = "sse")]
    /// # unsafe fn foo() {
-    /// use stdsimd::vendor::*;
+    /// #[cfg(target_arch = "x86")]
+    /// use stdsimd::arch::x86::*;
+    /// #[cfg(target_arch = "x86_64")]
+    /// use stdsimd::arch::x86_64::*;
    ///
    /// let four_zeros = _mm_setzero_ps();
    /// let four_ones = _mm_set1_ps(1.0);
    /// let four_floats = _mm_set_ps(1.0, 2.0, 3.0, 4.0);
    /// # }
-    /// # if cfg_feature_enabled!("sse") { unsafe { foo() } }
+    /// # if is_target_feature_detected!("sse") { unsafe { foo() } }
    /// # }
    /// ```
    pub struct __m128(f32, f32, f32, f32);
@@ -174,13 +184,16 @@ types! {
    /// # fn main() {
    /// # #[target_feature(enable = "sse")]
    /// # unsafe fn foo() {
-    /// use stdsimd::vendor::*;
+    /// #[cfg(target_arch = "x86")]
+    /// use stdsimd::arch::x86::*;
+    /// #[cfg(target_arch = "x86_64")]
+    /// use stdsimd::arch::x86_64::*;
    ///
    /// let two_zeros = _mm_setzero_pd();
    /// let two_ones = _mm_set1_pd(1.0);
    /// let two_floats = _mm_set_pd(1.0, 2.0);
    /// # }
-    /// # if cfg_feature_enabled!("sse") { unsafe { foo() } }
+    /// # if is_target_feature_detected!("sse") { unsafe { foo() } }
    /// # }
    /// ```
    pub struct __m128d(f64, f64);
@@ -214,13 +227,16 @@ types! {
    /// # fn main() {
    /// # #[target_feature(enable = "avx")]
    /// # unsafe fn foo() {
-    /// use stdsimd::vendor::*;
+    /// #[cfg(target_arch = "x86")]
+    /// use stdsimd::arch::x86::*;
+    /// #[cfg(target_arch = "x86_64")]
+    /// use stdsimd::arch::x86_64::*;
    ///
    /// let all_bytes_zero = _mm256_setzero_si256();
    /// let all_bytes_one = _mm256_set1_epi8(1);
    /// let eight_i32 = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
    /// # }
-    /// # if cfg_feature_enabled!("avx") { unsafe { foo() } }
+    /// # if is_target_feature_detected!("avx") { unsafe { foo() } }
    /// # }
    /// ```
    pub struct __m256i(i64, i64, i64, i64);
@@ -250,13 +266,16 @@ types! {
    /// # fn main() {
    /// # #[target_feature(enable = "sse")]
    /// # unsafe fn foo() {
-    /// use stdsimd::vendor::*;
+    /// #[cfg(target_arch = "x86")]
+    /// use stdsimd::arch::x86::*;
+    /// #[cfg(target_arch = "x86_64")]
+    /// use stdsimd::arch::x86_64::*;
    ///
    /// let eight_zeros = _mm256_setzero_ps();
    /// let eight_ones = _mm256_set1_ps(1.0);
    /// let eight_floats = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
    /// # }
-    /// # if cfg_feature_enabled!("sse") { unsafe { foo() } }
+    /// # if is_target_feature_detected!("sse") { unsafe { foo() } }
    /// # }
    /// ```
    pub struct __m256(f32, f32, f32, f32, f32, f32, f32, f32);
@@ -286,13 +305,16 @@ types! {
    /// # fn main() {
    /// # #[target_feature(enable = "avx")]
    /// # unsafe fn foo() {
-    /// use stdsimd::vendor::*;
+    /// #[cfg(target_arch = "x86")]
+    /// use stdsimd::arch::x86::*;
+    /// #[cfg(target_arch = "x86_64")]
+    /// use stdsimd::arch::x86_64::*;
    ///
    /// let four_zeros = _mm256_setzero_pd();
    /// let four_ones = _mm256_set1_pd(1.0);
    /// let four_floats = _mm256_set_pd(1.0, 2.0, 3.0, 4.0);
    /// # }
-    /// # if cfg_feature_enabled!("avx") { unsafe { foo() } }
+    /// # if is_target_feature_detected!("avx") { unsafe { foo() } }
    /// # }
    /// ```
    pub struct __m256d(f64, f64, f64, f64);
@@ -309,42 +331,42 @@ trait m128iExt: Sized {
    fn as_m128i(self) -> __m128i;

    #[inline]
-    fn as_u8x16(self) -> ::v128::u8x16 {
+    fn as_u8x16(self) -> ::coresimd::v128::u8x16 {
        unsafe { mem::transmute(self.as_m128i()) }
    }

    #[inline]
-    fn as_u16x8(self) -> ::v128::u16x8 {
+    fn as_u16x8(self) -> ::coresimd::v128::u16x8 {
        unsafe { mem::transmute(self.as_m128i()) }
    }

    #[inline]
-    fn as_u32x4(self) -> ::v128::u32x4 {
+    fn as_u32x4(self) -> ::coresimd::v128::u32x4 {
        unsafe { mem::transmute(self.as_m128i()) }
    }

    #[inline]
-    fn as_u64x2(self) -> ::v128::u64x2 {
+    fn as_u64x2(self) -> ::coresimd::v128::u64x2 {
        unsafe { mem::transmute(self.as_m128i()) }
    }

    #[inline]
-    fn as_i8x16(self) -> ::v128::i8x16 {
+    fn as_i8x16(self) -> ::coresimd::v128::i8x16 {
        unsafe { mem::transmute(self.as_m128i()) }
    }

    #[inline]
-    fn as_i16x8(self) -> ::v128::i16x8 {
+    fn as_i16x8(self) -> ::coresimd::v128::i16x8 {
        unsafe { mem::transmute(self.as_m128i()) }
    }

    #[inline]
-    fn as_i32x4(self) -> ::v128::i32x4 {
+    fn as_i32x4(self) -> ::coresimd::v128::i32x4 {
        unsafe { mem::transmute(self.as_m128i()) }
    }

    #[inline]
-    fn as_i64x2(self) -> ::v128::i64x2 {
+    fn as_i64x2(self) -> ::coresimd::v128::i64x2 {
        unsafe { mem::transmute(self.as_m128i()) }
    }
 }
@@ -362,42 +384,42 @@ trait m256iExt: Sized {
    fn as_m256i(self) -> __m256i;

    #[inline]
-    fn as_u8x32(self) -> ::v256::u8x32 {
+    fn as_u8x32(self) -> ::coresimd::v256::u8x32 {
        unsafe { mem::transmute(self.as_m256i()) }
    }

    #[inline]
-    fn as_u16x16(self) -> ::v256::u16x16 {
+    fn as_u16x16(self) -> ::coresimd::v256::u16x16 {
        unsafe { mem::transmute(self.as_m256i()) }
    }

    #[inline]
-    fn as_u32x8(self) -> ::v256::u32x8 {
+    fn as_u32x8(self) -> ::coresimd::v256::u32x8 {
        unsafe { mem::transmute(self.as_m256i()) }
    }

    #[inline]
-    fn as_u64x4(self) -> ::v256::u64x4 {
+    fn as_u64x4(self) -> ::coresimd::v256::u64x4 {
        unsafe { mem::transmute(self.as_m256i()) }
    }

    #[inline]
-    fn as_i8x32(self) -> ::v256::i8x32 {
+    fn as_i8x32(self) -> ::coresimd::v256::i8x32 {
        unsafe { mem::transmute(self.as_m256i()) }
    }

    #[inline]
-    fn as_i16x16(self) -> ::v256::i16x16 {
+    fn as_i16x16(self) -> ::coresimd::v256::i16x16 {
        unsafe { mem::transmute(self.as_m256i()) }
    }

    #[inline]
-    fn as_i32x8(self) -> ::v256::i32x8 {
+    fn as_i32x8(self) -> ::coresimd::v256::i32x8 {
        unsafe { mem::transmute(self.as_m256i()) }
    }

    #[inline]
-    fn as_i64x4(self) -> ::v256::i64x4 {
+    fn as_i64x4(self) -> ::coresimd::v256::i64x4 {
        unsafe { mem::transmute(self.as_m256i()) }
    }
 }
--- a/library/stdarch/coresimd/src/x86/test.rs
+++ b/library/stdarch/coresimd/src/x86/test.rs
@@ -1,6 +1,6 @@
 //! Utilities used in testing the x86 intrinsics

-use x86::*;
+use coresimd::x86::*;

 #[target_feature(enable = "mmx")]
 pub unsafe fn assert_eq_m64(a: __m64, b: __m64) {
@@ -107,7 +107,7 @@ pub unsafe fn get_m256(a: __m256, idx: usize) -> f32 {
 // which doesn't exist on x86!
 #[cfg(target_arch = "x86")]
 mod x86_polyfill {
-    use x86::*;
+    use coresimd::x86::*;

    pub unsafe fn _mm_insert_epi64(a: __m128i, val: i64, idx: i32) -> __m128i {
        union A {
--- a/library/stdarch/coresimd/src/x86/x86_64/abm.rs
+++ b/library/stdarch/coresimd/src/x86/x86_64/abm.rs
@@ -42,7 +42,7 @@ pub unsafe fn _popcnt64(x: i64) -> i32 {
 mod tests {
    use stdsimd_test::simd_test;

-    use x86::*;
+    use coresimd::x86::*;

    #[simd_test = "lzcnt"]
    unsafe fn test_lzcnt_u64() {
--- a/library/stdarch/coresimd/src/x86/x86_64/avx.rs
+++ b/library/stdarch/coresimd/src/x86/x86_64/avx.rs
@@ -13,10 +13,9 @@
 //! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
 //! [wiki]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions

-use core::mem;
-
-use simd_llvm::*;
-use x86::*;
+use coresimd::simd_llvm::*;
+use coresimd::x86::*;
+use mem;

 /// Copy `a` to result, and insert the 64-bit integer `i` into result
 /// at the location specified by `index`.
@@ -32,7 +31,7 @@ pub unsafe fn _mm256_insert_epi64(a: __m256i, i: i64, index: i32) -> __m256i {
 mod tests {
    use stdsimd_test::simd_test;

-    use x86::*;
+    use coresimd::x86::*;

    #[simd_test = "avx"]
    unsafe fn test_mm256_insert_epi64() {
--- a/library/stdarch/coresimd/src/x86/x86_64/avx2.rs
+++ b/library/stdarch/coresimd/src/x86/x86_64/avx2.rs
@@ -18,8 +18,8 @@
 //! [wiki_avx]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions
 //! [wiki_fma]: https://en.wikipedia.org/wiki/Fused_multiply-accumulate

-use simd_llvm::*;
-use x86::*;
+use coresimd::simd_llvm::*;
+use coresimd::x86::*;

 /// Extract a 64-bit integer from `a`, selected with `imm8`.
 #[inline]
@@ -35,7 +35,7 @@ pub unsafe fn _mm256_extract_epi64(a: __m256i, imm8: i32) -> i64 {
 mod tests {
    use stdsimd_test::simd_test;

-    use x86::*;
+    use coresimd::x86::*;

    #[simd_test = "avx2"]
    unsafe fn test_mm256_extract_epi64() {
--- a/library/stdarch/coresimd/src/x86/x86_64/bmi.rs
+++ b/library/stdarch/coresimd/src/x86/x86_64/bmi.rs
@@ -101,7 +101,7 @@ extern "C" {
 mod tests {
    use stdsimd_test::simd_test;

-    use x86::*;
+    use coresimd::x86::*;

    #[simd_test = "bmi"]
    unsafe fn test_bextr_u64() {
--- a/library/stdarch/coresimd/src/x86/x86_64/bmi2.rs
+++ b/library/stdarch/coresimd/src/x86/x86_64/bmi2.rs
@@ -69,7 +69,7 @@ extern "C" {
 mod tests {
    use stdsimd_test::simd_test;

-    use x86::*;
+    use coresimd::x86::*;

    #[simd_test = "bmi2"]
    unsafe fn test_pext_u64() {
--- a/library/stdarch/coresimd/src/x86/x86_64/bswap.rs
+++ b/library/stdarch/coresimd/src/x86/x86_64/bswap.rs
--- a/library/stdarch/coresimd/src/x86/x86_64/fxsr.rs
+++ b/library/stdarch/coresimd/src/x86/x86_64/fxsr.rs
@@ -51,7 +51,7 @@ pub unsafe fn _fxrstor64(mem_addr: *const u8) {

 #[cfg(test)]
 mod tests {
-    use x86::x86_64::fxsr;
+    use coresimd::x86::x86_64::fxsr;
    use stdsimd_test::simd_test;
    use std::fmt;

--- a/library/stdarch/coresimd/src/x86/x86_64/mod.rs
+++ b/library/stdarch/coresimd/src/x86/x86_64/mod.rs
--- a/library/stdarch/coresimd/src/x86/x86_64/sse.rs
+++ b/library/stdarch/coresimd/src/x86/x86_64/sse.rs
@@ -1,6 +1,6 @@
 //! `x86_64` Streaming SIMD Extensions (SSE)

-use x86::*;
+use coresimd::x86::*;

 #[cfg(test)]
 use stdsimd_test::assert_instr;
@@ -66,7 +66,7 @@ mod tests {

    use stdsimd_test::simd_test;

-    use x86::*;
+    use coresimd::x86::*;

    #[simd_test = "sse"]
    unsafe fn test_mm_cvtss_si64() {
--- a/library/stdarch/coresimd/src/x86/x86_64/sse2.rs
+++ b/library/stdarch/coresimd/src/x86/x86_64/sse2.rs
@@ -1,7 +1,8 @@
 //! `x86_64`'s Streaming SIMD Extensions 2 (SSE2)

-use x86::*;
-use simd_llvm::*;
+use coresimd::x86::*;
+use coresimd::simd_llvm::*;
+use intrinsics;

 #[cfg(test)]
 use stdsimd_test::assert_instr;
@@ -55,7 +56,7 @@ pub unsafe fn _mm_cvttsd_si64x(a: __m128d) -> i64 {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(movnti))]
 pub unsafe fn _mm_stream_si64(mem_addr: *mut i64, a: i64) {
-    ::core::intrinsics::nontemporal_store(mem_addr, a);
+    intrinsics::nontemporal_store(mem_addr, a);
 }

 /// Return a vector whose lowest element is `a` and all higher elements are
@@ -116,7 +117,7 @@ mod tests {

    use stdsimd_test::simd_test;

-    use x86::*;
+    use coresimd::x86::*;

    #[simd_test = "sse2"]
    unsafe fn test_mm_cvtsd_si64() {
--- a/library/stdarch/coresimd/src/x86/x86_64/sse41.rs
+++ b/library/stdarch/coresimd/src/x86/x86_64/sse41.rs
@@ -1,9 +1,8 @@
 //! `i686`'s Streaming SIMD Extensions 4.1 (SSE4.1)

-use core::mem;
-
-use x86::*;
-use simd_llvm::*;
+use coresimd::x86::*;
+use coresimd::simd_llvm::*;
+use mem;

 #[cfg(test)]
 use stdsimd_test::assert_instr;
@@ -32,7 +31,7 @@ pub unsafe fn _mm_insert_epi64(a: __m128i, i: i64, imm8: i32) -> __m128i {
 #[cfg(test)]
 mod tests {
    use stdsimd_test::simd_test;
-    use x86::*;
+    use coresimd::x86::*;

    #[simd_test = "sse4.1"]
    unsafe fn test_mm_extract_epi64() {
--- a/library/stdarch/coresimd/src/x86/x86_64/sse42.rs
+++ b/library/stdarch/coresimd/src/x86/x86_64/sse42.rs
@@ -20,7 +20,7 @@ pub unsafe fn _mm_crc32_u64(crc: u64, v: u64) -> u64 {

 #[cfg(test)]
 mod tests {
-    use x86::*;
+    use coresimd::x86::*;

    use stdsimd_test::simd_test;

--- a/library/stdarch/coresimd/src/x86/x86_64/xsave.rs
+++ b/library/stdarch/coresimd/src/x86/x86_64/xsave.rs
@@ -111,7 +111,7 @@ pub unsafe fn _xrstors64(mem_addr: *const u8, rs_mask: u64) {
 /*
 #[cfg(test)]
 mod tests {
-    use x86::x86_64::xsave;
+    use coresimd::x86::x86_64::xsave;
    use stdsimd_test::simd_test;
    use std::fmt;

--- a/library/stdarch/stdsimd-test/assert-instr-macro/Cargo.toml
+++ b/library/stdarch/stdsimd-test/assert-instr-macro/Cargo.toml
--- a/library/stdarch/stdsimd-test/assert-instr-macro/build.rs
+++ b/library/stdarch/stdsimd-test/assert-instr-macro/build.rs
--- a/library/stdarch/stdsimd-test/assert-instr-macro/src/lib.rs
+++ b/library/stdarch/stdsimd-test/assert-instr-macro/src/lib.rs
--- a/library/stdarch/crates/coresimd/Cargo.toml
+++ b/library/stdarch/crates/coresimd/Cargo.toml
@@ -19,9 +19,8 @@ is-it-maintained-open-issues = { repository = "rust-lang-nursery/stdsimd" }
 maintenance = { status = "experimental" }

 [dev-dependencies]
-cupid = "0.5.0"
 stdsimd-test = { version = "0.*", path = "../stdsimd-test" }
-stdsimd = { version = "0.0.3", path = ".." }
+stdsimd = { version = "0.0.3", path = "../stdsimd" }

 [features]
 # Internal-usage only: denies all warnings.
--- a/library/stdarch/crates/coresimd/src/lib.rs
+++ b/library/stdarch/crates/coresimd/src/lib.rs
@@ -0,0 +1,82 @@
+//! SIMD and vendor intrinsics support library.
+//!
+//! This documentation is only for one particular architecture, you can find
+//! others at:
+//!
+//! * [i686](https://rust-lang-nursery.github.io/stdsimd/i686/stdsimd/)
+//! * [`x86_64`](https://rust-lang-nursery.github.io/stdsimd/x86_64/stdsimd/)
+//! * [arm](https://rust-lang-nursery.github.io/stdsimd/arm/stdsimd/)
+//! * [aarch64](https://rust-lang-nursery.github.io/stdsimd/aarch64/stdsimd/)
+
+#![cfg_attr(feature = "strict", deny(warnings))]
+#![allow(dead_code)]
+#![allow(unused_features)]
+#![feature(const_fn, link_llvm_intrinsics, platform_intrinsics, repr_simd,
+           simd_ffi, target_feature, cfg_target_feature, i128_type, asm,
+           integer_atomics, stmt_expr_attributes, core_intrinsics,
+           crate_in_paths, no_core, attr_literals, rustc_attrs)]
+#![cfg_attr(test, feature(proc_macro, test, attr_literals, abi_vectorcall))]
+#![cfg_attr(feature = "cargo-clippy",
+            allow(inline_always, too_many_arguments, cast_sign_loss,
+                  cast_lossless, cast_possible_wrap,
+                  cast_possible_truncation, cast_precision_loss,
+                  shadow_reuse, cyclomatic_complexity, similar_names,
+                  many_single_char_names))]
+#![cfg_attr(test, allow(unused_imports))]
+#![no_core]
+
+#[cfg_attr(not(test), macro_use)]
+extern crate core as _core;
+#[cfg(test)]
+#[macro_use]
+extern crate std;
+#[cfg(test)]
+extern crate stdsimd_test;
+#[cfg(test)]
+extern crate test;
+#[cfg(test)]
+#[macro_use]
+extern crate stdsimd;
+
+#[path = "../../../coresimd/mod.rs"]
+mod coresimd;
+
+pub use coresimd::simd;
+
+pub mod arch {
+    #[cfg(target_arch = "x86")]
+    pub mod x86 { pub use coresimd::vendor::*; }
+    #[cfg(target_arch = "x86_64")]
+    pub mod x86_64 { pub use coresimd::vendor::*; }
+    #[cfg(target_arch = "arm")]
+    pub mod arm { pub use coresimd::vendor::*; }
+    #[cfg(target_arch = "aarch64")]
+    pub mod aarch64 { pub use coresimd::vendor::*; }
+}
+
+#[allow(unused_imports)]
+use _core::clone;
+#[allow(unused_imports)]
+use _core::cmp;
+#[allow(unused_imports)]
+use _core::convert;
+#[allow(unused_imports)]
+use _core::fmt;
+#[allow(unused_imports)]
+use _core::intrinsics;
+#[allow(unused_imports)]
+use _core::iter;
+#[allow(unused_imports)]
+use _core::marker;
+#[allow(unused_imports)]
+use _core::mem;
+#[allow(unused_imports)]
+use _core::ops;
+#[allow(unused_imports)]
+use _core::option;
+#[allow(unused_imports)]
+use _core::prelude;
+#[allow(unused_imports)]
+use _core::ptr;
+#[allow(unused_imports)]
+use _core::result;
--- a/library/stdarch/crates/coresimd/tests/cpu-detection.rs
+++ b/library/stdarch/crates/coresimd/tests/cpu-detection.rs
@@ -0,0 +1,47 @@
+#![feature(cfg_target_feature)]
+#![cfg_attr(feature = "strict", deny(warnings))]
+#![cfg_attr(feature = "cargo-clippy",
+            allow(option_unwrap_used, print_stdout, use_debug))]
+
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+#[macro_use]
+extern crate stdsimd;
+
+#[test]
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+fn x86_all() {
+    println!("sse: {:?}", is_target_feature_detected!("sse"));
+    println!("sse2: {:?}", is_target_feature_detected!("sse2"));
+    println!("sse3: {:?}", is_target_feature_detected!("sse3"));
+    println!("ssse3: {:?}", is_target_feature_detected!("ssse3"));
+    println!("sse4.1: {:?}", is_target_feature_detected!("sse4.1"));
+    println!("sse4.2: {:?}", is_target_feature_detected!("sse4.2"));
+    println!("sse4a: {:?}", is_target_feature_detected!("sse4a"));
+    println!("avx: {:?}", is_target_feature_detected!("avx"));
+    println!("avx2: {:?}", is_target_feature_detected!("avx2"));
+    println!("avx512f {:?}", is_target_feature_detected!("avx512f"));
+    println!("avx512cd {:?}", is_target_feature_detected!("avx512cd"));
+    println!("avx512er {:?}", is_target_feature_detected!("avx512er"));
+    println!("avx512pf {:?}", is_target_feature_detected!("avx512pf"));
+    println!("avx512bw {:?}", is_target_feature_detected!("avx512bw"));
+    println!("avx512dq {:?}", is_target_feature_detected!("avx512dq"));
+    println!("avx512vl {:?}", is_target_feature_detected!("avx512vl"));
+    println!("avx512_ifma {:?}", is_target_feature_detected!("avx512ifma"));
+    println!("avx512_vbmi {:?}", is_target_feature_detected!("avx512vbmi"));
+    println!(
+        "avx512_vpopcntdq {:?}",
+        is_target_feature_detected!("avx512vpopcntdq")
+    );
+    println!("fma: {:?}", is_target_feature_detected!("fma"));
+    println!("abm: {:?}", is_target_feature_detected!("abm"));
+    println!("bmi: {:?}", is_target_feature_detected!("bmi"));
+    println!("bmi2: {:?}", is_target_feature_detected!("bmi2"));
+    println!("tbm: {:?}", is_target_feature_detected!("tbm"));
+    println!("popcnt: {:?}", is_target_feature_detected!("popcnt"));
+    println!("lzcnt: {:?}", is_target_feature_detected!("lzcnt"));
+    println!("fxsr: {:?}", is_target_feature_detected!("fxsr"));
+    println!("xsave: {:?}", is_target_feature_detected!("xsave"));
+    println!("xsaveopt: {:?}", is_target_feature_detected!("xsaveopt"));
+    println!("xsaves: {:?}", is_target_feature_detected!("xsaves"));
+    println!("xsavec: {:?}", is_target_feature_detected!("xsavec"));
+}
--- a/library/stdarch/stdsimd-test/simd-test-macro/Cargo.toml
+++ b/library/stdarch/stdsimd-test/simd-test-macro/Cargo.toml
--- a/library/stdarch/stdsimd-test/simd-test-macro/src/lib.rs
+++ b/library/stdarch/stdsimd-test/simd-test-macro/src/lib.rs
@@ -58,7 +58,7 @@ pub fn simd_test(
    for feature in target_features {
        let q = quote_spanned! {
            proc_macro2::Span::call_site() =>
-            cfg_feature_enabled!(#feature) &&
+            is_target_feature_detected!(#feature) &&
        };
        q.to_tokens(&mut cfg_target_features);
    }
--- a/library/stdarch/crates/stdsimd-test/Cargo.toml
+++ b/library/stdarch/crates/stdsimd-test/Cargo.toml
@@ -4,8 +4,8 @@ version = "0.1.0"
 authors = ["Alex Crichton <alex@alexcrichton.com>"]

 [dependencies]
-assert-instr-macro = { path = "assert-instr-macro" }
-simd-test-macro = { path = "simd-test-macro" }
+assert-instr-macro = { path = "../assert-instr-macro" }
+simd-test-macro = { path = "../simd-test-macro" }
 backtrace = "0.3"
 cc = "1.0"
 lazy_static = "0.2"
--- a/library/stdarch/crates/stdsimd-test/src/lib.rs
+++ b/library/stdarch/crates/stdsimd-test/src/lib.rs
--- a/library/stdarch/crates/stdsimd-verify/.gitattributes
+++ b/library/stdarch/crates/stdsimd-verify/.gitattributes
--- a/library/stdarch/crates/stdsimd-verify/Cargo.toml
+++ b/library/stdarch/crates/stdsimd-verify/Cargo.toml
--- a/library/stdarch/crates/stdsimd-verify/build.rs
+++ b/library/stdarch/crates/stdsimd-verify/build.rs
@@ -3,7 +3,7 @@ use std::path::Path;
 fn main() {
    let dir = Path::new(env!("CARGO_MANIFEST_DIR"));
    let root = dir.parent().unwrap();
-    let root = root.join("coresimd/src/x86");
+    let root = root.join("../coresimd/x86");
    walk(&root);
 }

--- a/library/stdarch/crates/stdsimd-verify/src/lib.rs
+++ b/library/stdarch/crates/stdsimd-verify/src/lib.rs
@@ -22,7 +22,7 @@ macro_rules! my_quote {
 pub fn x86_functions(input: TokenStream) -> TokenStream {
    let dir = Path::new(env!("CARGO_MANIFEST_DIR"));
    let root = dir.parent().unwrap();
-    let root = root.join("coresimd/src/x86");
+    let root = root.join("../coresimd/x86");

    let mut files = Vec::new();
    walk(&root, &mut files);
--- a/library/stdarch/crates/stdsimd-verify/tests/x86-intel.rs
+++ b/library/stdarch/crates/stdsimd-verify/tests/x86-intel.rs
--- a/library/stdarch/crates/stdsimd-verify/x86-intel.xml
+++ b/library/stdarch/crates/stdsimd-verify/x86-intel.xml
--- a/library/stdarch/crates/stdsimd/Cargo.toml
+++ b/library/stdarch/crates/stdsimd/Cargo.toml
@@ -0,0 +1,37 @@
+[package]
+name = "stdsimd"
+version = "0.0.3"
+authors = ["Andrew Gallant <jamslam@gmail.com>"]
+description = "SIMD support in Rust's standard library."
+documentation = "https://docs.rs/stdsimd"
+homepage = "https://github.com/rust-lang-nursery/stdsimd"
+repository = "https://github.com/rust-lang-nursery/stdsimd"
+readme = "README.md"
+keywords = ["std", "simd", "intrinsics"]
+categories = ["hardware-support"]
+license = "MIT/Apache-2.0"
+
+[badges]
+travis-ci = { repository = "rust-lang-nursery/stdsimd" }
+appveyor = { repository = "rust-lang-nursery/stdsimd"  }
+is-it-maintained-issue-resolution = { repository = "rust-lang-nursery/stdsimd" }
+is-it-maintained-open-issues = { repository = "rust-lang-nursery/stdsimd" }
+maintenance = { status = "experimental" }
+
+[dependencies]
+coresimd = { version = "0.0.3", path = "../coresimd" }
+libc = "0.2"
+cfg-if = "0.1"
+
+[dev-dependencies]
+auxv = "0.3.3"
+quickcheck = "0.6"
+rand = "0.4"
+cupid = "0.5.0"
+
+[features]
+# Internal-usage only: denies all warnings.
+strict = [ "coresimd/strict" ]
+# Internal-usage only: enables only those intrinsics supported by Intel's
+# Software Development Environment (SDE).
+intel_sde = [ "coresimd/intel_sde" ]
--- a/library/stdarch/crates/stdsimd/src/lib.rs
+++ b/library/stdarch/crates/stdsimd/src/lib.rs
@@ -25,7 +25,7 @@
 //!
 //! * `cfg!(target_feature = "feature")`: returns `true` if the `feature` is
 //! enabled in all CPUs that the binary will run on (at compile-time)
-//! * `cfg_feature_enabled!("feature")`: returns `true` if the `feature` is
+//! * `is_target_feature_detected!("feature")`: returns `true` if the `feature` is
 //! enabled in the CPU in which the binary is currently running on (at
 //! run-time, unless the result is known at compile time)
 //!
@@ -36,7 +36,6 @@
 //!
 //! #[macro_use]
 //! extern crate stdsimd;
-//! use stdsimd::vendor;
 //! use stdsimd::simd::i32x4;
 //!
 //! fn main() {
@@ -65,11 +64,16 @@
 //! #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
 //! #[target_feature(enable = "sse2")]
 //! unsafe fn sum_sse2(x: i32x4) -> i32 {
+//!     #[cfg(target_arch = "x86")]
+//!     use stdsimd::arch::x86::*;;
+//!     #[cfg(target_arch = "x86_64")]
+//!     use stdsimd::arch::x86_64::*;;
 //!     use std::mem;
-//!     let x: vendor::__m128i = mem::transmute(x);
-//!     let x = vendor::_mm_add_epi32(x, vendor::_mm_srli_si128(x, 8));
-//!     let x = vendor::_mm_add_epi32(x, vendor::_mm_srli_si128(x, 4));
-//!     let ret = vendor::_mm_cvtsi128_si32(x);
+//!
+//!     let x: __m128i = mem::transmute(x);
+//!     let x = _mm_add_epi32(x, _mm_srli_si128(x, 8));
+//!     let x = _mm_add_epi32(x, _mm_srli_si128(x, 4));
+//!     let ret = _mm_cvtsi128_si32(x);
 //!     mem::transmute(ret)
 //! }
 //!
@@ -97,7 +101,7 @@
 //!     {
 //!         // If SSE2 is not enabled at compile-time, this
 //!         // detects whether SSE2 is available at run-time:
-//!         if cfg_feature_enabled!("sse2") {
+//!         if is_target_feature_detected!("sse2") {
 //!             return unsafe { sum_sse2(x) };
 //!         }
 //!     }
@@ -128,56 +132,25 @@
 //! [simd_soundness_bug]: https://github.com/rust-lang/rust/issues/44367
 //! [target_feature_impr]: https://github.com/rust-lang/rust/issues/44839

-#![feature(const_fn, const_size_of, use_extern_macros, cfg_target_feature)]
+#![feature(const_fn, integer_atomics)]
 #![cfg_attr(target_os = "linux", feature(linkage))]
+#![no_std]
+
+extern crate std as _std;
 extern crate coresimd;
-
-/// Re-export run-time feature detection macros.
-#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "arm",
-          target_arch = "aarch64", target_arch = "powerpc64"))]
-pub use coresimd::__unstable_detect_feature;
-
-/// Platform dependent vendor intrinsics.
-pub mod vendor {
-    #[doc(inline)]
-    pub use coresimd::vendor::*;
-}
-
-/// Run-time feature detection.
-#[doc(hidden)]
-pub mod __vendor_runtime {
-    #[cfg(any(target_arch = "x86", target_arch = "x86_64",
-              all(target_os = "linux",
-                  any(target_arch = "arm", target_arch = "aarch64",
-                      target_arch = "powerpc64"))))]
-    pub use runtime::std::*;
-}
-
-/// Platform independent SIMD vector types and operations.
-pub mod simd {
-    #[doc(inline)]
-    pub use coresimd::simd::*;
-}
-
-/// The `stdsimd` run-time.
+extern crate libc;
 #[macro_use]
-#[cfg(any(target_arch = "x86", target_arch = "x86_64",
-          all(target_os = "linux",
-              any(target_arch = "arm", target_arch = "aarch64",
-                  target_arch = "powerpc64"))))]
-mod runtime;
+extern crate cfg_if;

-/// Error gracefully in architectures without run-time detection support.
-#[cfg(not(any(target_arch = "x86", target_arch = "x86_64",
-              all(target_os = "linux",
-                  any(target_arch = "arm", target_arch = "aarch64",
-                      target_arch = "powerpc64")))))]
-#[doc(hidden)]
-#[macro_export]
-macro_rules! cfg_feature_enabled {
-    ($name:tt) => (
-        {
-            compile_error!("cfg_target_feature! is not supported in this architecture")
-        }
-    )
-}
+#[cfg(test)]
+#[macro_use]
+extern crate std;
+
+#[path = "../../../stdsimd/mod.rs"]
+mod stdsimd;
+
+pub use stdsimd::*;
+
+pub use _std::prelude;
+pub use _std::fs;
+pub use _std::io;
--- a/library/stdarch/crates/stdsimd/tests/cpu-detection.rs
+++ b/library/stdarch/crates/stdsimd/tests/cpu-detection.rs
@@ -0,0 +1,72 @@
+#![feature(cfg_target_feature)]
+#![cfg_attr(feature = "strict", deny(warnings))]
+#![cfg_attr(feature = "cargo-clippy",
+            allow(option_unwrap_used, use_debug, print_stdout))]
+
+#[cfg(any(target_arch = "arm", target_arch = "aarch64",
+          target_arch = "x86", target_arch = "x86_64",
+          target_arch = "powerpc64"))]
+#[macro_use]
+extern crate stdsimd;
+
+#[test]
+#[cfg(all(target_arch = "arm", target_os = "linux"))]
+fn arm_linux() {
+    println!("neon: {}", is_target_feature_detected!("neon"));
+    println!("pmull: {}", is_target_feature_detected!("pmull"));
+}
+
+#[test]
+#[cfg(all(target_arch = "aarch64", target_os = "linux"))]
+fn aarch64_linux() {
+    println!("neon: {}", is_target_feature_detected!("neon"));
+    println!("asimd: {}", is_target_feature_detected!("asimd"));
+    println!("pmull: {}", is_target_feature_detected!("pmull"));
+}
+
+#[test]
+#[cfg(all(target_arch = "powerpc64", target_os = "linux"))]
+fn powerpc64_linux() {
+    println!("altivec: {}", is_target_feature_detected!("altivec"));
+    println!("vsx: {}", is_target_feature_detected!("vsx"));
+    println!("power8: {}", is_target_feature_detected!("power8"));
+}
+
+#[test]
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+fn x86_all() {
+    println!("sse: {:?}", is_target_feature_detected!("sse"));
+    println!("sse2: {:?}", is_target_feature_detected!("sse2"));
+    println!("sse3: {:?}", is_target_feature_detected!("sse3"));
+    println!("ssse3: {:?}", is_target_feature_detected!("ssse3"));
+    println!("sse4.1: {:?}", is_target_feature_detected!("sse4.1"));
+    println!("sse4.2: {:?}", is_target_feature_detected!("sse4.2"));
+    println!("sse4a: {:?}", is_target_feature_detected!("sse4a"));
+    println!("avx: {:?}", is_target_feature_detected!("avx"));
+    println!("avx2: {:?}", is_target_feature_detected!("avx2"));
+    println!("avx512f {:?}", is_target_feature_detected!("avx512f"));
+    println!("avx512cd {:?}", is_target_feature_detected!("avx512cd"));
+    println!("avx512er {:?}", is_target_feature_detected!("avx512er"));
+    println!("avx512pf {:?}", is_target_feature_detected!("avx512pf"));
+    println!("avx512bw {:?}", is_target_feature_detected!("avx512bw"));
+    println!("avx512dq {:?}", is_target_feature_detected!("avx512dq"));
+    println!("avx512vl {:?}", is_target_feature_detected!("avx512vl"));
+    println!("avx512_ifma {:?}", is_target_feature_detected!("avx512ifma"));
+    println!("avx512_vbmi {:?}", is_target_feature_detected!("avx512vbmi"));
+    println!(
+        "avx512_vpopcntdq {:?}",
+        is_target_feature_detected!("avx512vpopcntdq")
+    );
+    println!("fma: {:?}", is_target_feature_detected!("fma"));
+    println!("abm: {:?}", is_target_feature_detected!("abm"));
+    println!("bmi: {:?}", is_target_feature_detected!("bmi"));
+    println!("bmi2: {:?}", is_target_feature_detected!("bmi2"));
+    println!("tbm: {:?}", is_target_feature_detected!("tbm"));
+    println!("popcnt: {:?}", is_target_feature_detected!("popcnt"));
+    println!("lzcnt: {:?}", is_target_feature_detected!("lzcnt"));
+    println!("fxsr: {:?}", is_target_feature_detected!("fxsr"));
+    println!("xsave: {:?}", is_target_feature_detected!("xsave"));
+    println!("xsaveopt: {:?}", is_target_feature_detected!("xsaveopt"));
+    println!("xsaves: {:?}", is_target_feature_detected!("xsaves"));
+    println!("xsavec: {:?}", is_target_feature_detected!("xsavec"));
+}
--- a/Show More
+++ b/Show More