//! Caches run-time feature detection so that it only needs to be computed
//! once.
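//!
//! Design notes (describing the implementation below): the detected features
//! are stored as a bitset behind atomics, and `Relaxed` ordering is used
//! throughout. Initialization is idempotent, since every initializer computes
//! the same bitset, so a race between threads at worst recomputes it.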

#![allow(dead_code)] // not used on all platforms

use core::sync::atomic::Ordering;

#[cfg(target_pointer_width = "64")]
use core::sync::atomic::AtomicU64;

#[cfg(target_pointer_width = "32")]
use core::sync::atomic::AtomicU32;

/// Sets the `bit` of `x`.
#[inline]
const fn set_bit(x: u64, bit: u32) -> u64 {
    x | 1 << bit
}

/// Tests the `bit` of `x`.
#[inline]
const fn test_bit(x: u64, bit: u32) -> bool {
    x & (1 << bit) != 0
}
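
// Illustrative example (not part of the original source): setting and then
// testing bit 3 of an empty bitset.
//
//     let x = set_bit(0, 3);    // x == 0b1000
//     assert!(test_bit(x, 3));  // bit 3 is set
//     assert!(!test_bit(x, 2)); // bit 2 is not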

/// Maximum number of features that can be cached.
const CACHE_CAPACITY: u32 = 63;
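// 63 rather than 64: the all-ones bit pattern marks an uninitialized cache
// (see `Cache::uninitialized` below), so the last bit can never be used as a
// feature bit.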

/// This type is used to initialize the cache.
#[derive(Copy, Clone)]
pub(crate) struct Initializer(u64);

impl Default for Initializer {
    fn default() -> Self {
        Initializer(0)
    }
}

impl Initializer {
    /// Tests the `bit` of the cache.
    #[allow(dead_code)]
    #[inline]
    pub(crate) fn test(self, bit: u32) -> bool {
        // FIXME: this way of making sure that the cache is large enough is
        // brittle.
        debug_assert!(
            bit < CACHE_CAPACITY,
            "too many features, time to increase the cache size!"
        );
        test_bit(self.0, bit)
    }

    /// Sets the `bit` of the cache.
    #[inline]
    pub(crate) fn set(&mut self, bit: u32) {
        // FIXME: this way of making sure that the cache is large enough is
        // brittle.
        debug_assert!(
            bit < CACHE_CAPACITY,
            "too many features, time to increase the cache size!"
        );
        let v = self.0;
        self.0 = set_bit(v, bit);
    }
}
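
// Illustrative sketch (hypothetical names): an architecture-specific detector
// would build an `Initializer` by setting one bit per detected feature and
// then hand it to `Cache::initialize`:
//
//     let mut value = Initializer::default();
//     if cpuid_says_avx2() {   // hypothetical detection helper
//         value.set(AVX2_BIT); // hypothetical feature bit index
//     }
//     CACHE.initialize(value);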

/// This global variable is a cache of the features supported by the CPU.
static CACHE: Cache = Cache::uninitialized();

/// Feature cache with capacity for `CACHE_CAPACITY` features.
///
/// Note: the last feature bit is used to represent an
/// uninitialized cache.
#[cfg(target_pointer_width = "64")]
struct Cache(AtomicU64);

#[cfg(target_pointer_width = "64")]
impl Cache {
    /// Creates an uninitialized cache.
    const fn uninitialized() -> Self {
        const X: AtomicU64 = AtomicU64::new(u64::max_value());
        Self(X)
    }

    /// Is the cache uninitialized?
    #[inline]
    pub(crate) fn is_uninitialized(&self) -> bool {
        self.0.load(Ordering::Relaxed) == u64::max_value()
    }

    /// Is the `bit` in the cache set?
    #[inline]
    pub(crate) fn test(&self, bit: u32) -> bool {
        test_bit(CACHE.0.load(Ordering::Relaxed), bit)
    }

    /// Initializes the cache.
    #[inline]
    pub(crate) fn initialize(&self, value: Initializer) {
        self.0.store(value.0, Ordering::Relaxed);
    }
}

/// Feature cache with capacity for `CACHE_CAPACITY` features.
///
/// Note: the last feature bit is used to represent an
/// uninitialized cache.
#[cfg(target_pointer_width = "32")]
struct Cache(AtomicU32, AtomicU32);

#[cfg(target_pointer_width = "32")]
impl Cache {
    /// Creates an uninitialized cache.
    const fn uninitialized() -> Self {
        Cache(
            AtomicU32::new(u32::max_value()),
            AtomicU32::new(u32::max_value()),
        )
    }

    /// Is the cache uninitialized?
    #[inline]
    pub(crate) fn is_uninitialized(&self) -> bool {
        self.1.load(Ordering::Relaxed) == u32::max_value()
    }

    /// Is the `bit` in the cache set?
    #[inline]
    pub(crate) fn test(&self, bit: u32) -> bool {
        if bit < 32 {
            test_bit(CACHE.0.load(Ordering::Relaxed) as u64, bit)
        } else {
            test_bit(CACHE.1.load(Ordering::Relaxed) as u64, bit - 32)
        }
    }

    /// Initializes the cache.
    #[inline]
    pub(crate) fn initialize(&self, value: Initializer) {
        let lo: u32 = value.0 as u32;
        let hi: u32 = (value.0 >> 32) as u32;
        self.0.store(lo, Ordering::Relaxed);
        self.1.store(hi, Ordering::Relaxed);
    }
}
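
// Worked example (illustrative): for value.0 == 0x0000_0001_8000_0000,
// lo == 0x8000_0000 (feature bit 31) and hi == 0x0000_0001 (feature bit 32),
// so `test(31)` reads the low word and `test(32)` reads the high word.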

/// Tests the `bit` of the storage. If the storage has not been initialized,
/// initializes it with the result of `f()`.
///
/// On its first invocation, it detects the CPU features and caches them in
/// the `CACHE` global variable as an `AtomicU64` (or two `AtomicU32`s on
/// 32-bit targets).
///
/// It uses the `Feature` variant to index into this variable as a bitset. If
/// the bit is set, the feature is enabled; otherwise it is disabled.
#[inline]
pub(crate) fn test<F>(bit: u32, f: F) -> bool
where
    F: FnOnce() -> Initializer,
{
    if CACHE.is_uninitialized() {
        CACHE.initialize(f());
    }
    CACHE.test(bit)
}
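
// A minimal sanity-check sketch (not present in the original file); the bit
// index `5` is arbitrary. It only exercises the pure helpers, since tests
// touching the global `CACHE` would leak state between test runs.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn bit_helpers_roundtrip() {
        let x = set_bit(0, 5);
        assert!(test_bit(x, 5));
        assert!(!test_bit(x, 6));
    }

    #[test]
    fn initializer_set_and_test() {
        let mut value = Initializer::default();
        assert!(!value.test(5));
        value.set(5);
        assert!(value.test(5));
    }
}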