add vcls, vclz, vcagt, vcage, vcalt, vcale neon instructions (#1072)
This commit is contained in:
@@ -1015,6 +1015,90 @@ pub unsafe fn vcltzq_f64(a: float64x2_t) -> uint64x2_t {
|
||||
simd_lt(a, transmute(b))
|
||||
}
|
||||
|
||||
/// Floating-point absolute compare greater than
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(facgt))]
|
||||
pub unsafe fn vcagt_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.facgt.v1i64.v1f64")]
|
||||
fn vcagt_f64_(a: float64x1_t, b: float64x1_t) -> uint64x1_t;
|
||||
}
|
||||
vcagt_f64_(a, b)
|
||||
}
|
||||
|
||||
/// Floating-point absolute compare greater than
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(facgt))]
|
||||
pub unsafe fn vcagtq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.facgt.v2i64.v2f64")]
|
||||
fn vcagtq_f64_(a: float64x2_t, b: float64x2_t) -> uint64x2_t;
|
||||
}
|
||||
vcagtq_f64_(a, b)
|
||||
}
|
||||
|
||||
/// Floating-point absolute compare greater than or equal
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(facge))]
|
||||
pub unsafe fn vcage_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.facge.v1i64.v1f64")]
|
||||
fn vcage_f64_(a: float64x1_t, b: float64x1_t) -> uint64x1_t;
|
||||
}
|
||||
vcage_f64_(a, b)
|
||||
}
|
||||
|
||||
/// Floating-point absolute compare greater than or equal
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(facge))]
|
||||
pub unsafe fn vcageq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.facge.v2i64.v2f64")]
|
||||
fn vcageq_f64_(a: float64x2_t, b: float64x2_t) -> uint64x2_t;
|
||||
}
|
||||
vcageq_f64_(a, b)
|
||||
}
|
||||
|
||||
/// Floating-point absolute compare less than
///
/// Implemented as `vcagt_f64` with the operands swapped, since
/// `|a| < |b|` is the same comparison as `|b| > |a|`.
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
// The expected instruction is `facgt` (not a dedicated "less than" form) because of the operand swap below.
#[cfg_attr(test, assert_instr(facgt))]
|
||||
pub unsafe fn vcalt_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t {
|
||||
// Note the argument order: `b` first, then `a`.
vcagt_f64(b, a)
|
||||
}
|
||||
|
||||
/// Floating-point absolute compare less than
///
/// Implemented as `vcagtq_f64` with the operands swapped, since
/// `|a| < |b|` is the same comparison as `|b| > |a|`.
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
// The expected instruction is `facgt` (not a dedicated "less than" form) because of the operand swap below.
#[cfg_attr(test, assert_instr(facgt))]
|
||||
pub unsafe fn vcaltq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t {
|
||||
// Note the argument order: `b` first, then `a`.
vcagtq_f64(b, a)
|
||||
}
|
||||
|
||||
/// Floating-point absolute compare less than or equal
///
/// Implemented as `vcage_f64` with the operands swapped, since
/// `|a| <= |b|` is the same comparison as `|b| >= |a|`.
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
// The expected instruction is `facge` (not a dedicated "less than or equal" form) because of the operand swap below.
#[cfg_attr(test, assert_instr(facge))]
|
||||
pub unsafe fn vcale_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t {
|
||||
// Note the argument order: `b` first, then `a`.
vcage_f64(b, a)
|
||||
}
|
||||
|
||||
/// Floating-point absolute compare less than or equal
///
/// Implemented as `vcageq_f64` with the operands swapped, since
/// `|a| <= |b|` is the same comparison as `|b| >= |a|`.
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
// The expected instruction is `facge` (not a dedicated "less than or equal" form) because of the operand swap below.
#[cfg_attr(test, assert_instr(facge))]
|
||||
pub unsafe fn vcaleq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t {
|
||||
// Note the argument order: `b` first, then `a`.
vcageq_f64(b, a)
|
||||
}
|
||||
|
||||
/// Multiply
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
@@ -2210,6 +2294,78 @@ mod test {
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vcagt_f64() {
|
||||
let a: f64 = -1.2;
|
||||
let b: f64 = -1.1;
|
||||
let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
|
||||
let r: u64x1 = transmute(vcagt_f64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vcagtq_f64() {
|
||||
let a: f64x2 = f64x2::new(-1.2, 0.0);
|
||||
let b: f64x2 = f64x2::new(-1.1, 0.0);
|
||||
let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0);
|
||||
let r: u64x2 = transmute(vcagtq_f64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vcage_f64() {
|
||||
let a: f64 = -1.2;
|
||||
let b: f64 = -1.1;
|
||||
let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
|
||||
let r: u64x1 = transmute(vcage_f64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vcageq_f64() {
|
||||
let a: f64x2 = f64x2::new(-1.2, 0.0);
|
||||
let b: f64x2 = f64x2::new(-1.1, 0.0);
|
||||
let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
|
||||
let r: u64x2 = transmute(vcageq_f64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vcalt_f64() {
|
||||
let a: f64 = -1.2;
|
||||
let b: f64 = -1.1;
|
||||
let e: u64x1 = u64x1::new(0);
|
||||
let r: u64x1 = transmute(vcalt_f64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vcaltq_f64() {
|
||||
let a: f64x2 = f64x2::new(-1.2, 0.0);
|
||||
let b: f64x2 = f64x2::new(-1.1, 0.0);
|
||||
let e: u64x2 = u64x2::new(0, 0);
|
||||
let r: u64x2 = transmute(vcaltq_f64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vcale_f64() {
|
||||
let a: f64 = -1.2;
|
||||
let b: f64 = -1.1;
|
||||
let e: u64x1 = u64x1::new(0);
|
||||
let r: u64x1 = transmute(vcale_f64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vcaleq_f64() {
|
||||
let a: f64x2 = f64x2::new(-1.2, 0.0);
|
||||
let b: f64x2 = f64x2::new(-1.1, 0.0);
|
||||
let e: u64x2 = u64x2::new(0, 0xFF_FF_FF_FF_FF_FF_FF_FF);
|
||||
let r: u64x2 = transmute(vcaleq_f64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmul_f64() {
|
||||
let a: f64 = 1.0;
|
||||
|
||||
@@ -1621,6 +1621,326 @@ pub unsafe fn vcgeq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t {
|
||||
simd_ge(a, b)
|
||||
}
|
||||
|
||||
/// Count leading sign bits
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcls.s8"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cls))]
|
||||
pub unsafe fn vcls_s8(a: int8x8_t) -> int8x8_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcls.v8i8")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.cls.v8i8")]
|
||||
fn vcls_s8_(a: int8x8_t) -> int8x8_t;
|
||||
}
|
||||
vcls_s8_(a)
|
||||
}
|
||||
|
||||
/// Count leading sign bits
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcls.s8"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cls))]
|
||||
pub unsafe fn vclsq_s8(a: int8x16_t) -> int8x16_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcls.v16i8")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.cls.v16i8")]
|
||||
fn vclsq_s8_(a: int8x16_t) -> int8x16_t;
|
||||
}
|
||||
vclsq_s8_(a)
|
||||
}
|
||||
|
||||
/// Count leading sign bits
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcls.s16"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cls))]
|
||||
pub unsafe fn vcls_s16(a: int16x4_t) -> int16x4_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcls.v4i16")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.cls.v4i16")]
|
||||
fn vcls_s16_(a: int16x4_t) -> int16x4_t;
|
||||
}
|
||||
vcls_s16_(a)
|
||||
}
|
||||
|
||||
/// Count leading sign bits
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcls.s16"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cls))]
|
||||
pub unsafe fn vclsq_s16(a: int16x8_t) -> int16x8_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcls.v8i16")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.cls.v8i16")]
|
||||
fn vclsq_s16_(a: int16x8_t) -> int16x8_t;
|
||||
}
|
||||
vclsq_s16_(a)
|
||||
}
|
||||
|
||||
/// Count leading sign bits
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcls.s32"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cls))]
|
||||
pub unsafe fn vcls_s32(a: int32x2_t) -> int32x2_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcls.v2i32")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.cls.v2i32")]
|
||||
fn vcls_s32_(a: int32x2_t) -> int32x2_t;
|
||||
}
|
||||
vcls_s32_(a)
|
||||
}
|
||||
|
||||
/// Count leading sign bits
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcls.s32"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cls))]
|
||||
pub unsafe fn vclsq_s32(a: int32x4_t) -> int32x4_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcls.v4i32")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.cls.v4i32")]
|
||||
fn vclsq_s32_(a: int32x4_t) -> int32x4_t;
|
||||
}
|
||||
vclsq_s32_(a)
|
||||
}
|
||||
|
||||
/// Count leading zero bits
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i8"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(clz))]
|
||||
pub unsafe fn vclz_s8(a: int8x8_t) -> int8x8_t {
|
||||
vclz_s8_(a)
|
||||
}
|
||||
|
||||
/// Count leading zero bits
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i8"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(clz))]
|
||||
pub unsafe fn vclzq_s8(a: int8x16_t) -> int8x16_t {
|
||||
vclzq_s8_(a)
|
||||
}
|
||||
|
||||
/// Count leading zero bits
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i16"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(clz))]
|
||||
pub unsafe fn vclz_s16(a: int16x4_t) -> int16x4_t {
|
||||
vclz_s16_(a)
|
||||
}
|
||||
|
||||
/// Count leading zero bits
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i16"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(clz))]
|
||||
pub unsafe fn vclzq_s16(a: int16x8_t) -> int16x8_t {
|
||||
vclzq_s16_(a)
|
||||
}
|
||||
|
||||
/// Count leading zero bits
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i32"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(clz))]
|
||||
pub unsafe fn vclz_s32(a: int32x2_t) -> int32x2_t {
|
||||
vclz_s32_(a)
|
||||
}
|
||||
|
||||
/// Count leading zero bits
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i32"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(clz))]
|
||||
pub unsafe fn vclzq_s32(a: int32x4_t) -> int32x4_t {
|
||||
vclzq_s32_(a)
|
||||
}
|
||||
|
||||
/// Count leading zero bits
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i8"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(clz))]
|
||||
pub unsafe fn vclz_u8(a: uint8x8_t) -> uint8x8_t {
|
||||
// A leading-zero count does not depend on signedness, so the lanes are
// reinterpreted as signed to go through `vclz_s8_`, the same binding `vclz_s8` calls.
transmute(vclz_s8_(transmute(a)))
|
||||
}
|
||||
|
||||
/// Count leading zero bits
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i8"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(clz))]
|
||||
pub unsafe fn vclzq_u8(a: uint8x16_t) -> uint8x16_t {
|
||||
// A leading-zero count does not depend on signedness, so the lanes are
// reinterpreted as signed to go through `vclzq_s8_`, the same binding `vclzq_s8` calls.
transmute(vclzq_s8_(transmute(a)))
|
||||
}
|
||||
|
||||
/// Count leading zero bits
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i16"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(clz))]
|
||||
pub unsafe fn vclz_u16(a: uint16x4_t) -> uint16x4_t {
|
||||
// A leading-zero count does not depend on signedness, so the lanes are
// reinterpreted as signed to go through `vclz_s16_`, the same binding `vclz_s16` calls.
transmute(vclz_s16_(transmute(a)))
|
||||
}
|
||||
|
||||
/// Count leading zero bits
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i16"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(clz))]
|
||||
pub unsafe fn vclzq_u16(a: uint16x8_t) -> uint16x8_t {
|
||||
// A leading-zero count does not depend on signedness, so the lanes are
// reinterpreted as signed to go through `vclzq_s16_`, the same binding `vclzq_s16` calls.
transmute(vclzq_s16_(transmute(a)))
|
||||
}
|
||||
|
||||
/// Count leading zero bits
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i32"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(clz))]
|
||||
pub unsafe fn vclz_u32(a: uint32x2_t) -> uint32x2_t {
|
||||
// A leading-zero count does not depend on signedness, so the lanes are
// reinterpreted as signed to go through `vclz_s32_`, the same binding `vclz_s32` calls.
transmute(vclz_s32_(transmute(a)))
|
||||
}
|
||||
|
||||
/// Count leading zero bits
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i32"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(clz))]
|
||||
pub unsafe fn vclzq_u32(a: uint32x4_t) -> uint32x4_t {
|
||||
// A leading-zero count does not depend on signedness, so the lanes are
// reinterpreted as signed to go through `vclzq_s32_`, the same binding `vclzq_s32` calls.
transmute(vclzq_s32_(transmute(a)))
|
||||
}
|
||||
|
||||
/// Floating-point absolute compare greater than
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacgt.f32"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(facgt))]
|
||||
pub unsafe fn vcagt_f32(a: float32x2_t, b: float32x2_t) -> uint32x2_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vacgt.v2i32.v2f32")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.facgt.v2i32.v2f32")]
|
||||
fn vcagt_f32_(a: float32x2_t, b: float32x2_t) -> uint32x2_t;
|
||||
}
|
||||
vcagt_f32_(a, b)
|
||||
}
|
||||
|
||||
/// Floating-point absolute compare greater than
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacgt.f32"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(facgt))]
|
||||
pub unsafe fn vcagtq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vacgt.v4i32.v4f32")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.facgt.v4i32.v4f32")]
|
||||
fn vcagtq_f32_(a: float32x4_t, b: float32x4_t) -> uint32x4_t;
|
||||
}
|
||||
vcagtq_f32_(a, b)
|
||||
}
|
||||
|
||||
/// Floating-point absolute compare greater than or equal
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacge.f32"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(facge))]
|
||||
pub unsafe fn vcage_f32(a: float32x2_t, b: float32x2_t) -> uint32x2_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vacge.v2i32.v2f32")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.facge.v2i32.v2f32")]
|
||||
fn vcage_f32_(a: float32x2_t, b: float32x2_t) -> uint32x2_t;
|
||||
}
|
||||
vcage_f32_(a, b)
|
||||
}
|
||||
|
||||
/// Floating-point absolute compare greater than or equal
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacge.f32"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(facge))]
|
||||
pub unsafe fn vcageq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vacge.v4i32.v4f32")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.facge.v4i32.v4f32")]
|
||||
fn vcageq_f32_(a: float32x4_t, b: float32x4_t) -> uint32x4_t;
|
||||
}
|
||||
vcageq_f32_(a, b)
|
||||
}
|
||||
|
||||
/// Floating-point absolute compare less than
///
/// Implemented as `vcagt_f32` with the operands swapped, since
/// `|a| < |b|` is the same comparison as `|b| > |a|`.
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
// The expected instructions are the "greater than" forms because of the operand swap below.
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacgt.f32"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(facgt))]
|
||||
pub unsafe fn vcalt_f32(a: float32x2_t, b: float32x2_t) -> uint32x2_t {
|
||||
// Note the argument order: `b` first, then `a`.
vcagt_f32(b, a)
|
||||
}
|
||||
|
||||
/// Floating-point absolute compare less than
///
/// Implemented as `vcagtq_f32` with the operands swapped, since
/// `|a| < |b|` is the same comparison as `|b| > |a|`.
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
// The expected instructions are the "greater than" forms because of the operand swap below.
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacgt.f32"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(facgt))]
|
||||
pub unsafe fn vcaltq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t {
|
||||
// Note the argument order: `b` first, then `a`.
vcagtq_f32(b, a)
|
||||
}
|
||||
|
||||
/// Floating-point absolute compare less than or equal
///
/// Implemented as `vcage_f32` with the operands swapped, since
/// `|a| <= |b|` is the same comparison as `|b| >= |a|`.
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
// The expected instructions are the "greater than or equal" forms because of the operand swap below.
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacge.f32"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(facge))]
|
||||
pub unsafe fn vcale_f32(a: float32x2_t, b: float32x2_t) -> uint32x2_t {
|
||||
// Note the argument order: `b` first, then `a`.
vcage_f32(b, a)
|
||||
}
|
||||
|
||||
/// Floating-point absolute compare less than or equal
///
/// Implemented as `vcageq_f32` with the operands swapped, since
/// `|a| <= |b|` is the same comparison as `|b| >= |a|`.
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
// The expected instructions are the "greater than or equal" forms because of the operand swap below.
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacge.f32"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(facge))]
|
||||
pub unsafe fn vcaleq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t {
|
||||
// Note the argument order: `b` first, then `a`.
vcageq_f32(b, a)
|
||||
}
|
||||
|
||||
/// Saturating subtract
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
@@ -4949,6 +5269,222 @@ mod test {
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vcls_s8() {
|
||||
let a: i8x8 = i8x8::new(-128, -1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
|
||||
let e: i8x8 = i8x8::new(0, 7, 7, 7, 7, 7, 7, 7);
|
||||
let r: i8x8 = transmute(vcls_s8(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vclsq_s8() {
|
||||
let a: i8x16 = i8x16::new(-128, -1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7F);
|
||||
let e: i8x16 = i8x16::new(0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0);
|
||||
let r: i8x16 = transmute(vclsq_s8(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vcls_s16() {
|
||||
let a: i16x4 = i16x4::new(-32768, -1, 0x00, 0x00);
|
||||
let e: i16x4 = i16x4::new(0, 15, 15, 15);
|
||||
let r: i16x4 = transmute(vcls_s16(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vclsq_s16() {
|
||||
let a: i16x8 = i16x8::new(-32768, -1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
|
||||
let e: i16x8 = i16x8::new(0, 15, 15, 15, 15, 15, 15, 15);
|
||||
let r: i16x8 = transmute(vclsq_s16(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vcls_s32() {
|
||||
let a: i32x2 = i32x2::new(-2147483648, -1);
|
||||
let e: i32x2 = i32x2::new(0, 31);
|
||||
let r: i32x2 = transmute(vcls_s32(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vclsq_s32() {
|
||||
let a: i32x4 = i32x4::new(-2147483648, -1, 0x00, 0x00);
|
||||
let e: i32x4 = i32x4::new(0, 31, 31, 31);
|
||||
let r: i32x4 = transmute(vclsq_s32(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vclz_s8() {
|
||||
let a: i8x8 = i8x8::new(-128, -1, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01);
|
||||
let e: i8x8 = i8x8::new(0, 0, 8, 7, 7, 7, 7, 7);
|
||||
let r: i8x8 = transmute(vclz_s8(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vclzq_s8() {
|
||||
let a: i8x16 = i8x16::new(-128, -1, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x7F);
|
||||
let e: i8x16 = i8x16::new(0, 0, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 1);
|
||||
let r: i8x16 = transmute(vclzq_s8(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vclz_s16() {
|
||||
let a: i16x4 = i16x4::new(-32768, -1, 0x00, 0x01);
|
||||
let e: i16x4 = i16x4::new(0, 0, 16, 15);
|
||||
let r: i16x4 = transmute(vclz_s16(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vclzq_s16() {
|
||||
let a: i16x8 = i16x8::new(-32768, -1, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01);
|
||||
let e: i16x8 = i16x8::new(0, 0, 16, 15, 15, 15, 15, 15);
|
||||
let r: i16x8 = transmute(vclzq_s16(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vclz_s32() {
|
||||
let a: i32x2 = i32x2::new(-2147483648, -1);
|
||||
let e: i32x2 = i32x2::new(0, 0);
|
||||
let r: i32x2 = transmute(vclz_s32(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vclzq_s32() {
|
||||
let a: i32x4 = i32x4::new(-2147483648, -1, 0x00, 0x01);
|
||||
let e: i32x4 = i32x4::new(0, 0, 32, 31);
|
||||
let r: i32x4 = transmute(vclzq_s32(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vclz_u8() {
|
||||
let a: u8x8 = u8x8::new(0, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01);
|
||||
let e: u8x8 = u8x8::new(8, 8, 7, 7, 7, 7, 7, 7);
|
||||
let r: u8x8 = transmute(vclz_u8(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vclzq_u8() {
|
||||
let a: u8x16 = u8x16::new(0, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0xFF);
|
||||
let e: u8x16 = u8x16::new(8, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0);
|
||||
let r: u8x16 = transmute(vclzq_u8(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vclz_u16() {
|
||||
let a: u16x4 = u16x4::new(0, 0x00, 0x01, 0x01);
|
||||
let e: u16x4 = u16x4::new(16, 16, 15, 15);
|
||||
let r: u16x4 = transmute(vclz_u16(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vclzq_u16() {
|
||||
let a: u16x8 = u16x8::new(0, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01);
|
||||
let e: u16x8 = u16x8::new(16, 16, 15, 15, 15, 15, 15, 15);
|
||||
let r: u16x8 = transmute(vclzq_u16(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vclz_u32() {
|
||||
let a: u32x2 = u32x2::new(0, 0x00);
|
||||
let e: u32x2 = u32x2::new(32, 32);
|
||||
let r: u32x2 = transmute(vclz_u32(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vclzq_u32() {
|
||||
let a: u32x4 = u32x4::new(0, 0x00, 0x01, 0x01);
|
||||
let e: u32x4 = u32x4::new(32, 32, 31, 31);
|
||||
let r: u32x4 = transmute(vclzq_u32(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vcagt_f32() {
|
||||
let a: f32x2 = f32x2::new(-1.2, 0.0);
|
||||
let b: f32x2 = f32x2::new(-1.1, 0.0);
|
||||
let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0);
|
||||
let r: u32x2 = transmute(vcagt_f32(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vcagtq_f32() {
|
||||
let a: f32x4 = f32x4::new(-1.2, 0.0, 1.2, 2.3);
|
||||
let b: f32x4 = f32x4::new(-1.1, 0.0, 1.1, 2.4);
|
||||
let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0, 0xFF_FF_FF_FF, 0);
|
||||
let r: u32x4 = transmute(vcagtq_f32(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vcage_f32() {
|
||||
let a: f32x2 = f32x2::new(-1.2, 0.0);
|
||||
let b: f32x2 = f32x2::new(-1.1, 0.0);
|
||||
let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF);
|
||||
let r: u32x2 = transmute(vcage_f32(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vcageq_f32() {
|
||||
let a: f32x4 = f32x4::new(-1.2, 0.0, 1.2, 2.3);
|
||||
let b: f32x4 = f32x4::new(-1.1, 0.0, 1.1, 2.4);
|
||||
let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0);
|
||||
let r: u32x4 = transmute(vcageq_f32(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vcalt_f32() {
|
||||
let a: f32x2 = f32x2::new(-1.2, 0.0);
|
||||
let b: f32x2 = f32x2::new(-1.1, 0.0);
|
||||
let e: u32x2 = u32x2::new(0, 0);
|
||||
let r: u32x2 = transmute(vcalt_f32(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vcaltq_f32() {
|
||||
let a: f32x4 = f32x4::new(-1.2, 0.0, 1.2, 2.3);
|
||||
let b: f32x4 = f32x4::new(-1.1, 0.0, 1.1, 2.4);
|
||||
let e: u32x4 = u32x4::new(0, 0, 0, 0xFF_FF_FF_FF);
|
||||
let r: u32x4 = transmute(vcaltq_f32(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vcale_f32() {
|
||||
let a: f32x2 = f32x2::new(-1.2, 0.0);
|
||||
let b: f32x2 = f32x2::new(-1.1, 0.0);
|
||||
let e: u32x2 = u32x2::new(0, 0xFF_FF_FF_FF);
|
||||
let r: u32x2 = transmute(vcale_f32(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vcaleq_f32() {
|
||||
let a: f32x4 = f32x4::new(-1.2, 0.0, 1.2, 2.3);
|
||||
let b: f32x4 = f32x4::new(-1.1, 0.0, 1.1, 2.4);
|
||||
let e: u32x4 = u32x4::new(0, 0xFF_FF_FF_FF, 0, 0xFF_FF_FF_FF);
|
||||
let r: u32x4 = transmute(vcaleq_f32(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqsub_u8() {
|
||||
let a: u8x8 = u8x8::new(42, 42, 42, 42, 42, 42, 42, 42);
|
||||
|
||||
@@ -288,6 +288,25 @@ extern "C" {
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.ctpop.v16i8")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.ctpop.v16i8")]
|
||||
fn vcntq_s8_(a: int8x16_t) -> int8x16_t;
|
||||
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.ctlz.v8i8")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.ctlz.v8i8")]
|
||||
fn vclz_s8_(a: int8x8_t) -> int8x8_t;
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.ctlz.v16i8")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.ctlz.v16i8")]
|
||||
fn vclzq_s8_(a: int8x16_t) -> int8x16_t;
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.ctlz.v4i16")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.ctlz.v4i16")]
|
||||
fn vclz_s16_(a: int16x4_t) -> int16x4_t;
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.ctlz.v8i16")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.ctlz.v8i16")]
|
||||
fn vclzq_s16_(a: int16x8_t) -> int16x8_t;
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.ctlz.v2i32")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.ctlz.v2i32")]
|
||||
fn vclz_s32_(a: int32x2_t) -> int32x2_t;
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.ctlz.v4i32")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.ctlz.v4i32")]
|
||||
fn vclzq_s32_(a: int32x4_t) -> int32x4_t;
|
||||
}
|
||||
|
||||
#[cfg(target_arch = "arm")]
|
||||
@@ -9958,6 +9977,7 @@ mod tests {
|
||||
let r: u8x16 = transmute(vcntq_p8(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vrev16_s8() {
|
||||
let a = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
|
||||
let r = i8x8::new(1, 0, 3, 2, 5, 4, 7, 6);
|
||||
|
||||
@@ -504,6 +504,91 @@ validate TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE
|
||||
aarch64 = fcmlt
|
||||
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
|
||||
|
||||
/// Count leading sign bits
|
||||
name = vcls
|
||||
a = MIN, -1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, MAX
|
||||
validate 0, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, 0
|
||||
|
||||
arm = vcls.s
|
||||
aarch64 = cls
|
||||
link-arm = vcls._EXT_
|
||||
link-aarch64 = cls._EXT_
|
||||
generate int*_t
|
||||
|
||||
/// Count leading zero bits
|
||||
name = vclz
|
||||
multi_fn = self-signed-ext, a
|
||||
a = MIN, -1, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, MAX
|
||||
validate 0, 0, BITS, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, 1
|
||||
|
||||
arm = vclz.
|
||||
aarch64 = clz
|
||||
generate int*_t
|
||||
|
||||
/// Unsigned count leading zero bits
|
||||
name = vclz
|
||||
multi_fn = transmute, [self-signed-ext, transmute(a)]
|
||||
a = MIN, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, MAX
|
||||
validate BITS, BITS, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, 0
|
||||
|
||||
arm = vclz.
|
||||
aarch64 = clz
|
||||
generate uint*_t
|
||||
|
||||
/// Floating-point absolute compare greater than
|
||||
name = vcagt
|
||||
a = -1.2, 0.0, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7
|
||||
b = -1.1, 0.0, 1.1, 2.4, 3.3, 4.6, 5.5, 6.8
|
||||
validate TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE
|
||||
|
||||
aarch64 = facgt
|
||||
link-aarch64 = facgt._EXT2_._EXT_
|
||||
generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
|
||||
|
||||
arm = vacgt.s
|
||||
link-arm = vacgt._EXT2_._EXT_
|
||||
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t
|
||||
|
||||
/// Floating-point absolute compare greater than or equal
|
||||
name = vcage
|
||||
a = -1.2, 0.0, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7
|
||||
b = -1.1, 0.0, 1.1, 2.4, 3.3, 4.6, 5.5, 6.8
|
||||
validate TRUE, TRUE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE
|
||||
|
||||
aarch64 = facge
|
||||
link-aarch64 = facge._EXT2_._EXT_
|
||||
generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
|
||||
|
||||
arm = vacge.s
|
||||
link-arm = vacge._EXT2_._EXT_
|
||||
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t
|
||||
|
||||
/// Floating-point absolute compare less than
|
||||
name = vcalt
|
||||
multi_fn = vcagt-self-noext, b, a
|
||||
a = -1.2, 0.0, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7
|
||||
b = -1.1, 0.0, 1.1, 2.4, 3.3, 4.6, 5.5, 6.8
|
||||
validate FALSE, FALSE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE
|
||||
|
||||
aarch64 = facgt
|
||||
generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
|
||||
|
||||
arm = vacgt.s
|
||||
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t
|
||||
|
||||
/// Floating-point absolute compare less than or equal
|
||||
name = vcale
|
||||
multi_fn = vcage-self-noext , b, a
|
||||
a = -1.2, 0.0, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7
|
||||
b = -1.1, 0.0, 1.1, 2.4, 3.3, 4.6, 5.5, 6.8
|
||||
validate FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE
|
||||
|
||||
aarch64 = facge
|
||||
generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
|
||||
|
||||
arm = vacge.s
|
||||
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t
|
||||
|
||||
/// Saturating subtract
|
||||
name = vqsub
|
||||
a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42
|
||||
|
||||
@@ -112,6 +112,54 @@ fn type_to_suffix(t: &str) -> &str {
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the intrinsic-name suffix of the *signed* integer flavour of the
/// vector type `t`.
///
/// The signed and unsigned integer vector types of a given shape map to the
/// same suffix, e.g. both `int8x16_t` and `uint8x16_t` yield `"q_s8"`.
///
/// # Panics
///
/// Panics with `unknown type: <t>` for every other type name; float and poly
/// vector types have no mapping here.
fn type_to_signed_suffix(t: &str) -> &str {
    match t {
        // 8-bit lanes
        "int8x8_t" | "uint8x8_t" => "_s8",
        "int8x16_t" | "uint8x16_t" => "q_s8",
        // 16-bit lanes
        "int16x4_t" | "uint16x4_t" => "_s16",
        "int16x8_t" | "uint16x8_t" => "q_s16",
        // 32-bit lanes
        "int32x2_t" | "uint32x2_t" => "_s32",
        "int32x4_t" | "uint32x4_t" => "q_s32",
        // 64-bit lanes
        "int64x1_t" | "uint64x1_t" => "_s64",
        "int64x2_t" | "uint64x2_t" => "q_s64",
        _ => panic!("unknown type: {}", t),
    }
}
|
||||
|
||||
/// Returns the intrinsic-name suffix of the *unsigned* integer flavour of the
/// vector type `t`.
///
/// The signed and unsigned integer vector types of a given shape map to the
/// same suffix, e.g. both `int8x16_t` and `uint8x16_t` yield `"q_u8"`.
///
/// # Panics
///
/// Panics with `unknown type: <t>` for every other type name; float and poly
/// vector types have no mapping here.
fn type_to_unsigned_suffix(t: &str) -> &str {
    match t {
        // 8-bit lanes
        "int8x8_t" | "uint8x8_t" => "_u8",
        "int8x16_t" | "uint8x16_t" => "q_u8",
        // 16-bit lanes
        "int16x4_t" | "uint16x4_t" => "_u16",
        "int16x8_t" | "uint16x8_t" => "q_u16",
        // 32-bit lanes
        "int32x2_t" | "uint32x2_t" => "_u32",
        "int32x4_t" | "uint32x4_t" => "q_u32",
        // 64-bit lanes
        "int64x1_t" | "uint64x1_t" => "_u64",
        "int64x2_t" | "uint64x2_t" => "q_u64",
        _ => panic!("unknown type: {}", t),
    }
}
|
||||
|
||||
fn type_to_global_type(t: &str) -> &str {
|
||||
match t {
|
||||
"int8x8_t" => "i8x8",
|
||||
@@ -285,6 +333,40 @@ fn false_val(_t: &str) -> &'static str {
|
||||
"0"
|
||||
}
|
||||
|
||||
/// Returns the element width in bits, as a decimal string, for the type name
/// `t`.
///
/// Only the first three bytes of `t` are inspected (`u8x`, `i16`, `p64`, ...).
/// This is the value substituted for the `BITS` placeholder by `map_val`.
///
/// # Panics
///
/// Panics with `Unknown bits for type <t>` when the prefix is not one of the
/// recognised unsigned/signed/poly integer prefixes. Names shorter than three
/// bytes take the same path instead of failing on the slice operation.
fn bits(t: &str) -> &'static str {
    // `get` rather than `&t[..3]`: too-short or oddly-encoded names must reach
    // the descriptive panic below, not an opaque slice-index panic.
    match t.get(..3) {
        Some("u8x") | Some("i8x") | Some("p8x") => "8",
        Some("u16") | Some("i16") | Some("p16") => "16",
        Some("u32") | Some("i32") => "32",
        Some("u64") | Some("i64") | Some("p64") => "64",
        _ => panic!("Unknown bits for type {}", t),
    }
}
|
||||
|
||||
/// Returns the element width in bits minus one, as a decimal string, for the
/// type name `t`.
///
/// Only the first three bytes of `t` are inspected (`u8x`, `i16`, `p64`, ...).
/// This is the value substituted for the `BITS_M1` placeholder by `map_val`.
///
/// # Panics
///
/// Panics with `Unknown bits for type <t>` when the prefix is not one of the
/// recognised unsigned/signed/poly integer prefixes. Names shorter than three
/// bytes take the same path instead of failing on the slice operation.
fn bits_minus_one(t: &str) -> &'static str {
    // `get` rather than `&t[..3]`: too-short or oddly-encoded names must reach
    // the descriptive panic below, not an opaque slice-index panic.
    match t.get(..3) {
        Some("u8x") | Some("i8x") | Some("p8x") => "7",
        Some("u16") | Some("i16") | Some("p16") => "15",
        Some("u32") | Some("i32") => "31",
        Some("u64") | Some("i64") | Some("p64") => "63",
        _ => panic!("Unknown bits for type {}", t),
    }
}
|
||||
|
||||
fn map_val<'v>(t: &str, v: &'v str) -> &'v str {
|
||||
match v {
|
||||
"FALSE" => false_val(t),
|
||||
@@ -292,6 +374,8 @@ fn map_val<'v>(t: &str, v: &'v str) -> &'v str {
|
||||
"MAX" => max_val(t),
|
||||
"MIN" => min_val(t),
|
||||
"FF" => ff_val(t),
|
||||
"BITS" => bits(t),
|
||||
"BITS_M1" => bits_minus_one(t),
|
||||
o => o,
|
||||
}
|
||||
}
|
||||
@@ -300,7 +384,7 @@ fn map_val<'v>(t: &str, v: &'v str) -> &'v str {
|
||||
fn gen_aarch64(
|
||||
current_comment: &str,
|
||||
current_fn: &Option<String>,
|
||||
name: &str,
|
||||
current_name: &str,
|
||||
current_aarch64: &Option<String>,
|
||||
link_aarch64: &Option<String>,
|
||||
in_t: &str,
|
||||
@@ -312,6 +396,7 @@ fn gen_aarch64(
|
||||
) -> (String, String) {
|
||||
let _global_t = type_to_global_type(in_t);
|
||||
let _global_ret_t = type_to_global_type(out_t);
|
||||
let name = format!("{}{}", current_name, type_to_suffix(in_t));
|
||||
let current_fn = if let Some(current_fn) = current_fn.clone() {
|
||||
if link_aarch64.is_some() {
|
||||
panic!(
|
||||
@@ -340,7 +425,7 @@ fn gen_aarch64(
|
||||
let current_aarch64 = current_aarch64.clone().unwrap();
|
||||
let ext_c = if let Some(link_aarch64) = link_aarch64.clone() {
|
||||
let ext = type_to_ext(in_t);
|
||||
|
||||
let ext2 = type_to_ext(out_t);
|
||||
format!(
|
||||
r#"#[allow(improper_ctypes)]
|
||||
extern "C" {{
|
||||
@@ -348,7 +433,7 @@ fn gen_aarch64(
|
||||
fn {}({}) -> {};
|
||||
}}
|
||||
"#,
|
||||
link_aarch64.replace("_EXT_", ext),
|
||||
link_aarch64.replace("_EXT_", ext).replace("_EXT2_", ext2),
|
||||
current_fn,
|
||||
match para_num {
|
||||
1 => {
|
||||
@@ -370,7 +455,7 @@ fn gen_aarch64(
|
||||
if i > 0 {
|
||||
calls.push_str("\n ");
|
||||
}
|
||||
calls.push_str(&get_call(&multi_fn[i], in_t, out_t, fixed));
|
||||
calls.push_str(&get_call(&multi_fn[i], current_name, in_t, out_t, fixed));
|
||||
}
|
||||
calls
|
||||
} else {
|
||||
@@ -429,7 +514,14 @@ fn gen_aarch64(
|
||||
current_comment, current_aarch64, call
|
||||
);
|
||||
|
||||
let test = gen_test(name, &in_t, &out_t, current_tests, type_len(in_t), para_num);
|
||||
let test = gen_test(
|
||||
&name,
|
||||
&in_t,
|
||||
&out_t,
|
||||
current_tests,
|
||||
type_len(in_t),
|
||||
para_num,
|
||||
);
|
||||
(function, test)
|
||||
}
|
||||
|
||||
@@ -499,7 +591,7 @@ fn gen_test(
|
||||
fn gen_arm(
|
||||
current_comment: &str,
|
||||
current_fn: &Option<String>,
|
||||
name: &str,
|
||||
current_name: &str,
|
||||
current_arm: &str,
|
||||
link_arm: &Option<String>,
|
||||
current_aarch64: &Option<String>,
|
||||
@@ -513,6 +605,7 @@ fn gen_arm(
|
||||
) -> (String, String) {
|
||||
let _global_t = type_to_global_type(in_t);
|
||||
let _global_ret_t = type_to_global_type(out_t);
|
||||
let name = format!("{}{}", current_name, type_to_suffix(in_t));
|
||||
let current_aarch64 = current_aarch64
|
||||
.clone()
|
||||
.unwrap_or_else(|| current_arm.to_string());
|
||||
@@ -545,7 +638,7 @@ fn gen_arm(
|
||||
let ext_c =
|
||||
if let (Some(link_arm), Some(link_aarch64)) = (link_arm.clone(), link_aarch64.clone()) {
|
||||
let ext = type_to_ext(in_t);
|
||||
|
||||
let ext2 = type_to_ext(out_t);
|
||||
format!(
|
||||
r#"#[allow(improper_ctypes)]
|
||||
extern "C" {{
|
||||
@@ -554,8 +647,8 @@ fn gen_arm(
|
||||
fn {}({}) -> {};
|
||||
}}
|
||||
"#,
|
||||
link_arm.replace("_EXT_", ext),
|
||||
link_aarch64.replace("_EXT_", ext),
|
||||
link_arm.replace("_EXT_", ext).replace("_EXT2_", ext2),
|
||||
link_aarch64.replace("_EXT_", ext).replace("_EXT2_", ext2),
|
||||
current_fn,
|
||||
match para_num {
|
||||
1 => {
|
||||
@@ -577,7 +670,7 @@ fn gen_arm(
|
||||
if i > 0 {
|
||||
calls.push_str("\n ");
|
||||
}
|
||||
calls.push_str(&get_call(&multi_fn[i], in_t, out_t, fixed));
|
||||
calls.push_str(&get_call(&multi_fn[i], current_name, in_t, out_t, fixed));
|
||||
}
|
||||
calls
|
||||
} else {
|
||||
@@ -612,10 +705,10 @@ fn gen_arm(
|
||||
)
|
||||
}
|
||||
(_, 1, _) => format!(
|
||||
r#"pub unsafe fn {}(a: {}, b: {}) -> {} {{
|
||||
r#"pub unsafe fn {}(a: {}) -> {} {{
|
||||
{}{}
|
||||
}}"#,
|
||||
name, in_t, in_t, out_t, ext_c, multi_calls,
|
||||
name, in_t, out_t, ext_c, multi_calls,
|
||||
),
|
||||
(_, 2, _) => format!(
|
||||
r#"pub unsafe fn {}(a: {}, b: {}) -> {} {{
|
||||
@@ -640,7 +733,14 @@ fn gen_arm(
|
||||
expand_intrinsic(¤t_aarch64, in_t),
|
||||
call,
|
||||
);
|
||||
let test = gen_test(name, &in_t, &out_t, current_tests, type_len(in_t), para_num);
|
||||
let test = gen_test(
|
||||
&name,
|
||||
&in_t,
|
||||
&out_t,
|
||||
current_tests,
|
||||
type_len(in_t),
|
||||
para_num,
|
||||
);
|
||||
|
||||
(function, test)
|
||||
}
|
||||
@@ -715,15 +815,52 @@ fn expand_intrinsic(intr: &str, t: &str) -> String {
|
||||
}
|
||||
}
|
||||
|
||||
fn get_call(in_str: &str, in_t: &str, out_t: &str, fixed: &Vec<String>) -> String {
|
||||
fn get_call(
|
||||
in_str: &str,
|
||||
current_name: &str,
|
||||
in_t: &str,
|
||||
out_t: &str,
|
||||
fixed: &Vec<String>,
|
||||
) -> String {
|
||||
let params: Vec<_> = in_str.split(',').map(|v| v.trim().to_string()).collect();
|
||||
assert!(params.len() > 0);
|
||||
let fn_name = ¶ms[0];
|
||||
let mut fn_name = params[0].clone();
|
||||
let mut re: Option<(String, String)> = None;
|
||||
let mut param_str = String::new();
|
||||
for i in 1..params.len() {
|
||||
let mut i = 1;
|
||||
while i < params.len() {
|
||||
let s = ¶ms[i];
|
||||
if s.contains(':') {
|
||||
if s.starts_with('[') {
|
||||
let mut sub_fn = String::new();
|
||||
let mut brackets = 1;
|
||||
while i < params.len() {
|
||||
if !sub_fn.is_empty() {
|
||||
sub_fn.push_str(", ");
|
||||
}
|
||||
sub_fn.push_str(¶ms[i]);
|
||||
if params[i].starts_with('[') {
|
||||
brackets += 1;
|
||||
}
|
||||
if params[i].ends_with("]") {
|
||||
brackets -= 1;
|
||||
if brackets == 0 {
|
||||
break;
|
||||
}
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
let sub_call = get_call(
|
||||
&sub_fn[1..sub_fn.len() - 1],
|
||||
current_name,
|
||||
in_t,
|
||||
out_t,
|
||||
fixed,
|
||||
);
|
||||
if !param_str.is_empty() {
|
||||
param_str.push_str(", ");
|
||||
}
|
||||
param_str.push_str(&sub_call);
|
||||
} else if s.contains(':') {
|
||||
let re_params: Vec<_> = s.split(':').map(|v| v.to_string()).collect();
|
||||
if re_params[1] == "" {
|
||||
re = Some((re_params[0].clone(), in_t.to_string()));
|
||||
@@ -738,12 +875,34 @@ fn get_call(in_str: &str, in_t: &str, out_t: &str, fixed: &Vec<String>) -> Strin
|
||||
}
|
||||
param_str.push_str(s);
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
if fn_name == "fixed" {
|
||||
let (re_name, re_type) = re.unwrap();
|
||||
let fixed: Vec<String> = fixed.iter().take(type_len(in_t)).cloned().collect();
|
||||
return format!(r#"let {}{};"#, re_name, values(&re_type, &fixed));
|
||||
}
|
||||
if fn_name.contains('-') {
|
||||
let fn_format: Vec<_> = fn_name.split('-').map(|v| v.to_string()).collect();
|
||||
assert_eq!(fn_format.len(), 3);
|
||||
fn_name = if fn_format[0] == "self" {
|
||||
current_name.to_string()
|
||||
} else {
|
||||
fn_format[0].clone()
|
||||
};
|
||||
if fn_format[1] == "self" {
|
||||
fn_name.push_str(type_to_suffix(in_t));
|
||||
} else if fn_format[1] == "signed" {
|
||||
fn_name.push_str(type_to_signed_suffix(in_t));
|
||||
} else if fn_format[1] == "unsigned" {
|
||||
fn_name.push_str(type_to_unsigned_suffix(in_t));
|
||||
} else {
|
||||
fn_name.push_str(&fn_format[1]);
|
||||
};
|
||||
if fn_format[2] == "ext" {
|
||||
fn_name.push_str("_");
|
||||
}
|
||||
}
|
||||
if param_str.is_empty() {
|
||||
param_str.push_str("a, b");
|
||||
}
|
||||
@@ -909,12 +1068,11 @@ mod test {
|
||||
para_num = 1;
|
||||
}
|
||||
let current_name = current_name.clone().unwrap();
|
||||
let name = format!("{}{}", current_name, type_to_suffix(in_t),);
|
||||
if let Some(current_arm) = current_arm.clone() {
|
||||
let (function, test) = gen_arm(
|
||||
¤t_comment,
|
||||
¤t_fn,
|
||||
&name,
|
||||
¤t_name,
|
||||
¤t_arm,
|
||||
&link_arm,
|
||||
¤t_aarch64,
|
||||
@@ -932,7 +1090,7 @@ mod test {
|
||||
let (function, test) = gen_aarch64(
|
||||
¤t_comment,
|
||||
¤t_fn,
|
||||
&name,
|
||||
¤t_name,
|
||||
¤t_aarch64,
|
||||
&link_aarch64,
|
||||
&in_t,
|
||||
|
||||
Reference in New Issue
Block a user