diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs index d36512ec532c..2feb81324338 100644 --- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs +++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs @@ -1015,6 +1015,90 @@ pub unsafe fn vcltzq_f64(a: float64x2_t) -> uint64x2_t { simd_lt(a, transmute(b)) } +/// Floating-point absolute compare greater than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(facgt))] +pub unsafe fn vcagt_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.facgt.v1i64.v1f64")] + fn vcagt_f64_(a: float64x1_t, b: float64x1_t) -> uint64x1_t; + } + vcagt_f64_(a, b) +} + +/// Floating-point absolute compare greater than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(facgt))] +pub unsafe fn vcagtq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.facgt.v2i64.v2f64")] + fn vcagtq_f64_(a: float64x2_t, b: float64x2_t) -> uint64x2_t; + } + vcagtq_f64_(a, b) +} + +/// Floating-point absolute compare greater than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(facge))] +pub unsafe fn vcage_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.facge.v1i64.v1f64")] + fn vcage_f64_(a: float64x1_t, b: float64x1_t) -> uint64x1_t; + } + vcage_f64_(a, b) +} + +/// Floating-point absolute compare greater than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(facge))] +pub unsafe fn vcageq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t { + #[allow(improper_ctypes)] + extern 
"C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.facge.v2i64.v2f64")] + fn vcageq_f64_(a: float64x2_t, b: float64x2_t) -> uint64x2_t; + } + vcageq_f64_(a, b) +} + +/// Floating-point absolute compare less than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(facgt))] +pub unsafe fn vcalt_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t { + vcagt_f64(b, a) +} + +/// Floating-point absolute compare less than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(facgt))] +pub unsafe fn vcaltq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t { + vcagtq_f64(b, a) +} + +/// Floating-point absolute compare less than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(facge))] +pub unsafe fn vcale_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t { + vcage_f64(b, a) +} + +/// Floating-point absolute compare less than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(facge))] +pub unsafe fn vcaleq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t { + vcageq_f64(b, a) +} + /// Multiply #[inline] #[target_feature(enable = "neon")] @@ -2210,6 +2294,78 @@ mod test { assert_eq!(r, e); } + #[simd_test(enable = "neon")] + unsafe fn test_vcagt_f64() { + let a: f64 = -1.2; + let b: f64 = -1.1; + let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x1 = transmute(vcagt_f64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcagtq_f64() { + let a: f64x2 = f64x2::new(-1.2, 0.0); + let b: f64x2 = f64x2::new(-1.1, 0.0); + let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0); + let r: u64x2 = transmute(vcagtq_f64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcage_f64() { + let a: f64 = -1.2; + let b: f64 = -1.1; + let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x1 = 
transmute(vcage_f64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcageq_f64() { + let a: f64x2 = f64x2::new(-1.2, 0.0); + let b: f64x2 = f64x2::new(-1.1, 0.0); + let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x2 = transmute(vcageq_f64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcalt_f64() { + let a: f64 = -1.2; + let b: f64 = -1.1; + let e: u64x1 = u64x1::new(0); + let r: u64x1 = transmute(vcalt_f64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcaltq_f64() { + let a: f64x2 = f64x2::new(-1.2, 0.0); + let b: f64x2 = f64x2::new(-1.1, 0.0); + let e: u64x2 = u64x2::new(0, 0); + let r: u64x2 = transmute(vcaltq_f64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcale_f64() { + let a: f64 = -1.2; + let b: f64 = -1.1; + let e: u64x1 = u64x1::new(0); + let r: u64x1 = transmute(vcale_f64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcaleq_f64() { + let a: f64x2 = f64x2::new(-1.2, 0.0); + let b: f64x2 = f64x2::new(-1.1, 0.0); + let e: u64x2 = u64x2::new(0, 0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x2 = transmute(vcaleq_f64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] unsafe fn test_vmul_f64() { let a: f64 = 1.0; diff --git a/library/stdarch/crates/core_arch/src/arm/neon/generated.rs b/library/stdarch/crates/core_arch/src/arm/neon/generated.rs index a4770005a6b8..e57b67adfc5a 100644 --- a/library/stdarch/crates/core_arch/src/arm/neon/generated.rs +++ b/library/stdarch/crates/core_arch/src/arm/neon/generated.rs @@ -1621,6 +1621,326 @@ pub unsafe fn vcgeq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t { simd_ge(a, b) } +/// Count leading sign bits +#[inline] +#[target_feature(enable = "neon")] 
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcls.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cls))] +pub unsafe fn vcls_s8(a: int8x8_t) -> int8x8_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcls.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.cls.v8i8")] + fn vcls_s8_(a: int8x8_t) -> int8x8_t; + } +vcls_s8_(a) +} + +/// Count leading sign bits +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcls.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cls))] +pub unsafe fn vclsq_s8(a: int8x16_t) -> int8x16_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcls.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.cls.v16i8")] + fn vclsq_s8_(a: int8x16_t) -> int8x16_t; + } +vclsq_s8_(a) +} + +/// Count leading sign bits +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcls.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cls))] +pub unsafe fn vcls_s16(a: int16x4_t) -> int16x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcls.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.cls.v4i16")] + fn vcls_s16_(a: int16x4_t) -> int16x4_t; + } +vcls_s16_(a) +} + +/// Count leading sign bits +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcls.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cls))] +pub 
unsafe fn vclsq_s16(a: int16x8_t) -> int16x8_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcls.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.cls.v8i16")] + fn vclsq_s16_(a: int16x8_t) -> int16x8_t; + } +vclsq_s16_(a) +} + +/// Count leading sign bits +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcls.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cls))] +pub unsafe fn vcls_s32(a: int32x2_t) -> int32x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcls.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.cls.v2i32")] + fn vcls_s32_(a: int32x2_t) -> int32x2_t; + } +vcls_s32_(a) +} + +/// Count leading sign bits +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcls.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cls))] +pub unsafe fn vclsq_s32(a: int32x4_t) -> int32x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcls.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.cls.v4i32")] + fn vclsq_s32_(a: int32x4_t) -> int32x4_t; + } +vclsq_s32_(a) +} + +/// Signed count leading zero bits +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(clz))] +pub unsafe fn vclz_s8(a: int8x8_t) -> int8x8_t { + vclz_s8_(a) +} + +/// Signed count leading zero bits +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(clz))] +pub unsafe fn vclzq_s8(a: int8x16_t) -> int8x16_t { + vclzq_s8_(a) +} + +/// Signed count leading zero bits +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(clz))] +pub unsafe fn vclz_s16(a: int16x4_t) -> int16x4_t { + vclz_s16_(a) +} + +/// Signed count leading zero bits +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(clz))] +pub unsafe fn vclzq_s16(a: int16x8_t) -> int16x8_t { + vclzq_s16_(a) +} + +/// Signed count leading zero bits +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(clz))] +pub unsafe fn vclz_s32(a: int32x2_t) -> int32x2_t { + vclz_s32_(a) +} + +/// Signed count leading zero bits +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(clz))] +pub unsafe fn vclzq_s32(a: int32x4_t) -> int32x4_t { + vclzq_s32_(a) +} + +/// Unsigned count leading zero bits +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(clz))] +pub unsafe fn 
vclz_u8(a: uint8x8_t) -> uint8x8_t { + transmute(vclz_s8_(transmute(a))) +} + +/// Unsigned count leading zero bits +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(clz))] +pub unsafe fn vclzq_u8(a: uint8x16_t) -> uint8x16_t { + transmute(vclzq_s8_(transmute(a))) +} + +/// Unsigned count leading zero bits +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(clz))] +pub unsafe fn vclz_u16(a: uint16x4_t) -> uint16x4_t { + transmute(vclz_s16_(transmute(a))) +} + +/// Unsigned count leading zero bits +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(clz))] +pub unsafe fn vclzq_u16(a: uint16x8_t) -> uint16x8_t { + transmute(vclzq_s16_(transmute(a))) +} + +/// Unsigned count leading zero bits +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(clz))] +pub unsafe fn vclz_u32(a: uint32x2_t) -> uint32x2_t { + transmute(vclz_s32_(transmute(a))) +} + +/// Unsigned count leading zero bits +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(clz))] +pub unsafe fn vclzq_u32(a: uint32x4_t) -> uint32x4_t { + 
transmute(vclzq_s32_(transmute(a))) +} + +/// Floating-point absolute compare greater than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacgt.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(facgt))] +pub unsafe fn vcagt_f32(a: float32x2_t, b: float32x2_t) -> uint32x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vacgt.v2i32.v2f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.facgt.v2i32.v2f32")] + fn vcagt_f32_(a: float32x2_t, b: float32x2_t) -> uint32x2_t; + } +vcagt_f32_(a, b) +} + +/// Floating-point absolute compare greater than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacgt.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(facgt))] +pub unsafe fn vcagtq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vacgt.v4i32.v4f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.facgt.v4i32.v4f32")] + fn vcagtq_f32_(a: float32x4_t, b: float32x4_t) -> uint32x4_t; + } +vcagtq_f32_(a, b) +} + +/// Floating-point absolute compare greater than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacge.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(facge))] +pub unsafe fn vcage_f32(a: float32x2_t, b: float32x2_t) -> uint32x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vacge.v2i32.v2f32")] + #[cfg_attr(target_arch = "aarch64", link_name = 
"llvm.aarch64.neon.facge.v2i32.v2f32")] + fn vcage_f32_(a: float32x2_t, b: float32x2_t) -> uint32x2_t; + } +vcage_f32_(a, b) +} + +/// Floating-point absolute compare greater than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacge.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(facge))] +pub unsafe fn vcageq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vacge.v4i32.v4f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.facge.v4i32.v4f32")] + fn vcageq_f32_(a: float32x4_t, b: float32x4_t) -> uint32x4_t; + } +vcageq_f32_(a, b) +} + +/// Floating-point absolute compare less than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacgt.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(facgt))] +pub unsafe fn vcalt_f32(a: float32x2_t, b: float32x2_t) -> uint32x2_t { + vcagt_f32(b, a) +} + +/// Floating-point absolute compare less than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacgt.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(facgt))] +pub unsafe fn vcaltq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t { + vcagtq_f32(b, a) +} + +/// Floating-point absolute compare less than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacge.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(facge))] +pub unsafe fn vcale_f32(a: float32x2_t, b: float32x2_t) -> 
uint32x2_t { + vcage_f32(b, a) +} + +/// Floating-point absolute compare less than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacge.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(facge))] +pub unsafe fn vcaleq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t { + vcageq_f32(b, a) +} + /// Saturating subtract #[inline] #[target_feature(enable = "neon")] @@ -4949,6 +5269,222 @@ mod test { assert_eq!(r, e); } + #[simd_test(enable = "neon")] + unsafe fn test_vcls_s8() { + let a: i8x8 = i8x8::new(-128, -1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: i8x8 = i8x8::new(0, 7, 7, 7, 7, 7, 7, 7); + let r: i8x8 = transmute(vcls_s8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclsq_s8() { + let a: i8x16 = i8x16::new(-128, -1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7F); + let e: i8x16 = i8x16::new(0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0); + let r: i8x16 = transmute(vclsq_s8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcls_s16() { + let a: i16x4 = i16x4::new(-32768, -1, 0x00, 0x00); + let e: i16x4 = i16x4::new(0, 15, 15, 15); + let r: i16x4 = transmute(vcls_s16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclsq_s16() { + let a: i16x8 = i16x8::new(-32768, -1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: i16x8 = i16x8::new(0, 15, 15, 15, 15, 15, 15, 15); + let r: i16x8 = transmute(vclsq_s16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcls_s32() { + let a: i32x2 = i32x2::new(-2147483648, -1); + let e: i32x2 = i32x2::new(0, 31); + let r: i32x2 = transmute(vcls_s32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclsq_s32() { + let a: i32x4 = 
i32x4::new(-2147483648, -1, 0x00, 0x00); + let e: i32x4 = i32x4::new(0, 31, 31, 31); + let r: i32x4 = transmute(vclsq_s32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclz_s8() { + let a: i8x8 = i8x8::new(-128, -1, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01); + let e: i8x8 = i8x8::new(0, 0, 8, 7, 7, 7, 7, 7); + let r: i8x8 = transmute(vclz_s8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclzq_s8() { + let a: i8x16 = i8x16::new(-128, -1, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x7F); + let e: i8x16 = i8x16::new(0, 0, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 1); + let r: i8x16 = transmute(vclzq_s8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclz_s16() { + let a: i16x4 = i16x4::new(-32768, -1, 0x00, 0x01); + let e: i16x4 = i16x4::new(0, 0, 16, 15); + let r: i16x4 = transmute(vclz_s16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclzq_s16() { + let a: i16x8 = i16x8::new(-32768, -1, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01); + let e: i16x8 = i16x8::new(0, 0, 16, 15, 15, 15, 15, 15); + let r: i16x8 = transmute(vclzq_s16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclz_s32() { + let a: i32x2 = i32x2::new(-2147483648, -1); + let e: i32x2 = i32x2::new(0, 0); + let r: i32x2 = transmute(vclz_s32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclzq_s32() { + let a: i32x4 = i32x4::new(-2147483648, -1, 0x00, 0x01); + let e: i32x4 = i32x4::new(0, 0, 32, 31); + let r: i32x4 = transmute(vclzq_s32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclz_u8() { + let a: u8x8 = u8x8::new(0, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01); + let e: u8x8 = u8x8::new(8, 8, 7, 7, 7, 7, 7, 7); + let r: u8x8 = transmute(vclz_u8(transmute(a))); + assert_eq!(r, e); + 
} + + #[simd_test(enable = "neon")] + unsafe fn test_vclzq_u8() { + let a: u8x16 = u8x16::new(0, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0xFF); + let e: u8x16 = u8x16::new(8, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0); + let r: u8x16 = transmute(vclzq_u8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclz_u16() { + let a: u16x4 = u16x4::new(0, 0x00, 0x01, 0x01); + let e: u16x4 = u16x4::new(16, 16, 15, 15); + let r: u16x4 = transmute(vclz_u16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclzq_u16() { + let a: u16x8 = u16x8::new(0, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01); + let e: u16x8 = u16x8::new(16, 16, 15, 15, 15, 15, 15, 15); + let r: u16x8 = transmute(vclzq_u16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclz_u32() { + let a: u32x2 = u32x2::new(0, 0x00); + let e: u32x2 = u32x2::new(32, 32); + let r: u32x2 = transmute(vclz_u32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclzq_u32() { + let a: u32x4 = u32x4::new(0, 0x00, 0x01, 0x01); + let e: u32x4 = u32x4::new(32, 32, 31, 31); + let r: u32x4 = transmute(vclzq_u32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcagt_f32() { + let a: f32x2 = f32x2::new(-1.2, 0.0); + let b: f32x2 = f32x2::new(-1.1, 0.0); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0); + let r: u32x2 = transmute(vcagt_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcagtq_f32() { + let a: f32x4 = f32x4::new(-1.2, 0.0, 1.2, 2.3); + let b: f32x4 = f32x4::new(-1.1, 0.0, 1.1, 2.4); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0, 0xFF_FF_FF_FF, 0); + let r: u32x4 = transmute(vcagtq_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcage_f32() { + let a: f32x2 = 
f32x2::new(-1.2, 0.0); + let b: f32x2 = f32x2::new(-1.1, 0.0); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vcage_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcageq_f32() { + let a: f32x4 = f32x4::new(-1.2, 0.0, 1.2, 2.3); + let b: f32x4 = f32x4::new(-1.1, 0.0, 1.1, 2.4); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0); + let r: u32x4 = transmute(vcageq_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcalt_f32() { + let a: f32x2 = f32x2::new(-1.2, 0.0); + let b: f32x2 = f32x2::new(-1.1, 0.0); + let e: u32x2 = u32x2::new(0, 0); + let r: u32x2 = transmute(vcalt_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcaltq_f32() { + let a: f32x4 = f32x4::new(-1.2, 0.0, 1.2, 2.3); + let b: f32x4 = f32x4::new(-1.1, 0.0, 1.1, 2.4); + let e: u32x4 = u32x4::new(0, 0, 0, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vcaltq_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcale_f32() { + let a: f32x2 = f32x2::new(-1.2, 0.0); + let b: f32x2 = f32x2::new(-1.1, 0.0); + let e: u32x2 = u32x2::new(0, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vcale_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcaleq_f32() { + let a: f32x4 = f32x4::new(-1.2, 0.0, 1.2, 2.3); + let b: f32x4 = f32x4::new(-1.1, 0.0, 1.1, 2.4); + let e: u32x4 = u32x4::new(0, 0xFF_FF_FF_FF, 0, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vcaleq_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] unsafe fn test_vqsub_u8() { let a: u8x8 = u8x8::new(42, 42, 42, 42, 42, 42, 42, 42); diff --git a/library/stdarch/crates/core_arch/src/arm/neon/mod.rs b/library/stdarch/crates/core_arch/src/arm/neon/mod.rs index 
43e76103e12a..5f6504549dc0 100644 --- a/library/stdarch/crates/core_arch/src/arm/neon/mod.rs +++ b/library/stdarch/crates/core_arch/src/arm/neon/mod.rs @@ -288,6 +288,25 @@ extern "C" { #[cfg_attr(target_arch = "arm", link_name = "llvm.ctpop.v16i8")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.ctpop.v16i8")] fn vcntq_s8_(a: int8x16_t) -> int8x16_t; + + #[cfg_attr(target_arch = "arm", link_name = "llvm.ctlz.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.ctlz.v8i8")] + fn vclz_s8_(a: int8x8_t) -> int8x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.ctlz.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.ctlz.v16i8")] + fn vclzq_s8_(a: int8x16_t) -> int8x16_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.ctlz.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.ctlz.v4i16")] + fn vclz_s16_(a: int16x4_t) -> int16x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.ctlz.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.ctlz.v8i16")] + fn vclzq_s16_(a: int16x8_t) -> int16x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.ctlz.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.ctlz.v2i32")] + fn vclz_s32_(a: int32x2_t) -> int32x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.ctlz.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.ctlz.v4i32")] + fn vclzq_s32_(a: int32x4_t) -> int32x4_t; } #[cfg(target_arch = "arm")] @@ -9958,6 +9977,7 @@ mod tests { let r: u8x16 = transmute(vcntq_p8(transmute(a))); assert_eq!(r, e); } + #[simd_test(enable = "neon")] unsafe fn test_vrev16_s8() { let a = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); let r = i8x8::new(1, 0, 3, 2, 5, 4, 7, 6); diff --git a/library/stdarch/crates/stdarch-gen/neon.spec b/library/stdarch/crates/stdarch-gen/neon.spec index 6cdad2279eda..df6870c06c40 100644 --- a/library/stdarch/crates/stdarch-gen/neon.spec +++ b/library/stdarch/crates/stdarch-gen/neon.spec @@ -504,6 +504,91 @@ validate TRUE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE aarch64 = fcmlt generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t +/// Count leading sign bits +name = vcls +a = MIN, -1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, MAX +validate 0, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, 0 + +arm = vcls.s +aarch64 = cls +link-arm = vcls._EXT_ +link-aarch64 = cls._EXT_ +generate int*_t + +/// Signed count leading zero bits +name = vclz +multi_fn = self-signed-ext, a +a = MIN, -1, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, MAX +validate 0, 0, BITS, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, 1 + +arm = vclz. +aarch64 = clz +generate int*_t + +/// Unsigned count leading zero bits +name = vclz +multi_fn = transmute, [self-signed-ext, transmute(a)] +a = MIN, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, MAX +validate BITS, BITS, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, 0 + +arm = vclz. 
+aarch64 = clz +generate uint*_t + +/// Floating-point absolute compare greater than +name = vcagt +a = -1.2, 0.0, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7 +b = -1.1, 0.0, 1.1, 2.4, 3.3, 4.6, 5.5, 6.8 +validate TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE + +aarch64 = facgt +link-aarch64 = facgt._EXT2_._EXT_ +generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t + +arm = vacgt.s +link-arm = vacgt._EXT2_._EXT_ +generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t + +/// Floating-point absolute compare greater than or equal +name = vcage +a = -1.2, 0.0, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7 +b = -1.1, 0.0, 1.1, 2.4, 3.3, 4.6, 5.5, 6.8 +validate TRUE, TRUE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE + +aarch64 = facge +link-aarch64 = facge._EXT2_._EXT_ +generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t + +arm = vacge.s +link-arm = vacge._EXT2_._EXT_ +generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t + +/// Floating-point absolute compare less than +name = vcalt +multi_fn = vcagt-self-noext, b, a +a = -1.2, 0.0, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7 +b = -1.1, 0.0, 1.1, 2.4, 3.3, 4.6, 5.5, 6.8 +validate FALSE, FALSE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE + +aarch64 = facgt +generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t + +arm = vacgt.s +generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t + +/// Floating-point absolute compare less than or equal +name = vcale +multi_fn = vcage-self-noext , b, a +a = -1.2, 0.0, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7 +b = -1.1, 0.0, 1.1, 2.4, 3.3, 4.6, 5.5, 6.8 +validate FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE + +aarch64 = facge +generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t + +arm = vacge.s +generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t + /// Saturating subtract name = vqsub a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 diff --git a/library/stdarch/crates/stdarch-gen/src/main.rs b/library/stdarch/crates/stdarch-gen/src/main.rs index 42cf93977cf3..e9c49d8a9528 100644 --- 
a/library/stdarch/crates/stdarch-gen/src/main.rs +++ b/library/stdarch/crates/stdarch-gen/src/main.rs @@ -112,6 +112,54 @@ fn type_to_suffix(t: &str) -> &str { } } +fn type_to_signed_suffix(t: &str) -> &str { + match t { + "int8x8_t" | "uint8x8_t" => "_s8", + "int8x16_t" | "uint8x16_t" => "q_s8", + "int16x4_t" | "uint16x4_t" => "_s16", + "int16x8_t" | "uint16x8_t" => "q_s16", + "int32x2_t" | "uint32x2_t" => "_s32", + "int32x4_t" | "uint32x4_t" => "q_s32", + "int64x1_t" | "uint64x1_t" => "_s64", + "int64x2_t" | "uint64x2_t" => "q_s64", + /* + "float16x4_t" => "_f16", + "float16x8_t" => "q_f16", + "float32x2_t" => "_f32", + "float32x4_t" => "q_f32", + "float64x1_t" => "_f64", + "float64x2_t" => "q_f64", + "poly64x1_t" => "_p64", + "poly64x2_t" => "q_p64", + */ + _ => panic!("unknown type: {}", t), + } +} + +fn type_to_unsigned_suffix(t: &str) -> &str { + match t { + "int8x8_t" | "uint8x8_t" => "_u8", + "int8x16_t" | "uint8x16_t" => "q_u8", + "int16x4_t" | "uint16x4_t" => "_u16", + "int16x8_t" | "uint16x8_t" => "q_u16", + "int32x2_t" | "uint32x2_t" => "_u32", + "int32x4_t" | "uint32x4_t" => "q_u32", + "int64x1_t" | "uint64x1_t" => "_u64", + "int64x2_t" | "uint64x2_t" => "q_u64", + /* + "float16x4_t" => "_f16", + "float16x8_t" => "q_f16", + "float32x2_t" => "_f32", + "float32x4_t" => "q_f32", + "float64x1_t" => "_f64", + "float64x2_t" => "q_f64", + "poly64x1_t" => "_p64", + "poly64x2_t" => "q_p64", + */ + _ => panic!("unknown type: {}", t), + } +} + fn type_to_global_type(t: &str) -> &str { match t { "int8x8_t" => "i8x8", @@ -285,6 +333,40 @@ fn false_val(_t: &str) -> &'static str { "0" } +fn bits(t: &str) -> &'static str { + match &t[..3] { + "u8x" => "8", + "u16" => "16", + "u32" => "32", + "u64" => "64", + "i8x" => "8", + "i16" => "16", + "i32" => "32", + "i64" => "64", + "p8x" => "8", + "p16" => "16", + "p64" => "64", + _ => panic!("Unknown bits for type {}", t), + } +} + +fn bits_minus_one(t: &str) -> &'static str { + match &t[..3] { + "u8x" => "7", + "u16" => 
"15", + "u32" => "31", + "u64" => "63", + "i8x" => "7", + "i16" => "15", + "i32" => "31", + "i64" => "63", + "p8x" => "7", + "p16" => "15", + "p64" => "63", + _ => panic!("Unknown bits for type {}", t), + } +} + fn map_val<'v>(t: &str, v: &'v str) -> &'v str { match v { "FALSE" => false_val(t), @@ -292,6 +374,8 @@ fn map_val<'v>(t: &str, v: &'v str) -> &'v str { "MAX" => max_val(t), "MIN" => min_val(t), "FF" => ff_val(t), + "BITS" => bits(t), + "BITS_M1" => bits_minus_one(t), o => o, } } @@ -300,7 +384,7 @@ fn map_val<'v>(t: &str, v: &'v str) -> &'v str { fn gen_aarch64( current_comment: &str, current_fn: &Option, - name: &str, + current_name: &str, current_aarch64: &Option, link_aarch64: &Option, in_t: &str, @@ -312,6 +396,7 @@ fn gen_aarch64( ) -> (String, String) { let _global_t = type_to_global_type(in_t); let _global_ret_t = type_to_global_type(out_t); + let name = format!("{}{}", current_name, type_to_suffix(in_t)); let current_fn = if let Some(current_fn) = current_fn.clone() { if link_aarch64.is_some() { panic!( @@ -340,7 +425,7 @@ fn gen_aarch64( let current_aarch64 = current_aarch64.clone().unwrap(); let ext_c = if let Some(link_aarch64) = link_aarch64.clone() { let ext = type_to_ext(in_t); - + let ext2 = type_to_ext(out_t); format!( r#"#[allow(improper_ctypes)] extern "C" {{ @@ -348,7 +433,7 @@ fn gen_aarch64( fn {}({}) -> {}; }} "#, - link_aarch64.replace("_EXT_", ext), + link_aarch64.replace("_EXT_", ext).replace("_EXT2_", ext2), current_fn, match para_num { 1 => { @@ -370,7 +455,7 @@ fn gen_aarch64( if i > 0 { calls.push_str("\n "); } - calls.push_str(&get_call(&multi_fn[i], in_t, out_t, fixed)); + calls.push_str(&get_call(&multi_fn[i], current_name, in_t, out_t, fixed)); } calls } else { @@ -429,7 +514,14 @@ fn gen_aarch64( current_comment, current_aarch64, call ); - let test = gen_test(name, &in_t, &out_t, current_tests, type_len(in_t), para_num); + let test = gen_test( + &name, + &in_t, + &out_t, + current_tests, + type_len(in_t), + para_num, + ); 
(function, test) } @@ -499,7 +591,7 @@ fn gen_test( fn gen_arm( current_comment: &str, current_fn: &Option, - name: &str, + current_name: &str, current_arm: &str, link_arm: &Option, current_aarch64: &Option, @@ -513,6 +605,7 @@ fn gen_arm( ) -> (String, String) { let _global_t = type_to_global_type(in_t); let _global_ret_t = type_to_global_type(out_t); + let name = format!("{}{}", current_name, type_to_suffix(in_t)); let current_aarch64 = current_aarch64 .clone() .unwrap_or_else(|| current_arm.to_string()); @@ -545,7 +638,7 @@ fn gen_arm( let ext_c = if let (Some(link_arm), Some(link_aarch64)) = (link_arm.clone(), link_aarch64.clone()) { let ext = type_to_ext(in_t); - + let ext2 = type_to_ext(out_t); format!( r#"#[allow(improper_ctypes)] extern "C" {{ @@ -554,8 +647,8 @@ fn gen_arm( fn {}({}) -> {}; }} "#, - link_arm.replace("_EXT_", ext), - link_aarch64.replace("_EXT_", ext), + link_arm.replace("_EXT_", ext).replace("_EXT2_", ext2), + link_aarch64.replace("_EXT_", ext).replace("_EXT2_", ext2), current_fn, match para_num { 1 => { @@ -577,7 +670,7 @@ fn gen_arm( if i > 0 { calls.push_str("\n "); } - calls.push_str(&get_call(&multi_fn[i], in_t, out_t, fixed)); + calls.push_str(&get_call(&multi_fn[i], current_name, in_t, out_t, fixed)); } calls } else { @@ -612,10 +705,10 @@ fn gen_arm( ) } (_, 1, _) => format!( - r#"pub unsafe fn {}(a: {}, b: {}) -> {} {{ + r#"pub unsafe fn {}(a: {}) -> {} {{ {}{} }}"#, - name, in_t, in_t, out_t, ext_c, multi_calls, + name, in_t, out_t, ext_c, multi_calls, ), (_, 2, _) => format!( r#"pub unsafe fn {}(a: {}, b: {}) -> {} {{ @@ -640,7 +733,14 @@ fn gen_arm( expand_intrinsic(¤t_aarch64, in_t), call, ); - let test = gen_test(name, &in_t, &out_t, current_tests, type_len(in_t), para_num); + let test = gen_test( + &name, + &in_t, + &out_t, + current_tests, + type_len(in_t), + para_num, + ); (function, test) } @@ -715,15 +815,52 @@ fn expand_intrinsic(intr: &str, t: &str) -> String { } } -fn get_call(in_str: &str, in_t: &str, out_t: &str, 
fixed: &Vec) -> String { +fn get_call( + in_str: &str, + current_name: &str, + in_t: &str, + out_t: &str, + fixed: &Vec, +) -> String { let params: Vec<_> = in_str.split(',').map(|v| v.trim().to_string()).collect(); assert!(params.len() > 0); - let fn_name = ¶ms[0]; + let mut fn_name = params[0].clone(); let mut re: Option<(String, String)> = None; let mut param_str = String::new(); - for i in 1..params.len() { + let mut i = 1; + while i < params.len() { let s = ¶ms[i]; - if s.contains(':') { + if s.starts_with('[') { + let mut sub_fn = String::new(); + let mut brackets = 1; + while i < params.len() { + if !sub_fn.is_empty() { + sub_fn.push_str(", "); + } + sub_fn.push_str(¶ms[i]); + if params[i].starts_with('[') { + brackets += 1; + } + if params[i].ends_with("]") { + brackets -= 1; + if brackets == 0 { + break; + } + } + i += 1; + } + let sub_call = get_call( + &sub_fn[1..sub_fn.len() - 1], + current_name, + in_t, + out_t, + fixed, + ); + if !param_str.is_empty() { + param_str.push_str(", "); + } + param_str.push_str(&sub_call); + } else if s.contains(':') { let re_params: Vec<_> = s.split(':').map(|v| v.to_string()).collect(); if re_params[1] == "" { re = Some((re_params[0].clone(), in_t.to_string())); @@ -738,12 +875,34 @@ fn get_call(in_str: &str, in_t: &str, out_t: &str, fixed: &Vec) -> Strin } param_str.push_str(s); } + i += 1; } if fn_name == "fixed" { let (re_name, re_type) = re.unwrap(); let fixed: Vec = fixed.iter().take(type_len(in_t)).cloned().collect(); return format!(r#"let {}{};"#, re_name, values(&re_type, &fixed)); } + if fn_name.contains('-') { + let fn_format: Vec<_> = fn_name.split('-').map(|v| v.to_string()).collect(); + assert_eq!(fn_format.len(), 3); + fn_name = if fn_format[0] == "self" { + current_name.to_string() + } else { + fn_format[0].clone() + }; + if fn_format[1] == "self" { + fn_name.push_str(type_to_suffix(in_t)); + } else if fn_format[1] == "signed" { + fn_name.push_str(type_to_signed_suffix(in_t)); + } else if fn_format[1] == 
"unsigned" { + fn_name.push_str(type_to_unsigned_suffix(in_t)); + } else { + fn_name.push_str(&fn_format[1]); + }; + if fn_format[2] == "ext" { + fn_name.push_str("_"); + } + } if param_str.is_empty() { param_str.push_str("a, b"); } @@ -909,12 +1068,11 @@ mod test { para_num = 1; } let current_name = current_name.clone().unwrap(); - let name = format!("{}{}", current_name, type_to_suffix(in_t),); if let Some(current_arm) = current_arm.clone() { let (function, test) = gen_arm( ¤t_comment, ¤t_fn, - &name, + ¤t_name, ¤t_arm, &link_arm, ¤t_aarch64, @@ -932,7 +1090,7 @@ mod test { let (function, test) = gen_aarch64( ¤t_comment, ¤t_fn, - &name, + ¤t_name, ¤t_aarch64, &link_aarch64, &in_t,