Add vmla_n, vmla_lane, vmls_n, vmls_lane neon instructions (#1145)
@@ -2950,6 +2950,118 @@ pub unsafe fn vmlal_high_u32(a: uint64x2_t, b: uint32x4_t, c: uint32x4_t) -> uint64x2_t {
    vmlal_u32(a, b, c)
}

/// Multiply-add long
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(smlal2))]
pub unsafe fn vmlal_high_n_s16(a: int32x4_t, b: int16x8_t, c: i16) -> int32x4_t {
    vmlal_high_s16(a, b, vdupq_n_s16(c))
}

/// Multiply-add long
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(smlal2))]
pub unsafe fn vmlal_high_n_s32(a: int64x2_t, b: int32x4_t, c: i32) -> int64x2_t {
    vmlal_high_s32(a, b, vdupq_n_s32(c))
}

/// Multiply-add long
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(umlal2))]
pub unsafe fn vmlal_high_n_u16(a: uint32x4_t, b: uint16x8_t, c: u16) -> uint32x4_t {
    vmlal_high_u16(a, b, vdupq_n_u16(c))
}

/// Multiply-add long
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(umlal2))]
pub unsafe fn vmlal_high_n_u32(a: uint64x2_t, b: uint32x4_t, c: u32) -> uint64x2_t {
    vmlal_high_u32(a, b, vdupq_n_u32(c))
}
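
A minimal usage sketch of the `_n` form added above (assumes an AArch64 target; these intrinsics were nightly-only when this change landed, and the values are illustrative, not taken from the test suite):

#[cfg(target_arch = "aarch64")]
unsafe fn vmlal_high_n_demo() {
    use core::arch::aarch64::*;
    let a = vdupq_n_s32(8);            // accumulator
    let b = vdupq_n_s16(3);            // only the high 4 of b's 8 lanes are used
    let r = vmlal_high_n_s16(a, b, 2); // r[i] = a[i] + (b[4 + i] as i32) * 2
    assert_eq!(vgetq_lane_s32::<0>(r), 8 + 3 * 2);
}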

/// Multiply-add long
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(smlal2, LANE = 1))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn vmlal_high_lane_s16<const LANE: i32>(a: int32x4_t, b: int16x8_t, c: int16x4_t) -> int32x4_t {
    static_assert_imm2!(LANE);
    vmlal_high_s16(a, b, simd_shuffle8(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
}

/// Multiply-add long
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(smlal2, LANE = 1))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn vmlal_high_laneq_s16<const LANE: i32>(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
    static_assert_imm3!(LANE);
    vmlal_high_s16(a, b, simd_shuffle8(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
}

/// Multiply-add long
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(smlal2, LANE = 1))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn vmlal_high_lane_s32<const LANE: i32>(a: int64x2_t, b: int32x4_t, c: int32x2_t) -> int64x2_t {
    static_assert_imm1!(LANE);
    vmlal_high_s32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
}

/// Multiply-add long
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(smlal2, LANE = 1))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn vmlal_high_laneq_s32<const LANE: i32>(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
    static_assert_imm2!(LANE);
    vmlal_high_s32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
}

/// Multiply-add long
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(umlal2, LANE = 1))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn vmlal_high_lane_u16<const LANE: i32>(a: uint32x4_t, b: uint16x8_t, c: uint16x4_t) -> uint32x4_t {
    static_assert_imm2!(LANE);
    vmlal_high_u16(a, b, simd_shuffle8(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
}

/// Multiply-add long
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(umlal2, LANE = 1))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn vmlal_high_laneq_u16<const LANE: i32>(a: uint32x4_t, b: uint16x8_t, c: uint16x8_t) -> uint32x4_t {
    static_assert_imm3!(LANE);
    vmlal_high_u16(a, b, simd_shuffle8(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
}

/// Multiply-add long
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(umlal2, LANE = 1))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn vmlal_high_lane_u32<const LANE: i32>(a: uint64x2_t, b: uint32x4_t, c: uint32x2_t) -> uint64x2_t {
    static_assert_imm1!(LANE);
    vmlal_high_u32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
}

/// Multiply-add long
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(umlal2, LANE = 1))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn vmlal_high_laneq_u32<const LANE: i32>(a: uint64x2_t, b: uint32x4_t, c: uint32x4_t) -> uint64x2_t {
    static_assert_imm2!(LANE);
    vmlal_high_u32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
}
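
A lane-selection sketch for the `lane`/`laneq` forms above (assumes AArch64; the array-to-vector transmute mirrors how the tests below build vectors):

#[cfg(target_arch = "aarch64")]
unsafe fn vmlal_high_lane_demo() {
    use core::arch::aarch64::*;
    let a = vdupq_n_s32(8);
    let b = vdupq_n_s16(3);
    let c: int16x4_t = core::mem::transmute([0i16, 2, 0, 0]);
    let r = vmlal_high_lane_s16::<1>(a, b, c); // multiplier is c[1] == 2 in every lane
    assert_eq!(vgetq_lane_s32::<3>(r), 8 + 3 * 2);
}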

/// Floating-point multiply-subtract from accumulator
#[inline]
#[target_feature(enable = "neon")]
@@ -3026,6 +3138,118 @@ pub unsafe fn vmlsl_high_u32(a: uint64x2_t, b: uint32x4_t, c: uint32x4_t) -> uint64x2_t {
    vmlsl_u32(a, b, c)
}

/// Multiply-subtract long
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(smlsl2))]
pub unsafe fn vmlsl_high_n_s16(a: int32x4_t, b: int16x8_t, c: i16) -> int32x4_t {
    vmlsl_high_s16(a, b, vdupq_n_s16(c))
}

/// Multiply-subtract long
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(smlsl2))]
pub unsafe fn vmlsl_high_n_s32(a: int64x2_t, b: int32x4_t, c: i32) -> int64x2_t {
    vmlsl_high_s32(a, b, vdupq_n_s32(c))
}

/// Multiply-subtract long
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(umlsl2))]
pub unsafe fn vmlsl_high_n_u16(a: uint32x4_t, b: uint16x8_t, c: u16) -> uint32x4_t {
    vmlsl_high_u16(a, b, vdupq_n_u16(c))
}

/// Multiply-subtract long
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(umlsl2))]
pub unsafe fn vmlsl_high_n_u32(a: uint64x2_t, b: uint32x4_t, c: u32) -> uint64x2_t {
    vmlsl_high_u32(a, b, vdupq_n_u32(c))
}
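
The `vmlsl_high_n_*` family is the subtracting mirror of `vmlal_high_n_*`; a quick sketch (illustrative values, AArch64 only):

#[cfg(target_arch = "aarch64")]
unsafe fn vmlsl_high_n_demo() {
    use core::arch::aarch64::*;
    let a = vdupq_n_s32(14);
    let b = vdupq_n_s16(3);
    let r = vmlsl_high_n_s16(a, b, 2); // r[i] = a[i] - (b[4 + i] as i32) * 2
    assert_eq!(vgetq_lane_s32::<0>(r), 14 - 3 * 2);
}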

/// Multiply-subtract long
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(smlsl2, LANE = 1))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn vmlsl_high_lane_s16<const LANE: i32>(a: int32x4_t, b: int16x8_t, c: int16x4_t) -> int32x4_t {
    static_assert_imm2!(LANE);
    vmlsl_high_s16(a, b, simd_shuffle8(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
}

/// Multiply-subtract long
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(smlsl2, LANE = 1))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn vmlsl_high_laneq_s16<const LANE: i32>(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
    static_assert_imm3!(LANE);
    vmlsl_high_s16(a, b, simd_shuffle8(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
}

/// Multiply-subtract long
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(smlsl2, LANE = 1))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn vmlsl_high_lane_s32<const LANE: i32>(a: int64x2_t, b: int32x4_t, c: int32x2_t) -> int64x2_t {
    static_assert_imm1!(LANE);
    vmlsl_high_s32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
}

/// Multiply-subtract long
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(smlsl2, LANE = 1))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn vmlsl_high_laneq_s32<const LANE: i32>(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
    static_assert_imm2!(LANE);
    vmlsl_high_s32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
}

/// Multiply-subtract long
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(umlsl2, LANE = 1))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn vmlsl_high_lane_u16<const LANE: i32>(a: uint32x4_t, b: uint16x8_t, c: uint16x4_t) -> uint32x4_t {
    static_assert_imm2!(LANE);
    vmlsl_high_u16(a, b, simd_shuffle8(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
}

/// Multiply-subtract long
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(umlsl2, LANE = 1))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn vmlsl_high_laneq_u16<const LANE: i32>(a: uint32x4_t, b: uint16x8_t, c: uint16x8_t) -> uint32x4_t {
    static_assert_imm3!(LANE);
    vmlsl_high_u16(a, b, simd_shuffle8(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
}

/// Multiply-subtract long
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(umlsl2, LANE = 1))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn vmlsl_high_lane_u32<const LANE: i32>(a: uint64x2_t, b: uint32x4_t, c: uint32x2_t) -> uint64x2_t {
    static_assert_imm1!(LANE);
    vmlsl_high_u32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
}

/// Multiply-subtract long
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(umlsl2, LANE = 1))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn vmlsl_high_laneq_u32<const LANE: i32>(a: uint64x2_t, b: uint32x4_t, c: uint32x4_t) -> uint64x2_t {
    static_assert_imm2!(LANE);
    vmlsl_high_u32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
}
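
As with the multiply-add forms, the lane variant should behave like the `_n` variant fed the selected lane; a sketch of that (assumed) equivalence using the existing lane extractors:

#[cfg(target_arch = "aarch64")]
unsafe fn lane_matches_n(
    a: core::arch::aarch64::uint64x2_t,
    b: core::arch::aarch64::uint32x4_t,
    c: core::arch::aarch64::uint32x2_t,
) -> bool {
    use core::arch::aarch64::*;
    let via_lane = vmlsl_high_lane_u32::<1>(a, b, c);
    let via_n = vmlsl_high_n_u32(a, b, vget_lane_u32::<1>(c));
    vgetq_lane_u64::<0>(via_lane) == vgetq_lane_u64::<0>(via_n)
        && vgetq_lane_u64::<1>(via_lane) == vgetq_lane_u64::<1>(via_n)
}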

/// Extract narrow
#[inline]
#[target_feature(enable = "neon")]
@@ -9750,6 +9974,126 @@ mod test {
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    unsafe fn test_vmlal_high_n_s16() {
        let a: i32x4 = i32x4::new(8, 7, 6, 5);
        let b: i16x8 = i16x8::new(3, 3, 0, 1, 0, 1, 2, 3);
        let c: i16 = 2;
        let e: i32x4 = i32x4::new(8, 9, 10, 11);
        let r: i32x4 = transmute(vmlal_high_n_s16(transmute(a), transmute(b), transmute(c)));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    unsafe fn test_vmlal_high_n_s32() {
        let a: i64x2 = i64x2::new(8, 7);
        let b: i32x4 = i32x4::new(3, 3, 0, 1);
        let c: i32 = 2;
        let e: i64x2 = i64x2::new(8, 9);
        let r: i64x2 = transmute(vmlal_high_n_s32(transmute(a), transmute(b), transmute(c)));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    unsafe fn test_vmlal_high_n_u16() {
        let a: u32x4 = u32x4::new(8, 7, 6, 5);
        let b: u16x8 = u16x8::new(3, 3, 0, 1, 0, 1, 2, 3);
        let c: u16 = 2;
        let e: u32x4 = u32x4::new(8, 9, 10, 11);
        let r: u32x4 = transmute(vmlal_high_n_u16(transmute(a), transmute(b), transmute(c)));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    unsafe fn test_vmlal_high_n_u32() {
        let a: u64x2 = u64x2::new(8, 7);
        let b: u32x4 = u32x4::new(3, 3, 0, 1);
        let c: u32 = 2;
        let e: u64x2 = u64x2::new(8, 9);
        let r: u64x2 = transmute(vmlal_high_n_u32(transmute(a), transmute(b), transmute(c)));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    unsafe fn test_vmlal_high_lane_s16() {
        let a: i32x4 = i32x4::new(8, 7, 6, 5);
        let b: i16x8 = i16x8::new(3, 3, 0, 1, 0, 1, 2, 3);
        let c: i16x4 = i16x4::new(0, 2, 0, 0);
        let e: i32x4 = i32x4::new(8, 9, 10, 11);
        let r: i32x4 = transmute(vmlal_high_lane_s16::<1>(transmute(a), transmute(b), transmute(c)));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    unsafe fn test_vmlal_high_laneq_s16() {
        let a: i32x4 = i32x4::new(8, 7, 6, 5);
        let b: i16x8 = i16x8::new(3, 3, 0, 1, 0, 1, 2, 3);
        let c: i16x8 = i16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
        let e: i32x4 = i32x4::new(8, 9, 10, 11);
        let r: i32x4 = transmute(vmlal_high_laneq_s16::<1>(transmute(a), transmute(b), transmute(c)));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    unsafe fn test_vmlal_high_lane_s32() {
        let a: i64x2 = i64x2::new(8, 7);
        let b: i32x4 = i32x4::new(3, 3, 0, 1);
        let c: i32x2 = i32x2::new(0, 2);
        let e: i64x2 = i64x2::new(8, 9);
        let r: i64x2 = transmute(vmlal_high_lane_s32::<1>(transmute(a), transmute(b), transmute(c)));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    unsafe fn test_vmlal_high_laneq_s32() {
        let a: i64x2 = i64x2::new(8, 7);
        let b: i32x4 = i32x4::new(3, 3, 0, 1);
        let c: i32x4 = i32x4::new(0, 2, 0, 0);
        let e: i64x2 = i64x2::new(8, 9);
        let r: i64x2 = transmute(vmlal_high_laneq_s32::<1>(transmute(a), transmute(b), transmute(c)));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    unsafe fn test_vmlal_high_lane_u16() {
        let a: u32x4 = u32x4::new(8, 7, 6, 5);
        let b: u16x8 = u16x8::new(3, 3, 0, 1, 0, 1, 2, 3);
        let c: u16x4 = u16x4::new(0, 2, 0, 0);
        let e: u32x4 = u32x4::new(8, 9, 10, 11);
        let r: u32x4 = transmute(vmlal_high_lane_u16::<1>(transmute(a), transmute(b), transmute(c)));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    unsafe fn test_vmlal_high_laneq_u16() {
        let a: u32x4 = u32x4::new(8, 7, 6, 5);
        let b: u16x8 = u16x8::new(3, 3, 0, 1, 0, 1, 2, 3);
        let c: u16x8 = u16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
        let e: u32x4 = u32x4::new(8, 9, 10, 11);
        let r: u32x4 = transmute(vmlal_high_laneq_u16::<1>(transmute(a), transmute(b), transmute(c)));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    unsafe fn test_vmlal_high_lane_u32() {
        let a: u64x2 = u64x2::new(8, 7);
        let b: u32x4 = u32x4::new(3, 3, 0, 1);
        let c: u32x2 = u32x2::new(0, 2);
        let e: u64x2 = u64x2::new(8, 9);
        let r: u64x2 = transmute(vmlal_high_lane_u32::<1>(transmute(a), transmute(b), transmute(c)));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    unsafe fn test_vmlal_high_laneq_u32() {
        let a: u64x2 = u64x2::new(8, 7);
        let b: u32x4 = u32x4::new(3, 3, 0, 1);
        let c: u32x4 = u32x4::new(0, 2, 0, 0);
        let e: u64x2 = u64x2::new(8, 9);
        let r: u64x2 = transmute(vmlal_high_laneq_u32::<1>(transmute(a), transmute(b), transmute(c)));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    unsafe fn test_vmls_f64() {
        let a: f64 = 6.;

@@ -9830,6 +10174,126 @@ mod test {
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    unsafe fn test_vmlsl_high_n_s16() {
        let a: i32x4 = i32x4::new(14, 15, 16, 17);
        let b: i16x8 = i16x8::new(3, 3, 0, 1, 0, 1, 2, 3);
        let c: i16 = 2;
        let e: i32x4 = i32x4::new(14, 13, 12, 11);
        let r: i32x4 = transmute(vmlsl_high_n_s16(transmute(a), transmute(b), transmute(c)));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    unsafe fn test_vmlsl_high_n_s32() {
        let a: i64x2 = i64x2::new(14, 15);
        let b: i32x4 = i32x4::new(3, 3, 0, 1);
        let c: i32 = 2;
        let e: i64x2 = i64x2::new(14, 13);
        let r: i64x2 = transmute(vmlsl_high_n_s32(transmute(a), transmute(b), transmute(c)));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    unsafe fn test_vmlsl_high_n_u16() {
        let a: u32x4 = u32x4::new(14, 15, 16, 17);
        let b: u16x8 = u16x8::new(3, 3, 0, 1, 0, 1, 2, 3);
        let c: u16 = 2;
        let e: u32x4 = u32x4::new(14, 13, 12, 11);
        let r: u32x4 = transmute(vmlsl_high_n_u16(transmute(a), transmute(b), transmute(c)));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    unsafe fn test_vmlsl_high_n_u32() {
        let a: u64x2 = u64x2::new(14, 15);
        let b: u32x4 = u32x4::new(3, 3, 0, 1);
        let c: u32 = 2;
        let e: u64x2 = u64x2::new(14, 13);
        let r: u64x2 = transmute(vmlsl_high_n_u32(transmute(a), transmute(b), transmute(c)));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    unsafe fn test_vmlsl_high_lane_s16() {
        let a: i32x4 = i32x4::new(14, 15, 16, 17);
        let b: i16x8 = i16x8::new(3, 3, 0, 1, 0, 1, 2, 3);
        let c: i16x4 = i16x4::new(0, 2, 0, 0);
        let e: i32x4 = i32x4::new(14, 13, 12, 11);
        let r: i32x4 = transmute(vmlsl_high_lane_s16::<1>(transmute(a), transmute(b), transmute(c)));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    unsafe fn test_vmlsl_high_laneq_s16() {
        let a: i32x4 = i32x4::new(14, 15, 16, 17);
        let b: i16x8 = i16x8::new(3, 3, 0, 1, 0, 1, 2, 3);
        let c: i16x8 = i16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
        let e: i32x4 = i32x4::new(14, 13, 12, 11);
        let r: i32x4 = transmute(vmlsl_high_laneq_s16::<1>(transmute(a), transmute(b), transmute(c)));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    unsafe fn test_vmlsl_high_lane_s32() {
        let a: i64x2 = i64x2::new(14, 15);
        let b: i32x4 = i32x4::new(3, 3, 0, 1);
        let c: i32x2 = i32x2::new(0, 2);
        let e: i64x2 = i64x2::new(14, 13);
        let r: i64x2 = transmute(vmlsl_high_lane_s32::<1>(transmute(a), transmute(b), transmute(c)));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    unsafe fn test_vmlsl_high_laneq_s32() {
        let a: i64x2 = i64x2::new(14, 15);
        let b: i32x4 = i32x4::new(3, 3, 0, 1);
        let c: i32x4 = i32x4::new(0, 2, 0, 0);
        let e: i64x2 = i64x2::new(14, 13);
        let r: i64x2 = transmute(vmlsl_high_laneq_s32::<1>(transmute(a), transmute(b), transmute(c)));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    unsafe fn test_vmlsl_high_lane_u16() {
        let a: u32x4 = u32x4::new(14, 15, 16, 17);
        let b: u16x8 = u16x8::new(3, 3, 0, 1, 0, 1, 2, 3);
        let c: u16x4 = u16x4::new(0, 2, 0, 0);
        let e: u32x4 = u32x4::new(14, 13, 12, 11);
        let r: u32x4 = transmute(vmlsl_high_lane_u16::<1>(transmute(a), transmute(b), transmute(c)));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    unsafe fn test_vmlsl_high_laneq_u16() {
        let a: u32x4 = u32x4::new(14, 15, 16, 17);
        let b: u16x8 = u16x8::new(3, 3, 0, 1, 0, 1, 2, 3);
        let c: u16x8 = u16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
        let e: u32x4 = u32x4::new(14, 13, 12, 11);
        let r: u32x4 = transmute(vmlsl_high_laneq_u16::<1>(transmute(a), transmute(b), transmute(c)));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    unsafe fn test_vmlsl_high_lane_u32() {
        let a: u64x2 = u64x2::new(14, 15);
        let b: u32x4 = u32x4::new(3, 3, 0, 1);
        let c: u32x2 = u32x2::new(0, 2);
        let e: u64x2 = u64x2::new(14, 13);
        let r: u64x2 = transmute(vmlsl_high_lane_u32::<1>(transmute(a), transmute(b), transmute(c)));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    unsafe fn test_vmlsl_high_laneq_u32() {
        let a: u64x2 = u64x2::new(14, 15);
        let b: u32x4 = u32x4::new(3, 3, 0, 1);
        let c: u32x4 = u32x4::new(0, 2, 0, 0);
        let e: u64x2 = u64x2::new(14, 13);
        let r: u64x2 = transmute(vmlsl_high_laneq_u32::<1>(transmute(a), transmute(b), transmute(c)));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    unsafe fn test_vmovn_high_s16() {
        let a: i8x8 = i8x8::new(0, 1, 2, 3, 2, 3, 4, 5);

[File diff suppressed because it is too large]
@@ -37,7 +37,8 @@
    external_doc,
    allow_internal_unstable,
    decl_macro,
-   extended_key_value_attributes
+   extended_key_value_attributes,
+   bench_black_box
)]
#![cfg_attr(test, feature(test, abi_vectorcall))]
#![cfg_attr(all(test, target_arch = "wasm32"), feature(wasm_simd))]
||||
@@ -1222,6 +1222,68 @@ generate float64x*_t
arm = vmla.
generate float*_t

/// Vector multiply accumulate with scalar
name = vmla
n-suffix
multi_fn = vmla-self-noext, a, b, {vdup-nself-noext, c}
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
c = 3
validate 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21

aarch64 = mla
arm = vmla.
generate int16x4_t:int16x4_t:i16:int16x4_t, int16x8_t:int16x8_t:i16:int16x8_t, int32x2_t:int32x2_t:i32:int32x2_t, int32x4_t:int32x4_t:i32:int32x4_t
generate uint16x4_t:uint16x4_t:u16:uint16x4_t, uint16x8_t:uint16x8_t:u16:uint16x8_t, uint32x2_t:uint32x2_t:u32:uint32x2_t, uint32x4_t:uint32x4_t:u32:uint32x4_t
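
To make the DSL concrete: the `multi_fn` recipe above, instantiated for the first `generate` signature, should expand to roughly the intrinsic below (a sketch of the expected generator output, not copied from the generated file; the exact `assert_instr` wrapper differs between the arm and aarch64 outputs):

/// Vector multiply accumulate with scalar
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(mla))]
pub unsafe fn vmla_n_s16(a: int16x4_t, b: int16x4_t, c: i16) -> int16x4_t {
    vmla_s16(a, b, vdup_n_s16(c))
}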

/// Vector multiply accumulate with scalar
name = vmla
n-suffix
multi_fn = vmla-self-noext, a, b, {vdup-nself-noext, c}
a = 0., 1., 2., 3.
b = 2., 2., 2., 2.
c = 3.
validate 6., 7., 8., 9.

aarch64 = fmul
arm = vmla.
generate float32x2_t:float32x2_t:f32:float32x2_t, float32x4_t:float32x4_t:f32:float32x4_t

/// Vector multiply accumulate with scalar
name = vmla
in2-lane-suffixes
constn = LANE
multi_fn = static_assert_imm-in2_exp_len-LANE
multi_fn = vmla-self-noext, a, b, {simd_shuffle-in_len-noext, c, c, {dup-in_len-LANE as u32}}
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
c = 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
n = 1
validate 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21

aarch64 = mla
arm = vmla.
generate int16x4_t, int16x4_t:int16x4_t:int16x8_t:int16x4_t, int16x8_t:int16x8_t:int16x4_t:int16x8_t, int16x8_t
generate int32x2_t, int32x2_t:int32x2_t:int32x4_t:int32x2_t, int32x4_t:int32x4_t:int32x2_t:int32x4_t, int32x4_t
generate uint16x4_t, uint16x4_t:uint16x4_t:uint16x8_t:uint16x4_t, uint16x8_t:uint16x8_t:uint16x4_t:uint16x8_t, uint16x8_t
generate uint32x2_t, uint32x2_t:uint32x2_t:uint32x4_t:uint32x2_t, uint32x4_t:uint32x4_t:uint32x2_t:uint32x4_t, uint32x4_t

/// Vector multiply accumulate with scalar
name = vmla
in2-lane-suffixes
constn = LANE
multi_fn = static_assert_imm-in2_exp_len-LANE
multi_fn = vmla-self-noext, a, b, {simd_shuffle-in_len-noext, c, c, {dup-in_len-LANE as u32}}
a = 0., 1., 2., 3.
b = 2., 2., 2., 2.
c = 0., 3., 0., 0.
n = 1
validate 6., 7., 8., 9.

aarch64 = fmul
arm = vmla.
generate float32x2_t, float32x2_t:float32x2_t:float32x4_t:float32x2_t, float32x4_t:float32x4_t:float32x2_t:float32x4_t, float32x4_t
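
Likewise, the `in2-lane-suffixes` recipe should expand to lane intrinsics shaped like the sketch below; `static_assert_imm2!` and `simd_shuffle4` are the same crate-internal helpers visible in the generated code earlier in this diff:

/// Vector multiply accumulate with scalar
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(mla, LANE = 1))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn vmla_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
    static_assert_imm2!(LANE);
    vmla_s16(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
}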

/// Signed multiply-add long
name = vmlal
multi_fn = simd_add, a, {vmull-self-noext, b, c}

@@ -1246,6 +1308,41 @@ arm = vmlal.s
aarch64 = umlal
generate uint16x8_t:uint8x8_t:uint8x8_t:uint16x8_t, uint32x4_t:uint16x4_t:uint16x4_t:uint32x4_t, uint64x2_t:uint32x2_t:uint32x2_t:uint64x2_t

/// Vector widening multiply accumulate with scalar
name = vmlal
n-suffix
multi_fn = vmlal-self-noext, a, b, {vdup-nself-noext, c}
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
c = 3
validate 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21

arm = vmlal.s
aarch64 = smlal
generate int32x4_t:int16x4_t:i16:int32x4_t, int64x2_t:int32x2_t:i32:int64x2_t
aarch64 = umlal
generate uint32x4_t:uint16x4_t:u16:uint32x4_t, uint64x2_t:uint32x2_t:u32:uint64x2_t

/// Vector widening multiply accumulate with scalar
name = vmlal_lane
in2-suffix
constn = LANE
multi_fn = static_assert_imm-in2_exp_len-LANE
multi_fn = vmlal-self-noext, a, b, {simd_shuffle-in_len-noext, c, c, {dup-in_len-LANE as u32}}
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
c = 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
n = 1
validate 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21

arm = vmlal.s
aarch64 = smlal
generate int32x4_t:int16x4_t:int16x4_t:int32x4_t, int32x4_t:int16x4_t:int16x8_t:int32x4_t
generate int64x2_t:int32x2_t:int32x2_t:int64x2_t, int64x2_t:int32x2_t:int32x4_t:int64x2_t
aarch64 = umlal
generate uint32x4_t:uint16x4_t:uint16x4_t:uint32x4_t, uint32x4_t:uint16x4_t:uint16x8_t:uint32x4_t
generate uint64x2_t:uint32x2_t:uint32x2_t:uint64x2_t, uint64x2_t:uint32x2_t:uint32x4_t:uint64x2_t

/// Signed multiply-add long
name = vmlal_high
no-q

@@ -1276,6 +1373,39 @@ validate 8, 9, 10, 11, 12, 13, 14, 15
aarch64 = umlal2
generate uint16x8_t:uint8x16_t:uint8x16_t:uint16x8_t, uint32x4_t:uint16x8_t:uint16x8_t:uint32x4_t, uint64x2_t:uint32x4_t:uint32x4_t:uint64x2_t

/// Multiply-add long
name = vmlal_high_n
no-q
multi_fn = vmlal_high-noqself-noext, a, b, {vdupq_n-noqself-noext, c}
a = 8, 7, 6, 5, 4, 3, 2, 1
b = 3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7
c = 2
validate 8, 9, 10, 11, 12, 13, 14, 15

aarch64 = smlal2
generate int32x4_t:int16x8_t:i16:int32x4_t, int64x2_t:int32x4_t:i32:int64x2_t
aarch64 = umlal2
generate uint32x4_t:uint16x8_t:u16:uint32x4_t, uint64x2_t:uint32x4_t:u32:uint64x2_t

/// Multiply-add long
name = vmlal_high_lane
in2-suffix
constn = LANE
multi_fn = static_assert_imm-in2_exp_len-LANE
multi_fn = vmlal_high-noqself-noext, a, b, {simd_shuffle-in_len-noext, c, c, {dup-in_len-LANE as u32}}
a = 8, 7, 6, 5, 4, 3, 2, 1
b = 3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7
c = 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
n = 1
validate 8, 9, 10, 11, 12, 13, 14, 15

aarch64 = smlal2
generate int32x4_t:int16x8_t:int16x4_t:int32x4_t, int32x4_t:int16x8_t:int16x8_t:int32x4_t
generate int64x2_t:int32x4_t:int32x2_t:int64x2_t, int64x2_t:int32x4_t:int32x4_t:int64x2_t
aarch64 = umlal2
generate uint32x4_t:uint16x8_t:uint16x4_t:uint32x4_t, uint32x4_t:uint16x8_t:uint16x8_t:uint32x4_t
generate uint64x2_t:uint32x4_t:uint32x2_t:uint64x2_t, uint64x2_t:uint32x4_t:uint32x4_t:uint64x2_t

/// Multiply-subtract from accumulator
name = vmls
multi_fn = simd_sub, a, {simd_mul, b, c}

@@ -1302,6 +1432,68 @@ generate float64x*_t
arm = vmls.
generate float*_t

/// Vector multiply subtract with scalar
name = vmls
n-suffix
multi_fn = vmls-self-noext, a, b, {vdup-nself-noext, c}
a = 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
c = 3
validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15

aarch64 = mls
arm = vmls.
generate int16x4_t:int16x4_t:i16:int16x4_t, int16x8_t:int16x8_t:i16:int16x8_t, int32x2_t:int32x2_t:i32:int32x2_t, int32x4_t:int32x4_t:i32:int32x4_t
generate uint16x4_t:uint16x4_t:u16:uint16x4_t, uint16x8_t:uint16x8_t:u16:uint16x8_t, uint32x2_t:uint32x2_t:u32:uint32x2_t, uint32x4_t:uint32x4_t:u32:uint32x4_t

/// Vector multiply subtract with scalar
name = vmls
n-suffix
multi_fn = vmls-self-noext, a, b, {vdup-nself-noext, c}
a = 6., 7., 8., 9.
b = 2., 2., 2., 2.
c = 3.
validate 0., 1., 2., 3.

aarch64 = fmul
arm = vmls.
generate float32x2_t:float32x2_t:f32:float32x2_t, float32x4_t:float32x4_t:f32:float32x4_t

/// Vector multiply subtract with scalar
name = vmls
in2-lane-suffixes
constn = LANE
multi_fn = static_assert_imm-in2_exp_len-LANE
multi_fn = vmls-self-noext, a, b, {simd_shuffle-in_len-noext, c, c, {dup-in_len-LANE as u32}}
a = 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
c = 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
n = 1
validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15

aarch64 = mls
arm = vmls.
generate int16x4_t, int16x4_t:int16x4_t:int16x8_t:int16x4_t, int16x8_t:int16x8_t:int16x4_t:int16x8_t, int16x8_t
generate int32x2_t, int32x2_t:int32x2_t:int32x4_t:int32x2_t, int32x4_t:int32x4_t:int32x2_t:int32x4_t, int32x4_t
generate uint16x4_t, uint16x4_t:uint16x4_t:uint16x8_t:uint16x4_t, uint16x8_t:uint16x8_t:uint16x4_t:uint16x8_t, uint16x8_t
generate uint32x2_t, uint32x2_t:uint32x2_t:uint32x4_t:uint32x2_t, uint32x4_t:uint32x4_t:uint32x2_t:uint32x4_t, uint32x4_t

/// Vector multiply subtract with scalar
name = vmls
in2-lane-suffixes
constn = LANE
multi_fn = static_assert_imm-in2_exp_len-LANE
multi_fn = vmls-self-noext, a, b, {simd_shuffle-in_len-noext, c, c, {dup-in_len-LANE as u32}}
a = 6., 7., 8., 9.
b = 2., 2., 2., 2.
c = 0., 3., 0., 0.
n = 1
validate 0., 1., 2., 3.

aarch64 = fmul
arm = vmls.
generate float32x2_t, float32x2_t:float32x2_t:float32x4_t:float32x2_t, float32x4_t:float32x4_t:float32x2_t:float32x4_t, float32x4_t

/// Signed multiply-subtract long
name = vmlsl
multi_fn = simd_sub, a, {vmull-self-noext, b, c}

@@ -1314,7 +1506,7 @@ arm = vmlsl.s
aarch64 = smlsl
generate int16x8_t:int8x8_t:int8x8_t:int16x8_t, int32x4_t:int16x4_t:int16x4_t:int32x4_t, int64x2_t:int32x2_t:int32x2_t:int64x2_t

-/// Signed multiply-subtract long
+/// Unsigned multiply-subtract long
name = vmlsl
multi_fn = simd_sub, a, {vmull-self-noext, b, c}
a = 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21

@@ -1326,6 +1518,41 @@ arm = vmlsl.s
aarch64 = umlsl
generate uint16x8_t:uint8x8_t:uint8x8_t:uint16x8_t, uint32x4_t:uint16x4_t:uint16x4_t:uint32x4_t, uint64x2_t:uint32x2_t:uint32x2_t:uint64x2_t

/// Vector widening multiply subtract with scalar
name = vmlsl
n-suffix
multi_fn = vmlsl-self-noext, a, b, {vdup-nself-noext, c}
a = 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
c = 3
validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15

arm = vmlsl.s
aarch64 = smlsl
generate int32x4_t:int16x4_t:i16:int32x4_t, int64x2_t:int32x2_t:i32:int64x2_t
aarch64 = umlsl
generate uint32x4_t:uint16x4_t:u16:uint32x4_t, uint64x2_t:uint32x2_t:u32:uint64x2_t

/// Vector widening multiply subtract with scalar
name = vmlsl_lane
in2-suffix
constn = LANE
multi_fn = static_assert_imm-in2_exp_len-LANE
multi_fn = vmlsl-self-noext, a, b, {simd_shuffle-in_len-noext, c, c, {dup-in_len-LANE as u32}}
a = 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
c = 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
n = 1
validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15

arm = vmlsl.s
aarch64 = smlsl
generate int32x4_t:int16x4_t:int16x4_t:int32x4_t, int32x4_t:int16x4_t:int16x8_t:int32x4_t
generate int64x2_t:int32x2_t:int32x2_t:int64x2_t, int64x2_t:int32x2_t:int32x4_t:int64x2_t
aarch64 = umlsl
generate uint32x4_t:uint16x4_t:uint16x4_t:uint32x4_t, uint32x4_t:uint16x4_t:uint16x8_t:uint32x4_t
generate uint64x2_t:uint32x2_t:uint32x2_t:uint64x2_t, uint64x2_t:uint32x2_t:uint32x4_t:uint64x2_t

/// Signed multiply-subtract long
name = vmlsl_high
no-q

@@ -1356,6 +1583,39 @@ validate 14, 13, 12, 11, 10, 9, 8, 7
aarch64 = umlsl2
generate uint16x8_t:uint8x16_t:uint8x16_t:uint16x8_t, uint32x4_t:uint16x8_t:uint16x8_t:uint32x4_t, uint64x2_t:uint32x4_t:uint32x4_t:uint64x2_t

/// Multiply-subtract long
name = vmlsl_high_n
no-q
multi_fn = vmlsl_high-noqself-noext, a, b, {vdupq_n-noqself-noext, c}
a = 14, 15, 16, 17, 18, 19, 20, 21
b = 3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7
c = 2
validate 14, 13, 12, 11, 10, 9, 8, 7

aarch64 = smlsl2
generate int32x4_t:int16x8_t:i16:int32x4_t, int64x2_t:int32x4_t:i32:int64x2_t
aarch64 = umlsl2
generate uint32x4_t:uint16x8_t:u16:uint32x4_t, uint64x2_t:uint32x4_t:u32:uint64x2_t

/// Multiply-subtract long
name = vmlsl_high_lane
in2-suffix
constn = LANE
multi_fn = static_assert_imm-in2_exp_len-LANE
multi_fn = vmlsl_high-noqself-noext, a, b, {simd_shuffle-in_len-noext, c, c, {dup-in_len-LANE as u32}}
a = 14, 15, 16, 17, 18, 19, 20, 21
b = 3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7
c = 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
n = 1
validate 14, 13, 12, 11, 10, 9, 8, 7

aarch64 = smlsl2
generate int32x4_t:int16x8_t:int16x4_t:int32x4_t, int32x4_t:int16x8_t:int16x8_t:int32x4_t
generate int64x2_t:int32x4_t:int32x2_t:int64x2_t, int64x2_t:int32x4_t:int32x4_t:int64x2_t
aarch64 = umlsl2
generate uint32x4_t:uint16x8_t:uint16x4_t:uint32x4_t, uint32x4_t:uint16x8_t:uint16x8_t:uint32x4_t
generate uint64x2_t:uint32x4_t:uint32x2_t:uint64x2_t, uint64x2_t:uint32x4_t:uint32x4_t:uint64x2_t

/// Extract narrow
name = vmovn_high
no-q
@@ -349,6 +349,7 @@ enum Suffix {
    OutSuffix,
    Lane,
    In2,
    In2Lane,
}

#[derive(Clone, Copy)]

@@ -847,6 +848,7 @@ fn gen_aarch64(
        OutSuffix => format!("{}{}", current_name, type_to_suffix(out_t)),
        Lane => format!("{}{}", current_name, type_to_lane_suffixes(out_t, in_t[1])),
        In2 => format!("{}{}", current_name, type_to_suffix(in_t[2])),
        In2Lane => format!("{}{}", current_name, type_to_lane_suffixes(out_t, in_t[2])),
    };
    let current_fn = if let Some(current_fn) = current_fn.clone() {
        if link_aarch64.is_some() {
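
For concreteness: with `current_name = "vmlal_high"` and `in_t[2] = int16x4_t`, the new `In2Lane` arm yields the `vmlal_high_lane_s16` name seen in the generated code above, and `vmlal_high_laneq_s16` when `in_t[2]` is the quad `int16x8_t` (inferred from the generated names; the exact behavior of `type_to_lane_suffixes` is defined elsewhere in this file).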

@@ -1259,6 +1261,7 @@ fn gen_arm(
        OutSuffix => format!("{}{}", current_name, type_to_suffix(out_t)),
        Lane => format!("{}{}", current_name, type_to_lane_suffixes(out_t, in_t[1])),
        In2 => format!("{}{}", current_name, type_to_suffix(in_t[2])),
        In2Lane => format!("{}{}", current_name, type_to_lane_suffixes(out_t, in_t[2])),
    };
    let current_aarch64 = current_aarch64
        .clone()

@@ -2216,6 +2219,8 @@ mod test {
            suffix = Lane;
        } else if line.starts_with("in2-suffix") {
            suffix = In2;
        } else if line.starts_with("in2-lane-suffixes") {
            suffix = In2Lane;
        } else if line.starts_with("a = ") {
            a = line[4..].split(',').map(|v| v.trim().to_string()).collect();
        } else if line.starts_with("b = ") {

@@ -3,7 +3,7 @@
//! This basically just disassembles the current executable and then parses the
//! output once globally and then provides the `assert` function which makes
//! assertions about the disassembly of a function.
-#![feature(test)] // For black_box
+#![feature(bench_black_box)] // For black_box
#![deny(rust_2018_idioms)]
#![allow(clippy::missing_docs_in_private_items, clippy::print_stdout)]
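
Context for the feature swap: around this time `black_box` was exposed from `core::hint` behind the unstable `bench_black_box` gate, so the crate no longer needs the whole `test` feature here. A sketch of the kind of use this enables (illustrative, nightly-only at the time):

#![feature(bench_black_box)]
use std::hint::black_box;

fn keep_alive(x: u32) -> u32 {
    // black_box hides the value from the optimizer so code under a
    // disassembly assertion is not constant-folded away.
    black_box(x) + 1
}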