Complete vst1 neon instructions (#1221)
This commit is contained in:
@@ -13066,7 +13066,7 @@ mod test {
|
||||
let a: [f64; 3] = [0., 1., 2.];
|
||||
let e: [f64; 2] = [1., 2.];
|
||||
let mut r: [f64; 2] = [0f64; 2];
|
||||
vst1_f64_x2(r.as_mut_ptr(), vld1_f64_x2(a[1..].as_ptr()));
|
||||
vst1_f64_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -13075,7 +13075,7 @@ mod test {
|
||||
let a: [f64; 5] = [0., 1., 2., 3., 4.];
|
||||
let e: [f64; 4] = [1., 2., 3., 4.];
|
||||
let mut r: [f64; 4] = [0f64; 4];
|
||||
vst1q_f64_x2(r.as_mut_ptr(), vld1q_f64_x2(a[1..].as_ptr()));
|
||||
vst1q_f64_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -13084,7 +13084,7 @@ mod test {
|
||||
let a: [f64; 4] = [0., 1., 2., 3.];
|
||||
let e: [f64; 3] = [1., 2., 3.];
|
||||
let mut r: [f64; 3] = [0f64; 3];
|
||||
vst1_f64_x3(r.as_mut_ptr(), vld1_f64_x3(a[1..].as_ptr()));
|
||||
vst1_f64_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -13093,7 +13093,7 @@ mod test {
|
||||
let a: [f64; 7] = [0., 1., 2., 3., 4., 5., 6.];
|
||||
let e: [f64; 6] = [1., 2., 3., 4., 5., 6.];
|
||||
let mut r: [f64; 6] = [0f64; 6];
|
||||
vst1q_f64_x3(r.as_mut_ptr(), vld1q_f64_x3(a[1..].as_ptr()));
|
||||
vst1q_f64_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -13102,7 +13102,7 @@ mod test {
|
||||
let a: [f64; 5] = [0., 1., 2., 3., 4.];
|
||||
let e: [f64; 4] = [1., 2., 3., 4.];
|
||||
let mut r: [f64; 4] = [0f64; 4];
|
||||
vst1_f64_x4(r.as_mut_ptr(), vld1_f64_x4(a[1..].as_ptr()));
|
||||
vst1_f64_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -13111,7 +13111,7 @@ mod test {
|
||||
let a: [f64; 9] = [0., 1., 2., 3., 4., 5., 6., 7., 8.];
|
||||
let e: [f64; 8] = [1., 2., 3., 4., 5., 6., 7., 8.];
|
||||
let mut r: [f64; 8] = [0f64; 8];
|
||||
vst1q_f64_x4(r.as_mut_ptr(), vld1q_f64_x4(a[1..].as_ptr()));
|
||||
vst1q_f64_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
||||
@@ -678,7 +678,7 @@ pub unsafe fn vld1_dup_f64(ptr: *const f64) -> float64x1_t {
|
||||
/// Load multiple single-element structures to one, two, three, or four registers
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(ldr))]
|
||||
#[cfg_attr(test, assert_instr(ld1r))]
|
||||
pub unsafe fn vld1q_dup_f64(ptr: *const f64) -> float64x2_t {
|
||||
let x = vld1q_lane_f64::<0>(ptr, transmute(f64x2::splat(0.)));
|
||||
simd_shuffle2!(x, x, [0, 0])
|
||||
@@ -698,7 +698,7 @@ pub unsafe fn vld1_lane_f64<const LANE: i32>(ptr: *const f64, src: float64x1_t)
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
#[cfg_attr(test, assert_instr(ldr, LANE = 1))]
|
||||
#[cfg_attr(test, assert_instr(ld1, LANE = 1))]
|
||||
pub unsafe fn vld1q_lane_f64<const LANE: i32>(ptr: *const f64, src: float64x2_t) -> float64x2_t {
|
||||
static_assert_imm1!(LANE);
|
||||
simd_insert(src, LANE as u32, *ptr)
|
||||
@@ -886,7 +886,7 @@ pub unsafe fn vst1q_p16(ptr: *mut p16, a: poly16x8_t) {
|
||||
|
||||
// Store multiple single-element structures from one, two, three, or four registers.
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[target_feature(enable = "neon,aes")]
|
||||
#[cfg_attr(test, assert_instr(str))]
|
||||
#[allow(clippy::cast_ptr_alignment)]
|
||||
pub unsafe fn vst1_p64(ptr: *mut p64, a: poly64x1_t) {
|
||||
@@ -895,7 +895,7 @@ pub unsafe fn vst1_p64(ptr: *mut p64, a: poly64x1_t) {
|
||||
|
||||
// Store multiple single-element structures from one, two, three, or four registers.
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[target_feature(enable = "neon,aes")]
|
||||
#[cfg_attr(test, assert_instr(str))]
|
||||
#[allow(clippy::cast_ptr_alignment)]
|
||||
pub unsafe fn vst1q_p64(ptr: *mut p64, a: poly64x2_t) {
|
||||
@@ -4803,29 +4803,6 @@ mod tests {
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
// Checks `vst1_p64` when storing through a pointer to the second element of a
// zero-initialised two-element buffer: the one-lane vector holding 1 must land
// in `vals[1]`, and `vals[0]` must still be 0 afterwards.
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vst1_p64() {
|
||||
let mut vals = [0_u64; 2];
|
||||
let a = u64x1::new(1);
|
||||
|
||||
vst1_p64(vals[1..].as_mut_ptr(), transmute(a));
|
||||
|
||||
assert_eq!(vals[0], 0);
|
||||
assert_eq!(vals[1], 1);
|
||||
}
|
||||
|
||||
// Checks `vst1q_p64` when storing through a pointer to the second element of a
// zero-initialised three-element buffer: the two lanes (1, 2) must land in
// `vals[1]` and `vals[2]`, and `vals[0]` must still be 0 afterwards.
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vst1q_p64() {
|
||||
let mut vals = [0_u64; 3];
|
||||
let a = u64x2::new(1, 2);
|
||||
|
||||
vst1q_p64(vals[1..].as_mut_ptr(), transmute(a));
|
||||
|
||||
assert_eq!(vals[0], 0);
|
||||
assert_eq!(vals[1], 1);
|
||||
assert_eq!(vals[2], 2);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vst1_f64() {
|
||||
let mut vals = [0_f64; 2];
|
||||
|
||||
@@ -480,6 +480,22 @@ pub unsafe fn vst1q_p16(ptr: *mut p16, a: poly16x8_t) {
|
||||
vst1q_v8i16(ptr as *const i8, transmute(a), align_of::<p8>() as i32)
|
||||
}
|
||||
|
||||
/// Store multiple single-element structures from one, two, three, or four registers.
///
/// Calls `vst1_v1i64` with `ptr` cast to `*const i8`, the value `a` reinterpreted
/// via `transmute`, and `align_of::<p64>()` as the alignment argument.
///
/// # Safety
///
/// NOTE(review): presumably `ptr` must be valid for writing one `p64` — confirm
/// against the contract of `vst1_v1i64`.
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon,aes,v8")]
|
||||
#[cfg_attr(test, assert_instr("vst1.64"))]
|
||||
pub unsafe fn vst1_p64(ptr: *mut p64, a: poly64x1_t) {
|
||||
vst1_v1i64(ptr as *const i8, transmute(a), align_of::<p64>() as i32)
|
||||
}
|
||||
|
||||
/// Store multiple single-element structures from one, two, three, or four registers.
///
/// Calls `vst1q_v2i64` with `ptr` cast to `*const i8`, the value `a` reinterpreted
/// via `transmute`, and `align_of::<p64>()` as the alignment argument.
///
/// # Safety
///
/// NOTE(review): presumably `ptr` must be valid for writing two consecutive `p64`
/// values — confirm against the contract of `vst1q_v2i64`.
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon,aes,v8")]
|
||||
#[cfg_attr(test, assert_instr("vst1.64"))]
|
||||
pub unsafe fn vst1q_p64(ptr: *mut p64, a: poly64x2_t) {
|
||||
vst1q_v2i64(ptr as *const i8, transmute(a), align_of::<p64>() as i32)
|
||||
}
|
||||
|
||||
// Store multiple single-element structures from one, two, three, or four registers.
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon,v7")]
|
||||
|
||||
@@ -6616,7 +6616,7 @@ pub unsafe fn vld1_p64_x2(a: *const p64) -> poly64x1x2_t {
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon,aes")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(ldr))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))]
|
||||
pub unsafe fn vld1_p64_x3(a: *const p64) -> poly64x1x3_t {
|
||||
transmute(vld1_s64_x3(transmute(a)))
|
||||
@@ -6626,7 +6626,7 @@ pub unsafe fn vld1_p64_x3(a: *const p64) -> poly64x1x3_t {
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon,aes")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(ldr))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))]
|
||||
pub unsafe fn vld1_p64_x4(a: *const p64) -> poly64x1x4_t {
|
||||
transmute(vld1_s64_x4(transmute(a)))
|
||||
@@ -6636,7 +6636,7 @@ pub unsafe fn vld1_p64_x4(a: *const p64) -> poly64x1x4_t {
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon,aes")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(ldr))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))]
|
||||
pub unsafe fn vld1q_p64_x2(a: *const p64) -> poly64x2x2_t {
|
||||
transmute(vld1q_s64_x2(transmute(a)))
|
||||
@@ -6646,7 +6646,7 @@ pub unsafe fn vld1q_p64_x2(a: *const p64) -> poly64x2x2_t {
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon,aes")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(ldr))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))]
|
||||
pub unsafe fn vld1q_p64_x3(a: *const p64) -> poly64x2x3_t {
|
||||
transmute(vld1q_s64_x3(transmute(a)))
|
||||
@@ -6656,7 +6656,7 @@ pub unsafe fn vld1q_p64_x3(a: *const p64) -> poly64x2x3_t {
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon,aes")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(ldr))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))]
|
||||
pub unsafe fn vld1q_p64_x4(a: *const p64) -> poly64x2x4_t {
|
||||
transmute(vld1q_s64_x4(transmute(a)))
|
||||
@@ -7790,6 +7790,66 @@ pub unsafe fn vst1q_p16_x4(a: *mut p16, b: poly16x8x4_t) {
|
||||
vst1q_s16_x4(transmute(a), transmute(b))
|
||||
}
|
||||
|
||||
/// Store multiple single-element structures from one, two, three, or four registers
///
/// Forwards to `vst1_s64_x2`, reinterpreting both the destination pointer `a` and
/// the register tuple `b` with `transmute`.
///
/// # Safety
///
/// NOTE(review): presumably `a` must be valid for writes of everything
/// `vst1_s64_x2` stores — confirm against that function's contract.
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon,aes")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))]
|
||||
pub unsafe fn vst1_p64_x2(a: *mut p64, b: poly64x1x2_t) {
|
||||
vst1_s64_x2(transmute(a), transmute(b))
|
||||
}
|
||||
|
||||
/// Store multiple single-element structures from one, two, three, or four registers
///
/// Forwards to `vst1_s64_x3`, reinterpreting both the destination pointer `a` and
/// the register tuple `b` with `transmute`.
///
/// # Safety
///
/// NOTE(review): presumably `a` must be valid for writes of everything
/// `vst1_s64_x3` stores — confirm against that function's contract.
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon,aes")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))]
|
||||
pub unsafe fn vst1_p64_x3(a: *mut p64, b: poly64x1x3_t) {
|
||||
vst1_s64_x3(transmute(a), transmute(b))
|
||||
}
|
||||
|
||||
/// Store multiple single-element structures from one, two, three, or four registers
///
/// Forwards to `vst1_s64_x4`, reinterpreting both the destination pointer `a` and
/// the register tuple `b` with `transmute`.
///
/// # Safety
///
/// NOTE(review): presumably `a` must be valid for writes of everything
/// `vst1_s64_x4` stores — confirm against that function's contract.
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon,aes")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))]
|
||||
pub unsafe fn vst1_p64_x4(a: *mut p64, b: poly64x1x4_t) {
|
||||
vst1_s64_x4(transmute(a), transmute(b))
|
||||
}
|
||||
|
||||
/// Store multiple single-element structures from one, two, three, or four registers
///
/// Forwards to `vst1q_s64_x2`, reinterpreting both the destination pointer `a` and
/// the register tuple `b` with `transmute`.
///
/// # Safety
///
/// NOTE(review): presumably `a` must be valid for writes of everything
/// `vst1q_s64_x2` stores — confirm against that function's contract.
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon,aes")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))]
|
||||
pub unsafe fn vst1q_p64_x2(a: *mut p64, b: poly64x2x2_t) {
|
||||
vst1q_s64_x2(transmute(a), transmute(b))
|
||||
}
|
||||
|
||||
/// Store multiple single-element structures from one, two, three, or four registers
///
/// Forwards to `vst1q_s64_x3`, reinterpreting both the destination pointer `a` and
/// the register tuple `b` with `transmute`.
///
/// # Safety
///
/// NOTE(review): presumably `a` must be valid for writes of everything
/// `vst1q_s64_x3` stores — confirm against that function's contract.
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon,aes")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))]
|
||||
pub unsafe fn vst1q_p64_x3(a: *mut p64, b: poly64x2x3_t) {
|
||||
vst1q_s64_x3(transmute(a), transmute(b))
|
||||
}
|
||||
|
||||
/// Store multiple single-element structures from one, two, three, or four registers
///
/// Forwards to `vst1q_s64_x4`, reinterpreting both the destination pointer `a` and
/// the register tuple `b` with `transmute`.
///
/// # Safety
///
/// NOTE(review): presumably `a` must be valid for writes of everything
/// `vst1q_s64_x4` stores — confirm against that function's contract.
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon,aes")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))]
|
||||
pub unsafe fn vst1q_p64_x4(a: *mut p64, b: poly64x2x4_t) {
|
||||
vst1q_s64_x4(transmute(a), transmute(b))
|
||||
}
|
||||
|
||||
/// Store multiple single-element structures to one, two, three, or four registers
|
||||
#[inline]
|
||||
#[cfg(target_arch = "arm")]
|
||||
@@ -21642,7 +21702,7 @@ mod test {
|
||||
let a: [i8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
|
||||
let e: [i8; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
|
||||
let mut r: [i8; 16] = [0i8; 16];
|
||||
vst1_s8_x2(r.as_mut_ptr(), vld1_s8_x2(a[1..].as_ptr()));
|
||||
vst1_s8_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -21651,7 +21711,7 @@ mod test {
|
||||
let a: [i16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
|
||||
let e: [i16; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
|
||||
let mut r: [i16; 8] = [0i16; 8];
|
||||
vst1_s16_x2(r.as_mut_ptr(), vld1_s16_x2(a[1..].as_ptr()));
|
||||
vst1_s16_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -21660,7 +21720,7 @@ mod test {
|
||||
let a: [i32; 5] = [0, 1, 2, 3, 4];
|
||||
let e: [i32; 4] = [1, 2, 3, 4];
|
||||
let mut r: [i32; 4] = [0i32; 4];
|
||||
vst1_s32_x2(r.as_mut_ptr(), vld1_s32_x2(a[1..].as_ptr()));
|
||||
vst1_s32_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -21669,7 +21729,7 @@ mod test {
|
||||
let a: [i64; 3] = [0, 1, 2];
|
||||
let e: [i64; 2] = [1, 2];
|
||||
let mut r: [i64; 2] = [0i64; 2];
|
||||
vst1_s64_x2(r.as_mut_ptr(), vld1_s64_x2(a[1..].as_ptr()));
|
||||
vst1_s64_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -21678,7 +21738,7 @@ mod test {
|
||||
let a: [i8; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
|
||||
let e: [i8; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
|
||||
let mut r: [i8; 32] = [0i8; 32];
|
||||
vst1q_s8_x2(r.as_mut_ptr(), vld1q_s8_x2(a[1..].as_ptr()));
|
||||
vst1q_s8_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -21687,7 +21747,7 @@ mod test {
|
||||
let a: [i16; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
|
||||
let e: [i16; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
|
||||
let mut r: [i16; 16] = [0i16; 16];
|
||||
vst1q_s16_x2(r.as_mut_ptr(), vld1q_s16_x2(a[1..].as_ptr()));
|
||||
vst1q_s16_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -21696,7 +21756,7 @@ mod test {
|
||||
let a: [i32; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
|
||||
let e: [i32; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
|
||||
let mut r: [i32; 8] = [0i32; 8];
|
||||
vst1q_s32_x2(r.as_mut_ptr(), vld1q_s32_x2(a[1..].as_ptr()));
|
||||
vst1q_s32_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -21705,7 +21765,7 @@ mod test {
|
||||
let a: [i64; 5] = [0, 1, 2, 3, 4];
|
||||
let e: [i64; 4] = [1, 2, 3, 4];
|
||||
let mut r: [i64; 4] = [0i64; 4];
|
||||
vst1q_s64_x2(r.as_mut_ptr(), vld1q_s64_x2(a[1..].as_ptr()));
|
||||
vst1q_s64_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -21714,7 +21774,7 @@ mod test {
|
||||
let a: [i8; 25] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24];
|
||||
let e: [i8; 24] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24];
|
||||
let mut r: [i8; 24] = [0i8; 24];
|
||||
vst1_s8_x3(r.as_mut_ptr(), vld1_s8_x3(a[1..].as_ptr()));
|
||||
vst1_s8_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -21723,7 +21783,7 @@ mod test {
|
||||
let a: [i16; 13] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];
|
||||
let e: [i16; 12] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];
|
||||
let mut r: [i16; 12] = [0i16; 12];
|
||||
vst1_s16_x3(r.as_mut_ptr(), vld1_s16_x3(a[1..].as_ptr()));
|
||||
vst1_s16_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -21732,7 +21792,7 @@ mod test {
|
||||
let a: [i32; 7] = [0, 1, 2, 3, 4, 5, 6];
|
||||
let e: [i32; 6] = [1, 2, 3, 4, 5, 6];
|
||||
let mut r: [i32; 6] = [0i32; 6];
|
||||
vst1_s32_x3(r.as_mut_ptr(), vld1_s32_x3(a[1..].as_ptr()));
|
||||
vst1_s32_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -21741,7 +21801,7 @@ mod test {
|
||||
let a: [i64; 4] = [0, 1, 2, 3];
|
||||
let e: [i64; 3] = [1, 2, 3];
|
||||
let mut r: [i64; 3] = [0i64; 3];
|
||||
vst1_s64_x3(r.as_mut_ptr(), vld1_s64_x3(a[1..].as_ptr()));
|
||||
vst1_s64_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -21750,7 +21810,7 @@ mod test {
|
||||
let a: [i8; 49] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
|
||||
let e: [i8; 48] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
|
||||
let mut r: [i8; 48] = [0i8; 48];
|
||||
vst1q_s8_x3(r.as_mut_ptr(), vld1q_s8_x3(a[1..].as_ptr()));
|
||||
vst1q_s8_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -21759,7 +21819,7 @@ mod test {
|
||||
let a: [i16; 25] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24];
|
||||
let e: [i16; 24] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24];
|
||||
let mut r: [i16; 24] = [0i16; 24];
|
||||
vst1q_s16_x3(r.as_mut_ptr(), vld1q_s16_x3(a[1..].as_ptr()));
|
||||
vst1q_s16_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -21768,7 +21828,7 @@ mod test {
|
||||
let a: [i32; 13] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];
|
||||
let e: [i32; 12] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];
|
||||
let mut r: [i32; 12] = [0i32; 12];
|
||||
vst1q_s32_x3(r.as_mut_ptr(), vld1q_s32_x3(a[1..].as_ptr()));
|
||||
vst1q_s32_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -21777,7 +21837,7 @@ mod test {
|
||||
let a: [i64; 7] = [0, 1, 2, 3, 4, 5, 6];
|
||||
let e: [i64; 6] = [1, 2, 3, 4, 5, 6];
|
||||
let mut r: [i64; 6] = [0i64; 6];
|
||||
vst1q_s64_x3(r.as_mut_ptr(), vld1q_s64_x3(a[1..].as_ptr()));
|
||||
vst1q_s64_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -21786,7 +21846,7 @@ mod test {
|
||||
let a: [i8; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
|
||||
let e: [i8; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
|
||||
let mut r: [i8; 32] = [0i8; 32];
|
||||
vst1_s8_x4(r.as_mut_ptr(), vld1_s8_x4(a[1..].as_ptr()));
|
||||
vst1_s8_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -21795,7 +21855,7 @@ mod test {
|
||||
let a: [i16; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
|
||||
let e: [i16; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
|
||||
let mut r: [i16; 16] = [0i16; 16];
|
||||
vst1_s16_x4(r.as_mut_ptr(), vld1_s16_x4(a[1..].as_ptr()));
|
||||
vst1_s16_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -21804,7 +21864,7 @@ mod test {
|
||||
let a: [i32; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
|
||||
let e: [i32; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
|
||||
let mut r: [i32; 8] = [0i32; 8];
|
||||
vst1_s32_x4(r.as_mut_ptr(), vld1_s32_x4(a[1..].as_ptr()));
|
||||
vst1_s32_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -21813,7 +21873,7 @@ mod test {
|
||||
let a: [i64; 5] = [0, 1, 2, 3, 4];
|
||||
let e: [i64; 4] = [1, 2, 3, 4];
|
||||
let mut r: [i64; 4] = [0i64; 4];
|
||||
vst1_s64_x4(r.as_mut_ptr(), vld1_s64_x4(a[1..].as_ptr()));
|
||||
vst1_s64_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -21822,7 +21882,7 @@ mod test {
|
||||
let a: [i8; 65] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
|
||||
let e: [i8; 64] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
|
||||
let mut r: [i8; 64] = [0i8; 64];
|
||||
vst1q_s8_x4(r.as_mut_ptr(), vld1q_s8_x4(a[1..].as_ptr()));
|
||||
vst1q_s8_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -21831,7 +21891,7 @@ mod test {
|
||||
let a: [i16; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
|
||||
let e: [i16; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
|
||||
let mut r: [i16; 32] = [0i16; 32];
|
||||
vst1q_s16_x4(r.as_mut_ptr(), vld1q_s16_x4(a[1..].as_ptr()));
|
||||
vst1q_s16_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -21840,7 +21900,7 @@ mod test {
|
||||
let a: [i32; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
|
||||
let e: [i32; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
|
||||
let mut r: [i32; 16] = [0i32; 16];
|
||||
vst1q_s32_x4(r.as_mut_ptr(), vld1q_s32_x4(a[1..].as_ptr()));
|
||||
vst1q_s32_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -21849,7 +21909,7 @@ mod test {
|
||||
let a: [i64; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
|
||||
let e: [i64; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
|
||||
let mut r: [i64; 8] = [0i64; 8];
|
||||
vst1q_s64_x4(r.as_mut_ptr(), vld1q_s64_x4(a[1..].as_ptr()));
|
||||
vst1q_s64_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -21858,7 +21918,7 @@ mod test {
|
||||
let a: [u8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
|
||||
let e: [u8; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
|
||||
let mut r: [u8; 16] = [0u8; 16];
|
||||
vst1_u8_x2(r.as_mut_ptr(), vld1_u8_x2(a[1..].as_ptr()));
|
||||
vst1_u8_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -21867,7 +21927,7 @@ mod test {
|
||||
let a: [u16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
|
||||
let e: [u16; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
|
||||
let mut r: [u16; 8] = [0u16; 8];
|
||||
vst1_u16_x2(r.as_mut_ptr(), vld1_u16_x2(a[1..].as_ptr()));
|
||||
vst1_u16_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -21876,7 +21936,7 @@ mod test {
|
||||
let a: [u32; 5] = [0, 1, 2, 3, 4];
|
||||
let e: [u32; 4] = [1, 2, 3, 4];
|
||||
let mut r: [u32; 4] = [0u32; 4];
|
||||
vst1_u32_x2(r.as_mut_ptr(), vld1_u32_x2(a[1..].as_ptr()));
|
||||
vst1_u32_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -21885,7 +21945,7 @@ mod test {
|
||||
let a: [u64; 3] = [0, 1, 2];
|
||||
let e: [u64; 2] = [1, 2];
|
||||
let mut r: [u64; 2] = [0u64; 2];
|
||||
vst1_u64_x2(r.as_mut_ptr(), vld1_u64_x2(a[1..].as_ptr()));
|
||||
vst1_u64_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -21894,7 +21954,7 @@ mod test {
|
||||
let a: [u8; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
|
||||
let e: [u8; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
|
||||
let mut r: [u8; 32] = [0u8; 32];
|
||||
vst1q_u8_x2(r.as_mut_ptr(), vld1q_u8_x2(a[1..].as_ptr()));
|
||||
vst1q_u8_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -21903,7 +21963,7 @@ mod test {
|
||||
let a: [u16; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
|
||||
let e: [u16; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
|
||||
let mut r: [u16; 16] = [0u16; 16];
|
||||
vst1q_u16_x2(r.as_mut_ptr(), vld1q_u16_x2(a[1..].as_ptr()));
|
||||
vst1q_u16_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -21912,7 +21972,7 @@ mod test {
|
||||
let a: [u32; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
|
||||
let e: [u32; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
|
||||
let mut r: [u32; 8] = [0u32; 8];
|
||||
vst1q_u32_x2(r.as_mut_ptr(), vld1q_u32_x2(a[1..].as_ptr()));
|
||||
vst1q_u32_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -21921,7 +21981,7 @@ mod test {
|
||||
let a: [u64; 5] = [0, 1, 2, 3, 4];
|
||||
let e: [u64; 4] = [1, 2, 3, 4];
|
||||
let mut r: [u64; 4] = [0u64; 4];
|
||||
vst1q_u64_x2(r.as_mut_ptr(), vld1q_u64_x2(a[1..].as_ptr()));
|
||||
vst1q_u64_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -21930,7 +21990,7 @@ mod test {
|
||||
let a: [u8; 25] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24];
|
||||
let e: [u8; 24] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24];
|
||||
let mut r: [u8; 24] = [0u8; 24];
|
||||
vst1_u8_x3(r.as_mut_ptr(), vld1_u8_x3(a[1..].as_ptr()));
|
||||
vst1_u8_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -21939,7 +21999,7 @@ mod test {
|
||||
let a: [u16; 13] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];
|
||||
let e: [u16; 12] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];
|
||||
let mut r: [u16; 12] = [0u16; 12];
|
||||
vst1_u16_x3(r.as_mut_ptr(), vld1_u16_x3(a[1..].as_ptr()));
|
||||
vst1_u16_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -21948,7 +22008,7 @@ mod test {
|
||||
let a: [u32; 7] = [0, 1, 2, 3, 4, 5, 6];
|
||||
let e: [u32; 6] = [1, 2, 3, 4, 5, 6];
|
||||
let mut r: [u32; 6] = [0u32; 6];
|
||||
vst1_u32_x3(r.as_mut_ptr(), vld1_u32_x3(a[1..].as_ptr()));
|
||||
vst1_u32_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -21957,7 +22017,7 @@ mod test {
|
||||
let a: [u64; 4] = [0, 1, 2, 3];
|
||||
let e: [u64; 3] = [1, 2, 3];
|
||||
let mut r: [u64; 3] = [0u64; 3];
|
||||
vst1_u64_x3(r.as_mut_ptr(), vld1_u64_x3(a[1..].as_ptr()));
|
||||
vst1_u64_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -21966,7 +22026,7 @@ mod test {
|
||||
let a: [u8; 49] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
|
||||
let e: [u8; 48] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
|
||||
let mut r: [u8; 48] = [0u8; 48];
|
||||
vst1q_u8_x3(r.as_mut_ptr(), vld1q_u8_x3(a[1..].as_ptr()));
|
||||
vst1q_u8_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -21975,7 +22035,7 @@ mod test {
|
||||
let a: [u16; 25] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24];
|
||||
let e: [u16; 24] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24];
|
||||
let mut r: [u16; 24] = [0u16; 24];
|
||||
vst1q_u16_x3(r.as_mut_ptr(), vld1q_u16_x3(a[1..].as_ptr()));
|
||||
vst1q_u16_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -21984,7 +22044,7 @@ mod test {
|
||||
let a: [u32; 13] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];
|
||||
let e: [u32; 12] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];
|
||||
let mut r: [u32; 12] = [0u32; 12];
|
||||
vst1q_u32_x3(r.as_mut_ptr(), vld1q_u32_x3(a[1..].as_ptr()));
|
||||
vst1q_u32_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -21993,7 +22053,7 @@ mod test {
|
||||
let a: [u64; 7] = [0, 1, 2, 3, 4, 5, 6];
|
||||
let e: [u64; 6] = [1, 2, 3, 4, 5, 6];
|
||||
let mut r: [u64; 6] = [0u64; 6];
|
||||
vst1q_u64_x3(r.as_mut_ptr(), vld1q_u64_x3(a[1..].as_ptr()));
|
||||
vst1q_u64_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -22002,7 +22062,7 @@ mod test {
|
||||
let a: [u8; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
|
||||
let e: [u8; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
|
||||
let mut r: [u8; 32] = [0u8; 32];
|
||||
vst1_u8_x4(r.as_mut_ptr(), vld1_u8_x4(a[1..].as_ptr()));
|
||||
vst1_u8_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -22011,7 +22071,7 @@ mod test {
|
||||
let a: [u16; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
|
||||
let e: [u16; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
|
||||
let mut r: [u16; 16] = [0u16; 16];
|
||||
vst1_u16_x4(r.as_mut_ptr(), vld1_u16_x4(a[1..].as_ptr()));
|
||||
vst1_u16_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -22020,7 +22080,7 @@ mod test {
|
||||
let a: [u32; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
|
||||
let e: [u32; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
|
||||
let mut r: [u32; 8] = [0u32; 8];
|
||||
vst1_u32_x4(r.as_mut_ptr(), vld1_u32_x4(a[1..].as_ptr()));
|
||||
vst1_u32_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -22029,7 +22089,7 @@ mod test {
|
||||
let a: [u64; 5] = [0, 1, 2, 3, 4];
|
||||
let e: [u64; 4] = [1, 2, 3, 4];
|
||||
let mut r: [u64; 4] = [0u64; 4];
|
||||
vst1_u64_x4(r.as_mut_ptr(), vld1_u64_x4(a[1..].as_ptr()));
|
||||
vst1_u64_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -22038,7 +22098,7 @@ mod test {
|
||||
let a: [u8; 65] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
|
||||
let e: [u8; 64] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
|
||||
let mut r: [u8; 64] = [0u8; 64];
|
||||
vst1q_u8_x4(r.as_mut_ptr(), vld1q_u8_x4(a[1..].as_ptr()));
|
||||
vst1q_u8_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -22047,7 +22107,7 @@ mod test {
|
||||
let a: [u16; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
|
||||
let e: [u16; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
|
||||
let mut r: [u16; 32] = [0u16; 32];
|
||||
vst1q_u16_x4(r.as_mut_ptr(), vld1q_u16_x4(a[1..].as_ptr()));
|
||||
vst1q_u16_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -22056,7 +22116,7 @@ mod test {
|
||||
let a: [u32; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
|
||||
let e: [u32; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
|
||||
let mut r: [u32; 16] = [0u32; 16];
|
||||
vst1q_u32_x4(r.as_mut_ptr(), vld1q_u32_x4(a[1..].as_ptr()));
|
||||
vst1q_u32_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -22065,7 +22125,7 @@ mod test {
|
||||
let a: [u64; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
|
||||
let e: [u64; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
|
||||
let mut r: [u64; 8] = [0u64; 8];
|
||||
vst1q_u64_x4(r.as_mut_ptr(), vld1q_u64_x4(a[1..].as_ptr()));
|
||||
vst1q_u64_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -22074,7 +22134,7 @@ mod test {
|
||||
let a: [u8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
|
||||
let e: [u8; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
|
||||
let mut r: [u8; 16] = [0u8; 16];
|
||||
vst1_p8_x2(r.as_mut_ptr(), vld1_p8_x2(a[1..].as_ptr()));
|
||||
vst1_p8_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -22083,7 +22143,7 @@ mod test {
|
||||
let a: [u8; 25] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24];
|
||||
let e: [u8; 24] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24];
|
||||
let mut r: [u8; 24] = [0u8; 24];
|
||||
vst1_p8_x3(r.as_mut_ptr(), vld1_p8_x3(a[1..].as_ptr()));
|
||||
vst1_p8_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -22092,7 +22152,7 @@ mod test {
|
||||
let a: [u8; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
|
||||
let e: [u8; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
|
||||
let mut r: [u8; 32] = [0u8; 32];
|
||||
vst1_p8_x4(r.as_mut_ptr(), vld1_p8_x4(a[1..].as_ptr()));
|
||||
vst1_p8_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -22101,7 +22161,7 @@ mod test {
|
||||
let a: [u8; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
|
||||
let e: [u8; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
|
||||
let mut r: [u8; 32] = [0u8; 32];
|
||||
vst1q_p8_x2(r.as_mut_ptr(), vld1q_p8_x2(a[1..].as_ptr()));
|
||||
vst1q_p8_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -22110,7 +22170,7 @@ mod test {
|
||||
let a: [u8; 49] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
|
||||
let e: [u8; 48] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
|
||||
let mut r: [u8; 48] = [0u8; 48];
|
||||
vst1q_p8_x3(r.as_mut_ptr(), vld1q_p8_x3(a[1..].as_ptr()));
|
||||
vst1q_p8_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -22119,7 +22179,7 @@ mod test {
|
||||
let a: [u8; 65] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
|
||||
let e: [u8; 64] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
|
||||
let mut r: [u8; 64] = [0u8; 64];
|
||||
vst1q_p8_x4(r.as_mut_ptr(), vld1q_p8_x4(a[1..].as_ptr()));
|
||||
vst1q_p8_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -22128,7 +22188,7 @@ mod test {
|
||||
let a: [u16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
|
||||
let e: [u16; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
|
||||
let mut r: [u16; 8] = [0u16; 8];
|
||||
vst1_p16_x2(r.as_mut_ptr(), vld1_p16_x2(a[1..].as_ptr()));
|
||||
vst1_p16_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -22137,7 +22197,7 @@ mod test {
|
||||
let a: [u16; 13] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];
|
||||
let e: [u16; 12] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];
|
||||
let mut r: [u16; 12] = [0u16; 12];
|
||||
vst1_p16_x3(r.as_mut_ptr(), vld1_p16_x3(a[1..].as_ptr()));
|
||||
vst1_p16_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -22146,7 +22206,7 @@ mod test {
|
||||
let a: [u16; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
|
||||
let e: [u16; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
|
||||
let mut r: [u16; 16] = [0u16; 16];
|
||||
vst1_p16_x4(r.as_mut_ptr(), vld1_p16_x4(a[1..].as_ptr()));
|
||||
vst1_p16_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -22155,7 +22215,7 @@ mod test {
|
||||
let a: [u16; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
|
||||
let e: [u16; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
|
||||
let mut r: [u16; 16] = [0u16; 16];
|
||||
vst1q_p16_x2(r.as_mut_ptr(), vld1q_p16_x2(a[1..].as_ptr()));
|
||||
vst1q_p16_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -22164,7 +22224,7 @@ mod test {
|
||||
let a: [u16; 25] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24];
|
||||
let e: [u16; 24] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24];
|
||||
let mut r: [u16; 24] = [0u16; 24];
|
||||
vst1q_p16_x3(r.as_mut_ptr(), vld1q_p16_x3(a[1..].as_ptr()));
|
||||
vst1q_p16_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -22173,7 +22233,61 @@ mod test {
|
||||
let a: [u16; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
|
||||
let e: [u16; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
|
||||
let mut r: [u16; 32] = [0u16; 32];
|
||||
vst1q_p16_x4(r.as_mut_ptr(), vld1q_p16_x4(a[1..].as_ptr()));
|
||||
vst1q_p16_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vst1_p64_x2() {
|
||||
let a: [u64; 3] = [0, 1, 2];
|
||||
let e: [u64; 2] = [1, 2];
|
||||
let mut r: [u64; 2] = [0u64; 2];
|
||||
vst1_p64_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vst1_p64_x3() {
|
||||
let a: [u64; 4] = [0, 1, 2, 3];
|
||||
let e: [u64; 3] = [1, 2, 3];
|
||||
let mut r: [u64; 3] = [0u64; 3];
|
||||
vst1_p64_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vst1_p64_x4() {
|
||||
let a: [u64; 5] = [0, 1, 2, 3, 4];
|
||||
let e: [u64; 4] = [1, 2, 3, 4];
|
||||
let mut r: [u64; 4] = [0u64; 4];
|
||||
vst1_p64_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vst1q_p64_x2() {
|
||||
let a: [u64; 5] = [0, 1, 2, 3, 4];
|
||||
let e: [u64; 4] = [1, 2, 3, 4];
|
||||
let mut r: [u64; 4] = [0u64; 4];
|
||||
vst1q_p64_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vst1q_p64_x3() {
|
||||
let a: [u64; 7] = [0, 1, 2, 3, 4, 5, 6];
|
||||
let e: [u64; 6] = [1, 2, 3, 4, 5, 6];
|
||||
let mut r: [u64; 6] = [0u64; 6];
|
||||
vst1q_p64_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vst1q_p64_x4() {
|
||||
let a: [u64; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
|
||||
let e: [u64; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
|
||||
let mut r: [u64; 8] = [0u64; 8];
|
||||
vst1q_p64_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -22182,7 +22296,7 @@ mod test {
|
||||
let a: [f32; 5] = [0., 1., 2., 3., 4.];
|
||||
let e: [f32; 4] = [1., 2., 3., 4.];
|
||||
let mut r: [f32; 4] = [0f32; 4];
|
||||
vst1_f32_x2(r.as_mut_ptr(), vld1_f32_x2(a[1..].as_ptr()));
|
||||
vst1_f32_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -22191,7 +22305,7 @@ mod test {
|
||||
let a: [f32; 9] = [0., 1., 2., 3., 4., 5., 6., 7., 8.];
|
||||
let e: [f32; 8] = [1., 2., 3., 4., 5., 6., 7., 8.];
|
||||
let mut r: [f32; 8] = [0f32; 8];
|
||||
vst1q_f32_x2(r.as_mut_ptr(), vld1q_f32_x2(a[1..].as_ptr()));
|
||||
vst1q_f32_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -22200,7 +22314,7 @@ mod test {
|
||||
let a: [f32; 7] = [0., 1., 2., 3., 4., 5., 6.];
|
||||
let e: [f32; 6] = [1., 2., 3., 4., 5., 6.];
|
||||
let mut r: [f32; 6] = [0f32; 6];
|
||||
vst1_f32_x3(r.as_mut_ptr(), vld1_f32_x3(a[1..].as_ptr()));
|
||||
vst1_f32_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -22209,7 +22323,7 @@ mod test {
|
||||
let a: [f32; 13] = [0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.];
|
||||
let e: [f32; 12] = [1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.];
|
||||
let mut r: [f32; 12] = [0f32; 12];
|
||||
vst1q_f32_x3(r.as_mut_ptr(), vld1q_f32_x3(a[1..].as_ptr()));
|
||||
vst1q_f32_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -22218,7 +22332,7 @@ mod test {
|
||||
let a: [f32; 9] = [0., 1., 2., 3., 4., 5., 6., 7., 8.];
|
||||
let e: [f32; 8] = [1., 2., 3., 4., 5., 6., 7., 8.];
|
||||
let mut r: [f32; 8] = [0f32; 8];
|
||||
vst1_f32_x4(r.as_mut_ptr(), vld1_f32_x4(a[1..].as_ptr()));
|
||||
vst1_f32_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
@@ -22227,7 +22341,7 @@ mod test {
|
||||
let a: [f32; 17] = [0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.];
|
||||
let e: [f32; 16] = [1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.];
|
||||
let mut r: [f32; 16] = [0f32; 16];
|
||||
vst1q_f32_x4(r.as_mut_ptr(), vld1q_f32_x4(a[1..].as_ptr()));
|
||||
vst1q_f32_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
||||
@@ -339,6 +339,29 @@ unsafe fn test_vst1q_p16() {
|
||||
assert_eq!(vals[8], 8);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon,aes")]
|
||||
unsafe fn test_vst1_p64() {
|
||||
let mut vals = [0_u64; 2];
|
||||
let a = u64x1::new(1);
|
||||
|
||||
vst1_p64(vals[1..].as_mut_ptr(), transmute(a));
|
||||
|
||||
assert_eq!(vals[0], 0);
|
||||
assert_eq!(vals[1], 1);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon,aes")]
|
||||
unsafe fn test_vst1q_p64() {
|
||||
let mut vals = [0_u64; 3];
|
||||
let a = u64x2::new(1, 2);
|
||||
|
||||
vst1q_p64(vals[1..].as_mut_ptr(), transmute(a));
|
||||
|
||||
assert_eq!(vals[0], 0);
|
||||
assert_eq!(vals[1], 1);
|
||||
assert_eq!(vals[2], 2);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vst1_f32() {
|
||||
let mut vals = [0_f32; 3];
|
||||
|
||||
@@ -2082,10 +2082,9 @@ generate *const p16:poly16x4x2_t, *const p16:poly16x4x3_t, *const p16:poly16x4x4
|
||||
generate *const p16:poly16x8x2_t, *const p16:poly16x8x3_t, *const p16:poly16x8x4_t
|
||||
target = aes
|
||||
generate *const p64:poly64x1x2_t
|
||||
arm = ldr
|
||||
arm = nop
|
||||
generate *const p64:poly64x1x3_t, *const p64:poly64x1x4_t
|
||||
generate *const p64:poly64x2x2_t, *const p64:poly64x2x3_t, *const p64:poly64x2x4_t
|
||||
|
||||
/// Load multiple single-element structures to one, two, three, or four registers
|
||||
name = vld1
|
||||
out-suffix
|
||||
@@ -2122,13 +2121,16 @@ out-nox
|
||||
a = 0, 1, 2, 2, 3, 2, 4, 3, 5, 2, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17
|
||||
validate 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
|
||||
load_fn
|
||||
arm-aarch64-separate
|
||||
|
||||
aarch64 = ld2
|
||||
link-aarch64 = ld2._EXTv2_
|
||||
//generate *const i64:int64x2x2_t
|
||||
|
||||
arm = vld2
|
||||
link-arm = vld2._EXTpi82_
|
||||
//generate *const i8:int8x8x2_t, *const i16:int16x4x2_t, *const i32:int32x2x2_t, *const i64:int64x1x2_t
|
||||
//generate *const i8:int8x16x2_t, *const i16:int16x8x2_t, *const i32:int32x4x2_t, *const i64:int64x2x2_t
|
||||
//generate *const i8:int8x16x2_t, *const i16:int16x8x2_t, *const i32:int32x4x2_t
|
||||
|
||||
/// Load multiple 2-element structures to two registers
|
||||
name = vld2
|
||||
@@ -2139,10 +2141,17 @@ validate 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9,
|
||||
load_fn
|
||||
|
||||
aarch64 = ld2
|
||||
//generate *const u64:uint64x2x2_t
|
||||
target = aes
|
||||
//generate *const p64:poly64x2x2_t
|
||||
|
||||
target = default
|
||||
arm = vld2
|
||||
//generate *const u8:uint8x8x2_t, *const u16:uint16x4x2_t, *const u32:uint32x2x2_t, *const u64:uint64x1x2_t
|
||||
//generate *const u8:uint8x16x2_t, *const u16:uint16x8x2_t, *const u32:uint32x4x2_t, *const u64:uint64x2x2_t
|
||||
//generate *const u8:uint8x16x2_t, *const u16:uint16x8x2_t, *const u32:uint32x4x2_t
|
||||
//generate *const p8:poly8x8x2_t, *const p16:poly16x4x2_t, *const p8:poly8x16x2_t, *const p16:poly16x8x2_t
|
||||
target = aes
|
||||
//generate *const p64:poly64x1x2_t
|
||||
|
||||
/// Load multiple 2-element structures to two registers
|
||||
name = vld2
|
||||
@@ -2150,6 +2159,7 @@ out-nox
|
||||
a = 0., 1., 2., 2., 3., 2., 4., 3., 5., 2., 6., 3., 7., 4., 8., 5., 9.
|
||||
validate 1., 2., 2., 3., 2., 3., 4., 5., 2., 3., 4., 5., 6., 7., 8., 9.
|
||||
load_fn
|
||||
arm-aarch64-separate
|
||||
|
||||
aarch64 = ld2
|
||||
link-aarch64 = ld2._EXTv2_
|
||||
@@ -2166,12 +2176,14 @@ a = 0, 1, 1, 2, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 1
|
||||
validate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
|
||||
load_fn
|
||||
|
||||
arm = vld2dup
|
||||
link-arm = vld2dup._EXTpi82_
|
||||
aarch64 = ld2r
|
||||
link-aarch64 = ld2r._EXT2_
|
||||
//generate *const i64:int64x2x2_t
|
||||
|
||||
arm = vld2dup
|
||||
link-arm = vld2dup._EXTpi82_
|
||||
//generate *const i8:int8x8x2_t, *const i16:int16x4x2_t, *const i32:int32x2x2_t, *const i64:int64x1x2_t
|
||||
//generate *const i8:int8x16x2_t, *const i16:int16x8x2_t, *const i32:int32x4x2_t, *const i64:int64x2x2_t
|
||||
//generate *const i8:int8x16x2_t, *const i16:int16x8x2_t, *const i32:int32x4x2_t
|
||||
|
||||
/// Load single 2-element structure and replicate to all lanes of two registers
|
||||
name = vld2
|
||||
@@ -2181,11 +2193,18 @@ a = 0, 1, 1, 2, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 1
|
||||
validate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
|
||||
load_fn
|
||||
|
||||
arm = vld2dup
|
||||
aarch64 = ld2r
|
||||
//generate *const u64:uint64x2x2_t
|
||||
target = aes
|
||||
//generate *const p64:poly64x2x2_t
|
||||
|
||||
target = default
|
||||
arm = vld2dup
|
||||
//generate *const u8:uint8x8x2_t, *const u16:uint16x4x2_t, *const u32:uint32x2x2_t, *const u64:uint64x1x2_t
|
||||
//generate *const u8:uint8x16x2_t, *const u16:uint16x8x2_t, *const u32:uint32x4x2_t, *const u64:uint64x2x2_t
|
||||
//generate *const u8:uint8x16x2_t, *const u16:uint16x8x2_t, *const u32:uint32x4x2_t
|
||||
//generate *const p8:poly8x8x2_t, *const p16:poly16x4x2_t, *const p8:poly8x16x2_t, *const p16:poly16x8x2_t
|
||||
target = aes
|
||||
//generate *const p64:poly64x1x2_t
|
||||
|
||||
/// Load single 2-element structure and replicate to all lanes of two registers
|
||||
name = vld2
|
||||
@@ -2217,13 +2236,13 @@ arm-aarch64-separate
|
||||
aarch64 = ld2lane
|
||||
const-aarch64 = LANE
|
||||
link-aarch64 = ld2lane._EXTpi82_
|
||||
//generate *const i64:int64x1x2_t:int64x1x2_t, *const i64:int64x2x2_t:int64x2x2_t
|
||||
//generate *const i8:int8x16x2_t:int8x16x2_t, *const i64:int64x1x2_t:int64x1x2_t, *const i64:int64x2x2_t:int64x2x2_t
|
||||
|
||||
arm = vld2lane
|
||||
const-arm = LANE
|
||||
link-arm = vld2lane._EXTpi82_
|
||||
//generate *const i8:int8x8x2_t:int8x8x2_t, *const i16:int16x4x2_t:int16x4x2_t, *const i32:int32x2x2_t:int32x2x2_t
|
||||
//generate *const i8:int8x16x2_t:int8x16x2_t, *const i16:int16x8x2_t:int16x8x2_t, *const i32:int32x4x2_t:int32x4x2_t
|
||||
//generate *const i16:int16x8x2_t:int16x8x2_t, *const i32:int32x4x2_t:int32x4x2_t
|
||||
|
||||
/// Load multiple 2-element structures to two registers
|
||||
name = vld2
|
||||
@@ -2236,7 +2255,6 @@ b = 0, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26, 11, 12, 13, 14, 1
|
||||
n = 0
|
||||
validate 1, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26, 2, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26
|
||||
load_fn
|
||||
arm-aarch64-separate
|
||||
|
||||
aarch64 = ld2lane
|
||||
const-aarch64 = LANE
|
||||
@@ -2245,14 +2263,15 @@ target = aes
|
||||
//generate *const p64:poly64x1x2_t:poly64x1x2_t, *const p64:poly64x2x2_t:poly64x2x2_t
|
||||
|
||||
target = default
|
||||
//generate *const u64:uint64x1x2_t:uint64x1x2_t, *const u64:uint64x2x2_t:uint64x2x2_t
|
||||
//generate *const u8:uint8x16x2_t:uint8x16x2_t, *const u64:uint64x1x2_t:uint64x1x2_t, *const u64:uint64x2x2_t:uint64x2x2_t
|
||||
//generate *const p8:poly8x16x2_t:poly8x16x2_t
|
||||
|
||||
arm = vld2lane
|
||||
const-arm = LANE
|
||||
//generate *const u8:uint8x8x2_t:uint8x8x2_t, *const u16:uint16x4x2_t:uint16x4x2_t, *const u32:uint32x2x2_t:uint32x2x2_t
|
||||
//generate *const u8:uint8x16x2_t:uint8x16x2_t, *const u16:uint16x8x2_t:uint16x8x2_t, *const u32:uint32x4x2_t:uint32x4x2_t
|
||||
//generate *const u16:uint16x8x2_t:uint16x8x2_t, *const u32:uint32x4x2_t:uint32x4x2_t
|
||||
//generate *const p8:poly8x8x2_t:poly8x8x2_t, *const p16:poly16x4x2_t:poly16x4x2_t
|
||||
//generate *const p8:poly8x16x2_t:poly8x16x2_t, *const p16:poly16x8x2_t:poly16x8x2_t
|
||||
//generate *const p16:poly16x8x2_t:poly16x8x2_t
|
||||
|
||||
/// Load multiple 2-element structures to two registers
|
||||
name = vld2
|
||||
@@ -2276,6 +2295,398 @@ const-arm = LANE
|
||||
link-arm = vld2lane._EXTpi82_
|
||||
//generate *const f32:float32x2x2_t:float32x2x2_t, *const f32:float32x4x2_t:float32x4x2_t
|
||||
|
||||
/// Load multiple 3-element structures to three registers
|
||||
name = vld3
|
||||
out-nox
|
||||
a = 0, 1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8, 2, 13, 13, 4, 14, 14, 7, 15, 15, 8, 16, 16, 2, 25, 41, 4, 26, 42, 7, 27, 43, 8, 28, 44, 13, 29, 45, 14, 30, 46, 15, 31, 47, 16, 32, 48
|
||||
validate 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32, 2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48
|
||||
load_fn
|
||||
arm-aarch64-separate
|
||||
|
||||
aarch64 = ld3
|
||||
link-aarch64 = ld3._EXTv2_
|
||||
//generate *const i64:int64x2x3_t
|
||||
|
||||
arm = vld3
|
||||
link-arm = vld3._EXTpi82_
|
||||
//generate *const i8:int8x8x3_t, *const i16:int16x4x3_t, *const i32:int32x2x3_t, *const i64:int64x1x3_t
|
||||
//generate *const i8:int8x16x3_t, *const i16:int16x8x3_t, *const i32:int32x4x3_t
|
||||
|
||||
/// Load multiple 3-element structures to three registers
|
||||
name = vld3
|
||||
out-nox
|
||||
multi_fn = transmute, {vld3-outsignednox-noext, transmute(a)}
|
||||
a = 0, 1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8, 2, 13, 13, 4, 14, 14, 7, 15, 15, 8, 16, 16, 2, 25, 41, 4, 26, 42, 7, 27, 43, 8, 28, 44, 13, 29, 45, 14, 30, 46, 15, 31, 47, 16, 32, 48
|
||||
validate 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32, 2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48
|
||||
load_fn
|
||||
|
||||
aarch64 = ld3
|
||||
//generate *const u64:uint64x2x3_t
|
||||
target = aes
|
||||
//generate *const p64:poly64x2x3_t
|
||||
|
||||
target = default
|
||||
arm = vld3
|
||||
//generate *const u8:uint8x8x3_t, *const u16:uint16x4x3_t, *const u32:uint32x2x3_t, *const u64:uint64x1x3_t
|
||||
//generate *const u8:uint8x16x3_t, *const u16:uint16x8x3_t, *const u32:uint32x4x3_t
|
||||
//generate *const p8:poly8x8x3_t, *const p16:poly16x4x3_t, *const p8:poly8x16x3_t, *const p16:poly16x8x3_t
|
||||
target = aes
|
||||
//generate *const p64:poly64x1x3_t
|
||||
|
||||
/// Load multiple 3-element structures to three registers
|
||||
name = vld3
|
||||
out-nox
|
||||
a = 0., 1., 2., 2., 2., 4., 4., 2., 7., 7., 4., 8., 8.
|
||||
validate 1., 2., 2., 4., 2., 4., 7., 8., 2., 4., 7., 8.
|
||||
load_fn
|
||||
arm-aarch64-separate
|
||||
|
||||
aarch64 = ld3
|
||||
link-aarch64 = ld3._EXTv2_
|
||||
//generate *const f64:float64x1x3_t, *const f64:float64x2x3_t
|
||||
|
||||
arm = vld3
|
||||
link-arm = vld3._EXTpi82_
|
||||
//generate *const f32:float32x2x3_t, *const f32:float32x4x3_t
|
||||
|
||||
/// Load single 3-element structure and replicate to all lanes of three registers
|
||||
name = vld3
|
||||
out-dup-nox
|
||||
a = 0, 1, 1, 1, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17, 6, 14, 7, 15, 8, 16, 9, 17, 6, 14, 7, 15, 8, 16, 9, 17
|
||||
validate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
|
||||
load_fn
|
||||
|
||||
aarch64 = ld3r
|
||||
link-aarch64 = ld3r._EXT2_
|
||||
//generate *const i64:int64x2x3_t
|
||||
|
||||
arm = vld3dup
|
||||
link-arm = vld3dup._EXTpi82_
|
||||
//generate *const i8:int8x8x3_t, *const i16:int16x4x3_t, *const i32:int32x2x3_t, *const i64:int64x1x3_t
|
||||
//generate *const i8:int8x16x3_t, *const i16:int16x8x3_t, *const i32:int32x4x3_t
|
||||
|
||||
/// Load single 3-element structure and replicate to all lanes of three registers
|
||||
name = vld3
|
||||
out-dup-nox
|
||||
multi_fn = transmute, {vld3-outsigneddupnox-noext, transmute(a)}
|
||||
a = 0, 1, 1, 1, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17, 6, 14, 7, 15, 8, 16, 9, 17, 6, 14, 7, 15, 8, 16, 9, 17
|
||||
validate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
|
||||
load_fn
|
||||
|
||||
aarch64 = ld3r
|
||||
//generate *const u64:uint64x2x3_t
|
||||
target = aes
|
||||
//generate *const p64:poly64x2x3_t
|
||||
|
||||
target = default
|
||||
arm = vld3dup
|
||||
//generate *const u8:uint8x8x3_t, *const u16:uint16x4x3_t, *const u32:uint32x2x3_t, *const u64:uint64x1x3_t
|
||||
//generate *const u8:uint8x16x3_t, *const u16:uint16x8x3_t, *const u32:uint32x4x3_t
|
||||
//generate *const p8:poly8x8x3_t, *const p16:poly16x4x3_t, *const p8:poly8x16x3_t, *const p16:poly16x8x3_t
|
||||
target = aes
|
||||
//generate *const p64:poly64x1x3_t
|
||||
|
||||
/// Load single 3-element structure and replicate to all lanes of three registers
|
||||
name = vld3
|
||||
out-dup-nox
|
||||
a = 0., 1., 1., 1., 3., 1., 4., 3., 5., 1., 4., 3., 5.
|
||||
validate 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.
|
||||
load_fn
|
||||
|
||||
aarch64 = ld3r
|
||||
link-aarch64 = ld3r._EXT2_
|
||||
//generate *const f64:float64x1x3_t, *const f64:float64x2x3_t
|
||||
|
||||
arm = vld3dup
|
||||
link-arm = vld3dup._EXTpi82_
|
||||
//generate *const f32:float32x2x3_t, *const f32:float32x4x3_t
|
||||
|
||||
/// Load multiple 3-element structures to three registers
|
||||
name = vld3
|
||||
out-lane-nox
|
||||
multi_fn = static_assert_imm-in_exp_len-LANE
|
||||
constn = LANE
|
||||
a = 0, 1, 2, 2, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
|
||||
b = 0, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
|
||||
n = 0
|
||||
validate 1, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26, 2, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 2, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
|
||||
load_fn
|
||||
arm-aarch64-separate
|
||||
|
||||
aarch64 = ld3lane
|
||||
const-aarch64 = LANE
|
||||
link-aarch64 = ld3lane._EXTpi82_
|
||||
//generate *const i8:int8x16x3_t:int8x16x3_t, *const i64:int64x1x3_t:int64x1x3_t, *const i64:int64x2x3_t:int64x2x3_t
|
||||
|
||||
arm = vld3lane
|
||||
const-arm = LANE
|
||||
link-arm = vld3lane._EXTpi82_
|
||||
//generate *const i8:int8x8x3_t:int8x8x3_t, *const i16:int16x4x3_t:int16x4x3_t, *const i32:int32x2x3_t:int32x2x3_t
|
||||
//generate *const i16:int16x8x3_t:int16x8x3_t, *const i32:int32x4x3_t:int32x4x3_t
|
||||
|
||||
/// Load multiple 3-element structures to three registers
|
||||
name = vld3
|
||||
out-lane-nox
|
||||
multi_fn = static_assert_imm-in_exp_len-LANE
|
||||
multi_fn = transmute, {vld3-outsignedlanenox-::<LANE>, transmute(a), transmute(b)}
|
||||
constn = LANE
|
||||
a = 0, 1, 2, 2, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
|
||||
b = 0, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
|
||||
n = 0
|
||||
validate 1, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26, 2, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 2, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
|
||||
load_fn
|
||||
|
||||
aarch64 = ld3lane
|
||||
const-aarch64 = LANE
|
||||
target = aes
|
||||
//generate *const p64:poly64x1x3_t:poly64x1x3_t, *const p64:poly64x2x3_t:poly64x2x3_t
|
||||
target = default
|
||||
//generate *const p8:poly8x16x3_t:poly8x16x3_t, *const u8:uint8x16x3_t:uint8x16x3_t, *const u64:uint64x1x3_t:uint64x1x3_t, *const u64:uint64x2x3_t:uint64x2x3_t
|
||||
|
||||
arm = vld3lane
|
||||
const-arm = LANE
|
||||
//generate *const u8:uint8x8x3_t:uint8x8x3_t, *const u16:uint16x4x3_t:uint16x4x3_t, *const u32:uint32x2x3_t:uint32x2x3_t
|
||||
//generate *const u16:uint16x8x3_t:uint16x8x3_t, *const u32:uint32x4x3_t:uint32x4x3_t
|
||||
//generate *const p8:poly8x8x3_t:poly8x8x3_t, *const p16:poly16x4x3_t:poly16x4x3_t
|
||||
//generate *const p16:poly16x8x3_t:poly16x8x3_t
|
||||
|
||||
/// Load multiple 3-element structures to three registers
|
||||
name = vld3
|
||||
out-lane-nox
|
||||
multi_fn = static_assert_imm-in_exp_len-LANE
|
||||
constn = LANE
|
||||
a = 0., 1., 2., 2., 4., 5., 6., 7., 8., 5., 6., 7., 8.
|
||||
b = 0., 2., 2., 14., 9., 16., 17., 18., 5., 6., 7., 8.
|
||||
n = 0
|
||||
validate 1., 2., 2., 14., 2., 16., 17., 18., 2., 6., 7., 8.
|
||||
load_fn
|
||||
arm-aarch64-separate
|
||||
|
||||
aarch64 = ld3lane
|
||||
const-aarch64 = LANE
|
||||
link-aarch64 = ld3lane._EXTpi82_
|
||||
//generate *const f64:float64x1x3_t:float64x1x3_t, *const f64:float64x2x3_t:float64x2x3_t
|
||||
|
||||
arm = vld3lane
|
||||
const-arm = LANE
|
||||
link-arm = vld3lane._EXTpi82_
|
||||
//generate *const f32:float32x2x3_t:float32x2x3_t, *const f32:float32x4x3_t:float32x4x3_t
|
||||
|
||||
/// Load multiple 4-element structures to four registers
|
||||
name = vld4
|
||||
out-nox
|
||||
a = 0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64
|
||||
validate 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64
|
||||
load_fn
|
||||
arm-aarch64-separate
|
||||
|
||||
aarch64 = ld4
|
||||
link-aarch64 = ld4._EXTv2_
|
||||
//generate *const i64:int64x2x4_t
|
||||
|
||||
arm = vld4
|
||||
link-arm = vld4._EXTpi82_
|
||||
//generate *const i8:int8x8x4_t, *const i16:int16x4x4_t, *const i32:int32x2x4_t, *const i64:int64x1x4_t
|
||||
//generate *const i8:int8x16x4_t, *const i16:int16x8x4_t, *const i32:int32x4x4_t
|
||||
|
||||
/// Load multiple 4-element structures to four registers
|
||||
name = vld4
|
||||
out-nox
|
||||
multi_fn = transmute, {vld4-outsignednox-noext, transmute(a)}
|
||||
a = 0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64
|
||||
validate 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64
|
||||
load_fn
|
||||
|
||||
aarch64 = ld4
|
||||
//generate *const u64:uint64x2x4_t
|
||||
target = aes
|
||||
//generate *const p64:poly64x2x4_t
|
||||
|
||||
target = default
|
||||
arm = vld4
|
||||
//generate *const u8:uint8x8x4_t, *const u16:uint16x4x4_t, *const u32:uint32x2x4_t, *const u64:uint64x1x4_t
|
||||
//generate *const u8:uint8x16x4_t, *const u16:uint16x8x4_t, *const u32:uint32x4x4_t
|
||||
//generate *const p8:poly8x8x4_t, *const p16:poly16x4x4_t, *const p8:poly8x16x4_t, *const p16:poly16x8x4_t
|
||||
target = aes
|
||||
//generate *const p64:poly64x1x4_t
|
||||
|
||||
/// Load multiple 4-element structures to four registers
|
||||
name = vld4
|
||||
out-nox
|
||||
a = 0., 1., 2., 2., 6., 2., 6., 6., 8., 2., 6., 6., 8., 6., 8., 15., 16.
|
||||
validate 1., 2., 2., 6., 2., 6., 6., 8., 2., 6., 6., 15., 6., 8., 8., 16.
|
||||
load_fn
|
||||
arm-aarch64-separate
|
||||
|
||||
aarch64 = ld4
|
||||
link-aarch64 = ld4._EXTv2_
|
||||
//generate *const f64:float64x1x4_t, *const f64:float64x2x4_t
|
||||
|
||||
arm = vld4
|
||||
link-arm = vld4._EXTpi82_
|
||||
//generate *const f32:float32x2x4_t, *const f32:float32x4x4_t
|
||||
|
||||
/// Load single 4-element structure and replicate to all lanes of four registers
|
||||
name = vld4
|
||||
out-dup-nox
|
||||
a = 0, 1, 1, 1, 1, 2, 4, 3, 5, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9
|
||||
validate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
|
||||
load_fn
|
||||
|
||||
aarch64 = ld4r
|
||||
link-aarch64 = ld4r._EXT2_
|
||||
//generate *const i64:int64x2x4_t
|
||||
|
||||
arm = vld4dup
|
||||
link-arm = vld4dup._EXTpi82_
|
||||
//generate *const i8:int8x8x4_t, *const i16:int16x4x4_t, *const i32:int32x2x4_t, *const i64:int64x1x4_t
|
||||
//generate *const i8:int8x16x4_t, *const i16:int16x8x4_t, *const i32:int32x4x4_t
|
||||
|
||||
/// Load single 4-element structure and replicate to all lanes of four registers
|
||||
name = vld4
|
||||
out-dup-nox
|
||||
multi_fn = transmute, {vld4-outsigneddupnox-noext, transmute(a)}
|
||||
a = 0, 1, 1, 1, 1, 2, 4, 3, 5, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9
|
||||
validate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
|
||||
load_fn
|
||||
|
||||
aarch64 = ld4r
|
||||
//generate *const u64:uint64x2x4_t
|
||||
target = aes
|
||||
//generate *const p64:poly64x2x4_t
|
||||
|
||||
target = default
|
||||
arm = vld4dup
|
||||
//generate *const u8:uint8x8x4_t, *const u16:uint16x4x4_t, *const u32:uint32x2x4_t, *const u64:uint64x1x4_t
|
||||
//generate *const u8:uint8x16x4_t, *const u16:uint16x8x4_t, *const u32:uint32x4x4_t
|
||||
//generate *const p8:poly8x8x4_t, *const p16:poly16x4x4_t, *const p8:poly8x16x4_t, *const p16:poly16x8x4_t
|
||||
target = aes
|
||||
//generate *const p64:poly64x1x4_t
|
||||
|
||||
/// Load single 4-element structure and replicate to all lanes of four registers
|
||||
name = vld4
|
||||
out-dup-nox
|
||||
a = 0., 1., 1., 1., 1., 6., 4., 3., 5., 7., 4., 3., 5., 8., 4., 3., 5., 9., 4., 3., 5.
|
||||
validate 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.
|
||||
load_fn
|
||||
|
||||
aarch64 = ld4r
|
||||
link-aarch64 = ld4r._EXT2_
|
||||
//generate *const f64:float64x1x4_t, *const f64:float64x2x4_t
|
||||
|
||||
arm = vld4dup
|
||||
link-arm = vld4dup._EXTpi82_
|
||||
//generate *const f32:float32x2x4_t, *const f32:float32x4x4_t
|
||||
|
||||
/// Load single 4-element structure to one lane of four registers
|
||||
name = vld4
|
||||
out-lane-nox
|
||||
multi_fn = static_assert_imm-in_exp_len-LANE
|
||||
constn = LANE
|
||||
a = 0, 1, 2, 2, 2, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16
|
||||
b = 0, 2, 2, 2, 2, 16, 2, 18, 2, 20, 21, 22, 2, 24, 25, 26, 11, 12, 13, 14, 15, 16, 2, 18, 2, 20, 21, 22, 23, 24, 25, 26, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16
|
||||
n = 0
|
||||
validate 1, 2, 2, 2, 2, 16, 2, 18, 2, 20, 21, 22, 2, 24, 25, 26, 2, 12, 13, 14, 15, 16, 2, 18, 2, 20, 21, 22, 23, 24, 25, 26, 2, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 2, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16
|
||||
load_fn
|
||||
arm-aarch64-separate
|
||||
|
||||
aarch64 = ld4lane
|
||||
const-aarch64 = LANE
|
||||
link-aarch64 = ld4lane._EXTpi82_
|
||||
//generate *const i8:int8x16x4_t:int8x16x4_t, *const i64:int64x1x4_t:int64x1x4_t, *const i64:int64x2x4_t:int64x2x4_t
|
||||
|
||||
arm = vld4lane
|
||||
const-arm = LANE
|
||||
link-arm = vld4lane._EXTpi82_
|
||||
//generate *const i8:int8x8x4_t:int8x8x4_t, *const i16:int16x4x4_t:int16x4x4_t, *const i32:int32x2x4_t:int32x2x4_t
|
||||
//generate *const i16:int16x8x4_t:int16x8x4_t, *const i32:int32x4x4_t:int32x4x4_t
|
||||
|
||||
/// Load single 4-element structure to one lane of four registers
|
||||
name = vld4
|
||||
out-lane-nox
|
||||
multi_fn = static_assert_imm-in_exp_len-LANE
|
||||
multi_fn = transmute, {vld4-outsignedlanenox-::<LANE>, transmute(a), transmute(b)}
|
||||
constn = LANE
|
||||
a = 0, 1, 2, 2, 2, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16
|
||||
b = 0, 2, 2, 2, 2, 16, 2, 18, 2, 20, 21, 22, 2, 24, 25, 26, 11, 12, 13, 14, 15, 16, 2, 18, 2, 20, 21, 22, 23, 24, 25, 26, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16
|
||||
n = 0
|
||||
validate 1, 2, 2, 2, 2, 16, 2, 18, 2, 20, 21, 22, 2, 24, 25, 26, 2, 12, 13, 14, 15, 16, 2, 18, 2, 20, 21, 22, 23, 24, 25, 26, 2, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 2, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16
|
||||
load_fn
|
||||
|
||||
aarch64 = ld4lane
|
||||
const-aarch64 = LANE
|
||||
target = aes
|
||||
//generate *const p64:poly64x1x4_t:poly64x1x4_t, *const p64:poly64x2x4_t:poly64x2x4_t
|
||||
target = default
|
||||
//generate *const p8:poly8x16x4_t:poly8x16x4_t, *const u8:uint8x16x4_t:uint8x16x4_t, *const u64:uint64x1x4_t:uint64x1x4_t, *const u64:uint64x2x4_t:uint64x2x4_t
|
||||
|
||||
arm = vld4lane
|
||||
const-arm = LANE
|
||||
//generate *const u8:uint8x8x4_t:uint8x8x4_t, *const u16:uint16x4x4_t:uint16x4x4_t, *const u32:uint32x2x4_t:uint32x2x4_t
|
||||
//generate *const u16:uint16x8x4_t:uint16x8x4_t, *const u32:uint32x4x4_t:uint32x4x4_t
|
||||
//generate *const p8:poly8x8x4_t:poly8x8x4_t, *const p16:poly16x4x4_t:poly16x4x4_t
|
||||
//generate *const p16:poly16x8x4_t:poly16x8x4_t
|
||||
|
||||
/// Load single 4-element structure to one lane of four registers
|
||||
name = vld4
|
||||
out-lane-nox
|
||||
multi_fn = static_assert_imm-in_exp_len-LANE
|
||||
constn = LANE
|
||||
a = 0., 1., 2., 2., 2., 5., 6., 7., 8., 5., 6., 7., 8., 1., 4., 3., 5.
|
||||
b = 0., 2., 2., 2., 2., 16., 2., 18., 5., 6., 7., 8., 1., 4., 3., 5.
|
||||
n = 0
|
||||
validate 1., 2., 2., 2., 2., 16., 2., 18., 2., 6., 7., 8., 2., 4., 3., 5.
|
||||
load_fn
|
||||
arm-aarch64-separate
|
||||
|
||||
aarch64 = ld4lane
|
||||
const-aarch64 = LANE
|
||||
link-aarch64 = ld4lane._EXTpi82_
|
||||
//generate *const f64:float64x1x4_t:float64x1x4_t, *const f64:float64x2x4_t:float64x2x4_t
|
||||
|
||||
arm = vld4lane
|
||||
const-arm = LANE
|
||||
link-arm = vld4lane._EXTpi82_
|
||||
//generate *const f32:float32x2x4_t:float32x2x4_t, *const f32:float32x4x4_t:float32x4x4_t
|
||||
|
||||
/// Store a single-element structure from one lane of one register
|
||||
name = vst1
|
||||
in1-lane-nox
|
||||
multi_fn = static_assert_imm-in_exp_len-LANE
|
||||
multi_fn = *a, {simd_extract, b, LANE as u32}
|
||||
constn = LANE
|
||||
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
|
||||
n = 0
|
||||
validate 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
store_fn
|
||||
|
||||
aarch64 = nop
|
||||
arm = nop
|
||||
//generate *mut i8:int8x8_t:void, *mut i16:int16x4_t:void, *mut i32:int32x2_t:void, *mut i64:int64x1_t:void
|
||||
//generate *mut i8:int8x16_t:void, *mut i16:int16x8_t:void, *mut i32:int32x4_t:void, *mut i64:int64x2_t:void
|
||||
//generate *mut u8:uint8x8_t:void, *mut u16:uint16x4_t:void, *mut u32:uint32x2_t:void, *mut u64:uint64x1_t:void
|
||||
//generate *mut u8:uint8x16_t:void, *mut u16:uint16x8_t:void, *mut u32:uint32x4_t:void, *mut u64:uint64x2_t:void
|
||||
//generate *mut p8:poly8x8_t:void, *mut p16:poly16x4_t:void, *mut p8:poly8x16_t:void, *mut p16:poly16x8_t:void
|
||||
target = aes
|
||||
//generate *mut p64:poly64x1_t:void, *mut p64:poly64x2_t:void
|
||||
|
||||
/// Store a single-element structure from one lane of one register
|
||||
name = vst1
|
||||
in1-lane-nox
|
||||
multi_fn = static_assert_imm-in_exp_len-LANE
|
||||
multi_fn = *a, {simd_extract, b, LANE as u32}
|
||||
constn = LANE
|
||||
a = 0., 1., 2., 3., 4., 5., 6., 7., 8.
|
||||
n = 0
|
||||
validate 1., 0., 0., 0., 0., 0., 0., 0.
|
||||
store_fn
|
||||
|
||||
aarch64 = nop
|
||||
//generate *mut f64:float64x1_t:void, *mut f64:float64x2_t:void
|
||||
|
||||
arm = nop
|
||||
//generate *mut f32:float32x2_t:void, *mut f32:float32x4_t:void
|
||||
|
||||
/// Store multiple single-element structures from one, two, three, or four registers
|
||||
name = vst1
|
||||
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32
|
||||
@@ -2319,6 +2730,11 @@ generate *mut p8:poly8x8x2_t:void, *mut p8:poly8x8x3_t:void, *mut p8:poly8x8x4_t
|
||||
generate *mut p8:poly8x16x2_t:void, *mut p8:poly8x16x3_t:void, *mut p8:poly8x16x4_t:void
|
||||
generate *mut p16:poly16x4x2_t:void, *mut p16:poly16x4x3_t:void, *mut p16:poly16x4x4_t:void
|
||||
generate *mut p16:poly16x8x2_t:void, *mut p16:poly16x8x3_t:void, *mut p16:poly16x8x4_t:void
|
||||
target = aes
|
||||
generate *mut p64:poly64x1x2_t:void
|
||||
arm = nop
|
||||
generate *mut p64:poly64x1x3_t:void, *mut p64:poly64x1x4_t:void
|
||||
generate *mut p64:poly64x2x2_t:void, *mut p64:poly64x2x3_t:void, *mut p64:poly64x2x4_t:void
|
||||
|
||||
/// Store multiple single-element structures from one, two, three, or four registers
|
||||
name = vst1
|
||||
@@ -2350,6 +2766,363 @@ link-aarch64 = st1x4._EXT3_
|
||||
link-arm = vst1x4._EXTr3_
|
||||
generate *mut f32:float32x2x4_t:void, *mut f32:float32x4x4_t:void
|
||||
|
||||
/// Store multiple 2-element structures from two registers
|
||||
name = vst2
|
||||
in1-nox
|
||||
a = 0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
|
||||
validate 1, 2, 2, 3, 2, 4, 3, 5, 2, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17
|
||||
store_fn
|
||||
arm-aarch64-separate
|
||||
|
||||
aarch64 = st2
|
||||
link-aarch64 = st2._EXTpi8_
|
||||
//generate *mut i64:int64x2x2_t:void
|
||||
|
||||
arm = vst2
|
||||
link-arm = vst2._EXTpi8r_
|
||||
//generate *mut i8:int8x8x2_t:void, *mut i16:int16x4x2_t:void, *mut i32:int32x2x2_t:void, *mut i64:int64x1x2_t:void
|
||||
//generate *mut i8:int8x16x2_t:void, *mut i16:int16x8x2_t:void, *mut i32:int32x4x2_t:void
|
||||
|
||||
/// Store multiple 2-element structures from two registers
|
||||
name = vst2
|
||||
multi_fn = transmute, {vst2-in1signednox-noext, transmute(a), transmute(b)}
|
||||
in1-nox
|
||||
a = 0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
|
||||
validate 1, 2, 2, 3, 2, 4, 3, 5, 2, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17
|
||||
store_fn
|
||||
|
||||
aarch64 = st2
|
||||
//generate *mut u64:uint64x2x2_t:void
|
||||
target = aes
|
||||
//generate *mut p64:poly64x2x2_t:void
|
||||
|
||||
target = default
|
||||
arm = vst2
|
||||
//generate *mut u8:uint8x8x2_t:void, *mut u16:uint16x4x2_t:void, *mut u32:uint32x2x2_t:void, *mut u64:uint64x1x2_t:void
|
||||
//generate *mut u8:uint8x16x2_t:void, *mut u16:uint16x8x2_t:void, *mut u32:uint32x4x2_t:void
|
||||
//generate *mut p8:poly8x8x2_t:void, *mut p16:poly16x4x2_t:void, *mut p8:poly8x16x2_t:void, *mut p16:poly16x8x2_t:void
|
||||
target = aes
|
||||
//generate *mut p64:poly64x1x2_t:void
|
||||
|
||||
/// Store multiple 2-element structures from two registers
|
||||
name = vst2
|
||||
in1-nox
|
||||
a = 0., 1., 2., 2., 3., 2., 3., 4., 5., 2., 3., 4., 5., 6., 7., 8., 9.
|
||||
validate 1., 2., 2., 3., 2., 4., 3., 5., 2., 6., 3., 7., 4., 8., 5., 9.
|
||||
store_fn
|
||||
arm-aarch64-separate
|
||||
|
||||
aarch64 = st2
|
||||
link-aarch64 = st2._EXTpi8_
|
||||
//generate *mut f64:float64x1x2_t:void, *mut f64:float64x2x2_t:void
|
||||
|
||||
arm = vst2
|
||||
link-arm = vst2._EXTpi8r_
|
||||
//generate *mut f32:float32x2x2_t:void, *mut f32:float32x4x2_t:void
|
||||
|
||||
/// Store single 2-element structure from one lane of two registers
|
||||
name = vst2
|
||||
in1-lane-nox
|
||||
constn = LANE
|
||||
multi_fn = static_assert_imm-in_exp_len-LANE
|
||||
a = 0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
|
||||
n = 0
|
||||
validate 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
store_fn
|
||||
arm-aarch64-separate
|
||||
|
||||
aarch64 = st2lane
|
||||
link-aarch64 = st2lane._EXTpi8_
|
||||
const-aarch64 = LANE
|
||||
//generate *mut i8:int8x16x2_t:void, *mut i64:int64x1x2_t:void, *mut i64:int64x2x2_t:void
|
||||
|
||||
arm = vst2lane
|
||||
link-arm = vst2lane._EXTpi8r_
|
||||
const-arm = LANE
|
||||
//generate *mut i8:int8x8x2_t:void, *mut i16:int16x4x2_t:void, *mut i32:int32x2x2_t:void
|
||||
//generate *mut i16:int16x8x2_t:void, *mut i32:int32x4x2_t:void
|
||||
|
||||
/// Store single 2-element structure from one lane of two registers
|
||||
name = vst2
|
||||
in1-lane-nox
|
||||
constn = LANE
|
||||
multi_fn = static_assert_imm-in_exp_len-LANE
|
||||
multi_fn = transmute, {vst2-in1signedlanenox-::<LANE>, transmute(a), transmute(b)}
|
||||
a = 0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
|
||||
n = 0
|
||||
validate 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
store_fn
|
||||
|
||||
aarch64 = st2lane
|
||||
//generate *mut u8:uint8x16x2_t:void, *mut u64:uint64x1x2_t:void, *mut u64:uint64x2x2_t:void, *mut p8:poly8x16x2_t:void
|
||||
target = aes
|
||||
//generate *mut p64:poly64x1x2_t:void, *mut p64:poly64x2x2_t:void
|
||||
|
||||
target = default
|
||||
arm = vst2lane
|
||||
//generate *mut u8:uint8x8x2_t:void, *mut u16:uint16x4x2_t:void, *mut u32:uint32x2x2_t:void
|
||||
//generate *mut u16:uint16x8x2_t:void, *mut u32:uint32x4x2_t:void
|
||||
//generate *mut p8:poly8x8x2_t:void, *mut p16:poly16x4x2_t:void, *mut p16:poly16x8x2_t:void
|
||||
|
||||
/// Store single 2-element structure from one lane of two registers
|
||||
name = vst2
|
||||
in1-lane-nox
|
||||
constn = LANE
|
||||
multi_fn = static_assert_imm-in_exp_len-LANE
|
||||
a = 0., 1., 2., 2., 3., 2., 3., 4., 5., 2., 3., 4., 5., 6., 7., 8., 9.
|
||||
n = 0
|
||||
validate 1., 2., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.
|
||||
store_fn
|
||||
arm-aarch64-separate
|
||||
|
||||
aarch64 = st2lane
|
||||
link-aarch64 = st2lane._EXTpi8_
|
||||
const-aarch64 = LANE
|
||||
//generate *mut f64:float64x1x2_t:void, *mut f64:float64x2x2_t:void
|
||||
|
||||
arm = vst2lane
|
||||
link-arm = vst2lane._EXTpi8r_
|
||||
const-arm = LANE
|
||||
//generate *mut f32:float32x2x2_t:void, *mut f32:float32x4x2_t:void
|
||||
|
||||
/// Store multiple 3-element structures from three registers
|
||||
name = vst3
|
||||
in1-nox
|
||||
a = 0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32, 2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48
|
||||
validate 1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8, 2, 13, 13, 4, 14, 14, 7, 15, 15, 8, 16, 16, 2, 25, 41, 4, 26, 42, 7, 27, 43, 8, 28, 44, 13, 29, 45, 14, 30, 46, 15, 31, 47, 16, 32, 48
|
||||
store_fn
|
||||
arm-aarch64-separate
|
||||
|
||||
aarch64 = st3
|
||||
link-aarch64 = st3._EXTpi8_
|
||||
//generate *mut i64:int64x2x3_t:void
|
||||
|
||||
arm = vst3
|
||||
link-arm = vst3._EXTpi8r_
|
||||
//generate *mut i8:int8x8x3_t:void, *mut i16:int16x4x3_t:void, *mut i32:int32x2x3_t:void, *mut i64:int64x1x3_t:void
|
||||
//generate *mut i8:int8x16x3_t:void, *mut i16:int16x8x3_t:void, *mut i32:int32x4x3_t:void
|
||||
|
||||
/// Store multiple 3-element structures from three registers
|
||||
name = vst3
|
||||
multi_fn = transmute, {vst3-in1signednox-noext, transmute(a), transmute(b)}
|
||||
in1-nox
|
||||
a = 0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32, 2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48
|
||||
validate 1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8, 2, 13, 13, 4, 14, 14, 7, 15, 15, 8, 16, 16, 2, 25, 41, 4, 26, 42, 7, 27, 43, 8, 28, 44, 13, 29, 45, 14, 30, 46, 15, 31, 47, 16, 32, 48
|
||||
store_fn
|
||||
|
||||
aarch64 = st3
|
||||
//generate *mut u64:uint64x2x3_t:void
|
||||
target = aes
|
||||
//generate *mut p64:poly64x2x3_t:void
|
||||
|
||||
target = default
|
||||
arm = vst3
|
||||
//generate *mut u8:uint8x8x3_t:void, *mut u16:uint16x4x3_t:void, *mut u32:uint32x2x3_t:void, *mut u64:uint64x1x3_t:void
|
||||
//generate *mut u8:uint8x16x3_t:void, *mut u16:uint16x8x3_t:void, *mut u32:uint32x4x3_t:void
|
||||
//generate *mut p8:poly8x8x3_t:void, *mut p16:poly16x4x3_t:void, *mut p8:poly8x16x3_t:void, *mut p16:poly16x8x3_t:void
|
||||
target = aes
|
||||
//generate *mut p64:poly64x1x3_t:void
|
||||
|
||||
/// Store multiple 3-element structures from three registers
|
||||
name = vst3
|
||||
in1-nox
|
||||
a = 0., 1., 2., 2., 4., 2., 4., 7., 8., 2., 4., 7., 8., 13., 14., 15., 16.
|
||||
validate 1., 2., 2., 2., 4., 4., 2., 7., 7., 4., 8., 8., 2., 13., 13., 4.
|
||||
store_fn
|
||||
arm-aarch64-separate
|
||||
|
||||
aarch64 = st3
|
||||
link-aarch64 = st3._EXTpi8_
|
||||
//generate *mut f64:float64x1x3_t:void, *mut f64:float64x2x3_t:void
|
||||
|
||||
arm = vst3
|
||||
link-arm = vst3._EXTpi8r_
|
||||
//generate *mut f32:float32x2x3_t:void, *mut f32:float32x4x3_t:void
|
||||
|
||||
/// Store single 3-element structure from one lane of three registers
|
||||
name = vst3
|
||||
in1-lane-nox
|
||||
constn = LANE
|
||||
multi_fn = static_assert_imm-in_exp_len-LANE
|
||||
a = 0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32, 2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48
|
||||
n = 0
|
||||
validate 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
store_fn
|
||||
arm-aarch64-separate
|
||||
|
||||
aarch64 = st3lane
|
||||
link-aarch64 = st3lane._EXTpi8_
|
||||
const-aarch64 = LANE
|
||||
//generate *mut i8:int8x16x3_t:void, *mut i64:int64x1x3_t:void, *mut i64:int64x2x3_t:void
|
||||
|
||||
arm = vst3lane
|
||||
link-arm = vst3lane._EXTpi8r_
|
||||
const-arm = LANE
|
||||
//generate *mut i8:int8x8x3_t:void, *mut i16:int16x4x3_t:void, *mut i32:int32x2x3_t:void
|
||||
//generate *mut i16:int16x8x3_t:void, *mut i32:int32x4x3_t:void
|
||||
|
||||
/// Store single 3-element structure from one lane of three registers
|
||||
name = vst3
|
||||
in1-lane-nox
|
||||
constn = LANE
|
||||
multi_fn = static_assert_imm-in_exp_len-LANE
|
||||
multi_fn = transmute, {vst3-in1signedlanenox-::<LANE>, transmute(a), transmute(b)}
|
||||
a = 0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32, 2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48
|
||||
n = 0
|
||||
validate 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
store_fn
|
||||
|
||||
aarch64 = st3lane
|
||||
//generate *mut u8:uint8x16x3_t:void, *mut u64:uint64x1x3_t:void, *mut u64:uint64x2x3_t:void, *mut p8:poly8x16x3_t:void
|
||||
target = aes
|
||||
//generate *mut p64:poly64x1x3_t:void, *mut p64:poly64x2x3_t:void
|
||||
|
||||
target = default
|
||||
arm = vst3lane
|
||||
//generate *mut u8:uint8x8x3_t:void, *mut u16:uint16x4x3_t:void, *mut u32:uint32x2x3_t:void
|
||||
//generate *mut u16:uint16x8x3_t:void, *mut u32:uint32x4x3_t:void
|
||||
//generate *mut p8:poly8x8x3_t:void, *mut p16:poly16x4x3_t:void, *mut p16:poly16x8x3_t:void
|
||||
|
||||
/// Store single 3-element structure from one lane of three registers
|
||||
name = vst3
|
||||
in1-lane-nox
|
||||
constn = LANE
|
||||
multi_fn = static_assert_imm-in_exp_len-LANE
|
||||
a = 0., 1., 2., 2., 3., 2., 3., 4., 5., 2., 3., 4., 5., 6., 7., 8., 9.
|
||||
n = 0
|
||||
validate 1., 2., 2., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.
|
||||
store_fn
|
||||
arm-aarch64-separate
|
||||
|
||||
aarch64 = st3lane
|
||||
link-aarch64 = st3lane._EXTpi8_
|
||||
const-aarch64 = LANE
|
||||
//generate *mut f64:float64x1x3_t:void, *mut f64:float64x2x3_t:void
|
||||
|
||||
arm = vst3lane
|
||||
link-arm = vst3lane._EXTpi8r_
|
||||
const-arm = LANE
|
||||
//generate *mut f32:float32x2x3_t:void, *mut f32:float32x4x3_t:void
|
||||
|
||||
/// Store multiple 4-element structures from four registers
|
||||
name = vst4
|
||||
in1-nox
|
||||
a = 0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64
|
||||
validate 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64
|
||||
store_fn
|
||||
arm-aarch64-separate
|
||||
|
||||
aarch64 = st4
|
||||
link-aarch64 = st4._EXTpi8_
|
||||
//generate *mut i64:int64x2x4_t:void
|
||||
|
||||
arm = vst4
|
||||
link-arm = vst4._EXTpi8r_
|
||||
//generate *mut i8:int8x8x4_t:void, *mut i16:int16x4x4_t:void, *mut i32:int32x2x4_t:void, *mut i64:int64x1x4_t:void
|
||||
//generate *mut i8:int8x16x4_t:void, *mut i16:int16x8x4_t:void, *mut i32:int32x4x4_t:void
|
||||
|
||||
/// Store multiple 4-element structures from four registers
|
||||
name = vst4
|
||||
multi_fn = transmute, {vst4-in1signednox-noext, transmute(a), transmute(b)}
|
||||
in1-nox
|
||||
a = 0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64
|
||||
validate 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64
|
||||
store_fn
|
||||
|
||||
aarch64 = st4
|
||||
//generate *mut u64:uint64x2x4_t:void
|
||||
target = aes
|
||||
//generate *mut p64:poly64x2x4_t:void
|
||||
|
||||
target = default
|
||||
arm = vst4
|
||||
//generate *mut u8:uint8x8x4_t:void, *mut u16:uint16x4x4_t:void, *mut u32:uint32x2x4_t:void, *mut u64:uint64x1x4_t:void
|
||||
//generate *mut u8:uint8x16x4_t:void, *mut u16:uint16x8x4_t:void, *mut u32:uint32x4x4_t:void
|
||||
//generate *mut p8:poly8x8x4_t:void, *mut p16:poly16x4x4_t:void, *mut p8:poly8x16x4_t:void, *mut p16:poly16x8x4_t:void
|
||||
target = aes
|
||||
//generate *mut p64:poly64x1x4_t:void
|
||||
|
||||
/// Store multiple 4-element structures from four registers
|
||||
name = vst4
|
||||
in1-nox
|
||||
a = 0., 1., 2., 2., 6., 2., 6., 6., 8., 2., 6., 6., 8., 6., 8., 8., 16.
|
||||
validate 1., 2., 2., 6., 2., 6., 6., 8., 2., 6., 6., 8., 6., 8., 8., 16.
|
||||
store_fn
|
||||
arm-aarch64-separate
|
||||
|
||||
aarch64 = st4
|
||||
link-aarch64 = st4._EXTpi8_
|
||||
//generate *mut f64:float64x1x4_t:void, *mut f64:float64x2x4_t:void
|
||||
|
||||
arm = vst4
|
||||
link-arm = vst4._EXTpi8r_
|
||||
//generate *mut f32:float32x2x4_t:void, *mut f32:float32x4x4_t:void
|
||||
|
||||
/// Store single 4-element structure from one lane of four registers
|
||||
name = vst4
|
||||
in1-lane-nox
|
||||
constn = LANE
|
||||
multi_fn = static_assert_imm-in_exp_len-LANE
|
||||
a = 0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64
|
||||
n = 0
|
||||
validate 1, 2, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
store_fn
|
||||
arm-aarch64-separate
|
||||
|
||||
aarch64 = st4lane
|
||||
link-aarch64 = st4lane._EXTpi8_
|
||||
const-aarch64 = LANE
|
||||
//generate *mut i8:int8x16x4_t:void, *mut i64:int64x1x4_t:void, *mut i64:int64x2x4_t:void
|
||||
|
||||
arm = vst4lane
|
||||
link-arm = vst4lane._EXTpi8r_
|
||||
const-arm = LANE
|
||||
//generate *mut i8:int8x8x4_t:void, *mut i16:int16x4x4_t:void, *mut i32:int32x2x4_t:void
|
||||
//generate *mut i16:int16x8x4_t:void, *mut i32:int32x4x4_t:void
|
||||
|
||||
/// Store single 4-element structure from one lane of four registers
|
||||
name = vst4
|
||||
in1-lane-nox
|
||||
constn = LANE
|
||||
multi_fn = static_assert_imm-in_exp_len-LANE
|
||||
multi_fn = transmute, {vst4-in1signedlanenox-::<LANE>, transmute(a), transmute(b)}
|
||||
a = 0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64
|
||||
n = 0
|
||||
validate 1, 2, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
store_fn
|
||||
|
||||
aarch64 = st4lane
|
||||
//generate *mut u8:uint8x16x4_t:void, *mut u64:uint64x1x4_t:void, *mut u64:uint64x2x4_t:void, *mut p8:poly8x16x4_t:void
|
||||
target = aes
|
||||
//generate *mut p64:poly64x1x4_t:void, *mut p64:poly64x2x4_t:void
|
||||
|
||||
target = default
|
||||
arm = vst4lane
|
||||
//generate *mut u8:uint8x8x4_t:void, *mut u16:uint16x4x4_t:void, *mut u32:uint32x2x4_t:void
|
||||
//generate *mut u16:uint16x8x4_t:void, *mut u32:uint32x4x4_t:void
|
||||
//generate *mut p8:poly8x8x4_t:void, *mut p16:poly16x4x4_t:void, *mut p16:poly16x8x4_t:void
|
||||
|
||||
/// Store single 4-element structure from one lane of four registers
|
||||
name = vst4
|
||||
in1-lane-nox
|
||||
constn = LANE
|
||||
multi_fn = static_assert_imm-in_exp_len-LANE
|
||||
a = 0., 1., 2., 2., 6., 2., 6., 6., 8., 2., 6., 6., 8., 6., 8., 8., 16.
|
||||
n = 0
|
||||
validate 1., 2., 2., 6., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.
|
||||
store_fn
|
||||
arm-aarch64-separate
|
||||
|
||||
aarch64 = st4lane
|
||||
link-aarch64 = st4lane._EXTpi8_
|
||||
const-aarch64 = LANE
|
||||
//generate *mut f64:float64x1x4_t:void, *mut f64:float64x2x4_t:void
|
||||
|
||||
arm = vst4lane
|
||||
link-arm = vst4lane._EXTpi8r_
|
||||
const-arm = LANE
|
||||
//generate *mut f32:float32x2x4_t:void, *mut f32:float32x4x4_t:void
|
||||
|
||||
/// Multiply
|
||||
name = vmul
|
||||
a = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2
|
||||
|
||||
@@ -427,8 +427,10 @@ enum Suffix {
|
||||
OutSuffix,
|
||||
OutNSuffix,
|
||||
OutNox,
|
||||
In1Nox,
|
||||
OutDupNox,
|
||||
OutLaneNox,
|
||||
In1LaneNox,
|
||||
Lane,
|
||||
In2,
|
||||
In2Lane,
|
||||
@@ -909,7 +911,25 @@ fn ext(s: &str, in_t: &[&str; 3], out_t: &str) -> String {
|
||||
.replace("_EXT3_", &type_to_ext(in_t[1], false, false, false))
|
||||
.replace("_EXTr3_", &type_to_ext(in_t[1], false, true, false))
|
||||
.replace("_EXTv2_", &type_to_ext(out_t, true, false, false))
|
||||
.replace("_EXTpi8_", &type_to_ext(in_t[1], false, false, true))
|
||||
.replace("_EXTpi82_", &type_to_ext(out_t, false, false, true))
|
||||
.replace("_EXTpi8r_", &type_to_ext(in_t[1], false, true, true))
|
||||
}
|
||||
|
||||
/// Returns `true` when `name` has the shape `vld<N>..._<suffix>` with exactly
/// one `_`, a structure count `N` greater than 1, and a type suffix that
/// starts with `s` or `f` (e.g. `vld2_s8`, `vld4q_f32`).
///
/// Names that do not start with `vld`, are too short, or have a non-digit
/// where the structure count is expected yield `false` instead of panicking.
fn is_vldx(name: &str) -> bool {
    // Exactly two `_`-separated parts: the intrinsic stem and its type suffix.
    let mut parts = name.split('_');
    let suffix = match (parts.next(), parts.next(), parts.next()) {
        (Some(_), Some(suffix), None) => suffix,
        _ => return false,
    };
    // The character right after the `vld` prefix is the structure count.
    let count = name
        .strip_prefix("vld")
        .and_then(|rest| rest.chars().next())
        .and_then(|c| c.to_digit(10));
    matches!(count, Some(n) if n > 1) && (suffix.starts_with('s') || suffix.starts_with('f'))
}
|
||||
|
||||
/// Returns `true` when `name` has the shape `vst<N>..._<suffix>` with exactly
/// one `_`, a structure count `N` greater than 1, and a type suffix that
/// starts with `s` or `f` (e.g. `vst2_s8`, `vst4q_f32`).
///
/// Names that do not start with `vst`, are too short, or have a non-digit
/// where the structure count is expected yield `false` instead of panicking.
fn is_vstx(name: &str) -> bool {
    // Exactly two `_`-separated parts: the intrinsic stem and its type suffix.
    let mut parts = name.split('_');
    let suffix = match (parts.next(), parts.next(), parts.next()) {
        (Some(_), Some(suffix), None) => suffix,
        _ => return false,
    };
    // The character right after the `vst` prefix is the structure count.
    let count = name
        .strip_prefix("vst")
        .and_then(|rest| rest.chars().next())
        .and_then(|c| c.to_digit(10));
    matches!(count, Some(n) if n > 1) && (suffix.starts_with('s') || suffix.starts_with('f'))
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
@@ -964,6 +984,11 @@ fn gen_aarch64(
|
||||
current_name,
|
||||
type_to_suffix(&type_to_sub_type(out_t))
|
||||
),
|
||||
In1Nox => format!(
|
||||
"{}{}",
|
||||
current_name,
|
||||
type_to_suffix(&type_to_sub_type(in_t[1]))
|
||||
),
|
||||
OutDupNox => format!(
|
||||
"{}{}",
|
||||
current_name,
|
||||
@@ -974,6 +999,11 @@ fn gen_aarch64(
|
||||
current_name,
|
||||
type_to_lane_suffix(&type_to_sub_type(out_t))
|
||||
),
|
||||
In1LaneNox => format!(
|
||||
"{}{}",
|
||||
current_name,
|
||||
type_to_lane_suffix(&type_to_sub_type(in_t[1]))
|
||||
),
|
||||
Lane => format!("{}{}", current_name, type_to_lane_suffixes(out_t, in_t[1])),
|
||||
In2 => format!("{}{}", current_name, type_to_suffix(in_t[2])),
|
||||
In2Lane => format!("{}{}", current_name, type_to_lane_suffixes(out_t, in_t[2])),
|
||||
@@ -1030,23 +1060,32 @@ fn gen_aarch64(
|
||||
};
|
||||
let (ext_inputs, ext_output) = {
|
||||
if const_aarch64.is_some() {
|
||||
if matches!(fn_type, Fntype::Load) {
|
||||
if !matches!(fn_type, Fntype::Normal) {
|
||||
let ptr_type = match fn_type {
|
||||
Fntype::Load => "*const i8",
|
||||
Fntype::Store => "*mut i8",
|
||||
_ => panic!("unsupported fn type"),
|
||||
};
|
||||
let sub = type_to_sub_type(in_t[1]);
|
||||
(
|
||||
match type_sub_len(in_t[1]) {
|
||||
1 => format!("a: {}, n: i64, ptr: *const i8", sub),
|
||||
2 => format!("a: {}, b: {}, n: i64, ptr: *const i8", sub, sub),
|
||||
1 => format!("a: {}, n: i64, ptr: {}", sub, ptr_type),
|
||||
2 => format!("a: {}, b: {}, n: i64, ptr: {}", sub, sub, ptr_type),
|
||||
3 => format!(
|
||||
"a: {}, b: {}, c: {}, n: i64, ptr: *const i8",
|
||||
sub, sub, sub
|
||||
"a: {}, b: {}, c: {}, n: i64, ptr: {}",
|
||||
sub, sub, sub, ptr_type
|
||||
),
|
||||
4 => format!(
|
||||
"a: {}, b: {}, c: {}, d: {}, n: i64, ptr: *const i8",
|
||||
sub, sub, sub, sub
|
||||
"a: {}, b: {}, c: {}, d: {}, n: i64, ptr: {}",
|
||||
sub, sub, sub, sub, ptr_type
|
||||
),
|
||||
_ => panic!("unsupported type: {}", in_t[1]),
|
||||
},
|
||||
format!(" -> {}", out_t),
|
||||
if out_t != "void" {
|
||||
format!(" -> {}", out_t)
|
||||
} else {
|
||||
String::new()
|
||||
},
|
||||
)
|
||||
} else {
|
||||
(
|
||||
@@ -1061,19 +1100,23 @@ fn gen_aarch64(
|
||||
}
|
||||
} else if matches!(fn_type, Fntype::Store) {
|
||||
let sub = type_to_sub_type(in_t[1]);
|
||||
let native = type_to_native_type(in_t[1]);
|
||||
(
|
||||
match type_sub_len(in_t[1]) {
|
||||
1 => format!("a: {}, ptr: *mut {}", sub, native),
|
||||
2 => format!("a: {}, b: {}, ptr: *mut {}", sub, sub, native),
|
||||
3 => format!("a: {}, b: {}, c: {}, ptr: *mut {}", sub, sub, sub, native),
|
||||
4 => format!(
|
||||
"a: {}, b: {}, c: {}, d: {}, ptr: *mut {}",
|
||||
sub, sub, sub, sub, native
|
||||
),
|
||||
let ptr_type = if is_vstx(&name) {
|
||||
"i8".to_string()
|
||||
} else {
|
||||
type_to_native_type(in_t[1])
|
||||
};
|
||||
let subs = match type_sub_len(in_t[1]) {
|
||||
1 => format!("a: {}", sub),
|
||||
2 => format!("a: {}, b: {}", sub, sub),
|
||||
3 => format!("a: {}, b: {}, c: {}", sub, sub, sub),
|
||||
4 => format!("a: {}, b: {}, c: {}, d: {}", sub, sub, sub, sub),
|
||||
_ => panic!("unsupported type: {}", in_t[1]),
|
||||
},
|
||||
String::new(),
|
||||
};
|
||||
(format!("{}, ptr: *mut {}", subs, ptr_type), String::new())
|
||||
} else if is_vldx(&name) {
|
||||
(
|
||||
format!("ptr: *const {}", type_to_sub_type(out_t)),
|
||||
format!(" -> {}", out_t),
|
||||
)
|
||||
} else {
|
||||
(
|
||||
@@ -1185,7 +1228,7 @@ fn gen_aarch64(
|
||||
};
|
||||
let call_params = {
|
||||
if let (Some(const_aarch64), Some(_)) = (const_aarch64, link_aarch64) {
|
||||
if matches!(fn_type, Fntype::Load) {
|
||||
if !matches!(fn_type, Fntype::Normal) {
|
||||
let subs = match type_sub_len(in_t[1]) {
|
||||
1 => "b",
|
||||
2 => "b.0, b.1",
|
||||
@@ -1195,7 +1238,7 @@ fn gen_aarch64(
|
||||
};
|
||||
format!(
|
||||
r#"{}
|
||||
{}{}({}, {} as i64, a as *const i8)"#,
|
||||
{}{}({}, {} as i64, a.cast())"#,
|
||||
multi_calls,
|
||||
ext_c,
|
||||
current_fn,
|
||||
@@ -1217,14 +1260,17 @@ fn gen_aarch64(
|
||||
_ => String::new(),
|
||||
}
|
||||
}
|
||||
} else if matches!(fn_type, Fntype::Store) {
|
||||
} else if link_aarch64.is_some() && matches!(fn_type, Fntype::Store) {
|
||||
let cast = if is_vstx(&name) { ".cast()" } else { "" };
|
||||
match type_sub_len(in_t[1]) {
|
||||
1 => format!(r#"{}{}(b, a)"#, ext_c, current_fn),
|
||||
2 => format!(r#"{}{}(b.0, b.1, a)"#, ext_c, current_fn),
|
||||
3 => format!(r#"{}{}(b.0, b.1, b.2, a)"#, ext_c, current_fn),
|
||||
4 => format!(r#"{}{}(b.0, b.1, b.2, b.3, a)"#, ext_c, current_fn),
|
||||
1 => format!(r#"{}{}(b, a{})"#, ext_c, current_fn, cast),
|
||||
2 => format!(r#"{}{}(b.0, b.1, a{})"#, ext_c, current_fn, cast),
|
||||
3 => format!(r#"{}{}(b.0, b.1, b.2, a{})"#, ext_c, current_fn, cast),
|
||||
4 => format!(r#"{}{}(b.0, b.1, b.2, b.3, a{})"#, ext_c, current_fn, cast),
|
||||
_ => panic!("unsupported type: {}", in_t[1]),
|
||||
}
|
||||
} else if link_aarch64.is_some() && is_vldx(&name) {
|
||||
format!(r#"{}{}(a.cast())"#, ext_c, current_fn,)
|
||||
} else {
|
||||
let trans: [&str; 2] = if link_t[3] != out_t {
|
||||
["transmute(", ")"]
|
||||
@@ -1406,7 +1452,7 @@ fn gen_store_test(
|
||||
unsafe fn test_{}() {{"#,
|
||||
name,
|
||||
);
|
||||
for (a, _, _, _, e) in current_tests {
|
||||
for (a, _, _, constn, e) in current_tests {
|
||||
let a: Vec<String> = a.iter().take(type_len + 1).cloned().collect();
|
||||
let e: Vec<String> = e.iter().take(type_len).cloned().collect();
|
||||
let mut input = String::from("[");
|
||||
@@ -1425,12 +1471,15 @@ fn gen_store_test(
|
||||
output.push_str(&e[i])
|
||||
}
|
||||
output.push_str("]");
|
||||
let const_n = constn
|
||||
.as_deref()
|
||||
.map_or(String::new(), |n| format!("::<{}>", n.to_string()));
|
||||
let t = format!(
|
||||
r#"
|
||||
let a: [{}; {}] = {};
|
||||
let e: [{}; {}] = {};
|
||||
let mut r: [{}; {}] = [0{}; {}];
|
||||
{}(r.as_mut_ptr(), {}(a[1..].as_ptr()));
|
||||
{}{}(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
|
||||
assert_eq!(r, e);
|
||||
"#,
|
||||
type_to_native_type(in_t[1]),
|
||||
@@ -1444,7 +1493,7 @@ fn gen_store_test(
|
||||
type_to_native_type(in_t[1]),
|
||||
type_len,
|
||||
name,
|
||||
name.replace("st", "ld"),
|
||||
const_n,
|
||||
);
|
||||
test.push_str(&t);
|
||||
}
|
||||
@@ -1613,6 +1662,11 @@ fn gen_arm(
|
||||
current_name,
|
||||
type_to_suffix(&type_to_sub_type(out_t))
|
||||
),
|
||||
In1Nox => format!(
|
||||
"{}{}",
|
||||
current_name,
|
||||
type_to_suffix(&type_to_sub_type(in_t[1]))
|
||||
),
|
||||
OutDupNox => format!(
|
||||
"{}{}",
|
||||
current_name,
|
||||
@@ -1623,6 +1677,11 @@ fn gen_arm(
|
||||
current_name,
|
||||
type_to_lane_suffix(&type_to_sub_type(out_t))
|
||||
),
|
||||
In1LaneNox => format!(
|
||||
"{}{}",
|
||||
current_name,
|
||||
type_to_lane_suffix(&type_to_sub_type(in_t[1]))
|
||||
),
|
||||
Lane => format!("{}{}", current_name, type_to_lane_suffixes(out_t, in_t[1])),
|
||||
In2 => format!("{}{}", current_name, type_to_suffix(in_t[2])),
|
||||
In2Lane => format!("{}{}", current_name, type_to_lane_suffixes(out_t, in_t[2])),
|
||||
@@ -1752,7 +1811,12 @@ fn gen_arm(
|
||||
};
|
||||
let (arm_ext_inputs, arm_ext_output) = {
|
||||
if let Some(const_arm) = const_arm {
|
||||
if matches!(fn_type, Fntype::Load) {
|
||||
if !matches!(fn_type, Fntype::Normal) {
|
||||
let ptr_type = match fn_type {
|
||||
Fntype::Load => "*const i8",
|
||||
Fntype::Store => "*mut i8",
|
||||
_ => panic!("unsupported fn type"),
|
||||
};
|
||||
let sub_type = type_to_sub_type(in_t[1]);
|
||||
let inputs = match type_sub_len(in_t[1]) {
|
||||
1 => format!("a: {}", sub_type),
|
||||
@@ -1765,7 +1829,7 @@ fn gen_arm(
|
||||
_ => panic!("unknown type: {}", in_t[1]),
|
||||
};
|
||||
(
|
||||
format!("ptr: *const i8, {}, n: i32, size: i32", inputs),
|
||||
format!("ptr: {}, {}, n: i32, size: i32", ptr_type, inputs),
|
||||
String::new(),
|
||||
)
|
||||
} else {
|
||||
@@ -1817,10 +1881,20 @@ fn gen_arm(
|
||||
),
|
||||
_ => panic!("unknown type: {}", in_t[1]),
|
||||
};
|
||||
let (ptr_type, size) = if is_vstx(&name) {
|
||||
("i8".to_string(), ", size: i32")
|
||||
} else {
|
||||
(type_to_native_type(in_t[1]), "")
|
||||
};
|
||||
(
|
||||
format!("ptr: *mut {}, {}", type_to_native_type(in_t[1]), inputs),
|
||||
format!("ptr: *mut {}, {}{}", ptr_type, inputs, size),
|
||||
String::new(),
|
||||
)
|
||||
} else if is_vldx(&name) {
|
||||
(
|
||||
format!("ptr: *const i8, size: i32"),
|
||||
format!(" -> {}", out_t),
|
||||
)
|
||||
} else {
|
||||
(String::new(), String::new())
|
||||
}
|
||||
@@ -1836,7 +1910,12 @@ fn gen_arm(
|
||||
));
|
||||
let (aarch64_ext_inputs, aarch64_ext_output) = {
|
||||
if const_aarch64.is_some() {
|
||||
if matches!(fn_type, Fntype::Load) {
|
||||
if !matches!(fn_type, Fntype::Normal) {
|
||||
let ptr_type = match fn_type {
|
||||
Fntype::Load => "*const i8",
|
||||
Fntype::Store => "*mut i8",
|
||||
_ => panic!("unsupported fn type"),
|
||||
};
|
||||
let sub_type = type_to_sub_type(in_t[1]);
|
||||
let mut inputs = match type_sub_len(in_t[1]) {
|
||||
1 => format!("a: {}", sub_type,),
|
||||
@@ -1848,8 +1927,13 @@ fn gen_arm(
|
||||
),
|
||||
_ => panic!("unknown type: {}", in_t[1]),
|
||||
};
|
||||
inputs.push_str(&format!(", n: i64, ptr: *const i8"));
|
||||
(inputs, format!(" -> {}", out_t))
|
||||
inputs.push_str(&format!(", n: i64, ptr: {}", ptr_type));
|
||||
let out = if out_t == "void" {
|
||||
String::new()
|
||||
} else {
|
||||
format!(" -> {}", out_t)
|
||||
};
|
||||
(inputs, out)
|
||||
} else {
|
||||
(
|
||||
match para_num {
|
||||
@@ -1886,8 +1970,18 @@ fn gen_arm(
|
||||
),
|
||||
_ => panic!("unknown type: {}", in_t[1]),
|
||||
};
|
||||
inputs.push_str(&format!(", ptr: *mut {}", type_to_native_type(in_t[0])));
|
||||
let ptr_type = if is_vstx(&name) {
|
||||
"i8".to_string()
|
||||
} else {
|
||||
type_to_native_type(in_t[1])
|
||||
};
|
||||
inputs.push_str(&format!(", ptr: *mut {}", ptr_type));
|
||||
(inputs, String::new())
|
||||
} else if is_vldx(&name) {
|
||||
(
|
||||
format!("ptr: *const {}", type_to_sub_type(out_t)),
|
||||
format!(" -> {}", out_t),
|
||||
)
|
||||
} else {
|
||||
(String::new(), String::new())
|
||||
}
|
||||
@@ -1962,7 +2056,7 @@ fn gen_arm(
|
||||
let function = if separate {
|
||||
let call_arm = {
|
||||
let arm_params = if let (Some(const_arm), Some(_)) = (const_arm, link_arm) {
|
||||
if matches!(fn_type, Fntype::Load) {
|
||||
if !matches!(fn_type, Fntype::Normal) {
|
||||
let subs = match type_sub_len(in_t[1]) {
|
||||
1 => "b",
|
||||
2 => "b.0, b.1",
|
||||
@@ -1971,7 +2065,7 @@ fn gen_arm(
|
||||
_ => "",
|
||||
};
|
||||
format!(
|
||||
"{}(a as *const i8, {}, {}, {})",
|
||||
"{}(a.cast(), {}, {}, {})",
|
||||
current_fn,
|
||||
subs,
|
||||
constn.as_deref().unwrap(),
|
||||
@@ -2008,13 +2102,27 @@ fn gen_arm(
|
||||
_ => String::new(),
|
||||
}
|
||||
} else if matches!(fn_type, Fntype::Store) {
|
||||
let (cast, size) = if is_vstx(&name) {
|
||||
(
|
||||
".cast()",
|
||||
format!(", {}", type_bits(&type_to_sub_type(in_t[1])) / 8),
|
||||
)
|
||||
} else {
|
||||
("", String::new())
|
||||
};
|
||||
match type_sub_len(in_t[1]) {
|
||||
1 => format!("{}(a, b)", current_fn),
|
||||
2 => format!("{}(a, b.0, b.1)", current_fn),
|
||||
3 => format!("{}(a, b.0, b.1, b.2)", current_fn),
|
||||
4 => format!("{}(a, b.0, b.1, b.2, b.3)", current_fn),
|
||||
1 => format!("{}(a{}, b{})", current_fn, cast, size),
|
||||
2 => format!("{}(a{}, b.0, b.1{})", current_fn, cast, size),
|
||||
3 => format!("{}(a{}, b.0, b.1, b.2{})", current_fn, cast, size),
|
||||
4 => format!("{}(a{}, b.0, b.1, b.2, b.3{})", current_fn, cast, size),
|
||||
_ => String::new(),
|
||||
}
|
||||
} else if link_arm.is_some() && is_vldx(&name) {
|
||||
format!(
|
||||
"{}(a as *const i8, {})",
|
||||
current_fn,
|
||||
type_bits(&type_to_sub_type(out_t)) / 8
|
||||
)
|
||||
} else {
|
||||
String::new()
|
||||
};
|
||||
@@ -2028,7 +2136,7 @@ fn gen_arm(
|
||||
let call_aarch64 = {
|
||||
let aarch64_params =
|
||||
if let (Some(const_aarch64), Some(_)) = (const_aarch64, link_aarch64) {
|
||||
if matches!(fn_type, Fntype::Load) {
|
||||
if !matches!(fn_type, Fntype::Normal) {
|
||||
let subs = match type_sub_len(in_t[1]) {
|
||||
1 => "b",
|
||||
2 => "b.0, b.1",
|
||||
@@ -2037,7 +2145,7 @@ fn gen_arm(
|
||||
_ => "",
|
||||
};
|
||||
format!(
|
||||
"{}({}, {} as i64, a as *const i8)",
|
||||
"{}({}, {} as i64, a.cast())",
|
||||
current_fn,
|
||||
subs,
|
||||
constn.as_deref().unwrap()
|
||||
@@ -2056,13 +2164,16 @@ fn gen_arm(
|
||||
_ => String::new(),
|
||||
}
|
||||
} else if matches!(fn_type, Fntype::Store) {
|
||||
let cast = if is_vstx(&name) { ".cast()" } else { "" };
|
||||
match type_sub_len(in_t[1]) {
|
||||
1 => format!("{}(b, a)", current_fn),
|
||||
2 => format!("{}(b.0, b.1, a)", current_fn),
|
||||
3 => format!("{}(b.0, b.1, b.2, a)", current_fn),
|
||||
4 => format!("{}(b.0, b.1, b.2, b.3, a)", current_fn),
|
||||
1 => format!("{}(b, a{})", current_fn, cast),
|
||||
2 => format!("{}(b.0, b.1, a{})", current_fn, cast),
|
||||
3 => format!("{}(b.0, b.1, b.2, a{})", current_fn, cast),
|
||||
4 => format!("{}(b.0, b.1, b.2, b.3, a{})", current_fn, cast),
|
||||
_ => String::new(),
|
||||
}
|
||||
} else if link_aarch64.is_some() && is_vldx(&name) {
|
||||
format!("{}(a.cast())", current_fn)
|
||||
} else {
|
||||
String::new()
|
||||
};
|
||||
@@ -2599,6 +2710,10 @@ fn get_call(
|
||||
fn_name.push_str(&type_to_suffix(&type_to_sub_type(&type_to_signed(
|
||||
&String::from(out_t),
|
||||
))));
|
||||
} else if fn_format[1] == "in1signednox" {
|
||||
fn_name.push_str(&type_to_suffix(&type_to_sub_type(&type_to_signed(
|
||||
&String::from(in_t[1]),
|
||||
))));
|
||||
} else if fn_format[1] == "outsigneddupnox" {
|
||||
fn_name.push_str(&type_to_dup_suffix(&type_to_sub_type(&type_to_signed(
|
||||
&String::from(out_t),
|
||||
@@ -2607,6 +2722,10 @@ fn get_call(
|
||||
fn_name.push_str(&type_to_lane_suffix(&type_to_sub_type(&type_to_signed(
|
||||
&String::from(out_t),
|
||||
))));
|
||||
} else if fn_format[1] == "in1signedlanenox" {
|
||||
fn_name.push_str(&type_to_lane_suffix(&type_to_sub_type(&type_to_signed(
|
||||
&String::from(in_t[1]),
|
||||
))));
|
||||
} else if fn_format[1] == "unsigned" {
|
||||
fn_name.push_str(type_to_suffix(type_to_unsigned(in_t[1])));
|
||||
} else if fn_format[1] == "doubleself" {
|
||||
@@ -2672,6 +2791,8 @@ fn get_call(
|
||||
r#"let {}: {} = {}({});"#,
|
||||
re_name, re_type, fn_name, param_str
|
||||
)
|
||||
} else if fn_name.starts_with("*") {
|
||||
format!(r#"{} = {};"#, fn_name, param_str)
|
||||
} else {
|
||||
format!(r#"{}({})"#, fn_name, param_str)
|
||||
};
|
||||
@@ -2827,10 +2948,14 @@ mod test {
|
||||
suffix = OutSuffix;
|
||||
} else if line.starts_with("out-nox") {
|
||||
suffix = OutNox;
|
||||
} else if line.starts_with("in1-nox") {
|
||||
suffix = In1Nox;
|
||||
} else if line.starts_with("out-dup-nox") {
|
||||
suffix = OutDupNox;
|
||||
} else if line.starts_with("out-lane-nox") {
|
||||
suffix = OutLaneNox;
|
||||
} else if line.starts_with("in1-lane-nox") {
|
||||
suffix = In1LaneNox;
|
||||
} else if line.starts_with("lane-suffixes") {
|
||||
suffix = Lane;
|
||||
} else if line.starts_with("in2-suffix") {
|
||||
|
||||
@@ -138,6 +138,10 @@ pub fn assert(shim_addr: usize, fnname: &str, expected: &str) {
|
||||
// removed once it has been addressed in LLVM.
|
||||
"fcvtzu" | "fcvtzs" | "vcvt" => 64,
|
||||
|
||||
// core_arch/src/arm_shared/simd32
|
||||
// vst1q_p64_x4_nop : #instructions = 33 >= 22 (limit)
|
||||
"nop" if fnname.contains("vst1q_p64") => 34,
|
||||
|
||||
// Original limit was 20 instructions, but ARM DSP Intrinsics
|
||||
// are exactly 20 instructions long. So, bump the limit to 22
|
||||
// instead of adding a long list of exceptions here.
|
||||
|
||||
Reference in New Issue
Block a user