Complete vst1 neon instructions (#1221)

Sparrow Li
2021-09-24 20:26:29 +08:00
committed by GitHub
parent 26cce19427
commit bdea403c54
8 changed files with 1200 additions and 168 deletions
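A note on reading the hunks below: each changed hunk shows the old line directly above its replacement, and the recurring test change swaps a vld1*-based load for core::ptr::read_unaligned when building the value passed to vst1*. A minimal hedged sketch of the new pattern (the function name and target_feature gating here are illustrative, not from the commit):

// Sketch: the test arrays are sliced at [1..], so the pointer is one element
// past the base and may not be suitably aligned for the multi-register struct
// type; read_unaligned makes that explicit instead of round-tripping through
// the matching vld1 intrinsic.
#[target_feature(enable = "neon")]
unsafe fn store_pattern() {
    use core::arch::aarch64::*;
    let a: [f64; 3] = [0., 1., 2.];
    let mut r: [f64; 2] = [0f64; 2];
    let v: float64x1x2_t = core::ptr::read_unaligned(a[1..].as_ptr().cast());
    vst1_f64_x2(r.as_mut_ptr(), v);
    assert_eq!(r, [1., 2.]);
}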

View File

@@ -13066,7 +13066,7 @@ mod test {
let a: [f64; 3] = [0., 1., 2.];
let e: [f64; 2] = [1., 2.];
let mut r: [f64; 2] = [0f64; 2];
vst1_f64_x2(r.as_mut_ptr(), vld1_f64_x2(a[1..].as_ptr()));
vst1_f64_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -13075,7 +13075,7 @@ mod test {
let a: [f64; 5] = [0., 1., 2., 3., 4.];
let e: [f64; 4] = [1., 2., 3., 4.];
let mut r: [f64; 4] = [0f64; 4];
vst1q_f64_x2(r.as_mut_ptr(), vld1q_f64_x2(a[1..].as_ptr()));
vst1q_f64_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -13084,7 +13084,7 @@ mod test {
let a: [f64; 4] = [0., 1., 2., 3.];
let e: [f64; 3] = [1., 2., 3.];
let mut r: [f64; 3] = [0f64; 3];
vst1_f64_x3(r.as_mut_ptr(), vld1_f64_x3(a[1..].as_ptr()));
vst1_f64_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -13093,7 +13093,7 @@ mod test {
let a: [f64; 7] = [0., 1., 2., 3., 4., 5., 6.];
let e: [f64; 6] = [1., 2., 3., 4., 5., 6.];
let mut r: [f64; 6] = [0f64; 6];
vst1q_f64_x3(r.as_mut_ptr(), vld1q_f64_x3(a[1..].as_ptr()));
vst1q_f64_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -13102,7 +13102,7 @@ mod test {
let a: [f64; 5] = [0., 1., 2., 3., 4.];
let e: [f64; 4] = [1., 2., 3., 4.];
let mut r: [f64; 4] = [0f64; 4];
vst1_f64_x4(r.as_mut_ptr(), vld1_f64_x4(a[1..].as_ptr()));
vst1_f64_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -13111,7 +13111,7 @@ mod test {
let a: [f64; 9] = [0., 1., 2., 3., 4., 5., 6., 7., 8.];
let e: [f64; 8] = [1., 2., 3., 4., 5., 6., 7., 8.];
let mut r: [f64; 8] = [0f64; 8];
vst1q_f64_x4(r.as_mut_ptr(), vld1q_f64_x4(a[1..].as_ptr()));
vst1q_f64_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}

View File

@@ -678,7 +678,7 @@ pub unsafe fn vld1_dup_f64(ptr: *const f64) -> float64x1_t {
/// Load multiple single-element structures to one, two, three, or four registers
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(ldr))]
#[cfg_attr(test, assert_instr(ld1r))]
pub unsafe fn vld1q_dup_f64(ptr: *const f64) -> float64x2_t {
let x = vld1q_lane_f64::<0>(ptr, transmute(f64x2::splat(0.)));
simd_shuffle2!(x, x, [0, 0])
@@ -698,7 +698,7 @@ pub unsafe fn vld1_lane_f64<const LANE: i32>(ptr: *const f64, src: float64x1_t)
#[inline]
#[target_feature(enable = "neon")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(ldr, LANE = 1))]
#[cfg_attr(test, assert_instr(ld1, LANE = 1))]
pub unsafe fn vld1q_lane_f64<const LANE: i32>(ptr: *const f64, src: float64x2_t) -> float64x2_t {
static_assert_imm1!(LANE);
simd_insert(src, LANE as u32, *ptr)
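For context, a hedged usage sketch of the lane load whose test assertion changed above from ldr to ld1 (the dup variant similarly now expects ld1r, since it replicates the loaded scalar to both lanes; the wrapper function name is illustrative):

use core::arch::aarch64::*;

// Insert a scalar loaded from memory into lane 1 of a 2 x f64 vector; on
// AArch64 this lowers to an `ld1 { v.d }[1], [x]` lane load rather than a
// plain scalar ldr, hence the new assertion.
#[target_feature(enable = "neon")]
unsafe fn load_into_lane(ptr: *const f64, v: float64x2_t) -> float64x2_t {
    vld1q_lane_f64::<1>(ptr, v)
}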
@@ -886,7 +886,7 @@ pub unsafe fn vst1q_p16(ptr: *mut p16, a: poly16x8_t) {
/// Store multiple single-element structures from one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon")]
#[target_feature(enable = "neon,aes")]
#[cfg_attr(test, assert_instr(str))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn vst1_p64(ptr: *mut p64, a: poly64x1_t) {
@@ -895,7 +895,7 @@ pub unsafe fn vst1_p64(ptr: *mut p64, a: poly64x1_t) {
/// Store multiple single-element structures from one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon")]
#[target_feature(enable = "neon,aes")]
#[cfg_attr(test, assert_instr(str))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn vst1q_p64(ptr: *mut p64, a: poly64x2_t) {
@@ -4803,29 +4803,6 @@ mod tests {
assert_eq!(r, e)
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1_p64() {
let mut vals = [0_u64; 2];
let a = u64x1::new(1);
vst1_p64(vals[1..].as_mut_ptr(), transmute(a));
assert_eq!(vals[0], 0);
assert_eq!(vals[1], 1);
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1q_p64() {
let mut vals = [0_u64; 3];
let a = u64x2::new(1, 2);
vst1q_p64(vals[1..].as_mut_ptr(), transmute(a));
assert_eq!(vals[0], 0);
assert_eq!(vals[1], 1);
assert_eq!(vals[2], 2);
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1_f64() {
let mut vals = [0_f64; 2];

View File

@@ -480,6 +480,22 @@ pub unsafe fn vst1q_p16(ptr: *mut p16, a: poly16x8_t) {
vst1q_v8i16(ptr as *const i8, transmute(a), align_of::<p8>() as i32)
}
/// Store multiple single-element structures from one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon,aes,v8")]
#[cfg_attr(test, assert_instr("vst1.64"))]
pub unsafe fn vst1_p64(ptr: *mut p64, a: poly64x1_t) {
vst1_v1i64(ptr as *const i8, transmute(a), align_of::<p64>() as i32)
}
/// Store multiple single-element structures from one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon,aes,v8")]
#[cfg_attr(test, assert_instr("vst1.64"))]
pub unsafe fn vst1q_p64(ptr: *mut p64, a: poly64x2_t) {
vst1q_v2i64(ptr as *const i8, transmute(a), align_of::<p64>() as i32)
}
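A hedged usage sketch for the ARM-side vst1_p64 added above (the transmute from u64 mirrors the tests moved later in this commit; the wrapper name is illustrative):

use core::arch::arm::*;
use core::mem::transmute;

// Store a single p64 element; per the assertion above this should emit
// vst1.64 on ARM.
#[target_feature(enable = "neon,aes,v8")]
unsafe fn store_one_p64(dst: *mut p64) {
    let v: poly64x1_t = transmute(1_u64);
    vst1_p64(dst, v);
}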
/// Store multiple single-element structures from one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon,v7")]

View File

@@ -6616,7 +6616,7 @@ pub unsafe fn vld1_p64_x2(a: *const p64) -> poly64x1x2_t {
#[inline]
#[target_feature(enable = "neon,aes")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(ldr))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))]
pub unsafe fn vld1_p64_x3(a: *const p64) -> poly64x1x3_t {
transmute(vld1_s64_x3(transmute(a)))
@@ -6626,7 +6626,7 @@ pub unsafe fn vld1_p64_x3(a: *const p64) -> poly64x1x3_t {
#[inline]
#[target_feature(enable = "neon,aes")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(ldr))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))]
pub unsafe fn vld1_p64_x4(a: *const p64) -> poly64x1x4_t {
transmute(vld1_s64_x4(transmute(a)))
@@ -6636,7 +6636,7 @@ pub unsafe fn vld1_p64_x4(a: *const p64) -> poly64x1x4_t {
#[inline]
#[target_feature(enable = "neon,aes")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(ldr))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))]
pub unsafe fn vld1q_p64_x2(a: *const p64) -> poly64x2x2_t {
transmute(vld1q_s64_x2(transmute(a)))
@@ -6646,7 +6646,7 @@ pub unsafe fn vld1q_p64_x2(a: *const p64) -> poly64x2x2_t {
#[inline]
#[target_feature(enable = "neon,aes")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(ldr))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))]
pub unsafe fn vld1q_p64_x3(a: *const p64) -> poly64x2x3_t {
transmute(vld1q_s64_x3(transmute(a)))
@@ -6656,7 +6656,7 @@ pub unsafe fn vld1q_p64_x3(a: *const p64) -> poly64x2x3_t {
#[inline]
#[target_feature(enable = "neon,aes")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(ldr))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))]
pub unsafe fn vld1q_p64_x4(a: *const p64) -> poly64x2x4_t {
transmute(vld1q_s64_x4(transmute(a)))
@@ -7790,6 +7790,66 @@ pub unsafe fn vst1q_p16_x4(a: *mut p16, b: poly16x8x4_t) {
vst1q_s16_x4(transmute(a), transmute(b))
}
/// Store multiple single-element structures to one, two, three, or four registers
#[inline]
#[target_feature(enable = "neon,aes")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))]
pub unsafe fn vst1_p64_x2(a: *mut p64, b: poly64x1x2_t) {
vst1_s64_x2(transmute(a), transmute(b))
}
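A hedged sketch of the new two-register store in use; it loads via the existing vld1_p64_x2 and mirrors test_vst1_p64_x2 further down (the wrapper name is illustrative):

use core::arch::aarch64::*;

// Copy two single-element p64 structures: load a poly64x1x2_t, then store it
// back with the new intrinsic (st1 on aarch64, vst1 on arm per the
// assertions above).
#[target_feature(enable = "neon,aes")]
unsafe fn copy_p64_pair(dst: *mut p64, src: *const p64) {
    let pair: poly64x1x2_t = vld1_p64_x2(src);
    vst1_p64_x2(dst, pair);
}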
/// Store multiple single-element structures to one, two, three, or four registers
#[inline]
#[target_feature(enable = "neon,aes")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))]
pub unsafe fn vst1_p64_x3(a: *mut p64, b: poly64x1x3_t) {
vst1_s64_x3(transmute(a), transmute(b))
}
/// Store multiple single-element structures to one, two, three, or four registers
#[inline]
#[target_feature(enable = "neon,aes")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))]
pub unsafe fn vst1_p64_x4(a: *mut p64, b: poly64x1x4_t) {
vst1_s64_x4(transmute(a), transmute(b))
}
/// Store multiple single-element structures to one, two, three, or four registers
#[inline]
#[target_feature(enable = "neon,aes")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))]
pub unsafe fn vst1q_p64_x2(a: *mut p64, b: poly64x2x2_t) {
vst1q_s64_x2(transmute(a), transmute(b))
}
/// Store multiple single-element structures to one, two, three, or four registers
#[inline]
#[target_feature(enable = "neon,aes")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))]
pub unsafe fn vst1q_p64_x3(a: *mut p64, b: poly64x2x3_t) {
vst1q_s64_x3(transmute(a), transmute(b))
}
/// Store multiple single-element structures to one, two, three, or four registers
#[inline]
#[target_feature(enable = "neon,aes")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))]
pub unsafe fn vst1q_p64_x4(a: *mut p64, b: poly64x2x4_t) {
vst1q_s64_x4(transmute(a), transmute(b))
}
/// Store multiple single-element structures to one, two, three, or four registers
#[inline]
#[cfg(target_arch = "arm")]
@@ -21642,7 +21702,7 @@ mod test {
let a: [i8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
let e: [i8; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
let mut r: [i8; 16] = [0i8; 16];
vst1_s8_x2(r.as_mut_ptr(), vld1_s8_x2(a[1..].as_ptr()));
vst1_s8_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -21651,7 +21711,7 @@ mod test {
let a: [i16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
let e: [i16; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
let mut r: [i16; 8] = [0i16; 8];
vst1_s16_x2(r.as_mut_ptr(), vld1_s16_x2(a[1..].as_ptr()));
vst1_s16_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -21660,7 +21720,7 @@ mod test {
let a: [i32; 5] = [0, 1, 2, 3, 4];
let e: [i32; 4] = [1, 2, 3, 4];
let mut r: [i32; 4] = [0i32; 4];
vst1_s32_x2(r.as_mut_ptr(), vld1_s32_x2(a[1..].as_ptr()));
vst1_s32_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -21669,7 +21729,7 @@ mod test {
let a: [i64; 3] = [0, 1, 2];
let e: [i64; 2] = [1, 2];
let mut r: [i64; 2] = [0i64; 2];
vst1_s64_x2(r.as_mut_ptr(), vld1_s64_x2(a[1..].as_ptr()));
vst1_s64_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -21678,7 +21738,7 @@ mod test {
let a: [i8; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
let e: [i8; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
let mut r: [i8; 32] = [0i8; 32];
vst1q_s8_x2(r.as_mut_ptr(), vld1q_s8_x2(a[1..].as_ptr()));
vst1q_s8_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -21687,7 +21747,7 @@ mod test {
let a: [i16; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
let e: [i16; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
let mut r: [i16; 16] = [0i16; 16];
vst1q_s16_x2(r.as_mut_ptr(), vld1q_s16_x2(a[1..].as_ptr()));
vst1q_s16_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -21696,7 +21756,7 @@ mod test {
let a: [i32; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
let e: [i32; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
let mut r: [i32; 8] = [0i32; 8];
vst1q_s32_x2(r.as_mut_ptr(), vld1q_s32_x2(a[1..].as_ptr()));
vst1q_s32_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -21705,7 +21765,7 @@ mod test {
let a: [i64; 5] = [0, 1, 2, 3, 4];
let e: [i64; 4] = [1, 2, 3, 4];
let mut r: [i64; 4] = [0i64; 4];
vst1q_s64_x2(r.as_mut_ptr(), vld1q_s64_x2(a[1..].as_ptr()));
vst1q_s64_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -21714,7 +21774,7 @@ mod test {
let a: [i8; 25] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24];
let e: [i8; 24] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24];
let mut r: [i8; 24] = [0i8; 24];
vst1_s8_x3(r.as_mut_ptr(), vld1_s8_x3(a[1..].as_ptr()));
vst1_s8_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -21723,7 +21783,7 @@ mod test {
let a: [i16; 13] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];
let e: [i16; 12] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];
let mut r: [i16; 12] = [0i16; 12];
vst1_s16_x3(r.as_mut_ptr(), vld1_s16_x3(a[1..].as_ptr()));
vst1_s16_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -21732,7 +21792,7 @@ mod test {
let a: [i32; 7] = [0, 1, 2, 3, 4, 5, 6];
let e: [i32; 6] = [1, 2, 3, 4, 5, 6];
let mut r: [i32; 6] = [0i32; 6];
vst1_s32_x3(r.as_mut_ptr(), vld1_s32_x3(a[1..].as_ptr()));
vst1_s32_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -21741,7 +21801,7 @@ mod test {
let a: [i64; 4] = [0, 1, 2, 3];
let e: [i64; 3] = [1, 2, 3];
let mut r: [i64; 3] = [0i64; 3];
vst1_s64_x3(r.as_mut_ptr(), vld1_s64_x3(a[1..].as_ptr()));
vst1_s64_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -21750,7 +21810,7 @@ mod test {
let a: [i8; 49] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
let e: [i8; 48] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
let mut r: [i8; 48] = [0i8; 48];
vst1q_s8_x3(r.as_mut_ptr(), vld1q_s8_x3(a[1..].as_ptr()));
vst1q_s8_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -21759,7 +21819,7 @@ mod test {
let a: [i16; 25] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24];
let e: [i16; 24] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24];
let mut r: [i16; 24] = [0i16; 24];
vst1q_s16_x3(r.as_mut_ptr(), vld1q_s16_x3(a[1..].as_ptr()));
vst1q_s16_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -21768,7 +21828,7 @@ mod test {
let a: [i32; 13] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];
let e: [i32; 12] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];
let mut r: [i32; 12] = [0i32; 12];
vst1q_s32_x3(r.as_mut_ptr(), vld1q_s32_x3(a[1..].as_ptr()));
vst1q_s32_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -21777,7 +21837,7 @@ mod test {
let a: [i64; 7] = [0, 1, 2, 3, 4, 5, 6];
let e: [i64; 6] = [1, 2, 3, 4, 5, 6];
let mut r: [i64; 6] = [0i64; 6];
vst1q_s64_x3(r.as_mut_ptr(), vld1q_s64_x3(a[1..].as_ptr()));
vst1q_s64_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -21786,7 +21846,7 @@ mod test {
let a: [i8; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
let e: [i8; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
let mut r: [i8; 32] = [0i8; 32];
vst1_s8_x4(r.as_mut_ptr(), vld1_s8_x4(a[1..].as_ptr()));
vst1_s8_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -21795,7 +21855,7 @@ mod test {
let a: [i16; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
let e: [i16; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
let mut r: [i16; 16] = [0i16; 16];
vst1_s16_x4(r.as_mut_ptr(), vld1_s16_x4(a[1..].as_ptr()));
vst1_s16_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -21804,7 +21864,7 @@ mod test {
let a: [i32; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
let e: [i32; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
let mut r: [i32; 8] = [0i32; 8];
vst1_s32_x4(r.as_mut_ptr(), vld1_s32_x4(a[1..].as_ptr()));
vst1_s32_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -21813,7 +21873,7 @@ mod test {
let a: [i64; 5] = [0, 1, 2, 3, 4];
let e: [i64; 4] = [1, 2, 3, 4];
let mut r: [i64; 4] = [0i64; 4];
vst1_s64_x4(r.as_mut_ptr(), vld1_s64_x4(a[1..].as_ptr()));
vst1_s64_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -21822,7 +21882,7 @@ mod test {
let a: [i8; 65] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
let e: [i8; 64] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
let mut r: [i8; 64] = [0i8; 64];
vst1q_s8_x4(r.as_mut_ptr(), vld1q_s8_x4(a[1..].as_ptr()));
vst1q_s8_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -21831,7 +21891,7 @@ mod test {
let a: [i16; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
let e: [i16; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
let mut r: [i16; 32] = [0i16; 32];
vst1q_s16_x4(r.as_mut_ptr(), vld1q_s16_x4(a[1..].as_ptr()));
vst1q_s16_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -21840,7 +21900,7 @@ mod test {
let a: [i32; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
let e: [i32; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
let mut r: [i32; 16] = [0i32; 16];
vst1q_s32_x4(r.as_mut_ptr(), vld1q_s32_x4(a[1..].as_ptr()));
vst1q_s32_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -21849,7 +21909,7 @@ mod test {
let a: [i64; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
let e: [i64; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
let mut r: [i64; 8] = [0i64; 8];
vst1q_s64_x4(r.as_mut_ptr(), vld1q_s64_x4(a[1..].as_ptr()));
vst1q_s64_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -21858,7 +21918,7 @@ mod test {
let a: [u8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
let e: [u8; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
let mut r: [u8; 16] = [0u8; 16];
vst1_u8_x2(r.as_mut_ptr(), vld1_u8_x2(a[1..].as_ptr()));
vst1_u8_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -21867,7 +21927,7 @@ mod test {
let a: [u16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
let e: [u16; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
let mut r: [u16; 8] = [0u16; 8];
vst1_u16_x2(r.as_mut_ptr(), vld1_u16_x2(a[1..].as_ptr()));
vst1_u16_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -21876,7 +21936,7 @@ mod test {
let a: [u32; 5] = [0, 1, 2, 3, 4];
let e: [u32; 4] = [1, 2, 3, 4];
let mut r: [u32; 4] = [0u32; 4];
vst1_u32_x2(r.as_mut_ptr(), vld1_u32_x2(a[1..].as_ptr()));
vst1_u32_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -21885,7 +21945,7 @@ mod test {
let a: [u64; 3] = [0, 1, 2];
let e: [u64; 2] = [1, 2];
let mut r: [u64; 2] = [0u64; 2];
vst1_u64_x2(r.as_mut_ptr(), vld1_u64_x2(a[1..].as_ptr()));
vst1_u64_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -21894,7 +21954,7 @@ mod test {
let a: [u8; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
let e: [u8; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
let mut r: [u8; 32] = [0u8; 32];
vst1q_u8_x2(r.as_mut_ptr(), vld1q_u8_x2(a[1..].as_ptr()));
vst1q_u8_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -21903,7 +21963,7 @@ mod test {
let a: [u16; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
let e: [u16; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
let mut r: [u16; 16] = [0u16; 16];
vst1q_u16_x2(r.as_mut_ptr(), vld1q_u16_x2(a[1..].as_ptr()));
vst1q_u16_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -21912,7 +21972,7 @@ mod test {
let a: [u32; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
let e: [u32; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
let mut r: [u32; 8] = [0u32; 8];
vst1q_u32_x2(r.as_mut_ptr(), vld1q_u32_x2(a[1..].as_ptr()));
vst1q_u32_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -21921,7 +21981,7 @@ mod test {
let a: [u64; 5] = [0, 1, 2, 3, 4];
let e: [u64; 4] = [1, 2, 3, 4];
let mut r: [u64; 4] = [0u64; 4];
vst1q_u64_x2(r.as_mut_ptr(), vld1q_u64_x2(a[1..].as_ptr()));
vst1q_u64_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -21930,7 +21990,7 @@ mod test {
let a: [u8; 25] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24];
let e: [u8; 24] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24];
let mut r: [u8; 24] = [0u8; 24];
vst1_u8_x3(r.as_mut_ptr(), vld1_u8_x3(a[1..].as_ptr()));
vst1_u8_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -21939,7 +21999,7 @@ mod test {
let a: [u16; 13] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];
let e: [u16; 12] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];
let mut r: [u16; 12] = [0u16; 12];
vst1_u16_x3(r.as_mut_ptr(), vld1_u16_x3(a[1..].as_ptr()));
vst1_u16_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -21948,7 +22008,7 @@ mod test {
let a: [u32; 7] = [0, 1, 2, 3, 4, 5, 6];
let e: [u32; 6] = [1, 2, 3, 4, 5, 6];
let mut r: [u32; 6] = [0u32; 6];
vst1_u32_x3(r.as_mut_ptr(), vld1_u32_x3(a[1..].as_ptr()));
vst1_u32_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -21957,7 +22017,7 @@ mod test {
let a: [u64; 4] = [0, 1, 2, 3];
let e: [u64; 3] = [1, 2, 3];
let mut r: [u64; 3] = [0u64; 3];
vst1_u64_x3(r.as_mut_ptr(), vld1_u64_x3(a[1..].as_ptr()));
vst1_u64_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -21966,7 +22026,7 @@ mod test {
let a: [u8; 49] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
let e: [u8; 48] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
let mut r: [u8; 48] = [0u8; 48];
vst1q_u8_x3(r.as_mut_ptr(), vld1q_u8_x3(a[1..].as_ptr()));
vst1q_u8_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -21975,7 +22035,7 @@ mod test {
let a: [u16; 25] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24];
let e: [u16; 24] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24];
let mut r: [u16; 24] = [0u16; 24];
vst1q_u16_x3(r.as_mut_ptr(), vld1q_u16_x3(a[1..].as_ptr()));
vst1q_u16_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -21984,7 +22044,7 @@ mod test {
let a: [u32; 13] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];
let e: [u32; 12] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];
let mut r: [u32; 12] = [0u32; 12];
vst1q_u32_x3(r.as_mut_ptr(), vld1q_u32_x3(a[1..].as_ptr()));
vst1q_u32_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -21993,7 +22053,7 @@ mod test {
let a: [u64; 7] = [0, 1, 2, 3, 4, 5, 6];
let e: [u64; 6] = [1, 2, 3, 4, 5, 6];
let mut r: [u64; 6] = [0u64; 6];
vst1q_u64_x3(r.as_mut_ptr(), vld1q_u64_x3(a[1..].as_ptr()));
vst1q_u64_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -22002,7 +22062,7 @@ mod test {
let a: [u8; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
let e: [u8; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
let mut r: [u8; 32] = [0u8; 32];
vst1_u8_x4(r.as_mut_ptr(), vld1_u8_x4(a[1..].as_ptr()));
vst1_u8_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -22011,7 +22071,7 @@ mod test {
let a: [u16; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
let e: [u16; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
let mut r: [u16; 16] = [0u16; 16];
vst1_u16_x4(r.as_mut_ptr(), vld1_u16_x4(a[1..].as_ptr()));
vst1_u16_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -22020,7 +22080,7 @@ mod test {
let a: [u32; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
let e: [u32; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
let mut r: [u32; 8] = [0u32; 8];
vst1_u32_x4(r.as_mut_ptr(), vld1_u32_x4(a[1..].as_ptr()));
vst1_u32_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -22029,7 +22089,7 @@ mod test {
let a: [u64; 5] = [0, 1, 2, 3, 4];
let e: [u64; 4] = [1, 2, 3, 4];
let mut r: [u64; 4] = [0u64; 4];
vst1_u64_x4(r.as_mut_ptr(), vld1_u64_x4(a[1..].as_ptr()));
vst1_u64_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -22038,7 +22098,7 @@ mod test {
let a: [u8; 65] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
let e: [u8; 64] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
let mut r: [u8; 64] = [0u8; 64];
vst1q_u8_x4(r.as_mut_ptr(), vld1q_u8_x4(a[1..].as_ptr()));
vst1q_u8_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -22047,7 +22107,7 @@ mod test {
let a: [u16; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
let e: [u16; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
let mut r: [u16; 32] = [0u16; 32];
vst1q_u16_x4(r.as_mut_ptr(), vld1q_u16_x4(a[1..].as_ptr()));
vst1q_u16_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -22056,7 +22116,7 @@ mod test {
let a: [u32; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
let e: [u32; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
let mut r: [u32; 16] = [0u32; 16];
vst1q_u32_x4(r.as_mut_ptr(), vld1q_u32_x4(a[1..].as_ptr()));
vst1q_u32_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -22065,7 +22125,7 @@ mod test {
let a: [u64; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
let e: [u64; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
let mut r: [u64; 8] = [0u64; 8];
vst1q_u64_x4(r.as_mut_ptr(), vld1q_u64_x4(a[1..].as_ptr()));
vst1q_u64_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -22074,7 +22134,7 @@ mod test {
let a: [u8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
let e: [u8; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
let mut r: [u8; 16] = [0u8; 16];
vst1_p8_x2(r.as_mut_ptr(), vld1_p8_x2(a[1..].as_ptr()));
vst1_p8_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -22083,7 +22143,7 @@ mod test {
let a: [u8; 25] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24];
let e: [u8; 24] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24];
let mut r: [u8; 24] = [0u8; 24];
vst1_p8_x3(r.as_mut_ptr(), vld1_p8_x3(a[1..].as_ptr()));
vst1_p8_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -22092,7 +22152,7 @@ mod test {
let a: [u8; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
let e: [u8; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
let mut r: [u8; 32] = [0u8; 32];
vst1_p8_x4(r.as_mut_ptr(), vld1_p8_x4(a[1..].as_ptr()));
vst1_p8_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -22101,7 +22161,7 @@ mod test {
let a: [u8; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
let e: [u8; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
let mut r: [u8; 32] = [0u8; 32];
vst1q_p8_x2(r.as_mut_ptr(), vld1q_p8_x2(a[1..].as_ptr()));
vst1q_p8_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -22110,7 +22170,7 @@ mod test {
let a: [u8; 49] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
let e: [u8; 48] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
let mut r: [u8; 48] = [0u8; 48];
vst1q_p8_x3(r.as_mut_ptr(), vld1q_p8_x3(a[1..].as_ptr()));
vst1q_p8_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -22119,7 +22179,7 @@ mod test {
let a: [u8; 65] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
let e: [u8; 64] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
let mut r: [u8; 64] = [0u8; 64];
vst1q_p8_x4(r.as_mut_ptr(), vld1q_p8_x4(a[1..].as_ptr()));
vst1q_p8_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -22128,7 +22188,7 @@ mod test {
let a: [u16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
let e: [u16; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
let mut r: [u16; 8] = [0u16; 8];
vst1_p16_x2(r.as_mut_ptr(), vld1_p16_x2(a[1..].as_ptr()));
vst1_p16_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -22137,7 +22197,7 @@ mod test {
let a: [u16; 13] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];
let e: [u16; 12] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];
let mut r: [u16; 12] = [0u16; 12];
vst1_p16_x3(r.as_mut_ptr(), vld1_p16_x3(a[1..].as_ptr()));
vst1_p16_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -22146,7 +22206,7 @@ mod test {
let a: [u16; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
let e: [u16; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
let mut r: [u16; 16] = [0u16; 16];
vst1_p16_x4(r.as_mut_ptr(), vld1_p16_x4(a[1..].as_ptr()));
vst1_p16_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -22155,7 +22215,7 @@ mod test {
let a: [u16; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
let e: [u16; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
let mut r: [u16; 16] = [0u16; 16];
vst1q_p16_x2(r.as_mut_ptr(), vld1q_p16_x2(a[1..].as_ptr()));
vst1q_p16_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -22164,7 +22224,7 @@ mod test {
let a: [u16; 25] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24];
let e: [u16; 24] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24];
let mut r: [u16; 24] = [0u16; 24];
vst1q_p16_x3(r.as_mut_ptr(), vld1q_p16_x3(a[1..].as_ptr()));
vst1q_p16_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -22173,7 +22233,61 @@ mod test {
let a: [u16; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
let e: [u16; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
let mut r: [u16; 32] = [0u16; 32];
vst1q_p16_x4(r.as_mut_ptr(), vld1q_p16_x4(a[1..].as_ptr()));
vst1q_p16_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1_p64_x2() {
let a: [u64; 3] = [0, 1, 2];
let e: [u64; 2] = [1, 2];
let mut r: [u64; 2] = [0u64; 2];
vst1_p64_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1_p64_x3() {
let a: [u64; 4] = [0, 1, 2, 3];
let e: [u64; 3] = [1, 2, 3];
let mut r: [u64; 3] = [0u64; 3];
vst1_p64_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1_p64_x4() {
let a: [u64; 5] = [0, 1, 2, 3, 4];
let e: [u64; 4] = [1, 2, 3, 4];
let mut r: [u64; 4] = [0u64; 4];
vst1_p64_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1q_p64_x2() {
let a: [u64; 5] = [0, 1, 2, 3, 4];
let e: [u64; 4] = [1, 2, 3, 4];
let mut r: [u64; 4] = [0u64; 4];
vst1q_p64_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1q_p64_x3() {
let a: [u64; 7] = [0, 1, 2, 3, 4, 5, 6];
let e: [u64; 6] = [1, 2, 3, 4, 5, 6];
let mut r: [u64; 6] = [0u64; 6];
vst1q_p64_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1q_p64_x4() {
let a: [u64; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
let e: [u64; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
let mut r: [u64; 8] = [0u64; 8];
vst1q_p64_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -22182,7 +22296,7 @@ mod test {
let a: [f32; 5] = [0., 1., 2., 3., 4.];
let e: [f32; 4] = [1., 2., 3., 4.];
let mut r: [f32; 4] = [0f32; 4];
vst1_f32_x2(r.as_mut_ptr(), vld1_f32_x2(a[1..].as_ptr()));
vst1_f32_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -22191,7 +22305,7 @@ mod test {
let a: [f32; 9] = [0., 1., 2., 3., 4., 5., 6., 7., 8.];
let e: [f32; 8] = [1., 2., 3., 4., 5., 6., 7., 8.];
let mut r: [f32; 8] = [0f32; 8];
vst1q_f32_x2(r.as_mut_ptr(), vld1q_f32_x2(a[1..].as_ptr()));
vst1q_f32_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -22200,7 +22314,7 @@ mod test {
let a: [f32; 7] = [0., 1., 2., 3., 4., 5., 6.];
let e: [f32; 6] = [1., 2., 3., 4., 5., 6.];
let mut r: [f32; 6] = [0f32; 6];
vst1_f32_x3(r.as_mut_ptr(), vld1_f32_x3(a[1..].as_ptr()));
vst1_f32_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -22209,7 +22323,7 @@ mod test {
let a: [f32; 13] = [0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.];
let e: [f32; 12] = [1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.];
let mut r: [f32; 12] = [0f32; 12];
vst1q_f32_x3(r.as_mut_ptr(), vld1q_f32_x3(a[1..].as_ptr()));
vst1q_f32_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -22218,7 +22332,7 @@ mod test {
let a: [f32; 9] = [0., 1., 2., 3., 4., 5., 6., 7., 8.];
let e: [f32; 8] = [1., 2., 3., 4., 5., 6., 7., 8.];
let mut r: [f32; 8] = [0f32; 8];
vst1_f32_x4(r.as_mut_ptr(), vld1_f32_x4(a[1..].as_ptr()));
vst1_f32_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}
@@ -22227,7 +22341,7 @@ mod test {
let a: [f32; 17] = [0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.];
let e: [f32; 16] = [1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.];
let mut r: [f32; 16] = [0f32; 16];
vst1q_f32_x4(r.as_mut_ptr(), vld1q_f32_x4(a[1..].as_ptr()));
vst1q_f32_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
}

View File

@@ -339,6 +339,29 @@ unsafe fn test_vst1q_p16() {
assert_eq!(vals[8], 8);
}
#[simd_test(enable = "neon,aes")]
unsafe fn test_vst1_p64() {
let mut vals = [0_u64; 2];
let a = u64x1::new(1);
vst1_p64(vals[1..].as_mut_ptr(), transmute(a));
assert_eq!(vals[0], 0);
assert_eq!(vals[1], 1);
}
#[simd_test(enable = "neon,aes")]
unsafe fn test_vst1q_p64() {
let mut vals = [0_u64; 3];
let a = u64x2::new(1, 2);
vst1q_p64(vals[1..].as_mut_ptr(), transmute(a));
assert_eq!(vals[0], 0);
assert_eq!(vals[1], 1);
assert_eq!(vals[2], 2);
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1_f32() {
let mut vals = [0_f32; 3];

View File

@@ -2082,10 +2082,9 @@ generate *const p16:poly16x4x2_t, *const p16:poly16x4x3_t, *const p16:poly16x4x4
generate *const p16:poly16x8x2_t, *const p16:poly16x8x3_t, *const p16:poly16x8x4_t
target = aes
generate *const p64:poly64x1x2_t
arm = ldr
arm = nop
generate *const p64:poly64x1x3_t, *const p64:poly64x1x4_t
generate *const p64:poly64x2x2_t, *const p64:poly64x2x3_t, *const p64:poly64x2x4_t
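For readers new to the stdarch-gen spec: each generate line stamps out one intrinsic from the template above it, and lines like arm = nop set the instruction asserted in ARM tests, so the ldr-to-nop switch here matches the assert_instr changes in the generated file earlier in this commit. Concretely, generate *const p64:poly64x1x3_t under these settings corresponds to the generated function shown above:

/// Load multiple single-element structures to one, two, three, or four registers
#[inline]
#[target_feature(enable = "neon,aes")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))]
pub unsafe fn vld1_p64_x3(a: *const p64) -> poly64x1x3_t {
    transmute(vld1_s64_x3(transmute(a)))
}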
/// Load multiple single-element structures to one, two, three, or four registers
name = vld1
out-suffix
@@ -2122,13 +2121,16 @@ out-nox
a = 0, 1, 2, 2, 3, 2, 4, 3, 5, 2, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17
validate 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
load_fn
arm-aarch64-separate
aarch64 = ld2
link-aarch64 = ld2._EXTv2_
//generate *const i64:int64x2x2_t
arm = vld2
link-arm = vld2._EXTpi82_
//generate *const i8:int8x8x2_t, *const i16:int16x4x2_t, *const i32:int32x2x2_t, *const i64:int64x1x2_t
//generate *const i8:int8x16x2_t, *const i16:int16x8x2_t, *const i32:int32x4x2_t, *const i64:int64x2x2_t
//generate *const i8:int8x16x2_t, *const i16:int16x8x2_t, *const i32:int32x4x2_t
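The a/validate pairs in these templates encode de-interleaving semantics: vld2 splits consecutive pairs across two registers. A hedged sketch (the intrinsic name follows the spec's naming pattern; note the generate lines here are still commented out):

use core::arch::aarch64::*;

// vld2 de-interleaves: loading [1, 2, 3, 4] as 2-element structures yields
// register 0 = [1, 3] and register 1 = [2, 4].
#[target_feature(enable = "neon")]
unsafe fn deinterleave_pairs(src: *const i32) -> int32x2x2_t {
    vld2_s32(src)
}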
/// Load multiple 2-element structures to two registers
name = vld2
@@ -2139,10 +2141,17 @@ validate 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9,
load_fn
aarch64 = ld2
//generate *const u64:uint64x2x2_t
target = aes
//generate *const p64:poly64x2x2_t
target = default
arm = vld2
//generate *const u8:uint8x8x2_t, *const u16:uint16x4x2_t, *const u32:uint32x2x2_t, *const u64:uint64x1x2_t
//generate *const u8:uint8x16x2_t, *const u16:uint16x8x2_t, *const u32:uint32x4x2_t, *const u64:uint64x2x2_t
//generate *const u8:uint8x16x2_t, *const u16:uint16x8x2_t, *const u32:uint32x4x2_t
//generate *const p8:poly8x8x2_t, *const p16:poly16x4x2_t, *const p8:poly8x16x2_t, *const p16:poly16x8x2_t
target = aes
//generate *const p64:poly64x1x2_t
/// Load multiple 2-element structures to two registers
name = vld2
@@ -2150,6 +2159,7 @@ out-nox
a = 0., 1., 2., 2., 3., 2., 4., 3., 5., 2., 6., 3., 7., 4., 8., 5., 9.
validate 1., 2., 2., 3., 2., 3., 4., 5., 2., 3., 4., 5., 6., 7., 8., 9.
load_fn
arm-aarch64-separate
aarch64 = ld2
link-aarch64 = ld2._EXTv2_
@@ -2166,12 +2176,14 @@ a = 0, 1, 1, 2, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 1
validate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
load_fn
arm = vld2dup
link-arm = vld2dup._EXTpi82_
aarch64 = ld2r
link-aarch64 = ld2r._EXT2_
//generate *const i64:int64x2x2_t
arm = vld2dup
link-arm = vld2dup._EXTpi82_
//generate *const i8:int8x8x2_t, *const i16:int16x4x2_t, *const i32:int32x2x2_t, *const i64:int64x1x2_t
//generate *const i8:int8x16x2_t, *const i16:int16x8x2_t, *const i32:int32x4x2_t, *const i64:int64x2x2_t
//generate *const i8:int8x16x2_t, *const i16:int16x8x2_t, *const i32:int32x4x2_t
/// Load single 2-element structure and replicate to all lanes of two registers
name = vld2
@@ -2181,11 +2193,18 @@ a = 0, 1, 1, 2, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 1
validate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
load_fn
arm = vld2dup
aarch64 = ld2r
//generate *const u64:uint64x2x2_t
target = aes
//generate *const p64:poly64x2x2_t
target = default
arm = vld2dup
//generate *const u8:uint8x8x2_t, *const u16:uint16x4x2_t, *const u32:uint32x2x2_t, *const u64:uint64x1x2_t
//generate *const u8:uint8x16x2_t, *const u16:uint16x8x2_t, *const u32:uint32x4x2_t, *const u64:uint64x2x2_t
//generate *const u8:uint8x16x2_t, *const u16:uint16x8x2_t, *const u32:uint32x4x2_t
//generate *const p8:poly8x8x2_t, *const p16:poly16x4x2_t, *const p8:poly8x16x2_t, *const p16:poly16x8x2_t
target = aes
//generate *const p64:poly64x1x2_t
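A hedged sketch of the dup form described by this template (again, the generate lines are commented out here, so the name follows the spec's pattern):

use core::arch::aarch64::*;

// vld2_dup loads one 2-element structure and replicates it: element 0 fills
// every lane of register 0 and element 1 every lane of register 1 -- hence
// the all-ones validate rows above.
#[target_feature(enable = "neon")]
unsafe fn splat_pair(src: *const u32) -> uint32x2x2_t {
    vld2_dup_u32(src)
}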
/// Load single 2-element structure and replicate to all lanes of two registers
name = vld2
@@ -2217,13 +2236,13 @@ arm-aarch64-separate
aarch64 = ld2lane
const-aarch64 = LANE
link-aarch64 = ld2lane._EXTpi82_
//generate *const i64:int64x1x2_t:int64x1x2_t, *const i64:int64x2x2_t:int64x2x2_t
//generate *const i8:int8x16x2_t:int8x16x2_t, *const i64:int64x1x2_t:int64x1x2_t, *const i64:int64x2x2_t:int64x2x2_t
arm = vld2lane
const-arm = LANE
link-arm = vld2lane._EXTpi82_
//generate *const i8:int8x8x2_t:int8x8x2_t, *const i16:int16x4x2_t:int16x4x2_t, *const i32:int32x2x2_t:int32x2x2_t
//generate *const i8:int8x16x2_t:int8x16x2_t, *const i16:int16x8x2_t:int16x8x2_t, *const i32:int32x4x2_t:int32x4x2_t
//generate *const i16:int16x8x2_t:int16x8x2_t, *const i32:int32x4x2_t:int32x4x2_t
/// Load multiple 2-element structures to two registers
name = vld2
@@ -2236,7 +2255,6 @@ b = 0, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26, 11, 12, 13, 14, 1
n = 0
validate 1, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26, 2, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26
load_fn
arm-aarch64-separate
aarch64 = ld2lane
const-aarch64 = LANE
@@ -2245,14 +2263,15 @@ target = aes
//generate *const p64:poly64x1x2_t:poly64x1x2_t, *const p64:poly64x2x2_t:poly64x2x2_t
target = default
//generate *const u64:uint64x1x2_t:uint64x1x2_t, *const u64:uint64x2x2_t:uint64x2x2_t
//generate *const u8:uint8x16x2_t:uint8x16x2_t, *const u64:uint64x1x2_t:uint64x1x2_t, *const u64:uint64x2x2_t:uint64x2x2_t
//generate *const p8:poly8x16x2_t:poly8x16x2_t
arm = vld2lane
const-arm = LANE
//generate *const u8:uint8x8x2_t:uint8x8x2_t, *const u16:uint16x4x2_t:uint16x4x2_t, *const u32:uint32x2x2_t:uint32x2x2_t
//generate *const u8:uint8x16x2_t:uint8x16x2_t, *const u16:uint16x8x2_t:uint16x8x2_t, *const u32:uint32x4x2_t:uint32x4x2_t
//generate *const u16:uint16x8x2_t:uint16x8x2_t, *const u32:uint32x4x2_t:uint32x4x2_t
//generate *const p8:poly8x8x2_t:poly8x8x2_t, *const p16:poly16x4x2_t:poly16x4x2_t
//generate *const p8:poly8x16x2_t:poly8x16x2_t, *const p16:poly16x8x2_t:poly16x8x2_t
//generate *const p16:poly16x8x2_t:poly16x8x2_t
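And a hedged sketch of the lane form: vld2_lane loads a single interleaved pair into lane LANE of the two destination registers and leaves the remaining lanes untouched, which is what the b/validate rows above exercise (intrinsic name and turbofish per the spec's const-generic convention):

use core::arch::aarch64::*;

// Load one 2-element structure from src into lane 0 of each register of
// `regs`; all other lanes keep their previous contents.
#[target_feature(enable = "neon")]
unsafe fn load_pair_into_lane0(src: *const u16, regs: uint16x4x2_t) -> uint16x4x2_t {
    vld2_lane_u16::<0>(src, regs)
}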
/// Load multiple 2-element structures to two registers
name = vld2
@@ -2276,6 +2295,398 @@ const-arm = LANE
link-arm = vld2lane._EXTpi82_
//generate *const f32:float32x2x2_t:float32x2x2_t, *const f32:float32x4x2_t:float32x4x2_t
/// Load multiple 3-element structures to three registers
name = vld3
out-nox
a = 0, 1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8, 2, 13, 13, 4, 14, 14, 7, 15, 15, 8, 16, 16, 2, 25, 41, 4, 26, 42, 7, 27, 43, 8, 28, 44, 13, 29, 45, 14, 30, 46, 15, 31, 47, 16, 32, 48
validate 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32, 2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48
load_fn
arm-aarch64-separate
aarch64 = ld3
link-aarch64 = ld3._EXTv2_
//generate *const i64:int64x2x3_t
arm = vld3
link-arm = vld3._EXTpi82_
//generate *const i8:int8x8x3_t, *const i16:int16x4x3_t, *const i32:int32x2x3_t, *const i64:int64x1x3_t
//generate *const i8:int8x16x3_t, *const i16:int16x8x3_t, *const i32:int32x4x3_t
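The same conventions extend to vld3 (and vld4 below) with stride-3 (stride-4) de-interleaving. A hedged sketch matching this template's signed variants:

use core::arch::aarch64::*;

// vld3 de-interleaves groups of three: loading [1, 2, 3, 4, 5, 6] as
// 3-element structures yields [1, 4], [2, 5], and [3, 6].
#[target_feature(enable = "neon")]
unsafe fn deinterleave_triples(src: *const i32) -> int32x2x3_t {
    vld3_s32(src)
}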
/// Load multiple 3-element structures to three registers
name = vld3
out-nox
multi_fn = transmute, {vld3-outsignednox-noext, transmute(a)}
a = 0, 1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8, 2, 13, 13, 4, 14, 14, 7, 15, 15, 8, 16, 16, 2, 25, 41, 4, 26, 42, 7, 27, 43, 8, 28, 44, 13, 29, 45, 14, 30, 46, 15, 31, 47, 16, 32, 48
validate 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32, 2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48
load_fn
aarch64 = ld3
//generate *const u64:uint64x2x3_t
target = aes
//generate *const p64:poly64x2x3_t
target = default
arm = vld3
//generate *const u8:uint8x8x3_t, *const u16:uint16x4x3_t, *const u32:uint32x2x3_t, *const u64:uint64x1x3_t
//generate *const u8:uint8x16x3_t, *const u16:uint16x8x3_t, *const u32:uint32x4x3_t
//generate *const p8:poly8x8x3_t, *const p16:poly16x4x3_t, *const p8:poly8x16x3_t, *const p16:poly16x8x3_t
target = aes
//generate *const p64:poly64x1x3_t
/// Load multiple 3-element structures to three registers
name = vld3
out-nox
a = 0., 1., 2., 2., 2., 4., 4., 2., 7., 7., 4., 8., 8.
validate 1., 2., 2., 4., 2., 4., 7., 8., 2., 4., 7., 8.
load_fn
arm-aarch64-separate
aarch64 = ld3
link-aarch64 = ld3._EXTv2_
//generate *const f64:float64x1x3_t, *const f64:float64x2x3_t
arm = vld3
link-arm = vld3._EXTpi82_
//generate *const f32:float32x2x3_t, *const f32:float32x4x3_t
/// Load single 3-element structure and replicate to all lanes of three registers
name = vld3
out-dup-nox
a = 0, 1, 1, 1, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17, 6, 14, 7, 15, 8, 16, 9, 17, 6, 14, 7, 15, 8, 16, 9, 17
validate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
load_fn
aarch64 = ld3r
link-aarch64 = ld3r._EXT2_
//generate *const i64:int64x2x3_t
arm = vld3dup
link-arm = vld3dup._EXTpi82_
//generate *const i8:int8x8x3_t, *const i16:int16x4x3_t, *const i32:int32x2x3_t, *const i64:int64x1x3_t
//generate *const i8:int8x16x3_t, *const i16:int16x8x3_t, *const i32:int32x4x3_t
/// Load single 3-element structure and replicate to all lanes of three registers
name = vld3
out-dup-nox
multi_fn = transmute, {vld3-outsigneddupnox-noext, transmute(a)}
a = 0, 1, 1, 1, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17, 6, 14, 7, 15, 8, 16, 9, 17, 6, 14, 7, 15, 8, 16, 9, 17
validate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
load_fn
aarch64 = ld3r
//generate *const u64:uint64x2x3_t
target = aes
//generate *const p64:poly64x2x3_t
target = default
arm = vld3dup
//generate *const u8:uint8x8x3_t, *const u16:uint16x4x3_t, *const u32:uint32x2x3_t, *const u64:uint64x1x3_t
//generate *const u8:uint8x16x3_t, *const u16:uint16x8x3_t, *const u32:uint32x4x3_t
//generate *const p8:poly8x8x3_t, *const p16:poly16x4x3_t, *const p8:poly8x16x3_t, *const p16:poly16x8x3_t
target = aes
//generate *const p64:poly64x1x3_t
/// Load single 3-element structure and replicate to all lanes of three registers
name = vld3
out-dup-nox
a = 0., 1., 1., 1., 3., 1., 4., 3., 5., 1., 4., 3., 5.
validate 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.
load_fn
aarch64 = ld3r
link-aarch64 = ld3r._EXT2_
//generate *const f64:float64x1x3_t, *const f64:float64x2x3_t
arm = vld3dup
link-arm = vld3dup._EXTpi82_
//generate *const f32:float32x2x3_t, *const f32:float32x4x3_t
/// Load multiple 3-element structures to three registers
name = vld3
out-lane-nox
multi_fn = static_assert_imm-in_exp_len-LANE
constn = LANE
a = 0, 1, 2, 2, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
b = 0, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
n = 0
validate 1, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26, 2, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 2, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
load_fn
arm-aarch64-separate
aarch64 = ld3lane
const-aarch64 = LANE
link-aarch64 = ld3lane._EXTpi82_
//generate *const i8:int8x16x3_t:int8x16x3_t, *const i64:int64x1x3_t:int64x1x3_t, *const i64:int64x2x3_t:int64x2x3_t
arm = vld3lane
const-arm = LANE
link-arm = vld3lane._EXTpi82_
//generate *const i8:int8x8x3_t:int8x8x3_t, *const i16:int16x4x3_t:int16x4x3_t, *const i32:int32x2x3_t:int32x2x3_t
//generate *const i16:int16x8x3_t:int16x8x3_t, *const i32:int32x4x3_t:int32x4x3_t
/// Load multiple 3-element structures to three registers
name = vld3
out-lane-nox
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = transmute, {vld3-outsignedlanenox-::<LANE>, transmute(a), transmute(b)}
constn = LANE
a = 0, 1, 2, 2, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
b = 0, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
n = 0
validate 1, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26, 2, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 2, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
load_fn
aarch64 = ld3lane
const-aarch64 = LANE
target = aes
//generate *const p64:poly64x1x3_t:poly64x1x3_t, *const p64:poly64x2x3_t:poly64x2x3_t
target = default
//generate *const p8:poly8x16x3_t:poly8x16x3_t, *const u8:uint8x16x3_t:uint8x16x3_t, *const u64:uint64x1x3_t:uint64x1x3_t, *const u64:uint64x2x3_t:uint64x2x3_t
arm = vld3lane
const-arm = LANE
//generate *const u8:uint8x8x3_t:uint8x8x3_t, *const u16:uint16x4x3_t:uint16x4x3_t, *const u32:uint32x2x3_t:uint32x2x3_t
//generate *const u16:uint16x8x3_t:uint16x8x3_t, *const u32:uint32x4x3_t:uint32x4x3_t
//generate *const p8:poly8x8x3_t:poly8x8x3_t, *const p16:poly16x4x3_t:poly16x4x3_t
//generate *const p16:poly16x8x3_t:poly16x8x3_t
/// Load multiple 3-element structures to three registers
name = vld3
out-lane-nox
multi_fn = static_assert_imm-in_exp_len-LANE
constn = LANE
a = 0., 1., 2., 2., 4., 5., 6., 7., 8., 5., 6., 7., 8.
b = 0., 2., 2., 14., 9., 16., 17., 18., 5., 6., 7., 8.
n = 0
validate 1., 2., 2., 14., 2., 16., 17., 18., 2., 6., 7., 8.
load_fn
arm-aarch64-separate
aarch64 = ld3lane
const-aarch64 = LANE
link-aarch64 = ld3lane._EXTpi82_
//generate *const f64:float64x1x3_t:float64x1x3_t, *const f64:float64x2x3_t:float64x2x3_t
arm = vld3lane
const-arm = LANE
link-arm = vld3lane._EXTpi82_
//generate *const f32:float32x2x3_t:float32x2x3_t, *const f32:float32x4x3_t:float32x4x3_t
/// Load multiple 4-element structures to four registers
name = vld4
out-nox
a = 0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64
validate 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64
load_fn
arm-aarch64-separate
aarch64 = ld4
link-aarch64 = ld4._EXTv2_
//generate *const i64:int64x2x4_t
arm = vld4
link-arm = vld4._EXTpi82_
//generate *const i8:int8x8x4_t, *const i16:int16x4x4_t, *const i32:int32x2x4_t, *const i64:int64x1x4_t
//generate *const i8:int8x16x4_t, *const i16:int16x8x4_t, *const i32:int32x4x4_t
/// Load multiple 4-element structures to four registers
name = vld4
out-nox
multi_fn = transmute, {vld4-outsignednox-noext, transmute(a)}
a = 0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64
validate 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64
load_fn
aarch64 = ld4
//generate *const u64:uint64x2x4_t
target = aes
//generate *const p64:poly64x2x4_t
target = default
arm = vld4
//generate *const u8:uint8x8x4_t, *const u16:uint16x4x4_t, *const u32:uint32x2x4_t, *const u64:uint64x1x4_t
//generate *const u8:uint8x16x4_t, *const u16:uint16x8x4_t, *const u32:uint32x4x4_t
//generate *const p8:poly8x8x4_t, *const p16:poly16x4x4_t, *const p8:poly8x16x4_t, *const p16:poly16x8x4_t
target = aes
//generate *const p64:poly64x1x4_t
/// Load multiple 4-element structures to four registers
name = vld4
out-nox
a = 0., 1., 2., 2., 6., 2., 6., 6., 8., 2., 6., 6., 8., 6., 8., 15., 16.
validate 1., 2., 2., 6., 2., 6., 6., 8., 2., 6., 6., 15., 6., 8., 8., 16.
load_fn
arm-aarch64-separate
aarch64 = ld4
link-aarch64 = ld4._EXTv2_
//generate *const f64:float64x1x4_t, *const f64:float64x2x4_t
arm = vld4
link-arm = vld4._EXTpi82_
//generate *const f32:float32x2x4_t, *const f32:float32x4x4_t
/// Load single 4-element structure and replicate to all lanes of four registers
name = vld4
out-dup-nox
a = 0, 1, 1, 1, 1, 2, 4, 3, 5, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9
validate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
load_fn
aarch64 = ld4r
link-aarch64 = ld4r._EXT2_
//generate *const i64:int64x2x4_t
arm = vld4dup
link-arm = vld4dup._EXTpi82_
//generate *const i8:int8x8x4_t, *const i16:int16x4x4_t, *const i32:int32x2x4_t, *const i64:int64x1x4_t
//generate *const i8:int8x16x4_t, *const i16:int16x8x4_t, *const i32:int32x4x4_t
/// Load single 4-element structure and replicate to all lanes of four registers
name = vld4
out-dup-nox
multi_fn = transmute, {vld4-outsigneddupnox-noext, transmute(a)}
a = 0, 1, 1, 1, 1, 2, 4, 3, 5, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9
validate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
load_fn
aarch64 = ld4r
//generate *const u64:uint64x2x4_t
target = aes
//generate *const p64:poly64x2x4_t
target = default
arm = vld4dup
//generate *const u8:uint8x8x4_t, *const u16:uint16x4x4_t, *const u32:uint32x2x4_t, *const u64:uint64x1x4_t
//generate *const u8:uint8x16x4_t, *const u16:uint16x8x4_t, *const u32:uint32x4x4_t
//generate *const p8:poly8x8x4_t, *const p16:poly16x4x4_t, *const p8:poly8x16x4_t, *const p16:poly16x8x4_t
target = aes
//generate *const p64:poly64x1x4_t
/// Load single 4-element structure and replicate to all lanes of four registers
name = vld4
out-dup-nox
a = 0., 1., 1., 1., 1., 6., 4., 3., 5., 7., 4., 3., 5., 8., 4., 3., 5., 9., 4., 3., 5.
validate 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.
load_fn
aarch64 = ld4r
link-aarch64 = ld4r._EXT2_
//generate *const f64:float64x1x4_t, *const f64:float64x2x4_t
arm = vld4dup
link-arm = vld4dup._EXTpi82_
//generate *const f32:float32x2x4_t, *const f32:float32x4x4_t
/// Load multiple 4-element structures to four registers
name = vld4
out-lane-nox
multi_fn = static_assert_imm-in_exp_len-LANE
constn = LANE
a = 0, 1, 2, 2, 2, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16
b = 0, 2, 2, 2, 2, 16, 2, 18, 2, 20, 21, 22, 2, 24, 25, 26, 11, 12, 13, 14, 15, 16, 2, 18, 2, 20, 21, 22, 23, 24, 25, 26, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16
n = 0
validate 1, 2, 2, 2, 2, 16, 2, 18, 2, 20, 21, 22, 2, 24, 25, 26, 2, 12, 13, 14, 15, 16, 2, 18, 2, 20, 21, 22, 23, 24, 25, 26, 2, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 2, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16
load_fn
arm-aarch64-separate
aarch64 = ld4lane
const-aarch64 = LANE
link-aarch64 = ld4lane._EXTpi82_
//generate *const i8:int8x16x4_t:int8x16x4_t, *const i64:int64x1x4_t:int64x1x4_t, *const i64:int64x2x4_t:int64x2x4_t
arm = vld4lane
const-arm = LANE
link-arm = vld4lane._EXTpi82_
//generate *const i8:int8x8x4_t:int8x8x4_t, *const i16:int16x4x4_t:int16x4x4_t, *const i32:int32x2x4_t:int32x2x4_t
//generate *const i16:int16x8x4_t:int16x8x4_t, *const i32:int32x4x4_t:int32x4x4_t
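The out-lane-nox shape takes the four source registers in b plus a const LANE immediate, overwrites lane LANE of each register with the four structure elements read from the pointer, and leaves every other lane intact; constn = LANE together with static_assert_imm-in_exp_len-LANE pins the immediate to the valid range at compile time. A semantic sketch (the real wrapper calls the linked ld4lane function rather than scalar inserts):

// Sketch only: lane-insert model of vld4_lane for the 4-lane i16 case.
pub unsafe fn vld4_lane_s16<const LANE: i32>(
    a: *const i16,
    mut b: int16x4x4_t,
) -> int16x4x4_t {
    static_assert_imm2!(LANE); // 4 lanes -> 2 immediate bits
    b.0 = simd_insert(b.0, LANE as u32, *a);
    b.1 = simd_insert(b.1, LANE as u32, *a.add(1));
    b.2 = simd_insert(b.2, LANE as u32, *a.add(2));
    b.3 = simd_insert(b.3, LANE as u32, *a.add(3));
    b
}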
/// Load multiple 4-element structures to four registers
name = vld4
out-lane-nox
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = transmute, {vld4-outsignedlanenox-::<LANE>, transmute(a), transmute(b)}
constn = LANE
a = 0, 1, 2, 2, 2, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16
b = 0, 2, 2, 2, 2, 16, 2, 18, 2, 20, 21, 22, 2, 24, 25, 26, 11, 12, 13, 14, 15, 16, 2, 18, 2, 20, 21, 22, 23, 24, 25, 26, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16
n = 0
validate 1, 2, 2, 2, 2, 16, 2, 18, 2, 20, 21, 22, 2, 24, 25, 26, 2, 12, 13, 14, 15, 16, 2, 18, 2, 20, 21, 22, 23, 24, 25, 26, 2, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 2, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16
load_fn
aarch64 = ld4lane
const-aarch64 = LANE
target = aes
//generate *const p64:poly64x1x4_t:poly64x1x4_t, *const p64:poly64x2x4_t:poly64x2x4_t
target = default
//generate *const p8:poly8x16x4_t:poly8x16x4_t, *const u8:uint8x16x4_t:uint8x16x4_t, *const u64:uint64x1x4_t:uint64x1x4_t, *const u64:uint64x2x4_t:uint64x2x4_t
arm = vld4lane
const-arm = LANE
//generate *const u8:uint8x8x4_t:uint8x8x4_t, *const u16:uint16x4x4_t:uint16x4x4_t, *const u32:uint32x2x4_t:uint32x2x4_t
//generate *const u16:uint16x8x4_t:uint16x8x4_t, *const u32:uint32x4x4_t:uint32x4x4_t
//generate *const p8:poly8x8x4_t:poly8x8x4_t, *const p16:poly16x4x4_t:poly16x4x4_t
//generate *const p16:poly16x8x4_t:poly16x8x4_t
/// Load multiple 4-element structures to four registers
name = vld4
out-lane-nox
multi_fn = static_assert_imm-in_exp_len-LANE
constn = LANE
a = 0., 1., 2., 2., 2., 5., 6., 7., 8., 5., 6., 7., 8., 1., 4., 3., 5.
b = 0., 2., 2., 2., 2., 16., 2., 18., 5., 6., 7., 8., 1., 4., 3., 5.
n = 0
validate 1., 2., 2., 2., 2., 16., 2., 18., 2., 6., 7., 8., 2., 4., 3., 5.
load_fn
arm-aarch64-separate
aarch64 = ld4lane
const-aarch64 = LANE
link-aarch64 = ld4lane._EXTpi82_
//generate *const f64:float64x1x4_t:float64x1x4_t, *const f64:float64x2x4_t:float64x2x4_t
arm = vld4lane
const-arm = LANE
link-arm = vld4lane._EXTpi82_
//generate *const f32:float32x2x4_t:float32x2x4_t, *const f32:float32x4x4_t:float32x4x4_t
/// Store multiple single-element structures from one, two, three, or four registers
name = vst1
in1-lane-nox
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = *a, {simd_extract, b, LANE as u32}
constn = LANE
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
n = 0
validate 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
store_fn
aarch64 = nop
arm = nop
//generate *mut i8:int8x8_t:void, *mut i16:int16x4_t:void, *mut i32:int32x2_t:void, *mut i64:int64x1_t:void
//generate *mut i8:int8x16_t:void, *mut i16:int16x8_t:void, *mut i32:int32x4_t:void, *mut i64:int64x2_t:void
//generate *mut u8:uint8x8_t:void, *mut u16:uint16x4_t:void, *mut u32:uint32x2_t:void, *mut u64:uint64x1_t:void
//generate *mut u8:uint8x16_t:void, *mut u16:uint16x8_t:void, *mut u32:uint32x4_t:void, *mut u64:uint64x2_t:void
//generate *mut p8:poly8x8_t:void, *mut p16:poly16x4_t:void, *mut p8:poly8x16_t:void, *mut p16:poly16x8_t:void
target = aes
//generate *mut p64:poly64x1_t:void, *mut p64:poly64x2_t:void
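The vst1 lane entries need no link function at all: multi_fn = *a, {simd_extract, b, LANE as u32} expands to a scalar extract-and-store, which the backend folds into a plain str (hence aarch64 = nop / arm = nop for the instruction assertion). The generated body is essentially:

// Generated shape of a vst1 lane store (s8 case shown).
pub unsafe fn vst1_lane_s8<const LANE: i32>(a: *mut i8, b: int8x8_t) {
    static_assert_imm3!(LANE); // 8 lanes -> 3 immediate bits
    *a = simd_extract(b, LANE as u32);
}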
/// Store multiple single-element structures from one, two, three, or four registers
name = vst1
in1-lane-nox
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = *a, {simd_extract, b, LANE as u32}
constn = LANE
a = 0., 1., 2., 3., 4., 5., 6., 7., 8.
n = 0
validate 1., 0., 0., 0., 0., 0., 0., 0.
store_fn
aarch64 = nop
//generate *mut f64:float64x1_t:void, *mut f64:float64x2_t:void
arm = nop
//generate *mut f32:float32x2_t:void, *mut f32:float32x4_t:void
/// Store multiple single-element structures from one, two, three, or four registers
name = vst1
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32
@@ -2319,6 +2730,11 @@ generate *mut p8:poly8x8x2_t:void, *mut p8:poly8x8x3_t:void, *mut p8:poly8x8x4_t
generate *mut p8:poly8x16x2_t:void, *mut p8:poly8x16x3_t:void, *mut p8:poly8x16x4_t:void
generate *mut p16:poly16x4x2_t:void, *mut p16:poly16x4x3_t:void, *mut p16:poly16x4x4_t:void
generate *mut p16:poly16x8x2_t:void, *mut p16:poly16x8x3_t:void, *mut p16:poly16x8x4_t:void
target = aes
generate *mut p64:poly64x1x2_t:void
arm = nop
generate *mut p64:poly64x1x3_t:void, *mut p64:poly64x1x4_t:void
generate *mut p64:poly64x2x2_t:void, *mut p64:poly64x2x3_t:void, *mut p64:poly64x2x4_t:void
/// Store multiple single-element structures to one, two, three, or four registers
name = vst1
@@ -2350,6 +2766,363 @@ link-aarch64 = st1x4._EXT3_
link-arm = vst1x4._EXTr3_
generate *mut f32:float32x2x4_t:void, *mut f32:float32x4x4_t:void
/// Store multiple 2-element structures from two registers
name = vst2
in1-nox
a = 0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
validate 1, 2, 2, 3, 2, 4, 3, 5, 2, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17
store_fn
arm-aarch64-separate
aarch64 = st2
link-aarch64 = st2._EXTpi8_
//generate *mut i64:int64x2x2_t:void
arm = vst2
link-arm = vst2._EXTpi8r_
//generate *mut i8:int8x8x2_t:void, *mut i16:int16x4x2_t:void, *mut i32:int32x2x2_t:void, *mut i64:int64x1x2_t:void
//generate *mut i8:int8x16x2_t:void, *mut i16:int16x8x2_t:void, *mut i32:int32x4x2_t:void
/// Store multiple 2-element structures from two registers
name = vst2
multi_fn = transmute, {vst2-in1signednox-noext, transmute(a), transmute(b)}
in1-nox
a = 0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
validate 1, 2, 2, 3, 2, 4, 3, 5, 2, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17
store_fn
aarch64 = st2
//generate *mut u64:uint64x2x2_t:void
target = aes
//generate *mut p64:poly64x2x2_t:void
target = default
arm = vst2
//generate *mut u8:uint8x8x2_t:void, *mut u16:uint16x4x2_t:void, *mut u32:uint32x2x2_t:void, *mut u64:uint64x1x2_t:void
//generate *mut u8:uint8x16x2_t:void, *mut u16:uint16x8x2_t:void, *mut u32:uint32x4x2_t:void
//generate *mut p8:poly8x8x2_t:void, *mut p16:poly16x4x2_t:void, *mut p8:poly8x16x2_t:void, *mut p16:poly16x8x2_t:void
target = aes
//generate *mut p64:poly64x1x2_t:void
/// Store multiple 2-element structures from two registers
name = vst2
in1-nox
a = 0., 1., 2., 2., 3., 2., 3., 4., 5., 2., 3., 4., 5., 6., 7., 8., 9.
validate 1., 2., 2., 3., 2., 4., 3., 5., 2., 6., 3., 7., 4., 8., 5., 9.
store_fn
arm-aarch64-separate
aarch64 = st2
link-aarch64 = st2._EXTpi8_
//generate *mut f64:float64x1x2_t:void, *mut f64:float64x2x2_t:void
arm = vst2
link-arm = vst2._EXTpi8r_
//generate *mut f32:float32x2x2_t:void, *mut f32:float32x4x2_t:void
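Reading one of these store vectors end to end: the test harness reinterprets a[1..] as the input struct, so for vst2q_f32 b.0 = [1., 2., 2., 3.] and b.1 = [2., 3., 4., 5.], and st2 interleaves them lane by lane into 1, 2, 2, 3, 2, 4, 3, 5, the first half of the validate line. A model of the interleaving:

// Model of vst2: interleave two N-lane registers into 2*N contiguous slots.
fn interleave2<const N: usize>(b0: [f32; N], b1: [f32; N], dst: &mut [f32]) {
    assert!(dst.len() >= 2 * N);
    for i in 0..N {
        dst[2 * i] = b0[i];     // lane i of the first register
        dst[2 * i + 1] = b1[i]; // lane i of the second register
    }
}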
/// Store multiple 2-element structures from two registers
name = vst2
in1-lane-nox
constn = LANE
multi_fn = static_assert_imm-in_exp_len-LANE
a = 0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
n = 0
validate 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
store_fn
arm-aarch64-separate
aarch64 = st2lane
link-aarch64 = st2lane._EXTpi8_
const-aarch64 = LANE
//generate *mut i8:int8x16x2_t:void, *mut i64:int64x1x2_t:void, *mut i64:int64x2x2_t:void
arm = vst2lane
link-arm = vst2lane._EXTpi8r_
const-arm = LANE
//generate *mut i8:int8x8x2_t:void, *mut i16:int16x4x2_t:void, *mut i32:int32x2x2_t:void
//generate *mut i16:int16x8x2_t:void, *mut i32:int32x4x2_t:void
/// Store multiple 2-element structures from two registers
name = vst2
in1-lane-nox
constn = LANE
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = transmute, {vst2-in1signedlanenox-::<LANE>, transmute(a), transmute(b)}
a = 0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
n = 0
validate 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
store_fn
aarch64 = st2lane
//generate *mut u8:uint8x16x2_t:void, *mut u64:uint64x1x2_t:void, *mut u64:uint64x2x2_t:void, *mut p8:poly8x16x2_t:void
target = aes
//generate *mut p64:poly64x1x2_t:void, *mut p64:poly64x2x2_t:void
target = default
arm = vst2lane
//generate *mut u8:uint8x8x2_t:void, *mut u16:uint16x4x2_t:void, *mut u32:uint32x2x2_t:void
//generate *mut u16:uint16x8x2_t:void, *mut u32:uint32x4x2_t:void
//generate *mut p8:poly8x8x2_t:void, *mut p16:poly16x4x2_t:void, *mut p16:poly16x8x2_t:void
/// Store multiple 2-element structures from two registers
name = vst2
in1-lane-nox
constn = LANE
multi_fn = static_assert_imm-in_exp_len-LANE
a = 0., 1., 2., 2., 3., 2., 3., 4., 5., 2., 3., 4., 5., 6., 7., 8., 9.
n = 0
validate 1., 2., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.
store_fn
arm-aarch64-separate
aarch64 = st2lane
link-aarch64 = st2lane._EXTpi8_
const-aarch64 = LANE
//generate *mut f64:float64x1x2_t:void, *mut f64:float64x2x2_t:void
arm = vst2lane
link-arm = vst2lane._EXTpi8r_
const-arm = LANE
//generate *mut f32:float32x2x2_t:void, *mut f32:float32x4x2_t:void
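The lane variants of the stores mirror the lane loads: they write a single interleaved structure, b.0[LANE] then b.1[LANE], and nothing else, which is why the validate lines are 1, 2 followed by zeros. As a two-line model:

// Model of vst2_lane: store one 2-element structure taken from lane `lane`.
unsafe fn st2_lane_model(dst: *mut f32, b0: [f32; 2], b1: [f32; 2], lane: usize) {
    *dst = b0[lane];
    *dst.add(1) = b1[lane];
}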
/// Store multiple 3-element structures from three registers
name = vst3
in1-nox
a = 0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32, 2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48
validate 1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8, 2, 13, 13, 4, 14, 14, 7, 15, 15, 8, 16, 16, 2, 25, 41, 4, 26, 42, 7, 27, 43, 8, 28, 44, 13, 29, 45, 14, 30, 46, 15, 31, 47, 16, 32, 48
store_fn
arm-aarch64-separate
aarch64 = st3
link-aarch64 = st3._EXTpi8_
//generate *mut i64:int64x2x3_t:void
arm = vst3
link-arm = vst3._EXTpi8r_
//generate *mut i8:int8x8x3_t:void, *mut i16:int16x4x3_t:void, *mut i32:int32x2x3_t:void, *mut i64:int64x1x3_t:void
//generate *mut i8:int8x16x3_t:void, *mut i16:int16x8x3_t:void, *mut i32:int32x4x3_t:void
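The same interleaving generalises to strides 3 and 4; for the int64x2x3_t case above, b.0 = [1, 2], b.1 = [2, 4], b.2 = [2, 4], and st3 emits 1, 2, 2, 2, 4, 4, the prefix of the validate line. A stride-generic model:

// Model of stN stores: output structure i takes field j from register j, lane i.
fn interleave<const K: usize, const N: usize>(regs: [[i64; N]; K], dst: &mut [i64]) {
    assert!(dst.len() >= K * N);
    for i in 0..N {
        for j in 0..K {
            dst[K * i + j] = regs[j][i];
        }
    }
}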
/// Store multiple 3-element structures from three registers
name = vst3
multi_fn = transmute, {vst3-in1signednox-noext, transmute(a), transmute(b)}
in1-nox
a = 0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32, 2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48
validate 1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8, 2, 13, 13, 4, 14, 14, 7, 15, 15, 8, 16, 16, 2, 25, 41, 4, 26, 42, 7, 27, 43, 8, 28, 44, 13, 29, 45, 14, 30, 46, 15, 31, 47, 16, 32, 48
store_fn
aarch64 = st3
//generate *mut u64:uint64x2x3_t:void
target = aes
//generate *mut p64:poly64x2x3_t:void
target = default
arm = vst3
//generate *mut u8:uint8x8x3_t:void, *mut u16:uint16x4x3_t:void, *mut u32:uint32x2x3_t:void, *mut u64:uint64x1x3_t:void
//generate *mut u8:uint8x16x3_t:void, *mut u16:uint16x8x3_t:void, *mut u32:uint32x4x3_t:void
//generate *mut p8:poly8x8x3_t:void, *mut p16:poly16x4x3_t:void, *mut p8:poly8x16x3_t:void, *mut p16:poly16x8x3_t:void
target = aes
//generate *mut p64:poly64x1x3_t:void
/// Store multiple 3-element structures from three registers
name = vst3
in1-nox
a = 0., 1., 2., 2., 4., 2., 4., 7., 8., 2., 4., 7., 8., 13., 14., 15., 16.
validate 1., 2., 2., 2., 4., 4., 2., 7., 7., 4., 8., 8., 2., 13., 13., 4.
store_fn
arm-aarch64-separate
aarch64 = st3
link-aarch64 = st3._EXTpi8_
//generate *mut f64:float64x1x3_t:void, *mut f64:float64x2x3_t:void
arm = vst3
link-arm = vst3._EXTpi8r_
//generate *mut f32:float32x2x3_t:void, *mut f32:float32x4x3_t:void
/// Store multiple 3-element structures from three registers
name = vst3
in1-lane-nox
constn = LANE
multi_fn = static_assert_imm-in_exp_len-LANE
a = 0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32, 2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48
n = 0
validate 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
store_fn
arm-aarch64-separate
aarch64 = st3lane
link-aarch64 = st3lane._EXTpi8_
const-aarch64 = LANE
//generate *mut i8:int8x16x3_t:void, *mut i64:int64x1x3_t:void, *mut i64:int64x2x3_t:void
arm = vst3lane
link-arm = vst3lane._EXTpi8r_
const-arm = LANE
//generate *mut i8:int8x8x3_t:void, *mut i16:int16x4x3_t:void, *mut i32:int32x2x3_t:void
//generate *mut i16:int16x8x3_t:void, *mut i32:int32x4x3_t:void
/// Store multiple 3-element structures from three registers
name = vst3
in1-lane-nox
constn = LANE
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = transmute, {vst3-in1signedlanenox-::<LANE>, transmute(a), transmute(b)}
a = 0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32, 2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48
n = 0
validate 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
store_fn
aarch64 = st3lane
//generate *mut u8:uint8x16x3_t:void, *mut u64:uint64x1x3_t:void, *mut u64:uint64x2x3_t:void, *mut p8:poly8x16x3_t:void
target = aes
//generate *mut p64:poly64x1x3_t:void, *mut p64:poly64x2x3_t:void
target = default
arm = vst3lane
//generate *mut u8:uint8x8x3_t:void, *mut u16:uint16x4x3_t:void, *mut u32:uint32x2x3_t:void
//generate *mut u16:uint16x8x3_t:void, *mut u32:uint32x4x3_t:void
//generate *mut p8:poly8x8x3_t:void, *mut p16:poly16x4x3_t:void, *mut p16:poly16x8x3_t:void
/// Store multiple 3-element structures from three registers
name = vst3
in1-lane-nox
constn = LANE
multi_fn = static_assert_imm-in_exp_len-LANE
a = 0., 1., 2., 2., 3., 2., 3., 4., 5., 2., 3., 4., 5., 6., 7., 8., 9.
n = 0
validate 1., 2., 2., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.
store_fn
arm-aarch64-separate
aarch64 = st3lane
link-aarch64 = st3lane._EXTpi8_
const-aarch64 = LANE
//generate *mut f64:float64x1x3_t:void, *mut f64:float64x2x3_t:void
arm = vst3lane
link-arm = vst3lane._EXTpi8r_
const-arm = LANE
//generate *mut f32:float32x2x3_t:void, *mut f32:float32x4x3_t:void
/// Store multiple 4-element structures from four registers
name = vst4
in1-nox
a = 0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64
validate 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64
store_fn
arm-aarch64-separate
aarch64 = st4
link-aarch64 = st4._EXTpi8_
//generate *mut i64:int64x2x4_t:void
arm = vst4
link-arm = vst4._EXTpi8r_
//generate *mut i8:int8x8x4_t:void, *mut i16:int16x4x4_t:void, *mut i32:int32x2x4_t:void, *mut i64:int64x1x4_t:void
//generate *mut i8:int8x16x4_t:void, *mut i16:int16x8x4_t:void, *mut i32:int32x4x4_t:void
/// Store multiple 4-element structures from four registers
name = vst4
multi_fn = transmute, {vst4-in1signednox-noext, transmute(a), transmute(b)}
in1-nox
a = 0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64
validate 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64
store_fn
aarch64 = st4
//generate *mut u64:uint64x2x4_t:void
target = aes
//generate *mut p64:poly64x2x4_t:void
target = default
arm = vst4
//generate *mut u8:uint8x8x4_t:void, *mut u16:uint16x4x4_t:void, *mut u32:uint32x2x4_t:void, *mut u64:uint64x1x4_t:void
//generate *mut u8:uint8x16x4_t:void, *mut u16:uint16x8x4_t:void, *mut u32:uint32x4x4_t:void
//generate *mut p8:poly8x8x4_t:void, *mut p16:poly16x4x4_t:void, *mut p8:poly8x16x4_t:void, *mut p16:poly16x8x4_t:void
target = aes
//generate *mut p64:poly64x1x4_t:void
/// Store multiple 4-element structures from four registers
name = vst4
in1-nox
a = 0., 1., 2., 2., 6., 2., 6., 6., 8., 2., 6., 6., 8., 6., 8., 8., 16.
validate 1., 2., 2., 6., 2., 6., 6., 8., 2., 6., 6., 8., 6., 8., 8., 16.
store_fn
arm-aarch64-separate
aarch64 = st4
link-aarch64 = st4._EXTpi8_
//generate *mut f64:float64x1x4_t:void, *mut f64:float64x2x4_t:void
arm = vst4
link-arm = vst4._EXTpi8r_
//generate *mut f32:float32x2x4_t:void, *mut f32:float32x4x4_t:void
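All of the //generate lines in the vst2/vst3/vst4 entries are still commented out here (the wrappers are enabled in follow-up work); once they land, usage would look like this hypothetical sketch (vst4_f32 does not exist yet at this commit):

// Hypothetical, pending `generate *mut f32:float32x2x4_t:void`:
#[cfg(target_arch = "aarch64")]
unsafe fn store_interleaved(dst: *mut f32, b: core::arch::aarch64::float32x2x4_t) {
    // Writes b.0[0], b.1[0], b.2[0], b.3[0], b.0[1], b.1[1], ...
    core::arch::aarch64::vst4_f32(dst, b);
}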
/// Store multiple 4-element structures from four registers
name = vst4
in1-lane-nox
constn = LANE
multi_fn = static_assert_imm-in_exp_len-LANE
a = 0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64
n = 0
validate 1, 2, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
store_fn
arm-aarch64-separate
aarch64 = st4lane
link-aarch64 = st4lane._EXTpi8_
const-aarch64 = LANE
//generate *mut i8:int8x16x4_t:void, *mut i64:int64x1x4_t:void, *mut i64:int64x2x4_t:void
arm = vst4lane
link-arm = vst4lane._EXTpi8r_
const-arm = LANE
//generate *mut i8:int8x8x4_t:void, *mut i16:int16x4x4_t:void, *mut i32:int32x2x4_t:void
//generate *mut i16:int16x8x4_t:void, *mut i32:int32x4x4_t:void
/// Store multiple 4-element structures from four registers
name = vst4
in1-lane-nox
constn = LANE
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = transmute, {vst4-in1signedlanenox-::<LANE>, transmute(a), transmute(b)}
a = 0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64
n = 0
validate 1, 2, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
store_fn
aarch64 = st4lane
//generate *mut u8:uint8x16x4_t:void, *mut u64:uint64x1x4_t:void, *mut u64:uint64x2x4_t:void, *mut p8:poly8x16x4_t:void
target = aes
//generate *mut p64:poly64x1x4_t:void, *mut p64:poly64x2x4_t:void
target = default
arm = vst4lane
//generate *mut u8:uint8x8x4_t:void, *mut u16:uint16x4x4_t:void, *mut u32:uint32x2x4_t:void
//generate *mut u16:uint16x8x4_t:void, *mut u32:uint32x4x4_t:void
//generate *mut p8:poly8x8x4_t:void, *mut p16:poly16x4x4_t:void, *mut p16:poly16x8x4_t:void
/// Store multiple 4-element structures from four registers
name = vst4
in1-lane-nox
constn = LANE
multi_fn = static_assert_imm-in_exp_len-LANE
a = 0., 1., 2., 2., 6., 2., 6., 6., 8., 2., 6., 6., 8., 6., 8., 8., 16.
n = 0
validate 1., 2., 2., 6., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.
store_fn
arm-aarch64-separate
aarch64 = st4lane
link-aarch64 = st4lane._EXTpi8_
const-aarch64 = LANE
//generate *mut f64:float64x1x4_t:void, *mut f64:float64x2x4_t:void
arm = vst4lane
link-arm = vst4lane._EXTpi8r_
const-arm = LANE
//generate *mut f32:float32x2x4_t:void, *mut f32:float32x4x4_t:void
/// Multiply
name = vmul
a = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2
@@ -427,8 +427,10 @@ enum Suffix {
OutSuffix,
OutNSuffix,
OutNox,
In1Nox,
OutDupNox,
OutLaneNox,
In1LaneNox,
Lane,
In2,
In2Lane,
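The two new Suffix variants exist because store intrinsics return void: the generated name has to be derived from the first input type instead of the output type. The naming rule, illustratively (helper behaviour assumed from context):

// In1Nox / In1LaneNox naming:
//   current_name = "vst2", in_t[1] = "int8x8x2_t"
//   type_to_sub_type("int8x8x2_t") == "int8x8_t"
//   type_to_suffix("int8x8_t")      == "_s8"      => "vst2_s8"
//   type_to_lane_suffix("int8x8_t") == "_lane_s8" => "vst2_lane_s8"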
@@ -909,7 +911,25 @@ fn ext(s: &str, in_t: &[&str; 3], out_t: &str) -> String {
.replace("_EXT3_", &type_to_ext(in_t[1], false, false, false))
.replace("_EXTr3_", &type_to_ext(in_t[1], false, true, false))
.replace("_EXTv2_", &type_to_ext(out_t, true, false, false))
.replace("_EXTpi8_", &type_to_ext(in_t[1], false, false, true))
.replace("_EXTpi82_", &type_to_ext(out_t, false, false, true))
.replace("_EXTpi8r_", &type_to_ext(in_t[1], false, true, true))
}
fn is_vldx(name: &str) -> bool {
let s: Vec<_> = name.split('_').collect();
s.len() == 2
&& &name[0..3] == "vld"
&& name[3..4].parse::<i32>().unwrap() > 1
&& (s[1].starts_with("s") || s[1].starts_with("f"))
}
fn is_vstx(name: &str) -> bool {
let s: Vec<_> = name.split('_').collect();
s.len() == 2
&& &name[0..3] == "vst"
&& name[3..4].parse::<i32>().unwrap() > 1
&& (s[1].starts_with("s") || s[1].starts_with("f"))
}
#[allow(clippy::too_many_arguments)]
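These two predicates gate the new pointer-typing logic to the multi-register loads and stores of signed and float element types; the unsigned and poly variants are transmute wrappers (see the spec entries above) and never reach a link function. Illustrative assertions, assuming the helpers are in scope:

// is_vstx splits on '_' and checks "vstN" with N > 1 plus an s/f element type.
assert!(is_vstx("vst2_s8"));        // stride 2, signed element
assert!(is_vstx("vst4_f32"));       // float elements also qualify
assert!(!is_vstx("vst1_s8"));       // vst1 is excluded: 1 is not > 1
assert!(!is_vstx("vst2_u8"));       // unsigned goes through the signed wrapper
assert!(!is_vstx("vst2_lane_s8"));  // three segments, so s.len() != 2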
@@ -964,6 +984,11 @@ fn gen_aarch64(
current_name,
type_to_suffix(&type_to_sub_type(out_t))
),
In1Nox => format!(
"{}{}",
current_name,
type_to_suffix(&type_to_sub_type(in_t[1]))
),
OutDupNox => format!(
"{}{}",
current_name,
@@ -974,6 +999,11 @@ fn gen_aarch64(
current_name,
type_to_lane_suffix(&type_to_sub_type(out_t))
),
In1LaneNox => format!(
"{}{}",
current_name,
type_to_lane_suffix(&type_to_sub_type(in_t[1]))
),
Lane => format!("{}{}", current_name, type_to_lane_suffixes(out_t, in_t[1])),
In2 => format!("{}{}", current_name, type_to_suffix(in_t[2])),
In2Lane => format!("{}{}", current_name, type_to_lane_suffixes(out_t, in_t[2])),
@@ -1030,23 +1060,32 @@ fn gen_aarch64(
};
let (ext_inputs, ext_output) = {
if const_aarch64.is_some() {
if matches!(fn_type, Fntype::Load) {
if !matches!(fn_type, Fntype::Normal) {
let ptr_type = match fn_type {
Fntype::Load => "*const i8",
Fntype::Store => "*mut i8",
_ => panic!("unsupported fn type"),
};
let sub = type_to_sub_type(in_t[1]);
(
match type_sub_len(in_t[1]) {
1 => format!("a: {}, n: i64, ptr: *const i8", sub),
2 => format!("a: {}, b: {}, n: i64, ptr: *const i8", sub, sub),
1 => format!("a: {}, n: i64, ptr: {}", sub, ptr_type),
2 => format!("a: {}, b: {}, n: i64, ptr: {}", sub, sub, ptr_type),
3 => format!(
"a: {}, b: {}, c: {}, n: i64, ptr: *const i8",
sub, sub, sub
"a: {}, b: {}, c: {}, n: i64, ptr: {}",
sub, sub, sub, ptr_type
),
4 => format!(
"a: {}, b: {}, c: {}, d: {}, n: i64, ptr: *const i8",
sub, sub, sub, sub
"a: {}, b: {}, c: {}, d: {}, n: i64, ptr: {}",
sub, sub, sub, sub, ptr_type
),
_ => panic!("unsupported type: {}", in_t[1]),
},
format!(" -> {}", out_t),
if out_t != "void" {
format!(" -> {}", out_t)
} else {
String::new()
},
)
} else {
(
@@ -1061,19 +1100,23 @@ fn gen_aarch64(
}
} else if matches!(fn_type, Fntype::Store) {
let sub = type_to_sub_type(in_t[1]);
let native = type_to_native_type(in_t[1]);
(
match type_sub_len(in_t[1]) {
1 => format!("a: {}, ptr: *mut {}", sub, native),
2 => format!("a: {}, b: {}, ptr: *mut {}", sub, sub, native),
3 => format!("a: {}, b: {}, c: {}, ptr: *mut {}", sub, sub, sub, native),
4 => format!(
"a: {}, b: {}, c: {}, d: {}, ptr: *mut {}",
sub, sub, sub, sub, native
),
let ptr_type = if is_vstx(&name) {
"i8".to_string()
} else {
type_to_native_type(in_t[1])
};
let subs = match type_sub_len(in_t[1]) {
1 => format!("a: {}", sub),
2 => format!("a: {}, b: {}", sub, sub),
3 => format!("a: {}, b: {}, c: {}", sub, sub, sub),
4 => format!("a: {}, b: {}, c: {}, d: {}", sub, sub, sub, sub),
_ => panic!("unsupported type: {}", in_t[1]),
},
String::new(),
};
(format!("{}, ptr: *mut {}", subs, ptr_type), String::new())
} else if is_vldx(&name) {
(
format!("ptr: *const {}", type_to_sub_type(out_t)),
format!(" -> {}", out_t),
)
} else {
(
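Concretely, the store branch above now declares the LLVM binding with an i8 pointer for the vstN family and lets the caller cast. For vst2q_s64 the emitted declaration would be roughly (link attribute shown as an assumption):

// Sketch of the emitted extern binding for vst2q_s64:
extern "unadjusted" {
    // #[link_name = "llvm.aarch64.neon.st2.v2i64.p0i8"]  // assumed expansion of st2._EXTpi8_
    fn vst2q_s64_(a: int64x2_t, b: int64x2_t, ptr: *mut i8);
}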
@@ -1185,7 +1228,7 @@ fn gen_aarch64(
};
let call_params = {
if let (Some(const_aarch64), Some(_)) = (const_aarch64, link_aarch64) {
if matches!(fn_type, Fntype::Load) {
if !matches!(fn_type, Fntype::Normal) {
let subs = match type_sub_len(in_t[1]) {
1 => "b",
2 => "b.0, b.1",
@@ -1195,7 +1238,7 @@ fn gen_aarch64(
};
format!(
r#"{}
{}{}({}, {} as i64, a as *const i8)"#,
{}{}({}, {} as i64, a.cast())"#,
multi_calls,
ext_c,
current_fn,
@@ -1217,14 +1260,17 @@ fn gen_aarch64(
_ => String::new(),
}
}
} else if matches!(fn_type, Fntype::Store) {
} else if link_aarch64.is_some() && matches!(fn_type, Fntype::Store) {
let cast = if is_vstx(&name) { ".cast()" } else { "" };
match type_sub_len(in_t[1]) {
1 => format!(r#"{}{}(b, a)"#, ext_c, current_fn),
2 => format!(r#"{}{}(b.0, b.1, a)"#, ext_c, current_fn),
3 => format!(r#"{}{}(b.0, b.1, b.2, a)"#, ext_c, current_fn),
4 => format!(r#"{}{}(b.0, b.1, b.2, b.3, a)"#, ext_c, current_fn),
1 => format!(r#"{}{}(b, a{})"#, ext_c, current_fn, cast),
2 => format!(r#"{}{}(b.0, b.1, a{})"#, ext_c, current_fn, cast),
3 => format!(r#"{}{}(b.0, b.1, b.2, a{})"#, ext_c, current_fn, cast),
4 => format!(r#"{}{}(b.0, b.1, b.2, b.3, a{})"#, ext_c, current_fn, cast),
_ => panic!("unsupported type: {}", in_t[1]),
}
} else if link_aarch64.is_some() && is_vldx(&name) {
format!(r#"{}{}(a.cast())"#, ext_c, current_fn,)
} else {
let trans: [&str; 2] = if link_t[3] != out_t {
["transmute(", ")"]
@@ -1406,7 +1452,7 @@ fn gen_store_test(
unsafe fn test_{}() {{"#,
name,
);
for (a, _, _, _, e) in current_tests {
for (a, _, _, constn, e) in current_tests {
let a: Vec<String> = a.iter().take(type_len + 1).cloned().collect();
let e: Vec<String> = e.iter().take(type_len).cloned().collect();
let mut input = String::from("[");
@@ -1425,12 +1471,15 @@ fn gen_store_test(
output.push_str(&e[i])
}
output.push_str("]");
let const_n = constn
.as_deref()
.map_or(String::new(), |n| format!("::<{}>", n.to_string()));
let t = format!(
r#"
let a: [{}; {}] = {};
let e: [{}; {}] = {};
let mut r: [{}; {}] = [0{}; {}];
{}(r.as_mut_ptr(), {}(a[1..].as_ptr()));
{}{}(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);
"#,
type_to_native_type(in_t[1]),
@@ -1444,7 +1493,7 @@ fn gen_store_test(
type_to_native_type(in_t[1]),
type_len,
name,
name.replace("st", "ld"),
const_n,
);
test.push_str(&t);
}
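With constn now threaded through gen_store_test, lane stores get their immediate as a turbofish instead of being dropped. For a vst2_lane_f32 instance with the spec data above, the emitted test body would read roughly:

let a: [f32; 5] = [0., 1., 2., 2., 3.];
let e: [f32; 4] = [1., 2., 0., 0.];
let mut r: [f32; 4] = [0f32; 4];
vst2_lane_f32::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr().cast()));
assert_eq!(r, e);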
@@ -1613,6 +1662,11 @@ fn gen_arm(
current_name,
type_to_suffix(&type_to_sub_type(out_t))
),
In1Nox => format!(
"{}{}",
current_name,
type_to_suffix(&type_to_sub_type(in_t[1]))
),
OutDupNox => format!(
"{}{}",
current_name,
@@ -1623,6 +1677,11 @@ fn gen_arm(
current_name,
type_to_lane_suffix(&type_to_sub_type(out_t))
),
In1LaneNox => format!(
"{}{}",
current_name,
type_to_lane_suffix(&type_to_sub_type(in_t[1]))
),
Lane => format!("{}{}", current_name, type_to_lane_suffixes(out_t, in_t[1])),
In2 => format!("{}{}", current_name, type_to_suffix(in_t[2])),
In2Lane => format!("{}{}", current_name, type_to_lane_suffixes(out_t, in_t[2])),
@@ -1752,7 +1811,12 @@ fn gen_arm(
};
let (arm_ext_inputs, arm_ext_output) = {
if let Some(const_arm) = const_arm {
if matches!(fn_type, Fntype::Load) {
if !matches!(fn_type, Fntype::Normal) {
let ptr_type = match fn_type {
Fntype::Load => "*const i8",
Fntype::Store => "*mut i8",
_ => panic!("unsupported fn type"),
};
let sub_type = type_to_sub_type(in_t[1]);
let inputs = match type_sub_len(in_t[1]) {
1 => format!("a: {}", sub_type),
@@ -1765,7 +1829,7 @@ fn gen_arm(
_ => panic!("unknown type: {}", in_t[1]),
};
(
format!("ptr: *const i8, {}, n: i32, size: i32", inputs),
format!("ptr: {}, {}, n: i32, size: i32", ptr_type, inputs),
String::new(),
)
} else {
@@ -1817,10 +1881,20 @@ fn gen_arm(
),
_ => panic!("unknown type: {}", in_t[1]),
};
let (ptr_type, size) = if is_vstx(&name) {
("i8".to_string(), ", size: i32")
} else {
(type_to_native_type(in_t[1]), "")
};
(
format!("ptr: *mut {}, {}", type_to_native_type(in_t[1]), inputs),
format!("ptr: *mut {}, {}{}", ptr_type, inputs, size),
String::new(),
)
} else if is_vldx(&name) {
(
format!("ptr: *const i8, size: i32"),
format!(" -> {}", out_t),
)
} else {
(String::new(), String::new())
}
@@ -1836,7 +1910,12 @@ fn gen_arm(
));
let (aarch64_ext_inputs, aarch64_ext_output) = {
if const_aarch64.is_some() {
if matches!(fn_type, Fntype::Load) {
if !matches!(fn_type, Fntype::Normal) {
let ptr_type = match fn_type {
Fntype::Load => "*const i8",
Fntype::Store => "*mut i8",
_ => panic!("unsupported fn type"),
};
let sub_type = type_to_sub_type(in_t[1]);
let mut inputs = match type_sub_len(in_t[1]) {
1 => format!("a: {}", sub_type,),
@@ -1848,8 +1927,13 @@ fn gen_arm(
),
_ => panic!("unknown type: {}", in_t[1]),
};
inputs.push_str(&format!(", n: i64, ptr: *const i8"));
(inputs, format!(" -> {}", out_t))
inputs.push_str(&format!(", n: i64, ptr: {}", ptr_type));
let out = if out_t == "void" {
String::new()
} else {
format!(" -> {}", out_t)
};
(inputs, out)
} else {
(
match para_num {
@@ -1886,8 +1970,18 @@ fn gen_arm(
),
_ => panic!("unknown type: {}", in_t[1]),
};
inputs.push_str(&format!(", ptr: *mut {}", type_to_native_type(in_t[0])));
let ptr_type = if is_vstx(&name) {
"i8".to_string()
} else {
type_to_native_type(in_t[1])
};
inputs.push_str(&format!(", ptr: *mut {}", ptr_type));
(inputs, String::new())
} else if is_vldx(&name) {
(
format!("ptr: *const {}", type_to_sub_type(out_t)),
format!(" -> {}", out_t),
)
} else {
(String::new(), String::new())
}
@@ -1962,7 +2056,7 @@ fn gen_arm(
let function = if separate {
let call_arm = {
let arm_params = if let (Some(const_arm), Some(_)) = (const_arm, link_arm) {
if matches!(fn_type, Fntype::Load) {
if !matches!(fn_type, Fntype::Normal) {
let subs = match type_sub_len(in_t[1]) {
1 => "b",
2 => "b.0, b.1",
@@ -1971,7 +2065,7 @@ fn gen_arm(
_ => "",
};
format!(
"{}(a as *const i8, {}, {}, {})",
"{}(a.cast(), {}, {}, {})",
current_fn,
subs,
constn.as_deref().unwrap(),
@@ -2008,13 +2102,27 @@ fn gen_arm(
_ => String::new(),
}
} else if matches!(fn_type, Fntype::Store) {
let (cast, size) = if is_vstx(&name) {
(
".cast()",
format!(", {}", type_bits(&type_to_sub_type(in_t[1])) / 8),
)
} else {
("", String::new())
};
match type_sub_len(in_t[1]) {
1 => format!("{}(a, b)", current_fn),
2 => format!("{}(a, b.0, b.1)", current_fn),
3 => format!("{}(a, b.0, b.1, b.2)", current_fn),
4 => format!("{}(a, b.0, b.1, b.2, b.3)", current_fn),
1 => format!("{}(a{}, b{})", current_fn, cast, size),
2 => format!("{}(a{}, b.0, b.1{})", current_fn, cast, size),
3 => format!("{}(a{}, b.0, b.1, b.2{})", current_fn, cast, size),
4 => format!("{}(a{}, b.0, b.1, b.2, b.3{})", current_fn, cast, size),
_ => String::new(),
}
} else if link_arm.is_some() && is_vldx(&name) {
format!(
"{}(a as *const i8, {})",
current_fn,
type_bits(&type_to_sub_type(out_t)) / 8
)
} else {
String::new()
};
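The new is_vldx arm appends the element size in bytes as the trailing argument the ARM vldN link functions expect. Assuming type_bits returns the 32-bit element width for a float32x4_t sub-type, the rendered call string is:

// What the format! above produces for a vld4q_f32 wrapper (naming assumed):
assert_eq!(
    format!("{}(a as *const i8, {})", "vld4q_f32_", 32 / 8),
    "vld4q_f32_(a as *const i8, 4)"
);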
@@ -2028,7 +2136,7 @@ fn gen_arm(
let call_aarch64 = {
let aarch64_params =
if let (Some(const_aarch64), Some(_)) = (const_aarch64, link_aarch64) {
if matches!(fn_type, Fntype::Load) {
if !matches!(fn_type, Fntype::Normal) {
let subs = match type_sub_len(in_t[1]) {
1 => "b",
2 => "b.0, b.1",
@@ -2037,7 +2145,7 @@ fn gen_arm(
_ => "",
};
format!(
"{}({}, {} as i64, a as *const i8)",
"{}({}, {} as i64, a.cast())",
current_fn,
subs,
constn.as_deref().unwrap()
@@ -2056,13 +2164,16 @@ fn gen_arm(
_ => String::new(),
}
} else if matches!(fn_type, Fntype::Store) {
let cast = if is_vstx(&name) { ".cast()" } else { "" };
match type_sub_len(in_t[1]) {
1 => format!("{}(b, a)", current_fn),
2 => format!("{}(b.0, b.1, a)", current_fn),
3 => format!("{}(b.0, b.1, b.2, a)", current_fn),
4 => format!("{}(b.0, b.1, b.2, b.3, a)", current_fn),
1 => format!("{}(b, a{})", current_fn, cast),
2 => format!("{}(b.0, b.1, a{})", current_fn, cast),
3 => format!("{}(b.0, b.1, b.2, a{})", current_fn, cast),
4 => format!("{}(b.0, b.1, b.2, b.3, a{})", current_fn, cast),
_ => String::new(),
}
} else if link_aarch64.is_some() && is_vldx(&name) {
format!("{}(a.cast())", current_fn)
} else {
String::new()
};
@@ -2599,6 +2710,10 @@ fn get_call(
fn_name.push_str(&type_to_suffix(&type_to_sub_type(&type_to_signed(
&String::from(out_t),
))));
} else if fn_format[1] == "in1signednox" {
fn_name.push_str(&type_to_suffix(&type_to_sub_type(&type_to_signed(
&String::from(in_t[1]),
))));
} else if fn_format[1] == "outsigneddupnox" {
fn_name.push_str(&type_to_dup_suffix(&type_to_sub_type(&type_to_signed(
&String::from(out_t),
@@ -2607,6 +2722,10 @@ fn get_call(
fn_name.push_str(&type_to_lane_suffix(&type_to_sub_type(&type_to_signed(
&String::from(out_t),
))));
} else if fn_format[1] == "in1signedlanenox" {
fn_name.push_str(&type_to_lane_suffix(&type_to_sub_type(&type_to_signed(
&String::from(in_t[1]),
))));
} else if fn_format[1] == "unsigned" {
fn_name.push_str(type_to_suffix(type_to_unsigned(in_t[1])));
} else if fn_format[1] == "doubleself" {
@@ -2672,6 +2791,8 @@ fn get_call(
r#"let {}: {} = {}({});"#,
re_name, re_type, fn_name, param_str
)
} else if fn_name.starts_with("*") {
format!(r#"{} = {};"#, fn_name, param_str)
} else {
format!(r#"{}({})"#, fn_name, param_str)
};
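This new starts_with("*") arm is what the vst1 lane spec entries rely on: a multi_fn whose head begins with * renders as an assignment rather than a call. With fn_name = "*a" and param_str = "simd_extract(b, LANE as u32)", the rendered line is:

*a = simd_extract(b, LANE as u32);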
@@ -2827,10 +2948,14 @@ mod test {
suffix = OutSuffix;
} else if line.starts_with("out-nox") {
suffix = OutNox;
} else if line.starts_with("in1-nox") {
suffix = In1Nox;
} else if line.starts_with("out-dup-nox") {
suffix = OutDupNox;
} else if line.starts_with("out-lane-nox") {
suffix = OutLaneNox;
} else if line.starts_with("in1-lane-nox") {
suffix = In1LaneNox;
} else if line.starts_with("lane-suffixes") {
suffix = Lane;
} else if line.starts_with("in2-suffix") {
@@ -138,6 +138,10 @@ pub fn assert(shim_addr: usize, fnname: &str, expected: &str) {
// removed once it has been addressed in LLVM.
"fcvtzu" | "fcvtzs" | "vcvt" => 64,
// core_arch/src/arm_shared/simd32
// vst1q_p64_x4_nop : #instructions = 33 >= 22 (limit)
"nop" if fnname.contains("vst1q_p64") => 34,
// Original limit was 20 instructions, but ARM DSP Intrinsics
// are exactly 20 instructions long. So, bump the limit to 22
// instead of adding here a long list of exceptions.