Change aarch64 vld1* instructions to not cause individual loads (#1207)

This commit is contained in:
Hans Kratz
2021-09-09 00:40:49 +02:00
committed by GitHub
parent bf2122753a
commit 00f611f305

View File

@@ -12,8 +12,8 @@ pub use self::generated::*;
use crate::{
core_arch::{arm_shared::*, simd::*, simd_llvm::*},
hint::unreachable_unchecked,
mem::{size_of, transmute, zeroed},
ptr::copy_nonoverlapping,
mem::{transmute, zeroed},
ptr::{read_unaligned, write_unaligned},
};
#[cfg(test)]
use stdarch_test::assert_instr;
@@ -464,16 +464,7 @@ pub unsafe fn vcopy_laneq_f64<const LANE1: i32, const LANE2: i32>(
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(ldr))]
pub unsafe fn vld1_s8(ptr: *const i8) -> int8x8_t {
// Diff view: this hunk shows the old and the new body back to back.
// Pre-change body (removed lines): assembles the vector from eight separate
// element reads, `*ptr` through `*ptr.offset(7)`.
transmute(i8x8::new(
*ptr,
*ptr.offset(1),
*ptr.offset(2),
*ptr.offset(3),
*ptr.offset(4),
*ptr.offset(5),
*ptr.offset(6),
*ptr.offset(7),
))
// Post-change body (added line): `ptr.cast()` retypes `ptr` to a pointer to the
// returned `int8x8_t`, so a single unaligned read fetches the whole vector.
read_unaligned(ptr.cast())
}
/// Load multiple single-element structures to one, two, three, or four registers.
@@ -481,24 +472,7 @@ pub unsafe fn vld1_s8(ptr: *const i8) -> int8x8_t {
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(ldr))]
pub unsafe fn vld1q_s8(ptr: *const i8) -> int8x16_t {
// Diff view: this hunk shows the old and the new body back to back.
// Pre-change body (removed lines): assembles the vector from sixteen separate
// element reads, `*ptr` through `*ptr.offset(15)`.
transmute(i8x16::new(
*ptr,
*ptr.offset(1),
*ptr.offset(2),
*ptr.offset(3),
*ptr.offset(4),
*ptr.offset(5),
*ptr.offset(6),
*ptr.offset(7),
*ptr.offset(8),
*ptr.offset(9),
*ptr.offset(10),
*ptr.offset(11),
*ptr.offset(12),
*ptr.offset(13),
*ptr.offset(14),
*ptr.offset(15),
))
// Post-change body (added line): `ptr.cast()` retypes `ptr` to a pointer to the
// returned `int8x16_t`, so a single unaligned read fetches the whole vector.
read_unaligned(ptr.cast())
}
/// Load multiple single-element structures to one, two, three, or four registers.
@@ -506,12 +480,7 @@ pub unsafe fn vld1q_s8(ptr: *const i8) -> int8x16_t {
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(ldr))]
pub unsafe fn vld1_s16(ptr: *const i16) -> int16x4_t {
// Diff view: this hunk shows the old and the new body back to back.
// Pre-change body (removed lines): assembles the vector from four separate
// element reads, `*ptr` through `*ptr.offset(3)`.
transmute(i16x4::new(
*ptr,
*ptr.offset(1),
*ptr.offset(2),
*ptr.offset(3),
))
// Post-change body (added line): `ptr.cast()` retypes `ptr` to a pointer to the
// returned `int16x4_t`, so a single unaligned read fetches the whole vector.
read_unaligned(ptr.cast())
}
/// Load multiple single-element structures to one, two, three, or four registers.
@@ -519,16 +488,7 @@ pub unsafe fn vld1_s16(ptr: *const i16) -> int16x4_t {
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(ldr))]
pub unsafe fn vld1q_s16(ptr: *const i16) -> int16x8_t {
// Diff view: this hunk shows the old and the new body back to back.
// Pre-change body (removed lines): assembles the vector from eight separate
// element reads, `*ptr` through `*ptr.offset(7)`.
transmute(i16x8::new(
*ptr,
*ptr.offset(1),
*ptr.offset(2),
*ptr.offset(3),
*ptr.offset(4),
*ptr.offset(5),
*ptr.offset(6),
*ptr.offset(7),
))
// Post-change body (added line): `ptr.cast()` retypes `ptr` to a pointer to the
// returned `int16x8_t`, so a single unaligned read fetches the whole vector.
read_unaligned(ptr.cast())
}
/// Load multiple single-element structures to one, two, three, or four registers.
@@ -536,7 +496,7 @@ pub unsafe fn vld1q_s16(ptr: *const i16) -> int16x8_t {
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(ldr))]
pub unsafe fn vld1_s32(ptr: *const i32) -> int32x2_t {
// Diff view: this hunk shows the old and the new body back to back.
// Pre-change body (removed line): two separate element reads, `*ptr` and
// `*ptr.offset(1)`, packed into an `i32x2`.
transmute(i32x2::new(*ptr, *ptr.offset(1)))
// Post-change body (added line): `ptr.cast()` retypes `ptr` to a pointer to the
// returned `int32x2_t`, so a single unaligned read fetches the whole vector.
read_unaligned(ptr.cast())
}
/// Load multiple single-element structures to one, two, three, or four registers.
@@ -544,12 +504,7 @@ pub unsafe fn vld1_s32(ptr: *const i32) -> int32x2_t {
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(ldr))]
pub unsafe fn vld1q_s32(ptr: *const i32) -> int32x4_t {
// Diff view: this hunk shows the old and the new body back to back.
// Pre-change body (removed lines): assembles the vector from four separate
// element reads, `*ptr` through `*ptr.offset(3)`.
transmute(i32x4::new(
*ptr,
*ptr.offset(1),
*ptr.offset(2),
*ptr.offset(3),
))
// Post-change body (added line): `ptr.cast()` retypes `ptr` to a pointer to the
// returned `int32x4_t`, so a single unaligned read fetches the whole vector.
read_unaligned(ptr.cast())
}
/// Load multiple single-element structures to one, two, three, or four registers.
@@ -557,7 +512,7 @@ pub unsafe fn vld1q_s32(ptr: *const i32) -> int32x4_t {
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(ldr))]
pub unsafe fn vld1_s64(ptr: *const i64) -> int64x1_t {
// Diff view: this hunk shows the old and the new body back to back.
// Pre-change body (removed line): reads the single element `*ptr` and wraps it
// in an `i64x1`.
transmute(i64x1::new(*ptr))
// Post-change body (added line): `ptr.cast()` retypes `ptr` to a pointer to the
// returned `int64x1_t`, which is then read with one unaligned load.
read_unaligned(ptr.cast())
}
/// Load multiple single-element structures to one, two, three, or four registers.
@@ -565,7 +520,7 @@ pub unsafe fn vld1_s64(ptr: *const i64) -> int64x1_t {
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(ldr))]
pub unsafe fn vld1q_s64(ptr: *const i64) -> int64x2_t {
// Diff view: this hunk shows the old and the new body back to back.
// Pre-change body (removed line): two separate element reads, `*ptr` and
// `*ptr.offset(1)`, packed into an `i64x2`.
transmute(i64x2::new(*ptr, *ptr.offset(1)))
// Post-change body (added line): `ptr.cast()` retypes `ptr` to a pointer to the
// returned `int64x2_t`, so a single unaligned read fetches the whole vector.
read_unaligned(ptr.cast())
}
/// Load multiple single-element structures to one, two, three, or four registers.
@@ -573,16 +528,7 @@ pub unsafe fn vld1q_s64(ptr: *const i64) -> int64x2_t {
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(ldr))]
pub unsafe fn vld1_u8(ptr: *const u8) -> uint8x8_t {
// Diff view: this hunk shows the old and the new body back to back.
// Pre-change body (removed lines): assembles the vector from eight separate
// element reads, `*ptr` through `*ptr.offset(7)`.
transmute(u8x8::new(
*ptr,
*ptr.offset(1),
*ptr.offset(2),
*ptr.offset(3),
*ptr.offset(4),
*ptr.offset(5),
*ptr.offset(6),
*ptr.offset(7),
))
// Post-change body (added line): `ptr.cast()` retypes `ptr` to a pointer to the
// returned `uint8x8_t`, so a single unaligned read fetches the whole vector.
read_unaligned(ptr.cast())
}
/// Load multiple single-element structures to one, two, three, or four registers.
@@ -590,24 +536,7 @@ pub unsafe fn vld1_u8(ptr: *const u8) -> uint8x8_t {
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(ldr))]
pub unsafe fn vld1q_u8(ptr: *const u8) -> uint8x16_t {
// Diff view: this hunk shows the old and the new body back to back.
// Pre-change body (removed lines): assembles the vector from sixteen separate
// element reads, `*ptr` through `*ptr.offset(15)`.
transmute(u8x16::new(
*ptr,
*ptr.offset(1),
*ptr.offset(2),
*ptr.offset(3),
*ptr.offset(4),
*ptr.offset(5),
*ptr.offset(6),
*ptr.offset(7),
*ptr.offset(8),
*ptr.offset(9),
*ptr.offset(10),
*ptr.offset(11),
*ptr.offset(12),
*ptr.offset(13),
*ptr.offset(14),
*ptr.offset(15),
))
// Post-change body (added line): `ptr.cast()` retypes `ptr` to a pointer to the
// returned `uint8x16_t`, so a single unaligned read fetches the whole vector.
read_unaligned(ptr.cast())
}
/// Load multiple single-element structures to one, two, three, or four registers.
@@ -615,12 +544,7 @@ pub unsafe fn vld1q_u8(ptr: *const u8) -> uint8x16_t {
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(ldr))]
pub unsafe fn vld1_u16(ptr: *const u16) -> uint16x4_t {
// Diff view: this hunk shows the old and the new body back to back.
// Pre-change body (removed lines): assembles the vector from four separate
// element reads, `*ptr` through `*ptr.offset(3)`.
transmute(u16x4::new(
*ptr,
*ptr.offset(1),
*ptr.offset(2),
*ptr.offset(3),
))
// Post-change body (added line): `ptr.cast()` retypes `ptr` to a pointer to the
// returned `uint16x4_t`, so a single unaligned read fetches the whole vector.
read_unaligned(ptr.cast())
}
/// Load multiple single-element structures to one, two, three, or four registers.
@@ -628,16 +552,7 @@ pub unsafe fn vld1_u16(ptr: *const u16) -> uint16x4_t {
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(ldr))]
pub unsafe fn vld1q_u16(ptr: *const u16) -> uint16x8_t {
// Diff view: this hunk shows the old and the new body back to back.
// Pre-change body (removed lines): assembles the vector from eight separate
// element reads, `*ptr` through `*ptr.offset(7)`.
transmute(u16x8::new(
*ptr,
*ptr.offset(1),
*ptr.offset(2),
*ptr.offset(3),
*ptr.offset(4),
*ptr.offset(5),
*ptr.offset(6),
*ptr.offset(7),
))
// Post-change body (added line): `ptr.cast()` retypes `ptr` to a pointer to the
// returned `uint16x8_t`, so a single unaligned read fetches the whole vector.
read_unaligned(ptr.cast())
}
/// Load multiple single-element structures to one, two, three, or four registers.
@@ -645,7 +560,7 @@ pub unsafe fn vld1q_u16(ptr: *const u16) -> uint16x8_t {
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(ldr))]
pub unsafe fn vld1_u32(ptr: *const u32) -> uint32x2_t {
// Diff view: this hunk shows the old and the new body back to back.
// Pre-change body (removed line): two separate element reads, `*ptr` and
// `*ptr.offset(1)`, packed into a `u32x2`.
transmute(u32x2::new(*ptr, *ptr.offset(1)))
// Post-change body (added line): `ptr.cast()` retypes `ptr` to a pointer to the
// returned `uint32x2_t`, so a single unaligned read fetches the whole vector.
read_unaligned(ptr.cast())
}
/// Load multiple single-element structures to one, two, three, or four registers.
@@ -653,12 +568,7 @@ pub unsafe fn vld1_u32(ptr: *const u32) -> uint32x2_t {
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(ldr))]
pub unsafe fn vld1q_u32(ptr: *const u32) -> uint32x4_t {
// Diff view: this hunk shows the old and the new body back to back.
// Pre-change body (removed lines): assembles the vector from four separate
// element reads, `*ptr` through `*ptr.offset(3)`.
transmute(u32x4::new(
*ptr,
*ptr.offset(1),
*ptr.offset(2),
*ptr.offset(3),
))
// Post-change body (added line): `ptr.cast()` retypes `ptr` to a pointer to the
// returned `uint32x4_t`, so a single unaligned read fetches the whole vector.
read_unaligned(ptr.cast())
}
/// Load multiple single-element structures to one, two, three, or four registers.
@@ -666,7 +576,7 @@ pub unsafe fn vld1q_u32(ptr: *const u32) -> uint32x4_t {
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(ldr))]
pub unsafe fn vld1_u64(ptr: *const u64) -> uint64x1_t {
// Diff view: this hunk shows the old and the new body back to back.
// Pre-change body (removed line): reads the single element `*ptr` and wraps it
// in a `u64x1`.
transmute(u64x1::new(*ptr))
// Post-change body (added line): `ptr.cast()` retypes `ptr` to a pointer to the
// returned `uint64x1_t`, which is then read with one unaligned load.
read_unaligned(ptr.cast())
}
/// Load multiple single-element structures to one, two, three, or four registers.
@@ -674,7 +584,7 @@ pub unsafe fn vld1_u64(ptr: *const u64) -> uint64x1_t {
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(ldr))]
pub unsafe fn vld1q_u64(ptr: *const u64) -> uint64x2_t {
// Diff view: this hunk shows the old and the new body back to back.
// Pre-change body (removed line): two separate element reads, `*ptr` and
// `*ptr.offset(1)`, packed into a `u64x2`.
transmute(u64x2::new(*ptr, *ptr.offset(1)))
// Post-change body (added line): `ptr.cast()` retypes `ptr` to a pointer to the
// returned `uint64x2_t`, so a single unaligned read fetches the whole vector.
read_unaligned(ptr.cast())
}
/// Load multiple single-element structures to one, two, three, or four registers.
@@ -682,16 +592,7 @@ pub unsafe fn vld1q_u64(ptr: *const u64) -> uint64x2_t {
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(ldr))]
pub unsafe fn vld1_p8(ptr: *const p8) -> poly8x8_t {
// Diff view: this hunk shows the old and the new body back to back.
// Pre-change body (removed lines): eight separate element reads, `*ptr` through
// `*ptr.offset(7)`, gathered in a `u8x8` and transmuted to the polynomial vector.
transmute(u8x8::new(
*ptr,
*ptr.offset(1),
*ptr.offset(2),
*ptr.offset(3),
*ptr.offset(4),
*ptr.offset(5),
*ptr.offset(6),
*ptr.offset(7),
))
// Post-change body (added line): `ptr.cast()` retypes `ptr` to a pointer to the
// returned `poly8x8_t`, so a single unaligned read fetches the whole vector.
read_unaligned(ptr.cast())
}
/// Load multiple single-element structures to one, two, three, or four registers.
@@ -699,24 +600,7 @@ pub unsafe fn vld1_p8(ptr: *const p8) -> poly8x8_t {
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(ldr))]
pub unsafe fn vld1q_p8(ptr: *const p8) -> poly8x16_t {
// Diff view: this hunk shows the old and the new body back to back.
// Pre-change body (removed lines): sixteen separate element reads, `*ptr` through
// `*ptr.offset(15)`, gathered in a `u8x16` and transmuted to the polynomial vector.
transmute(u8x16::new(
*ptr,
*ptr.offset(1),
*ptr.offset(2),
*ptr.offset(3),
*ptr.offset(4),
*ptr.offset(5),
*ptr.offset(6),
*ptr.offset(7),
*ptr.offset(8),
*ptr.offset(9),
*ptr.offset(10),
*ptr.offset(11),
*ptr.offset(12),
*ptr.offset(13),
*ptr.offset(14),
*ptr.offset(15),
))
// Post-change body (added line): `ptr.cast()` retypes `ptr` to a pointer to the
// returned `poly8x16_t`, so a single unaligned read fetches the whole vector.
read_unaligned(ptr.cast())
}
/// Load multiple single-element structures to one, two, three, or four registers.
@@ -724,12 +608,7 @@ pub unsafe fn vld1q_p8(ptr: *const p8) -> poly8x16_t {
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(ldr))]
pub unsafe fn vld1_p16(ptr: *const p16) -> poly16x4_t {
// Diff view: this hunk shows the old and the new body back to back.
// Pre-change body (removed lines): four separate element reads, `*ptr` through
// `*ptr.offset(3)`, gathered in a `u16x4` and transmuted to the polynomial vector.
transmute(u16x4::new(
*ptr,
*ptr.offset(1),
*ptr.offset(2),
*ptr.offset(3),
))
// Post-change body (added line): `ptr.cast()` retypes `ptr` to a pointer to the
// returned `poly16x4_t`, so a single unaligned read fetches the whole vector.
read_unaligned(ptr.cast())
}
/// Load multiple single-element structures to one, two, three, or four registers.
@@ -737,16 +616,7 @@ pub unsafe fn vld1_p16(ptr: *const p16) -> poly16x4_t {
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(ldr))]
pub unsafe fn vld1q_p16(ptr: *const p16) -> poly16x8_t {
// Diff view: this hunk shows the old and the new body back to back.
// Pre-change body (removed lines): eight separate element reads, `*ptr` through
// `*ptr.offset(7)`, gathered in a `u16x8` and transmuted to the polynomial vector.
transmute(u16x8::new(
*ptr,
*ptr.offset(1),
*ptr.offset(2),
*ptr.offset(3),
*ptr.offset(4),
*ptr.offset(5),
*ptr.offset(6),
*ptr.offset(7),
))
// Post-change body (added line): `ptr.cast()` retypes `ptr` to a pointer to the
// returned `poly16x8_t`, so a single unaligned read fetches the whole vector.
read_unaligned(ptr.cast())
}
/// Load multiple single-element structures to one, two, three, or four registers.
@@ -754,7 +624,7 @@ pub unsafe fn vld1q_p16(ptr: *const p16) -> poly16x8_t {
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(ldr))]
pub unsafe fn vld1_f32(ptr: *const f32) -> float32x2_t {
// Diff view: this hunk shows the old and the new body back to back.
// Pre-change body (removed line): two separate element reads, `*ptr` and
// `*ptr.offset(1)`, packed into an `f32x2`.
transmute(f32x2::new(*ptr, *ptr.offset(1)))
// Post-change body (added line): `ptr.cast()` retypes `ptr` to a pointer to the
// returned `float32x2_t`, so a single unaligned read fetches the whole vector.
read_unaligned(ptr.cast())
}
/// Load multiple single-element structures to one, two, three, or four registers.
@@ -762,12 +632,7 @@ pub unsafe fn vld1_f32(ptr: *const f32) -> float32x2_t {
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(ldr))]
pub unsafe fn vld1q_f32(ptr: *const f32) -> float32x4_t {
// Diff view: this hunk shows the old and the new body back to back.
// Pre-change body (removed lines): assembles the vector from four separate
// element reads, `*ptr` through `*ptr.offset(3)`.
transmute(f32x4::new(
*ptr,
*ptr.offset(1),
*ptr.offset(2),
*ptr.offset(3),
))
// Post-change body (added line): `ptr.cast()` retypes `ptr` to a pointer to the
// returned `float32x4_t`, so a single unaligned read fetches the whole vector.
read_unaligned(ptr.cast())
}
/// Load multiple single-element structures to one, two, three, or four registers.
@@ -775,7 +640,7 @@ pub unsafe fn vld1q_f32(ptr: *const f32) -> float32x4_t {
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(ldr))]
pub unsafe fn vld1_f64(ptr: *const f64) -> float64x1_t {
// Diff view: this hunk shows the old and the new body back to back.
// Pre-change body (removed line): reads the single element `*ptr` and wraps it
// in an `f64x1`.
transmute(f64x1::new(*ptr))
// Post-change body (added line): `ptr.cast()` retypes `ptr` to a pointer to the
// returned `float64x1_t`, which is then read with one unaligned load.
read_unaligned(ptr.cast())
}
/// Load multiple single-element structures to one, two, three, or four registers.
@@ -783,7 +648,7 @@ pub unsafe fn vld1_f64(ptr: *const f64) -> float64x1_t {
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(ldr))]
pub unsafe fn vld1q_f64(ptr: *const f64) -> float64x2_t {
// Diff view: this hunk shows the old and the new body back to back.
// Pre-change body (removed line): two separate element reads, `*ptr` and
// `*ptr.offset(1)`, packed into an `f64x2`.
transmute(f64x2::new(*ptr, *ptr.offset(1)))
// Post-change body (added line): `ptr.cast()` retypes `ptr` to a pointer to the
// returned `float64x2_t`, so a single unaligned read fetches the whole vector.
read_unaligned(ptr.cast())
}
/// Store multiple single-element structures from one, two, three, or four registers.
@@ -792,11 +657,7 @@ pub unsafe fn vld1q_f64(ptr: *const f64) -> float64x2_t {
#[cfg_attr(test, assert_instr(str))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn vst1_s8(ptr: *mut i8, a: int8x8_t) {
// Diff view: this hunk shows the old and the new body back to back.
// Pre-change body (removed lines): copies `size_of::<int8x8_t>()` elements of
// type `i8` from `a` to `ptr`; with a one-byte element the element count and
// the byte count are the same number.
copy_nonoverlapping(
&a as *const int8x8_t as *const i8,
ptr as *mut i8,
size_of::<int8x8_t>(),
)
// Post-change body (added line): `ptr.cast()` retypes `ptr` to a pointer to
// `int8x8_t` and the whole vector is written with one unaligned store.
write_unaligned(ptr.cast(), a);
}
/// Store multiple single-element structures from one, two, three, or four registers.
@@ -805,11 +666,7 @@ pub unsafe fn vst1_s8(ptr: *mut i8, a: int8x8_t) {
#[cfg_attr(test, assert_instr(str))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn vst1q_s8(ptr: *mut i8, a: int8x16_t) {
// Diff view: this hunk shows the old and the new body back to back.
// Pre-change body (removed lines): copies `size_of::<int8x16_t>()` elements of
// type `i8` from `a` to `ptr`; with a one-byte element the element count and
// the byte count are the same number.
copy_nonoverlapping(
&a as *const int8x16_t as *const i8,
ptr as *mut i8,
size_of::<int8x16_t>(),
)
// Post-change body (added line): `ptr.cast()` retypes `ptr` to a pointer to
// `int8x16_t` and the whole vector is written with one unaligned store.
write_unaligned(ptr.cast(), a);
}
/// Store multiple single-element structures from one, two, three, or four registers.
@@ -818,11 +675,7 @@ pub unsafe fn vst1q_s8(ptr: *mut i8, a: int8x16_t) {
#[cfg_attr(test, assert_instr(str))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn vst1_s16(ptr: *mut i16, a: int16x4_t) {
// Diff view: this hunk shows the old and the new body back to back.
// Pre-change body (removed lines): element-typed copy from `a` to `ptr`.
// NOTE(review): `copy_nonoverlapping` counts in units of the pointee (`i16`
// here), but the count passed is `size_of::<int16x4_t>()`, a byte count. That
// looks like more `i16`s than the vector holds; confirm against the std docs.
copy_nonoverlapping(
&a as *const int16x4_t as *const i16,
ptr as *mut i16,
size_of::<int16x4_t>(),
)
// Post-change body (added line): `ptr.cast()` retypes `ptr` to a pointer to
// `int16x4_t` and the whole vector is written with one unaligned store.
write_unaligned(ptr.cast(), a);
}
/// Store multiple single-element structures from one, two, three, or four registers.
@@ -831,11 +684,7 @@ pub unsafe fn vst1_s16(ptr: *mut i16, a: int16x4_t) {
#[cfg_attr(test, assert_instr(str))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn vst1q_s16(ptr: *mut i16, a: int16x8_t) {
// Diff view: this hunk shows the old and the new body back to back.
// Pre-change body (removed lines): element-typed copy from `a` to `ptr`.
// NOTE(review): `copy_nonoverlapping` counts in units of the pointee (`i16`
// here), but the count passed is `size_of::<int16x8_t>()`, a byte count. That
// looks like more `i16`s than the vector holds; confirm against the std docs.
copy_nonoverlapping(
&a as *const int16x8_t as *const i16,
ptr as *mut i16,
size_of::<int16x8_t>(),
)
// Post-change body (added line): `ptr.cast()` retypes `ptr` to a pointer to
// `int16x8_t` and the whole vector is written with one unaligned store.
write_unaligned(ptr.cast(), a);
}
/// Store multiple single-element structures from one, two, three, or four registers.
@@ -844,11 +693,7 @@ pub unsafe fn vst1q_s16(ptr: *mut i16, a: int16x8_t) {
#[cfg_attr(test, assert_instr(str))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn vst1_s32(ptr: *mut i32, a: int32x2_t) {
// Diff view: this hunk shows the old and the new body back to back.
// Pre-change body (removed lines): element-typed copy from `a` to `ptr`.
// NOTE(review): `copy_nonoverlapping` counts in units of the pointee (`i32`
// here), but the count passed is `size_of::<int32x2_t>()`, a byte count. That
// looks like more `i32`s than the vector holds; confirm against the std docs.
copy_nonoverlapping(
&a as *const int32x2_t as *const i32,
ptr as *mut i32,
size_of::<int32x2_t>(),
)
// Post-change body (added line): `ptr.cast()` retypes `ptr` to a pointer to
// `int32x2_t` and the whole vector is written with one unaligned store.
write_unaligned(ptr.cast(), a);
}
/// Store multiple single-element structures from one, two, three, or four registers.
@@ -857,11 +702,7 @@ pub unsafe fn vst1_s32(ptr: *mut i32, a: int32x2_t) {
#[cfg_attr(test, assert_instr(str))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn vst1q_s32(ptr: *mut i32, a: int32x4_t) {
// Diff view: this hunk shows the old and the new body back to back.
// Pre-change body (removed lines): element-typed copy from `a` to `ptr`.
// NOTE(review): `copy_nonoverlapping` counts in units of the pointee (`i32`
// here), but the count passed is `size_of::<int32x4_t>()`, a byte count. That
// looks like more `i32`s than the vector holds; confirm against the std docs.
copy_nonoverlapping(
&a as *const int32x4_t as *const i32,
ptr as *mut i32,
size_of::<int32x4_t>(),
)
// Post-change body (added line): `ptr.cast()` retypes `ptr` to a pointer to
// `int32x4_t` and the whole vector is written with one unaligned store.
write_unaligned(ptr.cast(), a);
}
/// Store multiple single-element structures from one, two, three, or four registers.
@@ -870,11 +711,7 @@ pub unsafe fn vst1q_s32(ptr: *mut i32, a: int32x4_t) {
#[cfg_attr(test, assert_instr(str))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn vst1_s64(ptr: *mut i64, a: int64x1_t) {
// Diff view: this hunk shows the old and the new body back to back.
// Pre-change body (removed lines): element-typed copy from `a` to `ptr`.
// NOTE(review): `copy_nonoverlapping` counts in units of the pointee (`i64`
// here), but the count passed is `size_of::<int64x1_t>()`, a byte count. That
// looks like more `i64`s than the vector holds; confirm against the std docs.
copy_nonoverlapping(
&a as *const int64x1_t as *const i64,
ptr as *mut i64,
size_of::<int64x1_t>(),
)
// Post-change body (added line): `ptr.cast()` retypes `ptr` to a pointer to
// `int64x1_t` and the whole vector is written with one unaligned store.
write_unaligned(ptr.cast(), a);
}
/// Store multiple single-element structures from one, two, three, or four registers.
@@ -883,11 +720,7 @@ pub unsafe fn vst1_s64(ptr: *mut i64, a: int64x1_t) {
#[cfg_attr(test, assert_instr(str))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn vst1q_s64(ptr: *mut i64, a: int64x2_t) {
// Diff view: this hunk shows the old and the new body back to back.
// Pre-change body (removed lines): element-typed copy from `a` to `ptr`.
// NOTE(review): `copy_nonoverlapping` counts in units of the pointee (`i64`
// here), but the count passed is `size_of::<int64x2_t>()`, a byte count. That
// looks like more `i64`s than the vector holds; confirm against the std docs.
copy_nonoverlapping(
&a as *const int64x2_t as *const i64,
ptr as *mut i64,
size_of::<int64x2_t>(),
)
// Post-change body (added line): `ptr.cast()` retypes `ptr` to a pointer to
// `int64x2_t` and the whole vector is written with one unaligned store.
write_unaligned(ptr.cast(), a);
}
/// Store multiple single-element structures from one, two, three, or four registers.
@@ -896,11 +729,7 @@ pub unsafe fn vst1q_s64(ptr: *mut i64, a: int64x2_t) {
#[cfg_attr(test, assert_instr(str))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn vst1_u8(ptr: *mut u8, a: uint8x8_t) {
// Diff view: this hunk shows the old and the new body back to back.
// Pre-change body (removed lines): copies `size_of::<uint8x8_t>()` elements of
// type `u8` from `a` to `ptr`; with a one-byte element the element count and
// the byte count are the same number.
copy_nonoverlapping(
&a as *const uint8x8_t as *const u8,
ptr as *mut u8,
size_of::<uint8x8_t>(),
)
// Post-change body (added line): `ptr.cast()` retypes `ptr` to a pointer to
// `uint8x8_t` and the whole vector is written with one unaligned store.
write_unaligned(ptr.cast(), a);
}
/// Store multiple single-element structures from one, two, three, or four registers.
@@ -909,11 +738,7 @@ pub unsafe fn vst1_u8(ptr: *mut u8, a: uint8x8_t) {
#[cfg_attr(test, assert_instr(str))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn vst1q_u8(ptr: *mut u8, a: uint8x16_t) {
// Diff view: this hunk shows the old and the new body back to back.
// Pre-change body (removed lines): copies `size_of::<uint8x16_t>()` elements of
// type `u8` from `a` to `ptr`; with a one-byte element the element count and
// the byte count are the same number.
copy_nonoverlapping(
&a as *const uint8x16_t as *const u8,
ptr as *mut u8,
size_of::<uint8x16_t>(),
)
// Post-change body (added line): `ptr.cast()` retypes `ptr` to a pointer to
// `uint8x16_t` and the whole vector is written with one unaligned store.
write_unaligned(ptr.cast(), a);
}
/// Store multiple single-element structures from one, two, three, or four registers.
@@ -922,11 +747,7 @@ pub unsafe fn vst1q_u8(ptr: *mut u8, a: uint8x16_t) {
#[cfg_attr(test, assert_instr(str))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn vst1_u16(ptr: *mut u16, a: uint16x4_t) {
// Diff view: this hunk shows the old and the new body back to back.
// Pre-change body (removed lines): element-typed copy from `a` to `ptr`.
// NOTE(review): `copy_nonoverlapping` counts in units of the pointee (`u16`
// here), but the count passed is `size_of::<uint16x4_t>()`, a byte count. That
// looks like more `u16`s than the vector holds; confirm against the std docs.
copy_nonoverlapping(
&a as *const uint16x4_t as *const u16,
ptr as *mut u16,
size_of::<uint16x4_t>(),
)
// Post-change body (added line): `ptr.cast()` retypes `ptr` to a pointer to
// `uint16x4_t` and the whole vector is written with one unaligned store.
write_unaligned(ptr.cast(), a);
}
/// Store multiple single-element structures from one, two, three, or four registers.
@@ -935,11 +756,7 @@ pub unsafe fn vst1_u16(ptr: *mut u16, a: uint16x4_t) {
#[cfg_attr(test, assert_instr(str))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn vst1q_u16(ptr: *mut u16, a: uint16x8_t) {
// Diff view: this hunk shows the old and the new body back to back.
// Pre-change body (removed lines): element-typed copy from `a` to `ptr`.
// NOTE(review): `copy_nonoverlapping` counts in units of the pointee (`u16`
// here), but the count passed is `size_of::<uint16x8_t>()`, a byte count. That
// looks like more `u16`s than the vector holds; confirm against the std docs.
copy_nonoverlapping(
&a as *const uint16x8_t as *const u16,
ptr as *mut u16,
size_of::<uint16x8_t>(),
)
// Post-change body (added line): `ptr.cast()` retypes `ptr` to a pointer to
// `uint16x8_t` and the whole vector is written with one unaligned store.
write_unaligned(ptr.cast(), a);
}
/// Store multiple single-element structures from one, two, three, or four registers.
@@ -948,11 +765,7 @@ pub unsafe fn vst1q_u16(ptr: *mut u16, a: uint16x8_t) {
#[cfg_attr(test, assert_instr(str))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn vst1_u32(ptr: *mut u32, a: uint32x2_t) {
// Diff view: this hunk shows the old and the new body back to back.
// Pre-change body (removed lines): element-typed copy from `a` to `ptr`.
// NOTE(review): `copy_nonoverlapping` counts in units of the pointee (`u32`
// here), but the count passed is `size_of::<uint32x2_t>()`, a byte count. That
// looks like more `u32`s than the vector holds; confirm against the std docs.
copy_nonoverlapping(
&a as *const uint32x2_t as *const u32,
ptr as *mut u32,
size_of::<uint32x2_t>(),
)
// Post-change body (added line): `ptr.cast()` retypes `ptr` to a pointer to
// `uint32x2_t` and the whole vector is written with one unaligned store.
write_unaligned(ptr.cast(), a);
}
/// Store multiple single-element structures from one, two, three, or four registers.
@@ -961,11 +774,7 @@ pub unsafe fn vst1_u32(ptr: *mut u32, a: uint32x2_t) {
#[cfg_attr(test, assert_instr(str))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn vst1q_u32(ptr: *mut u32, a: uint32x4_t) {
// Diff view: this hunk shows the old and the new body back to back.
// Pre-change body (removed lines): element-typed copy from `a` to `ptr`.
// NOTE(review): `copy_nonoverlapping` counts in units of the pointee (`u32`
// here), but the count passed is `size_of::<uint32x4_t>()`, a byte count. That
// looks like more `u32`s than the vector holds; confirm against the std docs.
copy_nonoverlapping(
&a as *const uint32x4_t as *const u32,
ptr as *mut u32,
size_of::<uint32x4_t>(),
)
// Post-change body (added line): `ptr.cast()` retypes `ptr` to a pointer to
// `uint32x4_t` and the whole vector is written with one unaligned store.
write_unaligned(ptr.cast(), a);
}
/// Store multiple single-element structures from one, two, three, or four registers.
@@ -974,11 +783,7 @@ pub unsafe fn vst1q_u32(ptr: *mut u32, a: uint32x4_t) {
#[cfg_attr(test, assert_instr(str))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn vst1_u64(ptr: *mut u64, a: uint64x1_t) {
// Diff view: this hunk shows the old and the new body back to back.
// Pre-change body (removed lines): element-typed copy from `a` to `ptr`.
// NOTE(review): `copy_nonoverlapping` counts in units of the pointee (`u64`
// here), but the count passed is `size_of::<uint64x1_t>()`, a byte count. That
// looks like more `u64`s than the vector holds; confirm against the std docs.
copy_nonoverlapping(
&a as *const uint64x1_t as *const u64,
ptr as *mut u64,
size_of::<uint64x1_t>(),
)
// Post-change body (added line): `ptr.cast()` retypes `ptr` to a pointer to
// `uint64x1_t` and the whole vector is written with one unaligned store.
write_unaligned(ptr.cast(), a);
}
/// Store multiple single-element structures from one, two, three, or four registers.
@@ -987,11 +792,7 @@ pub unsafe fn vst1_u64(ptr: *mut u64, a: uint64x1_t) {
#[cfg_attr(test, assert_instr(str))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn vst1q_u64(ptr: *mut u64, a: uint64x2_t) {
// Diff view: this hunk shows the old and the new body back to back.
// Pre-change body (removed lines): element-typed copy from `a` to `ptr`.
// NOTE(review): `copy_nonoverlapping` counts in units of the pointee (`u64`
// here), but the count passed is `size_of::<uint64x2_t>()`, a byte count. That
// looks like more `u64`s than the vector holds; confirm against the std docs.
copy_nonoverlapping(
&a as *const uint64x2_t as *const u64,
ptr as *mut u64,
size_of::<uint64x2_t>(),
)
// Post-change body (added line): `ptr.cast()` retypes `ptr` to a pointer to
// `uint64x2_t` and the whole vector is written with one unaligned store.
write_unaligned(ptr.cast(), a);
}
/// Store multiple single-element structures from one, two, three, or four registers.
@@ -1000,11 +801,7 @@ pub unsafe fn vst1q_u64(ptr: *mut u64, a: uint64x2_t) {
#[cfg_attr(test, assert_instr(str))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn vst1_p8(ptr: *mut p8, a: poly8x8_t) {
// Diff view: this hunk shows the old and the new body back to back.
// Pre-change body (removed lines): copies `size_of::<poly8x8_t>()` elements of
// type `p8` from `a` to `ptr`.
// NOTE(review): the count is a byte count used as an element count; this is
// only equivalent if `p8` is one byte wide. TODO confirm `p8`'s definition.
copy_nonoverlapping(
&a as *const poly8x8_t as *const p8,
ptr as *mut p8,
size_of::<poly8x8_t>(),
)
// Post-change body (added line): `ptr.cast()` retypes `ptr` to a pointer to
// `poly8x8_t` and the whole vector is written with one unaligned store.
write_unaligned(ptr.cast(), a);
}
/// Store multiple single-element structures from one, two, three, or four registers.
@@ -1013,11 +810,7 @@ pub unsafe fn vst1_p8(ptr: *mut p8, a: poly8x8_t) {
#[cfg_attr(test, assert_instr(str))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn vst1q_p8(ptr: *mut p8, a: poly8x16_t) {
// Diff view: this hunk shows the old and the new body back to back.
// Pre-change body (removed lines): copies `size_of::<poly8x16_t>()` elements of
// type `p8` from `a` to `ptr`.
// NOTE(review): the count is a byte count used as an element count; this is
// only equivalent if `p8` is one byte wide. TODO confirm `p8`'s definition.
copy_nonoverlapping(
&a as *const poly8x16_t as *const p8,
ptr as *mut p8,
size_of::<poly8x16_t>(),
)
// Post-change body (added line): `ptr.cast()` retypes `ptr` to a pointer to
// `poly8x16_t` and the whole vector is written with one unaligned store.
write_unaligned(ptr.cast(), a);
}
/// Store multiple single-element structures from one, two, three, or four registers.
@@ -1026,11 +819,7 @@ pub unsafe fn vst1q_p8(ptr: *mut p8, a: poly8x16_t) {
#[cfg_attr(test, assert_instr(str))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn vst1_p16(ptr: *mut p16, a: poly16x4_t) {
// Diff view: this hunk shows the old and the new body back to back.
// Pre-change body (removed lines): element-typed copy from `a` to `ptr`.
// NOTE(review): `copy_nonoverlapping` counts in units of the pointee (`p16`
// here), but the count passed is `size_of::<poly16x4_t>()`, a byte count. If
// `p16` is wider than one byte this copies past the vector; TODO confirm.
copy_nonoverlapping(
&a as *const poly16x4_t as *const p16,
ptr as *mut p16,
size_of::<poly16x4_t>(),
)
// Post-change body (added line): `ptr.cast()` retypes `ptr` to a pointer to
// `poly16x4_t` and the whole vector is written with one unaligned store.
write_unaligned(ptr.cast(), a);
}
/// Store multiple single-element structures from one, two, three, or four registers.
@@ -1039,11 +828,7 @@ pub unsafe fn vst1_p16(ptr: *mut p16, a: poly16x4_t) {
#[cfg_attr(test, assert_instr(str))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn vst1q_p16(ptr: *mut p16, a: poly16x8_t) {
// Diff view: this hunk shows the old and the new body back to back.
// Pre-change body (removed lines): element-typed copy from `a` to `ptr`.
// NOTE(review): `copy_nonoverlapping` counts in units of the pointee (`p16`
// here), but the count passed is `size_of::<poly16x8_t>()`, a byte count. If
// `p16` is wider than one byte this copies past the vector; TODO confirm.
copy_nonoverlapping(
&a as *const poly16x8_t as *const p16,
ptr as *mut p16,
size_of::<poly16x8_t>(),
)
// Post-change body (added line): `ptr.cast()` retypes `ptr` to a pointer to
// `poly16x8_t` and the whole vector is written with one unaligned store.
write_unaligned(ptr.cast(), a);
}
/// Store multiple single-element structures from one, two, three, or four registers.
@@ -1052,11 +837,7 @@ pub unsafe fn vst1q_p16(ptr: *mut p16, a: poly16x8_t) {
#[cfg_attr(test, assert_instr(str))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn vst1_p64(ptr: *mut p64, a: poly64x1_t) {
// Diff view: this hunk shows the old and the new body back to back.
// Pre-change body (removed lines): element-typed copy from `a` to `ptr`.
// NOTE(review): `copy_nonoverlapping` counts in units of the pointee (`p64`
// here), but the count passed is `size_of::<poly64x1_t>()`, a byte count. If
// `p64` is wider than one byte this copies past the vector; TODO confirm.
copy_nonoverlapping(
&a as *const poly64x1_t as *const p64,
ptr as *mut p64,
size_of::<poly64x1_t>(),
)
// Post-change body (added line): `ptr.cast()` retypes `ptr` to a pointer to
// `poly64x1_t` and the whole vector is written with one unaligned store.
write_unaligned(ptr.cast(), a);
}
/// Store multiple single-element structures from one, two, three, or four registers.
@@ -1065,11 +846,7 @@ pub unsafe fn vst1_p64(ptr: *mut p64, a: poly64x1_t) {
#[cfg_attr(test, assert_instr(str))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn vst1q_p64(ptr: *mut p64, a: poly64x2_t) {
// Diff view: this hunk shows the old and the new body back to back.
// Pre-change body (removed lines): element-typed copy from `a` to `ptr`.
// NOTE(review): `copy_nonoverlapping` counts in units of the pointee (`p64`
// here), but the count passed is `size_of::<poly64x2_t>()`, a byte count. If
// `p64` is wider than one byte this copies past the vector; TODO confirm.
copy_nonoverlapping(
&a as *const poly64x2_t as *const p64,
ptr as *mut p64,
size_of::<poly64x2_t>(),
)
// Post-change body (added line): `ptr.cast()` retypes `ptr` to a pointer to
// `poly64x2_t` and the whole vector is written with one unaligned store.
write_unaligned(ptr.cast(), a);
}
/// Store multiple single-element structures from one, two, three, or four registers.
@@ -1078,11 +855,7 @@ pub unsafe fn vst1q_p64(ptr: *mut p64, a: poly64x2_t) {
#[cfg_attr(test, assert_instr(str))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn vst1_f32(ptr: *mut f32, a: float32x2_t) {
// Diff view: this hunk shows the old and the new body back to back.
// Pre-change body (removed lines): element-typed copy from `a` to `ptr`.
// NOTE(review): `copy_nonoverlapping` counts in units of the pointee (`f32`
// here), but the count passed is `size_of::<float32x2_t>()`, a byte count. That
// looks like more `f32`s than the vector holds; confirm against the std docs.
copy_nonoverlapping(
&a as *const float32x2_t as *const f32,
ptr as *mut f32,
size_of::<float32x2_t>(),
)
// Post-change body (added line): `ptr.cast()` retypes `ptr` to a pointer to
// `float32x2_t` and the whole vector is written with one unaligned store.
write_unaligned(ptr.cast(), a);
}
/// Store multiple single-element structures from one, two, three, or four registers.
@@ -1091,11 +864,7 @@ pub unsafe fn vst1_f32(ptr: *mut f32, a: float32x2_t) {
#[cfg_attr(test, assert_instr(str))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn vst1q_f32(ptr: *mut f32, a: float32x4_t) {
// Diff view: this hunk shows the old and the new body back to back.
// Pre-change body (removed lines): element-typed copy from `a` to `ptr`.
// NOTE(review): `copy_nonoverlapping` counts in units of the pointee (`f32`
// here), but the count passed is `size_of::<float32x4_t>()`, a byte count. That
// looks like more `f32`s than the vector holds; confirm against the std docs.
copy_nonoverlapping(
&a as *const float32x4_t as *const f32,
ptr as *mut f32,
size_of::<float32x4_t>(),
)
// Post-change body (added line): `ptr.cast()` retypes `ptr` to a pointer to
// `float32x4_t` and the whole vector is written with one unaligned store.
write_unaligned(ptr.cast(), a);
}
/// Store multiple single-element structures from one, two, three, or four registers.
@@ -1104,11 +873,7 @@ pub unsafe fn vst1q_f32(ptr: *mut f32, a: float32x4_t) {
#[cfg_attr(test, assert_instr(str))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn vst1_f64(ptr: *mut f64, a: float64x1_t) {
// Diff view: this hunk shows the old and the new body back to back.
// Pre-change body (removed lines): element-typed copy from `a` to `ptr`.
// NOTE(review): `copy_nonoverlapping` counts in units of the pointee (`f64`
// here), but the count passed is `size_of::<float64x1_t>()`, a byte count. That
// looks like more `f64`s than the vector holds; confirm against the std docs.
copy_nonoverlapping(
&a as *const float64x1_t as *const f64,
ptr as *mut f64,
size_of::<float64x1_t>(),
)
// Post-change body (added line): `ptr.cast()` retypes `ptr` to a pointer to
// `float64x1_t` and the whole vector is written with one unaligned store.
write_unaligned(ptr.cast(), a);
}
/// Store multiple single-element structures from one, two, three, or four registers.
@@ -1117,11 +882,7 @@ pub unsafe fn vst1_f64(ptr: *mut f64, a: float64x1_t) {
#[cfg_attr(test, assert_instr(str))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn vst1q_f64(ptr: *mut f64, a: float64x2_t) {
// Diff view: this hunk shows the old and the new body back to back.
// Pre-change body (removed lines): element-typed copy from `a` to `ptr`.
// NOTE(review): `copy_nonoverlapping` counts in units of the pointee (`f64`
// here), but the count passed is `size_of::<float64x2_t>()`, a byte count. That
// looks like more `f64`s than the vector holds; confirm against the std docs.
copy_nonoverlapping(
&a as *const float64x2_t as *const f64,
ptr as *mut f64,
size_of::<float64x2_t>(),
)
// Post-change body (added line): `ptr.cast()` retypes `ptr` to a pointer to
// `float64x2_t` and the whole vector is written with one unaligned store.
write_unaligned(ptr.cast(), a);
}
/// Absolute Value (wrapping).