Change aarch64 vld1* instructions to not cause individual loads (#1207)
This commit is contained in:
@@ -12,8 +12,8 @@ pub use self::generated::*;
|
||||
use crate::{
|
||||
core_arch::{arm_shared::*, simd::*, simd_llvm::*},
|
||||
hint::unreachable_unchecked,
|
||||
mem::{size_of, transmute, zeroed},
|
||||
ptr::copy_nonoverlapping,
|
||||
mem::{transmute, zeroed},
|
||||
ptr::{read_unaligned, write_unaligned},
|
||||
};
|
||||
#[cfg(test)]
|
||||
use stdarch_test::assert_instr;
|
||||
@@ -464,16 +464,7 @@ pub unsafe fn vcopy_laneq_f64<const LANE1: i32, const LANE2: i32>(
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(ldr))]
|
||||
pub unsafe fn vld1_s8(ptr: *const i8) -> int8x8_t {
|
||||
transmute(i8x8::new(
|
||||
*ptr,
|
||||
*ptr.offset(1),
|
||||
*ptr.offset(2),
|
||||
*ptr.offset(3),
|
||||
*ptr.offset(4),
|
||||
*ptr.offset(5),
|
||||
*ptr.offset(6),
|
||||
*ptr.offset(7),
|
||||
))
|
||||
read_unaligned(ptr.cast())
|
||||
}
|
||||
|
||||
/// Load multiple single-element structures to one, two, three, or four registers.
|
||||
@@ -481,24 +472,7 @@ pub unsafe fn vld1_s8(ptr: *const i8) -> int8x8_t {
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(ldr))]
|
||||
pub unsafe fn vld1q_s8(ptr: *const i8) -> int8x16_t {
|
||||
transmute(i8x16::new(
|
||||
*ptr,
|
||||
*ptr.offset(1),
|
||||
*ptr.offset(2),
|
||||
*ptr.offset(3),
|
||||
*ptr.offset(4),
|
||||
*ptr.offset(5),
|
||||
*ptr.offset(6),
|
||||
*ptr.offset(7),
|
||||
*ptr.offset(8),
|
||||
*ptr.offset(9),
|
||||
*ptr.offset(10),
|
||||
*ptr.offset(11),
|
||||
*ptr.offset(12),
|
||||
*ptr.offset(13),
|
||||
*ptr.offset(14),
|
||||
*ptr.offset(15),
|
||||
))
|
||||
read_unaligned(ptr.cast())
|
||||
}
|
||||
|
||||
/// Load multiple single-element structures to one, two, three, or four registers.
|
||||
@@ -506,12 +480,7 @@ pub unsafe fn vld1q_s8(ptr: *const i8) -> int8x16_t {
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(ldr))]
|
||||
pub unsafe fn vld1_s16(ptr: *const i16) -> int16x4_t {
|
||||
transmute(i16x4::new(
|
||||
*ptr,
|
||||
*ptr.offset(1),
|
||||
*ptr.offset(2),
|
||||
*ptr.offset(3),
|
||||
))
|
||||
read_unaligned(ptr.cast())
|
||||
}
|
||||
|
||||
/// Load multiple single-element structures to one, two, three, or four registers.
|
||||
@@ -519,16 +488,7 @@ pub unsafe fn vld1_s16(ptr: *const i16) -> int16x4_t {
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(ldr))]
|
||||
pub unsafe fn vld1q_s16(ptr: *const i16) -> int16x8_t {
|
||||
transmute(i16x8::new(
|
||||
*ptr,
|
||||
*ptr.offset(1),
|
||||
*ptr.offset(2),
|
||||
*ptr.offset(3),
|
||||
*ptr.offset(4),
|
||||
*ptr.offset(5),
|
||||
*ptr.offset(6),
|
||||
*ptr.offset(7),
|
||||
))
|
||||
read_unaligned(ptr.cast())
|
||||
}
|
||||
|
||||
/// Load multiple single-element structures to one, two, three, or four registers.
|
||||
@@ -536,7 +496,7 @@ pub unsafe fn vld1q_s16(ptr: *const i16) -> int16x8_t {
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(ldr))]
|
||||
pub unsafe fn vld1_s32(ptr: *const i32) -> int32x2_t {
|
||||
transmute(i32x2::new(*ptr, *ptr.offset(1)))
|
||||
read_unaligned(ptr.cast())
|
||||
}
|
||||
|
||||
/// Load multiple single-element structures to one, two, three, or four registers.
|
||||
@@ -544,12 +504,7 @@ pub unsafe fn vld1_s32(ptr: *const i32) -> int32x2_t {
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(ldr))]
|
||||
pub unsafe fn vld1q_s32(ptr: *const i32) -> int32x4_t {
|
||||
transmute(i32x4::new(
|
||||
*ptr,
|
||||
*ptr.offset(1),
|
||||
*ptr.offset(2),
|
||||
*ptr.offset(3),
|
||||
))
|
||||
read_unaligned(ptr.cast())
|
||||
}
|
||||
|
||||
/// Load multiple single-element structures to one, two, three, or four registers.
|
||||
@@ -557,7 +512,7 @@ pub unsafe fn vld1q_s32(ptr: *const i32) -> int32x4_t {
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(ldr))]
|
||||
pub unsafe fn vld1_s64(ptr: *const i64) -> int64x1_t {
|
||||
transmute(i64x1::new(*ptr))
|
||||
read_unaligned(ptr.cast())
|
||||
}
|
||||
|
||||
/// Load multiple single-element structures to one, two, three, or four registers.
|
||||
@@ -565,7 +520,7 @@ pub unsafe fn vld1_s64(ptr: *const i64) -> int64x1_t {
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(ldr))]
|
||||
pub unsafe fn vld1q_s64(ptr: *const i64) -> int64x2_t {
|
||||
transmute(i64x2::new(*ptr, *ptr.offset(1)))
|
||||
read_unaligned(ptr.cast())
|
||||
}
|
||||
|
||||
/// Load multiple single-element structures to one, two, three, or four registers.
|
||||
@@ -573,16 +528,7 @@ pub unsafe fn vld1q_s64(ptr: *const i64) -> int64x2_t {
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(ldr))]
|
||||
pub unsafe fn vld1_u8(ptr: *const u8) -> uint8x8_t {
|
||||
transmute(u8x8::new(
|
||||
*ptr,
|
||||
*ptr.offset(1),
|
||||
*ptr.offset(2),
|
||||
*ptr.offset(3),
|
||||
*ptr.offset(4),
|
||||
*ptr.offset(5),
|
||||
*ptr.offset(6),
|
||||
*ptr.offset(7),
|
||||
))
|
||||
read_unaligned(ptr.cast())
|
||||
}
|
||||
|
||||
/// Load multiple single-element structures to one, two, three, or four registers.
|
||||
@@ -590,24 +536,7 @@ pub unsafe fn vld1_u8(ptr: *const u8) -> uint8x8_t {
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(ldr))]
|
||||
pub unsafe fn vld1q_u8(ptr: *const u8) -> uint8x16_t {
|
||||
transmute(u8x16::new(
|
||||
*ptr,
|
||||
*ptr.offset(1),
|
||||
*ptr.offset(2),
|
||||
*ptr.offset(3),
|
||||
*ptr.offset(4),
|
||||
*ptr.offset(5),
|
||||
*ptr.offset(6),
|
||||
*ptr.offset(7),
|
||||
*ptr.offset(8),
|
||||
*ptr.offset(9),
|
||||
*ptr.offset(10),
|
||||
*ptr.offset(11),
|
||||
*ptr.offset(12),
|
||||
*ptr.offset(13),
|
||||
*ptr.offset(14),
|
||||
*ptr.offset(15),
|
||||
))
|
||||
read_unaligned(ptr.cast())
|
||||
}
|
||||
|
||||
/// Load multiple single-element structures to one, two, three, or four registers.
|
||||
@@ -615,12 +544,7 @@ pub unsafe fn vld1q_u8(ptr: *const u8) -> uint8x16_t {
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(ldr))]
|
||||
pub unsafe fn vld1_u16(ptr: *const u16) -> uint16x4_t {
|
||||
transmute(u16x4::new(
|
||||
*ptr,
|
||||
*ptr.offset(1),
|
||||
*ptr.offset(2),
|
||||
*ptr.offset(3),
|
||||
))
|
||||
read_unaligned(ptr.cast())
|
||||
}
|
||||
|
||||
/// Load multiple single-element structures to one, two, three, or four registers.
|
||||
@@ -628,16 +552,7 @@ pub unsafe fn vld1_u16(ptr: *const u16) -> uint16x4_t {
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(ldr))]
|
||||
pub unsafe fn vld1q_u16(ptr: *const u16) -> uint16x8_t {
|
||||
transmute(u16x8::new(
|
||||
*ptr,
|
||||
*ptr.offset(1),
|
||||
*ptr.offset(2),
|
||||
*ptr.offset(3),
|
||||
*ptr.offset(4),
|
||||
*ptr.offset(5),
|
||||
*ptr.offset(6),
|
||||
*ptr.offset(7),
|
||||
))
|
||||
read_unaligned(ptr.cast())
|
||||
}
|
||||
|
||||
/// Load multiple single-element structures to one, two, three, or four registers.
|
||||
@@ -645,7 +560,7 @@ pub unsafe fn vld1q_u16(ptr: *const u16) -> uint16x8_t {
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(ldr))]
|
||||
pub unsafe fn vld1_u32(ptr: *const u32) -> uint32x2_t {
|
||||
transmute(u32x2::new(*ptr, *ptr.offset(1)))
|
||||
read_unaligned(ptr.cast())
|
||||
}
|
||||
|
||||
/// Load multiple single-element structures to one, two, three, or four registers.
|
||||
@@ -653,12 +568,7 @@ pub unsafe fn vld1_u32(ptr: *const u32) -> uint32x2_t {
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(ldr))]
|
||||
pub unsafe fn vld1q_u32(ptr: *const u32) -> uint32x4_t {
|
||||
transmute(u32x4::new(
|
||||
*ptr,
|
||||
*ptr.offset(1),
|
||||
*ptr.offset(2),
|
||||
*ptr.offset(3),
|
||||
))
|
||||
read_unaligned(ptr.cast())
|
||||
}
|
||||
|
||||
/// Load multiple single-element structures to one, two, three, or four registers.
|
||||
@@ -666,7 +576,7 @@ pub unsafe fn vld1q_u32(ptr: *const u32) -> uint32x4_t {
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(ldr))]
|
||||
pub unsafe fn vld1_u64(ptr: *const u64) -> uint64x1_t {
|
||||
transmute(u64x1::new(*ptr))
|
||||
read_unaligned(ptr.cast())
|
||||
}
|
||||
|
||||
/// Load multiple single-element structures to one, two, three, or four registers.
|
||||
@@ -674,7 +584,7 @@ pub unsafe fn vld1_u64(ptr: *const u64) -> uint64x1_t {
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(ldr))]
|
||||
pub unsafe fn vld1q_u64(ptr: *const u64) -> uint64x2_t {
|
||||
transmute(u64x2::new(*ptr, *ptr.offset(1)))
|
||||
read_unaligned(ptr.cast())
|
||||
}
|
||||
|
||||
/// Load multiple single-element structures to one, two, three, or four registers.
|
||||
@@ -682,16 +592,7 @@ pub unsafe fn vld1q_u64(ptr: *const u64) -> uint64x2_t {
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(ldr))]
|
||||
pub unsafe fn vld1_p8(ptr: *const p8) -> poly8x8_t {
|
||||
transmute(u8x8::new(
|
||||
*ptr,
|
||||
*ptr.offset(1),
|
||||
*ptr.offset(2),
|
||||
*ptr.offset(3),
|
||||
*ptr.offset(4),
|
||||
*ptr.offset(5),
|
||||
*ptr.offset(6),
|
||||
*ptr.offset(7),
|
||||
))
|
||||
read_unaligned(ptr.cast())
|
||||
}
|
||||
|
||||
/// Load multiple single-element structures to one, two, three, or four registers.
|
||||
@@ -699,24 +600,7 @@ pub unsafe fn vld1_p8(ptr: *const p8) -> poly8x8_t {
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(ldr))]
|
||||
pub unsafe fn vld1q_p8(ptr: *const p8) -> poly8x16_t {
|
||||
transmute(u8x16::new(
|
||||
*ptr,
|
||||
*ptr.offset(1),
|
||||
*ptr.offset(2),
|
||||
*ptr.offset(3),
|
||||
*ptr.offset(4),
|
||||
*ptr.offset(5),
|
||||
*ptr.offset(6),
|
||||
*ptr.offset(7),
|
||||
*ptr.offset(8),
|
||||
*ptr.offset(9),
|
||||
*ptr.offset(10),
|
||||
*ptr.offset(11),
|
||||
*ptr.offset(12),
|
||||
*ptr.offset(13),
|
||||
*ptr.offset(14),
|
||||
*ptr.offset(15),
|
||||
))
|
||||
read_unaligned(ptr.cast())
|
||||
}
|
||||
|
||||
/// Load multiple single-element structures to one, two, three, or four registers.
|
||||
@@ -724,12 +608,7 @@ pub unsafe fn vld1q_p8(ptr: *const p8) -> poly8x16_t {
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(ldr))]
|
||||
pub unsafe fn vld1_p16(ptr: *const p16) -> poly16x4_t {
|
||||
transmute(u16x4::new(
|
||||
*ptr,
|
||||
*ptr.offset(1),
|
||||
*ptr.offset(2),
|
||||
*ptr.offset(3),
|
||||
))
|
||||
read_unaligned(ptr.cast())
|
||||
}
|
||||
|
||||
/// Load multiple single-element structures to one, two, three, or four registers.
|
||||
@@ -737,16 +616,7 @@ pub unsafe fn vld1_p16(ptr: *const p16) -> poly16x4_t {
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(ldr))]
|
||||
pub unsafe fn vld1q_p16(ptr: *const p16) -> poly16x8_t {
|
||||
transmute(u16x8::new(
|
||||
*ptr,
|
||||
*ptr.offset(1),
|
||||
*ptr.offset(2),
|
||||
*ptr.offset(3),
|
||||
*ptr.offset(4),
|
||||
*ptr.offset(5),
|
||||
*ptr.offset(6),
|
||||
*ptr.offset(7),
|
||||
))
|
||||
read_unaligned(ptr.cast())
|
||||
}
|
||||
|
||||
/// Load multiple single-element structures to one, two, three, or four registers.
|
||||
@@ -754,7 +624,7 @@ pub unsafe fn vld1q_p16(ptr: *const p16) -> poly16x8_t {
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(ldr))]
|
||||
pub unsafe fn vld1_f32(ptr: *const f32) -> float32x2_t {
|
||||
transmute(f32x2::new(*ptr, *ptr.offset(1)))
|
||||
read_unaligned(ptr.cast())
|
||||
}
|
||||
|
||||
/// Load multiple single-element structures to one, two, three, or four registers.
|
||||
@@ -762,12 +632,7 @@ pub unsafe fn vld1_f32(ptr: *const f32) -> float32x2_t {
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(ldr))]
|
||||
pub unsafe fn vld1q_f32(ptr: *const f32) -> float32x4_t {
|
||||
transmute(f32x4::new(
|
||||
*ptr,
|
||||
*ptr.offset(1),
|
||||
*ptr.offset(2),
|
||||
*ptr.offset(3),
|
||||
))
|
||||
read_unaligned(ptr.cast())
|
||||
}
|
||||
|
||||
/// Load multiple single-element structures to one, two, three, or four registers.
|
||||
@@ -775,7 +640,7 @@ pub unsafe fn vld1q_f32(ptr: *const f32) -> float32x4_t {
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(ldr))]
|
||||
pub unsafe fn vld1_f64(ptr: *const f64) -> float64x1_t {
|
||||
transmute(f64x1::new(*ptr))
|
||||
read_unaligned(ptr.cast())
|
||||
}
|
||||
|
||||
/// Load multiple single-element structures to one, two, three, or four registers.
|
||||
@@ -783,7 +648,7 @@ pub unsafe fn vld1_f64(ptr: *const f64) -> float64x1_t {
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(ldr))]
|
||||
pub unsafe fn vld1q_f64(ptr: *const f64) -> float64x2_t {
|
||||
transmute(f64x2::new(*ptr, *ptr.offset(1)))
|
||||
read_unaligned(ptr.cast())
|
||||
}
|
||||
|
||||
/// Store multiple single-element structures from one, two, three, or four registers.
|
||||
@@ -792,11 +657,7 @@ pub unsafe fn vld1q_f64(ptr: *const f64) -> float64x2_t {
|
||||
#[cfg_attr(test, assert_instr(str))]
|
||||
#[allow(clippy::cast_ptr_alignment)]
|
||||
pub unsafe fn vst1_s8(ptr: *mut i8, a: int8x8_t) {
|
||||
copy_nonoverlapping(
|
||||
&a as *const int8x8_t as *const i8,
|
||||
ptr as *mut i8,
|
||||
size_of::<int8x8_t>(),
|
||||
)
|
||||
write_unaligned(ptr.cast(), a);
|
||||
}
|
||||
|
||||
/// Store multiple single-element structures from one, two, three, or four registers.
|
||||
@@ -805,11 +666,7 @@ pub unsafe fn vst1_s8(ptr: *mut i8, a: int8x8_t) {
|
||||
#[cfg_attr(test, assert_instr(str))]
|
||||
#[allow(clippy::cast_ptr_alignment)]
|
||||
pub unsafe fn vst1q_s8(ptr: *mut i8, a: int8x16_t) {
|
||||
copy_nonoverlapping(
|
||||
&a as *const int8x16_t as *const i8,
|
||||
ptr as *mut i8,
|
||||
size_of::<int8x16_t>(),
|
||||
)
|
||||
write_unaligned(ptr.cast(), a);
|
||||
}
|
||||
|
||||
/// Store multiple single-element structures from one, two, three, or four registers.
|
||||
@@ -818,11 +675,7 @@ pub unsafe fn vst1q_s8(ptr: *mut i8, a: int8x16_t) {
|
||||
#[cfg_attr(test, assert_instr(str))]
|
||||
#[allow(clippy::cast_ptr_alignment)]
|
||||
pub unsafe fn vst1_s16(ptr: *mut i16, a: int16x4_t) {
|
||||
copy_nonoverlapping(
|
||||
&a as *const int16x4_t as *const i16,
|
||||
ptr as *mut i16,
|
||||
size_of::<int16x4_t>(),
|
||||
)
|
||||
write_unaligned(ptr.cast(), a);
|
||||
}
|
||||
|
||||
/// Store multiple single-element structures from one, two, three, or four registers.
|
||||
@@ -831,11 +684,7 @@ pub unsafe fn vst1_s16(ptr: *mut i16, a: int16x4_t) {
|
||||
#[cfg_attr(test, assert_instr(str))]
|
||||
#[allow(clippy::cast_ptr_alignment)]
|
||||
pub unsafe fn vst1q_s16(ptr: *mut i16, a: int16x8_t) {
|
||||
copy_nonoverlapping(
|
||||
&a as *const int16x8_t as *const i16,
|
||||
ptr as *mut i16,
|
||||
size_of::<int16x8_t>(),
|
||||
)
|
||||
write_unaligned(ptr.cast(), a);
|
||||
}
|
||||
|
||||
/// Store multiple single-element structures from one, two, three, or four registers.
|
||||
@@ -844,11 +693,7 @@ pub unsafe fn vst1q_s16(ptr: *mut i16, a: int16x8_t) {
|
||||
#[cfg_attr(test, assert_instr(str))]
|
||||
#[allow(clippy::cast_ptr_alignment)]
|
||||
pub unsafe fn vst1_s32(ptr: *mut i32, a: int32x2_t) {
|
||||
copy_nonoverlapping(
|
||||
&a as *const int32x2_t as *const i32,
|
||||
ptr as *mut i32,
|
||||
size_of::<int32x2_t>(),
|
||||
)
|
||||
write_unaligned(ptr.cast(), a);
|
||||
}
|
||||
|
||||
/// Store multiple single-element structures from one, two, three, or four registers.
|
||||
@@ -857,11 +702,7 @@ pub unsafe fn vst1_s32(ptr: *mut i32, a: int32x2_t) {
|
||||
#[cfg_attr(test, assert_instr(str))]
|
||||
#[allow(clippy::cast_ptr_alignment)]
|
||||
pub unsafe fn vst1q_s32(ptr: *mut i32, a: int32x4_t) {
|
||||
copy_nonoverlapping(
|
||||
&a as *const int32x4_t as *const i32,
|
||||
ptr as *mut i32,
|
||||
size_of::<int32x4_t>(),
|
||||
)
|
||||
write_unaligned(ptr.cast(), a);
|
||||
}
|
||||
|
||||
/// Store multiple single-element structures from one, two, three, or four registers.
|
||||
@@ -870,11 +711,7 @@ pub unsafe fn vst1q_s32(ptr: *mut i32, a: int32x4_t) {
|
||||
#[cfg_attr(test, assert_instr(str))]
|
||||
#[allow(clippy::cast_ptr_alignment)]
|
||||
pub unsafe fn vst1_s64(ptr: *mut i64, a: int64x1_t) {
|
||||
copy_nonoverlapping(
|
||||
&a as *const int64x1_t as *const i64,
|
||||
ptr as *mut i64,
|
||||
size_of::<int64x1_t>(),
|
||||
)
|
||||
write_unaligned(ptr.cast(), a);
|
||||
}
|
||||
|
||||
/// Store multiple single-element structures from one, two, three, or four registers.
|
||||
@@ -883,11 +720,7 @@ pub unsafe fn vst1_s64(ptr: *mut i64, a: int64x1_t) {
|
||||
#[cfg_attr(test, assert_instr(str))]
|
||||
#[allow(clippy::cast_ptr_alignment)]
|
||||
pub unsafe fn vst1q_s64(ptr: *mut i64, a: int64x2_t) {
|
||||
copy_nonoverlapping(
|
||||
&a as *const int64x2_t as *const i64,
|
||||
ptr as *mut i64,
|
||||
size_of::<int64x2_t>(),
|
||||
)
|
||||
write_unaligned(ptr.cast(), a);
|
||||
}
|
||||
|
||||
/// Store multiple single-element structures from one, two, three, or four registers.
|
||||
@@ -896,11 +729,7 @@ pub unsafe fn vst1q_s64(ptr: *mut i64, a: int64x2_t) {
|
||||
#[cfg_attr(test, assert_instr(str))]
|
||||
#[allow(clippy::cast_ptr_alignment)]
|
||||
pub unsafe fn vst1_u8(ptr: *mut u8, a: uint8x8_t) {
|
||||
copy_nonoverlapping(
|
||||
&a as *const uint8x8_t as *const u8,
|
||||
ptr as *mut u8,
|
||||
size_of::<uint8x8_t>(),
|
||||
)
|
||||
write_unaligned(ptr.cast(), a);
|
||||
}
|
||||
|
||||
/// Store multiple single-element structures from one, two, three, or four registers.
|
||||
@@ -909,11 +738,7 @@ pub unsafe fn vst1_u8(ptr: *mut u8, a: uint8x8_t) {
|
||||
#[cfg_attr(test, assert_instr(str))]
|
||||
#[allow(clippy::cast_ptr_alignment)]
|
||||
pub unsafe fn vst1q_u8(ptr: *mut u8, a: uint8x16_t) {
|
||||
copy_nonoverlapping(
|
||||
&a as *const uint8x16_t as *const u8,
|
||||
ptr as *mut u8,
|
||||
size_of::<uint8x16_t>(),
|
||||
)
|
||||
write_unaligned(ptr.cast(), a);
|
||||
}
|
||||
|
||||
/// Store multiple single-element structures from one, two, three, or four registers.
|
||||
@@ -922,11 +747,7 @@ pub unsafe fn vst1q_u8(ptr: *mut u8, a: uint8x16_t) {
|
||||
#[cfg_attr(test, assert_instr(str))]
|
||||
#[allow(clippy::cast_ptr_alignment)]
|
||||
pub unsafe fn vst1_u16(ptr: *mut u16, a: uint16x4_t) {
|
||||
copy_nonoverlapping(
|
||||
&a as *const uint16x4_t as *const u16,
|
||||
ptr as *mut u16,
|
||||
size_of::<uint16x4_t>(),
|
||||
)
|
||||
write_unaligned(ptr.cast(), a);
|
||||
}
|
||||
|
||||
/// Store multiple single-element structures from one, two, three, or four registers.
|
||||
@@ -935,11 +756,7 @@ pub unsafe fn vst1_u16(ptr: *mut u16, a: uint16x4_t) {
|
||||
#[cfg_attr(test, assert_instr(str))]
|
||||
#[allow(clippy::cast_ptr_alignment)]
|
||||
pub unsafe fn vst1q_u16(ptr: *mut u16, a: uint16x8_t) {
|
||||
copy_nonoverlapping(
|
||||
&a as *const uint16x8_t as *const u16,
|
||||
ptr as *mut u16,
|
||||
size_of::<uint16x8_t>(),
|
||||
)
|
||||
write_unaligned(ptr.cast(), a);
|
||||
}
|
||||
|
||||
/// Store multiple single-element structures from one, two, three, or four registers.
|
||||
@@ -948,11 +765,7 @@ pub unsafe fn vst1q_u16(ptr: *mut u16, a: uint16x8_t) {
|
||||
#[cfg_attr(test, assert_instr(str))]
|
||||
#[allow(clippy::cast_ptr_alignment)]
|
||||
pub unsafe fn vst1_u32(ptr: *mut u32, a: uint32x2_t) {
|
||||
copy_nonoverlapping(
|
||||
&a as *const uint32x2_t as *const u32,
|
||||
ptr as *mut u32,
|
||||
size_of::<uint32x2_t>(),
|
||||
)
|
||||
write_unaligned(ptr.cast(), a);
|
||||
}
|
||||
|
||||
/// Store multiple single-element structures from one, two, three, or four registers.
|
||||
@@ -961,11 +774,7 @@ pub unsafe fn vst1_u32(ptr: *mut u32, a: uint32x2_t) {
|
||||
#[cfg_attr(test, assert_instr(str))]
|
||||
#[allow(clippy::cast_ptr_alignment)]
|
||||
pub unsafe fn vst1q_u32(ptr: *mut u32, a: uint32x4_t) {
|
||||
copy_nonoverlapping(
|
||||
&a as *const uint32x4_t as *const u32,
|
||||
ptr as *mut u32,
|
||||
size_of::<uint32x4_t>(),
|
||||
)
|
||||
write_unaligned(ptr.cast(), a);
|
||||
}
|
||||
|
||||
/// Store multiple single-element structures from one, two, three, or four registers.
|
||||
@@ -974,11 +783,7 @@ pub unsafe fn vst1q_u32(ptr: *mut u32, a: uint32x4_t) {
|
||||
#[cfg_attr(test, assert_instr(str))]
|
||||
#[allow(clippy::cast_ptr_alignment)]
|
||||
pub unsafe fn vst1_u64(ptr: *mut u64, a: uint64x1_t) {
|
||||
copy_nonoverlapping(
|
||||
&a as *const uint64x1_t as *const u64,
|
||||
ptr as *mut u64,
|
||||
size_of::<uint64x1_t>(),
|
||||
)
|
||||
write_unaligned(ptr.cast(), a);
|
||||
}
|
||||
|
||||
/// Store multiple single-element structures from one, two, three, or four registers.
|
||||
@@ -987,11 +792,7 @@ pub unsafe fn vst1_u64(ptr: *mut u64, a: uint64x1_t) {
|
||||
#[cfg_attr(test, assert_instr(str))]
|
||||
#[allow(clippy::cast_ptr_alignment)]
|
||||
pub unsafe fn vst1q_u64(ptr: *mut u64, a: uint64x2_t) {
|
||||
copy_nonoverlapping(
|
||||
&a as *const uint64x2_t as *const u64,
|
||||
ptr as *mut u64,
|
||||
size_of::<uint64x2_t>(),
|
||||
)
|
||||
write_unaligned(ptr.cast(), a);
|
||||
}
|
||||
|
||||
/// Store multiple single-element structures from one, two, three, or four registers.
|
||||
@@ -1000,11 +801,7 @@ pub unsafe fn vst1q_u64(ptr: *mut u64, a: uint64x2_t) {
|
||||
#[cfg_attr(test, assert_instr(str))]
|
||||
#[allow(clippy::cast_ptr_alignment)]
|
||||
pub unsafe fn vst1_p8(ptr: *mut p8, a: poly8x8_t) {
|
||||
copy_nonoverlapping(
|
||||
&a as *const poly8x8_t as *const p8,
|
||||
ptr as *mut p8,
|
||||
size_of::<poly8x8_t>(),
|
||||
)
|
||||
write_unaligned(ptr.cast(), a);
|
||||
}
|
||||
|
||||
/// Store multiple single-element structures from one, two, three, or four registers.
|
||||
@@ -1013,11 +810,7 @@ pub unsafe fn vst1_p8(ptr: *mut p8, a: poly8x8_t) {
|
||||
#[cfg_attr(test, assert_instr(str))]
|
||||
#[allow(clippy::cast_ptr_alignment)]
|
||||
pub unsafe fn vst1q_p8(ptr: *mut p8, a: poly8x16_t) {
|
||||
copy_nonoverlapping(
|
||||
&a as *const poly8x16_t as *const p8,
|
||||
ptr as *mut p8,
|
||||
size_of::<poly8x16_t>(),
|
||||
)
|
||||
write_unaligned(ptr.cast(), a);
|
||||
}
|
||||
|
||||
/// Store multiple single-element structures from one, two, three, or four registers.
|
||||
@@ -1026,11 +819,7 @@ pub unsafe fn vst1q_p8(ptr: *mut p8, a: poly8x16_t) {
|
||||
#[cfg_attr(test, assert_instr(str))]
|
||||
#[allow(clippy::cast_ptr_alignment)]
|
||||
pub unsafe fn vst1_p16(ptr: *mut p16, a: poly16x4_t) {
|
||||
copy_nonoverlapping(
|
||||
&a as *const poly16x4_t as *const p16,
|
||||
ptr as *mut p16,
|
||||
size_of::<poly16x4_t>(),
|
||||
)
|
||||
write_unaligned(ptr.cast(), a);
|
||||
}
|
||||
|
||||
/// Store multiple single-element structures from one, two, three, or four registers.
|
||||
@@ -1039,11 +828,7 @@ pub unsafe fn vst1_p16(ptr: *mut p16, a: poly16x4_t) {
|
||||
#[cfg_attr(test, assert_instr(str))]
|
||||
#[allow(clippy::cast_ptr_alignment)]
|
||||
pub unsafe fn vst1q_p16(ptr: *mut p16, a: poly16x8_t) {
|
||||
copy_nonoverlapping(
|
||||
&a as *const poly16x8_t as *const p16,
|
||||
ptr as *mut p16,
|
||||
size_of::<poly16x8_t>(),
|
||||
)
|
||||
write_unaligned(ptr.cast(), a);
|
||||
}
|
||||
|
||||
/// Store multiple single-element structures from one, two, three, or four registers.
|
||||
@@ -1052,11 +837,7 @@ pub unsafe fn vst1q_p16(ptr: *mut p16, a: poly16x8_t) {
|
||||
#[cfg_attr(test, assert_instr(str))]
|
||||
#[allow(clippy::cast_ptr_alignment)]
|
||||
pub unsafe fn vst1_p64(ptr: *mut p64, a: poly64x1_t) {
|
||||
copy_nonoverlapping(
|
||||
&a as *const poly64x1_t as *const p64,
|
||||
ptr as *mut p64,
|
||||
size_of::<poly64x1_t>(),
|
||||
)
|
||||
write_unaligned(ptr.cast(), a);
|
||||
}
|
||||
|
||||
/// Store multiple single-element structures from one, two, three, or four registers.
|
||||
@@ -1065,11 +846,7 @@ pub unsafe fn vst1_p64(ptr: *mut p64, a: poly64x1_t) {
|
||||
#[cfg_attr(test, assert_instr(str))]
|
||||
#[allow(clippy::cast_ptr_alignment)]
|
||||
pub unsafe fn vst1q_p64(ptr: *mut p64, a: poly64x2_t) {
|
||||
copy_nonoverlapping(
|
||||
&a as *const poly64x2_t as *const p64,
|
||||
ptr as *mut p64,
|
||||
size_of::<poly64x2_t>(),
|
||||
)
|
||||
write_unaligned(ptr.cast(), a);
|
||||
}
|
||||
|
||||
/// Store multiple single-element structures from one, two, three, or four registers.
|
||||
@@ -1078,11 +855,7 @@ pub unsafe fn vst1q_p64(ptr: *mut p64, a: poly64x2_t) {
|
||||
#[cfg_attr(test, assert_instr(str))]
|
||||
#[allow(clippy::cast_ptr_alignment)]
|
||||
pub unsafe fn vst1_f32(ptr: *mut f32, a: float32x2_t) {
|
||||
copy_nonoverlapping(
|
||||
&a as *const float32x2_t as *const f32,
|
||||
ptr as *mut f32,
|
||||
size_of::<float32x2_t>(),
|
||||
)
|
||||
write_unaligned(ptr.cast(), a);
|
||||
}
|
||||
|
||||
/// Store multiple single-element structures from one, two, three, or four registers.
|
||||
@@ -1091,11 +864,7 @@ pub unsafe fn vst1_f32(ptr: *mut f32, a: float32x2_t) {
|
||||
#[cfg_attr(test, assert_instr(str))]
|
||||
#[allow(clippy::cast_ptr_alignment)]
|
||||
pub unsafe fn vst1q_f32(ptr: *mut f32, a: float32x4_t) {
|
||||
copy_nonoverlapping(
|
||||
&a as *const float32x4_t as *const f32,
|
||||
ptr as *mut f32,
|
||||
size_of::<float32x4_t>(),
|
||||
)
|
||||
write_unaligned(ptr.cast(), a);
|
||||
}
|
||||
|
||||
/// Store multiple single-element structures from one, two, three, or four registers.
|
||||
@@ -1104,11 +873,7 @@ pub unsafe fn vst1q_f32(ptr: *mut f32, a: float32x4_t) {
|
||||
#[cfg_attr(test, assert_instr(str))]
|
||||
#[allow(clippy::cast_ptr_alignment)]
|
||||
pub unsafe fn vst1_f64(ptr: *mut f64, a: float64x1_t) {
|
||||
copy_nonoverlapping(
|
||||
&a as *const float64x1_t as *const f64,
|
||||
ptr as *mut f64,
|
||||
size_of::<float64x1_t>(),
|
||||
)
|
||||
write_unaligned(ptr.cast(), a);
|
||||
}
|
||||
|
||||
/// Store multiple single-element structures from one, two, three, or four registers.
|
||||
@@ -1117,11 +882,7 @@ pub unsafe fn vst1_f64(ptr: *mut f64, a: float64x1_t) {
|
||||
#[cfg_attr(test, assert_instr(str))]
|
||||
#[allow(clippy::cast_ptr_alignment)]
|
||||
pub unsafe fn vst1q_f64(ptr: *mut f64, a: float64x2_t) {
|
||||
copy_nonoverlapping(
|
||||
&a as *const float64x2_t as *const f64,
|
||||
ptr as *mut f64,
|
||||
size_of::<float64x2_t>(),
|
||||
)
|
||||
write_unaligned(ptr.cast(), a);
|
||||
}
|
||||
|
||||
/// Absolute Value (wrapping).
|
||||
|
||||
Reference in New Issue
Block a user