Complete vld* and vst* neon instructions (#1224)

This commit is contained in:
Sparrow Li
2021-09-29 11:28:10 +08:00
committed by GitHub
parent bdea403c54
commit 68e35d306f
5 changed files with 10787 additions and 228 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -2125,12 +2125,15 @@ arm-aarch64-separate
aarch64 = ld2 aarch64 = ld2
link-aarch64 = ld2._EXTv2_ link-aarch64 = ld2._EXTv2_
//generate *const i64:int64x2x2_t generate *const i64:int64x2x2_t
arm = vld2 arm = vld2
link-arm = vld2._EXTpi82_ link-arm = vld2._EXTpi82_
//generate *const i8:int8x8x2_t, *const i16:int16x4x2_t, *const i32:int32x2x2_t, *const i64:int64x1x2_t generate *const i8:int8x8x2_t, *const i16:int16x4x2_t, *const i32:int32x2x2_t
//generate *const i8:int8x16x2_t, *const i16:int16x8x2_t, *const i32:int32x4x2_t generate *const i8:int8x16x2_t, *const i16:int16x8x2_t, *const i32:int32x4x2_t
arm = nop
aarch64 = nop
generate *const i64:int64x1x2_t
/// Load multiple 2-element structures to two registers /// Load multiple 2-element structures to two registers
name = vld2 name = vld2
@@ -2141,17 +2144,21 @@ validate 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9,
load_fn load_fn
aarch64 = ld2 aarch64 = ld2
//generate *const u64:uint64x2x2_t generate *const u64:uint64x2x2_t
target = aes target = aes
//generate *const p64:poly64x2x2_t generate *const p64:poly64x2x2_t
target = default target = default
arm = vld2 arm = vld2
//generate *const u8:uint8x8x2_t, *const u16:uint16x4x2_t, *const u32:uint32x2x2_t, *const u64:uint64x1x2_t generate *const u8:uint8x8x2_t, *const u16:uint16x4x2_t, *const u32:uint32x2x2_t
//generate *const u8:uint8x16x2_t, *const u16:uint16x8x2_t, *const u32:uint32x4x2_t generate *const u8:uint8x16x2_t, *const u16:uint16x8x2_t, *const u32:uint32x4x2_t
//generate *const p8:poly8x8x2_t, *const p16:poly16x4x2_t, *const p8:poly8x16x2_t, *const p16:poly16x8x2_t generate *const p8:poly8x8x2_t, *const p16:poly16x4x2_t, *const p8:poly8x16x2_t, *const p16:poly16x8x2_t
arm = nop
aarch64 = nop
generate *const u64:uint64x1x2_t
target = aes target = aes
//generate *const p64:poly64x1x2_t generate *const p64:poly64x1x2_t
/// Load multiple 2-element structures to two registers /// Load multiple 2-element structures to two registers
name = vld2 name = vld2
@@ -2161,13 +2168,15 @@ validate 1., 2., 2., 3., 2., 3., 4., 5., 2., 3., 4., 5., 6., 7., 8., 9.
load_fn load_fn
arm-aarch64-separate arm-aarch64-separate
aarch64 = ld2 aarch64 = nop
link-aarch64 = ld2._EXTv2_ link-aarch64 = ld2._EXTv2_
//generate *const f64:float64x1x2_t, *const f64:float64x2x2_t generate *const f64:float64x1x2_t
aarch64 = ld2
generate *const f64:float64x2x2_t
arm = vld2 arm = vld2
link-arm = vld2._EXTpi82_ link-arm = vld2._EXTpi82_
//generate *const f32:float32x2x2_t, *const f32:float32x4x2_t generate *const f32:float32x2x2_t, *const f32:float32x4x2_t
/// Load single 2-element structure and replicate to all lanes of two registers /// Load single 2-element structure and replicate to all lanes of two registers
name = vld2 name = vld2
@@ -2175,15 +2184,18 @@ out-dup-nox
a = 0, 1, 1, 2, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17 a = 0, 1, 1, 2, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17
validate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 validate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
load_fn load_fn
arm-aarch64-separate
aarch64 = ld2r aarch64 = ld2r
link-aarch64 = ld2r._EXT2_ link-aarch64 = ld2r._EXT2_
//generate *const i64:int64x2x2_t generate *const i64:int64x2x2_t
arm = vld2dup arm = vld2
link-arm = vld2dup._EXTpi82_ link-arm = vld2dup._EXTpi82_
//generate *const i8:int8x8x2_t, *const i16:int16x4x2_t, *const i32:int32x2x2_t, *const i64:int64x1x2_t generate *const i8:int8x8x2_t, *const i16:int16x4x2_t, *const i32:int32x2x2_t
//generate *const i8:int8x16x2_t, *const i16:int16x8x2_t, *const i32:int32x4x2_t generate *const i8:int8x16x2_t, *const i16:int16x8x2_t, *const i32:int32x4x2_t
arm = nop
generate *const i64:int64x1x2_t
/// Load single 2-element structure and replicate to all lanes of two registers /// Load single 2-element structure and replicate to all lanes of two registers
name = vld2 name = vld2
@@ -2194,17 +2206,19 @@ validate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
load_fn load_fn
aarch64 = ld2r aarch64 = ld2r
//generate *const u64:uint64x2x2_t generate *const u64:uint64x2x2_t
target = aes target = aes
//generate *const p64:poly64x2x2_t generate *const p64:poly64x2x2_t
target = default target = default
arm = vld2dup arm = vld2
//generate *const u8:uint8x8x2_t, *const u16:uint16x4x2_t, *const u32:uint32x2x2_t, *const u64:uint64x1x2_t generate *const u8:uint8x8x2_t, *const u16:uint16x4x2_t, *const u32:uint32x2x2_t
//generate *const u8:uint8x16x2_t, *const u16:uint16x8x2_t, *const u32:uint32x4x2_t generate *const u8:uint8x16x2_t, *const u16:uint16x8x2_t, *const u32:uint32x4x2_t
//generate *const p8:poly8x8x2_t, *const p16:poly16x4x2_t, *const p8:poly8x16x2_t, *const p16:poly16x8x2_t generate *const p8:poly8x8x2_t, *const p16:poly16x4x2_t, *const p8:poly8x16x2_t, *const p16:poly16x8x2_t
arm = nop
generate *const u64:uint64x1x2_t
target = aes target = aes
//generate *const p64:poly64x1x2_t generate *const p64:poly64x1x2_t
/// Load single 2-element structure and replicate to all lanes of two registers /// Load single 2-element structure and replicate to all lanes of two registers
name = vld2 name = vld2
@@ -2212,14 +2226,15 @@ out-dup-nox
a = 0., 1., 1., 2., 3., 1., 4., 3., 5. a = 0., 1., 1., 2., 3., 1., 4., 3., 5.
validate 1., 1., 1., 1., 1., 1., 1., 1. validate 1., 1., 1., 1., 1., 1., 1., 1.
load_fn load_fn
arm-aarch64-separate
aarch64 = ld2r aarch64 = ld2r
link-aarch64 = ld2r._EXT2_ link-aarch64 = ld2r._EXT2_
//generate *const f64:float64x1x2_t, *const f64:float64x2x2_t generate *const f64:float64x1x2_t, *const f64:float64x2x2_t
arm = vld2dup arm = vld2
link-arm = vld2dup._EXTpi82_ link-arm = vld2dup._EXTpi82_
//generate *const f32:float32x2x2_t, *const f32:float32x4x2_t generate *const f32:float32x2x2_t, *const f32:float32x4x2_t
/// Load multiple 2-element structures to two registers /// Load multiple 2-element structures to two registers
name = vld2 name = vld2
@@ -2233,16 +2248,16 @@ validate 1, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26, 2, 12, 13, 1
load_fn load_fn
arm-aarch64-separate arm-aarch64-separate
aarch64 = ld2lane aarch64 = ld2
const-aarch64 = LANE const-aarch64 = LANE
link-aarch64 = ld2lane._EXTpi82_ link-aarch64 = ld2lane._EXTpi82_
//generate *const i8:int8x16x2_t:int8x16x2_t, *const i64:int64x1x2_t:int64x1x2_t, *const i64:int64x2x2_t:int64x2x2_t generate *const i8:int8x16x2_t:int8x16x2_t, *const i64:int64x1x2_t:int64x1x2_t, *const i64:int64x2x2_t:int64x2x2_t
arm = vld2lane arm = vld2
const-arm = LANE const-arm = LANE
link-arm = vld2lane._EXTpi82_ link-arm = vld2lane._EXTpi82_
//generate *const i8:int8x8x2_t:int8x8x2_t, *const i16:int16x4x2_t:int16x4x2_t, *const i32:int32x2x2_t:int32x2x2_t generate *const i8:int8x8x2_t:int8x8x2_t, *const i16:int16x4x2_t:int16x4x2_t, *const i32:int32x2x2_t:int32x2x2_t
//generate *const i16:int16x8x2_t:int16x8x2_t, *const i32:int32x4x2_t:int32x4x2_t generate *const i16:int16x8x2_t:int16x8x2_t, *const i32:int32x4x2_t:int32x4x2_t
/// Load multiple 2-element structures to two registers /// Load multiple 2-element structures to two registers
name = vld2 name = vld2
@@ -2256,22 +2271,22 @@ n = 0
validate 1, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26, 2, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26 validate 1, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26, 2, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26
load_fn load_fn
aarch64 = ld2lane aarch64 = ld2
const-aarch64 = LANE const-aarch64 = LANE
target = aes target = aes
//generate *const p64:poly64x1x2_t:poly64x1x2_t, *const p64:poly64x2x2_t:poly64x2x2_t generate *const p64:poly64x1x2_t:poly64x1x2_t, *const p64:poly64x2x2_t:poly64x2x2_t
target = default target = default
//generate *const u8:uint8x16x2_t:uint8x16x2_t, *const u64:uint64x1x2_t:uint64x1x2_t, *const u64:uint64x2x2_t:uint64x2x2_t generate *const u8:uint8x16x2_t:uint8x16x2_t, *const u64:uint64x1x2_t:uint64x1x2_t, *const u64:uint64x2x2_t:uint64x2x2_t
//generate *const p8:poly8x16x2_t:poly8x16x2_t generate *const p8:poly8x16x2_t:poly8x16x2_t
arm = vld2lane arm = vld2
const-arm = LANE const-arm = LANE
//generate *const u8:uint8x8x2_t:uint8x8x2_t, *const u16:uint16x4x2_t:uint16x4x2_t, *const u32:uint32x2x2_t:uint32x2x2_t generate *const u8:uint8x8x2_t:uint8x8x2_t, *const u16:uint16x4x2_t:uint16x4x2_t, *const u32:uint32x2x2_t:uint32x2x2_t
//generate *const u16:uint16x8x2_t:uint16x8x2_t, *const u32:uint32x4x2_t:uint32x4x2_t generate *const u16:uint16x8x2_t:uint16x8x2_t, *const u32:uint32x4x2_t:uint32x4x2_t
//generate *const p8:poly8x8x2_t:poly8x8x2_t, *const p16:poly16x4x2_t:poly16x4x2_t generate *const p8:poly8x8x2_t:poly8x8x2_t, *const p16:poly16x4x2_t:poly16x4x2_t
//generate *const p16:poly16x8x2_t:poly16x8x2_t generate *const p16:poly16x8x2_t:poly16x8x2_t
/// Load multiple 2-element structures to two registers /// Load multiple 2-element structures to two registers
name = vld2 name = vld2
@@ -2285,15 +2300,15 @@ validate 1., 2., 2., 14., 2., 16., 17., 18.
load_fn load_fn
arm-aarch64-separate arm-aarch64-separate
aarch64 = ld2lane aarch64 = ld2
const-aarch64 = LANE const-aarch64 = LANE
link-aarch64 = ld2lane._EXTpi82_ link-aarch64 = ld2lane._EXTpi82_
//generate *const f64:float64x1x2_t:float64x1x2_t, *const f64:float64x2x2_t:float64x2x2_t generate *const f64:float64x1x2_t:float64x1x2_t, *const f64:float64x2x2_t:float64x2x2_t
arm = vld2lane arm = vld2
const-arm = LANE const-arm = LANE
link-arm = vld2lane._EXTpi82_ link-arm = vld2lane._EXTpi82_
//generate *const f32:float32x2x2_t:float32x2x2_t, *const f32:float32x4x2_t:float32x4x2_t generate *const f32:float32x2x2_t:float32x2x2_t, *const f32:float32x4x2_t:float32x4x2_t
/// Load multiple 3-element structures to three registers /// Load multiple 3-element structures to three registers
name = vld3 name = vld3
@@ -2305,12 +2320,15 @@ arm-aarch64-separate
aarch64 = ld3 aarch64 = ld3
link-aarch64 = ld3._EXTv2_ link-aarch64 = ld3._EXTv2_
//generate *const i64:int64x2x3_t generate *const i64:int64x2x3_t
arm = vld3 arm = vld3
link-arm = vld3._EXTpi82_ link-arm = vld3._EXTpi82_
//generate *const i8:int8x8x3_t, *const i16:int16x4x3_t, *const i32:int32x2x3_t, *const i64:int64x1x3_t generate *const i8:int8x8x3_t, *const i16:int16x4x3_t, *const i32:int32x2x3_t
//generate *const i8:int8x16x3_t, *const i16:int16x8x3_t, *const i32:int32x4x3_t generate *const i8:int8x16x3_t, *const i16:int16x8x3_t, *const i32:int32x4x3_t
arm = nop
aarch64 = nop
generate *const i64:int64x1x3_t
/// Load multiple 3-element structures to three registers /// Load multiple 3-element structures to three registers
name = vld3 name = vld3
@@ -2321,17 +2339,20 @@ validate 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14,
load_fn load_fn
aarch64 = ld3 aarch64 = ld3
//generate *const u64:uint64x2x3_t generate *const u64:uint64x2x3_t
target = aes target = aes
//generate *const p64:poly64x2x3_t generate *const p64:poly64x2x3_t
target = default target = default
arm = vld3 arm = vld3
//generate *const u8:uint8x8x3_t, *const u16:uint16x4x3_t, *const u32:uint32x2x3_t, *const u64:uint64x1x3_t generate *const u8:uint8x8x3_t, *const u16:uint16x4x3_t, *const u32:uint32x2x3_t
//generate *const u8:uint8x16x3_t, *const u16:uint16x8x3_t, *const u32:uint32x4x3_t generate *const u8:uint8x16x3_t, *const u16:uint16x8x3_t, *const u32:uint32x4x3_t
//generate *const p8:poly8x8x3_t, *const p16:poly16x4x3_t, *const p8:poly8x16x3_t, *const p16:poly16x8x3_t generate *const p8:poly8x8x3_t, *const p16:poly16x4x3_t, *const p8:poly8x16x3_t, *const p16:poly16x8x3_t
arm = nop
aarch64 = nop
generate *const u64:uint64x1x3_t
target = aes target = aes
//generate *const p64:poly64x1x3_t generate *const p64:poly64x1x3_t
/// Load multiple 3-element structures to three registers /// Load multiple 3-element structures to three registers
name = vld3 name = vld3
@@ -2341,13 +2362,15 @@ validate 1., 2., 2., 4., 2., 4., 7., 8., 2., 4., 7., 8.
load_fn load_fn
arm-aarch64-separate arm-aarch64-separate
aarch64 = ld3 aarch64 = nop
link-aarch64 = ld3._EXTv2_ link-aarch64 = ld3._EXTv2_
//generate *const f64:float64x1x3_t, *const f64:float64x2x3_t generate *const f64:float64x1x3_t
aarch64 = ld3
generate *const f64:float64x2x3_t
arm = vld3 arm = vld3
link-arm = vld3._EXTpi82_ link-arm = vld3._EXTpi82_
//generate *const f32:float32x2x3_t, *const f32:float32x4x3_t generate *const f32:float32x2x3_t, *const f32:float32x4x3_t
/// Load single 3-element structure and replicate to all lanes of three registers /// Load single 3-element structure and replicate to all lanes of three registers
name = vld3 name = vld3
@@ -2355,15 +2378,18 @@ out-dup-nox
a = 0, 1, 1, 1, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17, 6, 14, 7, 15, 8, 16, 9, 17, 6, 14, 7, 15, 8, 16, 9, 17 a = 0, 1, 1, 1, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17, 6, 14, 7, 15, 8, 16, 9, 17, 6, 14, 7, 15, 8, 16, 9, 17
validate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 validate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
load_fn load_fn
arm-aarch64-separate
aarch64 = ld3r aarch64 = ld3r
link-aarch64 = ld3r._EXT2_ link-aarch64 = ld3r._EXT2_
//generate *const i64:int64x2x3_t generate *const i64:int64x2x3_t
arm = vld3dup arm = vld3
link-arm = vld3dup._EXTpi82_ link-arm = vld3dup._EXTpi82_
//generate *const i8:int8x8x3_t, *const i16:int16x4x3_t, *const i32:int32x2x3_t, *const i64:int64x1x3_t generate *const i8:int8x8x3_t, *const i16:int16x4x3_t, *const i32:int32x2x3_t
//generate *const i8:int8x16x3_t, *const i16:int16x8x3_t, *const i32:int32x4x3_t generate *const i8:int8x16x3_t, *const i16:int16x8x3_t, *const i32:int32x4x3_t
arm = nop
generate *const i64:int64x1x3_t
/// Load single 3-element structure and replicate to all lanes of three registers /// Load single 3-element structure and replicate to all lanes of three registers
name = vld3 name = vld3
@@ -2374,17 +2400,19 @@ validate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
load_fn load_fn
aarch64 = ld3r aarch64 = ld3r
//generate *const u64:uint64x2x3_t generate *const u64:uint64x2x3_t
target = aes target = aes
//generate *const p64:poly64x2x3_t generate *const p64:poly64x2x3_t
target = default target = default
arm = vld3dup arm = vld3
//generate *const u8:uint8x8x3_t, *const u16:uint16x4x3_t, *const u32:uint32x2x3_t, *const u64:uint64x1x3_t generate *const u8:uint8x8x3_t, *const u16:uint16x4x3_t, *const u32:uint32x2x3_t
//generate *const u8:uint8x16x3_t, *const u16:uint16x8x3_t, *const u32:uint32x4x3_t generate *const u8:uint8x16x3_t, *const u16:uint16x8x3_t, *const u32:uint32x4x3_t
//generate *const p8:poly8x8x3_t, *const p16:poly16x4x3_t, *const p8:poly8x16x3_t, *const p16:poly16x8x3_t generate *const p8:poly8x8x3_t, *const p16:poly16x4x3_t, *const p8:poly8x16x3_t, *const p16:poly16x8x3_t
arm = nop
generate *const u64:uint64x1x3_t
target = aes target = aes
//generate *const p64:poly64x1x3_t generate *const p64:poly64x1x3_t
/// Load single 3-element structure and replicate to all lanes of three registers /// Load single 3-element structure and replicate to all lanes of three registers
name = vld3 name = vld3
@@ -2392,14 +2420,15 @@ out-dup-nox
a = 0., 1., 1., 1., 3., 1., 4., 3., 5., 1., 4., 3., 5. a = 0., 1., 1., 1., 3., 1., 4., 3., 5., 1., 4., 3., 5.
validate 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1. validate 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.
load_fn load_fn
arm-aarch64-separate
aarch64 = ld3r aarch64 = ld3r
link-aarch64 = ld3r._EXT2_ link-aarch64 = ld3r._EXT2_
//generate *const f64:float64x1x3_t, *const f64:float64x2x3_t generate *const f64:float64x1x3_t, *const f64:float64x2x3_t
arm = vld3dup arm = vld3
link-arm = vld3dup._EXTpi82_ link-arm = vld3dup._EXTpi82_
//generate *const f32:float32x2x3_t, *const f32:float32x4x3_t generate *const f32:float32x2x3_t, *const f32:float32x4x3_t
/// Load multiple 3-element structures to three registers /// Load multiple 3-element structures to three registers
name = vld3 name = vld3
@@ -2413,16 +2442,16 @@ validate 1, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26, 2, 12, 13, 1
load_fn load_fn
arm-aarch64-separate arm-aarch64-separate
aarch64 = ld3lane aarch64 = ld3
const-aarch64 = LANE const-aarch64 = LANE
link-aarch64 = ld3lane._EXTpi82_ link-aarch64 = ld3lane._EXTpi82_
//generate *const i8:int8x16x3_t:int8x16x3_t, *const i64:int64x1x3_t:int64x1x3_t, *const i64:int64x2x3_t:int64x2x3_t generate *const i8:int8x16x3_t:int8x16x3_t, *const i64:int64x1x3_t:int64x1x3_t, *const i64:int64x2x3_t:int64x2x3_t
arm = vld3lane arm = vld3
const-arm = LANE const-arm = LANE
link-arm = vld3lane._EXTpi82_ link-arm = vld3lane._EXTpi82_
//generate *const i8:int8x8x3_t:int8x8x3_t, *const i16:int16x4x3_t:int16x4x3_t, *const i32:int32x2x3_t:int32x2x3_t generate *const i8:int8x8x3_t:int8x8x3_t, *const i16:int16x4x3_t:int16x4x3_t, *const i32:int32x2x3_t:int32x2x3_t
//generate *const i16:int16x8x3_t:int16x8x3_t, *const i32:int32x4x3_t:int32x4x3_t generate *const i16:int16x8x3_t:int16x8x3_t, *const i32:int32x4x3_t:int32x4x3_t
/// Load multiple 3-element structures to three registers /// Load multiple 3-element structures to three registers
name = vld3 name = vld3
@@ -2436,19 +2465,19 @@ n = 0
validate 1, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26, 2, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 2, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8 validate 1, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26, 2, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 2, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8
load_fn load_fn
aarch64 = ld3lane aarch64 = ld3
const-aarch64 = LANE const-aarch64 = LANE
target = aes target = aes
//generate *const p64:poly64x1x3_t:poly64x1x3_t, *const p64:poly64x2x3_t:poly64x2x3_t generate *const p64:poly64x1x3_t:poly64x1x3_t, *const p64:poly64x2x3_t:poly64x2x3_t
target = default target = default
//generate *const p8:poly8x16x3_t:poly8x16x3_t, *const u8:uint8x16x3_t:uint8x16x3_t, *const u64:uint64x1x3_t:uint64x1x3_t, *const u64:uint64x2x3_t:uint64x2x3_t generate *const p8:poly8x16x3_t:poly8x16x3_t, *const u8:uint8x16x3_t:uint8x16x3_t, *const u64:uint64x1x3_t:uint64x1x3_t, *const u64:uint64x2x3_t:uint64x2x3_t
arm = vld3lane arm = vld3
const-arm = LANE const-arm = LANE
//generate *const u8:uint8x8x3_t:uint8x8x3_t, *const u16:uint16x4x3_t:uint16x4x3_t, *const u32:uint32x2x3_t:uint32x2x3_t generate *const u8:uint8x8x3_t:uint8x8x3_t, *const u16:uint16x4x3_t:uint16x4x3_t, *const u32:uint32x2x3_t:uint32x2x3_t
//generate *const u16:uint16x8x3_t:uint16x8x3_t, *const u32:uint32x4x3_t:uint32x4x3_t generate *const u16:uint16x8x3_t:uint16x8x3_t, *const u32:uint32x4x3_t:uint32x4x3_t
//generate *const p8:poly8x8x3_t:poly8x8x3_t, *const p16:poly16x4x3_t:poly16x4x3_t generate *const p8:poly8x8x3_t:poly8x8x3_t, *const p16:poly16x4x3_t:poly16x4x3_t
//generate *const p16:poly16x8x3_t:poly16x8x3_t generate *const p16:poly16x8x3_t:poly16x8x3_t
/// Load multiple 3-element structures to three registers /// Load multiple 3-element structures to three registers
name = vld3 name = vld3
@@ -2462,15 +2491,15 @@ validate 1., 2., 2., 14., 2., 16., 17., 18., 2., 6., 7., 8.
load_fn load_fn
arm-aarch64-separate arm-aarch64-separate
aarch64 = ld3lane aarch64 = ld3
const-aarch64 = LANE const-aarch64 = LANE
link-aarch64 = ld3lane._EXTpi82_ link-aarch64 = ld3lane._EXTpi82_
//generate *const f64:float64x1x3_t:float64x1x3_t, *const f64:float64x2x3_t:float64x2x3_t generate *const f64:float64x1x3_t:float64x1x3_t, *const f64:float64x2x3_t:float64x2x3_t
arm = vld3lane arm = vld3
const-arm = LANE const-arm = LANE
link-arm = vld3lane._EXTpi82_ link-arm = vld3lane._EXTpi82_
//generate *const f32:float32x2x3_t:float32x2x3_t, *const f32:float32x4x3_t:float32x4x3_t generate *const f32:float32x2x3_t:float32x2x3_t, *const f32:float32x4x3_t:float32x4x3_t
/// Load multiple 4-element structures to four registers /// Load multiple 4-element structures to four registers
name = vld4 name = vld4
@@ -2482,12 +2511,15 @@ arm-aarch64-separate
aarch64 = ld4 aarch64 = ld4
link-aarch64 = ld4._EXTv2_ link-aarch64 = ld4._EXTv2_
//generate *const i64:int64x2x4_t generate *const i64:int64x2x4_t
arm = vld4 arm = vld4
link-arm = vld4._EXTpi82_ link-arm = vld4._EXTpi82_
//generate *const i8:int8x8x4_t, *const i16:int16x4x4_t, *const i32:int32x2x4_t, *const i64:int64x1x4_t generate *const i8:int8x8x4_t, *const i16:int16x4x4_t, *const i32:int32x2x4_t
//generate *const i8:int8x16x4_t, *const i16:int16x8x4_t, *const i32:int32x4x4_t generate *const i8:int8x16x4_t, *const i16:int16x8x4_t, *const i32:int32x4x4_t
aarch64 = nop
arm = nop
generate *const i64:int64x1x4_t
/// Load multiple 4-element structures to four registers /// Load multiple 4-element structures to four registers
name = vld4 name = vld4
@@ -2498,17 +2530,20 @@ validate 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 1
load_fn load_fn
aarch64 = ld4 aarch64 = ld4
//generate *const u64:uint64x2x4_t generate *const u64:uint64x2x4_t
target = aes target = aes
//generate *const p64:poly64x2x4_t generate *const p64:poly64x2x4_t
target = default target = default
arm = vld4 arm = vld4
//generate *const u8:uint8x8x4_t, *const u16:uint16x4x4_t, *const u32:uint32x2x4_t, *const u64:uint64x1x4_t generate *const u8:uint8x8x4_t, *const u16:uint16x4x4_t, *const u32:uint32x2x4_t
//generate *const u8:uint8x16x4_t, *const u16:uint16x8x4_t, *const u32:uint32x4x4_t generate *const u8:uint8x16x4_t, *const u16:uint16x8x4_t, *const u32:uint32x4x4_t
//generate *const p8:poly8x8x4_t, *const p16:poly16x4x4_t, *const p8:poly8x16x4_t, *const p16:poly16x8x4_t generate *const p8:poly8x8x4_t, *const p16:poly16x4x4_t, *const p8:poly8x16x4_t, *const p16:poly16x8x4_t
aarch64 = nop
arm = nop
generate *const u64:uint64x1x4_t
target = aes target = aes
//generate *const p64:poly64x1x4_t generate *const p64:poly64x1x4_t
/// Load multiple 4-element structures to four registers /// Load multiple 4-element structures to four registers
name = vld4 name = vld4
@@ -2518,13 +2553,15 @@ validate 1., 2., 2., 6., 2., 6., 6., 8., 2., 6., 6., 15., 6., 8., 8., 16.
load_fn load_fn
arm-aarch64-separate arm-aarch64-separate
aarch64 = ld4 aarch64 = nop
link-aarch64 = ld4._EXTv2_ link-aarch64 = ld4._EXTv2_
//generate *const f64:float64x1x4_t, *const f64:float64x2x4_t generate *const f64:float64x1x4_t
aarch64 = ld4
generate *const f64:float64x2x4_t
arm = vld4 arm = vld4
link-arm = vld4._EXTpi82_ link-arm = vld4._EXTpi82_
//generate *const f32:float32x2x4_t, *const f32:float32x4x4_t generate *const f32:float32x2x4_t, *const f32:float32x4x4_t
/// Load single 4-element structure and replicate to all lanes of four registers /// Load single 4-element structure and replicate to all lanes of four registers
name = vld4 name = vld4
@@ -2532,15 +2569,18 @@ out-dup-nox
a = 0, 1, 1, 1, 1, 2, 4, 3, 5, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9 a = 0, 1, 1, 1, 1, 2, 4, 3, 5, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9
validate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 validate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
load_fn load_fn
arm-aarch64-separate
aarch64 = ld4r aarch64 = ld4r
link-aarch64 = ld4r._EXT2_ link-aarch64 = ld4r._EXT2_
//generate *const i64:int64x2x4_t generate *const i64:int64x2x4_t
arm = vld4dup arm = vld4
link-arm = vld4dup._EXTpi82_ link-arm = vld4dup._EXTpi82_
//generate *const i8:int8x8x4_t, *const i16:int16x4x4_t, *const i32:int32x2x4_t, *const i64:int64x1x4_t generate *const i8:int8x8x4_t, *const i16:int16x4x4_t, *const i32:int32x2x4_t
//generate *const i8:int8x16x4_t, *const i16:int16x8x4_t, *const i32:int32x4x4_t generate *const i8:int8x16x4_t, *const i16:int16x8x4_t, *const i32:int32x4x4_t
arm = nop
generate *const i64:int64x1x4_t
/// Load single 4-element structure and replicate to all lanes of four registers /// Load single 4-element structure and replicate to all lanes of four registers
name = vld4 name = vld4
@@ -2551,17 +2591,19 @@ validate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
load_fn load_fn
aarch64 = ld4r aarch64 = ld4r
//generate *const u64:uint64x2x4_t generate *const u64:uint64x2x4_t
target = aes target = aes
//generate *const p64:poly64x2x4_t generate *const p64:poly64x2x4_t
target = default target = default
arm = vld4dup arm = vld4
//generate *const u8:uint8x8x4_t, *const u16:uint16x4x4_t, *const u32:uint32x2x4_t, *const u64:uint64x1x4_t generate *const u8:uint8x8x4_t, *const u16:uint16x4x4_t, *const u32:uint32x2x4_t
//generate *const u8:uint8x16x4_t, *const u16:uint16x8x4_t, *const u32:uint32x4x4_t generate *const u8:uint8x16x4_t, *const u16:uint16x8x4_t, *const u32:uint32x4x4_t
//generate *const p8:poly8x8x4_t, *const p16:poly16x4x4_t, *const p8:poly8x16x4_t, *const p16:poly16x8x4_t generate *const p8:poly8x8x4_t, *const p16:poly16x4x4_t, *const p8:poly8x16x4_t, *const p16:poly16x8x4_t
arm = nop
generate *const u64:uint64x1x4_t
target = aes target = aes
//generate *const p64:poly64x1x4_t generate *const p64:poly64x1x4_t
/// Load single 4-element structure and replicate to all lanes of four registers /// Load single 4-element structure and replicate to all lanes of four registers
name = vld4 name = vld4
@@ -2569,14 +2611,15 @@ out-dup-nox
a = 0., 1., 1., 1., 1., 6., 4., 3., 5., 7., 4., 3., 5., 8., 4., 3., 5., 9., 4., 3., 5. a = 0., 1., 1., 1., 1., 6., 4., 3., 5., 7., 4., 3., 5., 8., 4., 3., 5., 9., 4., 3., 5.
validate 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1. validate 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.
load_fn load_fn
arm-aarch64-separate
aarch64 = ld4r aarch64 = ld4r
link-aarch64 = ld4r._EXT2_ link-aarch64 = ld4r._EXT2_
//generate *const f64:float64x1x4_t, *const f64:float64x2x4_t generate *const f64:float64x1x4_t, *const f64:float64x2x4_t
arm = vld4dup arm = vld4
link-arm = vld4dup._EXTpi82_ link-arm = vld4dup._EXTpi82_
//generate *const f32:float32x2x4_t, *const f32:float32x4x4_t generate *const f32:float32x2x4_t, *const f32:float32x4x4_t
/// Load multiple 4-element structures to four registers /// Load multiple 4-element structures to four registers
name = vld4 name = vld4
@@ -2590,16 +2633,16 @@ validate 1, 2, 2, 2, 2, 16, 2, 18, 2, 20, 21, 22, 2, 24, 25, 26, 2, 12, 13, 14,
load_fn load_fn
arm-aarch64-separate arm-aarch64-separate
aarch64 = ld4lane aarch64 = ld4
const-aarch64 = LANE const-aarch64 = LANE
link-aarch64 = ld4lane._EXTpi82_ link-aarch64 = ld4lane._EXTpi82_
//generate *const i8:int8x16x4_t:int8x16x4_t, *const i64:int64x1x4_t:int64x1x4_t, *const i64:int64x2x4_t:int64x2x4_t generate *const i8:int8x16x4_t:int8x16x4_t, *const i64:int64x1x4_t:int64x1x4_t, *const i64:int64x2x4_t:int64x2x4_t
arm = vld4lane arm = vld4
const-arm = LANE const-arm = LANE
link-arm = vld4lane._EXTpi82_ link-arm = vld4lane._EXTpi82_
//generate *const i8:int8x8x4_t:int8x8x4_t, *const i16:int16x4x4_t:int16x4x4_t, *const i32:int32x2x4_t:int32x2x4_t generate *const i8:int8x8x4_t:int8x8x4_t, *const i16:int16x4x4_t:int16x4x4_t, *const i32:int32x2x4_t:int32x2x4_t
//generate *const i16:int16x8x4_t:int16x8x4_t, *const i32:int32x4x4_t:int32x4x4_t generate *const i16:int16x8x4_t:int16x8x4_t, *const i32:int32x4x4_t:int32x4x4_t
/// Load multiple 4-element structures to four registers /// Load multiple 4-element structures to four registers
name = vld4 name = vld4
@@ -2613,19 +2656,19 @@ n = 0
validate 1, 2, 2, 2, 2, 16, 2, 18, 2, 20, 21, 22, 2, 24, 25, 26, 2, 12, 13, 14, 15, 16, 2, 18, 2, 20, 21, 22, 23, 24, 25, 26, 2, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 2, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16 validate 1, 2, 2, 2, 2, 16, 2, 18, 2, 20, 21, 22, 2, 24, 25, 26, 2, 12, 13, 14, 15, 16, 2, 18, 2, 20, 21, 22, 23, 24, 25, 26, 2, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 2, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16
load_fn load_fn
aarch64 = ld4lane aarch64 = ld4
const-aarch64 = LANE const-aarch64 = LANE
target = aes target = aes
//generate *const p64:poly64x1x4_t:poly64x1x4_t, *const p64:poly64x2x4_t:poly64x2x4_t generate *const p64:poly64x1x4_t:poly64x1x4_t, *const p64:poly64x2x4_t:poly64x2x4_t
target = default target = default
//generate *const p8:poly8x16x4_t:poly8x16x4_t, *const u8:uint8x16x4_t:uint8x16x4_t, *const u64:uint64x1x4_t:uint64x1x4_t, *const u64:uint64x2x4_t:uint64x2x4_t generate *const p8:poly8x16x4_t:poly8x16x4_t, *const u8:uint8x16x4_t:uint8x16x4_t, *const u64:uint64x1x4_t:uint64x1x4_t, *const u64:uint64x2x4_t:uint64x2x4_t
arm = vld4lane arm = vld4
const-arm = LANE const-arm = LANE
//generate *const u8:uint8x8x4_t:uint8x8x4_t, *const u16:uint16x4x4_t:uint16x4x4_t, *const u32:uint32x2x4_t:uint32x2x4_t generate *const u8:uint8x8x4_t:uint8x8x4_t, *const u16:uint16x4x4_t:uint16x4x4_t, *const u32:uint32x2x4_t:uint32x2x4_t
//generate *const u16:uint16x8x4_t:uint16x8x4_t, *const u32:uint32x4x4_t:uint32x4x4_t generate *const u16:uint16x8x4_t:uint16x8x4_t, *const u32:uint32x4x4_t:uint32x4x4_t
//generate *const p8:poly8x8x4_t:poly8x8x4_t, *const p16:poly16x4x4_t:poly16x4x4_t generate *const p8:poly8x8x4_t:poly8x8x4_t, *const p16:poly16x4x4_t:poly16x4x4_t
//generate *const p16:poly16x8x4_t:poly16x8x4_t generate *const p16:poly16x8x4_t:poly16x8x4_t
/// Load multiple 4-element structures to four registers /// Load multiple 4-element structures to four registers
name = vld4 name = vld4
@@ -2639,15 +2682,15 @@ validate 1., 2., 2., 2., 2., 16., 2., 18., 2., 6., 7., 8., 2., 4., 3., 5.
load_fn load_fn
arm-aarch64-separate arm-aarch64-separate
aarch64 = ld4lane aarch64 = ld4
const-aarch64 = LANE const-aarch64 = LANE
link-aarch64 = ld4lane._EXTpi82_ link-aarch64 = ld4lane._EXTpi82_
//generate *const f64:float64x1x4_t:float64x1x4_t, *const f64:float64x2x4_t:float64x2x4_t generate *const f64:float64x1x4_t:float64x1x4_t, *const f64:float64x2x4_t:float64x2x4_t
arm = vld4lane arm = vld4
const-arm = LANE const-arm = LANE
link-arm = vld4lane._EXTpi82_ link-arm = vld4lane._EXTpi82_
//generate *const f32:float32x2x4_t:float32x2x4_t, *const f32:float32x4x4_t:float32x4x4_t generate *const f32:float32x2x4_t:float32x2x4_t, *const f32:float32x4x4_t:float32x4x4_t
/// Store multiple single-element structures from one, two, three, or four registers /// Store multiple single-element structures from one, two, three, or four registers
name = vst1 name = vst1
@@ -2662,13 +2705,13 @@ store_fn
aarch64 = nop aarch64 = nop
arm = nop arm = nop
//generate *mut i8:int8x8_t:void, *mut i16:int16x4_t:void, *mut i32:int32x2_t:void, *mut i64:int64x1_t:void generate *mut i8:int8x8_t:void, *mut i16:int16x4_t:void, *mut i32:int32x2_t:void, *mut i64:int64x1_t:void
//generate *mut i8:int8x16_t:void, *mut i16:int16x8_t:void, *mut i32:int32x4_t:void, *mut i64:int64x2_t:void generate *mut i8:int8x16_t:void, *mut i16:int16x8_t:void, *mut i32:int32x4_t:void, *mut i64:int64x2_t:void
//generate *mut u8:uint8x8_t:void, *mut u16:uint16x4_t:void, *mut u32:uint32x2_t:void, *mut u64:uint64x1_t:void generate *mut u8:uint8x8_t:void, *mut u16:uint16x4_t:void, *mut u32:uint32x2_t:void, *mut u64:uint64x1_t:void
//generate *mut u8:uint8x16_t:void, *mut u16:uint16x8_t:void, *mut u32:uint32x4_t:void, *mut u64:uint64x2_t:void generate *mut u8:uint8x16_t:void, *mut u16:uint16x8_t:void, *mut u32:uint32x4_t:void, *mut u64:uint64x2_t:void
//generate *mut p8:poly8x8_t:void, *mut p16:poly16x4_t:void, *mut p8:poly8x16_t:void, *mut p16:poly16x8_t:void generate *mut p8:poly8x8_t:void, *mut p16:poly16x4_t:void, *mut p8:poly8x16_t:void, *mut p16:poly16x8_t:void
target = aes target = aes
//generate *mut p64:poly64x1_t:void, *mut p64:poly64x2_t:void generate *mut p64:poly64x1_t:void, *mut p64:poly64x2_t:void
/// Store multiple single-element structures from one, two, three, or four registers /// Store multiple single-element structures from one, two, three, or four registers
name = vst1 name = vst1
@@ -2682,10 +2725,10 @@ validate 1., 0., 0., 0., 0., 0., 0., 0.
store_fn store_fn
aarch64 = nop aarch64 = nop
//generate *mut f64:float64x1_t:void, *mut f64:float64x2_t:void generate *mut f64:float64x1_t:void, *mut f64:float64x2_t:void
arm = nop arm = nop
//generate *mut f32:float32x2_t:void, *mut f32:float32x4_t:void generate *mut f32:float32x2_t:void, *mut f32:float32x4_t:void
/// Store multiple single-element structures from one, two, three, or four registers /// Store multiple single-element structures from one, two, three, or four registers
name = vst1 name = vst1
@@ -2776,12 +2819,15 @@ arm-aarch64-separate
aarch64 = st2 aarch64 = st2
link-aarch64 = st2._EXTpi8_ link-aarch64 = st2._EXTpi8_
//generate *mut i64:int64x2x2_t:void generate *mut i64:int64x2x2_t:void
arm = vst2 arm = vst2
link-arm = vst2._EXTpi8r_ link-arm = vst2._EXTpi8r_
//generate *mut i8:int8x8x2_t:void, *mut i16:int16x4x2_t:void, *mut i32:int32x2x2_t:void, *mut i64:int64x1x2_t:void generate *mut i8:int8x8x2_t:void, *mut i16:int16x4x2_t:void, *mut i32:int32x2x2_t:void
//generate *mut i8:int8x16x2_t:void, *mut i16:int16x8x2_t:void, *mut i32:int32x4x2_t:void generate *mut i8:int8x16x2_t:void, *mut i16:int16x8x2_t:void, *mut i32:int32x4x2_t:void
arm = nop
aarch64 = nop
generate *mut i64:int64x1x2_t:void
/// Store multiple 2-element structures from two registers /// Store multiple 2-element structures from two registers
name = vst2 name = vst2
@@ -2792,17 +2838,20 @@ validate 1, 2, 2, 3, 2, 4, 3, 5, 2, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5,
store_fn store_fn
aarch64 = st2 aarch64 = st2
//generate *mut u64:uint64x2x2_t:void generate *mut u64:uint64x2x2_t:void
target = aes target = aes
//generate *mut p64:poly64x2x2_t:void generate *mut p64:poly64x2x2_t:void
target = default target = default
arm = vst2 arm = vst2
//generate *mut u8:uint8x8x2_t:void, *mut u16:uint16x4x2_t:void, *mut u32:uint32x2x2_t:void, *mut u64:uint64x1x2_t:void generate *mut u8:uint8x8x2_t:void, *mut u16:uint16x4x2_t:void, *mut u32:uint32x2x2_t:void
//generate *mut u8:uint8x16x2_t:void, *mut u16:uint16x8x2_t:void, *mut u32:uint32x4x2_t:void generate *mut u8:uint8x16x2_t:void, *mut u16:uint16x8x2_t:void, *mut u32:uint32x4x2_t:void
//generate *mut p8:poly8x8x2_t:void, *mut p16:poly16x4x2_t:void, *mut p8:poly8x16x2_t:void, *mut p16:poly16x8x2_t:void generate *mut p8:poly8x8x2_t:void, *mut p16:poly16x4x2_t:void, *mut p8:poly8x16x2_t:void, *mut p16:poly16x8x2_t:void
arm = nop
aarch64 = nop
generate *mut u64:uint64x1x2_t:void
target = aes target = aes
//generate *mut p64:poly64x1x2_t:void generate *mut p64:poly64x1x2_t:void
/// Store multiple 2-element structures from two registers /// Store multiple 2-element structures from two registers
name = vst2 name = vst2
@@ -2812,13 +2861,15 @@ validate 1., 2., 2., 3., 2., 4., 3., 5., 2., 6., 3., 7., 4., 8., 5., 9.
store_fn store_fn
arm-aarch64-separate arm-aarch64-separate
aarch64 = st2 aarch64 = st1
link-aarch64 = st2._EXTpi8_ link-aarch64 = st2._EXTpi8_
//generate *mut f64:float64x1x2_t:void, *mut f64:float64x2x2_t:void generate *mut f64:float64x1x2_t:void
aarch64 = st2
generate *mut f64:float64x2x2_t:void
arm = vst2 arm = vst2
link-arm = vst2._EXTpi8r_ link-arm = vst2._EXTpi8r_
//generate *mut f32:float32x2x2_t:void, *mut f32:float32x4x2_t:void generate *mut f32:float32x2x2_t:void, *mut f32:float32x4x2_t:void
/// Store multiple 2-element structures from two registers /// Store multiple 2-element structures from two registers
name = vst2 name = vst2
@@ -2831,16 +2882,16 @@ validate 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
store_fn store_fn
arm-aarch64-separate arm-aarch64-separate
aarch64 = st2lane aarch64 = st2
link-aarch64 = st2lane._EXTpi8_ link-aarch64 = st2lane._EXTpi8_
const-aarch64 = LANE const-aarch64 = LANE
//generate *mut i8:int8x16x2_t:void, *mut i64:int64x1x2_t:void, *mut i64:int64x2x2_t:void generate *mut i8:int8x16x2_t:void, *mut i64:int64x1x2_t:void, *mut i64:int64x2x2_t:void
arm = vst2lane arm = vst2
link-arm = vst2lane._EXTpi8r_ link-arm = vst2lane._EXTpi8r_
const-arm = LANE const-arm = LANE
//generate *mut i8:int8x8x2_t:void, *mut i16:int16x4x2_t:void, *mut i32:int32x2x2_t:void generate *mut i8:int8x8x2_t:void, *mut i16:int16x4x2_t:void, *mut i32:int32x2x2_t:void
//generate *mut i16:int16x8x2_t:void, *mut i32:int32x4x2_t:void generate *mut i16:int16x8x2_t:void, *mut i32:int32x4x2_t:void
/// Store multiple 2-element structures from two registers /// Store multiple 2-element structures from two registers
name = vst2 name = vst2
@@ -2853,16 +2904,16 @@ n = 0
validate 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 validate 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
store_fn store_fn
aarch64 = st2lane aarch64 = st2
//generate *mut u8:uint8x16x2_t:void, *mut u64:uint64x1x2_t:void, *mut u64:uint64x2x2_t:void, *mut p8:poly8x16x2_t:void generate *mut u8:uint8x16x2_t:void, *mut u64:uint64x1x2_t:void, *mut u64:uint64x2x2_t:void, *mut p8:poly8x16x2_t:void
target = aes target = aes
//generate *mut p64:poly64x1x2_t:void, *mut p64:poly64x2x2_t:void generate *mut p64:poly64x1x2_t:void, *mut p64:poly64x2x2_t:void
target = default target = default
arm = vst2lane arm = vst2
//generate *mut u8:uint8x8x2_t:void, *mut u16:uint16x4x2_t:void, *mut u32:uint32x2x2_t:void generate *mut u8:uint8x8x2_t:void, *mut u16:uint16x4x2_t:void, *mut u32:uint32x2x2_t:void
//generate *mut u16:uint16x8x2_t:void, *mut u32:uint32x4x2_t:void generate *mut u16:uint16x8x2_t:void, *mut u32:uint32x4x2_t:void
//generate *mut p8:poly8x8x2_t:void, *mut p16:poly16x4x2_t:void, *mut p16:poly16x8x2_t:void generate *mut p8:poly8x8x2_t:void, *mut p16:poly16x4x2_t:void, *mut p16:poly16x8x2_t:void
/// Store multiple 2-element structures from two registers /// Store multiple 2-element structures from two registers
name = vst2 name = vst2
@@ -2875,15 +2926,15 @@ validate 1., 2., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.
store_fn store_fn
arm-aarch64-separate arm-aarch64-separate
aarch64 = st2lane aarch64 = st2
link-aarch64 = st2lane._EXTpi8_ link-aarch64 = st2lane._EXTpi8_
const-aarch64 = LANE const-aarch64 = LANE
//generate *mut f64:float64x1x2_t:void, *mut f64:float64x2x2_t:void generate *mut f64:float64x1x2_t:void, *mut f64:float64x2x2_t:void
arm = vst2lane arm = vst2
link-arm = vst2lane._EXTpi8r_ link-arm = vst2lane._EXTpi8r_
const-arm = LANE const-arm = LANE
//generate *mut f32:float32x2x2_t:void, *mut f32:float32x4x2_t:void generate *mut f32:float32x2x2_t:void, *mut f32:float32x4x2_t:void
/// Store multiple 3-element structures from three registers /// Store multiple 3-element structures from three registers
name = vst3 name = vst3
@@ -2895,12 +2946,15 @@ arm-aarch64-separate
aarch64 = st3 aarch64 = st3
link-aarch64 = st3._EXTpi8_ link-aarch64 = st3._EXTpi8_
//generate *mut i64:int64x2x3_t:void generate *mut i64:int64x2x3_t:void
arm = vst3 arm = vst3
link-arm = vst3._EXTpi8r_ link-arm = vst3._EXTpi8r_
//generate *mut i8:int8x8x3_t:void, *mut i16:int16x4x3_t:void, *mut i32:int32x2x3_t:void, *mut i64:int64x1x3_t:void generate *mut i8:int8x8x3_t:void, *mut i16:int16x4x3_t:void, *mut i32:int32x2x3_t:void
//generate *mut i8:int8x16x3_t:void, *mut i16:int16x8x3_t:void, *mut i32:int32x4x3_t:void generate *mut i8:int8x16x3_t:void, *mut i16:int16x8x3_t:void, *mut i32:int32x4x3_t:void
arm = nop
aarch64 = nop
generate *mut i64:int64x1x3_t:void
/// Store multiple 3-element structures from three registers /// Store multiple 3-element structures from three registers
name = vst3 name = vst3
@@ -2911,17 +2965,20 @@ validate 1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8, 2, 13, 13, 4, 14, 14, 7, 15, 15, 8,
store_fn store_fn
aarch64 = st3 aarch64 = st3
//generate *mut u64:uint64x2x3_t:void generate *mut u64:uint64x2x3_t:void
target = aes target = aes
//generate *mut p64:poly64x2x3_t:void generate *mut p64:poly64x2x3_t:void
target = default target = default
arm = vst3 arm = vst3
//generate *mut u8:uint8x8x3_t:void, *mut u16:uint16x4x3_t:void, *mut u32:uint32x2x3_t:void, *mut u64:uint64x1x3_t:void generate *mut u8:uint8x8x3_t:void, *mut u16:uint16x4x3_t:void, *mut u32:uint32x2x3_t:void
//generate *mut u8:uint8x16x3_t:void, *mut u16:uint16x8x3_t:void, *mut u32:uint32x4x3_t:void generate *mut u8:uint8x16x3_t:void, *mut u16:uint16x8x3_t:void, *mut u32:uint32x4x3_t:void
//generate *mut p8:poly8x8x3_t:void, *mut p16:poly16x4x3_t:void, *mut p8:poly8x16x3_t:void, *mut p16:poly16x8x3_t:void generate *mut p8:poly8x8x3_t:void, *mut p16:poly16x4x3_t:void, *mut p8:poly8x16x3_t:void, *mut p16:poly16x8x3_t:void
arm = nop
aarch64 = nop
generate *mut u64:uint64x1x3_t:void
target = aes target = aes
//generate *mut p64:poly64x1x3_t:void generate *mut p64:poly64x1x3_t:void
/// Store multiple 3-element structures from three registers /// Store multiple 3-element structures from three registers
name = vst3 name = vst3
@@ -2931,13 +2988,15 @@ validate 1., 2., 2., 2., 4., 4., 2., 7., 7., 4., 8., 8., 2., 13., 13., 4.
store_fn store_fn
arm-aarch64-separate arm-aarch64-separate
aarch64 = st3 aarch64 = nop
link-aarch64 = st3._EXTpi8_ link-aarch64 = st3._EXTpi8_
//generate *mut f64:float64x1x3_t:void, *mut f64:float64x2x3_t:void generate *mut f64:float64x1x3_t:void
aarch64 = st3
generate *mut f64:float64x2x3_t:void
arm = vst3 arm = vst3
link-arm = vst3._EXTpi8r_ link-arm = vst3._EXTpi8r_
//generate *mut f32:float32x2x3_t:void, *mut f32:float32x4x3_t:void generate *mut f32:float32x2x3_t:void, *mut f32:float32x4x3_t:void
/// Store multiple 3-element structures from three registers /// Store multiple 3-element structures from three registers
name = vst3 name = vst3
@@ -2950,16 +3009,16 @@ validate 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
store_fn store_fn
arm-aarch64-separate arm-aarch64-separate
aarch64 = st3lane aarch64 = st3
link-aarch64 = st3lane._EXTpi8_ link-aarch64 = st3lane._EXTpi8_
const-aarch64 = LANE const-aarch64 = LANE
//generate *mut i8:int8x16x3_t:void, *mut i64:int64x1x3_t:void, *mut i64:int64x2x3_t:void generate *mut i8:int8x16x3_t:void, *mut i64:int64x1x3_t:void, *mut i64:int64x2x3_t:void
arm = vst3lane arm = vst3
link-arm = vst3lane._EXTpi8r_ link-arm = vst3lane._EXTpi8r_
const-arm = LANE const-arm = LANE
//generate *mut i8:int8x8x3_t:void, *mut i16:int16x4x3_t:void, *mut i32:int32x2x3_t:void generate *mut i8:int8x8x3_t:void, *mut i16:int16x4x3_t:void, *mut i32:int32x2x3_t:void
//generate *mut i16:int16x8x3_t:void, *mut i32:int32x4x3_t:void generate *mut i16:int16x8x3_t:void, *mut i32:int32x4x3_t:void
/// Store multiple 3-element structures from three registers /// Store multiple 3-element structures from three registers
name = vst3 name = vst3
@@ -2972,16 +3031,16 @@ n = 0
validate 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 validate 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
store_fn store_fn
aarch64 = st3lane aarch64 = st3
//generate *mut u8:uint8x16x3_t:void, *mut u64:uint64x1x3_t:void, *mut u64:uint64x2x3_t:void, *mut p8:poly8x16x3_t:void generate *mut u8:uint8x16x3_t:void, *mut u64:uint64x1x3_t:void, *mut u64:uint64x2x3_t:void, *mut p8:poly8x16x3_t:void
target = aes target = aes
//generate *mut p64:poly64x1x3_t:void, *mut p64:poly64x2x3_t:void generate *mut p64:poly64x1x3_t:void, *mut p64:poly64x2x3_t:void
target = default target = default
arm = vst3lane arm = vst3
//generate *mut u8:uint8x8x3_t:void, *mut u16:uint16x4x3_t:void, *mut u32:uint32x2x3_t:void generate *mut u8:uint8x8x3_t:void, *mut u16:uint16x4x3_t:void, *mut u32:uint32x2x3_t:void
//generate *mut u16:uint16x8x3_t:void, *mut u32:uint32x4x3_t:void generate *mut u16:uint16x8x3_t:void, *mut u32:uint32x4x3_t:void
//generate *mut p8:poly8x8x3_t:void, *mut p16:poly16x4x3_t:void, *mut p16:poly16x8x3_t:void generate *mut p8:poly8x8x3_t:void, *mut p16:poly16x4x3_t:void, *mut p16:poly16x8x3_t:void
/// Store multiple 3-element structures from three registers /// Store multiple 3-element structures from three registers
name = vst3 name = vst3
@@ -2994,15 +3053,15 @@ validate 1., 2., 2., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.
store_fn store_fn
arm-aarch64-separate arm-aarch64-separate
aarch64 = st3lane aarch64 = st3
link-aarch64 = st3lane._EXTpi8_ link-aarch64 = st3lane._EXTpi8_
const-aarch64 = LANE const-aarch64 = LANE
//generate *mut f64:float64x1x3_t:void, *mut f64:float64x2x3_t:void generate *mut f64:float64x1x3_t:void, *mut f64:float64x2x3_t:void
arm = vst3lane arm = vst3
link-arm = vst3lane._EXTpi8r_ link-arm = vst3lane._EXTpi8r_
const-arm = LANE const-arm = LANE
//generate *mut f32:float32x2x3_t:void, *mut f32:float32x4x3_t:void generate *mut f32:float32x2x3_t:void, *mut f32:float32x4x3_t:void
/// Store multiple 4-element structures from four registers /// Store multiple 4-element structures from four registers
name = vst4 name = vst4
@@ -3014,12 +3073,15 @@ arm-aarch64-separate
aarch64 = st4 aarch64 = st4
link-aarch64 = st4._EXTpi8_ link-aarch64 = st4._EXTpi8_
//generate *mut i64:int64x2x4_t:void generate *mut i64:int64x2x4_t:void
arm = vst4 arm = vst4
link-arm = vst4._EXTpi8r_ link-arm = vst4._EXTpi8r_
//generate *mut i8:int8x8x4_t:void, *mut i16:int16x4x4_t:void, *mut i32:int32x2x4_t:void, *mut i64:int64x1x4_t:void generate *mut i8:int8x8x4_t:void, *mut i16:int16x4x4_t:void, *mut i32:int32x2x4_t:void
//generate *mut i8:int8x16x4_t:void, *mut i16:int16x8x4_t:void, *mut i32:int32x4x4_t:void generate *mut i8:int8x16x4_t:void, *mut i16:int16x8x4_t:void, *mut i32:int32x4x4_t:void
arm = nop
aarch64 = nop
generate *mut i64:int64x1x4_t:void
/// Store multiple 4-element structures from four registers /// Store multiple 4-element structures from four registers
name = vst4 name = vst4
@@ -3030,17 +3092,20 @@ validate 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 1
store_fn store_fn
aarch64 = st4 aarch64 = st4
//generate *mut u64:uint64x2x4_t:void generate *mut u64:uint64x2x4_t:void
target = aes target = aes
//generate *mut p64:poly64x2x4_t:void generate *mut p64:poly64x2x4_t:void
target = default target = default
arm = vst4 arm = vst4
//generate *mut u8:uint8x8x4_t:void, *mut u16:uint16x4x4_t:void, *mut u32:uint32x2x4_t:void, *mut u64:uint64x1x4_t:void generate *mut u8:uint8x8x4_t:void, *mut u16:uint16x4x4_t:void, *mut u32:uint32x2x4_t:void
//generate *mut u8:uint8x16x4_t:void, *mut u16:uint16x8x4_t:void, *mut u32:uint32x4x4_t:void generate *mut u8:uint8x16x4_t:void, *mut u16:uint16x8x4_t:void, *mut u32:uint32x4x4_t:void
//generate *mut p8:poly8x8x4_t:void, *mut p16:poly16x4x4_t:void, *mut p8:poly8x16x4_t:void, *mut p16:poly16x8x4_t:void generate *mut p8:poly8x8x4_t:void, *mut p16:poly16x4x4_t:void, *mut p8:poly8x16x4_t:void, *mut p16:poly16x8x4_t:void
arm = nop
aarch64 = nop
generate *mut u64:uint64x1x4_t:void
target = aes target = aes
//generate *mut p64:poly64x1x4_t:void generate *mut p64:poly64x1x4_t:void
/// Store multiple 4-element structures from four registers /// Store multiple 4-element structures from four registers
name = vst4 name = vst4
@@ -3050,13 +3115,15 @@ validate 1., 2., 2., 6., 2., 6., 6., 8., 2., 6., 6., 8., 6., 8., 8., 16.
store_fn store_fn
arm-aarch64-separate arm-aarch64-separate
aarch64 = st4 aarch64 = nop
link-aarch64 = st4._EXTpi8_ link-aarch64 = st4._EXTpi8_
//generate *mut f64:float64x1x4_t:void, *mut f64:float64x2x4_t:void generate *mut f64:float64x1x4_t:void
aarch64 = st4
generate *mut f64:float64x2x4_t:void
arm = vst4 arm = vst4
link-arm = vst4._EXTpi8r_ link-arm = vst4._EXTpi8r_
//generate *mut f32:float32x2x4_t:void, *mut f32:float32x4x4_t:void generate *mut f32:float32x2x4_t:void, *mut f32:float32x4x4_t:void
/// Store multiple 4-element structures from four registers /// Store multiple 4-element structures from four registers
name = vst4 name = vst4
@@ -3069,16 +3136,16 @@ validate 1, 2, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
store_fn store_fn
arm-aarch64-separate arm-aarch64-separate
aarch64 = st4lane aarch64 = st4
link-aarch64 = st4lane._EXTpi8_ link-aarch64 = st4lane._EXTpi8_
const-aarch64 = LANE const-aarch64 = LANE
//generate *mut i8:int8x16x4_t:void, *mut i64:int64x1x4_t:void, *mut i64:int64x2x4_t:void generate *mut i8:int8x16x4_t:void, *mut i64:int64x1x4_t:void, *mut i64:int64x2x4_t:void
arm = vst4lane arm = vst4
link-arm = vst4lane._EXTpi8r_ link-arm = vst4lane._EXTpi8r_
const-arm = LANE const-arm = LANE
//generate *mut i8:int8x8x4_t:void, *mut i16:int16x4x4_t:void, *mut i32:int32x2x4_t:void generate *mut i8:int8x8x4_t:void, *mut i16:int16x4x4_t:void, *mut i32:int32x2x4_t:void
//generate *mut i16:int16x8x4_t:void, *mut i32:int32x4x4_t:void generate *mut i16:int16x8x4_t:void, *mut i32:int32x4x4_t:void
/// Store multiple 4-element structures from four registers /// Store multiple 4-element structures from four registers
name = vst4 name = vst4
@@ -3091,16 +3158,16 @@ n = 0
validate 1, 2, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 validate 1, 2, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
store_fn store_fn
aarch64 = st4lane aarch64 = st4
//generate *mut u8:uint8x16x4_t:void, *mut u64:uint64x1x4_t:void, *mut u64:uint64x2x4_t:void, *mut p8:poly8x16x4_t:void generate *mut u8:uint8x16x4_t:void, *mut u64:uint64x1x4_t:void, *mut u64:uint64x2x4_t:void, *mut p8:poly8x16x4_t:void
target = aes target = aes
//generate *mut p64:poly64x1x4_t:void, *mut p64:poly64x2x4_t:void generate *mut p64:poly64x1x4_t:void, *mut p64:poly64x2x4_t:void
target = default target = default
arm = vst4lane arm = vst4
//generate *mut u8:uint8x8x4_t:void, *mut u16:uint16x4x4_t:void, *mut u32:uint32x2x4_t:void generate *mut u8:uint8x8x4_t:void, *mut u16:uint16x4x4_t:void, *mut u32:uint32x2x4_t:void
//generate *mut u16:uint16x8x4_t:void, *mut u32:uint32x4x4_t:void generate *mut u16:uint16x8x4_t:void, *mut u32:uint32x4x4_t:void
//generate *mut p8:poly8x8x4_t:void, *mut p16:poly16x4x4_t:void, *mut p16:poly16x8x4_t:void generate *mut p8:poly8x8x4_t:void, *mut p16:poly16x4x4_t:void, *mut p16:poly16x8x4_t:void
/// Store multiple 4-element structures from four registers /// Store multiple 4-element structures from four registers
name = vst4 name = vst4
@@ -3113,15 +3180,15 @@ validate 1., 2., 2., 6., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.
store_fn store_fn
arm-aarch64-separate arm-aarch64-separate
aarch64 = st4lane aarch64 = st4
link-aarch64 = st4lane._EXTpi8_ link-aarch64 = st4lane._EXTpi8_
const-aarch64 = LANE const-aarch64 = LANE
//generate *mut f64:float64x1x4_t:void, *mut f64:float64x2x4_t:void generate *mut f64:float64x1x4_t:void, *mut f64:float64x2x4_t:void
arm = vst4lane arm = vst4
link-arm = vst4lane._EXTpi8r_ link-arm = vst4lane._EXTpi8r_
const-arm = LANE const-arm = LANE
//generate *mut f32:float32x2x4_t:void, *mut f32:float32x4x4_t:void generate *mut f32:float32x2x4_t:void, *mut f32:float32x4x4_t:void
/// Multiply /// Multiply
name = vmul name = vmul

View File

@@ -918,10 +918,9 @@ fn ext(s: &str, in_t: &[&str; 3], out_t: &str) -> String {
fn is_vldx(name: &str) -> bool { fn is_vldx(name: &str) -> bool {
let s: Vec<_> = name.split('_').collect(); let s: Vec<_> = name.split('_').collect();
s.len() == 2 &name[0..3] == "vld"
&& &name[0..3] == "vld"
&& name[3..4].parse::<i32>().unwrap() > 1 && name[3..4].parse::<i32>().unwrap() > 1
&& (s[1].starts_with("s") || s[1].starts_with("f")) && (s.last().unwrap().starts_with("s") || s.last().unwrap().starts_with("f"))
} }
fn is_vstx(name: &str) -> bool { fn is_vstx(name: &str) -> bool {
@@ -1114,8 +1113,13 @@ fn gen_aarch64(
}; };
(format!("{}, ptr: *mut {}", subs, ptr_type), String::new()) (format!("{}, ptr: *mut {}", subs, ptr_type), String::new())
} else if is_vldx(&name) { } else if is_vldx(&name) {
let ptr_type = if name.contains("dup") {
type_to_native_type(out_t)
} else {
type_to_sub_type(out_t)
};
( (
format!("ptr: *const {}", type_to_sub_type(out_t)), format!("ptr: *const {}", ptr_type),
format!(" -> {}", out_t), format!(" -> {}", out_t),
) )
} else { } else {
@@ -1828,9 +1832,14 @@ fn gen_arm(
), ),
_ => panic!("unknown type: {}", in_t[1]), _ => panic!("unknown type: {}", in_t[1]),
}; };
let out = if out_t == "void" {
String::new()
} else {
format!(" -> {}", out_t)
};
( (
format!("ptr: {}, {}, n: i32, size: i32", ptr_type, inputs), format!("ptr: {}, {}, n: i32, size: i32", ptr_type, inputs),
String::new(), out,
) )
} else { } else {
let (_, const_type) = if const_arm.contains(":") { let (_, const_type) = if const_arm.contains(":") {
@@ -1978,8 +1987,13 @@ fn gen_arm(
inputs.push_str(&format!(", ptr: *mut {}", ptr_type)); inputs.push_str(&format!(", ptr: *mut {}", ptr_type));
(inputs, String::new()) (inputs, String::new())
} else if is_vldx(&name) { } else if is_vldx(&name) {
let ptr_type = if name.contains("dup") {
type_to_native_type(out_t)
} else {
type_to_sub_type(out_t)
};
( (
format!("ptr: *const {}", type_to_sub_type(out_t)), format!("ptr: *const {}", ptr_type),
format!(" -> {}", out_t), format!(" -> {}", out_t),
) )
} else { } else {

View File

@@ -130,8 +130,17 @@ pub fn assert(shim_addr: usize, fnname: &str, expected: &str) {
"usad8" | "vfma" | "vfms" => 27, "usad8" | "vfma" | "vfms" => 27,
"qadd8" | "qsub8" | "sadd8" | "sel" | "shadd8" | "shsub8" | "usub8" | "ssub8" => 29, "qadd8" | "qsub8" | "sadd8" | "sel" | "shadd8" | "shsub8" | "usub8" | "ssub8" => 29,
// core_arch/src/arm_shared/simd32 // core_arch/src/arm_shared/simd32
// vst1q_s64_x4_vst1 : #instructions = 22 >= 22 (limit)
"vld3" => 23,
// core_arch/src/arm_shared/simd32
// vld4q_lane_u32_vld4 : #instructions = 31 >= 22 (limit)
"vld4" => 32,
// core_arch/src/arm_shared/simd32
// vst1q_s64_x4_vst1 : #instructions = 40 >= 22 (limit) // vst1q_s64_x4_vst1 : #instructions = 40 >= 22 (limit)
"vst1" => 41, "vst1" => 41,
// core_arch/src/arm_shared/simd32
// vst4q_u32_vst4 : #instructions = 26 >= 22 (limit)
"vst4" => 27,
// Temporary, currently the fptosi.sat and fptoui.sat LLVM // Temporary, currently the fptosi.sat and fptoui.sat LLVM
// intrinsics emit unnecessary code on arm. This can be // intrinsics emit unnecessary code on arm. This can be