Work around CI failures for the ARM target

These seem to have been introduced by recent LLVM changes.

* The instruction limit for vld*/vst* has been raised. This is not a
significant issue, it is only used for testing.
* vld*/vst* instructions are generated with overly strict alignments:
https://github.com/rust-lang/stdarch/issues/1217
* vtbl/vtbx instrinsics are failing intrinsic-test for unknown reasons.
This commit is contained in:
Amanieu d'Antras
2023-11-29 20:12:12 +00:00
parent 9b4a79c5d4
commit 4fe088329c
3 changed files with 28 additions and 22 deletions

View File

@@ -85,8 +85,9 @@ cargo_test() {
cmd="$cmd --skip test_vec_lde_u16 --skip test_vec_lde_u32 --skip test_vec_expte"
;;
# Miscompilation: https://github.com/rust-lang/rust/issues/112460
# Also LLVM bug: https://github.com/rust-lang/stdarch/issues/1217
arm*)
cmd="$cmd --skip vld2q_dup_f32"
cmd="$cmd --skip vld"
;;
esac

View File

@@ -213,3 +213,23 @@ vrndxq_f32
#vrshrn_n_u64
#vshrq_n_u64
#vshr_n_u64
# Seems to be miscompiled.
vtbl2_p8
vtbl2_s8
vtbl2_u8
vtbl3_p8
vtbl3_s8
vtbl3_u8
vtbl4_p8
vtbl4_s8
vtbl4_u8
vtbx2_p8
vtbx2_s8
vtbx2_u8
vtbx3_p8
vtbx3_s8
vtbx3_u8
vtbx4_p8
vtbx4_s8
vtbx4_u8

View File

@@ -124,29 +124,14 @@ pub fn assert(shim_addr: usize, fnname: &str, expected: &str) {
// Intrinsics using `cvtpi2ps` are typically "composites" and
// in some cases exceed the limit.
"cvtpi2ps" => 25,
// core_arch/src/arm_shared/simd32
// vfmaq_n_f32_vfma : #instructions = 26 >= 22 (limit)
"usad8" | "vfma" | "vfms" => 27,
"qadd8" | "qsub8" | "sadd8" | "sel" | "shadd8" | "shsub8" | "usub8" | "ssub8" => 29,
"vfma" | "vfms" => 27,
// core_arch/src/arm_shared/simd32
// vst1q_s64_x4_vst1 : #instructions = 27 >= 22 (limit)
"vld3" => 28,
// core_arch/src/arm_shared/simd32
// vld4q_lane_u32_vld4 : #instructions = 36 >= 22 (limit)
"vld4" => 37,
// core_arch/src/arm_shared/simd32
// vst1q_s64_x4_vst1 : #instructions = 40 >= 22 (limit)
"vst1" => 41,
// core_arch/src/arm_shared/simd32
// vst3q_u32_vst3 : #instructions = 25 >= 22 (limit)
"vst3" => 26,
// core_arch/src/arm_shared/simd32
// vst4q_u32_vst4 : #instructions = 33 >= 22 (limit)
"vst4" => 34,
// core_arch/src/arm_shared/simd32
// vst1q_p64_x4_nop : #instructions = 33 >= 22 (limit)
"nop" if fnname.contains("vst1q_p64") => 34,
"usad8" | "qadd8" | "qsub8" | "sadd8" | "sel" | "shadd8" | "shsub8" | "usub8"
| "ssub8" => 29,
// core_arch/src/arm_shared/neon
_ if fnname.contains("_vld") => 50,
_ if fnname.contains("_vst") => 50,
// Original limit was 20 instructions, but ARM DSP Intrinsics
// are exactly 20 instructions long. So, bump the limit to 22