Rollup merge of #146949 - pmur:murp/improve-ppc-inline-asm, r=Amanieu

Add vsx register support for ppc inline asm, and implement preserves_flag option

This should address the last(?) missing pieces of inline asm for ppc:

* Explicit VSX register support. ISA 2.06 (POWER7) added a 64x128b register overlay extending the fpr's to 128b, and unifies them with the vmx (altivec) registers. Implementations details within gcc/llvm percolate up, and require using the `x` template modifier. I have updated the inline asm to implicitly include this for vsx arguments which do not specify it. ~~Support for the gcc codegen backend is still a todo.~~

* Implement the `preserves_flags` option. All ABI's, and all ISAs store their flags in `cr`, and the carry bit lives inside `xer`. The other status registers hold sticky bits or control bits which do not affect branch instructions.

There is some interest in the e500 (powerpcspe) port. Architecturally, it has a very different FP ISA, and includes a simd extension called SPR (which is not IBM's cell SPE). Notably, it does not have altivec/fpr/vsx registers. It also has an SPE accumulator register which its ABI marks as volatile, but I am not sure if the compiler uses it.
This commit is contained in:
Matthias Krüger
2025-10-15 07:09:54 +02:00
committed by GitHub
13 changed files with 3066 additions and 186 deletions

View File

@@ -658,6 +658,7 @@ fn reg_to_llvm(reg: InlineAsmRegOrRegClass, layout: Option<&TyAndLayout<'_>>) ->
PowerPC(PowerPCInlineAsmRegClass::reg_nonzero) => "b",
PowerPC(PowerPCInlineAsmRegClass::freg) => "f",
PowerPC(PowerPCInlineAsmRegClass::vreg) => "v",
PowerPC(PowerPCInlineAsmRegClass::vsreg) => "^wa",
PowerPC(
PowerPCInlineAsmRegClass::cr
| PowerPCInlineAsmRegClass::ctr
@@ -748,6 +749,12 @@ fn modifier_to_llvm(
LoongArch(_) => None,
Mips(_) => None,
Nvptx(_) => None,
PowerPC(PowerPCInlineAsmRegClass::vsreg) => {
// The documentation for the 'x' modifier is missing for llvm, and the gcc
// documentation is simply "use this for any vsx argument". It is needed
// to ensure the correct vsx register number is used.
if modifier.is_none() { Some('x') } else { modifier }
}
PowerPC(_) => None,
RiscV(RiscVInlineAsmRegClass::reg) | RiscV(RiscVInlineAsmRegClass::freg) => None,
RiscV(RiscVInlineAsmRegClass::vreg) => unreachable!("clobber-only"),
@@ -831,6 +838,7 @@ fn dummy_output_type<'ll>(cx: &CodegenCx<'ll, '_>, reg: InlineAsmRegClass) -> &'
PowerPC(PowerPCInlineAsmRegClass::reg_nonzero) => cx.type_i32(),
PowerPC(PowerPCInlineAsmRegClass::freg) => cx.type_f64(),
PowerPC(PowerPCInlineAsmRegClass::vreg) => cx.type_vector(cx.type_i32(), 4),
PowerPC(PowerPCInlineAsmRegClass::vsreg) => cx.type_vector(cx.type_i32(), 4),
PowerPC(
PowerPCInlineAsmRegClass::cr
| PowerPCInlineAsmRegClass::ctr
@@ -1061,9 +1069,10 @@ fn llvm_fixup_input<'ll, 'tcx>(
let value = bx.or(value, bx.const_u32(0xFFFF_0000));
bx.bitcast(value, bx.type_f32())
}
(PowerPC(PowerPCInlineAsmRegClass::vreg), BackendRepr::Scalar(s))
if s.primitive() == Primitive::Float(Float::F32) =>
{
(
PowerPC(PowerPCInlineAsmRegClass::vreg | PowerPCInlineAsmRegClass::vsreg),
BackendRepr::Scalar(s),
) if s.primitive() == Primitive::Float(Float::F32) => {
let value = bx.insert_element(
bx.const_undef(bx.type_vector(bx.type_f32(), 4)),
value,
@@ -1071,9 +1080,10 @@ fn llvm_fixup_input<'ll, 'tcx>(
);
bx.bitcast(value, bx.type_vector(bx.type_f32(), 4))
}
(PowerPC(PowerPCInlineAsmRegClass::vreg), BackendRepr::Scalar(s))
if s.primitive() == Primitive::Float(Float::F64) =>
{
(
PowerPC(PowerPCInlineAsmRegClass::vreg | PowerPCInlineAsmRegClass::vsreg),
BackendRepr::Scalar(s),
) if s.primitive() == Primitive::Float(Float::F64) => {
let value = bx.insert_element(
bx.const_undef(bx.type_vector(bx.type_f64(), 2)),
value,
@@ -1224,15 +1234,17 @@ fn llvm_fixup_output<'ll, 'tcx>(
let value = bx.trunc(value, bx.type_i16());
bx.bitcast(value, bx.type_f16())
}
(PowerPC(PowerPCInlineAsmRegClass::vreg), BackendRepr::Scalar(s))
if s.primitive() == Primitive::Float(Float::F32) =>
{
(
PowerPC(PowerPCInlineAsmRegClass::vreg | PowerPCInlineAsmRegClass::vsreg),
BackendRepr::Scalar(s),
) if s.primitive() == Primitive::Float(Float::F32) => {
let value = bx.bitcast(value, bx.type_vector(bx.type_f32(), 4));
bx.extract_element(value, bx.const_usize(0))
}
(PowerPC(PowerPCInlineAsmRegClass::vreg), BackendRepr::Scalar(s))
if s.primitive() == Primitive::Float(Float::F64) =>
{
(
PowerPC(PowerPCInlineAsmRegClass::vreg | PowerPCInlineAsmRegClass::vsreg),
BackendRepr::Scalar(s),
) if s.primitive() == Primitive::Float(Float::F64) => {
let value = bx.bitcast(value, bx.type_vector(bx.type_f64(), 2));
bx.extract_element(value, bx.const_usize(0))
}
@@ -1366,16 +1378,14 @@ fn llvm_fixup_output_type<'ll, 'tcx>(
{
cx.type_f32()
}
(PowerPC(PowerPCInlineAsmRegClass::vreg), BackendRepr::Scalar(s))
if s.primitive() == Primitive::Float(Float::F32) =>
{
cx.type_vector(cx.type_f32(), 4)
}
(PowerPC(PowerPCInlineAsmRegClass::vreg), BackendRepr::Scalar(s))
if s.primitive() == Primitive::Float(Float::F64) =>
{
cx.type_vector(cx.type_f64(), 2)
}
(
PowerPC(PowerPCInlineAsmRegClass::vreg | PowerPCInlineAsmRegClass::vsreg),
BackendRepr::Scalar(s),
) if s.primitive() == Primitive::Float(Float::F32) => cx.type_vector(cx.type_f32(), 4),
(
PowerPC(PowerPCInlineAsmRegClass::vreg | PowerPCInlineAsmRegClass::vsreg),
BackendRepr::Scalar(s),
) if s.primitive() == Primitive::Float(Float::F64) => cx.type_vector(cx.type_f64(), 2),
_ => layout.llvm_type(cx),
}
}