Merge commit 'fda0bb9588912a3e0606e880ca9f6e913cf8a5a4' into subtree-update_cg_gcc_2025-06-18

Guillaume Gomez
2025-06-18 15:11:44 +02:00
54 changed files with 11917 additions and 10357 deletions


@@ -158,6 +158,7 @@ fn create_wrapper_function(
}
} else {
assert!(output.is_none());
block.add_eval(None, ret);
block.end_with_void_return(None);
}
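A note on the one-line fix above: in gccjit, a call rvalue is only emitted once it is attached to a block, so a wrapper returning void must evaluate the call explicitly before the return. A minimal sketch of the pattern, reusing the gccjit calls from the diff (the helper name is ours):

// Evaluate the callee's result for its side effects, then return void.
// Without add_eval, the call rvalue would be silently dropped.
fn finish_void_wrapper<'gcc>(block: gccjit::Block<'gcc>, ret: gccjit::RValue<'gcc>) {
    block.add_eval(None, ret);
    block.end_with_void_return(None);
}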


@@ -1,3 +1,5 @@
// cSpell:ignoreRegExp [afkspqvwy]reg
use std::borrow::Cow;
use gccjit::{LValue, RValue, ToRValue, Type};
@@ -138,7 +140,7 @@ impl<'a, 'gcc, 'tcx> AsmBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
// `outputs.len() + inputs.len()`.
let mut labels = vec![];
// Clobbers collected from `out("explicit register") _` and `inout("expl_reg") var => _`
// Clobbers collected from `out("explicit register") _` and `inout("explicit_reg") var => _`
let mut clobbers = vec![];
// We're trying to preallocate space for the template
@@ -203,7 +205,7 @@ impl<'a, 'gcc, 'tcx> AsmBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
// is also used as an in register, do not add it to the clobbers list.
// it will be treated as a lateout register with `out_place: None`
if !late {
bug!("input registers can only be used as lateout regisers");
bug!("input registers can only be used as lateout registers");
}
("r", dummy_output_type(self.cx, reg.reg_class()))
} else {
@@ -641,7 +643,8 @@ fn explicit_reg_to_gcc(reg: InlineAsmReg) -> &'static str {
},
}
}
InlineAsmReg::Arm(reg) => reg.name(),
InlineAsmReg::AArch64(reg) => reg.name(),
_ => unimplemented!(),
}
}


@@ -16,7 +16,7 @@ use crate::gcc_util::to_gcc_features;
/// Checks whether the function `instance` is recursively inlined.
/// Returns `false` if a function is guaranteed to be non-recursive, and `true` if it *might* be recursive.
#[cfg(feature = "master")]
fn resursively_inline<'gcc, 'tcx>(
fn recursively_inline<'gcc, 'tcx>(
cx: &CodegenCx<'gcc, 'tcx>,
instance: ty::Instance<'tcx>,
) -> bool {
@@ -61,7 +61,7 @@ fn inline_attr<'gcc, 'tcx>(
//
// That prevents issues stemming from recursive `#[inline(always)]` at a *relatively* small cost.
// We *only* need to check all the terminators of a function marked with this attribute.
if resursively_inline(cx, instance) {
if recursively_inline(cx, instance) {
Some(FnAttribute::Inline)
} else {
Some(FnAttribute::AlwaysInline)
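The check described above can be pictured as a walk over the function's MIR terminators looking for direct self-calls. A rough sketch with approximate rustc_middle API names (an illustration, not the actual implementation):

use rustc_middle::mir::TerminatorKind;
use rustc_middle::ty::{self, TyCtxt};

// Conservatively report possible recursion if any terminator is a direct
// call back into the same instance.
fn might_recurse<'tcx>(tcx: TyCtxt<'tcx>, instance: ty::Instance<'tcx>) -> bool {
    let body = tcx.instance_mir(instance.def);
    body.basic_blocks.iter().any(|block| match &block.terminator().kind {
        TerminatorKind::Call { func, .. } => {
            func.const_fn_def().map(|(def_id, _)| def_id) == Some(instance.def_id())
        }
        _ => false,
    })
}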


@@ -11,11 +11,12 @@
// does not remove it?
//
// TODO(antoyo): for performance, check which optimizations the C++ frontend enables.
//
// cSpell:disable
// Fix these warnings:
// /usr/bin/ld: warning: type of symbol `_RNvNvNvNtCs5JWOrf9uCus_5rayon11thread_pool19WORKER_THREAD_STATE7___getit5___KEY' changed from 1 to 6 in /tmp/ccKeUSiR.ltrans0.ltrans.o
// /usr/bin/ld: warning: type of symbol `_RNvNvNvNvNtNtNtCsAj5i4SGTR7_3std4sync4mpmc5waker17current_thread_id5DUMMY7___getit5___KEY' changed from 1 to 6 in /tmp/ccKeUSiR.ltrans0.ltrans.o
// /usr/bin/ld: warning: incremental linking of LTO and non-LTO objects; using -flinker-output=nolto-rel which will bypass whole program optimization
// cSpell:enable
use std::ffi::{CStr, CString};
use std::fs::{self, File};
use std::path::{Path, PathBuf};


@@ -186,6 +186,7 @@ pub(crate) fn codegen(
if fat_lto {
let lto_path = format!("{}.lto", path);
// cSpell:disable
// FIXME(antoyo): The LTO frontend generates the following warning:
// ../build_sysroot/sysroot_src/library/core/src/num/dec2flt/lemire.rs:150:15: warning: type of _ZN4core3num7dec2flt5table17POWER_OF_FIVE_12817ha449a68fb31379e4E does not match original declaration [-Wlto-type-mismatch]
// 150 | let (lo5, hi5) = POWER_OF_FIVE_128[index];
@@ -193,6 +194,7 @@ pub(crate) fn codegen(
// lto1: note: _ZN4core3num7dec2flt5table17POWER_OF_FIVE_12817ha449a68fb31379e4E was previously declared here
//
// This option mutes it so that the UI tests pass with LTO enabled.
// cSpell:enable
context.add_driver_option("-Wno-lto-type-mismatch");
// NOTE: this doesn't actually generate an executable. With the above
// flags, it combines the .o files together in another .o.


@@ -765,7 +765,15 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
#[cfg(feature = "master")]
match self.cx.type_kind(a_type) {
TypeKind::Half | TypeKind::Float => {
TypeKind::Half => {
let fmodf = self.context.get_builtin_function("fmodf");
let f32_type = self.type_f32();
let a = self.context.new_cast(self.location, a, f32_type);
let b = self.context.new_cast(self.location, b, f32_type);
let result = self.context.new_call(self.location, fmodf, &[a, b]);
return self.context.new_cast(self.location, result, a_type);
}
TypeKind::Float => {
let fmodf = self.context.get_builtin_function("fmodf");
return self.context.new_call(self.location, fmodf, &[a, b]);
}
@@ -774,8 +782,19 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
return self.context.new_call(self.location, fmod, &[a, b]);
}
TypeKind::FP128 => {
let fmodl = self.context.get_builtin_function("fmodl");
return self.context.new_call(self.location, fmodl, &[a, b]);
let f128_type = self.type_f128();
let fmodf128 = self.context.new_function(
None,
gccjit::FunctionType::Extern,
f128_type,
&[
self.context.new_parameter(None, f128_type, "a"),
self.context.new_parameter(None, f128_type, "b"),
],
"fmodf128",
false,
);
return self.context.new_call(self.location, fmodf128, &[a, b]);
}
_ => (),
}
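A note on the FP128 arm above: fmodl operates on C's `long double`, which matches IEEE binary128 only on some targets (on x86-64 it is the 80-bit extended type), so the code now declares the dedicated binary128 routine instead. The declare-and-call pattern, extracted as a hedged sketch using the same gccjit calls:

// Declare an extern binary128 routine by name instead of relying on a
// `long double` builtin whose width varies per target.
fn declare_fmodf128<'gcc>(
    context: &'gcc gccjit::Context<'gcc>,
    f128_type: gccjit::Type<'gcc>,
) -> gccjit::Function<'gcc> {
    context.new_function(
        None,
        gccjit::FunctionType::Extern,
        f128_type,
        &[
            context.new_parameter(None, f128_type, "a"),
            context.new_parameter(None, f128_type, "b"),
        ],
        "fmodf128",
        false, // not variadic
    )
}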
@@ -924,7 +943,12 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
// dereference after a drop, for instance.
// FIXME(antoyo): this checks that we don't call get_aligned() a second time on a type.
// Ideally, we shouldn't need to do this check.
let aligned_type = if pointee_ty == self.cx.u128_type || pointee_ty == self.cx.i128_type {
// FractalFir: the `align == self.int128_align` check ensures we *do* call `get_aligned` if
// the alignment of a `u128`/`i128` is not the one mandated by the ABI. This ensures we handle
// under-aligned loads correctly.
let aligned_type = if (pointee_ty == self.cx.u128_type || pointee_ty == self.cx.i128_type)
&& align == self.int128_align
{
pointee_ty
} else {
pointee_ty.get_aligned(align.bytes())
@@ -1010,13 +1034,13 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
let b_offset = a.size(self).align_to(b.align(self).abi);
let mut load = |i, scalar: &abi::Scalar, align| {
let llptr = if i == 0 {
let ptr = if i == 0 {
place.val.llval
} else {
self.inbounds_ptradd(place.val.llval, self.const_usize(b_offset.bytes()))
};
let llty = place.layout.scalar_pair_element_gcc_type(self, i);
let load = self.load(llty, llptr, align);
let load = self.load(llty, ptr, align);
scalar_load_metadata(self, load, scalar);
if scalar.is_bool() { self.trunc(load, self.type_i1()) } else { load }
};


@@ -34,7 +34,7 @@ pub fn get_fn<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, instance: Instance<'tcx>)
unreachable!();
/*
// Create a fn pointer with the new signature.
let ptrty = fn_abi.ptr_to_gcc_type(cx);
let ptrtype = fn_abi.ptr_to_gcc_type(cx);
// This is subtle and surprising, but sometimes we have to bitcast
// the resulting fn pointer. The reason has to do with external
@@ -59,7 +59,7 @@ pub fn get_fn<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, instance: Instance<'tcx>)
// This can occur on either a crate-local or crate-external
// reference. It also occurs when testing libcore and in some
// other weird situations. Annoying.
if cx.val_ty(func) != ptrty {
if cx.val_ty(func) != ptrtype {
// TODO(antoyo): cast the pointer.
func
}


@@ -9,7 +9,6 @@ use rustc_middle::mir::Mutability;
use rustc_middle::mir::interpret::{ConstAllocation, GlobalAlloc, Scalar};
use rustc_middle::ty::layout::LayoutOf;
use crate::consts::const_alloc_to_gcc;
use crate::context::CodegenCx;
use crate::type_of::LayoutGccExt;
@@ -46,12 +45,65 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
}
pub fn bytes_in_context<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, bytes: &[u8]) -> RValue<'gcc> {
let context = &cx.context;
let byte_type = context.new_type::<u8>();
let typ = context.new_array_type(None, byte_type, bytes.len() as u64);
let elements: Vec<_> =
bytes.iter().map(|&byte| context.new_rvalue_from_int(byte_type, byte as i32)).collect();
context.new_array_constructor(None, typ, &elements)
// Instead of always using an array of bytes, use an array of larger integers in the target's
// endianness when possible. This reduces the number of rvalues we create, which significantly reduces memory usage.
//
// FIXME(FractalFir): Consider using `global_set_initializer` instead. Before this is done, we need to confirm that
// `global_set_initializer` is more memory efficient than the current solution.
// `global_set_initializer` calls `global_set_initializer_rvalue` under the hood - does it generate an array of rvalues,
// or is it using a more efficient representation?
match bytes.len() % 8 {
0 => {
let context = &cx.context;
let byte_type = context.new_type::<u64>();
let typ = context.new_array_type(None, byte_type, bytes.len() as u64 / 8);
let elements: Vec<_> = bytes
.chunks_exact(8)
.map(|arr| {
let arr: [u8; 8] = arr.try_into().unwrap();
context.new_rvalue_from_long(
byte_type,
// Since we are representing arbitrary byte runs as integers, we need to follow the target
// endianness.
match cx.sess().target.options.endian {
rustc_abi::Endian::Little => u64::from_le_bytes(arr) as i64,
rustc_abi::Endian::Big => u64::from_be_bytes(arr) as i64,
},
)
})
.collect();
context.new_array_constructor(None, typ, &elements)
}
4 => {
let context = &cx.context;
let byte_type = context.new_type::<u32>();
let typ = context.new_array_type(None, byte_type, bytes.len() as u64 / 4);
let elements: Vec<_> = bytes
.chunks_exact(4)
.map(|arr| {
let arr: [u8; 4] = arr.try_into().unwrap();
context.new_rvalue_from_int(
byte_type,
match cx.sess().target.options.endian {
rustc_abi::Endian::Little => u32::from_le_bytes(arr) as i32,
rustc_abi::Endian::Big => u32::from_be_bytes(arr) as i32,
},
)
})
.collect();
context.new_array_constructor(None, typ, &elements)
}
_ => {
let context = cx.context;
let byte_type = context.new_type::<u8>();
let typ = context.new_array_type(None, byte_type, bytes.len() as u64);
let elements: Vec<_> = bytes
.iter()
.map(|&byte| context.new_rvalue_from_int(byte_type, byte as i32))
.collect();
context.new_array_constructor(None, typ, &elements)
}
}
}
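The chunking logic above can be exercised on its own. A standalone sketch in plain Rust (no gccjit) that packs a byte run into target-endian u64 words when its length is a multiple of 8:

// Pack bytes into 64-bit words so far fewer constant rvalues are needed.
// Returns None when the length is not a multiple of 8; the caller then
// falls back to a plain byte array, as the `_` arm above does.
fn pack_u64_words(bytes: &[u8], little_endian: bool) -> Option<Vec<u64>> {
    if bytes.len() % 8 != 0 {
        return None;
    }
    Some(
        bytes
            .chunks_exact(8)
            .map(|chunk| {
                let arr: [u8; 8] = chunk.try_into().unwrap();
                if little_endian { u64::from_le_bytes(arr) } else { u64::from_be_bytes(arr) }
            })
            .collect(),
    )
}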
pub fn type_is_pointer(typ: Type<'_>) -> bool {
@@ -185,14 +237,15 @@ impl<'gcc, 'tcx> ConstCodegenMethods for CodegenCx<'gcc, 'tcx> {
// FIXME(antoyo): there's some issues with using the u128 code that follows, so hard-code
// the paths for floating-point values.
if ty == self.float_type {
// TODO: Remove this code?
/*if ty == self.float_type {
return self
.context
.new_rvalue_from_double(ty, f32::from_bits(data as u32) as f64);
}
if ty == self.double_type {
return self.context.new_rvalue_from_double(ty, f64::from_bits(data as u64));
}
}*/
let value = self.const_uint_big(self.type_ix(bitsize), data);
let bytesize = layout.size(self).bytes();
@@ -212,7 +265,20 @@ impl<'gcc, 'tcx> ConstCodegenMethods for CodegenCx<'gcc, 'tcx> {
let alloc_id = prov.alloc_id();
let base_addr = match self.tcx.global_alloc(alloc_id) {
GlobalAlloc::Memory(alloc) => {
let init = const_alloc_to_gcc(self, alloc);
// For ZSTs directly codegen an aligned pointer.
// This avoids generating a zero-sized constant value and actually needing a
// real address at runtime.
if alloc.inner().len() == 0 {
assert_eq!(offset.bytes(), 0);
let val = self.const_usize(alloc.inner().align.bytes());
return if matches!(layout.primitive(), Pointer(_)) {
self.context.new_cast(None, val, ty)
} else {
self.const_bitcast(val, ty)
};
}
let init = self.const_data_from_alloc(alloc);
let alloc = alloc.inner();
let value = match alloc.mutability {
Mutability::Mut => self.static_addr_of_mut(init, alloc.align, None),
@@ -234,7 +300,7 @@ impl<'gcc, 'tcx> ConstCodegenMethods for CodegenCx<'gcc, 'tcx> {
}),
)))
.unwrap_memory();
let init = const_alloc_to_gcc(self, alloc);
let init = self.const_data_from_alloc(alloc);
self.static_addr_of(init, alloc.inner().align, None)
}
GlobalAlloc::Static(def_id) => {
@@ -257,7 +323,19 @@ impl<'gcc, 'tcx> ConstCodegenMethods for CodegenCx<'gcc, 'tcx> {
}
fn const_data_from_alloc(&self, alloc: ConstAllocation<'_>) -> Self::Value {
const_alloc_to_gcc(self, alloc)
// We ignore the alignment for the purpose of deduping RValues.
// The alignment is not handled or used in any way by `const_alloc_to_gcc_uncached`,
// so it is OK to overwrite it here.
let mut mock_alloc = alloc.inner().clone();
mock_alloc.align = rustc_abi::Align::MAX;
// Check if the rvalue is already in the cache - if so, just return it directly.
if let Some(res) = self.const_cache.borrow().get(&mock_alloc) {
return *res;
}
// Rvalue not in the cache - convert and add it.
let res = crate::consts::const_alloc_to_gcc_uncached(self, alloc);
self.const_cache.borrow_mut().insert(mock_alloc, res);
res
}
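The caching scheme above boils down to: normalize the key so fields that do not affect the generated value (here, alignment) cannot cause spurious cache misses, then memoize the conversion. A minimal sketch of the pattern with placeholder types (u64 stands in for the cached RValue):

use std::cell::RefCell;
use std::collections::HashMap;

struct ConstCacheSketch {
    cache: RefCell<HashMap<Vec<u8>, u64>>,
}

impl ConstCacheSketch {
    // Key on the bytes only (alignment normalized away); convert on a miss.
    fn get_or_convert(&self, bytes: &[u8], convert: impl FnOnce() -> u64) -> u64 {
        if let Some(value) = self.cache.borrow().get(bytes) {
            return *value;
        }
        let value = convert();
        self.cache.borrow_mut().insert(bytes.to_vec(), value);
        value
    }
}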
fn const_ptr_byte_offset(&self, base_addr: Self::Value, offset: abi::Size) -> Self::Value {


@@ -36,18 +36,14 @@ fn set_global_alignment<'gcc, 'tcx>(
impl<'gcc, 'tcx> StaticCodegenMethods for CodegenCx<'gcc, 'tcx> {
fn static_addr_of(&self, cv: RValue<'gcc>, align: Align, kind: Option<&str>) -> RValue<'gcc> {
// TODO(antoyo): implement a proper rvalue comparison in libgccjit instead of doing the
// following:
for (value, variable) in &*self.const_globals.borrow() {
if format!("{:?}", value) == format!("{:?}", cv) {
if let Some(global_variable) = self.global_lvalues.borrow().get(variable) {
let alignment = align.bits() as i32;
if alignment > global_variable.get_alignment() {
global_variable.set_alignment(alignment);
}
if let Some(variable) = self.const_globals.borrow().get(&cv) {
if let Some(global_variable) = self.global_lvalues.borrow().get(variable) {
let alignment = align.bits() as i32;
if alignment > global_variable.get_alignment() {
global_variable.set_alignment(alignment);
}
return *variable;
}
return *variable;
}
let global_value = self.static_addr_of_mut(cv, align, kind);
#[cfg(feature = "master")]
@@ -288,8 +284,10 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
global
}
}
pub fn const_alloc_to_gcc<'gcc>(
/// Converts a given const alloc to a GCC `RValue`, without any caching or deduplication.
/// Do NOT call this function directly: bypassing the cache may break the semantics of Rust.
/// Use `const_data_from_alloc` instead.
pub(crate) fn const_alloc_to_gcc_uncached<'gcc>(
cx: &CodegenCx<'gcc, '_>,
alloc: ConstAllocation<'_>,
) -> RValue<'gcc> {
@@ -321,7 +319,7 @@ pub fn const_alloc_to_gcc<'gcc>(
// and we properly interpret the provenance as a relocation pointer offset.
alloc.inspect_with_uninit_and_ptr_outside_interpreter(offset..(offset + pointer_size)),
)
.expect("const_alloc_to_llvm: could not read relocation pointer")
.expect("const_alloc_to_gcc_uncached: could not read relocation pointer")
as u64;
let address_space = cx.tcx.global_alloc(alloc_id).address_space(cx);
@@ -360,7 +358,7 @@ fn codegen_static_initializer<'gcc, 'tcx>(
def_id: DefId,
) -> Result<(RValue<'gcc>, ConstAllocation<'tcx>), ErrorHandled> {
let alloc = cx.tcx.eval_static_initializer(def_id)?;
Ok((const_alloc_to_gcc(cx, alloc), alloc))
Ok((cx.const_data_from_alloc(alloc), alloc))
}
fn check_and_apply_linkage<'gcc, 'tcx>(


@@ -1,14 +1,16 @@
use std::cell::{Cell, RefCell};
use std::collections::HashMap;
use gccjit::{
Block, CType, Context, Function, FunctionPtrType, FunctionType, LValue, Location, RValue, Type,
};
use rustc_abi::{HasDataLayout, PointeeInfo, Size, TargetDataLayout, VariantIdx};
use rustc_abi::{Align, HasDataLayout, PointeeInfo, Size, TargetDataLayout, VariantIdx};
use rustc_codegen_ssa::base::wants_msvc_seh;
use rustc_codegen_ssa::errors as ssa_errors;
use rustc_codegen_ssa::traits::{BackendTypes, BaseTypeCodegenMethods, MiscCodegenMethods};
use rustc_data_structures::base_n::{ALPHANUMERIC_ONLY, ToBaseN};
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
use rustc_middle::mir::interpret::Allocation;
use rustc_middle::mir::mono::CodegenUnit;
use rustc_middle::span_bug;
use rustc_middle::ty::layout::{
@@ -28,6 +30,8 @@ use crate::common::SignType;
#[cfg_attr(not(feature = "master"), allow(dead_code))]
pub struct CodegenCx<'gcc, 'tcx> {
/// A cache of converted ConstAllocs
pub const_cache: RefCell<HashMap<Allocation, RValue<'gcc>>>,
pub codegen_unit: &'tcx CodegenUnit<'tcx>,
pub context: &'gcc Context<'gcc>,
@@ -129,6 +133,9 @@ pub struct CodegenCx<'gcc, 'tcx> {
#[cfg(feature = "master")]
pub cleanup_blocks: RefCell<FxHashSet<Block<'gcc>>>,
/// The alignment of a u128/i128 type.
// We cache this, since it is needed for alignment checks during loads.
pub int128_align: Align,
}
impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
@@ -220,6 +227,12 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
}
let mut cx = Self {
int128_align: tcx
.layout_of(ty::TypingEnv::fully_monomorphized().as_query_input(tcx.types.i128))
.expect("Can't get the layout of `i128`")
.align
.abi,
const_cache: Default::default(),
codegen_unit,
context,
current_func: RefCell::new(None),
@@ -428,8 +441,8 @@ impl<'gcc, 'tcx> MiscCodegenMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
// `rust_eh_personality` function, but rather we wired it up to the
// CRT's custom personality function, which forces LLVM to consider
// landing pads as "landing pads for SEH".
if let Some(llpersonality) = self.eh_personality.get() {
return llpersonality;
if let Some(personality_func) = self.eh_personality.get() {
return personality_func;
}
let tcx = self.tcx;
let func = match tcx.lang_items().eh_personality() {


@@ -143,6 +143,7 @@ pub(crate) fn global_gcc_features(sess: &Session, diagnostics: bool) -> Vec<Stri
// To find a list of GCC's names, check https://gcc.gnu.org/onlinedocs/gcc/Function-Attributes.html
pub fn to_gcc_features<'a>(sess: &Session, s: &'a str) -> SmallVec<[&'a str; 2]> {
let arch = if sess.target.arch == "x86_64" { "x86" } else { &*sess.target.arch };
// cSpell:disable
match (arch, s) {
// FIXME: seems like x87 does not exist?
("x86", "x87") => smallvec![],
@@ -181,6 +182,7 @@ pub fn to_gcc_features<'a>(sess: &Session, s: &'a str) -> SmallVec<[&'a str; 2]>
("aarch64", "sve2-bitperm") => smallvec!["sve2-bitperm", "neon"],
(_, s) => smallvec![s],
}
// cSpell:enable
}
fn arch_to_gcc(name: &str) -> &str {


@@ -2,6 +2,8 @@
//! This module exists because some integer types are not supported on some gcc platforms, e.g.
//! 128-bit integers on 32-bit platforms, which must therefore be handled manually.
// cSpell:words cmpti divti modti mulodi muloti udivti umodti
use gccjit::{BinaryOp, ComparisonOp, FunctionType, Location, RValue, ToRValue, Type, UnaryOp};
use rustc_abi::{CanonAbi, Endian, ExternAbi};
use rustc_codegen_ssa::common::{IntPredicate, TypeKind};
@@ -913,9 +915,11 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
debug_assert!(value_type.dyncast_array().is_some());
let name_suffix = match self.type_kind(dest_typ) {
// cSpell:disable
TypeKind::Float => "tisf",
TypeKind::Double => "tidf",
TypeKind::FP128 => "tixf",
TypeKind::FP128 => "titf",
// cSpell:enable
kind => panic!("cannot cast a non-native integer to type {:?}", kind),
};
let sign = if signed { "" } else { "un" };
@@ -957,8 +961,10 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
debug_assert!(dest_typ.dyncast_array().is_some());
let name_suffix = match self.type_kind(value_type) {
// cSpell:disable
TypeKind::Float => "sfti",
TypeKind::Double => "dfti",
// cSpell:enable
kind => panic!("cannot cast a {:?} to non-native integer", kind),
};
let sign = if signed { "" } else { "uns" };
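For context, these suffixes compose into libgcc's 128-bit conversion routine names: "" or "un" plus "tisf"/"tidf"/"titf" for int-to-float (e.g. __floattisf, __floatuntisf), and "" or "uns" plus "sfti"/"dfti" for float-to-int (e.g. __fixsfti, __fixunssfti). A hedged sketch of the composition (the exact format strings in the surrounding code are assumed):

// cSpell:disable
fn int_to_float_name(signed: bool, suffix: &str) -> String {
    let sign = if signed { "" } else { "un" };
    format!("__float{sign}{suffix}") // e.g. __floatuntidf for u128 -> f64
}

fn float_to_int_name(signed: bool, suffix: &str) -> String {
    let sign = if signed { "" } else { "uns" };
    format!("__fix{sign}{suffix}") // e.g. __fixunsdfti for f64 -> u128
}
// cSpell:enable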

File diff suppressed because it is too large

@@ -1012,7 +1012,7 @@ pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function
};
let func = cx.context.get_builtin_function(gcc_name);
cx.functions.borrow_mut().insert(gcc_name.to_string(), func);
return func;
func
}
#[cfg(feature = "master")]
@@ -1548,10 +1548,13 @@ pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function
"llvm.x86.tcmmrlfp16ps" => "__builtin_trap",
// NOTE: this file is generated by https://github.com/GuillaumeGomez/llvmint/blob/master/generate_list.py
_ => include!("archs.rs"),
_ => map_arch_intrinsic(name),
};
let func = cx.context.get_target_builtin_function(gcc_name);
cx.functions.borrow_mut().insert(gcc_name.to_string(), func);
func
}
#[cfg(feature = "master")]
include!("archs.rs");


@@ -196,6 +196,95 @@ fn get_simple_function<'gcc, 'tcx>(
))
}
fn get_simple_function_f128<'gcc, 'tcx>(
cx: &CodegenCx<'gcc, 'tcx>,
name: Symbol,
) -> Option<Function<'gcc>> {
if !cx.supports_f128_type {
return None;
}
let f128_type = cx.type_f128();
let func_name = match name {
sym::ceilf128 => "ceilf128",
sym::floorf128 => "floorf128",
sym::truncf128 => "truncf128",
sym::roundf128 => "roundf128",
sym::round_ties_even_f128 => "roundevenf128",
sym::sqrtf128 => "sqrtf128",
_ => return None,
};
Some(cx.context.new_function(
None,
FunctionType::Extern,
f128_type,
&[cx.context.new_parameter(None, f128_type, "a")],
func_name,
false,
))
}
fn get_simple_function_f128_2args<'gcc, 'tcx>(
cx: &CodegenCx<'gcc, 'tcx>,
name: Symbol,
) -> Option<Function<'gcc>> {
if !cx.supports_f128_type {
return None;
}
let f128_type = cx.type_f128();
let func_name = match name {
sym::maxnumf128 => "fmaxf128",
sym::minnumf128 => "fminf128",
_ => return None,
};
Some(cx.context.new_function(
None,
FunctionType::Extern,
f128_type,
&[
cx.context.new_parameter(None, f128_type, "a"),
cx.context.new_parameter(None, f128_type, "b"),
],
func_name,
false,
))
}
fn f16_builtin<'gcc, 'tcx>(
cx: &CodegenCx<'gcc, 'tcx>,
name: Symbol,
args: &[OperandRef<'tcx, RValue<'gcc>>],
) -> RValue<'gcc> {
let f32_type = cx.type_f32();
let builtin_name = match name {
sym::ceilf16 => "__builtin_ceilf",
sym::floorf16 => "__builtin_floorf",
sym::fmaf16 => "fmaf",
sym::maxnumf16 => "__builtin_fmaxf",
sym::minnumf16 => "__builtin_fminf",
sym::powf16 => "__builtin_powf",
sym::powif16 => {
let func = cx.context.get_builtin_function("__builtin_powif");
let arg0 = cx.context.new_cast(None, args[0].immediate(), f32_type);
let args = [arg0, args[1].immediate()];
let result = cx.context.new_call(None, func, &args);
return cx.context.new_cast(None, result, cx.type_f16());
}
sym::roundf16 => "__builtin_roundf",
sym::round_ties_even_f16 => "__builtin_rintf",
sym::sqrtf16 => "__builtin_sqrtf",
sym::truncf16 => "__builtin_truncf",
_ => unreachable!(),
};
let func = cx.context.get_builtin_function(builtin_name);
let args: Vec<_> =
args.iter().map(|arg| cx.context.new_cast(None, arg.immediate(), f32_type)).collect();
let result = cx.context.new_call(None, func, &args);
cx.context.new_cast(None, result, cx.type_f16())
}
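The helper above lowers f16 intrinsics by promoting the arguments to f32, calling the f32 builtin, and casting the result back. The same promote-compute-demote idea in stable Rust (which has no f16 type), using f32/f64 as the analogous pair:

// Compute an f32 operation by promoting to f64 and demoting the result.
fn sqrt_f32_via_f64(x: f32) -> f32 {
    (x as f64).sqrt() as f32
}

fn main() {
    assert_eq!(sqrt_f32_via_f64(4.0), 2.0);
}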
impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
fn codegen_intrinsic_call(
&mut self,
@@ -211,7 +300,9 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tc
let fn_args = instance.args;
let simple = get_simple_intrinsic(self, name);
let simple_func = get_simple_function(self, name);
let simple_func = get_simple_function(self, name)
.or_else(|| get_simple_function_f128(self, name))
.or_else(|| get_simple_function_f128_2args(self, name));
// FIXME(tempdragon): Re-enable `clippy::suspicious_else_formatting` if the following issue is solved:
// https://github.com/rust-lang/rust-clippy/issues/12497
@@ -234,17 +325,55 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tc
&args.iter().map(|arg| arg.immediate()).collect::<Vec<_>>(),
)
}
sym::fmaf16 => {
// TODO(antoyo): use the correct builtin for f16.
let func = self.cx.context.get_builtin_function("fmaf");
let args: Vec<_> = args
.iter()
.map(|arg| {
self.cx.context.new_cast(self.location, arg.immediate(), self.cx.type_f32())
})
.collect();
let result = self.cx.context.new_call(self.location, func, &args);
self.cx.context.new_cast(self.location, result, self.cx.type_f16())
sym::ceilf16
| sym::floorf16
| sym::fmaf16
| sym::maxnumf16
| sym::minnumf16
| sym::powf16
| sym::powif16
| sym::roundf16
| sym::round_ties_even_f16
| sym::sqrtf16
| sym::truncf16 => f16_builtin(self, name, args),
sym::fmaf128 => {
let f128_type = self.cx.type_f128();
let func = self.cx.context.new_function(
None,
FunctionType::Extern,
f128_type,
&[
self.cx.context.new_parameter(None, f128_type, "a"),
self.cx.context.new_parameter(None, f128_type, "b"),
self.cx.context.new_parameter(None, f128_type, "c"),
],
"fmaf128",
false,
);
self.cx.context.new_call(
self.location,
func,
&args.iter().map(|arg| arg.immediate()).collect::<Vec<_>>(),
)
}
sym::powif128 => {
let f128_type = self.cx.type_f128();
let func = self.cx.context.new_function(
None,
FunctionType::Extern,
f128_type,
&[
self.cx.context.new_parameter(None, f128_type, "a"),
self.cx.context.new_parameter(None, self.int_type, "b"),
],
"__powitf2",
false,
);
self.cx.context.new_call(
self.location,
func,
&args.iter().map(|arg| arg.immediate()).collect::<Vec<_>>(),
)
}
sym::is_val_statically_known => {
let a = args[0].immediate();
@@ -526,7 +655,7 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tc
fn type_checked_load(
&mut self,
_llvtable: Self::Value,
_vtable: Self::Value,
_vtable_byte_offset: u64,
_typeid: Self::Value,
) -> Self::Value {
@@ -622,23 +751,23 @@ impl<'gcc, 'tcx> ArgAbiExt<'gcc, 'tcx> for ArgAbi<'tcx, Ty<'tcx>> {
// We therefore allocate some scratch space...
let scratch_size = cast.size(bx);
let scratch_align = cast.align(bx);
let llscratch = bx.alloca(scratch_size, scratch_align);
bx.lifetime_start(llscratch, scratch_size);
let scratch = bx.alloca(scratch_size, scratch_align);
bx.lifetime_start(scratch, scratch_size);
// ... where we first store the value...
rustc_codegen_ssa::mir::store_cast(bx, cast, val, llscratch, scratch_align);
rustc_codegen_ssa::mir::store_cast(bx, cast, val, scratch, scratch_align);
// ... and then memcpy it to the intended destination.
bx.memcpy(
dst.val.llval,
self.layout.align.abi,
llscratch,
scratch,
scratch_align,
bx.const_usize(self.layout.size.bytes()),
MemFlags::empty(),
);
bx.lifetime_end(llscratch, scratch_size);
bx.lifetime_end(scratch, scratch_size);
}
} else {
OperandValue::Immediate(val).store(bx, dst);


@@ -1081,7 +1081,9 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
let (_, element_ty1) = args[1].layout.ty.simd_size_and_type(bx.tcx());
let (_, element_ty2) = args[2].layout.ty.simd_size_and_type(bx.tcx());
let (pointer_count, underlying_ty) = match *element_ty1.kind() {
ty::RawPtr(p_ty, mutbl) if p_ty == in_elem && mutbl == hir::Mutability::Mut => {
ty::RawPtr(p_ty, mutability)
if p_ty == in_elem && mutability == hir::Mutability::Mut =>
{
(ptr_count(element_ty1), non_ptr(element_ty1))
}
_ => {


@@ -3,10 +3,12 @@
* TODO(antoyo): support #[inline] attributes.
* TODO(antoyo): support LTO (gcc's equivalent to Full LTO is -flto -flto-partition=one https://documentation.suse.com/sbp/all/html/SBP-GCC-10/index.html).
* For Thin LTO, this might be helpful:
// cspell:disable-next-line
* In gcc 4.6 -fwhopr was removed and became default with -flto. The non-whopr path can still be executed via -flto-partition=none.
* Or the new incremental LTO (https://www.phoronix.com/news/GCC-Incremental-LTO-Patches)?
*
* Maybe some missing optizations enabled by rustc's LTO is in there: https://gcc.gnu.org/onlinedocs/gcc/Optimize-Options.html
* Maybe some missing optimizations enabled by rustc's LTO are in there: https://gcc.gnu.org/onlinedocs/gcc/Optimize-Options.html
// cspell:disable-next-line
* Like -fipa-icf (should be already enabled) and maybe -fdevirtualize-at-ltrans.
* TODO: disable debug info always being emitted. Perhaps this slows things down?
*
@@ -206,7 +208,7 @@ impl CodegenBackend for GccCodegenBackend {
#[cfg(not(feature = "master"))]
{
let temp_dir = TempDir::new().expect("cannot create temporary directory");
let temp_file = temp_dir.into_path().join("result.asm");
let temp_file = temp_dir.keep().join("result.asm");
let check_context = Context::default();
check_context.set_print_errors_to_stderr(false);
let _int128_ty = check_context.new_c_type(CType::UInt128t);
@@ -430,10 +432,11 @@ impl WriteBackendMethods for GccCodegenBackend {
) -> Result<ModuleCodegen<Self::Module>, FatalError> {
back::write::link(cgcx, dcx, modules)
}
fn autodiff(
_cgcx: &CodegenContext<Self>,
_module: &ModuleCodegen<Self::Module>,
_diff_fncs: Vec<AutoDiffItem>,
_diff_functions: Vec<AutoDiffItem>,
_config: &ModuleConfig,
) -> Result<(), FatalError> {
unimplemented!()
@@ -494,12 +497,14 @@ fn target_config(sess: &Session, target_info: &LockedTargetInfo) -> TargetConfig
return false;
}
target_info.cpu_supports(feature)
// cSpell:disable
/*
adx, aes, avx, avx2, avx512bf16, avx512bitalg, avx512bw, avx512cd, avx512dq, avx512er, avx512f, avx512fp16, avx512ifma,
avx512pf, avx512vbmi, avx512vbmi2, avx512vl, avx512vnni, avx512vp2intersect, avx512vpopcntdq,
bmi1, bmi2, cmpxchg16b, ermsb, f16c, fma, fxsr, gfni, lzcnt, movbe, pclmulqdq, popcnt, rdrand, rdseed, rtm,
sha, sse, sse2, sse3, sse4.1, sse4.2, sse4a, ssse3, tbm, vaes, vpclmulqdq, xsave, xsavec, xsaveopt, xsaves
*/
// cSpell:enable
})
.map(Symbol::intern)
.collect()
@@ -508,13 +513,16 @@ fn target_config(sess: &Session, target_info: &LockedTargetInfo) -> TargetConfig
let target_features = f(false);
let unstable_target_features = f(true);
let has_reliable_f16 = target_info.supports_target_dependent_type(CType::Float16);
let has_reliable_f128 = target_info.supports_target_dependent_type(CType::Float128);
TargetConfig {
target_features,
unstable_target_features,
// There are no known bugs with GCC support for f16 or f128
has_reliable_f16: true,
has_reliable_f16_math: true,
has_reliable_f128: true,
has_reliable_f128_math: true,
has_reliable_f16,
has_reliable_f16_math: has_reliable_f16,
has_reliable_f128,
has_reliable_f128_math: has_reliable_f128,
}
}


@@ -302,13 +302,13 @@ impl<'gcc, 'tcx> BaseTypeCodegenMethods for CodegenCx<'gcc, 'tcx> {
#[cfg_attr(feature = "master", allow(unused_mut))]
fn type_array(&self, ty: Type<'gcc>, mut len: u64) -> Type<'gcc> {
#[cfg(not(feature = "master"))]
if let Some(struct_type) = ty.is_struct() {
if struct_type.get_field_count() == 0 {
// NOTE: since gccjit only supports i32 for the array size and libcore's tests use a
// size of usize::MAX in test_binary_search, we work around this by setting the size to
// zero for ZSTs.
len = 0;
}
if let Some(struct_type) = ty.is_struct()
&& struct_type.get_field_count() == 0
{
// NOTE: since gccjit only supports i32 for the array size and libcore's tests use a
// size of usize::MAX in test_binary_search, we work around this by setting the size to
// zero for ZSTs.
len = 0;
}
self.context.new_array_type(None, ty, len)
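The restructured condition uses a let-chain (stable as of Rust 1.88) to bind and test in a single `if`, avoiding the nested block. A standalone example of the same pattern:

// Bind with `let`, then chain further conditions with `&&` in one `if`.
fn head_if_even(values: &[i32]) -> Option<i32> {
    if let Some(&first) = values.first()
        && first % 2 == 0
    {
        return Some(first);
    }
    None
}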


@@ -217,7 +217,7 @@ impl<'tcx> LayoutGccExt<'tcx> for TyAndLayout<'tcx> {
let ty = match *self.ty.kind() {
// NOTE: we cannot remove this match like in the LLVM codegen because the call
// to fn_ptr_backend_type handles the on-stack attribute.
// TODO(antoyo): find a less hackish way to hande the on-stack attribute.
// TODO(antoyo): find a less hackish way to handle the on-stack attribute.
ty::FnPtr(sig_tys, hdr) => cx
.fn_ptr_backend_type(cx.fn_abi_of_fn_ptr(sig_tys.with(hdr), ty::List::empty())),
_ => self.scalar_gcc_type_at(cx, scalar, Size::ZERO),