Let codegen decide when to mem::swap with immediates

Making `libcore` decide this is silly; the backend has so much better information about when it's a good idea. So introduce a new `typed_swap` intrinsic with a fallback body, but replace that implementation for immediates and scalar pairs.
2024-03-15 22:09:05 -07:00
parent fd27e8745f
commit 7d537106a1
12 changed files with 270 additions and 33 deletions
--- a/compiler/rustc_codegen_ssa/src/traits/builder.rs
+++ b/compiler/rustc_codegen_ssa/src/traits/builder.rs
@@ -1,22 +1,24 @@
 use super::abi::AbiBuilderMethods;
 use super::asm::AsmBuilderMethods;
+use super::consts::ConstMethods;
 use super::coverageinfo::CoverageInfoBuilderMethods;
 use super::debuginfo::DebugInfoBuilderMethods;
 use super::intrinsic::IntrinsicCallMethods;
 use super::misc::MiscMethods;
-use super::type_::{ArgAbiMethods, BaseTypeMethods};
+use super::type_::{ArgAbiMethods, BaseTypeMethods, LayoutTypeMethods};
 use super::{HasCodegen, StaticBuilderMethods};

 use crate::common::{
    AtomicOrdering, AtomicRmwBinOp, IntPredicate, RealPredicate, SynchronizationScope, TypeKind,
 };
-use crate::mir::operand::OperandRef;
+use crate::mir::operand::{OperandRef, OperandValue};
 use crate::mir::place::PlaceRef;
 use crate::MemFlags;

 use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrs;
 use rustc_middle::ty::layout::{HasParamEnv, TyAndLayout};
 use rustc_middle::ty::Ty;
+use rustc_session::config::OptLevel;
 use rustc_span::Span;
 use rustc_target::abi::call::FnAbi;
 use rustc_target::abi::{Abi, Align, Scalar, Size, WrappingRange};
@@ -267,6 +269,54 @@ pub trait BuilderMethods<'a, 'tcx>:
        flags: MemFlags,
    );

+    /// *Typed* copy for non-overlapping places.
+    ///
+    /// Has a default implementation in terms of `memcpy`, but specific backends
+    /// can override to do something smarter if possible.
+    ///
+    /// (For example, typed load-stores with alias metadata.)
+    fn typed_place_copy(
+        &mut self,
+        dst: PlaceRef<'tcx, Self::Value>,
+        src: PlaceRef<'tcx, Self::Value>,
+    ) {
+        debug_assert!(src.llextra.is_none());
+        debug_assert!(dst.llextra.is_none());
+        debug_assert_eq!(dst.layout.size, src.layout.size);
+        if self.sess().opts.optimize == OptLevel::No && self.is_backend_immediate(dst.layout) {
+            // If we're not optimizing, the aliasing information from `memcpy`
+            // isn't useful, so just load-store the value for smaller code.
+            let temp = self.load_operand(src);
+            temp.val.store(self, dst);
+        } else if !dst.layout.is_zst() {
+            let bytes = self.const_usize(dst.layout.size.bytes());
+            self.memcpy(dst.llval, dst.align, src.llval, src.align, bytes, MemFlags::empty());
+        }
+    }
+
+    /// *Typed* swap for non-overlapping places.
+    ///
+    /// Avoids `alloca`s for Immediates and ScalarPairs.
+    ///
+    /// FIXME: Maybe do something smarter for Ref types too?
+    /// For now, the `typed_swap` intrinsic just doesn't call this for those
+    /// cases (in non-debug), preferring the fallback body instead.
+    fn typed_place_swap(
+        &mut self,
+        left: PlaceRef<'tcx, Self::Value>,
+        right: PlaceRef<'tcx, Self::Value>,
+    ) {
+        let mut temp = self.load_operand(left);
+        if let OperandValue::Ref(..) = temp.val {
+            // The SSA value isn't stand-alone, so we need to copy it elsewhere
+            let alloca = PlaceRef::alloca(self, left.layout);
+            self.typed_place_copy(alloca, left);
+            temp = self.load_operand(alloca);
+        }
+        self.typed_place_copy(left, right);
+        temp.val.store(self, right);
+    }
+
    fn select(
        &mut self,
        cond: Self::Value,