Rollup merge of #143410 - scottmcm:redo-transmute-again, r=RalfJung,workingjubilee

Block SIMD in transmute_immediate; delete `OperandValueKind`

Vectors have been causing me problems in this code for years; see, for example, https://github.com/rust-lang/rust/pull/110021#discussion_r1160975086 and https://github.com/rust-lang/rust/pull/143194

See conversation in <https://rust-lang.zulipchat.com/#narrow/channel/131828-t-compiler/topic/Is.20transmuting.20a.20.60T.60.20to.20.60Tx1.60.20.28one-element.20SIMD.20vector.29.20UB.3F/near/526262799>.

By blocking SIMD in `transmute_immediate`, it can be simplified to take just the `Scalar`s involved: the backend types can be derived from those `Scalar`s rather than needing to be passed in. An assert is also added so it ICEs if a vector does reach it.
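
For reference, the resulting signature change, condensed from the diff below (bodies elided; not a standalone compilable excerpt):

```rust
// Before: every caller had to pass the backend types alongside the scalars.
pub(super) fn transmute_immediate<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
    bx: &mut Bx,
    imm: Bx::Value,
    from_scalar: abi::Scalar,
    from_backend_ty: Bx::Type,
    to_scalar: abi::Scalar,
    to_backend_ty: Bx::Type,
) -> Bx::Value { /* ... */ }

// After: the backend types are recomputed inside the helper via
// `bx.cx().type_from_scalar(...)`, so only the `Scalar`s are passed.
pub(super) fn transmute_scalar<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
    bx: &mut Bx,
    imm: Bx::Value,
    from_scalar: abi::Scalar,
    to_scalar: abi::Scalar,
) -> Bx::Value { /* ... */ }
```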

Accordingly, this changes `rvalue_creates_operand` to stop sending SIMD transmutes through the operand path; they always go through memory instead, as they did before rust-lang/rust#108442.
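
For a concrete picture of the kind of code this affects, here is a minimal standalone example (mine, not from the PR; nightly with `portable_simd`): a scalar-to-one-element-vector transmute, which now lowers through a stack temporary rather than the immediate path, matching the updated `scalar_to_single_element_simd` codegen test below.

```rust
#![feature(portable_simd)]
use std::simd::Simd;

// `Simd<u64, 1>` has the same size as `u64`, so `transmute` accepts it.
// Whether this is even legal at the language level is the question in the
// Zulip thread above; at the codegen level it now always goes through memory.
#[no_mangle]
pub fn u64_to_single_lane(x: u64) -> Simd<u64, 1> {
    unsafe { std::mem::transmute(x) }
}
```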

And thanks to those changes, I could also remove the `OperandValueKind` type I added back then, which `@RalfJung` rightly considers pretty sketchy.

cc `@folkertdev` `@workingjubilee` from the zulip conversation too
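
One more consequence visible in the codegen tests below: the always-UB size-mismatch cases (reachable only via `transmute_unchecked`) now lower to `llvm.assume(i1 false)` instead of `llvm.trap`, since `unreachable` is a terminator and can't sit in the middle of a block. A minimal sketch of such a function (essentially what the `check_bigger_size` test below does, assuming nightly `core_intrinsics`):

```rust
#![feature(core_intrinsics)]
#![allow(internal_features)]

use std::intrinsics::transmute_unchecked;

// `transmute_unchecked` has no size check, so this compiles even though
// u16 and u32 differ in size; actually executing it is UB, and the backend
// now emits `assume(false)` for it rather than a trap.
#[no_mangle]
pub unsafe fn widen(x: u16) -> u32 {
    transmute_unchecked(x)
}
```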
Committed by Jubilee (via GitHub), 2025-07-04 23:26:24 -07:00
7 changed files with 138 additions and 214 deletions

View File

@@ -13,7 +13,7 @@ use rustc_session::config::OptLevel;
 use tracing::{debug, instrument};
 
 use super::place::{PlaceRef, PlaceValue};
-use super::rvalue::transmute_immediate;
+use super::rvalue::transmute_scalar;
 use super::{FunctionCx, LocalRef};
 use crate::common::IntPredicate;
 use crate::traits::*;
@@ -346,14 +346,16 @@ impl<'a, 'tcx, V: CodegenObject> OperandRef<'tcx, V> {
         let val = if field.is_zst() {
             OperandValue::ZeroSized
+        } else if let BackendRepr::SimdVector { .. } = self.layout.backend_repr {
+            // codegen_transmute_operand doesn't support SIMD, but since the previous
+            // check handled ZSTs, the only possible field access into something SIMD
+            // is to the `non_1zst_field` that's the same SIMD. (Other things, even
+            // just padding, would change the wrapper's representation type.)
+            assert_eq!(field.size, self.layout.size);
+            self.val
         } else if field.size == self.layout.size {
             assert_eq!(offset.bytes(), 0);
-            fx.codegen_transmute_operand(bx, *self, field).unwrap_or_else(|| {
-                bug!(
-                    "Expected `codegen_transmute_operand` to handle equal-size \
-                     field {i:?} projection from {self:?} to {field:?}"
-                )
-            })
+            fx.codegen_transmute_operand(bx, *self, field)
         } else {
             let (in_scalar, imm) = match (self.val, self.layout.backend_repr) {
                 // Extract a scalar component from a pair.
@@ -613,10 +615,8 @@ impl<'a, 'tcx, V: CodegenObject> OperandRef<'tcx, Result<V, abi::Scalar>> {
         };
 
         let mut update = |tgt: &mut Result<V, abi::Scalar>, src, from_scalar| {
-            let from_bty = bx.cx().type_from_scalar(from_scalar);
             let to_scalar = tgt.unwrap_err();
-            let to_bty = bx.cx().type_from_scalar(to_scalar);
-            let imm = transmute_immediate(bx, src, from_scalar, from_bty, to_scalar, to_bty);
+            let imm = transmute_scalar(bx, src, from_scalar, to_scalar);
             *tgt = Ok(imm);
         };

View File

@@ -1,10 +1,8 @@
-use std::assert_matches::assert_matches;
-
 use rustc_abi::{self as abi, FIRST_VARIANT};
 use rustc_middle::ty::adjustment::PointerCoercion;
 use rustc_middle::ty::layout::{HasTyCtxt, HasTypingEnv, LayoutOf, TyAndLayout};
 use rustc_middle::ty::{self, Instance, Ty, TyCtxt};
-use rustc_middle::{bug, mir, span_bug};
+use rustc_middle::{bug, mir};
 use rustc_session::config::OptLevel;
 use rustc_span::{DUMMY_SP, Span};
 use tracing::{debug, instrument};
@@ -12,7 +10,7 @@ use tracing::{debug, instrument};
 use super::operand::{OperandRef, OperandValue};
 use super::place::{PlaceRef, codegen_tag_value};
 use super::{FunctionCx, LocalRef};
-use crate::common::IntPredicate;
+use crate::common::{IntPredicate, TypeKind};
 use crate::traits::*;
 use crate::{MemFlags, base};
@@ -190,6 +188,10 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
         }
     }
 
+    /// Transmutes the `src` value to the destination type by writing it to `dst`.
+    ///
+    /// See also [`Self::codegen_transmute_operand`] for cases that can be done
+    /// without needing a pre-allocated place for the destination.
     fn codegen_transmute(
         &mut self,
         bx: &mut Bx,
@@ -200,37 +202,36 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
         assert!(src.layout.is_sized());
         assert!(dst.layout.is_sized());
 
-        if let Some(val) = self.codegen_transmute_operand(bx, src, dst.layout) {
-            val.store(bx, dst);
-            return;
-        }
-
-        match src.val {
-            OperandValue::Ref(..) | OperandValue::ZeroSized => {
-                span_bug!(
-                    self.mir.span,
-                    "Operand path should have handled transmute \
-                     from {src:?} to place {dst:?}"
-                );
-            }
-            OperandValue::Immediate(..) | OperandValue::Pair(..) => {
-                // When we have immediate(s), the alignment of the source is irrelevant,
-                // so we can store them using the destination's alignment.
-                src.val.store(bx, dst.val.with_type(src.layout));
-            }
+        if src.layout.size != dst.layout.size
+            || src.layout.is_uninhabited()
+            || dst.layout.is_uninhabited()
+        {
+            // These cases are all UB to actually hit, so don't emit code for them.
+            // (The size mismatches are reachable via `transmute_unchecked`.)
+            // We can't use unreachable because that's a terminator, and we
+            // need something that can be in the middle of a basic block.
+            bx.assume(bx.cx().const_bool(false))
+        } else {
+            // Since in this path we have a place anyway, we can store or copy to it,
+            // making sure we use the destination place's alignment even if the
+            // source would normally have a higher one.
+            src.val.store(bx, dst.val.with_type(src.layout));
         }
     }
 
-    /// Attempts to transmute an `OperandValue` to another `OperandValue`.
+    /// Transmutes an `OperandValue` to another `OperandValue`.
     ///
-    /// Returns `None` for cases that can't work in that framework, such as for
-    /// `Immediate`->`Ref` that needs an `alloc` to get the location.
+    /// This is supported only for cases where [`Self::rvalue_creates_operand`]
+    /// returns `true`, and will ICE otherwise. (In particular, anything that
+    /// would need to `alloca` in order to return a `PlaceValue` will ICE,
+    /// expecting those to go via [`Self::codegen_transmute`] instead where
+    /// the destination place is already allocated.)
     pub(crate) fn codegen_transmute_operand(
         &mut self,
         bx: &mut Bx,
         operand: OperandRef<'tcx, Bx::Value>,
         cast: TyAndLayout<'tcx>,
-    ) -> Option<OperandValue<Bx::Value>> {
+    ) -> OperandValue<Bx::Value> {
         // Check for transmutes that are always UB.
         if operand.layout.size != cast.size
             || operand.layout.is_uninhabited()
@@ -244,71 +245,34 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
             // Because this transmute is UB, return something easy to generate,
             // since it's fine that later uses of the value are probably UB.
-            return Some(OperandValue::poison(bx, cast));
+            return OperandValue::poison(bx, cast);
         }
 
-        let operand_kind = self.value_kind(operand.layout);
-        let cast_kind = self.value_kind(cast);
-
-        match operand.val {
-            OperandValue::Ref(source_place_val) => {
+        match (operand.val, operand.layout.backend_repr, cast.backend_repr) {
+            _ if cast.is_zst() => OperandValue::ZeroSized,
+            (_, _, abi::BackendRepr::Memory { .. }) => {
+                bug!("Cannot `codegen_transmute_operand` to non-ZST memory-ABI output {cast:?}");
+            }
+            (OperandValue::Ref(source_place_val), abi::BackendRepr::Memory { .. }, _) => {
                 assert_eq!(source_place_val.llextra, None);
-                assert_matches!(operand_kind, OperandValueKind::Ref);
                 // The existing alignment is part of `source_place_val`,
                 // so that alignment will be used, not `cast`'s.
-                Some(bx.load_operand(source_place_val.with_type(cast)).val)
-            }
-            OperandValue::ZeroSized => {
-                let OperandValueKind::ZeroSized = operand_kind else {
-                    bug!("Found {operand_kind:?} for operand {operand:?}");
-                };
-                if let OperandValueKind::ZeroSized = cast_kind {
-                    Some(OperandValue::ZeroSized)
-                } else {
-                    None
-                }
-            }
-            OperandValue::Immediate(imm) => {
-                let OperandValueKind::Immediate(from_scalar) = operand_kind else {
-                    bug!("Found {operand_kind:?} for operand {operand:?}");
-                };
-                if let OperandValueKind::Immediate(to_scalar) = cast_kind
-                    && from_scalar.size(self.cx) == to_scalar.size(self.cx)
-                {
-                    let from_backend_ty = bx.backend_type(operand.layout);
-                    let to_backend_ty = bx.backend_type(cast);
-                    Some(OperandValue::Immediate(transmute_immediate(
-                        bx,
-                        imm,
-                        from_scalar,
-                        from_backend_ty,
-                        to_scalar,
-                        to_backend_ty,
-                    )))
-                } else {
-                    None
-                }
-            }
-            OperandValue::Pair(imm_a, imm_b) => {
-                let OperandValueKind::Pair(in_a, in_b) = operand_kind else {
-                    bug!("Found {operand_kind:?} for operand {operand:?}");
-                };
-                if let OperandValueKind::Pair(out_a, out_b) = cast_kind
-                    && in_a.size(self.cx) == out_a.size(self.cx)
-                    && in_b.size(self.cx) == out_b.size(self.cx)
-                {
-                    let in_a_ibty = bx.scalar_pair_element_backend_type(operand.layout, 0, false);
-                    let in_b_ibty = bx.scalar_pair_element_backend_type(operand.layout, 1, false);
-                    let out_a_ibty = bx.scalar_pair_element_backend_type(cast, 0, false);
-                    let out_b_ibty = bx.scalar_pair_element_backend_type(cast, 1, false);
-                    Some(OperandValue::Pair(
-                        transmute_immediate(bx, imm_a, in_a, in_a_ibty, out_a, out_a_ibty),
-                        transmute_immediate(bx, imm_b, in_b, in_b_ibty, out_b, out_b_ibty),
-                    ))
-                } else {
-                    None
-                }
+                bx.load_operand(source_place_val.with_type(cast)).val
             }
+            (
+                OperandValue::Immediate(imm),
+                abi::BackendRepr::Scalar(from_scalar),
+                abi::BackendRepr::Scalar(to_scalar),
+            ) => OperandValue::Immediate(transmute_scalar(bx, imm, from_scalar, to_scalar)),
+            (
+                OperandValue::Pair(imm_a, imm_b),
+                abi::BackendRepr::ScalarPair(in_a, in_b),
+                abi::BackendRepr::ScalarPair(out_a, out_b),
+            ) => OperandValue::Pair(
+                transmute_scalar(bx, imm_a, in_a, out_a),
+                transmute_scalar(bx, imm_b, in_b, out_b),
+            ),
+            _ => bug!("Cannot `codegen_transmute_operand` {operand:?} to {cast:?}"),
         }
     }
@@ -479,9 +443,8 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
             // path as the other integer-to-X casts.
             | mir::CastKind::PointerWithExposedProvenance => {
                 let imm = operand.immediate();
-                let operand_kind = self.value_kind(operand.layout);
-                let OperandValueKind::Immediate(from_scalar) = operand_kind else {
-                    bug!("Found {operand_kind:?} for operand {operand:?}");
+                let abi::BackendRepr::Scalar(from_scalar) = operand.layout.backend_repr else {
+                    bug!("Found non-scalar for operand {operand:?}");
                 };
                 let from_backend_ty = bx.cx().immediate_backend_type(operand.layout);
@@ -491,9 +454,8 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
                     let val = OperandValue::Immediate(bx.cx().const_poison(to_backend_ty));
                     return OperandRef { val, layout: cast };
                 }
-                let cast_kind = self.value_kind(cast);
-                let OperandValueKind::Immediate(to_scalar) = cast_kind else {
-                    bug!("Found {cast_kind:?} for operand {cast:?}");
+                let abi::BackendRepr::Scalar(to_scalar) = cast.layout.backend_repr else {
+                    bug!("Found non-scalar for cast {cast:?}");
                 };
                 self.cast_immediate(bx, imm, from_scalar, from_backend_ty, to_scalar, to_backend_ty)
@@ -503,9 +465,7 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
                     })
                 }
                 mir::CastKind::Transmute => {
-                    self.codegen_transmute_operand(bx, operand, cast).unwrap_or_else(|| {
-                        bug!("Unsupported transmute-as-operand of {operand:?} to {cast:?}");
-                    })
+                    self.codegen_transmute_operand(bx, operand, cast)
                 }
             };
             OperandRef { val, layout: cast }
@@ -1011,37 +971,46 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
         OperandValue::Pair(val, of)
     }
 
+    /// Returns `true` if the `rvalue` can be computed into an [`OperandRef`],
+    /// rather than needing a full `PlaceRef` for the assignment destination.
+    ///
+    /// This is used by the [`super::analyze`] code to decide which MIR locals
+    /// can stay as SSA values (as opposed to generating `alloca` slots for them).
+    /// As such, some paths here return `true` even where the specific rvalue
+    /// will not actually take the operand path because the result type is such
+    /// that it always gets an `alloca`, but where it's not worth re-checking the
+    /// layout in this code when the right thing will happen anyway.
     pub(crate) fn rvalue_creates_operand(&self, rvalue: &mir::Rvalue<'tcx>, span: Span) -> bool {
         match *rvalue {
             mir::Rvalue::Cast(mir::CastKind::Transmute, ref operand, cast_ty) => {
                 let operand_ty = operand.ty(self.mir, self.cx.tcx());
                 let cast_layout = self.cx.layout_of(self.monomorphize(cast_ty));
                 let operand_layout = self.cx.layout_of(self.monomorphize(operand_ty));
 
-                match (self.value_kind(operand_layout), self.value_kind(cast_layout)) {
-                    // Can always load from a pointer as needed
-                    (OperandValueKind::Ref, _) => true,
-
-                    // ZST-to-ZST is the easiest thing ever
-                    (OperandValueKind::ZeroSized, OperandValueKind::ZeroSized) => true,
-
-                    // But if only one of them is a ZST the sizes can't match
-                    (OperandValueKind::ZeroSized, _) | (_, OperandValueKind::ZeroSized) => false,
-
-                    // Need to generate an `alloc` to get a pointer from an immediate
-                    (OperandValueKind::Immediate(..) | OperandValueKind::Pair(..), OperandValueKind::Ref) => false,
+                match (operand_layout.backend_repr, cast_layout.backend_repr) {
+                    // When the output will be in memory anyway, just use its place
+                    // (instead of the operand path) unless it's the trivial ZST case.
+                    (_, abi::BackendRepr::Memory { .. }) => cast_layout.is_zst(),
+
+                    // Otherwise (for a non-memory output) if the input is memory
+                    // then we can just read the value from the place.
+                    (abi::BackendRepr::Memory { .. }, _) => true,
 
                     // When we have scalar immediates, we can only convert things
                     // where the sizes match, to avoid endianness questions.
-                    (OperandValueKind::Immediate(a), OperandValueKind::Immediate(b)) =>
+                    (abi::BackendRepr::Scalar(a), abi::BackendRepr::Scalar(b)) =>
                         a.size(self.cx) == b.size(self.cx),
-                    (OperandValueKind::Pair(a0, a1), OperandValueKind::Pair(b0, b1)) =>
+                    (abi::BackendRepr::ScalarPair(a0, a1), abi::BackendRepr::ScalarPair(b0, b1)) =>
                         a0.size(self.cx) == b0.size(self.cx) && a1.size(self.cx) == b1.size(self.cx),
 
-                    // Send mixings between scalars and pairs through the memory route
-                    // FIXME: Maybe this could use insertvalue/extractvalue instead?
-                    (OperandValueKind::Immediate(..), OperandValueKind::Pair(..)) |
-                    (OperandValueKind::Pair(..), OperandValueKind::Immediate(..)) => false,
+                    // Mixing Scalars and ScalarPairs can get quite complicated when
+                    // padding and undef get involved, so leave that to the memory path.
+                    (abi::BackendRepr::Scalar(_), abi::BackendRepr::ScalarPair(_, _)) |
+                    (abi::BackendRepr::ScalarPair(_, _), abi::BackendRepr::Scalar(_)) => false,
+
+                    // SIMD vectors aren't worth the trouble of dealing with complex
+                    // cases like from vectors of f32 to vectors of pointers or
+                    // from fat pointers to vectors of u16. (See #143194 #110021 ...)
+                    (abi::BackendRepr::SimdVector { .. }, _) | (_, abi::BackendRepr::SimdVector { .. }) => false,
                 }
             }
             mir::Rvalue::Ref(..) |
@@ -1071,68 +1040,43 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
         // (*) this is only true if the type is suitable
     }
-
-    /// Gets which variant of [`OperandValue`] is expected for a particular type.
-    fn value_kind(&self, layout: TyAndLayout<'tcx>) -> OperandValueKind {
-        if layout.is_zst() {
-            OperandValueKind::ZeroSized
-        } else if self.cx.is_backend_immediate(layout) {
-            assert!(!self.cx.is_backend_scalar_pair(layout));
-            OperandValueKind::Immediate(match layout.backend_repr {
-                abi::BackendRepr::Scalar(s) => s,
-                abi::BackendRepr::SimdVector { element, .. } => element,
-                x => span_bug!(self.mir.span, "Couldn't translate {x:?} as backend immediate"),
-            })
-        } else if self.cx.is_backend_scalar_pair(layout) {
-            let abi::BackendRepr::ScalarPair(s1, s2) = layout.backend_repr else {
-                span_bug!(
-                    self.mir.span,
-                    "Couldn't translate {:?} as backend scalar pair",
-                    layout.backend_repr,
-                );
-            };
-            OperandValueKind::Pair(s1, s2)
-        } else {
-            OperandValueKind::Ref
-        }
-    }
 }
 
-/// The variants of this match [`OperandValue`], giving details about the
-/// backend values that will be held in that other type.
-#[derive(Debug, Copy, Clone)]
-enum OperandValueKind {
-    Ref,
-    Immediate(abi::Scalar),
-    Pair(abi::Scalar, abi::Scalar),
-    ZeroSized,
-}
-
-/// Transmutes one of the immediates from an [`OperandValue::Immediate`]
-/// or an [`OperandValue::Pair`] to an immediate of the target type.
+/// Transmutes a single scalar value `imm` from `from_scalar` to `to_scalar`.
 ///
-/// `to_backend_ty` must be the *non*-immediate backend type (so it will be
-/// `i8`, not `i1`, for `bool`-like types.)
-pub(super) fn transmute_immediate<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
+/// This is expected to be in *immediate* form, as seen in [`OperandValue::Immediate`]
+/// or [`OperandValue::Pair`] (so `i1` for bools, not `i8`, for example).
+///
+/// ICEs if the passed-in `imm` is not a value of the expected type for
+/// `from_scalar`, such as if it's a vector or a pair.
+pub(super) fn transmute_scalar<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
     bx: &mut Bx,
     mut imm: Bx::Value,
     from_scalar: abi::Scalar,
-    from_backend_ty: Bx::Type,
     to_scalar: abi::Scalar,
-    to_backend_ty: Bx::Type,
 ) -> Bx::Value {
     assert_eq!(from_scalar.size(bx.cx()), to_scalar.size(bx.cx()));
+
+    let imm_ty = bx.cx().val_ty(imm);
+    assert_ne!(
+        bx.cx().type_kind(imm_ty),
+        TypeKind::Vector,
+        "Vector type {imm_ty:?} not allowed in transmute_scalar {from_scalar:?} -> {to_scalar:?}"
+    );
+
     // While optimizations will remove no-op transmutes, they might still be
     // there in debug or things that aren't no-op in MIR because they change
     // the Rust type but not the underlying layout/niche.
-    if from_scalar == to_scalar && from_backend_ty == to_backend_ty {
+    if from_scalar == to_scalar {
         return imm;
     }
 
     use abi::Primitive::*;
     imm = bx.from_immediate(imm);
+
+    let from_backend_ty = bx.cx().type_from_scalar(from_scalar);
+    debug_assert_eq!(bx.cx().val_ty(imm), from_backend_ty);
+    let to_backend_ty = bx.cx().type_from_scalar(to_scalar);
+
     // If we have a scalar, we must already know its range. Either
     //
     // 1) It's a parameter with `range` parameter metadata,
@@ -1163,6 +1107,8 @@ pub(super) fn transmute_immediate<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
         }
     };
 
+    debug_assert_eq!(bx.cx().val_ty(imm), to_backend_ty);
+
     // This `assume` remains important for cases like (a conceptual)
     //    transmute::<u32, NonZeroU32>(x) == 0
     // since it's never passed to something with parameter metadata (especially

View File

@@ -9,17 +9,20 @@ use std::mem::transmute;
 // CHECK-LABEL: @check_sse_pair_to_avx(
 #[no_mangle]
 pub unsafe fn check_sse_pair_to_avx(x: (__m128i, __m128i)) -> __m256i {
+    // CHECK: start:
     // CHECK-NOT: alloca
-    // CHECK: %0 = load <4 x i64>, ptr %x, align 16
-    // CHECK: store <4 x i64> %0, ptr %_0, align 32
+    // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 32 %_0, ptr align 16 %x, i64 32, i1 false)
+    // CHECK-NEXT: ret void
     transmute(x)
 }
 
 // CHECK-LABEL: @check_sse_pair_from_avx(
 #[no_mangle]
 pub unsafe fn check_sse_pair_from_avx(x: __m256i) -> (__m128i, __m128i) {
+    // CHECK: start:
     // CHECK-NOT: alloca
-    // CHECK: %0 = load <4 x i64>, ptr %x, align 32
-    // CHECK: store <4 x i64> %0, ptr %_0, align 16
+    // CHECK-NEXT: %[[TEMP:.+]] = load <4 x i64>, ptr %x, align 32
+    // CHECK-NEXT: store <4 x i64> %[[TEMP]], ptr %_0, align 16
+    // CHECK-NEXT: ret void
     transmute(x)
 }

View File

@@ -29,28 +29,28 @@ pub struct Aggregate8(u8);
 // CHECK-LABEL: @check_bigger_size(
 #[no_mangle]
 pub unsafe fn check_bigger_size(x: u16) -> u32 {
-    // CHECK: call void @llvm.trap
+    // CHECK: call void @llvm.assume(i1 false)
     transmute_unchecked(x)
 }
 
 // CHECK-LABEL: @check_smaller_size(
 #[no_mangle]
 pub unsafe fn check_smaller_size(x: u32) -> u16 {
-    // CHECK: call void @llvm.trap
+    // CHECK: call void @llvm.assume(i1 false)
     transmute_unchecked(x)
 }
 
 // CHECK-LABEL: @check_smaller_array(
 #[no_mangle]
 pub unsafe fn check_smaller_array(x: [u32; 7]) -> [u32; 3] {
-    // CHECK: call void @llvm.trap
+    // CHECK: call void @llvm.assume(i1 false)
     transmute_unchecked(x)
 }
 
 // CHECK-LABEL: @check_bigger_array(
 #[no_mangle]
 pub unsafe fn check_bigger_array(x: [u32; 3]) -> [u32; 7] {
-    // CHECK: call void @llvm.trap
+    // CHECK: call void @llvm.assume(i1 false)
    transmute_unchecked(x)
 }
@@ -73,9 +73,9 @@ pub unsafe fn check_to_empty_array(x: [u32; 5]) -> [u32; 0] {
 #[no_mangle]
 #[custom_mir(dialect = "runtime", phase = "optimized")]
 pub unsafe fn check_from_empty_array(x: [u32; 0]) -> [u32; 5] {
-    // CHECK-NOT: trap
-    // CHECK: call void @llvm.trap
-    // CHECK-NOT: trap
+    // CHECK-NOT: call
+    // CHECK: call void @llvm.assume(i1 false)
+    // CHECK-NOT: call
     mir! {
         {
             RET = CastTransmute(x);

View File

@@ -40,8 +40,7 @@ pub fn build_array_s(x: [f32; 4]) -> S<4> {
 // CHECK-LABEL: @build_array_transmute_s
 #[no_mangle]
 pub fn build_array_transmute_s(x: [f32; 4]) -> S<4> {
-    // CHECK: %[[VAL:.+]] = load <4 x float>, ptr %x, align [[ARRAY_ALIGN]]
-    // CHECK: store <4 x float> %[[VAL:.+]], ptr %_0, align [[VECTOR_ALIGN]]
+    // CHECK: call void @llvm.memcpy.{{.+}}({{.*}} align [[VECTOR_ALIGN]] {{.*}} align [[ARRAY_ALIGN]] {{.*}}, [[USIZE]] 16, i1 false)
     unsafe { std::mem::transmute(x) }
 }
@@ -55,7 +54,6 @@ pub fn build_array_t(x: [f32; 4]) -> T {
 // CHECK-LABEL: @build_array_transmute_t
 #[no_mangle]
 pub fn build_array_transmute_t(x: [f32; 4]) -> T {
-    // CHECK: %[[VAL:.+]] = load <4 x float>, ptr %x, align [[ARRAY_ALIGN]]
-    // CHECK: store <4 x float> %[[VAL:.+]], ptr %_0, align [[VECTOR_ALIGN]]
+    // CHECK: call void @llvm.memcpy.{{.+}}({{.*}} align [[VECTOR_ALIGN]] {{.*}} align [[ARRAY_ALIGN]] {{.*}}, [[USIZE]] 16, i1 false)
     unsafe { std::mem::transmute(x) }
 }

View File

@@ -111,8 +111,11 @@ pub fn fake_bool_unsigned_to_bool(b: FakeBoolUnsigned) -> bool {
 struct S([i64; 1]);
 
 // CHECK-LABEL: define{{.*}}i64 @single_element_simd_to_scalar(<1 x i64> %b)
-// CHECK: bitcast <1 x i64> %b to i64
-// CHECK: ret i64
+// CHECK-NEXT: start:
+// CHECK-NEXT: %[[RET:.+]] = alloca [8 x i8]
+// CHECK-NEXT: store <1 x i64> %b, ptr %[[RET]]
+// CHECK-NEXT: %[[TEMP:.+]] = load i64, ptr %[[RET]]
+// CHECK-NEXT: ret i64 %[[TEMP]]
 #[no_mangle]
 #[cfg_attr(target_family = "wasm", target_feature(enable = "simd128"))]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "neon"))]
@@ -124,8 +127,11 @@ pub extern "C" fn single_element_simd_to_scalar(b: S) -> i64 {
 }
 
 // CHECK-LABEL: define{{.*}}<1 x i64> @scalar_to_single_element_simd(i64 %b)
-// CHECK: bitcast i64 %b to <1 x i64>
-// CHECK: ret <1 x i64>
+// CHECK-NEXT: start:
+// CHECK-NEXT: %[[RET:.+]] = alloca [8 x i8]
+// CHECK-NEXT: store i64 %b, ptr %[[RET]]
+// CHECK-NEXT: %[[TEMP:.+]] = load <1 x i64>, ptr %[[RET]]
+// CHECK-NEXT: ret <1 x i64> %[[TEMP]]
 #[no_mangle]
 #[cfg_attr(target_family = "wasm", target_feature(enable = "simd128"))]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "neon"))]

View File

@@ -41,9 +41,6 @@ pub fn vec_iterator_cast_primitive(vec: Vec<i8>) -> Vec<u8> {
     // CHECK: call{{.+}}void @llvm.assume(i1 %{{.+}})
     // CHECK-NOT: loop
     // CHECK-NOT: call
-    // CHECK: call{{.+}}void @llvm.assume(i1 %{{.+}})
-    // CHECK-NOT: loop
-    // CHECK-NOT: call
     vec.into_iter().map(|e| e as u8).collect()
 }
@@ -55,9 +52,6 @@ pub fn vec_iterator_cast_wrapper(vec: Vec<u8>) -> Vec<Wrapper<u8>> {
     // CHECK: call{{.+}}void @llvm.assume(i1 %{{.+}})
     // CHECK-NOT: loop
     // CHECK-NOT: call
-    // CHECK: call{{.+}}void @llvm.assume(i1 %{{.+}})
-    // CHECK-NOT: loop
-    // CHECK-NOT: call
     vec.into_iter().map(|e| Wrapper(e)).collect()
 }
@@ -86,9 +80,6 @@ pub fn vec_iterator_cast_unwrap(vec: Vec<Wrapper<u8>>) -> Vec<u8> {
     // CHECK: call{{.+}}void @llvm.assume(i1 %{{.+}})
     // CHECK-NOT: loop
     // CHECK-NOT: call
-    // CHECK: call{{.+}}void @llvm.assume(i1 %{{.+}})
-    // CHECK-NOT: loop
-    // CHECK-NOT: call
     vec.into_iter().map(|e| e.0).collect()
 }
@@ -100,9 +91,6 @@ pub fn vec_iterator_cast_aggregate(vec: Vec<[u64; 4]>) -> Vec<Foo> {
     // CHECK: call{{.+}}void @llvm.assume(i1 %{{.+}})
     // CHECK-NOT: loop
     // CHECK-NOT: call
-    // CHECK: call{{.+}}void @llvm.assume(i1 %{{.+}})
-    // CHECK-NOT: loop
-    // CHECK-NOT: call
     vec.into_iter().map(|e| unsafe { std::mem::transmute(e) }).collect()
 }
@@ -114,9 +102,6 @@ pub fn vec_iterator_cast_deaggregate_tra(vec: Vec<Bar>) -> Vec<[u64; 4]> {
     // CHECK: call{{.+}}void @llvm.assume(i1 %{{.+}})
     // CHECK-NOT: loop
     // CHECK-NOT: call
-    // CHECK: call{{.+}}void @llvm.assume(i1 %{{.+}})
-    // CHECK-NOT: loop
-    // CHECK-NOT: call
 
     // Safety: For the purpose of this test we assume that Bar layout matches [u64; 4].
     // This currently is not guaranteed for repr(Rust) types, but it happens to work here and
@@ -133,9 +118,6 @@ pub fn vec_iterator_cast_deaggregate_fold(vec: Vec<Baz>) -> Vec<[u64; 4]> {
     // CHECK: call{{.+}}void @llvm.assume(i1 %{{.+}})
     // CHECK-NOT: loop
     // CHECK-NOT: call
-    // CHECK: call{{.+}}void @llvm.assume(i1 %{{.+}})
-    // CHECK-NOT: loop
-    // CHECK-NOT: call
 
     // Safety: For the purpose of this test we assume that Bar layout matches [u64; 4].
     // This currently is not guaranteed for repr(Rust) types, but it happens to work here and
@@ -156,12 +138,7 @@ pub fn vec_iterator_cast_unwrap_drop(vec: Vec<Wrapper<String>>) -> Vec<String> {
     // CHECK-NOT: call
     // CHECK-NOT: %{{.*}} = mul
     // CHECK-NOT: %{{.*}} = udiv
-    // CHECK: call
-    // CHECK-SAME: void @llvm.assume(i1 %{{.+}})
-    // CHECK-NOT: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}
-    // CHECK-NOT: call
-    // CHECK-NOT: %{{.*}} = mul
-    // CHECK-NOT: %{{.*}} = udiv
+    // CHECK: ret void
     vec.into_iter().map(|Wrapper(e)| e).collect()
 }
@@ -178,12 +155,6 @@ pub fn vec_iterator_cast_wrap_drop(vec: Vec<String>) -> Vec<Wrapper<String>> {
     // CHECK-NOT: call
     // CHECK-NOT: %{{.*}} = mul
     // CHECK-NOT: %{{.*}} = udiv
-    // CHECK: call
-    // CHECK-SAME: void @llvm.assume(i1 %{{.+}})
-    // CHECK-NOT: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}
-    // CHECK-NOT: call
-    // CHECK-NOT: %{{.*}} = mul
-    // CHECK-NOT: %{{.*}} = udiv
     // CHECK: ret void
     vec.into_iter().map(Wrapper).collect()