Auto merge of #115515 - the8472:zip-for-arrays, r=scottmcm

optimize zipping over array iterators

Fixes #115339 (somewhat)

the new assembly:

```asm
zip_arrays:
        .cfi_startproc
        vmovups (%rdx), %ymm0
        leaq    32(%rsi), %rcx
        vxorps  %xmm1, %xmm1, %xmm1
        vmovups %xmm1, -24(%rsp)
        movq    $0, -8(%rsp)
        movq    %rsi, -88(%rsp)
        movq    %rdi, %rax
        movq    %rcx, -80(%rsp)
        vmovups %ymm0, -72(%rsp)
        movq    $0, -40(%rsp)
        movq    $32, -32(%rsp)
        movq    -24(%rsp), %rcx
        vmovups (%rsi,%rcx), %ymm0
        vorps   -72(%rsp,%rcx), %ymm0, %ymm0
        vmovups %ymm0, (%rsi,%rcx)
        vmovups (%rsi), %ymm0
        vmovups %ymm0, (%rdi)
        vzeroupper
        retq
```

This is still longer than the slice version given in the issue, but at least it eliminates the terrible `vpextrb`/`orb` chain. I guess this is due to excessive memcpys again (I haven't looked at the LLVM IR)?

The `TrustedLen` specialization is a drive-by change since I had to do something for the default impl anyway to be able to specialize the `TrustedRandomAccessNoCoerce` impl.
This commit is contained in:
bors
2023-10-15 00:49:21 +00:00
4 changed files with 146 additions and 2 deletions

View File

@@ -4,7 +4,7 @@ use crate::num::NonZeroUsize;
use crate::{
fmt,
intrinsics::transmute_unchecked,
iter::{self, ExactSizeIterator, FusedIterator, TrustedLen},
iter::{self, ExactSizeIterator, FusedIterator, TrustedLen, TrustedRandomAccessNoCoerce},
mem::MaybeUninit,
ops::{IndexRange, Range},
ptr,
@@ -294,6 +294,12 @@ impl<T, const N: usize> Iterator for IntoIter<T, N> {
NonZeroUsize::new(remaining).map_or(Ok(()), Err)
}
#[inline]
unsafe fn __iterator_get_unchecked(&mut self, idx: usize) -> Self::Item {
    // `idx` is counted from the start of the `alive` range, so the slot to
    // read lies `self.alive.start() + idx` entries past the beginning of
    // `self.data`. The element is read out bitwise; `self.alive` itself is
    // not modified by this call.
    //
    // SAFETY: The caller must provide an idx that is in bound of the remainder.
    unsafe {
        let base = self.data.as_ptr();
        let slot = base.add(self.alive.start()).add(idx);
        slot.cast::<T>().read()
    }
}
}
#[stable(feature = "array_value_iter_impls", since = "1.40.0")]
@@ -375,6 +381,25 @@ impl<T, const N: usize> FusedIterator for IntoIter<T, N> {}
// Marks `IntoIter` as an iterator whose `size_hint` can be trusted by unsafe code.
// NOTE(review): the `size_hint` implementation that has to uphold this contract
// is not part of this hunk — confirm it reports the exact remaining length.
#[stable(feature = "array_value_iter_impls", since = "1.40.0")]
unsafe impl<T, const N: usize> TrustedLen for IntoIter<T, N> {}
// Method-less marker trait used as the element bound on the
// `TrustedRandomAccessNoCoerce` impl for `IntoIter`.
// It is hidden from the docs and gated behind the internal `std_internals`
// feature, so it is not meant to be named by user code.
// NOTE(review): `#[rustc_unsafe_specialization_marker]` allows specializing on
// this trait without the compiler checking that doing so is sound — keep the
// set of implementors restricted accordingly.
#[doc(hidden)]
#[unstable(issue = "none", feature = "std_internals")]
#[rustc_unsafe_specialization_marker]
pub trait NonDrop {}
// `T: Copy` serves as an approximation for `!Drop`: `__iterator_get_unchecked`
// reads an element without advancing `self.alive`, so there is no way to
// implement drop-handling for elements handed out through it. `Copy` types can
// never implement `Drop`, which makes the blanket impl below a conservative
// stand-in for "has no drop glue".
#[unstable(issue = "none", feature = "std_internals")]
impl<T: Copy> NonDrop for T {}
// Opts `IntoIter` into index-based access via `__iterator_get_unchecked`,
// restricted to element types that implement the `NonDrop` marker.
#[doc(hidden)]
#[unstable(issue = "none", feature = "std_internals")]
unsafe impl<T: NonDrop, const N: usize> TrustedRandomAccessNoCoerce for IntoIter<T, N> {
    // Fetching an element is a plain read out of the backing storage, so
    // skipping an access has no observable effect.
    const MAY_HAVE_SIDE_EFFECT: bool = false;
}
#[stable(feature = "array_value_iter_impls", since = "1.40.0")]
impl<T: Clone, const N: usize> Clone for IntoIter<T, N> {
fn clone(&self) -> Self {