|
|
|
|
@@ -10,9 +10,10 @@ use crate::ops;
|
|
|
|
|
impl [u8] {
|
|
|
|
|
/// Checks if all bytes in this slice are within the ASCII range.
|
|
|
|
|
#[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
|
|
|
|
|
#[rustc_const_unstable(feature = "const_slice_is_ascii", issue = "111090")]
|
|
|
|
|
#[must_use]
|
|
|
|
|
#[inline]
|
|
|
|
|
pub fn is_ascii(&self) -> bool {
|
|
|
|
|
pub const fn is_ascii(&self) -> bool {
|
|
|
|
|
is_ascii(self)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@@ -21,7 +22,7 @@ impl [u8] {
|
|
|
|
|
#[unstable(feature = "ascii_char", issue = "110998")]
|
|
|
|
|
#[must_use]
|
|
|
|
|
#[inline]
|
|
|
|
|
pub fn as_ascii(&self) -> Option<&[ascii::Char]> {
|
|
|
|
|
pub const fn as_ascii(&self) -> Option<&[ascii::Char]> {
|
|
|
|
|
if self.is_ascii() {
|
|
|
|
|
// SAFETY: Just checked that it's ASCII
|
|
|
|
|
Some(unsafe { self.as_ascii_unchecked() })
|
|
|
|
|
@@ -262,7 +263,7 @@ impl<'a> fmt::Debug for EscapeAscii<'a> {
|
|
|
|
|
/// Returns `true` if any byte in the word `v` is nonascii (>= 128). Snarfed
|
|
|
|
|
/// from `../str/mod.rs`, which does something similar for utf8 validation.
|
|
|
|
|
#[inline]
|
|
|
|
|
fn contains_nonascii(v: usize) -> bool {
|
|
|
|
|
const fn contains_nonascii(v: usize) -> bool {
|
|
|
|
|
const NONASCII_MASK: usize = usize::repeat_u8(0x80);
|
|
|
|
|
(NONASCII_MASK & v) != 0
|
|
|
|
|
}
|
|
|
|
|
@@ -280,7 +281,7 @@ fn contains_nonascii(v: usize) -> bool {
|
|
|
|
|
/// If any of these loads produces something for which `contains_nonascii`
|
|
|
|
|
/// (above) returns true, then we know the answer is false.
|
|
|
|
|
#[inline]
|
|
|
|
|
fn is_ascii(s: &[u8]) -> bool {
|
|
|
|
|
const fn is_ascii(s: &[u8]) -> bool {
|
|
|
|
|
const USIZE_SIZE: usize = mem::size_of::<usize>();
|
|
|
|
|
|
|
|
|
|
let len = s.len();
|
|
|
|
|
@@ -292,7 +293,16 @@ fn is_ascii(s: &[u8]) -> bool {
|
|
|
|
|
// We also do this for architectures where `size_of::<usize>()` isn't
|
|
|
|
|
// sufficient alignment for `usize`, because it's a weird edge case.
|
|
|
|
|
if len < USIZE_SIZE || len < align_offset || USIZE_SIZE < mem::align_of::<usize>() {
|
|
|
|
|
return s.iter().all(|b| b.is_ascii());
|
|
|
|
|
// FIXME: once iterators and closures can be used in `const fn`,
|
|
|
|
|
// return s.iter().all(|b| b.is_ascii());
|
|
|
|
|
let mut i = 0;
|
|
|
|
|
while i < len {
|
|
|
|
|
if !s[i].is_ascii() {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
i += 1;
|
|
|
|
|
}
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// We always read the first word unaligned, which means `align_offset` is
|
|
|
|
|
@@ -321,18 +331,26 @@ fn is_ascii(s: &[u8]) -> bool {
|
|
|
|
|
// Paranoia check about alignment, since we're about to do a bunch of
|
|
|
|
|
// unaligned loads. In practice this should be impossible barring a bug in
|
|
|
|
|
// `align_offset` though.
|
|
|
|
|
debug_assert_eq!(word_ptr.addr() % mem::align_of::<usize>(), 0);
|
|
|
|
|
// While this method is allowed to spuriously fail in CTFE, if it doesn't
|
|
|
|
|
// have alignment information it should have given a `usize::MAX` for
|
|
|
|
|
// `align_offset` earlier, sending things through the scalar path instead of
|
|
|
|
|
// this one, so this check should pass if it's reachable.
|
|
|
|
|
debug_assert!(word_ptr.is_aligned_to(mem::align_of::<usize>()));
|
|
|
|
|
|
|
|
|
|
// Read subsequent words until the last aligned word, excluding the last
|
|
|
|
|
// aligned word by itself to be done in tail check later, to ensure that
|
|
|
|
|
// tail is always one `usize` at most to extra branch `byte_pos == len`.
|
|
|
|
|
while byte_pos < len - USIZE_SIZE {
|
|
|
|
|
debug_assert!(
|
|
|
|
|
// Sanity check that the read is in bounds
|
|
|
|
|
(word_ptr.addr() + USIZE_SIZE) <= start.addr().wrapping_add(len) &&
|
|
|
|
|
// And that our assumptions about `byte_pos` hold.
|
|
|
|
|
(word_ptr.addr() - start.addr()) == byte_pos
|
|
|
|
|
);
|
|
|
|
|
// Sanity check that the read is in bounds
|
|
|
|
|
debug_assert!(byte_pos + USIZE_SIZE <= len);
|
|
|
|
|
// And that our assumptions about `byte_pos` hold.
|
|
|
|
|
debug_assert!(matches!(
|
|
|
|
|
word_ptr.cast::<u8>().guaranteed_eq(start.wrapping_add(byte_pos)),
|
|
|
|
|
// These are from the same allocation, so will hopefully always be
|
|
|
|
|
// known to match even in CTFE, but if it refuses to compare them
|
|
|
|
|
// that's ok since it's just a debug check anyway.
|
|
|
|
|
None | Some(true),
|
|
|
|
|
));
|
|
|
|
|
|
|
|
|
|
// SAFETY: We know `word_ptr` is properly aligned (because of
|
|
|
|
|
// `align_offset`), and we know that we have enough bytes between `word_ptr` and the end
|
|
|
|
|
|