add MAX_LEN_UTF8 and MAX_LEN_UTF16 constants

This commit is contained in:
HTGAzureX1212
2024-02-02 20:49:20 +08:00
parent a5db378dc1
commit eec49bbf59
14 changed files with 49 additions and 18 deletions

View File

@@ -105,6 +105,7 @@
#![feature(box_uninit_write)] #![feature(box_uninit_write)]
#![feature(bstr)] #![feature(bstr)]
#![feature(bstr_internals)] #![feature(bstr_internals)]
#![feature(char_max_len)]
#![feature(clone_to_uninit)] #![feature(clone_to_uninit)]
#![feature(coerce_unsized)] #![feature(coerce_unsized)]
#![feature(const_eval_select)] #![feature(const_eval_select)]

View File

@@ -1419,7 +1419,9 @@ impl String {
pub fn push(&mut self, ch: char) { pub fn push(&mut self, ch: char) {
match ch.len_utf8() { match ch.len_utf8() {
1 => self.vec.push(ch as u8), 1 => self.vec.push(ch as u8),
_ => self.vec.extend_from_slice(ch.encode_utf8(&mut [0; 4]).as_bytes()), _ => {
self.vec.extend_from_slice(ch.encode_utf8(&mut [0; char::MAX_LEN_UTF8]).as_bytes())
}
} }
} }
@@ -1716,7 +1718,7 @@ impl String {
#[rustc_confusables("set")] #[rustc_confusables("set")]
pub fn insert(&mut self, idx: usize, ch: char) { pub fn insert(&mut self, idx: usize, ch: char) {
assert!(self.is_char_boundary(idx)); assert!(self.is_char_boundary(idx));
let mut bits = [0; 4]; let mut bits = [0; char::MAX_LEN_UTF8];
let bits = ch.encode_utf8(&mut bits).as_bytes(); let bits = ch.encode_utf8(&mut bits).as_bytes();
unsafe { unsafe {
@@ -2771,7 +2773,7 @@ impl SpecToString for core::ascii::Char {
impl SpecToString for char { impl SpecToString for char {
#[inline] #[inline]
fn spec_to_string(&self) -> String { fn spec_to_string(&self) -> String {
String::from(self.encode_utf8(&mut [0; 4])) String::from(self.encode_utf8(&mut [0; char::MAX_LEN_UTF8]))
} }
} }

View File

@@ -3,6 +3,7 @@
#![feature(iter_array_chunks)] #![feature(iter_array_chunks)]
#![feature(assert_matches)] #![feature(assert_matches)]
#![feature(btree_extract_if)] #![feature(btree_extract_if)]
#![feature(char_max_len)]
#![feature(cow_is_borrowed)] #![feature(cow_is_borrowed)]
#![feature(core_intrinsics)] #![feature(core_intrinsics)]
#![feature(downcast_unchecked)] #![feature(downcast_unchecked)]

View File

@@ -2,6 +2,7 @@
use std::assert_matches::assert_matches; use std::assert_matches::assert_matches;
use std::borrow::Cow; use std::borrow::Cow;
use std::char::MAX_LEN_UTF8;
use std::cmp::Ordering::{Equal, Greater, Less}; use std::cmp::Ordering::{Equal, Greater, Less};
use std::str::{from_utf8, from_utf8_unchecked}; use std::str::{from_utf8, from_utf8_unchecked};
@@ -1231,7 +1232,7 @@ fn test_to_uppercase_rev_iterator() {
#[test] #[test]
#[cfg_attr(miri, ignore)] // Miri is too slow #[cfg_attr(miri, ignore)] // Miri is too slow
fn test_chars_decoding() { fn test_chars_decoding() {
let mut bytes = [0; 4]; let mut bytes = [0; MAX_LEN_UTF8];
for c in (0..0x110000).filter_map(std::char::from_u32) { for c in (0..0x110000).filter_map(std::char::from_u32) {
let s = c.encode_utf8(&mut bytes); let s = c.encode_utf8(&mut bytes);
if Some(c) != s.chars().next() { if Some(c) != s.chars().next() {
@@ -1243,7 +1244,7 @@ fn test_chars_decoding() {
#[test] #[test]
#[cfg_attr(miri, ignore)] // Miri is too slow #[cfg_attr(miri, ignore)] // Miri is too slow
fn test_chars_rev_decoding() { fn test_chars_rev_decoding() {
let mut bytes = [0; 4]; let mut bytes = [0; MAX_LEN_UTF8];
for c in (0..0x110000).filter_map(std::char::from_u32) { for c in (0..0x110000).filter_map(std::char::from_u32) {
let s = c.encode_utf8(&mut bytes); let s = c.encode_utf8(&mut bytes);
if Some(c) != s.chars().rev().next() { if Some(c) != s.chars().rev().next() {

View File

@@ -71,6 +71,16 @@ impl char {
#[stable(feature = "assoc_char_consts", since = "1.52.0")] #[stable(feature = "assoc_char_consts", since = "1.52.0")]
pub const MAX: char = '\u{10FFFF}'; pub const MAX: char = '\u{10FFFF}';
/// The maximum number of bytes required to [encode](char::encode_utf8) a `char` to
/// UTF-8 encoding.
#[unstable(feature = "char_max_len", issue = "121714")]
pub const MAX_LEN_UTF8: usize = 4;
/// The maximum number of two-byte units required to [encode](char::encode_utf16) a `char`
/// to UTF-16 encoding.
#[unstable(feature = "char_max_len", issue = "121714")]
pub const MAX_LEN_UTF16: usize = 2;
/// `U+FFFD REPLACEMENT CHARACTER` (<28>) is used in Unicode to represent a /// `U+FFFD REPLACEMENT CHARACTER` (<28>) is used in Unicode to represent a
/// decoding error. /// decoding error.
/// ///

View File

@@ -95,6 +95,16 @@ const MAX_THREE_B: u32 = 0x10000;
#[stable(feature = "rust1", since = "1.0.0")] #[stable(feature = "rust1", since = "1.0.0")]
pub const MAX: char = char::MAX; pub const MAX: char = char::MAX;
/// The maximum number of bytes required to [encode](char::encode_utf8) a `char` to
/// UTF-8 encoding.
#[unstable(feature = "char_max_len", issue = "121714")]
pub const MAX_LEN_UTF8: usize = char::MAX_LEN_UTF8;
/// The maximum number of two-byte units required to [encode](char::encode_utf16) a `char`
/// to UTF-16 encoding.
#[unstable(feature = "char_max_len", issue = "121714")]
pub const MAX_LEN_UTF16: usize = char::MAX_LEN_UTF16;
/// `U+FFFD REPLACEMENT CHARACTER` (<28>) is used in Unicode to represent a /// `U+FFFD REPLACEMENT CHARACTER` (<28>) is used in Unicode to represent a
/// decoding error. Use [`char::REPLACEMENT_CHARACTER`] instead. /// decoding error. Use [`char::REPLACEMENT_CHARACTER`] instead.
#[stable(feature = "decode_utf16", since = "1.9.0")] #[stable(feature = "decode_utf16", since = "1.9.0")]

View File

@@ -3,7 +3,7 @@
#![stable(feature = "rust1", since = "1.0.0")] #![stable(feature = "rust1", since = "1.0.0")]
use crate::cell::{Cell, Ref, RefCell, RefMut, SyncUnsafeCell, UnsafeCell}; use crate::cell::{Cell, Ref, RefCell, RefMut, SyncUnsafeCell, UnsafeCell};
use crate::char::EscapeDebugExtArgs; use crate::char::{EscapeDebugExtArgs, MAX_LEN_UTF8};
use crate::marker::PhantomData; use crate::marker::PhantomData;
use crate::num::fmt as numfmt; use crate::num::fmt as numfmt;
use crate::ops::Deref; use crate::ops::Deref;
@@ -187,7 +187,7 @@ pub trait Write {
/// ``` /// ```
#[stable(feature = "fmt_write_char", since = "1.1.0")] #[stable(feature = "fmt_write_char", since = "1.1.0")]
fn write_char(&mut self, c: char) -> Result { fn write_char(&mut self, c: char) -> Result {
self.write_str(c.encode_utf8(&mut [0; 4])) self.write_str(c.encode_utf8(&mut [0; MAX_LEN_UTF8]))
} }
/// Glue for usage of the [`write!`] macro with implementors of this trait. /// Glue for usage of the [`write!`] macro with implementors of this trait.
@@ -2768,7 +2768,7 @@ impl Display for char {
if f.options.width.is_none() && f.options.precision.is_none() { if f.options.width.is_none() && f.options.precision.is_none() {
f.write_char(*self) f.write_char(*self)
} else { } else {
f.pad(self.encode_utf8(&mut [0; 4])) f.pad(self.encode_utf8(&mut [0; MAX_LEN_UTF8]))
} }
} }
} }

View File

@@ -38,6 +38,7 @@
issue = "27721" issue = "27721"
)] )]
use crate::char::MAX_LEN_UTF8;
use crate::cmp::Ordering; use crate::cmp::Ordering;
use crate::convert::TryInto as _; use crate::convert::TryInto as _;
use crate::slice::memchr; use crate::slice::memchr;
@@ -561,8 +562,8 @@ impl Pattern for char {
type Searcher<'a> = CharSearcher<'a>; type Searcher<'a> = CharSearcher<'a>;
#[inline] #[inline]
fn into_searcher(self, haystack: &str) -> Self::Searcher<'_> { fn into_searcher<'a>(self, haystack: &'a str) -> Self::Searcher<'a> {
let mut utf8_encoded = [0; 4]; let mut utf8_encoded = [0; MAX_LEN_UTF8];
let utf8_size = self let utf8_size = self
.encode_utf8(&mut utf8_encoded) .encode_utf8(&mut utf8_encoded)
.len() .len()

View File

@@ -1,3 +1,4 @@
use std::char::MAX_LEN_UTF8;
use std::str::FromStr; use std::str::FromStr;
use std::{char, str}; use std::{char, str};
@@ -259,7 +260,7 @@ fn test_escape_unicode() {
#[test] #[test]
fn test_encode_utf8() { fn test_encode_utf8() {
fn check(input: char, expect: &[u8]) { fn check(input: char, expect: &[u8]) {
let mut buf = [0; 4]; let mut buf = [0; MAX_LEN_UTF8];
let ptr = buf.as_ptr(); let ptr = buf.as_ptr();
let s = input.encode_utf8(&mut buf); let s = input.encode_utf8(&mut buf);
assert_eq!(s.as_ptr() as usize, ptr as usize); assert_eq!(s.as_ptr() as usize, ptr as usize);

View File

@@ -13,6 +13,7 @@
#![feature(bigint_helper_methods)] #![feature(bigint_helper_methods)]
#![feature(bstr)] #![feature(bstr)]
#![feature(cell_update)] #![feature(cell_update)]
#![feature(char_max_len)]
#![feature(clone_to_uninit)] #![feature(clone_to_uninit)]
#![feature(const_eval_select)] #![feature(const_eval_select)]
#![feature(const_swap_nonoverlapping)] #![feature(const_swap_nonoverlapping)]

View File

@@ -1,5 +1,6 @@
use rand::RngCore; use rand::RngCore;
use crate::char::MAX_LEN_UTF8;
use crate::fs::{self, File, FileTimes, OpenOptions}; use crate::fs::{self, File, FileTimes, OpenOptions};
use crate::io::prelude::*; use crate::io::prelude::*;
use crate::io::{BorrowedBuf, ErrorKind, SeekFrom}; use crate::io::{BorrowedBuf, ErrorKind, SeekFrom};
@@ -155,7 +156,7 @@ fn file_test_io_non_positional_read() {
#[test] #[test]
fn file_test_io_seek_and_tell_smoke_test() { fn file_test_io_seek_and_tell_smoke_test() {
let message = "ten-four"; let message = "ten-four";
let mut read_mem = [0; 4]; let mut read_mem = [0; MAX_LEN_UTF8];
let set_cursor = 4 as u64; let set_cursor = 4 as u64;
let tell_pos_pre_read; let tell_pos_pre_read;
let tell_pos_post_read; let tell_pos_post_read;
@@ -356,7 +357,7 @@ fn file_test_io_seek_shakedown() {
let chunk_one: &str = "qwer"; let chunk_one: &str = "qwer";
let chunk_two: &str = "asdf"; let chunk_two: &str = "asdf";
let chunk_three: &str = "zxcv"; let chunk_three: &str = "zxcv";
let mut read_mem = [0; 4]; let mut read_mem = [0; MAX_LEN_UTF8];
let tmpdir = tmpdir(); let tmpdir = tmpdir();
let filename = &tmpdir.join("file_rt_io_file_test_seek_shakedown.txt"); let filename = &tmpdir.join("file_rt_io_file_test_seek_shakedown.txt");
{ {
@@ -621,7 +622,7 @@ fn file_test_directoryinfo_readdir() {
check!(w.write(msg)); check!(w.write(msg));
} }
let files = check!(fs::read_dir(dir)); let files = check!(fs::read_dir(dir));
let mut mem = [0; 4]; let mut mem = [0; MAX_LEN_UTF8];
for f in files { for f in files {
let f = f.unwrap().path(); let f = f.unwrap().path();
{ {

View File

@@ -281,6 +281,7 @@
#![feature(cfg_sanitizer_cfi)] #![feature(cfg_sanitizer_cfi)]
#![feature(cfg_target_thread_local)] #![feature(cfg_target_thread_local)]
#![feature(cfi_encoding)] #![feature(cfi_encoding)]
#![feature(char_max_len)]
#![feature(concat_idents)] #![feature(concat_idents)]
#![feature(decl_macro)] #![feature(decl_macro)]
#![feature(deprecated_suggestion)] #![feature(deprecated_suggestion)]

View File

@@ -1,5 +1,6 @@
#![unstable(issue = "none", feature = "windows_stdio")] #![unstable(issue = "none", feature = "windows_stdio")]
use core::char::MAX_LEN_UTF8;
use core::str::utf8_char_width; use core::str::utf8_char_width;
use super::api::{self, WinError}; use super::api::{self, WinError};
@@ -426,7 +427,7 @@ fn utf16_to_utf8(utf16: &[u16], utf8: &mut [u8]) -> io::Result<usize> {
impl IncompleteUtf8 { impl IncompleteUtf8 {
pub const fn new() -> IncompleteUtf8 { pub const fn new() -> IncompleteUtf8 {
IncompleteUtf8 { bytes: [0; 4], len: 0 } IncompleteUtf8 { bytes: [0; MAX_LEN_UTF8], len: 0 }
} }
} }

View File

@@ -18,7 +18,7 @@
#[cfg(test)] #[cfg(test)]
mod tests; mod tests;
use core::char::{encode_utf8_raw, encode_utf16_raw}; use core::char::{MAX_LEN_UTF8, MAX_LEN_UTF16, encode_utf8_raw, encode_utf16_raw};
use core::clone::CloneToUninit; use core::clone::CloneToUninit;
use core::str::next_code_point; use core::str::next_code_point;
@@ -240,7 +240,7 @@ impl Wtf8Buf {
/// Copied from String::push /// Copied from String::push
/// This does **not** include the WTF-8 concatenation check or `is_known_utf8` check. /// This does **not** include the WTF-8 concatenation check or `is_known_utf8` check.
fn push_code_point_unchecked(&mut self, code_point: CodePoint) { fn push_code_point_unchecked(&mut self, code_point: CodePoint) {
let mut bytes = [0; 4]; let mut bytes = [0; MAX_LEN_UTF8];
let bytes = encode_utf8_raw(code_point.value, &mut bytes); let bytes = encode_utf8_raw(code_point.value, &mut bytes);
self.bytes.extend_from_slice(bytes) self.bytes.extend_from_slice(bytes)
} }
@@ -1001,7 +1001,7 @@ impl<'a> Iterator for EncodeWide<'a> {
return Some(tmp); return Some(tmp);
} }
let mut buf = [0; 2]; let mut buf = [0; MAX_LEN_UTF16];
self.code_points.next().map(|code_point| { self.code_points.next().map(|code_point| {
let n = encode_utf16_raw(code_point.value, &mut buf).len(); let n = encode_utf16_raw(code_point.value, &mut buf).len();
if n == 2 { if n == 2 {