Auto merge of #140999 - hkBst:update-escaper, r=nnethercote

update to literal-escaper 0.0.4 for better API without `unreachable` and faster string parsing

This is the replacement for just the part of https://github.com/rust-lang/rust/pull/138163 dealing with the changed API of unescape functionality, since that got moved into its own crate.

<del>This uses an unpublished version of literal-escaper (https://github.com/rust-lang/literal-escaper/pull/8).</del>

r? `@nnethercote`
This commit is contained in:
bors
2025-06-25 01:03:30 +00:00
16 changed files with 67 additions and 94 deletions

View File

@@ -1,5 +1,3 @@
use std::ops::Range;
use diagnostics::make_unclosed_delims_error;
use rustc_ast::ast::{self, AttrStyle};
use rustc_ast::token::{self, CommentKind, Delimiter, IdentIsRaw, Token, TokenKind};
@@ -10,7 +8,7 @@ use rustc_errors::{Applicability, Diag, DiagCtxtHandle, StashKey};
use rustc_lexer::{
Base, Cursor, DocStyle, FrontmatterAllowed, LiteralKind, RawStrError, is_whitespace,
};
use rustc_literal_escaper::{EscapeError, Mode, unescape_mixed, unescape_unicode};
use rustc_literal_escaper::{EscapeError, Mode, check_for_errors};
use rustc_session::lint::BuiltinLintDiag;
use rustc_session::lint::builtin::{
RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX, RUST_2024_GUARDED_STRING_INCOMPATIBLE_SYNTAX,
@@ -702,7 +700,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
}
err.emit()
}
self.cook_unicode(token::Char, Mode::Char, start, end, 1, 1) // ' '
self.cook_quoted(token::Char, Mode::Char, start, end, 1, 1) // ' '
}
rustc_lexer::LiteralKind::Byte { terminated } => {
if !terminated {
@@ -714,7 +712,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
.with_code(E0763)
.emit()
}
self.cook_unicode(token::Byte, Mode::Byte, start, end, 2, 1) // b' '
self.cook_quoted(token::Byte, Mode::Byte, start, end, 2, 1) // b' '
}
rustc_lexer::LiteralKind::Str { terminated } => {
if !terminated {
@@ -726,7 +724,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
.with_code(E0765)
.emit()
}
self.cook_unicode(token::Str, Mode::Str, start, end, 1, 1) // " "
self.cook_quoted(token::Str, Mode::Str, start, end, 1, 1) // " "
}
rustc_lexer::LiteralKind::ByteStr { terminated } => {
if !terminated {
@@ -738,7 +736,8 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
.with_code(E0766)
.emit()
}
self.cook_unicode(token::ByteStr, Mode::ByteStr, start, end, 2, 1) // b" "
self.cook_quoted(token::ByteStr, Mode::ByteStr, start, end, 2, 1)
// b" "
}
rustc_lexer::LiteralKind::CStr { terminated } => {
if !terminated {
@@ -750,13 +749,14 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
.with_code(E0767)
.emit()
}
self.cook_mixed(token::CStr, Mode::CStr, start, end, 2, 1) // c" "
self.cook_quoted(token::CStr, Mode::CStr, start, end, 2, 1) // c" "
}
rustc_lexer::LiteralKind::RawStr { n_hashes } => {
if let Some(n_hashes) = n_hashes {
let n = u32::from(n_hashes);
let kind = token::StrRaw(n_hashes);
self.cook_unicode(kind, Mode::RawStr, start, end, 2 + n, 1 + n) // r##" "##
self.cook_quoted(kind, Mode::RawStr, start, end, 2 + n, 1 + n)
// r##" "##
} else {
self.report_raw_str_error(start, 1);
}
@@ -765,7 +765,8 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
if let Some(n_hashes) = n_hashes {
let n = u32::from(n_hashes);
let kind = token::ByteStrRaw(n_hashes);
self.cook_unicode(kind, Mode::RawByteStr, start, end, 3 + n, 1 + n) // br##" "##
self.cook_quoted(kind, Mode::RawByteStr, start, end, 3 + n, 1 + n)
// br##" "##
} else {
self.report_raw_str_error(start, 2);
}
@@ -774,7 +775,8 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
if let Some(n_hashes) = n_hashes {
let n = u32::from(n_hashes);
let kind = token::CStrRaw(n_hashes);
self.cook_unicode(kind, Mode::RawCStr, start, end, 3 + n, 1 + n) // cr##" "##
self.cook_quoted(kind, Mode::RawCStr, start, end, 3 + n, 1 + n)
// cr##" "##
} else {
self.report_raw_str_error(start, 2);
}
@@ -1091,7 +1093,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
self.dcx().emit_fatal(errors::TooManyHashes { span: self.mk_sp(start, self.pos), num });
}
fn cook_common(
fn cook_quoted(
&self,
mut kind: token::LitKind,
mode: Mode,
@@ -1099,32 +1101,28 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
end: BytePos,
prefix_len: u32,
postfix_len: u32,
unescape: fn(&str, Mode, &mut dyn FnMut(Range<usize>, Result<(), EscapeError>)),
) -> (token::LitKind, Symbol) {
let content_start = start + BytePos(prefix_len);
let content_end = end - BytePos(postfix_len);
let lit_content = self.str_from_to(content_start, content_end);
unescape(lit_content, mode, &mut |range, result| {
// Here we only check for errors. The actual unescaping is done later.
if let Err(err) = result {
let span_with_quotes = self.mk_sp(start, end);
let (start, end) = (range.start as u32, range.end as u32);
let lo = content_start + BytePos(start);
let hi = lo + BytePos(end - start);
let span = self.mk_sp(lo, hi);
let is_fatal = err.is_fatal();
if let Some(guar) = emit_unescape_error(
self.dcx(),
lit_content,
span_with_quotes,
span,
mode,
range,
err,
) {
assert!(is_fatal);
kind = token::Err(guar);
}
check_for_errors(lit_content, mode, |range, err| {
let span_with_quotes = self.mk_sp(start, end);
let (start, end) = (range.start as u32, range.end as u32);
let lo = content_start + BytePos(start);
let hi = lo + BytePos(end - start);
let span = self.mk_sp(lo, hi);
let is_fatal = err.is_fatal();
if let Some(guar) = emit_unescape_error(
self.dcx(),
lit_content,
span_with_quotes,
span,
mode,
range,
err,
) {
assert!(is_fatal);
kind = token::Err(guar);
}
});
@@ -1137,34 +1135,6 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
};
(kind, sym)
}
fn cook_unicode(
&self,
kind: token::LitKind,
mode: Mode,
start: BytePos,
end: BytePos,
prefix_len: u32,
postfix_len: u32,
) -> (token::LitKind, Symbol) {
self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| {
unescape_unicode(src, mode, &mut |span, result| callback(span, result.map(drop)))
})
}
fn cook_mixed(
&self,
kind: token::LitKind,
mode: Mode,
start: BytePos,
end: BytePos,
prefix_len: u32,
postfix_len: u32,
) -> (token::LitKind, Symbol) {
self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| {
unescape_mixed(src, mode, &mut |span, result| callback(span, result.map(drop)))
})
}
}
pub fn nfc_normalize(string: &str) -> Symbol {