Actually translate CRLF in raw byte strings and unify unescape impl
This commit is contained in:
@@ -1348,7 +1348,7 @@ impl<'a> StringReader<'a> {
|
||||
|
||||
fn validate_raw_str_escape(&self, content_start: BytePos, content_end: BytePos) {
|
||||
self.with_str_from_to(content_start, content_end, |lit: &str| {
|
||||
unescape::unescape_raw_str(lit, unescape::Mode::Str, &mut |range, c| {
|
||||
unescape::unescape_raw_str(lit, &mut |range, c| {
|
||||
if let Err(err) = c {
|
||||
emit_unescape_error(
|
||||
&self.sess.span_diagnostic,
|
||||
@@ -1365,7 +1365,7 @@ impl<'a> StringReader<'a> {
|
||||
|
||||
fn validate_raw_byte_str_escape(&self, content_start: BytePos, content_end: BytePos) {
|
||||
self.with_str_from_to(content_start, content_end, |lit: &str| {
|
||||
unescape::unescape_raw_str(lit, unescape::Mode::ByteStr, &mut |range, c| {
|
||||
unescape::unescape_raw_byte_str(lit, &mut |range, c| {
|
||||
if let Err(err) = c {
|
||||
emit_unescape_error(
|
||||
&self.sess.span_diagnostic,
|
||||
|
||||
@@ -4,8 +4,9 @@ use crate::ast::{self, Lit, LitKind};
|
||||
use crate::parse::parser::Parser;
|
||||
use crate::parse::PResult;
|
||||
use crate::parse::token::{self, Token, TokenKind};
|
||||
use crate::parse::unescape::{self, unescape_str, unescape_byte_str, unescape_raw_str};
|
||||
use crate::parse::unescape::{unescape_char, unescape_byte};
|
||||
use crate::parse::unescape::{unescape_str, unescape_byte_str};
|
||||
use crate::parse::unescape::{unescape_raw_str, unescape_raw_byte_str};
|
||||
use crate::print::pprust;
|
||||
use crate::symbol::{kw, sym, Symbol};
|
||||
use crate::tokenstream::{TokenStream, TokenTree};
|
||||
@@ -144,7 +145,7 @@ impl LitKind {
|
||||
let symbol = if s.contains('\r') {
|
||||
let mut buf = String::with_capacity(s.len());
|
||||
let mut error = Ok(());
|
||||
unescape_raw_str(&s, unescape::Mode::Str, &mut |_, unescaped_char| {
|
||||
unescape_raw_str(&s, &mut |_, unescaped_char| {
|
||||
match unescaped_char {
|
||||
Ok(c) => buf.push(c),
|
||||
Err(_) => error = Err(LitError::LexerError),
|
||||
@@ -172,7 +173,26 @@ impl LitKind {
|
||||
buf.shrink_to_fit();
|
||||
LitKind::ByteStr(Lrc::new(buf))
|
||||
}
|
||||
token::ByteStrRaw(_) => LitKind::ByteStr(Lrc::new(symbol.to_string().into_bytes())),
|
||||
token::ByteStrRaw(_) => {
|
||||
let s = symbol.as_str();
|
||||
let bytes = if s.contains('\r') {
|
||||
let mut buf = Vec::with_capacity(s.len());
|
||||
let mut error = Ok(());
|
||||
unescape_raw_byte_str(&s, &mut |_, unescaped_byte| {
|
||||
match unescaped_byte {
|
||||
Ok(c) => buf.push(c),
|
||||
Err(_) => error = Err(LitError::LexerError),
|
||||
}
|
||||
});
|
||||
error?;
|
||||
buf.shrink_to_fit();
|
||||
buf
|
||||
} else {
|
||||
symbol.to_string().into_bytes()
|
||||
};
|
||||
|
||||
LitKind::ByteStr(Lrc::new(bytes))
|
||||
},
|
||||
token::Err => LitKind::Err(symbol),
|
||||
})
|
||||
}
|
||||
|
||||
@@ -71,29 +71,24 @@ where
|
||||
/// sequence of characters or errors.
|
||||
/// NOTE: Raw strings do not perform any explicit character escaping, here we
|
||||
/// only translate CRLF to LF and produce errors on bare CR.
|
||||
pub(crate) fn unescape_raw_str<F>(literal_text: &str, mode: Mode, callback: &mut F)
|
||||
pub(crate) fn unescape_raw_str<F>(literal_text: &str, callback: &mut F)
|
||||
where
|
||||
F: FnMut(Range<usize>, Result<char, EscapeError>),
|
||||
{
|
||||
let mut byte_offset: usize = 0;
|
||||
unescape_raw_str_or_byte_str(literal_text, Mode::Str, callback)
|
||||
}
|
||||
|
||||
let mut chars = literal_text.chars().peekable();
|
||||
while let Some(curr) = chars.next() {
|
||||
let (result, scanned) = match (curr, chars.peek()) {
|
||||
('\r', Some('\n')) => {
|
||||
chars.next();
|
||||
(Ok('\n'), [Some('\r'), Some('\n')])
|
||||
},
|
||||
('\r', _) =>
|
||||
(Err(EscapeError::BareCarriageReturn), [Some('\r'), None]),
|
||||
(c, _) if mode.is_bytes() && c > '\x7F' =>
|
||||
(Err(EscapeError::NonAsciiCharInByteString), [Some(c), None]),
|
||||
(c, _) => (Ok(c), [Some(c), None]),
|
||||
};
|
||||
let len_utf8: usize = scanned.iter().filter_map(|&x| x).map(char::len_utf8).sum();
|
||||
callback(byte_offset..(byte_offset + len_utf8), result);
|
||||
byte_offset += len_utf8;
|
||||
}
|
||||
/// Takes a contents of a string literal (without quotes) and produces a
|
||||
/// sequence of characters or errors.
|
||||
/// NOTE: Raw strings do not perform any explicit character escaping, here we
|
||||
/// only translate CRLF to LF and produce errors on bare CR.
|
||||
pub(crate) fn unescape_raw_byte_str<F>(literal_text: &str, callback: &mut F)
|
||||
where
|
||||
F: FnMut(Range<usize>, Result<u8, EscapeError>),
|
||||
{
|
||||
unescape_raw_str_or_byte_str(literal_text, Mode::ByteStr, &mut |range, char| {
|
||||
callback(range, char.map(byte_from_char))
|
||||
})
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
@@ -284,9 +279,38 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
/// Takes a contents of a string literal (without quotes) and produces a
|
||||
/// sequence of characters or errors.
|
||||
/// NOTE: Raw strings do not perform any explicit character escaping, here we
|
||||
/// only translate CRLF to LF and produce errors on bare CR.
|
||||
fn unescape_raw_str_or_byte_str<F>(literal_text: &str, mode: Mode, callback: &mut F)
|
||||
where
|
||||
F: FnMut(Range<usize>, Result<char, EscapeError>),
|
||||
{
|
||||
let mut byte_offset: usize = 0;
|
||||
|
||||
let mut chars = literal_text.chars().peekable();
|
||||
while let Some(curr) = chars.next() {
|
||||
let (result, scanned) = match (curr, chars.peek()) {
|
||||
('\r', Some('\n')) => {
|
||||
chars.next();
|
||||
(Ok('\n'), [Some('\r'), Some('\n')])
|
||||
},
|
||||
('\r', _) =>
|
||||
(Err(EscapeError::BareCarriageReturn), [Some('\r'), None]),
|
||||
(c, _) if mode.is_bytes() && !c.is_ascii() =>
|
||||
(Err(EscapeError::NonAsciiCharInByteString), [Some(c), None]),
|
||||
(c, _) => (Ok(c), [Some(c), None]),
|
||||
};
|
||||
let len_utf8: usize = scanned.iter().filter_map(|&x| x).map(char::len_utf8).sum();
|
||||
callback(byte_offset..(byte_offset + len_utf8), result);
|
||||
byte_offset += len_utf8;
|
||||
}
|
||||
}
|
||||
|
||||
/// Narrows a `char` to the single byte with the same numeric value.
///
/// # Panics
///
/// Panics if the code point of `c` does not fit in a `u8` (i.e. is greater
/// than `0xFF`). Callers in byte / byte-string mode are expected to have
/// rejected such characters before reaching this point.
fn byte_from_char(c: char) -> u8 {
    let res = c as u32;
    // A single check is enough: the same condition was previously asserted
    // twice in a row, which made the second (updated) message unreachable.
    assert!(res <= u32::from(u8::max_value()), "guaranteed because of Mode::Byte(Str)");
    // Lossless: the assertion above bounds `res` to the `u8` range.
    res as u8
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user