Auto merge of #60793 - Xanewok:raw-string-cleanup, r=petrochenkov
lexer: Disallow bare CR in raw byte strings. Handles bare CR ~but doesn't translate `\r\n` to `\n` in raw strings yet~ and translates CRLF to LF in raw strings. As a side note, I think it'd be good to change the `unescape_*` functions to return plain iterators to reduce some boilerplate (e.g. `has_error` could benefit from collecting `Result<T>` and aborting early on errors), but I will do that separately, unless I missed something here that prevents it. @matklad @petrochenkov thoughts?
This commit is contained in:
@@ -130,7 +130,7 @@ impl<'a> StringReader<'a> {
|
|||||||
self.ch.is_none()
|
self.ch.is_none()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn fail_unterminated_raw_string(&self, pos: BytePos, hash_count: u16) {
|
fn fail_unterminated_raw_string(&self, pos: BytePos, hash_count: u16) -> ! {
|
||||||
let mut err = self.struct_span_fatal(pos, pos, "unterminated raw string");
|
let mut err = self.struct_span_fatal(pos, pos, "unterminated raw string");
|
||||||
err.span_label(self.mk_sp(pos, pos), "unterminated raw string");
|
err.span_label(self.mk_sp(pos, pos), "unterminated raw string");
|
||||||
|
|
||||||
@@ -292,15 +292,6 @@ impl<'a> StringReader<'a> {
|
|||||||
self.sess.span_diagnostic.struct_span_fatal(self.mk_sp(from_pos, to_pos), &m[..])
|
self.sess.span_diagnostic.struct_span_fatal(self.mk_sp(from_pos, to_pos), &m[..])
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Report a lexical error spanning [`from_pos`, `to_pos`), appending an
|
|
||||||
/// escaped character to the error message
|
|
||||||
fn err_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char) {
|
|
||||||
let mut m = m.to_string();
|
|
||||||
m.push_str(": ");
|
|
||||||
push_escaped_char(&mut m, c);
|
|
||||||
self.err_span_(from_pos, to_pos, &m[..]);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Advance peek_token to refer to the next token, and
|
/// Advance peek_token to refer to the next token, and
|
||||||
/// possibly update the interner.
|
/// possibly update the interner.
|
||||||
fn advance_token(&mut self) -> Result<(), ()> {
|
fn advance_token(&mut self) -> Result<(), ()> {
|
||||||
@@ -1070,7 +1061,13 @@ impl<'a> StringReader<'a> {
|
|||||||
self.validate_byte_str_escape(start_with_quote);
|
self.validate_byte_str_escape(start_with_quote);
|
||||||
(token::ByteStr, symbol)
|
(token::ByteStr, symbol)
|
||||||
},
|
},
|
||||||
Some('r') => self.scan_raw_byte_string(),
|
Some('r') => {
|
||||||
|
let (start, end, hash_count) = self.scan_raw_string();
|
||||||
|
let symbol = self.name_from_to(start, end);
|
||||||
|
self.validate_raw_byte_str_escape(start, end);
|
||||||
|
|
||||||
|
(token::ByteStrRaw(hash_count), symbol)
|
||||||
|
}
|
||||||
_ => unreachable!(), // Should have been a token::Ident above.
|
_ => unreachable!(), // Should have been a token::Ident above.
|
||||||
};
|
};
|
||||||
let suffix = self.scan_optional_raw_name();
|
let suffix = self.scan_optional_raw_name();
|
||||||
@@ -1086,79 +1083,9 @@ impl<'a> StringReader<'a> {
|
|||||||
Ok(TokenKind::lit(token::Str, symbol, suffix))
|
Ok(TokenKind::lit(token::Str, symbol, suffix))
|
||||||
}
|
}
|
||||||
'r' => {
|
'r' => {
|
||||||
let start_bpos = self.pos;
|
let (start, end, hash_count) = self.scan_raw_string();
|
||||||
self.bump();
|
let symbol = self.name_from_to(start, end);
|
||||||
let mut hash_count: u16 = 0;
|
self.validate_raw_str_escape(start, end);
|
||||||
while self.ch_is('#') {
|
|
||||||
if hash_count == 65535 {
|
|
||||||
let bpos = self.next_pos;
|
|
||||||
self.fatal_span_(start_bpos,
|
|
||||||
bpos,
|
|
||||||
"too many `#` symbols: raw strings may be \
|
|
||||||
delimited by up to 65535 `#` symbols").raise();
|
|
||||||
}
|
|
||||||
self.bump();
|
|
||||||
hash_count += 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
if self.is_eof() {
|
|
||||||
self.fail_unterminated_raw_string(start_bpos, hash_count);
|
|
||||||
} else if !self.ch_is('"') {
|
|
||||||
let last_bpos = self.pos;
|
|
||||||
let curr_char = self.ch.unwrap();
|
|
||||||
self.fatal_span_char(start_bpos,
|
|
||||||
last_bpos,
|
|
||||||
"found invalid character; only `#` is allowed \
|
|
||||||
in raw string delimitation",
|
|
||||||
curr_char).raise();
|
|
||||||
}
|
|
||||||
self.bump();
|
|
||||||
let content_start_bpos = self.pos;
|
|
||||||
let mut content_end_bpos;
|
|
||||||
let mut valid = true;
|
|
||||||
'outer: loop {
|
|
||||||
if self.is_eof() {
|
|
||||||
self.fail_unterminated_raw_string(start_bpos, hash_count);
|
|
||||||
}
|
|
||||||
// if self.ch_is('"') {
|
|
||||||
// content_end_bpos = self.pos;
|
|
||||||
// for _ in 0..hash_count {
|
|
||||||
// self.bump();
|
|
||||||
// if !self.ch_is('#') {
|
|
||||||
// continue 'outer;
|
|
||||||
let c = self.ch.unwrap();
|
|
||||||
match c {
|
|
||||||
'"' => {
|
|
||||||
content_end_bpos = self.pos;
|
|
||||||
for _ in 0..hash_count {
|
|
||||||
self.bump();
|
|
||||||
if !self.ch_is('#') {
|
|
||||||
continue 'outer;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
'\r' => {
|
|
||||||
if !self.nextch_is('\n') {
|
|
||||||
let last_bpos = self.pos;
|
|
||||||
self.err_span_(start_bpos,
|
|
||||||
last_bpos,
|
|
||||||
"bare CR not allowed in raw string, use \\r \
|
|
||||||
instead");
|
|
||||||
valid = false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
_ => (),
|
|
||||||
}
|
|
||||||
self.bump();
|
|
||||||
}
|
|
||||||
|
|
||||||
self.bump();
|
|
||||||
let symbol = if valid {
|
|
||||||
self.name_from_to(content_start_bpos, content_end_bpos)
|
|
||||||
} else {
|
|
||||||
Symbol::intern("??")
|
|
||||||
};
|
|
||||||
let suffix = self.scan_optional_raw_name();
|
let suffix = self.scan_optional_raw_name();
|
||||||
|
|
||||||
Ok(TokenKind::lit(token::StrRaw(hash_count), symbol, suffix))
|
Ok(TokenKind::lit(token::StrRaw(hash_count), symbol, suffix))
|
||||||
@@ -1315,16 +1242,18 @@ impl<'a> StringReader<'a> {
|
|||||||
id
|
id
|
||||||
}
|
}
|
||||||
|
|
||||||
fn scan_raw_byte_string(&mut self) -> (token::LitKind, Symbol) {
|
/// Scans a raw (byte) string, returning byte position range for `"<literal>"`
|
||||||
|
/// (excluding the quotes) along with the `#` character count in `(b)r##..."<literal>"##...`.
|
||||||
|
fn scan_raw_string(&mut self) -> (BytePos, BytePos, u16) {
|
||||||
let start_bpos = self.pos;
|
let start_bpos = self.pos;
|
||||||
self.bump();
|
self.bump();
|
||||||
let mut hash_count = 0;
|
let mut hash_count: u16 = 0;
|
||||||
while self.ch_is('#') {
|
while self.ch_is('#') {
|
||||||
if hash_count == 65535 {
|
if hash_count == 65535 {
|
||||||
let bpos = self.next_pos;
|
let bpos = self.next_pos;
|
||||||
self.fatal_span_(start_bpos,
|
self.fatal_span_(start_bpos,
|
||||||
bpos,
|
bpos,
|
||||||
"too many `#` symbols: raw byte strings may be \
|
"too many `#` symbols: raw strings may be \
|
||||||
delimited by up to 65535 `#` symbols").raise();
|
delimited by up to 65535 `#` symbols").raise();
|
||||||
}
|
}
|
||||||
self.bump();
|
self.bump();
|
||||||
@@ -1334,13 +1263,13 @@ impl<'a> StringReader<'a> {
|
|||||||
if self.is_eof() {
|
if self.is_eof() {
|
||||||
self.fail_unterminated_raw_string(start_bpos, hash_count);
|
self.fail_unterminated_raw_string(start_bpos, hash_count);
|
||||||
} else if !self.ch_is('"') {
|
} else if !self.ch_is('"') {
|
||||||
let pos = self.pos;
|
let last_bpos = self.pos;
|
||||||
let ch = self.ch.unwrap();
|
let curr_char = self.ch.unwrap();
|
||||||
self.fatal_span_char(start_bpos,
|
self.fatal_span_char(start_bpos,
|
||||||
pos,
|
last_bpos,
|
||||||
"found invalid character; only `#` is allowed in raw \
|
"found invalid character; only `#` is allowed \
|
||||||
string delimitation",
|
in raw string delimitation",
|
||||||
ch).raise();
|
curr_char).raise();
|
||||||
}
|
}
|
||||||
self.bump();
|
self.bump();
|
||||||
let content_start_bpos = self.pos;
|
let content_start_bpos = self.pos;
|
||||||
@@ -1360,19 +1289,14 @@ impl<'a> StringReader<'a> {
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
Some(c) => {
|
_ => (),
|
||||||
if c > '\x7F' {
|
|
||||||
let pos = self.pos;
|
|
||||||
self.err_span_char(pos, pos, "raw byte string must be ASCII", c);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
self.bump();
|
self.bump();
|
||||||
}
|
}
|
||||||
|
|
||||||
self.bump();
|
self.bump();
|
||||||
|
|
||||||
(token::ByteStrRaw(hash_count), self.name_from_to(content_start_bpos, content_end_bpos))
|
(content_start_bpos, content_end_bpos, hash_count)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn validate_char_escape(&self, start_with_quote: BytePos) {
|
fn validate_char_escape(&self, start_with_quote: BytePos) {
|
||||||
@@ -1422,6 +1346,40 @@ impl<'a> StringReader<'a> {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn validate_raw_str_escape(&self, content_start: BytePos, content_end: BytePos) {
|
||||||
|
self.with_str_from_to(content_start, content_end, |lit: &str| {
|
||||||
|
unescape::unescape_raw_str(lit, &mut |range, c| {
|
||||||
|
if let Err(err) = c {
|
||||||
|
emit_unescape_error(
|
||||||
|
&self.sess.span_diagnostic,
|
||||||
|
lit,
|
||||||
|
self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)),
|
||||||
|
unescape::Mode::Str,
|
||||||
|
range,
|
||||||
|
err,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
fn validate_raw_byte_str_escape(&self, content_start: BytePos, content_end: BytePos) {
|
||||||
|
self.with_str_from_to(content_start, content_end, |lit: &str| {
|
||||||
|
unescape::unescape_raw_byte_str(lit, &mut |range, c| {
|
||||||
|
if let Err(err) = c {
|
||||||
|
emit_unescape_error(
|
||||||
|
&self.sess.span_diagnostic,
|
||||||
|
lit,
|
||||||
|
self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)),
|
||||||
|
unescape::Mode::ByteStr,
|
||||||
|
range,
|
||||||
|
err,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
fn validate_byte_str_escape(&self, start_with_quote: BytePos) {
|
fn validate_byte_str_escape(&self, start_with_quote: BytePos) {
|
||||||
self.with_str_from_to(start_with_quote + BytePos(1), self.pos - BytePos(1), |lit| {
|
self.with_str_from_to(start_with_quote + BytePos(1), self.pos - BytePos(1), |lit| {
|
||||||
unescape::unescape_byte_str(lit, &mut |range, c| {
|
unescape::unescape_byte_str(lit, &mut |range, c| {
|
||||||
|
|||||||
@@ -4,7 +4,9 @@ use crate::ast::{self, Lit, LitKind};
|
|||||||
use crate::parse::parser::Parser;
|
use crate::parse::parser::Parser;
|
||||||
use crate::parse::PResult;
|
use crate::parse::PResult;
|
||||||
use crate::parse::token::{self, Token, TokenKind};
|
use crate::parse::token::{self, Token, TokenKind};
|
||||||
use crate::parse::unescape::{unescape_str, unescape_char, unescape_byte_str, unescape_byte};
|
use crate::parse::unescape::{unescape_char, unescape_byte};
|
||||||
|
use crate::parse::unescape::{unescape_str, unescape_byte_str};
|
||||||
|
use crate::parse::unescape::{unescape_raw_str, unescape_raw_byte_str};
|
||||||
use crate::print::pprust;
|
use crate::print::pprust;
|
||||||
use crate::symbol::{kw, sym, Symbol};
|
use crate::symbol::{kw, sym, Symbol};
|
||||||
use crate::tokenstream::{TokenStream, TokenTree};
|
use crate::tokenstream::{TokenStream, TokenTree};
|
||||||
@@ -141,7 +143,17 @@ impl LitKind {
|
|||||||
// Ditto.
|
// Ditto.
|
||||||
let s = symbol.as_str();
|
let s = symbol.as_str();
|
||||||
let symbol = if s.contains('\r') {
|
let symbol = if s.contains('\r') {
|
||||||
Symbol::intern(&raw_str_lit(&s))
|
let mut buf = String::with_capacity(s.len());
|
||||||
|
let mut error = Ok(());
|
||||||
|
unescape_raw_str(&s, &mut |_, unescaped_char| {
|
||||||
|
match unescaped_char {
|
||||||
|
Ok(c) => buf.push(c),
|
||||||
|
Err(_) => error = Err(LitError::LexerError),
|
||||||
|
}
|
||||||
|
});
|
||||||
|
error?;
|
||||||
|
buf.shrink_to_fit();
|
||||||
|
Symbol::intern(&buf)
|
||||||
} else {
|
} else {
|
||||||
symbol
|
symbol
|
||||||
};
|
};
|
||||||
@@ -161,7 +173,26 @@ impl LitKind {
|
|||||||
buf.shrink_to_fit();
|
buf.shrink_to_fit();
|
||||||
LitKind::ByteStr(Lrc::new(buf))
|
LitKind::ByteStr(Lrc::new(buf))
|
||||||
}
|
}
|
||||||
token::ByteStrRaw(_) => LitKind::ByteStr(Lrc::new(symbol.to_string().into_bytes())),
|
token::ByteStrRaw(_) => {
|
||||||
|
let s = symbol.as_str();
|
||||||
|
let bytes = if s.contains('\r') {
|
||||||
|
let mut buf = Vec::with_capacity(s.len());
|
||||||
|
let mut error = Ok(());
|
||||||
|
unescape_raw_byte_str(&s, &mut |_, unescaped_byte| {
|
||||||
|
match unescaped_byte {
|
||||||
|
Ok(c) => buf.push(c),
|
||||||
|
Err(_) => error = Err(LitError::LexerError),
|
||||||
|
}
|
||||||
|
});
|
||||||
|
error?;
|
||||||
|
buf.shrink_to_fit();
|
||||||
|
buf
|
||||||
|
} else {
|
||||||
|
symbol.to_string().into_bytes()
|
||||||
|
};
|
||||||
|
|
||||||
|
LitKind::ByteStr(Lrc::new(bytes))
|
||||||
|
},
|
||||||
token::Err => LitKind::Err(symbol),
|
token::Err => LitKind::Err(symbol),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
@@ -353,29 +384,6 @@ crate fn expect_no_suffix(diag: &Handler, sp: Span, kind: &str, suffix: Option<S
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parses a string representing a raw string literal into its final form. The
|
|
||||||
/// only operation this does is convert embedded CRLF into a single LF.
|
|
||||||
fn raw_str_lit(lit: &str) -> String {
|
|
||||||
debug!("raw_str_lit: {:?}", lit);
|
|
||||||
let mut res = String::with_capacity(lit.len());
|
|
||||||
|
|
||||||
let mut chars = lit.chars().peekable();
|
|
||||||
while let Some(c) = chars.next() {
|
|
||||||
if c == '\r' {
|
|
||||||
if *chars.peek().unwrap() != '\n' {
|
|
||||||
panic!("lexer accepted bare CR");
|
|
||||||
}
|
|
||||||
chars.next();
|
|
||||||
res.push('\n');
|
|
||||||
} else {
|
|
||||||
res.push(c);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
res.shrink_to_fit();
|
|
||||||
res
|
|
||||||
}
|
|
||||||
|
|
||||||
// Checks if `s` looks like i32 or u1234 etc.
|
// Checks if `s` looks like i32 or u1234 etc.
|
||||||
fn looks_like_width_suffix(first_chars: &[char], s: &str) -> bool {
|
fn looks_like_width_suffix(first_chars: &[char], s: &str) -> bool {
|
||||||
s.len() > 1 && s.starts_with(first_chars) && s[1..].chars().all(|c| c.is_ascii_digit())
|
s.len() > 1 && s.starts_with(first_chars) && s[1..].chars().all(|c| c.is_ascii_digit())
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
//! Utilities for validating string and char literals and turning them into
|
//! Utilities for validating string and char literals and turning them into
|
||||||
//! values they represent.
|
//! values they represent.
|
||||||
|
|
||||||
use std::str::Chars;
|
use std::str::Chars;
|
||||||
@@ -12,6 +12,7 @@ pub(crate) enum EscapeError {
|
|||||||
LoneSlash,
|
LoneSlash,
|
||||||
InvalidEscape,
|
InvalidEscape,
|
||||||
BareCarriageReturn,
|
BareCarriageReturn,
|
||||||
|
BareCarriageReturnInRawString,
|
||||||
EscapeOnlyChar,
|
EscapeOnlyChar,
|
||||||
|
|
||||||
TooShortHexEscape,
|
TooShortHexEscape,
|
||||||
@@ -29,6 +30,7 @@ pub(crate) enum EscapeError {
|
|||||||
|
|
||||||
UnicodeEscapeInByte,
|
UnicodeEscapeInByte,
|
||||||
NonAsciiCharInByte,
|
NonAsciiCharInByte,
|
||||||
|
NonAsciiCharInByteString,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Takes a contents of a char literal (without quotes), and returns an
|
/// Takes a contents of a char literal (without quotes), and returns an
|
||||||
@@ -66,6 +68,30 @@ where
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Takes the contents of a raw string literal (without quotes) and produces a
|
||||||
|
/// sequence of characters or errors.
|
||||||
|
/// NOTE: Raw strings do not perform any explicit character escaping, here we
|
||||||
|
/// only translate CRLF to LF and produce errors on bare CR.
|
||||||
|
pub(crate) fn unescape_raw_str<F>(literal_text: &str, callback: &mut F)
|
||||||
|
where
|
||||||
|
F: FnMut(Range<usize>, Result<char, EscapeError>),
|
||||||
|
{
|
||||||
|
unescape_raw_str_or_byte_str(literal_text, Mode::Str, callback)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Takes the contents of a raw byte string literal (without quotes) and produces a
|
||||||
|
/// sequence of bytes or errors.
|
||||||
|
/// NOTE: Raw strings do not perform any explicit character escaping, here we
|
||||||
|
/// only translate CRLF to LF and produce errors on bare CR.
|
||||||
|
pub(crate) fn unescape_raw_byte_str<F>(literal_text: &str, callback: &mut F)
|
||||||
|
where
|
||||||
|
F: FnMut(Range<usize>, Result<u8, EscapeError>),
|
||||||
|
{
|
||||||
|
unescape_raw_str_or_byte_str(literal_text, Mode::ByteStr, &mut |range, char| {
|
||||||
|
callback(range, char.map(byte_from_char))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy)]
|
#[derive(Debug, Clone, Copy)]
|
||||||
pub(crate) enum Mode {
|
pub(crate) enum Mode {
|
||||||
Char,
|
Char,
|
||||||
@@ -254,9 +280,40 @@ where
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Takes the contents of a raw string or raw byte string literal (without quotes) and produces a
|
||||||
|
/// sequence of characters or errors.
|
||||||
|
/// NOTE: Raw strings do not perform any explicit character escaping, here we
|
||||||
|
/// only translate CRLF to LF and produce errors on bare CR.
|
||||||
|
fn unescape_raw_str_or_byte_str<F>(literal_text: &str, mode: Mode, callback: &mut F)
|
||||||
|
where
|
||||||
|
F: FnMut(Range<usize>, Result<char, EscapeError>),
|
||||||
|
{
|
||||||
|
assert!(mode.in_double_quotes());
|
||||||
|
let initial_len = literal_text.len();
|
||||||
|
|
||||||
|
let mut chars = literal_text.chars();
|
||||||
|
while let Some(curr) = chars.next() {
|
||||||
|
let start = initial_len - chars.as_str().len() - curr.len_utf8();
|
||||||
|
|
||||||
|
let result = match (curr, chars.clone().next()) {
|
||||||
|
('\r', Some('\n')) => {
|
||||||
|
chars.next();
|
||||||
|
Ok('\n')
|
||||||
|
},
|
||||||
|
('\r', _) => Err(EscapeError::BareCarriageReturnInRawString),
|
||||||
|
(c, _) if mode.is_bytes() && !c.is_ascii() =>
|
||||||
|
Err(EscapeError::NonAsciiCharInByteString),
|
||||||
|
(c, _) => Ok(c),
|
||||||
|
};
|
||||||
|
let end = initial_len - chars.as_str().len();
|
||||||
|
|
||||||
|
callback(start..end, result);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn byte_from_char(c: char) -> u8 {
|
fn byte_from_char(c: char) -> u8 {
|
||||||
let res = c as u32;
|
let res = c as u32;
|
||||||
assert!(res <= u8::max_value() as u32, "guaranteed because of Mode::Byte");
|
assert!(res <= u8::max_value() as u32, "guaranteed because of Mode::Byte(Str)");
|
||||||
res as u8
|
res as u8
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -80,6 +80,11 @@ pub(crate) fn emit_unescape_error(
|
|||||||
};
|
};
|
||||||
handler.span_err(span, msg);
|
handler.span_err(span, msg);
|
||||||
}
|
}
|
||||||
|
EscapeError::BareCarriageReturnInRawString => {
|
||||||
|
assert!(mode.in_double_quotes());
|
||||||
|
let msg = "bare CR not allowed in raw string";
|
||||||
|
handler.span_err(span, msg);
|
||||||
|
}
|
||||||
EscapeError::InvalidEscape => {
|
EscapeError::InvalidEscape => {
|
||||||
let (c, span) = last_char();
|
let (c, span) = last_char();
|
||||||
|
|
||||||
@@ -124,6 +129,11 @@ pub(crate) fn emit_unescape_error(
|
|||||||
handler.span_err(span, "byte constant must be ASCII. \
|
handler.span_err(span, "byte constant must be ASCII. \
|
||||||
Use a \\xHH escape for a non-ASCII byte")
|
Use a \\xHH escape for a non-ASCII byte")
|
||||||
}
|
}
|
||||||
|
EscapeError::NonAsciiCharInByteString => {
|
||||||
|
assert!(mode.is_bytes());
|
||||||
|
let (_c, span) = last_char();
|
||||||
|
handler.span_err(span, "raw byte string must be ASCII")
|
||||||
|
}
|
||||||
EscapeError::OutOfRangeHexEscape => {
|
EscapeError::OutOfRangeHexEscape => {
|
||||||
handler.span_err(span, "this form of character escape may only be used \
|
handler.span_err(span, "this form of character escape may only be used \
|
||||||
with characters in the range [\\x00-\\x7f]")
|
with characters in the range [\\x00-\\x7f]")
|
||||||
|
|||||||
@@ -30,6 +30,9 @@ literal";
|
|||||||
let s = r"string
|
let s = r"string
|
||||||
literal";
|
literal";
|
||||||
assert_eq!(s, "string\nliteral");
|
assert_eq!(s, "string\nliteral");
|
||||||
|
let s = br"byte string
|
||||||
|
literal";
|
||||||
|
assert_eq!(s, "byte string\nliteral".as_bytes());
|
||||||
|
|
||||||
// validate that our source file has CRLF endings
|
// validate that our source file has CRLF endings
|
||||||
let source = include_str!("lexer-crlf-line-endings-string-literal-doc-comment.rs");
|
let source = include_str!("lexer-crlf-line-endings-string-literal-doc-comment.rs");
|
||||||
|
|||||||
@@ -28,11 +28,11 @@ error: bare CR not allowed in string, use \r instead
|
|||||||
|
|
||||||
error: bare CR not allowed in string, use \r instead
|
error: bare CR not allowed in string, use \r instead
|
||||||
--> $DIR/lex-bare-cr-string-literal-doc-comment.rs:21:18
|
--> $DIR/lex-bare-cr-string-literal-doc-comment.rs:21:18
|
||||||
|
|
|
|
||||||
LL | let _s = "foo
|
LL | let _s = "foo
|
||||||
bar";
|
bar";
|
||||||
| ^
|
| ^
|
||||||
|
|
||||||
error: bare CR not allowed in raw string
|
error: bare CR not allowed in raw string
|
||||||
--> $DIR/lex-bare-cr-string-literal-doc-comment.rs:24:19
|
--> $DIR/lex-bare-cr-string-literal-doc-comment.rs:24:19
|
||||||
|
|
|
|
||||||
|
|||||||
@@ -1,4 +1,7 @@
|
|||||||
|
// ignore-tidy-cr
|
||||||
|
// compile-flags: -Z continue-parse-after-error
|
||||||
pub fn main() {
|
pub fn main() {
|
||||||
|
br"a
|
||||||
"; //~ ERROR bare CR not allowed in raw string
|
"; //~ ERROR bare CR not allowed in raw string
|
||||||
br"é"; //~ ERROR raw byte string must be ASCII
|
br"é"; //~ ERROR raw byte string must be ASCII
|
||||||
br##~"a"~##; //~ ERROR only `#` is allowed in raw string delimitation
|
br##~"a"~##; //~ ERROR only `#` is allowed in raw string delimitation
|
||||||
|
|||||||
@@ -1,14 +1,20 @@
|
|||||||
error: raw byte string must be ASCII: \u{e9}
|
error: bare CR not allowed in raw string
|
||||||
--> $DIR/raw-byte-string-literals.rs:2:8
|
--> $DIR/raw-byte-string-literals.rs:4:9
|
||||||
|
|
|
||||||
|
LL | br"a
|
||||||
|
";
|
||||||
|
| ^
|
||||||
|
|
||||||
|
error: raw byte string must be ASCII
|
||||||
--> $DIR/raw-byte-string-literals.rs:5:8
|
--> $DIR/raw-byte-string-literals.rs:5:8
|
||||||
|
|
|
|
||||||
LL | br"é";
|
LL | br"é";
|
||||||
| ^
|
| ^
|
||||||
|
|
||||||
--> $DIR/raw-byte-string-literals.rs:3:6
|
error: found invalid character; only `#` is allowed in raw string delimitation: ~
|
||||||
--> $DIR/raw-byte-string-literals.rs:6:6
|
--> $DIR/raw-byte-string-literals.rs:6:6
|
||||||
|
|
|
|
||||||
LL | br##~"a"~##;
|
LL | br##~"a"~##;
|
||||||
| ^^^
|
| ^^^
|
||||||
error: aborting due to 2 previous errors
|
|
||||||
error: aborting due to 3 previous errors
|
error: aborting due to 3 previous errors
|
||||||
|
|||||||
Reference in New Issue
Block a user