Rollup merge of #144239 - xizheyin:clean-lexer, r=fee1-dead
Clean `rustc/parse/src/lexer` to improve maintainability. This PR refactors the lexer code to improve maintainability and eliminate code duplication. In the first commit, I improve the error handling: - rename `make_unclosed_delims_error` to the more appropriate `make_mismatched_closing_delims_errors` - change the return type from `Option<Diag>` to `Vec<Diag>` to avoid lengthy vec processing in `lex_token_trees` - use `splice` instead of `extend` to make the logic clearer, since `errs` sounds more generic and is better suited as a return value. In the second commit, I replace the magic number 5 with the `UNCLOSED_DELIMITER_SHOW_LIMIT` constant. In the third commit, I move the `eof_err` function below the parsing logic for better code flow. In the fourth commit, I extract a `calculate_spacing` function to eliminate the duplicated spacing logic shared by the `bump` and `bump_minimal` functions. r? compiler
This commit is contained in:
@@ -126,23 +126,29 @@ pub(super) fn report_suspicious_mismatch_block(
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn make_unclosed_delims_error(
|
||||
unmatched: UnmatchedDelim,
|
||||
psess: &ParseSess,
|
||||
) -> Option<Diag<'_>> {
|
||||
// `None` here means an `Eof` was found. We already emit those errors elsewhere, we add them to
|
||||
// `unmatched_delims` only for error recovery in the `Parser`.
|
||||
let found_delim = unmatched.found_delim?;
|
||||
let mut spans = vec![unmatched.found_span];
|
||||
if let Some(sp) = unmatched.unclosed_span {
|
||||
spans.push(sp);
|
||||
};
|
||||
let err = psess.dcx().create_err(MismatchedClosingDelimiter {
|
||||
spans,
|
||||
delimiter: pprust::token_kind_to_string(&found_delim.as_close_token_kind()).to_string(),
|
||||
unmatched: unmatched.found_span,
|
||||
opening_candidate: unmatched.candidate_span,
|
||||
unclosed: unmatched.unclosed_span,
|
||||
});
|
||||
Some(err)
|
||||
pub(crate) fn make_errors_for_mismatched_closing_delims<'psess>(
|
||||
unmatcheds: &[UnmatchedDelim],
|
||||
psess: &'psess ParseSess,
|
||||
) -> Vec<Diag<'psess>> {
|
||||
unmatcheds
|
||||
.iter()
|
||||
.filter_map(|unmatched| {
|
||||
// `None` here means an `Eof` was found. We already emit those errors elsewhere, we add them to
|
||||
// `unmatched_delims` only for error recovery in the `Parser`.
|
||||
let found_delim = unmatched.found_delim?;
|
||||
let mut spans = vec![unmatched.found_span];
|
||||
if let Some(sp) = unmatched.unclosed_span {
|
||||
spans.push(sp);
|
||||
};
|
||||
let err = psess.dcx().create_err(MismatchedClosingDelimiter {
|
||||
spans,
|
||||
delimiter: pprust::token_kind_to_string(&found_delim.as_close_token_kind())
|
||||
.to_string(),
|
||||
unmatched: unmatched.found_span,
|
||||
opening_candidate: unmatched.candidate_span,
|
||||
unclosed: unmatched.unclosed_span,
|
||||
});
|
||||
Some(err)
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use diagnostics::make_unclosed_delims_error;
|
||||
use diagnostics::make_errors_for_mismatched_closing_delims;
|
||||
use rustc_ast::ast::{self, AttrStyle};
|
||||
use rustc_ast::token::{self, CommentKind, Delimiter, IdentIsRaw, Token, TokenKind};
|
||||
use rustc_ast::tokenstream::TokenStream;
|
||||
@@ -71,27 +71,23 @@ pub(crate) fn lex_token_trees<'psess, 'src>(
|
||||
};
|
||||
let res = lexer.lex_token_trees(/* is_delimited */ false);
|
||||
|
||||
let mut unmatched_delims: Vec<_> = lexer
|
||||
.diag_info
|
||||
.unmatched_delims
|
||||
.into_iter()
|
||||
.filter_map(|unmatched_delim| make_unclosed_delims_error(unmatched_delim, psess))
|
||||
.collect();
|
||||
let mut unmatched_closing_delims: Vec<_> =
|
||||
make_errors_for_mismatched_closing_delims(&lexer.diag_info.unmatched_delims, psess);
|
||||
|
||||
match res {
|
||||
Ok((_open_spacing, stream)) => {
|
||||
if unmatched_delims.is_empty() {
|
||||
if unmatched_closing_delims.is_empty() {
|
||||
Ok(stream)
|
||||
} else {
|
||||
// Return error if there are unmatched delimiters or unclosed delimiters.
|
||||
Err(unmatched_delims)
|
||||
Err(unmatched_closing_delims)
|
||||
}
|
||||
}
|
||||
Err(errs) => {
|
||||
// We emit delimiter mismatch errors first, then emit the unclosing delimiter mismatch
|
||||
// because the delimiter mismatch is more likely to be the root cause of error
|
||||
unmatched_delims.extend(errs);
|
||||
Err(unmatched_delims)
|
||||
unmatched_closing_delims.extend(errs);
|
||||
Err(unmatched_closing_delims)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -51,45 +51,6 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
|
||||
}
|
||||
}
|
||||
|
||||
fn eof_err(&mut self) -> Diag<'psess> {
|
||||
let msg = "this file contains an unclosed delimiter";
|
||||
let mut err = self.dcx().struct_span_err(self.token.span, msg);
|
||||
|
||||
let unclosed_delimiter_show_limit = 5;
|
||||
let len = usize::min(unclosed_delimiter_show_limit, self.diag_info.open_delimiters.len());
|
||||
for &(_, span) in &self.diag_info.open_delimiters[..len] {
|
||||
err.span_label(span, "unclosed delimiter");
|
||||
self.diag_info.unmatched_delims.push(UnmatchedDelim {
|
||||
found_delim: None,
|
||||
found_span: self.token.span,
|
||||
unclosed_span: Some(span),
|
||||
candidate_span: None,
|
||||
});
|
||||
}
|
||||
|
||||
if let Some((_, span)) = self.diag_info.open_delimiters.get(unclosed_delimiter_show_limit)
|
||||
&& self.diag_info.open_delimiters.len() >= unclosed_delimiter_show_limit + 2
|
||||
{
|
||||
err.span_label(
|
||||
*span,
|
||||
format!(
|
||||
"another {} unclosed delimiters begin from here",
|
||||
self.diag_info.open_delimiters.len() - unclosed_delimiter_show_limit
|
||||
),
|
||||
);
|
||||
}
|
||||
|
||||
if let Some((delim, _)) = self.diag_info.open_delimiters.last() {
|
||||
report_suspicious_mismatch_block(
|
||||
&mut err,
|
||||
&self.diag_info,
|
||||
self.psess.source_map(),
|
||||
*delim,
|
||||
)
|
||||
}
|
||||
err
|
||||
}
|
||||
|
||||
fn lex_token_tree_open_delim(
|
||||
&mut self,
|
||||
open_delim: Delimiter,
|
||||
@@ -206,13 +167,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
|
||||
} else if let Some(glued) = self.token.glue(&next_tok) {
|
||||
self.token = glued;
|
||||
} else {
|
||||
let this_spacing = if next_tok.is_punct() {
|
||||
Spacing::Joint
|
||||
} else if next_tok == token::Eof {
|
||||
Spacing::Alone
|
||||
} else {
|
||||
Spacing::JointHidden
|
||||
};
|
||||
let this_spacing = self.calculate_spacing(&next_tok);
|
||||
break (this_spacing, next_tok);
|
||||
}
|
||||
};
|
||||
@@ -223,23 +178,64 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
|
||||
// Cut-down version of `bump` used when the token kind is known in advance.
|
||||
fn bump_minimal(&mut self) -> Spacing {
|
||||
let (next_tok, is_next_tok_preceded_by_whitespace) = self.next_token_from_cursor();
|
||||
|
||||
let this_spacing = if is_next_tok_preceded_by_whitespace {
|
||||
Spacing::Alone
|
||||
} else {
|
||||
if next_tok.is_punct() {
|
||||
Spacing::Joint
|
||||
} else if next_tok == token::Eof {
|
||||
Spacing::Alone
|
||||
} else {
|
||||
Spacing::JointHidden
|
||||
}
|
||||
self.calculate_spacing(&next_tok)
|
||||
};
|
||||
|
||||
self.token = next_tok;
|
||||
this_spacing
|
||||
}
|
||||
|
||||
fn calculate_spacing(&self, next_tok: &Token) -> Spacing {
|
||||
if next_tok.is_punct() {
|
||||
Spacing::Joint
|
||||
} else if *next_tok == token::Eof {
|
||||
Spacing::Alone
|
||||
} else {
|
||||
Spacing::JointHidden
|
||||
}
|
||||
}
|
||||
|
||||
fn eof_err(&mut self) -> Diag<'psess> {
|
||||
const UNCLOSED_DELIMITER_SHOW_LIMIT: usize = 5;
|
||||
let msg = "this file contains an unclosed delimiter";
|
||||
let mut err = self.dcx().struct_span_err(self.token.span, msg);
|
||||
|
||||
let len = usize::min(UNCLOSED_DELIMITER_SHOW_LIMIT, self.diag_info.open_delimiters.len());
|
||||
for &(_, span) in &self.diag_info.open_delimiters[..len] {
|
||||
err.span_label(span, "unclosed delimiter");
|
||||
self.diag_info.unmatched_delims.push(UnmatchedDelim {
|
||||
found_delim: None,
|
||||
found_span: self.token.span,
|
||||
unclosed_span: Some(span),
|
||||
candidate_span: None,
|
||||
});
|
||||
}
|
||||
|
||||
if let Some((_, span)) = self.diag_info.open_delimiters.get(UNCLOSED_DELIMITER_SHOW_LIMIT)
|
||||
&& self.diag_info.open_delimiters.len() >= UNCLOSED_DELIMITER_SHOW_LIMIT + 2
|
||||
{
|
||||
err.span_label(
|
||||
*span,
|
||||
format!(
|
||||
"another {} unclosed delimiters begin from here",
|
||||
self.diag_info.open_delimiters.len() - UNCLOSED_DELIMITER_SHOW_LIMIT
|
||||
),
|
||||
);
|
||||
}
|
||||
|
||||
if let Some((delim, _)) = self.diag_info.open_delimiters.last() {
|
||||
report_suspicious_mismatch_block(
|
||||
&mut err,
|
||||
&self.diag_info,
|
||||
self.psess.source_map(),
|
||||
*delim,
|
||||
)
|
||||
}
|
||||
err
|
||||
}
|
||||
|
||||
fn close_delim_err(&mut self, delim: Delimiter) -> Diag<'psess> {
|
||||
// An unexpected closing delimiter (i.e., there is no matching opening delimiter).
|
||||
let token_str = token_to_string(&self.token);
|
||||
|
||||
Reference in New Issue
Block a user