Rollup merge of #144239 - xizheyin:clean-lexer, r=fee1-dead

Clean `rustc/parse/src/lexer` to improve maintainability

This PR refactors the lexer code to improve maintainability and eliminate code duplication.
In the first commit, I improve the error handling:
- rename `make_unclosed_delims_error` to the more appropriate `make_errors_for_mismatched_closing_delims`
- change the return type from `Option<Diag>` to `Vec<Diag>` to avoid lengthy vec processing at `lex_token_trees`
- use `splice` instead of `extend` to make the logic clearer, since `errs` sounds too generic and is better suited as a return value

In the second commit, I replace the magic number 5 with the `UNCLOSED_DELIMITER_SHOW_LIMIT` constant.

In the third commit, I move the `eof_err` function below the parsing logic for better code flow.

In the fourth commit, I extract a `calculate_spacing` function to eliminate the duplicated spacing logic shared by the `bump` and `bump_minimal` functions.

r? compiler
This commit is contained in:
Matthias Krüger
2025-07-23 15:59:30 +02:00
committed by GitHub
3 changed files with 83 additions and 85 deletions

View File

@@ -126,23 +126,29 @@ pub(super) fn report_suspicious_mismatch_block(
}
}
// NOTE(review): pre-refactor version of this helper, shown as the removed side of
// the diff hunk. Its closing brace is shared with the replacement function that
// follows, so it does not appear in this span.
pub(crate) fn make_unclosed_delims_error(
// The single mismatch to report on.
unmatched: UnmatchedDelim,
psess: &ParseSess,
) -> Option<Diag<'_>> {
// `None` here means an `Eof` was found. We already emit those errors elsewhere, we add them to
// `unmatched_delims` only for error recovery in the `Parser`.
let found_delim = unmatched.found_delim?;
// Always label the span where the wrong closing delimiter was found; also label
// the still-open delimiter when we know where it was opened.
let mut spans = vec![unmatched.found_span];
if let Some(sp) = unmatched.unclosed_span {
spans.push(sp);
};
// Create (but do not emit here) the structured diagnostic; the caller decides
// when to emit it, since the returned `Diag` is handed back up.
let err = psess.dcx().create_err(MismatchedClosingDelimiter {
spans,
delimiter: pprust::token_kind_to_string(&found_delim.as_close_token_kind()).to_string(),
unmatched: unmatched.found_span,
opening_candidate: unmatched.candidate_span,
unclosed: unmatched.unclosed_span,
});
Some(err)
/// Builds one `MismatchedClosingDelimiter` diagnostic for every mismatch that
/// actually names the closing delimiter it found, in input order.
///
/// Entries with `found_delim: None` correspond to an `Eof`; those errors are
/// already emitted elsewhere and are kept in `unmatched_delims` only so the
/// `Parser` can recover, so they contribute no diagnostic here.
pub(crate) fn make_errors_for_mismatched_closing_delims<'psess>(
    unmatcheds: &[UnmatchedDelim],
    psess: &'psess ParseSess,
) -> Vec<Diag<'psess>> {
    let mut errors = Vec::new();
    for unmatched in unmatcheds {
        // Skip EOF-driven entries (see the doc comment above).
        let Some(found_delim) = unmatched.found_delim else {
            continue;
        };
        // Label the offending closing delimiter, plus the unclosed opening
        // delimiter when its span is known (`Option` extends as 0 or 1 spans).
        let mut spans = vec![unmatched.found_span];
        spans.extend(unmatched.unclosed_span);
        let err = psess.dcx().create_err(MismatchedClosingDelimiter {
            spans,
            delimiter: pprust::token_kind_to_string(&found_delim.as_close_token_kind())
                .to_string(),
            unmatched: unmatched.found_span,
            opening_candidate: unmatched.candidate_span,
            unclosed: unmatched.unclosed_span,
        });
        errors.push(err);
    }
    errors
}

View File

@@ -1,4 +1,4 @@
use diagnostics::make_unclosed_delims_error;
use diagnostics::make_errors_for_mismatched_closing_delims;
use rustc_ast::ast::{self, AttrStyle};
use rustc_ast::token::{self, CommentKind, Delimiter, IdentIsRaw, Token, TokenKind};
use rustc_ast::tokenstream::TokenStream;
@@ -71,27 +71,23 @@ pub(crate) fn lex_token_trees<'psess, 'src>(
// NOTE(review): this hunk is the body of `lex_token_trees` rendered as a unified
// diff with the +/- markers stripped, so removed (old) and added (new) lines
// appear interleaved below; each old/new pair is marked individually.
};
let res = lexer.lex_token_trees(/* is_delimited */ false);
// removed: the old code filtered each mismatch through `make_unclosed_delims_error`
// one element at a time.
let mut unmatched_delims: Vec<_> = lexer
.diag_info
.unmatched_delims
.into_iter()
.filter_map(|unmatched_delim| make_unclosed_delims_error(unmatched_delim, psess))
.collect();
// added: the new code delegates the whole iteration to the renamed helper.
let mut unmatched_closing_delims: Vec<_> =
make_errors_for_mismatched_closing_delims(&lexer.diag_info.unmatched_delims, psess);
match res {
Ok((_open_spacing, stream)) => {
// removed (old condition):
if unmatched_delims.is_empty() {
// added (new condition):
if unmatched_closing_delims.is_empty() {
Ok(stream)
} else {
// Return an error if there are unmatched delimiters or unclosed delimiters.
// removed:
Err(unmatched_delims)
// added:
Err(unmatched_closing_delims)
}
}
Err(errs) => {
// We return the delimiter mismatch errors first, then the other lexing errors,
// because a delimiter mismatch is more likely to be the root cause of the error.
// removed:
unmatched_delims.extend(errs);
Err(unmatched_delims)
// added:
unmatched_closing_delims.extend(errs);
Err(unmatched_closing_delims)
}
}
}

View File

@@ -51,45 +51,6 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
}
}
// NOTE(review): pre-refactor version of `eof_err` (removed side of the hunk); the
// refactor moves it below the parsing logic and names the magic number 5.
fn eof_err(&mut self) -> Diag<'psess> {
let msg = "this file contains an unclosed delimiter";
let mut err = self.dcx().struct_span_err(self.token.span, msg);
// Cap how many individual "unclosed delimiter" labels we attach.
let unclosed_delimiter_show_limit = 5;
let len = usize::min(unclosed_delimiter_show_limit, self.diag_info.open_delimiters.len());
for &(_, span) in &self.diag_info.open_delimiters[..len] {
err.span_label(span, "unclosed delimiter");
// Record the unclosed delimiter so the parser can recover; `found_delim: None`
// marks this as an EOF-driven entry.
self.diag_info.unmatched_delims.push(UnmatchedDelim {
found_delim: None,
found_span: self.token.span,
unclosed_span: Some(span),
candidate_span: None,
});
}
// If at least two more open delimiters remain beyond the limit, summarize them
// with a single label on the first one that was not labeled individually.
if let Some((_, span)) = self.diag_info.open_delimiters.get(unclosed_delimiter_show_limit)
&& self.diag_info.open_delimiters.len() >= unclosed_delimiter_show_limit + 2
{
err.span_label(
*span,
format!(
"another {} unclosed delimiters begin from here",
self.diag_info.open_delimiters.len() - unclosed_delimiter_show_limit
),
);
}
// Let `report_suspicious_mismatch_block` add context for the innermost open delimiter.
if let Some((delim, _)) = self.diag_info.open_delimiters.last() {
report_suspicious_mismatch_block(
&mut err,
&self.diag_info,
self.psess.source_map(),
*delim,
)
}
err
}
fn lex_token_tree_open_delim(
&mut self,
open_delim: Delimiter,
@@ -206,13 +167,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
} else if let Some(glued) = self.token.glue(&next_tok) {
self.token = glued;
} else {
let this_spacing = if next_tok.is_punct() {
Spacing::Joint
} else if next_tok == token::Eof {
Spacing::Alone
} else {
Spacing::JointHidden
};
let this_spacing = self.calculate_spacing(&next_tok);
break (this_spacing, next_tok);
}
};
@@ -223,23 +178,64 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
// Cut-down version of `bump` used when the token kind is known in advance.
// NOTE(review): diff hunk with the +/- markers stripped — the old inline spacing
// computation and its new `calculate_spacing` replacement both appear below.
fn bump_minimal(&mut self) -> Spacing {
let (next_tok, is_next_tok_preceded_by_whitespace) = self.next_token_from_cursor();
// Whitespace before the next token always forces `Spacing::Alone`.
let this_spacing = if is_next_tok_preceded_by_whitespace {
Spacing::Alone
} else {
// removed: old inline spacing logic, duplicated from `bump`.
if next_tok.is_punct() {
Spacing::Joint
} else if next_tok == token::Eof {
Spacing::Alone
} else {
Spacing::JointHidden
}
// added: the shared helper now computes the spacing.
self.calculate_spacing(&next_tok)
};
self.token = next_tok;
this_spacing
}
/// Shared spacing computation for `bump` and `bump_minimal`: decides how the
/// current token relates to `next_tok` when no whitespace separates them.
fn calculate_spacing(&self, next_tok: &Token) -> Spacing {
    match next_tok {
        // An adjacent punctuation token is joined.
        t if t.is_punct() => Spacing::Joint,
        // End of file never joins with anything.
        t if *t == token::Eof => Spacing::Alone,
        // Any other adjacent token.
        _ => Spacing::JointHidden,
    }
}
/// Builds the "unclosed delimiter" error reported when end of file is reached
/// with delimiters still open. Also records each unclosed delimiter in
/// `unmatched_delims` so the parser can recover.
fn eof_err(&mut self) -> Diag<'psess> {
// Cap how many individual "unclosed delimiter" labels are attached to the error.
const UNCLOSED_DELIMITER_SHOW_LIMIT: usize = 5;
let msg = "this file contains an unclosed delimiter";
let mut err = self.dcx().struct_span_err(self.token.span, msg);
let len = usize::min(UNCLOSED_DELIMITER_SHOW_LIMIT, self.diag_info.open_delimiters.len());
for &(_, span) in &self.diag_info.open_delimiters[..len] {
err.span_label(span, "unclosed delimiter");
// `found_delim: None` marks an EOF-driven entry, kept only for parser recovery.
self.diag_info.unmatched_delims.push(UnmatchedDelim {
found_delim: None,
found_span: self.token.span,
unclosed_span: Some(span),
candidate_span: None,
});
}
// If at least two more open delimiters remain beyond the limit, summarize them
// with a single label on the first one that was not labeled individually.
if let Some((_, span)) = self.diag_info.open_delimiters.get(UNCLOSED_DELIMITER_SHOW_LIMIT)
&& self.diag_info.open_delimiters.len() >= UNCLOSED_DELIMITER_SHOW_LIMIT + 2
{
err.span_label(
*span,
format!(
"another {} unclosed delimiters begin from here",
self.diag_info.open_delimiters.len() - UNCLOSED_DELIMITER_SHOW_LIMIT
),
);
}
// Let `report_suspicious_mismatch_block` add context for the innermost open delimiter.
if let Some((delim, _)) = self.diag_info.open_delimiters.last() {
report_suspicious_mismatch_block(
&mut err,
&self.diag_info,
self.psess.source_map(),
*delim,
)
}
err
}
fn close_delim_err(&mut self, delim: Delimiter) -> Diag<'psess> {
// An unexpected closing delimiter (i.e., there is no matching opening delimiter).
let token_str = token_to_string(&self.token);