Speed up Parser::expected_token_types.

The parser pushes a `TokenType` to `Parser::expected_token_types` on
every call to the various `check`/`eat` methods, and clears it on every
call to `bump`. Some of those `TokenType` values are full tokens that
require cloning and dropping. This is a *lot* of work for something
that is only used in error messages, and it accounts for a significant
fraction of parsing execution time.

This commit overhauls `TokenType` so that `Parser::expected_token_types`
can be implemented as a bitset. This requires changing `TokenType` to a
C-style parameterless enum, and adding `TokenTypeSet` which uses a
`u128` for the bits. (The new `TokenType` has 105 variants.)

The new types `ExpTokenPair` and `ExpKeywordPair` are now arguments to
the `check`/`eat` methods. This is for maximum speed. The elements in
the pairs are always statically known; e.g. a
`token::BinOp(token::Star)` is always paired with a `TokenType::Star`.
So we now compute `TokenType`s in advance and pass them in to
`check`/`eat`, rather than constructing them on insertion into
`expected_token_types` as the code did before this commit.

Values of these pair types can be produced by the new `exp!` macro,
which is used at every `check`/`eat` call site. The macro is for
convenience, allowing any pair to be generated from a single identifier.

The ident/keyword filtering in `expected_one_of_not_found` is no longer
necessary. It was there to account for some sloppiness in
`TokenKind`/`TokenType` comparisons.

The existing `TokenType` is moved to a new file `token_type.rs`, and all
its new infrastructure is added to that file. There is more boilerplate
code than I would like, but I can't see how to make it shorter.
This commit is contained in:
Nicholas Nethercote
2024-12-04 15:55:06 +11:00
parent d5370d981f
commit b9bf0b4b10
22 changed files with 1357 additions and 793 deletions

View File

@@ -1,8 +1,7 @@
use rustc_ast::token::{self, Delimiter};
use rustc_ast::{self as ast, Attribute, attr};
use rustc_ast::{self as ast, Attribute, attr, token};
use rustc_errors::codes::*;
use rustc_errors::{Diag, PResult};
use rustc_span::{BytePos, Span, kw};
use rustc_span::{BytePos, Span};
use thin_vec::ThinVec;
use tracing::debug;
@@ -10,7 +9,7 @@ use super::{
AttrWrapper, Capturing, FnParseMode, ForceCollect, Parser, ParserRange, PathStyle, Trailing,
UsePreAttrPos,
};
use crate::{errors, fluent_generated as fluent, maybe_whole};
use crate::{errors, exp, fluent_generated as fluent, maybe_whole};
// Public for rustfmt usage
#[derive(Debug)]
@@ -45,7 +44,7 @@ impl<'a> Parser<'a> {
let mut just_parsed_doc_comment = false;
let start_pos = self.num_bump_calls;
loop {
let attr = if self.check(&token::Pound) {
let attr = if self.check(exp!(Pound)) {
let prev_outer_attr_sp = outer_attrs.last().map(|attr: &Attribute| attr.span);
let inner_error_reason = if just_parsed_doc_comment {
@@ -126,14 +125,14 @@ impl<'a> Parser<'a> {
let lo = self.token.span;
// Attributes can't have attributes of their own [Editor's note: not with that attitude]
self.collect_tokens_no_attrs(|this| {
assert!(this.eat(&token::Pound), "parse_attribute called in non-attribute position");
assert!(this.eat(exp!(Pound)), "parse_attribute called in non-attribute position");
let style =
if this.eat(&token::Not) { ast::AttrStyle::Inner } else { ast::AttrStyle::Outer };
if this.eat(exp!(Not)) { ast::AttrStyle::Inner } else { ast::AttrStyle::Outer };
this.expect(&token::OpenDelim(Delimiter::Bracket))?;
this.expect(exp!(OpenBracket))?;
let item = this.parse_attr_item(ForceCollect::No)?;
this.expect(&token::CloseDelim(Delimiter::Bracket))?;
this.expect(exp!(CloseBracket))?;
let attr_sp = lo.to(this.prev_token.span);
// Emit error if inner attribute is encountered and forbidden.
@@ -274,10 +273,10 @@ impl<'a> Parser<'a> {
// Attr items don't have attributes.
self.collect_tokens(None, AttrWrapper::empty(), force_collect, |this, _empty_attrs| {
let is_unsafe = this.eat_keyword(kw::Unsafe);
let is_unsafe = this.eat_keyword(exp!(Unsafe));
let unsafety = if is_unsafe {
let unsafe_span = this.prev_token.span;
this.expect(&token::OpenDelim(Delimiter::Parenthesis))?;
this.expect(exp!(OpenParen))?;
ast::Safety::Unsafe(unsafe_span)
} else {
ast::Safety::Default
@@ -286,7 +285,7 @@ impl<'a> Parser<'a> {
let path = this.parse_path(PathStyle::Mod)?;
let args = this.parse_attr_args()?;
if is_unsafe {
this.expect(&token::CloseDelim(Delimiter::Parenthesis))?;
this.expect(exp!(CloseParen))?;
}
Ok((
ast::AttrItem { unsafety, path, args, tokens: None },
@@ -306,7 +305,7 @@ impl<'a> Parser<'a> {
loop {
let start_pos = self.num_bump_calls;
// Only try to parse if it is an inner attribute (has `!`).
let attr = if self.check(&token::Pound) && self.look_ahead(1, |t| t == &token::Not) {
let attr = if self.check(exp!(Pound)) && self.look_ahead(1, |t| t == &token::Not) {
Some(self.parse_attribute(InnerAttrPolicy::Permitted)?)
} else if let token::DocComment(comment_kind, attr_style, data) = self.token.kind {
if attr_style == ast::AttrStyle::Inner {
@@ -358,7 +357,7 @@ impl<'a> Parser<'a> {
&mut self,
) -> PResult<'a, (ast::MetaItemInner, Vec<(ast::AttrItem, Span)>)> {
let cfg_predicate = self.parse_meta_item_inner()?;
self.expect(&token::Comma)?;
self.expect(exp!(Comma))?;
// Presumably, the majority of the time there will only be one attr.
let mut expanded_attrs = Vec::with_capacity(1);
@@ -366,7 +365,7 @@ impl<'a> Parser<'a> {
let lo = self.token.span;
let item = self.parse_attr_item(ForceCollect::Yes)?;
expanded_attrs.push((item, lo.to(self.prev_token.span)));
if !self.eat(&token::Comma) {
if !self.eat(exp!(Comma)) {
break;
}
}
@@ -380,7 +379,7 @@ impl<'a> Parser<'a> {
let mut nmis = ThinVec::with_capacity(1);
while self.token != token::Eof {
nmis.push(self.parse_meta_item_inner()?);
if !self.eat(&token::Comma) {
if !self.eat(exp!(Comma)) {
break;
}
}
@@ -413,13 +412,13 @@ impl<'a> Parser<'a> {
let lo = self.token.span;
let is_unsafe = if unsafe_allowed == AllowLeadingUnsafe::Yes {
self.eat_keyword(kw::Unsafe)
self.eat_keyword(exp!(Unsafe))
} else {
false
};
let unsafety = if is_unsafe {
let unsafe_span = self.prev_token.span;
self.expect(&token::OpenDelim(Delimiter::Parenthesis))?;
self.expect(exp!(OpenParen))?;
ast::Safety::Unsafe(unsafe_span)
} else {
@@ -429,7 +428,7 @@ impl<'a> Parser<'a> {
let path = self.parse_path(PathStyle::Mod)?;
let kind = self.parse_meta_item_kind()?;
if is_unsafe {
self.expect(&token::CloseDelim(Delimiter::Parenthesis))?;
self.expect(exp!(CloseParen))?;
}
let span = lo.to(self.prev_token.span);
@@ -437,9 +436,9 @@ impl<'a> Parser<'a> {
}
pub(crate) fn parse_meta_item_kind(&mut self) -> PResult<'a, ast::MetaItemKind> {
Ok(if self.eat(&token::Eq) {
Ok(if self.eat(exp!(Eq)) {
ast::MetaItemKind::NameValue(self.parse_unsuffixed_meta_item_lit()?)
} else if self.check(&token::OpenDelim(Delimiter::Parenthesis)) {
} else if self.check(exp!(OpenParen)) {
let (list, _) = self.parse_paren_comma_seq(|p| p.parse_meta_item_inner())?;
ast::MetaItemKind::List(list)
} else {