Speed up Parser::expected_token_types.

The parser pushes a `TokenType` to `Parser::expected_token_types` on every call to the various `check`/`eat` methods, and clears it on every call to `bump`. Some of those `TokenType` values are full tokens that require cloning and dropping. This is a *lot* of work for something that is only used in error messages and it accounts for a significant fraction of parsing execution time. This commit overhauls `TokenType` so that `Parser::expected_token_types` can be implemented as a bitset. This requires changing `TokenType` to a C-style parameterless enum, and adding `TokenTypeSet` which uses a `u128` for the bits. (The new `TokenType` has 105 variants.) The new types `ExpTokenPair` and `ExpKeywordPair` are now arguments to the `check`/`eat` methods. This is for maximum speed. The elements in the pairs are always statically known; e.g. a `token::BinOp(token::Star)` is always paired with a `TokenType::Star`. So we now compute `TokenType`s in advance and pass them in to `check`/`eat` rather than the current approach of constructing them on insertion into `expected_token_types`. Values of these pair types can be produced by the new `exp!` macro, which is used at every `check`/`eat` call site. The macro is for convenience, allowing any pair to be generated from a single identifier. The ident/keyword filtering in `expected_one_of_not_found` is no longer necessary. It was there to account for some sloppiness in `TokenKind`/`TokenType` comparisons. The existing `TokenType` is moved to a new file `token_type.rs`, and all its new infrastructure is added to that file. There is more boilerplate code than I would like, but I can't see how to make it shorter.
2024-12-04 15:55:06 +11:00
parent d5370d981f
commit b9bf0b4b10
22 changed files with 1357 additions and 793 deletions
--- a/compiler/rustc_parse/src/parser/attr.rs
+++ b/compiler/rustc_parse/src/parser/attr.rs
@@ -1,8 +1,7 @@
-use rustc_ast::token::{self, Delimiter};
-use rustc_ast::{self as ast, Attribute, attr};
+use rustc_ast::{self as ast, Attribute, attr, token};
 use rustc_errors::codes::*;
 use rustc_errors::{Diag, PResult};
-use rustc_span::{BytePos, Span, kw};
+use rustc_span::{BytePos, Span};
 use thin_vec::ThinVec;
 use tracing::debug;

@@ -10,7 +9,7 @@ use super::{
    AttrWrapper, Capturing, FnParseMode, ForceCollect, Parser, ParserRange, PathStyle, Trailing,
    UsePreAttrPos,
 };
-use crate::{errors, fluent_generated as fluent, maybe_whole};
+use crate::{errors, exp, fluent_generated as fluent, maybe_whole};

 // Public for rustfmt usage
 #[derive(Debug)]
@@ -45,7 +44,7 @@ impl<'a> Parser<'a> {
        let mut just_parsed_doc_comment = false;
        let start_pos = self.num_bump_calls;
        loop {
-            let attr = if self.check(&token::Pound) {
+            let attr = if self.check(exp!(Pound)) {
                let prev_outer_attr_sp = outer_attrs.last().map(|attr: &Attribute| attr.span);

                let inner_error_reason = if just_parsed_doc_comment {
@@ -126,14 +125,14 @@ impl<'a> Parser<'a> {
        let lo = self.token.span;
        // Attributes can't have attributes of their own [Editor's note: not with that attitude]
        self.collect_tokens_no_attrs(|this| {
-            assert!(this.eat(&token::Pound), "parse_attribute called in non-attribute position");
+            assert!(this.eat(exp!(Pound)), "parse_attribute called in non-attribute position");

            let style =
-                if this.eat(&token::Not) { ast::AttrStyle::Inner } else { ast::AttrStyle::Outer };
+                if this.eat(exp!(Not)) { ast::AttrStyle::Inner } else { ast::AttrStyle::Outer };

-            this.expect(&token::OpenDelim(Delimiter::Bracket))?;
+            this.expect(exp!(OpenBracket))?;
            let item = this.parse_attr_item(ForceCollect::No)?;
-            this.expect(&token::CloseDelim(Delimiter::Bracket))?;
+            this.expect(exp!(CloseBracket))?;
            let attr_sp = lo.to(this.prev_token.span);

            // Emit error if inner attribute is encountered and forbidden.
@@ -274,10 +273,10 @@ impl<'a> Parser<'a> {

        // Attr items don't have attributes.
        self.collect_tokens(None, AttrWrapper::empty(), force_collect, |this, _empty_attrs| {
-            let is_unsafe = this.eat_keyword(kw::Unsafe);
+            let is_unsafe = this.eat_keyword(exp!(Unsafe));
            let unsafety = if is_unsafe {
                let unsafe_span = this.prev_token.span;
-                this.expect(&token::OpenDelim(Delimiter::Parenthesis))?;
+                this.expect(exp!(OpenParen))?;
                ast::Safety::Unsafe(unsafe_span)
            } else {
                ast::Safety::Default
@@ -286,7 +285,7 @@ impl<'a> Parser<'a> {
            let path = this.parse_path(PathStyle::Mod)?;
            let args = this.parse_attr_args()?;
            if is_unsafe {
-                this.expect(&token::CloseDelim(Delimiter::Parenthesis))?;
+                this.expect(exp!(CloseParen))?;
            }
            Ok((
                ast::AttrItem { unsafety, path, args, tokens: None },
@@ -306,7 +305,7 @@ impl<'a> Parser<'a> {
        loop {
            let start_pos = self.num_bump_calls;
            // Only try to parse if it is an inner attribute (has `!`).
-            let attr = if self.check(&token::Pound) && self.look_ahead(1, |t| t == &token::Not) {
+            let attr = if self.check(exp!(Pound)) && self.look_ahead(1, |t| t == &token::Not) {
                Some(self.parse_attribute(InnerAttrPolicy::Permitted)?)
            } else if let token::DocComment(comment_kind, attr_style, data) = self.token.kind {
                if attr_style == ast::AttrStyle::Inner {
@@ -358,7 +357,7 @@ impl<'a> Parser<'a> {
        &mut self,
    ) -> PResult<'a, (ast::MetaItemInner, Vec<(ast::AttrItem, Span)>)> {
        let cfg_predicate = self.parse_meta_item_inner()?;
-        self.expect(&token::Comma)?;
+        self.expect(exp!(Comma))?;

        // Presumably, the majority of the time there will only be one attr.
        let mut expanded_attrs = Vec::with_capacity(1);
@@ -366,7 +365,7 @@ impl<'a> Parser<'a> {
            let lo = self.token.span;
            let item = self.parse_attr_item(ForceCollect::Yes)?;
            expanded_attrs.push((item, lo.to(self.prev_token.span)));
-            if !self.eat(&token::Comma) {
+            if !self.eat(exp!(Comma)) {
                break;
            }
        }
@@ -380,7 +379,7 @@ impl<'a> Parser<'a> {
        let mut nmis = ThinVec::with_capacity(1);
        while self.token != token::Eof {
            nmis.push(self.parse_meta_item_inner()?);
-            if !self.eat(&token::Comma) {
+            if !self.eat(exp!(Comma)) {
                break;
            }
        }
@@ -413,13 +412,13 @@ impl<'a> Parser<'a> {

        let lo = self.token.span;
        let is_unsafe = if unsafe_allowed == AllowLeadingUnsafe::Yes {
-            self.eat_keyword(kw::Unsafe)
+            self.eat_keyword(exp!(Unsafe))
        } else {
            false
        };
        let unsafety = if is_unsafe {
            let unsafe_span = self.prev_token.span;
-            self.expect(&token::OpenDelim(Delimiter::Parenthesis))?;
+            self.expect(exp!(OpenParen))?;

            ast::Safety::Unsafe(unsafe_span)
        } else {
@@ -429,7 +428,7 @@ impl<'a> Parser<'a> {
        let path = self.parse_path(PathStyle::Mod)?;
        let kind = self.parse_meta_item_kind()?;
        if is_unsafe {
-            self.expect(&token::CloseDelim(Delimiter::Parenthesis))?;
+            self.expect(exp!(CloseParen))?;
        }
        let span = lo.to(self.prev_token.span);

@@ -437,9 +436,9 @@ impl<'a> Parser<'a> {
    }

    pub(crate) fn parse_meta_item_kind(&mut self) -> PResult<'a, ast::MetaItemKind> {
-        Ok(if self.eat(&token::Eq) {
+        Ok(if self.eat(exp!(Eq)) {
            ast::MetaItemKind::NameValue(self.parse_unsuffixed_meta_item_lit()?)
-        } else if self.check(&token::OpenDelim(Delimiter::Parenthesis)) {
+        } else if self.check(exp!(OpenParen)) {
            let (list, _) = self.parse_paren_comma_seq(|p| p.parse_meta_item_inner())?;
            ast::MetaItemKind::List(list)
        } else {