Move various token stream things from rustc_parse to rustc_ast.
Specifically: `TokenCursor`, `TokenTreeCursor`, `LazyAttrTokenStreamImpl`, `FlatToken`, `make_attr_token_stream`, `ParserRange`, `NodeRange`. `ParserReplacement`, and `NodeReplacement`. These are all related to token streams, rather than actual parsing. This will facilitate the simplifications in the next commit.
This commit is contained in:
@@ -12,7 +12,6 @@ pub mod token_type;
|
||||
mod ty;
|
||||
|
||||
use std::assert_matches::debug_assert_matches;
|
||||
use std::ops::Range;
|
||||
use std::{fmt, mem, slice};
|
||||
|
||||
use attr_wrapper::{AttrWrapper, UsePreAttrPos};
|
||||
@@ -25,7 +24,9 @@ use rustc_ast::ptr::P;
|
||||
use rustc_ast::token::{
|
||||
self, IdentIsRaw, InvisibleOrigin, MetaVarKind, NtExprKind, NtPatKind, Token, TokenKind,
|
||||
};
|
||||
use rustc_ast::tokenstream::{AttrsTarget, Spacing, TokenStream, TokenTree};
|
||||
use rustc_ast::tokenstream::{
|
||||
ParserRange, ParserReplacement, Spacing, TokenCursor, TokenStream, TokenTree, TokenTreeCursor,
|
||||
};
|
||||
use rustc_ast::util::case::Case;
|
||||
use rustc_ast::{
|
||||
self as ast, AnonConst, AttrArgs, AttrId, ByRef, Const, CoroutineKind, DUMMY_NODE_ID,
|
||||
@@ -37,7 +38,7 @@ use rustc_data_structures::fx::FxHashMap;
|
||||
use rustc_errors::{Applicability, Diag, FatalError, MultiSpan, PResult};
|
||||
use rustc_index::interval::IntervalSet;
|
||||
use rustc_session::parse::ParseSess;
|
||||
use rustc_span::{DUMMY_SP, Ident, Span, Symbol, kw, sym};
|
||||
use rustc_span::{Ident, Span, Symbol, kw, sym};
|
||||
use thin_vec::ThinVec;
|
||||
use token_type::TokenTypeSet;
|
||||
pub use token_type::{ExpKeywordPair, ExpTokenPair, TokenType};
|
||||
@@ -187,57 +188,6 @@ struct ClosureSpans {
|
||||
body: Span,
|
||||
}
|
||||
|
||||
/// A token range within a `Parser`'s full token stream.
|
||||
#[derive(Clone, Debug)]
|
||||
struct ParserRange(Range<u32>);
|
||||
|
||||
/// A token range within an individual AST node's (lazy) token stream, i.e.
|
||||
/// relative to that node's first token. Distinct from `ParserRange` so the two
|
||||
/// kinds of range can't be mixed up.
|
||||
#[derive(Clone, Debug)]
|
||||
struct NodeRange(Range<u32>);
|
||||
|
||||
/// Indicates a range of tokens that should be replaced by an `AttrsTarget`
|
||||
/// (replacement) or be replaced by nothing (deletion). This is used in two
|
||||
/// places during token collection.
|
||||
///
|
||||
/// 1. Replacement. During the parsing of an AST node that may have a
|
||||
/// `#[derive]` attribute, when we parse a nested AST node that has `#[cfg]`
|
||||
/// or `#[cfg_attr]`, we replace the entire inner AST node with
|
||||
/// `FlatToken::AttrsTarget`. This lets us perform eager cfg-expansion on an
|
||||
/// `AttrTokenStream`.
|
||||
///
|
||||
/// 2. Deletion. We delete inner attributes from all collected token streams,
|
||||
/// and instead track them through the `attrs` field on the AST node. This
|
||||
/// lets us manipulate them similarly to outer attributes. When we create a
|
||||
/// `TokenStream`, the inner attributes are inserted into the proper place
|
||||
/// in the token stream.
|
||||
///
|
||||
/// Each replacement starts off in `ParserReplacement` form but is converted to
|
||||
/// `NodeReplacement` form when it is attached to a single AST node, via
|
||||
/// `LazyAttrTokenStreamImpl`.
|
||||
type ParserReplacement = (ParserRange, Option<AttrsTarget>);
|
||||
|
||||
/// See the comment on `ParserReplacement`.
|
||||
type NodeReplacement = (NodeRange, Option<AttrsTarget>);
|
||||
|
||||
impl NodeRange {
|
||||
// Converts a range within a parser's tokens to a range within a
|
||||
// node's tokens beginning at `start_pos`.
|
||||
//
|
||||
// For example, imagine a parser with 50 tokens in its token stream, a
|
||||
// function that spans `ParserRange(20..40)` and an inner attribute within
|
||||
// that function that spans `ParserRange(30..35)`. We would find the inner
|
||||
// attribute's range within the function's tokens by subtracting 20, which
|
||||
// is the position of the function's start token. This gives
|
||||
// `NodeRange(10..15)`.
|
||||
fn new(ParserRange(parser_range): ParserRange, start_pos: u32) -> NodeRange {
|
||||
assert!(!parser_range.is_empty());
|
||||
assert!(parser_range.start >= start_pos);
|
||||
NodeRange((parser_range.start - start_pos)..(parser_range.end - start_pos))
|
||||
}
|
||||
}
|
||||
|
||||
/// Controls how we capture tokens. Capturing can be expensive,
|
||||
/// so we try to avoid performing capturing in cases where
|
||||
/// we will never need an `AttrTokenStream`.
|
||||
@@ -260,104 +210,6 @@ struct CaptureState {
|
||||
seen_attrs: IntervalSet<AttrId>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
struct TokenTreeCursor {
|
||||
stream: TokenStream,
|
||||
/// Points to the current token tree in the stream. In `TokenCursor::curr`,
|
||||
/// this can be any token tree. In `TokenCursor::stack`, this is always a
|
||||
/// `TokenTree::Delimited`.
|
||||
index: usize,
|
||||
}
|
||||
|
||||
impl TokenTreeCursor {
|
||||
#[inline]
|
||||
fn new(stream: TokenStream) -> Self {
|
||||
TokenTreeCursor { stream, index: 0 }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn curr(&self) -> Option<&TokenTree> {
|
||||
self.stream.get(self.index)
|
||||
}
|
||||
|
||||
fn look_ahead(&self, n: usize) -> Option<&TokenTree> {
|
||||
self.stream.get(self.index + n)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn bump(&mut self) {
|
||||
self.index += 1;
|
||||
}
|
||||
}
|
||||
|
||||
/// A `TokenStream` cursor that produces `Token`s. It's a bit odd that
|
||||
/// we (a) lex tokens into a nice tree structure (`TokenStream`), and then (b)
|
||||
/// use this type to emit them as a linear sequence. But a linear sequence is
|
||||
/// what the parser expects, for the most part.
|
||||
#[derive(Clone, Debug)]
|
||||
struct TokenCursor {
|
||||
// Cursor for the current (innermost) token stream. The index within the
|
||||
// cursor can point to any token tree in the stream (or one past the end).
|
||||
// The delimiters for this token stream are found in `self.stack.last()`;
|
||||
// if that is `None` we are in the outermost token stream which never has
|
||||
// delimiters.
|
||||
curr: TokenTreeCursor,
|
||||
|
||||
// Token streams surrounding the current one. The index within each cursor
|
||||
// always points to a `TokenTree::Delimited`.
|
||||
stack: Vec<TokenTreeCursor>,
|
||||
}
|
||||
|
||||
impl TokenCursor {
|
||||
fn next(&mut self) -> (Token, Spacing) {
|
||||
self.inlined_next()
|
||||
}
|
||||
|
||||
/// This always-inlined version should only be used on hot code paths.
|
||||
#[inline(always)]
|
||||
fn inlined_next(&mut self) -> (Token, Spacing) {
|
||||
loop {
|
||||
// FIXME: we currently don't return `Delimiter::Invisible` open/close delims. To fix
|
||||
// #67062 we will need to, whereupon the `delim != Delimiter::Invisible` conditions
|
||||
// below can be removed.
|
||||
if let Some(tree) = self.curr.curr() {
|
||||
match tree {
|
||||
&TokenTree::Token(token, spacing) => {
|
||||
debug_assert!(!token.kind.is_delim());
|
||||
let res = (token, spacing);
|
||||
self.curr.bump();
|
||||
return res;
|
||||
}
|
||||
&TokenTree::Delimited(sp, spacing, delim, ref tts) => {
|
||||
let trees = TokenTreeCursor::new(tts.clone());
|
||||
self.stack.push(mem::replace(&mut self.curr, trees));
|
||||
if !delim.skip() {
|
||||
return (Token::new(delim.as_open_token_kind(), sp.open), spacing.open);
|
||||
}
|
||||
// No open delimiter to return; continue on to the next iteration.
|
||||
}
|
||||
};
|
||||
} else if let Some(parent) = self.stack.pop() {
|
||||
// We have exhausted this token stream. Move back to its parent token stream.
|
||||
let Some(&TokenTree::Delimited(span, spacing, delim, _)) = parent.curr() else {
|
||||
panic!("parent should be Delimited")
|
||||
};
|
||||
self.curr = parent;
|
||||
self.curr.bump(); // move past the `Delimited`
|
||||
if !delim.skip() {
|
||||
return (Token::new(delim.as_close_token_kind(), span.close), spacing.close);
|
||||
}
|
||||
// No close delimiter to return; continue on to the next iteration.
|
||||
} else {
|
||||
// We have exhausted the outermost token stream. The use of
|
||||
// `Spacing::Alone` is arbitrary and immaterial, because the
|
||||
// `Eof` token's spacing is never used.
|
||||
return (Token::new(token::Eof, DUMMY_SP), Spacing::Alone);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A sequence separator.
|
||||
#[derive(Debug)]
|
||||
struct SeqSep<'a> {
|
||||
@@ -1742,26 +1594,6 @@ impl<'a> Parser<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
/// A helper struct used when building an `AttrTokenStream` from
|
||||
/// a `LazyAttrTokenStream`. Both delimiter and non-delimited tokens
|
||||
/// are stored as `FlatToken::Token`. A vector of `FlatToken`s
|
||||
/// is then 'parsed' to build up an `AttrTokenStream` with nested
|
||||
/// `AttrTokenTree::Delimited` tokens.
|
||||
#[derive(Debug, Clone)]
|
||||
enum FlatToken {
|
||||
/// A token - this holds both delimiter (e.g. '{' and '}')
|
||||
/// and non-delimiter tokens
|
||||
Token((Token, Spacing)),
|
||||
/// Holds the `AttrsTarget` for an AST node. The `AttrsTarget` is inserted
|
||||
/// directly into the constructed `AttrTokenStream` as an
|
||||
/// `AttrTokenTree::AttrsTarget`.
|
||||
AttrsTarget(AttrsTarget),
|
||||
/// A special 'empty' token that is ignored during the conversion
|
||||
/// to an `AttrTokenStream`. This is used to simplify the
|
||||
/// handling of replace ranges.
|
||||
Empty,
|
||||
}
|
||||
|
||||
// Metavar captures of various kinds.
|
||||
#[derive(Clone, Debug)]
|
||||
pub enum ParseNtResult {
|
||||
|
||||
Reference in New Issue
Block a user