Files
rust/crates/mbe/src/subtree_source.rs

184 lines
5.8 KiB
Rust
Raw Normal View History

2021-05-22 17:20:22 +03:00
//! Our parser is generic over the source of tokens it parses.
//!
//! This module defines tokens sourced from declarative macros.
2020-08-12 17:06:49 +02:00
use parser::{Token, TokenSource};
2020-10-07 11:49:31 +02:00
use syntax::{lex_single_syntax_kind, SmolStr, SyntaxKind, SyntaxKind::*, T};
2021-01-05 01:50:34 +08:00
use tt::buffer::TokenBuffer;
2019-04-07 21:42:53 +08:00
#[derive(Debug, Clone, Eq, PartialEq)]
2019-04-07 21:42:53 +08:00
struct TtToken {
2021-01-05 01:50:34 +08:00
tt: Token,
text: SmolStr,
2019-04-07 21:42:53 +08:00
}
2021-01-05 01:50:34 +08:00
pub(crate) struct SubtreeTokenSource {
cached: Vec<TtToken>,
2019-05-27 22:56:21 +08:00
curr: (Token, usize),
}
2021-01-05 01:50:34 +08:00
impl<'a> SubtreeTokenSource {
2019-05-27 22:56:21 +08:00
// Helper function used in test
2019-05-28 10:55:08 +08:00
#[cfg(test)]
2020-11-02 16:31:38 +01:00
pub(crate) fn text(&self) -> SmolStr {
2021-01-05 01:50:34 +08:00
match self.cached.get(self.curr.1) {
2021-06-13 09:24:16 +05:30
Some(tt) => tt.text.clone(),
2019-05-27 22:56:21 +08:00
_ => SmolStr::new(""),
}
}
}
2021-01-05 01:50:34 +08:00
impl<'a> SubtreeTokenSource {
pub(crate) fn new(buffer: &TokenBuffer) -> SubtreeTokenSource {
let mut current = buffer.begin();
let mut cached = Vec::with_capacity(100);
2019-05-27 22:56:21 +08:00
2021-01-05 01:50:34 +08:00
while !current.eof() {
let cursor = current;
let tt = cursor.token_tree();
2019-05-27 22:56:21 +08:00
2021-01-05 01:50:34 +08:00
// Check if it is lifetime
if let Some(tt::buffer::TokenTreeRef::Leaf(tt::Leaf::Punct(punct), _)) = tt {
2020-04-18 19:28:07 +08:00
if punct.char == '\'' {
2021-01-05 01:50:34 +08:00
let next = cursor.bump();
2021-01-05 00:22:42 +08:00
if let Some(tt::buffer::TokenTreeRef::Leaf(tt::Leaf::Ident(ident), _)) =
next.token_tree()
{
2021-01-05 01:50:34 +08:00
let text = SmolStr::new("'".to_string() + &ident.text);
cached.push(TtToken {
tt: Token { kind: LIFETIME_IDENT, is_jointed_to_next: false },
text,
});
current = next.bump();
continue;
2020-04-18 19:28:07 +08:00
} else {
panic!("Next token must be ident : {:#?}", next.token_tree());
}
}
}
2021-01-05 01:50:34 +08:00
current = match tt {
Some(tt::buffer::TokenTreeRef::Leaf(leaf, _)) => {
2021-06-13 09:24:16 +05:30
cached.push(convert_leaf(leaf));
2021-01-05 01:50:34 +08:00
cursor.bump()
2019-05-23 02:00:34 +08:00
}
2021-01-05 01:50:34 +08:00
Some(tt::buffer::TokenTreeRef::Subtree(subtree, _)) => {
cached.push(convert_delim(subtree.delimiter_kind(), false));
cursor.subtree().unwrap()
2020-04-18 19:28:07 +08:00
}
2021-01-05 01:50:34 +08:00
None => {
if let Some(subtree) = cursor.end() {
cached.push(convert_delim(subtree.delimiter_kind(), true));
cursor.bump()
} else {
continue;
}
2019-05-23 02:00:34 +08:00
}
2021-01-05 01:50:34 +08:00
};
2019-04-22 22:46:39 +08:00
}
2021-01-05 01:50:34 +08:00
let mut res = SubtreeTokenSource {
curr: (Token { kind: EOF, is_jointed_to_next: false }, 0),
cached,
};
res.curr = (res.token(0), 0);
res
}
fn token(&self, pos: usize) -> Token {
match self.cached.get(pos) {
Some(it) => it.tt,
None => Token { kind: EOF, is_jointed_to_next: false },
}
2019-04-07 21:42:53 +08:00
}
}
2021-01-05 01:50:34 +08:00
impl<'a> TokenSource for SubtreeTokenSource {
2019-05-25 20:31:53 +08:00
fn current(&self) -> Token {
self.curr.0
2019-04-07 21:42:53 +08:00
}
2019-05-25 20:31:53 +08:00
/// Lookahead n token
fn lookahead_nth(&self, n: usize) -> Token {
2021-01-05 01:50:34 +08:00
self.token(self.curr.1 + n)
2019-05-25 20:31:53 +08:00
}
/// bump cursor to next token
fn bump(&mut self) {
if self.current().kind == EOF {
return;
}
2021-01-05 01:50:34 +08:00
self.curr = (self.token(self.curr.1 + 1), self.curr.1 + 1);
2019-04-07 21:42:53 +08:00
}
2019-05-25 20:31:53 +08:00
/// Is the current token a specified keyword?
fn is_keyword(&self, kw: &str) -> bool {
2021-01-05 01:50:34 +08:00
match self.cached.get(self.curr.1) {
2021-06-13 09:24:16 +05:30
Some(t) => t.text == *kw,
2021-06-07 13:59:01 +02:00
None => false,
}
2019-04-07 21:42:53 +08:00
}
}
2019-12-18 11:47:26 +08:00
fn convert_delim(d: Option<tt::DelimiterKind>, closing: bool) -> TtToken {
let (kinds, texts) = match d {
2019-12-13 01:41:44 +08:00
Some(tt::DelimiterKind::Parenthesis) => ([T!['('], T![')']], "()"),
Some(tt::DelimiterKind::Brace) => ([T!['{'], T!['}']], "{}"),
Some(tt::DelimiterKind::Bracket) => ([T!['['], T![']']], "[]"),
2019-12-13 21:53:34 +08:00
None => ([L_DOLLAR, R_DOLLAR], ""),
};
let idx = closing as usize;
let kind = kinds[idx];
let text = if !texts.is_empty() { &texts[idx..texts.len() - (1 - idx)] } else { "" };
2021-01-05 01:50:34 +08:00
TtToken { tt: Token { kind, is_jointed_to_next: false }, text: SmolStr::new(text) }
}
2019-04-07 21:42:53 +08:00
fn convert_literal(l: &tt::Literal) -> TtToken {
2020-10-07 11:49:31 +02:00
let is_negated = l.text.starts_with('-');
let inner_text = &l.text[if is_negated { 1 } else { 0 }..];
let kind = lex_single_syntax_kind(inner_text)
.map(|(kind, _error)| kind)
.filter(|kind| {
kind.is_literal() && (!is_negated || matches!(kind, FLOAT_NUMBER | INT_NUMBER))
})
.unwrap_or_else(|| panic!("Fail to convert given literal {:#?}", &l));
2019-04-24 23:01:32 +08:00
2021-01-05 01:50:34 +08:00
TtToken { tt: Token { kind, is_jointed_to_next: false }, text: l.text.clone() }
}
2019-04-07 21:42:53 +08:00
fn convert_ident(ident: &tt::Ident) -> TtToken {
let kind = match ident.text.as_ref() {
"true" => T![true],
"false" => T![false],
"_" => UNDERSCORE,
2020-12-15 19:23:51 +01:00
i if i.starts_with('\'') => LIFETIME_IDENT,
_ => SyntaxKind::from_keyword(ident.text.as_str()).unwrap_or(IDENT),
};
2021-01-05 01:50:34 +08:00
TtToken { tt: Token { kind, is_jointed_to_next: false }, text: ident.text.clone() }
}
2019-04-07 21:42:53 +08:00
2019-07-05 12:02:32 -04:00
fn convert_punct(p: tt::Punct) -> TtToken {
2020-04-18 19:28:07 +08:00
let kind = match SyntaxKind::from_char(p.char) {
None => panic!("{:#?} is not a valid punct", p),
Some(kind) => kind,
};
2019-05-02 10:19:12 +08:00
let text = {
let mut buf = [0u8; 4];
let s: &str = p.char.encode_utf8(&mut buf);
SmolStr::new(s)
};
2021-01-05 01:50:34 +08:00
TtToken { tt: Token { kind, is_jointed_to_next: p.spacing == tt::Spacing::Joint }, text }
2019-04-07 21:42:53 +08:00
}
2019-05-02 10:19:12 +08:00
fn convert_leaf(leaf: &tt::Leaf) -> TtToken {
match leaf {
tt::Leaf::Literal(l) => convert_literal(l),
tt::Leaf::Ident(ident) => convert_ident(ident),
2019-07-05 12:02:32 -04:00
tt::Leaf::Punct(punct) => convert_punct(*punct),
}
}