Files
rust/crates/ra_parser/src/grammar/patterns.rs

380 lines
9.4 KiB
Rust
Raw Normal View History

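//! Grammar rules for parsing patterns: or-patterns, range, literal, path,
//! tuple, slice, record, binding, `box`, reference, wildcard and `..` patterns.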
use super::*;
2019-01-18 11:02:30 +03:00
pub(super) const PATTERN_FIRST: TokenSet = expressions::LITERAL_FIRST
.union(paths::PATH_FIRST)
2020-06-09 13:36:08 +02:00
.union(token_set![T![box], T![ref], T![mut], T!['('], T!['['], T![&], T![_], T![-], T![.]]);
2019-09-02 17:37:48 +03:00
pub(crate) fn pattern(p: &mut Parser) {
pattern_r(p, PAT_RECOVERY_SET);
Fix yet another parser infinite loop This commit is an example of fixing a common parser error: infinite loop due to error recovery. This error typically happens when we parse a list of items and fail to parse a specific item at the current position. One choices is to skip a token and try to parse a list item at the next position. This is a good, but not universal, default. When parsing a list of arguments in a function call, you, for example, don't want to skip over `fn`, because it's most likely that it is a function declaration, and not a mistyped arg: ``` fn foo() { quux(1, 2 fn bar() { } ``` Another choice is to bail out of the loop immediately, but it isn't perfect either: sometimes skipping over garbage helps: ``` quux(1, foo:, 92) // should skip over `:`, b/c that's part of `foo::bar` ``` In general, parser tries to balance these two cases, though we don't have a definitive strategy yet. However, if the parser accidentally neither skips over a token, nor breaks out of the loop, then it becomes stuck in the loop infinitely (there's an internal counter to self-check this situation and panic though), and that's exactly what is demonstrated by the test. To fix such situation, first of all, add the test case to tests/data/parser/{err,fuzz-failures}. Then, run ``` RUST_BACKTRACE=short cargo test --package libsyntax2 ```` to verify that parser indeed panics, and to get an idea what grammar production is the culprit (look for `_list` functions!). In this case, I see ``` 10: libsyntax2::grammar::expressions::atom::match_arm_list at crates/libsyntax2/src/grammar/expressions/atom.rs:309 ``` and that's look like it might be a culprit. I verify it by adding `eprintln!("loopy {:?}", p.current());` and indeed I see that this is printed repeatedly. Diagnosing this a bit shows that the problem is that `pattern::pattern` function does not consume anything if the next token is `let`. 
That is a good default to make cases like ``` let let foo = 92; ``` where the user hasn't typed the pattern yet, to parse in a reasonable they correctly. For match arms, pretty much the single thing we expect is a pattern, so, for a fix, I introduce a special variant of pattern that does not do recovery.
2018-09-08 19:10:20 +03:00
}
/// Parses a pattern list separated by pipes `|`
2020-02-09 18:57:01 +00:00
pub(super) fn pattern_top(p: &mut Parser) {
pattern_top_r(p, PAT_RECOVERY_SET)
}
/// Parses exactly one pattern (an atom, optionally extended into a range
/// pattern) without looking for `|` alternatives, recovering on the default
/// `PAT_RECOVERY_SET`.
pub(crate) fn pattern_single(p: &mut Parser) {
    pattern_single_r(p, PAT_RECOVERY_SET)
}
/// Parses a pattern list separated by pipes `|`
/// using the given `recovery_set`
2020-02-09 18:57:01 +00:00
pub(super) fn pattern_top_r(p: &mut Parser, recovery_set: TokenSet) {
2019-05-15 15:35:47 +03:00
p.eat(T![|]);
pattern_r(p, recovery_set);
2020-02-09 18:57:01 +00:00
}
2020-02-09 18:57:01 +00:00
/// Parses a pattern list separated by pipes `|`, with no leading `|`,using the
/// given `recovery_set`
// test or_pattern
// fn main() {
// match () {
// (_ | _) => (),
// &(_ | _) => (),
// (_ | _,) => (),
// [_ | _,] => (),
// }
// }
fn pattern_r(p: &mut Parser, recovery_set: TokenSet) {
let m = p.start();
pattern_single_r(p, recovery_set);
2020-02-10 20:11:44 +00:00
if !p.at(T![|]) {
m.abandon(p);
return;
}
2019-05-15 15:35:47 +03:00
while p.eat(T![|]) {
2020-02-09 18:57:01 +00:00
pattern_single_r(p, recovery_set);
}
2020-02-10 20:11:44 +00:00
m.complete(p, OR_PAT);
}
2020-02-09 18:57:01 +00:00
fn pattern_single_r(p: &mut Parser, recovery_set: TokenSet) {
Fix yet another parser infinite loop This commit is an example of fixing a common parser error: infinite loop due to error recovery. This error typically happens when we parse a list of items and fail to parse a specific item at the current position. One choices is to skip a token and try to parse a list item at the next position. This is a good, but not universal, default. When parsing a list of arguments in a function call, you, for example, don't want to skip over `fn`, because it's most likely that it is a function declaration, and not a mistyped arg: ``` fn foo() { quux(1, 2 fn bar() { } ``` Another choice is to bail out of the loop immediately, but it isn't perfect either: sometimes skipping over garbage helps: ``` quux(1, foo:, 92) // should skip over `:`, b/c that's part of `foo::bar` ``` In general, parser tries to balance these two cases, though we don't have a definitive strategy yet. However, if the parser accidentally neither skips over a token, nor breaks out of the loop, then it becomes stuck in the loop infinitely (there's an internal counter to self-check this situation and panic though), and that's exactly what is demonstrated by the test. To fix such situation, first of all, add the test case to tests/data/parser/{err,fuzz-failures}. Then, run ``` RUST_BACKTRACE=short cargo test --package libsyntax2 ```` to verify that parser indeed panics, and to get an idea what grammar production is the culprit (look for `_list` functions!). In this case, I see ``` 10: libsyntax2::grammar::expressions::atom::match_arm_list at crates/libsyntax2/src/grammar/expressions/atom.rs:309 ``` and that's look like it might be a culprit. I verify it by adding `eprintln!("loopy {:?}", p.current());` and indeed I see that this is printed repeatedly. Diagnosing this a bit shows that the problem is that `pattern::pattern` function does not consume anything if the next token is `let`. 
That is a good default to make cases like ``` let let foo = 92; ``` where the user hasn't typed the pattern yet, to parse in a reasonable they correctly. For match arms, pretty much the single thing we expect is a pattern, so, for a fix, I introduce a special variant of pattern that does not do recovery.
2018-09-08 19:10:20 +03:00
if let Some(lhs) = atom_pat(p, recovery_set) {
2018-08-08 15:05:33 +03:00
// test range_pat
// fn main() {
// match 92 {
// 0 ... 100 => (),
// 101 ..= 200 => (),
// 200 .. 301=> (),
// }
2018-08-08 15:05:33 +03:00
// }
for &range_op in [T![...], T![..=], T![..]].iter() {
if p.at(range_op) {
let m = lhs.precede(p);
p.bump(range_op);
atom_pat(p, recovery_set);
m.complete(p, RANGE_PAT);
return;
}
2018-08-08 15:05:33 +03:00
}
}
}
2018-08-28 19:35:09 +03:00
const PAT_RECOVERY_SET: TokenSet =
2018-09-03 15:10:06 +03:00
token_set![LET_KW, IF_KW, WHILE_KW, LOOP_KW, MATCH_KW, R_PAREN, COMMA];
2018-08-28 19:35:09 +03:00
Fix yet another parser infinite loop This commit is an example of fixing a common parser error: infinite loop due to error recovery. This error typically happens when we parse a list of items and fail to parse a specific item at the current position. One choices is to skip a token and try to parse a list item at the next position. This is a good, but not universal, default. When parsing a list of arguments in a function call, you, for example, don't want to skip over `fn`, because it's most likely that it is a function declaration, and not a mistyped arg: ``` fn foo() { quux(1, 2 fn bar() { } ``` Another choice is to bail out of the loop immediately, but it isn't perfect either: sometimes skipping over garbage helps: ``` quux(1, foo:, 92) // should skip over `:`, b/c that's part of `foo::bar` ``` In general, parser tries to balance these two cases, though we don't have a definitive strategy yet. However, if the parser accidentally neither skips over a token, nor breaks out of the loop, then it becomes stuck in the loop infinitely (there's an internal counter to self-check this situation and panic though), and that's exactly what is demonstrated by the test. To fix such situation, first of all, add the test case to tests/data/parser/{err,fuzz-failures}. Then, run ``` RUST_BACKTRACE=short cargo test --package libsyntax2 ```` to verify that parser indeed panics, and to get an idea what grammar production is the culprit (look for `_list` functions!). In this case, I see ``` 10: libsyntax2::grammar::expressions::atom::match_arm_list at crates/libsyntax2/src/grammar/expressions/atom.rs:309 ``` and that's look like it might be a culprit. I verify it by adding `eprintln!("loopy {:?}", p.current());` and indeed I see that this is printed repeatedly. Diagnosing this a bit shows that the problem is that `pattern::pattern` function does not consume anything if the next token is `let`. 
That is a good default that lets cases like ``` let let foo = 92; ```, where the user hasn't typed the pattern yet, parse in a reasonable way. For match arms, a pattern is pretty much the only thing we expect, so, as a fix, I introduce a special variant of pattern that does not do recovery.
2018-09-08 19:10:20 +03:00
fn atom_pat(p: &mut Parser, recovery_set: TokenSet) -> Option<CompletedMarker> {
2019-08-23 13:54:43 -07:00
let m = match p.nth(0) {
T![box] => box_pat(p),
T![ref] | T![mut] => bind_pat(p, true),
IDENT => match p.nth(1) {
// Checks the token after an IDENT to see if a pattern is a path (Struct { .. }) or macro
// (T![x]).
2020-04-03 15:44:06 +02:00
T!['('] | T!['{'] | T![!] => path_or_macro_pat(p),
T![:] if p.nth_at(1, T![::]) => path_or_macro_pat(p),
_ => bind_pat(p, true),
},
2019-08-23 13:54:43 -07:00
// test type_path_in_pattern
// fn main() { let <_>::Foo = (); }
_ if paths::is_path_start(p) => path_or_macro_pat(p),
2019-08-23 13:54:43 -07:00
_ if is_literal_pat_start(p) => literal_pat(p),
2018-08-08 00:59:16 +03:00
2019-09-14 17:06:08 -07:00
T![.] if p.at(T![..]) => dot_dot_pat(p),
2019-05-15 15:35:47 +03:00
T![_] => placeholder_pat(p),
T![&] => ref_pat(p),
T!['('] => tuple_pat(p),
T!['['] => slice_pat(p),
2019-08-23 13:54:43 -07:00
2018-08-08 15:05:33 +03:00
_ => {
Fix yet another parser infinite loop This commit is an example of fixing a common parser error: infinite loop due to error recovery. This error typically happens when we parse a list of items and fail to parse a specific item at the current position. One choices is to skip a token and try to parse a list item at the next position. This is a good, but not universal, default. When parsing a list of arguments in a function call, you, for example, don't want to skip over `fn`, because it's most likely that it is a function declaration, and not a mistyped arg: ``` fn foo() { quux(1, 2 fn bar() { } ``` Another choice is to bail out of the loop immediately, but it isn't perfect either: sometimes skipping over garbage helps: ``` quux(1, foo:, 92) // should skip over `:`, b/c that's part of `foo::bar` ``` In general, parser tries to balance these two cases, though we don't have a definitive strategy yet. However, if the parser accidentally neither skips over a token, nor breaks out of the loop, then it becomes stuck in the loop infinitely (there's an internal counter to self-check this situation and panic though), and that's exactly what is demonstrated by the test. To fix such situation, first of all, add the test case to tests/data/parser/{err,fuzz-failures}. Then, run ``` RUST_BACKTRACE=short cargo test --package libsyntax2 ```` to verify that parser indeed panics, and to get an idea what grammar production is the culprit (look for `_list` functions!). In this case, I see ``` 10: libsyntax2::grammar::expressions::atom::match_arm_list at crates/libsyntax2/src/grammar/expressions/atom.rs:309 ``` and that's look like it might be a culprit. I verify it by adding `eprintln!("loopy {:?}", p.current());` and indeed I see that this is printed repeatedly. Diagnosing this a bit shows that the problem is that `pattern::pattern` function does not consume anything if the next token is `let`. 
That is a good default to make cases like ``` let let foo = 92; ``` where the user hasn't typed the pattern yet, to parse in a reasonable they correctly. For match arms, pretty much the single thing we expect is a pattern, so, for a fix, I introduce a special variant of pattern that does not do recovery.
2018-09-08 19:10:20 +03:00
p.err_recover("expected pattern", recovery_set);
2018-08-08 15:05:33 +03:00
return None;
}
};
2019-08-23 13:54:43 -07:00
2018-08-08 15:05:33 +03:00
Some(m)
}
2019-08-23 13:54:43 -07:00
fn is_literal_pat_start(p: &Parser) -> bool {
2019-05-15 15:35:47 +03:00
p.at(T![-]) && (p.nth(1) == INT_NUMBER || p.nth(1) == FLOAT_NUMBER)
|| p.at_ts(expressions::LITERAL_FIRST)
}
// test literal_pattern
// fn main() {
// match () {
// -1 => (),
// 92 => (),
// 'c' => (),
// "hello" => (),
// }
// }
fn literal_pat(p: &mut Parser) -> CompletedMarker {
assert!(is_literal_pat_start(p));
let m = p.start();
2019-05-15 15:35:47 +03:00
if p.at(T![-]) {
2019-09-19 15:51:46 -04:00
p.bump(T![-]);
}
expressions::literal(p);
m.complete(p, LITERAL_PAT)
}
2018-08-04 15:47:45 +03:00
// test path_part
// fn foo() {
// let foo::Bar = ();
// let ::Bar = ();
// let Bar { .. } = ();
// let Bar(..) = ();
// }
2020-04-03 15:44:06 +02:00
fn path_or_macro_pat(p: &mut Parser) -> CompletedMarker {
assert!(paths::is_path_start(p));
2018-08-04 15:47:45 +03:00
let m = p.start();
paths::expr_path(p);
let kind = match p.current() {
2019-05-15 15:35:47 +03:00
T!['('] => {
2018-08-04 15:47:45 +03:00
tuple_pat_fields(p);
2018-08-07 14:41:03 +03:00
TUPLE_STRUCT_PAT
2018-08-04 15:47:45 +03:00
}
2019-05-15 15:35:47 +03:00
T!['{'] => {
2019-08-23 15:55:21 +03:00
record_field_pat_list(p);
RECORD_PAT
2018-08-04 15:47:45 +03:00
}
2020-04-03 15:44:06 +02:00
// test marco_pat
// fn main() {
// let m!(x) = 0;
// }
T![!] => {
items::macro_call_after_excl(p);
return m.complete(p, MACRO_CALL).precede(p).complete(p, MACRO_PAT);
2020-04-03 15:44:06 +02:00
}
_ => PATH_PAT,
2018-08-04 15:47:45 +03:00
};
2018-08-08 15:05:33 +03:00
m.complete(p, kind)
2018-08-04 15:47:45 +03:00
}
// test tuple_pat_fields
// fn foo() {
// let S() = ();
// let S(_) = ();
// let S(_,) = ();
// let S(_, .. , x) = ();
// }
fn tuple_pat_fields(p: &mut Parser) {
2019-05-15 15:35:47 +03:00
assert!(p.at(T!['(']));
2019-09-19 15:51:46 -04:00
p.bump(T!['(']);
2019-05-15 15:35:47 +03:00
pat_list(p, T![')']);
p.expect(T![')']);
2018-08-04 15:47:45 +03:00
}
2019-08-23 15:55:21 +03:00
// test record_field_pat_list
2018-08-04 15:47:45 +03:00
// fn foo() {
// let S {} = ();
// let S { f, ref mut g } = ();
// let S { h: _, ..} = ();
// let S { h: _, } = ();
// }
2019-08-23 15:55:21 +03:00
fn record_field_pat_list(p: &mut Parser) {
2019-05-15 15:35:47 +03:00
assert!(p.at(T!['{']));
2018-08-24 19:27:30 +03:00
let m = p.start();
2019-09-19 15:51:46 -04:00
p.bump(T!['{']);
2019-05-15 15:35:47 +03:00
while !p.at(EOF) && !p.at(T!['}']) {
2018-08-04 15:47:45 +03:00
match p.current() {
2020-07-31 21:45:29 +02:00
// A trailing `..` is *not* treated as a REST_PAT.
T![.] if p.at(T![..]) => p.bump(T![..]),
2019-05-15 15:35:47 +03:00
T!['{'] => error_block(p, "expected ident"),
c => {
let m = p.start();
match c {
// test record_field_pat
// fn foo() {
// let S { 0: 1 } = ();
// let S { x: 1 } = ();
// }
IDENT | INT_NUMBER if p.nth(1) == T![:] => {
name_ref_or_index(p);
p.bump(T![:]);
pattern(p);
}
T![box] => {
// FIXME: not all box patterns should be allowed
box_pat(p);
}
_ => {
bind_pat(p, false);
}
}
2020-07-31 19:54:16 +02:00
m.complete(p, RECORD_PAT_FIELD);
}
2018-08-04 15:47:45 +03:00
}
2019-05-15 15:35:47 +03:00
if !p.at(T!['}']) {
p.expect(T![,]);
2018-08-04 15:47:45 +03:00
}
}
2019-05-15 15:35:47 +03:00
p.expect(T!['}']);
2020-07-31 19:54:16 +02:00
m.complete(p, RECORD_PAT_FIELD_LIST);
2018-08-04 15:47:45 +03:00
}
// test placeholder_pat
// fn main() { let _ = (); }
2018-08-08 15:05:33 +03:00
fn placeholder_pat(p: &mut Parser) -> CompletedMarker {
2019-05-15 15:35:47 +03:00
assert!(p.at(T![_]));
let m = p.start();
2019-09-19 15:51:46 -04:00
p.bump(T![_]);
2020-07-31 20:07:21 +02:00
m.complete(p, WILDCARD_PAT)
}
2019-09-14 17:06:08 -07:00
// test dot_dot_pat
// fn main() {
// let .. = ();
// //
// // Tuples
// //
// let (a, ..) = ();
// let (a, ..,) = ();
// let Tuple(a, ..) = ();
// let Tuple(a, ..,) = ();
// let (.., ..) = ();
// let Tuple(.., ..) = ();
// let (.., a, ..) = ();
// let Tuple(.., a, ..) = ();
// //
// // Slices
// //
// let [..] = ();
// let [head, ..] = ();
// let [head, tail @ ..] = ();
// let [head, .., cons] = ();
// let [head, mid @ .., cons] = ();
// let [head, .., .., cons] = ();
// let [head, .., mid, tail @ ..] = ();
// let [head, .., mid, .., cons] = ();
// }
fn dot_dot_pat(p: &mut Parser) -> CompletedMarker {
assert!(p.at(T![..]));
let m = p.start();
p.bump(T![..]);
2020-07-31 21:45:29 +02:00
m.complete(p, REST_PAT)
2019-09-14 17:06:08 -07:00
}
// test ref_pat
// fn main() {
// let &a = ();
// let &mut b = ();
// }
2018-08-08 15:05:33 +03:00
fn ref_pat(p: &mut Parser) -> CompletedMarker {
2019-05-15 15:35:47 +03:00
assert!(p.at(T![&]));
let m = p.start();
2019-09-19 15:51:46 -04:00
p.bump(T![&]);
2019-05-15 15:35:47 +03:00
p.eat(T![mut]);
2020-02-09 18:57:01 +00:00
pattern_single(p);
2018-08-08 15:05:33 +03:00
m.complete(p, REF_PAT)
}
2018-08-07 14:41:03 +03:00
// test tuple_pat
// fn main() {
// let (a, b, ..) = ();
2020-02-10 20:11:44 +00:00
// let (a,) = ();
// let (..) = ();
// let () = ();
2018-08-07 14:41:03 +03:00
// }
2018-08-08 15:05:33 +03:00
fn tuple_pat(p: &mut Parser) -> CompletedMarker {
2019-05-15 15:35:47 +03:00
assert!(p.at(T!['(']));
2018-08-07 14:41:03 +03:00
let m = p.start();
2020-02-09 18:57:01 +00:00
p.bump(T!['(']);
let mut has_comma = false;
let mut has_pat = false;
let mut has_rest = false;
while !p.at(EOF) && !p.at(T![')']) {
has_pat = true;
if !p.at_ts(PATTERN_FIRST) {
p.error("expected a pattern");
break;
}
has_rest |= p.at(T![..]);
pattern(p);
if !p.at(T![')']) {
has_comma = true;
p.expect(T![,]);
}
}
p.expect(T![')']);
m.complete(p, if !has_comma && !has_rest && has_pat { PAREN_PAT } else { TUPLE_PAT })
2018-08-07 14:41:03 +03:00
}
2018-08-07 17:00:45 +03:00
// test slice_pat
// fn main() {
// let [a, b, ..] = [];
// }
2018-08-08 15:05:33 +03:00
fn slice_pat(p: &mut Parser) -> CompletedMarker {
2019-05-15 15:35:47 +03:00
assert!(p.at(T!['[']));
2018-08-07 17:00:45 +03:00
let m = p.start();
2019-09-19 15:51:46 -04:00
p.bump(T!['[']);
2019-05-15 15:35:47 +03:00
pat_list(p, T![']']);
p.expect(T![']']);
2018-09-12 11:26:52 +03:00
m.complete(p, SLICE_PAT)
}
fn pat_list(p: &mut Parser, ket: SyntaxKind) {
while !p.at(EOF) && !p.at(ket) {
2019-09-14 17:06:08 -07:00
if !p.at_ts(PATTERN_FIRST) {
p.error("expected a pattern");
break;
2018-08-07 17:00:45 +03:00
}
2019-09-14 17:06:08 -07:00
pattern(p);
2018-09-12 11:26:52 +03:00
if !p.at(ket) {
2019-05-15 15:35:47 +03:00
p.expect(T![,]);
2018-08-07 17:00:45 +03:00
}
}
}
// test bind_pat
// fn main() {
// let a = ();
2018-07-31 15:30:11 +03:00
// let mut b = ();
// let ref c = ();
// let ref mut d = ();
// let e @ _ = ();
// let ref mut f @ g @ _ = ();
// }
2018-08-08 15:05:33 +03:00
fn bind_pat(p: &mut Parser, with_at: bool) -> CompletedMarker {
let m = p.start();
2019-05-15 15:35:47 +03:00
p.eat(T![ref]);
p.eat(T![mut]);
name(p);
2019-05-15 15:35:47 +03:00
if with_at && p.eat(T![@]) {
2020-02-09 18:57:01 +00:00
pattern_single(p);
}
2020-07-31 20:09:09 +02:00
m.complete(p, IDENT_PAT)
}
2019-08-23 13:54:43 -07:00
// test box_pat
// fn main() {
// let box i = ();
// let box Outer { box i, j: box Inner(box &x) } = ();
// let box ref mut i = ();
// }
fn box_pat(p: &mut Parser) -> CompletedMarker {
assert!(p.at(T![box]));
let m = p.start();
2019-09-19 15:51:46 -04:00
p.bump(T![box]);
2020-02-09 18:57:01 +00:00
pattern_single(p);
2019-08-23 13:54:43 -07:00
m.complete(p, BOX_PAT)
}