Implement RFC 3503: frontmatters

Supersedes #137193
This commit is contained in:
Deadbeef
2025-04-12 15:53:46 +00:00
parent 0c33fe2c3d
commit 662182637e
61 changed files with 970 additions and 22 deletions

View File

@@ -735,6 +735,61 @@ pub(crate) struct FoundExprWouldBeStmt {
pub suggestion: ExprParenthesesNeeded,
}
/// Error reported by `Lexer::validate_frontmatter` when the closing fence line
/// of a frontmatter block has non-whitespace content after its run of `-`.
#[derive(Diagnostic)]
#[diag(parse_frontmatter_extra_characters_after_close)]
pub(crate) struct FrontmatterExtraCharactersAfterClose {
// Runs from the start of the closing line to the end of the frontmatter token.
#[primary_span]
pub span: Span,
}
/// Error reported by `Lexer::validate_frontmatter` when the raw lexer flagged
/// the infostring (the text following the opening fence on the same line) as
/// invalid. The struct-level `#[note]` attaches an additional, span-less note.
#[derive(Diagnostic)]
#[diag(parse_frontmatter_invalid_infostring)]
#[note]
pub(crate) struct FrontmatterInvalidInfostring {
// Runs from the end of the opening `-` run to the end of the opening line.
#[primary_span]
pub span: Span,
}
/// Error reported by `Lexer::validate_frontmatter` when the raw lexer flagged
/// the opening fence as being preceded by whitespace on its line.
#[derive(Diagnostic)]
#[diag(parse_frontmatter_invalid_opening_preceding_whitespace)]
pub(crate) struct FrontmatterInvalidOpeningPrecedingWhitespace {
// Runs from the start of the opening line through the end of the opening `-` run.
#[primary_span]
pub span: Span,
// Covers only the text between the start of the line and the opening fence.
#[note]
pub note_span: Span,
}
/// Error reported by `Lexer::validate_frontmatter` when the last line of the
/// frontmatter token does not begin (after leading whitespace) with `---`,
/// i.e. no closing fence was found.
#[derive(Diagnostic)]
#[diag(parse_frontmatter_unclosed)]
pub(crate) struct FrontmatterUnclosed {
// Covers the whole frontmatter token, starting at the opening fence.
#[primary_span]
pub span: Span,
// Covers only the opening `-` run, pointing at where the block was opened.
#[note]
pub note_span: Span,
}
/// Error reported by `Lexer::validate_frontmatter` when the closing fence is
/// preceded by whitespace on its line.
#[derive(Diagnostic)]
#[diag(parse_frontmatter_invalid_close_preceding_whitespace)]
pub(crate) struct FrontmatterInvalidClosingPrecedingWhitespace {
// Covers the entire closing line.
#[primary_span]
pub span: Span,
// Covers only the leading whitespace of the closing line.
#[note]
pub note_span: Span,
}
/// Error reported by `Lexer::validate_frontmatter` when the opening and closing
/// fences consist of a different number of `-` characters.
#[derive(Diagnostic)]
#[diag(parse_frontmatter_length_mismatch)]
pub(crate) struct FrontmatterLengthMismatch {
// Covers the whole frontmatter token, starting at the opening fence.
#[primary_span]
pub span: Span,
// Labelled span over the opening `-` run.
#[label(parse_label_opening)]
pub opening: Span,
// Labelled span the caller supplies for the closing fence.
#[label(parse_label_close)]
pub close: Span,
// Number of `-` characters in the opening fence.
// NOTE(review): presumably interpolated into the message text — the message
// template is not visible here, confirm against the fluent file.
pub len_opening: usize,
// Number of `-` characters in the closing fence (same caveat as above).
pub len_close: usize,
}
#[derive(Diagnostic)]
#[diag(parse_leading_plus_not_supported)]
pub(crate) struct LeadingPlusNotSupported {

View File

@@ -7,7 +7,9 @@ use rustc_ast::tokenstream::TokenStream;
use rustc_ast::util::unicode::contains_text_flow_control_chars;
use rustc_errors::codes::*;
use rustc_errors::{Applicability, Diag, DiagCtxtHandle, StashKey};
use rustc_lexer::{Base, Cursor, DocStyle, LiteralKind, RawStrError};
use rustc_lexer::{
Base, Cursor, DocStyle, FrontmatterAllowed, LiteralKind, RawStrError, is_whitespace,
};
use rustc_literal_escaper::{EscapeError, Mode, unescape_mixed, unescape_unicode};
use rustc_session::lint::BuiltinLintDiag;
use rustc_session::lint::builtin::{
@@ -15,7 +17,7 @@ use rustc_session::lint::builtin::{
TEXT_DIRECTION_CODEPOINT_IN_COMMENT,
};
use rustc_session::parse::ParseSess;
use rustc_span::{BytePos, Pos, Span, Symbol};
use rustc_span::{BytePos, Pos, Span, Symbol, sym};
use tracing::debug;
use crate::errors;
@@ -56,7 +58,7 @@ pub(crate) fn lex_token_trees<'psess, 'src>(
start_pos = start_pos + BytePos::from_usize(shebang_len);
}
let cursor = Cursor::new(src);
let cursor = Cursor::new(src, FrontmatterAllowed::Yes);
let mut lexer = Lexer {
psess,
start_pos,
@@ -193,6 +195,11 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
let content = self.str_from_to(content_start, content_end);
self.cook_doc_comment(content_start, content, CommentKind::Block, doc_style)
}
rustc_lexer::TokenKind::Frontmatter { has_invalid_preceding_whitespace, invalid_infostring } => {
self.validate_frontmatter(start, has_invalid_preceding_whitespace, invalid_infostring);
preceded_by_whitespace = true;
continue;
}
rustc_lexer::TokenKind::Whitespace => {
preceded_by_whitespace = true;
continue;
@@ -256,7 +263,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
// was consumed.
let lit_start = start + BytePos(prefix_len);
self.pos = lit_start;
self.cursor = Cursor::new(&str_before[prefix_len as usize..]);
self.cursor = Cursor::new(&str_before[prefix_len as usize..], FrontmatterAllowed::No);
self.report_unknown_prefix(start);
let prefix_span = self.mk_sp(start, lit_start);
return (Token::new(self.ident(start), prefix_span), preceded_by_whitespace);
@@ -361,7 +368,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
// Reset the state so we just lex the `'r`.
let lt_start = start + BytePos(2);
self.pos = lt_start;
self.cursor = Cursor::new(&str_before[2 as usize..]);
self.cursor = Cursor::new(&str_before[2 as usize..], FrontmatterAllowed::No);
let lifetime_name = self.str_from(start);
let ident = Symbol::intern(lifetime_name);
@@ -474,6 +481,91 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
}
}
/// Checks a frontmatter token that has already been consumed and emits an
/// error for every problem found.
///
/// * `start` - position where the token begins. The token text is taken from
///   `self.str_from(start)` and its end is taken to be `self.pos`.
/// * `has_invalid_preceding_whitespace` - flag from the raw lexer: the opening
///   fence was preceded by whitespace on its line.
/// * `invalid_infostring` - flag from the raw lexer: the text after the
///   opening fence on the same line is not a valid infostring.
///
/// Besides reporting the two conditions above, this records the frontmatter
/// span for the `frontmatter` feature gate and then inspects the last line of
/// the token, reporting (in this order) a missing closing fence, whitespace
/// before the closing fence, a closing fence whose length differs from the
/// opening one, and extra characters after the closing fence. A missing
/// closing fence stops validation, since the remaining checks need one.
///
/// # Panics
///
/// Panics if the token text contains no `---`; a frontmatter token is expected
/// to always contain its opening fence.
fn validate_frontmatter(
    &self,
    start: BytePos,
    has_invalid_preceding_whitespace: bool,
    invalid_infostring: bool,
) {
    let s = self.str_from(start);
    // The token may begin with text that precedes the fence (see the
    // preceding-whitespace check below), so locate the fence itself.
    let real_start =
        s.find("---").expect("frontmatter token must contain an opening `---` fence");
    let frontmatter_opening_pos = BytePos(real_start as u32) + start;
    let s_new = &s[real_start..];
    // Everything after the opening run of dashes: infostring, body, closing line.
    let within = s_new.trim_start_matches('-');
    let len_opening = s_new.len() - within.len();

    let frontmatter_opening_end_pos = frontmatter_opening_pos + BytePos(len_opening as u32);
    if has_invalid_preceding_whitespace {
        // Start of the line that holds the opening fence.
        let line_start =
            BytePos(s[..real_start].rfind("\n").map_or(0, |i| i as u32 + 1)) + start;
        let span = self.mk_sp(line_start, frontmatter_opening_end_pos);
        let label_span = self.mk_sp(line_start, frontmatter_opening_pos);
        self.dcx().emit_err(errors::FrontmatterInvalidOpeningPrecedingWhitespace {
            span,
            note_span: label_span,
        });
    }

    if invalid_infostring {
        // Offset (relative to the opening fence) of the end of the opening line.
        let line_end = s[real_start..].find('\n').unwrap_or(s[real_start..].len());
        let span = self.mk_sp(
            frontmatter_opening_end_pos,
            frontmatter_opening_pos + BytePos(line_end as u32),
        );
        self.dcx().emit_err(errors::FrontmatterInvalidInfostring { span });
    }

    // The closing fence, if present, is on the last line of the token.
    let last_line_start = within.rfind('\n').map_or(0, |i| i + 1);
    let last_line = &within[last_line_start..];
    let last_line_trimmed = last_line.trim_start_matches(is_whitespace);
    let last_line_start_pos = frontmatter_opening_end_pos + BytePos(last_line_start as u32);
    // Number of whitespace bytes before the closing fence, and where the
    // fence itself begins.
    let close_indent = last_line.len() - last_line_trimmed.len();
    let close_fence_start_pos = last_line_start_pos + BytePos(close_indent as u32);

    let frontmatter_span = self.mk_sp(frontmatter_opening_pos, self.pos);
    self.psess.gated_spans.gate(sym::frontmatter, frontmatter_span);

    if !last_line_trimmed.starts_with("---") {
        let label_span = self.mk_sp(frontmatter_opening_pos, frontmatter_opening_end_pos);
        self.dcx().emit_err(errors::FrontmatterUnclosed {
            span: frontmatter_span,
            note_span: label_span,
        });
        return;
    }

    if close_indent != 0 {
        let line_end = last_line_start_pos + BytePos(last_line.len() as u32);
        let span = self.mk_sp(last_line_start_pos, line_end);
        let label_span = self.mk_sp(last_line_start_pos, close_fence_start_pos);
        self.dcx().emit_err(errors::FrontmatterInvalidClosingPrecedingWhitespace {
            span,
            note_span: label_span,
        });
    }

    let rest = last_line_trimmed.trim_start_matches('-');
    let len_close = last_line_trimmed.len() - rest.len();
    if len_close != len_opening {
        let opening = self.mk_sp(frontmatter_opening_pos, frontmatter_opening_end_pos);
        // Anchor the label at the first `-` of the closing fence rather than at
        // the start of the line; otherwise an indented closing fence would get
        // a label covering the indentation and only part of the dashes.
        let close =
            self.mk_sp(close_fence_start_pos, close_fence_start_pos + BytePos(len_close as u32));
        self.dcx().emit_err(errors::FrontmatterLengthMismatch {
            span: frontmatter_span,
            opening,
            close,
            len_opening,
            len_close,
        });
    }

    // Only whitespace may follow the closing fence.
    if !rest.trim_matches(is_whitespace).is_empty() {
        let span = self.mk_sp(last_line_start_pos, self.pos);
        self.dcx().emit_err(errors::FrontmatterExtraCharactersAfterClose { span });
    }
}
fn cook_doc_comment(
&self,
content_start: BytePos,
@@ -839,7 +931,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
let space_pos = start + BytePos(1);
let space_span = self.mk_sp(space_pos, space_pos);
let mut cursor = Cursor::new(str_before);
let mut cursor = Cursor::new(str_before, FrontmatterAllowed::No);
let (is_string, span, unterminated) = match cursor.guarded_double_quoted_string() {
Some(rustc_lexer::GuardedStr { n_hashes, terminated, token_len }) => {
@@ -905,7 +997,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
// For backwards compatibility, roll back to after just the first `#`
// and return the `Pound` token.
self.pos = start + BytePos(1);
self.cursor = Cursor::new(&str_before[1..]);
self.cursor = Cursor::new(&str_before[1..], FrontmatterAllowed::No);
token::Pound
}
}