Introduce ByteSymbol.

It's like `Symbol` but for byte strings. The interner is now used for
both `Symbol` and `ByteSymbol`. E.g. if you intern `"dog"` and `b"dog"`
you'll get a `Symbol` and a `ByteSymbol` with the same index, and the
underlying bytes will only be stored once.

The motivation for this is to eliminate the `Arc`s in `ast::LitKind`, to
make `ast::LitKind` impl `Copy`, and to avoid the need to arena-allocate
`ast::LitKind` in HIR. The last of these reduces peak memory by a
non-trivial amount on literal-heavy benchmarks such as `deep-vector` and
`tuple-stress`.

`Encoder`, `Decoder`, `SpanEncoder`, and `SpanDecoder` all get some
changes so that they can handle normal strings and byte strings.

This change does slow down compilation of programs that use
`include_bytes!` on large files, because the contents of those files are
now interned (hashed). This makes `include_bytes!` more similar to
`include_str!`, though `include_bytes!` contents still aren't escaped,
and hashing is still much cheaper than escaping.
This commit is contained in:
Nicholas Nethercote
2025-06-02 08:59:29 +10:00
parent ed2d759783
commit 478f8287c0
46 changed files with 449 additions and 269 deletions

View File

@@ -19,7 +19,6 @@
//! - [`UnOp`], [`BinOp`], and [`BinOpKind`]: Unary and binary operators. //! - [`UnOp`], [`BinOp`], and [`BinOpKind`]: Unary and binary operators.
use std::borrow::Cow; use std::borrow::Cow;
use std::sync::Arc;
use std::{cmp, fmt}; use std::{cmp, fmt};
pub use GenericArgs::*; pub use GenericArgs::*;
@@ -32,7 +31,7 @@ use rustc_data_structures::tagged_ptr::Tag;
use rustc_macros::{Decodable, Encodable, HashStable_Generic}; use rustc_macros::{Decodable, Encodable, HashStable_Generic};
pub use rustc_span::AttrId; pub use rustc_span::AttrId;
use rustc_span::source_map::{Spanned, respan}; use rustc_span::source_map::{Spanned, respan};
use rustc_span::{DUMMY_SP, ErrorGuaranteed, Ident, Span, Symbol, kw, sym}; use rustc_span::{ByteSymbol, DUMMY_SP, ErrorGuaranteed, Ident, Span, Symbol, kw, sym};
use thin_vec::{ThinVec, thin_vec}; use thin_vec::{ThinVec, thin_vec};
pub use crate::format::*; pub use crate::format::*;
@@ -1805,10 +1804,17 @@ pub enum ExprKind {
Become(P<Expr>), Become(P<Expr>),
/// Bytes included via `include_bytes!` /// Bytes included via `include_bytes!`
///
/// Added for optimization purposes to avoid the need to escape /// Added for optimization purposes to avoid the need to escape
/// large binary blobs - should always behave like [`ExprKind::Lit`] /// large binary blobs - should always behave like [`ExprKind::Lit`]
/// with a `ByteStr` literal. /// with a `ByteStr` literal.
IncludedBytes(Arc<[u8]>), ///
/// The value is stored as a `ByteSymbol`. It's unfortunate that we need to
/// intern (hash) the bytes because they're likely to be large and unique.
/// But it's necessary because this will eventually be lowered to
/// `LitKind::ByteStr`, which needs a `ByteSymbol` to impl `Copy` and avoid
/// arena allocation.
IncludedBytes(ByteSymbol),
/// A `format_args!()` expression. /// A `format_args!()` expression.
FormatArgs(P<FormatArgs>), FormatArgs(P<FormatArgs>),
@@ -2066,7 +2072,7 @@ impl YieldKind {
} }
/// A literal in a meta item. /// A literal in a meta item.
#[derive(Clone, Encodable, Decodable, Debug, HashStable_Generic)] #[derive(Clone, Copy, Encodable, Decodable, Debug, HashStable_Generic)]
pub struct MetaItemLit { pub struct MetaItemLit {
/// The original literal as written in the source code. /// The original literal as written in the source code.
pub symbol: Symbol, pub symbol: Symbol,
@@ -2129,16 +2135,18 @@ pub enum LitFloatType {
/// deciding the `LitKind`. This means that float literals like `1f32` are /// deciding the `LitKind`. This means that float literals like `1f32` are
/// classified by this type as `Float`. This is different to `token::LitKind` /// classified by this type as `Float`. This is different to `token::LitKind`
/// which does *not* consider the suffix. /// which does *not* consider the suffix.
#[derive(Clone, Encodable, Decodable, Debug, Hash, Eq, PartialEq, HashStable_Generic)] #[derive(Clone, Copy, Encodable, Decodable, Debug, Hash, Eq, PartialEq, HashStable_Generic)]
pub enum LitKind { pub enum LitKind {
/// A string literal (`"foo"`). The symbol is unescaped, and so may differ /// A string literal (`"foo"`). The symbol is unescaped, and so may differ
/// from the original token's symbol. /// from the original token's symbol.
Str(Symbol, StrStyle), Str(Symbol, StrStyle),
/// A byte string (`b"foo"`). Not stored as a symbol because it might be /// A byte string (`b"foo"`). The symbol is unescaped, and so may differ
/// non-utf8, and symbols only allow utf8 strings. /// from the original token's symbol.
ByteStr(Arc<[u8]>, StrStyle), ByteStr(ByteSymbol, StrStyle),
/// A C String (`c"foo"`). Guaranteed to only have `\0` at the end. /// A C String (`c"foo"`). Guaranteed to only have `\0` at the end. The
CStr(Arc<[u8]>, StrStyle), /// symbol is unescaped, and so may differ from the original token's
/// symbol.
CStr(ByteSymbol, StrStyle),
/// A byte char (`b'f'`). /// A byte char (`b'f'`).
Byte(u8), Byte(u8),
/// A character literal (`'a'`). /// A character literal (`'a'`).

View File

@@ -5,7 +5,7 @@ use std::{ascii, fmt, str};
use rustc_literal_escaper::{ use rustc_literal_escaper::{
MixedUnit, unescape_byte, unescape_byte_str, unescape_c_str, unescape_char, unescape_str, MixedUnit, unescape_byte, unescape_byte_str, unescape_c_str, unescape_char, unescape_str,
}; };
use rustc_span::{Span, Symbol, kw, sym}; use rustc_span::{ByteSymbol, Span, Symbol, kw, sym};
use tracing::debug; use tracing::debug;
use crate::ast::{self, LitKind, MetaItemLit, StrStyle}; use crate::ast::{self, LitKind, MetaItemLit, StrStyle};
@@ -116,13 +116,12 @@ impl LitKind {
assert!(!err.is_fatal(), "failed to unescape string literal") assert!(!err.is_fatal(), "failed to unescape string literal")
} }
}); });
LitKind::ByteStr(buf.into(), StrStyle::Cooked) LitKind::ByteStr(ByteSymbol::intern(&buf), StrStyle::Cooked)
} }
token::ByteStrRaw(n) => { token::ByteStrRaw(n) => {
// Raw strings have no escapes so we can convert the symbol // Raw byte strings have no escapes so no work is needed here.
// directly to a `Arc<u8>`.
let buf = symbol.as_str().to_owned().into_bytes(); let buf = symbol.as_str().to_owned().into_bytes();
LitKind::ByteStr(buf.into(), StrStyle::Raw(n)) LitKind::ByteStr(ByteSymbol::intern(&buf), StrStyle::Raw(n))
} }
token::CStr => { token::CStr => {
let s = symbol.as_str(); let s = symbol.as_str();
@@ -137,7 +136,7 @@ impl LitKind {
} }
}); });
buf.push(0); buf.push(0);
LitKind::CStr(buf.into(), StrStyle::Cooked) LitKind::CStr(ByteSymbol::intern(&buf), StrStyle::Cooked)
} }
token::CStrRaw(n) => { token::CStrRaw(n) => {
// Raw strings have no escapes so we can convert the symbol // Raw strings have no escapes so we can convert the symbol
@@ -145,7 +144,7 @@ impl LitKind {
// char. // char.
let mut buf = symbol.as_str().to_owned().into_bytes(); let mut buf = symbol.as_str().to_owned().into_bytes();
buf.push(0); buf.push(0);
LitKind::CStr(buf.into(), StrStyle::Raw(n)) LitKind::CStr(ByteSymbol::intern(&buf), StrStyle::Raw(n))
} }
token::Err(guar) => LitKind::Err(guar), token::Err(guar) => LitKind::Err(guar),
}) })
@@ -167,12 +166,12 @@ impl fmt::Display for LitKind {
delim = "#".repeat(n as usize), delim = "#".repeat(n as usize),
string = sym string = sym
)?, )?,
LitKind::ByteStr(ref bytes, StrStyle::Cooked) => { LitKind::ByteStr(ref byte_sym, StrStyle::Cooked) => {
write!(f, "b\"{}\"", escape_byte_str_symbol(bytes))? write!(f, "b\"{}\"", escape_byte_str_symbol(byte_sym.as_byte_str()))?
} }
LitKind::ByteStr(ref bytes, StrStyle::Raw(n)) => { LitKind::ByteStr(ref byte_sym, StrStyle::Raw(n)) => {
// Unwrap because raw byte string literals can only contain ASCII. // Unwrap because raw byte string literals can only contain ASCII.
let symbol = str::from_utf8(bytes).unwrap(); let symbol = str::from_utf8(byte_sym.as_byte_str()).unwrap();
write!( write!(
f, f,
"br{delim}\"{string}\"{delim}", "br{delim}\"{string}\"{delim}",
@@ -181,11 +180,11 @@ impl fmt::Display for LitKind {
)?; )?;
} }
LitKind::CStr(ref bytes, StrStyle::Cooked) => { LitKind::CStr(ref bytes, StrStyle::Cooked) => {
write!(f, "c\"{}\"", escape_byte_str_symbol(bytes))? write!(f, "c\"{}\"", escape_byte_str_symbol(bytes.as_byte_str()))?
} }
LitKind::CStr(ref bytes, StrStyle::Raw(n)) => { LitKind::CStr(ref bytes, StrStyle::Raw(n)) => {
// This can only be valid UTF-8. // This can only be valid UTF-8.
let symbol = str::from_utf8(bytes).unwrap(); let symbol = str::from_utf8(bytes.as_byte_str()).unwrap();
write!(f, "cr{delim}\"{symbol}\"{delim}", delim = "#".repeat(n as usize),)?; write!(f, "cr{delim}\"{symbol}\"{delim}", delim = "#".repeat(n as usize),)?;
} }
LitKind::Int(n, ty) => { LitKind::Int(n, ty) => {

View File

@@ -144,11 +144,11 @@ impl<'hir> LoweringContext<'_, 'hir> {
hir::ExprKind::Unary(op, ohs) hir::ExprKind::Unary(op, ohs)
} }
ExprKind::Lit(token_lit) => hir::ExprKind::Lit(self.lower_lit(token_lit, e.span)), ExprKind::Lit(token_lit) => hir::ExprKind::Lit(self.lower_lit(token_lit, e.span)),
ExprKind::IncludedBytes(bytes) => { ExprKind::IncludedBytes(byte_sym) => {
let lit = self.arena.alloc(respan( let lit = respan(
self.lower_span(e.span), self.lower_span(e.span),
LitKind::ByteStr(Arc::clone(bytes), StrStyle::Cooked), LitKind::ByteStr(*byte_sym, StrStyle::Cooked),
)); );
hir::ExprKind::Lit(lit) hir::ExprKind::Lit(lit)
} }
ExprKind::Cast(expr, ty) => { ExprKind::Cast(expr, ty) => {
@@ -421,11 +421,7 @@ impl<'hir> LoweringContext<'_, 'hir> {
}) })
} }
pub(crate) fn lower_lit( pub(crate) fn lower_lit(&mut self, token_lit: &token::Lit, span: Span) -> hir::Lit {
&mut self,
token_lit: &token::Lit,
span: Span,
) -> &'hir Spanned<LitKind> {
let lit_kind = match LitKind::from_token_lit(*token_lit) { let lit_kind = match LitKind::from_token_lit(*token_lit) {
Ok(lit_kind) => lit_kind, Ok(lit_kind) => lit_kind,
Err(err) => { Err(err) => {
@@ -433,7 +429,7 @@ impl<'hir> LoweringContext<'_, 'hir> {
LitKind::Err(guar) LitKind::Err(guar)
} }
}; };
self.arena.alloc(respan(self.lower_span(span), lit_kind)) respan(self.lower_span(span), lit_kind)
} }
fn lower_unop(&mut self, u: UnOp) -> hir::UnOp { fn lower_unop(&mut self, u: UnOp) -> hir::UnOp {
@@ -2141,10 +2137,10 @@ impl<'hir> LoweringContext<'_, 'hir> {
} }
fn expr_uint(&mut self, sp: Span, ty: ast::UintTy, value: u128) -> hir::Expr<'hir> { fn expr_uint(&mut self, sp: Span, ty: ast::UintTy, value: u128) -> hir::Expr<'hir> {
let lit = self.arena.alloc(hir::Lit { let lit = hir::Lit {
span: sp, span: sp,
node: ast::LitKind::Int(value.into(), ast::LitIntType::Unsigned(ty)), node: ast::LitKind::Int(value.into(), ast::LitIntType::Unsigned(ty)),
}); };
self.expr(sp, hir::ExprKind::Lit(lit)) self.expr(sp, hir::ExprKind::Lit(lit))
} }
@@ -2161,9 +2157,7 @@ impl<'hir> LoweringContext<'_, 'hir> {
} }
pub(super) fn expr_str(&mut self, sp: Span, value: Symbol) -> hir::Expr<'hir> { pub(super) fn expr_str(&mut self, sp: Span, value: Symbol) -> hir::Expr<'hir> {
let lit = self let lit = hir::Lit { span: sp, node: ast::LitKind::Str(value, ast::StrStyle::Cooked) };
.arena
.alloc(hir::Lit { span: sp, node: ast::LitKind::Str(value, ast::StrStyle::Cooked) });
self.expr(sp, hir::ExprKind::Lit(lit)) self.expr(sp, hir::ExprKind::Lit(lit))
} }

View File

@@ -390,19 +390,15 @@ impl<'a, 'hir> LoweringContext<'a, 'hir> {
allow_paths: bool, allow_paths: bool,
) -> &'hir hir::PatExpr<'hir> { ) -> &'hir hir::PatExpr<'hir> {
let span = self.lower_span(expr.span); let span = self.lower_span(expr.span);
let err = |guar| hir::PatExprKind::Lit { let err =
lit: self.arena.alloc(respan(span, LitKind::Err(guar))), |guar| hir::PatExprKind::Lit { lit: respan(span, LitKind::Err(guar)), negated: false };
negated: false,
};
let kind = match &expr.kind { let kind = match &expr.kind {
ExprKind::Lit(lit) => { ExprKind::Lit(lit) => {
hir::PatExprKind::Lit { lit: self.lower_lit(lit, span), negated: false } hir::PatExprKind::Lit { lit: self.lower_lit(lit, span), negated: false }
} }
ExprKind::ConstBlock(c) => hir::PatExprKind::ConstBlock(self.lower_const_block(c)), ExprKind::ConstBlock(c) => hir::PatExprKind::ConstBlock(self.lower_const_block(c)),
ExprKind::IncludedBytes(bytes) => hir::PatExprKind::Lit { ExprKind::IncludedBytes(byte_sym) => hir::PatExprKind::Lit {
lit: self lit: respan(span, LitKind::ByteStr(*byte_sym, StrStyle::Cooked)),
.arena
.alloc(respan(span, LitKind::ByteStr(Arc::clone(bytes), StrStyle::Cooked))),
negated: false, negated: false,
}, },
ExprKind::Err(guar) => err(*guar), ExprKind::Err(guar) => err(*guar),

View File

@@ -469,8 +469,12 @@ impl<'a> State<'a> {
ast::ExprKind::Lit(token_lit) => { ast::ExprKind::Lit(token_lit) => {
self.print_token_literal(*token_lit, expr.span); self.print_token_literal(*token_lit, expr.span);
} }
ast::ExprKind::IncludedBytes(bytes) => { ast::ExprKind::IncludedBytes(byte_sym) => {
let lit = token::Lit::new(token::ByteStr, escape_byte_str_symbol(bytes), None); let lit = token::Lit::new(
token::ByteStr,
escape_byte_str_symbol(byte_sym.as_byte_str()),
None,
);
self.print_token_literal(lit, expr.span) self.print_token_literal(lit, expr.span)
} }
ast::ExprKind::Cast(expr, ty) => { ast::ExprKind::Cast(expr, ty) => {

View File

@@ -177,15 +177,15 @@ pub(crate) fn expand_concat_bytes(
Ok(LitKind::Byte(val)) => { Ok(LitKind::Byte(val)) => {
accumulator.push(val); accumulator.push(val);
} }
Ok(LitKind::ByteStr(ref bytes, _)) => { Ok(LitKind::ByteStr(ref byte_sym, _)) => {
accumulator.extend_from_slice(bytes); accumulator.extend_from_slice(byte_sym.as_byte_str());
} }
_ => { _ => {
guar.get_or_insert_with(|| invalid_type_err(cx, token_lit, e.span, false)); guar.get_or_insert_with(|| invalid_type_err(cx, token_lit, e.span, false));
} }
}, },
ExprKind::IncludedBytes(bytes) => { ExprKind::IncludedBytes(byte_sym) => {
accumulator.extend_from_slice(bytes); accumulator.extend_from_slice(byte_sym.as_byte_str());
} }
ExprKind::Err(guarantee) => { ExprKind::Err(guarantee) => {
guar = Some(*guarantee); guar = Some(*guarantee);

View File

@@ -16,7 +16,7 @@ use rustc_parse::parser::{ForceCollect, Parser};
use rustc_parse::{new_parser_from_file, unwrap_or_emit_fatal, utf8_error}; use rustc_parse::{new_parser_from_file, unwrap_or_emit_fatal, utf8_error};
use rustc_session::lint::builtin::INCOMPLETE_INCLUDE; use rustc_session::lint::builtin::INCOMPLETE_INCLUDE;
use rustc_span::source_map::SourceMap; use rustc_span::source_map::SourceMap;
use rustc_span::{Pos, Span, Symbol}; use rustc_span::{ByteSymbol, Pos, Span, Symbol};
use smallvec::SmallVec; use smallvec::SmallVec;
use crate::errors; use crate::errors;
@@ -237,7 +237,7 @@ pub(crate) fn expand_include_bytes(
Ok((bytes, _bsp)) => { Ok((bytes, _bsp)) => {
// Don't care about getting the span for the raw bytes, // Don't care about getting the span for the raw bytes,
// because the console can't really show them anyway. // because the console can't really show them anyway.
let expr = cx.expr(sp, ast::ExprKind::IncludedBytes(bytes)); let expr = cx.expr(sp, ast::ExprKind::IncludedBytes(ByteSymbol::intern(&bytes)));
MacEager::expr(expr) MacEager::expr(expr)
} }
Err(dummy) => dummy, Err(dummy) => dummy,

View File

@@ -599,8 +599,12 @@ impl server::TokenStream for Rustc<'_, '_> {
ast::ExprKind::Lit(token_lit) => { ast::ExprKind::Lit(token_lit) => {
Ok(tokenstream::TokenStream::token_alone(token::Literal(*token_lit), expr.span)) Ok(tokenstream::TokenStream::token_alone(token::Literal(*token_lit), expr.span))
} }
ast::ExprKind::IncludedBytes(bytes) => { ast::ExprKind::IncludedBytes(byte_sym) => {
let lit = token::Lit::new(token::ByteStr, escape_byte_str_symbol(bytes), None); let lit = token::Lit::new(
token::ByteStr,
escape_byte_str_symbol(byte_sym.as_byte_str()),
None,
);
Ok(tokenstream::TokenStream::token_alone(token::TokenKind::Literal(lit), expr.span)) Ok(tokenstream::TokenStream::token_alone(token::TokenKind::Literal(lit), expr.span))
} }
ast::ExprKind::Unary(ast::UnOp::Neg, e) => match &e.kind { ast::ExprKind::Unary(ast::UnOp::Neg, e) => match &e.kind {

View File

@@ -8,7 +8,6 @@ macro_rules! arena_types {
[] asm_template: rustc_ast::InlineAsmTemplatePiece, [] asm_template: rustc_ast::InlineAsmTemplatePiece,
[] attribute: rustc_hir::Attribute, [] attribute: rustc_hir::Attribute,
[] owner_info: rustc_hir::OwnerInfo<'tcx>, [] owner_info: rustc_hir::OwnerInfo<'tcx>,
[] lit: rustc_hir::Lit,
[] macro_def: rustc_ast::MacroDef, [] macro_def: rustc_ast::MacroDef,
]); ]);
) )

View File

@@ -1807,7 +1807,7 @@ pub struct PatExpr<'hir> {
#[derive(Debug, Clone, Copy, HashStable_Generic)] #[derive(Debug, Clone, Copy, HashStable_Generic)]
pub enum PatExprKind<'hir> { pub enum PatExprKind<'hir> {
Lit { Lit {
lit: &'hir Lit, lit: Lit,
// FIXME: move this into `Lit` and handle negated literal expressions // FIXME: move this into `Lit` and handle negated literal expressions
// once instead of matching on unop neg expressions everywhere. // once instead of matching on unop neg expressions everywhere.
negated: bool, negated: bool,
@@ -2734,7 +2734,7 @@ pub enum ExprKind<'hir> {
/// A unary operation (e.g., `!x`, `*x`). /// A unary operation (e.g., `!x`, `*x`).
Unary(UnOp, &'hir Expr<'hir>), Unary(UnOp, &'hir Expr<'hir>),
/// A literal (e.g., `1`, `"foo"`). /// A literal (e.g., `1`, `"foo"`).
Lit(&'hir Lit), Lit(Lit),
/// A cast (e.g., `foo as f64`). /// A cast (e.g., `foo as f64`).
Cast(&'hir Expr<'hir>, &'hir Ty<'hir>), Cast(&'hir Expr<'hir>, &'hir Ty<'hir>),
/// A type ascription (e.g., `x: Foo`). See RFC 3307. /// A type ascription (e.g., `x: Foo`). See RFC 3307.

View File

@@ -347,7 +347,7 @@ pub trait Visitor<'v>: Sized {
fn visit_pat_expr(&mut self, expr: &'v PatExpr<'v>) -> Self::Result { fn visit_pat_expr(&mut self, expr: &'v PatExpr<'v>) -> Self::Result {
walk_pat_expr(self, expr) walk_pat_expr(self, expr)
} }
fn visit_lit(&mut self, _hir_id: HirId, _lit: &'v Lit, _negated: bool) -> Self::Result { fn visit_lit(&mut self, _hir_id: HirId, _lit: Lit, _negated: bool) -> Self::Result {
Self::Result::output() Self::Result::output()
} }
fn visit_anon_const(&mut self, c: &'v AnonConst) -> Self::Result { fn visit_anon_const(&mut self, c: &'v AnonConst) -> Self::Result {
@@ -786,7 +786,7 @@ pub fn walk_pat_expr<'v, V: Visitor<'v>>(visitor: &mut V, expr: &'v PatExpr<'v>)
let PatExpr { hir_id, span, kind } = expr; let PatExpr { hir_id, span, kind } = expr;
try_visit!(visitor.visit_id(*hir_id)); try_visit!(visitor.visit_id(*hir_id));
match kind { match kind {
PatExprKind::Lit { lit, negated } => visitor.visit_lit(*hir_id, lit, *negated), PatExprKind::Lit { lit, negated } => visitor.visit_lit(*hir_id, *lit, *negated),
PatExprKind::ConstBlock(c) => visitor.visit_inline_const(c), PatExprKind::ConstBlock(c) => visitor.visit_inline_const(c),
PatExprKind::Path(qpath) => visitor.visit_qpath(qpath, *hir_id, *span), PatExprKind::Path(qpath) => visitor.visit_qpath(qpath, *hir_id, *span),
} }

View File

@@ -2364,9 +2364,9 @@ impl<'tcx> dyn HirTyLowerer<'tcx> + '_ {
}; };
let lit_input = match expr.kind { let lit_input = match expr.kind {
hir::ExprKind::Lit(lit) => Some(LitToConstInput { lit: &lit.node, ty, neg: false }), hir::ExprKind::Lit(lit) => Some(LitToConstInput { lit: lit.node, ty, neg: false }),
hir::ExprKind::Unary(hir::UnOp::Neg, expr) => match expr.kind { hir::ExprKind::Unary(hir::UnOp::Neg, expr) => match expr.kind {
hir::ExprKind::Lit(lit) => Some(LitToConstInput { lit: &lit.node, ty, neg: true }), hir::ExprKind::Lit(lit) => Some(LitToConstInput { lit: lit.node, ty, neg: true }),
_ => None, _ => None,
}, },
_ => None, _ => None,

View File

@@ -1480,7 +1480,7 @@ impl<'a> State<'a> {
self.print_expr_addr_of(k, m, expr); self.print_expr_addr_of(k, m, expr);
} }
hir::ExprKind::Lit(lit) => { hir::ExprKind::Lit(lit) => {
self.print_literal(lit); self.print_literal(&lit);
} }
hir::ExprKind::Cast(expr, ty) => { hir::ExprKind::Cast(expr, ty) => {
self.print_expr_cond_paren(expr, self.precedence(expr) < ExprPrecedence::Cast); self.print_expr_cond_paren(expr, self.precedence(expr) < ExprPrecedence::Cast);

View File

@@ -1637,7 +1637,7 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> {
ast::LitKind::ByteStr(ref v, _) => Ty::new_imm_ref( ast::LitKind::ByteStr(ref v, _) => Ty::new_imm_ref(
tcx, tcx,
tcx.lifetimes.re_static, tcx.lifetimes.re_static,
Ty::new_array(tcx, tcx.types.u8, v.len() as u64), Ty::new_array(tcx, tcx.types.u8, v.as_byte_str().len() as u64),
), ),
ast::LitKind::Byte(_) => tcx.types.u8, ast::LitKind::Byte(_) => tcx.types.u8,
ast::LitKind::Char(_) => tcx.types.char, ast::LitKind::Char(_) => tcx.types.char,

View File

@@ -1624,7 +1624,7 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> {
node: rustc_ast::LitKind::Int(lit, rustc_ast::LitIntType::Unsuffixed), node: rustc_ast::LitKind::Int(lit, rustc_ast::LitIntType::Unsuffixed),
span, span,
}) => { }) => {
let Ok(snippet) = self.tcx.sess.source_map().span_to_snippet(*span) else { let Ok(snippet) = self.tcx.sess.source_map().span_to_snippet(span) else {
return false; return false;
}; };
if !(snippet.starts_with("0x") || snippet.starts_with("0X")) { if !(snippet.starts_with("0x") || snippet.starts_with("0X")) {
@@ -1683,7 +1683,7 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> {
// We have satisfied all requirements to provide a suggestion. Emit it. // We have satisfied all requirements to provide a suggestion. Emit it.
err.span_suggestion( err.span_suggestion(
*span, span,
format!("if you meant to create a null pointer, use `{null_path_str}()`"), format!("if you meant to create a null pointer, use `{null_path_str}()`"),
null_path_str + "()", null_path_str + "()",
Applicability::MachineApplicable, Applicability::MachineApplicable,

View File

@@ -108,8 +108,8 @@ impl<'tcx> LateLintPass<'tcx> for InvalidFromUtf8 {
} }
match init.kind { match init.kind {
ExprKind::Lit(Spanned { node: lit, .. }) => { ExprKind::Lit(Spanned { node: lit, .. }) => {
if let LitKind::ByteStr(bytes, _) = &lit if let LitKind::ByteStr(byte_sym, _) = &lit
&& let Err(utf8_error) = std::str::from_utf8(bytes) && let Err(utf8_error) = std::str::from_utf8(byte_sym.as_byte_str())
{ {
lint(init.span, utf8_error); lint(init.span, utf8_error);
} }

View File

@@ -152,7 +152,7 @@ impl<'tcx, T: LateLintPass<'tcx>> hir_visit::Visitor<'tcx> for LateContextAndPas
hir_visit::walk_pat(self, p); hir_visit::walk_pat(self, p);
} }
fn visit_lit(&mut self, hir_id: HirId, lit: &'tcx hir::Lit, negated: bool) { fn visit_lit(&mut self, hir_id: HirId, lit: hir::Lit, negated: bool) {
lint_callback!(self, check_lit, hir_id, lit, negated); lint_callback!(self, check_lit, hir_id, lit, negated);
} }

View File

@@ -23,7 +23,7 @@ macro_rules! late_lint_methods {
fn check_stmt(a: &'tcx rustc_hir::Stmt<'tcx>); fn check_stmt(a: &'tcx rustc_hir::Stmt<'tcx>);
fn check_arm(a: &'tcx rustc_hir::Arm<'tcx>); fn check_arm(a: &'tcx rustc_hir::Arm<'tcx>);
fn check_pat(a: &'tcx rustc_hir::Pat<'tcx>); fn check_pat(a: &'tcx rustc_hir::Pat<'tcx>);
fn check_lit(hir_id: rustc_hir::HirId, a: &'tcx rustc_hir::Lit, negated: bool); fn check_lit(hir_id: rustc_hir::HirId, a: rustc_hir::Lit, negated: bool);
fn check_expr(a: &'tcx rustc_hir::Expr<'tcx>); fn check_expr(a: &'tcx rustc_hir::Expr<'tcx>);
fn check_expr_post(a: &'tcx rustc_hir::Expr<'tcx>); fn check_expr_post(a: &'tcx rustc_hir::Expr<'tcx>);
fn check_ty(a: &'tcx rustc_hir::Ty<'tcx, rustc_hir::AmbigArg>); fn check_ty(a: &'tcx rustc_hir::Ty<'tcx, rustc_hir::AmbigArg>);

View File

@@ -547,18 +547,12 @@ fn lint_fn_pointer<'tcx>(
} }
impl<'tcx> LateLintPass<'tcx> for TypeLimits { impl<'tcx> LateLintPass<'tcx> for TypeLimits {
fn check_lit( fn check_lit(&mut self, cx: &LateContext<'tcx>, hir_id: HirId, lit: hir::Lit, negated: bool) {
&mut self,
cx: &LateContext<'tcx>,
hir_id: HirId,
lit: &'tcx hir::Lit,
negated: bool,
) {
if negated { if negated {
self.negated_expr_id = Some(hir_id); self.negated_expr_id = Some(hir_id);
self.negated_expr_span = Some(lit.span); self.negated_expr_span = Some(lit.span);
} }
lint_literal(cx, self, hir_id, lit.span, lit, negated); lint_literal(cx, self, hir_id, lit.span, &lit, negated);
} }
fn check_expr(&mut self, cx: &LateContext<'tcx>, e: &'tcx hir::Expr<'tcx>) { fn check_expr(&mut self, cx: &LateContext<'tcx>, e: &'tcx hir::Expr<'tcx>) {

View File

@@ -32,7 +32,9 @@ use rustc_session::Session;
use rustc_session::config::TargetModifier; use rustc_session::config::TargetModifier;
use rustc_session::cstore::{CrateSource, ExternCrate}; use rustc_session::cstore::{CrateSource, ExternCrate};
use rustc_span::hygiene::HygieneDecodeContext; use rustc_span::hygiene::HygieneDecodeContext;
use rustc_span::{BytePos, DUMMY_SP, Pos, SpanData, SpanDecoder, SyntaxContext, kw}; use rustc_span::{
BytePos, ByteSymbol, DUMMY_SP, Pos, SpanData, SpanDecoder, Symbol, SyntaxContext, kw,
};
use tracing::debug; use tracing::debug;
use crate::creader::CStore; use crate::creader::CStore;
@@ -384,6 +386,28 @@ impl<'a, 'tcx> DecodeContext<'a, 'tcx> {
fn read_raw_bytes(&mut self, len: usize) -> &[u8] { fn read_raw_bytes(&mut self, len: usize) -> &[u8] {
self.opaque.read_raw_bytes(len) self.opaque.read_raw_bytes(len)
} }
fn decode_symbol_or_byte_symbol<S>(
&mut self,
new_from_index: impl Fn(u32) -> S,
read_and_intern_str_or_byte_str_this: impl Fn(&mut Self) -> S,
read_and_intern_str_or_byte_str_opaque: impl Fn(&mut MemDecoder<'a>) -> S,
) -> S {
let tag = self.read_u8();
match tag {
SYMBOL_STR => read_and_intern_str_or_byte_str_this(self),
SYMBOL_OFFSET => {
// read str offset
let pos = self.read_usize();
// move to str offset and read
self.opaque.with_position(pos, |d| read_and_intern_str_or_byte_str_opaque(d))
}
SYMBOL_PREDEFINED => new_from_index(self.read_u32()),
_ => unreachable!(),
}
}
} }
impl<'a, 'tcx> TyDecoder<'tcx> for DecodeContext<'a, 'tcx> { impl<'a, 'tcx> TyDecoder<'tcx> for DecodeContext<'a, 'tcx> {
@@ -545,29 +569,19 @@ impl<'a, 'tcx> SpanDecoder for DecodeContext<'a, 'tcx> {
} }
fn decode_symbol(&mut self) -> Symbol { fn decode_symbol(&mut self) -> Symbol {
let tag = self.read_u8(); self.decode_symbol_or_byte_symbol(
Symbol::new,
|this| Symbol::intern(this.read_str()),
|opaque| Symbol::intern(opaque.read_str()),
)
}
match tag { fn decode_byte_symbol(&mut self) -> ByteSymbol {
SYMBOL_STR => { self.decode_symbol_or_byte_symbol(
let s = self.read_str(); ByteSymbol::new,
Symbol::intern(s) |this| ByteSymbol::intern(this.read_byte_str()),
} |opaque| ByteSymbol::intern(opaque.read_byte_str()),
SYMBOL_OFFSET => { )
// read str offset
let pos = self.read_usize();
// move to str offset and read
self.opaque.with_position(pos, |d| {
let s = d.read_str();
Symbol::intern(s)
})
}
SYMBOL_PREDEFINED => {
let symbol_index = self.read_u32();
Symbol::new(symbol_index)
}
_ => unreachable!(),
}
} }
} }

View File

@@ -29,8 +29,8 @@ use rustc_serialize::{Decodable, Decoder, Encodable, Encoder, opaque};
use rustc_session::config::{CrateType, OptLevel, TargetModifier}; use rustc_session::config::{CrateType, OptLevel, TargetModifier};
use rustc_span::hygiene::HygieneEncodeContext; use rustc_span::hygiene::HygieneEncodeContext;
use rustc_span::{ use rustc_span::{
ExternalSource, FileName, SourceFile, SpanData, SpanEncoder, StableSourceFileId, SyntaxContext, ByteSymbol, ExternalSource, FileName, SourceFile, SpanData, SpanEncoder, StableSourceFileId,
sym, Symbol, SyntaxContext, sym,
}; };
use tracing::{debug, instrument, trace}; use tracing::{debug, instrument, trace};
@@ -63,7 +63,8 @@ pub(super) struct EncodeContext<'a, 'tcx> {
required_source_files: Option<FxIndexSet<usize>>, required_source_files: Option<FxIndexSet<usize>>,
is_proc_macro: bool, is_proc_macro: bool,
hygiene_ctxt: &'a HygieneEncodeContext, hygiene_ctxt: &'a HygieneEncodeContext,
symbol_table: FxHashMap<Symbol, usize>, // Used for both `Symbol`s and `ByteSymbol`s.
symbol_index_table: FxHashMap<u32, usize>,
} }
/// If the current crate is a proc-macro, returns early with `LazyArray::default()`. /// If the current crate is a proc-macro, returns early with `LazyArray::default()`.
@@ -200,27 +201,14 @@ impl<'a, 'tcx> SpanEncoder for EncodeContext<'a, 'tcx> {
} }
} }
fn encode_symbol(&mut self, symbol: Symbol) { fn encode_symbol(&mut self, sym: Symbol) {
// if symbol predefined, emit tag and symbol index self.encode_symbol_or_byte_symbol(sym.as_u32(), |this| this.emit_str(sym.as_str()));
if symbol.is_predefined() { }
self.opaque.emit_u8(SYMBOL_PREDEFINED);
self.opaque.emit_u32(symbol.as_u32()); fn encode_byte_symbol(&mut self, byte_sym: ByteSymbol) {
} else { self.encode_symbol_or_byte_symbol(byte_sym.as_u32(), |this| {
// otherwise write it as string or as offset to it this.emit_byte_str(byte_sym.as_byte_str())
match self.symbol_table.entry(symbol) { });
Entry::Vacant(o) => {
self.opaque.emit_u8(SYMBOL_STR);
let pos = self.opaque.position();
o.insert(pos);
self.emit_str(symbol.as_str());
}
Entry::Occupied(o) => {
let x = *o.get();
self.emit_u8(SYMBOL_OFFSET);
self.emit_usize(x);
}
}
}
} }
} }
@@ -492,6 +480,33 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
LazyArray::from_position_and_num_elems(pos, len) LazyArray::from_position_and_num_elems(pos, len)
} }
fn encode_symbol_or_byte_symbol(
&mut self,
index: u32,
emit_str_or_byte_str: impl Fn(&mut Self),
) {
// if symbol/byte symbol is predefined, emit tag and symbol index
if Symbol::is_predefined(index) {
self.opaque.emit_u8(SYMBOL_PREDEFINED);
self.opaque.emit_u32(index);
} else {
// otherwise write it as string or as offset to it
match self.symbol_index_table.entry(index) {
Entry::Vacant(o) => {
self.opaque.emit_u8(SYMBOL_STR);
let pos = self.opaque.position();
o.insert(pos);
emit_str_or_byte_str(self);
}
Entry::Occupied(o) => {
let x = *o.get();
self.emit_u8(SYMBOL_OFFSET);
self.emit_usize(x);
}
}
}
}
fn encode_def_path_table(&mut self) { fn encode_def_path_table(&mut self) {
let table = self.tcx.def_path_table(); let table = self.tcx.def_path_table();
if self.is_proc_macro { if self.is_proc_macro {
@@ -2427,7 +2442,7 @@ fn with_encode_metadata_header(
required_source_files, required_source_files,
is_proc_macro: tcx.crate_types().contains(&CrateType::ProcMacro), is_proc_macro: tcx.crate_types().contains(&CrateType::ProcMacro),
hygiene_ctxt: &hygiene_ctxt, hygiene_ctxt: &hygiene_ctxt,
symbol_table: Default::default(), symbol_index_table: Default::default(),
}; };
// Encode the rustc version string in a predictable location. // Encode the rustc version string in a predictable location.

View File

@@ -77,7 +77,7 @@ impl<'tcx> GlobalId<'tcx> {
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash, HashStable)] #[derive(Copy, Clone, Debug, Eq, PartialEq, Hash, HashStable)]
pub struct LitToConstInput<'tcx> { pub struct LitToConstInput<'tcx> {
/// The absolute value of the resultant constant. /// The absolute value of the resultant constant.
pub lit: &'tcx LitKind, pub lit: LitKind,
/// The type of the constant. /// The type of the constant.
pub ty: Ty<'tcx>, pub ty: Ty<'tcx>,
/// If the constant is negative. /// If the constant is negative.

View File

@@ -20,8 +20,8 @@ use rustc_span::hygiene::{
}; };
use rustc_span::source_map::Spanned; use rustc_span::source_map::Spanned;
use rustc_span::{ use rustc_span::{
BytePos, CachingSourceMapView, ExpnData, ExpnHash, Pos, RelativeBytePos, SourceFile, Span, BytePos, ByteSymbol, CachingSourceMapView, ExpnData, ExpnHash, Pos, RelativeBytePos,
SpanDecoder, SpanEncoder, StableSourceFileId, Symbol, SourceFile, Span, SpanDecoder, SpanEncoder, StableSourceFileId, Symbol,
}; };
use crate::dep_graph::{DepNodeIndex, SerializedDepNodeIndex}; use crate::dep_graph::{DepNodeIndex, SerializedDepNodeIndex};
@@ -42,7 +42,7 @@ const TAG_RELATIVE_SPAN: u8 = 2;
const TAG_SYNTAX_CONTEXT: u8 = 0; const TAG_SYNTAX_CONTEXT: u8 = 0;
const TAG_EXPN_DATA: u8 = 1; const TAG_EXPN_DATA: u8 = 1;
// Tags for encoding Symbol's // Tags for encoding Symbols and ByteSymbols
const SYMBOL_STR: u8 = 0; const SYMBOL_STR: u8 = 0;
const SYMBOL_OFFSET: u8 = 1; const SYMBOL_OFFSET: u8 = 1;
const SYMBOL_PREDEFINED: u8 = 2; const SYMBOL_PREDEFINED: u8 = 2;
@@ -253,7 +253,7 @@ impl OnDiskCache {
source_map: CachingSourceMapView::new(tcx.sess.source_map()), source_map: CachingSourceMapView::new(tcx.sess.source_map()),
file_to_file_index, file_to_file_index,
hygiene_context: &hygiene_encode_context, hygiene_context: &hygiene_encode_context,
symbol_table: Default::default(), symbol_index_table: Default::default(),
}; };
// Encode query results. // Encode query results.
@@ -479,6 +479,30 @@ impl<'a, 'tcx> CacheDecoder<'a, 'tcx> {
.expect("failed to lookup `SourceFile` in new context") .expect("failed to lookup `SourceFile` in new context")
})) }))
} }
// copy&paste impl from rustc_metadata
//
// Shared decoding path for `Symbol` and `ByteSymbol`. A leading tag byte
// selects how the value was stored:
// - `SYMBOL_STR`: the contents follow inline; read and intern them.
// - `SYMBOL_OFFSET`: a position follows; the contents are read from there.
// - `SYMBOL_PREDEFINED`: an index follows; it is passed to `new_from_index`.
#[inline]
fn decode_symbol_or_byte_symbol<S>(
    &mut self,
    new_from_index: impl Fn(u32) -> S,
    read_and_intern_str_or_byte_str_this: impl Fn(&mut Self) -> S,
    read_and_intern_str_or_byte_str_opaque: impl Fn(&mut MemDecoder<'a>) -> S,
) -> S {
    match self.read_u8() {
        SYMBOL_STR => read_and_intern_str_or_byte_str_this(self),
        SYMBOL_OFFSET => {
            // The payload is a position; decode the contents found there.
            let offset = self.read_usize();
            self.opaque.with_position(offset, |decoder| {
                read_and_intern_str_or_byte_str_opaque(decoder)
            })
        }
        SYMBOL_PREDEFINED => {
            let index = self.read_u32();
            new_from_index(index)
        }
        _ => unreachable!(),
    }
}
} }
// Decodes something that was encoded with `encode_tagged()` and verify that the // Decodes something that was encoded with `encode_tagged()` and verify that the
@@ -653,32 +677,20 @@ impl<'a, 'tcx> SpanDecoder for CacheDecoder<'a, 'tcx> {
Span::new(lo, hi, ctxt, parent) Span::new(lo, hi, ctxt, parent)
} }
// copy&paste impl from rustc_metadata
#[inline]
fn decode_symbol(&mut self) -> Symbol { fn decode_symbol(&mut self) -> Symbol {
let tag = self.read_u8(); self.decode_symbol_or_byte_symbol(
Symbol::new,
|this| Symbol::intern(this.read_str()),
|opaque| Symbol::intern(opaque.read_str()),
)
}
match tag { fn decode_byte_symbol(&mut self) -> ByteSymbol {
SYMBOL_STR => { self.decode_symbol_or_byte_symbol(
let s = self.read_str(); ByteSymbol::new,
Symbol::intern(s) |this| ByteSymbol::intern(this.read_byte_str()),
} |opaque| ByteSymbol::intern(opaque.read_byte_str()),
SYMBOL_OFFSET => { )
// read str offset
let pos = self.read_usize();
// move to str offset and read
self.opaque.with_position(pos, |d| {
let s = d.read_str();
Symbol::intern(s)
})
}
SYMBOL_PREDEFINED => {
let symbol_index = self.read_u32();
Symbol::new(symbol_index)
}
_ => unreachable!(),
}
} }
fn decode_crate_num(&mut self) -> CrateNum { fn decode_crate_num(&mut self) -> CrateNum {
@@ -807,7 +819,8 @@ pub struct CacheEncoder<'a, 'tcx> {
source_map: CachingSourceMapView<'tcx>, source_map: CachingSourceMapView<'tcx>,
file_to_file_index: FxHashMap<*const SourceFile, SourceFileIndex>, file_to_file_index: FxHashMap<*const SourceFile, SourceFileIndex>,
hygiene_context: &'a HygieneEncodeContext, hygiene_context: &'a HygieneEncodeContext,
symbol_table: FxHashMap<Symbol, usize>, // Used for both `Symbol`s and `ByteSymbol`s.
symbol_index_table: FxHashMap<u32, usize>,
} }
impl<'a, 'tcx> CacheEncoder<'a, 'tcx> { impl<'a, 'tcx> CacheEncoder<'a, 'tcx> {
@@ -831,6 +844,34 @@ impl<'a, 'tcx> CacheEncoder<'a, 'tcx> {
((end_pos - start_pos) as u64).encode(self); ((end_pos - start_pos) as u64).encode(self);
} }
// copy&paste impl from rustc_metadata
//
// Shared encoding path for `Symbol` and `ByteSymbol`. `index` is the
// symbol's index; `emit_str_or_byte_str` writes the contents and is only
// called the first time a given non-predefined index is encoded.
//
// NOTE(review): `symbol_index_table` is keyed by index alone, and the two
// symbol kinds share indices, so a `SYMBOL_OFFSET` emitted for one kind may
// point at contents written by the other kind's emitter. Confirm the
// decoders accept that.
fn encode_symbol_or_byte_symbol(
    &mut self,
    index: u32,
    emit_str_or_byte_str: impl Fn(&mut Self),
) {
    // Predefined symbols/byte symbols: emit the tag and the index only.
    if Symbol::is_predefined(index) {
        self.encoder.emit_u8(SYMBOL_PREDEFINED);
        self.encoder.emit_u32(index);
        return;
    }

    // Otherwise write the contents, or an offset to an earlier copy of them.
    match self.symbol_index_table.entry(index) {
        Entry::Occupied(seen) => {
            let offset = *seen.get();
            self.emit_u8(SYMBOL_OFFSET);
            self.emit_usize(offset);
        }
        Entry::Vacant(slot) => {
            self.encoder.emit_u8(SYMBOL_STR);
            // Record where the contents start so later uses can refer back.
            let start = self.encoder.position();
            slot.insert(start);
            emit_str_or_byte_str(self);
        }
    }
}
#[inline] #[inline]
fn finish(mut self) -> FileEncodeResult { fn finish(mut self) -> FileEncodeResult {
self.encoder.finish() self.encoder.finish()
@@ -889,28 +930,14 @@ impl<'a, 'tcx> SpanEncoder for CacheEncoder<'a, 'tcx> {
len.encode(self); len.encode(self);
} }
// copy&paste impl from rustc_metadata fn encode_symbol(&mut self, sym: Symbol) {
fn encode_symbol(&mut self, symbol: Symbol) { self.encode_symbol_or_byte_symbol(sym.as_u32(), |this| this.emit_str(sym.as_str()));
// if symbol predefined, emit tag and symbol index }
if symbol.is_predefined() {
self.encoder.emit_u8(SYMBOL_PREDEFINED); fn encode_byte_symbol(&mut self, byte_sym: ByteSymbol) {
self.encoder.emit_u32(symbol.as_u32()); self.encode_symbol_or_byte_symbol(byte_sym.as_u32(), |this| {
} else { this.emit_byte_str(byte_sym.as_byte_str())
// otherwise write it as string or as offset to it });
match self.symbol_table.entry(symbol) {
Entry::Vacant(o) => {
self.encoder.emit_u8(SYMBOL_STR);
let pos = self.encoder.position();
o.insert(pos);
self.emit_str(symbol.as_str());
}
Entry::Occupied(o) => {
let x = *o.get();
self.emit_u8(SYMBOL_OFFSET);
self.emit_usize(x);
}
}
}
} }
fn encode_crate_num(&mut self, crate_num: CrateNum) { fn encode_crate_num(&mut self, crate_num: CrateNum) {

View File

@@ -526,7 +526,7 @@ pub enum ExprKind<'tcx> {
Closure(Box<ClosureExpr<'tcx>>), Closure(Box<ClosureExpr<'tcx>>),
/// A literal. /// A literal.
Literal { Literal {
lit: &'tcx hir::Lit, lit: hir::Lit,
neg: bool, neg: bool,
}, },
/// For literals that don't correspond to anything in the HIR /// For literals that don't correspond to anything in the HIR

View File

@@ -49,7 +49,7 @@ pub(crate) fn as_constant_inner<'tcx>(
let Expr { ty, temp_lifetime: _, span, ref kind } = *expr; let Expr { ty, temp_lifetime: _, span, ref kind } = *expr;
match *kind { match *kind {
ExprKind::Literal { lit, neg } => { ExprKind::Literal { lit, neg } => {
let const_ = lit_to_mir_constant(tcx, LitToConstInput { lit: &lit.node, ty, neg }); let const_ = lit_to_mir_constant(tcx, LitToConstInput { lit: lit.node, ty, neg });
ConstOperand { span, user_ty: None, const_ } ConstOperand { span, user_ty: None, const_ }
} }
@@ -128,34 +128,35 @@ fn lit_to_mir_constant<'tcx>(tcx: TyCtxt<'tcx>, lit_input: LitToConstInput<'tcx>
(ast::LitKind::ByteStr(data, _), ty::Ref(_, inner_ty, _)) (ast::LitKind::ByteStr(data, _), ty::Ref(_, inner_ty, _))
if matches!(inner_ty.kind(), ty::Slice(_)) => if matches!(inner_ty.kind(), ty::Slice(_)) =>
{ {
let allocation = Allocation::from_bytes_byte_aligned_immutable(data as &[u8], ()); let allocation = Allocation::from_bytes_byte_aligned_immutable(data.as_byte_str(), ());
let allocation = tcx.mk_const_alloc(allocation); let allocation = tcx.mk_const_alloc(allocation);
ConstValue::Slice { data: allocation, meta: allocation.inner().size().bytes() } ConstValue::Slice { data: allocation, meta: allocation.inner().size().bytes() }
} }
(ast::LitKind::ByteStr(data, _), ty::Ref(_, inner_ty, _)) if inner_ty.is_array() => { (ast::LitKind::ByteStr(byte_sym, _), ty::Ref(_, inner_ty, _)) if inner_ty.is_array() => {
let id = tcx.allocate_bytes_dedup(data, CTFE_ALLOC_SALT); let id = tcx.allocate_bytes_dedup(byte_sym.as_byte_str(), CTFE_ALLOC_SALT);
ConstValue::Scalar(Scalar::from_pointer(id.into(), &tcx)) ConstValue::Scalar(Scalar::from_pointer(id.into(), &tcx))
} }
(ast::LitKind::CStr(data, _), ty::Ref(_, inner_ty, _)) if matches!(inner_ty.kind(), ty::Adt(def, _) if tcx.is_lang_item(def.did(), LangItem::CStr)) => (ast::LitKind::CStr(byte_sym, _), ty::Ref(_, inner_ty, _)) if matches!(inner_ty.kind(), ty::Adt(def, _) if tcx.is_lang_item(def.did(), LangItem::CStr)) =>
{ {
let allocation = Allocation::from_bytes_byte_aligned_immutable(data as &[u8], ()); let allocation =
Allocation::from_bytes_byte_aligned_immutable(byte_sym.as_byte_str(), ());
let allocation = tcx.mk_const_alloc(allocation); let allocation = tcx.mk_const_alloc(allocation);
ConstValue::Slice { data: allocation, meta: allocation.inner().size().bytes() } ConstValue::Slice { data: allocation, meta: allocation.inner().size().bytes() }
} }
(ast::LitKind::Byte(n), ty::Uint(ty::UintTy::U8)) => { (ast::LitKind::Byte(n), ty::Uint(ty::UintTy::U8)) => {
ConstValue::Scalar(Scalar::from_uint(*n, Size::from_bytes(1))) ConstValue::Scalar(Scalar::from_uint(n, Size::from_bytes(1)))
} }
(ast::LitKind::Int(n, _), ty::Uint(_)) if !neg => trunc(n.get()), (ast::LitKind::Int(n, _), ty::Uint(_)) if !neg => trunc(n.get()),
(ast::LitKind::Int(n, _), ty::Int(_)) => { (ast::LitKind::Int(n, _), ty::Int(_)) => {
trunc(if neg { (n.get() as i128).overflowing_neg().0 as u128 } else { n.get() }) trunc(if neg { (n.get() as i128).overflowing_neg().0 as u128 } else { n.get() })
} }
(ast::LitKind::Float(n, _), ty::Float(fty)) => { (ast::LitKind::Float(n, _), ty::Float(fty)) => {
parse_float_into_constval(*n, *fty, neg).unwrap() parse_float_into_constval(n, *fty, neg).unwrap()
} }
(ast::LitKind::Bool(b), ty::Bool) => ConstValue::Scalar(Scalar::from_bool(*b)), (ast::LitKind::Bool(b), ty::Bool) => ConstValue::Scalar(Scalar::from_bool(b)),
(ast::LitKind::Char(c), ty::Char) => ConstValue::Scalar(Scalar::from_char(*c)), (ast::LitKind::Char(c), ty::Char) => ConstValue::Scalar(Scalar::from_char(c)),
(ast::LitKind::Err(guar), _) => { (ast::LitKind::Err(guar), _) => {
return Const::Ty(Ty::new_error(tcx, *guar), ty::Const::new_error(tcx, *guar)); return Const::Ty(Ty::new_error(tcx, guar), ty::Const::new_error(tcx, guar));
} }
_ => bug!("invalid lit/ty combination in `lit_to_mir_constant`: {lit:?}: {ty:?}"), _ => bug!("invalid lit/ty combination in `lit_to_mir_constant`: {lit:?}: {ty:?}"),
}; };

View File

@@ -43,27 +43,23 @@ pub(crate) fn lit_to_const<'tcx>(
let str_bytes = s.as_str().as_bytes(); let str_bytes = s.as_str().as_bytes();
ty::ValTree::from_raw_bytes(tcx, str_bytes) ty::ValTree::from_raw_bytes(tcx, str_bytes)
} }
(ast::LitKind::ByteStr(data, _), ty::Ref(_, inner_ty, _)) (ast::LitKind::ByteStr(byte_sym, _), ty::Ref(_, inner_ty, _))
if matches!(inner_ty.kind(), ty::Slice(_) | ty::Array(..)) => if matches!(inner_ty.kind(), ty::Slice(_) | ty::Array(..)) =>
{ {
let bytes = data as &[u8]; ty::ValTree::from_raw_bytes(tcx, byte_sym.as_byte_str())
ty::ValTree::from_raw_bytes(tcx, bytes)
} }
(ast::LitKind::ByteStr(data, _), ty::Slice(_) | ty::Array(..)) (ast::LitKind::ByteStr(byte_sym, _), ty::Slice(_) | ty::Array(..))
if tcx.features().deref_patterns() => if tcx.features().deref_patterns() =>
{ {
// Byte string literal patterns may have type `[u8]` or `[u8; N]` if `deref_patterns` is // Byte string literal patterns may have type `[u8]` or `[u8; N]` if `deref_patterns` is
// enabled, in order to allow, e.g., `deref!(b"..."): Vec<u8>`. // enabled, in order to allow, e.g., `deref!(b"..."): Vec<u8>`.
let bytes = data as &[u8]; ty::ValTree::from_raw_bytes(tcx, byte_sym.as_byte_str())
ty::ValTree::from_raw_bytes(tcx, bytes)
} }
(ast::LitKind::Byte(n), ty::Uint(ty::UintTy::U8)) => { (ast::LitKind::Byte(n), ty::Uint(ty::UintTy::U8)) => {
ty::ValTree::from_scalar_int(tcx, (*n).into()) ty::ValTree::from_scalar_int(tcx, n.into())
} }
(ast::LitKind::CStr(data, _), ty::Ref(_, inner_ty, _)) if matches!(inner_ty.kind(), ty::Adt(def, _) if tcx.is_lang_item(def.did(), LangItem::CStr)) => (ast::LitKind::CStr(byte_sym, _), ty::Ref(_, inner_ty, _)) if matches!(inner_ty.kind(), ty::Adt(def, _) if tcx.is_lang_item(def.did(), LangItem::CStr)) => {
{ ty::ValTree::from_raw_bytes(tcx, byte_sym.as_byte_str())
let bytes = data as &[u8];
ty::ValTree::from_raw_bytes(tcx, bytes)
} }
(ast::LitKind::Int(n, _), ty::Uint(ui)) if !neg => { (ast::LitKind::Int(n, _), ty::Uint(ui)) if !neg => {
let scalar_int = trunc(n.get(), *ui); let scalar_int = trunc(n.get(), *ui);
@@ -76,15 +72,15 @@ pub(crate) fn lit_to_const<'tcx>(
); );
ty::ValTree::from_scalar_int(tcx, scalar_int) ty::ValTree::from_scalar_int(tcx, scalar_int)
} }
(ast::LitKind::Bool(b), ty::Bool) => ty::ValTree::from_scalar_int(tcx, (*b).into()), (ast::LitKind::Bool(b), ty::Bool) => ty::ValTree::from_scalar_int(tcx, b.into()),
(ast::LitKind::Float(n, _), ty::Float(fty)) => { (ast::LitKind::Float(n, _), ty::Float(fty)) => {
let bits = parse_float_into_scalar(*n, *fty, neg).unwrap_or_else(|| { let bits = parse_float_into_scalar(n, *fty, neg).unwrap_or_else(|| {
tcx.dcx().bug(format!("couldn't parse float literal: {:?}", lit_input.lit)) tcx.dcx().bug(format!("couldn't parse float literal: {:?}", lit_input.lit))
}); });
ty::ValTree::from_scalar_int(tcx, bits) ty::ValTree::from_scalar_int(tcx, bits)
} }
(ast::LitKind::Char(c), ty::Char) => ty::ValTree::from_scalar_int(tcx, (*c).into()), (ast::LitKind::Char(c), ty::Char) => ty::ValTree::from_scalar_int(tcx, c.into()),
(ast::LitKind::Err(guar), _) => return ty::Const::new_error(tcx, *guar), (ast::LitKind::Err(guar), _) => return ty::Const::new_error(tcx, guar),
_ => return ty::Const::new_misc_error(tcx), _ => return ty::Const::new_misc_error(tcx),
}; };

View File

@@ -680,7 +680,7 @@ impl<'a, 'tcx> PatCtxt<'a, 'tcx> {
Some(pat_ty) => pat_ty, Some(pat_ty) => pat_ty,
None => self.typeck_results.node_type(expr.hir_id), None => self.typeck_results.node_type(expr.hir_id),
}; };
let lit_input = LitToConstInput { lit: &lit.node, ty: ct_ty, neg: *negated }; let lit_input = LitToConstInput { lit: lit.node, ty: ct_ty, neg: *negated };
let constant = self.tcx.at(expr.span).lit_to_const(lit_input); let constant = self.tcx.at(expr.span).lit_to_const(lit_input);
self.const_to_pat(constant, ct_ty, expr.hir_id, lit.span).kind self.const_to_pat(constant, ct_ty, expr.hir_id, lit.span).kind
} }

View File

@@ -21,6 +21,11 @@ use thin_vec::ThinVec;
/// [utf8]: https://en.wikipedia.org/w/index.php?title=UTF-8&oldid=1058865525#Codepage_layout /// [utf8]: https://en.wikipedia.org/w/index.php?title=UTF-8&oldid=1058865525#Codepage_layout
const STR_SENTINEL: u8 = 0xC1; const STR_SENTINEL: u8 = 0xC1;
/// For byte strings there are no bytes that cannot occur. Just use this value
/// as a best-effort sentinel. There is no validation skipped so the potential
/// for badness is lower than in the `STR_SENTINEL` case.
const BYTE_STR_SENTINEL: u8 = 0xC2;
/// A note about error handling. /// A note about error handling.
/// ///
/// Encoders may be fallible, but in practice failure is rare and there are so /// Encoders may be fallible, but in practice failure is rare and there are so
@@ -72,6 +77,13 @@ pub trait Encoder {
self.emit_u8(STR_SENTINEL); self.emit_u8(STR_SENTINEL);
} }
/// Writes a byte string as its length, then the raw bytes, then a trailing
/// `BYTE_STR_SENTINEL` byte.
#[inline]
fn emit_byte_str(&mut self, v: &[u8]) {
    let len = v.len();
    self.emit_usize(len);
    self.emit_raw_bytes(v);
    self.emit_u8(BYTE_STR_SENTINEL);
}
fn emit_raw_bytes(&mut self, s: &[u8]); fn emit_raw_bytes(&mut self, s: &[u8]);
} }
@@ -122,9 +134,19 @@ pub trait Decoder {
let len = self.read_usize(); let len = self.read_usize();
let bytes = self.read_raw_bytes(len + 1); let bytes = self.read_raw_bytes(len + 1);
assert!(bytes[len] == STR_SENTINEL); assert!(bytes[len] == STR_SENTINEL);
// SAFETY: the presence of `STR_SENTINEL` gives us high (but not
// perfect) confidence that the bytes we just read truly are UTF-8.
unsafe { std::str::from_utf8_unchecked(&bytes[..len]) } unsafe { std::str::from_utf8_unchecked(&bytes[..len]) }
} }
#[inline]
fn read_byte_str(&mut self) -> &[u8] {
let len = self.read_usize();
let bytes = self.read_raw_bytes(len + 1);
assert!(bytes[len] == BYTE_STR_SENTINEL);
&bytes[..len]
}
fn read_raw_bytes(&mut self, len: usize) -> &[u8]; fn read_raw_bytes(&mut self, len: usize) -> &[u8];
fn peek_byte(&self) -> u8; fn peek_byte(&self) -> u8;
@@ -239,7 +261,7 @@ impl<S: Encoder> Encodable<S> for str {
impl<S: Encoder> Encodable<S> for String { impl<S: Encoder> Encodable<S> for String {
fn encode(&self, s: &mut S) { fn encode(&self, s: &mut S) {
s.emit_str(&self[..]); s.emit_str(&self);
} }
} }

View File

@@ -66,7 +66,9 @@ mod span_encoding;
pub use span_encoding::{DUMMY_SP, Span}; pub use span_encoding::{DUMMY_SP, Span};
pub mod symbol; pub mod symbol;
pub use symbol::{Ident, MacroRulesNormalizedIdent, STDLIB_STABLE_CRATES, Symbol, kw, sym}; pub use symbol::{
ByteSymbol, Ident, MacroRulesNormalizedIdent, STDLIB_STABLE_CRATES, Symbol, kw, sym,
};
mod analyze_source_file; mod analyze_source_file;
pub mod fatal_error; pub mod fatal_error;
@@ -1184,11 +1186,12 @@ rustc_index::newtype_index! {
/// It is similar to rustc_type_ir's TyEncoder. /// It is similar to rustc_type_ir's TyEncoder.
pub trait SpanEncoder: Encoder { pub trait SpanEncoder: Encoder {
fn encode_span(&mut self, span: Span); fn encode_span(&mut self, span: Span);
fn encode_symbol(&mut self, symbol: Symbol); fn encode_symbol(&mut self, sym: Symbol);
fn encode_byte_symbol(&mut self, byte_sym: ByteSymbol);
fn encode_expn_id(&mut self, expn_id: ExpnId); fn encode_expn_id(&mut self, expn_id: ExpnId);
fn encode_syntax_context(&mut self, syntax_context: SyntaxContext); fn encode_syntax_context(&mut self, syntax_context: SyntaxContext);
/// As a local identifier, a `CrateNum` is only meaningful within its context, e.g. within a tcx. /// As a local identifier, a `CrateNum` is only meaningful within its context, e.g. within a
/// Therefore, make sure to include the context when encode a `CrateNum`. /// tcx. Therefore, make sure to include the context when encode a `CrateNum`.
fn encode_crate_num(&mut self, crate_num: CrateNum); fn encode_crate_num(&mut self, crate_num: CrateNum);
fn encode_def_index(&mut self, def_index: DefIndex); fn encode_def_index(&mut self, def_index: DefIndex);
fn encode_def_id(&mut self, def_id: DefId); fn encode_def_id(&mut self, def_id: DefId);
@@ -1201,8 +1204,12 @@ impl SpanEncoder for FileEncoder {
span.hi.encode(self); span.hi.encode(self);
} }
fn encode_symbol(&mut self, symbol: Symbol) { fn encode_symbol(&mut self, sym: Symbol) {
self.emit_str(symbol.as_str()); self.emit_str(sym.as_str());
}
fn encode_byte_symbol(&mut self, byte_sym: ByteSymbol) {
self.emit_byte_str(byte_sym.as_byte_str());
} }
fn encode_expn_id(&mut self, _expn_id: ExpnId) { fn encode_expn_id(&mut self, _expn_id: ExpnId) {
@@ -1239,6 +1246,12 @@ impl<E: SpanEncoder> Encodable<E> for Symbol {
} }
} }
// Encoding a `ByteSymbol` is delegated to the encoder's
// `encode_byte_symbol`, so each `SpanEncoder` chooses its own representation.
impl<E: SpanEncoder> Encodable<E> for ByteSymbol {
fn encode(&self, s: &mut E) {
s.encode_byte_symbol(*self);
}
}
impl<E: SpanEncoder> Encodable<E> for ExpnId { impl<E: SpanEncoder> Encodable<E> for ExpnId {
fn encode(&self, s: &mut E) { fn encode(&self, s: &mut E) {
s.encode_expn_id(*self) s.encode_expn_id(*self)
@@ -1280,6 +1293,7 @@ impl<E: SpanEncoder> Encodable<E> for AttrId {
pub trait SpanDecoder: Decoder { pub trait SpanDecoder: Decoder {
fn decode_span(&mut self) -> Span; fn decode_span(&mut self) -> Span;
fn decode_symbol(&mut self) -> Symbol; fn decode_symbol(&mut self) -> Symbol;
fn decode_byte_symbol(&mut self) -> ByteSymbol;
fn decode_expn_id(&mut self) -> ExpnId; fn decode_expn_id(&mut self) -> ExpnId;
fn decode_syntax_context(&mut self) -> SyntaxContext; fn decode_syntax_context(&mut self) -> SyntaxContext;
fn decode_crate_num(&mut self) -> CrateNum; fn decode_crate_num(&mut self) -> CrateNum;
@@ -1300,6 +1314,10 @@ impl SpanDecoder for MemDecoder<'_> {
Symbol::intern(self.read_str()) Symbol::intern(self.read_str())
} }
// Reads a byte string from the stream and interns it.
fn decode_byte_symbol(&mut self) -> ByteSymbol {
ByteSymbol::intern(self.read_byte_str())
}
fn decode_expn_id(&mut self) -> ExpnId { fn decode_expn_id(&mut self) -> ExpnId {
panic!("cannot decode `ExpnId` with `MemDecoder`"); panic!("cannot decode `ExpnId` with `MemDecoder`");
} }
@@ -1337,6 +1355,12 @@ impl<D: SpanDecoder> Decodable<D> for Symbol {
} }
} }
// Decoding a `ByteSymbol` is delegated to the decoder's
// `decode_byte_symbol`.
impl<D: SpanDecoder> Decodable<D> for ByteSymbol {
fn decode(s: &mut D) -> ByteSymbol {
s.decode_byte_symbol()
}
}
impl<D: SpanDecoder> Decodable<D> for ExpnId { impl<D: SpanDecoder> Decodable<D> for ExpnId {
fn decode(s: &mut D) -> ExpnId { fn decode(s: &mut D) -> ExpnId {
s.decode_expn_id() s.decode_expn_id()

View File

@@ -2583,7 +2583,7 @@ impl fmt::Display for MacroRulesNormalizedIdent {
} }
} }
/// An interned string. /// An interned UTF-8 string.
/// ///
/// Internally, a `Symbol` is implemented as an index, and all operations /// Internally, a `Symbol` is implemented as an index, and all operations
/// (including hashing, equality, and ordering) operate on that index. The use /// (including hashing, equality, and ordering) operate on that index. The use
@@ -2595,20 +2595,23 @@ impl fmt::Display for MacroRulesNormalizedIdent {
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Symbol(SymbolIndex); pub struct Symbol(SymbolIndex);
// Used within both `Symbol` and `ByteSymbol`.
rustc_index::newtype_index! { rustc_index::newtype_index! {
#[orderable] #[orderable]
struct SymbolIndex {} struct SymbolIndex {}
} }
impl Symbol { impl Symbol {
/// Avoid this except for things like deserialization of previously
/// serialized symbols, and testing. Use `intern` instead.
pub const fn new(n: u32) -> Self { pub const fn new(n: u32) -> Self {
Symbol(SymbolIndex::from_u32(n)) Symbol(SymbolIndex::from_u32(n))
} }
/// Maps a string to its interned representation. /// Maps a string to its interned representation.
#[rustc_diagnostic_item = "SymbolIntern"] #[rustc_diagnostic_item = "SymbolIntern"]
pub fn intern(string: &str) -> Self { pub fn intern(str: &str) -> Self {
with_session_globals(|session_globals| session_globals.symbol_interner.intern(string)) with_session_globals(|session_globals| session_globals.symbol_interner.intern_str(str))
} }
/// Access the underlying string. This is a slowish operation because it /// Access the underlying string. This is a slowish operation because it
@@ -2621,7 +2624,7 @@ impl Symbol {
/// it works out ok. /// it works out ok.
pub fn as_str(&self) -> &str { pub fn as_str(&self) -> &str {
with_session_globals(|session_globals| unsafe { with_session_globals(|session_globals| unsafe {
std::mem::transmute::<&str, &str>(session_globals.symbol_interner.get(*self)) std::mem::transmute::<&str, &str>(session_globals.symbol_interner.get_str(*self))
}) })
} }
@@ -2678,56 +2681,130 @@ impl StableCompare for Symbol {
} }
} }
/// Like `Symbol`, but for byte strings. `ByteSymbol` is used less widely, so
/// it has fewer operations defined than `Symbol`.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ByteSymbol(SymbolIndex);
impl ByteSymbol {
/// Avoid this except for things like deserialization of previously
/// serialized symbols, and testing. Use `intern` instead.
pub const fn new(n: u32) -> Self {
ByteSymbol(SymbolIndex::from_u32(n))
}
/// Maps a byte string to its interned representation.
pub fn intern(byte_str: &[u8]) -> Self {
with_session_globals(|session_globals| {
session_globals.symbol_interner.intern_byte_str(byte_str)
})
}
/// Like `Symbol::as_str`.
pub fn as_byte_str(&self) -> &[u8] {
// The transmute keeps the type and only changes the reference's lifetime,
// so the slice can be returned out of the `with_session_globals` closure.
// NOTE(review): presumably sound for the same reason as `Symbol::as_str`
// (the interner's storage outlives every use of the result) — confirm.
with_session_globals(|session_globals| unsafe {
std::mem::transmute::<&[u8], &[u8]>(session_globals.symbol_interner.get_byte_str(*self))
})
}
/// Returns the underlying index as a `u32`.
pub fn as_u32(self) -> u32 {
self.0.as_u32()
}
}
// Debug-formats the interned bytes (via the slice's `Debug` impl) rather
// than the index.
impl fmt::Debug for ByteSymbol {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::Debug::fmt(self.as_byte_str(), f)
}
}
// Stable-hashes the interned bytes rather than the index.
// NOTE(review): presumably because the index depends on interning order and
// so is not stable across sessions — confirm.
impl<CTX> HashStable<CTX> for ByteSymbol {
#[inline]
fn hash_stable(&self, hcx: &mut CTX, hasher: &mut StableHasher) {
self.as_byte_str().hash_stable(hcx, hasher);
}
}
// Interner used for both `Symbol`s and `ByteSymbol`s. If a string and a byte
// string with identical contents (e.g. "foo" and b"foo") are both interned,
// only one copy will be stored and the resulting `Symbol` and `ByteSymbol`
// will have the same index.
pub(crate) struct Interner(Lock<InternerInner>); pub(crate) struct Interner(Lock<InternerInner>);
// The `&'static str`s in this type actually point into the arena. // The `&'static [u8]`s in this type actually point into the arena.
// //
// This type is private to prevent accidentally constructing more than one // This type is private to prevent accidentally constructing more than one
// `Interner` on the same thread, which makes it easy to mix up `Symbol`s // `Interner` on the same thread, which makes it easy to mix up `Symbol`s
// between `Interner`s. // between `Interner`s.
struct InternerInner { struct InternerInner {
arena: DroplessArena, arena: DroplessArena,
strings: FxIndexSet<&'static str>, byte_strs: FxIndexSet<&'static [u8]>,
} }
impl Interner { impl Interner {
// These arguments are `&str`, but because of the sharing, we are
// effectively pre-interning all these strings for both `Symbol` and
// `ByteSymbol`.
fn prefill(init: &[&'static str], extra: &[&'static str]) -> Self { fn prefill(init: &[&'static str], extra: &[&'static str]) -> Self {
let strings = FxIndexSet::from_iter(init.iter().copied().chain(extra.iter().copied())); let byte_strs = FxIndexSet::from_iter(
assert_eq!( init.iter().copied().chain(extra.iter().copied()).map(|str| str.as_bytes()),
strings.len(),
init.len() + extra.len(),
"there are duplicate symbols in the rustc symbol list and the extra symbols added by the driver",
); );
Interner(Lock::new(InternerInner { arena: Default::default(), strings })) assert_eq!(
byte_strs.len(),
init.len() + extra.len(),
"duplicate symbols in the rustc symbol list and the extra symbols added by the driver",
);
Interner(Lock::new(InternerInner { arena: Default::default(), byte_strs }))
}
fn intern_str(&self, str: &str) -> Symbol {
Symbol::new(self.intern_inner(str.as_bytes()))
}
fn intern_byte_str(&self, byte_str: &[u8]) -> ByteSymbol {
ByteSymbol::new(self.intern_inner(byte_str))
} }
#[inline] #[inline]
fn intern(&self, string: &str) -> Symbol { fn intern_inner(&self, byte_str: &[u8]) -> u32 {
let mut inner = self.0.lock(); let mut inner = self.0.lock();
if let Some(idx) = inner.strings.get_index_of(string) { if let Some(idx) = inner.byte_strs.get_index_of(byte_str) {
return Symbol::new(idx as u32); return idx as u32;
} }
let string: &str = inner.arena.alloc_str(string); let byte_str: &[u8] = inner.arena.alloc_slice(byte_str);
// SAFETY: we can extend the arena allocation to `'static` because we // SAFETY: we can extend the arena allocation to `'static` because we
// only access these while the arena is still alive. // only access these while the arena is still alive.
let string: &'static str = unsafe { &*(string as *const str) }; let byte_str: &'static [u8] = unsafe { &*(byte_str as *const [u8]) };
// This second hash table lookup can be avoided by using `RawEntryMut`, // This second hash table lookup can be avoided by using `RawEntryMut`,
// but this code path isn't hot enough for it to be worth it. See // but this code path isn't hot enough for it to be worth it. See
// #91445 for details. // #91445 for details.
let (idx, is_new) = inner.strings.insert_full(string); let (idx, is_new) = inner.byte_strs.insert_full(byte_str);
debug_assert!(is_new); // due to the get_index_of check above debug_assert!(is_new); // due to the get_index_of check above
Symbol::new(idx as u32) idx as u32
} }
/// Get the symbol as a string. /// Get the symbol as a string.
/// ///
/// [`Symbol::as_str()`] should be used in preference to this function. /// [`Symbol::as_str()`] should be used in preference to this function.
fn get(&self, symbol: Symbol) -> &str { fn get_str(&self, symbol: Symbol) -> &str {
self.0.lock().strings.get_index(symbol.0.as_usize()).unwrap() let byte_str = self.get_inner(symbol.0.as_usize());
// SAFETY: known to be a UTF8 string because it's a `Symbol`.
unsafe { str::from_utf8_unchecked(byte_str) }
}
/// Get the byte symbol as a byte string.
///
/// [`ByteSymbol::as_byte_str()`] should be used in preference to this function.
fn get_byte_str(&self, symbol: ByteSymbol) -> &[u8] {
self.get_inner(symbol.0.as_usize())
}
fn get_inner(&self, index: usize) -> &[u8] {
self.0.lock().byte_strs.get_index(index).unwrap()
} }
} }
@@ -2822,9 +2899,11 @@ impl Symbol {
self != sym::empty && self != kw::Underscore && !self.is_path_segment_keyword() self != sym::empty && self != kw::Underscore && !self.is_path_segment_keyword()
} }
/// Was this symbol predefined in the compiler's `symbols!` macro /// Was this symbol index predefined in the compiler's `symbols!` macro?
pub fn is_predefined(self) -> bool { /// Note: this applies to both `Symbol`s and `ByteSymbol`s, which is why it
self.as_u32() < PREDEFINED_SYMBOLS_COUNT /// takes a `u32` argument instead of a `&self` argument. Use with care.
pub fn is_predefined(index: u32) -> bool {
index < PREDEFINED_SYMBOLS_COUNT
} }
} }

View File

@@ -5,14 +5,14 @@ use crate::create_default_session_globals_then;
fn interner_tests() { fn interner_tests() {
let i = Interner::prefill(&[], &[]); let i = Interner::prefill(&[], &[]);
// first one is zero: // first one is zero:
assert_eq!(i.intern("dog"), Symbol::new(0)); assert_eq!(i.intern_str("dog"), Symbol::new(0));
// re-use gets the same entry: // re-use gets the same entry, even with a `ByteSymbol`
assert_eq!(i.intern("dog"), Symbol::new(0)); assert_eq!(i.intern_byte_str(b"dog"), ByteSymbol::new(0));
// different string gets a different #: // different string gets a different #:
assert_eq!(i.intern("cat"), Symbol::new(1)); assert_eq!(i.intern_byte_str(b"cat"), ByteSymbol::new(1));
assert_eq!(i.intern("cat"), Symbol::new(1)); assert_eq!(i.intern_str("cat"), Symbol::new(1));
// dog is still at zero // dog is still at zero
assert_eq!(i.intern("dog"), Symbol::new(0)); assert_eq!(i.intern_str("dog"), Symbol::new(0));
} }
#[test] #[test]

View File

@@ -120,7 +120,7 @@ fn recurse_build<'tcx>(
} }
&ExprKind::Literal { lit, neg } => { &ExprKind::Literal { lit, neg } => {
let sp = node.span; let sp = node.span;
tcx.at(sp).lit_to_const(LitToConstInput { lit: &lit.node, ty: node.ty, neg }) tcx.at(sp).lit_to_const(LitToConstInput { lit: lit.node, ty: node.ty, neg })
} }
&ExprKind::NonHirLiteral { lit, user_ty: _ } => { &ExprKind::NonHirLiteral { lit, user_ty: _ } => {
let val = ty::ValTree::from_scalar_int(tcx, lit); let val = ty::ValTree::from_scalar_int(tcx, lit);

View File

@@ -74,7 +74,7 @@ impl ApproxConstant {
} }
impl LateLintPass<'_> for ApproxConstant { impl LateLintPass<'_> for ApproxConstant {
fn check_lit(&mut self, cx: &LateContext<'_>, _hir_id: HirId, lit: &Lit, _negated: bool) { fn check_lit(&mut self, cx: &LateContext<'_>, _hir_id: HirId, lit: Lit, _negated: bool) {
match lit.node { match lit.node {
LitKind::Float(s, LitFloatType::Suffixed(fty)) => match fty { LitKind::Float(s, LitFloatType::Suffixed(fty)) => match fty {
FloatTy::F16 => self.check_known_consts(cx, lit.span, s, "f16"), FloatTy::F16 => self.check_known_consts(cx, lit.span, s, "f16"),

View File

@@ -42,7 +42,7 @@ fn extract_bool_lit(e: &Expr<'_>) -> Option<bool> {
}) = e.kind }) = e.kind
&& !e.span.from_expansion() && !e.span.from_expansion()
{ {
Some(*b) Some(b)
} else { } else {
None None
} }

View File

@@ -46,7 +46,7 @@ pub(super) fn check(cx: &LateContext<'_>, expr: &Expr<'_>, from: &Expr<'_>, to:
fn is_expr_const_aligned(cx: &LateContext<'_>, expr: &Expr<'_>, to: &Ty<'_>) -> bool { fn is_expr_const_aligned(cx: &LateContext<'_>, expr: &Expr<'_>, to: &Ty<'_>) -> bool {
match expr.kind { match expr.kind {
ExprKind::Call(fun, _) => is_align_of_call(cx, fun, to), ExprKind::Call(fun, _) => is_align_of_call(cx, fun, to),
ExprKind::Lit(lit) => is_literal_aligned(cx, lit, to), ExprKind::Lit(lit) => is_literal_aligned(cx, &lit, to),
_ => false, _ => false,
} }
} }

View File

@@ -243,7 +243,7 @@ fn lint_unnecessary_cast(
); );
} }
fn get_numeric_literal<'e>(expr: &'e Expr<'e>) -> Option<&'e Lit> { fn get_numeric_literal<'e>(expr: &'e Expr<'e>) -> Option<Lit> {
match expr.kind { match expr.kind {
ExprKind::Lit(lit) => Some(lit), ExprKind::Lit(lit) => Some(lit),
ExprKind::Unary(UnOp::Neg, e) => { ExprKind::Unary(UnOp::Neg, e) => {

View File

@@ -83,7 +83,7 @@ impl<'a, 'tcx> NumericFallbackVisitor<'a, 'tcx> {
} }
/// Check whether a passed literal has potential to cause fallback or not. /// Check whether a passed literal has potential to cause fallback or not.
fn check_lit(&self, lit: &Lit, lit_ty: Ty<'tcx>, emit_hir_id: HirId) { fn check_lit(&self, lit: Lit, lit_ty: Ty<'tcx>, emit_hir_id: HirId) {
if !lit.span.in_external_macro(self.cx.sess().source_map()) if !lit.span.in_external_macro(self.cx.sess().source_map())
&& matches!(self.ty_bounds.last(), Some(ExplicitTyBound(false))) && matches!(self.ty_bounds.last(), Some(ExplicitTyBound(false)))
&& matches!( && matches!(
@@ -210,7 +210,7 @@ impl<'tcx> Visitor<'tcx> for NumericFallbackVisitor<'_, 'tcx> {
ExprKind::Lit(lit) => { ExprKind::Lit(lit) => {
let ty = self.cx.typeck_results().expr_ty(expr); let ty = self.cx.typeck_results().expr_ty(expr);
self.check_lit(lit, ty, expr.hir_id); self.check_lit(*lit, ty, expr.hir_id);
return; return;
}, },

View File

@@ -57,7 +57,7 @@ impl LateLintPass<'_> for LargeIncludeFile {
if let ExprKind::Lit(lit) = &expr.kind if let ExprKind::Lit(lit) = &expr.kind
&& let len = match &lit.node { && let len = match &lit.node {
// include_bytes // include_bytes
LitKind::ByteStr(bstr, _) => bstr.len(), LitKind::ByteStr(bstr, _) => bstr.as_byte_str().len(),
// include_str // include_str
LitKind::Str(sym, _) => sym.as_str().len(), LitKind::Str(sym, _) => sym.as_str().len(),
_ => return, _ => return,

View File

@@ -41,12 +41,12 @@ declare_clippy_lint! {
declare_lint_pass!(ManualIgnoreCaseCmp => [MANUAL_IGNORE_CASE_CMP]); declare_lint_pass!(ManualIgnoreCaseCmp => [MANUAL_IGNORE_CASE_CMP]);
enum MatchType<'a, 'b> { enum MatchType<'a> {
ToAscii(bool, Ty<'a>), ToAscii(bool, Ty<'a>),
Literal(&'b LitKind), Literal(LitKind),
} }
fn get_ascii_type<'a, 'b>(cx: &LateContext<'a>, kind: rustc_hir::ExprKind<'b>) -> Option<(Span, MatchType<'a, 'b>)> { fn get_ascii_type<'a>(cx: &LateContext<'a>, kind: rustc_hir::ExprKind<'_>) -> Option<(Span, MatchType<'a>)> {
if let MethodCall(path, expr, _, _) = kind { if let MethodCall(path, expr, _, _) = kind {
let is_lower = match path.ident.name { let is_lower = match path.ident.name {
sym::to_ascii_lowercase => true, sym::to_ascii_lowercase => true,
@@ -63,7 +63,7 @@ fn get_ascii_type<'a, 'b>(cx: &LateContext<'a>, kind: rustc_hir::ExprKind<'b>) -
return Some((expr.span, ToAscii(is_lower, ty_raw))); return Some((expr.span, ToAscii(is_lower, ty_raw)));
} }
} else if let Lit(expr) = kind { } else if let Lit(expr) = kind {
return Some((expr.span, Literal(&expr.node))); return Some((expr.span, Literal(expr.node)));
} }
None None
} }

View File

@@ -184,7 +184,7 @@ fn eq_pattern_length<'tcx>(cx: &LateContext<'tcx>, pattern: &Expr<'_>, expr: &'t
.. ..
}) = expr.kind }) = expr.kind
{ {
constant_length(cx, pattern).is_some_and(|length| *n == length) constant_length(cx, pattern).is_some_and(|length| n == length)
} else { } else {
len_arg(cx, expr).is_some_and(|arg| eq_expr_value(cx, pattern, arg)) len_arg(cx, expr).is_some_and(|arg| eq_expr_value(cx, pattern, arg))
} }

View File

@@ -159,7 +159,7 @@ fn find_bool_lit(ex: &ExprKind<'_>) -> Option<bool> {
node: LitKind::Bool(b), .. node: LitKind::Bool(b), ..
}) = exp.kind }) = exp.kind
{ {
Some(*b) Some(b)
} else { } else {
None None
} }

View File

@@ -12,7 +12,7 @@ use rustc_hir::{Arm, Expr, HirId, HirIdMap, HirIdMapEntry, HirIdSet, Pat, PatExp
use rustc_lint::builtin::NON_EXHAUSTIVE_OMITTED_PATTERNS; use rustc_lint::builtin::NON_EXHAUSTIVE_OMITTED_PATTERNS;
use rustc_lint::{LateContext, LintContext}; use rustc_lint::{LateContext, LintContext};
use rustc_middle::ty; use rustc_middle::ty;
use rustc_span::{ErrorGuaranteed, Span, Symbol}; use rustc_span::{ByteSymbol, ErrorGuaranteed, Span, Symbol};
use super::MATCH_SAME_ARMS; use super::MATCH_SAME_ARMS;
@@ -193,7 +193,7 @@ enum NormalizedPat<'a> {
Or(&'a [Self]), Or(&'a [Self]),
Path(Option<DefId>), Path(Option<DefId>),
LitStr(Symbol), LitStr(Symbol),
LitBytes(&'a [u8]), LitBytes(ByteSymbol),
LitInt(u128), LitInt(u128),
LitBool(bool), LitBool(bool),
Range(PatRange), Range(PatRange),
@@ -332,7 +332,9 @@ impl<'a> NormalizedPat<'a> {
// TODO: Handle negative integers. They're currently treated as a wild match. // TODO: Handle negative integers. They're currently treated as a wild match.
PatExprKind::Lit { lit, negated: false } => match lit.node { PatExprKind::Lit { lit, negated: false } => match lit.node {
LitKind::Str(sym, _) => Self::LitStr(sym), LitKind::Str(sym, _) => Self::LitStr(sym),
LitKind::ByteStr(ref bytes, _) | LitKind::CStr(ref bytes, _) => Self::LitBytes(bytes), LitKind::ByteStr(byte_sym, _) | LitKind::CStr(byte_sym, _) => {
Self::LitBytes(byte_sym)
}
LitKind::Byte(val) => Self::LitInt(val.into()), LitKind::Byte(val) => Self::LitInt(val.into()),
LitKind::Char(val) => Self::LitInt(val.into()), LitKind::Char(val) => Self::LitInt(val.into()),
LitKind::Int(val, _) => Self::LitInt(val.get()), LitKind::Int(val, _) => Self::LitInt(val.get()),

View File

@@ -76,7 +76,7 @@ fn get_open_options(
.. ..
} = span } = span
{ {
Argument::Set(*lit) Argument::Set(lit)
} else { } else {
// The function is called with a literal which is not a boolean literal. // The function is called with a literal which is not a boolean literal.
// This is theoretically possible, but not very likely. // This is theoretically possible, but not very likely.

View File

@@ -104,7 +104,7 @@ fn len_comparison<'hir>(
) -> Option<(LengthComparison, usize, &'hir Expr<'hir>)> { ) -> Option<(LengthComparison, usize, &'hir Expr<'hir>)> {
macro_rules! int_lit_pat { macro_rules! int_lit_pat {
($id:ident) => { ($id:ident) => {
ExprKind::Lit(&Spanned { ExprKind::Lit(Spanned {
node: LitKind::Int(Pu128($id), _), node: LitKind::Int(Pu128($id), _),
.. ..
}) })

View File

@@ -324,7 +324,7 @@ impl<'a, 'tcx> PrintVisitor<'a, 'tcx> {
} }
} }
fn lit(&self, lit: &Binding<&Lit>) { fn lit(&self, lit: &Binding<Lit>) {
let kind = |kind| chain!(self, "let LitKind::{kind} = {lit}.node"); let kind = |kind| chain!(self, "let LitKind::{kind} = {lit}.node");
macro_rules! kind { macro_rules! kind {
($($t:tt)*) => (kind(format_args!($($t)*))); ($($t:tt)*) => (kind(format_args!($($t)*)));

View File

@@ -4,8 +4,6 @@
//! executable MIR bodies, so we have to do this instead. //! executable MIR bodies, so we have to do this instead.
#![allow(clippy::float_cmp)] #![allow(clippy::float_cmp)]
use std::sync::Arc;
use crate::source::{SpanRangeExt, walk_span_to_context}; use crate::source::{SpanRangeExt, walk_span_to_context};
use crate::{clip, is_direct_expn_of, sext, unsext}; use crate::{clip, is_direct_expn_of, sext, unsext};
@@ -38,7 +36,7 @@ pub enum Constant<'tcx> {
/// A `String` (e.g., "abc"). /// A `String` (e.g., "abc").
Str(String), Str(String),
/// A binary string (e.g., `b"abc"`). /// A binary string (e.g., `b"abc"`).
Binary(Arc<[u8]>), Binary(Vec<u8>),
/// A single `char` (e.g., `'a'`). /// A single `char` (e.g., `'a'`).
Char(char), Char(char),
/// An integer's bit representation. /// An integer's bit representation.
@@ -306,7 +304,9 @@ pub fn lit_to_mir_constant<'tcx>(lit: &LitKind, ty: Option<Ty<'tcx>>) -> Constan
match *lit { match *lit {
LitKind::Str(ref is, _) => Constant::Str(is.to_string()), LitKind::Str(ref is, _) => Constant::Str(is.to_string()),
LitKind::Byte(b) => Constant::Int(u128::from(b)), LitKind::Byte(b) => Constant::Int(u128::from(b)),
LitKind::ByteStr(ref s, _) | LitKind::CStr(ref s, _) => Constant::Binary(Arc::clone(s)), LitKind::ByteStr(ref s, _) | LitKind::CStr(ref s, _) => {
Constant::Binary(s.as_byte_str().to_vec())
}
LitKind::Char(c) => Constant::Char(c), LitKind::Char(c) => Constant::Char(c),
LitKind::Int(n, _) => Constant::Int(n.get()), LitKind::Int(n, _) => Constant::Int(n.get()),
LitKind::Float(ref is, LitFloatType::Suffixed(fty)) => match fty { LitKind::Float(ref is, LitFloatType::Suffixed(fty)) => match fty {
@@ -568,7 +568,9 @@ impl<'tcx> ConstEvalCtxt<'tcx> {
} else { } else {
match &lit.node { match &lit.node {
LitKind::Str(is, _) => Some(is.is_empty()), LitKind::Str(is, _) => Some(is.is_empty()),
LitKind::ByteStr(s, _) | LitKind::CStr(s, _) => Some(s.is_empty()), LitKind::ByteStr(s, _) | LitKind::CStr(s, _) => {
Some(s.as_byte_str().is_empty())
}
_ => None, _ => None,
} }
} }