Introduce ByteSymbol.
It's like `Symbol` but for byte strings. The interner is now used for both `Symbol` and `ByteSymbol`. E.g. if you intern `"dog"` and `b"dog"` you'll get a `Symbol` and a `ByteSymbol` with the same index and the characters will only be stored once. The motivation for this is to eliminate the `Arc`s in `ast::LitKind`, to make `ast::LitKind` implement `Copy`, and to avoid the need to arena-allocate `ast::LitKind` in HIR. The last of these changes reduces peak memory by a non-trivial amount on literal-heavy benchmarks such as `deep-vector` and `tuple-stress`. `Encoder`, `Decoder`, `SpanEncoder`, and `SpanDecoder` all get some changes so that they can handle normal strings and byte strings. This change does slow down compilation of programs that use `include_bytes!` on large files, because the contents of those files are now interned (hashed). This makes `include_bytes!` more similar to `include_str!`, though `include_bytes!` contents still aren't escaped, and hashing is still much cheaper than escaping.
This commit is contained in:
@@ -177,15 +177,15 @@ pub(crate) fn expand_concat_bytes(
|
||||
Ok(LitKind::Byte(val)) => {
|
||||
accumulator.push(val);
|
||||
}
|
||||
Ok(LitKind::ByteStr(ref bytes, _)) => {
|
||||
accumulator.extend_from_slice(bytes);
|
||||
Ok(LitKind::ByteStr(ref byte_sym, _)) => {
|
||||
accumulator.extend_from_slice(byte_sym.as_byte_str());
|
||||
}
|
||||
_ => {
|
||||
guar.get_or_insert_with(|| invalid_type_err(cx, token_lit, e.span, false));
|
||||
}
|
||||
},
|
||||
ExprKind::IncludedBytes(bytes) => {
|
||||
accumulator.extend_from_slice(bytes);
|
||||
ExprKind::IncludedBytes(byte_sym) => {
|
||||
accumulator.extend_from_slice(byte_sym.as_byte_str());
|
||||
}
|
||||
ExprKind::Err(guarantee) => {
|
||||
guar = Some(*guarantee);
|
||||
|
||||
@@ -16,7 +16,7 @@ use rustc_parse::parser::{ForceCollect, Parser};
|
||||
use rustc_parse::{new_parser_from_file, unwrap_or_emit_fatal, utf8_error};
|
||||
use rustc_session::lint::builtin::INCOMPLETE_INCLUDE;
|
||||
use rustc_span::source_map::SourceMap;
|
||||
use rustc_span::{Pos, Span, Symbol};
|
||||
use rustc_span::{ByteSymbol, Pos, Span, Symbol};
|
||||
use smallvec::SmallVec;
|
||||
|
||||
use crate::errors;
|
||||
@@ -237,7 +237,7 @@ pub(crate) fn expand_include_bytes(
|
||||
Ok((bytes, _bsp)) => {
|
||||
// Don't care about getting the span for the raw bytes,
|
||||
// because the console can't really show them anyway.
|
||||
let expr = cx.expr(sp, ast::ExprKind::IncludedBytes(bytes));
|
||||
let expr = cx.expr(sp, ast::ExprKind::IncludedBytes(ByteSymbol::intern(&bytes)));
|
||||
MacEager::expr(expr)
|
||||
}
|
||||
Err(dummy) => dummy,
|
||||
|
||||
Reference in New Issue
Block a user