Make the allocator shim participate in LTO again
This commit is contained in:
@@ -1,7 +1,6 @@
|
||||
use std::ffi::CString;
|
||||
use std::sync::Arc;
|
||||
|
||||
use rustc_ast::expand::allocator::AllocatorKind;
|
||||
use rustc_data_structures::memmap::Mmap;
|
||||
use rustc_hir::def_id::{CrateNum, LOCAL_CRATE};
|
||||
use rustc_middle::middle::exported_symbols::{ExportedSymbol, SymbolExportInfo, SymbolExportLevel};
|
||||
@@ -96,19 +95,6 @@ pub(super) fn exported_symbols_for_lto(
|
||||
.filter_map(|&(s, info): &(ExportedSymbol<'_>, SymbolExportInfo)| {
|
||||
if info.level.is_below_threshold(export_threshold) || info.used {
|
||||
Some(symbol_name_for_instance_in_crate(tcx, s, cnum))
|
||||
} else if export_threshold == SymbolExportLevel::C
|
||||
&& info.rustc_std_internal_symbol
|
||||
&& let Some(AllocatorKind::Default) = allocator_kind_for_codegen(tcx)
|
||||
{
|
||||
// Export the __rdl_* exports for usage by the allocator shim when not using
|
||||
// #[global_allocator]. Most of the conditions above are only used to avoid
|
||||
// unnecessary expensive symbol_name_for_instance_in_crate calls.
|
||||
let sym = symbol_name_for_instance_in_crate(tcx, s, cnum);
|
||||
if sym.contains("__rdl_") || sym.contains("__rg_oom") {
|
||||
Some(sym)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
} else {
|
||||
None
|
||||
}
|
||||
|
||||
@@ -334,6 +334,7 @@ pub struct CodegenContext<B: WriteBackendMethods> {
|
||||
pub output_filenames: Arc<OutputFilenames>,
|
||||
pub invocation_temp: Option<String>,
|
||||
pub module_config: Arc<ModuleConfig>,
|
||||
pub allocator_config: Arc<ModuleConfig>,
|
||||
pub tm_factory: TargetMachineFactoryFn<B>,
|
||||
pub msvc_imps_needed: bool,
|
||||
pub is_pe_coff: bool,
|
||||
@@ -794,12 +795,19 @@ pub(crate) fn compute_per_cgu_lto_type(
|
||||
sess_lto: &Lto,
|
||||
opts: &config::Options,
|
||||
sess_crate_types: &[CrateType],
|
||||
module_kind: ModuleKind,
|
||||
) -> ComputedLtoType {
|
||||
// If the linker does LTO, we don't have to do it. Note that we
|
||||
// keep doing full LTO, if it is requested, so as not to break the
|
||||
// assumption that the output will be a single module.
|
||||
let linker_does_lto = opts.cg.linker_plugin_lto.enabled();
|
||||
|
||||
// When we're automatically doing ThinLTO for multi-codegen-unit
|
||||
// builds we don't actually want to LTO the allocator module if
|
||||
// it shows up. This is due to various linker shenanigans that
|
||||
// we'll encounter later.
|
||||
let is_allocator = module_kind == ModuleKind::Allocator;
|
||||
|
||||
// We ignore a request for full crate graph LTO if the crate type
|
||||
// is only an rlib, as there is no full crate graph to process;
|
||||
// that'll happen later.
|
||||
@@ -811,7 +819,7 @@ pub(crate) fn compute_per_cgu_lto_type(
|
||||
let is_rlib = matches!(sess_crate_types, [CrateType::Rlib]);
|
||||
|
||||
match sess_lto {
|
||||
Lto::ThinLocal if !linker_does_lto => ComputedLtoType::Thin,
|
||||
Lto::ThinLocal if !linker_does_lto && !is_allocator => ComputedLtoType::Thin,
|
||||
Lto::Thin if !linker_does_lto && !is_rlib => ComputedLtoType::Thin,
|
||||
Lto::Fat if !is_rlib => ComputedLtoType::Fat,
|
||||
_ => ComputedLtoType::No,
|
||||
@@ -825,18 +833,23 @@ fn execute_optimize_work_item<B: ExtraBackendMethods>(
|
||||
let dcx = cgcx.create_dcx();
|
||||
let dcx = dcx.handle();
|
||||
|
||||
B::optimize(cgcx, dcx, &mut module, &cgcx.module_config);
|
||||
let module_config = match module.kind {
|
||||
ModuleKind::Regular => &cgcx.module_config,
|
||||
ModuleKind::Allocator => &cgcx.allocator_config,
|
||||
};
|
||||
|
||||
B::optimize(cgcx, dcx, &mut module, module_config);
|
||||
|
||||
// After we've done the initial round of optimizations we need to
|
||||
// decide whether to synchronously codegen this module or ship it
|
||||
// back to the coordinator thread for further LTO processing (which
|
||||
// has to wait for all the initial modules to be optimized).
|
||||
|
||||
let lto_type = compute_per_cgu_lto_type(&cgcx.lto, &cgcx.opts, &cgcx.crate_types);
|
||||
let lto_type = compute_per_cgu_lto_type(&cgcx.lto, &cgcx.opts, &cgcx.crate_types, module.kind);
|
||||
|
||||
// If we're doing some form of incremental LTO then we need to be sure to
|
||||
// save our module to disk first.
|
||||
let bitcode = if cgcx.module_config.emit_pre_lto_bc {
|
||||
let bitcode = if module_config.emit_pre_lto_bc {
|
||||
let filename = pre_lto_bitcode_filename(&module.name);
|
||||
cgcx.incr_comp_session_dir.as_ref().map(|path| path.join(&filename))
|
||||
} else {
|
||||
@@ -845,7 +858,7 @@ fn execute_optimize_work_item<B: ExtraBackendMethods>(
|
||||
|
||||
match lto_type {
|
||||
ComputedLtoType::No => {
|
||||
let module = B::codegen(cgcx, module, &cgcx.module_config);
|
||||
let module = B::codegen(cgcx, module, module_config);
|
||||
WorkItemResult::Finished(module)
|
||||
}
|
||||
ComputedLtoType::Thin => {
|
||||
@@ -1133,6 +1146,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
|
||||
diag_emitter: shared_emitter.clone(),
|
||||
output_filenames: Arc::clone(tcx.output_filenames(())),
|
||||
module_config: regular_config,
|
||||
allocator_config,
|
||||
tm_factory: backend.target_machine_factory(tcx.sess, ol, backend_features),
|
||||
msvc_imps_needed: msvc_imps_needed(tcx),
|
||||
is_pe_coff: tcx.sess.target.is_like_windows,
|
||||
@@ -1147,11 +1161,6 @@ fn start_executing_work<B: ExtraBackendMethods>(
|
||||
invocation_temp: sess.invocation_temp.clone(),
|
||||
};
|
||||
|
||||
let compiled_allocator_module = allocator_module.map(|mut allocator_module| {
|
||||
B::optimize(&cgcx, tcx.sess.dcx(), &mut allocator_module, &allocator_config);
|
||||
B::codegen(&cgcx, allocator_module, &allocator_config)
|
||||
});
|
||||
|
||||
// This is the "main loop" of parallel work happening for parallel codegen.
|
||||
// It's here that we manage parallelism, schedule work, and work with
|
||||
// messages coming from clients.
|
||||
@@ -1331,6 +1340,17 @@ fn start_executing_work<B: ExtraBackendMethods>(
|
||||
|
||||
let mut llvm_start_time: Option<VerboseTimingGuard<'_>> = None;
|
||||
|
||||
let compiled_allocator_module = allocator_module.and_then(|allocator_module| {
|
||||
match execute_optimize_work_item(&cgcx, allocator_module) {
|
||||
WorkItemResult::Finished(compiled_module) => return Some(compiled_module),
|
||||
WorkItemResult::NeedsFatLto(fat_lto_input) => needs_fat_lto.push(fat_lto_input),
|
||||
WorkItemResult::NeedsThinLto(name, thin_buffer) => {
|
||||
needs_thin_lto.push((name, thin_buffer))
|
||||
}
|
||||
}
|
||||
None
|
||||
});
|
||||
|
||||
// Run the message loop while there's still anything that needs message
|
||||
// processing. Note that as soon as codegen is aborted we simply want to
|
||||
// wait for all existing work to finish, so many of the conditions here
|
||||
|
||||
@@ -46,7 +46,9 @@ use crate::meth::load_vtable;
|
||||
use crate::mir::operand::OperandValue;
|
||||
use crate::mir::place::PlaceRef;
|
||||
use crate::traits::*;
|
||||
use crate::{CachedModuleCodegen, CodegenLintLevels, CrateInfo, ModuleCodegen, errors, meth, mir};
|
||||
use crate::{
|
||||
CachedModuleCodegen, CodegenLintLevels, CrateInfo, ModuleCodegen, ModuleKind, errors, meth, mir,
|
||||
};
|
||||
|
||||
pub(crate) fn bin_op_to_icmp_predicate(op: BinOp, signed: bool) -> IntPredicate {
|
||||
match (op, signed) {
|
||||
@@ -1124,7 +1126,12 @@ pub fn determine_cgu_reuse<'tcx>(tcx: TyCtxt<'tcx>, cgu: &CodegenUnit<'tcx>) ->
|
||||
// We can re-use either the pre- or the post-thinlto state. If no LTO is
|
||||
// being performed then we can use post-LTO artifacts, otherwise we must
|
||||
// reuse pre-LTO artifacts
|
||||
match compute_per_cgu_lto_type(&tcx.sess.lto(), &tcx.sess.opts, tcx.crate_types()) {
|
||||
match compute_per_cgu_lto_type(
|
||||
&tcx.sess.lto(),
|
||||
&tcx.sess.opts,
|
||||
tcx.crate_types(),
|
||||
ModuleKind::Regular,
|
||||
) {
|
||||
ComputedLtoType::No => CguReuse::PostLto,
|
||||
_ => CguReuse::PreLto,
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user