Rollup merge of #143388 - bjorn3:lto_refactors, r=compiler-errors

Various refactors to the LTO handling code

In particular reducing the sharing of code paths between fat and thin-LTO and making the fat LTO implementation more self-contained. This also moves some autodiff handling out of cg_ssa into cg_llvm given that Enzyme only works with LLVM anyway and an implementation for another backend may do things entirely differently. This will also make it a bit easier to split LTO handling out of the coordinator thread main loop into a separate loop, which should reduce the complexity of the coordinator thread.
This commit is contained in:
León Orell Valerian Liehr
2025-07-17 03:58:28 +02:00
committed by GitHub
13 changed files with 154 additions and 240 deletions

View File

@@ -397,50 +397,31 @@ impl<B: WriteBackendMethods> CodegenContext<B> {
}
}
fn generate_lto_work<B: ExtraBackendMethods>(
fn generate_thin_lto_work<B: ExtraBackendMethods>(
cgcx: &CodegenContext<B>,
autodiff: Vec<AutoDiffItem>,
needs_fat_lto: Vec<FatLtoInput<B>>,
needs_thin_lto: Vec<(String, B::ThinBuffer)>,
import_only_modules: Vec<(SerializedModule<B::ModuleBuffer>, WorkProduct)>,
) -> Vec<(WorkItem<B>, u64)> {
let _prof_timer = cgcx.prof.generic_activity("codegen_generate_lto_work");
let _prof_timer = cgcx.prof.generic_activity("codegen_thin_generate_lto_work");
if !needs_fat_lto.is_empty() {
assert!(needs_thin_lto.is_empty());
let mut module =
B::run_fat_lto(cgcx, needs_fat_lto, import_only_modules).unwrap_or_else(|e| e.raise());
if cgcx.lto == Lto::Fat && !autodiff.is_empty() {
let config = cgcx.config(ModuleKind::Regular);
module = module.autodiff(cgcx, autodiff, config).unwrap_or_else(|e| e.raise());
}
// We are adding a single work item, so the cost doesn't matter.
vec![(WorkItem::LTO(module), 0)]
} else {
if !autodiff.is_empty() {
let dcx = cgcx.create_dcx();
dcx.handle().emit_fatal(AutodiffWithoutLto {});
}
assert!(needs_fat_lto.is_empty());
let (lto_modules, copy_jobs) = B::run_thin_lto(cgcx, needs_thin_lto, import_only_modules)
.unwrap_or_else(|e| e.raise());
lto_modules
.into_iter()
.map(|module| {
let cost = module.cost();
(WorkItem::LTO(module), cost)
})
.chain(copy_jobs.into_iter().map(|wp| {
(
WorkItem::CopyPostLtoArtifacts(CachedModuleCodegen {
name: wp.cgu_name.clone(),
source: wp,
}),
0, // copying is very cheap
)
}))
.collect()
}
let (lto_modules, copy_jobs) =
B::run_thin_lto(cgcx, needs_thin_lto, import_only_modules).unwrap_or_else(|e| e.raise());
lto_modules
.into_iter()
.map(|module| {
let cost = module.cost();
(WorkItem::ThinLto(module), cost)
})
.chain(copy_jobs.into_iter().map(|wp| {
(
WorkItem::CopyPostLtoArtifacts(CachedModuleCodegen {
name: wp.cgu_name.clone(),
source: wp,
}),
0, // copying is very cheap
)
}))
.collect()
}
struct CompiledModules {
@@ -470,6 +451,7 @@ pub(crate) fn start_async_codegen<B: ExtraBackendMethods>(
backend: B,
tcx: TyCtxt<'_>,
target_cpu: String,
autodiff_items: &[AutoDiffItem],
) -> OngoingCodegen<B> {
let (coordinator_send, coordinator_receive) = channel();
@@ -488,6 +470,7 @@ pub(crate) fn start_async_codegen<B: ExtraBackendMethods>(
backend.clone(),
tcx,
&crate_info,
autodiff_items,
shared_emitter,
codegen_worker_send,
coordinator_receive,
@@ -736,15 +719,23 @@ pub(crate) enum WorkItem<B: WriteBackendMethods> {
/// Copy the post-LTO artifacts from the incremental cache to the output
/// directory.
CopyPostLtoArtifacts(CachedModuleCodegen),
/// Performs (Thin)LTO on the given module.
LTO(lto::LtoModuleCodegen<B>),
/// Performs fat LTO on the given module.
FatLto {
needs_fat_lto: Vec<FatLtoInput<B>>,
import_only_modules: Vec<(SerializedModule<B::ModuleBuffer>, WorkProduct)>,
autodiff: Vec<AutoDiffItem>,
},
/// Performs thin-LTO on the given module.
ThinLto(lto::ThinModule<B>),
}
impl<B: WriteBackendMethods> WorkItem<B> {
fn module_kind(&self) -> ModuleKind {
match *self {
WorkItem::Optimize(ref m) => m.kind,
WorkItem::CopyPostLtoArtifacts(_) | WorkItem::LTO(_) => ModuleKind::Regular,
WorkItem::CopyPostLtoArtifacts(_) | WorkItem::FatLto { .. } | WorkItem::ThinLto(_) => {
ModuleKind::Regular
}
}
}
@@ -792,7 +783,8 @@ impl<B: WriteBackendMethods> WorkItem<B> {
match self {
WorkItem::Optimize(m) => desc("opt", "optimize module", &m.name),
WorkItem::CopyPostLtoArtifacts(m) => desc("cpy", "copy LTO artifacts for", &m.name),
WorkItem::LTO(m) => desc("lto", "LTO module", m.name()),
WorkItem::FatLto { .. } => desc("lto", "fat LTO module", "everything"),
WorkItem::ThinLto(m) => desc("lto", "thin-LTO module", m.name()),
}
}
}
@@ -996,12 +988,24 @@ fn execute_copy_from_cache_work_item<B: ExtraBackendMethods>(
})
}
fn execute_lto_work_item<B: ExtraBackendMethods>(
fn execute_fat_lto_work_item<B: ExtraBackendMethods>(
cgcx: &CodegenContext<B>,
module: lto::LtoModuleCodegen<B>,
needs_fat_lto: Vec<FatLtoInput<B>>,
import_only_modules: Vec<(SerializedModule<B::ModuleBuffer>, WorkProduct)>,
autodiff: Vec<AutoDiffItem>,
module_config: &ModuleConfig,
) -> Result<WorkItemResult<B>, FatalError> {
let module = module.optimize(cgcx)?;
let module = B::run_and_optimize_fat_lto(cgcx, needs_fat_lto, import_only_modules, autodiff)?;
let module = B::codegen(cgcx, module, module_config)?;
Ok(WorkItemResult::Finished(module))
}
fn execute_thin_lto_work_item<B: ExtraBackendMethods>(
cgcx: &CodegenContext<B>,
module: lto::ThinModule<B>,
module_config: &ModuleConfig,
) -> Result<WorkItemResult<B>, FatalError> {
let module = B::optimize_thin(cgcx, module)?;
finish_intra_module_work(cgcx, module, module_config)
}
@@ -1010,11 +1014,8 @@ fn finish_intra_module_work<B: ExtraBackendMethods>(
module: ModuleCodegen<B::Module>,
module_config: &ModuleConfig,
) -> Result<WorkItemResult<B>, FatalError> {
let dcx = cgcx.create_dcx();
let dcx = dcx.handle();
if !cgcx.opts.unstable_opts.combine_cgu || module.kind == ModuleKind::Allocator {
let module = B::codegen(cgcx, dcx, module, module_config)?;
let module = B::codegen(cgcx, module, module_config)?;
Ok(WorkItemResult::Finished(module))
} else {
Ok(WorkItemResult::NeedsLink(module))
@@ -1031,9 +1032,6 @@ pub(crate) enum Message<B: WriteBackendMethods> {
/// Sent from a backend worker thread.
WorkItem { result: Result<WorkItemResult<B>, Option<WorkerFatalError>>, worker_id: usize },
/// A vector containing all the AutoDiff tasks that we have to pass to Enzyme.
AddAutoDiffItems(Vec<AutoDiffItem>),
/// The frontend has finished generating something (backend IR or a
/// post-LTO artifact) for a codegen unit, and it should be passed to the
/// backend. Sent from the main thread.
@@ -1100,6 +1098,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
backend: B,
tcx: TyCtxt<'_>,
crate_info: &CrateInfo,
autodiff_items: &[AutoDiffItem],
shared_emitter: SharedEmitter,
codegen_worker_send: Sender<CguMessage>,
coordinator_receive: Receiver<Box<dyn Any + Send>>,
@@ -1109,6 +1108,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
) -> thread::JoinHandle<Result<CompiledModules, ()>> {
let coordinator_send = tx_to_llvm_workers;
let sess = tcx.sess;
let autodiff_items = autodiff_items.to_vec();
let mut each_linked_rlib_for_lto = Vec::new();
drop(link::each_linked_rlib(crate_info, None, &mut |cnum, path| {
@@ -1362,7 +1362,6 @@ fn start_executing_work<B: ExtraBackendMethods>(
// This is where we collect codegen units that have gone all the way
// through codegen and LLVM.
let mut autodiff_items = Vec::new();
let mut compiled_modules = vec![];
let mut compiled_allocator_module = None;
let mut needs_link = Vec::new();
@@ -1474,20 +1473,37 @@ fn start_executing_work<B: ExtraBackendMethods>(
let needs_thin_lto = mem::take(&mut needs_thin_lto);
let import_only_modules = mem::take(&mut lto_import_only_modules);
for (work, cost) in generate_lto_work(
&cgcx,
autodiff_items.clone(),
needs_fat_lto,
needs_thin_lto,
import_only_modules,
) {
let insertion_index = work_items
.binary_search_by_key(&cost, |&(_, cost)| cost)
.unwrap_or_else(|e| e);
work_items.insert(insertion_index, (work, cost));
if !needs_fat_lto.is_empty() {
assert!(needs_thin_lto.is_empty());
work_items.push((
WorkItem::FatLto {
needs_fat_lto,
import_only_modules,
autodiff: autodiff_items.clone(),
},
0,
));
if cgcx.parallel {
helper.request_token();
}
} else {
if !autodiff_items.is_empty() {
let dcx = cgcx.create_dcx();
dcx.handle().emit_fatal(AutodiffWithoutLto {});
}
for (work, cost) in
generate_thin_lto_work(&cgcx, needs_thin_lto, import_only_modules)
{
let insertion_index = work_items
.binary_search_by_key(&cost, |&(_, cost)| cost)
.unwrap_or_else(|e| e);
work_items.insert(insertion_index, (work, cost));
if cgcx.parallel {
helper.request_token();
}
}
}
}
@@ -1616,10 +1632,6 @@ fn start_executing_work<B: ExtraBackendMethods>(
main_thread_state = MainThreadState::Idle;
}
Message::AddAutoDiffItems(mut items) => {
autodiff_items.append(&mut items);
}
Message::CodegenComplete => {
if codegen_state != Aborted {
codegen_state = Completed;
@@ -1702,7 +1714,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
let dcx = dcx.handle();
let module = B::run_link(&cgcx, dcx, needs_link).map_err(|_| ())?;
let module =
B::codegen(&cgcx, dcx, module, cgcx.config(ModuleKind::Regular)).map_err(|_| ())?;
B::codegen(&cgcx, module, cgcx.config(ModuleKind::Regular)).map_err(|_| ())?;
compiled_modules.push(module);
}
@@ -1842,10 +1854,22 @@ fn spawn_work<'a, B: ExtraBackendMethods>(
);
Ok(execute_copy_from_cache_work_item(&cgcx, m, module_config))
}
WorkItem::LTO(m) => {
WorkItem::FatLto { needs_fat_lto, import_only_modules, autodiff } => {
let _timer = cgcx
.prof
.generic_activity_with_arg("codegen_module_perform_lto", "everything");
execute_fat_lto_work_item(
&cgcx,
needs_fat_lto,
import_only_modules,
autodiff,
module_config,
)
}
WorkItem::ThinLto(m) => {
let _timer =
cgcx.prof.generic_activity_with_arg("codegen_module_perform_lto", m.name());
execute_lto_work_item(&cgcx, m, module_config)
execute_thin_lto_work_item(&cgcx, m, module_config)
}
})
};
@@ -2082,10 +2106,6 @@ impl<B: ExtraBackendMethods> OngoingCodegen<B> {
drop(self.coordinator.sender.send(Box::new(Message::CodegenComplete::<B>)));
}
pub(crate) fn submit_autodiff_items(&self, items: Vec<AutoDiffItem>) {
drop(self.coordinator.sender.send(Box::new(Message::<B>::AddAutoDiffItems(items))));
}
pub(crate) fn check_for_errors(&self, sess: &Session) {
self.shared_emitter_main.check(sess, false);
}