Rollup merge of #143388 - bjorn3:lto_refactors, r=compiler-errors
Various refactors to the LTO handling code. In particular, this reduces the sharing of code paths between fat LTO and thin-LTO and makes the fat LTO implementation more self-contained. This also moves some autodiff handling out of cg_ssa into cg_llvm, given that Enzyme only works with LLVM anyway and an implementation for another backend may do things entirely differently. This will also make it a bit easier to split LTO handling out of the coordinator thread's main loop into a separate loop, which should reduce the complexity of the coordinator thread.
This commit is contained in:
@@ -397,50 +397,31 @@ impl<B: WriteBackendMethods> CodegenContext<B> {
|
||||
}
|
||||
}
|
||||
|
||||
fn generate_lto_work<B: ExtraBackendMethods>(
|
||||
fn generate_thin_lto_work<B: ExtraBackendMethods>(
|
||||
cgcx: &CodegenContext<B>,
|
||||
autodiff: Vec<AutoDiffItem>,
|
||||
needs_fat_lto: Vec<FatLtoInput<B>>,
|
||||
needs_thin_lto: Vec<(String, B::ThinBuffer)>,
|
||||
import_only_modules: Vec<(SerializedModule<B::ModuleBuffer>, WorkProduct)>,
|
||||
) -> Vec<(WorkItem<B>, u64)> {
|
||||
let _prof_timer = cgcx.prof.generic_activity("codegen_generate_lto_work");
|
||||
let _prof_timer = cgcx.prof.generic_activity("codegen_thin_generate_lto_work");
|
||||
|
||||
if !needs_fat_lto.is_empty() {
|
||||
assert!(needs_thin_lto.is_empty());
|
||||
let mut module =
|
||||
B::run_fat_lto(cgcx, needs_fat_lto, import_only_modules).unwrap_or_else(|e| e.raise());
|
||||
if cgcx.lto == Lto::Fat && !autodiff.is_empty() {
|
||||
let config = cgcx.config(ModuleKind::Regular);
|
||||
module = module.autodiff(cgcx, autodiff, config).unwrap_or_else(|e| e.raise());
|
||||
}
|
||||
// We are adding a single work item, so the cost doesn't matter.
|
||||
vec![(WorkItem::LTO(module), 0)]
|
||||
} else {
|
||||
if !autodiff.is_empty() {
|
||||
let dcx = cgcx.create_dcx();
|
||||
dcx.handle().emit_fatal(AutodiffWithoutLto {});
|
||||
}
|
||||
assert!(needs_fat_lto.is_empty());
|
||||
let (lto_modules, copy_jobs) = B::run_thin_lto(cgcx, needs_thin_lto, import_only_modules)
|
||||
.unwrap_or_else(|e| e.raise());
|
||||
lto_modules
|
||||
.into_iter()
|
||||
.map(|module| {
|
||||
let cost = module.cost();
|
||||
(WorkItem::LTO(module), cost)
|
||||
})
|
||||
.chain(copy_jobs.into_iter().map(|wp| {
|
||||
(
|
||||
WorkItem::CopyPostLtoArtifacts(CachedModuleCodegen {
|
||||
name: wp.cgu_name.clone(),
|
||||
source: wp,
|
||||
}),
|
||||
0, // copying is very cheap
|
||||
)
|
||||
}))
|
||||
.collect()
|
||||
}
|
||||
let (lto_modules, copy_jobs) =
|
||||
B::run_thin_lto(cgcx, needs_thin_lto, import_only_modules).unwrap_or_else(|e| e.raise());
|
||||
lto_modules
|
||||
.into_iter()
|
||||
.map(|module| {
|
||||
let cost = module.cost();
|
||||
(WorkItem::ThinLto(module), cost)
|
||||
})
|
||||
.chain(copy_jobs.into_iter().map(|wp| {
|
||||
(
|
||||
WorkItem::CopyPostLtoArtifacts(CachedModuleCodegen {
|
||||
name: wp.cgu_name.clone(),
|
||||
source: wp,
|
||||
}),
|
||||
0, // copying is very cheap
|
||||
)
|
||||
}))
|
||||
.collect()
|
||||
}
|
||||
|
||||
struct CompiledModules {
|
||||
@@ -470,6 +451,7 @@ pub(crate) fn start_async_codegen<B: ExtraBackendMethods>(
|
||||
backend: B,
|
||||
tcx: TyCtxt<'_>,
|
||||
target_cpu: String,
|
||||
autodiff_items: &[AutoDiffItem],
|
||||
) -> OngoingCodegen<B> {
|
||||
let (coordinator_send, coordinator_receive) = channel();
|
||||
|
||||
@@ -488,6 +470,7 @@ pub(crate) fn start_async_codegen<B: ExtraBackendMethods>(
|
||||
backend.clone(),
|
||||
tcx,
|
||||
&crate_info,
|
||||
autodiff_items,
|
||||
shared_emitter,
|
||||
codegen_worker_send,
|
||||
coordinator_receive,
|
||||
@@ -736,15 +719,23 @@ pub(crate) enum WorkItem<B: WriteBackendMethods> {
|
||||
/// Copy the post-LTO artifacts from the incremental cache to the output
|
||||
/// directory.
|
||||
CopyPostLtoArtifacts(CachedModuleCodegen),
|
||||
/// Performs (Thin)LTO on the given module.
|
||||
LTO(lto::LtoModuleCodegen<B>),
|
||||
/// Performs fat LTO on the given module.
|
||||
FatLto {
|
||||
needs_fat_lto: Vec<FatLtoInput<B>>,
|
||||
import_only_modules: Vec<(SerializedModule<B::ModuleBuffer>, WorkProduct)>,
|
||||
autodiff: Vec<AutoDiffItem>,
|
||||
},
|
||||
/// Performs thin-LTO on the given module.
|
||||
ThinLto(lto::ThinModule<B>),
|
||||
}
|
||||
|
||||
impl<B: WriteBackendMethods> WorkItem<B> {
|
||||
fn module_kind(&self) -> ModuleKind {
|
||||
match *self {
|
||||
WorkItem::Optimize(ref m) => m.kind,
|
||||
WorkItem::CopyPostLtoArtifacts(_) | WorkItem::LTO(_) => ModuleKind::Regular,
|
||||
WorkItem::CopyPostLtoArtifacts(_) | WorkItem::FatLto { .. } | WorkItem::ThinLto(_) => {
|
||||
ModuleKind::Regular
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -792,7 +783,8 @@ impl<B: WriteBackendMethods> WorkItem<B> {
|
||||
match self {
|
||||
WorkItem::Optimize(m) => desc("opt", "optimize module", &m.name),
|
||||
WorkItem::CopyPostLtoArtifacts(m) => desc("cpy", "copy LTO artifacts for", &m.name),
|
||||
WorkItem::LTO(m) => desc("lto", "LTO module", m.name()),
|
||||
WorkItem::FatLto { .. } => desc("lto", "fat LTO module", "everything"),
|
||||
WorkItem::ThinLto(m) => desc("lto", "thin-LTO module", m.name()),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -996,12 +988,24 @@ fn execute_copy_from_cache_work_item<B: ExtraBackendMethods>(
|
||||
})
|
||||
}
|
||||
|
||||
fn execute_lto_work_item<B: ExtraBackendMethods>(
|
||||
fn execute_fat_lto_work_item<B: ExtraBackendMethods>(
|
||||
cgcx: &CodegenContext<B>,
|
||||
module: lto::LtoModuleCodegen<B>,
|
||||
needs_fat_lto: Vec<FatLtoInput<B>>,
|
||||
import_only_modules: Vec<(SerializedModule<B::ModuleBuffer>, WorkProduct)>,
|
||||
autodiff: Vec<AutoDiffItem>,
|
||||
module_config: &ModuleConfig,
|
||||
) -> Result<WorkItemResult<B>, FatalError> {
|
||||
let module = module.optimize(cgcx)?;
|
||||
let module = B::run_and_optimize_fat_lto(cgcx, needs_fat_lto, import_only_modules, autodiff)?;
|
||||
let module = B::codegen(cgcx, module, module_config)?;
|
||||
Ok(WorkItemResult::Finished(module))
|
||||
}
|
||||
|
||||
fn execute_thin_lto_work_item<B: ExtraBackendMethods>(
|
||||
cgcx: &CodegenContext<B>,
|
||||
module: lto::ThinModule<B>,
|
||||
module_config: &ModuleConfig,
|
||||
) -> Result<WorkItemResult<B>, FatalError> {
|
||||
let module = B::optimize_thin(cgcx, module)?;
|
||||
finish_intra_module_work(cgcx, module, module_config)
|
||||
}
|
||||
|
||||
@@ -1010,11 +1014,8 @@ fn finish_intra_module_work<B: ExtraBackendMethods>(
|
||||
module: ModuleCodegen<B::Module>,
|
||||
module_config: &ModuleConfig,
|
||||
) -> Result<WorkItemResult<B>, FatalError> {
|
||||
let dcx = cgcx.create_dcx();
|
||||
let dcx = dcx.handle();
|
||||
|
||||
if !cgcx.opts.unstable_opts.combine_cgu || module.kind == ModuleKind::Allocator {
|
||||
let module = B::codegen(cgcx, dcx, module, module_config)?;
|
||||
let module = B::codegen(cgcx, module, module_config)?;
|
||||
Ok(WorkItemResult::Finished(module))
|
||||
} else {
|
||||
Ok(WorkItemResult::NeedsLink(module))
|
||||
@@ -1031,9 +1032,6 @@ pub(crate) enum Message<B: WriteBackendMethods> {
|
||||
/// Sent from a backend worker thread.
|
||||
WorkItem { result: Result<WorkItemResult<B>, Option<WorkerFatalError>>, worker_id: usize },
|
||||
|
||||
/// A vector containing all the AutoDiff tasks that we have to pass to Enzyme.
|
||||
AddAutoDiffItems(Vec<AutoDiffItem>),
|
||||
|
||||
/// The frontend has finished generating something (backend IR or a
|
||||
/// post-LTO artifact) for a codegen unit, and it should be passed to the
|
||||
/// backend. Sent from the main thread.
|
||||
@@ -1100,6 +1098,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
|
||||
backend: B,
|
||||
tcx: TyCtxt<'_>,
|
||||
crate_info: &CrateInfo,
|
||||
autodiff_items: &[AutoDiffItem],
|
||||
shared_emitter: SharedEmitter,
|
||||
codegen_worker_send: Sender<CguMessage>,
|
||||
coordinator_receive: Receiver<Box<dyn Any + Send>>,
|
||||
@@ -1109,6 +1108,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
|
||||
) -> thread::JoinHandle<Result<CompiledModules, ()>> {
|
||||
let coordinator_send = tx_to_llvm_workers;
|
||||
let sess = tcx.sess;
|
||||
let autodiff_items = autodiff_items.to_vec();
|
||||
|
||||
let mut each_linked_rlib_for_lto = Vec::new();
|
||||
drop(link::each_linked_rlib(crate_info, None, &mut |cnum, path| {
|
||||
@@ -1362,7 +1362,6 @@ fn start_executing_work<B: ExtraBackendMethods>(
|
||||
|
||||
// This is where we collect codegen units that have gone all the way
|
||||
// through codegen and LLVM.
|
||||
let mut autodiff_items = Vec::new();
|
||||
let mut compiled_modules = vec![];
|
||||
let mut compiled_allocator_module = None;
|
||||
let mut needs_link = Vec::new();
|
||||
@@ -1474,20 +1473,37 @@ fn start_executing_work<B: ExtraBackendMethods>(
|
||||
let needs_thin_lto = mem::take(&mut needs_thin_lto);
|
||||
let import_only_modules = mem::take(&mut lto_import_only_modules);
|
||||
|
||||
for (work, cost) in generate_lto_work(
|
||||
&cgcx,
|
||||
autodiff_items.clone(),
|
||||
needs_fat_lto,
|
||||
needs_thin_lto,
|
||||
import_only_modules,
|
||||
) {
|
||||
let insertion_index = work_items
|
||||
.binary_search_by_key(&cost, |&(_, cost)| cost)
|
||||
.unwrap_or_else(|e| e);
|
||||
work_items.insert(insertion_index, (work, cost));
|
||||
if !needs_fat_lto.is_empty() {
|
||||
assert!(needs_thin_lto.is_empty());
|
||||
|
||||
work_items.push((
|
||||
WorkItem::FatLto {
|
||||
needs_fat_lto,
|
||||
import_only_modules,
|
||||
autodiff: autodiff_items.clone(),
|
||||
},
|
||||
0,
|
||||
));
|
||||
if cgcx.parallel {
|
||||
helper.request_token();
|
||||
}
|
||||
} else {
|
||||
if !autodiff_items.is_empty() {
|
||||
let dcx = cgcx.create_dcx();
|
||||
dcx.handle().emit_fatal(AutodiffWithoutLto {});
|
||||
}
|
||||
|
||||
for (work, cost) in
|
||||
generate_thin_lto_work(&cgcx, needs_thin_lto, import_only_modules)
|
||||
{
|
||||
let insertion_index = work_items
|
||||
.binary_search_by_key(&cost, |&(_, cost)| cost)
|
||||
.unwrap_or_else(|e| e);
|
||||
work_items.insert(insertion_index, (work, cost));
|
||||
if cgcx.parallel {
|
||||
helper.request_token();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1616,10 +1632,6 @@ fn start_executing_work<B: ExtraBackendMethods>(
|
||||
main_thread_state = MainThreadState::Idle;
|
||||
}
|
||||
|
||||
Message::AddAutoDiffItems(mut items) => {
|
||||
autodiff_items.append(&mut items);
|
||||
}
|
||||
|
||||
Message::CodegenComplete => {
|
||||
if codegen_state != Aborted {
|
||||
codegen_state = Completed;
|
||||
@@ -1702,7 +1714,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
|
||||
let dcx = dcx.handle();
|
||||
let module = B::run_link(&cgcx, dcx, needs_link).map_err(|_| ())?;
|
||||
let module =
|
||||
B::codegen(&cgcx, dcx, module, cgcx.config(ModuleKind::Regular)).map_err(|_| ())?;
|
||||
B::codegen(&cgcx, module, cgcx.config(ModuleKind::Regular)).map_err(|_| ())?;
|
||||
compiled_modules.push(module);
|
||||
}
|
||||
|
||||
@@ -1842,10 +1854,22 @@ fn spawn_work<'a, B: ExtraBackendMethods>(
|
||||
);
|
||||
Ok(execute_copy_from_cache_work_item(&cgcx, m, module_config))
|
||||
}
|
||||
WorkItem::LTO(m) => {
|
||||
WorkItem::FatLto { needs_fat_lto, import_only_modules, autodiff } => {
|
||||
let _timer = cgcx
|
||||
.prof
|
||||
.generic_activity_with_arg("codegen_module_perform_lto", "everything");
|
||||
execute_fat_lto_work_item(
|
||||
&cgcx,
|
||||
needs_fat_lto,
|
||||
import_only_modules,
|
||||
autodiff,
|
||||
module_config,
|
||||
)
|
||||
}
|
||||
WorkItem::ThinLto(m) => {
|
||||
let _timer =
|
||||
cgcx.prof.generic_activity_with_arg("codegen_module_perform_lto", m.name());
|
||||
execute_lto_work_item(&cgcx, m, module_config)
|
||||
execute_thin_lto_work_item(&cgcx, m, module_config)
|
||||
}
|
||||
})
|
||||
};
|
||||
@@ -2082,10 +2106,6 @@ impl<B: ExtraBackendMethods> OngoingCodegen<B> {
|
||||
drop(self.coordinator.sender.send(Box::new(Message::CodegenComplete::<B>)));
|
||||
}
|
||||
|
||||
pub(crate) fn submit_autodiff_items(&self, items: Vec<AutoDiffItem>) {
|
||||
drop(self.coordinator.sender.send(Box::new(Message::<B>::AddAutoDiffItems(items))));
|
||||
}
|
||||
|
||||
pub(crate) fn check_for_errors(&self, sess: &Session) {
|
||||
self.shared_emitter_main.check(sess, false);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user