Rollup merge of #143388 - bjorn3:lto_refactors, r=compiler-errors

Various refactors to the LTO handling code In particular reducing the sharing of code paths between fat and thin-LTO and making the fat LTO implementation more self-contained. This also moves some autodiff handling out of cg_ssa into cg_llvm given that Enzyme only works with LLVM anyway and an implementation for another backend may do things entirely differently. This will also make it a bit easier to split LTO handling out of the coordinator thread main loop into a separate loop, which should reduce the complexity of the coordinator thread.
2025-07-17 03:58:28 +02:00
parent 9ac88eabed 21026cae8d
commit be5f8f299d
13 changed files with 154 additions and 240 deletions
--- a/compiler/rustc_codegen_ssa/src/back/write.rs
+++ b/compiler/rustc_codegen_ssa/src/back/write.rs
@@ -397,50 +397,31 @@ impl<B: WriteBackendMethods> CodegenContext<B> {
    }
 }

-fn generate_lto_work<B: ExtraBackendMethods>(
+fn generate_thin_lto_work<B: ExtraBackendMethods>(
    cgcx: &CodegenContext<B>,
-    autodiff: Vec<AutoDiffItem>,
-    needs_fat_lto: Vec<FatLtoInput<B>>,
    needs_thin_lto: Vec<(String, B::ThinBuffer)>,
    import_only_modules: Vec<(SerializedModule<B::ModuleBuffer>, WorkProduct)>,
 ) -> Vec<(WorkItem<B>, u64)> {
-    let _prof_timer = cgcx.prof.generic_activity("codegen_generate_lto_work");
+    let _prof_timer = cgcx.prof.generic_activity("codegen_thin_generate_lto_work");

-    if !needs_fat_lto.is_empty() {
-        assert!(needs_thin_lto.is_empty());
-        let mut module =
-            B::run_fat_lto(cgcx, needs_fat_lto, import_only_modules).unwrap_or_else(|e| e.raise());
-        if cgcx.lto == Lto::Fat && !autodiff.is_empty() {
-            let config = cgcx.config(ModuleKind::Regular);
-            module = module.autodiff(cgcx, autodiff, config).unwrap_or_else(|e| e.raise());
-        }
-        // We are adding a single work item, so the cost doesn't matter.
-        vec![(WorkItem::LTO(module), 0)]
-    } else {
-        if !autodiff.is_empty() {
-            let dcx = cgcx.create_dcx();
-            dcx.handle().emit_fatal(AutodiffWithoutLto {});
-        }
-        assert!(needs_fat_lto.is_empty());
-        let (lto_modules, copy_jobs) = B::run_thin_lto(cgcx, needs_thin_lto, import_only_modules)
-            .unwrap_or_else(|e| e.raise());
-        lto_modules
-            .into_iter()
-            .map(|module| {
-                let cost = module.cost();
-                (WorkItem::LTO(module), cost)
-            })
-            .chain(copy_jobs.into_iter().map(|wp| {
-                (
-                    WorkItem::CopyPostLtoArtifacts(CachedModuleCodegen {
-                        name: wp.cgu_name.clone(),
-                        source: wp,
-                    }),
-                    0, // copying is very cheap
-                )
-            }))
-            .collect()
-    }
+    let (lto_modules, copy_jobs) =
+        B::run_thin_lto(cgcx, needs_thin_lto, import_only_modules).unwrap_or_else(|e| e.raise());
+    lto_modules
+        .into_iter()
+        .map(|module| {
+            let cost = module.cost();
+            (WorkItem::ThinLto(module), cost)
+        })
+        .chain(copy_jobs.into_iter().map(|wp| {
+            (
+                WorkItem::CopyPostLtoArtifacts(CachedModuleCodegen {
+                    name: wp.cgu_name.clone(),
+                    source: wp,
+                }),
+                0, // copying is very cheap
+            )
+        }))
+        .collect()
 }

 struct CompiledModules {
@@ -470,6 +451,7 @@ pub(crate) fn start_async_codegen<B: ExtraBackendMethods>(
    backend: B,
    tcx: TyCtxt<'_>,
    target_cpu: String,
+    autodiff_items: &[AutoDiffItem],
 ) -> OngoingCodegen<B> {
    let (coordinator_send, coordinator_receive) = channel();

@@ -488,6 +470,7 @@ pub(crate) fn start_async_codegen<B: ExtraBackendMethods>(
        backend.clone(),
        tcx,
        &crate_info,
+        autodiff_items,
        shared_emitter,
        codegen_worker_send,
        coordinator_receive,
@@ -736,15 +719,23 @@ pub(crate) enum WorkItem<B: WriteBackendMethods> {
    /// Copy the post-LTO artifacts from the incremental cache to the output
    /// directory.
    CopyPostLtoArtifacts(CachedModuleCodegen),
-    /// Performs (Thin)LTO on the given module.
-    LTO(lto::LtoModuleCodegen<B>),
+    /// Performs fat LTO on the given module.
+    FatLto {
+        needs_fat_lto: Vec<FatLtoInput<B>>,
+        import_only_modules: Vec<(SerializedModule<B::ModuleBuffer>, WorkProduct)>,
+        autodiff: Vec<AutoDiffItem>,
+    },
+    /// Performs thin-LTO on the given module.
+    ThinLto(lto::ThinModule<B>),
 }

 impl<B: WriteBackendMethods> WorkItem<B> {
    fn module_kind(&self) -> ModuleKind {
        match *self {
            WorkItem::Optimize(ref m) => m.kind,
-            WorkItem::CopyPostLtoArtifacts(_) | WorkItem::LTO(_) => ModuleKind::Regular,
+            WorkItem::CopyPostLtoArtifacts(_) | WorkItem::FatLto { .. } | WorkItem::ThinLto(_) => {
+                ModuleKind::Regular
+            }
        }
    }

@@ -792,7 +783,8 @@ impl<B: WriteBackendMethods> WorkItem<B> {
        match self {
            WorkItem::Optimize(m) => desc("opt", "optimize module", &m.name),
            WorkItem::CopyPostLtoArtifacts(m) => desc("cpy", "copy LTO artifacts for", &m.name),
-            WorkItem::LTO(m) => desc("lto", "LTO module", m.name()),
+            WorkItem::FatLto { .. } => desc("lto", "fat LTO module", "everything"),
+            WorkItem::ThinLto(m) => desc("lto", "thin-LTO module", m.name()),
        }
    }
 }
@@ -996,12 +988,24 @@ fn execute_copy_from_cache_work_item<B: ExtraBackendMethods>(
    })
 }

-fn execute_lto_work_item<B: ExtraBackendMethods>(
+fn execute_fat_lto_work_item<B: ExtraBackendMethods>(
    cgcx: &CodegenContext<B>,
-    module: lto::LtoModuleCodegen<B>,
+    needs_fat_lto: Vec<FatLtoInput<B>>,
+    import_only_modules: Vec<(SerializedModule<B::ModuleBuffer>, WorkProduct)>,
+    autodiff: Vec<AutoDiffItem>,
    module_config: &ModuleConfig,
 ) -> Result<WorkItemResult<B>, FatalError> {
-    let module = module.optimize(cgcx)?;
+    let module = B::run_and_optimize_fat_lto(cgcx, needs_fat_lto, import_only_modules, autodiff)?;
+    let module = B::codegen(cgcx, module, module_config)?;
+    Ok(WorkItemResult::Finished(module))
+}
+
+fn execute_thin_lto_work_item<B: ExtraBackendMethods>(
+    cgcx: &CodegenContext<B>,
+    module: lto::ThinModule<B>,
+    module_config: &ModuleConfig,
+) -> Result<WorkItemResult<B>, FatalError> {
+    let module = B::optimize_thin(cgcx, module)?;
    finish_intra_module_work(cgcx, module, module_config)
 }

@@ -1010,11 +1014,8 @@ fn finish_intra_module_work<B: ExtraBackendMethods>(
    module: ModuleCodegen<B::Module>,
    module_config: &ModuleConfig,
 ) -> Result<WorkItemResult<B>, FatalError> {
-    let dcx = cgcx.create_dcx();
-    let dcx = dcx.handle();
-
    if !cgcx.opts.unstable_opts.combine_cgu || module.kind == ModuleKind::Allocator {
-        let module = B::codegen(cgcx, dcx, module, module_config)?;
+        let module = B::codegen(cgcx, module, module_config)?;
        Ok(WorkItemResult::Finished(module))
    } else {
        Ok(WorkItemResult::NeedsLink(module))
@@ -1031,9 +1032,6 @@ pub(crate) enum Message<B: WriteBackendMethods> {
    /// Sent from a backend worker thread.
    WorkItem { result: Result<WorkItemResult<B>, Option<WorkerFatalError>>, worker_id: usize },

-    /// A vector containing all the AutoDiff tasks that we have to pass to Enzyme.
-    AddAutoDiffItems(Vec<AutoDiffItem>),
-
    /// The frontend has finished generating something (backend IR or a
    /// post-LTO artifact) for a codegen unit, and it should be passed to the
    /// backend. Sent from the main thread.
@@ -1100,6 +1098,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
    backend: B,
    tcx: TyCtxt<'_>,
    crate_info: &CrateInfo,
+    autodiff_items: &[AutoDiffItem],
    shared_emitter: SharedEmitter,
    codegen_worker_send: Sender<CguMessage>,
    coordinator_receive: Receiver<Box<dyn Any + Send>>,
@@ -1109,6 +1108,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
 ) -> thread::JoinHandle<Result<CompiledModules, ()>> {
    let coordinator_send = tx_to_llvm_workers;
    let sess = tcx.sess;
+    let autodiff_items = autodiff_items.to_vec();

    let mut each_linked_rlib_for_lto = Vec::new();
    drop(link::each_linked_rlib(crate_info, None, &mut |cnum, path| {
@@ -1362,7 +1362,6 @@ fn start_executing_work<B: ExtraBackendMethods>(

        // This is where we collect codegen units that have gone all the way
        // through codegen and LLVM.
-        let mut autodiff_items = Vec::new();
        let mut compiled_modules = vec![];
        let mut compiled_allocator_module = None;
        let mut needs_link = Vec::new();
@@ -1474,20 +1473,37 @@ fn start_executing_work<B: ExtraBackendMethods>(
                    let needs_thin_lto = mem::take(&mut needs_thin_lto);
                    let import_only_modules = mem::take(&mut lto_import_only_modules);

-                    for (work, cost) in generate_lto_work(
-                        &cgcx,
-                        autodiff_items.clone(),
-                        needs_fat_lto,
-                        needs_thin_lto,
-                        import_only_modules,
-                    ) {
-                        let insertion_index = work_items
-                            .binary_search_by_key(&cost, |&(_, cost)| cost)
-                            .unwrap_or_else(|e| e);
-                        work_items.insert(insertion_index, (work, cost));
+                    if !needs_fat_lto.is_empty() {
+                        assert!(needs_thin_lto.is_empty());
+
+                        work_items.push((
+                            WorkItem::FatLto {
+                                needs_fat_lto,
+                                import_only_modules,
+                                autodiff: autodiff_items.clone(),
+                            },
+                            0,
+                        ));
                        if cgcx.parallel {
                            helper.request_token();
                        }
+                    } else {
+                        if !autodiff_items.is_empty() {
+                            let dcx = cgcx.create_dcx();
+                            dcx.handle().emit_fatal(AutodiffWithoutLto {});
+                        }
+
+                        for (work, cost) in
+                            generate_thin_lto_work(&cgcx, needs_thin_lto, import_only_modules)
+                        {
+                            let insertion_index = work_items
+                                .binary_search_by_key(&cost, |&(_, cost)| cost)
+                                .unwrap_or_else(|e| e);
+                            work_items.insert(insertion_index, (work, cost));
+                            if cgcx.parallel {
+                                helper.request_token();
+                            }
+                        }
                    }
                }

@@ -1616,10 +1632,6 @@ fn start_executing_work<B: ExtraBackendMethods>(
                    main_thread_state = MainThreadState::Idle;
                }

-                Message::AddAutoDiffItems(mut items) => {
-                    autodiff_items.append(&mut items);
-                }
-
                Message::CodegenComplete => {
                    if codegen_state != Aborted {
                        codegen_state = Completed;
@@ -1702,7 +1714,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
            let dcx = dcx.handle();
            let module = B::run_link(&cgcx, dcx, needs_link).map_err(|_| ())?;
            let module =
-                B::codegen(&cgcx, dcx, module, cgcx.config(ModuleKind::Regular)).map_err(|_| ())?;
+                B::codegen(&cgcx, module, cgcx.config(ModuleKind::Regular)).map_err(|_| ())?;
            compiled_modules.push(module);
        }

@@ -1842,10 +1854,22 @@ fn spawn_work<'a, B: ExtraBackendMethods>(
                    );
                    Ok(execute_copy_from_cache_work_item(&cgcx, m, module_config))
                }
-                WorkItem::LTO(m) => {
+                WorkItem::FatLto { needs_fat_lto, import_only_modules, autodiff } => {
+                    let _timer = cgcx
+                        .prof
+                        .generic_activity_with_arg("codegen_module_perform_lto", "everything");
+                    execute_fat_lto_work_item(
+                        &cgcx,
+                        needs_fat_lto,
+                        import_only_modules,
+                        autodiff,
+                        module_config,
+                    )
+                }
+                WorkItem::ThinLto(m) => {
                    let _timer =
                        cgcx.prof.generic_activity_with_arg("codegen_module_perform_lto", m.name());
-                    execute_lto_work_item(&cgcx, m, module_config)
+                    execute_thin_lto_work_item(&cgcx, m, module_config)
                }
            })
        };
@@ -2082,10 +2106,6 @@ impl<B: ExtraBackendMethods> OngoingCodegen<B> {
        drop(self.coordinator.sender.send(Box::new(Message::CodegenComplete::<B>)));
    }

-    pub(crate) fn submit_autodiff_items(&self, items: Vec<AutoDiffItem>) {
-        drop(self.coordinator.sender.send(Box::new(Message::<B>::AddAutoDiffItems(items))));
-    }
-
    pub(crate) fn check_for_errors(&self, sess: &Session) {
        self.shared_emitter_main.check(sess, false);
    }