coverage: Build an "expansion tree" and use it to unexpand raw spans

2025-08-16 17:46:05 +10:00
parent bd8e3db94d
commit c2eb45b4a1
9 changed files with 238 additions and 161 deletions
--- a/compiler/rustc_mir_transform/src/coverage/expansion.rs
+++ b/compiler/rustc_mir_transform/src/coverage/expansion.rs
@@ -0,0 +1,127 @@
+use rustc_data_structures::fx::{FxIndexMap, FxIndexSet, IndexEntry};
+use rustc_middle::mir::coverage::BasicCoverageBlock;
+use rustc_span::{ExpnId, ExpnKind, Span};
+
+#[derive(Clone, Copy, Debug)]
+pub(crate) struct SpanWithBcb {
+    pub(crate) span: Span,
+    pub(crate) bcb: BasicCoverageBlock,
+}
+
+#[derive(Debug)]
+pub(crate) struct ExpnTree {
+    nodes: FxIndexMap<ExpnId, ExpnNode>,
+}
+
+impl ExpnTree {
+    pub(crate) fn get(&self, expn_id: ExpnId) -> Option<&ExpnNode> {
+        self.nodes.get(&expn_id)
+    }
+
+    /// Yields the tree node for the given expansion ID (if present), followed
+    /// by the nodes of all of its descendants in depth-first order.
+    pub(crate) fn iter_node_and_descendants(
+        &self,
+        root_expn_id: ExpnId,
+    ) -> impl Iterator<Item = &ExpnNode> {
+        gen move {
+            let Some(root_node) = self.get(root_expn_id) else { return };
+            yield root_node;
+
+            // Stack of child-node-ID iterators that drives the depth-first traversal.
+            let mut iter_stack = vec![root_node.child_expn_ids.iter()];
+
+            while let Some(curr_iter) = iter_stack.last_mut() {
+                // Pull the next ID from the top of the stack.
+                let Some(&curr_id) = curr_iter.next() else {
+                    iter_stack.pop();
+                    continue;
+                };
+
+                // Yield this node.
+                let Some(node) = self.get(curr_id) else { continue };
+                yield node;
+
+                // Push the node's children, to be traversed next.
+                if !node.child_expn_ids.is_empty() {
+                    iter_stack.push(node.child_expn_ids.iter());
+                }
+            }
+        }
+    }
+}
+
+#[derive(Debug)]
+pub(crate) struct ExpnNode {
+    /// Storing the expansion ID in its own node is not strictly necessary,
+    /// but is helpful for debugging and might be useful later.
+    #[expect(dead_code)]
+    pub(crate) expn_id: ExpnId,
+
+    // Useful info extracted from `ExpnData`.
+    pub(crate) expn_kind: ExpnKind,
+    /// Non-dummy `ExpnData::call_site` span.
+    pub(crate) call_site: Option<Span>,
+    /// Expansion ID of `call_site`, if present.
+    /// This links an expansion node to its parent in the tree.
+    pub(crate) call_site_expn_id: Option<ExpnId>,
+
+    /// Spans (and their associated BCBs) belonging to this expansion.
+    pub(crate) spans: Vec<SpanWithBcb>,
+    /// Expansions whose call-site is in this expansion.
+    pub(crate) child_expn_ids: FxIndexSet<ExpnId>,
+}
+
+impl ExpnNode {
+    fn new(expn_id: ExpnId) -> Self {
+        let expn_data = expn_id.expn_data();
+
+        let call_site = Some(expn_data.call_site).filter(|sp| !sp.is_dummy());
+        let call_site_expn_id = try { call_site?.ctxt().outer_expn() };
+
+        Self {
+            expn_id,
+
+            expn_kind: expn_data.kind.clone(),
+            call_site,
+            call_site_expn_id,
+
+            spans: vec![],
+            child_expn_ids: FxIndexSet::default(),
+        }
+    }
+}
+
+/// Given a collection of span/BCB pairs from potentially-different syntax contexts,
+/// arranges them into an "expansion tree" based on their expansion call-sites.
+pub(crate) fn build_expn_tree(spans: impl IntoIterator<Item = SpanWithBcb>) -> ExpnTree {
+    let mut nodes = FxIndexMap::default();
+    let new_node = |&expn_id: &ExpnId| ExpnNode::new(expn_id);
+
+    for span_with_bcb in spans {
+        // Create a node for this span's enclosing expansion, and add the span to it.
+        let expn_id = span_with_bcb.span.ctxt().outer_expn();
+        let node = nodes.entry(expn_id).or_insert_with_key(new_node);
+        node.spans.push(span_with_bcb);
+
+        // Now walk up the expansion call-site chain, creating nodes and registering children.
+        let mut prev = expn_id;
+        let mut curr_expn_id = node.call_site_expn_id;
+        while let Some(expn_id) = curr_expn_id {
+            let entry = nodes.entry(expn_id);
+            let node_existed = matches!(entry, IndexEntry::Occupied(_));
+
+            let node = entry.or_insert_with_key(new_node);
+            node.child_expn_ids.insert(prev);
+
+            if node_existed {
+                break;
+            }
+
+            prev = expn_id;
+            curr_expn_id = node.call_site_expn_id;
+        }
+    }
+
+    ExpnTree { nodes }
+}
--- a/compiler/rustc_mir_transform/src/coverage/mod.rs
+++ b/compiler/rustc_mir_transform/src/coverage/mod.rs
@@ -8,6 +8,7 @@ use crate::coverage::graph::CoverageGraph;
 use crate::coverage::mappings::ExtractedMappings;

 mod counters;
+mod expansion;
 mod graph;
 mod hir_info;
 mod mappings;
--- a/compiler/rustc_mir_transform/src/coverage/spans.rs
+++ b/compiler/rustc_mir_transform/src/coverage/spans.rs
@@ -1,15 +1,14 @@
-use rustc_data_structures::fx::FxHashSet;
 use rustc_middle::mir;
 use rustc_middle::mir::coverage::{Mapping, MappingKind, START_BCB};
 use rustc_middle::ty::TyCtxt;
 use rustc_span::source_map::SourceMap;
-use rustc_span::{BytePos, DesugaringKind, ExpnKind, MacroKind, Span};
+use rustc_span::{BytePos, DesugaringKind, ExpnId, ExpnKind, MacroKind, Span};
 use tracing::instrument;

+use crate::coverage::expansion::{self, ExpnTree, SpanWithBcb};
 use crate::coverage::graph::{BasicCoverageBlock, CoverageGraph};
 use crate::coverage::hir_info::ExtractedHirInfo;
-use crate::coverage::spans::from_mir::{Hole, RawSpanFromMir, SpanFromMir};
-use crate::coverage::unexpand;
+use crate::coverage::spans::from_mir::{Hole, RawSpanFromMir};

 mod from_mir;

@@ -34,19 +33,51 @@ pub(super) fn extract_refined_covspans<'tcx>(
    let &ExtractedHirInfo { body_span, .. } = hir_info;

    let raw_spans = from_mir::extract_raw_spans_from_mir(mir_body, graph);
-    let mut covspans = raw_spans
-        .into_iter()
-        .filter_map(|RawSpanFromMir { raw_span, bcb }| try {
-            let (span, expn_kind) =
-                unexpand::unexpand_into_body_span_with_expn_kind(raw_span, body_span)?;
-            // Discard any spans that fill the entire body, because they tend
-            // to represent compiler-inserted code, e.g. implicitly returning `()`.
-            if span.source_equal(body_span) {
-                return None;
-            };
-            SpanFromMir { span, expn_kind, bcb }
-        })
-        .collect::<Vec<_>>();
+    // Use the raw spans to build a tree of expansions for this function.
+    let expn_tree = expansion::build_expn_tree(
+        raw_spans
+            .into_iter()
+            .map(|RawSpanFromMir { raw_span, bcb }| SpanWithBcb { span: raw_span, bcb }),
+    );
+
+    let mut covspans = vec![];
+    let mut push_covspan = |covspan: Covspan| {
+        let covspan_span = covspan.span;
+        // Discard any spans not contained within the function body span.
+        // Also discard any spans that fill the entire body, because they tend
+        // to represent compiler-inserted code, e.g. implicitly returning `()`.
+        if !body_span.contains(covspan_span) || body_span.source_equal(covspan_span) {
+            return;
+        }
+
+        // Each pushed covspan should have the same context as the body span.
+        // If it somehow doesn't, discard the covspan, or panic in debug builds.
+        if !body_span.eq_ctxt(covspan_span) {
+            debug_assert!(
+                false,
+                "span context mismatch: body_span={body_span:?}, covspan.span={covspan_span:?}"
+            );
+            return;
+        }
+
+        covspans.push(covspan);
+    };
+
+    if let Some(node) = expn_tree.get(body_span.ctxt().outer_expn()) {
+        for &SpanWithBcb { span, bcb } in &node.spans {
+            push_covspan(Covspan { span, bcb });
+        }
+
+        // For each expansion with its call-site in the body span, try to
+        // distill a corresponding covspan.
+        for &child_expn_id in &node.child_expn_ids {
+            if let Some(covspan) =
+                single_covspan_for_child_expn(tcx, graph, &expn_tree, child_expn_id)
+            {
+                push_covspan(covspan);
+            }
+        }
+    }

    // Only proceed if we found at least one usable span.
    if covspans.is_empty() {
@@ -57,17 +88,10 @@ pub(super) fn extract_refined_covspans<'tcx>(
    // Otherwise, add a fake span at the start of the body, to avoid an ugly
    // gap between the start of the body and the first real span.
    // FIXME: Find a more principled way to solve this problem.
-    covspans.push(SpanFromMir::for_fn_sig(
-        hir_info.fn_sig_span.unwrap_or_else(|| body_span.shrink_to_lo()),
-    ));
-
-    // First, perform the passes that need macro information.
-    covspans.sort_by(|a, b| graph.cmp_in_dominator_order(a.bcb, b.bcb));
-    remove_unwanted_expansion_spans(&mut covspans);
-    shrink_visible_macro_spans(tcx, &mut covspans);
-
-    // We no longer need the extra information in `SpanFromMir`, so convert to `Covspan`.
-    let mut covspans = covspans.into_iter().map(SpanFromMir::into_covspan).collect::<Vec<_>>();
+    covspans.push(Covspan {
+        span: hir_info.fn_sig_span.unwrap_or_else(|| body_span.shrink_to_lo()),
+        bcb: START_BCB,
+    });

    let compare_covspans = |a: &Covspan, b: &Covspan| {
        compare_spans(a.span, b.span)
@@ -117,43 +141,37 @@ pub(super) fn extract_refined_covspans<'tcx>(
    }));
 }

-/// Macros that expand into branches (e.g. `assert!`, `trace!`) tend to generate
-/// multiple condition/consequent blocks that have the span of the whole macro
-/// invocation, which is unhelpful. Keeping only the first such span seems to
-/// give better mappings, so remove the others.
-///
-/// Similarly, `await` expands to a branch on the discriminant of `Poll`, which
-/// leads to incorrect coverage if the `Future` is immediately ready (#98712).
-///
-/// (The input spans should be sorted in BCB dominator order, so that the
-/// retained "first" span is likely to dominate the others.)
-fn remove_unwanted_expansion_spans(covspans: &mut Vec<SpanFromMir>) {
-    let mut deduplicated_spans = FxHashSet::default();
+/// For a single child expansion, try to distill it into a single span+BCB mapping.
+fn single_covspan_for_child_expn(
+    tcx: TyCtxt<'_>,
+    graph: &CoverageGraph,
+    expn_tree: &ExpnTree,
+    expn_id: ExpnId,
+) -> Option<Covspan> {
+    let node = expn_tree.get(expn_id)?;

-    covspans.retain(|covspan| {
-        match covspan.expn_kind {
-            // Retain only the first await-related or macro-expanded covspan with this span.
-            Some(ExpnKind::Desugaring(DesugaringKind::Await)) => {
-                deduplicated_spans.insert(covspan.span)
-            }
-            Some(ExpnKind::Macro(MacroKind::Bang, _)) => deduplicated_spans.insert(covspan.span),
-            // Ignore (retain) other spans.
-            _ => true,
+    let bcbs =
+        expn_tree.iter_node_and_descendants(expn_id).flat_map(|n| n.spans.iter().map(|s| s.bcb));
+
+    let bcb = match node.expn_kind {
+        // For bang-macros (e.g. `assert!`, `trace!`) and for `await`, taking
+        // the "first" BCB in dominator order seems to give good results.
+        ExpnKind::Macro(MacroKind::Bang, _) | ExpnKind::Desugaring(DesugaringKind::Await) => {
+            bcbs.min_by(|&a, &b| graph.cmp_in_dominator_order(a, b))?
        }
-    });
-}
+        // For other kinds of expansion, taking the "last" (most-dominated) BCB
+        // seems to give good results.
+        _ => bcbs.max_by(|&a, &b| graph.cmp_in_dominator_order(a, b))?,
+    };

-/// When a span corresponds to a macro invocation that is visible from the
-/// function body, truncate it to just the macro name plus `!`.
-/// This seems to give better results for code that uses macros.
-fn shrink_visible_macro_spans(tcx: TyCtxt<'_>, covspans: &mut Vec<SpanFromMir>) {
-    let source_map = tcx.sess.source_map();
-
-    for covspan in covspans {
-        if matches!(covspan.expn_kind, Some(ExpnKind::Macro(MacroKind::Bang, _))) {
-            covspan.span = source_map.span_through_char(covspan.span, '!');
-        }
+    // For bang-macro expansions, limit the call-site span to just the macro
+    // name plus `!`, excluding the macro arguments.
+    let mut span = node.call_site?;
+    if matches!(node.expn_kind, ExpnKind::Macro(MacroKind::Bang, _)) {
+        span = tcx.sess.source_map().span_through_char(span, '!');
    }
+
+    Some(Covspan { span, bcb })
 }

 /// Discard all covspans that overlap a hole.
--- a/compiler/rustc_mir_transform/src/coverage/spans/from_mir.rs
+++ b/compiler/rustc_mir_transform/src/coverage/spans/from_mir.rs
@@ -5,10 +5,9 @@ use rustc_middle::mir::coverage::CoverageKind;
 use rustc_middle::mir::{
    self, FakeReadCause, Statement, StatementKind, Terminator, TerminatorKind,
 };
-use rustc_span::{ExpnKind, Span};
+use rustc_span::Span;

-use crate::coverage::graph::{BasicCoverageBlock, CoverageGraph, START_BCB};
-use crate::coverage::spans::Covspan;
+use crate::coverage::graph::{BasicCoverageBlock, CoverageGraph};

 #[derive(Debug)]
 pub(crate) struct RawSpanFromMir {
@@ -160,32 +159,3 @@ impl Hole {
        true
    }
 }
-
-#[derive(Debug)]
-pub(crate) struct SpanFromMir {
-    /// A span that has been extracted from MIR and then "un-expanded" back to
-    /// within the current function's `body_span`. After various intermediate
-    /// processing steps, this span is emitted as part of the final coverage
-    /// mappings.
-    ///
-    /// With the exception of `fn_sig_span`, this should always be contained
-    /// within `body_span`.
-    pub(crate) span: Span,
-    pub(crate) expn_kind: Option<ExpnKind>,
-    pub(crate) bcb: BasicCoverageBlock,
-}
-
-impl SpanFromMir {
-    pub(crate) fn for_fn_sig(fn_sig_span: Span) -> Self {
-        Self::new(fn_sig_span, None, START_BCB)
-    }
-
-    pub(crate) fn new(span: Span, expn_kind: Option<ExpnKind>, bcb: BasicCoverageBlock) -> Self {
-        Self { span, expn_kind, bcb }
-    }
-
-    pub(crate) fn into_covspan(self) -> Covspan {
-        let Self { span, expn_kind: _, bcb } = self;
-        Covspan { span, bcb }
-    }
-}
--- a/compiler/rustc_mir_transform/src/coverage/unexpand.rs
+++ b/compiler/rustc_mir_transform/src/coverage/unexpand.rs
@@ -1,4 +1,4 @@
-use rustc_span::{ExpnKind, Span};
+use rustc_span::Span;

 /// Walks through the expansion ancestors of `original_span` to find a span that
 /// is contained in `body_span` and has the same [syntax context] as `body_span`.
@@ -7,49 +7,3 @@ pub(crate) fn unexpand_into_body_span(original_span: Span, body_span: Span) -> O
    // we can just delegate directly to `find_ancestor_inside_same_ctxt`.
    original_span.find_ancestor_inside_same_ctxt(body_span)
 }
-
-/// Walks through the expansion ancestors of `original_span` to find a span that
-/// is contained in `body_span` and has the same [syntax context] as `body_span`.
-///
-/// If the returned span represents a bang-macro invocation (e.g. `foo!(..)`),
-/// the returned symbol will be the name of that macro (e.g. `foo`).
-pub(crate) fn unexpand_into_body_span_with_expn_kind(
-    original_span: Span,
-    body_span: Span,
-) -> Option<(Span, Option<ExpnKind>)> {
-    let (span, prev) = unexpand_into_body_span_with_prev(original_span, body_span)?;
-
-    let expn_kind = prev.map(|prev| prev.ctxt().outer_expn_data().kind);
-
-    Some((span, expn_kind))
-}
-
-/// Walks through the expansion ancestors of `original_span` to find a span that
-/// is contained in `body_span` and has the same [syntax context] as `body_span`.
-/// The ancestor that was traversed just before the matching span (if any) is
-/// also returned.
-///
-/// For example, a return value of `Some((ancestor, Some(prev)))` means that:
-/// - `ancestor == original_span.find_ancestor_inside_same_ctxt(body_span)`
-/// - `prev.parent_callsite() == ancestor`
-///
-/// [syntax context]: rustc_span::SyntaxContext
-fn unexpand_into_body_span_with_prev(
-    original_span: Span,
-    body_span: Span,
-) -> Option<(Span, Option<Span>)> {
-    let mut prev = None;
-    let mut curr = original_span;
-
-    while !body_span.contains(curr) || !curr.eq_ctxt(body_span) {
-        prev = Some(curr);
-        curr = curr.parent_callsite()?;
-    }
-
-    debug_assert_eq!(Some(curr), original_span.find_ancestor_inside_same_ctxt(body_span));
-    if let Some(prev) = prev {
-        debug_assert_eq!(Some(curr), prev.parent_callsite());
-    }
-
-    Some((curr, prev))
-}
--- a/compiler/rustc_mir_transform/src/lib.rs
+++ b/compiler/rustc_mir_transform/src/lib.rs
@@ -5,6 +5,7 @@
 #![feature(const_type_name)]
 #![feature(cow_is_borrowed)]
 #![feature(file_buffered)]
+#![feature(gen_blocks)]
 #![feature(if_let_guard)]
 #![feature(impl_trait_in_assoc_type)]
 #![feature(try_blocks)]
--- a/tests/coverage/closure.cov-map
+++ b/tests/coverage/closure.cov-map
@@ -118,21 +118,23 @@ Number of file 0 mappings: 4
 Highest counter ID seen: (none)

 Function name: closure::main::{closure#12} (unused)
-Raw bytes (10): 0x[01, 01, 00, 01, 00, a7, 01, 0a, 00, 16]
+Raw bytes (15): 0x[01, 01, 00, 02, 00, a7, 01, 01, 00, 09, 00, 00, 0a, 00, 16]
 Number of files: 1
 - file 0 => $DIR/closure.rs
 Number of expressions: 0
-Number of file 0 mappings: 1
- Code(Zero) at (prev + 167, 10) to (start + 0, 22)
+Number of file 0 mappings: 2
+- Code(Zero) at (prev + 167, 1) to (start + 0, 9)
+- Code(Zero) at (prev + 0, 10) to (start + 0, 22)
 Highest counter ID seen: (none)

 Function name: closure::main::{closure#13} (unused)
-Raw bytes (10): 0x[01, 01, 00, 01, 00, ad, 01, 11, 00, 1d]
+Raw bytes (15): 0x[01, 01, 00, 02, 00, ac, 01, 0d, 00, 15, 00, 01, 11, 00, 1d]
 Number of files: 1
 - file 0 => $DIR/closure.rs
 Number of expressions: 0
-Number of file 0 mappings: 1
- Code(Zero) at (prev + 173, 17) to (start + 0, 29)
+Number of file 0 mappings: 2
+- Code(Zero) at (prev + 172, 13) to (start + 0, 21)
+- Code(Zero) at (prev + 1, 17) to (start + 0, 29)
 Highest counter ID seen: (none)

 Function name: closure::main::{closure#14}
@@ -289,30 +291,33 @@ Number of file 0 mappings: 7
 Highest counter ID seen: (none)

 Function name: closure::main::{closure#5}
-Raw bytes (10): 0x[01, 01, 00, 01, 01, 8c, 01, 46, 00, 4e]
+Raw bytes (15): 0x[01, 01, 00, 02, 01, 8c, 01, 3d, 00, 45, 01, 00, 46, 00, 4e]
 Number of files: 1
 - file 0 => $DIR/closure.rs
 Number of expressions: 0
-Number of file 0 mappings: 1
- Code(Counter(0)) at (prev + 140, 70) to (start + 0, 78)
+Number of file 0 mappings: 2
+- Code(Counter(0)) at (prev + 140, 61) to (start + 0, 69)
+- Code(Counter(0)) at (prev + 0, 70) to (start + 0, 78)
 Highest counter ID seen: c0

 Function name: closure::main::{closure#6}
-Raw bytes (10): 0x[01, 01, 00, 01, 01, 8d, 01, 4a, 00, 56]
+Raw bytes (15): 0x[01, 01, 00, 02, 01, 8d, 01, 41, 00, 49, 01, 00, 4a, 00, 56]
 Number of files: 1
 - file 0 => $DIR/closure.rs
 Number of expressions: 0
-Number of file 0 mappings: 1
- Code(Counter(0)) at (prev + 141, 74) to (start + 0, 86)
+Number of file 0 mappings: 2
+- Code(Counter(0)) at (prev + 141, 65) to (start + 0, 73)
+- Code(Counter(0)) at (prev + 0, 74) to (start + 0, 86)
 Highest counter ID seen: c0

 Function name: closure::main::{closure#7} (unused)
-Raw bytes (10): 0x[01, 01, 00, 01, 00, 8e, 01, 44, 00, 50]
+Raw bytes (15): 0x[01, 01, 00, 02, 00, 8e, 01, 3b, 00, 43, 00, 00, 44, 00, 50]
 Number of files: 1
 - file 0 => $DIR/closure.rs
 Number of expressions: 0
-Number of file 0 mappings: 1
- Code(Zero) at (prev + 142, 68) to (start + 0, 80)
+Number of file 0 mappings: 2
+- Code(Zero) at (prev + 142, 59) to (start + 0, 67)
+- Code(Zero) at (prev + 0, 68) to (start + 0, 80)
 Highest counter ID seen: (none)

 Function name: closure::main::{closure#8} (unused)
--- a/tests/coverage/closure.coverage
+++ b/tests/coverage/closure.coverage
@@ -139,9 +139,9 @@
   LL|       |
   LL|      1|    let short_used_covered_closure_macro = | used_arg: u8 | println!("called");
   LL|      1|    let short_used_not_covered_closure_macro = | used_arg: u8 | println!("not called");
-                                                                                       ^0
+                                                                              ^0       ^0
   LL|      1|    let _short_unused_closure_macro = | _unused_arg: u8 | println!("not called");
-                                                                                 ^0
+                                                                        ^0       ^0
   LL|       |
   LL|       |
   LL|       |
@@ -173,7 +173,7 @@
   LL|       |
   LL|      1|    let _short_unused_closure_line_break_no_block2 =
   LL|       |        | _unused_arg: u8 |
-   LL|       |            println!(
+   LL|      0|            println!(
   LL|      0|                "not called"
   LL|       |            )
   LL|       |    ;
--- a/tests/coverage/macro_in_closure.cov-map
+++ b/tests/coverage/macro_in_closure.cov-map
@@ -1,10 +1,11 @@
 Function name: macro_in_closure::NO_BLOCK::{closure#0}
-Raw bytes (9): 0x[01, 01, 00, 01, 01, 07, 25, 00, 2c]
+Raw bytes (14): 0x[01, 01, 00, 02, 01, 07, 1c, 00, 24, 01, 00, 25, 00, 2c]
 Number of files: 1
 - file 0 => $DIR/macro_in_closure.rs
 Number of expressions: 0
-Number of file 0 mappings: 1
- Code(Counter(0)) at (prev + 7, 37) to (start + 0, 44)
+Number of file 0 mappings: 2
+- Code(Counter(0)) at (prev + 7, 28) to (start + 0, 36)
+- Code(Counter(0)) at (prev + 0, 37) to (start + 0, 44)
 Highest counter ID seen: c0

 Function name: macro_in_closure::WITH_BLOCK::{closure#0}