coverage: Build an "expansion tree" and use it to unexpand raw spans

This commit is contained in:
Zalathar
2025-08-16 17:46:05 +10:00
parent bd8e3db94d
commit c2eb45b4a1
9 changed files with 238 additions and 161 deletions

View File

@@ -0,0 +1,127 @@
use rustc_data_structures::fx::{FxIndexMap, FxIndexSet, IndexEntry};
use rustc_middle::mir::coverage::BasicCoverageBlock;
use rustc_span::{ExpnId, ExpnKind, Span};
#[derive(Clone, Copy, Debug)]
pub(crate) struct SpanWithBcb {
pub(crate) span: Span,
pub(crate) bcb: BasicCoverageBlock,
}
#[derive(Debug)]
pub(crate) struct ExpnTree {
nodes: FxIndexMap<ExpnId, ExpnNode>,
}
impl ExpnTree {
pub(crate) fn get(&self, expn_id: ExpnId) -> Option<&ExpnNode> {
self.nodes.get(&expn_id)
}
/// Yields the tree node for the given expansion ID (if present), followed
/// by the nodes of all of its descendants in depth-first order.
pub(crate) fn iter_node_and_descendants(
&self,
root_expn_id: ExpnId,
) -> impl Iterator<Item = &ExpnNode> {
gen move {
let Some(root_node) = self.get(root_expn_id) else { return };
yield root_node;
// Stack of child-node-ID iterators that drives the depth-first traversal.
let mut iter_stack = vec![root_node.child_expn_ids.iter()];
while let Some(curr_iter) = iter_stack.last_mut() {
// Pull the next ID from the top of the stack.
let Some(&curr_id) = curr_iter.next() else {
iter_stack.pop();
continue;
};
// Yield this node.
let Some(node) = self.get(curr_id) else { continue };
yield node;
// Push the node's children, to be traversed next.
if !node.child_expn_ids.is_empty() {
iter_stack.push(node.child_expn_ids.iter());
}
}
}
}
}
#[derive(Debug)]
pub(crate) struct ExpnNode {
/// Storing the expansion ID in its own node is not strictly necessary,
/// but is helpful for debugging and might be useful later.
#[expect(dead_code)]
pub(crate) expn_id: ExpnId,
// Useful info extracted from `ExpnData`.
pub(crate) expn_kind: ExpnKind,
/// Non-dummy `ExpnData::call_site` span.
pub(crate) call_site: Option<Span>,
/// Expansion ID of `call_site`, if present.
/// This links an expansion node to its parent in the tree.
pub(crate) call_site_expn_id: Option<ExpnId>,
/// Spans (and their associated BCBs) belonging to this expansion.
pub(crate) spans: Vec<SpanWithBcb>,
/// Expansions whose call-site is in this expansion.
pub(crate) child_expn_ids: FxIndexSet<ExpnId>,
}
impl ExpnNode {
fn new(expn_id: ExpnId) -> Self {
let expn_data = expn_id.expn_data();
let call_site = Some(expn_data.call_site).filter(|sp| !sp.is_dummy());
let call_site_expn_id = try { call_site?.ctxt().outer_expn() };
Self {
expn_id,
expn_kind: expn_data.kind.clone(),
call_site,
call_site_expn_id,
spans: vec![],
child_expn_ids: FxIndexSet::default(),
}
}
}
/// Given a collection of span/BCB pairs from potentially-different syntax contexts,
/// arranges them into an "expansion tree" based on their expansion call-sites.
pub(crate) fn build_expn_tree(spans: impl IntoIterator<Item = SpanWithBcb>) -> ExpnTree {
let mut nodes = FxIndexMap::default();
let new_node = |&expn_id: &ExpnId| ExpnNode::new(expn_id);
for span_with_bcb in spans {
// Create a node for this span's enclosing expansion, and add the span to it.
let expn_id = span_with_bcb.span.ctxt().outer_expn();
let node = nodes.entry(expn_id).or_insert_with_key(new_node);
node.spans.push(span_with_bcb);
// Now walk up the expansion call-site chain, creating nodes and registering children.
let mut prev = expn_id;
let mut curr_expn_id = node.call_site_expn_id;
while let Some(expn_id) = curr_expn_id {
let entry = nodes.entry(expn_id);
let node_existed = matches!(entry, IndexEntry::Occupied(_));
let node = entry.or_insert_with_key(new_node);
node.child_expn_ids.insert(prev);
if node_existed {
break;
}
prev = expn_id;
curr_expn_id = node.call_site_expn_id;
}
}
ExpnTree { nodes }
}

View File

@@ -8,6 +8,7 @@ use crate::coverage::graph::CoverageGraph;
use crate::coverage::mappings::ExtractedMappings;
mod counters;
mod expansion;
mod graph;
mod hir_info;
mod mappings;

View File

@@ -1,15 +1,14 @@
use rustc_data_structures::fx::FxHashSet;
use rustc_middle::mir;
use rustc_middle::mir::coverage::{Mapping, MappingKind, START_BCB};
use rustc_middle::ty::TyCtxt;
use rustc_span::source_map::SourceMap;
use rustc_span::{BytePos, DesugaringKind, ExpnKind, MacroKind, Span};
use rustc_span::{BytePos, DesugaringKind, ExpnId, ExpnKind, MacroKind, Span};
use tracing::instrument;
use crate::coverage::expansion::{self, ExpnTree, SpanWithBcb};
use crate::coverage::graph::{BasicCoverageBlock, CoverageGraph};
use crate::coverage::hir_info::ExtractedHirInfo;
use crate::coverage::spans::from_mir::{Hole, RawSpanFromMir, SpanFromMir};
use crate::coverage::unexpand;
use crate::coverage::spans::from_mir::{Hole, RawSpanFromMir};
mod from_mir;
@@ -34,19 +33,51 @@ pub(super) fn extract_refined_covspans<'tcx>(
let &ExtractedHirInfo { body_span, .. } = hir_info;
let raw_spans = from_mir::extract_raw_spans_from_mir(mir_body, graph);
let mut covspans = raw_spans
.into_iter()
.filter_map(|RawSpanFromMir { raw_span, bcb }| try {
let (span, expn_kind) =
unexpand::unexpand_into_body_span_with_expn_kind(raw_span, body_span)?;
// Discard any spans that fill the entire body, because they tend
// to represent compiler-inserted code, e.g. implicitly returning `()`.
if span.source_equal(body_span) {
return None;
};
SpanFromMir { span, expn_kind, bcb }
})
.collect::<Vec<_>>();
// Use the raw spans to build a tree of expansions for this function.
let expn_tree = expansion::build_expn_tree(
raw_spans
.into_iter()
.map(|RawSpanFromMir { raw_span, bcb }| SpanWithBcb { span: raw_span, bcb }),
);
let mut covspans = vec![];
let mut push_covspan = |covspan: Covspan| {
let covspan_span = covspan.span;
// Discard any spans not contained within the function body span.
// Also discard any spans that fill the entire body, because they tend
// to represent compiler-inserted code, e.g. implicitly returning `()`.
if !body_span.contains(covspan_span) || body_span.source_equal(covspan_span) {
return;
}
// Each pushed covspan should have the same context as the body span.
// If it somehow doesn't, discard the covspan, or panic in debug builds.
if !body_span.eq_ctxt(covspan_span) {
debug_assert!(
false,
"span context mismatch: body_span={body_span:?}, covspan.span={covspan_span:?}"
);
return;
}
covspans.push(covspan);
};
if let Some(node) = expn_tree.get(body_span.ctxt().outer_expn()) {
for &SpanWithBcb { span, bcb } in &node.spans {
push_covspan(Covspan { span, bcb });
}
// For each expansion with its call-site in the body span, try to
// distill a corresponding covspan.
for &child_expn_id in &node.child_expn_ids {
if let Some(covspan) =
single_covspan_for_child_expn(tcx, graph, &expn_tree, child_expn_id)
{
push_covspan(covspan);
}
}
}
// Only proceed if we found at least one usable span.
if covspans.is_empty() {
@@ -57,17 +88,10 @@ pub(super) fn extract_refined_covspans<'tcx>(
// Otherwise, add a fake span at the start of the body, to avoid an ugly
// gap between the start of the body and the first real span.
// FIXME: Find a more principled way to solve this problem.
covspans.push(SpanFromMir::for_fn_sig(
hir_info.fn_sig_span.unwrap_or_else(|| body_span.shrink_to_lo()),
));
// First, perform the passes that need macro information.
covspans.sort_by(|a, b| graph.cmp_in_dominator_order(a.bcb, b.bcb));
remove_unwanted_expansion_spans(&mut covspans);
shrink_visible_macro_spans(tcx, &mut covspans);
// We no longer need the extra information in `SpanFromMir`, so convert to `Covspan`.
let mut covspans = covspans.into_iter().map(SpanFromMir::into_covspan).collect::<Vec<_>>();
covspans.push(Covspan {
span: hir_info.fn_sig_span.unwrap_or_else(|| body_span.shrink_to_lo()),
bcb: START_BCB,
});
let compare_covspans = |a: &Covspan, b: &Covspan| {
compare_spans(a.span, b.span)
@@ -117,43 +141,37 @@ pub(super) fn extract_refined_covspans<'tcx>(
}));
}
/// Macros that expand into branches (e.g. `assert!`, `trace!`) tend to generate
/// multiple condition/consequent blocks that have the span of the whole macro
/// invocation, which is unhelpful. Keeping only the first such span seems to
/// give better mappings, so remove the others.
///
/// Similarly, `await` expands to a branch on the discriminant of `Poll`, which
/// leads to incorrect coverage if the `Future` is immediately ready (#98712).
///
/// (The input spans should be sorted in BCB dominator order, so that the
/// retained "first" span is likely to dominate the others.)
fn remove_unwanted_expansion_spans(covspans: &mut Vec<SpanFromMir>) {
let mut deduplicated_spans = FxHashSet::default();
/// For a single child expansion, try to distill it into a single span+BCB mapping.
fn single_covspan_for_child_expn(
tcx: TyCtxt<'_>,
graph: &CoverageGraph,
expn_tree: &ExpnTree,
expn_id: ExpnId,
) -> Option<Covspan> {
let node = expn_tree.get(expn_id)?;
covspans.retain(|covspan| {
match covspan.expn_kind {
// Retain only the first await-related or macro-expanded covspan with this span.
Some(ExpnKind::Desugaring(DesugaringKind::Await)) => {
deduplicated_spans.insert(covspan.span)
}
Some(ExpnKind::Macro(MacroKind::Bang, _)) => deduplicated_spans.insert(covspan.span),
// Ignore (retain) other spans.
_ => true,
let bcbs =
expn_tree.iter_node_and_descendants(expn_id).flat_map(|n| n.spans.iter().map(|s| s.bcb));
let bcb = match node.expn_kind {
// For bang-macros (e.g. `assert!`, `trace!`) and for `await`, taking
// the "first" BCB in dominator order seems to give good results.
ExpnKind::Macro(MacroKind::Bang, _) | ExpnKind::Desugaring(DesugaringKind::Await) => {
bcbs.min_by(|&a, &b| graph.cmp_in_dominator_order(a, b))?
}
});
}
// For other kinds of expansion, taking the "last" (most-dominated) BCB
// seems to give good results.
_ => bcbs.max_by(|&a, &b| graph.cmp_in_dominator_order(a, b))?,
};
/// When a span corresponds to a macro invocation that is visible from the
/// function body, truncate it to just the macro name plus `!`.
/// This seems to give better results for code that uses macros.
fn shrink_visible_macro_spans(tcx: TyCtxt<'_>, covspans: &mut Vec<SpanFromMir>) {
let source_map = tcx.sess.source_map();
for covspan in covspans {
if matches!(covspan.expn_kind, Some(ExpnKind::Macro(MacroKind::Bang, _))) {
covspan.span = source_map.span_through_char(covspan.span, '!');
}
// For bang-macro expansions, limit the call-site span to just the macro
// name plus `!`, excluding the macro arguments.
let mut span = node.call_site?;
if matches!(node.expn_kind, ExpnKind::Macro(MacroKind::Bang, _)) {
span = tcx.sess.source_map().span_through_char(span, '!');
}
Some(Covspan { span, bcb })
}
/// Discard all covspans that overlap a hole.

View File

@@ -5,10 +5,9 @@ use rustc_middle::mir::coverage::CoverageKind;
use rustc_middle::mir::{
self, FakeReadCause, Statement, StatementKind, Terminator, TerminatorKind,
};
use rustc_span::{ExpnKind, Span};
use rustc_span::Span;
use crate::coverage::graph::{BasicCoverageBlock, CoverageGraph, START_BCB};
use crate::coverage::spans::Covspan;
use crate::coverage::graph::{BasicCoverageBlock, CoverageGraph};
#[derive(Debug)]
pub(crate) struct RawSpanFromMir {
@@ -160,32 +159,3 @@ impl Hole {
true
}
}
#[derive(Debug)]
pub(crate) struct SpanFromMir {
/// A span that has been extracted from MIR and then "un-expanded" back to
/// within the current function's `body_span`. After various intermediate
/// processing steps, this span is emitted as part of the final coverage
/// mappings.
///
/// With the exception of `fn_sig_span`, this should always be contained
/// within `body_span`.
pub(crate) span: Span,
pub(crate) expn_kind: Option<ExpnKind>,
pub(crate) bcb: BasicCoverageBlock,
}
impl SpanFromMir {
pub(crate) fn for_fn_sig(fn_sig_span: Span) -> Self {
Self::new(fn_sig_span, None, START_BCB)
}
pub(crate) fn new(span: Span, expn_kind: Option<ExpnKind>, bcb: BasicCoverageBlock) -> Self {
Self { span, expn_kind, bcb }
}
pub(crate) fn into_covspan(self) -> Covspan {
let Self { span, expn_kind: _, bcb } = self;
Covspan { span, bcb }
}
}

View File

@@ -1,4 +1,4 @@
use rustc_span::{ExpnKind, Span};
use rustc_span::Span;
/// Walks through the expansion ancestors of `original_span` to find a span that
/// is contained in `body_span` and has the same [syntax context] as `body_span`.
@@ -7,49 +7,3 @@ pub(crate) fn unexpand_into_body_span(original_span: Span, body_span: Span) -> O
// we can just delegate directly to `find_ancestor_inside_same_ctxt`.
original_span.find_ancestor_inside_same_ctxt(body_span)
}
/// Walks through the expansion ancestors of `original_span` to find a span that
/// is contained in `body_span` and has the same [syntax context] as `body_span`.
///
/// If the returned span represents a bang-macro invocation (e.g. `foo!(..)`),
/// the returned symbol will be the name of that macro (e.g. `foo`).
pub(crate) fn unexpand_into_body_span_with_expn_kind(
original_span: Span,
body_span: Span,
) -> Option<(Span, Option<ExpnKind>)> {
let (span, prev) = unexpand_into_body_span_with_prev(original_span, body_span)?;
let expn_kind = prev.map(|prev| prev.ctxt().outer_expn_data().kind);
Some((span, expn_kind))
}
/// Walks through the expansion ancestors of `original_span` to find a span that
/// is contained in `body_span` and has the same [syntax context] as `body_span`.
/// The ancestor that was traversed just before the matching span (if any) is
/// also returned.
///
/// For example, a return value of `Some((ancestor, Some(prev)))` means that:
/// - `ancestor == original_span.find_ancestor_inside_same_ctxt(body_span)`
/// - `prev.parent_callsite() == ancestor`
///
/// [syntax context]: rustc_span::SyntaxContext
fn unexpand_into_body_span_with_prev(
original_span: Span,
body_span: Span,
) -> Option<(Span, Option<Span>)> {
let mut prev = None;
let mut curr = original_span;
while !body_span.contains(curr) || !curr.eq_ctxt(body_span) {
prev = Some(curr);
curr = curr.parent_callsite()?;
}
debug_assert_eq!(Some(curr), original_span.find_ancestor_inside_same_ctxt(body_span));
if let Some(prev) = prev {
debug_assert_eq!(Some(curr), prev.parent_callsite());
}
Some((curr, prev))
}

View File

@@ -5,6 +5,7 @@
#![feature(const_type_name)]
#![feature(cow_is_borrowed)]
#![feature(file_buffered)]
#![feature(gen_blocks)]
#![feature(if_let_guard)]
#![feature(impl_trait_in_assoc_type)]
#![feature(try_blocks)]

View File

@@ -118,21 +118,23 @@ Number of file 0 mappings: 4
Highest counter ID seen: (none)
Function name: closure::main::{closure#12} (unused)
Raw bytes (10): 0x[01, 01, 00, 01, 00, a7, 01, 0a, 00, 16]
Raw bytes (15): 0x[01, 01, 00, 02, 00, a7, 01, 01, 00, 09, 00, 00, 0a, 00, 16]
Number of files: 1
- file 0 => $DIR/closure.rs
Number of expressions: 0
Number of file 0 mappings: 1
- Code(Zero) at (prev + 167, 10) to (start + 0, 22)
Number of file 0 mappings: 2
- Code(Zero) at (prev + 167, 1) to (start + 0, 9)
- Code(Zero) at (prev + 0, 10) to (start + 0, 22)
Highest counter ID seen: (none)
Function name: closure::main::{closure#13} (unused)
Raw bytes (10): 0x[01, 01, 00, 01, 00, ad, 01, 11, 00, 1d]
Raw bytes (15): 0x[01, 01, 00, 02, 00, ac, 01, 0d, 00, 15, 00, 01, 11, 00, 1d]
Number of files: 1
- file 0 => $DIR/closure.rs
Number of expressions: 0
Number of file 0 mappings: 1
- Code(Zero) at (prev + 173, 17) to (start + 0, 29)
Number of file 0 mappings: 2
- Code(Zero) at (prev + 172, 13) to (start + 0, 21)
- Code(Zero) at (prev + 1, 17) to (start + 0, 29)
Highest counter ID seen: (none)
Function name: closure::main::{closure#14}
@@ -289,30 +291,33 @@ Number of file 0 mappings: 7
Highest counter ID seen: (none)
Function name: closure::main::{closure#5}
Raw bytes (10): 0x[01, 01, 00, 01, 01, 8c, 01, 46, 00, 4e]
Raw bytes (15): 0x[01, 01, 00, 02, 01, 8c, 01, 3d, 00, 45, 01, 00, 46, 00, 4e]
Number of files: 1
- file 0 => $DIR/closure.rs
Number of expressions: 0
Number of file 0 mappings: 1
- Code(Counter(0)) at (prev + 140, 70) to (start + 0, 78)
Number of file 0 mappings: 2
- Code(Counter(0)) at (prev + 140, 61) to (start + 0, 69)
- Code(Counter(0)) at (prev + 0, 70) to (start + 0, 78)
Highest counter ID seen: c0
Function name: closure::main::{closure#6}
Raw bytes (10): 0x[01, 01, 00, 01, 01, 8d, 01, 4a, 00, 56]
Raw bytes (15): 0x[01, 01, 00, 02, 01, 8d, 01, 41, 00, 49, 01, 00, 4a, 00, 56]
Number of files: 1
- file 0 => $DIR/closure.rs
Number of expressions: 0
Number of file 0 mappings: 1
- Code(Counter(0)) at (prev + 141, 74) to (start + 0, 86)
Number of file 0 mappings: 2
- Code(Counter(0)) at (prev + 141, 65) to (start + 0, 73)
- Code(Counter(0)) at (prev + 0, 74) to (start + 0, 86)
Highest counter ID seen: c0
Function name: closure::main::{closure#7} (unused)
Raw bytes (10): 0x[01, 01, 00, 01, 00, 8e, 01, 44, 00, 50]
Raw bytes (15): 0x[01, 01, 00, 02, 00, 8e, 01, 3b, 00, 43, 00, 00, 44, 00, 50]
Number of files: 1
- file 0 => $DIR/closure.rs
Number of expressions: 0
Number of file 0 mappings: 1
- Code(Zero) at (prev + 142, 68) to (start + 0, 80)
Number of file 0 mappings: 2
- Code(Zero) at (prev + 142, 59) to (start + 0, 67)
- Code(Zero) at (prev + 0, 68) to (start + 0, 80)
Highest counter ID seen: (none)
Function name: closure::main::{closure#8} (unused)

View File

@@ -139,9 +139,9 @@
LL| |
LL| 1| let short_used_covered_closure_macro = | used_arg: u8 | println!("called");
LL| 1| let short_used_not_covered_closure_macro = | used_arg: u8 | println!("not called");
^0
^0 ^0
LL| 1| let _short_unused_closure_macro = | _unused_arg: u8 | println!("not called");
^0
^0 ^0
LL| |
LL| |
LL| |
@@ -173,7 +173,7 @@
LL| |
LL| 1| let _short_unused_closure_line_break_no_block2 =
LL| | | _unused_arg: u8 |
LL| | println!(
LL| 0| println!(
LL| 0| "not called"
LL| | )
LL| | ;

View File

@@ -1,10 +1,11 @@
Function name: macro_in_closure::NO_BLOCK::{closure#0}
Raw bytes (9): 0x[01, 01, 00, 01, 01, 07, 25, 00, 2c]
Raw bytes (14): 0x[01, 01, 00, 02, 01, 07, 1c, 00, 24, 01, 00, 25, 00, 2c]
Number of files: 1
- file 0 => $DIR/macro_in_closure.rs
Number of expressions: 0
Number of file 0 mappings: 1
- Code(Counter(0)) at (prev + 7, 37) to (start + 0, 44)
Number of file 0 mappings: 2
- Code(Counter(0)) at (prev + 7, 28) to (start + 0, 36)
- Code(Counter(0)) at (prev + 0, 37) to (start + 0, 44)
Highest counter ID seen: c0
Function name: macro_in_closure::WITH_BLOCK::{closure#0}