Rollup merge of #141666 - lolbinarycat:rustdoc-source_span_for_markdown_range-bug-141665, r=GuillaumeGomez

source_span_for_markdown_range: fix utf8 violation

It is non-trivial to reproduce this bug through rustdoc, which uses this function less than clippy does, so the regression test was added as a unit test instead of an integration test.

fixes https://github.com/rust-lang/rust/issues/141665

r? ``@GuillaumeGomez``
This commit is contained in:
Guillaume Gomez
2025-06-01 19:35:43 +02:00
committed by GitHub
2 changed files with 72 additions and 2 deletions

View File

@@ -12,10 +12,14 @@ use rustc_data_structures::fx::FxIndexMap;
use rustc_data_structures::unord::UnordSet;
use rustc_middle::ty::TyCtxt;
use rustc_span::def_id::DefId;
use rustc_span::source_map::SourceMap;
use rustc_span::{DUMMY_SP, InnerSpan, Span, Symbol, sym};
use thin_vec::ThinVec;
use tracing::{debug, trace};
#[cfg(test)]
mod tests;
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum DocFragmentKind {
/// A doc fragment created from a `///` or `//!` doc comment.
@@ -531,10 +535,20 @@ pub fn source_span_for_markdown_range(
markdown: &str,
md_range: &Range<usize>,
fragments: &[DocFragment],
) -> Option<Span> {
let map = tcx.sess.source_map();
source_span_for_markdown_range_inner(map, markdown, md_range, fragments)
}
// inner function used for unit testing
pub fn source_span_for_markdown_range_inner(
map: &SourceMap,
markdown: &str,
md_range: &Range<usize>,
fragments: &[DocFragment],
) -> Option<Span> {
use rustc_span::BytePos;
let map = tcx.sess.source_map();
if let &[fragment] = &fragments
&& fragment.kind == DocFragmentKind::RawDoc
&& let Ok(snippet) = map.span_to_snippet(fragment.span)
@@ -570,7 +584,13 @@ pub fn source_span_for_markdown_range(
{
// If there is either a match in a previous fragment, or
// multiple matches in this fragment, there is ambiguity.
if match_data.is_none() && !snippet[match_start + 1..].contains(pat) {
// the snippet cannot be zero-sized, because it matches
// the pattern, which is checked to not be zero sized.
if match_data.is_none()
&& !snippet.as_bytes()[match_start + 1..]
.windows(pat.len())
.any(|s| s == pat.as_bytes())
{
match_data = Some((i, match_start));
} else {
// Heuristic produced ambiguity, return nothing.

View File

@@ -0,0 +1,50 @@
use std::path::PathBuf;
use rustc_span::source_map::{FilePathMapping, SourceMap};
use rustc_span::symbol::sym;
use rustc_span::{BytePos, Span};
use super::{DocFragment, DocFragmentKind, source_span_for_markdown_range_inner};
/// A one-byte markdown range over a lone backtick must resolve to the single
/// backtick byte inside the `#[doc = "`"]` attribute's string literal.
#[test]
fn single_backtick() {
    let source_map = SourceMap::new(FilePathMapping::empty());
    source_map
        .new_source_file(PathBuf::from("foo.rs").into(), r#"#[doc = "`"] fn foo() {}"#.to_string());

    // Bytes 8..11 of the file are the string literal including both quotes.
    let fragments = [DocFragment {
        span: Span::with_root_ctxt(BytePos(8), BytePos(11)),
        item_id: None,
        kind: DocFragmentKind::RawDoc,
        doc: sym::empty, // unused placeholder
        indent: 0,
    }];
    let md_range = 0..1;

    let resolved =
        source_span_for_markdown_range_inner(&source_map, "`", &md_range, &fragments).unwrap();

    // The resolved span is exactly the backtick at byte 9.
    assert_eq!(resolved.lo(), BytePos(9));
    assert_eq!(resolved.hi(), BytePos(10));
}
/// A markdown range covering a multi-byte character must resolve to the full
/// byte span of that character in the source file.
#[test]
fn utf8() {
    // regression test for https://github.com/rust-lang/rust/issues/141665
    let sm = SourceMap::new(FilePathMapping::empty());
    sm.new_source_file(PathBuf::from("foo.rs").into(), r#"#[doc = "⚠"] fn foo() {}"#.to_string());

    let span = source_span_for_markdown_range_inner(
        &sm,
        // The markdown must be the same 3-byte character as in the source
        // file; the range 0..3 below covers exactly its UTF-8 encoding.
        "⚠",
        &(0..3),
        &[DocFragment {
            // NOTE(review): bytes 8..14 cover `"⚠"]`, i.e. one byte past the
            // closing quote; 8..13 would be the literal alone. Confirm intent.
            span: Span::with_root_ctxt(BytePos(8), BytePos(14)),
            item_id: None,
            kind: DocFragmentKind::RawDoc,
            doc: sym::empty, // unused placeholder
            indent: 0,
        }],
    )
    .unwrap();

    // `⚠` occupies bytes 9, 10 and 11 of the file.
    assert_eq!(span.lo(), BytePos(9));
    assert_eq!(span.hi(), BytePos(12));
}