Rollup merge of #140159 - thaliaarchi:pathbuf-extension, r=workingjubilee

Avoid redundant WTF-8 checks in `PathBuf`

Eliminate checks for WTF-8 boundaries in `PathBuf::set_extension` and `add_extension`, where joining WTF-8 surrogate halves is impossible. Don't convert the `str` to `OsStr`, because `OsString::push` specializes to skip the joining when given strings.

To assist in this, mark the internal methods `OsString::truncate` and `extend_from_slice` as `unsafe` to communicate their safety invariants better than with module privacy.

Similar to #137777.

cc `@joboet` `@ChrisDenton`
This commit is contained in:
Stuart Cook
2025-05-02 22:17:00 +10:00
committed by GitHub
4 changed files with 63 additions and 29 deletions

View File

@@ -582,15 +582,25 @@ impl OsString {
#[unstable(feature = "os_string_truncate", issue = "133262")]
pub fn truncate(&mut self, len: usize) {
self.as_os_str().inner.check_public_boundary(len);
self.inner.truncate(len);
// SAFETY: The length was just checked to be at a valid boundary.
unsafe { self.inner.truncate_unchecked(len) };
}
/// Provides plumbing to core `Vec::extend_from_slice`.
/// More well behaving alternative to allowing outer types
/// full mutable access to the core `Vec`.
/// Provides plumbing to `Vec::extend_from_slice` without giving full
/// mutable access to the `Vec`.
///
/// # Safety
///
/// The slice must be valid for the platform encoding (as described in
/// [`OsStr::from_encoded_bytes_unchecked`]).
///
/// This bypasses the encoding-dependent surrogate joining, so `self` must
/// not end with a leading surrogate half and `other` must not start with
/// a trailing surrogate half.
#[inline]
pub(crate) fn extend_from_slice(&mut self, other: &[u8]) {
self.inner.extend_from_slice(other);
pub(crate) unsafe fn extend_from_slice_unchecked(&mut self, other: &[u8]) {
// SAFETY: Guaranteed by caller.
unsafe { self.inner.extend_from_slice_unchecked(other) };
}
}

View File

@@ -1529,11 +1529,13 @@ impl PathBuf {
self.inner.truncate(end_file_stem.wrapping_sub(start));
// add the new extension, if any
let new = extension;
let new = extension.as_encoded_bytes();
if !new.is_empty() {
self.inner.reserve_exact(new.len() + 1);
self.inner.push(OsStr::new("."));
self.inner.push(new);
self.inner.push(".");
// SAFETY: `new` comes from `OsStr::as_encoded_bytes`, so it is valid for
// the platform encoding. Since a UTF-8 string was just pushed, it is not
// possible for the buffer to end with a surrogate half.
unsafe { self.inner.extend_from_slice_unchecked(new) };
}
true
@@ -1597,7 +1599,7 @@ impl PathBuf {
Some(f) => f.as_encoded_bytes(),
};
let new = extension;
let new = extension.as_encoded_bytes();
if !new.is_empty() {
// truncate until right after the file name
// this is necessary for trimming the trailing slash
@@ -1607,8 +1609,10 @@ impl PathBuf {
// append the new extension
self.inner.reserve_exact(new.len() + 1);
self.inner.push(OsStr::new("."));
self.inner.push(new);
self.inner.push(".");
// SAFETY: `new` comes from `OsStr::as_encoded_bytes`, so it is valid for
// the platform encoding. Since a UTF-8 string was just pushed, it is not
// possible for the buffer to end with a surrogate half.
unsafe { self.inner.extend_from_slice_unchecked(new) };
}
true
@@ -2769,7 +2773,8 @@ impl Path {
};
let mut new_path = PathBuf::with_capacity(new_capacity);
new_path.inner.extend_from_slice(slice_to_copy);
// SAFETY: The path is empty, so cannot have surrogate halves.
unsafe { new_path.inner.extend_from_slice_unchecked(slice_to_copy) };
new_path.set_extension(extension);
new_path
}

View File

@@ -216,19 +216,26 @@ impl Buf {
self.as_slice().into_rc()
}
/// Provides plumbing to core `Vec::truncate`.
/// More well behaving alternative to allowing outer types
/// full mutable access to the core `Vec`.
/// Provides plumbing to `Vec::truncate` without giving full mutable access
/// to the `Vec`.
///
/// # Safety
///
/// The length must be at an `OsStr` boundary, according to
/// `Slice::check_public_boundary`.
#[inline]
pub(crate) fn truncate(&mut self, len: usize) {
pub unsafe fn truncate_unchecked(&mut self, len: usize) {
self.inner.truncate(len);
}
/// Provides plumbing to core `Vec::extend_from_slice`.
/// More well behaving alternative to allowing outer types
/// full mutable access to the core `Vec`.
/// Provides plumbing to `Vec::extend_from_slice` without giving full
/// mutable access to the `Vec`.
///
/// # Safety
///
/// This encoding has no safety requirements.
#[inline]
pub(crate) fn extend_from_slice(&mut self, other: &[u8]) {
pub unsafe fn extend_from_slice_unchecked(&mut self, other: &[u8]) {
self.inner.extend_from_slice(other);
}
}

View File

@@ -195,19 +195,31 @@ impl Buf {
self.as_slice().into_rc()
}
/// Provides plumbing to core `Vec::truncate`.
/// More well behaving alternative to allowing outer types
/// full mutable access to the core `Vec`.
/// Provides plumbing to `Vec::truncate` without giving full mutable access
/// to the `Vec`.
///
/// # Safety
///
/// The length must be at an `OsStr` boundary, according to
/// `Slice::check_public_boundary`.
#[inline]
pub(crate) fn truncate(&mut self, len: usize) {
pub unsafe fn truncate_unchecked(&mut self, len: usize) {
self.inner.truncate(len);
}
/// Provides plumbing to core `Vec::extend_from_slice`.
/// More well behaving alternative to allowing outer types
/// full mutable access to the core `Vec`.
/// Provides plumbing to `Vec::extend_from_slice` without giving full
/// mutable access to the `Vec`.
///
/// # Safety
///
/// The slice must be valid for the platform encoding (as described in
/// [`Slice::from_encoded_bytes_unchecked`]).
///
/// This bypasses the WTF-8 surrogate joining, so `self` must not end with a
/// leading surrogate half and `other` must not start with a trailing
/// surrogate half.
#[inline]
pub(crate) fn extend_from_slice(&mut self, other: &[u8]) {
pub unsafe fn extend_from_slice_unchecked(&mut self, other: &[u8]) {
self.inner.extend_from_slice(other);
}
}