From 7cb357a36b96781f9ff85f8a4168382243352ba1 Mon Sep 17 00:00:00 2001 From: Thalia Archibald Date: Fri, 11 Apr 2025 02:52:17 -0700 Subject: [PATCH 1/2] Make internal `OsString::truncate` and `extend_from_slice` unsafe Communicate the safety invariants of these methods with `unsafe fn` rather than privacy. --- library/std/src/ffi/os_str.rs | 22 +++++++++++++++------ library/std/src/path.rs | 3 ++- library/std/src/sys/os_str/bytes.rs | 25 +++++++++++++++--------- library/std/src/sys/os_str/wtf8.rs | 30 ++++++++++++++++++++--------- 4 files changed, 55 insertions(+), 25 deletions(-) diff --git a/library/std/src/ffi/os_str.rs b/library/std/src/ffi/os_str.rs index ce01175309a77..72bdf03ee61a4 100644 --- a/library/std/src/ffi/os_str.rs +++ b/library/std/src/ffi/os_str.rs @@ -582,15 +582,25 @@ impl OsString { #[unstable(feature = "os_string_truncate", issue = "133262")] pub fn truncate(&mut self, len: usize) { self.as_os_str().inner.check_public_boundary(len); - self.inner.truncate(len); + // SAFETY: The length was just checked to be at a valid boundary. + unsafe { self.inner.truncate_unchecked(len) }; } - /// Provides plumbing to core `Vec::extend_from_slice`. - /// More well behaving alternative to allowing outer types - /// full mutable access to the core `Vec`. + /// Provides plumbing to `Vec::extend_from_slice` without giving full + /// mutable access to the `Vec`. + /// + /// # Safety + /// + /// The slice must be valid for the platform encoding (as described in + /// [`OsStr::from_encoded_bytes_unchecked`]). + /// + /// This bypasses the encoding-dependent surrogate joining, so `self` must + /// not end with a leading surrogate half and `other` must not start with + /// with a trailing surrogate half. #[inline] - pub(crate) fn extend_from_slice(&mut self, other: &[u8]) { - self.inner.extend_from_slice(other); + pub(crate) unsafe fn extend_from_slice_unchecked(&mut self, other: &[u8]) { + // SAFETY: Guaranteed by caller. + unsafe { self.inner.extend_from_slice_unchecked(other) }; } } diff --git a/library/std/src/path.rs b/library/std/src/path.rs index 50f403ba411b6..ae18891ad47d3 100644 --- a/library/std/src/path.rs +++ b/library/std/src/path.rs @@ -2759,7 +2759,8 @@ impl Path { }; let mut new_path = PathBuf::with_capacity(new_capacity); - new_path.inner.extend_from_slice(slice_to_copy); + // SAFETY: The path is empty, so cannot have surrogate halves. + unsafe { new_path.inner.extend_from_slice_unchecked(slice_to_copy) }; new_path.set_extension(extension); new_path } diff --git a/library/std/src/sys/os_str/bytes.rs b/library/std/src/sys/os_str/bytes.rs index dfff2d3e5d31d..4a8808c923045 100644 --- a/library/std/src/sys/os_str/bytes.rs +++ b/library/std/src/sys/os_str/bytes.rs @@ -216,19 +216,26 @@ impl Buf { self.as_slice().into_rc() } - /// Provides plumbing to core `Vec::truncate`. - /// More well behaving alternative to allowing outer types - /// full mutable access to the core `Vec`. - #[inline] - pub(crate) fn truncate(&mut self, len: usize) { + /// Provides plumbing to `Vec::truncate` without giving full mutable access + /// to the `Vec`. + /// + /// # Safety + /// + /// The length must be at an `OsStr` boundary, according to + /// `Slice::check_public_boundary`. + #[inline] + pub unsafe fn truncate_unchecked(&mut self, len: usize) { self.inner.truncate(len); } - /// Provides plumbing to core `Vec::extend_from_slice`. - /// More well behaving alternative to allowing outer types - /// full mutable access to the core `Vec`. + /// Provides plumbing to `Vec::extend_from_slice` without giving full + /// mutable access to the `Vec`. + /// + /// # Safety + /// + /// This encoding has no safety requirements. #[inline] - pub(crate) fn extend_from_slice(&mut self, other: &[u8]) { + pub unsafe fn extend_from_slice_unchecked(&mut self, other: &[u8]) { self.inner.extend_from_slice(other); } } diff --git a/library/std/src/sys/os_str/wtf8.rs b/library/std/src/sys/os_str/wtf8.rs index a32f5d40f6a9c..5174ea65d0cd9 100644 --- a/library/std/src/sys/os_str/wtf8.rs +++ b/library/std/src/sys/os_str/wtf8.rs @@ -195,19 +195,31 @@ impl Buf { self.as_slice().into_rc() } - /// Provides plumbing to core `Vec::truncate`. - /// More well behaving alternative to allowing outer types - /// full mutable access to the core `Vec`. - #[inline] - pub(crate) fn truncate(&mut self, len: usize) { + /// Provides plumbing to `Vec::truncate` without giving full mutable access + /// to the `Vec`. + /// + /// # Safety + /// + /// The length must be at an `OsStr` boundary, according to + /// `Slice::check_public_boundary`. + #[inline] + pub unsafe fn truncate_unchecked(&mut self, len: usize) { self.inner.truncate(len); } - /// Provides plumbing to core `Vec::extend_from_slice`. - /// More well behaving alternative to allowing outer types - /// full mutable access to the core `Vec`. + /// Provides plumbing to `Vec::extend_from_slice` without giving full + /// mutable access to the `Vec`. + /// + /// # Safety + /// + /// The slice must be valid for the platform encoding (as described in + /// [`Slice::from_encoded_bytes_unchecked`]). + /// + /// This bypasses the WTF-8 surrogate joining, so `self` must not end with a + /// leading surrogate half and `other` must not start with with a trailing + /// surrogate half. #[inline] - pub(crate) fn extend_from_slice(&mut self, other: &[u8]) { + pub unsafe fn extend_from_slice_unchecked(&mut self, other: &[u8]) { self.inner.extend_from_slice(other); } } From 0f0c0d8b16b265ac57cac9fd50f1dcfe78719a6a Mon Sep 17 00:00:00 2001 From: Thalia Archibald Date: Tue, 22 Apr 2025 03:31:45 -0700 Subject: [PATCH 2/2] Avoid redundant WTF-8 checks in `PathBuf` Eliminate checks for WTF-8 boundaries in `PathBuf::set_extension` and `add_extension`, where joining WTF-8 surrogate halves is impossible. Don't convert the `str` to `OsStr`, because `OsString::push` specializes to skip the joining when given strings. --- library/std/src/path.rs | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/library/std/src/path.rs b/library/std/src/path.rs index ae18891ad47d3..32a86e9f7b405 100644 --- a/library/std/src/path.rs +++ b/library/std/src/path.rs @@ -1526,11 +1526,13 @@ impl PathBuf { self.inner.truncate(end_file_stem.wrapping_sub(start)); // add the new extension, if any - let new = extension; + let new = extension.as_encoded_bytes(); if !new.is_empty() { self.inner.reserve_exact(new.len() + 1); - self.inner.push(OsStr::new(".")); - self.inner.push(new); + self.inner.push("."); + // SAFETY: Since a UTF-8 string was just pushed, it is not possible + // for the buffer to end with a surrogate half. + unsafe { self.inner.extend_from_slice_unchecked(new) }; } true @@ -1587,7 +1589,7 @@ impl PathBuf { Some(f) => f.as_encoded_bytes(), }; - let new = extension; + let new = extension.as_encoded_bytes(); if !new.is_empty() { // truncate until right after the file name // this is necessary for trimming the trailing slash @@ -1597,8 +1599,10 @@ impl PathBuf { // append the new extension self.inner.reserve_exact(new.len() + 1); - self.inner.push(OsStr::new(".")); - self.inner.push(new); + self.inner.push("."); + // SAFETY: Since a UTF-8 string was just pushed, it is not possible + // for the buffer to end with a surrogate half. + unsafe { self.inner.extend_from_slice_unchecked(new) }; } true