Skip to content

Commit 712c921

Browse files
committed
Implement feature string_from_utf8_lossy_owned
Implement feature for lossily converting from `Vec<u8>` to `String` - Add `String::from_utf8_lossy_owned` - Add `FromUtf8Error::into_utf8_lossy`
1 parent e23ae72 commit 712c921

File tree

1 file changed

+72
-0
lines changed

1 file changed

+72
-0
lines changed

library/alloc/src/string.rs

+72
Original file line numberDiff line numberDiff line change
@@ -662,6 +662,54 @@ impl String {
662662
Cow::Owned(res)
663663
}
664664

665+
/// Converts a [`Vec<u8>`] to a `String`, substituting invalid UTF-8
666+
/// sequences with replacement characters.
667+
///
668+
/// See [`String::from_utf8_lossy`] for more details.
669+
///
670+
/// Note that this function does not guarantee reuse of the original `Vec`
671+
/// allocation.
672+
///
673+
/// # Examples
674+
///
675+
/// Basic usage:
676+
///
677+
/// ```
678+
/// #![feature(string_from_utf8_lossy_owned)]
679+
/// // some bytes, in a vector
680+
/// let sparkle_heart = vec![240, 159, 146, 150];
681+
///
682+
/// let sparkle_heart = String::from_utf8_lossy_owned(sparkle_heart);
683+
///
684+
/// assert_eq!(String::from("💖"), sparkle_heart);
685+
/// ```
686+
///
687+
/// Incorrect bytes:
688+
///
689+
/// ```
690+
/// #![feature(string_from_utf8_lossy_owned)]
691+
/// // some invalid bytes
692+
/// let input: Vec<u8> = b"Hello \xF0\x90\x80World".into();
693+
/// let output = String::from_utf8_lossy_owned(input);
694+
///
695+
/// assert_eq!(String::from("Hello �World"), output);
696+
/// ```
697+
#[must_use]
698+
#[cfg(not(no_global_oom_handling))]
699+
#[unstable(feature = "string_from_utf8_lossy_owned", issue = "129436")]
700+
pub fn from_utf8_lossy_owned(v: Vec<u8>) -> String {
    match String::from_utf8_lossy(&v) {
        // The lossy pass had to substitute replacement characters and has
        // therefore already produced a freshly allocated `String`; return it.
        Cow::Owned(repaired) => repaired,
        // SAFETY: `String::from_utf8_lossy`'s contract ensures that a
        // `Cow::Borrowed` result is only returned when the input is a valid
        // UTF-8 string, so the bytes of `v` can be adopted as a `String`
        // without validating them a second time. The only other outcome is
        // the owned `String` handled by the arm above.
        Cow::Borrowed(_) => unsafe { String::from_utf8_unchecked(v) },
    }
}
712+
665713
/// Decode a UTF-16–encoded vector `v` into a `String`, returning [`Err`]
666714
/// if `v` contains any invalid data.
667715
///
@@ -2012,6 +2060,30 @@ impl FromUtf8Error {
20122060
&self.bytes[..]
20132061
}
20142062

2063+
/// Converts the bytes into a `String` lossily, substituting invalid UTF-8
2064+
/// sequences with replacement characters.
2065+
///
2066+
/// See [`String::from_utf8_lossy`] for more details on replacement of
2067+
/// invalid sequences, and [`String::from_utf8_lossy_owned`] for the
2068+
/// `String` function which corresponds to this function.
2069+
///
2070+
/// # Examples
2071+
///
2072+
/// ```
2073+
/// #![feature(string_from_utf8_lossy_owned)]
2074+
/// // some invalid bytes
2075+
/// let input: Vec<u8> = b"Hello \xF0\x90\x80World".into();
2076+
/// let output = String::from_utf8(input).unwrap_or_else(|e| e.into_utf8_lossy());
2077+
///
2078+
/// assert_eq!(String::from("Hello �World"), output);
2079+
/// ```
2080+
#[must_use]
2081+
#[cfg(not(no_global_oom_handling))]
2082+
#[unstable(feature = "string_from_utf8_lossy_owned", issue = "129436")]
2083+
pub fn into_utf8_lossy(self) -> String {
2084+
String::from_utf8_lossy_owned(self.bytes)
2085+
}
2086+
20152087
/// Returns the bytes that were attempted to convert to a `String`.
20162088
///
20172089
/// This method is carefully constructed to avoid allocation. It will

0 commit comments

Comments
 (0)