Skip to content

Use arcstr for package, extra, and group names #10475

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 10, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ uv-workspace = { path = "crates/uv-workspace" }

anstream = { version = "0.6.15" }
anyhow = { version = "1.0.89" }
arcstr = { version = "1.2.0" }
async-channel = { version = "2.3.1" }
async-compression = { version = "0.4.12", features = ["bzip2", "gzip", "xz", "zstd"] }
async-trait = { version = "0.1.82" }
Expand Down
1 change: 1 addition & 0 deletions crates/uv-normalize/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ doctest = false
workspace = true

[dependencies]
arcstr = { workspace = true }
rkyv = { workspace = true }
schemars = { workspace = true, optional = true }
serde = { workspace = true, features = ["derive"] }
5 changes: 3 additions & 2 deletions crates/uv-normalize/src/extra_name.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use std::str::FromStr;

use serde::{Deserialize, Deserializer, Serialize};

use crate::small_string::SmallString;
use crate::{validate_and_normalize_owned, validate_and_normalize_ref, InvalidNameError};

/// The normalized name of an extra dependency.
Expand All @@ -14,9 +15,9 @@ use crate::{validate_and_normalize_owned, validate_and_normalize_ref, InvalidNam
/// See:
/// - <https://peps.python.org/pep-0685/#specification/>
/// - <https://packaging.python.org/en/latest/specifications/name-normalization/>
#[derive(Debug, Default, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize)]
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize)]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
pub struct ExtraName(String);
pub struct ExtraName(SmallString);

impl ExtraName {
/// Create a validated, normalized extra name.
Expand Down
5 changes: 3 additions & 2 deletions crates/uv-normalize/src/group_name.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,17 @@ use std::sync::LazyLock;

use serde::{Deserialize, Deserializer, Serialize, Serializer};

use crate::small_string::SmallString;
use crate::{validate_and_normalize_owned, validate_and_normalize_ref, InvalidNameError};

/// The normalized name of a dependency group.
///
/// See:
/// - <https://peps.python.org/pep-0735/>
/// - <https://packaging.python.org/en/latest/specifications/name-normalization/>
#[derive(Debug, Default, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
pub struct GroupName(String);
pub struct GroupName(SmallString);

impl GroupName {
/// Create a validated, normalized group name.
Expand Down
34 changes: 26 additions & 8 deletions crates/uv-normalize/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,26 +5,37 @@ pub use dist_info_name::DistInfoName;
pub use extra_name::ExtraName;
pub use group_name::{GroupName, DEV_DEPENDENCIES};
pub use package_name::PackageName;
use small_string::SmallString;

mod dist_info_name;
mod extra_name;
mod group_name;
mod package_name;
mod small_string;

/// Validate and normalize an owned package or extra name.
pub(crate) fn validate_and_normalize_owned(name: String) -> Result<String, InvalidNameError> {
pub(crate) fn validate_and_normalize_owned(name: String) -> Result<SmallString, InvalidNameError> {
if is_normalized(&name)? {
Ok(name)
Ok(SmallString::from(name))
} else {
validate_and_normalize_ref(name)
Ok(SmallString::from(normalize(&name)?))
}
}

/// Validate and normalize an unowned package or extra name.
pub(crate) fn validate_and_normalize_ref(
name: impl AsRef<str>,
) -> Result<String, InvalidNameError> {
) -> Result<SmallString, InvalidNameError> {
let name = name.as_ref();
if is_normalized(name)? {
Ok(SmallString::from(name))
} else {
Ok(SmallString::from(normalize(name)?))
}
}

/// Normalize an unowned package or extra name.
fn normalize(name: &str) -> Result<String, InvalidNameError> {
let mut normalized = String::with_capacity(name.len());

let mut last = None;
Expand Down Expand Up @@ -136,9 +147,14 @@ mod tests {
"FrIeNdLy-._.-bArD",
];
for input in inputs {
assert_eq!(validate_and_normalize_ref(input).unwrap(), "friendly-bard");
assert_eq!(
validate_and_normalize_owned(input.to_string()).unwrap(),
validate_and_normalize_ref(input).unwrap().as_ref(),
"friendly-bard"
);
assert_eq!(
validate_and_normalize_owned(input.to_string())
.unwrap()
.as_ref(),
"friendly-bard"
);
}
Expand Down Expand Up @@ -169,9 +185,11 @@ mod tests {
// Unchanged
let unchanged = ["friendly-bard", "1okay", "okay2"];
for input in unchanged {
assert_eq!(validate_and_normalize_ref(input).unwrap(), input);
assert_eq!(validate_and_normalize_ref(input).unwrap().as_ref(), input);
assert_eq!(
validate_and_normalize_owned(input.to_string()).unwrap(),
validate_and_normalize_owned(input.to_string())
.unwrap()
.as_ref(),
input
);
assert!(is_normalized(input).unwrap());
Expand Down
7 changes: 4 additions & 3 deletions crates/uv-normalize/src/package_name.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
use std::borrow::Cow;
use std::cmp::PartialEq;
use std::str::FromStr;

use serde::{Deserialize, Deserializer, Serialize};

use crate::small_string::SmallString;
use crate::{validate_and_normalize_owned, validate_and_normalize_ref, InvalidNameError};

/// The normalized name of a package.
Expand All @@ -13,7 +15,6 @@ use crate::{validate_and_normalize_owned, validate_and_normalize_ref, InvalidNam
/// See: <https://packaging.python.org/en/latest/specifications/name-normalization/>
#[derive(
Debug,
Default,
Clone,
PartialEq,
Eq,
Expand All @@ -27,7 +28,7 @@ use crate::{validate_and_normalize_owned, validate_and_normalize_ref, InvalidNam
)]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[rkyv(derive(Debug))]
pub struct PackageName(String);
pub struct PackageName(SmallString);

impl PackageName {
/// Create a validated, normalized package name.
Expand Down Expand Up @@ -56,7 +57,7 @@ impl PackageName {

Cow::Owned(owned_string)
} else {
Cow::Borrowed(self.0.as_str())
Cow::Borrowed(self.0.as_ref())
}
}

Expand Down
119 changes: 119 additions & 0 deletions crates/uv-normalize/src/small_string.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
use std::cmp::PartialEq;
use std::ops::Deref;

/// An optimized small string type for short identifiers, like package names.
///
/// Represented as an [`arcstr::ArcStr`] internally. The inner field is private, so
/// callers work with the value as a `str` (via `Deref`/`AsRef<str>`) rather than
/// depending on the backing string type directly.
///
/// Equality, ordering, and hashing are derived from the wrapped [`arcstr::ArcStr`].
#[derive(Default, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub(crate) struct SmallString(arcstr::ArcStr);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I approve this encapsulation.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you!


impl From<&str> for SmallString {
#[inline]
fn from(s: &str) -> Self {
Self(s.into())
}
}

impl From<String> for SmallString {
#[inline]
fn from(s: String) -> Self {
Self(s.into())
}
}

/// Borrow the contents of a [`SmallString`] as a plain string slice.
impl AsRef<str> for SmallString {
    #[inline]
    fn as_ref(&self) -> &str {
        self.0.as_str()
    }
}

/// Let a [`SmallString`] be used wherever a `&str` is expected.
impl Deref for SmallString {
    type Target = str;

    #[inline]
    fn deref(&self) -> &Self::Target {
        // Explicitly reborrow the inner string as `str`.
        &*self.0
    }
}

impl core::fmt::Debug for SmallString {
#[inline]
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
core::fmt::Debug::fmt(&self.0, f)
}
}

impl core::fmt::Display for SmallString {
#[inline]
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
core::fmt::Display::fmt(&self.0, f)
}
}

/// A [`serde::Serialize`] implementation for [`SmallString`].
///
/// The value is emitted as a plain string.
impl serde::Serialize for SmallString {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        let text: &str = &self.0;
        serializer.serialize_str(text)
    }
}

/// An [`rkyv`] implementation for [`SmallString`].
///
/// A [`SmallString`] is archived as an [`rkyv::string::ArchivedString`], resolved from
/// the string contents.
impl rkyv::Archive for SmallString {
    type Archived = rkyv::string::ArchivedString;
    type Resolver = rkyv::string::StringResolver;

    #[inline]
    fn resolve(&self, resolver: Self::Resolver, out: rkyv::Place<Self::Archived>) {
        let text: &str = &self.0;
        rkyv::string::ArchivedString::resolve_from_str(text, resolver, out);
    }
}

/// Serialize a [`SmallString`] into an [`rkyv`] archive from its string contents.
impl<S> rkyv::Serialize<S> for SmallString
where
    S: rkyv::rancor::Fallible + rkyv::ser::Allocator + rkyv::ser::Writer + ?Sized,
    S::Error: rkyv::rancor::Source,
{
    fn serialize(&self, serializer: &mut S) -> Result<Self::Resolver, S::Error> {
        let text: &str = &self.0;
        rkyv::string::ArchivedString::serialize_from_str(text, serializer)
    }
}

/// Rebuild a [`SmallString`] from an archived string.
///
/// The deserializer argument is unused: a new [`SmallString`] is built from the
/// archived string's contents.
impl<D: rkyv::rancor::Fallible + ?Sized> rkyv::Deserialize<SmallString, D>
    for rkyv::string::ArchivedString
{
    fn deserialize(&self, _deserializer: &mut D) -> Result<SmallString, D::Error> {
        let restored: SmallString = self.as_str().into();
        Ok(restored)
    }
}

/// Compare an archived string with a [`SmallString`] by their string contents.
impl PartialEq<SmallString> for rkyv::string::ArchivedString {
    fn eq(&self, other: &SmallString) -> bool {
        let archived: &str = self.as_str();
        let live: &str = other;
        live == archived
    }
}

/// Order an archived string relative to a [`SmallString`] by their string contents.
///
/// The comparison is total, so the result is always `Some`.
impl PartialOrd<SmallString> for rkyv::string::ArchivedString {
    fn partial_cmp(&self, other: &SmallString) -> Option<::core::cmp::Ordering> {
        let archived: &str = self.as_str();
        let live: &str = other;
        Some(Ord::cmp(archived, live))
    }
}

/// A [`schemars::JsonSchema`] implementation for [`SmallString`].
///
/// Every method delegates to the [`String`] implementation, so a [`SmallString`] is
/// described by the same schema as a [`String`].
#[cfg(feature = "schemars")]
impl schemars::JsonSchema for SmallString {
    fn is_referenceable() -> bool {
        <String as schemars::JsonSchema>::is_referenceable()
    }

    fn schema_name() -> String {
        <String as schemars::JsonSchema>::schema_name()
    }

    fn json_schema(generator: &mut schemars::gen::SchemaGenerator) -> schemars::schema::Schema {
        <String as schemars::JsonSchema>::json_schema(generator)
    }
}
3 changes: 2 additions & 1 deletion crates/uv-resolver/src/universal_marker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -488,6 +488,7 @@ fn encode_package_group(package: &PackageName, group: &GroupName) -> ExtraName {
#[cfg(test)]
mod tests {
use super::*;
use std::str::FromStr;

use uv_pypi_types::ConflictSet;

Expand Down Expand Up @@ -516,7 +517,7 @@ mod tests {

/// Shortcut for creating a package name.
fn create_package(name: &str) -> PackageName {
PackageName::new(name.to_string()).unwrap()
PackageName::from_str(name).unwrap()
}

/// Shortcut for creating an extra name.
Expand Down
Loading