Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support for 2 semconv registries #627

Merged
merged 24 commits into from
Apr 10, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
e93c6ad
feat(semconv): Add dependencies in the manifest file.
lquerel Mar 2, 2025
cef36de
feat(semconv): Add dependencies in the manifest file.
lquerel Mar 3, 2025
8f8c728
feat(semconv): Merge weaver_cache into weaver_semconv to get rid of c…
lquerel Mar 3, 2025
4ed05fd
chore(semconv): Fix Rust code format.
lquerel Mar 3, 2025
ccf8592
feat(semconv): support 2 registries
lquerel Mar 25, 2025
2676dad
feat(semconv): support 2 registries
lquerel Mar 25, 2025
c085701
Merge remote-tracking branch 'upstream/main' into support-2-registries
lquerel Mar 25, 2025
ba2b480
chore: merge with main upstream
lquerel Mar 25, 2025
cf4b5f6
chore: merge with main upstream
lquerel Mar 25, 2025
3421621
Merge remote-tracking branch 'upstream/main' into support-2-registries
lquerel Mar 25, 2025
dcb2462
chore: update Cargo.lock
lquerel Mar 25, 2025
e7342ea
feat(2-registries): propagate registry id in provenance
lquerel Mar 25, 2025
ba7b254
feat(2-registries): add tests
lquerel Mar 26, 2025
30ff653
chore: merge with upstream main
lquerel Mar 28, 2025
c397f6f
feat(2-registries): replace the semconv spec path by a provenance dat…
lquerel Apr 1, 2025
7367a1f
feat(2-registries): load registry manifest at the creation of the reg…
lquerel Apr 4, 2025
e7f5f61
feat(2-registries): garbage collect unreferenced telemetry objects
lquerel Apr 5, 2025
2945e2e
feat(2-registries): improve unit tests for multi-registry
lquerel Apr 5, 2025
2d34263
feat(2-registries): introduce the concept of virtual directory
lquerel Apr 8, 2025
8796a66
feat(2-registries): introduce the concept of virtual directory
lquerel Apr 9, 2025
9dd3f59
feat(2-registries):merge with main upstream branch
lquerel Apr 10, 2025
9ba9a94
feat(2-registries): update CHANGELOG.md
lquerel Apr 10, 2025
5aaa3ad
feat(2-registries): fix rust doc issue
lquerel Apr 10, 2025
f732476
feat(2-registries): fix tests on windows
lquerel Apr 10, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 35 additions & 44 deletions crates/weaver_resolver/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -380,57 +380,48 @@ impl SchemaResolver {
allow_registry_deps: bool,
follow_symlinks: bool,
) -> Option<WResult<Vec<(Provenance, SemConvSpec)>, weaver_semconv::Error>> {
match registry_repo.manifest_path() {
Some(manifest_path) => {
match RegistryManifest::try_from_file(manifest_path) {
Ok(manifest) => {
if let Some(dependencies) =
manifest.dependencies.filter(|deps| !deps.is_empty())
{
if !allow_registry_deps {
match registry_repo.manifest() {
Some(manifest) => {
if let Some(dependencies) = manifest
.dependencies
.as_ref()
.filter(|deps| !deps.is_empty())
{
if !allow_registry_deps {
Some(WResult::FatalErr(weaver_semconv::Error::SemConvSpecError {
error: format!(
"Registry dependencies are not allowed for the `{}` registry. Weaver currently supports only two registry levels.",
registry_repo.registry_path_repr()
),
}))
} else if dependencies.len() > 1 {
Some(WResult::FatalErr(weaver_semconv::Error::SemConvSpecError {
error: format!(
"Currently, Weaver supports only a single dependency per registry. Multiple dependencies have been found in the `{}` registry.",
registry_repo.registry_path_repr()
),
}))
} else {
let dependency = &dependencies[0];
match RegistryRepo::try_new(&dependency.name, &dependency.registry_path) {
Ok(registry_repo_dep) => Some(Self::load_semconv_specs(
&registry_repo_dep,
false,
follow_symlinks,
)),
Err(e) => {
Some(WResult::FatalErr(weaver_semconv::Error::SemConvSpecError {
error: format!(
"Registry dependencies are not allowed for the `{}` registry. Weaver currently supports only two registry levels.",
registry_repo.registry_path_repr()
"Failed to load the registry dependency `{}`: {}",
dependency.name, e
),
}))
} else if dependencies.len() > 1 {
Some(WResult::FatalErr(weaver_semconv::Error::SemConvSpecError {
error: format!(
"Currently, Weaver supports only a single dependency per registry. Multiple dependencies have been found in the `{}` registry.",
registry_repo.registry_path_repr()
),
}))
} else {
let dependency = &dependencies[0];
match RegistryRepo::try_new(
&dependency.name,
&dependency.registry_path,
) {
Ok(registry_repo_dep) => Some(Self::load_semconv_specs(
&registry_repo_dep,
false,
follow_symlinks,
)),
Err(e) => Some(WResult::FatalErr(
weaver_semconv::Error::SemConvSpecError {
error: format!(
"Failed to load the registry dependency `{}`: {}",
dependency.name, e
),
},
)),
}
}
} else {
// Manifest has no dependencies or dependencies are empty
None
}
}
Err(e) => {
// Manifest file is not valid or not found
Some(WResult::FatalErr(e))
}
} else {
// Manifest has no dependencies or dependencies are empty
None
}
}
None => None,
Expand Down
26 changes: 25 additions & 1 deletion crates/weaver_resolver/src/registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ use weaver_resolved_schema::attribute::UnresolvedAttribute;
use weaver_resolved_schema::lineage::{AttributeLineage, GroupLineage};
use weaver_resolved_schema::registry::{Constraint, Group, Registry};
use weaver_semconv::attribute::AttributeSpec;
use weaver_semconv::group::GroupSpecWithProvenance;
use weaver_semconv::group::{GroupSpecWithProvenance, GroupType};
use weaver_semconv::manifest::RegistryManifest;
use weaver_semconv::provenance::Provenance;
use weaver_semconv::registry::SemConvRegistry;

Expand Down Expand Up @@ -152,9 +153,32 @@ pub fn resolve_semconv_registry(
);
check_root_attribute_id_duplicates(&ureg.registry, &attr_name_index, &mut errors);

gc_unreferenced_objects(registry.manifest(), &mut ureg.registry);

WResult::OkWithNFEs(ureg.registry, errors)
}

/// Garbage collect all the signals and attributes not defined or referenced in the
/// current registry, i.e. telemetry objects only defined in a dependency and not
/// referenced in the current registry.
fn gc_unreferenced_objects(manifest: Option<&RegistryManifest>, registry: &mut Registry) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"garbage collect" :)

I like keeping this concise, easy to read/maintain.

if let Some(manifest) = manifest {
if manifest.dependencies.as_ref().map_or(0, |d| d.len()) > 0 {
// This registry has dependencies.
let current_reg_id = manifest.name.clone();
println!("current registry id: {}", current_reg_id);
registry.groups.retain(|group| {
if let Some(lineage) = &group.lineage {
println!("group registry id: {}", lineage.provenance().registry_id);
lineage.provenance().registry_id.as_ref() == current_reg_id
} else {
true
}
});
}
}
}

/// Checks the `any_of` constraints in the given registry.
///
/// # Arguments
Expand Down
4 changes: 1 addition & 3 deletions crates/weaver_semconv/src/registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -129,9 +129,7 @@ impl SemConvRegistry {
registry.add_semconv_spec(SemConvSpecWithProvenance { spec, provenance });
}

if let Some(manifest_path) = registry_repo.manifest_path() {
registry.set_manifest(RegistryManifest::try_from_file(manifest_path)?);
} else {
if registry_repo.manifest().is_none() {
let mut semconv_version = "unversioned".to_owned();

// No registry manifest found.
Expand Down
41 changes: 35 additions & 6 deletions crates/weaver_semconv/src/registry_repo.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use std::path::{Path, PathBuf};
use std::sync::atomic::AtomicBool;
use std::sync::Arc;

use crate::manifest::RegistryManifest;
use crate::registry_path::RegistryPath;
use crate::Error;
use crate::Error::{GitError, InvalidRegistryArchive, UnsupportedRegistryArchive};
Expand Down Expand Up @@ -37,6 +38,9 @@ pub struct RegistryRepo {
id: Arc<str>,
registry_path: String,
path: PathBuf,
// The registry manifest definition.
manifest: Option<RegistryManifest>,

// Need to keep the tempdir live for the lifetime of the RegistryRepo.
#[allow(dead_code)]
tmp_dir: Option<TempDir>,
Expand All @@ -45,24 +49,33 @@ pub struct RegistryRepo {
impl RegistryRepo {
/// Creates a new `RegistryRepo` from a `RegistryPath` object that
/// specifies the location of the registry.
pub fn try_new(id: &str, registry_path: &RegistryPath) -> Result<Self, Error> {
pub fn try_new(
registry_id_if_no_manifest: &str,
registry_path: &RegistryPath,
) -> Result<Self, Error> {
let registry_path_repr = registry_path.to_string();
match registry_path {
let mut registry_repo = match registry_path {
RegistryPath::LocalFolder { path } => Ok(Self {
id: Arc::from(id),
id: Arc::from(registry_id_if_no_manifest),
registry_path: registry_path_repr,
path: path.into(),
manifest: None,
tmp_dir: None,
}),
RegistryPath::GitRepo {
url, sub_folder, ..
} => Self::try_from_git_url(id, url, sub_folder, registry_path_repr),
} => Self::try_from_git_url(
registry_id_if_no_manifest,
url,
sub_folder,
registry_path_repr,
),
RegistryPath::LocalArchive { path, sub_folder } => {
// Create a temporary directory for the repo that will be deleted
// when the RegistryRepo goes out of scope.
let tmp_dir = Self::create_tmp_repo()?;
Self::try_from_local_archive(
id,
registry_id_if_no_manifest,
path,
sub_folder.as_ref(),
tmp_dir,
Expand All @@ -74,14 +87,22 @@ impl RegistryRepo {
// when the RegistryRepo goes out of scope.
let tmp_dir = Self::create_tmp_repo()?;
Self::try_from_remote_archive(
id,
registry_id_if_no_manifest,
url,
sub_folder.as_ref(),
tmp_dir,
registry_path_repr,
)
}
};
if let Ok(registry_repo) = &mut registry_repo {
if let Some(manifest) = registry_repo.manifest_path() {
let registry_manifest = RegistryManifest::try_from_file(manifest)?;
registry_repo.id = Arc::from(registry_manifest.name.as_str());
registry_repo.manifest = Some(registry_manifest);
}
}
registry_repo
}

/// Creates a new `RegistryRepo` from a Git URL.
Expand Down Expand Up @@ -150,6 +171,7 @@ impl RegistryRepo {
id: Arc::from(id),
registry_path,
path,
manifest: None,
tmp_dir: Some(tmp_dir),
})
}
Expand Down Expand Up @@ -201,6 +223,7 @@ impl RegistryRepo {
id: Arc::from(id),
registry_path,
path: target_path_buf,
manifest: None,
tmp_dir: Some(target_dir),
})
}
Expand Down Expand Up @@ -441,6 +464,12 @@ impl RegistryRepo {
&self.registry_path
}

/// Returns the registry manifest specified in the registry repo.
#[must_use]
pub fn manifest(&self) -> Option<&RegistryManifest> {
self.manifest.as_ref()
}

/// Returns the path to the `registry_manifest.yaml` file (if any).
#[must_use]
pub fn manifest_path(&self) -> Option<PathBuf> {
Expand Down
1 change: 0 additions & 1 deletion crates/weaver_semconv/src/semconv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,6 @@ mod tests {
InvalidSemConvSpec, InvalidSpanMissingSpanKind, RegistryNotFound,
};
use std::path::PathBuf;
use std::process::id;
use weaver_common::test::ServeStaticFiles;

#[test]
Expand Down