Skip to content

Commit d85d761

Browse files
authored
fix(package): check dirtiness of symlinks source files (#14981)
### What does this PR try to resolve? This adds a special case for checking source files are symlinks and have been modified when under a VCS control This is required because those paths may link to a file outside the current package root, but still under the git workdir, affecting the final packaged `.crate` file. ### How should we test and review this PR? Pretty similar to #14966, as a part of #14967. This may have potential performance issue. If a package contains thousands of symlinks, Cargo will fire `git status` for each of them. Not sure if we want to do anything proactively now. The introduction of the `PathEntry` struct gives us more room for storing file metadata to satisfiy use cases in the future. For instances, * Knowing a source file is a symlink and resolving it when packaging on Windows * #5664 * #14965 * Knowing more about a file's metadata (e.g. permission bits from Git) * #4413 * #8006 * Provide richer information for `cargo package --list`, for example JSON output mode * #11666 * #13331 * #13953
2 parents d73d2ca + 24dd205 commit d85d761

File tree

4 files changed

+87
-32
lines changed

4 files changed

+87
-32
lines changed

crates/cargo-util/src/paths.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -710,9 +710,9 @@ pub fn set_file_time_no_err<P: AsRef<Path>>(path: P, time: FileTime) {
710710
/// This canonicalizes both paths before stripping. This is useful if the
711711
/// paths are obtained in different ways, and one or the other may or may not
712712
/// have been normalized in some way.
713-
pub fn strip_prefix_canonical<P: AsRef<Path>>(
714-
path: P,
715-
base: P,
713+
pub fn strip_prefix_canonical(
714+
path: impl AsRef<Path>,
715+
base: impl AsRef<Path>,
716716
) -> Result<PathBuf, std::path::StripPrefixError> {
717717
// Not all filesystems support canonicalize. Just ignore if it doesn't work.
718718
let safe_canonicalize = |path: &Path| match path.canonicalize() {

src/cargo/ops/cargo_package/vcs.rs

+35-27
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
//! Helpers to gather the VCS information for `cargo package`.
22
3-
use std::path::Path;
3+
use std::collections::HashSet;
44
use std::path::PathBuf;
55

66
use anyhow::Context as _;
@@ -186,7 +186,7 @@ fn git(
186186
.iter()
187187
.filter(|src_file| dirty_files.iter().any(|path| src_file.starts_with(path)))
188188
.map(|p| p.as_ref())
189-
.chain(dirty_metadata_paths(pkg, repo)?.iter())
189+
.chain(dirty_files_outside_pkg_root(pkg, repo, src_files)?.iter())
190190
.map(|path| {
191191
pathdiff::diff_paths(path, cwd)
192192
.as_ref()
@@ -219,39 +219,47 @@ fn git(
219219
}
220220
}
221221

222-
/// Checks whether files at paths specified in `package.readme` and
223-
/// `package.license-file` have been modified.
222+
/// Checks whether "included" source files outside package root have been modified.
223+
///
224+
/// This currently looks at
225+
///
226+
/// * `package.readme` and `package.license-file` pointing to paths outside package root
227+
/// * symlinks targets reside outside package root
224228
///
225229
/// This is required because those paths may link to a file outside the
226230
/// current package root, but still under the git workdir, affecting the
227231
/// final packaged `.crate` file.
228-
fn dirty_metadata_paths(pkg: &Package, repo: &git2::Repository) -> CargoResult<Vec<PathBuf>> {
229-
let mut dirty_files = Vec::new();
232+
fn dirty_files_outside_pkg_root(
233+
pkg: &Package,
234+
repo: &git2::Repository,
235+
src_files: &[PathEntry],
236+
) -> CargoResult<HashSet<PathBuf>> {
237+
let pkg_root = pkg.root();
230238
let workdir = repo.workdir().unwrap();
231-
let root = pkg.root();
239+
232240
let meta = pkg.manifest().metadata();
233-
for path in [&meta.license_file, &meta.readme] {
234-
let Some(path) = path.as_deref().map(Path::new) else {
235-
continue;
236-
};
237-
let abs_path = paths::normalize_path(&root.join(path));
238-
if paths::strip_prefix_canonical(abs_path.as_path(), root).is_ok() {
239-
// Inside package root. Don't bother checking git status.
240-
continue;
241-
}
242-
if let Ok(rel_path) = paths::strip_prefix_canonical(abs_path.as_path(), workdir) {
243-
// Outside package root but under git workdir,
244-
if repo.status_file(&rel_path)? != git2::Status::CURRENT {
245-
dirty_files.push(if abs_path.is_symlink() {
246-
// For symlinks, shows paths to symlink sources
247-
workdir.join(rel_path)
248-
} else {
249-
abs_path
250-
});
251-
}
241+
let metadata_paths: Vec<_> = [&meta.license_file, &meta.readme]
242+
.into_iter()
243+
.filter_map(|p| p.as_deref())
244+
.map(|path| paths::normalize_path(&pkg_root.join(path)))
245+
.collect();
246+
247+
let mut dirty_symlinks = HashSet::new();
248+
for rel_path in src_files
249+
.iter()
250+
.filter(|p| p.is_symlink_or_under_symlink())
251+
.map(|p| p.as_ref())
252+
.chain(metadata_paths.iter())
253+
// If inside package root. Don't bother checking git status.
254+
.filter(|p| paths::strip_prefix_canonical(p, pkg_root).is_err())
255+
// Handle files outside package root but under git workdir,
256+
.filter_map(|p| paths::strip_prefix_canonical(p, workdir).ok())
257+
{
258+
if repo.status_file(&rel_path)? != git2::Status::CURRENT {
259+
dirty_symlinks.insert(workdir.join(rel_path));
252260
}
253261
}
254-
Ok(dirty_files)
262+
Ok(dirty_symlinks)
255263
}
256264

257265
/// Helper to collect dirty statuses for a single repo.

src/cargo/sources/path.rs

+40
Original file line numberDiff line numberDiff line change
@@ -448,6 +448,8 @@ impl From<gix::dir::entry::Kind> for FileType {
448448
pub struct PathEntry {
449449
path: PathBuf,
450450
ty: FileType,
451+
/// Whether this path was visited when traversing a symlink directory.
452+
under_symlink_dir: bool,
451453
}
452454

453455
impl PathEntry {
@@ -469,10 +471,21 @@ impl PathEntry {
469471

470472
/// Similar to [`std::path::Path::is_symlink`]
471473
/// but doesn't follow the symbolic link nor make any system call
474+
///
475+
/// If the path is not a symlink but under a symlink parent directory,
476+
/// this will return false.
477+
/// See [`PathEntry::is_symlink_or_under_symlink`] for an alternative.
472478
pub fn is_symlink(&self) -> bool {
473479
matches!(self.ty, FileType::Symlink)
474480
}
475481

482+
/// Whether a path is a symlink or a path under a symlink directory.
483+
///
484+
/// Use [`PathEntry::is_symlink`] to get the exact file type of the path only.
485+
pub fn is_symlink_or_under_symlink(&self) -> bool {
486+
self.is_symlink() || self.under_symlink_dir
487+
}
488+
476489
/// Whether this path might be a plain text symlink.
477490
///
478491
/// Git may check out symlinks as plain text files that contain the link texts,
@@ -826,6 +839,9 @@ fn list_files_gix(
826839
files.push(PathEntry {
827840
path: file_path,
828841
ty,
842+
// Git index doesn't include files from symlink diretory,
843+
// symlink dirs are handled in `list_files_walk`.
844+
under_symlink_dir: false,
829845
});
830846
}
831847
}
@@ -847,6 +863,10 @@ fn list_files_walk(
847863
) -> CargoResult<()> {
848864
let walkdir = WalkDir::new(path)
849865
.follow_links(true)
866+
// While this is the default, set it explicitly.
867+
// We need walkdir to visit the directory tree in depth-first order,
868+
// so we can ensure a path visited later be under a certain directory.
869+
.contents_first(false)
850870
.into_iter()
851871
.filter_entry(|entry| {
852872
let path = entry.path();
@@ -876,10 +896,27 @@ fn list_files_walk(
876896

877897
true
878898
});
899+
900+
let mut current_symlink_dir = None;
879901
for entry in walkdir {
880902
match entry {
881903
Ok(entry) => {
882904
let file_type = entry.file_type();
905+
906+
match current_symlink_dir.as_ref() {
907+
Some(dir) if entry.path().starts_with(dir) => {
908+
// Still walk under the same parent symlink dir, so keep it
909+
}
910+
Some(_) | None => {
911+
// Not under any parent symlink dir, update the current one.
912+
current_symlink_dir = if file_type.is_dir() && entry.path_is_symlink() {
913+
Some(entry.path().to_path_buf())
914+
} else {
915+
None
916+
};
917+
}
918+
}
919+
883920
if file_type.is_file() || file_type.is_symlink() {
884921
// We follow_links(true) here so check if entry was created from a symlink
885922
let ty = if entry.path_is_symlink() {
@@ -890,6 +927,8 @@ fn list_files_walk(
890927
ret.push(PathEntry {
891928
path: entry.into_path(),
892929
ty,
930+
// This rely on contents_first(false), which walks in depth-first order
931+
under_symlink_dir: current_symlink_dir.is_some(),
893932
});
894933
}
895934
}
@@ -907,6 +946,7 @@ fn list_files_walk(
907946
Some(path) => ret.push(PathEntry {
908947
path: path.to_path_buf(),
909948
ty: FileType::Other,
949+
under_symlink_dir: false,
910950
}),
911951
None => return Err(err.into()),
912952
},

tests/testsuite/package.rs

+9-2
Original file line numberDiff line numberDiff line change
@@ -1339,10 +1339,12 @@ fn dirty_file_outside_pkg_root_considered_dirty() {
13391339
license-file = "../LICENSE"
13401340
"#,
13411341
)
1342+
.file("original-dir/file", "before")
13421343
.symlink("lib.rs", "isengard/src/lib.rs")
13431344
.symlink("README.md", "isengard/README.md")
13441345
.file(&main_outside_pkg_root, "fn main() {}")
13451346
.symlink(&main_outside_pkg_root, "isengard/src/main.rs")
1347+
.symlink_dir("original-dir", "isengard/symlink-dir")
13461348
});
13471349
git::commit(&repo);
13481350

@@ -1352,6 +1354,7 @@ fn dirty_file_outside_pkg_root_considered_dirty() {
13521354
// * Changes in files outside package root that source files symlink to
13531355
p.change_file("README.md", "after");
13541356
p.change_file("lib.rs", "pub fn after() {}");
1357+
p.change_file("original-dir/file", "after");
13551358
// * Changes in files outside pkg root that `license-file`/`readme` point to
13561359
p.change_file("LICENSE", "after");
13571360
// * When workspace inheritance is involved and changed
@@ -1375,10 +1378,12 @@ fn dirty_file_outside_pkg_root_considered_dirty() {
13751378
p.cargo("package --workspace --no-verify")
13761379
.with_status(101)
13771380
.with_stderr_data(str![[r#"
1378-
[ERROR] 2 files in the working directory contain changes that were not yet committed into git:
1381+
[ERROR] 4 files in the working directory contain changes that were not yet committed into git:
13791382
13801383
LICENSE
13811384
README.md
1385+
lib.rs
1386+
original-dir/file
13821387
13831388
to proceed despite this and include the uncommitted changes, pass the `--allow-dirty` flag
13841389
@@ -1388,7 +1393,7 @@ to proceed despite this and include the uncommitted changes, pass the `--allow-d
13881393
p.cargo("package --workspace --no-verify --allow-dirty")
13891394
.with_stderr_data(str![[r#"
13901395
[PACKAGING] isengard v0.0.0 ([ROOT]/foo/isengard)
1391-
[PACKAGED] 8 files, [FILE_SIZE]B ([FILE_SIZE]B compressed)
1396+
[PACKAGED] 9 files, [FILE_SIZE]B ([FILE_SIZE]B compressed)
13921397
13931398
"#]])
13941399
.run();
@@ -1411,13 +1416,15 @@ edition = "2021"
14111416
"Cargo.toml.orig",
14121417
"src/lib.rs",
14131418
"src/main.rs",
1419+
"symlink-dir/file",
14141420
"Cargo.lock",
14151421
"LICENSE",
14161422
"README.md",
14171423
],
14181424
[
14191425
("src/lib.rs", str!["pub fn after() {}"]),
14201426
("src/main.rs", str![r#"fn main() { eprintln!("after"); }"#]),
1427+
("symlink-dir/file", str!["after"]),
14211428
("README.md", str!["after"]),
14221429
("LICENSE", str!["after"]),
14231430
("Cargo.toml", cargo_toml),

0 commit comments

Comments
 (0)