Skip to content

Commit 485bd2d

Browse files
committed
Memoize filesystem operations during unpack
1 parent ad4ab12 commit 485bd2d

File tree

3 files changed

+40
-25
lines changed

3 files changed

+40
-25
lines changed

Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ contents are never required to be entirely resident in memory all at once.
2727
filetime = "0.2"
2828
futures-core = "0.3"
2929
portable-atomic = "1"
30+
rustc-hash = "2.1.0"
3031
tokio = { version = "1", features = ["fs", "io-util", "rt"] }
3132
tokio-stream = "0.1"
3233

src/archive.rs

+7-6
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
use portable_atomic::{AtomicU64, Ordering};
2+
use rustc_hash::FxHashSet;
13
use std::{
24
cmp,
35
collections::VecDeque,
@@ -6,10 +8,6 @@ use std::{
68
sync::Arc,
79
task::{Context, Poll},
810
};
9-
use portable_atomic::{
10-
AtomicU64,
11-
Ordering,
12-
};
1311
use tokio::{
1412
fs,
1513
io::{self, AsyncRead as Read, AsyncReadExt},
@@ -244,6 +242,9 @@ impl<R: Read + Unpin> Archive<R> {
244242
.await
245243
.unwrap_or_else(|_| dst.to_path_buf());
246244

245+
// Memoize filesystem calls to canonicalize paths.
246+
let mut targets = FxHashSet::default();
247+
247248
// Delay any directory entries until the end (they will be created if needed by
248249
// descendants), to ensure that directory permissions do not interfer with descendant
249250
// extraction.
@@ -253,12 +254,12 @@ impl<R: Read + Unpin> Archive<R> {
253254
if file.header().entry_type() == crate::EntryType::Directory {
254255
directories.push(file);
255256
} else {
256-
file.unpack_in(&dst).await?;
257+
file.fields.unpack_in(&dst, &mut targets).await?;
257258
}
258259
}
259260

260261
for mut dir in directories {
261-
dir.unpack_in(&dst).await?;
262+
dir.fields.unpack_in(&dst, &mut targets).await?;
262263
}
263264

264265
Ok(())

src/entry.rs

+32-19
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ use crate::{
22
error::TarError, header::bytes2path, other, pax::pax_extensions, Archive, Header, PaxExtensions,
33
};
44
use filetime::{self, FileTime};
5+
use rustc_hash::FxHashSet;
56
use std::{
67
borrow::Cow,
78
cmp,
@@ -26,7 +27,7 @@ use tokio::{
2627
/// be inspected. It acts as a file handle by implementing the Reader trait. An
2728
/// entry cannot be rewritten once inserted into an archive.
2829
pub struct Entry<R: Read + Unpin> {
29-
fields: EntryFields<R>,
30+
pub(crate) fields: EntryFields<R>,
3031
_ignored: marker::PhantomData<Archive<R>>,
3132
}
3233

@@ -274,7 +275,9 @@ impl<R: Read + Unpin> Entry<R> {
274275
/// # Ok(()) }) }
275276
/// ```
276277
pub async fn unpack_in<P: AsRef<Path>>(&mut self, dst: P) -> io::Result<Option<PathBuf>> {
277-
self.fields.unpack_in(dst.as_ref()).await
278+
self.fields
279+
.unpack_in(dst.as_ref(), &mut FxHashSet::default())
280+
.await
278281
}
279282

280283
/// Indicate whether extended file attributes (xattrs on Unix) are preserved
@@ -426,7 +429,20 @@ impl<R: Read + Unpin> EntryFields<R> {
426429
Ok(Some(pax_extensions(self.pax_extensions.as_ref().unwrap())))
427430
}
428431

429-
async fn unpack_in(&mut self, dst: &Path) -> io::Result<Option<PathBuf>> {
432+
/// Unpack the [`Entry`] into the specified destination.
433+
///
434+
/// It's assumed that `dst` is already canonicalized, and that the memoized set of validated
435+
/// paths are tied to `dst`.
436+
pub(crate) async fn unpack_in(
437+
&mut self,
438+
dst: &Path,
439+
memo: &mut FxHashSet<PathBuf>,
440+
) -> io::Result<Option<PathBuf>> {
441+
// It's assumed that `dst` is already canonicalized.
442+
debug_assert!(dst
443+
.canonicalize()
444+
.is_ok_and(|canon_target| canon_target == dst));
445+
430446
// Notes regarding bsdtar 2.8.3 / libarchive 2.8.3:
431447
// * Leading '/'s are trimmed. For example, `///test` is treated as
432448
// `test`.
@@ -478,13 +494,16 @@ impl<R: Read + Unpin> EntryFields<R> {
478494
None => return Ok(None),
479495
};
480496

481-
self.ensure_dir_created(dst, parent)
482-
.await
483-
.map_err(|e| TarError::new(&format!("failed to create `{}`", parent.display()), e))?;
484-
485-
let canon_target = self.validate_inside_dst(dst, parent).await?;
497+
// Validate the parent, if we haven't seen it yet.
498+
if !memo.contains(parent) {
499+
self.ensure_dir_created(dst, parent).await.map_err(|e| {
500+
TarError::new(&format!("failed to create `{}`", parent.display()), e)
501+
})?;
502+
self.validate_inside_dst(dst, parent).await?;
503+
memo.insert(parent.to_path_buf());
504+
}
486505

487-
self.unpack(Some(&canon_target), &file_dst)
506+
self.unpack(Some(&parent), &file_dst)
488507
.await
489508
.map_err(|e| TarError::new(&format!("failed to unpack `{}`", file_dst.display()), e))?;
490509

@@ -890,32 +909,26 @@ impl<R: Read + Unpin> EntryFields<R> {
890909
Ok(())
891910
}
892911

893-
async fn validate_inside_dst(&self, dst: &Path, file_dst: &Path) -> io::Result<PathBuf> {
912+
async fn validate_inside_dst(&self, dst: &Path, file_dst: &Path) -> io::Result<()> {
894913
// Abort if target (canonical) parent is outside of `dst`
895914
let canon_parent = file_dst.canonicalize().map_err(|err| {
896915
Error::new(
897916
err.kind(),
898917
format!("{} while canonicalizing {}", err, file_dst.display()),
899918
)
900919
})?;
901-
let canon_target = dst.canonicalize().map_err(|err| {
902-
Error::new(
903-
err.kind(),
904-
format!("{} while canonicalizing {}", err, dst.display()),
905-
)
906-
})?;
907-
if !canon_parent.starts_with(&canon_target) {
920+
if !canon_parent.starts_with(&dst) {
908921
let err = TarError::new(
909922
&format!(
910923
"trying to unpack outside of destination path: {}",
911-
canon_target.display()
924+
dst.display()
912925
),
913926
// TODO: use ErrorKind::InvalidInput here? (minor breaking change)
914927
Error::new(ErrorKind::Other, "Invalid argument"),
915928
);
916929
return Err(err.into());
917930
}
918-
Ok(canon_target)
931+
Ok(())
919932
}
920933
}
921934

0 commit comments

Comments
 (0)