Skip to content

Commit ad4ab12

Browse files
Buffer writes to archive entries (#4)
## Summary

For a large PyTorch wheel, this improves performance by about 20%.
1 parent dc46f8a commit ad4ab12

File tree

1 file changed

+13
-5
lines changed

1 file changed

+13
-5
lines changed

src/entry.rs

+13-5
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@ use std::{
66
borrow::Cow,
77
cmp,
88
collections::VecDeque,
9+
convert::TryFrom,
910
fmt,
1011
io::{Error, ErrorKind, SeekFrom},
1112
marker,
@@ -16,7 +17,7 @@ use std::{
1617
use tokio::{
1718
fs,
1819
fs::{remove_dir_all, remove_file, OpenOptions},
19-
io::{self, AsyncRead as Read, AsyncReadExt, AsyncSeekExt},
20+
io::{self, AsyncRead as Read, AsyncReadExt, AsyncSeekExt, AsyncWriteExt},
2021
};
2122

2223
/// A read-only view into an entry of an archive.
@@ -689,22 +690,29 @@ impl<R: Read + Unpin> EntryFields<R> {
689690
}
690691
}
691692
}?;
693+
694+
let size = usize::try_from(self.size).unwrap_or(usize::MAX);
695+
let capacity = cmp::min(size, 128 * 1024);
696+
let mut writer = io::BufWriter::with_capacity(capacity, &mut f);
692697
for io in self.data.drain(..) {
693698
match io {
694699
EntryIo::Data(mut d) => {
695700
let expected = d.limit();
696-
if io::copy(&mut d, &mut f).await? != expected {
701+
if io::copy(&mut d, &mut writer).await? != expected {
697702
return Err(other("failed to write entire file"));
698703
}
699704
}
700705
EntryIo::Pad(d) => {
701706
// TODO: checked cast to i64
702-
let to = SeekFrom::Current(d.limit() as i64);
703-
let size = f.seek(to).await?;
704-
f.set_len(size).await?;
707+
let pad_len = d.limit() as i64;
708+
writer.flush().await?;
709+
let f = writer.get_mut();
710+
let new_size = f.seek(SeekFrom::Current(pad_len)).await?;
711+
f.set_len(new_size).await?;
705712
}
706713
}
707714
}
715+
writer.flush().await?;
708716
Ok::<fs::File, io::Error>(f)
709717
}
710718
.await

0 commit comments

Comments
 (0)