Skip to content

Commit b1380d3

Browse files
authored
Add into_inner() on Rust writer (#1314)
### Changelog Add an `into_inner()` method on the Rust writer to get back the underlying stream. ### Docs https://linear.app/foxglove/issue/FG-9969/[rust-sdk]-cant-close-mcap-writer ### Description Some I/O errors only show up when the output file is closed. This change allows users to check for such errors. <!-- Describe the problem, what has changed, and motivation behind those changes. Pretend you are advocating for this change and the reader is skeptical. --> <!-- If necessary, link relevant Linear or Github issues. Use `Fixes: foxglove/repo#1234` to auto-close the Github issue or Fixes: FG-### for Linear isses. --> Fixes: FG-9969
1 parent fe9dc10 commit b1380d3

File tree

2 files changed

+131
-31
lines changed

2 files changed

+131
-31
lines changed

rust/src/io_utils.rs

+4
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,10 @@ impl<W> CountingCrcWriter<W> {
3737
pub fn finalize(self) -> (W, Hasher) {
3838
(self.inner, self.hasher)
3939
}
40+
41+
pub fn current_checksum(&self) -> u32 {
42+
self.hasher.clone().finalize()
43+
}
4044
}
4145

4246
impl<W: Write> Write for CountingCrcWriter<W> {

rust/src/write.rs

+127-31
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ use std::{
1010
use bimap::BiHashMap;
1111
use binrw::prelude::*;
1212
use byteorder::{WriteBytesExt, LE};
13+
#[cfg(feature = "zstd")]
14+
use zstd::stream::{raw as zraw, zio};
1315

1416
use crate::{
1517
chunk_sink::{ChunkMode, ChunkSink},
@@ -311,6 +313,7 @@ struct SchemaContent<'a> {
311313
/// and check for errors when done; otherwise the result will be unwrapped on drop.
312314
pub struct Writer<W: Write + Seek> {
313315
writer: Option<WriteMode<W>>,
316+
is_finished: bool,
314317
chunk_mode: ChunkMode,
315318
options: WriteOptions,
316319
schemas: BiHashMap<SchemaContent<'static>, u16>,
@@ -363,6 +366,7 @@ impl<W: Write + Seek> Writer<W> {
363366

364367
Ok(Self {
365368
writer: Some(WriteMode::Raw(writer)),
369+
is_finished: false,
366370
options: opts,
367371
chunk_mode,
368372
schemas: Default::default(),
@@ -667,10 +671,8 @@ impl<W: Write + Seek> Writer<W> {
667671
) -> McapResult<()> {
668672
self.finish_chunk()?;
669673

670-
let prev_writer = self.writer.take().expect(Self::WHERE_WRITER);
671-
672-
let WriteMode::Raw(w) = prev_writer else {
673-
panic!(
674+
let WriteMode::Raw(w) = self.writer.take().expect(Self::WRITER_IS_NONE) else {
675+
unreachable!(
674676
"since finish_chunk was called, write mode is guaranteed to be raw at this point"
675677
);
676678
};
@@ -786,31 +788,35 @@ impl<W: Write + Seek> Writer<W> {
786788
Ok(())
787789
}
788790

789-
/// `.expect()` message when we go to write and self.writer is `None`,
790-
/// which should only happen when [`Writer::finish()`] was called.
791-
const WHERE_WRITER: &'static str = "Trying to write a record on a finished MCAP";
791+
const WRITER_IS_NONE: &'static str = "unreachable: self.writer should never be None";
792+
793+
fn assert_not_finished(&self) {
794+
assert!(
795+
!self.is_finished,
796+
"{}",
797+
"Trying to write a record on a finished MCAP"
798+
);
799+
}
792800

793801
/// Starts a new chunk if we haven't done so already.
794802
fn start_chunk(&mut self) -> McapResult<&mut ChunkWriter<W>> {
803+
self.assert_not_finished();
804+
795805
// It is not possible to start writing a chunk if we're still writing an attachment. Return
796806
// an error instead.
797807
if let Some(WriteMode::Attachment(..)) = self.writer {
798808
return Err(McapError::AttachmentNotInProgress);
799809
}
800810

801-
// Some Rust tricky: we can't move the writer out of self.writer,
802-
// leave that empty for a bit, and then replace it with a ChunkWriter.
803-
// (That would leave it in an unspecified state if we bailed here!)
804-
// Instead briefly swap it out for a null writer while we set up the chunker
805-
// The writer will only be None if finish() was called.
806811
assert!(
807812
self.options.use_chunks,
808813
"Trying to write to a chunk when chunking is disabled"
809814
);
810815

811-
let prev_writer = self.writer.take().expect(Self::WHERE_WRITER);
812-
813-
self.writer = Some(match prev_writer {
816+
// Rust forbids moving values out of a &mut reference. We made self.writer an Option so we
817+
// can work around this by using take() to temporarily replace it with None while we
818+
// construct the ChunkWriter.
819+
self.writer = Some(match self.writer.take().expect(Self::WRITER_IS_NONE) {
814820
WriteMode::Raw(w) => {
815821
// It's chunkin time.
816822
WriteMode::Chunk(ChunkWriter::new(
@@ -832,16 +838,15 @@ impl<W: Write + Seek> Writer<W> {
832838

833839
/// Finish the current chunk, if we have one.
834840
fn finish_chunk(&mut self) -> McapResult<&mut CountingCrcWriter<W>> {
841+
self.assert_not_finished();
835842
// If we're currently writing an attachment then we're not writing a chunk. Return an
836843
// error instead.
837844
if let Some(WriteMode::Attachment(..)) = self.writer {
838845
return Err(McapError::AttachmentNotInProgress);
839846
}
840847

841-
// See above
842-
let prev_writer = self.writer.take().expect(Self::WHERE_WRITER);
843-
844-
self.writer = Some(match prev_writer {
848+
// See start_chunk() for why we use take() here.
849+
self.writer = Some(match self.writer.take().expect(Self::WRITER_IS_NONE) {
845850
WriteMode::Chunk(c) => {
846851
let (w, mode, index) = c.finish()?;
847852
self.chunk_indexes.push(index);
@@ -862,28 +867,29 @@ impl<W: Write + Seek> Writer<W> {
862867
///
863868
/// Subsequent calls to other methods will panic.
864869
pub fn finish(&mut self) -> McapResult<()> {
865-
if self.writer.is_none() {
870+
if self.is_finished {
866871
// We already called finish().
867872
// Maybe we're dropping after the user called it?
868873
return Ok(());
869874
}
870875

871876
// Finish any chunk we were working on and update stats, indexes, etc.
872877
self.finish_chunk()?;
878+
self.is_finished = true;
873879

874880
// Grab the writer - self.writer becoming None makes subsequent writes fail.
875-
let writer = match self.writer.take() {
881+
let writer = match &mut self.writer {
876882
// We called finish_chunk() above, so we're back to raw writes for
877883
// the summary section.
878884
Some(WriteMode::Raw(w)) => w,
879885
_ => unreachable!(),
880886
};
881-
let (mut writer, data_section_crc) = writer.finalize();
882-
let data_section_crc = data_section_crc.finalize();
887+
let data_section_crc = writer.current_checksum();
888+
let writer = writer.get_mut();
883889

884890
// We're done with the data secton!
885891
write_record(
886-
&mut writer,
892+
writer,
887893
&Record::DataEnd(records::DataEnd { data_section_crc }),
888894
)?;
889895

@@ -952,8 +958,8 @@ impl<W: Write + Seek> Writer<W> {
952958
}
953959

954960
// Write all schemas.
955-
let schemas_start = summary_start;
956961
if self.options.repeat_schemas && !all_schemas.is_empty() {
962+
let schemas_start: u64 = summary_start;
957963
for schema in all_schemas.iter() {
958964
write_record(&mut ccw, schema)?;
959965
}
@@ -1053,14 +1059,30 @@ impl<W: Write + Seek> Writer<W> {
10531059
ccw.write_u64::<LE>(summary_start)?;
10541060
ccw.write_u64::<LE>(summary_offset_start)?;
10551061

1056-
let (mut writer, summary_crc) = ccw.finalize();
1062+
let (writer, summary_crc) = ccw.finalize();
10571063

10581064
writer.write_u32::<LE>(summary_crc.finalize())?;
10591065

10601066
writer.write_all(MAGIC)?;
10611067
writer.flush()?;
10621068
Ok(())
10631069
}
1070+
1071+
/// Consumes this writer, returning the underlying stream. Unless [`Self::finish()`] was called
1072+
/// first, the underlying stream __will not contain a complete MCAP.__
1073+
///
1074+
/// Use this if you wish to handle any errors returned when the underlying stream is closed. In
1075+
/// particular, if using [`std::fs::File`], you may wish to call [`std::fs::File::sync_all()`]
1076+
/// to ensure all data was sent to the filesystem.
1077+
pub fn into_inner(mut self) -> W {
1078+
self.is_finished = true;
1079+
// Peel away all the layers of the writer to get the underlying stream.
1080+
match self.writer.take().expect(Self::WRITER_IS_NONE) {
1081+
WriteMode::Raw(w) => w.finalize().0,
1082+
WriteMode::Attachment(w) => w.writer.finalize().0.finalize().0,
1083+
WriteMode::Chunk(w) => w.compressor.finalize().0.into_inner().finalize().0.inner,
1084+
}
1085+
}
10641086
}
10651087

10661088
impl<W: Write + Seek> Drop for Writer<W> {
@@ -1071,8 +1093,10 @@ impl<W: Write + Seek> Drop for Writer<W> {
10711093

10721094
enum Compressor<W: Write> {
10731095
Null(W),
1096+
// zstd's Encoder wrapper doesn't let us get the inner writer without calling finish(), so use
1097+
// zio::Writer directly instead.
10741098
#[cfg(feature = "zstd")]
1075-
Zstd(zstd::Encoder<'static, W>),
1099+
Zstd(zio::Writer<W, zraw::Encoder<'static>>),
10761100
#[cfg(feature = "lz4")]
10771101
Lz4(lz4::Encoder<W>),
10781102
}
@@ -1082,7 +1106,10 @@ impl<W: Write> Compressor<W> {
10821106
Ok(match self {
10831107
Compressor::Null(w) => w,
10841108
#[cfg(feature = "zstd")]
1085-
Compressor::Zstd(w) => w.finish()?,
1109+
Compressor::Zstd(mut w) => {
1110+
w.finish()?;
1111+
w.into_inner().0
1112+
}
10861113
#[cfg(feature = "lz4")]
10871114
Compressor::Lz4(w) => {
10881115
let (output, result) = w.finish();
@@ -1091,6 +1118,16 @@ impl<W: Write> Compressor<W> {
10911118
}
10921119
})
10931120
}
1121+
1122+
fn into_inner(self) -> W {
1123+
match self {
1124+
Compressor::Null(w) => w,
1125+
#[cfg(feature = "zstd")]
1126+
Compressor::Zstd(w) => w.into_inner().0,
1127+
#[cfg(feature = "lz4")]
1128+
Compressor::Lz4(w) => w.finish().0,
1129+
}
1130+
}
10941131
}
10951132

10961133
impl<W: Write> Write for Compressor<W> {
@@ -1178,10 +1215,11 @@ impl<W: Write + Seek> ChunkWriter<W> {
11781215
#[cfg(feature = "zstd")]
11791216
Some(Compression::Zstd) => {
11801217
#[allow(unused_mut)]
1181-
let mut enc = zstd::Encoder::new(sink, 0)?;
1218+
let mut enc = zraw::Encoder::with_dictionary(0, &[])?;
1219+
// Enable multithreaded encoding on non-WASM targets.
11821220
#[cfg(not(target_arch = "wasm32"))]
1183-
enc.multithread(num_cpus::get_physical() as u32)?;
1184-
Compressor::Zstd(enc)
1221+
enc.set_parameter(zraw::CParameter::NbWorkers(num_cpus::get_physical() as u32))?;
1222+
Compressor::Zstd(zio::Writer::new(sink, enc))
11851223
}
11861224
#[cfg(feature = "lz4")]
11871225
Some(Compression::Lz4) => Compressor::Lz4(
@@ -1510,4 +1548,62 @@ mod tests {
15101548
};
15111549
assert!(matches!(too_many, McapError::TooManySchemas));
15121550
}
1551+
1552+
#[test]
1553+
#[should_panic(expected = "Trying to write a record on a finished MCAP")]
1554+
fn panics_if_write_called_after_finish() {
1555+
let file = std::io::Cursor::new(Vec::new());
1556+
let mut writer = Writer::new(file).expect("failed to construct writer");
1557+
writer.finish().expect("failed to finish writer");
1558+
1559+
let custom_channel = std::sync::Arc::new(crate::Channel {
1560+
id: 1,
1561+
topic: "chat".into(),
1562+
message_encoding: "json".into(),
1563+
metadata: BTreeMap::new(),
1564+
schema: None,
1565+
});
1566+
1567+
writer
1568+
.write(&crate::Message {
1569+
channel: custom_channel.clone(),
1570+
sequence: 0,
1571+
log_time: 0,
1572+
publish_time: 0,
1573+
data: Cow::Owned(Vec::new()),
1574+
})
1575+
.expect("could not write message");
1576+
}
1577+
1578+
#[test]
1579+
fn writes_message_and_checks_stream_length() {
1580+
let file = std::io::Cursor::new(Vec::new());
1581+
let mut writer = Writer::new(file).expect("failed to construct writer");
1582+
1583+
let custom_channel = std::sync::Arc::new(crate::Channel {
1584+
id: 1,
1585+
topic: "chat".into(),
1586+
message_encoding: "json".into(),
1587+
metadata: BTreeMap::new(),
1588+
schema: None,
1589+
});
1590+
1591+
writer
1592+
.write(&crate::Message {
1593+
channel: custom_channel.clone(),
1594+
sequence: 0,
1595+
log_time: 0,
1596+
publish_time: 0,
1597+
data: Cow::Owned(Vec::new()),
1598+
})
1599+
.expect("could not write message");
1600+
1601+
writer.finish().expect("failed to finish writer");
1602+
1603+
let output_len = writer
1604+
.into_inner()
1605+
.stream_position()
1606+
.expect("failed to get stream position");
1607+
assert_eq!(output_len, 487);
1608+
}
15131609
}

0 commit comments

Comments
 (0)