Skip to content

Commit ffe0a30

Browse files
authored
SDK DataLoaders 1: barebones Rust support (#5327)
Exposes raw `DataLoader`s to the Rust SDK through 2 new methods: `RecordingStream::log_file_from_path` & `RecordingStream::log_file_from_contents`. Everything streams asynchronously, as expected. There's no way to configure the behavior of the `DataLoader` at all, yet. That comes next. ```rust rec.log_file_from_path(filepath)?; ``` ![image](https://github.com/rerun-io/rerun/assets/2910679/fac1514a-a00b-40c3-8950-df076080e1fc) This highlighted some brittleness on the shutdown path, see: - #5335 --- Part of series of PR to expose configurable `DataLoader`s to our SDKs: - #5327 - #5328 - #5330 - #5337
1 parent 07c5286 commit ffe0a30

File tree

10 files changed

+278
-16
lines changed

10 files changed

+278
-16
lines changed

Cargo.lock

+25-12
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/re_data_source/src/load_file.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ pub fn load_from_path(
2727
if !path.exists() {
2828
return Err(std::io::Error::new(
2929
std::io::ErrorKind::NotFound,
30-
"path does not exist: {path:?}",
30+
format!("path does not exist: {path:?}"),
3131
)
3232
.into());
3333
}

crates/re_log_types/src/lib.rs

+2
Original file line numberDiff line numberDiff line change
@@ -312,6 +312,7 @@ pub enum FileSource {
312312
Cli,
313313
DragAndDrop,
314314
FileDialog,
315+
Sdk,
315316
}
316317

317318
/// The source of a recording or blueprint.
@@ -358,6 +359,7 @@ impl std::fmt::Display for StoreSource {
358359
FileSource::Cli => write!(f, "File via CLI"),
359360
FileSource::DragAndDrop => write!(f, "File via drag-and-drop"),
360361
FileSource::FileDialog => write!(f, "File via file dialog"),
362+
FileSource::Sdk => write!(f, "File via SDK"),
361363
},
362364
Self::Viewer => write!(f, "Viewer-generated"),
363365
Self::Other(string) => format!("{string:?}").fmt(f), // put it in quotes

crates/re_sdk/Cargo.toml

+7
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,12 @@ all-features = true
2121
[features]
2222
default = []
2323

24+
## Support for using Rerun's data-loaders directly from the SDK.
25+
##
26+
## See our `log_file` example and <https://www.rerun.io/docs/howto/open-any-file>
27+
## for more information.
28+
data_loaders = ["dep:re_data_source", "dep:re_smart_channel"]
29+
2430
## Support serving a web viewer over HTTP.
2531
##
2632
## Enabling this inflates the binary size quite a bit, since it embeds the viewer wasm.
@@ -58,6 +64,7 @@ thiserror.workspace = true
5864

5965
# Optional dependencies
6066

67+
re_data_source = { workspace = true, optional = true }
6168
re_smart_channel = { workspace = true, optional = true }
6269
re_ws_comms = { workspace = true, optional = true }
6370
re_web_viewer_server = { workspace = true, optional = true }

crates/re_sdk/src/recording_stream.rs

+143-2
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ use std::sync::{atomic::AtomicI64, Arc};
55
use ahash::HashMap;
66
use crossbeam::channel::{Receiver, Sender};
77

8+
use parking_lot::Mutex;
89
use re_log_types::{
910
ApplicationId, ArrowChunkReleaseCallback, DataCell, DataCellError, DataRow, DataTable,
1011
DataTableBatcher, DataTableBatcherConfig, DataTableBatcherError, EntityPath, LogMsg, RowId,
@@ -61,7 +62,7 @@ pub enum RecordingStreamError {
6162
#[error("Failed to spawn background thread '{name}': {err}")]
6263
SpawnThread {
6364
/// Name of the thread
64-
name: &'static str,
65+
name: String,
6566

6667
/// Inner error explaining why the thread failed to spawn.
6768
err: std::io::Error,
@@ -79,6 +80,11 @@ pub enum RecordingStreamError {
7980
/// An error that can occur because a row in the store has inconsistent columns.
8081
#[error(transparent)]
8182
DataReadError(#[from] re_log_types::DataReadError),
83+
84+
/// An error occurred while attempting to use a [`re_data_source::DataLoader`].
85+
#[cfg(feature = "data_loaders")]
86+
#[error(transparent)]
87+
DataLoaderError(#[from] re_data_source::DataLoaderError),
8288
}
8389

8490
/// Results that can occur when creating/manipulating a [`RecordingStream`].
@@ -623,6 +629,12 @@ struct RecordingStreamInner {
623629
batcher: DataTableBatcher,
624630
batcher_to_sink_handle: Option<std::thread::JoinHandle<()>>,
625631

632+
/// Keeps track of the top-level threads that were spawned in order to execute the `DataLoader`
633+
/// machinery in the context of this `RecordingStream`.
634+
///
635+
/// See [`RecordingStream::log_file_from_path`] and [`RecordingStream::log_file_from_contents`].
636+
dataloader_handles: Mutex<Vec<std::thread::JoinHandle<()>>>,
637+
626638
pid_at_creation: u32,
627639
}
628640

@@ -633,6 +645,16 @@ impl Drop for RecordingStreamInner {
633645
return;
634646
}
635647

648+
// Run all pending top-level `DataLoader` threads that were started from the SDK to completion.
649+
//
650+
// TODO(cmc): At some point we might want to make it configurable, though I cannot really
651+
// think of a use case where you'd want to drop those threads immediately upon
652+
// disconnection.
653+
let dataloader_handles = std::mem::take(&mut *self.dataloader_handles.lock());
654+
for handle in dataloader_handles {
655+
handle.join().ok();
656+
}
657+
636658
// NOTE: The command channel is private, if we're here, nothing is currently capable of
637659
// sending data down the pipeline.
638660
self.batcher.flush_blocking();
@@ -679,7 +701,10 @@ impl RecordingStreamInner {
679701
let batcher = batcher.clone();
680702
move || forwarding_thread(info, sink, cmds_rx, batcher.tables(), on_release)
681703
})
682-
.map_err(|err| RecordingStreamError::SpawnThread { name: NAME, err })?
704+
.map_err(|err| RecordingStreamError::SpawnThread {
705+
name: NAME.into(),
706+
err,
707+
})?
683708
};
684709

685710
Ok(RecordingStreamInner {
@@ -688,6 +713,7 @@ impl RecordingStreamInner {
688713
cmds_tx,
689714
batcher,
690715
batcher_to_sink_handle: Some(batcher_to_sink_handle),
716+
dataloader_handles: Mutex::new(Vec::new()),
691717
pid_at_creation: std::process::id(),
692718
})
693719
}
@@ -989,6 +1015,103 @@ impl RecordingStream {
9891015

9901016
Ok(())
9911017
}
1018+
1019+
/// Logs the file at the given `path` using all [`re_data_source::DataLoader`]s available.
1020+
///
1021+
/// A single `path` might be handled by more than one loader.
1022+
///
1023+
/// This method blocks until either at least one [`re_data_source::DataLoader`] starts
1024+
/// streaming data in or all of them fail.
1025+
///
1026+
/// See <https://www.rerun.io/docs/howto/open-any-file> for more information.
1027+
#[cfg(feature = "data_loaders")]
1028+
pub fn log_file_from_path(
1029+
&self,
1030+
filepath: impl AsRef<std::path::Path>,
1031+
) -> RecordingStreamResult<()> {
1032+
self.log_file(filepath, None)
1033+
}
1034+
1035+
/// Logs the given `contents` using all [`re_data_source::DataLoader`]s available.
1036+
///
1037+
/// A single `path` might be handled by more than one loader.
1038+
///
1039+
/// This method blocks until either at least one [`re_data_source::DataLoader`] starts
1040+
/// streaming data in or all of them fail.
1041+
///
1042+
/// See <https://www.rerun.io/docs/howto/open-any-file> for more information.
1043+
#[cfg(feature = "data_loaders")]
1044+
pub fn log_file_from_contents(
1045+
&self,
1046+
filepath: impl AsRef<std::path::Path>,
1047+
contents: std::borrow::Cow<'_, [u8]>,
1048+
) -> RecordingStreamResult<()> {
1049+
self.log_file(filepath, Some(contents))
1050+
}
1051+
1052+
#[cfg(feature = "data_loaders")]
1053+
fn log_file(
1054+
&self,
1055+
filepath: impl AsRef<std::path::Path>,
1056+
contents: Option<std::borrow::Cow<'_, [u8]>>,
1057+
) -> RecordingStreamResult<()> {
1058+
let filepath = filepath.as_ref();
1059+
let has_contents = contents.is_some();
1060+
1061+
let (tx, rx) = re_smart_channel::smart_channel(
1062+
re_smart_channel::SmartMessageSource::Sdk,
1063+
re_smart_channel::SmartChannelSource::File(filepath.into()),
1064+
);
1065+
1066+
let Some(store_id) = &self.store_info().map(|info| info.store_id.clone()) else {
1067+
// There's no recording.
1068+
return Ok(());
1069+
};
1070+
if let Some(contents) = contents {
1071+
re_data_source::load_from_file_contents(
1072+
store_id,
1073+
re_log_types::FileSource::Sdk,
1074+
filepath,
1075+
contents,
1076+
&tx,
1077+
)?;
1078+
} else {
1079+
re_data_source::load_from_path(store_id, re_log_types::FileSource::Sdk, filepath, &tx)?;
1080+
}
1081+
drop(tx);
1082+
1083+
// We can safely ignore the error on `recv()` as we're in complete control of both ends of
1084+
// the channel.
1085+
let thread_name = if has_contents {
1086+
format!("log_file_from_contents({filepath:?})")
1087+
} else {
1088+
format!("log_file_from_path({filepath:?})")
1089+
};
1090+
let handle = std::thread::Builder::new()
1091+
.name(thread_name.clone())
1092+
.spawn({
1093+
let this = self.clone();
1094+
move || {
1095+
while let Some(msg) = rx.recv().ok().and_then(|msg| msg.into_data()) {
1096+
this.record_msg(msg);
1097+
}
1098+
}
1099+
})
1100+
.map_err(|err| RecordingStreamError::SpawnThread {
1101+
name: thread_name,
1102+
err,
1103+
})?;
1104+
1105+
debug_assert!(
1106+
self.inner.is_some(),
1107+
"recording should always be fully init at this stage"
1108+
);
1109+
if let Some(inner) = self.inner.as_ref() {
1110+
inner.dataloader_handles.lock().push(handle);
1111+
}
1112+
1113+
Ok(())
1114+
}
9921115
}
9931116

9941117
#[allow(clippy::needless_pass_by_value)]
@@ -1450,6 +1573,22 @@ impl RecordingStream {
14501573
/// terms of data durability and ordering.
14511574
/// See [`Self::set_sink`] for more information.
14521575
pub fn disconnect(&self) {
1576+
let Some(this) = &*self.inner else {
1577+
re_log::warn_once!("Recording disabled - call to disconnect() ignored");
1578+
return;
1579+
};
1580+
1581+
// When disconnecting, we need to make sure that pending top-level `DataLoader` threads that
1582+
// were started from the SDK run to completion.
1583+
//
1584+
// TODO(cmc): At some point we might want to make it configurable, though I cannot really
1585+
// think of a use case where you'd want to drop those threads immediately upon
1586+
// disconnection.
1587+
let dataloader_handles = std::mem::take(&mut *this.dataloader_handles.lock());
1588+
for handle in dataloader_handles {
1589+
handle.join().ok();
1590+
}
1591+
14531592
self.set_sink(Box::new(crate::sink::BufferedSink::new()));
14541593
}
14551594
}
@@ -1465,11 +1604,13 @@ impl fmt::Debug for RecordingStream {
14651604
cmds_tx: _,
14661605
batcher: _,
14671606
batcher_to_sink_handle: _,
1607+
dataloader_handles,
14681608
pid_at_creation,
14691609
}) => f
14701610
.debug_struct("RecordingStream")
14711611
.field("info", &info)
14721612
.field("tick", &tick)
1613+
.field("pending_dataloaders", &dataloader_handles.lock().len())
14731614
.field("pid_at_creation", &pid_at_creation)
14741615
.finish_non_exhaustive(),
14751616
None => write!(f, "RecordingStream {{ disabled }}"),

crates/re_viewer/src/viewer_analytics/event.rs

+1
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ pub fn open_recording(
6666
re_log_types::FileSource::Cli => "file_cli".to_owned(),
6767
re_log_types::FileSource::DragAndDrop => "file_drag_and_drop".to_owned(),
6868
re_log_types::FileSource::FileDialog => "file_dialog".to_owned(),
69+
re_log_types::FileSource::Sdk => "file_sdk".to_owned(),
6970
},
7071
S::Viewer => "viewer".to_owned(),
7172
S::Other(other) => other.clone(),

0 commit comments

Comments
 (0)