Skip to content

Commit 166721d

Browse files
authored
SDK DataLoaders 6: customizable (external) loaders for Python (#5355)
Introduces the new `DataLoaderSettings` business to Python and updates examples accordingly (`external_data_loader` & `log_file`). ```bash python examples/python/external_data_loader/main.py --recording-id this-one --entity-path-prefix a/b/c --time sim_time=1000 --time wall_time=1709204046 --sequence sim_frame=42 examples/python/dna/main.py | rerun - ``` ![image](https://github.com/rerun-io/rerun/assets/2910679/bfda567d-3d16-42cd-be8e-8b1a0767a784) Checks: - [x] external loader ran manually (`python loader | rerun`) - [x] external loader via rerun (`rerun xxx.py`) - [x] log_file with external loader (`log_file xxx.py`) --- Part of a series of PRs to expose configurable `DataLoader`s to our SDKs: - #5327 - #5328 - #5330 - #5337 - #5351 - #5355 - #5361
1 parent 020eab4 commit 166721d

File tree

6 files changed

+152
-33
lines changed

6 files changed

+152
-33
lines changed

crates/re_data_source/src/data_loader/mod.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ pub struct DataLoaderSettings {
3030
// TODO(#5350): actually support this
3131
pub opened_store_id: Option<re_log_types::StoreId>,
3232

33-
/// What should the entity paths be prefixed with?
33+
/// What should the logged entity paths be prefixed with?
3434
pub entity_path_prefix: Option<EntityPath>,
3535

3636
/// At what time(s) should the data be logged to?

examples/python/external_data_loader/main.py

+45-2
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@
1414
# It is up to you whether you make use of that shared recording ID or not.
1515
# If you use it, the data will end up in the same recording as all other plugins interested in
1616
# that file, otherwise you can just create a dedicated recording for it. Or both.
17+
#
18+
# Check out `re_data_source::DataLoaderSettings` documentation for an exhaustive listing of
19+
# the available CLI parameters.
1720
parser = argparse.ArgumentParser(
1821
description="""
1922
This is an example executable data-loader plugin for the Rerun Viewer.
@@ -28,7 +31,23 @@
2831
"""
2932
)
3033
parser.add_argument("filepath", type=str)
31-
parser.add_argument("--recording-id", type=str)
34+
parser.add_argument("--recording-id", type=str, help="optional recommended ID for the recording")
35+
parser.add_argument("--entity-path-prefix", type=str, help="optional prefix for all entity paths")
36+
parser.add_argument(
37+
"--timeless", action="store_true", default=False, help="optionally mark data to be logged as timeless"
38+
)
39+
parser.add_argument(
40+
"--time",
41+
type=str,
42+
action="append",
43+
help="optional timestamps to log at (e.g. `--time sim_time=1709203426`)",
44+
)
45+
parser.add_argument(
46+
"--sequence",
47+
type=str,
48+
action="append",
49+
help="optional sequences to log at (e.g. `--sequence sim_frame=42`)",
50+
)
3251
args = parser.parse_args()
3352

3453

@@ -44,10 +63,34 @@ def main() -> None:
4463
# The most important part of this: log to standard output so the Rerun Viewer can ingest it!
4564
rr.stdout()
4665

66+
set_time_from_args()
67+
68+
if args.entity_path_prefix:
69+
entity_path = f"{args.entity_path_prefix}/{args.filepath}"
70+
else:
71+
entity_path = args.filepath
72+
4773
with open(args.filepath) as file:
4874
body = file.read()
4975
text = f"""## Some Python code\n```python\n{body}\n```\n"""
50-
rr.log(args.filepath, rr.TextDocument(text, media_type=rr.MediaType.MARKDOWN), timeless=True)
76+
rr.log(entity_path, rr.TextDocument(text, media_type=rr.MediaType.MARKDOWN), timeless=args.timeless)
77+
78+
79+
def set_time_from_args() -> None:
    """Register the timepoint(s) passed on the CLI with the Rerun SDK.

    Applies every `--time name=seconds` pair via `rr.set_time_seconds` and every
    `--sequence name=index` pair via `rr.set_time_sequence`, so that subsequent
    `rr.log` calls are stamped with those timelines. Does nothing when
    `--timeless` was requested. Malformed `name=value` pairs are skipped.
    """
    if args.timeless:
        return

    # Temporal timelines, e.g. `--time sim_time=1709203426` (seconds).
    # `or []` covers the case where the repeatable option was never passed
    # (argparse leaves an `action="append"` option as None in that case).
    for time_str in args.time or []:
        parts = time_str.split("=")
        if len(parts) != 2:
            continue  # best-effort: silently skip malformed entries
        timeline_name, time = parts
        rr.set_time_seconds(timeline_name, float(time))

    # Sequence timelines, e.g. `--sequence sim_frame=42` (integer ticks).
    # BUG FIX: the original iterated over `args.time` here as well, so
    # `--sequence` arguments were ignored and `--time` values were re-logged
    # as integer sequences.
    for time_str in args.sequence or []:
        parts = time_str.split("=")
        if len(parts) != 2:
            continue
        timeline_name, time = parts
        rr.set_time_sequence(timeline_name, int(time))
5194

5295

5396
if __name__ == "__main__":

examples/python/log_file/main.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -34,12 +34,12 @@
3434
for filepath in args.filepaths:
3535
if not args.from_contents:
3636
# Either log the file using its path…
37-
rr.log_file_from_path(filepath)
37+
rr.log_file_from_path(filepath, entity_path_prefix="log_file_example")
3838
else:
3939
# …or using its contents if you already have them loaded for some reason.
4040
try:
4141
with open(filepath, "rb") as file:
42-
rr.log_file_from_contents(filepath, file.read())
42+
rr.log_file_from_contents(filepath, file.read(), entity_path_prefix="log_file_example")
4343
except Exception:
4444
pass
4545

examples/rust/external_data_loader/src/main.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ struct Args {
3737
#[argh(option)]
3838
entity_path_prefix: Option<String>,
3939

40-
/// optional mark data to be logged as timeless
40+
/// optionally mark data to be logged as timeless
4141
#[argh(switch)]
4242
timeless: bool,
4343

rerun_py/rerun_sdk/rerun/_log.py

+41-2
Original file line numberDiff line numberDiff line change
@@ -283,10 +283,14 @@ def log_components(
283283
)
284284

285285

286+
# TODO(#3841): expose timepoint settings once we implement stateless APIs
286287
@catch_and_log_exceptions()
287288
def log_file_from_path(
288289
file_path: str | Path,
289290
*,
291+
recording_id: str | None = None,
292+
entity_path_prefix: str | None = None,
293+
timeless: bool | None = None,
290294
recording: RecordingStream | None = None,
291295
) -> None:
292296
r"""
@@ -304,21 +308,40 @@ def log_file_from_path(
304308
file_path:
305309
Path to the file to be logged.
306310
311+
recording_id:
312+
The recommended `RecordingId` to log the data to.
313+
314+
entity_path_prefix:
315+
What should the logged entity paths be prefixed with?
316+
317+
timeless:
318+
Should the logged data be timeless?
319+
307320
recording:
308321
Specifies the [`rerun.RecordingStream`][] to use. If left unspecified,
309322
defaults to the current active data recording, if there is one. See
310323
also: [`rerun.init`][], [`rerun.set_global_data_recording`][].
311324
312325
"""
313326

314-
bindings.log_file_from_path(Path(file_path), recording=recording)
327+
bindings.log_file_from_path(
328+
Path(file_path),
329+
recording_id=recording_id,
330+
entity_path_prefix=entity_path_prefix,
331+
timeless=timeless,
332+
recording=recording,
333+
)
315334

316335

336+
# TODO(cmc): expose timepoint settings once we implement stateless APIs
317337
@catch_and_log_exceptions()
318338
def log_file_from_contents(
319339
file_path: str | Path,
320340
file_contents: bytes,
321341
*,
342+
recording_id: str | None = None,
343+
entity_path_prefix: str | None = None,
344+
timeless: bool | None = None,
322345
recording: RecordingStream | None = None,
323346
) -> None:
324347
r"""
@@ -339,14 +362,30 @@ def log_file_from_contents(
339362
file_contents:
340363
Contents to be logged.
341364
365+
recording_id:
366+
The recommended `RecordingId` to log the data to.
367+
368+
entity_path_prefix:
369+
What should the logged entity paths be prefixed with?
370+
371+
timeless:
372+
Should the logged data be timeless?
373+
342374
recording:
343375
Specifies the [`rerun.RecordingStream`][] to use. If left unspecified,
344376
defaults to the current active data recording, if there is one. See
345377
also: [`rerun.init`][], [`rerun.set_global_data_recording`][].
346378
347379
"""
348380

349-
bindings.log_file_from_contents(Path(file_path), file_contents, recording=recording)
381+
bindings.log_file_from_contents(
382+
Path(file_path),
383+
file_contents,
384+
recording_id=recording_id,
385+
entity_path_prefix=entity_path_prefix,
386+
timeless=timeless,
387+
recording=recording,
388+
)
350389

351390

352391
def escape_entity_path_part(part: str) -> str:

rerun_py/src/python_bridge.rs

+62-25
Original file line numberDiff line numberDiff line change
@@ -967,59 +967,96 @@ fn log_arrow_msg(
967967
#[pyfunction]
968968
#[pyo3(signature = (
969969
file_path,
970+
recording_id = None,
971+
entity_path_prefix = None,
972+
timeless = None,
970973
recording=None,
971974
))]
972975
fn log_file_from_path(
973976
py: Python<'_>,
974977
file_path: std::path::PathBuf,
978+
recording_id: Option<String>,
979+
entity_path_prefix: Option<String>,
980+
timeless: Option<bool>,
975981
recording: Option<&PyRecordingStream>,
976982
) -> PyResult<()> {
977-
let Some(recording) = get_data_recording(recording) else {
978-
return Ok(());
979-
};
980-
981-
let Some(recording_id) = recording.store_info().map(|info| info.store_id.clone()) else {
982-
return Ok(());
983-
};
984-
let settings = rerun::DataLoaderSettings::recommended(recording_id);
985-
986-
recording
987-
.log_file_from_path(&settings, file_path)
988-
.map_err(|err| PyRuntimeError::new_err(err.to_string()))?;
989-
990-
py.allow_threads(flush_garbage_queue);
991-
992-
Ok(())
983+
log_file(
984+
py,
985+
file_path,
986+
None,
987+
recording_id,
988+
entity_path_prefix,
989+
timeless,
990+
recording,
991+
)
993992
}
994993

995994
#[pyfunction]
996995
#[pyo3(signature = (
997996
file_path,
998997
file_contents,
998+
recording_id = None,
999+
entity_path_prefix = None,
1000+
timeless = None,
9991001
recording=None,
10001002
))]
10011003
fn log_file_from_contents(
10021004
py: Python<'_>,
10031005
file_path: std::path::PathBuf,
10041006
file_contents: &[u8],
1007+
recording_id: Option<String>,
1008+
entity_path_prefix: Option<String>,
1009+
timeless: Option<bool>,
1010+
recording: Option<&PyRecordingStream>,
1011+
) -> PyResult<()> {
1012+
log_file(
1013+
py,
1014+
file_path,
1015+
Some(file_contents),
1016+
recording_id,
1017+
entity_path_prefix,
1018+
timeless,
1019+
recording,
1020+
)
1021+
}
1022+
1023+
fn log_file(
1024+
py: Python<'_>,
1025+
file_path: std::path::PathBuf,
1026+
file_contents: Option<&[u8]>,
1027+
recording_id: Option<String>,
1028+
entity_path_prefix: Option<String>,
1029+
timeless: Option<bool>,
10051030
recording: Option<&PyRecordingStream>,
10061031
) -> PyResult<()> {
10071032
let Some(recording) = get_data_recording(recording) else {
10081033
return Ok(());
10091034
};
10101035

1011-
let Some(recording_id) = recording.store_info().map(|info| info.store_id.clone()) else {
1036+
let Some(recording_id) = recording
1037+
.store_info()
1038+
.map(|info| info.store_id.clone())
1039+
.or(recording_id.map(|id| StoreId::from_string(StoreKind::Recording, id)))
1040+
else {
10121041
return Ok(());
10131042
};
1014-
let settings = rerun::DataLoaderSettings::recommended(recording_id);
10151043

1016-
recording
1017-
.log_file_from_contents(
1018-
&settings,
1019-
file_path,
1020-
std::borrow::Cow::Borrowed(file_contents),
1021-
)
1022-
.map_err(|err| PyRuntimeError::new_err(err.to_string()))?;
1044+
let settings = rerun::DataLoaderSettings {
1045+
store_id: recording_id,
1046+
opened_store_id: None,
1047+
entity_path_prefix: entity_path_prefix.map(Into::into),
1048+
timepoint: timeless.unwrap_or(false).then(TimePoint::timeless),
1049+
};
1050+
1051+
if let Some(contents) = file_contents {
1052+
recording
1053+
.log_file_from_contents(&settings, file_path, std::borrow::Cow::Borrowed(contents))
1054+
.map_err(|err| PyRuntimeError::new_err(err.to_string()))?;
1055+
} else {
1056+
recording
1057+
.log_file_from_path(&settings, file_path)
1058+
.map_err(|err| PyRuntimeError::new_err(err.to_string()))?;
1059+
}
10231060

10241061
py.allow_threads(flush_garbage_queue);
10251062

0 commit comments

Comments (0)