Skip to content

Commit 6001eda

Browse files
authored
Introduce IndexCell (#9226)
### Related * Part of #8635 ### What Introduces a new type, `IndexCell`: ```rs pub struct IndexCell { pub typ: TimeType, pub value: NonMinI64, } ``` And changes `TimePoint` to use it: ```rs pub struct TimePoint(BTreeMap<TimelineName, IndexCell>); ``` ### Naming The thought behind the name `IndexCell` is two-fold: A) It is symmetrical with `DataCell` B) It will allow us to at some point broaden to more types of indices (e.g. UUID) I’d like to rename `TimeType` to `IndexType` at some point. I haven’t planned this in detail though; I mostly want to avoid having to rename the _new_ things I add. ### TODO * [x] Full check ### Future work There are A LOT of tests using `Timeline` and `TimeInt` that would be better served by using `TimelineName` and `IndexCell`, but this is too much to clean up in this PR. There is also A LOT of (old) parameters of the type `impl TryInto<TimeInt>` which in effect means "An i64 or TimeInt that will be converted to a non-static `TimeInt` via saturating cast". It took me a long time to figure out that this is what it meant. We should come up with a nicer pattern for this. Maybe deprecate `TimePoint::with` and `TimePoint::insert`?
1 parent 9ad0885 commit 6001eda

File tree

24 files changed

+467
-225
lines changed

24 files changed

+467
-225
lines changed

crates/store/re_chunk/src/batcher.rs

Lines changed: 17 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -740,10 +740,11 @@ impl PendingRow {
740740

741741
let timelines = timepoint
742742
.into_iter()
743-
.map(|(timeline, time)| {
744-
let times = ArrowScalarBuffer::from(vec![time.as_i64()]);
745-
let time_column = TimeColumn::new(Some(true), timeline, times);
746-
(*timeline.name(), time_column)
743+
.map(|(timeline_name, cell)| {
744+
let times = ArrowScalarBuffer::from(vec![cell.as_i64()]);
745+
let time_column =
746+
TimeColumn::new(Some(true), Timeline::new(timeline_name, cell.typ()), times);
747+
(timeline_name, time_column)
747748
})
748749
.collect();
749750

@@ -801,9 +802,9 @@ impl PendingRow {
801802
// deterministic: `TimePoint` is backed by a `BTreeMap`.
802803
for row in rows {
803804
let mut hasher = ahash::AHasher::default();
804-
row.timepoint
805-
.timelines()
806-
.for_each(|timeline| timeline.hash(&mut hasher));
805+
row.timepoint.timeline_names().for_each(|timeline| {
806+
<TimelineName as std::hash::Hash>::hash(timeline, &mut hasher);
807+
});
807808

808809
per_timeline_set
809810
.entry(hasher.finish())
@@ -876,10 +877,10 @@ impl PendingRow {
876877
// Look for unsorted timelines -- if we find any, and the chunk is larger than
877878
// the pre-configured `chunk_max_rows_if_unsorted` threshold, then split _even_
878879
// further!
879-
for (&timeline, _) in row_timepoint {
880-
let time_column = timelines
881-
.entry(*timeline.name())
882-
.or_insert_with(|| PendingTimeColumn::new(timeline));
880+
for (&timeline_name, cell) in row_timepoint {
881+
let time_column = timelines.entry(timeline_name).or_insert_with(|| {
882+
PendingTimeColumn::new(Timeline::new(timeline_name, cell.typ()))
883+
});
883884

884885
if !row_ids.is_empty() // just being extra cautious
885886
&& row_ids.len() as u64 >= chunk_max_rows_if_unsorted
@@ -913,11 +914,11 @@ impl PendingRow {
913914

914915
row_ids.push(*row_id);
915916

916-
for (&timeline, &time) in row_timepoint {
917-
let time_column = timelines
918-
.entry(*timeline.name())
919-
.or_insert_with(|| PendingTimeColumn::new(timeline));
920-
time_column.push(time);
917+
for (&timeline_name, &cell) in row_timepoint {
918+
let time_column = timelines.entry(timeline_name).or_insert_with(|| {
919+
PendingTimeColumn::new(Timeline::new(timeline_name, cell.typ()))
920+
});
921+
time_column.push(cell.into());
921922
}
922923

923924
for (component_desc, arrays) in &mut components {

crates/store/re_chunk/src/builder.rs

Lines changed: 7 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ use arrow::{array::ArrayRef, datatypes::DataType as ArrowDatatype};
22
use itertools::Itertools as _;
33
use nohash_hasher::IntMap;
44

5-
use re_log_types::{EntityPath, TimeInt, TimePoint, TimeType, Timeline, TimelineName};
5+
use re_log_types::{EntityPath, NonMinI64, TimePoint, Timeline, TimelineName};
66
use re_types_core::{AsComponents, ComponentBatch, ComponentDescriptor, SerializedComponentBatch};
77

88
use crate::{chunk::ChunkComponents, Chunk, ChunkId, ChunkResult, RowId, TimeColumn};
@@ -72,11 +72,11 @@ impl ChunkBuilder {
7272

7373
self.row_ids.push(row_id);
7474

75-
for (timeline, time) in timepoint.into() {
75+
for (timeline, cell) in timepoint.into() {
7676
self.timelines
77-
.entry(*timeline.name())
78-
.or_insert_with(|| TimeColumn::builder(timeline))
79-
.with_row(timeline.typ(), time);
77+
.entry(timeline)
78+
.or_insert_with(|| TimeColumn::builder(Timeline::new(timeline, cell.typ())))
79+
.with_row(cell.value);
8080
}
8181

8282
for (component_name, array) in components {
@@ -387,20 +387,8 @@ impl TimeColumnBuilder {
387387

388388
/// Add a row's worth of time data using the given timestamp.
389389
#[inline]
390-
pub fn with_row(&mut self, typ: TimeType, time: TimeInt) -> &mut Self {
391-
let Self { timeline, times } = self;
392-
393-
if timeline.typ() != typ {
394-
re_log::warn_once!(
395-
"Mixing {:?} and {:?} in the same time column '{}'",
396-
typ,
397-
timeline.typ(),
398-
timeline.name()
399-
);
400-
}
401-
402-
times.push(time.as_i64());
403-
390+
pub fn with_row(&mut self, time: NonMinI64) -> &mut Self {
391+
self.times.push(time.into());
404392
self
405393
}
406394

crates/store/re_chunk/src/chunk.rs

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ use nohash_hasher::IntMap;
1414
use re_arrow_util::ArrowArrayDowncastRef as _;
1515
use re_byte_size::SizeBytes as _;
1616
use re_log_types::{
17-
EntityPath, ResolvedTimeRange, Time, TimeInt, TimePoint, Timeline, TimelineName,
17+
EntityPath, NonMinI64, ResolvedTimeRange, Time, TimeInt, TimePoint, Timeline, TimelineName,
1818
};
1919
use re_types_core::{
2020
ComponentDescriptor, ComponentName, DeserializationError, Loggable as _, SerializationError,
@@ -1240,11 +1240,23 @@ impl TimeColumn {
12401240
self.timeline.typ().make_arrow_array(self.times.clone())
12411241
}
12421242

1243+
/// All times in a time column are guaranteed not to have the value `i64::MIN`
1244+
/// (which is reserved for static data).
12431245
#[inline]
12441246
pub fn times_raw(&self) -> &[i64] {
12451247
self.times.as_ref()
12461248
}
12471249

1250+
/// All times in a time column are guaranteed not to have the value `i64::MIN`
1251+
/// (which is reserved for static data).
1252+
#[inline]
1253+
pub fn times_nonmin(&self) -> impl DoubleEndedIterator<Item = NonMinI64> + '_ {
1254+
self.times_raw()
1255+
.iter()
1256+
.copied()
1257+
.map(NonMinI64::saturating_from_i64)
1258+
}
1259+
12481260
#[inline]
12491261
pub fn times(&self) -> impl DoubleEndedIterator<Item = TimeInt> + '_ {
12501262
self.times_raw().iter().copied().map(TimeInt::new_temporal)

crates/store/re_chunk/src/latest_at.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,10 @@ impl LatestAtQuery {
2929
/// The returned query is guaranteed to never include [`TimeInt::STATIC`].
3030
#[inline]
3131
pub fn new(timeline: TimelineName, at: impl TryInto<TimeInt>) -> Self {
32-
let at = at.try_into().unwrap_or(TimeInt::MIN);
33-
Self { timeline, at }
32+
Self {
33+
timeline,
34+
at: TimeInt::saturated_nonstatic(at),
35+
}
3436
}
3537

3638
#[inline]

crates/store/re_data_loader/src/lib.rs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -144,18 +144,19 @@ impl DataLoaderSettings {
144144
args.push("--static".to_owned());
145145
}
146146

147-
for (timeline, time) in timepoint.iter() {
148-
match timeline.typ() {
147+
for (timeline, cell) in timepoint.iter() {
148+
// TODO(#8635): update this
149+
match cell.typ() {
149150
re_log_types::TimeType::Time => {
150151
args.extend([
151152
"--time".to_owned(),
152-
format!("{}={}", timeline.name(), time.as_i64()),
153+
format!("{}={}", timeline, cell.as_i64()),
153154
]);
154155
}
155156
re_log_types::TimeType::Sequence => {
156157
args.extend([
157158
"--sequence".to_owned(),
158-
format!("{}={}", timeline.name(), time.as_i64()),
159+
format!("{}={}", timeline, cell.as_i64()),
159160
]);
160161
}
161162
}

crates/store/re_data_loader/src/loader_archetype.rs

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
use itertools::Either;
22

33
use re_chunk::{Chunk, RowId};
4-
use re_log_types::{EntityPath, TimeInt, TimePoint};
4+
use re_log_types::{EntityPath, TimePoint};
55
use re_types::archetypes::{AssetVideo, VideoFrameReference};
66
use re_types::components::VideoTimestamp;
77
use re_types::Archetype;
@@ -67,28 +67,28 @@ impl DataLoader for ArchetypeLoader {
6767
// TODO(cmc): log these once heuristics (I think?) are fixed
6868
if false {
6969
if let Ok(metadata) = filepath.metadata() {
70-
use re_log_types::{Time, Timeline};
70+
use re_log_types::IndexCell;
7171

7272
if let Some(created) = metadata
7373
.created()
7474
.ok()
75-
.and_then(|t| TimeInt::try_from(Time::try_from(t).ok()?).ok())
75+
.and_then(|t| IndexCell::try_from(t).ok())
7676
{
77-
timepoint.insert(Timeline::new_temporal("created_at"), created);
77+
timepoint.insert_index("created_at", created);
7878
}
7979
if let Some(modified) = metadata
8080
.modified()
8181
.ok()
82-
.and_then(|t| TimeInt::try_from(Time::try_from(t).ok()?).ok())
82+
.and_then(|t| IndexCell::try_from(t).ok())
8383
{
84-
timepoint.insert(Timeline::new_temporal("modified_at"), modified);
84+
timepoint.insert_index("modified_at", modified);
8585
}
8686
if let Some(accessed) = metadata
8787
.accessed()
8888
.ok()
89-
.and_then(|t| TimeInt::try_from(Time::try_from(t).ok()?).ok())
89+
.and_then(|t| IndexCell::try_from(t).ok())
9090
{
91-
timepoint.insert(Timeline::new_temporal("accessed_at"), accessed);
91+
timepoint.insert_index("accessed_at", accessed);
9292
}
9393
}
9494
}
@@ -184,7 +184,10 @@ fn load_video(
184184
re_tracing::profile_function!();
185185

186186
let video_timeline = re_log_types::Timeline::new_temporal("video");
187-
timepoint.insert(video_timeline, re_log_types::TimeInt::new_temporal(0));
187+
timepoint.insert_index(
188+
*video_timeline.name(),
189+
re_log_types::IndexCell::ZERO_DURATION,
190+
);
188191

189192
let video_asset = AssetVideo::new(contents);
190193

crates/store/re_data_loader/src/loader_lerobot.rs

Lines changed: 7 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -264,8 +264,7 @@ fn log_episode_task(
264264
continue;
265265
};
266266

267-
let mut timepoint = TimePoint::default();
268-
timepoint.insert(*timeline, time_int);
267+
let timepoint = TimePoint::default().with(*timeline, time_int);
269268
let text = TextDocument::new(task.task.clone());
270269
chunk = chunk.with_archetype(row_id, timepoint, &text);
271270

@@ -290,17 +289,14 @@ fn load_episode_images(
290289

291290
let mut chunk = Chunk::builder(observation.into());
292291
let mut row_id = RowId::new();
293-
let mut time_int = TimeInt::ZERO;
294292

295-
for idx in 0..image_bytes.len() {
296-
let img_buffer = image_bytes.value(idx);
293+
for frame_idx in 0..image_bytes.len() {
294+
let img_buffer = image_bytes.value(frame_idx);
297295
let encoded_image = EncodedImage::from_file_contents(img_buffer.to_owned());
298-
let mut timepoint = TimePoint::default();
299-
timepoint.insert(*timeline, time_int);
296+
let timepoint = TimePoint::default().with(*timeline, frame_idx as i64);
300297
chunk = chunk.with_archetype(row_id, timepoint, &encoded_image);
301298

302299
row_id = row_id.next();
303-
time_int = time_int.inc();
304300
}
305301

306302
Ok(std::iter::once(chunk.build().with_context(|| {
@@ -322,19 +318,16 @@ fn load_episode_depth_images(
322318

323319
let mut chunk = Chunk::builder(observation.into());
324320
let mut row_id = RowId::new();
325-
let mut time_int = TimeInt::ZERO;
326321

327-
for idx in 0..image_bytes.len() {
328-
let img_buffer = image_bytes.value(idx);
322+
for frame_idx in 0..image_bytes.len() {
323+
let img_buffer = image_bytes.value(frame_idx);
329324
let depth_image = DepthImage::from_file_contents(img_buffer.to_owned())
330325
.map_err(|err| anyhow!("Failed to decode image: {err}"))?;
331326

332-
let mut timepoint = TimePoint::default();
333-
timepoint.insert(*timeline, time_int);
327+
let timepoint = TimePoint::default().with(*timeline, frame_idx as i64);
334328
chunk = chunk.with_archetype(row_id, timepoint, &depth_image);
335329

336330
row_id = row_id.next();
337-
time_int = time_int.inc();
338331
}
339332

340333
Ok(std::iter::once(chunk.build().with_context(|| {

crates/store/re_dataframe/src/lib.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@ pub use self::external::re_chunk_store::{
1313
};
1414
#[doc(no_inline)]
1515
pub use self::external::re_log_types::{
16-
EntityPath, EntityPathFilter, EntityPathSubs, ResolvedEntityPathFilter, ResolvedTimeRange,
17-
StoreKind, TimeInt, Timeline, TimelineName,
16+
EntityPath, EntityPathFilter, EntityPathSubs, IndexCell, ResolvedEntityPathFilter,
17+
ResolvedTimeRange, StoreKind, TimeInt, Timeline, TimelineName,
1818
};
1919
#[doc(no_inline)]
2020
pub use self::external::re_query::{QueryCache, QueryCacheHandle, StorageEngine};

0 commit comments

Comments
 (0)