Skip to content

Commit 9b8dbe6

Browse files
authored
Make store aware of full component descriptors (#9842)
1 parent b383dba commit 9b8dbe6

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

47 files changed

+646
-429
lines changed

Cargo.lock

-1
Original file line numberDiff line numberDiff line change
@@ -6923,7 +6923,6 @@ dependencies = [
69236923
"re_query",
69246924
"re_sorbet",
69256925
"re_tracing",
6926-
"re_types",
69276926
"re_types_core",
69286927
"similar-asserts",
69296928
"tokio",

crates/store/re_chunk/examples/latest_at.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,13 @@ fn main() -> anyhow::Result<()> {
1212
let query = LatestAtQuery::new(TimelineName::new("frame"), 4);
1313

1414
// Find all relevant data for a query:
15-
let chunk = chunk.latest_at(&query, MyPoint::name());
16-
eprintln!("{:?} @ {query:?}:\n{chunk}", MyPoint::name());
15+
let chunk = chunk.latest_at(&query, &MyPoint::descriptor());
16+
eprintln!("{:?} @ {query:?}:\n{chunk}", MyPoint::descriptor());
1717

1818
// And then slice it as appropriate:
1919
let chunk = chunk
2020
.timeline_sliced(TimelineName::log_time())
21-
.component_sliced(MyPoint::name());
21+
.component_sliced(&MyPoint::descriptor());
2222
eprintln!("Sliced down to specific timeline and component:\n{chunk}");
2323

2424
Ok(())

crates/store/re_chunk/examples/range.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,13 @@ fn main() -> anyhow::Result<()> {
1515
let query = RangeQuery::new(TimelineName::new("frame"), ResolvedTimeRange::EVERYTHING);
1616

1717
// Find all relevant data for a query:
18-
let chunk = chunk.range(&query, MyPoint::name());
19-
eprintln!("{:?} @ {query:?}:\n{chunk}", MyPoint::name());
18+
let chunk = chunk.range(&query, &MyPoint::descriptor());
19+
eprintln!("{:?} @ {query:?}:\n{chunk}", MyPoint::descriptor());
2020

2121
// And then slice it as appropriate:
2222
let chunk = chunk
2323
.timeline_sliced(TimelineName::log_time())
24-
.component_sliced(MyPoint::name());
24+
.component_sliced(&MyPoint::descriptor());
2525
eprintln!("Sliced down to specific timeline and component:\n{chunk}");
2626

2727
Ok(())

crates/store/re_chunk/src/chunk.rs

+22-3
Original file line numberDiff line numberDiff line change
@@ -221,14 +221,30 @@ impl Chunk {
221221
///
222222
/// This is undefined behavior if there is more than one component with that name.
223223
//
224-
// TODO(cmc): Kinda disgusting but it makes our lives easier during the interim, as long as we're
224+
// TODO(#6889): Kinda disgusting but it makes our lives easier during the interim, as long as we're
225225
// in this weird halfway in-between state where we still have a bunch of things indexed by name only.
226226
#[inline]
227227
pub fn get_first_component(&self, component_name: ComponentName) -> Option<&ArrowListArray> {
228228
self.components.iter().find_map(move |(descr, array)| {
229229
(descr.component_name == component_name).then_some(array)
230230
})
231231
}
232+
233+
/// Returns any component descriptor with the given [`ComponentName`].
234+
///
235+
/// This is undefined behavior if there is more than one component with that name.
236+
//
237+
// TODO(#6889): Kinda disgusting but it makes our lives easier during the interim, as long as we're
238+
// in this weird halfway in-between state where we still have a bunch of things indexed by name only.
239+
#[inline]
240+
pub fn get_first_component_descriptor(
241+
&self,
242+
component_name: ComponentName,
243+
) -> Option<&ComponentDescriptor> {
244+
self.components
245+
.keys()
246+
.find(|descr| descr.component_name == component_name)
247+
}
232248
}
233249

234250
impl Chunk {
@@ -582,9 +598,12 @@ impl Chunk {
582598
//
583599
// TODO(cmc): This needs to be stored in chunk metadata and transported across IPC.
584600
#[inline]
585-
pub fn num_events_for_component(&self, component_name: ComponentName) -> Option<u64> {
601+
pub fn num_events_for_component(
602+
&self,
603+
component_descriptor: &ComponentDescriptor,
604+
) -> Option<u64> {
586605
// Reminder: component columns are sparse, we must check validity bitmap.
587-
self.get_first_component(component_name).map(|list_array| {
606+
self.components.get(component_descriptor).map(|list_array| {
588607
list_array.nulls().map_or_else(
589608
|| list_array.len() as u64,
590609
|validity| validity.len() as u64 - validity.null_count() as u64,

crates/store/re_chunk/src/helpers.rs

+45-11
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ use arrow::array::Array as _;
44
use arrow::array::ArrayRef as ArrowArrayRef;
55

66
use re_log_types::{TimeInt, TimelineName};
7+
use re_types_core::ComponentDescriptor;
78
use re_types_core::{Component, ComponentName};
89

910
use crate::{Chunk, ChunkResult, RowId};
@@ -258,20 +259,45 @@ impl UnitChunkShared {
258259
impl UnitChunkShared {
259260
// --- Batch ---
260261

262+
/// Returns the raw data for the specified component name.
263+
///
264+
/// It is undetermined which batch is returned if there are multiple components with the same name.
265+
/// TODO(#6889): Can we remove this method?
266+
#[inline]
267+
pub fn component_batch_raw_by_component_name(
268+
&self,
269+
component_name: ComponentName,
270+
) -> Option<ArrowArrayRef> {
271+
debug_assert!(self.num_rows() == 1);
272+
self.components
273+
.get_by_component_name(component_name)
274+
.next()
275+
.and_then(|list_array| list_array.is_valid(0).then(|| list_array.value(0)))
276+
}
277+
261278
/// Returns the raw data for the specified component.
262279
#[inline]
263-
pub fn component_batch_raw(&self, component_name: &ComponentName) -> Option<ArrowArrayRef> {
280+
pub fn component_batch_raw(
281+
&self,
282+
component_descr: &ComponentDescriptor,
283+
) -> Option<ArrowArrayRef> {
264284
debug_assert!(self.num_rows() == 1);
265-
self.get_first_component(*component_name)
285+
self.components
286+
.get(component_descr)
266287
.and_then(|list_array| list_array.is_valid(0).then(|| list_array.value(0)))
267288
}
268289

269290
/// Returns the deserialized data for the specified component.
270291
///
271292
/// Returns an error if the data cannot be deserialized.
293+
/// In debug builds, panics if the descriptor doesn't have the same component name as the component type.
272294
#[inline]
273-
pub fn component_batch<C: Component>(&self) -> Option<ChunkResult<Vec<C>>> {
274-
let data = C::from_arrow(&*self.component_batch_raw(&C::name())?);
295+
pub fn component_batch<C: Component>(
296+
&self,
297+
component_descr: &ComponentDescriptor,
298+
) -> Option<ChunkResult<Vec<C>>> {
299+
debug_assert_eq!(C::name(), component_descr.component_name);
300+
let data = C::from_arrow(&*self.component_batch_raw(component_descr)?);
275301
Some(data.map_err(Into::into))
276302
}
277303

@@ -283,10 +309,10 @@ impl UnitChunkShared {
283309
#[inline]
284310
pub fn component_instance_raw(
285311
&self,
286-
component_name: &ComponentName,
312+
component_descr: &ComponentDescriptor,
287313
instance_index: usize,
288314
) -> Option<ChunkResult<ArrowArrayRef>> {
289-
let array = self.component_batch_raw(component_name)?;
315+
let array = self.component_batch_raw(component_descr)?;
290316
if array.len() > instance_index {
291317
Some(Ok(array.slice(instance_index, 1)))
292318
} else {
@@ -301,12 +327,15 @@ impl UnitChunkShared {
301327
/// Returns the deserialized data for the specified component at the given instance index.
302328
///
303329
/// Returns an error if the data cannot be deserialized, or if the instance index is out of bounds.
330+
/// In debug builds, panics if the descriptor doesn't have the same component name as the component type.
304331
#[inline]
305332
pub fn component_instance<C: Component>(
306333
&self,
334+
component_descr: &ComponentDescriptor,
307335
instance_index: usize,
308336
) -> Option<ChunkResult<C>> {
309-
let res = self.component_instance_raw(&C::name(), instance_index)?;
337+
debug_assert_eq!(C::name(), component_descr.component_name);
338+
let res = self.component_instance_raw(component_descr, instance_index)?;
310339

311340
let array = match res {
312341
Ok(array) => array,
@@ -327,9 +356,9 @@ impl UnitChunkShared {
327356
#[inline]
328357
pub fn component_mono_raw(
329358
&self,
330-
component_name: &ComponentName,
359+
component_descr: &ComponentDescriptor,
331360
) -> Option<ChunkResult<ArrowArrayRef>> {
332-
let array = self.component_batch_raw(component_name)?;
361+
let array = self.component_batch_raw(component_descr)?;
333362
if array.len() == 1 {
334363
Some(Ok(array.slice(0, 1)))
335364
} else {
@@ -344,9 +373,14 @@ impl UnitChunkShared {
344373
/// Returns the deserialized data for the specified component, assuming a mono-batch.
345374
///
346375
/// Returns an error if the data cannot be deserialized, or if the underlying batch is not of unit length.
376+
/// In debug builds, panics if the descriptor doesn't have the same component name as the component type.
347377
#[inline]
348-
pub fn component_mono<C: Component>(&self) -> Option<ChunkResult<C>> {
349-
let res = self.component_mono_raw(&C::name())?;
378+
pub fn component_mono<C: Component>(
379+
&self,
380+
component_descr: &ComponentDescriptor,
381+
) -> Option<ChunkResult<C>> {
382+
debug_assert_eq!(C::name(), component_descr.component_name);
383+
let res = self.component_mono_raw(component_descr)?;
350384

351385
let array = match res {
352386
Ok(array) => array,

crates/store/re_chunk/src/latest_at.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
use arrow::array::Array as _;
22

33
use re_log_types::{TimeInt, TimelineName};
4-
use re_types_core::ComponentName;
4+
use re_types_core::ComponentDescriptor;
55

66
use crate::{Chunk, RowId};
77

@@ -71,14 +71,14 @@ impl Chunk {
7171
/// information by inspecting the data, for examples timestamps on other timelines.
7272
/// See [`Self::timeline_sliced`] and [`Self::component_sliced`] if you do want to filter this
7373
/// extra data.
74-
pub fn latest_at(&self, query: &LatestAtQuery, component_name: ComponentName) -> Self {
74+
pub fn latest_at(&self, query: &LatestAtQuery, component_descr: &ComponentDescriptor) -> Self {
7575
if self.is_empty() {
7676
return self.clone();
7777
}
7878

7979
re_tracing::profile_function!(format!("{query:?}"));
8080

81-
let Some(component_list_array) = self.get_first_component(component_name) else {
81+
let Some(component_list_array) = self.components.get(component_descr) else {
8282
return self.emptied();
8383
};
8484

crates/store/re_chunk/src/range.rs

+5-5
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use re_log_types::{ResolvedTimeRange, TimeInt, TimelineName};
2-
use re_types_core::ComponentName;
2+
use re_types_core::ComponentDescriptor;
33

44
use crate::Chunk;
55

@@ -193,7 +193,7 @@ impl Chunk {
193193
//
194194
// TODO(apache/arrow-rs#5375): Since we don't have access to arrow's ListView yet, we must actually clone the
195195
// data if the chunk requires sorting.
196-
pub fn range(&self, query: &RangeQuery, component_name: ComponentName) -> Self {
196+
pub fn range(&self, query: &RangeQuery, component_descr: &ComponentDescriptor) -> Self {
197197
if self.is_empty() {
198198
return self.clone();
199199
}
@@ -215,7 +215,7 @@ impl Chunk {
215215
Cow::Borrowed(self)
216216
};
217217
let chunk = if !keep_extra_components {
218-
Cow::Owned(chunk.component_sliced(component_name))
218+
Cow::Owned(chunk.component_sliced(component_descr))
219219
} else {
220220
chunk
221221
};
@@ -226,7 +226,7 @@ impl Chunk {
226226
// equivalent to just running a latest-at query.
227227
chunk.latest_at(
228228
&crate::LatestAtQuery::new(*query.timeline(), TimeInt::MAX),
229-
component_name,
229+
component_descr,
230230
)
231231
} else {
232232
let Some(is_sorted_by_time) = chunk
@@ -237,7 +237,7 @@ impl Chunk {
237237
return chunk.emptied();
238238
};
239239

240-
let chunk = chunk.densified(component_name);
240+
let chunk = chunk.densified(component_descr);
241241

242242
let chunk = if is_sorted_by_time {
243243
// Temporal, row-sorted, time-sorted chunk

crates/store/re_chunk/src/slice.rs

+13-13
Original file line numberDiff line numberDiff line change
@@ -168,18 +168,18 @@ impl Chunk {
168168
chunk
169169
}
170170

171-
/// Slices the [`Chunk`] horizontally by keeping only the selected `component_name`.
171+
/// Slices the [`Chunk`] horizontally by keeping only the selected `component_descr`.
172172
///
173173
/// The result is a new [`Chunk`] with the same rows and (at-most) one component column.
174174
/// All non-component columns will be kept as-is.
175175
///
176-
/// If `component_name` is not found within the [`Chunk`], the end result will be the same as the
176+
/// If `component_descr` is not found within the [`Chunk`], the end result will be the same as the
177177
/// current chunk but without any component column.
178178
///
179179
/// WARNING: the returned chunk has the same old [`crate::ChunkId`]! Change it with [`Self::with_id`].
180180
#[must_use]
181181
#[inline]
182-
pub fn component_sliced(&self, component_name: ComponentName) -> Self {
182+
pub fn component_sliced(&self, component_descr: &ComponentDescriptor) -> Self {
183183
let Self {
184184
id,
185185
entity_path,
@@ -199,9 +199,9 @@ impl Chunk {
199199
timelines: timelines.clone(),
200200
components: crate::ChunkComponents(
201201
components
202-
.iter()
203-
.filter(|&(desc, _list_array)| (desc.component_name == component_name))
204-
.map(|(desc, list_array)| (desc.clone(), list_array.clone()))
202+
.get(component_descr)
203+
.map(|list_array| (component_descr.clone(), list_array.clone()))
204+
.into_iter()
205205
.collect(),
206206
),
207207
};
@@ -305,20 +305,20 @@ impl Chunk {
305305
chunk
306306
}
307307

308-
/// Densifies the [`Chunk`] vertically based on the `component_name` column.
308+
/// Densifies the [`Chunk`] vertically based on the `component_descriptor` column.
309309
///
310-
/// Densifying here means dropping all rows where the associated value in the `component_name`
310+
/// Densifying here means dropping all rows where the associated value in the `component_descriptor`
311311
/// column is null.
312312
///
313-
/// The result is a new [`Chunk`] where the `component_name` column is guaranteed to be dense.
313+
/// The result is a new [`Chunk`] where the `component_descriptor` column is guaranteed to be dense.
314314
///
315-
/// If `component_name` doesn't exist in this [`Chunk`], or if it is already dense, this method
315+
/// If `component_descriptor` doesn't exist in this [`Chunk`], or if it is already dense, this method
316316
/// is a no-op.
317317
///
318318
/// WARNING: the returned chunk has the same old [`crate::ChunkId`]! Change it with [`Self::with_id`].
319319
#[must_use]
320320
#[inline]
321-
pub fn densified(&self, component_name_pov: ComponentName) -> Self {
321+
pub fn densified(&self, component_descr_pov: &ComponentDescriptor) -> Self {
322322
let Self {
323323
id,
324324
entity_path,
@@ -333,7 +333,7 @@ impl Chunk {
333333
return self.clone();
334334
}
335335

336-
let Some(component_list_array) = self.get_first_component(component_name_pov) else {
336+
let Some(component_list_array) = self.components.get(component_descr_pov) else {
337337
return self.clone();
338338
};
339339

@@ -361,7 +361,7 @@ impl Chunk {
361361
.iter()
362362
.map(|(component_desc, list_array)| {
363363
let filtered = re_arrow_util::filter_array(list_array, &validity_filter);
364-
let filtered = if component_desc.component_name == component_name_pov {
364+
let filtered = if component_desc == component_descr_pov {
365365
// Make sure we fully remove the validity bitmap for the densified
366366
// component.
367367
// This will allow further operations on this densified chunk to take some

crates/store/re_chunk/tests/latest_at.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -478,7 +478,7 @@ fn query_and_compare(
478478
) {
479479
re_log::setup_logging();
480480

481-
let results = chunk.latest_at(query, component_desc.component_name);
481+
let results = chunk.latest_at(query, &component_desc);
482482

483483
eprintln!("Query: {component_desc} @ {query:?}");
484484
eprintln!("Data:\n{chunk}");

crates/store/re_chunk/tests/range.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -447,7 +447,7 @@ fn query_and_compare(
447447
) {
448448
re_log::setup_logging();
449449

450-
let results = chunk.range(query, component_desc.component_name);
450+
let results = chunk.range(query, &component_desc);
451451

452452
eprintln!("Query: {component_desc} @ {query:?}");
453453
eprintln!("Data:\n{chunk}");

0 commit comments

Comments
 (0)