Skip to content

Commit 8f97013

Browse files
abey79 and emilk authored
Assume rerun.kind: data when rerun.kind is missing (#9467)
### What

- Default `rerun.kind` to `"data"` when it is missing.

This should solve the problem of the partition table not being Sorbet-compliant, by making more things Sorbet-compliant.

### TODO

* [x] Test with a real partition table

<img width="1693" alt="image" src="https://github.com/user-attachments/assets/401ac9c4-aa0f-47cd-a8ba-8743468160cc" />

---------

Co-authored-by: Emil Ernerfeldt <[email protected]>
1 parent 46de401 commit 8f97013

File tree

8 files changed

+64
-27
lines changed

8 files changed

+64
-27
lines changed

crates/store/re_sorbet/src/column_descriptor.rs

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,18 @@ use arrow::datatypes::{
99

1010
use re_log_types::EntityPath;
1111

12-
use crate::{ComponentColumnDescriptor, IndexColumnDescriptor, MetadataExt as _};
12+
use crate::{ColumnKind, ComponentColumnDescriptor, IndexColumnDescriptor};
1313

1414
#[derive(thiserror::Error, Debug)]
1515
pub enum ColumnError {
1616
#[error(transparent)]
1717
MissingFieldMetadata(#[from] crate::MissingFieldMetadata),
1818

19+
#[error(transparent)]
20+
UnknownColumnKind(#[from] crate::UnknownColumnKind),
21+
1922
#[error("Unsupported column rerun.kind: {kind:?}. Expected one of: index, data")]
20-
UnsupportedColumnKind { kind: String },
23+
UnsupportedColumnKind { kind: ColumnKind },
2124

2225
#[error(transparent)]
2326
UnsupportedTimeType(#[from] crate::UnsupportedTimeType),
@@ -114,17 +117,16 @@ impl ColumnDescriptor {
114117
chunk_entity_path: Option<&EntityPath>,
115118
field: &ArrowField,
116119
) -> Result<Self, ColumnError> {
117-
let kind = field.get_or_err("rerun.kind")?;
118-
match kind {
119-
"index" | "time" => Ok(Self::Time(IndexColumnDescriptor::try_from(field)?)),
120+
match ColumnKind::try_from(field)? {
121+
ColumnKind::RowId => Err(ColumnError::UnsupportedColumnKind {
122+
kind: ColumnKind::RowId,
123+
}),
120124

121-
"data" => Ok(Self::Component(
125+
ColumnKind::Index => Ok(Self::Time(IndexColumnDescriptor::try_from(field)?)),
126+
127+
ColumnKind::Component => Ok(Self::Component(
122128
ComponentColumnDescriptor::from_arrow_field(chunk_entity_path, field),
123129
)),
124-
125-
_ => Err(ColumnError::UnsupportedColumnKind {
126-
kind: kind.to_owned(),
127-
}),
128130
}
129131
}
130132
}
Lines changed: 34 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,54 @@
11
use arrow::datatypes::Field as ArrowField;
22

3-
use crate::{MetadataExt as _, SorbetError};
3+
use crate::MetadataExt as _;
4+
5+
#[derive(thiserror::Error, Debug)]
6+
#[error("Unknown `rerun.kind` {kind:?} in column {column_name:?}. Expect one of `row_id`, `index`, or `component`.")]
7+
pub struct UnknownColumnKind {
8+
pub kind: String,
9+
pub column_name: String,
10+
}
411

512
/// The type of column in a sorbet batch.
6-
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
13+
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash)]
714
pub enum ColumnKind {
15+
/// Row ID
816
RowId,
17+
18+
/// Timeline
919
Index,
20+
21+
/// Data (also the default when `rerun.kind` is missing)
22+
#[default]
1023
Component,
1124
}
1225

26+
impl std::fmt::Display for ColumnKind {
27+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
28+
match self {
29+
Self::RowId => write!(f, "control"),
30+
Self::Index => write!(f, "index"),
31+
Self::Component => write!(f, "data"),
32+
}
33+
}
34+
}
35+
1336
impl TryFrom<&ArrowField> for ColumnKind {
14-
type Error = SorbetError;
37+
type Error = UnknownColumnKind;
1538

16-
fn try_from(fields: &ArrowField) -> Result<Self, Self::Error> {
17-
let kind = fields.get_or_err("rerun.kind")?;
39+
fn try_from(field: &ArrowField) -> Result<Self, Self::Error> {
40+
let Some(kind) = field.get_opt("rerun.kind") else {
41+
return Ok(Self::default());
42+
};
1843
match kind {
1944
"control" | "row_id" => Ok(Self::RowId),
2045
"index" | "time" => Ok(Self::Index),
2146
"component" | "data" => Ok(Self::Component),
2247

23-
_ => Err(SorbetError::custom(format!("Unknown column kind: {kind}"))),
48+
_ => Err(UnknownColumnKind {
49+
kind: kind.to_owned(),
50+
column_name: field.name().to_owned(),
51+
}),
2452
}
2553
}
2654
}

crates/store/re_sorbet/src/component_column_descriptor.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ use arrow::datatypes::{DataType as ArrowDatatype, Field as ArrowField};
33
use re_log_types::{ComponentPath, EntityPath};
44
use re_types_core::{ArchetypeFieldName, ArchetypeName, ComponentDescriptor, ComponentName};
55

6-
use crate::{ArrowFieldMetadata, BatchType, MetadataExt as _};
6+
use crate::{ArrowFieldMetadata, BatchType, ColumnKind, MetadataExt as _};
77

88
/// Describes a data/component column, such as `Position3D`, in a dataframe.
99
///
@@ -196,7 +196,7 @@ impl ComponentColumnDescriptor {
196196
// TODO(#6889): This needs some proper sorbetization -- I just threw these names randomly.
197197
// We use the long names for the archetype and component names so that they roundtrip properly!
198198
let mut metadata = std::collections::HashMap::from([
199-
("rerun.kind".to_owned(), "data".to_owned()),
199+
("rerun.kind".to_owned(), ColumnKind::Component.to_string()),
200200
(
201201
"rerun.component".to_owned(),
202202
component_name.full_name().to_owned(),

crates/store/re_sorbet/src/error.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@ use arrow::error::ArrowError;
22

33
#[derive(thiserror::Error, Debug)]
44
pub enum SorbetError {
5+
#[error(transparent)]
6+
UnknownColumnKind(#[from] crate::UnknownColumnKind),
7+
58
#[error(transparent)]
69
MissingMetadataKey(#[from] crate::MissingMetadataKey),
710

crates/store/re_sorbet/src/index_column_descriptor.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,10 @@ impl IndexColumnDescriptor {
105105
let nullable = true; // Time column must be nullable since static data doesn't have a time.
106106

107107
let mut metadata = std::collections::HashMap::from([
108-
("rerun.kind".to_owned(), "index".to_owned()),
108+
(
109+
"rerun.kind".to_owned(),
110+
crate::ColumnKind::Index.to_string(),
111+
),
109112
("rerun.index_name".to_owned(), timeline.name().to_string()),
110113
]);
111114
if *is_sorted {

crates/store/re_sorbet/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ pub use self::{
3636
chunk_schema::ChunkSchema,
3737
column_descriptor::{ColumnDescriptor, ColumnError},
3838
column_descriptor_ref::ColumnDescriptorRef,
39-
column_kind::ColumnKind,
39+
column_kind::{ColumnKind, UnknownColumnKind},
4040
component_column_descriptor::ComponentColumnDescriptor,
4141
error::SorbetError,
4242
index_column_descriptor::{IndexColumnDescriptor, UnsupportedTimeType},

crates/store/re_sorbet/src/row_id_column_descriptor.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,10 @@ impl RowIdColumnDescriptor {
5353
let Self { is_sorted } = self;
5454

5555
let mut metadata = std::collections::HashMap::from([
56-
("rerun.kind".to_owned(), "control".to_owned()),
56+
(
57+
"rerun.kind".to_owned(),
58+
crate::ColumnKind::RowId.to_string(),
59+
),
5760
(
5861
"ARROW:extension:name".to_owned(),
5962
re_tuid::Tuid::ARROW_EXTENSION_NAME.to_owned(),

crates/store/re_sorbet/src/sorbet_batch.rs

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@ use arrow::{
1010
use re_arrow_util::{into_arrow_ref, ArrowArrayDowncastRef as _};
1111

1212
use crate::{
13-
ArrowBatchMetadata, ColumnDescriptorRef, ComponentColumnDescriptor, IndexColumnDescriptor,
14-
RowIdColumnDescriptor, SorbetError, SorbetSchema,
13+
ArrowBatchMetadata, ColumnDescriptorRef, ColumnKind, ComponentColumnDescriptor,
14+
IndexColumnDescriptor, RowIdColumnDescriptor, SorbetError, SorbetSchema,
1515
};
1616

1717
/// Any rerun-compatible [`ArrowRecordBatch`].
@@ -205,10 +205,8 @@ fn make_all_data_columns_list_arrays(batch: &ArrowRecordBatch) -> ArrowRecordBat
205205

206206
for (field, array) in itertools::izip!(batch.schema().fields(), batch.columns()) {
207207
let is_list_array = array.downcast_array_ref::<ArrowListArray>().is_some();
208-
let is_data_column = field
209-
.metadata()
210-
.get("rerun.kind")
211-
.is_some_and(|kind| kind == "data");
208+
let is_data_column =
209+
ColumnKind::try_from(field.as_ref()).is_ok_and(|kind| kind == ColumnKind::Component);
212210
if is_data_column && !is_list_array {
213211
let (field, array) = re_arrow_util::wrap_in_list_array(field, array.clone());
214212
fields.push(field.into());

0 commit comments

Comments
 (0)