Skip to content

Commit fd5628e

Browse files
authored
Use Arrow IPC to encode the column schema (#8821)
### Related * Part of #8744 * Sibling PR: rerun-io/dataplatform#154
1 parent c8d00f2 commit fd5628e

File tree

54 files changed

+1564
-1659
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

54 files changed

+1564
-1659
lines changed

Cargo.lock

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -508,7 +508,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
508508
checksum = "9579b9d8bce47aa41389fe344f2c6758279983b7c0ebb4013e283e3e91bb450e"
509509
dependencies = [
510510
"bitflags 2.8.0",
511-
"serde",
512511
]
513512

514513
[[package]]
@@ -5790,7 +5789,6 @@ dependencies = [
57905789
"ahash",
57915790
"anyhow",
57925791
"arrow",
5793-
"arrow-schema",
57945792
"criterion",
57955793
"document-features",
57965794
"indent",
@@ -5813,7 +5811,6 @@ dependencies = [
58135811
"re_tracing",
58145812
"re_types",
58155813
"re_types_core",
5816-
"serde_json",
58175814
"similar-asserts",
58185815
"thiserror 1.0.65",
58195816
"tinyvec",
@@ -6217,7 +6214,6 @@ dependencies = [
62176214
"re_byte_size",
62186215
"re_format",
62196216
"re_log",
6220-
"re_protos",
62216217
"re_string_interner",
62226218
"re_tracing",
62236219
"re_tuid",
@@ -6282,8 +6278,13 @@ dependencies = [
62826278
name = "re_protos"
62836279
version = "0.22.0-alpha.1+dev"
62846280
dependencies = [
6281+
"arrow",
62856282
"prost",
6283+
"re_build_info",
62866284
"re_byte_size",
6285+
"re_log_types",
6286+
"re_sorbet",
6287+
"re_tuid",
62876288
"thiserror 1.0.65",
62886289
"tonic",
62896290
"tonic-web-wasm-client",
@@ -6592,7 +6593,6 @@ dependencies = [
65926593
"getrandom",
65936594
"once_cell",
65946595
"re_byte_size",
6595-
"re_protos",
65966596
"serde",
65976597
"web-time",
65986598
]
@@ -7510,6 +7510,7 @@ dependencies = [
75107510
"re_memory",
75117511
"re_protos",
75127512
"re_sdk",
7513+
"re_sorbet",
75137514
"re_video",
75147515
"re_web_viewer_server",
75157516
"re_ws_comms",

Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,6 @@ anyhow = { version = "1.0", default-features = false }
155155
argh = "0.1.12"
156156
array-init = "2.1"
157157
arrow = { version = "53.4", default-features = false }
158-
arrow-schema = { version = "53.4", default-features = false }
159158
arrow2 = { package = "re_arrow2", version = "0.18.2", features = ["arrow"] }
160159
async-executor = "1.0"
161160
async-stream = "0.3"

crates/store/re_chunk_store/Cargo.toml

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,6 @@ workspace = true
1818
[package.metadata.docs.rs]
1919
all-features = true
2020

21-
[package.metadata.cargo-machete]
22-
ignored = [
23-
# Needed to enable the serde feature of arrow `Datatype`
24-
"arrow-schema",
25-
]
26-
2721

2822
[features]
2923
default = []
@@ -50,14 +44,12 @@ re_types_core.workspace = true
5044
ahash.workspace = true
5145
anyhow.workspace = true
5246
arrow.workspace = true
53-
arrow-schema = { workspace = true, features = ["serde"] }
5447
document-features.workspace = true
5548
indent.workspace = true
5649
itertools.workspace = true
5750
nohash-hasher.workspace = true
5851
once_cell.workspace = true
5952
parking_lot = { workspace = true, features = ["arc_lock"] }
60-
serde_json.workspace = true
6153
thiserror.workspace = true
6254
web-time.workspace = true
6355

crates/store/re_chunk_store/src/dataframe.rs

Lines changed: 1 addition & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -13,70 +13,11 @@ use itertools::Itertools;
1313

1414
use re_chunk::TimelineName;
1515
use re_log_types::{EntityPath, ResolvedTimeRange, TimeInt, Timeline};
16-
use re_sorbet::{ComponentColumnDescriptor, TimeColumnDescriptor};
16+
use re_sorbet::{ColumnDescriptor, ComponentColumnDescriptor, TimeColumnDescriptor};
1717
use re_types_core::ComponentName;
1818

1919
use crate::{ChunkStore, ColumnMetadata};
2020

21-
// --- Descriptors ---
22-
23-
// TODO(#6889): At some point all these descriptors needs to be interned and have handles or
24-
// something. And of course they need to be codegen. But we'll get there once we're back to
25-
// natively tagged components.
26-
27-
// Describes any kind of column.
28-
//
29-
// See:
30-
// * [`TimeColumnDescriptor`]
31-
// * [`ComponentColumnDescriptor`]
32-
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
33-
pub enum ColumnDescriptor {
34-
Time(TimeColumnDescriptor),
35-
Component(ComponentColumnDescriptor),
36-
}
37-
38-
impl ColumnDescriptor {
39-
#[inline]
40-
pub fn entity_path(&self) -> Option<&EntityPath> {
41-
match self {
42-
Self::Time(_) => None,
43-
Self::Component(descr) => Some(&descr.entity_path),
44-
}
45-
}
46-
47-
#[inline]
48-
pub fn arrow_datatype(&self) -> ArrowDatatype {
49-
match self {
50-
Self::Time(descr) => descr.datatype.clone(),
51-
Self::Component(descr) => descr.returned_datatype(),
52-
}
53-
}
54-
55-
#[inline]
56-
pub fn to_arrow_field(&self) -> ArrowField {
57-
match self {
58-
Self::Time(descr) => descr.to_arrow_field(),
59-
Self::Component(descr) => descr.to_arrow_field(),
60-
}
61-
}
62-
63-
#[inline]
64-
pub fn short_name(&self) -> String {
65-
match self {
66-
Self::Time(descr) => descr.timeline.name().to_string(),
67-
Self::Component(descr) => descr.component_name.short_name().to_owned(),
68-
}
69-
}
70-
71-
#[inline]
72-
pub fn is_static(&self) -> bool {
73-
match self {
74-
Self::Time(_) => false,
75-
Self::Component(descr) => descr.is_static,
76-
}
77-
}
78-
}
79-
8021
// --- Selectors ---
8122

8223
/// Describes a column selection to return as part of a query.

crates/store/re_chunk_store/src/lib.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,16 +28,16 @@ mod protobuf_conversions;
2828

2929
pub use self::{
3030
dataframe::{
31-
ColumnDescriptor, ColumnSelector, ComponentColumnSelector, Index, IndexRange, IndexValue,
32-
QueryExpression, SparseFillStrategy, TimeColumnSelector, ViewContentsSelector,
31+
ColumnSelector, ComponentColumnSelector, Index, IndexRange, IndexValue, QueryExpression,
32+
SparseFillStrategy, TimeColumnSelector, ViewContentsSelector,
3333
},
3434
events::{ChunkCompactionReport, ChunkStoreDiff, ChunkStoreDiffKind, ChunkStoreEvent},
3535
gc::{GarbageCollectionOptions, GarbageCollectionTarget},
3636
stats::{ChunkStoreChunkStats, ChunkStoreStats},
3737
store::{ChunkStore, ChunkStoreConfig, ChunkStoreGeneration, ChunkStoreHandle, ColumnMetadata},
3838
subscribers::{ChunkStoreSubscriber, ChunkStoreSubscriberHandle, PerStoreChunkSubscriber},
3939
};
40-
pub use re_sorbet::{ComponentColumnDescriptor, TimeColumnDescriptor};
40+
pub use re_sorbet::{ColumnDescriptor, ComponentColumnDescriptor, TimeColumnDescriptor};
4141

4242
pub(crate) use self::store::ColumnMetadataState;
4343

crates/store/re_chunk_store/src/protobuf_conversions.rs

Lines changed: 3 additions & 160 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
1-
use re_protos::invalid_field;
2-
use re_protos::missing_field;
3-
use re_protos::TypeConversionError;
4-
use std::collections::BTreeMap;
5-
use std::collections::BTreeSet;
1+
use std::collections::{BTreeMap, BTreeSet};
2+
3+
use re_protos::{missing_field, TypeConversionError};
64

75
impl TryFrom<re_protos::common::v0::ComponentColumnSelector> for crate::ComponentColumnSelector {
86
type Error = TypeConversionError;
@@ -258,112 +256,6 @@ impl From<crate::QueryExpression> for re_protos::common::v0::Query {
258256
}
259257
}
260258

261-
impl TryFrom<crate::ColumnDescriptor> for re_protos::common::v0::ColumnDescriptor {
262-
type Error = TypeConversionError;
263-
264-
fn try_from(value: crate::ColumnDescriptor) -> Result<Self, Self::Error> {
265-
match value {
266-
crate::ColumnDescriptor::Time(time_descriptor) => Ok(Self {
267-
descriptor_type: Some(
268-
re_protos::common::v0::column_descriptor::DescriptorType::TimeColumn(
269-
re_protos::common::v0::TimeColumnDescriptor {
270-
timeline: Some(re_protos::common::v0::Timeline {
271-
name: time_descriptor.timeline.name().to_string(),
272-
}),
273-
datatype: serde_json::to_string(&time_descriptor.datatype).map_err(
274-
|err| invalid_field!(Self, "time column descriptor", err),
275-
)?,
276-
},
277-
),
278-
),
279-
}),
280-
crate::ColumnDescriptor::Component(component_descriptor) => Ok(Self {
281-
descriptor_type: Some(
282-
re_protos::common::v0::column_descriptor::DescriptorType::ComponentColumn(
283-
re_protos::common::v0::ComponentColumnDescriptor {
284-
entity_path: Some(component_descriptor.entity_path.into()),
285-
archetype_name: component_descriptor
286-
.archetype_name
287-
.map(|an| an.to_string()),
288-
archetype_field_name: component_descriptor
289-
.archetype_field_name
290-
.map(|afn| afn.to_string()),
291-
component_name: component_descriptor.component_name.to_string(),
292-
datatype: serde_json::to_string(&component_descriptor.store_datatype)
293-
.map_err(|err| {
294-
invalid_field!(Self, "component column descriptor", err)
295-
})?,
296-
is_static: component_descriptor.is_static,
297-
is_tombstone: component_descriptor.is_tombstone,
298-
is_semantically_empty: component_descriptor.is_semantically_empty,
299-
is_indicator: component_descriptor.is_indicator,
300-
},
301-
),
302-
),
303-
}),
304-
}
305-
}
306-
}
307-
308-
impl TryFrom<re_protos::common::v0::ColumnDescriptor> for crate::ColumnDescriptor {
309-
type Error = TypeConversionError;
310-
311-
fn try_from(value: re_protos::common::v0::ColumnDescriptor) -> Result<Self, Self::Error> {
312-
let descriptor = value.descriptor_type.ok_or(missing_field!(
313-
re_protos::common::v0::ColumnDescriptor,
314-
"descriptor_type",
315-
))?;
316-
317-
match descriptor {
318-
re_protos::common::v0::column_descriptor::DescriptorType::TimeColumn(
319-
time_descriptor,
320-
) => Ok(Self::Time(crate::TimeColumnDescriptor {
321-
timeline: time_descriptor
322-
.timeline
323-
.ok_or(missing_field!(
324-
re_protos::common::v0::TimeColumnDescriptor,
325-
"timeline",
326-
))?
327-
.into(),
328-
datatype: serde_json::from_str(&time_descriptor.datatype).map_err(|err| {
329-
invalid_field!(
330-
re_protos::common::v0::ColumnDescriptor,
331-
"time column descriptor",
332-
err
333-
)
334-
})?,
335-
})),
336-
re_protos::common::v0::column_descriptor::DescriptorType::ComponentColumn(
337-
component_descriptor,
338-
) => Ok(Self::Component(crate::ComponentColumnDescriptor {
339-
entity_path: component_descriptor
340-
.entity_path
341-
.ok_or(missing_field!(
342-
re_protos::common::v0::ComponentColumnDescriptor,
343-
"entity_path",
344-
))?
345-
.try_into()?,
346-
archetype_name: component_descriptor.archetype_name.map(Into::into),
347-
archetype_field_name: component_descriptor.archetype_field_name.map(Into::into),
348-
component_name: component_descriptor.component_name.into(),
349-
store_datatype: serde_json::from_str(&component_descriptor.datatype).map_err(
350-
|err| {
351-
invalid_field!(
352-
re_protos::common::v0::ColumnDescriptor,
353-
"component column descriptor",
354-
err
355-
)
356-
},
357-
)?,
358-
is_static: component_descriptor.is_static,
359-
is_tombstone: component_descriptor.is_tombstone,
360-
is_semantically_empty: component_descriptor.is_semantically_empty,
361-
is_indicator: component_descriptor.is_indicator,
362-
})),
363-
}
364-
}
365-
}
366-
367259
#[cfg(test)]
368260
mod tests {
369261
use re_protos::common::v0::{
@@ -442,53 +334,4 @@ mod tests {
442334

443335
assert_eq!(grpc_query_before, grpc_query_after);
444336
}
445-
446-
#[test]
447-
fn test_time_column_descriptor_conversion() {
448-
let time_descriptor = crate::TimeColumnDescriptor {
449-
timeline: crate::Timeline::log_time(),
450-
datatype: arrow::datatypes::DataType::Timestamp(
451-
arrow::datatypes::TimeUnit::Nanosecond,
452-
None,
453-
),
454-
};
455-
456-
let descriptor = crate::ColumnDescriptor::Time(time_descriptor.clone());
457-
458-
let proto_descriptor: re_protos::common::v0::ColumnDescriptor =
459-
descriptor.try_into().unwrap();
460-
let descriptor_after = proto_descriptor.try_into().unwrap();
461-
let crate::ColumnDescriptor::Time(time_descriptor_after) = descriptor_after else {
462-
panic!("Expected TimeColumnDescriptor")
463-
};
464-
465-
assert_eq!(time_descriptor, time_descriptor_after);
466-
}
467-
468-
#[test]
469-
fn test_component_column_descriptor_conversion() {
470-
let component_descriptor = crate::ComponentColumnDescriptor {
471-
entity_path: re_log_types::EntityPath::from("/some/path"),
472-
archetype_name: Some("archetype".to_owned().into()),
473-
archetype_field_name: Some("field".to_owned().into()),
474-
component_name: re_chunk::ComponentName::new("component"),
475-
store_datatype: arrow::datatypes::DataType::Int64,
476-
is_static: true,
477-
is_tombstone: false,
478-
is_semantically_empty: false,
479-
is_indicator: true,
480-
};
481-
482-
let descriptor = crate::ColumnDescriptor::Component(component_descriptor.clone());
483-
484-
let proto_descriptor: re_protos::common::v0::ColumnDescriptor =
485-
descriptor.try_into().unwrap();
486-
let descriptor_after = proto_descriptor.try_into().unwrap();
487-
let crate::ColumnDescriptor::Component(component_descriptor_after) = descriptor_after
488-
else {
489-
panic!("Expected ComponentColumnDescriptor")
490-
};
491-
492-
assert_eq!(component_descriptor, component_descriptor_after);
493-
}
494337
}

0 commit comments

Comments
 (0)