Skip to content

Commit 097bc60

Browse files
authored
Port codegen arrow deserialization to arrow-rs (#8375)
### Related
* Part of #3741
* ~Blocked by apache/arrow-rs#6901~
* #6830

### TODO
* [x] Run `@rerun-bot full-check`
1 parent 287b8d5 commit 097bc60

File tree

282 files changed

+4018
-4327
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

282 files changed

+4018
-4327
lines changed

Cargo.lock

+1
Original file line number | Diff line number | Diff line change
@@ -6035,6 +6035,7 @@ version = "0.22.0-alpha.1+dev"
60356035
dependencies = [
60366036
"ahash",
60376037
"anyhow",
6038+
"arrow",
60386039
"backtrace",
60396040
"bytemuck",
60406041
"clean-path",

crates/build/re_types_builder/src/codegen/rust/api.rs

+26-27
Original file line number | Diff line number | Diff line change
@@ -181,7 +181,7 @@ fn generate_object_file(
181181

182182
code.push_str("\n\n");
183183

184-
code.push_str("use ::re_types_core::external::arrow2;\n");
184+
code.push_str("use ::re_types_core::external::arrow;\n");
185185
code.push_str("use ::re_types_core::SerializationResult;\n");
186186
code.push_str("use ::re_types_core::{DeserializationResult, DeserializationError};\n");
187187
code.push_str("use ::re_types_core::{ComponentDescriptor, ComponentName};\n");
@@ -880,8 +880,8 @@ fn quote_trait_impls_for_datatype_or_component(
880880
}
881881
};
882882

883-
let quoted_from_arrow2 = if optimize_for_buffer_slice {
884-
let from_arrow2_body = if let Some(forwarded_type) = forwarded_type.as_ref() {
883+
let quoted_from_arrow = if optimize_for_buffer_slice {
884+
let from_arrow_body = if let Some(forwarded_type) = forwarded_type.as_ref() {
885885
let is_pod = obj
886886
.try_get_attr::<String>(ATTR_RUST_DERIVE)
887887
.map_or(false, |d| d.contains("bytemuck::Pod"))
@@ -890,11 +890,11 @@ fn quote_trait_impls_for_datatype_or_component(
890890
.map_or(false, |d| d.contains("bytemuck::Pod"));
891891
if is_pod {
892892
quote! {
893-
#forwarded_type::from_arrow2(arrow_data).map(bytemuck::cast_vec)
893+
#forwarded_type::from_arrow(arrow_data).map(bytemuck::cast_vec)
894894
}
895895
} else {
896896
quote! {
897-
#forwarded_type::from_arrow2(arrow_data).map(|v| v.into_iter().map(Self).collect())
897+
#forwarded_type::from_arrow(arrow_data).map(|v| v.into_iter().map(Self).collect())
898898
}
899899
}
900900
} else {
@@ -906,14 +906,13 @@ fn quote_trait_impls_for_datatype_or_component(
906906
// re_tracing::profile_function!();
907907

908908
#![allow(clippy::wildcard_imports)]
909-
use arrow::datatypes::*;
910-
use arrow2::{ array::*, buffer::*};
911-
use ::re_types_core::{Loggable as _, ResultExt as _};
909+
use arrow::{array::*, buffer::*, datatypes::*};
910+
use ::re_types_core::{arrow_zip_validity::ZipValidity, Loggable as _, ResultExt as _};
912911

913-
// This code-path cannot have null fields. If it does have a validity mask
914-
// all bits must indicate valid data.
915-
if let Some(validity) = arrow_data.validity() {
916-
if validity.unset_bits() != 0 {
912+
// This code-path cannot have null fields.
913+
// If it does have a nulls-array, all bits must indicate valid data.
914+
if let Some(nulls) = arrow_data.nulls() {
915+
if nulls.null_count() != 0 {
917916
return Err(DeserializationError::missing_data());
918917
}
919918
}
@@ -924,13 +923,13 @@ fn quote_trait_impls_for_datatype_or_component(
924923

925924
quote! {
926925
#[inline]
927-
fn from_arrow2(
928-
arrow_data: &dyn arrow2::array::Array,
926+
fn from_arrow(
927+
arrow_data: &dyn arrow::array::Array,
929928
) -> DeserializationResult<Vec<Self>>
930929
where
931930
Self: Sized
932931
{
933-
#from_arrow2_body
932+
#from_arrow_body
934933
}
935934
}
936935
} else {
@@ -940,7 +939,7 @@ fn quote_trait_impls_for_datatype_or_component(
940939
// Forward deserialization to existing datatype if it's transparent.
941940
let quoted_deserializer = if let Some(forwarded_type) = forwarded_type.as_ref() {
942941
quote! {
943-
#forwarded_type::from_arrow2_opt(arrow_data).map(|v| v.into_iter().map(|v| v.map(Self)).collect())
942+
#forwarded_type::from_arrow_opt(arrow_data).map(|v| v.into_iter().map(|v| v.map(Self)).collect())
944943
}
945944
} else {
946945
let quoted_deserializer = quote_arrow_deserializer(arrow_registry, objects, obj);
@@ -949,9 +948,9 @@ fn quote_trait_impls_for_datatype_or_component(
949948
// re_tracing::profile_function!();
950949

951950
#![allow(clippy::wildcard_imports)]
952-
use arrow::datatypes::*;
953-
use arrow2::{ array::*, buffer::*};
954-
use ::re_types_core::{Loggable as _, ResultExt as _};
951+
use arrow::{array::*, buffer::*, datatypes::*};
952+
use ::re_types_core::{arrow_zip_validity::ZipValidity, Loggable as _, ResultExt as _};
953+
955954
Ok(#quoted_deserializer)
956955
}
957956
};
@@ -1019,16 +1018,16 @@ fn quote_trait_impls_for_datatype_or_component(
10191018
#quoted_serializer
10201019

10211020
// NOTE: Don't inline this, this gets _huge_.
1022-
fn from_arrow2_opt(
1023-
arrow_data: &dyn arrow2::array::Array,
1021+
fn from_arrow_opt(
1022+
arrow_data: &dyn arrow::array::Array,
10241023
) -> DeserializationResult<Vec<Option<Self>>>
10251024
where
10261025
Self: Sized
10271026
{
10281027
#quoted_deserializer
10291028
}
10301029

1031-
#quoted_from_arrow2
1030+
#quoted_from_arrow
10321031
}
10331032
}
10341033
}
@@ -1227,7 +1226,7 @@ fn quote_trait_impls_for_archetype(obj: &Object) -> TokenStream {
12271226

12281227
quote! {
12291228
if let Some(array) = arrays_by_name.get(#field_typ_fqname_str) {
1230-
<#component>::from_arrow2_opt(&**array)
1229+
<#component>::from_arrow_opt(&**array)
12311230
.with_context(#obj_field_fqname)?
12321231
#quoted_collection
12331232
} else {
@@ -1238,7 +1237,7 @@ fn quote_trait_impls_for_archetype(obj: &Object) -> TokenStream {
12381237
quote! {
12391238
if let Some(array) = arrays_by_name.get(#field_typ_fqname_str) {
12401239
Some({
1241-
<#component>::from_arrow2_opt(&**array)
1240+
<#component>::from_arrow_opt(&**array)
12421241
.with_context(#obj_field_fqname)?
12431242
#quoted_collection
12441243
})
@@ -1253,7 +1252,7 @@ fn quote_trait_impls_for_archetype(obj: &Object) -> TokenStream {
12531252
.ok_or_else(DeserializationError::missing_data)
12541253
.with_context(#obj_field_fqname)?;
12551254

1256-
<#component>::from_arrow2_opt(&**array).with_context(#obj_field_fqname)? #quoted_collection
1255+
<#component>::from_arrow_opt(&**array).with_context(#obj_field_fqname)? #quoted_collection
12571256
}}
12581257
};
12591258

@@ -1323,10 +1322,10 @@ fn quote_trait_impls_for_archetype(obj: &Object) -> TokenStream {
13231322
}
13241323

13251324
#[inline]
1326-
fn from_arrow2_components(
1325+
fn from_arrow_components(
13271326
arrow_data: impl IntoIterator<Item = (
13281327
ComponentName,
1329-
Box<dyn arrow2::array::Array>,
1328+
arrow::array::ArrayRef,
13301329
)>,
13311330
) -> DeserializationResult<Self> {
13321331
re_tracing::profile_function!();

0 commit comments

Comments
 (0)