
Commit 6c284a1

kylebarron and gadomski authored
deps: update geoarrow-rs (#708)
## Notes

- geoarrow-rs no longer has `Table` or `ChunkedArray` concepts. This is for symmetry with upstream `arrow`, to nudge people toward iterators (especially `RecordBatchReader`) as much as possible, for simplicity and maintainability, and because I think higher-level libraries like DataFusion will take care of the "table" case. I think for now, if you need a "table" construct, it's best to have a struct wrapping `(Vec<RecordBatch>, Arc<Schema>)` yourself (a minimal sketch follows below).

## Checklist

- [ ] Unit tests
- [ ] Documentation, including doctests
- [ ] Git history is linear
- [ ] Commit messages are descriptive
- [ ] (optional) Git commit messages follow [conventional commits](https://www.conventionalcommits.org/en/v1.0.0/)
- [ ] Code is formatted (`cargo fmt`)
- [ ] `cargo test`
- [ ] Changes are added to the CHANGELOG

---------

Co-authored-by: Pete Gadomski <[email protected]>
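A minimal sketch of the wrapper the note suggests, assuming only `arrow-array` and `arrow-schema`; the struct and method names here are illustrative and not part of geoarrow-rs or this repository:

```rust
use std::sync::Arc;

use arrow_array::{RecordBatch, RecordBatchIterator, RecordBatchReader};
use arrow_schema::{ArrowError, Schema};

/// Hypothetical user-side "table": just the batches plus their shared schema.
struct Table {
    batches: Vec<RecordBatch>,
    schema: Arc<Schema>,
}

impl Table {
    /// Collect a reader into an in-memory table.
    fn from_reader<R: RecordBatchReader>(reader: R) -> Result<Self, ArrowError> {
        let schema = reader.schema();
        let batches = reader.collect::<Result<Vec<_>, _>>()?;
        Ok(Self { batches, schema })
    }

    /// Turn the table back into a reader, for APIs that consume one.
    fn into_reader(self) -> impl RecordBatchReader {
        RecordBatchIterator::new(self.batches.into_iter().map(Ok), self.schema)
    }
}
```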
1 parent 4d6a60a commit 6c284a1

10 files changed: 266 additions, 188 deletions

.github/workflows/docs.yml renamed to .github/workflows/cd.yml

+22 -5

@@ -1,12 +1,13 @@
-name: Docs
+name: CD

 on:
   push:
     branches:
-      - "main"
+      - main

 jobs:
-  build:
+  build-docs:
+    name: Build docs
     runs-on: ubuntu-latest
     env:
       GIT_COMMITTER_NAME: ci-bot
@@ -22,8 +23,9 @@ jobs:
         id: deployment
         with:
           path: site/
-  deploy:
-    needs: build
+  deploy-docs:
+    needs: build-docs
+    name: Deploy docs
     permissions:
       pages: write
       id-token: write
@@ -34,3 +36,18 @@ jobs:
     steps:
       - uses: actions/deploy-pages@v4
         id: deployment
+  coverage:
+    name: Coverage
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: Swatinem/rust-cache@v2
+      - name: Install tarpaulin
+        run: cargo install cargo-tarpaulin
+      - name: Test w/ coverage
+        run: cargo tarpaulin -p stac -p stac-api --all-features --out xml
+      - uses: codecov/codecov-action@v5
+        with:
+          files: ./cobertura.xml
+          token: ${{ secrets.CODECOV_TOKEN }}
+          fail_ci_if_error: true

.github/workflows/ci.yml

-15
@@ -31,21 +31,6 @@ jobs:
       - uses: Swatinem/rust-cache@v2
       - name: Test
         run: cargo test -p stac --all-features
-  coverage:
-    name: Coverage
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-      - uses: Swatinem/rust-cache@v2
-      - name: Install tarpaulin
-        run: cargo install cargo-tarpaulin
-      - name: Test w/ coverage
-        run: cargo tarpaulin -p stac -p stac-api --all-features --out xml
-      - uses: codecov/codecov-action@v5
-        with:
-          files: ./cobertura.xml
-          token: ${{ secrets.CODECOV_TOKEN }}
-          fail_ci_if_error: true
   check-features-core:
     name: Check all features
     runs-on: ubuntu-latest

Cargo.toml

+12 -8

@@ -29,7 +29,10 @@ rust-version = "1.85"

 [workspace.dependencies]
 anyhow = "1.0"
-arrow = "54.2.1"
+arrow-array = "55.0.0"
+arrow-cast = "55.0.0"
+arrow-json = "55.0.0"
+arrow-schema = "55.0.0"
 assert-json-diff = "2.0"
 assert_cmd = "2.0"
 async-stream = "0.3.6"
@@ -40,12 +43,15 @@ bytes = "1.7"
 chrono = "0.4.39"
 clap = "4.5"
 cql2 = "0.3.7-beta.0"
-duckdb = "1.2.1"
+duckdb = "1.2.2"
 fluent-uri = "0.3.2"
 futures = "0.3.31"
-geo = "0.29.0"
+geo = "0.30.0"
+geo-traits = "0.2"
 geo-types = "0.7.15"
-geoarrow = "0.4.0-beta.4"
+geoarrow-array = { git = "https://github.com/geoarrow/geoarrow-rs/", rev = "17bf33e4cf78b060afa08ca9560dc4efd73c2c76" }
+geoarrow-geoparquet = { git = "https://github.com/geoarrow/geoarrow-rs/", rev = "17bf33e4cf78b060afa08ca9560dc4efd73c2c76" }
+geoarrow-schema = { git = "https://github.com/geoarrow/geoarrow-rs/", rev = "17bf33e4cf78b060afa08ca9560dc4efd73c2c76" }
 geojson = "0.24.1"
 http = "1.1"
 indexmap = { version = "2.9.0", features = ["serde"] }
@@ -55,7 +61,7 @@ log = "0.4.25"
 mime = "0.3.17"
 mockito = "1.5"
 object_store = "0.12.0"
-parquet = { version = "54.2.1", default-features = false }
+parquet = { version = "55.0.0", default-features = false }
 pgstac = { version = "0.3.0", path = "crates/pgstac" }
 quote = "1.0"
 reqwest = { version = "0.12.8", default-features = false, features = [
@@ -91,6 +97,4 @@ url = "2.3"
 webpki-roots = "0.26.8"

 [patch.crates-io]
-# TODO unpatch cql2 when we upgrade geoarrow to use geo v0.30
-cql2 = { git = "https://github.com/gadomski/cql2-rs", branch = "geo-v0.29" }
-geoarrow = { git = "https://github.com/geoarrow/geoarrow-rs/", rev = "2cd0d623e4b9f1ac3bc5ff6563ccce689a47c641" }
+duckdb = { git = "https://github.com/gadomski/duckdb-rs/", branch = "arrow-v55" }

crates/core/Cargo.toml

+19 -12

@@ -13,17 +13,18 @@ rust-version.workspace = true

 [features]
 geo = ["dep:geo"]
-geoarrow = ["dep:geoarrow", "dep:arrow", "dep:geo-types"]
-geoparquet = ["geoarrow", "geoarrow/parquet", "dep:parquet"]
-geoparquet-compression = [
-    "geoparquet",
-    "geoarrow/parquet_compression",
-    "parquet/snap",
-    "parquet/brotli",
-    "parquet/flate2",
-    "parquet/lz4",
-    "parquet/zstd",
+geoarrow = [
+    "dep:geoarrow-array",
+    "dep:geoarrow-schema",
+    "dep:arrow-array",
+    "dep:arrow-cast",
+    "dep:arrow-json",
+    "dep:arrow-schema",
+    "dep:geo-traits",
+    "dep:geo-types",
 ]
+geoparquet = ["geoarrow", "dep:geoarrow-geoparquet", "dep:parquet"]
+geoparquet-compression = ["geoparquet", "geoarrow-geoparquet/compression"]
 object-store = ["dep:object_store", "dep:tokio"]
 object-store-aws = ["object-store", "object_store/aws"]
 object-store-azure = ["object-store", "object_store/azure"]
@@ -39,13 +40,19 @@ reqwest = ["dep:reqwest"]
 validate = ["dep:jsonschema", "dep:fluent-uri", "reqwest"]

 [dependencies]
-arrow = { workspace = true, optional = true }
+arrow-array = { workspace = true, optional = true, features = ["chrono-tz"] }
+arrow-cast = { workspace = true, optional = true }
+arrow-json = { workspace = true, optional = true }
+arrow-schema = { workspace = true, optional = true }
 bytes.workspace = true
 chrono = { workspace = true, features = ["serde"] }
 fluent-uri = { workspace = true, optional = true }
 geo = { workspace = true, optional = true }
+geo-traits = { workspace = true, optional = true }
 geo-types = { workspace = true, optional = true }
-geoarrow = { workspace = true, optional = true }
+geoarrow-array = { workspace = true, optional = true }
+geoarrow-geoparquet = { workspace = true, optional = true }
+geoarrow-schema = { workspace = true, optional = true }
 geojson.workspace = true
 indexmap.workspace = true
 jsonschema = { workspace = true, optional = true, features = ["resolve-http"] }

crates/core/src/error.rs

+4 -4

@@ -5,10 +5,10 @@ use thiserror::Error;
 #[derive(Error, Debug)]
 #[non_exhaustive]
 pub enum Error {
-    /// [arrow::error::ArrowError]
+    /// [arrow_schema::ArrowError]
     #[error(transparent)]
     #[cfg(feature = "geoarrow")]
-    Arrow(#[from] arrow::error::ArrowError),
+    Arrow(#[from] arrow_schema::ArrowError),

     /// [chrono::ParseError]
     #[error(transparent)]
@@ -34,10 +34,10 @@ pub enum Error {
         path: String,
     },

-    /// [geoarrow::error::GeoArrowError]
+    /// [geoarrow_array::error::GeoArrowError]
     #[error(transparent)]
     #[cfg(feature = "geoarrow")]
-    GeoArrow(#[from] geoarrow::error::GeoArrowError),
+    GeoArrow(#[from] geoarrow_array::error::GeoArrowError),

     /// [geojson::Error]
     #[error(transparent)]
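Because both variants keep `#[error(transparent)]` and `#[from]`, call sites can continue to use `?` on the underlying errors after the crate rename. A hypothetical helper (not part of this diff) sketching that, assuming the `geoarrow` feature is enabled:

```rust
#[cfg(feature = "geoarrow")]
fn geometry_field(schema: &arrow_schema::Schema) -> Result<&arrow_schema::Field, crate::Error> {
    // `?` converts the arrow_schema::ArrowError into crate::Error::Arrow via
    // the `#[from]` impl generated by thiserror.
    Ok(schema.field_with_name("geometry")?)
}
```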

crates/core/src/geoarrow/json.rs

+61 -32

@@ -38,12 +38,18 @@ const TOP_LEVEL_KEYS: [&str; 10] = [
     "collection",
 ];

-use arrow::array::{cast::*, types::*, *};
-use arrow::datatypes::*;
-use arrow::error::ArrowError;
-use arrow::json::JsonSerializable;
-use arrow::util::display::{ArrayFormatter, FormatOptions};
-use geoarrow::table::Table;
+use arrow_array::RecordBatchReader;
+use arrow_array::{cast::*, types::*, *};
+use arrow_cast::display::{ArrayFormatter, FormatOptions};
+use arrow_json::JsonSerializable;
+use arrow_schema::*;
+use geo_traits::to_geo::{
+    ToGeoGeometry, ToGeoGeometryCollection, ToGeoLineString, ToGeoMultiLineString, ToGeoMultiPoint,
+    ToGeoMultiPolygon, ToGeoPoint, ToGeoPolygon, ToGeoRect,
+};
+use geoarrow_array::ArrayAccessor;
+use geoarrow_array::array::from_arrow_array;
+use geoarrow_array::cast::AsGeoArrowArray;
 use serde_json::{Value, json, map::Map as JsonMap};
 use std::iter;

@@ -365,7 +371,7 @@ fn set_column_for_json_rows(
             })?;
         }
         DataType::Dictionary(_, value_type) => {
-            let hydrated = arrow::compute::cast(&array, value_type)
+            let hydrated = arrow_cast::cast(&array, value_type)
                 .expect("cannot cast dictionary to underlying values");
             set_column_for_json_rows(rows, &hydrated, col_name, explicit_nulls)?;
         }
@@ -418,40 +424,63 @@ fn set_column_for_json_rows(
     Ok(())
 }

-/// Converts a table to json rows.
-pub fn from_table(table: Table) -> Result<Vec<serde_json::Map<String, Value>>, crate::Error> {
-    use geoarrow::{array::AsNativeArray, datatypes::NativeType::*, trait_::ArrayAccessor};
-    use geojson::Value;
+/// Creates JSON values from a record batch reader.
+pub fn from_record_batch_reader<R: RecordBatchReader>(
+    reader: R,
+) -> Result<Vec<serde_json::Map<String, Value>>, crate::Error> {
+    use geoarrow_array::GeoArrowType;

-    let index = table
-        .schema()
-        .column_with_name("geometry")
-        .map(|(index, _)| index);
-    let mut json_rows = record_batches_to_json_rows(table.batches(), index)?;
+    let schema = reader.schema();
+    let geometry_index = schema.column_with_name("geometry").map(|(index, _)| index);
+
+    // For now we collect all batches into memory, but in the future we could iterate on the stream
+    // directly.
+    let batches = reader.collect::<Result<Vec<_>, _>>()?;
+    let mut json_rows = record_batches_to_json_rows(&batches, geometry_index)?;
     let mut items = Vec::new();
-    if let Some(index) = index {
-        for chunk in table.geometry_column(Some(index))?.geometry_chunks() {
+    if let Some(index) = geometry_index {
+        let field = schema.field(index);
+        for batch in &batches {
+            let array = batch.column(index);
+            let chunk = from_arrow_array(array, field)?;
             for i in 0..chunk.len() {
+                use GeoArrowType::*;
                 let value = match chunk.data_type() {
-                    Point(_, _) => Value::from(&chunk.as_ref().as_point().value_as_geo(i)),
-                    LineString(_, _) => {
-                        Value::from(&chunk.as_ref().as_line_string().value_as_geo(i))
+                    Point(_) => geojson::Value::from(&chunk.as_point().value(i)?.to_point()),
+                    LineString(_) => {
+                        geojson::Value::from(&chunk.as_line_string().value(i)?.to_line_string())
+                    }
+                    Polygon(_) => geojson::Value::from(&chunk.as_polygon().value(i)?.to_polygon()),
+                    MultiPoint(_) => {
+                        geojson::Value::from(&chunk.as_multi_point().value(i)?.to_multi_point())
                     }
-                    Polygon(_, _) => Value::from(&chunk.as_ref().as_polygon().value_as_geo(i)),
-                    MultiPoint(_, _) => {
-                        Value::from(&chunk.as_ref().as_multi_point().value_as_geo(i))
+                    MultiLineString(_) => geojson::Value::from(
+                        &chunk
+                            .as_multi_line_string()
+                            .value(i)?
+                            .to_multi_line_string(),
+                    ),
+                    MultiPolygon(_) => {
+                        geojson::Value::from(&chunk.as_multi_polygon().value(i)?.to_multi_polygon())
                     }
-                    MultiLineString(_, _) => {
-                        Value::from(&chunk.as_ref().as_multi_line_string().value_as_geo(i))
+                    Geometry(_) => {
+                        geojson::Value::from(&chunk.as_geometry().value(i)?.to_geometry())
                     }
-                    MultiPolygon(_, _) => {
-                        Value::from(&chunk.as_ref().as_multi_polygon().value_as_geo(i))
+                    GeometryCollection(_) => geojson::Value::from(
+                        &chunk
+                            .as_geometry_collection()
+                            .value(i)?
+                            .to_geometry_collection(),
+                    ),
+                    Rect(_) => geojson::Value::from(&chunk.as_rect().value(i)?.to_rect()),
+                    Wkb(_) => geojson::Value::from(&chunk.as_wkb::<i32>().value(i)?.to_geometry()),
+                    LargeWkb(_) => {
+                        geojson::Value::from(&chunk.as_wkb::<i64>().value(i)?.to_geometry())
                     }
-                    Geometry(_) => Value::from(&chunk.as_ref().as_geometry().value_as_geo(i)),
-                    GeometryCollection(_, _) => {
-                        Value::from(&chunk.as_ref().as_geometry_collection().value_as_geo(i))
+                    Wkt(_) => geojson::Value::from(&chunk.as_wkt::<i32>().value(i)?.to_geometry()),
+                    LargeWkt(_) => {
+                        geojson::Value::from(&chunk.as_wkt::<i64>().value(i)?.to_geometry())
                     }
-                    Rect(_) => Value::from(&chunk.as_ref().as_rect().value_as_geo(i)),
                 };
                 let mut row = json_rows
                     .next()
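A hedged usage sketch for the new `from_record_batch_reader` entry point, written as if from inside this module: it drives the function with `arrow_array::RecordBatchIterator`, which implements `RecordBatchReader`. The single batch has no `geometry` column (the function treats one as optional), and the example function itself is illustrative, not part of this diff:

```rust
use std::sync::Arc;

use arrow_array::{RecordBatch, RecordBatchIterator, StringArray};
use arrow_schema::{DataType, Field, Schema};

fn example() -> Result<(), Box<dyn std::error::Error>> {
    // One attribute-only batch with a single `id` column.
    let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Utf8, false)]));
    let batch = RecordBatch::try_new(
        schema.clone(),
        vec![Arc::new(StringArray::from(vec!["item-a", "item-b"]))],
    )?;
    // Wrap the batch in a RecordBatchIterator, which implements RecordBatchReader.
    let reader = RecordBatchIterator::new([Ok(batch)], schema);
    let rows = from_record_batch_reader(reader)?;
    println!("converted {} rows", rows.len());
    Ok(())
}
```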
