|
| 1 | +use std::sync::Arc; |
| 2 | + |
| 3 | +use arrow::{ |
| 4 | + array::{ArrayRef, RecordBatch, StringArray, TimestampNanosecondArray}, |
| 5 | + datatypes::{DataType, Field, Schema, TimeUnit}, |
| 6 | +}; |
| 7 | + |
| 8 | +use crate::manifest_registry::v1alpha1::CreatePartitionManifestsResponse; |
| 9 | + |
| 10 | +// --- CreatePartitionManifestsResponse --- |
| 11 | + |
| 12 | +impl CreatePartitionManifestsResponse { |
| 13 | + pub const FIELD_ID: &str = "id"; |
| 14 | + pub const FIELD_UPDATED_AT: &str = "updated_at"; |
| 15 | + pub const FIELD_URL: &str = "url"; |
| 16 | + pub const FIELD_ERROR: &str = "error"; |
| 17 | + |
| 18 | + /// The Arrow schema of the dataframe in [`Self::data`]. |
| 19 | + pub fn schema() -> Schema { |
| 20 | + Schema::new(vec![ |
| 21 | + Field::new(Self::FIELD_ID, DataType::Utf8, false), |
| 22 | + Field::new( |
| 23 | + Self::FIELD_UPDATED_AT, |
| 24 | + DataType::Timestamp(TimeUnit::Nanosecond, None), |
| 25 | + true, |
| 26 | + ), |
| 27 | + Field::new(Self::FIELD_URL, DataType::Utf8, true), |
| 28 | + Field::new(Self::FIELD_ERROR, DataType::Utf8, true), |
| 29 | + ]) |
| 30 | + } |
| 31 | + |
| 32 | + /// Helper to simplify instantiation of the dataframe in [`Self::data`]. |
| 33 | + pub fn create_dataframe( |
| 34 | + partition_ids: Vec<String>, |
| 35 | + updated_ats: Vec<Option<jiff::Timestamp>>, |
| 36 | + partition_manifest_urls: Vec<Option<String>>, |
| 37 | + errors: Vec<Option<String>>, |
| 38 | + ) -> arrow::error::Result<RecordBatch> { |
| 39 | + let updated_ats = updated_ats |
| 40 | + .into_iter() |
| 41 | + .map(|ts| ts.map(|ts| ts.as_nanosecond() as i64)) // ~300 years should be fine |
| 42 | + .collect::<Vec<_>>(); |
| 43 | + |
| 44 | + let schema = Arc::new(Self::schema()); |
| 45 | + let columns: Vec<ArrayRef> = vec![ |
| 46 | + Arc::new(StringArray::from(partition_ids)), |
| 47 | + Arc::new(TimestampNanosecondArray::from(updated_ats)), |
| 48 | + Arc::new(StringArray::from(partition_manifest_urls)), |
| 49 | + Arc::new(StringArray::from(errors)), |
| 50 | + ]; |
| 51 | + |
| 52 | + RecordBatch::try_new(schema, columns) |
| 53 | + } |
| 54 | +} |
| 55 | + |
| 56 | +// TODO(#9430): I'd love if I could do this, but this creates a nasty circular dep with `re_log_encoding`. |
// This impl is intentionally compiled out: the cfg predicate below can never hold. It is kept
// in a state that is expected to compile once the `re_log_encoding` dependency can be used here.
#[cfg(all(unix, windows))] // always statically false
impl TryFrom<RecordBatch> for CreatePartitionManifestsResponse {
    type Error = tonic::Status;

    /// Wraps an encoded `batch` into a response.
    ///
    /// # Errors
    ///
    /// Returns an internal [`tonic::Status`] if [`Self::schema`] does not contain the schema of
    /// `batch`, or if encoding the batch fails.
    fn try_from(batch: RecordBatch) -> Result<Self, Self::Error> {
        // `RecordBatch::schema` hands back a shared `SchemaRef`, while `Schema::contains`
        // wants a `&Schema`: borrow it and let deref coercion do the rest.
        if !Self::schema().contains(&batch.schema()) {
            let typ = std::any::type_name::<Self>();
            return Err(tonic::Status::internal(format!(
                "invalid schema for {typ}: expected {:?} but got {:?}",
                Self::schema(),
                batch.schema(),
            )));
        }

        use re_log_encoding::codec::wire::encoder::Encode as _;

        // The mapped `Result` is the function's return value: no `?` and no trailing `;`,
        // otherwise the body would evaluate to `()`.
        batch
            .encode()
            .map(|data| Self { data: Some(data) })
            .map_err(|err| tonic::Status::internal(format!("failed to encode chunk: {err}")))
    }
}
| 78 | + |
| 79 | +// TODO(#9430): the other way around would be nice too, but same problem. |
0 commit comments