Skip to content

Commit 3b74b1f

Browse files
committed
remove test with custom created parquet file
1 parent c08f364 commit 3b74b1f

File tree

1 file changed

+0
-111
lines changed
  • datafusion/core/tests/parquet

1 file changed

+0
-111
lines changed

datafusion/core/tests/parquet/mod.rs

Lines changed: 0 additions & 111 deletions
Original file line numberDiff line numberDiff line change
@@ -1139,114 +1139,3 @@ async fn test_predicate_filter_on_go_parquet_file_without_time_captured() {
11391139
let formatted = pretty_format_batches(&rows).unwrap().to_string();
11401140
assert_eq!(formatted, expected.join("\n"));
11411141
}
1142-
1143-
#[tokio::test]
1144-
async fn test_predicate_filter_on_custom_parquet_file_with_tz() {
1145-
use std::sync::Arc;
1146-
use tempfile::NamedTempFile;
1147-
1148-
use arrow::{
1149-
array::{
1150-
BooleanArray, Int32Array, StringArray, TimestampMillisecondArray,
1151-
TimestampMillisecondBuilder,
1152-
},
1153-
datatypes::{DataType, Field, Schema, TimeUnit},
1154-
record_batch::RecordBatch,
1155-
};
1156-
use datafusion::arrow::util::pretty::pretty_format_batches;
1157-
use datafusion::prelude::{ParquetReadOptions, SessionContext};
1158-
use parquet::{arrow::ArrowWriter, file::properties::WriterProperties};
1159-
1160-
// Create schema with a time zone in the Timestamp field
1161-
let schema = Arc::new(Schema::new(vec![
1162-
Field::new("city", DataType::Utf8, false),
1163-
Field::new("country", DataType::Utf8, false),
1164-
Field::new("age", DataType::Int32, false),
1165-
Field::new("scale", DataType::Int32, false),
1166-
Field::new("status", DataType::Int32, false),
1167-
Field::new("checked", DataType::Boolean, false),
1168-
Field::new(
1169-
"time_captured",
1170-
DataType::Timestamp(TimeUnit::Millisecond, Some(Arc::from("Etc/GMT-8"))), // <--- note Some("Etc/GMT-8")
1171-
true,
1172-
),
1173-
]));
1174-
1175-
// Build arrays for other fields
1176-
let city = StringArray::from(vec!["Athens", "Madrid"]);
1177-
let country = StringArray::from(vec!["Greece", "Spain"]);
1178-
let age = Int32Array::from(vec![32, 10]);
1179-
let scale = Int32Array::from(vec![1, -1]);
1180-
let status = Int32Array::from(vec![20, 12]);
1181-
let checked = BooleanArray::from(vec![true, false]);
1182-
1183-
// (A) Build a normal "no time zone" timestamp array
1184-
let mut ts_builder = TimestampMillisecondBuilder::new();
1185-
ts_builder.append_value(1737740040715); // 2025-01-24T17:34:00.715Z
1186-
ts_builder.append_value(1737736440715); // 2025-01-24T16:34:00.715Z
1187-
let array_no_tz = ts_builder.finish();
1188-
1189-
// (B) Override array's data type to "Timestamp(Millisecond, Some(Etc/GMT-8))"
1190-
let array_data = array_no_tz.into_data();
1191-
let new_array_data = array_data
1192-
.into_builder()
1193-
.data_type(DataType::Timestamp(
1194-
TimeUnit::Millisecond,
1195-
Some(Arc::from("Etc/GMT-8")),
1196-
))
1197-
.build()
1198-
.unwrap();
1199-
let time_captured = TimestampMillisecondArray::from(new_array_data);
1200-
1201-
// Create a RecordBatch that matches the schema
1202-
let batch = RecordBatch::try_new(
1203-
schema.clone(),
1204-
vec![
1205-
Arc::new(city),
1206-
Arc::new(country),
1207-
Arc::new(age),
1208-
Arc::new(scale),
1209-
Arc::new(status),
1210-
Arc::new(checked),
1211-
Arc::new(time_captured),
1212-
],
1213-
)
1214-
.unwrap();
1215-
1216-
// Write data to Parquet file
1217-
let file = NamedTempFile::new().unwrap();
1218-
let props = WriterProperties::builder().build();
1219-
let mut writer = ArrowWriter::try_new(file.as_file(), schema, Some(props)).unwrap();
1220-
writer.write(&batch).unwrap();
1221-
writer.close().unwrap();
1222-
1223-
std::fs::copy(file.path(), "tests/data/custom-testfile.parquet").unwrap();
1224-
let parquet_path = "tests/data/custom-testfile.parquet";
1225-
1226-
// Query with DataFusion
1227-
let ctx = SessionContext::new();
1228-
ctx.register_parquet(
1229-
"custom_parquet",
1230-
parquet_path,
1231-
ParquetReadOptions::default(),
1232-
)
1233-
.await
1234-
.expect("Failed to register Parquet file");
1235-
1236-
let df = ctx
1237-
.sql("SELECT city, age, time_captured FROM custom_parquet WHERE age > 10")
1238-
.await
1239-
.unwrap();
1240-
1241-
let rows = df.collect().await.expect("Error collecting rows");
1242-
1243-
let expected = vec![
1244-
"+--------+-----+-------------------------------+",
1245-
"| city | age | time_captured |",
1246-
"+--------+-----+-------------------------------+",
1247-
"| Athens | 32 | 2025-01-25T01:34:00.715+08:00 |",
1248-
"+--------+-----+-------------------------------+",
1249-
];
1250-
let formatted = pretty_format_batches(&rows).unwrap().to_string();
1251-
assert_eq!(formatted, expected.join("\n"));
1252-
}

0 commit comments

Comments
 (0)