@@ -1139,114 +1139,3 @@ async fn test_predicate_filter_on_go_parquet_file_without_time_captured() {
1139
1139
let formatted = pretty_format_batches ( & rows) . unwrap ( ) . to_string ( ) ;
1140
1140
assert_eq ! ( formatted, expected. join( "\n " ) ) ;
1141
1141
}
1142
-
1143
- #[ tokio:: test]
1144
- async fn test_predicate_filter_on_custom_parquet_file_with_tz ( ) {
1145
- use std:: sync:: Arc ;
1146
- use tempfile:: NamedTempFile ;
1147
-
1148
- use arrow:: {
1149
- array:: {
1150
- BooleanArray , Int32Array , StringArray , TimestampMillisecondArray ,
1151
- TimestampMillisecondBuilder ,
1152
- } ,
1153
- datatypes:: { DataType , Field , Schema , TimeUnit } ,
1154
- record_batch:: RecordBatch ,
1155
- } ;
1156
- use datafusion:: arrow:: util:: pretty:: pretty_format_batches;
1157
- use datafusion:: prelude:: { ParquetReadOptions , SessionContext } ;
1158
- use parquet:: { arrow:: ArrowWriter , file:: properties:: WriterProperties } ;
1159
-
1160
- // Create schema with a time zone in the Timestamp field
1161
- let schema = Arc :: new ( Schema :: new ( vec ! [
1162
- Field :: new( "city" , DataType :: Utf8 , false ) ,
1163
- Field :: new( "country" , DataType :: Utf8 , false ) ,
1164
- Field :: new( "age" , DataType :: Int32 , false ) ,
1165
- Field :: new( "scale" , DataType :: Int32 , false ) ,
1166
- Field :: new( "status" , DataType :: Int32 , false ) ,
1167
- Field :: new( "checked" , DataType :: Boolean , false ) ,
1168
- Field :: new(
1169
- "time_captured" ,
1170
- DataType :: Timestamp ( TimeUnit :: Millisecond , Some ( Arc :: from( "Etc/GMT-8" ) ) ) , // <--- note Some("Etc/GMT-8")
1171
- true ,
1172
- ) ,
1173
- ] ) ) ;
1174
-
1175
- // Build arrays for other fields
1176
- let city = StringArray :: from ( vec ! [ "Athens" , "Madrid" ] ) ;
1177
- let country = StringArray :: from ( vec ! [ "Greece" , "Spain" ] ) ;
1178
- let age = Int32Array :: from ( vec ! [ 32 , 10 ] ) ;
1179
- let scale = Int32Array :: from ( vec ! [ 1 , -1 ] ) ;
1180
- let status = Int32Array :: from ( vec ! [ 20 , 12 ] ) ;
1181
- let checked = BooleanArray :: from ( vec ! [ true , false ] ) ;
1182
-
1183
- // (A) Build a normal "no time zone" timestamp array
1184
- let mut ts_builder = TimestampMillisecondBuilder :: new ( ) ;
1185
- ts_builder. append_value ( 1737740040715 ) ; // 2025-01-24T17:34:00.715Z
1186
- ts_builder. append_value ( 1737736440715 ) ; // 2025-01-24T16:34:00.715Z
1187
- let array_no_tz = ts_builder. finish ( ) ;
1188
-
1189
- // (B) Override array's data type to "Timestamp(Millisecond, Some(Etc/GMT-8))"
1190
- let array_data = array_no_tz. into_data ( ) ;
1191
- let new_array_data = array_data
1192
- . into_builder ( )
1193
- . data_type ( DataType :: Timestamp (
1194
- TimeUnit :: Millisecond ,
1195
- Some ( Arc :: from ( "Etc/GMT-8" ) ) ,
1196
- ) )
1197
- . build ( )
1198
- . unwrap ( ) ;
1199
- let time_captured = TimestampMillisecondArray :: from ( new_array_data) ;
1200
-
1201
- // Create a RecordBatch that matches the schema
1202
- let batch = RecordBatch :: try_new (
1203
- schema. clone ( ) ,
1204
- vec ! [
1205
- Arc :: new( city) ,
1206
- Arc :: new( country) ,
1207
- Arc :: new( age) ,
1208
- Arc :: new( scale) ,
1209
- Arc :: new( status) ,
1210
- Arc :: new( checked) ,
1211
- Arc :: new( time_captured) ,
1212
- ] ,
1213
- )
1214
- . unwrap ( ) ;
1215
-
1216
- // Write data to Parquet file
1217
- let file = NamedTempFile :: new ( ) . unwrap ( ) ;
1218
- let props = WriterProperties :: builder ( ) . build ( ) ;
1219
- let mut writer = ArrowWriter :: try_new ( file. as_file ( ) , schema, Some ( props) ) . unwrap ( ) ;
1220
- writer. write ( & batch) . unwrap ( ) ;
1221
- writer. close ( ) . unwrap ( ) ;
1222
-
1223
- std:: fs:: copy ( file. path ( ) , "tests/data/custom-testfile.parquet" ) . unwrap ( ) ;
1224
- let parquet_path = "tests/data/custom-testfile.parquet" ;
1225
-
1226
- // Query with DataFusion
1227
- let ctx = SessionContext :: new ( ) ;
1228
- ctx. register_parquet (
1229
- "custom_parquet" ,
1230
- parquet_path,
1231
- ParquetReadOptions :: default ( ) ,
1232
- )
1233
- . await
1234
- . expect ( "Failed to register Parquet file" ) ;
1235
-
1236
- let df = ctx
1237
- . sql ( "SELECT city, age, time_captured FROM custom_parquet WHERE age > 10" )
1238
- . await
1239
- . unwrap ( ) ;
1240
-
1241
- let rows = df. collect ( ) . await . expect ( "Error collecting rows" ) ;
1242
-
1243
- let expected = vec ! [
1244
- "+--------+-----+-------------------------------+" ,
1245
- "| city | age | time_captured |" ,
1246
- "+--------+-----+-------------------------------+" ,
1247
- "| Athens | 32 | 2025-01-25T01:34:00.715+08:00 |" ,
1248
- "+--------+-----+-------------------------------+" ,
1249
- ] ;
1250
- let formatted = pretty_format_batches ( & rows) . unwrap ( ) . to_string ( ) ;
1251
- assert_eq ! ( formatted, expected. join( "\n " ) ) ;
1252
- }
0 commit comments