1
1
"""Test pandas engine."""
2
2
3
- from datetime import date
4
- from typing import Any , Set
3
+ import datetime as dt
4
+ from typing import Tuple , List , Optional , Any , Set
5
+ from zoneinfo import ZoneInfo
5
6
6
7
import hypothesis
7
8
import hypothesis .extra .pandas as pd_st
13
14
import pytz
14
15
from hypothesis import given
15
16
17
+ from pandera import Field , DataFrameModel
16
18
from pandera .engines import pandas_engine
17
- from pandera .errors import ParserError
19
+ from pandera .errors import ParserError , SchemaError
18
20
19
21
UNSUPPORTED_DTYPE_CLS : Set [Any ] = set ()
20
22
@@ -202,6 +204,109 @@ def test_pandas_datetimetz_dtype(timezone_aware, data, timezone):
202
204
assert coerced_data .dt .tz == timezone
203
205
204
206
207
def generate_test_cases_timezone_flexible() -> List[
    Tuple[
        List[Any],
        Optional[dt.tzinfo],
        bool,
        Optional[List[dt.datetime]],
        bool,
    ]
]:
    """
    Generate the parameter combinations for ``test_dt_timezone_flexible``.

    Four fixed input lists (mixed tz + naive, multiple tz, naive only,
    single tz) are combined with both coerce flags and three ``tz``
    settings (``None``, a ``ZoneInfo`` and a fixed-offset ``timezone``).
    Two extra cases whose input contains a non-datetime value are appended
    at the end.

    Returns:
        List of tuples:
        - List of input values
        - tz for DateTime constructor
        - coerce flag for Field constructor
        - expected output datetimes (``None`` when an exception is expected)
        - raises flag (True if an exception is expected, False otherwise)
    """
    new_york = ZoneInfo("America/New_York")
    los_angeles = ZoneInfo("America/Los_Angeles")

    datetimes = [
        # multi tz and tz naive
        [
            dt.datetime(2023, 3, 1, 4, tzinfo=new_york),
            dt.datetime(2023, 3, 1, 5, tzinfo=los_angeles),
            dt.datetime(2023, 3, 1, 5),
        ],
        # multi tz
        [
            dt.datetime(2023, 3, 1, 4, tzinfo=new_york),
            dt.datetime(2023, 3, 1, 5, tzinfo=los_angeles),
        ],
        # tz naive
        [
            dt.datetime(2023, 3, 1, 4),
            dt.datetime(2023, 3, 1, 5),
        ],
        # single tz
        [
            dt.datetime(2023, 3, 1, 4, tzinfo=new_york),
            dt.datetime(2023, 3, 1, 5, tzinfo=new_york),
        ],
    ]

    tz_options = [
        None,
        ZoneInfo("America/Chicago"),
        dt.timezone(dt.timedelta(hours=2)),
    ]

    test_cases = []

    for datetime_list in datetimes:
        # Depends only on the inputs, so compute it once per list. The loop
        # variable is named ``value`` to avoid shadowing the ``dt`` module.
        has_naive_datetime = any(
            value.tzinfo is None for value in datetime_list
        )

        for coerce in (True, False):
            for tz in tz_options:
                # Naive datetimes are only acceptable when coercion is on.
                raises = has_naive_datetime and not coerce

                if raises:
                    # No expected output since an exception will be raised.
                    expected_output = None
                elif coerce:
                    # Localize / convert the input datetimes to the
                    # specified tz or 'UTC' (default).
                    use_tz = tz if tz is not None else ZoneInfo("UTC")
                    localized = [
                        value.replace(tzinfo=use_tz)
                        for value in datetime_list
                        if value.tzinfo is None
                    ]
                    converted = [
                        value.astimezone(use_tz)
                        for value in datetime_list
                        if value.tzinfo is not None
                    ]
                    # Naive-origin values come first, so the order can
                    # differ from the input; the consuming test sorts both
                    # sides before comparing.
                    expected_output = localized + converted
                else:
                    # tz is ignored when not coercing.
                    expected_output = datetime_list

                test_cases.append(
                    (datetime_list, tz, coerce, expected_output, raises)
                )

    # Final test cases with an improper (non-datetime) value in the input;
    # both are flagged as raising regardless of the coerce setting.
    bad_type_list = [
        dt.datetime(2023, 3, 1, 4, tzinfo=new_york),
        "hello world",
    ]
    test_cases.extend(
        [
            (bad_type_list, None, True, None, True),
            (bad_type_list, None, False, None, True),
        ]
    )

    return test_cases
286
+
287
+
288
@pytest.mark.parametrize(
    "examples, tz, coerce, expected_output, raises",
    generate_test_cases_timezone_flexible(),
)
def test_dt_timezone_flexible(examples, tz, coerce, expected_output, raises):
    """Check validation of a ``timezone_flexible`` DateTime column.

    Each parametrized case supplies the input values, the ``tz`` and
    ``coerce`` settings, the datetimes expected after validation, and a
    flag saying whether validation should fail with ``SchemaError``.
    """

    # Validation goes through a DataFrameModel instead of calling the dtype's
    # coerce/validate directly because, with timezone_flexible, the dtype is
    # set dynamically based on the input data.
    class SimpleSchema(DataFrameModel):
        datetime_column: pandas_engine.DateTime(
            timezone_flexible=True, tz=tz
        ) = Field(coerce=coerce)

    frame = pd.DataFrame({'datetime_column': examples})

    if not raises:
        validated = SimpleSchema.validate(frame)
        # Compare order-insensitively: the expected values are not
        # guaranteed to be listed in input order.
        actual = sorted(validated['datetime_column'].tolist())
        assert actual == sorted(expected_output)
        return

    with pytest.raises(SchemaError):
        SimpleSchema.validate(frame)
308
+
309
+
205
310
@hypothesis .settings (max_examples = 1000 )
206
311
@pytest .mark .parametrize ("to_df" , [True , False ])
207
312
@given (
@@ -225,7 +330,7 @@ def test_pandas_date_coerce_dtype(to_df, data):
225
330
)
226
331
227
332
assert (
228
- coerced_data .applymap (lambda x : isinstance (x , date ))
333
+ coerced_data .applymap (lambda x : isinstance (x , dt . date ))
229
334
| coerced_data .isna ()
230
335
).all (axis = None )
231
336
return
@@ -234,7 +339,7 @@ def test_pandas_date_coerce_dtype(to_df, data):
234
339
coerced_data .isna ().all () and coerced_data .dtype == "datetime64[ns]"
235
340
)
236
341
assert (
237
- coerced_data .map (lambda x : isinstance (x , date )) | coerced_data .isna ()
342
+ coerced_data .map (lambda x : isinstance (x , dt . date )) | coerced_data .isna ()
238
343
).all ()
239
344
240
345
@@ -246,8 +351,8 @@ def test_pandas_date_coerce_dtype(to_df, data):
246
351
pyarrow .struct ([("foo" , pyarrow .int64 ()), ("bar" , pyarrow .string ())]),
247
352
),
248
353
(pd .Series ([None , pd .NA , np .nan ]), pyarrow .null ),
249
- (pd .Series ([None , date (1970 , 1 , 1 )]), pyarrow .date32 ),
250
- (pd .Series ([None , date (1970 , 1 , 1 )]), pyarrow .date64 ),
354
+ (pd .Series ([None , dt . date (1970 , 1 , 1 )]), pyarrow .date32 ),
355
+ (pd .Series ([None , dt . date (1970 , 1 , 1 )]), pyarrow .date64 ),
251
356
(pd .Series ([1 , 2 ]), pyarrow .duration ("ns" )),
252
357
(pd .Series ([1 , 1e3 , 1e6 , 1e9 , None ]), pyarrow .time32 ("ms" )),
253
358
(pd .Series ([1 , 1e3 , 1e6 , 1e9 , None ]), pyarrow .time64 ("ns" )),
@@ -292,8 +397,8 @@ def test_pandas_arrow_dtype(data, dtype):
292
397
pyarrow .struct ([("foo" , pyarrow .string ()), ("bar" , pyarrow .int64 ())]),
293
398
),
294
399
(pd .Series (["a" , "1" ]), pyarrow .null ),
295
- (pd .Series (["a" , date (1970 , 1 , 1 ), "1970-01-01" ]), pyarrow .date32 ),
296
- (pd .Series (["a" , date (1970 , 1 , 1 ), "1970-01-01" ]), pyarrow .date64 ),
400
+ (pd .Series (["a" , dt . date (1970 , 1 , 1 ), "1970-01-01" ]), pyarrow .date32 ),
401
+ (pd .Series (["a" , dt . date (1970 , 1 , 1 ), "1970-01-01" ]), pyarrow .date64 ),
297
402
(pd .Series (["a" ]), pyarrow .duration ("ns" )),
298
403
(pd .Series (["a" , "b" ]), pyarrow .time32 ("ms" )),
299
404
(pd .Series (["a" , "b" ]), pyarrow .time64 ("ns" )),
0 commit comments