1
1
"""Test pandas engine."""
2
2
3
- from datetime import date
4
- from typing import Any , Set
3
+ import datetime as dt
4
+ from typing import Tuple , List , Optional , Any , Set
5
5
6
6
import hypothesis
7
7
import hypothesis .extra .pandas as pd_st
13
13
import pytz
14
14
from hypothesis import given
15
15
16
+ from pandera import Field , DataFrameModel , errors
16
17
from pandera .engines import pandas_engine
17
- from pandera .errors import ParserError
18
+ from pandera .errors import ParserError , SchemaError
18
19
19
20
# Module-level set of dtype classes; it starts out empty.
# NOTE(review): the name suggests these are dtypes the tests treat as
# unsupported, but every use is outside this view -- confirm.
UNSUPPORTED_DTYPE_CLS : Set [Any ] = set ()
20
21
@@ -202,6 +203,165 @@ def test_pandas_datetimetz_dtype(timezone_aware, data, timezone):
202
203
assert coerced_data .dt .tz == timezone
203
204
204
205
206
def _expected_after_coercion(
    datetime_list: List[dt.datetime], tz: Any
) -> List[dt.datetime]:
    """Return the datetimes expected once every value is coerced to ``tz``.

    Naive datetimes are localized to ``tz`` and listed first; timezone-aware
    datetimes are converted to ``tz`` and listed after them. ``tz`` must be a
    pytz timezone object (it needs a ``localize`` method).
    """
    localized = [
        tz.localize(dtime) for dtime in datetime_list if dtime.tzinfo is None
    ]
    converted = [
        dtime.astimezone(tz)
        for dtime in datetime_list
        if dtime.tzinfo is not None
    ]
    return localized + converted


def generate_test_cases_time_zone_agnostic() -> List[
    Tuple[
        List[Any],
        Optional[dt.tzinfo],
        bool,
        Optional[List[dt.datetime]],
        bool,
    ]
]:
    """
    Generate test parameter combinations for the time_zone_agnostic tests.

    Every built-in list of input datetimes is combined with each coerce flag
    and each candidate timezone; two extra cases mixing a datetime with a
    plain string are appended at the end.

    Returns:
        List of tuples:
            - List of input values (datetimes, or a datetime plus a string
              for the improper-type cases)
            - tz for DateTime constructor
            - coerce flag for Field constructor
            - expected output datetimes (None when an exception is expected)
            - raises flag (True if an exception is expected, False otherwise)
    """
    new_york = pytz.timezone("America/New_York")
    los_angeles = pytz.timezone("America/Los_Angeles")

    datetimes = [
        # multi tz and tz naive
        [
            new_york.localize(dt.datetime(2023, 3, 1, 4)),
            los_angeles.localize(dt.datetime(2023, 3, 1, 5)),
            dt.datetime(2023, 3, 1, 5),  # naive datetime
        ],
        # multi tz
        [
            new_york.localize(dt.datetime(2023, 3, 1, 4)),
            los_angeles.localize(dt.datetime(2023, 3, 1, 5)),
        ],
        # tz naive
        [dt.datetime(2023, 3, 1, 4), dt.datetime(2023, 3, 1, 5)],
        # single tz
        [
            new_york.localize(dt.datetime(2023, 3, 1, 4)),
            new_york.localize(dt.datetime(2023, 3, 1, 5)),
        ],
    ]

    # Loop-invariant, so built once instead of on every iteration.
    timezones = [
        None,
        pytz.timezone("America/Chicago"),
        pytz.FixedOffset(120),  # 120 minutes = 2 hours offset
    ]

    test_cases: List[Any] = []

    for datetime_list in datetimes:
        # ``dtime`` (not ``dt``) so the ``datetime as dt`` alias is not shadowed.
        has_naive_datetime = any(
            dtime.tzinfo is None for dtime in datetime_list
        )
        for coerce in [True, False]:
            for tz in timezones:
                # Should raise error when:
                #   * coerce is False but there is a timezone-naive datetime
                #   * coerce is True but tz is not set
                raises = (not coerce and has_naive_datetime) or (
                    coerce and tz is None
                )

                if raises:
                    # No expected output since an exception will be raised
                    expected_output = None
                elif coerce:
                    # tz cannot be None here: coerce with tz=None always
                    # lands in the ``raises`` branch above.
                    expected_output = _expected_after_coercion(
                        datetime_list, tz
                    )
                else:
                    # Without coercion tz is ignored and data passes through
                    expected_output = datetime_list

                test_cases.append(
                    (datetime_list, tz, coerce, expected_output, raises)
                )

    # Final test cases with an improper type: a string mixed into the column
    # must fail regardless of the coerce flag.
    bad_type_list = [
        new_york.localize(dt.datetime(2023, 3, 1, 4)),
        "hello world",
    ]
    test_cases.extend(
        (bad_type_list, None, coerce, None, True) for coerce in (True, False)
    )

    return test_cases
336
+
337
+
338
@pytest.mark.parametrize(
    "examples, tz, coerce, expected_output, raises",
    generate_test_cases_time_zone_agnostic(),
)
def test_dt_time_zone_agnostic(examples, tz, coerce, expected_output, raises):
    """Validate a time_zone_agnostic DateTime column for each generated case.

    Cases flagged with ``raises`` must fail validation with a SchemaError or
    ParserError; all others must yield ``expected_output`` (order-insensitive).
    """

    # Validation goes through a DataFrameModel instead of calling the dtype's
    # coerce/validate directly: with time_zone_agnostic the dtype is set
    # dynamically based on the input data.
    class SimpleSchema(DataFrameModel):
        # pylint: disable=unexpected-keyword-arg,no-value-for-parameter
        datetime_column: pandas_engine.DateTime(
            time_zone_agnostic=True, tz=tz
        ) = Field(coerce=coerce)

    frame = pd.DataFrame({"datetime_column": examples})

    if not raises:
        actual = SimpleSchema.validate(frame)["datetime_column"].tolist()
        # Sorted on both sides: only the set of values matters, not row order.
        assert sorted(actual) == sorted(expected_output)
        return

    with pytest.raises((SchemaError, errors.ParserError)):
        SimpleSchema.validate(frame)
363
+
364
+
205
365
@hypothesis .settings (max_examples = 1000 )
206
366
@pytest .mark .parametrize ("to_df" , [True , False ])
207
367
@given (
@@ -225,7 +385,7 @@ def test_pandas_date_coerce_dtype(to_df, data):
225
385
)
226
386
227
387
assert (
228
- coerced_data .applymap (lambda x : isinstance (x , date ))
388
+ coerced_data .applymap (lambda x : isinstance (x , dt . date ))
229
389
| coerced_data .isna ()
230
390
).all (axis = None )
231
391
return
@@ -234,7 +394,8 @@ def test_pandas_date_coerce_dtype(to_df, data):
234
394
coerced_data .isna ().all () and coerced_data .dtype == "datetime64[ns]"
235
395
)
236
396
assert (
237
- coerced_data .map (lambda x : isinstance (x , date )) | coerced_data .isna ()
397
+ coerced_data .map (lambda x : isinstance (x , dt .date ))
398
+ | coerced_data .isna ()
238
399
).all ()
239
400
240
401
@@ -246,8 +407,8 @@ def test_pandas_date_coerce_dtype(to_df, data):
246
407
pyarrow .struct ([("foo" , pyarrow .int64 ()), ("bar" , pyarrow .string ())]),
247
408
),
248
409
(pd .Series ([None , pd .NA , np .nan ]), pyarrow .null ),
249
- (pd .Series ([None , date (1970 , 1 , 1 )]), pyarrow .date32 ),
250
- (pd .Series ([None , date (1970 , 1 , 1 )]), pyarrow .date64 ),
410
+ (pd .Series ([None , dt . date (1970 , 1 , 1 )]), pyarrow .date32 ),
411
+ (pd .Series ([None , dt . date (1970 , 1 , 1 )]), pyarrow .date64 ),
251
412
(pd .Series ([1 , 2 ]), pyarrow .duration ("ns" )),
252
413
(pd .Series ([1 , 1e3 , 1e6 , 1e9 , None ]), pyarrow .time32 ("ms" )),
253
414
(pd .Series ([1 , 1e3 , 1e6 , 1e9 , None ]), pyarrow .time64 ("ns" )),
@@ -292,8 +453,8 @@ def test_pandas_arrow_dtype(data, dtype):
292
453
pyarrow .struct ([("foo" , pyarrow .string ()), ("bar" , pyarrow .int64 ())]),
293
454
),
294
455
(pd .Series (["a" , "1" ]), pyarrow .null ),
295
- (pd .Series (["a" , date (1970 , 1 , 1 ), "1970-01-01" ]), pyarrow .date32 ),
296
- (pd .Series (["a" , date (1970 , 1 , 1 ), "1970-01-01" ]), pyarrow .date64 ),
456
+ (pd .Series (["a" , dt . date (1970 , 1 , 1 ), "1970-01-01" ]), pyarrow .date32 ),
457
+ (pd .Series (["a" , dt . date (1970 , 1 , 1 ), "1970-01-01" ]), pyarrow .date64 ),
297
458
(pd .Series (["a" ]), pyarrow .duration ("ns" )),
298
459
(pd .Series (["a" , "b" ]), pyarrow .time32 ("ms" )),
299
460
(pd .Series (["a" , "b" ]), pyarrow .time64 ("ns" )),
0 commit comments