1
1
"""Test pandas engine."""
2
2
3
+ < << << << HEAD
3
4
< << << << HEAD
4
5
import datetime as dt
5
6
from typing import Tuple , List , Optional
8
9
from datetime import date
9
10
from typing import Any , Set
10
11
> >> >> >> add pandas pyarrow backend support (#1628)
12
+ == == == =
13
+ import datetime as dt
14
+ from typing import Any , Set , Tuple , List , Optional
15
+ from zoneinfo import ZoneInfo
16
+ > >> >> >> Enhancement : Add support for timezone - flexible DateTime (#1352)
11
17
12
18
import hypothesis
13
19
import hypothesis .extra .pandas as pd_st
@@ -210,7 +216,13 @@ def test_pandas_datetimetz_dtype(timezone_aware, data, timezone):
210
216
211
217
212
218
def generate_test_cases_timezone_flexible () -> List [
219
+ << << << < HEAD
213
220
Tuple [List [dt .datetime ], Optional [dt .tzinfo ], bool , List [dt .datetime ], bool ]
221
+ == == == =
222
+ Tuple [
223
+ List [dt .datetime ], Optional [dt .tzinfo ], bool , List [dt .datetime ], bool
224
+ ]
225
+ >> >> >> > Enhancement : Add support for timezone - flexible DateTime (#1352)
214
226
]:
215
227
"""
216
228
Generate test parameter combinations for a given list of datetime lists.
@@ -226,6 +238,7 @@ def generate_test_cases_timezone_flexible() -> List[
226
238
datetimes = [
227
239
# multi tz and tz naive
228
240
[
241
+ < << << << HEAD
229
242
dt .datetime (2023 , 3 , 1 , 4 , tzinfo = ZoneInfo ('America/New_York' )),
230
243
dt .datetime (2023 , 3 , 1 , 5 , tzinfo = ZoneInfo ('America/Los_Angeles' )),
231
244
dt .datetime (2023 , 3 , 1 , 5 )
@@ -245,15 +258,45 @@ def generate_test_cases_timezone_flexible() -> List[
245
258
dt .datetime (2023 , 3 , 1 , 4 , tzinfo = ZoneInfo ('America/New_York' )),
246
259
dt .datetime (2023 , 3 , 1 , 5 , tzinfo = ZoneInfo ('America/New_York' ))
247
260
]
261
+ == == == =
262
+ dt .datetime (2023 , 3 , 1 , 4 , tzinfo = ZoneInfo ("America/New_York" )),
263
+ dt .datetime (2023 , 3 , 1 , 5 , tzinfo = ZoneInfo ("America/Los_Angeles" )),
264
+ dt .datetime (2023 , 3 , 1 , 5 ),
265
+ ],
266
+ # multi tz
267
+ [
268
+ dt .datetime (2023 , 3 , 1 , 4 , tzinfo = ZoneInfo ("America/New_York" )),
269
+ dt .datetime (2023 , 3 , 1 , 5 , tzinfo = ZoneInfo ("America/Los_Angeles" )),
270
+ ],
271
+ # tz naive
272
+ [dt .datetime (2023 , 3 , 1 , 4 ), dt .datetime (2023 , 3 , 1 , 5 )],
273
+ # single tz
274
+ [
275
+ dt .datetime (2023 , 3 , 1 , 4 , tzinfo = ZoneInfo ("America/New_York" )),
276
+ dt .datetime (2023 , 3 , 1 , 5 , tzinfo = ZoneInfo ("America/New_York" )),
277
+ ],
278
+ >> >> >> > Enhancement : Add support for timezone - flexible DateTime (#1352)
248
279
]
249
280
250
281
test_cases = []
251
282
252
283
for datetime_list in datetimes :
253
284
for coerce in [True , False ]:
285
+ << < << < < HEAD
254
286
for tz in [None , ZoneInfo ("America/Chicago" ), dt .timezone (dt .timedelta (hours = 2 ))]:
255
287
# Determine if the test should raise an exception
256
288
has_naive_datetime = any ([dt .tzinfo is None for dt in datetime_list ])
289
+ == == == =
290
+ for tz in [
291
+ None ,
292
+ ZoneInfo ("America/Chicago" ),
293
+ dt .timezone (dt .timedelta (hours = 2 )),
294
+ ]:
295
+ # Determine if the test should raise an exception
296
+ has_naive_datetime = any (
297
+ dt .tzinfo is None for dt in datetime_list
298
+ )
299
+ >> > >> >> Enhancement : Add support for timezone - flexible DateTime (#1352)
257
300
raises = has_naive_datetime and not coerce
258
301
259
302
# Generate expected output
@@ -264,52 +307,112 @@ def generate_test_cases_timezone_flexible() -> List[
264
307
# localize / convert the input datetimes to the specified tz or 'UTC' (default)
265
308
use_tz = tz if tz else ZoneInfo ("UTC" )
266
309
expected_output_naive = [
310
+ << < << < < HEAD
267
311
dt .replace (tzinfo = use_tz ) for dt in datetime_list if dt .tzinfo is None
268
312
]
269
313
expected_output_aware = [
270
314
dt .astimezone (use_tz ) for dt in datetime_list if dt .tzinfo is not None
271
315
]
272
316
expected_output = expected_output_naive + expected_output_aware
317
+ == == == =
318
+ dt .replace (tzinfo = use_tz )
319
+ for dt in datetime_list
320
+ if dt .tzinfo is None
321
+ ]
322
+ expected_output_aware = [
323
+ dt .astimezone (use_tz )
324
+ for dt in datetime_list
325
+ if dt .tzinfo is not None
326
+ ]
327
+ expected_output = (
328
+ expected_output_naive + expected_output_aware
329
+ )
330
+ >> >> > >> Enhancement : Add support for timezone - flexible DateTime (#1352)
273
331
else :
274
332
# ignore tz
275
333
expected_output = datetime_list
276
334
335
+ << < << << HEAD
277
336
test_case = (datetime_list , tz , coerce , expected_output , raises )
278
337
test_cases .append (test_case )
279
338
280
339
# define final test cases with improper type
281
340
datetime_list = [dt .datetime (2023 , 3 , 1 , 4 , tzinfo = ZoneInfo ('America/New_York' )), "hello world" ]
341
+ == == == =
342
+ test_case = (
343
+ datetime_list ,
344
+ tz ,
345
+ coerce ,
346
+ expected_output ,
347
+ raises ,
348
+ )
349
+ test_cases .append (test_case )
350
+
351
+ # define final test cases with improper type
352
+ datetime_list = [
353
+ dt .datetime (2023 , 3 , 1 , 4 , tzinfo = ZoneInfo ("America/New_York" )),
354
+ "hello world" ,
355
+ ]
356
+ >> >> > >> Enhancement : Add support for timezone - flexible DateTime (#1352)
282
357
tz = None
283
358
expected_output = None
284
359
raises = True
285
360
286
361
bad_type_coerce = (datetime_list , tz , True , expected_output , raises )
287
362
bad_type_no_coerce = (datetime_list , tz , False , expected_output , raises )
363
+ << << << < HEAD
288
364
test_cases .extend ([bad_type_coerce , bad_type_no_coerce ])
289
365
290
366
return test_cases
367
+ == == == =
368
+ test_cases .extend ([bad_type_coerce , bad_type_no_coerce ]) # type: ignore
369
+
370
+ return test_cases # type: ignore
371
+ >> >> >> > Enhancement : Add support for timezone - flexible DateTime (#1352)
291
372
292
373
293
374
@pytest.mark.parametrize(
    "examples, tz, coerce, expected_output, raises",
    generate_test_cases_timezone_flexible(),
)
def test_dt_timezone_flexible(examples, tz, coerce, expected_output, raises):
    """Test that timezone_flexible works as expected.

    Each parameter set is one tuple produced by
    ``generate_test_cases_timezone_flexible``.

    :param examples: values placed in the ``datetime_column`` of the frame
        under test.
    :param tz: timezone passed to ``pandas_engine.DateTime`` (may be None).
    :param coerce: whether the schema field is declared with ``coerce=True``.
    :param expected_output: values the validated column is expected to hold;
        only consulted when ``raises`` is false.
    :param raises: whether validation is expected to raise ``SchemaError``.
    """

    # Testing using a pandera DataFrameModel rather than directly calling
    # dtype coerce or validate because with timezone_flexible, dtype is set
    # dynamically based on the input data
    class SimpleSchema(DataFrameModel):
        """Simple DF Model for testing"""

        datetime_column: pandas_engine.DateTime(  # pylint: disable=unexpected-keyword-arg, no-value-for-parameter
            timezone_flexible=True, tz=tz
        ) = Field(
            coerce=coerce
        )

    data = pd.DataFrame({"datetime_column": examples})

    if raises:
        with pytest.raises(SchemaError):
            SimpleSchema.validate(data)
    else:
        validated_df = SimpleSchema.validate(data)
        # Compare sorted values: the expected list is built as naive-then-aware,
        # which need not match the row order of the validated column.
        assert sorted(validated_df["datetime_column"].tolist()) == sorted(
            expected_output
        )
313
416
314
417
315
418
@hypothesis .settings (max_examples = 1000 )
@@ -344,7 +447,12 @@ def test_pandas_date_coerce_dtype(to_df, data):
344
447
coerced_data .isna ().all () and coerced_data .dtype == "datetime64[ns]"
345
448
)
346
449
assert (
450
+ << << < << HEAD
347
451
coerced_data .map (lambda x : isinstance (x , dt .date )) | coerced_data .isna ()
452
+ == == == =
453
+ coerced_data .map (lambda x : isinstance (x , dt .date ))
454
+ | coerced_data .isna ()
455
+ >> > >> > > Enhancement : Add support for timezone - flexible DateTime (#1352)
348
456
).all ()
349
457
350
458
@@ -356,8 +464,8 @@ def test_pandas_date_coerce_dtype(to_df, data):
356
464
pyarrow .struct ([("foo" , pyarrow .int64 ()), ("bar" , pyarrow .string ())]),
357
465
),
358
466
(pd .Series ([None , pd .NA , np .nan ]), pyarrow .null ),
359
- (pd .Series ([None , date (1970 , 1 , 1 )]), pyarrow .date32 ),
360
- (pd .Series ([None , date (1970 , 1 , 1 )]), pyarrow .date64 ),
467
+ (pd .Series ([None , dt . date (1970 , 1 , 1 )]), pyarrow .date32 ),
468
+ (pd .Series ([None , dt . date (1970 , 1 , 1 )]), pyarrow .date64 ),
361
469
(pd .Series ([1 , 2 ]), pyarrow .duration ("ns" )),
362
470
(pd .Series ([1 , 1e3 , 1e6 , 1e9 , None ]), pyarrow .time32 ("ms" )),
363
471
(pd .Series ([1 , 1e3 , 1e6 , 1e9 , None ]), pyarrow .time64 ("ns" )),
@@ -402,8 +510,8 @@ def test_pandas_arrow_dtype(data, dtype):
402
510
pyarrow .struct ([("foo" , pyarrow .string ()), ("bar" , pyarrow .int64 ())]),
403
511
),
404
512
(pd .Series (["a" , "1" ]), pyarrow .null ),
405
- (pd .Series (["a" , date (1970 , 1 , 1 ), "1970-01-01" ]), pyarrow .date32 ),
406
- (pd .Series (["a" , date (1970 , 1 , 1 ), "1970-01-01" ]), pyarrow .date64 ),
513
+ (pd .Series (["a" , dt . date (1970 , 1 , 1 ), "1970-01-01" ]), pyarrow .date32 ),
514
+ (pd .Series (["a" , dt . date (1970 , 1 , 1 ), "1970-01-01" ]), pyarrow .date64 ),
407
515
(pd .Series (["a" ]), pyarrow .duration ("ns" )),
408
516
(pd .Series (["a" , "b" ]), pyarrow .time32 ("ms" )),
409
517
(pd .Series (["a" , "b" ]), pyarrow .time64 ("ns" )),
0 commit comments