|
1 | 1 | """Test pandas engine."""
|
2 | 2 |
|
| 3 | +import hypothesis.strategies as st |
3 | 4 | import pandas as pd
|
4 | 5 | import pytest
|
| 6 | +from hypothesis import given |
5 | 7 |
|
6 | 8 | from pandera.engines import pandas_engine
|
7 | 9 | from pandera.errors import ParserError
|
@@ -42,3 +44,65 @@ def test_pandas_data_type_coerce(data_type):
|
42 | 44 | data_type().try_coerce(pd.Series(["1", "2", "a"]))
|
43 | 45 | except ParserError as exc:
|
44 | 46 | assert exc.failure_cases.shape[0] > 0
|
| 47 | + |
| 48 | + |
# Category labels shared by the Category dtype tests in this module: valid
# samples are drawn from this list and it is passed to pandas_engine.Category.
CATEGORIES = ["A", "B", "C"]
| 50 | + |
| 51 | + |
@given(st.lists(st.sampled_from(CATEGORIES), min_size=5))
def test_pandas_category_dtype(data):
    """Test pandas_engine.Category correctly coerces valid categorical data.

    ``data`` is a hypothesis-generated list of at least five values, each
    drawn from ``CATEGORIES``.
    """
    series = pd.Series(data)
    dtype = pandas_engine.Category(CATEGORIES)

    # Series-level coercion must produce a dtype that the Category type
    # itself accepts.
    coerced_data = dtype.coerce(series)
    assert dtype.check(coerced_data.dtype)

    # Iterate the values directly: ``Series.iteritems`` was removed in
    # pandas 2.0, and the index labels it yielded were never used here.
    for value in series:
        coerced_value = dtype.coerce_value(value)
        assert coerced_value in CATEGORIES
| 63 | + |
| 64 | + |
@given(st.lists(st.sampled_from(["X", "Y", "Z"]), min_size=5))
def test_pandas_category_dtype_error(data):
    """Test pandas_engine.Category raises TypeErrors on invalid data.

    ``data`` is a hypothesis-generated list of at least five values drawn
    from ``["X", "Y", "Z"]``, none of which appear in ``CATEGORIES``.
    """
    series = pd.Series(data)
    dtype = pandas_engine.Category(CATEGORIES)

    # Coercing the whole Series is expected to fail.
    with pytest.raises(TypeError):
        dtype.coerce(series)

    # Each individual value is expected to fail as well. Iterate the values
    # directly: ``Series.iteritems`` was removed in pandas 2.0, and the index
    # labels it yielded were never used here.
    for value in series:
        with pytest.raises(TypeError):
            dtype.coerce_value(value)
| 77 | + |
| 78 | + |
@given(st.lists(st.sampled_from([1, 0, 1.0, 0.0, True, False]), min_size=5))
def test_pandas_boolean_native_type(data):
    """Test native pandas bool type correctly coerces valid bool-like data.

    ``data`` is a hypothesis-generated list of at least five values drawn
    from ``[1, 0, 1.0, 0.0, True, False]``.
    """
    series = pd.Series(data)
    dtype = pandas_engine.Engine.dtype("boolean")

    # the BooleanDtype can't handle Series of non-boolean, mixed dtypes
    if series.dtype == "object":
        with pytest.raises(TypeError):
            dtype.coerce(series)
    else:
        coerced_data = dtype.coerce(series)
        assert dtype.check(coerced_data.dtype)

    # Scalar coercion is exercised for every value regardless of the Series
    # dtype. Iterate the values directly: ``Series.iteritems`` was removed in
    # pandas 2.0, and the index labels it yielded were never used here.
    for value in series:
        dtype.coerce_value(value)
| 95 | + |
| 96 | + |
@given(st.lists(st.sampled_from(["A", "True", "False", 5, -1]), min_size=5))
def test_pandas_boolean_native_type_error(data):
    """Test native pandas bool type raises TypeErrors on non-bool-like data.

    ``data`` is a hypothesis-generated list of at least five values drawn
    from ``["A", "True", "False", 5, -1]``.
    """
    series = pd.Series(data)
    dtype = pandas_engine.Engine.dtype("boolean")

    # Coercing the whole Series is expected to fail.
    with pytest.raises(TypeError):
        dtype.coerce(series)

    # Each individual value is expected to fail as well. Iterate the values
    # directly: ``Series.iteritems`` was removed in pandas 2.0, and the index
    # labels it yielded were never used here.
    for value in series:
        with pytest.raises(TypeError):
            dtype.coerce_value(value)
0 commit comments