Skip to content

Commit 0ff0fd5

Browse files
committed
Support table-level checks, including for built-in
Signed-off-by: Deepyaman Datta <[email protected]>
1 parent 8bc1970 commit 0ff0fd5

File tree

4 files changed

+28
-24
lines changed

4 files changed

+28
-24
lines changed

pandera/backends/ibis/builtin_checks.py

Lines changed: 6 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,6 @@
1111
from pandera.api.extensions import register_builtin_check
1212
from pandera.api.ibis.types import IbisData
1313
from pandera.backends.ibis.utils import select_column
14-
from pandera.constants import check_col_name
15-
1614

1715
T = TypeVar("T")
1816

@@ -46,9 +44,7 @@ def equal_to(data: IbisData, value: Any) -> ir.Table:
4644
equal to this value.
4745
"""
4846
value = _infer_interval_with_mixed_units(value)
49-
return data.table.mutate(
50-
s.across(_selector(data.key), _ == value, names=check_col_name)
51-
)
47+
return data.table.select(s.across(_selector(data.key), _ == value))
5248

5349

5450
@register_builtin_check(
@@ -63,9 +59,7 @@ def not_equal_to(data: IbisData, value: Any) -> ir.Table:
6359
:param value: This value must not occur in the checked data structure.
6460
"""
6561
value = _infer_interval_with_mixed_units(value)
66-
return data.table.mutate(
67-
s.across(_selector(data.key), _ != value, names=check_col_name)
68-
)
62+
return data.table.select(s.across(_selector(data.key), _ != value))
6963

7064

7165
@register_builtin_check(
@@ -82,9 +76,7 @@ def greater_than(data: IbisData, min_value: Any) -> ir.Table:
8276
to the dtype of the :class:`ir.Column` to be validated.
8377
"""
8478
value = _infer_interval_with_mixed_units(min_value)
85-
return data.table.mutate(
86-
s.across(_selector(data.key), _ > value, names=check_col_name)
87-
)
79+
return data.table.select(s.across(_selector(data.key), _ > value))
8880

8981

9082
@register_builtin_check(
@@ -100,9 +92,7 @@ def greater_than_or_equal_to(data: IbisData, min_value: Any) -> ir.Table:
10092
to the dtype of the :class:`ir.Column` to be validated.
10193
"""
10294
value = _infer_interval_with_mixed_units(min_value)
103-
return data.table.mutate(
104-
s.across(_selector(data.key), _ >= value, names=check_col_name)
105-
)
95+
return data.table.select(s.across(_selector(data.key), _ >= value))
10696

10797

10898
@register_builtin_check(
@@ -119,9 +109,7 @@ def less_than(data: IbisData, max_value: Any) -> ir.Table:
119109
:class:`ir.Column` to be validated.
120110
"""
121111
value = _infer_interval_with_mixed_units(max_value)
122-
return data.table.mutate(
123-
s.across(_selector(data.key), _ < value, names=check_col_name)
124-
)
112+
return data.table.select(s.across(_selector(data.key), _ < value))
125113

126114

127115
@register_builtin_check(
@@ -137,6 +125,4 @@ def less_than_or_equal_to(data: IbisData, max_value: Any) -> ir.Table:
137125
:class:`ir.Column` to be validated.
138126
"""
139127
value = _infer_interval_with_mixed_units(max_value)
140-
return data.table.mutate(
141-
s.across(_selector(data.key), _ <= value, names=check_col_name)
142-
)
128+
return data.table.select(s.across(_selector(data.key), _ <= value))

pandera/backends/ibis/checks.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,24 @@ def apply(self, check_obj: IbisData):
6565
out = check_obj.table.mutate(
6666
**{f"{k}{CHECK_OUTPUT_SUFFIX}": v for k, v in out.items()}
6767
)
68+
elif isinstance(out, ir.Table):
69+
out = out.rename(f"{{name}}{CHECK_OUTPUT_SUFFIX}")
70+
try:
71+
out = check_obj.table.join(out, how="positional")
72+
except Exception: # Backend doesn't support positional join
73+
index_col = "__idx__"
74+
out = (
75+
check_obj.table.mutate(
76+
**{index_col: ibis.row_number().over()}
77+
)
78+
.join(
79+
out.mutate(
80+
**{index_col: ibis.row_number().over()}
81+
),
82+
index_col,
83+
)
84+
.drop(index_col)
85+
)
6886

6987
if isinstance(out, ir.Table):
7088
# for checks that return a boolean table, make sure all columns

pandera/constants.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,3 @@
33
CHECK_OUTPUT_KEY = "check_output"
44
CHECK_OUTPUT_SUFFIX = f"__{CHECK_OUTPUT_KEY}__"
55
FAILURE_CASE_KEY = "failure_case"
6-
check_col_name = f"{{col}}{CHECK_OUTPUT_SUFFIX}"

tests/ibis/test_ibis_check.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,10 @@
77
import pandas as pd
88
import ibis
99
import ibis.expr.types as ir
10+
import ibis.selectors as s
1011
import pandera.ibis as pa
12+
from ibis import _, selectors as s
13+
1114
from pandera.backends.ibis.register import register_ibis_backends
1215
from pandera.constants import CHECK_OUTPUT_KEY
1316

@@ -66,9 +69,7 @@ def test_ibis_column_check(
6669

6770

6871
def _df_check_fn_table_out(data: pa.IbisData) -> ir.Table:
69-
return data.table.mutate(
70-
{col: data.table[col] >= 0 for col in data.table.columns}
71-
)
72+
return data.table.select(s.across(s.numeric(), _ >= 0))
7273

7374

7475
def _df_check_fn_dict_out(data: pa.IbisData) -> Dict[str, ir.BooleanColumn]:

0 commit comments

Comments
 (0)