Skip to content

Commit c49b18f

Browse files
[ibis 🦩] check backend: use positional join for duckdb and polars, fix ibis DataFrameModel.validate types (#2071)
* update docs banner with ibis announcement
  Signed-off-by: Niels Bantilan <[email protected]>
* override ibis DataFrameModel.validate method to pass correct type
  Signed-off-by: Niels Bantilan <[email protected]>
* [ibis 🦩] check backend: use positional join for duckdb and polars
  Signed-off-by: Niels Bantilan <[email protected]>
* use typing_extensions for self in py<3.11
  Signed-off-by: Niels Bantilan <[email protected]>
* Update pandera/backends/ibis/checks.py
  Co-authored-by: Deepyaman Datta <[email protected]>
* Update docs/source/conf.py
  Co-authored-by: Deepyaman Datta <[email protected]>
* Update conf.py
* Update checks.py
---------
Signed-off-by: Niels Bantilan <[email protected]>
Co-authored-by: Deepyaman Datta <[email protected]>
1 parent ad8f08d commit c49b18f

File tree

7 files changed

+91
-14
lines changed

7 files changed

+91
-14
lines changed

‎docs/source/conf.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -134,9 +134,9 @@
134134
# documentation.
135135

136136
announcement = """
137-
📢 Pandera 0.24.0 introduces the <i>pandera.pandas</i>
138-
module, which is the recommended way of defining schemas for <i>pandas objects</i>.
139-
Learn more details <a href='https://github.com/unionai-oss/pandera/releases/tag/v0.24.0'>here</a>
137+
📢 Pandera 0.25.0 introduces the <i>🦩 pandera-ibis integration </i>!
138+
Validate all supported Ibis backends, including Snowflake, BigQuery, and more.
139+
Learn more details <a href='./ibis.html'>here</a>
140140
"""
141141

142142
html_logo = "_static/pandera-banner.png"

‎pandera/api/ibis/model.py

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,20 @@
11
"""Class-based API for Ibis models."""
22

3+
import sys
34
import inspect
4-
from typing import Dict, List, Tuple
5+
from typing import (
6+
Dict,
7+
List,
8+
Tuple,
9+
Type,
10+
Optional,
11+
cast,
12+
)
513

614
import ibis
715
import ibis.expr.datatypes as dt
816

17+
from pandera.api.base.schema import BaseSchema
918
from pandera.api.checks import Check
1019
from pandera.api.dataframe.model import DataFrameModel as _DataFrameModel
1120
from pandera.api.dataframe.model import get_dtype_kwargs
@@ -15,6 +24,15 @@
1524
from pandera.engines import ibis_engine
1625
from pandera.errors import SchemaInitError
1726
from pandera.typing import AnnotationInfo
27+
from pandera.typing.ibis import Table
28+
from pandera.utils import docstring_substitution
29+
30+
31+
# if python version is < 3.11, import Self from typing_extensions
32+
if sys.version_info < (3, 11):
33+
from typing_extensions import Self
34+
else:
35+
from typing import Self
1836

1937

2038
class DataFrameModel(_DataFrameModel[ibis.Table, DataFrameSchema]):
@@ -102,3 +120,21 @@ def _build_columns( # pylint:disable=too-many-locals
102120
)
103121

104122
return columns
123+
124+
@classmethod
125+
@docstring_substitution(validate_doc=BaseSchema.validate.__doc__)
126+
def validate(
127+
cls: Type[Self],
128+
check_obj: ibis.Table,
129+
head: Optional[int] = None,
130+
tail: Optional[int] = None,
131+
sample: Optional[int] = None,
132+
random_state: Optional[int] = None,
133+
lazy: bool = False,
134+
inplace: bool = False,
135+
) -> Table[Self]:
136+
"""%(validate_doc)s"""
137+
result = cls.to_schema().validate(
138+
check_obj, head, tail, sample, random_state, lazy, inplace
139+
)
140+
return cast(Table[Self], result)

‎pandera/backends/ibis/checks.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,11 @@
1818
from ibis.expr.types.groupby import GroupedTable
1919

2020

21+
# Manually maintained list derived from
22+
# https://github.com/ibis-project/ibis/blob/10.6.0/ibis/backends/tests/test_join.py#L370-L399
23+
POSITIONAL_JOIN_BACKENDS = {"duckdb", "polars"}
24+
25+
2126
class IbisCheckBackend(BaseCheckBackend):
2227
"""Check backend for Ibis."""
2328

@@ -75,9 +80,12 @@ def apply(self, check_obj: IbisData):
7580
)
7681
elif isinstance(out, ibis.Table):
7782
out = out.rename(f"{{name}}{CHECK_OUTPUT_SUFFIX}")
78-
try:
83+
if (
84+
check_obj.table.get_backend().name
85+
in POSITIONAL_JOIN_BACKENDS
86+
):
7987
out = check_obj.table.join(out, how="positional")
80-
except Exception: # pylint: disable=broad-exception-caught
88+
else:
8189
# For backends that do not support positional joins:
8290
# https://github.com/ibis-project/ibis/issues/9486
8391
index_col = "__idx__"

‎pandera/backends/ibis/components.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -95,12 +95,12 @@ def validate_column(check_obj, column_name):
9595
check_output=result.check_output,
9696
reason_code=result.reason_code,
9797
)
98-
error_handler.collect_error( # Why indent (unlike in container.py)?
99-
validation_type(result.reason_code),
100-
result.reason_code,
101-
error,
102-
original_exc=result.original_exc,
103-
)
98+
error_handler.collect_error(
99+
validation_type(result.reason_code),
100+
result.reason_code,
101+
error,
102+
original_exc=result.original_exc,
103+
)
104104

105105
finally:
106106
# revert the schema component mutations

‎pandera/typing/ibis.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -219,7 +219,7 @@ def _get_schema_model(cls, field):
219219
if not field.sub_fields:
220220
raise TypeError(
221221
"Expected a typed pandera.typing.ibis.Table,"
222-
" e.g. DataFrame[Schema]"
222+
" e.g. Table[Schema]"
223223
)
224224
schema_model = field.sub_fields[0].type_
225225
return schema_model

‎pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ testing = [
129129
"pytest-cov",
130130
"pytest-xdist",
131131
"pytest-asyncio",
132-
"ibis-framework[duckdb] >= 9.0.0",
132+
"ibis-framework[duckdb,sqlite] >= 9.0.0",
133133
]
134134
docs = [
135135
"setuptools",

‎tests/ibis/test_ibis_backends.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
"""Unit tests for ibis backends."""
2+
3+
import ibis
4+
import pytest
5+
6+
import pandera.ibis as pa
7+
8+
9+
def test_ibis_sqlite_backend():
10+
con = ibis.sqlite.connect()
11+
schema = ibis.schema(dict(x="int64", y="float64", z="string"))
12+
13+
valid_t = con.create_table("valid_table", schema=schema)
14+
invalid_t = con.create_table("invalid_table", schema=schema)
15+
16+
con.insert(
17+
"valid_table", obj=[(1, 1.0, "a"), (2, 2.0, "b"), (3, 3.0, "c")]
18+
)
19+
con.insert(
20+
"invalid_table", obj=[(-1, 1.0, "a"), (2, 2.0, "b"), (3, 3.0, "d")]
21+
)
22+
23+
# pylint: disable=missing-class-docstring
24+
class TableSchema(pa.DataFrameModel):
25+
x: int = pa.Field(ge=0)
26+
y: float
27+
z: str
28+
29+
validated_t = TableSchema.validate(valid_t)
30+
assert validated_t.execute() is not None
31+
32+
with pytest.raises(pa.errors.SchemaErrors):
33+
TableSchema.validate(invalid_t, lazy=True)

0 commit comments

Comments
 (0)