Skip to content

Commit 33fe68b

Browse files
authored
only call parsers once (unionai-oss#1898)
* only call parsers once

  Signed-off-by: cosmicBboy <[email protected]>

* fix dask test error

  Signed-off-by: cosmicBboy <[email protected]>

---------

Signed-off-by: cosmicBboy <[email protected]>
1 parent a832172 commit 33fe68b

File tree

3 files changed: 35 additions and 13 deletions.

pandera/api/pandas/types.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -108,6 +108,7 @@ def register_geopandas_backend():
108108
register_fn = {
109109
"pandas": register_pandas_backend,
110110
"dask_expr": register_dask_backend,
111+
"dask": register_dask_backend,
111112
"modin": register_modin_backend,
112113
"pyspark": register_pyspark_backend,
113114
"geopandas": register_geopandas_backend,

pandera/backends/pandas/components.py

Lines changed: 15 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -119,27 +119,29 @@ def validate_column(check_obj, column_name, return_check_obj=False):
119119
except SchemaErrors as exc:
120120
error_handler.collect_errors(exc.schema_errors)
121121

122-
if schema.parsers:
123-
for parser_index, parser in enumerate(schema.parsers):
124-
check_obj[column_name] = self.run_parser(
125-
check_obj[column_name],
126-
parser,
127-
parser_index,
128-
).parser_output
129-
130122
if is_table(check_obj[column_name]):
131123
for i in range(check_obj[column_name].shape[1]):
132-
validate_column(
133-
check_obj[column_name].iloc[:, [i]], column_name
124+
validated_column = validate_column(
125+
check_obj[column_name].iloc[:, [i]],
126+
column_name,
127+
return_check_obj=True,
134128
)
129+
if schema.parsers:
130+
check_obj[column_name] = validated_column
135131
else:
136132
if getattr(schema, "drop_invalid_rows", False):
137-
# replace the check_obj with the validated check_obj
133+
# replace the check_obj with the validated
138134
check_obj = validate_column(
139135
check_obj, column_name, return_check_obj=True
140136
)
141-
else:
142-
validate_column(check_obj, column_name)
137+
138+
validated_column = validate_column(
139+
check_obj,
140+
column_name,
141+
return_check_obj=True,
142+
)
143+
if schema.parsers:
144+
check_obj[column_name] = validated_column
143145

144146
if lazy and error_handler.collected_errors:
145147
raise SchemaErrors(

tests/core/test_parsers.py

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -94,3 +94,22 @@ def int_column_lt_100(cls, series: pd.Series):
9494
)
9595
with pytest.raises(pa.errors.SchemaInitError, match=err_msg):
9696
Schema.to_schema()
97+
98+
99+
def test_parser_called_once():
100+
101+
data = pd.DataFrame({"col": [2.0, 4.0, 9.0]})
102+
n_calls = 0
103+
104+
class DFModel(pa.DataFrameModel):
105+
col: float
106+
107+
@pa.parser("col")
108+
@classmethod
109+
def negate(cls, series):
110+
nonlocal n_calls
111+
n_calls += 1
112+
return series * -1
113+
114+
DFModel.validate(data)
115+
assert n_calls == 1

0 commit comments

Comments
 (0)