Skip to content

Commit 954d8a2

Browse files
authored
add human-readable __str__ and __repr__ methods to dataframeschema (#111)
* add human-readable __str__ and __repr__ methods to dataframeschema
* remove gt/lt sign in __str__ method
1 parent ed4e8f8 commit 954d8a2

File tree

2 files changed

+64
-0
lines changed

2 files changed

+64
-0
lines changed

pandera/schemas.py

+46
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
"""Core pandera schema class definitions."""
22

3+
import json
4+
35
import pandas as pd
46

57
from typing import Optional
@@ -8,6 +10,9 @@
810
from .checks import Check
911

1012

13+
# Indentation width, in spaces, used when rendering a schema as text.
N_INDENT_SPACES = 4
14+
15+
1116
class DataFrameSchema(object):
1217
"""A light-weight pandas DataFrame validator."""
1318

@@ -52,6 +57,7 @@ def __init__(
5257
self.coerce = coerce
5358
self.strict = strict
5459
self._validate_schema()
60+
self._set_column_names()
5561

5662
def __call__(
5763
self,
@@ -87,6 +93,12 @@ def _validate_schema(self):
8793
"specified in the DataFrameSchema." %
8894
(nonexistent_groupby_columns, column_name))
8995

96+
def _set_column_names(self):
97+
self.columns = {
98+
column_name: column.set_name(column_name)
99+
for column_name, column in self.columns.items()
100+
}
101+
90102
@staticmethod
91103
def _dataframe_to_validate(
92104
dataframe: pd.DataFrame,
@@ -168,6 +180,40 @@ def validate(
168180
dataframe = self.transformer(dataframe)
169181
return dataframe
170182

183+
def __repr__(self):
184+
return "%s(columns=%s, index=%s, transformer=%s, coerce=%s)" % \
185+
(self.__class__.__name__,
186+
self.columns,
187+
self.index,
188+
self.transformer,
189+
self.coerce)
190+
191+
def __str__(self):
    """Return a human-readable, multi-line description of the schema.

    The columns are rendered as an indented JSON object mapping each
    column name to ``str(column)``, followed by the ``index``,
    ``transformer``, ``coerce`` and ``strict`` attributes, one per line.
    """
    # json.dumps only accepts str, int, float, bool or None as dict keys
    # and raises TypeError otherwise (e.g. for tuple column names), so any
    # other key is converted to its string form first. Keys that JSON
    # already supports are passed through untouched to keep their output.
    columns = {
        (k if k is None or isinstance(k, (str, int, float)) else str(k)):
            str(v)
        for k, v in self.columns.items()
    }
    _indent = " " * N_INDENT_SPACES
    lines = json.dumps(columns, indent=N_INDENT_SPACES).split("\n")
    # The first line of the JSON dump carries the ``columns=`` label; every
    # line is then shifted right by one indentation level.
    lines[0] = "columns=" + lines[0]
    columns = "\n".join(_indent + line for line in lines)
    return (
        "{class_name}(\n"
        "{columns},\n"
        "{indent}index={index},\n"
        "{indent}transformer={transformer},\n"
        "{indent}coerce={coerce},\n"
        "{indent}strict={strict}\n"
        ")"
    ).format(
        class_name=self.__class__.__name__,
        columns=columns,
        index=str(self.index),
        transformer=str(self.transformer),
        coerce=self.coerce,
        strict=self.strict,
        indent=_indent,
    )
216+
171217

172218
class SeriesSchemaBase(object):
173219
"""Base series validator object."""

tests/test_pandera.py

+18
Original file line numberDiff line numberDiff line change
@@ -1022,3 +1022,21 @@ def test_dataframe_hypothesis_checks():
10221022
)
10231023
with pytest.raises(errors.SchemaDefinitionError):
10241024
hypothesis_check_schema_groupby.validate(df)
1025+
1026+
1027+
def test_dataframe_schema_str_repr():
    """Check that str() and repr() of a schema mention its components."""
    schema = DataFrameSchema(
        columns={
            "col1": Column(Int),
            "col2": Column(String),
            "col3": Column(DateTime),
        },
        index=Index(Int, name="my_index"),
    )
    component_names = ("col1", "col2", "col3", "my_index")

    for rendered in (str(schema), repr(schema)):
        assert isinstance(rendered, str)
        assert schema.__class__.__name__ in rendered
        for component in component_names:
            assert component in rendered

0 commit comments

Comments
 (0)