Skip to content

Commit 05964b1

Browse files
kszucs authored and cpcloud committed
refactor(formats): move the TableProxy object to formats from the operations
1 parent 3575858 commit 05964b1

File tree

6 files changed: 70 additions and 80 deletions

6 files changed

+70
-80
lines changed

ibis/expr/api.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -445,7 +445,7 @@ def _memtable_from_pyarrow_table(
445445
schema: SupportsSchema | None = None,
446446
columns: Iterable[str] | None = None,
447447
):
448-
from ibis.expr.operations.relations import PyArrowTableProxy
448+
from ibis.formats.pyarrow import PyArrowTableProxy
449449

450450
if columns is not None:
451451
assert schema is None, "if `columns` is not `None` then `schema` must be `None`"
@@ -467,7 +467,7 @@ def _memtable_from_dataframe(
467467
) -> Table:
468468
import pandas as pd
469469

470-
from ibis.expr.operations.relations import PandasDataFrameProxy
470+
from ibis.formats.pandas import PandasDataFrameProxy
471471

472472
if not isinstance(data, pd.DataFrame):
473473
df = pd.DataFrame(data, columns=columns)

ibis/expr/operations/relations.py

Lines changed: 2 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
from __future__ import annotations
22

3-
import abc
43
import itertools
54
from abc import abstractmethod
65
from typing import TYPE_CHECKING, Annotated, Any, Literal, Optional
@@ -15,17 +14,15 @@
1514
from ibis.common.annotations import annotated, attribute
1615
from ibis.common.collections import FrozenDict # noqa: TCH001
1716
from ibis.common.deferred import Deferred
18-
from ibis.common.grounds import Concrete, Immutable
17+
from ibis.common.grounds import Concrete
1918
from ibis.common.patterns import Between, Coercible, Eq
2019
from ibis.common.typing import VarTuple # noqa: TCH001
2120
from ibis.expr.operations.core import Column, Named, Node, Scalar, Value
2221
from ibis.expr.operations.sortkeys import SortKey # noqa: TCH001
2322
from ibis.expr.schema import Schema
23+
from ibis.formats import TableProxy # noqa: TCH001
2424

2525
if TYPE_CHECKING:
26-
import pandas as pd
27-
import pyarrow as pa
28-
2926
import ibis.expr.types as ir
3027

3128

@@ -112,73 +109,6 @@ class SQLQueryResult(TableNode):
112109
source: Any
113110

114111

115-
# TODO(kszucs): Add a pseudohashable wrapper and use that from InMemoryTable
116-
# subclasses PandasTable, PyArrowTable
117-
118-
119-
class TableProxy(Immutable):
120-
__slots__ = ("_data", "_hash")
121-
_data: Any
122-
_hash: int
123-
124-
def __init__(self, data) -> None:
125-
object.__setattr__(self, "_data", data)
126-
object.__setattr__(self, "_hash", hash((type(data), id(data))))
127-
128-
def __hash__(self) -> int:
129-
return self._hash
130-
131-
def __repr__(self) -> str:
132-
data_repr = util.indent(repr(self._data), spaces=2)
133-
return f"{self.__class__.__name__}:\n{data_repr}"
134-
135-
@abc.abstractmethod
136-
def to_frame(self) -> pd.DataFrame: # pragma: no cover
137-
"""Convert this input to a pandas DataFrame."""
138-
139-
@abc.abstractmethod
140-
def to_pyarrow(self, schema: Schema) -> pa.Table: # pragma: no cover
141-
"""Convert this input to a PyArrow Table."""
142-
143-
def to_pyarrow_bytes(self, schema: Schema) -> bytes:
144-
import pyarrow as pa
145-
import pyarrow_hotfix # noqa: F401
146-
147-
data = self.to_pyarrow(schema=schema)
148-
out = pa.BufferOutputStream()
149-
with pa.RecordBatchFileWriter(out, data.schema) as writer:
150-
writer.write(data)
151-
return out.getvalue()
152-
153-
def __len__(self) -> int:
154-
return len(self._data)
155-
156-
157-
class PyArrowTableProxy(TableProxy):
158-
__slots__ = ()
159-
160-
def to_frame(self):
161-
return self._data.to_pandas()
162-
163-
def to_pyarrow(self, schema: Schema) -> pa.Table:
164-
return self._data
165-
166-
167-
class PandasDataFrameProxy(TableProxy):
168-
__slots__ = ()
169-
170-
def to_frame(self) -> pd.DataFrame:
171-
return self._data
172-
173-
def to_pyarrow(self, schema: Schema) -> pa.Table:
174-
import pyarrow as pa
175-
import pyarrow_hotfix # noqa: F401
176-
177-
from ibis.formats.pyarrow import PyArrowSchema
178-
179-
return pa.Table.from_pandas(self._data, schema=PyArrowSchema.from_ibis(schema))
180-
181-
182112
@public
183113
class InMemoryTable(PhysicalTable):
184114
name: str

ibis/formats/__init__.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,14 @@
11
from __future__ import annotations
22

3+
from abc import abstractmethod
34
from typing import TYPE_CHECKING, Generic, TypeVar
45

6+
from ibis.util import PseudoHashable, indent
7+
58
if TYPE_CHECKING:
9+
import pandas as pd
10+
import pyarrow as pa
11+
612
from ibis.expr.datatypes import DataType
713
from ibis.expr.schema import Schema
814

@@ -214,3 +220,30 @@ def infer_table(cls, obj: T) -> Schema:
214220
Ibis schema corresponding to the given format-specific table.
215221
"""
216222
raise NotImplementedError
223+
224+
225+
class TableProxy(PseudoHashable[T]):
226+
def __repr__(self) -> str:
227+
data_repr = indent(repr(self.obj), spaces=2)
228+
return f"{self.__class__.__name__}:\n{data_repr}"
229+
230+
def __len__(self) -> int:
231+
return len(self.obj)
232+
233+
@abstractmethod
234+
def to_frame(self) -> pd.DataFrame: # pragma: no cover
235+
"""Convert this input to a pandas DataFrame."""
236+
237+
@abstractmethod
238+
def to_pyarrow(self, schema: Schema) -> pa.Table: # pragma: no cover
239+
"""Convert this input to a PyArrow Table."""
240+
241+
def to_pyarrow_bytes(self, schema: Schema) -> bytes:
242+
import pyarrow as pa
243+
import pyarrow_hotfix # noqa: F401
244+
245+
data = self.to_pyarrow(schema=schema)
246+
out = pa.BufferOutputStream()
247+
with pa.RecordBatchFileWriter(out, data.schema) as writer:
248+
writer.write(data)
249+
return out.getvalue()

ibis/formats/pandas.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,13 @@
66
import numpy as np
77
import pandas as pd
88
import pandas.api.types as pdt
9+
import pyarrow as pa
910

1011
import ibis.expr.datatypes as dt
1112
import ibis.expr.schema as sch
12-
from ibis.formats import DataMapper, SchemaMapper
13+
from ibis.formats import DataMapper, SchemaMapper, TableProxy
1314
from ibis.formats.numpy import NumpyType
14-
from ibis.formats.pyarrow import PyArrowData, PyArrowType
15+
from ibis.formats.pyarrow import PyArrowData, PyArrowSchema, PyArrowType
1516

1617
_has_arrow_dtype = hasattr(pd, "ArrowDtype")
1718

@@ -284,3 +285,12 @@ class DaskData(PandasData):
284285
@classmethod
285286
def infer_column(cls, s):
286287
return PyArrowData.infer_column(s.compute())
288+
289+
290+
class PandasDataFrameProxy(TableProxy[pd.DataFrame]):
291+
def to_frame(self) -> pd.DataFrame:
292+
return self.obj
293+
294+
def to_pyarrow(self, schema: sch.Schema) -> pa.Table:
295+
pyarrow_schema = PyArrowSchema.from_ibis(schema)
296+
return pa.Table.from_pandas(self.obj, schema=pyarrow_schema)

ibis/formats/pyarrow.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
import ibis.expr.datatypes as dt
1010
from ibis.expr.schema import Schema
11-
from ibis.formats import DataMapper, SchemaMapper, TypeMapper
11+
from ibis.formats import DataMapper, SchemaMapper, TableProxy, TypeMapper
1212

1313
if TYPE_CHECKING:
1414
from collections.abc import Sequence
@@ -293,5 +293,13 @@ def convert_table(cls, table: pa.Table, schema: Schema) -> pa.Table:
293293
return table
294294

295295

296+
class PyArrowTableProxy(TableProxy[pa.Table]):
297+
def to_frame(self):
298+
return self.obj.to_pandas()
299+
300+
def to_pyarrow(self, schema: Schema) -> pa.Table:
301+
return self.obj
302+
303+
296304
PYARROW_JSON_TYPE = JSONType()
297305
pa.register_extension_type(PYARROW_JSON_TYPE)

ibis/util.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,15 @@
2020
TYPE_CHECKING,
2121
Any,
2222
Callable,
23+
Generic,
2324
TypeVar,
2425
)
2526
from uuid import uuid4
2627

2728
import toolz
2829

30+
from ibis.common.typing import Coercible
31+
2932
if TYPE_CHECKING:
3033
from collections.abc import Iterator, Sequence
3134
from numbers import Real
@@ -652,13 +655,13 @@ def __getattr__(self, name: str):
652655
return self._factory(obj)
653656

654657

655-
# TODO(kszucs): use this for the TableProxy objects
656-
class PseudoHashable:
658+
class PseudoHashable(Coercible, Generic[V]):
657659
"""A wrapper that provides a best effort precomputed hash."""
658660

659661
__slots__ = ("obj", "hash")
662+
obj: V
660663

661-
def __init__(self, obj):
664+
def __init__(self, obj: V):
662665
if isinstance(obj, collections.abc.Hashable):
663666
raise TypeError(f"Cannot wrap a hashable object: {obj!r}")
664667
elif isinstance(obj, collections.abc.Sequence):
@@ -673,6 +676,12 @@ def __init__(self, obj):
673676
self.obj = obj
674677
self.hash = hash((type(obj), hashable_obj))
675678

679+
@classmethod
680+
def __coerce__(cls, value: V) -> PseudoHashable[V]:
681+
if isinstance(value, cls):
682+
return value
683+
return cls(value)
684+
676685
def __hash__(self):
677686
return self.hash
678687

0 commit comments

Comments
 (0)