Skip to content

Commit 133cf6f

Browse files
cpcloud authored and kszucs committed
chore(selectors): add docstrings and examples
1 parent 306bc88 commit 133cf6f

File tree

4 files changed

+201
-28
lines changed

4 files changed

+201
-28
lines changed

docs/SUMMARY.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
* [Timestamps + Dates + Times](api/expressions/timestamps.md)
2424
* [Collections](api/expressions/collections.md)
2525
* [Geospatial](api/expressions/geospatial.md)
26+
* [Column Selectors](api/selectors.md)
2627
* [Data Types](api/datatypes.md)
2728
* [Schemas](api/schemas.md)
2829
* [Backend Interfaces](api/backends/)

docs/api/selectors.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Column Selectors
2+
3+
<!-- prettier-ignore-start -->
4+
::: ibis.expr.selectors
5+
<!-- prettier-ignore-end -->

ibis/expr/selectors.py

Lines changed: 194 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,56 +1,134 @@
1-
"""Construct column selectors."""
1+
"""Convenient column selectors.
2+
3+
## Rationale
4+
5+
Column selectors are convenience functions for selecting columns that share some property.
6+
7+
## Discussion
8+
9+
For example, a common task is to be able to select all numeric columns for a
10+
subsequent computation.
11+
12+
Without selectors this becomes quite verbose and tedious to write:
13+
14+
```python
15+
>>> t.select([t[c] for c in t.columns if t[c].type().is_numeric()])
16+
```
17+
18+
Compare that to the [`numeric`][ibis.expr.selectors.numeric] selector:
19+
20+
```python
21+
>>> t.select(s.numeric())
22+
```
23+
24+
When there are multiple properties to check it gets worse:
25+
26+
```python
27+
>>> t.select(
28+
... [
29+
... t[c] for c in t.columns
30+
... if t[c].type().is_numeric()
31+
...         if ("a" in c or "cd" in c)
32+
... ]
33+
... )
34+
```
35+
36+
Using a composition of selectors this is much less tiresome:
37+
38+
```python
39+
>>> t.select(s.numeric() & s.contains(("a", "cd")))
40+
```
41+
"""
242

343
from __future__ import annotations
444

45+
import inspect
546
import re
647
from typing import Callable, Iterable, Sequence
748

49+
from public import public
50+
851
import ibis.expr.datatypes as dt
952
import ibis.expr.types as ir
1053
from ibis import util
1154

1255

1356
class Selector:
57+
"""A column selector."""
58+
1459
__slots__ = ("predicate",)
1560

16-
def __init__(self, predicate: Callable[[ir.Column], bool]) -> None:
17-
"""Construct a `Selector` with `predicate`."""
61+
def __init__(self, predicate: Callable[[ir.Value], bool]) -> None:
62+
"""Construct a `Selector` with `predicate`.
63+
64+
Parameters
65+
----------
66+
predicate
67+
A callable that accepts an ibis value expression and returns a `bool`.
68+
"""
1869
self.predicate = predicate
1970

20-
def expand(self, table: ir.Table) -> Sequence[ir.Column]:
21-
"""Evaluate `self.predicate` on every column of `table`."""
71+
def expand(self, table: ir.Table) -> Sequence[ir.Value]:
72+
"""Evaluate `self.predicate` on every column of `table`.
73+
74+
Parameters
75+
----------
76+
table
77+
An ibis table expression
78+
"""
2279
return [col for column in table.columns if self.predicate(col := table[column])]
2380

2481
def __and__(self, other: Selector) -> Selector:
25-
"""Compute the conjunction of two `Selectors`."""
82+
"""Compute the conjunction of two `Selectors`.
83+
84+
Parameters
85+
----------
86+
other
87+
Another selector
88+
"""
2689
return self.__class__(lambda col: self.predicate(col) and other.predicate(col))
2790

2891
def __or__(self, other: Selector) -> Selector:
29-
"""Compute the disjunction of two `Selectors`."""
92+
"""Compute the disjunction of two `Selectors`.
93+
94+
Parameters
95+
----------
96+
other
97+
Another selector
98+
"""
3099
return self.__class__(lambda col: self.predicate(col) or other.predicate(col))
31100

32101
def __invert__(self) -> Selector:
33102
"""Compute the logical negation of a `Selector`."""
34103
return self.__class__(lambda col: not self.predicate(col))
35104

36105

106+
@public
37107
def where(predicate: Callable[[ir.Value], bool]) -> Selector:
38108
"""Return columns that satisfy `predicate`.
39109
110+
Use this selector when one of the other selectors does not meet your needs.
111+
112+
Parameters
113+
----------
114+
predicate
115+
A callable that accepts an ibis value expression and returns a `bool`
116+
40117
Examples
41118
--------
42119
>>> t = ibis.table(dict(a="float32"), name="t")
43120
>>> t.select(s.where(lambda col: col.get_name() == "a"))
44121
r0 := UnboundTable: t
45122
a float32
46-
123+
<BLANKLINE>
47124
Selection[r0]
48125
selections:
49126
a: r0.a
50127
"""
51128
return Selector(predicate)
52129

53130

131+
@public
54132
def numeric() -> Selector:
55133
"""Return numeric columns.
56134
@@ -68,53 +146,141 @@ def numeric() -> Selector:
68146
a int64
69147
b string
70148
c array<string>
71-
149+
<BLANKLINE>
72150
Selection[r0]
73151
selections:
74152
a: r0.a
153+
154+
See Also
155+
--------
156+
[`of_type`][ibis.expr.selectors.of_type]
75157
"""
76-
return Selector(lambda col: col.type().is_numeric())
158+
return of_type(dt.Numeric)
77159

78160

161+
@public
79162
def of_type(dtype: dt.DataType | str | type[dt.DataType]) -> Selector:
80-
"""Select columns of type `dtype`."""
81-
if isinstance(dtype, type):
82-
predicate = lambda col, dtype=dtype: isinstance(col.type(), dtype)
163+
"""Select columns of type `dtype`.
164+
165+
Parameters
166+
----------
167+
dtype
168+
`DataType` instance, `str` or `DataType` class
169+
170+
Examples
171+
--------
172+
Select according to a specific `DataType` instance
173+
174+
>>> t.select(s.of_type(dt.Array(dt.string)))
175+
176+
Strings are also accepted
177+
178+
>>> t.select(s.of_type("map<string, float>"))
179+
180+
Select by category of `DataType` by passing the `DataType` class
181+
182+
>>> t.select(s.of_type(dt.Struct)) # all struct columns, regardless of field types
183+
184+
See Also
185+
--------
186+
[`numeric`][ibis.expr.selectors.numeric]
187+
"""
188+
if inspect.isclass(dtype):
189+
predicate = lambda col: isinstance(col.type(), dtype)
83190
else:
84191
dtype = dt.dtype(dtype)
85-
predicate = lambda col, dtype=dtype: col.type() == dtype
192+
predicate = lambda col: col.type() == dtype
86193
return where(predicate)
87194

88195

196+
@public
89197
def startswith(prefixes: str | tuple[str, ...]) -> Selector:
90-
"""Select columns whose name starts with one of `prefixes`."""
91-
return where(lambda col, prefixes=prefixes: col.get_name().startswith(prefixes))
198+
"""Select columns whose name starts with one of `prefixes`.
199+
200+
Parameters
201+
----------
202+
prefixes
203+
Prefixes to compare column names against
204+
205+
Examples
206+
--------
207+
>>> t = ibis.table(dict(apples="int", oranges="float", bananas="bool"), name="t")
208+
>>> t.select(s.startswith(("a", "b")))
92209
210+
See Also
211+
--------
212+
[`endswith`][ibis.expr.selectors.endswith]
213+
"""
214+
return where(lambda col: col.get_name().startswith(prefixes))
93215

216+
217+
@public
94218
def endswith(suffixes: str | tuple[str, ...]) -> Selector:
95-
"""Select columns whose name ends with one of `suffixes`."""
96-
return where(lambda col, suffixes=suffixes: col.get_name().endswith(suffixes))
219+
"""Select columns whose name ends with one of `suffixes`.
220+
221+
Parameters
222+
----------
223+
suffixes
224+
Suffixes to compare column names against
97225
226+
See Also
227+
--------
228+
[`startswith`][ibis.expr.selectors.startswith]
229+
"""
230+
return where(lambda col: col.get_name().endswith(suffixes))
98231

232+
233+
@public
99234
def contains(
100235
needles: str | tuple[str, ...], how: Callable[[Iterable[bool]], bool] = any
101236
) -> Selector:
102-
"""Return columns whose name contains `needles`."""
237+
"""Return columns whose name contains `needles`.
238+
239+
Parameters
240+
----------
241+
needles
242+
One or more strings to search for in column names
243+
how
244+
A boolean reduction, such as `any` or `all`, that combines the per-needle match results into a single `bool`.
103245
104-
def predicate(
105-
col: ir.Column,
106-
needles: str | tuple[str, ...] = needles,
107-
how: Callable[[Iterable[bool]], bool] = how,
108-
) -> bool:
246+
Examples
247+
--------
248+
Select columns that contain either `"a"` or `"b"`
249+
250+
>>> t.select(s.contains(("a", "b")))
251+
252+
Select columns that contain all of `"a"` and `"b"`
253+
254+
>>> t.select(s.contains(("a", "b"), how=all))
255+
256+
See Also
257+
--------
258+
[`matches`][ibis.expr.selectors.matches]
259+
"""
260+
261+
def predicate(col: ir.Value) -> bool:
109262
name = col.get_name()
110263
return how(needle in name for needle in util.promote_list(needles))
111264

112265
return where(predicate)
113266

114267

268+
@public
115269
def matches(regex: str | re.Pattern) -> Selector:
116-
"""Return columns matching the regular expression `regex`."""
270+
"""Return columns whose name matches the regular expression `regex`.
271+
272+
Parameters
273+
----------
274+
regex
275+
A string or `re.Pattern` object
276+
277+
Examples
278+
--------
279+
>>> t.select(s.matches(r"ab+"))
280+
281+
See Also
282+
--------
283+
[`contains`][ibis.expr.selectors.contains]
284+
"""
117285
pattern = re.compile(regex)
118-
return where(
119-
lambda col, pattern=pattern: pattern.search(col.get_name()) is not None
120-
)
286+
return where(lambda col: pattern.search(col.get_name()) is not None)

mkdocs.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ plugins:
7575
- "!^scalar$"
7676
- "!^column$"
7777
- "!^option_context$"
78+
- "!Selector$"
7879
show_category_heading: true
7980
show_root_full_path: false
8081
show_root_heading: true

0 commit comments

Comments
 (0)