1
- """Construct column selectors."""
1
+ """Convenient column selectors.
2
+
3
+ ## Rationale
4
+
5
+ Column selectors are convenience functions for selecting columns that share some property.
6
+
7
+ ## Discussion
8
+
9
+ For example, a common task is to be able to select all numeric columns for a
10
+ subsequent computation.
11
+
12
+ Without selectors this becomes quite verbose and tedious to write:
13
+
14
+ ```python
15
+ >>> t.select([t[c] for c in t.columns if t[c].type().is_numeric()])
16
+ ```
17
+
18
+ Compare that to the [`numeric`][ibis.expr.selectors.numeric] selector:
19
+
20
+ ```python
21
+ >>> t.select(s.numeric())
22
+ ```
23
+
24
+ When there are multiple properties to check it gets worse:
25
+
26
+ ```python
27
+ >>> t.select(
28
+ ... [
29
+ ... t[c] for c in t.columns
30
+ ... if t[c].type().is_numeric()
31
+ ... if ("a" in c or "cd" in c)
32
+ ... ]
33
+ ... )
34
+ ```
35
+
36
+ Using a composition of selectors this is much less tiresome:
37
+
38
+ ```python
39
+ >>> t.select(s.numeric() & s.contains(("a", "cd")))
40
+ ```
41
+ """
2
42
3
43
from __future__ import annotations
4
44
45
+ import inspect
5
46
import re
6
47
from typing import Callable , Iterable , Sequence
7
48
49
+ from public import public
50
+
8
51
import ibis .expr .datatypes as dt
9
52
import ibis .expr .types as ir
10
53
from ibis import util
11
54
12
55
13
56
class Selector:
    """A column selector.

    A selector wraps a single-argument predicate. Selectors can be combined
    with `&` (conjunction), `|` (disjunction) and `~` (negation); each
    operation returns a new selector and leaves its operands untouched.
    """

    __slots__ = ("predicate",)

    def __init__(self, predicate: Callable[[ir.Value], bool]) -> None:
        """Construct a `Selector` with `predicate`.

        Parameters
        ----------
        predicate
            A callable that accepts an ibis value expression and returns a `bool`.
        """
        self.predicate = predicate

    def expand(self, table: ir.Table) -> Sequence[ir.Value]:
        """Evaluate `self.predicate` on every column of `table`.

        Parameters
        ----------
        table
            An ibis table expression

        Returns
        -------
        Sequence[ir.Value]
            The columns of `table` for which the predicate is truthy, in the
            order given by `table.columns`.
        """
        return [col for column in table.columns if self.predicate(col := table[column])]

    def __and__(self, other: Selector) -> Selector:
        """Compute the conjunction of two `Selectors`.

        Parameters
        ----------
        other
            Another selector

        Returns
        -------
        Selector
            A selector matching columns accepted by both `self` and `other`.
            `NotImplemented` is returned for non-`Selector` operands so that
            Python raises `TypeError` when the expression is built.
        """
        # Fail at composition time instead of deep inside the predicate.
        if not isinstance(other, Selector):
            return NotImplemented
        return self.__class__(lambda col: self.predicate(col) and other.predicate(col))

    def __or__(self, other: Selector) -> Selector:
        """Compute the disjunction of two `Selectors`.

        Parameters
        ----------
        other
            Another selector

        Returns
        -------
        Selector
            A selector matching columns accepted by `self` or by `other`.
            `NotImplemented` is returned for non-`Selector` operands so that
            Python raises `TypeError` when the expression is built.
        """
        # Fail at composition time instead of deep inside the predicate.
        if not isinstance(other, Selector):
            return NotImplemented
        return self.__class__(lambda col: self.predicate(col) or other.predicate(col))

    def __invert__(self) -> Selector:
        """Compute the logical negation of a `Selector`.

        Returns
        -------
        Selector
            A selector matching exactly the columns that `self` rejects.
        """
        return self.__class__(lambda col: not self.predicate(col))
35
104
36
105
106
@public
def where(predicate: Callable[[ir.Value], bool]) -> Selector:
    """Return columns that satisfy `predicate`.

    This is the general-purpose selector: reach for it when none of the
    more specific selectors expresses the condition you need.

    Parameters
    ----------
    predicate
        A callable that accepts an ibis value expression and returns a `bool`

    Returns
    -------
    Selector
        A selector wrapping `predicate`.

    Examples
    --------
    >>> t = ibis.table(dict(a="float32"), name="t")
    >>> t.select(s.where(lambda col: col.get_name() == "a"))
    r0 := UnboundTable: t
      a float32
    <BLANKLINE>
    Selection[r0]
      selections:
        a: r0.a
    """
    selector = Selector(predicate)
    return selector
52
129
53
130
131
+ @public
54
132
def numeric () -> Selector :
55
133
"""Return numeric columns.
56
134
@@ -68,53 +146,141 @@ def numeric() -> Selector:
68
146
a int64
69
147
b string
70
148
c array<string>
71
-
149
+ <BLANKLINE>
72
150
Selection[r0]
73
151
selections:
74
152
a: r0.a
153
+
154
+ See Also
155
+ --------
156
+ [`of_type`][ibis.expr.selectors.of_type]
75
157
"""
76
- return Selector ( lambda col : col . type (). is_numeric () )
158
+ return of_type ( dt . Numeric )
77
159
78
160
161
@public
def of_type(dtype: dt.DataType | str | type[dt.DataType]) -> Selector:
    """Select columns of type `dtype`.

    Parameters
    ----------
    dtype
        `DataType` instance, `str` or `DataType` class

    Returns
    -------
    Selector
        When `dtype` is a class, the selector matches columns whose type is
        an instance of that class. Otherwise `dtype` is converted with
        `dt.dtype` and the selector matches columns whose type compares
        equal to the result.

    Examples
    --------
    Select according to a specific `DataType` instance

    >>> t.select(s.of_type(dt.Array(dt.string)))

    Strings are also accepted

    >>> t.select(s.of_type("map<string, float>"))

    Select by category of `DataType` by passing the `DataType` class

    >>> t.select(s.of_type(dt.Struct))  # all struct columns, regardless of field types

    See Also
    --------
    [`numeric`][ibis.expr.selectors.numeric]
    """
    # A class selects a whole family of types via an isinstance check.
    if inspect.isclass(dtype):
        return where(lambda col: isinstance(col.type(), dtype))

    # Anything else is normalised to a concrete type and compared exactly.
    expected = dt.dtype(dtype)
    return where(lambda col: col.type() == expected)
87
194
88
195
196
@public
def startswith(prefixes: str | tuple[str, ...]) -> Selector:
    """Select columns whose name starts with one of `prefixes`.

    Parameters
    ----------
    prefixes
        Prefixes to compare column names against

    Returns
    -------
    Selector
        A selector matching columns for which
        `col.get_name().startswith(prefixes)` is true.

    Examples
    --------
    >>> t = ibis.table(dict(apples="int", oranges="float", bananas="bool"), name="t")
    >>> t.select(s.startswith(("a", "b")))

    See Also
    --------
    [`endswith`][ibis.expr.selectors.endswith]
    """

    def has_prefix(col: ir.Value) -> bool:
        column_name = col.get_name()
        return column_name.startswith(prefixes)

    return where(has_prefix)
93
215
216
+
217
@public
def endswith(suffixes: str | tuple[str, ...]) -> Selector:
    """Select columns whose name ends with one of `suffixes`.

    Parameters
    ----------
    suffixes
        Suffixes to compare column names against

    Returns
    -------
    Selector
        A selector matching columns for which
        `col.get_name().endswith(suffixes)` is true.

    See Also
    --------
    [`startswith`][ibis.expr.selectors.startswith]
    """

    def has_suffix(col: ir.Value) -> bool:
        column_name = col.get_name()
        return column_name.endswith(suffixes)

    return where(has_suffix)
98
231
232
+
233
@public
def contains(
    needles: str | tuple[str, ...], how: Callable[[Iterable[bool]], bool] = any
) -> Selector:
    """Return columns whose name contains `needles`.

    Parameters
    ----------
    needles
        One or more strings to search for in column names
    how
        A boolean reduction applied to the per-needle membership tests.
        The default, `any`, matches a column when at least one needle occurs
        in its name; pass `all` to require every needle.

    Returns
    -------
    Selector
        A selector matching columns whose name satisfies `how` over the
        `needle in name` tests.

    Examples
    --------
    Select columns that contain either `"a"` or `"b"`

    >>> t.select(s.contains(("a", "b")))

    Select columns that contain all of `"a"` and `"b"`

    >>> t.select(s.contains(("a", "b"), how=all))

    See Also
    --------
    [`matches`][ibis.expr.selectors.matches]
    """
    # Normalise once here instead of once per column: the result does not
    # depend on the column being tested.
    # NOTE(review): presumably `promote_list` wraps a lone string in a list
    # and materialises other iterables -- confirm against `ibis.util`.
    needle_list = util.promote_list(needles)

    def predicate(col: ir.Value) -> bool:
        name = col.get_name()
        # Pass a lazy generator so reductions such as `any`/`all` can
        # short-circuit.
        return how(needle in name for needle in needle_list)

    return where(predicate)
113
266
114
267
268
@public
def matches(regex: str | re.Pattern) -> Selector:
    """Return columns whose name matches the regular expression `regex`.

    The pattern is applied with `search`, so it may match anywhere in the
    column name unless it is explicitly anchored.

    Parameters
    ----------
    regex
        A string or `re.Pattern` object

    Returns
    -------
    Selector
        A selector matching columns whose name contains a match for `regex`.

    Examples
    --------
    >>> t.select(s.matches(r"ab+"))

    See Also
    --------
    [`contains`][ibis.expr.selectors.contains]
    """
    # Compile once up front; the bound method is reused for every column.
    search = re.compile(regex).search

    def name_matches(col: ir.Value) -> bool:
        return search(col.get_name()) is not None

    return where(name_matches)
0 commit comments