@@ -3210,6 +3210,88 @@ def asof_join(
3210
3210
-------
3211
3211
Table
3212
3212
Table expression
3213
+
3214
+ Examples
3215
+ --------
3216
+ >>> from datetime import datetime, timedelta
3217
+ >>> import ibis
3218
+ >>> ibis.options.interactive = True
3219
+ >>> sensors = ibis.memtable(
3220
+ ... {
3221
+ ... "site": ["a", "b", "a", "b", "a"],
3222
+ ... "humidity": [0.3, 0.4, 0.5, 0.6, 0.7],
3223
+ ... "event_time": [
3224
+ ... datetime(2024, 11, 16, 12, 0, 15, 500000),
3225
+ ... datetime(2024, 11, 16, 12, 0, 15, 700000),
3226
+ ... datetime(2024, 11, 17, 18, 12, 14, 950000),
3227
+ ... datetime(2024, 11, 17, 18, 12, 15, 120000),
3228
+ ... datetime(2024, 11, 18, 18, 12, 15, 100000),
3229
+ ... ],
3230
+ ... }
3231
+ ... )
3232
+ >>> events = ibis.memtable(
3233
+ ... {
3234
+ ... "site": ["a", "b", "a"],
3235
+ ... "event_type": [
3236
+ ... "cloud coverage",
3237
+ ... "rain start",
3238
+ ... "rain stop",
3239
+ ... ],
3240
+ ... "event_time": [
3241
+ ... datetime(2024, 11, 16, 12, 0, 15, 400000),
3242
+ ... datetime(2024, 11, 17, 18, 12, 15, 100000),
3243
+ ... datetime(2024, 11, 18, 18, 12, 15, 100000),
3244
+ ... ],
3245
+ ... }
3246
+ ... )
3247
+
3248
+ This setup simulates time-series data by pairing irregularly collected sensor
3249
+ readings with weather events, enabling analysis of environmental conditions
3250
+ around each event. We will use the `asof_join` method to match each sensor
3251
+ reading with the most recent event at or before it at the same site.
3252
+
3253
+ >>> sensors
3254
+ ┏━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━┓
3255
+ ┃ site ┃ humidity ┃ event_time ┃
3256
+ ┡━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━┩
3257
+ │ string │ float64 │ timestamp │
3258
+ ├────────┼──────────┼─────────────────────────┤
3259
+ │ a │ 0.3 │ 2024-11-16 12:00:15.500 │
3260
+ │ b │ 0.4 │ 2024-11-16 12:00:15.700 │
3261
+ │ a │ 0.5 │ 2024-11-17 18:12:14.950 │
3262
+ │ b │ 0.6 │ 2024-11-17 18:12:15.120 │
3263
+ │ a │ 0.7 │ 2024-11-18 18:12:15.100 │
3264
+ └────────┴──────────┴─────────────────────────┘
3265
+ >>> events
3266
+ ┏━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━┓
3267
+ ┃ site ┃ event_type ┃ event_time ┃
3268
+ ┡━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━┩
3269
+ │ string │ string │ timestamp │
3270
+ ├────────┼────────────────┼─────────────────────────┤
3271
+ │ a │ cloud coverage │ 2024-11-16 12:00:15.400 │
3272
+ │ b │ rain start │ 2024-11-17 18:12:15.100 │
3273
+ │ a │ rain stop │ 2024-11-18 18:12:15.100 │
3274
+ └────────┴────────────────┴─────────────────────────┘
3275
+
3276
+ We can find the most recent event at or before each sensor reading, within a
3277
+ 1 second tolerance. Using the "site" column as a join predicate ensures we only
3278
+ match events that occurred at the same site as the sensor reading.
3279
+
3280
+ >>> tolerance = timedelta(seconds=1)
3281
+ >>> sensors.asof_join(events, on="event_time", predicates="site", tolerance=tolerance).drop(
3282
+ ... "event_time_right"
3283
+ ... ).order_by("event_time")
3284
+ ┏━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┓
3285
+ ┃ site ┃ humidity ┃ event_time ┃ site_right ┃ event_type ┃
3286
+ ┡━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━┩
3287
+ │ string │ float64 │ timestamp │ string │ string │
3288
+ ├────────┼──────────┼─────────────────────────┼────────────┼────────────────┤
3289
+ │ a │ 0.3 │ 2024-11-16 12:00:15.500 │ a │ cloud coverage │
3290
+ │ b │ 0.4 │ 2024-11-16 12:00:15.700 │ NULL │ NULL │
3291
+ │ a │ 0.5 │ 2024-11-17 18:12:14.950 │ NULL │ NULL │
3292
+ │ b │ 0.6 │ 2024-11-17 18:12:15.120 │ b │ rain start │
3293
+ │ a │ 0.7 │ 2024-11-18 18:12:15.100 │ a │ rain stop │
3294
+ └────────┴──────────┴─────────────────────────┴────────────┴────────────────┘
3213
3295
"""
3214
3296
from ibis .expr .types .joins import Join
3215
3297
0 commit comments