Skip to content

Commit 321a3b5

Browse files
committed
feat(bigquery, impala, mssql, oracle, postgres): compile Table.sample to native TABLESAMPLE syntax when possible
1 parent b95a036 commit 321a3b5

File tree

73 files changed

+527
-14
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

73 files changed

+527
-14
lines changed

ibis/backends/sql/compilers/base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -293,7 +293,7 @@ class SQLGlotCompiler(abc.ABC):
293293
LOWERED_OPS: dict[type[ops.Node], pats.Replace | None] = {
294294
ops.Bucket: lower_bucket,
295295
ops.Capitalize: lower_capitalize,
296-
ops.Sample: lower_sample(supports_methods=()),
296+
ops.Sample: lower_sample(supported_methods=()),
297297
ops.StringSlice: lower_stringslice,
298298
}
299299
"""A mapping from an operation class to either a rewrite rule for rewriting that

ibis/backends/sql/compilers/bigquery/__init__.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
exclude_unsupported_window_frame_from_ops,
2323
exclude_unsupported_window_frame_from_rank,
2424
exclude_unsupported_window_frame_from_row_number,
25+
lower_sample,
2526
split_select_distinct_with_order_by,
2627
)
2728
from ibis.common.temporal import DateUnit, IntervalUnit, TimestampUnit, TimeUnit
@@ -118,6 +119,14 @@ class BigQueryCompiler(SQLGlotCompiler):
118119

119120
supports_qualify = True
120121

122+
LOWERED_OPS = {
123+
ops.Sample: lower_sample(
124+
supported_methods=("block",),
125+
supports_seed=False,
126+
physical_tables_only=True,
127+
),
128+
}
129+
121130
UNSUPPORTED_OPS = (
122131
ops.DateDiff,
123132
ops.ExtractAuthority,

ibis/backends/sql/compilers/druid.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ class DruidCompiler(SQLGlotCompiler):
6565
ops.TypeOf,
6666
ops.Unnest,
6767
ops.Variance,
68+
ops.Sample,
6869
)
6970

7071
SIMPLE_OPS = {

ibis/backends/sql/compilers/impala.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from ibis.backends.sql.compilers.base import NULL, STAR, SQLGlotCompiler
1111
from ibis.backends.sql.datatypes import ImpalaType
1212
from ibis.backends.sql.dialects import Impala
13-
from ibis.backends.sql.rewrites import rewrite_empty_order_by_window
13+
from ibis.backends.sql.rewrites import lower_sample, rewrite_empty_order_by_window
1414

1515

1616
class ImpalaCompiler(SQLGlotCompiler):
@@ -23,6 +23,12 @@ class ImpalaCompiler(SQLGlotCompiler):
2323
*SQLGlotCompiler.rewrites,
2424
)
2525

26+
LOWERED_OPS = {
27+
ops.Sample: lower_sample(
28+
supported_methods=("block",), physical_tables_only=True
29+
),
30+
}
31+
2632
UNSUPPORTED_OPS = (
2733
ops.ArgMax,
2834
ops.ArgMin,

ibis/backends/sql/compilers/mssql.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
from ibis.backends.sql.rewrites import (
2323
exclude_unsupported_window_frame_from_ops,
2424
exclude_unsupported_window_frame_from_row_number,
25+
lower_sample,
2526
p,
2627
replace,
2728
split_select_distinct_with_order_by,
@@ -73,6 +74,12 @@ class MSSQLCompiler(SQLGlotCompiler):
7374
post_rewrites = (split_select_distinct_with_order_by,)
7475
copy_func_args = True
7576

77+
LOWERED_OPS = {
78+
ops.Sample: lower_sample(
79+
supported_methods=("block",), physical_tables_only=True
80+
),
81+
}
82+
7683
UNSUPPORTED_OPS = (
7784
ops.ApproxMedian,
7885
ops.ArgMax,

ibis/backends/sql/compilers/oracle.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
exclude_unsupported_window_frame_from_row_number,
1717
lower_log2,
1818
lower_log10,
19+
lower_sample,
1920
rewrite_empty_order_by_window,
2021
)
2122

@@ -46,6 +47,7 @@ class OracleCompiler(SQLGlotCompiler):
4647
LOWERED_OPS = {
4748
ops.Log2: lower_log2,
4849
ops.Log10: lower_log10,
50+
ops.Sample: lower_sample(physical_tables_only=True),
4951
}
5052

5153
UNSUPPORTED_OPS = (

ibis/backends/sql/compilers/postgres.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
from ibis.backends.sql.compilers.base import NULL, STAR, AggGen, SQLGlotCompiler
1818
from ibis.backends.sql.datatypes import PostgresType
1919
from ibis.backends.sql.dialects import Postgres
20-
from ibis.backends.sql.rewrites import split_select_distinct_with_order_by
20+
from ibis.backends.sql.rewrites import lower_sample, split_select_distinct_with_order_by
2121
from ibis.common.exceptions import InvalidDecoratorError
2222
from ibis.util import gen_name
2323

@@ -50,6 +50,8 @@ class PostgresCompiler(SQLGlotCompiler):
5050
POS_INF = sge.Literal.number("'Inf'::double precision")
5151
NEG_INF = sge.Literal.number("'-Inf'::double precision")
5252

53+
LOWERED_OPS = {ops.Sample: lower_sample(physical_tables_only=True)}
54+
5355
UNSUPPORTED_OPS = (
5456
ops.RowID,
5557
ops.TimeDelta,

ibis/backends/sql/compilers/risingwave.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ class RisingWaveCompiler(PostgresCompiler):
2626
ops.RandomUUID,
2727
ops.MultiQuantile,
2828
ops.ApproxMultiQuantile,
29+
ops.Sample,
2930
*(
3031
op
3132
for op in ALL_OPERATIONS

ibis/backends/sql/dialects.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -307,18 +307,30 @@ class Tokenizer(Hive.Tokenizer):
307307
STRING_ESCAPES = ["'"]
308308

309309

310+
def tablesample_percent_to_int(self, expr):
311+
"""Impala's TABLESAMPLE only supports integer percentages."""
312+
expr = expr.copy()
313+
expr.args["percent"] = sge.convert(round(float(expr.args["percent"].this)))
314+
return self.tablesample_sql(expr)
315+
316+
310317
class Impala(Hive):
311318
NULL_ORDERING = "nulls_are_large"
312319
REGEXP_EXTRACT_DEFAULT_GROUP = 0
320+
TABLESAMPLE_SIZE_IS_PERCENT = True
321+
ALIAS_POST_TABLESAMPLE = False
313322

314323
class Generator(Hive.Generator):
324+
TABLESAMPLE_WITH_METHOD = True
325+
315326
TRANSFORMS = Hive.Generator.TRANSFORMS.copy() | {
316327
sge.ApproxDistinct: rename_func("ndv"),
317328
sge.IsNan: rename_func("is_nan"),
318329
sge.IsInf: rename_func("is_inf"),
319330
sge.DayOfWeek: rename_func("dayofweek"),
320331
sge.Interval: lambda self, e: _interval(self, e, quote_arg=False),
321332
sge.CurrentDate: rename_func("current_date"),
333+
sge.TableSample: tablesample_percent_to_int,
322334
}
323335

324336

ibis/backends/sql/rewrites.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -593,7 +593,7 @@ def lower_capitalize(_, **kwargs):
593593

594594

595595
def lower_sample(
596-
supports_methods=("row", "block"),
596+
supported_methods=("row", "block"),
597597
supports_seed=True,
598598
physical_tables_only=False,
599599
):
@@ -605,7 +605,7 @@ def lower_sample(
605605
606606
Parameters
607607
----------
608-
supports_methods
608+
supported_methods
609609
The sampling methods supported by the backend's native TABLESAMPLE operation.
610610
supports_seed
611611
Whether the backend's native TABLESAMPLE supports setting a `seed`.
@@ -616,7 +616,7 @@ def lower_sample(
616616
@replace(p.Sample)
617617
def lower(_, **kwargs):
618618
if (
619-
(_.method not in supports_methods)
619+
_.method not in supported_methods
620620
or (_.seed is not None and not supports_seed)
621621
or (
622622
physical_tables_only
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
SELECT
2+
*
3+
FROM `test` AS `t0` TABLESAMPLE system (50.0 PERCENT)
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
SELECT
2+
*
3+
FROM `test` AS `t0`
4+
WHERE
5+
RAND() <= 0.5
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
SELECT
2+
*
3+
FROM (
4+
SELECT
5+
*
6+
FROM `test` AS `t0`
7+
WHERE
8+
`t0`.`x` > 10
9+
) AS `t1`
10+
WHERE
11+
RAND() <= 0.5
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
SELECT
2+
*
3+
FROM (
4+
SELECT
5+
*
6+
FROM `test` AS `t0`
7+
WHERE
8+
`t0`.`x` > 10
9+
) AS `t1`
10+
WHERE
11+
RAND() <= 0.5
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
SELECT
2+
*
3+
FROM "test" AS "t0"
4+
WHERE
5+
randCanonical() <= 0.5
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
SELECT
2+
*
3+
FROM "test" AS "t0"
4+
WHERE
5+
randCanonical() <= 0.5
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
SELECT
2+
*
3+
FROM (
4+
SELECT
5+
*
6+
FROM "test" AS "t0"
7+
WHERE
8+
"t0"."x" > 10
9+
) AS "t1"
10+
WHERE
11+
randCanonical() <= 0.5
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
SELECT
2+
*
3+
FROM (
4+
SELECT
5+
*
6+
FROM "test" AS "t0"
7+
WHERE
8+
"t0"."x" > 10
9+
) AS "t1"
10+
WHERE
11+
randCanonical() <= 0.5
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
SELECT
2+
*
3+
FROM "test" AS "t0"
4+
WHERE
5+
RANDOM() <= 0.5
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
SELECT
2+
*
3+
FROM "test" AS "t0"
4+
WHERE
5+
RANDOM() <= 0.5
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
SELECT
2+
*
3+
FROM (
4+
SELECT
5+
*
6+
FROM "test" AS "t0"
7+
WHERE
8+
"t0"."x" > 10
9+
) AS "t1"
10+
WHERE
11+
RANDOM() <= 0.5
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
SELECT
2+
*
3+
FROM (
4+
SELECT
5+
*
6+
FROM "test" AS "t0"
7+
WHERE
8+
"t0"."x" > 10
9+
) AS "t1"
10+
WHERE
11+
RANDOM() <= 0.5
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
SELECT
2+
*
3+
FROM "test" AS "t0" TABLESAMPLE system (50.0 PERCENT)
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
SELECT
2+
*
3+
FROM "test" AS "t0" TABLESAMPLE bernoulli (50.0 PERCENT)
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
SELECT
2+
*
3+
FROM (
4+
SELECT
5+
*
6+
FROM "test" AS "t0"
7+
WHERE
8+
"t0"."x" > 10
9+
) AS "t1" TABLESAMPLE system (50.0 PERCENT)
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
SELECT
2+
*
3+
FROM (
4+
SELECT
5+
*
6+
FROM "test" AS "t0"
7+
WHERE
8+
"t0"."x" > 10
9+
) AS "t1" TABLESAMPLE bernoulli (50.0 PERCENT)
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
SELECT
2+
*
3+
FROM "test" AS "t0"
4+
WHERE
5+
RANDOM() <= 0.5
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
SELECT
2+
*
3+
FROM "test" AS "t0"
4+
WHERE
5+
RANDOM() <= 0.5
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
SELECT
2+
*
3+
FROM (
4+
SELECT
5+
*
6+
FROM "test" AS "t0"
7+
WHERE
8+
"t0"."x" > 10
9+
) AS "t1"
10+
WHERE
11+
RANDOM() <= 0.5
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
SELECT
2+
*
3+
FROM (
4+
SELECT
5+
*
6+
FROM "test" AS "t0"
7+
WHERE
8+
"t0"."x" > 10
9+
) AS "t1"
10+
WHERE
11+
RANDOM() <= 0.5
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
SELECT
2+
*
3+
FROM `test` AS `t0`
4+
WHERE
5+
RAND() <= 0.5
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
SELECT
2+
*
3+
FROM `test` AS `t0`
4+
WHERE
5+
RAND() <= 0.5
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
SELECT
2+
*
3+
FROM (
4+
SELECT
5+
*
6+
FROM `test` AS `t0`
7+
WHERE
8+
`t0`.`x` > 10
9+
) AS `t1`
10+
WHERE
11+
RAND() <= 0.5

0 commit comments

Comments
 (0)