Skip to content

Commit bdb718d

Browse files
authored
fix(snowflake): only compile sample to TABLESAMPLE on physical tables (#10218)
1 parent 321a3b5 commit bdb718d

File tree

4 files changed

+13
-19
lines changed

4 files changed

+13
-19
lines changed

ibis/backends/sql/compilers/snowflake.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -60,7 +60,12 @@ class SnowflakeCompiler(SQLGlotCompiler):
60 60
LOWERED_OPS = {
61 61
ops.Log2: lower_log2,
62 62
ops.Log10: lower_log10,
63-
ops.Sample: lower_sample(),
63+
# Snowflake's TABLESAMPLE _can_ work on subqueries, but only by row and without
64+
# a seed. This is effectively the same as `t.filter(random() <= fraction)`, and
65+
# using TABLESAMPLE here would almost certainly have no benefit over the filter
66+
# version in the optimized physical plan. To avoid a special case just for
67+
# snowflake, we only use TABLESAMPLE on physical tables.
68+
ops.Sample: lower_sample(physical_tables_only=True),
64 69
}
65 70

66 71
UNSUPPORTED_OPS = (

ibis/backends/tests/snapshots/test_sql/test_sample/snowflake-table/block.sql

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,4 +6,6 @@ FROM (
6 6
FROM "test" AS "t0"
7 7
WHERE
8 8
"t0"."x" > 10
9-
) AS "t1" TABLESAMPLE system (50.0)
9+
) AS "t1"
10+
WHERE
11+
UNIFORM(TO_DOUBLE(0.0), TO_DOUBLE(1.0), RANDOM()) <= 0.5

ibis/backends/tests/snapshots/test_sql/test_sample/snowflake-table/row.sql

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,4 +6,6 @@ FROM (
6 6
FROM "test" AS "t0"
7 7
WHERE
8 8
"t0"."x" > 10
9-
) AS "t1" TABLESAMPLE bernoulli (50.0)
9+
) AS "t1"
10+
WHERE
11+
UNIFORM(TO_DOUBLE(0.0), TO_DOUBLE(1.0), RANDOM()) <= 0.5

ibis/backends/tests/test_generic.py

Lines changed: 1 addition & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -2125,22 +2125,7 @@ def test_dynamic_table_slice_with_computed_offset(backend):
2125 2125

2126 2126

2127 2127
@pytest.mark.notimpl(["druid", "risingwave"], raises=com.OperationNotDefinedError)
2128-
@pytest.mark.parametrize(
2129-
"method",
2130-
[
2131-
"row",
2132-
param(
2133-
"block",
2134-
marks=[
2135-
pytest.mark.notimpl(
2136-
["snowflake"],
2137-
raises=SnowflakeProgrammingError,
2138-
reason="SAMPLE clause on views only supports row wise sampling without seed.",
2139-
)
2140-
],
2141-
),
2142-
],
2143-
)
2128+
@pytest.mark.parametrize("method", ["row", "block"])
2144 2129
@pytest.mark.parametrize("subquery", [True, False], ids=["subquery", "table"])
2145 2130
@pytest.mark.xfail_version(pyspark=["sqlglot==25.17.0"])
2146 2131
def test_sample(backend, method, alltypes, subquery):

0 commit comments

Comments
 (0)