Skip to content

Commit 8096552

Browse files
tswast authored and cpcloud committed
fix(bigquery): escape the schema (project ID) for BQ builtin UDFs
1 parent ec979f0 commit 8096552

File tree

5 files changed

+47
-4
lines changed

5 files changed

+47
-4
lines changed

ibis/backends/base/sql/__init__.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import contextlib
55
import os
66
from functools import lru_cache
7-
from typing import TYPE_CHECKING, Any
7+
from typing import TYPE_CHECKING, Any, Optional
88

99
import toolz
1010

@@ -255,18 +255,21 @@ def _register_udfs(self, expr: ir.Expr) -> None:
255255
if self.supports_python_udfs:
256256
raise NotImplementedError(self.name)
257257

258+
# Build the SQL name used to call a UDF.
#
# `schema` and `name` are joined with "."; `filter(None, ...)` drops falsy
# parts, so a missing (None) or empty schema yields just the bare name.
# No quoting is applied here.
def _gen_udf_name(self, name: str, schema: Optional[str]) -> str:
259+
return ".".join(filter(None, (schema, name)))
260+
258261
def _gen_udf_rule(self, op: ops.ScalarUDF):
259262
@self.add_operation(type(op))
260263
def _(t, op):
261-
func = ".".join(filter(None, (op.__udf_namespace__, op.__func_name__)))
264+
func = self._gen_udf_name(op.__func_name__, schema=op.__udf_namespace__)
262265
return f"{func}({', '.join(map(t.translate, op.args))})"
263266

264267
def _gen_udaf_rule(self, op: ops.AggUDF):
265268
from ibis import NA
266269

267270
@self.add_operation(type(op))
268271
def _(t, op):
269-
func = ".".join(filter(None, (op.__udf_namespace__, op.__func_name__)))
272+
func = self._gen_udf_name(op.__func_name__, schema=op.__udf_namespace__)
270273
args = ", ".join(
271274
t.translate(
272275
ops.IfElse(where, arg, NA)

ibis/backends/bigquery/__init__.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
import re
1010
import warnings
1111
from functools import partial
12-
from typing import TYPE_CHECKING, Any, Callable
12+
from typing import TYPE_CHECKING, Any, Callable, Optional
1313
from urllib.parse import parse_qs, urlparse
1414

1515
import google.auth.credentials
@@ -785,6 +785,12 @@ def to_pyarrow_batches(
785785
)
786786
return pa.RecordBatchReader.from_batches(schema.to_pyarrow(), batch_iter)
787787

788+
# Build the SQL name used to call a UDF, quoting qualified names.
#
# `schema` and `name` are joined with "." (falsy parts are dropped). If the
# joined name contains a dot, it is split on "." and EVERY part -- the schema
# components and the function name itself -- is wrapped in backticks.
# A name with no dot is returned unchanged, without backticks.
def _gen_udf_name(self, name: str, schema: Optional[str]) -> str:
789+
func = ".".join(filter(None, (schema, name)))
790+
# A dot can come from the schema/name join or from a dotted schema string.
if "." in func:
791+
return ".".join(f"`{part}`" for part in func.split("."))
792+
return func
793+
788794
def get_schema(self, name, schema: str | None = None, database: str | None = None):
789795
table_ref = bq.TableReference(
790796
bq.DatasetReference(
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
SELECT
2+
`bqutil`.`fn`.`from_hex`('face') AS `from_hex_'face'`
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
SELECT
2+
farm_fingerprint(b'Hello, World!') AS `farm_fingerprint_b'Hello_ World_'`
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
2+
import ibis
3+
4+
to_sql = ibis.bigquery.compile
5+
6+
7+
# Builtin scalar UDF declared without a schema. The body is intentionally
# just `...`; only the name and the type annotations are declared here.
# NOTE(review): presumably this maps to BigQuery's FARM_FINGERPRINT -- confirm.
@ibis.udf.scalar.builtin
8+
def farm_fingerprint(value: bytes) -> int:
9+
...
10+
11+
12+
# Builtin scalar UDF declared with a dotted schema ("bqutil.fn"). The function
# has no body beyond its docstring.
@ibis.udf.scalar.builtin(schema="bqutil.fn")
13+
def from_hex(value: str) -> int:
14+
"""Community function to convert from hex string to integer.
15+
16+
See:
17+
https://github.com/GoogleCloudPlatform/bigquery-utils/tree/master/udfs/community#from_hexvalue-string
18+
"""
19+
20+
21+
# Snapshot test: compiles a call to the schema-qualified `from_hex` UDF with
# `ibis.bigquery.compile` (bound to `to_sql`) and compares the resulting SQL
# against the stored "out.sql" snapshot.
def test_bqutil_fn_from_hex(snapshot):
22+
# Project ID should be enclosed in backticks.
23+
expr = from_hex("face")
24+
snapshot.assert_match(to_sql(expr), "out.sql")
25+
26+
27+
# Snapshot test: compiles a call to the schema-less `farm_fingerprint` UDF with
# `ibis.bigquery.compile` (bound to `to_sql`) and compares the resulting SQL
# against the stored "out.sql" snapshot.
def test_farm_fingerprint(snapshot):
28+
# No backticks needed if there is no schema defined.
29+
expr = farm_fingerprint(b"Hello, World!")
30+
snapshot.assert_match(to_sql(expr), "out.sql")

0 commit comments

Comments
 (0)