
Commit dfeaad0

feat: expose gcf max timeout in remote_function (#639)
* feat: expose gcf max timeout in `remote_function`
* remove duplicate test case from parametrize
1 parent ce56495 commit dfeaad0

File tree

bigframes/functions/remote_function.py
bigframes/pandas/__init__.py
bigframes/session/__init__.py
tests/system/large/test_remote_function.py

4 files changed: +88 -3 lines changed


bigframes/functions/remote_function.py

Lines changed: 25 additions & 3 deletions
@@ -341,7 +341,9 @@ def generate_cloud_function_code(self, def_, dir, package_requirements=None):
         entry_point = self.generate_cloud_function_main_code(def_, dir)
         return entry_point
 
-    def create_cloud_function(self, def_, cf_name, package_requirements=None):
+    def create_cloud_function(
+        self, def_, cf_name, package_requirements=None, cloud_function_timeout=600
+    ):
         """Create a cloud function from the given user defined function."""
 
         # Build and deploy folder structure containing cloud function
@@ -409,7 +411,14 @@ def create_cloud_function(self, def_, cf_name, package_requirements=None):
         )
         function.service_config = functions_v2.ServiceConfig()
         function.service_config.available_memory = "1024M"
-        function.service_config.timeout_seconds = 600
+        if cloud_function_timeout is not None:
+            if cloud_function_timeout > 1200:
+                raise ValueError(
+                    "BigQuery remote function can wait only up to 20 minutes"
+                    ", see for more details "
+                    "https://cloud.google.com/bigquery/quotas#remote_function_limits."
+                )
+            function.service_config.timeout_seconds = cloud_function_timeout
         function.service_config.service_account_email = (
             self._cloud_function_service_account
         )
@@ -456,6 +465,7 @@ def provision_bq_remote_function(
         name,
         package_requirements,
         max_batching_rows,
+        cloud_function_timeout,
     ):
         """Provision a BigQuery remote function."""
         # If reuse of any existing function with the same name (indicated by the
@@ -477,7 +487,7 @@ def provision_bq_remote_function(
         # Create the cloud function if it does not exist
         if not cf_endpoint:
             cf_endpoint = self.create_cloud_function(
-                def_, cloud_function_name, package_requirements
+                def_, cloud_function_name, package_requirements, cloud_function_timeout
             )
         else:
             logger.info(f"Cloud function {cloud_function_name} already exists.")
@@ -631,6 +641,7 @@ def remote_function(
     cloud_function_kms_key_name: Optional[str] = None,
     cloud_function_docker_repository: Optional[str] = None,
     max_batching_rows: Optional[int] = 1000,
+    cloud_function_timeout: Optional[int] = 600,
 ):
     """Decorator to turn a user defined function into a BigQuery remote function.
 
@@ -756,6 +767,16 @@ def remote_function(
             `None` can be passed to let BQ remote functions service apply
             default batching. See for more details
             https://cloud.google.com/bigquery/docs/remote-functions#limiting_number_of_rows_in_a_batch_request.
+        cloud_function_timeout (int, Optional):
+            The maximum amount of time (in seconds) BigQuery should wait for
+            the cloud function to return a response. See for more details
+            https://cloud.google.com/functions/docs/configuring/timeout.
+            Please note that even though the cloud function (2nd gen) itself
+            allows setting up to 60 minutes of timeout, BigQuery remote
+            function can wait only up to 20 minutes, see for more details
+            https://cloud.google.com/bigquery/quotas#remote_function_limits.
+            By default BigQuery DataFrames uses a 10 minute timeout. `None`
+            can be passed to let the cloud functions default timeout take effect.
     """
     import bigframes.pandas as bpd
 
@@ -880,6 +901,7 @@ def wrapper(f):
             name,
             packages,
             max_batching_rows,
+            cloud_function_timeout,
         )
 
         # TODO: Move ibis logic to compiler step
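For orientation, a minimal usage sketch of the newly exposed knob via the top-level API (the function body and timeout value are illustrative; a configured BigQuery connection and the required permissions are assumed):

import bigframes.pandas as bpd

# Request the maximum timeout a BigQuery remote function supports (20 minutes).
# Passing a value above 1200 seconds raises ValueError per the check added above.
@bpd.remote_function([int], int, reuse=False, cloud_function_timeout=1200)
def slow_square(x):
    return x * x

The 1200-second ceiling mirrors the BigQuery remote function quota rather than the Cloud Functions (2nd gen) 60-minute maximum, which is why the guard above rejects anything larger.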

bigframes/pandas/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -644,6 +644,7 @@ def remote_function(
     cloud_function_kms_key_name: Optional[str] = None,
     cloud_function_docker_repository: Optional[str] = None,
     max_batching_rows: Optional[int] = 1000,
+    cloud_function_timeout: Optional[int] = 600,
 ):
     return global_session.with_default_session(
         bigframes.session.Session.remote_function,
@@ -658,6 +659,7 @@ def remote_function(
         cloud_function_kms_key_name=cloud_function_kms_key_name,
         cloud_function_docker_repository=cloud_function_docker_repository,
         max_batching_rows=max_batching_rows,
+        cloud_function_timeout=cloud_function_timeout,
     )
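Since the pandas entry point forwards the argument verbatim, `None` keeps its documented meaning there too. A sketch (illustrative function, same setup assumptions as above) that defers to the Cloud Functions default timeout, which the new system test below expects to be 60 seconds:

import bigframes.pandas as bpd

# None skips the timeout override, so the deployed function keeps the
# Cloud Functions default timeout instead of the BigQuery DataFrames 600s default.
@bpd.remote_function([float], float, reuse=False, cloud_function_timeout=None)
def celsius_to_fahrenheit(c):
    return c * 9 / 5 + 32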
bigframes/session/__init__.py

Lines changed: 12 additions & 0 deletions
@@ -1529,6 +1529,7 @@ def remote_function(
         cloud_function_kms_key_name: Optional[str] = None,
         cloud_function_docker_repository: Optional[str] = None,
         max_batching_rows: Optional[int] = 1000,
+        cloud_function_timeout: Optional[int] = 600,
     ):
         """Decorator to turn a user defined function into a BigQuery remote function. Check out
         the code samples at: https://cloud.google.com/bigquery/docs/remote-functions#bigquery-dataframes.
@@ -1632,6 +1633,16 @@ def remote_function(
                 `None` can be passed to let BQ remote functions service apply
                 default batching. See for more details
                 https://cloud.google.com/bigquery/docs/remote-functions#limiting_number_of_rows_in_a_batch_request.
+            cloud_function_timeout (int, Optional):
+                The maximum amount of time (in seconds) BigQuery should wait for
+                the cloud function to return a response. See for more details
+                https://cloud.google.com/functions/docs/configuring/timeout.
+                Please note that even though the cloud function (2nd gen) itself
+                allows setting up to 60 minutes of timeout, BigQuery remote
+                function can wait only up to 20 minutes, see for more details
+                https://cloud.google.com/bigquery/quotas#remote_function_limits.
+                By default BigQuery DataFrames uses a 10 minute timeout. `None`
+                can be passed to let the cloud functions default timeout take effect.
         Returns:
             callable: A remote function object pointing to the cloud assets created
             in the background to support the remote execution. The cloud assets can be
@@ -1654,6 +1665,7 @@ def remote_function(
             cloud_function_kms_key_name=cloud_function_kms_key_name,
             cloud_function_docker_repository=cloud_function_docker_repository,
             max_batching_rows=max_batching_rows,
+            cloud_function_timeout=cloud_function_timeout,
         )
 
     def read_gbq_function(
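To verify what was actually deployed, the timeout can be read back through the session's Cloud Functions client. A sketch assuming the `slow_square` function from the earlier example and that `get_global_session` is available from `bigframes.pandas` (the new system test below performs essentially this check):

import bigframes.pandas as bpd

session = bpd.get_global_session()  # assumed helper returning the default session

# bigframes_cloud_function holds the fully qualified Cloud Function resource name.
gcf = session.cloudfunctionsclient.get_function(
    name=slow_square.bigframes_cloud_function
)
assert gcf.service_config.timeout_seconds == 1200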

tests/system/large/test_remote_function.py

Lines changed: 49 additions & 0 deletions
@@ -1336,3 +1336,52 @@ def square(x):
         cleanup_remote_function_assets(
             session.bqclient, session.cloudfunctionsclient, square_remote
         )
+
+
+@pytest.mark.parametrize(
+    ("timeout_args", "effective_gcf_timeout"),
+    [
+        pytest.param({}, 600, id="no-set"),
+        pytest.param({"cloud_function_timeout": None}, 60, id="set-None"),
+        pytest.param({"cloud_function_timeout": 1200}, 1200, id="set-max-allowed"),
+    ],
+)
+@pytest.mark.flaky(retries=2, delay=120)
+def test_remote_function_gcf_timeout(
+    session, scalars_dfs, timeout_args, effective_gcf_timeout
+):
+    try:
+
+        def square(x):
+            return x * x
+
+        square_remote = session.remote_function(
+            [int], int, reuse=False, **timeout_args
+        )(square)
+
+        # Assert that the GCF is created with the intended maximum timeout
+        gcf = session.cloudfunctionsclient.get_function(
+            name=square_remote.bigframes_cloud_function
+        )
+        assert gcf.service_config.timeout_seconds == effective_gcf_timeout
+
+        scalars_df, scalars_pandas_df = scalars_dfs
+
+        bf_result = scalars_df["int64_too"].apply(square_remote).to_pandas()
+        pd_result = scalars_pandas_df["int64_too"].apply(square)
+
+        pandas.testing.assert_series_equal(bf_result, pd_result, check_dtype=False)
+    finally:
+        # clean up the gcp assets created for the remote function
+        cleanup_remote_function_assets(
+            session.bqclient, session.cloudfunctionsclient, square_remote
+        )
+
+
+@pytest.mark.flaky(retries=2, delay=120)
+def test_remote_function_gcf_timeout_max_supported_exceeded(session):
+    with pytest.raises(ValueError):
+
+        @session.remote_function([int], int, reuse=False, cloud_function_timeout=1201)
+        def square(x):
+            return x * x
