Skip to content

Commit 9eb9089

Browse files
authored
feat!: make remote_function params keyword-only, and default service account explicit (#1537)
* feat!: make `remote_function` default service account explicit
  chore!: make `remote_function` params keyword only
* pass cloud_function_service_account="default" in unit tests
* pass default cloud_build_service_account in the doctests
* pass default cloud_function_service_account in the samples
* pass cloud_function_service_account="default" to the read_gbq_function doctest
* use cloud_function_service_account="default" at more places
* remove positional argument usage for remote_function
1 parent e9fb712 commit 9eb9089

File tree

16 files changed

+388
-249
lines changed

16 files changed

+388
-249
lines changed

bigframes/functions/_function_session.py

+12-19
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,7 @@ def _try_delattr(self, func: Callable, attr: str) -> None:
237237
# https://github.com/ibis-project/ibis/blob/master/ibis/backends/bigquery/udf/__init__.py
238238
def remote_function(
239239
self,
240+
*,
240241
input_types: Union[None, type, Sequence[type]] = None,
241242
output_type: Optional[type] = None,
242243
session: Optional[Session] = None,
@@ -251,7 +252,7 @@ def remote_function(
251252
reuse: bool = True,
252253
name: Optional[str] = None,
253254
packages: Optional[Sequence[str]] = None,
254-
cloud_function_service_account: Optional[str] = None,
255+
cloud_function_service_account: str,
255256
cloud_function_kms_key_name: Optional[str] = None,
256257
cloud_function_docker_repository: Optional[str] = None,
257258
max_batching_rows: Optional[int] = 1000,
@@ -384,8 +385,8 @@ def remote_function(
384385
Explicit name of the external package dependencies. Each dependency
385386
is added to the `requirements.txt` as is, and can be of the form
386387
supported in https://pip.pypa.io/en/stable/reference/requirements-file-format/.
387-
cloud_function_service_account (str, Optional):
388-
Service account to use for the cloud functions. If not provided then
388+
cloud_function_service_account (str):
389+
Service account to use for the cloud functions. If "default" is provided then
389390
the default service account would be used. See
390391
https://cloud.google.com/functions/docs/securing/function-identity
391392
for more details. Please make sure the service account has the
@@ -455,22 +456,12 @@ def remote_function(
455456
# Some defaults may be used from the session if not provided otherwise.
456457
session = self._resolve_session(session)
457458

458-
# raise a UserWarning if user does not explicitly set cloud_function_service_account to a
459-
# user-managed cloud_function_service_account of to default
460-
msg = bfe.format_message(
461-
"You have not explicitly set a user-managed `cloud_function_service_account`. "
462-
"Using the default Compute Engine service account. "
463-
"In BigFrames 2.0 onwards, you would have to explicitly set `cloud_function_service_account` "
464-
'either to a user-managed service account (preferred) or to `"default"` '
465-
"to use the default Compute Engine service account (discouraged). "
466-
"See, https://cloud.google.com/functions/docs/securing/function-identity."
467-
)
468-
459+
# If the user forces the `cloud_function_service_account` argument to None, raise
460+
# an exception
469461
if cloud_function_service_account is None:
470-
warnings.warn(msg, stacklevel=2, category=FutureWarning)
471-
472-
if cloud_function_service_account == "default":
473-
cloud_function_service_account = None
462+
raise ValueError(
463+
'You must provide a user managed cloud_function_service_account, or "default" if you would like to let the default service account be used.'
464+
)
474465

475466
# A BigQuery client is required to perform BQ operations.
476467
bigquery_client = self._resolve_bigquery_client(session, bigquery_client)
@@ -615,7 +606,9 @@ def wrapper(func):
615606
bq_connection_manager,
616607
cloud_function_region,
617608
cloud_functions_client,
618-
cloud_function_service_account,
609+
None
610+
if cloud_function_service_account == "default"
611+
else cloud_function_service_account,
619612
cloud_function_kms_key_name,
620613
cloud_function_docker_repository,
621614
session=session, # type: ignore

bigframes/pandas/__init__.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -65,14 +65,15 @@
6565

6666

6767
def remote_function(
68+
*,
6869
input_types: Union[None, type, Sequence[type]] = None,
6970
output_type: Optional[type] = None,
7071
dataset: Optional[str] = None,
7172
bigquery_connection: Optional[str] = None,
7273
reuse: bool = True,
7374
name: Optional[str] = None,
7475
packages: Optional[Sequence[str]] = None,
75-
cloud_function_service_account: Optional[str] = None,
76+
cloud_function_service_account: str,
7677
cloud_function_kms_key_name: Optional[str] = None,
7778
cloud_function_docker_repository: Optional[str] = None,
7879
max_batching_rows: Optional[int] = 1000,

bigframes/session/__init__.py

+10-9
Original file line numberDiff line numberDiff line change
@@ -1202,14 +1202,15 @@ def _check_file_size(self, filepath: str):
12021202

12031203
def remote_function(
12041204
self,
1205+
*,
12051206
input_types: Union[None, type, Sequence[type]] = None,
12061207
output_type: Optional[type] = None,
12071208
dataset: Optional[str] = None,
12081209
bigquery_connection: Optional[str] = None,
12091210
reuse: bool = True,
12101211
name: Optional[str] = None,
12111212
packages: Optional[Sequence[str]] = None,
1212-
cloud_function_service_account: Optional[str] = None,
1213+
cloud_function_service_account: str,
12131214
cloud_function_kms_key_name: Optional[str] = None,
12141215
cloud_function_docker_repository: Optional[str] = None,
12151216
max_batching_rows: Optional[int] = 1000,
@@ -1327,8 +1328,8 @@ def remote_function(
13271328
Explicit name of the external package dependencies. Each dependency
13281329
is added to the `requirements.txt` as is, and can be of the form
13291330
supported in https://pip.pypa.io/en/stable/reference/requirements-file-format/.
1330-
cloud_function_service_account (str, Optional):
1331-
Service account to use for the cloud functions. If not provided
1331+
cloud_function_service_account (str):
1332+
Service account to use for the cloud functions. If "default" is provided
13321333
then the default service account would be used. See
13331334
https://cloud.google.com/functions/docs/securing/function-identity
13341335
for more details. Please make sure the service account has the
@@ -1406,8 +1407,8 @@ def remote_function(
14061407
`bigframes_remote_function` - The bigquery remote function capable of calling into `bigframes_cloud_function`.
14071408
"""
14081409
return self._function_session.remote_function(
1409-
input_types,
1410-
output_type,
1410+
input_types=input_types,
1411+
output_type=output_type,
14111412
session=self,
14121413
dataset=dataset,
14131414
bigquery_connection=bigquery_connection,
@@ -1499,8 +1500,8 @@ def udf(
14991500
deployed for the user defined code.
15001501
"""
15011502
return self._function_session.udf(
1502-
input_types,
1503-
output_type,
1503+
input_types=input_types,
1504+
output_type=output_type,
15041505
session=self,
15051506
dataset=dataset,
15061507
bigquery_connection=bigquery_connection,
@@ -1593,7 +1594,7 @@ def read_gbq_function(
15931594
Another use case is to define your own remote function and use it later.
15941595
For example, define the remote function:
15951596
1596-
>>> @bpd.remote_function()
1597+
>>> @bpd.remote_function(cloud_function_service_account="default")
15971598
... def tenfold(num: int) -> float:
15981599
... return num * 10
15991600
@@ -1620,7 +1621,7 @@ def read_gbq_function(
16201621
note, row processor implies that the function has only one input
16211622
parameter.
16221623
1623-
>>> @bpd.remote_function()
1624+
>>> @bpd.remote_function(cloud_function_service_account="default")
16241625
... def row_sum(s: bpd.Series) -> float:
16251626
... return s['a'] + s['b'] + s['c']
16261627

notebooks/apps/synthetic_data_generation.ipynb

+2-2
Original file line numberDiff line numberDiff line change
@@ -248,8 +248,8 @@
248248
},
249249
"outputs": [],
250250
"source": [
251-
"@bpd.remote_function([int], str, packages=['faker', 'pandas'])\n",
252-
"def data_generator(id):\n",
251+
"@bpd.remote_function(packages=['faker', 'pandas'], cloud_function_service_account=\"default\")\n",
252+
"def data_generator(id: int) -> str:\n",
253253
" context = {}\n",
254254
" exec(code, context)\n",
255255
" result_df = context.get(\"result_df\")\n",

notebooks/generative_ai/bq_dataframes_llm_code_generation.ipynb

+2-2
Original file line numberDiff line numberDiff line change
@@ -914,8 +914,8 @@
914914
},
915915
"outputs": [],
916916
"source": [
917-
"@bf.remote_function([str], str)\n",
918-
"def extract_code(text: str):\n",
917+
"@bf.remote_function(cloud_function_service_account=\"default\")\n",
918+
"def extract_code(text: str) -> str:\n",
919919
" try:\n",
920920
" res = text[text.find('\\n')+1:text.find('```', 3)]\n",
921921
" res = res.replace(\"import pandas as pd\", \"import bigframes.pandas as bf\")\n",

notebooks/getting_started/getting_started_bq_dataframes.ipynb

+2-2
Original file line numberDiff line numberDiff line change
@@ -1485,8 +1485,8 @@
14851485
},
14861486
"outputs": [],
14871487
"source": [
1488-
"@bpd.remote_function([float], str)\n",
1489-
"def get_bucket(num):\n",
1488+
"@bpd.remote_function(cloud_function_service_account=\"default\")\n",
1489+
"def get_bucket(num: float) -> str:\n",
14901490
" if not num: return \"NA\"\n",
14911491
" boundary = 4000\n",
14921492
" return \"at_or_above_4000\" if num >= boundary else \"below_4000\""

notebooks/location/regionalized.ipynb

+2-2
Original file line numberDiff line numberDiff line change
@@ -1475,8 +1475,8 @@
14751475
}
14761476
],
14771477
"source": [
1478-
"@bpd.remote_function([float], str, bigquery_connection='bigframes-rf-conn')\n",
1479-
"def get_bucket(num):\n",
1478+
"@bpd.remote_function(bigquery_connection='bigframes-rf-conn', cloud_function_service_account=\"default\")\n",
1479+
"def get_bucket(num: float) -> str:\n",
14801480
" if not num: return \"NA\"\n",
14811481
" boundary = 4000\n",
14821482
" return \"at_or_above_4000\" if num >= boundary else \"below_4000\""

notebooks/remote_functions/remote_function.ipynb

+4-4
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,7 @@
174174
"source": [
175175
"# User defined function\n",
176176
"# https://www.codespeedy.com/find-nth-prime-number-in-python/\n",
177-
"def nth_prime(n):\n",
177+
"def nth_prime(n: int) -> int:\n",
178178
" prime_numbers = [2,3]\n",
179179
" i=3\n",
180180
" if(0<n<=2):\n",
@@ -627,8 +627,8 @@
627627
"\n",
628628
"# User defined function\n",
629629
"# https://www.codespeedy.com/find-nth-prime-number-in-python/\n",
630-
"@pd.remote_function([int], int, reuse=False)\n",
631-
"def nth_prime(n):\n",
630+
"@pd.remote_function(reuse=False, cloud_function_service_account=\"default\")\n",
631+
"def nth_prime(n: int) -> int:\n",
632632
" prime_numbers = [2,3]\n",
633633
" i=3\n",
634634
" if(0<n<=2):\n",
@@ -1179,7 +1179,7 @@
11791179
"name": "python",
11801180
"nbconvert_exporter": "python",
11811181
"pygments_lexer": "ipython3",
1182-
"version": "3.10.12"
1182+
"version": "3.11.4"
11831183
}
11841184
},
11851185
"nbformat": 4,

notebooks/remote_functions/remote_function_usecases.ipynb

+7-7
Original file line numberDiff line numberDiff line change
@@ -259,7 +259,7 @@
259259
}
260260
],
261261
"source": [
262-
"@bpd.remote_function(reuse=False)\n",
262+
"@bpd.remote_function(reuse=False, cloud_function_service_account=\"default\")\n",
263263
"def duration_category(duration_minutes: int) -> str:\n",
264264
" if duration_minutes < 90:\n",
265265
" return \"short\"\n",
@@ -466,7 +466,7 @@
466466
}
467467
],
468468
"source": [
469-
"@bpd.remote_function(reuse=False)\n",
469+
"@bpd.remote_function(reuse=False, cloud_function_service_account=\"default\")\n",
470470
"def duration_category(duration_minutes: int) -> str:\n",
471471
" if duration_minutes < 90:\n",
472472
" return DURATION_CATEGORY_SHORT\n",
@@ -675,7 +675,7 @@
675675
}
676676
],
677677
"source": [
678-
"@bpd.remote_function(reuse=False)\n",
678+
"@bpd.remote_function(reuse=False, cloud_function_service_account=\"default\")\n",
679679
"def duration_category(duration_minutes: int) -> str:\n",
680680
" duration_hours = mymath.ceil(duration_minutes / 60)\n",
681681
" return f\"{duration_hours}h\"\n",
@@ -886,7 +886,7 @@
886886
}
887887
],
888888
"source": [
889-
"@bpd.remote_function(reuse=False)\n",
889+
"@bpd.remote_function(reuse=False, cloud_function_service_account=\"default\")\n",
890890
"def duration_category(duration_minutes: int) -> str:\n",
891891
" duration_hours = get_hour_ceiling(duration_minutes)\n",
892892
" return f\"{duration_hours} hrs\"\n",
@@ -1068,7 +1068,7 @@
10681068
}
10691069
],
10701070
"source": [
1071-
"@bpd.remote_function(reuse=False, packages=[\"cryptography\"])\n",
1071+
"@bpd.remote_function(reuse=False, packages=[\"cryptography\"], cloud_function_service_account=\"default\")\n",
10721072
"def get_hash(input: str) -> str:\n",
10731073
" from cryptography.fernet import Fernet\n",
10741074
"\n",
@@ -1271,7 +1271,7 @@
12711271
}
12721272
],
12731273
"source": [
1274-
"@bpd.remote_function(reuse=False, packages=[\"humanize\"])\n",
1274+
"@bpd.remote_function(reuse=False, packages=[\"humanize\"], cloud_function_service_account=\"default\")\n",
12751275
"def duration_category(duration_minutes: int) -> str:\n",
12761276
" timedelta = dt.timedelta(minutes=duration_minutes)\n",
12771277
" return humanize.naturaldelta(timedelta)\n",
@@ -1442,7 +1442,7 @@
14421442
"name": "python",
14431443
"nbconvert_exporter": "python",
14441444
"pygments_lexer": "ipython3",
1445-
"version": "3.9.19"
1445+
"version": "3.11.4"
14461446
}
14471447
},
14481448
"nbformat": 4,

notebooks/remote_functions/remote_function_vertex_claude_model.ipynb

+3-1
Original file line numberDiff line numberDiff line change
@@ -286,7 +286,9 @@
286286
"source": [
287287
"@bpd.remote_function(packages=[\"anthropic[vertex]\", \"google-auth[requests]\"],\n",
288288
" max_batching_rows=1, \n",
289-
" bigquery_connection=\"bigframes-dev.us-east5.bigframes-rf-conn\") # replace with your connection\n",
289+
" bigquery_connection=\"bigframes-dev.us-east5.bigframes-rf-conn\", # replace with your connection\n",
290+
" cloud_function_service_account=\"default\",\n",
291+
")\n",
290292
"def anthropic_transformer(message: str) -> str:\n",
291293
" from anthropic import AnthropicVertex\n",
292294
" client = AnthropicVertex(region=LOCATION, project_id=PROJECT)\n",

samples/snippets/remote_function.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,8 @@ def run_remote_function_and_read_gbq_function(project_id: str) -> None:
4747
# of the penguins, which is a real number, into a category, which is a
4848
# string.
4949
@bpd.remote_function(
50-
float,
51-
str,
5250
reuse=False,
51+
cloud_function_service_account="default",
5352
)
5453
def get_bucket(num: float) -> str:
5554
if not num:
@@ -91,10 +90,9 @@ def get_bucket(num: float) -> str:
9190
# as a remote function. The custom function in this example has external
9291
# package dependency, which can be specified via `packages` parameter.
9392
@bpd.remote_function(
94-
str,
95-
str,
9693
reuse=False,
9794
packages=["cryptography"],
95+
cloud_function_service_account="default",
9896
)
9997
def get_hash(input: str) -> str:
10098
from cryptography.fernet import Fernet

0 commit comments

Comments
 (0)