Skip to content

Commit 4bf2e43

Browse files
shobsitswastgcf-owl-bot[bot]
authored
feat!: drop support for locational endpoints (#1542)
* chore!: drop support for locational endpoints BREAKING CHANGE: Locational endpoints support is not available in BigFrames 2.0. * Update bigframes/_config/bigquery_options.py * still try the regional endpoint even if not in the client-side list of supported regions * add bq connection regional url template * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * move location validation to a common place * add test for storage read client * `use_regional_endpoints` documentation update * fix mypy * move the exception tests to large tests to test the real flow --------- Co-authored-by: Tim Sweña (Swast) <[email protected]> Co-authored-by: Tim Sweña (Swast) <[email protected]> Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
1 parent f6f697a commit 4bf2e43

File tree

4 files changed

+121
-105
lines changed

4 files changed

+121
-105
lines changed

bigframes/_config/bigquery_options.py

+12-13
Original file line numberDiff line numberDiff line change
@@ -258,7 +258,8 @@ def allow_large_results(self, value: bool):
258258

259259
@property
260260
def use_regional_endpoints(self) -> bool:
261-
"""Flag to connect to regional API endpoints.
261+
"""Flag to connect to regional API endpoints for BigQuery API and
262+
BigQuery Storage API.
262263
263264
.. note::
264265
Use of regional endpoints is a feature in Preview and available only
@@ -267,18 +268,16 @@ def use_regional_endpoints(self) -> bool:
267268
"us-east5", "us-east7", "us-south1", "us-west1", "us-west2", "us-west3"
268269
and "us-west4".
269270
270-
.. deprecated:: 0.13.0
271-
Use of locational endpoints is available only in selected projects.
272-
273-
Requires that ``location`` is set. For supported regions, for example
274-
``europe-west3``, you need to specify ``location='europe-west3'`` and
275-
``use_regional_endpoints=True``, and then BigQuery DataFrames would
276-
connect to the BigQuery endpoint ``bigquery.europe-west3.rep.googleapis.com``.
277-
For not supported regions, for example ``asia-northeast1``, when you
278-
specify ``location='asia-northeast1'`` and ``use_regional_endpoints=True``,
279-
a different endpoint (called locational endpoint, now deprecated, used
280-
to provide weaker promise on the request remaining within the location
281-
during transit) ``europe-west3-bigquery.googleapis.com`` would be used.
271+
Requires that ``location`` is set. For `supported regions <https://cloud.google.com/bigquery/docs/regional-endpoints>`_,
272+
for example ``europe-west3``, you need to specify
273+
``location='europe-west3'`` and ``use_regional_endpoints=True``, and
274+
then BigQuery DataFrames would connect to the BigQuery endpoint
275+
``bigquery.europe-west3.rep.googleapis.com``. For unsupported regions,
276+
for example ``asia-northeast1``, when you specify
277+
``location='asia-northeast1'`` and ``use_regional_endpoints=True``,
278+
the global endpoint ``bigquery.googleapis.com`` would be used, which
279+
does not guarantee that the request remains within the
280+
location during transit.
282281
283282
Returns:
284283
bool:

bigframes/constants.py

+17-12
Original file line numberDiff line numberDiff line change
@@ -96,22 +96,27 @@
9696
}
9797
)
9898

99-
# https://cloud.google.com/storage/docs/locational-endpoints
100-
LEP_ENABLED_BIGQUERY_LOCATIONS = frozenset(
99+
REP_NOT_ENABLED_BIGQUERY_LOCATIONS = frozenset(
101100
ALL_BIGQUERY_LOCATIONS - REP_ENABLED_BIGQUERY_LOCATIONS
102101
)
103102

104-
LEP_DEPRECATION_WARNING_MESSAGE = textwrap.dedent(
103+
LOCATION_NEEDED_FOR_REP_MESSAGE = textwrap.dedent(
105104
"""
106-
Support for regional endpoints is not yet available in the location
107-
{location} for BigQuery and BigQuery Storage APIs. For the supported
108-
locations and APIs see https://cloud.google.com/bigquery/docs/regional-endpoints.
109-
For other locations and APIs, currently an older, now deprecated locational
110-
endpoints are being used, which requires your project to be allowlisted. In
111-
future version 2.0 onwards the locational endpoints will no longer be
112-
supported automatically when you enable regional endpoints. However, if you
113-
still need them, you will be able to override the endpoints directly by
114-
doing the following:
105+
Must set location to use regional endpoints.
106+
You can do it via bigframes.pandas.options.bigquery.location.
107+
The supported locations can be found at
108+
https://cloud.google.com/bigquery/docs/regional-endpoints#supported-locations.
109+
"""
110+
).strip()
111+
112+
REP_NOT_SUPPORTED_MESSAGE = textwrap.dedent(
113+
"""
114+
Support for regional endpoints for BigQuery and BigQuery Storage APIs may
115+
not be available in the location {location}. For the supported APIs and
116+
locations see https://cloud.google.com/bigquery/docs/regional-endpoints.
117+
If you have the (deprecated) locational endpoints enabled in your project
118+
(which requires your project to be allowlisted), you can override the
119+
endpoints directly by doing the following:
115120
bigframes.pandas.options.bigquery.client_endpoints_override = {{
116121
"bqclient": "https://{location}-bigquery.googleapis.com",
117122
"bqconnectionclient": "{location}-bigqueryconnection.googleapis.com",

bigframes/session/clients.py

+17-45
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
import os
1818
import typing
1919
from typing import Optional
20-
import warnings
2120

2221
import google.api_core.client_info
2322
import google.api_core.client_options
@@ -32,7 +31,6 @@
3231
import pydata_google_auth
3332

3433
import bigframes.constants
35-
import bigframes.exceptions as bfe
3634
import bigframes.version
3735

3836
from . import environment
@@ -43,16 +41,11 @@
4341

4442

4543
# BigQuery is a REST API, which requires the protocol as part of the URL.
46-
_BIGQUERY_LOCATIONAL_ENDPOINT = "https://{location}-bigquery.googleapis.com"
4744
_BIGQUERY_REGIONAL_ENDPOINT = "https://bigquery.{location}.rep.googleapis.com"
4845

4946
# BigQuery Connection and Storage are gRPC APIs, which don't support the
5047
# https:// protocol in the API endpoint URL.
51-
_BIGQUERYCONNECTION_LOCATIONAL_ENDPOINT = "{location}-bigqueryconnection.googleapis.com"
52-
_BIGQUERYSTORAGE_LOCATIONAL_ENDPOINT = "{location}-bigquerystorage.googleapis.com"
53-
_BIGQUERYSTORAGE_REGIONAL_ENDPOINT = (
54-
"https://bigquerystorage.{location}.rep.googleapis.com"
55-
)
48+
_BIGQUERYSTORAGE_REGIONAL_ENDPOINT = "bigquerystorage.{location}.rep.googleapis.com"
5649

5750

5851
def _get_default_credentials_with_project():
@@ -114,19 +107,18 @@ def __init__(
114107
)
115108
self._project = project
116109

117-
if (
118-
use_regional_endpoints
119-
and location is not None
120-
and location.lower()
121-
not in bigframes.constants.REP_ENABLED_BIGQUERY_LOCATIONS
122-
):
123-
msg = bfe.format_message(
124-
bigframes.constants.LEP_DEPRECATION_WARNING_MESSAGE.format(
125-
location=location
126-
),
127-
fill=False,
128-
)
129-
warnings.warn(msg, category=FutureWarning)
110+
if use_regional_endpoints:
111+
if location is None:
112+
raise ValueError(bigframes.constants.LOCATION_NEEDED_FOR_REP_MESSAGE)
113+
elif (
114+
location.lower()
115+
not in bigframes.constants.REP_ENABLED_BIGQUERY_LOCATIONS
116+
):
117+
raise ValueError(
118+
bigframes.constants.REP_NOT_SUPPORTED_MESSAGE.format(
119+
location=location
120+
)
121+
)
130122
self._location = location
131123
self._use_regional_endpoints = use_regional_endpoints
132124

@@ -156,16 +148,8 @@ def _create_bigquery_client(self):
156148
api_endpoint=self._client_endpoints_override["bqclient"]
157149
)
158150
elif self._use_regional_endpoints:
159-
endpoint_template = _BIGQUERY_REGIONAL_ENDPOINT
160-
if (
161-
self._location is not None
162-
and self._location.lower()
163-
not in bigframes.constants.REP_ENABLED_BIGQUERY_LOCATIONS
164-
):
165-
endpoint_template = _BIGQUERY_LOCATIONAL_ENDPOINT
166-
167151
bq_options = google.api_core.client_options.ClientOptions(
168-
api_endpoint=endpoint_template.format(location=self._location)
152+
api_endpoint=_BIGQUERY_REGIONAL_ENDPOINT.format(location=self._location)
169153
)
170154

171155
bq_info = google.api_core.client_info.ClientInfo(
@@ -212,12 +196,6 @@ def bqconnectionclient(self):
212196
bqconnection_options = google.api_core.client_options.ClientOptions(
213197
api_endpoint=self._client_endpoints_override["bqconnectionclient"]
214198
)
215-
elif self._use_regional_endpoints:
216-
bqconnection_options = google.api_core.client_options.ClientOptions(
217-
api_endpoint=_BIGQUERYCONNECTION_LOCATIONAL_ENDPOINT.format(
218-
location=self._location
219-
)
220-
)
221199

222200
bqconnection_info = google.api_core.gapic_v1.client_info.ClientInfo(
223201
user_agent=self._application_name
@@ -241,16 +219,10 @@ def bqstoragereadclient(self):
241219
api_endpoint=self._client_endpoints_override["bqstoragereadclient"]
242220
)
243221
elif self._use_regional_endpoints:
244-
endpoint_template = _BIGQUERYSTORAGE_REGIONAL_ENDPOINT
245-
if (
246-
self._location is not None
247-
and self._location.lower()
248-
not in bigframes.constants.REP_ENABLED_BIGQUERY_LOCATIONS
249-
):
250-
endpoint_template = _BIGQUERYSTORAGE_LOCATIONAL_ENDPOINT
251-
252222
bqstorage_options = google.api_core.client_options.ClientOptions(
253-
api_endpoint=endpoint_template.format(location=self._location)
223+
api_endpoint=_BIGQUERYSTORAGE_REGIONAL_ENDPOINT.format(
224+
location=self._location
225+
)
254226
)
255227

256228
bqstorage_info = google.api_core.gapic_v1.client_info.ClientInfo(

tests/system/large/test_location.py

+75-35
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,11 @@
1313
# limitations under the License.
1414

1515
import typing
16-
import warnings
1716

1817
from google.cloud import bigquery
18+
from google.cloud.bigquery_storage import types as bqstorage_types
19+
import pandas
20+
import pandas.testing
1921
import pytest
2022

2123
import bigframes
@@ -41,6 +43,7 @@ def _assert_bq_execution_location(
4143

4244
assert typing.cast(bigquery.QueryJob, df.query_job).location == expected_location
4345

46+
# Ensure operation involving BQ client succeeds
4447
result = (
4548
df[["name", "number"]]
4649
.groupby("name")
@@ -53,6 +56,35 @@ def _assert_bq_execution_location(
5356
typing.cast(bigquery.QueryJob, result.query_job).location == expected_location
5457
)
5558

59+
expected_result = pandas.DataFrame(
60+
{"number": [444, 222]}, index=pandas.Index(["aaa", "bbb"], name="name")
61+
)
62+
pandas.testing.assert_frame_equal(
63+
expected_result, result.to_pandas(), check_dtype=False, check_index_type=False
64+
)
65+
66+
# Ensure BQ Storage Read client operation succeeds
67+
table = result.query_job.destination
68+
requested_session = bqstorage_types.ReadSession( # type: ignore[attr-defined]
69+
table=f"projects/{table.project}/datasets/{table.dataset_id}/tables/{table.table_id}",
70+
data_format=bqstorage_types.DataFormat.ARROW, # type: ignore[attr-defined]
71+
)
72+
read_session = session.bqstoragereadclient.create_read_session(
73+
parent=f"projects/{table.project}",
74+
read_session=requested_session,
75+
max_stream_count=1,
76+
)
77+
reader = session.bqstoragereadclient.read_rows(read_session.streams[0].name)
78+
frames = []
79+
for message in reader.rows().pages:
80+
frames.append(message.to_dataframe())
81+
read_dataframe = pandas.concat(frames)
82+
# normalize before comparing since we lost some of the bigframes column
83+
# naming abstractions in the direct read of the destination table
84+
read_dataframe = read_dataframe.set_index("name")
85+
read_dataframe.columns = result.columns
86+
pandas.testing.assert_frame_equal(expected_result, read_dataframe)
87+
5688

5789
def test_bq_location_default():
5890
session = bigframes.Session()
@@ -119,22 +151,14 @@ def test_bq_location_non_canonical(set_location, resolved_location):
119151
sorted(bigframes.constants.REP_ENABLED_BIGQUERY_LOCATIONS),
120152
)
121153
def test_bq_rep_endpoints(bigquery_location):
122-
with warnings.catch_warnings(record=True) as record:
123-
warnings.simplefilter("always")
124-
session = bigframes.Session(
125-
context=bigframes.BigQueryOptions(
126-
location=bigquery_location, use_regional_endpoints=True
127-
)
128-
)
129-
assert (
130-
len([warn for warn in record if isinstance(warn.message, FutureWarning)])
131-
== 0
154+
session = bigframes.Session(
155+
context=bigframes.BigQueryOptions(
156+
location=bigquery_location, use_regional_endpoints=True
132157
)
158+
)
133159

134-
# Verify that location and endpoints are correctly set for the BigQuery API
160+
# Verify that location and endpoint are correctly set for the BigQuery API
135161
# client
136-
# TODO(shobs): Figure out if the same can be verified for the other API
137-
# clients.
138162
assert session.bqclient.location == bigquery_location
139163
assert (
140164
session.bqclient._connection.API_BASE_URL
@@ -143,36 +167,52 @@ def test_bq_rep_endpoints(bigquery_location):
143167
)
144168
)
145169

170+
# Verify that endpoint is correctly set for the BigQuery Storage API client
171+
# TODO(shobs): Figure out if we can verify that location is set in the
172+
# BigQuery Storage API client.
173+
assert (
174+
session.bqstoragereadclient.api_endpoint
175+
== f"bigquerystorage.{bigquery_location}.rep.googleapis.com"
176+
)
177+
146178
# assert that bigframes session honors the location
147179
_assert_bq_execution_location(session)
148180

149181

182+
def test_clients_provider_no_location():
183+
with pytest.raises(ValueError, match="Must set location to use regional endpoints"):
184+
bigframes.session.clients.ClientsProvider(use_regional_endpoints=True)
185+
186+
150187
@pytest.mark.parametrize(
151188
"bigquery_location",
152189
# Sort the set to avoid nondeterminism.
153-
sorted(bigframes.constants.LEP_ENABLED_BIGQUERY_LOCATIONS),
190+
sorted(bigframes.constants.REP_NOT_ENABLED_BIGQUERY_LOCATIONS),
154191
)
155-
def test_bq_lep_endpoints(bigquery_location):
156-
# We are not testing BigFrames Session for LEP endpoints because it involves
157-
# query execution using the endpoint, which requires the project to be
158-
# allowlisted for LEP access. We could hardcode one project which is
159-
# allowlisted but then not every open source developer will have access to
160-
# that. Let's rely on just creating the clients for LEP.
161-
with pytest.warns(FutureWarning) as record:
162-
clients_provider = bigframes.session.clients.ClientsProvider(
192+
def test_clients_provider_use_regional_endpoints_non_rep_locations(bigquery_location):
193+
with pytest.raises(
194+
ValueError,
195+
match=f"not .*available in the location {bigquery_location}",
196+
):
197+
bigframes.session.clients.ClientsProvider(
163198
location=bigquery_location, use_regional_endpoints=True
164199
)
165-
assert len(record) == 1
166-
assert bigquery_location in typing.cast(Warning, record[0].message).args[0]
167200

168-
# Verify that location and endpoints are correctly set for the BigQuery API
169-
# client
170-
# TODO(shobs): Figure out if the same can be verified for the other API
171-
# clients.
172-
assert clients_provider.bqclient.location == bigquery_location
173-
assert (
174-
clients_provider.bqclient._connection.API_BASE_URL
175-
== "https://{location}-bigquery.googleapis.com".format(
176-
location=bigquery_location
201+
202+
@pytest.mark.parametrize(
203+
"bigquery_location",
204+
# Sort the set to avoid nondeterminism.
205+
sorted(bigframes.constants.REP_NOT_ENABLED_BIGQUERY_LOCATIONS),
206+
)
207+
def test_session_init_fails_to_use_regional_endpoints_non_rep_endpoints(
208+
bigquery_location,
209+
):
210+
with pytest.raises(
211+
ValueError,
212+
match=f"not .*available in the location {bigquery_location}",
213+
):
214+
bigframes.Session(
215+
context=bigframes.BigQueryOptions(
216+
location=bigquery_location, use_regional_endpoints=True
217+
)
177218
)
178-
)

0 commit comments

Comments
 (0)