Skip to content

Commit b929a90

Browse files
chalmerlowetswastgcf-owl-bot[bot]
authored
feat: adds ExternalCatalogDatasetOptions and tests (#2111)
* feat: adds ExternalCatalogDatasetOptions and tests * Update google/cloud/bigquery/dataset.py Co-authored-by: Tim Sweña (Swast) <[email protected]> * Update google/cloud/bigquery/dataset.py Co-authored-by: Tim Sweña (Swast) <[email protected]> * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Tim Sweña (Swast) <[email protected]> Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
1 parent 55ca63c commit b929a90

File tree

4 files changed

+273
-1
lines changed

4 files changed

+273
-1
lines changed

google/cloud/bigquery/dataset.py

+25
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
from google.cloud.bigquery.routine import Routine, RoutineReference
2828
from google.cloud.bigquery.table import Table, TableReference
2929
from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration
30+
from google.cloud.bigquery import external_config
3031

3132
from typing import Optional, List, Dict, Any, Union
3233

@@ -531,6 +532,7 @@ class Dataset(object):
531532
"max_time_travel_hours": "maxTimeTravelHours",
532533
"default_rounding_mode": "defaultRoundingMode",
533534
"resource_tags": "resourceTags",
535+
"external_catalog_dataset_options": "externalCatalogDatasetOptions",
534536
}
535537

536538
def __init__(self, dataset_ref) -> None:
@@ -898,6 +900,29 @@ def storage_billing_model(self, value):
898900
)
899901
self._properties["storageBillingModel"] = value
900902

903+
@property
def external_catalog_dataset_options(self):
    """Options defining open source compatible datasets living in the
    BigQuery catalog.

    Holds metadata of the open source database, schema or namespace
    represented by the current dataset.

    Returns:
        The ``externalCatalogDatasetOptions`` entry of this dataset's
        properties, wrapped with
        ``external_config.ExternalCatalogDatasetOptions.from_api_repr``,
        or ``None`` when the lookup yields ``None``.
    """
    raw_options = _helpers._get_sub_prop(
        self._properties, ["externalCatalogDatasetOptions"]
    )

    # Nothing stored for this field: report None instead of wrapping it.
    if raw_options is None:
        return None

    return external_config.ExternalCatalogDatasetOptions.from_api_repr(raw_options)
916+
917+
@external_catalog_dataset_options.setter
def external_catalog_dataset_options(self, value):
    """Store new external catalog dataset options on this dataset.

    Args:
        value: An ``external_config.ExternalCatalogDatasetOptions``
            instance, or ``None``. The value is first passed through
            ``_helpers._isinstance_or_raise`` with ``none_allowed=True``
            (presumably raising ``TypeError`` for any other type --
            confirm against the helper).
    """
    checked = _helpers._isinstance_or_raise(
        value, external_config.ExternalCatalogDatasetOptions, none_allowed=True
    )

    # Only the API representation is stored; None is written through as-is.
    payload = None if checked is None else checked.to_api_repr()

    api_field = self._PROPERTY_TO_API_FIELD["external_catalog_dataset_options"]
    self._properties[api_field] = payload
925+
901926
@classmethod
902927
def from_string(cls, full_dataset_id: str) -> "Dataset":
903928
"""Construct a dataset from fully-qualified dataset ID.

google/cloud/bigquery/external_config.py

+75-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
Job.configuration.query.tableDefinitions.
1919
"""
2020

21-
from __future__ import absolute_import
21+
from __future__ import absolute_import, annotations
2222

2323
import base64
2424
import copy
@@ -28,6 +28,7 @@
2828
from google.cloud.bigquery._helpers import _bytes_to_json
2929
from google.cloud.bigquery._helpers import _int_or_none
3030
from google.cloud.bigquery._helpers import _str_or_none
31+
from google.cloud.bigquery import _helpers
3132
from google.cloud.bigquery.format_options import AvroOptions, ParquetOptions
3233
from google.cloud.bigquery.schema import SchemaField
3334

@@ -1003,3 +1004,76 @@ def from_api_repr(cls, resource: dict) -> "ExternalConfig":
10031004
config = cls(resource["sourceFormat"])
10041005
config._properties = copy.deepcopy(resource)
10051006
return config
1007+
1008+
1009+
class ExternalCatalogDatasetOptions:
    """Options defining open source compatible datasets living in the BigQuery catalog.

    Contains metadata of open source database, schema or namespace represented
    by the current dataset.

    Args:
        default_storage_location_uri (Optional[str]): The storage location URI
            for all tables in the dataset. Equivalent to hive metastore's
            database locationUri. Maximum length of 1024 characters.
        parameters (Optional[dict[str, Any]]): A map of key value pairs
            defining the parameters and properties of the open source schema.
            Maximum size of 2MiB.

    Raises:
        TypeError: If ``default_storage_location_uri`` is neither a ``str``
            nor ``None``, or ``parameters`` is neither a ``dict`` nor ``None``.
    """

    def __init__(
        self,
        default_storage_location_uri: Optional[str] = None,
        parameters: Optional[Dict[str, Any]] = None,
    ):
        # Backing store, keyed by the API's camelCase field names.
        self._properties: Dict[str, Any] = {}
        # Assign through the properties so both values are type-checked by
        # their setters.
        self.default_storage_location_uri = default_storage_location_uri
        self.parameters = parameters

    @property
    def default_storage_location_uri(self) -> Optional[str]:
        """Optional. The storage location URI for all tables in the dataset.
        Equivalent to hive metastore's database locationUri. Maximum length of
        1024 characters."""

        return self._properties.get("defaultStorageLocationUri")

    @default_storage_location_uri.setter
    def default_storage_location_uri(self, value: Optional[str]):
        value = _helpers._isinstance_or_raise(value, str, none_allowed=True)
        self._properties["defaultStorageLocationUri"] = value

    @property
    def parameters(self) -> Optional[Dict[str, Any]]:
        """Optional. A map of key value pairs defining the parameters and
        properties of the open source schema. Maximum size of 2MiB."""

        return self._properties.get("parameters")

    @parameters.setter
    def parameters(self, value: Optional[Dict[str, Any]]):
        value = _helpers._isinstance_or_raise(value, dict, none_allowed=True)
        self._properties["parameters"] = value

    def to_api_repr(self) -> dict:
        """Build an API representation of this object.

        Note:
            The returned dictionary is this object's own backing store, not
            a copy, so changes made to it are visible on this object.

        Returns:
            Dict[str, Any]:
                A dictionary in the format used by the BigQuery API.
        """
        return self._properties

    @classmethod
    def from_api_repr(cls, api_repr: dict) -> "ExternalCatalogDatasetOptions":
        """Factory: constructs an instance of the class (cls)
        given its API representation.

        Note:
            ``api_repr`` is adopted as the new instance's backing store
            without copying and without the type checks the setters apply.

        Args:
            api_repr (Dict[str, Any]):
                API representation of the object to be instantiated.

        Returns:
            An instance of the class initialized with data from ``api_repr``.
        """
        config = cls()
        config._properties = api_repr
        return config

tests/unit/test_dataset.py

+84
Original file line numberDiff line numberDiff line change
@@ -650,6 +650,16 @@ class TestDataset(unittest.TestCase):
650650
DS_ID = "dataset-id"
651651
DS_REF = DatasetReference(PROJECT, DS_ID)
652652
KMS_KEY_NAME = "projects/1/locations/us/keyRings/1/cryptoKeys/1"
653+
DEFAULT_STORAGE_LOCATION_URI = "gs://test-bucket/test-path"
654+
PARAMETERS = {"key": "value"}
655+
API_REPR = {
656+
"datasetReference": {"projectId": "project", "datasetId": "dataset-id"},
657+
"labels": {},
658+
"externalCatalogDatasetOptions": {
659+
"defaultStorageLocationUri": DEFAULT_STORAGE_LOCATION_URI,
660+
"parameters": PARAMETERS,
661+
},
662+
}
653663

654664
@staticmethod
655665
def _get_target_class():
@@ -1067,6 +1077,80 @@ def test___repr__(self):
10671077
expected = "Dataset(DatasetReference('project1', 'dataset1'))"
10681078
self.assertEqual(repr(dataset), expected)
10691079

1080+
def test_external_catalog_dataset_options_setter(self):
    """Options assigned to a dataset show up in the dataset's API repr."""
    # GIVEN the parameters DEFAULT_STORAGE_LOCATION_URI and PARAMETERS
    # WHEN an ExternalCatalogDatasetOptions object is built from them
    #   and assigned to a dataset
    # THEN the API representation of the dataset equals API_REPR

    from google.cloud.bigquery.external_config import ExternalCatalogDatasetOptions

    options = ExternalCatalogDatasetOptions(
        default_storage_location_uri=self.DEFAULT_STORAGE_LOCATION_URI,
        parameters=self.PARAMETERS,
    )
    dataset = self._make_one(self.DS_REF)
    dataset.external_catalog_dataset_options = options

    assert dataset.to_api_repr() == self.API_REPR
1099+
1100+
def test_external_catalog_dataset_options_getter_prop_exists(self):
    """The getter hands back options equivalent to the ones assigned."""
    # GIVEN a default dataset PLUS an ExternalCatalogDatasetOptions
    # THEN the api_repr of the ExternalCatalogDatasetOptions matches the
    #   api_repr of the dataset's external_catalog_dataset_options attribute

    from google.cloud.bigquery.external_config import ExternalCatalogDatasetOptions

    options = ExternalCatalogDatasetOptions(
        default_storage_location_uri=self.DEFAULT_STORAGE_LOCATION_URI,
        parameters=self.PARAMETERS,
    )
    dataset = self._make_one(self.DS_REF)
    dataset.external_catalog_dataset_options = options

    read_back = dataset.external_catalog_dataset_options.to_api_repr()
    assert read_back == options.to_api_repr()
1116+
1117+
def test_external_catalog_dataset_options_getter_prop_is_none(self):
    """A freshly built dataset reports no external catalog options."""
    # GIVEN only a default dataset
    # THEN external_catalog_dataset_options is None

    no_options = None
    dataset = self._make_one(self.DS_REF)

    observed = dataset.external_catalog_dataset_options
    assert observed == no_options
1125+
1126+
def test_external_catalog_dataset_options_from_api_repr(self):
    """Options survive construction of a dataset via ``from_api_repr``."""
    # GIVEN a dataset API representation that includes an
    #   "externalCatalogDatasetOptions" entry
    # THEN the api_repr of the options exposed by a dataset created via
    #   from_api_repr matches that entry

    resource = self.API_REPR
    dataset = self._get_target_class().from_api_repr(resource)

    options_repr = dataset.external_catalog_dataset_options.to_api_repr()
    assert options_repr == resource["externalCatalogDatasetOptions"]
1139+
1140+
def test_external_catalog_dataset_options_to_api_repr(self):
    """``Dataset.to_api_repr`` round-trips the external catalog options."""
    # GIVEN a dataset api_repr that includes an
    #   "externalCatalogDatasetOptions" key
    # THEN the value under that key in to_api_repr() of a dataset built
    #   from it matches the value used to create the dataset

    resource = self.API_REPR
    dataset = self._get_target_class().from_api_repr(resource)

    round_tripped = dataset.to_api_repr()
    assert (
        round_tripped["externalCatalogDatasetOptions"]
        == resource["externalCatalogDatasetOptions"]
    )
1153+
10701154

10711155
class TestDatasetListItem(unittest.TestCase):
10721156
@staticmethod

tests/unit/test_external_config.py

+89
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919
from google.cloud.bigquery import external_config
2020
from google.cloud.bigquery import schema
2121

22+
import pytest
23+
2224

2325
class TestExternalConfig(unittest.TestCase):
2426
SOURCE_URIS = ["gs://foo", "gs://bar"]
@@ -890,3 +892,90 @@ def _copy_and_update(d, u):
890892
d = copy.deepcopy(d)
891893
d.update(u)
892894
return d
895+
896+
897+
class TestExternalCatalogDatasetOptions:
    """Unit tests for ``external_config.ExternalCatalogDatasetOptions``."""

    @staticmethod
    def _get_target_class():
        from google.cloud.bigquery.external_config import ExternalCatalogDatasetOptions

        return ExternalCatalogDatasetOptions

    def _make_one(self, *args, **kw):
        return self._get_target_class()(*args, **kw)

    DEFAULT_STORAGE_LOCATION_URI = "gs://test-bucket/test-path"
    PARAMETERS = {"key": "value"}

    @pytest.mark.parametrize(
        "default_storage_location_uri,parameters",
        [
            (DEFAULT_STORAGE_LOCATION_URI, PARAMETERS),  # set all params
            (DEFAULT_STORAGE_LOCATION_URI, None),  # set only one argument at a time
            (None, PARAMETERS),
            (None, None),  # use default parameters
        ],
    )
    def test_ctor_initialization(
        self,
        default_storage_location_uri,
        parameters,
    ):
        """Test ExternalCatalogDatasetOptions constructor with explicit values."""

        instance = self._make_one(
            default_storage_location_uri=default_storage_location_uri,
            parameters=parameters,
        )

        assert instance.default_storage_location_uri == default_storage_location_uri
        assert instance.parameters == parameters

    @pytest.mark.parametrize(
        "default_storage_location_uri,parameters",
        [
            (123, None),  # does not accept integers
            (None, 123),
        ],
    )
    def test_ctor_invalid_input(self, default_storage_location_uri, parameters):
        """Test ExternalCatalogDatasetOptions constructor with invalid input."""

        with pytest.raises(TypeError) as e:
            self._make_one(
                default_storage_location_uri=default_storage_location_uri,
                parameters=parameters,
            )

        # Looking for the first word from the string "Pass <variable> as..."
        assert "Pass " in str(e.value)

    def test_to_api_repr(self):
        """Test ExternalCatalogDatasetOptions.to_api_repr method."""

        instance = self._make_one(
            default_storage_location_uri=self.DEFAULT_STORAGE_LOCATION_URI,
            parameters=self.PARAMETERS,
        )
        resource = instance.to_api_repr()
        assert (
            resource["defaultStorageLocationUri"] == self.DEFAULT_STORAGE_LOCATION_URI
        )
        assert resource["parameters"] == self.PARAMETERS

    def test_from_api_repr(self):
        """GIVEN an api representation of an ExternalCatalogDatasetOptions object (i.e. api_repr)
        WHEN converted into an ExternalCatalogDatasetOptions object using from_api_repr()
        THEN the result is an ExternalCatalogDatasetOptions whose stored properties
        and public accessors reflect the values in api_repr.
        """

        # from_api_repr is a classmethod, so call it on the class instead of
        # building a throwaway instance first.
        klass = self._get_target_class()
        api_repr = {
            "defaultStorageLocationUri": self.DEFAULT_STORAGE_LOCATION_URI,
            "parameters": self.PARAMETERS,
        }
        result = klass.from_api_repr(api_repr)

        assert isinstance(result, external_config.ExternalCatalogDatasetOptions)
        assert result._properties == api_repr
        # Also check the public accessors, so the test does not depend only on
        # the private backing dict.
        assert result.default_storage_location_uri == self.DEFAULT_STORAGE_LOCATION_URI
        assert result.parameters == self.PARAMETERS

0 commit comments

Comments
 (0)