Skip to content

Commit cdc1a6e

Browse files
authored
feat: add ExternalCatalogTableOptions class and tests (#2116)
* Updates most of external_catalog_table_options * Adds ExternalCatalogTableOptions and tests
1 parent 7de6822 commit cdc1a6e

File tree

5 files changed

+367
-1
lines changed

5 files changed

+367
-1
lines changed

google/cloud/bigquery/external_config.py

+107
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
from google.cloud.bigquery._helpers import _str_or_none
3131
from google.cloud.bigquery import _helpers
3232
from google.cloud.bigquery.format_options import AvroOptions, ParquetOptions
33+
from google.cloud.bigquery import schema
3334
from google.cloud.bigquery.schema import SchemaField
3435

3536

@@ -1077,3 +1078,109 @@ def from_api_repr(cls, api_repr: dict) -> ExternalCatalogDatasetOptions:
10771078
config = cls()
10781079
config._properties = api_repr
10791080
return config
1081+
1082+
1083+
class ExternalCatalogTableOptions:
    """Metadata about open source compatible table. The fields contained in these
    options correspond to Hive metastore's table-level properties.

    Args:
        connection_id (Optional[str]): The connection specifying the credentials to be
            used to read external storage, such as Azure Blob, Cloud Storage, or
            S3. The connection is needed to read the open source table from
            BigQuery Engine. The connection_id can have the form
            `<project_id>.<location_id>.<connection_id>` or
            `projects/<project_id>/locations/<location_id>/connections/<connection_id>`.
        parameters (Optional[Dict[str, Any]]): A map of key value pairs defining the
            parameters and properties of the open source table. Corresponds with
            Hive metastore table parameters. Maximum size of 4MiB.
        storage_descriptor (Optional[Union[StorageDescriptor, dict]]): A storage
            descriptor containing information about the physical storage of this
            table. Either a ``StorageDescriptor`` or its API representation (a
            ``dict``) is accepted.
    """

    def __init__(
        self,
        connection_id: Optional[str] = None,
        parameters: Union[Dict[str, Any], None] = None,
        storage_descriptor: Union["schema.StorageDescriptor", dict, None] = None,
    ):
        self._properties: Dict[str, Any] = {}
        # Route every argument through its property setter so that the type
        # validation lives in exactly one place.
        self.connection_id = connection_id
        self.parameters = parameters
        self.storage_descriptor = storage_descriptor

    @property
    def connection_id(self) -> Optional[str]:
        """Optional. The connection specifying the credentials to be
        used to read external storage, such as Azure Blob, Cloud Storage, or
        S3. The connection is needed to read the open source table from
        BigQuery Engine. The connection_id can have the form
        `<project_id>.<location_id>.<connection_id>` or
        `projects/<project_id>/locations/<location_id>/connections/<connection_id>`.
        """

        return self._properties.get("connectionId")

    @connection_id.setter
    def connection_id(self, value: Optional[str]):
        value = _helpers._isinstance_or_raise(value, str, none_allowed=True)
        self._properties["connectionId"] = value

    @property
    def parameters(self) -> Union[Dict[str, Any], None]:
        """Optional. A map of key value pairs defining the parameters and
        properties of the open source table. Corresponds with Hive metastore
        table parameters. Maximum size of 4MiB.
        """

        return self._properties.get("parameters")

    @parameters.setter
    def parameters(self, value: Union[Dict[str, Any], None]):
        value = _helpers._isinstance_or_raise(value, dict, none_allowed=True)
        self._properties["parameters"] = value

    @property
    def storage_descriptor(self) -> Optional["schema.StorageDescriptor"]:
        """Optional. A storage descriptor containing information about the
        physical storage of this table.

        Returns:
            Optional[StorageDescriptor]:
                A ``StorageDescriptor`` built from the stored API
                representation on every access, or ``None`` when no
                descriptor is set.
        """

        prop = _helpers._get_sub_prop(self._properties, ["storageDescriptor"])

        if prop is not None:
            return schema.StorageDescriptor.from_api_repr(prop)
        return None

    @storage_descriptor.setter
    def storage_descriptor(
        self, value: Union["schema.StorageDescriptor", dict, None]
    ):
        value = _helpers._isinstance_or_raise(
            value, (schema.StorageDescriptor, dict), none_allowed=True
        )
        # Only the API representation is ever stored; a dict (or None) is
        # assumed to already be in that form and is kept as given.
        if isinstance(value, schema.StorageDescriptor):
            self._properties["storageDescriptor"] = value.to_api_repr()
        else:
            self._properties["storageDescriptor"] = value

    def to_api_repr(self) -> dict:
        """Build an API representation of this object.

        Returns:
            Dict[str, Any]:
                A dictionary in the format used by the BigQuery API. This is
                the object's internal property mapping itself, not a copy.
        """

        return self._properties

    @classmethod
    def from_api_repr(cls, api_repr: dict) -> "ExternalCatalogTableOptions":
        """Factory: constructs an instance of the class (cls)
        given its API representation.

        Args:
            api_repr (Dict[str, Any]):
                API representation of the object to be instantiated. The
                mapping is stored by reference, without validation.

        Returns:
            An instance of the class initialized with data from 'api_repr'.
        """
        config = cls()
        config._properties = api_repr
        return config

google/cloud/bigquery/magics/magics.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@
5656
bigquery_magics = None
5757

5858

59-
IPYTHON_USER_AGENT = "ipython-{}".format(IPython.__version__)
59+
IPYTHON_USER_AGENT = "ipython-{}".format(IPython.__version__) # type: ignore
6060

6161

6262
class Context(object):

google/cloud/bigquery/table.py

+35
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@
6969
from google.cloud.bigquery.schema import _build_schema_resource
7070
from google.cloud.bigquery.schema import _parse_schema_resource
7171
from google.cloud.bigquery.schema import _to_schema_fields
72+
from google.cloud.bigquery import external_config
7273

7374
if typing.TYPE_CHECKING: # pragma: NO COVER
7475
# Unconditionally import optional dependencies again to tell pytype that
@@ -408,6 +409,7 @@ class Table(_TableBase):
408409
"require_partition_filter": "requirePartitionFilter",
409410
"table_constraints": "tableConstraints",
410411
"max_staleness": "maxStaleness",
412+
"external_catalog_table_options": "externalCatalogTableOptions",
411413
}
412414

413415
def __init__(self, table_ref, schema=None) -> None:
@@ -1023,6 +1025,39 @@ def table_constraints(self) -> Optional["TableConstraints"]:
10231025
table_constraints = TableConstraints.from_api_repr(table_constraints)
10241026
return table_constraints
10251027

1028+
@property
def external_catalog_table_options(
    self,
) -> Optional[external_config.ExternalCatalogTableOptions]:
    """Options defining an open source compatible table living in the
    BigQuery catalog. Contains metadata about the open source table
    represented by the current table.

    Returns:
        Optional[google.cloud.bigquery.external_config.ExternalCatalogTableOptions]:
            The options wrapped around the stored API representation, or
            ``None`` when the table resource has no such options set.
    """

    prop = self._properties.get(
        self._PROPERTY_TO_API_FIELD["external_catalog_table_options"]
    )
    if prop is not None:
        return external_config.ExternalCatalogTableOptions.from_api_repr(prop)
    return None
1042+
1043+
@external_catalog_table_options.setter
def external_catalog_table_options(
    self, value: Union[external_config.ExternalCatalogTableOptions, dict, None]
):
    """Set the external catalog table options on the table resource.

    An ``ExternalCatalogTableOptions`` instance is stored as its API
    representation; a ``dict`` or ``None`` is stored as given. Any other
    type is rejected by ``_helpers._isinstance_or_raise``.
    """
    options_cls = external_config.ExternalCatalogTableOptions
    checked = _helpers._isinstance_or_raise(
        value,
        (options_cls, dict),
        none_allowed=True,
    )
    api_field = self._PROPERTY_TO_API_FIELD["external_catalog_table_options"]
    self._properties[api_field] = (
        checked.to_api_repr() if isinstance(checked, options_cls) else checked
    )
1060+
10261061
@classmethod
10271062
def from_string(cls, full_table_id: str) -> "Table":
10281063
"""Construct a table from fully-qualified table ID.

tests/unit/test_external_config.py

+137
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
import base64
1616
import copy
17+
from typing import Any, Dict, Optional
1718
import unittest
1819

1920
from google.cloud.bigquery import external_config
@@ -979,3 +980,139 @@ def test_from_api_repr(self):
979980

980981
assert isinstance(result, external_config.ExternalCatalogDatasetOptions)
981982
assert result._properties == api_repr
983+
984+
985+
class TestExternalCatalogTableOptions:
    """Unit tests for ``external_config.ExternalCatalogTableOptions``."""

    @staticmethod
    def _get_target_class():
        from google.cloud.bigquery.external_config import ExternalCatalogTableOptions

        return ExternalCatalogTableOptions

    def _make_one(self, *args, **kw):
        return self._get_target_class()(*args, **kw)

    # Shared fixtures: the same storage descriptor is available both as a raw
    # API dict and as a StorageDescriptor object built from it.
    storage_descriptor_repr = {
        "inputFormat": "testpath.to.OrcInputFormat",
        "locationUri": "gs://test/path/",
        "outputFormat": "testpath.to.OrcOutputFormat",
        "serDeInfo": {
            "serializationLibrary": "testpath.to.LazySimpleSerDe",
            "name": "serde_lib_name",
            "parameters": {"key": "value"},
        },
    }

    CONNECTIONID = "connection123"
    PARAMETERS = {"key": "value"}
    STORAGEDESCRIPTOR = schema.StorageDescriptor.from_api_repr(storage_descriptor_repr)
    EXTERNALCATALOGTABLEOPTIONS = {
        "connectionId": "connection123",
        "parameters": {"key": "value"},
        "storageDescriptor": STORAGEDESCRIPTOR.to_api_repr(),
    }

    @pytest.mark.parametrize(
        "connection_id,parameters,storage_descriptor",
        [
            (
                CONNECTIONID,
                PARAMETERS,
                STORAGEDESCRIPTOR,
            ),  # set all parameters at once
            (CONNECTIONID, None, None),  # set only one parameter at a time
            (None, PARAMETERS, None),
            (None, None, STORAGEDESCRIPTOR),  # set storage descriptor using obj
            (None, None, storage_descriptor_repr),  # set storage descriptor using dict
            (None, None, None),  # use default parameters
        ],
    )
    def test_ctor_initialization(
        self,
        connection_id,
        parameters,
        storage_descriptor,
    ):
        """Constructor arguments are readable back through the properties."""
        instance = self._make_one(
            connection_id=connection_id,
            parameters=parameters,
            storage_descriptor=storage_descriptor,
        )

        assert instance.connection_id == connection_id
        assert instance.parameters == parameters

        # The getter always returns a StorageDescriptor (or None), so compare
        # on the API representation regardless of how the value was supplied.
        if isinstance(storage_descriptor, schema.StorageDescriptor):
            assert (
                instance.storage_descriptor.to_api_repr()
                == storage_descriptor.to_api_repr()
            )
        elif isinstance(storage_descriptor, dict):
            assert instance.storage_descriptor.to_api_repr() == storage_descriptor
        else:
            assert instance.storage_descriptor is None

    @pytest.mark.parametrize(
        "connection_id,parameters,storage_descriptor",
        [
            pytest.param(
                123,
                PARAMETERS,
                STORAGEDESCRIPTOR,
                id="connection_id-invalid-type",
            ),
            pytest.param(
                CONNECTIONID,
                123,
                STORAGEDESCRIPTOR,
                id="parameters-invalid-type",
            ),
            pytest.param(
                CONNECTIONID,
                PARAMETERS,
                123,
                id="storage_descriptor-invalid-type",
            ),
        ],
    )
    def test_ctor_invalid_input(
        self,
        connection_id: str,
        parameters: Dict[str, Any],
        storage_descriptor: Optional[schema.StorageDescriptor],
    ):
        """Each argument of the wrong type makes the constructor raise TypeError."""
        with pytest.raises(TypeError) as e:
            external_config.ExternalCatalogTableOptions(
                connection_id=connection_id,
                parameters=parameters,
                storage_descriptor=storage_descriptor,
            )

        # Looking for the first word from the string "Pass <variable> as..."
        assert "Pass " in str(e.value)

    def test_to_api_repr(self):
        """A fully populated instance serializes to the expected API dict."""
        instance = self._make_one(
            connection_id=self.CONNECTIONID,
            parameters=self.PARAMETERS,
            storage_descriptor=self.STORAGEDESCRIPTOR,
        )

        result = instance.to_api_repr()
        expected = self.EXTERNALCATALOGTABLEOPTIONS

        assert result == expected

    def test_from_api_repr(self):
        """The factory wraps the given API dict in an instance of the class."""
        api_repr = self.EXTERNALCATALOGTABLEOPTIONS

        # from_api_repr is a classmethod, so call it on the class directly
        # rather than on a throwaway instance.
        result = self._get_target_class().from_api_repr(api_repr)

        assert isinstance(result, external_config.ExternalCatalogTableOptions)
        assert result._properties == api_repr

0 commit comments

Comments
 (0)