Skip to content

Commit 2c19681

Browse files
authored
Feat: Adds foreign_type_info attribute to table class and adds unit tests. (#2126)
* Adds foreign_type_info attribute to table
* feat: Adds foreign_type_info attribute and tests
* Updates docstrings for foreign_type_info
* Updates property handling, especially as regards set/get_sub_prop
* Removes extraneous comments and debug expressions
* Refactors build_resource_from_properties with get/set_sub_prop
* Updates to foreign_type_info, tests and wiring
* Adds logic to detect non-Sequence schema.fields value
* Updates assorted tests and logic
1 parent 7603bd7 commit 2c19681

File tree

7 files changed

+398
-104
lines changed

7 files changed

+398
-104
lines changed

google/cloud/bigquery/_helpers.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -978,11 +978,11 @@ def _build_resource_from_properties(obj, filter_fields):
978978
"""
979979
partial = {}
980980
for filter_field in filter_fields:
981-
api_field = obj._PROPERTY_TO_API_FIELD.get(filter_field)
981+
api_field = _get_sub_prop(obj._PROPERTY_TO_API_FIELD, filter_field)
982982
if api_field is None and filter_field not in obj._properties:
983983
raise ValueError("No property %s" % filter_field)
984984
elif api_field is not None:
985-
partial[api_field] = obj._properties.get(api_field)
985+
_set_sub_prop(partial, api_field, _get_sub_prop(obj._properties, api_field))
986986
else:
987987
# allows properties that are not defined in the library
988988
# and properties that have the same name as API resource key

google/cloud/bigquery/schema.py

+32-25
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,9 @@
1515
"""Schemas for BigQuery tables / queries."""
1616

1717
from __future__ import annotations
18-
import collections
1918
import enum
2019
import typing
21-
from typing import Any, cast, Dict, Iterable, Optional, Union
20+
from typing import Any, cast, Dict, Iterable, Optional, Union, Sequence
2221

2322
from google.cloud.bigquery import _helpers
2423
from google.cloud.bigquery import standard_sql
@@ -489,6 +488,8 @@ def _parse_schema_resource(info):
489488
Optional[Sequence[:class:`google.cloud.bigquery.schema.SchemaField`]]:
490489
A list of parsed fields (empty if no "fields" key is found).
491490
"""
491+
if isinstance(info, list):
492+
return [SchemaField.from_api_repr(f) for f in info]
492493
return [SchemaField.from_api_repr(f) for f in info.get("fields", ())]
493494

494495

@@ -501,40 +502,46 @@ def _build_schema_resource(fields):
501502
Returns:
502503
Sequence[Dict]: Mappings describing the schema of the supplied fields.
503504
"""
504-
return [field.to_api_repr() for field in fields]
505+
if isinstance(fields, Sequence):
506+
# Input is a Sequence (e.g. a list): Process and return a list of SchemaFields
507+
return [field.to_api_repr() for field in fields]
508+
509+
else:
510+
raise TypeError("Schema must be a Sequence (e.g. a list) or None.")
505511

506512

507513
def _to_schema_fields(schema):
508-
"""Coerce `schema` to a list of schema field instances.
514+
"""Coerces schema to a list of SchemaField instances while
515+
preserving the original structure as much as possible.
509516
510517
Args:
511-
schema(Sequence[Union[ \
512-
:class:`~google.cloud.bigquery.schema.SchemaField`, \
513-
Mapping[str, Any] \
514-
]]):
515-
Table schema to convert. If some items are passed as mappings,
516-
their content must be compatible with
517-
:meth:`~google.cloud.bigquery.schema.SchemaField.from_api_repr`.
518+
schema (Sequence[Union[ \
519+
:class:`~google.cloud.bigquery.schema.SchemaField`, \
520+
Mapping[str, Any] \
521+
]
522+
]
523+
):
524+
Table schema to convert. Can be a list of SchemaField
525+
objects or mappings.
518526
519527
Returns:
520-
Sequence[:class:`~google.cloud.bigquery.schema.SchemaField`]
528+
A list of SchemaField objects.
521529
522530
Raises:
523-
Exception: If ``schema`` is not a sequence, or if any item in the
524-
sequence is not a :class:`~google.cloud.bigquery.schema.SchemaField`
525-
instance or a compatible mapping representation of the field.
531+
TypeError: If schema is not a Sequence.
526532
"""
527-
for field in schema:
528-
if not isinstance(field, (SchemaField, collections.abc.Mapping)):
529-
raise ValueError(
530-
"Schema items must either be fields or compatible "
531-
"mapping representations."
532-
)
533533

534-
return [
535-
field if isinstance(field, SchemaField) else SchemaField.from_api_repr(field)
536-
for field in schema
537-
]
534+
if isinstance(schema, Sequence):
535+
# Input is a Sequence (e.g. a list): Process and return a list of SchemaFields
536+
return [
537+
field
538+
if isinstance(field, SchemaField)
539+
else SchemaField.from_api_repr(field)
540+
for field in schema
541+
]
542+
543+
else:
544+
raise TypeError("Schema must be a Sequence (e.g. a list) or None.")
538545

539546

540547
class PolicyTagList(object):

google/cloud/bigquery/table.py

+69-6
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@
2121
import functools
2222
import operator
2323
import typing
24-
from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple, Union
24+
from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple, Union, Sequence
25+
2526
import warnings
2627

2728
try:
@@ -66,6 +67,7 @@
6667
from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration
6768
from google.cloud.bigquery.enums import DefaultPandasDTypes
6869
from google.cloud.bigquery.external_config import ExternalConfig
70+
from google.cloud.bigquery import schema as _schema
6971
from google.cloud.bigquery.schema import _build_schema_resource
7072
from google.cloud.bigquery.schema import _parse_schema_resource
7173
from google.cloud.bigquery.schema import _to_schema_fields
@@ -398,7 +400,7 @@ class Table(_TableBase):
398400
"partitioning_type": "timePartitioning",
399401
"range_partitioning": "rangePartitioning",
400402
"time_partitioning": "timePartitioning",
401-
"schema": "schema",
403+
"schema": ["schema", "fields"],
402404
"snapshot_definition": "snapshotDefinition",
403405
"clone_definition": "cloneDefinition",
404406
"streaming_buffer": "streamingBuffer",
@@ -411,6 +413,7 @@ class Table(_TableBase):
411413
"max_staleness": "maxStaleness",
412414
"resource_tags": "resourceTags",
413415
"external_catalog_table_options": "externalCatalogTableOptions",
416+
"foreign_type_info": ["schema", "foreignTypeInfo"],
414417
}
415418

416419
def __init__(self, table_ref, schema=None) -> None:
@@ -451,8 +454,20 @@ def schema(self):
451454
If ``schema`` is not a sequence, or if any item in the sequence
452455
is not a :class:`~google.cloud.bigquery.schema.SchemaField`
453456
instance or a compatible mapping representation of the field.
457+
458+
.. Note::
459+
If you are referencing a schema for an external catalog table such
460+
as a Hive table, it will also be necessary to populate the foreign_type_info
461+
attribute. This is not necessary if defining the schema for a BigQuery table.
462+
463+
For details, see:
464+
https://cloud.google.com/bigquery/docs/external-tables
465+
https://cloud.google.com/bigquery/docs/datasets-intro#external_datasets
466+
454467
"""
455-
prop = self._properties.get(self._PROPERTY_TO_API_FIELD["schema"])
468+
prop = _helpers._get_sub_prop(
469+
self._properties, self._PROPERTY_TO_API_FIELD["schema"]
470+
)
456471
if not prop:
457472
return []
458473
else:
@@ -463,10 +478,21 @@ def schema(self, value):
463478
api_field = self._PROPERTY_TO_API_FIELD["schema"]
464479

465480
if value is None:
466-
self._properties[api_field] = None
467-
else:
481+
_helpers._set_sub_prop(
482+
self._properties,
483+
api_field,
484+
None,
485+
)
486+
elif isinstance(value, Sequence):
468487
value = _to_schema_fields(value)
469-
self._properties[api_field] = {"fields": _build_schema_resource(value)}
488+
value = _build_schema_resource(value)
489+
_helpers._set_sub_prop(
490+
self._properties,
491+
api_field,
492+
value,
493+
)
494+
else:
495+
raise TypeError("Schema must be a Sequence (e.g. a list) or None.")
470496

471497
@property
472498
def labels(self):
@@ -1075,6 +1101,43 @@ def external_catalog_table_options(
10751101
self._PROPERTY_TO_API_FIELD["external_catalog_table_options"]
10761102
] = value
10771103

1104+
@property
1105+
def foreign_type_info(self) -> Optional[_schema.ForeignTypeInfo]:
1106+
"""Optional. Specifies metadata of the foreign data type definition in
1107+
field schema (TableFieldSchema.foreign_type_definition).
1108+
1109+
Returns:
1110+
Optional[schema.ForeignTypeInfo]:
1111+
Foreign type information, or :data:`None` if not set.
1112+
1113+
.. Note::
1114+
foreign_type_info is only required if you are referencing an
1115+
external catalog such as a Hive table.
1116+
For details, see:
1117+
https://cloud.google.com/bigquery/docs/external-tables
1118+
https://cloud.google.com/bigquery/docs/datasets-intro#external_datasets
1119+
"""
1120+
1121+
prop = _helpers._get_sub_prop(
1122+
self._properties, self._PROPERTY_TO_API_FIELD["foreign_type_info"]
1123+
)
1124+
if prop is not None:
1125+
return _schema.ForeignTypeInfo.from_api_repr(prop)
1126+
return None
1127+
1128+
@foreign_type_info.setter
1129+
def foreign_type_info(self, value: Union[_schema.ForeignTypeInfo, dict, None]):
1130+
value = _helpers._isinstance_or_raise(
1131+
value,
1132+
(_schema.ForeignTypeInfo, dict),
1133+
none_allowed=True,
1134+
)
1135+
if isinstance(value, _schema.ForeignTypeInfo):
1136+
value = value.to_api_repr()
1137+
_helpers._set_sub_prop(
1138+
self._properties, self._PROPERTY_TO_API_FIELD["foreign_type_info"], value
1139+
)
1140+
10781141
@classmethod
10791142
def from_string(cls, full_table_id: str) -> "Table":
10801143
"""Construct a table from fully-qualified table ID.

tests/unit/job/test_load.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -272,7 +272,7 @@ def test_schema_setter_invalid_field(self):
272272

273273
config = LoadJobConfig()
274274
full_name = SchemaField("full_name", "STRING", mode="REQUIRED")
275-
with self.assertRaises(ValueError):
275+
with self.assertRaises(TypeError):
276276
config.schema = [full_name, object()]
277277

278278
def test_schema_setter(self):

tests/unit/test_client.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -2051,7 +2051,7 @@ def test_update_dataset(self):
20512051
ds.labels = LABELS
20522052
ds.access_entries = [AccessEntry("OWNER", "userByEmail", "[email protected]")]
20532053
ds.resource_tags = RESOURCE_TAGS
2054-
fields = [
2054+
filter_fields = [
20552055
"description",
20562056
"friendly_name",
20572057
"location",
@@ -2065,12 +2065,12 @@ def test_update_dataset(self):
20652065
) as final_attributes:
20662066
ds2 = client.update_dataset(
20672067
ds,
2068-
fields=fields,
2068+
fields=filter_fields,
20692069
timeout=7.5,
20702070
)
20712071

20722072
final_attributes.assert_called_once_with(
2073-
{"path": "/%s" % PATH, "fields": fields}, client, None
2073+
{"path": "/%s" % PATH, "fields": filter_fields}, client, None
20742074
)
20752075

20762076
conn.api_request.assert_called_once_with(
@@ -2615,7 +2615,7 @@ def test_update_table_w_schema_None(self):
26152615
self.assertEqual(len(conn.api_request.call_args_list), 2)
26162616
req = conn.api_request.call_args_list[1]
26172617
self.assertEqual(req[1]["method"], "PATCH")
2618-
sent = {"schema": None}
2618+
sent = {"schema": {"fields": None}}
26192619
self.assertEqual(req[1]["data"], sent)
26202620
self.assertEqual(req[1]["path"], "/%s" % path)
26212621
self.assertEqual(len(updated_table.schema), 0)

0 commit comments

Comments
 (0)