Skip to content

Commit f308bd4

Browse files
authored
BigQuery: Use TableListItem for table listing. (#4427)
* BigQuery: Use TableListItem for table listing. The table list response only includes a subset of all table properties. This commit adds a new type to document explicitly which properties are included, but also make it clear that this object should not be used in place of a full Table object. * Get bigquery.client imports closer to accepted style. * Share property code between table and table list item for view_use_legacy_sql * Clarify TableListItem docs. * Fix link syntax. * Shrink property links.
1 parent 4402e40 commit f308bd4

File tree

4 files changed

+248
-34
lines changed

4 files changed

+248
-34
lines changed

bigquery/google/cloud/bigquery/client.py

+16-13
Original file line numberDiff line numberDiff line change
@@ -31,26 +31,28 @@
3131
from google.api_core import page_iterator
3232
from google.api_core.exceptions import GoogleAPICallError
3333
from google.api_core.exceptions import NotFound
34-
3534
from google.cloud import exceptions
3635
from google.cloud.client import ClientWithProject
36+
37+
from google.cloud.bigquery._helpers import DEFAULT_RETRY
38+
from google.cloud.bigquery._helpers import _SCALAR_VALUE_TO_JSON_ROW
39+
from google.cloud.bigquery._helpers import _field_to_index_mapping
40+
from google.cloud.bigquery._helpers import _item_to_row
41+
from google.cloud.bigquery._helpers import _rows_page_start
42+
from google.cloud.bigquery._helpers import _snake_to_camel_case
3743
from google.cloud.bigquery._http import Connection
3844
from google.cloud.bigquery.dataset import Dataset
3945
from google.cloud.bigquery.dataset import DatasetReference
40-
from google.cloud.bigquery.table import Table, _TABLE_HAS_NO_SCHEMA
41-
from google.cloud.bigquery.table import TableReference
42-
from google.cloud.bigquery.table import _row_from_mapping
4346
from google.cloud.bigquery.job import CopyJob
4447
from google.cloud.bigquery.job import ExtractJob
4548
from google.cloud.bigquery.job import LoadJob
4649
from google.cloud.bigquery.job import QueryJob, QueryJobConfig
4750
from google.cloud.bigquery.query import QueryResults
48-
from google.cloud.bigquery._helpers import _item_to_row
49-
from google.cloud.bigquery._helpers import _rows_page_start
50-
from google.cloud.bigquery._helpers import _field_to_index_mapping
51-
from google.cloud.bigquery._helpers import _SCALAR_VALUE_TO_JSON_ROW
52-
from google.cloud.bigquery._helpers import DEFAULT_RETRY
53-
from google.cloud.bigquery._helpers import _snake_to_camel_case
51+
from google.cloud.bigquery.table import Table
52+
from google.cloud.bigquery.table import TableListItem
53+
from google.cloud.bigquery.table import TableReference
54+
from google.cloud.bigquery.table import _TABLE_HAS_NO_SCHEMA
55+
from google.cloud.bigquery.table import _row_from_mapping
5456

5557

5658
_DEFAULT_CHUNKSIZE = 1048576 # 1024 * 1024 B = 1 MB
@@ -405,8 +407,9 @@ def list_dataset_tables(self, dataset, max_results=None, page_token=None,
405407
:param retry: (Optional) How to retry the RPC.
406408
407409
:rtype: :class:`~google.api_core.page_iterator.Iterator`
408-
:returns: Iterator of :class:`~google.cloud.bigquery.table.Table`
409-
contained within the current dataset.
410+
:returns:
411+
Iterator of :class:`~google.cloud.bigquery.table.TableListItem`
412+
contained within the current dataset.
410413
"""
411414
if not isinstance(dataset, (Dataset, DatasetReference)):
412415
raise TypeError('dataset must be a Dataset or a DatasetReference')
@@ -1367,7 +1370,7 @@ def _item_to_table(iterator, resource):
13671370
:rtype: :class:`~google.cloud.bigquery.table.TableListItem`
13681371
:returns: The next table in the page.
13691372
"""
1370-
return Table.from_api_repr(resource)
1373+
return TableListItem(resource)
13711374

13721375

13731376
def _make_job_id(job_id, prefix=None):

bigquery/google/cloud/bigquery/table.py

+147-17
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,25 @@
3434
_MARKER = object()
3535

3636

37+
def _view_use_legacy_sql_getter(table):
38+
"""Specifies whether to execute the view with Legacy or Standard SQL.
39+
40+
If this table is not a view, None is returned.
41+
42+
Returns:
43+
bool: True if the view is using legacy SQL, or None if not a view
44+
"""
45+
view = table._properties.get('view')
46+
if view is not None:
47+
# The server-side default for useLegacySql is True.
48+
return view.get('useLegacySql', True)
49+
# In some cases, such as in a table list no view object is present, but the
50+
# resource still represents a view. Use the type as a fallback.
51+
if table.table_type == 'VIEW':
52+
# The server-side default for useLegacySql is True.
53+
return True
54+
55+
3756
class TableReference(object):
3857
"""TableReferences are pointers to tables.
3958
@@ -531,23 +550,7 @@ def view_query(self):
531550
"""Delete SQL query defining the table as a view."""
532551
self._properties.pop('view', None)
533552

534-
@property
535-
def view_use_legacy_sql(self):
536-
"""Specifies whether to execute the view with Legacy or Standard SQL.
537-
538-
The default is False for views (use Standard SQL).
539-
If this table is not a view, None is returned.
540-
541-
:rtype: bool or ``NoneType``
542-
:returns: The boolean for view.useLegacySql, or None if not a view.
543-
"""
544-
view = self._properties.get('view')
545-
if view is not None:
546-
# useLegacySql is never missing from the view dict if this table
547-
# was created client-side, because the view_query setter populates
548-
# it. So a missing or None can only come from the server, whose
549-
# default is True.
550-
return view.get('useLegacySql', True)
553+
view_use_legacy_sql = property(_view_use_legacy_sql_getter)
551554

552555
@view_use_legacy_sql.setter
553556
def view_use_legacy_sql(self, value):
@@ -713,6 +716,133 @@ def _build_resource(self, filter_fields):
713716
return resource
714717

715718

719+
class TableListItem(object):
    """A read-only table resource from a list operation.

    For performance reasons, the BigQuery API only includes some of the table
    properties when listing tables. Notably,
    :attr:`~google.cloud.bigquery.table.Table.schema` and
    :attr:`~google.cloud.bigquery.table.Table.num_rows` are missing.

    For a full list of the properties that the BigQuery API returns, see the
    `REST documentation for tables.list
    <https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/list>`_.


    Args:
        resource (dict):
            A table-like resource object from a table list response.
    """

    def __init__(self, resource):
        # The resource dict is stored by reference (not copied); every
        # property below reads straight out of it.
        self._properties = resource

    @property
    def project(self):
        """The project ID of the project this table belongs to.

        Returns:
            str: the project ID of the table, or None if the resource has
            no ``tableReference.projectId``.
        """
        return self._properties.get('tableReference', {}).get('projectId')

    @property
    def dataset_id(self):
        """The dataset ID of the dataset this table belongs to.

        Returns:
            str: the dataset ID of the table, or None if the resource has
            no ``tableReference.datasetId``.
        """
        return self._properties.get('tableReference', {}).get('datasetId')

    @property
    def table_id(self):
        """The table ID.

        Returns:
            str: the table ID, or None if the resource has no
            ``tableReference.tableId``.
        """
        return self._properties.get('tableReference', {}).get('tableId')

    @property
    def reference(self):
        """A :class:`~google.cloud.bigquery.table.TableReference` pointing to
        this table.

        Returns:
            google.cloud.bigquery.table.TableReference: pointer to this table
        """
        # NOTE(review): imported at call time rather than at module scope,
        # presumably to avoid a circular import with the dataset module --
        # confirm before hoisting.
        from google.cloud.bigquery import dataset

        dataset_ref = dataset.DatasetReference(self.project, self.dataset_id)
        return TableReference(dataset_ref, self.table_id)

    @property
    def labels(self):
        """Labels for the table.

        This property always returns a dict. When the list response carries
        no labels, a new empty dict is returned on each access; it is not
        stored on this object, so changes made to it are not retained.

        Returns:
            Map[str, str]: A dictionary of the table's labels
        """
        return self._properties.get('labels', {})

    @property
    def full_table_id(self):
        """ID for the table, in the form ``project_id:dataset_id:table_id``.

        Returns:
            str: The fully-qualified ID of the table
        """
        return self._properties.get('id')

    @property
    def table_type(self):
        """The type of the table.

        Possible values are "TABLE", "VIEW", or "EXTERNAL".

        Returns:
            str: The kind of table
        """
        return self._properties.get('type')

    @property
    def partitioning_type(self):
        """Time partitioning of the table.

        Returns:
            str:
                Type of partitioning if the table is partitioned, None
                otherwise.
        """
        return self._properties.get('timePartitioning', {}).get('type')

    @property
    def partition_expiration(self):
        """Expiration time in ms for a partition

        Returns:
            int:
                The time in ms for partition expiration, or None if the
                resource has no ``timePartitioning.expirationMs`` value.
        """
        expiration = self._properties.get(
            'timePartitioning', {}).get('expirationMs')
        # Tables without a partition expiration have no value here;
        # int(None) would raise TypeError, so report "no expiration" as None.
        if expiration is None:
            return None
        return int(expiration)

    @property
    def friendly_name(self):
        """Title of the table.

        Returns:
            str: The name as set by the user, or None (the default)
        """
        return self._properties.get('friendlyName')

    # Same getter function that is defined at module level; no setter is
    # attached here, so the attribute is read-only on list items.
    view_use_legacy_sql = property(_view_use_legacy_sql_getter)
844+
845+
716846
def _row_from_mapping(mapping, schema):
717847
"""Convert a mapping to a row tuple using the schema.
718848

bigquery/tests/unit/test_client.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -1002,7 +1002,7 @@ def test_list_dataset_tables_empty(self):
10021002
self.assertEqual(req['path'], '/%s' % PATH)
10031003

10041004
def test_list_dataset_tables_defaults(self):
1005-
from google.cloud.bigquery.table import Table
1005+
from google.cloud.bigquery.table import TableListItem
10061006

10071007
TABLE_1 = 'table_one'
10081008
TABLE_2 = 'table_two'
@@ -1039,7 +1039,7 @@ def test_list_dataset_tables_defaults(self):
10391039

10401040
self.assertEqual(len(tables), len(DATA['tables']))
10411041
for found, expected in zip(tables, DATA['tables']):
1042-
self.assertIsInstance(found, Table)
1042+
self.assertIsInstance(found, TableListItem)
10431043
self.assertEqual(found.full_table_id, expected['id'])
10441044
self.assertEqual(found.table_type, expected['type'])
10451045
self.assertEqual(token, TOKEN)
@@ -1050,7 +1050,7 @@ def test_list_dataset_tables_defaults(self):
10501050
self.assertEqual(req['path'], '/%s' % PATH)
10511051

10521052
def test_list_dataset_tables_explicit(self):
1053-
from google.cloud.bigquery.table import Table
1053+
from google.cloud.bigquery.table import TableListItem
10541054

10551055
TABLE_1 = 'table_one'
10561056
TABLE_2 = 'table_two'
@@ -1087,7 +1087,7 @@ def test_list_dataset_tables_explicit(self):
10871087

10881088
self.assertEqual(len(tables), len(DATA['tables']))
10891089
for found, expected in zip(tables, DATA['tables']):
1090-
self.assertIsInstance(found, Table)
1090+
self.assertIsInstance(found, TableListItem)
10911091
self.assertEqual(found.full_table_id, expected['id'])
10921092
self.assertEqual(found.table_type, expected['type'])
10931093
self.assertIsNone(token)

bigquery/tests/unit/test_table.py

+81
Original file line numberDiff line numberDiff line change
@@ -753,6 +753,87 @@ def test__row_from_mapping_w_schema(self):
753753
('Phred Phlyntstone', 32, ['red', 'green'], None))
754754

755755

756+
class TestTableListItem(unittest.TestCase):
    """Unit tests for ``google.cloud.bigquery.table.TableListItem``."""

    PROJECT = 'test-project'
    DATASET_ID = 'test_dataset'

    @staticmethod
    def _get_target_class():
        # Imported inside the helper rather than at module scope.
        from google.cloud.bigquery.table import TableListItem

        return TableListItem

    def _make_one(self, *args, **kw):
        klass = self._get_target_class()
        return klass(*args, **kw)

    def _full_id(self, table_id):
        # Fully-qualified ID in the ``project:dataset:table`` form.
        return '{}:{}:{}'.format(self.PROJECT, self.DATASET_ID, table_id)

    def _list_resource(self, table_id, table_type, **extra):
        # Build a table-list style resource; ``extra`` adds optional
        # top-level keys on top of the fields every test needs.
        resource = {
            'kind': 'bigquery#table',
            'id': self._full_id(table_id),
            'tableReference': {
                'projectId': self.PROJECT,
                'datasetId': self.DATASET_ID,
                'tableId': table_id,
            },
            'type': table_type,
        }
        resource.update(extra)
        return resource

    def _check_identity(self, table, table_id):
        # Assertions shared by every test: IDs on the item itself and on
        # the reference derived from it.
        self.assertEqual(table.project, self.PROJECT)
        self.assertEqual(table.dataset_id, self.DATASET_ID)
        self.assertEqual(table.table_id, table_id)
        self.assertEqual(table.full_table_id, self._full_id(table_id))
        reference = table.reference
        self.assertEqual(reference.project, self.PROJECT)
        self.assertEqual(reference.dataset_id, self.DATASET_ID)
        self.assertEqual(reference.table_id, table_id)

    def test_ctor(self):
        table_id = 'coffee_table'
        resource = self._list_resource(
            table_id,
            'TABLE',
            friendlyName='Mahogany Coffee Table',
            timePartitioning={
                'type': 'DAY',
                'expirationMs': '10000',
            },
            labels={
                'some-stuff': 'this-is-a-label',
            },
        )

        table = self._make_one(resource)

        self._check_identity(table, table_id)
        self.assertEqual(table.friendly_name, 'Mahogany Coffee Table')
        self.assertEqual(table.table_type, 'TABLE')
        self.assertEqual(table.partitioning_type, 'DAY')
        self.assertEqual(table.partition_expiration, 10000)
        self.assertEqual(table.labels['some-stuff'], 'this-is-a-label')
        self.assertIsNone(table.view_use_legacy_sql)

    def test_ctor_view(self):
        table_id = 'just_looking'
        resource = self._list_resource(table_id, 'VIEW')

        table = self._make_one(resource)

        self._check_identity(table, table_id)
        self.assertEqual(table.table_type, 'VIEW')
        # Server default for useLegacySql is True.
        self.assertTrue(table.view_use_legacy_sql)
835+
836+
756837
class TestRow(unittest.TestCase):
757838

758839
def test_row(self):

0 commit comments

Comments
 (0)