Skip to content

Commit cc852af

Browse files
authored
BigQuery: Use DatasetListItem for client.list_datasets (#4439)
* BigQuery: Use DatasetListItem for client.list_datasets. Listing datasets only includes a subset of the properties available on a dataset. The DatasetListItem class is used to explicitly document which features are available and to prevent confusion from trying to use the resulting object in other contexts, like updating.
* Fix lint errors.
* Make dataset & table reference required, labels optional.
* Fix lint error.
1 parent 3fb9f16 commit cc852af

File tree

6 files changed

+292
-18
lines changed

6 files changed

+292
-18
lines changed

bigquery/google/cloud/bigquery/client.py

+7-4
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
from google.cloud.bigquery._helpers import _snake_to_camel_case
3737
from google.cloud.bigquery._http import Connection
3838
from google.cloud.bigquery.dataset import Dataset
39+
from google.cloud.bigquery.dataset import DatasetListItem
3940
from google.cloud.bigquery.dataset import DatasetReference
4041
from google.cloud.bigquery.job import CopyJob
4142
from google.cloud.bigquery.job import ExtractJob
@@ -181,8 +182,10 @@ def list_datasets(self, include_all=False, filter=None, max_results=None,
181182
:param retry: (Optional) How to retry the RPC.
182183
183184
:rtype: :class:`~google.api_core.page_iterator.Iterator`
184-
:returns: Iterator of :class:`~google.cloud.bigquery.dataset.Dataset`.
185-
accessible to the current client.
185+
:returns:
186+
Iterator of
187+
:class:`~google.cloud.bigquery.dataset.DatasetListItem`
188+
associated with the client's project.
186189
"""
187190
extra_params = {}
188191
if include_all:
@@ -1275,10 +1278,10 @@ def _item_to_dataset(iterator, resource):
12751278
:type resource: dict
12761279
:param resource: An item to be converted to a dataset.
12771280
1278-
:rtype: :class:`.Dataset`
1281+
:rtype: :class:`.DatasetListItem`
12791282
:returns: The next dataset in the page.
12801283
"""
1281-
return Dataset.from_api_repr(resource)
1284+
return DatasetListItem(resource)
12821285

12831286

12841287
def _item_to_job(iterator, resource):

bigquery/google/cloud/bigquery/dataset.py

+104-4
Original file line numberDiff line numberDiff line change
@@ -281,8 +281,7 @@ def full_dataset_id(self):
281281

282282
@property
283283
def reference(self):
284-
"""A :class:`~google.cloud.bigquery.dataset.DatasetReference` pointing to
285-
this dataset.
284+
"""A reference to this dataset.
286285
287286
Returns:
288287
google.cloud.bigquery.dataset.DatasetReference:
@@ -420,7 +419,7 @@ def labels(self):
420419
:rtype: dict, {str -> str}
421420
:returns: A dict of the dataset's labels.
422421
"""
423-
return self._properties['labels']
422+
return self._properties.get('labels', {})
424423

425424
@labels.setter
426425
def labels(self, value):
@@ -546,4 +545,105 @@ def table(self, table_id):
546545
:rtype: :class:`~google.cloud.bigquery.table.TableReference`
547546
:returns: a TableReference for a table in this dataset.
548547
"""
549-
return TableReference(self, table_id)
548+
return TableReference(self.reference, table_id)
549+
550+
551+
class DatasetListItem(object):
    """A read-only dataset resource from a list operation.

    For performance reasons, the BigQuery API only includes some of the
    dataset properties when listing datasets. Notably,
    :attr:`~google.cloud.bigquery.dataset.Dataset.access_entries` is missing.

    For a full list of the properties that the BigQuery API returns, see the
    `REST documentation for datasets.list
    <https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/list>`_.

    Only getters are exposed. The ``resource`` mapping is stored as-is (not
    copied), so later changes to it are visible through this object.

    Args:
        resource (dict):
            A dataset-like resource object from a dataset list response. A
            ``datasetReference`` property is required.

    Raises:
        ValueError:
            If ``datasetReference`` is missing or ``None``, or if one of its
            required members (``projectId``, ``datasetId``) is missing from
            ``resource``.
    """

    def __init__(self, resource):
        reference = resource.get('datasetReference')
        # A null reference is as unusable as an absent one. Report both with
        # the documented ValueError instead of letting the membership tests
        # below fail with a TypeError.
        if reference is None:
            raise ValueError('resource must contain a datasetReference value')
        # Checked in this order so the first missing member is the one named
        # in the error message.
        for key in ('projectId', 'datasetId'):
            if key not in reference:
                raise ValueError(
                    "resource['datasetReference'] must contain a "
                    "{} value".format(key))
        self._properties = resource

    @property
    def project(self):
        """Project bound to the dataset.

        :rtype: str
        :returns: the ``projectId`` of the dataset reference.
        """
        return self._properties['datasetReference']['projectId']

    @property
    def dataset_id(self):
        """Dataset ID.

        :rtype: str
        :returns: the ``datasetId`` of the dataset reference.
        """
        return self._properties['datasetReference']['datasetId']

    @property
    def full_dataset_id(self):
        """ID for the dataset resource, in the form "project_id:dataset_id".

        :rtype: str, or ``NoneType``
        :returns: the ``id`` of the resource, or None if the resource has no
                  ``id`` entry.
        """
        return self._properties.get('id')

    @property
    def friendly_name(self):
        """Title of the dataset.

        :rtype: str, or ``NoneType``
        :returns: the ``friendlyName`` of the resource, or None if the
                  resource has no such entry.
        """
        return self._properties.get('friendlyName')

    @property
    def labels(self):
        """Labels for the dataset.

        :rtype: dict, {str -> str}
        :returns: A dict of the dataset's labels; an empty dict if the
                  resource has no ``labels`` entry.
        """
        return self._properties.get('labels', {})

    @property
    def reference(self):
        """A reference to this dataset.

        Returns:
            google.cloud.bigquery.dataset.DatasetReference:
                A pointer to this dataset, built from :attr:`project` and
                :attr:`dataset_id` on every access.
        """
        return DatasetReference(self.project, self.dataset_id)

    def table(self, table_id):
        """Constructs a TableReference.

        :type table_id: str
        :param table_id: the ID of the table.

        :rtype: :class:`~google.cloud.bigquery.table.TableReference`
        :returns: a TableReference for a table in this dataset.
        """
        return TableReference(self.reference, table_id)

bigquery/google/cloud/bigquery/table.py

+28-8
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ def _reference_getter(table):
4949
this table.
5050
5151
Returns:
52-
google.cloud.bigquery.table.TableReference: pointer to this table
52+
google.cloud.bigquery.table.TableReference: pointer to this table.
5353
"""
5454
from google.cloud.bigquery import dataset
5555

@@ -295,7 +295,7 @@ def labels(self):
295295
:rtype: dict, {str -> str}
296296
:returns: A dict of the table's labels.
297297
"""
298-
return self._properties['labels']
298+
return self._properties.get('labels', {})
299299

300300
@labels.setter
301301
def labels(self, value):
@@ -756,10 +756,28 @@ class TableListItem(object):
756756
757757
Args:
758758
resource (dict):
759-
A table-like resource object from a table list response.
759+
A table-like resource object from a table list response. A
760+
``tableReference`` property is required.
761+
762+
Raises:
763+
ValueError:
764+
If ``tableReference`` or one of its required members is missing
765+
from ``resource``.
760766
"""
761767

762768
def __init__(self, resource):
    """Validate and store a table resource from a table list response.

    Args:
        resource (dict):
            A table-like resource object. Its ``tableReference`` entry must
            be present and contain ``projectId``, ``datasetId`` and
            ``tableId``. The mapping is stored as-is, not copied.

    Raises:
        ValueError:
            If ``tableReference`` is missing or ``None``, or if one of its
            required members is missing.
    """
    reference = resource.get('tableReference')
    # A null reference is as unusable as an absent one. Report both with the
    # documented ValueError instead of letting the membership tests below
    # fail with a TypeError.
    if reference is None:
        raise ValueError('resource must contain a tableReference value')
    # Checked in this order so the first missing member is the one named in
    # the error message.
    for key in ('projectId', 'datasetId', 'tableId'):
        if key not in reference:
            raise ValueError(
                "resource['tableReference'] must contain a "
                "{} value".format(key))

    self._properties = resource
764782

765783
@property
@@ -769,7 +787,7 @@ def project(self):
769787
Returns:
770788
str: the project ID of the table.
771789
"""
772-
return self._properties.get('tableReference', {}).get('projectId')
790+
return self._properties['tableReference']['projectId']
773791

774792
@property
775793
def dataset_id(self):
@@ -778,7 +796,7 @@ def dataset_id(self):
778796
Returns:
779797
str: the dataset ID of the table.
780798
"""
781-
return self._properties.get('tableReference', {}).get('datasetId')
799+
return self._properties['tableReference']['datasetId']
782800

783801
@property
784802
def table_id(self):
@@ -787,7 +805,7 @@ def table_id(self):
787805
Returns:
788806
str: the table ID.
789807
"""
790-
return self._properties.get('tableReference', {}).get('tableId')
808+
return self._properties['tableReference']['tableId']
791809

792810
reference = property(_reference_getter)
793811

@@ -842,8 +860,10 @@ def partition_expiration(self):
842860
Returns:
843861
int: The time in ms for partition expiration
844862
"""
845-
return int(
846-
self._properties.get('timePartitioning', {}).get('expirationMs'))
863+
expiration = self._properties.get(
864+
'timePartitioning', {}).get('expirationMs')
865+
if expiration is not None:
866+
return int(expiration)
847867

848868
@property
849869
def friendly_name(self):

bigquery/tests/unit/test_client.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ def test_list_projects_explicit_response_missing_projects_key(self):
183183
{'maxResults': 3, 'pageToken': TOKEN})
184184

185185
def test_list_datasets_defaults(self):
186-
from google.cloud.bigquery.dataset import Dataset
186+
from google.cloud.bigquery.dataset import DatasetListItem
187187

188188
DATASET_1 = 'dataset_one'
189189
DATASET_2 = 'dataset_two'
@@ -215,7 +215,7 @@ def test_list_datasets_defaults(self):
215215

216216
self.assertEqual(len(datasets), len(DATA['datasets']))
217217
for found, expected in zip(datasets, DATA['datasets']):
218-
self.assertIsInstance(found, Dataset)
218+
self.assertIsInstance(found, DatasetListItem)
219219
self.assertEqual(found.full_dataset_id, expected['id'])
220220
self.assertEqual(found.friendly_name, expected['friendlyName'])
221221
self.assertEqual(token, TOKEN)

bigquery/tests/unit/test_dataset.py

+97
Original file line numberDiff line numberDiff line change
@@ -404,6 +404,10 @@ def test_labels_setter_bad_value(self):
404404
with self.assertRaises(ValueError):
405405
dataset.labels = None
406406

407+
def test_labels_getter_missing_value(self):
    """A dataset created without labels reports an empty dict."""
    self.assertEqual(self._make_one(self.DS_REF).labels, {})
410+
407411
def test_from_api_repr_missing_identity(self):
408412
self._setUpConstants()
409413
RESOURCE = {}
@@ -460,3 +464,96 @@ def test_table(self):
460464
self.assertEqual(table.table_id, 'table_id')
461465
self.assertEqual(table.dataset_id, self.DS_ID)
462466
self.assertEqual(table.project, self.PROJECT)
467+
468+
469+
class TestDatasetListItem(unittest.TestCase):
    """Unit tests for ``google.cloud.bigquery.dataset.DatasetListItem``."""

    @staticmethod
    def _get_target_class():
        """Return the class under test, imported at call time."""
        from google.cloud.bigquery.dataset import DatasetListItem

        return DatasetListItem

    def _make_one(self, *args, **kw):
        """Instantiate the class under test with the given arguments."""
        target = self._get_target_class()
        return target(*args, **kw)

    def test_ctor(self):
        """Every property is read back from a fully populated resource."""
        project = 'test-project'
        dataset_id = 'test_dataset'
        full_id = '{}:{}'.format(project, dataset_id)

        item = self._make_one({
            'kind': 'bigquery#dataset',
            'id': full_id,
            'datasetReference': {
                'projectId': project,
                'datasetId': dataset_id,
            },
            'friendlyName': 'Data of the Test',
            'labels': {
                'some-stuff': 'this-is-a-label',
            },
        })

        self.assertEqual(item.project, project)
        self.assertEqual(item.dataset_id, dataset_id)
        self.assertEqual(item.full_dataset_id, full_id)
        self.assertEqual(item.reference.project, project)
        self.assertEqual(item.reference.dataset_id, dataset_id)
        self.assertEqual(item.friendly_name, 'Data of the Test')
        self.assertEqual(item.labels['some-stuff'], 'this-is-a-label')

    def test_ctor_missing_properties(self):
        """Optional properties fall back to None or an empty dict."""
        reference = {
            'projectId': 'testproject',
            'datasetId': 'testdataset',
        }
        item = self._make_one({'datasetReference': reference})

        self.assertEqual(item.project, 'testproject')
        self.assertEqual(item.dataset_id, 'testdataset')
        self.assertIsNone(item.full_dataset_id)
        self.assertIsNone(item.friendly_name)
        self.assertEqual(item.labels, {})

    def test_ctor_wo_project(self):
        """A reference without ``projectId`` is rejected."""
        incomplete = {'datasetReference': {'datasetId': 'testdataset'}}
        self.assertRaises(ValueError, self._make_one, incomplete)

    def test_ctor_wo_dataset(self):
        """A reference without ``datasetId`` is rejected."""
        incomplete = {'datasetReference': {'projectId': 'testproject'}}
        self.assertRaises(ValueError, self._make_one, incomplete)

    def test_ctor_wo_reference(self):
        """A resource without ``datasetReference`` is rejected."""
        self.assertRaises(ValueError, self._make_one, {})

    def test_table(self):
        """``table()`` returns a TableReference bound to this dataset."""
        from google.cloud.bigquery.table import TableReference

        project = 'test-project'
        dataset_id = 'test_dataset'
        item = self._make_one({
            'datasetReference': {
                'projectId': project,
                'datasetId': dataset_id,
            },
        })

        table_ref = item.table('table_id')

        self.assertIsInstance(table_ref, TableReference)
        self.assertEqual(table_ref.table_id, 'table_id')
        self.assertEqual(table_ref.dataset_id, dataset_id)
        self.assertEqual(table_ref.project, project)

0 commit comments

Comments
 (0)