Skip to content

Commit 6762f95

Browse files
committed
adds to_dataframe() helper to QueryJob
1 parent 10fcd7c commit 6762f95

File tree

2 files changed

+49
-1
lines changed

2 files changed

+49
-1
lines changed

bigquery/google/cloud/bigquery/job.py

+14-1
Original file line numberDiff line numberDiff line change
@@ -1929,7 +1929,7 @@ def result(self, timeout=None, retry=DEFAULT_RETRY):
19291929
:type retry: :class:`google.api_core.retry.Retry`
19301930
:param retry: (Optional) How to retry the call that retrieves rows.
19311931
1932-
:rtype: :class:`~google.api_core.page_iterator.Iterator`
1932+
:rtype: :class:`~google.cloud.bigquery.table.RowIterator`
19331933
:returns:
19341934
Iterator of row data :class:`~google.cloud.bigquery.table.Row`-s.
19351935
During each page, the iterator will have the ``total_rows``
@@ -1949,6 +1949,19 @@ def result(self, timeout=None, retry=DEFAULT_RETRY):
19491949
return self._client.list_rows(dest_table, selected_fields=schema,
19501950
retry=retry)
19511951

1952+
def to_dataframe(self):
    """Build a pandas DataFrame holding the results of this query job.

    Returns:
        A :class:`~pandas.DataFrame` populated with row data and column
        headers from the query results. The column headers are derived
        from the destination table's schema.

    Raises:
        ValueError: If the `pandas` library cannot be imported.
    """
    # ``result()`` hands back the row iterator; the actual conversion to a
    # DataFrame is delegated to that iterator.
    row_iterator = self.result()
    return row_iterator.to_dataframe()
1964+
19521965
def __iter__(self):
19531966
return iter(self.result())
19541967

bigquery/tests/unit/test_job.py

+35
Original file line numberDiff line numberDiff line change
@@ -2724,6 +2724,41 @@ def test_reload_w_alternate_client(self):
27242724
self.assertEqual(req['path'], PATH)
27252725
self._verifyResourceProperties(job, RESOURCE)
27262726

2727+
@unittest.skipIf(pandas is None, 'Requires `pandas`')
def test_to_dataframe(self):
    """``QueryJob.to_dataframe()`` returns a DataFrame of the query rows.

    The fake connection is primed with four responses (begun job, query
    results, finished job, query results); the resulting frame is checked
    for its type, row count and column names.
    """
    # Each pair is (name, age); values are strings, as in the fake payload.
    people = [
        ('Phred Phlyntstone', '32'),
        ('Bharney Rhubble', '33'),
        ('Wylma Phlyntstone', '29'),
        ('Bhettye Rhubble', '27'),
    ]
    schema_fields = [
        {'name': 'name', 'type': 'STRING', 'mode': 'NULLABLE'},
        {'name': 'age', 'type': 'INTEGER', 'mode': 'NULLABLE'},
    ]
    query_resource = {
        'jobComplete': True,
        'jobReference': {
            'projectId': self.PROJECT,
            'jobId': self.JOB_ID,
        },
        'schema': {
            'fields': schema_fields,
        },
        'rows': [
            {'f': [{'v': name}, {'v': age}]} for name, age in people
        ],
    }

    begun_resource = self._make_resource()
    # The "done" resource is the begun resource with a DONE status added.
    done_resource = copy.deepcopy(begun_resource)
    done_resource['status'] = {'state': 'DONE'}

    responses = (
        begun_resource, query_resource, done_resource, query_resource)
    connection = _Connection(*responses)
    client = _make_client(project=self.PROJECT, connection=connection)
    job = self._make_one(self.JOB_ID, self.QUERY, client)

    df = job.to_dataframe()

    self.assertIsInstance(df, pandas.DataFrame)
    self.assertEqual(len(df), 4)  # verify the number of rows
    self.assertEqual(list(df), ['name', 'age'])  # verify the column names
2761+
27272762
def test_iter(self):
27282763
import types
27292764

0 commit comments

Comments
 (0)