Skip to content

Commit edecb93

Browse files
committed
adds to_dataframe() to QueryJob
1 parent 1ed56fd commit edecb93

File tree

3 files changed

+90
-0
lines changed

3 files changed

+90
-0
lines changed

bigquery/google/cloud/bigquery/job.py

+9
Original file line numberDiff line numberDiff line change
@@ -1949,6 +1949,15 @@ def result(self, timeout=None, retry=DEFAULT_RETRY):
19491949
return self._client.list_rows(dest_table, selected_fields=schema,
19501950
retry=retry)
19511951

1952+
def to_dataframe(self):
    """Return the query results as a ``pandas.DataFrame``.

    Calls :meth:`result` with its default arguments, reads every row
    from the returned iterator, and builds a DataFrame whose columns are
    named after the fields of the iterator's schema, in schema order.

    :rtype: :class:`pandas.DataFrame`
    :returns: One DataFrame row per result row. When the iterator yields
              no rows the DataFrame is empty but still carries the
              schema's column names.
    :raises ImportError: if ``pandas`` cannot be imported.
    """
    # Imported inside the method rather than at module import time, so a
    # missing pandas only surfaces when this method is actually called.
    try:
        import pandas as pd
    except ImportError:
        raise ImportError(
            'The pandas library is required by to_dataframe() but could '
            'not be imported; install it with "pip install pandas".')

    iterator = self.result()
    column_headers = [field.name for field in iterator.schema]
    rows = [row.values() for row in iterator]

    return pd.DataFrame(rows, columns=column_headers)
1960+
19521961

19531962
class QueryPlanEntryStep(object):
19541963
"""Map a single step in a query plan entry.

bigquery/tests/system.py

+17
Original file line numberDiff line numberDiff line change
@@ -1235,6 +1235,23 @@ def test_query_future(self):
12351235
row_tuples = [r.values() for r in iterator]
12361236
self.assertEqual(row_tuples, [(1,)])
12371237

1238+
def test_query_to_dataframe(self):
    """Run a query on a public dataset and load the result as a DataFrame.

    Checks the returned type, the column labels (taken from the query's
    ``AS`` aliases) and the row count implied by ``LIMIT 10``.
    """
    # pandas may not be installed in every test environment; report the
    # test as skipped instead of erroring on the import.
    try:
        import pandas as pd
    except ImportError:
        self.skipTest('pandas is required for to_dataframe() tests')

    query = """
        SELECT corpus AS title, COUNT(*) AS unique_words
        FROM `bigquery-public-data.samples.shakespeare`
        GROUP BY title
        ORDER BY unique_words DESC
        LIMIT 10"""

    query_job = Config.CLIENT.query(query)
    df = query_job.to_dataframe()

    self.assertIsInstance(df, pd.DataFrame)
    # Iterating a DataFrame yields its column labels.
    self.assertEqual(list(df), ['title', 'unique_words'])
    self.assertEqual(len(df), 10)
1254+
12381255
def test_query_table_def(self):
12391256
gs_url = self._write_csv_to_storage(
12401257
'bq_external_test' + unique_resource_id(), 'person_ages.csv',

bigquery/tests/unit/test_job.py

+64
Original file line numberDiff line numberDiff line change
@@ -2720,6 +2720,70 @@ def test_reload_w_alternate_client(self):
27202720
self.assertEqual(req['path'], PATH)
27212721
self._verifyResourceProperties(job, RESOURCE)
27222722

2723+
def test_to_dataframe(self):
    """``to_dataframe()`` turns a four-row, two-column result into a DataFrame.

    The fake connection is primed with four canned responses, in the
    order they are passed to ``_Connection``. The query resource carries
    both the schema and the rows.
    """
    # pandas may not be installed in every test environment; report the
    # test as skipped instead of erroring on the import.
    try:
        import pandas as pd
    except ImportError:
        self.skipTest('pandas is required for to_dataframe() tests')

    begun_resource = self._makeResource()
    query_resource = {
        'jobComplete': True,
        'jobReference': {
            'projectId': self.PROJECT,
            'jobId': self.JOB_ID,
        },
        'schema': {
            'fields': [
                {'name': 'name', 'type': 'STRING', 'mode': 'NULLABLE'},
                {'name': 'age', 'type': 'INTEGER', 'mode': 'NULLABLE'},
            ],
        },
        'rows': [
            {'f': [{'v': 'Phred Phlyntstone'}, {'v': '32'}]},
            {'f': [{'v': 'Bharney Rhubble'}, {'v': '33'}]},
            {'f': [{'v': 'Wylma Phlyntstone'}, {'v': '29'}]},
            {'f': [{'v': 'Bhettye Rhubble'}, {'v': '27'}]},
        ],
    }
    # Deep-copy so marking the job DONE does not mutate begun_resource.
    done_resource = copy.deepcopy(begun_resource)
    done_resource['status'] = {'state': 'DONE'}
    connection = _Connection(
        begun_resource, query_resource, done_resource, query_resource)
    client = _make_client(project=self.PROJECT, connection=connection)
    job = self._make_one(self.JOB_ID, self.QUERY, client)
    df = job.to_dataframe()

    self.assertIsInstance(df, pd.DataFrame)
    self.assertEqual(len(df), 4)  # one DataFrame row per result row
    self.assertEqual(list(df), ['name', 'age'])  # column labels
    # Cell values must land in the right column and keep their row order.
    self.assertEqual(
        list(df['name']),
        ['Phred Phlyntstone', 'Bharney Rhubble',
         'Wylma Phlyntstone', 'Bhettye Rhubble'])
2757+
2758+
def test_to_dataframe_w_empty_results(self):
    """``to_dataframe()`` on a result with a schema but no rows.

    The query resource deliberately has no ``'rows'`` key. The DataFrame
    is expected to be empty yet still expose the schema's column names.
    """
    # pandas may not be installed in every test environment; report the
    # test as skipped instead of erroring on the import.
    try:
        import pandas as pd
    except ImportError:
        self.skipTest('pandas is required for to_dataframe() tests')

    begun_resource = self._makeResource()
    query_resource = {
        'jobComplete': True,
        'jobReference': {
            'projectId': self.PROJECT,
            'jobId': self.JOB_ID,
        },
        'schema': {
            'fields': [
                {'name': 'name', 'type': 'STRING', 'mode': 'NULLABLE'},
                {'name': 'age', 'type': 'INTEGER', 'mode': 'NULLABLE'},
            ],
        },
    }
    # Deep-copy so marking the job DONE does not mutate begun_resource.
    done_resource = copy.deepcopy(begun_resource)
    done_resource['status'] = {'state': 'DONE'}
    connection = _Connection(
        begun_resource, query_resource, done_resource, query_resource)
    client = _make_client(project=self.PROJECT, connection=connection)
    job = self._make_one(self.JOB_ID, self.QUERY, client)
    df = job.to_dataframe()

    self.assertIsInstance(df, pd.DataFrame)
    self.assertEqual(len(df), 0)  # no rows were returned
    self.assertEqual(list(df), ['name', 'age'])  # column labels survive
2786+
27232787

27242788
class TestQueryPlanEntryStep(unittest.TestCase, _Base):
27252789
KIND = 'KIND'

0 commit comments

Comments
 (0)