Skip to content

Commit 7b4d967

Browse files
committed
adds to_dataframe() to QueryJob
1 parent 6ce2b86 commit 7b4d967

File tree

3 files changed

+90
-0
lines changed

3 files changed

+90
-0
lines changed

bigquery/google/cloud/bigquery/job.py

+9
Original file line numberDiff line numberDiff line change
@@ -1949,6 +1949,15 @@ def result(self, timeout=None, retry=DEFAULT_RETRY):
19491949
return self._client.list_rows(dest_table, selected_fields=schema,
19501950
retry=retry)
19511951

1952+
def to_dataframe(self):
    """Return the results of this query job as a pandas DataFrame.

    The rows are fetched through :meth:`result`, called with its default
    ``timeout`` and ``retry`` arguments.  Every row is read into an
    in-memory list before the DataFrame is built, so the complete result
    set has to fit in memory.

    :rtype: :class:`pandas.DataFrame`
    :returns: One DataFrame row per result row; the columns are named
              after the fields of the result schema, in schema order.
              A result without rows yields an empty DataFrame that
              still carries those column names.
    :raises ImportError: if the ``pandas`` library cannot be imported.
    """
    # pandas is imported lazily so that merely loading this module does
    # not require it; a missing install is reported with a clear hint.
    try:
        import pandas as pd
    except ImportError:
        raise ImportError(
            'The pandas library is required to use to_dataframe(); '
            'install it with `pip install pandas`.')

    iterator = self.result()
    column_headers = [field.name for field in iterator.schema]
    # Materialize each row as its tuple of values, in schema order.
    rows = [row.values() for row in iterator]

    return pd.DataFrame(rows, columns=column_headers)
1960+
19521961
def __iter__(self):
19531962
return iter(self.result())
19541963

bigquery/tests/system.py

+17
Original file line numberDiff line numberDiff line change
@@ -1242,6 +1242,23 @@ def test_query_iter(self):
12421242
row_tuples = [r.values() for r in query_job]
12431243
self.assertEqual(row_tuples, [(1,)])
12441244

1245+
def test_query_to_dataframe(self):
    """System test: a query job's results convert to a DataFrame.

    Runs an aggregate query over the
    ``bigquery-public-data.samples.shakespeare`` table through
    ``Config.CLIENT`` and checks the type, the column names and the row
    count of the object returned by ``to_dataframe()``.
    """
    import pandas as pd

    query = """
        SELECT corpus AS title, COUNT(*) AS unique_words
        FROM `bigquery-public-data.samples.shakespeare`
        GROUP BY title
        ORDER BY unique_words DESC
        LIMIT 10"""

    query_job = Config.CLIENT.query(query)
    df = query_job.to_dataframe()

    self.assertIsInstance(df, pd.DataFrame)
    # Iterating a DataFrame yields its column labels; they must match
    # the aliases used in the SELECT list.
    self.assertEqual(list(df), ['title', 'unique_words'])
    # The query is capped with LIMIT 10.
    self.assertEqual(len(df), 10)
1261+
12451262
def test_query_table_def(self):
12461263
gs_url = self._write_csv_to_storage(
12471264
'bq_external_test' + unique_resource_id(), 'person_ages.csv',

bigquery/tests/unit/test_job.py

+64
Original file line numberDiff line numberDiff line change
@@ -2720,6 +2720,70 @@ def test_reload_w_alternate_client(self):
27202720
self.assertEqual(req['path'], PATH)
27212721
self._verifyResourceProperties(job, RESOURCE)
27222722

2723+
def test_to_dataframe(self):
    """``to_dataframe()`` builds a DataFrame from canned query results.

    The fake connection is loaded with a two-field schema and four rows;
    the test checks the returned type, the row count and the column
    names.
    """
    import pandas as pd

    begun_resource = self._makeResource()
    # Canned query-results payload: a completed job with a two-column
    # schema and four rows in the ``f``/``v`` cell layout.
    query_resource = {
        'jobComplete': True,
        'jobReference': {
            'projectId': self.PROJECT,
            'jobId': self.JOB_ID,
        },
        'schema': {
            'fields': [
                {'name': 'name', 'type': 'STRING', 'mode': 'NULLABLE'},
                {'name': 'age', 'type': 'INTEGER', 'mode': 'NULLABLE'},
            ],
        },
        'rows': [
            {'f': [{'v': 'Phred Phlyntstone'}, {'v': '32'}]},
            {'f': [{'v': 'Bharney Rhubble'}, {'v': '33'}]},
            {'f': [{'v': 'Wylma Phlyntstone'}, {'v': '29'}]},
            {'f': [{'v': 'Bhettye Rhubble'}, {'v': '27'}]},
        ],
    }
    # Same job resource, but with its status marked as finished.
    done_resource = copy.deepcopy(begun_resource)
    done_resource['status'] = {'state': 'DONE'}
    # NOTE(review): presumably _Connection replays these responses in
    # order, one per API request -- confirm against its definition.
    connection = _Connection(
        begun_resource, query_resource, done_resource, query_resource)
    client = _make_client(project=self.PROJECT, connection=connection)
    job = self._make_one(self.JOB_ID, self.QUERY, client)

    df = job.to_dataframe()

    self.assertIsInstance(df, pd.DataFrame)
    self.assertEqual(len(df), 4)
    # Column labels come from the schema field names, in order.
    self.assertEqual(list(df), ['name', 'age'])
2757+
2758+
def test_to_dataframe_w_empty_results(self):
    """``to_dataframe()`` copes with a result set that has no rows.

    The canned query response carries a schema but no ``rows`` key; the
    resulting DataFrame must be empty while still exposing the schema's
    column names.
    """
    import pandas as pd

    begun_resource = self._makeResource()
    # Canned query-results payload: a completed job with a schema only;
    # the ``rows`` key is deliberately absent.
    query_resource = {
        'jobComplete': True,
        'jobReference': {
            'projectId': self.PROJECT,
            'jobId': self.JOB_ID,
        },
        'schema': {
            'fields': [
                {'name': 'name', 'type': 'STRING', 'mode': 'NULLABLE'},
                {'name': 'age', 'type': 'INTEGER', 'mode': 'NULLABLE'},
            ],
        },
    }
    # Same job resource, but with its status marked as finished.
    done_resource = copy.deepcopy(begun_resource)
    done_resource['status'] = {'state': 'DONE'}
    # NOTE(review): presumably _Connection replays these responses in
    # order, one per API request -- confirm against its definition.
    connection = _Connection(
        begun_resource, query_resource, done_resource, query_resource)
    client = _make_client(project=self.PROJECT, connection=connection)
    job = self._make_one(self.JOB_ID, self.QUERY, client)

    df = job.to_dataframe()

    self.assertIsInstance(df, pd.DataFrame)
    self.assertEqual(len(df), 0)
    # Even without rows, the columns are still named after the schema.
    self.assertEqual(list(df), ['name', 'age'])
2786+
27232787
def test_iter(self):
27242788
import types
27252789

0 commit comments

Comments
 (0)