|
24 | 24 | import uuid
|
25 | 25 |
|
26 | 26 | import six
|
| 27 | +try: |
| 28 | + import pandas |
| 29 | +except ImportError: # pragma: NO COVER |
| 30 | + pandas = None |
27 | 31 |
|
28 | 32 | from google.api_core.exceptions import PreconditionFailed
|
29 | 33 | from google.cloud import bigquery
|
@@ -1244,6 +1248,28 @@ def test_query_iter(self):
|
1244 | 1248 | row_tuples = [r.values() for r in query_job]
|
1245 | 1249 | self.assertEqual(row_tuples, [(1,)])
|
1246 | 1250 |
|
@unittest.skipIf(pandas is None, 'Requires `pandas`')
def test_query_results_to_dataframe(self):
    """Check that query results convert into a ``pandas.DataFrame``.

    Runs a ten-row query against a public dataset, converts the result
    with ``to_dataframe()`` and verifies the row count, the column
    names (and their order) and the type of every non-missing cell.
    """
    QUERY = """
        SELECT id, author, time_ts, dead
        from `bigquery-public-data.hacker_news.comments`
        LIMIT 10
    """

    df = Config.CLIENT.query(QUERY).result().to_dataframe()

    self.assertIsInstance(df, pandas.DataFrame)
    self.assertEqual(len(df), 10)  # verify the number of rows
    column_names = ['id', 'author', 'time_ts', 'dead']
    self.assertEqual(list(df), column_names)  # verify the column names
    # ``six.text_type`` instead of ``str`` so the check also holds on
    # Python 2, where text values are ``unicode`` rather than ``str``.
    exp_datatypes = {'id': int, 'author': six.text_type,
                     'time_ts': pandas.Timestamp, 'dead': bool}
    for _, row in df.iterrows():
        for col in column_names:
            value = row[col]
            # All the schema fields are nullable, so a missing value is
            # acceptable.  ``pandas.isnull`` recognises None as well as
            # the NaN / NaT markers pandas can substitute for it.
            if not pandas.isnull(value):
                self.assertIsInstance(value, exp_datatypes[col])
| 1272 | + |
1247 | 1273 | def test_query_table_def(self):
|
1248 | 1274 | gs_url = self._write_csv_to_storage(
|
1249 | 1275 | 'bq_external_test' + unique_resource_id(), 'person_ages.csv',
|
@@ -1419,6 +1445,56 @@ def test_create_table_rows_fetch_nested_schema(self):
|
1419 | 1445 | e_favtime = datetime.datetime(*parts[0:6])
|
1420 | 1446 | self.assertEqual(found[7], e_favtime)
|
1421 | 1447 |
|
def _fetch_dataframe(self, query):
    """Run ``query`` on the shared client and return it as a dataframe.

    Kept as a separate method so callers can wrap the whole
    query-and-convert step (for example in a retry helper).
    """
    query_job = Config.CLIENT.query(query)
    query_result = query_job.result()
    return query_result.to_dataframe()
| 1450 | + |
@unittest.skipIf(pandas is None, 'Requires `pandas`')
def test_nested_table_to_dataframe(self):
    """Check ``to_dataframe()`` on a table with nested/repeated fields.

    Creates a temporary table whose second column is a RECORD holding
    a string, a repeated integer and a further nested RECORD, inserts
    one row, queries it back as a dataframe and verifies that the
    nested value comes back intact and is reachable by key/index.
    """
    field = bigquery.SchemaField

    # Build the schema from the innermost record outwards.
    innermost_fields = [
        field('nested_nested_string', 'STRING', mode='NULLABLE'),
    ]
    record_fields = [
        field('nested_string', 'STRING', mode='NULLABLE'),
        field('nested_repeated', 'INTEGER', mode='REPEATED'),
        field('nested_record', 'RECORD', mode='NULLABLE',
              fields=innermost_fields),
    ]
    schema = [
        field('string_col', 'STRING', mode='NULLABLE'),
        field('record_col', 'RECORD', mode='NULLABLE',
              fields=record_fields),
    ]

    expected_record = {
        'nested_string': 'another string value',
        'nested_repeated': [0, 1, 2],
        'nested_record': {'nested_nested_string': 'some deep insight'},
    }
    rows_to_insert = [('Some value', expected_record)]

    table_name = 'test_table'
    temp_ds = self.temp_dataset(_make_dataset_id('nested_df'))
    table_arg = Table(temp_ds.table(table_name), schema=schema)
    create_table = retry_403(Config.CLIENT.create_table)
    created_table = create_table(table_arg)
    # Registered at the front of the cleanup list.
    self.to_delete.insert(0, created_table)
    Config.CLIENT.create_rows(created_table, rows_to_insert)

    sql = 'SELECT * from `{}.{}.{}`'.format(
        Config.CLIENT.project, temp_ds.dataset_id, table_name)

    # Retry the fetch (up to 8 tries) until ``_has_rows`` accepts the
    # result -- presumably because freshly inserted rows may not be
    # queryable right away.
    retry_until_rows = RetryResult(_has_rows, max_tries=8)
    fetch_with_retry = retry_until_rows(self._fetch_dataframe)
    frame = fetch_with_retry(sql)

    self.assertIsInstance(frame, pandas.DataFrame)
    # Exactly one row, with the columns in schema order.
    self.assertEqual(len(frame), 1)
    expected_columns = ['string_col', 'record_col']
    self.assertEqual(list(frame), expected_columns)

    # The row content matches what was inserted.
    first_row = frame.iloc[0]
    self.assertEqual(first_row['string_col'], 'Some value')
    self.assertEqual(first_row['record_col'], expected_record)

    # Nested data can be reached with ordinary indices/keys.
    fetched_record = first_row['record_col']
    self.assertEqual(fetched_record['nested_repeated'][0], 0)
    deepest_value = (
        fetched_record['nested_record']['nested_nested_string'])
    self.assertEqual(deepest_value, 'some deep insight')
| 1497 | + |
1422 | 1498 | def temp_dataset(self, dataset_id):
|
1423 | 1499 | dataset = retry_403(Config.CLIENT.create_dataset)(
|
1424 | 1500 | Dataset(Config.CLIENT.dataset(dataset_id)))
|
|
0 commit comments