# Assumption: type checks are only used by library developers and CI environments
# that have all optional dependencies installed, thus no conditional imports.
import pandas
+import geopandas
import pyarrow
from google.api_core import retry as retries
from google.cloud import bigquery_storage
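These imports only need to resolve when a static type checker runs, which is why the comment rules out conditional imports. A minimal sketch of the pattern being assumed here, with the `typing.TYPE_CHECKING` guard that sits outside this hunk:

```python
import typing

if typing.TYPE_CHECKING:
    # Evaluated only by type checkers such as mypy; never executed at
    # runtime, so optional dependencies like geopandas do not have to be
    # installed for ordinary library users.
    import geopandas
    import pandas
```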
@@ -1487,6 +1488,7 @@ def to_dataframe(
        create_bqstorage_client: bool = True,
        date_as_object: bool = True,
        max_results: Optional[int] = None,
+        geography_as_object: bool = False,
    ) -> "pandas.DataFrame":
        """Return a pandas DataFrame from a QueryJob

@@ -1538,13 +1540,27 @@ def to_dataframe(

                .. versionadded:: 2.21.0

+            geography_as_object (Optional[bool]):
+                If ``True``, convert GEOGRAPHY data to :mod:`shapely`
+                geometry objects. If ``False`` (default), don't cast
+                geography data to :mod:`shapely` geometry objects.
+
+                .. versionadded:: 2.24.0
+
        Returns:
-            A :class:`~pandas.DataFrame` populated with row data and column
-            headers from the query results. The column headers are derived
-            from the destination table's schema.
+            pandas.DataFrame:
+                A :class:`~pandas.DataFrame` populated with row data
+                and column headers from the query results. The column
+                headers are derived from the destination table's
+                schema.

        Raises:
-            ValueError: If the `pandas` library cannot be imported.
+            ValueError:
+                If the :mod:`pandas` library cannot be imported, or
+                the :mod:`google.cloud.bigquery_storage_v1` module is
+                required but cannot be imported. Also if
+                ``geography_as_object`` is ``True``, but the
+                :mod:`shapely` library cannot be imported.
        """
        query_result = wait_for_query(self, progress_bar_type, max_results=max_results)
        return query_result.to_dataframe(
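A usage sketch of the new ``geography_as_object`` flag; the client setup, project, dataset, table, and column names below are illustrative only and not part of this change:

```python
from google.cloud import bigquery

client = bigquery.Client()
job = client.query(
    "SELECT name, location FROM `my-project.my_dataset.places`"  # hypothetical table
)

# With geography_as_object=True, GEOGRAPHY values come back as shapely
# geometry objects instead of the default string representation
# (requires the shapely package to be installed).
df = job.to_dataframe(geography_as_object=True)
print(df["location"].iloc[0])  # e.g. a shapely geometry object
```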
@@ -1553,6 +1569,101 @@ def to_dataframe(
            progress_bar_type=progress_bar_type,
            create_bqstorage_client=create_bqstorage_client,
            date_as_object=date_as_object,
+            geography_as_object=geography_as_object,
+        )
+
+    # If changing the signature of this method, make sure to apply the same
+    # changes to table.RowIterator.to_geodataframe(), except for the max_results
+    # parameter that should only exist here in the QueryJob method.
+    def to_geodataframe(
+        self,
+        bqstorage_client: "bigquery_storage.BigQueryReadClient" = None,
+        dtypes: Dict[str, Any] = None,
+        progress_bar_type: str = None,
+        create_bqstorage_client: bool = True,
+        date_as_object: bool = True,
+        max_results: Optional[int] = None,
+        geography_column: Optional[str] = None,
+    ) -> "geopandas.GeoDataFrame":
+        """Return a GeoPandas GeoDataFrame from a QueryJob
+
+        Args:
+            bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]):
+                A BigQuery Storage API client. If supplied, use the faster
+                BigQuery Storage API to fetch rows from BigQuery. This
+                API is a billable API.
+
+                This method requires the ``fastavro`` and
+                ``google-cloud-bigquery-storage`` libraries.
+
+                Reading from a specific partition or snapshot is not
+                currently supported by this method.
+
+            dtypes (Optional[Map[str, Union[str, pandas.Series.dtype]]]):
+                A dictionary of column names to pandas ``dtype``s. The provided
+                ``dtype`` is used when constructing the series for the column
+                specified. Otherwise, the default pandas behavior is used.
+
+            progress_bar_type (Optional[str]):
+                If set, use the `tqdm <https://tqdm.github.io/>`_ library to
+                display a progress bar while the data downloads. Install the
+                ``tqdm`` package to use this feature.
+
+                See
+                :func:`~google.cloud.bigquery.table.RowIterator.to_dataframe`
+                for details.
+
+                .. versionadded:: 1.11.0
+            create_bqstorage_client (Optional[bool]):
+                If ``True`` (default), create a BigQuery Storage API client
+                using the default API settings. The BigQuery Storage API
+                is a faster way to fetch rows from BigQuery. See the
+                ``bqstorage_client`` parameter for more information.
+
+                This argument does nothing if ``bqstorage_client`` is supplied.
+
+                .. versionadded:: 1.24.0
+
+            date_as_object (Optional[bool]):
+                If ``True`` (default), cast dates to objects. If ``False``, convert
+                to datetime64[ns] dtype.
+
+                .. versionadded:: 1.26.0
+
+            max_results (Optional[int]):
+                Maximum number of rows to include in the result. No limit by default.
+
+                .. versionadded:: 2.21.0
+
+            geography_column (Optional[str]):
+                If there is more than one GEOGRAPHY column,
+                identifies which one to use to construct a GeoPandas
+                GeoDataFrame. This option can be omitted if there's
+                only one GEOGRAPHY column.
+
+        Returns:
+            geopandas.GeoDataFrame:
+                A :class:`geopandas.GeoDataFrame` populated with row
+                data and column headers from the query results. The
+                column headers are derived from the destination
+                table's schema.
+
+        Raises:
+            ValueError:
+                If the :mod:`geopandas` library cannot be imported, or the
+                :mod:`google.cloud.bigquery_storage_v1` module is
+                required but cannot be imported.
+
+        .. versionadded:: 2.24.0
+        """
+        query_result = wait_for_query(self, progress_bar_type, max_results=max_results)
+        return query_result.to_geodataframe(
+            bqstorage_client=bqstorage_client,
+            dtypes=dtypes,
+            progress_bar_type=progress_bar_type,
+            create_bqstorage_client=create_bqstorage_client,
+            date_as_object=date_as_object,
+            geography_column=geography_column,
        )

    def __iter__(self):
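And a similar sketch for the new ``QueryJob.to_geodataframe()`` method; the query and column names (``centroid``, ``boundary``) are made up for illustration, and ``geography_column`` is only needed because the hypothetical result has two GEOGRAPHY columns:

```python
from google.cloud import bigquery

client = bigquery.Client()
job = client.query(
    "SELECT name, centroid, boundary FROM `my-project.my_dataset.regions`"
)

# geography_column picks which GEOGRAPHY column becomes the GeoDataFrame's
# geometry; with a single GEOGRAPHY column it could be omitted.
gdf = job.to_geodataframe(geography_column="boundary")

# The result is an ordinary geopandas.GeoDataFrame, so the usual
# geospatial operations are available.
print(gdf.geometry.area.head())
```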