@@ -1456,7 +1456,12 @@ def _to_arrow_iterable(self, bqstorage_client=None):
1456
1456
1457
1457
# If changing the signature of this method, make sure to apply the same
1458
1458
# changes to job.QueryJob.to_arrow()
1459
- def to_arrow (self , progress_bar_type = None , bqstorage_client = None ):
1459
+ def to_arrow (
1460
+ self ,
1461
+ progress_bar_type = None ,
1462
+ bqstorage_client = None ,
1463
+ create_bqstorage_client = False ,
1464
+ ):
1460
1465
"""[Beta] Create a :class:`pyarrow.Table` by loading all pages of a
1461
1466
table or query.
1462
1467
@@ -1489,6 +1494,16 @@ def to_arrow(self, progress_bar_type=None, bqstorage_client=None):
1489
1494
1490
1495
Reading from a specific partition or snapshot is not
1491
1496
currently supported by this method.
1497
+ create_bqstorage_client (bool):
1498
+ **Beta Feature** Optional. If ``True``, create a BigQuery
1499
+ Storage API client using the default API settings. The
1500
+ BigQuery Storage API is a faster way to fetch rows from
1501
+ BigQuery. See the ``bqstorage_client`` parameter for more
1502
+ information.
1503
+
1504
+ This argument does nothing if ``bqstorage_client`` is supplied.
1505
+
1506
+ .. versionadded:: 1.24.0
1492
1507
1493
1508
Returns:
1494
1509
pyarrow.Table
@@ -1504,22 +1519,33 @@ def to_arrow(self, progress_bar_type=None, bqstorage_client=None):
1504
1519
if pyarrow is None :
1505
1520
raise ValueError (_NO_PYARROW_ERROR )
1506
1521
1507
- progress_bar = self ._get_progress_bar (progress_bar_type )
1522
+ owns_bqstorage_client = False
1523
+ if not bqstorage_client and create_bqstorage_client :
1524
+ owns_bqstorage_client = True
1525
+ bqstorage_client = self .client ._create_bqstorage_client ()
1508
1526
1509
- record_batches = []
1510
- for record_batch in self ._to_arrow_iterable (bqstorage_client = bqstorage_client ):
1511
- record_batches .append (record_batch )
1527
+ try :
1528
+ progress_bar = self ._get_progress_bar (progress_bar_type )
1512
1529
1513
- if progress_bar is not None :
1514
- # In some cases, the number of total rows is not populated
1515
- # until the first page of rows is fetched. Update the
1516
- # progress bar's total to keep an accurate count.
1517
- progress_bar .total = progress_bar .total or self .total_rows
1518
- progress_bar .update (record_batch .num_rows )
1530
+ record_batches = []
1531
+ for record_batch in self ._to_arrow_iterable (
1532
+ bqstorage_client = bqstorage_client
1533
+ ):
1534
+ record_batches .append (record_batch )
1519
1535
1520
- if progress_bar is not None :
1521
- # Indicate that the download has finished.
1522
- progress_bar .close ()
1536
+ if progress_bar is not None :
1537
+ # In some cases, the number of total rows is not populated
1538
+ # until the first page of rows is fetched. Update the
1539
+ # progress bar's total to keep an accurate count.
1540
+ progress_bar .total = progress_bar .total or self .total_rows
1541
+ progress_bar .update (record_batch .num_rows )
1542
+
1543
+ if progress_bar is not None :
1544
+ # Indicate that the download has finished.
1545
+ progress_bar .close ()
1546
+ finally :
1547
+ if owns_bqstorage_client :
1548
+ bqstorage_client .transport .channel .close ()
1523
1549
1524
1550
if record_batches :
1525
1551
return pyarrow .Table .from_batches (record_batches )
@@ -1558,14 +1584,20 @@ def _to_dataframe_iterable(self, bqstorage_client=None, dtypes=None):
1558
1584
1559
1585
# If changing the signature of this method, make sure to apply the same
1560
1586
# changes to job.QueryJob.to_dataframe()
1561
- def to_dataframe (self , bqstorage_client = None , dtypes = None , progress_bar_type = None ):
1587
+ def to_dataframe (
1588
+ self ,
1589
+ bqstorage_client = None ,
1590
+ dtypes = None ,
1591
+ progress_bar_type = None ,
1592
+ create_bqstorage_client = False ,
1593
+ ):
1562
1594
"""Create a pandas DataFrame by loading all pages of a query.
1563
1595
1564
1596
Args:
1565
1597
bqstorage_client (google.cloud.bigquery_storage_v1beta1.BigQueryStorageClient):
1566
1598
**Beta Feature** Optional. A BigQuery Storage API client. If
1567
1599
supplied, use the faster BigQuery Storage API to fetch rows
1568
- from BigQuery. This API is a billable API.
1600
+ from BigQuery.
1569
1601
1570
1602
This method requires the ``pyarrow`` and
1571
1603
``google-cloud-bigquery-storage`` libraries.
@@ -1602,6 +1634,16 @@ def to_dataframe(self, bqstorage_client=None, dtypes=None, progress_bar_type=Non
1602
1634
progress bar as a graphical dialog box.
1603
1635
1604
1636
.. versionadded:: 1.11.0
1637
+ create_bqstorage_client (bool):
1638
+ **Beta Feature** Optional. If ``True``, create a BigQuery
1639
+ Storage API client using the default API settings. The
1640
+ BigQuery Storage API is a faster way to fetch rows from
1641
+ BigQuery. See the ``bqstorage_client`` parameter for more
1642
+ information.
1643
+
1644
+ This argument does nothing if ``bqstorage_client`` is supplied.
1645
+
1646
+ .. versionadded:: 1.24.0
1605
1647
1606
1648
Returns:
1607
1649
pandas.DataFrame:
@@ -1621,32 +1663,44 @@ def to_dataframe(self, bqstorage_client=None, dtypes=None, progress_bar_type=Non
1621
1663
if dtypes is None :
1622
1664
dtypes = {}
1623
1665
1624
- if bqstorage_client and self .max_results is not None :
1666
+ if (
1667
+ bqstorage_client or create_bqstorage_client
1668
+ ) and self .max_results is not None :
1625
1669
warnings .warn (
1626
1670
"Cannot use bqstorage_client if max_results is set, "
1627
1671
"reverting to fetching data with the tabledata.list endpoint." ,
1628
1672
stacklevel = 2 ,
1629
1673
)
1674
+ create_bqstorage_client = False
1630
1675
bqstorage_client = None
1631
1676
1632
- progress_bar = self ._get_progress_bar (progress_bar_type )
1677
+ owns_bqstorage_client = False
1678
+ if not bqstorage_client and create_bqstorage_client :
1679
+ owns_bqstorage_client = True
1680
+ bqstorage_client = self .client ._create_bqstorage_client ()
1633
1681
1634
- frames = []
1635
- for frame in self ._to_dataframe_iterable (
1636
- bqstorage_client = bqstorage_client , dtypes = dtypes
1637
- ):
1638
- frames .append (frame )
1682
+ try :
1683
+ progress_bar = self ._get_progress_bar (progress_bar_type )
1639
1684
1640
- if progress_bar is not None :
1641
- # In some cases, the number of total rows is not populated
1642
- # until the first page of rows is fetched. Update the
1643
- # progress bar's total to keep an accurate count.
1644
- progress_bar .total = progress_bar .total or self .total_rows
1645
- progress_bar .update (len (frame ))
1685
+ frames = []
1686
+ for frame in self ._to_dataframe_iterable (
1687
+ bqstorage_client = bqstorage_client , dtypes = dtypes
1688
+ ):
1689
+ frames .append (frame )
1690
+
1691
+ if progress_bar is not None :
1692
+ # In some cases, the number of total rows is not populated
1693
+ # until the first page of rows is fetched. Update the
1694
+ # progress bar's total to keep an accurate count.
1695
+ progress_bar .total = progress_bar .total or self .total_rows
1696
+ progress_bar .update (len (frame ))
1646
1697
1647
- if progress_bar is not None :
1648
- # Indicate that the download has finished.
1649
- progress_bar .close ()
1698
+ if progress_bar is not None :
1699
+ # Indicate that the download has finished.
1700
+ progress_bar .close ()
1701
+ finally :
1702
+ if owns_bqstorage_client :
1703
+ bqstorage_client .transport .channel .close ()
1650
1704
1651
1705
# Avoid concatting an empty list.
1652
1706
if not frames :
@@ -1667,11 +1721,18 @@ class _EmptyRowIterator(object):
1667
1721
pages = ()
1668
1722
total_rows = 0
1669
1723
1670
- def to_arrow (self , progress_bar_type = None ):
1724
+ def to_arrow (
1725
+ self ,
1726
+ progress_bar_type = None ,
1727
+ bqstorage_client = None ,
1728
+ create_bqstorage_client = False ,
1729
+ ):
1671
1730
"""[Beta] Create an empty :class:`pyarrow.Table`.
1672
1731
1673
1732
Args:
1674
1733
progress_bar_type (Optional[str]): Ignored. Added for compatibility with RowIterator.
1734
+ bqstorage_client (Any): Ignored. Added for compatibility with RowIterator.
1735
+ create_bqstorage_client (bool): Ignored. Added for compatibility with RowIterator.
1675
1736
1676
1737
Returns:
1677
1738
pyarrow.Table: An empty :class:`pyarrow.Table`.
@@ -1680,13 +1741,20 @@ def to_arrow(self, progress_bar_type=None):
1680
1741
raise ValueError (_NO_PYARROW_ERROR )
1681
1742
return pyarrow .Table .from_arrays (())
1682
1743
1683
- def to_dataframe (self , bqstorage_client = None , dtypes = None , progress_bar_type = None ):
1744
+ def to_dataframe (
1745
+ self ,
1746
+ bqstorage_client = None ,
1747
+ dtypes = None ,
1748
+ progress_bar_type = None ,
1749
+ create_bqstorage_client = False ,
1750
+ ):
1684
1751
"""Create an empty dataframe.
1685
1752
1686
1753
Args:
1687
1754
bqstorage_client (Any): Ignored. Added for compatibility with RowIterator.
1688
1755
dtypes (Any): Ignored. Added for compatibility with RowIterator.
1689
1756
progress_bar_type (Any): Ignored. Added for compatibility with RowIterator.
1757
+ create_bqstorage_client (bool): Ignored. Added for compatibility with RowIterator.
1690
1758
1691
1759
Returns:
1692
1760
pandas.DataFrame: An empty :class:`~pandas.DataFrame`.
0 commit comments