@@ -1634,6 +1634,39 @@ def to_pandas(
) -> pandas.DataFrame | pandas.Series:
    """Write DataFrame to pandas DataFrame.

+    **Examples:**
+
+        >>> import bigframes.pandas as bpd
+        >>> bpd.options.display.progress_bar = None
+        >>> df = bpd.DataFrame({'col': [4, 2, 2]})
+
+    Download the data from BigQuery and convert it into an in-memory pandas DataFrame.
+
+        >>> df.to_pandas()
+           col
+        0    4
+        1    2
+        2    2
+
+    Estimate job statistics without processing or downloading data by using `dry_run=True`.
+
+        >>> df.to_pandas(dry_run=True) # doctest: +SKIP
+        columnCount            1
+        columnDtypes           {'col': Int64}
+        indexLevel             1
+        indexDtypes            [Int64]
+        projectId              bigframes-dev
+        location               US
+        jobType                QUERY
+        destinationTable       {'projectId': 'bigframes-dev', 'datasetId': '_...
+        useLegacySql           False
+        referencedTables       None
+        totalBytesProcessed    0
+        cacheHit               False
+        statementType          SELECT
+        creationTime           2025-04-02 20:17:12.038000+00:00
+        dtype: object
+

    Args:
        max_download_size (int, default None):
            Download size threshold in MB. If max_download_size is exceeded when downloading data
@@ -1666,9 +1699,6 @@ def to_pandas(
            downsampled rows and all columns of this DataFrame. If dry_run is set, a pandas
            Series containing dry run statistics will be returned.
    """
-
-    # TODO(orrbradford): Optimize this in future. Potentially some cases where we can return the stored query job
-
    if dry_run:
        dry_run_stats, dry_run_job = self._block._compute_dry_run(
            max_download_size=max_download_size,
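Reading the new dry-run contract end to end, a rough sketch (not part of the patch) of guarding a full download behind the documented statistics; the `totalBytesProcessed` label comes from the example output above, and the 1 GB budget is an arbitrary assumption:

    # Sketch only: check the estimated scan size before materializing.
    stats = df.to_pandas(dry_run=True)  # pandas Series of job statistics
    if stats["totalBytesProcessed"] < 10**9:  # assumed ~1 GB budget
        pdf = df.to_pandas()  # safe to download in full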
@@ -1702,11 +1732,40 @@ def to_pandas_batches(
    page_size and max_results determine the size and number of batches,
    see https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.job.QueryJob#google_cloud_bigquery_job_QueryJob_result

+    **Examples:**
+
+        >>> import bigframes.pandas as bpd
+        >>> bpd.options.display.progress_bar = None
+        >>> df = bpd.DataFrame({'col': [4, 3, 2, 2, 3]})
+
+    Iterate through the results in batches, limiting the total rows yielded
+    across all batches via `max_results`:
+
+        >>> for df_batch in df.to_pandas_batches(max_results=3):
+        ...     print(df_batch)
+           col
+        0    4
+        1    3
+        2    2
+
+    Alternatively, control the approximate size of each batch using `page_size`
+    and fetch batches manually using `next()`:
+
+        >>> it = df.to_pandas_batches(page_size=2)
+        >>> next(it)
+           col
+        0    4
+        1    3
+        >>> next(it)
+           col
+        2    2
+        3    2
+
    Args:
        page_size (int, default None):
-            The size of each batch.
+            The maximum number of rows of each batch. Non-positive values are ignored.
        max_results (int, default None):
-            If given, only download this many rows at maximum.
+            The maximum total number of rows of all batches.
        allow_large_results (bool, default None):
            If not None, overrides the global setting to allow or disallow large query results
            over the default size limit of 10 GB.
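The one documented parameter without a doctest is `allow_large_results`. As a rough sketch, not part of the patch, of how the per-call override might combine with batching (the public table and the `process` handler are placeholders):

    # Sketch only: stream a large result set in batches, overriding the
    # session-level large-results setting for this one call.
    import bigframes.pandas as bpd

    df = bpd.read_gbq("bigquery-public-data.usa_names.usa_1910_2013")
    for batch in df.to_pandas_batches(page_size=10_000, allow_large_results=True):
        process(batch)  # placeholder per-batch handler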