@@ -1812,6 +1812,7 @@ def to_arrow_iterable(
         self,
         bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None,
         max_queue_size: int = _pandas_helpers._MAX_QUEUE_SIZE_DEFAULT,  # type: ignore
+        max_stream_count: Optional[int] = None,
     ) -> Iterator["pyarrow.RecordBatch"]:
         """[Beta] Create an iterable of :class:`pyarrow.RecordBatch`, to process the table as a stream.
@@ -1836,6 +1837,22 @@ def to_arrow_iterable(
                 created by the server. If ``max_queue_size`` is :data:`None`, the queue
                 size is infinite.
 
+            max_stream_count (Optional[int]):
+                The maximum number of parallel download streams when using
+                the BigQuery Storage API. Ignored if the BigQuery Storage
+                API is not used.
+
+                This setting also has no effect if the query result is
+                deterministically ordered with ORDER BY, in which case the
+                number of download streams is always 1.
+
+                If set to 0 or None (the default), the number of download
+                streams is determined by the BigQuery server. However, this
+                behaviour can require a lot of memory to store temporary
+                download results, especially with very large queries. In
+                that case, setting this parameter to a value greater than 0
+                can help reduce system resource consumption.
+
         Returns:
             pyarrow.RecordBatch:
                 A generator of :class:`~pyarrow.RecordBatch`.
@@ -1852,6 +1869,7 @@ def to_arrow_iterable(
             preserve_order=self._preserve_order,
             selected_fields=self._selected_fields,
             max_queue_size=max_queue_size,
+            max_stream_count=max_stream_count,
         )
         tabledata_list_download = functools.partial(
             _pandas_helpers.download_arrow_row_iterator, iter(self.pages), self.schema
@@ -1978,6 +1996,7 @@ def to_dataframe_iterable(
         bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None,
         dtypes: Optional[Dict[str, Any]] = None,
         max_queue_size: int = _pandas_helpers._MAX_QUEUE_SIZE_DEFAULT,  # type: ignore
+        max_stream_count: Optional[int] = None,
     ) -> "pandas.DataFrame":
         """Create an iterable of pandas DataFrames, to process the table as a stream.
@@ -2008,6 +2027,22 @@ def to_dataframe_iterable(
 
                 .. versionadded:: 2.14.0
 
+            max_stream_count (Optional[int]):
+                The maximum number of parallel download streams when using
+                the BigQuery Storage API. Ignored if the BigQuery Storage
+                API is not used.
+
+                This setting also has no effect if the query result is
+                deterministically ordered with ORDER BY, in which case the
+                number of download streams is always 1.
+
+                If set to 0 or None (the default), the number of download
+                streams is determined by the BigQuery server. However, this
+                behaviour can require a lot of memory to store temporary
+                download results, especially with very large queries. In
+                that case, setting this parameter to a value greater than 0
+                can help reduce system resource consumption.
+
         Returns:
             pandas.DataFrame:
                 A generator of :class:`~pandas.DataFrame`.
@@ -2034,6 +2069,7 @@ def to_dataframe_iterable(
             preserve_order=self._preserve_order,
             selected_fields=self._selected_fields,
             max_queue_size=max_queue_size,
+            max_stream_count=max_stream_count,
         )
         tabledata_list_download = functools.partial(
             _pandas_helpers.download_dataframe_row_iterator,
@@ -2690,6 +2726,7 @@ def to_dataframe_iterable(
         bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None,
         dtypes: Optional[Dict[str, Any]] = None,
         max_queue_size: Optional[int] = None,
+        max_stream_count: Optional[int] = None,
     ) -> Iterator["pandas.DataFrame"]:
         """Create an iterable of pandas DataFrames, to process the table as a stream.
@@ -2705,6 +2742,9 @@ def to_dataframe_iterable(
             max_queue_size:
                 Ignored. Added for compatibility with RowIterator.
 
+            max_stream_count:
+                Ignored. Added for compatibility with RowIterator.
+
         Returns:
             An iterator yielding a single empty :class:`~pandas.DataFrame`.
@@ -2719,6 +2759,7 @@ def to_arrow_iterable(
         self,
         bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None,
         max_queue_size: Optional[int] = None,
+        max_stream_count: Optional[int] = None,
     ) -> Iterator["pyarrow.RecordBatch"]:
         """Create an iterable of :class:`pyarrow.RecordBatch`, to process the table as a stream.
@@ -2731,6 +2772,9 @@ def to_arrow_iterable(
             max_queue_size:
                 Ignored. Added for compatibility with RowIterator.
 
+            max_stream_count:
+                Ignored. Added for compatibility with RowIterator.
+
         Returns:
             An iterator yielding a single empty :class:`~pyarrow.RecordBatch`.
         """