@@ -34,6 +34,11 @@
 except ImportError:  # pragma: NO COVER
     pyarrow = None

+try:
+    import db_dtypes  # type: ignore
+except ImportError:  # pragma: NO COVER
+    db_dtypes = None
+
 try:
     import geopandas  # type: ignore
 except ImportError:
@@ -55,6 +60,7 @@
 import google.cloud._helpers  # type: ignore
 from google.cloud.bigquery import _helpers
 from google.cloud.bigquery import _pandas_helpers
+from google.cloud.bigquery.enums import DefaultPandasDTypes
 from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError
 from google.cloud.bigquery.schema import _build_schema_resource
 from google.cloud.bigquery.schema import _parse_schema_resource
@@ -88,6 +94,11 @@

 _TABLE_HAS_NO_SCHEMA = 'Table has no schema: call "client.get_table()"'

+_NO_SUPPORTED_DTYPE = (
+    "The dtype cannot be converted to a pandas ExtensionArray "
+    "because the necessary `__from_arrow__` attribute is missing."
+)
+

 def _reference_getter(table):
     """A :class:`~google.cloud.bigquery.table.TableReference` pointing to
@@ -1920,6 +1931,10 @@ def to_dataframe(
         progress_bar_type: str = None,
         create_bqstorage_client: bool = True,
         geography_as_object: bool = False,
+        bool_dtype: Union[Any, None] = DefaultPandasDTypes.BOOL_DTYPE,
+        int_dtype: Union[Any, None] = DefaultPandasDTypes.INT_DTYPE,
+        float_dtype: Union[Any, None] = None,
+        string_dtype: Union[Any, None] = None,
     ) -> "pandas.DataFrame":
         """Create a pandas DataFrame by loading all pages of a query.

@@ -1958,6 +1973,7 @@ def to_dataframe(
                 progress bar as a graphical dialog box.

                 .. versionadded:: 1.11.0
+
             create_bqstorage_client (Optional[bool]):
                 If ``True`` (default), create a BigQuery Storage API client
                 using the default API settings. The BigQuery Storage API
@@ -1975,6 +1991,46 @@ def to_dataframe(

                 .. versionadded:: 2.24.0

+            bool_dtype (Optional[pandas.Series.dtype, None]):
+                If set, indicate a pandas ExtensionDtype (e.g. ``pandas.BooleanDtype()``)
+                to convert BigQuery Boolean type, instead of relying on the default
+                ``pandas.BooleanDtype()``. If you explicitly set the value to ``None``,
+                then the data type will be ``numpy.dtype("bool")``. BigQuery Boolean
+                type can be found at:
+                https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#boolean_type
+
+                .. versionadded:: 3.7.1
+
+            int_dtype (Optional[pandas.Series.dtype, None]):
+                If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Int64Dtype()``)
+                to convert BigQuery Integer types, instead of relying on the default
+                ``pandas.Int64Dtype()``. If you explicitly set the value to ``None``,
+                then the data type will be ``numpy.dtype("int64")``. A list of BigQuery
+                Integer types can be found at:
+                https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#integer_types
+
+                .. versionadded:: 3.7.1
+
+            float_dtype (Optional[pandas.Series.dtype, None]):
+                If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Float32Dtype()``)
+                to convert BigQuery Float type, instead of relying on the default
+                ``numpy.dtype("float64")``. If you explicitly set the value to ``None``,
+                then the data type will be ``numpy.dtype("float64")``. BigQuery Float
+                type can be found at:
+                https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#floating_point_types
+
+                .. versionadded:: 3.7.1
+
+            string_dtype (Optional[pandas.Series.dtype, None]):
+                If set, indicate a pandas ExtensionDtype (e.g. ``pandas.StringDtype()``) to
+                convert BigQuery String type, instead of relying on the default
+                ``numpy.dtype("object")``. If you explicitly set the value to ``None``,
+                then the data type will be ``numpy.dtype("object")``. BigQuery String
+                type can be found at:
+                https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#string_type
+
+                .. versionadded:: 3.7.1
+
         Returns:
             pandas.DataFrame:
                 A :class:`~pandas.DataFrame` populated with row data and column
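A minimal usage sketch of the new parameters (illustrative only: the project, table, and column names are made up, and a configured `bigquery.Client` with working credentials is assumed):

import pandas
from google.cloud import bigquery

client = bigquery.Client()
rows = client.query(
    "SELECT is_active, visit_count, score, name FROM `my-project.my_dataset.users`"
).result()

df = rows.to_dataframe(
    # The first two mirror the defaults; passing None instead falls back to
    # numpy.dtype("bool") / numpy.dtype("int64") as documented above.
    bool_dtype=pandas.BooleanDtype(),
    int_dtype=pandas.Int64Dtype(),
    # Opt in to nullable pandas dtypes for FLOAT64 and STRING columns,
    # replacing the numpy float64 / object defaults.
    float_dtype=pandas.Float64Dtype(),
    string_dtype=pandas.StringDtype(),
)
print(df.dtypes)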
@@ -1987,14 +2043,34 @@ def to_dataframe(
                 the :mod:`google.cloud.bigquery_storage_v1` module is
                 required but cannot be imported. Also if
                 `geography_as_object` is `True`, but the
-                :mod:`shapely` library cannot be imported.
+                :mod:`shapely` library cannot be imported. Also if
+                `bool_dtype`, `int_dtype`, or another dtype parameter
+                is not a supported dtype.

         """
         _pandas_helpers.verify_pandas_imports()

         if geography_as_object and shapely is None:
             raise ValueError(_NO_SHAPELY_ERROR)

+        if bool_dtype is DefaultPandasDTypes.BOOL_DTYPE:
+            bool_dtype = pandas.BooleanDtype()
+
+        if int_dtype is DefaultPandasDTypes.INT_DTYPE:
+            int_dtype = pandas.Int64Dtype()
+
+        if bool_dtype is not None and not hasattr(bool_dtype, "__from_arrow__"):
+            raise ValueError("bool_dtype", _NO_SUPPORTED_DTYPE)
+
+        if int_dtype is not None and not hasattr(int_dtype, "__from_arrow__"):
+            raise ValueError("int_dtype", _NO_SUPPORTED_DTYPE)
+
+        if float_dtype is not None and not hasattr(float_dtype, "__from_arrow__"):
+            raise ValueError("float_dtype", _NO_SUPPORTED_DTYPE)
+
+        if string_dtype is not None and not hasattr(string_dtype, "__from_arrow__"):
+            raise ValueError("string_dtype", _NO_SUPPORTED_DTYPE)
+
         if dtypes is None:
             dtypes = {}

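The sentinel defaults (`DefaultPandasDTypes.BOOL_DTYPE` / `INT_DTYPE`) are what let the method tell "argument omitted" apart from "explicitly passed None"; a sketch of the resulting behavior, reusing `rows` from the earlier example:

# Defaults (sentinels): BOOL and INT64 columns come back as the nullable
# pandas.BooleanDtype() / pandas.Int64Dtype().
df_default = rows.to_dataframe()

# Explicit None: skip the ExtensionDtype and keep numpy-based dtypes
# (numpy.dtype("bool") / numpy.dtype("int64"), as the docstring describes).
df_numpy = rows.to_dataframe(bool_dtype=None, int_dtype=None)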
@@ -2019,15 +2095,15 @@ def to_dataframe(
             for col in record_batch
             # Type can be date32 or date64 (plus units).
             # See: https://arrow.apache.org/docs/python/api/datatypes.html
-            if str(col.type).startswith("date")
+            if pyarrow.types.is_date(col.type)
         )

         timestamp_as_object = not all(
             self.__can_cast_timestamp_ns(col)
             for col in record_batch
-            # Type can be timestamp (plus units and time zone).
+            # Type can be datetime and timestamp (plus units and time zone).
             # See: https://arrow.apache.org/docs/python/api/datatypes.html
-            if str(col.type).startswith("timestamp")
+            if pyarrow.types.is_timestamp(col.type)
         )

         if len(record_batch) > 0:
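The switch from string-prefix checks to pyarrow's type predicates matches every date/timestamp unit and time zone without depending on the type's string representation; a standalone illustration:

import pyarrow

print(pyarrow.types.is_date(pyarrow.date32()))   # True
print(pyarrow.types.is_date(pyarrow.date64()))   # True
print(pyarrow.types.is_timestamp(pyarrow.timestamp("us", tz="America/New_York")))  # True
print(pyarrow.types.is_timestamp(pyarrow.time64("us")))  # False -- time-of-day, not a timestamp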
@@ -2036,7 +2112,11 @@ def to_dataframe(
                 timestamp_as_object=timestamp_as_object,
                 integer_object_nulls=True,
                 types_mapper=_pandas_helpers.default_types_mapper(
-                    date_as_object=date_as_object
+                    date_as_object=date_as_object,
+                    bool_dtype=bool_dtype,
+                    int_dtype=int_dtype,
+                    float_dtype=float_dtype,
+                    string_dtype=string_dtype,
                 ),
             )
         else:
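For readers unfamiliar with the `types_mapper` hook: `RecordBatch.to_pandas` calls it with each column's Arrow type and uses the returned pandas ExtensionDtype, or the default conversion when it returns None. Below is a hedged sketch of that callback contract only; the real `_pandas_helpers.default_types_mapper` is more involved and, per this change, also honors the four dtype arguments passed above.

import pandas
import pyarrow


def example_types_mapper(arrow_type):
    # Return an ExtensionDtype to override the conversion for this Arrow
    # type, or None to keep pyarrow's standard numpy-based conversion.
    if pyarrow.types.is_boolean(arrow_type):
        return pandas.BooleanDtype()
    if pyarrow.types.is_integer(arrow_type):
        return pandas.Int64Dtype()
    return None


batch = pyarrow.record_batch([pyarrow.array([1, None, 3])], names=["n"])
print(batch.to_pandas(types_mapper=example_types_mapper).dtypes)  # n: Int64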
@@ -2233,6 +2313,10 @@ def to_dataframe(
         progress_bar_type=None,
         create_bqstorage_client=True,
         geography_as_object=False,
+        bool_dtype=None,
+        int_dtype=None,
+        float_dtype=None,
+        string_dtype=None,
     ) -> "pandas.DataFrame":
         """Create an empty dataframe.

@@ -2241,6 +2325,11 @@ def to_dataframe(
             dtypes (Any): Ignored. Added for compatibility with RowIterator.
             progress_bar_type (Any): Ignored. Added for compatibility with RowIterator.
             create_bqstorage_client (bool): Ignored. Added for compatibility with RowIterator.
+            geography_as_object (bool): Ignored. Added for compatibility with RowIterator.
+            bool_dtype (Any): Ignored. Added for compatibility with RowIterator.
+            int_dtype (Any): Ignored. Added for compatibility with RowIterator.
+            float_dtype (Any): Ignored. Added for compatibility with RowIterator.
+            string_dtype (Any): Ignored. Added for compatibility with RowIterator.

         Returns:
             pandas.DataFrame: An empty :class:`~pandas.DataFrame`.