@@ -1935,6 +1935,10 @@ def to_dataframe(
1935
1935
int_dtype : Union [Any , None ] = DefaultPandasDTypes .INT_DTYPE ,
1936
1936
float_dtype : Union [Any , None ] = None ,
1937
1937
string_dtype : Union [Any , None ] = None ,
1938
+ date_dtype : Union [Any , None ] = DefaultPandasDTypes .DATE_DTYPE ,
1939
+ datetime_dtype : Union [Any , None ] = None ,
1940
+ time_dtype : Union [Any , None ] = DefaultPandasDTypes .TIME_DTYPE ,
1941
+ timestamp_dtype : Union [Any , None ] = None ,
1938
1942
) -> "pandas.DataFrame" :
1939
1943
"""Create a pandas DataFrame by loading all pages of a query.
1940
1944
@@ -1999,7 +2003,7 @@ def to_dataframe(
1999
2003
type can be found at:
2000
2004
https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#boolean_type
2001
2005
2002
- .. versionadded:: 3.7.1
2006
+ .. versionadded:: 3.8.0
2003
2007
2004
2008
int_dtype (Optional[pandas.Series.dtype, None]):
2005
2009
If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Int64Dtype()``)
@@ -2009,7 +2013,7 @@ def to_dataframe(
2009
2013
Integer types can be found at:
2010
2014
https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#integer_types
2011
2015
2012
- .. versionadded:: 3.7.1
2016
+ .. versionadded:: 3.8.0
2013
2017
2014
2018
float_dtype (Optional[pandas.Series.dtype, None]):
2015
2019
If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Float32Dtype()``)
@@ -2019,7 +2023,7 @@ def to_dataframe(
2019
2023
type can be found at:
2020
2024
https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#floating_point_types
2021
2025
2022
- .. versionadded:: 3.7.1
2026
+ .. versionadded:: 3.8.0
2023
2027
2024
2028
string_dtype (Optional[pandas.Series.dtype, None]):
2025
2029
If set, indicate a pandas ExtensionDtype (e.g. ``pandas.StringDtype()``) to
@@ -2029,7 +2033,50 @@ def to_dataframe(
2029
2033
type can be found at:
2030
2034
https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#string_type
2031
2035
2032
- .. versionadded:: 3.7.1
2036
+ .. versionadded:: 3.8.0
2037
+
2038
+ date_dtype (Optional[pandas.Series.dtype, None]):
2039
+ If set, indicate a pandas ExtensionDtype (e.g.
2040
+ ``pandas.ArrowDtype(pyarrow.date32())``) to convert BigQuery Date
2041
+ type, instead of relying on the default ``db_dtypes.DateDtype()``.
2042
+ If you explicitly set the value to ``None``, then the data type will be
2043
+ ``numpy.dtype("datetime64[ns]")`` or ``object`` if out of bound. BigQuery
2044
+ Date type can be found at:
2045
+ https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#date_type
2046
+
2047
+ .. versionadded:: 3.10.0
2048
+
2049
+ datetime_dtype (Optional[pandas.Series.dtype, None]):
2050
+ If set, indicate a pandas ExtensionDtype (e.g.
2051
+ ``pandas.ArrowDtype(pyarrow.timestamp("us"))``) to convert BigQuery Datetime
2052
+ type, instead of relying on the default ``numpy.dtype("datetime64[ns]")``.
2053
+ If you explicitly set the value to ``None``, then the data type will be
2054
+ ``numpy.dtype("datetime64[ns]")`` or ``object`` if out of bound. BigQuery
2055
+ Datetime type can be found at:
2056
+ https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#datetime_type
2057
+
2058
+ .. versionadded:: 3.10.0
2059
+
2060
+ time_dtype (Optional[pandas.Series.dtype, None]):
2061
+ If set, indicate a pandas ExtensionDtype (e.g.
2062
+ ``pandas.ArrowDtype(pyarrow.time64("us"))``) to convert BigQuery Time
2063
+ type, instead of relying on the default ``db_dtypes.TimeDtype()``.
2064
+ If you explicitly set the value to ``None``, then the data type will be
2065
+ ``numpy.dtype("object")``. BigQuery Time type can be found at:
2066
+ https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#time_type
2067
+
2068
+ .. versionadded:: 3.10.0
2069
+
2070
+ timestamp_dtype (Optional[pandas.Series.dtype, None]):
2071
+ If set, indicate a pandas ExtensionDtype (e.g.
2072
+ ``pandas.ArrowDtype(pyarrow.timestamp("us", tz="UTC"))``) to convert BigQuery Timestamp
2073
+ type, instead of relying on the default ``numpy.dtype("datetime64[ns, UTC]")``.
2074
+ If you explicitly set the value to ``None``, then the data type will be
2075
+ ``numpy.dtype("datetime64[ns, UTC]")`` or ``object`` if out of bound. BigQuery
2076
+ Timestamp type can be found at:
2077
+ https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#timestamp_type
2078
+
2079
+ .. versionadded:: 3.10.0
2033
2080
2034
2081
Returns:
2035
2082
pandas.DataFrame:
@@ -2059,6 +2106,9 @@ def to_dataframe(
2059
2106
if int_dtype is DefaultPandasDTypes .INT_DTYPE :
2060
2107
int_dtype = pandas .Int64Dtype ()
2061
2108
2109
+ if time_dtype is DefaultPandasDTypes .TIME_DTYPE :
2110
+ time_dtype = db_dtypes .TimeDtype ()
2111
+
2062
2112
if bool_dtype is not None and not hasattr (bool_dtype , "__from_arrow__" ):
2063
2113
raise ValueError ("bool_dtype" , _NO_SUPPORTED_DTYPE )
2064
2114
@@ -2071,6 +2121,24 @@ def to_dataframe(
2071
2121
if string_dtype is not None and not hasattr (string_dtype , "__from_arrow__" ):
2072
2122
raise ValueError ("string_dtype" , _NO_SUPPORTED_DTYPE )
2073
2123
2124
+ if (
2125
+ date_dtype is not None
2126
+ and date_dtype is not DefaultPandasDTypes .DATE_DTYPE
2127
+ and not hasattr (date_dtype , "__from_arrow__" )
2128
+ ):
2129
+ raise ValueError ("date_dtype" , _NO_SUPPORTED_DTYPE )
2130
+
2131
+ if datetime_dtype is not None and not hasattr (datetime_dtype , "__from_arrow__" ):
2132
+ raise ValueError ("datetime_dtype" , _NO_SUPPORTED_DTYPE )
2133
+
2134
+ if time_dtype is not None and not hasattr (time_dtype , "__from_arrow__" ):
2135
+ raise ValueError ("time_dtype" , _NO_SUPPORTED_DTYPE )
2136
+
2137
+ if timestamp_dtype is not None and not hasattr (
2138
+ timestamp_dtype , "__from_arrow__"
2139
+ ):
2140
+ raise ValueError ("timestamp_dtype" , _NO_SUPPORTED_DTYPE )
2141
+
2074
2142
if dtypes is None :
2075
2143
dtypes = {}
2076
2144
@@ -2086,25 +2154,29 @@ def to_dataframe(
2086
2154
create_bqstorage_client = create_bqstorage_client ,
2087
2155
)
2088
2156
2089
- # When converting date or timestamp values to nanosecond precision, the result
2090
- # can be out of pyarrow bounds. To avoid the error when converting to
2091
- # Pandas, we set the date_as_object or timestamp_as_object parameter to True,
2092
- # if necessary.
2093
- date_as_object = not all (
2094
- self .__can_cast_timestamp_ns (col )
2095
- for col in record_batch
2096
- # Type can be date32 or date64 (plus units).
2097
- # See: https://arrow.apache.org/docs/python/api/datatypes.html
2098
- if pyarrow .types .is_date (col .type )
2099
- )
2157
+ # The default date dtype, `db_dtypes.DateDtype()`, can raise an out-of-bounds error
2158
+ # when pyarrow converts date values to nanosecond precision. To avoid the error, we
2159
+ # set the date_as_object parameter to True, if necessary.
2160
+ date_as_object = False
2161
+ if date_dtype is DefaultPandasDTypes .DATE_DTYPE :
2162
+ date_dtype = db_dtypes .DateDtype ()
2163
+ date_as_object = not all (
2164
+ self .__can_cast_timestamp_ns (col )
2165
+ for col in record_batch
2166
+ # Type can be date32 or date64 (plus units).
2167
+ # See: https://arrow.apache.org/docs/python/api/datatypes.html
2168
+ if pyarrow .types .is_date (col .type )
2169
+ )
2100
2170
2101
- timestamp_as_object = not all (
2102
- self .__can_cast_timestamp_ns (col )
2103
- for col in record_batch
2104
- # Type can be datetime and timestamp (plus units and time zone).
2105
- # See: https://arrow.apache.org/docs/python/api/datatypes.html
2106
- if pyarrow .types .is_timestamp (col .type )
2107
- )
2171
+ timestamp_as_object = False
2172
+ if datetime_dtype is None and timestamp_dtype is None :
2173
+ timestamp_as_object = not all (
2174
+ self .__can_cast_timestamp_ns (col )
2175
+ for col in record_batch
2176
+ # Type can be datetime and timestamp (plus units and time zone).
2177
+ # See: https://arrow.apache.org/docs/python/api/datatypes.html
2178
+ if pyarrow .types .is_timestamp (col .type )
2179
+ )
2108
2180
2109
2181
if len (record_batch ) > 0 :
2110
2182
df = record_batch .to_pandas (
@@ -2117,6 +2189,10 @@ def to_dataframe(
2117
2189
int_dtype = int_dtype ,
2118
2190
float_dtype = float_dtype ,
2119
2191
string_dtype = string_dtype ,
2192
+ date_dtype = date_dtype ,
2193
+ datetime_dtype = datetime_dtype ,
2194
+ time_dtype = time_dtype ,
2195
+ timestamp_dtype = timestamp_dtype ,
2120
2196
),
2121
2197
)
2122
2198
else :
@@ -2317,6 +2393,10 @@ def to_dataframe(
2317
2393
int_dtype = None ,
2318
2394
float_dtype = None ,
2319
2395
string_dtype = None ,
2396
+ date_dtype = None ,
2397
+ datetime_dtype = None ,
2398
+ time_dtype = None ,
2399
+ timestamp_dtype = None ,
2320
2400
) -> "pandas.DataFrame" :
2321
2401
"""Create an empty dataframe.
2322
2402
@@ -2330,6 +2410,10 @@ def to_dataframe(
2330
2410
int_dtype (Any): Ignored. Added for compatibility with RowIterator.
2331
2411
float_dtype (Any): Ignored. Added for compatibility with RowIterator.
2332
2412
string_dtype (Any): Ignored. Added for compatibility with RowIterator.
2413
+ date_dtype (Any): Ignored. Added for compatibility with RowIterator.
2414
+ datetime_dtype (Any): Ignored. Added for compatibility with RowIterator.
2415
+ time_dtype (Any): Ignored. Added for compatibility with RowIterator.
2416
+ timestamp_dtype (Any): Ignored. Added for compatibility with RowIterator.
2333
2417
2334
2418
Returns:
2335
2419
pandas.DataFrame: An empty :class:`~pandas.DataFrame`.
0 commit comments