Skip to content

Commit 4333910

Browse files
authored
fix(deps): raise exception when pandas is installed but db-dtypes is not (googleapis#1191)
`db-dtypes` is already present in the `pandas` "extras", but this PR ensures that if pandas is present and db-dtypes is not, a more understandable error message is raised. ``` google/cloud/bigquery/_pandas_helpers.py:991: ValueError ____________________________________ test_list_rows_nullable_scalars_extreme_dtypes[10] _____________________________________ # Copyright 2019 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """Shared helper functions for connecting BigQuery and pandas.""" import concurrent.futures from datetime import datetime import functools from itertools import islice import logging import queue import warnings try: import pandas # type: ignore pandas_import_exception = None except ImportError as exc: # pragma: NO COVER pandas = None pandas_import_exception = exc else: import numpy try: > import db_dtypes # type: ignore E ModuleNotFoundError: No module named 'db_dtypes' google/cloud/bigquery/_pandas_helpers.py:36: ModuleNotFoundError The above exception was the direct cause of the following exception: bigquery_client = <google.cloud.bigquery.client.Client object at 0x11e2d3580> scalars_extreme_table = 'swast-scratch.python_bigquery_tests_system_20220330160830_ffff89.scalars_extreme_jsonl0x3ffeb' max_results = 10 @pytest.mark.parametrize( ("max_results",), ( (None,), (10,), ), # Use BQ Storage API. # Use REST API. 
) def test_list_rows_nullable_scalars_extreme_dtypes( bigquery_client, scalars_extreme_table, max_results ): # TODO(GH#836): Avoid INTERVAL columns until they are supported by the # BigQuery Storage API and pyarrow. schema = [ bigquery.SchemaField("bool_col", enums.SqlTypeNames.BOOLEAN), bigquery.SchemaField("bignumeric_col", enums.SqlTypeNames.BIGNUMERIC), bigquery.SchemaField("bytes_col", enums.SqlTypeNames.BYTES), bigquery.SchemaField("date_col", enums.SqlTypeNames.DATE), bigquery.SchemaField("datetime_col", enums.SqlTypeNames.DATETIME), bigquery.SchemaField("float64_col", enums.SqlTypeNames.FLOAT64), bigquery.SchemaField("geography_col", enums.SqlTypeNames.GEOGRAPHY), bigquery.SchemaField("int64_col", enums.SqlTypeNames.INT64), bigquery.SchemaField("numeric_col", enums.SqlTypeNames.NUMERIC), bigquery.SchemaField("string_col", enums.SqlTypeNames.STRING), bigquery.SchemaField("time_col", enums.SqlTypeNames.TIME), bigquery.SchemaField("timestamp_col", enums.SqlTypeNames.TIMESTAMP), ] df = bigquery_client.list_rows( scalars_extreme_table, max_results=max_results, selected_fields=schema, > ).to_dataframe() tests/system/test_pandas.py:1084: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ google/cloud/bigquery/table.py:1925: in to_dataframe _pandas_helpers.verify_pandas_imports() _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ def verify_pandas_imports(): if pandas is None: raise ValueError(_NO_PANDAS_ERROR) from pandas_import_exception if db_dtypes is None: > raise ValueError(_NO_DB_TYPES_ERROR) from db_dtypes_import_exception E ValueError: Please install the 'db-dtypes' package to use this function. google/cloud/bigquery/_pandas_helpers.py:991: ValueError ``` Thank you for opening a Pull Request! 
Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes googleapis#1188 🦕
1 parent b4c7f5a commit 4333910

File tree

4 files changed

+49
-25
lines changed

4 files changed

+49
-25
lines changed

google/cloud/bigquery/_pandas_helpers.py

+26-7
Original file line numberDiff line numberDiff line change
@@ -24,16 +24,25 @@
2424

2525
try:
2626
import pandas # type: ignore
27-
except ImportError: # pragma: NO COVER
27+
28+
pandas_import_exception = None
29+
except ImportError as exc: # pragma: NO COVER
2830
pandas = None
29-
date_dtype_name = time_dtype_name = "" # Use '' rather than None because pytype
31+
pandas_import_exception = exc
3032
else:
3133
import numpy
3234

33-
from db_dtypes import DateDtype, TimeDtype # type: ignore
35+
try:
36+
import db_dtypes # type: ignore
37+
38+
date_dtype_name = db_dtypes.DateDtype.name
39+
time_dtype_name = db_dtypes.TimeDtype.name
40+
db_dtypes_import_exception = None
41+
except ImportError as exc: # pragma: NO COVER
42+
db_dtypes = None
43+
db_dtypes_import_exception = exc
44+
date_dtype_name = time_dtype_name = "" # Use '' rather than None because pytype
3445

35-
date_dtype_name = DateDtype.name
36-
time_dtype_name = TimeDtype.name
3746

3847
import pyarrow # type: ignore
3948
import pyarrow.parquet # type: ignore
@@ -84,6 +93,9 @@ def _to_wkb(v):
8493

8594
_MAX_QUEUE_SIZE_DEFAULT = object() # max queue size sentinel for BQ Storage downloads
8695

96+
_NO_PANDAS_ERROR = "Please install the 'pandas' package to use this function."
97+
_NO_DB_TYPES_ERROR = "Please install the 'db-dtypes' package to use this function."
98+
8799
_PANDAS_DTYPE_TO_BQ = {
88100
"bool": "BOOLEAN",
89101
"datetime64[ns, UTC]": "TIMESTAMP",
@@ -290,13 +302,13 @@ def types_mapper(arrow_data_type):
290302
not date_as_object
291303
and pyarrow.types.is_date(arrow_data_type)
292304
):
293-
return DateDtype()
305+
return db_dtypes.DateDtype()
294306

295307
elif pyarrow.types.is_integer(arrow_data_type):
296308
return pandas.Int64Dtype()
297309

298310
elif pyarrow.types.is_time(arrow_data_type):
299-
return TimeDtype()
311+
return db_dtypes.TimeDtype()
300312

301313
return types_mapper
302314

@@ -970,3 +982,10 @@ def dataframe_to_json_generator(dataframe):
970982
output[column] = value
971983

972984
yield output
985+
986+
987+
def verify_pandas_imports():
988+
if pandas is None:
989+
raise ValueError(_NO_PANDAS_ERROR) from pandas_import_exception
990+
if db_dtypes is None:
991+
raise ValueError(_NO_DB_TYPES_ERROR) from db_dtypes_import_exception

google/cloud/bigquery/table.py

+6-14
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,6 @@
2828
import pandas # type: ignore
2929
except ImportError: # pragma: NO COVER
3030
pandas = None
31-
else:
32-
import db_dtypes # type: ignore # noqa
3331

3432
import pyarrow # type: ignore
3533

@@ -69,10 +67,6 @@
6967
from google.cloud.bigquery.dataset import DatasetReference
7068

7169

72-
_NO_PANDAS_ERROR = (
73-
"The pandas library is not installed, please install "
74-
"pandas to use the to_dataframe() function."
75-
)
7670
_NO_GEOPANDAS_ERROR = (
7771
"The geopandas library is not installed, please install "
7872
"geopandas to use the to_geodataframe() function."
@@ -1818,8 +1812,8 @@ def to_dataframe_iterable(
18181812
ValueError:
18191813
If the :mod:`pandas` library cannot be imported.
18201814
"""
1821-
if pandas is None:
1822-
raise ValueError(_NO_PANDAS_ERROR)
1815+
_pandas_helpers.verify_pandas_imports()
1816+
18231817
if dtypes is None:
18241818
dtypes = {}
18251819

@@ -1928,8 +1922,8 @@ def to_dataframe(
19281922
:mod:`shapely` library cannot be imported.
19291923
19301924
"""
1931-
if pandas is None:
1932-
raise ValueError(_NO_PANDAS_ERROR)
1925+
_pandas_helpers.verify_pandas_imports()
1926+
19331927
if geography_as_object and shapely is None:
19341928
raise ValueError(_NO_SHAPELY_ERROR)
19351929

@@ -2181,8 +2175,7 @@ def to_dataframe(
21812175
Returns:
21822176
pandas.DataFrame: An empty :class:`~pandas.DataFrame`.
21832177
"""
2184-
if pandas is None:
2185-
raise ValueError(_NO_PANDAS_ERROR)
2178+
_pandas_helpers.verify_pandas_imports()
21862179
return pandas.DataFrame()
21872180

21882181
def to_geodataframe(
@@ -2238,8 +2231,7 @@ def to_dataframe_iterable(
22382231
ValueError:
22392232
If the :mod:`pandas` library cannot be imported.
22402233
"""
2241-
if pandas is None:
2242-
raise ValueError(_NO_PANDAS_ERROR)
2234+
_pandas_helpers.verify_pandas_imports()
22432235
return iter((pandas.DataFrame(),))
22442236

22452237
def to_arrow_iterable(

tests/unit/test__pandas_helpers.py

+13
Original file line numberDiff line numberDiff line change
@@ -1751,3 +1751,16 @@ def test_bq_to_arrow_field_metadata(module_under_test, field_type, metadata):
17511751
).metadata
17521752
== metadata
17531753
)
1754+
1755+
1756+
def test_verify_pandas_imports_no_pandas(module_under_test, monkeypatch):
1757+
monkeypatch.setattr(module_under_test, "pandas", None)
1758+
with pytest.raises(ValueError, match="Please install the 'pandas' package"):
1759+
module_under_test.verify_pandas_imports()
1760+
1761+
1762+
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
1763+
def test_verify_pandas_imports_no_db_dtypes(module_under_test, monkeypatch):
1764+
monkeypatch.setattr(module_under_test, "db_dtypes", None)
1765+
with pytest.raises(ValueError, match="Please install the 'db-dtypes' package"):
1766+
module_under_test.verify_pandas_imports()

tests/unit/test_table.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -1836,7 +1836,7 @@ def test_to_arrow_iterable(self):
18361836
self.assertEqual(record_batch.num_rows, 0)
18371837
self.assertEqual(record_batch.num_columns, 0)
18381838

1839-
@mock.patch("google.cloud.bigquery.table.pandas", new=None)
1839+
@mock.patch("google.cloud.bigquery._pandas_helpers.pandas", new=None)
18401840
def test_to_dataframe_error_if_pandas_is_none(self):
18411841
row_iterator = self._make_one()
18421842
with self.assertRaises(ValueError):
@@ -1849,7 +1849,7 @@ def test_to_dataframe(self):
18491849
self.assertIsInstance(df, pandas.DataFrame)
18501850
self.assertEqual(len(df), 0) # verify the number of rows
18511851

1852-
@mock.patch("google.cloud.bigquery.table.pandas", new=None)
1852+
@mock.patch("google.cloud.bigquery._pandas_helpers.pandas", new=None)
18531853
def test_to_dataframe_iterable_error_if_pandas_is_none(self):
18541854
row_iterator = self._make_one()
18551855
with self.assertRaises(ValueError):
@@ -2967,7 +2967,7 @@ def test_to_dataframe_iterable_w_bqstorage_max_results_warning(self):
29672967
assert isinstance(dataframes[0], pandas.DataFrame)
29682968
assert isinstance(dataframes[1], pandas.DataFrame)
29692969

2970-
@mock.patch("google.cloud.bigquery.table.pandas", new=None)
2970+
@mock.patch("google.cloud.bigquery._pandas_helpers.pandas", new=None)
29712971
def test_to_dataframe_iterable_error_if_pandas_is_none(self):
29722972
from google.cloud.bigquery.schema import SchemaField
29732973

@@ -3339,7 +3339,7 @@ def test_to_dataframe_datetime_objects(self):
33393339
self.assertEqual(df["ts"][0].date(), datetime.date(1336, 3, 23))
33403340
self.assertEqual(df["date"][0], datetime.date(1111, 1, 1))
33413341

3342-
@mock.patch("google.cloud.bigquery.table.pandas", new=None)
3342+
@mock.patch("google.cloud.bigquery._pandas_helpers.pandas", new=None)
33433343
def test_to_dataframe_error_if_pandas_is_none(self):
33443344
from google.cloud.bigquery.schema import SchemaField
33453345

0 commit comments

Comments
 (0)