Skip to content

Commit 21714e1

Browse files
authored
fix: make pyarrow an optional dependency post-3.20.0 yanked release (#1879)
* fix: make `pyarrow` an optional dependency again
* install older version of pyarrow
* fix for older tqdm
* remove many pragma: NO COVERs
1 parent 7dfee0c commit 21714e1

21 files changed

+126
-100
lines changed

google/cloud/bigquery/_pandas_helpers.py

+7-11
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
import pandas # type: ignore
3333

3434
pandas_import_exception = None
35-
except ImportError as exc: # pragma: NO COVER
35+
except ImportError as exc:
3636
pandas = None
3737
pandas_import_exception = exc
3838
else:
@@ -44,25 +44,21 @@
4444
date_dtype_name = db_dtypes.DateDtype.name
4545
time_dtype_name = db_dtypes.TimeDtype.name
4646
db_dtypes_import_exception = None
47-
except ImportError as exc: # pragma: NO COVER
47+
except ImportError as exc:
4848
db_dtypes = None
4949
db_dtypes_import_exception = exc
5050
date_dtype_name = time_dtype_name = "" # Use '' rather than None because pytype
5151

52-
pyarrow = _versions_helpers.PYARROW_VERSIONS.try_import(raise_if_error=True)
53-
from pyarrow import ArrowTypeError # type: ignore # noqa: E402
54-
55-
_BIGNUMERIC_SUPPORT = False
56-
if pyarrow is not None: # pragma: NO COVER
57-
_BIGNUMERIC_SUPPORT = True
52+
pyarrow = _versions_helpers.PYARROW_VERSIONS.try_import()
5853

5954
try:
6055
# _BaseGeometry is used to detect shapely objects in `bq_to_arrow_array`
6156
from shapely.geometry.base import BaseGeometry as _BaseGeometry # type: ignore
62-
except ImportError: # pragma: NO COVER
57+
except ImportError:
6358
# No shapely, use NoneType for _BaseGeometry as a placeholder.
6459
_BaseGeometry = type(None)
6560
else:
61+
# We don't have any unit test sessions that install shapely but not pandas.
6662
if pandas is not None: # pragma: NO COVER
6763

6864
def _to_wkb():
@@ -309,10 +305,10 @@ def bq_to_arrow_array(series, bq_field):
309305
if field_type_upper in schema._STRUCT_TYPES:
310306
return pyarrow.StructArray.from_pandas(series, type=arrow_type)
311307
return pyarrow.Array.from_pandas(series, type=arrow_type)
312-
except ArrowTypeError: # pragma: NO COVER
308+
except pyarrow.ArrowTypeError:
313309
msg = f"""Error converting Pandas column with name: "{series.name}" and datatype: "{series.dtype}" to an appropriate pyarrow datatype: Array, ListArray, or StructArray"""
314310
_LOGGER.error(msg)
315-
raise ArrowTypeError(msg)
311+
raise pyarrow.ArrowTypeError(msg)
316312

317313

318314
def get_column_or_index(dataframe, name):

google/cloud/bigquery/_pyarrow_helpers.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020

2121
try:
2222
import pyarrow # type: ignore
23-
except ImportError: # pragma: NO COVER
23+
except ImportError:
2424
pyarrow = None
2525

2626

@@ -49,7 +49,7 @@ def pyarrow_timestamp():
4949
_BQ_TO_ARROW_SCALARS = {}
5050
_ARROW_SCALAR_IDS_TO_BQ = {}
5151

52-
if pyarrow: # pragma: NO COVER
52+
if pyarrow:
5353
# This dictionary is duplicated in bigquery_storage/test/unite/test_reader.py
5454
# When modifying it be sure to update it there as well.
5555
# Note(todo!!): type "BIGNUMERIC"'s matching pyarrow type is added in _pandas_helpers.py

google/cloud/bigquery/_tqdm_helpers.py

+8-5
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,14 @@
2323

2424
try:
2525
import tqdm # type: ignore
26-
import tqdm.notebook as notebook # type: ignore
27-
28-
except ImportError: # pragma: NO COVER
26+
except ImportError:
2927
tqdm = None
3028

29+
try:
30+
import tqdm.notebook as tqdm_notebook # type: ignore
31+
except ImportError:
32+
tqdm_notebook = None
33+
3134
if typing.TYPE_CHECKING: # pragma: NO COVER
3235
from google.cloud.bigquery import QueryJob
3336
from google.cloud.bigquery.table import RowIterator
@@ -42,7 +45,7 @@
4245

4346
def get_progress_bar(progress_bar_type, description, total, unit):
4447
"""Construct a tqdm progress bar object, if tqdm is installed."""
45-
if tqdm is None:
48+
if tqdm is None or tqdm_notebook is None and progress_bar_type == "tqdm_notebook":
4649
if progress_bar_type is not None:
4750
warnings.warn(_NO_TQDM_ERROR, UserWarning, stacklevel=3)
4851
return None
@@ -58,7 +61,7 @@ def get_progress_bar(progress_bar_type, description, total, unit):
5861
unit=unit,
5962
)
6063
elif progress_bar_type == "tqdm_notebook":
61-
return notebook.tqdm(
64+
return tqdm_notebook.tqdm(
6265
bar_format="{l_bar}{bar}|",
6366
desc=description,
6467
file=sys.stdout,

google/cloud/bigquery/_versions_helpers.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ def try_import(self, raise_if_error: bool = False) -> Any:
7373
"""
7474
try:
7575
import pyarrow
76-
except ImportError as exc: # pragma: NO COVER
76+
except ImportError as exc:
7777
if raise_if_error:
7878
raise exceptions.LegacyPyarrowError(
7979
"pyarrow package not found. Install pyarrow version >="
@@ -212,7 +212,7 @@ def try_import(self, raise_if_error: bool = False) -> Any:
212212
"""
213213
try:
214214
import pandas
215-
except ImportError as exc: # pragma: NO COVER
215+
except ImportError as exc:
216216
if raise_if_error:
217217
raise exceptions.LegacyPandasError(
218218
"pandas package not found. Install pandas version >="

google/cloud/bigquery/job/query.py

+1-6
Original file line numberDiff line numberDiff line change
@@ -56,14 +56,9 @@
5656

5757
try:
5858
import pandas # type: ignore
59-
except ImportError: # pragma: NO COVER
59+
except ImportError:
6060
pandas = None
6161

62-
try:
63-
import db_dtypes # type: ignore
64-
except ImportError: # pragma: NO COVER
65-
db_dtypes = None
66-
6762
if typing.TYPE_CHECKING: # pragma: NO COVER
6863
# Assumption: type checks are only used by library developers and CI environments
6964
# that have all optional dependencies installed, thus no conditional imports.

google/cloud/bigquery/magics/magics.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@
9595
import IPython # type: ignore
9696
from IPython import display # type: ignore
9797
from IPython.core import magic_arguments # type: ignore
98-
except ImportError: # pragma: NO COVER
98+
except ImportError:
9999
raise ImportError("This module can only be loaded in IPython.")
100100

101101
from google.api_core import client_info

google/cloud/bigquery/table.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -26,17 +26,17 @@
2626

2727
try:
2828
import pandas # type: ignore
29-
except ImportError: # pragma: NO COVER
29+
except ImportError:
3030
pandas = None
3131

3232
try:
3333
import pyarrow # type: ignore
34-
except ImportError: # pragma: NO COVER
34+
except ImportError:
3535
pyarrow = None
3636

3737
try:
3838
import db_dtypes # type: ignore
39-
except ImportError: # pragma: NO COVER
39+
except ImportError:
4040
db_dtypes = None
4141

4242
try:

noxfile.py

+8-7
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ def default(session, install_extras=True):
8686
install_target = ".[all]"
8787
else:
8888
install_target = "."
89-
session.install("-e", install_target)
89+
session.install("-e", install_target, "-c", constraints_path)
9090
session.run("python", "-m", "pip", "freeze")
9191

9292
# Run py.test against the unit tests.
@@ -115,14 +115,15 @@ def unit(session):
115115
def unit_noextras(session):
116116
"""Run the unit test suite."""
117117

118-
# Install optional dependencies that are out-of-date.
118+
# Install optional dependencies that are out-of-date to see that
119+
# we fail gracefully.
119120
# https://github.com/googleapis/python-bigquery/issues/933
120-
# There is no pyarrow 1.0.0 package for Python 3.9.
121-
121+
#
122+
# We only install this extra package on one of the two Python versions
123+
# so that it continues to be an optional dependency.
124+
# https://github.com/googleapis/python-bigquery/issues/1877
122125
if session.python == UNIT_TEST_PYTHON_VERSIONS[0]:
123-
session.install("pyarrow>=3.0.0")
124-
elif session.python == UNIT_TEST_PYTHON_VERSIONS[-1]:
125-
session.install("pyarrow")
126+
session.install("pyarrow==1.0.0")
126127

127128
default(session, install_extras=False)
128129

samples/desktopapp/requirements-test.txt

-1
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,3 @@ google-cloud-testutils==1.4.0
22
pytest===7.4.4; python_version == '3.7'
33
pytest==8.1.1; python_version >= '3.8'
44
mock==5.1.0
5-
pyarrow>=3.0.0
+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1+
# samples/snippets should be runnable with no "extras"
12
google-cloud-testutils==1.4.0
23
pytest===7.4.4; python_version == '3.7'
34
pytest==8.1.1; python_version >= '3.8'
45
mock==5.1.0
5-
pyarrow>=3.0.0

samples/snippets/requirements.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
1-
google-cloud-bigquery==3.19.0
1+
# samples/snippets should be runnable with no "extras"
2+
google-cloud-bigquery==3.19.0

testing/constraints-3.11.txt

-1
Original file line numberDiff line numberDiff line change
@@ -1 +0,0 @@
1-
pyarrow>=3.0.0

testing/constraints-3.12.txt

-1
Original file line numberDiff line numberDiff line change
@@ -1 +0,0 @@
1-
pyarrow>=3.0.0

testing/constraints-3.7.txt

+2-2
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,9 @@ packaging==20.0.0
2727
pandas==1.1.0
2828
proto-plus==1.22.0
2929
protobuf==3.19.5
30-
pyarrow>=3.0.0
30+
pyarrow==3.0.0
3131
python-dateutil==2.7.3
3232
requests==2.21.0
3333
Shapely==1.8.4
3434
six==1.13.0
35-
tqdm==4.7.4
35+
tqdm==4.7.4

tests/unit/job/test_query_pandas.py

+14-26
Original file line numberDiff line numberDiff line change
@@ -19,53 +19,38 @@
1919

2020
import pytest
2121

22+
from ..helpers import make_connection
23+
from .helpers import _make_client
24+
from .helpers import _make_job_resource
2225

2326
try:
2427
from google.cloud import bigquery_storage
2528
import google.cloud.bigquery_storage_v1.reader
2629
import google.cloud.bigquery_storage_v1.services.big_query_read.client
27-
except (ImportError, AttributeError): # pragma: NO COVER
30+
except (ImportError, AttributeError):
2831
bigquery_storage = None
2932

30-
try:
31-
import pandas
32-
except (ImportError, AttributeError): # pragma: NO COVER
33-
pandas = None
3433
try:
3534
import shapely
36-
except (ImportError, AttributeError): # pragma: NO COVER
35+
except (ImportError, AttributeError):
3736
shapely = None
3837
try:
3938
import geopandas
40-
except (ImportError, AttributeError): # pragma: NO COVER
39+
except (ImportError, AttributeError):
4140
geopandas = None
4241
try:
4342
import tqdm
44-
except (ImportError, AttributeError): # pragma: NO COVER
43+
except (ImportError, AttributeError):
4544
tqdm = None
4645

47-
try:
48-
import importlib.metadata as metadata
49-
except ImportError:
50-
import importlib_metadata as metadata
51-
52-
from ..helpers import make_connection
53-
from .helpers import _make_client
54-
from .helpers import _make_job_resource
55-
56-
if pandas is not None:
57-
PANDAS_INSTALLED_VERSION = metadata.version("pandas")
58-
else:
59-
PANDAS_INSTALLED_VERSION = "0.0.0"
60-
61-
pandas = pytest.importorskip("pandas")
62-
6346
try:
6447
import pyarrow
6548
import pyarrow.types
66-
except ImportError: # pragma: NO COVER
49+
except ImportError:
6750
pyarrow = None
6851

52+
pandas = pytest.importorskip("pandas")
53+
6954

7055
@pytest.fixture
7156
def table_read_options_kwarg():
@@ -660,7 +645,10 @@ def test_to_dataframe_bqstorage_no_pyarrow_compression():
660645
)
661646

662647

663-
@pytest.mark.skipif(PANDAS_INSTALLED_VERSION[0:2] not in ["0.", "1."], reason="")
648+
@pytest.mark.skipif(
649+
pandas.__version__.startswith("2."),
650+
reason="pandas 2.0 changes some default dtypes and we haven't update the test to account for those",
651+
)
664652
@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`")
665653
def test_to_dataframe_column_dtypes():
666654
from google.cloud.bigquery.job import QueryJob as target_class

tests/unit/test__pandas_helpers.py

+8-7
Original file line numberDiff line numberDiff line change
@@ -30,12 +30,12 @@
3030
import pandas
3131
import pandas.api.types
3232
import pandas.testing
33-
except ImportError: # pragma: NO COVER
33+
except ImportError:
3434
pandas = None
3535

3636
try:
3737
import geopandas
38-
except ImportError: # pragma: NO COVER
38+
except ImportError:
3939
geopandas = None
4040

4141
import pytest
@@ -46,18 +46,19 @@
4646
from google.cloud.bigquery import _pyarrow_helpers
4747
from google.cloud.bigquery import _versions_helpers
4848
from google.cloud.bigquery import schema
49-
from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT
5049

5150
pyarrow = _versions_helpers.PYARROW_VERSIONS.try_import()
5251

5352
if pyarrow:
5453
import pyarrow.parquet
5554
import pyarrow.types
56-
from pyarrow import ArrowTypeError # type: ignore # noqa: E402
57-
else: # pragma: NO COVER
55+
56+
_BIGNUMERIC_SUPPORT = True
57+
else:
5858
# Mock out pyarrow when missing, because methods from pyarrow.types are
5959
# used in test parameterization.
6060
pyarrow = mock.Mock()
61+
_BIGNUMERIC_SUPPORT = False
6162

6263
bigquery_storage = _versions_helpers.BQ_STORAGE_VERSIONS.try_import()
6364

@@ -572,9 +573,9 @@ def test_bq_to_arrow_array_w_conversion_fail(module_under_test): # pragma: NO C
572573
series = pandas.Series(rows, name="test_col", dtype="object")
573574
bq_field = schema.SchemaField("field_name", "STRING", mode="REPEATED")
574575
exc_msg = f"""Error converting Pandas column with name: "{series.name}" and datatype: "{series.dtype}" to an appropriate pyarrow datatype: Array, ListArray, or StructArray"""
575-
with pytest.raises(ArrowTypeError, match=exc_msg):
576+
with pytest.raises(pyarrow.ArrowTypeError, match=exc_msg):
576577
module_under_test.bq_to_arrow_array(series, bq_field)
577-
raise ArrowTypeError(exc_msg)
578+
raise pyarrow.ArrowTypeError(exc_msg)
578579

579580

580581
@pytest.mark.parametrize("bq_type", ["RECORD", "record", "STRUCT", "struct"])

0 commit comments

Comments
 (0)