Skip to content

Commit b289076

Browse files
authored
fix: guard imports against unsupported pyarrow versions (#934)
* fix: guard imports against unsupported pyarrow versions
* add unit tests
* fix pytype
* second try at fixing pytype
1 parent 10fee52 commit b289076

File tree

9 files changed

+184
-39
lines changed

9 files changed

+184
-39
lines changed

google/cloud/bigquery/_helpers.py

+69-3
Original file line number · Diff line number · Diff line change
@@ -19,7 +19,7 @@
1919
import decimal
2020
import math
2121
import re
22-
from typing import Union
22+
from typing import Any, Union
2323

2424
from google.cloud._helpers import UTC
2525
from google.cloud._helpers import _date_from_iso8601_date
@@ -29,7 +29,10 @@
2929
from google.cloud._helpers import _to_bytes
3030
import packaging.version
3131

32-
from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError
32+
from google.cloud.bigquery.exceptions import (
33+
LegacyBigQueryStorageError,
34+
LegacyPyarrowError,
35+
)
3336

3437

3538
_RFC3339_MICROS_NO_ZULU = "%Y-%m-%dT%H:%M:%S.%f"
@@ -42,6 +45,7 @@
4245
re.VERBOSE,
4346
)
4447

48+
_MIN_PYARROW_VERSION = packaging.version.Version("3.0.0")
4549
_MIN_BQ_STORAGE_VERSION = packaging.version.Version("2.0.0")
4650
_BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION = packaging.version.Version("2.6.0")
4751

@@ -95,12 +99,74 @@ def verify_version(self):
9599
if self.installed_version < _MIN_BQ_STORAGE_VERSION:
96100
msg = (
97101
"Dependency google-cloud-bigquery-storage is outdated, please upgrade "
98-
f"it to version >= 2.0.0 (version found: {self.installed_version})."
102+
f"it to version >= {_MIN_BQ_STORAGE_VERSION} (version found: {self.installed_version})."
99103
)
100104
raise LegacyBigQueryStorageError(msg)
101105

102106

107+
class PyarrowVersions:
108+
"""Version comparisons for pyarrow package."""
109+
110+
def __init__(self):
111+
self._installed_version = None
112+
113+
@property
114+
def installed_version(self) -> packaging.version.Version:
115+
"""Return the parsed version of pyarrow."""
116+
if self._installed_version is None:
117+
import pyarrow
118+
119+
self._installed_version = packaging.version.parse(
120+
# Use 0.0.0, since it is earlier than any released version.
121+
# Legacy versions also have the same property, but
122+
# creating a LegacyVersion has been deprecated.
123+
# https://github.com/pypa/packaging/issues/321
124+
getattr(pyarrow, "__version__", "0.0.0")
125+
)
126+
127+
return self._installed_version
128+
129+
def try_import(self, raise_if_error: bool = False) -> Any:
130+
"""Verify that a recent enough version of pyarrow extra is
131+
installed.
132+
133+
The function assumes that pyarrow extra is installed, and should thus
134+
be used in places where this assumption holds.
135+
136+
Because `pip` can install an outdated version of this extra despite the
137+
constraints in `setup.py`, the calling code can use this helper to
138+
verify the version compatibility at runtime.
139+
140+
Returns:
141+
The ``pyarrow`` module or ``None``.
142+
143+
Raises:
144+
LegacyPyarrowError:
145+
If the pyarrow package is outdated and ``raise_if_error`` is ``True``.
146+
"""
147+
try:
148+
import pyarrow
149+
except ImportError as exc: # pragma: NO COVER
150+
if raise_if_error:
151+
raise LegacyPyarrowError(
152+
f"pyarrow package not found. Install pyarrow version >= {_MIN_PYARROW_VERSION}."
153+
) from exc
154+
return None
155+
156+
if self.installed_version < _MIN_PYARROW_VERSION:
157+
if raise_if_error:
158+
msg = (
159+
"Dependency pyarrow is outdated, please upgrade "
160+
f"it to version >= {_MIN_PYARROW_VERSION} (version found: {self.installed_version})."
161+
)
162+
raise LegacyPyarrowError(msg)
163+
return None
164+
165+
return pyarrow
166+
167+
103168
BQ_STORAGE_VERSIONS = BQStorageVersions()
169+
PYARROW_VERSIONS = PyarrowVersions()
104170

105171

106172
def _not_null(value, field):

google/cloud/bigquery/_pandas_helpers.py

+6-13
Original file line number · Diff line number · Diff line change
@@ -55,12 +55,6 @@ def _to_wkb(v):
5555

5656
_to_wkb = _to_wkb()
5757

58-
try:
59-
import pyarrow
60-
import pyarrow.parquet
61-
except ImportError: # pragma: NO COVER
62-
pyarrow = None
63-
6458
try:
6559
from google.cloud.bigquery_storage import ArrowSerializationOptions
6660
except ImportError:
@@ -73,12 +67,10 @@ def _to_wkb(v):
7367
from google.cloud.bigquery import schema
7468

7569

76-
_LOGGER = logging.getLogger(__name__)
70+
pyarrow = _helpers.PYARROW_VERSIONS.try_import()
7771

78-
_NO_BQSTORAGE_ERROR = (
79-
"The google-cloud-bigquery-storage library is not installed, "
80-
"please install google-cloud-bigquery-storage to use bqstorage features."
81-
)
72+
73+
_LOGGER = logging.getLogger(__name__)
8274

8375
_PROGRESS_INTERVAL = 0.2 # Maximum time between download status checks, in seconds.
8476

@@ -548,8 +540,9 @@ def dataframe_to_parquet(dataframe, bq_schema, filepath, parquet_compression="SN
548540
serializing method. Defaults to "SNAPPY".
549541
https://arrow.apache.org/docs/python/generated/pyarrow.parquet.write_table.html#pyarrow-parquet-write-table
550542
"""
551-
if pyarrow is None:
552-
raise ValueError("pyarrow is required for BigQuery schema conversion.")
543+
pyarrow = _helpers.PYARROW_VERSIONS.try_import(raise_if_error=True)
544+
545+
import pyarrow.parquet
553546

554547
bq_schema = schema._to_schema_fields(bq_schema)
555548
arrow_table = dataframe_to_arrow(dataframe, bq_schema)

google/cloud/bigquery/exceptions.py

+4
Original file line number · Diff line number · Diff line change
@@ -19,3 +19,7 @@ class BigQueryError(Exception):
1919

2020
class LegacyBigQueryStorageError(BigQueryError):
2121
"""Raised when too old a version of BigQuery Storage extra is detected at runtime."""
22+
23+
24+
class LegacyPyarrowError(BigQueryError):
25+
"""Raised when too old a version of pyarrow package is detected at runtime."""

noxfile.py

+8-1
Original file line number · Diff line number · Diff line change
@@ -94,9 +94,16 @@ def unit(session):
9494
default(session)
9595

9696

97-
@nox.session(python=UNIT_TEST_PYTHON_VERSIONS[-1])
97+
@nox.session(python=[UNIT_TEST_PYTHON_VERSIONS[0], UNIT_TEST_PYTHON_VERSIONS[-1]])
9898
def unit_noextras(session):
9999
"""Run the unit test suite."""
100+
101+
# Install optional dependencies that are out-of-date.
102+
# https://github.com/googleapis/python-bigquery/issues/933
103+
# There is no pyarrow 1.0.0 package for Python 3.9.
104+
if session.python == UNIT_TEST_PYTHON_VERSIONS[0]:
105+
session.install("pyarrow==1.0.0")
106+
100107
default(session, install_extras=False)
101108

102109

testing/constraints-3.6.txt

+1-1
Original file line number · Diff line number · Diff line change
@@ -19,6 +19,6 @@ proto-plus==1.10.0
1919
protobuf==3.12.0
2020
pyarrow==3.0.0
2121
requests==2.18.0
22-
shapely==1.6.0
22+
Shapely==1.6.0
2323
six==1.13.0
2424
tqdm==4.7.4

tests/unit/job/test_query_pandas.py

+4-4
Original file line number · Diff line number · Diff line change
@@ -31,10 +31,6 @@
3131
import geopandas
3232
except (ImportError, AttributeError): # pragma: NO COVER
3333
geopandas = None
34-
try:
35-
import pyarrow
36-
except (ImportError, AttributeError): # pragma: NO COVER
37-
pyarrow = None
3834
try:
3935
from google.cloud import bigquery_storage
4036
except (ImportError, AttributeError): # pragma: NO COVER
@@ -44,11 +40,15 @@
4440
except (ImportError, AttributeError): # pragma: NO COVER
4541
tqdm = None
4642

43+
from google.cloud.bigquery import _helpers
4744
from .helpers import _make_client
4845
from .helpers import _make_connection
4946
from .helpers import _make_job_resource
5047

5148

49+
pyarrow = _helpers.PYARROW_VERSIONS.try_import()
50+
51+
5252
@pytest.fixture
5353
def table_read_options_kwarg():
5454
# Create a BigQuery Storage table read options object with pyarrow compression

tests/unit/test__helpers.py

+68
Original file line number · Diff line number · Diff line change
@@ -24,9 +24,20 @@
2424
except ImportError: # pragma: NO COVER
2525
bigquery_storage = None
2626

27+
try:
28+
import pyarrow
29+
except ImportError: # pragma: NO COVER
30+
pyarrow = None
31+
2732

2833
@unittest.skipIf(bigquery_storage is None, "Requires `google-cloud-bigquery-storage`")
2934
class TestBQStorageVersions(unittest.TestCase):
35+
def tearDown(self):
36+
from google.cloud.bigquery import _helpers
37+
38+
# Reset any cached versions since it may not match reality.
39+
_helpers.BQ_STORAGE_VERSIONS._installed_version = None
40+
3041
def _object_under_test(self):
3142
from google.cloud.bigquery import _helpers
3243

@@ -89,6 +100,63 @@ def test_is_read_session_optional_false(self):
89100
assert not versions.is_read_session_optional
90101

91102

103+
@unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
104+
class TestPyarrowVersions(unittest.TestCase):
105+
def tearDown(self):
106+
from google.cloud.bigquery import _helpers
107+
108+
# Reset any cached versions since it may not match reality.
109+
_helpers.PYARROW_VERSIONS._installed_version = None
110+
111+
def _object_under_test(self):
112+
from google.cloud.bigquery import _helpers
113+
114+
return _helpers.PyarrowVersions()
115+
116+
def _call_try_import(self, **kwargs):
117+
from google.cloud.bigquery import _helpers
118+
119+
_helpers.PYARROW_VERSIONS._installed_version = None
120+
return _helpers.PYARROW_VERSIONS.try_import(**kwargs)
121+
122+
def test_try_import_raises_no_error_w_recent_pyarrow(self):
123+
from google.cloud.bigquery.exceptions import LegacyPyarrowError
124+
125+
with mock.patch("pyarrow.__version__", new="5.0.0"):
126+
try:
127+
pyarrow = self._call_try_import(raise_if_error=True)
128+
self.assertIsNotNone(pyarrow)
129+
except LegacyPyarrowError: # pragma: NO COVER
130+
self.fail("Legacy error raised with a non-legacy dependency version.")
131+
132+
def test_try_import_returns_none_w_legacy_pyarrow(self):
133+
with mock.patch("pyarrow.__version__", new="2.0.0"):
134+
pyarrow = self._call_try_import()
135+
self.assertIsNone(pyarrow)
136+
137+
def test_try_import_raises_error_w_legacy_pyarrow(self):
138+
from google.cloud.bigquery.exceptions import LegacyPyarrowError
139+
140+
with mock.patch("pyarrow.__version__", new="2.0.0"):
141+
with self.assertRaises(LegacyPyarrowError):
142+
self._call_try_import(raise_if_error=True)
143+
144+
def test_installed_version_returns_cached(self):
145+
versions = self._object_under_test()
146+
versions._installed_version = object()
147+
assert versions.installed_version is versions._installed_version
148+
149+
def test_installed_version_returns_parsed_version(self):
150+
versions = self._object_under_test()
151+
152+
with mock.patch("pyarrow.__version__", new="1.2.3"):
153+
version = versions.installed_version
154+
155+
assert version.major == 1
156+
assert version.minor == 2
157+
assert version.micro == 3
158+
159+
92160
class Test_not_null(unittest.TestCase):
93161
def _call_fut(self, value, field):
94162
from google.cloud.bigquery._helpers import _not_null

tests/unit/test__pandas_helpers.py

+18-11
Original file line number · Diff line number · Diff line change
@@ -29,13 +29,6 @@
2929
import pandas.testing
3030
except ImportError: # pragma: NO COVER
3131
pandas = None
32-
try:
33-
import pyarrow
34-
import pyarrow.types
35-
except ImportError: # pragma: NO COVER
36-
# Mock out pyarrow when missing, because methods from pyarrow.types are
37-
# used in test parameterization.
38-
pyarrow = mock.Mock()
3932
try:
4033
import geopandas
4134
except ImportError: # pragma: NO COVER
@@ -44,9 +37,19 @@
4437
import pytest
4538

4639
from google import api_core
40+
from google.cloud.bigquery import exceptions
4741
from google.cloud.bigquery import _helpers
4842
from google.cloud.bigquery import schema
4943

44+
45+
pyarrow = _helpers.PYARROW_VERSIONS.try_import()
46+
if pyarrow:
47+
import pyarrow.types
48+
else: # pragma: NO COVER
49+
# Mock out pyarrow when missing, because methods from pyarrow.types are
50+
# used in test parameterization.
51+
pyarrow = mock.Mock()
52+
5053
try:
5154
from google.cloud import bigquery_storage
5255

@@ -1120,15 +1123,19 @@ def test_dataframe_to_arrow_dict_sequence_schema(module_under_test):
11201123

11211124
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
11221125
def test_dataframe_to_parquet_without_pyarrow(module_under_test, monkeypatch):
1123-
monkeypatch.setattr(module_under_test, "pyarrow", None)
1124-
with pytest.raises(ValueError) as exc_context:
1126+
mock_pyarrow_import = mock.Mock()
1127+
mock_pyarrow_import.side_effect = exceptions.LegacyPyarrowError(
1128+
"pyarrow not installed"
1129+
)
1130+
monkeypatch.setattr(_helpers.PYARROW_VERSIONS, "try_import", mock_pyarrow_import)
1131+
1132+
with pytest.raises(exceptions.LegacyPyarrowError):
11251133
module_under_test.dataframe_to_parquet(pandas.DataFrame(), (), None)
1126-
assert "pyarrow is required" in str(exc_context.value)
11271134

11281135

11291136
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
11301137
@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`")
1131-
def test_dataframe_to_parquet_w_extra_fields(module_under_test, monkeypatch):
1138+
def test_dataframe_to_parquet_w_extra_fields(module_under_test):
11321139
with pytest.raises(ValueError) as exc_context:
11331140
module_under_test.dataframe_to_parquet(
11341141
pandas.DataFrame(), (schema.SchemaField("not_in_df", "STRING"),), None

tests/unit/test_table.py

+6-6
Original file line number · Diff line number · Diff line change
@@ -45,18 +45,18 @@
4545
except (ImportError, AttributeError): # pragma: NO COVER
4646
geopandas = None
4747

48-
try:
49-
import pyarrow
50-
import pyarrow.types
51-
except ImportError: # pragma: NO COVER
52-
pyarrow = None
53-
5448
try:
5549
from tqdm import tqdm
5650
except (ImportError, AttributeError): # pragma: NO COVER
5751
tqdm = None
5852

5953
from google.cloud.bigquery.dataset import DatasetReference
54+
from google.cloud.bigquery import _helpers
55+
56+
57+
pyarrow = _helpers.PYARROW_VERSIONS.try_import()
58+
if pyarrow:
59+
import pyarrow.types
6060

6161

6262
def _mock_client():

0 commit comments

Comments (0)