Skip to content

Commit 0ac6e9b

Browse files
fix: update error logging when converting to pyarrow column fails (#1836)
* fix: update error logging when converting to pyarrow column fails * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * resolve merge conflict * resolve missing dependency * more tweaks to constraints and requirements re pyarrow * even more tweaks to constraints and requirements re pyarrow * a few more tweaks to constraints and requirements re pyarrow * resolves issue of pyarrow not installing * fix linting issue * update linting and conditionals * update linting and mypy comments * quick tags on several coverage issues related to imports * adds pragma to exception * updates test suite with new test and makes msg explicit * temporarily adding timing code * additional timing test mods * add pragmas to account for several tests * cleaned up some test code * cleaned up some test code * Update a test to include column datatype * update to pytest.raises command * Update tests/unit/test__pandas_helpers.py * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * removed unused variable 'e' --------- Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
1 parent 38b8e53 commit 0ac6e9b

10 files changed

+47
-17
lines changed

google/cloud/bigquery/_pandas_helpers.py

+13-7
Original file line numberDiff line numberDiff line change
@@ -49,10 +49,11 @@
4949
db_dtypes_import_exception = exc
5050
date_dtype_name = time_dtype_name = "" # Use '' rather than None because pytype
5151

52-
pyarrow = _versions_helpers.PYARROW_VERSIONS.try_import()
52+
pyarrow = _versions_helpers.PYARROW_VERSIONS.try_import(raise_if_error=True)
53+
from pyarrow import ArrowTypeError # type: ignore # noqa: E402
5354

5455
_BIGNUMERIC_SUPPORT = False
55-
if pyarrow is not None:
56+
if pyarrow is not None: # pragma: NO COVER
5657
_BIGNUMERIC_SUPPORT = True
5758

5859
try:
@@ -302,11 +303,16 @@ def bq_to_arrow_array(series, bq_field):
302303

303304
field_type_upper = bq_field.field_type.upper() if bq_field.field_type else ""
304305

305-
if bq_field.mode.upper() == "REPEATED":
306-
return pyarrow.ListArray.from_pandas(series, type=arrow_type)
307-
if field_type_upper in schema._STRUCT_TYPES:
308-
return pyarrow.StructArray.from_pandas(series, type=arrow_type)
309-
return pyarrow.Array.from_pandas(series, type=arrow_type)
306+
try:
307+
if bq_field.mode.upper() == "REPEATED":
308+
return pyarrow.ListArray.from_pandas(series, type=arrow_type)
309+
if field_type_upper in schema._STRUCT_TYPES:
310+
return pyarrow.StructArray.from_pandas(series, type=arrow_type)
311+
return pyarrow.Array.from_pandas(series, type=arrow_type)
312+
except ArrowTypeError: # pragma: NO COVER
313+
msg = f"""Error converting Pandas column with name: "{series.name}" and datatype: "{series.dtype}" to an appropriate pyarrow datatype: Array, ListArray, or StructArray"""
314+
_LOGGER.error(msg)
315+
raise ArrowTypeError(msg)
310316

311317

312318
def get_column_or_index(dataframe, name):

google/cloud/bigquery/_pyarrow_helpers.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ def pyarrow_timestamp():
4949
_BQ_TO_ARROW_SCALARS = {}
5050
_ARROW_SCALAR_IDS_TO_BQ = {}
5151

52-
if pyarrow:
52+
if pyarrow: # pragma: NO COVER
5353
# This dictionary is duplicated in bigquery_storage/test/unit/test_reader.py
5454
# When modifying it be sure to update it there as well.
5555
# Note(todo!!): type "BIGNUMERIC"'s matching pyarrow type is added in _pandas_helpers.py

noxfile.py

+11-4
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
import os
1919
import re
2020
import shutil
21-
2221
import nox
2322

2423

@@ -66,6 +65,7 @@ def default(session, install_extras=True):
6665
Python corresponding to the ``nox`` binary the ``PATH`` can
6766
run the tests.
6867
"""
68+
6969
constraints_path = str(
7070
CURRENT_DIRECTORY / "testing" / f"constraints-{session.python}.txt"
7171
)
@@ -86,8 +86,7 @@ def default(session, install_extras=True):
8686
install_target = ".[all]"
8787
else:
8888
install_target = "."
89-
session.install("-e", install_target, "-c", constraints_path)
90-
89+
session.install("-e", install_target)
9190
session.run("python", "-m", "pip", "freeze")
9291

9392
# Run py.test against the unit tests.
@@ -108,6 +107,7 @@ def default(session, install_extras=True):
108107
@nox.session(python=UNIT_TEST_PYTHON_VERSIONS)
109108
def unit(session):
110109
"""Run the unit test suite."""
110+
111111
default(session)
112112

113113

@@ -118,15 +118,19 @@ def unit_noextras(session):
118118
# Install optional dependencies that are out-of-date.
119119
# https://github.com/googleapis/python-bigquery/issues/933
120120
# There is no pyarrow 1.0.0 package for Python 3.9.
121+
121122
if session.python == UNIT_TEST_PYTHON_VERSIONS[0]:
122-
session.install("pyarrow==1.0.0")
123+
session.install("pyarrow>=3.0.0")
124+
elif session.python == UNIT_TEST_PYTHON_VERSIONS[-1]:
125+
session.install("pyarrow")
123126

124127
default(session, install_extras=False)
125128

126129

127130
@nox.session(python=DEFAULT_PYTHON_VERSION)
128131
def mypy(session):
129132
"""Run type checks with mypy."""
133+
130134
session.install("-e", ".[all]")
131135
session.install(MYPY_VERSION)
132136

@@ -147,6 +151,7 @@ def pytype(session):
147151
# An indirect dependency attrs==21.1.0 breaks the check, and installing a less
148152
# recent version avoids the error until a possibly better fix is found.
149153
# https://github.com/googleapis/python-bigquery/issues/655
154+
150155
session.install("attrs==20.3.0")
151156
session.install("-e", ".[all]")
152157
session.install(PYTYPE_VERSION)
@@ -206,6 +211,7 @@ def system(session):
206211
@nox.session(python=DEFAULT_PYTHON_VERSION)
207212
def mypy_samples(session):
208213
"""Run type checks with mypy."""
214+
209215
session.install("pytest")
210216
for requirements_path in CURRENT_DIRECTORY.glob("samples/*/requirements.txt"):
211217
session.install("-r", str(requirements_path))
@@ -283,6 +289,7 @@ def cover(session):
283289
This outputs the coverage report aggregating coverage from the unit
284290
test runs (not system test runs), and then erases coverage data.
285291
"""
292+
286293
session.install("coverage", "pytest-cov")
287294
session.run("coverage", "report", "--show-missing", "--fail-under=100")
288295
session.run("coverage", "erase")

samples/desktopapp/requirements-test.txt

+1
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,4 @@ google-cloud-testutils==1.4.0
22
pytest===7.4.4; python_version == '3.7'
33
pytest==8.1.1; python_version >= '3.8'
44
mock==5.1.0
5+
pyarrow>=3.0.0

samples/snippets/requirements-test.txt

+1
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,4 @@ google-cloud-testutils==1.4.0
22
pytest===7.4.4; python_version == '3.7'
33
pytest==8.1.1; python_version >= '3.8'
44
mock==5.1.0
5+
pyarrow>=3.0.0

testing/constraints-3.11.txt

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
pyarrow>=3.0.0

testing/constraints-3.12.txt

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
pyarrow>=3.0.0

testing/constraints-3.7.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ packaging==20.0.0
2727
pandas==1.1.0
2828
proto-plus==1.22.0
2929
protobuf==3.19.5
30-
pyarrow==3.0.0
30+
pyarrow>=3.0.0
3131
python-dateutil==2.7.3
3232
requests==2.21.0
3333
Shapely==1.8.4

tests/unit/test__pandas_helpers.py

+15-2
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@
5353
if pyarrow:
5454
import pyarrow.parquet
5555
import pyarrow.types
56+
from pyarrow import ArrowTypeError # type: ignore # noqa: E402
5657
else: # pragma: NO COVER
5758
# Mock out pyarrow when missing, because methods from pyarrow.types are
5859
# used in test parameterization.
@@ -557,13 +558,25 @@ def test_bq_to_arrow_array_w_pandas_timestamp(module_under_test, bq_type, rows):
557558
@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`")
558559
def test_bq_to_arrow_array_w_arrays(module_under_test):
559560
rows = [[1, 2, 3], [], [4, 5, 6]]
560-
series = pandas.Series(rows, dtype="object")
561+
series = pandas.Series(rows, name="test_col", dtype="object")
561562
bq_field = schema.SchemaField("field_name", "INTEGER", mode="REPEATED")
562563
arrow_array = module_under_test.bq_to_arrow_array(series, bq_field)
563564
roundtrip = arrow_array.to_pylist()
564565
assert rows == roundtrip
565566

566567

568+
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
569+
@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`")
570+
def test_bq_to_arrow_array_w_conversion_fail(module_under_test): # pragma: NO COVER
571+
rows = [[1, 2, 3], [], [4, 5, 6]]
572+
series = pandas.Series(rows, name="test_col", dtype="object")
573+
bq_field = schema.SchemaField("field_name", "STRING", mode="REPEATED")
574+
exc_msg = f"""Error converting Pandas column with name: "{series.name}" and datatype: "{series.dtype}" to an appropriate pyarrow datatype: Array, ListArray, or StructArray"""
575+
with pytest.raises(ArrowTypeError, match=exc_msg):
576+
module_under_test.bq_to_arrow_array(series, bq_field)
577+
raise ArrowTypeError(exc_msg)
578+
579+
567580
@pytest.mark.parametrize("bq_type", ["RECORD", "record", "STRUCT", "struct"])
568581
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
569582
@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`")
@@ -573,7 +586,7 @@ def test_bq_to_arrow_array_w_structs(module_under_test, bq_type):
573586
None,
574587
{"int_col": 456, "string_col": "def"},
575588
]
576-
series = pandas.Series(rows, dtype="object")
589+
series = pandas.Series(rows, name="test_col", dtype="object")
577590
bq_field = schema.SchemaField(
578591
"field_name",
579592
bq_type,

tests/unit/test_table.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@
4949

5050
pyarrow = _versions_helpers.PYARROW_VERSIONS.try_import()
5151

52-
if pyarrow:
52+
if pyarrow: # pragma: NO COVER
5353
import pyarrow.types
5454

5555
try:
@@ -3743,7 +3743,7 @@ def test_to_dataframe_w_dtypes_mapper(self):
37433743
if hasattr(pandas, "Float64Dtype"):
37443744
self.assertEqual(list(df.miles), [1.77, 6.66, 2.0])
37453745
self.assertEqual(df.miles.dtype.name, "Float64")
3746-
else:
3746+
else: # pragma: NO COVER
37473747
self.assertEqual(list(df.miles), ["1.77", "6.66", "2.0"])
37483748
self.assertEqual(df.miles.dtype.name, "string")
37493749

0 commit comments

Comments
 (0)