Skip to content

Commit 2cb3563

Browse files
busunkim96, gcf-owl-bot[bot], dinagraves, tswast
authored
fix: remove pytz dependency and require pyarrow>=3.0.0 (#875)
* fix: remove pytz dependency
* 🦉 Updates from OwlBot See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md
* fix(deps): require pyarrow>=3.0.0
* remove version check for pyarrow
* require pyarrow 3.0 in pandas extra
* remove _BIGNUMERIC_SUPPORT references from tests

Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
Co-authored-by: Dina Graves Portman <[email protected]>
Co-authored-by: Tim Swast <[email protected]>
1 parent cd21df1 commit 2cb3563

12 files changed

+78
-123
lines changed

docs/snippets.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -363,7 +363,6 @@ def test_update_table_expiration(client, to_delete):
363363

364364
# [START bigquery_update_table_expiration]
365365
import datetime
366-
import pytz
367366

368367
# from google.cloud import bigquery
369368
# client = bigquery.Client()
@@ -375,7 +374,9 @@ def test_update_table_expiration(client, to_delete):
375374
assert table.expires is None
376375

377376
# set table to expire 5 days from now
378-
expiration = datetime.datetime.now(pytz.utc) + datetime.timedelta(days=5)
377+
expiration = datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta(
378+
days=5
379+
)
379380
table.expires = expiration
380381
table = client.update_table(table, ["expires"]) # API request
381382

google/cloud/bigquery/_pandas_helpers.py

+4-13
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,6 @@
2020
import queue
2121
import warnings
2222

23-
from packaging import version
24-
2523
try:
2624
import pandas
2725
except ImportError: # pragma: NO COVER
@@ -110,6 +108,7 @@ def pyarrow_timestamp():
110108
# This dictionary is duplicated in bigquery_storage/test/unit/test_reader.py
111109
# When modifying it be sure to update it there as well.
112110
BQ_TO_ARROW_SCALARS = {
111+
"BIGNUMERIC": pyarrow_bignumeric,
113112
"BOOL": pyarrow.bool_,
114113
"BOOLEAN": pyarrow.bool_,
115114
"BYTES": pyarrow.binary,
@@ -146,23 +145,15 @@ def pyarrow_timestamp():
146145
pyarrow.date64().id: "DATETIME", # because millisecond resolution
147146
pyarrow.binary().id: "BYTES",
148147
pyarrow.string().id: "STRING", # also alias for pyarrow.utf8()
149-
# The exact scale and precision don't matter, see below.
150-
pyarrow.decimal128(38, scale=9).id: "NUMERIC",
151-
}
152-
153-
if version.parse(pyarrow.__version__) >= version.parse("3.0.0"):
154-
BQ_TO_ARROW_SCALARS["BIGNUMERIC"] = pyarrow_bignumeric
155148
# The exact decimal's scale and precision are not important, as only
156149
# the type ID matters, and it's the same for all decimal256 instances.
157-
ARROW_SCALAR_IDS_TO_BQ[pyarrow.decimal256(76, scale=38).id] = "BIGNUMERIC"
158-
_BIGNUMERIC_SUPPORT = True
159-
else:
160-
_BIGNUMERIC_SUPPORT = False
150+
pyarrow.decimal128(38, scale=9).id: "NUMERIC",
151+
pyarrow.decimal256(76, scale=38).id: "BIGNUMERIC",
152+
}
161153

162154
else: # pragma: NO COVER
163155
BQ_TO_ARROW_SCALARS = {} # pragma: NO COVER
164156
ARROW_SCALAR_IDS_TO_BQ = {} # pragma: NO_COVER
165-
_BIGNUMERIC_SUPPORT = False # pragma: NO COVER
166157

167158

168159
def bq_to_arrow_struct_data_type(field):

google/cloud/bigquery/table.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
import datetime
2121
import functools
2222
import operator
23-
import pytz
2423
import typing
2524
from typing import Any, Dict, Iterable, Iterator, Optional, Tuple
2625
import warnings
@@ -1969,7 +1968,7 @@ def to_dataframe(
19691968
# Pandas, we set the timestamp_as_object parameter to True, if necessary.
19701969
types_to_check = {
19711970
pyarrow.timestamp("us"),
1972-
pyarrow.timestamp("us", tz=pytz.UTC),
1971+
pyarrow.timestamp("us", tz=datetime.timezone.utc),
19731972
}
19741973

19751974
for column in record_batch:

samples/client_query_w_timestamp_params.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ def client_query_w_timestamp_params():
1818
# [START bigquery_query_params_timestamps]
1919
import datetime
2020

21-
import pytz
2221
from google.cloud import bigquery
2322

2423
# Construct a BigQuery client object.
@@ -30,7 +29,7 @@ def client_query_w_timestamp_params():
3029
bigquery.ScalarQueryParameter(
3130
"ts_value",
3231
"TIMESTAMP",
33-
datetime.datetime(2016, 12, 7, 8, 0, tzinfo=pytz.UTC),
32+
datetime.datetime(2016, 12, 7, 8, 0, tzinfo=datetime.timezone.utc),
3433
)
3534
]
3635
)

setup.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -54,9 +54,9 @@
5454
# grpc.Channel.close() method isn't added until 1.32.0.
5555
# https://github.com/grpc/grpc/pull/15254
5656
"grpcio >= 1.38.1, < 2.0dev",
57-
"pyarrow >= 1.0.0, < 6.0dev",
57+
"pyarrow >= 3.0.0, < 6.0dev",
5858
],
59-
"pandas": ["pandas>=0.23.0", "pyarrow >= 1.0.0, < 6.0dev"],
59+
"pandas": ["pandas>=0.23.0", "pyarrow >= 3.0.0, < 6.0dev"],
6060
"bignumeric_type": ["pyarrow >= 3.0.0, < 6.0dev"],
6161
"tqdm": ["tqdm >= 4.7.4, <5.0.0dev"],
6262
"opentelemetry": [

testing/constraints-3.6.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ opentelemetry-sdk==0.11b0
1616
pandas==0.23.0
1717
proto-plus==1.10.0
1818
protobuf==3.12.0
19-
pyarrow==1.0.0
19+
pyarrow==3.0.0
2020
requests==2.18.0
2121
six==1.13.0
2222
tqdm==4.7.4

tests/system/test_client.py

+5-9
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@
3030
import psutil
3131
import pytest
3232

33-
from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT
3433
from . import helpers
3534

3635
try:
@@ -1972,15 +1971,12 @@ def test_query_w_query_params(self):
19721971
"expected": {"friends": [phred_name, bharney_name]},
19731972
"query_parameters": [with_friends_param],
19741973
},
1974+
{
1975+
"sql": "SELECT @bignum_param",
1976+
"expected": bignum,
1977+
"query_parameters": [bignum_param],
1978+
},
19751979
]
1976-
if _BIGNUMERIC_SUPPORT:
1977-
examples.append(
1978-
{
1979-
"sql": "SELECT @bignum_param",
1980-
"expected": bignum,
1981-
"query_parameters": [bignum_param],
1982-
}
1983-
)
19841980

19851981
for example in examples:
19861982
jconfig = QueryJobConfig()

tests/system/test_pandas.py

+18-24
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,8 @@
2424
import google.api_core.retry
2525
import pkg_resources
2626
import pytest
27-
import pytz
2827

2928
from google.cloud import bigquery
30-
from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT
3129
from . import helpers
3230

3331

@@ -64,7 +62,7 @@ def test_load_table_from_dataframe_w_automatic_schema(bigquery_client, dataset_i
6462
datetime.datetime(2012, 3, 14, 15, 16),
6563
],
6664
dtype="datetime64[ns]",
67-
).dt.tz_localize(pytz.utc),
65+
).dt.tz_localize(datetime.timezone.utc),
6866
),
6967
(
7068
"dt_col",
@@ -189,12 +187,11 @@ def test_load_table_from_dataframe_w_nulls(bigquery_client, dataset_id):
189187
bigquery.SchemaField("geo_col", "GEOGRAPHY"),
190188
bigquery.SchemaField("int_col", "INTEGER"),
191189
bigquery.SchemaField("num_col", "NUMERIC"),
190+
bigquery.SchemaField("bignum_col", "BIGNUMERIC"),
192191
bigquery.SchemaField("str_col", "STRING"),
193192
bigquery.SchemaField("time_col", "TIME"),
194193
bigquery.SchemaField("ts_col", "TIMESTAMP"),
195194
)
196-
if _BIGNUMERIC_SUPPORT:
197-
scalars_schema += (bigquery.SchemaField("bignum_col", "BIGNUMERIC"),)
198195

199196
table_schema = scalars_schema + (
200197
# TODO: Array columns can't be read due to NULLABLE versus REPEATED
@@ -216,12 +213,11 @@ def test_load_table_from_dataframe_w_nulls(bigquery_client, dataset_id):
216213
("geo_col", nulls),
217214
("int_col", nulls),
218215
("num_col", nulls),
216+
("bignum_col", nulls),
219217
("str_col", nulls),
220218
("time_col", nulls),
221219
("ts_col", nulls),
222220
]
223-
if _BIGNUMERIC_SUPPORT:
224-
df_data.append(("bignum_col", nulls))
225221
df_data = collections.OrderedDict(df_data)
226222
dataframe = pandas.DataFrame(df_data, columns=df_data.keys())
227223

@@ -297,12 +293,11 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id
297293
bigquery.SchemaField("geo_col", "GEOGRAPHY"),
298294
bigquery.SchemaField("int_col", "INTEGER"),
299295
bigquery.SchemaField("num_col", "NUMERIC"),
296+
bigquery.SchemaField("bignum_col", "BIGNUMERIC"),
300297
bigquery.SchemaField("str_col", "STRING"),
301298
bigquery.SchemaField("time_col", "TIME"),
302299
bigquery.SchemaField("ts_col", "TIMESTAMP"),
303300
)
304-
if _BIGNUMERIC_SUPPORT:
305-
scalars_schema += (bigquery.SchemaField("bignum_col", "BIGNUMERIC"),)
306301

307302
table_schema = scalars_schema + (
308303
# TODO: Array columns can't be read due to NULLABLE versus REPEATED
@@ -340,6 +335,14 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id
340335
decimal.Decimal("99999999999999999999999999999.999999999"),
341336
],
342337
),
338+
(
339+
"bignum_col",
340+
[
341+
decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)),
342+
None,
343+
decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)),
344+
],
345+
),
343346
("str_col", ["abc", None, "def"]),
344347
(
345348
"time_col",
@@ -348,23 +351,14 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id
348351
(
349352
"ts_col",
350353
[
351-
datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=pytz.utc),
354+
datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc),
352355
None,
353-
datetime.datetime(9999, 12, 31, 23, 59, 59, 999999, tzinfo=pytz.utc),
356+
datetime.datetime(
357+
9999, 12, 31, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc
358+
),
354359
],
355360
),
356361
]
357-
if _BIGNUMERIC_SUPPORT:
358-
df_data.append(
359-
(
360-
"bignum_col",
361-
[
362-
decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)),
363-
None,
364-
decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)),
365-
],
366-
)
367-
)
368362
df_data = collections.OrderedDict(df_data)
369363
dataframe = pandas.DataFrame(df_data, dtype="object", columns=df_data.keys())
370364

@@ -484,10 +478,10 @@ def test_load_table_from_dataframe_w_explicit_schema_source_format_csv(
484478
(
485479
"ts_col",
486480
[
487-
datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=pytz.utc),
481+
datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc),
488482
None,
489483
datetime.datetime(
490-
9999, 12, 31, 23, 59, 59, 999999, tzinfo=pytz.utc
484+
9999, 12, 31, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc
491485
),
492486
],
493487
),

tests/unit/job/test_base.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -295,11 +295,11 @@ def test_user_email(self):
295295
@staticmethod
296296
def _datetime_and_millis():
297297
import datetime
298-
import pytz
299298
from google.cloud._helpers import _millis
300299

301300
now = datetime.datetime.utcnow().replace(
302-
microsecond=123000, tzinfo=pytz.UTC # stats timestamps have ms precision
301+
microsecond=123000,
302+
tzinfo=datetime.timezone.utc, # stats timestamps have ms precision
303303
)
304304
return now, _millis(now)
305305

0 commit comments

Comments
 (0)