Commit ea8519c

Avoid importing pandas and numpy in runtime and module level (#33483)
* Avoid importing pandas and numpy in runtime, and import them in the methods which use them instead of the module
* fix salesforce tests
1 parent 996d8c5 commit ea8519c
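
The same pattern repeats across all nine files: the module-level import moves under typing.TYPE_CHECKING, so type checkers and annotations still see the name, while the real import happens inside the method that uses it. A minimal sketch of the idea, using a hypothetical hook class (the names are illustrative, not from the commit):

from __future__ import annotations

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Seen only by static type checkers; never executed at runtime.
    import pandas as pd


class ExampleHook:  # hypothetical class, for illustration only
    def get_pandas_df(self, sql: str) -> pd.DataFrame:
        # Deferred import: pandas loads on the first call, not when this
        # module is imported, keeping module import cheap and pandas optional.
        import pandas as pd

        return pd.DataFrame({"query": [sql]})

Because these modules use from __future__ import annotations, the pd.DataFrame annotation is stored as a string and never evaluated at runtime, so the TYPE_CHECKING-only name is safe.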

File tree

9 files changed

+44
-26
lines changed

9 files changed

+44
-26
lines changed

airflow/providers/apache/hive/hooks/hive.py

Lines changed: 9 additions & 6 deletions
@@ -26,16 +26,12 @@
 import warnings
 from collections import OrderedDict
 from tempfile import NamedTemporaryFile, TemporaryDirectory
-from typing import Any, Iterable, Mapping
+from typing import TYPE_CHECKING, Any, Iterable, Mapping

 from airflow.exceptions import AirflowProviderDeprecationWarning

-try:
+if TYPE_CHECKING:
     import pandas as pd
-except ImportError as e:
-    from airflow.exceptions import AirflowOptionalProviderFeatureException
-
-    raise AirflowOptionalProviderFeatureException(e)

 import csv

@@ -1055,6 +1051,13 @@ def get_pandas_df(  # type: ignore

         :return: pandas.DateFrame
         """
+        try:
+            import pandas as pd
+        except ImportError as e:
+            from airflow.exceptions import AirflowOptionalProviderFeatureException
+
+            raise AirflowOptionalProviderFeatureException(e)
+
         res = self.get_results(sql, schema=schema, hive_conf=hive_conf)
         df = pd.DataFrame(res["data"], columns=[c[0] for c in res["header"]], **kwargs)
         return df
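
The user-visible effect in the hive hook: with pandas missing, importing the hook module used to raise AirflowOptionalProviderFeatureException immediately; after this change the module imports cleanly and the exception surfaces only when get_pandas_df() is actually called. A hedged usage sketch (assumes a configured default Hive connection; connection setup omitted):

# Importing the module no longer requires pandas to be installed.
from airflow.providers.apache.hive.hooks.hive import HiveServer2Hook

hook = HiveServer2Hook()  # assumes a default hiveserver2 connection exists

# Only this call needs pandas; if pandas is missing, the call (not the
# import) raises AirflowOptionalProviderFeatureException.
df = hook.get_pandas_df("SELECT 1")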

airflow/providers/exasol/hooks/exasol.py

Lines changed: 4 additions & 2 deletions
@@ -18,14 +18,16 @@
 from __future__ import annotations

 from contextlib import closing
-from typing import Any, Callable, Iterable, Mapping, Sequence, TypeVar, overload
+from typing import TYPE_CHECKING, Any, Callable, Iterable, Mapping, Sequence, TypeVar, overload

-import pandas as pd
 import pyexasol
 from pyexasol import ExaConnection, ExaStatement

 from airflow.providers.common.sql.hooks.sql import DbApiHook, return_single_query_results

+if TYPE_CHECKING:
+    import pandas as pd
+
 T = TypeVar("T")
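
One consequence of a TYPE_CHECKING-only import worth knowing: the name pd does not exist at runtime, so the string annotations themselves are fine, but anything that evaluates annotations will fail. A small sketch of the trade-off (illustrative function, not from the commit):

from __future__ import annotations

import typing

if typing.TYPE_CHECKING:
    import pandas as pd  # absent at runtime


def frame_rows(df: pd.DataFrame) -> int:
    # The annotation is stored as the string "pd.DataFrame" and is never
    # evaluated during normal execution.
    return len(df.index)


print(frame_rows.__annotations__["df"])  # 'pd.DataFrame'

# But evaluating annotations at runtime, e.g. via typing.get_type_hints,
# raises NameError because pd is only defined for type checkers.
try:
    typing.get_type_hints(frame_rows)
except NameError as err:
    print("evaluation fails:", err)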

airflow/providers/google/cloud/hooks/bigquery.py

Lines changed: 4 additions & 2 deletions
@@ -28,9 +28,8 @@
 import warnings
 from copy import deepcopy
 from datetime import datetime, timedelta
-from typing import Any, Iterable, Mapping, NoReturn, Sequence, Union, cast
+from typing import TYPE_CHECKING, Any, Iterable, Mapping, NoReturn, Sequence, Union, cast

-import pandas as pd
 from aiohttp import ClientSession as ClientSession
 from gcloud.aio.bigquery import Job, Table as Table_async
 from google.api_core.page_iterator import HTTPIterator

@@ -69,6 +68,9 @@
 from airflow.utils.helpers import convert_camel_to_snake
 from airflow.utils.log.logging_mixin import LoggingMixin

+if TYPE_CHECKING:
+    import pandas as pd
+
 log = logging.getLogger(__name__)

 BigQueryJob = Union[CopyJob, QueryJob, LoadJob, ExtractJob]

airflow/providers/influxdb/hooks/influxdb.py

Lines changed: 5 additions & 1 deletion
@@ -24,7 +24,8 @@
 """
 from __future__ import annotations

-import pandas as pd
+from typing import TYPE_CHECKING
+
 from influxdb_client import InfluxDBClient
 from influxdb_client.client.flux_table import FluxTable
 from influxdb_client.client.write.point import Point

@@ -33,6 +34,9 @@
 from airflow.hooks.base import BaseHook
 from airflow.models import Connection

+if TYPE_CHECKING:
+    import pandas as pd
+

 class InfluxDBHook(BaseHook):
     """Interact with InfluxDB.

airflow/providers/oracle/hooks/oracle.py

Lines changed: 5 additions & 6 deletions
@@ -24,12 +24,6 @@
 import oracledb

 from airflow.exceptions import AirflowProviderDeprecationWarning
-
-try:
-    import numpy
-except ImportError:
-    numpy = None  # type: ignore
-
 from airflow.providers.common.sql.hooks.sql import DbApiHook

 PARAM_TYPES = {bool, float, int, str}

@@ -280,6 +274,11 @@ def insert_rows(
             Set 1 to insert each row in each single transaction
         :param replace: Whether to replace instead of insert
         """
+        try:
+            import numpy
+        except ImportError:
+            numpy = None  # type: ignore
+
         if target_fields:
             target_fields = ", ".join(target_fields)
             target_fields = f"({target_fields})"
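
oracle.py keeps its graceful degradation but relocates it: insert_rows() binds numpy = None when the import fails, and the code below can branch on that sentinel. A sketch of the sentinel pattern in isolation (the helper function is illustrative, not the hook's actual code):

try:
    import numpy
except ImportError:
    numpy = None  # sentinel: numpy-specific handling disabled


def to_plain_python(value):
    # Illustrative helper: downcast numpy scalars only when numpy is present.
    if numpy is not None and isinstance(value, numpy.integer):
        return int(value)
    if numpy is not None and isinstance(value, numpy.floating):
        return float(value)
    return value


print(to_plain_python(42))  # works with or without numpy installed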

airflow/providers/salesforce/hooks/salesforce.py

Lines changed: 8 additions & 2 deletions
@@ -26,14 +26,16 @@
 import logging
 import time
 from functools import cached_property
-from typing import Any, Iterable
+from typing import TYPE_CHECKING, Any, Iterable

-import pandas as pd
 from requests import Session
 from simple_salesforce import Salesforce, api

 from airflow.hooks.base import BaseHook

+if TYPE_CHECKING:
+    import pandas as pd
+
 log = logging.getLogger(__name__)

@@ -240,6 +242,8 @@ def _to_timestamp(cls, column: pd.Series) -> pd.Series:
         # between 0 and 10 are turned into timestamps
         # if the column cannot be converted,
         # just return the original column untouched
+        import pandas as pd
+
         try:
             column = pd.to_datetime(column)
         except ValueError:

@@ -355,6 +359,8 @@ def object_to_df(
             to the resulting data that marks when the data was fetched from Salesforce. Default: False
         :return: the dataframe.
         """
+        import pandas as pd
+
         # this line right here will convert all integers to floats
         # if there are any None/np.nan values in the column
         # that's because None/np.nan cannot exist in an integer column
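
Repeating import pandas as pd inside _to_timestamp and object_to_df costs almost nothing after the first call, because Python caches imported modules in sys.modules; a repeated import is a cache lookup plus a local name binding. A quick way to convince yourself (assumes pandas is installed):

import sys
import timeit

import pandas  # pay the real import cost once, up front

assert "pandas" in sys.modules  # cached from now on

# Re-importing an already-loaded module is effectively a dict lookup.
print(timeit.timeit("import pandas", number=100_000))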

airflow/providers/slack/transfers/sql_to_slack.py

Lines changed: 2 additions & 1 deletion
@@ -19,7 +19,6 @@
 from tempfile import NamedTemporaryFile
 from typing import TYPE_CHECKING, Any, Iterable, Mapping, Sequence

-import pandas as pd
 from tabulate import tabulate

 from airflow.exceptions import AirflowException

@@ -31,6 +30,8 @@
 from airflow.providers.slack.utils import parse_filename

 if TYPE_CHECKING:
+    import pandas as pd
+
     from airflow.utils.context import Context

tests/providers/salesforce/hooks/test_salesforce.py

Lines changed: 5 additions & 5 deletions
@@ -338,7 +338,7 @@ def test_write_object_to_file_invalid_format(self):
         self.salesforce_hook.write_object_to_file(query_results=[], filename="test", fmt="test")

     @patch(
-        "airflow.providers.salesforce.hooks.salesforce.pd.DataFrame.from_records",
+        "pandas.DataFrame.from_records",
         return_value=pd.DataFrame({"test": [1, 2, 3], "dict": [nan, nan, {"foo": "bar"}]}),
     )
     def test_write_object_to_file_csv(self, mock_data_frame):

@@ -360,7 +360,7 @@ def test_write_object_to_file_csv(self, mock_data_frame):
         return_value={"fields": [{"name": "field_1", "type": "date"}]},
     )
     @patch(
-        "airflow.providers.salesforce.hooks.salesforce.pd.DataFrame.from_records",
+        "pandas.DataFrame.from_records",
         return_value=pd.DataFrame({"test": [1, 2, 3], "field_1": ["2019-01-01", "2019-01-02", "2019-01-03"]}),
     )
     def test_write_object_to_file_json_with_timestamp_conversion(self, mock_data_frame, mock_describe_object):

@@ -383,7 +383,7 @@ def test_write_object_to_file_json_with_timestamp_conversion(self, mock_data_frame, mock_describe_object):

     @patch("airflow.providers.salesforce.hooks.salesforce.time.time", return_value=1.23)
     @patch(
-        "airflow.providers.salesforce.hooks.salesforce.pd.DataFrame.from_records",
+        "pandas.DataFrame.from_records",
         return_value=pd.DataFrame({"test": [1, 2, 3]}),
     )
     def test_write_object_to_file_ndjson_with_record_time(self, mock_data_frame, mock_time):

@@ -416,7 +416,7 @@ def test_write_object_to_file_ndjson_with_record_time(self, mock_data_frame, mock_time):
         return_value={"fields": [{"name": "field_1", "type": "date"}]},
     )
     @patch(
-        "airflow.providers.salesforce.hooks.salesforce.pd.DataFrame.from_records",
+        "pandas.DataFrame.from_records",
         return_value=pd.DataFrame({"test": [1, 2, 3], "field_1": ["2019-01-01", "2019-01-02", "2019-01-03"]}),
     )
     def test_object_to_df_with_timestamp_conversion(self, mock_data_frame, mock_describe_object):

@@ -434,7 +434,7 @@ def test_object_to_df_with_timestamp_conversion(self, mock_data_frame, mock_describe_object):

     @patch("airflow.providers.salesforce.hooks.salesforce.time.time", return_value=1.23)
     @patch(
-        "airflow.providers.salesforce.hooks.salesforce.pd.DataFrame.from_records",
+        "pandas.DataFrame.from_records",
         return_value=pd.DataFrame({"test": [1, 2, 3]}),
     )
     def test_object_to_df_with_record_time(self, mock_data_frame, mock_time):
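
The test changes follow directly from the import move: airflow.providers.salesforce.hooks.salesforce no longer has a module attribute pd, so the old patch target does not exist. Since the hook now resolves pandas through sys.modules at call time, patching pandas itself works. A minimal sketch of the difference with unittest.mock:

from unittest.mock import patch

import pandas as pd

# Old target (now broken): the hook module no longer binds "pd" at import:
#     patch("airflow.providers.salesforce.hooks.salesforce.pd.DataFrame.from_records", ...)

# New target: patch the attribute on pandas itself; the method-local
# "import pandas as pd" picks up the patched class from sys.modules.
with patch("pandas.DataFrame.from_records", return_value=pd.DataFrame({"test": [1, 2, 3]})):
    df = pd.DataFrame.from_records([{"ignored": True}])

print(df)  # shows the patched return value, not the input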

tests/serialization/serializers/test_serializers.py

Lines changed: 2 additions & 1 deletion
@@ -20,7 +20,6 @@
 import decimal

 import numpy
-import pandas as pd
 import pendulum.tz
 import pytest
 from pendulum import DateTime

@@ -94,6 +93,8 @@ def test_params(self):
         assert i["x"] == d["x"]

     def test_pandas(self):
+        import pandas as pd
+
         i = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]})
         e = serialize(i)
         d = deserialize(e)
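
Moving the import into test_pandas means that, if pandas were absent, only this test would fail instead of the whole module erroring at collection time. A related pytest idiom, not used by this commit, is to skip rather than fail when the optional dependency is missing:

import pytest


def test_pandas_roundtrip():
    # pytest.importorskip imports pandas if available and skips the test
    # otherwise; illustrative variant, not the commit's approach.
    pd = pytest.importorskip("pandas")

    df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]})
    assert df.shape == (2, 2)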
