Skip to content

Commit cff210a

Browse files
authored
refactor(dependencies): pandas and numpy are now optional for non-backend installs (#9564)
1 parent 524a2fa commit cff210a

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

63 files changed

+794
-621
lines changed

.github/workflows/ibis-main.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -80,13 +80,13 @@ jobs:
8080
- name: install ibis
8181
run: poetry install --without dev --without docs --extras "visualization decompiler"
8282

83-
- name: install pyarrow
83+
- name: install numpy/pandas/pyarrow
8484
if: matrix.pyarrow
85-
run: poetry run pip install pyarrow pyarrow-hotfix
85+
run: poetry run pip install numpy pandas pyarrow pyarrow-hotfix
8686

87-
- name: check pyarrow import
87+
- name: check imports
8888
if: matrix.pyarrow
89-
run: poetry run python -c 'import pyarrow, pyarrow_hotfix'
89+
run: poetry run python -c 'import numpy, pandas, pyarrow, pyarrow_hotfix'
9090

9191
- uses: extractions/setup-just@v2
9292
env:

.github/workflows/nix.yml

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,12 +59,19 @@ jobs:
5959
name: ibis
6060
authToken: ${{ secrets.CACHIX_AUTH_TOKEN }}
6161
extraPullNames: nix-community,poetry2nix
62-
- name: nix build and test
62+
- name: nix build and test core
6363
run: |
6464
set -euo pipefail
6565
6666
version='${{ matrix.python-version }}'
67-
nix build ".#ibis${version//./}" --fallback --keep-going --print-build-logs
67+
nix build ".#ibisCore${version//./}" --fallback --keep-going --print-build-logs
68+
69+
- name: nix build and test local
70+
run: |
71+
set -euo pipefail
72+
73+
version='${{ matrix.python-version }}'
74+
nix build ".#ibisLocal${version//./}" --fallback --keep-going --print-build-logs
6875
6976
- name: nix build devShell
7077
run: |

flake.nix

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -122,9 +122,9 @@
122122
in
123123
rec {
124124
packages = {
125-
inherit (pkgs) ibis310 ibis311 ibis312;
125+
inherit (pkgs) ibisCore310 ibisCore311 ibisCore312 ibisLocal310 ibisLocal311 ibisLocal312;
126126

127-
default = pkgs.ibis312;
127+
default = pkgs.ibisCore312;
128128

129129
inherit (pkgs) update-lock-files gen-examples check-release-notes-spelling;
130130
};

ibis/backends/bigquery/client.py

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,11 @@
22

33
from __future__ import annotations
44

5+
import contextlib
56
import functools
67

8+
import dateutil.parser
79
import google.cloud.bigquery as bq
8-
import pandas as pd
910

1011
import ibis.common.exceptions as com
1112
import ibis.expr.datatypes as dt
@@ -69,9 +70,9 @@ def bq_param_array(dtype: dt.Array, value, name):
6970

7071
@bigquery_param.register
7172
def bq_param_timestamp(_: dt.Timestamp, value, name):
72-
# TODO(phillipc): Not sure if this is the correct way to do this.
73-
timestamp_value = pd.Timestamp(value, tz="UTC").to_pydatetime()
74-
return bq.ScalarQueryParameter(name, "TIMESTAMP", timestamp_value)
73+
with contextlib.suppress(TypeError):
74+
value = dateutil.parser.parse(value)
75+
return bq.ScalarQueryParameter(name, "TIMESTAMP", value.isoformat())
7576

7677

7778
@bigquery_param.register
@@ -96,9 +97,13 @@ def bq_param_boolean(_: dt.Boolean, value, name):
9697

9798
@bigquery_param.register
9899
def bq_param_date(_: dt.Date, value, name):
99-
return bq.ScalarQueryParameter(
100-
name, "DATE", pd.Timestamp(value).to_pydatetime().date()
101-
)
100+
with contextlib.suppress(TypeError):
101+
value = dateutil.parser.parse(value)
102+
103+
with contextlib.suppress(AttributeError):
104+
value = value.date()
105+
106+
return bq.ScalarQueryParameter(name, "DATE", value.isoformat())
102107

103108

104109
def rename_partitioned_column(table_expr, bq_table, partition_col):

ibis/backends/conftest.py

Lines changed: 4 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
from typing import TYPE_CHECKING, Any
1111

1212
import _pytest
13-
import pandas as pd
1413
import pytest
1514
from packaging.requirements import Requirement
1615
from packaging.version import parse as vparse
@@ -571,7 +570,7 @@ def geo_df(geo):
571570

572571

573572
@pytest.fixture
574-
def temp_table(con) -> str:
573+
def temp_table(con):
575574
"""Return a temporary table name.
576575
577576
Parameters
@@ -590,7 +589,7 @@ def temp_table(con) -> str:
590589

591590

592591
@pytest.fixture
593-
def temp_table2(con) -> str:
592+
def temp_table2(con):
594593
name = util.gen_name("temp_table2")
595594
yield name
596595
with contextlib.suppress(NotImplementedError):
@@ -606,7 +605,7 @@ def temp_table_orig(con, temp_table):
606605

607606

608607
@pytest.fixture
609-
def temp_view(ddl_con) -> str:
608+
def temp_view(ddl_con):
610609
"""Return a temporary view name.
611610
612611
Parameters
@@ -625,7 +624,7 @@ def temp_view(ddl_con) -> str:
625624

626625

627626
@pytest.fixture
628-
def alternate_current_database(ddl_con, ddl_backend) -> str:
627+
def alternate_current_database(ddl_con, ddl_backend):
629628
"""Create a temporary database and yield its name. Drops the created
630629
database upon completion.
631630
@@ -648,48 +647,6 @@ def alternate_current_database(ddl_con, ddl_backend) -> str:
648647
ddl_con.drop_database(name, force=True)
649648

650649

651-
@pytest.fixture
652-
def test_employee_schema() -> ibis.schema:
653-
sch = ibis.schema(
654-
[
655-
("first_name", "string"),
656-
("last_name", "string"),
657-
("department_name", "string"),
658-
("salary", "float64"),
659-
]
660-
)
661-
662-
return sch
663-
664-
665-
@pytest.fixture
666-
def test_employee_data_1():
667-
df = pd.DataFrame(
668-
{
669-
"first_name": ["A", "B", "C"],
670-
"last_name": ["D", "E", "F"],
671-
"department_name": ["AA", "BB", "CC"],
672-
"salary": [100.0, 200.0, 300.0],
673-
}
674-
)
675-
676-
return df
677-
678-
679-
@pytest.fixture
680-
def test_employee_data_2():
681-
df2 = pd.DataFrame(
682-
{
683-
"first_name": ["X", "Y", "Z"],
684-
"last_name": ["A", "B", "C"],
685-
"department_name": ["XX", "YY", "ZZ"],
686-
"salary": [400.0, 500.0, 600.0],
687-
}
688-
)
689-
690-
return df2
691-
692-
693650
@pytest.fixture
694651
def assert_sql(con, snapshot):
695652
def checker(expr, file_name="out.sql"):

ibis/backends/duckdb/converter.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,9 @@
11
from __future__ import annotations
22

3-
import numpy as np
4-
53
from ibis.formats.pandas import PandasData
64

75

86
class DuckDBPandasData(PandasData):
97
@staticmethod
108
def convert_Array(s, dtype, pandas_type):
11-
return s.replace(np.nan, None)
9+
return s.replace(float("nan"), None)

ibis/backends/duckdb/tests/test_client.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -266,11 +266,13 @@ def test_connect_duckdb(url, tmp_path):
266266
@pytest.mark.parametrize(
267267
"out_method, extension", [("to_csv", "csv"), ("to_parquet", "parquet")]
268268
)
269-
def test_connect_local_file(out_method, extension, test_employee_data_1, tmp_path):
270-
getattr(test_employee_data_1, out_method)(tmp_path / f"out.{extension}")
269+
def test_connect_local_file(out_method, extension, tmp_path):
270+
df = pd.DataFrame({"a": [1, 2, 3]})
271+
path = tmp_path / f"out.{extension}"
272+
getattr(df, out_method)(path)
271273
with pytest.warns(FutureWarning, match="v9.1"):
272274
# ibis.connect uses con.register
273-
con = ibis.connect(tmp_path / f"out.{extension}")
275+
con = ibis.connect(path)
274276
t = next(iter(con.tables.values()))
275277
assert not t.head().execute().empty
276278

ibis/backends/mysql/__init__.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
from typing import TYPE_CHECKING, Any
1111
from urllib.parse import unquote_plus
1212

13-
import numpy as np
1413
import pymysql
1514
import sqlglot as sg
1615
import sqlglot.expressions as sge
@@ -509,7 +508,7 @@ def _register_in_memory_table(self, op: ops.InMemoryTable) -> None:
509508

510509
df = op.data.to_frame()
511510
# NaN cannot be used with MySQL
512-
df = df.replace(np.nan, None)
511+
df = df.replace(float("nan"), None)
513512

514513
data = df.itertuples(index=False)
515514
sql = self._build_insert_template(

ibis/backends/oracle/__init__.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
from typing import TYPE_CHECKING, Any
1212
from urllib.parse import unquote_plus
1313

14-
import numpy as np
1514
import oracledb
1615
import sqlglot as sg
1716
import sqlglot.expressions as sge
@@ -534,7 +533,7 @@ def _register_in_memory_table(self, op: ops.InMemoryTable) -> None:
534533
properties=sge.Properties(expressions=[sge.TemporaryProperty()]),
535534
).sql(self.name)
536535

537-
data = op.data.to_frame().replace({np.nan: None})
536+
data = op.data.to_frame().replace(float("nan"), None)
538537
insert_stmt = self._build_insert_template(
539538
name, schema=schema, placeholder=":{i:d}"
540539
)

ibis/backends/polars/compiler.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,6 @@
88
from functools import partial, reduce, singledispatch
99
from math import isnan
1010

11-
import numpy as np
12-
import pandas as pd
1311
import polars as pl
1412

1513
import ibis.common.exceptions as com
@@ -834,7 +832,7 @@ def count_star(op, **kw):
834832

835833
@translate.register(ops.TimestampNow)
836834
def timestamp_now(op, **_):
837-
return pl.lit(pd.Timestamp("now", tz="UTC").tz_localize(None))
835+
return pl.lit(datetime.datetime.now())
838836

839837

840838
@translate.register(ops.DateNow)
@@ -1175,12 +1173,12 @@ def elementwise_udf(op, **kw):
11751173

11761174
@translate.register(ops.E)
11771175
def execute_e(op, **_):
1178-
return pl.lit(np.e)
1176+
return pl.lit(math.e)
11791177

11801178

11811179
@translate.register(ops.Pi)
11821180
def execute_pi(op, **_):
1183-
return pl.lit(np.pi)
1181+
return pl.lit(math.pi)
11841182

11851183

11861184
@translate.register(ops.Time)

ibis/backends/postgres/__init__.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,6 @@
88
from typing import TYPE_CHECKING, Any
99
from urllib.parse import unquote_plus
1010

11-
import numpy as np
12-
import pandas as pd
1311
import sqlglot as sg
1412
import sqlglot.expressions as sge
1513
from pandas.api.types import is_float_dtype
@@ -139,7 +137,7 @@ def _register_in_memory_table(self, op: ops.InMemoryTable) -> None:
139137
convert_df = df.convert_dtypes()
140138
for col in convert_df.columns:
141139
if not is_float_dtype(convert_df[col]):
142-
df[col] = df[col].replace(np.nan, None)
140+
df[col] = df[col].replace(float("nan"), None)
143141

144142
data = df.itertuples(index=False)
145143
sql = self._build_insert_template(

ibis/backends/sqlite/converter.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
from __future__ import annotations
22

33
import pandas as pd
4+
from packaging.version import parse as vparse
45

56
from ibis.formats.pandas import PandasData
67

78
# The "mixed" format was added in pandas 2
8-
_DATETIME_FORMAT = "mixed" if pd.__version__ >= "2.0.0" else None
9+
_DATETIME_FORMAT = "mixed" if vparse(pd.__version__) >= vparse("2.0.0") else None
910

1011

1112
class SQLitePandasData(PandasData):

ibis/backends/tests/base.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,6 @@
88
from pathlib import Path
99
from typing import TYPE_CHECKING, Any, Literal
1010

11-
import numpy as np
12-
import pandas as pd
13-
import pandas.testing as tm
1411
import pytest
1512
from filelock import FileLock
1613

@@ -22,6 +19,10 @@
2219

2320
PYTHON_SHORT_VERSION = f"{sys.version_info.major}{sys.version_info.minor}"
2421

22+
np = pytest.importorskip("numpy")
23+
pd = pytest.importorskip("pandas")
24+
tm = pytest.importorskip("pandas.testing")
25+
2526

2627
class BackendTest(abc.ABC):
2728
"""

ibis/backends/tests/data.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
from __future__ import annotations
22

3-
import numpy as np
4-
import pandas as pd
53
import pytest
64

5+
np = pytest.importorskip("numpy")
6+
pd = pytest.importorskip("pandas")
77
pa = pytest.importorskip("pyarrow")
88

99
array_types = pd.DataFrame(

ibis/backends/tests/test_aggregation.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,6 @@
33
from datetime import date
44
from operator import methodcaller
55

6-
import numpy as np
7-
import pandas as pd
86
import pytest
97
from pytest import param
108

@@ -32,6 +30,9 @@
3230
)
3331
from ibis.legacy.udf.vectorized import reduction
3432

33+
np = pytest.importorskip("numpy")
34+
pd = pytest.importorskip("pandas")
35+
3536
with pytest.warns(FutureWarning, match="v9.0"):
3637

3738
@reduction(input_type=[dt.double], output_type=dt.double)

ibis/backends/tests/test_array.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,6 @@
66
from datetime import datetime
77
from functools import partial
88

9-
import numpy as np
10-
import pandas as pd
11-
import pandas.testing as tm
129
import pytest
1310
import pytz
1411
import toolz
@@ -35,6 +32,10 @@
3532
)
3633
from ibis.common.collections import frozendict
3734

35+
np = pytest.importorskip("numpy")
36+
pd = pytest.importorskip("pandas")
37+
tm = pytest.importorskip("pandas.testing")
38+
3839
pytestmark = [
3940
pytest.mark.never(
4041
["sqlite", "mysql", "exasol"], reason="No array support", raises=Exception

0 commit comments

Comments
 (0)