Skip to content

Commit 2e56acb

Browse files
mfatihaktascpcloud
and authored
fix(datatypes): return pd.Timestamp or pd.Series[datetime64] for date.to_pandas() (#8784)
Co-authored-by: Phillip Cloud <[email protected]>
1 parent cfc9724 commit 2e56acb

File tree

12 files changed

+66
-51
lines changed

12 files changed

+66
-51
lines changed

docs/contribute/02_workflow.qmd

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -274,3 +274,11 @@ you are going only up).
274274
```bash
275275
$ colima delete
276276
```
277+
278+
### `x86_64` or `amd64` based containers
279+
280+
When starting containers based on the `x86_64` / `amd64` architecture, the architecture flag needs to be set in two places:
281+
1. Add `platform: linux/amd64` for the service in `compose.yaml`.
282+
2. Set the `--arch` flag when starting the VM: `colima start --arch x86_64`.
283+
284+
For instance, this step is necessary for the `oracle` service in `compose.yaml`. Otherwise, the container will fail shortly after starting.

ibis/backends/oracle/converter.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,19 @@
11
from __future__ import annotations
22

3-
import datetime
3+
import pandas as pd
44

55
from ibis.formats.pandas import PandasData
66

77

88
class OraclePandasData(PandasData):
99
@classmethod
1010
def convert_Timestamp_element(cls, dtype):
11-
return datetime.datetime.fromisoformat
11+
return pd.Timestamp.fromisoformat
1212

1313
@classmethod
1414
def convert_Date_element(cls, dtype):
15-
return datetime.date.fromisoformat
15+
return pd.Timestamp.fromisoformat
1616

1717
@classmethod
1818
def convert_Time_element(cls, dtype):
19-
return datetime.time.fromisoformat
19+
return pd.Timestamp.fromisoformat

ibis/backends/snowflake/converter.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
from __future__ import annotations
22

3-
import datetime
43
import json
54
from typing import TYPE_CHECKING
65

6+
import pandas as pd
77
import pyarrow as pa
88

99
from ibis.formats.pandas import PandasData
@@ -52,15 +52,15 @@ def __arrow_ext_scalar_class__(self):
5252
class SnowflakePandasData(PandasData):
5353
@classmethod
5454
def convert_Timestamp_element(cls, dtype):
55-
return datetime.datetime.fromisoformat
55+
return pd.Timestamp.fromisoformat
5656

5757
@classmethod
5858
def convert_Date_element(cls, dtype):
59-
return datetime.date.fromisoformat
59+
return pd.Timestamp.fromisoformat
6060

6161
@classmethod
6262
def convert_Time_element(cls, dtype):
63-
return datetime.time.fromisoformat
63+
return pd.Timestamp.fromisoformat
6464

6565
@classmethod
6666
def convert_JSON(cls, s, dtype, pandas_type):

ibis/backends/sql/compilers/oracle.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,7 @@ def visit_Limit(self, op, *, parent, n, offset):
203203
return result
204204

205205
def visit_Date(self, op, *, arg):
206-
return sg.cast(arg, to="date")
206+
return self.f.trunc(arg, "DDD")
207207

208208
def visit_IsNan(self, op, *, arg):
209209
return arg.eq(self.NAN)

ibis/backends/sqlite/tests/test_types.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,8 @@ def test_type_map(db):
9292
sol = pd.DataFrame(
9393
{"str_col": ["a"], "date_col": pd.Series([date(2022, 1, 1)], dtype="object")}
9494
)
95+
sol["date_col"] = sol["date_col"].astype(res["date_col"].dtype)
96+
9597
assert res.equals(sol)
9698

9799

ibis/backends/tests/test_aggregation.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1251,7 +1251,7 @@ def test_string_quantile(alltypes, func):
12511251
)
12521252
def test_date_quantile(alltypes):
12531253
expr = alltypes.timestamp_col.date().quantile(0.5)
1254-
result = expr.execute()
1254+
result = expr.execute().to_pydatetime().date()
12551255
assert result == date(2009, 12, 31)
12561256

12571257

ibis/backends/tests/test_array.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,10 @@
2222
GoogleBadRequest,
2323
MySQLOperationalError,
2424
PolarsComputeError,
25-
PsycoPg2ArraySubscriptError,
2625
PsycoPg2IndeterminateDatatype,
2726
PsycoPg2InternalError,
2827
PsycoPg2ProgrammingError,
29-
PsycoPg2SyntaxError,
28+
PsycoPgInvalidTextRepresentation,
3029
PsycoPgSyntaxError,
3130
Py4JJavaError,
3231
PyAthenaDatabaseError,
@@ -1118,7 +1117,7 @@ def test_unnest_struct(con):
11181117

11191118

11201119
@builtin_array
1121-
@pytest.mark.notimpl(["postgres"], raises=PsycoPg2SyntaxError)
1120+
@pytest.mark.notimpl(["postgres"], raises=PsycoPgSyntaxError)
11221121
@pytest.mark.notimpl(["risingwave"], raises=PsycoPg2InternalError)
11231122
@pytest.mark.notimpl(
11241123
["trino"], reason="inserting maps into structs doesn't work", raises=TrinoUserError
@@ -1209,7 +1208,7 @@ def test_zip_null(con, fn):
12091208

12101209

12111210
@builtin_array
1212-
@pytest.mark.notimpl(["postgres"], raises=PsycoPg2SyntaxError)
1211+
@pytest.mark.notimpl(["postgres"], raises=PsycoPgSyntaxError)
12131212
@pytest.mark.notimpl(["risingwave"], raises=PsycoPg2ProgrammingError)
12141213
@pytest.mark.notimpl(["datafusion"], raises=Exception, reason="not yet supported")
12151214
@pytest.mark.notimpl(
@@ -1769,7 +1768,7 @@ def test_table_unnest_column_expr(backend):
17691768
@pytest.mark.notimpl(["datafusion", "polars"], raises=com.OperationNotDefinedError)
17701769
@pytest.mark.notimpl(["trino"], raises=TrinoUserError)
17711770
@pytest.mark.notimpl(["athena"], raises=PyAthenaOperationalError)
1772-
@pytest.mark.notimpl(["postgres"], raises=PsycoPg2SyntaxError)
1771+
@pytest.mark.notimpl(["postgres"], raises=PsycoPgSyntaxError)
17731772
@pytest.mark.notimpl(["risingwave"], raises=PsycoPg2ProgrammingError)
17741773
@pytest.mark.notyet(
17751774
["risingwave"], raises=PsycoPg2InternalError, reason="not supported in risingwave"
@@ -1890,7 +1889,7 @@ def test_array_agg_bool(con, data, agg, baseline_func):
18901889

18911890
@pytest.mark.notyet(
18921891
["postgres"],
1893-
raises=PsycoPg2ArraySubscriptError,
1892+
raises=PsycoPgInvalidTextRepresentation,
18941893
reason="all dimensions must match in size",
18951894
)
18961895
@pytest.mark.notimpl(["risingwave", "flink"], raises=com.OperationNotDefinedError)

ibis/backends/tests/test_client.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
ImpalaHiveServer2Error,
3333
OracleDatabaseError,
3434
PsycoPg2InternalError,
35-
PsycoPg2UndefinedObject,
35+
PsycoPgUndefinedObject,
3636
Py4JJavaError,
3737
PyAthenaDatabaseError,
3838
PyODBCProgrammingError,
@@ -725,7 +725,7 @@ def test_list_database_contents(con):
725725
@pytest.mark.notyet(["databricks"], raises=DatabricksServerOperationError)
726726
@pytest.mark.notyet(["bigquery"], raises=com.UnsupportedBackendType)
727727
@pytest.mark.notyet(
728-
["postgres"], raises=PsycoPg2UndefinedObject, reason="no unsigned int types"
728+
["postgres"], raises=PsycoPgUndefinedObject, reason="no unsigned int types"
729729
)
730730
@pytest.mark.notyet(
731731
["oracle"], raises=OracleDatabaseError, reason="no unsigned int types"

ibis/backends/tests/test_generic.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
OracleDatabaseError,
2626
PolarsInvalidOperationError,
2727
PsycoPg2InternalError,
28-
PsycoPg2SyntaxError,
28+
PsycoPgSyntaxError,
2929
Py4JJavaError,
3030
PyAthenaDatabaseError,
3131
PyAthenaOperationalError,
@@ -1736,7 +1736,7 @@ def hash_256(col):
17361736
pytest.mark.notimpl(["flink"], raises=Py4JJavaError),
17371737
pytest.mark.notimpl(["druid"], raises=PyDruidProgrammingError),
17381738
pytest.mark.notimpl(["oracle"], raises=OracleDatabaseError),
1739-
pytest.mark.notimpl(["postgres"], raises=PsycoPg2SyntaxError),
1739+
pytest.mark.notimpl(["postgres"], raises=PsycoPgSyntaxError),
17401740
pytest.mark.notimpl(["risingwave"], raises=PsycoPg2InternalError),
17411741
pytest.mark.notimpl(["snowflake"], raises=AssertionError),
17421742
pytest.mark.never(

ibis/backends/tests/test_struct.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
DatabricksServerOperationError,
1414
PolarsColumnNotFoundError,
1515
PsycoPg2InternalError,
16-
PsycoPg2SyntaxError,
16+
PsycoPgSyntaxError,
1717
Py4JJavaError,
1818
PyAthenaDatabaseError,
1919
PyAthenaOperationalError,
@@ -138,7 +138,7 @@ def test_collect_into_struct(alltypes):
138138

139139

140140
@pytest.mark.notimpl(
141-
["postgres"], reason="struct literals not implemented", raises=PsycoPg2SyntaxError
141+
["postgres"], reason="struct literals not implemented", raises=PsycoPgSyntaxError
142142
)
143143
@pytest.mark.notimpl(
144144
["risingwave"],
@@ -155,7 +155,7 @@ def test_field_access_after_case(con):
155155

156156

157157
@pytest.mark.notimpl(
158-
["postgres"], reason="struct literals not implemented", raises=PsycoPg2SyntaxError
158+
["postgres"], reason="struct literals not implemented", raises=PsycoPgSyntaxError
159159
)
160160
@pytest.mark.notimpl(["flink"], raises=IbisError, reason="not implemented in ibis")
161161
@pytest.mark.parametrize(
@@ -242,7 +242,7 @@ def test_keyword_fields(con, nullable):
242242

243243
@pytest.mark.notyet(
244244
["postgres"],
245-
raises=PsycoPg2SyntaxError,
245+
raises=PsycoPgSyntaxError,
246246
reason="sqlglot doesn't implement structs for postgres correctly",
247247
)
248248
@pytest.mark.notyet(

ibis/backends/tests/test_temporal.py

Lines changed: 25 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -650,9 +650,7 @@ def convert_to_offset(x):
650650
"ignore", category=(UserWarning, pd.errors.PerformanceWarning)
651651
)
652652
expected = (
653-
pd.to_datetime(df.date_string_col)
654-
.add(offset)
655-
.map(lambda ts: ts.normalize().date(), na_action="ignore")
653+
pd.to_datetime(df.date_string_col).add(offset).astype("datetime64[s]")
656654
)
657655

658656
expected = backend.default_series_rename(expected)
@@ -727,12 +725,7 @@ def convert_to_offset(x):
727725
),
728726
param(
729727
lambda t, _: t.timestamp_col.date() + ibis.interval(days=4),
730-
lambda t, _: (
731-
t.timestamp_col.dt.floor("d")
732-
.add(pd.Timedelta(days=4))
733-
.dt.normalize()
734-
.dt.date
735-
),
728+
lambda t, _: t.timestamp_col.dt.floor("d").add(pd.Timedelta(days=4)),
736729
id="date-add-interval",
737730
marks=[
738731
pytest.mark.notimpl(
@@ -743,12 +736,7 @@ def convert_to_offset(x):
743736
),
744737
param(
745738
lambda t, _: t.timestamp_col.date() - ibis.interval(days=14),
746-
lambda t, _: (
747-
t.timestamp_col.dt.floor("d")
748-
.sub(pd.Timedelta(days=14))
749-
.dt.normalize()
750-
.dt.date
751-
),
739+
lambda t, _: t.timestamp_col.dt.floor("d").sub(pd.Timedelta(days=14)),
752740
id="date-subtract-interval",
753741
marks=[
754742
pytest.mark.notimpl(
@@ -1013,14 +1001,15 @@ def test_interval_add_cast_column(backend, alltypes, df):
10131001
delta = alltypes.bigint_col.cast("interval('D')")
10141002
expr = alltypes.select("id", (timestamp_date + delta).name("tmp"))
10151003
result = expr.execute().sort_values("id").reset_index().tmp
1004+
10161005
df = df.sort_values("id").reset_index(drop=True)
10171006
expected = (
10181007
df["timestamp_col"]
10191008
.dt.normalize()
10201009
.add(df.bigint_col.astype("timedelta64[D]"))
10211010
.rename("tmp")
1022-
.dt.date
10231011
)
1012+
10241013
backend.assert_series_equal(result, expected.astype(result.dtype))
10251014

10261015

@@ -2265,6 +2254,14 @@ def test_time_literal_sql(dialect, snapshot, micros):
22652254
reason="clickhouse doesn't support dates after 2149-06-06",
22662255
),
22672256
pytest.mark.notyet(["datafusion"], raises=Exception),
2257+
pytest.mark.xfail_version(
2258+
pyspark=["pyspark<3.5"],
2259+
raises=pd._libs.tslib.OutOfBoundsDatetime,
2260+
reason=(
2261+
"versions of pandas supported by PySpark <3.5 don't allow "
2262+
"pd.Timestamps with out-of-bounds timestamp values"
2263+
),
2264+
),
22682265
],
22692266
id="large",
22702267
),
@@ -2278,6 +2275,14 @@ def test_time_literal_sql(dialect, snapshot, micros):
22782275
reason="clickhouse doesn't support dates before the UNIX epoch",
22792276
),
22802277
pytest.mark.notyet(["datafusion"], raises=Exception),
2278+
pytest.mark.xfail_version(
2279+
pyspark=["pyspark<3.5"],
2280+
raises=pd._libs.tslib.OutOfBoundsDatetime,
2281+
reason=(
2282+
"versions of pandas supported by PySpark <3.5 don't allow "
2283+
"pd.Timestamps with out-of-bounds timestamp values"
2284+
),
2285+
),
22812286
],
22822287
),
22832288
param(
@@ -2296,20 +2301,18 @@ def test_time_literal_sql(dialect, snapshot, micros):
22962301
)
22972302
def test_date_scalar(con, value, func):
22982303
expr = ibis.date(func(value)).name("tmp")
2299-
23002304
result = con.execute(expr)
23012305

2302-
assert not isinstance(result, datetime.datetime)
2303-
assert isinstance(result, datetime.date)
2304-
2305-
assert result == datetime.date.fromisoformat(value)
2306+
assert isinstance(result, pd.Timestamp)
2307+
assert result == pd.Timestamp.fromisoformat(value)
23062308

23072309

23082310
@pytest.mark.notyet(
23092311
["datafusion", "druid", "exasol"], raises=com.OperationNotDefinedError
23102312
)
23112313
def test_simple_unix_date_offset(con):
2312-
d = ibis.date("2023-04-07")
2314+
s = "2023-04-07"
2315+
d = ibis.date(s)
23132316
expr = d.epoch_days()
23142317
result = con.execute(expr)
23152318
delta = datetime.date(2023, 4, 7) - datetime.date(1970, 1, 1)

ibis/formats/pandas.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -222,17 +222,20 @@ def convert_Timestamp(cls, s, dtype, pandas_type):
222222
def convert_Date(cls, s, dtype, pandas_type):
223223
if isinstance(s.dtype, pd.DatetimeTZDtype):
224224
s = s.dt.tz_convert("UTC").dt.tz_localize(None)
225+
225226
try:
226-
return s.astype(pandas_type).dt.date
227+
return s.astype(pandas_type)
227228
except (ValueError, TypeError, pd._libs.tslibs.OutOfBoundsDatetime):
228229

229230
def try_date(v):
230-
if isinstance(v, datetime.datetime):
231-
return v.date()
231+
if isinstance(v, datetime.date):
232+
return pd.Timestamp(v)
232233
elif isinstance(v, str):
233234
if v.endswith("Z"):
234-
return datetime.datetime.fromisoformat(v[:-1]).date()
235-
return datetime.date.fromisoformat(v)
235+
datetime_obj = datetime.datetime.fromisoformat(v[:-1])
236+
else:
237+
datetime_obj = datetime.datetime.fromisoformat(v)
238+
return pd.Timestamp(datetime_obj)
236239
else:
237240
return v
238241

0 commit comments

Comments
 (0)