Skip to content

Commit 1650d7d

Browse files
committed
feat(timestamp): allow passing nullable flag to ibis.timestamp
1 parent d7cd846 commit 1650d7d

File tree

10 files changed

+193
-54
lines changed

10 files changed

+193
-54
lines changed

ibis/common/temporal.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,7 @@ def normalize_timezone(tz):
208208

209209

210210
@lazy_singledispatch
211-
def normalize_datetime(value):
211+
def normalize_datetime(value) -> datetime.datetime:
212212
raise TypeError(f"Unable to normalize {type(value)} to timestamp")
213213

214214

ibis/expr/api.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -804,11 +804,17 @@ def timestamp(
804804
second: int | ir.IntegerValue | Deferred,
805805
/,
806806
timezone: str | None = None,
807+
nullable: bool = True,
807808
) -> TimestampValue: ...
808809

809810

810811
@overload
811-
def timestamp(value_or_year: Any, /, timezone: str | None = None) -> TimestampValue: ...
812+
def timestamp(
813+
value_or_year: str | datetime.datetime,
814+
/,
815+
timezone: str | None = None,
816+
nullable: bool = True,
817+
) -> TimestampValue: ...
812818

813819

814820
@deferrable
@@ -821,6 +827,7 @@ def timestamp(
821827
second=None,
822828
/,
823829
timezone=None,
830+
nullable: bool = True,
824831
):
825832
"""Construct a timestamp scalar or column.
826833
@@ -841,6 +848,8 @@ def timestamp(
841848
The timestamp second component; required if `value_or_year` is a year.
842849
timezone
843850
The timezone name, or none for a timezone-naive timestamp.
851+
nullable
852+
Whether the resulting timestamp should be nullable. Defaults to True.
844853
845854
Returns
846855
-------
@@ -888,16 +897,17 @@ def timestamp(
888897
raise NotImplementedError(
889898
"Timezone currently not supported when creating a timestamp from components"
890899
)
891-
return ops.TimestampFromYMDHMS(*args).to_expr()
900+
return ops.TimestampFromYMDHMS(*args, nullable=nullable).to_expr()
892901
elif isinstance(value_or_year, (numbers.Real, ir.IntegerValue)):
893902
raise TypeError("Use ibis.literal(...).as_timestamp() instead")
894903
elif isinstance(value_or_year, ir.Expr):
895-
return value_or_year.cast(dt.Timestamp(timezone=timezone))
904+
return value_or_year.cast(dt.Timestamp(timezone=timezone, nullable=nullable))
896905
else:
897906
value = normalize_datetime(value_or_year)
898907
tzinfo = normalize_timezone(timezone or value.tzinfo)
899908
timezone = tzinfo.tzname(value) if tzinfo is not None else None
900-
return literal(value, type=dt.Timestamp(timezone=timezone))
909+
dtype = dt.Timestamp.from_datetime(value, timezone=timezone, nullable=nullable)
910+
return literal(value, type=dtype)
901911

902912

903913
@overload

ibis/expr/datatypes/cast.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,10 @@ def higher_precedence(left: dt.DataType, right: dt.DataType) -> dt.DataType:
142142
@public
143143
def highest_precedence(dtypes: Iterator[dt.DataType]) -> dt.DataType:
144144
"""Compute the highest precedence of `dtypes`."""
145+
# TODO: currently,
146+
# highest_precedence([dt.Timestamp(scale=3), dt.Timestamp(timezone="UTC")])
147+
# returns dt.Timestamp(timezone="UTC").
148+
# Perhaps it should return dt.Timestamp(scale=3, timezone="UTC") instead.
145149
if collected := list(dtypes):
146150
return functools.reduce(higher_precedence, collected)
147151
else:

ibis/expr/datatypes/core.py

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333

3434
if TYPE_CHECKING:
3535
import numpy as np
36+
import pandas as pd
3637
import polars as pl
3738
import pyarrow as pa
3839
from pandas.api.extensions import ExtensionDtype
@@ -83,6 +84,62 @@ def dtype(
8384
) -> DataType: ...
8485

8586

87+
if TYPE_CHECKING:
88+
import numpy as np
89+
import polars as pl
90+
import pyarrow as pa
91+
from pandas.api.extensions import ExtensionDtype
92+
93+
94+
@overload
95+
def dtype(value: type[int] | Literal["int"], nullable: bool = True) -> Int64: ...
96+
@overload
97+
def dtype(
98+
value: type[str] | Literal["str", "string"], nullable: bool = True
99+
) -> String: ...
100+
@overload
101+
def dtype(
102+
value: type[bool] | Literal["bool", "boolean"], nullable: bool = True
103+
) -> Boolean: ...
104+
@overload
105+
def dtype(value: type[bytes] | Literal["bytes"], nullable: bool = True) -> Binary: ...
106+
@overload
107+
def dtype(value: type[Real] | Literal["float"], nullable: bool = True) -> Float64: ...
108+
@overload
109+
def dtype(
110+
value: type[pydecimal.Decimal] | Literal["decimal"], nullable: bool = True
111+
) -> Decimal: ...
112+
@overload
113+
def dtype(
114+
value: type[pydatetime.datetime] | Literal["timestamp"], nullable: bool = True
115+
) -> Timestamp: ...
116+
@overload
117+
def dtype(
118+
value: type[pydatetime.date] | Literal["date"], nullable: bool = True
119+
) -> Date: ...
120+
@overload
121+
def dtype(
122+
value: type[pydatetime.time] | Literal["time"], nullable: bool = True
123+
) -> Time: ...
124+
@overload
125+
def dtype(
126+
value: type[pydatetime.timedelta] | Literal["interval"], nullable: bool = True
127+
) -> Interval: ...
128+
@overload
129+
def dtype(
130+
value: type[pyuuid.UUID] | Literal["uuid"], nullable: bool = True
131+
) -> UUID: ...
132+
@overload
133+
def dtype(
134+
value: DataType | str | np.dtype | ExtensionDtype | pl.DataType | pa.DataType,
135+
nullable: bool = True,
136+
) -> DataType: ...
137+
138+
139+
if TYPE_CHECKING:
140+
import pandas as pd
141+
142+
86143
@lazy_singledispatch
87144
def dtype(value, nullable=True) -> DataType:
88145
"""Create a DataType object.
@@ -697,6 +754,34 @@ def from_unit(cls, unit, timezone=None, nullable=True) -> Self:
697754
raise ValueError(f"Invalid unit {unit}")
698755
return cls(scale=scale, timezone=timezone, nullable=nullable)
699756

757+
@classmethod
758+
def from_datetime(
759+
cls, dt: pydatetime.datetime, timezone: str | None = None, nullable: bool = True
760+
):
761+
"""Infer from a python datetime.datetime object."""
762+
if dt.microsecond:
763+
scale = 6
764+
else:
765+
scale = 0
766+
if timezone is None and dt.tzinfo is not None:
767+
timezone = str(dt.tzinfo)
768+
return cls(scale=scale, timezone=timezone, nullable=nullable)
769+
770+
@classmethod
771+
def from_pandas(
772+
cls, value: pd.Timestamp, timezone: str | None = None, nullable: bool = True
773+
):
774+
"""Infer from a pandas.Timestamp."""
775+
if value.nanosecond:
776+
scale = 9
777+
elif value.microsecond:
778+
scale = 6
779+
else:
780+
scale = 0
781+
if timezone is None and value.tz is not None:
782+
timezone = str(value.tz)
783+
return cls(timezone=timezone, scale=scale, nullable=nullable)
784+
700785
@property
701786
def unit(self) -> str:
702787
"""Return the unit of the timestamp."""

ibis/expr/datatypes/value.py

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from collections.abc import Mapping, Sequence
1111
from functools import partial
1212
from operator import attrgetter
13-
from typing import Any
13+
from typing import TYPE_CHECKING, Any
1414

1515
import toolz
1616
from public import public
@@ -28,6 +28,9 @@
2828
)
2929
from ibis.expr.datatypes.cast import highest_precedence
3030

31+
if TYPE_CHECKING:
32+
import pandas as pd
33+
3134

3235
@lazy_singledispatch
3336
def infer(value: Any) -> dt.DataType:
@@ -82,10 +85,7 @@ def infer_date(value: datetime.date) -> dt.Date:
8285

8386
@infer.register(datetime.datetime)
8487
def infer_timestamp(value: datetime.datetime) -> dt.Timestamp:
85-
if value.tzinfo:
86-
return dt.Timestamp(timezone=str(value.tzinfo))
87-
else:
88-
return dt.timestamp
88+
return dt.Timestamp.from_datetime(value)
8989

9090

9191
@infer.register(datetime.timedelta)
@@ -171,11 +171,8 @@ def infer_numpy_scalar(value):
171171

172172

173173
@infer.register("pandas.Timestamp")
174-
def infer_pandas_timestamp(value):
175-
if value.tz is not None:
176-
return dt.Timestamp(timezone=str(value.tz))
177-
else:
178-
return dt.timestamp
174+
def infer_pandas_timestamp(value: pd.Timestamp) -> dt.Timestamp:
175+
return dt.Timestamp.from_pandas(value)
179176

180177

181178
@infer.register("pandas.Timedelta")

ibis/expr/operations/temporal.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -258,9 +258,17 @@ class TimestampFromYMDHMS(Value):
258258
hours: Value[dt.Integer]
259259
minutes: Value[dt.Integer]
260260
seconds: Value[dt.Integer]
261+
nullable: bool = True
261262

262-
dtype = dt.timestamp
263-
shape = rlz.shape_like("args")
263+
shape = rlz.shape_like(["year", "month", "day", "hours", "minutes", "seconds"])
264+
265+
@attribute
def dtype(self):
    """Return the timestamp type produced by this node.

    The type is timezone-naive, has scale 0, and takes its nullability
    from the node's `nullable` field.
    """
    # scale=0 is second precision.
    return dt.Timestamp(scale=0, timezone=None, nullable=self.nullable)
264272

265273

266274
@public

ibis/expr/rules.py

Lines changed: 23 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
from __future__ import annotations
22

33
from itertools import product, starmap
4-
from typing import Optional
4+
from typing import TYPE_CHECKING, Any, Optional
55

66
from public import public
77

8+
import ibis.expr.datashape as ds
89
import ibis.expr.datatypes as dt
910
import ibis.expr.operations as ops
1011
from ibis import util
@@ -13,14 +14,17 @@
1314
from ibis.common.patterns import CoercionError, NoMatch, Pattern
1415
from ibis.common.temporal import IntervalUnit
1516

17+
if TYPE_CHECKING:
18+
from collections.abc import Iterable
19+
1620

1721
@public
18-
def highest_precedence_shape(nodes):
22+
def highest_precedence_shape(nodes: Iterable[ops.Value]) -> ds.DataShape:
1923
return max(node.shape for node in nodes)
2024

2125

2226
@public
23-
def highest_precedence_dtype(nodes):
27+
def highest_precedence_dtype(nodes: Iterable[ops.Value]) -> dt.DataType:
2428
"""Return the highest precedence type from the passed expressions.
2529
2630
Also verifies that there are valid implicit casts between any of the types
@@ -29,8 +33,8 @@ def highest_precedence_dtype(nodes):
2933
3034
Parameters
3135
----------
32-
nodes : Iterable[ops.Value]
33-
A sequence of Expressions
36+
nodes
37+
An Iterable of Expressions
3438
3539
Returns
3640
-------
@@ -42,7 +46,7 @@ def highest_precedence_dtype(nodes):
4246

4347

4448
@public
45-
def castable(source, target):
49+
def castable(source: ops.Value, target: ops.Value) -> bool:
4650
"""Return whether source ir type is implicitly castable to target.
4751
4852
Based on the underlying datatypes and the value in case of Literals
@@ -52,7 +56,7 @@ def castable(source, target):
5256

5357

5458
@public
55-
def comparable(left, right):
59+
def comparable(left: ops.Value, right: ops.Value) -> bool:
5660
return castable(left, right) or castable(right, left)
5761

5862

@@ -61,28 +65,31 @@ def comparable(left, right):
6165

6266

6367
@public
64-
def dtype_like(name):
68+
def dtype_like(names: str | Iterable[str]):
6569
@attribute
66-
def dtype(self):
67-
args = getattr(self, name)
68-
args = args if util.is_iterable(args) else [args]
69-
return highest_precedence_dtype(args)
70+
def dtype(self) -> dt.DataType:
71+
return highest_precedence_dtype(_attributes(self, names))
7072

7173
return dtype
7274

7375

7476
@public
75-
def shape_like(name):
77+
def shape_like(name: str | Iterable[str]):
7678
@attribute
77-
def shape(self):
78-
args = getattr(self, name)
79-
args = args if util.is_iterable(args) else [args]
79+
def shape(self) -> ds.DataShape:
80+
args = _attributes(self, name)
8081
args = [a for a in args if a is not None]
8182
return highest_precedence_shape(args)
8283

8384
return shape
8485

8586

87+
def _attributes(obj: Any, names: str | Iterable[str]) -> tuple:
    """Collect one or several attributes of `obj` as a tuple.

    Parameters
    ----------
    obj
        The object to read attributes from.
    names
        Either a single attribute name or an iterable of attribute names.

    Returns
    -------
    For an iterable of names, a tuple with one attribute value per name, in
    iteration order. For a single name, the attribute value passed through
    `util.promote_tuple`.
    """
    if not isinstance(names, str):
        return tuple(getattr(obj, attr) for attr in names)

    # A single name may itself hold several values; presumably
    # promote_tuple normalizes that to a tuple -- confirm against util.
    value = getattr(obj, names)
    return util.promote_tuple(value)
91+
92+
8693
# TODO(kszucs): might just use bounds instead of actual literal values
8794
# that could simplify interval binop output_type methods
8895
# TODO(kszucs): pre-generate mapping?

0 commit comments

Comments
 (0)