diff --git a/ibis/backends/sql/compilers/bigquery/__init__.py b/ibis/backends/sql/compilers/bigquery/__init__.py index 61dae613b8f3..5aeb3f4bd1e2 100644 --- a/ibis/backends/sql/compilers/bigquery/__init__.py +++ b/ibis/backends/sql/compilers/bigquery/__init__.py @@ -527,9 +527,11 @@ def visit_StringContains(self, op, *, haystack, needle): return self.f.strpos(haystack, needle) > 0 def visit_TimestampFromYMDHMS( - self, op, *, year, month, day, hours, minutes, seconds + self, op, *, year, month, day, hours, minutes, seconds, dtype: dt.Timestamp ): - return self.f.anon.DATETIME(year, month, day, hours, minutes, seconds) + if dtype.timezone is not None: + raise NotImplementedError() + return self.f.datetime_from_parts(year, month, day, hours, minutes, seconds) def visit_NonNullLiteral(self, op, *, value, dtype): if dtype.is_inet() or dtype.is_macaddr(): diff --git a/ibis/backends/sql/compilers/clickhouse.py b/ibis/backends/sql/compilers/clickhouse.py index a9892e70c388..1556e4b4ce47 100644 --- a/ibis/backends/sql/compilers/clickhouse.py +++ b/ibis/backends/sql/compilers/clickhouse.py @@ -430,7 +430,7 @@ def visit_DateFromYMD(self, op, *, year, month, day): ) def visit_TimestampFromYMDHMS( - self, op, *, year, month, day, hours, minutes, seconds, **_ + self, op, *, year, month, day, hours, minutes, seconds, dtype: dt.Timestamp, **_ ): to_datetime = self.f.toDateTime( self.f.concat( @@ -447,7 +447,7 @@ def visit_TimestampFromYMDHMS( self.f.leftPad(self.f.toString(seconds), 2, "0"), ) ) - if timezone := op.dtype.timezone: + if timezone := dtype.timezone: return self.f.toTimeZone(to_datetime, timezone) return to_datetime diff --git a/ibis/backends/sql/compilers/datafusion.py b/ibis/backends/sql/compilers/datafusion.py index d960441f002e..3d1af65ff7be 100644 --- a/ibis/backends/sql/compilers/datafusion.py +++ b/ibis/backends/sql/compilers/datafusion.py @@ -424,24 +424,25 @@ def visit_DateFromYMD(self, op, *, year, month, day): ) def visit_TimestampFromYMDHMS( - self, op, *, year, month, day, hours, minutes, seconds, **_ + self, op, *, year, month, day, hours, minutes, seconds, dtype: dt.Timestamp, **_ ): - return self.f.to_timestamp_micros( - self.f.concat( - self.f.lpad(self.cast(self.cast(year, dt.int64), dt.string), 4, "0"), - "-", - self.f.lpad(self.cast(self.cast(month, dt.int64), dt.string), 2, "0"), - "-", - self.f.lpad(self.cast(self.cast(day, dt.int64), dt.string), 2, "0"), - "T", - self.f.lpad(self.cast(self.cast(hours, dt.int64), dt.string), 2, "0"), - ":", - self.f.lpad(self.cast(self.cast(minutes, dt.int64), dt.string), 2, "0"), - ":", - self.f.lpad(self.cast(self.cast(seconds, dt.int64), dt.string), 2, "0"), - ".000000Z", - ) - ) + args = [ + self.f.lpad(self.cast(self.cast(year, dt.int64), dt.string), 4, "0"), + "-", + self.f.lpad(self.cast(self.cast(month, dt.int64), dt.string), 2, "0"), + "-", + self.f.lpad(self.cast(self.cast(day, dt.int64), dt.string), 2, "0"), + "T", + self.f.lpad(self.cast(self.cast(hours, dt.int64), dt.string), 2, "0"), + ":", + self.f.lpad(self.cast(self.cast(minutes, dt.int64), dt.string), 2, "0"), + ":", + self.f.lpad(self.cast(self.cast(seconds, dt.int64), dt.string), 2, "0"), + "Z", + ] + if dtype.timezone is not None: + args.append(dtype.timezone) + return self.f.to_timestamp_seconds(self.f.concat(*args)) def visit_IsInf(self, op, *, arg): return sg.and_(sg.not_(self.f.isnan(arg)), self.f.abs(arg).eq(self.POS_INF)) diff --git a/ibis/backends/sql/compilers/druid.py b/ibis/backends/sql/compilers/druid.py index 07f1d3b7047f..00207fd622c3 100644 --- a/ibis/backends/sql/compilers/druid.py +++ b/ibis/backends/sql/compilers/druid.py @@ -182,8 +182,10 @@ def visit_TimestampFromUNIX(self, op, *, arg, unit): raise exc.UnsupportedArgumentError(f"Druid doesn't support {unit} units") def visit_TimestampFromYMDHMS( - self, op, *, year, month, day, hours, minutes, seconds + self, op, *, year, month, day, hours, minutes, seconds, dtype: dt.Timestamp ): + if dtype.timezone is not None: + raise NotImplementedError() return self.f.time_parse( self.f.concat( self.f.lpad(self.cast(year, dt.string), 4, "0"), diff --git a/ibis/backends/sql/compilers/duckdb.py b/ibis/backends/sql/compilers/duckdb.py index 09718d0d1e40..03ca5702fcf8 100644 --- a/ibis/backends/sql/compilers/duckdb.py +++ b/ibis/backends/sql/compilers/duckdb.py @@ -403,16 +403,13 @@ def visit_TimestampFromUNIX(self, op, *, arg, unit): raise com.UnsupportedOperationError(f"{unit!r} unit is not supported!") def visit_TimestampFromYMDHMS( - self, op, *, year, month, day, hours, minutes, seconds, **_ + self, op, *, year, month, day, hours, minutes, seconds, dtype: dt.Timestamp, **_ ): args = [year, month, day, hours, minutes, seconds] - - func = "make_timestamp" - if (timezone := op.dtype.timezone) is not None: - func += "tz" - args.append(timezone) - - return self.f[func](*args) + if (timezone := dtype.timezone) is not None: + return self.f.make_timestamptz(*args, timezone) + else: + return self.f.make_timestamp(*args) def visit_Cast(self, op, *, arg, to): dtype = op.arg.dtype diff --git a/ibis/backends/sql/compilers/flink.py b/ibis/backends/sql/compilers/flink.py index 9abc814d2ac4..24dd5da3c90e 100644 --- a/ibis/backends/sql/compilers/flink.py +++ b/ibis/backends/sql/compilers/flink.py @@ -381,7 +381,7 @@ def visit_DateFromYMD(self, op, *, year, month, day): ) def visit_TimestampFromYMDHMS( - self, op, *, year, month, day, hours, minutes, seconds + self, op, *, year, month, day, hours, minutes, seconds, dtype: dt.Timestamp ): padded_year = self.f.lpad(self.cast(year, dt.string), 4, "0") padded_month = self.f.lpad(self.cast(month, dt.string), 2, "0") @@ -403,7 +403,7 @@ def visit_TimestampFromYMDHMS( ":", padded_second, ), - op.dtype, + dtype, ) def visit_ExtractEpochSeconds(self, op, *, arg): diff --git a/ibis/backends/sql/compilers/mssql.py b/ibis/backends/sql/compilers/mssql.py index 425cee067066..b9aa99c689a5 100644 --- a/ibis/backends/sql/compilers/mssql.py +++ b/ibis/backends/sql/compilers/mssql.py @@ -334,9 +334,13 @@ def visit_TimeFromHMS(self, op, *, hours, minutes, seconds): return self.f.timefromparts(hours, minutes, seconds, 0, 0) def visit_TimestampFromYMDHMS( - self, op, *, year, month, day, hours, minutes, seconds + self, op, *, year, month, day, hours, minutes, seconds, dtype: dt.Timestamp ): - return self.f.datetimefromparts(year, month, day, hours, minutes, seconds, 0) + if dtype.timezone is not None: + raise NotImplementedError() + return self.f.datetime2fromparts( + year, month, day, hours, minutes, seconds, 0, dtype.scale + ) def visit_StringFind(self, op, *, arg, substr, start, end): if start is not None: diff --git a/ibis/backends/sql/compilers/postgres.py b/ibis/backends/sql/compilers/postgres.py index e0903d0dc30a..a0903bc6c74e 100644 --- a/ibis/backends/sql/compilers/postgres.py +++ b/ibis/backends/sql/compilers/postgres.py @@ -662,10 +662,10 @@ def visit_NonNullLiteral(self, op, *, value, dtype): return None def visit_TimestampFromYMDHMS( - self, op, *, year, month, day, hours, minutes, seconds + self, op, *, year, month, day, hours, minutes, seconds, dtype: dt.Timestamp ): to_int32 = partial(self.cast, to=dt.int32) - return self.f.make_timestamp( + args = ( to_int32(year), to_int32(month), to_int32(day), @@ -673,6 +673,10 @@ def visit_TimestampFromYMDHMS( to_int32(minutes), self.cast(seconds, dt.float64), ) + if dtype.timezone: + return self.f.make_timestamptz(*args, dtype.timezone) + else: + return self.f.make_timestamp(*args) def visit_DateFromYMD(self, op, *, year, month, day): to_int32 = partial(self.cast, to=dt.int32) diff --git a/ibis/backends/sql/compilers/sqlite.py b/ibis/backends/sql/compilers/sqlite.py index ace3ca07713e..9f292d701cd7 100644 --- a/ibis/backends/sql/compilers/sqlite.py +++ b/ibis/backends/sql/compilers/sqlite.py @@ -274,8 +274,10 @@ def visit_TimeFromHMS(self, op, *, hours, minutes, seconds): return self.f.time(self.f.printf("%02d:%02d:%02d", hours, minutes, seconds)) def visit_TimestampFromYMDHMS( - self, op, *, year, month, day, hours, minutes, seconds + self, op, *, year, month, day, hours, minutes, seconds, dtype: dt.Timestamp ): + if dtype.timezone not in (None, "UTC"): + raise NotImplementedError return self.f.datetime( self.f.printf( "%04d-%02d-%02d %02d:%02d:%02d%s", diff --git a/ibis/backends/sql/compilers/trino.py b/ibis/backends/sql/compilers/trino.py index ecaac4c8a70a..acdc794255ac 100644 --- a/ibis/backends/sql/compilers/trino.py +++ b/ibis/backends/sql/compilers/trino.py @@ -323,7 +323,7 @@ def visit_TimeFromHMS(self, op, *, hours, minutes, seconds): ) def visit_TimestampFromYMDHMS( - self, op, *, year, month, day, hours, minutes, seconds + self, op, *, year, month, day, hours, minutes, seconds, dtype: dt.Timestamp ): return self.cast( self.f.from_iso8601_timestamp( @@ -337,7 +337,7 @@ def visit_TimestampFromYMDHMS( seconds, ) ), - dt.timestamp, + dtype, ) def visit_TimestampFromUNIX(self, op, *, arg, unit): diff --git a/ibis/backends/tests/test_generic.py b/ibis/backends/tests/test_generic.py index 3bc85f56d19e..e1fd5190d141 100644 --- a/ibis/backends/tests/test_generic.py +++ b/ibis/backends/tests/test_generic.py @@ -1783,9 +1783,6 @@ def test_cast(con, from_type, to_type, from_val, expected): pytest.mark.notimpl( ["polars"], reason="casts to 1672531200000000000 (nanoseconds)" ), - pytest.mark.notimpl( - ["datafusion"], reason="casts to 1672531200000000 (microseconds)" - ), pytest.mark.notimpl(["mysql"], reason="returns 20230101000000"), pytest.mark.notyet(["mssql"], raises=PyODBCDataError), ], diff --git a/ibis/backends/tests/test_temporal.py b/ibis/backends/tests/test_temporal.py index 3b3817b4b561..d632e132f2c1 100644 --- a/ibis/backends/tests/test_temporal.py +++ b/ibis/backends/tests/test_temporal.py @@ -813,11 +813,6 @@ def convert_to_offset(x): raises=PyAthenaOperationalError, reason="not supported in hive", ), - pytest.mark.notyet( - ["datafusion"], - raises=Exception, - reason="pyarrow.lib.ArrowNotImplementedError: Unsupported cast", - ), pytest.mark.notimpl( ["oracle"], raises=com.OperationNotDefinedError, @@ -1477,7 +1472,7 @@ def test_date_literal(con, backend): "impala": "TIMESTAMP", "snowflake": "TIMESTAMP_NTZ", "sqlite": "text", - "trino": "timestamp(3)", + "trino": "timestamp(0)", "athena": "timestamp(3)", "duckdb": "TIMESTAMP", "postgres": "timestamp without time zone", diff --git a/ibis/common/temporal.py b/ibis/common/temporal.py index c73c6d076978..84ddb9ee0e2b 100644 --- a/ibis/common/temporal.py +++ b/ibis/common/temporal.py @@ -203,7 +203,7 @@ def normalize_timedelta( return int(value) -def normalize_timezone(tz): +def normalize_timezone(tz) -> datetime.tzinfo | None: if tz is None: return None elif isinstance(tz, str): @@ -223,7 +223,7 @@ def normalize_timezone(tz): @lazy_singledispatch -def normalize_datetime(value): +def normalize_datetime(value) -> datetime.datetime: raise TypeError(f"Unable to normalize {type(value)} to timestamp") diff --git a/ibis/expr/api.py b/ibis/expr/api.py index 7fa61eb2442d..5e1b0bfffca9 100644 --- a/ibis/expr/api.py +++ b/ibis/expr/api.py @@ -806,11 +806,17 @@ def timestamp( second: int | ir.IntegerValue | Deferred, /, timezone: str | None = None, + nullable: bool = True, ) -> TimestampValue: ... @overload -def timestamp(value_or_year: Any, /, timezone: str | None = None) -> TimestampValue: ... +def timestamp( + value_or_year: str | datetime.datetime, + /, + timezone: str | None = None, + nullable: bool = True, +) -> TimestampValue: ... @deferrable @@ -823,6 +829,7 @@ def timestamp( second=None, /, timezone=None, + nullable: bool = True, ): """Construct a timestamp scalar or column. @@ -843,6 +850,8 @@ def timestamp( The timestamp second component; required if `value_or_year` is a year. timezone The timezone name, or none for a timezone-naive timestamp. + nullable + Whether the resulting timestamp should be nullable. Defaults to True. Returns ------- @@ -886,20 +895,18 @@ def timestamp( is_ymdhms = any(a is not None for a in args[1:]) if is_ymdhms: - if timezone is not None: - raise NotImplementedError( - "Timezone currently not supported when creating a timestamp from components" - ) - return ops.TimestampFromYMDHMS(*args).to_expr() + dtype = dt.Timestamp(timezone=timezone, nullable=nullable, scale=0) + return ops.TimestampFromYMDHMS(*args, dtype=dtype).to_expr() elif isinstance(value_or_year, (numbers.Real, ir.IntegerValue)): raise TypeError("Use ibis.literal(...).as_timestamp() instead") elif isinstance(value_or_year, ir.Expr): - return value_or_year.cast(dt.Timestamp(timezone=timezone)) + return value_or_year.cast(dt.Timestamp(timezone=timezone, nullable=nullable)) else: value = normalize_datetime(value_or_year) tzinfo = normalize_timezone(timezone or value.tzinfo) - timezone = tzinfo.tzname(value) if tzinfo is not None else None - return literal(value, type=dt.Timestamp(timezone=timezone)) + value = value.astimezone(tzinfo) if tzinfo is not None else value + dtype = dt.Timestamp.from_datetime(value, nullable=nullable) + return literal(value, type=dtype) @overload diff --git a/ibis/expr/datatypes/cast.py b/ibis/expr/datatypes/cast.py index 1edfb1e783be..8ef3c1365a9e 100644 --- a/ibis/expr/datatypes/cast.py +++ b/ibis/expr/datatypes/cast.py @@ -142,6 +142,10 @@ def higher_precedence(left: dt.DataType, right: dt.DataType) -> dt.DataType: @public def highest_precedence(dtypes: Iterator[dt.DataType]) -> dt.DataType: """Compute the highest precedence of `dtypes`.""" + # TODO: currently, + # highest_precedence([dt.Timestamp(scale=3), dt.Timestamp(timezone="UTC")]) + # returns dt.Timestamp(timezone="UTC"). + # Perhaps it should return dt.Timestamp(scale=3, timezone="UTC") instead. if collected := list(dtypes): return functools.reduce(higher_precedence, collected) else: diff --git a/ibis/expr/datatypes/core.py b/ibis/expr/datatypes/core.py index 06466473e811..e0a0750a1b3e 100644 --- a/ibis/expr/datatypes/core.py +++ b/ibis/expr/datatypes/core.py @@ -33,6 +33,7 @@ if TYPE_CHECKING: import numpy as np + import pandas as pd import polars as pl import pyarrow as pa from pandas.api.extensions import ExtensionDtype @@ -83,6 +84,62 @@ def dtype( ) -> DataType: ... +if TYPE_CHECKING: + import numpy as np + import polars as pl + import pyarrow as pa + from pandas.api.extensions import ExtensionDtype + + +@overload +def dtype(value: type[int] | Literal["int"], nullable: bool = True) -> Int64: ... +@overload +def dtype( + value: type[str] | Literal["str", "string"], nullable: bool = True +) -> String: ... +@overload +def dtype( + value: type[bool] | Literal["bool", "boolean"], nullable: bool = True +) -> Boolean: ... +@overload +def dtype(value: type[bytes] | Literal["bytes"], nullable: bool = True) -> Binary: ... +@overload +def dtype(value: type[Real] | Literal["float"], nullable: bool = True) -> Float64: ... +@overload +def dtype( + value: type[pydecimal.Decimal] | Literal["decimal"], nullable: bool = True +) -> Decimal: ... +@overload +def dtype( + value: type[pydatetime.datetime] | Literal["timestamp"], nullable: bool = True +) -> Timestamp: ... +@overload +def dtype( + value: type[pydatetime.date] | Literal["date"], nullable: bool = True +) -> Date: ... +@overload +def dtype( + value: type[pydatetime.time] | Literal["time"], nullable: bool = True +) -> Time: ... +@overload +def dtype( + value: type[pydatetime.timedelta] | Literal["interval"], nullable: bool = True +) -> Interval: ... +@overload +def dtype( + value: type[pyuuid.UUID] | Literal["uuid"], nullable: bool = True +) -> UUID: ... +@overload +def dtype( + value: DataType | str | np.dtype | ExtensionDtype | pl.DataType | pa.DataType, + nullable: bool = True, +) -> DataType: ... + + +if TYPE_CHECKING: + import pandas as pd + + @lazy_singledispatch def dtype(value, nullable=True) -> DataType: """Create a DataType object. @@ -696,6 +753,35 @@ def from_unit(cls, unit, timezone=None, nullable=True) -> Self: scale=TimestampUnit.to_scale(unit), timezone=timezone, nullable=nullable ) + @classmethod + def from_datetime(cls, dt: pydatetime.datetime, *, nullable: bool = True): + """Infer from a python datetime.datetime object.""" + if dt.microsecond: + scale = 6 + else: + scale = 0 + + if (tzinfo := dt.tzinfo) is not None: + timezone_string = tzinfo.tzname(dt) + else: + timezone_string = None + return cls(scale=scale, timezone=timezone_string, nullable=nullable) + + @classmethod + def from_pandas( + cls, value: pd.Timestamp, timezone: str | None = None, nullable: bool = True + ): + """Infer from a pandas.Timestamp.""" + if value.nanosecond: + scale = 9 + elif value.microsecond: + scale = 6 + else: + scale = 0 + if timezone is None and value.tz is not None: + timezone = str(value.tz) + return cls(timezone=timezone, scale=scale, nullable=nullable) + @property def unit(self) -> str: """Return the unit of the timestamp.""" diff --git a/ibis/expr/datatypes/value.py b/ibis/expr/datatypes/value.py index ad40f9047ecf..146678494eb0 100644 --- a/ibis/expr/datatypes/value.py +++ b/ibis/expr/datatypes/value.py @@ -10,7 +10,7 @@ from collections.abc import Mapping, Sequence from functools import partial from operator import attrgetter -from typing import Any +from typing import TYPE_CHECKING, Any import toolz from public import public @@ -28,6 +28,9 @@ ) from ibis.expr.datatypes.cast import highest_precedence +if TYPE_CHECKING: + import pandas as pd + @lazy_singledispatch def infer(value: Any) -> dt.DataType: @@ -82,10 +85,7 @@ def infer_date(value: datetime.date) -> dt.Date: @infer.register(datetime.datetime) def infer_timestamp(value: datetime.datetime) -> dt.Timestamp: - if value.tzinfo: - return dt.Timestamp(timezone=str(value.tzinfo)) - else: - return dt.timestamp + return dt.Timestamp.from_datetime(value) @infer.register(datetime.timedelta) @@ -171,11 +171,8 @@ def infer_numpy_scalar(value): @infer.register("pandas.Timestamp") -def infer_pandas_timestamp(value): - if value.tz is not None: - return dt.Timestamp(timezone=str(value.tz)) - else: - return dt.timestamp +def infer_pandas_timestamp(value: pd.Timestamp) -> dt.Timestamp: + return dt.Timestamp.from_pandas(value) @infer.register("pandas.Timedelta") diff --git a/ibis/expr/operations/temporal.py b/ibis/expr/operations/temporal.py index da4ae348b466..b42172f86710 100644 --- a/ibis/expr/operations/temporal.py +++ b/ibis/expr/operations/temporal.py @@ -259,8 +259,8 @@ class TimestampFromYMDHMS(Value): minutes: Value[dt.Integer] seconds: Value[dt.Integer] - dtype = dt.timestamp - shape = rlz.shape_like("args") + dtype: dt.Timestamp + shape = rlz.shape_like(["year", "month", "day", "hours", "minutes", "seconds"]) @public diff --git a/ibis/expr/rules.py b/ibis/expr/rules.py index af1167cb527a..42b9d2ad0f05 100644 --- a/ibis/expr/rules.py +++ b/ibis/expr/rules.py @@ -1,10 +1,11 @@ from __future__ import annotations from itertools import product, starmap -from typing import Optional +from typing import TYPE_CHECKING, Any, Optional from public import public +import ibis.expr.datashape as ds import ibis.expr.datatypes as dt import ibis.expr.operations as ops from ibis import util @@ -13,14 +14,17 @@ from ibis.common.patterns import CoercionError, NoMatch, Pattern from ibis.common.temporal import IntervalUnit +if TYPE_CHECKING: + from collections.abc import Iterable + @public -def highest_precedence_shape(nodes): +def highest_precedence_shape(nodes: Iterable[ops.Value]) -> ds.DataShape: return max(node.shape for node in nodes) @public -def highest_precedence_dtype(nodes): +def highest_precedence_dtype(nodes: Iterable[ops.Value]) -> dt.DataType: """Return the highest precedence type from the passed expressions. Also verifies that there are valid implicit casts between any of the types @@ -29,8 +33,8 @@ def highest_precedence_dtype(nodes): Parameters ---------- - nodes : Iterable[ops.Value] - A sequence of Expressions + nodes + An Iterable of Expressions Returns ------- @@ -42,7 +46,7 @@ def highest_precedence_dtype(nodes): @public -def castable(source, target): +def castable(source: ops.Value, target: ops.Value) -> bool: """Return whether source ir type is implicitly castable to target. Based on the underlying datatypes and the value in case of Literals @@ -52,7 +56,7 @@ def castable(source, target): @public -def comparable(left, right): +def comparable(left: ops.Value, right: ops.Value) -> bool: return castable(left, right) or castable(right, left) @@ -61,28 +65,31 @@ def comparable(left, right): @public -def dtype_like(name): +def dtype_like(names: str | Iterable[str]): @attribute - def dtype(self): - args = getattr(self, name) - args = args if util.is_iterable(args) else [args] - return highest_precedence_dtype(args) + def dtype(self) -> dt.DataType: + return highest_precedence_dtype(_attributes(self, names)) return dtype @public -def shape_like(name): +def shape_like(name: str | Iterable[str]): @attribute - def shape(self): - args = getattr(self, name) - args = args if util.is_iterable(args) else [args] + def shape(self) -> ds.DataShape: + args = _attributes(self, name) args = [a for a in args if a is not None] return highest_precedence_shape(args) return shape +def _attributes(obj: Any, names: str | Iterable[str]) -> tuple: + if isinstance(names, str): + return util.promote_tuple(getattr(obj, names)) + return tuple(getattr(obj, name) for name in names) + + # TODO(kszucs): might just use bounds instead of actual literal values # that could simplify interval binop output_type methods # TODO(kszucs): pre-generate mapping? diff --git a/ibis/expr/tests/test_api.py b/ibis/expr/tests/test_api.py index f58e58471614..805bd4052340 100644 --- a/ibis/expr/tests/test_api.py +++ b/ibis/expr/tests/test_api.py @@ -39,84 +39,105 @@ def test_schema_from_names_and_typesield_names(): @pytest.mark.parametrize( - ("string", "expected_value", "expected_timezone"), + ("string", "expected_value", "expected_dtype"), [ + param( + "2015-01-01 12:34:56", + datetime(2015, 1, 1, 12, 34, 56), + dt.Timestamp(scale=0), + id="from_string_seconds", + ), param( "2015-01-01 12:34:56.789", datetime(2015, 1, 1, 12, 34, 56, 789000), - None, + dt.Timestamp(scale=6), id="from_string_millis", ), param( "2015-01-01 12:34:56.789321", datetime(2015, 1, 1, 12, 34, 56, 789321), - None, + dt.Timestamp(scale=6), id="from_string_micros", ), param( "2015-01-01 12:34:56.789 UTC", datetime(2015, 1, 1, 12, 34, 56, 789000, tzinfo=tzutc()), - "UTC", + dt.Timestamp(timezone="UTC", scale=6), id="from_string_millis_utc", ), param( "2015-01-01 12:34:56.789321 UTC", datetime(2015, 1, 1, 12, 34, 56, 789321, tzinfo=tzutc()), - "UTC", + dt.Timestamp(timezone="UTC", scale=6), id="from_string_micros_utc", ), param( "2015-01-01 12:34:56.789+00:00", datetime(2015, 1, 1, 12, 34, 56, 789000, tzinfo=tzutc()), - "UTC", + dt.Timestamp(timezone="UTC", scale=6), id="from_string_millis_utc_offset", ), param( "2015-01-01 12:34:56.789+01:00", datetime(2015, 1, 1, 12, 34, 56, 789000, tzinfo=tzoffset(None, 3600)), - "UTC+01:00", + dt.Timestamp(timezone="UTC+01:00", scale=6), id="from_string_millis_utc_+1_offset", ), ], ) -def test_timestamp(string, expected_value, expected_timezone): - expr = ibis.timestamp(string) +@pytest.mark.parametrize("nullable", [True, False]) +def test_timestamp(string, expected_value, expected_dtype: dt.Timestamp, nullable): + expr = ibis.timestamp(string, nullable=nullable) op = expr.op() assert isinstance(expr, ibis.expr.types.TimestampScalar) assert op.value == expected_value - assert op.dtype == dt.Timestamp(timezone=expected_timezone) + assert op.dtype == expected_dtype.copy(nullable=nullable) @pytest.mark.parametrize( - ("string", "expected_value", "expected_timezone"), + ("string", "expected_value", "expected_dtype"), [ + param( + "2015-01-01 12:34:56", + datetime(2015, 1, 1, 12, 34, 56), + dt.Timestamp(scale=0), + id="from_pandas_seconds", + ), param( "2015-01-01 12:34:56.789", datetime(2015, 1, 1, 12, 34, 56, 789000), - None, + dt.Timestamp(scale=6), id="from_pandas_millis", ), param( "2015-01-01 12:34:56.789+00:00", datetime(2015, 1, 1, 12, 34, 56, 789000, tzinfo=tzutc()), - "UTC", + dt.Timestamp(scale=6, timezone="UTC"), id="from_pandas_millis_utc", ), + # TODO: make this work with nanosecond precision + # param( + # "2015-01-01 12:34:56.789123456+00:00", + # datetime(2015, 1, 1, 12, 34, 56, 789000, tzinfo=tzutc()), + # dt.Timestamp(scale=9, timezone="UTC"), + # id="from_pandas_nanos_utc", + # ), param( "2015-01-01 12:34:56.789+03:00", datetime(2015, 1, 1, 12, 34, 56, 789000, tzinfo=tzoffset(None, 10800)), - "UTC+03:00", + dt.Timestamp(scale=6, timezone="UTC+03:00"), id="from_pandas_millis_+3_offset", ), ], ) -def test_timestamp_pandas(string, expected_value, expected_timezone): +@pytest.mark.parametrize("nullable", [True, False]) +def test_timestamp_pandas(string, expected_value, expected_dtype, nullable): pd = pytest.importorskip("pandas") - expr = ibis.timestamp(pd.Timestamp(string)) + expr = ibis.timestamp(pd.Timestamp(string), nullable=nullable) op = expr.op() assert isinstance(expr, ibis.expr.types.TimestampScalar) assert op.value == expected_value - assert op.dtype == dt.Timestamp(timezone=expected_timezone) + assert op.dtype == expected_dtype.copy(nullable=nullable) @pytest.mark.parametrize( diff --git a/ibis/tests/expr/test_temporal.py b/ibis/tests/expr/test_temporal.py index 9daf487abf5f..2679986c5697 100644 --- a/ibis/tests/expr/test_temporal.py +++ b/ibis/tests/expr/test_temporal.py @@ -871,7 +871,7 @@ def test_time_expression(): def test_timestamp_literals(): - assert ibis.timestamp(2022, 2, 4, 16, 20, 00).type() == dt.timestamp + assert ibis.timestamp(2022, 2, 4, 16, 20, 00).type() == dt.Timestamp(scale=0) def test_timestamp_literal(): @@ -880,11 +880,13 @@ def test_timestamp_literal(): assert expr.equals(sol) expr = ibis.timestamp("2022-02-04T01:02:03") - sol = ibis.literal("2022-02-04T01:02:03", type=dt.timestamp) + sol = ibis.literal("2022-02-04T01:02:03", type=dt.Timestamp(scale=0)) assert expr.equals(sol) expr = ibis.timestamp("2022-02-04T01:02:03Z") - sol = ibis.literal("2022-02-04T01:02:03", type=dt.Timestamp(timezone="UTC")) + sol = ibis.literal( + "2022-02-04T01:02:03", type=dt.Timestamp(scale=0, timezone="UTC") + ) assert expr.equals(sol) diff --git a/ibis/tests/expr/test_timestamp.py b/ibis/tests/expr/test_timestamp.py index d175afe6e440..13516fc0e871 100644 --- a/ibis/tests/expr/test_timestamp.py +++ b/ibis/tests/expr/test_timestamp.py @@ -104,7 +104,12 @@ def test_comparisons_pandas_timestamp(alltypes): def test_timestamp_precedence(): ts = ibis.literal(datetime.now()) highest_type = rlz.highest_precedence_dtype([ibis.null().op(), ts.op()]) - assert highest_type == dt.timestamp + assert highest_type == dt.Timestamp(scale=6) + + highest = dt.highest_precedence( + [dt.Timestamp(scale=6), dt.Timestamp(scale=3), dt.Timestamp(timezone="UTC")] + ) + assert highest == dt.Timestamp(timezone="UTC") @pytest.mark.parametrize(